1mod bpf_skel;
9pub use bpf_skel::*;
10pub mod bpf_intf;
11pub use bpf_intf::*;
12
13mod stats;
14use std::collections::BTreeMap;
15use std::ffi::c_int;
16use std::fmt::Write;
17use std::fs::File;
18use std::io::{BufRead, BufReader};
19use std::mem::MaybeUninit;
20use std::sync::atomic::AtomicBool;
21use std::sync::atomic::Ordering;
22use std::sync::Arc;
23use std::time::Duration;
24
25use anyhow::anyhow;
26use anyhow::bail;
27use anyhow::Context;
28use anyhow::Result;
29use clap::Parser;
30use crossbeam::channel::RecvTimeoutError;
31use libbpf_rs::OpenObject;
32use libbpf_rs::ProgramInput;
33use log::{debug, info, warn};
34use scx_stats::prelude::*;
35use scx_utils::autopower::{fetch_power_profile, PowerProfile};
36use scx_utils::build_id;
37use scx_utils::compat;
38use scx_utils::libbpf_clap_opts::LibbpfOpts;
39use scx_utils::pm::{cpu_idle_resume_latency_supported, update_cpu_idle_resume_latency};
40use scx_utils::scx_ops_attach;
41use scx_utils::scx_ops_load;
42use scx_utils::scx_ops_open;
43use scx_utils::try_set_rlimit_infinity;
44use scx_utils::uei_exited;
45use scx_utils::uei_report;
46use scx_utils::CoreType;
47use scx_utils::Cpumask;
48use scx_utils::Topology;
49use scx_utils::UserExitInfo;
50use scx_utils::NR_CPU_IDS;
51use stats::Metrics;
52
/// Scheduler name used for logging and version reporting.
const SCHEDULER_NAME: &str = "scx_flash";

/// Power mode used to select which CPUs form the primary scheduling
/// domain (see `get_primary_cpus()`).
#[derive(PartialEq)]
enum Powermode {
    Turbo,       // turbo-capable big cores only
    Performance, // all big cores
    Powersave,   // little (efficiency) cores only
    Any,         // every CPU, regardless of core type
}
62
63fn get_primary_cpus(mode: Powermode) -> std::io::Result<Vec<usize>> {
64 let topo = Topology::new().unwrap();
65
66 let cpus: Vec<usize> = topo
67 .all_cores
68 .values()
69 .flat_map(|core| &core.cpus)
70 .filter_map(|(cpu_id, cpu)| match (&mode, &cpu.core_type) {
71 (Powermode::Turbo, CoreType::Big { turbo: true }) |
73 (Powermode::Performance, CoreType::Big { .. }) |
75 (Powermode::Powersave, CoreType::Little) => Some(*cpu_id),
77 (Powermode::Any, ..) => Some(*cpu_id),
78 _ => None,
79 })
80 .collect();
81
82 Ok(cpus)
83}
84
/// Convert a list of CPU ids into a hex cpumask string (e.g. "0x07").
///
/// Returns "none" when `cpus` is empty; otherwise the mask is "0x"
/// followed by hex bytes, most-significant byte first, sized to cover
/// the highest CPU id present.
fn cpus_to_cpumask(cpus: &[usize]) -> String {
    if cpus.is_empty() {
        return String::from("none");
    }

    // One bit per CPU up to the largest id, rounded up to whole bytes.
    let max_cpu_id = *cpus.iter().max().unwrap();
    let mut bitmask = vec![0u8; (max_cpu_id + 1 + 7) / 8];

    for &cpu_id in cpus {
        bitmask[cpu_id / 8] |= 1 << (cpu_id % 8);
    }

    // Emit most-significant byte first so the string reads as one number.
    let hex_str: String = bitmask.iter().rev().fold(String::new(), |mut f, byte| {
        let _ = write!(&mut f, "{:02x}", byte);
        f
    });

    format!("0x{}", hex_str)
}
112
// Command-line options. NOTE: plain `//` comments are used on purpose —
// clap-derive turns `///` doc comments into --help text, which would
// change the program's user-visible output.
#[derive(Debug, clap::Parser)]
#[command(
    name = "scx_flash",
    version,
    disable_version_flag = true,
    about = "A deadline-based scheduler focused on fairness and performance predictability.",
    long_about = r#"
scx_flash is scheduler that focuses on ensuring fairness and performance predictability.

It operates using an earliest deadline first (EDF) policy. The deadline of each task deadline is
defined as:

 deadline = vruntime + exec_vruntime

Here, `vruntime` represents the task's total accumulated runtime, inversely scaled by its weight,
while `exec_vruntime` accounts for the scaled runtime accumulated since the last sleep event.

Fairness is driven by `vruntime`, while `exec_vruntime` helps prioritize latency-sensitive tasks
that sleep frequently and use the CPU in short bursts.

To prevent sleeping tasks from gaining excessive priority, the maximum vruntime credit a task can
accumulate while sleeping is capped by `slice_lag`, scaled by the task’s voluntary context switch
rate (`max_avg_nvcsw`): tasks that sleep frequently can receive a larger credit, while tasks that
perform fewer, longer sleeps are granted a smaller credit. This encourages responsive behavior
without excessively boosting idle tasks.

When dynamic fairness is enabled (`--slice-lag-scaling`), the maximum vruntime sleep credit is also
scaled depending on the user-mode CPU utilization:

 - At low utilization (mostly idle system), the impact of `vruntime` is reduced, and scheduling
   decisions are driven primarily by `exec_vruntime`. This favors bursty, latency-sensitive
   workloads (i.e., hackbench), improving their performance and latency.

 - At high utilization, sleeping tasks regain their vruntime credit, increasing the influence of
   `vruntime` in deadline calculation. This restores fairness and ensures system responsiveness
   under load.

This adaptive behavior allows the scheduler to prioritize intense message-passing workloads when
the system is lightly loaded, while maintaining fairness and responsiveness when the system is
saturated or overcommitted.
"#
)]
struct Opts {
    // Exit debug dump buffer length (0 = default); forwarded to
    // flash_ops.exit_dump_len before load.
    #[clap(long, default_value = "0")]
    exit_dump_len: u32,

    // Maximum time slice, in microseconds (stored in BPF as slice_max, ns).
    #[clap(short = 's', long, default_value = "4096")]
    slice_us: u64,

    // Minimum time slice, in microseconds (BPF slice_min); must not
    // exceed --slice-us (asserted in Scheduler::init).
    #[clap(short = 'S', long, default_value = "128")]
    slice_us_min: u64,

    // Maximum vruntime sleep credit, in microseconds (BPF slice_lag);
    // see the long_about text above.
    #[clap(short = 'l', long, default_value = "4096")]
    slice_us_lag: u64,

    // Scale the vruntime sleep credit by user-mode CPU utilization
    // ("dynamic fairness", BPF slice_lag_scaling).
    #[clap(short = 'L', long, action = clap::ArgAction::SetTrue)]
    slice_lag_scaling: bool,

    // Runtime lag, in microseconds (BPF run_lag).
    #[clap(short = 'r', long, default_value = "32768")]
    run_us_lag: u64,

    // Cap on the average voluntary context-switch rate used to scale the
    // sleep credit (BPF max_avg_nvcsw).
    #[clap(short = 'c', long, default_value = "128")]
    max_avg_nvcsw: u64,

    // CPU busy threshold in percent; a negative value enables auto mode,
    // where user space periodically feeds /proc/stat utilization to BPF
    // (see Scheduler::run). Non-negative values are converted to a
    // fixed-point fraction of 1024 at init time.
    #[clap(short = 'C', long, allow_hyphen_values = true, default_value = "-1")]
    cpu_busy_thresh: i64,

    // CPU throttling interval in microseconds (0 = disabled, BPF
    // throttle_ns); when > 0 kthreads are always dispatched locally.
    #[clap(short = 't', long, default_value = "0")]
    throttle_us: u64,

    // Idle QoS resume latency in microseconds, applied to every CPU at
    // init and restored on drop; a negative value leaves the system
    // defaults untouched.
    #[clap(short = 'I', long, allow_hyphen_values = true, default_value = "32")]
    idle_resume_us: i64,

    // Enable tickless scheduling (BPF tickless_sched).
    #[clap(short = 'T', long, action = clap::ArgAction::SetTrue)]
    tickless: bool,

    // Use round-robin scheduling (BPF rr_sched).
    #[clap(short = 'R', long, action = clap::ArgAction::SetTrue)]
    rr_sched: bool,

    // Disable the in-kernel built-in idle CPU selection (sets BPF
    // builtin_idle = false).
    #[clap(short = 'b', long, action = clap::ArgAction::SetTrue)]
    no_builtin_idle: bool,

    // Dispatch per-CPU tasks to their local queue (BPF local_pcpu).
    #[clap(short = 'p', long, action = clap::ArgAction::SetTrue)]
    local_pcpu: bool,

    // Enable direct dispatch (BPF direct_dispatch).
    #[clap(short = 'D', long, action = clap::ArgAction::SetTrue)]
    direct_dispatch: bool,

    // Keep tasks sticky to their CPU (BPF sticky_cpu).
    #[clap(short = 'y', long, action = clap::ArgAction::SetTrue)]
    sticky_cpu: bool,

    // Use native task priorities (BPF native_priority).
    #[clap(short = 'n', long, action = clap::ArgAction::SetTrue)]
    native_priority: bool,

    // Dispatch kthreads locally (BPF local_kthreads; implied when
    // --throttle-us > 0).
    #[clap(short = 'k', long, action = clap::ArgAction::SetTrue)]
    local_kthreads: bool,

    // Disable synchronous-wakeup optimizations (BPF no_wake_sync).
    #[clap(short = 'w', long, action = clap::ArgAction::SetTrue)]
    no_wake_sync: bool,

    // Primary CPU domain: "auto" (follow the power profile),
    // "performance", "powersave", "turbo", "all", or an explicit hex
    // cpumask (see Scheduler::resolve_energy_domain).
    #[clap(short = 'm', long, default_value = "auto")]
    primary_domain: String,

    // Skip building L2 cache sibling domains.
    #[clap(long, action = clap::ArgAction::SetTrue)]
    disable_l2: bool,

    // Skip building L3 cache sibling domains.
    #[clap(long, action = clap::ArgAction::SetTrue)]
    disable_l3: bool,

    // Ignore SMT topology even when the hardware supports it.
    #[clap(long, action = clap::ArgAction::SetTrue)]
    disable_smt: bool,

    // Disable NUMA optimizations (also implied on single-node systems).
    #[clap(long, action = clap::ArgAction::SetTrue)]
    disable_numa: bool,

    // Let the scheduler drive the cpufreq performance level
    // automatically (see Scheduler::init_cpufreq_perf).
    #[clap(short = 'f', long, action = clap::ArgAction::SetTrue)]
    cpufreq: bool,

    // Run the scheduler and print statistics every N seconds.
    #[clap(long)]
    stats: Option<f64>,

    // Monitor-only mode: print statistics of an already-running
    // scheduler every N seconds, then exit.
    #[clap(long)]
    monitor: Option<f64>,

    // Enable BPF-side debugging (BPF rodata.debug).
    #[clap(short = 'd', long, action = clap::ArgAction::SetTrue)]
    debug: bool,

    // Enable verbose libbpf output.
    #[clap(short = 'v', long, action = clap::ArgAction::SetTrue)]
    verbose: bool,

    // Print the scheduler version and exit.
    #[clap(short = 'V', long, action = clap::ArgAction::SetTrue)]
    version: bool,

    // Describe the available statistics metadata and exit.
    #[clap(long)]
    help_stats: bool,

    #[clap(flatten, next_help_heading = "Libbpf Options")]
    pub libbpf: LibbpfOpts,
}
372
/// Aggregate CPU time counters sampled from the first "cpu " line of
/// /proc/stat; values are cumulative and monotonically increasing.
#[derive(Debug, Clone, Copy)]
struct CpuTimes {
    user: u64,  // time spent in user mode
    nice: u64,  // time spent in user mode at reduced (nice) priority
    total: u64, // sum of the first eight time fields of the "cpu " line
}
379
/// Userspace side of the scx_flash scheduler: owns the loaded BPF
/// skeleton, the attached struct_ops link and the stats server.
struct Scheduler<'a> {
    skel: BpfSkel<'a>,                      // loaded BPF skeleton
    struct_ops: Option<libbpf_rs::Link>,    // attach link; taken in run() to detach
    opts: &'a Opts,                         // parsed command-line options
    topo: Topology,                         // host CPU topology snapshot
    power_profile: PowerProfile,            // power profile detected at init
    stats_server: StatsServer<(), Metrics>, // serves Metrics to stats clients
    user_restart: bool,                     // set when run() requests a restart
}
389
390impl<'a> Scheduler<'a> {
    /// Open, configure, load and attach the BPF scheduler, returning a
    /// ready-to-run `Scheduler`.
    ///
    /// Configuration flows from `opts` into the BPF read-only data
    /// before load; the primary domain, cpufreq level and SMT/cache
    /// topology are programmed via BPF test-run programs after load.
    fn init(opts: &'a Opts, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
        try_set_rlimit_infinity();

        // The maximum slice must never be smaller than the minimum one.
        assert!(opts.slice_us >= opts.slice_us_min);

        let topo = Topology::new().unwrap();

        // SMT handling is active only when the hardware reports SMT and
        // the user did not disable it.
        let smt_enabled = !opts.disable_smt && topo.smt_enabled;

        info!(
            "{} {} {}",
            SCHEDULER_NAME,
            build_id::full_version(env!("CARGO_PKG_VERSION")),
            if smt_enabled { "SMT on" } else { "SMT off" }
        );

        info!(
            "scheduler options: {}",
            std::env::args().collect::<Vec<_>>().join(" ")
        );

        // Optionally lower the CPU idle resume latency (PM QoS) on every
        // CPU; the original values are restored in Drop.
        if opts.idle_resume_us >= 0 {
            if !cpu_idle_resume_latency_supported() {
                warn!("idle resume latency not supported");
            } else {
                info!("Setting idle QoS to {} us", opts.idle_resume_us);
                for cpu in topo.all_cpus.values() {
                    update_cpu_idle_resume_latency(
                        cpu.id,
                        opts.idle_resume_us.try_into().unwrap(),
                    )?;
                }
            }
        }

        // Count only NUMA nodes that actually contain CPUs.
        let nr_nodes = topo
            .nodes
            .values()
            .filter(|node| !node.all_cpus.is_empty())
            .count();
        info!("NUMA nodes: {}", nr_nodes);

        // NUMA optimizations are pointless on single-node systems.
        let numa_disabled = opts.disable_numa || nr_nodes == 1;
        if numa_disabled {
            info!("Disabling NUMA optimizations");
        }

        // Resolve the primary CPU domain from the user option and the
        // currently detected power profile.
        let power_profile = Self::power_profile();
        let domain =
            Self::resolve_energy_domain(&opts.primary_domain, power_profile).map_err(|err| {
                anyhow!(
                    "failed to resolve primary domain '{}': {}",
                    &opts.primary_domain,
                    err
                )
            })?;

        // Open the BPF skeleton (not yet loaded).
        let mut skel_builder = BpfSkelBuilder::default();
        skel_builder.obj_builder.debug(opts.verbose);
        let open_opts = opts.libbpf.clone().into_bpf_open_opts();
        let mut skel = scx_ops_open!(skel_builder, open_object, flash_ops, open_opts)?;

        skel.struct_ops.flash_ops_mut().exit_dump_len = opts.exit_dump_len;

        // Populate the BPF read-only configuration; this must happen
        // before scx_ops_load!() seals the rodata map.
        let rodata = skel.maps.rodata_data.as_mut().unwrap();
        rodata.debug = opts.debug;
        rodata.smt_enabled = smt_enabled;
        rodata.numa_disabled = numa_disabled;
        rodata.rr_sched = opts.rr_sched;
        rodata.local_pcpu = opts.local_pcpu;
        rodata.direct_dispatch = opts.direct_dispatch;
        rodata.sticky_cpu = opts.sticky_cpu;
        rodata.no_wake_sync = opts.no_wake_sync;
        rodata.tickless_sched = opts.tickless;
        rodata.native_priority = opts.native_priority;
        rodata.slice_lag_scaling = opts.slice_lag_scaling;
        rodata.builtin_idle = !opts.no_builtin_idle;
        // Time options arrive in microseconds and are stored in ns.
        rodata.slice_max = opts.slice_us * 1000;
        rodata.slice_min = opts.slice_us_min * 1000;
        rodata.slice_lag = opts.slice_us_lag * 1000;
        rodata.run_lag = opts.run_us_lag * 1000;
        rodata.throttle_ns = opts.throttle_us * 1000;
        rodata.max_avg_nvcsw = opts.max_avg_nvcsw;
        rodata.primary_all = domain.weight() == *NR_CPU_IDS;

        // Negative threshold = auto mode (utilization fed from user
        // space); otherwise convert the percentage to a fixed-point
        // fraction of 1024.
        rodata.cpu_busy_thresh = if opts.cpu_busy_thresh < 0 {
            opts.cpu_busy_thresh
        } else {
            opts.cpu_busy_thresh * 1024 / 100
        };

        // Throttling requires kthreads to be dispatched locally.
        rodata.local_kthreads = opts.local_kthreads || opts.throttle_us > 0;

        rodata.__COMPAT_SCX_PICK_IDLE_IN_NODE = *compat::SCX_PICK_IDLE_IN_NODE;

        // Assemble the scx_ops flags; per-node idle tracking only makes
        // sense when NUMA optimizations are active.
        skel.struct_ops.flash_ops_mut().flags = *compat::SCX_OPS_ENQ_EXITING
            | *compat::SCX_OPS_ENQ_LAST
            | *compat::SCX_OPS_ENQ_MIGRATION_DISABLED
            | *compat::SCX_OPS_ALLOW_QUEUED_WAKEUP
            | if numa_disabled {
                0
            } else {
                *compat::SCX_OPS_BUILTIN_IDLE_PER_NODE
            };
        info!(
            "scheduler flags: {:#x}",
            skel.struct_ops.flash_ops_mut().flags
        );

        // Load the BPF programs and maps.
        let mut skel = scx_ops_load!(skel, flash_ops, uei)?;

        // Program the primary domain into BPF.
        Self::init_energy_domain(&mut skel, &domain).map_err(|err| {
            anyhow!(
                "failed to initialize primary domain 0x{:x}: {}",
                domain,
                err
            )
        })?;

        if let Err(err) = Self::init_cpufreq_perf(&mut skel, &opts.primary_domain, opts.cpufreq) {
            bail!(
                "failed to initialize cpufreq performance level: error {}",
                err
            );
        }

        // Describe the CPU topology (SMT and cache siblings) to BPF.
        if smt_enabled {
            Self::init_smt_domains(&mut skel, &topo)?;
        }

        if !opts.disable_l2 {
            Self::init_l2_cache_domains(&mut skel, &topo)?;
        }
        if !opts.disable_l3 {
            Self::init_l3_cache_domains(&mut skel, &topo)?;
        }

        // Attach the scheduler and start serving stats requests.
        let struct_ops = Some(scx_ops_attach!(skel, flash_ops)?);
        let stats_server = StatsServer::new(stats::server_data()).launch()?;

        Ok(Self {
            skel,
            struct_ops,
            opts,
            topo,
            power_profile,
            stats_server,
            user_restart: false,
        })
    }
561
    /// Run the `enable_primary_cpu` BPF test-run program to add `cpu` to
    /// the primary domain; a negative id resets the domain (see
    /// init_energy_domain).
    ///
    /// Returns the BPF program's non-zero return value on failure.
    fn enable_primary_cpu(skel: &mut BpfSkel<'_>, cpu: i32) -> Result<(), u32> {
        let prog = &mut skel.progs.enable_primary_cpu;
        let mut args = cpu_arg {
            cpu_id: cpu as c_int,
        };
        let input = ProgramInput {
            // SAFETY: `args` is a plain C-layout struct that outlives the
            // test_run() call below; viewing it as a mutable byte slice of
            // exactly size_of_val(&args) bytes is valid.
            context_in: Some(unsafe {
                std::slice::from_raw_parts_mut(
                    &mut args as *mut _ as *mut u8,
                    std::mem::size_of_val(&args),
                )
            }),
            ..Default::default()
        };
        let out = prog.test_run(input).unwrap();
        if out.return_value != 0 {
            return Err(out.return_value);
        }

        Ok(())
    }
583
584 fn epp_to_cpumask(profile: Powermode) -> Result<Cpumask> {
585 let mut cpus = get_primary_cpus(profile).unwrap_or_default();
586 if cpus.is_empty() {
587 cpus = get_primary_cpus(Powermode::Any).unwrap_or_default();
588 }
589 Cpumask::from_str(&cpus_to_cpumask(&cpus))
590 }
591
    /// Translate the `--primary-domain` option into a concrete Cpumask.
    ///
    /// Accepts the symbolic names "powersave", "performance", "turbo",
    /// "all" and "auto" (which follows the detected power profile); any
    /// other string is parsed as an explicit cpumask.
    fn resolve_energy_domain(primary_domain: &str, power_profile: PowerProfile) -> Result<Cpumask> {
        let domain = match primary_domain {
            "powersave" => Self::epp_to_cpumask(Powermode::Powersave)?,
            "performance" => Self::epp_to_cpumask(Powermode::Performance)?,
            "turbo" => Self::epp_to_cpumask(Powermode::Turbo)?,
            // In "auto" mode only power-saving profiles restrict the
            // domain; every other profile uses all CPUs.
            "auto" => match power_profile {
                PowerProfile::Powersave => Self::epp_to_cpumask(Powermode::Powersave)?,
                PowerProfile::Balanced { power: true } => {
                    Self::epp_to_cpumask(Powermode::Powersave)?
                }
                PowerProfile::Balanced { power: false }
                | PowerProfile::Performance
                | PowerProfile::Unknown => Self::epp_to_cpumask(Powermode::Any)?,
            },
            "all" => Self::epp_to_cpumask(Powermode::Any)?,
            &_ => Cpumask::from_str(primary_domain)?,
        };

        Ok(domain)
    }
612
613 fn init_energy_domain(skel: &mut BpfSkel<'_>, domain: &Cpumask) -> Result<()> {
614 info!("primary CPU domain = 0x{:x}", domain);
615
616 if let Err(err) = Self::enable_primary_cpu(skel, -1) {
618 bail!("failed to reset primary domain: error {}", err);
619 }
620
621 for cpu in 0..*NR_CPU_IDS {
623 if domain.test_cpu(cpu) {
624 if let Err(err) = Self::enable_primary_cpu(skel, cpu as i32) {
625 bail!("failed to add CPU {} to primary domain: error {}", cpu, err);
626 }
627 }
628 }
629
630 Ok(())
631 }
632
633 fn init_cpufreq_perf(
635 skel: &mut BpfSkel<'_>,
636 primary_domain: &String,
637 auto: bool,
638 ) -> Result<()> {
639 let perf_lvl: i64 = match primary_domain.as_str() {
642 "powersave" => 0,
643 _ if auto => -1,
644 _ => 1024,
645 };
646 info!(
647 "cpufreq performance level: {}",
648 match perf_lvl {
649 1024 => "max".into(),
650 0 => "min".into(),
651 n if n < 0 => "auto".into(),
652 _ => perf_lvl.to_string(),
653 }
654 );
655 skel.maps.bss_data.as_mut().unwrap().cpufreq_perf_lvl = perf_lvl;
656
657 Ok(())
658 }
659
660 fn power_profile() -> PowerProfile {
661 let profile = fetch_power_profile(true);
662 if profile == PowerProfile::Unknown {
663 fetch_power_profile(false)
664 } else {
665 profile
666 }
667 }
668
    /// Re-check the system power profile and react to changes.
    ///
    /// Returns true when the scheduler must be restarted (the profile
    /// changed while the primary domain is "auto"); otherwise refreshes
    /// the cpufreq performance level in place and returns false.
    fn refresh_sched_domain(&mut self) -> bool {
        // If the profile was unknown at startup there is nothing to track.
        if self.power_profile != PowerProfile::Unknown {
            let power_profile = Self::power_profile();
            if power_profile != self.power_profile {
                self.power_profile = power_profile;

                // An "auto" domain depends on the profile, so the whole
                // scheduler must be re-initialized.
                if self.opts.primary_domain == "auto" {
                    return true;
                }
                if let Err(err) = Self::init_cpufreq_perf(
                    &mut self.skel,
                    &self.opts.primary_domain,
                    self.opts.cpufreq,
                ) {
                    warn!("failed to refresh cpufreq performance level: error {}", err);
                }
            }
        }

        false
    }
690
    /// Run the `enable_sibling_cpu` BPF test-run program to register
    /// `sibling_cpu` as a sibling of `cpu` at topology level `lvl`
    /// (0 = SMT, 2 = L2 cache, 3 = L3 cache — see the callers).
    ///
    /// Returns the BPF program's non-zero return value on failure.
    fn enable_sibling_cpu(
        skel: &mut BpfSkel<'_>,
        lvl: usize,
        cpu: usize,
        sibling_cpu: usize,
    ) -> Result<(), u32> {
        let prog = &mut skel.progs.enable_sibling_cpu;
        let mut args = domain_arg {
            lvl_id: lvl as c_int,
            cpu_id: cpu as c_int,
            sibling_cpu_id: sibling_cpu as c_int,
        };
        let input = ProgramInput {
            // SAFETY: `args` is a plain C-layout struct that outlives the
            // test_run() call below; viewing it as a mutable byte slice of
            // exactly size_of_val(&args) bytes is valid.
            context_in: Some(unsafe {
                std::slice::from_raw_parts_mut(
                    &mut args as *mut _ as *mut u8,
                    std::mem::size_of_val(&args),
                )
            }),
            ..Default::default()
        };
        let out = prog.test_run(input).unwrap();
        if out.return_value != 0 {
            return Err(out.return_value);
        }

        Ok(())
    }
719
720 fn init_smt_domains(skel: &mut BpfSkel<'_>, topo: &Topology) -> Result<(), std::io::Error> {
721 let smt_siblings = topo.sibling_cpus();
722
723 info!("SMT sibling CPUs: {:?}", smt_siblings);
724 for (cpu, sibling_cpu) in smt_siblings.iter().enumerate() {
725 Self::enable_sibling_cpu(skel, 0, cpu, *sibling_cpu as usize).unwrap();
726 }
727
728 Ok(())
729 }
730
731 fn are_smt_siblings(topo: &Topology, cpus: &[usize]) -> bool {
732 if cpus.len() <= 1 {
734 return true;
735 }
736
737 let first_cpu = cpus[0];
739 let smt_siblings = topo.sibling_cpus();
740 cpus.iter().all(|&cpu| {
741 cpu == first_cpu
742 || smt_siblings[cpu] == first_cpu as i32
743 || (smt_siblings[first_cpu] >= 0 && smt_siblings[first_cpu] == cpu as i32)
744 })
745 }
746
    /// Group CPUs by their cache id at `cache_lvl` (2 = L2, 3 = LLC/L3)
    /// and register every CPU pair sharing a cache instance through
    /// `enable_sibling_cpu_fn`.
    ///
    /// Panics if `cache_lvl` is neither 2 nor 3.
    fn init_cache_domains(
        skel: &mut BpfSkel<'_>,
        topo: &Topology,
        cache_lvl: usize,
        enable_sibling_cpu_fn: &dyn Fn(&mut BpfSkel<'_>, usize, usize, usize) -> Result<(), u32>,
    ) -> Result<(), std::io::Error> {
        // Map cache id -> list of CPUs sharing that cache instance.
        let mut cache_id_map: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
        for core in topo.all_cores.values() {
            for (cpu_id, cpu) in &core.cpus {
                let cache_id = match cache_lvl {
                    2 => cpu.l2_id,
                    3 => cpu.llc_id,
                    _ => panic!("invalid cache level {}", cache_lvl),
                };
                cache_id_map.entry(cache_id).or_default().push(*cpu_id);
            }
        }

        for (cache_id, cpus) in cache_id_map {
            // A single CPU has no siblings to register.
            if cpus.len() <= 1 {
                continue;
            }

            // Skip sets already fully covered by the SMT domain.
            if Self::are_smt_siblings(topo, &cpus) {
                continue;
            }

            info!(
                "L{} cache ID {}: sibling CPUs: {:?}",
                cache_lvl, cache_id, cpus
            );
            // Register every ordered (cpu, sibling) pair; failures are
            // logged but do not abort initialization.
            for cpu in &cpus {
                for sibling_cpu in &cpus {
                    if enable_sibling_cpu_fn(skel, cache_lvl, *cpu, *sibling_cpu).is_err() {
                        warn!(
                            "L{} cache ID {}: failed to set CPU {} sibling {}",
                            cache_lvl, cache_id, *cpu, *sibling_cpu
                        );
                    }
                }
            }
        }

        Ok(())
    }
796
797 fn init_l2_cache_domains(
798 skel: &mut BpfSkel<'_>,
799 topo: &Topology,
800 ) -> Result<(), std::io::Error> {
801 Self::init_cache_domains(skel, topo, 2, &|skel, lvl, cpu, sibling_cpu| {
802 Self::enable_sibling_cpu(skel, lvl, cpu, sibling_cpu)
803 })
804 }
805
806 fn init_l3_cache_domains(
807 skel: &mut BpfSkel<'_>,
808 topo: &Topology,
809 ) -> Result<(), std::io::Error> {
810 Self::init_cache_domains(skel, topo, 3, &|skel, lvl, cpu, sibling_cpu| {
811 Self::enable_sibling_cpu(skel, lvl, cpu, sibling_cpu)
812 })
813 }
814
    /// Snapshot the scheduler counters exported by BPF (bss data) into a
    /// `Metrics` value for the stats server.
    fn get_metrics(&self) -> Metrics {
        let bss_data = self.skel.maps.bss_data.as_ref().unwrap();
        Metrics {
            nr_running: bss_data.nr_running,
            nr_cpus: bss_data.nr_online_cpus,
            nr_kthread_dispatches: bss_data.nr_kthread_dispatches,
            nr_direct_dispatches: bss_data.nr_direct_dispatches,
            nr_shared_dispatches: bss_data.nr_shared_dispatches,
        }
    }
825
    /// True when the BPF scheduler has exited (checked via the user
    /// exit info area).
    pub fn exited(&mut self) -> bool {
        uei_exited!(&self.skel, uei)
    }
829
830 fn compute_user_cpu_pct(prev: &CpuTimes, curr: &CpuTimes) -> Option<u64> {
831 let total_diff = curr.total.saturating_sub(prev.total);
832 let user_diff = (curr.user + curr.nice).saturating_sub(prev.user + prev.nice);
833
834 if total_diff > 0 {
835 let user_ratio = user_diff as f64 / total_diff as f64;
836 Some((user_ratio * 1024.0).round() as u64)
837 } else {
838 None
839 }
840 }
841
842 fn read_cpu_times() -> Option<CpuTimes> {
843 let file = File::open("/proc/stat").ok()?;
844 let reader = BufReader::new(file);
845
846 for line in reader.lines() {
847 let line = line.ok()?;
848 if line.starts_with("cpu ") {
849 let fields: Vec<&str> = line.split_whitespace().collect();
850 if fields.len() < 5 {
851 return None;
852 }
853
854 let user: u64 = fields[1].parse().ok()?;
855 let nice: u64 = fields[2].parse().ok()?;
856
857 let total: u64 = fields
859 .iter()
860 .skip(1)
861 .take(8)
862 .filter_map(|v| v.parse::<u64>().ok())
863 .sum();
864
865 return Some(CpuTimes { user, nice, total });
866 }
867 }
868
869 None
870 }
871
    /// Main scheduler loop: runs until shutdown is requested or the BPF
    /// side exits, periodically feeding user-mode CPU utilization to BPF
    /// and serving stats requests.
    fn run(&mut self, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
        let mut prev_cputime = Self::read_cpu_times().expect("Failed to read initial CPU stats");
        let (res_ch, req_ch) = self.stats_server.channels();

        while !shutdown.load(Ordering::Relaxed) && !self.exited() {
            // A power-profile change with an "auto" domain requires a
            // full restart to rebuild the primary domain.
            if self.refresh_sched_domain() {
                self.user_restart = true;
                break;
            }

            // In auto mode (negative threshold), sample /proc/stat and
            // publish the user-mode utilization (0..1024) to BPF.
            if self.opts.cpu_busy_thresh < 0 {
                if let Some(curr_cputime) = Self::read_cpu_times() {
                    if let Some(cpu_util) = Self::compute_user_cpu_pct(&prev_cputime, &curr_cputime)
                    {
                        self.skel.maps.bss_data.as_mut().unwrap().cpu_util = cpu_util;
                    }
                    prev_cputime = curr_cputime;
                }
            }

            // Serve at most one stats request, or time out after 1s so
            // the shutdown/exit conditions are re-checked regularly.
            match req_ch.recv_timeout(Duration::from_secs(1)) {
                Ok(()) => res_ch.send(self.get_metrics())?,
                Err(RecvTimeoutError::Timeout) => {}
                Err(e) => Err(e)?,
            }
        }

        // Drop the struct_ops link (detaching the scheduler) before
        // collecting the exit info.
        let _ = self.struct_ops.take();
        uei_report!(&self.skel, uei)
    }
902}
903
904impl Drop for Scheduler<'_> {
905 fn drop(&mut self) {
906 info!("Unregister {SCHEDULER_NAME} scheduler");
907
908 if self.opts.idle_resume_us >= 0 {
910 if cpu_idle_resume_latency_supported() {
911 for cpu in self.topo.all_cpus.values() {
912 update_cpu_idle_resume_latency(cpu.id, cpu.pm_qos_resume_latency_us as i32)
913 .unwrap();
914 }
915 }
916 }
917 }
918}
919
/// Entry point: parse options, set up logging and signal handling, then
/// run the scheduler, restarting it whenever a restart is requested
/// (either by the kernel or by a power-profile change in "auto" mode).
fn main() -> Result<()> {
    let opts = Opts::parse();

    if opts.version {
        println!(
            "{} {}",
            SCHEDULER_NAME,
            build_id::full_version(env!("CARGO_PKG_VERSION"))
        );
        return Ok(());
    }

    if opts.help_stats {
        stats::server_data().describe_meta(&mut std::io::stdout(), None)?;
        return Ok(());
    }

    // Log at info level to stderr; timestamps only on error records.
    let loglevel = simplelog::LevelFilter::Info;

    let mut lcfg = simplelog::ConfigBuilder::new();
    lcfg.set_time_offset_to_local()
        .expect("Failed to set local time offset")
        .set_time_level(simplelog::LevelFilter::Error)
        .set_location_level(simplelog::LevelFilter::Off)
        .set_target_level(simplelog::LevelFilter::Off)
        .set_thread_level(simplelog::LevelFilter::Off);
    simplelog::TermLogger::init(
        loglevel,
        lcfg.build(),
        simplelog::TerminalMode::Stderr,
        simplelog::ColorChoice::Auto,
    )?;

    // Ctrl-C flips the shared shutdown flag polled by Scheduler::run().
    let shutdown = Arc::new(AtomicBool::new(false));
    let shutdown_clone = shutdown.clone();
    ctrlc::set_handler(move || {
        shutdown_clone.store(true, Ordering::Relaxed);
    })
    .context("Error setting Ctrl-C handler")?;

    // Spawn the stats monitor thread; with --monitor we only observe an
    // already-running scheduler and exit when the thread finishes.
    if let Some(intv) = opts.monitor.or(opts.stats) {
        let shutdown_copy = shutdown.clone();
        let jh = std::thread::spawn(move || {
            match stats::monitor(Duration::from_secs_f64(intv), shutdown_copy) {
                Ok(_) => {
                    debug!("stats monitor thread finished successfully")
                }
                Err(error_object) => {
                    warn!(
                        "stats monitor thread finished because of an error {}",
                        error_object
                    )
                }
            }
        });
        if opts.monitor.is_some() {
            let _ = jh.join();
            return Ok(());
        }
    }

    // Init/run loop: `user_restart` (power-profile change) re-inits the
    // scheduler; a kernel-side should_restart() also re-inits; anything
    // else terminates.
    let mut open_object = MaybeUninit::uninit();
    loop {
        let mut sched = Scheduler::init(&opts, &mut open_object)?;
        if !sched.run(shutdown.clone())?.should_restart() {
            if sched.user_restart {
                continue;
            }
            break;
        }
    }

    Ok(())
}