1mod bpf_skel;
9pub use bpf_skel::*;
10pub mod bpf_intf;
11pub use bpf_intf::*;
12
13mod stats;
14use std::collections::BTreeMap;
15use std::ffi::c_int;
16use std::fmt::Write;
17use std::fs::File;
18use std::io::{BufRead, BufReader};
19use std::mem::MaybeUninit;
20use std::sync::atomic::AtomicBool;
21use std::sync::atomic::Ordering;
22use std::sync::Arc;
23use std::time::Duration;
24
25use anyhow::anyhow;
26use anyhow::bail;
27use anyhow::Context;
28use anyhow::Result;
29use clap::Parser;
30use crossbeam::channel::RecvTimeoutError;
31use libbpf_rs::OpenObject;
32use libbpf_rs::ProgramInput;
33use log::{debug, info, warn};
34use scx_stats::prelude::*;
35use scx_utils::autopower::{fetch_power_profile, PowerProfile};
36use scx_utils::build_id;
37use scx_utils::compat;
38use scx_utils::pm::{cpu_idle_resume_latency_supported, update_cpu_idle_resume_latency};
39use scx_utils::scx_ops_attach;
40use scx_utils::scx_ops_load;
41use scx_utils::scx_ops_open;
42use scx_utils::set_rlimit_infinity;
43use scx_utils::uei_exited;
44use scx_utils::uei_report;
45use scx_utils::CoreType;
46use scx_utils::Cpumask;
47use scx_utils::Topology;
48use scx_utils::UserExitInfo;
49use scx_utils::NR_CPU_IDS;
50use stats::Metrics;
51
52const SCHEDULER_NAME: &str = "scx_flash";
53
// CPU selection mode used to build the primary scheduling domain.
#[derive(PartialEq)]
enum Powermode {
    // Only big cores that are turbo-capable.
    Turbo,
    // All big (performance) cores.
    Performance,
    // Only little (efficiency) cores.
    Powersave,
    // No restriction: every CPU matches.
    Any,
}
61
62fn get_primary_cpus(mode: Powermode) -> std::io::Result<Vec<usize>> {
63 let topo = Topology::new().unwrap();
64
65 let cpus: Vec<usize> = topo
66 .all_cores
67 .values()
68 .flat_map(|core| &core.cpus)
69 .filter_map(|(cpu_id, cpu)| match (&mode, &cpu.core_type) {
70 (Powermode::Turbo, CoreType::Big { turbo: true }) |
72 (Powermode::Performance, CoreType::Big { .. }) |
74 (Powermode::Powersave, CoreType::Little) => Some(*cpu_id),
76 (Powermode::Any, ..) => Some(*cpu_id),
77 _ => None,
78 })
79 .collect();
80
81 Ok(cpus)
82}
83
/// Convert a list of CPU ids into a hex cpumask string (e.g. "0x0f"),
/// suitable for `Cpumask::from_str()`. Returns "none" when the list is
/// empty.
///
/// Takes a slice instead of `&Vec` so any contiguous CPU id list works;
/// existing callers passing `&Vec<usize>` still compile via deref
/// coercion.
fn cpus_to_cpumask(cpus: &[usize]) -> String {
    if cpus.is_empty() {
        return String::from("none");
    }

    // The highest CPU id determines how many bytes the bitmask needs.
    let max_cpu_id = *cpus.iter().max().unwrap();

    // One bit per CPU id, rounded up to whole bytes.
    let mut bitmask = vec![0u8; (max_cpu_id + 1 + 7) / 8];

    for cpu_id in cpus {
        let byte_index = cpu_id / 8;
        let bit_index = cpu_id % 8;
        bitmask[byte_index] |= 1 << bit_index;
    }

    // Emit most-significant byte first, two hex digits per byte.
    let hex_str: String = bitmask.iter().rev().fold(String::new(), |mut f, byte| {
        let _ = write!(&mut f, "{:02x}", byte);
        f
    });

    format!("0x{}", hex_str)
}
111
#[derive(Debug, clap::Parser)]
#[command(
    name = "scx_flash",
    version,
    disable_version_flag = true,
    about = "A deadline-based scheduler focused on fairness and performance predictability.",
    long_about = r#"
scx_flash is scheduler that focuses on ensuring fairness and performance predictability.

It operates using an earliest deadline first (EDF) policy. The deadline of each task deadline is
defined as:

  deadline = vruntime + exec_vruntime

Here, `vruntime` represents the task's total accumulated runtime, inversely scaled by its weight,
while `exec_vruntime` accounts for the scaled runtime accumulated since the last sleep event.

Fairness is driven by `vruntime`, while `exec_vruntime` helps prioritize latency-sensitive tasks
that sleep frequently and use the CPU in short bursts.

To prevent sleeping tasks from gaining excessive priority, the maximum vruntime credit a task can
accumulate while sleeping is capped by `slice_lag`, scaled by the task’s voluntary context switch
rate (`max_avg_nvcsw`): tasks that sleep frequently can receive a larger credit, while tasks that
perform fewer, longer sleeps are granted a smaller credit. This encourages responsive behavior
without excessively boosting idle tasks.

When dynamic fairness is enabled (`--slice-lag-scaling`), the maximum vruntime sleep credit is also
scaled depending on the user-mode CPU utilization:

 - At low utilization (mostly idle system), the impact of `vruntime` is reduced, and scheduling
   decisions are driven primarily by `exec_vruntime`. This favors bursty, latency-sensitive
   workloads (i.e., hackbench), improving their performance and latency.

 - At high utilization, sleeping tasks regain their vruntime credit, increasing the influence of
   `vruntime` in deadline calculation. This restores fairness and ensures system responsiveness
   under load.

This adaptive behavior allows the scheduler to prioritize intense message-passing workloads when
the system is lightly loaded, while maintaining fairness and responsiveness when the system is
saturated or overcommitted.
"#
)]
// Command-line options. NOTE: comments below are intentionally plain `//`
// (not `///`) so clap's help output is unchanged.
struct Opts {
    // Size of the exit debug dump buffer; 0 keeps the kernel default.
    #[clap(long, default_value = "0")]
    exit_dump_len: u32,

    // Maximum time slice, in microseconds (copied to BPF as slice_max,
    // converted to ns in init()).
    #[clap(short = 's', long, default_value = "4096")]
    slice_us: u64,

    // Minimum time slice, in microseconds; init() asserts
    // slice_us >= slice_us_min.
    #[clap(short = 'S', long, default_value = "128")]
    slice_us_min: u64,

    // Maximum vruntime credit a sleeping task can accumulate, in
    // microseconds (BPF slice_lag).
    #[clap(short = 'l', long, default_value = "4096")]
    slice_us_lag: u64,

    // Enable dynamic scaling of the vruntime sleep credit based on
    // user-mode CPU utilization (see long_about).
    #[clap(short = 'L', long, action = clap::ArgAction::SetTrue)]
    slice_lag_scaling: bool,

    // Cap on exec_vruntime accumulation, in microseconds (BPF run_lag).
    #[clap(short = 'r', long, default_value = "32768")]
    run_us_lag: u64,

    // Maximum average voluntary context switch rate used to scale the
    // sleep credit (BPF max_avg_nvcsw).
    #[clap(short = 'c', long, default_value = "128")]
    max_avg_nvcsw: u64,

    // CPU busy threshold in percent; negative enables automatic mode,
    // where user-space feeds measured utilization to BPF (see run()).
    #[clap(short = 'C', long, allow_hyphen_values = true, default_value = "-1")]
    cpu_busy_thresh: i64,

    // CPU throttling interval in microseconds; non-zero also forces
    // local kthread dispatch (see init()).
    #[clap(short = 't', long, default_value = "0")]
    throttle_us: u64,

    // Idle resume latency QoS in microseconds; negative leaves the
    // system setting untouched.
    #[clap(short = 'I', long, allow_hyphen_values = true, default_value = "32")]
    idle_resume_us: i64,

    // Enable tickless scheduling mode (BPF tickless_sched).
    #[clap(short = 'T', long, action = clap::ArgAction::SetTrue)]
    tickless: bool,

    // Use round-robin scheduling instead of EDF (BPF rr_sched).
    #[clap(short = 'R', long, action = clap::ArgAction::SetTrue)]
    rr_sched: bool,

    // Disable the in-kernel built-in idle CPU selection (BPF
    // builtin_idle is set to the negation of this flag).
    #[clap(short = 'b', long, action = clap::ArgAction::SetTrue)]
    no_builtin_idle: bool,

    // Dispatch per-CPU tasks to their local DSQ (BPF local_pcpu).
    #[clap(short = 'p', long, action = clap::ArgAction::SetTrue)]
    local_pcpu: bool,

    // Enable direct dispatch (BPF direct_dispatch).
    #[clap(short = 'D', long, action = clap::ArgAction::SetTrue)]
    direct_dispatch: bool,

    // Keep tasks sticky to their CPU (BPF sticky_cpu).
    #[clap(short = 'y', long, action = clap::ArgAction::SetTrue)]
    sticky_cpu: bool,

    // Use native task priorities (BPF native_priority).
    #[clap(short = 'n', long, action = clap::ArgAction::SetTrue)]
    native_priority: bool,

    // Dispatch kthreads locally (BPF local_kthreads; implied when
    // throttling is enabled).
    #[clap(short = 'k', long, action = clap::ArgAction::SetTrue)]
    local_kthreads: bool,

    // Disable synchronous-wakeup optimizations (BPF no_wake_sync).
    #[clap(short = 'w', long, action = clap::ArgAction::SetTrue)]
    no_wake_sync: bool,

    // Primary CPU domain: "auto", "performance", "powersave", "turbo",
    // "all", or an explicit hex cpumask (see resolve_energy_domain()).
    #[clap(short = 'm', long, default_value = "auto")]
    primary_domain: String,

    // Disable L2-cache-aware sibling domains.
    #[clap(long, action = clap::ArgAction::SetTrue)]
    disable_l2: bool,

    // Disable L3/LLC-cache-aware sibling domains.
    #[clap(long, action = clap::ArgAction::SetTrue)]
    disable_l3: bool,

    // Disable SMT-aware optimizations.
    #[clap(long, action = clap::ArgAction::SetTrue)]
    disable_smt: bool,

    // Disable NUMA optimizations (also auto-disabled on single-node
    // systems, see init()).
    #[clap(long, action = clap::ArgAction::SetTrue)]
    disable_numa: bool,

    // Let the cpufreq governor pick the performance level automatically
    // (see init_cpufreq_perf()).
    #[clap(short = 'f', long, action = clap::ArgAction::SetTrue)]
    cpufreq: bool,

    // Print statistics every N seconds while running the scheduler.
    #[clap(long)]
    stats: Option<f64>,

    // Monitor statistics of an already-running instance every N seconds
    // without loading the scheduler.
    #[clap(long)]
    monitor: Option<f64>,

    // Enable BPF-side debugging (BPF rodata debug flag).
    #[clap(short = 'd', long, action = clap::ArgAction::SetTrue)]
    debug: bool,

    // Enable verbose libbpf output.
    #[clap(short = 'v', long, action = clap::ArgAction::SetTrue)]
    verbose: bool,

    // Print version and exit.
    #[clap(short = 'V', long, action = clap::ArgAction::SetTrue)]
    version: bool,

    // Describe the available statistics and exit.
    #[clap(long)]
    help_stats: bool,
}
368
// Snapshot of the aggregate CPU time counters parsed from /proc/stat.
#[derive(Debug, Clone, Copy)]
struct CpuTimes {
    // Time spent in user mode (clock ticks).
    user: u64,
    // Time spent in user mode at low priority (nice).
    nice: u64,
    // Sum of the first 8 time fields (user .. steal) — see
    // read_cpu_times().
    total: u64,
}
375
// User-space scheduler state: the loaded BPF skeleton plus everything
// needed to drive and monitor it.
struct Scheduler<'a> {
    skel: BpfSkel<'a>,                      // loaded/attached BPF skeleton
    struct_ops: Option<libbpf_rs::Link>,    // sched_ext struct_ops attachment
    opts: &'a Opts,                         // parsed command-line options
    topo: Topology,                         // host CPU topology
    power_profile: PowerProfile,            // power profile observed at init
    stats_server: StatsServer<(), Metrics>, // serves stats requests
    user_restart: bool,                     // restart requested (e.g. power-profile change)
}
385
386impl<'a> Scheduler<'a> {
    // Build, configure, load and attach the BPF scheduler, returning a
    // fully initialized Scheduler instance.
    fn init(opts: &'a Opts, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
        set_rlimit_infinity();

        // The maximum slice can never be smaller than the minimum slice.
        assert!(opts.slice_us >= opts.slice_us_min);

        let topo = Topology::new().unwrap();

        // SMT handling is active only when the hardware has SMT and the
        // user did not disable it.
        let smt_enabled = !opts.disable_smt && topo.smt_enabled;

        info!(
            "{} {} {}",
            SCHEDULER_NAME,
            build_id::full_version(env!("CARGO_PKG_VERSION")),
            if smt_enabled { "SMT on" } else { "SMT off" }
        );

        info!(
            "scheduler options: {}",
            std::env::args().collect::<Vec<_>>().join(" ")
        );

        // Optionally tighten the PM QoS idle resume latency so CPUs wake
        // up faster; a negative value leaves the system setting alone.
        if opts.idle_resume_us >= 0 {
            if !cpu_idle_resume_latency_supported() {
                warn!("idle resume latency not supported");
            } else {
                info!("Setting idle QoS to {} us", opts.idle_resume_us);
                for cpu in topo.all_cpus.values() {
                    update_cpu_idle_resume_latency(
                        cpu.id,
                        opts.idle_resume_us.try_into().unwrap(),
                    )?;
                }
            }
        }

        // Count only NUMA nodes that actually contain CPUs.
        let nr_nodes = topo
            .nodes
            .values()
            .filter(|node| !node.all_cpus.is_empty())
            .count();
        info!("NUMA nodes: {}", nr_nodes);

        // NUMA optimizations are pointless on a single-node system.
        let numa_disabled = opts.disable_numa || nr_nodes == 1;
        if numa_disabled {
            info!("Disabling NUMA optimizations");
        }

        // Resolve the primary scheduling domain from the CLI option and
        // the current platform power profile.
        let power_profile = Self::power_profile();
        let domain =
            Self::resolve_energy_domain(&opts.primary_domain, power_profile).map_err(|err| {
                anyhow!(
                    "failed to resolve primary domain '{}': {}",
                    &opts.primary_domain,
                    err
                )
            })?;

        // Open the BPF skeleton.
        let mut skel_builder = BpfSkelBuilder::default();
        skel_builder.obj_builder.debug(opts.verbose);
        let mut skel = scx_ops_open!(skel_builder, open_object, flash_ops)?;

        skel.struct_ops.flash_ops_mut().exit_dump_len = opts.exit_dump_len;

        // Propagate the user configuration into the BPF read-only data.
        // Time options are converted from microseconds to nanoseconds.
        let rodata = skel.maps.rodata_data.as_mut().unwrap();
        rodata.debug = opts.debug;
        rodata.smt_enabled = smt_enabled;
        rodata.numa_disabled = numa_disabled;
        rodata.rr_sched = opts.rr_sched;
        rodata.local_pcpu = opts.local_pcpu;
        rodata.direct_dispatch = opts.direct_dispatch;
        rodata.sticky_cpu = opts.sticky_cpu;
        rodata.no_wake_sync = opts.no_wake_sync;
        rodata.tickless_sched = opts.tickless;
        rodata.native_priority = opts.native_priority;
        rodata.slice_lag_scaling = opts.slice_lag_scaling;
        rodata.builtin_idle = !opts.no_builtin_idle;
        rodata.slice_max = opts.slice_us * 1000;
        rodata.slice_min = opts.slice_us_min * 1000;
        rodata.slice_lag = opts.slice_us_lag * 1000;
        rodata.run_lag = opts.run_us_lag * 1000;
        rodata.throttle_ns = opts.throttle_us * 1000;
        rodata.max_avg_nvcsw = opts.max_avg_nvcsw;
        rodata.primary_all = domain.weight() == *NR_CPU_IDS;

        // Negative keeps automatic mode; otherwise convert the percentage
        // to the fixed-point 0..1024 range used by the BPF side.
        rodata.cpu_busy_thresh = if opts.cpu_busy_thresh < 0 {
            opts.cpu_busy_thresh
        } else {
            opts.cpu_busy_thresh * 1024 / 100
        };

        // Throttling requires kthreads to be dispatched locally.
        rodata.local_kthreads = opts.local_kthreads || opts.throttle_us > 0;

        rodata.__COMPAT_SCX_PICK_IDLE_IN_NODE = *compat::SCX_PICK_IDLE_IN_NODE;

        // Assemble the sched_ext ops flags; per-node idle tracking is
        // only enabled when NUMA optimizations are active.
        skel.struct_ops.flash_ops_mut().flags = *compat::SCX_OPS_ENQ_EXITING
            | *compat::SCX_OPS_ENQ_LAST
            | *compat::SCX_OPS_ENQ_MIGRATION_DISABLED
            | *compat::SCX_OPS_ALLOW_QUEUED_WAKEUP
            | if numa_disabled {
                0
            } else {
                *compat::SCX_OPS_BUILTIN_IDLE_PER_NODE
            };
        info!(
            "scheduler flags: {:#x}",
            skel.struct_ops.flash_ops_mut().flags
        );

        // Load the BPF programs.
        let mut skel = scx_ops_load!(skel, flash_ops, uei)?;

        // Configure the primary domain in the loaded BPF side.
        Self::init_energy_domain(&mut skel, &domain).map_err(|err| {
            anyhow!(
                "failed to initialize primary domain 0x{:x}: {}",
                domain,
                err
            )
        })?;

        if let Err(err) = Self::init_cpufreq_perf(&mut skel, &opts.primary_domain, opts.cpufreq) {
            bail!(
                "failed to initialize cpufreq performance level: error {}",
                err
            );
        }

        // Register SMT and cache sibling domains unless disabled.
        if smt_enabled {
            Self::init_smt_domains(&mut skel, &topo)?;
        }

        if !opts.disable_l2 {
            Self::init_l2_cache_domains(&mut skel, &topo)?;
        }
        if !opts.disable_l3 {
            Self::init_l3_cache_domains(&mut skel, &topo)?;
        }

        // Attach the scheduler and launch the stats server.
        let struct_ops = Some(scx_ops_attach!(skel, flash_ops)?);
        let stats_server = StatsServer::new(stats::server_data()).launch()?;

        Ok(Self {
            skel,
            struct_ops,
            opts,
            topo,
            power_profile,
            stats_server,
            user_restart: false,
        })
    }
556
    // Run the `enable_primary_cpu` BPF syscall program to add a CPU to the
    // primary scheduling domain (cpu == -1 resets the domain — see
    // init_energy_domain()).
    //
    // On failure, returns the BPF program's non-zero return value.
    fn enable_primary_cpu(skel: &mut BpfSkel<'_>, cpu: i32) -> Result<(), u32> {
        let prog = &mut skel.progs.enable_primary_cpu;
        let mut args = cpu_arg {
            cpu_id: cpu as c_int,
        };
        let input = ProgramInput {
            // SAFETY: `args` is a plain C-layout struct that outlives the
            // test_run() call below; viewing it as a mutable byte slice of
            // exactly its own size is sound.
            context_in: Some(unsafe {
                std::slice::from_raw_parts_mut(
                    &mut args as *mut _ as *mut u8,
                    std::mem::size_of_val(&args),
                )
            }),
            ..Default::default()
        };
        let out = prog.test_run(input).unwrap();
        if out.return_value != 0 {
            return Err(out.return_value);
        }

        Ok(())
    }
578
579 fn epp_to_cpumask(profile: Powermode) -> Result<Cpumask> {
580 let mut cpus = get_primary_cpus(profile).unwrap_or_default();
581 if cpus.is_empty() {
582 cpus = get_primary_cpus(Powermode::Any).unwrap_or_default();
583 }
584 Cpumask::from_str(&cpus_to_cpumask(&cpus))
585 }
586
587 fn resolve_energy_domain(primary_domain: &str, power_profile: PowerProfile) -> Result<Cpumask> {
588 let domain = match primary_domain {
589 "powersave" => Self::epp_to_cpumask(Powermode::Powersave)?,
590 "performance" => Self::epp_to_cpumask(Powermode::Performance)?,
591 "turbo" => Self::epp_to_cpumask(Powermode::Turbo)?,
592 "auto" => match power_profile {
593 PowerProfile::Powersave => Self::epp_to_cpumask(Powermode::Powersave)?,
594 PowerProfile::Balanced { power: true } => {
595 Self::epp_to_cpumask(Powermode::Powersave)?
596 }
597 PowerProfile::Balanced { power: false }
598 | PowerProfile::Performance
599 | PowerProfile::Unknown => Self::epp_to_cpumask(Powermode::Any)?,
600 },
601 "all" => Self::epp_to_cpumask(Powermode::Any)?,
602 &_ => Cpumask::from_str(primary_domain)?,
603 };
604
605 Ok(domain)
606 }
607
608 fn init_energy_domain(skel: &mut BpfSkel<'_>, domain: &Cpumask) -> Result<()> {
609 info!("primary CPU domain = 0x{:x}", domain);
610
611 if let Err(err) = Self::enable_primary_cpu(skel, -1) {
613 bail!("failed to reset primary domain: error {}", err);
614 }
615
616 for cpu in 0..*NR_CPU_IDS {
618 if domain.test_cpu(cpu) {
619 if let Err(err) = Self::enable_primary_cpu(skel, cpu as i32) {
620 bail!("failed to add CPU {} to primary domain: error {}", cpu, err);
621 }
622 }
623 }
624
625 Ok(())
626 }
627
628 fn init_cpufreq_perf(
630 skel: &mut BpfSkel<'_>,
631 primary_domain: &String,
632 auto: bool,
633 ) -> Result<()> {
634 let perf_lvl: i64 = match primary_domain.as_str() {
637 "powersave" => 0,
638 _ if auto => -1,
639 _ => 1024,
640 };
641 info!(
642 "cpufreq performance level: {}",
643 match perf_lvl {
644 1024 => "max".into(),
645 0 => "min".into(),
646 n if n < 0 => "auto".into(),
647 _ => perf_lvl.to_string(),
648 }
649 );
650 skel.maps.bss_data.as_mut().unwrap().cpufreq_perf_lvl = perf_lvl;
651
652 Ok(())
653 }
654
655 fn power_profile() -> PowerProfile {
656 let profile = fetch_power_profile(true);
657 if profile == PowerProfile::Unknown {
658 fetch_power_profile(false)
659 } else {
660 profile
661 }
662 }
663
    // Track platform power-profile changes at runtime. Returns true when
    // the scheduler must restart to re-resolve the primary domain (only
    // when running with "-m auto").
    fn refresh_sched_domain(&mut self) -> bool {
        // If the profile was unknown at startup, there is nothing to track.
        if self.power_profile != PowerProfile::Unknown {
            let power_profile = Self::power_profile();
            if power_profile != self.power_profile {
                self.power_profile = power_profile;

                // With "auto" the whole domain depends on the profile:
                // signal the caller to restart.
                if self.opts.primary_domain == "auto" {
                    return true;
                }
                // Otherwise just refresh the cpufreq performance level;
                // a failure here is logged but not fatal.
                if let Err(err) = Self::init_cpufreq_perf(
                    &mut self.skel,
                    &self.opts.primary_domain,
                    self.opts.cpufreq,
                ) {
                    warn!("failed to refresh cpufreq performance level: error {}", err);
                }
            }
        }

        false
    }
685
    // Run the `enable_sibling_cpu` BPF syscall program to register
    // `sibling_cpu` as a sibling of `cpu` at topology level `lvl`
    // (0 = SMT, 2 = L2 cache, 3 = L3/LLC cache).
    //
    // On failure, returns the BPF program's non-zero return value.
    fn enable_sibling_cpu(
        skel: &mut BpfSkel<'_>,
        lvl: usize,
        cpu: usize,
        sibling_cpu: usize,
    ) -> Result<(), u32> {
        let prog = &mut skel.progs.enable_sibling_cpu;
        let mut args = domain_arg {
            lvl_id: lvl as c_int,
            cpu_id: cpu as c_int,
            sibling_cpu_id: sibling_cpu as c_int,
        };
        let input = ProgramInput {
            // SAFETY: `args` is a plain C-layout struct that outlives the
            // test_run() call below; viewing it as a mutable byte slice of
            // exactly its own size is sound.
            context_in: Some(unsafe {
                std::slice::from_raw_parts_mut(
                    &mut args as *mut _ as *mut u8,
                    std::mem::size_of_val(&args),
                )
            }),
            ..Default::default()
        };
        let out = prog.test_run(input).unwrap();
        if out.return_value != 0 {
            return Err(out.return_value);
        }

        Ok(())
    }
714
715 fn init_smt_domains(skel: &mut BpfSkel<'_>, topo: &Topology) -> Result<(), std::io::Error> {
716 let smt_siblings = topo.sibling_cpus();
717
718 info!("SMT sibling CPUs: {:?}", smt_siblings);
719 for (cpu, sibling_cpu) in smt_siblings.iter().enumerate() {
720 Self::enable_sibling_cpu(skel, 0, cpu, *sibling_cpu as usize).unwrap();
721 }
722
723 Ok(())
724 }
725
726 fn are_smt_siblings(topo: &Topology, cpus: &[usize]) -> bool {
727 if cpus.len() <= 1 {
729 return true;
730 }
731
732 let first_cpu = cpus[0];
734 let smt_siblings = topo.sibling_cpus();
735 cpus.iter().all(|&cpu| {
736 cpu == first_cpu
737 || smt_siblings[cpu] == first_cpu as i32
738 || (smt_siblings[first_cpu] >= 0 && smt_siblings[first_cpu] == cpu as i32)
739 })
740 }
741
    // Group CPUs by their cache ID at the requested level (2 = L2,
    // 3 = L3/LLC) and register every pair of CPUs sharing that cache as
    // siblings in the BPF side. Groups with a single CPU or made purely
    // of SMT siblings are skipped (SMT is handled separately at level 0).
    fn init_cache_domains(
        skel: &mut BpfSkel<'_>,
        topo: &Topology,
        cache_lvl: usize,
        enable_sibling_cpu_fn: &dyn Fn(&mut BpfSkel<'_>, usize, usize, usize) -> Result<(), u32>,
    ) -> Result<(), std::io::Error> {
        // Build the map: cache ID -> CPUs sharing that cache.
        let mut cache_id_map: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
        for core in topo.all_cores.values() {
            for (cpu_id, cpu) in &core.cpus {
                let cache_id = match cache_lvl {
                    2 => cpu.l2_id,
                    3 => cpu.llc_id,
                    _ => panic!("invalid cache level {}", cache_lvl),
                };
                cache_id_map.entry(cache_id).or_default().push(*cpu_id);
            }
        }

        // Register all sibling pairs; failures are logged, not fatal.
        for (cache_id, cpus) in cache_id_map {
            // A single CPU has nothing to share.
            if cpus.len() <= 1 {
                continue;
            }

            // Pure SMT sibling groups are already covered at level 0.
            if Self::are_smt_siblings(topo, &cpus) {
                continue;
            }

            info!(
                "L{} cache ID {}: sibling CPUs: {:?}",
                cache_lvl, cache_id, cpus
            );
            for cpu in &cpus {
                for sibling_cpu in &cpus {
                    if enable_sibling_cpu_fn(skel, cache_lvl, *cpu, *sibling_cpu).is_err() {
                        warn!(
                            "L{} cache ID {}: failed to set CPU {} sibling {}",
                            cache_lvl, cache_id, *cpu, *sibling_cpu
                        );
                    }
                }
            }
        }

        Ok(())
    }
791
792 fn init_l2_cache_domains(
793 skel: &mut BpfSkel<'_>,
794 topo: &Topology,
795 ) -> Result<(), std::io::Error> {
796 Self::init_cache_domains(skel, topo, 2, &|skel, lvl, cpu, sibling_cpu| {
797 Self::enable_sibling_cpu(skel, lvl, cpu, sibling_cpu)
798 })
799 }
800
801 fn init_l3_cache_domains(
802 skel: &mut BpfSkel<'_>,
803 topo: &Topology,
804 ) -> Result<(), std::io::Error> {
805 Self::init_cache_domains(skel, topo, 3, &|skel, lvl, cpu, sibling_cpu| {
806 Self::enable_sibling_cpu(skel, lvl, cpu, sibling_cpu)
807 })
808 }
809
    // Snapshot the BPF-side counters into a Metrics record for the
    // stats server.
    fn get_metrics(&self) -> Metrics {
        let bss_data = self.skel.maps.bss_data.as_ref().unwrap();
        Metrics {
            nr_running: bss_data.nr_running,
            nr_cpus: bss_data.nr_online_cpus,
            nr_kthread_dispatches: bss_data.nr_kthread_dispatches,
            nr_direct_dispatches: bss_data.nr_direct_dispatches,
            nr_shared_dispatches: bss_data.nr_shared_dispatches,
        }
    }
820
    // True when the BPF scheduler has exited on its own (user exit info
    // has been recorded).
    pub fn exited(&mut self) -> bool {
        uei_exited!(&self.skel, uei)
    }
824
825 fn compute_user_cpu_pct(prev: &CpuTimes, curr: &CpuTimes) -> Option<u64> {
826 let total_diff = curr.total.saturating_sub(prev.total);
827 let user_diff = (curr.user + curr.nice).saturating_sub(prev.user + prev.nice);
828
829 if total_diff > 0 {
830 let user_ratio = user_diff as f64 / total_diff as f64;
831 Some((user_ratio * 1024.0).round() as u64)
832 } else {
833 None
834 }
835 }
836
837 fn read_cpu_times() -> Option<CpuTimes> {
838 let file = File::open("/proc/stat").ok()?;
839 let reader = BufReader::new(file);
840
841 for line in reader.lines() {
842 let line = line.ok()?;
843 if line.starts_with("cpu ") {
844 let fields: Vec<&str> = line.split_whitespace().collect();
845 if fields.len() < 5 {
846 return None;
847 }
848
849 let user: u64 = fields[1].parse().ok()?;
850 let nice: u64 = fields[2].parse().ok()?;
851
852 let total: u64 = fields
854 .iter()
855 .skip(1)
856 .take(8)
857 .filter_map(|v| v.parse::<u64>().ok())
858 .sum();
859
860 return Some(CpuTimes { user, nice, total });
861 }
862 }
863
864 None
865 }
866
    // Main scheduler loop: run until shutdown is requested or the BPF
    // side exits, periodically refreshing the power profile and CPU
    // utilization, and serving stats requests.
    fn run(&mut self, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
        let mut prev_cputime = Self::read_cpu_times().expect("Failed to read initial CPU stats");
        let (res_ch, req_ch) = self.stats_server.channels();

        while !shutdown.load(Ordering::Relaxed) && !self.exited() {
            // A power-profile change with "-m auto" requires a restart
            // to re-resolve the primary domain.
            if self.refresh_sched_domain() {
                self.user_restart = true;
                break;
            }

            // In automatic busy-threshold mode (-C < 0), feed the
            // measured user-mode utilization to the BPF side.
            if self.opts.cpu_busy_thresh < 0 {
                if let Some(curr_cputime) = Self::read_cpu_times() {
                    if let Some(cpu_util) = Self::compute_user_cpu_pct(&prev_cputime, &curr_cputime)
                    {
                        self.skel.maps.bss_data.as_mut().unwrap().cpu_util = cpu_util;
                    }
                    prev_cputime = curr_cputime;
                }
            }

            // Serve stats requests; the 1s timeout doubles as the
            // refresh period for the checks above.
            match req_ch.recv_timeout(Duration::from_secs(1)) {
                Ok(()) => res_ch.send(self.get_metrics())?,
                Err(RecvTimeoutError::Timeout) => {}
                Err(e) => Err(e)?,
            }
        }

        // Detach the struct_ops link, then report the exit reason.
        let _ = self.struct_ops.take();
        uei_report!(&self.skel, uei)
    }
897}
898
899impl Drop for Scheduler<'_> {
900 fn drop(&mut self) {
901 info!("Unregister {} scheduler", SCHEDULER_NAME);
902
903 if self.opts.idle_resume_us >= 0 {
905 if cpu_idle_resume_latency_supported() {
906 for cpu in self.topo.all_cpus.values() {
907 update_cpu_idle_resume_latency(cpu.id, cpu.pm_qos_resume_latency_us as i32)
908 .unwrap();
909 }
910 }
911 }
912 }
913}
914
fn main() -> Result<()> {
    let opts = Opts::parse();

    // -V/--version: print version and exit.
    if opts.version {
        println!(
            "{} {}",
            SCHEDULER_NAME,
            build_id::full_version(env!("CARGO_PKG_VERSION"))
        );
        return Ok(());
    }

    // --help-stats: describe the stats metadata and exit.
    if opts.help_stats {
        stats::server_data().describe_meta(&mut std::io::stdout(), None)?;
        return Ok(());
    }

    // Logging goes to stderr; timestamps only on errors.
    let loglevel = simplelog::LevelFilter::Info;

    let mut lcfg = simplelog::ConfigBuilder::new();
    lcfg.set_time_offset_to_local()
        .expect("Failed to set local time offset")
        .set_time_level(simplelog::LevelFilter::Error)
        .set_location_level(simplelog::LevelFilter::Off)
        .set_target_level(simplelog::LevelFilter::Off)
        .set_thread_level(simplelog::LevelFilter::Off);
    simplelog::TermLogger::init(
        loglevel,
        lcfg.build(),
        simplelog::TerminalMode::Stderr,
        simplelog::ColorChoice::Auto,
    )?;

    // Ctrl-C flips the shared shutdown flag polled by the main loop.
    let shutdown = Arc::new(AtomicBool::new(false));
    let shutdown_clone = shutdown.clone();
    ctrlc::set_handler(move || {
        shutdown_clone.store(true, Ordering::Relaxed);
    })
    .context("Error setting Ctrl-C handler")?;

    // Spawn the stats thread for --stats/--monitor. With --monitor we
    // only observe an already-running scheduler and exit when it does.
    if let Some(intv) = opts.monitor.or(opts.stats) {
        let shutdown_copy = shutdown.clone();
        let jh = std::thread::spawn(move || {
            match stats::monitor(Duration::from_secs_f64(intv), shutdown_copy) {
                Ok(_) => {
                    debug!("stats monitor thread finished successfully")
                }
                Err(error_object) => {
                    warn!(
                        "stats monitor thread finished because of an error {}",
                        error_object
                    )
                }
            }
        });
        if opts.monitor.is_some() {
            let _ = jh.join();
            return Ok(());
        }
    }

    // Scheduler lifecycle: re-init when the BPF side requests a restart
    // (should_restart) or when a power-profile change requires the
    // primary domain to be re-resolved (user_restart).
    let mut open_object = MaybeUninit::uninit();
    loop {
        let mut sched = Scheduler::init(&opts, &mut open_object)?;
        if !sched.run(shutdown.clone())?.should_restart() {
            if sched.user_restart {
                continue;
            }
            break;
        }
    }

    Ok(())
}