mod bpf_skel;
pub use bpf_skel::*;
pub mod bpf_intf;
pub use bpf_intf::*;

mod stats;
use std::cell::Cell;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::ffi::c_int;
use std::ffi::CStr;
use std::fmt;
use std::mem;
use std::mem::MaybeUninit;
use std::str;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::thread::ThreadId;
use std::time::Duration;

use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use clap_num::number_range;
use crossbeam::channel;
use crossbeam::channel::Receiver;
use crossbeam::channel::RecvTimeoutError;
use crossbeam::channel::Sender;
use crossbeam::channel::TrySendError;
use itertools::iproduct;
use libbpf_rs::OpenObject;
use libbpf_rs::ProgramInput;
use libc::c_char;
use log::debug;
use log::info;
use plain::Plain;
use scx_stats::prelude::*;
use scx_utils::autopower::{fetch_power_profile, PowerProfile};
use scx_utils::build_id;
use scx_utils::compat;
use scx_utils::read_cpulist;
use scx_utils::scx_ops_attach;
use scx_utils::scx_ops_load;
use scx_utils::scx_ops_open;
use scx_utils::set_rlimit_infinity;
use scx_utils::uei_exited;
use scx_utils::uei_report;
use scx_utils::CoreType;
use scx_utils::Cpumask;
use scx_utils::EnergyModel;
use scx_utils::Topology;
use scx_utils::UserExitInfo;
use scx_utils::NR_CPU_IDS;
use stats::SchedSample;
use stats::SchedSamples;
use stats::StatsReq;
use stats::StatsRes;
use stats::SysStats;

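/// Command-line options for scx_lavd. The power-mode flags (--autopilot,
/// --autopower, --performance, --powersave, --balanced) are mutually
/// exclusive; Opts::proc() resolves the final mode and rejects conflicting
/// combinations.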
#[derive(Debug, Parser)]
struct Opts {
    /// Automatically decide the power mode based on system state.
    #[clap(long = "autopilot", action = clap::ArgAction::SetTrue)]
    autopilot: bool,

    /// Decide the power mode based on the platform's active power profile.
    #[clap(long = "autopower", action = clap::ArgAction::SetTrue)]
    autopower: bool,

    /// Run the scheduler in performance mode.
    #[clap(long = "performance", action = clap::ArgAction::SetTrue)]
    performance: bool,

    /// Run the scheduler in powersave mode.
    #[clap(long = "powersave", action = clap::ArgAction::SetTrue)]
    powersave: bool,

    /// Run the scheduler in balanced mode.
    #[clap(long = "balanced", action = clap::ArgAction::SetTrue)]
    balanced: bool,

    /// Maximum scheduling slice duration in microseconds.
    #[clap(long = "slice-max-us", default_value = "5000")]
    slice_max_us: u64,

    /// Minimum scheduling slice duration in microseconds.
    #[clap(long = "slice-min-us", default_value = "500")]
    slice_min_us: u64,

    /// Limit preemption to roughly the top 1/2^preempt_shift of
    /// latency-critical tasks (0-10).
    #[clap(long = "preempt-shift", default_value = "6", value_parser=Opts::preempt_shift_range)]
    preempt_shift: u8,

    /// Preferred CPU order as a cpulist string (e.g., "0-3,7"); it must
    /// cover all CPUs. An empty string uses the topology-derived order.
    #[clap(long = "cpu-pref-order", default_value = "")]
    cpu_pref_order: String,

    /// Do not boost futex holders.
    #[clap(long = "no-futex-boost", action = clap::ArgAction::SetTrue)]
    no_futex_boost: bool,

    /// Disable preemption.
    #[clap(long = "no-preemption", action = clap::ArgAction::SetTrue)]
    no_preemption: bool,

    /// Disable the optimization for synchronous wake-ups.
    #[clap(long = "no-wake-sync", action = clap::ArgAction::SetTrue)]
    no_wake_sync: bool,

    /// Disable core compaction, using all online CPUs.
    #[clap(long = "no-core-compaction", action = clap::ArgAction::SetTrue)]
    no_core_compaction: bool,

    /// Disable CPU frequency scaling.
    #[clap(long = "no-freq-scaling", action = clap::ArgAction::SetTrue)]
    no_freq_scaling: bool,

    /// Print statistics at the given interval in seconds while running.
    #[clap(long)]
    stats: Option<f64>,

    /// Only monitor statistics at the given interval in seconds; the
    /// scheduler is not launched.
    #[clap(long)]
    monitor: Option<f64>,

    /// Monitor the given number of scheduling samples; the scheduler is
    /// not launched.
    #[clap(long)]
    monitor_sched_samples: Option<u64>,

    /// Increase verbosity (can be given multiple times).
    #[clap(short = 'v', long, action = clap::ArgAction::Count)]
    verbose: u8,

    /// Print the version and exit.
    #[clap(short = 'V', long, action = clap::ArgAction::SetTrue)]
    version: bool,

    /// Describe the available statistics and exit.
    #[clap(long)]
    help_stats: bool,
}

impl Opts {
    fn can_autopilot(&self) -> bool {
        !self.autopower
            && !self.performance
            && !self.powersave
            && !self.balanced
            && !self.no_core_compaction
    }

    fn can_autopower(&self) -> bool {
        !self.autopilot
            && !self.performance
            && !self.powersave
            && !self.balanced
            && !self.no_core_compaction
    }

    fn can_performance(&self) -> bool {
        !self.autopilot
            && !self.autopower
            && !self.powersave
            && !self.balanced
    }

    fn can_balanced(&self) -> bool {
        !self.autopilot
            && !self.autopower
            && !self.performance
            && !self.powersave
            && !self.no_core_compaction
    }

    fn can_powersave(&self) -> bool {
        !self.autopilot
            && !self.autopower
            && !self.performance
            && !self.balanced
            && !self.no_core_compaction
    }

    /// Resolve the requested power mode: default to autopilot when no mode
    /// is chosen, and reject conflicting option combinations.
    fn proc(&mut self) -> Option<&mut Self> {
        if !self.autopilot {
            self.autopilot = self.can_autopilot();
        }
        if self.autopilot {
            if !self.can_autopilot() {
                info!("Autopilot mode cannot be used with conflicting options.");
                return None;
            }
            info!("Autopilot mode is enabled.");
            return Some(self);
        }

        if self.autopower {
            if !self.can_autopower() {
                info!("Autopower mode cannot be used with conflicting options.");
                return None;
            }
            info!("Autopower mode is enabled.");
            return Some(self);
        }

        if self.performance {
            if !self.can_performance() {
                info!("Performance mode cannot be used with conflicting options.");
                return None;
            }
            info!("Performance mode is enabled.");
            self.no_core_compaction = true;
            return Some(self);
        }

        if self.powersave {
            if !self.can_powersave() {
                info!("Powersave mode cannot be used with conflicting options.");
                return None;
            }
            info!("Powersave mode is enabled.");
            self.no_core_compaction = false;
            return Some(self);
        }

        if self.balanced {
            if !self.can_balanced() {
                info!("Balanced mode cannot be used with conflicting options.");
                return None;
            }
            info!("Balanced mode is enabled.");
            self.no_core_compaction = false;
            return Some(self);
        }

        Some(self)
    }

    fn preempt_shift_range(s: &str) -> Result<u8, String> {
        number_range(s, 0, 10)
    }
}

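// Task-context messages arriving from the BPF side over the introspection
// ring buffer are plain-old-data, so they can be reinterpreted directly
// from raw bytes via the `plain` crate.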
unsafe impl Plain for msg_task_ctx {}

impl msg_task_ctx {
    fn from_bytes(buf: &[u8]) -> &msg_task_ctx {
        plain::from_bytes(buf).expect("The buffer is either too short or not aligned!")
    }
}

impl introspec {
    fn new() -> Self {
        unsafe { mem::MaybeUninit::<introspec>::zeroed().assume_init() }
    }
}

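/// A CPU flattened into one record: its position in the node/LLC/core
/// hierarchy plus the properties (capacity, cache size, core type) used to
/// rank CPUs into preferred orders.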
#[derive(Debug, Clone)]
struct CpuFlatId {
    node_id: usize,
    pd_id: usize,
    llc_pos: usize,
    core_pos: usize,
    cpu_pos: usize,
    cpu_id: usize,
    smt_level: usize,
    cache_size: usize,
    cpu_cap: usize,
    big_core: bool,
    turbo_core: bool,
}

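/// A compute domain groups CPUs of the same core type that share an LLC
/// within a NUMA node; the value tracks its CPUs, an alternative domain of
/// the opposite core type, and neighboring domains keyed by distance.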
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Clone)]
struct ComputeDomainKey {
    node_id: usize,
    llc_pos: usize,
    is_big: bool,
}

#[derive(Debug, Clone)]
struct ComputeDomainValue {
    cpdom_id: usize,
    cpdom_alt_id: Cell<usize>,
    cpu_ids: Vec<usize>,
    neighbor_map: RefCell<BTreeMap<usize, RefCell<Vec<usize>>>>,
}

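/// The host topology flattened for BPF consumption: one CPU ordering for
/// performance mode, one for powersave mode, and the compute-domain map.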
#[derive(Debug)]
struct FlatTopology {
    all_cpus_mask: Cpumask,
    cpu_fids_performance: Vec<CpuFlatId>,
    cpu_fids_powersave: Vec<CpuFlatId>,
    cpdom_map: BTreeMap<ComputeDomainKey, ComputeDomainValue>,
    smt_enabled: bool,
}

impl fmt::Display for FlatTopology {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for cpu_fid in self.cpu_fids_performance.iter() {
            write!(f, "\nCPU in performance: {:?}", cpu_fid).ok();
        }
        for cpu_fid in self.cpu_fids_powersave.iter() {
            write!(f, "\nCPU in powersave: {:?}", cpu_fid).ok();
        }
        for (k, v) in self.cpdom_map.iter() {
            write!(f, "\nCPDOM: {:?} {:?}", k, v).ok();
        }
        write!(f, "\nSMT: {}", self.smt_enabled).ok();
        Ok(())
    }
}

impl FlatTopology {
    pub fn new() -> Result<FlatTopology> {
        let sys_topo = Topology::new().expect("Failed to build host topology");
        let sys_em = EnergyModel::new();
        debug!("{:#?}", sys_topo);
        debug!("{:#?}", sys_em);

        let cpu_fids_performance = Self::build_cpu_fids(&sys_topo, &sys_em, false).unwrap();
        let cpu_fids_powersave = Self::build_cpu_fids(&sys_topo, &sys_em, true).unwrap();

        let cpdom_map = Self::build_cpdom(&cpu_fids_performance).unwrap();

        Ok(FlatTopology {
            all_cpus_mask: sys_topo.span,
            cpu_fids_performance,
            cpu_fids_powersave,
            cpdom_map,
            smt_enabled: sys_topo.smt_enabled,
        })
    }

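    /// Flatten the topology into one record per CPU and sort the records
    /// into the preferred order: powersave fills small-capacity CPUs (and
    /// their SMT siblings) first, while performance fills large-capacity
    /// CPUs and spreads across physical cores before SMT siblings.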
    fn build_cpu_fids(
        topo: &Topology,
        em: &Result<EnergyModel>,
        prefer_powersave: bool,
    ) -> Option<Vec<CpuFlatId>> {
        let mut cpu_fids = Vec::new();

        for (&node_id, node) in topo.nodes.iter() {
            for (llc_pos, (_llc_id, llc)) in node.llcs.iter().enumerate() {
                for (core_pos, (_core_id, core)) in llc.cores.iter().enumerate() {
                    for (cpu_pos, (cpu_id, cpu)) in core.cpus.iter().enumerate() {
                        let cpu_id = *cpu_id;
                        let pd_id = Self::get_pd_id(em, cpu_id, node_id);
                        cpu_fids.push(CpuFlatId {
                            node_id,
                            pd_id,
                            llc_pos,
                            core_pos,
                            cpu_pos,
                            cpu_id,
                            smt_level: cpu.smt_level,
                            cache_size: cpu.cache_size,
                            cpu_cap: cpu.cpu_capacity,
                            big_core: cpu.core_type != CoreType::Little,
                            turbo_core: cpu.core_type == CoreType::Big { turbo: true },
                        });
                    }
                }
            }
        }

        if prefer_powersave {
            // Powersave: smaller-capacity CPUs first, preferring cores with
            // more SMT siblings and larger caches.
            cpu_fids.sort_by(|a, b| {
                a.node_id
                    .cmp(&b.node_id)
                    .then_with(|| a.llc_pos.cmp(&b.llc_pos))
                    .then_with(|| a.cpu_cap.cmp(&b.cpu_cap))
                    .then_with(|| b.smt_level.cmp(&a.smt_level))
                    .then_with(|| b.cache_size.cmp(&a.cache_size))
                    .then_with(|| a.pd_id.cmp(&b.pd_id))
                    .then_with(|| a.core_pos.cmp(&b.core_pos))
                    .then_with(|| a.cpu_pos.cmp(&b.cpu_pos))
            });
        } else {
            // Performance: larger-capacity CPUs first, spreading across
            // physical cores before SMT siblings.
            cpu_fids.sort_by(|a, b| {
                a.node_id
                    .cmp(&b.node_id)
                    .then_with(|| a.llc_pos.cmp(&b.llc_pos))
                    .then_with(|| b.cpu_cap.cmp(&a.cpu_cap))
                    .then_with(|| a.cpu_pos.cmp(&b.cpu_pos))
                    .then_with(|| a.smt_level.cmp(&b.smt_level))
                    .then_with(|| b.cache_size.cmp(&a.cache_size))
                    .then_with(|| a.pd_id.cmp(&b.pd_id))
                    .then_with(|| a.core_pos.cmp(&b.core_pos))
            });
        }

        Some(cpu_fids)
    }

    fn get_pd_id(em: &Result<EnergyModel>, cpu_id: usize, node_id: usize) -> usize {
        match em {
            Ok(em) => em.get_pd(cpu_id).unwrap().id,
            Err(_) => node_id,
        }
    }

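    /// Build the compute-domain map: group CPUs by (node, LLC, core type),
    /// record every other domain in each domain's neighbor map keyed by
    /// distance, and pick an alternative domain of the opposite core type
    /// (the direct counterpart if it exists, else the nearest neighbor of
    /// that type).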
    fn build_cpdom(
        cpu_fids: &[CpuFlatId],
    ) -> Option<BTreeMap<ComputeDomainKey, ComputeDomainValue>> {
        // Group CPUs by (node, LLC, core type) into compute domains.
        let mut cpdom_id = 0;
        let mut cpdom_map: BTreeMap<ComputeDomainKey, ComputeDomainValue> = BTreeMap::new();
        let mut cpdom_types: BTreeMap<usize, bool> = BTreeMap::new();
        for cpu_fid in cpu_fids.iter() {
            let key = ComputeDomainKey {
                node_id: cpu_fid.node_id,
                llc_pos: cpu_fid.llc_pos,
                is_big: cpu_fid.big_core,
            };
            let mut value;
            match cpdom_map.get(&key) {
                Some(v) => {
                    value = v.clone();
                }
                None => {
                    value = ComputeDomainValue {
                        cpdom_id,
                        cpdom_alt_id: Cell::new(cpdom_id),
                        cpu_ids: Vec::new(),
                        neighbor_map: RefCell::new(BTreeMap::new()),
                    };
                    cpdom_types.insert(cpdom_id, key.is_big);

                    cpdom_id += 1;
                }
            }
            value.cpu_ids.push(cpu_fid.cpu_id);
            cpdom_map.insert(key, value);
        }

        // Record every other domain in each domain's neighbor map, keyed
        // by distance.
        for ((from_k, from_v), (to_k, to_v)) in iproduct!(cpdom_map.iter(), cpdom_map.iter()) {
            if from_k == to_k {
                continue;
            }

            let d = Self::dist(from_k, to_k);
            let mut map = from_v.neighbor_map.borrow_mut();
            match map.get(&d) {
                Some(v) => {
                    v.borrow_mut().push(to_v.cpdom_id);
                }
                None => {
                    map.insert(d, RefCell::new(vec![to_v.cpdom_id]));
                }
            }
        }

        // Choose an alternative domain of the opposite core type: the
        // direct counterpart on the same (node, LLC) if it exists,
        // otherwise the nearest neighbor of that type.
        for (k, v) in cpdom_map.iter() {
            let mut key = k.clone();
            key.is_big = !k.is_big;

            if let Some(alt_v) = cpdom_map.get(&key) {
                v.cpdom_alt_id.set(alt_v.cpdom_id);
            } else {
                'outer: for (_dist, ncpdoms) in v.neighbor_map.borrow().iter() {
                    for ncpdom_id in ncpdoms.borrow().iter() {
                        if let Some(is_big) = cpdom_types.get(ncpdom_id) {
                            if *is_big == key.is_big {
                                v.cpdom_alt_id.set(*ncpdom_id);
                                break 'outer;
                            }
                        }
                    }
                }
            }
        }

        Some(cpdom_map)
    }

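    /// Distance between two compute domains: +3 when core types differ,
    /// +2 when NUMA nodes differ, and +1 when only the LLC differs within
    /// a node. For example, a big-core domain and a little-core domain on
    /// different nodes are at distance 3 + 2 = 5.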
    fn dist(from: &ComputeDomainKey, to: &ComputeDomainKey) -> usize {
        let mut d = 0;
        if from.is_big != to.is_big {
            d += 3;
        }
        if from.node_id != to.node_id {
            d += 2;
        } else if from.llc_pos != to.llc_pos {
            d += 1;
        }
        d
    }
}

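/// Userspace half of the scheduler: owns the loaded BPF skeleton, the
/// attached struct_ops link, the introspection ring buffer, and the stats
/// server.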
struct Scheduler<'a> {
    skel: BpfSkel<'a>,
    struct_ops: Option<libbpf_rs::Link>,
    rb_mgr: libbpf_rs::RingBuffer<'static>,
    intrspc: introspec,
    intrspc_rx: Receiver<SchedSample>,
    monitor_tid: Option<ThreadId>,
    stats_server: StatsServer<StatsReq, StatsRes>,
    mseq_id: u64,
}

impl<'a> Scheduler<'a> {
    fn init(opts: &'a Opts, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
        if *NR_CPU_IDS > LAVD_CPU_ID_MAX as usize {
            panic!(
                "Num possible CPU IDs ({}) exceeds maximum of ({})",
                *NR_CPU_IDS, LAVD_CPU_ID_MAX
            );
        }

        set_rlimit_infinity();

        // Open the BPF skeleton.
        let mut skel_builder = BpfSkelBuilder::default();
        skel_builder.obj_builder.debug(opts.verbose > 0);
        let mut skel = scx_ops_open!(skel_builder, open_object, lavd_ops)?;

        // Enable the futex tracepoints when they are available.
        if !opts.no_futex_boost {
            compat::cond_tracepoint_enable(
                "syscalls:sys_enter_futex",
                &skel.progs.rtp_sys_enter_futex,
            )?;
            compat::cond_tracepoint_enable(
                "syscalls:sys_exit_futex",
                &skel.progs.rtp_sys_exit_futex,
            )?;
            compat::cond_tracepoint_enable(
                "syscalls:sys_exit_futex_wait",
                &skel.progs.rtp_sys_exit_futex_wait,
            )?;
            compat::cond_tracepoint_enable(
                "syscalls:sys_exit_futex_waitv",
                &skel.progs.rtp_sys_exit_futex_waitv,
            )?;
            compat::cond_tracepoint_enable(
                "syscalls:sys_exit_futex_wake",
                &skel.progs.rtp_sys_exit_futex_wake,
            )?;
        }

        // Initialize the CPU topology and global variables, then load and
        // attach the scheduler.
        let topo = FlatTopology::new().unwrap();
        Self::init_cpus(&mut skel, opts, &topo);
        Self::init_globals(&mut skel, opts, &topo);

        let mut skel = scx_ops_load!(skel, lavd_ops, uei)?;
        let struct_ops = Some(scx_ops_attach!(skel, lavd_ops)?);
        let stats_server = StatsServer::new(stats::server_data(*NR_CPU_IDS as u64)).launch()?;

        // Wire the introspection ring buffer up to a bounded channel.
        let (intrspc_tx, intrspc_rx) = channel::bounded(65536);
        let rb_map = &mut skel.maps.introspec_msg;
        let mut builder = libbpf_rs::RingBufferBuilder::new();
        builder
            .add(rb_map, move |data| {
                Scheduler::relay_introspec(data, &intrspc_tx)
            })
            .unwrap();
        let rb_mgr = builder.build().unwrap();

        Ok(Self {
            skel,
            struct_ops,
            rb_mgr,
            intrspc: introspec::new(),
            intrspc_rx,
            monitor_tid: None,
            stats_server,
            mseq_id: 0,
        })
    }

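    /// Populate per-CPU read-only data (capacity, core type), the
    /// preferred CPU orders, and the compute-domain contexts in the BPF
    /// maps before the skeleton is loaded.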
    fn init_cpus(skel: &mut OpenBpfSkel, opts: &Opts, topo: &FlatTopology) {
        debug!("{:#?}", topo);

        // Initialize per-CPU capacity and core-type flags.
        for cpu in topo.cpu_fids_performance.iter() {
            skel.maps.rodata_data.cpu_capacity[cpu.cpu_id] = cpu.cpu_cap as u16;
            skel.maps.rodata_data.cpu_big[cpu.cpu_id] = cpu.big_core as u8;
            skel.maps.rodata_data.cpu_turbo[cpu.cpu_id] = cpu.turbo_core as u8;
        }

        // Determine the preferred CPU orders, either from the topology or
        // from a user-supplied --cpu-pref-order list.
        let (cpu_pf_order, cpu_ps_order) = if opts.cpu_pref_order.is_empty() {
            (
                topo.cpu_fids_performance
                    .iter()
                    .map(|cpu| cpu.cpu_id)
                    .collect(),
                topo.cpu_fids_powersave
                    .iter()
                    .map(|cpu| cpu.cpu_id)
                    .collect(),
            )
        } else {
            let cpu_list = read_cpulist(&opts.cpu_pref_order).unwrap();
            let pref_mask = Cpumask::from_cpulist(&opts.cpu_pref_order).unwrap();
            if pref_mask != topo.all_cpus_mask {
                panic!("--cpu-pref-order must cover all CPUs.");
            }
            (cpu_list.clone(), cpu_list)
        };
        for (pos, cpu) in cpu_pf_order.iter().enumerate() {
            skel.maps.rodata_data.cpu_order_performance[pos] = *cpu as u16;
        }
        for (pos, cpu) in cpu_ps_order.iter().enumerate() {
            skel.maps.rodata_data.cpu_order_powersave[pos] = *cpu as u16;
        }
        if !opts.powersave {
            info!("CPU pref order in performance mode: {:?}", cpu_pf_order);
        }
        if !opts.performance {
            info!("CPU pref order in powersave mode: {:?}", cpu_ps_order);
        }

        // Initialize the compute-domain contexts: identity, CPU mask, and
        // neighbor information.
        for (k, v) in topo.cpdom_map.iter() {
            skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].id = v.cpdom_id as u64;
            skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].alt_id = v.cpdom_alt_id.get() as u64;
            skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].node_id = k.node_id as u8;
            skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].is_big = k.is_big as u8;
            skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].is_valid = 1;
            for cpu_id in v.cpu_ids.iter() {
                let i = cpu_id / 64;
                let j = cpu_id % 64;
                skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].__cpumask[i] |= 0x01 << j;
            }

            if v.neighbor_map.borrow().len() > LAVD_CPDOM_MAX_DIST as usize {
                panic!("The processor topology is too complex to handle in BPF.");
            }

            for (i, (_dist, neighbors)) in v.neighbor_map.borrow().iter().enumerate() {
                let nr_neighbors = neighbors.borrow().len() as u8;
                if nr_neighbors > LAVD_CPDOM_MAX_NR as u8 {
                    panic!("The processor topology is too complex to handle in BPF.");
                }
                skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].nr_neighbors[i] = nr_neighbors;
                for n in neighbors.borrow().iter() {
                    skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].neighbor_bits[i] |= 0x1 << n;
                }
            }
        }
    }

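    /// Mirror command-line options and topology facts into the BPF globals
    /// and set the struct_ops flags before loading.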
    fn init_globals(skel: &mut OpenBpfSkel, opts: &Opts, topo: &FlatTopology) {
        skel.maps.bss_data.no_preemption = opts.no_preemption;
        skel.maps.bss_data.no_wake_sync = opts.no_wake_sync;
        skel.maps.bss_data.no_core_compaction = opts.no_core_compaction;
        skel.maps.bss_data.no_freq_scaling = opts.no_freq_scaling;
        skel.maps.bss_data.is_powersave_mode = opts.powersave;
        skel.maps.rodata_data.nr_cpu_ids = *NR_CPU_IDS as u64;
        skel.maps.rodata_data.is_smt_active = topo.smt_enabled;
        skel.maps.rodata_data.is_autopilot_on = opts.autopilot;
        skel.maps.rodata_data.verbose = opts.verbose;
        skel.maps.rodata_data.slice_max_ns = opts.slice_max_us * 1000;
        skel.maps.rodata_data.slice_min_ns = opts.slice_min_us * 1000;
        skel.maps.rodata_data.preempt_shift = opts.preempt_shift;

        skel.struct_ops.lavd_ops_mut().flags = *compat::SCX_OPS_ALLOW_QUEUED_WAKEUP
            | *compat::SCX_OPS_ENQ_EXITING
            | *compat::SCX_OPS_ENQ_LAST
            | *compat::SCX_OPS_ENQ_MIGRATION_DISABLED
            | *compat::SCX_OPS_KEEP_BUILTIN_IDLE;
    }

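    /// Allocate a monotonically increasing sequence number for
    /// introspection samples.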
    fn get_msg_seq_id() -> u64 {
        // An atomic counter avoids the unsound `static mut` access pattern.
        static MSEQ: AtomicU64 = AtomicU64::new(0);
        MSEQ.fetch_add(1, Ordering::Relaxed) + 1
    }

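    /// Ring-buffer callback: decode a task-context message produced by the
    /// BPF side and forward it to the stats channel as a SchedSample.
    /// Samples are silently dropped when the channel is full.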
    fn relay_introspec(data: &[u8], intrspc_tx: &Sender<SchedSample>) -> i32 {
        let mt = msg_task_ctx::from_bytes(data);
        if mt.hdr.kind != LAVD_MSG_TASKC {
            return 0;
        }
        let tx = mt.taskc_x;
        let tc = mt.taskc;

        let mseq = Scheduler::get_msg_seq_id();

        // The comm and stat fields are NUL-terminated C strings.
        let c_tx_cm: *const c_char = (&tx.comm as *const [c_char; 17]) as *const c_char;
        let c_tx_cm_str: &CStr = unsafe { CStr::from_ptr(c_tx_cm) };
        let tx_comm: &str = c_tx_cm_str.to_str().unwrap();

        let c_tx_st: *const c_char = (&tx.stat as *const [c_char; 5]) as *const c_char;
        let c_tx_st_str: &CStr = unsafe { CStr::from_ptr(c_tx_st) };
        let tx_stat: &str = c_tx_st_str.to_str().unwrap();

        match intrspc_tx.try_send(SchedSample {
            mseq,
            pid: tx.pid,
            comm: tx_comm.into(),
            stat: tx_stat.into(),
            cpu_id: tx.cpu_id,
            slice_ns: tc.slice_ns,
            lat_cri: tc.lat_cri,
            avg_lat_cri: tx.avg_lat_cri,
            static_prio: tx.static_prio,
            run_freq: tc.run_freq,
            avg_runtime: tc.avg_runtime,
            wait_freq: tc.wait_freq,
            wake_freq: tc.wake_freq,
            perf_cri: tc.perf_cri,
            thr_perf_cri: tx.thr_perf_cri,
            cpuperf_cur: tx.cpuperf_cur,
            cpu_util: tx.cpu_util,
            cpu_sutil: tx.cpu_sutil,
            nr_active: tx.nr_active,
        }) {
            Ok(()) | Err(TrySendError::Full(_)) => 0,
            Err(e) => panic!("failed to send on intrspc_tx ({})", e),
        }
    }

    fn prep_introspec(&mut self) {
        self.skel.maps.bss_data.intrspc.cmd = self.intrspc.cmd;
        self.skel.maps.bss_data.intrspc.arg = self.intrspc.arg;
    }

    fn cleanup_introspec(&mut self) {
        self.skel.maps.bss_data.intrspc.cmd = LAVD_CMD_NOP;
    }

    /// Percentage of x over y.
    fn get_pc(x: u64, y: u64) -> f64 {
        100. * x as f64 / y as f64
    }

    fn get_power_mode(power_mode: i32) -> &'static str {
        match power_mode as u32 {
            LAVD_PM_PERFORMANCE => "performance",
            LAVD_PM_BALANCED => "balanced",
            LAVD_PM_POWERSAVE => "powersave",
            _ => "unknown",
        }
    }

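    /// Serve a single statistics request: either a system-wide snapshot
    /// derived from the BPF-side counters, or a batch of per-task
    /// scheduling samples collected through the introspection ring buffer.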
    fn stats_req_to_res(&mut self, req: &StatsReq) -> Result<StatsRes> {
        Ok(match req {
            StatsReq::NewSampler(tid) => {
                self.rb_mgr.consume().unwrap();
                self.monitor_tid = Some(*tid);
                StatsRes::Ack
            }
            StatsReq::SysStatsReq { tid } => {
                if Some(*tid) != self.monitor_tid {
                    return Ok(StatsRes::Bye);
                }
                self.mseq_id += 1;

                let bss_data = &self.skel.maps.bss_data;
                let st = bss_data.sys_stat;

                let mseq = self.mseq_id;
                let nr_queued_task = st.nr_queued_task;
                let nr_active = st.nr_active;
                let nr_sched = st.nr_sched;
                let nr_preempt = st.nr_preempt;
                let pc_pc = Self::get_pc(st.nr_perf_cri, nr_sched);
                let pc_lc = Self::get_pc(st.nr_lat_cri, nr_sched);
                let pc_x_migration = Self::get_pc(st.nr_x_migration, nr_sched);
                let nr_stealee = st.nr_stealee;
                let nr_big = st.nr_big;
                let pc_big = Self::get_pc(nr_big, nr_sched);
                let pc_pc_on_big = Self::get_pc(st.nr_pc_on_big, nr_big);
                let pc_lc_on_big = Self::get_pc(st.nr_lc_on_big, nr_big);
                let power_mode = Self::get_power_mode(bss_data.power_mode);
                let total_time = bss_data.performance_mode_ns
                    + bss_data.balanced_mode_ns
                    + bss_data.powersave_mode_ns;
                let pc_performance = Self::get_pc(bss_data.performance_mode_ns, total_time);
                let pc_balanced = Self::get_pc(bss_data.balanced_mode_ns, total_time);
                let pc_powersave = Self::get_pc(bss_data.powersave_mode_ns, total_time);

                StatsRes::SysStats(SysStats {
                    mseq,
                    nr_queued_task,
                    nr_active,
                    nr_sched,
                    nr_preempt,
                    pc_pc,
                    pc_lc,
                    pc_x_migration,
                    nr_stealee,
                    pc_big,
                    pc_pc_on_big,
                    pc_lc_on_big,
                    power_mode: power_mode.to_string(),
                    pc_performance,
                    pc_balanced,
                    pc_powersave,
                })
            }
            StatsReq::SchedSamplesNr {
                tid,
                nr_samples,
                interval_ms,
            } => {
                if Some(*tid) != self.monitor_tid {
                    return Ok(StatsRes::Bye);
                }

                // Ask the BPF side for nr_samples samples, wait for one
                // interval, then drain whatever arrived on the channel.
                self.intrspc.cmd = LAVD_CMD_SCHED_N;
                self.intrspc.arg = *nr_samples;
                self.prep_introspec();
                std::thread::sleep(Duration::from_millis(*interval_ms));
                self.rb_mgr.poll(Duration::from_millis(100)).unwrap();

                let mut samples = vec![];
                while let Ok(ts) = self.intrspc_rx.try_recv() {
                    samples.push(ts);
                }

                self.cleanup_introspec();

                StatsRes::SchedSamples(SchedSamples { samples })
            }
        })
    }

    pub fn exited(&mut self) -> bool {
        uei_exited!(&self.skel, uei)
    }

    fn set_power_profile(&mut self, mode: u32) -> Result<(), u32> {
        // Run the set_power_profile BPF program with the requested mode,
        // passing the argument struct as the program's context.
        let prog = &mut self.skel.progs.set_power_profile;
        let mut args = power_arg {
            power_mode: mode as c_int,
        };
        let input = ProgramInput {
            context_in: Some(unsafe {
                std::slice::from_raw_parts_mut(
                    &mut args as *mut _ as *mut u8,
                    std::mem::size_of_val(&args),
                )
            }),
            ..Default::default()
        };
        let out = prog.test_run(input).unwrap();
        if out.return_value != 0 {
            return Err(out.return_value);
        }

        Ok(())
    }

    /// Re-read the platform power profile and, if it changed, switch the
    /// scheduler's power mode accordingly. Returns whether autopower should
    /// stay enabled and the current profile.
    fn update_power_profile(&mut self, prev_profile: PowerProfile) -> (bool, PowerProfile) {
        let profile = fetch_power_profile(false);
        if profile == prev_profile {
            // Nothing to do; the profile is unchanged.
            return (true, profile);
        }

        let _ = match profile {
            PowerProfile::Performance => self.set_power_profile(LAVD_PM_PERFORMANCE),
            PowerProfile::Balanced { .. } => self.set_power_profile(LAVD_PM_BALANCED),
            PowerProfile::Powersave => self.set_power_profile(LAVD_PM_POWERSAVE),
            PowerProfile::Unknown => {
                // The platform does not report a power profile, so stop
                // polling it.
                return (false, profile);
            }
        };

        info!("Set the scheduler's power profile to {profile} mode.");
        (true, profile)
    }

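    /// Main loop: set the initial power profile, then service statistics
    /// requests once per second (tracking the platform power profile when
    /// --autopower is on) until shutdown is requested or the BPF scheduler
    /// exits.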
    fn run(&mut self, opts: &Opts, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
        let (res_ch, req_ch) = self.stats_server.channels();
        let mut autopower = opts.autopower;
        let mut profile = PowerProfile::Unknown;

        if opts.performance {
            let _ = self.set_power_profile(LAVD_PM_PERFORMANCE);
        } else if opts.powersave {
            let _ = self.set_power_profile(LAVD_PM_POWERSAVE);
        } else {
            let _ = self.set_power_profile(LAVD_PM_BALANCED);
        }

        while !shutdown.load(Ordering::Relaxed) && !self.exited() {
            if autopower {
                (autopower, profile) = self.update_power_profile(profile);
            }

            match req_ch.recv_timeout(Duration::from_secs(1)) {
                Ok(req) => {
                    let res = self.stats_req_to_res(&req)?;
                    res_ch.send(res)?;
                }
                Err(RecvTimeoutError::Timeout) => {}
                Err(e) => Err(e)?,
            }
            self.cleanup_introspec();
        }
        self.rb_mgr.consume().unwrap();

        let _ = self.struct_ops.take();
        uei_report!(&self.skel, uei)
    }
}

impl Drop for Scheduler<'_> {
    fn drop(&mut self) {
        if let Some(struct_ops) = self.struct_ops.take() {
            drop(struct_ops);
        }
    }
}

fn init_log(opts: &Opts) {
    let llv = match opts.verbose {
        0 => simplelog::LevelFilter::Info,
        1 => simplelog::LevelFilter::Debug,
        _ => simplelog::LevelFilter::Trace,
    };
    let mut lcfg = simplelog::ConfigBuilder::new();
    lcfg.set_time_offset_to_local()
        .expect("Failed to set local time offset")
        .set_time_level(simplelog::LevelFilter::Error)
        .set_location_level(simplelog::LevelFilter::Off)
        .set_target_level(simplelog::LevelFilter::Off)
        .set_thread_level(simplelog::LevelFilter::Off);
    simplelog::TermLogger::init(
        llv,
        lcfg.build(),
        simplelog::TerminalMode::Stderr,
        simplelog::ColorChoice::Auto,
    )
    .unwrap();
}

fn main() -> Result<()> {
    let mut opts = Opts::parse();

    if opts.version {
        println!(
            "scx_lavd {}",
            build_id::full_version(env!("CARGO_PKG_VERSION"))
        );
        return Ok(());
    }

    if opts.help_stats {
        let sys_stats_meta_name = SysStats::meta().name;
        let sched_sample_meta_name = SchedSample::meta().name;
        let stats_meta_names: &[&str] = &[
            sys_stats_meta_name.as_str(),
            sched_sample_meta_name.as_str(),
        ];
        stats::server_data(0).describe_meta(&mut std::io::stdout(), Some(&stats_meta_names))?;
        return Ok(());
    }

    init_log(&opts);

    opts.proc().unwrap();
    info!("{:#?}", opts);

    let shutdown = Arc::new(AtomicBool::new(false));
    let shutdown_clone = shutdown.clone();
    ctrlc::set_handler(move || {
        shutdown_clone.store(true, Ordering::Relaxed);
    })
    .context("Error setting Ctrl-C handler")?;

    // In the monitoring modes, run the stats client instead of the
    // scheduler.
    if let Some(nr_samples) = opts.monitor_sched_samples {
        let shutdown_copy = shutdown.clone();
        let jh = std::thread::spawn(move || {
            stats::monitor_sched_samples(nr_samples, shutdown_copy).unwrap()
        });
        let _ = jh.join();
        return Ok(());
    }

    if let Some(intv) = opts.monitor.or(opts.stats) {
        let shutdown_copy = shutdown.clone();
        let jh = std::thread::spawn(move || {
            stats::monitor(Duration::from_secs_f64(intv), shutdown_copy).unwrap()
        });
        if opts.monitor.is_some() {
            let _ = jh.join();
            return Ok(());
        }
    }

    // Run the scheduler, restarting it when it exits with a restart
    // request.
    let mut open_object = MaybeUninit::uninit();
    loop {
        let mut sched = Scheduler::init(&opts, &mut open_object)?;
        info!(
            "scx_lavd scheduler is initialized (build ID: {})",
            build_id::full_version(env!("CARGO_PKG_VERSION"))
        );
        info!("scx_lavd scheduler starts running.");
        if !sched.run(&opts, shutdown.clone())?.should_restart() {
            break;
        }
    }

    Ok(())
}