mod bpf_skel;
pub use bpf_skel::*;
pub mod bpf_intf;
pub use bpf_intf::*;

mod stats;
use std::cell::Cell;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::ffi::CStr;
use std::ffi::c_int;
use std::fmt;
use std::mem;
use std::mem::MaybeUninit;
use std::str;
use std::sync::Arc;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::thread::ThreadId;
use std::time::Duration;

use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use crossbeam::channel;
use crossbeam::channel::Receiver;
use crossbeam::channel::RecvTimeoutError;
use crossbeam::channel::Sender;
use crossbeam::channel::TrySendError;
use itertools::iproduct;
use libbpf_rs::OpenObject;
use libbpf_rs::ProgramInput;
use libc::c_char;
use log::debug;
use log::info;
use plain::Plain;
use scx_stats::prelude::*;
use scx_utils::Cpumask;
use scx_utils::EnergyModel;
use scx_utils::NR_CPU_IDS;
use scx_utils::Topology;
use scx_utils::UserExitInfo;
use scx_utils::autopower::{PowerProfile, fetch_power_profile};
use scx_utils::build_id;
use scx_utils::compat;
use scx_utils::read_cpulist;
use scx_utils::scx_ops_attach;
use scx_utils::scx_ops_load;
use scx_utils::scx_ops_open;
use scx_utils::set_rlimit_infinity;
use scx_utils::uei_exited;
use scx_utils::uei_report;
use stats::SchedSample;
use stats::SchedSamples;
use stats::StatsReq;
use stats::StatsRes;
use stats::SysStats;

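// Command-line options. The preset profile flags (--autopilot, --autopower,
// --performance, --powersave, --balanced) pick a coarse power mode, while the
// remaining flags tune individual scheduler behaviors.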
#[derive(Debug, Parser)]
struct Opts {
    #[clap(long = "autopilot", action = clap::ArgAction::SetTrue)]
    autopilot: bool,

    #[clap(long = "autopower", action = clap::ArgAction::SetTrue)]
    autopower: bool,

    #[clap(long = "performance", action = clap::ArgAction::SetTrue)]
    performance: bool,

    #[clap(long = "powersave", action = clap::ArgAction::SetTrue)]
    powersave: bool,

    #[clap(long = "balanced", action = clap::ArgAction::SetTrue)]
    balanced: bool,

    #[clap(long = "slice-max-us", default_value = "5000")]
    slice_max_us: u64,

    #[clap(long = "slice-min-us", default_value = "300")]
    slice_min_us: u64,

    #[clap(long = "cpu-pref-order", default_value = "")]
    cpu_pref_order: String,

    #[clap(long = "no-futex-boost", action = clap::ArgAction::SetTrue)]
    no_futex_boost: bool,

    #[clap(long = "no-core-compaction", action = clap::ArgAction::SetTrue)]
    no_core_compaction: bool,

    #[clap(long = "prefer-smt-core", action = clap::ArgAction::SetTrue)]
    prefer_smt_core: bool,

    #[clap(long = "prefer-little-core", action = clap::ArgAction::SetTrue)]
    prefer_little_core: bool,

    #[clap(long = "no-prefer-turbo-core", action = clap::ArgAction::SetTrue)]
    no_prefer_turbo_core: bool,

    #[clap(long = "no-freq-scaling", action = clap::ArgAction::SetTrue)]
    no_freq_scaling: bool,

    #[clap(long)]
    stats: Option<f64>,

    #[clap(long)]
    monitor: Option<f64>,

    #[clap(long)]
    monitor_sched_samples: Option<u64>,

    #[clap(short = 'v', long, action = clap::ArgAction::Count)]
    verbose: u8,

    #[clap(short = 'V', long, action = clap::ArgAction::SetTrue)]
    version: bool,

    #[clap(long)]
    help_stats: bool,
}

impl Opts {
    fn autopilot_allowed(&self) -> bool {
        !self.autopilot
            && !self.autopower
            && !self.performance
            && !self.powersave
            && !self.balanced
            && self.cpu_pref_order.is_empty()
            && !self.no_core_compaction
            && !self.prefer_smt_core
            && !self.prefer_little_core
            && !self.no_prefer_turbo_core
            && !self.no_freq_scaling
            && self.monitor.is_none()
            && self.monitor_sched_samples.is_none()
    }

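    // Resolve the preset profile flags into the low-level knobs. When no
    // conflicting option is given, autopilot mode is enabled by default.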
    fn proc(&mut self) -> Option<&mut Self> {
        if self.autopilot_allowed() {
            self.autopilot = true;
            info!("Autopilot mode is enabled by default.");
            return Some(self);
        }

        if self.performance {
            self.no_core_compaction = true;
            self.prefer_smt_core = false;
            self.prefer_little_core = false;
            self.no_prefer_turbo_core = false;
            self.no_freq_scaling = true;
        } else if self.powersave {
            self.no_core_compaction = false;
            self.prefer_smt_core = true;
            self.prefer_little_core = true;
            self.no_prefer_turbo_core = true;
            self.no_freq_scaling = false;
        } else if self.balanced {
            self.no_core_compaction = false;
            self.prefer_smt_core = false;
            self.prefer_little_core = false;
            self.no_prefer_turbo_core = false;
            self.no_freq_scaling = false;
        }

        Some(self)
    }
}

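// msg_task_ctx arrives from the BPF side as raw bytes on a ring buffer; the
// Plain impl allows casting that byte slice back into the struct.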
unsafe impl Plain for msg_task_ctx {}

impl msg_task_ctx {
    fn from_bytes(buf: &[u8]) -> &msg_task_ctx {
        plain::from_bytes(buf).expect("The buffer is either too short or not aligned!")
    }
}

impl introspec {
    fn new() -> Self {
        unsafe { mem::MaybeUninit::<introspec>::zeroed().assume_init() }
    }
}

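// A CPU's position in the flattened host topology (NUMA node, power domain,
// LLC, core, SMT sibling), along with the properties used to rank CPUs.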
#[derive(Debug, Clone)]
struct CpuFlatId {
    node_id: usize,
    pd_id: usize,
    llc_pos: usize,
    core_pos: usize,
    cpu_pos: usize,
    cpu_id: usize,
    smt_level: usize,
    cache_size: usize,
    cpu_cap: usize,
}

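// A compute domain groups the CPUs that share a NUMA node and an LLC, split
// by big/little core type on hybrid systems. cpdom_alt_id names the domain of
// the opposite core type (if any), and neighbor_map buckets the other domains
// by topological distance.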
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Clone)]
struct ComputeDomainKey {
    node_id: usize,
    llc_pos: usize,
    is_big: bool,
}

#[derive(Debug, Clone)]
struct ComputeDomainValue {
    cpdom_id: usize,
    cpdom_alt_id: Cell<usize>,
    cpu_ids: Vec<usize>,
    neighbor_map: RefCell<BTreeMap<usize, RefCell<Vec<usize>>>>,
}

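// The host topology flattened into two preference-ordered CPU lists (one per
// power mode) plus the compute-domain map fed to the BPF side.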
#[derive(Debug)]
struct FlatTopology {
    all_cpus_mask: Cpumask,
    cpu_fids_performance: Vec<CpuFlatId>,
    cpu_fids_powersave: Vec<CpuFlatId>,
    cpdom_map: BTreeMap<ComputeDomainKey, ComputeDomainValue>,
    smt_enabled: bool,
}

impl fmt::Display for FlatTopology {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for cpu_fid in self.cpu_fids_performance.iter() {
            write!(f, "\nCPU in performance: {:?}", cpu_fid)?;
        }
        for cpu_fid in self.cpu_fids_powersave.iter() {
            write!(f, "\nCPU in powersave: {:?}", cpu_fid)?;
        }
        for (k, v) in self.cpdom_map.iter() {
            write!(f, "\nCPDOM: {:?} {:?}", k, v)?;
        }
        write!(f, "\nSMT: {}", self.smt_enabled)?;
        Ok(())
    }
}

impl FlatTopology {
    pub fn new() -> Result<FlatTopology> {
        let sys_topo = Topology::new().expect("Failed to build host topology");
        let sys_em = EnergyModel::new();
        debug!("{:#?}", sys_topo);
        debug!("{:#?}", sys_em);

        let (cpu_fids_performance, avg_cap) =
            Self::build_cpu_fids(&sys_topo, &sys_em, false).unwrap();
        let (cpu_fids_powersave, _) = Self::build_cpu_fids(&sys_topo, &sys_em, true).unwrap();

        let cpdom_map = Self::build_cpdom(&cpu_fids_performance, avg_cap).unwrap();

        Ok(FlatTopology {
            all_cpus_mask: sys_topo.span,
            cpu_fids_performance,
            cpu_fids_powersave,
            cpdom_map,
            smt_enabled: sys_topo.smt_enabled,
        })
    }

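    // Flatten the topology into a preference-ordered CPU list and compute the
    // average CPU capacity (used later to classify big vs. little cores). In
    // powersave mode, low-capacity cores sort first so work packs onto
    // efficient cores; otherwise, high-capacity cores sort first and physical
    // cores are preferred over SMT siblings.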
    fn build_cpu_fids(
        topo: &Topology,
        em: &Result<EnergyModel>,
        prefer_powersave: bool,
    ) -> Option<(Vec<CpuFlatId>, usize)> {
        let mut cpu_fids = Vec::new();

        let mut avg_cap = 0;
        for (&node_id, node) in topo.nodes.iter() {
            for (llc_pos, (_llc_id, llc)) in node.llcs.iter().enumerate() {
                for (core_pos, (_core_id, core)) in llc.cores.iter().enumerate() {
                    for (cpu_pos, (cpu_id, cpu)) in core.cpus.iter().enumerate() {
                        let cpu_id = *cpu_id;
                        let pd_id = Self::get_pd_id(em, cpu_id, node_id);
                        let cpu_fid = CpuFlatId {
                            node_id,
                            pd_id,
                            llc_pos,
                            core_pos,
                            cpu_pos,
                            cpu_id,
                            smt_level: cpu.smt_level,
                            cache_size: cpu.cache_size,
                            cpu_cap: cpu.cpu_capacity,
                        };
                        cpu_fids.push(cpu_fid);
                        avg_cap += cpu.cpu_capacity;
                    }
                }
            }
        }
        avg_cap /= cpu_fids.len();

        if prefer_powersave {
            cpu_fids.sort_by(|a, b| {
                a.node_id
                    .cmp(&b.node_id)
                    .then_with(|| a.llc_pos.cmp(&b.llc_pos))
                    .then_with(|| a.cpu_cap.cmp(&b.cpu_cap))
                    .then_with(|| b.smt_level.cmp(&a.smt_level))
                    .then_with(|| b.cache_size.cmp(&a.cache_size))
                    .then_with(|| a.pd_id.cmp(&b.pd_id))
                    .then_with(|| a.core_pos.cmp(&b.core_pos))
                    .then_with(|| a.cpu_pos.cmp(&b.cpu_pos))
            });
        } else {
            cpu_fids.sort_by(|a, b| {
                a.cpu_pos
                    .cmp(&b.cpu_pos)
                    .then_with(|| a.node_id.cmp(&b.node_id))
                    .then_with(|| a.llc_pos.cmp(&b.llc_pos))
                    .then_with(|| b.cpu_cap.cmp(&a.cpu_cap))
                    .then_with(|| a.smt_level.cmp(&b.smt_level))
                    .then_with(|| b.cache_size.cmp(&a.cache_size))
                    .then_with(|| a.pd_id.cmp(&b.pd_id))
                    .then_with(|| a.core_pos.cmp(&b.core_pos))
            });
        }

        Some((cpu_fids, avg_cap))
    }

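    // Map a CPU to its energy-model performance domain, falling back to the
    // NUMA node id when no energy model is available.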
    fn get_pd_id(em: &Result<EnergyModel>, cpu_id: usize, node_id: usize) -> usize {
        match em {
            Ok(em) => em.get_pd(cpu_id).unwrap().id,
            Err(_) => node_id,
        }
    }

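    // Group CPUs into compute domains keyed by (node, LLC, big/little), link
    // each domain to its opposite-type twin, and record every other domain in
    // a neighbor map keyed by distance; init_cpus() later copies all of this
    // into the BPF cpdom_ctxs.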
    fn build_cpdom(
        cpu_fids: &[CpuFlatId],
        avg_cap: usize,
    ) -> Option<BTreeMap<ComputeDomainKey, ComputeDomainValue>> {
        let mut cpdom_id = 0;
        let mut cpdom_map: BTreeMap<ComputeDomainKey, ComputeDomainValue> = BTreeMap::new();
        for cpu_fid in cpu_fids.iter() {
            let key = ComputeDomainKey {
                node_id: cpu_fid.node_id,
                llc_pos: cpu_fid.llc_pos,
                is_big: cpu_fid.cpu_cap >= avg_cap,
            };
            let value = cpdom_map.entry(key).or_insert_with(|| {
                let value = ComputeDomainValue {
                    cpdom_id,
                    cpdom_alt_id: Cell::new(cpdom_id),
                    cpu_ids: Vec::new(),
                    neighbor_map: RefCell::new(BTreeMap::new()),
                };
                cpdom_id += 1;
                value
            });
            value.cpu_ids.push(cpu_fid.cpu_id);
        }

        for (k, v) in cpdom_map.iter() {
            let mut key = k.clone();
            key.is_big = !k.is_big;

            if let Some(alt_v) = cpdom_map.get(&key) {
                v.cpdom_alt_id.set(alt_v.cpdom_id);
            }
        }

        for ((from_k, from_v), (to_k, to_v)) in iproduct!(cpdom_map.iter(), cpdom_map.iter()) {
            if from_k == to_k {
                continue;
            }

            let d = Self::dist(from_k, to_k);
            let mut map = from_v.neighbor_map.borrow_mut();
            match map.get(&d) {
                Some(v) => {
                    v.borrow_mut().push(to_v.cpdom_id);
                }
                None => {
                    map.insert(d, RefCell::new(vec![to_v.cpdom_id]));
                }
            }
        }

        Some(cpdom_map)
    }

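    // Topological distance between two compute domains: crossing the
    // big/little boundary costs 3, crossing a NUMA node 2, and crossing an
    // LLC within a node 1, so nearer (cheaper) neighbors sort first in the
    // neighbor map.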
    fn dist(from: &ComputeDomainKey, to: &ComputeDomainKey) -> usize {
        let mut d = 0;
        if from.is_big != to.is_big {
            d += 3;
        }
        if from.node_id != to.node_id {
            d += 2;
        } else if from.llc_pos != to.llc_pos {
            d += 1;
        }
        d
    }
}

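// The userspace half of the scheduler: owns the loaded BPF skeleton, the
// attached struct_ops link, and the ring buffer that streams per-task samples
// from BPF to the stats server.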
struct Scheduler<'a> {
    skel: BpfSkel<'a>,
    struct_ops: Option<libbpf_rs::Link>,
    rb_mgr: libbpf_rs::RingBuffer<'static>,
    intrspc: introspec,
    intrspc_rx: Receiver<SchedSample>,
    monitor_tid: Option<ThreadId>,
    stats_server: StatsServer<StatsReq, StatsRes>,
    mseq_id: u64,
}

impl<'a> Scheduler<'a> {
    fn init(opts: &'a Opts, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
        if *NR_CPU_IDS > LAVD_CPU_ID_MAX as usize {
            panic!(
                "Num possible CPU IDs ({}) exceeds maximum of ({})",
                *NR_CPU_IDS, LAVD_CPU_ID_MAX
            );
        }

        set_rlimit_infinity();

        let mut skel_builder = BpfSkelBuilder::default();
        skel_builder.obj_builder.debug(opts.verbose > 0);
        let mut skel = scx_ops_open!(skel_builder, open_object, lavd_ops)?;

        if !opts.no_futex_boost {
            compat::cond_tracepoint_enable(
                "syscalls:sys_enter_futex",
                &skel.progs.rtp_sys_enter_futex,
            )?;
            compat::cond_tracepoint_enable(
                "syscalls:sys_exit_futex",
                &skel.progs.rtp_sys_exit_futex,
            )?;
            compat::cond_tracepoint_enable(
                "syscalls:sys_exit_futex_wait",
                &skel.progs.rtp_sys_exit_futex_wait,
            )?;
            compat::cond_tracepoint_enable(
                "syscalls:sys_exit_futex_waitv",
                &skel.progs.rtp_sys_exit_futex_waitv,
            )?;
            compat::cond_tracepoint_enable(
                "syscalls:sys_exit_futex_wake",
                &skel.progs.rtp_sys_exit_futex_wake,
            )?;
        }

        let topo = FlatTopology::new().unwrap();
        Self::init_cpus(&mut skel, opts, &topo);
        Self::init_globals(&mut skel, opts, &topo);

        let mut skel = scx_ops_load!(skel, lavd_ops, uei)?;
        let struct_ops = Some(scx_ops_attach!(skel, lavd_ops)?);
        let stats_server = StatsServer::new(stats::server_data(*NR_CPU_IDS as u64)).launch()?;

        let (intrspc_tx, intrspc_rx) = channel::bounded(65536);
        let rb_map = &mut skel.maps.introspec_msg;
        let mut builder = libbpf_rs::RingBufferBuilder::new();
        builder
            .add(rb_map, move |data| {
                Scheduler::relay_introspec(data, &intrspc_tx)
            })
            .unwrap();
        let rb_mgr = builder.build().unwrap();

        Ok(Self {
            skel,
            struct_ops,
            rb_mgr,
            intrspc: introspec::new(),
            intrspc_rx,
            monitor_tid: None,
            stats_server,
            mseq_id: 0,
        })
    }

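    // Copy the computed topology into the BPF maps: per-CPU capacities, the
    // CPU preference order for each power mode, and the per-compute-domain
    // contexts (cpumask plus neighbor lists).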
    fn init_cpus(skel: &mut OpenBpfSkel, opts: &Opts, topo: &FlatTopology) {
        debug!("{:#?}", topo);

        for cpu in topo.cpu_fids_performance.iter() {
            skel.maps.rodata_data.cpu_capacity[cpu.cpu_id] = cpu.cpu_cap as u16;
        }

        let mut cpu_pf_order = vec![];
        let mut cpu_ps_order = vec![];
        if opts.cpu_pref_order.is_empty() {
            for cpu in topo.cpu_fids_performance.iter() {
                cpu_pf_order.push(cpu.cpu_id);
            }
            for cpu in topo.cpu_fids_powersave.iter() {
                cpu_ps_order.push(cpu.cpu_id);
            }
        } else {
            let cpu_list = read_cpulist(&opts.cpu_pref_order).unwrap();
            let pref_mask = Cpumask::from_cpulist(&opts.cpu_pref_order).unwrap();
            if pref_mask != topo.all_cpus_mask {
                panic!("--cpu-pref-order must cover all the CPUs in the system.");
            }
            cpu_pf_order = cpu_list.clone();
            cpu_ps_order = cpu_list;
        }
        for (pos, cpu) in cpu_pf_order.iter().enumerate() {
            skel.maps.rodata_data.cpu_order_performance[pos] = *cpu as u16;
        }
        for (pos, cpu) in cpu_ps_order.iter().enumerate() {
            skel.maps.rodata_data.cpu_order_powersave[pos] = *cpu as u16;
        }
        info!("CPU pref order in performance mode: {:?}", cpu_pf_order);
        info!("CPU pref order in powersave mode: {:?}", cpu_ps_order);

        for (k, v) in topo.cpdom_map.iter() {
            skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].id = v.cpdom_id as u64;
            skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].alt_id = v.cpdom_alt_id.get() as u64;
            skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].node_id = k.node_id as u8;
            skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].is_big = k.is_big as u8;
            skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].is_valid = 1;
            for cpu_id in v.cpu_ids.iter() {
                let i = cpu_id / 64;
                let j = cpu_id % 64;
                skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].__cpumask[i] |= 0x01 << j;
            }

            if v.neighbor_map.borrow().len() > LAVD_CPDOM_MAX_DIST as usize {
                panic!("The processor topology is too complex to handle in BPF.");
            }

            for (i, (_dist, neighbors)) in v.neighbor_map.borrow().iter().enumerate() {
                let nr_neighbors = neighbors.borrow().len() as u8;
                if nr_neighbors > LAVD_CPDOM_MAX_NR as u8 {
                    panic!("The processor topology is too complex to handle in BPF.");
                }
                skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].nr_neighbors[i] = nr_neighbors;
                for n in neighbors.borrow().iter() {
                    skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].neighbor_bits[i] |= 0x1 << n;
                }
            }
        }
    }

    fn is_powersave_mode(opts: &Opts) -> bool {
        opts.prefer_smt_core && opts.prefer_little_core
    }

    fn init_globals(skel: &mut OpenBpfSkel, opts: &Opts, topo: &FlatTopology) {
        skel.maps.bss_data.no_core_compaction = opts.no_core_compaction;
        skel.maps.bss_data.no_freq_scaling = opts.no_freq_scaling;
        skel.maps.bss_data.no_prefer_turbo_core = opts.no_prefer_turbo_core;
        skel.maps.bss_data.is_powersave_mode = Self::is_powersave_mode(opts);
        skel.maps.rodata_data.nr_cpu_ids = *NR_CPU_IDS as u64;
        skel.maps.rodata_data.is_smt_active = topo.smt_enabled;
        skel.maps.rodata_data.is_autopilot_on = opts.autopilot;
        skel.maps.rodata_data.verbose = opts.verbose;
        skel.maps.rodata_data.slice_max_ns = opts.slice_max_us * 1000;
        skel.maps.rodata_data.slice_min_ns = opts.slice_min_us * 1000;

        skel.struct_ops.lavd_ops_mut().flags = *compat::SCX_OPS_ALLOW_QUEUED_WAKEUP
            | *compat::SCX_OPS_ENQ_EXITING
            | *compat::SCX_OPS_ENQ_LAST
            | *compat::SCX_OPS_ENQ_MIGRATION_DISABLED
            | *compat::SCX_OPS_KEEP_BUILTIN_IDLE;
    }

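    // Monotonically increasing sequence number for introspection messages.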
    fn get_msg_seq_id() -> u64 {
        static MSEQ: AtomicU64 = AtomicU64::new(0);
        MSEQ.fetch_add(1, Ordering::Relaxed) + 1
    }

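    // Ring-buffer callback: decode a msg_task_ctx emitted by the BPF side and
    // forward it as a SchedSample, dropping the sample if the channel is full.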
    fn relay_introspec(data: &[u8], intrspc_tx: &Sender<SchedSample>) -> i32 {
        let mt = msg_task_ctx::from_bytes(data);
        if mt.hdr.kind != LAVD_MSG_TASKC {
            return 0;
        }
        let tx = mt.taskc_x;
        let tc = mt.taskc;

        let mseq = Scheduler::get_msg_seq_id();

        let c_tx_cm: *const c_char = (&tx.comm as *const [c_char; 17]) as *const c_char;
        let c_tx_cm_str: &CStr = unsafe { CStr::from_ptr(c_tx_cm) };
        let tx_comm: &str = c_tx_cm_str.to_str().unwrap();

        let c_tx_st: *const c_char = (&tx.stat as *const [c_char; 5]) as *const c_char;
        let c_tx_st_str: &CStr = unsafe { CStr::from_ptr(c_tx_st) };
        let tx_stat: &str = c_tx_st_str.to_str().unwrap();

        match intrspc_tx.try_send(SchedSample {
            mseq,
            pid: tx.pid,
            comm: tx_comm.into(),
            stat: tx_stat.into(),
            cpu_id: tx.cpu_id,
            slice_ns: tc.slice_ns,
            lat_cri: tc.lat_cri,
            avg_lat_cri: tx.avg_lat_cri,
            static_prio: tx.static_prio,
            slice_boost_prio: tc.slice_boost_prio,
            run_freq: tc.run_freq,
            avg_runtime: tc.avg_runtime,
            wait_freq: tc.wait_freq,
            wake_freq: tc.wake_freq,
            perf_cri: tc.perf_cri,
            thr_perf_cri: tx.thr_perf_cri,
            cpuperf_cur: tx.cpuperf_cur,
            cpu_util: tx.cpu_util,
            cpu_sutil: tx.cpu_sutil,
            nr_active: tx.nr_active,
        }) {
            Ok(()) | Err(TrySendError::Full(_)) => 0,
            Err(e) => panic!("failed to send on intrspc_tx ({})", &e),
        }
    }

    fn prep_introspec(&mut self) {
        self.skel.maps.bss_data.intrspc.cmd = self.intrspc.cmd;
        self.skel.maps.bss_data.intrspc.arg = self.intrspc.arg;
    }

    fn cleanup_introspec(&mut self) {
        self.skel.maps.bss_data.intrspc.cmd = LAVD_CMD_NOP;
    }

    fn get_pc(x: u64, y: u64) -> f64 {
        100. * x as f64 / y as f64
    }

    fn get_power_mode(power_mode: i32) -> &'static str {
        match power_mode as u32 {
            LAVD_PM_PERFORMANCE => "performance",
            LAVD_PM_BALANCED => "balanced",
            LAVD_PM_POWERSAVE => "powersave",
            _ => "unknown",
        }
    }

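    // Serve one stats request: either a system-wide summary derived from the
    // BPF-side sys_stat counters, or a batch of per-task samples collected
    // via the introspection ring buffer.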
    fn stats_req_to_res(&mut self, req: &StatsReq) -> Result<StatsRes> {
        Ok(match req {
            StatsReq::NewSampler(tid) => {
                self.rb_mgr.consume().unwrap();
                self.monitor_tid = Some(*tid);
                StatsRes::Ack
            }
            StatsReq::SysStatsReq { tid } => {
                if Some(*tid) != self.monitor_tid {
                    return Ok(StatsRes::Bye);
                }
                self.mseq_id += 1;

                let bss_data = &self.skel.maps.bss_data;
                let st = bss_data.sys_stat;

                let mseq = self.mseq_id;
                let nr_queued_task = st.nr_queued_task;
                let nr_active = st.nr_active;
                let nr_sched = st.nr_sched;
                let pc_pc = Self::get_pc(st.nr_perf_cri, nr_sched);
                let pc_lc = Self::get_pc(st.nr_lat_cri, nr_sched);
                let pc_x_migration = Self::get_pc(st.nr_x_migration, nr_sched);
                let nr_stealee = st.nr_stealee;
                let nr_big = st.nr_big;
                let pc_big = Self::get_pc(nr_big, nr_sched);
                let pc_pc_on_big = Self::get_pc(st.nr_pc_on_big, nr_big);
                let pc_lc_on_big = Self::get_pc(st.nr_lc_on_big, nr_big);
                let power_mode = Self::get_power_mode(bss_data.power_mode);
                let total_time = bss_data.performance_mode_ns
                    + bss_data.balanced_mode_ns
                    + bss_data.powersave_mode_ns;
                let pc_performance = Self::get_pc(bss_data.performance_mode_ns, total_time);
                let pc_balanced = Self::get_pc(bss_data.balanced_mode_ns, total_time);
                let pc_powersave = Self::get_pc(bss_data.powersave_mode_ns, total_time);

                StatsRes::SysStats(SysStats {
                    mseq,
                    nr_queued_task,
                    nr_active,
                    nr_sched,
                    pc_pc,
                    pc_lc,
                    pc_x_migration,
                    nr_stealee,
                    pc_big,
                    pc_pc_on_big,
                    pc_lc_on_big,
                    power_mode: power_mode.to_string(),
                    pc_performance,
                    pc_balanced,
                    pc_powersave,
                })
            }
            StatsReq::SchedSamplesNr {
                tid,
                nr_samples,
                interval_ms,
            } => {
                if Some(*tid) != self.monitor_tid {
                    return Ok(StatsRes::Bye);
                }

                self.intrspc.cmd = LAVD_CMD_SCHED_N;
                self.intrspc.arg = *nr_samples;
                self.prep_introspec();
                std::thread::sleep(Duration::from_millis(*interval_ms));
                self.rb_mgr.poll(Duration::from_millis(100)).unwrap();

                let mut samples = vec![];
                while let Ok(ts) = self.intrspc_rx.try_recv() {
                    samples.push(ts);
                }

                self.cleanup_introspec();

                StatsRes::SchedSamples(SchedSamples { samples })
            }
        })
    }

    pub fn exited(&mut self) -> bool {
        uei_exited!(&self.skel, uei)
    }

    fn set_power_profile(&mut self, mode: u32) -> Result<(), u32> {
        let prog = &mut self.skel.progs.set_power_profile;
        let mut args = power_arg {
            power_mode: mode as c_int,
        };
        let input = ProgramInput {
            context_in: Some(unsafe {
                std::slice::from_raw_parts_mut(
                    &mut args as *mut _ as *mut u8,
                    std::mem::size_of_val(&args),
                )
            }),
            ..Default::default()
        };
        let out = prog.test_run(input).unwrap();
        if out.return_value != 0 {
            return Err(out.return_value);
        }

        Ok(())
    }

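    // Poll the platform power profile and push it to the BPF side when it
    // changes. Returns (keep_polling, current_profile); an unknown profile
    // turns autopower off.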
    fn update_power_profile(&mut self, prev_profile: PowerProfile) -> (bool, PowerProfile) {
        let profile = fetch_power_profile(false);
        if profile == prev_profile {
            return (true, profile);
        }

        let _ = match profile {
            PowerProfile::Performance => self.set_power_profile(LAVD_PM_PERFORMANCE),
            PowerProfile::Balanced => self.set_power_profile(LAVD_PM_BALANCED),
            PowerProfile::Powersave => self.set_power_profile(LAVD_PM_POWERSAVE),
            PowerProfile::Unknown => {
                return (false, profile);
            }
        };

        info!("Set the scheduler's power profile to {profile} mode.");
        (true, profile)
    }

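    // Main loop: answer stats requests with a 1s timeout, re-evaluate the
    // power profile when autopower is on, and exit once the BPF scheduler
    // signals termination.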
    fn run(&mut self, opts: &Opts, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
        let (res_ch, req_ch) = self.stats_server.channels();
        let mut autopower = opts.autopower;
        let mut profile = PowerProfile::Unknown;

        if opts.performance {
            let _ = self.set_power_profile(LAVD_PM_PERFORMANCE);
        } else if opts.powersave {
            let _ = self.set_power_profile(LAVD_PM_POWERSAVE);
        } else {
            let _ = self.set_power_profile(LAVD_PM_BALANCED);
        }

        while !shutdown.load(Ordering::Relaxed) && !self.exited() {
            if autopower {
                (autopower, profile) = self.update_power_profile(profile);
            }

            match req_ch.recv_timeout(Duration::from_secs(1)) {
                Ok(req) => {
                    let res = self.stats_req_to_res(&req)?;
                    res_ch.send(res)?;
                }
                Err(RecvTimeoutError::Timeout) => {}
                Err(e) => Err(e)?,
            }
            self.cleanup_introspec();
        }
        self.rb_mgr.consume().unwrap();

        self.struct_ops.take();
        uei_report!(&self.skel, uei)
    }
}

impl Drop for Scheduler<'_> {
    fn drop(&mut self) {
        self.struct_ops.take();
    }
}

fn init_log(opts: &Opts) {
    let llv = match opts.verbose {
        0 => simplelog::LevelFilter::Info,
        1 => simplelog::LevelFilter::Debug,
        _ => simplelog::LevelFilter::Trace,
    };
    let mut lcfg = simplelog::ConfigBuilder::new();
    lcfg.set_time_level(simplelog::LevelFilter::Error)
        .set_location_level(simplelog::LevelFilter::Off)
        .set_target_level(simplelog::LevelFilter::Off)
        .set_thread_level(simplelog::LevelFilter::Off);
    simplelog::TermLogger::init(
        llv,
        lcfg.build(),
        simplelog::TerminalMode::Stderr,
        simplelog::ColorChoice::Auto,
    )
    .unwrap();
}

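// Entry point: parse options, handle the one-shot modes (--version,
// --help-stats, sample/stats monitoring), then run the scheduler, restarting
// it whenever the BPF side requests a restart.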
fn main() -> Result<()> {
    let mut opts = Opts::parse();

    if opts.version {
        println!(
            "scx_lavd {}",
            build_id::full_version(env!("CARGO_PKG_VERSION"))
        );
        return Ok(());
    }

    if opts.help_stats {
        let sys_stats_meta_name = SysStats::meta().name;
        let sched_sample_meta_name = SchedSample::meta().name;
        let stats_meta_names: &[&str] = &[
            sys_stats_meta_name.as_str(),
            sched_sample_meta_name.as_str(),
        ];
        stats::server_data(0).describe_meta(&mut std::io::stdout(), Some(&stats_meta_names))?;
        return Ok(());
    }

    init_log(&opts);

    opts.proc().unwrap();
    debug!("{:#?}", opts);

    let shutdown = Arc::new(AtomicBool::new(false));
    let shutdown_clone = shutdown.clone();
    ctrlc::set_handler(move || {
        shutdown_clone.store(true, Ordering::Relaxed);
    })
    .context("Error setting Ctrl-C handler")?;

    if let Some(nr_samples) = opts.monitor_sched_samples {
        let shutdown_copy = shutdown.clone();
        let jh = std::thread::spawn(move || {
            stats::monitor_sched_samples(nr_samples, shutdown_copy).unwrap()
        });
        let _ = jh.join();
        return Ok(());
    }

    if let Some(intv) = opts.monitor.or(opts.stats) {
        let shutdown_copy = shutdown.clone();
        let jh = std::thread::spawn(move || {
            stats::monitor(Duration::from_secs_f64(intv), shutdown_copy).unwrap()
        });
        if opts.monitor.is_some() {
            let _ = jh.join();
            return Ok(());
        }
    }

    let mut open_object = MaybeUninit::uninit();
    loop {
        let mut sched = Scheduler::init(&opts, &mut open_object)?;
        info!(
            "scx_lavd scheduler is initialized (build ID: {})",
            build_id::full_version(env!("CARGO_PKG_VERSION"))
        );
        info!("scx_lavd scheduler starts running.");
        if !sched.run(&opts, shutdown.clone())?.should_restart() {
            break;
        }
    }

    Ok(())
}