mod bpf_skel;
pub use bpf_skel::*;
pub mod bpf_intf;
pub use bpf_intf::*;

mod cpu_order;
use scx_utils::init_libbpf_logging;
mod stats;
use std::ffi::c_int;
use std::ffi::CStr;
use std::mem;
use std::mem::MaybeUninit;
use std::str;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::thread::ThreadId;
use std::time::Duration;

use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use clap_num::number_range;
use cpu_order::CpuOrder;
use cpu_order::PerfCpuOrder;
use crossbeam::channel;
use crossbeam::channel::Receiver;
use crossbeam::channel::RecvTimeoutError;
use crossbeam::channel::Sender;
use crossbeam::channel::TrySendError;
use libbpf_rs::skel::Skel;
use libbpf_rs::OpenObject;
use libbpf_rs::PrintLevel;
use libbpf_rs::ProgramInput;
use libc::c_char;
use log::debug;
use log::info;
use plain::Plain;
use scx_arena::ArenaLib;
use scx_stats::prelude::*;
use scx_utils::autopower::{fetch_power_profile, PowerProfile};
use scx_utils::build_id;
use scx_utils::compat;
use scx_utils::libbpf_clap_opts::LibbpfOpts;
use scx_utils::scx_ops_attach;
use scx_utils::scx_ops_load;
use scx_utils::scx_ops_open;
use scx_utils::try_set_rlimit_infinity;
use scx_utils::uei_exited;
use scx_utils::uei_report;
use scx_utils::EnergyModel;
use scx_utils::TopologyArgs;
use scx_utils::UserExitInfo;
use scx_utils::NR_CPU_IDS;
use stats::SchedSample;
use stats::SchedSamples;
use stats::StatsReq;
use stats::StatsRes;
use stats::SysStats;

const SCHEDULER_NAME: &str = "scx_lavd";

#[derive(Debug, Parser)]
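/// scx_lavd: a Latency-criticality Aware Virtual Deadline (LAVD) scheduler.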
struct Opts {
    /// Automatically decide the scheduler's power mode based on system load (autopilot mode).
    #[clap(long = "autopilot", action = clap::ArgAction::SetTrue)]
    autopilot: bool,

    /// Automatically decide the scheduler's power mode based on the system's power profile.
    #[clap(long = "autopower", action = clap::ArgAction::SetTrue)]
    autopower: bool,

    /// Run in performance mode for maximum performance.
    #[clap(long = "performance", action = clap::ArgAction::SetTrue)]
    performance: bool,

    /// Run in powersave mode to minimize power consumption.
    #[clap(long = "powersave", action = clap::ArgAction::SetTrue)]
    powersave: bool,

    /// Run in balanced mode, trading off performance against power consumption.
    #[clap(long = "balanced", action = clap::ArgAction::SetTrue)]
    balanced: bool,

    /// Maximum time slice in microseconds.
    #[clap(long = "slice-max-us", default_value = "5000")]
    slice_max_us: u64,

    /// Minimum time slice in microseconds.
    #[clap(long = "slice-min-us", default_value = "500")]
    slice_min_us: u64,

    /// Margin in percent (0-100) required before migrating a task across CPUs.
    #[clap(long = "mig-delta-pct", default_value = "0", value_parser=Opts::mig_delta_pct_range)]
    mig_delta_pct: u8,

    /// Fixed time slice in microseconds for pinned tasks, which then use per-CPU DSQs.
    #[clap(long = "pinned-slice-us")]
    pinned_slice_us: Option<u64>,

    /// Shift value (0-10) limiting preemption to the most latency-critical tasks.
    #[clap(long = "preempt-shift", default_value = "6", value_parser=Opts::preempt_shift_range)]
    preempt_shift: u8,

    /// Manually specify the CPU preference order; when set, the energy model is not used.
    #[clap(long = "cpu-pref-order", default_value = "")]
    cpu_pref_order: String,

    /// Do not use the energy model when deciding the CPU preference order.
    #[clap(long = "no-use-em", action = clap::ArgAction::SetTrue)]
    no_use_em: bool,

    /// Disable futex-based boosting of lock holders.
    #[clap(long = "no-futex-boost", action = clap::ArgAction::SetTrue)]
    no_futex_boost: bool,

    /// Disable preemption.
    #[clap(long = "no-preemption", action = clap::ArgAction::SetTrue)]
    no_preemption: bool,

    /// Disable the synchronous-wakeup optimization.
    #[clap(long = "no-wake-sync", action = clap::ArgAction::SetTrue)]
    no_wake_sync: bool,

    /// Disable slice boosting.
    #[clap(long = "no-slice-boost", action = clap::ArgAction::SetTrue)]
    no_slice_boost: bool,

    /// Use per-CPU DSQs instead of per-domain DSQs.
    #[clap(long = "per-cpu-dsq", action = clap::ArgAction::SetTrue)]
    per_cpu_dsq: bool,

    /// Enable CPU bandwidth control.
    #[clap(long = "enable-cpu-bw", action = clap::ArgAction::SetTrue)]
    enable_cpu_bw: bool,

    /// Disable core compaction, which packs work onto fewer cores when underutilized.
    #[clap(long = "no-core-compaction", action = clap::ArgAction::SetTrue)]
    no_core_compaction: bool,

    /// Disable CPU frequency scaling.
    #[clap(long = "no-freq-scaling", action = clap::ArgAction::SetTrue)]
    no_freq_scaling: bool,

    /// Enable stats monitoring with the specified interval in seconds.
    #[clap(long)]
    stats: Option<f64>,

    /// Run in stats monitoring mode with the specified interval in seconds.
    #[clap(long)]
    monitor: Option<f64>,

    /// Run in monitoring mode and print the given number of scheduling samples.
    #[clap(long)]
    monitor_sched_samples: Option<u64>,

    /// Increase verbosity; can be given multiple times.
    #[clap(short = 'v', long, action = clap::ArgAction::Count)]
    verbose: u8,

    /// Print version and exit.
    #[clap(short = 'V', long, action = clap::ArgAction::SetTrue)]
    version: bool,

    /// Show descriptions for all stats and exit.
    #[clap(long)]
    help_stats: bool,

    #[clap(flatten, next_help_heading = "Libbpf Options")]
    pub libbpf: LibbpfOpts,

    #[clap(flatten)]
    topology: Option<TopologyArgs>,
}

impl Opts {
    fn can_autopilot(&self) -> bool {
        !self.autopower
            && !self.performance
            && !self.powersave
            && !self.balanced
            && !self.no_core_compaction
    }

    fn can_autopower(&self) -> bool {
        !self.autopilot
            && !self.performance
            && !self.powersave
            && !self.balanced
            && !self.no_core_compaction
    }

    fn can_performance(&self) -> bool {
        !self.autopilot && !self.autopower && !self.powersave && !self.balanced
    }

    fn can_balanced(&self) -> bool {
        !self.autopilot
            && !self.autopower
            && !self.performance
            && !self.powersave
            && !self.no_core_compaction
    }

    fn can_powersave(&self) -> bool {
        !self.autopilot
            && !self.autopower
            && !self.performance
            && !self.balanced
            && !self.no_core_compaction
    }

    /// Validate option combinations and derive dependent settings.
    /// Returns None when mutually exclusive options are given.
    fn proc(&mut self) -> Option<&mut Self> {
        if !self.autopilot {
            self.autopilot = self.can_autopilot();
        }

        if self.autopilot {
            if !self.can_autopilot() {
                info!("Autopilot mode cannot be used with conflicting options.");
                return None;
            }
            info!("Autopilot mode is enabled.");
        }

        if self.autopower {
            if !self.can_autopower() {
                info!("Autopower mode cannot be used with conflicting options.");
                return None;
            }
            info!("Autopower mode is enabled.");
        }

        if self.performance {
            if !self.can_performance() {
                info!("Performance mode cannot be used with conflicting options.");
                return None;
            }
            info!("Performance mode is enabled.");
            self.no_core_compaction = true;
        }

        if self.powersave {
            if !self.can_powersave() {
                info!("Powersave mode cannot be used with conflicting options.");
                return None;
            }
            info!("Powersave mode is enabled.");
            self.no_core_compaction = false;
        }

        if self.balanced {
            if !self.can_balanced() {
                info!("Balanced mode cannot be used with conflicting options.");
                return None;
            }
            info!("Balanced mode is enabled.");
            self.no_core_compaction = false;
        }

        if !EnergyModel::has_energy_model() || !self.cpu_pref_order.is_empty() {
            self.no_use_em = true;
            info!("The energy model won't be used for the CPU preference order.");
        }

        if let Some(pinned_slice) = self.pinned_slice_us {
            if pinned_slice < self.slice_min_us || pinned_slice > self.slice_max_us {
                info!(
                    "pinned-slice-us ({}) must be between slice-min-us ({}) and slice-max-us ({})",
                    pinned_slice, self.slice_min_us, self.slice_max_us
                );
                return None;
            }
            info!(
                "Pinned task slice mode is enabled ({} us). Pinned tasks will use per-CPU DSQs.",
                pinned_slice
            );
        }

        Some(self)
    }

    fn preempt_shift_range(s: &str) -> Result<u8, String> {
        number_range(s, 0, 10)
    }

    fn mig_delta_pct_range(s: &str) -> Result<u8, String> {
        number_range(s, 0, 100)
    }
}

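// msg_task_ctx mirrors a C struct that the BPF side emits over the ring
// buffer; implementing Plain lets us reinterpret the raw bytes directly.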
unsafe impl Plain for msg_task_ctx {}

impl msg_task_ctx {
    fn from_bytes(buf: &[u8]) -> &msg_task_ctx {
        plain::from_bytes(buf).expect("The buffer is either too short or not aligned!")
    }
}

impl introspec {
    fn new() -> Self {
        unsafe { mem::MaybeUninit::<introspec>::zeroed().assume_init() }
    }
}

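// Userspace scheduler state: the loaded BPF skeleton, the attached
// struct_ops link, the introspection ring buffer with its relay channel,
// and the stats server that serves scx_stats clients.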
struct Scheduler<'a> {
    skel: BpfSkel<'a>,
    struct_ops: Option<libbpf_rs::Link>,
    rb_mgr: libbpf_rs::RingBuffer<'static>,
    intrspc: introspec,
    intrspc_rx: Receiver<SchedSample>,
    monitor_tid: Option<ThreadId>,
    stats_server: StatsServer<StatsReq, StatsRes>,
    mseq_id: u64,
}

impl<'a> Scheduler<'a> {
    fn init(opts: &'a Opts, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
        if *NR_CPU_IDS > LAVD_CPU_ID_MAX as usize {
            panic!(
                "Number of possible CPU IDs ({}) exceeds the maximum ({})",
                *NR_CPU_IDS, LAVD_CPU_ID_MAX
            );
        }

        try_set_rlimit_infinity();

        let mut skel_builder = BpfSkelBuilder::default();
        skel_builder.obj_builder.debug(opts.verbose > 0);
        init_libbpf_logging(Some(PrintLevel::Debug));

        let open_opts = opts.libbpf.clone().into_bpf_open_opts();
        let mut skel = scx_ops_open!(skel_builder, open_object, lavd_ops, open_opts)?;

        if !opts.no_futex_boost {
            if !Self::attach_futex_ftraces(&mut skel)? {
                info!("Failed to attach futex ftraces; trying tracepoints instead.");
                if !Self::attach_futex_tracepoints(&mut skel)? {
                    info!("Failed to attach futex tracepoints.");
                }
            }
        }

        let order = CpuOrder::new(opts.topology.as_ref()).unwrap();
        Self::init_cpus(&mut skel, &order);
        Self::init_cpdoms(&mut skel, &order);
        Self::init_globals(&mut skel, opts, &order);

        let mut skel = scx_ops_load!(skel, lavd_ops, uei)?;
        let task_size = std::mem::size_of::<types::task_ctx>();
        let arenalib = ArenaLib::init(skel.object_mut(), task_size, *NR_CPU_IDS)?;
        arenalib.setup()?;

        let struct_ops = Some(scx_ops_attach!(skel, lavd_ops)?);
        let stats_server = StatsServer::new(stats::server_data(*NR_CPU_IDS as u64)).launch()?;

        let (intrspc_tx, intrspc_rx) = channel::bounded(65536);
        let rb_map = &mut skel.maps.introspec_msg;
        let mut builder = libbpf_rs::RingBufferBuilder::new();
        builder
            .add(rb_map, move |data| {
                Scheduler::relay_introspec(data, &intrspc_tx)
            })
            .unwrap();
        let rb_mgr = builder.build().unwrap();

        Ok(Self {
            skel,
            struct_ops,
            rb_mgr,
            intrspc: introspec::new(),
            intrspc_rx,
            monitor_tid: None,
            stats_server,
            mseq_id: 0,
        })
    }

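    // Attach fexit probes to the kernel's futex paths so the BPF side can
    // boost tasks holding or waking futexes. Returns Ok(false) when the
    // function tracer is unavailable, letting the caller fall back to
    // tracepoints.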
    fn attach_futex_ftraces(skel: &mut OpenBpfSkel) -> Result<bool> {
        let ftraces = vec![
            ("__futex_wait", &skel.progs.fexit___futex_wait),
            ("futex_wait_multiple", &skel.progs.fexit_futex_wait_multiple),
            (
                "futex_wait_requeue_pi",
                &skel.progs.fexit_futex_wait_requeue_pi,
            ),
            ("futex_wake", &skel.progs.fexit_futex_wake),
            ("futex_wake_op", &skel.progs.fexit_futex_wake_op),
            ("futex_lock_pi", &skel.progs.fexit_futex_lock_pi),
            ("futex_unlock_pi", &skel.progs.fexit_futex_unlock_pi),
        ];

        if !compat::tracer_available("function")? {
            info!("Ftrace is not enabled in the kernel.");
            return Ok(false);
        }

        compat::cond_kprobes_enable(ftraces)
    }

    fn attach_futex_tracepoints(skel: &mut OpenBpfSkel) -> Result<bool> {
        let tracepoints = vec![
            ("syscalls:sys_enter_futex", &skel.progs.rtp_sys_enter_futex),
            ("syscalls:sys_exit_futex", &skel.progs.rtp_sys_exit_futex),
            (
                "syscalls:sys_exit_futex_wait",
                &skel.progs.rtp_sys_exit_futex_wait,
            ),
            (
                "syscalls:sys_exit_futex_waitv",
                &skel.progs.rtp_sys_exit_futex_waitv,
            ),
            (
                "syscalls:sys_exit_futex_wake",
                &skel.progs.rtp_sys_exit_futex_wake,
            ),
        ];

        compat::cond_tracepoints_enable(tracepoints)
    }

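    // Copy per-CPU capacity, big/turbo-core flags, and sibling info into BPF
    // rodata, followed by the performance-to-CPU-order (PCO) tables. Unused
    // PCO slots are filled with the last valid state.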
    fn init_cpus(skel: &mut OpenBpfSkel, order: &CpuOrder) {
        debug!("{:#?}", order);

        let rodata = skel.maps.rodata_data.as_mut().unwrap();
        for cpu in order.cpuids.iter() {
            rodata.cpu_capacity[cpu.cpu_adx] = cpu.cpu_cap as u16;
            rodata.cpu_big[cpu.cpu_adx] = cpu.big_core as u8;
            rodata.cpu_turbo[cpu.cpu_adx] = cpu.turbo_core as u8;
            rodata.cpu_sibling[cpu.cpu_adx] = cpu.cpu_sibling as u32;
        }

        let nr_pco_states: u8 = order.perf_cpu_order.len() as u8;
        if nr_pco_states > LAVD_PCO_STATE_MAX as u8 {
            panic!("Too many performance vs. CPU-order states ({nr_pco_states}) to handle.");
        }
        rodata.nr_pco_states = nr_pco_states;

        for (i, (_, pco)) in order.perf_cpu_order.iter().enumerate() {
            Self::init_pco_tuple(skel, i, pco);
            info!("{:#}", pco);
        }

        let (_, last_pco) = order.perf_cpu_order.last_key_value().unwrap();
        for i in nr_pco_states..LAVD_PCO_STATE_MAX as u8 {
            Self::init_pco_tuple(skel, i as usize, last_pco);
        }
    }

    fn init_pco_tuple(skel: &mut OpenBpfSkel, i: usize, pco: &PerfCpuOrder) {
        let cpus_perf = pco.cpus_perf.borrow();
        let cpus_ovflw = pco.cpus_ovflw.borrow();
        let pco_nr_primary = cpus_perf.len();
        let rodata = skel.maps.rodata_data.as_mut().unwrap();

        rodata.pco_bounds[i] = pco.perf_cap as u32;
        rodata.pco_nr_primary[i] = pco_nr_primary as u16;

        for (j, &cpu_adx) in cpus_perf.iter().enumerate() {
            rodata.pco_table[i][j] = cpu_adx as u16;
        }

        for (j, &cpu_adx) in cpus_ovflw.iter().enumerate() {
            let k = j + pco_nr_primary;
            rodata.pco_table[i][k] = cpu_adx as u16;
        }
    }

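    // Initialize per-compute-domain contexts: IDs, NUMA/LLC placement, a
    // cpumask packed into u64 words (bit cpu_id % 64 of word cpu_id / 64),
    // and the neighbor tables ordered by domain distance.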
    fn init_cpdoms(skel: &mut OpenBpfSkel, order: &CpuOrder) {
        let bss_data = skel.maps.bss_data.as_mut().unwrap();
        for (k, v) in order.cpdom_map.iter() {
            let cpdom = &mut bss_data.cpdom_ctxs[v.cpdom_id];
            cpdom.id = v.cpdom_id as u64;
            cpdom.alt_id = v.cpdom_alt_id.get() as u64;
            cpdom.numa_id = k.numa_adx as u8;
            cpdom.llc_id = k.llc_adx as u8;
            cpdom.is_big = k.is_big as u8;
            cpdom.is_valid = 1;
            for cpu_id in v.cpu_ids.iter() {
                let i = cpu_id / 64;
                let j = cpu_id % 64;
                cpdom.__cpumask[i] |= 0x01 << j;
            }

            if v.neighbor_map.borrow().len() > LAVD_CPDOM_MAX_DIST as usize {
                panic!("The processor topology is too complex to handle in BPF.");
            }

            for (dist_idx, (_d, neighbors)) in v.neighbor_map.borrow().iter().enumerate() {
                let nr_neighbors = neighbors.borrow().len() as u8;
                if nr_neighbors > LAVD_CPDOM_MAX_NR as u8 {
                    panic!("The processor topology is too complex to handle in BPF.");
                }
                cpdom.nr_neighbors[dist_idx] = nr_neighbors;
                for n in neighbors.borrow().iter() {
                    cpdom.neighbor_bits[dist_idx] |= 0x1 << n;
                }
            }
        }
    }

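    // Publish tunables to the BPF side: mutable knobs go to .bss, read-only
    // ones to .rodata, and the struct_ops flags are set before load.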
    fn init_globals(skel: &mut OpenBpfSkel, opts: &Opts, order: &CpuOrder) {
        let bss_data = skel.maps.bss_data.as_mut().unwrap();
        bss_data.no_preemption = opts.no_preemption;
        bss_data.no_core_compaction = opts.no_core_compaction;
        bss_data.no_freq_scaling = opts.no_freq_scaling;
        bss_data.is_powersave_mode = opts.powersave;

        let rodata = skel.maps.rodata_data.as_mut().unwrap();
        rodata.nr_llcs = order.nr_llcs as u64;
        rodata.nr_cpu_ids = *NR_CPU_IDS as u32;
        rodata.is_smt_active = order.smt_enabled;
        rodata.is_autopilot_on = opts.autopilot;
        rodata.verbose = opts.verbose;
        rodata.slice_max_ns = opts.slice_max_us * 1000;
        rodata.slice_min_ns = opts.slice_min_us * 1000;
        rodata.pinned_slice_ns = opts.pinned_slice_us.map(|v| v * 1000).unwrap_or(0);
        rodata.preempt_shift = opts.preempt_shift;
        rodata.mig_delta_pct = opts.mig_delta_pct;
        rodata.no_use_em = opts.no_use_em as u8;
        rodata.no_wake_sync = opts.no_wake_sync;
        rodata.no_slice_boost = opts.no_slice_boost;
        rodata.per_cpu_dsq = opts.per_cpu_dsq;
        rodata.enable_cpu_bw = opts.enable_cpu_bw;

        skel.struct_ops.lavd_ops_mut().flags = *compat::SCX_OPS_ENQ_EXITING
            | *compat::SCX_OPS_ENQ_LAST
            | *compat::SCX_OPS_ENQ_MIGRATION_DISABLED
            | *compat::SCX_OPS_KEEP_BUILTIN_IDLE;
    }

    /// Returns a monotonically increasing sequence number for introspection messages.
    fn get_msg_seq_id() -> u64 {
        static MSEQ: AtomicU64 = AtomicU64::new(0);
        MSEQ.fetch_add(1, Ordering::Relaxed) + 1
    }

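    // Ring-buffer callback: decode one msg_task_ctx record and forward it to
    // the stats server as a SchedSample. Samples are silently dropped when
    // the channel is full; only a disconnected channel panics.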
    fn relay_introspec(data: &[u8], intrspc_tx: &Sender<SchedSample>) -> i32 {
        let mt = msg_task_ctx::from_bytes(data);
        if mt.hdr.kind != LAVD_MSG_TASKC {
            return 0;
        }

        let tx = mt.taskc_x;
        let tc = mt.taskc;

        let mseq = Scheduler::get_msg_seq_id();

        let c_tx_cm: *const c_char = (&tx.comm as *const [c_char; 17]) as *const c_char;
        let c_tx_cm_str: &CStr = unsafe { CStr::from_ptr(c_tx_cm) };
        let tx_comm: &str = c_tx_cm_str.to_str().unwrap();

        let c_waker_cm: *const c_char = (&tc.waker_comm as *const [c_char; 17]) as *const c_char;
        let c_waker_cm_str: &CStr = unsafe { CStr::from_ptr(c_waker_cm) };
        let waker_comm: &str = c_waker_cm_str.to_str().unwrap();

        let c_tx_st: *const c_char = (&tx.stat as *const [c_char; 5]) as *const c_char;
        let c_tx_st_str: &CStr = unsafe { CStr::from_ptr(c_tx_st) };
        let tx_stat: &str = c_tx_st_str.to_str().unwrap();

        match intrspc_tx.try_send(SchedSample {
            mseq,
            pid: tc.pid,
            comm: tx_comm.into(),
            stat: tx_stat.into(),
            cpu_id: tc.cpu_id,
            prev_cpu_id: tc.prev_cpu_id,
            suggested_cpu_id: tc.suggested_cpu_id,
            waker_pid: tc.waker_pid,
            waker_comm: waker_comm.into(),
            slice: tc.slice,
            lat_cri: tc.lat_cri,
            avg_lat_cri: tx.avg_lat_cri,
            static_prio: tx.static_prio,
            rerunnable_interval: tx.rerunnable_interval,
            resched_interval: tc.resched_interval,
            run_freq: tc.run_freq,
            avg_runtime: tc.avg_runtime,
            wait_freq: tc.wait_freq,
            wake_freq: tc.wake_freq,
            perf_cri: tc.perf_cri,
            thr_perf_cri: tx.thr_perf_cri,
            cpuperf_cur: tx.cpuperf_cur,
            cpu_util: tx.cpu_util,
            cpu_sutil: tx.cpu_sutil,
            nr_active: tx.nr_active,
            dsq_id: tx.dsq_id,
            dsq_consume_lat: tx.dsq_consume_lat,
            slice_used: tc.last_slice_used,
        }) {
            Ok(()) | Err(TrySendError::Full(_)) => 0,
            Err(e) => panic!("failed to send on intrspc_tx ({})", e),
        }
    }

    fn prep_introspec(&mut self) {
        let bss_data = self.skel.maps.bss_data.as_mut().unwrap();
        bss_data.is_monitored = true;
        bss_data.intrspc.cmd = self.intrspc.cmd;
        bss_data.intrspc.arg = self.intrspc.arg;
    }

    fn cleanup_introspec(&mut self) {
        self.skel.maps.bss_data.as_mut().unwrap().intrspc.cmd = LAVD_CMD_NOP;
    }

    fn get_pc(x: u64, y: u64) -> f64 {
        100. * x as f64 / y as f64
    }

    fn get_power_mode(power_mode: i32) -> &'static str {
        match power_mode as u32 {
            LAVD_PM_PERFORMANCE => "performance",
            LAVD_PM_BALANCED => "balanced",
            LAVD_PM_POWERSAVE => "powersave",
            _ => "unknown",
        }
    }

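    // Serve one stats request: register a new sampler thread, compute
    // system-wide percentages from sys_stat, or collect nr_samples
    // scheduling samples by arming the BPF introspection command and
    // draining the ring buffer.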
    fn stats_req_to_res(&mut self, req: &StatsReq) -> Result<StatsRes> {
        Ok(match req {
            StatsReq::NewSampler(tid) => {
                self.rb_mgr.consume().unwrap();
                self.monitor_tid = Some(*tid);
                StatsRes::Ack
            }
            StatsReq::SysStatsReq { tid } => {
                if Some(*tid) != self.monitor_tid {
                    return Ok(StatsRes::Bye);
                }
                self.mseq_id += 1;

                let bss_data = self.skel.maps.bss_data.as_ref().unwrap();
                let st = bss_data.sys_stat;

                let mseq = self.mseq_id;
                let nr_queued_task = st.nr_queued_task;
                let nr_active = st.nr_active;
                let nr_sched = st.nr_sched;
                let nr_preempt = st.nr_preempt;
                let pc_pc = Self::get_pc(st.nr_perf_cri, nr_sched);
                let pc_lc = Self::get_pc(st.nr_lat_cri, nr_sched);
                let pc_x_migration = Self::get_pc(st.nr_x_migration, nr_sched);
                let nr_stealee = st.nr_stealee;
                let nr_big = st.nr_big;
                let pc_big = Self::get_pc(nr_big, nr_sched);
                let pc_pc_on_big = Self::get_pc(st.nr_pc_on_big, nr_big);
                let pc_lc_on_big = Self::get_pc(st.nr_lc_on_big, nr_big);
                let power_mode = Self::get_power_mode(bss_data.power_mode);
                let total_time = bss_data.performance_mode_ns
                    + bss_data.balanced_mode_ns
                    + bss_data.powersave_mode_ns;
                let pc_performance = Self::get_pc(bss_data.performance_mode_ns, total_time);
                let pc_balanced = Self::get_pc(bss_data.balanced_mode_ns, total_time);
                let pc_powersave = Self::get_pc(bss_data.powersave_mode_ns, total_time);

                StatsRes::SysStats(SysStats {
                    mseq,
                    nr_queued_task,
                    nr_active,
                    nr_sched,
                    nr_preempt,
                    pc_pc,
                    pc_lc,
                    pc_x_migration,
                    nr_stealee,
                    pc_big,
                    pc_pc_on_big,
                    pc_lc_on_big,
                    power_mode: power_mode.to_string(),
                    pc_performance,
                    pc_balanced,
                    pc_powersave,
                })
            }
            StatsReq::SchedSamplesNr {
                tid,
                nr_samples,
                interval_ms,
            } => {
                if Some(*tid) != self.monitor_tid {
                    return Ok(StatsRes::Bye);
                }

                self.intrspc.cmd = LAVD_CMD_SCHED_N;
                self.intrspc.arg = *nr_samples;
                self.prep_introspec();
                std::thread::sleep(Duration::from_millis(*interval_ms));
                self.rb_mgr.poll(Duration::from_millis(100)).unwrap();

                let mut samples = vec![];
                while let Ok(ts) = self.intrspc_rx.try_recv() {
                    samples.push(ts);
                }

                self.cleanup_introspec();

                StatsRes::SchedSamples(SchedSamples { samples })
            }
        })
    }

    fn stop_monitoring(&mut self) {
        self.skel.maps.bss_data.as_mut().unwrap().is_monitored = false;
    }

    pub fn exited(&mut self) -> bool {
        uei_exited!(&self.skel, uei)
    }

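    // Switch the BPF-side power mode by invoking the set_power_profile BPF
    // program via test_run (BPF_PROG_RUN), passing power_arg as its context.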
    fn set_power_profile(&mut self, mode: u32) -> Result<(), u32> {
        let prog = &mut self.skel.progs.set_power_profile;
        let mut args = power_arg {
            power_mode: mode as c_int,
        };
        let input = ProgramInput {
            context_in: Some(unsafe {
                std::slice::from_raw_parts_mut(
                    &mut args as *mut _ as *mut u8,
                    std::mem::size_of_val(&args),
                )
            }),
            ..Default::default()
        };
        let out = prog.test_run(input).unwrap();
        if out.return_value != 0 {
            return Err(out.return_value);
        }

        Ok(())
    }

    fn update_power_profile(&mut self, prev_profile: PowerProfile) -> (bool, PowerProfile) {
        let profile = fetch_power_profile(false);
        if profile == prev_profile {
            // Nothing changed; keep autopower enabled.
            return (true, profile);
        }

        let _ = match profile {
            PowerProfile::Performance => self.set_power_profile(LAVD_PM_PERFORMANCE),
            PowerProfile::Balanced { .. } => self.set_power_profile(LAVD_PM_BALANCED),
            PowerProfile::Powersave => self.set_power_profile(LAVD_PM_POWERSAVE),
            PowerProfile::Unknown => {
                // The power profile cannot be detected; disable autopower.
                return (false, profile);
            }
        };

        info!("Set the scheduler's power profile to {profile} mode.");
        (true, profile)
    }

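    // Main loop: set the initial power profile, then service stats requests
    // with a 1s timeout until shutdown or BPF-side exit. With autopower on,
    // the power mode is re-synced with the system profile on each iteration.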
    fn run(&mut self, opts: &Opts, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
        let (res_ch, req_ch) = self.stats_server.channels();
        let mut autopower = opts.autopower;
        let mut profile = PowerProfile::Unknown;

        if opts.performance {
            let _ = self.set_power_profile(LAVD_PM_PERFORMANCE);
        } else if opts.powersave {
            let _ = self.set_power_profile(LAVD_PM_POWERSAVE);
        } else {
            let _ = self.set_power_profile(LAVD_PM_BALANCED);
        }

        while !shutdown.load(Ordering::Relaxed) && !self.exited() {
            if autopower {
                (autopower, profile) = self.update_power_profile(profile);
            }

            match req_ch.recv_timeout(Duration::from_secs(1)) {
                Ok(req) => {
                    let res = self.stats_req_to_res(&req)?;
                    res_ch.send(res)?;
                }
                Err(RecvTimeoutError::Timeout) => {
                    self.stop_monitoring();
                }
                Err(e) => {
                    self.stop_monitoring();
                    Err(e)?
                }
            }
            self.cleanup_introspec();
        }
        self.rb_mgr.consume().unwrap();

        let _ = self.struct_ops.take();
        uei_report!(&self.skel, uei)
    }
}

impl Drop for Scheduler<'_> {
    fn drop(&mut self) {
        info!("Unregister {SCHEDULER_NAME} scheduler");

        if let Some(struct_ops) = self.struct_ops.take() {
            drop(struct_ops);
        }
    }
}

fn init_log(opts: &Opts) {
    let llv = match opts.verbose {
        0 => simplelog::LevelFilter::Info,
        1 => simplelog::LevelFilter::Debug,
        _ => simplelog::LevelFilter::Trace,
    };
    let mut lcfg = simplelog::ConfigBuilder::new();
    lcfg.set_time_offset_to_local()
        .expect("Failed to set local time offset")
        .set_time_level(simplelog::LevelFilter::Error)
        .set_location_level(simplelog::LevelFilter::Off)
        .set_target_level(simplelog::LevelFilter::Off)
        .set_thread_level(simplelog::LevelFilter::Off);
    simplelog::TermLogger::init(
        llv,
        lcfg.build(),
        simplelog::TerminalMode::Stderr,
        simplelog::ColorChoice::Auto,
    )
    .unwrap();
}

fn main() -> Result<()> {
    let mut opts = Opts::parse();

    if opts.version {
        println!(
            "scx_lavd {}",
            build_id::full_version(env!("CARGO_PKG_VERSION"))
        );
        return Ok(());
    }

    if opts.help_stats {
        let sys_stats_meta_name = SysStats::meta().name;
        let sched_sample_meta_name = SchedSample::meta().name;
        let stats_meta_names: &[&str] = &[
            sys_stats_meta_name.as_str(),
            sched_sample_meta_name.as_str(),
        ];
        stats::server_data(0).describe_meta(&mut std::io::stdout(), Some(&stats_meta_names))?;
        return Ok(());
    }

    init_log(&opts);

    if opts.monitor.is_none() && opts.monitor_sched_samples.is_none() {
        if opts.proc().is_none() {
            anyhow::bail!("Invalid combination of command line options.");
        }
        info!("{:#?}", opts);
    }

    let shutdown = Arc::new(AtomicBool::new(false));
    let shutdown_clone = shutdown.clone();
    ctrlc::set_handler(move || {
        shutdown_clone.store(true, Ordering::Relaxed);
    })
    .context("Error setting Ctrl-C handler")?;

    if let Some(nr_samples) = opts.monitor_sched_samples {
        let shutdown_copy = shutdown.clone();
        let jh = std::thread::spawn(move || {
            stats::monitor_sched_samples(nr_samples, shutdown_copy).unwrap()
        });
        let _ = jh.join();
        return Ok(());
    }

    if let Some(intv) = opts.monitor.or(opts.stats) {
        let shutdown_copy = shutdown.clone();
        let jh = std::thread::spawn(move || {
            stats::monitor(Duration::from_secs_f64(intv), shutdown_copy).unwrap()
        });
        if opts.monitor.is_some() {
            let _ = jh.join();
            return Ok(());
        }
    }

    let mut open_object = MaybeUninit::uninit();
    loop {
        let mut sched = Scheduler::init(&opts, &mut open_object)?;
        info!(
            "scx_lavd scheduler is initialized (build ID: {})",
            build_id::full_version(env!("CARGO_PKG_VERSION"))
        );
        info!("scx_lavd scheduler starts running.");
        if !sched.run(&opts, shutdown.clone())?.should_restart() {
            break;
        }
    }

    Ok(())
}