1mod bpf_skel;
10pub use bpf_skel::*;
11pub mod bpf_intf;
12pub use bpf_intf::*;
13
14mod cpu_order;
15use scx_utils::init_libbpf_logging;
16mod stats;
17use std::ffi::c_int;
18use std::ffi::CStr;
19use std::mem;
20use std::mem::MaybeUninit;
21use std::str;
22use std::sync::atomic::AtomicBool;
23use std::sync::atomic::Ordering;
24use std::sync::Arc;
25use std::thread::ThreadId;
26use std::time::Duration;
27
28use anyhow::Context;
29use anyhow::Result;
30use clap::Parser;
31use clap_num::number_range;
32use cpu_order::CpuOrder;
33use cpu_order::PerfCpuOrder;
34use crossbeam::channel;
35use crossbeam::channel::Receiver;
36use crossbeam::channel::RecvTimeoutError;
37use crossbeam::channel::Sender;
38use crossbeam::channel::TrySendError;
39use libbpf_rs::skel::Skel;
40use libbpf_rs::OpenObject;
41use libbpf_rs::PrintLevel;
42use libbpf_rs::ProgramInput;
43use libc::c_char;
44use plain::Plain;
45use scx_arena::ArenaLib;
46use scx_stats::prelude::*;
47use scx_utils::autopower::{fetch_power_profile, PowerProfile};
48use scx_utils::build_id;
49use scx_utils::compat;
50use scx_utils::ksym_exists;
51use scx_utils::libbpf_clap_opts::LibbpfOpts;
52use scx_utils::scx_ops_attach;
53use scx_utils::scx_ops_load;
54use scx_utils::scx_ops_open;
55use scx_utils::try_set_rlimit_infinity;
56use scx_utils::uei_exited;
57use scx_utils::uei_report;
58use scx_utils::EnergyModel;
59use scx_utils::TopologyArgs;
60use scx_utils::UserExitInfo;
61use scx_utils::NR_CPU_IDS;
62use stats::SchedSample;
63use stats::SchedSamples;
64use stats::StatsReq;
65use stats::StatsRes;
66use stats::SysStats;
67use tracing::{debug, info, warn};
68use tracing_subscriber::filter::EnvFilter;
69
/// Scheduler name used in log messages (e.g. the unregister notice in `Drop`).
const SCHEDULER_NAME: &str = "scx_lavd";
// Command-line options for scx_lavd.
//
// NOTE(review): plain `//` comments are used here deliberately; `///` doc
// comments on clap-derived fields become `--help` text and would change the
// program's user-visible output.
#[derive(Debug, Parser)]
struct Opts {
    // Verbosity counter (-v, -vv, ...); deprecated in favor of --log-level.
    #[clap(short = 'v', long, action = clap::ArgAction::Count)]
    verbose: u8,

    // Automatically switch power mode based on system utilization.
    // Mutually exclusive with the other power-mode flags (see Opts::proc).
    #[clap(long = "autopilot", action = clap::ArgAction::SetTrue)]
    autopilot: bool,

    // Follow the platform power profile (see fetch_power_profile).
    #[clap(long = "autopower", action = clap::ArgAction::SetTrue)]
    autopower: bool,

    // Force performance mode (implies --no-core-compaction).
    #[clap(long = "performance", action = clap::ArgAction::SetTrue)]
    performance: bool,

    // Force powersave mode.
    #[clap(long = "powersave", action = clap::ArgAction::SetTrue)]
    powersave: bool,

    // Force balanced mode.
    #[clap(long = "balanced", action = clap::ArgAction::SetTrue)]
    balanced: bool,

    // Maximum time slice in microseconds.
    #[clap(long = "slice-max-us", default_value = "5000")]
    slice_max_us: u64,

    // Minimum time slice in microseconds.
    #[clap(long = "slice-min-us", default_value = "500")]
    slice_min_us: u64,

    // Load-delta percentage threshold for task migration (0..=100).
    #[clap(long = "mig-delta-pct", default_value = "0", value_parser=Opts::mig_delta_pct_range)]
    mig_delta_pct: u8,

    // Fixed time slice for pinned tasks, in microseconds; must lie within
    // [slice-min-us, slice-max-us] (validated in Opts::proc).
    #[clap(long = "pinned-slice-us")]
    pinned_slice_us: Option<u64>,

    // Preemption aggressiveness shift (0..=10).
    #[clap(long = "preempt-shift", default_value = "6", value_parser=Opts::preempt_shift_range)]
    preempt_shift: u8,

    // Manual CPU preference order; non-empty disables the energy model.
    #[clap(long = "cpu-pref-order", default_value = "")]
    cpu_pref_order: String,

    // Do not use the kernel energy model for CPU preference ordering.
    #[clap(long = "no-use-em", action = clap::ArgAction::SetTrue)]
    no_use_em: bool,

    // Disable futex-based boosting (ftrace/tracepoint hooks).
    #[clap(long = "no-futex-boost", action = clap::ArgAction::SetTrue)]
    no_futex_boost: bool,

    // Disable preemption entirely.
    #[clap(long = "no-preemption", action = clap::ArgAction::SetTrue)]
    no_preemption: bool,

    // Disable synchronous-wakeup optimization.
    #[clap(long = "no-wake-sync", action = clap::ArgAction::SetTrue)]
    no_wake_sync: bool,

    // Disable slice boosting.
    #[clap(long = "no-slice-boost", action = clap::ArgAction::SetTrue)]
    no_slice_boost: bool,

    // Use one DSQ per CPU instead of shared DSQs.
    #[clap(long = "per-cpu-dsq", action = clap::ArgAction::SetTrue)]
    per_cpu_dsq: bool,

    // Enable cgroup CPU bandwidth control (ops.cgroup_set_bandwidth).
    #[clap(long = "enable-cpu-bw", action = clap::ArgAction::SetTrue)]
    enable_cpu_bw: bool,

    // Disable core compaction (keeping work on fewer cores to save power).
    #[clap(long = "no-core-compaction", action = clap::ArgAction::SetTrue)]
    no_core_compaction: bool,

    // Disable CPU frequency scaling control.
    #[clap(long = "no-freq-scaling", action = clap::ArgAction::SetTrue)]
    no_freq_scaling: bool,

    // Print statistics every N seconds (fractional allowed).
    #[clap(long)]
    stats: Option<f64>,

    // Run in monitor-only mode, printing statistics every N seconds.
    #[clap(long)]
    monitor: Option<f64>,

    // Monitor N scheduling samples and exit.
    #[clap(long)]
    monitor_sched_samples: Option<u64>,

    // Log level filter (also selects BPF-side verbosity; see Scheduler::init).
    #[clap(long, default_value = "info")]
    log_level: String,

    // Print version and exit.
    #[clap(short = 'V', long, action = clap::ArgAction::SetTrue)]
    version: bool,

    // Opaque run identifier echoed into the logs.
    #[clap(long)]
    run_id: Option<u64>,

    // Describe the available statistics and exit.
    #[clap(long)]
    help_stats: bool,

    #[clap(flatten, next_help_heading = "Libbpf Options")]
    pub libbpf: LibbpfOpts,

    #[clap(flatten)]
    topology: Option<TopologyArgs>,
}
248
249impl Opts {
250 fn can_autopilot(&self) -> bool {
251 self.autopower == false
252 && self.performance == false
253 && self.powersave == false
254 && self.balanced == false
255 && self.no_core_compaction == false
256 }
257
258 fn can_autopower(&self) -> bool {
259 self.autopilot == false
260 && self.performance == false
261 && self.powersave == false
262 && self.balanced == false
263 && self.no_core_compaction == false
264 }
265
266 fn can_performance(&self) -> bool {
267 self.autopilot == false
268 && self.autopower == false
269 && self.powersave == false
270 && self.balanced == false
271 }
272
273 fn can_balanced(&self) -> bool {
274 self.autopilot == false
275 && self.autopower == false
276 && self.performance == false
277 && self.powersave == false
278 && self.no_core_compaction == false
279 }
280
281 fn can_powersave(&self) -> bool {
282 self.autopilot == false
283 && self.autopower == false
284 && self.performance == false
285 && self.balanced == false
286 && self.no_core_compaction == false
287 }
288
289 fn proc(&mut self) -> Option<&mut Self> {
290 if !self.autopilot {
291 self.autopilot = self.can_autopilot();
292 }
293
294 if self.autopilot {
295 if !self.can_autopilot() {
296 info!("Autopilot mode cannot be used with conflicting options.");
297 return None;
298 }
299 info!("Autopilot mode is enabled.");
300 }
301
302 if self.autopower {
303 if !self.can_autopower() {
304 info!("Autopower mode cannot be used with conflicting options.");
305 return None;
306 }
307 info!("Autopower mode is enabled.");
308 }
309
310 if self.performance {
311 if !self.can_performance() {
312 info!("Performance mode cannot be used with conflicting options.");
313 return None;
314 }
315 info!("Performance mode is enabled.");
316 self.no_core_compaction = true;
317 }
318
319 if self.powersave {
320 if !self.can_powersave() {
321 info!("Powersave mode cannot be used with conflicting options.");
322 return None;
323 }
324 info!("Powersave mode is enabled.");
325 self.no_core_compaction = false;
326 }
327
328 if self.balanced {
329 if !self.can_balanced() {
330 info!("Balanced mode cannot be used with conflicting options.");
331 return None;
332 }
333 info!("Balanced mode is enabled.");
334 self.no_core_compaction = false;
335 }
336
337 if !EnergyModel::has_energy_model() || !self.cpu_pref_order.is_empty() {
338 self.no_use_em = true;
339 info!("Energy model won't be used for CPU preference order.");
340 }
341
342 if let Some(pinned_slice) = self.pinned_slice_us {
343 if pinned_slice < self.slice_min_us || pinned_slice > self.slice_max_us {
344 info!(
345 "pinned-slice-us ({}) must be between slice-min-us ({}) and slice-max-us ({})",
346 pinned_slice, self.slice_min_us, self.slice_max_us
347 );
348 return None;
349 }
350 info!(
351 "Pinned task slice mode is enabled ({} us). Pinned tasks will use per-CPU DSQs.",
352 pinned_slice
353 );
354 }
355
356 Some(self)
357 }
358
359 fn preempt_shift_range(s: &str) -> Result<u8, String> {
360 number_range(s, 0, 10)
361 }
362
363 fn mig_delta_pct_range(s: &str) -> Result<u8, String> {
364 number_range(s, 0, 100)
365 }
366}
367
// SAFETY: `msg_task_ctx` is a C-layout struct (presumably generated from the
// BPF interface headers — confirm in bpf_intf) whose fields are plain integers
// and byte arrays, so any bit pattern is a valid value as `Plain` requires.
unsafe impl Plain for msg_task_ctx {}
369
impl msg_task_ctx {
    /// Reinterprets a raw ring-buffer record as a `msg_task_ctx` reference.
    ///
    /// Panics when `buf` is shorter than the struct or not suitably aligned;
    /// both indicate a producer/consumer layout mismatch, i.e. a bug.
    fn from_bytes(buf: &[u8]) -> &msg_task_ctx {
        plain::from_bytes(buf).expect("The buffer is either too short or not aligned!")
    }
}
375
376impl introspec {
377 fn new() -> Self {
378 let intrspc = unsafe { mem::MaybeUninit::<introspec>::zeroed().assume_init() };
379 intrspc
380 }
381}
382
/// Userspace state for one running instance of the scx_lavd scheduler.
struct Scheduler<'a> {
    skel: BpfSkel<'a>,                   // loaded BPF skeleton
    struct_ops: Option<libbpf_rs::Link>, // attached sched_ext struct_ops link
    rb_mgr: libbpf_rs::RingBuffer<'static>, // ring buffer of task samples from BPF
    intrspc: introspec,                  // pending introspection command
    intrspc_rx: Receiver<SchedSample>,   // samples relayed from the ring buffer
    monitor_tid: Option<ThreadId>,       // current stats-client thread, if any
    stats_server: StatsServer<StatsReq, StatsRes>, // serves stats requests
    mseq_id: u64,                        // monotonically increasing stats sequence
}
393
impl<'a> Scheduler<'a> {
    /// Opens, configures, loads, and attaches the BPF scheduler, then wires
    /// up the introspection ring buffer and launches the stats server.
    ///
    /// The open → init-rodata → load → attach sequence is order-sensitive:
    /// rodata can only be written before `scx_ops_load!`.
    fn init(opts: &'a Opts, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
        // The BPF side sizes its per-CPU arrays with LAVD_CPU_ID_MAX entries.
        if *NR_CPU_IDS > LAVD_CPU_ID_MAX as usize {
            panic!(
                "Num possible CPU IDs ({}) exceeds maximum of ({})",
                *NR_CPU_IDS, LAVD_CPU_ID_MAX
            );
        }

        try_set_rlimit_infinity();

        // Derive the BPF-side verbosity knob from the textual log level.
        let debug_level = if opts.log_level.contains("trace") {
            2
        } else if opts.log_level.contains("debug") {
            1
        } else {
            0
        };
        let mut skel_builder = BpfSkelBuilder::default();
        skel_builder.obj_builder.debug(debug_level > 1);
        init_libbpf_logging(Some(PrintLevel::Debug));

        let open_opts = opts.libbpf.clone().into_bpf_open_opts();
        let mut skel = scx_ops_open!(skel_builder, open_object, lavd_ops, open_opts)?;

        // Futex boosting is best-effort: try ftrace hooks first, fall back
        // to tracepoints, and keep going even if neither attaches.
        if !opts.no_futex_boost {
            if Self::attach_futex_ftraces(&mut skel)? == false {
                info!("Fail to attach futex ftraces. Try with tracepoints.");
                if Self::attach_futex_tracepoints(&mut skel)? == false {
                    info!("Fail to attach futex tracepoints.");
                }
            }
        }

        // Populate skeleton data from the discovered CPU topology.
        let order = CpuOrder::new(opts.topology.as_ref()).unwrap();
        Self::init_cpus(&mut skel, &order);
        Self::init_cpdoms(&mut skel, &order);

        Self::init_globals(&mut skel, &opts, &order, debug_level);

        // Load the program; rodata is frozen from this point on.
        let mut skel = scx_ops_load!(skel, lavd_ops, uei)?;
        let task_size = std::mem::size_of::<types::task_ctx>();
        let arenalib = ArenaLib::init(skel.object_mut(), task_size, *NR_CPU_IDS)?;
        arenalib.setup()?;

        // Attaching activates the scheduler; then start serving stats.
        let struct_ops = Some(scx_ops_attach!(skel, lavd_ops)?);
        let stats_server = StatsServer::new(stats::server_data(*NR_CPU_IDS as u64)).launch()?;

        // Ring buffer carrying per-task samples from BPF to the monitor.
        // The channel is bounded; relay_introspec drops samples when full.
        let (intrspc_tx, intrspc_rx) = channel::bounded(65536);
        let rb_map = &mut skel.maps.introspec_msg;
        let mut builder = libbpf_rs::RingBufferBuilder::new();
        builder
            .add(rb_map, move |data| {
                Scheduler::relay_introspec(data, &intrspc_tx)
            })
            .unwrap();
        let rb_mgr = builder.build().unwrap();

        Ok(Self {
            skel,
            struct_ops,
            rb_mgr,
            intrspc: introspec::new(),
            intrspc_rx,
            monitor_tid: None,
            stats_server,
            mseq_id: 0,
        })
    }

    /// Conditionally enables fexit probes on kernel futex entry points.
    /// Returns Ok(false) when the function tracer is unavailable or the
    /// symbols do not exist on this kernel.
    fn attach_futex_ftraces(skel: &mut OpenBpfSkel) -> Result<bool> {
        let ftraces = vec![
            ("__futex_wait", &skel.progs.fexit___futex_wait),
            ("futex_wait_multiple", &skel.progs.fexit_futex_wait_multiple),
            (
                "futex_wait_requeue_pi",
                &skel.progs.fexit_futex_wait_requeue_pi,
            ),
            ("futex_wake", &skel.progs.fexit_futex_wake),
            ("futex_wake_op", &skel.progs.fexit_futex_wake_op),
            ("futex_lock_pi", &skel.progs.fexit_futex_lock_pi),
            ("futex_unlock_pi", &skel.progs.fexit_futex_unlock_pi),
        ];

        // fexit probes need the kernel "function" tracer to be available.
        if compat::tracer_available("function")? == false {
            info!("Ftrace is not enabled in the kernel.");
            return Ok(false);
        }

        compat::cond_kprobes_enable(ftraces)
    }

    /// Conditionally enables raw tracepoints on the futex syscalls,
    /// the fallback path when ftrace-based attachment fails.
    fn attach_futex_tracepoints(skel: &mut OpenBpfSkel) -> Result<bool> {
        let tracepoints = vec![
            ("syscalls:sys_enter_futex", &skel.progs.rtp_sys_enter_futex),
            ("syscalls:sys_exit_futex", &skel.progs.rtp_sys_exit_futex),
            (
                "syscalls:sys_exit_futex_wait",
                &skel.progs.rtp_sys_exit_futex_wait,
            ),
            (
                "syscalls:sys_exit_futex_waitv",
                &skel.progs.rtp_sys_exit_futex_waitv,
            ),
            (
                "syscalls:sys_exit_futex_wake",
                &skel.progs.rtp_sys_exit_futex_wake,
            ),
        ];

        compat::cond_tracepoints_enable(tracepoints)
    }

    /// Writes per-CPU capacity/type data and the performance-vs-CPU-order
    /// (PCO) tables into the skeleton's rodata. Must run before load.
    fn init_cpus(skel: &mut OpenBpfSkel, order: &CpuOrder) {
        debug!("{:#?}", order);

        // Per-CPU static properties, indexed by cpu_adx.
        for cpu in order.cpuids.iter() {
            skel.maps.rodata_data.as_mut().unwrap().cpu_capacity[cpu.cpu_adx] = cpu.cpu_cap as u16;
            skel.maps.rodata_data.as_mut().unwrap().cpu_big[cpu.cpu_adx] = cpu.big_core as u8;
            skel.maps.rodata_data.as_mut().unwrap().cpu_turbo[cpu.cpu_adx] = cpu.turbo_core as u8;
            skel.maps.rodata_data.as_mut().unwrap().cpu_sibling[cpu.cpu_adx] =
                cpu.cpu_sibling as u32;
        }

        // The BPF side has a fixed number of PCO state slots.
        let nr_pco_states: u8 = order.perf_cpu_order.len() as u8;
        if nr_pco_states > LAVD_PCO_STATE_MAX as u8 {
            panic!("Generated performance vs. CPU order stats are too complex ({nr_pco_states}) to handle");
        }

        skel.maps.rodata_data.as_mut().unwrap().nr_pco_states = nr_pco_states;
        for (i, (_, pco)) in order.perf_cpu_order.iter().enumerate() {
            Self::init_pco_tuple(skel, i, &pco);
            info!("{:#}", pco);
        }

        // Pad the unused trailing slots with the last (highest) PCO entry so
        // every slot holds a valid table.
        let (_, last_pco) = order.perf_cpu_order.last_key_value().unwrap();
        for i in nr_pco_states..LAVD_PCO_STATE_MAX as u8 {
            Self::init_pco_tuple(skel, i as usize, &last_pco);
        }
    }

    /// Fills PCO slot `i` with one performance level's CPU ordering:
    /// primary CPUs first, then overflow CPUs.
    fn init_pco_tuple(skel: &mut OpenBpfSkel, i: usize, pco: &PerfCpuOrder) {
        let cpus_perf = pco.cpus_perf.borrow();
        let cpus_ovflw = pco.cpus_ovflw.borrow();
        let pco_nr_primary = cpus_perf.len();

        skel.maps.rodata_data.as_mut().unwrap().pco_bounds[i] = pco.perf_cap as u32;
        skel.maps.rodata_data.as_mut().unwrap().pco_nr_primary[i] = pco_nr_primary as u16;

        // Primary CPUs occupy the head of the table...
        for (j, &cpu_adx) in cpus_perf.iter().enumerate() {
            skel.maps.rodata_data.as_mut().unwrap().pco_table[i][j] = cpu_adx as u16;
        }

        // ...followed by the overflow CPUs.
        for (j, &cpu_adx) in cpus_ovflw.iter().enumerate() {
            let k = j + pco_nr_primary;
            skel.maps.rodata_data.as_mut().unwrap().pco_table[i][k] = cpu_adx as u16;
        }
    }

    /// Initializes the compute-domain contexts (one per LLC/NUMA grouping)
    /// in BSS: identity, CPU mask, and the sorted neighbor lists used for
    /// load balancing. Panics when the topology exceeds BPF-side limits.
    fn init_cpdoms(skel: &mut OpenBpfSkel, order: &CpuOrder) {
        for (k, v) in order.cpdom_map.iter() {
            skel.maps.bss_data.as_mut().unwrap().cpdom_ctxs[v.cpdom_id].id = v.cpdom_id as u64;
            skel.maps.bss_data.as_mut().unwrap().cpdom_ctxs[v.cpdom_id].alt_id =
                v.cpdom_alt_id.get() as u64;
            skel.maps.bss_data.as_mut().unwrap().cpdom_ctxs[v.cpdom_id].numa_id = k.numa_adx as u8;
            skel.maps.bss_data.as_mut().unwrap().cpdom_ctxs[v.cpdom_id].llc_id = k.llc_adx as u8;
            skel.maps.bss_data.as_mut().unwrap().cpdom_ctxs[v.cpdom_id].is_big = k.is_big as u8;
            skel.maps.bss_data.as_mut().unwrap().cpdom_ctxs[v.cpdom_id].is_valid = 1;
            // Set each member CPU's bit in the domain's 64-bit-word cpumask.
            for cpu_id in v.cpu_ids.iter() {
                let i = cpu_id / 64;
                let j = cpu_id % 64;
                skel.maps.bss_data.as_mut().unwrap().cpdom_ctxs[v.cpdom_id].__cpumask[i] |=
                    0x01 << j;
            }

            if v.neighbor_map.borrow().iter().len() > LAVD_CPDOM_MAX_DIST as usize {
                panic!("The processor topology is too complex to handle in BPF.");
            }

            // Neighbor domains, grouped by distance (k), flattened into a
            // fixed-size id table of LAVD_CPDOM_MAX_NR entries per distance.
            for (k, (_d, neighbors)) in v.neighbor_map.borrow().iter().enumerate() {
                let nr_neighbors = neighbors.borrow().len() as u8;
                if nr_neighbors > LAVD_CPDOM_MAX_NR as u8 {
                    panic!("The processor topology is too complex to handle in BPF.");
                }
                skel.maps.bss_data.as_mut().unwrap().cpdom_ctxs[v.cpdom_id].nr_neighbors[k] =
                    nr_neighbors;
                for (i, &id) in neighbors.borrow().iter().enumerate() {
                    let idx = (k * LAVD_CPDOM_MAX_NR as usize) + i;
                    skel.maps.bss_data.as_mut().unwrap().cpdom_ctxs[v.cpdom_id].neighbor_ids[idx] =
                        id as u8;
                }
            }
        }
    }

    /// Copies the command-line tunables into the skeleton's BSS/rodata and
    /// configures the struct_ops flags. Must run before load.
    fn init_globals(skel: &mut OpenBpfSkel, opts: &Opts, order: &CpuOrder, debug_level: u8) {
        let bss_data = skel.maps.bss_data.as_mut().unwrap();
        bss_data.no_preemption = opts.no_preemption;
        bss_data.no_core_compaction = opts.no_core_compaction;
        bss_data.no_freq_scaling = opts.no_freq_scaling;
        bss_data.is_powersave_mode = opts.powersave;
        let rodata = skel.maps.rodata_data.as_mut().unwrap();
        rodata.nr_llcs = order.nr_llcs as u64;
        rodata.nr_cpu_ids = *NR_CPU_IDS as u32;
        rodata.is_smt_active = order.smt_enabled;
        rodata.is_autopilot_on = opts.autopilot;
        rodata.verbose = debug_level;
        // Slice options are given in microseconds; BPF works in nanoseconds.
        rodata.slice_max_ns = opts.slice_max_us * 1000;
        rodata.slice_min_ns = opts.slice_min_us * 1000;
        rodata.pinned_slice_ns = opts.pinned_slice_us.map(|v| v * 1000).unwrap_or(0);
        rodata.preempt_shift = opts.preempt_shift;
        rodata.mig_delta_pct = opts.mig_delta_pct;
        rodata.no_use_em = opts.no_use_em as u8;
        rodata.no_wake_sync = opts.no_wake_sync;
        rodata.no_slice_boost = opts.no_slice_boost;
        rodata.per_cpu_dsq = opts.per_cpu_dsq;
        rodata.enable_cpu_bw = opts.enable_cpu_bw;

        // Older kernels lack cgroup bandwidth support; null out the callback
        // so attachment does not fail there.
        if !ksym_exists("scx_group_set_bandwidth").unwrap() {
            skel.struct_ops.lavd_ops_mut().cgroup_set_bandwidth = std::ptr::null_mut();
            warn!("Kernel does not support ops.cgroup_set_bandwidth(), so disable it.");
        }

        skel.struct_ops.lavd_ops_mut().flags = *compat::SCX_OPS_ENQ_EXITING
            | *compat::SCX_OPS_ENQ_LAST
            | *compat::SCX_OPS_ENQ_MIGRATION_DISABLED
            | *compat::SCX_OPS_KEEP_BUILTIN_IDLE;
    }

    /// Returns the next message sequence number (1-based).
    ///
    /// NOTE(review): `static mut` is not thread-safe; this is presumably only
    /// ever called from the single ring-buffer poll thread, but an
    /// `AtomicU64` with `fetch_add` would make that assumption unnecessary —
    /// worth confirming and cleaning up.
    fn get_msg_seq_id() -> u64 {
        static mut MSEQ: u64 = 0;
        unsafe {
            MSEQ += 1;
            MSEQ
        }
    }

    /// Ring-buffer callback: decodes one task sample from BPF and forwards
    /// it to the monitor channel. Samples are silently dropped when the
    /// channel is full. Returns 0 to keep the ring buffer draining.
    fn relay_introspec(data: &[u8], intrspc_tx: &Sender<SchedSample>) -> i32 {
        let mt = msg_task_ctx::from_bytes(data);
        let tx = mt.taskc_x;
        let tc = mt.taskc;

        // Ignore any message kind other than task-context samples.
        if mt.hdr.kind != LAVD_MSG_TASKC {
            return 0;
        }

        let mseq = Scheduler::get_msg_seq_id();

        // The fixed-size C char arrays are NUL-terminated on the BPF side;
        // convert them into &str for the sample.
        let c_tx_cm: *const c_char = (&tx.comm as *const [c_char; 17]) as *const c_char;
        let c_tx_cm_str: &CStr = unsafe { CStr::from_ptr(c_tx_cm) };
        let tx_comm: &str = c_tx_cm_str.to_str().unwrap();

        let c_waker_cm: *const c_char = (&tc.waker_comm as *const [c_char; 17]) as *const c_char;
        let c_waker_cm_str: &CStr = unsafe { CStr::from_ptr(c_waker_cm) };
        let waker_comm: &str = c_waker_cm_str.to_str().unwrap();

        let c_tx_st: *const c_char = (&tx.stat as *const [c_char; 5]) as *const c_char;
        let c_tx_st_str: &CStr = unsafe { CStr::from_ptr(c_tx_st) };
        let tx_stat: &str = c_tx_st_str.to_str().unwrap();

        match intrspc_tx.try_send(SchedSample {
            mseq,
            pid: tc.pid,
            comm: tx_comm.into(),
            stat: tx_stat.into(),
            cpu_id: tc.cpu_id,
            prev_cpu_id: tc.prev_cpu_id,
            suggested_cpu_id: tc.suggested_cpu_id,
            waker_pid: tc.waker_pid,
            waker_comm: waker_comm.into(),
            slice: tc.slice,
            lat_cri: tc.lat_cri,
            avg_lat_cri: tx.avg_lat_cri,
            static_prio: tx.static_prio,
            rerunnable_interval: tx.rerunnable_interval,
            resched_interval: tc.resched_interval,
            run_freq: tc.run_freq,
            avg_runtime: tc.avg_runtime,
            wait_freq: tc.wait_freq,
            wake_freq: tc.wake_freq,
            perf_cri: tc.perf_cri,
            thr_perf_cri: tx.thr_perf_cri,
            cpuperf_cur: tx.cpuperf_cur,
            cpu_util: tx.cpu_util,
            cpu_sutil: tx.cpu_sutil,
            nr_active: tx.nr_active,
            dsq_id: tx.dsq_id,
            dsq_consume_lat: tx.dsq_consume_lat,
            slice_used: tc.last_slice_used,
        }) {
            // A full channel just means the monitor is behind; drop the sample.
            Ok(()) | Err(TrySendError::Full(_)) => 0,
            Err(e) => panic!("failed to send on intrspc_tx ({})", e),
        }
    }

    /// Arms the BPF-side introspection machinery with the pending command
    /// and marks the scheduler as being monitored.
    fn prep_introspec(&mut self) {
        if !self.skel.maps.bss_data.as_ref().unwrap().is_monitored {
            self.skel.maps.bss_data.as_mut().unwrap().is_monitored = true;
        }
        self.skel.maps.bss_data.as_mut().unwrap().intrspc.cmd = self.intrspc.cmd;
        self.skel.maps.bss_data.as_mut().unwrap().intrspc.arg = self.intrspc.arg;
    }

    /// Clears the BPF-side introspection command so sampling stops.
    fn cleanup_introspec(&mut self) {
        self.skel.maps.bss_data.as_mut().unwrap().intrspc.cmd = LAVD_CMD_NOP;
    }

    /// Percentage helper: 100 * x / y (NaN/inf when y == 0).
    fn get_pc(x: u64, y: u64) -> f64 {
        return 100. * x as f64 / y as f64;
    }

    /// Maps a BPF power-mode constant to a human-readable name.
    fn get_power_mode(power_mode: i32) -> &'static str {
        match power_mode as u32 {
            LAVD_PM_PERFORMANCE => "performance",
            LAVD_PM_BALANCED => "balanced",
            LAVD_PM_POWERSAVE => "powersave",
            _ => "unknown",
        }
    }

    /// Serves one stats request. Only the registered monitor thread may
    /// query; any other thread gets `StatsRes::Bye`.
    fn stats_req_to_res(&mut self, req: &StatsReq) -> Result<StatsRes> {
        Ok(match req {
            StatsReq::NewSampler(tid) => {
                // Drain stale ring-buffer entries before handing the
                // sampler to a new monitor thread.
                self.rb_mgr.consume().unwrap();
                self.monitor_tid = Some(*tid);
                StatsRes::Ack
            }
            StatsReq::SysStatsReq { tid } => {
                if Some(*tid) != self.monitor_tid {
                    return Ok(StatsRes::Bye);
                }
                self.mseq_id += 1;

                let bss_data = self.skel.maps.bss_data.as_ref().unwrap();
                let st = bss_data.sys_stat;

                let mseq = self.mseq_id;
                let nr_queued_task = st.nr_queued_task;
                let nr_active = st.nr_active;
                let nr_sched = st.nr_sched;
                let nr_preempt = st.nr_preempt;
                let pc_pc = Self::get_pc(st.nr_perf_cri, nr_sched);
                let pc_lc = Self::get_pc(st.nr_lat_cri, nr_sched);
                let pc_x_migration = Self::get_pc(st.nr_x_migration, nr_sched);
                let nr_stealee = st.nr_stealee;
                let nr_big = st.nr_big;
                let pc_big = Self::get_pc(nr_big, nr_sched);
                let pc_pc_on_big = Self::get_pc(st.nr_pc_on_big, nr_big);
                let pc_lc_on_big = Self::get_pc(st.nr_lc_on_big, nr_big);
                let power_mode = Self::get_power_mode(bss_data.power_mode);
                // Share of wall time spent in each power mode.
                let total_time = bss_data.performance_mode_ns
                    + bss_data.balanced_mode_ns
                    + bss_data.powersave_mode_ns;
                let pc_performance = Self::get_pc(bss_data.performance_mode_ns, total_time);
                let pc_balanced = Self::get_pc(bss_data.balanced_mode_ns, total_time);
                let pc_powersave = Self::get_pc(bss_data.powersave_mode_ns, total_time);

                StatsRes::SysStats(SysStats {
                    mseq,
                    nr_queued_task,
                    nr_active,
                    nr_sched,
                    nr_preempt,
                    pc_pc,
                    pc_lc,
                    pc_x_migration,
                    nr_stealee,
                    pc_big,
                    pc_pc_on_big,
                    pc_lc_on_big,
                    power_mode: power_mode.to_string(),
                    pc_performance,
                    pc_balanced,
                    pc_powersave,
                })
            }
            StatsReq::SchedSamplesNr {
                tid,
                nr_samples,
                interval_ms,
            } => {
                if Some(*tid) != self.monitor_tid {
                    return Ok(StatsRes::Bye);
                }

                // Ask BPF for N samples, let them accumulate for one
                // interval, then drain the ring buffer into the channel.
                self.intrspc.cmd = LAVD_CMD_SCHED_N;
                self.intrspc.arg = *nr_samples;
                self.prep_introspec();
                std::thread::sleep(Duration::from_millis(*interval_ms));
                self.rb_mgr.poll(Duration::from_millis(100)).unwrap();

                let mut samples = vec![];
                while let Ok(ts) = self.intrspc_rx.try_recv() {
                    samples.push(ts);
                }

                self.cleanup_introspec();

                StatsRes::SchedSamples(SchedSamples { samples })
            }
        })
    }

    /// Tells the BPF side that no monitor is listening anymore.
    fn stop_monitoring(&mut self) {
        if self.skel.maps.bss_data.as_ref().unwrap().is_monitored {
            self.skel.maps.bss_data.as_mut().unwrap().is_monitored = false;
        }
    }

    /// True when the BPF scheduler has exited (user exit info was posted).
    pub fn exited(&mut self) -> bool {
        uei_exited!(&self.skel, uei)
    }

    /// Runs the BPF `set_power_profile` program to switch the power mode.
    /// Returns the program's non-zero return value on failure.
    fn set_power_profile(&mut self, mode: u32) -> Result<(), u32> {
        let prog = &mut self.skel.progs.set_power_profile;
        let mut args = power_arg {
            power_mode: mode as c_int,
        };
        let input = ProgramInput {
            // SAFETY: `args` outlives the test_run call and is a C-layout
            // struct, so viewing it as raw bytes is valid.
            context_in: Some(unsafe {
                std::slice::from_raw_parts_mut(
                    &mut args as *mut _ as *mut u8,
                    std::mem::size_of_val(&args),
                )
            }),
            ..Default::default()
        };
        let out = prog.test_run(input).unwrap();
        if out.return_value != 0 {
            return Err(out.return_value);
        }

        Ok(())
    }

    /// Re-reads the platform power profile and pushes it to BPF when it
    /// changed. Returns (keep_autopower, current_profile); autopower is
    /// turned off when the platform reports an unknown profile.
    fn update_power_profile(&mut self, prev_profile: PowerProfile) -> (bool, PowerProfile) {
        let profile = fetch_power_profile(false);
        if profile == prev_profile {
            return (true, profile);
        }

        let _ = match profile {
            PowerProfile::Performance => self.set_power_profile(LAVD_PM_PERFORMANCE),
            PowerProfile::Balanced { .. } => self.set_power_profile(LAVD_PM_BALANCED),
            PowerProfile::Powersave => self.set_power_profile(LAVD_PM_POWERSAVE),
            PowerProfile::Unknown => {
                return (false, profile);
            }
        };

        info!("Set the scheduler's power profile to {profile} mode.");
        (true, profile)
    }

    /// Main service loop: applies the initial power mode, then answers stats
    /// requests (with a 1s timeout used to detect monitor disconnects) until
    /// shutdown is requested or the BPF scheduler exits. Detaches on return
    /// and reports the user exit info.
    fn run(&mut self, opts: &Opts, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
        let (res_ch, req_ch) = self.stats_server.channels();
        let mut autopower = opts.autopower;
        let mut profile = PowerProfile::Unknown;

        // Establish the initial power mode (balanced unless overridden).
        if opts.performance {
            let _ = self.set_power_profile(LAVD_PM_PERFORMANCE);
        } else if opts.powersave {
            let _ = self.set_power_profile(LAVD_PM_POWERSAVE);
        } else {
            let _ = self.set_power_profile(LAVD_PM_BALANCED);
        }

        while !shutdown.load(Ordering::Relaxed) && !self.exited() {
            if autopower {
                (autopower, profile) = self.update_power_profile(profile);
            }

            match req_ch.recv_timeout(Duration::from_secs(1)) {
                Ok(req) => {
                    let res = self.stats_req_to_res(&req)?;
                    res_ch.send(res)?;
                }
                // No request within the timeout: assume the monitor went away.
                Err(RecvTimeoutError::Timeout) => {
                    self.stop_monitoring();
                }
                Err(e) => {
                    self.stop_monitoring();
                    Err(e)?
                }
            }
            self.cleanup_introspec();
        }
        // Drain any remaining ring-buffer entries before detaching.
        self.rb_mgr.consume().unwrap();

        // Dropping the link detaches the BPF scheduler.
        let _ = self.struct_ops.take();
        uei_report!(&self.skel, uei)
    }
}
902
903impl Drop for Scheduler<'_> {
904 fn drop(&mut self) {
905 info!("Unregister {SCHEDULER_NAME} scheduler");
906
907 if let Some(struct_ops) = self.struct_ops.take() {
908 drop(struct_ops);
909 }
910 }
911}
912
/// Initializes the tracing subscriber.
///
/// Filter precedence: the RUST_LOG-style default environment variable wins;
/// otherwise `--log-level` is used; if that fails to parse, fall back to
/// "info" (reporting the parse error to stderr). Initialization errors
/// (e.g. a subscriber already installed) are reported but not fatal.
fn init_log(opts: &Opts) {
    let env_filter = EnvFilter::try_from_default_env()
        .or_else(|_| match EnvFilter::try_new(&opts.log_level) {
            Ok(filter) => Ok(filter),
            Err(e) => {
                eprintln!(
                    "invalid log envvar: {}, using info, err is: {}",
                    opts.log_level, e
                );
                EnvFilter::try_new("info")
            }
        })
        .unwrap_or_else(|_| EnvFilter::new("info"));

    match tracing_subscriber::fmt()
        .with_env_filter(env_filter)
        .with_target(true)
        .with_thread_ids(true)
        .with_file(true)
        .with_line_number(true)
        .try_init()
    {
        Ok(()) => {}
        Err(e) => eprintln!("failed to init logger: {}", e),
    }
}
939
940#[clap_main::clap_main]
941fn main(mut opts: Opts) -> Result<()> {
942 if opts.version {
943 println!(
944 "scx_lavd {}",
945 build_id::full_version(env!("CARGO_PKG_VERSION"))
946 );
947 return Ok(());
948 }
949
950 if opts.help_stats {
951 let sys_stats_meta_name = SysStats::meta().name;
952 let sched_sample_meta_name = SchedSample::meta().name;
953 let stats_meta_names: &[&str] = &[
954 sys_stats_meta_name.as_str(),
955 sched_sample_meta_name.as_str(),
956 ];
957 stats::server_data(0).describe_meta(&mut std::io::stdout(), Some(&stats_meta_names))?;
958 return Ok(());
959 }
960
961 init_log(&opts);
962
963 if opts.verbose > 0 {
964 warn!("Setting verbose via -v is depricated and will be an error in future releases.");
965 }
966
967 if let Some(run_id) = opts.run_id {
968 info!("scx_lavd run_id: {}", run_id);
969 }
970
971 if opts.monitor.is_none() && opts.monitor_sched_samples.is_none() {
972 opts.proc().unwrap();
973 info!("{:#?}", opts);
974 }
975
976 let shutdown = Arc::new(AtomicBool::new(false));
977 let shutdown_clone = shutdown.clone();
978 ctrlc::set_handler(move || {
979 shutdown_clone.store(true, Ordering::Relaxed);
980 })
981 .context("Error setting Ctrl-C handler")?;
982
983 if let Some(nr_samples) = opts.monitor_sched_samples {
984 let shutdown_copy = shutdown.clone();
985 let jh = std::thread::spawn(move || {
986 stats::monitor_sched_samples(nr_samples, shutdown_copy).unwrap()
987 });
988 let _ = jh.join();
989 return Ok(());
990 }
991
992 if let Some(intv) = opts.monitor.or(opts.stats) {
993 let shutdown_copy = shutdown.clone();
994 let jh = std::thread::spawn(move || {
995 stats::monitor(Duration::from_secs_f64(intv), shutdown_copy).unwrap()
996 });
997 if opts.monitor.is_some() {
998 let _ = jh.join();
999 return Ok(());
1000 }
1001 }
1002
1003 let mut open_object = MaybeUninit::uninit();
1004 loop {
1005 let mut sched = Scheduler::init(&opts, &mut open_object)?;
1006 info!(
1007 "scx_lavd scheduler is initialized (build ID: {})",
1008 build_id::full_version(env!("CARGO_PKG_VERSION"))
1009 );
1010 info!("scx_lavd scheduler starts running.");
1011 if !sched.run(&opts, shutdown.clone())?.should_restart() {
1012 break;
1013 }
1014 }
1015
1016 Ok(())
1017}