mod bpf_skel;
pub use bpf_skel::*;
pub mod bpf_intf;
pub use bpf_intf::*;

mod cpu_order;
mod stats;

use std::ffi::c_int;
use std::ffi::CStr;
use std::mem;
use std::mem::MaybeUninit;
use std::str;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::thread::ThreadId;
use std::time::Duration;

use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use clap_num::number_range;
use cpu_order::CpuOrder;
use cpu_order::PerfCpuOrder;
use crossbeam::channel;
use crossbeam::channel::Receiver;
use crossbeam::channel::RecvTimeoutError;
use crossbeam::channel::Sender;
use crossbeam::channel::TrySendError;
use libbpf_rs::OpenObject;
use libbpf_rs::PrintLevel;
use libbpf_rs::ProgramInput;
use libc::c_char;
use log::debug;
use log::info;
use plain::Plain;
use scx_stats::prelude::*;
use scx_utils::autopower::{fetch_power_profile, PowerProfile};
use scx_utils::build_id;
use scx_utils::compat;
use scx_utils::init_libbpf_logging;
use scx_utils::libbpf_clap_opts::LibbpfOpts;
use scx_utils::scx_ops_attach;
use scx_utils::scx_ops_load;
use scx_utils::scx_ops_open;
use scx_utils::try_set_rlimit_infinity;
use scx_utils::uei_exited;
use scx_utils::uei_report;
use scx_utils::EnergyModel;
use scx_utils::TopologyArgs;
use scx_utils::UserExitInfo;
use scx_utils::NR_CPU_IDS;
use stats::SchedSample;
use stats::SchedSamples;
use stats::StatsReq;
use stats::StatsRes;
use stats::SysStats;

const SCHEDULER_NAME: &str = "scx_lavd";
/// scx_lavd: a Latency-criticality Aware Virtual Deadline (LAVD) scheduler.
#[derive(Debug, Parser)]
struct Opts {
    /// Automatically decide the scheduler's power mode based on system load.
    #[clap(long = "autopilot", action = clap::ArgAction::SetTrue)]
    autopilot: bool,

    /// Automatically decide the scheduler's power mode based on the system's
    /// active power profile.
    #[clap(long = "autopower", action = clap::ArgAction::SetTrue)]
    autopower: bool,

    /// Run the scheduler in performance mode.
    #[clap(long = "performance", action = clap::ArgAction::SetTrue)]
    performance: bool,

    /// Run the scheduler in powersave mode.
    #[clap(long = "powersave", action = clap::ArgAction::SetTrue)]
    powersave: bool,

    /// Run the scheduler in balanced mode.
    #[clap(long = "balanced", action = clap::ArgAction::SetTrue)]
    balanced: bool,

    /// Maximum scheduling slice duration in microseconds.
    #[clap(long = "slice-max-us", default_value = "5000")]
    slice_max_us: u64,

    /// Minimum scheduling slice duration in microseconds.
    #[clap(long = "slice-min-us", default_value = "500")]
    slice_min_us: u64,

    /// Migration delta in percent (0-100) used when deciding task migration
    /// across compute domains.
    #[clap(long = "mig-delta-pct", default_value = "0", value_parser=Opts::mig_delta_pct_range)]
    mig_delta_pct: u8,

    /// Fixed time slice in microseconds for pinned tasks. When set, pinned
    /// tasks are served from per-CPU DSQs.
    #[clap(long = "pinned-slice-us")]
    pinned_slice_us: Option<u64>,

    /// Preemption shift (0-10): higher values narrow the set of
    /// latency-critical tasks eligible to preempt others.
    #[clap(long = "preempt-shift", default_value = "6", value_parser=Opts::preempt_shift_range)]
    preempt_shift: u8,

    /// Manually specify the CPU preference order. An empty string (the
    /// default) lets the scheduler decide.
    #[clap(long = "cpu-pref-order", default_value = "")]
    cpu_pref_order: String,

    /// Do not use the energy model when deciding the CPU preference order.
    #[clap(long = "no-use-em", action = clap::ArgAction::SetTrue)]
    no_use_em: bool,

    /// Do not boost futex lock holders.
    #[clap(long = "no-futex-boost", action = clap::ArgAction::SetTrue)]
    no_futex_boost: bool,

    /// Disable task preemption.
    #[clap(long = "no-preemption", action = clap::ArgAction::SetTrue)]
    no_preemption: bool,

    /// Disable the synchronous wake-up optimization.
    #[clap(long = "no-wake-sync", action = clap::ArgAction::SetTrue)]
    no_wake_sync: bool,

    /// Disable slice boost.
    #[clap(long = "no-slice-boost", action = clap::ArgAction::SetTrue)]
    no_slice_boost: bool,

    /// Use a per-CPU DSQ instead of a shared DSQ per compute domain.
    #[clap(long = "per-cpu-dsq", action = clap::ArgAction::SetTrue)]
    per_cpu_dsq: bool,

    /// Disable core compaction, which packs work onto fewer cores so that
    /// idle cores can sleep longer.
    #[clap(long = "no-core-compaction", action = clap::ArgAction::SetTrue)]
    no_core_compaction: bool,

    /// Disable CPU frequency scaling.
    #[clap(long = "no-freq-scaling", action = clap::ArgAction::SetTrue)]
    no_freq_scaling: bool,

    /// Enable stats monitoring with the given interval in seconds.
    #[clap(long)]
    stats: Option<f64>,

    /// Run in stats monitoring mode with the given interval in seconds;
    /// does not load the scheduler itself.
    #[clap(long)]
    monitor: Option<f64>,

    /// Run in monitoring mode and print the given number of scheduling
    /// samples.
    #[clap(long)]
    monitor_sched_samples: Option<u64>,

    /// Increase verbosity (can be given multiple times).
    #[clap(short = 'v', long, action = clap::ArgAction::Count)]
    verbose: u8,

    /// Print version and exit.
    #[clap(short = 'V', long, action = clap::ArgAction::SetTrue)]
    version: bool,

    /// Show the description of the statistics fields and exit.
    #[clap(long)]
    help_stats: bool,

    #[clap(flatten, next_help_heading = "Libbpf Options")]
    pub libbpf: LibbpfOpts,

    #[clap(flatten)]
    topology: Option<TopologyArgs>,
}
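
// Example invocations (illustrative):
//
//   scx_lavd                      # autopilot mode is chosen by default
//   scx_lavd --performance        # pin the scheduler to performance mode
//   scx_lavd --monitor 1.0        # only monitor a running instance every second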

impl Opts {
    fn can_autopilot(&self) -> bool {
        !self.autopower
            && !self.performance
            && !self.powersave
            && !self.balanced
            && !self.no_core_compaction
    }

    fn can_autopower(&self) -> bool {
        !self.autopilot
            && !self.performance
            && !self.powersave
            && !self.balanced
            && !self.no_core_compaction
    }

    fn can_performance(&self) -> bool {
        !self.autopilot && !self.autopower && !self.powersave && !self.balanced
    }

    fn can_balanced(&self) -> bool {
        !self.autopilot
            && !self.autopower
            && !self.performance
            && !self.powersave
            && !self.no_core_compaction
    }

    fn can_powersave(&self) -> bool {
        !self.autopilot
            && !self.autopower
            && !self.performance
            && !self.balanced
            && !self.no_core_compaction
    }

    fn proc(&mut self) -> Option<&mut Self> {
        if !self.autopilot {
            self.autopilot = self.can_autopilot();
        }

        if self.autopilot {
            if !self.can_autopilot() {
                info!("Autopilot mode cannot be used with conflicting options.");
                return None;
            }
            info!("Autopilot mode is enabled.");
        }

        if self.autopower {
            if !self.can_autopower() {
                info!("Autopower mode cannot be used with conflicting options.");
                return None;
            }
            info!("Autopower mode is enabled.");
        }

        if self.performance {
            if !self.can_performance() {
                info!("Performance mode cannot be used with conflicting options.");
                return None;
            }
            info!("Performance mode is enabled.");
            self.no_core_compaction = true;
        }

        if self.powersave {
            if !self.can_powersave() {
                info!("Powersave mode cannot be used with conflicting options.");
                return None;
            }
            info!("Powersave mode is enabled.");
            self.no_core_compaction = false;
        }

        if self.balanced {
            if !self.can_balanced() {
                info!("Balanced mode cannot be used with conflicting options.");
                return None;
            }
            info!("Balanced mode is enabled.");
            self.no_core_compaction = false;
        }

        if !EnergyModel::has_energy_model() || !self.cpu_pref_order.is_empty() {
            self.no_use_em = true;
            info!("Energy model won't be used for CPU preference order.");
        }

        if let Some(pinned_slice) = self.pinned_slice_us {
            if pinned_slice < self.slice_min_us || pinned_slice > self.slice_max_us {
                info!(
                    "pinned-slice-us ({}) must be between slice-min-us ({}) and slice-max-us ({})",
                    pinned_slice, self.slice_min_us, self.slice_max_us
                );
                return None;
            }
            info!(
                "Pinned task slice mode is enabled ({} us). Pinned tasks will use per-CPU DSQs.",
                pinned_slice
            );
        }

        Some(self)
    }

    fn preempt_shift_range(s: &str) -> Result<u8, String> {
        number_range(s, 0, 10)
    }

    fn mig_delta_pct_range(s: &str) -> Result<u8, String> {
        number_range(s, 0, 100)
    }
}
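
// A minimal, illustrative sanity check of the mode-conflict rules above. It
// assumes the flattened option groups (LibbpfOpts, TopologyArgs) parse with
// no extra arguments; a sketch, not part of the upstream test suite.
#[cfg(test)]
mod opts_tests {
    use super::*;

    #[test]
    fn conflicting_modes_are_rejected() {
        // --performance and --powersave conflict, so proc() bails out.
        let mut opts = Opts::parse_from(["scx_lavd", "--performance", "--powersave"]);
        assert!(opts.proc().is_none());
    }

    #[test]
    fn autopilot_is_the_default_mode() {
        // With no explicit mode flags, proc() falls back to autopilot.
        let mut opts = Opts::parse_from(["scx_lavd"]);
        assert!(opts.proc().is_some());
        assert!(opts.autopilot);
    }
}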

unsafe impl Plain for msg_task_ctx {}

impl msg_task_ctx {
    fn from_bytes(buf: &[u8]) -> &msg_task_ctx {
        plain::from_bytes(buf).expect("The buffer is either too short or not aligned!")
    }
}

impl introspec {
    fn new() -> Self {
        unsafe { mem::MaybeUninit::<introspec>::zeroed().assume_init() }
    }
}
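
// Note: introspec is generated from the C side (bpf_intf), where an all-zero
// byte pattern is presumably a valid initial value; that is what makes the
// zeroed().assume_init() above sound.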

struct Scheduler<'a> {
    skel: BpfSkel<'a>,                             // BPF skeleton (maps and progs)
    struct_ops: Option<libbpf_rs::Link>,           // attached sched_ext struct_ops link
    rb_mgr: libbpf_rs::RingBuffer<'static>,        // ring buffer of introspection messages
    intrspc: introspec,                            // introspection command block
    intrspc_rx: Receiver<SchedSample>,             // samples relayed from the ring buffer
    monitor_tid: Option<ThreadId>,                 // current stats monitor thread, if any
    stats_server: StatsServer<StatsReq, StatsRes>, // serves stats requests
    mseq_id: u64,                                  // stats sequence number
}

impl<'a> Scheduler<'a> {
    fn init(opts: &'a Opts, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
        if *NR_CPU_IDS > LAVD_CPU_ID_MAX as usize {
            panic!(
                "Num possible CPU IDs ({}) exceeds maximum of ({})",
                *NR_CPU_IDS, LAVD_CPU_ID_MAX
            );
        }

        try_set_rlimit_infinity();

        // Open the BPF program.
        let mut skel_builder = BpfSkelBuilder::default();
        skel_builder.obj_builder.debug(opts.verbose > 0);
        init_libbpf_logging(Some(PrintLevel::Debug));

        let open_opts = opts.libbpf.clone().into_bpf_open_opts();
        let mut skel = scx_ops_open!(skel_builder, open_object, lavd_ops, open_opts)?;

        // Prefer ftrace-based futex probes; fall back to tracepoints.
        if !opts.no_futex_boost {
            if !Self::attach_futex_ftraces(&mut skel)? {
                info!("Failed to attach futex ftraces. Trying tracepoints instead.");
                if !Self::attach_futex_tracepoints(&mut skel)? {
                    info!("Failed to attach futex tracepoints.");
                }
            }
        }

        // Initialize the CPU topology and the BPF side's global variables.
        let order = CpuOrder::new(opts.topology.as_ref()).unwrap();
        Self::init_cpus(&mut skel, &order);
        Self::init_cpdoms(&mut skel, &order);
        Self::init_globals(&mut skel, opts, &order);

        // Load and attach the scheduler, and launch the stats server.
        let mut skel = scx_ops_load!(skel, lavd_ops, uei)?;
        let struct_ops = Some(scx_ops_attach!(skel, lavd_ops)?);
        let stats_server = StatsServer::new(stats::server_data(*NR_CPU_IDS as u64)).launch()?;

        // Build a ring buffer for instrumentation messages from the BPF side.
        let (intrspc_tx, intrspc_rx) = channel::bounded(65536);
        let rb_map = &mut skel.maps.introspec_msg;
        let mut builder = libbpf_rs::RingBufferBuilder::new();
        builder
            .add(rb_map, move |data| {
                Scheduler::relay_introspec(data, &intrspc_tx)
            })
            .unwrap();
        let rb_mgr = builder.build().unwrap();

        Ok(Self {
            skel,
            struct_ops,
            rb_mgr,
            intrspc: introspec::new(),
            intrspc_rx,
            monitor_tid: None,
            stats_server,
            mseq_id: 0,
        })
    }

    fn attach_futex_ftraces(skel: &mut OpenBpfSkel) -> Result<bool> {
        let ftraces = vec![
            ("__futex_wait", &skel.progs.fexit___futex_wait),
            ("futex_wait_multiple", &skel.progs.fexit_futex_wait_multiple),
            (
                "futex_wait_requeue_pi",
                &skel.progs.fexit_futex_wait_requeue_pi,
            ),
            ("futex_wake", &skel.progs.fexit_futex_wake),
            ("futex_wake_op", &skel.progs.fexit_futex_wake_op),
            ("futex_lock_pi", &skel.progs.fexit_futex_lock_pi),
            ("futex_unlock_pi", &skel.progs.fexit_futex_unlock_pi),
        ];

        if !compat::tracer_available("function")? {
            info!("Ftrace is not enabled in the kernel.");
            return Ok(false);
        }

        compat::cond_kprobes_enable(ftraces)
    }

    fn attach_futex_tracepoints(skel: &mut OpenBpfSkel) -> Result<bool> {
        let tracepoints = vec![
            ("syscalls:sys_enter_futex", &skel.progs.rtp_sys_enter_futex),
            ("syscalls:sys_exit_futex", &skel.progs.rtp_sys_exit_futex),
            (
                "syscalls:sys_exit_futex_wait",
                &skel.progs.rtp_sys_exit_futex_wait,
            ),
            (
                "syscalls:sys_exit_futex_waitv",
                &skel.progs.rtp_sys_exit_futex_waitv,
            ),
            (
                "syscalls:sys_exit_futex_wake",
                &skel.progs.rtp_sys_exit_futex_wake,
            ),
        ];

        compat::cond_tracepoints_enable(tracepoints)
    }
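
    // Both attach paths above are best-effort: when neither the ftrace fexit
    // probes nor the raw tracepoints can be attached, init() proceeds without
    // futex-based lock-holder boosting rather than failing.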

    fn init_cpus(skel: &mut OpenBpfSkel, order: &CpuOrder) {
        debug!("{:#?}", order);

        // Initialize the CPU capacity and topology flags.
        let rodata = skel.maps.rodata_data.as_mut().unwrap();
        for cpu in order.cpuids.iter() {
            rodata.cpu_capacity[cpu.cpu_adx] = cpu.cpu_cap as u16;
            rodata.cpu_big[cpu.cpu_adx] = cpu.big_core as u8;
            rodata.cpu_turbo[cpu.cpu_adx] = cpu.turbo_core as u8;
            rodata.cpu_sibling[cpu.cpu_adx] = cpu.cpu_sibling as u32;
        }

        // Initialize the performance vs. CPU order (PCO) states.
        let nr_pco_states: u8 = order.perf_cpu_order.len() as u8;
        if nr_pco_states > LAVD_PCO_STATE_MAX as u8 {
            panic!("Generated performance vs. CPU order states are too complex ({nr_pco_states}) to handle");
        }
        rodata.nr_pco_states = nr_pco_states;

        for (i, (_, pco)) in order.perf_cpu_order.iter().enumerate() {
            Self::init_pco_tuple(skel, i, pco);
            info!("{:#}", pco);
        }

        // Fill the remaining PCO slots with the last entry.
        let (_, last_pco) = order.perf_cpu_order.last_key_value().unwrap();
        for i in nr_pco_states..LAVD_PCO_STATE_MAX as u8 {
            Self::init_pco_tuple(skel, i as usize, last_pco);
        }
    }

    fn init_pco_tuple(skel: &mut OpenBpfSkel, i: usize, pco: &PerfCpuOrder) {
        let cpus_perf = pco.cpus_perf.borrow();
        let cpus_ovflw = pco.cpus_ovflw.borrow();
        let pco_nr_primary = cpus_perf.len();

        let rodata = skel.maps.rodata_data.as_mut().unwrap();
        rodata.pco_bounds[i] = pco.perf_cap as u32;
        rodata.pco_nr_primary[i] = pco_nr_primary as u16;

        // Primary CPUs occupy the first pco_nr_primary slots...
        for (j, &cpu_adx) in cpus_perf.iter().enumerate() {
            rodata.pco_table[i][j] = cpu_adx as u16;
        }

        // ...and overflow CPUs follow right after them.
        for (j, &cpu_adx) in cpus_ovflw.iter().enumerate() {
            rodata.pco_table[i][j + pco_nr_primary] = cpu_adx as u16;
        }
    }
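
    // A sketch of the per-state layout that init_pco_tuple() produces:
    //
    //   pco_bounds[i]     = performance capacity bound of state i
    //   pco_nr_primary[i] = n (number of primary CPUs)
    //   pco_table[i]      = [ cpus_perf[0..n] | cpus_ovflw[0..m] ]
    //
    // How the BPF side walks this table is defined in the BPF code, not here.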

    fn init_cpdoms(skel: &mut OpenBpfSkel, order: &CpuOrder) {
        let bss_data = skel.maps.bss_data.as_mut().unwrap();
        for (k, v) in order.cpdom_map.iter() {
            let cpdom = &mut bss_data.cpdom_ctxs[v.cpdom_id];
            cpdom.id = v.cpdom_id as u64;
            cpdom.alt_id = v.cpdom_alt_id.get() as u64;
            cpdom.numa_id = k.numa_adx as u8;
            cpdom.llc_id = k.llc_adx as u8;
            cpdom.is_big = k.is_big as u8;
            cpdom.is_valid = 1;

            // Set the domain's CPU mask, one bit per CPU ID.
            for cpu_id in v.cpu_ids.iter() {
                let i = cpu_id / 64;
                let j = cpu_id % 64;
                cpdom.__cpumask[i] |= 0x01 << j;
            }

            if v.neighbor_map.borrow().len() > LAVD_CPDOM_MAX_DIST as usize {
                panic!("The processor topology is too complex to handle in BPF.");
            }

            // Record the neighbor domains at each distance as a bitmask.
            for (d, (_dist, neighbors)) in v.neighbor_map.borrow().iter().enumerate() {
                let nr_neighbors = neighbors.borrow().len() as u8;
                if nr_neighbors > LAVD_CPDOM_MAX_NR as u8 {
                    panic!("The processor topology is too complex to handle in BPF.");
                }
                cpdom.nr_neighbors[d] = nr_neighbors;
                for n in neighbors.borrow().iter() {
                    cpdom.neighbor_bits[d] |= 0x1 << n;
                }
            }
        }
    }
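
    // Worked example of the bit encodings above (illustrative): CPU 70 lands
    // in __cpumask[70 / 64 = 1] at bit 70 % 64 = 6, and a neighbor domain
    // with id 3 at distance index d sets bit 3 of neighbor_bits[d].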

    fn init_globals(skel: &mut OpenBpfSkel, opts: &Opts, order: &CpuOrder) {
        let bss_data = skel.maps.bss_data.as_mut().unwrap();
        bss_data.no_preemption = opts.no_preemption;
        bss_data.no_core_compaction = opts.no_core_compaction;
        bss_data.no_freq_scaling = opts.no_freq_scaling;
        bss_data.is_powersave_mode = opts.powersave;

        let rodata = skel.maps.rodata_data.as_mut().unwrap();
        rodata.nr_llcs = order.nr_llcs as u64;
        rodata.__nr_cpu_ids = *NR_CPU_IDS as u64;
        rodata.is_smt_active = order.smt_enabled;
        rodata.is_autopilot_on = opts.autopilot;
        rodata.verbose = opts.verbose;
        rodata.slice_max_ns = opts.slice_max_us * 1000;
        rodata.slice_min_ns = opts.slice_min_us * 1000;
        // Zero means that the pinned-task slice mode is disabled.
        rodata.pinned_slice_ns = opts.pinned_slice_us.map(|v| v * 1000).unwrap_or(0);
        rodata.preempt_shift = opts.preempt_shift;
        rodata.mig_delta_pct = opts.mig_delta_pct;
        rodata.no_use_em = opts.no_use_em as u8;
        rodata.no_wake_sync = opts.no_wake_sync;
        rodata.no_slice_boost = opts.no_slice_boost;
        rodata.per_cpu_dsq = opts.per_cpu_dsq;

        skel.struct_ops.lavd_ops_mut().flags = *compat::SCX_OPS_ENQ_EXITING
            | *compat::SCX_OPS_ENQ_LAST
            | *compat::SCX_OPS_ENQ_MIGRATION_DISABLED
            | *compat::SCX_OPS_KEEP_BUILTIN_IDLE;
    }

    fn get_msg_seq_id() -> u64 {
        // A process-wide, monotonically increasing sequence number for
        // introspection messages; an atomic avoids mutating a `static mut`.
        // The first call returns 1.
        static MSEQ: AtomicU64 = AtomicU64::new(0);
        MSEQ.fetch_add(1, Ordering::Relaxed) + 1
    }

    fn relay_introspec(data: &[u8], intrspc_tx: &Sender<SchedSample>) -> i32 {
        let mt = msg_task_ctx::from_bytes(data);
        let tx = mt.taskc_x;
        let tc = mt.taskc;

        if mt.hdr.kind != LAVD_MSG_TASKC {
            return 0;
        }

        let mseq = Scheduler::get_msg_seq_id();

        // The comm and stat fields are fixed-size, NUL-terminated C strings,
        // so convert them through CStr.
        let c_tx_cm: *const c_char = (&tx.comm as *const [c_char; 17]) as *const c_char;
        let c_tx_cm_str: &CStr = unsafe { CStr::from_ptr(c_tx_cm) };
        let tx_comm: &str = c_tx_cm_str.to_str().unwrap();

        let c_waker_cm: *const c_char = (&tc.waker_comm as *const [c_char; 17]) as *const c_char;
        let c_waker_cm_str: &CStr = unsafe { CStr::from_ptr(c_waker_cm) };
        let waker_comm: &str = c_waker_cm_str.to_str().unwrap();

        let c_tx_st: *const c_char = (&tx.stat as *const [c_char; 5]) as *const c_char;
        let c_tx_st_str: &CStr = unsafe { CStr::from_ptr(c_tx_st) };
        let tx_stat: &str = c_tx_st_str.to_str().unwrap();

        // Dropping a sample when the channel is full is fine: monitoring is
        // best-effort and must not stall ring-buffer consumption.
        match intrspc_tx.try_send(SchedSample {
            mseq,
            pid: tx.pid,
            comm: tx_comm.into(),
            stat: tx_stat.into(),
            cpu_id: tc.cpu_id,
            prev_cpu_id: tc.prev_cpu_id,
            suggested_cpu_id: tc.suggested_cpu_id,
            waker_pid: tc.waker_pid,
            waker_comm: waker_comm.into(),
            slice: tc.slice,
            lat_cri: tc.lat_cri,
            avg_lat_cri: tx.avg_lat_cri,
            static_prio: tx.static_prio,
            rerunnable_interval: tx.rerunnable_interval,
            resched_interval: tc.resched_interval,
            run_freq: tc.run_freq,
            avg_runtime: tc.avg_runtime,
            wait_freq: tc.wait_freq,
            wake_freq: tc.wake_freq,
            perf_cri: tc.perf_cri,
            thr_perf_cri: tx.thr_perf_cri,
            cpuperf_cur: tx.cpuperf_cur,
            cpu_util: tx.cpu_util,
            cpu_sutil: tx.cpu_sutil,
            nr_active: tx.nr_active,
            dsq_id: tx.dsq_id,
            dsq_consume_lat: tx.dsq_consume_lat,
            slice_used: tc.last_slice_used,
        }) {
            Ok(()) | Err(TrySendError::Full(_)) => 0,
            Err(e) => panic!("failed to send on intrspc_tx ({})", e),
        }
    }
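
    // relay_introspec() is the ring-buffer callback registered in init().
    // libbpf stops draining the ring buffer when a callback reports an error
    // (negative return), so this callback always returns 0, even for
    // dropped samples.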

    fn prep_introspec(&mut self) {
        let bss_data = self.skel.maps.bss_data.as_mut().unwrap();
        if !bss_data.is_monitored {
            bss_data.is_monitored = true;
        }
        bss_data.intrspc.cmd = self.intrspc.cmd;
        bss_data.intrspc.arg = self.intrspc.arg;
    }

    fn cleanup_introspec(&mut self) {
        self.skel.maps.bss_data.as_mut().unwrap().intrspc.cmd = LAVD_CMD_NOP;
    }

    // Percentage helper: 100 * x / y (y == 0 yields NaN or infinity).
    fn get_pc(x: u64, y: u64) -> f64 {
        100. * x as f64 / y as f64
    }

    fn get_power_mode(power_mode: i32) -> &'static str {
        match power_mode as u32 {
            LAVD_PM_PERFORMANCE => "performance",
            LAVD_PM_BALANCED => "balanced",
            LAVD_PM_POWERSAVE => "powersave",
            _ => "unknown",
        }
    }

    fn stats_req_to_res(&mut self, req: &StatsReq) -> Result<StatsRes> {
        Ok(match req {
            StatsReq::NewSampler(tid) => {
                self.rb_mgr.consume().unwrap();
                self.monitor_tid = Some(*tid);
                StatsRes::Ack
            }
            StatsReq::SysStatsReq { tid } => {
                if Some(*tid) != self.monitor_tid {
                    return Ok(StatsRes::Bye);
                }
                self.mseq_id += 1;

                let bss_data = self.skel.maps.bss_data.as_ref().unwrap();
                let st = bss_data.sys_stat;

                let mseq = self.mseq_id;
                let nr_queued_task = st.nr_queued_task;
                let nr_active = st.nr_active;
                let nr_sched = st.nr_sched;
                let nr_preempt = st.nr_preempt;
                let pc_pc = Self::get_pc(st.nr_perf_cri, nr_sched);
                let pc_lc = Self::get_pc(st.nr_lat_cri, nr_sched);
                let pc_x_migration = Self::get_pc(st.nr_x_migration, nr_sched);
                let nr_stealee = st.nr_stealee;
                let nr_big = st.nr_big;
                let pc_big = Self::get_pc(nr_big, nr_sched);
                let pc_pc_on_big = Self::get_pc(st.nr_pc_on_big, nr_big);
                let pc_lc_on_big = Self::get_pc(st.nr_lc_on_big, nr_big);
                let power_mode = Self::get_power_mode(bss_data.power_mode);
                let total_time = bss_data.performance_mode_ns
                    + bss_data.balanced_mode_ns
                    + bss_data.powersave_mode_ns;
                let pc_performance = Self::get_pc(bss_data.performance_mode_ns, total_time);
                let pc_balanced = Self::get_pc(bss_data.balanced_mode_ns, total_time);
                let pc_powersave = Self::get_pc(bss_data.powersave_mode_ns, total_time);

                StatsRes::SysStats(SysStats {
                    mseq,
                    nr_queued_task,
                    nr_active,
                    nr_sched,
                    nr_preempt,
                    pc_pc,
                    pc_lc,
                    pc_x_migration,
                    nr_stealee,
                    pc_big,
                    pc_pc_on_big,
                    pc_lc_on_big,
                    power_mode: power_mode.to_string(),
                    pc_performance,
                    pc_balanced,
                    pc_powersave,
                })
            }
            StatsReq::SchedSamplesNr {
                tid,
                nr_samples,
                interval_ms,
            } => {
                if Some(*tid) != self.monitor_tid {
                    return Ok(StatsRes::Bye);
                }

                // Ask the BPF side for nr_samples samples, wait one interval
                // for them to be produced, then drain the ring buffer and
                // the relay channel.
                self.intrspc.cmd = LAVD_CMD_SCHED_N;
                self.intrspc.arg = *nr_samples;
                self.prep_introspec();
                std::thread::sleep(Duration::from_millis(*interval_ms));
                self.rb_mgr.poll(Duration::from_millis(100)).unwrap();

                let mut samples = vec![];
                while let Ok(ts) = self.intrspc_rx.try_recv() {
                    samples.push(ts);
                }

                self.cleanup_introspec();

                StatsRes::SchedSamples(SchedSamples { samples })
            }
        })
    }

    fn stop_monitoring(&mut self) {
        if self.skel.maps.bss_data.as_ref().unwrap().is_monitored {
            self.skel.maps.bss_data.as_mut().unwrap().is_monitored = false;
        }
    }

    pub fn exited(&mut self) -> bool {
        uei_exited!(&self.skel, uei)
    }

    fn set_power_profile(&mut self, mode: u32) -> Result<(), u32> {
        let prog = &mut self.skel.progs.set_power_profile;
        let mut args = power_arg {
            power_mode: mode as c_int,
        };
        // Hand the argument struct to the BPF program as the test_run
        // (BPF_PROG_RUN) input context.
        let input = ProgramInput {
            context_in: Some(unsafe {
                std::slice::from_raw_parts_mut(
                    &mut args as *mut _ as *mut u8,
                    std::mem::size_of_val(&args),
                )
            }),
            ..Default::default()
        };
        let out = prog.test_run(input).unwrap();
        if out.return_value != 0 {
            return Err(out.return_value);
        }

        Ok(())
    }

    /// Poll the system's power profile and, when it changes, push it to the
    /// BPF side. Returns (keep_polling, current_profile); an unknown profile
    /// turns autopower polling off.
    fn update_power_profile(&mut self, prev_profile: PowerProfile) -> (bool, PowerProfile) {
        let profile = fetch_power_profile(false);
        if profile == prev_profile {
            // Nothing changed, so nothing to do.
            return (true, profile);
        }

        let _ = match profile {
            PowerProfile::Performance => self.set_power_profile(LAVD_PM_PERFORMANCE),
            PowerProfile::Balanced { .. } => self.set_power_profile(LAVD_PM_BALANCED),
            PowerProfile::Powersave => self.set_power_profile(LAVD_PM_POWERSAVE),
            PowerProfile::Unknown => {
                return (false, profile);
            }
        };

        info!("Set the scheduler's power profile to {profile} mode.");
        (true, profile)
    }

    fn run(&mut self, opts: &Opts, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
        let (res_ch, req_ch) = self.stats_server.channels();
        let mut autopower = opts.autopower;
        let mut profile = PowerProfile::Unknown;

        if opts.performance {
            let _ = self.set_power_profile(LAVD_PM_PERFORMANCE);
        } else if opts.powersave {
            let _ = self.set_power_profile(LAVD_PM_POWERSAVE);
        } else {
            let _ = self.set_power_profile(LAVD_PM_BALANCED);
        }

        while !shutdown.load(Ordering::Relaxed) && !self.exited() {
            if autopower {
                (autopower, profile) = self.update_power_profile(profile);
            }

            match req_ch.recv_timeout(Duration::from_secs(1)) {
                Ok(req) => {
                    let res = self.stats_req_to_res(&req)?;
                    res_ch.send(res)?;
                }
                Err(RecvTimeoutError::Timeout) => {
                    self.stop_monitoring();
                }
                Err(e) => {
                    self.stop_monitoring();
                    Err(e)?
                }
            }
            self.cleanup_introspec();
        }
        self.rb_mgr.consume().unwrap();

        let _ = self.struct_ops.take();
        uei_report!(&self.skel, uei)
    }
}

impl Drop for Scheduler<'_> {
    fn drop(&mut self) {
        info!("Unregister {SCHEDULER_NAME} scheduler");

        if let Some(struct_ops) = self.struct_ops.take() {
            drop(struct_ops);
        }
    }
}

fn init_log(opts: &Opts) {
    let llv = match opts.verbose {
        0 => simplelog::LevelFilter::Info,
        1 => simplelog::LevelFilter::Debug,
        _ => simplelog::LevelFilter::Trace,
    };
    let mut lcfg = simplelog::ConfigBuilder::new();
    lcfg.set_time_offset_to_local()
        .expect("Failed to set local time offset")
        .set_time_level(simplelog::LevelFilter::Error)
        .set_location_level(simplelog::LevelFilter::Off)
        .set_target_level(simplelog::LevelFilter::Off)
        .set_thread_level(simplelog::LevelFilter::Off);
    simplelog::TermLogger::init(
        llv,
        lcfg.build(),
        simplelog::TerminalMode::Stderr,
        simplelog::ColorChoice::Auto,
    )
    .unwrap();
}

fn main() -> Result<()> {
    let mut opts = Opts::parse();

    if opts.version {
        println!(
            "scx_lavd {}",
            build_id::full_version(env!("CARGO_PKG_VERSION"))
        );
        return Ok(());
    }

    if opts.help_stats {
        let sys_stats_meta_name = SysStats::meta().name;
        let sched_sample_meta_name = SchedSample::meta().name;
        let stats_meta_names: &[&str] = &[
            sys_stats_meta_name.as_str(),
            sched_sample_meta_name.as_str(),
        ];
        stats::server_data(0).describe_meta(&mut std::io::stdout(), Some(&stats_meta_names))?;
        return Ok(());
    }

    init_log(&opts);

    if opts.monitor.is_none() && opts.monitor_sched_samples.is_none() {
        opts.proc().unwrap();
        info!("{:#?}", opts);
    }

    let shutdown = Arc::new(AtomicBool::new(false));
    let shutdown_clone = shutdown.clone();
    ctrlc::set_handler(move || {
        shutdown_clone.store(true, Ordering::Relaxed);
    })
    .context("Error setting Ctrl-C handler")?;

    if let Some(nr_samples) = opts.monitor_sched_samples {
        let shutdown_copy = shutdown.clone();
        let jh = std::thread::spawn(move || {
            stats::monitor_sched_samples(nr_samples, shutdown_copy).unwrap()
        });
        let _ = jh.join();
        return Ok(());
    }

    if let Some(intv) = opts.monitor.or(opts.stats) {
        let shutdown_copy = shutdown.clone();
        let jh = std::thread::spawn(move || {
            stats::monitor(Duration::from_secs_f64(intv), shutdown_copy).unwrap()
        });
        if opts.monitor.is_some() {
            let _ = jh.join();
            return Ok(());
        }
    }

    let mut open_object = MaybeUninit::uninit();
    loop {
        let mut sched = Scheduler::init(&opts, &mut open_object)?;
        info!(
            "scx_lavd scheduler is initialized (build ID: {})",
            build_id::full_version(env!("CARGO_PKG_VERSION"))
        );
        info!("scx_lavd scheduler starts running.");
        if !sched.run(&opts, shutdown.clone())?.should_restart() {
            break;
        }
    }

    Ok(())
}