mod bpf_skel;
pub use bpf_skel::*;
pub mod bpf_intf;
pub use bpf_intf::*;

mod cpu_order;
use scx_utils::init_libbpf_logging;
mod stats;
use std::ffi::c_int;
use std::ffi::CStr;
use std::mem;
use std::mem::MaybeUninit;
use std::str;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::thread::ThreadId;
use std::time::Duration;

use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use clap_num::number_range;
use cpu_order::CpuOrder;
use cpu_order::PerfCpuOrder;
use crossbeam::channel;
use crossbeam::channel::Receiver;
use crossbeam::channel::RecvTimeoutError;
use crossbeam::channel::Sender;
use crossbeam::channel::TrySendError;
use libbpf_rs::OpenObject;
use libbpf_rs::PrintLevel;
use libbpf_rs::ProgramInput;
use log::debug;
use log::info;
use plain::Plain;
use scx_stats::prelude::*;
use scx_utils::autopower::{fetch_power_profile, PowerProfile};
use scx_utils::build_id;
use scx_utils::compat;
use scx_utils::libbpf_clap_opts::LibbpfOpts;
use scx_utils::scx_ops_attach;
use scx_utils::scx_ops_load;
use scx_utils::scx_ops_open;
use scx_utils::try_set_rlimit_infinity;
use scx_utils::uei_exited;
use scx_utils::uei_report;
use scx_utils::EnergyModel;
use scx_utils::TopologyArgs;
use scx_utils::UserExitInfo;
use scx_utils::NR_CPU_IDS;
use stats::SchedSample;
use stats::SchedSamples;
use stats::StatsReq;
use stats::StatsRes;
use stats::SysStats;

const SCHEDULER_NAME: &str = "scx_lavd";
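
/// Command-line options for the scx_lavd user-space loader.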
#[derive(Debug, Parser)]
struct Opts {
    #[clap(long = "autopilot", action = clap::ArgAction::SetTrue)]
    autopilot: bool,

    #[clap(long = "autopower", action = clap::ArgAction::SetTrue)]
    autopower: bool,

    #[clap(long = "performance", action = clap::ArgAction::SetTrue)]
    performance: bool,

    #[clap(long = "powersave", action = clap::ArgAction::SetTrue)]
    powersave: bool,

    #[clap(long = "balanced", action = clap::ArgAction::SetTrue)]
    balanced: bool,

    #[clap(long = "slice-max-us", default_value = "5000")]
    slice_max_us: u64,

    #[clap(long = "slice-min-us", default_value = "500")]
    slice_min_us: u64,

    #[clap(long = "preempt-shift", default_value = "6", value_parser=Opts::preempt_shift_range)]
    preempt_shift: u8,

    #[clap(long = "cpu-pref-order", default_value = "")]
    cpu_pref_order: String,

    #[clap(long = "no-use-em", action = clap::ArgAction::SetTrue)]
    no_use_em: bool,

    #[clap(long = "no-futex-boost", action = clap::ArgAction::SetTrue)]
    no_futex_boost: bool,

    #[clap(long = "no-preemption", action = clap::ArgAction::SetTrue)]
    no_preemption: bool,

    #[clap(long = "no-wake-sync", action = clap::ArgAction::SetTrue)]
    no_wake_sync: bool,

    #[clap(long = "no-slice-boost", action = clap::ArgAction::SetTrue)]
    no_slice_boost: bool,

    #[clap(long = "per-cpu-dsq", action = clap::ArgAction::SetTrue)]
    per_cpu_dsq: bool,

    #[clap(long = "no-core-compaction", action = clap::ArgAction::SetTrue)]
    no_core_compaction: bool,

    #[clap(long = "no-freq-scaling", action = clap::ArgAction::SetTrue)]
    no_freq_scaling: bool,

    #[clap(long)]
    stats: Option<f64>,

    #[clap(long)]
    monitor: Option<f64>,

    #[clap(long)]
    monitor_sched_samples: Option<u64>,

    #[clap(short = 'v', long, action = clap::ArgAction::Count)]
    verbose: u8,

    #[clap(short = 'V', long, action = clap::ArgAction::SetTrue)]
    version: bool,

    #[clap(long)]
    help_stats: bool,

    #[clap(flatten, next_help_heading = "Libbpf Options")]
    pub libbpf: LibbpfOpts,

    #[clap(flatten)]
    topology: Option<TopologyArgs>,
}

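/// The `can_*` helpers below enforce that the power-mode options
/// (autopilot, autopower, performance, powersave, balanced) are mutually
/// exclusive, and that core compaction is not explicitly disabled for the
/// modes that rely on it.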
impl Opts {
    fn can_autopilot(&self) -> bool {
        !self.autopower
            && !self.performance
            && !self.powersave
            && !self.balanced
            && !self.no_core_compaction
    }

    fn can_autopower(&self) -> bool {
        !self.autopilot
            && !self.performance
            && !self.powersave
            && !self.balanced
            && !self.no_core_compaction
    }

    fn can_performance(&self) -> bool {
        !self.autopilot
            && !self.autopower
            && !self.powersave
            && !self.balanced
    }

    fn can_balanced(&self) -> bool {
        !self.autopilot
            && !self.autopower
            && !self.performance
            && !self.powersave
            && !self.no_core_compaction
    }

    fn can_powersave(&self) -> bool {
        !self.autopilot
            && !self.autopower
            && !self.performance
            && !self.balanced
            && !self.no_core_compaction
    }

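    /// Validate the chosen power mode, falling back to autopilot when no
    /// explicit mode is requested, and derive dependent options (e.g.,
    /// whether core compaction and the energy model are used).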
    fn proc(&mut self) -> Option<&mut Self> {
        if !self.autopilot {
            self.autopilot = self.can_autopilot();
        }

        if self.autopilot {
            if !self.can_autopilot() {
                info!("Autopilot mode cannot be used with conflicting options.");
                return None;
            }
            info!("Autopilot mode is enabled.");
        }

        if self.autopower {
            if !self.can_autopower() {
                info!("Autopower mode cannot be used with conflicting options.");
                return None;
            }
            info!("Autopower mode is enabled.");
        }

        if self.performance {
            if !self.can_performance() {
                info!("Performance mode cannot be used with conflicting options.");
                return None;
            }
            info!("Performance mode is enabled.");
            self.no_core_compaction = true;
        }

        if self.powersave {
            if !self.can_powersave() {
                info!("Powersave mode cannot be used with conflicting options.");
                return None;
            }
            info!("Powersave mode is enabled.");
            self.no_core_compaction = false;
        }

        if self.balanced {
            if !self.can_balanced() {
                info!("Balanced mode cannot be used with conflicting options.");
                return None;
            }
            info!("Balanced mode is enabled.");
            self.no_core_compaction = false;
        }

        if !EnergyModel::has_energy_model() || !self.cpu_pref_order.is_empty() {
            self.no_use_em = true;
            info!("The energy model won't be used for the CPU preference order.");
        }

        Some(self)
    }

    fn preempt_shift_range(s: &str) -> Result<u8, String> {
        number_range(s, 0, 10)
    }
}

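// SAFETY: msg_task_ctx is a plain-old-data struct generated from the BPF
// side, so it is safe to reinterpret raw ring-buffer bytes as this type.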
unsafe impl Plain for msg_task_ctx {}

impl msg_task_ctx {
    fn from_bytes(buf: &[u8]) -> &msg_task_ctx {
        plain::from_bytes(buf).expect("The buffer is either too short or not aligned!")
    }
}

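// The generated introspec type has no Default impl, so create a
// zero-filled instance instead.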
impl introspec {
    fn new() -> Self {
        unsafe { mem::MaybeUninit::<introspec>::zeroed().assume_init() }
    }
}

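/// User-space scheduler state: the loaded BPF skeleton, the attached
/// struct_ops link, the introspection ring buffer, and the stats server
/// that serves monitoring requests.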
struct Scheduler<'a> {
    skel: BpfSkel<'a>,
    struct_ops: Option<libbpf_rs::Link>,
    rb_mgr: libbpf_rs::RingBuffer<'static>,
    intrspc: introspec,
    intrspc_rx: Receiver<SchedSample>,
    monitor_tid: Option<ThreadId>,
    stats_server: StatsServer<StatsReq, StatsRes>,
    mseq_id: u64,
}

impl<'a> Scheduler<'a> {
    fn init(opts: &'a Opts, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
        if *NR_CPU_IDS > LAVD_CPU_ID_MAX as usize {
            panic!(
                "Number of possible CPU IDs ({}) exceeds the maximum ({})",
                *NR_CPU_IDS, LAVD_CPU_ID_MAX
            );
        }

        try_set_rlimit_infinity();

        let mut skel_builder = BpfSkelBuilder::default();
        skel_builder.obj_builder.debug(opts.verbose > 0);
        init_libbpf_logging(Some(PrintLevel::Debug));

        let open_opts = opts.libbpf.clone().into_bpf_open_opts();
        let mut skel = scx_ops_open!(skel_builder, open_object, lavd_ops, open_opts)?;

        if !opts.no_futex_boost {
            if !Self::attach_futex_ftraces(&mut skel)? {
                info!("Failed to attach futex ftraces. Trying tracepoints instead.");
                if !Self::attach_futex_tracepoints(&mut skel)? {
                    info!("Failed to attach futex tracepoints.");
                }
            }
        }

        let order = CpuOrder::new(opts.topology.as_ref()).unwrap();
        Self::init_cpus(&mut skel, &order);
        Self::init_cpdoms(&mut skel, &order);

        Self::init_globals(&mut skel, opts, &order);

        let mut skel = scx_ops_load!(skel, lavd_ops, uei)?;
        let struct_ops = Some(scx_ops_attach!(skel, lavd_ops)?);
        let stats_server = StatsServer::new(stats::server_data(*NR_CPU_IDS as u64)).launch()?;

        let (intrspc_tx, intrspc_rx) = channel::bounded(65536);
        let rb_map = &mut skel.maps.introspec_msg;
        let mut builder = libbpf_rs::RingBufferBuilder::new();
        builder
            .add(rb_map, move |data| {
                Scheduler::relay_introspec(data, &intrspc_tx)
            })
            .unwrap();
        let rb_mgr = builder.build().unwrap();

        Ok(Self {
            skel,
            struct_ops,
            rb_mgr,
            intrspc: introspec::new(),
            intrspc_rx,
            monitor_tid: None,
            stats_server,
            mseq_id: 0,
        })
    }

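    /// Try to attach fexit probes to the kernel's futex entry points so the
    /// BPF side can observe futex wait/wake activity (the basis of the
    /// futex-boost heuristic). Returns Ok(false) when the kernel does not
    /// expose these functions, in which case the tracepoint-based fallback
    /// below is attempted.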
    fn attach_futex_ftraces(skel: &mut OpenBpfSkel) -> Result<bool> {
        let ftraces = vec![
            ("__futex_wait", &skel.progs.fexit___futex_wait),
            ("futex_wait_multiple", &skel.progs.fexit_futex_wait_multiple),
            (
                "futex_wait_requeue_pi",
                &skel.progs.fexit_futex_wait_requeue_pi,
            ),
            ("futex_wake", &skel.progs.fexit_futex_wake),
            ("futex_wake_op", &skel.progs.fexit_futex_wake_op),
            ("futex_lock_pi", &skel.progs.fexit_futex_lock_pi),
            ("futex_unlock_pi", &skel.progs.fexit_futex_unlock_pi),
        ];

        compat::cond_kprobes_enable(ftraces)
    }

    fn attach_futex_tracepoints(skel: &mut OpenBpfSkel) -> Result<bool> {
        let tracepoints = vec![
            ("syscalls:sys_enter_futex", &skel.progs.rtp_sys_enter_futex),
            ("syscalls:sys_exit_futex", &skel.progs.rtp_sys_exit_futex),
            (
                "syscalls:sys_exit_futex_wait",
                &skel.progs.rtp_sys_exit_futex_wait,
            ),
            (
                "syscalls:sys_exit_futex_waitv",
                &skel.progs.rtp_sys_exit_futex_waitv,
            ),
            (
                "syscalls:sys_exit_futex_wake",
                &skel.progs.rtp_sys_exit_futex_wake,
            ),
        ];

        compat::cond_tracepoints_enable(tracepoints)
    }

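    /// Copy per-CPU topology information (capacity, big/turbo flags, SMT
    /// sibling) into the BPF read-only data, and populate the table mapping
    /// performance targets to CPU preference orders (PCO states).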
    fn init_cpus(skel: &mut OpenBpfSkel, order: &CpuOrder) {
        debug!("{:#?}", order);

        let rodata = skel.maps.rodata_data.as_mut().unwrap();
        for cpu in order.cpuids.iter() {
            rodata.cpu_capacity[cpu.cpu_adx] = cpu.cpu_cap as u16;
            rodata.cpu_big[cpu.cpu_adx] = cpu.big_core as u8;
            rodata.cpu_turbo[cpu.cpu_adx] = cpu.turbo_core as u8;
            rodata.cpu_sibling[cpu.cpu_adx] = cpu.cpu_sibling as u32;
        }

        let nr_pco_states: u8 = order.perf_cpu_order.len() as u8;
        if nr_pco_states > LAVD_PCO_STATE_MAX as u8 {
            panic!("The generated performance vs. CPU order table is too complex ({nr_pco_states} states) to handle");
        }

        rodata.nr_pco_states = nr_pco_states;
        for (i, (_, pco)) in order.perf_cpu_order.iter().enumerate() {
            Self::init_pco_tuple(skel, i, pco);
            info!("{:#}", pco);
        }

        // Fill the remaining states with the last entry so that any state
        // index is valid on the BPF side.
        let (_, last_pco) = order.perf_cpu_order.last_key_value().unwrap();
        for i in nr_pco_states..LAVD_PCO_STATE_MAX as u8 {
            Self::init_pco_tuple(skel, i as usize, last_pco);
        }
    }

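    /// Fill in one PCO state: its performance boundary, the number of
    /// primary CPUs, and the CPU preference table (primary CPUs first,
    /// then the overflow CPUs).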
    fn init_pco_tuple(skel: &mut OpenBpfSkel, i: usize, pco: &PerfCpuOrder) {
        let cpus_perf = pco.cpus_perf.borrow();
        let cpus_ovflw = pco.cpus_ovflw.borrow();
        let pco_nr_primary = cpus_perf.len();

        let rodata = skel.maps.rodata_data.as_mut().unwrap();
        rodata.pco_bounds[i] = pco.perf_cap as u32;
        rodata.pco_nr_primary[i] = pco_nr_primary as u16;

        for (j, &cpu_adx) in cpus_perf.iter().enumerate() {
            rodata.pco_table[i][j] = cpu_adx as u16;
        }

        for (j, &cpu_adx) in cpus_ovflw.iter().enumerate() {
            let k = j + pco_nr_primary;
            rodata.pco_table[i][k] = cpu_adx as u16;
        }
    }

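    /// Initialize the per-compute-domain (cpdom) contexts in BPF: domain
    /// IDs, NUMA/LLC placement, the member-CPU bitmask, and the neighbor
    /// domains ordered by distance.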
    fn init_cpdoms(skel: &mut OpenBpfSkel, order: &CpuOrder) {
        for (k, v) in order.cpdom_map.iter() {
            let cpdom = &mut skel.maps.bss_data.as_mut().unwrap().cpdom_ctxs[v.cpdom_id];
            cpdom.id = v.cpdom_id as u64;
            cpdom.alt_id = v.cpdom_alt_id.get() as u64;
            cpdom.numa_id = k.numa_adx as u8;
            cpdom.llc_id = k.llc_adx as u8;
            cpdom.is_big = k.is_big as u8;
            cpdom.is_valid = 1;

            // Record each member CPU as a single bit in an array of u64 words.
            for cpu_id in v.cpu_ids.iter() {
                let i = cpu_id / 64;
                let j = cpu_id % 64;
                cpdom.__cpumask[i] |= 0x01 << j;
            }

            if v.neighbor_map.borrow().len() > LAVD_CPDOM_MAX_DIST as usize {
                panic!("The processor topology is too complex to handle in BPF.");
            }

            for (idx, (_dist, neighbors)) in v.neighbor_map.borrow().iter().enumerate() {
                let nr_neighbors = neighbors.borrow().len() as u8;
                if nr_neighbors > LAVD_CPDOM_MAX_NR as u8 {
                    panic!("The processor topology is too complex to handle in BPF.");
                }
                cpdom.nr_neighbors[idx] = nr_neighbors;
                for n in neighbors.borrow().iter() {
                    cpdom.neighbor_bits[idx] |= 0x1 << n;
                }
            }
        }
    }

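    /// Propagate command-line options and topology-wide constants to the
    /// BPF global variables, and set the struct_ops operation flags.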
    fn init_globals(skel: &mut OpenBpfSkel, opts: &Opts, order: &CpuOrder) {
        let bss_data = skel.maps.bss_data.as_mut().unwrap();
        bss_data.no_preemption = opts.no_preemption;
        bss_data.no_core_compaction = opts.no_core_compaction;
        bss_data.no_freq_scaling = opts.no_freq_scaling;
        bss_data.is_powersave_mode = opts.powersave;

        let rodata = skel.maps.rodata_data.as_mut().unwrap();
        rodata.nr_llcs = order.nr_llcs as u64;
        rodata.__nr_cpu_ids = *NR_CPU_IDS as u64;
        rodata.is_smt_active = order.smt_enabled;
        rodata.is_autopilot_on = opts.autopilot;
        rodata.verbose = opts.verbose;
        rodata.slice_max_ns = opts.slice_max_us * 1000;
        rodata.slice_min_ns = opts.slice_min_us * 1000;
        rodata.preempt_shift = opts.preempt_shift;
        rodata.no_use_em = opts.no_use_em as u8;
        rodata.no_wake_sync = opts.no_wake_sync;
        rodata.no_slice_boost = opts.no_slice_boost;
        rodata.per_cpu_dsq = opts.per_cpu_dsq;

        skel.struct_ops.lavd_ops_mut().flags = *compat::SCX_OPS_ENQ_EXITING
            | *compat::SCX_OPS_ENQ_LAST
            | *compat::SCX_OPS_ENQ_MIGRATION_DISABLED
            | *compat::SCX_OPS_KEEP_BUILTIN_IDLE;
    }

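    /// Return the next introspection-message sequence number, starting at 1.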
    fn get_msg_seq_id() -> u64 {
        static MSEQ: AtomicU64 = AtomicU64::new(0);
        MSEQ.fetch_add(1, Ordering::Relaxed) + 1
    }

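    /// Ring-buffer callback: decode a task sample emitted by the BPF side
    /// and forward it to the monitoring thread. Samples are silently
    /// dropped when the channel is full.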
    fn relay_introspec(data: &[u8], intrspc_tx: &Sender<SchedSample>) -> i32 {
        let mt = msg_task_ctx::from_bytes(data);
        if mt.hdr.kind != LAVD_MSG_TASKC {
            return 0;
        }

        let tx = mt.taskc_x;
        let tc = mt.taskc;
        let mseq = Scheduler::get_msg_seq_id();

        // The comm/stat buffers are fixed-size, NUL-terminated C strings.
        let tx_comm: &str = unsafe { CStr::from_ptr(tx.comm.as_ptr()) }.to_str().unwrap();
        let waker_comm: &str = unsafe { CStr::from_ptr(tc.waker_comm.as_ptr()) }
            .to_str()
            .unwrap();
        let tx_stat: &str = unsafe { CStr::from_ptr(tx.stat.as_ptr()) }.to_str().unwrap();

        match intrspc_tx.try_send(SchedSample {
            mseq,
            pid: tx.pid,
            comm: tx_comm.into(),
            stat: tx_stat.into(),
            cpu_id: tc.cpu_id,
            prev_cpu_id: tc.prev_cpu_id,
            suggested_cpu_id: tc.suggested_cpu_id,
            waker_pid: tc.waker_pid,
            waker_comm: waker_comm.into(),
            slice: tc.slice,
            lat_cri: tc.lat_cri,
            avg_lat_cri: tx.avg_lat_cri,
            static_prio: tx.static_prio,
            rerunnable_interval: tx.rerunnable_interval,
            resched_interval: tc.resched_interval,
            run_freq: tc.run_freq,
            avg_runtime: tc.avg_runtime,
            wait_freq: tc.wait_freq,
            wake_freq: tc.wake_freq,
            perf_cri: tc.perf_cri,
            thr_perf_cri: tx.thr_perf_cri,
            cpuperf_cur: tx.cpuperf_cur,
            cpu_util: tx.cpu_util,
            cpu_sutil: tx.cpu_sutil,
            nr_active: tx.nr_active,
            dsq_id: tx.dsq_id,
            dsq_consume_lat: tx.dsq_consume_lat,
            slice_used: tc.last_slice_used,
        }) {
            Ok(()) | Err(TrySendError::Full(_)) => 0,
            Err(e) => panic!("failed to send on intrspc_tx ({})", e),
        }
    }

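    /// Publish the pending introspection command to BPF and mark the
    /// scheduler as being monitored.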
    fn prep_introspec(&mut self) {
        let bss_data = self.skel.maps.bss_data.as_mut().unwrap();
        if !bss_data.is_monitored {
            bss_data.is_monitored = true;
        }
        bss_data.intrspc.cmd = self.intrspc.cmd;
        bss_data.intrspc.arg = self.intrspc.arg;
    }

    fn cleanup_introspec(&mut self) {
        self.skel.maps.bss_data.as_mut().unwrap().intrspc.cmd = LAVD_CMD_NOP;
    }

    fn get_pc(x: u64, y: u64) -> f64 {
        100. * x as f64 / y as f64
    }

    fn get_power_mode(power_mode: i32) -> &'static str {
        match power_mode as u32 {
            LAVD_PM_PERFORMANCE => "performance",
            LAVD_PM_BALANCED => "balanced",
            LAVD_PM_POWERSAVE => "powersave",
            _ => "unknown",
        }
    }

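    /// Serve one request from the stats server: register a new monitoring
    /// thread, report system-wide statistics, or collect a batch of
    /// per-task scheduling samples via the introspection ring buffer.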
    fn stats_req_to_res(&mut self, req: &StatsReq) -> Result<StatsRes> {
        Ok(match req {
            StatsReq::NewSampler(tid) => {
                self.rb_mgr.consume().unwrap();
                self.monitor_tid = Some(*tid);
                StatsRes::Ack
            }
            StatsReq::SysStatsReq { tid } => {
                if Some(*tid) != self.monitor_tid {
                    return Ok(StatsRes::Bye);
                }
                self.mseq_id += 1;

                let bss_data = self.skel.maps.bss_data.as_ref().unwrap();
                let st = bss_data.sys_stat;

                let mseq = self.mseq_id;
                let nr_queued_task = st.nr_queued_task;
                let nr_active = st.nr_active;
                let nr_sched = st.nr_sched;
                let nr_preempt = st.nr_preempt;
                let pc_pc = Self::get_pc(st.nr_perf_cri, nr_sched);
                let pc_lc = Self::get_pc(st.nr_lat_cri, nr_sched);
                let pc_x_migration = Self::get_pc(st.nr_x_migration, nr_sched);
                let nr_stealee = st.nr_stealee;
                let nr_big = st.nr_big;
                let pc_big = Self::get_pc(nr_big, nr_sched);
                let pc_pc_on_big = Self::get_pc(st.nr_pc_on_big, nr_big);
                let pc_lc_on_big = Self::get_pc(st.nr_lc_on_big, nr_big);
                let power_mode = Self::get_power_mode(bss_data.power_mode);
                let total_time = bss_data.performance_mode_ns
                    + bss_data.balanced_mode_ns
                    + bss_data.powersave_mode_ns;
                let pc_performance = Self::get_pc(bss_data.performance_mode_ns, total_time);
                let pc_balanced = Self::get_pc(bss_data.balanced_mode_ns, total_time);
                let pc_powersave = Self::get_pc(bss_data.powersave_mode_ns, total_time);

                StatsRes::SysStats(SysStats {
                    mseq,
                    nr_queued_task,
                    nr_active,
                    nr_sched,
                    nr_preempt,
                    pc_pc,
                    pc_lc,
                    pc_x_migration,
                    nr_stealee,
                    pc_big,
                    pc_pc_on_big,
                    pc_lc_on_big,
                    power_mode: power_mode.to_string(),
                    pc_performance,
                    pc_balanced,
                    pc_powersave,
                })
            }
            StatsReq::SchedSamplesNr {
                tid,
                nr_samples,
                interval_ms,
            } => {
                if Some(*tid) != self.monitor_tid {
                    return Ok(StatsRes::Bye);
                }

                self.intrspc.cmd = LAVD_CMD_SCHED_N;
                self.intrspc.arg = *nr_samples;
                self.prep_introspec();
                std::thread::sleep(Duration::from_millis(*interval_ms));
                self.rb_mgr.poll(Duration::from_millis(100)).unwrap();

                let mut samples = vec![];
                while let Ok(ts) = self.intrspc_rx.try_recv() {
                    samples.push(ts);
                }

                self.cleanup_introspec();

                StatsRes::SchedSamples(SchedSamples { samples })
            }
        })
    }

    fn stop_monitoring(&mut self) {
        let bss_data = self.skel.maps.bss_data.as_mut().unwrap();
        if bss_data.is_monitored {
            bss_data.is_monitored = false;
        }
    }

    pub fn exited(&mut self) -> bool {
        uei_exited!(&self.skel, uei)
    }

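    /// Switch the BPF-side power profile by running the `set_power_profile`
    /// BPF syscall program via test_run, passing the requested mode as its
    /// input context. A non-zero return value from the program is reported
    /// as an error.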
    fn set_power_profile(&mut self, mode: u32) -> Result<(), u32> {
        let prog = &mut self.skel.progs.set_power_profile;
        let mut args = power_arg {
            power_mode: mode as c_int,
        };
        let input = ProgramInput {
            context_in: Some(unsafe {
                std::slice::from_raw_parts_mut(
                    &mut args as *mut _ as *mut u8,
                    std::mem::size_of_val(&args),
                )
            }),
            ..Default::default()
        };
        let out = prog.test_run(input).unwrap();
        if out.return_value != 0 {
            return Err(out.return_value);
        }

        Ok(())
    }

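    /// Poll the platform's current power profile and, if it changed, apply
    /// the matching scheduler profile. Returns (keep_polling, profile);
    /// polling stops once an unknown profile is reported.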
    fn update_power_profile(&mut self, prev_profile: PowerProfile) -> (bool, PowerProfile) {
        let profile = fetch_power_profile(false);
        if profile == prev_profile {
            return (true, profile);
        }

        let _ = match profile {
            PowerProfile::Performance => self.set_power_profile(LAVD_PM_PERFORMANCE),
            PowerProfile::Balanced { .. } => self.set_power_profile(LAVD_PM_BALANCED),
            PowerProfile::Powersave => self.set_power_profile(LAVD_PM_POWERSAVE),
            PowerProfile::Unknown => {
                return (false, profile);
            }
        };

        info!("Set the scheduler's power profile to {profile} mode.");
        (true, profile)
    }

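    /// Main loop: apply the initial power profile, then serve stats
    /// requests with a one-second timeout until shutdown is requested or
    /// the BPF scheduler exits. Monitoring is stopped whenever no stats
    /// request arrives in time.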
    fn run(&mut self, opts: &Opts, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
        let (res_ch, req_ch) = self.stats_server.channels();
        let mut autopower = opts.autopower;
        let mut profile = PowerProfile::Unknown;

        if opts.performance {
            let _ = self.set_power_profile(LAVD_PM_PERFORMANCE);
        } else if opts.powersave {
            let _ = self.set_power_profile(LAVD_PM_POWERSAVE);
        } else {
            let _ = self.set_power_profile(LAVD_PM_BALANCED);
        }

        while !shutdown.load(Ordering::Relaxed) && !self.exited() {
            if autopower {
                (autopower, profile) = self.update_power_profile(profile);
            }

            match req_ch.recv_timeout(Duration::from_secs(1)) {
                Ok(req) => {
                    let res = self.stats_req_to_res(&req)?;
                    res_ch.send(res)?;
                }
                Err(RecvTimeoutError::Timeout) => {
                    self.stop_monitoring();
                }
                Err(e) => {
                    self.stop_monitoring();
                    Err(e)?
                }
            }
            self.cleanup_introspec();
        }
        self.rb_mgr.consume().unwrap();

        let _ = self.struct_ops.take();
        uei_report!(&self.skel, uei)
    }
}

impl Drop for Scheduler<'_> {
    fn drop(&mut self) {
        info!("Unregister {SCHEDULER_NAME} scheduler");

        if let Some(struct_ops) = self.struct_ops.take() {
            drop(struct_ops);
        }
    }
}

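/// Initialize terminal logging; each -v raises the log level from info to
/// debug to trace.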
fn init_log(opts: &Opts) {
    let llv = match opts.verbose {
        0 => simplelog::LevelFilter::Info,
        1 => simplelog::LevelFilter::Debug,
        _ => simplelog::LevelFilter::Trace,
    };
    let mut lcfg = simplelog::ConfigBuilder::new();
    lcfg.set_time_offset_to_local()
        .expect("Failed to set local time offset")
        .set_time_level(simplelog::LevelFilter::Error)
        .set_location_level(simplelog::LevelFilter::Off)
        .set_target_level(simplelog::LevelFilter::Off)
        .set_thread_level(simplelog::LevelFilter::Off);
    simplelog::TermLogger::init(
        llv,
        lcfg.build(),
        simplelog::TerminalMode::Stderr,
        simplelog::ColorChoice::Auto,
    )
    .unwrap();
}

fn main() -> Result<()> {
    let mut opts = Opts::parse();

    if opts.version {
        println!(
            "scx_lavd {}",
            build_id::full_version(env!("CARGO_PKG_VERSION"))
        );
        return Ok(());
    }

    if opts.help_stats {
        let sys_stats_meta_name = SysStats::meta().name;
        let sched_sample_meta_name = SchedSample::meta().name;
        let stats_meta_names: &[&str] = &[
            sys_stats_meta_name.as_str(),
            sched_sample_meta_name.as_str(),
        ];
        stats::server_data(0).describe_meta(&mut std::io::stdout(), Some(&stats_meta_names))?;
        return Ok(());
    }

    init_log(&opts);

    if opts.monitor.is_none() && opts.monitor_sched_samples.is_none() {
        opts.proc().expect("Conflicting command-line options");
        info!("{:#?}", opts);
    }

    let shutdown = Arc::new(AtomicBool::new(false));
    let shutdown_clone = shutdown.clone();
    ctrlc::set_handler(move || {
        shutdown_clone.store(true, Ordering::Relaxed);
    })
    .context("Error setting Ctrl-C handler")?;

    if let Some(nr_samples) = opts.monitor_sched_samples {
        let shutdown_copy = shutdown.clone();
        let jh = std::thread::spawn(move || {
            stats::monitor_sched_samples(nr_samples, shutdown_copy).unwrap()
        });
        let _ = jh.join();
        return Ok(());
    }

    if let Some(intv) = opts.monitor.or(opts.stats) {
        let shutdown_copy = shutdown.clone();
        let jh = std::thread::spawn(move || {
            stats::monitor(Duration::from_secs_f64(intv), shutdown_copy).unwrap()
        });
        if opts.monitor.is_some() {
            let _ = jh.join();
            return Ok(());
        }
    }

    let mut open_object = MaybeUninit::uninit();
    loop {
        let mut sched = Scheduler::init(&opts, &mut open_object)?;
        info!(
            "scx_lavd scheduler is initialized (build ID: {})",
            build_id::full_version(env!("CARGO_PKG_VERSION"))
        );
        info!("scx_lavd scheduler starts running.");
        if !sched.run(&opts, shutdown.clone())?.should_restart() {
            break;
        }
    }

    Ok(())
}