1#[allow(non_upper_case_globals)]
8#[allow(non_camel_case_types)]
9#[allow(non_snake_case)]
10#[allow(dead_code)]
11mod bpf_skel;
12
13#[macro_use]
14mod log;
15mod adaptive;
16mod cli;
17mod procdb;
18mod scheduler;
19mod topology;
20mod tuning;
21
22use std::mem::MaybeUninit;
23use std::sync::atomic::{AtomicBool, Ordering};
24use std::time::Duration;
25
26use anyhow::Result;
27use clap::{Parser, Subcommand};
28
29use scheduler::Scheduler;
30use scx_utils::build_id;
31
// Process-wide stop flag: the Ctrl-C handler installed in `run_scheduler`
// sets it to `true`, and the monitoring loops poll it to know when to exit.
static SHUTDOWN: AtomicBool = AtomicBool::new(false);
33
34#[derive(Parser)]
35#[command(name = "scx_pandemonium")]
36#[command(
37 version,
38 disable_version_flag = true,
39 about = "PANDEMONIUM -- ADAPTIVE LINUX SCHEDULER"
40)]
41struct Cli {
42 #[command(subcommand)]
43 command: Option<SubCmd>,
44
45 #[arg(short, long)]
46 verbose: bool,
47
48 #[arg(long)]
50 version: bool,
51
52 #[arg(long)]
53 dump_log: bool,
54
55 #[arg(long)]
57 nr_cpus: Option<u64>,
58
59 #[arg(long)]
61 no_adaptive: bool,
62
63 #[arg(long)]
65 compositor: Vec<String>,
66}
67
68#[derive(Subcommand)]
69enum SubCmd {
70 Check,
72
73 Probe(ProbeArgs),
75
76 Start(StartArgs),
78
79 Dmesg,
81
82 Bench(BenchArgs),
84
85 BenchRun(BenchRunArgs),
87
88 Test,
90
91 StressWorker(StressWorkerArgs),
93}
94
95#[derive(Parser)]
96struct ProbeArgs {
97 #[arg(long)]
99 death_pipe_fd: Option<i32>,
100}
101
102#[derive(Parser)]
103struct StressWorkerArgs {
104 #[arg(long)]
106 cpu: u32,
107}
108
109#[derive(Parser)]
110struct StartArgs {
111 #[arg(long)]
113 observe: bool,
114
115 #[arg(last = true)]
117 sched_args: Vec<String>,
118}
119
120#[derive(Parser)]
121struct BenchArgs {
122 #[arg(long, value_enum)]
124 mode: cli::bench::BenchMode,
125
126 #[arg(long)]
128 cmd: Option<String>,
129
130 #[arg(long, default_value_t = 3)]
132 iterations: usize,
133
134 #[arg(long)]
136 clean_cmd: Option<String>,
137
138 #[arg(last = true)]
140 sched_args: Vec<String>,
141}
142
143#[derive(Parser)]
144struct BenchRunArgs {
145 #[arg(long, value_enum)]
147 mode: cli::bench::BenchMode,
148
149 #[arg(long)]
151 cmd: Option<String>,
152
153 #[arg(long, default_value_t = 3)]
155 iterations: usize,
156
157 #[arg(long)]
159 clean_cmd: Option<String>,
160
161 #[arg(last = true)]
163 sched_args: Vec<String>,
164}
165
166fn main() -> Result<()> {
167 let cli = Cli::parse();
168
169 let verbose = cli.verbose;
170 let dump_log = cli.dump_log;
171 let nr_cpus = cli.nr_cpus;
172 let no_adaptive = cli.no_adaptive;
173 let extra_compositors = cli.compositor;
174
175 if cli.version {
176 println!(
177 "scx_pandemonium {}",
178 build_id::full_version(env!("CARGO_PKG_VERSION"))
179 );
180 return Ok(());
181 }
182
183 match cli.command {
184 None => run_scheduler(verbose, dump_log, nr_cpus, no_adaptive, &extra_compositors),
185 Some(SubCmd::Check) => cli::check::run_check(),
186 Some(SubCmd::Probe(args)) => {
187 cli::probe::run_probe(args.death_pipe_fd);
188 Ok(())
189 }
190 Some(SubCmd::Start(args)) => cli::run::run_start(args.observe, &args.sched_args),
191 Some(SubCmd::Dmesg) => cli::run::run_dmesg(),
192 Some(SubCmd::Bench(args)) => cli::bench::run_bench(
193 args.mode,
194 args.cmd.as_deref(),
195 args.iterations,
196 args.clean_cmd.as_deref(),
197 &args.sched_args,
198 ),
199 Some(SubCmd::BenchRun(args)) => cli::bench::run_bench_run(
200 args.mode,
201 args.cmd.as_deref(),
202 args.iterations,
203 args.clean_cmd.as_deref(),
204 &args.sched_args,
205 ),
206 Some(SubCmd::Test) => cli::test_gate::run_test_gate(),
207 Some(SubCmd::StressWorker(args)) => {
208 cli::stress::run_stress_worker(args.cpu);
209 Ok(())
210 }
211 }
212}
213
// Compositor names that `run_scheduler` always registers with the scheduler,
// in this order, before any names supplied on the command line.
#[rustfmt::skip]
const DEFAULT_COMPOSITORS: &[&str] = &[
    "kwin", "gnome-shell", "mutter", "sway",
    "Hyprland", "picom", "weston", "labwc",
    "wayfire", "niri", "pandemonium",
];
228
229fn run_scheduler(
230 verbose: bool,
231 dump_log: bool,
232 nr_cpus: Option<u64>,
233 no_adaptive: bool,
234 extra_compositors: &[String],
235) -> Result<()> {
236 ctrlc::set_handler(move || {
237 SHUTDOWN.store(true, Ordering::Relaxed);
238 })?;
239
240 let nr_cpus_display =
241 nr_cpus.unwrap_or_else(|| libbpf_rs::num_possible_cpus().unwrap_or(1) as u64);
242 let governor = std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor")
243 .unwrap_or_default()
244 .trim()
245 .to_string();
246
247 let smt_on = std::fs::read_to_string("/sys/devices/system/cpu/smt/active")
248 .map(|s| s.trim() == "1")
249 .unwrap_or(false);
250
251 log_info!(
252 "scx_pandemonium {} SMT {}",
253 build_id::full_version(env!("CARGO_PKG_VERSION")),
254 if smt_on { "on" } else { "off" }
255 );
256 log_info!(
257 "CPUS: {} (governor: {})",
258 nr_cpus_display,
259 if governor.is_empty() {
260 "unknown"
261 } else {
262 &governor
263 }
264 );
265 log_info!("VERBOSE: {}", verbose);
266
267 let mut is_restart = false;
268 loop {
269 if is_restart {
273 std::thread::sleep(Duration::from_secs(2));
274 }
275
276 let mut open_object = MaybeUninit::uninit();
277 let mut sched = Scheduler::init(&mut open_object, nr_cpus)?;
278
279 match topology::CpuTopology::detect(nr_cpus_display as usize) {
281 Ok(topo) => {
282 topo.log_summary();
283 if let Err(e) = topo.populate_bpf_map(&sched) {
284 log_warn!("CACHE TOPOLOGY MAP WRITE FAILED: {}", e);
285 }
286 if let Err(e) = topo.populate_l2_siblings_map(&sched) {
287 log_warn!("L2 SIBLINGS MAP WRITE FAILED: {}", e);
288 }
289 let (reff, rank) = topo.compute_resistance_affinity();
292 topo.log_resistance_affinity(&reff, &rank);
293 if let Err(e) = topo.populate_affinity_rank_map(&sched, &rank) {
294 log_warn!("AFFINITY RANK MAP WRITE FAILED: {}", e);
295 }
296 }
297 Err(e) => log_warn!("CACHE TOPOLOGY DETECT FAILED: {}", e),
298 }
299
300 for name in DEFAULT_COMPOSITORS {
302 if let Err(e) = sched.write_compositor(name) {
303 log_warn!("COMPOSITOR MAP WRITE FAILED: {} ({})", name, e);
304 }
305 }
306 for name in extra_compositors {
307 if let Err(e) = sched.write_compositor(name) {
308 log_warn!("COMPOSITOR MAP WRITE FAILED: {} ({})", name, e);
309 }
310 }
311
312 let should_restart = if no_adaptive {
313 log_info!("PANDEMONIUM IS ACTIVE (BPF ONLY, CTRL+C TO EXIT)");
316 let mut prev = scheduler::PandemoniumStats::default();
317 while !SHUTDOWN.load(Ordering::Relaxed) && !sched.exited() {
318 std::thread::sleep(Duration::from_secs(1));
319
320 let stats = sched.read_stats();
321
322 let delta_d = stats.nr_dispatches.wrapping_sub(prev.nr_dispatches);
323 let delta_idle = stats.nr_idle_hits.wrapping_sub(prev.nr_idle_hits);
324 let delta_shared = stats.nr_shared.wrapping_sub(prev.nr_shared);
325 let delta_preempt = stats.nr_preempt.wrapping_sub(prev.nr_preempt);
326 let delta_keep = stats.nr_keep_running.wrapping_sub(prev.nr_keep_running);
327 let delta_wake_sum = stats.wake_lat_sum.wrapping_sub(prev.wake_lat_sum);
328 let delta_wake_samples = stats.wake_lat_samples.wrapping_sub(prev.wake_lat_samples);
329 let delta_hard = stats.nr_hard_kicks.wrapping_sub(prev.nr_hard_kicks);
330 let delta_soft = stats.nr_soft_kicks.wrapping_sub(prev.nr_soft_kicks);
331 let delta_enq_wake = stats.nr_enq_wakeup.wrapping_sub(prev.nr_enq_wakeup);
332 let delta_enq_requeue = stats.nr_enq_requeue.wrapping_sub(prev.nr_enq_requeue);
333 let wake_avg_us = if delta_wake_samples > 0 {
334 delta_wake_sum / delta_wake_samples / 1000
335 } else {
336 0
337 };
338
339 let d_idle_sum = stats.wake_lat_idle_sum.wrapping_sub(prev.wake_lat_idle_sum);
340 let d_idle_cnt = stats.wake_lat_idle_cnt.wrapping_sub(prev.wake_lat_idle_cnt);
341 let d_kick_sum = stats.wake_lat_kick_sum.wrapping_sub(prev.wake_lat_kick_sum);
342 let d_kick_cnt = stats.wake_lat_kick_cnt.wrapping_sub(prev.wake_lat_kick_cnt);
343 let lat_idle_us = if d_idle_cnt > 0 {
344 d_idle_sum / d_idle_cnt / 1000
345 } else {
346 0
347 };
348 let lat_kick_us = if d_kick_cnt > 0 {
349 d_kick_sum / d_kick_cnt / 1000
350 } else {
351 0
352 };
353 let delta_procdb = stats.nr_procdb_hits.wrapping_sub(prev.nr_procdb_hits);
354 let delta_reenq = stats.nr_reenqueue.wrapping_sub(prev.nr_reenqueue);
355
356 let dl2_hb = stats.nr_l2_hit_batch.wrapping_sub(prev.nr_l2_hit_batch);
358 let dl2_mb = stats.nr_l2_miss_batch.wrapping_sub(prev.nr_l2_miss_batch);
359 let dl2_hi = stats
360 .nr_l2_hit_interactive
361 .wrapping_sub(prev.nr_l2_hit_interactive);
362 let dl2_mi = stats
363 .nr_l2_miss_interactive
364 .wrapping_sub(prev.nr_l2_miss_interactive);
365 let dl2_hl = stats
366 .nr_l2_hit_lat_crit
367 .wrapping_sub(prev.nr_l2_hit_lat_crit);
368 let dl2_ml = stats
369 .nr_l2_miss_lat_crit
370 .wrapping_sub(prev.nr_l2_miss_lat_crit);
371 let l2_pct_b = if dl2_hb + dl2_mb > 0 {
372 dl2_hb * 100 / (dl2_hb + dl2_mb)
373 } else {
374 0
375 };
376 let l2_pct_i = if dl2_hi + dl2_mi > 0 {
377 dl2_hi * 100 / (dl2_hi + dl2_mi)
378 } else {
379 0
380 };
381 let l2_pct_l = if dl2_hl + dl2_ml > 0 {
382 dl2_hl * 100 / (dl2_hl + dl2_ml)
383 } else {
384 0
385 };
386
387 let idle_pct = if delta_d > 0 {
388 delta_idle * 100 / delta_d
389 } else {
390 0
391 };
392
393 let sojourn_ms = stats.batch_sojourn_ns / 1_000_000;
394 let delta_burst = stats.burst_mode_active.wrapping_sub(prev.burst_mode_active);
395 let burst_label = if delta_burst > 0 { " BURST" } else { "" };
396 let longrun_label = if stats.longrun_mode_active > 0 {
397 " LONGRUN"
398 } else {
399 ""
400 };
401
402 if verbose {
403 println!(
404 "d/s: {:<8} idle: {}% shared: {:<6} preempt: {:<4} keep: {:<4} kick: H={:<4} S={:<4} enq: W={:<4} R={:<4} wake: {}us lat_idle: {}us lat_kick: {}us procdb: {} reenq: {} sjrn: {}ms l2: B={}% I={}% L={}% [BPF{}{}]",
405 delta_d, idle_pct, delta_shared, delta_preempt, delta_keep,
406 delta_hard, delta_soft, delta_enq_wake, delta_enq_requeue,
407 wake_avg_us, lat_idle_us, lat_kick_us, delta_procdb,
408 delta_reenq, sojourn_ms, l2_pct_b, l2_pct_i, l2_pct_l,
409 burst_label, longrun_label,
410 );
411 }
412
413 sched.log.snapshot(
414 delta_d,
415 delta_idle,
416 delta_shared,
417 delta_preempt,
418 delta_keep,
419 wake_avg_us,
420 delta_hard,
421 delta_soft,
422 lat_idle_us,
423 lat_kick_us,
424 );
425
426 prev = stats;
427 }
428
429 let knobs = sched.read_tuning_knobs();
431 let final_stats = sched.read_stats();
432 let l2_total_b = final_stats.nr_l2_hit_batch + final_stats.nr_l2_miss_batch;
433 let l2_total_i = final_stats.nr_l2_hit_interactive + final_stats.nr_l2_miss_interactive;
434 let l2_total_l = final_stats.nr_l2_hit_lat_crit + final_stats.nr_l2_miss_lat_crit;
435 let l2_cum_b = if l2_total_b > 0 {
436 final_stats.nr_l2_hit_batch * 100 / l2_total_b
437 } else {
438 0
439 };
440 let l2_cum_i = if l2_total_i > 0 {
441 final_stats.nr_l2_hit_interactive * 100 / l2_total_i
442 } else {
443 0
444 };
445 let l2_cum_l = if l2_total_l > 0 {
446 final_stats.nr_l2_hit_lat_crit * 100 / l2_total_l
447 } else {
448 0
449 };
450 println!(
451 "[KNOBS] regime=BPF slice_ns={} batch_ns={} preempt_ns={} demotion_ns={} lag={} l2_hit=B:{}%/I:{}%/L:{}%",
452 knobs.slice_ns, knobs.batch_slice_ns,
453 knobs.preempt_thresh_ns, knobs.cpu_bound_thresh_ns,
454 knobs.lag_scale, l2_cum_b, l2_cum_i, l2_cum_l,
455 );
456
457 sched.read_exit_info()
458 } else {
459 log_info!("PANDEMONIUM IS ACTIVE (CTRL+C TO EXIT)");
461 adaptive::monitor_loop(&mut sched, &SHUTDOWN, verbose, nr_cpus_display)?
462 };
463
464 log_info!("PANDEMONIUM IS SHUTTING DOWN");
465
466 if dump_log {
467 sched.log.dump();
468 }
469 sched.log.summary();
470
471 if !should_restart || SHUTDOWN.load(Ordering::Relaxed) {
472 break;
473 }
474
475 SHUTDOWN.store(false, Ordering::Relaxed);
477 log_info!("RESTARTING PANDEMONIUM...");
478 is_restart = true;
479 }
480
481 log_info!("Shutdown complete");
482 Ok(())
483}