// The four `#[allow]` attributes below apply to `bpf_skel` only.
// NOTE(review): presumably `bpf_skel` is generated BPF skeleton code whose
// identifiers do not follow Rust naming conventions and whose items are not
// all used -- confirm.
#[allow(non_upper_case_globals)]
#[allow(non_camel_case_types)]
#[allow(non_snake_case)]
#[allow(dead_code)]
mod bpf_skel;

mod bpf_intf;

// `#[macro_use]` keeps the macros defined in `log` in scope for the rest of
// this file and for the modules declared after it, so `log` has to stay ahead
// of every module that uses them.
// NOTE(review): `log_info!` / `log_warn!` used in this file presumably come
// from this module -- confirm.
#[macro_use]
mod log;
mod adaptive;
mod cli;
mod procdb;
mod scheduler;
mod topology;
mod tuning;
mod watchdog;
24
25use std::mem::MaybeUninit;
26use std::sync::atomic::{AtomicBool, Ordering};
27use std::time::Duration;
28
29use anyhow::Result;
30use clap::{Parser, Subcommand};
31
32use scheduler::Scheduler;
33use scx_utils::build_id;
34
/// Process-wide stop flag.
///
/// Set to `true` by the Ctrl-C handler installed in `run_scheduler`, handed to
/// the watchdog and to the adaptive monitor loop, and polled by the BPF-only
/// statistics loop.
static SHUTDOWN: AtomicBool = AtomicBool::new(false);
36
37#[derive(Parser)]
38#[command(name = "scx_pandemonium")]
39#[command(
40 version,
41 disable_version_flag = true,
42 about = "PANDEMONIUM -- ADAPTIVE LINUX SCHEDULER"
43)]
44struct Cli {
45 #[command(subcommand)]
46 command: Option<SubCmd>,
47
48 #[arg(short, long)]
49 verbose: bool,
50
51 #[arg(long)]
53 version: bool,
54
55 #[arg(long, hide = true)]
57 dump_log: bool,
58
59 #[arg(long, hide = true)]
61 nr_cpus: Option<u64>,
62
63 #[arg(long)]
65 no_adaptive: bool,
66
67 #[arg(long)]
69 compositor: Vec<String>,
70}
71
// Helper subcommands, all hidden from `--help`. `main` dispatches them
// instead of starting the scheduler. (`//` rather than `///` so that clap's
// generated help text stays exactly as it is.)
#[derive(Subcommand)]
enum SubCmd {
    // Handled by `cli::probe::run_probe()`.
    #[command(hide = true)]
    Probe,

    // Handled by `cli::stress::run_stress_worker()`, which receives `cpu`.
    #[command(hide = true)]
    StressWorker(StressWorkerArgs),
}
82
// Arguments of the hidden `StressWorker` subcommand. (`//` rather than `///`
// so that clap's generated help text stays exactly as it is.)
#[derive(Parser)]
struct StressWorkerArgs {
    // Forwarded unchanged to `cli::stress::run_stress_worker`.
    // NOTE(review): presumably the index of the CPU the worker targets -- confirm.
    #[arg(long)]
    cpu: u32,
}
89
90fn main() -> Result<()> {
91 let cli = Cli::parse();
92
93 let verbose = cli.verbose;
94 let dump_log = cli.dump_log;
95 let nr_cpus = cli.nr_cpus;
96 let no_adaptive = cli.no_adaptive;
97 let extra_compositors = cli.compositor;
98
99 if cli.version {
100 println!(
101 "scx_pandemonium {}",
102 build_id::full_version(env!("CARGO_PKG_VERSION"))
103 );
104 return Ok(());
105 }
106
107 match cli.command {
108 None => run_scheduler(verbose, dump_log, nr_cpus, no_adaptive, &extra_compositors),
109 Some(SubCmd::Probe) => {
110 cli::probe::run_probe();
111 Ok(())
112 }
113 Some(SubCmd::StressWorker(args)) => {
114 cli::stress::run_stress_worker(args.cpu);
115 Ok(())
116 }
117 }
118}
119
/// Compositor names that `run_scheduler` registers with the scheduler on every
/// (re)start, before any names supplied with `--compositor`.
const DEFAULT_COMPOSITORS: &[&str] = &[
    "kwin",
    "gnome-shell",
    "mutter",
    "sway",
    "Hyprland",
    "picom",
    "weston",
    "labwc",
    "wayfire",
    "niri",
];
133
134fn run_scheduler(
135 verbose: bool,
136 dump_log: bool,
137 nr_cpus: Option<u64>,
138 no_adaptive: bool,
139 extra_compositors: &[String],
140) -> Result<()> {
141 ctrlc::set_handler(move || {
142 SHUTDOWN.store(true, Ordering::Relaxed);
143 })?;
144
145 watchdog::spawn(&SHUTDOWN, Duration::from_secs(10));
149
150 let nr_cpus_display =
151 nr_cpus.unwrap_or_else(|| libbpf_rs::num_possible_cpus().unwrap_or(1) as u64);
152 let governor = std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor")
153 .unwrap_or_default()
154 .trim()
155 .to_string();
156
157 let smt_on = std::fs::read_to_string("/sys/devices/system/cpu/smt/active")
158 .map(|s| s.trim() == "1")
159 .unwrap_or(false);
160
161 log_info!(
162 "scx_pandemonium {} SMT {}",
163 build_id::full_version(env!("CARGO_PKG_VERSION")),
164 if smt_on { "on" } else { "off" }
165 );
166 log_info!(
167 "CPUS: {} (governor: {})",
168 nr_cpus_display,
169 if governor.is_empty() {
170 "unknown"
171 } else {
172 &governor
173 }
174 );
175 log_info!("VERBOSE: {}", verbose);
176
177 let mut is_restart = false;
178 loop {
179 if is_restart {
183 std::thread::sleep(Duration::from_secs(2));
184 }
185
186 let mut open_object = MaybeUninit::uninit();
187 let mut sched = Scheduler::init(&mut open_object, nr_cpus)?;
188
189 match topology::CpuTopology::detect(nr_cpus_display as usize) {
191 Ok(topo) => {
192 topo.log_summary();
193 if let Err(e) = topo.populate_bpf_map(&sched) {
194 log_warn!("CACHE TOPOLOGY MAP WRITE FAILED: {}", e);
195 }
196 if let Err(e) = topo.populate_l2_siblings_map(&sched) {
197 log_warn!("L2 SIBLINGS MAP WRITE FAILED: {}", e);
198 }
199 let (reff, rank, spectrum) = topo.compute_resistance_affinity();
203 topo.log_resistance_affinity(&reff, &rank, spectrum);
204 if let Err(e) = topo.populate_affinity_rank_map(&sched, &rank) {
205 log_warn!("AFFINITY RANK MAP WRITE FAILED: {}", e);
206 }
207 if let Err(e) = sched.write_topology_fields(spectrum.tau_ns, spectrum.codel_eq_ns) {
211 log_warn!("TOPOLOGY KNOB WRITE FAILED: {}", e);
212 }
213 }
214 Err(e) => log_warn!("CACHE TOPOLOGY DETECT FAILED: {}", e),
215 }
216
217 for name in DEFAULT_COMPOSITORS {
219 if let Err(e) = sched.write_compositor(name) {
220 log_warn!("COMPOSITOR MAP WRITE FAILED: {} ({})", name, e);
221 }
222 }
223 for name in extra_compositors {
224 if let Err(e) = sched.write_compositor(name) {
225 log_warn!("COMPOSITOR MAP WRITE FAILED: {} ({})", name, e);
226 }
227 }
228
229 let should_restart = if no_adaptive {
230 log_info!("PANDEMONIUM IS ACTIVE (BPF ONLY, CTRL+C TO EXIT)");
233 let mut prev = scheduler::PandemoniumStats::default();
234 while !SHUTDOWN.load(Ordering::Relaxed) && !sched.exited() {
235 watchdog::LOOP_HEARTBEAT.fetch_add(1, Ordering::Relaxed);
236 std::thread::sleep(Duration::from_secs(1));
237
238 let stats = sched.read_stats();
239
240 let delta_d = stats.nr_dispatches.wrapping_sub(prev.nr_dispatches);
241 let delta_idle = stats.nr_idle_hits.wrapping_sub(prev.nr_idle_hits);
242 let delta_shared = stats.nr_shared.wrapping_sub(prev.nr_shared);
243 let delta_preempt = stats.nr_preempt.wrapping_sub(prev.nr_preempt);
244 let delta_keep = stats.nr_keep_running.wrapping_sub(prev.nr_keep_running);
245 let delta_wake_sum = stats.wake_lat_sum.wrapping_sub(prev.wake_lat_sum);
246 let delta_wake_samples = stats.wake_lat_samples.wrapping_sub(prev.wake_lat_samples);
247 let delta_hard = stats.nr_hard_kicks.wrapping_sub(prev.nr_hard_kicks);
248 let delta_soft = stats.nr_soft_kicks.wrapping_sub(prev.nr_soft_kicks);
249 let delta_enq_wake = stats.nr_enq_wakeup.wrapping_sub(prev.nr_enq_wakeup);
250 let delta_enq_requeue = stats.nr_enq_requeue.wrapping_sub(prev.nr_enq_requeue);
251 let wake_avg_us = if delta_wake_samples > 0 {
252 delta_wake_sum / delta_wake_samples / 1000
253 } else {
254 0
255 };
256
257 let d_idle_sum = stats.wake_lat_idle_sum.wrapping_sub(prev.wake_lat_idle_sum);
258 let d_idle_cnt = stats.wake_lat_idle_cnt.wrapping_sub(prev.wake_lat_idle_cnt);
259 let d_kick_sum = stats.wake_lat_kick_sum.wrapping_sub(prev.wake_lat_kick_sum);
260 let d_kick_cnt = stats.wake_lat_kick_cnt.wrapping_sub(prev.wake_lat_kick_cnt);
261 let lat_idle_us = if d_idle_cnt > 0 {
262 d_idle_sum / d_idle_cnt / 1000
263 } else {
264 0
265 };
266 let lat_kick_us = if d_kick_cnt > 0 {
267 d_kick_sum / d_kick_cnt / 1000
268 } else {
269 0
270 };
271 let delta_reenq = stats.nr_reenqueue.wrapping_sub(prev.nr_reenqueue);
272
273 let dl2_hb = stats.nr_l2_hit_batch.wrapping_sub(prev.nr_l2_hit_batch);
275 let dl2_mb = stats.nr_l2_miss_batch.wrapping_sub(prev.nr_l2_miss_batch);
276 let dl2_hi = stats
277 .nr_l2_hit_interactive
278 .wrapping_sub(prev.nr_l2_hit_interactive);
279 let dl2_mi = stats
280 .nr_l2_miss_interactive
281 .wrapping_sub(prev.nr_l2_miss_interactive);
282 let dl2_hl = stats
283 .nr_l2_hit_lat_crit
284 .wrapping_sub(prev.nr_l2_hit_lat_crit);
285 let dl2_ml = stats
286 .nr_l2_miss_lat_crit
287 .wrapping_sub(prev.nr_l2_miss_lat_crit);
288 let l2_pct_b = if dl2_hb + dl2_mb > 0 {
289 dl2_hb * 100 / (dl2_hb + dl2_mb)
290 } else {
291 0
292 };
293 let l2_pct_i = if dl2_hi + dl2_mi > 0 {
294 dl2_hi * 100 / (dl2_hi + dl2_mi)
295 } else {
296 0
297 };
298 let l2_pct_l = if dl2_hl + dl2_ml > 0 {
299 dl2_hl * 100 / (dl2_hl + dl2_ml)
300 } else {
301 0
302 };
303
304 let idle_pct = if delta_d > 0 {
305 delta_idle * 100 / delta_d
306 } else {
307 0
308 };
309
310 let sojourn_ms = stats.batch_sojourn_ns / 1_000_000;
311 let longrun_label = if stats.longrun_mode_active > 0 {
312 " LONGRUN"
313 } else {
314 ""
315 };
316
317 if verbose {
318 println!(
319 "d/s: {:<8} idle: {}% shared: {:<6} preempt: {:<4} keep: {:<4} kick: H={:<4} S={:<4} enq: W={:<4} R={:<4} wake: {}us lat_idle: {}us lat_kick: {}us reenq: {} sjrn: {}ms l2: B={}% I={}% L={}% [BPF{}]",
320 delta_d, idle_pct, delta_shared, delta_preempt, delta_keep,
321 delta_hard, delta_soft, delta_enq_wake, delta_enq_requeue,
322 wake_avg_us, lat_idle_us, lat_kick_us,
323 delta_reenq, sojourn_ms, l2_pct_b, l2_pct_i, l2_pct_l,
324 longrun_label,
325 );
326 }
327
328 sched.log.snapshot(
329 delta_d,
330 delta_idle,
331 delta_shared,
332 delta_preempt,
333 delta_keep,
334 wake_avg_us,
335 delta_hard,
336 delta_soft,
337 lat_idle_us,
338 lat_kick_us,
339 );
340
341 prev = stats;
342 }
343
344 let knobs = sched.read_tuning_knobs();
346 let final_stats = sched.read_stats();
347 let l2_total_b = final_stats.nr_l2_hit_batch + final_stats.nr_l2_miss_batch;
348 let l2_total_i = final_stats.nr_l2_hit_interactive + final_stats.nr_l2_miss_interactive;
349 let l2_total_l = final_stats.nr_l2_hit_lat_crit + final_stats.nr_l2_miss_lat_crit;
350 let l2_cum_b = if l2_total_b > 0 {
351 final_stats.nr_l2_hit_batch * 100 / l2_total_b
352 } else {
353 0
354 };
355 let l2_cum_i = if l2_total_i > 0 {
356 final_stats.nr_l2_hit_interactive * 100 / l2_total_i
357 } else {
358 0
359 };
360 let l2_cum_l = if l2_total_l > 0 {
361 final_stats.nr_l2_hit_lat_crit * 100 / l2_total_l
362 } else {
363 0
364 };
365 println!(
366 "[KNOBS] regime=BPF slice_ns={} batch_ns={} preempt_ns={} lag={} l2_hit=B:{}%/I:{}%/L:{}%",
367 knobs.slice_ns, knobs.batch_slice_ns,
368 knobs.preempt_thresh_ns,
369 knobs.lag_scale, l2_cum_b, l2_cum_i, l2_cum_l,
370 );
371
372 sched.read_exit_info()
373 } else {
374 log_info!("PANDEMONIUM IS ACTIVE (CTRL+C TO EXIT)");
376 adaptive::monitor_loop(&mut sched, &SHUTDOWN, verbose, nr_cpus_display)?
377 };
378
379 log_info!("PANDEMONIUM IS SHUTTING DOWN");
380
381 if dump_log {
382 sched.log.dump();
383 }
384 sched.log.summary();
385
386 if !should_restart || SHUTDOWN.load(Ordering::Relaxed) {
387 break;
388 }
389
390 SHUTDOWN.store(false, Ordering::Relaxed);
392 log_info!("RESTARTING PANDEMONIUM...");
393 is_restart = true;
394 }
395
396 log_info!("Shutdown complete");
397 Ok(())
398}