scx_cake/main.rs
1// SPDX-License-Identifier: GPL-2.0
2// scx_cake - sched_ext scheduler applying CAKE bufferbloat concepts to CPU scheduling
3
4mod calibrate;
5mod stats;
6mod topology;
7mod tui;
8
9use core::sync::atomic::Ordering;
10use std::os::fd::AsRawFd;
11use std::sync::atomic::AtomicBool;
12use std::sync::Arc;
13
14use anyhow::{Context, Result};
15use clap::{Parser, ValueEnum};
16use log::{info, warn};
17use nix::sys::signal::{SigSet, Signal};
18use nix::sys::signalfd::{SfdFlags, SignalFd};
19// Include the generated interface bindings
20#[allow(non_camel_case_types, non_upper_case_globals, dead_code)]
21mod bpf_intf {
22 include!(concat!(env!("OUT_DIR"), "/bpf_intf.rs"));
23}
24
25// Include the generated BPF skeleton
26#[allow(non_camel_case_types, non_upper_case_globals, dead_code)]
27mod bpf_skel {
28 include!(concat!(env!("OUT_DIR"), "/bpf_skel.rs"));
29}
30use bpf_skel::*;
31
32/// Scheduler profile presets
33#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
34pub enum Profile {
35 /// Ultra-low-latency for competitive esports (1ms quantum)
36 Esports,
37 /// Optimized for older/lower-power hardware (4ms quantum)
38 Legacy,
39 /// Low-latency profile optimized for gaming and interactive workloads
40 Gaming,
41 /// Balanced profile for general desktop use (same as gaming for now)
42 Default,
43}
44
45impl Profile {
46 /// Returns (quantum_us, new_flow_bonus_us, starvation_us)
47 fn values(&self) -> (u64, u64, u64) {
48 match self {
49 // Esports: Ultra-aggressive, 1ms quantum for maximum responsiveness
50 Profile::Esports => (1000, 4000, 50000),
51 // Legacy: High efficiency, 4ms quantum to reduce overhead on older CPUs
52 Profile::Legacy => (4000, 12000, 200000),
53 // Gaming: Aggressive latency, 2ms quantum
54 Profile::Gaming => (2000, 8000, 100000),
55 // Default: Same as gaming for now
56 Profile::Default => (2000, 8000, 100000),
57 }
58 }
59
60 /// Per-tier starvation thresholds in nanoseconds (4 tiers + padding)
61 fn starvation_threshold(&self) -> [u64; 8] {
62 match self {
63 Profile::Esports => [
64 1_500_000, // T0 Critical: 1.5ms
65 4_000_000, // T1 Interactive: 4ms
66 20_000_000, // T2 Frame: 20ms
67 50_000_000, // T3 Bulk: 50ms
68 50_000_000, 50_000_000, 50_000_000, 50_000_000, // Padding
69 ],
70 Profile::Legacy => [
71 6_000_000, // T0 Critical: 6ms
72 16_000_000, // T1 Interactive: 16ms
73 80_000_000, // T2 Frame: 80ms
74 200_000_000, // T3 Bulk: 200ms
75 200_000_000,
76 200_000_000,
77 200_000_000,
78 200_000_000, // Padding
79 ],
80 Profile::Gaming | Profile::Default => [
81 3_000_000, // T0 Critical: 3ms
82 8_000_000, // T1 Interactive: 8ms
83 40_000_000, // T2 Frame: 40ms
84 100_000_000, // T3 Bulk: 100ms
85 100_000_000,
86 100_000_000,
87 100_000_000,
88 100_000_000, // Padding
89 ],
90 }
91 }
92
93 /// Tier quantum multipliers (fixed-point, 1024 = 1.0x) — 4 tiers + padding
94 fn tier_multiplier(&self) -> [u32; 8] {
95 match self {
96 Profile::Esports | Profile::Legacy | Profile::Gaming | Profile::Default => [
97 768, // T0 Critical: 0.75x
98 1024, // T1 Interactive: 1.0x
99 1229, // T2 Frame: 1.2x
100 1434, // T3 Bulk: 1.4x
101 1434, 1434, 1434, 1434, // Padding
102 ],
103 }
104 }
105
106 /// Wait budget per tier in nanoseconds — 4 tiers + padding
107 fn wait_budget(&self) -> [u64; 8] {
108 match self {
109 Profile::Esports => [
110 50_000, // T0 Critical: 50µs
111 1_000_000, // T1 Interactive: 1ms
112 4_000_000, // T2 Frame: 4ms
113 0, // T3 Bulk: no limit
114 0, 0, 0, 0, // Padding
115 ],
116 Profile::Legacy => [
117 200_000, // T0 Critical: 200µs
118 4_000_000, // T1 Interactive: 4ms
119 16_000_000, // T2 Frame: 16ms
120 0, // T3 Bulk: no limit
121 0, 0, 0, 0, // Padding
122 ],
123 Profile::Gaming | Profile::Default => [
124 100_000, // T0 Critical: 100µs
125 2_000_000, // T1 Interactive: 2ms
126 8_000_000, // T2 Frame: 8ms
127 0, // T3 Bulk: no limit
128 0, 0, 0, 0, // Padding
129 ],
130 }
131 }
132
133 /// Consolidated tier config - packs quantum/multiplier/budget/starvation into 64-bit per tier.
134 fn tier_configs(&self, quantum_us: u64) -> [u64; 8] {
135 let starvation = self.starvation_threshold();
136 let multiplier = self.tier_multiplier();
137 let budget = self.wait_budget();
138
139 let mut configs = [0u64; 8];
140 for i in 0..8 {
141 configs[i] = (multiplier[i] as u64 & 0xFFF)
142 | ((quantum_us & 0xFFFF) << 12)
143 | (((budget[i] >> 10) & 0xFFFF) << 28)
144 | (((starvation[i] >> 10) & 0xFFFFF) << 44);
145 }
146 configs
147 }
148}
149
150/// 🍰 scx_cake: A sched_ext scheduler applying CAKE bufferbloat concepts
151///
152/// This scheduler adapts CAKE's DRR++ (Deficit Round Robin++) algorithm
153/// for CPU scheduling, providing low-latency scheduling for gaming and
154/// interactive workloads while maintaining fairness.
155///
156/// PROFILES set all tuning parameters at once. Individual options override profile defaults.
157///
158/// 4-TIER SYSTEM (classified by avg_runtime):
159/// T0 Critical (<100µs): IRQ, input, audio, network
160/// T1 Interact (<2ms): compositor, physics, AI
161/// T2 Frame (<8ms): game render, encoding
162/// T3 Bulk (≥8ms): compilation, background
163///
164/// EXAMPLES:
165/// scx_cake # Run with gaming profile (default)
166/// scx_cake -p esports # Ultra-low-latency for competitive play
167/// scx_cake --quantum 1500 # Gaming profile with custom quantum
168/// scx_cake -v # Run with live TUI stats display
169#[derive(Parser, Debug)]
170#[command(
171 author,
172 version,
173 about = "🍰 A sched_ext scheduler applying CAKE bufferbloat concepts to CPU scheduling",
174 verbatim_doc_comment
175)]
176struct Args {
177 /// Scheduler profile preset.
178 ///
179 /// Profiles configure all tier thresholds, quantum multipliers, and wait budgets.
180 /// Individual CLI options (--quantum, etc.) override profile values.
181 ///
182 /// ESPORTS: Ultra-low-latency for competitive gaming.
183 /// - Quantum: 1000µs, Starvation: 50ms
184 ///
185 /// LEGACY: Optimized for older/lower-power hardware.
186 /// - Quantum: 4000µs, Starvation: 200ms
187 ///
188 /// GAMING: Optimized for low-latency gaming and interactive workloads.
189 /// - Quantum: 2000µs, Starvation: 100ms
190 ///
191 /// DEFAULT: Balanced profile for general desktop use.
192 /// - Currently same as gaming; will diverge in future versions
193 #[arg(long, short, value_enum, default_value_t = Profile::Gaming, verbatim_doc_comment)]
194 profile: Profile,
195
196 /// Base scheduling time slice in MICROSECONDS [default: 2000].
197 ///
198 /// How long a task runs before potentially yielding.
199 ///
200 /// Smaller quantum = more responsive but higher overhead.
201 /// Esports: 1000µs | Gaming: 2000µs | Legacy: 4000µs
202 /// Recommended range: 1000-8000µs
203 #[arg(long, verbatim_doc_comment)]
204 quantum: Option<u64>,
205
206 /// Bonus time for newly woken tasks in MICROSECONDS [default: 8000].
207 ///
208 /// Tasks waking from sleep get this extra time added to their deficit,
209 /// allowing them to run longer on first dispatch. Helps bursty workloads.
210 ///
211 /// Esports: 4000µs | Gaming: 8000µs
212 /// Recommended range: 4000-16000µs
213 #[arg(long, verbatim_doc_comment)]
214 new_flow_bonus: Option<u64>,
215
216 /// Max run time before forced preemption in MICROSECONDS [default: 100000].
217 ///
218 /// Safety limit: tasks running longer than this are forcibly preempted.
219 /// Prevents any single task from monopolizing the CPU.
220 ///
221 /// Esports: 50000µs (50ms) | Gaming: 100000µs (100ms) | Legacy: 200000µs (200ms)
222 /// Recommended range: 50000-200000µs
223 #[arg(long, verbatim_doc_comment)]
224 starvation: Option<u64>,
225
226 /// Enable live TUI (Terminal User Interface) with real-time statistics.
227 ///
228 /// Shows dispatch counts per tier, tier transitions,
229 /// wait time stats, and system topology information.
230 /// Press 'q' to exit TUI mode.
231 #[arg(long, short, verbatim_doc_comment)]
232 verbose: bool,
233
234 /// Statistics refresh interval in SECONDS (only with --verbose).
235 ///
236 /// How often the TUI updates. Lower values = more responsive but
237 /// higher overhead. Has no effect without --verbose.
238 ///
239 /// Default: 1 second
240 #[arg(long, default_value_t = 1, verbatim_doc_comment)]
241 interval: u64,
242}
243
244impl Args {
245 /// Get effective values (profile defaults with CLI overrides applied)
246 fn effective_values(&self) -> (u64, u64, u64) {
247 let (q, nfb, starv) = self.profile.values();
248 (
249 self.quantum.unwrap_or(q),
250 self.new_flow_bonus.unwrap_or(nfb),
251 self.starvation.unwrap_or(starv),
252 )
253 }
254}
255
256struct Scheduler<'a> {
257 skel: BpfSkel<'a>,
258 args: Args,
259 topology: topology::TopologyInfo,
260 latency_matrix: Vec<Vec<f64>>,
261}
262
263impl<'a> Scheduler<'a> {
264 fn new(
265 args: Args,
266 open_object: &'a mut std::mem::MaybeUninit<libbpf_rs::OpenObject>,
267 ) -> Result<Self> {
268 use libbpf_rs::skel::{OpenSkel, SkelBuilder};
269
270 // Open and load the BPF skeleton
271 let skel_builder = BpfSkelBuilder::default();
272
273 let mut open_skel = skel_builder
274 .open(open_object)
275 .context("Failed to open BPF skeleton")?;
276
277 // Populate SCX enum RODATA from kernel BTF (SCX_DSQ_LOCAL_ON, SCX_KICK_PREEMPT, etc.)
278 scx_utils::import_enums!(open_skel);
279
280 // Detect system topology (CCDs, P/E cores)
281 let topo = topology::detect()?;
282
283 // Get effective values (profile + CLI overrides)
284 let (quantum, new_flow_bonus, _starvation) = args.effective_values();
285
286 // ETD: Empirical Topology Discovery — display-grade measurement
287 // Measures inter-core CAS latency for startup heatmap and TUI display
288 info!("Starting ETD calibration...");
289 let latency_matrix = calibrate::calibrate_full_matrix(
290 topo.nr_cpus,
291 &calibrate::EtdConfig::default(),
292 |current, total, is_complete| {
293 tui::render_calibration_progress(current, total, is_complete);
294 },
295 );
296
297 // Configure the scheduler via rodata (read-only data)
298 if let Some(rodata) = &mut open_skel.maps.rodata_data {
299 rodata.quantum_ns = quantum * 1000;
300 rodata.new_flow_bonus_ns = new_flow_bonus * 1000;
301 rodata.enable_stats = args.verbose;
302 rodata.tier_configs = args.profile.tier_configs(quantum);
303
304 // Topology: only has_hybrid is live (DVFS scaling in cake_tick)
305 rodata.has_hybrid = topo.has_hybrid_cores;
306
307 // Per-LLC DSQ partitioning: populate CPU→LLC mapping
308 let llc_count = topo.llc_cpu_mask.iter().filter(|&&m| m != 0).count() as u32;
309 rodata.nr_llcs = llc_count.max(1);
310 rodata.nr_cpus = topo.nr_cpus.min(64) as u32; // Rule 39: bounds kick scan loop
311 for (i, &llc_id) in topo.cpu_llc_id.iter().enumerate() {
312 rodata.cpu_llc_id[i] = llc_id as u32;
313 }
314 }
315
316 // Load the BPF program
317 let skel = open_skel.load().context("Failed to load BPF program")?;
318
319 Ok(Self {
320 skel,
321 args,
322 topology: topo,
323 latency_matrix,
324 })
325 }
326
327 fn run(&mut self, shutdown: Arc<AtomicBool>) -> Result<()> {
328 // Attach the scheduler
329 let _link = self
330 .skel
331 .maps
332 .cake_ops
333 .attach_struct_ops()
334 .context("Failed to attach scheduler")?;
335
336 self.show_startup_splash()?;
337
338 if self.args.verbose {
339 // Run TUI mode
340 tui::run_tui(
341 &mut self.skel,
342 shutdown.clone(),
343 self.args.interval,
344 self.topology.clone(),
345 )?;
346 } else {
347 // Event-based silent mode - block on signalfd, poll with 60s timeout for UEI check
348
349 // Block SIGINT and SIGTERM from normal delivery
350 let mut mask = SigSet::empty();
351 mask.add(Signal::SIGINT);
352 mask.add(Signal::SIGTERM);
353 mask.thread_block().context("Failed to block signals")?;
354
355 // Create signalfd to receive signals as readable events
356 let sfd = SignalFd::with_flags(&mask, SfdFlags::SFD_NONBLOCK)
357 .context("Failed to create signalfd")?;
358
359 use nix::poll::{poll, PollFd, PollFlags};
360 use std::os::fd::BorrowedFd;
361
362 loop {
363 // Block for up to 60 seconds, then check UEI
364 // poll() returns: >0 = readable, 0 = timeout, -1 = error
365 // SAFETY: sfd is valid for the duration of this loop
366 let poll_fd = unsafe {
367 PollFd::new(BorrowedFd::borrow_raw(sfd.as_raw_fd()), PollFlags::POLLIN)
368 };
369 let mut fds = [poll_fd];
370 let result = poll(&mut fds, nix::poll::PollTimeout::from(60_000u16)); // 60 seconds
371
372 match result {
373 Ok(n) if n > 0 => {
374 // Signal received - read it to clear and exit
375 if let Ok(Some(siginfo)) = sfd.read_signal() {
376 info!("Received signal {} - shutting down", siginfo.ssi_signo);
377 shutdown.store(true, Ordering::Relaxed);
378 }
379 break;
380 }
381 Ok(_) => {
382 // Timeout - check UEI
383 if scx_utils::uei_exited!(&self.skel, uei) {
384 match scx_utils::uei_report!(&self.skel, uei) {
385 Ok(reason) => {
386 warn!("BPF scheduler exited: {:?}", reason);
387 }
388 Err(e) => {
389 warn!("BPF scheduler exited (failed to get reason: {})", e);
390 }
391 }
392 break;
393 }
394 }
395 Err(nix::errno::Errno::EINTR) => {
396 // Interrupted - check shutdown flag
397 if shutdown.load(Ordering::Relaxed) {
398 break;
399 }
400 }
401 Err(e) => {
402 warn!("poll() error: {}", e);
403 break;
404 }
405 }
406 }
407 }
408
409 info!("scx_cake scheduler shutting down");
410 Ok(())
411 }
412
413 fn show_startup_splash(&self) -> Result<()> {
414 let (q, _nfb, starv) = self.args.effective_values();
415 let profile_str = format!("{:?}", self.args.profile).to_uppercase();
416
417 tui::render_startup_screen(tui::StartupParams {
418 topology: &self.topology,
419 latency_matrix: &self.latency_matrix,
420 profile: &profile_str,
421 quantum: q,
422 starvation: starv,
423 })
424 }
425}
426
427fn main() -> Result<()> {
428 env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
429
430 let args = Args::parse();
431
432 // Set up signal handler
433 let shutdown = Arc::new(AtomicBool::new(false));
434 let shutdown_clone = shutdown.clone();
435
436 ctrlc::set_handler(move || {
437 info!("Received shutdown signal");
438 shutdown_clone.store(true, Ordering::Relaxed);
439 })?;
440
441 // Create open object for BPF - needs to outlive scheduler
442 let mut open_object = std::mem::MaybeUninit::uninit();
443
444 // Create and run the scheduler
445 let mut scheduler = Scheduler::new(args, &mut open_object)?;
446 scheduler.run(shutdown)?;
447
448 Ok(())
449}