scx_flash/
main.rs

// SPDX-License-Identifier: GPL-2.0
//
// Copyright (c) 2024 Andrea Righi <arighi@nvidia.com>

// This software may be used and distributed according to the terms of the
// GNU General Public License version 2.
7
8mod bpf_skel;
9pub use bpf_skel::*;
10pub mod bpf_intf;
11pub use bpf_intf::*;
12
13mod stats;
14use std::mem::MaybeUninit;
15use std::sync::Arc;
16use std::sync::atomic::AtomicBool;
17use std::sync::atomic::Ordering;
18use std::time::Duration;
19
20use anyhow::Context;
21use anyhow::Result;
22use clap::Parser;
23use crossbeam::channel::RecvTimeoutError;
24use libbpf_rs::AsRawLibbpf;
25use libbpf_rs::OpenObject;
26use libbpf_rs::libbpf_sys::bpf_program__set_autoload;
27use log::info;
28use log::warn;
29use scx_stats::prelude::*;
30use scx_utils::Topology;
31use scx_utils::UserExitInfo;
32use scx_utils::build_id;
33use scx_utils::compat;
34use scx_utils::scx_ops_attach;
35use scx_utils::scx_ops_load;
36use scx_utils::scx_ops_open;
37use scx_utils::set_rlimit_infinity;
38use scx_utils::uei_exited;
39use scx_utils::uei_report;
40use stats::Metrics;
41
/// Scheduler name used in the startup banner, the `--version` output and the
/// unregister log message.
const SCHEDULER_NAME: &str = "scx_flash";
43
44#[derive(Debug, Parser)]
45struct Opts {
46    /// Exit debug dump buffer length. 0 indicates default.
47    #[clap(long, default_value = "0")]
48    exit_dump_len: u32,
49
50    /// Maximum scheduling slice duration in microseconds.
51    #[clap(short = 's', long, default_value = "20000")]
52    slice_us_max: u64,
53
54    /// Maximum time slice lag in microseconds.
55    ///
56    /// Increasing this value can help to enhance the responsiveness of interactive tasks, but it
57    /// can also make performance more "spikey".
58    #[clap(short = 'l', long, default_value = "20000")]
59    slice_us_lag: u64,
60
61    /// Enable kthreads prioritization.
62    ///
63    /// Enabling this can improve system performance, but it may also introduce interactivity
64    /// issues or unfairness in scenarios with high kthread activity, such as heavy I/O or network
65    /// traffic.
66    #[clap(short = 'k', long, action = clap::ArgAction::SetTrue)]
67    local_kthreads: bool,
68
69    /// Enable stats monitoring with the specified interval.
70    #[clap(long)]
71    stats: Option<f64>,
72
73    /// Run in stats monitoring mode with the specified interval. Scheduler
74    /// is not launched.
75    #[clap(long)]
76    monitor: Option<f64>,
77
78    /// Enable verbose output, including libbpf details.
79    #[clap(short = 'v', long, action = clap::ArgAction::SetTrue)]
80    verbose: bool,
81
82    /// Print scheduler version and exit.
83    #[clap(short = 'V', long, action = clap::ArgAction::SetTrue)]
84    version: bool,
85
86    /// Show descriptions for statistics.
87    #[clap(long)]
88    help_stats: bool,
89}
90
91struct Scheduler<'a> {
92    skel: BpfSkel<'a>,
93    struct_ops: Option<libbpf_rs::Link>,
94    stats_server: StatsServer<(), Metrics>,
95}
96
97impl<'a> Scheduler<'a> {
98    fn init(opts: &'a Opts, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
99        set_rlimit_infinity();
100
101        // Initialize CPU topology.
102        let topo = Topology::new().unwrap();
103
104        // Check host topology to determine if we need to enable SMT capabilities.
105        info!(
106            "{} {} {}",
107            SCHEDULER_NAME,
108            build_id::full_version(env!("CARGO_PKG_VERSION")),
109            if topo.smt_enabled {
110                "SMT on"
111            } else {
112                "SMT off"
113            }
114        );
115
116        // Initialize BPF connector.
117        let mut skel_builder = BpfSkelBuilder::default();
118        skel_builder.obj_builder.debug(opts.verbose);
119        let mut skel = scx_ops_open!(skel_builder, open_object, flash_ops)?;
120
121        skel.struct_ops.flash_ops_mut().exit_dump_len = opts.exit_dump_len;
122
123        // Override default BPF scheduling parameters.
124        skel.maps.rodata_data.slice_max = opts.slice_us_max * 1000;
125        skel.maps.rodata_data.slice_lag = opts.slice_us_lag * 1000;
126        skel.maps.rodata_data.local_kthreads = opts.local_kthreads;
127
128        skel.maps.rodata_data.smt_enabled = topo.smt_enabled;
129
130        // Conditionally load the kprobes used by the scheduler.
131        if compat::ksym_exists("vfs_fsync_range").unwrap_or(false) {
132            unsafe {
133                bpf_program__set_autoload(
134                    skel.progs
135                        .kprobe_vfs_fsync_range
136                        .as_libbpf_object()
137                        .as_ptr(),
138                    true,
139                );
140            }
141        } else {
142            warn!("vfs_fsync_range symbol is missing")
143        }
144
145        // Set scheduler flags.
146        skel.struct_ops.flash_ops_mut().flags = *compat::SCX_OPS_ENQ_EXITING
147            | *compat::SCX_OPS_ENQ_LAST
148            | *compat::SCX_OPS_ENQ_MIGRATION_DISABLED;
149        info!(
150            "scheduler flags: {:#x}",
151            skel.struct_ops.flash_ops_mut().flags
152        );
153
154        // Load the BPF program for validation.
155        let mut skel = scx_ops_load!(skel, flash_ops, uei)?;
156
157        // Attach the scheduler.
158        let struct_ops = Some(scx_ops_attach!(skel, flash_ops)?);
159        let stats_server = StatsServer::new(stats::server_data()).launch()?;
160
161        Ok(Self {
162            skel,
163            struct_ops,
164            stats_server,
165        })
166    }
167
168    fn get_metrics(&self) -> Metrics {
169        Metrics {
170            nr_kthread_dispatches: self.skel.maps.bss_data.nr_kthread_dispatches,
171            nr_direct_dispatches: self.skel.maps.bss_data.nr_direct_dispatches,
172            nr_shared_dispatches: self.skel.maps.bss_data.nr_shared_dispatches,
173        }
174    }
175
176    pub fn exited(&mut self) -> bool {
177        uei_exited!(&self.skel, uei)
178    }
179
180    fn run(&mut self, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
181        let (res_ch, req_ch) = self.stats_server.channels();
182        while !shutdown.load(Ordering::Relaxed) && !self.exited() {
183            match req_ch.recv_timeout(Duration::from_secs(1)) {
184                Ok(()) => res_ch.send(self.get_metrics())?,
185                Err(RecvTimeoutError::Timeout) => {}
186                Err(e) => Err(e)?,
187            }
188        }
189
190        self.struct_ops.take();
191        uei_report!(&self.skel, uei)
192    }
193}
194
195impl Drop for Scheduler<'_> {
196    fn drop(&mut self) {
197        info!("Unregister {} scheduler", SCHEDULER_NAME);
198    }
199}
200
201fn main() -> Result<()> {
202    let opts = Opts::parse();
203
204    if opts.version {
205        println!(
206            "{} {}",
207            SCHEDULER_NAME,
208            build_id::full_version(env!("CARGO_PKG_VERSION"))
209        );
210        return Ok(());
211    }
212
213    if opts.help_stats {
214        stats::server_data().describe_meta(&mut std::io::stdout(), None)?;
215        return Ok(());
216    }
217
218    let loglevel = simplelog::LevelFilter::Info;
219
220    let mut lcfg = simplelog::ConfigBuilder::new();
221    lcfg.set_time_level(simplelog::LevelFilter::Error)
222        .set_location_level(simplelog::LevelFilter::Off)
223        .set_target_level(simplelog::LevelFilter::Off)
224        .set_thread_level(simplelog::LevelFilter::Off);
225    simplelog::TermLogger::init(
226        loglevel,
227        lcfg.build(),
228        simplelog::TerminalMode::Stderr,
229        simplelog::ColorChoice::Auto,
230    )?;
231
232    let shutdown = Arc::new(AtomicBool::new(false));
233    let shutdown_clone = shutdown.clone();
234    ctrlc::set_handler(move || {
235        shutdown_clone.store(true, Ordering::Relaxed);
236    })
237    .context("Error setting Ctrl-C handler")?;
238
239    if let Some(intv) = opts.monitor.or(opts.stats) {
240        let shutdown_copy = shutdown.clone();
241        let jh = std::thread::spawn(move || {
242            stats::monitor(Duration::from_secs_f64(intv), shutdown_copy).unwrap()
243        });
244        if opts.monitor.is_some() {
245            let _ = jh.join();
246            return Ok(());
247        }
248    }
249
250    let mut open_object = MaybeUninit::uninit();
251    loop {
252        let mut sched = Scheduler::init(&opts, &mut open_object)?;
253        if !sched.run(shutdown.clone())?.should_restart() {
254            break;
255        }
256    }
257
258    Ok(())
259}