Skip to main content

scx_bpfland/
stats.rs

1use std::io::Write;
2use std::sync::atomic::AtomicBool;
3use std::sync::atomic::Ordering;
4use std::sync::Arc;
5use std::time::Duration;
6
7use anyhow::Result;
8use scx_stats::prelude::*;
9use scx_stats_derive::stat_doc;
10use scx_stats_derive::Stats;
11use serde::Deserialize;
12use serde::Serialize;
13
14#[stat_doc]
15#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
16#[stat(top)]
17pub struct Metrics {
18    #[stat(desc = "Number of running tasks")]
19    pub nr_running: u64,
20    #[stat(desc = "Number of online CPUs")]
21    pub nr_cpus: u64,
22    #[stat(desc = "Number of kthread direct dispatches")]
23    pub nr_kthread_dispatches: u64,
24    #[stat(desc = "Number of task direct dispatches")]
25    pub nr_direct_dispatches: u64,
26    #[stat(desc = "Number of regular task dispatches")]
27    pub nr_shared_dispatches: u64,
28    // TIMELY stats (zero when timely mode is disabled)
29    #[stat(desc = "Number of delay recovery dispatches")]
30    pub nr_delay_recovery_dispatches: u64,
31    #[stat(desc = "Number of delay middle add dispatches")]
32    pub nr_delay_middle_add_dispatches: u64,
33    #[stat(desc = "Number of delay fast recovery dispatches")]
34    pub nr_delay_fast_recovery_dispatches: u64,
35    #[stat(desc = "Number of delay rate-limited dispatches")]
36    pub nr_delay_rate_limited_dispatches: u64,
37    #[stat(desc = "Number of gain floor dispatches")]
38    pub nr_gain_floor_dispatches: u64,
39    #[stat(desc = "Number of gain ceiling dispatches")]
40    pub nr_gain_ceiling_dispatches: u64,
41    #[stat(desc = "Number of delay low region samples")]
42    pub nr_delay_low_region_samples: u64,
43    #[stat(desc = "Number of delay mid region samples")]
44    pub nr_delay_mid_region_samples: u64,
45    #[stat(desc = "Number of delay high region samples")]
46    pub nr_delay_high_region_samples: u64,
47    #[stat(desc = "Number of gain floor resident samples")]
48    pub nr_gain_floor_resident_samples: u64,
49    #[stat(desc = "Number of gain mid resident samples")]
50    pub nr_gain_mid_resident_samples: u64,
51    #[stat(desc = "Number of gain ceiling resident samples")]
52    pub nr_gain_ceiling_resident_samples: u64,
53    #[stat(desc = "Number of idle select path picks")]
54    pub nr_idle_select_path_picks: u64,
55    #[stat(desc = "Number of idle enqueue path picks")]
56    pub nr_idle_enqueue_path_picks: u64,
57    #[stat(desc = "Number of idle prev CPU picks")]
58    pub nr_idle_prev_cpu_picks: u64,
59    #[stat(desc = "Number of idle primary picks")]
60    pub nr_idle_primary_picks: u64,
61    #[stat(desc = "Number of idle spill picks")]
62    pub nr_idle_spill_picks: u64,
63    #[stat(desc = "Number of idle pick failures")]
64    pub nr_idle_pick_failures: u64,
65    #[stat(desc = "Number of idle primary domain misses")]
66    pub nr_idle_primary_domain_misses: u64,
67    #[stat(desc = "Number of idle global misses")]
68    pub nr_idle_global_misses: u64,
69    #[stat(desc = "Number of waker CPU biases")]
70    pub nr_waker_cpu_biases: u64,
71    #[stat(desc = "Number of keep running reuses")]
72    pub nr_keep_running_reuses: u64,
73    #[stat(desc = "Number of keep running queue empty")]
74    pub nr_keep_running_queue_empty: u64,
75    #[stat(desc = "Number of keep running SMT blocked")]
76    pub nr_keep_running_smt_blocked: u64,
77    #[stat(desc = "Number of keep running queued work")]
78    pub nr_keep_running_queued_work: u64,
79    #[stat(desc = "Number of dispatch CPU DSQ consumes")]
80    pub nr_dispatch_cpu_dsq_consumes: u64,
81    #[stat(desc = "Number of dispatch node DSQ consumes")]
82    pub nr_dispatch_node_dsq_consumes: u64,
83    #[stat(desc = "Number of CPU release reenqueues")]
84    pub nr_cpu_release_reenqueue: u64,
85}
86
87impl Metrics {
88    fn format<W: Write>(&self, w: &mut W) -> Result<()> {
89        writeln!(
90            w,
91            "[{}] tasks -> r: {:>2}/{:<2} | dispatch -> k: {:<5} d: {:<5} s: {:<5} | timely -> rec: {:<5} mid: {:<5} rl: {:<5} min: {:<5} max: {:<5}",
92            crate::SCHEDULER_NAME,
93            self.nr_running,
94            self.nr_cpus,
95            self.nr_kthread_dispatches,
96            self.nr_direct_dispatches,
97            self.nr_shared_dispatches,
98            self.nr_delay_recovery_dispatches,
99            self.nr_delay_middle_add_dispatches,
100            self.nr_delay_rate_limited_dispatches,
101            self.nr_gain_floor_dispatches,
102            self.nr_gain_ceiling_dispatches
103        )?;
104        Ok(())
105    }
106
107    fn delta(&self, rhs: &Self) -> Self {
108        Self {
109            nr_kthread_dispatches: self.nr_kthread_dispatches - rhs.nr_kthread_dispatches,
110            nr_direct_dispatches: self.nr_direct_dispatches - rhs.nr_direct_dispatches,
111            nr_shared_dispatches: self.nr_shared_dispatches - rhs.nr_shared_dispatches,
112            nr_delay_recovery_dispatches: self.nr_delay_recovery_dispatches
113                - rhs.nr_delay_recovery_dispatches,
114            nr_delay_middle_add_dispatches: self.nr_delay_middle_add_dispatches
115                - rhs.nr_delay_middle_add_dispatches,
116            nr_delay_fast_recovery_dispatches: self.nr_delay_fast_recovery_dispatches
117                - rhs.nr_delay_fast_recovery_dispatches,
118            nr_delay_rate_limited_dispatches: self.nr_delay_rate_limited_dispatches
119                - rhs.nr_delay_rate_limited_dispatches,
120            nr_gain_floor_dispatches: self.nr_gain_floor_dispatches - rhs.nr_gain_floor_dispatches,
121            nr_gain_ceiling_dispatches: self.nr_gain_ceiling_dispatches
122                - rhs.nr_gain_ceiling_dispatches,
123            nr_delay_low_region_samples: self.nr_delay_low_region_samples
124                - rhs.nr_delay_low_region_samples,
125            nr_delay_mid_region_samples: self.nr_delay_mid_region_samples
126                - rhs.nr_delay_mid_region_samples,
127            nr_delay_high_region_samples: self.nr_delay_high_region_samples
128                - rhs.nr_delay_high_region_samples,
129            nr_gain_floor_resident_samples: self.nr_gain_floor_resident_samples
130                - rhs.nr_gain_floor_resident_samples,
131            nr_gain_mid_resident_samples: self.nr_gain_mid_resident_samples
132                - rhs.nr_gain_mid_resident_samples,
133            nr_gain_ceiling_resident_samples: self.nr_gain_ceiling_resident_samples
134                - rhs.nr_gain_ceiling_resident_samples,
135            nr_idle_select_path_picks: self.nr_idle_select_path_picks
136                - rhs.nr_idle_select_path_picks,
137            nr_idle_enqueue_path_picks: self.nr_idle_enqueue_path_picks
138                - rhs.nr_idle_enqueue_path_picks,
139            nr_idle_prev_cpu_picks: self.nr_idle_prev_cpu_picks - rhs.nr_idle_prev_cpu_picks,
140            nr_idle_primary_picks: self.nr_idle_primary_picks - rhs.nr_idle_primary_picks,
141            nr_idle_spill_picks: self.nr_idle_spill_picks - rhs.nr_idle_spill_picks,
142            nr_idle_pick_failures: self.nr_idle_pick_failures - rhs.nr_idle_pick_failures,
143            nr_idle_primary_domain_misses: self.nr_idle_primary_domain_misses
144                - rhs.nr_idle_primary_domain_misses,
145            nr_idle_global_misses: self.nr_idle_global_misses - rhs.nr_idle_global_misses,
146            nr_waker_cpu_biases: self.nr_waker_cpu_biases - rhs.nr_waker_cpu_biases,
147            nr_keep_running_reuses: self.nr_keep_running_reuses - rhs.nr_keep_running_reuses,
148            nr_keep_running_queue_empty: self.nr_keep_running_queue_empty
149                - rhs.nr_keep_running_queue_empty,
150            nr_keep_running_smt_blocked: self.nr_keep_running_smt_blocked
151                - rhs.nr_keep_running_smt_blocked,
152            nr_keep_running_queued_work: self.nr_keep_running_queued_work
153                - rhs.nr_keep_running_queued_work,
154            nr_dispatch_cpu_dsq_consumes: self.nr_dispatch_cpu_dsq_consumes
155                - rhs.nr_dispatch_cpu_dsq_consumes,
156            nr_dispatch_node_dsq_consumes: self.nr_dispatch_node_dsq_consumes
157                - rhs.nr_dispatch_node_dsq_consumes,
158            nr_cpu_release_reenqueue: self.nr_cpu_release_reenqueue - rhs.nr_cpu_release_reenqueue,
159            ..self.clone()
160        }
161    }
162}
163
164pub fn server_data() -> StatsServerData<(), Metrics> {
165    let open: Box<dyn StatsOpener<(), Metrics>> = Box::new(move |(req_ch, res_ch)| {
166        req_ch.send(())?;
167        let mut prev = res_ch.recv()?;
168
169        let read: Box<dyn StatsReader<(), Metrics>> = Box::new(move |_args, (req_ch, res_ch)| {
170            req_ch.send(())?;
171            let cur = res_ch.recv()?;
172            let delta = cur.delta(&prev);
173            prev = cur;
174            delta.to_json()
175        });
176
177        Ok(read)
178    });
179
180    StatsServerData::new()
181        .add_meta(Metrics::meta())
182        .add_ops("top", StatsOps { open, close: None })
183}
184
185pub fn monitor(intv: Duration, shutdown: Arc<AtomicBool>) -> Result<()> {
186    scx_utils::monitor_stats::<Metrics>(
187        &[],
188        intv,
189        || shutdown.load(Ordering::Relaxed),
190        |metrics| metrics.format(&mut std::io::stdout()),
191    )
192}