scx_layered/stats.rs

use std::collections::BTreeMap;
use std::io::Write;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::thread::current;
use std::thread::ThreadId;
use std::time::Duration;
use std::time::SystemTime;
use std::time::UNIX_EPOCH;

use anyhow::bail;
use anyhow::Result;
use chrono::DateTime;
use chrono::Local;
use log::warn;
use scx_stats::prelude::*;
use scx_stats_derive::stat_doc;
use scx_stats_derive::Stats;
use scx_utils::Cpumask;
use serde::Deserialize;
use serde::Serialize;

use crate::bpf_intf;
use crate::BpfStats;
use crate::Layer;
use crate::Stats;
use crate::LAYER_USAGE_OPEN;
use crate::LAYER_USAGE_PROTECTED;
use crate::LAYER_USAGE_PROTECTED_PREEMPT;
use crate::LAYER_USAGE_SUM_UPTO;

// Indices into the BPF-side global stat counters.
const GSTAT_EXCL_IDLE: usize = bpf_intf::global_stat_id_GSTAT_EXCL_IDLE as usize;
const GSTAT_EXCL_WAKEUP: usize = bpf_intf::global_stat_id_GSTAT_EXCL_WAKEUP as usize;
const GSTAT_HI_FB_EVENTS: usize = bpf_intf::global_stat_id_GSTAT_HI_FB_EVENTS as usize;
const GSTAT_HI_FB_USAGE: usize = bpf_intf::global_stat_id_GSTAT_HI_FB_USAGE as usize;
const GSTAT_LO_FB_EVENTS: usize = bpf_intf::global_stat_id_GSTAT_LO_FB_EVENTS as usize;
const GSTAT_LO_FB_USAGE: usize = bpf_intf::global_stat_id_GSTAT_LO_FB_USAGE as usize;
const GSTAT_FB_CPU_USAGE: usize = bpf_intf::global_stat_id_GSTAT_FB_CPU_USAGE as usize;
const GSTAT_ANTISTALL: usize = bpf_intf::global_stat_id_GSTAT_ANTISTALL as usize;
const GSTAT_SKIP_PREEMPT: usize = bpf_intf::global_stat_id_GSTAT_SKIP_PREEMPT as usize;
const GSTAT_FIXUP_VTIME: usize = bpf_intf::global_stat_id_GSTAT_FIXUP_VTIME as usize;
const GSTAT_PREEMPTING_MISMATCH: usize =
    bpf_intf::global_stat_id_GSTAT_PREEMPTING_MISMATCH as usize;

// Indices into the per-layer stat counters.
const LSTAT_SEL_LOCAL: usize = bpf_intf::layer_stat_id_LSTAT_SEL_LOCAL as usize;
const LSTAT_ENQ_LOCAL: usize = bpf_intf::layer_stat_id_LSTAT_ENQ_LOCAL as usize;
const LSTAT_ENQ_WAKEUP: usize = bpf_intf::layer_stat_id_LSTAT_ENQ_WAKEUP as usize;
const LSTAT_ENQ_EXPIRE: usize = bpf_intf::layer_stat_id_LSTAT_ENQ_EXPIRE as usize;
const LSTAT_ENQ_REENQ: usize = bpf_intf::layer_stat_id_LSTAT_ENQ_REENQ as usize;
const LSTAT_ENQ_DSQ: usize = bpf_intf::layer_stat_id_LSTAT_ENQ_DSQ as usize;
const LSTAT_MIN_EXEC: usize = bpf_intf::layer_stat_id_LSTAT_MIN_EXEC as usize;
const LSTAT_MIN_EXEC_NS: usize = bpf_intf::layer_stat_id_LSTAT_MIN_EXEC_NS as usize;
const LSTAT_OPEN_IDLE: usize = bpf_intf::layer_stat_id_LSTAT_OPEN_IDLE as usize;
const LSTAT_AFFN_VIOL: usize = bpf_intf::layer_stat_id_LSTAT_AFFN_VIOL as usize;
const LSTAT_KEEP: usize = bpf_intf::layer_stat_id_LSTAT_KEEP as usize;
const LSTAT_KEEP_FAIL_MAX_EXEC: usize = bpf_intf::layer_stat_id_LSTAT_KEEP_FAIL_MAX_EXEC as usize;
const LSTAT_KEEP_FAIL_BUSY: usize = bpf_intf::layer_stat_id_LSTAT_KEEP_FAIL_BUSY as usize;
const LSTAT_PREEMPT: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT as usize;
const LSTAT_PREEMPT_FIRST: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_FIRST as usize;
const LSTAT_PREEMPT_XLLC: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_XLLC as usize;
const LSTAT_PREEMPT_XNUMA: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_XNUMA as usize;
const LSTAT_PREEMPT_IDLE: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_IDLE as usize;
const LSTAT_PREEMPT_FAIL: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_FAIL as usize;
const LSTAT_EXCL_COLLISION: usize = bpf_intf::layer_stat_id_LSTAT_EXCL_COLLISION as usize;
const LSTAT_EXCL_PREEMPT: usize = bpf_intf::layer_stat_id_LSTAT_EXCL_PREEMPT as usize;
const LSTAT_YIELD: usize = bpf_intf::layer_stat_id_LSTAT_YIELD as usize;
const LSTAT_YIELD_IGNORE: usize = bpf_intf::layer_stat_id_LSTAT_YIELD_IGNORE as usize;
const LSTAT_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_MIGRATION as usize;
const LSTAT_XNUMA_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_XNUMA_MIGRATION as usize;
const LSTAT_XLLC_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_XLLC_MIGRATION as usize;
const LSTAT_XLLC_MIGRATION_SKIP: usize = bpf_intf::layer_stat_id_LSTAT_XLLC_MIGRATION_SKIP as usize;
const LSTAT_XLAYER_WAKE: usize = bpf_intf::layer_stat_id_LSTAT_XLAYER_WAKE as usize;
const LSTAT_XLAYER_REWAKE: usize = bpf_intf::layer_stat_id_LSTAT_XLAYER_REWAKE as usize;
const LSTAT_LLC_DRAIN_TRY: usize = bpf_intf::layer_stat_id_LSTAT_LLC_DRAIN_TRY as usize;
const LSTAT_LLC_DRAIN: usize = bpf_intf::layer_stat_id_LSTAT_LLC_DRAIN as usize;
const LSTAT_SKIP_REMOTE_NODE: usize = bpf_intf::layer_stat_id_LSTAT_SKIP_REMOTE_NODE as usize;

// Indices into the per-LLC, per-layer stat counters.
const LLC_LSTAT_LAT: usize = bpf_intf::llc_layer_stat_id_LLC_LSTAT_LAT as usize;
const LLC_LSTAT_CNT: usize = bpf_intf::llc_layer_stat_id_LLC_LSTAT_CNT as usize;

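/// Percentage helper: returns `a / b * 100.0`, or 0.0 when `b` is zero so
/// callers never see NaN or infinity from an empty period.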
fn calc_frac(a: f64, b: f64) -> f64 {
    if b != 0.0 {
        a / b * 100.0
    } else {
        0.0
    }
}

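/// Format a percentage into a fixed five-character column: values that
/// would round to 100.00 drop to one decimal so "100.0" still fits, and
/// tiny non-zero values are clamped up to 0.01 so they stay visible.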
fn fmt_pct(v: f64) -> String {
    if v >= 99.995 {
        format!("{:5.1}", v)
    } else if v > 0.0 && v < 0.01 {
        format!("{:5.2}", 0.01)
    } else {
        format!("{:5.2}", v)
    }
}

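/// Format a count into a fixed six-character column with "k"/"m" scaling,
/// e.g. fmt_num(1_500_000) == "  1.5m" and fmt_num(42) == "   42 ".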
fn fmt_num(v: u64) -> String {
    if v > 1_000_000 {
        format!("{:5.1}m", v as f64 / 1_000_000.0)
    } else if v > 1_000 {
        format!("{:5.1}k", v as f64 / 1_000.0)
    } else {
        format!("{:5.0} ", v)
    }
}

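/// Per-layer statistics reported through scx_stats. Unless noted otherwise,
/// the f64 fields are percentages of the layer's total scheduling events
/// for the reporting period.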
#[stat_doc]
#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
#[stat(_om_prefix = "l_", _om_label = "layer_name")]
pub struct LayerStats {
    #[stat(desc = "index", _om_skip)]
    pub index: usize,
    #[stat(desc = "Total CPU utilization (100% means one full CPU)")]
    pub util: f64,
    #[stat(desc = "Protected CPU utilization %")]
    pub util_protected_frac: f64,
    #[stat(desc = "Preempt-protected CPU utilization %")]
    pub util_protected_preempt_frac: f64,
    #[stat(desc = "Open CPU utilization %")]
    pub util_open_frac: f64,
    #[stat(desc = "% of total CPU utilization")]
    pub util_frac: f64,
    #[stat(desc = "number of tasks")]
    pub tasks: u32,
    #[stat(desc = "count of sched events during the period")]
    pub total: u64,
    #[stat(desc = "% dispatched into idle CPU from select_cpu")]
    pub sel_local: f64,
    #[stat(desc = "% dispatched into idle CPU from enqueue")]
    pub enq_local: f64,
    #[stat(desc = "% enqueued after wakeup")]
    pub enq_wakeup: f64,
    #[stat(desc = "% enqueued after slice expiration")]
    pub enq_expire: f64,
    #[stat(desc = "% re-enqueued due to RT preemption")]
    pub enq_reenq: f64,
    #[stat(desc = "% enqueued into the layer's LLC DSQs")]
    pub enq_dsq: f64,
    #[stat(desc = "% of times exec duration < min_exec_us")]
    pub min_exec: f64,
    #[stat(desc = "total exec duration extended due to min_exec_us")]
    pub min_exec_us: u64,
    #[stat(desc = "% dispatched into idle CPUs occupied by other layers")]
    pub open_idle: f64,
    #[stat(desc = "% preempted other tasks")]
    pub preempt: f64,
    #[stat(desc = "% preempted XLLC tasks")]
    pub preempt_xllc: f64,
    #[stat(desc = "% preempted XNUMA tasks")]
    pub preempt_xnuma: f64,
    #[stat(desc = "% first-preempted other tasks")]
    pub preempt_first: f64,
    #[stat(desc = "% idle-preempted other tasks")]
    pub preempt_idle: f64,
    #[stat(desc = "% attempted to preempt other tasks but failed")]
    pub preempt_fail: f64,
    #[stat(desc = "% violated config due to CPU affinity")]
    pub affn_viol: f64,
    #[stat(desc = "% continued executing after slice expiration")]
    pub keep: f64,
    #[stat(desc = "% disallowed to continue executing due to max_exec")]
    pub keep_fail_max_exec: f64,
    #[stat(desc = "% disallowed to continue executing due to other tasks")]
    pub keep_fail_busy: f64,
    #[stat(desc = "whether the layer is exclusive", _om_skip)]
    pub is_excl: u32,
    #[stat(desc = "% of times an excl task skipped a CPU as the sibling was also excl")]
    pub excl_collision: f64,
    #[stat(desc = "% of times a sibling CPU was preempted for an exclusive task")]
    pub excl_preempt: f64,
    #[stat(desc = "% yielded")]
    pub yielded: f64,
    #[stat(desc = "count of times yield was ignored")]
    pub yield_ignore: u64,
    #[stat(desc = "% migrated across CPUs")]
    pub migration: f64,
    #[stat(desc = "% migrated across NUMA nodes")]
    pub xnuma_migration: f64,
    #[stat(desc = "% migrated across LLCs")]
    pub xllc_migration: f64,
    #[stat(desc = "% migration skipped across LLCs due to xllc_mig_min_us")]
    pub xllc_migration_skip: f64,
    #[stat(desc = "% wakers across layers")]
    pub xlayer_wake: f64,
    #[stat(desc = "% rewakers across layers where the waker had woken the task previously")]
    pub xlayer_rewake: f64,
    #[stat(desc = "% LLC draining tried")]
    pub llc_drain_try: f64,
    #[stat(desc = "% LLC draining succeeded")]
    pub llc_drain: f64,
    #[stat(desc = "% LLC dispatches skipped on a remote node")]
    pub skip_remote_node: f64,
    #[stat(desc = "mask of allocated CPUs", _om_skip)]
    pub cpus: Vec<u64>,
    #[stat(desc = "count of CPUs assigned")]
    pub cur_nr_cpus: u32,
    #[stat(desc = "minimum # of CPUs assigned")]
    pub min_nr_cpus: u32,
    #[stat(desc = "maximum # of CPUs assigned")]
    pub max_nr_cpus: u32,
    #[stat(desc = "count of CPUs assigned per LLC")]
    pub nr_llc_cpus: Vec<u32>,
    #[stat(desc = "slice duration config")]
    pub slice_us: u64,
    #[stat(desc = "Per-LLC scheduling event fractions")]
    pub llc_fracs: Vec<f64>,
    #[stat(desc = "Per-LLC average latency (s)")]
    pub llc_lats: Vec<f64>,
}

impl LayerStats {
    /// Build a `LayerStats` snapshot for layer `lidx` from the userspace
    /// accumulators in `stats` and the raw per-layer BPF counters in
    /// `bstats`.
    pub fn new(
        lidx: usize,
        layer: &Layer,
        stats: &Stats,
        bstats: &BpfStats,
        nr_cpus_range: (usize, usize),
    ) -> Self {
        let lstat = |sidx| bstats.lstats[lidx][sidx];
        // Total scheduling events for this layer; the denominator for the
        // percentage stats below.
        let ltotal = lstat(LSTAT_SEL_LOCAL)
            + lstat(LSTAT_ENQ_LOCAL)
            + lstat(LSTAT_ENQ_WAKEUP)
            + lstat(LSTAT_ENQ_EXPIRE)
            + lstat(LSTAT_ENQ_REENQ)
            + lstat(LSTAT_KEEP);
        let lstat_pct = |sidx| {
            if ltotal != 0 {
                lstat(sidx) as f64 / ltotal as f64 * 100.0
            } else {
                0.0
            }
        };

        let util_sum = stats.layer_utils[lidx]
            .iter()
            .take(LAYER_USAGE_SUM_UPTO + 1)
            .sum::<f64>();

        Self {
            index: lidx,
            util: util_sum * 100.0,
            util_open_frac: calc_frac(stats.layer_utils[lidx][LAYER_USAGE_OPEN], util_sum),
            util_protected_frac: calc_frac(
                stats.layer_utils[lidx][LAYER_USAGE_PROTECTED],
                util_sum,
            ),
            util_protected_preempt_frac: calc_frac(
                stats.layer_utils[lidx][LAYER_USAGE_PROTECTED_PREEMPT],
                util_sum,
            ),
            util_frac: calc_frac(util_sum, stats.total_util),
            tasks: stats.nr_layer_tasks[lidx] as u32,
            total: ltotal,
            sel_local: lstat_pct(LSTAT_SEL_LOCAL),
            enq_local: lstat_pct(LSTAT_ENQ_LOCAL),
            enq_wakeup: lstat_pct(LSTAT_ENQ_WAKEUP),
            enq_expire: lstat_pct(LSTAT_ENQ_EXPIRE),
            enq_reenq: lstat_pct(LSTAT_ENQ_REENQ),
            enq_dsq: lstat_pct(LSTAT_ENQ_DSQ),
            min_exec: lstat_pct(LSTAT_MIN_EXEC),
            min_exec_us: (lstat(LSTAT_MIN_EXEC_NS) / 1000) as u64,
            open_idle: lstat_pct(LSTAT_OPEN_IDLE),
            preempt: lstat_pct(LSTAT_PREEMPT),
            preempt_xllc: lstat_pct(LSTAT_PREEMPT_XLLC),
            preempt_xnuma: lstat_pct(LSTAT_PREEMPT_XNUMA),
            preempt_first: lstat_pct(LSTAT_PREEMPT_FIRST),
            preempt_idle: lstat_pct(LSTAT_PREEMPT_IDLE),
            preempt_fail: lstat_pct(LSTAT_PREEMPT_FAIL),
            affn_viol: lstat_pct(LSTAT_AFFN_VIOL),
            keep: lstat_pct(LSTAT_KEEP),
            keep_fail_max_exec: lstat_pct(LSTAT_KEEP_FAIL_MAX_EXEC),
            keep_fail_busy: lstat_pct(LSTAT_KEEP_FAIL_BUSY),
            is_excl: layer.kind.common().exclusive as u32,
            excl_collision: lstat_pct(LSTAT_EXCL_COLLISION),
            excl_preempt: lstat_pct(LSTAT_EXCL_PREEMPT),
            yielded: lstat_pct(LSTAT_YIELD),
            yield_ignore: lstat(LSTAT_YIELD_IGNORE) as u64,
            migration: lstat_pct(LSTAT_MIGRATION),
            xnuma_migration: lstat_pct(LSTAT_XNUMA_MIGRATION),
            xlayer_wake: lstat_pct(LSTAT_XLAYER_WAKE),
            xlayer_rewake: lstat_pct(LSTAT_XLAYER_REWAKE),
            xllc_migration: lstat_pct(LSTAT_XLLC_MIGRATION),
            xllc_migration_skip: lstat_pct(LSTAT_XLLC_MIGRATION_SKIP),
            llc_drain_try: lstat_pct(LSTAT_LLC_DRAIN_TRY),
            llc_drain: lstat_pct(LSTAT_LLC_DRAIN),
            skip_remote_node: lstat_pct(LSTAT_SKIP_REMOTE_NODE),
            cpus: layer.cpus.as_raw_slice().to_vec(),
            cur_nr_cpus: layer.cpus.weight() as u32,
            min_nr_cpus: nr_cpus_range.0 as u32,
            max_nr_cpus: nr_cpus_range.1 as u32,
            nr_llc_cpus: layer.nr_llc_cpus.iter().map(|&v| v as u32).collect(),
            slice_us: stats.layer_slice_us[lidx],
            llc_fracs: {
                // Share of the layer's events handled by each LLC.
                let sid = LLC_LSTAT_CNT;
                let sum = bstats.llc_lstats[lidx]
                    .iter()
                    .map(|lstats| lstats[sid])
                    .sum::<u64>() as f64;
                bstats.llc_lstats[lidx]
                    .iter()
                    .map(|lstats| calc_frac(lstats[sid] as f64, sum))
                    .collect()
            },
            llc_lats: bstats.llc_lstats[lidx]
                .iter()
                .map(|lstats| lstats[LLC_LSTAT_LAT] as f64 / 1_000_000_000.0)
                .collect(),
        }
    }

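    /// Render this layer's stats as the indented multi-line block used in
    /// the monitoring output; `header_width` sizes the leading name column.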
    pub fn format<W: Write>(&self, w: &mut W, name: &str, header_width: usize) -> Result<()> {
        writeln!(
            w,
            "  {:<width$}: util/open/frac={:6.1}/{}/{:7.1} prot/prot_preempt={}/{} tasks={:6}",
            name,
            self.util,
            fmt_pct(self.util_open_frac),
            self.util_frac,
            fmt_pct(self.util_protected_frac),
            fmt_pct(self.util_protected_preempt_frac),
            self.tasks,
            width = header_width,
        )?;

        writeln!(
            w,
            "  {:<width$}  tot={:7} local_sel/enq={}/{} enq_dsq={} wake/exp/reenq={}/{}/{}",
            "",
            self.total,
            fmt_pct(self.sel_local),
            fmt_pct(self.enq_local),
            fmt_pct(self.enq_dsq),
            fmt_pct(self.enq_wakeup),
            fmt_pct(self.enq_expire),
            fmt_pct(self.enq_reenq),
            width = header_width,
        )?;

        writeln!(
            w,
            "  {:<width$}  keep/max/busy={}/{}/{} yield/ign={}/{}",
            "",
            fmt_pct(self.keep),
            fmt_pct(self.keep_fail_max_exec),
            fmt_pct(self.keep_fail_busy),
            fmt_pct(self.yielded),
            fmt_num(self.yield_ignore),
            width = header_width,
        )?;

        writeln!(
            w,
            "  {:<width$}  open_idle={} mig={} xnuma_mig={} xllc_mig/skip={}/{} affn_viol={}",
            "",
            fmt_pct(self.open_idle),
            fmt_pct(self.migration),
            fmt_pct(self.xnuma_migration),
            fmt_pct(self.xllc_migration),
            fmt_pct(self.xllc_migration_skip),
            fmt_pct(self.affn_viol),
            width = header_width,
        )?;

        writeln!(
            w,
            "  {:<width$}  preempt/first/xllc/xnuma/idle/fail={}/{}/{}/{}/{}/{}",
            "",
            fmt_pct(self.preempt),
            fmt_pct(self.preempt_first),
            fmt_pct(self.preempt_xllc),
            fmt_pct(self.preempt_xnuma),
            fmt_pct(self.preempt_idle),
            fmt_pct(self.preempt_fail),
            width = header_width,
        )?;

        writeln!(
            w,
            "  {:<width$}  xlayer_wake/re={}/{} llc_drain/try={}/{} skip_rnode={}",
            "",
            fmt_pct(self.xlayer_wake),
            fmt_pct(self.xlayer_rewake),
            fmt_pct(self.llc_drain),
            fmt_pct(self.llc_drain_try),
            fmt_pct(self.skip_remote_node),
            width = header_width,
        )?;

        writeln!(
            w,
            "  {:<width$}  slice={}ms min_exec={}/{:7.2}ms",
            "",
            self.slice_us as f64 / 1000.0,
            fmt_pct(self.min_exec),
            self.min_exec_us as f64 / 1000.0,
            width = header_width
        )?;

        let cpumask = Cpumask::from_vec(self.cpus.clone());

        writeln!(
            w,
            "  {:<width$}  cpus={:3} [{:3},{:3}] {}",
            "",
            self.cur_nr_cpus,
            self.min_nr_cpus,
            self.max_nr_cpus,
            &cpumask,
            width = header_width
        )?;

        write!(
            w,
            "  {:<width$}  [LLC] nr_cpus: sched% lat_ms",
            "",
            width = header_width
        )?;

        for (i, (&frac, &lat)) in self.llc_fracs.iter().zip(self.llc_lats.iter()).enumerate() {
            // Four LLC entries per output line.
            if (i % 4) == 0 {
                writeln!(w)?;
                write!(w, "  {:<width$}  [{:03}]", "", i, width = header_width)?;
            } else {
                write!(w, " |")?;
            }
            write!(
                w,
                " {:2}:{}%{:7.2}",
                self.nr_llc_cpus[i],
                fmt_pct(frac),
                lat * 1_000.0
            )?;
        }
        writeln!(w)?;

        if self.is_excl != 0 {
            writeln!(
                w,
                "  {:<width$}  excl_coll={} excl_preempt={}",
                "",
                fmt_pct(self.excl_collision),
                fmt_pct(self.excl_preempt),
                width = header_width,
            )?;
        } else if self.excl_collision != 0.0 || self.excl_preempt != 0.0 {
            warn!(
                "{}: exclusive is off but excl_coll={} excl_preempt={}",
                name,
                fmt_pct(self.excl_collision),
                fmt_pct(self.excl_preempt),
            );
        }

        Ok(())
    }
}

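/// System-wide statistics. As with `LayerStats`, most percentage fields are
/// relative to the total count of scheduling events in the period.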
#[stat_doc]
#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
#[stat(top)]
pub struct SysStats {
    #[stat(desc = "timestamp", _om_skip)]
    pub at: f64,
    #[stat(desc = "# of NUMA nodes")]
    pub nr_nodes: usize,
    #[stat(desc = "count of sched events during the period")]
    pub total: u64,
    #[stat(desc = "% dispatched directly into an idle CPU from select_cpu")]
    pub local_sel: f64,
    #[stat(desc = "% dispatched directly into an idle CPU from enqueue")]
    pub local_enq: f64,
    #[stat(desc = "% open layer tasks scheduled into allocated but idle CPUs")]
    pub open_idle: f64,
    #[stat(desc = "% violated config due to CPU affinity")]
    pub affn_viol: f64,
    #[stat(desc = "% sent to hi fallback DSQs")]
    pub hi_fb: f64,
    #[stat(desc = "% sent to lo fallback DSQs")]
    pub lo_fb: f64,
    #[stat(desc = "% of times an excl task skipped a CPU as the sibling was also excl")]
    pub excl_collision: f64,
    #[stat(desc = "% of times a sibling CPU was preempted for an excl task")]
    pub excl_preempt: f64,
    #[stat(desc = "fraction of sched events where a CPU skipped dispatching due to an excl task on the sibling")]
    pub excl_idle: f64,
    #[stat(
        desc = "fraction of sched events where an idle sibling CPU was woken up after an excl task finished"
    )]
    pub excl_wakeup: f64,
    #[stat(desc = "CPU time this binary consumed during the period")]
    pub proc_ms: u64,
    #[stat(desc = "CPU busy % (100% means all CPUs)")]
    pub busy: f64,
    #[stat(desc = "CPU util % (100% means one CPU)")]
    pub util: f64,
    #[stat(desc = "CPU util % used by hi fallback DSQs")]
    pub hi_fb_util: f64,
    #[stat(desc = "CPU util % used by lo fallback DSQs")]
    pub lo_fb_util: f64,
    #[stat(desc = "number of tasks dispatched via antistall")]
    pub antistall: u64,
    #[stat(desc = "number of times preemptions of non-scx tasks were avoided")]
    pub skip_preempt: u64,
    #[stat(desc = "number of times vtime was out of range and fixed up")]
    pub fixup_vtime: u64,
    #[stat(desc = "number of times cpuc->preempting_task didn't end up running on the CPU")]
    pub preempting_mismatch: u64,
    #[stat(desc = "fallback CPU")]
    pub fallback_cpu: u32,
    #[stat(desc = "fallback CPU util %")]
    pub fallback_cpu_util: f64,
    #[stat(desc = "per-layer statistics")]
    pub layers: BTreeMap<String, LayerStats>,
}

impl SysStats {
    /// Build the system-wide snapshot from the userspace accumulators and
    /// the summed per-layer BPF counters; `layers` is filled in separately
    /// by the caller.
    pub fn new(stats: &Stats, bstats: &BpfStats, fallback_cpu: usize) -> Result<Self> {
        let lsum = |idx| stats.bpf_stats.lstats_sums[idx];
        let total = lsum(LSTAT_SEL_LOCAL)
            + lsum(LSTAT_ENQ_LOCAL)
            + lsum(LSTAT_ENQ_WAKEUP)
            + lsum(LSTAT_ENQ_EXPIRE)
            + lsum(LSTAT_ENQ_REENQ)
            + lsum(LSTAT_KEEP);
        let lsum_pct = |idx| {
            if total != 0 {
                lsum(idx) as f64 / total as f64 * 100.0
            } else {
                0.0
            }
        };
        // Zero-guarded ratio of a global stat to the total event count, so
        // an empty period doesn't produce NaN.
        let gstat_ratio = |gidx| {
            if total != 0 {
                bstats.gstats[gidx] as f64 / total as f64
            } else {
                0.0
            }
        };

        let elapsed_ns = stats.elapsed.as_nanos();

        Ok(Self {
            at: SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs_f64(),
            nr_nodes: stats.nr_nodes,
            total,
            local_sel: lsum_pct(LSTAT_SEL_LOCAL),
            local_enq: lsum_pct(LSTAT_ENQ_LOCAL),
            open_idle: lsum_pct(LSTAT_OPEN_IDLE),
            affn_viol: lsum_pct(LSTAT_AFFN_VIOL),
            hi_fb: calc_frac(
                stats.bpf_stats.gstats[GSTAT_HI_FB_EVENTS] as f64,
                total as f64,
            ),
            lo_fb: calc_frac(
                stats.bpf_stats.gstats[GSTAT_LO_FB_EVENTS] as f64,
                total as f64,
            ),
            excl_collision: lsum_pct(LSTAT_EXCL_COLLISION),
            excl_preempt: lsum_pct(LSTAT_EXCL_PREEMPT),
            excl_idle: gstat_ratio(GSTAT_EXCL_IDLE),
            excl_wakeup: gstat_ratio(GSTAT_EXCL_WAKEUP),
            proc_ms: stats.processing_dur.as_millis() as u64,
            busy: stats.cpu_busy * 100.0,
            util: stats.total_util * 100.0,
            hi_fb_util: stats.bpf_stats.gstats[GSTAT_HI_FB_USAGE] as f64 / elapsed_ns as f64
                * 100.0,
            lo_fb_util: stats.bpf_stats.gstats[GSTAT_LO_FB_USAGE] as f64 / elapsed_ns as f64
                * 100.0,
            antistall: stats.bpf_stats.gstats[GSTAT_ANTISTALL],
            skip_preempt: stats.bpf_stats.gstats[GSTAT_SKIP_PREEMPT],
            fixup_vtime: stats.bpf_stats.gstats[GSTAT_FIXUP_VTIME],
            preempting_mismatch: stats.bpf_stats.gstats[GSTAT_PREEMPTING_MISMATCH],
            fallback_cpu: fallback_cpu as u32,
            fallback_cpu_util: stats.bpf_stats.gstats[GSTAT_FB_CPU_USAGE] as f64
                / elapsed_ns as f64
                * 100.0,
            layers: BTreeMap::new(),
        })
    }

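    /// Write the system-wide summary lines that appear above the per-layer
    /// blocks in the monitoring output.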
    pub fn format<W: Write>(&self, w: &mut W) -> Result<()> {
        writeln!(
            w,
            "tot={:7} local_sel/enq={}/{} open_idle={} affn_viol={} hi/lo={}/{}",
            self.total,
            fmt_pct(self.local_sel),
            fmt_pct(self.local_enq),
            fmt_pct(self.open_idle),
            fmt_pct(self.affn_viol),
            fmt_pct(self.hi_fb),
            fmt_pct(self.lo_fb),
        )?;

        writeln!(
            w,
            "busy={:5.1} util/hi/lo={:7.1}/{}/{} fallback_cpu/util={:3}/{:4.1} proc={}ms",
            self.busy,
            self.util,
            fmt_pct(self.hi_fb_util),
            fmt_pct(self.lo_fb_util),
            self.fallback_cpu,
            self.fallback_cpu_util,
            self.proc_ms,
        )?;

        writeln!(
            w,
            "excl_coll={:.2} excl_preempt={:.2} excl_idle={:.2} excl_wakeup={:.2}",
            self.excl_collision, self.excl_preempt, self.excl_idle, self.excl_wakeup
        )?;

        writeln!(
            w,
            "skip_preempt={} antistall={} fixup_vtime={} preempting_mismatch={}",
            self.skip_preempt, self.antistall, self.fixup_vtime, self.preempting_mismatch
        )?;

        Ok(())
    }

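    /// Write the system-wide summary followed by every layer's block,
    /// ordered by layer index.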
    pub fn format_all<W: Write>(&self, w: &mut W) -> Result<()> {
        self.format(w)?;

        let header_width = self
            .layers
            .keys()
            .map(|name| name.len())
            .max()
            .unwrap_or(0)
            .max(4);

        // Layers are keyed by name; sort by index so the output order
        // matches the configuration.
        let mut idx_to_name: Vec<(usize, &String)> =
            self.layers.iter().map(|(k, v)| (v.index, k)).collect();

        idx_to_name.sort();

        for (_idx, name) in &idx_to_name {
            self.layers[*name].format(w, name, header_width)?;
        }

        Ok(())
    }
}

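// Requests and responses exchanged between stats-server connection threads
// and the scheduler's main loop. Each request carries the requester's
// ThreadId so the scheduler can keep per-reader state.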
#[derive(Debug)]
pub enum StatsReq {
    Hello(ThreadId),
    Refresh(ThreadId, Stats),
    Bye(ThreadId),
}

#[derive(Debug)]
pub enum StatsRes {
    Hello(Stats),
    Refreshed((Stats, SysStats)),
    Bye,
}

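// Wire up the scx_stats server: each connection says Hello to receive a
// Stats snapshot, trades it back in with every Refresh (presumably so the
// scheduler side can compute deltas against the reader's last snapshot),
// and hands it back with Bye.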
pub fn server_data() -> StatsServerData<StatsReq, StatsRes> {
    let open: Box<dyn StatsOpener<StatsReq, StatsRes>> = Box::new(move |(req_ch, res_ch)| {
        let tid = current().id();
        req_ch.send(StatsReq::Hello(tid))?;
        let mut stats = Some(match res_ch.recv()? {
            StatsRes::Hello(v) => v,
            res => bail!("invalid response to Hello: {:?}", res),
        });

        // On each read, hand the previous snapshot back to the scheduler
        // and stash the refreshed one for the next round.
        let read: Box<dyn StatsReader<StatsReq, StatsRes>> =
            Box::new(move |_args, (req_ch, res_ch)| {
                req_ch.send(StatsReq::Refresh(tid, stats.take().unwrap()))?;
                let (new_stats, sys_stats) = match res_ch.recv()? {
                    StatsRes::Refreshed(v) => v,
                    res => bail!("invalid response to Refresh: {:?}", res),
                };
                stats = Some(new_stats);
                sys_stats.to_json()
            });

        Ok(read)
    });

    let close: Box<dyn StatsCloser<StatsReq, StatsRes>> = Box::new(move |(req_ch, res_ch)| {
        req_ch.send(StatsReq::Bye(current().id())).unwrap();
        match res_ch.recv().unwrap() {
            StatsRes::Bye => {}
            res => panic!("invalid response to Bye: {:?}", res),
        }
    });

    StatsServerData::new()
        .add_meta(LayerStats::meta())
        .add_meta(SysStats::meta())
        .add_ops(
            "top",
            StatsOps {
                open,
                close: Some(close),
            },
        )
}

pub fn monitor(intv: Duration, shutdown: Arc<AtomicBool>) -> Result<()> {
    scx_utils::monitor_stats::<SysStats>(
        &vec![],
        intv,
        || shutdown.load(Ordering::Relaxed),
        |sst| {
            let dt = DateTime::<Local>::from(UNIX_EPOCH + Duration::from_secs_f64(sst.at));
            println!("###### {} ######", dt.to_rfc2822());
            sst.format_all(&mut std::io::stdout())
        },
    )
}
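
// A minimal sketch of driving `monitor` from a binary's main function. The
// ctrl-c wiring is an assumption for illustration (the `ctrlc` crate is not
// a dependency of this file):
//
//     let shutdown = Arc::new(AtomicBool::new(false));
//     let s = shutdown.clone();
//     ctrlc::set_handler(move || s.store(true, Ordering::Relaxed))?;
//     monitor(Duration::from_secs(1), shutdown)?;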