scx_layered/
stats.rs

1use std::collections::BTreeMap;
2use std::io::Write;
3use std::sync::Arc;
4use std::sync::atomic::AtomicBool;
5use std::sync::atomic::Ordering;
6use std::thread::ThreadId;
7use std::thread::current;
8use std::time::Duration;
9use std::time::SystemTime;
10use std::time::UNIX_EPOCH;
11
12use anyhow::Result;
13use anyhow::bail;
14use chrono::DateTime;
15use chrono::Local;
16use log::warn;
17use scx_stats::prelude::*;
18use scx_stats_derive::Stats;
19use scx_stats_derive::stat_doc;
20use scx_utils::Cpumask;
21use serde::Deserialize;
22use serde::Serialize;
23
24use crate::BpfStats;
25use crate::LAYER_USAGE_OPEN;
26use crate::LAYER_USAGE_PROTECTED;
27use crate::LAYER_USAGE_PROTECTED_PREEMPT;
28use crate::LAYER_USAGE_SUM_UPTO;
29use crate::Layer;
30use crate::Stats;
31use crate::bpf_intf;
32
33const GSTAT_EXCL_IDLE: usize = bpf_intf::global_stat_id_GSTAT_EXCL_IDLE as usize;
34const GSTAT_EXCL_WAKEUP: usize = bpf_intf::global_stat_id_GSTAT_EXCL_WAKEUP as usize;
35const GSTAT_HI_FB_EVENTS: usize = bpf_intf::global_stat_id_GSTAT_HI_FB_EVENTS as usize;
36const GSTAT_HI_FB_USAGE: usize = bpf_intf::global_stat_id_GSTAT_HI_FB_USAGE as usize;
37const GSTAT_LO_FB_EVENTS: usize = bpf_intf::global_stat_id_GSTAT_LO_FB_EVENTS as usize;
38const GSTAT_LO_FB_USAGE: usize = bpf_intf::global_stat_id_GSTAT_LO_FB_USAGE as usize;
39const GSTAT_FB_CPU_USAGE: usize = bpf_intf::global_stat_id_GSTAT_FB_CPU_USAGE as usize;
40const GSTAT_ANTISTALL: usize = bpf_intf::global_stat_id_GSTAT_ANTISTALL as usize;
41
42const LSTAT_SEL_LOCAL: usize = bpf_intf::layer_stat_id_LSTAT_SEL_LOCAL as usize;
43const LSTAT_ENQ_LOCAL: usize = bpf_intf::layer_stat_id_LSTAT_ENQ_LOCAL as usize;
44const LSTAT_ENQ_WAKEUP: usize = bpf_intf::layer_stat_id_LSTAT_ENQ_WAKEUP as usize;
45const LSTAT_ENQ_EXPIRE: usize = bpf_intf::layer_stat_id_LSTAT_ENQ_EXPIRE as usize;
46const LSTAT_ENQ_REENQ: usize = bpf_intf::layer_stat_id_LSTAT_ENQ_REENQ as usize;
47const LSTAT_MIN_EXEC: usize = bpf_intf::layer_stat_id_LSTAT_MIN_EXEC as usize;
48const LSTAT_MIN_EXEC_NS: usize = bpf_intf::layer_stat_id_LSTAT_MIN_EXEC_NS as usize;
49const LSTAT_OPEN_IDLE: usize = bpf_intf::layer_stat_id_LSTAT_OPEN_IDLE as usize;
50const LSTAT_AFFN_VIOL: usize = bpf_intf::layer_stat_id_LSTAT_AFFN_VIOL as usize;
51const LSTAT_KEEP: usize = bpf_intf::layer_stat_id_LSTAT_KEEP as usize;
52const LSTAT_KEEP_FAIL_MAX_EXEC: usize = bpf_intf::layer_stat_id_LSTAT_KEEP_FAIL_MAX_EXEC as usize;
53const LSTAT_KEEP_FAIL_BUSY: usize = bpf_intf::layer_stat_id_LSTAT_KEEP_FAIL_BUSY as usize;
54const LSTAT_PREEMPT: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT as usize;
55const LSTAT_PREEMPT_FIRST: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_FIRST as usize;
56const LSTAT_PREEMPT_XLLC: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_XLLC as usize;
57const LSTAT_PREEMPT_XNUMA: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_XNUMA as usize;
58const LSTAT_PREEMPT_IDLE: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_IDLE as usize;
59const LSTAT_PREEMPT_FAIL: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_FAIL as usize;
60const LSTAT_EXCL_COLLISION: usize = bpf_intf::layer_stat_id_LSTAT_EXCL_COLLISION as usize;
61const LSTAT_EXCL_PREEMPT: usize = bpf_intf::layer_stat_id_LSTAT_EXCL_PREEMPT as usize;
62const LSTAT_YIELD: usize = bpf_intf::layer_stat_id_LSTAT_YIELD as usize;
63const LSTAT_YIELD_IGNORE: usize = bpf_intf::layer_stat_id_LSTAT_YIELD_IGNORE as usize;
64const LSTAT_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_MIGRATION as usize;
65const LSTAT_XNUMA_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_XNUMA_MIGRATION as usize;
66const LSTAT_XLLC_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_XLLC_MIGRATION as usize;
67const LSTAT_XLLC_MIGRATION_SKIP: usize = bpf_intf::layer_stat_id_LSTAT_XLLC_MIGRATION_SKIP as usize;
68const LSTAT_XLAYER_WAKE: usize = bpf_intf::layer_stat_id_LSTAT_XLAYER_WAKE as usize;
69const LSTAT_XLAYER_REWAKE: usize = bpf_intf::layer_stat_id_LSTAT_XLAYER_REWAKE as usize;
70const LSTAT_LLC_DRAIN_TRY: usize = bpf_intf::layer_stat_id_LSTAT_LLC_DRAIN_TRY as usize;
71const LSTAT_LLC_DRAIN: usize = bpf_intf::layer_stat_id_LSTAT_LLC_DRAIN as usize;
72const LSTAT_SKIP_REMOTE_NODE: usize = bpf_intf::layer_stat_id_LSTAT_SKIP_REMOTE_NODE as usize;
73
74const LLC_LSTAT_LAT: usize = bpf_intf::llc_layer_stat_id_LLC_LSTAT_LAT as usize;
75const LLC_LSTAT_CNT: usize = bpf_intf::llc_layer_stat_id_LLC_LSTAT_CNT as usize;
76
/// Return `a` as a percentage of `b`, or 0.0 when the denominator is zero.
fn calc_frac(a: f64, b: f64) -> f64 {
    if b == 0.0 {
        return 0.0;
    }
    a / b * 100.0
}
80
/// Render a percentage into a fixed 5-character column.
fn fmt_pct(v: f64) -> String {
    // Values that would round to 100.00 need one fewer decimal to fit.
    if v >= 99.995 {
        return format!("{:5.1}", v);
    }
    // Clamp tiny-but-nonzero values up to 0.01 so they remain visible.
    if v > 0.0 && v < 0.01 {
        return format!("{:5.2}", 0.01);
    }
    format!("{:5.2}", v)
}
90
/// Render a count into a fixed 6-character column, scaling to "k"/"m"
/// suffixes for large values.
///
/// Uses `>=` at the boundaries: the original `>` comparisons let exactly
/// 1_000_000 fall through to the plain format and overflow the column
/// width ("1000000" is 7 chars against the 5-char field).
fn fmt_num(v: u64) -> String {
    if v >= 1_000_000 {
        format!("{:5.1}m", v as f64 / 1_000_000.0)
    } else if v >= 1_000 {
        format!("{:5.1}k", v as f64 / 1_000.0)
    } else {
        format!("{:5.0} ", v)
    }
}
100
// Per-layer statistics exported through scx_stats. Field descriptions are
// carried in `#[stat(desc = ...)]` attributes and surfaced as rustdoc by the
// `stat_doc` macro; `_om_skip` marks fields excluded from OpenMetrics output.
// Percentages below are fractions of the layer's total sched events unless
// the description says otherwise.
#[stat_doc]
#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
#[stat(_om_prefix = "l_", _om_label = "layer_name")]
pub struct LayerStats {
    #[stat(desc = "index", _om_skip)]
    pub index: usize,
    #[stat(desc = "Total CPU utilization (100% means one full CPU)")]
    pub util: f64,
    #[stat(desc = "Protected CPU utilization %")]
    pub util_protected_frac: f64,
    #[stat(desc = "Preempt-protected CPU utilization %")]
    pub util_protected_preempt_frac: f64,
    #[stat(desc = "Open CPU utilization %")]
    pub util_open_frac: f64,
    #[stat(desc = "fraction of total CPU utilization")]
    pub util_frac: f64,
    #[stat(desc = "number of tasks")]
    pub tasks: u32,
    #[stat(desc = "count of sched events during the period")]
    pub total: u64,
    #[stat(desc = "% dispatched into idle CPU from select_cpu")]
    pub sel_local: f64,
    #[stat(desc = "% dispatched into idle CPU from enqueue")]
    pub enq_local: f64,
    #[stat(desc = "% enqueued after wakeup")]
    pub enq_wakeup: f64,
    #[stat(desc = "% enqueued after slice expiration")]
    pub enq_expire: f64,
    #[stat(desc = "% re-enqueued due to RT preemption")]
    pub enq_reenq: f64,
    #[stat(desc = "count of times exec duration < min_exec_us")]
    pub min_exec: f64,
    #[stat(desc = "total exec durations extended due to min_exec_us")]
    pub min_exec_us: u64,
    #[stat(desc = "% dispatched into idle CPUs occupied by other layers")]
    pub open_idle: f64,
    #[stat(desc = "% preempted other tasks")]
    pub preempt: f64,
    #[stat(desc = "% preempted XLLC tasks")]
    pub preempt_xllc: f64,
    #[stat(desc = "% preempted XNUMA tasks")]
    pub preempt_xnuma: f64,
    #[stat(desc = "% first-preempted other tasks")]
    pub preempt_first: f64,
    #[stat(desc = "% idle-preempted other tasks")]
    pub preempt_idle: f64,
    #[stat(desc = "% attempted to preempt other tasks but failed")]
    pub preempt_fail: f64,
    #[stat(desc = "% violated config due to CPU affinity")]
    pub affn_viol: f64,
    #[stat(desc = "% continued executing after slice expiration")]
    pub keep: f64,
    #[stat(desc = "% disallowed to continue executing due to max_exec")]
    pub keep_fail_max_exec: f64,
    #[stat(desc = "% disallowed to continue executing due to other tasks")]
    pub keep_fail_busy: f64,
    #[stat(desc = "whether is exclusive", _om_skip)]
    pub is_excl: u32,
    #[stat(desc = "count of times an excl task skipped a CPU as the sibling was also excl")]
    pub excl_collision: f64,
    #[stat(desc = "% a sibling CPU was preempted for an exclusive task")]
    pub excl_preempt: f64,
    #[stat(desc = "% yielded")]
    pub yielded: f64,
    #[stat(desc = "count of times yield was ignored")]
    pub yield_ignore: u64,
    #[stat(desc = "% migrated across CPUs")]
    pub migration: f64,
    #[stat(desc = "% migrated across NUMA nodes")]
    pub xnuma_migration: f64,
    #[stat(desc = "% migrated across LLCs")]
    pub xllc_migration: f64,
    #[stat(desc = "% migration skipped across LLCs due to xllc_mig_min_us")]
    pub xllc_migration_skip: f64,
    #[stat(desc = "% wakers across layers")]
    pub xlayer_wake: f64,
    #[stat(desc = "% rewakers across layers where waker has waken the task previously")]
    pub xlayer_rewake: f64,
    #[stat(desc = "% LLC draining tried")]
    pub llc_drain_try: f64,
    #[stat(desc = "% LLC draining succeeded")]
    pub llc_drain: f64,
    #[stat(desc = "% skip LLC dispatch on remote node")]
    pub skip_remote_node: f64,
    // Raw bitmask words; reconstructed with Cpumask::from_vec() in format().
    #[stat(desc = "mask of allocated CPUs", _om_skip)]
    pub cpus: Vec<u64>,
    #[stat(desc = "count of CPUs assigned")]
    pub cur_nr_cpus: u32,
    #[stat(desc = "minimum # of CPUs assigned")]
    pub min_nr_cpus: u32,
    #[stat(desc = "maximum # of CPUs assigned")]
    pub max_nr_cpus: u32,
    #[stat(desc = "count of CPUs assigned per LLC")]
    pub nr_llc_cpus: Vec<u32>,
    #[stat(desc = "slice duration config")]
    pub slice_us: u64,
    #[stat(desc = "Per-LLC scheduling event fractions")]
    pub llc_fracs: Vec<f64>,
    // Latencies are stored in seconds (converted from ns in new()).
    #[stat(desc = "Per-LLC average latency")]
    pub llc_lats: Vec<f64>,
}
202
203impl LayerStats {
204    pub fn new(
205        lidx: usize,
206        layer: &Layer,
207        stats: &Stats,
208        bstats: &BpfStats,
209        nr_cpus_range: (usize, usize),
210    ) -> Self {
211        let lstat = |sidx| bstats.lstats[lidx][sidx];
212        let ltotal = lstat(LSTAT_SEL_LOCAL)
213            + lstat(LSTAT_ENQ_LOCAL)
214            + lstat(LSTAT_ENQ_WAKEUP)
215            + lstat(LSTAT_ENQ_EXPIRE)
216            + lstat(LSTAT_ENQ_REENQ)
217            + lstat(LSTAT_KEEP);
218        let lstat_pct = |sidx| {
219            if ltotal != 0 {
220                lstat(sidx) as f64 / ltotal as f64 * 100.0
221            } else {
222                0.0
223            }
224        };
225
226        let util_sum = stats.layer_utils[lidx]
227            .iter()
228            .take(LAYER_USAGE_SUM_UPTO + 1)
229            .sum::<f64>();
230
231        Self {
232            index: lidx,
233            util: util_sum * 100.0,
234            util_open_frac: calc_frac(stats.layer_utils[lidx][LAYER_USAGE_OPEN], util_sum),
235            util_protected_frac: calc_frac(
236                stats.layer_utils[lidx][LAYER_USAGE_PROTECTED],
237                util_sum,
238            ),
239            util_protected_preempt_frac: calc_frac(
240                stats.layer_utils[lidx][LAYER_USAGE_PROTECTED_PREEMPT],
241                util_sum,
242            ),
243            util_frac: calc_frac(util_sum, stats.total_util),
244            tasks: stats.nr_layer_tasks[lidx] as u32,
245            total: ltotal,
246            sel_local: lstat_pct(LSTAT_SEL_LOCAL),
247            enq_local: lstat_pct(LSTAT_ENQ_LOCAL),
248            enq_wakeup: lstat_pct(LSTAT_ENQ_WAKEUP),
249            enq_expire: lstat_pct(LSTAT_ENQ_EXPIRE),
250            enq_reenq: lstat_pct(LSTAT_ENQ_REENQ),
251            min_exec: lstat_pct(LSTAT_MIN_EXEC),
252            min_exec_us: (lstat(LSTAT_MIN_EXEC_NS) / 1000) as u64,
253            open_idle: lstat_pct(LSTAT_OPEN_IDLE),
254            preempt: lstat_pct(LSTAT_PREEMPT),
255            preempt_xllc: lstat_pct(LSTAT_PREEMPT_XLLC),
256            preempt_xnuma: lstat_pct(LSTAT_PREEMPT_XNUMA),
257            preempt_first: lstat_pct(LSTAT_PREEMPT_FIRST),
258            preempt_idle: lstat_pct(LSTAT_PREEMPT_IDLE),
259            preempt_fail: lstat_pct(LSTAT_PREEMPT_FAIL),
260            affn_viol: lstat_pct(LSTAT_AFFN_VIOL),
261            keep: lstat_pct(LSTAT_KEEP),
262            keep_fail_max_exec: lstat_pct(LSTAT_KEEP_FAIL_MAX_EXEC),
263            keep_fail_busy: lstat_pct(LSTAT_KEEP_FAIL_BUSY),
264            is_excl: layer.kind.common().exclusive as u32,
265            excl_collision: lstat_pct(LSTAT_EXCL_COLLISION),
266            excl_preempt: lstat_pct(LSTAT_EXCL_PREEMPT),
267            yielded: lstat_pct(LSTAT_YIELD),
268            yield_ignore: lstat(LSTAT_YIELD_IGNORE) as u64,
269            migration: lstat_pct(LSTAT_MIGRATION),
270            xnuma_migration: lstat_pct(LSTAT_XNUMA_MIGRATION),
271            xlayer_wake: lstat_pct(LSTAT_XLAYER_WAKE),
272            xlayer_rewake: lstat_pct(LSTAT_XLAYER_REWAKE),
273            xllc_migration: lstat_pct(LSTAT_XLLC_MIGRATION),
274            xllc_migration_skip: lstat_pct(LSTAT_XLLC_MIGRATION_SKIP),
275            llc_drain_try: lstat_pct(LSTAT_LLC_DRAIN_TRY),
276            llc_drain: lstat_pct(LSTAT_LLC_DRAIN),
277            skip_remote_node: lstat_pct(LSTAT_SKIP_REMOTE_NODE),
278            cpus: layer.cpus.as_raw_slice().to_vec(),
279            cur_nr_cpus: layer.cpus.weight() as u32,
280            min_nr_cpus: nr_cpus_range.0 as u32,
281            max_nr_cpus: nr_cpus_range.1 as u32,
282            nr_llc_cpus: layer.nr_llc_cpus.iter().map(|&v| v as u32).collect(),
283            slice_us: stats.layer_slice_us[lidx],
284            llc_fracs: {
285                let sid = LLC_LSTAT_CNT;
286                let sum = bstats.llc_lstats[lidx]
287                    .iter()
288                    .map(|lstats| lstats[sid])
289                    .sum::<u64>() as f64;
290                bstats.llc_lstats[lidx]
291                    .iter()
292                    .map(|lstats| calc_frac(lstats[sid] as f64, sum))
293                    .collect()
294            },
295            llc_lats: bstats.llc_lstats[lidx]
296                .iter()
297                .map(|lstats| lstats[LLC_LSTAT_LAT] as f64 / 1_000_000_000.0)
298                .collect(),
299        }
300    }
301
302    pub fn format<W: Write>(&self, w: &mut W, name: &str, header_width: usize) -> Result<()> {
303        writeln!(
304            w,
305            "  {:<width$}: util/open/frac={:6.1}/{}/{:7.1} prot/prot_preempt={}/{} tasks={:6}",
306            name,
307            self.util,
308            fmt_pct(self.util_open_frac),
309            self.util_frac,
310            fmt_pct(self.util_protected_frac),
311            fmt_pct(self.util_protected_preempt_frac),
312            self.tasks,
313            width = header_width,
314        )?;
315
316        writeln!(
317            w,
318            "  {:<width$}  tot={:7} local_sel/enq={}/{} wake/exp/reenq={}/{}/{}",
319            "",
320            self.total,
321            fmt_pct(self.sel_local),
322            fmt_pct(self.enq_local),
323            fmt_pct(self.enq_wakeup),
324            fmt_pct(self.enq_expire),
325            fmt_pct(self.enq_reenq),
326            width = header_width,
327        )?;
328
329        writeln!(
330            w,
331            "  {:<width$}  keep/max/busy={}/{}/{} yield/ign={}/{}",
332            "",
333            fmt_pct(self.keep),
334            fmt_pct(self.keep_fail_max_exec),
335            fmt_pct(self.keep_fail_busy),
336            fmt_pct(self.yielded),
337            fmt_num(self.yield_ignore),
338            width = header_width,
339        )?;
340
341        writeln!(
342            w,
343            "  {:<width$}  open_idle={} mig={} xnuma_mig={} xllc_mig/skip={}/{} affn_viol={}",
344            "",
345            fmt_pct(self.open_idle),
346            fmt_pct(self.migration),
347            fmt_pct(self.xnuma_migration),
348            fmt_pct(self.xllc_migration),
349            fmt_pct(self.xllc_migration_skip),
350            fmt_pct(self.affn_viol),
351            width = header_width,
352        )?;
353
354        writeln!(
355            w,
356            "  {:<width$}  preempt/first/xllc/xnuma/idle/fail={}/{}/{}/{}/{}/{}",
357            "",
358            fmt_pct(self.preempt),
359            fmt_pct(self.preempt_first),
360            fmt_pct(self.preempt_xllc),
361            fmt_pct(self.preempt_xnuma),
362            fmt_pct(self.preempt_idle),
363            fmt_pct(self.preempt_fail),
364            width = header_width,
365        )?;
366
367        writeln!(
368            w,
369            "  {:<width$}  xlayer_wake/re={}/{} llc_drain/try={}/{} skip_rnode={}",
370            "",
371            fmt_pct(self.xlayer_wake),
372            fmt_pct(self.xlayer_rewake),
373            fmt_pct(self.llc_drain),
374            fmt_pct(self.llc_drain_try),
375            fmt_pct(self.skip_remote_node),
376            width = header_width,
377        )?;
378
379        writeln!(
380            w,
381            "  {:<width$}  slice={}ms min_exec={}/{:7.2}ms",
382            "",
383            self.slice_us as f64 / 1000.0,
384            fmt_pct(self.min_exec),
385            self.min_exec_us as f64 / 1000.0,
386            width = header_width
387        )?;
388
389        let cpumask = Cpumask::from_vec(self.cpus.clone());
390
391        writeln!(
392            w,
393            "  {:<width$}  cpus={:3} [{:3},{:3}] {}",
394            "",
395            self.cur_nr_cpus,
396            self.min_nr_cpus,
397            self.max_nr_cpus,
398            &cpumask,
399            width = header_width
400        )?;
401
402        write!(
403            w,
404            "  {:<width$}  [LLC] nr_cpus: sched% lat_ms",
405            "",
406            width = header_width
407        )?;
408
409        for (i, (&frac, &lat)) in self.llc_fracs.iter().zip(self.llc_lats.iter()).enumerate() {
410            if (i % 4) == 0 {
411                writeln!(w, "")?;
412                write!(w, "  {:<width$}  [{:03}]", "", i, width = header_width)?;
413            } else {
414                write!(w, " |")?;
415            }
416            write!(
417                w,
418                " {:2}:{}%{:7.2}",
419                self.nr_llc_cpus[i],
420                fmt_pct(frac),
421                lat * 1_000.0
422            )?;
423        }
424        writeln!(w, "")?;
425
426        if self.is_excl != 0 {
427            writeln!(
428                w,
429                "  {:<width$}  excl_coll={} excl_preempt={}",
430                "",
431                fmt_pct(self.excl_collision),
432                fmt_pct(self.excl_preempt),
433                width = header_width,
434            )?;
435        } else if self.excl_collision != 0.0 || self.excl_preempt != 0.0 {
436            warn!(
437                "{}: exclusive is off but excl_coll={} excl_preempt={}",
438                name,
439                fmt_pct(self.excl_collision),
440                fmt_pct(self.excl_preempt),
441            );
442        }
443
444        Ok(())
445    }
446}
447
448#[stat_doc]
449#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
450#[stat(top)]
451pub struct SysStats {
452    #[stat(desc = "timestamp", _om_skip)]
453    pub at: f64,
454    #[stat(desc = "# of NUMA nodes")]
455    pub nr_nodes: usize,
456    #[stat(desc = "# sched events during the period")]
457    pub total: u64,
458    #[stat(desc = "% dispatched directly into an idle CPU from select_cpu")]
459    pub local_sel: f64,
460    #[stat(desc = "% dispatched directly into an idle CPU from enqueue")]
461    pub local_enq: f64,
462    #[stat(desc = "% open layer tasks scheduled into allocated but idle CPUs")]
463    pub open_idle: f64,
464    #[stat(desc = "% violated config due to CPU affinity")]
465    pub affn_viol: f64,
466    #[stat(desc = "% sent to hi fallback DSQs")]
467    pub hi_fb: f64,
468    #[stat(desc = "% sent to lo fallback DSQs")]
469    pub lo_fb: f64,
470    #[stat(desc = "count of times an excl task skipped a CPU as the sibling was also excl")]
471    pub excl_collision: f64,
472    #[stat(desc = "count of times a sibling CPU was preempted for an excl task")]
473    pub excl_preempt: f64,
474    #[stat(desc = "count of times a CPU skipped dispatching due to an excl task on the sibling")]
475    pub excl_idle: f64,
476    #[stat(
477        desc = "count of times an idle sibling CPU was woken up after an excl task is finished"
478    )]
479    pub excl_wakeup: f64,
480    #[stat(desc = "CPU time this binary consumed during the period")]
481    pub proc_ms: u64,
482    #[stat(desc = "CPU busy % (100% means all CPU)")]
483    pub busy: f64,
484    #[stat(desc = "CPU util % (100% means one CPU)")]
485    pub util: f64,
486    #[stat(desc = "CPU util % used by hi fallback DSQs")]
487    pub hi_fb_util: f64,
488    #[stat(desc = "CPU util % used by lo fallback DSQs")]
489    pub lo_fb_util: f64,
490    #[stat(desc = "Number of tasks dispatched via antistall")]
491    pub antistall: u64,
492    #[stat(desc = "fallback CPU")]
493    pub fallback_cpu: u32,
494    #[stat(desc = "per-layer statistics")]
495    pub fallback_cpu_util: f64,
496    #[stat(desc = "fallback CPU util %")]
497    pub layers: BTreeMap<String, LayerStats>,
498}
499
500impl SysStats {
501    pub fn new(stats: &Stats, bstats: &BpfStats, fallback_cpu: usize) -> Result<Self> {
502        let lsum = |idx| stats.bpf_stats.lstats_sums[idx];
503        let total = lsum(LSTAT_SEL_LOCAL)
504            + lsum(LSTAT_ENQ_LOCAL)
505            + lsum(LSTAT_ENQ_WAKEUP)
506            + lsum(LSTAT_ENQ_EXPIRE)
507            + lsum(LSTAT_ENQ_REENQ)
508            + lsum(LSTAT_KEEP);
509        let lsum_pct = |idx| {
510            if total != 0 {
511                lsum(idx) as f64 / total as f64 * 100.0
512            } else {
513                0.0
514            }
515        };
516
517        let elapsed_ns = stats.elapsed.as_nanos();
518
519        Ok(Self {
520            at: SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs_f64(),
521            nr_nodes: stats.nr_nodes,
522            total,
523            local_sel: lsum_pct(LSTAT_SEL_LOCAL),
524            local_enq: lsum_pct(LSTAT_ENQ_LOCAL),
525            open_idle: lsum_pct(LSTAT_OPEN_IDLE),
526            affn_viol: lsum_pct(LSTAT_AFFN_VIOL),
527            hi_fb: calc_frac(
528                stats.bpf_stats.gstats[GSTAT_HI_FB_EVENTS] as f64,
529                total as f64,
530            ),
531            lo_fb: calc_frac(
532                stats.bpf_stats.gstats[GSTAT_LO_FB_EVENTS] as f64,
533                total as f64,
534            ),
535            excl_collision: lsum_pct(LSTAT_EXCL_COLLISION),
536            excl_preempt: lsum_pct(LSTAT_EXCL_PREEMPT),
537            excl_idle: bstats.gstats[GSTAT_EXCL_IDLE] as f64 / total as f64,
538            excl_wakeup: bstats.gstats[GSTAT_EXCL_WAKEUP] as f64 / total as f64,
539            proc_ms: stats.processing_dur.as_millis() as u64,
540            busy: stats.cpu_busy * 100.0,
541            util: stats.total_util * 100.0,
542            hi_fb_util: stats.bpf_stats.gstats[GSTAT_HI_FB_USAGE] as f64 / elapsed_ns as f64
543                * 100.0,
544            lo_fb_util: stats.bpf_stats.gstats[GSTAT_LO_FB_USAGE] as f64 / elapsed_ns as f64
545                * 100.0,
546            antistall: stats.bpf_stats.gstats[GSTAT_ANTISTALL],
547            fallback_cpu: fallback_cpu as u32,
548            fallback_cpu_util: stats.bpf_stats.gstats[GSTAT_FB_CPU_USAGE] as f64
549                / elapsed_ns as f64
550                * 100.0,
551            layers: BTreeMap::new(),
552        })
553    }
554
555    pub fn format<W: Write>(&self, w: &mut W) -> Result<()> {
556        writeln!(
557            w,
558            "tot={:7} local_sel/enq={}/{} open_idle={} affn_viol={} hi/lo={}/{}",
559            self.total,
560            fmt_pct(self.local_sel),
561            fmt_pct(self.local_enq),
562            fmt_pct(self.open_idle),
563            fmt_pct(self.affn_viol),
564            fmt_pct(self.hi_fb),
565            fmt_pct(self.lo_fb),
566        )?;
567
568        writeln!(
569            w,
570            "busy={:5.1} util/hi/lo={:7.1}/{}/{} fallback_cpu/util={:3}/{:4.1} proc={:?}ms antistall={}",
571            self.busy,
572            self.util,
573            fmt_pct(self.hi_fb_util),
574            fmt_pct(self.lo_fb_util),
575            self.fallback_cpu,
576            self.fallback_cpu_util,
577            self.proc_ms,
578            self.antistall,
579        )?;
580
581        writeln!(
582            w,
583            "excl_coll={:.2} excl_preempt={:.2} excl_idle={:.2} excl_wakeup={:.2}",
584            self.excl_collision, self.excl_preempt, self.excl_idle, self.excl_wakeup
585        )?;
586
587        Ok(())
588    }
589
590    pub fn format_all<W: Write>(&self, w: &mut W) -> Result<()> {
591        self.format(w)?;
592
593        let header_width = self
594            .layers
595            .keys()
596            .map(|name| name.len())
597            .max()
598            .unwrap_or(0)
599            .max(4);
600
601        let mut idx_to_name: Vec<(usize, &String)> =
602            self.layers.iter().map(|(k, v)| (v.index, k)).collect();
603
604        idx_to_name.sort();
605
606        for (_idx, name) in &idx_to_name {
607            self.layers[*name].format(w, name, header_width)?;
608        }
609
610        Ok(())
611    }
612}
613
/// Requests sent from a stats-server connection thread to the scheduler.
/// Each variant carries the requesting thread's id so the scheduler can
/// track per-connection state.
#[derive(Debug)]
pub enum StatsReq {
    /// Open a session; answered with `StatsRes::Hello` carrying a `Stats`.
    Hello(ThreadId),
    /// Request refreshed stats, returning the previously received `Stats`
    /// so it can be updated in place.
    Refresh(ThreadId, Stats),
    /// Close the session; answered with `StatsRes::Bye`.
    Bye(ThreadId),
}
620
/// Responses from the scheduler to `StatsReq` messages.
#[derive(Debug)]
pub enum StatsRes {
    /// Session opened; carries the initial `Stats` state.
    Hello(Stats),
    /// Refreshed state plus the formatted system stats for this period.
    Refreshed((Stats, SysStats)),
    /// Session closed.
    Bye,
}
627
/// Construct the scx_stats server description: per-connection open/read/close
/// handlers that proxy stats requests to the scheduler over the provided
/// request/response channels.
pub fn server_data() -> StatsServerData<StatsReq, StatsRes> {
    let open: Box<dyn StatsOpener<StatsReq, StatsRes>> = Box::new(move |(req_ch, res_ch)| {
        // Perform the Hello handshake; the scheduler hands back the initial
        // Stats state for this connection.
        let tid = current().id();
        req_ch.send(StatsReq::Hello(tid))?;
        let mut stats = Some(match res_ch.recv()? {
            StatsRes::Hello(v) => v,
            res => bail!("invalid response to Hello: {:?}", &res),
        });

        // Each read sends the held Stats back for refresh and stores the
        // returned updated Stats for the next read; the closure owns the
        // state via `move`.
        let read: Box<dyn StatsReader<StatsReq, StatsRes>> =
            Box::new(move |_args, (req_ch, res_ch)| {
                req_ch.send(StatsReq::Refresh(tid, stats.take().unwrap()))?;
                let (new_stats, sys_stats) = match res_ch.recv()? {
                    StatsRes::Refreshed(v) => v,
                    res => bail!("invalid response to Refresh: {:?}", &res),
                };
                stats = Some(new_stats);
                sys_stats.to_json()
            });

        Ok(read)
    });

    // Bye handshake on connection close; errors here are fatal for the
    // connection thread, hence unwrap/panic rather than propagation.
    let close: Box<dyn StatsCloser<StatsReq, StatsRes>> = Box::new(move |(req_ch, res_ch)| {
        req_ch.send(StatsReq::Bye(current().id())).unwrap();
        match res_ch.recv().unwrap() {
            StatsRes::Bye => {}
            res => panic!("invalid response to Bye: {:?}", &res),
        }
    });

    StatsServerData::new()
        .add_meta(LayerStats::meta())
        .add_meta(SysStats::meta())
        .add_ops(
            "top",
            StatsOps {
                open,
                close: Some(close),
            },
        )
}
670
/// Poll `SysStats` from the stats server every `intv` and print them to
/// stdout until `shutdown` is set.
pub fn monitor(intv: Duration, shutdown: Arc<AtomicBool>) -> Result<()> {
    scx_utils::monitor_stats::<SysStats>(
        &vec![],
        intv,
        || shutdown.load(Ordering::Relaxed),
        |sst| {
            // `sst.at` is seconds since the Unix epoch; render a local-time
            // header line before the stats body.
            let dt = DateTime::<Local>::from(UNIX_EPOCH + Duration::from_secs_f64(sst.at));
            println!("###### {} ######", dt.to_rfc2822());
            sst.format_all(&mut std::io::stdout())
        },
    )
}