// scx_mitosis/stats.rs

use std::collections::BTreeMap;
use std::io::Write;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::time::Duration;

use anyhow::Result;
use serde::Deserialize;
use serde::Serialize;

use scx_stats::prelude::*;
use scx_stats_derive::stat_doc;
use scx_stats_derive::Stats;

use crate::DistributionStats;

18#[stat_doc]
19#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
20#[stat(_om_prefix = "c_")]
21#[stat(top)]
22pub struct CellMetrics {
23    #[stat(desc = "Number of cpus")]
24    pub num_cpus: u32,
25    #[stat(desc = "Local queue %")]
26    pub local_q_pct: f64,
27    #[stat(desc = "CPU queue %")]
28    pub cpu_q_pct: f64,
29    #[stat(desc = "Cell queue %")]
30    pub cell_q_pct: f64,
31    #[stat(desc = "Borrowed CPU %")]
32    pub borrowed_pct: f64,
33    #[stat(desc = "Affinity violations % of global")]
34    pub affn_violations_pct: f64,
35    #[stat(desc = "Steal %")]
36    pub steal_pct: f64,
37    #[stat(desc = "Pin reject skipped %")]
38    pub pin_skip_pct: f64,
39    #[stat(desc = "Slice shrink events")]
40    pub slice_shrink: u64,
41    #[stat(desc = "Slice shrink at max")]
42    pub slice_shrink_max: u64,
43    #[stat(desc = "Slice shrink proportional")]
44    pub slice_shrink_proportional: u64,
45    #[stat(desc = "Slice shrink at min")]
46    pub slice_shrink_min: u64,
47    #[stat(desc = "Decision share % of global")]
48    pub share_of_decisions_pct: f64,
49    #[stat(desc = "Cell scheduling decisions")]
50    total_decisions: u64,
51    #[stat(desc = "CPU utilization %")]
52    pub util_pct: f64,
53    #[stat(desc = "Borrowed CPU time % of running")]
54    pub demand_borrow_pct: f64,
55    #[stat(desc = "Lent CPU time %")]
56    pub lent_pct: f64,
57    #[stat(desc = "EWMA-smoothed utilization %")]
58    pub smoothed_util_pct: f64,
59}
60
61impl CellMetrics {
62    pub fn update(&mut self, ds: &DistributionStats) {
63        self.local_q_pct = ds.local_q_pct;
64        self.cpu_q_pct = ds.cpu_q_pct;
65        self.cell_q_pct = ds.cell_q_pct;
66        self.borrowed_pct = ds.borrowed_pct;
67        self.affn_violations_pct = ds.affn_viol_pct;
68        self.steal_pct = ds.steal_pct;
69        self.pin_skip_pct = ds.pin_skip_pct;
70        self.share_of_decisions_pct = ds.share_of_decisions_pct;
71        self.total_decisions = ds.total_decisions;
72    }
73
74    pub fn update_demand(&mut self, util_pct: f64, demand_borrow_pct: f64, lent_pct: f64) {
75        self.util_pct = util_pct;
76        self.demand_borrow_pct = demand_borrow_pct;
77        self.lent_pct = lent_pct;
78    }
79}
80
81#[stat_doc]
82#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
83#[stat(top)]
84pub struct Metrics {
85    #[stat(desc = "Number of cells")]
86    pub num_cells: u32,
87    #[stat(desc = "Local queue %")]
88    pub local_q_pct: f64,
89    #[stat(desc = "CPU queue %")]
90    pub cpu_q_pct: f64,
91    #[stat(desc = "Cell queue %")]
92    pub cell_q_pct: f64,
93    #[stat(desc = "Borrowed CPU %")]
94    pub borrowed_pct: f64,
95    #[stat(desc = "Affinity violations % of global")]
96    pub affn_violations_pct: f64,
97    #[stat(desc = "Steal %")]
98    pub steal_pct: f64,
99    #[stat(desc = "Pin reject skipped %")]
100    pub pin_skip_pct: f64,
101    #[stat(desc = "Slice shrink events")]
102    pub slice_shrink: u64,
103    #[stat(desc = "Slice shrink at max")]
104    pub slice_shrink_max: u64,
105    #[stat(desc = "Slice shrink proportional")]
106    pub slice_shrink_proportional: u64,
107    #[stat(desc = "Slice shrink at min")]
108    pub slice_shrink_min: u64,
109    #[stat(desc = "Decision share % of global")]
110    pub share_of_decisions_pct: f64,
111    #[stat(desc = "Cell scheduling decisions")]
112    total_decisions: u64,
113    #[stat(desc = "CPU utilization %")]
114    pub util_pct: f64,
115    #[stat(desc = "Borrowed CPU time % of running")]
116    pub demand_borrow_pct: f64,
117    #[stat(desc = "Lent CPU time %")]
118    pub lent_pct: f64,
119    #[stat(desc = "Number of rebalancing events")]
120    pub rebalance_count: u64,
121    #[stat(desc = "Per-cell metrics")] // TODO: cell names
122    pub cells: BTreeMap<u32, CellMetrics>,
123}
124
125impl Metrics {
126    pub fn update(&mut self, ds: &DistributionStats) {
127        self.local_q_pct = ds.local_q_pct;
128        self.cpu_q_pct = ds.cpu_q_pct;
129        self.cell_q_pct = ds.cell_q_pct;
130        self.borrowed_pct = ds.borrowed_pct;
131        self.affn_violations_pct = ds.affn_viol_pct;
132        self.steal_pct = ds.steal_pct;
133        self.pin_skip_pct = ds.pin_skip_pct;
134        self.share_of_decisions_pct = ds.share_of_decisions_pct;
135        self.total_decisions = ds.total_decisions;
136    }
137
138    pub fn update_demand(&mut self, util_pct: f64, demand_borrow_pct: f64, lent_pct: f64) {
139        self.util_pct = util_pct;
140        self.demand_borrow_pct = demand_borrow_pct;
141        self.lent_pct = lent_pct;
142    }
143
144    fn delta(&self, _: &Self) -> Self {
145        Self { ..self.clone() }
146    }
147
148    fn format<W: Write>(&self, w: &mut W) -> Result<()> {
149        writeln!(w, "{}", serde_json::to_string_pretty(self)?)?;
150        Ok(())
151    }
152}
153
154pub fn server_data() -> StatsServerData<(), Metrics> {
155    let open: Box<dyn StatsOpener<(), Metrics>> = Box::new(move |(req_ch, res_ch)| {
156        req_ch.send(())?;
157        let mut prev = res_ch.recv()?;
158
159        let read: Box<dyn StatsReader<(), Metrics>> = Box::new(move |_args, (req_ch, res_ch)| {
160            req_ch.send(())?;
161            let cur = res_ch.recv()?;
162            let delta = cur.delta(&prev);
163            prev = cur;
164            delta.to_json()
165        });
166
167        Ok(read)
168    });
169
170    StatsServerData::new()
171        .add_meta(Metrics::meta())
172        .add_meta(CellMetrics::meta())
173        .add_ops("top", StatsOps { open, close: None })
174}
175
176pub fn monitor(intv: Duration, shutdown: Arc<AtomicBool>) -> Result<()> {
177    scx_utils::monitor_stats::<Metrics>(
178        &[],
179        intv,
180        || shutdown.load(Ordering::Relaxed),
181        |metrics| metrics.format(&mut std::io::stdout()),
182    )
183}