1use std::collections::BTreeMap;
2use std::io::Write;
3use std::sync::atomic::AtomicBool;
4use std::sync::atomic::Ordering;
5use std::sync::Arc;
6use std::time::Duration;
7
8use anyhow::Result;
9use serde::Deserialize;
10use serde::Serialize;
11
12use scx_stats::prelude::*;
13use scx_stats_derive::stat_doc;
14use scx_stats_derive::Stats;
15
16use crate::DistributionStats;
17
/// Metrics for a single cell.
///
/// Values of this type are stored in `Metrics::cells`, keyed by a `u32`
/// (presumably the cell id). The distribution fields are filled in by
/// `CellMetrics::update` and the utilization fields by
/// `CellMetrics::update_demand`. `num_cpus`, the `slice_shrink*` counters and
/// `smoothed_util_pct` are not written by either of those methods, so callers
/// are expected to assign them directly.
///
/// The human-readable description of every field lives in its
/// `#[stat(desc = ...)]` attribute.
// NOTE(review): both this struct and `Metrics` carry `#[stat(top)]`; confirm
// that marking the nested per-cell struct as top-level is intended.
#[stat_doc]
#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
#[stat(_om_prefix = "c_")]
#[stat(top)]
pub struct CellMetrics {
    #[stat(desc = "Number of cpus")]
    pub num_cpus: u32,

    // --- Fields copied from `DistributionStats` by `update` ---
    #[stat(desc = "Local queue %")]
    pub local_q_pct: f64,
    #[stat(desc = "CPU queue %")]
    pub cpu_q_pct: f64,
    #[stat(desc = "Cell queue %")]
    pub cell_q_pct: f64,
    #[stat(desc = "Borrowed CPU %")]
    pub borrowed_pct: f64,
    // Sourced from `DistributionStats::affn_viol_pct` (note the shorter name).
    #[stat(desc = "Affinity violations % of global")]
    pub affn_violations_pct: f64,
    #[stat(desc = "Steal %")]
    pub steal_pct: f64,
    #[stat(desc = "Pin reject skipped %")]
    pub pin_skip_pct: f64,

    // --- Slice-shrink counters; not touched by `update` / `update_demand` ---
    #[stat(desc = "Slice shrink events")]
    pub slice_shrink: u64,
    #[stat(desc = "Slice shrink at max")]
    pub slice_shrink_max: u64,
    #[stat(desc = "Slice shrink proportional")]
    pub slice_shrink_proportional: u64,
    #[stat(desc = "Slice shrink at min")]
    pub slice_shrink_min: u64,

    // --- More fields copied by `update` ---
    #[stat(desc = "Decision share % of global")]
    pub share_of_decisions_pct: f64,
    // The only non-`pub` field: it can be set from outside this module only
    // through `update`. It is still part of the serialized output.
    #[stat(desc = "Cell scheduling decisions")]
    total_decisions: u64,

    // --- Fields set by `update_demand` ---
    #[stat(desc = "CPU utilization %")]
    pub util_pct: f64,
    #[stat(desc = "Borrowed CPU time % of running")]
    pub demand_borrow_pct: f64,
    #[stat(desc = "Lent CPU time %")]
    pub lent_pct: f64,

    // Not written by `update_demand`; assigned elsewhere.
    #[stat(desc = "EWMA-smoothed utilization %")]
    pub smoothed_util_pct: f64,
}
60
61impl CellMetrics {
62 pub fn update(&mut self, ds: &DistributionStats) {
63 self.local_q_pct = ds.local_q_pct;
64 self.cpu_q_pct = ds.cpu_q_pct;
65 self.cell_q_pct = ds.cell_q_pct;
66 self.borrowed_pct = ds.borrowed_pct;
67 self.affn_violations_pct = ds.affn_viol_pct;
68 self.steal_pct = ds.steal_pct;
69 self.pin_skip_pct = ds.pin_skip_pct;
70 self.share_of_decisions_pct = ds.share_of_decisions_pct;
71 self.total_decisions = ds.total_decisions;
72 }
73
74 pub fn update_demand(&mut self, util_pct: f64, demand_borrow_pct: f64, lent_pct: f64) {
75 self.util_pct = util_pct;
76 self.demand_borrow_pct = demand_borrow_pct;
77 self.lent_pct = lent_pct;
78 }
79}
80
/// Scheduler-wide metrics plus a per-cell breakdown.
///
/// This is the type served by `server_data` under the `"top"` op and printed
/// by `monitor`. The aggregate fields mirror those of `CellMetrics`; `cells`
/// holds one `CellMetrics` per `u32` key (presumably the cell id).
///
/// The distribution fields are filled in by `Metrics::update` and the
/// utilization fields by `Metrics::update_demand`. `num_cells`, the
/// `slice_shrink*` counters, `rebalance_count` and `cells` are not written by
/// either of those methods, so callers are expected to assign them directly.
#[stat_doc]
#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
#[stat(top)]
pub struct Metrics {
    #[stat(desc = "Number of cells")]
    pub num_cells: u32,

    // --- Fields copied from `DistributionStats` by `update` ---
    #[stat(desc = "Local queue %")]
    pub local_q_pct: f64,
    #[stat(desc = "CPU queue %")]
    pub cpu_q_pct: f64,
    #[stat(desc = "Cell queue %")]
    pub cell_q_pct: f64,
    #[stat(desc = "Borrowed CPU %")]
    pub borrowed_pct: f64,
    // Sourced from `DistributionStats::affn_viol_pct` (note the shorter name).
    #[stat(desc = "Affinity violations % of global")]
    pub affn_violations_pct: f64,
    #[stat(desc = "Steal %")]
    pub steal_pct: f64,
    #[stat(desc = "Pin reject skipped %")]
    pub pin_skip_pct: f64,

    // --- Slice-shrink counters; not touched by `update` / `update_demand` ---
    #[stat(desc = "Slice shrink events")]
    pub slice_shrink: u64,
    #[stat(desc = "Slice shrink at max")]
    pub slice_shrink_max: u64,
    #[stat(desc = "Slice shrink proportional")]
    pub slice_shrink_proportional: u64,
    #[stat(desc = "Slice shrink at min")]
    pub slice_shrink_min: u64,

    // --- More fields copied by `update` ---
    #[stat(desc = "Decision share % of global")]
    pub share_of_decisions_pct: f64,
    // The only non-`pub` field: it can be set from outside this module only
    // through `update`. It is still part of the serialized output.
    #[stat(desc = "Cell scheduling decisions")]
    total_decisions: u64,

    // --- Fields set by `update_demand` ---
    #[stat(desc = "CPU utilization %")]
    pub util_pct: f64,
    #[stat(desc = "Borrowed CPU time % of running")]
    pub demand_borrow_pct: f64,
    #[stat(desc = "Lent CPU time %")]
    pub lent_pct: f64,

    #[stat(desc = "Number of rebalancing events")]
    pub rebalance_count: u64,
    // Ordered map, so cells are serialized in ascending key order.
    #[stat(desc = "Per-cell metrics")]
    pub cells: BTreeMap<u32, CellMetrics>,
}
124
125impl Metrics {
126 pub fn update(&mut self, ds: &DistributionStats) {
127 self.local_q_pct = ds.local_q_pct;
128 self.cpu_q_pct = ds.cpu_q_pct;
129 self.cell_q_pct = ds.cell_q_pct;
130 self.borrowed_pct = ds.borrowed_pct;
131 self.affn_violations_pct = ds.affn_viol_pct;
132 self.steal_pct = ds.steal_pct;
133 self.pin_skip_pct = ds.pin_skip_pct;
134 self.share_of_decisions_pct = ds.share_of_decisions_pct;
135 self.total_decisions = ds.total_decisions;
136 }
137
138 pub fn update_demand(&mut self, util_pct: f64, demand_borrow_pct: f64, lent_pct: f64) {
139 self.util_pct = util_pct;
140 self.demand_borrow_pct = demand_borrow_pct;
141 self.lent_pct = lent_pct;
142 }
143
144 fn delta(&self, _: &Self) -> Self {
145 Self { ..self.clone() }
146 }
147
148 fn format<W: Write>(&self, w: &mut W) -> Result<()> {
149 writeln!(w, "{}", serde_json::to_string_pretty(self)?)?;
150 Ok(())
151 }
152}
153
154pub fn server_data() -> StatsServerData<(), Metrics> {
155 let open: Box<dyn StatsOpener<(), Metrics>> = Box::new(move |(req_ch, res_ch)| {
156 req_ch.send(())?;
157 let mut prev = res_ch.recv()?;
158
159 let read: Box<dyn StatsReader<(), Metrics>> = Box::new(move |_args, (req_ch, res_ch)| {
160 req_ch.send(())?;
161 let cur = res_ch.recv()?;
162 let delta = cur.delta(&prev);
163 prev = cur;
164 delta.to_json()
165 });
166
167 Ok(read)
168 });
169
170 StatsServerData::new()
171 .add_meta(Metrics::meta())
172 .add_meta(CellMetrics::meta())
173 .add_ops("top", StatsOps { open, close: None })
174}
175
176pub fn monitor(intv: Duration, shutdown: Arc<AtomicBool>) -> Result<()> {
177 scx_utils::monitor_stats::<Metrics>(
178 &[],
179 intv,
180 || shutdown.load(Ordering::Relaxed),
181 |metrics| metrics.format(&mut std::io::stdout()),
182 )
183}