use std::collections::BTreeMap;
use std::io::Write;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::thread::current;
use std::thread::ThreadId;
use std::time::Duration;
use std::time::SystemTime;
use std::time::UNIX_EPOCH;

use anyhow::bail;
use anyhow::Result;
use chrono::DateTime;
use chrono::Local;
use log::warn;
use scx_stats::prelude::*;
use scx_stats_derive::stat_doc;
use scx_stats_derive::Stats;
use scx_utils::Cpumask;
use serde::Deserialize;
use serde::Serialize;

use crate::bpf_intf;
use crate::BpfStats;
use crate::Layer;
use crate::Stats;
use crate::LAYER_USAGE_OPEN;
use crate::LAYER_USAGE_PROTECTED;
use crate::LAYER_USAGE_PROTECTED_PREEMPT;
use crate::LAYER_USAGE_SUM_UPTO;
const GSTAT_EXCL_IDLE: usize = bpf_intf::global_stat_id_GSTAT_EXCL_IDLE as usize;
const GSTAT_EXCL_WAKEUP: usize = bpf_intf::global_stat_id_GSTAT_EXCL_WAKEUP as usize;
const GSTAT_HI_FB_EVENTS: usize = bpf_intf::global_stat_id_GSTAT_HI_FB_EVENTS as usize;
const GSTAT_HI_FB_USAGE: usize = bpf_intf::global_stat_id_GSTAT_HI_FB_USAGE as usize;
const GSTAT_LO_FB_EVENTS: usize = bpf_intf::global_stat_id_GSTAT_LO_FB_EVENTS as usize;
const GSTAT_LO_FB_USAGE: usize = bpf_intf::global_stat_id_GSTAT_LO_FB_USAGE as usize;
const GSTAT_FB_CPU_USAGE: usize = bpf_intf::global_stat_id_GSTAT_FB_CPU_USAGE as usize;
const GSTAT_ANTISTALL: usize = bpf_intf::global_stat_id_GSTAT_ANTISTALL as usize;
const GSTAT_SKIP_PREEMPT: usize = bpf_intf::global_stat_id_GSTAT_SKIP_PREEMPT as usize;
const GSTAT_FIXUP_VTIME: usize = bpf_intf::global_stat_id_GSTAT_FIXUP_VTIME as usize;
const GSTAT_PREEMPTING_MISMATCH: usize =
    bpf_intf::global_stat_id_GSTAT_PREEMPTING_MISMATCH as usize;

const LSTAT_SEL_LOCAL: usize = bpf_intf::layer_stat_id_LSTAT_SEL_LOCAL as usize;
const LSTAT_ENQ_LOCAL: usize = bpf_intf::layer_stat_id_LSTAT_ENQ_LOCAL as usize;
const LSTAT_ENQ_WAKEUP: usize = bpf_intf::layer_stat_id_LSTAT_ENQ_WAKEUP as usize;
const LSTAT_ENQ_EXPIRE: usize = bpf_intf::layer_stat_id_LSTAT_ENQ_EXPIRE as usize;
const LSTAT_ENQ_REENQ: usize = bpf_intf::layer_stat_id_LSTAT_ENQ_REENQ as usize;
const LSTAT_ENQ_DSQ: usize = bpf_intf::layer_stat_id_LSTAT_ENQ_DSQ as usize;
const LSTAT_MIN_EXEC: usize = bpf_intf::layer_stat_id_LSTAT_MIN_EXEC as usize;
const LSTAT_MIN_EXEC_NS: usize = bpf_intf::layer_stat_id_LSTAT_MIN_EXEC_NS as usize;
const LSTAT_OPEN_IDLE: usize = bpf_intf::layer_stat_id_LSTAT_OPEN_IDLE as usize;
const LSTAT_AFFN_VIOL: usize = bpf_intf::layer_stat_id_LSTAT_AFFN_VIOL as usize;
const LSTAT_KEEP: usize = bpf_intf::layer_stat_id_LSTAT_KEEP as usize;
const LSTAT_KEEP_FAIL_MAX_EXEC: usize = bpf_intf::layer_stat_id_LSTAT_KEEP_FAIL_MAX_EXEC as usize;
const LSTAT_KEEP_FAIL_BUSY: usize = bpf_intf::layer_stat_id_LSTAT_KEEP_FAIL_BUSY as usize;
const LSTAT_PREEMPT: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT as usize;
const LSTAT_PREEMPT_FIRST: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_FIRST as usize;
const LSTAT_PREEMPT_XLLC: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_XLLC as usize;
const LSTAT_PREEMPT_XNUMA: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_XNUMA as usize;
const LSTAT_PREEMPT_IDLE: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_IDLE as usize;
const LSTAT_PREEMPT_FAIL: usize = bpf_intf::layer_stat_id_LSTAT_PREEMPT_FAIL as usize;
const LSTAT_EXCL_COLLISION: usize = bpf_intf::layer_stat_id_LSTAT_EXCL_COLLISION as usize;
const LSTAT_EXCL_PREEMPT: usize = bpf_intf::layer_stat_id_LSTAT_EXCL_PREEMPT as usize;
const LSTAT_YIELD: usize = bpf_intf::layer_stat_id_LSTAT_YIELD as usize;
const LSTAT_YIELD_IGNORE: usize = bpf_intf::layer_stat_id_LSTAT_YIELD_IGNORE as usize;
const LSTAT_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_MIGRATION as usize;
const LSTAT_XNUMA_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_XNUMA_MIGRATION as usize;
const LSTAT_XLLC_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_XLLC_MIGRATION as usize;
const LSTAT_XLLC_MIGRATION_SKIP: usize = bpf_intf::layer_stat_id_LSTAT_XLLC_MIGRATION_SKIP as usize;
const LSTAT_XLAYER_WAKE: usize = bpf_intf::layer_stat_id_LSTAT_XLAYER_WAKE as usize;
const LSTAT_XLAYER_REWAKE: usize = bpf_intf::layer_stat_id_LSTAT_XLAYER_REWAKE as usize;
const LSTAT_LLC_DRAIN_TRY: usize = bpf_intf::layer_stat_id_LSTAT_LLC_DRAIN_TRY as usize;
const LSTAT_LLC_DRAIN: usize = bpf_intf::layer_stat_id_LSTAT_LLC_DRAIN as usize;
const LSTAT_SKIP_REMOTE_NODE: usize = bpf_intf::layer_stat_id_LSTAT_SKIP_REMOTE_NODE as usize;

const LLC_LSTAT_LAT: usize = bpf_intf::llc_layer_stat_id_LLC_LSTAT_LAT as usize;
const LLC_LSTAT_CNT: usize = bpf_intf::llc_layer_stat_id_LLC_LSTAT_CNT as usize;

/// Express `a` as a percentage of `b`, guarding against division by zero.
fn calc_frac(a: f64, b: f64) -> f64 {
    if b != 0.0 {
        a / b * 100.0
    } else {
        0.0
    }
}

/// Format a percentage into a fixed-width column, rounding very small
/// non-zero values up to 0.01 so they stay visible.
fn fmt_pct(v: f64) -> String {
    if v >= 99.995 {
        format!("{:5.1}", v)
    } else if v > 0.0 && v < 0.01 {
        format!("{:5.2}", 0.01)
    } else {
        format!("{:5.2}", v)
    }
}

/// Format a count with `k`/`m` suffixes so large values fit a fixed width.
fn fmt_num(v: u64) -> String {
    if v > 1_000_000 {
        format!("{:5.1}m", v as f64 / 1_000_000.0)
    } else if v > 1_000 {
        format!("{:5.1}k", v as f64 / 1_000.0)
    } else {
        format!("{:5.0} ", v)
    }
}

#[stat_doc]
#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
#[stat(_om_prefix = "l_", _om_label = "layer_name")]
pub struct LayerStats {
    #[stat(desc = "index", _om_skip)]
    pub index: usize,
    #[stat(desc = "Total CPU utilization (100% means one full CPU)")]
    pub util: f64,
    #[stat(desc = "Protected CPU utilization %")]
    pub util_protected_frac: f64,
    #[stat(desc = "Preempt-protected CPU utilization %")]
    pub util_protected_preempt_frac: f64,
    #[stat(desc = "Open CPU utilization %")]
    pub util_open_frac: f64,
    #[stat(desc = "fraction of total CPU utilization")]
    pub util_frac: f64,
    #[stat(desc = "number of tasks")]
    pub tasks: u32,
    #[stat(desc = "count of sched events during the period")]
    pub total: u64,
    #[stat(desc = "% dispatched into idle CPU from select_cpu")]
    pub sel_local: f64,
    #[stat(desc = "% dispatched into idle CPU from enqueue")]
    pub enq_local: f64,
    #[stat(desc = "% enqueued after wakeup")]
    pub enq_wakeup: f64,
    #[stat(desc = "% enqueued after slice expiration")]
    pub enq_expire: f64,
    #[stat(desc = "% re-enqueued due to RT preemption")]
    pub enq_reenq: f64,
    #[stat(desc = "% enqueued into the layer's LLC DSQs")]
    pub enq_dsq: f64,
    #[stat(desc = "count of times exec duration < min_exec_us")]
    pub min_exec: f64,
    #[stat(desc = "total exec durations extended due to min_exec_us")]
    pub min_exec_us: u64,
    #[stat(desc = "% dispatched into idle CPUs occupied by other layers")]
    pub open_idle: f64,
    #[stat(desc = "% preempted other tasks")]
    pub preempt: f64,
    #[stat(desc = "% preempted XLLC tasks")]
    pub preempt_xllc: f64,
    #[stat(desc = "% preempted XNUMA tasks")]
    pub preempt_xnuma: f64,
    #[stat(desc = "% first-preempted other tasks")]
    pub preempt_first: f64,
    #[stat(desc = "% idle-preempted other tasks")]
    pub preempt_idle: f64,
    #[stat(desc = "% attempted to preempt other tasks but failed")]
    pub preempt_fail: f64,
    #[stat(desc = "% violated config due to CPU affinity")]
    pub affn_viol: f64,
    #[stat(desc = "% continued executing after slice expiration")]
    pub keep: f64,
    #[stat(desc = "% disallowed to continue executing due to max_exec")]
    pub keep_fail_max_exec: f64,
    #[stat(desc = "% disallowed to continue executing due to other tasks")]
    pub keep_fail_busy: f64,
    #[stat(desc = "whether the layer is exclusive", _om_skip)]
    pub is_excl: u32,
    #[stat(desc = "count of times an excl task skipped a CPU as the sibling was also excl")]
    pub excl_collision: f64,
    #[stat(desc = "% a sibling CPU was preempted for an exclusive task")]
    pub excl_preempt: f64,
    #[stat(desc = "% yielded")]
    pub yielded: f64,
    #[stat(desc = "count of times yield was ignored")]
    pub yield_ignore: u64,
    #[stat(desc = "% migrated across CPUs")]
    pub migration: f64,
    #[stat(desc = "% migrated across NUMA nodes")]
    pub xnuma_migration: f64,
    #[stat(desc = "% migrated across LLCs")]
    pub xllc_migration: f64,
    #[stat(desc = "% migration skipped across LLCs due to xllc_mig_min_us")]
    pub xllc_migration_skip: f64,
    #[stat(desc = "% wakers across layers")]
    pub xlayer_wake: f64,
    #[stat(desc = "% rewakers across layers where the waker had woken the task previously")]
    pub xlayer_rewake: f64,
    #[stat(desc = "% LLC draining tried")]
    pub llc_drain_try: f64,
    #[stat(desc = "% LLC draining succeeded")]
    pub llc_drain: f64,
    #[stat(desc = "% skip LLC dispatch on remote node")]
    pub skip_remote_node: f64,
    #[stat(desc = "mask of allocated CPUs", _om_skip)]
    pub cpus: Vec<u64>,
    #[stat(desc = "count of CPUs assigned")]
    pub cur_nr_cpus: u32,
    #[stat(desc = "minimum # of CPUs assigned")]
    pub min_nr_cpus: u32,
    #[stat(desc = "maximum # of CPUs assigned")]
    pub max_nr_cpus: u32,
    #[stat(desc = "count of CPUs assigned per LLC")]
    pub nr_llc_cpus: Vec<u32>,
    #[stat(desc = "slice duration config")]
    pub slice_us: u64,
    #[stat(desc = "Per-LLC scheduling event fractions")]
    pub llc_fracs: Vec<f64>,
    #[stat(desc = "Per-LLC average latency")]
    pub llc_lats: Vec<f64>,
}

impl LayerStats {
    pub fn new(
        lidx: usize,
        layer: &Layer,
        stats: &Stats,
        bstats: &BpfStats,
        nr_cpus_range: (usize, usize),
    ) -> Self {
        let lstat = |sidx| bstats.lstats[lidx][sidx];
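        // Total number of scheduling events attributed to this layer during
        // the period; used as the denominator for the per-layer percentages.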
        let ltotal = lstat(LSTAT_SEL_LOCAL)
            + lstat(LSTAT_ENQ_LOCAL)
            + lstat(LSTAT_ENQ_WAKEUP)
            + lstat(LSTAT_ENQ_EXPIRE)
            + lstat(LSTAT_ENQ_REENQ)
            + lstat(LSTAT_KEEP);
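        // Convert a per-layer counter into a percentage of the event total.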
        let lstat_pct = |sidx| {
            if ltotal != 0 {
                lstat(sidx) as f64 / ltotal as f64 * 100.0
            } else {
                0.0
            }
        };

        let util_sum = stats.layer_utils[lidx]
            .iter()
            .take(LAYER_USAGE_SUM_UPTO + 1)
            .sum::<f64>();

        Self {
            index: lidx,
            util: util_sum * 100.0,
            util_open_frac: calc_frac(stats.layer_utils[lidx][LAYER_USAGE_OPEN], util_sum),
            util_protected_frac: calc_frac(
                stats.layer_utils[lidx][LAYER_USAGE_PROTECTED],
                util_sum,
            ),
            util_protected_preempt_frac: calc_frac(
                stats.layer_utils[lidx][LAYER_USAGE_PROTECTED_PREEMPT],
                util_sum,
            ),
            util_frac: calc_frac(util_sum, stats.total_util),
            tasks: stats.nr_layer_tasks[lidx] as u32,
            total: ltotal,
            sel_local: lstat_pct(LSTAT_SEL_LOCAL),
            enq_local: lstat_pct(LSTAT_ENQ_LOCAL),
            enq_wakeup: lstat_pct(LSTAT_ENQ_WAKEUP),
            enq_expire: lstat_pct(LSTAT_ENQ_EXPIRE),
            enq_reenq: lstat_pct(LSTAT_ENQ_REENQ),
            enq_dsq: lstat_pct(LSTAT_ENQ_DSQ),
            min_exec: lstat_pct(LSTAT_MIN_EXEC),
            min_exec_us: (lstat(LSTAT_MIN_EXEC_NS) / 1000) as u64,
            open_idle: lstat_pct(LSTAT_OPEN_IDLE),
            preempt: lstat_pct(LSTAT_PREEMPT),
            preempt_xllc: lstat_pct(LSTAT_PREEMPT_XLLC),
            preempt_xnuma: lstat_pct(LSTAT_PREEMPT_XNUMA),
            preempt_first: lstat_pct(LSTAT_PREEMPT_FIRST),
            preempt_idle: lstat_pct(LSTAT_PREEMPT_IDLE),
            preempt_fail: lstat_pct(LSTAT_PREEMPT_FAIL),
            affn_viol: lstat_pct(LSTAT_AFFN_VIOL),
            keep: lstat_pct(LSTAT_KEEP),
            keep_fail_max_exec: lstat_pct(LSTAT_KEEP_FAIL_MAX_EXEC),
            keep_fail_busy: lstat_pct(LSTAT_KEEP_FAIL_BUSY),
            is_excl: layer.kind.common().exclusive as u32,
            excl_collision: lstat_pct(LSTAT_EXCL_COLLISION),
            excl_preempt: lstat_pct(LSTAT_EXCL_PREEMPT),
            yielded: lstat_pct(LSTAT_YIELD),
            yield_ignore: lstat(LSTAT_YIELD_IGNORE) as u64,
            migration: lstat_pct(LSTAT_MIGRATION),
            xnuma_migration: lstat_pct(LSTAT_XNUMA_MIGRATION),
            xlayer_wake: lstat_pct(LSTAT_XLAYER_WAKE),
            xlayer_rewake: lstat_pct(LSTAT_XLAYER_REWAKE),
            xllc_migration: lstat_pct(LSTAT_XLLC_MIGRATION),
            xllc_migration_skip: lstat_pct(LSTAT_XLLC_MIGRATION_SKIP),
            llc_drain_try: lstat_pct(LSTAT_LLC_DRAIN_TRY),
            llc_drain: lstat_pct(LSTAT_LLC_DRAIN),
            skip_remote_node: lstat_pct(LSTAT_SKIP_REMOTE_NODE),
            cpus: layer.cpus.as_raw_slice().to_vec(),
            cur_nr_cpus: layer.cpus.weight() as u32,
            min_nr_cpus: nr_cpus_range.0 as u32,
            max_nr_cpus: nr_cpus_range.1 as u32,
            nr_llc_cpus: layer.nr_llc_cpus.iter().map(|&v| v as u32).collect(),
            slice_us: stats.layer_slice_us[lidx],
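            // Share of this layer's scheduling events that landed on each LLC.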
            llc_fracs: {
                let sid = LLC_LSTAT_CNT;
                let sum = bstats.llc_lstats[lidx]
                    .iter()
                    .map(|lstats| lstats[sid])
                    .sum::<u64>() as f64;
                bstats.llc_lstats[lidx]
                    .iter()
                    .map(|lstats| calc_frac(lstats[sid] as f64, sum))
                    .collect()
            },
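            // Per-LLC latency reported in nanoseconds; convert to seconds here
            // and print it later as milliseconds.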
            llc_lats: bstats.llc_lstats[lidx]
                .iter()
                .map(|lstats| lstats[LLC_LSTAT_LAT] as f64 / 1_000_000_000.0)
                .collect(),
        }
    }

    pub fn format<W: Write>(&self, w: &mut W, name: &str, header_width: usize) -> Result<()> {
        writeln!(
            w,
            " {:<width$}: util/open/frac={:6.1}/{}/{:7.1} prot/prot_preempt={}/{} tasks={:6}",
            name,
            self.util,
            fmt_pct(self.util_open_frac),
            self.util_frac,
            fmt_pct(self.util_protected_frac),
            fmt_pct(self.util_protected_preempt_frac),
            self.tasks,
            width = header_width,
        )?;

        writeln!(
            w,
            " {:<width$} tot={:7} local_sel/enq={}/{} enq_dsq={} wake/exp/reenq={}/{}/{}",
            "",
            self.total,
            fmt_pct(self.sel_local),
            fmt_pct(self.enq_local),
            fmt_pct(self.enq_dsq),
            fmt_pct(self.enq_wakeup),
            fmt_pct(self.enq_expire),
            fmt_pct(self.enq_reenq),
            width = header_width,
        )?;

        writeln!(
            w,
            " {:<width$} keep/max/busy={}/{}/{} yield/ign={}/{}",
            "",
            fmt_pct(self.keep),
            fmt_pct(self.keep_fail_max_exec),
            fmt_pct(self.keep_fail_busy),
            fmt_pct(self.yielded),
            fmt_num(self.yield_ignore),
            width = header_width,
        )?;

        writeln!(
            w,
            " {:<width$} open_idle={} mig={} xnuma_mig={} xllc_mig/skip={}/{} affn_viol={}",
            "",
            fmt_pct(self.open_idle),
            fmt_pct(self.migration),
            fmt_pct(self.xnuma_migration),
            fmt_pct(self.xllc_migration),
            fmt_pct(self.xllc_migration_skip),
            fmt_pct(self.affn_viol),
            width = header_width,
        )?;

        writeln!(
            w,
            " {:<width$} preempt/first/xllc/xnuma/idle/fail={}/{}/{}/{}/{}/{}",
            "",
            fmt_pct(self.preempt),
            fmt_pct(self.preempt_first),
            fmt_pct(self.preempt_xllc),
            fmt_pct(self.preempt_xnuma),
            fmt_pct(self.preempt_idle),
            fmt_pct(self.preempt_fail),
            width = header_width,
        )?;

        writeln!(
            w,
            " {:<width$} xlayer_wake/re={}/{} llc_drain/try={}/{} skip_rnode={}",
            "",
            fmt_pct(self.xlayer_wake),
            fmt_pct(self.xlayer_rewake),
            fmt_pct(self.llc_drain),
            fmt_pct(self.llc_drain_try),
            fmt_pct(self.skip_remote_node),
            width = header_width,
        )?;

        writeln!(
            w,
            " {:<width$} slice={}ms min_exec={}/{:7.2}ms",
            "",
            self.slice_us as f64 / 1000.0,
            fmt_pct(self.min_exec),
            self.min_exec_us as f64 / 1000.0,
            width = header_width
        )?;

        let cpumask = Cpumask::from_vec(self.cpus.clone());

        writeln!(
            w,
            " {:<width$} cpus={:3} [{:3},{:3}] {}",
            "",
            self.cur_nr_cpus,
            self.min_nr_cpus,
            self.max_nr_cpus,
            &cpumask,
            width = header_width
        )?;

        write!(
            w,
            " {:<width$} [LLC] nr_cpus: sched% lat_ms",
            "",
            width = header_width
        )?;

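        // Print per-LLC entries four to a row: assigned CPU count, share of
        // scheduling events, and latency in milliseconds.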
        for (i, (&frac, &lat)) in self.llc_fracs.iter().zip(self.llc_lats.iter()).enumerate() {
            if (i % 4) == 0 {
                writeln!(w)?;
                write!(w, " {:<width$} [{:03}]", "", i, width = header_width)?;
            } else {
                write!(w, " |")?;
            }
            write!(
                w,
                " {:2}:{}%{:7.2}",
                self.nr_llc_cpus[i],
                fmt_pct(frac),
                lat * 1_000.0
            )?;
        }
        writeln!(w)?;

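        // Exclusive layers report their sibling-CPU stats; for non-exclusive
        // layers these counters should stay zero, so flag non-zero readings.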
        if self.is_excl != 0 {
            writeln!(
                w,
                " {:<width$} excl_coll={} excl_preempt={}",
                "",
                fmt_pct(self.excl_collision),
                fmt_pct(self.excl_preempt),
                width = header_width,
            )?;
        } else if self.excl_collision != 0.0 || self.excl_preempt != 0.0 {
            warn!(
                "{}: exclusive is off but excl_coll={} excl_preempt={}",
                name,
                fmt_pct(self.excl_collision),
                fmt_pct(self.excl_preempt),
            );
        }

        Ok(())
    }
}

#[stat_doc]
#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
#[stat(top)]
pub struct SysStats {
    #[stat(desc = "timestamp", _om_skip)]
    pub at: f64,
    #[stat(desc = "# of NUMA nodes")]
    pub nr_nodes: usize,
    #[stat(desc = "# sched events during the period")]
    pub total: u64,
    #[stat(desc = "% dispatched directly into an idle CPU from select_cpu")]
    pub local_sel: f64,
    #[stat(desc = "% dispatched directly into an idle CPU from enqueue")]
    pub local_enq: f64,
    #[stat(desc = "% open layer tasks scheduled into allocated but idle CPUs")]
    pub open_idle: f64,
    #[stat(desc = "% violated config due to CPU affinity")]
    pub affn_viol: f64,
    #[stat(desc = "% sent to hi fallback DSQs")]
    pub hi_fb: f64,
    #[stat(desc = "% sent to lo fallback DSQs")]
    pub lo_fb: f64,
    #[stat(desc = "count of times an excl task skipped a CPU as the sibling was also excl")]
    pub excl_collision: f64,
    #[stat(desc = "count of times a sibling CPU was preempted for an excl task")]
    pub excl_preempt: f64,
    #[stat(desc = "count of times a CPU skipped dispatching due to an excl task on the sibling")]
    pub excl_idle: f64,
    #[stat(
        desc = "count of times an idle sibling CPU was woken up after an excl task finished"
    )]
    pub excl_wakeup: f64,
    #[stat(desc = "CPU time this binary consumed during the period")]
    pub proc_ms: u64,
    #[stat(desc = "CPU busy % (100% means all CPUs)")]
    pub busy: f64,
    #[stat(desc = "CPU util % (100% means one CPU)")]
    pub util: f64,
    #[stat(desc = "CPU util % used by hi fallback DSQs")]
    pub hi_fb_util: f64,
    #[stat(desc = "CPU util % used by lo fallback DSQs")]
    pub lo_fb_util: f64,
    #[stat(desc = "Number of tasks dispatched via antistall")]
    pub antistall: u64,
    #[stat(desc = "Number of times preemptions of non-scx tasks were avoided")]
    pub skip_preempt: u64,
    #[stat(desc = "Number of times vtime was out of range and fixed up")]
    pub fixup_vtime: u64,
    #[stat(desc = "Number of times cpuc->preempting_task didn't end up running on the CPU")]
    pub preempting_mismatch: u64,
    #[stat(desc = "fallback CPU")]
    pub fallback_cpu: u32,
    #[stat(desc = "fallback CPU util %")]
    pub fallback_cpu_util: f64,
    #[stat(desc = "per-layer statistics")]
    pub layers: BTreeMap<String, LayerStats>,
}

impl SysStats {
    pub fn new(stats: &Stats, bstats: &BpfStats, fallback_cpu: usize) -> Result<Self> {
        let lsum = |idx| stats.bpf_stats.lstats_sums[idx];
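        // System-wide event total summed across all layers; denominator for
        // the percentage stats below.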
        let total = lsum(LSTAT_SEL_LOCAL)
            + lsum(LSTAT_ENQ_LOCAL)
            + lsum(LSTAT_ENQ_WAKEUP)
            + lsum(LSTAT_ENQ_EXPIRE)
            + lsum(LSTAT_ENQ_REENQ)
            + lsum(LSTAT_KEEP);
        let lsum_pct = |idx| {
            if total != 0 {
                lsum(idx) as f64 / total as f64 * 100.0
            } else {
                0.0
            }
        };

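        // The *_USAGE gstats accumulate CPU time in nanoseconds; dividing by
        // the elapsed wall-clock nanoseconds yields a utilization percentage.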
        let elapsed_ns = stats.elapsed.as_nanos();

        Ok(Self {
            at: SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs_f64(),
            nr_nodes: stats.nr_nodes,
            total,
            local_sel: lsum_pct(LSTAT_SEL_LOCAL),
            local_enq: lsum_pct(LSTAT_ENQ_LOCAL),
            open_idle: lsum_pct(LSTAT_OPEN_IDLE),
            affn_viol: lsum_pct(LSTAT_AFFN_VIOL),
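            // Fallback DSQ events come from gstats rather than the per-layer
            // counters, so compute their share of the same event total here.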
            hi_fb: calc_frac(
                stats.bpf_stats.gstats[GSTAT_HI_FB_EVENTS] as f64,
                total as f64,
            ),
            lo_fb: calc_frac(
                stats.bpf_stats.gstats[GSTAT_LO_FB_EVENTS] as f64,
                total as f64,
            ),
            excl_collision: lsum_pct(LSTAT_EXCL_COLLISION),
            excl_preempt: lsum_pct(LSTAT_EXCL_PREEMPT),
            excl_idle: bstats.gstats[GSTAT_EXCL_IDLE] as f64 / total as f64,
            excl_wakeup: bstats.gstats[GSTAT_EXCL_WAKEUP] as f64 / total as f64,
            proc_ms: stats.processing_dur.as_millis() as u64,
            busy: stats.cpu_busy * 100.0,
            util: stats.total_util * 100.0,
            hi_fb_util: stats.bpf_stats.gstats[GSTAT_HI_FB_USAGE] as f64 / elapsed_ns as f64
                * 100.0,
            lo_fb_util: stats.bpf_stats.gstats[GSTAT_LO_FB_USAGE] as f64 / elapsed_ns as f64
                * 100.0,
            antistall: stats.bpf_stats.gstats[GSTAT_ANTISTALL],
            skip_preempt: stats.bpf_stats.gstats[GSTAT_SKIP_PREEMPT],
            fixup_vtime: stats.bpf_stats.gstats[GSTAT_FIXUP_VTIME],
            preempting_mismatch: stats.bpf_stats.gstats[GSTAT_PREEMPTING_MISMATCH],
            fallback_cpu: fallback_cpu as u32,
            fallback_cpu_util: stats.bpf_stats.gstats[GSTAT_FB_CPU_USAGE] as f64
                / elapsed_ns as f64
                * 100.0,
            layers: BTreeMap::new(),
        })
    }

    pub fn format<W: Write>(&self, w: &mut W) -> Result<()> {
        writeln!(
            w,
            "tot={:7} local_sel/enq={}/{} open_idle={} affn_viol={} hi/lo={}/{}",
            self.total,
            fmt_pct(self.local_sel),
            fmt_pct(self.local_enq),
            fmt_pct(self.open_idle),
            fmt_pct(self.affn_viol),
            fmt_pct(self.hi_fb),
            fmt_pct(self.lo_fb),
        )?;

        writeln!(
            w,
            "busy={:5.1} util/hi/lo={:7.1}/{}/{} fallback_cpu/util={:3}/{:4.1} proc={:?}ms",
            self.busy,
            self.util,
            fmt_pct(self.hi_fb_util),
            fmt_pct(self.lo_fb_util),
            self.fallback_cpu,
            self.fallback_cpu_util,
            self.proc_ms,
        )?;

        writeln!(
            w,
            "excl_coll={:.2} excl_preempt={:.2} excl_idle={:.2} excl_wakeup={:.2}",
            self.excl_collision, self.excl_preempt, self.excl_idle, self.excl_wakeup
        )?;

        writeln!(
            w,
            "skip_preempt={} antistall={} fixup_vtime={} preempting_mismatch={}",
            self.skip_preempt, self.antistall, self.fixup_vtime, self.preempting_mismatch
        )?;

        Ok(())
    }

    pub fn format_all<W: Write>(&self, w: &mut W) -> Result<()> {
        self.format(w)?;

        let header_width = self
            .layers
            .keys()
            .map(|name| name.len())
            .max()
            .unwrap_or(0)
            .max(4);

        let mut idx_to_name: Vec<(usize, &String)> =
            self.layers.iter().map(|(k, v)| (v.index, k)).collect();

        idx_to_name.sort();

        for (_idx, name) in &idx_to_name {
            self.layers[*name].format(w, name, header_width)?;
        }

        Ok(())
    }
}

#[derive(Debug)]
pub enum StatsReq {
    Hello(ThreadId),
    Refresh(ThreadId, Stats),
    Bye(ThreadId),
}

#[derive(Debug)]
pub enum StatsRes {
    Hello(Stats),
    Refreshed((Stats, SysStats)),
    Bye,
}

pub fn server_data() -> StatsServerData<StatsReq, StatsRes> {
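    // Opening a stats session registers the reader thread with the scheduler
    // via Hello; each subsequent read sends Refresh and renders the returned
    // SysStats as JSON.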
    let open: Box<dyn StatsOpener<StatsReq, StatsRes>> = Box::new(move |(req_ch, res_ch)| {
        let tid = current().id();
        req_ch.send(StatsReq::Hello(tid))?;
        let mut stats = Some(match res_ch.recv()? {
            StatsRes::Hello(v) => v,
            res => bail!("invalid response to Hello: {:?}", res),
        });

        let read: Box<dyn StatsReader<StatsReq, StatsRes>> =
            Box::new(move |_args, (req_ch, res_ch)| {
                req_ch.send(StatsReq::Refresh(tid, stats.take().unwrap()))?;
                let (new_stats, sys_stats) = match res_ch.recv()? {
                    StatsRes::Refreshed(v) => v,
                    res => bail!("invalid response to Refresh: {:?}", res),
                };
                stats = Some(new_stats);
                sys_stats.to_json()
            });

        Ok(read)
    });

    let close: Box<dyn StatsCloser<StatsReq, StatsRes>> = Box::new(move |(req_ch, res_ch)| {
        req_ch.send(StatsReq::Bye(current().id())).unwrap();
        match res_ch.recv().unwrap() {
            StatsRes::Bye => {}
            res => panic!("invalid response to Bye: {:?}", res),
        }
    });

    StatsServerData::new()
        .add_meta(LayerStats::meta())
        .add_meta(SysStats::meta())
        .add_ops(
            "top",
            StatsOps {
                open,
                close: Some(close),
            },
        )
}

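/// Stand-alone monitoring loop: polls the stats server every `intv` and
/// pretty-prints the received SysStats until `shutdown` is set.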
pub fn monitor(intv: Duration, shutdown: Arc<AtomicBool>) -> Result<()> {
    scx_utils::monitor_stats::<SysStats>(
        &vec![],
        intv,
        || shutdown.load(Ordering::Relaxed),
        |sst| {
            let dt = DateTime::<Local>::from(UNIX_EPOCH + Duration::from_secs_f64(sst.at));
            println!("###### {} ######", dt.to_rfc2822());
            sst.format_all(&mut std::io::stdout())
        },
    )
}