1use std::collections::BTreeMap;
2use std::io::Write;
3use std::sync::atomic::AtomicBool;
4use std::sync::atomic::Ordering;
5use std::sync::Arc;
6use std::thread::ThreadId;
7use std::time::Duration;
8
9use anyhow::bail;
10use anyhow::{Context, Result};
11use gpoint::GPoint;
12use scx_stats::prelude::*;
13use scx_stats_derive::stat_doc;
14use scx_stats_derive::Stats;
15use serde::Deserialize;
16use serde::Serialize;
17
// System-wide scheduler statistics carried in one stats message.
//
// This is the `top` stats structure (see `#[stat(top)]`). Each field's meaning
// is given by its `desc` string; the `pc_*` fields are percentages and the
// `nr_*` fields are counts. The type is (de)serializable through serde so it
// can be exchanged between the stats server and a monitoring client.
#[stat_doc]
#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
#[stat(top)]
pub struct SysStats {
    #[stat(desc = "Sequence ID of this message")]
    pub mseq: u64,

    #[stat(desc = "Number of runnable tasks in runqueues")]
    pub nr_queued_task: u64,

    #[stat(desc = "Number of active CPUs when core compaction is enabled")]
    pub nr_active: u32,

    #[stat(desc = "Number of context switches")]
    pub nr_sched: u64,

    #[stat(desc = "Number of task preemption triggered")]
    pub nr_preempt: u64,

    #[stat(desc = "% of performance-critical tasks")]
    pub pc_pc: f64,

    #[stat(desc = "% of latency-critical tasks")]
    pub pc_lc: f64,

    #[stat(desc = "% of cross domain task migration")]
    pub pc_x_migration: f64,

    #[stat(desc = "Number of stealee domains")]
    pub nr_stealee: u32,

    #[stat(desc = "% of tasks scheduled on big cores")]
    pub pc_big: f64,

    #[stat(desc = "% of performance-critical tasks scheduled on big cores")]
    pub pc_pc_on_big: f64,

    #[stat(desc = "% of latency-critical tasks scheduled on big cores")]
    pub pc_lc_on_big: f64,

    // Kept as free-form text; it is printed verbatim in the POWER MODE column.
    #[stat(desc = "Current power mode")]
    pub power_mode: String,

    #[stat(desc = "% of performance mode")]
    pub pc_performance: f64,

    #[stat(desc = "% of balanced mode")]
    pub pc_balanced: f64,

    #[stat(desc = "% of powersave mode")]
    pub pc_powersave: f64,
}
70
71impl SysStats {
72 pub fn format_header<W: Write>(w: &mut W) -> Result<()> {
73 writeln!(
74 w,
75 "\x1b[93m| {:8} | {:9} | {:9} | {:8} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |\x1b[0m",
76 "MSEQ",
77 "# Q TASK",
78 "# ACT CPU",
79 "# SCHED",
80 "# PREEMPT",
81 "PERF-CR%",
82 "LAT-CR%",
83 "X-MIG%",
84 "# STLEE",
85 "BIG%",
86 "PC/BIG%",
87 "LC/BIG%",
88 "POWER MODE",
89 "PERFORMANCE%",
90 "BALANCED%",
91 "POWERSAVE%",
92 )?;
93 Ok(())
94 }
95
96 fn format<W: Write>(&self, w: &mut W) -> Result<()> {
97 if self.mseq % 10 == 1 {
98 Self::format_header(w)?;
99 }
100
101 let color = if self.mseq % 2 == 0 {
102 "\x1b[90m" } else {
104 "\x1b[37m" };
106
107 writeln!(
108 w,
109 "{color}| {:8} | {:9} | {:9} | {:8} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |\x1b[0m",
110 self.mseq,
111 self.nr_queued_task,
112 self.nr_active,
113 self.nr_sched,
114 self.nr_preempt,
115 GPoint(self.pc_pc),
116 GPoint(self.pc_lc),
117 GPoint(self.pc_x_migration),
118 self.nr_stealee,
119 GPoint(self.pc_big),
120 GPoint(self.pc_pc_on_big),
121 GPoint(self.pc_lc_on_big),
122 self.power_mode,
123 GPoint(self.pc_performance),
124 GPoint(self.pc_balanced),
125 GPoint(self.pc_powersave),
126 )?;
127 Ok(())
128 }
129}
130
// One sampled scheduling decision for a single task.
//
// Each field's meaning is given by its `desc` string. The record is
// (de)serializable through serde and is printed as one table row by
// `SchedSample::format`, in the same field order as declared here.
#[stat_doc]
#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
#[stat(top, _om_prefix = "s_", _om_label = "sched_sample")]
pub struct SchedSample {
    #[stat(desc = "Sequence ID of this message")]
    pub mseq: u64,
    #[stat(desc = "Process ID")]
    pub pid: i32,
    #[stat(desc = "Task name")]
    pub comm: String,
    #[stat(
        desc = "LR: 'L'atency-critical or 'R'egular, HI: performance-'H'ungry or performance-'I'nsensitive, BT: 'B'ig or li'T'tle, EG: 'E'ligible or 'G'reedy, PN: 'P'reempting or 'N'ot"
    )]
    pub stat: String,
    #[stat(desc = "CPU ID where this task is scheduled on")]
    pub cpu_id: u32,
    #[stat(desc = "CPU ID where a task ran last time.")]
    pub prev_cpu_id: u32,
    #[stat(desc = "CPU ID suggested when a task is enqueued.")]
    pub suggested_cpu_id: u32,
    #[stat(desc = "Waker's process ID")]
    pub waker_pid: i32,
    #[stat(desc = "Waker's task name")]
    pub waker_comm: String,
    // NOTE(review): the table header labels the two slice columns with an
    // `_NS` suffix, which suggests nanoseconds — confirm against the producer.
    #[stat(desc = "Assigned time slice")]
    pub slice_wall: u64,
    #[stat(desc = "Amount of time actually used by task in a slice")]
    pub slice_used_wall: u64,
    #[stat(desc = "Latency criticality of this task")]
    pub lat_cri: u32,
    #[stat(desc = "Average latency criticality in a system")]
    pub avg_lat_cri: u32,
    #[stat(desc = "Static priority (20 == nice 0)")]
    pub static_prio: u16,
    #[stat(desc = "Time interval from the last quiescent time to this runnable time.")]
    pub rerunnable_interval_wall: u64,
    #[stat(desc = "Time interval from the last stopped time.")]
    pub resched_interval_wall: u64,
    #[stat(desc = "How often this task is scheduled per second")]
    pub run_freq: u64,
    #[stat(desc = "Average runtime per schedule")]
    pub avg_runtime_wall: u64,
    #[stat(desc = "How frequently this task waits for other tasks")]
    pub wait_freq: u64,
    #[stat(desc = "How frequently this task wakes other tasks")]
    pub wake_freq: u64,
    #[stat(desc = "Performance criticality of this task")]
    pub perf_cri: u32,
    #[stat(desc = "Performance criticality threshold")]
    pub thr_perf_cri: u32,
    #[stat(desc = "Target performance level of this CPU")]
    pub cpuperf_cur: u32,
    #[stat(desc = "CPU utilization of this CPU")]
    pub cpu_util_wall: u64,
    #[stat(desc = "Invariant CPU utilization of this CPU scaled by CPU capacity and frequency")]
    pub cpu_util_invr: u64,
    #[stat(desc = "Steal utilization of this CPU (IRQ + hypervisor steal + RT/DL)")]
    pub steal_util_wall: u64,
    #[stat(desc = "Invariant steal utilization of this CPU scaled by CPU capacity and frequency")]
    pub steal_util_invr: u64,
    #[stat(desc = "Utilization of this CPU by domain-pinned tasks")]
    pub dom_pinned_util_wall: u64,
    #[stat(
        desc = "Invariant utilization of this CPU by domain-pinned tasks scaled by CPU capacity and frequency"
    )]
    pub dom_pinned_util_invr: u64,
    #[stat(desc = "Number of active CPUs when core compaction is enabled")]
    pub nr_active: u32,
    #[stat(desc = "DSQ ID where this task was dispatched from")]
    pub dsq_id: u64,
    #[stat(desc = "Consume latency of this DSQ (shows how contended the DSQ is)")]
    pub dsq_consume_lat: u64,
    #[stat(desc = "CPU's latency headroom (1024 - ravg(irq_steal_util))")]
    pub lat_headroom: u32,
    #[stat(desc = "Preemption vulnerability threshold step")]
    pub vuln_thresh: u32,
    #[stat(desc = "Task's estimated utilization from ravg")]
    pub task_util_est: u32,
    #[stat(desc = "Task's normalized latency criticality [0, 1024]")]
    pub norm_lat_cri: u16,
}
212
213impl SchedSample {
214 pub fn format_header<W: Write>(w: &mut W) -> Result<()> {
215 writeln!(
216 w,
217 "\x1b[93m| {:6} | {:7} | {:17} | {:5} | {:4} | {:8} | {:8} | {:8} | {:17} | {:8} | {:11} | {:8} | {:7} | {:8} | {:12} | {:12} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:9} | {:10} | {:11} | {:9} | {:10} | {:6} | {:6} | {:10} | {:7} | {:6} | {:8} | {:7} |\x1b[0m",
218 "MSEQ",
219 "PID",
220 "COMM",
221 "STAT",
222 "CPU",
223 "PRV_CPU",
224 "SUG_CPU",
225 "WKER_PID",
226 "WKER_COMM",
227 "SLC_NS",
228 "SLC_USED_NS",
229 "LAT_CRI",
230 "AVG_LC",
231 "ST_PRIO",
232 "RERNBL_NS",
233 "RESCHD_NS",
234 "RUN_FREQ",
235 "RUN_TM_NS",
236 "WAIT_FREQ",
237 "WAKE_FREQ",
238 "PERF_CRI",
239 "THR_PC",
240 "CPUFREQ",
241 "CPU_UTIL",
242 "CPU_IUTIL",
243 "STEAL_UTIL",
244 "STEAL_IUTIL",
245 "DPIN_UTIL",
246 "DPIN_IUTIL",
247 "NR_ACT",
248 "DSQ_ID",
249 "DSQ_LAT_NS",
250 "LAT_HDR",
251 "VLN_TH",
252 "TSK_UTIL",
253 "NRM_LC",
254 )?;
255 Ok(())
256 }
257
258 pub fn format<W: Write>(&self, w: &mut W) -> Result<()> {
259 if self.mseq % 10 == 1 {
260 Self::format_header(w)?;
261 }
262
263 writeln!(
264 w,
265 "| {:6} | {:7} | {:17} | {:5} | {:4} | {:8} | {:8} | {:8} | {:17} | {:8} | {:11} | {:8} | {:7} | {:8} | {:12} | {:12} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:9} | {:10} | {:11} | {:9} | {:10} | {:6} | {:6} | {:10} | {:7} | {:6} | {:8} | {:7} |",
266 self.mseq,
267 self.pid,
268 self.comm,
269 self.stat,
270 self.cpu_id,
271 self.prev_cpu_id,
272 self.suggested_cpu_id,
273 self.waker_pid,
274 self.waker_comm,
275 self.slice_wall,
276 self.slice_used_wall,
277 self.lat_cri,
278 self.avg_lat_cri,
279 self.static_prio,
280 self.rerunnable_interval_wall,
281 self.resched_interval_wall,
282 self.run_freq,
283 self.avg_runtime_wall,
284 self.wait_freq,
285 self.wake_freq,
286 self.perf_cri,
287 self.thr_perf_cri,
288 self.cpuperf_cur,
289 self.cpu_util_wall,
290 self.cpu_util_invr,
291 self.steal_util_wall,
292 self.steal_util_invr,
293 self.dom_pinned_util_wall,
294 self.dom_pinned_util_invr,
295 self.nr_active,
296 self.dsq_id,
297 self.dsq_consume_lat,
298 self.lat_headroom,
299 self.vuln_thresh,
300 self.task_util_est,
301 self.norm_lat_cri,
302 )?;
303 Ok(())
304 }
305}
306
// A batch of scheduling samples delivered together in one stats response.
#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
pub struct SchedSamples {
    // Individual samples; `monitor_sched_samples` prints them in this order.
    pub samples: Vec<SchedSample>,
}
311
/// Requests sent over the request channel by the stats-server callbacks built
/// in `server_data`.
///
/// Every variant carries the `ThreadId` of the thread that issued it.
#[derive(Debug)]
pub enum StatsReq {
    /// Announces a newly opened sampler; the opener expects `StatsRes::Ack`
    /// in reply.
    NewSampler(ThreadId),
    /// Asks for the current system-wide statistics (`SysStats`).
    SysStatsReq {
        tid: ThreadId,
    },
    /// Asks for a batch of scheduling samples.
    SchedSamplesNr {
        tid: ThreadId,
        /// Number of samples requested by the client.
        nr_samples: u64,
        /// Sampling interval derived in `StatsReq::from_args_samples`.
        interval_ms: u64,
    },
}
324
325impl StatsReq {
326 fn from_args_stats(tid: ThreadId) -> Result<Self> {
327 Ok(Self::SysStatsReq { tid })
328 }
329
330 fn from_args_samples(
331 tid: ThreadId,
332 nr_cpus_onln: u64,
333 args: &BTreeMap<String, String>,
334 ) -> Result<Self> {
335 let mut nr_samples = 1;
336
337 if let Some(arg) = args.get("nr_samples") {
338 nr_samples = arg.trim().parse()?;
339 }
340
341 let mut interval_ms = 1000;
342 if nr_samples > nr_cpus_onln {
343 let f = nr_samples / nr_cpus_onln * 2;
345 interval_ms /= f;
346 }
347
348 Ok(Self::SchedSamplesNr {
349 tid,
350 nr_samples,
351 interval_ms,
352 })
353 }
354}
355
/// Responses received over the response channel by the stats-server callbacks
/// built in `server_data`.
#[derive(Debug)]
pub enum StatsRes {
    /// Acknowledges a `StatsReq::NewSampler` request.
    Ack,
    /// Received in place of data; the readers report it as
    /// "preempted by another sampler".
    Bye,
    /// Payload answering `StatsReq::SysStatsReq`.
    SysStats(SysStats),
    /// Payload answering `StatsReq::SchedSamplesNr`.
    SchedSamples(SchedSamples),
}
363
364pub fn server_data(nr_cpus_onln: u64) -> StatsServerData<StatsReq, StatsRes> {
365 let open: Box<dyn StatsOpener<StatsReq, StatsRes>> = Box::new(move |(req_ch, res_ch)| {
366 let tid = std::thread::current().id();
367 req_ch.send(StatsReq::NewSampler(tid))?;
368 match res_ch.recv()? {
369 StatsRes::Ack => {}
370 res => bail!("invalid response: {:?}", res),
371 }
372
373 let read: Box<dyn StatsReader<StatsReq, StatsRes>> =
374 Box::new(move |_args, (req_ch, res_ch)| {
375 let req = StatsReq::from_args_stats(tid)?;
376 req_ch.send(req)?;
377
378 let stats = match res_ch.recv()? {
379 StatsRes::SysStats(v) => v,
380 StatsRes::Bye => bail!("preempted by another sampler"),
381 res => bail!("invalid response: {:?}", res),
382 };
383
384 stats.to_json()
385 });
386 Ok(read)
387 });
388
389 let samples_open: Box<dyn StatsOpener<StatsReq, StatsRes>> =
390 Box::new(move |(req_ch, res_ch)| {
391 let tid = std::thread::current().id();
392 req_ch.send(StatsReq::NewSampler(tid))?;
393 match res_ch.recv()? {
394 StatsRes::Ack => {}
395 res => bail!("invalid response: {:?}", res),
396 }
397
398 let read: Box<dyn StatsReader<StatsReq, StatsRes>> =
399 Box::new(move |args, (req_ch, res_ch)| {
400 let req = StatsReq::from_args_samples(tid, nr_cpus_onln, args)?;
401 req_ch.send(req)?;
402
403 let samples = match res_ch.recv()? {
404 StatsRes::SchedSamples(v) => v,
405 StatsRes::Bye => bail!("preempted by another sampler"),
406 res => bail!("invalid response: {:?}", res),
407 };
408
409 samples.to_json()
410 });
411 Ok(read)
412 });
413
414 StatsServerData::new()
415 .add_meta(SysStats::meta())
416 .add_ops("top", StatsOps { open, close: None })
417 .add_meta(SchedSample::meta())
418 .add_ops(
419 "sched_samples",
420 StatsOps {
421 open: samples_open,
422 close: None,
423 },
424 )
425}
426
427pub fn monitor_sched_samples(nr_samples: u64, shutdown: Arc<AtomicBool>) -> Result<()> {
428 scx_utils::monitor_stats::<SchedSamples>(
429 &vec![
430 ("target".into(), "sched_samples".into()),
431 ("nr_samples".into(), nr_samples.to_string()),
432 ],
433 Duration::from_secs(0),
434 || shutdown.load(Ordering::Relaxed),
435 |ts| {
436 let mut stdout = std::io::stdout();
437 for sample in ts.samples.iter() {
438 sample.format(&mut stdout)?;
439 }
440 Ok(())
441 },
442 )
443}
444
445pub fn monitor(intv: Duration, shutdown: Arc<AtomicBool>) -> Result<()> {
446 scx_utils::monitor_stats::<SysStats>(
447 &[],
448 intv,
449 || shutdown.load(Ordering::Relaxed),
450 |sysstats| {
451 sysstats
452 .format(&mut std::io::stdout())
453 .context("failed to format sysstats")?;
454 Ok(())
455 },
456 )
457}