Skip to main content

scx_cake/
tui.rs

1// SPDX-License-Identifier: GPL-2.0
2// TUI module - ratatui-based terminal UI for real-time scheduler statistics
3
4use std::io::{self, Stdout};
5use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
6use std::sync::Arc;
7use std::thread;
8use std::time::{Duration, Instant};
9
10use anyhow::{Context, Result};
11use arboard::Clipboard;
12use crossterm::{
13    event::{self, Event, KeyCode, KeyEventKind},
14    terminal::{disable_raw_mode, enable_raw_mode, EnterAlternateScreen, LeaveAlternateScreen},
15    ExecutableCommand,
16};
17use ratatui::{
18    buffer::Buffer,
19    prelude::*,
20    widgets::{
21        Block, BorderType, Borders, Cell, Padding, Paragraph, Row, Table, TableState, Tabs, Widget,
22        Wrap,
23    },
24};
25use std::collections::HashMap;
26use sysinfo::{Components, System};
27
28use crate::bpf_skel::types::cake_stats;
29use crate::bpf_skel::BpfSkel;
30
31use crate::topology::TopologyInfo;
32
/// System hardware and kernel information, collected once at startup.
///
/// Populated by [`SystemInfo::detect`]; every probe there is best-effort, so
/// string fields may hold a placeholder and `total_ram_mb` may be `0`.
#[derive(Clone, Debug)]
pub struct SystemInfo {
    /// Value of the first `model name` line in `/proc/cpuinfo`, or `"Unknown"`.
    pub cpu_model: String,
    /// Contents of `/proc/sys/kernel/arch`, else the output of `uname -m`,
    /// else `"unknown"`.
    pub cpu_arch: String,
    /// Copied from `TopologyInfo::nr_phys_cpus`.
    pub phys_cores: usize,
    /// Copied from `TopologyInfo::nr_cpus`.
    pub logical_cpus: usize,
    /// Copied from `TopologyInfo::smt_enabled`.
    pub smt_enabled: bool,
    /// Copied from `TopologyInfo::has_dual_ccd`; shown as the `DualCCD` tag.
    pub dual_ccd: bool,
    /// Copied from `TopologyInfo::has_vcache`; shown as the `VCache` tag.
    pub has_vcache: bool,
    /// Copied from `TopologyInfo::has_hybrid_cores`; shown as the `HybridPE` tag.
    pub has_hybrid: bool,
    /// `MemTotal` from `/proc/meminfo` divided by 1024 (integer division),
    /// or `0` when it cannot be read or parsed.
    pub total_ram_mb: u64,
    /// Third whitespace-separated token of `/proc/version`, or `"Unknown"`.
    pub kernel_version: String,
}
47
48impl SystemInfo {
49    pub fn detect(topo: &TopologyInfo) -> Self {
50        // CPU model from /proc/cpuinfo
51        let cpu_model = std::fs::read_to_string("/proc/cpuinfo")
52            .ok()
53            .and_then(|s| {
54                s.lines()
55                    .find(|l| l.starts_with("model name"))
56                    .and_then(|l| l.split(':').nth(1))
57                    .map(|v| v.trim().to_string())
58            })
59            .unwrap_or_else(|| "Unknown".to_string());
60
61        // Architecture from uname
62        let cpu_arch = std::fs::read_to_string("/proc/sys/kernel/arch")
63            .map(|s| s.trim().to_string())
64            .or_else(|_| {
65                // Fallback: parse from uname -m via /proc
66                std::process::Command::new("uname")
67                    .arg("-m")
68                    .output()
69                    .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
70            })
71            .unwrap_or_else(|_| "unknown".to_string());
72
73        // Total RAM from /proc/meminfo
74        let total_ram_mb = std::fs::read_to_string("/proc/meminfo")
75            .ok()
76            .and_then(|s| {
77                s.lines()
78                    .find(|l| l.starts_with("MemTotal:"))
79                    .and_then(|l| {
80                        l.split_whitespace()
81                            .nth(1)
82                            .and_then(|v| v.parse::<u64>().ok())
83                    })
84            })
85            .map(|kb| kb / 1024)
86            .unwrap_or(0);
87
88        // Kernel version from /proc/version
89        let kernel_version = std::fs::read_to_string("/proc/version")
90            .ok()
91            .and_then(|s| s.split_whitespace().nth(2).map(|v| v.to_string()))
92            .unwrap_or_else(|| "Unknown".to_string());
93
94        Self {
95            cpu_model,
96            cpu_arch,
97            phys_cores: topo.nr_phys_cpus,
98            logical_cpus: topo.nr_cpus,
99            smt_enabled: topo.smt_enabled,
100            dual_ccd: topo.has_dual_ccd,
101            has_vcache: topo.has_vcache,
102            has_hybrid: topo.has_hybrid_cores,
103            total_ram_mb,
104            kernel_version,
105        }
106    }
107
108    /// Format as compact one-line header for dump files (AI-optimized, all data)
109    pub fn format_header(&self) -> String {
110        let ram_display = if self.total_ram_mb >= 1024 {
111            format!("{:.1}GB", self.total_ram_mb as f64 / 1024.0)
112        } else {
113            format!("{}MB", self.total_ram_mb)
114        };
115        let smt_label = if self.smt_enabled { "SMT" } else { "no-SMT" };
116        let mut topo_tags = Vec::new();
117        if self.dual_ccd {
118            topo_tags.push("DualCCD");
119        }
120        if self.has_vcache {
121            topo_tags.push("VCache");
122        }
123        if self.has_hybrid {
124            topo_tags.push("HybridPE");
125        }
126        if topo_tags.is_empty() {
127            topo_tags.push("Sym");
128        }
129        format!(
130            "sys: cpu={} arch={} cores={}P/{}T {} [{}] ram={} kernel={}\n",
131            self.cpu_model,
132            self.cpu_arch,
133            self.phys_cores,
134            self.logical_cpus,
135            smt_label,
136            topo_tags.join(","),
137            ram_display,
138            self.kernel_version,
139        )
140    }
141}
142
/// Pages of the TUI.
///
/// `next` / `previous` step through the variants in declaration order and
/// wrap around at either end.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TuiTab {
    Dashboard = 0,
    Topology = 1,
    BenchLab = 2,
    ReferenceGuide = 3,
}

impl TuiTab {
    /// All tabs, indexed by their discriminant.
    const CYCLE: [TuiTab; 4] = [
        TuiTab::Dashboard,
        TuiTab::Topology,
        TuiTab::BenchLab,
        TuiTab::ReferenceGuide,
    ];

    /// Tab after `self`; the last tab wraps to the first.
    fn next(self) -> Self {
        let count = Self::CYCLE.len();
        Self::CYCLE[(self as usize + 1) % count]
    }

    /// Tab before `self`; the first tab wraps to the last.
    fn previous(self) -> Self {
        let count = Self::CYCLE.len();
        // Add `count` before subtracting so index 0 does not underflow.
        Self::CYCLE[(self as usize + count - 1) % count]
    }
}
170
171#[derive(Clone, Copy, PartialEq, Eq, Debug)]
172pub enum TaskStatus {
173    Alive, // In sysinfo + has BPF telemetry (total_runs > 0)
174    Idle,  // In sysinfo but no BPF telemetry (sleeping/background)
175    Dead,  // Not in sysinfo, stale arena entry
176}
177
178impl TaskStatus {
179    fn label(&self) -> &'static str {
180        match self {
181            TaskStatus::Alive => "●LIVE",
182            TaskStatus::Idle => "○IDLE",
183            TaskStatus::Dead => "✗DEAD",
184        }
185    }
186
187    fn color(&self) -> Color {
188        match self {
189            TaskStatus::Alive => Color::Green,
190            TaskStatus::Idle => Color::DarkGray,
191            TaskStatus::Dead => Color::Red,
192        }
193    }
194}
195
/// Key by which the task table can be ordered.
///
/// `TuiApp::cycle_sort` steps through these variants in its own fixed order,
/// which is not the declaration order below; `TuiApp::new` starts on
/// `RunDuration`.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum SortColumn {
    TargetCpu,
    Pid,
    RunDuration,
    SelectCpu,
    Enqueue,
    Gate1Pct,
    Jitter,
    Tier,
    Pelt,
    Vcsw,
    Hog,
    RunsPerSec,
    Gap,
}
212
/// TUI Application state
///
/// Built by `TuiApp::new`, which starts every counter at zero, every
/// collection empty, and the view on the Dashboard tab sorted by
/// `SortColumn::RunDuration` descending.
pub struct TuiApp {
    /// Captured when the app is constructed; basis for the uptime display.
    start_time: Instant,
    /// Transient status text and the instant it was set; `get_status` stops
    /// returning it once 2 seconds have elapsed.
    status_message: Option<(String, Instant)>,
    pub topology: TopologyInfo,
    pub latency_matrix: Vec<Vec<f64>>,
    /// Telemetry rows keyed by a `u32` id (presumably the task PID — TODO confirm).
    pub task_rows: HashMap<u32, TaskTelemetryRow>,
    /// Row order for the task table; its length bounds the wrap-around in
    /// `scroll_table_down` / `scroll_table_up`.
    pub sorted_pids: Vec<u32>,
    /// Selection state moved by `scroll_table_down` / `scroll_table_up`.
    pub table_state: TableState,
    /// Selection state moved by `scroll_bench_down` / `scroll_bench_up`.
    pub bench_table_state: TableState,
    pub active_tab: TuiTab,
    pub sort_column: SortColumn,
    pub sort_descending: bool,

    pub sys: System,
    pub components: Components,
    pub cpu_stats: Vec<(f32, f32)>,            // (Load %, Temp C)
    pub show_all_tasks: bool,                  // false = BPF-tracked only, true = all
    pub arena_active: usize,                   // Arena slots with tid != 0
    pub arena_max: usize,                      // Arena pool max_elems
    pub bpf_task_count: usize,                 // Tasks with total_runs > 0
    pub prev_deltas: HashMap<u32, (u32, u16)>, // (total_runs, migration_count) — lightweight delta snapshot
    pub active_pids_buf: std::collections::HashSet<u32>, // Reused per-tick to avoid alloc
    pub collapsed_tgids: std::collections::HashSet<u32>, // Collapsed process groups
    pub collapsed_ppids: std::collections::HashSet<u32>, // Collapsed PPID groups
    pub bench_latency_handle: Option<thread::JoinHandle<Vec<Vec<f64>>>>, // Background c2c bench
    pub _prev_stats: Option<cake_stats>,       // Previous global stats for rate calc
    // BenchLab cached results
    pub bench_entries: [(u64, u64, u64, u64); 67], // (min_ns, max_ns, total_ns, last_value)
    pub bench_samples: Vec<Vec<u64>>, // Per-entry accumulated raw samples for percentiles
    pub bench_cpu: u32,
    pub bench_iterations: u32,
    pub bench_timestamp: u64,
    pub bench_run_count: u32,
    pub last_bench_timestamp: u64, // to detect new results
    pub system_info: SystemInfo,
    // Game TGID detection: process-level yielder promotion
    pub tracked_game_tgid: u32, // currently detected game tgid (0 = none)
    pub tracked_game_ppid: u32, // PPID of the locked game family (for hysteresis comparison)
    pub game_thread_count: usize, // thread count for detected game
    pub game_name: String,      // process name from /proc/{tgid}/comm
    // Hysteresis: challenger must beat current game for 15s to take over
    pub game_challenger_ppid: u32, // PPID contesting game slot (0 = none)
    pub game_challenger_since: Option<Instant>, // When challenger first appeared
    // Confidence-based polling throttle (Rule 40)
    pub game_stable_polls: u32, // consecutive polls with same PPID winner
    pub game_skip_counter: u32, // how many polls we've skipped this interval
    // Scheduler state machine (IDLE=0, COMPILATION=1, GAMING=2)
    pub sched_state: u8,           // current operating state written to BPF BSS
    pub compile_task_count: usize, // active compiler task count for display
    // Game detection confidence tier (100=Steam, 90=Wine .exe, 0=none)
    pub game_confidence: u8,
}
266
/// Per-task telemetry record held in `TuiApp::task_rows`.
///
/// The `Default` implementation zeroes every field except `status`
/// (`TaskStatus::Idle`) and `vtime_mult` (`1024`).
#[derive(Clone, Debug)]
pub struct TaskTelemetryRow {
    pub pid: u32,
    pub comm: String,
    pub tier: u8,
    pub pelt_util: u32,
    pub deficit_us: u32,
    pub wait_duration_ns: u64,
    pub gate_hit_pcts: [f64; 10], // G1, G2, G1W, G3, G1P, G1C, G1CP, G1D, G1WC, GTUN
    pub select_cpu_ns: u32,
    pub enqueue_ns: u32,
    pub core_placement: u16,
    pub dsq_insert_ns: u32,
    pub migration_count: u16,
    pub preempt_count: u16,
    pub yield_count: u16,
    pub total_runs: u32,
    pub jitter_accum_ns: u64,
    pub direct_dispatch_count: u16,
    pub enqueue_count: u16,
    pub cpumask_change_count: u16,
    pub stopping_duration_ns: u32,
    pub running_duration_ns: u32,
    pub max_runtime_us: u32,
    // Scheduling period (dispatch gap)
    pub dispatch_gap_us: u64,
    pub max_dispatch_gap_us: u64,
    // Wait latency histogram
    pub wait_hist: [u32; 4], // <10µs, <100µs, <1ms, >=1ms
    // Delta mode: per-interval rates
    pub runs_per_sec: f64,
    pub migrations_per_sec: f64,
    pub status: TaskStatus,
    pub is_bpf_tracked: bool,
    pub tgid: u32,
    // Phase B: blind spot metrics
    pub slice_util_pct: u16,
    pub llc_id: u16,
    pub same_cpu_streak: u16,
    pub wakeup_source_pid: u32,
    // Voluntary/involuntary context switch tracking (GPU detection)
    pub nvcsw_delta: u32,
    pub nivcsw_delta: u32,
    pub _pad_recomp: u16,
    pub is_hog: bool,         // Hog squeeze: BULK + non-yielder + deprioritized
    pub is_bg: bool,          // Background noise squeeze: non-game, non-wb, non-kernel
    pub is_game_member: bool, // Task is in the game PPID family (tgid==game_tgid or ppid==game_ppid)
    pub ppid: u32,            // Parent PID for game family detection
    // Phase 8: Per-callback sub-function stopwatch (ns)
    pub gate_cascade_ns: u32,  // select_cpu: full gate cascade duration
    pub idle_probe_ns: u32,    // select_cpu: winning gate idle probe cost
    pub vtime_compute_ns: u32, // enqueue: vtime calculation + tier weighting
    pub mbox_staging_ns: u32,  // running: mailbox CL0 write burst
    pub _pad_ewma: u32,
    pub classify_ns: u32,      // stopping: tier classify + squeeze fusion
    pub vtime_staging_ns: u32, // stopping: dsq_vtime bit packing + writes
    pub warm_history_ns: u32,  // stopping: warm CPU ring shift
    // Phase 8: Quantum completion tracking
    pub quantum_full_count: u16,    // Task consumed entire slice
    pub quantum_yield_count: u16,   // Task yielded before slice exhaustion
    pub quantum_preempt_count: u16, // Task was kicked/preempted mid-slice
    // Phase 8: Wake chain enhancement
    pub waker_cpu: u16,  // CPU the waker was running on
    pub waker_tgid: u32, // TGID of the waker (process group)
    // Phase 8: CPU core distribution histogram
    pub cpu_run_count: [u16; 64], // Per-CPU run count (TUI normalizes to %)
    // EEVDF telemetry
    pub vtime_mult: u16, // EEVDF vtime reciprocal (1024=nice0, <1024 high-pri, >1024 low-pri)
}
336
337impl Default for TaskTelemetryRow {
338    fn default() -> Self {
339        Self {
340            pid: 0,
341            comm: String::new(),
342            tier: 0,
343            pelt_util: 0,
344            deficit_us: 0,
345            wait_duration_ns: 0,
346            gate_hit_pcts: [0.0; 10],
347            select_cpu_ns: 0,
348            enqueue_ns: 0,
349            core_placement: 0,
350            dsq_insert_ns: 0,
351            migration_count: 0,
352            preempt_count: 0,
353            yield_count: 0,
354            total_runs: 0,
355            jitter_accum_ns: 0,
356            direct_dispatch_count: 0,
357            enqueue_count: 0,
358            cpumask_change_count: 0,
359            stopping_duration_ns: 0,
360            running_duration_ns: 0,
361            max_runtime_us: 0,
362            dispatch_gap_us: 0,
363            max_dispatch_gap_us: 0,
364            wait_hist: [0; 4],
365            runs_per_sec: 0.0,
366            migrations_per_sec: 0.0,
367            status: TaskStatus::Idle,
368            is_bpf_tracked: false,
369            tgid: 0,
370            slice_util_pct: 0,
371            llc_id: 0,
372            same_cpu_streak: 0,
373            wakeup_source_pid: 0,
374            nvcsw_delta: 0,
375            nivcsw_delta: 0,
376            _pad_recomp: 0,
377            is_hog: false,
378            is_bg: false,
379            is_game_member: false,
380            ppid: 0,
381            // Phase 8
382            gate_cascade_ns: 0,
383            idle_probe_ns: 0,
384            vtime_compute_ns: 0,
385            mbox_staging_ns: 0,
386            _pad_ewma: 0,
387            classify_ns: 0,
388            vtime_staging_ns: 0,
389            warm_history_ns: 0,
390            quantum_full_count: 0,
391            quantum_yield_count: 0,
392            quantum_preempt_count: 0,
393            waker_cpu: 0,
394            waker_tgid: 0,
395            cpu_run_count: [0u16; 64],
396            vtime_mult: 1024,
397        }
398    }
399}
400
401fn aggregate_stats(skel: &BpfSkel) -> cake_stats {
402    let mut total: cake_stats = Default::default();
403
404    if let Some(bss) = &skel.maps.bss_data {
405        for s in &bss.global_stats {
406            // Sum all fields
407            total.nr_new_flow_dispatches += s.nr_new_flow_dispatches;
408            total.nr_old_flow_dispatches += s.nr_old_flow_dispatches;
409            total.nr_dropped_allocations += s.nr_dropped_allocations;
410
411            total.total_gate1_latency_ns += s.total_gate1_latency_ns;
412            total.total_gate2_latency_ns += s.total_gate2_latency_ns;
413            total.total_enqueue_latency_ns += s.total_enqueue_latency_ns;
414
415            // Callback profiling aggregation
416            total.total_select_cpu_ns += s.total_select_cpu_ns;
417            total.total_stopping_ns += s.total_stopping_ns;
418            total.total_running_ns += s.total_running_ns;
419            total.max_select_cpu_ns = total.max_select_cpu_ns.max(s.max_select_cpu_ns);
420            total.max_stopping_ns = total.max_stopping_ns.max(s.max_stopping_ns);
421            total.max_running_ns = total.max_running_ns.max(s.max_running_ns);
422            total.nr_stop_confidence_skip += s.nr_stop_confidence_skip;
423            total.nr_stop_classify += s.nr_stop_classify;
424            total.nr_stop_ramp += s.nr_stop_ramp;
425            total.nr_stop_miss += s.nr_stop_miss;
426
427            // Dispatch locality (cake_dispatch stats)
428            total.nr_local_dispatches += s.nr_local_dispatches;
429            total.nr_stolen_dispatches += s.nr_stolen_dispatches;
430            total.nr_dispatch_misses += s.nr_dispatch_misses;
431            total.nr_dispatch_hint_skip += s.nr_dispatch_hint_skip;
432            total.nr_dsq_queued += s.nr_dsq_queued;
433            total.nr_dsq_consumed += s.nr_dsq_consumed;
434
435            // Phase 8: dispatch callback timing
436            total.total_dispatch_ns += s.total_dispatch_ns;
437            total.max_dispatch_ns = total.max_dispatch_ns.max(s.max_dispatch_ns);
438
439            // EEVDF topology telemetry
440            total.nr_vprot_suppressed += s.nr_vprot_suppressed;
441            total.nr_lag_applied += s.nr_lag_applied;
442            total.nr_capacity_scaled += s.nr_capacity_scaled;
443        }
444    }
445
446    total
447}
448
449impl TuiApp {
450    pub fn new(topology: TopologyInfo, latency_matrix: Vec<Vec<f64>>) -> Self {
451        let nr_cpus = topology.nr_cpus;
452
453        let mut sys = System::new();
454        // Only load CPU specific metrics to reduce background overhead
455        sys.refresh_cpu_usage();
456
457        let components = Components::new_with_refreshed_list();
458
459        // Collect system info once at startup (cold path only)
460        let system_info = SystemInfo::detect(&topology);
461
462        Self {
463            start_time: Instant::now(),
464            status_message: None,
465            topology,
466            latency_matrix,
467            task_rows: HashMap::new(),
468            sorted_pids: Vec::new(),
469            table_state: TableState::default(),
470            bench_table_state: TableState::default(),
471            active_tab: TuiTab::Dashboard,
472            sort_column: SortColumn::RunDuration,
473            sort_descending: true,
474
475            sys,
476            components,
477            cpu_stats: vec![(0.0, 0.0); nr_cpus],
478            show_all_tasks: false,
479            arena_active: 0,
480            arena_max: 0,
481            bpf_task_count: 0,
482            prev_deltas: HashMap::new(),
483            active_pids_buf: std::collections::HashSet::new(),
484            collapsed_tgids: std::collections::HashSet::new(),
485            collapsed_ppids: std::collections::HashSet::new(),
486            bench_latency_handle: None,
487            _prev_stats: None,
488            bench_entries: [(0, 0, 0, 0); 67],
489            bench_samples: vec![Vec::new(); 67],
490            bench_cpu: 0,
491            bench_iterations: 0,
492            bench_timestamp: 0,
493            bench_run_count: 0,
494            last_bench_timestamp: 0,
495            system_info,
496            tracked_game_tgid: 0,
497            tracked_game_ppid: 0,
498            game_thread_count: 0,
499            game_name: String::new(),
500            game_challenger_ppid: 0,
501            game_challenger_since: None,
502            game_stable_polls: 0,
503            game_skip_counter: 0,
504            sched_state: 0,
505            compile_task_count: 0,
506            game_confidence: 0,
507        }
508    }
509
510    /// Format uptime as "Xm Ys" or "Xh Ym"
511    fn format_uptime(&self) -> String {
512        let elapsed = self.start_time.elapsed();
513        let secs = elapsed.as_secs();
514        if secs < 3600 {
515            format!("{}m {}s", secs / 60, secs % 60)
516        } else {
517            format!("{}h {}m", secs / 3600, (secs % 3600) / 60)
518        }
519    }
520
521    /// Set a temporary status message that disappears after 2 seconds
522    fn set_status(&mut self, msg: &str) {
523        self.status_message = Some((msg.to_string(), Instant::now()));
524    }
525
526    /// Get current status message if not expired
527    fn get_status(&self) -> Option<&str> {
528        match &self.status_message {
529            Some((msg, timestamp)) if timestamp.elapsed() < Duration::from_secs(2) => Some(msg),
530            _ => None,
531        }
532    }
533
534    pub fn next_tab(&mut self) {
535        self.active_tab = self.active_tab.next();
536    }
537
538    pub fn previous_tab(&mut self) {
539        self.active_tab = self.active_tab.previous();
540    }
541
542    pub fn cycle_sort(&mut self) {
543        self.sort_column = match self.sort_column {
544            SortColumn::RunDuration => SortColumn::Jitter,
545            SortColumn::Jitter => SortColumn::Gate1Pct,
546            SortColumn::Gate1Pct => SortColumn::TargetCpu,
547            SortColumn::TargetCpu => SortColumn::Pid,
548            SortColumn::Pid => SortColumn::Tier,
549            SortColumn::Tier => SortColumn::Pelt,
550            SortColumn::Pelt => SortColumn::Vcsw,
551            SortColumn::Vcsw => SortColumn::Hog,
552            SortColumn::Hog => SortColumn::RunsPerSec,
553            SortColumn::RunsPerSec => SortColumn::Gap,
554            SortColumn::Gap => SortColumn::SelectCpu,
555            SortColumn::SelectCpu => SortColumn::Enqueue,
556            SortColumn::Enqueue => SortColumn::RunDuration,
557        };
558    }
559
560    pub fn scroll_table_down(&mut self) {
561        let i = match self.table_state.selected() {
562            Some(i) => {
563                if i >= self.sorted_pids.len().saturating_sub(1) {
564                    0
565                } else {
566                    i + 1
567                }
568            }
569            None => 0,
570        };
571        self.table_state.select(Some(i));
572    }
573
574    pub fn scroll_table_up(&mut self) {
575        let i = match self.table_state.selected() {
576            Some(i) => {
577                if i == 0 {
578                    self.sorted_pids.len().saturating_sub(1)
579                } else {
580                    i - 1
581                }
582            }
583            None => 0,
584        };
585        self.table_state.select(Some(i));
586    }
587
588    pub fn scroll_bench_down(&mut self) {
589        let max = 32; // bench rows + category headers
590        let i = match self.bench_table_state.selected() {
591            Some(i) => {
592                if i >= max {
593                    0
594                } else {
595                    i + 1
596                }
597            }
598            None => 0,
599        };
600        self.bench_table_state.select(Some(i));
601    }
602
603    pub fn scroll_bench_up(&mut self) {
604        let max = 32;
605        let i = match self.bench_table_state.selected() {
606            Some(i) => {
607                if i == 0 {
608                    max
609                } else {
610                    i - 1
611                }
612            }
613            None => 0,
614        };
615        self.bench_table_state.select(Some(i));
616    }
617
618    pub fn toggle_filter(&mut self) {
619        self.show_all_tasks = !self.show_all_tasks;
620    }
621}
622
623/// Initialize the terminal for TUI mode
624fn setup_terminal() -> Result<Terminal<CrosstermBackend<Stdout>>> {
625    enable_raw_mode().context("Failed to enable raw mode")?;
626    io::stdout()
627        .execute(EnterAlternateScreen)
628        .context("Failed to enter alternate screen")?;
629    let backend = CrosstermBackend::new(io::stdout());
630    Terminal::new(backend).context("Failed to create terminal")
631}
632
633/// Restore terminal to normal mode
634fn restore_terminal() -> Result<()> {
635    disable_raw_mode().context("Failed to disable raw mode")?;
636    io::stdout()
637        .execute(LeaveAlternateScreen)
638        .context("Failed to leave alternate screen")?;
639    Ok(())
640}
641
642/// A very compact topology display: shows LLC clusters cleanly formatted.
643// Includes active Sysinfo hardware polling (load & temperature).
644fn build_cpu_topology_grid_compact<'a>(
645    topo: &'a TopologyInfo,
646    cpu_stats: &'a [(f32, f32)],
647) -> impl Widget + 'a {
648    let mut text = Vec::new();
649
650    text.push(Line::from(vec![
651        Span::styled("Node 0 Topology", Style::default().fg(Color::DarkGray)),
652        Span::styled("  [ Load% | Temp°C ]", Style::default().fg(Color::Yellow)),
653    ]));
654
655    // Group CPUs by LLC
656    let mut llc_groups: HashMap<u32, Vec<usize>> = HashMap::new();
657    for (cpu_id, &llc_id) in topo.cpu_llc_id.iter().enumerate() {
658        llc_groups.entry(llc_id as u32).or_default().push(cpu_id);
659    }
660
661    let mut sorted_llcs: Vec<_> = llc_groups.into_iter().collect();
662    sorted_llcs.sort_by_key(|k| k.0); // Sort by LLC ID
663
664    for (llc_idx, (llc_id, cpus)) in sorted_llcs.iter().enumerate() {
665        let l3_color = match llc_idx % 4 {
666            0 => Color::Cyan,
667            1 => Color::Magenta,
668            2 => Color::Yellow,
669            _ => Color::Green,
670        };
671
672        // Determine if this LLC has 3D V-Cache
673        let has_vcache = (topo.vcache_llc_mask & (1 << *llc_id)) != 0;
674        let vcache_label = if has_vcache { " [3D V-Cache]" } else { "" };
675
676        text.push(Line::from(vec![Span::styled(
677            format!(" L3 Cache {}{}", llc_id, vcache_label),
678            Style::default().fg(l3_color).add_modifier(Modifier::BOLD),
679        )]));
680
681        let mut sorted_cpus = cpus.clone();
682        sorted_cpus.sort(); // Sort CPUs within LLC
683
684        // Arrange CPUs in a compact grid
685        let cpus_per_row = 4;
686        for chunk in sorted_cpus.chunks(cpus_per_row) {
687            let mut line_spans = vec![Span::raw("  ")]; // Indent
688            for &cpu in chunk {
689                let is_e_core = (topo.little_core_mask & (1 << cpu)) != 0;
690                let core_color = if is_e_core {
691                    Color::DarkGray
692                } else {
693                    Color::White
694                };
695
696                let (load, temp) = cpu_stats.get(cpu).copied().unwrap_or((0.0, 0.0));
697
698                // Color scaling based on load and temp
699                let load_color = if load > 90.0 {
700                    Color::Red
701                } else if load > 50.0 {
702                    Color::Yellow
703                } else {
704                    Color::Green
705                };
706                let temp_color = if temp > 85.0 {
707                    Color::Red
708                } else if temp > 70.0 {
709                    Color::LightRed
710                } else {
711                    Color::Cyan
712                };
713
714                line_spans.push(Span::styled(
715                    format!("CPU{:02} ", cpu),
716                    Style::default().fg(core_color),
717                ));
718                line_spans.push(Span::styled("[", Style::default().fg(Color::DarkGray)));
719                line_spans.push(Span::styled(
720                    format!("{:>3.0}%", load),
721                    Style::default().fg(load_color),
722                ));
723                line_spans.push(Span::styled("|", Style::default().fg(Color::DarkGray)));
724                line_spans.push(Span::styled(
725                    format!("{:<2.0}°C", temp),
726                    Style::default().fg(temp_color),
727                ));
728                line_spans.push(Span::styled("]  ", Style::default().fg(Color::DarkGray)));
729            }
730            text.push(Line::from(line_spans));
731        }
732    }
733
734    Paragraph::new(text).block(
735        Block::default()
736            .title(" Topology ")
737            .borders(Borders::ALL)
738            .border_type(BorderType::Rounded)
739            .border_style(Style::default().fg(Color::Cyan).dim())
740            .padding(Padding::horizontal(1)),
741    )
742}
743
744/// Custom Widget for high-density Latency Heatmap
745struct LatencyHeatmap<'a> {
746    matrix: &'a [Vec<f64>],
747    topology: &'a TopologyInfo,
748    title: &'a str,
749}
750
751impl<'a> LatencyHeatmap<'a> {
752    fn new(matrix: &'a [Vec<f64>], topology: &'a TopologyInfo, title: &'a str) -> Self {
753        Self {
754            matrix,
755            topology,
756            title,
757        }
758    }
759}
760
761impl<'a> Widget for LatencyHeatmap<'a> {
762    fn render(self, area: Rect, buf: &mut Buffer) {
763        let nr_cpus = self.matrix.len();
764
765        let block = Block::default()
766            .title(self.title)
767            .borders(Borders::ALL)
768            .border_type(BorderType::Rounded)
769            .border_style(Style::default().fg(Color::Cyan).dim());
770
771        let inner_area = block.inner(area);
772        block.render(area, buf);
773
774        if inner_area.width < 10 || inner_area.height < 5 {
775            return;
776        }
777
778        // Header for Target CPUs (X-axis)
779        for j in 0..nr_cpus {
780            let x = inner_area.x + 6 + (j as u16 * 2);
781            if x < inner_area.right() {
782                buf.set_string(
783                    x,
784                    inner_area.y,
785                    format!("{:1}", j % 10),
786                    Style::default().fg(Color::Cyan).dim(),
787                );
788            }
789        }
790
791        for i in 0..nr_cpus {
792            let y = inner_area.y + 1 + i as u16;
793            if y >= inner_area.bottom() {
794                break;
795            }
796
797            // Row Label (Source CPU)
798            buf.set_string(
799                inner_area.x + 1,
800                y,
801                format!("C{:02}", i),
802                Style::default().fg(Color::Cyan).dim(),
803            );
804
805            for j in 0..nr_cpus {
806                let x = inner_area.x + 6 + (j as u16 * 2);
807                if x >= inner_area.right() - 1 {
808                    continue;
809                }
810
811                let is_self = i == j;
812                let is_smt = self.topology.cpu_sibling_map[i] as usize == j;
813                let same_ccd = self.topology.cpu_llc_id[i] == self.topology.cpu_llc_id[j];
814
815                let style = if is_self {
816                    Style::default().fg(Color::Rgb(40, 40, 40))
817                } else if is_smt {
818                    Style::default().fg(Color::Rgb(0, 255, 150)) // Turquoise
819                } else if same_ccd {
820                    Style::default().fg(Color::Rgb(0, 200, 255)) // Cyan
821                } else {
822                    Style::default().fg(Color::Rgb(255, 180, 0)) // Amber
823                };
824
825                buf.set_string(x, y, "█", style);
826                buf.set_string(x + 1, y, " ", Style::default());
827            }
828        }
829
830        // Legend at bottom
831        let legend_y = inner_area.bottom().saturating_sub(1);
832        let legend_x = inner_area.x + 1;
833        if legend_y > inner_area.y + nr_cpus as u16 {
834            buf.set_string(
835                legend_x,
836                legend_y,
837                "█ SMT",
838                Style::default().fg(Color::Rgb(0, 255, 150)),
839            );
840            buf.set_string(
841                legend_x + 9,
842                legend_y,
843                "█ Same CCD",
844                Style::default().fg(Color::Rgb(0, 200, 255)),
845            );
846            buf.set_string(
847                legend_x + 22,
848                legend_y,
849                "█ Cross-CCD",
850                Style::default().fg(Color::Rgb(255, 180, 0)),
851            );
852        }
853    }
854}
855
/// Custom Widget for numerical latency table
///
/// Borrows its data for the duration of a single draw; nothing is copied or
/// owned by the widget.
struct LatencyTable<'a> {
    // One inner Vec per row CPU; the renderer reads `matrix[i][j]` as the value
    // shown at row `i`, column `j`.
    // NOTE(review): the unit of the values is not visible from here — confirm.
    matrix: &'a [Vec<f64>],
    // Supplies `cpu_sibling_map` and `cpu_llc_id`, which the renderer uses to
    // pick each cell's colour (SMT sibling / same LLC id / other).
    topology: &'a TopologyInfo,
}
861
862impl<'a> LatencyTable<'a> {
863    fn new(matrix: &'a [Vec<f64>], topology: &'a TopologyInfo) -> Self {
864        Self { matrix, topology }
865    }
866}
867
868impl<'a> Widget for LatencyTable<'a> {
869    fn render(self, area: Rect, buf: &mut Buffer) {
870        let nr_cpus = self.matrix.len();
871
872        let block = Block::default()
873            .title(" Latency Data ")
874            .borders(Borders::ALL)
875            .border_type(BorderType::Rounded)
876            .border_style(Style::default().fg(Color::Cyan).dim());
877
878        let inner_area = block.inner(area);
879        block.render(area, buf);
880
881        if inner_area.width < 10 || inner_area.height < 5 {
882            return;
883        }
884
885        // Header for Target CPUs
886        for j in 0..nr_cpus {
887            let x = inner_area.x + 5 + (j as u16 * 3);
888            if x < inner_area.right() {
889                buf.set_string(
890                    x,
891                    inner_area.y,
892                    format!("{:>2}", j),
893                    Style::default().fg(Color::Cyan).dim(),
894                );
895            }
896        }
897
898        for i in 0..nr_cpus {
899            let y = inner_area.y + 1 + i as u16;
900            if y >= inner_area.bottom() {
901                break;
902            }
903
904            buf.set_string(
905                inner_area.x + 1,
906                y,
907                format!("C{:02}", i),
908                Style::default().fg(Color::Cyan).dim(),
909            );
910
911            for j in 0..nr_cpus {
912                let x = inner_area.x + 5 + (j as u16 * 3);
913                if x >= inner_area.right() - 2 {
914                    continue;
915                }
916
917                let val = self.matrix[i][j].min(999.0);
918                let is_self = i == j;
919                let is_smt = self.topology.cpu_sibling_map[i] as usize == j;
920                let same_ccd = self.topology.cpu_llc_id[i] == self.topology.cpu_llc_id[j];
921
922                let style = if is_self {
923                    Style::default().fg(Color::Rgb(40, 40, 40))
924                } else if is_smt {
925                    Style::default().fg(Color::Rgb(0, 255, 150))
926                } else if same_ccd {
927                    Style::default().fg(Color::Rgb(0, 200, 255))
928                } else {
929                    Style::default().fg(Color::Rgb(255, 180, 0))
930                };
931
932                buf.set_string(x, y, format!("{:>2.0}", val), style);
933            }
934        }
935    }
936}
937
938/// Format BenchLab results as a copyable text string (tab-specific copy)
939fn format_bench_for_clipboard(app: &TuiApp) -> String {
940    // (index, name, category, source: K=kernel kfunc, C=cake custom code)
941    // Groups: kfunc baseline first, then cake replacements. SPEED is per-group.
942    let bench_items: &[(usize, &str, &str, &str)] = &[
943        // Timing: all available clock sources
944        (0, "bpf_ktime_get_ns()", "Timing", "K"),
945        (1, "scx_bpf_now()", "Timing", "K"),
946        (24, "bpf_ktime_get_boot_ns()", "Timing", "K"),
947        (10, "Timing harness (cal)", "Timing", "C"),
948        // Task Lookup: kfunc vs arena direct access
949        (3, "bpf_task_from_pid()", "Task Lookup", "K"),
950        (29, "bpf_get_current_task_btf()", "Task Lookup", "K"),
951        (36, "bpf_task_storage_get()", "Task Lookup", "K"),
952        (6, "get_task_ctx() [arena]", "Task Lookup", "C"),
953        (22, "get_task_ctx+arena CL0", "Task Lookup", "C"),
954        // Process Info: kfunc alternatives
955        (28, "bpf_get_current_pid_tgid()", "Process Info", "K"),
956        (30, "bpf_get_current_comm()", "Process Info", "K"),
957        (14, "task_struct p->scx+nvcsw", "Process Info", "K"),
958        (32, "scx_bpf_task_running(p)", "Process Info", "K"),
959        (33, "scx_bpf_task_cpu(p)", "Process Info", "K"),
960        (46, "Arena tctx.pid+tgid", "Process Info", "C"),
961        (47, "Mbox CL0 cached_cpu", "Process Info", "C"),
962        // CPU Identification: kfunc vs mailbox cached
963        (2, "bpf_get_smp_proc_id()", "CPU ID", "K"),
964        (31, "bpf_get_numa_node_id()", "CPU ID", "K"),
965        (11, "Mbox CL0 cached CPU", "CPU ID", "C"),
966        // Idle Probing: kfunc vs cake probes
967        (4, "test_and_clear_idle()", "Idle Probing", "K"),
968        (37, "scx_bpf_pick_idle_cpu()", "Idle Probing", "K"),
969        (38, "idle_cpumask get+put", "Idle Probing", "K"),
970        (19, "idle_probe(remote) MESI", "Idle Probing", "C"),
971        (20, "smtmask read-only check", "Idle Probing", "C"),
972        // Data Read: kernel struct vs cake data paths
973        (8, "BSS global_stats[cpu]", "Data Read", "C"),
974        (9, "Arena per_cpu.mbox", "Data Read", "C"),
975        (15, "RODATA llc+quantum_ns", "Data Read", "C"),
976        // Mailbox CL0: cake's Disruptor handoff variants
977        (12, "Mbox CL0 tctx+deref", "Mailbox CL0", "C"),
978        (18, "CL0 ptr+fused+packed", "Mailbox CL0", "C"),
979        (21, "Disruptor CL0 full read", "Mailbox CL0", "C"),
980        // Composite: cake-only multi-step operations
981        (16, "Bitflag shift+mask+brless", "Composite Ops", "C"),
982        (17, "(reserved, was compute_ewma)", "Composite Ops", "C"),
983        // DVFS / Performance: CPU frequency queries
984        (35, "scx_bpf_cpuperf_cur(cpu)", "DVFS / Perf", "K"),
985        (42, "scx_bpf_cpuperf_cap(cpu)", "DVFS / Perf", "K"),
986        (45, "RODATA cpuperf_cap[cpu]", "DVFS / Perf", "C"),
987        // Topology Constants: kfunc vs RODATA
988        (5, "scx_bpf_nr_cpu_ids()", "Topology", "K"),
989        (34, "scx_bpf_nr_node_ids()", "Topology", "K"),
990        (43, "RODATA nr_cpus const", "Topology", "C"),
991        (44, "RODATA nr_nodes const", "Topology", "C"),
992        // Standalone Kfuncs: reference costs
993        (7, "scx_bpf_dsq_nr_queued()", "Standalone Kfuncs", "K"),
994        (13, "ringbuf reserve+discard", "Standalone Kfuncs", "K"),
995        (39, "scx_bpf_kick_cpu(self)", "Standalone Kfuncs", "K"),
996        // Synchronization: lock/RNG costs
997        (41, "bpf_spin_lock+unlock", "Synchronization", "K"),
998        (40, "bpf_get_prandom_u32()", "Synchronization", "K"),
999        (48, "CL0 lock-free 3-field", "Synchronization", "C"),
1000        (49, "BSS xorshift32 PRNG", "Synchronization", "C"),
1001        // TLB/Memory: arena access pattern cost
1002        (23, "Arena stride (TLB/hugepage)", "TLB/Memory", "C"),
1003        // Kernel Free Data: zero-cost task_struct field reads
1004        (50, "PELT util+runnable_avg", "Kernel Free Data", "K"),
1005        (51, "PELT runnable_avg only", "Kernel Free Data", "K"),
1006        (52, "schedstats nr_wakeups", "Kernel Free Data", "K"),
1007        (53, "p->policy+prio+flags", "Kernel Free Data", "K"),
1008        (54, "PELT read+tier classify", "Kernel Free Data", "K"),
1009        // End-to-End Workflow Comparisons
1010        (55, "task_storage write+read", "Storage Roundtrip", "C"),
1011        (56, "Arena write+read", "Storage Roundtrip", "C"),
1012        (57, "3-probe cascade (cake)", "Idle Selection", "C"),
1013        (58, "pick_idle_cpu full", "Idle Selection", "K"),
1014        (59, "Weight classify (bpfland)", "Classification", "C"),
1015        (60, "Lat-cri classify (lavd)", "Classification", "C"),
1016        (61, "SMT: cake sib probe", "SMT Probing", "C"),
1017        (62, "SMT: cpumask probe", "SMT Probing", "K"),
1018        // ═══ Fairness Fixes (cold-cache + remote) ═══
1019        // Note: cold probes use arena-stride L1 pollution. storage_get cold
1020        // can't evict task_struct — add ~10ns (L3 hit) conservatively.
1021        // kick_cpu remote measures bit-set only — add ~100ns for IPI delivery.
1022        (63, "storage_get COLD ~est", "Cold Cache", "K"),
1023        (64, "PELT classify COLD", "Cold Cache", "K"),
1024        (65, "legacy EWMA COLD", "Cold Cache", "C"),
1025        (66, "kick_cpu REMOTE ~est", "Cold Cache", "K"),
1026    ];
1027
1028    let percentile = |samples: &[u64], pct: f64| -> u64 {
1029        if samples.is_empty() {
1030            return 0;
1031        }
1032        let mut sorted = samples.to_vec();
1033        sorted.sort_unstable();
1034        let idx = ((pct / 100.0) * (sorted.len() as f64 - 1.0)).round() as usize;
1035        sorted[idx.min(sorted.len() - 1)]
1036    };
1037
1038    let mut output = String::new();
1039    // System hardware context header
1040    output.push_str(&app.system_info.format_header());
1041    output.push('\n');
1042    output.push_str(&format!(
1043        "=== BenchLab ({} runs, {} samples, CPU {}) ===\n\n",
1044        app.bench_run_count, app.bench_iterations, app.bench_cpu
1045    ));
1046    output.push_str(&format!(
1047        "{:<30} {:>7} {:>7} {:>7} {:>7} {:>8} {:>7} {:>8} {:>7}\n",
1048        "HELPER", "MIN", "P1 LOW", "P50", "AVG", "P1 HIGH", "MAX", "JITTER", "SPEED"
1049    ));
1050    output.push_str(&format!("{}\n", "─".repeat(100)));
1051
1052    let mut last_cat = "";
1053    let mut cat_baseline: u64 = 1; // per-category baseline AVG
1054    for &(idx, name, cat, src) in bench_items {
1055        if cat != last_cat {
1056            last_cat = cat;
1057            // Reset baseline for new category — first entry with data becomes base
1058            cat_baseline = 0;
1059            output.push_str(&format!("\n▸ {}\n", cat));
1060        }
1061        let (min_ns, max_ns, total_ns, _) = app.bench_entries[idx];
1062        if app.bench_iterations > 0 && total_ns > 0 {
1063            let avg_ns = total_ns / app.bench_iterations as u64;
1064            let samples = &app.bench_samples[idx];
1065            let p1 = percentile(samples, 1.0);
1066            let p50 = percentile(samples, 50.0);
1067            let p99 = percentile(samples, 99.0);
1068            let jitter = max_ns.saturating_sub(min_ns);
1069            let speedup = if cat_baseline == 0 {
1070                cat_baseline = avg_ns.max(1);
1071                "base".to_string()
1072            } else if avg_ns > 0 {
1073                format!("{:.1}×", cat_baseline as f64 / avg_ns as f64)
1074            } else {
1075                "--".to_string()
1076            };
1077            let tagged = format!("[{}] {}", src, name);
1078            output.push_str(&format!(
1079                "  {:<30} {:>5}ns {:>5}ns {:>5}ns {:>5}ns {:>6}ns {:>5}ns {:>6}ns {:>5}\n",
1080                tagged, min_ns, p1, p50, avg_ns, p99, max_ns, jitter, speedup
1081            ));
1082        }
1083    }
1084    output
1085}
1086
1087/// Format stats as a copyable text string
1088fn format_stats_for_clipboard(stats: &cake_stats, app: &TuiApp) -> String {
1089    let total_dispatches = stats.nr_new_flow_dispatches + stats.nr_old_flow_dispatches;
1090    let new_pct = if total_dispatches > 0 {
1091        (stats.nr_new_flow_dispatches as f64 / total_dispatches as f64) * 100.0
1092    } else {
1093        0.0
1094    };
1095
1096    let mut output = String::new();
1097    output.push_str(&app.system_info.format_header());
1098
1099    // Compact state/game/uptime line
1100    let state_str = match app.sched_state {
1101        2 => {
1102            let conf_label = match app.game_confidence {
1103                100 => "Steam",
1104                90 => "Wine",
1105                _ => "?",
1106            };
1107            format!(
1108                "GAMING game={} pid={} threads={} conf={}%[{}]",
1109                if app.game_name.is_empty() {
1110                    "?"
1111                } else {
1112                    &app.game_name
1113                },
1114                app.tracked_game_tgid,
1115                app.game_thread_count,
1116                app.game_confidence,
1117                conf_label,
1118            )
1119        }
1120
1121        1 => format!("COMPILATION compile_tasks={}", app.compile_task_count),
1122        _ => "IDLE".to_string(),
1123    };
1124    output.push_str(&format!(
1125        "cake: uptime={} state={}\n",
1126        app.format_uptime(),
1127        state_str
1128    ));
1129
1130    // Compact dispatch stats
1131    let dsq_depth = stats.nr_dsq_queued.saturating_sub(stats.nr_dsq_consumed);
1132    let total_dispatch_calls = stats.nr_dispatch_hint_skip
1133        + stats.nr_dispatch_misses
1134        + stats.nr_local_dispatches
1135        + stats.nr_stolen_dispatches;
1136    let hint_pct = if total_dispatch_calls > 0 {
1137        (stats.nr_dispatch_hint_skip as f64 / total_dispatch_calls as f64) * 100.0
1138    } else {
1139        0.0
1140    };
1141    output.push_str(&format!(
1142        "disp: total={} new={:.1}% local={} steal={} miss={} hint_skip={} hint%={:.0} queue={}\n",
1143        total_dispatches,
1144        new_pct,
1145        stats.nr_local_dispatches,
1146        stats.nr_stolen_dispatches,
1147        stats.nr_dispatch_misses,
1148        stats.nr_dispatch_hint_skip,
1149        hint_pct,
1150        dsq_depth,
1151    ));
1152
1153    // Compact callback profile (all on 2 lines)
1154    let stop_total = stats.nr_stop_confidence_skip
1155        + stats.nr_stop_classify
1156        + stats.nr_stop_ramp
1157        + stats.nr_stop_miss;
1158    let stop_total_f = (stop_total as f64).max(1.0);
1159    output.push_str(&format!(
1160        "cb.stop: tot_µs={} max_ns={} calls={} skip={:.1}% classify={:.1}% ramp={:.1}% miss={:.1}%\n",
1161        stats.total_stopping_ns / 1000,
1162        stats.max_stopping_ns,
1163        stop_total,
1164        stats.nr_stop_confidence_skip as f64 / stop_total_f * 100.0,
1165        stats.nr_stop_classify as f64 / stop_total_f * 100.0,
1166        stats.nr_stop_ramp as f64 / stop_total_f * 100.0,
1167        stats.nr_stop_miss as f64 / stop_total_f * 100.0,
1168    ));
1169    output.push_str(&format!(
1170        "cb.run: tot_µs={} max_ns={} calls={}  cb.enq: tot_µs={} calls={}  sel: g1_µs={} g2_µs={}  cb.disp: tot_µs={} max_ns={} calls={}\n",
1171        stats.total_running_ns / 1000, stats.max_running_ns, total_dispatches,
1172        stats.total_enqueue_latency_ns / 1000, total_dispatches,
1173        stats.total_gate1_latency_ns / 1000, stats.total_gate2_latency_ns / 1000,
1174        stats.total_dispatch_ns / 1000, stats.max_dispatch_ns, total_dispatch_calls,
1175    ));
1176
1177    if app.bench_run_count > 0 {
1178        output.push_str(&format_bench_for_clipboard(app));
1179    }
1180
1181    // Task matrix header — compact column key
1182    output.push_str("\ntasks: [PPID PID ST COMM CLS PELT AVG MAX GAP JIT WAIT R/s CPU SEL ENQ STOP RUN G1 G3 DSQ MIG/s WHIST]\n");
1183    output.push_str("       [detail-A: gates% + DIRECT DEFI YIELD PRMPT ENQ MASK MAX_GAP DSQ_INS RUNS SUTIL LLC STREAK WAKER VCSW ICSW CONF TGID]\n");
1184    output.push_str("       [detail-B: sw=cascade/probe/vtime/mbox/pelt/classify/vstg/warm(ns) qc=F%/Y%/P% wk=pid/tgid@cpu dist=C:pct,...]\n");
1185
1186    // Dump always captures ALL BPF-tracked tasks (not filtered by TUI view)
1187    let mut dump_pids: Vec<u32> = app
1188        .task_rows
1189        .iter()
1190        .filter(|(_, row)| row.is_bpf_tracked && row.total_runs > 0)
1191        .map(|(pid, _)| *pid)
1192        .collect();
1193    dump_pids.sort_by(|a, b| {
1194        let r_a = app.task_rows.get(a).unwrap();
1195        let r_b = app.task_rows.get(b).unwrap();
1196        r_b.pelt_util.cmp(&r_a.pelt_util)
1197    });
1198    // TGID grouping (same logic as TUI)
1199    let mut tgid_rank: std::collections::HashMap<u32, usize> = std::collections::HashMap::new();
1200    for (i, pid) in dump_pids.iter().enumerate() {
1201        if let Some(row) = app.task_rows.get(pid) {
1202            let tgid = if row.tgid > 0 { row.tgid } else { *pid };
1203            tgid_rank.entry(tgid).or_insert(i);
1204        }
1205    }
1206    dump_pids.sort_by(|a, b| {
1207        let r_a = app.task_rows.get(a).unwrap();
1208        let r_b = app.task_rows.get(b).unwrap();
1209        let tgid_a = if r_a.tgid > 0 { r_a.tgid } else { *a };
1210        let tgid_b = if r_b.tgid > 0 { r_b.tgid } else { *b };
1211        let rank_a = tgid_rank.get(&tgid_a).copied().unwrap_or(usize::MAX);
1212        let rank_b = tgid_rank.get(&tgid_b).copied().unwrap_or(usize::MAX);
1213        rank_a
1214            .cmp(&rank_b)
1215            .then_with(|| r_b.pelt_util.cmp(&r_a.pelt_util))
1216    });
1217
1218    // Pre-compute thread counts per tgid
1219    let mut tgid_counts: std::collections::HashMap<u32, u32> = std::collections::HashMap::new();
1220    for &pid in &dump_pids {
1221        if let Some(row) = app.task_rows.get(&pid) {
1222            let tgid = if row.tgid > 0 { row.tgid } else { pid };
1223            *tgid_counts.entry(tgid).or_insert(0) += 1;
1224        }
1225    }
1226
1227    let mut last_tgid: u32 = 0;
1228    for &pid in &dump_pids {
1229        if let Some(row) = app.task_rows.get(&pid) {
1230            let tgid = if row.tgid > 0 { row.tgid } else { pid };
1231
1232            // Process group header
1233            if tgid != last_tgid {
1234                let count = tgid_counts.get(&tgid).copied().unwrap_or(1);
1235                let proc_name = if let Some(tgid_row) = app.task_rows.get(&tgid) {
1236                    tgid_row.comm.clone()
1237                } else {
1238                    row.comm.clone()
1239                };
1240                if count > 1 || tgid != pid {
1241                    output.push_str(&format!(
1242                        "\n▼ {} (PID {} PPID {}) — {} threads\n",
1243                        proc_name, tgid, row.ppid, count
1244                    ));
1245                }
1246                last_tgid = tgid;
1247            }
1248
1249            let j_us = if row.total_runs > 0 {
1250                row.jitter_accum_ns / row.total_runs as u64 / 1000
1251            } else {
1252                0
1253            };
1254            let status_str = match row.status {
1255                // Game family member: show ●GAME badge to signal boost status.
1256                // This takes priority over HOG/BG which are cosmetic PELT labels.
1257                // Note: BPF's can_squeeze = !is_game so game tasks are NEVER squeezed.
1258                TaskStatus::Alive if row.is_game_member => "●GAME",
1259                TaskStatus::Alive if row.is_hog => "●HOG",
1260                TaskStatus::Alive if row.is_bg => "●BG",
1261                TaskStatus::Alive => "●",
1262                TaskStatus::Idle => "○",
1263                TaskStatus::Dead => "✗",
1264            };
1265            let indent = if tgid != pid { "  " } else { "" };
1266            let cls_str = match row.tier {
1267                1 => "GAME",
1268                2 => "HOG",
1269                3 => "BG",
1270                _ => "NORM",
1271            };
1272            let avg_wait_us = if row.total_runs > 0 {
1273                row.wait_duration_ns / row.total_runs as u64 / 1000
1274            } else {
1275                0
1276            };
1277            let wait_str = if row.status == TaskStatus::Dead && avg_wait_us > 10000 {
1278                format!("{}†", avg_wait_us)
1279            } else {
1280                format!("{}", avg_wait_us)
1281            };
1282            output.push_str(&format!(
1283                "{}{:<5} {:<7} {:<3} {:<15} {:<4} {:<4} {:<6} {:<7} {:<7} {:<6} {:<7.1} C{:<3} {:<5} {:<5} {:<5} {:<5} {:<4.0} {:<4.0} {:<4.0} {:<7.1} {}/{}/{}/{}\n",
1284                indent,
1285                row.ppid,
1286                row.pid,
1287                status_str,
1288                row.comm,
1289                cls_str,
1290                row.pelt_util,  // PELT utilization (0-1024)
1291                row.max_runtime_us,
1292                row.dispatch_gap_us,
1293                j_us,
1294                wait_str,
1295                row.runs_per_sec,
1296                row.core_placement,
1297                row.select_cpu_ns,
1298                row.enqueue_ns,
1299                row.stopping_duration_ns,
1300                row.running_duration_ns,
1301                row.gate_hit_pcts[0],  // G1
1302                row.gate_hit_pcts[3],  // G3
1303                row.gate_hit_pcts[9],  // DSQ
1304                row.migrations_per_sec,
1305                row.wait_hist[0], row.wait_hist[1], row.wait_hist[2], row.wait_hist[3],
1306            ));
1307            // detail-A: gate % (G1/G3/DSQ) + all extended fields, compact labels
1308            output.push_str(&format!(
1309                "{}  g={:.0}/{:.0}/{:.0} dir={} defi={}µs yld={} prmpt={} enq={} mask={} maxgap={}µs dsqins={}ns runs={} sutil={}% llc=L{:02} streak={} waker={} vcsw={} icsw={} conf={}/{} tgid={}\n",
1310                indent,
1311                row.gate_hit_pcts[0], row.gate_hit_pcts[3], row.gate_hit_pcts[9],
1312                row.direct_dispatch_count, row.deficit_us, row.yield_count,
1313                row.preempt_count, row.enqueue_count, row.cpumask_change_count,
1314                row.max_dispatch_gap_us, row.dsq_insert_ns, row.total_runs,
1315                row.slice_util_pct, row.llc_id, row.same_cpu_streak,
1316                row.wakeup_source_pid, row.nvcsw_delta, row.nivcsw_delta,
1317                row._pad_recomp, row.total_runs, row.tgid,
1318            ));
1319            // detail-B: stopwatch(ns) + quantum completion % + waker + cpu dist — all one line
1320            let q_total = row.quantum_full_count as u32
1321                + row.quantum_yield_count as u32
1322                + row.quantum_preempt_count as u32;
1323            let (q_full_pct, q_yield_pct, q_preempt_pct) = if q_total > 0 {
1324                (
1325                    row.quantum_full_count as f64 / q_total as f64 * 100.0,
1326                    row.quantum_yield_count as f64 / q_total as f64 * 100.0,
1327                    row.quantum_preempt_count as f64 / q_total as f64 * 100.0,
1328                )
1329            } else {
1330                (0.0, 0.0, 0.0)
1331            };
1332            let total_cpu_runs: u32 = row.cpu_run_count.iter().map(|&c| c as u32).sum();
1333            let dist_str = if total_cpu_runs > 0 {
1334                let mut cpu_pcts: Vec<(usize, f64)> = row
1335                    .cpu_run_count
1336                    .iter()
1337                    .enumerate()
1338                    .filter(|(_, &c)| c > 0)
1339                    .map(|(i, &c)| (i, c as f64 / total_cpu_runs as f64 * 100.0))
1340                    .collect();
1341                cpu_pcts.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1342                cpu_pcts
1343                    .iter()
1344                    .take(8)
1345                    .map(|(cpu, pct)| format!("C{}:{:.0}", cpu, pct))
1346                    .collect::<Vec<_>>()
1347                    .join(",")
1348            } else {
1349                "-".to_string()
1350            };
1351            output.push_str(&format!(
1352                "{}  sw={}/{}/{}/{}/{}/{}/{}/{} qc=F{:.0}/Y{:.0}/P{:.0} wk={}/{}@{} ppid={} dist={}\n",
1353                indent,
1354                row.gate_cascade_ns, row.idle_probe_ns, row.vtime_compute_ns,
1355                row.mbox_staging_ns, row._pad_ewma, row.classify_ns,
1356                row.vtime_staging_ns, row.warm_history_ns,
1357                q_full_pct, q_yield_pct, q_preempt_pct,
1358                row.wakeup_source_pid, row.waker_tgid, row.waker_cpu,
1359                row.ppid, dist_str,
1360            ));
1361        }
1362    }
1363
1364    output
1365}
1366
1367/// Draw the UI
1368fn draw_ui(frame: &mut Frame, app: &mut TuiApp, stats: &cake_stats) {
1369    let area = frame.area();
1370
1371    // --- Tab Bar ---
1372    let tab_titles = vec![" Dashboard ", " Topology ", " BenchLab ", " Reference "];
1373    let tabs = Tabs::new(tab_titles)
1374        .select(match app.active_tab {
1375            TuiTab::Dashboard => 0,
1376            TuiTab::Topology => 1,
1377            TuiTab::BenchLab => 2,
1378            TuiTab::ReferenceGuide => 3,
1379        })
1380        .style(Style::default().fg(Color::DarkGray))
1381        .highlight_style(
1382            Style::default()
1383                .fg(Color::Yellow)
1384                .add_modifier(Modifier::BOLD | Modifier::UNDERLINED),
1385        )
1386        .divider("│")
1387        .block(
1388            Block::default()
1389                .title(format!(" scx_cake v{} ", env!("CARGO_PKG_VERSION")))
1390                .title_style(
1391                    Style::default()
1392                        .fg(Color::Cyan)
1393                        .add_modifier(Modifier::BOLD),
1394                )
1395                .borders(Borders::ALL)
1396                .border_style(Style::default().fg(Color::DarkGray))
1397                .border_type(BorderType::Rounded),
1398        );
1399
1400    // Create main outer layout
1401    let main_layout = Layout::default()
1402        .direction(Direction::Vertical)
1403        .constraints([
1404            Constraint::Length(3), // Tab bar (bordered)
1405            Constraint::Min(0),    // Active View
1406            Constraint::Length(3), // Footer
1407        ])
1408        .split(area);
1409
1410    frame.render_widget(tabs, main_layout[0]);
1411
1412    // Render active view
1413    match app.active_tab {
1414        TuiTab::Dashboard => draw_dashboard_tab(frame, app, stats, main_layout[1]),
1415        TuiTab::Topology => draw_topology_tab(frame, app, main_layout[1]),
1416        TuiTab::BenchLab => draw_bench_tab(frame, app, main_layout[1]),
1417        TuiTab::ReferenceGuide => draw_reference_tab(frame, main_layout[1]),
1418    }
1419
1420    // --- Footer (key bindings + status) ---
1421    let arrow = if app.sort_descending { "▼" } else { "▲" };
1422    let sort_label = match app.sort_column {
1423        SortColumn::RunDuration => format!("[RunTM]{}", arrow),
1424        SortColumn::Gate1Pct => format!("[G1%]{}", arrow),
1425        SortColumn::TargetCpu => format!("[CPU]{}", arrow),
1426        SortColumn::Pid => format!("[PID]{}", arrow),
1427        SortColumn::SelectCpu => format!("[SEL_NS]{}", arrow),
1428        SortColumn::Enqueue => format!("[ENQ_NS]{}", arrow),
1429        SortColumn::Jitter => format!("[JITTER]{}", arrow),
1430        SortColumn::Tier => format!("[TIER]{}", arrow),
1431        SortColumn::Pelt => format!("[PELT]{}", arrow),
1432        SortColumn::Vcsw => format!("[VCSW]{}", arrow),
1433        SortColumn::Hog => format!("[HOG]{}", arrow),
1434        SortColumn::RunsPerSec => format!("[RUN/s]{}", arrow),
1435        SortColumn::Gap => format!("[GAP]{}", arrow),
1436    };
1437
1438    let footer_text = match app.get_status() {
1439        Some(status) => format!(
1440            " {} [s]Sort [S]Rev [+/-]Rate [↑↓]Scrl [T]Top [⏎]Fold [␣]Grp [x]FoldAll [Tab]Tabs [f]Filt [r]Reset [b]Bench [c]Copy [d]Dump [q]Quit │ {}",
1441            sort_label, status
1442        ),
1443        None => format!(
1444            " {} [s]Sort [S]Rev [+/-]Rate [↑↓]Scrl [T]Top [⏎]Fold [␣]Grp [x]FoldAll [Tab]Tabs [f]Filt [r]Reset [b]Bench [c]Copy [d]Dump [q]Quit",
1445            sort_label
1446        ),
1447    };
1448    let (fg_color, border_color) = if app.get_status().is_some() {
1449        (Color::Green, Color::Green)
1450    } else {
1451        (Color::DarkGray, Color::DarkGray)
1452    };
1453    let footer = Paragraph::new(footer_text)
1454        .style(Style::default().fg(fg_color))
1455        .block(
1456            Block::default()
1457                .borders(Borders::ALL)
1458                .border_style(Style::default().fg(border_color)),
1459        );
1460    frame.render_widget(footer, main_layout[2]);
1461}
1462
1463fn draw_dashboard_tab(frame: &mut Frame, app: &mut TuiApp, stats: &cake_stats, area: Rect) {
1464    // Full-width stacked layout: compact header → tier performance → task matrix
1465    let outer_layout = Layout::default()
1466        .direction(Direction::Vertical)
1467        .constraints([
1468            Constraint::Length(5), // Header: 3 content lines (stats + sched + state/game) + 2 borders
1469            Constraint::Length(8), // Tier performance panel (4 rows + header + borders)
1470            Constraint::Min(10),   // Full-width Task Matrix
1471        ])
1472        .split(area);
1473
1474    // --- Compact Header: system info + tier counts on one line ---
1475    let total_dispatches = stats.nr_new_flow_dispatches + stats.nr_old_flow_dispatches;
1476    let new_pct = if total_dispatches > 0 {
1477        (stats.nr_new_flow_dispatches as f64 / total_dispatches as f64) * 100.0
1478    } else {
1479        0.0
1480    };
1481
1482    // PELT tier summary: count tasks by utilization bands
1483    let (mut wc0, mut wc1, mut wc2, mut wc3) = (0u32, 0u32, 0u32, 0u32);
1484    for row in app.task_rows.values() {
1485        if !row.is_bpf_tracked || row.total_runs == 0 {
1486            continue;
1487        }
1488        match row.pelt_util {
1489            0..=49 => wc0 += 1,
1490            50..=255 => wc1 += 1,
1491            256..=799 => wc2 += 1,
1492            _ => wc3 += 1,
1493        }
1494    }
1495
1496    let topo_flags = format!(
1497        "{}C{}{}{}",
1498        app.topology.nr_cpus,
1499        if app.topology.has_dual_ccd {
1500            " 2CCD"
1501        } else {
1502            ""
1503        },
1504        if app.topology.has_hybrid_cores {
1505            " HYB"
1506        } else {
1507            ""
1508        },
1509        if app.topology.smt_enabled { " SMT" } else { "" },
1510    );
1511
1512    let drop_warn = if stats.nr_dropped_allocations > 0 {
1513        format!("  ⚠ {}×ENOMEM", stats.nr_dropped_allocations)
1514    } else {
1515        String::new()
1516    };
1517
1518    // CPU frequency from sysfs (best-effort, CPU 0 as representative)
1519    let cpu_freq_str =
1520        std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq")
1521            .ok()
1522            .and_then(|s| s.trim().parse::<u64>().ok())
1523            .map(|khz| format!(" {:.1}GHz", khz as f64 / 1_000_000.0))
1524            .unwrap_or_default();
1525
1526    // Hog count for header visibility
1527    let hog_count = app.task_rows.values().filter(|r| r.is_hog).count();
1528    let bg_count = app.task_rows.values().filter(|r| r.is_bg).count();
1529    let squeeze_str = match (hog_count > 0, bg_count > 0) {
1530        (true, true) => format!("  HOG:{}  BG:{}", hog_count, bg_count),
1531        (true, false) => format!("  HOG:{}", hog_count),
1532        (false, true) => format!("  BG:{}", bg_count),
1533        (false, false) => String::new(),
1534    };
1535
1536    // Line 1: CPU | Dispatches | Tier Distribution
1537    let line1 =
1538        format!(
1539        " CPU: {}{}  │  Dispatches: {} ({:.0}% new)  │  Tiers: T0:{} T1:{} T2:{} T3:{}  │  {}{}{}",
1540        topo_flags,
1541        cpu_freq_str,
1542        total_dispatches,
1543        new_pct,
1544        wc0, wc1, wc2, wc3,
1545        app.format_uptime(),
1546        squeeze_str,
1547        drop_warn,
1548    );
1549
1550    // Line 2: Dispatch locality | Queue depth | Tasks | Filter
1551    let dsq_depth = stats.nr_dsq_queued.saturating_sub(stats.nr_dsq_consumed);
1552    let _filter_label = if app.show_all_tasks {
1553        "ALL tasks"
1554    } else {
1555        "BPF-tracked only"
1556    };
1557
1558    // Hint effectiveness: what % of dispatch calls skipped the kfunc
1559    let total_dispatch_calls = stats.nr_dispatch_hint_skip
1560        + stats.nr_dispatch_misses
1561        + stats.nr_local_dispatches
1562        + stats.nr_stolen_dispatches;
1563    let hint_pct = if total_dispatch_calls > 0 {
1564        (stats.nr_dispatch_hint_skip as f64 / total_dispatch_calls as f64) * 100.0
1565    } else {
1566        0.0
1567    };
1568
1569    // Queue depth warning: if tasks are piling up, the hint may be causing stalls
1570    let queue_str = if dsq_depth > 10 {
1571        format!("⚠ Queue:{}", dsq_depth)
1572    } else {
1573        format!("Queue:{}", dsq_depth)
1574    };
1575
1576    let line2 = format!(
1577        " Dispatch: Local:{} Steal:{} Miss:{} HintSkip:{} ({:.0}%)  │  {}  │  EEVDF: Vprot:{} Lag:{} Cap:{}",
1578        stats.nr_local_dispatches,
1579        stats.nr_stolen_dispatches,
1580        stats.nr_dispatch_misses,
1581        stats.nr_dispatch_hint_skip,
1582        hint_pct,
1583        queue_str,
1584        stats.nr_vprot_suppressed,
1585        stats.nr_lag_applied,
1586        stats.nr_capacity_scaled,
1587    );
1588
1589    // State label — shown in header for all three operating states
1590    let state_line = match app.sched_state {
1591        2 => String::new(), // GAMING: state shown inline in game_line below
1592        1 => format!(
1593            " State: COMPILATION | {} compiler task{} active",
1594            app.compile_task_count,
1595            if app.compile_task_count == 1 { "" } else { "s" }
1596        ),
1597        _ => " State: IDLE".to_string(),
1598    };
1599
1600    let header_text = if app.tracked_game_tgid > 0 {
1601        // Confidence tag: shows detection tier + poll stability
1602        let conf_label = match app.game_confidence {
1603            100 => "Steam",
1604            90 => "Wine/.exe",
1605            _ => "unknown",
1606        };
1607        let stability = if app.game_stable_polls >= 20 {
1608            "\u{1F512}".to_string()
1609        } else {
1610            format!("{}/20", app.game_stable_polls)
1611        };
1612        let mut game_line = format!(
1613            " State: GAMING | Game: {} (PID {}, {} threads) [{}% {} {}]",
1614            if app.game_name.is_empty() {
1615                "unknown"
1616            } else {
1617                &app.game_name
1618            },
1619            app.tracked_game_tgid,
1620            app.game_thread_count,
1621            app.game_confidence,
1622            conf_label,
1623            stability,
1624        );
1625        // Show challenger holdoff status if active
1626        if app.game_challenger_ppid > 0 {
1627            if let Some(since) = app.game_challenger_since {
1628                let elapsed = since.elapsed().as_secs();
1629                game_line.push_str(&format!(" [contender: {}s/15s]", elapsed));
1630            }
1631        }
1632        format!("{}\n{}\n{}", line1, line2, game_line)
1633    } else if !state_line.is_empty() {
1634        format!("{}\n{}\n{}", line1, line2, state_line)
1635    } else {
1636        format!("{}\n{}", line1, line2)
1637    };
1638
1639    let header_border_color = if stats.nr_dropped_allocations > 0 {
1640        Color::Red
1641    } else {
1642        Color::Blue
1643    };
1644
1645    let header = Paragraph::new(header_text).block(
1646        Block::default()
1647            .title(" scx_cake Dashboard ")
1648            .title_style(
1649                Style::default()
1650                    .fg(Color::Cyan)
1651                    .add_modifier(Modifier::BOLD),
1652            )
1653            .borders(Borders::ALL)
1654            .border_style(Style::default().fg(header_border_color)),
1655    );
1656    frame.render_widget(header, outer_layout[0]);
1657
1658    // --- PELT Utilization Tier Panel ---
1659    // Aggregate by PELT bands: P0 <5%, P1 5-25%, P2 25-78%, P3 ≥HOG
1660    let mut tier_pids = [0u32; 4];
1661    let mut tier_avg_rt_sum = [0u64; 4];
1662    let mut tier_jitter_sum = [0u64; 4];
1663    let mut tier_runs_per_sec = [0.0f64; 4];
1664    let mut tier_wait_sum = [0u64; 4];
1665    let mut tier_active = [0u32; 4];
1666
1667    // --- CLASS Distribution Panel ---
1668    // Aggregate by scheduling class: GAME=1, NORM=0, HOG=2, BG=3
1669    let mut cls_pids = [0u32; 4];
1670    let mut cls_pelt_sum = [0u64; 4];
1671    let mut cls_wait_sum = [0u64; 4];
1672    let mut cls_runs_per_sec = [0.0f64; 4];
1673    let mut cls_active = [0u32; 4];
1674
1675    for row in app.task_rows.values() {
1676        if !row.is_bpf_tracked || row.total_runs == 0 {
1677            continue;
1678        }
1679        // PELT tier aggregation
1680        let t = match row.pelt_util {
1681            0..=49 => 0,
1682            50..=255 => 1,
1683            256..=799 => 2,
1684            _ => 3,
1685        };
1686        tier_pids[t] += 1;
1687        tier_avg_rt_sum[t] += row.pelt_util as u64;
1688        tier_active[t] += 1;
1689        let j = row.jitter_accum_ns / row.total_runs as u64;
1690        tier_jitter_sum[t] += j / 1000;
1691        tier_runs_per_sec[t] += row.runs_per_sec;
1692        tier_wait_sum[t] += row.wait_duration_ns / row.total_runs as u64 / 1000;
1693
1694        // CLASS aggregation (tier field: 0=NORM, 1=GAME, 2=HOG, 3=BG)
1695        let c = match row.tier {
1696            1 => 0, // GAME
1697            0 => 1, // NORM
1698            2 => 2, // HOG
1699            3 => 3, // BG
1700            _ => 1, // default NORM
1701        };
1702        cls_pids[c] += 1;
1703        cls_pelt_sum[c] += row.pelt_util as u64;
1704        cls_wait_sum[c] += row.wait_duration_ns / row.total_runs as u64 / 1000;
1705        cls_runs_per_sec[c] += row.runs_per_sec;
1706        cls_active[c] += 1;
1707    }
1708
1709    let total_runs_sec: f64 = tier_runs_per_sec.iter().sum();
1710
1711    // Split tier row into two side-by-side panels
1712    let tier_cols = Layout::horizontal([Constraint::Percentage(55), Constraint::Percentage(45)])
1713        .split(outer_layout[1]);
1714
1715    // ── Left: PELT Utilization Tiers ──
1716    let tier_names = ["P0 <5%", "P1 5-25%", "P2 25-78%", "P3 ≥HOG"];
1717    let tier_colors = [Color::LightCyan, Color::Green, Color::Yellow, Color::Red];
1718
1719    let tier_header = Row::new(vec![
1720        Cell::from("PELT").style(
1721            Style::default()
1722                .fg(Color::Yellow)
1723                .add_modifier(Modifier::BOLD),
1724        ),
1725        Cell::from("PIDs").style(
1726            Style::default()
1727                .fg(Color::White)
1728                .add_modifier(Modifier::BOLD),
1729        ),
1730        Cell::from("AVG RT").style(
1731            Style::default()
1732                .fg(Color::Cyan)
1733                .add_modifier(Modifier::BOLD),
1734        ),
1735        Cell::from("AVG JIT").style(
1736            Style::default()
1737                .fg(Color::LightCyan)
1738                .add_modifier(Modifier::BOLD),
1739        ),
1740        Cell::from("WAIT µs").style(
1741            Style::default()
1742                .fg(Color::LightYellow)
1743                .add_modifier(Modifier::BOLD),
1744        ),
1745        Cell::from("RUNS/s").style(
1746            Style::default()
1747                .fg(Color::Green)
1748                .add_modifier(Modifier::BOLD),
1749        ),
1750        Cell::from("WORK%").style(
1751            Style::default()
1752                .fg(Color::Magenta)
1753                .add_modifier(Modifier::BOLD),
1754        ),
1755    ])
1756    .height(1);
1757
1758    let tier_rows: Vec<Row> = (0..4)
1759        .map(|t| {
1760            let count = tier_active[t].max(1) as u64;
1761            let avg_rt = tier_avg_rt_sum[t] / count;
1762            let avg_jit = tier_jitter_sum[t] / count;
1763            let avg_wait = tier_wait_sum[t] / count;
1764            let work_pct = if total_runs_sec > 0.0 {
1765                (tier_runs_per_sec[t] / total_runs_sec) * 100.0
1766            } else {
1767                0.0
1768            };
1769
1770            Row::new(vec![
1771                Cell::from(tier_names[t]).style(
1772                    Style::default()
1773                        .fg(tier_colors[t])
1774                        .add_modifier(Modifier::BOLD),
1775                ),
1776                Cell::from(format!("{}", tier_pids[t])),
1777                Cell::from(format!("{} µs", avg_rt)),
1778                Cell::from(format!("{} µs", avg_jit)),
1779                Cell::from(format!("{}", avg_wait)),
1780                Cell::from(format!("{:.1}", tier_runs_per_sec[t])),
1781                Cell::from(format!("{:.1}%", work_pct)),
1782            ])
1783        })
1784        .collect();
1785
1786    let tier_table = Table::new(
1787        tier_rows,
1788        [
1789            Constraint::Length(15), // TIER
1790            Constraint::Length(6),  // PIDs
1791            Constraint::Length(10), // AVG RT
1792            Constraint::Length(10), // AVG JIT
1793            Constraint::Length(9),  // WAIT µs
1794            Constraint::Length(9),  // RUNS/s
1795            Constraint::Length(7),  // WORK%
1796        ],
1797    )
1798    .header(tier_header)
1799    .block(
1800        Block::default()
1801            .title(" PELT Utilization Tiers ")
1802            .title_style(
1803                Style::default()
1804                    .fg(Color::Yellow)
1805                    .add_modifier(Modifier::BOLD),
1806            )
1807            .borders(Borders::ALL)
1808            .border_style(Style::default().fg(Color::DarkGray))
1809            .border_type(BorderType::Rounded),
1810    );
1811    frame.render_widget(tier_table, tier_cols[0]);
1812
1813    // ── Right: CLASS Distribution ──
1814    let cls_names = ["GAME", "NORM", "HOG", "BG"];
1815    let cls_colors = [Color::Green, Color::Blue, Color::Yellow, Color::Red];
1816
1817    let cls_header = Row::new(vec![
1818        Cell::from("CLASS").style(
1819            Style::default()
1820                .fg(Color::LightMagenta)
1821                .add_modifier(Modifier::BOLD),
1822        ),
1823        Cell::from("PIDs").style(
1824            Style::default()
1825                .fg(Color::White)
1826                .add_modifier(Modifier::BOLD),
1827        ),
1828        Cell::from("PELT").style(
1829            Style::default()
1830                .fg(Color::Cyan)
1831                .add_modifier(Modifier::BOLD),
1832        ),
1833        Cell::from("WAIT µs").style(
1834            Style::default()
1835                .fg(Color::LightYellow)
1836                .add_modifier(Modifier::BOLD),
1837        ),
1838        Cell::from("RUNS/s").style(
1839            Style::default()
1840                .fg(Color::Green)
1841                .add_modifier(Modifier::BOLD),
1842        ),
1843        Cell::from("WORK%").style(
1844            Style::default()
1845                .fg(Color::Magenta)
1846                .add_modifier(Modifier::BOLD),
1847        ),
1848    ])
1849    .height(1);
1850
1851    let cls_rows: Vec<Row> = (0..4)
1852        .map(|c| {
1853            let count = cls_active[c].max(1) as u64;
1854            let avg_pelt = cls_pelt_sum[c] / count;
1855            let avg_wait = cls_wait_sum[c] / count;
1856            let work_pct = if total_runs_sec > 0.0 {
1857                (cls_runs_per_sec[c] / total_runs_sec) * 100.0
1858            } else {
1859                0.0
1860            };
1861
1862            Row::new(vec![
1863                Cell::from(cls_names[c]).style(
1864                    Style::default()
1865                        .fg(cls_colors[c])
1866                        .add_modifier(Modifier::BOLD),
1867                ),
1868                Cell::from(format!("{}", cls_pids[c])),
1869                Cell::from(format!("{}", avg_pelt)),
1870                Cell::from(format!("{}", avg_wait)),
1871                Cell::from(format!("{:.1}", cls_runs_per_sec[c])),
1872                Cell::from(format!("{:.1}%", work_pct)),
1873            ])
1874        })
1875        .collect();
1876
1877    let cls_table = Table::new(
1878        cls_rows,
1879        [
1880            Constraint::Length(7), // CLASS
1881            Constraint::Length(6), // PIDs
1882            Constraint::Length(6), // PELT
1883            Constraint::Length(9), // WAIT µs
1884            Constraint::Length(9), // RUNS/s
1885            Constraint::Length(7), // WORK%
1886        ],
1887    )
1888    .header(cls_header)
1889    .block(
1890        Block::default()
1891            .title(" Class Distribution ")
1892            .title_style(
1893                Style::default()
1894                    .fg(Color::LightMagenta)
1895                    .add_modifier(Modifier::BOLD),
1896            )
1897            .borders(Borders::ALL)
1898            .border_style(Style::default().fg(Color::DarkGray))
1899            .border_type(BorderType::Rounded),
1900    );
1901    frame.render_widget(cls_table, tier_cols[1]);
1902
1903    // All timing columns standardized to µs (noted in block title)
1904    let matrix_header = Row::new(vec![
1905        // ── Identity (DarkGray = secondary, Yellow = primary key) ──
1906        Cell::from("PPID").style(
1907            Style::default()
1908                .fg(Color::DarkGray)
1909                .add_modifier(Modifier::BOLD),
1910        ),
1911        Cell::from("PID").style(
1912            Style::default()
1913                .fg(Color::Yellow)
1914                .add_modifier(Modifier::BOLD),
1915        ),
1916        Cell::from("ST").style(
1917            Style::default()
1918                .fg(Color::White)
1919                .add_modifier(Modifier::BOLD),
1920        ),
1921        Cell::from("COMM").style(
1922            Style::default()
1923                .fg(Color::Yellow)
1924                .add_modifier(Modifier::BOLD),
1925        ),
1926        // ── Classification (LightMagenta) ──
1927        Cell::from("CLS").style(
1928            Style::default()
1929                .fg(Color::LightMagenta)
1930                .add_modifier(Modifier::BOLD),
1931        ),
1932        // ── Timing (Cyan) ──
1933        Cell::from("VCSW").style(
1934            Style::default()
1935                .fg(Color::Cyan)
1936                .add_modifier(Modifier::BOLD),
1937        ),
1938        Cell::from("AVGRT").style(
1939            Style::default()
1940                .fg(Color::Cyan)
1941                .add_modifier(Modifier::BOLD),
1942        ),
1943        Cell::from("MAXRT").style(
1944            Style::default()
1945                .fg(Color::Cyan)
1946                .add_modifier(Modifier::BOLD),
1947        ),
1948        Cell::from("GAP").style(
1949            Style::default()
1950                .fg(Color::Cyan)
1951                .add_modifier(Modifier::BOLD),
1952        ),
1953        Cell::from("JITTER").style(
1954            Style::default()
1955                .fg(Color::Cyan)
1956                .add_modifier(Modifier::BOLD),
1957        ),
1958        Cell::from("WAIT").style(
1959            Style::default()
1960                .fg(Color::Cyan)
1961                .add_modifier(Modifier::BOLD),
1962        ),
1963        Cell::from("RUNS/s").style(
1964            Style::default()
1965                .fg(Color::Cyan)
1966                .add_modifier(Modifier::BOLD),
1967        ),
1968        // ── Placement (Magenta) ──
1969        Cell::from("CPU").style(
1970            Style::default()
1971                .fg(Color::Magenta)
1972                .add_modifier(Modifier::BOLD),
1973        ),
1974        // ── Callback Overhead (LightCyan) ──
1975        Cell::from("SEL").style(
1976            Style::default()
1977                .fg(Color::LightCyan)
1978                .add_modifier(Modifier::BOLD),
1979        ),
1980        Cell::from("ENQ").style(
1981            Style::default()
1982                .fg(Color::LightCyan)
1983                .add_modifier(Modifier::BOLD),
1984        ),
1985        Cell::from("STOP").style(
1986            Style::default()
1987                .fg(Color::LightCyan)
1988                .add_modifier(Modifier::BOLD),
1989        ),
1990        Cell::from("RUN").style(
1991            Style::default()
1992                .fg(Color::LightCyan)
1993                .add_modifier(Modifier::BOLD),
1994        ),
1995        // ── Gate Distribution (Green) ──
1996        Cell::from("G1").style(
1997            Style::default()
1998                .fg(Color::Green)
1999                .add_modifier(Modifier::BOLD),
2000        ),
2001        Cell::from("G3").style(
2002            Style::default()
2003                .fg(Color::Green)
2004                .add_modifier(Modifier::BOLD),
2005        ),
2006        Cell::from("DSQ").style(
2007            Style::default()
2008                .fg(Color::Green)
2009                .add_modifier(Modifier::BOLD),
2010        ),
2011        // ── Placement (Magenta) ──
2012        Cell::from("MIGR/s").style(
2013            Style::default()
2014                .fg(Color::Magenta)
2015                .add_modifier(Modifier::BOLD),
2016        ),
2017        // ── Identity (DarkGray) ──
2018        Cell::from("TGID").style(
2019            Style::default()
2020                .fg(Color::DarkGray)
2021                .add_modifier(Modifier::BOLD),
2022        ),
2023        // ── Quantum Completion (LightYellow) ──
2024        Cell::from("Q%F").style(
2025            Style::default()
2026                .fg(Color::LightYellow)
2027                .add_modifier(Modifier::BOLD),
2028        ),
2029        Cell::from("Q%Y").style(
2030            Style::default()
2031                .fg(Color::LightYellow)
2032                .add_modifier(Modifier::BOLD),
2033        ),
2034        Cell::from("Q%P").style(
2035            Style::default()
2036                .fg(Color::LightYellow)
2037                .add_modifier(Modifier::BOLD),
2038        ),
2039        // ── EEVDF (LightGreen) ──
2040        Cell::from("WAKER").style(
2041            Style::default()
2042                .fg(Color::LightGreen)
2043                .add_modifier(Modifier::BOLD),
2044        ),
2045        // ── Classification (LightMagenta) ──
2046        Cell::from("NICE").style(
2047            Style::default()
2048                .fg(Color::LightMagenta)
2049                .add_modifier(Modifier::BOLD),
2050        ),
2051    ])
2052    .height(1);
2053
2054    let mut matrix_rows: Vec<Row> = Vec::new();
2055    let mut last_tgid: u32 = 0;
2056
2057    // Pre-compute thread counts per tgid for the header
2058    let mut tgid_thread_counts: std::collections::HashMap<u32, u32> =
2059        std::collections::HashMap::new();
2060    for pid in &app.sorted_pids {
2061        if let Some(row) = app.task_rows.get(pid) {
2062            let tgid = if row.tgid > 0 { row.tgid } else { *pid };
2063            *tgid_thread_counts.entry(tgid).or_insert(0) += 1;
2064        }
2065    }
2066
2067    for pid in &app.sorted_pids {
2068        let row = match app.task_rows.get(pid) {
2069            Some(r) => r,
2070            None => continue,
2071        };
2072        let tgid = if row.tgid > 0 { row.tgid } else { *pid };
2073
2074        // Insert process group header when tgid changes
2075        if tgid != last_tgid {
2076            let thread_count = tgid_thread_counts.get(&tgid).copied().unwrap_or(1);
2077            let proc_name = if let Some(tgid_row) = app.task_rows.get(&tgid) {
2078                tgid_row.comm.as_str()
2079            } else {
2080                row.comm.as_str()
2081            };
2082            let is_collapsed = app.collapsed_tgids.contains(&tgid);
2083            if thread_count > 1 || tgid != *pid {
2084                let arrow = if is_collapsed { "▶" } else { "▼" };
2085                let header_text = format!(
2086                    "{} {} (PID {}) — {} threads",
2087                    arrow, proc_name, tgid, thread_count
2088                );
2089                let header_cells = vec![Cell::from(header_text).style(
2090                    Style::default()
2091                        .fg(Color::White)
2092                        .add_modifier(Modifier::BOLD | Modifier::UNDERLINED),
2093                )];
2094                matrix_rows.push(Row::new(header_cells).height(1));
2095            }
2096            last_tgid = tgid;
2097        }
2098
2099        // Skip entire PPID group if collapsed
2100        if app.collapsed_ppids.contains(&row.ppid) && row.ppid > 0 {
2101            continue;
2102        }
2103
2104        // Skip child threads if their TGID is collapsed
2105        if tgid != *pid && app.collapsed_tgids.contains(&tgid) {
2106            continue;
2107        }
2108
2109        // Voluntary context switch color: higher = more GPU/IO activity
2110        let vcsw_style = match row.nvcsw_delta {
2111            0..=10 => Style::default().fg(Color::DarkGray),
2112            11..=64 => Style::default().fg(Color::Green),
2113            65..=200 => Style::default()
2114                .fg(Color::LightGreen)
2115                .add_modifier(Modifier::BOLD),
2116            _ => Style::default()
2117                .fg(Color::Cyan)
2118                .add_modifier(Modifier::BOLD),
2119        };
2120        let jitter_us = if row.total_runs > 0 {
2121            row.jitter_accum_ns / row.total_runs as u64 / 1000
2122        } else {
2123            0
2124        };
2125        let indent = if tgid != *pid { "  " } else { "" };
2126        // Quantum completion percentages
2127        let q_total = row.quantum_full_count as u32
2128            + row.quantum_yield_count as u32
2129            + row.quantum_preempt_count as u32;
2130        let (q_full_pct, q_yield_pct, q_preempt_pct) = if q_total > 0 {
2131            (
2132                row.quantum_full_count as f64 / q_total as f64 * 100.0,
2133                row.quantum_yield_count as f64 / q_total as f64 * 100.0,
2134                row.quantum_preempt_count as f64 / q_total as f64 * 100.0,
2135            )
2136        } else {
2137            (0.0, 0.0, 0.0)
2138        };
2139        // All ns → µs conversions at render time
2140        let cells = vec![
2141            Cell::from(format!("{}{}", indent, row.ppid)),
2142            Cell::from(format!("{}", row.pid)),
2143            Cell::from(if row.is_hog {
2144                "●HOG"
2145            } else if row.is_bg {
2146                "●BG"
2147            } else {
2148                row.status.label()
2149            })
2150            .style(Style::default().fg(if row.is_hog {
2151                Color::LightRed
2152            } else if row.is_bg {
2153                Color::Rgb(255, 165, 0) // orange for bg_noise
2154            } else {
2155                row.status.color()
2156            })),
2157            Cell::from(row.comm.as_str()),
2158            Cell::from(match row.tier {
2159                1 => "GAME",
2160                2 => "HOG",
2161                3 => "BG",
2162                _ => "NORM",
2163            })
2164            .style(Style::default().fg(match row.tier {
2165                1 => Color::Green,
2166                2 => Color::Yellow,
2167                3 => Color::Red,
2168                _ => Color::Blue,
2169            })),
2170            Cell::from(format!("{}", row.nvcsw_delta)).style(vcsw_style),
2171            Cell::from(format!("{}", row.pelt_util)),
2172            Cell::from(format!("{}", row.max_runtime_us)),
2173            Cell::from(format!("{}", row.dispatch_gap_us)),
2174            Cell::from(format!("{}", jitter_us)),
2175            Cell::from(format!(
2176                "{}",
2177                if row.total_runs > 0 {
2178                    row.wait_duration_ns / row.total_runs as u64 / 1000
2179                } else {
2180                    0
2181                }
2182            )),
2183            Cell::from(format!("{:.1}", row.runs_per_sec)),
2184            Cell::from(format!("C{:02}", row.core_placement)),
2185            Cell::from(format!("{}", row.select_cpu_ns)),
2186            Cell::from(format!("{}", row.enqueue_ns)),
2187            Cell::from(format!("{}", row.stopping_duration_ns)),
2188            Cell::from(format!("{}", row.running_duration_ns)),
2189            Cell::from(format!("{:.0}", row.gate_hit_pcts[0])), // G1
2190            Cell::from(format!("{:.0}", row.gate_hit_pcts[3])), // G3
2191            Cell::from(format!("{:.0}", row.gate_hit_pcts[9])), // DSQ (tunnel)
2192            Cell::from(format!("{:.1}", row.migrations_per_sec)),
2193            Cell::from(format!("{}", row.tgid)),
2194            Cell::from(format!("{:.0}", q_full_pct)),
2195            Cell::from(format!("{:.0}", q_yield_pct)),
2196            Cell::from(format!("{:.0}", q_preempt_pct)),
2197            Cell::from(format!("{}", row.wakeup_source_pid)),
2198            Cell::from(if row.vtime_mult == 1024 {
2199                "N0".to_string()
2200            } else if row.vtime_mult < 1024 {
2201                "N-".to_string()
2202            } else {
2203                "N+".to_string()
2204            })
2205            .style(Style::default().fg(if row.vtime_mult < 1024 {
2206                Color::LightGreen
2207            } else if row.vtime_mult > 1024 {
2208                Color::LightRed
2209            } else {
2210                Color::DarkGray
2211            })),
2212        ];
2213        matrix_rows.push(Row::new(cells).height(1));
2214    }
2215    let filter_label = if app.show_all_tasks {
2216        "ALL Tasks"
2217    } else {
2218        "BPF-Tracked"
2219    };
2220
2221    let matrix_table = Table::new(
2222        matrix_rows,
2223        [
2224            Constraint::Length(6),  // PPID
2225            Constraint::Length(8),  // PID
2226            Constraint::Length(3),  // ST
2227            Constraint::Length(15), // COMM
2228            Constraint::Length(5),  // CLS
2229            Constraint::Length(5),  // VCSW
2230            Constraint::Length(6),  // AVGRT
2231            Constraint::Length(6),  // MAXRT
2232            Constraint::Length(7),  // GAP
2233            Constraint::Length(7),  // JITTER
2234            Constraint::Length(6),  // WAIT
2235            Constraint::Length(7),  // RUNS/s
2236            Constraint::Length(4),  // CPU
2237            Constraint::Length(5),  // SEL
2238            Constraint::Length(5),  // ENQ
2239            Constraint::Length(5),  // STOP
2240            Constraint::Length(5),  // RUN
2241            Constraint::Length(3),  // G1
2242            Constraint::Length(3),  // G3
2243            Constraint::Length(4),  // DSQ
2244            Constraint::Length(7),  // MIGR/s
2245            Constraint::Length(7),  // TGID
2246            Constraint::Length(4),  // Q%F
2247            Constraint::Length(4),  // Q%Y
2248            Constraint::Length(4),  // Q%P
2249            Constraint::Length(7),  // WAKER
2250            Constraint::Length(4),  // NICE
2251            Constraint::Length(6),  // TIER∆
2252        ],
2253    )
2254    .header(matrix_header)
2255    .block(
2256        Block::default()
2257            .title(format!(
2258                " Live Task Matrix (times: µs │ SEL/ENQ/STOP/RUN: ns) [{}] ",
2259                filter_label
2260            ))
2261            .borders(Borders::ALL)
2262            .border_style(Style::default().fg(Color::Blue)),
2263    )
2264    .row_highlight_style(Style::default().add_modifier(Modifier::REVERSED))
2265    .highlight_symbol(">> ");
2266
2267    // Using render_stateful_widget instead of render_widget to manage scroll table state
2268    frame.render_stateful_widget(matrix_table, outer_layout[2], &mut app.table_state);
2269}
2270
2271fn draw_topology_tab(frame: &mut Frame, app: &TuiApp, area: Rect) {
2272    let nr_cpus = app.latency_matrix.len();
2273    let heatmap_min_width = (6 + nr_cpus * 2 + 4) as u16;
2274    let data_min_width = (5 + nr_cpus * 3 + 4) as u16;
2275
2276    let layout = Layout::default()
2277        .direction(Direction::Horizontal)
2278        .constraints([
2279            Constraint::Min(22),
2280            Constraint::Min(heatmap_min_width),
2281            Constraint::Min(data_min_width),
2282        ])
2283        .split(area);
2284    let topology_grid = build_cpu_topology_grid_compact(&app.topology, &app.cpu_stats);
2285    frame.render_widget(topology_grid, layout[0]);
2286
2287    // Dynamic heatmap title based on benchmark state
2288    let heatmap_title = if app.bench_latency_handle.is_some() {
2289        " Latency Heatmap ⏱ Benchmarking... ".to_string()
2290    } else if app
2291        .latency_matrix
2292        .iter()
2293        .any(|row| row.iter().any(|&v| v > 0.0))
2294    {
2295        " Latency Heatmap (ns) ".to_string()
2296    } else {
2297        " Latency Heatmap [b] Benchmark ".to_string()
2298    };
2299    let heatmap = LatencyHeatmap::new(&app.latency_matrix, &app.topology, &heatmap_title);
2300    frame.render_widget(heatmap, layout[1]);
2301
2302    let data_table = LatencyTable::new(&app.latency_matrix, &app.topology);
2303    frame.render_widget(data_table, layout[2]);
2304}
2305
2306fn draw_reference_tab(frame: &mut Frame, area: Rect) {
2307    // 2-column layout: left = matrix columns, right = dump/profile/keys
2308    let cols =
2309        Layout::horizontal([Constraint::Percentage(50), Constraint::Percentage(50)]).split(area);
2310
2311    // Helper: styled section header
2312    fn section(text: &str) -> Line<'_> {
2313        Line::from(Span::styled(
2314            text,
2315            Style::default()
2316                .fg(Color::Yellow)
2317                .add_modifier(Modifier::BOLD),
2318        ))
2319    }
2320    // Helper: styled subsection header
2321    fn subsection(text: &str) -> Line<'_> {
2322        Line::from(Span::styled(
2323            text,
2324            Style::default()
2325                .fg(Color::Cyan)
2326                .add_modifier(Modifier::BOLD),
2327        ))
2328    }
2329    // Helper: column definition entry
2330    fn col(name: &str, desc: &str) -> Line<'static> {
2331        Line::from(vec![
2332            Span::styled(
2333                format!("{:<8}", name),
2334                Style::default()
2335                    .fg(Color::White)
2336                    .add_modifier(Modifier::BOLD),
2337            ),
2338            Span::raw(desc.to_string()),
2339        ])
2340    }
2341    // Helper: indented sub-entry
2342    fn sub(prefix: &str, desc: &str, color: Color) -> Line<'static> {
2343        Line::from(vec![
2344            Span::styled(format!("          {}", prefix), Style::default().fg(color)),
2345            Span::raw(format!(" {}", desc)),
2346        ])
2347    }
2348
2349    // ═══ LEFT PANEL: Matrix Columns ═══
2350    let left_text = vec![
2351        section("═══ MATRIX COLUMNS (28) ═══"),
2352        Line::from(""),
2353        subsection("── Identity & Status ──"),
2354        col("PPID", "Parent PID — groups threads by launcher"),
2355        col("PID", "Thread ID (per-thread, not process)"),
2356        col("ST", "Task status:"),
2357        sub(
2358            "●",
2359            "Alive — actively scheduled, has telemetry",
2360            Color::Green,
2361        ),
2362        sub("●HOG", "Hog — CPU hog detection, HOG class", Color::Red),
2363        sub(
2364            "●BG",
2365            "Background — low-priority noise task",
2366            Color::Rgb(255, 165, 0),
2367        ),
2368        sub(
2369            "○",
2370            "Idle — in sysinfo but no BPF telemetry",
2371            Color::DarkGray,
2372        ),
2373        sub("✗", "Dead — exited since last refresh", Color::DarkGray),
2374        col("COMM", "Thread name (first 15 chars, from /proc)"),
2375        col("CLS", "CAKE class assignment:"),
2376        sub(
2377            "GAME",
2378            "Game family member (Steam/Wine detected)",
2379            Color::Green,
2380        ),
2381        sub("NORM", "Normal interactive task (default)", Color::Blue),
2382        sub(
2383            "HOG",
2384            "CPU hog (high PELT, low voluntary yield)",
2385            Color::Yellow,
2386        ),
2387        sub("BG", "Background noise (low PELT, infrequent)", Color::Red),
2388        col("TGID", "Thread Group ID (process that owns thread)"),
2389        Line::from(""),
2390        subsection("── Timing ──"),
2391        col("VCSW", "Voluntary context switches (high = GPU/IO)"),
2392        col("AVGRT", "PELT util_avg (0-1024) — kernel CPU usage"),
2393        col("MAXRT", "Max runtime seen this interval (µs)"),
2394        col("GAP", "Dispatch gap: time between runs (µs)"),
2395        col("JITTER", "Avg jitter: variance in inter-run gap (µs)"),
2396        col("WAIT", "Last DSQ wait before scheduling (µs)"),
2397        col("RUNS/s", "Runs per second — scheduling frequency"),
2398        Line::from(""),
2399        subsection("── Placement ──"),
2400        col("CPU", "Last CPU core this task ran on (Cxx)"),
2401        col("MIGR/s", "CPU migrations per second"),
2402        Line::from(""),
2403        subsection("── Callback Overhead (ns) ──"),
2404        col("SEL", "select_cpu: gate cascade to find idle CPU"),
2405        col("ENQ", "enqueue: vtime calc + DSQ insert kfunc"),
2406        col("STOP", "stopping: PELT classify + staging + DRR"),
2407        col("RUN", "running: mailbox writes + arena telemetry"),
2408        Line::from(""),
2409        subsection("── Gate Distribution (%) ──"),
2410        col("G1", "Gate 1: prev_cpu idle — direct dispatch"),
2411        col("G3", "Gate 3: kernel scx_select_cpu_dfl fallback"),
2412        col("DSQ", "Tunnel: all busy → LLC DSQ vtime ordering"),
2413        Line::from(""),
2414        subsection("── Quantum Completion (%) ──"),
2415        col("Q%F", "Full: slice exhausted (preempted at expiry)"),
2416        col("Q%Y", "Yield: voluntarily slept before expiry"),
2417        col("Q%P", "Preempt: forcibly kicked mid-slice"),
2418        Line::from(""),
2419        subsection("── EEVDF ──"),
2420        col("WAKER", "PID of last waker (0 = self/kernel-woken)"),
2421        col("NICE", "Nice tier: N0=baseline, N-x=high, N+x=low"),
2422        sub(
2423            "N-x",
2424            "Higher priority (lower nice, weight > 100)",
2425            Color::LightGreen,
2426        ),
2427        sub("N0", "Baseline (nice 0, weight = 100)", Color::DarkGray),
2428        sub(
2429            "N+x",
2430            "Lower priority (higher nice, weight < 100)",
2431            Color::LightRed,
2432        ),
2433    ];
2434
2435    let left_paragraph = Paragraph::new(left_text)
2436        .block(
2437            Block::default()
2438                .title(" Matrix Columns ")
2439                .title_style(
2440                    Style::default()
2441                        .fg(Color::Yellow)
2442                        .add_modifier(Modifier::BOLD),
2443                )
2444                .borders(Borders::ALL)
2445                .border_style(Style::default().fg(Color::Blue))
2446                .border_type(BorderType::Rounded),
2447        )
2448        .wrap(Wrap { trim: false });
2449
2450    // ═══ RIGHT PANEL: Dump Fields + Profile + Keys ═══
2451    let right_text = vec![
2452        section("═══ DUMP / COPY FIELDS ═══"),
2453        Line::from(""),
2454        subsection("── Per-Callback Stopwatch (ns) ──"),
2455        col("gate_cas", "select_cpu: full gate cascade duration"),
2456        col("idle_prb", "select_cpu: winning gate idle probe cost"),
2457        col("vtime_cm", "enqueue: vtime + tier weighting overhead"),
2458        col("mbox", "running: per-CPU mailbox CL0 write burst"),
2459        col("classify", "stopping: tier classify + DRR + deficit"),
2460        col("vtime_st", "stopping: dsq_vtime bit packing + write"),
2461        col("warm", "stopping: warm CPU ring shift (migration)"),
2462        Line::from(""),
2463        subsection("── Extended Detail Fields ──"),
2464        col("DIRECT", "Direct dispatch count (bypassed DSQ)"),
2465        col("DEFICIT", "DRR++ deficit (µs) — 0=yielder, max=bulk"),
2466        col("SUTIL", "Slice util % (actual_run / slice)"),
2467        col("LLC", "Last LLC (L3 cache) node"),
2468        col("STREAK", "Consecutive same-CPU runs (locality)"),
2469        col("WHIST", "Wait histogram: <10µ/<100µ/<1m/≥1ms"),
2470        Line::from(""),
2471        section("═══ CALLBACK PROFILE ═══"),
2472        Line::from(""),
2473        col("stopping", "PELT classify + staging + warm history"),
2474        sub(
2475            "skip",
2476            "98.4% — confidence gate skips reclassify",
2477            Color::DarkGray,
2478        ),
2479        sub(
2480            "classify",
2481            "~1.6% — full PELT (every 64th stop)",
2482            Color::DarkGray,
2483        ),
2484        col("running", "Mailbox stamping + arena telemetry"),
2485        col("enqueue", "Vtime + scx_bpf_dsq_insert_vtime"),
2486        col("select", "Gate cascade probing idle CPUs"),
2487        col("dispatch", "Per-LLC DSQ consume + cross-LLC steal"),
2488        Line::from(""),
2489        section("═══ KEY BINDINGS ═══"),
2490        Line::from(""),
2491        col("←/→ Tab", "Switch tabs"),
2492        col("↑/↓ j/k", "Scroll task list / navigate"),
2493        col("Enter", "Open Task Inspector for selected task"),
2494        col("r", "Sort by Runtime"),
2495        col("g", "Sort by Gate 1 %"),
2496        col("c", "Sort by CPU / Copy to clipboard"),
2497        col("p", "Sort by PID"),
2498        col("s", "Sort by runs/Second"),
2499        col("a", "Toggle all tasks vs BPF-tracked only"),
2500        col("d", "Dump to file (tui_dump_*.txt)"),
2501        col("b", "Run BenchLab benchmark iteration"),
2502        col("q / Esc", "Quit scx_cake"),
2503        Line::from(""),
2504        subsection("── Scheduler States ──"),
2505        sub(
2506            "IDLE",
2507            "No game detected — standard scheduling",
2508            Color::DarkGray,
2509        ),
2510        sub("COMPILE", "≥2 compiler procs at ≥78% PELT", Color::Yellow),
2511        sub(
2512            "GAMING",
2513            "Game detected — full priority system",
2514            Color::Green,
2515        ),
2516    ];
2517
2518    let right_paragraph = Paragraph::new(right_text)
2519        .block(
2520            Block::default()
2521                .title(" Fields & Keybindings ")
2522                .title_style(
2523                    Style::default()
2524                        .fg(Color::Yellow)
2525                        .add_modifier(Modifier::BOLD),
2526                )
2527                .borders(Borders::ALL)
2528                .border_style(Style::default().fg(Color::Blue))
2529                .border_type(BorderType::Rounded),
2530        )
2531        .wrap(Wrap { trim: false });
2532
2533    frame.render_widget(left_paragraph, cols[0]);
2534    frame.render_widget(right_paragraph, cols[1]);
2535}
2536
2537fn draw_bench_tab(frame: &mut Frame, app: &mut TuiApp, area: Rect) {
2538    // (index, name, category, source: K=kernel kfunc, C=cake custom code)
2539    // Groups: kfunc baseline first, then cake replacements. SPEED is per-group.
2540    let bench_items: &[(usize, &str, &str, &str)] = &[
2541        // Timing: all available clock sources
2542        (0, "bpf_ktime_get_ns()", "Timing", "K"),
2543        (1, "scx_bpf_now()", "Timing", "K"),
2544        (24, "bpf_ktime_get_boot_ns()", "Timing", "K"),
2545        (10, "Timing harness (cal)", "Timing", "C"),
2546        // Task Lookup: kfunc vs arena direct access
2547        (3, "bpf_task_from_pid()", "Task Lookup", "K"),
2548        (29, "bpf_get_current_task_btf()", "Task Lookup", "K"),
2549        (36, "bpf_task_storage_get()", "Task Lookup", "K"),
2550        (6, "get_task_ctx() [arena]", "Task Lookup", "C"),
2551        (22, "get_task_ctx+arena CL0", "Task Lookup", "C"),
2552        // Process Info: kfunc alternatives
2553        (28, "bpf_get_current_pid_tgid()", "Process Info", "K"),
2554        (30, "bpf_get_current_comm()", "Process Info", "K"),
2555        (14, "task_struct p->scx+nvcsw", "Process Info", "K"),
2556        (32, "scx_bpf_task_running(p)", "Process Info", "K"),
2557        (33, "scx_bpf_task_cpu(p)", "Process Info", "K"),
2558        (46, "Arena tctx.pid+tgid", "Process Info", "C"),
2559        (47, "Mbox CL0 cached_cpu", "Process Info", "C"),
2560        // CPU Identification: kfunc vs mailbox cached
2561        (2, "bpf_get_smp_proc_id()", "CPU ID", "K"),
2562        (31, "bpf_get_numa_node_id()", "CPU ID", "K"),
2563        (11, "Mbox CL0 cached CPU", "CPU ID", "C"),
2564        // Idle Probing: kfunc vs cake probes
2565        (4, "test_and_clear_idle()", "Idle Probing", "K"),
2566        (37, "scx_bpf_pick_idle_cpu()", "Idle Probing", "K"),
2567        (38, "idle_cpumask get+put", "Idle Probing", "K"),
2568        (19, "idle_probe(remote) MESI", "Idle Probing", "C"),
2569        (20, "smtmask read-only check", "Idle Probing", "C"),
2570        // Data Read: kernel struct vs cake data paths
2571        (8, "BSS global_stats[cpu]", "Data Read", "C"),
2572        (9, "Arena per_cpu.mbox", "Data Read", "C"),
2573        (15, "RODATA llc+quantum_ns", "Data Read", "C"),
2574        // Mailbox CL0: cake's Disruptor handoff variants
2575        (12, "Mbox CL0 tctx+deref", "Mailbox CL0", "C"),
2576        (18, "CL0 ptr+fused+packed", "Mailbox CL0", "C"),
2577        (21, "Disruptor CL0 full read", "Mailbox CL0", "C"),
2578        // Composite: cake-only multi-step operations
2579        (16, "Bitflag shift+mask+brless", "Composite Ops", "C"),
2580        (17, "(reserved, was compute_ewma)", "Composite Ops", "C"),
2581        // DVFS / Performance: CPU frequency queries
2582        (35, "scx_bpf_cpuperf_cur(cpu)", "DVFS / Perf", "K"),
2583        (42, "scx_bpf_cpuperf_cap(cpu)", "DVFS / Perf", "K"),
2584        (45, "RODATA cpuperf_cap[cpu]", "DVFS / Perf", "C"),
2585        // Topology Constants: kfunc vs RODATA
2586        (5, "scx_bpf_nr_cpu_ids()", "Topology", "K"),
2587        (34, "scx_bpf_nr_node_ids()", "Topology", "K"),
2588        (43, "RODATA nr_cpus const", "Topology", "C"),
2589        (44, "RODATA nr_nodes const", "Topology", "C"),
2590        // Standalone Kfuncs: reference costs
2591        (7, "scx_bpf_dsq_nr_queued()", "Standalone Kfuncs", "K"),
2592        (13, "ringbuf reserve+discard", "Standalone Kfuncs", "K"),
2593        (39, "scx_bpf_kick_cpu(self)", "Standalone Kfuncs", "K"),
2594        // Synchronization: lock/RNG costs
2595        (41, "bpf_spin_lock+unlock", "Synchronization", "K"),
2596        (40, "bpf_get_prandom_u32()", "Synchronization", "K"),
2597        (48, "CL0 lock-free 3-field", "Synchronization", "C"),
2598        (49, "BSS xorshift32 PRNG", "Synchronization", "C"),
2599        // TLB/Memory: arena access pattern cost
2600        (23, "Arena stride (TLB/hugepage)", "TLB/Memory", "C"),
2601        // Kernel Free Data: zero-cost task_struct field reads
2602        (50, "PELT util+runnable_avg", "Kernel Free Data", "K"),
2603        (51, "PELT runnable_avg only", "Kernel Free Data", "K"),
2604        (52, "schedstats nr_wakeups", "Kernel Free Data", "K"),
2605        (53, "p->policy+prio+flags", "Kernel Free Data", "K"),
2606        (54, "PELT read+tier classify", "Kernel Free Data", "K"),
2607        // End-to-End Workflow Comparisons
2608        (55, "task_storage write+read", "Storage Roundtrip", "C"),
2609        (56, "Arena write+read", "Storage Roundtrip", "C"),
2610        (57, "3-probe cascade (cake)", "Idle Selection", "C"),
2611        (58, "pick_idle_cpu full", "Idle Selection", "K"),
2612        (59, "Weight classify (bpfland)", "Classification", "C"),
2613        (60, "Lat-cri classify (lavd)", "Classification", "C"),
2614        (61, "SMT: cake sib probe", "SMT Probing", "C"),
2615        (62, "SMT: cpumask probe", "SMT Probing", "K"),
2616        // ═══ Fairness Fixes (cold-cache + remote) ═══
2617        (63, "storage_get COLD ~est", "Cold Cache", "K"),
2618        (64, "PELT classify COLD", "Cold Cache", "K"),
2619        (65, "legacy EWMA COLD", "Cold Cache", "C"),
2620        (66, "kick_cpu REMOTE ~est", "Cold Cache", "K"),
2621    ];
2622
2623    // Pre-compute percentiles: sort once per entry, extract p1/p50/p99 together.
2624    // Old approach sorted 3× per entry per frame (7200 sorts/sec at 60fps) — killed navigation.
2625    let percentiles_for = |samples: &[u64]| -> (u64, u64, u64) {
2626        if samples.is_empty() {
2627            return (0, 0, 0);
2628        }
2629        let mut sorted = samples.to_vec();
2630        sorted.sort_unstable();
2631        let len = sorted.len() as f64 - 1.0;
2632        let p1 = sorted[((1.0 / 100.0 * len).round() as usize).min(sorted.len() - 1)];
2633        let p50 = sorted[((50.0 / 100.0 * len).round() as usize).min(sorted.len() - 1)];
2634        let p99 = sorted[((99.0 / 100.0 * len).round() as usize).min(sorted.len() - 1)];
2635        (p1, p50, p99)
2636    };
2637
2638    let age_s = if app.bench_timestamp > 0 {
2639        let uptime = app.start_time.elapsed().as_nanos() as u64;
2640        format!(
2641            "{:.1}s ago",
2642            (uptime.saturating_sub(app.bench_timestamp)) as f64 / 1e9
2643        )
2644    } else {
2645        "never".to_string()
2646    };
2647
2648    let header = Row::new(vec![
2649        Cell::from("HELPER").style(
2650            Style::default()
2651                .fg(Color::Yellow)
2652                .add_modifier(Modifier::BOLD),
2653        ),
2654        Cell::from("MIN").style(
2655            Style::default()
2656                .fg(Color::Cyan)
2657                .add_modifier(Modifier::BOLD),
2658        ),
2659        Cell::from("P1 LOW").style(
2660            Style::default()
2661                .fg(Color::Green)
2662                .add_modifier(Modifier::BOLD),
2663        ),
2664        Cell::from("P50 MED").style(
2665            Style::default()
2666                .fg(Color::LightCyan)
2667                .add_modifier(Modifier::BOLD),
2668        ),
2669        Cell::from("AVG").style(
2670            Style::default()
2671                .fg(Color::Cyan)
2672                .add_modifier(Modifier::BOLD),
2673        ),
2674        Cell::from("P1 HIGH").style(
2675            Style::default()
2676                .fg(Color::LightRed)
2677                .add_modifier(Modifier::BOLD),
2678        ),
2679        Cell::from("MAX").style(Style::default().fg(Color::Red).add_modifier(Modifier::BOLD)),
2680        Cell::from("JITTER").style(
2681            Style::default()
2682                .fg(Color::LightMagenta)
2683                .add_modifier(Modifier::BOLD),
2684        ),
2685        Cell::from("SPEED").style(
2686            Style::default()
2687                .fg(Color::White)
2688                .add_modifier(Modifier::BOLD),
2689        ),
2690    ])
2691    .height(1);
2692
2693    let mut rows: Vec<Row> = Vec::new();
2694    let mut last_cat = "";
2695    let mut cat_baseline: u64 = 0; // per-category baseline AVG
2696
2697    for &(idx, name, cat, src) in bench_items {
2698        if cat != last_cat {
2699            last_cat = cat;
2700            cat_baseline = 0; // reset for new category
2701            rows.push(
2702                Row::new(vec![Cell::from(format!("▸ {}", cat)).style(
2703                    Style::default()
2704                        .fg(Color::White)
2705                        .add_modifier(Modifier::BOLD | Modifier::UNDERLINED),
2706                )])
2707                .height(1),
2708            );
2709        }
2710
2711        let (min_ns, max_ns, total_ns, _last_val) = app.bench_entries[idx];
2712        if app.bench_iterations == 0 || total_ns == 0 {
2713            rows.push(Row::new(vec![
2714                Cell::from(format!("  [{}] {}", src, name))
2715                    .style(Style::default().fg(Color::DarkGray)),
2716                Cell::from("--"),
2717                Cell::from("--"),
2718                Cell::from("--"),
2719                Cell::from("--"),
2720                Cell::from("--"),
2721                Cell::from("--"),
2722                Cell::from("--"),
2723                Cell::from("--"),
2724            ]));
2725            continue;
2726        }
2727
2728        let avg_ns = total_ns / app.bench_iterations as u64;
2729        let samples = &app.bench_samples[idx];
2730        let (p1, p50, p99) = percentiles_for(samples);
2731        let jitter = max_ns.saturating_sub(min_ns);
2732
2733        let speedup = if cat_baseline == 0 {
2734            cat_baseline = avg_ns.max(1);
2735            "base".to_string()
2736        } else if avg_ns > 0 {
2737            format!("{:.1}×", cat_baseline as f64 / avg_ns as f64)
2738        } else {
2739            "--".to_string()
2740        };
2741
2742        // Color: green if faster than baseline, yellow if comparable, white otherwise
2743        let color = if cat_baseline == avg_ns || cat_baseline == 0 {
2744            Color::Yellow // baseline entry
2745        } else if avg_ns < cat_baseline / 2 {
2746            Color::Green // >2× faster
2747        } else if avg_ns < cat_baseline {
2748            Color::Cyan // faster
2749        } else {
2750            Color::White // slower or same
2751        };
2752
2753        rows.push(Row::new(vec![
2754            Cell::from(format!("  [{}] {}", src, name)).style(Style::default().fg(color)),
2755            Cell::from(format!("{}ns", min_ns)).style(Style::default().fg(Color::Cyan)),
2756            Cell::from(format!("{}ns", p1)).style(Style::default().fg(Color::Green)),
2757            Cell::from(format!("{}ns", p50)).style(Style::default().fg(Color::LightCyan)),
2758            Cell::from(format!("{}ns", avg_ns)).style(Style::default().fg(color)),
2759            Cell::from(format!("{}ns", p99)).style(Style::default().fg(Color::LightRed)),
2760            Cell::from(format!("{}ns", max_ns)).style(Style::default().fg(Color::Red)),
2761            Cell::from(format!("{}ns", jitter)).style(Style::default().fg(Color::LightMagenta)),
2762            Cell::from(speedup).style(Style::default().fg(Color::White)),
2763        ]));
2764    }
2765
2766    let table = Table::new(
2767        rows,
2768        [
2769            Constraint::Length(34), // Name
2770            Constraint::Length(8),  // MIN
2771            Constraint::Length(8),  // P1 LOW
2772            Constraint::Length(9),  // P50 MED
2773            Constraint::Length(8),  // AVG
2774            Constraint::Length(9),  // P1 HIGH
2775            Constraint::Length(8),  // MAX
2776            Constraint::Length(10), // JITTER
2777            Constraint::Length(7),  // SPEED
2778        ],
2779    )
2780    .header(header)
2781    .block(
2782        Block::default()
2783            .borders(Borders::ALL)
2784            .border_style(Style::default().fg(Color::DarkGray))
2785            .border_type(BorderType::Rounded)
2786            .title(ratatui::text::Span::styled(
2787                format!(
2788                    " ⚡ BenchLab  [b=run]  Runs: {}  Samples: {}  CPU: {}  Ran: {} ",
2789                    app.bench_run_count, app.bench_iterations, app.bench_cpu, age_s
2790                ),
2791                Style::default()
2792                    .fg(Color::Yellow)
2793                    .add_modifier(Modifier::BOLD),
2794            )),
2795    )
2796    .row_highlight_style(Style::default().add_modifier(Modifier::REVERSED))
2797    .highlight_symbol(">> ");
2798
2799    // Split area into header and table
2800    let bench_layout = Layout::default()
2801        .direction(Direction::Vertical)
2802        .constraints([
2803            Constraint::Length(7), // System Info Header (6 lines + 1 padding)
2804            Constraint::Min(0),    // Bench table
2805        ])
2806        .split(area);
2807
2808    let info_text = app.system_info.format_header();
2809    let info_paragraph = Paragraph::new(info_text)
2810        .style(Style::default().fg(Color::DarkGray))
2811        .block(Block::default().padding(Padding::new(1, 1, 0, 1)));
2812
2813    frame.render_widget(info_paragraph, bench_layout[0]);
2814    frame.render_stateful_widget(table, bench_layout[1], &mut app.bench_table_state);
2815}
2816
2817/// Run a core-to-core latency benchmark using atomic ping-pong.
2818/// Hot loop uses only `wrapping_add(1)` — no multiply or checked add —
2819/// so debug builds don't inflate measurements with overflow checks.
2820/// Runs 3 attempts per pair with warmup, takes the minimum.
2821fn run_core_latency_bench(nr_cpus: usize) -> Vec<Vec<f64>> {
2822    let mut matrix = vec![vec![0.0f64; nr_cpus]; nr_cpus];
2823    const ITERATIONS: u64 = 5000;
2824    const WARMUP: u64 = 500;
2825    const RUNS: usize = 3;
2826
2827    #[allow(clippy::needless_range_loop)]
2828    for i in 0..nr_cpus {
2829        for j in (i + 1)..nr_cpus {
2830            let mut best = f64::MAX;
2831
2832            for _run in 0..RUNS {
2833                let flag = Arc::new(AtomicU64::new(0));
2834                let flag_a = flag.clone();
2835                let flag_b = flag.clone();
2836                let core_a = i;
2837                let core_b = j;
2838
2839                // Thread A: pinger
2840                let handle_a = thread::spawn(move || {
2841                    unsafe {
2842                        let mut set: libc::cpu_set_t = std::mem::zeroed();
2843                        libc::CPU_SET(core_a, &mut set);
2844                        libc::sched_setaffinity(0, std::mem::size_of::<libc::cpu_set_t>(), &set);
2845                    }
2846
2847                    // Warmup — same code path as measurement, just uncounted
2848                    let mut val = 0u64;
2849                    for _ in 0..WARMUP {
2850                        val = val.wrapping_add(1); // odd: ping
2851                        flag_a.store(val, Ordering::Release);
2852                        val = val.wrapping_add(1); // even: expect pong
2853                        while flag_a.load(Ordering::Acquire) != val {
2854                            std::hint::spin_loop();
2855                        }
2856                    }
2857
2858                    // Measured run — zero arithmetic in hot path
2859                    let start = std::time::Instant::now();
2860                    for _ in 0..ITERATIONS {
2861                        val = val.wrapping_add(1);
2862                        flag_a.store(val, Ordering::Release);
2863                        val = val.wrapping_add(1);
2864                        while flag_a.load(Ordering::Acquire) != val {
2865                            std::hint::spin_loop();
2866                        }
2867                    }
2868                    start.elapsed().as_nanos() as f64 / ITERATIONS as f64
2869                });
2870
2871                // Thread B: ponger
2872                let handle_b = thread::spawn(move || {
2873                    unsafe {
2874                        let mut set: libc::cpu_set_t = std::mem::zeroed();
2875                        libc::CPU_SET(core_b, &mut set);
2876                        libc::sched_setaffinity(0, std::mem::size_of::<libc::cpu_set_t>(), &set);
2877                    }
2878
2879                    let mut val = 0u64;
2880                    // Warmup
2881                    for _ in 0..WARMUP {
2882                        val = val.wrapping_add(1); // odd: expect ping
2883                        while flag_b.load(Ordering::Acquire) != val {
2884                            std::hint::spin_loop();
2885                        }
2886                        val = val.wrapping_add(1); // even: pong
2887                        flag_b.store(val, Ordering::Release);
2888                    }
2889
2890                    // Measured run
2891                    for _ in 0..ITERATIONS {
2892                        val = val.wrapping_add(1);
2893                        while flag_b.load(Ordering::Acquire) != val {
2894                            std::hint::spin_loop();
2895                        }
2896                        val = val.wrapping_add(1);
2897                        flag_b.store(val, Ordering::Release);
2898                    }
2899                });
2900
2901                let latency_ns = handle_a.join().unwrap_or(f64::MAX);
2902                let _ = handle_b.join();
2903                if latency_ns < best {
2904                    best = latency_ns;
2905                }
2906            }
2907
2908            // Each round-trip = 2 hops, one-way = half
2909            let one_way = best / 2.0;
2910            matrix[i][j] = one_way;
2911            matrix[j][i] = one_way;
2912        }
2913    }
2914    matrix
2915}
2916
2917/// Run the TUI event loop
2918pub fn run_tui(
2919    skel: &mut BpfSkel,
2920    shutdown: Arc<AtomicBool>,
2921    interval_secs: u64,
2922    topology: TopologyInfo,
2923    latency_matrix: Vec<Vec<f64>>,
2924) -> Result<()> {
2925    let mut terminal = setup_terminal()?;
2926    let mut app = TuiApp::new(topology, latency_matrix);
2927    let mut tick_rate = Duration::from_secs(interval_secs);
2928    // Backdate last_tick so the first loop instantly populates the matrix
2929    let mut last_tick = Instant::now()
2930        .checked_sub(tick_rate)
2931        .unwrap_or(Instant::now());
2932
2933    // Initialize clipboard (may fail on headless systems)
2934    let mut clipboard = Clipboard::new().ok();
2935
2936    loop {
2937        // Check for shutdown signal
2938        if shutdown.load(Ordering::Relaxed) {
2939            break;
2940        }
2941
2942        // Check for UEI exit
2943        if scx_utils::uei_exited!(skel, uei) {
2944            break;
2945        }
2946
2947        // Get current stats (aggregate from per-cpu BSS array)
2948        let stats = aggregate_stats(skel);
2949
2950        // Read bench results from BSS (for BenchLab tab)
2951        if let Some(bss) = &skel.maps.bss_data {
2952            let br = &bss.bench_results;
2953            if br.bench_timestamp > 0 && br.bench_timestamp != app.last_bench_timestamp {
2954                // New bench results — accumulate (merge min/max, sum totals)
2955                app.last_bench_timestamp = br.bench_timestamp;
2956                app.bench_cpu = br.cpu;
2957                app.bench_run_count += 1;
2958                app.bench_timestamp = br.bench_timestamp;
2959                for i in 0..67 {
2960                    let new_min = br.entries[i].min_ns;
2961                    let new_max = br.entries[i].max_ns;
2962                    let new_total = br.entries[i].total_ns;
2963                    let new_value = br.entries[i].last_value;
2964                    let (old_min, old_max, old_total, _) = app.bench_entries[i];
2965                    app.bench_entries[i] = (
2966                        if app.bench_run_count == 1 {
2967                            new_min
2968                        } else {
2969                            old_min.min(new_min)
2970                        },
2971                        old_max.max(new_max),
2972                        old_total + new_total,
2973                        new_value,
2974                    );
2975                    // Accumulate raw samples for percentile computation
2976                    for s in 0..8 {
2977                        let sample = br.entries[i].samples[s];
2978                        if sample > 0 {
2979                            app.bench_samples[i].push(sample);
2980                        }
2981                    }
2982                }
2983                app.bench_iterations += br.iterations;
2984            }
2985        }
2986
2987        // Poll for core-to-core latency benchmark completion
2988        if let Some(handle) = app.bench_latency_handle.take() {
2989            if handle.is_finished() {
2990                match handle.join() {
2991                    Ok(matrix) => {
2992                        app.latency_matrix = matrix;
2993                        app.set_status("✓ Core-to-core latency benchmark complete");
2994                    }
2995                    Err(_) => {
2996                        app.set_status("✗ Latency benchmark failed");
2997                    }
2998                }
2999            } else {
3000                // Not done yet, put it back
3001                app.bench_latency_handle = Some(handle);
3002            }
3003        }
3004
3005        // Draw UI
3006        terminal.draw(|frame| draw_ui(frame, &mut app, &stats))?;
3007
3008        // Handle events with timeout
3009        let timeout = tick_rate.saturating_sub(last_tick.elapsed());
3010        if event::poll(timeout)? {
3011            if let Event::Key(key) = event::read()? {
3012                if key.kind == KeyEventKind::Press {
3013                    match key.code {
3014                        KeyCode::Char('q') | KeyCode::Esc => {
3015                            shutdown.store(true, Ordering::Relaxed);
3016                            break;
3017                        }
3018                        KeyCode::Enter => {
3019                            // Toggle collapse/expand for selected row's PPID group
3020                            if app.active_tab == TuiTab::Dashboard {
3021                                if let Some(i) = app.table_state.selected() {
3022                                    if let Some(pid) = app.sorted_pids.get(i) {
3023                                        if let Some(row) = app.task_rows.get(pid) {
3024                                            let ppid = row.ppid;
3025                                            if ppid > 0 {
3026                                                if app.collapsed_ppids.contains(&ppid) {
3027                                                    app.collapsed_ppids.remove(&ppid);
3028                                                } else {
3029                                                    app.collapsed_ppids.insert(ppid);
3030                                                }
3031                                            }
3032                                        }
3033                                    }
3034                                }
3035                            }
3036                        }
3037                        KeyCode::Tab | KeyCode::Right => {
3038                            app.next_tab();
3039                        }
3040                        KeyCode::BackTab | KeyCode::Left => {
3041                            app.previous_tab();
3042                        }
3043                        KeyCode::Down | KeyCode::PageDown => match app.active_tab {
3044                            TuiTab::BenchLab => app.scroll_bench_down(),
3045                            _ => app.scroll_table_down(),
3046                        },
3047                        KeyCode::Up | KeyCode::PageUp => match app.active_tab {
3048                            TuiTab::BenchLab => app.scroll_bench_up(),
3049                            _ => app.scroll_table_up(),
3050                        },
3051                        KeyCode::Char('t') | KeyCode::Char('T')
3052                            if key.modifiers.is_empty()
3053                                || key.modifiers == crossterm::event::KeyModifiers::SHIFT =>
3054                        {
3055                            match app.active_tab {
3056                                TuiTab::BenchLab => app.bench_table_state.select(Some(0)),
3057                                _ => app.table_state.select(Some(0)),
3058                            }
3059                        }
3060                        KeyCode::Char(' ') => {
3061                            // Toggle collapse/expand for selected row's TGID group
3062                            if app.active_tab == TuiTab::Dashboard {
3063                                if let Some(i) = app.table_state.selected() {
3064                                    if let Some(pid) = app.sorted_pids.get(i) {
3065                                        if let Some(row) = app.task_rows.get(pid) {
3066                                            let tgid = if row.tgid > 0 { row.tgid } else { *pid };
3067                                            if app.collapsed_tgids.contains(&tgid) {
3068                                                app.collapsed_tgids.remove(&tgid);
3069                                            } else {
3070                                                app.collapsed_tgids.insert(tgid);
3071                                            }
3072                                        }
3073                                    }
3074                                }
3075                            }
3076                        }
3077                        KeyCode::Char('x') => {
3078                            // Toggle fold all: collapse all PPIDs, or expand all if already collapsed
3079                            if app.active_tab == TuiTab::Dashboard {
3080                                if app.collapsed_ppids.is_empty() {
3081                                    // Collapse all — collect all unique PPIDs
3082                                    let ppids: Vec<u32> = app
3083                                        .task_rows
3084                                        .values()
3085                                        .filter(|r| r.ppid > 0)
3086                                        .map(|r| r.ppid)
3087                                        .collect();
3088                                    for ppid in ppids {
3089                                        app.collapsed_ppids.insert(ppid);
3090                                    }
3091                                    app.set_status("Folded all PPID groups");
3092                                } else {
3093                                    app.collapsed_ppids.clear();
3094                                    app.set_status("Unfolded all PPID groups");
3095                                }
3096                            }
3097                        }
3098                        KeyCode::Char('s') => {
3099                            app.cycle_sort();
3100                        }
3101                        KeyCode::Char('S') => {
3102                            app.sort_descending = !app.sort_descending;
3103                            let dir = if app.sort_descending {
3104                                "descending"
3105                            } else {
3106                                "ascending"
3107                            };
3108                            app.set_status(&format!("Sort: {}", dir));
3109                        }
3110                        KeyCode::Char('+') | KeyCode::Char('=') => {
3111                            // Faster refresh: halve tick_rate while it is above 250ms (result can fall below 250ms, e.g. 300ms → 150ms)
3112                            let current_ms = tick_rate.as_millis() as u64;
3113                            if current_ms > 250 {
3114                                tick_rate = Duration::from_millis(current_ms / 2);
3115                                app.set_status(&format!("Refresh: {}ms", tick_rate.as_millis()));
3116                            }
3117                        }
3118                        KeyCode::Char('-') => {
3119                            // Slower refresh: double tick_rate while it is below 5s (result can exceed 5s, e.g. 4s → 8s)
3120                            let current_ms = tick_rate.as_millis() as u64;
3121                            if current_ms < 5000 {
3122                                tick_rate = Duration::from_millis(current_ms * 2);
3123                                app.set_status(&format!("Refresh: {}ms", tick_rate.as_millis()));
3124                            }
3125                        }
3126                        KeyCode::Char('c') => {
3127                            // Copy ACTIVE TAB data to clipboard (tab-aware)
3128                            let text = match app.active_tab {
3129                                TuiTab::BenchLab => format_bench_for_clipboard(&app),
3130                                _ => format_stats_for_clipboard(&stats, &app),
3131                            };
3132                            match &mut clipboard {
3133                                Some(cb) => match cb.set_text(text) {
3134                                    Ok(_) => app.set_status(&format!(
3135                                        "✓ Copied {:?} tab to clipboard!",
3136                                        app.active_tab
3137                                    )),
3138                                    Err(_) => app.set_status("✗ Failed to copy"),
3139                                },
3140                                None => app.set_status("✗ Clipboard not available"),
3141                            }
3142                        }
3143                        KeyCode::Char('d') => {
3144                            // Dump full snapshot to timestamped file
3145                            let text = format_stats_for_clipboard(&stats, &app);
3146                            let secs = std::time::SystemTime::now()
3147                                .duration_since(std::time::UNIX_EPOCH)
3148                                .unwrap_or_default()
3149                                .as_secs();
3150                            let filename = format!("tui_dump_{}.txt", secs);
3151                            match std::fs::write(&filename, &text) {
3152                                Ok(_) => app.set_status(&format!("✓ Dumped to {}", filename)),
3153                                Err(e) => app.set_status(&format!("✗ Dump failed: {}", e)),
3154                            }
3155                        }
3156                        KeyCode::Char('r') => {
3157                            // Reset stats (clear the BSS array)
3158                            if let Some(bss) = &mut skel.maps.bss_data {
3159                                for s in &mut bss.global_stats {
3160                                    *s = Default::default();
3161                                }
3162                                app.set_status("✓ Stats reset");
3163                            }
3164                        }
3165                        KeyCode::Char('b') => {
3166                            if app.active_tab == TuiTab::Topology
3167                                && app.bench_latency_handle.is_none()
3168                            {
3169                                // Core-to-core latency benchmark (Topology tab)
3170                                let nr_cpus = app.topology.nr_cpus;
3171                                app.bench_latency_handle =
3172                                    Some(thread::spawn(move || run_core_latency_bench(nr_cpus)));
3173                                app.set_status("⏱ Running core-to-core latency benchmark...");
3174                            } else if app.active_tab != TuiTab::Topology {
3175                                // Trigger kfunc benchmark (BenchLab)
3176                                if let Some(bss) = &mut skel.maps.bss_data {
3177                                    bss.bench_request = 1;
3178                                    app.set_status(&format!(
3179                                        "⚡ BenchLab: run #{} queued...",
3180                                        app.bench_run_count + 1
3181                                    ));
3182                                }
3183                            }
3184                        }
3185                        KeyCode::Char('f') => {
3186                            app.toggle_filter();
3187                            if app.show_all_tasks {
3188                                app.set_status("Filter: ALL tasks");
3189                            } else {
3190                                app.set_status("Filter: BPF-tracked only");
3191                            }
3192                        }
3193                        _ => {}
3194                    }
3195                }
3196            }
3197        }
3198
3199        if last_tick.elapsed() >= tick_rate {
3200            // Skip dashboard updates while c2c benchmark is running —
3201            // avoids BPF map reads and sysinfo polls that could create noise
3202            if app.bench_latency_handle.is_some() {
3203                last_tick = std::time::Instant::now();
3204                terminal.draw(|frame| draw_ui(frame, &mut app, &Default::default()))?;
3205                continue;
3206            }
3207            // Hardware Poll Vector
3208            app.sys.refresh_cpu_usage();
3209            app.sys
3210                .refresh_processes(sysinfo::ProcessesToUpdate::All, true);
3211            app.components.refresh(true);
3212
3213            // Build a core-ID → temperature map from sensors whose label contains "core", "tctl" or "cpu".
3214            // The trailing number in the label (e.g. "core 0") is used as the core ID; see the fallback below for labels without one.
3215            let mut temp_map: HashMap<usize, f32> = HashMap::new();
3216            for comp in &app.components {
3217                let name = comp.label().to_lowercase();
3218                if name.contains("core") || name.contains("tctl") || name.contains("cpu") {
3219                    // Try to extract core number (e.g. "core 0" or "Tctl")
3220                    if let Some(core_id) = name
3221                        .split_whitespace()
3222                        .last()
3223                        .and_then(|s| s.parse::<usize>().ok())
3224                    {
3225                        if let Some(temp) = comp.temperature() {
3226                            temp_map.insert(core_id, temp);
3227                        }
3228                    } else if temp_map.is_empty() {
3229                        // If we can't parse core ID, stash a global fallback temp at ID 0
3230                        if let Some(temp) = comp.temperature() {
3231                            temp_map.insert(0, temp);
3232                        }
3233                    }
3234                }
3235            }
3236
3237            for (i, cpu) in app.sys.cpus().iter().enumerate() {
3238                if i < app.topology.nr_cpus {
3239                    let load = cpu.cpu_usage();
3240                    // Look up the temp by i / 2 (presumably two logical CPUs per core — confirm); if missing, fall back to the ID-0 entry, then to 0.0
3241                    let temp = temp_map
3242                        .get(&(i / 2))
3243                        .copied()
3244                        .or_else(|| temp_map.get(&0).copied())
3245                        .unwrap_or(0.0);
3246                    app.cpu_stats[i] = (load, temp);
3247                }
3248            }
3249
3250            // --- Attach cake_task_iter BPF iterator (once, at first tick) ---
3251            // cake_task_iter is SEC("iter/task") — no map_fd needed.
3252            // We store the raw *mut bpf_link as usize in a static to avoid lifetime issues.
3253            // bpf_program__attach_iter(prog, NULL) → *mut bpf_link (NULL = task iter, no map).
3254            static mut TASK_ITER_LINK_RAW: usize = 0; // 0 = uninit, 1 = failed, else ptr
3255            if unsafe { TASK_ITER_LINK_RAW } == 0 {
3256                // AsRawLibbpf trait: as_libbpf_object() → NonNull<bpf_program> → .as_ptr()
3257                use libbpf_rs::AsRawLibbpf;
3258                let link_ptr = unsafe {
3259                    libbpf_rs::libbpf_sys::bpf_program__attach_iter(
3260                        skel.progs.cake_task_iter.as_libbpf_object().as_ptr(),
3261                        std::ptr::null(),
3262                    )
3263                };
3264                unsafe {
3265                    TASK_ITER_LINK_RAW = if link_ptr.is_null() {
3266                        1 // sentinel: attach failed, don't retry
3267                    } else {
3268                        link_ptr as usize
3269                    };
3270                }
3271            }
3272
3273            // --- Arena Telemetry Sweep (via cake_task_iter bpf_iter_task) ---
3274            // Track currently active PIDs in this sweep to prune dead tasks
3275            app.active_pids_buf.clear();
3276
3277            let link_raw = unsafe { TASK_ITER_LINK_RAW };
3278            if link_raw > 1 {
3279                // bpf_iter_create(link_fd: c_int) — get fd from the stored *mut bpf_link
3280                let link_fd_c = unsafe {
3281                    libbpf_rs::libbpf_sys::bpf_link__fd(
3282                        link_raw as *mut libbpf_rs::libbpf_sys::bpf_link,
3283                    )
3284                };
3285                let iter_fd = unsafe { libbpf_rs::libbpf_sys::bpf_iter_create(link_fd_c) };
3286                if iter_fd >= 0 {
3287                    // Read cake_iter_record structs sequentially from the iter fd
3288                    use std::os::unix::io::FromRawFd;
3289                    let mut f = unsafe { std::fs::File::from_raw_fd(iter_fd) };
3290                    let rec_size = std::mem::size_of::<crate::bpf_intf::cake_iter_record>();
3291                    let mut buf = vec![0u8; rec_size];
3292                    use std::io::Read;
3293                    while f.read_exact(&mut buf).is_ok() {
3294                        let rec: crate::bpf_intf::cake_iter_record =
3295                            unsafe { std::ptr::read_unaligned(buf.as_ptr() as *const _) };
3296
3297                        let pid = rec.telemetry.pid_inner;
3298                        let ppid = rec.ppid;
3299                        let packed = rec.packed_info;
3300                        let tier = (packed >> 28) & 0x03;
3301                        let is_hog = (packed >> 27) & 1 != 0;
3302                        let is_bg = (packed >> 22) & 1 != 0;
3303
3304                        if pid == 0 || tier > 3 {
3305                            continue;
3306                        }
3307
3308                        app.active_pids_buf.insert(pid);
3309
3310                        let comm_bytes: [u8; 16] =
3311                            unsafe { std::mem::transmute(rec.telemetry.comm) };
3312                        let comm = match std::ffi::CStr::from_bytes_until_nul(&comm_bytes) {
3313                            Ok(c) => c.to_string_lossy().into_owned(),
3314                            Err(_) => String::from_utf8_lossy(&comm_bytes)
3315                                .trim_end_matches('\0')
3316                                .to_string(),
3317                        };
3318
3319                        // pelt_util and deficit_us now directly in cake_iter_record
3320                        let pelt_util = rec.pelt_util as u32;
3321                        let deficit_us: u32 = rec.deficit_us as u32;
3322
3323                        let g1 = rec.telemetry.gate_1_hits;
3324                        let g2 = rec.telemetry.gate_2_hits;
3325                        let g1w = rec.telemetry.gate_1w_hits;
3326                        let g3 = rec.telemetry.gate_3_hits;
3327                        let g1p = rec.telemetry.gate_1p_hits;
3328                        let g1c = rec.telemetry.gate_1c_hits;
3329                        let g1cp = rec.telemetry.gate_1cp_hits;
3330                        let g1d = rec.telemetry.gate_1d_hits;
3331                        let g1wc = rec.telemetry.gate_1wc_hits;
3332                        let g5 = rec.telemetry.gate_tun_hits;
3333                        let total_sel = g1 + g2 + g1w + g3 + g1p + g1c + g1cp + g1d + g1wc + g5;
3334                        let gate_hit_pcts = if total_sel > 0 {
3335                            [
3336                                (g1 as f64 / total_sel as f64) * 100.0,
3337                                (g2 as f64 / total_sel as f64) * 100.0,
3338                                (g1w as f64 / total_sel as f64) * 100.0,
3339                                (g3 as f64 / total_sel as f64) * 100.0,
3340                                (g1p as f64 / total_sel as f64) * 100.0,
3341                                (g1c as f64 / total_sel as f64) * 100.0,
3342                                (g1cp as f64 / total_sel as f64) * 100.0,
3343                                (g1d as f64 / total_sel as f64) * 100.0,
3344                                (g1wc as f64 / total_sel as f64) * 100.0,
3345                                (g5 as f64 / total_sel as f64) * 100.0,
3346                            ]
3347                        } else {
3348                            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
3349                        };
3350                        let total_runs = rec.telemetry.total_runs;
3351                        let jitter_accum_ns = rec.telemetry.jitter_accum_ns;
3352
3353                        let row = app
3354                            .task_rows
3355                            .entry(pid)
3356                            .or_insert_with(|| TaskTelemetryRow {
3357                                pid,
3358                                comm: comm.clone(),
3359                                tier: tier as u8,
3360                                pelt_util,
3361                                deficit_us,
3362                                wait_duration_ns: rec.telemetry.wait_duration_ns,
3363                                select_cpu_ns: rec.telemetry.select_cpu_duration_ns,
3364                                enqueue_ns: rec.telemetry.enqueue_duration_ns,
3365                                gate_hit_pcts,
3366                                core_placement: rec.telemetry.core_placement,
3367                                dsq_insert_ns: rec.telemetry.dsq_insert_ns,
3368                                migration_count: rec.telemetry.migration_count,
3369                                preempt_count: rec.telemetry.preempt_count,
3370                                yield_count: rec.telemetry.yield_count,
3371                                total_runs,
3372                                jitter_accum_ns,
3373                                direct_dispatch_count: rec.telemetry.direct_dispatch_count,
3374                                enqueue_count: rec.telemetry.enqueue_count,
3375                                cpumask_change_count: rec.telemetry.cpumask_change_count,
3376                                stopping_duration_ns: rec.telemetry.stopping_duration_ns,
3377                                running_duration_ns: rec.telemetry.running_duration_ns,
3378                                max_runtime_us: rec.telemetry.max_runtime_us,
3379                                dispatch_gap_us: rec.telemetry.dispatch_gap_ns / 1000,
3380                                max_dispatch_gap_us: rec.telemetry.max_dispatch_gap_ns / 1000,
3381                                wait_hist: [
3382                                    rec.telemetry.wait_hist_lt10us,
3383                                    rec.telemetry.wait_hist_lt100us,
3384                                    rec.telemetry.wait_hist_lt1ms,
3385                                    rec.telemetry.wait_hist_ge1ms,
3386                                ],
3387                                runs_per_sec: 0.0,
3388                                migrations_per_sec: 0.0,
3389                                status: TaskStatus::Alive,
3390                                is_bpf_tracked: true,
3391                                tgid: rec.telemetry.tgid,
3392                                slice_util_pct: rec.telemetry.slice_util_pct,
3393                                llc_id: rec.telemetry.llc_id,
3394                                same_cpu_streak: rec.telemetry.same_cpu_streak,
3395                                wakeup_source_pid: rec.telemetry.wakeup_source_pid,
3396                                nvcsw_delta: rec.telemetry.nvcsw_delta,
3397                                nivcsw_delta: rec.telemetry.nivcsw_delta,
3398                                _pad_recomp: rec.telemetry._pad_recomp,
3399                                is_hog,
3400                                is_bg,
3401                                ppid,
3402                                gate_cascade_ns: rec.telemetry.gate_cascade_ns,
3403                                idle_probe_ns: rec.telemetry.idle_probe_ns,
3404                                vtime_compute_ns: rec.telemetry.vtime_compute_ns,
3405                                mbox_staging_ns: rec.telemetry.mbox_staging_ns,
3406                                _pad_ewma: rec.telemetry._pad_ewma,
3407                                classify_ns: rec.telemetry.classify_ns,
3408                                vtime_staging_ns: rec.telemetry.vtime_staging_ns,
3409                                warm_history_ns: rec.telemetry.warm_history_ns,
3410                                quantum_full_count: rec.telemetry.quantum_full_count,
3411                                quantum_yield_count: rec.telemetry.quantum_yield_count,
3412                                quantum_preempt_count: rec.telemetry.quantum_preempt_count,
3413                                waker_cpu: rec.telemetry.waker_cpu,
3414                                waker_tgid: rec.telemetry.waker_tgid,
3415                                cpu_run_count: rec.telemetry.cpu_run_count,
3416                                is_game_member: false,
3417                                vtime_mult: rec.vtime_mult,
3418                            });
3419
3420                        // Update dynamic row elements
3421                        row.tier = tier as u8;
3422                        row.pelt_util = pelt_util;
3423                        row.deficit_us = deficit_us;
3424                        row.wait_duration_ns = rec.telemetry.wait_duration_ns;
3425                        row.select_cpu_ns = rec.telemetry.select_cpu_duration_ns;
3426                        row.enqueue_ns = rec.telemetry.enqueue_duration_ns;
3427                        row.gate_hit_pcts = gate_hit_pcts;
3428                        row.core_placement = rec.telemetry.core_placement;
3429                        row.dsq_insert_ns = rec.telemetry.dsq_insert_ns;
3430                        row.migration_count = rec.telemetry.migration_count;
3431                        row.preempt_count = rec.telemetry.preempt_count;
3432                        row.yield_count = rec.telemetry.yield_count;
3433                        row.total_runs = total_runs;
3434                        row.jitter_accum_ns = jitter_accum_ns;
3435                        row.direct_dispatch_count = rec.telemetry.direct_dispatch_count;
3436                        row.enqueue_count = rec.telemetry.enqueue_count;
3437                        row.cpumask_change_count = rec.telemetry.cpumask_change_count;
3438                        row.stopping_duration_ns = rec.telemetry.stopping_duration_ns;
3439                        row.running_duration_ns = rec.telemetry.running_duration_ns;
3440                        row.max_runtime_us = rec.telemetry.max_runtime_us;
3441                        row.dispatch_gap_us = rec.telemetry.dispatch_gap_ns / 1000;
3442                        row.max_dispatch_gap_us = rec.telemetry.max_dispatch_gap_ns / 1000;
3443                        row.wait_hist = [
3444                            rec.telemetry.wait_hist_lt10us,
3445                            rec.telemetry.wait_hist_lt100us,
3446                            rec.telemetry.wait_hist_lt1ms,
3447                            rec.telemetry.wait_hist_ge1ms,
3448                        ];
3449                        row.is_bpf_tracked = true;
3450                        row.slice_util_pct = rec.telemetry.slice_util_pct;
3451                        row.llc_id = rec.telemetry.llc_id;
3452                        row.same_cpu_streak = rec.telemetry.same_cpu_streak;
3453                        row.wakeup_source_pid = rec.telemetry.wakeup_source_pid;
3454                        row._pad_recomp = rec.telemetry._pad_recomp;
3455                        row.is_hog = is_hog;
3456                        row.is_bg = is_bg;
3457                        row.is_game_member = app.tracked_game_tgid > 0
3458                            && (row.tgid == app.tracked_game_tgid
3459                                || (row.ppid > 0 && row.ppid == app.tracked_game_ppid));
3460                        row.ppid = ppid;
3461                        row.vtime_mult = rec.vtime_mult;
3462                        row.gate_cascade_ns = rec.telemetry.gate_cascade_ns;
3463                        row.idle_probe_ns = rec.telemetry.idle_probe_ns;
3464                        row.vtime_compute_ns = rec.telemetry.vtime_compute_ns;
3465                        row.mbox_staging_ns = rec.telemetry.mbox_staging_ns;
3466                        row._pad_ewma = rec.telemetry._pad_ewma;
3467                        row.classify_ns = rec.telemetry.classify_ns;
3468                        row.vtime_staging_ns = rec.telemetry.vtime_staging_ns;
3469                        row.warm_history_ns = rec.telemetry.warm_history_ns;
3470                        row.quantum_full_count = rec.telemetry.quantum_full_count;
3471                        row.quantum_yield_count = rec.telemetry.quantum_yield_count;
3472                        row.quantum_preempt_count = rec.telemetry.quantum_preempt_count;
3473                        row.waker_cpu = rec.telemetry.waker_cpu;
3474                        row.waker_tgid = rec.telemetry.waker_tgid;
3475                        row.cpu_run_count = rec.telemetry.cpu_run_count;
3476                    } // end read loop
3477                      // f drops here, closing the iter fd automatically
3478                } // end if iter_fd >= 0
3479            } // end if link_raw > 1
3480
3481            // --- Inject ALL System PIDs (Fallback) ---
3482            // Ensures visibility for PIDs that never triggered cake_init_task
3483            let sysinfo_pids: std::collections::HashSet<u32> =
3484                app.sys.processes().keys().map(|p| p.as_u32()).collect();
3485
3486            for (pid, process) in app.sys.processes() {
3487                let pid_u32 = pid.as_u32();
3488                app.task_rows
3489                    .entry(pid_u32)
3490                    .or_insert_with(|| TaskTelemetryRow {
3491                        pid: pid_u32,
3492                        comm: process.name().to_string_lossy().to_string(),
3493                        tier: 3,
3494                        ..Default::default()
3495                    });
3496            }
3497
3498            // --- Liveness Detection: cross-reference arena with sysinfo ---
3499            let mut bpf_count = 0usize;
3500            for (pid, row) in app.task_rows.iter_mut() {
3501                let in_sysinfo = sysinfo_pids.contains(pid);
3502                row.status = if row.is_bpf_tracked && in_sysinfo {
3503                    TaskStatus::Alive
3504                } else if in_sysinfo {
3505                    TaskStatus::Idle
3506                } else {
3507                    TaskStatus::Dead
3508                };
3509                if row.is_bpf_tracked && row.total_runs > 0 {
3510                    bpf_count += 1;
3511                }
3512            }
3513            app.bpf_task_count = bpf_count;
3514
3515            // --- Game Detection: aggregate yields per PPID, pick winner ---
3516            // Proton/Wine: all siblings (wineserver, game.exe, winedevice)
3517            // share the same parent (pv-adverb). Aggregating by PPID means
3518            // wineserver's yields + game yields combine, giving a stronger
3519            // signal and ensuring the entire Wine prefix is detected as a family.
3520            //
3521            // GATE: Minimum 5 threads at PPID-family level.
3522            //   Prevents idle browsers (1-3 yield-active threads) from qualifying.
3523            //   Games under Proton easily satisfy: game.exe + wineserver +
3524            //   winedevice + render workers = 5+ threads always.
3525            //   Native Linux games also easily satisfy (main + audio + IO + render).
3526            //
3527            // CONFIDENCE THROTTLE (Rule 40): After GAME_CONFIDENCE_THRESHOLD
3528            //   stable polls with the same PPID winning, reduce detection sweep
3529            //   to every GAME_CONFIDENCE_SKIP-th poll (~2s effective instead of 500ms).
3530            //   Resets immediately on game exit or PPID switch.
3531            const GAME_MIN_THREADS: usize = 5;
3532            const GAME_CONFIDENCE_THRESHOLD: u32 = 20; // 20 × 500ms = 10s stable
3533            const GAME_CONFIDENCE_SKIP: u32 = 4; // check every 4th poll when confident
3534
3535            // Game exit detection fires before the throttle check so a dead game
3536            // always clears on the very next poll, regardless of confidence state.
3537            if app.tracked_game_tgid > 0 {
3538                let proc_path = format!("/proc/{}", app.tracked_game_tgid);
3539                if !std::path::Path::new(&proc_path).exists() {
3540                    app.tracked_game_tgid = 0;
3541                    app.tracked_game_ppid = 0;
3542                    app.game_thread_count = 0;
3543                    app.game_name.clear();
3544                    app.game_challenger_ppid = 0;
3545                    app.game_challenger_since = None;
3546                    app.game_stable_polls = 0;
3547                    app.game_skip_counter = 0;
3548                    app.game_confidence = 0;
3549                }
3550            }
3551
3552            // Confidence throttle: if we've been stable long enough, skip
3553            // the full PPID aggregation sweep on most polls.
3554            let should_skip_sweep = app.game_stable_polls >= GAME_CONFIDENCE_THRESHOLD
3555                && app.game_skip_counter > 0
3556                && app.game_skip_counter < GAME_CONFIDENCE_SKIP;
3557
3558            if should_skip_sweep {
3559                // Confident path: reuse existing winner, just bump skip counter.
3560                // BPF write still happens below unconditionally.
3561                app.game_skip_counter += 1;
3562            } else {
3563                // Full detection sweep.
3564                app.game_skip_counter = 0;
3565
3566                // --- Two-phase game detection ---
3567                // Priority: Steam (100) → Wine .exe (90)
3568                //
3569                // Phase 1 + 2 scan qualifying PPIDs (≥GAME_MIN_THREADS threads with
3570                // any activity). No yield threshold required for Steam/.exe —
3571                // the binary signal is definitive on its own.
3572                //
3573                // The former Phase 3 (yield fallback, 50) was removed: yield-heavy non-games (Brave, Chrome,
3574                // IDEs, Electron apps) too easily exceed the threshold and false-positive.
3575
3576                // Reusable Steam env probe (cold path, ~1 file read per PPID).
3577                let has_steam_env = |pid: u32| -> bool {
3578                    if let Ok(env) = std::fs::read(format!("/proc/{}/environ", pid)) {
3579                        env.split(|&b| b == 0)
3580                            .filter_map(|kv| std::str::from_utf8(kv).ok())
3581                            .any(|s| s.starts_with("SteamGameId=") || s.starts_with("STEAM_GAME="))
3582                    } else {
3583                        false
3584                    }
3585                };
3586
3587                // Reusable .exe probe (cold path, ~1 file read per PPID).
3588                let has_exe_cmdline = |pid: u32| -> bool {
3589                    if let Ok(cmdline) = std::fs::read(format!("/proc/{}/cmdline", pid)) {
3590                        cmdline
3591                            .split(|&b| b == 0)
3592                            .filter_map(|arg| std::str::from_utf8(arg).ok())
3593                            .any(|s| s.to_lowercase().ends_with(".exe"))
3594                    } else {
3595                        false
3596                    }
3597                };
3598
3599                // Known Steam infrastructure comms — never game processes.
3600                const STEAM_INFRA: &[&str] = &[
3601                    "steam",
3602                    "steamwebhelper",
3603                    "pressure-vessel",
3604                    "pv-bwrap",
3605                    "reaper",
3606                ];
3607
3608                // Aggregate thread counts by PPID for Phase 1 + 2 thread-count gate.
3609                let mut ppid_data: std::collections::HashMap<u32, usize> =
3610                    std::collections::HashMap::new(); // ppid → thread_count
3611                for (_pid, row) in app.task_rows.iter() {
3612                    if row.status == TaskStatus::Dead || row.ppid == 0 {
3613                        continue;
3614                    }
3615                    *ppid_data.entry(row.ppid).or_insert(0) += 1;
3616                }
3617
3618                // Phase 1: Steam scan — highest priority, no yield threshold.
3619                // Covers: Proton games, native Linux Steam games (CS2, Dota 2),
3620                // Battle.net/Epic via Steam. SteamGameId= is the definitive signal.
3621                // Filter: skip PPID groups where ALL threads are Steam infrastructure.
3622                let mut steam_ppid: u32 = 0;
3623                for (&ppid, &thread_count) in &ppid_data {
3624                    if thread_count >= GAME_MIN_THREADS && has_steam_env(ppid) {
3625                        // Skip if ALL threads under this PPID are Steam infra.
3626                        let has_non_infra = app.task_rows.values().any(|row| {
3627                            row.ppid == ppid
3628                                && row.status != TaskStatus::Dead
3629                                && !STEAM_INFRA
3630                                    .iter()
3631                                    .any(|&infra| row.comm.to_lowercase().contains(infra))
3632                        });
3633                        if has_non_infra {
3634                            steam_ppid = ppid;
3635                            break;
3636                        }
3637                    }
3638                }
3639
3640                // Phase 2: .exe scan — Wine/Proton without Steam env (Heroic, Lutris, etc.).
3641                let mut exe_ppid: u32 = 0;
3642                if steam_ppid == 0 {
3643                    for (&ppid, &thread_count) in &ppid_data {
3644                        if thread_count >= GAME_MIN_THREADS && has_exe_cmdline(ppid) {
3645                            exe_ppid = ppid;
3646                            break;
3647                        }
3648                    }
3649                }
3650
3651                // Resolve winning PPID: Steam wins → .exe wins → no game.
3652                let new_game_ppid = if steam_ppid > 0 {
3653                    steam_ppid
3654                } else if exe_ppid > 0 {
3655                    exe_ppid
3656                } else {
3657                    0
3658                };
3659
3660                // Helper: resolve best TGID + name for a given PPID (cold path only).
3661                // Picks the TGID with the highest pelt_util — the game's main/render
3662                // thread runs for ms; infra processes run for µs.
3663                // Works for both Proton (.exe) and native (cs2, dota2) games.
3664                let resolve_game = |ppid: u32,
3665                                    rows: &HashMap<u32, TaskTelemetryRow>|
3666                 -> (u32, String) {
3667                    // Known infra exes to skip when selecting game TGID.
3668                    const INFRA_BLOCKLIST: &[&str] = &[
3669                        "steam",
3670                        "steamwebhelper",
3671                        "pressure-vessel",
3672                        "pv-bwrap",
3673                        "reaper",
3674                        "bash",
3675                        "sh",
3676                        "services",
3677                        "pluginhost",
3678                        "winedevice",
3679                        "rpcss",
3680                        "svchost",
3681                        "explorer",
3682                        "wineboot",
3683                        "start",
3684                        "conhost",
3685                        "dxvk-cache-me",
3686                        "crashhandler",
3687                        "unitycrashhandler64",
3688                        "werfault",
3689                        "ngen",
3690                        "mscorsvw",
3691                        "gamebarfullscreensession",
3692                        "gamebarpresencewriter",
3693                        "rundll32",
3694                        "regsvr32",
3695                        "winedbg",
3696                        "cmd",
3697                    ];
3698
3699                    // Build per-TGID max pelt_util.
3700                    let mut tgid_max_rt: std::collections::HashMap<u32, u32> =
3701                        std::collections::HashMap::new();
3702                    for (_pid, row) in rows.iter() {
3703                        if row.ppid == ppid && row.pelt_util > 0 {
3704                            let tgid = if row.tgid > 0 { row.tgid } else { row.pid };
3705                            let entry = tgid_max_rt.entry(tgid).or_insert(0);
3706                            if row.pelt_util > *entry {
3707                                *entry = row.pelt_util;
3708                            }
3709                        }
3710                    }
3711
3712                    // Sort TGIDs by max pelt_util descending; skip infra.
3713                    let mut ranked: Vec<(u32, u32)> = tgid_max_rt.into_iter().collect();
3714                    ranked.sort_unstable_by(|a, b| b.1.cmp(&a.1));
3715
3716                    let mut game_tgid: u32 = ppid; // fallback
3717                    for (tgid, _rt) in &ranked {
3718                        // Check comm against blocklist.
3719                        let comm_lc = rows
3720                            .values()
3721                            .find(|r| {
3722                                let t = if r.tgid > 0 { r.tgid } else { r.pid };
3723                                t == *tgid
3724                            })
3725                            .map(|r| r.comm.to_lowercase())
3726                            .unwrap_or_default();
3727                        if INFRA_BLOCKLIST.iter().any(|&b| comm_lc.contains(b)) {
3728                            continue;
3729                        }
3730                        game_tgid = *tgid;
3731                        break;
3732                    }
3733
3734                    // Read display name: try .exe basename (Proton), then comm (native).
3735                    let name = {
3736                        let mut n = String::from("unknown");
3737                        if let Ok(cmdline) = std::fs::read(format!("/proc/{}/cmdline", game_tgid)) {
3738                            for arg in cmdline.split(|&b| b == 0) {
3739                                if let Ok(s) = std::str::from_utf8(arg) {
3740                                    if s.to_lowercase().ends_with(".exe") {
3741                                        let basename = s.rsplit(['\\', '/']).next().unwrap_or(s);
3742                                        n = basename
3743                                            .trim_end_matches(".exe")
3744                                            .trim_end_matches(".EXE")
3745                                            .to_string();
3746                                        break;
3747                                    }
3748                                }
3749                            }
3750                        }
3751                        // Native game fallback: use comm (e.g., "cs2", "dota2").
3752                        if n == "unknown" {
3753                            if let Ok(comm) =
3754                                std::fs::read_to_string(format!("/proc/{}/comm", game_tgid))
3755                            {
3756                                n = comm.trim().to_string();
3757                            }
3758                        }
3759                        n
3760                    };
3761                    (game_tgid, name)
3762                };
3763
3764                // Confidence for the candidate comes from the winning detection phase.
3765                // Phase 1 (Steam) → 100, Phase 2 (.exe) → 90, no game → 0.
3766                let new_game_confidence: u8 = if new_game_ppid == 0 {
3767                    0
3768                } else if new_game_ppid == steam_ppid {
3769                    100
3770                } else {
3771                    90 // exe match
3772                };
3773
3774                // Holdoff by confidence tier:
3775                //   100 (Steam) → instant lock
3776                //    90 (.exe)  → 5s holdoff (Wine apps nearly always games, but brief wait)
3777                let holdoff_for_conf = |conf: u8| -> u64 {
3778                    if conf >= 100 {
3779                        0
3780                    } else {
3781                        5
3782                    }
3783                };
3784
3785                // --- Hysteresis State Machine ---
3786                // Challenger can only displace a locked game if challenger_confidence >=
3787                // locked_game_confidence. Steam (100) always beats .exe (90).
3788
3789                if app.tracked_game_tgid == 0 {
3790                    // No game locked — try to lock now.
3791                    if new_game_confidence > 0 {
3792                        let holdoff = holdoff_for_conf(new_game_confidence);
3793                        if holdoff == 0 || app.game_challenger_ppid == new_game_ppid {
3794                            // Either instant (Steam) or challenger already waited enough.
3795                            let accept = holdoff == 0
3796                                || app
3797                                    .game_challenger_since
3798                                    .is_some_and(|s| s.elapsed() >= Duration::from_secs(holdoff));
3799                            if accept {
3800                                let (tgid, name) = resolve_game(new_game_ppid, &app.task_rows);
3801                                app.tracked_game_tgid = tgid;
3802                                app.tracked_game_ppid = new_game_ppid;
3803                                app.game_thread_count =
3804                                    ppid_data.get(&new_game_ppid).copied().unwrap_or(0);
3805                                app.game_name = name;
3806                                app.game_confidence = new_game_confidence;
3807                                app.game_challenger_ppid = 0;
3808                                app.game_challenger_since = None;
3809                                app.game_stable_polls = 1;
3810                            }
3811                        } else {
3812                            // Start or continue holdoff timer.
3813                            if app.game_challenger_ppid != new_game_ppid {
3814                                app.game_challenger_ppid = new_game_ppid;
3815                                app.game_challenger_since = Some(Instant::now());
3816                            }
3817                        }
3818                    }
3819                } else if new_game_ppid == app.tracked_game_ppid {
3820                    // Same game family still winning — update thread count.
3821                    app.game_thread_count = ppid_data.get(&new_game_ppid).copied().unwrap_or(0);
3822                    // GAME SWAP FIX (C): preserve active challenger timer.
3823                    // When two games coexist (Game A dying + Game B starting), HashMap
3824                    // iteration non-determinism alternates the scan winner. Resetting
3825                    // the challenger here kills Game B's holdoff timer every other poll.
3826                    if app.game_challenger_ppid == 0 {
3827                        app.game_stable_polls = app.game_stable_polls.saturating_add(1);
3828                    }
3829                } else if new_game_confidence > 0 && new_game_confidence >= app.game_confidence {
3830                    // GAME SWAP FIX (B): equal-or-higher confidence can contest.
3831                    // Handles: close Game A → launch Game B (both Steam = 100%).
3832                    // Equal confidence gets 5s holdoff to prevent HashMap flicker;
3833                    // higher confidence uses phase-based holdoff (0s Steam, 5s .exe).
3834                    app.game_stable_polls = 0;
3835                    if app.game_challenger_ppid != new_game_ppid {
3836                        app.game_challenger_ppid = new_game_ppid;
3837                        app.game_challenger_since = Some(Instant::now());
3838                    } else if let Some(since) = app.game_challenger_since {
3839                        let holdoff = if new_game_confidence > app.game_confidence {
3840                            holdoff_for_conf(new_game_confidence)
3841                        } else {
3842                            5 // Equal confidence: 5s holdoff prevents HashMap flicker
3843                        };
3844                        if since.elapsed() >= Duration::from_secs(holdoff) {
3845                            let (tgid, name) = resolve_game(new_game_ppid, &app.task_rows);
3846                            app.tracked_game_tgid = tgid;
3847                            app.tracked_game_ppid = new_game_ppid;
3848                            app.game_thread_count =
3849                                ppid_data.get(&new_game_ppid).copied().unwrap_or(0);
3850                            app.game_name = name;
3851                            app.game_confidence = new_game_confidence;
3852                            app.game_challenger_ppid = 0;
3853                            app.game_challenger_since = None;
3854                            app.game_stable_polls = 1;
3855                        }
3856                    }
3857                } else {
3858                    // No qualifying candidate or lower-confidence challenger — hold current.
3859                    app.game_challenger_ppid = 0;
3860                    app.game_challenger_since = None;
3861                    app.game_stable_polls = 0;
3862                }
3863            }
3864
3865            // Write game state to BPF BSS — drives all scheduling decisions.
3866            // game_ppid is the primary family signal (includes wineserver + siblings).
3867            // game_tgid written for display/existence checks.
3868            // sched_state drives HOG/bg/vprot/quantum policy.
3869            if let Some(bss) = &mut skel.maps.bss_data {
3870                bss.game_tgid = app.tracked_game_tgid;
3871                bss.game_ppid = app.tracked_game_ppid;
3872                bss.game_confidence = app.game_confidence;
3873                // --- State machine: GAMING > COMPILATION > IDLE ---
3874                const CAKE_STATE_IDLE: u8 = 0;
3875                const CAKE_STATE_COMPILATION: u8 = 1;
3876                const CAKE_STATE_GAMING: u8 = 2;
3877
3878                let new_state = if app.tracked_game_tgid > 0 {
3879                    CAKE_STATE_GAMING
3880                } else {
3881                    // Detect active compiler processes: PELT util >= 800 (~78% CPU) AND
3882                    // comm matches a known compiler binary. Require >=2 to avoid
3883                    // false positives from a single transient ld/as invocation.
3884                    const COMPILE_COMMS: &[&str] = &[
3885                        "cc1", "rustc", "clang", "clang++", "ld", "ld.lld", "lld", "ninja",
3886                        "cmake", "as", "gcc", "g++", "link",
3887                    ];
3888                    let compile_count = app
3889                        .task_rows
3890                        .values()
3891                        .filter(|r| {
3892                            r.status != TaskStatus::Dead
3893                                && r.pelt_util >= 800
3894                                && COMPILE_COMMS.iter().any(|&c| r.comm.contains(c))
3895                        })
3896                        .count();
3897                    app.compile_task_count = compile_count;
3898                    if compile_count >= 2 {
3899                        CAKE_STATE_COMPILATION
3900                    } else {
3901                        CAKE_STATE_IDLE
3902                    }
3903                };
3904                app.sched_state = new_state;
3905                bss.sched_state = new_state as u32;
3906                // Sync sched_state_local to all per-CPU BSS entries (Rule 78).
3907                // Eliminates remote global BSS cache line fetch at 5 BPF hot-path sites.
3908                // Cold path: runs every 500ms in TUI poll — bounded staleness.
3909                for i in 0..app.topology.nr_cpus.min(bss.cpu_bss.len()) {
3910                    bss.cpu_bss[i].sched_state_local = new_state;
3911                }
3912                // Precompute quantum ceiling alongside sched_state (Rule 5: no BPF branch).
3913                // COMPILATION → 8ms, else → 2ms. Values match intf.h constants.
3914                bss.quantum_ceiling_ns = if new_state == CAKE_STATE_COMPILATION {
3915                    8_000_000 // AQ_BULK_CEILING_COMPILE_NS
3916                } else {
3917                    2_000_000 // AQ_BULK_CEILING_NS
3918                };
3919            }
3920
3921            // --- Delta Mode: compute per-second rates ---
3922            let actual_elapsed = last_tick.elapsed().as_secs_f64().max(0.1);
3923            for (pid, row) in app.task_rows.iter_mut() {
3924                if let Some(&(prev_runs, prev_migr)) = app.prev_deltas.get(pid) {
3925                    let d_runs = row.total_runs.saturating_sub(prev_runs);
3926                    let d_migr = row.migration_count.saturating_sub(prev_migr);
3927                    row.runs_per_sec = d_runs as f64 / actual_elapsed;
3928                    row.migrations_per_sec = d_migr as f64 / actual_elapsed;
3929                }
3930            }
3931            // Lightweight delta snapshot: only (total_runs, migration_count)
3932            // Eliminates ~500 String allocs/drops per tick from deep-cloning task_rows
3933            app.prev_deltas.clear();
3934            for (pid, row) in app.task_rows.iter() {
3935                app.prev_deltas
3936                    .insert(*pid, (row.total_runs, row.migration_count));
3937            }
3938
3939            // --- Arena diagnostics ---
3940            app.arena_max = 0; // arena max not tracked via iter path
3941            app.arena_active = app.active_pids_buf.len();
3942
3943            /* EXPLICITLY DISABLED: Dead Tasks are no longer removed so users can view
3944             * the absolute hardware scheduling history of all tasks on the system.
3945             * app.task_rows.retain(|pid, _| active_pids.contains(pid)); */
3946
3947            // Re-sort with smart ordering:
3948            //   - Primary: BPF-tracked (is_bpf_tracked) descending
3949            //   - Secondary: current sort column
3950            //   - Dead tasks always last
3951            let mut sorted_pids: Vec<u32> = if app.show_all_tasks {
3952                app.task_rows.keys().copied().collect()
3953            } else {
3954                // Filter: only BPF-tracked tasks with total_runs > 0
3955                app.task_rows
3956                    .iter()
3957                    .filter(|(_, row)| row.is_bpf_tracked && row.total_runs > 0)
3958                    .map(|(pid, _)| *pid)
3959                    .collect()
3960            };
3961            // Apply sort with direction support
3962            let desc = app.sort_descending;
3963            match app.sort_column {
3964                SortColumn::RunDuration => sorted_pids.sort_by(|a, b| {
3965                    let r_a = app.task_rows.get(a).unwrap();
3966                    let r_b = app.task_rows.get(b).unwrap();
3967                    let cmp = r_b.pelt_util.cmp(&r_a.pelt_util);
3968                    if desc {
3969                        cmp
3970                    } else {
3971                        cmp.reverse()
3972                    }
3973                }),
3974                SortColumn::Gate1Pct => sorted_pids.sort_by(|a, b| {
3975                    let r_a = app.task_rows.get(a).unwrap();
3976                    let r_b = app.task_rows.get(b).unwrap();
3977                    let cmp = r_b.gate_hit_pcts[0]
3978                        .partial_cmp(&r_a.gate_hit_pcts[0])
3979                        .unwrap_or(std::cmp::Ordering::Equal);
3980                    if desc {
3981                        cmp
3982                    } else {
3983                        cmp.reverse()
3984                    }
3985                }),
3986                SortColumn::TargetCpu => sorted_pids.sort_by(|a, b| {
3987                    let r_a = app.task_rows.get(a).unwrap();
3988                    let r_b = app.task_rows.get(b).unwrap();
3989                    let cmp = r_a.core_placement.cmp(&r_b.core_placement);
3990                    if desc {
3991                        cmp.reverse()
3992                    } else {
3993                        cmp
3994                    }
3995                }),
3996                SortColumn::Pid => sorted_pids.sort_by(|a, b| {
3997                    let cmp = a.cmp(b);
3998                    if desc {
3999                        cmp.reverse()
4000                    } else {
4001                        cmp
4002                    }
4003                }),
4004                SortColumn::SelectCpu => sorted_pids.sort_by(|a, b| {
4005                    let r_a = app.task_rows.get(a).unwrap();
4006                    let r_b = app.task_rows.get(b).unwrap();
4007                    let cmp = r_b.select_cpu_ns.cmp(&r_a.select_cpu_ns);
4008                    if desc {
4009                        cmp
4010                    } else {
4011                        cmp.reverse()
4012                    }
4013                }),
4014                SortColumn::Enqueue => sorted_pids.sort_by(|a, b| {
4015                    let r_a = app.task_rows.get(a).unwrap();
4016                    let r_b = app.task_rows.get(b).unwrap();
4017                    let cmp = r_b.enqueue_ns.cmp(&r_a.enqueue_ns);
4018                    if desc {
4019                        cmp
4020                    } else {
4021                        cmp.reverse()
4022                    }
4023                }),
4024                SortColumn::Jitter => sorted_pids.sort_by(|a, b| {
4025                    let r_a = app.task_rows.get(a).unwrap();
4026                    let r_b = app.task_rows.get(b).unwrap();
4027                    let j_a = if r_a.total_runs > 0 {
4028                        r_a.jitter_accum_ns / r_a.total_runs as u64
4029                    } else {
4030                        0
4031                    };
4032                    let j_b = if r_b.total_runs > 0 {
4033                        r_b.jitter_accum_ns / r_b.total_runs as u64
4034                    } else {
4035                        0
4036                    };
4037                    let cmp = j_b.cmp(&j_a);
4038                    if desc {
4039                        cmp
4040                    } else {
4041                        cmp.reverse()
4042                    }
4043                }),
4044                SortColumn::Tier => sorted_pids.sort_by(|a, b| {
4045                    let r_a = app.task_rows.get(a).unwrap();
4046                    let r_b = app.task_rows.get(b).unwrap();
4047                    let cmp = r_a.tier.cmp(&r_b.tier);
4048                    if desc {
4049                        cmp
4050                    } else {
4051                        cmp.reverse()
4052                    }
4053                }),
4054                SortColumn::Pelt => sorted_pids.sort_by(|a, b| {
4055                    let r_a = app.task_rows.get(a).unwrap();
4056                    let r_b = app.task_rows.get(b).unwrap();
4057                    let cmp = r_b.pelt_util.cmp(&r_a.pelt_util);
4058                    if desc {
4059                        cmp
4060                    } else {
4061                        cmp.reverse()
4062                    }
4063                }),
4064                SortColumn::Vcsw => sorted_pids.sort_by(|a, b| {
4065                    let r_a = app.task_rows.get(a).unwrap();
4066                    let r_b = app.task_rows.get(b).unwrap();
4067                    let cmp = r_b.nvcsw_delta.cmp(&r_a.nvcsw_delta);
4068                    if desc {
4069                        cmp
4070                    } else {
4071                        cmp.reverse()
4072                    }
4073                }),
4074                SortColumn::Hog => sorted_pids.sort_by(|a, b| {
4075                    let r_a = app.task_rows.get(a).unwrap();
4076                    let r_b = app.task_rows.get(b).unwrap();
4077                    // Hogs first when descending
4078                    let cmp = (r_b.is_hog as u8).cmp(&(r_a.is_hog as u8));
4079                    if desc {
4080                        cmp
4081                    } else {
4082                        cmp.reverse()
4083                    }
4084                }),
4085                SortColumn::RunsPerSec => sorted_pids.sort_by(|a, b| {
4086                    let r_a = app.task_rows.get(a).unwrap();
4087                    let r_b = app.task_rows.get(b).unwrap();
4088                    let cmp = r_b
4089                        .runs_per_sec
4090                        .partial_cmp(&r_a.runs_per_sec)
4091                        .unwrap_or(std::cmp::Ordering::Equal);
4092                    if desc {
4093                        cmp
4094                    } else {
4095                        cmp.reverse()
4096                    }
4097                }),
4098                SortColumn::Gap => sorted_pids.sort_by(|a, b| {
4099                    let r_a = app.task_rows.get(a).unwrap();
4100                    let r_b = app.task_rows.get(b).unwrap();
4101                    let cmp = r_b.dispatch_gap_us.cmp(&r_a.dispatch_gap_us);
4102                    if desc {
4103                        cmp
4104                    } else {
4105                        cmp.reverse()
4106                    }
4107                }),
4108            }
4109
4110            // TGID grouping: stable-sort by tgid so threads of the
4111            // same process stay adjacent. The first thread in each
4112            // group (after the primary sort) defines the group rank,
4113            // so processes with high-priority threads sort first.
4114            let mut tgid_rank: std::collections::HashMap<u32, usize> =
4115                std::collections::HashMap::new();
4116            for (i, pid) in sorted_pids.iter().enumerate() {
4117                if let Some(row) = app.task_rows.get(pid) {
4118                    let tgid = if row.tgid > 0 { row.tgid } else { *pid };
4119                    tgid_rank.entry(tgid).or_insert(i);
4120                }
4121            }
4122            // Pin game-family rows to the top, preserving the user's sort order within
4123            // each group. Uses stable_sort so relative order is unchanged.
4124            if app.tracked_game_tgid > 0 {
4125                sorted_pids.sort_by(|a, b| {
4126                    let gm_a = app.task_rows.get(a).is_some_and(|r| r.is_game_member);
4127                    let gm_b = app.task_rows.get(b).is_some_and(|r| r.is_game_member);
4128                    // true sorts before false (1 > 0), so game members come first.
4129                    gm_b.cmp(&gm_a)
4130                });
4131            }
4132
4133            sorted_pids.sort_by(|a, b| {
4134                let r_a = app.task_rows.get(a).unwrap();
4135                let r_b = app.task_rows.get(b).unwrap();
4136                let tgid_a = if r_a.tgid > 0 { r_a.tgid } else { *a };
4137                let tgid_b = if r_b.tgid > 0 { r_b.tgid } else { *b };
4138                let rank_a = tgid_rank.get(&tgid_a).copied().unwrap_or(usize::MAX);
4139                let rank_b = tgid_rank.get(&tgid_b).copied().unwrap_or(usize::MAX);
4140                rank_a.cmp(&rank_b).then_with(|| {
4141                    // Within same tgid group, keep original sort order
4142                    r_b.pelt_util.cmp(&r_a.pelt_util)
4143                })
4144            });
4145
4146            app.sorted_pids = sorted_pids;
4147
4148            last_tick = Instant::now();
4149        }
4150    }
4151
4152    restore_terminal()?;
4153    Ok(())
4154}