// scx_cake/topology.rs
// SPDX-License-Identifier: GPL-2.0
// Topology detection - CPUs, CCDs, P/E cores. Results passed to BPF as const volatile.

use anyhow::Result;
use scx_utils::{CoreType, Topology};
6
/// Maximum supported CPUs (matches BPF array sizes)
pub const MAX_CPUS: usize = 64;
/// Maximum supported LLCs (matches BPF array sizes)
pub const MAX_LLCS: usize = 8;

/// Detected topology information, flattened into fixed-size arrays and
/// bitmasks so it can be handed to BPF as `const volatile` data.
#[derive(Debug, Clone)]
pub struct TopologyInfo {
    /// Number of online CPUs
    pub nr_cpus: usize,
    /// Number of physical cores (for PHYS_FIRST scan)
    pub nr_phys_cpus: usize,

    /// True if system has multiple L3 cache domains (CCDs)
    pub has_dual_ccd: bool,

    /// True if system has hybrid P/E cores (Intel hybrid or similar)
    pub has_hybrid_cores: bool,

    /// SMT enabled status
    pub smt_enabled: bool,
    /// Map of CPU ID -> Sibling CPU ID (or self if none/disabled)
    pub cpu_sibling_map: [u8; MAX_CPUS],

    // BPF Maps
    /// Dense LLC index (0..MAX_LLCS) for each CPU
    pub cpu_llc_id: [u8; MAX_CPUS],
    /// 1 if the CPU belongs to a big (performance) core, 0 otherwise
    pub cpu_is_big: [u8; MAX_CPUS],
    /// Physical core ID for each CPU
    pub cpu_core_id: [u8; MAX_CPUS],
    /// Per-core thread bit (bit 0 = first SMT thread, bit 1 = second, ...)
    pub cpu_thread_bit: [u8; MAX_CPUS],
    /// Per-CPU DSQ ID handed to BPF
    pub cpu_dsq_id: [u32; MAX_CPUS],
    /// Pre-computed 64-bit mask of all CPUs in a physical core
    pub core_cpu_mask: [u64; 32],
    /// Bitmask requirement for a core to be "fully idle" (e.g. 0x3 for dual SMT)
    pub core_thread_mask: [u8; 32],
    /// Per-LLC mask of member CPUs, indexed by dense LLC index
    pub llc_cpu_mask: [u64; MAX_LLCS],
    /// Mask of all CPUs on big cores
    pub big_cpu_mask: u64,

    /// Heterogeneous Routing Masks
    pub big_core_phys_mask: u64,
    pub big_core_smt_mask: u64,
    pub little_core_mask: u64,
    pub vcache_llc_mask: u64,
    pub has_vcache: bool,

    // Info
    /// Estimated CPUs per CCD (taken from the first LLC scanned)
    pub cpus_per_ccd: u32,
}
54
55pub fn detect() -> Result<TopologyInfo> {
56    // robustly detect topology using scx_utils
57    let topo = Topology::new()?;
58
59    let nr_cpus = topo.all_cpus.len();
60    let nr_llcs = topo.all_llcs.len();
61
62    // Get sibling map directly from scx_utils
63    let siblings = topo.sibling_cpus();
64    let mut cpu_sibling_map = [0u8; MAX_CPUS];
65
66    // Default to self-mapping
67    for (i, sibling) in cpu_sibling_map.iter_mut().enumerate().take(MAX_CPUS) {
68        *sibling = i as u8;
69    }
70
71    // Populate with detected siblings
72    for (cpu, &sibling) in siblings.iter().enumerate() {
73        if cpu < MAX_CPUS && sibling >= 0 {
74            let sib = sibling as usize;
75            if sib < MAX_CPUS {
76                cpu_sibling_map[cpu] = sib as u8;
77            }
78        }
79    }
80
81    let mut info = TopologyInfo {
82        nr_cpus,
83        nr_phys_cpus: topo.all_cores.len(),
84        has_dual_ccd: nr_llcs > 1,
85        has_hybrid_cores: false, // Will detect below
86        smt_enabled: topo.smt_enabled,
87        cpu_sibling_map,
88        cpu_llc_id: [0; MAX_CPUS],
89        cpu_is_big: [1; MAX_CPUS], // Default to 1 (Big) to be safe
90        cpu_core_id: [0; MAX_CPUS],
91        cpu_thread_bit: [0; MAX_CPUS],
92        cpu_dsq_id: [0; MAX_CPUS],
93        core_cpu_mask: [0; 32],
94        core_thread_mask: [0; 32],
95        llc_cpu_mask: [0; MAX_LLCS],
96        big_cpu_mask: 0,
97
98        // Heterogeneous Masks
99        big_core_phys_mask: 0,
100        big_core_smt_mask: 0,
101        little_core_mask: 0,
102        vcache_llc_mask: 0,
103        has_vcache: false,
104
105        cpus_per_ccd: 0,
106    };
107
108    // 1. Map LLCs
109    // Note: topo.all_llcs keys are arbitrary kernel IDs. We must map them to 0..MAX_LLCS-1.
110    // We'll just use a simple counter 0,1,2... as we iterate.
111    let mut llc_idx = 0;
112
113    for llc in topo.all_llcs.values() {
114        if llc_idx >= MAX_LLCS {
115            break;
116        }
117
118        let mut mask = 0u64;
119        let mut core_count = 0;
120
121        for cpu_id in llc.all_cpus.keys() {
122            let cpu = *cpu_id;
123            if cpu < MAX_CPUS {
124                info.cpu_llc_id[cpu] = llc_idx as u8;
125                mask |= 1u64 << cpu;
126                core_count += 1;
127            }
128        }
129
130        info.llc_cpu_mask[llc_idx] = mask;
131        if info.cpus_per_ccd == 0 {
132            info.cpus_per_ccd = core_count;
133        } // Estimate
134
135        llc_idx += 1;
136    }
137
138    // 2. Identify P-cores vs E-cores and V-Cache
139    info.cpu_is_big = [0; MAX_CPUS];
140    info.big_cpu_mask = 0;
141    info.big_core_phys_mask = 0;
142    info.big_core_smt_mask = 0;
143    info.little_core_mask = 0;
144    info.vcache_llc_mask = 0;
145    info.has_vcache = false;
146
147    let mut p_cores_found = 0;
148    let mut e_cores_found = 0;
149
150    for (core_id_usize, core) in &topo.all_cores {
151        let core_id = *core_id_usize;
152
153        // Determine is_big.
154        let is_big = match core.core_type {
155            CoreType::Little => 0,
156            _ => 1,
157        };
158
159        if is_big == 1 {
160            p_cores_found += 1;
161        } else {
162            e_cores_found += 1;
163        }
164
165        // Calculate SMT requirement mask for this core
166        if core_id < 32 {
167            info.core_thread_mask[core_id] = ((1u16 << core.cpus.len()) - 1) as u8;
168        }
169
170        // Iterate over CPUs in this core
171        let mut thread_idx = 0;
172        let mut sorted_cpus: Vec<_> = core.cpus.keys().collect();
173        sorted_cpus.sort();
174
175        for cpu_id in sorted_cpus {
176            let cpu = *cpu_id;
177            if cpu < MAX_CPUS {
178                info.cpu_is_big[cpu] = is_big;
179                info.cpu_core_id[cpu] = core_id as u8;
180                info.cpu_thread_bit[cpu] = 1 << thread_idx;
181                info.cpu_dsq_id[cpu] = 1000 /* CAKE_DSQ_LC_BASE */ + cpu as u32;
182
183                if core_id < 32 {
184                    info.core_cpu_mask[core_id] |= 1u64 << cpu;
185                }
186
187                if is_big == 1 {
188                    info.big_cpu_mask |= 1u64 << cpu;
189                    if thread_idx == 0 {
190                        info.big_core_phys_mask |= 1u64 << cpu;
191                    } else {
192                        info.big_core_smt_mask |= 1u64 << cpu;
193                    }
194                } else {
195                    info.little_core_mask |= 1u64 << cpu;
196                }
197
198                thread_idx += 1;
199            }
200        }
201    }
202
203    // Evaluate V-Cache / LLC Asymmetry
204    let mut max_llc_cache_size = 0;
205    let mut max_llc_idx = 0;
206
207    // Find the LLC cluster with the absolute largest cache_size
208    for llc in topo.all_llcs.values() {
209        let mut cluster_cache_size = 0;
210        if let Some(cpu) = llc.all_cpus.values().next() {
211            cluster_cache_size += cpu.cache_size;
212            // All CPUs in the LLC report the same size
213        }
214
215        if cluster_cache_size > max_llc_cache_size {
216            max_llc_cache_size = cluster_cache_size;
217            max_llc_idx = llc.id;
218        }
219    }
220
221    // Verify if there is actual Cache Asymmetry (V-Cache detection)
222    // We only flag V-Cache if there is MORE THAN 1 LLC and they have UNEQUAL cache sizes.
223    if info.has_dual_ccd {
224        for llc in topo.all_llcs.values() {
225            let mut expected_size = 0;
226            if let Some(cpu) = llc.all_cpus.values().next() {
227                expected_size += cpu.cache_size;
228            }
229            // Significant cache disparity = Asymmetric CCD (V-CACHE)
230            if expected_size > 0 && max_llc_cache_size > (expected_size * 2) {
231                info.has_vcache = true;
232                if max_llc_idx < 8 {
233                    info.vcache_llc_mask = info.llc_cpu_mask[max_llc_idx];
234                }
235            }
236        }
237    }
238
239    // Update hybrid flag
240    if p_cores_found > 0 && e_cores_found > 0 {
241        info.has_hybrid_cores = true;
242    } else {
243        info.has_hybrid_cores = false;
244        // On Homogenous CPUs (Like 9800X3D), everything is a "Big Phys" core.
245        // If there's no V-Cache Asymmetry, everything collapses perfectly to prevent extra BPF scans.
246    }
247
248    // Log detected topology (debug level - use RUST_LOG=debug to see)
249    log::debug!("Topology detected:");
250    log::debug!("  CPUs:          {}", info.nr_cpus);
251    log::debug!("  Phys cores:    {}", info.nr_phys_cpus);
252    log::debug!("  SMT Enabled:   {}", info.smt_enabled);
253    log::debug!("  Dual CCD:      {}", info.has_dual_ccd);
254    if info.has_dual_ccd {
255        log::debug!("    Masks:       {:x?}", &info.llc_cpu_mask[..llc_idx]);
256    }
257    log::debug!("  Hybrid cores:  {}", info.has_hybrid_cores);
258    if info.has_hybrid_cores {
259        log::debug!("    P-core Phys mask: {:016x}", info.big_core_phys_mask);
260        log::debug!("    E-core mask:      {:016x}", info.little_core_mask);
261    }
262    log::debug!("  V-Cache CCD:   {}", info.has_vcache);
263    if info.has_vcache {
264        log::debug!("    V-Cache mask: {:016x}", info.vcache_llc_mask);
265    }
266
267    Ok(info)
268}