scx_utils/topology.rs

// Copyright (c) Meta Platforms, Inc. and affiliates.

// This software may be used and distributed according to the terms of the
// GNU General Public License version 2.

//! # SCX Topology
//!
//! A crate that allows schedulers to inspect and model the host's topology, in
//! service of creating scheduling domains.
//!
//! A Topology comprises one or more Node objects, which are themselves
//! composed hierarchically of Llc -> Core -> Cpu objects:
//!```rust,ignore
//!                                   Topology
//!                                       |
//! o--------------------------------o   ...   o----------------o---------------o
//! |         Node                   |         |         Node                   |
//! | ID      0                      |         | ID      1                      |
//! | LLCs    <id, Llc>              |         | LLCs    <id, Llc>              |
//! | Span    0x00000fffff00000fffff |         | Span    0xfffff00000fffff00000 |
//! o--------------------------------o         o--------------------------------o
//!                 \
//!                  --------------------
//!                                      \
//! o--------------------------------o   ...   o--------------------------------o
//! |             Llc                |         |             Llc                |
//! | ID     0                       |         | ID     1                       |
//! | Cores  <id, Core>              |         | Cores  <id, Core>              |
//! | Span   0x00000ffc0000000ffc00  |         | Span   0x00000003ff00000003ff  |
//! o--------------------------------o         o----------------o---------------o
//!                                                             /
//!                                        ---------------------
//!                                       /
//! o--------------------------------o   ...   o--------------------------------o
//! |              Core              |         |              Core              |
//! | ID     0                       |         | ID     9                       |
//! | Cpus   <id, Cpu>               |         | Cpus   <id, Cpu>               |
//! | Span   0x00000000010000000001  |         | Span   0x00000002000000000200  |
//! o--------------------------------o         o----------------o---------------o
//!                                                             /
//!                                        ---------------------
//!                                       /
//! o--------------------------------o   ...   o---------------------------------o
//! |              Cpu               |         |               Cpu               |
//! | ID       9                     |         | ID       49                     |
//! | online   1                     |         | online   1                      |
//! | min_freq 400000                |         | min_freq 400000                 |
//! | max_freq 5881000               |         | max_freq 5881000                |
//! o--------------------------------o         o---------------------------------o
//!```
//! Every object contains a Cpumask that spans all CPUs at that point in the
//! topological hierarchy.
//!
//! Creating Topology
//! -----------------
//!
//! Topology objects are created using the static new function:
//!
//!```
//!     use scx_utils::Topology;
//!     let top = Topology::new().unwrap();
//!```
//!
//! Querying Topology
//! -----------------
//!
//! With a created Topology, you can query the topological hierarchy using the
//! set of accessor functions defined below. All objects in the topological
//! hierarchy are entirely read-only. If the host topology were to change (due
//! to e.g. hotplug), a new Topology object should be created.
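//!
//! For example, a minimal sketch (using the public fields declared in this
//! file) that walks every CPU in the hierarchy:
//!
//!```rust,ignore
//!     use scx_utils::Topology;
//!
//!     let top = Topology::new().unwrap();
//!     for (node_id, node) in &top.nodes {
//!         for (llc_id, llc) in &node.llcs {
//!             for (core_id, core) in &llc.cores {
//!                 for (cpu_id, cpu) in &core.cpus {
//!                     println!("node {} llc {} core {} cpu {} capacity {}",
//!                              node_id, llc_id, core_id, cpu_id, cpu.cpu_capacity);
//!                 }
//!             }
//!         }
//!     }
//!```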

use crate::compat::ROOT_PREFIX;
use crate::cpumask::read_cpulist;
use crate::misc::read_file_byte;
use crate::misc::read_file_usize_vec;
use crate::misc::read_from_file;
use crate::Cpumask;
use anyhow::bail;
use anyhow::Result;
use glob::glob;
use log::warn;
use sscanf::sscanf;
use std::collections::BTreeMap;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;

#[cfg(feature = "gpu-topology")]
use crate::gpu::{create_gpus, Gpu, GpuIndex};

lazy_static::lazy_static! {
    /// The maximum possible number of CPU IDs in the system. Note that this
    /// can differ from the number of possible CPUs on the system (though it
    /// very seldom does), e.g. when there are fully disabled CPUs (i.e. CPUs
    /// that may not be onlined) in the middle of the range of possible CPUs.
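    ///
    /// For example, if only CPUs 0-7 and 12-15 may be onlined while CPUs 8-11
    /// are fully disabled, NR_CPUS_POSSIBLE would be 12 while NR_CPU_IDS
    /// would be 16.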
    pub static ref NR_CPU_IDS: usize = read_cpu_ids().unwrap().last().unwrap() + 1;

    /// The number of possible CPUs that may be active on the system. Note
    /// that this value is separate from the number of possible _CPU IDs_ in
    /// the system, as there may be gaps in what CPUs are allowed to be
    /// onlined. For example, some BIOS implementations may report spans of
    /// disabled CPUs that may not be onlined, whose IDs are lower than the
    /// IDs of other CPUs that may be onlined.
    pub static ref NR_CPUS_POSSIBLE: usize = libbpf_rs::num_possible_cpus().unwrap();
}

#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum CoreType {
    Big { turbo: bool },
    Little,
}

#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Cpu {
    pub id: usize,
    pub min_freq: usize,
    pub max_freq: usize,
    /// Base operational frequency. Only available on Intel Turbo Boost
    /// CPUs. If not available, this is simply the maximum frequency.
    pub base_freq: usize,
    /// The best-effort guess of cpu_capacity, scaled to 1024.
    pub cpu_capacity: usize,
    pub smt_level: usize,
    /// CPU idle resume latency
    pub pm_qos_resume_latency_us: usize,
    pub trans_lat_ns: usize,
    pub l2_id: usize,
    pub l3_id: usize,
    /// Per-CPU cache size of all levels.
    pub cache_size: usize,
    pub core_type: CoreType,

    /// Ancestor IDs.
    pub core_id: usize,
    pub llc_id: usize,
    pub node_id: usize,
    pub package_id: usize,
    pub cluster_id: isize,
}

#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Core {
    /// Monotonically increasing unique id
    pub id: usize,
    /// The sysfs value of core_id
    pub kernel_id: usize,
    pub cluster_id: isize,
    pub cpus: BTreeMap<usize, Arc<Cpu>>,
    /// Cpumask of all CPUs in this core.
    pub span: Cpumask,
    pub core_type: CoreType,

    /// Ancestor IDs.
    pub llc_id: usize,
    pub node_id: usize,
}

#[derive(Debug, Clone)]
pub struct Llc {
    /// Monotonically increasing unique id
    pub id: usize,
    /// The kernel id of the llc
    pub kernel_id: usize,
    pub cores: BTreeMap<usize, Arc<Core>>,
    /// Cpumask of all CPUs in this llc.
    pub span: Cpumask,

    /// Ancestor IDs.
    pub node_id: usize,

    /// Skip indices to access lower level members easily.
    pub all_cpus: BTreeMap<usize, Arc<Cpu>>,
}

#[derive(Debug, Clone)]
pub struct Node {
    pub id: usize,
    pub distance: Vec<usize>,
    pub llcs: BTreeMap<usize, Arc<Llc>>,
    /// Cpumask of all CPUs in this node.
    pub span: Cpumask,

    /// Skip indices to access lower level members easily.
    pub all_cores: BTreeMap<usize, Arc<Core>>,
    pub all_cpus: BTreeMap<usize, Arc<Cpu>>,

    #[cfg(feature = "gpu-topology")]
    pub gpus: BTreeMap<GpuIndex, Gpu>,
}

#[derive(Debug)]
pub struct Topology {
    pub nodes: BTreeMap<usize, Node>,
    /// Cpumask of all CPUs in the system.
    pub span: Cpumask,
    /// True if SMT is enabled in the system, false otherwise.
    pub smt_enabled: bool,

    /// Skip indices to access lower level members easily.
    pub all_llcs: BTreeMap<usize, Arc<Llc>>,
    pub all_cores: BTreeMap<usize, Arc<Core>>,
    pub all_cpus: BTreeMap<usize, Arc<Cpu>>,
}

impl Topology {
    fn instantiate(span: Cpumask, mut nodes: BTreeMap<usize, Node>) -> Result<Self> {
        // Build skip indices prefixed with all_ for easy lookups. As Arc
        // objects can only be modified while there's only one reference,
        // skip indices must be built from bottom to top.
        let mut topo_llcs = BTreeMap::new();
        let mut topo_cores = BTreeMap::new();
        let mut topo_cpus = BTreeMap::new();

        for (_node_id, node) in nodes.iter_mut() {
            let mut node_cores = BTreeMap::new();
            let mut node_cpus = BTreeMap::new();

            for (&llc_id, llc) in node.llcs.iter_mut() {
                let llc_mut = Arc::get_mut(llc).unwrap();
                let mut llc_cpus = BTreeMap::new();

                for (&core_id, core) in llc_mut.cores.iter_mut() {
                    let core_mut = Arc::get_mut(core).unwrap();
                    let smt_level = core_mut.cpus.len();

                    for (&cpu_id, cpu) in core_mut.cpus.iter_mut() {
                        let cpu_mut = Arc::get_mut(cpu).unwrap();
                        cpu_mut.smt_level = smt_level;

                        if topo_cpus
                            .insert(cpu_id, cpu.clone())
                            .or(node_cpus.insert(cpu_id, cpu.clone()))
                            .or(llc_cpus.insert(cpu_id, cpu.clone()))
                            .is_some()
                        {
                            bail!("Duplicate CPU ID {}", cpu_id);
                        }
                    }

                    // Note that in some weird architectures, core ids can be
                    // duplicated in different LLC domains.
                    topo_cores
                        .insert(core_id, core.clone())
                        .or(node_cores.insert(core_id, core.clone()));
                }

                llc_mut.all_cpus = llc_cpus;

                if topo_llcs.insert(llc_id, llc.clone()).is_some() {
                    bail!("Duplicate LLC ID {}", llc_id);
                }
            }

            node.all_cores = node_cores;
            node.all_cpus = node_cpus;
        }

        Ok(Topology {
            nodes,
            span,
            smt_enabled: is_smt_active().unwrap_or(false),
            all_llcs: topo_llcs,
            all_cores: topo_cores,
            all_cpus: topo_cpus,
        })
    }

    /// Build a complete host Topology
    pub fn new() -> Result<Topology> {
        let span = cpus_online()?;
        let mut topo_ctx = TopoCtx::new();
        // If the kernel is compiled with CONFIG_NUMA, then build a topology
        // from the NUMA hierarchy in sysfs. Otherwise, just make a single
        // default node of ID 0 which contains all cores.
        let path = format!("{}/sys/devices/system/node", *ROOT_PREFIX);
        let nodes = if Path::new(&path).exists() {
            create_numa_nodes(&span, &mut topo_ctx)?
        } else {
            create_default_node(&span, &mut topo_ctx, false)?
        };

        Self::instantiate(span, nodes)
    }

    /// Build a host Topology flattened into a single default node with a
    /// single LLC spanning all online CPUs.
    pub fn with_flattened_llc_node() -> Result<Topology> {
        let span = cpus_online()?;
        let mut topo_ctx = TopoCtx::new();
        let nodes = create_default_node(&span, &mut topo_ctx, true)?;
        Self::instantiate(span, nodes)
    }

    /// Get a map of all GPUs on the host, keyed by GPU index.
    #[cfg(feature = "gpu-topology")]
    pub fn gpus(&self) -> BTreeMap<GpuIndex, &Gpu> {
        let mut gpus = BTreeMap::new();
        for node in self.nodes.values() {
            for (idx, gpu) in &node.gpus {
                gpus.insert(*idx, gpu);
            }
        }
        gpus
    }

    /// Returns whether the Topology has a hybrid architecture of big and little cores.
    pub fn has_little_cores(&self) -> bool {
        self.all_cores
            .values()
            .any(|c| c.core_type == CoreType::Little)
    }

    /// Returns a vector that maps the index of each logical CPU to the
    /// sibling CPU. This represents the "next sibling" CPU within a package
    /// in systems that support SMT. The sibling CPU is the other logical
    /// CPU that shares the physical resources of the same physical core.
    ///
    /// This assumes each core holds at most two CPUs.
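    ///
    /// A usage sketch (illustrative only; on a system without SMT every
    /// entry remains -1):
    ///
    ///```rust,ignore
    ///     let top = Topology::new().unwrap();
    ///     let siblings = top.sibling_cpus();
    ///     for (cpu, &sib) in siblings.iter().enumerate() {
    ///         if sib >= 0 {
    ///             println!("cpu {} <-> cpu {}", cpu, sib);
    ///         }
    ///     }
    ///```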
    pub fn sibling_cpus(&self) -> Vec<i32> {
        let mut sibling_cpu = vec![-1i32; *NR_CPUS_POSSIBLE];
        for core in self.all_cores.values() {
            let mut first = -1i32;
            for &cpu in core.cpus.keys() {
                if first < 0 {
                    first = cpu as i32;
                } else {
                    sibling_cpu[first as usize] = cpu as i32;
                    sibling_cpu[cpu] = first;
                    break;
                }
            }
        }
        sibling_cpu
    }
}

/******************************************************
 * Helper structs/functions for creating the Topology *
 ******************************************************/
/// TopoCtx is a helper struct used to build a topology.
struct TopoCtx {
    /// Mapping of NUMA node core ids
    node_core_kernel_ids: BTreeMap<(usize, usize, usize), usize>,
    /// Mapping of NUMA node LLC ids
    node_llc_kernel_ids: BTreeMap<(usize, usize, usize), usize>,
    /// Mapping of L2 ids
    l2_ids: BTreeMap<String, usize>,
    /// Mapping of L3 ids
    l3_ids: BTreeMap<String, usize>,
}

impl TopoCtx {
    fn new() -> TopoCtx {
        let core_kernel_ids = BTreeMap::new();
        let llc_kernel_ids = BTreeMap::new();
        let l2_ids = BTreeMap::new();
        let l3_ids = BTreeMap::new();
        TopoCtx {
            node_core_kernel_ids: core_kernel_ids,
            node_llc_kernel_ids: llc_kernel_ids,
            l2_ids,
            l3_ids,
        }
    }
}

fn cpus_online() -> Result<Cpumask> {
    let path = format!("{}/sys/devices/system/cpu/online", *ROOT_PREFIX);
    let online = std::fs::read_to_string(path)?;
    Cpumask::from_cpulist(&online)
}

fn get_cache_id(topo_ctx: &mut TopoCtx, cache_level_path: &PathBuf, cache_level: usize) -> usize {
    // Check if the cache id is already cached
    let id_map = match cache_level {
        2 => &mut topo_ctx.l2_ids,
        3 => &mut topo_ctx.l3_ids,
        _ => return usize::MAX,
    };

    let path = &cache_level_path.join("shared_cpu_list");
    let key = match std::fs::read_to_string(path) {
        Ok(key) => key,
        Err(_) => return usize::MAX,
    };

    let id = *id_map.get(&key).unwrap_or(&usize::MAX);
    if id != usize::MAX {
        return id;
    }

    // In case of a cache miss, try to get the id from the sysfs first.
    let id = read_from_file(&cache_level_path.join("id")).unwrap_or(usize::MAX);
    if id != usize::MAX {
        // Keep the id in the map
        id_map.insert(key, id);
        return id;
    }

    // If the id file does not exist, assign an id and keep it in the map.
    let id = id_map.len();
    id_map.insert(key, id);

    id
}

fn get_per_cpu_cache_size(cache_path: &PathBuf) -> Result<usize> {
    let path_str = cache_path.to_str().unwrap();
    let paths = glob(&(path_str.to_owned() + "/index[0-9]*"))?;
    let mut tot_size = 0;

    for index in paths.filter_map(Result::ok) {
        // If there is no size information under sysfs (e.g., many ARM SoCs),
        // give 1024 as a default value. 1024 is small enough compared to the
        // real cache size of the CPU, but it is large enough to give a penalty
        // when multiple CPUs share the cache.
        let size = read_file_byte(&index.join("size")).unwrap_or(1024_usize);
        let cpulist: String = read_from_file(&index.join("shared_cpu_list"))?;
        let num_cpus = read_cpulist(&cpulist)?.len();
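        // Charge this CPU its proportional share of this cache level: e.g.,
        // a cache of size S shared by N CPUs contributes S / N to each
        // sharing CPU's total.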
        tot_size += size / num_cpus;
    }

    Ok(tot_size)
}

#[allow(clippy::too_many_arguments)]
fn create_insert_cpu(
    id: usize,
    node: &mut Node,
    online_mask: &Cpumask,
    topo_ctx: &mut TopoCtx,
    cs: &CapacitySource,
    flatten_llc: bool,
) -> Result<()> {
    // CPU is offline. The Topology hierarchy is read-only, and assumes
    // that hotplug will cause the scheduler to restart. Thus, we can
    // just skip this CPU altogether.
    if !online_mask.test_cpu(id) {
        return Ok(());
    }

    let cpu_str = format!("{}/sys/devices/system/cpu/cpu{}", *ROOT_PREFIX, id);
    let cpu_path = Path::new(&cpu_str);

    // Physical core ID
    let top_path = cpu_path.join("topology");
    let core_kernel_id = read_from_file(&top_path.join("core_id"))?;
    let package_id = read_from_file(&top_path.join("physical_package_id"))?;
    let cluster_id = read_from_file(&top_path.join("cluster_id"))?;

    // Evaluate L2, L3 and LLC cache IDs.
    //
    // Use ID 0 if we fail to detect the cache hierarchy. This seems to happen on certain SKUs, so
    // if there's no cache information then we have no option but to assume a single unified cache
    // per node.
    let cache_path = cpu_path.join("cache");
    let l2_id = get_cache_id(topo_ctx, &cache_path.join(format!("index{}", 2)), 2);
    let l3_id = get_cache_id(topo_ctx, &cache_path.join(format!("index{}", 3)), 3);
    let llc_kernel_id = if flatten_llc {
        0
    } else if l3_id == usize::MAX {
        l2_id
    } else {
        l3_id
    };

    // Per-CPU cache size
    let cache_size = get_per_cpu_cache_size(&cache_path).unwrap_or(0_usize);

    // Min and max frequencies. If the kernel is not compiled with
    // CONFIG_CPU_FREQ, just assume 0 for both frequencies.
    let freq_path = cpu_path.join("cpufreq");
    let min_freq = read_from_file(&freq_path.join("scaling_min_freq")).unwrap_or(0_usize);
    let max_freq = read_from_file(&freq_path.join("scaling_max_freq")).unwrap_or(0_usize);
    let base_freq = read_from_file(&freq_path.join("base_frequency")).unwrap_or(max_freq);
    let trans_lat_ns =
        read_from_file(&freq_path.join("cpuinfo_transition_latency")).unwrap_or(0_usize);

    // Cpu capacity
    let cap_path = cpu_path.join(cs.suffix.clone());
    let rcap = read_from_file(&cap_path).unwrap_or(cs.max_rcap);
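    // Scale the raw capacity to the usual 0..1024 range. For example
    // (illustrative numbers only), rcap = 192 with max_rcap = 255 yields
    // (192 * 1024) / 255 = 771.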
    let cpu_capacity = (rcap * 1024) / cs.max_rcap;

    // Power management
    let power_path = cpu_path.join("power");
    let pm_qos_resume_latency_us =
        read_from_file(&power_path.join("pm_qos_resume_latency_us")).unwrap_or(0_usize);

    let num_llcs = topo_ctx.node_llc_kernel_ids.len();
    let llc_id = topo_ctx
        .node_llc_kernel_ids
        .entry((node.id, package_id, llc_kernel_id))
        .or_insert(num_llcs);

    let llc = node.llcs.entry(*llc_id).or_insert(Arc::new(Llc {
        id: *llc_id,
        cores: BTreeMap::new(),
        span: Cpumask::new(),
        all_cpus: BTreeMap::new(),

        node_id: node.id,
        kernel_id: llc_kernel_id,
    }));
    let llc_mut = Arc::get_mut(llc).unwrap();

    let core_type = if cs.avg_rcap < cs.max_rcap && rcap == cs.max_rcap {
        CoreType::Big { turbo: true }
    } else if !cs.has_biglittle || rcap >= cs.avg_rcap {
        CoreType::Big { turbo: false }
    } else {
        CoreType::Little
    };

    let num_cores = topo_ctx.node_core_kernel_ids.len();
    let core_id = topo_ctx
        .node_core_kernel_ids
        .entry((node.id, package_id, core_kernel_id))
        .or_insert(num_cores);

    let core = llc_mut.cores.entry(*core_id).or_insert(Arc::new(Core {
        id: *core_id,
        cpus: BTreeMap::new(),
        span: Cpumask::new(),
        core_type: core_type.clone(),

        llc_id: *llc_id,
        node_id: node.id,
        kernel_id: core_kernel_id,
        cluster_id: cluster_id,
    }));
    let core_mut = Arc::get_mut(core).unwrap();

    core_mut.cpus.insert(
        id,
        Arc::new(Cpu {
            id,
            min_freq,
            max_freq,
            base_freq,
            cpu_capacity,
            smt_level: 0, // Will be initialized at instantiate().
            pm_qos_resume_latency_us,
            trans_lat_ns,
            l2_id,
            l3_id,
            cache_size,
            core_type: core_type.clone(),

            core_id: *core_id,
            llc_id: *llc_id,
            node_id: node.id,
            package_id,
            cluster_id,
        }),
    );

    if node.span.test_cpu(id) {
        bail!("Node {} already had CPU {}", node.id, id);
    }

    // Update all of the devices' spans to include this CPU.
    core_mut.span.set_cpu(id)?;
    llc_mut.span.set_cpu(id)?;
    node.span.set_cpu(id)?;

    Ok(())
}

fn read_cpu_ids() -> Result<Vec<usize>> {
    let mut cpu_ids = vec![];
    let path = format!("{}/sys/devices/system/cpu/cpu[0-9]*", *ROOT_PREFIX);
    let cpu_paths = glob(&path)?;
    for cpu_path in cpu_paths.filter_map(Result::ok) {
        let cpu_str = cpu_path.to_str().unwrap().trim();
        if *ROOT_PREFIX == "" {
            match sscanf!(cpu_str, "/sys/devices/system/cpu/cpu{usize}") {
                Ok(val) => cpu_ids.push(val),
                Err(_) => {
                    bail!("Failed to parse cpu ID {}", cpu_str);
                }
            }
        } else {
            match sscanf!(cpu_str, "{str}/sys/devices/system/cpu/cpu{usize}") {
                Ok((_, val)) => cpu_ids.push(val),
                Err(_) => {
                    bail!("Failed to parse cpu ID {}", cpu_str);
                }
            }
        }
    }
    cpu_ids.sort();
    Ok(cpu_ids)
}

struct CapacitySource {
    /// Path suffix after /sys/devices/system/cpu/cpuX
    suffix: String,
    /// Average raw capacity value
    avg_rcap: usize,
    /// Maximum raw capacity value
    max_rcap: usize,
    /// Does the system have little cores?
    has_biglittle: bool,
}

fn get_capacity_source() -> Option<CapacitySource> {
    // Sources for guessing cpu_capacity under /sys/devices/system/cpu/cpuX.
    // They should be ordered from the most precise to the least precise.
    let sources = [
        "cpufreq/amd_pstate_prefcore_ranking",
        "cpufreq/amd_pstate_highest_perf",
        "acpi_cppc/highest_perf",
        "cpu_capacity",
        "cpufreq/cpuinfo_max_freq",
    ];

    // Find the most precise source for cpu_capacity estimation.
    let prefix = format!("{}/sys/devices/system/cpu/cpu0", *ROOT_PREFIX);
    let mut raw_capacity;
    let mut suffix = sources[sources.len() - 1];
    'outer: for src in sources {
        let path_str = [prefix.clone(), src.to_string()].join("/");
        let path = Path::new(&path_str);
        raw_capacity = read_from_file(&path).unwrap_or(0_usize);
        if raw_capacity > 0 {
            // It would be an okay source...
            suffix = src;
            // But double-check if the source has meaningful information.
            let path = format!("{}/sys/devices/system/cpu/cpu[0-9]*", *ROOT_PREFIX);
            let cpu_paths = glob(&path).ok()?;
            for cpu_path in cpu_paths.filter_map(Result::ok) {
                let raw_capacity2 = read_from_file(&cpu_path.join(suffix)).unwrap_or(0_usize);
                if raw_capacity != raw_capacity2 {
                    break 'outer;
                }
            }
            // The source exists, but it reports that all CPUs have the same
            // capacity. Keep searching for a source that can tell the
            // capacities of different CPUs apart. This can happen when a
            // buggy driver lies (e.g., "acpi_cppc/highest_perf").
        }
    }

    // Find the max raw_capacity value for scaling to 1024.
    let mut max_rcap = 0;
    let mut min_rcap = usize::MAX;
    let mut avg_rcap = 0;
    let mut nr_cpus = 0;
    let mut has_biglittle = false;
    let path = format!("{}/sys/devices/system/cpu/cpu[0-9]*", *ROOT_PREFIX);
    let cpu_paths = glob(&path).ok()?;
    for cpu_path in cpu_paths.filter_map(Result::ok) {
        let rcap = read_from_file(&cpu_path.join(suffix)).unwrap_or(0_usize);
        if max_rcap < rcap {
            max_rcap = rcap;
        }
        if min_rcap > rcap {
            min_rcap = rcap;
        }
        avg_rcap += rcap;
        nr_cpus += 1;
    }

    if nr_cpus == 0 || max_rcap == 0 {
        suffix = "";
        avg_rcap = 1024;
        max_rcap = 1024;
        warn!("CPU capacity information is not available under sysfs.");
    } else {
        avg_rcap /= nr_cpus;
        // We consider a system to have a heterogeneous CPU architecture only
        // when there is a significant capacity gap (e.g., 1.3x). CPU capacities
        // can still vary in a homogeneous architecture, for instance due to
        // chip binning or when only a subset of CPUs supports turbo boost.
        //
        // Note that we need a more systematic approach to accurately detect
        // big/LITTLE architectures across various SoC designs. The current
        // approach, with a significant capacity difference, is somewhat ad-hoc.
        has_biglittle = max_rcap as f32 >= (1.3 * min_rcap as f32);
    }

    Some(CapacitySource {
        suffix: suffix.to_string(),
        avg_rcap,
        max_rcap,
        has_biglittle,
    })
}

fn is_smt_active() -> Option<bool> {
    let path = format!("{}/sys/devices/system/cpu/smt/active", *ROOT_PREFIX);
    let smt_on: u8 = read_from_file(Path::new(&path)).ok()?;
    Some(smt_on == 1)
}

fn create_default_node(
    online_mask: &Cpumask,
    topo_ctx: &mut TopoCtx,
    flatten_llc: bool,
) -> Result<BTreeMap<usize, Node>> {
    let mut nodes = BTreeMap::<usize, Node>::new();

    let mut node = Node {
        id: 0,
        distance: vec![],
        llcs: BTreeMap::new(),
        span: Cpumask::new(),
        #[cfg(feature = "gpu-topology")]
        gpus: BTreeMap::new(),
        all_cores: BTreeMap::new(),
        all_cpus: BTreeMap::new(),
    };

    #[cfg(feature = "gpu-topology")]
    {
        let system_gpus = create_gpus();
        if let Some(gpus) = system_gpus.get(&0) {
            for gpu in gpus {
                node.gpus.insert(gpu.index, gpu.clone());
            }
        }
    }

    let path = format!("{}/sys/devices/system/cpu", *ROOT_PREFIX);
    if !Path::new(&path).exists() {
        bail!("/sys/devices/system/cpu sysfs node not found");
    }

    let cs = get_capacity_source().unwrap();
    let cpu_ids = read_cpu_ids()?;
    for cpu_id in cpu_ids.iter() {
        create_insert_cpu(*cpu_id, &mut node, online_mask, topo_ctx, &cs, flatten_llc)?;
    }

    nodes.insert(node.id, node);

    Ok(nodes)
}

fn create_numa_nodes(
    online_mask: &Cpumask,
    topo_ctx: &mut TopoCtx,
) -> Result<BTreeMap<usize, Node>> {
    let mut nodes = BTreeMap::<usize, Node>::new();

    #[cfg(feature = "gpu-topology")]
    let system_gpus = create_gpus();

    let path = format!("{}/sys/devices/system/node/node*", *ROOT_PREFIX);
    let numa_paths = glob(&path)?;
    for numa_path in numa_paths.filter_map(Result::ok) {
        let numa_str = numa_path.to_str().unwrap().trim();
        let node_id = if *ROOT_PREFIX == "" {
            match sscanf!(numa_str, "/sys/devices/system/node/node{usize}") {
                Ok(val) => val,
                Err(_) => {
                    bail!("Failed to parse NUMA node ID {}", numa_str);
                }
            }
        } else {
            match sscanf!(numa_str, "{str}/sys/devices/system/node/node{usize}") {
                Ok((_, val)) => val,
                Err(_) => {
                    bail!("Failed to parse NUMA node ID {}", numa_str);
                }
            }
        };

        let distance = read_file_usize_vec(
            Path::new(&format!(
                "{}/sys/devices/system/node/node{}/distance",
                *ROOT_PREFIX, node_id
            )),
            ' ',
        )?;
        let mut node = Node {
            id: node_id,
            distance,
            llcs: BTreeMap::new(),
            span: Cpumask::new(),

            all_cores: BTreeMap::new(),
            all_cpus: BTreeMap::new(),

            #[cfg(feature = "gpu-topology")]
            gpus: BTreeMap::new(),
        };

        #[cfg(feature = "gpu-topology")]
        {
            if let Some(gpus) = system_gpus.get(&node_id) {
                for gpu in gpus {
                    node.gpus.insert(gpu.index, gpu.clone());
                }
            }
        }

        let cpu_pattern = numa_path.join("cpu[0-9]*");
        let cpu_paths = glob(cpu_pattern.to_string_lossy().as_ref())?;
        let cs = get_capacity_source().unwrap();
        let mut cpu_ids = vec![];
        for cpu_path in cpu_paths.filter_map(Result::ok) {
            let cpu_str = cpu_path.to_str().unwrap().trim();
            let cpu_id = if *ROOT_PREFIX == "" {
                match sscanf!(cpu_str, "/sys/devices/system/node/node{usize}/cpu{usize}") {
                    Ok((_, val)) => val,
                    Err(_) => {
                        bail!("Failed to parse cpu ID {}", cpu_str);
                    }
                }
            } else {
                match sscanf!(
                    cpu_str,
                    "{str}/sys/devices/system/node/node{usize}/cpu{usize}"
                ) {
                    Ok((_, _, val)) => val,
                    Err(_) => {
                        bail!("Failed to parse cpu ID {}", cpu_str);
                    }
                }
            };
            cpu_ids.push(cpu_id);
        }
        cpu_ids.sort();

        for cpu_id in cpu_ids {
            create_insert_cpu(cpu_id, &mut node, online_mask, topo_ctx, &cs, false)?;
        }

        nodes.insert(node.id, node);
    }
    Ok(nodes)
}