scx_utils/
topology.rs

1// Copyright (c) Meta Platforms, Inc. and affiliates.
2
3// This software may be used and distributed according to the terms of the
4// GNU General Public License version 2.
5
6//! # SCX Topology
7//!
8//! A crate that allows schedulers to inspect and model the host's topology, in
9//! service of creating scheduling domains.
10//!
11//! A Topology is comprised of one or more Node objects, which themselves are
12//! comprised hierarchically of LLC -> Core -> Cpu objects respectively:
13//!```rust,ignore
14//!                                   Topology
15//!                                       |
16//! o--------------------------------o   ...   o----------------o---------------o
17//! |         Node                   |         |         Node                   |
18//! | ID      0                      |         | ID      1                      |
19//! | LLCs    <id, Llc>              |         | LLCs    <id, Llc>              |
20//! | Span    0x00000fffff00000fffff |         | Span    0xfffff00000fffff00000 |
21//! o--------------------------------o         o--------------------------------o
22//!                 \
23//!                  --------------------
24//!                                      \
25//! o--------------------------------o   ...   o--------------------------------o
26//! |             Llc                |         |             Llc                |
27//! | ID     0                       |         | ID     1                       |
28//! | Cores  <id, Core>              |         | Cores  <id, Core>              |
29//! | Span   0x00000ffc0000000ffc00  |         | Span   0x00000003ff00000003ff  |
30//! o--------------------------------o         o----------------o---------------o
31//!                                                             /
32//!                                        ---------------------
33//!                                       /
34//! o--------------------------------o   ...   o--------------------------------o
35//! |              Core              |         |              Core              |
36//! | ID     0                       |         | ID     9                       |
37//! | Cpus   <id, Cpu>               |         | Cpus   <id, Cpu>               |
38//! | Span   0x00000000010000000001  |         | Span   0x00000002000000000200  |
39//! o--------------------------------o         o----------------o---------------o
40//!                                                             /
41//!                                        ---------------------
42//!                                       /
43//! o--------------------------------o   ...   o---------------------------------o
44//! |              Cpu               |         |               Cpu               |
45//! | ID       9                     |         | ID       49                     |
46//! | online   1                     |         | online   1                      |
47//! | min_freq 400000                |         | min_freq 400000                 |
48//! | max_freq 5881000               |         | min_freq 5881000                |
49//! o--------------------------------o         o---------------------------------o
50//!```
51//! Every object contains a Cpumask that spans all CPUs in that point in the
52//! topological hierarchy.
53//!
54//! Creating Topology
55//! -----------------
56//!
57//! Topology objects are created using the static new function:
58//!
59//!```  
60//!     use scx_utils::Topology;
61//!     let top = Topology::new().unwrap();
62//!```
63//!
64//! Querying Topology
65//! -----------------
66//!
67//! With a created Topology, you can query the topological hierarchy using the
68//! set of accessor functions defined below. All objects in the topological
69//! hierarchy are entirely read-only. If the host topology were to change (due
70//! to e.g. hotplug), a new Topology object should be created.
71
72use crate::cpumask::read_cpulist;
73use crate::misc::read_file_byte;
74use crate::misc::read_file_usize_vec;
75use crate::misc::read_from_file;
76use crate::Cpumask;
77use anyhow::bail;
78use anyhow::Result;
79use glob::glob;
80use log::warn;
81use sscanf::sscanf;
82use std::collections::BTreeMap;
83use std::path::Path;
84use std::path::PathBuf;
85use std::sync::Arc;
86
87#[cfg(feature = "gpu-topology")]
88use crate::gpu::{create_gpus, Gpu, GpuIndex};
89
lazy_static::lazy_static! {
    /// The maximum possible number of CPU IDs in the system, i.e. the
    /// highest CPU ID visible in sysfs plus one. Note that this is
    /// different than the number of possible CPUs on the system (though
    /// very seldom is). This number may differ from the number of possible
    /// CPUs on the system when e.g. there are fully disabled CPUs in the
    /// middle of the range of possible CPUs (i.e. CPUs that may not be
    /// onlined), leaving holes in the CPU ID space.
    pub static ref NR_CPU_IDS: usize = read_cpu_ids().unwrap().last().unwrap() + 1;

    /// The number of possible CPUs that may be active on the system. Note
    /// that this value is separate from the number of possible _CPU IDs_ in
    /// the system, as there may be gaps in what CPUs are allowed to be
    /// onlined. For example, some BIOS implementations may report spans of
    /// disabled CPUs that may not be onlined, whose IDs are lower than the
    /// IDs of other CPUs that may be onlined.
    pub static ref NR_CPUS_POSSIBLE: usize = libbpf_rs::num_possible_cpus().unwrap();
}
107
/// Classification of a core in a (possibly heterogeneous) CPU architecture.
#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum CoreType {
    /// A performance core. `turbo` is set when the core's raw capacity
    /// equals the system maximum while the average raw capacity is lower.
    Big { turbo: bool },
    /// An efficiency core; only assigned on systems detected as having a
    /// big/LITTLE capacity gap.
    Little,
}
113
#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Cpu {
    /// Kernel CPU ID (the N in /sys/devices/system/cpu/cpuN).
    pub id: usize,
    /// Minimum scaling frequency (scaling_min_freq); 0 without CONFIG_CPU_FREQ.
    pub min_freq: usize,
    /// Maximum scaling frequency (scaling_max_freq); 0 without CONFIG_CPU_FREQ.
    pub max_freq: usize,
    /// Base operational frequency. Only available on Intel Turbo Boost
    /// CPUs. If not available, this will simply return maximum frequency.
    pub base_freq: usize,
    /// The best-effort guessing of cpu_capacity scaled to 1024.
    pub cpu_capacity: usize,
    /// Number of CPUs (SMT siblings) on this CPU's core.
    pub smt_level: usize,
    /// CPU idle resume latency
    pub pm_qos_resume_latency_us: usize,
    /// cpufreq transition latency (cpuinfo_transition_latency); 0 if absent.
    pub trans_lat_ns: usize,
    /// Dense L2 cache ID, or usize::MAX if undetectable.
    pub l2_id: usize,
    /// Dense L3 cache ID, or usize::MAX if undetectable.
    pub l3_id: usize,
    /// Per-CPU cache size of all levels.
    pub cache_size: usize,
    /// Big/little classification of this CPU's core.
    pub core_type: CoreType,

    /// Ancestor IDs.
    pub core_id: usize,
    pub llc_id: usize,
    pub node_id: usize,
    pub package_id: usize,
    pub cluster_id: isize,
}
141
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Core {
    /// Monotonically increasing unique id
    pub id: usize,
    /// The sysfs value of core_id
    pub kernel_id: usize,
    /// The sysfs value of topology/cluster_id.
    pub cluster_id: isize,
    /// Map of CPU ID -> Cpu for all CPUs in this core.
    pub cpus: BTreeMap<usize, Arc<Cpu>>,
    /// Cpumask of all CPUs in this core.
    pub span: Cpumask,
    /// Big/little classification shared by this core's CPUs.
    pub core_type: CoreType,

    /// Ancestor IDs.
    pub llc_id: usize,
    pub node_id: usize,
}
158
/// A last-level cache domain: the set of cores sharing one LLC.
#[derive(Debug, Clone)]
pub struct Llc {
    /// Monotonically increasing unique id
    pub id: usize,
    /// The kernel id of the llc
    pub kernel_id: usize,
    /// Map of core ID -> Core for all cores in this LLC.
    pub cores: BTreeMap<usize, Arc<Core>>,
    /// Cpumask of all CPUs in this llc.
    pub span: Cpumask,

    /// Ancestor IDs.
    pub node_id: usize,

    /// Skip indices to access lower level members easily.
    pub all_cpus: BTreeMap<usize, Arc<Cpu>>,
}
175
/// A NUMA node: its LLCs plus the inter-node distance vector.
#[derive(Debug, Clone)]
pub struct Node {
    /// NUMA node ID as reported by sysfs.
    pub id: usize,
    /// Node distances as read from this node's sysfs distance file.
    pub distance: Vec<usize>,
    /// Map of LLC ID -> Llc for all LLCs in this node.
    pub llcs: BTreeMap<usize, Arc<Llc>>,
    /// Cpumask of all CPUs in this node.
    pub span: Cpumask,

    /// Skip indices to access lower level members easily.
    pub all_cores: BTreeMap<usize, Arc<Core>>,
    pub all_cpus: BTreeMap<usize, Arc<Cpu>>,

    /// GPUs attached to this node (gpu-topology feature only).
    #[cfg(feature = "gpu-topology")]
    pub gpus: BTreeMap<GpuIndex, Gpu>,
}
191
/// The root of the host topology: all NUMA nodes plus flat skip indices
/// over every LLC, core, and CPU in the system.
#[derive(Debug)]
pub struct Topology {
    /// Map of node ID -> Node for all NUMA nodes in the system.
    pub nodes: BTreeMap<usize, Node>,
    /// Cpumask all CPUs in the system.
    pub span: Cpumask,
    /// True if SMT is enabled in the system, false otherwise.
    pub smt_enabled: bool,

    /// Skip indices to access lower level members easily.
    pub all_llcs: BTreeMap<usize, Arc<Llc>>,
    pub all_cores: BTreeMap<usize, Arc<Core>>,
    pub all_cpus: BTreeMap<usize, Arc<Cpu>>,
}
205
206impl Topology {
207    fn instantiate(span: Cpumask, mut nodes: BTreeMap<usize, Node>) -> Result<Self> {
208        // Build skip indices prefixed with all_ for easy lookups. As Arc
209        // objects can only be modified while there's only one reference,
210        // skip indices must be built from bottom to top.
211        let mut topo_llcs = BTreeMap::new();
212        let mut topo_cores = BTreeMap::new();
213        let mut topo_cpus = BTreeMap::new();
214
215        for (_node_id, node) in nodes.iter_mut() {
216            let mut node_cores = BTreeMap::new();
217            let mut node_cpus = BTreeMap::new();
218
219            for (&llc_id, llc) in node.llcs.iter_mut() {
220                let llc_mut = Arc::get_mut(llc).unwrap();
221                let mut llc_cpus = BTreeMap::new();
222
223                for (&core_id, core) in llc_mut.cores.iter_mut() {
224                    let core_mut = Arc::get_mut(core).unwrap();
225                    let smt_level = core_mut.cpus.len();
226
227                    for (&cpu_id, cpu) in core_mut.cpus.iter_mut() {
228                        let cpu_mut = Arc::get_mut(cpu).unwrap();
229                        cpu_mut.smt_level = smt_level;
230
231                        if topo_cpus
232                            .insert(cpu_id, cpu.clone())
233                            .or(node_cpus.insert(cpu_id, cpu.clone()))
234                            .or(llc_cpus.insert(cpu_id, cpu.clone()))
235                            .is_some()
236                        {
237                            bail!("Duplicate CPU ID {}", cpu_id);
238                        }
239                    }
240
241                    // Note that in some weird architectures, core ids can be
242                    // duplicated in different LLC domains.
243                    topo_cores
244                        .insert(core_id, core.clone())
245                        .or(node_cores.insert(core_id, core.clone()));
246                }
247
248                llc_mut.all_cpus = llc_cpus;
249
250                if topo_llcs.insert(llc_id, llc.clone()).is_some() {
251                    bail!("Duplicate LLC ID {}", llc_id);
252                }
253            }
254
255            node.all_cores = node_cores;
256            node.all_cpus = node_cpus;
257        }
258
259        Ok(Topology {
260            nodes,
261            span,
262            smt_enabled: is_smt_active().unwrap_or(false),
263            all_llcs: topo_llcs,
264            all_cores: topo_cores,
265            all_cpus: topo_cpus,
266        })
267    }
268
269    /// Build a complete host Topology
270    pub fn new() -> Result<Topology> {
271        let span = cpus_online()?;
272        let mut topo_ctx = TopoCtx::new();
273        // If the kernel is compiled with CONFIG_NUMA, then build a topology
274        // from the NUMA hierarchy in sysfs. Otherwise, just make a single
275        // default node of ID 0 which contains all cores.
276        let nodes = if Path::new("/sys/devices/system/node").exists() {
277            create_numa_nodes(&span, &mut topo_ctx)?
278        } else {
279            create_default_node(&span, &mut topo_ctx, false)?
280        };
281
282        Self::instantiate(span, nodes)
283    }
284
285    pub fn with_flattened_llc_node() -> Result<Topology> {
286        let span = cpus_online()?;
287        let mut topo_ctx = TopoCtx::new();
288        let nodes = create_default_node(&span, &mut topo_ctx, true)?;
289        Self::instantiate(span, nodes)
290    }
291
292    /// Get a vec of all GPUs on the hosts.
293    #[cfg(feature = "gpu-topology")]
294    pub fn gpus(&self) -> BTreeMap<GpuIndex, &Gpu> {
295        let mut gpus = BTreeMap::new();
296        for node in self.nodes.values() {
297            for (idx, gpu) in &node.gpus {
298                gpus.insert(*idx, gpu);
299            }
300        }
301        gpus
302    }
303
304    /// Returns whether the Topology has a hybrid architecture of big and little cores.
305    pub fn has_little_cores(&self) -> bool {
306        self.all_cores
307            .values()
308            .any(|c| c.core_type == CoreType::Little)
309    }
310
311    /// Returns a vector that maps the index of each logical CPU to the
312    /// sibling CPU. This represents the "next sibling" CPU within a package
313    /// in systems that support SMT. The sibling CPU is the other logical
314    /// CPU that shares the physical resources of the same physical core.
315    ///
316    /// Assuming each core holds exactly at most two cpus.
317    pub fn sibling_cpus(&self) -> Vec<i32> {
318        let mut sibling_cpu = vec![-1i32; *NR_CPUS_POSSIBLE];
319        for core in self.all_cores.values() {
320            let mut first = -1i32;
321            for &cpu in core.cpus.keys() {
322                if first < 0 {
323                    first = cpu as i32;
324                } else {
325                    sibling_cpu[first as usize] = cpu as i32;
326                    sibling_cpu[cpu] = first;
327                    break;
328                }
329            }
330        }
331        sibling_cpu
332    }
333}
334
335/******************************************************
336 * Helper structs/functions for creating the Topology *
337 ******************************************************/
338/// TopoCtx is a helper struct used to build a topology.
struct TopoCtx {
    /// Mapping of (node id, package id, kernel core id) -> dense core id
    node_core_kernel_ids: BTreeMap<(usize, usize, usize), usize>,
    /// Mapping of (node id, package id, kernel LLC id) -> dense LLC id
    node_llc_kernel_ids: BTreeMap<(usize, usize, usize), usize>,
    /// Mapping of L2 shared_cpu_list contents -> L2 cache id
    l2_ids: BTreeMap<String, usize>,
    /// Mapping of L3 shared_cpu_list contents -> L3 cache id
    l3_ids: BTreeMap<String, usize>,
}
349
350impl TopoCtx {
351    fn new() -> TopoCtx {
352        let core_kernel_ids = BTreeMap::new();
353        let llc_kernel_ids = BTreeMap::new();
354        let l2_ids = BTreeMap::new();
355        let l3_ids = BTreeMap::new();
356        TopoCtx {
357            node_core_kernel_ids: core_kernel_ids,
358            node_llc_kernel_ids: llc_kernel_ids,
359            l2_ids,
360            l3_ids,
361        }
362    }
363}
364
365fn cpus_online() -> Result<Cpumask> {
366    let path = "/sys/devices/system/cpu/online";
367    let online = std::fs::read_to_string(path)?;
368    Cpumask::from_cpulist(&online)
369}
370
371fn get_cache_id(topo_ctx: &mut TopoCtx, cache_level_path: &PathBuf, cache_level: usize) -> usize {
372    // Check if the cache id is already cached
373    let id_map = match cache_level {
374        2 => &mut topo_ctx.l2_ids,
375        3 => &mut topo_ctx.l3_ids,
376        _ => return usize::MAX,
377    };
378
379    let path = &cache_level_path.join("shared_cpu_list");
380    let key = match std::fs::read_to_string(path) {
381        Ok(key) => key,
382        Err(_) => return usize::MAX,
383    };
384
385    let id = *id_map.get(&key).unwrap_or(&usize::MAX);
386    if id != usize::MAX {
387        return id;
388    }
389
390    // In case of a cache miss, try to get the id from the sysfs first.
391    let id = read_from_file(&cache_level_path.join("id")).unwrap_or(usize::MAX);
392    if id != usize::MAX {
393        // Keep the id in the map
394        id_map.insert(key, id);
395        return id;
396    }
397
398    // If the id file does not exist, assign an id and keep it in the map.
399    let id = id_map.len();
400    id_map.insert(key, id);
401
402    id
403}
404
405fn get_per_cpu_cache_size(cache_path: &PathBuf) -> Result<usize> {
406    let path_str = cache_path.to_str().unwrap();
407    let paths = glob(&(path_str.to_owned() + "/index[0-9]*"))?;
408    let mut tot_size = 0;
409
410    for index in paths.filter_map(Result::ok) {
411        // If there is no size information under sysfs (e.g., many ARM SoCs),
412        // give 1024 as a default value. 1024 is small enough compared to the
413        // real cache size of the CPU, but it is large enough to give a penalty
414        // when multiple CPUs share the cache.
415        let size = read_file_byte(&index.join("size")).unwrap_or(1024_usize);
416        let cpulist: String = read_from_file(&index.join("shared_cpu_list"))?;
417        let num_cpus = read_cpulist(&cpulist)?.len();
418        tot_size += size / num_cpus;
419    }
420
421    Ok(tot_size)
422}
423
/// Read sysfs information for CPU `id` and insert a corresponding Cpu
/// object into `node`, creating its parent Llc and Core objects on demand.
///
/// `cs` supplies the capacity source used to estimate cpu_capacity and to
/// classify the core as big/little. `flatten_llc` forces every CPU into
/// kernel LLC id 0 regardless of the cache hierarchy. Offline CPUs are
/// silently skipped.
#[allow(clippy::too_many_arguments)]
fn create_insert_cpu(
    id: usize,
    node: &mut Node,
    online_mask: &Cpumask,
    topo_ctx: &mut TopoCtx,
    cs: &CapacitySource,
    flatten_llc: bool,
) -> Result<()> {
    // CPU is offline. The Topology hierarchy is read-only, and assumes
    // that hotplug will cause the scheduler to restart. Thus, we can
    // just skip this CPU altogether.
    if !online_mask.test_cpu(id) {
        return Ok(());
    }

    let cpu_str = format!("/sys/devices/system/cpu/cpu{}", id);
    let cpu_path = Path::new(&cpu_str);

    // Physical core ID
    let top_path = cpu_path.join("topology");
    let core_kernel_id = read_from_file(&top_path.join("core_id"))?;
    let package_id = read_from_file(&top_path.join("physical_package_id"))?;
    let cluster_id = read_from_file(&top_path.join("cluster_id"))?;

    // Evaluate L2, L3 and LLC cache IDs.
    //
    // Use ID 0 if we fail to detect the cache hierarchy. This seems to happen on certain SKUs, so
    // if there's no cache information then we have no option but to assume a single unified cache
    // per node.
    let cache_path = cpu_path.join("cache");
    let l2_id = get_cache_id(topo_ctx, &cache_path.join(format!("index{}", 2)), 2);
    let l3_id = get_cache_id(topo_ctx, &cache_path.join(format!("index{}", 3)), 3);
    // The LLC is the L3 cache when present, otherwise the L2 cache.
    let llc_kernel_id = if flatten_llc {
        0
    } else if l3_id == usize::MAX {
        l2_id
    } else {
        l3_id
    };

    // Per-CPU cache size
    let cache_size = get_per_cpu_cache_size(&cache_path).unwrap_or(0_usize);

    // Min and max frequencies. If the kernel is not compiled with
    // CONFIG_CPU_FREQ, just assume 0 for both frequencies.
    let freq_path = cpu_path.join("cpufreq");
    let min_freq = read_from_file(&freq_path.join("scaling_min_freq")).unwrap_or(0_usize);
    let max_freq = read_from_file(&freq_path.join("scaling_max_freq")).unwrap_or(0_usize);
    let base_freq = read_from_file(&freq_path.join("base_frequency")).unwrap_or(max_freq);
    let trans_lat_ns =
        read_from_file(&freq_path.join("cpuinfo_transition_latency")).unwrap_or(0_usize);

    // Cpu capacity. A missing capacity file falls back to max_rcap, i.e. a
    // full scaled capacity of 1024.
    let cap_path = cpu_path.join(cs.suffix.clone());
    let rcap = read_from_file(&cap_path).unwrap_or(cs.max_rcap);
    let cpu_capacity = (rcap * 1024) / cs.max_rcap;

    // Power management
    let power_path = cpu_path.join("power");
    let pm_qos_resume_latency_us =
        read_from_file(&power_path.join("pm_qos_resume_latency_us")).unwrap_or(0_usize);

    // Remap the kernel LLC id to a dense, monotonically increasing id,
    // unique per (node, package, kernel LLC id) tuple.
    let num_llcs = topo_ctx.node_llc_kernel_ids.len();
    let llc_id = topo_ctx
        .node_llc_kernel_ids
        .entry((node.id, package_id, llc_kernel_id))
        .or_insert(num_llcs);

    let llc = node.llcs.entry(*llc_id).or_insert(Arc::new(Llc {
        id: *llc_id,
        cores: BTreeMap::new(),
        span: Cpumask::new(),
        all_cpus: BTreeMap::new(),

        node_id: node.id,
        kernel_id: llc_kernel_id,
    }));
    let llc_mut = Arc::get_mut(llc).unwrap();

    // Classify the core: turbo-big when its raw capacity is the system
    // maximum while the average is lower; little only on heterogeneous
    // systems with below-average capacity; big otherwise.
    let core_type = if cs.avg_rcap < cs.max_rcap && rcap == cs.max_rcap {
        CoreType::Big { turbo: true }
    } else if !cs.has_biglittle || rcap >= cs.avg_rcap {
        CoreType::Big { turbo: false }
    } else {
        CoreType::Little
    };

    // Same dense remapping as for LLC ids above, but for core ids.
    let num_cores = topo_ctx.node_core_kernel_ids.len();
    let core_id = topo_ctx
        .node_core_kernel_ids
        .entry((node.id, package_id, core_kernel_id))
        .or_insert(num_cores);

    let core = llc_mut.cores.entry(*core_id).or_insert(Arc::new(Core {
        id: *core_id,
        cpus: BTreeMap::new(),
        span: Cpumask::new(),
        core_type: core_type.clone(),

        llc_id: *llc_id,
        node_id: node.id,
        kernel_id: core_kernel_id,
        cluster_id: cluster_id,
    }));
    let core_mut = Arc::get_mut(core).unwrap();

    core_mut.cpus.insert(
        id,
        Arc::new(Cpu {
            id,
            min_freq,
            max_freq,
            base_freq,
            cpu_capacity,
            smt_level: 0, // Will be initialized at instantiate().
            pm_qos_resume_latency_us,
            trans_lat_ns,
            l2_id,
            l3_id,
            cache_size,
            core_type: core_type.clone(),

            core_id: *core_id,
            llc_id: *llc_id,
            node_id: node.id,
            package_id,
            cluster_id,
        }),
    );

    if node.span.test_cpu(id) {
        bail!("Node {} already had CPU {}", node.id, id);
    }

    // Update all of the devices' spans to include this CPU.
    core_mut.span.set_cpu(id)?;
    llc_mut.span.set_cpu(id)?;
    node.span.set_cpu(id)?;

    Ok(())
}
566
567fn read_cpu_ids() -> Result<Vec<usize>> {
568    let mut cpu_ids = vec![];
569    let cpu_paths = glob("/sys/devices/system/cpu/cpu[0-9]*")?;
570    for cpu_path in cpu_paths.filter_map(Result::ok) {
571        let cpu_str = cpu_path.to_str().unwrap().trim();
572        match sscanf!(cpu_str, "/sys/devices/system/cpu/cpu{usize}") {
573            Ok(val) => cpu_ids.push(val),
574            Err(_) => {
575                bail!("Failed to parse cpu ID {}", cpu_str);
576            }
577        }
578    }
579    cpu_ids.sort();
580    Ok(cpu_ids)
581}
582
/// Describes where and how raw CPU capacity values are read from sysfs.
struct CapacitySource {
    /// Path suffix after /sys/devices/system/cpu/cpuX
    suffix: String,
    /// Average raw capacity value over all CPUs
    avg_rcap: usize,
    /// Maximum raw capacity value
    max_rcap: usize,
    /// Does a system have little cores?
    has_biglittle: bool,
}
593
/// Pick the most precise usable sysfs source for per-CPU capacity
/// estimation, and compute the average and maximum raw capacity over all
/// CPUs along with a heuristic big/LITTLE flag.
///
/// Returns None only when globbing the sysfs CPU entries fails. When no
/// source provides usable data, an empty suffix with avg/max of 1024 is
/// returned, which makes every CPU report a full (1024) capacity.
fn get_capacity_source() -> Option<CapacitySource> {
    // Sources for guessing cpu_capacity under /sys/devices/system/cpu/cpuX.
    // They should be ordered from the most precise to the least precise.
    let sources = [
        "cpufreq/amd_pstate_highest_perf",
        "acpi_cppc/highest_perf",
        "cpu_capacity",
        "cpufreq/cpuinfo_max_freq",
    ];

    // Find the most precise source for cpu_capacity estimation.
    let prefix = "/sys/devices/system/cpu/cpu0";
    let mut raw_capacity;
    let mut suffix = sources[sources.len() - 1];
    'outer: for src in sources {
        let path_str = [prefix, src].join("/");
        let path = Path::new(&path_str);
        raw_capacity = read_from_file(&path).unwrap_or(0_usize);
        if raw_capacity > 0 {
            // It would be an okay source...
            suffix = src;
            // But double-check if the source has meaningful information,
            // i.e. at least two CPUs report different capacity values.
            let cpu_paths = glob("/sys/devices/system/cpu/cpu[0-9]*").ok()?;
            for cpu_path in cpu_paths.filter_map(Result::ok) {
                let raw_capacity2 = read_from_file(&cpu_path.join(suffix)).unwrap_or(0_usize);
                if raw_capacity != raw_capacity2 {
                    break 'outer;
                }
            }
            // The source exists, but it tells that all CPUs have the same
            // capacity. Let's search more if there is any source that can
            // tell the capacity differences among CPUs. This can happen when
            // a buggy driver lies (e.g., "acpi_cppc/highest_perf").
        }
    }

    // Find the max raw_capacity value for scaling to 1024, plus the min and
    // the average for the big/LITTLE heuristic below.
    let mut max_rcap = 0;
    let mut min_rcap = usize::MAX;
    let mut avg_rcap = 0;
    let mut nr_cpus = 0;
    let mut has_biglittle = false;
    let cpu_paths = glob("/sys/devices/system/cpu/cpu[0-9]*").ok()?;
    for cpu_path in cpu_paths.filter_map(Result::ok) {
        let rcap = read_from_file(&cpu_path.join(suffix)).unwrap_or(0_usize);
        if max_rcap < rcap {
            max_rcap = rcap;
        }
        if min_rcap > rcap {
            min_rcap = rcap;
        }
        avg_rcap += rcap;
        nr_cpus += 1;
    }

    if nr_cpus == 0 || max_rcap == 0 {
        // No usable source: the empty suffix makes later reads fail, so
        // every CPU falls back to max_rcap and thus full capacity.
        suffix = "";
        avg_rcap = 1024;
        max_rcap = 1024;
        warn!("CPU capacity information is not available under sysfs.");
    } else {
        avg_rcap /= nr_cpus;
        // We consider a system to have a heterogeneous CPU architecture only
        // when there is a significant capacity gap (e.g., 1.3x). CPU capacities
        // can still vary in a homogeneous architecture—for instance, due to
        // chip binning or when only a subset of CPUs supports turbo boost.
        //
        // Note that we need a more systematic approach to accurately detect
        // big/LITTLE architectures across various SoC designs. The current
        // approach, with a significant capacity difference, is somewhat ad-hoc.
        has_biglittle = max_rcap as f32 >= (1.3 * min_rcap as f32);
    }

    Some(CapacitySource {
        suffix: suffix.to_string(),
        avg_rcap,
        max_rcap,
        has_biglittle,
    })
}
674
675fn is_smt_active() -> Option<bool> {
676    let smt_on: u8 = read_from_file(Path::new("/sys/devices/system/cpu/smt/active")).ok()?;
677    Some(smt_on == 1)
678}
679
680fn create_default_node(
681    online_mask: &Cpumask,
682    topo_ctx: &mut TopoCtx,
683    flatten_llc: bool,
684) -> Result<BTreeMap<usize, Node>> {
685    let mut nodes = BTreeMap::<usize, Node>::new();
686
687    let mut node = Node {
688        id: 0,
689        distance: vec![],
690        llcs: BTreeMap::new(),
691        span: Cpumask::new(),
692        #[cfg(feature = "gpu-topology")]
693        gpus: BTreeMap::new(),
694        all_cores: BTreeMap::new(),
695        all_cpus: BTreeMap::new(),
696    };
697
698    #[cfg(feature = "gpu-topology")]
699    {
700        let system_gpus = create_gpus();
701        if let Some(gpus) = system_gpus.get(&0) {
702            for gpu in gpus {
703                node.gpus.insert(gpu.index, gpu.clone());
704            }
705        }
706    }
707
708    if !Path::new("/sys/devices/system/cpu").exists() {
709        bail!("/sys/devices/system/cpu sysfs node not found");
710    }
711
712    let cs = get_capacity_source().unwrap();
713    let cpu_ids = read_cpu_ids()?;
714    for cpu_id in cpu_ids.iter() {
715        create_insert_cpu(*cpu_id, &mut node, online_mask, topo_ctx, &cs, flatten_llc)?;
716    }
717
718    nodes.insert(node.id, node);
719
720    Ok(nodes)
721}
722
723fn create_numa_nodes(
724    online_mask: &Cpumask,
725    topo_ctx: &mut TopoCtx,
726) -> Result<BTreeMap<usize, Node>> {
727    let mut nodes = BTreeMap::<usize, Node>::new();
728
729    #[cfg(feature = "gpu-topology")]
730    let system_gpus = create_gpus();
731
732    let numa_paths = glob("/sys/devices/system/node/node*")?;
733    for numa_path in numa_paths.filter_map(Result::ok) {
734        let numa_str = numa_path.to_str().unwrap().trim();
735        let node_id = match sscanf!(numa_str, "/sys/devices/system/node/node{usize}") {
736            Ok(val) => val,
737            Err(_) => {
738                bail!("Failed to parse NUMA node ID {}", numa_str);
739            }
740        };
741        let distance = read_file_usize_vec(
742            Path::new(&format!(
743                "/sys/devices/system/node/node{}/distance",
744                node_id
745            )),
746            ' ',
747        )?;
748        let mut node = Node {
749            id: node_id,
750            distance,
751            llcs: BTreeMap::new(),
752            span: Cpumask::new(),
753
754            all_cores: BTreeMap::new(),
755            all_cpus: BTreeMap::new(),
756
757            #[cfg(feature = "gpu-topology")]
758            gpus: BTreeMap::new(),
759        };
760
761        #[cfg(feature = "gpu-topology")]
762        {
763            if let Some(gpus) = system_gpus.get(&node_id) {
764                for gpu in gpus {
765                    node.gpus.insert(gpu.index, gpu.clone());
766                }
767            }
768        }
769
770        let cpu_pattern = numa_path.join("cpu[0-9]*");
771        let cpu_paths = glob(cpu_pattern.to_string_lossy().as_ref())?;
772        let cs = get_capacity_source().unwrap();
773        let mut cpu_ids = vec![];
774        for cpu_path in cpu_paths.filter_map(Result::ok) {
775            let cpu_str = cpu_path.to_str().unwrap().trim();
776            let cpu_id = match sscanf!(cpu_str, "/sys/devices/system/node/node{usize}/cpu{usize}") {
777                Ok((_, val)) => val,
778                Err(_) => {
779                    bail!("Failed to parse cpu ID {}", cpu_str);
780                }
781            };
782            cpu_ids.push(cpu_id);
783        }
784        cpu_ids.sort();
785
786        for cpu_id in cpu_ids {
787            create_insert_cpu(cpu_id, &mut node, online_mask, topo_ctx, &cs, false)?;
788        }
789
790        nodes.insert(node.id, node);
791    }
792    Ok(nodes)
793}