1use crate::cpumask::read_cpulist;
73use crate::misc::read_file_byte;
74use crate::misc::read_file_usize_vec;
75use crate::misc::read_from_file;
76use crate::Cpumask;
77use anyhow::bail;
78use anyhow::Result;
79use glob::glob;
80use log::warn;
81use sscanf::sscanf;
82use std::collections::BTreeMap;
83use std::path::Path;
84use std::path::PathBuf;
85use std::sync::Arc;
86
87#[cfg(feature = "gpu-topology")]
88use crate::gpu::{create_gpus, Gpu, GpuIndex};
89
lazy_static::lazy_static! {
    /// Number of CPU IDs on this system: the largest CPU number found under
    /// `/sys/devices/system/cpu/cpu[0-9]*` plus one.
    ///
    /// The first access panics if the sysfs CPU entries cannot be enumerated
    /// or if no CPU entry is found.
    pub static ref NR_CPU_IDS: usize = read_cpu_ids().unwrap().last().unwrap() + 1;

    /// Number of possible CPUs as reported by `libbpf_rs::num_possible_cpus()`.
    ///
    /// The first access panics if libbpf reports an error.
    pub static ref NR_CPUS_POSSIBLE: usize = libbpf_rs::num_possible_cpus().unwrap();
}
107
/// Performance class of a CPU/core, derived from its raw capacity relative to
/// the capacity statistics of all CPUs (see `create_insert_cpu`).
#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum CoreType {
    /// A big core. `turbo` is set when the CPU's raw capacity equals the
    /// system-wide maximum while the average raw capacity is below that
    /// maximum.
    Big { turbo: bool },
    /// A little core: the system was detected as big.LITTLE and this CPU's
    /// raw capacity is below the system-wide average.
    Little,
}
113
/// A single online logical CPU and the attributes read for it from sysfs.
#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Cpu {
    /// CPU number, as in `/sys/devices/system/cpu/cpu<id>`.
    pub id: usize,
    /// Value of `cpufreq/scaling_min_freq`, or 0 when unreadable.
    pub min_freq: usize,
    /// Value of `cpufreq/scaling_max_freq`, or 0 when unreadable.
    pub max_freq: usize,
    /// Value of `cpufreq/base_frequency`; falls back to `max_freq` when
    /// unreadable.
    pub base_freq: usize,
    /// Raw capacity of this CPU scaled so that the highest raw capacity in
    /// the system maps to 1024.
    pub cpu_capacity: usize,
    /// Number of CPUs in this CPU's core; filled in by
    /// `Topology::instantiate`.
    pub smt_level: usize,
    /// Value of `power/pm_qos_resume_latency_us`, or 0 when unreadable.
    pub pm_qos_resume_latency_us: usize,
    /// Value of `cpufreq/cpuinfo_transition_latency`, or 0 when unreadable.
    pub trans_lat_ns: usize,
    /// L2 cache ID as resolved by `get_cache_id`; `usize::MAX` when unknown.
    pub l2_id: usize,
    /// L3 cache ID as resolved by `get_cache_id`; `usize::MAX` when unknown.
    pub l3_id: usize,
    /// This CPU's share of all its cache levels (each cache's size divided
    /// by the number of CPUs sharing it, summed); 0 when it cannot be
    /// computed.
    pub cache_size: usize,
    /// Performance class of this CPU.
    pub core_type: CoreType,

    /// Topology-assigned ID of the core this CPU belongs to.
    pub core_id: usize,
    /// Topology-assigned ID of the LLC this CPU belongs to.
    pub llc_id: usize,
    /// ID of the node this CPU belongs to.
    pub node_id: usize,
    /// Value of `topology/physical_package_id`.
    pub package_id: usize,
    /// Value of `topology/cluster_id`.
    pub cluster_id: isize,
}
141
/// A physical core and the logical CPUs that belong to it.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Core {
    /// Topology-assigned core ID: allocated sequentially, one per distinct
    /// (node, package, kernel core ID) combination.
    pub id: usize,
    /// Core ID as read from the CPU's `topology/core_id` sysfs file.
    pub kernel_id: usize,
    /// Value of `topology/cluster_id` of the CPU that created this core.
    pub cluster_id: isize,
    /// CPUs in this core, keyed by CPU ID.
    pub cpus: BTreeMap<usize, Arc<Cpu>>,
    /// Mask of the CPUs in this core.
    pub span: Cpumask,
    /// Performance class, taken from the first CPU inserted into this core.
    pub core_type: CoreType,

    /// Topology-assigned ID of the LLC containing this core.
    pub llc_id: usize,
    /// ID of the node containing this core.
    pub node_id: usize,
}
158
/// A last-level-cache domain and the cores that share it.
#[derive(Debug, Clone)]
pub struct Llc {
    /// Topology-assigned LLC ID: allocated sequentially, one per distinct
    /// (node, package, kernel LLC ID) combination.
    pub id: usize,
    /// Cache ID this domain was built from: the L3 ID, the L2 ID when no L3
    /// ID is available, or 0 when LLCs are flattened.
    pub kernel_id: usize,
    /// Cores in this LLC, keyed by topology-assigned core ID.
    pub cores: BTreeMap<usize, Arc<Core>>,
    /// Mask of the CPUs in this LLC.
    pub span: Cpumask,

    /// ID of the node containing this LLC.
    pub node_id: usize,

    /// All CPUs in this LLC, keyed by CPU ID; filled in by
    /// `Topology::instantiate`.
    pub all_cpus: BTreeMap<usize, Arc<Cpu>>,
}
175
/// A NUMA node (or the single default node on systems without NUMA sysfs
/// entries) and everything it contains.
#[derive(Debug, Clone)]
pub struct Node {
    /// Node ID; parsed from `/sys/devices/system/node/node<id>`, or 0 for
    /// the default node.
    pub id: usize,
    /// Values read from the node's sysfs `distance` file; empty for the
    /// default node.
    pub distance: Vec<usize>,
    /// LLCs in this node, keyed by topology-assigned LLC ID.
    pub llcs: BTreeMap<usize, Arc<Llc>>,
    /// Mask of the CPUs in this node.
    pub span: Cpumask,

    /// All cores in this node, keyed by core ID; filled in by
    /// `Topology::instantiate`.
    pub all_cores: BTreeMap<usize, Arc<Core>>,
    /// All CPUs in this node, keyed by CPU ID; filled in by
    /// `Topology::instantiate`.
    pub all_cpus: BTreeMap<usize, Arc<Cpu>>,

    /// GPUs reported for this node by `create_gpus`.
    #[cfg(feature = "gpu-topology")]
    pub gpus: BTreeMap<GpuIndex, Gpu>,
}
191
/// The host topology: nodes containing LLCs containing cores containing
/// CPUs, plus flattened lookup maps for each level.
#[derive(Debug)]
pub struct Topology {
    /// Nodes keyed by node ID.
    pub nodes: BTreeMap<usize, Node>,
    /// Mask of the online CPUs the topology was built from.
    pub span: Cpumask,
    /// Whether `/sys/devices/system/cpu/smt/active` reads as 1; `false` when
    /// the file cannot be read.
    pub smt_enabled: bool,

    /// Every LLC in the system, keyed by topology-assigned LLC ID.
    pub all_llcs: BTreeMap<usize, Arc<Llc>>,
    /// Every core in the system, keyed by topology-assigned core ID.
    pub all_cores: BTreeMap<usize, Arc<Core>>,
    /// Every CPU in the system, keyed by CPU ID.
    pub all_cpus: BTreeMap<usize, Arc<Cpu>>,
}
205
206impl Topology {
207 fn instantiate(span: Cpumask, mut nodes: BTreeMap<usize, Node>) -> Result<Self> {
208 let mut topo_llcs = BTreeMap::new();
212 let mut topo_cores = BTreeMap::new();
213 let mut topo_cpus = BTreeMap::new();
214
215 for (_node_id, node) in nodes.iter_mut() {
216 let mut node_cores = BTreeMap::new();
217 let mut node_cpus = BTreeMap::new();
218
219 for (&llc_id, llc) in node.llcs.iter_mut() {
220 let llc_mut = Arc::get_mut(llc).unwrap();
221 let mut llc_cpus = BTreeMap::new();
222
223 for (&core_id, core) in llc_mut.cores.iter_mut() {
224 let core_mut = Arc::get_mut(core).unwrap();
225 let smt_level = core_mut.cpus.len();
226
227 for (&cpu_id, cpu) in core_mut.cpus.iter_mut() {
228 let cpu_mut = Arc::get_mut(cpu).unwrap();
229 cpu_mut.smt_level = smt_level;
230
231 if topo_cpus
232 .insert(cpu_id, cpu.clone())
233 .or(node_cpus.insert(cpu_id, cpu.clone()))
234 .or(llc_cpus.insert(cpu_id, cpu.clone()))
235 .is_some()
236 {
237 bail!("Duplicate CPU ID {}", cpu_id);
238 }
239 }
240
241 topo_cores
244 .insert(core_id, core.clone())
245 .or(node_cores.insert(core_id, core.clone()));
246 }
247
248 llc_mut.all_cpus = llc_cpus;
249
250 if topo_llcs.insert(llc_id, llc.clone()).is_some() {
251 bail!("Duplicate LLC ID {}", llc_id);
252 }
253 }
254
255 node.all_cores = node_cores;
256 node.all_cpus = node_cpus;
257 }
258
259 Ok(Topology {
260 nodes,
261 span,
262 smt_enabled: is_smt_active().unwrap_or(false),
263 all_llcs: topo_llcs,
264 all_cores: topo_cores,
265 all_cpus: topo_cpus,
266 })
267 }
268
269 pub fn new() -> Result<Topology> {
271 let span = cpus_online()?;
272 let mut topo_ctx = TopoCtx::new();
273 let nodes = if Path::new("/sys/devices/system/node").exists() {
277 create_numa_nodes(&span, &mut topo_ctx)?
278 } else {
279 create_default_node(&span, &mut topo_ctx, false)?
280 };
281
282 Self::instantiate(span, nodes)
283 }
284
285 pub fn with_flattened_llc_node() -> Result<Topology> {
286 let span = cpus_online()?;
287 let mut topo_ctx = TopoCtx::new();
288 let nodes = create_default_node(&span, &mut topo_ctx, true)?;
289 Self::instantiate(span, nodes)
290 }
291
292 #[cfg(feature = "gpu-topology")]
294 pub fn gpus(&self) -> BTreeMap<GpuIndex, &Gpu> {
295 let mut gpus = BTreeMap::new();
296 for node in self.nodes.values() {
297 for (idx, gpu) in &node.gpus {
298 gpus.insert(*idx, gpu);
299 }
300 }
301 gpus
302 }
303
304 pub fn has_little_cores(&self) -> bool {
306 self.all_cores
307 .values()
308 .any(|c| c.core_type == CoreType::Little)
309 }
310
311 pub fn sibling_cpus(&self) -> Vec<i32> {
318 let mut sibling_cpu = vec![-1i32; *NR_CPUS_POSSIBLE];
319 for core in self.all_cores.values() {
320 let mut first = -1i32;
321 for &cpu in core.cpus.keys() {
322 if first < 0 {
323 first = cpu as i32;
324 } else {
325 sibling_cpu[first as usize] = cpu as i32;
326 sibling_cpu[cpu] = first;
327 break;
328 }
329 }
330 }
331 sibling_cpu
332 }
333}
334
/// Scratch state shared across the creation of all nodes, used to hand out
/// unique IDs while the topology is being built.
struct TopoCtx {
    /// Maps (node ID, package ID, kernel core ID) to the topology-assigned
    /// core ID.
    node_core_kernel_ids: BTreeMap<(usize, usize, usize), usize>,
    /// Maps (node ID, package ID, kernel LLC ID) to the topology-assigned
    /// LLC ID.
    node_llc_kernel_ids: BTreeMap<(usize, usize, usize), usize>,
    /// Maps the contents of an L2 cache's `shared_cpu_list` to its cache ID.
    l2_ids: BTreeMap<String, usize>,
    /// Maps the contents of an L3 cache's `shared_cpu_list` to its cache ID.
    l3_ids: BTreeMap<String, usize>,
}
349
350impl TopoCtx {
351 fn new() -> TopoCtx {
352 let core_kernel_ids = BTreeMap::new();
353 let llc_kernel_ids = BTreeMap::new();
354 let l2_ids = BTreeMap::new();
355 let l3_ids = BTreeMap::new();
356 TopoCtx {
357 node_core_kernel_ids: core_kernel_ids,
358 node_llc_kernel_ids: llc_kernel_ids,
359 l2_ids,
360 l3_ids,
361 }
362 }
363}
364
365fn cpus_online() -> Result<Cpumask> {
366 let path = "/sys/devices/system/cpu/online";
367 let online = std::fs::read_to_string(path)?;
368 Cpumask::from_cpulist(&online)
369}
370
371fn get_cache_id(topo_ctx: &mut TopoCtx, cache_level_path: &PathBuf, cache_level: usize) -> usize {
372 let id_map = match cache_level {
374 2 => &mut topo_ctx.l2_ids,
375 3 => &mut topo_ctx.l3_ids,
376 _ => return usize::MAX,
377 };
378
379 let path = &cache_level_path.join("shared_cpu_list");
380 let key = match std::fs::read_to_string(path) {
381 Ok(key) => key,
382 Err(_) => return usize::MAX,
383 };
384
385 let id = *id_map.get(&key).unwrap_or(&usize::MAX);
386 if id != usize::MAX {
387 return id;
388 }
389
390 let id = read_from_file(&cache_level_path.join("id")).unwrap_or(usize::MAX);
392 if id != usize::MAX {
393 id_map.insert(key, id);
395 return id;
396 }
397
398 let id = id_map.len();
400 id_map.insert(key, id);
401
402 id
403}
404
405fn get_per_cpu_cache_size(cache_path: &PathBuf) -> Result<usize> {
406 let path_str = cache_path.to_str().unwrap();
407 let paths = glob(&(path_str.to_owned() + "/index[0-9]*"))?;
408 let mut tot_size = 0;
409
410 for index in paths.filter_map(Result::ok) {
411 let size = read_file_byte(&index.join("size")).unwrap_or(1024_usize);
416 let cpulist: String = read_from_file(&index.join("shared_cpu_list"))?;
417 let num_cpus = read_cpulist(&cpulist)?.len();
418 tot_size += size / num_cpus;
419 }
420
421 Ok(tot_size)
422}
423
424#[allow(clippy::too_many_arguments)]
425fn create_insert_cpu(
426 id: usize,
427 node: &mut Node,
428 online_mask: &Cpumask,
429 topo_ctx: &mut TopoCtx,
430 cs: &CapacitySource,
431 flatten_llc: bool,
432) -> Result<()> {
433 if !online_mask.test_cpu(id) {
437 return Ok(());
438 }
439
440 let cpu_str = format!("/sys/devices/system/cpu/cpu{}", id);
441 let cpu_path = Path::new(&cpu_str);
442
443 let top_path = cpu_path.join("topology");
445 let core_kernel_id = read_from_file(&top_path.join("core_id"))?;
446 let package_id = read_from_file(&top_path.join("physical_package_id"))?;
447 let cluster_id = read_from_file(&top_path.join("cluster_id"))?;
448
449 let cache_path = cpu_path.join("cache");
455 let l2_id = get_cache_id(topo_ctx, &cache_path.join(format!("index{}", 2)), 2);
456 let l3_id = get_cache_id(topo_ctx, &cache_path.join(format!("index{}", 3)), 3);
457 let llc_kernel_id = if flatten_llc {
458 0
459 } else if l3_id == usize::MAX {
460 l2_id
461 } else {
462 l3_id
463 };
464
465 let cache_size = get_per_cpu_cache_size(&cache_path).unwrap_or(0_usize);
467
468 let freq_path = cpu_path.join("cpufreq");
471 let min_freq = read_from_file(&freq_path.join("scaling_min_freq")).unwrap_or(0_usize);
472 let max_freq = read_from_file(&freq_path.join("scaling_max_freq")).unwrap_or(0_usize);
473 let base_freq = read_from_file(&freq_path.join("base_frequency")).unwrap_or(max_freq);
474 let trans_lat_ns =
475 read_from_file(&freq_path.join("cpuinfo_transition_latency")).unwrap_or(0_usize);
476
477 let cap_path = cpu_path.join(cs.suffix.clone());
479 let rcap = read_from_file(&cap_path).unwrap_or(cs.max_rcap);
480 let cpu_capacity = (rcap * 1024) / cs.max_rcap;
481
482 let power_path = cpu_path.join("power");
484 let pm_qos_resume_latency_us =
485 read_from_file(&power_path.join("pm_qos_resume_latency_us")).unwrap_or(0_usize);
486
487 let num_llcs = topo_ctx.node_llc_kernel_ids.len();
488 let llc_id = topo_ctx
489 .node_llc_kernel_ids
490 .entry((node.id, package_id, llc_kernel_id))
491 .or_insert(num_llcs);
492
493 let llc = node.llcs.entry(*llc_id).or_insert(Arc::new(Llc {
494 id: *llc_id,
495 cores: BTreeMap::new(),
496 span: Cpumask::new(),
497 all_cpus: BTreeMap::new(),
498
499 node_id: node.id,
500 kernel_id: llc_kernel_id,
501 }));
502 let llc_mut = Arc::get_mut(llc).unwrap();
503
504 let core_type = if cs.avg_rcap < cs.max_rcap && rcap == cs.max_rcap {
505 CoreType::Big { turbo: true }
506 } else if !cs.has_biglittle || rcap >= cs.avg_rcap {
507 CoreType::Big { turbo: false }
508 } else {
509 CoreType::Little
510 };
511
512 let num_cores = topo_ctx.node_core_kernel_ids.len();
513 let core_id = topo_ctx
514 .node_core_kernel_ids
515 .entry((node.id, package_id, core_kernel_id))
516 .or_insert(num_cores);
517
518 let core = llc_mut.cores.entry(*core_id).or_insert(Arc::new(Core {
519 id: *core_id,
520 cpus: BTreeMap::new(),
521 span: Cpumask::new(),
522 core_type: core_type.clone(),
523
524 llc_id: *llc_id,
525 node_id: node.id,
526 kernel_id: core_kernel_id,
527 cluster_id: cluster_id,
528 }));
529 let core_mut = Arc::get_mut(core).unwrap();
530
531 core_mut.cpus.insert(
532 id,
533 Arc::new(Cpu {
534 id,
535 min_freq,
536 max_freq,
537 base_freq,
538 cpu_capacity,
539 smt_level: 0, pm_qos_resume_latency_us,
541 trans_lat_ns,
542 l2_id,
543 l3_id,
544 cache_size,
545 core_type: core_type.clone(),
546
547 core_id: *core_id,
548 llc_id: *llc_id,
549 node_id: node.id,
550 package_id,
551 cluster_id,
552 }),
553 );
554
555 if node.span.test_cpu(id) {
556 bail!("Node {} already had CPU {}", node.id, id);
557 }
558
559 core_mut.span.set_cpu(id)?;
561 llc_mut.span.set_cpu(id)?;
562 node.span.set_cpu(id)?;
563
564 Ok(())
565}
566
567fn read_cpu_ids() -> Result<Vec<usize>> {
568 let mut cpu_ids = vec![];
569 let cpu_paths = glob("/sys/devices/system/cpu/cpu[0-9]*")?;
570 for cpu_path in cpu_paths.filter_map(Result::ok) {
571 let cpu_str = cpu_path.to_str().unwrap().trim();
572 match sscanf!(cpu_str, "/sys/devices/system/cpu/cpu{usize}") {
573 Ok(val) => cpu_ids.push(val),
574 Err(_) => {
575 bail!("Failed to parse cpu ID {}", cpu_str);
576 }
577 }
578 }
579 cpu_ids.sort();
580 Ok(cpu_ids)
581}
582
/// Describes where per-CPU raw capacity is read from, together with
/// system-wide statistics over that source.
struct CapacitySource {
    /// Path, relative to a CPU's sysfs directory, of the file holding the
    /// raw capacity; empty when no capacity information is available.
    suffix: String,
    /// Average raw capacity over all CPUs.
    avg_rcap: usize,
    /// Largest raw capacity over all CPUs.
    max_rcap: usize,
    /// Whether the largest raw capacity is at least 1.3 times the smallest.
    has_biglittle: bool,
}
593
594fn get_capacity_source() -> Option<CapacitySource> {
595 let sources = [
598 "cpufreq/amd_pstate_highest_perf",
599 "acpi_cppc/highest_perf",
600 "cpu_capacity",
601 "cpufreq/cpuinfo_max_freq",
602 ];
603
604 let prefix = "/sys/devices/system/cpu/cpu0";
606 let mut raw_capacity;
607 let mut suffix = sources[sources.len() - 1];
608 'outer: for src in sources {
609 let path_str = [prefix, src].join("/");
610 let path = Path::new(&path_str);
611 raw_capacity = read_from_file(&path).unwrap_or(0_usize);
612 if raw_capacity > 0 {
613 suffix = src;
615 let cpu_paths = glob("/sys/devices/system/cpu/cpu[0-9]*").ok()?;
617 for cpu_path in cpu_paths.filter_map(Result::ok) {
618 let raw_capacity2 = read_from_file(&cpu_path.join(suffix)).unwrap_or(0_usize);
619 if raw_capacity != raw_capacity2 {
620 break 'outer;
621 }
622 }
623 }
628 }
629
630 let mut max_rcap = 0;
632 let mut min_rcap = usize::MAX;
633 let mut avg_rcap = 0;
634 let mut nr_cpus = 0;
635 let mut has_biglittle = false;
636 let cpu_paths = glob("/sys/devices/system/cpu/cpu[0-9]*").ok()?;
637 for cpu_path in cpu_paths.filter_map(Result::ok) {
638 let rcap = read_from_file(&cpu_path.join(suffix)).unwrap_or(0_usize);
639 if max_rcap < rcap {
640 max_rcap = rcap;
641 }
642 if min_rcap > rcap {
643 min_rcap = rcap;
644 }
645 avg_rcap += rcap;
646 nr_cpus += 1;
647 }
648
649 if nr_cpus == 0 || max_rcap == 0 {
650 suffix = "";
651 avg_rcap = 1024;
652 max_rcap = 1024;
653 warn!("CPU capacity information is not available under sysfs.");
654 } else {
655 avg_rcap /= nr_cpus;
656 has_biglittle = max_rcap as f32 >= (1.3 * min_rcap as f32);
665 }
666
667 Some(CapacitySource {
668 suffix: suffix.to_string(),
669 avg_rcap,
670 max_rcap,
671 has_biglittle,
672 })
673}
674
675fn is_smt_active() -> Option<bool> {
676 let smt_on: u8 = read_from_file(Path::new("/sys/devices/system/cpu/smt/active")).ok()?;
677 Some(smt_on == 1)
678}
679
680fn create_default_node(
681 online_mask: &Cpumask,
682 topo_ctx: &mut TopoCtx,
683 flatten_llc: bool,
684) -> Result<BTreeMap<usize, Node>> {
685 let mut nodes = BTreeMap::<usize, Node>::new();
686
687 let mut node = Node {
688 id: 0,
689 distance: vec![],
690 llcs: BTreeMap::new(),
691 span: Cpumask::new(),
692 #[cfg(feature = "gpu-topology")]
693 gpus: BTreeMap::new(),
694 all_cores: BTreeMap::new(),
695 all_cpus: BTreeMap::new(),
696 };
697
698 #[cfg(feature = "gpu-topology")]
699 {
700 let system_gpus = create_gpus();
701 if let Some(gpus) = system_gpus.get(&0) {
702 for gpu in gpus {
703 node.gpus.insert(gpu.index, gpu.clone());
704 }
705 }
706 }
707
708 if !Path::new("/sys/devices/system/cpu").exists() {
709 bail!("/sys/devices/system/cpu sysfs node not found");
710 }
711
712 let cs = get_capacity_source().unwrap();
713 let cpu_ids = read_cpu_ids()?;
714 for cpu_id in cpu_ids.iter() {
715 create_insert_cpu(*cpu_id, &mut node, online_mask, topo_ctx, &cs, flatten_llc)?;
716 }
717
718 nodes.insert(node.id, node);
719
720 Ok(nodes)
721}
722
723fn create_numa_nodes(
724 online_mask: &Cpumask,
725 topo_ctx: &mut TopoCtx,
726) -> Result<BTreeMap<usize, Node>> {
727 let mut nodes = BTreeMap::<usize, Node>::new();
728
729 #[cfg(feature = "gpu-topology")]
730 let system_gpus = create_gpus();
731
732 let numa_paths = glob("/sys/devices/system/node/node*")?;
733 for numa_path in numa_paths.filter_map(Result::ok) {
734 let numa_str = numa_path.to_str().unwrap().trim();
735 let node_id = match sscanf!(numa_str, "/sys/devices/system/node/node{usize}") {
736 Ok(val) => val,
737 Err(_) => {
738 bail!("Failed to parse NUMA node ID {}", numa_str);
739 }
740 };
741 let distance = read_file_usize_vec(
742 Path::new(&format!(
743 "/sys/devices/system/node/node{}/distance",
744 node_id
745 )),
746 ' ',
747 )?;
748 let mut node = Node {
749 id: node_id,
750 distance,
751 llcs: BTreeMap::new(),
752 span: Cpumask::new(),
753
754 all_cores: BTreeMap::new(),
755 all_cpus: BTreeMap::new(),
756
757 #[cfg(feature = "gpu-topology")]
758 gpus: BTreeMap::new(),
759 };
760
761 #[cfg(feature = "gpu-topology")]
762 {
763 if let Some(gpus) = system_gpus.get(&node_id) {
764 for gpu in gpus {
765 node.gpus.insert(gpu.index, gpu.clone());
766 }
767 }
768 }
769
770 let cpu_pattern = numa_path.join("cpu[0-9]*");
771 let cpu_paths = glob(cpu_pattern.to_string_lossy().as_ref())?;
772 let cs = get_capacity_source().unwrap();
773 let mut cpu_ids = vec![];
774 for cpu_path in cpu_paths.filter_map(Result::ok) {
775 let cpu_str = cpu_path.to_str().unwrap().trim();
776 let cpu_id = match sscanf!(cpu_str, "/sys/devices/system/node/node{usize}/cpu{usize}") {
777 Ok((_, val)) => val,
778 Err(_) => {
779 bail!("Failed to parse cpu ID {}", cpu_str);
780 }
781 };
782 cpu_ids.push(cpu_id);
783 }
784 cpu_ids.sort();
785
786 for cpu_id in cpu_ids {
787 create_insert_cpu(cpu_id, &mut node, online_mask, topo_ctx, &cs, false)?;
788 }
789
790 nodes.insert(node.id, node);
791 }
792 Ok(nodes)
793}