1use crate::compat::ROOT_PREFIX;
73use crate::cpumask::read_cpulist;
74use crate::misc::read_file_byte;
75use crate::misc::read_file_usize_vec;
76use crate::misc::read_from_file;
77use crate::Cpumask;
78use anyhow::bail;
79use anyhow::Result;
80use glob::glob;
81use log::warn;
82use sscanf::sscanf;
83use std::collections::BTreeMap;
84use std::path::Path;
85use std::path::PathBuf;
86use std::sync::Arc;
87
88#[cfg(feature = "gpu-topology")]
89use crate::gpu::{create_gpus, Gpu, GpuIndex};
90
91lazy_static::lazy_static! {
92 pub static ref NR_CPU_IDS: usize = read_cpu_ids().unwrap().last().unwrap() + 1;
99
100 pub static ref NR_CPUS_POSSIBLE: usize = libbpf_rs::num_possible_cpus().unwrap();
107}
108
/// Performance class assigned to a CPU or core.
///
/// The variant order is significant: the derived ordering sorts every `Big`
/// value before `Little`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CoreType {
    /// A high-capacity core. `turbo` marks cores whose raw capacity equals
    /// the system-wide maximum while the average capacity is lower.
    Big { turbo: bool },
    /// A core whose raw capacity is below the system average on a system
    /// detected as big.LITTLE.
    Little,
}
114
115#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
116pub struct Cpu {
117 pub id: usize,
118 pub min_freq: usize,
119 pub max_freq: usize,
120 pub base_freq: usize,
123 pub cpu_capacity: usize,
125 pub smt_level: usize,
126 pub pm_qos_resume_latency_us: usize,
128 pub trans_lat_ns: usize,
129 pub l2_id: usize,
130 pub l3_id: usize,
131 pub cache_size: usize,
133 pub core_type: CoreType,
134
135 pub core_id: usize,
137 pub llc_id: usize,
138 pub node_id: usize,
139 pub package_id: usize,
140 pub cluster_id: isize,
141}
142
143#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
144pub struct Core {
145 pub id: usize,
147 pub kernel_id: usize,
149 pub cluster_id: isize,
150 pub cpus: BTreeMap<usize, Arc<Cpu>>,
151 pub span: Cpumask,
153 pub core_type: CoreType,
154
155 pub llc_id: usize,
157 pub node_id: usize,
158}
159
160#[derive(Debug, Clone)]
161pub struct Llc {
162 pub id: usize,
164 pub kernel_id: usize,
166 pub cores: BTreeMap<usize, Arc<Core>>,
167 pub span: Cpumask,
169
170 pub node_id: usize,
172
173 pub all_cpus: BTreeMap<usize, Arc<Cpu>>,
175}
176
177#[derive(Debug, Clone)]
178pub struct Node {
179 pub id: usize,
180 pub distance: Vec<usize>,
181 pub llcs: BTreeMap<usize, Arc<Llc>>,
182 pub span: Cpumask,
184
185 pub all_cores: BTreeMap<usize, Arc<Core>>,
187 pub all_cpus: BTreeMap<usize, Arc<Cpu>>,
188
189 #[cfg(feature = "gpu-topology")]
190 pub gpus: BTreeMap<GpuIndex, Gpu>,
191}
192
193#[derive(Debug)]
194pub struct Topology {
195 pub nodes: BTreeMap<usize, Node>,
196 pub span: Cpumask,
198 pub smt_enabled: bool,
200
201 pub all_llcs: BTreeMap<usize, Arc<Llc>>,
203 pub all_cores: BTreeMap<usize, Arc<Core>>,
204 pub all_cpus: BTreeMap<usize, Arc<Cpu>>,
205}
206
207impl Topology {
208 fn instantiate(span: Cpumask, mut nodes: BTreeMap<usize, Node>) -> Result<Self> {
209 let mut topo_llcs = BTreeMap::new();
213 let mut topo_cores = BTreeMap::new();
214 let mut topo_cpus = BTreeMap::new();
215
216 for (_node_id, node) in nodes.iter_mut() {
217 let mut node_cores = BTreeMap::new();
218 let mut node_cpus = BTreeMap::new();
219
220 for (&llc_id, llc) in node.llcs.iter_mut() {
221 let llc_mut = Arc::get_mut(llc).unwrap();
222 let mut llc_cpus = BTreeMap::new();
223
224 for (&core_id, core) in llc_mut.cores.iter_mut() {
225 let core_mut = Arc::get_mut(core).unwrap();
226 let smt_level = core_mut.cpus.len();
227
228 for (&cpu_id, cpu) in core_mut.cpus.iter_mut() {
229 let cpu_mut = Arc::get_mut(cpu).unwrap();
230 cpu_mut.smt_level = smt_level;
231
232 if topo_cpus
233 .insert(cpu_id, cpu.clone())
234 .or(node_cpus.insert(cpu_id, cpu.clone()))
235 .or(llc_cpus.insert(cpu_id, cpu.clone()))
236 .is_some()
237 {
238 bail!("Duplicate CPU ID {}", cpu_id);
239 }
240 }
241
242 topo_cores
245 .insert(core_id, core.clone())
246 .or(node_cores.insert(core_id, core.clone()));
247 }
248
249 llc_mut.all_cpus = llc_cpus;
250
251 if topo_llcs.insert(llc_id, llc.clone()).is_some() {
252 bail!("Duplicate LLC ID {}", llc_id);
253 }
254 }
255
256 node.all_cores = node_cores;
257 node.all_cpus = node_cpus;
258 }
259
260 Ok(Topology {
261 nodes,
262 span,
263 smt_enabled: is_smt_active().unwrap_or(false),
264 all_llcs: topo_llcs,
265 all_cores: topo_cores,
266 all_cpus: topo_cpus,
267 })
268 }
269
270 pub fn new() -> Result<Topology> {
272 let span = cpus_online()?;
273 let mut topo_ctx = TopoCtx::new();
274 let path = format!("{}/sys/devices/system/node", *ROOT_PREFIX);
278 let nodes = if Path::new(&path).exists() {
279 create_numa_nodes(&span, &mut topo_ctx)?
280 } else {
281 create_default_node(&span, &mut topo_ctx, false)?
282 };
283
284 Self::instantiate(span, nodes)
285 }
286
287 pub fn with_flattened_llc_node() -> Result<Topology> {
288 let span = cpus_online()?;
289 let mut topo_ctx = TopoCtx::new();
290 let nodes = create_default_node(&span, &mut topo_ctx, true)?;
291 Self::instantiate(span, nodes)
292 }
293
294 #[cfg(feature = "gpu-topology")]
296 pub fn gpus(&self) -> BTreeMap<GpuIndex, &Gpu> {
297 let mut gpus = BTreeMap::new();
298 for node in self.nodes.values() {
299 for (idx, gpu) in &node.gpus {
300 gpus.insert(*idx, gpu);
301 }
302 }
303 gpus
304 }
305
306 pub fn has_little_cores(&self) -> bool {
308 self.all_cores
309 .values()
310 .any(|c| c.core_type == CoreType::Little)
311 }
312
313 pub fn sibling_cpus(&self) -> Vec<i32> {
320 let mut sibling_cpu = vec![-1i32; *NR_CPUS_POSSIBLE];
321 for core in self.all_cores.values() {
322 let mut first = -1i32;
323 for &cpu in core.cpus.keys() {
324 if first < 0 {
325 first = cpu as i32;
326 } else {
327 sibling_cpu[first as usize] = cpu as i32;
328 sibling_cpu[cpu] = first;
329 break;
330 }
331 }
332 }
333 sibling_cpu
334 }
335}
336
/// Bookkeeping shared across all nodes while the topology is being built.
struct TopoCtx {
    /// Maps (node ID, package ID, kernel core ID) to the sequentially
    /// assigned core ID.
    node_core_kernel_ids: BTreeMap<(usize, usize, usize), usize>,
    /// Maps (node ID, package ID, kernel LLC ID) to the sequentially
    /// assigned LLC ID.
    node_llc_kernel_ids: BTreeMap<(usize, usize, usize), usize>,
    /// Maps the contents of an L2 cache's `shared_cpu_list` to its cache ID.
    l2_ids: BTreeMap<String, usize>,
    /// Maps the contents of an L3 cache's `shared_cpu_list` to its cache ID.
    l3_ids: BTreeMap<String, usize>,
}

impl TopoCtx {
    /// Creates a context with all lookup tables empty.
    fn new() -> TopoCtx {
        TopoCtx {
            node_core_kernel_ids: BTreeMap::new(),
            node_llc_kernel_ids: BTreeMap::new(),
            l2_ids: BTreeMap::new(),
            l3_ids: BTreeMap::new(),
        }
    }
}
366
367fn cpus_online() -> Result<Cpumask> {
368 let path = format!("{}/sys/devices/system/cpu/online", *ROOT_PREFIX);
369 let online = std::fs::read_to_string(path)?;
370 Cpumask::from_cpulist(&online)
371}
372
373fn get_cache_id(topo_ctx: &mut TopoCtx, cache_level_path: &PathBuf, cache_level: usize) -> usize {
374 let id_map = match cache_level {
376 2 => &mut topo_ctx.l2_ids,
377 3 => &mut topo_ctx.l3_ids,
378 _ => return usize::MAX,
379 };
380
381 let path = &cache_level_path.join("shared_cpu_list");
382 let key = match std::fs::read_to_string(path) {
383 Ok(key) => key,
384 Err(_) => return usize::MAX,
385 };
386
387 let id = *id_map.get(&key).unwrap_or(&usize::MAX);
388 if id != usize::MAX {
389 return id;
390 }
391
392 let id = read_from_file(&cache_level_path.join("id")).unwrap_or(usize::MAX);
394 if id != usize::MAX {
395 id_map.insert(key, id);
397 return id;
398 }
399
400 let id = id_map.len();
402 id_map.insert(key, id);
403
404 id
405}
406
407fn get_per_cpu_cache_size(cache_path: &PathBuf) -> Result<usize> {
408 let path_str = cache_path.to_str().unwrap();
409 let paths = glob(&(path_str.to_owned() + "/index[0-9]*"))?;
410 let mut tot_size = 0;
411
412 for index in paths.filter_map(Result::ok) {
413 let size = read_file_byte(&index.join("size")).unwrap_or(1024_usize);
418 let cpulist: String = read_from_file(&index.join("shared_cpu_list"))?;
419 let num_cpus = read_cpulist(&cpulist)?.len();
420 tot_size += size / num_cpus;
421 }
422
423 Ok(tot_size)
424}
425
426#[allow(clippy::too_many_arguments)]
427fn create_insert_cpu(
428 id: usize,
429 node: &mut Node,
430 online_mask: &Cpumask,
431 topo_ctx: &mut TopoCtx,
432 cs: &CapacitySource,
433 flatten_llc: bool,
434) -> Result<()> {
435 if !online_mask.test_cpu(id) {
439 return Ok(());
440 }
441
442 let cpu_str = format!("{}/sys/devices/system/cpu/cpu{}", *ROOT_PREFIX, id);
443 let cpu_path = Path::new(&cpu_str);
444
445 let top_path = cpu_path.join("topology");
447 let core_kernel_id = read_from_file(&top_path.join("core_id"))?;
448 let package_id = read_from_file(&top_path.join("physical_package_id"))?;
449 let cluster_id = read_from_file(&top_path.join("cluster_id"))?;
450
451 let cache_path = cpu_path.join("cache");
457 let l2_id = get_cache_id(topo_ctx, &cache_path.join(format!("index{}", 2)), 2);
458 let l3_id = get_cache_id(topo_ctx, &cache_path.join(format!("index{}", 3)), 3);
459 let llc_kernel_id = if flatten_llc {
460 0
461 } else if l3_id == usize::MAX {
462 l2_id
463 } else {
464 l3_id
465 };
466
467 let cache_size = get_per_cpu_cache_size(&cache_path).unwrap_or(0_usize);
469
470 let freq_path = cpu_path.join("cpufreq");
473 let min_freq = read_from_file(&freq_path.join("scaling_min_freq")).unwrap_or(0_usize);
474 let max_freq = read_from_file(&freq_path.join("scaling_max_freq")).unwrap_or(0_usize);
475 let base_freq = read_from_file(&freq_path.join("base_frequency")).unwrap_or(max_freq);
476 let trans_lat_ns =
477 read_from_file(&freq_path.join("cpuinfo_transition_latency")).unwrap_or(0_usize);
478
479 let cap_path = cpu_path.join(cs.suffix.clone());
481 let rcap = read_from_file(&cap_path).unwrap_or(cs.max_rcap);
482 let cpu_capacity = (rcap * 1024) / cs.max_rcap;
483
484 let power_path = cpu_path.join("power");
486 let pm_qos_resume_latency_us =
487 read_from_file(&power_path.join("pm_qos_resume_latency_us")).unwrap_or(0_usize);
488
489 let num_llcs = topo_ctx.node_llc_kernel_ids.len();
490 let llc_id = topo_ctx
491 .node_llc_kernel_ids
492 .entry((node.id, package_id, llc_kernel_id))
493 .or_insert(num_llcs);
494
495 let llc = node.llcs.entry(*llc_id).or_insert(Arc::new(Llc {
496 id: *llc_id,
497 cores: BTreeMap::new(),
498 span: Cpumask::new(),
499 all_cpus: BTreeMap::new(),
500
501 node_id: node.id,
502 kernel_id: llc_kernel_id,
503 }));
504 let llc_mut = Arc::get_mut(llc).unwrap();
505
506 let core_type = if cs.avg_rcap < cs.max_rcap && rcap == cs.max_rcap {
507 CoreType::Big { turbo: true }
508 } else if !cs.has_biglittle || rcap >= cs.avg_rcap {
509 CoreType::Big { turbo: false }
510 } else {
511 CoreType::Little
512 };
513
514 let num_cores = topo_ctx.node_core_kernel_ids.len();
515 let core_id = topo_ctx
516 .node_core_kernel_ids
517 .entry((node.id, package_id, core_kernel_id))
518 .or_insert(num_cores);
519
520 let core = llc_mut.cores.entry(*core_id).or_insert(Arc::new(Core {
521 id: *core_id,
522 cpus: BTreeMap::new(),
523 span: Cpumask::new(),
524 core_type: core_type.clone(),
525
526 llc_id: *llc_id,
527 node_id: node.id,
528 kernel_id: core_kernel_id,
529 cluster_id: cluster_id,
530 }));
531 let core_mut = Arc::get_mut(core).unwrap();
532
533 core_mut.cpus.insert(
534 id,
535 Arc::new(Cpu {
536 id,
537 min_freq,
538 max_freq,
539 base_freq,
540 cpu_capacity,
541 smt_level: 0, pm_qos_resume_latency_us,
543 trans_lat_ns,
544 l2_id,
545 l3_id,
546 cache_size,
547 core_type: core_type.clone(),
548
549 core_id: *core_id,
550 llc_id: *llc_id,
551 node_id: node.id,
552 package_id,
553 cluster_id,
554 }),
555 );
556
557 if node.span.test_cpu(id) {
558 bail!("Node {} already had CPU {}", node.id, id);
559 }
560
561 core_mut.span.set_cpu(id)?;
563 llc_mut.span.set_cpu(id)?;
564 node.span.set_cpu(id)?;
565
566 Ok(())
567}
568
569fn read_cpu_ids() -> Result<Vec<usize>> {
570 let mut cpu_ids = vec![];
571 let path = format!("{}/sys/devices/system/cpu/cpu[0-9]*", *ROOT_PREFIX);
572 let cpu_paths = glob(&path)?;
573 for cpu_path in cpu_paths.filter_map(Result::ok) {
574 let cpu_str = cpu_path.to_str().unwrap().trim();
575 if *ROOT_PREFIX == "" {
576 match sscanf!(cpu_str, "/sys/devices/system/cpu/cpu{usize}") {
577 Ok(val) => cpu_ids.push(val),
578 Err(_) => {
579 bail!("Failed to parse cpu ID {}", cpu_str);
580 }
581 }
582 } else {
583 match sscanf!(cpu_str, "{str}/sys/devices/system/cpu/cpu{usize}") {
584 Ok((_, val)) => cpu_ids.push(val),
585 Err(_) => {
586 bail!("Failed to parse cpu ID {}", cpu_str);
587 }
588 }
589 }
590 }
591 cpu_ids.sort();
592 Ok(cpu_ids)
593}
594
/// Describes where per-CPU raw capacity values are read from and summarizes
/// the values found across all CPUs.
struct CapacitySource {
    /// Path of the capacity file relative to a CPU's sysfs directory; empty
    /// when no capacity information is available.
    suffix: String,
    /// Average raw capacity over all CPUs.
    avg_rcap: usize,
    /// Highest raw capacity over all CPUs.
    max_rcap: usize,
    /// Whether the highest raw capacity is at least 1.3 times the lowest.
    has_biglittle: bool,
}
605
606fn get_capacity_source() -> Option<CapacitySource> {
607 let sources = [
610 "cpufreq/amd_pstate_prefcore_ranking",
611 "cpufreq/amd_pstate_highest_perf",
612 "acpi_cppc/highest_perf",
613 "cpu_capacity",
614 "cpufreq/cpuinfo_max_freq",
615 ];
616
617 let prefix = format!("{}/sys/devices/system/cpu/cpu0", *ROOT_PREFIX);
619 let mut raw_capacity;
620 let mut suffix = sources[sources.len() - 1];
621 'outer: for src in sources {
622 let path_str = [prefix.clone(), src.to_string()].join("/");
623 let path = Path::new(&path_str);
624 raw_capacity = read_from_file(&path).unwrap_or(0_usize);
625 if raw_capacity > 0 {
626 suffix = src;
628 let path = format!("{}/sys/devices/system/cpu/cpu[0-9]*", *ROOT_PREFIX);
630 let cpu_paths = glob(&path).ok()?;
631 for cpu_path in cpu_paths.filter_map(Result::ok) {
632 let raw_capacity2 = read_from_file(&cpu_path.join(suffix)).unwrap_or(0_usize);
633 if raw_capacity != raw_capacity2 {
634 break 'outer;
635 }
636 }
637 }
642 }
643
644 let mut max_rcap = 0;
646 let mut min_rcap = usize::MAX;
647 let mut avg_rcap = 0;
648 let mut nr_cpus = 0;
649 let mut has_biglittle = false;
650 let path = format!("{}/sys/devices/system/cpu/cpu[0-9]*", *ROOT_PREFIX);
651 let cpu_paths = glob(&path).ok()?;
652 for cpu_path in cpu_paths.filter_map(Result::ok) {
653 let rcap = read_from_file(&cpu_path.join(suffix)).unwrap_or(0_usize);
654 if max_rcap < rcap {
655 max_rcap = rcap;
656 }
657 if min_rcap > rcap {
658 min_rcap = rcap;
659 }
660 avg_rcap += rcap;
661 nr_cpus += 1;
662 }
663
664 if nr_cpus == 0 || max_rcap == 0 {
665 suffix = "";
666 avg_rcap = 1024;
667 max_rcap = 1024;
668 warn!("CPU capacity information is not available under sysfs.");
669 } else {
670 avg_rcap /= nr_cpus;
671 has_biglittle = max_rcap as f32 >= (1.3 * min_rcap as f32);
680 }
681
682 Some(CapacitySource {
683 suffix: suffix.to_string(),
684 avg_rcap,
685 max_rcap,
686 has_biglittle,
687 })
688}
689
690fn is_smt_active() -> Option<bool> {
691 let path = format!("{}/sys/devices/system/cpu/smt/active", *ROOT_PREFIX);
692 let smt_on: u8 = read_from_file(Path::new(&path)).ok()?;
693 Some(smt_on == 1)
694}
695
696fn create_default_node(
697 online_mask: &Cpumask,
698 topo_ctx: &mut TopoCtx,
699 flatten_llc: bool,
700) -> Result<BTreeMap<usize, Node>> {
701 let mut nodes = BTreeMap::<usize, Node>::new();
702
703 let mut node = Node {
704 id: 0,
705 distance: vec![],
706 llcs: BTreeMap::new(),
707 span: Cpumask::new(),
708 #[cfg(feature = "gpu-topology")]
709 gpus: BTreeMap::new(),
710 all_cores: BTreeMap::new(),
711 all_cpus: BTreeMap::new(),
712 };
713
714 #[cfg(feature = "gpu-topology")]
715 {
716 let system_gpus = create_gpus();
717 if let Some(gpus) = system_gpus.get(&0) {
718 for gpu in gpus {
719 node.gpus.insert(gpu.index, gpu.clone());
720 }
721 }
722 }
723
724 let path = format!("{}/sys/devices/system/cpu", *ROOT_PREFIX);
725 if !Path::new(&path).exists() {
726 bail!("/sys/devices/system/cpu sysfs node not found");
727 }
728
729 let cs = get_capacity_source().unwrap();
730 let cpu_ids = read_cpu_ids()?;
731 for cpu_id in cpu_ids.iter() {
732 create_insert_cpu(*cpu_id, &mut node, online_mask, topo_ctx, &cs, flatten_llc)?;
733 }
734
735 nodes.insert(node.id, node);
736
737 Ok(nodes)
738}
739
740fn create_numa_nodes(
741 online_mask: &Cpumask,
742 topo_ctx: &mut TopoCtx,
743) -> Result<BTreeMap<usize, Node>> {
744 let mut nodes = BTreeMap::<usize, Node>::new();
745
746 #[cfg(feature = "gpu-topology")]
747 let system_gpus = create_gpus();
748
749 let path = format!("{}/sys/devices/system/node/node*", *ROOT_PREFIX);
750 let numa_paths = glob(&path)?;
751 for numa_path in numa_paths.filter_map(Result::ok) {
752 let numa_str = numa_path.to_str().unwrap().trim();
753 let node_id = if *ROOT_PREFIX == "" {
754 match sscanf!(numa_str, "/sys/devices/system/node/node{usize}") {
755 Ok(val) => val,
756 Err(_) => {
757 bail!("Failed to parse NUMA node ID {}", numa_str);
758 }
759 }
760 } else {
761 match sscanf!(numa_str, "{str}/sys/devices/system/node/node{usize}") {
762 Ok((_, val)) => val,
763 Err(_) => {
764 bail!("Failed to parse NUMA node ID {}", numa_str);
765 }
766 }
767 };
768
769 let distance = read_file_usize_vec(
770 Path::new(&format!(
771 "{}/sys/devices/system/node/node{}/distance",
772 *ROOT_PREFIX, node_id
773 )),
774 ' ',
775 )?;
776 let mut node = Node {
777 id: node_id,
778 distance,
779 llcs: BTreeMap::new(),
780 span: Cpumask::new(),
781
782 all_cores: BTreeMap::new(),
783 all_cpus: BTreeMap::new(),
784
785 #[cfg(feature = "gpu-topology")]
786 gpus: BTreeMap::new(),
787 };
788
789 #[cfg(feature = "gpu-topology")]
790 {
791 if let Some(gpus) = system_gpus.get(&node_id) {
792 for gpu in gpus {
793 node.gpus.insert(gpu.index, gpu.clone());
794 }
795 }
796 }
797
798 let cpu_pattern = numa_path.join("cpu[0-9]*");
799 let cpu_paths = glob(cpu_pattern.to_string_lossy().as_ref())?;
800 let cs = get_capacity_source().unwrap();
801 let mut cpu_ids = vec![];
802 for cpu_path in cpu_paths.filter_map(Result::ok) {
803 let cpu_str = cpu_path.to_str().unwrap().trim();
804 let cpu_id = if *ROOT_PREFIX == "" {
805 match sscanf!(cpu_str, "/sys/devices/system/node/node{usize}/cpu{usize}") {
806 Ok((_, val)) => val,
807 Err(_) => {
808 bail!("Failed to parse cpu ID {}", cpu_str);
809 }
810 }
811 } else {
812 match sscanf!(
813 cpu_str,
814 "{str}/sys/devices/system/node/node{usize}/cpu{usize}"
815 ) {
816 Ok((_, _, val)) => val,
817 Err(_) => {
818 bail!("Failed to parse cpu ID {}", cpu_str);
819 }
820 }
821 };
822 cpu_ids.push(cpu_id);
823 }
824 cpu_ids.sort();
825
826 for cpu_id in cpu_ids {
827 create_insert_cpu(cpu_id, &mut node, online_mask, topo_ctx, &cs, false)?;
828 }
829
830 nodes.insert(node.id, node);
831 }
832 Ok(nodes)
833}