1#![cfg(feature = "gpu-topology")]
2
3use crate::misc::read_from_file;
4use crate::{Cpumask, NR_CPU_IDS};
5use nvml_wrapper::bitmasks::InitFlags;
6use nvml_wrapper::enum_wrappers::device::{Clock, PerformanceState, TopologyLevel};
7use nvml_wrapper::Nvml;
8use nvml_wrapper_sys::bindings::NVML_AFFINITY_SCOPE_NODE;
9use std::collections::BTreeMap;
10use std::path::Path;
11
/// Identifies a single GPU, tagged by the vendor interface that reported it.
///
/// The type is `Copy`, totally ordered and hashable, so it can be used
/// directly as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum GpuIndex {
    /// A GPU reported through NVML.
    Nvidia {
        /// The index NVML assigned to the device.
        nvml_id: u32,
    },
}
16
17#[derive(Debug, Clone)]
18pub struct Gpu {
19 pub index: GpuIndex,
20 pub node_id: usize,
21 pub max_graphics_clock: usize,
22 pub max_sm_clock: usize,
24 pub max_mem_clock: usize,
26 pub multiproc_count: usize,
28 pub memory: u64,
29 pub cpu_mask: Cpumask,
30 pub nearest: Vec<GpuIndex>,
34 pub perf_state: PerformanceState,
38}
39
40pub fn create_gpus() -> BTreeMap<usize, Vec<Gpu>> {
41 let mut gpus: BTreeMap<usize, Vec<Gpu>> = BTreeMap::new();
42
43 let Ok(nvml) = Nvml::init_with_flags(InitFlags::NO_GPUS) else {
45 return BTreeMap::new();
46 };
47 if let Ok(nvidia_gpu_count) = nvml.device_count() {
48 for i in 0..nvidia_gpu_count {
49 let Ok(nvidia_gpu) = nvml.device_by_index(i) else {
50 continue;
51 };
52 let graphics_boost_clock = nvidia_gpu
53 .max_customer_boost_clock(Clock::Graphics)
54 .unwrap_or(0);
55 let sm_boost_clock = nvidia_gpu.max_customer_boost_clock(Clock::SM).unwrap_or(0);
56 let mem_boost_clock = nvidia_gpu
57 .max_customer_boost_clock(Clock::Memory)
58 .unwrap_or(0);
59 let Ok(memory_info) = nvidia_gpu.memory_info() else {
60 continue;
61 };
62 let Ok(pci_info) = nvidia_gpu.pci_info() else {
63 continue;
64 };
65 let Ok(index) = nvidia_gpu.index() else {
66 continue;
67 };
68
69 let cpu_mask = if let Ok(cpu_affinity) =
70 nvidia_gpu.cpu_affinity_within_scope(*NR_CPU_IDS, NVML_AFFINITY_SCOPE_NODE)
71 {
72 #[cfg(target_pointer_width = "32")]
74 let cpu_affinity: Vec<u64> = cpu_affinity
75 .chunks_exact(2)
76 .map(|pair| (pair[1] as u64) << 32 | pair[0] as u64)
77 .collect();
78 Cpumask::from_vec(cpu_affinity)
79 } else {
80 Cpumask::new()
81 };
82
83 let multiproc_count = if let Ok(attributes) = nvidia_gpu.attributes() {
84 attributes.multiprocessor_count
85 } else {
86 0
87 };
88
89 let nearest_gpu_topology_level = if nvidia_gpu.is_multi_gpu_board().unwrap_or(false) {
90 TopologyLevel::Internal
93 } else {
94 TopologyLevel::HostBridge
95 };
96
97 let nearest = if let Ok(nearest_gpus) =
101 nvidia_gpu.topology_nearest_gpus(nearest_gpu_topology_level)
102 {
103 nearest_gpus
104 .iter()
105 .filter_map(|d| {
106 if let Ok(idx) = d.index() {
107 Some(GpuIndex::Nvidia { nvml_id: idx })
108 } else {
109 None
110 }
111 })
112 .collect()
113 } else {
114 Vec::new()
115 };
116
117 let perf_state = nvidia_gpu
118 .performance_state()
119 .unwrap_or(PerformanceState::Unknown);
120
121 let bus_id = pci_info.bus_id.to_lowercase();
124 let fixed_bus_id = bus_id.strip_prefix("0000").unwrap_or("");
125 let numa_path = format!("/sys/bus/pci/devices/{}/numa_node", fixed_bus_id);
126 let numa_node = read_from_file(Path::new(&numa_path)).unwrap_or(0_usize);
127
128 let gpu = Gpu {
129 index: GpuIndex::Nvidia { nvml_id: index },
130 node_id: numa_node as usize,
131 max_graphics_clock: graphics_boost_clock as usize,
132 max_sm_clock: sm_boost_clock as usize,
133 max_mem_clock: mem_boost_clock as usize,
134 multiproc_count: multiproc_count as usize,
135 memory: memory_info.total,
136 cpu_mask,
137 nearest,
138 perf_state,
139 };
140 gpus.entry(gpu.node_id).or_default().push(gpu);
141 }
142 }
143
144 gpus
145}