Skip to main content

scx_arena/
arenalib.rs

1// SPDX-License-Identifier: GPL-2.0
2//
3// Copyright (c) 2025 Meta Platforms
4// Author: Emil Tsalapatis <etsal@meta.com>
5
6// This software may be used and distributed according to the terms of the
7// GNU General Public License version 2.
8
9pub use crate::bpf_skel::types;
10
11use scx_utils::Topology;
12use scx_utils::{Core, Llc};
13
14use std::ffi::CString;
15use std::os::raw::c_ulong;
16use std::sync::Arc;
17
18use anyhow::bail;
19use anyhow::Result;
20
21use libbpf_rs::libbpf_sys;
22use libbpf_rs::AsRawLibbpf;
23use libbpf_rs::Object;
24use libbpf_rs::ProgramInput;
25use libbpf_rs::ProgramMut;
26
// MAX_CPU_ARRSZ (the u64 word count that ArenaLib derives from the constant
// below) has to be big enough to accommodate all present CPUs.
// Even if it's larger than the size of cpumask_t, we truncate any
// invalid data when passing it to the kernel's topology init functions.
/// Maximum length of CPU mask supported by the library in bits.
const MAX_CPU_SUPPORTED: usize = 640;
32
/// Holds state related to BPF arenas in the program.
///
/// The BPF object is borrowed mutably for `'a`, so an `ArenaLib` cannot
/// outlive the object it was created from.
#[derive(Debug)]
pub struct ArenaLib<'a> {
    /// Size handed to the BPF `arena_init` program as `task_ctx_size`.
    task_size: usize,
    /// BPF object in which the library's programs are looked up by name.
    obj: &'a mut Object,
}
39
40impl<'a> ArenaLib<'a> {
41    /// Maximum CPU mask size, derived from MAX_CPU_SUPPORTED.
42    const MAX_CPU_ARRSZ: usize = (MAX_CPU_SUPPORTED + 63) / 64;
43
44    /// Amount of pages allocated at once form the BPF map. by the static stack allocator.
45    const STATIC_ALLOC_PAGES_GRANULARITY: c_ulong = 8;
46
47    fn run_prog_by_name(&self, name: &str, input: ProgramInput) -> Result<i32> {
48        let c_name = CString::new(name)?;
49        let ptr = unsafe {
50            libbpf_sys::bpf_object__find_program_by_name(
51                self.obj.as_libbpf_object().as_ptr(),
52                c_name.as_ptr(),
53            )
54        };
55        if ptr as u64 == 0 as u64 {
56            bail!("No program with name {} found in object", name);
57        }
58
59        let bpfprog = unsafe { &mut *ptr };
60        let prog = ProgramMut::new_mut(bpfprog);
61
62        let output = prog.test_run(input)?;
63
64        // Reach into the object and get the fd of the program
65        // Get the fd of the test program to run
66
67        return Ok(output.return_value as i32);
68    }
69
70    /// Set up basic library state.
71    fn setup_arena(&self) -> Result<()> {
72        // Allocate the arena memory from the BPF side so userspace initializes it before starting
73        // the scheduler. Despite the function call's name this is neither a test nor a test run,
74        // it's the recommended way of executing SEC("syscall") probes.
75        let mut args = types::arena_init_args {
76            static_pages: Self::STATIC_ALLOC_PAGES_GRANULARITY as c_ulong,
77            task_ctx_size: self.task_size as c_ulong,
78        };
79
80        let input = ProgramInput {
81            context_in: Some(unsafe {
82                std::slice::from_raw_parts_mut(
83                    &mut args as *mut _ as *mut u8,
84                    std::mem::size_of_val(&args),
85                )
86            }),
87            ..Default::default()
88        };
89
90        let ret = self.run_prog_by_name("arena_init", input)?;
91        if ret != 0 {
92            bail!("Could not initialize arenas, setup_arenas returned {}", ret);
93        }
94
95        Ok(())
96    }
97
98    fn setup_topology_node(&self, mask: &[u64], id: usize) -> Result<()> {
99        let mut args = types::arena_alloc_mask_args {
100            bitmap: 0 as c_ulong,
101        };
102
103        // Exclude memory-only NUMA nodes
104        if mask.into_iter().all(|&b| b == 0) {
105            return Ok(());
106        }
107
108        let input = ProgramInput {
109            context_in: Some(unsafe {
110                std::slice::from_raw_parts_mut(
111                    &mut args as *mut _ as *mut u8,
112                    std::mem::size_of_val(&args),
113                )
114            }),
115            ..Default::default()
116        };
117
118        let ret = self.run_prog_by_name("arena_alloc_mask", input)?;
119
120        if ret != 0 {
121            bail!(
122                "Could not initialize arenas, setup_topology_node returned {}",
123                ret
124            );
125        }
126
127        let ptr = unsafe {
128            &mut *std::ptr::with_exposed_provenance_mut::<[u64; 640]>(
129                args.bitmap.try_into().unwrap(),
130            )
131        };
132
133        let (valid_mask, _) = ptr.split_at_mut(mask.len());
134        valid_mask.clone_from_slice(mask);
135
136        let mut args = types::arena_topology_node_init_args {
137            bitmap: args.bitmap as c_ulong,
138            data_size: 0 as c_ulong,
139            id: id as c_ulong,
140        };
141
142        let input = ProgramInput {
143            context_in: Some(unsafe {
144                std::slice::from_raw_parts_mut(
145                    &mut args as *mut _ as *mut u8,
146                    std::mem::size_of_val(&args),
147                )
148            }),
149            ..Default::default()
150        };
151
152        let ret = self.run_prog_by_name("arena_topology_node_init", input)?;
153        if ret != 0 {
154            bail!("arena_topology_node_init returned {}", ret);
155        }
156
157        Ok(())
158    }
159
160    /// Set the per-level maximum number of children before registering topology
161    /// nodes. Each topology node at level L is allocated with
162    /// topo_max_children[L] child pointer slots, so these values must be set
163    /// before any arena_topology_node_init() calls. The sizes are derived from
164    /// the actual host topology to keep per-node allocation as small as
165    /// possible.
166    ///
167    /// NOTE: rust/scx_arena/selftests/src/main.rs::setup_topology() contains
168    /// equivalent logic and must be kept in sync with this function.
169    fn setup_topology_max_children(&self, topo: &Topology) -> Result<()> {
170        // Compute the maximum number of children at each topology level.
171        // TOPO_TOP  (0): children are NUMA nodes
172        // TOPO_NODE (1): children are LLCs
173        // TOPO_LLC  (2): children are cores
174        // TOPO_CORE (3): children are CPUs (SMT threads)
175        // TOPO_CPU  (4): leaf nodes, no children
176        let max_children: [u32; 5] = [
177            topo.nodes.len() as u32,
178            topo.nodes.values().map(|n| n.llcs.len()).max().unwrap_or(0) as u32,
179            topo.all_llcs
180                .values()
181                .map(|l| l.cores.len())
182                .max()
183                .unwrap_or(0) as u32,
184            topo.all_cores
185                .values()
186                .map(|c| c.cpus.len())
187                .max()
188                .unwrap_or(0) as u32,
189            0,
190        ];
191
192        let mut args = types::arena_topology_init_args { max_children };
193
194        let input = ProgramInput {
195            context_in: Some(unsafe {
196                std::slice::from_raw_parts_mut(
197                    &mut args as *mut _ as *mut u8,
198                    std::mem::size_of_val(&args),
199                )
200            }),
201            ..Default::default()
202        };
203
204        let ret = self.run_prog_by_name("arena_topology_init", input)?;
205        if ret != 0 {
206            bail!("arena_topology_init returned {}", ret);
207        }
208
209        Ok(())
210    }
211
212    fn setup_topology(&self) -> Result<()> {
213        let topo = Topology::new().expect("Failed to build host topology");
214
215        self.setup_topology_max_children(&topo)?;
216
217        // Top level - ID 0 is fine as there's only one top-level node
218        self.setup_topology_node(topo.span.as_raw_slice(), 0)?;
219
220        for (node_id, node) in topo.nodes {
221            self.setup_topology_node(node.span.as_raw_slice(), node_id)?;
222        }
223
224        // LLCs need to use their actual LLC ID for proper indexing in topo_nodes
225        for (llc_id, llc) in topo.all_llcs {
226            self.setup_topology_node(
227                Arc::<Llc>::into_inner(llc)
228                    .expect("missing llc")
229                    .span
230                    .as_raw_slice(),
231                llc_id,
232            )?;
233        }
234
235        for (core_id, core) in topo.all_cores {
236            self.setup_topology_node(
237                Arc::<Core>::into_inner(core)
238                    .expect("missing core")
239                    .span
240                    .as_raw_slice(),
241                core_id,
242            )?;
243        }
244        for (_, cpu) in topo.all_cpus {
245            let mut mask = [0; Self::MAX_CPU_ARRSZ - 1];
246            mask[cpu.id / 64] |= 1 << (cpu.id % 64);
247            self.setup_topology_node(&mask, cpu.id)?;
248        }
249
250        Ok(())
251    }
252
253    /// Create an Arenalib object This call only initializes the Rust side of Arenalib.
254    pub fn init(obj: &'a mut Object, task_size: usize, nr_cpus: usize) -> Result<Self> {
255        if nr_cpus >= MAX_CPU_SUPPORTED {
256            bail!("Scheduler specifies too many CPUs");
257        }
258
259        Ok(Self { task_size, obj })
260    }
261
262    /// Set up the BPF arena library state.
263    pub fn setup(&self) -> Result<()> {
264        self.setup_arena()?;
265        self.setup_topology()?;
266
267        Ok(())
268    }
269}