use anyhow::Result;
use combinations::Combinations;
use itertools::iproduct;
use scx_utils::CoreType;
use scx_utils::Cpumask;
use scx_utils::EnergyModel;
use scx_utils::PerfDomain;
use scx_utils::PerfState;
use scx_utils::Topology;
use scx_utils::NR_CPU_IDS;
use std::cell::Cell;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::HashSet;
use std::fmt;
use std::hash::{Hash, Hasher};
use tracing::debug;

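/// Identifiers and properties of a single CPU within the topology.
///
/// Judging from how the fields are filled in `build_topo_order()`, the
/// `_adx` suffix denotes an absolute (kernel-visible) index, while `_rdx`
/// denotes an index relative to the enclosing parent (e.g., `llc_rdx` is
/// the LLC's position within its NUMA node).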
#[derive(Debug, Clone)]
pub struct CpuId {
    pub numa_adx: usize,
    pub pd_adx: usize,
    pub llc_adx: usize,
    pub llc_rdx: usize,
    pub llc_kernel_id: usize,
    pub core_rdx: usize,
    pub cpu_rdx: usize,
    pub cpu_adx: usize,
    pub smt_level: usize,
    pub cache_size: usize,
    pub cpu_cap: usize,
    pub big_core: bool,
    pub turbo_core: bool,
    pub cpu_sibling: usize,
}

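/// Key identifying a compute domain: a set of CPUs sharing a NUMA node,
/// an LLC, and a core type (big or little).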
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Clone)]
pub struct ComputeDomainId {
    pub numa_adx: usize,
    pub llc_adx: usize,
    pub llc_rdx: usize,
    pub llc_kernel_id: usize,
    pub is_big: bool,
}

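/// A compute domain: its member CPUs, an alternative domain of the
/// opposite core type, and its neighbor domains keyed by distance.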
#[derive(Debug, Clone)]
pub struct ComputeDomain {
    pub cpdom_id: usize,
    pub cpdom_alt_id: Cell<usize>,
    pub cpu_ids: Vec<usize>,
    pub neighbor_map: RefCell<BTreeMap<usize, RefCell<Vec<usize>>>>,
}

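/// For one performance level (`perf_cap`), the CPUs to use first
/// (`cpus_perf`) and the CPUs to spill over to under load (`cpus_ovflw`).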
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct PerfCpuOrder {
    pub perf_cap: usize,
    pub perf_util: f32,
    pub cpus_perf: RefCell<Vec<usize>>,
    pub cpus_ovflw: RefCell<Vec<usize>>,
}

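/// Overall CPU ordering information derived from the topology and, when
/// available, the energy model.
///
/// A minimal usage sketch (hypothetical caller, default topology args):
/// ```ignore
/// let order = CpuOrder::new(None)?;
/// for (cap, pco) in order.perf_cpu_order.iter() {
///     println!("capacity {cap}: {pco}");
/// }
/// ```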
#[derive(Debug)]
#[allow(dead_code)]
pub struct CpuOrder {
    pub all_cpus_mask: Cpumask,
    pub cpuids: Vec<CpuId>,
    pub perf_cpu_order: BTreeMap<usize, PerfCpuOrder>,
    pub cpdom_map: BTreeMap<ComputeDomainId, ComputeDomain>,
    pub nr_cpus: usize,
    pub nr_cores: usize,
    pub nr_cpdoms: usize,
    pub nr_llcs: usize,
    pub nr_numa: usize,
    pub smt_enabled: bool,
    pub has_biglittle: bool,
    pub has_energy_model: bool,
}

impl CpuOrder {
    /// Build a `CpuOrder` for the current machine, using the energy model
    /// when one is available and otherwise falling back to a fake table
    /// derived from the performance- and powersave-ordered CPU lists.
    pub fn new(topology_args: Option<&scx_utils::TopologyArgs>) -> Result<CpuOrder> {
        let ctx = CpuOrderCtx::new(topology_args)?;
        let cpus_pf = ctx.build_topo_order(false).unwrap();
        let cpus_ps = ctx.build_topo_order(true).unwrap();
        let cpdom_map = CpuOrderCtx::build_cpdom(&cpus_pf).unwrap();
        let perf_cpu_order = match &ctx.em {
            Ok(em) => EnergyModelOptimizer::get_perf_cpu_order_table(em, &cpus_pf),
            Err(_) => EnergyModelOptimizer::get_fake_perf_cpu_order_table(&cpus_pf, &cpus_ps),
        };

        let nr_cpdoms = cpdom_map.len();
        Ok(CpuOrder {
            all_cpus_mask: ctx.topo.span,
            cpuids: cpus_pf,
            perf_cpu_order,
            cpdom_map,
            nr_cpus: ctx.topo.all_cpus.len(),
            nr_cores: ctx.topo.all_cores.len(),
            nr_cpdoms,
            nr_llcs: ctx.topo.all_llcs.len(),
            nr_numa: ctx.topo.nodes.len(),
            smt_enabled: ctx.smt_enabled,
            has_biglittle: ctx.has_biglittle,
            has_energy_model: ctx.has_energy_model,
        })
    }
}

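/// Shared context used while building the CPU ordering: the parsed
/// topology, the (optional) energy model, and a few cached flags.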
struct CpuOrderCtx {
    topo: Topology,
    em: Result<EnergyModel>,
    smt_enabled: bool,
    has_biglittle: bool,
    has_energy_model: bool,
}

impl CpuOrderCtx {
    fn new(topology_args: Option<&scx_utils::TopologyArgs>) -> Result<Self> {
        let topo = match topology_args {
            Some(args) => Topology::with_args(args)?,
            None => Topology::new()?,
        };

        let em = EnergyModel::new();
        let smt_enabled = topo.smt_enabled;
        let has_biglittle = topo.has_little_cores();
        let has_energy_model = em.is_ok();

        debug!("{:#?}", topo);
        debug!("{:#?}", em);

        Ok(CpuOrderCtx {
            topo,
            em,
            smt_enabled,
            has_biglittle,
            has_energy_model,
        })
    }

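    /// Build a topology-ordered CPU list. With `prefer_powersave`, CPUs
    /// are ordered to pack work onto as few cores as possible (on
    /// big/LITTLE systems, LITTLE cores first); otherwise they are ordered
    /// for performance, spreading across physical cores before SMT
    /// siblings.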
    fn build_topo_order(&self, prefer_powersave: bool) -> Option<Vec<CpuId>> {
        let mut cpu_ids = Vec::new();
        let smt_siblings = self.topo.sibling_cpus();

        // Walk the topology tree and collect one CpuId per CPU.
        for (&numa_adx, node) in self.topo.nodes.iter() {
            for (llc_rdx, (&llc_adx, llc)) in node.llcs.iter().enumerate() {
                for (core_rdx, (_core_adx, core)) in llc.cores.iter().enumerate() {
                    for (cpu_rdx, (&cpu_adx, cpu)) in core.cpus.iter().enumerate() {
                        let pd_adx = Self::get_pd_id(&self.em, cpu_adx, llc_adx);
                        cpu_ids.push(CpuId {
                            numa_adx,
                            pd_adx,
                            llc_adx,
                            llc_rdx,
                            core_rdx,
                            cpu_rdx,
                            cpu_adx,
                            smt_level: cpu.smt_level,
                            cache_size: cpu.cache_size,
                            cpu_cap: cpu.cpu_capacity,
                            big_core: cpu.core_type != CoreType::Little,
                            turbo_core: cpu.core_type == CoreType::Big { turbo: true },
                            cpu_sibling: smt_siblings[cpu_adx] as usize,
                            llc_kernel_id: llc.kernel_id,
                        });
                    }
                }
            }
        }

        // Sort the CPUs for the requested optimization goal.
        match (prefer_powersave, self.has_biglittle) {
            (true, false) => {
                // Powersave on a homogeneous system: keep the sibling CPUs
                // of a core adjacent so work packs core by core.
                cpu_ids.sort_by(|a, b| {
                    a.numa_adx
                        .cmp(&b.numa_adx)
                        .then_with(|| a.llc_rdx.cmp(&b.llc_rdx))
                        .then_with(|| b.cpu_cap.cmp(&a.cpu_cap))
                        .then_with(|| b.cache_size.cmp(&a.cache_size))
                        .then_with(|| a.pd_adx.cmp(&b.pd_adx))
                        .then_with(|| a.core_rdx.cmp(&b.core_rdx))
                        .then_with(|| b.smt_level.cmp(&a.smt_level))
                        .then_with(|| a.cpu_rdx.cmp(&b.cpu_rdx))
                        .then_with(|| a.cpu_adx.cmp(&b.cpu_adx))
                });
            }
            (true, true) => {
                // Powersave on big/LITTLE: same as above, but prefer
                // low-capacity (LITTLE) cores first.
                cpu_ids.sort_by(|a, b| {
                    a.numa_adx
                        .cmp(&b.numa_adx)
                        .then_with(|| a.llc_rdx.cmp(&b.llc_rdx))
                        .then_with(|| a.cpu_cap.cmp(&b.cpu_cap))
                        .then_with(|| b.cache_size.cmp(&a.cache_size))
                        .then_with(|| a.pd_adx.cmp(&b.pd_adx))
                        .then_with(|| a.core_rdx.cmp(&b.core_rdx))
                        .then_with(|| b.smt_level.cmp(&a.smt_level))
                        .then_with(|| a.cpu_rdx.cmp(&b.cpu_rdx))
                        .then_with(|| a.cpu_adx.cmp(&b.cpu_adx))
                });
            }
            _ => {
                // Performance: sort by cpu_rdx first so all primary SMT
                // threads come before their siblings, spreading work
                // across physical cores and preferring big, high-capacity
                // ones.
                cpu_ids.sort_by(|a, b| {
                    a.cpu_rdx
                        .cmp(&b.cpu_rdx)
                        .then_with(|| a.numa_adx.cmp(&b.numa_adx))
                        .then_with(|| a.llc_rdx.cmp(&b.llc_rdx))
                        .then_with(|| b.cpu_cap.cmp(&a.cpu_cap))
                        .then_with(|| b.cache_size.cmp(&a.cache_size))
                        .then_with(|| a.smt_level.cmp(&b.smt_level))
                        .then_with(|| a.pd_adx.cmp(&b.pd_adx))
                        .then_with(|| a.core_rdx.cmp(&b.core_rdx))
                        .then_with(|| a.cpu_adx.cmp(&b.cpu_adx))
                });
            }
        }

        Some(cpu_ids)
    }

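    /// Group CPUs into compute domains, one per (NUMA node, LLC, core
    /// type) tuple, then precompute each domain's neighbors by distance
    /// and an alternative domain of the opposite core type.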
    fn build_cpdom(cpu_ids: &Vec<CpuId>) -> Option<BTreeMap<ComputeDomainId, ComputeDomain>> {
        // Create a compute domain per (NUMA, LLC, core type) and collect
        // its member CPUs.
        let mut cpdom_id = 0;
        let mut cpdom_map: BTreeMap<ComputeDomainId, ComputeDomain> = BTreeMap::new();
        let mut cpdom_types: BTreeMap<usize, bool> = BTreeMap::new();
        for cpu_id in cpu_ids.iter() {
            let key = ComputeDomainId {
                numa_adx: cpu_id.numa_adx,
                llc_adx: cpu_id.llc_adx,
                llc_rdx: cpu_id.llc_rdx,
                llc_kernel_id: cpu_id.llc_kernel_id,
                is_big: cpu_id.big_core,
            };
            let value = cpdom_map.entry(key.clone()).or_insert_with(|| {
                let val = ComputeDomain {
                    cpdom_id,
                    cpdom_alt_id: Cell::new(cpdom_id),
                    cpu_ids: Vec::new(),
                    neighbor_map: RefCell::new(BTreeMap::new()),
                };
                cpdom_types.insert(cpdom_id, key.is_big);

                cpdom_id += 1;
                val
            });
            value.cpu_ids.push(cpu_id.cpu_adx);
        }

        // Build each domain's neighbor map, bucketing the other domains
        // by distance.
        for ((from_k, from_v), (to_k, to_v)) in iproduct!(cpdom_map.iter(), cpdom_map.iter()) {
            if from_k == to_k {
                continue;
            }

            let d = Self::dist(from_k, to_k);
            let mut map = from_v.neighbor_map.borrow_mut();
            match map.get(&d) {
                Some(v) => {
                    v.borrow_mut().push(to_v.cpdom_id);
                }
                None => {
                    map.insert(d, RefCell::new(vec![to_v.cpdom_id]));
                }
            }
        }

        // Reorder each distance bucket circularly around the domain's own
        // id so that different domains prefer different neighbors.
        for (_, cpdom) in cpdom_map.iter() {
            for (_, neighbors) in cpdom.neighbor_map.borrow().iter() {
                let csorted = Self::circular_sort(cpdom.cpdom_id, &neighbors.borrow());
                *neighbors.borrow_mut() = csorted;
            }
        }

        // Pick an alternative domain for each domain: the domain with the
        // same key but the opposite core type if it exists, otherwise the
        // nearest neighbor of the opposite core type.
        for (k, v) in cpdom_map.iter() {
            let mut key = k.clone();
            key.is_big = !k.is_big;

            if let Some(alt_v) = cpdom_map.get(&key) {
                v.cpdom_alt_id.set(alt_v.cpdom_id);
            } else {
                'outer: for (_dist, ncpdoms) in v.neighbor_map.borrow().iter() {
                    for ncpdom_id in ncpdoms.borrow().iter() {
                        if let Some(&is_big) = cpdom_types.get(ncpdom_id) {
                            if is_big == key.is_big {
                                v.cpdom_alt_id.set(*ncpdom_id);
                                break 'outer;
                            }
                        }
                    }
                }
            }
        }

        Some(cpdom_map)
    }

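    /// Return `the_rest` ordered by circular distance from `start`: ids
    /// whose positions (in the sorted union of all ids) are closest to
    /// `start`'s position come first, wrapping around, with ties going to
    /// the lower position. `start` itself is excluded from the result.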
    fn circular_sort(start: usize, the_rest: &Vec<usize>) -> Vec<usize> {
        let mut list = the_rest.clone();
        list.push(start);
        list.sort();

        let s = list
            .binary_search(&start)
            .expect("start must appear exactly once");

        // Circular distance between positions in the sorted list.
        let n = list.len();
        let dist = |x: usize| {
            let d = (x + n - s) % n;
            d.min(n - d)
        };
        let mut order: Vec<usize> = (0..n).collect();
        order.sort_by_key(|&x| (dist(x), x));

        let list_csorted: Vec<_> = order.iter().map(|&i| list[i]).collect();

        // Drop the first element, which is `start` itself (distance 0).
        list_csorted[1..].to_vec()
    }

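    /// Map a CPU to its performance-domain id. Without an energy model,
    /// fall back to the LLC id so CPUs sharing an LLC share a PD.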
    fn get_pd_id(em: &Result<EnergyModel>, cpu_adx: usize, llc_adx: usize) -> usize {
        match em {
            Ok(em) => em.get_pd_by_cpu_id(cpu_adx).unwrap().id,
            Err(_) => llc_adx,
        }
    }

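    /// Distance between two compute domains: crossing the big/LITTLE
    /// boundary costs 100, crossing NUMA nodes 10, and within a node a
    /// differing LLC index and kernel id cost 1 each.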
    fn dist(from: &ComputeDomainId, to: &ComputeDomainId) -> usize {
        let mut d = 0;
        if from.is_big != to.is_big {
            d += 100;
        }
        if from.numa_adx != to.numa_adx {
            d += 10;
        } else {
            if from.llc_rdx != to.llc_rdx {
                d += 1;
            }
            if from.llc_kernel_id != to.llc_kernel_id {
                d += 1;
            }
        }
        d
    }
}

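/// Computes, from the energy model, the most energy-efficient set and
/// ordering of CPUs for each achievable performance level.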
#[derive(Debug)]
struct EnergyModelOptimizer<'a> {
    em: &'a EnergyModel,

    /// All CPUs in topological (performance) order.
    cpus_topological_order: Vec<usize>,

    /// Per performance domain, its CPUs in preferred order.
    pd_cpu_order: BTreeMap<usize, RefCell<Vec<usize>>>,

    /// Total performance available across all perf domains.
    tot_perf: usize,

    /// Per performance level, the lowest-power PD/perf-state combinations
    /// that can deliver it.
    pdss_infos: RefCell<BTreeMap<usize, RefCell<HashSet<PDSetInfo<'a>>>>>,

    /// The chosen PD set per performance level.
    perf_pdsi: RefCell<BTreeMap<usize, PDSetInfo<'a>>>,

    /// Final result: per performance cap, the primary and overflow CPUs.
    perf_cpu_order: RefCell<BTreeMap<usize, PerfCpuOrder>>,
}

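/// A performance domain paired with one of its performance states.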
#[derive(Debug, Clone, Eq, Hash, Ord, PartialOrd)]
struct PDS<'a> {
    pd: &'a PerfDomain,
    ps: &'a PerfState,
}

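/// The `cpu_vid`-th virtual CPU slot within a performance domain; it is
/// resolved to a concrete CPU via `pd_cpu_order` in `assign_cpu_vids()`.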
#[derive(Debug, Clone, Eq, Hash, Ord, PartialOrd)]
struct PDCpu<'a> {
    pd: &'a PerfDomain,
    cpu_vid: usize,
}

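/// A candidate set of per-CPU perf-state choices along with its aggregate
/// performance and power.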
#[derive(Debug, Clone, Eq)]
struct PDSetInfo<'a> {
    performance: usize,
    power: usize,
    pdcpu_set: BTreeSet<PDCpu<'a>>,
    pd_id_set: BTreeSet<usize>,
}

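// Weights for PDSetInfo::dist(): PD overlap dominates CPU count, which
// dominates the final PD-id tie-breaker.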
const PD_UNIT: usize = 100_000_000;
const CPU_UNIT: usize = 100_000;
// How many performance levels at or above the target to scan for a match.
const LOOKAHEAD_CNT: usize = 10;

impl<'a> EnergyModelOptimizer<'a> {
    fn new(em: &'a EnergyModel, cpus_pf: &'a Vec<CpuId>) -> EnergyModelOptimizer<'a> {
        let tot_perf = em.perf_total();

        // Record each PD's CPUs in preferred order, along with the global
        // topological CPU order.
        let mut pd_cpu_order: BTreeMap<usize, RefCell<Vec<usize>>> = BTreeMap::new();
        let mut cpus_topological_order: Vec<usize> = vec![];
        for cpuid in cpus_pf.iter() {
            pd_cpu_order
                .entry(cpuid.pd_adx)
                .or_insert_with(|| RefCell::new(vec![]))
                .borrow_mut()
                .push(cpuid.cpu_adx);
            cpus_topological_order.push(cpuid.cpu_adx);
        }

        debug!("# pd_cpu_order");
        debug!("{:#?}", pd_cpu_order);

        EnergyModelOptimizer {
            em,
            cpus_topological_order,
            pd_cpu_order,
            tot_perf,
            pdss_infos: RefCell::new(BTreeMap::new()),
            perf_pdsi: RefCell::new(BTreeMap::new()),
            perf_cpu_order: RefCell::new(BTreeMap::new()),
        }
    }

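    /// Compute the per-performance-level CPU order table from the energy
    /// model.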
    fn get_perf_cpu_order_table(
        em: &'a EnergyModel,
        cpus_pf: &'a Vec<CpuId>,
    ) -> BTreeMap<usize, PerfCpuOrder> {
        let emo = EnergyModelOptimizer::new(em, cpus_pf);
        emo.gen_perf_cpu_order_table();
        let perf_cpu_order = emo.perf_cpu_order.borrow().clone();

        perf_cpu_order
    }

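    /// Without an energy model, synthesize a two-entry table: a powersave
    /// entry capped at a single CPU's capacity and a performance entry
    /// covering the machine's total capacity.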
    fn get_fake_perf_cpu_order_table(
        cpus_pf: &'a Vec<CpuId>,
        cpus_ps: &'a Vec<CpuId>,
    ) -> BTreeMap<usize, PerfCpuOrder> {
        let tot_perf: usize = cpus_pf.iter().map(|cpuid| cpuid.cpu_cap).sum();

        let pco_pf = Self::fake_pco(tot_perf, cpus_pf, false);
        let pco_ps = Self::fake_pco(tot_perf, cpus_ps, true);

        let mut perf_cpu_order: BTreeMap<usize, PerfCpuOrder> = BTreeMap::new();
        perf_cpu_order.insert(pco_pf.perf_cap, pco_pf);
        perf_cpu_order.insert(pco_ps.perf_cap, pco_ps);

        perf_cpu_order
    }

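    /// Build a fake PerfCpuOrder from an ordered CPU list: the first CPU
    /// is primary and the rest are overflow.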
    fn fake_pco(tot_perf: usize, cpuids: &'a Vec<CpuId>, powersave: bool) -> PerfCpuOrder {
        // For powersave, cap the level at the first CPU's capacity;
        // otherwise cover the total capacity.
        let perf_cap = if powersave {
            cpuids[0].cpu_cap
        } else {
            tot_perf
        };

        let perf_util: f32 = (perf_cap as f32) / (tot_perf as f32);
        let cpus: Vec<usize> = cpuids.iter().map(|cpuid| cpuid.cpu_adx).collect();
        let cpus_perf = cpus[..1].to_vec();
        let cpus_ovflw = cpus[1..].to_vec();
        PerfCpuOrder {
            perf_cap,
            perf_util,
            cpus_perf: cpus_perf.into(),
            cpus_ovflw: cpus_ovflw.into(),
        }
    }

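    /// Top-level optimization pipeline: enumerate PD/perf-state
    /// combinations, choose the best set per performance level, then map
    /// the chosen sets to concrete CPU orders.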
    fn gen_perf_cpu_order_table(&'a self) {
        // Enumerate the power/performance-distinct PD combinations.
        self.gen_all_pds_combinations();

        // Choose the most suitable PD set per performance level.
        self.gen_perf_pds_table();

        // Translate the chosen PD sets into concrete CPU orderings.
        self.assign_cpu_vids();
    }

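    /// Map each chosen PD set to concrete CPUs: the `cpu_vid`-th slot of a
    /// PD resolves to the `cpu_vid`-th entry of that PD's preferred CPU
    /// order. CPUs that appear only in higher performance levels become
    /// the current level's overflow CPUs.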
    fn assign_cpu_vids(&'a self) {
        // Resolve each performance level's PD set to its primary CPUs.
        for (&perf_cap, pdsi) in self.perf_pdsi.borrow().iter() {
            let mut cpus_perf: Vec<usize> = vec![];

            for pdcpu in pdsi.pdcpu_set.iter() {
                let pd_id = pdcpu.pd.id;
                let cpu_vid = pdcpu.cpu_vid;
                let cpu_order = self.pd_cpu_order.get(&pd_id).unwrap().borrow();
                let cpu_adx = cpu_order[cpu_vid];
                cpus_perf.push(cpu_adx);
            }

            let perf_util: f32 = (perf_cap as f32) / (self.tot_perf as f32);
            let cpus_perf = self.sort_cpus_by_topological_order(&cpus_perf);
            let cpus_ovflw: Vec<usize> = vec![];

            let mut perf_cpu_order = self.perf_cpu_order.borrow_mut();
            perf_cpu_order.insert(
                perf_cap,
                PerfCpuOrder {
                    perf_cap,
                    perf_util,
                    cpus_perf: cpus_perf.into(),
                    cpus_ovflw: cpus_ovflw.into(),
                },
            );
        }

        // For each level, overflow CPUs are those used by higher levels
        // but not by the level itself, in ascending level order.
        let perf_cpu_order = self.perf_cpu_order.borrow();
        let perf_caps: Vec<_> = self.perf_pdsi.borrow().keys().cloned().collect();
        for o in 1..perf_caps.len() {
            // Gather the primary CPUs of all higher performance levels.
            let ovrflw_perf_caps = &perf_caps[o..];
            let mut ovrflw_cpus_all: Vec<usize> = vec![];
            for perf_cap in ovrflw_perf_caps.iter() {
                let cpu_order = perf_cpu_order.get(perf_cap).unwrap();
                let cpus_perf = cpu_order.cpus_perf.borrow();
                ovrflw_cpus_all.extend(cpus_perf.iter().cloned());
            }

            // Collect the level's own primary CPUs for deduplication.
            let mut cpu_set = HashSet::<usize>::new();
            let perf_cap = perf_caps[o - 1];
            let cpu_order = perf_cpu_order.get(&perf_cap).unwrap();
            let cpus_perf = cpu_order.cpus_perf.borrow();
            for &cpu_adx in cpus_perf.iter() {
                cpu_set.insert(cpu_adx);
            }

            // Keep each overflow CPU once, in first-seen order.
            let mut cpus_ovflw: Vec<usize> = vec![];
            for &cpu_adx in ovrflw_cpus_all.iter() {
                if !cpu_set.contains(&cpu_adx) {
                    cpus_ovflw.push(cpu_adx);
                    cpu_set.insert(cpu_adx);
                }
            }

            let mut v = cpu_order.cpus_ovflw.borrow_mut();
            v.extend(cpus_ovflw.iter().cloned());
        }

        debug!("## gen_perf_cpu_order_table");
        debug!("{:#?}", perf_cpu_order);
    }

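    /// Filter the topological CPU order down to the given CPUs, preserving
    /// the topological ordering.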
    fn sort_cpus_by_topological_order(&'a self, cpus: &Vec<usize>) -> Vec<usize> {
        let mut sorted: Vec<usize> = vec![];
        for &cpu_adx in self.cpus_topological_order.iter() {
            if cpus.contains(&cpu_adx) {
                sorted.push(cpu_adx);
            }
        }
        sorted
    }

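    /// Sweep a range of target utilizations and pick the best PD set per
    /// performance level. When a new level's best set is a subset of the
    /// highest chosen set so far, the larger set is reused for the new
    /// level and the stale entry dropped, keeping CPU assignments stable
    /// across adjacent levels.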
    fn gen_perf_pds_table(&'a self) {
        let utils = vec![0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0];

        for &util in utils.iter() {
            let mut best_pdsi: Option<PDSetInfo<'a>>;
            let mut del_pdsi: Option<PDSetInfo<'a>> = None;

            match self.perf_pdsi.borrow().last_key_value() {
                Some((_, base)) => {
                    best_pdsi = self.find_perf_pds_for(util, Some(base));

                    // If the best set is contained in the current base,
                    // extend it to the full base set (so this level reuses
                    // the same CPUs) and retire the base entry.
                    if let Some(ref best) = best_pdsi {
                        if best.pdcpu_set.is_subset(&base.pdcpu_set) {
                            let ext_pdcpu = PDSetInfo {
                                performance: best.performance,
                                power: best.power,
                                pdcpu_set: base.pdcpu_set.clone(),
                                pd_id_set: base.pd_id_set.clone(),
                            };
                            best_pdsi = Some(ext_pdcpu);
                            del_pdsi = Some(base.clone());
                        }
                    }
                }
                None => {
                    best_pdsi = self.find_perf_pds_for(util, None);
                }
            };

            if let Some(best_pdsi) = best_pdsi {
                self.perf_pdsi
                    .borrow_mut()
                    .insert(best_pdsi.performance, best_pdsi);
            }

            if let Some(del_pdsi) = del_pdsi {
                self.perf_pdsi.borrow_mut().remove(&del_pdsi.performance);
            }
        }

        debug!("## gen_perf_pds_table");
        for (perf, pdsi) in self.perf_pdsi.borrow().iter() {
            debug!("PERF: [_, {}]", perf);
            for pdcpu in pdsi.pdcpu_set.iter() {
                debug!("    pd:id: {:?} -- cpu_vid: {}", pdcpu.pd.id, pdcpu.cpu_vid);
            }
        }
    }

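    /// Among the PD sets meeting `util * tot_perf`, return the one closest
    /// to `base` (see `PDSetInfo::dist`), scanning at most LOOKAHEAD_CNT
    /// performance levels at or above the target.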
    fn find_perf_pds_for(
        &'a self,
        util: f32,
        base: Option<&PDSetInfo<'a>>,
    ) -> Option<PDSetInfo<'a>> {
        let target_perf = (util * self.tot_perf as f32) as usize;
        let mut lookahead = 0;
        let mut min_dist: usize = usize::MAX;
        let mut best_pdsi: Option<PDSetInfo<'a>> = None;

        let pdss_infos = self.pdss_infos.borrow();
        for (&pdsi_perf, pdsi_set) in pdss_infos.iter() {
            if pdsi_perf >= target_perf {
                let pdsi_set_ref = pdsi_set.borrow();
                for pdsi in pdsi_set_ref.iter() {
                    let dist = pdsi.dist(base);
                    if dist < min_dist {
                        min_dist = dist;
                        best_pdsi = Some(pdsi.clone());
                    }
                }
                lookahead += 1;
                if lookahead >= LOOKAHEAD_CNT {
                    break;
                }
            }
        }

        best_pdsi
    }

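    /// Enumerate power/performance-distinct PD combinations. Exhaustively
    /// combining every perf state of every PD would blow up, so only the
    /// utilization endpoints (0% and 100%) are enumerated directly; the
    /// range in between is refined recursively for as long as midpoints
    /// keep producing new combinations.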
    fn gen_all_pds_combinations(&'a self) {
        let pdsi_vec = self.gen_pds_combinations(0.0);
        self.insert_pds_combinations(&pdsi_vec);

        let pdsi_vec = self.gen_pds_combinations(100.0);
        self.insert_pds_combinations(&pdsi_vec);

        self.gen_perf_cpuset_table_range(0, 100);

        debug!("## gen_all_pds_combinations");
        for (perf, pdss_info) in self.pdss_infos.borrow().iter() {
            debug!("PERF: [_, {}]", perf);
            for pdsi in pdss_info.borrow().iter() {
                debug!("  perf: {} -- power: {}", pdsi.performance, pdsi.power);
                for pdcpu in pdsi.pdcpu_set.iter() {
                    debug!("    pd:id: {:?} -- cpu_vid: {}", pdcpu.pd.id, pdcpu.cpu_vid);
                }
            }
        }
    }

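    /// Binary-search-style refinement over utilization [low, high]: probe
    /// the midpoint and recurse into both halves only while the midpoint
    /// still yields previously unseen combinations.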
    fn gen_perf_cpuset_table_range(&'a self, low: isize, high: isize) {
        if low > high {
            return;
        }

        let mid: isize = low + (high - low) / 2;
        let pdsi_vec = self.gen_pds_combinations(mid as f32);
        let found_new = self.insert_pds_combinations(&pdsi_vec);
        if found_new {
            self.gen_perf_cpuset_table_range(mid + 1, high);
            self.gen_perf_cpuset_table_range(low, mid - 1);
        }
    }

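    /// Build all candidate PD sets for one utilization level: expand the
    /// per-PD perf-state choices to one entry per CPU, then form every
    /// combination from a single CPU up to all CPUs.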
    fn gen_pds_combinations(&'a self, util: f32) -> Vec<PDSetInfo<'a>> {
        let mut pdsi_vec = Vec::new();

        let pds_set = self.gen_pds_set(util);
        let n = pds_set.len();
        for k in 1..n {
            let pdss = pds_set.clone();
            let pds_cmbs: Vec<_> = Combinations::new(pdss, k)
                .map(|cmb| PDSetInfo::new(cmb.clone()))
                .collect();
            pdsi_vec.extend(pds_cmbs);
        }

        // The loop covers sizes 1..n-1; add the full set as well.
        let pdsi = PDSetInfo::new(pds_set.clone());
        pdsi_vec.push(pdsi);

        pdsi_vec
    }

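    /// Merge new combinations into `pdss_infos`, keeping only the
    /// lowest-power sets per performance level. Returns true if anything
    /// new was recorded.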
    fn insert_pds_combinations(&self, new_pdsi_vec: &Vec<PDSetInfo<'a>>) -> bool {
        let mut found_new = false;

        for new_pdsi in new_pdsi_vec.iter() {
            let mut pdss_infos = self.pdss_infos.borrow_mut();
            match pdss_infos.get(&new_pdsi.performance) {
                Some(v) => {
                    let mut v = v.borrow_mut();
                    // All sets stored under one performance level share
                    // the same power, so any element is representative.
                    let cur_power = v.iter().next().unwrap().power;
                    if cur_power == new_pdsi.power {
                        // Equally efficient: keep it as an alternative.
                        if v.insert(new_pdsi.clone()) {
                            found_new = true;
                        }
                    } else if cur_power > new_pdsi.power {
                        // Strictly more efficient: replace the old sets.
                        v.clear();
                        v.insert(new_pdsi.clone());
                        found_new = true;
                    }
                }
                None => {
                    let mut v: HashSet<PDSetInfo<'a>> = HashSet::new();
                    v.insert(new_pdsi.clone());
                    pdss_infos.insert(new_pdsi.performance, v.into());
                    found_new = true;
                }
            }
        }
        found_new
    }

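    /// Pick one perf state per PD for the given utilization and expand the
    /// result to one entry per CPU.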
    fn gen_pds_set(&self, util: f32) -> Vec<PDS<'_>> {
        let mut pds_set = vec![];
        for (_, pd) in self.em.perf_doms.iter() {
            let ps = pd.select_perf_state(util).unwrap();
            let pds = PDS::new(pd, ps);
            pds_set.push(pds);
        }
        self.expand_pds_set(&mut pds_set);
        pds_set
    }

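    /// Duplicate each PDS so the set carries one entry per CPU in its PD
    /// (a PD's perf state applies to all of its CPUs), then sort the set.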
    fn expand_pds_set(&self, pds_set: &mut Vec<PDS<'_>>) {
        let mut xset = vec![];
        for pds in pds_set.iter() {
            let nr_cpus = pds.pd.span.weight();
            for _ in 1..nr_cpus {
                xset.push(pds.clone());
            }
        }
        pds_set.append(&mut xset);

        pds_set.sort();
    }
}

impl<'a> PDS<'a> {
    fn new(pd: &'a PerfDomain, ps: &'a PerfState) -> PDS<'a> {
        PDS { pd, ps }
    }
}

impl PartialEq for PDS<'_> {
    fn eq(&self, other: &Self) -> bool {
        self.pd == other.pd && self.ps == other.ps
    }
}

impl<'a> PDCpu<'a> {
    fn new(pd: &'a PerfDomain, cpu_vid: usize) -> PDCpu<'a> {
        PDCpu { pd, cpu_vid }
    }
}

impl PartialEq for PDCpu<'_> {
    fn eq(&self, other: &Self) -> bool {
        self.pd == other.pd && self.cpu_vid == other.cpu_vid
    }
}

impl fmt::Display for PDS<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "pd:id:{}/pd:weight:{}/ps:cap:{}/ps:power:{}",
            self.pd.id,
            self.pd.span.weight(),
            self.ps.performance,
            self.ps.power,
        )
    }
}

impl<'a> PDSetInfo<'a> {
    fn new(pds_set: Vec<PDS<'a>>) -> PDSetInfo<'a> {
        // Aggregate performance and power over all chosen perf states.
        let mut performance = 0;
        let mut power = 0;
        let mut pd_id_set: BTreeSet<usize> = BTreeSet::new();

        for pds in pds_set.iter() {
            performance += pds.ps.performance;
            power += pds.ps.power;
            pd_id_set.insert(pds.pd.id);
        }

        // Group identical PDS entries so that each occurrence within a PD
        // gets its own virtual CPU slot (cpu_vid).
        let mut pds_map: BTreeMap<PDS<'a>, RefCell<Vec<PDS<'a>>>> = BTreeMap::new();
        for pds in pds_set.iter() {
            pds_map
                .entry(pds.clone())
                .or_insert_with(|| RefCell::new(Vec::new()))
                .borrow_mut()
                .push(pds.clone());
        }

        let mut pdcpu_set: BTreeSet<PDCpu<'a>> = BTreeSet::new();
        for (_, v) in pds_map.iter() {
            for (cpu_vid, pds) in v.borrow().iter().enumerate() {
                pdcpu_set.insert(PDCpu::new(pds.pd, cpu_vid));
            }
        }

        PDSetInfo {
            performance,
            power,
            pdcpu_set,
            pd_id_set,
        }
    }

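    /// Distance to an optional base set; smaller is better. PD overlap
    /// with the base dominates (PD_UNIT), then a larger CPU count
    /// (CPU_UNIT), with the set's smallest PD id as a final tie-breaker
    /// favoring higher-numbered domains.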
    fn dist(&self, base: Option<&PDSetInfo<'a>>) -> usize {
        let nr_pds = self.pd_id_set.len();
        let nr_pds_overlap = match base {
            Some(base) => self.pd_id_set.intersection(&base.pd_id_set).count(),
            None => 0,
        };
        let nr_cpus = self.pdcpu_set.len();

        ((nr_pds - nr_pds_overlap) * PD_UNIT)
            + ((*NR_CPU_IDS - nr_cpus) * CPU_UNIT)
            + (*NR_CPU_IDS - self.pd_id_set.first().unwrap())
    }
}

impl PartialEq for PDSetInfo<'_> {
    fn eq(&self, other: &Self) -> bool {
        self.performance == other.performance
            && self.power == other.power
            && self.pdcpu_set == other.pdcpu_set
    }
}

impl Hash for PDSetInfo<'_> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Hashing pdcpu_set alone stays consistent with PartialEq: equal
        // values necessarily have equal pdcpu_sets, hence equal hashes.
        self.pdcpu_set.hash(state);
    }
}

impl PartialEq for PerfCpuOrder {
    fn eq(&self, other: &Self) -> bool {
        self.perf_cap == other.perf_cap
    }
}

impl fmt::Display for PerfCpuOrder {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(
            f,
            "capacity bound: {} ({}%)",
            self.perf_cap,
            self.perf_util * 100.0
        )?;
        writeln!(f, "  primary CPUs: {:?}", self.cpus_perf.borrow())?;
        write!(f, "  overflow CPUs: {:?}", self.cpus_ovflw.borrow())?;
        Ok(())
    }
}