/src/regalloc2/src/ion/data_structures.rs
Line | Count | Source |
1 | | /* |
2 | | * This file was initially derived from the files |
3 | | * `js/src/jit/BacktrackingAllocator.h` and |
4 | | * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was |
5 | | * originally licensed under the Mozilla Public License 2.0. We |
6 | | * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see |
7 | | * https://github.com/bytecodealliance/regalloc2/issues/7). |
8 | | * |
9 | | * Since the initial port, the design has been substantially evolved |
10 | | * and optimized. |
11 | | */ |
12 | | |
13 | | //! Data structures for backtracking allocator. |
14 | | |
15 | | use super::liveranges::SpillWeight; |
16 | | use crate::cfg::{CFGInfo, CFGInfoCtx}; |
17 | | use crate::index::ContainerComparator; |
18 | | use crate::indexset::IndexSet; |
19 | | use crate::Vec2; |
20 | | use crate::{ |
21 | | define_index, Allocation, Block, Bump, Edit, Function, FxHashMap, FxHashSet, MachineEnv, |
22 | | Operand, Output, PReg, ProgPoint, RegClass, VReg, |
23 | | }; |
24 | | use alloc::collections::BTreeMap; |
25 | | use alloc::collections::VecDeque; |
26 | | use alloc::string::String; |
27 | | use alloc::vec::Vec; |
28 | | use core::cmp::Ordering; |
29 | | use core::fmt::Debug; |
30 | | use core::ops::{Deref, DerefMut}; |
31 | | use smallvec::SmallVec; |
32 | | |
/// A range from `from` (inclusive) to `to` (exclusive).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct CodeRange {
    /// Inclusive start of the range.
    pub from: ProgPoint,
    /// Exclusive end of the range.
    pub to: ProgPoint,
}
39 | | |
impl CodeRange {
    /// Does the range cover no program points at all?
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.from >= self.to
    }
    /// Does `self` fully contain `other`?
    #[inline(always)]
    pub fn contains(&self, other: &Self) -> bool {
        other.from >= self.from && other.to <= self.to
    }
    /// Does the range contain the single program point `other`?
    #[inline(always)]
    pub fn contains_point(&self, other: ProgPoint) -> bool {
        other >= self.from && other < self.to
    }
    /// Do `self` and `other` share at least one program point?
    #[inline(always)]
    pub fn overlaps(&self, other: &Self) -> bool {
        other.to > self.from && other.from < self.to
    }
    /// Length measured in *instructions* (difference of the endpoints'
    /// instruction indices), not in program points.
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.to.inst().index() - self.from.inst().index()
    }
    /// Returns the range covering just one program point.
    #[inline(always)]
    pub fn singleton(pos: ProgPoint) -> CodeRange {
        CodeRange {
            from: pos,
            to: pos.next(),
        }
    }

    /// Join two [CodeRange] values together, producing a [CodeRange] that includes both.
    #[inline(always)]
    pub fn join(&self, other: CodeRange) -> Self {
        CodeRange {
            from: self.from.min(other.from),
            to: self.to.max(other.to),
        }
    }
}
79 | | |
impl core::cmp::PartialOrd for CodeRange {
    #[inline(always)]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
// NOTE: this ordering deliberately treats any two *overlapping* ranges as
// `Equal` and orders only disjoint ranges. That bends the usual `Ord`
// contract (equality is not transitive across chains of overlaps), but it
// is exactly what ordered searches over collections of non-overlapping
// ranges need: probing with any range that overlaps an entry finds it.
impl core::cmp::Ord for CodeRange {
    #[inline(always)]
    fn cmp(&self, other: &Self) -> Ordering {
        if self.to <= other.from {
            Ordering::Less
        } else if self.from >= other.to {
            Ordering::Greater
        } else {
            // Overlapping ranges compare as equal.
            Ordering::Equal
        }
    }
}
98 | | |
// Typed index newtypes for the allocator's core entities; where a second
// and third name are given, `define_index!` also declares the typed
// container mapping that index to its data.
define_index!(LiveBundleIndex, LiveBundles, LiveBundle);
define_index!(LiveRangeIndex, LiveRanges, LiveRange);
define_index!(SpillSetIndex, SpillSets, SpillSet);
define_index!(UseIndex);
define_index!(VRegIndex, VRegs, VRegData);
define_index!(PRegIndex);
define_index!(SpillSlotIndex);

/// Used to carry small sets of bundles, e.g. for conflict sets.
pub type LiveBundleVec = Vec<LiveBundleIndex>;

/// A live range's code range together with the index of its data, as
/// stored in per-vreg and per-bundle range lists.
#[derive(Clone, Copy, Debug)]
pub struct LiveRangeListEntry {
    /// The code range covered by the live range.
    pub range: CodeRange,
    /// Index of the corresponding [`LiveRange`].
    pub index: LiveRangeIndex,
}

// Bump-allocated vectors for range and use lists.
pub type LiveRangeList = Vec2<LiveRangeListEntry, Bump>;
pub type UseList = Vec2<Use, Bump>;
118 | | |
/// A single live range: a [`CodeRange`] over which a vreg is live, with
/// its owning vreg and bundle plus its uses.
#[derive(Clone, Debug)]
pub struct LiveRange {
    pub range: CodeRange,

    pub vreg: VRegIndex,
    pub bundle: LiveBundleIndex,
    /// Packed word: bits 0..=28 hold the use spill weight and bits 29..=31
    /// hold [`LiveRangeFlag`] bits (see the accessors in `impl LiveRange`).
    pub uses_spill_weight_and_flags: u32,
    pub(crate) uses: UseList,
}

/// Flags stored (shifted left by 29) in the top bits of
/// `LiveRange::uses_spill_weight_and_flags`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u32)]
pub enum LiveRangeFlag {
    /// The live range starts at a definition of its vreg.
    StartsAtDef = 1,
}
134 | | |
135 | | impl LiveRange { |
136 | | #[inline(always)] |
137 | 8.17M | pub fn set_flag(&mut self, flag: LiveRangeFlag) { |
138 | 8.17M | self.uses_spill_weight_and_flags |= (flag as u32) << 29; |
139 | 8.17M | } |
140 | | #[inline(always)] |
141 | 0 | pub fn clear_flag(&mut self, flag: LiveRangeFlag) { |
142 | 0 | self.uses_spill_weight_and_flags &= !((flag as u32) << 29); |
143 | 0 | } |
144 | | #[inline(always)] |
145 | 0 | pub fn assign_flag(&mut self, flag: LiveRangeFlag, val: bool) { |
146 | 0 | let bit = if val { (flag as u32) << 29 } else { 0 }; |
147 | 0 | self.uses_spill_weight_and_flags &= 0xe000_0000; |
148 | 0 | self.uses_spill_weight_and_flags |= bit; |
149 | 0 | } |
150 | | #[inline(always)] |
151 | 2.28M | pub fn has_flag(&self, flag: LiveRangeFlag) -> bool { |
152 | 2.28M | self.uses_spill_weight_and_flags & ((flag as u32) << 29) != 0 |
153 | 2.28M | } |
154 | | #[inline(always)] |
155 | 0 | pub fn flag_word(&self) -> u32 { |
156 | 0 | self.uses_spill_weight_and_flags & 0xe000_0000 |
157 | 0 | } |
158 | | #[inline(always)] |
159 | 0 | pub fn merge_flags(&mut self, flag_word: u32) { |
160 | 0 | self.uses_spill_weight_and_flags |= flag_word; |
161 | 0 | } |
162 | | #[inline(always)] |
163 | 24.0M | pub fn uses_spill_weight(&self) -> SpillWeight { |
164 | | // NOTE: the spill weight is technically stored in 29 bits, but we ignore the sign bit as |
165 | | // we will always be dealing with positive values. Thus we mask out the top 3 bits to |
166 | | // ensure that the sign bit is clear, then shift left by only two. |
167 | 24.0M | let bits = (self.uses_spill_weight_and_flags & 0x1fff_ffff) << 2; |
168 | 24.0M | SpillWeight::from_f32(f32::from_bits(bits)) |
169 | 24.0M | } |
170 | | #[inline(always)] |
171 | 17.5M | pub fn set_uses_spill_weight(&mut self, weight: SpillWeight) { |
172 | 17.5M | let weight_bits = (weight.to_f32().to_bits() >> 2) & 0x1fff_ffff; |
173 | 17.5M | self.uses_spill_weight_and_flags = |
174 | 17.5M | (self.uses_spill_weight_and_flags & 0xe000_0000) | weight_bits; |
175 | 17.5M | } |
176 | | } |
177 | | |
/// A single use of a vreg: the operand, its program point, and the
/// operand slot within the instruction it occupies.
#[derive(Clone, Copy, Debug)]
pub struct Use {
    pub operand: Operand,
    pub pos: ProgPoint,
    /// Operand-slot index within the instruction.
    pub slot: u16,
    /// Spill-weight contribution; starts at zero and is updated when the
    /// use is inserted into a live range (see `Use::new`).
    pub weight: u16,
}

impl Use {
    /// Create a use with an as-yet-unassigned weight.
    #[inline(always)]
    pub fn new(operand: Operand, pos: ProgPoint, slot: u16) -> Self {
        Self {
            operand,
            pos,
            slot,
            // Weight is updated on insertion into LR.
            weight: 0,
        }
    }
}
198 | | |
/// A bundle of live ranges that are allocated together.
#[derive(Clone, Debug)]
pub struct LiveBundle {
    pub(crate) ranges: LiveRangeList,
    pub spillset: SpillSetIndex,
    pub allocation: Allocation,
    pub prio: u32, // recomputed after every bulk update
    /// Packed word: bits 0..=27 hold the cached spill weight; bit 28 =
    /// "stack", bit 29 = "fixed def", bit 30 = "fixed", bit 31 = "minimal"
    /// (see the `cached_*` accessors below).
    pub spill_weight_and_props: u32,
    pub limit: Option<u8>,
}

/// Largest value representable in the 28 spill-weight bits.
pub const BUNDLE_MAX_SPILL_WEIGHT: u32 = (1 << 28) - 1;
// The constants below carve sentinel sub-ranges out of the top of the
// weight space; their names indicate the intended bundle category and the
// consumers live elsewhere in the allocator.
pub const MINIMAL_FIXED_BUNDLE_SPILL_WEIGHT: u32 = BUNDLE_MAX_SPILL_WEIGHT;
pub const MINIMAL_LIMITED_BUNDLE_SPILL_WEIGHT: u32 = BUNDLE_MAX_SPILL_WEIGHT - 1;
pub const MINIMAL_BUNDLE_SPILL_WEIGHT: u32 = MINIMAL_LIMITED_BUNDLE_SPILL_WEIGHT - 256;
/// Upper bound for ordinarily-computed (non-sentinel) spill weights.
pub const BUNDLE_MAX_NORMAL_SPILL_WEIGHT: u32 = MINIMAL_BUNDLE_SPILL_WEIGHT - 1;
214 | | |
215 | | impl LiveBundle { |
216 | | #[inline(always)] |
217 | 9.26M | pub fn set_cached_spill_weight_and_props( |
218 | 9.26M | &mut self, |
219 | 9.26M | spill_weight: u32, |
220 | 9.26M | minimal: bool, |
221 | 9.26M | fixed: bool, |
222 | 9.26M | fixed_def: bool, |
223 | 9.26M | stack: bool, |
224 | 9.26M | ) { |
225 | 9.26M | debug_assert!(spill_weight <= BUNDLE_MAX_SPILL_WEIGHT); |
226 | 9.26M | self.spill_weight_and_props = spill_weight |
227 | 9.26M | | (if minimal { 1 << 31 } else { 0 }) |
228 | 9.26M | | (if fixed { 1 << 30 } else { 0 }) |
229 | 9.26M | | (if fixed_def { 1 << 29 } else { 0 }) |
230 | 9.26M | | (if stack { 1 << 28 } else { 0 }); |
231 | 9.26M | } |
232 | | |
233 | | #[inline(always)] |
234 | 1.28M | pub fn cached_minimal(&self) -> bool { |
235 | 1.28M | self.spill_weight_and_props & (1 << 31) != 0 |
236 | 1.28M | } |
237 | | |
238 | | #[inline(always)] |
239 | 1.45M | pub fn cached_fixed(&self) -> bool { |
240 | 1.45M | self.spill_weight_and_props & (1 << 30) != 0 |
241 | 1.45M | } |
242 | | |
243 | | #[inline(always)] |
244 | 8.56M | pub fn cached_fixed_def(&self) -> bool { |
245 | 8.56M | self.spill_weight_and_props & (1 << 29) != 0 |
246 | 8.56M | } |
247 | | |
248 | | #[inline(always)] |
249 | 938k | pub fn cached_stack(&self) -> bool { |
250 | 938k | self.spill_weight_and_props & (1 << 28) != 0 |
251 | 938k | } |
252 | | |
253 | | #[inline(always)] |
254 | 308k | pub fn set_cached_fixed(&mut self) { |
255 | 308k | self.spill_weight_and_props |= 1 << 30; |
256 | 308k | } |
257 | | |
258 | | #[inline(always)] |
259 | 77.7k | pub fn set_cached_fixed_def(&mut self) { |
260 | 77.7k | self.spill_weight_and_props |= 1 << 29; |
261 | 77.7k | } |
262 | | |
263 | | #[inline(always)] |
264 | 0 | pub fn set_cached_stack(&mut self) { |
265 | 0 | self.spill_weight_and_props |= 1 << 28; |
266 | 0 | } |
267 | | |
268 | | #[inline(always)] |
269 | 76.5M | pub fn cached_spill_weight(&self) -> u32 { |
270 | 76.5M | self.spill_weight_and_props & BUNDLE_MAX_SPILL_WEIGHT |
271 | 76.5M | } |
272 | | } |
273 | | |
/// A pair of bundle properties returned/consumed together: whether the
/// bundle is minimal and whether it carries a fixed constraint.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BundleProperties {
    pub minimal: bool,
    pub fixed: bool,
}
279 | | |
/// Calculate the maximum `N` inline capacity for a `SmallVec<[T; N]>` we can
/// have without bloating its size to be larger than a `Vec<T>`.
const fn no_bloat_capacity<T>() -> usize {
    // A `Vec<T>` is three words: `(pointer, capacity, length)`. A
    // `SmallVec<[T; N]>` replaces the first two of those members with a
    // union of `[T; N]` and `(pointer, capacity)`:
    //
    //     union {
    //         Inline([T; N]),
    //         Heap(pointer, capacity),
    //     }
    //
    // So the inline storage is free as long as
    // `size_of([T; N]) <= size_of(pointer) + size_of(capacity)`; integer
    // division rounds down to the largest such `N`.
    let two_words = 2 * core::mem::size_of::<usize>();
    two_words / core::mem::size_of::<T>()
}
296 | | |
/// Spill state shared by a group of bundles: the spill slot, an optional
/// preferred register hint, and the designated spill bundle.
#[derive(Clone, Debug)]
pub struct SpillSet {
    pub slot: SpillSlotIndex,
    pub hint: PReg,
    pub class: RegClass,
    pub spill_bundle: LiveBundleIndex,
    pub required: bool,
    /// Split count for this spillset. NOTE(review): presumably capped at
    /// `MAX_SPLITS_PER_SPILLSET` by the splitting logic — confirm at the
    /// call sites.
    pub splits: u8,

    /// The aggregate [`CodeRange`] of all involved [`LiveRange`]s. The effect of this abstraction
    /// is that we attempt to allocate one spill slot for the extent of a bundle. For fragmented
    /// bundles with lots of open space this abstraction is pessimistic, but when bundles are small
    /// or dense this yields similar results to tracking individual live ranges.
    pub range: CodeRange,
}

/// Intended upper bound for `SpillSet::splits`.
pub(crate) const MAX_SPLITS_PER_SPILLSET: u8 = 2;
314 | | |
/// Per-vreg state: its live ranges plus blockparam and class information.
#[derive(Clone, Debug)]
pub struct VRegData {
    pub(crate) ranges: LiveRangeList,
    /// Block in which this vreg is a blockparam. NOTE(review): there is no
    /// `Option` here, so presumably a sentinel/invalid `Block` marks
    /// "not a blockparam" — confirm at the call sites.
    pub blockparam: Block,
    // We don't initially know the RegClass until we observe a use of the VReg.
    pub class: Option<RegClass>,
}

/// Per-preg state: the set of live ranges currently allocated to the
/// register, and whether the "register" is actually a stack location.
#[derive(Clone, Debug)]
pub struct PRegData {
    pub allocations: LiveRangeSet,
    pub is_stack: bool,
}
328 | | |
/// A fixup copy recorded when multiple fixed-register constraints on the
/// same vreg at one program point are collapsed into one; see the comment
/// on `Ctx::multi_fixed_reg_fixups` for the full story.
#[derive(Clone, Debug)]
pub struct MultiFixedRegFixup {
    pub pos: ProgPoint,
    pub from_slot: u16,
    pub to_slot: u16,
    pub level: FixedRegFixupLevel,
    pub to_preg: PRegIndex,
    pub vreg: VRegIndex,
}

/// Ordering level for fixed-register fixup copies at one program point.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum FixedRegFixupLevel {
    /// A fixup copy for the initial fixed reg; must come first.
    Initial,
    /// A fixup copy from the first fixed reg to other fixed regs for
    /// the same vreg; must come second.
    Secondary,
}
347 | | |
/// The field order is significant: these are sorted so that a
/// scan over vregs, then blocks in each range, can scan in
/// order through this (sorted) list and add allocs to the
/// half-move list.
///
/// The fields in this struct are reversed in sort order so that the entire
/// struct can be treated as a u128 for sorting purposes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(C)]
pub struct BlockparamOut {
    pub to_vreg: VRegIndex,
    pub to_block: Block,
    pub from_block: Block,
    pub from_vreg: VRegIndex,
}
impl BlockparamOut {
    /// Pack the fields into a `u128` sort key, most significant first:
    /// (from_vreg, from_block, to_block, to_vreg).
    #[inline(always)]
    pub fn key(&self) -> u128 {
        u128_key(
            self.from_vreg.raw_u32(),
            self.from_block.raw_u32(),
            self.to_block.raw_u32(),
            self.to_vreg.raw_u32(),
        )
    }
}
374 | | |
/// As for [`BlockparamOut`] above, field order is significant.
///
/// The fields in this struct are reversed in sort order so that the entire
/// struct can be treated as a u128 for sorting purposes.
#[derive(Clone, Debug)]
#[repr(C)]
pub struct BlockparamIn {
    pub from_block: Block,
    pub to_block: Block,
    pub to_vreg: VRegIndex,
}
impl BlockparamIn {
    /// Pack the fields into a `u128` sort key, most significant first:
    /// (to_vreg, to_block, from_block), with the low word zero.
    #[inline(always)]
    pub fn key(&self) -> u128 {
        u128_key(
            self.to_vreg.raw_u32(),
            self.to_block.raw_u32(),
            self.from_block.raw_u32(),
            0,
        )
    }
}
397 | | |
impl LiveRanges {
    /// Allocate a fresh, unattached live range covering `range`. The vreg
    /// and bundle links start out invalid and are filled in later; the use
    /// list is bump-allocated from `bump`.
    pub(crate) fn add(&mut self, range: CodeRange, bump: Bump) -> LiveRangeIndex {
        self.push(LiveRange {
            range,
            vreg: VRegIndex::invalid(),
            bundle: LiveBundleIndex::invalid(),
            uses_spill_weight_and_flags: 0,
            uses: UseList::new_in(bump),
        })
    }
}
409 | | |
impl LiveBundles {
    /// Allocate a fresh, empty bundle with no allocation, no spillset, and
    /// zeroed priority/weight; the range list is bump-allocated.
    pub(crate) fn add(&mut self, bump: Bump) -> LiveBundleIndex {
        self.push(LiveBundle {
            allocation: Allocation::none(),
            ranges: LiveRangeList::new_in(bump),
            spillset: SpillSetIndex::invalid(),
            prio: 0,
            spill_weight_and_props: 0,
            limit: None,
        })
    }
}
422 | | |
impl VRegs {
    /// Append data for the next vreg. The assigned index must equal the
    /// vreg's own number (checked in debug builds), i.e. vregs must be
    /// added in order.
    pub fn add(&mut self, reg: VReg, data: VRegData) -> VRegIndex {
        let idx = self.push(data);
        debug_assert_eq!(reg.vreg(), idx.index());
        idx
    }
}

/// Allow indexing the vreg table directly by `VReg` (via its number).
impl core::ops::Index<VReg> for VRegs {
    type Output = VRegData;

    #[inline(always)]
    fn index(&self, idx: VReg) -> &Self::Output {
        &self.storage[idx.vreg()]
    }
}

impl core::ops::IndexMut<VReg> for VRegs {
    #[inline(always)]
    fn index_mut(&mut self, idx: VReg) -> &mut Self::Output {
        &mut self.storage[idx.vreg()]
    }
}
446 | | |
/// All allocator state: CFG info, liveness, the entity tables (ranges,
/// bundles, spillsets, vregs, pregs), the allocation queue, spill-slot
/// state, the output being built, and a set of `scratch_*` structures
/// kept here so their storage can be reused.
#[derive(Default)]
pub struct Ctx {
    pub(crate) cfginfo: CFGInfo,
    pub(crate) cfginfo_ctx: CFGInfoCtx,
    pub(crate) liveins: Vec<IndexSet>,
    pub(crate) liveouts: Vec<IndexSet>,
    pub(crate) blockparam_outs: Vec<BlockparamOut>,
    pub(crate) blockparam_ins: Vec<BlockparamIn>,

    // Core entity tables, indexed by the typed indices above.
    pub(crate) ranges: LiveRanges,
    pub(crate) bundles: LiveBundles,
    pub(crate) spillsets: SpillSets,
    pub(crate) vregs: VRegs,
    pub(crate) pregs: Vec<PRegData>,
    pub(crate) allocation_queue: PrioQueue,

    // Spill state, per register class (3 classes).
    pub(crate) spilled_bundles: Vec<LiveBundleIndex>,
    pub(crate) spillslots: Vec<SpillSlotData>,
    pub(crate) slots_by_class: [SpillSlotList; 3],

    pub(crate) extra_spillslots_by_class: [SmallVec<[Allocation; 2]>; 3],
    pub(crate) preferred_victim_by_class: [PReg; 3],

    // When multiple fixed-register constraints are present on a
    // single VReg at a single program point (this can happen for,
    // e.g., call args that use the same value multiple times), we
    // remove all but one of the fixed-register constraints, make a
    // note here, and add a clobber with that PReg instead to keep
    // the register available. When we produce the final edit-list, we
    // will insert a copy from wherever the VReg's primary allocation
    // was to the appropriate PReg.
    pub(crate) multi_fixed_reg_fixups: Vec<MultiFixedRegFixup>,

    pub(crate) allocated_bundle_count: usize,

    // For debug output only: a list of textual annotations at every
    // ProgPoint to insert into the final allocated program listing.
    pub(crate) debug_annotations: FxHashMap<ProgPoint, Vec<String>>,
    pub(crate) annotations_enabled: bool,

    // Cached allocation for `try_to_allocate_bundle_to_reg` to avoid allocating
    // a new HashSet on every call.
    pub(crate) conflict_set: FxHashSet<LiveBundleIndex>,

    // Output:
    pub output: Output,

    // Reusable scratch storage: kept in the context so that capacity is
    // retained rather than reallocated.
    pub(crate) scratch_conflicts: LiveBundleVec,
    pub(crate) scratch_bundle: LiveBundleVec,
    pub(crate) scratch_vreg_ranges: Vec<LiveRangeIndex>,
    pub(crate) scratch_spillset_pool: Vec<SpillSetRanges>,

    pub(crate) scratch_workqueue: VecDeque<Block>,

    pub(crate) scratch_operand_rewrites: FxHashMap<usize, Operand>,
    pub(crate) scratch_removed_lrs: FxHashSet<LiveRangeIndex>,
    pub(crate) scratch_removed_lrs_vregs: FxHashSet<VRegIndex>,
    pub(crate) scratch_workqueue_set: FxHashSet<Block>,

    pub(crate) scratch_bump: Bump,
}

impl Ctx {
    /// Obtain a (cloned) handle to the scratch bump arena.
    pub(crate) fn bump(&self) -> Bump {
        self.scratch_bump.clone()
    }
}
514 | | |
/// Per-run environment: the input function and machine description,
/// borrowed immutably, plus the mutable allocator context.
pub struct Env<'a, F: Function> {
    pub func: &'a F,
    pub env: &'a MachineEnv,
    pub ctx: &'a mut Ctx,
}

// `Env` derefs to `Ctx` so allocator code can reach context fields and
// methods directly.
impl<'a, F: Function> Deref for Env<'a, F> {
    type Target = Ctx;

    fn deref(&self) -> &Self::Target {
        self.ctx
    }
}

impl<'a, F: Function> DerefMut for Env<'a, F> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.ctx
    }
}

impl<'a, F: Function> Env<'a, F> {
    /// Get the VReg (with bundled RegClass) from a vreg index.
    ///
    /// Panics if the vreg's class has not yet been recorded via
    /// [`Env::observe_vreg_class`].
    #[inline]
    pub fn vreg(&self, index: VRegIndex) -> VReg {
        let class = self.vregs[index]
            .class
            .expect("trying to get a VReg before observing its class");
        VReg::new(index.index(), class)
    }

    /// Record the class of a VReg. We learn this only when we observe
    /// the VRegs in use.
    pub fn observe_vreg_class(&mut self, vreg: VReg) {
        let old_class = self.vregs[vreg].class.replace(vreg.class());
        // We should never observe two different classes for two
        // mentions of a VReg in the source program.
        debug_assert!(old_class == None || old_class == Some(vreg.class()));
    }

    /// Is this vreg actually used in the source program?
    /// (True iff a class has been observed for it.)
    pub fn is_vreg_used(&self, index: VRegIndex) -> bool {
        self.vregs[index].class.is_some()
    }
}
559 | | |
/// Map from overlap-keyed live range keys to the spill set occupying them.
#[derive(Clone, Debug, Default)]
pub struct SpillSetRanges {
    pub btree: BTreeMap<LiveRangeKey, SpillSetIndex>,
}

/// State of one allocated spill slot: its occupancy map, its size in
/// slots, and the `Allocation` referring to it.
#[derive(Clone, Debug)]
pub struct SpillSlotData {
    pub ranges: SpillSetRanges,
    pub slots: u32,
    pub alloc: Allocation,
}

/// Per-class list of spill slots; `probe_start` records where the
/// wrap-around probe (see `next_index`) should begin.
#[derive(Clone, Debug, Default)]
pub struct SpillSlotList {
    pub slots: SmallVec<[SpillSlotIndex; 32]>,
    pub probe_start: usize,
}
577 | | |
578 | | impl SpillSlotList { |
579 | | /// Get the next spillslot index in probing order, wrapping around |
580 | | /// at the end of the slots list. |
581 | 2.63M | pub(crate) fn next_index(&self, index: usize) -> usize { |
582 | 2.63M | debug_assert!(index < self.slots.len()); |
583 | 2.63M | if index == self.slots.len() - 1 { |
584 | 280k | 0 |
585 | | } else { |
586 | 2.35M | index + 1 |
587 | | } |
588 | 2.63M | } |
589 | | } |
590 | | |
/// Max-heap of bundles awaiting allocation, ordered by priority.
#[derive(Clone, Debug, Default)]
pub struct PrioQueue {
    pub heap: alloc::collections::BinaryHeap<PrioQueueEntry>,
}

/// Entry in [`PrioQueue`]. Field order matters: the derived `Ord` compares
/// `prio` first, so the binary max-heap yields highest-priority bundles
/// first.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct PrioQueueEntry {
    pub prio: u32,
    pub bundle: LiveBundleIndex,
    pub hint: PReg,
}
602 | | |
/// Map from overlap-keyed range keys to live range indices; used for the
/// per-preg allocation maps (see `PRegData::allocations`).
#[derive(Clone, Debug)]
pub struct LiveRangeSet {
    pub btree: BTreeMap<LiveRangeKey, LiveRangeIndex>,
}

/// BTree key for a [`CodeRange`], stored as raw `u32` program-point
/// indices. Its `Eq`/`Ord` impls below treat *overlapping* keys as equal,
/// enabling overlap queries against a `BTreeMap`.
#[derive(Clone, Copy, Debug)]
pub struct LiveRangeKey {
    pub from: u32,
    pub to: u32,
}
613 | | |
impl LiveRangeKey {
    /// Build a key from a range's raw program-point indices.
    #[inline(always)]
    pub fn from_range(range: &CodeRange) -> Self {
        Self {
            from: range.from.to_index(),
            to: range.to.to_index(),
        }
    }

    /// Recover the [`CodeRange`] this key encodes.
    #[inline(always)]
    pub fn to_range(&self) -> CodeRange {
        CodeRange {
            from: ProgPoint::from_index(self.from),
            to: ProgPoint::from_index(self.to),
        }
    }
}
631 | | |
// NOTE: these impls implement *overlap* as equality: two keys are "equal"
// whenever their half-open ranges intersect, and are ordered only when
// disjoint. This deliberately bends the `Eq`/`Ord` laws (equality is not
// transitive through chains of overlaps), but it is what lets a `BTreeMap`
// keyed by `LiveRangeKey` — whose stored keys never overlap each other —
// be probed with any overlapping query range.
impl core::cmp::PartialEq for LiveRangeKey {
    #[inline(always)]
    fn eq(&self, other: &Self) -> bool {
        self.to > other.from && self.from < other.to
    }
}
impl core::cmp::Eq for LiveRangeKey {}
impl core::cmp::PartialOrd for LiveRangeKey {
    #[inline(always)]
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl core::cmp::Ord for LiveRangeKey {
    #[inline(always)]
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        if self.to <= other.from {
            core::cmp::Ordering::Less
        } else if self.from >= other.to {
            core::cmp::Ordering::Greater
        } else {
            // Overlapping keys compare as equal.
            core::cmp::Ordering::Equal
        }
    }
}
657 | | |
/// Comparator ordering bundle indices by an external priority table;
/// plugs into containers that sort via [`ContainerComparator`].
pub struct PrioQueueComparator<'a> {
    pub prios: &'a [usize],
}
impl<'a> ContainerComparator for PrioQueueComparator<'a> {
    type Ix = LiveBundleIndex;
    fn compare(&self, a: Self::Ix, b: Self::Ix) -> core::cmp::Ordering {
        self.prios[a.index()].cmp(&self.prios[b.index()])
    }
}
667 | | |
668 | | impl PrioQueue { |
669 | | #[inline(always)] |
670 | 9.48M | pub fn insert(&mut self, bundle: LiveBundleIndex, prio: usize, hint: PReg) { |
671 | 9.48M | self.heap.push(PrioQueueEntry { |
672 | 9.48M | prio: prio as u32, |
673 | 9.48M | bundle, |
674 | 9.48M | hint, |
675 | 9.48M | }); |
676 | 9.48M | } |
677 | | |
678 | | #[inline(always)] |
679 | 0 | pub fn is_empty(self) -> bool { |
680 | 0 | self.heap.is_empty() |
681 | 0 | } |
682 | | |
683 | | #[inline(always)] |
684 | 9.50M | pub fn pop(&mut self) -> Option<(LiveBundleIndex, PReg)> { |
685 | 9.50M | self.heap.pop().map(|entry| (entry.bundle, entry.hint)) |
686 | 9.50M | } |
687 | | } |
688 | | |
689 | | impl LiveRangeSet { |
690 | 11.0k | pub(crate) fn new() -> Self { |
691 | 11.0k | Self { |
692 | 11.0k | btree: BTreeMap::default(), |
693 | 11.0k | } |
694 | 11.0k | } |
695 | | } |
696 | | |
/// A move queued for insertion at a particular position and priority.
#[derive(Clone, Debug)]
pub struct InsertedMove {
    pub pos_prio: PosWithPrio,
    pub from_alloc: Allocation,
    pub to_alloc: Allocation,
    pub to_vreg: VReg,
}

/// Priority classes for moves inserted at the same program point. The
/// declaration order defines the derived `Ord`, which in turn defines the
/// sort order among moves at one point.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum InsertMovePrio {
    InEdgeMoves,
    Regular,
    MultiFixedRegInitial,
    MultiFixedRegSecondary,
    ReusedInput,
    OutEdgeMoves,
}

/// Accumulator for the moves to be inserted into the final program.
#[derive(Debug, Default)]
pub struct InsertedMoves {
    pub moves: Vec<InsertedMove>,
}
719 | | |
impl InsertedMoves {
    /// Queue a move from `from_alloc` to `to_alloc` at `pos` with priority
    /// `prio`. No-op moves (identical source and destination) are dropped;
    /// register classes are sanity-checked in debug builds.
    pub fn push(
        &mut self,
        pos: ProgPoint,
        prio: InsertMovePrio,
        from_alloc: Allocation,
        to_alloc: Allocation,
        to_vreg: VReg,
    ) {
        trace!(
            "insert_move: pos {:?} prio {:?} from_alloc {:?} to_alloc {:?} to_vreg {:?}",
            pos,
            prio,
            from_alloc,
            to_alloc,
            to_vreg
        );
        if from_alloc == to_alloc {
            trace!(" -> skipping move with same source and dest");
            return;
        }
        // Either side, when it is a register, must match the vreg's class.
        if let Some(from) = from_alloc.as_reg() {
            debug_assert_eq!(from.class(), to_vreg.class());
        }
        if let Some(to) = to_alloc.as_reg() {
            debug_assert_eq!(to.class(), to_vreg.class());
        }
        self.moves.push(InsertedMove {
            pos_prio: PosWithPrio {
                pos,
                prio: prio as u32,
            },
            from_alloc,
            to_alloc,
            to_vreg,
        });
    }
}
758 | | |
/// Final edit list: `(position+priority, edit)` pairs, sorted by `sort`
/// before being drained into the output.
#[derive(Clone, Debug, Default)]
pub struct Edits {
    edits: Vec<(PosWithPrio, Edit)>,
}

impl Edits {
    /// Create an edit list with capacity preallocated for `n` edits.
    #[inline(always)]
    pub fn with_capacity(n: usize) -> Self {
        Self {
            edits: Vec::with_capacity(n),
        }
    }

    /// Number of queued edits.
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.edits.len()
    }

    /// Iterate over the queued edits with their position/priority keys.
    #[inline(always)]
    pub fn iter(&self) -> impl Iterator<Item = &(PosWithPrio, Edit)> {
        self.edits.iter()
    }

    /// Drain all edits, dropping the priority component of the key.
    #[inline(always)]
    pub fn drain_edits(&mut self) -> impl Iterator<Item = (ProgPoint, Edit)> + '_ {
        self.edits.drain(..).map(|(pos, edit)| (pos.pos, edit))
    }

    /// Sort edits by the combination of their program position and priority. This is a stable sort
    /// to preserve the order of the moves the parallel move resolver inserts.
    #[inline(always)]
    pub fn sort(&mut self) {
        self.edits.sort_by_key(|&(pos_prio, _)| pos_prio.key());
    }

    /// Queue a move edit, skipping no-ops. Reg-to-reg moves must stay
    /// within one register class (debug-checked).
    pub fn add(&mut self, pos_prio: PosWithPrio, from: Allocation, to: Allocation) {
        if from != to {
            if from.is_reg() && to.is_reg() {
                debug_assert_eq!(from.as_reg().unwrap().class(), to.as_reg().unwrap().class());
            }
            self.edits.push((pos_prio, Edit::Move { from, to }));
        }
    }
}
803 | | |
/// A program point paired with an insertion priority.
///
/// The fields in this struct are reversed in sort order so that the entire
/// struct can be treated as a u64 for sorting purposes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(C)]
pub struct PosWithPrio {
    pub prio: u32,
    pub pos: ProgPoint,
}

impl PosWithPrio {
    /// Sort key: position in the high 32 bits (most significant), with
    /// priority breaking ties at the same point.
    #[inline]
    pub fn key(self) -> u64 {
        u64_key(self.pos.to_index(), self.prio)
    }
}
819 | | |
/// Counters recording the amount of work performed by the various
/// allocator passes; for diagnostics/metrics only.
#[derive(Clone, Copy, Debug, Default)]
#[cfg_attr(feature = "enable-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Stats {
    pub livein_blocks: usize,
    pub livein_iterations: usize,
    pub initial_liverange_count: usize,
    pub merged_bundle_count: usize,
    pub process_bundle_count: usize,
    pub process_bundle_reg_probes_fixed: usize,
    pub process_bundle_reg_success_fixed: usize,
    pub process_bundle_bounding_range_probe_start_any: usize,
    pub process_bundle_bounding_range_probes_any: usize,
    pub process_bundle_bounding_range_success_any: usize,
    pub process_bundle_reg_probe_start_any: usize,
    pub process_bundle_reg_probes_any: usize,
    pub process_bundle_reg_success_any: usize,
    pub evict_bundle_event: usize,
    pub evict_bundle_count: usize,
    pub splits: usize,
    pub splits_clobbers: usize,
    pub splits_hot: usize,
    pub splits_conflicts: usize,
    pub splits_defs: usize,
    pub splits_all: usize,
    pub final_liverange_count: usize,
    pub final_bundle_count: usize,
    pub spill_bundle_count: usize,
    pub spill_bundle_reg_probes: usize,
    pub spill_bundle_reg_success: usize,
    pub blockparam_ins_count: usize,
    pub blockparam_outs_count: usize,
    pub halfmoves_count: usize,
    pub edits_count: usize,
}
854 | | |
// Helpers for building composite sort keys. Arguments run from the most
// significant field down to the least significant one.
//
// These work best when the fields are stored in reverse order in memory so
// that a single wide load can fetch the whole key on little-endian
// machines.
#[inline(always)]
pub fn u64_key(b: u32, a: u32) -> u64 {
    ((b as u64) << 32) | (a as u64)
}
#[inline(always)]
pub fn u128_key(d: u32, c: u32, b: u32, a: u32) -> u128 {
    ((d as u128) << 96) | ((c as u128) << 64) | ((b as u128) << 32) | (a as u128)
}