/src/regalloc2/src/lib.rs
Line | Count | Source |
1 | | /* |
2 | | * The following license applies to this file, which derives many |
3 | | * details (register and constraint definitions, for example) from the |
4 | | * files `BacktrackingAllocator.h`, `BacktrackingAllocator.cpp`, |
5 | | * `LIR.h`, and possibly definitions in other related files in |
6 | | * `js/src/jit/`: |
7 | | * |
8 | | * This Source Code Form is subject to the terms of the Mozilla Public |
9 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
10 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
11 | | */ |
12 | | |
13 | | #![allow(dead_code)] |
14 | | #![allow(clippy::all)] |
15 | | #![no_std] |
16 | | |
17 | | #[cfg(feature = "std")] |
18 | | extern crate std; |
19 | | |
20 | | extern crate alloc; |
21 | | |
22 | | // Even when trace logging is disabled, the trace macro has a significant |
23 | | // performance cost so we disable it in release builds. |
24 | | macro_rules! trace { |
25 | | ($($tt:tt)*) => { |
26 | | if cfg!(feature = "trace-log") { |
27 | | ::log::trace!($($tt)*); |
28 | | } |
29 | | }; |
30 | | } |
31 | | |
32 | | macro_rules! trace_enabled { |
33 | | () => { |
34 | | cfg!(feature = "trace-log") && ::log::log_enabled!(::log::Level::Trace) |
35 | | }; |
36 | | } |
37 | | |
38 | | use alloc::rc::Rc; |
39 | | use allocator_api2::vec::Vec as Vec2; |
40 | | use core::ops::Deref as _; |
41 | | use core::{hash::BuildHasherDefault, iter::FromIterator}; |
42 | | use rustc_hash::FxHasher; |
43 | | type FxHashMap<K, V> = hashbrown::HashMap<K, V, BuildHasherDefault<FxHasher>>; |
44 | | type FxHashSet<V> = hashbrown::HashSet<V, BuildHasherDefault<FxHasher>>; |
45 | | |
46 | | pub(crate) mod cfg; |
47 | | pub(crate) mod domtree; |
48 | | pub(crate) mod fastalloc; |
49 | | pub mod indexset; |
50 | | pub(crate) mod ion; |
51 | | pub(crate) mod moves; |
52 | | pub(crate) mod postorder; |
53 | | pub mod ssa; |
54 | | |
55 | | #[macro_use] |
56 | | mod index; |
57 | | |
58 | | pub use self::ion::data_structures::Ctx; |
59 | | use alloc::vec::Vec; |
60 | | pub use index::{Block, Inst, InstRange}; |
61 | | |
62 | | pub mod checker; |
63 | | |
64 | | #[cfg(feature = "fuzzing")] |
65 | | pub mod fuzzing; |
66 | | |
67 | | #[cfg(feature = "enable-serde")] |
68 | | pub mod serialize; |
69 | | |
70 | | #[cfg(feature = "enable-serde")] |
71 | | use serde::{Deserialize, Serialize}; |
72 | | |
73 | | /// Register classes. |
74 | | /// |
75 | | /// Every value has a "register class", which is like a type at the |
76 | | /// register-allocator level. Every register must belong to only one |
77 | | /// class; i.e., they are disjoint. |
78 | | /// |
79 | | /// For tight bit-packing throughout our data structures, we support |
80 | | /// only three classes, "int", "float" and "vector". Usually two will |
81 | | /// be enough on modern machines, as they have one class of general-purpose |
82 | | /// integer registers of machine width (e.g. 64 bits), and another |
83 | | /// class of float/vector registers used both for FP and for vector |
84 | | /// operations. Additionally for machines with totally separate vector |
85 | | /// registers a third class is provided. |
86 | | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |
87 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
88 | | pub enum RegClass { |
89 | | Int = 0, |
90 | | Float = 1, |
91 | | Vector = 2, |
92 | | } |
93 | | |
94 | | /// A physical register. Contains a physical register number and a class. |
95 | | /// |
96 | | /// The `hw_enc` field contains the physical register number and is in |
97 | | /// a logically separate index space per class; in other words, Int |
98 | | /// register 0 is different than Float register 0. |
99 | | /// |
100 | | /// Because of bit-packed encodings throughout the implementation, |
101 | | /// `hw_enc` must fit in 6 bits, i.e., at most 64 registers per class. |
102 | | /// |
103 | | /// The value returned by `index()`, in contrast, is in a single index |
104 | | /// space shared by all classes, in order to enable uniform reasoning |
105 | | /// about physical registers. This is done by putting the class bit at |
106 | | /// the MSB, or equivalently, declaring that indices 0..=63 are the 64 |
107 | | /// integer registers and indices 64..=127 are the 64 float registers. |
108 | | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
109 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
110 | | pub struct PReg { |
111 | | bits: u8, |
112 | | } |
113 | | |
114 | | impl PReg { |
115 | | pub const MAX_BITS: usize = 6; |
116 | | pub const MAX: usize = (1 << Self::MAX_BITS) - 1; |
117 | | pub const NUM_INDEX: usize = 1 << (Self::MAX_BITS + 2); // including RegClass bits |
118 | | pub const INVALID: u8 = ((RegClass::Int as u8) << Self::MAX_BITS) | (Self::MAX as u8); |
119 | | |
120 | | /// Create a new PReg. The `hw_enc` range is 6 bits. |
121 | | #[inline(always)] |
122 | 20.1M | pub const fn new(hw_enc: usize, class: RegClass) -> Self { |
123 | 20.1M | debug_assert!(hw_enc <= PReg::MAX); |
124 | 20.1M | PReg { |
125 | 20.1M | bits: ((class as u8) << Self::MAX_BITS) | (hw_enc as u8), |
126 | 20.1M | } |
127 | 20.1M | } |
128 | | |
129 | | /// The physical register number, as encoded by the ISA for the particular register class. |
130 | | #[inline(always)] |
131 | 48.6M | pub const fn hw_enc(self) -> usize { |
132 | 48.6M | self.bits as usize & Self::MAX |
133 | 48.6M | } |
134 | | |
135 | | /// The register class. |
136 | | #[inline(always)] |
137 | 6.47M | pub const fn class(self) -> RegClass { |
138 | 6.47M | match (self.bits >> Self::MAX_BITS) & 0b11 { |
139 | 2.12M | 0 => RegClass::Int, |
140 | 1.95M | 1 => RegClass::Float, |
141 | 2.39M | 2 => RegClass::Vector, |
142 | 0 | _ => unreachable!(), |
143 | | } |
144 | 6.47M | } |
145 | | |
146 | | /// Get an index into the (not necessarily contiguous) index space of |
147 | | /// all physical registers. Allows one to maintain an array of data for |
148 | | /// all PRegs and index it efficiently. |
149 | | #[inline(always)] |
150 | 87.7M | pub const fn index(self) -> usize { |
151 | 87.7M | self.bits as usize |
152 | 87.7M | } |
153 | | |
154 | | /// Construct a PReg from the value returned from `.index()`. |
155 | | #[inline(always)] |
156 | 33.3M | pub const fn from_index(index: usize) -> Self { |
157 | 33.3M | PReg { |
158 | 33.3M | bits: (index & (Self::NUM_INDEX - 1)) as u8, |
159 | 33.3M | } |
160 | 33.3M | } |
161 | | |
162 | | /// Return the "invalid PReg", which can be used to initialize |
163 | | /// data structures. |
164 | | #[inline(always)] |
165 | 27.3M | pub const fn invalid() -> Self { |
166 | 27.3M | PReg { |
167 | 27.3M | bits: Self::INVALID, |
168 | 27.3M | } |
169 | 27.3M | } |
170 | | |
171 | | /// Return a valid [`PReg`] or [`None`] if it is invalid. |
172 | | #[inline(always)] |
173 | 9.04M | pub const fn as_valid(self) -> Option<Self> { |
174 | 9.04M | if self.bits == Self::INVALID { |
175 | 7.66M | None |
176 | | } else { |
177 | 1.37M | Some(self) |
178 | | } |
179 | 9.04M | } |
180 | | } |
181 | | |
182 | | impl Default for PReg { |
183 | 3 | fn default() -> Self { |
184 | 3 | Self::invalid() |
185 | 3 | } |
186 | | } |
187 | | |
188 | | impl core::fmt::Debug for PReg { |
189 | 0 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
190 | 0 | write!( |
191 | 0 | f, |
192 | 0 | "PReg(hw = {}, class = {:?}, index = {})", |
193 | 0 | self.hw_enc(), |
194 | 0 | self.class(), |
195 | 0 | self.index() |
196 | | ) |
197 | 0 | } |
198 | | } |
199 | | |
200 | | impl core::fmt::Display for PReg { |
201 | 6.39M | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
202 | 6.39M | let class = match self.class() { |
203 | 2.09M | RegClass::Int => "i", |
204 | 1.92M | RegClass::Float => "f", |
205 | 2.37M | RegClass::Vector => "v", |
206 | | }; |
207 | 6.39M | write!(f, "p{}{}", self.hw_enc(), class) |
208 | 6.39M | } |
209 | | } |
210 | | |
211 | | /// A type for internal bit arrays. |
212 | | type Bits = u64; |
213 | | |
214 | | /// A physical register set. Used to represent clobbers |
215 | | /// efficiently. |
216 | | /// |
217 | | /// The set is `Copy` and is guaranteed to have constant, and small, |
218 | | /// size, as it is based on a bitset internally. |
219 | | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] |
220 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
221 | | pub struct PRegSet { |
222 | | bits: [Bits; Self::LEN], |
223 | | } |
224 | | |
225 | | impl PRegSet { |
226 | | /// The number of bits per element in the internal bit array. |
227 | | const BITS: usize = core::mem::size_of::<Bits>() * 8; |
228 | | |
229 | | /// Length of the internal bit array. |
230 | | const LEN: usize = (PReg::NUM_INDEX + Self::BITS - 1) / Self::BITS; |
231 | | |
232 | | /// Create an empty set. |
233 | 10.8k | pub const fn empty() -> Self { |
234 | 10.8k | Self { |
235 | 10.8k | bits: [0; Self::LEN], |
236 | 10.8k | } |
237 | 10.8k | } |
238 | | |
239 | | /// Splits the given register index into parts to access the internal bit array. |
240 | 9.72M | const fn split_index(reg: PReg) -> (usize, usize) { |
241 | 9.72M | let index = reg.index(); |
242 | 9.72M | (index >> Self::BITS.ilog2(), index & (Self::BITS - 1)) |
243 | 9.72M | } |
244 | | |
245 | | /// Returns whether the given register is part of the set. |
246 | 4.04M | pub fn contains(&self, reg: PReg) -> bool { |
247 | 4.04M | let (index, bit) = Self::split_index(reg); |
248 | 4.04M | self.bits[index] & (1 << bit) != 0 |
249 | 4.04M | } |
250 | | |
251 | | /// Add a physical register (PReg) to the set, returning the new value. |
252 | 4.67M | pub const fn with(self, reg: PReg) -> Self { |
253 | 4.67M | let (index, bit) = Self::split_index(reg); |
254 | 4.67M | let mut out = self; |
255 | 4.67M | out.bits[index] |= 1 << bit; |
256 | 4.67M | out |
257 | 4.67M | } |
258 | | |
259 | | /// Add a physical register (PReg) to the set. |
260 | 1.00M | pub fn add(&mut self, reg: PReg) { |
261 | 1.00M | let (index, bit) = Self::split_index(reg); |
262 | 1.00M | self.bits[index] |= 1 << bit; |
263 | 1.00M | } |
264 | | |
265 | | /// Remove a physical register (PReg) from the set. |
266 | 0 | pub fn remove(&mut self, reg: PReg) { |
267 | 0 | let (index, bit) = Self::split_index(reg); |
268 | 0 | self.bits[index] &= !(1 << bit); |
269 | 0 | } |
270 | | |
271 | | /// Add all of the registers in one set to this one, mutating in |
272 | | /// place. |
273 | 0 | pub fn union_from(&mut self, other: PRegSet) { |
274 | 0 | for i in 0..self.bits.len() { |
275 | 0 | self.bits[i] |= other.bits[i]; |
276 | 0 | } |
277 | 0 | } |
278 | | |
279 | 0 | pub fn intersect_from(&mut self, other: PRegSet) { |
280 | 0 | for i in 0..self.bits.len() { |
281 | 0 | self.bits[i] &= other.bits[i]; |
282 | 0 | } |
283 | 0 | } |
284 | | |
285 | 0 | pub fn invert(&self) -> PRegSet { |
286 | 0 | let mut set = self.bits; |
287 | 0 | for i in 0..self.bits.len() { |
288 | 0 | set[i] = !self.bits[i]; |
289 | 0 | } |
290 | 0 | PRegSet { bits: set } |
291 | 0 | } |
292 | | |
293 | 0 | pub fn is_empty(&self, regclass: RegClass) -> bool { |
294 | 0 | self.bits[regclass as usize] == 0 |
295 | 0 | } |
296 | | } |
297 | | |
298 | | impl core::ops::BitAnd<PRegSet> for PRegSet { |
299 | | type Output = PRegSet; |
300 | | |
301 | 0 | fn bitand(self, rhs: PRegSet) -> Self::Output { |
302 | 0 | let mut out = self; |
303 | 0 | out.intersect_from(rhs); |
304 | 0 | out |
305 | 0 | } |
306 | | } |
307 | | |
308 | | impl core::ops::BitOr<PRegSet> for PRegSet { |
309 | | type Output = PRegSet; |
310 | | |
311 | 0 | fn bitor(self, rhs: PRegSet) -> Self::Output { |
312 | 0 | let mut out = self; |
313 | 0 | out.union_from(rhs); |
314 | 0 | out |
315 | 0 | } |
316 | | } |
317 | | |
318 | | impl IntoIterator for PRegSet { |
319 | | type Item = PReg; |
320 | | type IntoIter = PRegSetIter; |
321 | 10.3M | fn into_iter(self) -> PRegSetIter { |
322 | 10.3M | PRegSetIter { |
323 | 10.3M | bits: self.bits, |
324 | 10.3M | cur: 0, |
325 | 10.3M | } |
326 | 10.3M | } |
327 | | } |
328 | | |
329 | | pub struct PRegSetIter { |
330 | | bits: [Bits; PRegSet::LEN], |
331 | | cur: usize, |
332 | | } |
333 | | |
334 | | impl Iterator for PRegSetIter { |
335 | | type Item = PReg; |
336 | 13.0M | fn next(&mut self) -> Option<PReg> { |
337 | | loop { |
338 | 54.3M | let bits = self.bits.get_mut(self.cur)?; |
339 | 44.0M | if *bits != 0 { |
340 | 2.75M | let bit = bits.trailing_zeros(); |
341 | 2.75M | *bits &= !(1 << bit); |
342 | 2.75M | let index = bit as usize + self.cur * PRegSet::BITS; |
343 | 2.75M | return Some(PReg::from_index(index)); |
344 | 41.2M | } |
345 | 41.2M | self.cur += 1; |
346 | | } |
347 | 13.0M | } |
348 | | } |
349 | | |
350 | | impl From<&MachineEnv> for PRegSet { |
351 | 0 | fn from(env: &MachineEnv) -> Self { |
352 | 0 | let mut res = Self::default(); |
353 | | |
354 | 0 | for class in env.preferred_regs_by_class.iter() { |
355 | 0 | for preg in class { |
356 | 0 | res.add(*preg) |
357 | | } |
358 | | } |
359 | | |
360 | 0 | for class in env.non_preferred_regs_by_class.iter() { |
361 | 0 | for preg in class { |
362 | 0 | res.add(*preg) |
363 | | } |
364 | | } |
365 | | |
366 | 0 | res |
367 | 0 | } |
368 | | } |
369 | | |
370 | | impl FromIterator<PReg> for PRegSet { |
371 | 0 | fn from_iter<T: IntoIterator<Item = PReg>>(iter: T) -> Self { |
372 | 0 | let mut set = Self::default(); |
373 | 0 | for preg in iter { |
374 | 0 | set.add(preg); |
375 | 0 | } |
376 | 0 | set |
377 | 0 | } |
378 | | } |
379 | | |
380 | | impl core::fmt::Display for PRegSet { |
381 | 0 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
382 | 0 | write!(f, "{{")?; |
383 | 0 | for preg in self.into_iter() { |
384 | 0 | write!(f, "{preg}, ")?; |
385 | | } |
386 | 0 | write!(f, "}}") |
387 | 0 | } |
388 | | } |
389 | | |
390 | | /// A virtual register. Contains a virtual register number and a |
391 | | /// class. |
392 | | /// |
393 | | /// A virtual register ("vreg") corresponds to an SSA value. All |
394 | | /// dataflow in the input program is specified via flow through a |
395 | | /// virtual register; even uses of specially-constrained locations, |
396 | | /// such as fixed physical registers, are done by using vregs, because |
397 | | /// we need the vreg's live range in order to track the use of that |
398 | | /// location. |
399 | | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
400 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
401 | | pub struct VReg { |
402 | | bits: u32, |
403 | | } |
404 | | |
405 | | impl VReg { |
406 | | pub const MAX_BITS: usize = 21; |
407 | | pub const MAX: usize = (1 << Self::MAX_BITS) - 1; |
408 | | |
409 | | #[inline(always)] |
410 | 341M | pub const fn new(virt_reg: usize, class: RegClass) -> Self { |
411 | 341M | debug_assert!(virt_reg <= VReg::MAX); |
412 | 341M | VReg { |
413 | 341M | bits: ((virt_reg as u32) << 2) | (class as u8 as u32), |
414 | 341M | } |
415 | 341M | } |
416 | | |
417 | | #[inline(always)] |
418 | 390M | pub const fn vreg(self) -> usize { |
419 | 390M | let vreg = (self.bits >> 2) as usize; |
420 | 390M | vreg |
421 | 390M | } |
422 | | |
423 | | #[inline(always)] |
424 | 81.9M | pub const fn class(self) -> RegClass { |
425 | 81.9M | match self.bits & 0b11 { |
426 | 39.2M | 0 => RegClass::Int, |
427 | 11.7M | 1 => RegClass::Float, |
428 | 30.9M | 2 => RegClass::Vector, |
429 | 0 | _ => unreachable!(), |
430 | | } |
431 | 81.9M | } |
432 | | |
433 | | #[inline(always)] |
434 | 0 | pub const fn invalid() -> Self { |
435 | 0 | VReg::new(Self::MAX, RegClass::Int) |
436 | 0 | } |
437 | | |
438 | | #[inline(always)] |
439 | | pub const fn bits(self) -> usize { |
440 | | self.bits as usize |
441 | | } |
442 | | } |
443 | | |
444 | | impl From<u32> for VReg { |
445 | 0 | fn from(value: u32) -> Self { |
446 | 0 | Self { bits: value } |
447 | 0 | } |
448 | | } |
449 | | |
450 | | impl core::fmt::Debug for VReg { |
451 | 0 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
452 | 0 | write!( |
453 | 0 | f, |
454 | 0 | "VReg(vreg = {}, class = {:?})", |
455 | 0 | self.vreg(), |
456 | 0 | self.class() |
457 | | ) |
458 | 0 | } |
459 | | } |
460 | | |
461 | | impl core::fmt::Display for VReg { |
462 | 3.12M | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
463 | 3.12M | write!(f, "v{}", self.vreg()) |
464 | 3.12M | } |
465 | | } |
466 | | |
467 | | /// A spillslot is a space in the stackframe used by the allocator to |
468 | | /// temporarily store a value. |
469 | | /// |
470 | | /// The allocator is responsible for allocating indices in this space, |
471 | | /// and will specify how many spillslots have been used when the |
472 | | /// allocation is completed. |
473 | | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |
474 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
475 | | pub struct SpillSlot { |
476 | | bits: u32, |
477 | | } |
478 | | |
479 | | impl SpillSlot { |
480 | | /// The maximum spillslot index. |
481 | | pub const MAX: usize = (1 << 24) - 1; |
482 | | |
483 | | /// Create a new SpillSlot. |
484 | | #[inline(always)] |
485 | 195k | pub fn new(slot: usize) -> Self { |
486 | 195k | debug_assert!(slot <= Self::MAX); |
487 | 195k | SpillSlot { bits: slot as u32 } |
488 | 195k | } |
489 | | |
490 | | /// Get the spillslot index for this spillslot. |
491 | | #[inline(always)] |
492 | 980k | pub fn index(self) -> usize { |
493 | 980k | (self.bits & 0x00ffffff) as usize |
494 | 980k | } |
495 | | |
496 | | /// Get the spillslot `offset` slots away. |
497 | | #[inline(always)] |
498 | | pub fn plus(self, offset: usize) -> Self { |
499 | | SpillSlot::new(self.index() + offset) |
500 | | } |
501 | | |
502 | | /// Get the invalid spillslot, used for initializing data structures. |
503 | | #[inline(always)] |
504 | 0 | pub fn invalid() -> Self { |
505 | 0 | SpillSlot { bits: 0xffff_ffff } |
506 | 0 | } |
507 | | |
508 | | /// Is this the invalid spillslot? |
509 | | #[inline(always)] |
510 | 0 | pub fn is_invalid(self) -> bool { |
511 | 0 | self == Self::invalid() |
512 | 0 | } |
513 | | |
514 | | /// Is this a valid spillslot (not `SpillSlot::invalid()`)? |
515 | | #[inline(always)] |
516 | 0 | pub fn is_valid(self) -> bool { |
517 | 0 | self != Self::invalid() |
518 | 0 | } |
519 | | } |
520 | | |
521 | | impl core::fmt::Display for SpillSlot { |
522 | 980k | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
523 | 980k | write!(f, "stack{}", self.index()) |
524 | 980k | } |
525 | | } |
526 | | |
527 | | /// An `OperandConstraint` specifies where a vreg's value must be |
528 | | /// placed at a particular reference to that vreg via an |
529 | | /// `Operand`. The constraint may be loose -- "any register of a given |
530 | | /// class", for example -- or very specific, such as "this particular |
531 | | /// physical register". The allocator's result will always satisfy all |
532 | | /// given constraints; however, if the input has a combination of |
533 | | /// constraints that are impossible to satisfy, then allocation may |
534 | | /// fail or the allocator may panic (providing impossible constraints |
535 | | /// is usually a programming error in the client, rather than a |
536 | | /// function of bad input). |
537 | | #[derive(Clone, Copy, Debug, PartialEq, Eq)] |
538 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
539 | | pub enum OperandConstraint { |
540 | | /// Any location is fine (register or stack slot). |
541 | | Any, |
542 | | /// Operand must be in a register. Register is read-only for Uses. |
543 | | Reg, |
544 | | /// Operand must be on the stack. |
545 | | Stack, |
546 | | /// Operand must be in a fixed register. |
547 | | FixedReg(PReg), |
548 | | /// On defs only: reuse a use's register. |
549 | | Reuse(usize), |
550 | | /// Operand must be in a specific range of registers. |
551 | | /// |
552 | | /// The contained `usize` indicates the (exclusive) upper limit of a |
553 | | /// register range, `n`. An operand with this constraint may allocate a |
554 | | /// register between `0 ..= n-1`. Due to encoding constraints, `n` must be a |
555 | | /// power of two and below 2^16. |
556 | | Limit(usize), |
557 | | } |
558 | | |
559 | | impl core::fmt::Display for OperandConstraint { |
560 | 2.67M | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
561 | 2.67M | match self { |
562 | 1.56M | Self::Any => write!(f, "any"), |
563 | 918k | Self::Reg => write!(f, "reg"), |
564 | 0 | Self::Stack => write!(f, "stack"), |
565 | 90.9k | Self::FixedReg(preg) => write!(f, "fixed({preg})"), |
566 | 99.7k | Self::Reuse(idx) => write!(f, "reuse({idx})"), |
567 | 0 | Self::Limit(max) => write!(f, "limit(0..={})", max - 1), |
568 | | } |
569 | 2.67M | } |
570 | | } |
571 | | |
572 | | /// The "kind" of the operand: whether it reads a vreg (Use) or writes |
573 | | /// a vreg (Def). |
574 | | #[derive(Clone, Copy, Debug, PartialEq, Eq)] |
575 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
576 | | pub enum OperandKind { |
577 | | Def = 0, |
578 | | Use = 1, |
579 | | } |
580 | | |
581 | | /// The "position" of the operand: where it has its read/write |
582 | | /// effects. These are positions "in" the instruction, and "early" and |
583 | | /// "late" are relative to the instruction's main effect or |
584 | | /// computation. In other words, the allocator assumes that the |
585 | | /// instruction (i) performs all reads and writes of "early" operands, |
586 | | /// (ii) does its work, and (iii) performs all reads and writes of its |
587 | | /// "late" operands. |
588 | | /// |
589 | | /// A "write" (def) at "early" or a "read" (use) at "late" may be |
590 | | /// slightly nonsensical, given the above, if the read is necessary |
591 | | /// for the computation or the write is a result of it. A way to think |
592 | | /// of it is that the value (even if a result of execution) *could* |
593 | | /// have been read or written at the given location without causing |
594 | | /// any register-usage conflicts. In other words, these write-early or |
595 | | /// use-late operands ensure that the particular allocations are valid |
596 | | /// for longer than usual and that a register is not reused between |
597 | | /// the use (normally complete at "Early") and the def (normally |
598 | | /// starting at "Late"). See `Operand` for more. |
599 | | #[derive(Clone, Copy, Debug, PartialEq, Eq)] |
600 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
601 | | pub enum OperandPos { |
602 | | Early = 0, |
603 | | Late = 1, |
604 | | } |
605 | | |
606 | | /// An `Operand` encodes everything about a mention of a register in |
607 | | /// an instruction: virtual register number, and any constraint that |
608 | | /// applies to the register at this program point. |
609 | | /// |
610 | | /// An Operand may be a use or def (this corresponds to `LUse` and |
611 | | /// `LAllocation` in Ion). |
612 | | /// |
613 | | /// Generally, regalloc2 considers operands to have their effects at |
614 | | /// one of two points that exist in an instruction: "Early" or |
615 | | /// "Late". All operands at a given program-point are assigned |
616 | | /// non-conflicting locations based on their constraints. Each operand |
617 | | /// has a "kind", one of use/def/mod, corresponding to |
618 | | /// read/write/read-write, respectively. |
619 | | /// |
620 | | /// Usually, an instruction's inputs will be "early uses" and outputs |
621 | | /// will be "late defs", though there are valid use-cases for other |
622 | | /// combinations too. For example, a single "instruction" seen by the |
623 | | /// regalloc that lowers into multiple machine instructions and reads |
624 | | /// some of its inputs after it starts to write outputs must either |
625 | | /// make those input(s) "late uses" or those output(s) "early defs" so |
626 | | /// that the conflict (overlap) is properly accounted for. See |
627 | | /// comments on the constructors below for more. |
628 | | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
629 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
630 | | pub struct Operand { |
631 | | /// Bit-pack into 32 bits. |
632 | | /// |
633 | | /// constraint:7 kind:1 pos:1 class:2 vreg:21 |
634 | | /// |
635 | | /// where `constraint` is an `OperandConstraint`, `kind` is an |
636 | | /// `OperandKind`, `pos` is an `OperandPos`, `class` is a |
637 | | /// `RegClass`, and `vreg` is a vreg index. |
638 | | /// |
639 | | /// The constraints are encoded as follows: |
640 | | /// - 1xxxxxx => FixedReg(preg) |
641 | | /// - 01xxxxx => Reuse(index) |
642 | | /// - 001xxxx => Limit(max) |
643 | | /// - 0000000 => Any |
644 | | /// - 0000001 => Reg |
645 | | /// - 0000010 => Stack |
646 | | /// - _ => Unused for now |
647 | | bits: u32, |
648 | | } |
649 | | |
650 | | impl Operand { |
651 | | /// Construct a new operand. |
652 | | #[inline(always)] |
653 | 17.4M | pub fn new( |
654 | 17.4M | vreg: VReg, |
655 | 17.4M | constraint: OperandConstraint, |
656 | 17.4M | kind: OperandKind, |
657 | 17.4M | pos: OperandPos, |
658 | 17.4M | ) -> Self { |
659 | 17.4M | let constraint_field = match constraint { |
660 | 10.3M | OperandConstraint::Any => 0, |
661 | 5.82M | OperandConstraint::Reg => 1, |
662 | 0 | OperandConstraint::Stack => 2, |
663 | 706k | OperandConstraint::FixedReg(preg) => { |
664 | 706k | debug_assert_eq!(preg.class(), vreg.class()); |
665 | 706k | 0b1000000 | preg.hw_enc() as u32 |
666 | | } |
667 | 555k | OperandConstraint::Reuse(which) => { |
668 | 555k | debug_assert!(which <= 0b11111); |
669 | 555k | 0b0100000 | which as u32 |
670 | | } |
671 | 0 | OperandConstraint::Limit(max) => { |
672 | 0 | assert!(max.is_power_of_two()); |
673 | 0 | assert!( |
674 | 0 | max <= PReg::MAX + 1, |
675 | | "limit is larger than the allowed register encoding" |
676 | | ); |
677 | 0 | let log2 = max.ilog2(); |
678 | 0 | debug_assert!(log2 <= 0b1111); |
679 | 0 | 0b0010000 | log2 as u32 |
680 | | } |
681 | | }; |
682 | 17.4M | let class_field = vreg.class() as u8 as u32; |
683 | 17.4M | let pos_field = pos as u8 as u32; |
684 | 17.4M | let kind_field = kind as u8 as u32; |
685 | 17.4M | Operand { |
686 | 17.4M | bits: vreg.vreg() as u32 |
687 | 17.4M | | (class_field << 21) |
688 | 17.4M | | (pos_field << 23) |
689 | 17.4M | | (kind_field << 24) |
690 | 17.4M | | (constraint_field << 25), |
691 | 17.4M | } |
692 | 17.4M | } |
693 | | |
694 | | /// Create an `Operand` that designates a use of a VReg that must |
695 | | /// be in a register, and that is used at the "before" point, |
696 | | /// i.e., can be overwritten by a result. |
697 | | #[inline(always)] |
698 | | pub fn reg_use(vreg: VReg) -> Self { |
699 | | Operand::new( |
700 | | vreg, |
701 | | OperandConstraint::Reg, |
702 | | OperandKind::Use, |
703 | | OperandPos::Early, |
704 | | ) |
705 | | } |
706 | | |
707 | | /// Create an `Operand` that designates a use of a VReg that must |
708 | | /// be in a register, and that is used up until the "after" point, |
709 | | /// i.e., must not conflict with any results. |
710 | | #[inline(always)] |
711 | | pub fn reg_use_at_end(vreg: VReg) -> Self { |
712 | | Operand::new( |
713 | | vreg, |
714 | | OperandConstraint::Reg, |
715 | | OperandKind::Use, |
716 | | OperandPos::Late, |
717 | | ) |
718 | | } |
719 | | |
720 | | /// Create an `Operand` that designates a definition of a VReg |
721 | | /// that must be in a register, and that occurs at the "after" |
722 | | /// point, i.e. may reuse a register that carried a use into this |
723 | | /// instruction. |
724 | | #[inline(always)] |
725 | | pub fn reg_def(vreg: VReg) -> Self { |
726 | | Operand::new( |
727 | | vreg, |
728 | | OperandConstraint::Reg, |
729 | | OperandKind::Def, |
730 | | OperandPos::Late, |
731 | | ) |
732 | | } |
733 | | |
734 | | /// Create an `Operand` that designates a definition of a VReg |
735 | | /// that must be in a register, and that occurs early at the |
736 | | /// "before" point, i.e., must not conflict with any input to the |
737 | | /// instruction. |
738 | | /// |
739 | | /// Note that the register allocator will ensure that such an |
740 | | /// early-def operand is live throughout the instruction, i.e., also |
741 | | /// at the after-point. Hence it will also avoid conflicts with all |
742 | | /// outputs to the instruction. As such, early defs are appropriate |
743 | | /// for use as "temporary registers" that an instruction can use |
744 | | /// throughout its execution separately from the inputs and outputs. |
745 | | #[inline(always)] |
746 | | pub fn reg_def_at_start(vreg: VReg) -> Self { |
747 | | Operand::new( |
748 | | vreg, |
749 | | OperandConstraint::Reg, |
750 | | OperandKind::Def, |
751 | | OperandPos::Early, |
752 | | ) |
753 | | } |
754 | | |
755 | | /// Create an `Operand` that designates a def (and use) of a |
756 | | /// temporary *within* the instruction. This register is assumed |
757 | | /// to be written by the instruction, and will not conflict with |
758 | | /// any input or output, but should not be used after the |
759 | | /// instruction completes. |
760 | | /// |
761 | | /// Note that within a single instruction, the dedicated scratch |
762 | | /// register (as specified in the `MachineEnv`) is also always |
763 | | /// available for use. The register allocator may use the register |
764 | | /// *between* instructions in order to implement certain sequences |
765 | | /// of moves, but will never hold a value live in the scratch |
766 | | /// register across an instruction. |
767 | | #[inline(always)] |
768 | | pub fn reg_temp(vreg: VReg) -> Self { |
769 | | // For now a temp is equivalent to a def-at-start operand, |
770 | | // which gives the desired semantics but does not enforce the |
771 | | // "not reused later" constraint. |
772 | | Operand::new( |
773 | | vreg, |
774 | | OperandConstraint::Reg, |
775 | | OperandKind::Def, |
776 | | OperandPos::Early, |
777 | | ) |
778 | | } |
779 | | |
780 | | /// Create an `Operand` that designates a def of a vreg that must |
781 | | /// reuse the register assigned to an input to the |
782 | | /// instruction. The input is identified by `idx` (is the `idx`th |
783 | | /// `Operand` for the instruction) and must be constraint to a |
784 | | /// register, i.e., be the result of `Operand::reg_use(vreg)`. |
785 | | #[inline(always)] |
786 | | pub fn reg_reuse_def(vreg: VReg, idx: usize) -> Self { |
787 | | Operand::new( |
788 | | vreg, |
789 | | OperandConstraint::Reuse(idx), |
790 | | OperandKind::Def, |
791 | | OperandPos::Late, |
792 | | ) |
793 | | } |
794 | | |
795 | | /// Create an `Operand` that designates a use of a vreg and |
796 | | /// ensures that it is placed in the given, fixed PReg at the |
797 | | /// use. It is guaranteed that the `Allocation` resulting for this |
798 | | /// operand will be `preg`. |
799 | | #[inline(always)] |
800 | | pub fn reg_fixed_use(vreg: VReg, preg: PReg) -> Self { |
801 | | Operand::new( |
802 | | vreg, |
803 | | OperandConstraint::FixedReg(preg), |
804 | | OperandKind::Use, |
805 | | OperandPos::Early, |
806 | | ) |
807 | | } |
808 | | |
809 | | /// Create an `Operand` that designates a def of a vreg and |
810 | | /// ensures that it is placed in the given, fixed PReg at the |
811 | | /// def. It is guaranteed that the `Allocation` resulting for this |
812 | | /// operand will be `preg`. |
813 | | #[inline(always)] |
814 | | pub fn reg_fixed_def(vreg: VReg, preg: PReg) -> Self { |
815 | | Operand::new( |
816 | | vreg, |
817 | | OperandConstraint::FixedReg(preg), |
818 | | OperandKind::Def, |
819 | | OperandPos::Late, |
820 | | ) |
821 | | } |
822 | | |
823 | | /// Same as `reg_fixed_use` but at `OperandPos::Late`. |
824 | | #[inline(always)] |
825 | | pub fn reg_fixed_use_at_end(vreg: VReg, preg: PReg) -> Self { |
826 | | Operand::new( |
827 | | vreg, |
828 | | OperandConstraint::FixedReg(preg), |
829 | | OperandKind::Use, |
830 | | OperandPos::Late, |
831 | | ) |
832 | | } |
833 | | |
834 | | /// Same as `reg_fixed_def` but at `OperandPos::Early`. |
835 | | #[inline(always)] |
836 | | pub fn reg_fixed_def_at_start(vreg: VReg, preg: PReg) -> Self { |
837 | | Operand::new( |
838 | | vreg, |
839 | | OperandConstraint::FixedReg(preg), |
840 | | OperandKind::Def, |
841 | | OperandPos::Early, |
842 | | ) |
843 | | } |
844 | | |
845 | | /// Create an `Operand` that designates a use of a vreg and places |
846 | | /// no constraints on its location (i.e., it can be allocated into |
847 | | /// either a register or on the stack). |
848 | | #[inline(always)] |
849 | | pub fn any_use(vreg: VReg) -> Self { |
850 | | Operand::new( |
851 | | vreg, |
852 | | OperandConstraint::Any, |
853 | | OperandKind::Use, |
854 | | OperandPos::Early, |
855 | | ) |
856 | | } |
857 | | |
858 | | /// Create an `Operand` that designates a def of a vreg and places |
859 | | /// no constraints on its location (i.e., it can be allocated into |
860 | | /// either a register or on the stack). |
861 | | #[inline(always)] |
862 | 3.60M | pub fn any_def(vreg: VReg) -> Self { |
863 | 3.60M | Operand::new( |
864 | 3.60M | vreg, |
865 | 3.60M | OperandConstraint::Any, |
866 | 3.60M | OperandKind::Def, |
867 | 3.60M | OperandPos::Late, |
868 | | ) |
869 | 3.60M | } |
870 | | |
871 | | /// Create an `Operand` that always results in an assignment to the |
872 | | /// given fixed `preg`, *without* tracking liveranges in that |
873 | | /// `preg`. Must only be used for non-allocatable registers. |
874 | | #[inline(always)] |
875 | 80.9k | pub fn fixed_nonallocatable(preg: PReg) -> Self { |
876 | 80.9k | Operand::new( |
877 | 80.9k | VReg::new(VReg::MAX, preg.class()), |
878 | 80.9k | OperandConstraint::FixedReg(preg), |
879 | 80.9k | OperandKind::Use, |
880 | 80.9k | OperandPos::Early, |
881 | | ) |
882 | 80.9k | } |
883 | | |
884 | | /// Get the virtual register designated by an operand. Every |
885 | | /// operand must name some virtual register, even if it constrains |
886 | | /// the operand to a fixed physical register as well; the vregs |
887 | | /// are used to track dataflow. |
888 | | #[inline(always)] |
889 | 316M | pub fn vreg(self) -> VReg { |
890 | 316M | let vreg_idx = ((self.bits as usize) & VReg::MAX) as usize; |
891 | 316M | VReg::new(vreg_idx, self.class()) |
892 | 316M | } |
893 | | |
894 | | /// Get the register class used by this operand. |
895 | | #[inline(always)] |
896 | 337M | pub fn class(self) -> RegClass { |
897 | 337M | let class_field = (self.bits >> 21) & 3; |
898 | 337M | match class_field { |
899 | 151M | 0 => RegClass::Int, |
900 | 53.8M | 1 => RegClass::Float, |
901 | 131M | 2 => RegClass::Vector, |
902 | 0 | _ => unreachable!(), |
903 | | } |
904 | 337M | } |
905 | | |
906 | | /// Get the "kind" of this operand: a definition (write) or a use |
907 | | /// (read). |
908 | | #[inline(always)] |
909 | 251M | pub fn kind(self) -> OperandKind { |
910 | 251M | let kind_field = (self.bits >> 24) & 1; |
911 | 251M | match kind_field { |
912 | 121M | 0 => OperandKind::Def, |
913 | 129M | 1 => OperandKind::Use, |
914 | 0 | _ => unreachable!(), |
915 | | } |
916 | 251M | } |
917 | | |
918 | | /// Get the "position" of this operand, i.e., where its read |
919 | | /// and/or write occurs: either before the instruction executes, |
920 | | /// or after it does. Ordinarily, uses occur at "before" and defs |
921 | | /// at "after", though there are cases where this is not true. |
922 | | #[inline(always)] |
923 | 165M | pub fn pos(self) -> OperandPos { |
924 | 165M | let pos_field = (self.bits >> 23) & 1; |
925 | 165M | match pos_field { |
926 | 114M | 0 => OperandPos::Early, |
927 | 50.3M | 1 => OperandPos::Late, |
928 | 0 | _ => unreachable!(), |
929 | | } |
930 | 165M | } |
931 | | |
932 | | /// Get the "constraint" of this operand, i.e., what requirements |
933 | | /// its allocation must fulfill. |
934 | | #[inline(always)] |
935 | 387M | pub fn constraint(self) -> OperandConstraint { |
936 | 387M | let constraint_field = ((self.bits >> 25) as usize) & 0b1111111; |
937 | 387M | if constraint_field & 0b1000000 != 0 { |
938 | 15.6M | OperandConstraint::FixedReg(PReg::new(constraint_field & 0b0111111, self.class())) |
939 | 371M | } else if constraint_field & 0b0100000 != 0 { |
940 | 12.8M | OperandConstraint::Reuse(constraint_field & 0b0011111) |
941 | 358M | } else if constraint_field & 0b0010000 != 0 { |
942 | 0 | OperandConstraint::Limit(1 << (constraint_field & 0b0001111)) |
943 | | } else { |
944 | 358M | match constraint_field { |
945 | 237M | 0 => OperandConstraint::Any, |
946 | 121M | 1 => OperandConstraint::Reg, |
947 | 0 | 2 => OperandConstraint::Stack, |
948 | 0 | _ => unreachable!(), |
949 | | } |
950 | | } |
951 | 387M | } |
952 | | |
953 | | /// If this operand is for a fixed non-allocatable register (see |
954 | | /// [`Operand::fixed_nonallocatable`]), then returns the physical register that it will |
955 | | /// be assigned to. |
956 | | #[inline(always)] |
957 | 144M | pub fn as_fixed_nonallocatable(self) -> Option<PReg> { |
958 | 144M | match self.constraint() { |
959 | 5.37M | OperandConstraint::FixedReg(preg) if self.vreg().vreg() == VReg::MAX => Some(preg), |
960 | 143M | _ => None, |
961 | | } |
962 | 144M | } |
963 | | |
964 | | /// Get the raw 32-bit encoding of this operand's fields. |
965 | | #[inline(always)] |
966 | | pub fn bits(self) -> u32 { |
967 | | self.bits |
968 | | } |
969 | | |
970 | | /// Construct an `Operand` from the raw 32-bit encoding returned |
971 | | /// from `bits()`. |
972 | | #[inline(always)] |
973 | | pub fn from_bits(bits: u32) -> Self { |
974 | | debug_assert!(bits >> 29 <= 4); |
975 | | Operand { bits } |
976 | | } |
977 | | } |
978 | | |
979 | | impl core::fmt::Debug for Operand { |
980 | 0 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
981 | 0 | core::fmt::Display::fmt(self, f) |
982 | 0 | } |
983 | | } |
984 | | |
985 | | impl core::fmt::Display for Operand { |
986 | 2.68M | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
987 | 2.68M | if let Some(preg) = self.as_fixed_nonallocatable() { |
988 | 14.5k | return write!(f, "Fixed: {preg}"); |
989 | 2.67M | } |
990 | 2.67M | match (self.kind(), self.pos()) { |
991 | | (OperandKind::Def, OperandPos::Late) | (OperandKind::Use, OperandPos::Early) => { |
992 | 2.49M | write!(f, "{:?}", self.kind())?; |
993 | | } |
994 | | _ => { |
995 | 183k | write!(f, "{:?}@{:?}", self.kind(), self.pos())?; |
996 | | } |
997 | | } |
998 | 2.67M | write!( |
999 | 2.67M | f, |
1000 | 2.67M | ": {}{} {}", |
1001 | 2.67M | self.vreg(), |
1002 | 2.67M | match self.class() { |
1003 | 980k | RegClass::Int => "i", |
1004 | 710k | RegClass::Float => "f", |
1005 | 982k | RegClass::Vector => "v", |
1006 | | }, |
1007 | 2.67M | self.constraint() |
1008 | | ) |
1009 | 2.68M | } |
1010 | | } |
1011 | | |
1012 | | /// An Allocation represents the end result of regalloc for an |
1013 | | /// Operand. |
1014 | | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
1015 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
1016 | | pub struct Allocation { |
1017 | | /// Bit-pack in 32 bits. |
1018 | | /// |
1019 | | /// kind:3 unused:1 index:28 |
1020 | | bits: u32, |
1021 | | } |
1022 | | |
1023 | | impl core::fmt::Debug for Allocation { |
1024 | 0 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
1025 | 0 | core::fmt::Display::fmt(self, f) |
1026 | 0 | } |
1027 | | } |
1028 | | |
1029 | | impl core::fmt::Display for Allocation { |
1030 | 7.08M | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
1031 | 7.08M | match self.kind() { |
1032 | 0 | AllocationKind::None => write!(f, "none"), |
1033 | 6.10M | AllocationKind::Reg => write!(f, "{}", self.as_reg().unwrap()), |
1034 | 980k | AllocationKind::Stack => write!(f, "{}", self.as_stack().unwrap()), |
1035 | | } |
1036 | 7.08M | } |
1037 | | } |
1038 | | |
1039 | | impl Allocation { |
1040 | | /// Construct a new Allocation. |
1041 | | #[inline(always)] |
1042 | 90.4M | pub(crate) fn new(kind: AllocationKind, index: usize) -> Self { |
1043 | 90.4M | debug_assert!(index < (1 << 28)); |
1044 | 90.4M | Self { |
1045 | 90.4M | bits: ((kind as u8 as u32) << 29) | (index as u32), |
1046 | 90.4M | } |
1047 | 90.4M | } |
1048 | | |
1049 | | /// Get the "none" allocation, which is distinct from the other |
1050 | | /// possibilities and is used to initialize data structures. |
1051 | | #[inline(always)] |
1052 | 67.6M | pub fn none() -> Allocation { |
1053 | 67.6M | Allocation::new(AllocationKind::None, 0) |
1054 | 67.6M | } |
1055 | | |
1056 | | /// Create an allocation into a register. |
1057 | | #[inline(always)] |
1058 | 22.6M | pub fn reg(preg: PReg) -> Allocation { |
1059 | 22.6M | Allocation::new(AllocationKind::Reg, preg.index()) |
1060 | 22.6M | } |
1061 | | |
1062 | | /// Create an allocation into a spillslot. |
1063 | | #[inline(always)] |
1064 | 195k | pub fn stack(slot: SpillSlot) -> Allocation { |
1065 | 195k | Allocation::new(AllocationKind::Stack, slot.bits as usize) |
1066 | 195k | } |
1067 | | |
1068 | | /// Get the allocation's "kind": none, register, or stack (spillslot). |
1069 | | #[inline(always)] |
1070 | 39.2M | pub fn kind(self) -> AllocationKind { |
1071 | 39.2M | match (self.bits >> 29) & 7 { |
1072 | 1.55M | 0 => AllocationKind::None, |
1073 | 32.7M | 1 => AllocationKind::Reg, |
1074 | 4.98M | 2 => AllocationKind::Stack, |
1075 | 0 | _ => unreachable!(), |
1076 | | } |
1077 | 39.2M | } |
1078 | | |
1079 | | /// Is the allocation "none"? |
1080 | | #[inline(always)] |
1081 | 13.1k | pub fn is_none(self) -> bool { |
1082 | 13.1k | self.kind() == AllocationKind::None |
1083 | 13.1k | } |
1084 | | |
1085 | | /// Is the allocation not "none"? |
1086 | | #[inline(always)] |
1087 | 1.54M | pub fn is_some(self) -> bool { |
1088 | 1.54M | self.kind() != AllocationKind::None |
1089 | 1.54M | } |
1090 | | |
1091 | | /// Is the allocation a register? |
1092 | | #[inline(always)] |
1093 | 5.25M | pub fn is_reg(self) -> bool { |
1094 | 5.25M | self.kind() == AllocationKind::Reg |
1095 | 5.25M | } |
1096 | | |
1097 | | /// Is the allocation on the stack (a spillslot)? |
1098 | | #[inline(always)] |
1099 | 785k | pub fn is_stack(self) -> bool { |
1100 | 785k | self.kind() == AllocationKind::Stack |
1101 | 785k | } |
1102 | | |
1103 | | /// Get the index of the spillslot or register. If register, this |
1104 | | /// is an index that can be used by `PReg::from_index()`. |
1105 | | #[inline(always)] |
1106 | 23.2M | pub fn index(self) -> usize { |
1107 | 23.2M | (self.bits & ((1 << 28) - 1)) as usize |
1108 | 23.2M | } |
1109 | | |
1110 | | /// Get the allocation as a physical register, if any. |
1111 | | #[inline(always)] |
1112 | 23.5M | pub fn as_reg(self) -> Option<PReg> { |
1113 | 23.5M | if self.kind() == AllocationKind::Reg { |
1114 | 22.2M | Some(PReg::from_index(self.index())) |
1115 | | } else { |
1116 | 1.35M | None |
1117 | | } |
1118 | 23.5M | } |
1119 | | |
1120 | | /// Get the allocation as a spillslot, if any. |
1121 | | #[inline(always)] |
1122 | 980k | pub fn as_stack(self) -> Option<SpillSlot> { |
1123 | 980k | if self.kind() == AllocationKind::Stack { |
1124 | 980k | Some(SpillSlot { |
1125 | 980k | bits: self.index() as u32, |
1126 | 980k | }) |
1127 | | } else { |
1128 | 0 | None |
1129 | | } |
1130 | 980k | } |
1131 | | |
1132 | | /// Get the raw bits for the packed encoding of this allocation. |
1133 | | #[inline(always)] |
1134 | 6.55M | pub fn bits(self) -> u32 { |
1135 | 6.55M | self.bits |
1136 | 6.55M | } |
1137 | | |
1138 | | /// Construct an allocation from its packed encoding. |
1139 | | #[inline(always)] |
1140 | | pub fn from_bits(bits: u32) -> Self { |
1141 | | debug_assert!(bits >> 29 >= 5); |
1142 | | Self { bits } |
1143 | | } |
1144 | | } |
1145 | | |
1146 | | /// An allocation is one of two "kinds" (or "none"): register or |
1147 | | /// spillslot/stack. |
1148 | | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |
1149 | | #[repr(u8)] |
1150 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
1151 | | pub enum AllocationKind { |
1152 | | None = 0, |
1153 | | Reg = 1, |
1154 | | Stack = 2, |
1155 | | } |
1156 | | |
1157 | | /// A trait defined by the regalloc client to provide access to its |
1158 | | /// machine-instruction / CFG representation. |
1159 | | /// |
1160 | | /// (This trait's design is inspired by, and derives heavily from, the |
1161 | | /// trait of the same name in regalloc.rs.) |
1162 | | /// |
1163 | | /// The ids used in [`Block`] and [`VReg`] must start their numbering |
1164 | | /// at zero and be sequential. |
1165 | | pub trait Function { |
1166 | | // ------------- |
1167 | | // CFG traversal |
1168 | | // ------------- |
1169 | | |
1170 | | /// How many instructions are there? |
1171 | | fn num_insts(&self) -> usize; |
1172 | | |
1173 | | /// How many blocks are there? |
1174 | | fn num_blocks(&self) -> usize; |
1175 | | |
1176 | | /// Get the index of the entry block. |
1177 | | fn entry_block(&self) -> Block; |
1178 | | |
1179 | | /// Provide the range of instruction indices contained in each block. |
1180 | | fn block_insns(&self, block: Block) -> InstRange; |
1181 | | |
1182 | | /// Get CFG successors for a given block. |
1183 | | fn block_succs(&self, block: Block) -> &[Block]; |
1184 | | |
1185 | | /// Get the CFG predecessors for a given block. |
1186 | | fn block_preds(&self, block: Block) -> &[Block]; |
1187 | | |
1188 | | /// Get the block parameters for a given block. |
1189 | | fn block_params(&self, block: Block) -> &[VReg]; |
1190 | | |
1191 | | /// Determine whether an instruction is a return instruction. |
1192 | | fn is_ret(&self, insn: Inst) -> bool; |
1193 | | |
1194 | | /// Determine whether an instruction is the end-of-block |
1195 | | /// branch. |
1196 | | fn is_branch(&self, insn: Inst) -> bool; |
1197 | | |
1198 | | /// If `insn` is a branch at the end of `block`, returns the |
1199 | | /// outgoing blockparam arguments for the given successor. The |
1200 | | /// number of arguments must match the number incoming blockparams |
1201 | | /// for each respective successor block. |
1202 | | fn branch_blockparams(&self, block: Block, insn: Inst, succ_idx: usize) -> &[VReg]; |
1203 | | |
1204 | | // -------------------------- |
1205 | | // Instruction register slots |
1206 | | // -------------------------- |
1207 | | |
1208 | | /// Get the Operands for an instruction. |
1209 | | fn inst_operands(&self, insn: Inst) -> &[Operand]; |
1210 | | |
1211 | | /// Get the clobbers for an instruction; these are the registers |
1212 | | /// that, after the instruction has executed, hold values that are |
1213 | | /// arbitrary, separately from the usual outputs to the |
1214 | | /// instruction. It is invalid to read a register that has been |
1215 | | /// clobbered; the register allocator is free to assume that |
1216 | | /// clobbered registers are filled with garbage and available for |
1217 | | /// reuse. It will avoid storing any value in a clobbered register |
1218 | | /// that must be live across the instruction. |
1219 | | /// |
1220 | | /// Another way of seeing this is that a clobber is equivalent to |
1221 | | /// a "late def" of a fresh vreg that is not used anywhere else |
1222 | | /// in the program, with a fixed-register constraint that places |
1223 | | /// it in a given PReg chosen by the client prior to regalloc. |
1224 | | /// |
1225 | | /// Every register written by an instruction must either |
1226 | | /// correspond to (be assigned to) an Operand of kind `Def`, or |
1227 | | /// else must be a "clobber". |
1228 | | /// |
1229 | | /// This can be used to, for example, describe ABI-specified |
1230 | | /// registers that are not preserved by a call instruction, or |
1231 | | /// fixed physical registers written by an instruction but not |
1232 | | /// used as a vreg output, or fixed physical registers used as |
1233 | | /// temps within an instruction out of necessity. |
1234 | | /// |
1235 | | /// Note that clobbers and defs and late-uses must not collide: it |
1236 | | /// is illegal to name the same physical register both as a clobber |
1237 | | /// and in a fixed-register constraint on a def or late use. |
1238 | | /// Internally, clobbers are modeled as defs (of throwaway vregs) at |
1239 | | /// the instruction's late-point constrained to the named register. |
1240 | | fn inst_clobbers(&self, insn: Inst) -> PRegSet; |
1241 | | |
1242 | | /// Get the number of `VReg` in use in this function. |
1243 | | fn num_vregs(&self) -> usize; |
1244 | | |
1245 | | /// Get the VRegs for which we should generate value-location |
1246 | | /// metadata for debugging purposes. This can be used to generate |
1247 | | /// e.g. DWARF with valid prgram-point ranges for each value |
1248 | | /// expression in a way that is more efficient than a post-hoc |
1249 | | /// analysis of the allocator's output. |
1250 | | /// |
1251 | | /// Each tuple is (vreg, inclusive_start, exclusive_end, |
1252 | | /// label). In the `Output` there will be (label, inclusive_start, |
1253 | | /// exclusive_end, alloc)` tuples. The ranges may not exactly |
1254 | | /// match -- specifically, the returned metadata may cover only a |
1255 | | /// subset of the requested ranges -- if the value is not live for |
1256 | | /// the entire requested ranges. |
1257 | | /// |
1258 | | /// The instruction indices imply a program point just *before* |
1259 | | /// the instruction. |
1260 | | /// |
1261 | | /// Precondition: we require this slice to be sorted by vreg. |
1262 | | fn debug_value_labels(&self) -> &[(VReg, Inst, Inst, u32)] { |
1263 | | &[] |
1264 | | } |
1265 | | |
1266 | | // -------------- |
1267 | | // Spills/reloads |
1268 | | // -------------- |
1269 | | |
1270 | | /// How many logical spill slots does the given regclass require? E.g., on |
1271 | | /// a 64-bit machine, spill slots may nominally be 64-bit words, but a |
1272 | | /// 128-bit vector value will require two slots. The regalloc will always |
1273 | | /// align on this size. |
1274 | | /// |
1275 | | /// (This trait method's design and doc text derives from |
1276 | | /// regalloc.rs' trait of the same name.) |
1277 | | fn spillslot_size(&self, regclass: RegClass) -> usize; |
1278 | | |
1279 | | /// When providing a spillslot number for a multi-slot spillslot, |
1280 | | /// do we provide the first or the last? This is usually related |
1281 | | /// to which direction the stack grows and different clients may |
1282 | | /// have different preferences. |
1283 | 195k | fn multi_spillslot_named_by_last_slot(&self) -> bool { |
1284 | 195k | false |
1285 | 195k | } |
1286 | | |
1287 | | // ----------- |
1288 | | // Misc config |
1289 | | // ----------- |
1290 | | |
1291 | | /// Allow a single instruction to define a vreg multiple times. If |
1292 | | /// allowed, the semantics are as if the definition occurs only |
1293 | | /// once, and all defs will get the same alloc. This flexibility is |
1294 | | /// meant to allow the embedder to more easily aggregate operands |
1295 | | /// together in macro/pseudoinstructions, or e.g. add additional |
1296 | | /// clobbered vregs without taking care to deduplicate. This may be |
1297 | | /// particularly useful when referring to physical registers via |
1298 | | /// pinned vregs. It is optional functionality because a strict mode |
1299 | | /// (at most one def per vreg) is also useful for finding bugs in |
1300 | | /// other applications. |
1301 | 12.6M | fn allow_multiple_vreg_defs(&self) -> bool { |
1302 | 12.6M | false |
1303 | 12.6M | } |
1304 | | } |
1305 | | |
1306 | | /// A position before or after an instruction at which we can make an |
1307 | | /// edit. |
1308 | | /// |
1309 | | /// Note that this differs from `OperandPos` in that the former |
1310 | | /// describes specifically a constraint on an operand, while this |
1311 | | /// describes a program point. `OperandPos` could grow more options in |
1312 | | /// the future, for example if we decide that an "early write" or |
1313 | | /// "late read" phase makes sense, while `InstPosition` will always |
1314 | | /// describe these two insertion points. |
1315 | | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |
1316 | | #[repr(u8)] |
1317 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
1318 | | pub enum InstPosition { |
1319 | | Before = 0, |
1320 | | After = 1, |
1321 | | } |
1322 | | |
1323 | | /// A program point: a single point before or after a given instruction. |
1324 | | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
1325 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
1326 | | pub struct ProgPoint { |
1327 | | bits: u32, |
1328 | | } |
1329 | | |
1330 | | impl core::fmt::Debug for ProgPoint { |
1331 | 0 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
1332 | 0 | write!( |
1333 | 0 | f, |
1334 | 0 | "progpoint{}{}", |
1335 | 0 | self.inst().index(), |
1336 | 0 | match self.pos() { |
1337 | 0 | InstPosition::Before => "-pre", |
1338 | 0 | InstPosition::After => "-post", |
1339 | | } |
1340 | | ) |
1341 | 0 | } |
1342 | | } |
1343 | | |
1344 | | impl ProgPoint { |
1345 | | /// Create a new ProgPoint before or after the given instruction. |
1346 | | #[inline(always)] |
1347 | 124M | pub fn new(inst: Inst, pos: InstPosition) -> Self { |
1348 | 124M | let bits = ((inst.0 as u32) << 1) | (pos as u8 as u32); |
1349 | 124M | Self { bits } |
1350 | 124M | } |
1351 | | |
1352 | | /// Create a new ProgPoint before the given instruction. |
1353 | | #[inline(always)] |
1354 | 103M | pub fn before(inst: Inst) -> Self { |
1355 | 103M | Self::new(inst, InstPosition::Before) |
1356 | 103M | } |
1357 | | |
1358 | | /// Create a new ProgPoint after the given instruction. |
1359 | | #[inline(always)] |
1360 | 21.0M | pub fn after(inst: Inst) -> Self { |
1361 | 21.0M | Self::new(inst, InstPosition::After) |
1362 | 21.0M | } |
1363 | | |
1364 | | /// Get the instruction that this ProgPoint is before or after. |
1365 | | #[inline(always)] |
1366 | 132M | pub fn inst(self) -> Inst { |
1367 | | // Cast to i32 to do an arithmetic right-shift, which will |
1368 | | // preserve an `Inst::invalid()` (which is -1, or all-ones). |
1369 | 132M | Inst::new(((self.bits as i32) >> 1) as usize) |
1370 | 132M | } |
1371 | | |
1372 | | /// Get the "position" (Before or After) relative to the |
1373 | | /// instruction. |
1374 | | #[inline(always)] |
1375 | 40.6M | pub fn pos(self) -> InstPosition { |
1376 | 40.6M | match self.bits & 1 { |
1377 | 24.4M | 0 => InstPosition::Before, |
1378 | 16.2M | 1 => InstPosition::After, |
1379 | 0 | _ => unreachable!(), |
1380 | | } |
1381 | 40.6M | } |
1382 | | |
1383 | | /// Get the "next" program point: for After, this is the Before of |
1384 | | /// the next instruction, while for Before, this is After of the |
1385 | | /// same instruction. |
1386 | | #[inline(always)] |
1387 | 41.8M | pub fn next(self) -> ProgPoint { |
1388 | 41.8M | Self { |
1389 | 41.8M | bits: self.bits + 1, |
1390 | 41.8M | } |
1391 | 41.8M | } |
1392 | | |
1393 | | /// Get the "previous" program point, the inverse of `.next()` |
1394 | | /// above. |
1395 | | #[inline(always)] |
1396 | 472k | pub fn prev(self) -> ProgPoint { |
1397 | 472k | Self { |
1398 | 472k | bits: self.bits - 1, |
1399 | 472k | } |
1400 | 472k | } |
1401 | | |
1402 | | /// Convert to a raw encoding in 32 bits. |
1403 | | #[inline(always)] |
1404 | 374M | pub fn to_index(self) -> u32 { |
1405 | 374M | self.bits |
1406 | 374M | } |
1407 | | |
1408 | | /// Construct from the raw 32-bit encoding. |
1409 | | #[inline(always)] |
1410 | 34.7M | pub fn from_index(index: u32) -> Self { |
1411 | 34.7M | Self { bits: index } |
1412 | 34.7M | } |
1413 | | |
1414 | | #[inline(always)] |
1415 | 0 | pub fn invalid() -> Self { |
1416 | 0 | Self::before(Inst::new(usize::MAX)) |
1417 | 0 | } |
1418 | | } |
1419 | | |
1420 | | /// An instruction to insert into the program to perform some data movement. |
1421 | | #[derive(Clone, Debug)] |
1422 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
1423 | | pub enum Edit { |
1424 | | /// Move one allocation to another. Each allocation may be a |
1425 | | /// register or a stack slot (spillslot). However, stack-to-stack |
1426 | | /// moves will never be generated. |
1427 | | /// |
1428 | | /// `Move` edits will be generated even if src and dst allocation |
1429 | | /// are the same if the vreg changes; this allows proper metadata |
1430 | | /// tracking even when moves are elided. |
1431 | | Move { from: Allocation, to: Allocation }, |
1432 | | } |
1433 | | |
1434 | | /// Wrapper around either an original instruction or an inserted edit. |
1435 | | #[derive(Clone, Debug)] |
1436 | | pub enum InstOrEdit<'a> { |
1437 | | Inst(Inst), |
1438 | | Edit(&'a Edit), |
1439 | | } |
1440 | | |
1441 | | /// Iterator over the instructions and edits in a block. |
1442 | | pub struct OutputIter<'a> { |
1443 | | /// List of edits starting at the first for the current block. |
1444 | | edits: &'a [(ProgPoint, Edit)], |
1445 | | |
1446 | | /// Remaining instructions in the current block. |
1447 | | inst_range: InstRange, |
1448 | | } |
1449 | | |
1450 | | impl<'a> Iterator for OutputIter<'a> { |
1451 | | type Item = InstOrEdit<'a>; |
1452 | | |
1453 | 6.55M | fn next(&mut self) -> Option<InstOrEdit<'a>> { |
1454 | | // There can't be any edits after the last instruction in a block, so |
1455 | | // we don't need to worry about that case. |
1456 | 6.55M | if self.inst_range.len() == 0 { |
1457 | 473k | return None; |
1458 | 6.08M | } |
1459 | | |
1460 | | // Return any edits that happen before the next instruction first. |
1461 | 6.08M | let next_inst = self.inst_range.first(); |
1462 | 6.08M | if let Some((edit, remaining_edits)) = self.edits.split_first() { |
1463 | 5.26M | if edit.0 <= ProgPoint::before(next_inst) { |
1464 | 1.68M | self.edits = remaining_edits; |
1465 | 1.68M | return Some(InstOrEdit::Edit(&edit.1)); |
1466 | 3.57M | } |
1467 | 821k | } |
1468 | | |
1469 | 4.39M | self.inst_range = self.inst_range.rest(); |
1470 | 4.39M | Some(InstOrEdit::Inst(next_inst)) |
1471 | 6.55M | } |
1472 | | } |
1473 | | |
1474 | | /// A machine environment tells the register allocator which registers |
1475 | | /// are available to allocate and what register may be used as a |
1476 | | /// scratch register for each class, and some other miscellaneous info |
1477 | | /// as well. |
1478 | | #[derive(Clone, Debug)] |
1479 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
1480 | | pub struct MachineEnv { |
1481 | | /// Preferred physical registers for each class. These are the |
1482 | | /// registers that will be allocated first, if free. |
1483 | | /// |
1484 | | /// If an explicit scratch register is provided in `scratch_by_class` then |
1485 | | /// it must not appear in this list. |
1486 | | pub preferred_regs_by_class: [Vec<PReg>; 3], |
1487 | | |
1488 | | /// Non-preferred physical registers for each class. These are the |
1489 | | /// registers that will be allocated if a preferred register is |
1490 | | /// not available; using one of these is considered suboptimal, |
1491 | | /// but still better than spilling. |
1492 | | /// |
1493 | | /// If an explicit scratch register is provided in `scratch_by_class` then |
1494 | | /// it must not appear in this list. |
1495 | | pub non_preferred_regs_by_class: [Vec<PReg>; 3], |
1496 | | |
1497 | | /// Optional dedicated scratch register per class. This is needed to perform |
1498 | | /// moves between registers when cyclic move patterns occur. The |
1499 | | /// register should not be placed in either the preferred or |
1500 | | /// non-preferred list (i.e., it is not otherwise allocatable). |
1501 | | /// |
1502 | | /// Note that the register allocator will freely use this register |
1503 | | /// between instructions, but *within* the machine code generated |
1504 | | /// by a single (regalloc-level) instruction, the client is free |
1505 | | /// to use the scratch register. E.g., if one "instruction" causes |
1506 | | /// the emission of two machine-code instructions, this lowering |
1507 | | /// can use the scratch register between them. |
1508 | | /// |
1509 | | /// If a scratch register is not provided then the register allocator will |
1510 | | /// automatically allocate one as needed, spilling a value to the stack if |
1511 | | /// necessary. |
1512 | | pub scratch_by_class: [Option<PReg>; 3], |
1513 | | |
1514 | | /// Some `PReg`s can be designated as locations on the stack rather than |
1515 | | /// actual registers. These can be used to tell the register allocator about |
1516 | | /// pre-defined stack slots used for function arguments and return values. |
1517 | | /// |
1518 | | /// `PReg`s in this list cannot be used as an allocatable or scratch |
1519 | | /// register. |
1520 | | pub fixed_stack_slots: Vec<PReg>, |
1521 | | } |
1522 | | |
1523 | | /// The output of the register allocator. |
1524 | | #[derive(Clone, Debug, Default)] |
1525 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
1526 | | pub struct Output { |
1527 | | /// How many spillslots are needed in the frame? |
1528 | | pub num_spillslots: usize, |
1529 | | |
1530 | | /// Edits (insertions or removals). Guaranteed to be sorted by |
1531 | | /// program point. |
1532 | | pub edits: Vec<(ProgPoint, Edit)>, |
1533 | | |
1534 | | /// Allocations for each operand. Mapping from instruction to |
1535 | | /// allocations provided by `inst_alloc_offsets` below. |
1536 | | pub allocs: Vec<Allocation>, |
1537 | | |
1538 | | /// Allocation offset in `allocs` for each instruction. |
1539 | | pub inst_alloc_offsets: Vec<u32>, |
1540 | | |
1541 | | /// Debug info: a labeled value (as applied to vregs by |
1542 | | /// `Function::debug_value_labels()` on the input side) is located |
1543 | | /// in the given allocation from the first program point |
1544 | | /// (inclusive) to the second (exclusive). Guaranteed to be sorted |
1545 | | /// by label and program point, and the ranges are guaranteed to |
1546 | | /// be disjoint. |
1547 | | pub debug_locations: Vec<(u32, ProgPoint, ProgPoint, Allocation)>, |
1548 | | |
1549 | | /// Internal stats from the allocator. |
1550 | | pub stats: ion::Stats, |
1551 | | } |
1552 | | |
1553 | | impl Output { |
1554 | | /// Get the allocations assigned to a given instruction. |
1555 | 4.39M | pub fn inst_allocs(&self, inst: Inst) -> &[Allocation] { |
1556 | 4.39M | let start = self.inst_alloc_offsets[inst.index()] as usize; |
1557 | 4.39M | let end = if inst.index() + 1 == self.inst_alloc_offsets.len() { |
1558 | 10.8k | self.allocs.len() |
1559 | | } else { |
1560 | 4.38M | self.inst_alloc_offsets[inst.index() + 1] as usize |
1561 | | }; |
1562 | 4.39M | &self.allocs[start..end] |
1563 | 4.39M | } |
1564 | | |
1565 | | /// Returns an iterator over the instructions and edits in a block, in |
1566 | | /// order. |
1567 | 473k | pub fn block_insts_and_edits(&self, func: &impl Function, block: Block) -> OutputIter<'_> { |
1568 | 473k | let inst_range = func.block_insns(block); |
1569 | | |
1570 | 473k | let edit_idx = self |
1571 | 473k | .edits |
1572 | 3.47M | .binary_search_by(|&(pos, _)| { |
1573 | | // This predicate effectively searches for a point *just* before |
1574 | | // the first ProgPoint. This never returns Ordering::Equal, but |
1575 | | // binary_search_by returns the index of where it would have |
1576 | | // been inserted in Err. |
1577 | 3.47M | if pos < ProgPoint::before(inst_range.first()) { |
1578 | 1.95M | core::cmp::Ordering::Less |
1579 | | } else { |
1580 | 1.51M | core::cmp::Ordering::Greater |
1581 | | } |
1582 | 3.47M | }) |
1583 | 473k | .unwrap_err(); |
1584 | | |
1585 | 473k | let edits = &self.edits[edit_idx..]; |
1586 | 473k | OutputIter { inst_range, edits } |
1587 | 473k | } |
1588 | | } |
1589 | | |
1590 | | /// An error that prevents allocation. |
1591 | | #[derive(Clone, Debug)] |
1592 | | #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] |
1593 | | pub enum RegAllocError { |
1594 | | /// Critical edge is not split between given blocks. |
1595 | | CritEdge(Block, Block), |
1596 | | /// Invalid SSA for given vreg at given inst: multiple defs or |
1597 | | /// illegal use. `inst` may be `Inst::invalid()` if this concerns |
1598 | | /// a block param. |
1599 | | SSA(VReg, Inst), |
1600 | | /// Invalid basic block: does not end in branch/ret, or contains a |
1601 | | /// branch/ret in the middle, or the VReg ids do not start at zero |
1602 | | /// or aren't numbered sequentially. |
1603 | | BB(Block), |
1604 | | /// Invalid branch: operand count does not match sum of block |
1605 | | /// params of successor blocks, or the block ids do not start at |
1606 | | /// zero or aren't numbered sequentially. |
1607 | | Branch(Inst), |
1608 | | /// A VReg is live-in on entry; this is not allowed. |
1609 | | EntryLivein, |
1610 | | /// A branch has non-blockparam arg(s) and at least one of the |
1611 | | /// successor blocks has more than one predecessor, forcing |
1612 | | /// edge-moves before this branch. This is disallowed because it |
1613 | | /// places a use after the edge moves occur; insert an edge block |
1614 | | /// to avoid the situation. |
1615 | | DisallowedBranchArg(Inst), |
1616 | | /// Too many pinned VRegs + Reg-constrained Operands are live at |
1617 | | /// once, making allocation impossible. |
1618 | | TooManyLiveRegs, |
1619 | | /// Too many operands on a single instruction (beyond limit of |
1620 | | /// 2^16 - 1). |
1621 | | TooManyOperands, |
1622 | | } |
1623 | | |
1624 | | impl core::fmt::Display for RegAllocError { |
1625 | 0 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
1626 | 0 | write!(f, "{:?}", self) |
1627 | 0 | } |
1628 | | } |
1629 | | |
1630 | | #[cfg(feature = "std")] |
1631 | | impl std::error::Error for RegAllocError {} |
1632 | | |
1633 | | /// Run the allocator. |
1634 | | pub fn run<F: Function>( |
1635 | | func: &F, |
1636 | | env: &MachineEnv, |
1637 | | options: &RegallocOptions, |
1638 | | ) -> Result<Output, RegAllocError> { |
1639 | | match options.algorithm { |
1640 | | Algorithm::Ion => { |
1641 | | let mut ctx = Ctx::default(); |
1642 | | run_with_ctx(func, env, options, &mut ctx)?; |
1643 | | Ok(ctx.output) |
1644 | | } |
1645 | | Algorithm::Fastalloc => { |
1646 | | fastalloc::run(func, env, options.verbose_log, options.validate_ssa) |
1647 | | } |
1648 | | } |
1649 | | } |
1650 | | |
1651 | | /// Run the allocator with reusable context. |
1652 | | /// |
1653 | | /// Return value points to `ctx.output` that can be alternatively `std::mem::take`n. |
1654 | | pub fn run_with_ctx<'a, F: Function>( |
1655 | | func: &F, |
1656 | | env: &MachineEnv, |
1657 | | options: &RegallocOptions, |
1658 | | ctx: &'a mut Ctx, |
1659 | | ) -> Result<&'a Output, RegAllocError> { |
1660 | | match options.algorithm { |
1661 | | Algorithm::Ion => ion::run(func, env, ctx, options.verbose_log, options.validate_ssa)?, |
1662 | | Algorithm::Fastalloc => { |
1663 | | ctx.output = fastalloc::run(func, env, options.verbose_log, options.validate_ssa)? |
1664 | | } |
1665 | | } |
1666 | | Ok(&ctx.output) |
1667 | | } |
1668 | | |
1669 | | #[derive(Clone, Copy, Debug, Default)] |
1670 | | pub enum Algorithm { |
1671 | | #[default] |
1672 | | Ion, |
1673 | | Fastalloc, |
1674 | | } |
1675 | | |
1676 | | /// Options for allocation. |
1677 | | #[derive(Clone, Copy, Debug, Default)] |
1678 | | pub struct RegallocOptions { |
1679 | | /// Add extra verbosity to debug logs. |
1680 | | pub verbose_log: bool, |
1681 | | |
1682 | | /// Run the SSA validator before allocating registers. |
1683 | | pub validate_ssa: bool, |
1684 | | |
1685 | | /// The register allocation algorithm to be used. |
1686 | | pub algorithm: Algorithm, |
1687 | | } |
1688 | | |
1689 | | pub(crate) trait VecExt<T> { |
1690 | | /// Fills `self` with `value` up to `len` and return the mutable slice to the values. |
1691 | | fn repopulate(&mut self, len: usize, value: T) -> &mut [T] |
1692 | | where |
1693 | | T: Clone; |
1694 | | /// Clears the `self` and returns a mutable reference to it. |
1695 | | fn cleared(&mut self) -> &mut Self; |
1696 | | /// Makes sure `self` is empty and has at least `cap` capacity. |
1697 | | fn preallocate(&mut self, cap: usize) -> &mut Self; |
1698 | | } |
1699 | | |
1700 | | impl<T> VecExt<T> for Vec<T> { |
1701 | 118k | fn repopulate(&mut self, len: usize, value: T) -> &mut [T] |
1702 | 118k | where |
1703 | 118k | T: Clone, |
1704 | | { |
1705 | 118k | self.clear(); |
1706 | 118k | self.resize(len, value); |
1707 | 118k | self |
1708 | 118k | } <alloc::vec::Vec<core::option::Option<u32>> as regalloc2::VecExt<core::option::Option<u32>>>::repopulate Line | Count | Source | 1701 | 21.6k | fn repopulate(&mut self, len: usize, value: T) -> &mut [T] | 1702 | 21.6k | where | 1703 | 21.6k | T: Clone, | 1704 | | { | 1705 | 21.6k | self.clear(); | 1706 | 21.6k | self.resize(len, value); | 1707 | 21.6k | self | 1708 | 21.6k | } |
<alloc::vec::Vec<regalloc2::ProgPoint> as regalloc2::VecExt<regalloc2::ProgPoint>>::repopulate Line | Count | Source | 1701 | 21.6k | fn repopulate(&mut self, len: usize, value: T) -> &mut [T] | 1702 | 21.6k | where | 1703 | 21.6k | T: Clone, | 1704 | | { | 1705 | 21.6k | self.clear(); | 1706 | 21.6k | self.resize(len, value); | 1707 | 21.6k | self | 1708 | 21.6k | } |
<alloc::vec::Vec<regalloc2::index::Block> as regalloc2::VecExt<regalloc2::index::Block>>::repopulate Line | Count | Source | 1701 | 32.4k | fn repopulate(&mut self, len: usize, value: T) -> &mut [T] | 1702 | 32.4k | where | 1703 | 32.4k | T: Clone, | 1704 | | { | 1705 | 32.4k | self.clear(); | 1706 | 32.4k | self.resize(len, value); | 1707 | 32.4k | self | 1708 | 32.4k | } |
<alloc::vec::Vec<regalloc2::ion::data_structures::LiveRangeIndex> as regalloc2::VecExt<regalloc2::ion::data_structures::LiveRangeIndex>>::repopulate Line | Count | Source | 1701 | 10.8k | fn repopulate(&mut self, len: usize, value: T) -> &mut [T] | 1702 | 10.8k | where | 1703 | 10.8k | T: Clone, | 1704 | | { | 1705 | 10.8k | self.clear(); | 1706 | 10.8k | self.resize(len, value); | 1707 | 10.8k | self | 1708 | 10.8k | } |
<alloc::vec::Vec<bool> as regalloc2::VecExt<bool>>::repopulate Line | Count | Source | 1701 | 21.6k | fn repopulate(&mut self, len: usize, value: T) -> &mut [T] | 1702 | 21.6k | where | 1703 | 21.6k | T: Clone, | 1704 | | { | 1705 | 21.6k | self.clear(); | 1706 | 21.6k | self.resize(len, value); | 1707 | 21.6k | self | 1708 | 21.6k | } |
<alloc::vec::Vec<u32> as regalloc2::VecExt<u32>>::repopulate Line | Count | Source | 1701 | 10.8k | fn repopulate(&mut self, len: usize, value: T) -> &mut [T] | 1702 | 10.8k | where | 1703 | 10.8k | T: Clone, | 1704 | | { | 1705 | 10.8k | self.clear(); | 1706 | 10.8k | self.resize(len, value); | 1707 | 10.8k | self | 1708 | 10.8k | } |
|
1709 | | |
1710 | 10.8k | fn cleared(&mut self) -> &mut Self { |
1711 | 10.8k | self.clear(); |
1712 | 10.8k | self |
1713 | 10.8k | } |
1714 | | |
1715 | 75.6k | fn preallocate(&mut self, cap: usize) -> &mut Self { |
1716 | 75.6k | self.clear(); |
1717 | 75.6k | self.reserve(cap); |
1718 | 75.6k | self |
1719 | 75.6k | } <alloc::vec::Vec<regalloc2::Allocation> as regalloc2::VecExt<regalloc2::Allocation>>::preallocate Line | Count | Source | 1715 | 10.8k | fn preallocate(&mut self, cap: usize) -> &mut Self { | 1716 | 10.8k | self.clear(); | 1717 | 10.8k | self.reserve(cap); | 1718 | 10.8k | self | 1719 | 10.8k | } |
<alloc::vec::Vec<regalloc2::indexset::IndexSet> as regalloc2::VecExt<regalloc2::indexset::IndexSet>>::preallocate Line | Count | Source | 1715 | 21.6k | fn preallocate(&mut self, cap: usize) -> &mut Self { | 1716 | 21.6k | self.clear(); | 1717 | 21.6k | self.reserve(cap); | 1718 | 21.6k | self | 1719 | 21.6k | } |
<alloc::vec::Vec<regalloc2::ion::data_structures::LiveBundle> as regalloc2::VecExt<regalloc2::ion::data_structures::LiveBundle>>::preallocate Line | Count | Source | 1715 | 10.8k | fn preallocate(&mut self, cap: usize) -> &mut Self { | 1716 | 10.8k | self.clear(); | 1717 | 10.8k | self.reserve(cap); | 1718 | 10.8k | self | 1719 | 10.8k | } |
<alloc::vec::Vec<regalloc2::ion::data_structures::SpillSet> as regalloc2::VecExt<regalloc2::ion::data_structures::SpillSet>>::preallocate Line | Count | Source | 1715 | 10.8k | fn preallocate(&mut self, cap: usize) -> &mut Self { | 1716 | 10.8k | self.clear(); | 1717 | 10.8k | self.reserve(cap); | 1718 | 10.8k | self | 1719 | 10.8k | } |
<alloc::vec::Vec<regalloc2::ion::data_structures::VRegData> as regalloc2::VecExt<regalloc2::ion::data_structures::VRegData>>::preallocate Line | Count | Source | 1715 | 10.8k | fn preallocate(&mut self, cap: usize) -> &mut Self { | 1716 | 10.8k | self.clear(); | 1717 | 10.8k | self.reserve(cap); | 1718 | 10.8k | self | 1719 | 10.8k | } |
<alloc::vec::Vec<regalloc2::ion::data_structures::LiveRange> as regalloc2::VecExt<regalloc2::ion::data_structures::LiveRange>>::preallocate Line | Count | Source | 1715 | 10.8k | fn preallocate(&mut self, cap: usize) -> &mut Self { | 1716 | 10.8k | self.clear(); | 1717 | 10.8k | self.reserve(cap); | 1718 | 10.8k | self | 1719 | 10.8k | } |
|
1720 | | } |
1721 | | |
1722 | | #[derive(Debug, Clone, Default)] |
1723 | | pub(crate) struct Bump(Rc<bumpalo::Bump>); |
1724 | | |
1725 | | impl Bump { |
1726 | 10.8k | pub(crate) fn get_mut(&mut self) -> Option<&mut bumpalo::Bump> { |
1727 | 10.8k | Rc::get_mut(&mut self.0) |
1728 | 10.8k | } |
1729 | | } |
1730 | | |
1731 | | // Simply delegating because `Rc<bumpalo::Bump>` does not implement `Allocator`. |
1732 | | unsafe impl allocator_api2::alloc::Allocator for Bump { |
1733 | 25.6M | fn allocate( |
1734 | 25.6M | &self, |
1735 | 25.6M | layout: core::alloc::Layout, |
1736 | 25.6M | ) -> Result<core::ptr::NonNull<[u8]>, allocator_api2::alloc::AllocError> { |
1737 | 25.6M | self.0.deref().allocate(layout) |
1738 | 25.6M | } |
1739 | | |
1740 | 25.6M | unsafe fn deallocate(&self, ptr: core::ptr::NonNull<u8>, layout: core::alloc::Layout) { |
1741 | 25.6M | self.0.deref().deallocate(ptr, layout); |
1742 | 25.6M | } |
1743 | | |
1744 | 0 | fn allocate_zeroed( |
1745 | 0 | &self, |
1746 | 0 | layout: core::alloc::Layout, |
1747 | 0 | ) -> Result<core::ptr::NonNull<[u8]>, allocator_api2::alloc::AllocError> { |
1748 | 0 | self.0.deref().allocate_zeroed(layout) |
1749 | 0 | } |
1750 | | |
1751 | 2.14M | unsafe fn grow( |
1752 | 2.14M | &self, |
1753 | 2.14M | ptr: core::ptr::NonNull<u8>, |
1754 | 2.14M | old_layout: core::alloc::Layout, |
1755 | 2.14M | new_layout: core::alloc::Layout, |
1756 | 2.14M | ) -> Result<core::ptr::NonNull<[u8]>, allocator_api2::alloc::AllocError> { |
1757 | 2.14M | self.0.deref().grow(ptr, old_layout, new_layout) |
1758 | 2.14M | } |
1759 | | |
1760 | 0 | unsafe fn grow_zeroed( |
1761 | 0 | &self, |
1762 | 0 | ptr: core::ptr::NonNull<u8>, |
1763 | 0 | old_layout: core::alloc::Layout, |
1764 | 0 | new_layout: core::alloc::Layout, |
1765 | 0 | ) -> Result<core::ptr::NonNull<[u8]>, allocator_api2::alloc::AllocError> { |
1766 | 0 | self.0.deref().grow_zeroed(ptr, old_layout, new_layout) |
1767 | 0 | } |
1768 | | |
1769 | 661k | unsafe fn shrink( |
1770 | 661k | &self, |
1771 | 661k | ptr: core::ptr::NonNull<u8>, |
1772 | 661k | old_layout: core::alloc::Layout, |
1773 | 661k | new_layout: core::alloc::Layout, |
1774 | 661k | ) -> Result<core::ptr::NonNull<[u8]>, allocator_api2::alloc::AllocError> { |
1775 | 661k | self.0.deref().shrink(ptr, old_layout, new_layout) |
1776 | 661k | } |
1777 | | } |