/src/regalloc2/src/ion/liveranges.rs
Line | Count | Source |
1 | | /* |
2 | | * This file was initially derived from the files |
3 | | * `js/src/jit/BacktrackingAllocator.h` and |
4 | | * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was |
5 | | * originally licensed under the Mozilla Public License 2.0. We |
6 | | * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see |
7 | | * https://github.com/bytecodealliance/regalloc2/issues/7). |
8 | | * |
9 | | * Since the initial port, the design has been substantially evolved |
10 | | * and optimized. |
11 | | */ |
12 | | |
13 | | //! Live-range computation. |
14 | | |
15 | | use super::{ |
16 | | CodeRange, Env, LiveRangeFlag, LiveRangeIndex, LiveRangeKey, LiveRangeList, LiveRangeListEntry, |
17 | | LiveRangeSet, PRegData, PRegIndex, RegClass, Use, VRegData, VRegIndex, |
18 | | }; |
19 | | use crate::indexset::IndexSet; |
20 | | use crate::ion::data_structures::{ |
21 | | BlockparamIn, BlockparamOut, FixedRegFixupLevel, MultiFixedRegFixup, |
22 | | }; |
23 | | use crate::{ |
24 | | Allocation, Block, Function, Inst, InstPosition, Operand, OperandConstraint, OperandKind, |
25 | | OperandPos, PReg, ProgPoint, RegAllocError, VReg, VecExt, |
26 | | }; |
27 | | use core::convert::TryFrom; |
28 | | use core::usize; |
29 | | use smallvec::{smallvec, SmallVec}; |
30 | | |
31 | | /// A spill weight computed for a certain Use. |
32 | | #[derive(Clone, Copy, Debug)] |
33 | | pub struct SpillWeight(f32); |
34 | | |
35 | | #[inline(always)] |
36 | 27.3M | pub fn spill_weight_from_constraint( |
37 | 27.3M | constraint: OperandConstraint, |
38 | 27.3M | loop_depth: usize, |
39 | 27.3M | is_def: bool, |
40 | 27.3M | ) -> SpillWeight { |
41 | | // A base bonus of 1000 at loop depth 0, multiplied by 4 per loop |
42 | | // level: 4000 at depth 1, 16000 at depth 2, etc. Avoids exponentiation. |
43 | 27.3M | let loop_depth = core::cmp::min(10, loop_depth); |
44 | 80.5M | let hot_bonus: f32 = (0..loop_depth).fold(1000.0, |a, _| a * 4.0); |
45 | 27.3M | let def_bonus: f32 = if is_def { 2000.0 } else { 0.0 }; |
46 | 27.3M | let constraint_bonus: f32 = match constraint { |
47 | 9.52M | OperandConstraint::Any => 1000.0, |
48 | 17.2M | OperandConstraint::Reg | OperandConstraint::FixedReg(_) => 2000.0, |
49 | 537k | _ => 0.0, |
50 | | }; |
51 | 27.3M | SpillWeight(hot_bonus + def_bonus + constraint_bonus) |
52 | 27.3M | } |
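
A quick worked check of the formula above (the values follow directly from the code): a register-constrained def at loop depth 2 scores 16000 (hot) + 2000 (def) + 2000 (constraint).

    // Sketch: the depth-2 hot bonus is 1000 * 4 * 4 = 16000; a def adds
    // 2000; a Reg constraint adds another 2000.
    let w = spill_weight_from_constraint(OperandConstraint::Reg, 2, true);
    assert_eq!(w.to_f32(), 20_000.0);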
53 | | |
54 | | impl SpillWeight { |
55 | | /// Convert a floating-point weight to a u16 that can be compactly |
56 | | /// stored in a `Use`. We keep bits 30..15 of the f32 (the exponent |
57 | | /// and top eight mantissa bits; the sign bit is always zero), much like |
58 | | /// bfloat16 (https://en.wikipedia.org/wiki/Bfloat16_floating-point_format). |
59 | 15.0M | pub fn to_bits(self) -> u16 { |
60 | 15.0M | (self.0.to_bits() >> 15) as u16 |
61 | 15.0M | } |
62 | | |
63 | | /// Convert a value that was returned from |
64 | | /// `SpillWeight::to_bits()` back into a `SpillWeight`. Note that |
65 | | /// some precision may be lost when round-tripping from a spill |
66 | | /// weight to packed bits and back. |
67 | 3.11M | pub fn from_bits(bits: u16) -> SpillWeight { |
68 | 3.11M | let x = f32::from_bits((bits as u32) << 15); |
69 | 3.11M | SpillWeight(x) |
70 | 3.11M | } |
71 | | |
72 | | /// Get a zero spill weight. |
73 | 2.69M | pub fn zero() -> SpillWeight { |
74 | 2.69M | SpillWeight(0.0) |
75 | 2.69M | } |
76 | | |
77 | | /// Convert to a raw floating-point value. |
78 | 17.7M | pub fn to_f32(self) -> f32 { |
79 | 17.7M | self.0 |
80 | 17.7M | } |
81 | | |
82 | | /// Create a `SpillWeight` from a raw floating-point value. |
83 | 21.2M | pub fn from_f32(x: f32) -> SpillWeight { |
84 | 21.2M | SpillWeight(x) |
85 | 21.2M | } |
86 | | |
87 | 12.2M | pub fn to_int(self) -> u32 { |
88 | 12.2M | self.0 as u32 |
89 | 12.2M | } |
90 | | } |
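
To make the precision note above concrete, a small round-trip sketch; the exact values follow from truncating the mantissa to eight bits.

    // 20000.0 carries mantissa bits beyond the eight that survive
    // packing, so it comes back as 19968.0 (= 16384 * 1.21875).
    let packed = SpillWeight::from_f32(20_000.0).to_bits();
    assert_eq!(SpillWeight::from_bits(packed).to_f32(), 19_968.0);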
91 | | |
92 | | impl core::ops::Add<SpillWeight> for SpillWeight { |
93 | | type Output = SpillWeight; |
94 | 24.3M | fn add(self, other: SpillWeight) -> Self { |
95 | 24.3M | SpillWeight(self.0 + other.0) |
96 | 24.3M | } |
97 | | } |
98 | | |
99 | 15.1M | fn slot_idx(i: usize) -> Result<u16, RegAllocError> { |
100 | 15.1M | u16::try_from(i).map_err(|_| RegAllocError::TooManyOperands) |
101 | 15.1M | } |
102 | | |
103 | | impl<'a, F: Function> Env<'a, F> { |
104 | 10.5k | pub fn create_pregs_and_vregs(&mut self) { |
105 | | // Create PRegs from the env. |
106 | 10.5k | self.pregs.resize( |
107 | | PReg::NUM_INDEX, |
108 | 10.5k | PRegData { |
109 | 10.5k | allocations: LiveRangeSet::new(), |
110 | 10.5k | is_stack: false, |
111 | 10.5k | }, |
112 | | ); |
113 | 991k | for &preg in &self.env.fixed_stack_slots { |
114 | 981k | self.pregs[preg.index()].is_stack = true; |
115 | 981k | } |
116 | 31.6k | for class in 0..self.preferred_victim_by_class.len() { |
117 | 31.6k | self.preferred_victim_by_class[class] = self.env.non_preferred_regs_by_class[class] |
118 | 31.6k | .last() |
119 | 31.6k | .or(self.env.preferred_regs_by_class[class].last()) |
120 | 31.6k | .cloned() |
121 | 31.6k | .unwrap_or(PReg::invalid()); |
122 | 31.6k | } |
123 | | // Create VRegs from the vreg count. |
124 | 7.46M | for idx in 0..self.func.num_vregs() { |
125 | 7.46M | // We'll fill in the real details when we see the def. |
126 | 7.46M | self.ctx.vregs.add( |
127 | 7.46M | VReg::new(idx, RegClass::Int), |
128 | 7.46M | VRegData { |
129 | 7.46M | ranges: LiveRangeList::new_in(self.ctx.bump()), |
130 | 7.46M | blockparam: Block::invalid(), |
131 | 7.46M | // We'll learn the RegClass as we scan the code. |
132 | 7.46M | class: None, |
133 | 7.46M | }, |
134 | 7.46M | ); |
135 | 7.46M | } |
136 | | // Create allocations too. |
137 | 4.27M | for inst in 0..self.func.num_insts() { |
138 | 4.27M | let start = self.output.allocs.len() as u32; |
139 | 4.27M | self.output.inst_alloc_offsets.push(start); |
140 | 15.1M | for _ in 0..self.func.inst_operands(Inst::new(inst)).len() { |
141 | 15.1M | self.output.allocs.push(Allocation::none()); |
142 | 15.1M | } |
143 | | } |
144 | 10.5k | } |
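
The final loop above builds a flat allocation table: one allocation vector plus a per-instruction start offset. A hypothetical lookup helper (`inst_alloc`, not part of this file) would index it like this:

    // Operand `op` of instruction `inst` sits at a fixed offset from
    // the instruction's start index recorded in `inst_alloc_offsets`.
    fn inst_alloc(allocs: &[Allocation], offsets: &[u32], inst: usize, op: usize) -> Allocation {
        allocs[offsets[inst] as usize + op]
    }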
145 | | |
146 | | /// Mark `range` as live for the given `vreg`. |
147 | | /// |
148 | | /// Returns the liverange that contains the given range. |
149 | 19.7M | pub fn add_liverange_to_vreg( |
150 | 19.7M | &mut self, |
151 | 19.7M | vreg: VRegIndex, |
152 | 19.7M | mut range: CodeRange, |
153 | 19.7M | ) -> LiveRangeIndex { |
154 | 19.7M | trace!("add_liverange_to_vreg: vreg {:?} range {:?}", vreg, range); |
155 | | |
156 | | // Invariant: as we are building liveness information, we |
157 | | // *always* process instructions bottom-to-top, and as a |
158 | | // consequence, new liveranges are always created before any |
159 | | // existing liveranges for a given vreg. We assert this here, |
160 | | // then use it to avoid an O(n) merge step (which would lead |
161 | | // to O(n^2) liveness construction cost overall). |
162 | | // |
163 | | // We store liveranges in reverse order in the `.ranges` |
164 | | // array, then reverse them at the end of |
165 | | // `compute_liveness()`. |
166 | | |
167 | 19.7M | if !self.vregs[vreg].ranges.is_empty() { |
168 | 12.2M | let last_range_index = self.vregs[vreg].ranges.last().unwrap().index; |
169 | 12.2M | let last_range = self.ranges[last_range_index].range; |
170 | 12.2M | if self.func.allow_multiple_vreg_defs() { |
171 | 0 | if last_range.contains(&range) { |
172 | | // Special case (may occur when multiple defs of pinned |
173 | | // physical regs occur): if this new range overlaps the |
174 | | // existing range, return it. |
175 | 0 | return last_range_index; |
176 | 0 | } |
177 | | // If this range's end falls in the middle of the last |
178 | | // range, truncate it to be contiguous so we can merge |
179 | | // below. |
180 | 0 | if range.to >= last_range.from && range.to <= last_range.to { |
181 | 0 | range.to = last_range.from; |
182 | 0 | } |
183 | 12.2M | } |
184 | 12.2M | debug_assert!( |
185 | 0 | range.to <= last_range.from, |
186 | 0 | "range {:?}, last_range {:?}", |
187 | | range, |
188 | | last_range |
189 | | ); |
190 | 7.46M | } |
191 | | |
192 | 19.7M | if self.vregs[vreg].ranges.is_empty() |
193 | 12.2M | || range.to |
194 | 12.2M | < self.ranges[self.vregs[vreg].ranges.last().unwrap().index] |
195 | 12.2M | .range |
196 | 12.2M | .from |
197 | | { |
198 | | // Is not contiguous with previously-added (immediately |
199 | | // following) range; create a new range. |
200 | 10.1M | let lr = self.ctx.ranges.add(range, self.ctx.bump()); |
201 | 10.1M | self.ranges[lr].vreg = vreg; |
202 | 10.1M | self.vregs[vreg] |
203 | 10.1M | .ranges |
204 | 10.1M | .push(LiveRangeListEntry { range, index: lr }); |
205 | 10.1M | lr |
206 | | } else { |
207 | | // Is contiguous with previously-added range; just extend |
208 | | // its range and return it. |
209 | 9.58M | let lr = self.vregs[vreg].ranges.last().unwrap().index; |
210 | 9.58M | debug_assert!(range.to == self.ranges[lr].range.from); |
211 | 9.58M | self.ranges[lr].range.from = range.from; |
212 | 9.58M | lr |
213 | | } |
214 | 19.7M | } |
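
A minimal standalone sketch of the extend-or-push merge above, using a simplified hypothetical Range type rather than this file's CodeRange: because ranges arrive bottom-to-top, each new range either abuts the most recently added one (extend it) or strictly precedes it (push a new one).

    #[derive(Clone, Copy, Debug, PartialEq)]
    struct Range { from: u32, to: u32 }

    fn add_range(ranges: &mut Vec<Range>, r: Range) {
        // Bottom-to-top construction: `r` never starts after the last range.
        debug_assert!(ranges.last().map_or(true, |last| r.to <= last.from));
        match ranges.last_mut() {
            // Contiguous with the previously added (following) range: extend it.
            Some(last) if r.to == last.from => last.from = r.from,
            // Otherwise there is a gap: start a new range.
            _ => ranges.push(r),
        }
    }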
215 | | |
216 | 15.0M | pub fn insert_use_into_liverange(&mut self, into: LiveRangeIndex, mut u: Use) { |
217 | 15.0M | let operand = u.operand; |
218 | 15.0M | let constraint = operand.constraint(); |
219 | 15.0M | let block = self.cfginfo.insn_block[u.pos.inst().index()]; |
220 | 15.0M | let loop_depth = self.cfginfo.approx_loop_depth[block.index()] as usize; |
221 | 15.0M | let weight = spill_weight_from_constraint( |
222 | 15.0M | constraint, |
223 | 15.0M | loop_depth, |
224 | 15.0M | operand.kind() != OperandKind::Use, |
225 | | ); |
226 | 15.0M | u.weight = weight.to_bits(); |
227 | | |
228 | 15.0M | trace!( |
229 | 0 | "insert use {:?} into lr {:?} with weight {:?}", |
230 | | u, |
231 | | into, |
232 | | weight, |
233 | | ); |
234 | | |
235 | | // N.B.: we do *not* update `requirement` on the range, |
236 | | // because those will be computed during the multi-fixed-reg |
237 | | // fixup pass later (after all uses are inserted). |
238 | | |
239 | 15.0M | self.ranges[into].uses.push(u); |
240 | | |
241 | | // Update stats. |
242 | 15.0M | let range_weight = self.ranges[into].uses_spill_weight() + weight; |
243 | 15.0M | self.ranges[into].set_uses_spill_weight(range_weight); |
244 | 15.0M | trace!( |
245 | 0 | " -> now range has weight {:?}", |
246 | 0 | self.ranges[into].uses_spill_weight(), |
247 | | ); |
248 | 15.0M | } |
249 | | |
250 | | pub fn find_vreg_liverange_for_pos( |
251 | | &self, |
252 | | vreg: VRegIndex, |
253 | | pos: ProgPoint, |
254 | | ) -> Option<LiveRangeIndex> { |
255 | | for entry in &self.vregs[vreg].ranges { |
256 | | if entry.range.contains_point(pos) { |
257 | | return Some(entry.index); |
258 | | } |
259 | | } |
260 | | None |
261 | | } |
262 | | |
263 | 1.00M | pub fn add_liverange_to_preg(&mut self, range: CodeRange, reg: PReg) { |
264 | 1.00M | trace!("adding liverange to preg: {:?} to {}", range, reg); |
265 | 1.00M | let preg_idx = PRegIndex::new(reg.index()); |
266 | 1.00M | let res = self.pregs[preg_idx.index()] |
267 | 1.00M | .allocations |
268 | 1.00M | .btree |
269 | 1.00M | .insert(LiveRangeKey::from_range(&range), LiveRangeIndex::invalid()); |
270 | 1.00M | debug_assert!(res.is_none()); |
271 | 1.00M | } |
272 | | |
273 | 12.8M | pub fn is_live_in(&mut self, block: Block, vreg: VRegIndex) -> bool { |
274 | 12.8M | self.liveins[block.index()].get(vreg.index()) |
275 | 12.8M | } |
276 | | |
277 | 10.5k | pub fn compute_liveness(&mut self) -> Result<(), RegAllocError> { |
278 | | // Create initial LiveIn and LiveOut bitsets. |
279 | 462k | for _ in 0..self.func.num_blocks() { |
280 | 462k | self.liveins.push(IndexSet::new()); |
281 | 462k | self.liveouts.push(IndexSet::new()); |
282 | 462k | } |
283 | | |
284 | | // Run a worklist algorithm to precisely compute liveins and |
285 | | // liveouts. |
286 | 10.5k | let mut workqueue = core::mem::take(&mut self.ctx.scratch_workqueue); |
287 | 10.5k | let mut workqueue_set = core::mem::take(&mut self.ctx.scratch_workqueue_set); |
288 | 10.5k | workqueue_set.clear(); |
289 | | // Initialize workqueue with postorder traversal. |
290 | 462k | for &block in &self.cfginfo.postorder[..] { |
291 | 462k | workqueue.push_back(block); |
292 | 462k | workqueue_set.insert(block); |
293 | 462k | } |
294 | | |
295 | 1.22M | while let Some(block) = workqueue.pop_front() { |
296 | 1.21M | workqueue_set.remove(&block); |
297 | 1.21M | let insns = self.func.block_insns(block); |
298 | | |
299 | 1.21M | trace!("computing liveins for block{}", block.index()); |
300 | | |
301 | 1.21M | self.output.stats.livein_iterations += 1; |
302 | | |
303 | 1.21M | let mut live = self.liveouts[block.index()].clone(); |
304 | 1.21M | trace!(" -> initial liveout set: {:?}", live); |
305 | | |
306 | | // Include outgoing blockparams in the initial live set. |
307 | 1.21M | if self.func.is_branch(insns.last()) { |
308 | 1.55M | for i in 0..self.func.block_succs(block).len() { |
309 | 1.55M | for ¶m in self.func.branch_blockparams(block, insns.last(), i) { |
310 | 479k | live.set(param.vreg(), true); |
311 | 479k | self.observe_vreg_class(param); |
312 | 479k | } |
313 | | } |
314 | 10.5k | } |
315 | | |
316 | 12.1M | for inst in insns.iter().rev() { |
317 | 36.3M | for pos in &[OperandPos::Late, OperandPos::Early] { |
318 | 96.8M | for op in self.func.inst_operands(inst) { |
319 | 96.8M | if op.as_fixed_nonallocatable().is_some() { |
320 | 626k | continue; |
321 | 96.2M | } |
322 | 96.2M | if op.pos() == *pos { |
323 | 48.1M | let was_live = live.get(op.vreg().vreg()); |
324 | 48.1M | trace!("op {:?} was_live = {}", op, was_live); |
325 | 48.1M | match op.kind() { |
326 | 26.6M | OperandKind::Use => { |
327 | 26.6M | live.set(op.vreg().vreg(), true); |
328 | 26.6M | } |
329 | 21.4M | OperandKind::Def => { |
330 | 21.4M | live.set(op.vreg().vreg(), false); |
331 | 21.4M | } |
332 | | } |
333 | 48.1M | self.observe_vreg_class(op.vreg()); |
334 | 48.1M | } |
335 | | } |
336 | | } |
337 | | } |
338 | 1.21M | for &blockparam in self.func.block_params(block) { |
339 | 375k | live.set(blockparam.vreg(), false); |
340 | 375k | self.observe_vreg_class(blockparam); |
341 | 375k | } |
342 | | |
343 | 1.40M | for &pred in self.func.block_preds(block) { |
344 | 1.40M | if self.ctx.liveouts[pred.index()].union_with(&live) { |
345 | 1.00M | if !workqueue_set.contains(&pred) { |
346 | 747k | workqueue_set.insert(pred); |
347 | 747k | workqueue.push_back(pred); |
348 | 747k | } |
349 | 405k | } |
350 | | } |
351 | | |
352 | 1.21M | trace!("computed liveins at block{}: {:?}", block.index(), live); |
353 | 1.21M | self.liveins[block.index()] = live; |
354 | | } |
355 | | |
356 | | // Check that there are no liveins to the entry block. |
357 | 10.5k | if !self.liveins[self.func.entry_block().index()].is_empty() { |
358 | 0 | trace!( |
359 | 0 | "non-empty liveins to entry block: {:?}", |
360 | 0 | self.liveins[self.func.entry_block().index()] |
361 | | ); |
362 | 0 | return Err(RegAllocError::EntryLivein); |
363 | 10.5k | } |
364 | | |
365 | 10.5k | self.ctx.scratch_workqueue = workqueue; |
366 | 10.5k | self.ctx.scratch_workqueue_set = workqueue_set; |
367 | | |
368 | 10.5k | Ok(()) |
369 | 10.5k | } |
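
For reference, a self-contained sketch of the same backward worklist dataflow, restated over block-level summaries; the names `preds`, `upward_uses`, and `defs` are hypothetical stand-ins for the per-instruction scan and CFG queries used above.

    use std::collections::{HashSet, VecDeque};

    // live_in[b] = upward_uses[b] ∪ (live_out[b] − defs[b]); a predecessor
    // is re-enqueued whenever its live-out set grows.
    fn liveness(
        postorder: &[usize],
        preds: &[Vec<usize>],
        upward_uses: &[HashSet<u32>],
        defs: &[HashSet<u32>],
    ) -> Vec<HashSet<u32>> {
        let n = preds.len();
        let mut liveins = vec![HashSet::new(); n];
        let mut liveouts: Vec<HashSet<u32>> = vec![HashSet::new(); n];
        let mut queue: VecDeque<usize> = postorder.iter().copied().collect();
        let mut queued: HashSet<usize> = queue.iter().copied().collect();
        while let Some(b) = queue.pop_front() {
            queued.remove(&b);
            let mut live = liveouts[b].clone();
            for d in &defs[b] {
                live.remove(d);
            }
            live.extend(upward_uses[b].iter().copied());
            for &p in &preds[b] {
                let before = liveouts[p].len();
                liveouts[p].extend(live.iter().copied());
                // Re-enqueue the predecessor only if its live-out set grew.
                if liveouts[p].len() > before && queued.insert(p) {
                    queue.push_back(p);
                }
            }
            liveins[b] = live;
        }
        liveins
    }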
370 | | |
371 | 10.5k | pub fn build_liveranges(&mut self) -> Result<(), RegAllocError> { |
372 | | // Create Uses and Defs referring to VRegs, and place the Uses |
373 | | // in LiveRanges. |
374 | | // |
375 | | // We already computed precise liveouts and liveins for every |
376 | | // block above, so we don't need to run an iterative algorithm |
377 | | // here; instead, every block's computation is purely local, |
378 | | // from end to start. |
379 | | |
380 | | // Track current LiveRange for each vreg. |
381 | | // |
382 | | // Invariant: a stale range may be present here; ranges are |
383 | | // only valid if `live.get(vreg)` is true. |
384 | 10.5k | let mut vreg_ranges = core::mem::take(&mut self.ctx.scratch_vreg_ranges); |
385 | 10.5k | vreg_ranges.repopulate(self.func.num_vregs(), LiveRangeIndex::invalid()); |
386 | 10.5k | let mut operand_rewrites = core::mem::take(&mut self.ctx.scratch_operand_rewrites); |
387 | | |
388 | 462k | for i in (0..self.func.num_blocks()).rev() { |
389 | 462k | let block = Block::new(i); |
390 | 462k | let insns = self.func.block_insns(block); |
391 | | |
392 | 462k | self.output.stats.livein_blocks += 1; |
393 | | |
394 | | // Init our local live-in set. |
395 | 462k | let mut live = self.liveouts[block.index()].clone(); |
396 | | |
397 | | // If the last instruction is a branch (rather than |
398 | | // return), create blockparam_out entries. |
399 | 462k | if self.func.is_branch(insns.last()) { |
400 | 569k | for (i, &succ) in self.func.block_succs(block).iter().enumerate() { |
401 | 569k | let blockparams_in = self.func.block_params(succ); |
402 | 569k | let blockparams_out = self.func.branch_blockparams(block, insns.last(), i); |
403 | 218k | for (&blockparam_in, &blockparam_out) in |
404 | 569k | blockparams_in.iter().zip(blockparams_out) |
405 | 218k | { |
406 | 218k | let blockparam_out = VRegIndex::new(blockparam_out.vreg()); |
407 | 218k | let blockparam_in = VRegIndex::new(blockparam_in.vreg()); |
408 | 218k | self.blockparam_outs.push(BlockparamOut { |
409 | 218k | to_vreg: blockparam_in, |
410 | 218k | to_block: succ, |
411 | 218k | from_block: block, |
412 | 218k | from_vreg: blockparam_out, |
413 | 218k | }); |
414 | 218k | |
415 | 218k | // Include outgoing blockparams in the initial live set. |
416 | 218k | live.set(blockparam_out.index(), true); |
417 | 218k | } |
418 | | } |
419 | 10.5k | } |
420 | | |
421 | | // Initially, each live-out vreg is assumed live for the whole block; defs seen below will trim these ranges. |
422 | 12.4M | for vreg in live.iter() { |
423 | 12.4M | let range = CodeRange { |
424 | 12.4M | from: self.cfginfo.block_entry[block.index()], |
425 | 12.4M | to: self.cfginfo.block_exit[block.index()].next(), |
426 | 12.4M | }; |
427 | 12.4M | trace!( |
428 | 0 | "vreg {:?} live at end of block --> create range {:?}", |
429 | 0 | VRegIndex::new(vreg), |
430 | | range |
431 | | ); |
432 | 12.4M | let lr = self.add_liverange_to_vreg(VRegIndex::new(vreg), range); |
433 | 12.4M | vreg_ranges[vreg] = lr; |
434 | | } |
435 | | |
436 | | // Create vreg data for blockparams. |
437 | 462k | for ¶m in self.func.block_params(block) { |
438 | 170k | self.vregs[param].blockparam = block; |
439 | 170k | } |
440 | | |
441 | | // For each instruction, in reverse order, process |
442 | | // operands and clobbers. |
443 | 4.27M | for inst in insns.iter().rev() { |
444 | | // Mark clobbers with CodeRanges on PRegs. |
445 | 4.27M | for clobber in self.func.inst_clobbers(inst) { |
446 | 928k | // Clobber range is at After point only: an |
447 | 928k | // instruction can still take an input in a reg |
448 | 928k | // that it later clobbers. (In other words, the |
449 | 928k | // clobber is like a normal def that never gets |
450 | 928k | // used.) |
451 | 928k | let range = CodeRange { |
452 | 928k | from: ProgPoint::after(inst), |
453 | 928k | to: ProgPoint::before(inst.next()), |
454 | 928k | }; |
455 | 928k | self.add_liverange_to_preg(range, clobber); |
456 | 928k | } |
457 | | |
458 | | // Does the instruction have any input-reusing |
459 | | // outputs? This is important below to establish |
460 | | // proper interference wrt other inputs. We note the |
461 | | // *vreg* that is reused, not the index. |
462 | 4.27M | let mut reused_input = None; |
463 | 12.6M | for op in self.func.inst_operands(inst) { |
464 | 12.6M | if let OperandConstraint::Reuse(i) = op.constraint() { |
465 | 537k | debug_assert!(self.func.inst_operands(inst)[i] |
466 | 0 | .as_fixed_nonallocatable() |
467 | 0 | .is_none()); |
468 | 537k | reused_input = Some(self.func.inst_operands(inst)[i].vreg()); |
469 | 537k | break; |
470 | 12.1M | } |
471 | | } |
472 | | |
473 | | // Preprocess defs and uses. Specifically, if there |
474 | | // are any fixed-reg-constrained defs at Late position |
475 | | // and fixed-reg-constrained uses at Early position |
476 | | // with the same preg, we need to (i) add a fixup move |
477 | | // for the use, (ii) rewrite the use to have an Any |
478 | | // constraint, and (iii) move the def to Early position |
479 | | // to reserve the register for the whole instruction. |
480 | | // |
481 | | // We don't touch any fixed-early-def or fixed-late-use |
482 | | // constraints: the only situation where the same physical |
483 | | // register can be used multiple times in the same |
484 | | // instruction is with an early-use and a late-def. Anything |
485 | | // else is a user error. |
486 | 4.27M | operand_rewrites.clear(); |
487 | 4.27M | let mut late_def_fixed: SmallVec<[PReg; 8]> = smallvec![]; |
488 | 15.1M | for &operand in self.func.inst_operands(inst) { |
489 | 15.1M | if let OperandConstraint::FixedReg(preg) = operand.constraint() { |
490 | 539k | match (operand.pos(), operand.kind()) { |
491 | 43.6k | (OperandPos::Late, OperandKind::Def) => { |
492 | 43.6k | late_def_fixed.push(preg); |
493 | 43.6k | } |
494 | 495k | _ => {} |
495 | | } |
496 | 14.6M | } |
497 | | } |
498 | 15.1M | for (i, &operand) in self.func.inst_operands(inst).iter().enumerate() { |
499 | 15.1M | if operand.as_fixed_nonallocatable().is_some() { |
500 | 79.5k | continue; |
501 | 15.0M | } |
502 | 15.0M | if let OperandConstraint::FixedReg(preg) = operand.constraint() { |
503 | 459k | match (operand.pos(), operand.kind()) { |
504 | | (OperandPos::Early, OperandKind::Use) |
505 | 388k | if live.get(operand.vreg().vreg()) => |
506 | | { |
507 | | // If we have a use constraint at the |
508 | | // Early point for a fixed preg, and |
509 | | // this preg is also constrained with |
510 | | // a *separate* def at Late or is |
511 | | // clobbered, and *if* the vreg is |
512 | | // live downward, we have to use the |
513 | | // multi-fixed-reg mechanism for a |
514 | | // fixup and rewrite here without the |
515 | | // constraint. See #53. |
516 | | // |
517 | | // We adjust the def liverange and Use |
518 | | // to an "early" position to reserve |
519 | | // the register; it still must not be |
520 | | // used by some other vreg at the |
521 | | // use-site. |
522 | | // |
523 | | // Note that we handle multiple |
524 | | // conflicting constraints for the |
525 | | // same vreg in a separate pass (see |
526 | | // `fixup_multi_fixed_vregs` below). |
527 | 291k | if late_def_fixed.contains(&preg) |
528 | 288k | || self.func.inst_clobbers(inst).contains(preg) |
529 | | { |
530 | 19.7k | trace!( |
531 | 0 | concat!( |
532 | | "-> operand {:?} is fixed to preg {:?}, ", |
533 | | "is downward live, and there is also a ", |
534 | | "def or clobber at this preg" |
535 | | ), |
536 | | operand, |
537 | | preg |
538 | | ); |
539 | 19.7k | let pos = ProgPoint::before(inst); |
540 | 19.7k | self.multi_fixed_reg_fixups.push(MultiFixedRegFixup { |
541 | 19.7k | pos, |
542 | 19.7k | from_slot: slot_idx(i)?, |
543 | 19.7k | to_slot: slot_idx(i)?, |
544 | 19.7k | to_preg: PRegIndex::new(preg.index()), |
545 | 19.7k | vreg: VRegIndex::new(operand.vreg().vreg()), |
546 | 19.7k | level: FixedRegFixupLevel::Initial, |
547 | | }); |
548 | | |
549 | | // We need to insert a reservation |
550 | | // at the before-point to reserve |
551 | | // the reg for the use too. |
552 | 19.7k | let range = CodeRange::singleton(pos); |
553 | 19.7k | self.add_liverange_to_preg(range, preg); |
554 | | |
555 | | // Remove the fixed-preg |
556 | | // constraint from the Use. |
557 | 19.7k | operand_rewrites.insert( |
558 | 19.7k | i, |
559 | 19.7k | Operand::new( |
560 | 19.7k | operand.vreg(), |
561 | 19.7k | OperandConstraint::Any, |
562 | 19.7k | operand.kind(), |
563 | 19.7k | operand.pos(), |
564 | | ), |
565 | | ); |
566 | 271k | } |
567 | | } |
568 | 168k | _ => {} |
569 | | } |
570 | 14.6M | } |
571 | | } |
572 | | |
573 | | // Process defs and uses. |
574 | 12.8M | for &cur_pos in &[InstPosition::After, InstPosition::Before] { |
575 | 30.2M | for i in 0..self.func.inst_operands(inst).len() { |
576 | | // don't borrow `self` |
577 | 30.2M | let operand = operand_rewrites |
578 | 30.2M | .get(&i) |
579 | 30.2M | .cloned() |
580 | 30.2M | .unwrap_or(self.func.inst_operands(inst)[i]); |
581 | 30.2M | let pos = match (operand.kind(), operand.pos()) { |
582 | 5.22M | (OperandKind::Def, OperandPos::Early) => ProgPoint::before(inst), |
583 | 9.36M | (OperandKind::Def, OperandPos::Late) => ProgPoint::after(inst), |
584 | 0 | (OperandKind::Use, OperandPos::Late) => ProgPoint::after(inst), |
585 | | // If there are any reused inputs in this |
586 | | // instruction, and this is *not* the |
587 | | // reused vreg, force `pos` to |
588 | | // `After`. This ensures that we correctly |
589 | | // account for the interference between |
590 | | // the other inputs and the |
591 | | // input-that-is-reused/output. |
592 | | (OperandKind::Use, OperandPos::Early) |
593 | 15.6M | if reused_input.is_some() |
594 | 4.88M | && reused_input.unwrap() != operand.vreg() => |
595 | | { |
596 | 1.94M | ProgPoint::after(inst) |
597 | | } |
598 | 13.7M | (OperandKind::Use, OperandPos::Early) => ProgPoint::before(inst), |
599 | | }; |
600 | | |
601 | 30.2M | if pos.pos() != cur_pos { |
602 | 15.1M | continue; |
603 | 15.1M | } |
604 | | |
605 | 15.1M | trace!( |
606 | 0 | "processing inst{} operand at {:?}: {:?}", |
607 | 0 | inst.index(), |
608 | | pos, |
609 | | operand |
610 | | ); |
611 | | |
612 | | // If this is a "fixed non-allocatable |
613 | | // register" operand, set the alloc |
614 | | // immediately and then ignore the operand |
615 | | // hereafter. |
616 | 15.1M | if let Some(preg) = operand.as_fixed_nonallocatable() { |
617 | 79.5k | self.set_alloc(inst, i, Allocation::reg(preg)); |
618 | 79.5k | continue; |
619 | 15.0M | } |
620 | | |
621 | 15.0M | match operand.kind() { |
622 | | OperandKind::Def => { |
623 | 7.29M | trace!("Def of {} at {:?}", operand.vreg(), pos); |
624 | | |
625 | | // Get or create the LiveRange. |
626 | 7.29M | let mut lr = vreg_ranges[operand.vreg().vreg()]; |
627 | 7.29M | trace!(" -> has existing LR {:?}", lr); |
628 | | // If there was no liverange (dead def), create a trivial one. |
629 | 7.29M | if !live.get(operand.vreg().vreg()) { |
630 | 6.04M | let from = pos; |
631 | | // We want to span |
632 | | // until Before of the next |
633 | | // inst. This ensures that early |
634 | | // defs used for temps on an |
635 | | // instruction are reserved across |
636 | | // the whole instruction. |
637 | 6.04M | let to = ProgPoint::before(pos.inst().next()); |
638 | 6.04M | lr = self.add_liverange_to_vreg( |
639 | 6.04M | VRegIndex::new(operand.vreg().vreg()), |
640 | 6.04M | CodeRange { from, to }, |
641 | 6.04M | ); |
642 | 6.04M | trace!(" -> invalid; created {:?}", lr); |
643 | 6.04M | vreg_ranges[operand.vreg().vreg()] = lr; |
644 | 6.04M | live.set(operand.vreg().vreg(), true); |
645 | 1.25M | } |
646 | | // Create the use in the LiveRange. |
647 | 7.29M | self.insert_use_into_liverange( |
648 | 7.29M | lr, |
649 | 7.29M | Use::new(operand, pos, slot_idx(i)?), |
650 | | ); |
651 | | // Since this is a def, the reg is now dead, |
652 | | // scanning backward; make it so. |
653 | 7.29M | if operand.kind() == OperandKind::Def { |
654 | | // Trim the range for this vreg to start |
655 | | // at `pos` if it previously ended at the |
656 | | // start of this block (i.e. was not |
657 | | // merged into some larger LiveRange due |
658 | | // to out-of-order blocks). |
659 | 7.29M | if self.ranges[lr].range.from |
660 | 7.29M | == self.cfginfo.block_entry[block.index()] |
661 | | { |
662 | 1.47M | trace!(" -> started at block start; trimming to {:?}", pos); |
663 | 1.47M | self.ranges[lr].range.from = pos; |
664 | 5.82M | } |
665 | | |
666 | 7.29M | self.ranges[lr].set_flag(LiveRangeFlag::StartsAtDef); |
667 | | |
668 | | // Remove from live-set. |
669 | 7.29M | live.set(operand.vreg().vreg(), false); |
670 | 7.29M | vreg_ranges[operand.vreg().vreg()] = LiveRangeIndex::invalid(); |
671 | 0 | } |
672 | | } |
673 | | OperandKind::Use => { |
674 | | // Create/extend the LiveRange if it |
675 | | // doesn't already exist, and add the use |
676 | | // to the range. |
677 | 7.76M | let mut lr = vreg_ranges[operand.vreg().vreg()]; |
678 | 7.76M | if !live.get(operand.vreg().vreg()) { |
679 | 1.22M | let range = CodeRange { |
680 | 1.22M | from: self.cfginfo.block_entry[block.index()], |
681 | 1.22M | to: pos.next(), |
682 | 1.22M | }; |
683 | 1.22M | lr = self.add_liverange_to_vreg( |
684 | 1.22M | VRegIndex::new(operand.vreg().vreg()), |
685 | 1.22M | range, |
686 | 1.22M | ); |
687 | 1.22M | vreg_ranges[operand.vreg().vreg()] = lr; |
688 | 6.54M | } |
689 | 7.76M | debug_assert!(lr.is_valid()); |
690 | | |
691 | 7.76M | trace!("Use of {:?} at {:?} -> {:?}", operand, pos, lr,); |
692 | | |
693 | 7.76M | self.insert_use_into_liverange( |
694 | 7.76M | lr, |
695 | 7.76M | Use::new(operand, pos, slot_idx(i)?), |
696 | | ); |
697 | | |
698 | | // Add to live-set. |
699 | 7.76M | live.set(operand.vreg().vreg(), true); |
700 | | } |
701 | | } |
702 | | } |
703 | | } |
704 | | } |
705 | | |
706 | | // Block parameters define vregs at the very beginning of |
707 | | // the block. Remove them from the live set |
708 | | // here. |
709 | 462k | for vreg in self.func.block_params(block) { |
710 | 170k | if live.get(vreg.vreg()) { |
711 | 112k | live.set(vreg.vreg(), false); |
712 | 112k | } else { |
713 | 58.5k | // Create trivial liverange if blockparam is dead. |
714 | 58.5k | let start = self.cfginfo.block_entry[block.index()]; |
715 | 58.5k | self.add_liverange_to_vreg( |
716 | 58.5k | VRegIndex::new(vreg.vreg()), |
717 | 58.5k | CodeRange { |
718 | 58.5k | from: start, |
719 | 58.5k | to: start.next(), |
720 | 58.5k | }, |
721 | 58.5k | ); |
722 | 58.5k | } |
723 | | // Add `blockparam_ins` entries. |
724 | 170k | let vreg_idx = VRegIndex::new(vreg.vreg()); |
725 | 218k | for &pred in self.func.block_preds(block) { |
726 | 218k | self.blockparam_ins.push(BlockparamIn { |
727 | 218k | to_vreg: vreg_idx, |
728 | 218k | to_block: block, |
729 | 218k | from_block: pred, |
730 | 218k | }); |
731 | 218k | } |
732 | | } |
733 | | } |
734 | | |
735 | | // Make ranges in each vreg and uses in each range appear in |
736 | | // sorted order. We built them in reverse order above, so this |
737 | | // is a simple reversal, *not* a full sort. |
738 | | // |
739 | | // The ordering invariant is always maintained for uses and |
740 | | // always for ranges in bundles (which are initialized later), |
741 | | // but not always for ranges in vregs; those are sorted only |
742 | | // when needed, here and then again at the end of allocation |
743 | | // when resolving moves. |
744 | | |
745 | 7.47M | for vreg in &mut self.ctx.vregs { |
746 | 7.46M | vreg.ranges.reverse(); |
747 | 7.46M | let mut last = None; |
748 | 17.6M | for entry in &mut vreg.ranges { |
749 | | // Ranges may have been truncated above at defs. We |
750 | | // need to update with the final range here. |
751 | 10.1M | entry.range = self.ctx.ranges[entry.index].range; |
752 | | // Assert in-order and non-overlapping. |
753 | 10.1M | debug_assert!(last.is_none() || last.unwrap() <= entry.range.from); |
754 | 10.1M | last = Some(entry.range.to); |
755 | | } |
756 | | } |
757 | | |
758 | 10.1M | for range in &mut self.ranges { |
759 | 10.1M | range.uses.reverse(); |
760 | 10.1M | debug_assert!(range.uses.windows(2).all(|win| win[0].pos <= win[1].pos)); |
761 | | } |
762 | | |
763 | 2.91M | self.blockparam_ins.sort_unstable_by_key(|x| x.key()); |
764 | 2.98M | self.blockparam_outs.sort_unstable_by_key(|x| x.key()); |
765 | | |
766 | 10.5k | self.output.stats.initial_liverange_count = self.ranges.len(); |
767 | 10.5k | self.output.stats.blockparam_ins_count = self.blockparam_ins.len(); |
768 | 10.5k | self.output.stats.blockparam_outs_count = self.blockparam_outs.len(); |
769 | 10.5k | self.ctx.scratch_vreg_ranges = vreg_ranges; |
770 | 10.5k | self.ctx.scratch_operand_rewrites = operand_rewrites; |
771 | | |
772 | 10.5k | Ok(()) |
773 | 10.5k | } |
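
As the comment above notes, restoring order is a single reversal, never a sort; a tiny check with hypothetical (from, to) tuple ranges:

    // Built bottom-to-top, so stored descending; one reverse() yields
    // ascending, non-overlapping order.
    let mut ranges = vec![(8u32, 10u32), (0, 4)];
    ranges.reverse();
    assert!(ranges.windows(2).all(|w| w[0].1 <= w[1].0));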
774 | | |
775 | 10.5k | pub fn fixup_multi_fixed_vregs(&mut self) { |
776 | | // Do a fixed-reg cleanup pass: if there are any LiveRanges with |
777 | | // multiple uses at the same ProgPoint and there is |
778 | | // more than one FixedReg constraint at that ProgPoint, we |
779 | | // need to record all but one of them in a special fixup list |
780 | | // and handle them later; otherwise, bundle-splitting to |
781 | | // create minimal bundles becomes much more complex (we would |
782 | | // have to split the multiple uses at the same progpoint into |
783 | | // different bundles, which breaks invariants related to |
784 | | // disjoint ranges and bundles). |
785 | 10.5k | let mut extra_clobbers: SmallVec<[(PReg, ProgPoint); 8]> = smallvec![]; |
786 | 7.46M | for vreg in 0..self.vregs.len() { |
787 | 7.46M | let vreg = VRegIndex::new(vreg); |
788 | 10.1M | for range_idx in 0..self.vregs[vreg].ranges.len() { |
789 | 10.1M | let entry = self.vregs[vreg].ranges[range_idx]; |
790 | 10.1M | let range = entry.index; |
791 | 10.1M | trace!("multi-fixed-reg cleanup: vreg {:?} range {:?}", vreg, range,); |
792 | | |
793 | | // Find groups of uses that occur at the same program point. |
794 | 11.2M | for uses in self.ctx.ranges[range] |
795 | 10.1M | .uses |
796 | 10.1M | .chunk_by_mut(|a, b| a.pos == b.pos) |
797 | | { |
798 | 11.2M | if uses.len() < 2 { |
799 | 9.76M | continue; |
800 | 1.48M | } |
801 | | |
802 | | // Search for conflicting constraints in the uses. |
803 | 1.48M | let mut requires_reg = false; |
804 | 1.48M | let mut num_fixed_reg = 0; |
805 | 1.48M | let mut num_fixed_stack = 0; |
806 | 1.48M | let mut first_reg_slot = None; |
807 | 1.48M | let mut first_stack_slot = None; |
808 | 1.48M | let mut min_limit = usize::MAX; |
809 | 1.48M | let mut max_fixed_reg = usize::MIN; |
810 | 5.30M | for u in uses.iter() { |
811 | 5.30M | match u.operand.constraint() { |
812 | 2.39M | OperandConstraint::Any => { |
813 | 2.39M | first_reg_slot.get_or_insert(u.slot); |
814 | 2.39M | first_stack_slot.get_or_insert(u.slot); |
815 | 2.39M | } |
816 | 2.71M | OperandConstraint::Reg | OperandConstraint::Reuse(_) => { |
817 | 2.71M | first_reg_slot.get_or_insert(u.slot); |
818 | 2.71M | requires_reg = true; |
819 | 2.71M | } |
820 | 0 | OperandConstraint::Limit(max) => { |
821 | 0 | first_reg_slot.get_or_insert(u.slot); |
822 | 0 | min_limit = min_limit.min(max); |
823 | 0 | requires_reg = true; |
824 | 0 | } |
825 | 197k | OperandConstraint::FixedReg(preg) => { |
826 | 197k | max_fixed_reg = max_fixed_reg.max(preg.hw_enc()); |
827 | 197k | if self.ctx.pregs[preg.index()].is_stack { |
828 | 89.3k | num_fixed_stack += 1; |
829 | 89.3k | first_stack_slot.get_or_insert(u.slot); |
830 | 108k | } else { |
831 | 108k | requires_reg = true; |
832 | 108k | num_fixed_reg += 1; |
833 | 108k | first_reg_slot.get_or_insert(u.slot); |
834 | 108k | } |
835 | | } |
836 | | // Maybe this could be supported in the future... |
837 | 0 | OperandConstraint::Stack => panic!( |
838 | | "multiple uses of vreg with a Stack constraint are not supported" |
839 | | ), |
840 | | } |
841 | | } |
842 | | |
843 | | // Fast path if there are no conflicts. |
844 | 1.48M | if num_fixed_reg + num_fixed_stack <= 1 |
845 | 1.45M | && !(requires_reg && num_fixed_stack != 0) |
846 | 1.43M | && max_fixed_reg < min_limit |
847 | | { |
848 | 1.43M | continue; |
849 | 52.0k | } |
850 | | |
851 | | // We pick one constraint (in order: FixedReg, Reg, FixedStack) |
852 | | // and then rewrite any incompatible constraints to Any. |
853 | | // This allows register allocation to succeed and we will |
854 | | // later insert moves to satisfy the rewritten constraints. |
855 | 52.0k | let source_slot = if requires_reg { |
856 | 43.6k | first_reg_slot.unwrap() |
857 | | } else { |
858 | 8.42k | first_stack_slot.unwrap() |
859 | | }; |
860 | 52.0k | let mut first_preg = None; |
861 | 189k | for u in uses.iter_mut() { |
862 | 189k | if let OperandConstraint::FixedReg(preg) = u.operand.constraint() { |
863 | 91.4k | let vreg_idx = VRegIndex::new(u.operand.vreg().vreg()); |
864 | 91.4k | let preg_idx = PRegIndex::new(preg.index()); |
865 | 91.4k | trace!( |
866 | 0 | "at pos {:?}, vreg {:?} has fixed constraint to preg {:?}", |
867 | | u.pos, |
868 | | vreg_idx, |
869 | | preg_idx |
870 | | ); |
871 | | |
872 | | // FixedStack is incompatible if there are any |
873 | | // Reg/FixedReg constraints. FixedReg is |
874 | | // incompatible if there already is a different |
875 | | // FixedReg constraint. If either condition is true, |
876 | | // we edit the constraint below; otherwise, we can |
877 | | // skip this edit. |
878 | 91.4k | if !(requires_reg && self.ctx.pregs[preg.index()].is_stack) |
879 | 48.3k | && *first_preg.get_or_insert(preg) == preg |
880 | 30.3k | && preg.hw_enc() < min_limit |
881 | | { |
882 | 30.3k | continue; |
883 | 61.1k | } |
884 | | |
885 | 61.1k | trace!(" -> duplicate; switching to constraint Any"); |
886 | 61.1k | self.ctx.multi_fixed_reg_fixups.push(MultiFixedRegFixup { |
887 | 61.1k | pos: u.pos, |
888 | 61.1k | from_slot: source_slot, |
889 | 61.1k | to_slot: u.slot, |
890 | 61.1k | to_preg: preg_idx, |
891 | 61.1k | vreg: vreg_idx, |
892 | 61.1k | level: FixedRegFixupLevel::Secondary, |
893 | 61.1k | }); |
894 | 61.1k | u.operand = Operand::new( |
895 | 61.1k | u.operand.vreg(), |
896 | 61.1k | OperandConstraint::Any, |
897 | 61.1k | u.operand.kind(), |
898 | 61.1k | u.operand.pos(), |
899 | 61.1k | ); |
900 | 61.1k | trace!(" -> extra clobber {} at inst{}", preg, u.pos.inst().index()); |
901 | 61.1k | extra_clobbers.push((preg, u.pos)); |
902 | 97.5k | } |
903 | | } |
904 | | } |
905 | | |
906 | 10.1M | for (clobber, pos) in extra_clobbers.drain(..) { |
907 | 61.1k | let range = CodeRange { |
908 | 61.1k | from: pos, |
909 | 61.1k | to: pos.next(), |
910 | 61.1k | }; |
911 | 61.1k | self.add_liverange_to_preg(range, clobber); |
912 | 61.1k | } |
913 | | } |
914 | | } |
915 | 10.5k | } |
916 | | } |
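
The grouping step in `fixup_multi_fixed_vregs` relies on `slice::chunk_by_mut` splitting a position-sorted use list into maximal runs with equal `pos`. A tiny standalone illustration with hypothetical (pos, label) pairs:

    // Consecutive equal-key elements form one mutable chunk each; this
    // mirrors grouping uses by ProgPoint in the pass above.
    let mut uses = [(0u32, "fixed p0"), (0, "fixed p1"), (1, "reg")];
    let group_sizes: Vec<usize> = uses
        .chunk_by_mut(|a, b| a.0 == b.0)
        .map(|g| g.len())
        .collect();
    assert_eq!(group_sizes, vec![2, 1]);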