/src/regalloc2/src/ion/moves.rs
Line | Count | Source |
1 | | /* |
2 | | * This file was initially derived from the files |
3 | | * `js/src/jit/BacktrackingAllocator.h` and |
4 | | * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was |
5 | | * originally licensed under the Mozilla Public License 2.0. We |
6 | | * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see |
7 | | * https://github.com/bytecodealliance/regalloc2/issues/7). |
8 | | * |
9 | | * Since the initial port, the design has been substantially evolved |
10 | | * and optimized. |
11 | | */ |
12 | | |
13 | | //! Move resolution. |
14 | | |
15 | | use alloc::vec; |
16 | | |
17 | | use super::{ |
18 | | Env, InsertMovePrio, InsertedMove, InsertedMoves, LiveRangeFlag, LiveRangeIndex, |
19 | | RedundantMoveEliminator, VRegIndex, |
20 | | }; |
21 | | use crate::ion::data_structures::{ |
22 | | u64_key, BlockparamIn, BlockparamOut, CodeRange, Edits, FixedRegFixupLevel, LiveRangeKey, |
23 | | LiveRangeListEntry, |
24 | | }; |
25 | | use crate::ion::reg_traversal::RegTraversalIter; |
26 | | use crate::moves::{MoveAndScratchResolver, ParallelMoves}; |
27 | | use crate::{ |
28 | | Allocation, Block, Edit, Function, FxHashMap, Inst, InstPosition, OperandConstraint, |
29 | | OperandKind, OperandPos, PReg, ProgPoint, RegClass, SpillSlot, |
30 | | }; |
31 | | use alloc::format; |
32 | | use alloc::vec::Vec; |
33 | | use hashbrown::hash_map::Entry; |
34 | | use smallvec::{smallvec, SmallVec}; |
35 | | |
36 | | impl<'a, F: Function> Env<'a, F> { |
37 | 863k | pub fn is_start_of_block(&self, pos: ProgPoint) -> bool { |
38 | 863k | let block = self.ctx.cfginfo.insn_block[pos.inst().index()]; |
39 | 863k | pos == self.ctx.cfginfo.block_entry[block.index()] |
40 | 863k | } |
41 | | pub fn is_end_of_block(&self, pos: ProgPoint) -> bool { |
42 | | let block = self.ctx.cfginfo.insn_block[pos.inst().index()]; |
43 | | pos == self.ctx.cfginfo.block_exit[block.index()] |
44 | | } |
45 | | |
46 | 6.02M | pub fn get_alloc(&self, inst: Inst, slot: usize) -> Allocation { |
47 | 6.02M | let inst_allocs = |
48 | 6.02M | &self.ctx.output.allocs[self.ctx.output.inst_alloc_offsets[inst.index()] as usize..]; |
49 | 6.02M | inst_allocs[slot] |
50 | 6.02M | } |
51 | | |
52 | 16.0M | pub fn set_alloc(&mut self, inst: Inst, slot: usize, alloc: Allocation) { |
53 | 16.0M | let inst_allocs = &mut self.ctx.output.allocs |
54 | 16.0M | [self.ctx.output.inst_alloc_offsets[inst.index()] as usize..]; |
55 | 16.0M | inst_allocs[slot] = alloc; |
56 | 16.0M | } |
57 | | |
58 | 15.6M | pub fn get_alloc_for_range(&self, range: LiveRangeIndex) -> Allocation { |
59 | 15.6M | trace!("get_alloc_for_range: {:?}", range); |
60 | 15.6M | let bundle = self.ctx.ranges[range].bundle; |
61 | 15.6M | trace!(" -> bundle: {:?}", bundle); |
62 | 15.6M | let bundledata = &self.ctx.bundles[bundle]; |
63 | 15.6M | trace!(" -> allocation {:?}", bundledata.allocation); |
64 | 15.6M | if bundledata.allocation != Allocation::none() { |
65 | 11.7M | bundledata.allocation |
66 | | } else { |
67 | 3.91M | trace!(" -> spillset {:?}", bundledata.spillset); |
68 | 3.91M | trace!( |
69 | 0 | " -> spill slot {:?}", |
70 | 0 | self.ctx.spillsets[bundledata.spillset].slot |
71 | | ); |
72 | 3.91M | self.ctx.spillslots[self.ctx.spillsets[bundledata.spillset].slot.index()].alloc |
73 | | } |
74 | 15.6M | } |
75 | | |
76 | 10.8k | pub fn apply_allocations_and_insert_moves(&mut self) -> InsertedMoves { |
77 | 10.8k | trace!("apply_allocations_and_insert_moves"); |
78 | 10.8k | trace!("blockparam_ins: {:?}", self.blockparam_ins); |
79 | 10.8k | trace!("blockparam_outs: {:?}", self.blockparam_outs); |
80 | | |
81 | 10.8k | let mut inserted_moves = InsertedMoves::default(); |
82 | | |
83 | | // Now that all splits are done, we can pay the cost once to |
84 | | // sort VReg range lists and update with the final ranges. |
85 | 7.71M | for vreg in &mut self.ctx.vregs { |
86 | 19.4M | for entry in &mut vreg.ranges { |
87 | 11.7M | entry.range = self.ctx.ranges[entry.index].range; |
88 | 11.7M | } |
89 | 7.70M | vreg.ranges.sort_unstable_by_key(|entry| entry.range.from); |
90 | | } |
91 | | |
92 | | /// Buffered information about the previous liverange that was processed. |
93 | | struct PrevBuffer { |
94 | | prev: Option<LiveRangeListEntry>, |
95 | | prev_ins_idx: usize, |
96 | | buffered: Option<LiveRangeListEntry>, |
97 | | buffered_ins_idx: usize, |
98 | | } |
99 | | |
100 | | impl PrevBuffer { |
101 | 7.70M | fn new(prev_ins_idx: usize) -> Self { |
102 | 7.70M | Self { |
103 | 7.70M | prev: None, |
104 | 7.70M | prev_ins_idx, |
105 | 7.70M | buffered: None, |
106 | 7.70M | buffered_ins_idx: prev_ins_idx, |
107 | 7.70M | } |
108 | 7.70M | } |
109 | | |
110 | | /// Returns the previous `LiveRangeListEntry` when it's present. |
111 | | #[inline(always)] |
112 | 11.7M | fn is_valid(&self) -> Option<LiveRangeListEntry> { |
113 | 11.7M | self.prev |
114 | 11.7M | } |
115 | | |
116 | | /// Fetch the current index into the `Env::blockparam_ins` vector. |
117 | | #[inline(always)] |
118 | 20.8M | fn blockparam_ins_idx(&self) -> usize { |
119 | 20.8M | self.prev_ins_idx |
120 | 20.8M | } |
121 | | |
122 | | /// Record this index as the next index to use when the previous |
123 | | /// liverange buffer advances. |
124 | | #[inline(always)] |
125 | 13.1M | fn update_blockparam_ins_idx(&mut self, idx: usize) { |
126 | 13.1M | self.buffered_ins_idx = idx; |
127 | 13.1M | } |
128 | | |
129 | | /// As overlapping liveranges might start at the same program point, we buffer the |
130 | | /// previous liverange used when determining where to take the last value from for |
131 | | /// intra-block moves. The liveranges we process are buffered until we encounter one |
132 | | /// that starts at a later program point, indicating that it's now safe to advance the |
133 | | /// previous LR buffer. We accumulate the longest-lived liverange in the buffer as a |
134 | | /// heuristic for finding the most stable source of a value. |
135 | | /// |
136 | | /// We also buffer the index into the `Env::blockparam_ins` vector, as we may see |
137 | | /// multiple uses of a blockparam within a single instruction, and as such may need to |
138 | | /// generate multiple blockparam move destinations by re-traversing that section of the |
139 | | /// vector. |
140 | | #[inline(always)] |
141 | 11.7M | fn advance(&mut self, current: LiveRangeListEntry) { |
142 | |             // Advance the `prev` pointer to the buffered entry, as long as the |
143 | |             // buffered entry starts strictly before the current LR we're processing. |
144 | 11.7M | if self |
145 | 11.7M | .buffered |
146 | 11.7M | .map(|entry| entry.range.from < current.range.from) |
147 | 11.7M | .unwrap_or(false) |
148 | 3.86M | { |
149 | 3.86M | self.prev = self.buffered; |
150 | 3.86M | self.prev_ins_idx = self.buffered_ins_idx; |
151 | 7.83M | } |
152 | | |
153 | |             // Advance the buffered entry to the currently processed LR, as long as it |
154 | |             // ends later than the current buffered entry. |
155 | 11.7M | if self |
156 | 11.7M | .buffered |
157 | 11.7M | .map(|entry| entry.range.to < current.range.to) |
158 | 11.7M | .unwrap_or(true) |
159 | 11.2M | { |
160 | 11.2M | self.buffered = Some(current); |
161 | 11.2M | } |
162 | 11.7M | } |
163 | | } |
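// A minimal, self-contained sketch of the `PrevBuffer::advance` heuristic above,
// using a hypothetical `Entry { from, to }` in place of `LiveRangeListEntry` and
// `ProgPoint`. The buffered entry is only promoted to `prev` once a later-starting
// entry arrives, and among entries that start together the longest-lived one is
// kept, so intra-block moves copy from the most stable source.

#[derive(Copy, Clone)]
struct Entry {
    from: u32, // stand-in for the range's start program point
    to: u32,   // stand-in for the range's end program point
}

#[derive(Default)]
struct Prev {
    prev: Option<Entry>,
    buffered: Option<Entry>,
}

impl Prev {
    fn advance(&mut self, current: Entry) {
        // Promote the buffer only once we see a strictly later start point.
        if self.buffered.map(|e| e.from < current.from).unwrap_or(false) {
            self.prev = self.buffered;
        }
        // Keep the longest-lived entry among those seen so far.
        if self.buffered.map(|e| e.to < current.to).unwrap_or(true) {
            self.buffered = Some(current);
        }
    }
}

fn main() {
    let mut p = Prev::default();
    p.advance(Entry { from: 0, to: 2 });
    p.advance(Entry { from: 0, to: 6 }); // same start, longer-lived: replaces the buffer
    p.advance(Entry { from: 4, to: 8 }); // later start: the buffer becomes `prev`
    assert_eq!(p.prev.map(|e| e.to), Some(6));
}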
164 | | |
165 | | // Determine the ProgPoint where moves on this (from, to) |
166 | | // edge should go: |
167 | | // - If there is more than one in-edge to `to`, then |
168 | | // `from` must have only one out-edge; moves go at tail of |
169 | | // `from` just before last Branch/Ret. |
170 | | // - Otherwise, there must be at most one in-edge to `to`, |
171 | | // and moves go at start of `to`. |
172 | | #[inline(always)] |
173 | 4.30M | fn choose_move_location<'a, F: Function>( |
174 | 4.30M | env: &Env<'a, F>, |
175 | 4.30M | from: Block, |
176 | 4.30M | to: Block, |
177 | 4.30M | ) -> (ProgPoint, InsertMovePrio) { |
178 | 4.30M | let from_last_insn = env.func.block_insns(from).last(); |
179 | 4.30M | let to_first_insn = env.func.block_insns(to).first(); |
180 | 4.30M | let from_is_ret = env.func.is_ret(from_last_insn); |
181 | 4.30M | let to_is_entry = env.func.entry_block() == to; |
182 | 4.30M | let from_outs = env.func.block_succs(from).len() + if from_is_ret { 1 } else { 0 }; |
183 | 4.30M | let to_ins = env.func.block_preds(to).len() + if to_is_entry { 1 } else { 0 }; |
184 | | |
185 | 4.30M | if to_ins > 1 && from_outs <= 1 { |
186 | 1.31M | ( |
187 | 1.31M | // N.B.: though semantically the edge moves happen |
188 | 1.31M | // after the branch, we must insert them before |
189 | 1.31M | // the branch because otherwise, of course, they |
190 | 1.31M | // would never execute. This is correct even in |
191 | 1.31M | // the presence of branches that read register |
192 | 1.31M | // inputs (e.g. conditional branches on some RISCs |
193 | 1.31M | // that branch on reg zero/not-zero, or any |
194 | 1.31M | // indirect branch), but for a very subtle reason: |
195 | 1.31M | // all cases of such branches will (or should) |
196 | 1.31M | // have multiple successors, and thus due to |
197 | 1.31M | // critical-edge splitting, their successors will |
198 | 1.31M | // have only the single predecessor, and we prefer |
199 | 1.31M | // to insert at the head of the successor in that |
200 | 1.31M | // case (rather than here). We make this a |
201 | 1.31M | // requirement, in fact: the user of this library |
202 | 1.31M | // shall not read registers in a branch |
203 | 1.31M | // instruction of there is only one successor per |
204 | 1.31M |                 // instruction if there is only one successor per |
205 | 1.31M | ProgPoint::before(from_last_insn), |
206 | 1.31M | InsertMovePrio::OutEdgeMoves, |
207 | 1.31M | ) |
208 | 2.99M | } else if to_ins <= 1 { |
209 | 2.99M | ( |
210 | 2.99M | ProgPoint::before(to_first_insn), |
211 | 2.99M | InsertMovePrio::InEdgeMoves, |
212 | 2.99M | ) |
213 | | } else { |
214 | 0 | panic!( |
215 | 0 | "Critical edge: can't insert moves between blocks {:?} and {:?}", |
216 | | from, to |
217 | | ); |
218 | | } |
219 | 4.30M | } |
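// A standalone sketch of the placement rule in `choose_move_location` above, over
// hypothetical in/out edge counts rather than the real `Function` trait. Because
// critical edges are split, at most one side of a (from, to) edge can fan out or
// fan in, so edge moves go at the tail of `from` when `to` has multiple in-edges,
// and at the head of `to` otherwise.

#[derive(Debug, PartialEq)]
enum MoveSpot {
    TailOfFrom, // just before the final branch/return of `from`
    HeadOfTo,   // at the first instruction of `to`
}

// `from_outs`: successor count of `from`, counting a return as an extra out-edge.
// `to_ins`: predecessor count of `to`, counting the entry block as an extra in-edge.
fn edge_move_spot(from_outs: usize, to_ins: usize) -> MoveSpot {
    if to_ins > 1 && from_outs <= 1 {
        MoveSpot::TailOfFrom
    } else if to_ins <= 1 {
        MoveSpot::HeadOfTo
    } else {
        panic!("critical edge: both endpoints have multiple edges");
    }
}

fn main() {
    assert_eq!(edge_move_spot(1, 2), MoveSpot::TailOfFrom); // join block: move in the predecessor
    assert_eq!(edge_move_spot(2, 1), MoveSpot::HeadOfTo);   // split block: move in the successor
}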
220 | | |
221 | | #[derive(PartialEq)] |
222 | | struct InterBlockDest { |
223 | | to: Block, |
224 | | from: Block, |
225 | | alloc: Allocation, |
226 | | } |
227 | | |
228 | | impl InterBlockDest { |
229 | 24.0M | fn key(&self) -> u64 { |
230 | 24.0M | u64_key(self.from.raw_u32(), self.to.raw_u32()) |
231 | 24.0M | } |
232 | | } |
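// A sketch of how `InterBlockDest::key` is used by the sort further below. Assuming
// `u64_key(a, b)` packs `a` into the high 32 bits and `b` into the low 32 bits (an
// assumption about that helper; its body is not shown in this file), sorting
// destinations by the key orders them by (from, to), so the drain loop processes
// the moves for each edge in a well-defined order.

fn u64_key_sketch(hi: u32, lo: u32) -> u64 {
    ((hi as u64) << 32) | (lo as u64)
}

fn main() {
    let mut edges = vec![(2u32, 1u32), (1, 3), (1, 2), (2, 0)];
    edges.sort_unstable_by_key(|&(from, to)| u64_key_sketch(from, to));
    assert_eq!(edges, vec![(1, 2), (1, 3), (2, 0), (2, 1)]);
}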
233 | | |
234 | 10.8k | let mut inter_block_sources: FxHashMap<Block, Allocation> = FxHashMap::default(); |
235 | 10.8k | let mut inter_block_dests = Vec::with_capacity(self.func.num_blocks()); |
236 | | |
237 | | #[derive(Hash, Eq, PartialEq)] |
238 | | struct BlockparamSourceKey { |
239 | | bits: u64, |
240 | | } |
241 | | |
242 | | impl BlockparamSourceKey { |
243 | 469k | fn new(from_block: Block, to_vreg: VRegIndex) -> Self { |
244 | 469k | BlockparamSourceKey { |
245 | 469k | bits: u64_key(from_block.raw_u32(), to_vreg.raw_u32()), |
246 | 469k | } |
247 | 469k | } |
248 | | } |
249 | | |
250 | | struct BlockparamDest { |
251 | | from_block: Block, |
252 | | to_block: Block, |
253 | | to_vreg: VRegIndex, |
254 | | alloc: Allocation, |
255 | | } |
256 | | |
257 | | impl BlockparamDest { |
258 | | fn key(&self) -> u64 { |
259 | | u64_key(self.to_block.raw_u32(), self.from_block.raw_u32()) |
260 | | } |
261 | | |
262 | 246k | fn source(&self) -> BlockparamSourceKey { |
263 | 246k | BlockparamSourceKey::new(self.from_block, self.to_vreg) |
264 | 246k | } |
265 | | } |
266 | | |
267 | 10.8k | let mut block_param_sources = |
268 | 10.8k | FxHashMap::<BlockparamSourceKey, Allocation>::with_capacity_and_hasher( |
269 | 10.8k | 3 * self.func.num_insts(), |
270 | 10.8k | Default::default(), |
271 | | ); |
272 | 10.8k | let mut block_param_dests = Vec::with_capacity(3 * self.func.num_insts()); |
273 | | |
274 | 10.8k | let debug_labels = self.func.debug_value_labels(); |
275 | | |
276 | 10.8k | let mut reuse_input_insts = Vec::with_capacity(self.func.num_insts() / 2); |
277 | | |
278 | 10.8k | let mut blockparam_in_idx = 0; |
279 | 10.8k | let mut blockparam_out_idx = 0; |
280 | 7.70M | for vreg in 0..self.vregs.len() { |
281 | 7.70M | let vreg = VRegIndex::new(vreg); |
282 | 7.70M | if !self.is_vreg_used(vreg) { |
283 | 0 | continue; |
284 | 7.70M | } |
285 | | |
286 | 7.70M | inter_block_sources.clear(); |
287 | | |
288 | | // For each range in each vreg, insert moves or |
289 | | // half-moves. We also scan over `blockparam_ins` and |
290 | | // `blockparam_outs`, which are sorted by (block, vreg), |
291 | | // to fill in allocations. |
292 | 7.70M | let mut prev = PrevBuffer::new(blockparam_in_idx); |
293 | 11.7M | for range_idx in 0..self.vregs[vreg].ranges.len() { |
294 | 11.7M | let entry = self.vregs[vreg].ranges[range_idx]; |
295 | 11.7M | let alloc = self.get_alloc_for_range(entry.index); |
296 | 11.7M | let range = entry.range; |
297 | 11.7M | trace!( |
298 | 0 | "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?}", |
299 | | vreg, |
300 | | entry.index, |
301 | | range, |
302 | | alloc, |
303 | | ); |
304 | 11.7M | debug_assert!(alloc != Allocation::none()); |
305 | | |
306 | 11.7M | if self.annotations_enabled { |
307 | 1.87M | self.annotate( |
308 | 1.87M | range.from, |
309 | 1.87M | format!( |
310 | 1.87M | " <<< start v{} in {} (range{}) (bundle{})", |
311 | 1.87M | vreg.index(), |
312 | 1.87M | alloc, |
313 | 1.87M | entry.index.index(), |
314 | 1.87M | self.ranges[entry.index].bundle.raw_u32(), |
315 | 1.87M | ), |
316 | 1.87M | ); |
317 | 1.87M | self.annotate( |
318 | 1.87M | range.to, |
319 | 1.87M | format!( |
320 | 1.87M | " end v{} in {} (range{}) (bundle{}) >>>", |
321 | 1.87M | vreg.index(), |
322 | 1.87M | alloc, |
323 | 1.87M | entry.index.index(), |
324 | 1.87M | self.ranges[entry.index].bundle.raw_u32(), |
325 | 1.87M | ), |
326 | 1.87M | ); |
327 | 9.82M | } |
328 | | |
329 | 11.7M | prev.advance(entry); |
330 | | |
331 | | // Does this range follow immediately after a prior |
332 | | // range in the same block? If so, insert a move (if |
333 | | // the allocs differ). We do this directly rather than |
334 | | // with half-moves because we eagerly know both sides |
335 | | // already (and also, half-moves are specific to |
336 | | // inter-block transfers). |
337 | | // |
338 | | // Note that we do *not* do this if there is also a |
339 | | // def as the first use in the new range: it's |
340 | | // possible that an old liverange covers the Before |
341 | | // pos of an inst, a new liverange covers the After |
342 | | // pos, and the def also happens at After. In this |
343 | |         // case we don't want to insert a move after the |
344 | | // instruction copying the old liverange. |
345 | | // |
346 | | // Note also that we assert that the new range has to |
347 | | // start at the Before-point of an instruction; we |
348 | | // can't insert a move that logically happens just |
349 | | // before After (i.e. in the middle of a single |
350 | | // instruction). |
351 | 11.7M | if let Some(prev) = prev.is_valid() { |
352 | 3.98M | let prev_alloc = self.get_alloc_for_range(prev.index); |
353 | 3.98M | debug_assert!(prev_alloc != Allocation::none()); |
354 | | |
355 | 3.98M | if prev.range.to >= range.from |
356 | 1.22M | && (prev.range.to > range.from || !self.is_start_of_block(range.from)) |
357 | 1.16M | && !self.ranges[entry.index].has_flag(LiveRangeFlag::StartsAtDef) |
358 | | { |
359 | 1.16M | trace!( |
360 | 0 | "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}", |
361 | 0 | prev.index.index(), |
362 | 0 | entry.index.index(), |
363 | | prev_alloc, |
364 | | alloc, |
365 | 0 | vreg.index() |
366 | | ); |
367 | 1.16M | debug_assert_eq!(range.from.pos(), InstPosition::Before); |
368 | 1.16M | inserted_moves.push( |
369 | 1.16M | range.from, |
370 | 1.16M | InsertMovePrio::Regular, |
371 | 1.16M | prev_alloc, |
372 | 1.16M | alloc, |
373 | 1.16M | self.vreg(vreg), |
374 | | ); |
375 | 2.82M | } |
376 | 7.71M | } |
377 | | |
378 | |         // Scan over blocks whose ends are covered by this |
379 | |         // range. For each such block, record this range's |
380 | |         // allocation as the source side ("source half-move") |
381 | |         // of any inter-block or blockparam-out moves leaving |
382 | |         // the block; destinations are paired with it below. |
383 | 11.7M | let mut block = self.cfginfo.insn_block[range.from.inst().index()]; |
384 | 24.4M | while block.is_valid() && block.index() < self.func.num_blocks() { |
385 | 24.4M | if range.to < self.cfginfo.block_exit[block.index()].next() { |
386 | 11.7M | break; |
387 | 12.7M | } |
388 | 12.7M | trace!("examining block with end in range: block{}", block.index()); |
389 | | |
390 | 12.7M | match inter_block_sources.entry(block) { |
391 | | // If the entry is already present in the map, we'll try to prefer a |
392 | | // register allocation. |
393 | 0 | Entry::Occupied(mut entry) => { |
394 | 0 | if !entry.get().is_reg() { |
395 | 0 | entry.insert(alloc); |
396 | 0 | } |
397 | | } |
398 | 12.7M | Entry::Vacant(entry) => { |
399 | 12.7M | entry.insert(alloc); |
400 | 12.7M | } |
401 | | } |
402 | | |
403 | | // Scan forward in `blockparam_outs`, adding all |
404 | | // half-moves for outgoing values to blockparams |
405 | | // in succs. |
406 | 12.7M | trace!( |
407 | 0 | "scanning blockparam_outs for v{} block{}: blockparam_out_idx = {}", |
408 | 0 | vreg.index(), |
409 | 0 | block.index(), |
410 | | blockparam_out_idx, |
411 | | ); |
412 | 12.9M | while blockparam_out_idx < self.blockparam_outs.len() { |
413 | | let BlockparamOut { |
414 | 12.0M | from_vreg, |
415 | 12.0M | from_block, |
416 | 12.0M | to_block, |
417 | 12.0M | to_vreg, |
418 | 12.0M | } = self.blockparam_outs[blockparam_out_idx]; |
419 | 12.0M | if (from_vreg, from_block) > (vreg, block) { |
420 | 11.8M | break; |
421 | 223k | } |
422 | 223k | if (from_vreg, from_block) == (vreg, block) { |
423 | 223k | trace!( |
424 | 0 | " -> found: from v{} block{} to v{} block{}", |
425 | 0 | from_vreg.index(), |
426 | 0 | from_block.index(), |
427 | 0 | to_vreg.index(), |
428 | 0 |                                 to_block.index() |
429 | | ); |
430 | | |
431 | 223k | let key = BlockparamSourceKey::new(from_block, to_vreg); |
432 | 223k | match block_param_sources.entry(key) { |
433 | | // As with inter-block moves, if the entry is already present we'll |
434 | | // try to prefer a register allocation. |
435 | 0 | Entry::Occupied(mut entry) => { |
436 | 0 | if !entry.get().is_reg() { |
437 | 0 | entry.insert(alloc); |
438 | 0 | } |
439 | | } |
440 | 223k | Entry::Vacant(entry) => { |
441 | 223k | entry.insert(alloc); |
442 | 223k | } |
443 | | } |
444 | | |
445 | 223k | if self.annotations_enabled { |
446 | 15.9k | self.annotate( |
447 | 15.9k | self.cfginfo.block_exit[block.index()], |
448 | 15.9k | format!( |
449 | 15.9k | "blockparam-out: block{} to block{}: v{} to v{} in {}", |
450 | 15.9k | from_block.index(), |
451 | 15.9k | to_block.index(), |
452 | 15.9k | from_vreg.index(), |
453 | 15.9k | to_vreg.index(), |
454 | 15.9k | alloc |
455 | 15.9k | ), |
456 | 15.9k | ); |
457 | 207k | } |
458 | 0 | } |
459 | | |
460 | 223k | blockparam_out_idx += 1; |
461 | | } |
462 | | |
463 | 12.7M | block = block.next(); |
464 | | } |
465 | | |
466 | |         // Scan over blocks whose beginnings are covered by |
467 | |         // this range and for which the vreg is live at the |
468 | |         // start of the block. For each such block and each of |
469 | |         // its predecessors, record a destination half-move. |
470 | 11.7M | let mut block = self.cfginfo.insn_block[range.from.inst().index()]; |
471 | 11.7M | if self.cfginfo.block_entry[block.index()] < range.from { |
472 | 8.32M | block = block.next(); |
473 | 8.32M | } |
474 | 24.8M | while block.is_valid() && block.index() < self.func.num_blocks() { |
475 | 24.7M | if self.cfginfo.block_entry[block.index()] >= range.to { |
476 | 11.5M | break; |
477 | 13.1M | } |
478 | | |
479 | | // Add half-moves for blockparam inputs. |
480 | 13.1M | trace!( |
481 | 0 | "scanning blockparam_ins at vreg {} block {}: blockparam_in_idx = {}", |
482 | 0 | vreg.index(), |
483 | 0 | block.index(), |
484 | | prev.prev_ins_idx, |
485 | | ); |
486 | 13.1M | let mut idx = prev.blockparam_ins_idx(); |
487 | 23.5M | while idx < self.blockparam_ins.len() { |
488 | | let BlockparamIn { |
489 | 22.4M | from_block, |
490 | 22.4M | to_block, |
491 | 22.4M | to_vreg, |
492 | 22.4M | } = self.blockparam_ins[idx]; |
493 | 22.4M | if (to_vreg, to_block) > (vreg, block) { |
494 | 12.0M | break; |
495 | 10.3M | } |
496 | 10.3M | if (to_vreg, to_block) == (vreg, block) { |
497 | 246k | block_param_dests.push(BlockparamDest { |
498 | 246k | from_block, |
499 | 246k | to_block, |
500 | 246k | to_vreg, |
501 | 246k | alloc, |
502 | 246k | }); |
503 | 246k | trace!( |
504 | 0 | "match: blockparam_in: v{} in block{} from block{} into {}", |
505 | 0 | to_vreg.index(), |
506 | 0 | to_block.index(), |
507 | 0 | from_block.index(), |
508 | | alloc, |
509 | | ); |
510 | | #[cfg(debug_assertions)] |
511 | | if self.annotations_enabled { |
512 | | self.annotate( |
513 | | self.cfginfo.block_entry[block.index()], |
514 | | format!( |
515 | |                                 "blockparam-in: block{} to block{}: into v{} in {}", |
516 | | from_block.index(), |
517 | | to_block.index(), |
518 | | to_vreg.index(), |
519 | | alloc |
520 | | ), |
521 | | ); |
522 | | } |
523 | 10.1M | } |
524 | 10.3M | idx += 1; |
525 | | } |
526 | | |
527 | 13.1M | prev.update_blockparam_ins_idx(idx); |
528 | | |
529 | 13.1M | if !self.is_live_in(block, vreg) { |
530 | 531k | block = block.next(); |
531 | 531k | continue; |
532 | 12.6M | } |
533 | | |
534 | 12.6M | trace!( |
535 | 0 | "scanning preds at vreg {} block {} for ends outside the range", |
536 | 0 | vreg.index(), |
537 | 0 | block.index() |
538 | | ); |
539 | | |
540 | | // Now find any preds whose ends are not in the |
541 | | // same range, and insert appropriate moves. |
542 | 14.1M | for &pred in self.func.block_preds(block) { |
543 | 14.1M | trace!( |
544 | 0 | "pred block {} has exit {:?}", |
545 | 0 | pred.index(), |
546 | 0 | self.cfginfo.block_exit[pred.index()] |
547 | | ); |
548 | 14.1M | if range.contains_point(self.cfginfo.block_exit[pred.index()]) { |
549 | 10.0M | continue; |
550 | 4.06M | } |
551 | | |
552 | 4.06M | inter_block_dests.push(InterBlockDest { |
553 | 4.06M | from: pred, |
554 | 4.06M | to: block, |
555 | 4.06M | alloc, |
556 | 4.06M | }) |
557 | | } |
558 | | |
559 | 12.6M | block = block.next(); |
560 | | } |
561 | | |
562 | | // Scan over def/uses and apply allocations. |
563 | 15.5M | for use_idx in 0..self.ranges[entry.index].uses.len() { |
564 | 15.5M | let usedata = self.ranges[entry.index].uses[use_idx]; |
565 | 15.5M | trace!("applying to use: {:?}", usedata); |
566 | 15.5M | debug_assert!(range.contains_point(usedata.pos)); |
567 | 15.5M | let inst = usedata.pos.inst(); |
568 | 15.5M | let slot = usedata.slot; |
569 | 15.5M | let operand = usedata.operand; |
570 | 15.5M | self.set_alloc(inst, slot as usize, alloc); |
571 | 15.5M | if let OperandConstraint::Reuse(_) = operand.constraint() { |
572 | 555k | reuse_input_insts.push(inst); |
573 | 15.0M | } |
574 | | } |
575 | | |
576 | | // Scan debug-labels on this vreg that overlap with |
577 | | // this range, producing a debug-info output record |
578 | | // giving the allocation location for each label. |
579 | 11.7M | if !debug_labels.is_empty() { |
580 | | // Do a binary search to find the start of any |
581 | | // labels for this vreg. Recall that we require |
582 | | // debug-label requests to be sorted by vreg as a |
583 | | // precondition (which we verified above). |
584 | 11.5M | let start = debug_labels |
585 | 113M | .binary_search_by(|&(label_vreg, _label_from, _label_to, _label)| { |
586 | | // Search for the point just before the first |
587 | | // tuple that could be for `vreg` overlapping |
588 | | // with `range`. Never return |
589 | | // `Ordering::Equal`; `binary_search_by` in |
590 | | // this case returns the index of the first |
591 | | // entry that is greater as an `Err`. |
592 | 113M | if label_vreg.vreg() < vreg.index() { |
593 | 77.9M | core::cmp::Ordering::Less |
594 | | } else { |
595 | 35.8M | core::cmp::Ordering::Greater |
596 | | } |
597 | 113M | }) |
598 | 11.5M | .unwrap_err(); |
599 | | |
600 | 17.3M | for &(label_vreg, label_from, label_to, label) in &debug_labels[start..] { |
601 | 17.3M | let label_from = ProgPoint::before(label_from); |
602 | 17.3M | let label_to = ProgPoint::before(label_to); |
603 | 17.3M | let label_range = CodeRange { |
604 | 17.3M | from: label_from, |
605 | 17.3M | to: label_to, |
606 | 17.3M | }; |
607 | 17.3M | if label_vreg.vreg() != vreg.index() { |
608 | 7.16M | break; |
609 | 10.1M | } |
610 | 10.1M | if !range.overlaps(&label_range) { |
611 | 9.26M | continue; |
612 | 911k | } |
613 | | |
614 | 911k | let from = core::cmp::max(label_from, range.from); |
615 | 911k | let to = core::cmp::min(label_to, range.to); |
616 | | |
617 | 911k | self.ctx |
618 | 911k | .output |
619 | 911k | .debug_locations |
620 | 911k | .push((label, from, to, alloc)); |
621 | | } |
622 | 175k | } |
623 | | } |
624 | | |
625 | 7.70M | if !inter_block_dests.is_empty() { |
626 | 455k | self.output.stats.halfmoves_count += inter_block_dests.len() * 2; |
627 | | |
628 | 455k | inter_block_dests.sort_unstable_by_key(InterBlockDest::key); |
629 | | |
630 | 455k | let vreg = self.vreg(vreg); |
631 | 455k | trace!("processing inter-block moves for {}", vreg); |
632 | 4.06M | for dest in inter_block_dests.drain(..) { |
633 | 4.06M | let src = inter_block_sources[&dest.from]; |
634 | | |
635 | 4.06M | trace!( |
636 | 0 | " -> moving from {} to {} between {:?} and {:?}", |
637 | | src, |
638 | | dest.alloc, |
639 | | dest.from, |
640 | | dest.to |
641 | | ); |
642 | | |
643 | 4.06M | let (pos, prio) = choose_move_location(self, dest.from, dest.to); |
644 | 4.06M | inserted_moves.push(pos, prio, src, dest.alloc, vreg); |
645 | | } |
646 | 7.24M | } |
647 | | |
648 | 7.70M | blockparam_in_idx = prev.blockparam_ins_idx(); |
649 | | } |
650 | | |
651 | 10.8k | if !block_param_dests.is_empty() { |
652 | 6.74k | self.output.stats.halfmoves_count += block_param_sources.len(); |
653 | 6.74k | self.output.stats.halfmoves_count += block_param_dests.len(); |
654 | | |
655 | 6.74k | trace!("processing block-param moves"); |
656 | 252k | for dest in block_param_dests { |
657 | 246k | let src = dest.source(); |
658 | 246k | let src_alloc = block_param_sources.get(&src).unwrap(); |
659 | 246k | let (pos, prio) = choose_move_location(self, dest.from_block, dest.to_block); |
660 | 246k | inserted_moves.push(pos, prio, *src_alloc, dest.alloc, self.vreg(dest.to_vreg)); |
661 | 246k | } |
662 | 4.05k | } |
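// A self-contained sketch of the source/destination "half-move" pairing used for
// block parameters above, with integer ids standing in for blocks, vregs, and
// allocations. Sources are keyed by (from_block, to_vreg) while scanning ranges
// that cover block ends; destinations are gathered while scanning ranges that
// cover block starts; pairing them afterwards yields each edge's concrete move.

use std::collections::HashMap;

fn main() {
    // (from_block, to_vreg) -> allocation holding the outgoing value.
    let mut sources: HashMap<(u32, u32), u32> = HashMap::new();
    sources.insert((1, 7), 100); // v7 leaves block 1 in allocation 100

    // (from_block, to_block, to_vreg, destination allocation).
    let dests = vec![(1u32, 2u32, 7u32, 200u32)];

    let mut moves = Vec::new();
    for (from_block, to_block, to_vreg, dst) in dests {
        let src = sources[&(from_block, to_vreg)];
        moves.push((from_block, to_block, src, dst)); // move src -> dst on edge (1, 2)
    }
    assert_eq!(moves, vec![(1, 2, 100, 200)]);
}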
663 | | |
664 | | // Handle multi-fixed-reg constraints by copying. |
665 | 84.7k | for fixup in core::mem::replace(&mut self.multi_fixed_reg_fixups, vec![]) { |
666 | 84.7k | let from_alloc = self.get_alloc(fixup.pos.inst(), fixup.from_slot as usize); |
667 | 84.7k | let to_alloc = Allocation::reg(PReg::from_index(fixup.to_preg.index())); |
668 | 84.7k | trace!( |
669 | 0 | "multi-fixed-move constraint at {:?} from {} to {} for v{}", |
670 | | fixup.pos, |
671 | | from_alloc, |
672 | | to_alloc, |
673 | 0 | fixup.vreg.index(), |
674 | | ); |
675 | 84.7k | let prio = match fixup.level { |
676 | 22.1k | FixedRegFixupLevel::Initial => InsertMovePrio::MultiFixedRegInitial, |
677 | 62.5k | FixedRegFixupLevel::Secondary => InsertMovePrio::MultiFixedRegSecondary, |
678 | | }; |
679 | 84.7k | inserted_moves.push(fixup.pos, prio, from_alloc, to_alloc, self.vreg(fixup.vreg)); |
680 | 84.7k | self.set_alloc( |
681 | 84.7k | fixup.pos.inst(), |
682 | 84.7k | fixup.to_slot as usize, |
683 | 84.7k | Allocation::reg(PReg::from_index(fixup.to_preg.index())), |
684 | | ); |
685 | | } |
686 | | |
687 | | // Handle outputs that reuse inputs: copy beforehand, then set |
688 | | // input's alloc to output's. |
689 | | // |
690 | | // Note that the output's allocation may not *actually* be |
691 | | // valid until InstPosition::After, but the reused input may |
692 | | // occur at InstPosition::Before. This may appear incorrect, |
693 | | // but we make it work by ensuring that all *other* inputs are |
694 | | // extended to InstPosition::After so that the def will not |
695 | | // interfere. (The liveness computation code does this -- we |
696 | | // do not require the user to do so.) |
697 | | // |
698 | | // One might ask: why not insist that input-reusing defs occur |
699 | |         // at InstPosition::Before? This would be correct, but would |
700 | | // mean that the reused input and the reusing output |
701 | | // interfere, *guaranteeing* that every such case would |
702 | | // require a move. This is really bad on ISAs (like x86) where |
703 | | // reused inputs are ubiquitous. |
704 | | // |
705 | | // Another approach might be to put the def at Before, and |
706 | | // trim the reused input's liverange back to the previous |
707 | | // instruction's After. This is kind of OK until (i) a block |
708 | | // boundary occurs between the prior inst and this one, or |
709 | | // (ii) any moves/spills/reloads occur between the two |
710 | | // instructions. We really do need the input to be live at |
711 | | // this inst's Before. |
712 | | // |
713 | | // In principle what we really need is a "BeforeBefore" |
714 | | // program point, but we don't want to introduce that |
715 | | // everywhere and pay the cost of twice as many ProgPoints |
716 | | // throughout the allocator. |
717 | | // |
718 | | // Or we could introduce a separate move instruction -- this |
719 | | // is the approach that regalloc.rs takes with "mod" operands |
720 | | // -- but that is also costly. |
721 | | // |
722 | | // So we take this approach (invented by IonMonkey -- somewhat |
723 | | // hard to discern, though see [0] for a comment that makes |
724 | | // this slightly less unclear) to avoid interference between |
725 | | // the actual reused input and reusing output, ensure |
726 | | // interference (hence no incorrectness) between other inputs |
727 | | // and the reusing output, and not require a separate explicit |
728 | | // move instruction. |
729 | | // |
730 | | // [0] https://searchfox.org/mozilla-central/rev/3a798ef9252896fb389679f06dd3203169565af0/js/src/jit/shared/Lowering-shared-inl.h#108-110 |
731 | 566k | for inst in reuse_input_insts { |
732 | 555k | let mut input_reused: SmallVec<[usize; 4]> = smallvec![]; |
733 | 3.12M | for output_idx in 0..self.func.inst_operands(inst).len() { |
734 | 3.12M | let operand = self.func.inst_operands(inst)[output_idx]; |
735 | 3.12M | if let OperandConstraint::Reuse(input_idx) = operand.constraint() { |
736 | 555k | debug_assert!(!input_reused.contains(&input_idx)); |
737 | 555k | debug_assert_eq!(operand.pos(), OperandPos::Late); |
738 | 555k | input_reused.push(input_idx); |
739 | 555k | let input_alloc = self.get_alloc(inst, input_idx); |
740 | 555k | let output_alloc = self.get_alloc(inst, output_idx); |
741 | 555k | trace!( |
742 | 0 | "reuse-input inst {:?}: output {} has alloc {:?}, input {} has alloc {:?}", |
743 | | inst, |
744 | | output_idx, |
745 | | output_alloc, |
746 | | input_idx, |
747 | | input_alloc |
748 | | ); |
749 | 555k | if input_alloc != output_alloc { |
750 | 365k | #[cfg(debug_assertions)] |
751 | 365k | if self.annotations_enabled { |
752 | 365k | self.annotate( |
753 | 365k | ProgPoint::before(inst), |
754 | 365k | format!(" reuse-input-copy: {} -> {}", input_alloc, output_alloc), |
755 | 365k | ); |
756 | 365k | } |
757 | 365k | let input_operand = self.func.inst_operands(inst)[input_idx]; |
758 | 365k | inserted_moves.push( |
759 | 365k | ProgPoint::before(inst), |
760 | 365k | InsertMovePrio::ReusedInput, |
761 | 365k | input_alloc, |
762 | 365k | output_alloc, |
763 | 365k | input_operand.vreg(), |
764 | 365k | ); |
765 | 365k | self.set_alloc(inst, input_idx, output_alloc); |
766 | 365k | } |
767 | 2.56M | } |
768 | | } |
769 | | } |
770 | | |
771 | | // Sort the debug-locations vector; we provide this |
772 | | // invariant to the client. |
773 | 10.8k | self.output.debug_locations.sort_unstable(); |
774 | | |
775 | 10.8k | inserted_moves |
776 | 10.8k | } |
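// A simplified, standalone sketch of the reuse-input fixup performed at the end of
// `apply_allocations_and_insert_moves`: for an instruction whose output must reuse
// an input (e.g. two-operand x86 forms), the input slot is rewritten to the
// output's allocation and, if the value currently lives elsewhere, a copy is
// inserted just before the instruction. `Alloc` is a hypothetical stand-in for
// `Allocation`.

#[derive(Copy, Clone, Debug, PartialEq)]
struct Alloc(u8);

fn fixup_reuse(input: &mut Alloc, output: Alloc, copies_before: &mut Vec<(Alloc, Alloc)>) {
    if *input != output {
        // Copy the input's value into the output's location before the instruction,
        // then record the input as if it had been allocated there all along.
        copies_before.push((*input, output));
        *input = output;
    }
}

fn main() {
    let mut input = Alloc(3);
    let output = Alloc(5);
    let mut copies = Vec::new();
    fixup_reuse(&mut input, output, &mut copies);
    assert_eq!(copies, vec![(Alloc(3), Alloc(5))]);
    assert_eq!(input, Alloc(5));
}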
777 | | |
778 | 10.8k | pub fn resolve_inserted_moves(&mut self, mut inserted_moves: InsertedMoves) -> Edits { |
779 | | // For each program point, gather all moves together. Then |
780 | | // resolve (see cases below). |
781 | 10.8k | let mut i = 0; |
782 | 10.8k | inserted_moves |
783 | 10.8k | .moves |
784 | 32.2M | .sort_unstable_by_key(|m| m.pos_prio.key()); |
785 | | |
786 | | // Redundant-move elimination state tracker. |
787 | 10.8k | let mut redundant_moves = RedundantMoveEliminator::default(); |
788 | | |
789 | 998k | fn redundant_move_process_side_effects<'a, F: Function>( |
790 | 998k | this: &Env<'a, F>, |
791 | 998k | redundant_moves: &mut RedundantMoveEliminator, |
792 | 998k | from: ProgPoint, |
793 | 998k | to: ProgPoint, |
794 | 998k | ) { |
795 | | // If we cross a block boundary, clear and return. |
796 | 998k | if this.cfginfo.insn_block[from.inst().index()] |
797 | 998k | != this.cfginfo.insn_block[to.inst().index()] |
798 | | { |
799 | 223k | redundant_moves.clear(); |
800 | 223k | return; |
801 | 775k | } |
802 | | |
803 | 775k | let start_inst = if from.pos() == InstPosition::Before { |
804 | 775k | from.inst() |
805 | | } else { |
806 | 0 | from.inst().next() |
807 | | }; |
808 | 775k | let end_inst = if to.pos() == InstPosition::Before { |
809 | 775k | to.inst() |
810 | | } else { |
811 | 0 | to.inst().next() |
812 | | }; |
813 | 1.07M | for inst in start_inst.index()..end_inst.index() { |
814 | 1.07M | let inst = Inst::new(inst); |
815 | 7.02M | for (i, op) in this.func.inst_operands(inst).iter().enumerate() { |
816 | 7.02M | match op.kind() { |
817 | 2.14M | OperandKind::Def => { |
818 | 2.14M | let alloc = this.get_alloc(inst, i); |
819 | 2.14M | redundant_moves.clear_alloc(alloc); |
820 | 2.14M | } |
821 | 4.88M | _ => {} |
822 | | } |
823 | | } |
824 | 1.07M | for reg in this.func.inst_clobbers(inst) { |
825 | 651k | redundant_moves.clear_alloc(Allocation::reg(reg)); |
826 | 651k | } |
827 | | // The dedicated scratch registers may be clobbered by any |
828 | | // instruction. |
829 | 4.30M | for reg in this.env.scratch_by_class { |
830 | 3.22M | if let Some(reg) = reg { |
831 | 0 | redundant_moves.clear_alloc(Allocation::reg(reg)); |
832 | 3.22M | } |
833 | | } |
834 | | } |
835 | 998k | } |
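// A self-contained sketch of the idea behind `RedundantMoveEliminator` as it is
// used below (the real implementation lives elsewhere in this crate and tracks
// more state). Each allocation maps to a symbolic value id; a move whose
// destination already holds the source's value is elided, and a def or clobber of
// an allocation invalidates whatever it held, which is what the side-effect scan
// above feeds in via `clear_alloc`.

use std::collections::HashMap;

#[derive(Default)]
struct Elim {
    values: HashMap<u32, u32>, // allocation id -> symbolic value id
    next: u32,
}

impl Elim {
    fn value_of(&mut self, alloc: u32) -> u32 {
        if let Some(&v) = self.values.get(&alloc) {
            return v;
        }
        self.next += 1;
        self.values.insert(alloc, self.next);
        self.next
    }

    /// Returns true if the move `src -> dst` is redundant and can be elided.
    fn process_move(&mut self, src: u32, dst: u32) -> bool {
        let v = self.value_of(src);
        if self.values.get(&dst) == Some(&v) {
            true
        } else {
            self.values.insert(dst, v);
            false
        }
    }

    fn clear_alloc(&mut self, alloc: u32) {
        self.values.remove(&alloc);
    }
}

fn main() {
    let mut e = Elim::default();
    assert!(!e.process_move(1, 2)); // first copy: needed
    assert!(e.process_move(1, 2));  // destination already holds the value: elided
    e.clear_alloc(2);               // a def/clobber of 2 invalidates it
    assert!(!e.process_move(1, 2)); // needed again
}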
836 | | |
837 | 10.8k | let mut last_pos = ProgPoint::before(Inst::new(0)); |
838 | 10.8k | let mut edits = Edits::with_capacity(self.func.num_insts()); |
839 | | |
840 | 1.00M | while i < inserted_moves.moves.len() { |
841 | 998k | let start = i; |
842 | 998k | let pos_prio = inserted_moves.moves[i].pos_prio; |
843 | 2.80M | while i < inserted_moves.moves.len() && inserted_moves.moves[i].pos_prio == pos_prio { |
844 | 1.80M | i += 1; |
845 | 1.80M | } |
846 | 998k | let moves = &inserted_moves.moves[start..i]; |
847 | | |
848 | 998k | redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos_prio.pos); |
849 | 998k | last_pos = pos_prio.pos; |
850 | | |
851 | | // Gather all the moves in each RegClass separately. |
852 | | // These cannot interact, so it is safe to have separate |
853 | | // ParallelMove instances. They need to be separate because |
854 | | // moves between the classes are impossible. (We could |
855 | | // enhance ParallelMoves to understand register classes, but |
856 | | // this seems simpler.) |
857 | 998k | let mut int_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; |
858 | 998k | let mut float_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; |
859 | 998k | let mut vec_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; |
860 | | |
861 | 2.80M | for m in moves { |
862 | 1.80M | match m.to_vreg.class() { |
863 | 950k | RegClass::Int => { |
864 | 950k | int_moves.push(m.clone()); |
865 | 950k | } |
866 | 393k | RegClass::Float => { |
867 | 393k | float_moves.push(m.clone()); |
868 | 393k | } |
869 | 459k | RegClass::Vector => { |
870 | 459k | vec_moves.push(m.clone()); |
871 | 459k | } |
872 | | } |
873 | | } |
874 | | |
875 | 2.99M | for &(regclass, moves) in &[ |
876 | 998k | (RegClass::Int, &int_moves), |
877 | 998k | (RegClass::Float, &float_moves), |
878 | 998k | (RegClass::Vector, &vec_moves), |
879 | 998k | ] { |
880 | | // All moves in `moves` semantically happen in |
881 | | // parallel. Let's resolve these to a sequence of moves |
882 | | // that can be done one at a time. |
883 | 2.99M | let mut parallel_moves = ParallelMoves::new(); |
884 | 2.99M | trace!( |
885 | 0 | "parallel moves at pos {:?} prio {:?}", |
886 | | pos_prio.pos, |
887 | | pos_prio.prio |
888 | | ); |
889 | 4.79M | for m in moves { |
890 | 1.80M | trace!(" {} -> {}", m.from_alloc, m.to_alloc); |
891 | 1.80M | parallel_moves.add(m.from_alloc, m.to_alloc, Some(m.to_vreg)); |
892 | | } |
893 | | |
894 | 2.99M | let resolved = parallel_moves.resolve(); |
895 | 2.99M | let mut scratch_iter = RegTraversalIter::new( |
896 | 2.99M | self.env, regclass, None, None, 0, |
897 | 2.99M | None, // We assume there is no limit on the set of registers available for moves. |
898 | | ); |
899 | 2.99M | let mut dedicated_scratch = self.env.scratch_by_class[regclass as usize]; |
900 | 2.99M | let key = LiveRangeKey::from_range(&CodeRange { |
901 | 2.99M | from: pos_prio.pos, |
902 | 2.99M | to: pos_prio.pos.next(), |
903 | 2.99M | }); |
904 | 2.99M | let find_free_reg = || { |
905 | | // Use the dedicated scratch register first if it is |
906 | | // available. |
907 | 43.8k | if let Some(reg) = dedicated_scratch.take() { |
908 | 0 | return Some(Allocation::reg(reg)); |
909 | 43.8k | } |
910 | 95.2k | while let Some(preg) = scratch_iter.next() { |
911 | 95.0k | if !self.pregs[preg.index()] |
912 | 95.0k | .allocations |
913 | 95.0k | .btree |
914 | 95.0k | .contains_key(&key) |
915 | | { |
916 | 50.3k | let alloc = Allocation::reg(preg); |
917 | 50.3k | if moves |
918 | 50.3k | .iter() |
919 | 216k | .any(|m| m.from_alloc == alloc || m.to_alloc == alloc) |
920 | | { |
921 | | // Skip pregs used by moves in this |
922 | | // parallel move set, even if not |
923 | | // marked used at progpoint: edge move |
924 | | // liveranges meet but don't overlap |
925 | | // so otherwise we may incorrectly |
926 | | // overwrite a source reg. |
927 | 6.78k | continue; |
928 | 43.6k | } |
929 | 43.6k | return Some(alloc); |
930 | 44.6k | } |
931 | | } |
932 | 251 | None |
933 | 43.8k | }; |
934 | 2.99M | let mut stackslot_idx = 0; |
935 | 2.99M | let get_stackslot = || { |
936 | 251 | let idx = stackslot_idx; |
937 | 251 | stackslot_idx += 1; |
938 | | // We can't borrow `self` as mutable, so we create |
939 | | // these placeholders then allocate the actual |
940 | | // slots if needed with `self.allocate_spillslot` |
941 | | // below. |
942 | 251 | Allocation::stack(SpillSlot::new(SpillSlot::MAX - idx)) |
943 | 251 | }; |
944 | 2.99M | let is_stack_alloc = |alloc: Allocation| { |
945 | 2.47M | if let Some(preg) = alloc.as_reg() { |
946 | 2.02M | self.pregs[preg.index()].is_stack |
947 | | } else { |
948 | 449k | alloc.is_stack() |
949 | | } |
950 | 2.47M | }; |
951 | 2.99M | let preferred_victim = self.preferred_victim_by_class[regclass as usize]; |
952 | | |
953 | 2.99M | let scratch_resolver = MoveAndScratchResolver { |
954 | 2.99M | find_free_reg, |
955 | 2.99M | get_stackslot, |
956 | 2.99M | is_stack_alloc, |
957 | 2.99M | borrowed_scratch_reg: preferred_victim, |
958 | 2.99M | }; |
959 | | |
960 | 2.99M | let resolved = scratch_resolver.compute(resolved); |
961 | | |
962 | 2.99M | let mut rewrites = FxHashMap::default(); |
963 | 2.99M | for i in 0..stackslot_idx { |
964 | 251 | if i >= self.extra_spillslots_by_class[regclass as usize].len() { |
965 | 77 | let slot = |
966 | 77 | self.allocate_spillslot(self.func.spillslot_size(regclass) as u32); |
967 | 77 | self.extra_spillslots_by_class[regclass as usize].push(slot); |
968 | 174 | } |
969 | 251 | rewrites.insert( |
970 | 251 | Allocation::stack(SpillSlot::new(SpillSlot::MAX - i)), |
971 | 251 | self.extra_spillslots_by_class[regclass as usize][i], |
972 | | ); |
973 | | } |
974 | | |
975 | 4.84M | for (src, dst, to_vreg) in resolved { |
976 | 1.85M | let src = rewrites.get(&src).cloned().unwrap_or(src); |
977 | 1.85M | let dst = rewrites.get(&dst).cloned().unwrap_or(dst); |
978 | 1.85M | trace!(" resolved: {} -> {} ({:?})", src, dst, to_vreg); |
979 | 1.85M | let action = redundant_moves.process_move(src, dst, to_vreg); |
980 | 1.85M | if !action.elide { |
981 | 1.68M | edits.add(pos_prio, src, dst); |
982 | 1.68M | } else { |
983 | 163k | trace!(" -> redundant move elided"); |
984 | | } |
985 | | } |
986 | | } |
987 | | } |
988 | | |
989 | | // Ensure edits are in sorted ProgPoint order. N.B.: this must |
990 | | // be a stable sort! We have to keep the order produced by the |
991 | | // parallel-move resolver for all moves within a single sort |
992 | | // key. |
993 | 10.8k | edits.sort(); |
994 | 10.8k | self.output.stats.edits_count = edits.len(); |
995 | | |
996 | | // Add debug annotations. |
997 | 10.8k | if self.annotations_enabled { |
998 | 313k | for &(pos_prio, ref edit) in edits.iter() { |
999 | 313k | match edit { |
1000 | 313k | &Edit::Move { from, to } => { |
1001 | 313k | self.annotate(pos_prio.pos, format!("move {} -> {}", from, to)); |
1002 | 313k | } |
1003 | | } |
1004 | | } |
1005 | 9.68k | } |
1006 | | |
1007 | 10.8k | edits |
1008 | 10.8k | } |
1009 | | } |
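// A standalone sketch of why the scratch-register machinery above exists: a set of
// parallel moves such as {r0 -> r1, r1 -> r0} forms a cycle, so no ordering of
// plain copies preserves both values. The resolver breaks the cycle by parking one
// value in a free register (or a spill slot when none is free), which is what
// `find_free_reg` and `get_stackslot` supply to `MoveAndScratchResolver`. This is
// an illustration only, not the crate's `ParallelMoves` algorithm.

fn resolve_swap_with_scratch(a: usize, b: usize, scratch: usize) -> Vec<(usize, usize)> {
    // (src, dst) copies, to be executed in order.
    vec![(a, scratch), (b, a), (scratch, b)]
}

fn main() {
    let mut regs = [10, 20, 0]; // r0, r1, and a scratch register r2
    for (src, dst) in resolve_swap_with_scratch(0, 1, 2) {
        regs[dst] = regs[src];
    }
    assert_eq!(regs[0], 20); // r0 and r1 have been exchanged
    assert_eq!(regs[1], 10);
}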