Coverage Report

Created: 2025-12-09 07:28

/src/regalloc2/src/ion/moves.rs

/*
 * This file was initially derived from the files
 * `js/src/jit/BacktrackingAllocator.h` and
 * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was
 * originally licensed under the Mozilla Public License 2.0. We
 * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see
 * https://github.com/bytecodealliance/regalloc2/issues/7).
 *
 * Since the initial port, the design has been substantially evolved
 * and optimized.
 */

//! Move resolution.

use alloc::vec;

use super::{
    Env, InsertMovePrio, InsertedMove, InsertedMoves, LiveRangeFlag, LiveRangeIndex,
    RedundantMoveEliminator, VRegIndex,
};
use crate::ion::data_structures::{
    u64_key, BlockparamIn, BlockparamOut, CodeRange, Edits, FixedRegFixupLevel, LiveRangeKey,
    LiveRangeListEntry,
};
use crate::ion::reg_traversal::RegTraversalIter;
use crate::moves::{MoveAndScratchResolver, ParallelMoves};
use crate::{
    Allocation, Block, Edit, Function, FxHashMap, Inst, InstPosition, OperandConstraint,
    OperandKind, OperandPos, PReg, ProgPoint, RegClass, SpillSlot,
};
use alloc::format;
use alloc::vec::Vec;
use hashbrown::hash_map::Entry;
use smallvec::{smallvec, SmallVec};

impl<'a, F: Function> Env<'a, F> {
    pub fn is_start_of_block(&self, pos: ProgPoint) -> bool {
        let block = self.ctx.cfginfo.insn_block[pos.inst().index()];
        pos == self.ctx.cfginfo.block_entry[block.index()]
    }
    pub fn is_end_of_block(&self, pos: ProgPoint) -> bool {
        let block = self.ctx.cfginfo.insn_block[pos.inst().index()];
        pos == self.ctx.cfginfo.block_exit[block.index()]
    }

    pub fn get_alloc(&self, inst: Inst, slot: usize) -> Allocation {
        let inst_allocs =
            &self.ctx.output.allocs[self.ctx.output.inst_alloc_offsets[inst.index()] as usize..];
        inst_allocs[slot]
    }

    pub fn set_alloc(&mut self, inst: Inst, slot: usize, alloc: Allocation) {
        let inst_allocs = &mut self.ctx.output.allocs
            [self.ctx.output.inst_alloc_offsets[inst.index()] as usize..];
        inst_allocs[slot] = alloc;
    }
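
    // Editorial note: `Output::allocs` is one flat array; each instruction's
    // operand allocations live in the slice starting at
    // `inst_alloc_offsets[inst]`. A minimal sketch, with hypothetical values:
    //
    //     inst_alloc_offsets = [0, 3, 5, ...]
    //     get_alloc(Inst::new(1), 0)     // reads allocs[3 + 0]
    //     set_alloc(Inst::new(1), 1, a)  // writes allocs[3 + 1] = a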

    pub fn get_alloc_for_range(&self, range: LiveRangeIndex) -> Allocation {
        trace!("get_alloc_for_range: {:?}", range);
        let bundle = self.ctx.ranges[range].bundle;
        trace!(" -> bundle: {:?}", bundle);
        let bundledata = &self.ctx.bundles[bundle];
        trace!(" -> allocation {:?}", bundledata.allocation);
        if bundledata.allocation != Allocation::none() {
            bundledata.allocation
        } else {
            trace!(" -> spillset {:?}", bundledata.spillset);
            trace!(
                " -> spill slot {:?}",
                self.ctx.spillsets[bundledata.spillset].slot
            );
            self.ctx.spillslots[self.ctx.spillsets[bundledata.spillset].slot.index()].alloc
        }
    }

    pub fn apply_allocations_and_insert_moves(&mut self) -> InsertedMoves {
        trace!("apply_allocations_and_insert_moves");
        trace!("blockparam_ins: {:?}", self.blockparam_ins);
        trace!("blockparam_outs: {:?}", self.blockparam_outs);

        let mut inserted_moves = InsertedMoves::default();

        // Now that all splits are done, we can pay the cost once to
        // sort VReg range lists and update with the final ranges.
        for vreg in &mut self.ctx.vregs {
            for entry in &mut vreg.ranges {
                entry.range = self.ctx.ranges[entry.index].range;
            }
            vreg.ranges.sort_unstable_by_key(|entry| entry.range.from);
        }

        /// Buffered information about the previous liverange that was processed.
        struct PrevBuffer {
            prev: Option<LiveRangeListEntry>,
            prev_ins_idx: usize,
            buffered: Option<LiveRangeListEntry>,
            buffered_ins_idx: usize,
        }

        impl PrevBuffer {
            fn new(prev_ins_idx: usize) -> Self {
                Self {
                    prev: None,
                    prev_ins_idx,
                    buffered: None,
                    buffered_ins_idx: prev_ins_idx,
                }
            }

            /// Returns the previous `LiveRangeListEntry` when it's present.
            #[inline(always)]
            fn is_valid(&self) -> Option<LiveRangeListEntry> {
                self.prev
            }

            /// Fetch the current index into the `Env::blockparam_ins` vector.
            #[inline(always)]
            fn blockparam_ins_idx(&self) -> usize {
                self.prev_ins_idx
            }

            /// Record this index as the next index to use when the previous
            /// liverange buffer advances.
            #[inline(always)]
            fn update_blockparam_ins_idx(&mut self, idx: usize) {
                self.buffered_ins_idx = idx;
            }

            /// As overlapping liveranges might start at the same program point, we buffer the
            /// previous liverange used when determining where to take the last value from for
            /// intra-block moves. The liveranges we process are buffered until we encounter one
            /// that starts at a later program point, indicating that it's now safe to advance the
            /// previous LR buffer. We accumulate the longest-lived liverange in the buffer as a
            /// heuristic for finding the most stable source of a value.
            ///
            /// We also buffer the index into the `Env::blockparam_ins` vector, as we may see
            /// multiple uses of a blockparam within a single instruction, and as such may need to
            /// generate multiple blockparam move destinations by re-traversing that section of the
            /// vector.
            #[inline(always)]
            fn advance(&mut self, current: LiveRangeListEntry) {
                // Advance the `prev` pointer to the `next` pointer, as long as the `next` pointer
                // does not start at the same time as the current LR we're processing.
                if self
                    .buffered
                    .map(|entry| entry.range.from < current.range.from)
                    .unwrap_or(false)
                {
                    self.prev = self.buffered;
                    self.prev_ins_idx = self.buffered_ins_idx;
                }

                // Advance the `next` pointer to the currently processed LR, as long as it ends
                // later than the current `next`.
                if self
                    .buffered
                    .map(|entry| entry.range.to < current.range.to)
                    .unwrap_or(true)
                {
                    self.buffered = Some(current);
                }
            }
        }
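
        // A minimal illustration of `advance` (editorial; the ranges are
        // hypothetical). Processing LRs sorted by start: A = [0, 40),
        // then B = [0, 10), then C = [20, 30):
        //   - advance(A): nothing buffered yet, so A becomes `buffered`.
        //   - advance(B): B starts where A starts, so `prev` does not move;
        //     A ends later, so A stays buffered as the longest-lived source.
        //   - advance(C): C starts later, so `prev` advances to A.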

        // Determine the ProgPoint where moves on this (from, to)
        // edge should go:
        // - If there is more than one in-edge to `to`, then
        //   `from` must have only one out-edge; moves go at tail of
        //   `from` just before last Branch/Ret.
        // - Otherwise, there must be at most one in-edge to `to`,
        //   and moves go at start of `to`.
        #[inline(always)]
        fn choose_move_location<'a, F: Function>(
            env: &Env<'a, F>,
            from: Block,
            to: Block,
        ) -> (ProgPoint, InsertMovePrio) {
            let from_last_insn = env.func.block_insns(from).last();
            let to_first_insn = env.func.block_insns(to).first();
            let from_is_ret = env.func.is_ret(from_last_insn);
            let to_is_entry = env.func.entry_block() == to;
            let from_outs = env.func.block_succs(from).len() + if from_is_ret { 1 } else { 0 };
            let to_ins = env.func.block_preds(to).len() + if to_is_entry { 1 } else { 0 };

            if to_ins > 1 && from_outs <= 1 {
                (
                    // N.B.: though semantically the edge moves happen
                    // after the branch, we must insert them before
                    // the branch because otherwise, of course, they
                    // would never execute. This is correct even in
                    // the presence of branches that read register
                    // inputs (e.g. conditional branches on some RISCs
                    // that branch on reg zero/not-zero, or any
                    // indirect branch), but for a very subtle reason:
                    // all cases of such branches will (or should)
                    // have multiple successors, and thus due to
                    // critical-edge splitting, their successors will
                    // have only the single predecessor, and we prefer
                    // to insert at the head of the successor in that
                    // case (rather than here). We make this a
                    // requirement, in fact: the user of this library
                    // shall not read registers in a branch
                    // instruction if there is only one successor per
                    // the given CFG information.
                    ProgPoint::before(from_last_insn),
                    InsertMovePrio::OutEdgeMoves,
                )
            } else if to_ins <= 1 {
                (
                    ProgPoint::before(to_first_insn),
                    InsertMovePrio::InEdgeMoves,
                )
            } else {
                panic!(
                    "Critical edge: can't insert moves between blocks {:?} and {:?}",
                    from, to
                );
            }
        }
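
        // A small worked example (editorial): in a diamond CFG
        // A -> {B, C}, B -> D, C -> D, the edge A->B has to_ins(B) == 1,
        // so its moves go at the head of B; the edge B->D has
        // to_ins(D) == 2 and from_outs(B) == 1, so its moves go at the
        // tail of B, just before the branch.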

        #[derive(PartialEq)]
        struct InterBlockDest {
            to: Block,
            from: Block,
            alloc: Allocation,
        }

        impl InterBlockDest {
            fn key(&self) -> u64 {
                u64_key(self.from.raw_u32(), self.to.raw_u32())
            }
        }
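
        // Editorial note: `u64_key` packs its two u32 arguments into a
        // single sortable u64 (presumably with the first argument in the
        // high bits), so sorting `InterBlockDest`s by `key()` groups all
        // destinations for the same (from, to) edge together.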

        let mut inter_block_sources: FxHashMap<Block, Allocation> = FxHashMap::default();
        let mut inter_block_dests = Vec::with_capacity(self.func.num_blocks());

        #[derive(Hash, Eq, PartialEq)]
        struct BlockparamSourceKey {
            bits: u64,
        }

        impl BlockparamSourceKey {
            fn new(from_block: Block, to_vreg: VRegIndex) -> Self {
                BlockparamSourceKey {
                    bits: u64_key(from_block.raw_u32(), to_vreg.raw_u32()),
                }
            }
        }

        struct BlockparamDest {
            from_block: Block,
            to_block: Block,
            to_vreg: VRegIndex,
            alloc: Allocation,
        }

        impl BlockparamDest {
            fn key(&self) -> u64 {
                u64_key(self.to_block.raw_u32(), self.from_block.raw_u32())
            }

            fn source(&self) -> BlockparamSourceKey {
                BlockparamSourceKey::new(self.from_block, self.to_vreg)
            }
        }

        let mut block_param_sources =
            FxHashMap::<BlockparamSourceKey, Allocation>::with_capacity_and_hasher(
                3 * self.func.num_insts(),
                Default::default(),
            );
        let mut block_param_dests = Vec::with_capacity(3 * self.func.num_insts());

        let debug_labels = self.func.debug_value_labels();

        let mut reuse_input_insts = Vec::with_capacity(self.func.num_insts() / 2);

        let mut blockparam_in_idx = 0;
        let mut blockparam_out_idx = 0;
        for vreg in 0..self.vregs.len() {
            let vreg = VRegIndex::new(vreg);
            if !self.is_vreg_used(vreg) {
                continue;
            }

            inter_block_sources.clear();

            // For each range in each vreg, insert moves or
            // half-moves.  We also scan over `blockparam_ins` and
            // `blockparam_outs`, which are sorted by (block, vreg),
            // to fill in allocations.
            let mut prev = PrevBuffer::new(blockparam_in_idx);
            for range_idx in 0..self.vregs[vreg].ranges.len() {
                let entry = self.vregs[vreg].ranges[range_idx];
                let alloc = self.get_alloc_for_range(entry.index);
                let range = entry.range;
                trace!(
                    "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?}",
                    vreg,
                    entry.index,
                    range,
                    alloc,
                );
                debug_assert!(alloc != Allocation::none());

                if self.annotations_enabled {
                    self.annotate(
                        range.from,
                        format!(
                            " <<< start v{} in {} (range{}) (bundle{})",
                            vreg.index(),
                            alloc,
                            entry.index.index(),
                            self.ranges[entry.index].bundle.raw_u32(),
                        ),
                    );
                    self.annotate(
                        range.to,
                        format!(
                            "     end   v{} in {} (range{}) (bundle{}) >>>",
                            vreg.index(),
                            alloc,
                            entry.index.index(),
                            self.ranges[entry.index].bundle.raw_u32(),
                        ),
                    );
                }

                prev.advance(entry);

                // Does this range follow immediately after a prior
                // range in the same block? If so, insert a move (if
                // the allocs differ). We do this directly rather than
                // with half-moves because we eagerly know both sides
                // already (and also, half-moves are specific to
                // inter-block transfers).
                //
                // Note that we do *not* do this if there is also a
                // def as the first use in the new range: it's
                // possible that an old liverange covers the Before
                // pos of an inst, a new liverange covers the After
                // pos, and the def also happens at After. In this
                // case we don't want to insert a move after the
                // instruction copying the old liverange.
                //
                // Note also that we assert that the new range has to
                // start at the Before-point of an instruction; we
                // can't insert a move that logically happens just
                // before After (i.e. in the middle of a single
                // instruction).
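                //
                // A concrete example (editorial, hypothetical split): prev
                // LR = [5.Before, 10.Before) in `r1` and this LR =
                // [10.Before, 20.After) in `stack0`, not at a block start
                // and not starting at a def: we insert `stack0 <- r1` at
                // 10.Before.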
                if let Some(prev) = prev.is_valid() {
                    let prev_alloc = self.get_alloc_for_range(prev.index);
                    debug_assert!(prev_alloc != Allocation::none());

                    if prev.range.to >= range.from
                        && (prev.range.to > range.from || !self.is_start_of_block(range.from))
                        && !self.ranges[entry.index].has_flag(LiveRangeFlag::StartsAtDef)
                    {
                        trace!(
                            "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}",
                            prev.index.index(),
                            entry.index.index(),
                            prev_alloc,
                            alloc,
                            vreg.index()
                        );
                        debug_assert_eq!(range.from.pos(), InstPosition::Before);
                        inserted_moves.push(
                            range.from,
                            InsertMovePrio::Regular,
                            prev_alloc,
                            alloc,
                            self.vreg(vreg),
                        );
                    }
                }

                // Scan over blocks whose ends are covered by this
                // range. For each, for each successor that is not
                // already in this range (hence guaranteed to have the
                // same allocation) and if the vreg is live, add a
                // Source half-move.
                let mut block = self.cfginfo.insn_block[range.from.inst().index()];
                while block.is_valid() && block.index() < self.func.num_blocks() {
                    if range.to < self.cfginfo.block_exit[block.index()].next() {
                        break;
                    }
                    trace!("examining block with end in range: block{}", block.index());

                    match inter_block_sources.entry(block) {
                        // If the entry is already present in the map, we'll try to prefer a
                        // register allocation.
                        Entry::Occupied(mut entry) => {
                            if !entry.get().is_reg() {
                                entry.insert(alloc);
                            }
                        }
                        Entry::Vacant(entry) => {
                            entry.insert(alloc);
                        }
                    }

                    // Scan forward in `blockparam_outs`, adding all
                    // half-moves for outgoing values to blockparams
                    // in succs.
                    trace!(
                        "scanning blockparam_outs for v{} block{}: blockparam_out_idx = {}",
                        vreg.index(),
                        block.index(),
                        blockparam_out_idx,
                    );
                    while blockparam_out_idx < self.blockparam_outs.len() {
                        let BlockparamOut {
                            from_vreg,
                            from_block,
                            to_block,
                            to_vreg,
                        } = self.blockparam_outs[blockparam_out_idx];
                        if (from_vreg, from_block) > (vreg, block) {
                            break;
                        }
                        if (from_vreg, from_block) == (vreg, block) {
                            trace!(
                                " -> found: from v{} block{} to v{} block{}",
                                from_vreg.index(),
                                from_block.index(),
                                to_vreg.index(),
                                to_block.index()
                            );

                            let key = BlockparamSourceKey::new(from_block, to_vreg);
                            match block_param_sources.entry(key) {
                                // As with inter-block moves, if the entry is already present we'll
                                // try to prefer a register allocation.
                                Entry::Occupied(mut entry) => {
                                    if !entry.get().is_reg() {
                                        entry.insert(alloc);
                                    }
                                }
                                Entry::Vacant(entry) => {
                                    entry.insert(alloc);
                                }
                            }

                            if self.annotations_enabled {
                                self.annotate(
                                    self.cfginfo.block_exit[block.index()],
                                    format!(
                                        "blockparam-out: block{} to block{}: v{} to v{} in {}",
                                        from_block.index(),
                                        to_block.index(),
                                        from_vreg.index(),
                                        to_vreg.index(),
                                        alloc
                                    ),
                                );
                            }
                        }

                        blockparam_out_idx += 1;
                    }

                    block = block.next();
                }

                // Scan over blocks whose beginnings are covered by
                // this range and for which the vreg is live at the
                // start of the block. For each, for each predecessor,
                // add a Dest half-move.
                let mut block = self.cfginfo.insn_block[range.from.inst().index()];
                if self.cfginfo.block_entry[block.index()] < range.from {
                    block = block.next();
                }
                while block.is_valid() && block.index() < self.func.num_blocks() {
                    if self.cfginfo.block_entry[block.index()] >= range.to {
                        break;
                    }

                    // Add half-moves for blockparam inputs.
                    trace!(
                        "scanning blockparam_ins at vreg {} block {}: blockparam_in_idx = {}",
                        vreg.index(),
                        block.index(),
                        prev.prev_ins_idx,
                    );
                    let mut idx = prev.blockparam_ins_idx();
                    while idx < self.blockparam_ins.len() {
                        let BlockparamIn {
                            from_block,
                            to_block,
                            to_vreg,
                        } = self.blockparam_ins[idx];
                        if (to_vreg, to_block) > (vreg, block) {
                            break;
                        }
                        if (to_vreg, to_block) == (vreg, block) {
                            block_param_dests.push(BlockparamDest {
                                from_block,
                                to_block,
                                to_vreg,
                                alloc,
                            });
                            trace!(
                                "match: blockparam_in: v{} in block{} from block{} into {}",
                                to_vreg.index(),
                                to_block.index(),
                                from_block.index(),
                                alloc,
                            );
                            #[cfg(debug_assertions)]
                            if self.annotations_enabled {
                                self.annotate(
                                    self.cfginfo.block_entry[block.index()],
                                    format!(
                                        "blockparam-in: block{} to block{}: into v{} in {}",
                                        from_block.index(),
                                        to_block.index(),
                                        to_vreg.index(),
                                        alloc
                                    ),
                                );
                            }
                        }
                        idx += 1;
                    }

                    prev.update_blockparam_ins_idx(idx);

                    if !self.is_live_in(block, vreg) {
                        block = block.next();
                        continue;
                    }

                    trace!(
                        "scanning preds at vreg {} block {} for ends outside the range",
                        vreg.index(),
                        block.index()
                    );

                    // Now find any preds whose ends are not in the
                    // same range, and insert appropriate moves.
                    for &pred in self.func.block_preds(block) {
                        trace!(
                            "pred block {} has exit {:?}",
                            pred.index(),
                            self.cfginfo.block_exit[pred.index()]
                        );
                        if range.contains_point(self.cfginfo.block_exit[pred.index()]) {
                            continue;
                        }

                        inter_block_dests.push(InterBlockDest {
                            from: pred,
                            to: block,
                            alloc,
                        })
                    }

                    block = block.next();
                }

                // Scan over def/uses and apply allocations.
                for use_idx in 0..self.ranges[entry.index].uses.len() {
                    let usedata = self.ranges[entry.index].uses[use_idx];
                    trace!("applying to use: {:?}", usedata);
                    debug_assert!(range.contains_point(usedata.pos));
                    let inst = usedata.pos.inst();
                    let slot = usedata.slot;
                    let operand = usedata.operand;
                    self.set_alloc(inst, slot as usize, alloc);
                    if let OperandConstraint::Reuse(_) = operand.constraint() {
                        reuse_input_insts.push(inst);
                    }
                }

                // Scan debug-labels on this vreg that overlap with
                // this range, producing a debug-info output record
                // giving the allocation location for each label.
                if !debug_labels.is_empty() {
                    // Do a binary search to find the start of any
                    // labels for this vreg. Recall that we require
                    // debug-label requests to be sorted by vreg as a
                    // precondition (which we verified above).
                    let start = debug_labels
                        .binary_search_by(|&(label_vreg, _label_from, _label_to, _label)| {
                            // Search for the point just before the first
                            // tuple that could be for `vreg` overlapping
                            // with `range`. Never return
                            // `Ordering::Equal`; `binary_search_by` in
                            // this case returns the index of the first
                            // entry that is greater as an `Err`.
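                            //
                            // For example (editorial): with label vregs
                            // [1, 1, 3, 5] and vreg == 3, this comparator
                            // yields Less, Less, Greater, Greater, so the
                            // search returns Err(2), the first candidate
                            // entry for v3.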
                            if label_vreg.vreg() < vreg.index() {
                                core::cmp::Ordering::Less
                            } else {
                                core::cmp::Ordering::Greater
                            }
                        })
                        .unwrap_err();

                    for &(label_vreg, label_from, label_to, label) in &debug_labels[start..] {
                        let label_from = ProgPoint::before(label_from);
                        let label_to = ProgPoint::before(label_to);
                        let label_range = CodeRange {
                            from: label_from,
                            to: label_to,
                        };
                        if label_vreg.vreg() != vreg.index() {
                            break;
                        }
                        if !range.overlaps(&label_range) {
                            continue;
                        }

                        let from = core::cmp::max(label_from, range.from);
                        let to = core::cmp::min(label_to, range.to);

                        self.ctx
                            .output
                            .debug_locations
                            .push((label, from, to, alloc));
                    }
                }
            }

            if !inter_block_dests.is_empty() {
                self.output.stats.halfmoves_count += inter_block_dests.len() * 2;

                inter_block_dests.sort_unstable_by_key(InterBlockDest::key);

                let vreg = self.vreg(vreg);
                trace!("processing inter-block moves for {}", vreg);
                for dest in inter_block_dests.drain(..) {
                    let src = inter_block_sources[&dest.from];

                    trace!(
                        " -> moving from {} to {} between {:?} and {:?}",
                        src,
                        dest.alloc,
                        dest.from,
                        dest.to
                    );

                    let (pos, prio) = choose_move_location(self, dest.from, dest.to);
                    inserted_moves.push(pos, prio, src, dest.alloc, vreg);
                }
            }

            blockparam_in_idx = prev.blockparam_ins_idx();
        }

        if !block_param_dests.is_empty() {
            self.output.stats.halfmoves_count += block_param_sources.len();
            self.output.stats.halfmoves_count += block_param_dests.len();

            trace!("processing block-param moves");
            for dest in block_param_dests {
                let src = dest.source();
                let src_alloc = block_param_sources.get(&src).unwrap();
                let (pos, prio) = choose_move_location(self, dest.from_block, dest.to_block);
                inserted_moves.push(pos, prio, *src_alloc, dest.alloc, self.vreg(dest.to_vreg));
            }
        }

        // Handle multi-fixed-reg constraints by copying.
        for fixup in core::mem::replace(&mut self.multi_fixed_reg_fixups, vec![]) {
            let from_alloc = self.get_alloc(fixup.pos.inst(), fixup.from_slot as usize);
            let to_alloc = Allocation::reg(PReg::from_index(fixup.to_preg.index()));
            trace!(
                "multi-fixed-move constraint at {:?} from {} to {} for v{}",
                fixup.pos,
                from_alloc,
                to_alloc,
                fixup.vreg.index(),
            );
            let prio = match fixup.level {
                FixedRegFixupLevel::Initial => InsertMovePrio::MultiFixedRegInitial,
                FixedRegFixupLevel::Secondary => InsertMovePrio::MultiFixedRegSecondary,
            };
            inserted_moves.push(fixup.pos, prio, from_alloc, to_alloc, self.vreg(fixup.vreg));
            self.set_alloc(
                fixup.pos.inst(),
                fixup.to_slot as usize,
                Allocation::reg(PReg::from_index(fixup.to_preg.index())),
            );
        }

        // Handle outputs that reuse inputs: copy beforehand, then set
        // input's alloc to output's.
        //
        // Note that the output's allocation may not *actually* be
        // valid until InstPosition::After, but the reused input may
        // occur at InstPosition::Before. This may appear incorrect,
        // but we make it work by ensuring that all *other* inputs are
        // extended to InstPosition::After so that the def will not
        // interfere. (The liveness computation code does this -- we
        // do not require the user to do so.)
        //
        // One might ask: why not insist that input-reusing defs occur
        // at InstPosition::Before? This would be correct, but would
        // mean that the reused input and the reusing output
        // interfere, *guaranteeing* that every such case would
        // require a move. This is really bad on ISAs (like x86) where
        // reused inputs are ubiquitous.
        //
        // Another approach might be to put the def at Before, and
        // trim the reused input's liverange back to the previous
        // instruction's After. This is kind of OK until (i) a block
        // boundary occurs between the prior inst and this one, or
        // (ii) any moves/spills/reloads occur between the two
        // instructions. We really do need the input to be live at
        // this inst's Before.
        //
        // In principle what we really need is a "BeforeBefore"
        // program point, but we don't want to introduce that
        // everywhere and pay the cost of twice as many ProgPoints
        // throughout the allocator.
        //
        // Or we could introduce a separate move instruction -- this
        // is the approach that regalloc.rs takes with "mod" operands
        // -- but that is also costly.
        //
        // So we take this approach (invented by IonMonkey -- somewhat
        // hard to discern, though see [0] for a comment that makes
        // this slightly less unclear) to avoid interference between
        // the actual reused input and reusing output, ensure
        // interference (hence no incorrectness) between other inputs
        // and the reusing output, and not require a separate explicit
        // move instruction.
        //
        // [0] https://searchfox.org/mozilla-central/rev/3a798ef9252896fb389679f06dd3203169565af0/js/src/jit/shared/Lowering-shared-inl.h#108-110
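        //
        // A sketch of the operand shape this handles (editorial; the vreg
        // names are hypothetical, constructors per the crate's public
        // `Operand` API):
        //
        //     // two-address form, e.g. x86 `add dst, src`
        //     let ops = [
        //         Operand::reg_use(src),          // input, read at Early
        //         Operand::reg_reuse_def(dst, 0), // def at Late, reuses input 0
        //     ];
        //
        // If `src` and `dst` receive different allocations, the loop below
        // inserts `output_alloc <- input_alloc` at `ProgPoint::before(inst)`
        // and rewrites the input's slot to the output's allocation.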
        for inst in reuse_input_insts {
            let mut input_reused: SmallVec<[usize; 4]> = smallvec![];
            for output_idx in 0..self.func.inst_operands(inst).len() {
                let operand = self.func.inst_operands(inst)[output_idx];
                if let OperandConstraint::Reuse(input_idx) = operand.constraint() {
                    debug_assert!(!input_reused.contains(&input_idx));
                    debug_assert_eq!(operand.pos(), OperandPos::Late);
                    input_reused.push(input_idx);
                    let input_alloc = self.get_alloc(inst, input_idx);
                    let output_alloc = self.get_alloc(inst, output_idx);
                    trace!(
                        "reuse-input inst {:?}: output {} has alloc {:?}, input {} has alloc {:?}",
                        inst,
                        output_idx,
                        output_alloc,
                        input_idx,
                        input_alloc
                    );
                    if input_alloc != output_alloc {
                        #[cfg(debug_assertions)]
                        if self.annotations_enabled {
                            self.annotate(
                                ProgPoint::before(inst),
                                format!(" reuse-input-copy: {} -> {}", input_alloc, output_alloc),
                            );
                        }
                        let input_operand = self.func.inst_operands(inst)[input_idx];
                        inserted_moves.push(
                            ProgPoint::before(inst),
                            InsertMovePrio::ReusedInput,
                            input_alloc,
                            output_alloc,
                            input_operand.vreg(),
                        );
                        self.set_alloc(inst, input_idx, output_alloc);
                    }
                }
            }
        }

        // Sort the debug-locations vector; we provide this
        // invariant to the client.
        self.output.debug_locations.sort_unstable();

        inserted_moves
    }

    pub fn resolve_inserted_moves(&mut self, mut inserted_moves: InsertedMoves) -> Edits {
        // For each program point, gather all moves together. Then
        // resolve (see cases below).
        let mut i = 0;
        inserted_moves
            .moves
            .sort_unstable_by_key(|m| m.pos_prio.key());

        // Redundant-move elimination state tracker.
        let mut redundant_moves = RedundantMoveEliminator::default();

        fn redundant_move_process_side_effects<'a, F: Function>(
            this: &Env<'a, F>,
            redundant_moves: &mut RedundantMoveEliminator,
            from: ProgPoint,
            to: ProgPoint,
        ) {
            // If we cross a block boundary, clear and return.
            if this.cfginfo.insn_block[from.inst().index()]
                != this.cfginfo.insn_block[to.inst().index()]
            {
                redundant_moves.clear();
                return;
            }

            let start_inst = if from.pos() == InstPosition::Before {
                from.inst()
            } else {
                from.inst().next()
            };
            let end_inst = if to.pos() == InstPosition::Before {
                to.inst()
            } else {
                to.inst().next()
            };
            for inst in start_inst.index()..end_inst.index() {
                let inst = Inst::new(inst);
                for (i, op) in this.func.inst_operands(inst).iter().enumerate() {
                    match op.kind() {
                        OperandKind::Def => {
                            let alloc = this.get_alloc(inst, i);
                            redundant_moves.clear_alloc(alloc);
                        }
                        _ => {}
                    }
                }
                for reg in this.func.inst_clobbers(inst) {
                    redundant_moves.clear_alloc(Allocation::reg(reg));
                }
                // The dedicated scratch registers may be clobbered by any
                // instruction.
                for reg in this.env.scratch_by_class {
                    if let Some(reg) = reg {
                        redundant_moves.clear_alloc(Allocation::reg(reg));
                    }
                }
            }
        }

        let mut last_pos = ProgPoint::before(Inst::new(0));
        let mut edits = Edits::with_capacity(self.func.num_insts());

        while i < inserted_moves.moves.len() {
            let start = i;
            let pos_prio = inserted_moves.moves[i].pos_prio;
            while i < inserted_moves.moves.len() && inserted_moves.moves[i].pos_prio == pos_prio {
                i += 1;
            }
            let moves = &inserted_moves.moves[start..i];

            redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos_prio.pos);
            last_pos = pos_prio.pos;

            // Gather all the moves in each RegClass separately.
            // These cannot interact, so it is safe to have separate
            // ParallelMove instances. They need to be separate because
            // moves between the classes are impossible. (We could
            // enhance ParallelMoves to understand register classes, but
            // this seems simpler.)
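            //
            // Editorial note on cycles: because all moves in a group happen
            // in parallel, a set like { r0 -> r1, r1 -> r0 } cannot be
            // emitted naively in either order; `ParallelMoves::resolve()`
            // sequences each group, and cycles are broken with a scratch
            // location chosen by the resolver machinery below, e.g.
            // `scratch <- r0; r0 <- r1; r1 <- scratch`.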
            let mut int_moves: SmallVec<[InsertedMove; 8]> = smallvec![];
            let mut float_moves: SmallVec<[InsertedMove; 8]> = smallvec![];
            let mut vec_moves: SmallVec<[InsertedMove; 8]> = smallvec![];

            for m in moves {
                match m.to_vreg.class() {
                    RegClass::Int => {
                        int_moves.push(m.clone());
                    }
                    RegClass::Float => {
                        float_moves.push(m.clone());
                    }
                    RegClass::Vector => {
                        vec_moves.push(m.clone());
                    }
                }
            }

            for &(regclass, moves) in &[
                (RegClass::Int, &int_moves),
                (RegClass::Float, &float_moves),
                (RegClass::Vector, &vec_moves),
            ] {
                // All moves in `moves` semantically happen in
                // parallel. Let's resolve these to a sequence of moves
                // that can be done one at a time.
                let mut parallel_moves = ParallelMoves::new();
                trace!(
                    "parallel moves at pos {:?} prio {:?}",
                    pos_prio.pos,
                    pos_prio.prio
                );
                for m in moves {
                    trace!(" {} -> {}", m.from_alloc, m.to_alloc);
                    parallel_moves.add(m.from_alloc, m.to_alloc, Some(m.to_vreg));
                }

                let resolved = parallel_moves.resolve();
                let mut scratch_iter = RegTraversalIter::new(
                    self.env, regclass, None, None, 0,
                    None, // We assume there is no limit on the set of registers available for moves.
                );
                let mut dedicated_scratch = self.env.scratch_by_class[regclass as usize];
                let key = LiveRangeKey::from_range(&CodeRange {
                    from: pos_prio.pos,
                    to: pos_prio.pos.next(),
                });
                let find_free_reg = || {
                    // Use the dedicated scratch register first if it is
                    // available.
                    if let Some(reg) = dedicated_scratch.take() {
                        return Some(Allocation::reg(reg));
                    }
                    while let Some(preg) = scratch_iter.next() {
                        if !self.pregs[preg.index()]
                            .allocations
                            .btree
                            .contains_key(&key)
                        {
                            let alloc = Allocation::reg(preg);
                            if moves
                                .iter()
                                .any(|m| m.from_alloc == alloc || m.to_alloc == alloc)
                            {
                                // Skip pregs used by moves in this
                                // parallel move set, even if not
                                // marked used at progpoint: edge move
                                // liveranges meet but don't overlap
                                // so otherwise we may incorrectly
                                // overwrite a source reg.
                                continue;
                            }
                            return Some(alloc);
                        }
                    }
                    None
                };
                let mut stackslot_idx = 0;
                let get_stackslot = || {
                    let idx = stackslot_idx;
                    stackslot_idx += 1;
                    // We can't borrow `self` as mutable, so we create
                    // these placeholders then allocate the actual
                    // slots if needed with `self.allocate_spillslot`
                    // below.
                    Allocation::stack(SpillSlot::new(SpillSlot::MAX - idx))
                };
                let is_stack_alloc = |alloc: Allocation| {
                    if let Some(preg) = alloc.as_reg() {
                        self.pregs[preg.index()].is_stack
                    } else {
                        alloc.is_stack()
                    }
                };
                let preferred_victim = self.preferred_victim_by_class[regclass as usize];

                let scratch_resolver = MoveAndScratchResolver {
                    find_free_reg,
                    get_stackslot,
                    is_stack_alloc,
                    borrowed_scratch_reg: preferred_victim,
                };

                let resolved = scratch_resolver.compute(resolved);

                let mut rewrites = FxHashMap::default();
                for i in 0..stackslot_idx {
                    if i >= self.extra_spillslots_by_class[regclass as usize].len() {
                        let slot =
                            self.allocate_spillslot(self.func.spillslot_size(regclass) as u32);
                        self.extra_spillslots_by_class[regclass as usize].push(slot);
                    }
                    rewrites.insert(
                        Allocation::stack(SpillSlot::new(SpillSlot::MAX - i)),
                        self.extra_spillslots_by_class[regclass as usize][i],
                    );
                }

                for (src, dst, to_vreg) in resolved {
                    let src = rewrites.get(&src).cloned().unwrap_or(src);
                    let dst = rewrites.get(&dst).cloned().unwrap_or(dst);
                    trace!("  resolved: {} -> {} ({:?})", src, dst, to_vreg);
                    let action = redundant_moves.process_move(src, dst, to_vreg);
                    if !action.elide {
                        edits.add(pos_prio, src, dst);
                    } else {
                        trace!("    -> redundant move elided");
                    }
                }
            }
        }

        // Ensure edits are in sorted ProgPoint order. N.B.: this must
        // be a stable sort! We have to keep the order produced by the
        // parallel-move resolver for all moves within a single sort
        // key.
        edits.sort();
        self.output.stats.edits_count = edits.len();

        // Add debug annotations.
        if self.annotations_enabled {
            for &(pos_prio, ref edit) in edits.iter() {
                match edit {
                    &Edit::Move { from, to } => {
                        self.annotate(pos_prio.pos, format!("move {} -> {}", from, to));
                    }
                }
            }
        }

        edits
    }
}