Coverage Report

Created: 2024-10-16 07:58

/rust/registry/src/index.crates.io-6f17d22bba15001f/cranelift-codegen-0.91.1/src/remove_constant_phis.rs
Line
Count
Source (jump to first uncovered line)
1
//! A Constant-Phi-Node removal pass.
2
3
use crate::dominator_tree::DominatorTree;
4
use crate::entity::EntityList;
5
use crate::fx::FxHashMap;
6
use crate::fx::FxHashSet;
7
use crate::ir;
8
use crate::ir::instructions::BranchInfo;
9
use crate::ir::Function;
10
use crate::ir::{Block, Inst, Value};
11
use crate::timing;
12
use arrayvec::ArrayVec;
13
use bumpalo::Bump;
14
use cranelift_entity::SecondaryMap;
15
use smallvec::SmallVec;
16
17
// A note on notation.  For the sake of clarity, this file uses the phrase
18
// "formal parameters" to mean the `Value`s listed in the block head, and
19
// "actual parameters" to mean the `Value`s passed in a branch or a jump:
20
//
21
// block4(v16: i32, v18: i32):    <-- formal parameters
22
//   ...
23
//   brnz v27, block7(v22, v24)   <-- actual parameters
24
//   jump block6
25
26
// This transformation pass (conceptually) partitions all values in the
27
// function into two groups:
28
//
29
// * Group A: values defined by block formal parameters, except for the entry block.
30
//
31
// * Group B: All other values: that is, values defined by instructions,
32
//   and the formals of the entry block.
33
//
34
// For each value in Group A, it attempts to establish whether it will have
35
// the value of exactly one member of Group B.  If so, the formal parameter is
36
// deleted, all corresponding actual parameters (in jumps/branches to the
37
// defining block) are deleted, and a rename is inserted.
38
//
39
// The entry block is special-cased because (1) we don't know what values flow
40
// to its formals and (2) in any case we can't change its formals.
41
//
42
// Work proceeds in three phases.
43
//
44
// * Phase 1: examine all instructions.  For each block, make up a useful
45
//   grab-bag of information, `BlockSummary`, that summarises the block's
46
//   formals and jump/branch instruction.  This is used by Phases 2 and 3.
47
//
48
// * Phase 2: for each value in Group A, try to find a single Group B value
49
//   that flows to it.  This is done using a classical iterative forward
50
//   dataflow analysis over a simple constant-propagation style lattice.  It
51
//   converges quickly in practice -- I have seen at most 4 iterations.  This
52
//   is relatively cheap because the iteration is done over the
53
//   `BlockSummary`s, and does not visit each instruction.  The resulting
54
//   fixed point is stored in a `SolverState`.
55
//
56
// * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to
57
//   remove redundant formals and actuals, and to insert suitable renames.
58
//
59
// Note that the effectiveness of the analysis depends on on the fact that
60
// there are no copy instructions in Cranelift's IR.  If there were, the
61
// computation of `actual_absval` in Phase 2 would have to be extended to
62
// chase through such copies.
63
//
64
// For large functions, the analysis cost using the new AArch64 backend is about
65
// 0.6% of the non-optimising compile time, as measured by instruction counts.
66
// This transformation usually pays for itself several times over, though, by
67
// reducing the isel/regalloc cost downstream.  Gains of up to 7% have been
68
// seen for large functions.
69
70
/// The `Value`s (Group B) that can flow to a formal parameter (Group A).
71
#[derive(Clone, Copy, Debug, PartialEq)]
72
enum AbstractValue {
73
    /// Two or more values flow to this formal.
74
    Many,
75
76
    /// Exactly one value, as stated, flows to this formal.  The `Value`s that
77
    /// can appear here are exactly: `Value`s defined by `Inst`s, plus the
78
    /// `Value`s defined by the formals of the entry block.  Note that this is
79
    /// exactly the set of `Value`s that are *not* tracked in the solver below
80
    /// (see `SolverState`).
81
    One(Value /*Group B*/),
82
83
    /// No value flows to this formal.
84
    None,
85
}
86
87
impl AbstractValue {
88
8.16k
    fn join(self, other: AbstractValue) -> AbstractValue {
89
8.16k
        match (self, other) {
90
            // Joining with `None` has no effect
91
3.62k
            (AbstractValue::None, p2) => p2,
92
0
            (p1, AbstractValue::None) => p1,
93
            // Joining with `Many` produces `Many`
94
240
            (AbstractValue::Many, _p2) => AbstractValue::Many,
95
2
            (_p1, AbstractValue::Many) => AbstractValue::Many,
96
            // The only interesting case
97
4.30k
            (AbstractValue::One(v1), AbstractValue::One(v2)) => {
98
4.30k
                if v1 == v2 {
99
4.24k
                    AbstractValue::One(v1)
100
                } else {
101
54
                    AbstractValue::Many
102
                }
103
            }
104
        }
105
8.16k
    }
106
107
11.1k
    fn is_one(self) -> bool {
108
11.1k
        matches!(self, AbstractValue::One(_))
109
11.1k
    }
110
}
111
112
#[derive(Clone, Copy, Debug)]
113
struct OutEdge<'a> {
114
    /// An instruction that transfers control.
115
    inst: Inst,
116
    /// The block that control is transferred to.
117
    block: Block,
118
    /// The arguments to that block.
119
    ///
120
    /// These values can be from both groups A and B.
121
    args: &'a [Value],
122
}
123
124
impl<'a> OutEdge<'a> {
125
    /// Construct a new `OutEdge` for the given instruction.
126
    ///
127
    /// Returns `None` if this is an edge without any block arguments, which
128
    /// means we can ignore it for this analysis's purposes.
129
    #[inline]
130
192k
    fn new(bump: &'a Bump, dfg: &ir::DataFlowGraph, inst: Inst, block: Block) -> Option<Self> {
131
192k
        let inst_var_args = dfg.inst_variable_args(inst);
132
192k
133
192k
        // Skip edges without params.
134
192k
        if inst_var_args.is_empty() {
135
187k
            return None;
136
4.07k
        }
137
4.07k
138
4.07k
        Some(OutEdge {
139
4.07k
            inst,
140
4.07k
            block,
141
4.07k
            args: bump.alloc_slice_fill_iter(
142
4.07k
                inst_var_args
143
4.07k
                    .iter()
144
4.07k
                    .map(|value| dfg.resolve_aliases(*value)),
145
4.07k
            ),
146
4.07k
        })
147
192k
    }
148
}
149
150
/// For some block, a useful bundle of info.  The `Block` itself is not stored
151
/// here since it will be the key in the associated `FxHashMap` -- see
152
/// `summaries` below.  For the `SmallVec` tuning params: most blocks have
153
/// few parameters, hence `4`.  And almost all blocks have either one or two
154
/// successors, hence `2`.
155
#[derive(Clone, Debug, Default)]
156
struct BlockSummary<'a> {
157
    /// Formal parameters for this `Block`.
158
    ///
159
    /// These values are from group A.
160
    formals: &'a [Value],
161
162
    /// Each outgoing edge from this block.
163
    ///
164
    /// We don't bother to include transfers that pass zero parameters
165
    /// since that makes more work for the solver for no purpose.
166
    ///
167
    /// Note that, because blocks used with `br_table`s cannot have block
168
    /// arguments, there are at most two outgoing edges from these blocks.
169
    dests: ArrayVec<OutEdge<'a>, 2>,
170
}
171
172
impl<'a> BlockSummary<'a> {
173
    /// Construct a new `BlockSummary`, using `values` as its backing storage.
174
    #[inline]
175
328k
    fn new(bump: &'a Bump, formals: &[Value]) -> Self {
176
328k
        Self {
177
328k
            formals: bump.alloc_slice_copy(formals),
178
328k
            dests: Default::default(),
179
328k
        }
180
328k
    }
181
}
182
183
/// Solver state.  This holds a AbstractValue for each formal parameter, except
184
/// for those from the entry block.
185
struct SolverState {
186
    absvals: FxHashMap<Value /*Group A*/, AbstractValue>,
187
}
188
189
impl SolverState {
190
139k
    fn new() -> Self {
191
139k
        Self {
192
139k
            absvals: FxHashMap::default(),
193
139k
        }
194
139k
    }
195
196
19.2k
    fn get(&self, actual: Value) -> AbstractValue {
197
19.2k
        *self
198
19.2k
            .absvals
199
19.2k
            .get(&actual)
200
19.2k
            .unwrap_or_else(|| panic!("SolverState::get: formal param {:?} is untracked?!", actual))
201
19.2k
    }
202
203
8.16k
    fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> {
204
8.16k
        self.absvals.get(&actual)
205
8.16k
    }
206
207
3.68k
    fn set(&mut self, actual: Value, lp: AbstractValue) {
208
3.68k
        match self.absvals.insert(actual, lp) {
209
3.68k
            Some(_old_lp) => {}
210
0
            None => panic!("SolverState::set: formal param {:?} is untracked?!", actual),
211
        }
212
3.68k
    }
213
}
214
215
/// Detect phis in `func` that will only ever produce one value, using a
216
/// classic forward dataflow analysis.  Then remove them.
217
#[inline(never)]
218
139k
pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) {
219
139k
    let _tt = timing::remove_constant_phis();
220
139k
    debug_assert!(domtree.is_valid());
221
222
    // Phase 1 of 3: for each block, make a summary containing all relevant
223
    // info.  The solver will iterate over the summaries, rather than having
224
    // to inspect each instruction in each block.
225
139k
    let bump =
226
139k
        Bump::with_capacity(domtree.cfg_postorder().len() * 4 * std::mem::size_of::<Value>());
227
139k
    let mut summaries =
228
139k
        SecondaryMap::<Block, BlockSummary>::with_capacity(domtree.cfg_postorder().len());
229
230
328k
    for b in domtree.cfg_postorder().iter().rev().copied() {
231
328k
        let formals = func.dfg.block_params(b);
232
328k
        let mut summary = BlockSummary::new(&bump, formals);
233
234
1.03M
        for inst in func.layout.block_insts(b) {
235
1.03M
            let idetails = &func.dfg[inst];
236
            // Note that multi-dest transfers (i.e., branch tables) don't
237
            // carry parameters in our IR, so we only have to care about
238
            // `SingleDest` here.
239
1.03M
            if let BranchInfo::SingleDest(dest, _) = idetails.analyze_branch(&func.dfg.value_lists)
240
            {
241
192k
                if let Some(edge) = OutEdge::new(&bump, &func.dfg, inst, dest) {
242
4.07k
                    summary.dests.push(edge);
243
187k
                }
244
846k
            }
245
        }
246
247
        // Ensure the invariant that all blocks (except for the entry) appear
248
        // in the summary, *unless* they have neither formals nor any
249
        // param-carrying branches/jumps.
250
328k
        if formals.len() > 0 || summary.dests.len() > 0 {
251
143k
            summaries[b] = summary;
252
184k
        }
253
    }
254
255
    // Phase 2 of 3: iterate over the summaries in reverse postorder,
256
    // computing new `AbstractValue`s for each tracked `Value`.  The set of
257
    // tracked `Value`s is exactly Group A as described above.
258
259
139k
    let entry_block = func
260
139k
        .layout
261
139k
        .entry_block()
262
139k
        .expect("remove_constant_phis: entry block unknown");
263
139k
264
139k
    // Set up initial solver state
265
139k
    let mut state = SolverState::new();
266
267
328k
    for b in domtree.cfg_postorder().iter().rev().copied() {
268
        // For each block, get the formals
269
328k
        if b == entry_block {
270
139k
            continue;
271
188k
        }
272
188k
        let formals = func.dfg.block_params(b);
273
192k
        for formal in formals {
274
3.62k
            let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None);
275
3.62k
            assert!(mb_old_absval.is_none());
276
        }
277
    }
278
279
    // Solve: repeatedly traverse the blocks in reverse postorder, until there
280
    // are no changes.
281
139k
    let mut iter_no = 0;
282
140k
    loop {
283
140k
        iter_no += 1;
284
140k
        let mut changed = false;
285
286
342k
        for src in domtree.cfg_postorder().iter().rev().copied() {
287
342k
            let src_summary = &summaries[src];
288
350k
            for edge in &src_summary.dests {
289
8.16k
                assert!(edge.block != entry_block);
290
                // By contrast, the dst block must have a summary.  Phase 1
291
                // will have only included an entry in `src_summary.dests` if
292
                // that branch/jump carried at least one parameter.  So the
293
                // dst block does take parameters, so it must have a summary.
294
8.16k
                let dst_summary = &summaries[edge.block];
295
8.16k
                let dst_formals = &dst_summary.formals;
296
8.16k
                assert_eq!(edge.args.len(), dst_formals.len());
297
8.16k
                for (formal, actual) in dst_formals.iter().zip(edge.args) {
298
                    // Find the abstract value for `actual`.  If it is a block
299
                    // formal parameter then the most recent abstract value is
300
                    // to be found in the solver state.  If not, then it's a
301
                    // real value defining point (not a phi), in which case
302
                    // return it itself.
303
8.16k
                    let actual_absval = match state.maybe_get(*actual) {
304
5.35k
                        Some(pt) => *pt,
305
2.81k
                        None => AbstractValue::One(*actual),
306
                    };
307
308
                    // And `join` the new value with the old.
309
8.16k
                    let formal_absval_old = state.get(*formal);
310
8.16k
                    let formal_absval_new = formal_absval_old.join(actual_absval);
311
8.16k
                    if formal_absval_new != formal_absval_old {
312
3.68k
                        changed = true;
313
3.68k
                        state.set(*formal, formal_absval_new);
314
4.48k
                    }
315
                }
316
            }
317
        }
318
319
140k
        if !changed {
320
139k
            break;
321
1.01k
        }
322
    }
323
324
139k
    let mut n_consts = 0;
325
139k
    for absval in state.absvals.values() {
326
3.62k
        if absval.is_one() {
327
3.56k
            n_consts += 1;
328
3.56k
        }
329
    }
330
331
    // Phase 3 of 3: edit the function to remove constant formals, using the
332
    // summaries and the final solver state as a guide.
333
334
    // Make up a set of blocks that need editing.
335
139k
    let mut need_editing = FxHashSet::<Block>::default();
336
153k
    for (block, summary) in summaries.iter() {
337
153k
        if block == entry_block {
338
139k
            continue;
339
14.4k
        }
340
14.5k
        for formal in summary.formals {
341
3.62k
            let formal_absval = state.get(*formal);
342
3.62k
            if formal_absval.is_one() {
343
3.56k
                need_editing.insert(block);
344
3.56k
                break;
345
62
            }
346
        }
347
    }
348
349
    // Firstly, deal with the formals.  For each formal which is redundant,
350
    // remove it, and also add a reroute from it to the constant value which
351
    // it we know it to be.
352
143k
    for b in &need_editing {
353
3.56k
        let mut del_these = SmallVec::<[(Value, Value); 32]>::new();
354
3.56k
        let formals: &[Value] = func.dfg.block_params(*b);
355
7.12k
        for formal in formals {
356
            // The state must give an absval for `formal`.
357
3.56k
            if let AbstractValue::One(replacement_val) = state.get(*formal) {
358
3.56k
                del_these.push((*formal, replacement_val));
359
3.56k
            }
360
        }
361
        // We can delete the formals in any order.  However,
362
        // `remove_block_param` works by sliding backwards all arguments to
363
        // the right of the value it is asked to delete.  Hence when removing more
364
        // than one formal, it is significantly more efficient to ask it to
365
        // remove the rightmost formal first, and hence this `rev()`.
366
3.56k
        for (redundant_formal, replacement_val) in del_these.into_iter().rev() {
367
3.56k
            func.dfg.remove_block_param(redundant_formal);
368
3.56k
            func.dfg.change_to_alias(redundant_formal, replacement_val);
369
3.56k
        }
370
    }
371
372
    // Secondly, visit all branch insns.  If the destination has had its
373
    // formals changed, change the actuals accordingly.  Don't scan all insns,
374
    // rather just visit those as listed in the summaries we prepared earlier.
375
153k
    for summary in summaries.values() {
376
157k
        for edge in &summary.dests {
377
4.07k
            if !need_editing.contains(&edge.block) {
378
176
                continue;
379
3.89k
            }
380
3.89k
381
3.89k
            let old_actuals = func.dfg[edge.inst].take_value_list().unwrap();
382
3.89k
            let num_old_actuals = old_actuals.len(&func.dfg.value_lists);
383
3.89k
            let num_fixed_actuals = func.dfg[edge.inst]
384
3.89k
                .opcode()
385
3.89k
                .constraints()
386
3.89k
                .num_fixed_value_arguments();
387
3.89k
            let dst_summary = &summaries[edge.block];
388
3.89k
389
3.89k
            // Check that the numbers of arguments make sense.
390
3.89k
            assert!(num_fixed_actuals <= num_old_actuals);
391
3.89k
            assert_eq!(
392
3.89k
                num_fixed_actuals + dst_summary.formals.len(),
393
3.89k
                num_old_actuals
394
3.89k
            );
395
396
            // Create a new value list.
397
3.89k
            let mut new_actuals = EntityList::<Value>::new();
398
            // Copy the fixed args to the new list
399
3.89k
            for i in 0..num_fixed_actuals {
400
50
                let val = old_actuals.get(i, &func.dfg.value_lists).unwrap();
401
50
                new_actuals.push(val, &mut func.dfg.value_lists);
402
50
            }
403
404
            // Copy the variable args (the actual block params) to the new
405
            // list, filtering out redundant ones.
406
3.89k
            for (i, formal_i) in dst_summary.formals.iter().enumerate() {
407
3.89k
                let actual_i = old_actuals
408
3.89k
                    .get(num_fixed_actuals + i, &func.dfg.value_lists)
409
3.89k
                    .unwrap();
410
3.89k
                let is_redundant = state.get(*formal_i).is_one();
411
3.89k
                if !is_redundant {
412
0
                    new_actuals.push(actual_i, &mut func.dfg.value_lists);
413
3.89k
                }
414
            }
415
3.89k
            func.dfg[edge.inst].put_value_list(new_actuals);
416
        }
417
    }
418
419
139k
    log::debug!(
420
0
        "do_remove_constant_phis: done, {} iters.   {} formals, of which {} const.",
421
0
        iter_no,
422
0
        state.absvals.len(),
423
        n_consts
424
    );
425
139k
}