/rust/registry/src/index.crates.io-6f17d22bba15001f/cranelift-codegen-0.91.1/src/remove_constant_phis.rs
Line | Count | Source (jump to first uncovered line) |
1 | | //! A Constant-Phi-Node removal pass. |
2 | | |
3 | | use crate::dominator_tree::DominatorTree; |
4 | | use crate::entity::EntityList; |
5 | | use crate::fx::FxHashMap; |
6 | | use crate::fx::FxHashSet; |
7 | | use crate::ir; |
8 | | use crate::ir::instructions::BranchInfo; |
9 | | use crate::ir::Function; |
10 | | use crate::ir::{Block, Inst, Value}; |
11 | | use crate::timing; |
12 | | use arrayvec::ArrayVec; |
13 | | use bumpalo::Bump; |
14 | | use cranelift_entity::SecondaryMap; |
15 | | use smallvec::SmallVec; |
16 | | |
17 | | // A note on notation. For the sake of clarity, this file uses the phrase |
18 | | // "formal parameters" to mean the `Value`s listed in the block head, and |
19 | | // "actual parameters" to mean the `Value`s passed in a branch or a jump: |
20 | | // |
21 | | // block4(v16: i32, v18: i32): <-- formal parameters |
22 | | // ... |
23 | | // brnz v27, block7(v22, v24) <-- actual parameters |
24 | | // jump block6 |
25 | | |
26 | | // This transformation pass (conceptually) partitions all values in the |
27 | | // function into two groups: |
28 | | // |
29 | | // * Group A: values defined by block formal parameters, except for the entry block. |
30 | | // |
31 | | // * Group B: All other values: that is, values defined by instructions, |
32 | | // and the formals of the entry block. |
33 | | // |
34 | | // For each value in Group A, it attempts to establish whether it will have |
35 | | // the value of exactly one member of Group B. If so, the formal parameter is |
36 | | // deleted, all corresponding actual parameters (in jumps/branches to the |
37 | | // defining block) are deleted, and a rename is inserted. |
38 | | // |
39 | | // The entry block is special-cased because (1) we don't know what values flow |
40 | | // to its formals and (2) in any case we can't change its formals. |
41 | | // |
42 | | // Work proceeds in three phases. |
43 | | // |
44 | | // * Phase 1: examine all instructions. For each block, make up a useful |
45 | | // grab-bag of information, `BlockSummary`, that summarises the block's |
46 | | // formals and jump/branch instruction. This is used by Phases 2 and 3. |
47 | | // |
48 | | // * Phase 2: for each value in Group A, try to find a single Group B value |
49 | | // that flows to it. This is done using a classical iterative forward |
50 | | // dataflow analysis over a simple constant-propagation style lattice. It |
51 | | // converges quickly in practice -- I have seen at most 4 iterations. This |
52 | | // is relatively cheap because the iteration is done over the |
53 | | // `BlockSummary`s, and does not visit each instruction. The resulting |
54 | | // fixed point is stored in a `SolverState`. |
55 | | // |
56 | | // * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to |
57 | | // remove redundant formals and actuals, and to insert suitable renames. |
58 | | // |
59 | | // Note that the effectiveness of the analysis depends on on the fact that |
60 | | // there are no copy instructions in Cranelift's IR. If there were, the |
61 | | // computation of `actual_absval` in Phase 2 would have to be extended to |
62 | | // chase through such copies. |
63 | | // |
64 | | // For large functions, the analysis cost using the new AArch64 backend is about |
65 | | // 0.6% of the non-optimising compile time, as measured by instruction counts. |
66 | | // This transformation usually pays for itself several times over, though, by |
67 | | // reducing the isel/regalloc cost downstream. Gains of up to 7% have been |
68 | | // seen for large functions. |
69 | | |
70 | | /// The `Value`s (Group B) that can flow to a formal parameter (Group A). |
71 | | #[derive(Clone, Copy, Debug, PartialEq)] |
72 | | enum AbstractValue { |
73 | | /// Two or more values flow to this formal. |
74 | | Many, |
75 | | |
76 | | /// Exactly one value, as stated, flows to this formal. The `Value`s that |
77 | | /// can appear here are exactly: `Value`s defined by `Inst`s, plus the |
78 | | /// `Value`s defined by the formals of the entry block. Note that this is |
79 | | /// exactly the set of `Value`s that are *not* tracked in the solver below |
80 | | /// (see `SolverState`). |
81 | | One(Value /*Group B*/), |
82 | | |
83 | | /// No value flows to this formal. |
84 | | None, |
85 | | } |
86 | | |
87 | | impl AbstractValue { |
88 | 8.16k | fn join(self, other: AbstractValue) -> AbstractValue { |
89 | 8.16k | match (self, other) { |
90 | | // Joining with `None` has no effect |
91 | 3.62k | (AbstractValue::None, p2) => p2, |
92 | 0 | (p1, AbstractValue::None) => p1, |
93 | | // Joining with `Many` produces `Many` |
94 | 240 | (AbstractValue::Many, _p2) => AbstractValue::Many, |
95 | 2 | (_p1, AbstractValue::Many) => AbstractValue::Many, |
96 | | // The only interesting case |
97 | 4.30k | (AbstractValue::One(v1), AbstractValue::One(v2)) => { |
98 | 4.30k | if v1 == v2 { |
99 | 4.24k | AbstractValue::One(v1) |
100 | | } else { |
101 | 54 | AbstractValue::Many |
102 | | } |
103 | | } |
104 | | } |
105 | 8.16k | } |
106 | | |
107 | 11.1k | fn is_one(self) -> bool { |
108 | 11.1k | matches!(self, AbstractValue::One(_)) |
109 | 11.1k | } |
110 | | } |
111 | | |
112 | | #[derive(Clone, Copy, Debug)] |
113 | | struct OutEdge<'a> { |
114 | | /// An instruction that transfers control. |
115 | | inst: Inst, |
116 | | /// The block that control is transferred to. |
117 | | block: Block, |
118 | | /// The arguments to that block. |
119 | | /// |
120 | | /// These values can be from both groups A and B. |
121 | | args: &'a [Value], |
122 | | } |
123 | | |
124 | | impl<'a> OutEdge<'a> { |
125 | | /// Construct a new `OutEdge` for the given instruction. |
126 | | /// |
127 | | /// Returns `None` if this is an edge without any block arguments, which |
128 | | /// means we can ignore it for this analysis's purposes. |
129 | | #[inline] |
130 | 192k | fn new(bump: &'a Bump, dfg: &ir::DataFlowGraph, inst: Inst, block: Block) -> Option<Self> { |
131 | 192k | let inst_var_args = dfg.inst_variable_args(inst); |
132 | 192k | |
133 | 192k | // Skip edges without params. |
134 | 192k | if inst_var_args.is_empty() { |
135 | 187k | return None; |
136 | 4.07k | } |
137 | 4.07k | |
138 | 4.07k | Some(OutEdge { |
139 | 4.07k | inst, |
140 | 4.07k | block, |
141 | 4.07k | args: bump.alloc_slice_fill_iter( |
142 | 4.07k | inst_var_args |
143 | 4.07k | .iter() |
144 | 4.07k | .map(|value| dfg.resolve_aliases(*value)), |
145 | 4.07k | ), |
146 | 4.07k | }) |
147 | 192k | } |
148 | | } |
149 | | |
150 | | /// For some block, a useful bundle of info. The `Block` itself is not stored |
151 | | /// here since it will be the key in the associated `FxHashMap` -- see |
152 | | /// `summaries` below. For the `SmallVec` tuning params: most blocks have |
153 | | /// few parameters, hence `4`. And almost all blocks have either one or two |
154 | | /// successors, hence `2`. |
155 | | #[derive(Clone, Debug, Default)] |
156 | | struct BlockSummary<'a> { |
157 | | /// Formal parameters for this `Block`. |
158 | | /// |
159 | | /// These values are from group A. |
160 | | formals: &'a [Value], |
161 | | |
162 | | /// Each outgoing edge from this block. |
163 | | /// |
164 | | /// We don't bother to include transfers that pass zero parameters |
165 | | /// since that makes more work for the solver for no purpose. |
166 | | /// |
167 | | /// Note that, because blocks used with `br_table`s cannot have block |
168 | | /// arguments, there are at most two outgoing edges from these blocks. |
169 | | dests: ArrayVec<OutEdge<'a>, 2>, |
170 | | } |
171 | | |
172 | | impl<'a> BlockSummary<'a> { |
173 | | /// Construct a new `BlockSummary`, using `values` as its backing storage. |
174 | | #[inline] |
175 | 328k | fn new(bump: &'a Bump, formals: &[Value]) -> Self { |
176 | 328k | Self { |
177 | 328k | formals: bump.alloc_slice_copy(formals), |
178 | 328k | dests: Default::default(), |
179 | 328k | } |
180 | 328k | } |
181 | | } |
182 | | |
183 | | /// Solver state. This holds a AbstractValue for each formal parameter, except |
184 | | /// for those from the entry block. |
185 | | struct SolverState { |
186 | | absvals: FxHashMap<Value /*Group A*/, AbstractValue>, |
187 | | } |
188 | | |
189 | | impl SolverState { |
190 | 139k | fn new() -> Self { |
191 | 139k | Self { |
192 | 139k | absvals: FxHashMap::default(), |
193 | 139k | } |
194 | 139k | } |
195 | | |
196 | 19.2k | fn get(&self, actual: Value) -> AbstractValue { |
197 | 19.2k | *self |
198 | 19.2k | .absvals |
199 | 19.2k | .get(&actual) |
200 | 19.2k | .unwrap_or_else(|| panic!("SolverState::get: formal param {:?} is untracked?!", actual)) |
201 | 19.2k | } |
202 | | |
203 | 8.16k | fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> { |
204 | 8.16k | self.absvals.get(&actual) |
205 | 8.16k | } |
206 | | |
207 | 3.68k | fn set(&mut self, actual: Value, lp: AbstractValue) { |
208 | 3.68k | match self.absvals.insert(actual, lp) { |
209 | 3.68k | Some(_old_lp) => {} |
210 | 0 | None => panic!("SolverState::set: formal param {:?} is untracked?!", actual), |
211 | | } |
212 | 3.68k | } |
213 | | } |
214 | | |
215 | | /// Detect phis in `func` that will only ever produce one value, using a |
216 | | /// classic forward dataflow analysis. Then remove them. |
217 | | #[inline(never)] |
218 | 139k | pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) { |
219 | 139k | let _tt = timing::remove_constant_phis(); |
220 | 139k | debug_assert!(domtree.is_valid()); |
221 | | |
222 | | // Phase 1 of 3: for each block, make a summary containing all relevant |
223 | | // info. The solver will iterate over the summaries, rather than having |
224 | | // to inspect each instruction in each block. |
225 | 139k | let bump = |
226 | 139k | Bump::with_capacity(domtree.cfg_postorder().len() * 4 * std::mem::size_of::<Value>()); |
227 | 139k | let mut summaries = |
228 | 139k | SecondaryMap::<Block, BlockSummary>::with_capacity(domtree.cfg_postorder().len()); |
229 | | |
230 | 328k | for b in domtree.cfg_postorder().iter().rev().copied() { |
231 | 328k | let formals = func.dfg.block_params(b); |
232 | 328k | let mut summary = BlockSummary::new(&bump, formals); |
233 | | |
234 | 1.03M | for inst in func.layout.block_insts(b) { |
235 | 1.03M | let idetails = &func.dfg[inst]; |
236 | | // Note that multi-dest transfers (i.e., branch tables) don't |
237 | | // carry parameters in our IR, so we only have to care about |
238 | | // `SingleDest` here. |
239 | 1.03M | if let BranchInfo::SingleDest(dest, _) = idetails.analyze_branch(&func.dfg.value_lists) |
240 | | { |
241 | 192k | if let Some(edge) = OutEdge::new(&bump, &func.dfg, inst, dest) { |
242 | 4.07k | summary.dests.push(edge); |
243 | 187k | } |
244 | 846k | } |
245 | | } |
246 | | |
247 | | // Ensure the invariant that all blocks (except for the entry) appear |
248 | | // in the summary, *unless* they have neither formals nor any |
249 | | // param-carrying branches/jumps. |
250 | 328k | if formals.len() > 0 || summary.dests.len() > 0 { |
251 | 143k | summaries[b] = summary; |
252 | 184k | } |
253 | | } |
254 | | |
255 | | // Phase 2 of 3: iterate over the summaries in reverse postorder, |
256 | | // computing new `AbstractValue`s for each tracked `Value`. The set of |
257 | | // tracked `Value`s is exactly Group A as described above. |
258 | | |
259 | 139k | let entry_block = func |
260 | 139k | .layout |
261 | 139k | .entry_block() |
262 | 139k | .expect("remove_constant_phis: entry block unknown"); |
263 | 139k | |
264 | 139k | // Set up initial solver state |
265 | 139k | let mut state = SolverState::new(); |
266 | | |
267 | 328k | for b in domtree.cfg_postorder().iter().rev().copied() { |
268 | | // For each block, get the formals |
269 | 328k | if b == entry_block { |
270 | 139k | continue; |
271 | 188k | } |
272 | 188k | let formals = func.dfg.block_params(b); |
273 | 192k | for formal in formals { |
274 | 3.62k | let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None); |
275 | 3.62k | assert!(mb_old_absval.is_none()); |
276 | | } |
277 | | } |
278 | | |
279 | | // Solve: repeatedly traverse the blocks in reverse postorder, until there |
280 | | // are no changes. |
281 | 139k | let mut iter_no = 0; |
282 | 140k | loop { |
283 | 140k | iter_no += 1; |
284 | 140k | let mut changed = false; |
285 | | |
286 | 342k | for src in domtree.cfg_postorder().iter().rev().copied() { |
287 | 342k | let src_summary = &summaries[src]; |
288 | 350k | for edge in &src_summary.dests { |
289 | 8.16k | assert!(edge.block != entry_block); |
290 | | // By contrast, the dst block must have a summary. Phase 1 |
291 | | // will have only included an entry in `src_summary.dests` if |
292 | | // that branch/jump carried at least one parameter. So the |
293 | | // dst block does take parameters, so it must have a summary. |
294 | 8.16k | let dst_summary = &summaries[edge.block]; |
295 | 8.16k | let dst_formals = &dst_summary.formals; |
296 | 8.16k | assert_eq!(edge.args.len(), dst_formals.len()); |
297 | 8.16k | for (formal, actual) in dst_formals.iter().zip(edge.args) { |
298 | | // Find the abstract value for `actual`. If it is a block |
299 | | // formal parameter then the most recent abstract value is |
300 | | // to be found in the solver state. If not, then it's a |
301 | | // real value defining point (not a phi), in which case |
302 | | // return it itself. |
303 | 8.16k | let actual_absval = match state.maybe_get(*actual) { |
304 | 5.35k | Some(pt) => *pt, |
305 | 2.81k | None => AbstractValue::One(*actual), |
306 | | }; |
307 | | |
308 | | // And `join` the new value with the old. |
309 | 8.16k | let formal_absval_old = state.get(*formal); |
310 | 8.16k | let formal_absval_new = formal_absval_old.join(actual_absval); |
311 | 8.16k | if formal_absval_new != formal_absval_old { |
312 | 3.68k | changed = true; |
313 | 3.68k | state.set(*formal, formal_absval_new); |
314 | 4.48k | } |
315 | | } |
316 | | } |
317 | | } |
318 | | |
319 | 140k | if !changed { |
320 | 139k | break; |
321 | 1.01k | } |
322 | | } |
323 | | |
324 | 139k | let mut n_consts = 0; |
325 | 139k | for absval in state.absvals.values() { |
326 | 3.62k | if absval.is_one() { |
327 | 3.56k | n_consts += 1; |
328 | 3.56k | } |
329 | | } |
330 | | |
331 | | // Phase 3 of 3: edit the function to remove constant formals, using the |
332 | | // summaries and the final solver state as a guide. |
333 | | |
334 | | // Make up a set of blocks that need editing. |
335 | 139k | let mut need_editing = FxHashSet::<Block>::default(); |
336 | 153k | for (block, summary) in summaries.iter() { |
337 | 153k | if block == entry_block { |
338 | 139k | continue; |
339 | 14.4k | } |
340 | 14.5k | for formal in summary.formals { |
341 | 3.62k | let formal_absval = state.get(*formal); |
342 | 3.62k | if formal_absval.is_one() { |
343 | 3.56k | need_editing.insert(block); |
344 | 3.56k | break; |
345 | 62 | } |
346 | | } |
347 | | } |
348 | | |
349 | | // Firstly, deal with the formals. For each formal which is redundant, |
350 | | // remove it, and also add a reroute from it to the constant value which |
351 | | // it we know it to be. |
352 | 143k | for b in &need_editing { |
353 | 3.56k | let mut del_these = SmallVec::<[(Value, Value); 32]>::new(); |
354 | 3.56k | let formals: &[Value] = func.dfg.block_params(*b); |
355 | 7.12k | for formal in formals { |
356 | | // The state must give an absval for `formal`. |
357 | 3.56k | if let AbstractValue::One(replacement_val) = state.get(*formal) { |
358 | 3.56k | del_these.push((*formal, replacement_val)); |
359 | 3.56k | } |
360 | | } |
361 | | // We can delete the formals in any order. However, |
362 | | // `remove_block_param` works by sliding backwards all arguments to |
363 | | // the right of the value it is asked to delete. Hence when removing more |
364 | | // than one formal, it is significantly more efficient to ask it to |
365 | | // remove the rightmost formal first, and hence this `rev()`. |
366 | 3.56k | for (redundant_formal, replacement_val) in del_these.into_iter().rev() { |
367 | 3.56k | func.dfg.remove_block_param(redundant_formal); |
368 | 3.56k | func.dfg.change_to_alias(redundant_formal, replacement_val); |
369 | 3.56k | } |
370 | | } |
371 | | |
372 | | // Secondly, visit all branch insns. If the destination has had its |
373 | | // formals changed, change the actuals accordingly. Don't scan all insns, |
374 | | // rather just visit those as listed in the summaries we prepared earlier. |
375 | 153k | for summary in summaries.values() { |
376 | 157k | for edge in &summary.dests { |
377 | 4.07k | if !need_editing.contains(&edge.block) { |
378 | 176 | continue; |
379 | 3.89k | } |
380 | 3.89k | |
381 | 3.89k | let old_actuals = func.dfg[edge.inst].take_value_list().unwrap(); |
382 | 3.89k | let num_old_actuals = old_actuals.len(&func.dfg.value_lists); |
383 | 3.89k | let num_fixed_actuals = func.dfg[edge.inst] |
384 | 3.89k | .opcode() |
385 | 3.89k | .constraints() |
386 | 3.89k | .num_fixed_value_arguments(); |
387 | 3.89k | let dst_summary = &summaries[edge.block]; |
388 | 3.89k | |
389 | 3.89k | // Check that the numbers of arguments make sense. |
390 | 3.89k | assert!(num_fixed_actuals <= num_old_actuals); |
391 | 3.89k | assert_eq!( |
392 | 3.89k | num_fixed_actuals + dst_summary.formals.len(), |
393 | 3.89k | num_old_actuals |
394 | 3.89k | ); |
395 | | |
396 | | // Create a new value list. |
397 | 3.89k | let mut new_actuals = EntityList::<Value>::new(); |
398 | | // Copy the fixed args to the new list |
399 | 3.89k | for i in 0..num_fixed_actuals { |
400 | 50 | let val = old_actuals.get(i, &func.dfg.value_lists).unwrap(); |
401 | 50 | new_actuals.push(val, &mut func.dfg.value_lists); |
402 | 50 | } |
403 | | |
404 | | // Copy the variable args (the actual block params) to the new |
405 | | // list, filtering out redundant ones. |
406 | 3.89k | for (i, formal_i) in dst_summary.formals.iter().enumerate() { |
407 | 3.89k | let actual_i = old_actuals |
408 | 3.89k | .get(num_fixed_actuals + i, &func.dfg.value_lists) |
409 | 3.89k | .unwrap(); |
410 | 3.89k | let is_redundant = state.get(*formal_i).is_one(); |
411 | 3.89k | if !is_redundant { |
412 | 0 | new_actuals.push(actual_i, &mut func.dfg.value_lists); |
413 | 3.89k | } |
414 | | } |
415 | 3.89k | func.dfg[edge.inst].put_value_list(new_actuals); |
416 | | } |
417 | | } |
418 | | |
419 | 139k | log::debug!( |
420 | 0 | "do_remove_constant_phis: done, {} iters. {} formals, of which {} const.", |
421 | 0 | iter_no, |
422 | 0 | state.absvals.len(), |
423 | | n_consts |
424 | | ); |
425 | 139k | } |