/src/regalloc2/src/ion/process.rs
Line | Count | Source |
1 | | /* |
2 | | * This file was initially derived from the files |
3 | | * `js/src/jit/BacktrackingAllocator.h` and |
4 | | * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was |
5 | | * originally licensed under the Mozilla Public License 2.0. We |
6 | | * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see |
7 | | * https://github.com/bytecodealliance/regalloc2/issues/7). |
8 | | * |
9 | | * Since the initial port, the design has been substantially evolved |
10 | | * and optimized. |
11 | | */ |
12 | | |
13 | | //! Main allocation loop that processes bundles. |
14 | | |
15 | | use super::{ |
16 | | spill_weight_from_constraint, Env, LiveBundleIndex, LiveBundleVec, LiveRangeFlag, |
17 | | LiveRangeIndex, LiveRangeKey, LiveRangeList, LiveRangeListEntry, PRegIndex, RegTraversalIter, |
18 | | Requirement, SpillWeight, UseList, VRegIndex, |
19 | | }; |
20 | | use crate::{ |
21 | | ion::data_structures::{ |
22 | | CodeRange, Use, BUNDLE_MAX_NORMAL_SPILL_WEIGHT, MAX_SPLITS_PER_SPILLSET, |
23 | | MINIMAL_BUNDLE_SPILL_WEIGHT, MINIMAL_FIXED_BUNDLE_SPILL_WEIGHT, |
24 | | }, |
25 | | Allocation, Function, Inst, InstPosition, OperandConstraint, OperandKind, PReg, ProgPoint, |
26 | | RegAllocError, |
27 | | }; |
28 | | use core::fmt::Debug; |
29 | | use smallvec::{smallvec, SmallVec}; |
30 | | |
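| | /// Result of `try_to_allocate_bundle_to_reg` (below):
| | /// - `Allocated`: success, carrying the resulting allocation.
| | /// - `Conflict`: the set of conflicting (evictable) bundles, plus
| | ///   the first program point at which a conflict occurs.
| | /// - `ConflictWithFixed`: overlap with a fixed PReg reservation
| | ///   that cannot be evicted; carries the maximum conflicting
| | ///   bundle weight seen so far and the point of conflict.
| | /// - `ConflictHighCost`: the conflict set's weight already exceeds
| | ///   the caller's `max_allowable_cost`, so the scan stopped early.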
31 | | #[derive(Clone, Debug, PartialEq, Eq)] |
32 | | pub enum AllocRegResult<'a> { |
33 | | Allocated(Allocation), |
34 | | Conflict(&'a [LiveBundleIndex], ProgPoint), |
35 | | ConflictWithFixed(u32, ProgPoint), |
36 | | ConflictHighCost, |
37 | | } |
38 | | |
39 | | impl<'a, F: Function> Env<'a, F> { |
40 | 10.8k | pub fn process_bundles(&mut self) -> Result<(), RegAllocError> { |
41 | 8.93M | while let Some((bundle, hint)) = self.ctx.allocation_queue.pop() { |
42 | 8.92M | self.ctx.output.stats.process_bundle_count += 1; |
43 | 8.92M | self.process_bundle(bundle, hint)?; |
44 | | } |
45 | 10.8k | self.ctx.output.stats.final_liverange_count = self.ranges.len(); |
46 | 10.8k | self.ctx.output.stats.final_bundle_count = self.bundles.len(); |
47 | 10.8k | self.ctx.output.stats.spill_bundle_count = self.spilled_bundles.len(); |
48 | | |
49 | 10.8k | Ok(()) |
50 | 10.8k | } |
51 | | |
52 | 40.6M | pub fn try_to_allocate_bundle_to_reg<'b>( |
53 | 40.6M | &mut self, |
54 | 40.6M | bundle: LiveBundleIndex, |
55 | 40.6M | reg: PRegIndex, |
56 | 40.6M | // if the max bundle weight in the conflict set exceeds this |
57 | 40.6M | // cost (if provided), just return |
58 | 40.6M | // `AllocRegResult::ConflictHighCost`. |
59 | 40.6M | max_allowable_cost: Option<u32>, |
60 | 40.6M | conflicts: &'b mut LiveBundleVec, |
61 | 40.6M | ) -> AllocRegResult<'b> { |
62 | 40.6M | trace!("try_to_allocate_bundle_to_reg: {:?} -> {:?}", bundle, reg); |
63 | 40.6M | conflicts.clear(); |
64 | 40.6M | self.ctx.conflict_set.clear(); |
65 | 40.6M | let mut max_conflict_weight = 0; |
66 | | // Traverse the BTreeMap in order by requesting the whole |
67 | | // range spanned by the bundle and iterating over that |
68 | | // concurrently with our ranges. Because our ranges are in |
69 | | // order, and the BTreeMap is as well, this allows us to have |
70 | | // an overall O(n log n) + O(b) complexity, where the PReg has |
71 | | // n current ranges and the bundle has b ranges, rather than |
72 | | // O(b * n log n) with the simple probe-for-each-bundle-range |
73 | | // approach. |
74 | | // |
75 | | // Note that the comparator function on a CodeRange tests for |
76 | | // *overlap*, so we are checking whether the BTree contains |
77 | | // any preg range that *overlaps* with range `range`, not |
78 | | // literally the range `range`. |
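| | //
| | // A minimal sketch (an illustration, not part of this file) of
| | // that overlap-as-equality comparator: a stand-in `Key`, mirroring
| | // the behavior described above, could look like:
| | //
| | //     use core::cmp::Ordering;
| | //     use std::collections::BTreeMap;
| | //
| | //     #[derive(Clone, Copy)]
| | //     struct Key { from: u32, to: u32 }
| | //     impl Ord for Key {
| | //         fn cmp(&self, other: &Self) -> Ordering {
| | //             if self.to <= other.from { Ordering::Less }
| | //             else if self.from >= other.to { Ordering::Greater }
| | //             else { Ordering::Equal } // any overlap compares Equal
| | //         }
| | //     }
| | //     impl PartialOrd for Key {
| | //         fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
| | //             Some(self.cmp(other))
| | //         }
| | //     }
| | //     impl PartialEq for Key {
| | //         fn eq(&self, other: &Self) -> bool {
| | //             self.cmp(other) == Ordering::Equal
| | //         }
| | //     }
| | //     impl Eq for Key {}
| | //
| | //     let mut btree = BTreeMap::new();
| | //     btree.insert(Key { from: 0, to: 4 }, "lr0");
| | //     // A probe overlapping [0, 4) compares Equal to the stored key:
| | //     assert!(btree.contains_key(&Key { from: 2, to: 3 }));
| | //
| | // Because the keys stored in the BTree never overlap one another,
| | // this ordering stays consistent for stored keys, while a probe
| | // key finds any stored range it overlaps.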
79 | 40.6M | let bundle_ranges = &self.ctx.bundles[bundle].ranges; |
80 | 40.6M | let from_key = LiveRangeKey::from_range(&CodeRange { |
81 | 40.6M | from: bundle_ranges.first().unwrap().range.from, |
82 | 40.6M | to: bundle_ranges.first().unwrap().range.from, |
83 | 40.6M | }); |
84 | 40.6M | let mut preg_range_iter = self.ctx.pregs[reg.index()] |
85 | 40.6M | .allocations |
86 | 40.6M | .btree |
87 | 40.6M | .range(from_key..) |
88 | 40.6M | .peekable(); |
89 | 40.6M | trace!( |
90 | 0 | "alloc map for {:?} in range {:?}..: {:?}", |
91 | | reg, |
92 | | from_key, |
93 | 0 | self.ctx.pregs[reg.index()].allocations.btree |
94 | | ); |
95 | 40.6M | let mut first_conflict: Option<ProgPoint> = None; |
96 | | |
97 | 134M | 'ranges: for entry in bundle_ranges { |
98 | 110M | trace!(" -> range LR {:?}: {:?}", entry.index, entry.range); |
99 | 110M | let key = LiveRangeKey::from_range(&entry.range); |
100 | | |
101 | 110M | let mut skips = 0; |
102 | | 'alloc: loop { |
103 | 192M | trace!(" -> PReg range {:?}", preg_range_iter.peek()); |
104 | | |
105 | | // Advance our BTree traversal until it is >= this bundle |
106 | | // range (i.e., skip PReg allocations in the BTree that |
107 | | // are completely before this bundle range). |
108 | | |
109 | 192M | if preg_range_iter.peek().is_some() && *preg_range_iter.peek().unwrap().0 < key { |
110 | 7.03M | trace!( |
111 | 0 | "Skipping PReg range {:?}", |
112 | 0 | preg_range_iter.peek().unwrap().0 |
113 | | ); |
114 | 7.03M | preg_range_iter.next(); |
115 | 7.03M | skips += 1; |
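| | // Each linear `next()` above is cheap, but a long run of preg
| | // ranges lying entirely before this bundle range would make this
| | // scan linear in their count; after enough skips, cut the scan
| | // short and re-seek with a fresh O(log n) `range()` query
| | // starting at this bundle range instead.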
116 | 7.03M | if skips >= 16 { |
117 | 8.01k | let from_pos = entry.range.from; |
118 | 8.01k | let from_key = LiveRangeKey::from_range(&CodeRange { |
119 | 8.01k | from: from_pos, |
120 | 8.01k | to: from_pos, |
121 | 8.01k | }); |
122 | 8.01k | preg_range_iter = self.ctx.pregs[reg.index()] |
123 | 8.01k | .allocations |
124 | 8.01k | .btree |
125 | 8.01k | .range(from_key..) |
126 | 8.01k | .peekable(); |
127 | 8.01k | skips = 0; |
128 | 7.02M | } |
129 | 7.03M | continue 'alloc; |
130 | 185M | } |
131 | 185M | skips = 0; |
132 | | |
133 | | // If there are no more PReg allocations, we're done! |
134 | 185M | if preg_range_iter.peek().is_none() { |
135 | 4.67M | trace!(" -> no more PReg allocations; so no conflict possible!"); |
136 | 4.67M | break 'ranges; |
137 | 180M | } |
138 | | |
139 | | // If the current PReg range lies entirely beyond this range, there is no conflict; move on to the next bundle range.
140 | 180M | if *preg_range_iter.peek().unwrap().0 > key { |
141 | 93.9M | trace!( |
142 | 0 | " -> next PReg allocation is at {:?}; moving to next VReg range", |
143 | 0 | preg_range_iter.peek().unwrap().0 |
144 | | ); |
145 | 93.9M | break 'alloc; |
146 | 86.4M | } |
147 | | |
148 | | // Otherwise, there is a conflict. |
149 | 86.4M | let preg_key = *preg_range_iter.peek().unwrap().0; |
150 | 86.4M | debug_assert_eq!(preg_key, key); // Assert that this range overlaps. |
151 | 86.4M | let preg_range = preg_range_iter.next().unwrap().1; |
152 | | |
153 | 86.4M | trace!(" -> btree contains range {:?} that overlaps", preg_range); |
154 | 86.4M | if preg_range.is_valid() { |
155 | 75.5M | trace!(" -> from vreg {:?}", self.ctx.ranges[*preg_range].vreg); |
156 | | // range from an allocated bundle: find the bundle and add to |
157 | | // conflicts list. |
158 | 75.5M | let conflict_bundle = self.ctx.ranges[*preg_range].bundle; |
159 | 75.5M | trace!(" -> conflict bundle {:?}", conflict_bundle); |
160 | 75.5M | if self.ctx.conflict_set.insert(conflict_bundle) { |
161 | 37.0M | conflicts.push(conflict_bundle); |
162 | 37.0M | max_conflict_weight = core::cmp::max( |
163 | 37.0M | max_conflict_weight, |
164 | 37.0M | self.ctx.bundles[conflict_bundle].cached_spill_weight(), |
165 | 37.0M | ); |
166 | 37.0M | if max_allowable_cost.is_some() |
167 | 6.03M | && max_conflict_weight > max_allowable_cost.unwrap() |
168 | | { |
169 | 689k | trace!(" -> reached high cost, retrying early"); |
170 | 689k | return AllocRegResult::ConflictHighCost; |
171 | 36.4M | } |
172 | 38.4M | } |
173 | | |
174 | 74.9M | if first_conflict.is_none() { |
175 | 23.8M | first_conflict = Some(ProgPoint::from_index(core::cmp::max( |
176 | 23.8M | preg_key.from, |
177 | 23.8M | key.from, |
178 | 23.8M | ))); |
179 | 51.0M | } |
180 | | } else { |
181 | 10.8M | trace!(" -> conflict with fixed reservation"); |
182 | | // range from a direct use of the PReg (due to clobber). |
183 | 10.8M | return AllocRegResult::ConflictWithFixed( |
184 | 10.8M | max_conflict_weight, |
185 | 10.8M | ProgPoint::from_index(preg_key.from), |
186 | 10.8M | ); |
187 | | } |
188 | | } |
189 | | } |
190 | | |
191 | 29.0M | if conflicts.len() > 0 { |
192 | 20.8M | return AllocRegResult::Conflict(conflicts, first_conflict.unwrap()); |
193 | 8.22M | } |
194 | | |
195 | | // We can allocate! Add our ranges to the preg's BTree. |
196 | 8.22M | let preg = PReg::from_index(reg.index()); |
197 | 8.22M | trace!(" -> bundle {:?} assigned to preg {:?}", bundle, preg); |
198 | 8.22M | self.ctx.bundles[bundle].allocation = Allocation::reg(preg); |
199 | 10.8M | for entry in &self.ctx.bundles[bundle].ranges { |
200 | 10.8M | let key = LiveRangeKey::from_range(&entry.range); |
201 | 10.8M | let res = self.ctx.pregs[reg.index()] |
202 | 10.8M | .allocations |
203 | 10.8M | .btree |
204 | 10.8M | .insert(key, entry.index); |
205 | | |
206 | | // We disallow LR overlap within bundles, so this should never be possible. |
207 | 10.8M | debug_assert!(res.is_none()); |
208 | | } |
209 | | |
210 | 8.22M | AllocRegResult::Allocated(Allocation::reg(preg)) |
211 | 40.6M | } |
212 | | |
213 | 181k | pub fn evict_bundle(&mut self, bundle: LiveBundleIndex) { |
214 | 181k | trace!( |
215 | 0 | "evicting bundle {:?}: alloc {:?}", |
216 | | bundle, |
217 | 0 | self.ctx.bundles[bundle].allocation |
218 | | ); |
219 | 181k | let preg = match self.ctx.bundles[bundle].allocation.as_reg() { |
220 | 181k | Some(preg) => preg, |
221 | | None => { |
222 | 0 | trace!( |
223 | 0 | " -> has no allocation! {:?}", |
224 | 0 | self.ctx.bundles[bundle].allocation |
225 | | ); |
226 | 0 | return; |
227 | | } |
228 | | }; |
229 | 181k | let preg_idx = PRegIndex::new(preg.index()); |
230 | 181k | self.ctx.bundles[bundle].allocation = Allocation::none(); |
231 | 1.19M | for entry in &self.ctx.bundles[bundle].ranges { |
232 | 1.19M | trace!(" -> removing LR {:?} from reg {:?}", entry.index, preg_idx); |
233 | 1.19M | self.ctx.pregs[preg_idx.index()] |
234 | 1.19M | .allocations |
235 | 1.19M | .btree |
236 | 1.19M | .remove(&LiveRangeKey::from_range(&entry.range)); |
237 | | } |
238 | 181k | let prio = self.ctx.bundles[bundle].prio; |
239 | 181k | trace!(" -> prio {}; back into queue", prio); |
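| | // The bundle re-enters the queue with an invalid register hint,
| | // so on its next attempt it falls back to its spillset's hint
| | // (see `process_bundle` below).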
240 | 181k | self.ctx |
241 | 181k | .allocation_queue |
242 | 181k | .insert(bundle, prio as usize, PReg::invalid()); |
243 | 181k | } |
244 | | |
245 | 539k | pub fn bundle_spill_weight(&self, bundle: LiveBundleIndex) -> u32 { |
246 | 539k | self.ctx.bundles[bundle].cached_spill_weight() |
247 | 539k | } |
248 | | |
249 | 5.69M | pub fn maximum_spill_weight_in_bundle_set(&self, bundles: &[LiveBundleIndex]) -> u32 { |
250 | 5.69M | trace!("maximum_spill_weight_in_bundle_set: {:?}", bundles); |
251 | 5.69M | let m = bundles |
252 | 5.69M | .iter() |
253 | 6.05M | .map(|&b| { |
254 | 6.05M | let w = self.ctx.bundles[b].cached_spill_weight(); |
255 | 6.05M | trace!("bundle{}: {}", b.index(), w); |
256 | 6.05M | w |
257 | 6.05M | }) |
258 | 5.69M | .max() |
259 | 5.69M | .unwrap_or(0); |
260 | 5.69M | trace!(" -> max: {}", m); |
261 | 5.69M | m |
262 | 5.69M | } |
263 | | |
264 | 8.74M | pub fn recompute_bundle_properties(&mut self, bundle: LiveBundleIndex) { |
265 | 8.74M | trace!("recompute bundle properties: bundle {:?}", bundle); |
266 | | |
267 | | let minimal; |
268 | 8.74M | let mut fixed = false; |
269 | 8.74M | let mut fixed_def = false; |
270 | 8.74M | let mut stack = false; |
271 | 8.74M | let bundledata = &self.ctx.bundles[bundle]; |
272 | 8.74M | let num_ranges = bundledata.ranges.len(); |
273 | 8.74M | let first_range = bundledata.ranges[0].index; |
274 | 8.74M | let first_range_data = &self.ctx.ranges[first_range]; |
275 | | |
276 | 8.74M | self.ctx.bundles[bundle].prio = self.compute_bundle_prio(bundle); |
277 | 8.74M | self.ctx.bundles[bundle].limit = self.compute_bundle_limit(bundle); |
278 | | |
279 | 8.74M | if first_range_data.vreg.is_invalid() { |
280 | 0 | trace!(" -> no vreg; minimal and fixed"); |
281 | 0 | minimal = true; |
282 | 0 | fixed = true; |
283 | 8.74M | } else if num_ranges == 1 { |
284 | 21.5M | for u in &first_range_data.uses { |
285 | 13.6M | trace!(" -> use: {:?}", u); |
286 | 13.6M | if let OperandConstraint::FixedReg(_) = u.operand.constraint() { |
287 | 699k | trace!(" -> fixed operand at {:?}: {:?}", u.pos, u.operand); |
288 | 699k | fixed = true; |
289 | 699k | if u.operand.kind() == OperandKind::Def { |
290 | 113k | trace!(" -> is fixed def"); |
291 | 113k | fixed_def = true; |
292 | 586k | } |
293 | 12.9M | } |
294 | 13.6M | if let OperandConstraint::Stack = u.operand.constraint() { |
295 | 0 | trace!(" -> stack operand at {:?}: {:?}", u.pos, u.operand); |
296 | 0 | stack = true; |
297 | 13.6M | } |
298 | 13.6M | if stack && fixed { |
299 | 0 | break; |
300 | 13.6M | } |
301 | | } |
302 | | // Minimal if there is only one LR, at most one use in that
303 | | // LR, and the extent of the range is consistent with the
304 | | // "minimal range for use" definition.
305 | 7.92M | minimal = match &first_range_data.uses[..] { |
306 | 7.92M | [] => true, |
307 | 6.88M | [only_use] => { |
308 | | // We use a "contains" rather than "exact" range |
309 | | // check because a smaller-than-min range is also |
310 | | // OK, if some logic produced it for a valid |
311 | | // reason -- for example, a dead def. We don't |
312 | | // want to livelock on "smaller than minimal" |
313 | | // ranges. |
314 | 6.88M | let min_range = minimal_range_for_use(only_use); |
315 | 6.88M | min_range.contains(&first_range_data.range) |
316 | | } |
317 | 998k | _ => false, |
318 | | }; |
319 | 7.92M | trace!(" -> minimal: {}", minimal); |
320 | 824k | } else { |
321 | 824k | minimal = false; |
322 | 824k | } |
323 | | |
324 | 8.74M | let spill_weight = if minimal { |
325 | 6.83M | if fixed { |
326 | 179k | trace!(" -> fixed and minimal"); |
327 | 179k | MINIMAL_FIXED_BUNDLE_SPILL_WEIGHT |
328 | | } else { |
329 | 6.65M | trace!(" -> non-fixed and minimal"); |
330 | 6.65M | MINIMAL_BUNDLE_SPILL_WEIGHT |
331 | | } |
332 | | } else { |
333 | 1.91M | let mut total = SpillWeight::zero(); |
334 | 6.39M | for entry in &self.ctx.bundles[bundle].ranges { |
335 | 6.39M | let range_data = &self.ctx.ranges[entry.index]; |
336 | 6.39M | trace!( |
337 | 0 | " -> uses spill weight: +{:?}", |
338 | 0 | range_data.uses_spill_weight() |
339 | | ); |
340 | 6.39M | total = total + range_data.uses_spill_weight(); |
341 | | } |
342 | | |
343 | 1.91M | if self.ctx.bundles[bundle].prio > 0 { |
344 | 1.88M | let final_weight = (total.to_f32() as u32) / self.ctx.bundles[bundle].prio; |
345 | 1.88M | trace!( |
346 | 0 | " -> dividing by prio {}; final weight {}", |
347 | 0 | self.ctx.bundles[bundle].prio, |
348 | | final_weight |
349 | | ); |
350 | 1.88M | core::cmp::min(BUNDLE_MAX_NORMAL_SPILL_WEIGHT, final_weight) |
351 | | } else { |
352 | 23.6k | 0 |
353 | | } |
354 | | }; |
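| | // A worked example of the normalization above: with a total use
| | // spill weight of 2000 summed across the ranges and a prio (from
| | // `compute_bundle_prio`) of 10, the final weight is 2000 / 10 =
| | // 200; with prio 2 it would be 1000. Assuming prio scales with
| | // the bundle's extent, this makes the weight a density: long but
| | // lightly-used bundles look cheaper to spill than short,
| | // heavily-used ones (capped at BUNDLE_MAX_NORMAL_SPILL_WEIGHT).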
355 | | |
356 | 8.74M | self.ctx.bundles[bundle].set_cached_spill_weight_and_props( |
357 | 8.74M | spill_weight, |
358 | 8.74M | minimal, |
359 | 8.74M | fixed, |
360 | 8.74M | fixed_def, |
361 | 8.74M | stack, |
362 | | ); |
363 | 8.74M | } |
364 | | |
365 | 1.07M | pub fn minimal_bundle(&self, bundle: LiveBundleIndex) -> bool { |
366 | 1.07M | self.ctx.bundles[bundle].cached_minimal() |
367 | 1.07M | } |
368 | | |
369 | 894k | pub fn recompute_range_properties(&mut self, range: LiveRangeIndex) { |
370 | 894k | let rangedata = &mut self.ctx.ranges[range]; |
371 | 894k | let mut w = SpillWeight::zero(); |
372 | 4.21M | for u in &rangedata.uses { |
373 | 3.31M | w = w + SpillWeight::from_bits(u.weight); |
374 | 3.31M | trace!("range{}: use {:?}", range.index(), u); |
375 | | } |
376 | 894k | rangedata.set_uses_spill_weight(w); |
377 | 894k | if rangedata.uses.len() > 0 && rangedata.uses[0].operand.kind() == OperandKind::Def { |
378 | 220k | // Note that we *set* the flag here, but we never *clear* |
379 | 220k | // it: it may be set by a progmove as well (which does not |
380 | 220k | // create an explicit use or def), and we want to preserve |
381 | 220k | // that. We will never split or trim ranges in a way that |
382 | 220k | // removes a def at the front and requires the flag to be |
383 | 220k | // cleared. |
384 | 220k | rangedata.set_flag(LiveRangeFlag::StartsAtDef); |
385 | 674k | } |
386 | 894k | } |
387 | | |
388 | 7.38M | pub fn get_or_create_spill_bundle( |
389 | 7.38M | &mut self, |
390 | 7.38M | bundle: LiveBundleIndex, |
391 | 7.38M | create_if_absent: bool, |
392 | 7.38M | ) -> Option<LiveBundleIndex> { |
393 | 7.38M | let ssidx = self.ctx.bundles[bundle].spillset; |
394 | 7.38M | let idx = self.ctx.spillsets[ssidx].spill_bundle; |
395 | 7.38M | if idx.is_valid() { |
396 | 1.29M | Some(idx) |
397 | 6.09M | } else if create_if_absent { |
398 | 212k | let idx = self.ctx.bundles.add(self.ctx.bump()); |
399 | 212k | self.ctx.spillsets[ssidx].spill_bundle = idx; |
400 | 212k | self.ctx.bundles[idx].spillset = ssidx; |
401 | 212k | self.ctx.spilled_bundles.push(idx); |
402 | 212k | Some(idx) |
403 | | } else { |
404 | 5.88M | None |
405 | | } |
406 | 7.38M | } |
407 | | |
408 | 557k | pub fn split_and_requeue_bundle( |
409 | 557k | &mut self, |
410 | 557k | bundle: LiveBundleIndex, |
411 | 557k | mut split_at: ProgPoint, |
412 | 557k | hint: PReg, |
413 | 557k | // Do we trim the parts around the split and put them in the |
414 | 557k | // spill bundle? |
415 | 557k | mut trim_ends_into_spill_bundle: bool, |
416 | 557k | ) { |
417 | 557k | self.ctx.output.stats.splits += 1; |
418 | 557k | trace!( |
419 | 0 | "split bundle {bundle:?} at {split_at:?} and requeue with reg hint (for first part) {hint:?}" |
420 | | ); |
421 | | |
422 | | // Split `bundle` at `split_at`, creating new LiveRanges and |
423 | | // bundles (and updating vregs' linked lists appropriately), |
424 | | // and enqueue the new bundles. |
425 | | |
426 | 557k | let spillset = self.ctx.bundles[bundle].spillset; |
427 | | |
428 | | // Have we reached the maximum split count? If so, fall back |
429 | | // to a "minimal bundles and spill bundle" setup for this |
430 | | // bundle. See the doc-comment on |
431 | | // `split_into_minimal_bundles()` below for more.
432 | 557k | if self.ctx.spillsets[spillset].splits >= MAX_SPLITS_PER_SPILLSET { |
433 | 101k | self.split_into_minimal_bundles(bundle, hint); |
434 | 101k | return; |
435 | 455k | } |
436 | 455k | self.ctx.spillsets[spillset].splits += 1; |
437 | | |
438 | 455k | debug_assert!(!self.ctx.bundles[bundle].ranges.is_empty()); |
439 | | // Split point *at* start is OK; this means we peel off |
440 | | // exactly one use to create a minimal bundle. |
441 | 455k | let bundle_start = self.ctx.bundles[bundle].ranges.first().unwrap().range.from; |
442 | 455k | debug_assert!(split_at >= bundle_start); |
443 | 455k | let bundle_end = self.ctx.bundles[bundle].ranges.last().unwrap().range.to; |
444 | 455k | debug_assert!(split_at < bundle_end); |
445 | | |
446 | 455k | trace!( |
447 | 0 | "bundle_start = {:?} bundle_end = {:?}", |
448 | | bundle_start, |
449 | | bundle_end |
450 | | ); |
451 | | |
452 | | // Does the bundle span at most one instruction? Then
453 | | // there's nothing we can do with this logic (we will not |
454 | | // split in the middle of the instruction). Fall back to the |
455 | | // minimal-bundle splitting in this case as well. |
456 | 455k | if bundle_end.prev().inst() == bundle_start.inst() { |
457 | 1.60k | trace!(" -> spans only one inst; splitting into minimal bundles"); |
458 | 1.60k | self.split_into_minimal_bundles(bundle, hint); |
459 | 1.60k | return; |
460 | 454k | } |
461 | | |
462 | | // Is the split point *at* the start? If so, peel off the
463 | | // first use: if it falls in the bundle's first instruction,
464 | | // set the split point just after that instruction; otherwise,
465 | | // set it just before the use's instruction.
466 | 454k | if split_at == bundle_start { |
467 | | // Find any uses; if none, just chop off one instruction. |
468 | 80.8k | let mut first_use = None; |
469 | 81.1k | 'outer: for entry in &self.ctx.bundles[bundle].ranges { |
470 | 81.1k | for u in &self.ctx.ranges[entry.index].uses { |
471 | 80.8k | first_use = Some(u.pos); |
472 | 80.8k | break 'outer; |
473 | | } |
474 | | } |
475 | 80.8k | trace!(" -> first use loc is {:?}", first_use); |
476 | 80.8k | split_at = match first_use { |
477 | 80.8k | Some(pos) => { |
478 | 80.8k | if pos.inst() == bundle_start.inst() { |
479 | 80.3k | ProgPoint::before(pos.inst().next()) |
480 | | } else { |
481 | 515 | ProgPoint::before(pos.inst()) |
482 | | } |
483 | | } |
484 | 0 | None => ProgPoint::before( |
485 | 0 | self.ctx.bundles[bundle] |
486 | 0 | .ranges |
487 | 0 | .first() |
488 | 0 | .unwrap() |
489 | 0 | .range |
490 | 0 | .from |
491 | 0 | .inst() |
492 | 0 | .next(), |
493 | | ), |
494 | | }; |
495 | 80.8k | trace!( |
496 | 0 | "split point is at bundle start; advancing to {:?}", |
497 | | split_at |
498 | | ); |
499 | | } else { |
500 | | // Don't split in the middle of an instruction -- this could |
501 | | // create impossible moves (we cannot insert a move between an |
502 | | // instruction's uses and defs). |
503 | 373k | if split_at.pos() == InstPosition::After { |
504 | 179k | split_at = split_at.next(); |
505 | 194k | } |
506 | 373k | if split_at >= bundle_end { |
507 | 8.52k | split_at = split_at.prev().prev(); |
508 | 364k | } |
509 | | } |
510 | | |
511 | 454k | debug_assert!(split_at > bundle_start && split_at < bundle_end); |
512 | | |
513 | | // We need to find which LRs fall on each side of the split, |
514 | | // which LR we need to split down the middle, then update the |
515 | | // current bundle, create a new one, and (re)-queue both. |
516 | | |
517 | 454k | trace!(" -> LRs: {:?}", self.ctx.bundles[bundle].ranges); |
518 | | |
519 | 454k | let mut last_lr_in_old_bundle_idx = 0; // last LR-list index in old bundle |
520 | 454k | let mut first_lr_in_new_bundle_idx = 0; // first LR-list index in new bundle |
521 | 1.10M | for (i, entry) in self.ctx.bundles[bundle].ranges.iter().enumerate() { |
522 | 1.10M | if split_at > entry.range.from { |
523 | 1.09M | last_lr_in_old_bundle_idx = i; |
524 | 1.09M | first_lr_in_new_bundle_idx = i; |
525 | 1.09M | } |
526 | 1.10M | if split_at < entry.range.to { |
527 | 454k | first_lr_in_new_bundle_idx = i; |
528 | | |
529 | | // When the bundle contains a fixed constraint, we advance the split point to right |
530 | | // before the first instruction with a fixed use present. |
531 | 454k | if self.ctx.bundles[bundle].cached_fixed() { |
532 | 1.16M | for u in &self.ctx.ranges[entry.index].uses { |
533 | 1.16M | if u.pos < split_at { |
534 | 699k | continue; |
535 | 466k | } |
536 | | |
537 | 466k | if matches!(u.operand.constraint(), OperandConstraint::FixedReg { .. }) { |
538 | 109k | split_at = ProgPoint::before(u.pos.inst()); |
539 | | |
540 | 109k | if split_at > entry.range.from { |
541 | 109k | last_lr_in_old_bundle_idx = i; |
542 | 109k | } |
543 | | |
544 | 109k | trace!(" -> advancing split point to {split_at:?}"); |
545 | | |
546 | 109k | trim_ends_into_spill_bundle = false; |
547 | | |
548 | 109k | break; |
549 | 356k | } |
550 | | } |
551 | 286k | } |
552 | | |
553 | 454k | break; |
554 | 647k | } |
555 | | } |
556 | | |
557 | 454k | trace!( |
558 | 0 | " -> last LR in old bundle: LR {:?}", |
559 | 0 | self.ctx.bundles[bundle].ranges[last_lr_in_old_bundle_idx] |
560 | | ); |
561 | 454k | trace!( |
562 | 0 | " -> first LR in new bundle: LR {:?}", |
563 | 0 | self.ctx.bundles[bundle].ranges[first_lr_in_new_bundle_idx] |
564 | | ); |
565 | | |
566 | | // Take the sublist of LRs that will go in the new bundle. |
567 | 454k | let mut new_lr_list: LiveRangeList = LiveRangeList::new_in(self.ctx.bump()); |
568 | 454k | new_lr_list.extend( |
569 | 454k | self.ctx.bundles[bundle] |
570 | 454k | .ranges |
571 | 454k | .iter() |
572 | 454k | .cloned() |
573 | 454k | .skip(first_lr_in_new_bundle_idx), |
574 | | ); |
575 | 454k | self.ctx.bundles[bundle] |
576 | 454k | .ranges |
577 | 454k | .truncate(last_lr_in_old_bundle_idx + 1); |
578 | 454k | self.ctx.bundles[bundle].ranges.shrink_to_fit(); |
579 | | |
580 | | // If the first entry in `new_lr_list` is a LR that is split |
581 | | // down the middle, replace it with a new LR and chop off the |
582 | | // end of the same LR in the original list. |
583 | 454k | if split_at > new_lr_list[0].range.from { |
584 | 447k | debug_assert_eq!(last_lr_in_old_bundle_idx, first_lr_in_new_bundle_idx); |
585 | 447k | let orig_lr = new_lr_list[0].index; |
586 | 447k | let new_lr = self.ctx.ranges.add( |
587 | 447k | CodeRange { |
588 | 447k | from: split_at, |
589 | 447k | to: new_lr_list[0].range.to, |
590 | 447k | }, |
591 | 447k | self.ctx.bump(), |
592 | | ); |
593 | 447k | self.ctx.ranges[new_lr].vreg = self.ranges[orig_lr].vreg; |
594 | 447k | trace!(" -> splitting LR {:?} into {:?}", orig_lr, new_lr); |
595 | 447k | let first_use = self.ctx.ranges[orig_lr] |
596 | 447k | .uses |
597 | 447k | .iter() |
598 | 1.85M | .position(|u| u.pos >= split_at) |
599 | 447k | .unwrap_or(self.ctx.ranges[orig_lr].uses.len()); |
600 | 447k | let mut rest_uses = UseList::new_in(self.ctx.bump()); |
601 | 447k | rest_uses.extend( |
602 | 447k | self.ctx.ranges[orig_lr] |
603 | 447k | .uses |
604 | 447k | .iter() |
605 | 447k | .cloned() |
606 | 447k | .skip(first_use), |
607 | | ); |
608 | 447k | self.ctx.ranges[new_lr].uses = rest_uses; |
609 | 447k | self.ctx.ranges[orig_lr].uses.truncate(first_use); |
610 | 447k | self.ctx.ranges[orig_lr].uses.shrink_to_fit(); |
611 | 447k | self.recompute_range_properties(orig_lr); |
612 | 447k | self.recompute_range_properties(new_lr); |
613 | 447k | new_lr_list[0].index = new_lr; |
614 | 447k | new_lr_list[0].range = self.ctx.ranges[new_lr].range; |
615 | 447k | self.ctx.ranges[orig_lr].range.to = split_at; |
616 | 447k | self.ctx.bundles[bundle].ranges[last_lr_in_old_bundle_idx].range = |
617 | 447k | self.ctx.ranges[orig_lr].range; |
618 | | |
619 | | // Perform a lazy split in the VReg data. We just |
620 | | // append the new LR and its range; we will sort by |
621 | | // start of range, and fix up range ends, once when we |
622 | | // iterate over the VReg's ranges after allocation |
623 | | // completes (this is the only time when order |
624 | | // matters). |
625 | 447k | self.ctx.vregs[self.ctx.ranges[new_lr].vreg] |
626 | 447k | .ranges |
627 | 447k | .push(LiveRangeListEntry { |
628 | 447k | range: self.ctx.ranges[new_lr].range, |
629 | 447k | index: new_lr, |
630 | 447k | }); |
631 | 6.85k | } |
632 | | |
633 | 454k | let new_bundle = self.ctx.bundles.add(self.ctx.bump()); |
634 | 454k | trace!(" -> creating new bundle {:?}", new_bundle); |
635 | 454k | self.ctx.bundles[new_bundle].spillset = spillset; |
636 | 2.65M | for entry in &new_lr_list { |
637 | 2.20M | self.ctx.ranges[entry.index].bundle = new_bundle; |
638 | 2.20M | } |
639 | 454k | self.ctx.bundles[new_bundle].ranges = new_lr_list; |
640 | | |
641 | 454k | if trim_ends_into_spill_bundle { |
642 | | // Finally, handle moving LRs to the spill bundle when |
643 | | // appropriate: If the first range in `new_bundle` or last |
644 | | // range in `bundle` has "empty space" beyond the first or |
645 | | // last use (respectively), trim it and put an empty LR into |
646 | | // the spill bundle. (We are careful to treat the "starts at |
647 | | // def" flag as an implicit first def even if no def-type Use |
648 | | // is present.) |
649 | 709k | while let Some(entry) = self.ctx.bundles[bundle].ranges.last().cloned() { |
650 | 708k | let end = entry.range.to; |
651 | 708k | let vreg = self.ctx.ranges[entry.index].vreg; |
652 | 708k | let last_use = self.ctx.ranges[entry.index].uses.last().map(|u| u.pos); |
653 | 708k | if last_use.is_none() { |
654 | 456k | let spill = self |
655 | 456k | .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) |
656 | 456k | .unwrap(); |
657 | 456k | trace!( |
658 | 0 | " -> bundle {:?} range {:?}: no uses; moving to spill bundle {:?}", |
659 | | bundle, |
660 | | entry.index, |
661 | | spill |
662 | | ); |
663 | 456k | self.ctx.bundles[spill].ranges.push(entry); |
664 | 456k | self.ctx.bundles[bundle].ranges.pop(); |
665 | 456k | self.ctx.ranges[entry.index].bundle = spill; |
666 | 456k | continue; |
667 | 251k | } |
668 | 251k | let last_use = last_use.unwrap(); |
669 | 251k | let split = ProgPoint::before(last_use.inst().next()); |
670 | 251k | if split < end { |
671 | 164k | let spill = self |
672 | 164k | .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) |
673 | 164k | .unwrap(); |
674 | 164k | self.ctx.bundles[bundle].ranges.last_mut().unwrap().range.to = split; |
675 | 164k | self.ctx.ranges[self.ctx.bundles[bundle].ranges.last().unwrap().index] |
676 | 164k | .range |
677 | 164k | .to = split; |
678 | 164k | let range = CodeRange { |
679 | 164k | from: split, |
680 | 164k | to: end, |
681 | 164k | }; |
682 | 164k | let empty_lr = self.ctx.ranges.add(range, self.ctx.bump()); |
683 | 164k | self.ctx.bundles[spill].ranges.push(LiveRangeListEntry { |
684 | 164k | range, |
685 | 164k | index: empty_lr, |
686 | 164k | }); |
687 | 164k | self.ctx.ranges[empty_lr].bundle = spill; |
688 | 164k | self.ctx.vregs[vreg].ranges.push(LiveRangeListEntry { |
689 | 164k | range, |
690 | 164k | index: empty_lr, |
691 | 164k | }); |
692 | 164k | trace!( |
693 | 0 | " -> bundle {:?} range {:?}: last use implies split point {:?}", |
694 | | bundle, |
695 | | entry.index, |
696 | | split |
697 | | ); |
698 | 164k | trace!( |
699 | 0 | " -> moving trailing empty region to new spill bundle {:?} with new LR {:?}", |
700 | | spill, |
701 | | empty_lr |
702 | | ); |
703 | 87.2k | } |
704 | 251k | break; |
705 | | } |
706 | 833k | while let Some(entry) = self.ctx.bundles[new_bundle].ranges.first().cloned() { |
707 | 760k | if self.ctx.ranges[entry.index].has_flag(LiveRangeFlag::StartsAtDef) { |
708 | 278 | break; |
709 | 760k | } |
710 | 760k | let start = entry.range.from; |
711 | 760k | let vreg = self.ctx.ranges[entry.index].vreg; |
712 | 760k | let first_use = self.ctx.ranges[entry.index].uses.first().map(|u| u.pos); |
713 | 760k | if first_use.is_none() { |
714 | 580k | let spill = self |
715 | 580k | .get_or_create_spill_bundle(new_bundle, /* create_if_absent = */ true) |
716 | 580k | .unwrap(); |
717 | 580k | trace!( |
718 | 0 | " -> bundle {:?} range {:?}: no uses; moving to spill bundle {:?}", |
719 | | new_bundle, |
720 | | entry.index, |
721 | | spill |
722 | | ); |
723 | 580k | self.ctx.bundles[spill].ranges.push(entry); |
724 | 580k | self.ctx.bundles[new_bundle].ranges.drain(..1); |
725 | 580k | self.ctx.ranges[entry.index].bundle = spill; |
726 | 580k | continue; |
727 | 179k | } |
728 | 179k | let first_use = first_use.unwrap(); |
729 | 179k | let split = ProgPoint::before(first_use.inst()); |
730 | 179k | if split > start { |
731 | 124k | let spill = self |
732 | 124k | .get_or_create_spill_bundle(new_bundle, /* create_if_absent = */ true) |
733 | 124k | .unwrap(); |
734 | 124k | self.ctx.bundles[new_bundle] |
735 | 124k | .ranges |
736 | 124k | .first_mut() |
737 | 124k | .unwrap() |
738 | 124k | .range |
739 | 124k | .from = split; |
740 | 124k | self.ctx.ranges[self.ctx.bundles[new_bundle].ranges.first().unwrap().index] |
741 | 124k | .range |
742 | 124k | .from = split; |
743 | 124k | let range = CodeRange { |
744 | 124k | from: start, |
745 | 124k | to: split, |
746 | 124k | }; |
747 | 124k | let empty_lr = self.ctx.ranges.add(range, self.ctx.bump()); |
748 | 124k | self.ctx.bundles[spill].ranges.push(LiveRangeListEntry { |
749 | 124k | range, |
750 | 124k | index: empty_lr, |
751 | 124k | }); |
752 | 124k | self.ctx.ranges[empty_lr].bundle = spill; |
753 | 124k | self.ctx.vregs[vreg].ranges.push(LiveRangeListEntry { |
754 | 124k | range, |
755 | 124k | index: empty_lr, |
756 | 124k | }); |
757 | 124k | trace!( |
758 | 0 | " -> bundle {:?} range {:?}: first use implies split point {:?}", |
759 | | bundle, |
760 | | entry.index, |
761 | | first_use, |
762 | | ); |
763 | 124k | trace!( |
764 | 0 | " -> moving leading empty region to new spill bundle {:?} with new LR {:?}", |
765 | | spill, |
766 | | empty_lr |
767 | | ); |
768 | 54.3k | } |
769 | 179k | break; |
770 | | } |
771 | 201k | } |
772 | | |
773 | 454k | if self.ctx.bundles[bundle].ranges.len() > 0 { |
774 | 453k | self.recompute_bundle_properties(bundle); |
775 | 453k | let prio = self.ctx.bundles[bundle].prio; |
776 | 453k | self.ctx |
777 | 453k | .allocation_queue |
778 | 453k | .insert(bundle, prio as usize, hint); |
779 | 453k | } |
780 | 454k | if self.ctx.bundles[new_bundle].ranges.len() > 0 { |
781 | 380k | self.recompute_bundle_properties(new_bundle); |
782 | 380k | let prio = self.ctx.bundles[new_bundle].prio; |
783 | 380k | self.ctx |
784 | 380k | .allocation_queue |
785 | 380k | .insert(new_bundle, prio as usize, hint); |
786 | 380k | } |
787 | 557k | } |
788 | | |
789 | | /// Splits the given bundle into minimal bundles per Use, falling |
790 | | /// back onto the spill bundle. This must work for any bundle, no
791 | | /// matter how many conflicts it has.
792 | | /// |
793 | | /// This is meant to solve a quadratic-cost problem that exists |
794 | | /// with "normal" splitting as implemented above. With that |
795 | | /// procedure, splitting a bundle produces two |
796 | | /// halves. Furthermore, it has cost linear in the length of the |
797 | | /// bundle, because the resulting half-bundles have their |
798 | | /// requirements recomputed with a new scan, and because we copy |
799 | | /// half the use-list over to the tail end sub-bundle. |
800 | | /// |
801 | | /// This works fine when a bundle has a handful of splits overall, |
802 | | /// but not when an input has a systematic pattern of conflicts |
803 | | /// that will require O(|bundle|) splits (e.g., every Use is |
804 | | /// constrained to a different fixed register than the last |
805 | | /// one). In such a case, we get quadratic behavior. |
806 | | /// |
807 | | /// This method implements a direct split into minimal bundles |
808 | | /// along the whole length of the bundle, putting the regions |
809 | | /// without uses in the spill bundle. We do this once the number |
810 | | /// of splits in an original bundle (tracked by spillset) reaches |
811 | | /// a pre-determined limit. |
812 | | /// |
813 | | /// This basically approximates what a non-splitting allocator |
814 | | /// would do: via the spill bundle, it "spills" the whole bundle
815 | | /// to a stackslot (or, at best, to a second-chance register
816 | | /// allocation), and then makes minimal reservations of
817 | | /// registers just at uses/defs, moving the "spilled" value
818 | | /// into/out of them immediately.
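| | ///
| | /// As a worked illustration of the quadratic cost: peeling one
| | /// minimal bundle per conflict off the front of an n-use bundle
| | /// copies the remaining use-list each time, for roughly
| | ///
| | ///     (n - 1) + (n - 2) + ... + 1 = n * (n - 1) / 2
| | ///
| | /// use copies overall, i.e. O(n^2). The direct split implemented
| | /// here visits each use exactly once instead, which is O(n).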
819 | 103k | pub fn split_into_minimal_bundles(&mut self, bundle: LiveBundleIndex, hint: PReg) { |
820 | 103k | assert_eq!(self.ctx.scratch_removed_lrs_vregs.len(), 0); |
821 | 103k | self.ctx.scratch_removed_lrs.clear(); |
822 | | |
823 | 103k | let mut new_lrs: SmallVec<[(VRegIndex, LiveRangeIndex); 16]> = smallvec![]; |
824 | 103k | let mut new_bundles: SmallVec<[LiveBundleIndex; 16]> = smallvec![]; |
825 | | |
826 | 103k | let spillset = self.ctx.bundles[bundle].spillset; |
827 | 103k | let spill = self |
828 | 103k | .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) |
829 | 103k | .unwrap(); |
830 | | |
831 | 103k | trace!("Splitting bundle {bundle:?} into minimal bundles with reg hint {hint:?}"); |
832 | | |
833 | 103k | let mut spill_uses = UseList::new_in(self.ctx.bump()); |
834 | | |
835 | 103k | let empty_vec = LiveRangeList::new_in(self.ctx.bump()); |
836 | 433k | for entry in core::mem::replace(&mut self.ctx.bundles[bundle].ranges, empty_vec) { |
837 | 433k | let vreg = self.ctx.ranges[entry.index].vreg; |
838 | | |
839 | 433k | self.ctx.scratch_removed_lrs.insert(entry.index); |
840 | 433k | self.ctx.scratch_removed_lrs_vregs.insert(vreg); |
841 | | |
842 | 433k | trace!(" -> removing old LR {:?} for vreg {:?}", entry.index, vreg); |
843 | | |
844 | 433k | let mut spill_range = entry.range; |
845 | 433k | let mut spill_starts_def = false; |
846 | | |
847 | 433k | let empty_vec = UseList::new_in(self.ctx.bump()); |
848 | 1.02M | for u in core::mem::replace(&mut self.ctx.ranges[entry.index].uses, empty_vec) { |
849 | 1.02M | trace!(" -> use {:?}", u); |
850 | | |
851 | 1.02M | let is_def = u.operand.kind() == OperandKind::Def; |
852 | | |
853 | | // If this use has an `any` constraint, eagerly migrate it to the spill range. The
854 | | // reasoning here is that in the second-chance allocation for the spill bundle,
855 | | // any-constrained uses will be easy to satisfy. Solving those constraints earlier
856 | | // could create unnecessary conflicts with existing bundles that have stricter
857 | | // requirements and need to fit in a register, so we defer the flexible uses instead.
858 | 1.02M | if u.operand.constraint() == OperandConstraint::Any { |
859 | 513k | trace!(" -> migrating this any-constrained use to the spill range"); |
860 | 513k | spill_uses.push(u); |
861 | | |
862 | | // Remember if we're moving the def of this vreg into the spill range, so that |
863 | | // we can set the appropriate flags on it later. |
864 | 513k | spill_starts_def = spill_starts_def || is_def; |
865 | | |
866 | 513k | continue; |
867 | 514k | } |
868 | | |
869 | | // If this is a def, make sure that the spill range |
870 | | // starts before the next instruction rather than at |
871 | | // this position: the value is available only *after* |
872 | | // this position. |
873 | 514k | if is_def { |
874 | 14.5k | trace!(" -> moving the spill range forward by one"); |
875 | 14.5k | spill_range.from = ProgPoint::before(u.pos.inst().next()); |
876 | 499k | } |
877 | | |
878 | | // Create a new LR. |
879 | 514k | let cr = minimal_range_for_use(&u); |
880 | 514k | let lr = self.ctx.ranges.add(cr, self.ctx.bump()); |
881 | 514k | new_lrs.push((vreg, lr)); |
882 | 514k | self.ctx.ranges[lr].uses.push(u); |
883 | 514k | self.ctx.ranges[lr].vreg = vreg; |
884 | | |
885 | | // Create a new bundle that contains only this LR. |
886 | 514k | let new_bundle = self.ctx.bundles.add(self.ctx.bump()); |
887 | 514k | self.ctx.ranges[lr].bundle = new_bundle; |
888 | 514k | self.ctx.bundles[new_bundle].spillset = spillset; |
889 | 514k | self.ctx.bundles[new_bundle] |
890 | 514k | .ranges |
891 | 514k | .push(LiveRangeListEntry { |
892 | 514k | range: cr, |
893 | 514k | index: lr, |
894 | 514k | }); |
895 | 514k | new_bundles.push(new_bundle); |
896 | | |
897 | | // If this use was a Def, set the StartsAtDef flag for the new LR. |
898 | 514k | if is_def { |
899 | 14.5k | self.ctx.ranges[lr].set_flag(LiveRangeFlag::StartsAtDef); |
900 | 499k | } |
901 | | |
902 | 514k | trace!( |
903 | 0 | " -> created new LR {:?} range {:?} with new bundle {:?} for this use", |
904 | | lr, |
905 | | cr, |
906 | | new_bundle |
907 | | ); |
908 | | } |
909 | | |
910 | 433k | if !spill_range.is_empty() { |
911 | | // Make one entry in the spill bundle that covers the whole range. |
912 | | // TODO: it might be worth tracking enough state to only create this LR when there is |
913 | | // open space in the original LR. |
914 | 432k | let spill_lr = self.ctx.ranges.add(spill_range, self.ctx.bump()); |
915 | 432k | self.ctx.ranges[spill_lr].vreg = vreg; |
916 | 432k | self.ctx.ranges[spill_lr].bundle = spill; |
917 | 432k | self.ctx.ranges[spill_lr].uses.extend(spill_uses.drain(..)); |
918 | 432k | new_lrs.push((vreg, spill_lr)); |
919 | | |
920 | 432k | if spill_starts_def { |
921 | 4.32k | self.ctx.ranges[spill_lr].set_flag(LiveRangeFlag::StartsAtDef); |
922 | 428k | } |
923 | | |
924 | 432k | self.ctx.bundles[spill].ranges.push(LiveRangeListEntry { |
925 | 432k | range: spill_range, |
926 | 432k | index: spill_lr, |
927 | 432k | }); |
928 | 432k | self.ctx.ranges[spill_lr].bundle = spill; |
929 | 432k | trace!( |
930 | 0 | " -> added spill range {:?} in new LR {:?} in spill bundle {:?}", |
931 | | spill_range, |
932 | | spill_lr, |
933 | | spill |
934 | | ); |
935 | | } else { |
936 | 928 | assert!(spill_uses.is_empty()); |
937 | | } |
938 | | } |
939 | | |
940 | | // Remove all of the removed LRs from respective vregs' lists. |
941 | 116k | for vreg in self.ctx.scratch_removed_lrs_vregs.drain() { |
942 | 116k | let lrs = &mut self.ctx.scratch_removed_lrs; |
943 | 116k | self.ctx.vregs[vreg] |
944 | 116k | .ranges |
945 | 951k | .retain(|entry| !lrs.contains(&entry.index)); |
946 | | } |
947 | | |
948 | | // Add the new LRs to their respective vreg lists. |
949 | 1.04M | for (vreg, lr) in new_lrs { |
950 | 946k | let range = self.ctx.ranges[lr].range; |
951 | 946k | let entry = LiveRangeListEntry { range, index: lr }; |
952 | 946k | self.ctx.vregs[vreg].ranges.push(entry); |
953 | 946k | } |
954 | | |
955 | | // Recompute bundle properties for all new bundles and enqueue |
956 | | // them. |
957 | 617k | for bundle in new_bundles { |
958 | 514k | if self.ctx.bundles[bundle].ranges.len() > 0 { |
959 | 514k | self.recompute_bundle_properties(bundle); |
960 | 514k | let prio = self.ctx.bundles[bundle].prio; |
961 | 514k | self.ctx |
962 | 514k | .allocation_queue |
963 | 514k | .insert(bundle, prio as usize, hint); |
964 | 514k | } |
965 | | } |
966 | 103k | } |
967 | | |
968 | 8.92M | pub fn process_bundle( |
969 | 8.92M | &mut self, |
970 | 8.92M | bundle: LiveBundleIndex, |
971 | 8.92M | hint: PReg, |
972 | 8.92M | ) -> Result<(), RegAllocError> { |
973 | 8.92M | let class = self.ctx.spillsets[self.bundles[bundle].spillset].class; |
974 | | |
975 | | // Grab a hint from either the queue or our spillset, if any. |
976 | 8.92M | let mut hint = if hint != PReg::invalid() { |
977 | 899k | hint |
978 | | } else { |
979 | 8.02M | self.ctx.spillsets[self.bundles[bundle].spillset].hint |
980 | | }; |
981 | 8.92M | if self.ctx.pregs[hint.index()].is_stack { |
982 | 173k | hint = PReg::invalid(); |
983 | 8.75M | } |
984 | 8.92M | trace!("process_bundle: bundle {bundle:?} hint {hint:?}"); |
985 | | |
986 | 8.92M | let req = match self.compute_requirement(bundle) { |
987 | 8.72M | Ok(req) => req, |
988 | 198k | Err(conflict) => { |
989 | 198k | trace!("conflict!: {:?}", conflict); |
990 | | // We have to split right away. We'll find a point to |
991 | | // split that would allow at least the first half of the |
992 | | // split to be conflict-free. |
993 | 198k | debug_assert!( |
994 | 0 | !self.minimal_bundle(bundle), |
995 | | "Minimal bundle with conflict!" |
996 | | ); |
997 | 198k | self.split_and_requeue_bundle( |
998 | 198k | bundle, |
999 | 198k | /* split_at = */ conflict.suggested_split_point(),
1000 | 198k | hint, |
1001 | | /* trim_ends_into_spill_bundle = */ |
1002 | 198k | conflict.should_trim_edges_around_split(), |
1003 | | ); |
1004 | 198k | return Ok(()); |
1005 | | } |
1006 | | }; |
1007 | | |
1008 | | // If no requirement at all (because no uses), and *if* a |
1009 | | // spill bundle is already present, then move the LRs over to |
1010 | | // the spill bundle right away. |
1011 | 8.72M | match req { |
1012 | | Requirement::Any => { |
1013 | 72.8k | if let Some(spill) = |
1014 | 5.95M | self.get_or_create_spill_bundle(bundle, /* create_if_absent = */ false) |
1015 | | { |
1016 | 72.8k | let empty_vec = LiveRangeList::new_in(self.ctx.bump()); |
1017 | 72.8k | let mut list = |
1018 | 72.8k | core::mem::replace(&mut self.ctx.bundles[bundle].ranges, empty_vec); |
1019 | 263k | for entry in &list { |
1020 | 190k | self.ctx.ranges[entry.index].bundle = spill; |
1021 | 190k | } |
1022 | 72.8k | self.ctx.bundles[spill].ranges.extend(list.drain(..)); |
1023 | 72.8k | return Ok(()); |
1024 | 5.88M | } |
1025 | | } |
1026 | 2.76M | _ => {} |
1027 | | } |
1028 | | |
1029 | | // Try to allocate! |
1030 | 8.65M | let mut attempts = 0; |
1031 | 8.65M | let mut scratch = core::mem::take(&mut self.ctx.scratch_conflicts); |
1032 | 8.65M | let mut lowest_cost_evict_conflict_set = core::mem::take(&mut self.ctx.scratch_bundle); |
1033 | | 'outer: loop { |
1034 | 8.83M | attempts += 1; |
1035 | 8.83M | trace!("attempt {}, req {:?}", attempts, req); |
1036 | 8.83M | debug_assert!(attempts < 100 * self.func.num_insts()); |
1037 | | |
1038 | 8.83M | let fixed_preg = match req { |
1039 | 731k | Requirement::FixedReg(preg) | Requirement::FixedStack(preg) => Some(preg), |
1040 | 2.21M | Requirement::Register | Requirement::Limit(..) => None, |
1041 | | Requirement::Stack => { |
1042 | | // If we must be on the stack, mark our spillset |
1043 | | // as required immediately. |
1044 | 0 | let spillset = self.bundles[bundle].spillset; |
1045 | 0 | self.spillsets[spillset].required = true; |
1046 | 0 | return Ok(()); |
1047 | | } |
1048 | | |
1049 | | Requirement::Any => { |
1050 | 5.88M | self.ctx.spilled_bundles.push(bundle); |
1051 | 5.88M | break; |
1052 | | } |
1053 | | }; |
1054 | | // Scan all pregs, or the one fixed preg, and attempt to allocate. |
1055 | | |
1056 | 2.95M | let mut lowest_cost_evict_conflict_cost: Option<u32> = None; |
1057 | 2.95M | lowest_cost_evict_conflict_set.clear(); |
1058 | | |
1059 | 2.95M | let mut lowest_cost_split_conflict_cost: Option<u32> = None; |
1060 | 2.95M | let mut lowest_cost_split_conflict_point = ProgPoint::before(Inst::new(0)); |
1061 | 2.95M | let mut lowest_cost_split_conflict_reg = PReg::invalid(); |
1062 | | |
1063 | | // Heuristic: start the scan for an available |
1064 | | // register at an offset influenced both by our |
1065 | | // location in the code and by the bundle we're |
1066 | | // considering. This has the effect of spreading |
1067 | | // demand more evenly across registers. |
1068 | 2.95M | let scan_offset = self.ctx.ranges[self.bundles[bundle].ranges[0].index] |
1069 | 2.95M | .range |
1070 | 2.95M | .from |
1071 | 2.95M | .inst() |
1072 | 2.95M | .index() |
1073 | 2.95M | + bundle.index(); |
1074 | | |
1075 | 2.95M | self.ctx.output.stats.process_bundle_reg_probe_start_any += 1; |
1076 | 2.95M | let limit = self.bundles[bundle].limit.map(|l| l as usize); |
1077 | 15.8M | for preg in RegTraversalIter::new( |
1078 | 2.95M | self.env, |
1079 | 2.95M | class, |
1080 | 2.95M | fixed_preg, |
1081 | 2.95M | hint.as_valid(), |
1082 | 2.95M | scan_offset, |
1083 | 2.95M | limit, |
1084 | | ) { |
1085 | 15.8M | self.ctx.output.stats.process_bundle_reg_probes_any += 1; |
1086 | 15.8M | let preg_idx = PRegIndex::new(preg.index()); |
1087 | 15.8M | trace!("trying preg {:?}", preg_idx); |
1088 | | |
1089 | 15.8M | let scan_limit_cost = match ( |
1090 | 15.8M | lowest_cost_evict_conflict_cost, |
1091 | 15.8M | lowest_cost_split_conflict_cost, |
1092 | | ) { |
1093 | 10.6M | (Some(a), Some(b)) => Some(core::cmp::max(a, b)), |
1094 | 5.16M | _ => None, |
1095 | | }; |
1096 | 15.8M | match self.try_to_allocate_bundle_to_reg( |
1097 | 15.8M | bundle, |
1098 | 15.8M | preg_idx, |
1099 | 15.8M | scan_limit_cost, |
1100 | 15.8M | &mut scratch, |
1101 | 15.8M | ) { |
1102 | 2.41M | AllocRegResult::Allocated(alloc) => { |
1103 | 2.41M | self.ctx.output.stats.process_bundle_reg_success_any += 1; |
1104 | 2.41M | trace!(" -> allocated to any {:?}", preg_idx); |
1105 | 2.41M | self.ctx.spillsets[self.ctx.bundles[bundle].spillset].hint = |
1106 | 2.41M | alloc.as_reg().unwrap(); |
1107 | | // Success, return scratch memory to context and finish |
1108 | 2.41M | break 'outer; |
1109 | | } |
1110 | 5.69M | AllocRegResult::Conflict(bundles, first_conflict_point) => { |
1111 | 5.69M | trace!( |
1112 | 0 | " -> conflict with bundles {:?}, first conflict at {:?}", |
1113 | | bundles, |
1114 | | first_conflict_point |
1115 | | ); |
1116 | | |
1117 | 5.69M | let conflict_cost = self.maximum_spill_weight_in_bundle_set(bundles); |
1118 | | |
1119 | 5.69M | if lowest_cost_evict_conflict_cost.is_none() |
1120 | 4.28M | || conflict_cost < lowest_cost_evict_conflict_cost.unwrap() |
1121 | 2.35M | { |
1122 | 2.35M | lowest_cost_evict_conflict_cost = Some(conflict_cost); |
1123 | 2.35M | lowest_cost_evict_conflict_set.clear(); |
1124 | 2.35M | lowest_cost_evict_conflict_set.extend(bundles); |
1125 | 3.33M | } |
1126 | | |
1127 | 5.69M | let loop_depth = |
1128 | 5.69M | self.ctx.cfginfo.approx_loop_depth[self.ctx.cfginfo.insn_block |
1129 | 5.69M | [first_conflict_point.inst().index()] |
1130 | 5.69M | .index()]; |
1131 | 5.69M | let move_cost = spill_weight_from_constraint( |
1132 | 5.69M | OperandConstraint::Reg, |
1133 | 5.69M | loop_depth as usize, |
1134 | | /* is_def = */ true, |
1135 | | ) |
1136 | 5.69M | .to_int(); |
1137 | 5.69M | if lowest_cost_split_conflict_cost.is_none() |
1138 | 4.48M | || (conflict_cost + move_cost) |
1139 | 4.48M | < lowest_cost_split_conflict_cost.unwrap() |
1140 | 1.78M | { |
1141 | 1.78M | lowest_cost_split_conflict_cost = Some(conflict_cost + move_cost); |
1142 | 1.78M | lowest_cost_split_conflict_point = first_conflict_point; |
1143 | 1.78M | lowest_cost_split_conflict_reg = preg; |
1144 | 3.91M | } |
1145 | | } |
1146 | 7.06M | AllocRegResult::ConflictWithFixed(max_cost, point) => { |
1147 | 7.06M | trace!(" -> conflict with fixed alloc; cost of other bundles up to point is {}, conflict at {:?}", max_cost, point); |
1148 | | |
1149 | 7.06M | let loop_depth = self.ctx.cfginfo.approx_loop_depth |
1150 | 7.06M | [self.ctx.cfginfo.insn_block[point.inst().index()].index()]; |
1151 | 7.06M | let move_cost = spill_weight_from_constraint( |
1152 | 7.06M | OperandConstraint::Reg, |
1153 | 7.06M | loop_depth as usize, |
1154 | | /* is_def = */ true, |
1155 | | ) |
1156 | 7.06M | .to_int(); |
1157 | | |
1158 | 7.06M | if lowest_cost_split_conflict_cost.is_none() |
1159 | 6.73M | || (max_cost + move_cost) < lowest_cost_split_conflict_cost.unwrap() |
1160 | 838k | { |
1161 | 838k | lowest_cost_split_conflict_cost = Some(max_cost + move_cost); |
1162 | 838k | lowest_cost_split_conflict_point = point; |
1163 | 838k | lowest_cost_split_conflict_reg = preg; |
1164 | 6.22M | } |
1165 | | } |
1166 | | AllocRegResult::ConflictHighCost => { |
1167 | | // Simply don't consider -- we already have |
1168 | | // a lower-cost conflict bundle option |
1169 | | // to evict. |
1170 | 689k | continue; |
1171 | | } |
1172 | | } |
1173 | | } |
1174 | | |
1175 | | // Otherwise, we *require* a register, but didn't fit into |
1176 | | // any with current bundle assignments. Hence, we will need |
1177 | | // to either split or attempt to evict some bundles. |
1178 | | |
1179 | 539k | trace!( |
1180 | 0 | " -> lowest cost evict: set {:?}, cost {:?}", |
1181 | | lowest_cost_evict_conflict_set, |
1182 | | lowest_cost_evict_conflict_cost, |
1183 | | ); |
1184 | 539k | trace!( |
1185 | 0 | " -> lowest cost split: cost {:?}, point {:?}, reg {:?}", |
1186 | | lowest_cost_split_conflict_cost, |
1187 | | lowest_cost_split_conflict_point, |
1188 | | lowest_cost_split_conflict_reg |
1189 | | ); |
1190 | | |
1191 | | // If we reach here, we *must* have an option either to split or evict. |
1192 | 539k | debug_assert!( |
1193 | 0 | lowest_cost_split_conflict_cost.is_some() |
1194 | 0 | || lowest_cost_evict_conflict_cost.is_some() |
1195 | | ); |
1196 | | |
1197 | 539k | let our_spill_weight = self.bundle_spill_weight(bundle); |
1198 | 539k | trace!(" -> our spill weight: {}", our_spill_weight); |
1199 | | |
1200 | | // We detect the "too-many-live-registers" case here and |
1201 | | // return an error cleanly, rather than panicking, because |
1202 | | // the regalloc.rs fuzzer depends on the register |
1203 | | // allocator to correctly reject impossible-to-allocate |
1204 | | // programs in order to discard invalid test cases. |
1205 | 539k | if self.minimal_bundle(bundle) |
1206 | 47.0k | && (attempts >= 2 |
1207 | 47.0k | || lowest_cost_evict_conflict_cost.is_none() |
1208 | 47.0k | || lowest_cost_evict_conflict_cost.unwrap() >= our_spill_weight) |
1209 | | { |
1210 | 0 | if matches!(req, Requirement::Register | Requirement::Limit(_)) { |
1211 | | // Check if this is a too-many-live-registers situation. |
1212 | 0 | let range = self.ctx.bundles[bundle].ranges[0].range; |
1213 | 0 | trace!("checking for too many live regs"); |
1214 | 0 | let mut min_bundles_assigned = 0; |
1215 | 0 | let mut fixed_assigned = 0; |
1216 | 0 | let mut total_regs = 0; |
1217 | 0 | for preg in self.env.preferred_regs_by_class[class as u8 as usize] |
1218 | 0 | .iter() |
1219 | 0 | .chain(self.env.non_preferred_regs_by_class[class as u8 as usize].iter()) |
1220 | | { |
1221 | 0 | trace!(" -> PR {:?}", preg); |
1222 | 0 | let start = LiveRangeKey::from_range(&CodeRange { |
1223 | 0 | from: range.from.prev(), |
1224 | 0 | to: range.from.prev(), |
1225 | 0 | }); |
1226 | 0 | for (key, lr) in self.ctx.pregs[preg.index()] |
1227 | 0 | .allocations |
1228 | 0 | .btree |
1229 | 0 | .range(start..) |
1230 | | { |
1231 | 0 | let preg_range = key.to_range(); |
1232 | 0 | if preg_range.to <= range.from { |
1233 | 0 | continue; |
1234 | 0 | } |
1235 | 0 | if preg_range.from >= range.to { |
1236 | 0 | break; |
1237 | 0 | } |
1238 | 0 | if lr.is_valid() { |
1239 | 0 | if self.minimal_bundle(self.ranges[*lr].bundle) { |
1240 | 0 | trace!(" -> min bundle {:?}", lr); |
1241 | 0 | min_bundles_assigned += 1; |
1242 | | } else { |
1243 | 0 | trace!(" -> non-min bundle {:?}", lr); |
1244 | | } |
1245 | | } else { |
1246 | 0 | trace!(" -> fixed bundle"); |
1247 | 0 | fixed_assigned += 1; |
1248 | | } |
1249 | | } |
1250 | | |
1251 | | // We also need to discard any registers that do not fit
1252 | | // under the limit -- we cannot allocate to them.
1253 | 0 | if let Requirement::Limit(limit) = req { |
1254 | 0 | if preg.hw_enc() >= limit as usize { |
1255 | 0 | continue; |
1256 | 0 | } |
1257 | 0 | } |
1258 | | |
1259 | 0 | total_regs += 1; |
1260 | | } |
1261 | 0 | trace!( |
1262 | 0 | " -> total {}, fixed {}, min {}", |
1263 | | total_regs, |
1264 | | fixed_assigned, |
1265 | | min_bundles_assigned |
1266 | | ); |
1267 | 0 | if min_bundles_assigned + fixed_assigned >= total_regs { |
1268 | 0 | return Err(RegAllocError::TooManyLiveRegs); |
1269 | 0 | } |
1270 | 0 | } |
1271 | | |
1272 | 0 | panic!("Could not allocate minimal bundle, but the allocation problem should be possible to solve"); |
1273 | 539k | } |
1274 | | |
1275 | | // If our bundle's weight is less than or equal to (*) the
1276 | | // evict cost, choose to split. Also pick splitting if |
1277 | | // we're on our second or more attempt and we didn't |
1278 | | // allocate. Also pick splitting if the conflict set is |
1279 | | // empty, meaning a fixed conflict that can't be evicted. |
1280 | | // |
1281 | | // (*) the "equal to" part is very important: it prevents |
1282 | | // an infinite loop where two bundles with equal spill |
1283 | | // cost continually evict each other in an infinite |
1284 | | // allocation loop. In such a case, the bundle that is
1285 | | // already in place wins, and the other splits.
1286 | | // |
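| | // Concretely: if bundles A and B have equal spill weight w and
| | // want the same register, and A got it first, then when B probes,
| | // its weight w is not strictly greater than the eviction cost w,
| | // so B splits rather than evicting A. Without the "equal to" case,
| | // B would evict A, A would evict B back, and so on forever.
| | //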
1287 | | // Note that we don't split if the bundle is minimal. |
1288 | 539k | if !self.minimal_bundle(bundle) |
1289 | 492k | && (attempts >= 2 |
1290 | 492k | || lowest_cost_evict_conflict_cost.is_none() |
1291 | 427k | || our_spill_weight <= lowest_cost_evict_conflict_cost.unwrap()) |
1292 | | { |
1293 | 358k | trace!( |
1294 | 0 | " -> deciding to split: our spill weight is {}", |
1295 | 0 | self.bundle_spill_weight(bundle) |
1296 | | ); |
1297 | 358k | let bundle_start = self.ctx.bundles[bundle].ranges[0].range.from; |
1298 | 358k | let mut split_at_point = |
1299 | 358k | core::cmp::max(lowest_cost_split_conflict_point, bundle_start); |
1300 | 358k | let requeue_with_reg = lowest_cost_split_conflict_reg; |
1301 | | |
1302 | | // Adjust `split_at_point` if it is within a deeper loop |
1303 | | // than the bundle start -- hoist it to just before the |
1304 | | // first loop header it encounters. |
1305 | 358k | let bundle_start_depth = self.ctx.cfginfo.approx_loop_depth |
1306 | 358k | [self.ctx.cfginfo.insn_block[bundle_start.inst().index()].index()]; |
1307 | 358k | let split_at_depth = self.ctx.cfginfo.approx_loop_depth |
1308 | 358k | [self.ctx.cfginfo.insn_block[split_at_point.inst().index()].index()]; |
1309 | 358k | if split_at_depth > bundle_start_depth { |
1310 | 59.1k | for block in (self.ctx.cfginfo.insn_block[bundle_start.inst().index()].index() |
1311 | 26.0k | + 1) |
1312 | 26.0k | ..=self.ctx.cfginfo.insn_block[split_at_point.inst().index()].index() |
1313 | | { |
1314 | 59.1k | if self.ctx.cfginfo.approx_loop_depth[block] > bundle_start_depth { |
1315 | 26.0k | split_at_point = self.ctx.cfginfo.block_entry[block]; |
1316 | 26.0k | break; |
1317 | 33.0k | } |
1318 | | } |
1319 | 332k | } |
1320 | | |
1321 | 358k | self.split_and_requeue_bundle( |
1322 | 358k | bundle, |
1323 | 358k | split_at_point, |
1324 | 358k | requeue_with_reg, |
1325 | | /* trim_ends_into_spill_bundle = */ true,
1326 | | ); |
1327 | | |
1328 | | // Success, return scratch memory to context and finish |
1329 | 358k | break 'outer; |
1330 | | } else { |
1331 | | // Evict all bundles in the lowest-cost conflict set and try again.
1332 | 180k | self.ctx.output.stats.evict_bundle_event += 1; |
1333 | 362k | for &bundle in &lowest_cost_evict_conflict_set { |
1334 | 181k | trace!(" -> evicting {:?}", bundle); |
1335 | 181k | self.evict_bundle(bundle); |
1336 | 181k | self.ctx.output.stats.evict_bundle_count += 1; |
1337 | | } |
1338 | | } |
1339 | | } |
1340 | | |
1341 | 8.65M | self.ctx.scratch_conflicts = scratch; |
1342 | 8.65M | self.ctx.scratch_bundle = lowest_cost_evict_conflict_set; |
1343 | 8.65M | return Ok(()); |
1344 | 8.92M | } |
1345 | | } |
1346 | | |
1347 | | /// Compute the minimal range that covers a given use in a minimal |
1348 | | /// bundle. This definition needs to be consistent between bundle |
1349 | | /// property computation and minimal-range splitting (fallback |
1350 | | /// splitting). |
1351 | | /// |
1352 | | /// The cases are: |
1353 | | /// - early def: whole instruction |
1354 | | /// - late def: Late only |
1355 | | /// - early use: Early only |
1356 | | /// - late use: whole instruction |
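| | ///
| | /// For example, for an instruction i5 (with successor i6): an
| | /// early (Before) def covers the whole instruction,
| | /// [Before(i5), Before(i6)); a late (After) def covers only
| | /// [After(i5), Before(i6)); an early (Before) use covers only
| | /// [Before(i5), After(i5)); and a late (After) use again covers
| | /// the whole instruction, [Before(i5), Before(i6)).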
1357 | 7.39M | fn minimal_range_for_use(u: &Use) -> CodeRange { |
1358 | 7.39M | let early = ProgPoint::before(u.pos.inst()); |
1359 | 7.39M | let late = ProgPoint::after(u.pos.inst()); |
1360 | 7.39M | let next_early = ProgPoint::before(u.pos.inst().next()); |
1361 | 7.39M | match (u.pos.pos(), u.operand.kind()) { |
1362 | 2.14M | (InstPosition::Before, OperandKind::Def) => CodeRange { |
1363 | 2.14M | from: early, |
1364 | 2.14M | to: next_early, |
1365 | 2.14M | }, |
1366 | 989k | (InstPosition::Before, OperandKind::Use) => CodeRange { |
1367 | 989k | from: early, |
1368 | 989k | to: late, |
1369 | 989k | }, |
1370 | 4.12M | (InstPosition::After, OperandKind::Def) => CodeRange { |
1371 | 4.12M | from: late, |
1372 | 4.12M | to: next_early, |
1373 | 4.12M | }, |
1374 | 138k | (InstPosition::After, OperandKind::Use) => CodeRange { |
1375 | 138k | from: early, |
1376 | 138k | to: next_early, |
1377 | 138k | }, |
1378 | | } |
1379 | 7.39M | } |