/rust/registry/src/index.crates.io-1949cf8c6b5b557f/regalloc2-0.13.5/src/moves.rs
/*
 * Released under the terms of the Apache 2.0 license with LLVM
 * exception. See `LICENSE` for details.
 */

use crate::{ion::data_structures::u64_key, Allocation, PReg};
use core::fmt::Debug;
use smallvec::{smallvec, SmallVec};

/// A list of moves to be performed in sequence, with auxiliary data
/// attached to each.
pub type MoveVec<T> = SmallVec<[(Allocation, Allocation, T); 16]>;

/// A list of moves to be performed in sequence, like a
/// `MoveVec<T>`, except that an unchosen scratch space may occur as
/// well, represented by `Allocation::none()`.
#[derive(Clone, Debug)]
pub enum MoveVecWithScratch<T> {
    /// No scratch was actually used.
    NoScratch(MoveVec<T>),
    /// A scratch space was used.
    Scratch(MoveVec<T>),
}

/// A `ParallelMoves` represents a list of alloc-to-alloc moves that
/// must happen in parallel -- i.e., all reads of sources semantically
/// happen before all writes of destinations, and destinations are
/// allowed to overwrite sources. It can compute a list of sequential
/// moves that will produce the equivalent data movement, possibly
/// using a scratch register if one is necessary.
pub struct ParallelMoves<T: Clone + Copy + Default> {
    parallel_moves: MoveVec<T>,
}

impl<T: Clone + Copy + Default + PartialEq> ParallelMoves<T> {
    pub fn new() -> Self {
        Self {
            parallel_moves: smallvec![],
        }
    }

    pub fn add(&mut self, from: Allocation, to: Allocation, t: T) {
        self.parallel_moves.push((from, to, t));
    }

    fn sources_overlap_dests(&self) -> bool {
        // Assumes `parallel_moves` has already been sorted by `dst`
        // in `resolve()` below. The O(n log n) cost of this loop is no
        // worse than the sort we already did.
        for &(src, _, _) in &self.parallel_moves {
            if self
                .parallel_moves
                .binary_search_by_key(&src, |&(_, dst, _)| dst)
                .is_ok()
            {
                return true;
            }
        }
        false
    }

    /// Resolve the parallel-moves problem to a sequence of separate
    /// moves, such that the combined effect of the sequential moves
    /// is as-if all of the moves added to this `ParallelMoves`
    /// resolver happened in parallel.
    ///
    /// Sometimes, if there is a cycle, a scratch register is
    /// necessary to allow the moves to occur sequentially. In this
    /// case, `Allocation::none()` is returned to represent the
    /// scratch register. The caller may choose to always hold a
    /// separate scratch register unused to allow this to be trivially
    /// rewritten; or may dynamically search for or create a free
    /// register as needed, if none are available.
    pub fn resolve(mut self) -> MoveVecWithScratch<T> {
        // Easy case: zero or one move. Just return our vec.
        if self.parallel_moves.len() <= 1 {
            return MoveVecWithScratch::NoScratch(self.parallel_moves);
        }

        // Sort moves so that we can efficiently test for presence.
        // For that purpose it doesn't matter whether we sort by
        // source or destination, but later we'll want them sorted
        // by destination.
        self.parallel_moves
            .sort_by_key(|&(src, dst, _)| u64_key(dst.bits(), src.bits()));

        // Duplicate moves cannot change the semantics of this
        // parallel move set, so remove them. This is cheap since we
        // just sorted the list.
        self.parallel_moves.dedup();

        // General case: some moves overwrite dests that other moves
        // read as sources. We'll use a general algorithm.
        //
        // *Important property*: because we expect that each register
        // has only one writer (otherwise the effect of the parallel
        // move is undefined), each move can only block one other move
        // (with its one source corresponding to the one writer of
        // that source). Thus, we *can only have simple cycles* (those
        // that are a ring of nodes, i.e., with only one path from a
        // node back to itself); there are no SCCs that are more
        // complex than that. We leverage this fact below to avoid
        // having to do a full Tarjan SCC DFS (with lowest-index
        // computation, etc.): instead, as soon as we find a cycle, we
        // know we have the full cycle and we can do a cyclic move
        // sequence and continue.
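        //
        // For instance (an illustrative case added in editing): the
        // single-writer set { B := A, C := B, A := C } forms exactly
        // one ring A -> B -> C -> A; a second writer to any of these
        // destinations, which could create a more complex SCC, is
        // ruled out by the invariant checked just below.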

        // Check that each destination has only one writer.
        if cfg!(debug_assertions) {
            let mut last_dst = None;
            for &(_, dst, _) in &self.parallel_moves {
                if last_dst.is_some() {
                    debug_assert!(last_dst.unwrap() != dst);
                }
                last_dst = Some(dst);
            }
        }

        // Moving an allocation into itself is technically a cycle but
        // should have no effect, as long as there are no other writes
        // into that destination.
        self.parallel_moves.retain(|&mut (src, dst, _)| src != dst);

        // Do any dests overlap sources? If not, we can also just
        // return the list.
        if !self.sources_overlap_dests() {
            return MoveVecWithScratch::NoScratch(self.parallel_moves);
        }

        // Construct a mapping from move indices to moves they must
        // come before. Any given move must come before a move that
        // overwrites its source; we have moves sorted by dest
        // above so we can efficiently find such a move, if any.
        const NONE: usize = usize::MAX;
        let must_come_before: SmallVec<[usize; 16]> = self
            .parallel_moves
            .iter()
            .map(|&(src, _, _)| {
                self.parallel_moves
                    .binary_search_by_key(&src, |&(_, dst, _)| dst)
                    .unwrap_or(NONE)
            })
            .collect();
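
        // Worked illustration (editorial, not in the original): with
        // sorted moves
        //
        //     0: A -> B,   1: B -> C,   2: D -> E
        //
        // move 1 reads B and move 0 overwrites B, so move 1 must be
        // emitted before move 0: must_come_before[1] = 0. Neither A
        // nor D is written by any move, so must_come_before[0] and
        // must_come_before[2] are both NONE.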

        // Do a simple stack-based DFS and emit moves in postorder,
        // then reverse at the end for RPO. Unlike Tarjan's SCC
        // algorithm, we can emit a cycle as soon as we find one, as
        // noted above.
        #[derive(Clone, Copy, Debug, Eq, PartialEq)]
        enum State {
            /// Not on stack, not visited
            ToDo,
            /// On stack, not yet visited
            Pending,
            /// Visited
            Done,
        }
        let mut ret: MoveVec<T> = smallvec![];
        let mut stack: SmallVec<[usize; 16]> = smallvec![];
        let mut state: SmallVec<[State; 16]> = smallvec![State::ToDo; self.parallel_moves.len()];
        let mut scratch_used = false;

        while let Some(next) = state.iter().position(|&state| state == State::ToDo) {
            stack.push(next);
            state[next] = State::Pending;

            while let Some(&top) = stack.last() {
                debug_assert_eq!(state[top], State::Pending);
                let next = must_come_before[top];
                if next == NONE || state[next] == State::Done {
                    ret.push(self.parallel_moves[top]);
                    state[top] = State::Done;
                    stack.pop();
                    while let Some(top) = stack.pop() {
                        ret.push(self.parallel_moves[top]);
                        state[top] = State::Done;
                    }
                } else if state[next] == State::ToDo {
                    stack.push(next);
                    state[next] = State::Pending;
                } else {
                    // Found a cycle -- emit a cyclic-move sequence
                    // for the cycle on the top of stack, then normal
                    // moves below it. Recall that these moves will be
                    // reversed in sequence, so from the original
                    // parallel move set
                    //
                    //     { B := A, C := B, A := C }
                    //
                    // we will generate something like:
                    //
                    //     A := scratch
                    //     B := A
                    //     C := B
                    //     scratch := C
                    //
                    // which will become:
                    //
                    //     scratch := C
                    //     C := B
                    //     B := A
                    //     A := scratch
                    debug_assert_ne!(top, next);
                    state[top] = State::Done;
                    stack.pop();

                    let (scratch_src, dst, dst_t) = self.parallel_moves[top];
                    scratch_used = true;

                    ret.push((Allocation::none(), dst, dst_t));
                    while let Some(move_idx) = stack.pop() {
                        state[move_idx] = State::Done;
                        ret.push(self.parallel_moves[move_idx]);

                        if move_idx == next {
                            break;
                        }
                    }
                    ret.push((scratch_src, Allocation::none(), T::default()));
                }
            }
        }

        ret.reverse();

        if scratch_used {
            MoveVecWithScratch::Scratch(ret)
        } else {
            MoveVecWithScratch::NoScratch(ret)
        }
    }
}
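
// The following test module is an editorial addition, not part of the
// original file: a minimal sketch of how a caller drives the resolver,
// assuming this crate's `PReg::new`, `RegClass`, and `Allocation`
// constructors. A two-register swap is the smallest cycle, so it
// exercises the scratch path end to end.
#[cfg(test)]
mod parallel_moves_example {
    use super::*;
    use crate::RegClass;

    #[test]
    fn swap_resolves_via_scratch() {
        let a = Allocation::reg(PReg::new(0, RegClass::Int));
        let b = Allocation::reg(PReg::new(1, RegClass::Int));

        // Parallel semantics: both sources are read before either
        // destination is written, so this pair is a swap.
        let mut pm = ParallelMoves::new();
        pm.add(b, a, ()); // a := b
        pm.add(a, b, ()); // b := a

        let resolved = pm.resolve();
        // A cycle cannot be sequentialized in place; the resolver
        // reports that a scratch location is required.
        assert!(resolved.needs_scratch());

        // Substitute a concrete scratch register (a hypothetical
        // choice for the example) and check the cycle was broken:
        // scratch := a; a := b; b := scratch.
        let scratch = Allocation::reg(PReg::new(2, RegClass::Int));
        let moves = resolved.with_scratch(scratch);
        assert_eq!(moves.len(), 3);
    }
}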

impl<T> MoveVecWithScratch<T> {
    /// Fills in the scratch space, if needed, with the given
    /// register/allocation and returns a final list of moves. The
    /// scratch register must not occur anywhere in the parallel-move
    /// problem given to the resolver that produced this
    /// `MoveVecWithScratch`.
    pub fn with_scratch(self, scratch: Allocation) -> MoveVec<T> {
        match self {
            MoveVecWithScratch::NoScratch(moves) => moves,
            MoveVecWithScratch::Scratch(mut moves) => {
                for (src, dst, _) in &mut moves {
                    debug_assert!(
                        *src != scratch && *dst != scratch,
                        "Scratch register should not also be an actual source or dest of moves"
                    );
                    debug_assert!(
                        !(src.is_none() && dst.is_none()),
                        "Move resolution should not have produced a scratch-to-scratch move"
                    );
                    if src.is_none() {
                        *src = scratch;
                    }
                    if dst.is_none() {
                        *dst = scratch;
                    }
                }
                moves
            }
        }
    }

    /// Unwrap without a scratch register.
    pub fn without_scratch(self) -> Option<MoveVec<T>> {
        match self {
            MoveVecWithScratch::NoScratch(moves) => Some(moves),
            MoveVecWithScratch::Scratch(..) => None,
        }
    }

    /// Do we need a scratch register?
    pub fn needs_scratch(&self) -> bool {
        match self {
            MoveVecWithScratch::NoScratch(..) => false,
            MoveVecWithScratch::Scratch(..) => true,
        }
    }
}

/// Final stage of move resolution: finding or using scratch
/// registers, creating them if necessary by using stackslots, and
/// ensuring that the final list of moves contains no stack-to-stack
/// moves.
///
/// The resolved list of moves may need one or two scratch registers,
/// and maybe a stackslot, to ensure these conditions. Our general
/// strategy is in two steps.
///
/// First, we find a scratch register, so we only have to worry about
/// a list of moves, all with real locations as src and dest. If we're
/// lucky and there are any registers not allocated at this
/// program-point, we can use a real register. Otherwise, we use an
/// extra stackslot. This is fine, because at this step,
/// stack-to-stack moves are OK.
///
/// Then, we resolve stack-to-stack moves into stack-to-reg /
/// reg-to-stack pairs. For this, we try to allocate a second free
/// register. If unavailable, we create a new scratch stackslot to
/// serve as a backup of one of the in-use registers, then borrow that
/// register as the scratch register in the middle of stack-to-stack
/// moves.
pub struct MoveAndScratchResolver<GetReg, GetStackSlot, IsStackAlloc>
where
    GetReg: FnMut() -> Option<Allocation>,
    GetStackSlot: FnMut() -> Allocation,
    IsStackAlloc: Fn(Allocation) -> bool,
{
    /// Closure that finds us a PReg at the current location.
    pub find_free_reg: GetReg,
    /// Closure that gets us a stackslot, if needed.
    pub get_stackslot: GetStackSlot,
    /// Closure to determine whether an `Allocation` refers to a stack slot.
    pub is_stack_alloc: IsStackAlloc,
    /// Use this register if no free register is available to use as a
    /// temporary in stack-to-stack moves. If we do use this register
    /// for that purpose, its value will be restored by the end of the
    /// move sequence. Provided by caller and statically chosen. This is
    /// a very last-ditch option, so static choice is OK.
    pub borrowed_scratch_reg: PReg,
}
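
// Editorial sketch (not in the original file): one plausible way to
// wire up the resolver's closures. The chosen register and slot
// numbers are arbitrary assumptions for illustration only.
#[cfg(test)]
mod scratch_resolver_example {
    use super::*;
    use crate::{RegClass, SpillSlot};

    #[test]
    fn stack_to_stack_goes_through_a_register() {
        let slot0 = Allocation::stack(SpillSlot::new(0));
        let slot1 = Allocation::stack(SpillSlot::new(1));
        let free = Allocation::reg(PReg::new(3, RegClass::Int));

        let mut pm = ParallelMoves::new();
        pm.add(slot0, slot1, ()); // slot1 := slot0, a stack-to-stack move

        let resolver = MoveAndScratchResolver {
            // Pretend exactly one register is free at this point.
            find_free_reg: || Some(free),
            // Not exercised here; a real caller would allocate one.
            get_stackslot: || Allocation::stack(SpillSlot::new(7)),
            is_stack_alloc: |a| a.is_stack(),
            // Last-resort register, statically chosen by the caller.
            borrowed_scratch_reg: PReg::new(15, RegClass::Int),
        };

        let moves = resolver.compute(pm.resolve());
        // The stack-to-stack move is expanded into stack -> reg ->
        // stack, using the free register as the temporary.
        assert_eq!(moves.len(), 2);
        assert_eq!(moves[0], (slot0, free, ()));
        assert_eq!(moves[1], (free, slot1, ()));
    }
}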

impl<GetReg, GetStackSlot, IsStackAlloc> MoveAndScratchResolver<GetReg, GetStackSlot, IsStackAlloc>
where
    GetReg: FnMut() -> Option<Allocation>,
    GetStackSlot: FnMut() -> Allocation,
    IsStackAlloc: Fn(Allocation) -> bool,
{
    pub fn compute<T: Debug + Default + Copy>(
        mut self,
        moves: MoveVecWithScratch<T>,
    ) -> MoveVec<T> {
        let moves = if moves.needs_scratch() {
            // Now, find a scratch allocation in order to resolve cycles.
            let scratch = (self.find_free_reg)().unwrap_or_else(|| (self.get_stackslot)());
            trace!("scratch resolver: scratch alloc {:?}", scratch);

            moves.with_scratch(scratch)
        } else {
            moves.without_scratch().unwrap()
        };

        // Do we have any stack-to-stack moves? Fast return if not.
        let stack_to_stack = moves
            .iter()
            .any(|&(src, dst, _)| self.is_stack_to_stack_move(src, dst));
        if !stack_to_stack {
            return moves;
        }

        // Allocate a scratch register for stack-to-stack move expansion.
        let (scratch_reg, save_slot) = if let Some(reg) = (self.find_free_reg)() {
            trace!(
                "scratch resolver: have free stack-to-stack scratch preg: {:?}",
                reg
            );
            (reg, None)
        } else {
            let reg = Allocation::reg(self.borrowed_scratch_reg);
            // Stackslot into which we need to save the stack-to-stack
            // scratch reg before doing any stack-to-stack moves, if we stole
            // the reg.
            let save = (self.get_stackslot)();
            trace!(
                "scratch resolver: stack-to-stack borrowing {:?} with save stackslot {:?}",
                reg,
                save
            );
            (reg, Some(save))
        };

        // Mutually exclusive flags for whether either scratch_reg or
        // save_slot need to be restored from the other. Initially,
        // scratch_reg has a value we should preserve and save_slot
        // has garbage.
        let mut scratch_dirty = false;
        let mut save_dirty = true;

        let mut result = smallvec![];
        for &(src, dst, data) in &moves {
            // Do we have a stack-to-stack move? If so, resolve.
            if self.is_stack_to_stack_move(src, dst) {
                trace!("scratch resolver: stack to stack: {:?} -> {:?}", src, dst);

                // If the selected scratch register is stolen from the
                // set of in-use registers, then we need to save the
                // current contents of the scratch register before using
                // it as a temporary.
                if let Some(save_slot) = save_slot {
                    // However we may have already done so for an earlier
                    // stack-to-stack move in which case we don't need
                    // to do it again.
                    if save_dirty {
                        debug_assert!(!scratch_dirty);
                        result.push((scratch_reg, save_slot, T::default()));
                        save_dirty = false;
                    }
                }

                // We can't move directly from one stack slot to another
                // on any architecture we care about, so stack-to-stack
                // moves must go via a scratch register.
                result.push((src, scratch_reg, data));
                result.push((scratch_reg, dst, data));
                scratch_dirty = true;
            } else {
                // This is not a stack-to-stack move, but we need to
                // make sure that the scratch register is in the correct
                // state if this move interacts with that register.
                if src == scratch_reg && scratch_dirty {
                    // We're copying from the scratch register so if
                    // it was stolen for a stack-to-stack move then we
                    // need to make sure it has the correct contents,
                    // not whatever was temporarily copied into it. If
                    // we got scratch_reg from find_free_reg then it
                    // had better not have been used as the source of
                    // a move. So if we're here it's because we fell
                    // back to the caller-provided last-resort scratch
                    // register, and we must therefore have a save-slot
                    // allocated too.
                    debug_assert!(!save_dirty);
                    let save_slot = save_slot.expect("move source should not be a free register");
                    result.push((save_slot, scratch_reg, T::default()));
                    scratch_dirty = false;
                }
                if dst == scratch_reg {
                    // We are writing something to the scratch register
                    // so it doesn't matter what was there before. We
                    // can avoid restoring it, but we will need to save
                    // it again before the next stack-to-stack move.
                    scratch_dirty = false;
                    save_dirty = true;
                }
                result.push((src, dst, data));
            }
        }

        // Now that all the stack-to-stack moves are done, restore the
        // scratch register if necessary.
        if let Some(save_slot) = save_slot {
            if scratch_dirty {
                debug_assert!(!save_dirty);
                result.push((save_slot, scratch_reg, T::default()));
            }
        }

        trace!("scratch resolver: got {:?}", result);
        result
    }
| 394 | 0 | if save_dirty { | 395 | 0 | debug_assert!(!scratch_dirty); | 396 | 0 | result.push((scratch_reg, save_slot, T::default())); | 397 | 0 | save_dirty = false; | 398 | 0 | } | 399 | 58 | } | 400 | | | 401 | | // We can't move directly from one stack slot to another | 402 | | // on any architecture we care about, so stack-to-stack | 403 | | // moves must go via a scratch register. | 404 | 58 | result.push((src, scratch_reg, data)); | 405 | 58 | result.push((scratch_reg, dst, data)); | 406 | 58 | scratch_dirty = true; | 407 | | } else { | 408 | | // This is not a stack-to-stack move, but we need to | 409 | | // make sure that the scratch register is in the correct | 410 | | // state if this move interacts with that register. | 411 | 40 | if src == scratch_reg && scratch_dirty { | 412 | | // We're copying from the scratch register so if | 413 | | // it was stolen for a stack-to-stack move then we | 414 | | // need to make sure it has the correct contents, | 415 | | // not whatever was temporarily copied into it. If | 416 | | // we got scratch_reg from find_free_reg then it | 417 | | // had better not have been used as the source of | 418 | | // a move. So if we're here it's because we fell | 419 | | // back to the caller-provided last-resort scratch | 420 | | // register, and we must therefore have a save-slot | 421 | | // allocated too. | 422 | 0 | debug_assert!(!save_dirty); | 423 | 0 | let save_slot = save_slot.expect("move source should not be a free register"); | 424 | 0 | result.push((save_slot, scratch_reg, T::default())); | 425 | 0 | scratch_dirty = false; | 426 | 40 | } | 427 | 40 | if dst == scratch_reg { | 428 | 0 | // We are writing something to the scratch register | 429 | 0 | // so it doesn't matter what was there before. We | 430 | 0 | // can avoid restoring it, but we will need to save | 431 | 0 | // it again before the next stack-to-stack move. | 432 | 0 | scratch_dirty = false; | 433 | 0 | save_dirty = true; | 434 | 40 | } | 435 | 40 | result.push((src, dst, data)); | 436 | | } | 437 | | } | 438 | | | 439 | | // Now that all the stack-to-stack moves are done, restore the | 440 | | // scratch register if necessary. | 441 | 34 | if let Some(save_slot) = save_slot { | 442 | 0 | if scratch_dirty { | 443 | 0 | debug_assert!(!save_dirty); | 444 | 0 | result.push((save_slot, scratch_reg, T::default())); | 445 | 0 | } | 446 | 34 | } | 447 | | | 448 | 34 | trace!("scratch resolver: got {:?}", result); | 449 | 34 | result | 450 | 1.91M | } |
Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#1}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#2}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#3}>>::compute::<core::option::Option<regalloc2::VReg>>Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#1}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#2}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#3}>>::compute::<core::option::Option<regalloc2::VReg>>Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::process_branch::{closure#1}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::process_branch::{closure#2}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::process_branch::{closure#3}>>::compute::<core::option::Option<regalloc2::VReg>>Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::process_branch::{closure#1}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::process_branch::{closure#2}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::process_branch::{closure#3}>>::compute::<core::option::Option<regalloc2::VReg>>Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::process_branch::{closure#1}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::process_branch::{closure#2}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::process_branch::{closure#3}>>::compute::<core::option::Option<regalloc2::VReg>>Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<_, _, _>>::compute::<_> 
<regalloc2::moves::MoveAndScratchResolver<<regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#1}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#2}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#3}>>::compute::<core::option::Option<regalloc2::VReg>>Line | Count | Source | 330 | 6.43M | pub fn compute<T: Debug + Default + Copy>( | 331 | 6.43M | mut self, | 332 | 6.43M | moves: MoveVecWithScratch<T>, | 333 | 6.43M | ) -> MoveVec<T> { | 334 | 6.43M | let moves = if moves.needs_scratch() { | 335 | | // Now, find a scratch allocation in order to resolve cycles. | 336 | 155 | let scratch = (self.find_free_reg)().unwrap_or_else(|| (self.get_stackslot)()); | 337 | 155 | trace!("scratch resolver: scratch alloc {:?}", scratch); | 338 | | | 339 | 155 | moves.with_scratch(scratch) | 340 | | } else { | 341 | 6.43M | moves.without_scratch().unwrap() | 342 | | }; | 343 | | | 344 | | // Do we have any stack-to-stack moves? Fast return if not. | 345 | 6.43M | let stack_to_stack = moves | 346 | 6.43M | .iter() | 347 | 6.43M | .any(|&(src, dst, _)| self.is_stack_to_stack_move(src, dst)); | 348 | 6.43M | if !stack_to_stack { | 349 | 6.43M | return moves; | 350 | 486 | } | 351 | | | 352 | | // Allocate a scratch register for stack-to-stack move expansion. | 353 | 486 | let (scratch_reg, save_slot) = if let Some(reg) = (self.find_free_reg)() { | 354 | 479 | trace!( | 355 | | "scratch resolver: have free stack-to-stack scratch preg: {:?}", | 356 | | reg | 357 | | ); | 358 | 479 | (reg, None) | 359 | | } else { | 360 | 7 | let reg = Allocation::reg(self.borrowed_scratch_reg); | 361 | | // Stackslot into which we need to save the stack-to-stack | 362 | | // scratch reg before doing any stack-to-stack moves, if we stole | 363 | | // the reg. | 364 | 7 | let save = (self.get_stackslot)(); | 365 | 7 | trace!( | 366 | | "scratch resolver: stack-to-stack borrowing {:?} with save stackslot {:?}", | 367 | | reg, | 368 | | save | 369 | | ); | 370 | 7 | (reg, Some(save)) | 371 | | }; | 372 | | | 373 | | // Mutually exclusive flags for whether either scratch_reg or | 374 | | // save_slot need to be restored from the other. Initially, | 375 | | // scratch_reg has a value we should preserve and save_slot | 376 | | // has garbage. | 377 | 486 | let mut scratch_dirty = false; | 378 | 486 | let mut save_dirty = true; | 379 | | | 380 | 486 | let mut result = smallvec![]; | 381 | 1.02k | for &(src, dst, data) in &moves { | 382 | | // Do we have a stack-to-stack move? If so, resolve. | 383 | 1.02k | if self.is_stack_to_stack_move(src, dst) { | 384 | 568 | trace!("scratch resolver: stack to stack: {:?} -> {:?}", src, dst); | 385 | | | 386 | | // If the selected scratch register is stolen from the | 387 | | // set of in-use registers, then we need to save the | 388 | | // current contents of the scratch register before using | 389 | | // it as a temporary. | 390 | 568 | if let Some(save_slot) = save_slot { | 391 | | // However we may have already done so for an earlier | 392 | | // stack-to-stack move in which case we don't need | 393 | | // to do it again. 
| 394 | 15 | if save_dirty { | 395 | 7 | debug_assert!(!scratch_dirty); | 396 | 7 | result.push((scratch_reg, save_slot, T::default())); | 397 | 7 | save_dirty = false; | 398 | 8 | } | 399 | 553 | } | 400 | | | 401 | | // We can't move directly from one stack slot to another | 402 | | // on any architecture we care about, so stack-to-stack | 403 | | // moves must go via a scratch register. | 404 | 568 | result.push((src, scratch_reg, data)); | 405 | 568 | result.push((scratch_reg, dst, data)); | 406 | 568 | scratch_dirty = true; | 407 | | } else { | 408 | | // This is not a stack-to-stack move, but we need to | 409 | | // make sure that the scratch register is in the correct | 410 | | // state if this move interacts with that register. | 411 | 458 | if src == scratch_reg && scratch_dirty { | 412 | | // We're copying from the scratch register so if | 413 | | // it was stolen for a stack-to-stack move then we | 414 | | // need to make sure it has the correct contents, | 415 | | // not whatever was temporarily copied into it. If | 416 | | // we got scratch_reg from find_free_reg then it | 417 | | // had better not have been used as the source of | 418 | | // a move. So if we're here it's because we fell | 419 | | // back to the caller-provided last-resort scratch | 420 | | // register, and we must therefore have a save-slot | 421 | | // allocated too. | 422 | 0 | debug_assert!(!save_dirty); | 423 | 0 | let save_slot = save_slot.expect("move source should not be a free register"); | 424 | 0 | result.push((save_slot, scratch_reg, T::default())); | 425 | 0 | scratch_dirty = false; | 426 | 458 | } | 427 | 458 | if dst == scratch_reg { | 428 | 5 | // We are writing something to the scratch register | 429 | 5 | // so it doesn't matter what was there before. We | 430 | 5 | // can avoid restoring it, but we will need to save | 431 | 5 | // it again before the next stack-to-stack move. | 432 | 5 | scratch_dirty = false; | 433 | 5 | save_dirty = true; | 434 | 453 | } | 435 | 458 | result.push((src, dst, data)); | 436 | | } | 437 | | } | 438 | | | 439 | | // Now that all the stack-to-stack moves are done, restore the | 440 | | // scratch register if necessary. | 441 | 486 | if let Some(save_slot) = save_slot { | 442 | 7 | if scratch_dirty { | 443 | 7 | debug_assert!(!save_dirty); | 444 | 7 | result.push((save_slot, scratch_reg, T::default())); | 445 | 0 | } | 446 | 479 | } | 447 | | | 448 | 486 | trace!("scratch resolver: got {:?}", result); | 449 | 486 | result | 450 | 6.43M | } |
Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#1}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#2}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#3}>>::compute::<core::option::Option<regalloc2::VReg>>Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#1}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#2}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#3}>>::compute::<core::option::Option<regalloc2::VReg>>Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::process_branch::{closure#1}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::process_branch::{closure#2}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::process_branch::{closure#3}>>::compute::<core::option::Option<regalloc2::VReg>>Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::process_branch::{closure#1}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::process_branch::{closure#2}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::process_branch::{closure#3}>>::compute::<core::option::Option<regalloc2::VReg>>Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::process_branch::{closure#1}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::process_branch::{closure#2}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::process_branch::{closure#3}>>::compute::<core::option::Option<regalloc2::VReg>> |
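Both executed instantiations above spend nearly all of their counts on the fast path (lines 348-349, 1.91M and 6.43M early returns versus 34 and 486 stack-to-stack cases). As a standalone illustration of the rare path, the following minimal Rust sketch mirrors the scratch_dirty/save_dirty state machine from compute. The Loc type, is_stack predicate, and expand_stack_moves function are hypothetical stand-ins for Allocation, the is_stack_alloc closure, and the expansion loop; this is a sketch, not regalloc2's API.

#[derive(Clone, Copy, PartialEq, Debug)]
enum Loc {
    Reg(u8),   // stands in for a register Allocation
    Slot(u32), // stands in for a stack-slot Allocation
}

fn is_stack(l: Loc) -> bool {
    matches!(l, Loc::Slot(_))
}

// Expand stack-to-stack moves through `scratch`. If `scratch` was borrowed
// from the in-use registers, `save_slot` is Some and the register is saved
// once before first use and restored when its old value is next needed.
fn expand_stack_moves(moves: &[(Loc, Loc)], scratch: Loc, save_slot: Option<Loc>) -> Vec<(Loc, Loc)> {
    let mut scratch_dirty = false; // scratch holds a temporary, not its own value
    let mut save_dirty = true;     // save_slot does not yet hold scratch's old value
    let mut out = Vec::new();
    for &(src, dst) in moves {
        if is_stack(src) && is_stack(dst) {
            if let Some(save) = save_slot {
                if save_dirty {
                    out.push((scratch, save)); // preserve the borrowed register once
                    save_dirty = false;
                }
            }
            out.push((src, scratch)); // stack -> reg
            out.push((scratch, dst)); // reg -> stack
            scratch_dirty = true;
        } else {
            if src == scratch && scratch_dirty {
                // Reading the borrowed register: restore its real value first.
                let save = save_slot.expect("only a borrowed scratch can be a move source");
                out.push((save, scratch));
                scratch_dirty = false;
            }
            if dst == scratch {
                // Overwritten: nothing to restore, but re-save before the next steal.
                scratch_dirty = false;
                save_dirty = true;
            }
            out.push((src, dst));
        }
    }
    if scratch_dirty {
        if let Some(save) = save_slot {
            out.push((save, scratch)); // final restore of the borrowed register
        }
    }
    out
}

fn main() {
    // Two stack-to-stack moves expand through the borrowed register Reg(0),
    // which is saved to Slot(9) once and restored at the end.
    let moves = [(Loc::Slot(0), Loc::Slot(1)), (Loc::Slot(2), Loc::Slot(3))];
    for (src, dst) in expand_stack_moves(&moves, Loc::Reg(0), Some(Loc::Slot(9))) {
        println!("{:?} -> {:?}", src, dst);
    }
}

The two flags stay mutually exclusive exactly as the debug_assert!s in the listing check: a save is only emitted when the slot is stale, and a restore only when the register is stale.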
451 | | |
452 | 3.53M | fn is_stack_to_stack_move(&self, src: Allocation, dst: Allocation) -> bool { |
453 | 3.53M | (self.is_stack_alloc)(src) && (self.is_stack_alloc)(dst) |
454 | 3.53M | }

<regalloc2::moves::MoveAndScratchResolver<<regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#1}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#2}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#3}>>::is_stack_to_stack_move:
Line | Count | Source
452 | 805k | fn is_stack_to_stack_move(&self, src: Allocation, dst: Allocation) -> bool {
453 | 805k | (self.is_stack_alloc)(src) && (self.is_stack_alloc)(dst)
454 | 805k | }

Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#1}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#2}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#3}>>::is_stack_to_stack_move
Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#1}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#2}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#3}>>::is_stack_to_stack_move
Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::process_branch::{closure#1}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::process_branch::{closure#2}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::process_branch::{closure#3}>>::is_stack_to_stack_move
Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::process_branch::{closure#1}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::process_branch::{closure#2}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::process_branch::{closure#3}>>::is_stack_to_stack_move
Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::process_branch::{closure#1}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::process_branch::{closure#2}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::process_branch::{closure#3}>>::is_stack_to_stack_move
Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<_, _, _>>::is_stack_to_stack_move

<regalloc2::moves::MoveAndScratchResolver<<regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#1}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#2}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#3}>>::is_stack_to_stack_move:
Line | Count | Source
452 | 2.72M | fn is_stack_to_stack_move(&self, src: Allocation, dst: Allocation) -> bool {
453 | 2.72M | (self.is_stack_alloc)(src) && (self.is_stack_alloc)(dst)
454 | 2.72M | }

Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#1}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#2}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#3}>>::is_stack_to_stack_move
Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#1}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#2}, <regalloc2::ion::data_structures::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::resolve_inserted_moves::{closure#3}>>::is_stack_to_stack_move
Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::process_branch::{closure#1}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::process_branch::{closure#2}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::x64::lower::isle::generated_code::MInst>>>::process_branch::{closure#3}>>::is_stack_to_stack_move
Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::process_branch::{closure#1}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::process_branch::{closure#2}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::aarch64::lower::isle::generated_code::MInst>>>::process_branch::{closure#3}>>::is_stack_to_stack_move
Unexecuted instantiation: <regalloc2::moves::MoveAndScratchResolver<<regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::process_branch::{closure#1}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::process_branch::{closure#2}, <regalloc2::fastalloc::Env<cranelift_codegen::machinst::vcode::VCode<cranelift_codegen::isa::riscv64::lower::isle::generated_code::MInst>>>::process_branch::{closure#3}>>::is_stack_to_stack_move
455 | | } |
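The needs_scratch branch at line 334 above fires only when the parallel move set contains a cycle (155 of 6.43M calls in the hot instantiation; never in the other). As a self-contained sketch of why a cycle forces a third location, here is a toy sequentializer over plain integer locations; the sequentialize helper is hypothetical and its quadratic scan is for illustration only, not the algorithm this file uses.

// Sequentialize parallel moves (src, dst) so every source is read before
// its destination is overwritten. Assumes each destination is written at
// most once and that `scratch` is not used by any move.
fn sequentialize(parallel: &[(u32, u32)], scratch: u32) -> Vec<(u32, u32)> {
    let mut pending: Vec<(u32, u32)> = parallel.to_vec();
    let mut out = Vec::new();
    while !pending.is_empty() {
        // A move is safe to emit once its destination is no longer needed
        // as the source of any pending move.
        if let Some(i) = pending
            .iter()
            .position(|&(_, dst)| pending.iter().all(|&(src, _)| src != dst))
        {
            out.push(pending.swap_remove(i));
        } else {
            // Every destination is still a pending source: a cycle.
            // Park one source in the scratch location to break it.
            let (src, dst) = pending[0];
            out.push((src, scratch));
            pending[0] = (scratch, dst);
        }
    }
    out
}

fn main() {
    // Swapping locations 1 and 2 cannot be done in place; the cycle is
    // broken through the scratch location 99.
    let seq = sequentialize(&[(1, 2), (2, 1)], 99);
    assert_eq!(seq, vec![(1, 99), (2, 1), (99, 2)]);
    println!("{:?}", seq);
}

Acyclic move sets fall out of the safe-emit case alone, which is why the counters above show a scratch being requested so rarely relative to total calls.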