/src/spirv-tools/source/opt/loop_unroller.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2018 Google LLC. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "source/opt/loop_unroller.h" |
16 | | |
17 | | #include <limits> |
18 | | #include <memory> |
19 | | #include <unordered_map> |
20 | | #include <utility> |
21 | | #include <vector> |
22 | | |
23 | | #include "source/opt/ir_builder.h" |
24 | | #include "source/opt/loop_utils.h" |
25 | | |
26 | | // Implements loop util unrolling functionality for fully and partially |
27 | | // unrolling loops. Given a factor it will duplicate the loop that many times, |
28 | | // appending each one to the end of the old loop and removing backedges, to |
29 | | // create a new unrolled loop. |
30 | | // |
31 | | // 1 - User calls LoopUtils::FullyUnroll or LoopUtils::PartiallyUnroll with a |
32 | | // loop they wish to unroll. LoopUtils::CanPerformUnroll is used to |
33 | | // validate that a given loop can be unrolled. That method (along with the |
34 | | // constructor of loop) checks that the IR is in the expected canonicalised |
35 | | // format. |
36 | | // |
37 | | // 2 - The LoopUtils methods create a LoopUnrollerUtilsImpl object to actually |
38 | | // perform the unrolling. This implements helper methods to copy the loop basic |
39 | | // blocks and remap the ids of instructions used inside them. |
40 | | // |
41 | | // 3 - The core of LoopUnrollerUtilsImpl is the Unroll method, this method |
42 | | // actually performs the loop duplication. It does this by creating a |
43 | | // LoopUnrollState object and then copying the loop as given by the factor |
44 | | // parameter. The LoopUnrollState object retains the state of the unroller |
45 | | // between the loop body copies as each iteration needs information on the last |
46 | | // to adjust the phi induction variable, adjust the OpLoopMerge instruction in |
47 | | // the main loop header, and change the previous continue block to point to the |
48 | | // new header and the new continue block to the main loop header. |
49 | | // |
50 | | // 4 - If the loop is to be fully unrolled then it is simply closed after step |
51 | | // 3, with the OpLoopMerge being deleted, the backedge removed, and the |
52 | | // condition blocks folded. |
53 | | // |
54 | | // 5 - If it is being partially unrolled: if the unrolling factor leaves the |
55 | | // loop with an even number of bodies with respect to the number of loop |
56 | | // iterations then step 3 is all that is needed. If it is uneven then we need to |
57 | | // duplicate the loop completely and unroll the duplicated loop to cover the |
58 | | // residual part and adjust the first loop to cover only the "even" part. For |
59 | | // instance if you request an unroll factor of 3 on a loop with 10 iterations |
60 | | // then copying the body three times would leave you with three bodies in the |
61 | | // loop |
62 | | // where the loop still iterates over each 4 times. So we make two loops one |
63 | | // iterating once then a second loop of three iterating 3 times. |
64 | | |
65 | | namespace spvtools { |
66 | | namespace opt { |
67 | | namespace { |
68 | | |
69 | | // Loop control constant value for DontUnroll flag. |
70 | | constexpr uint32_t kLoopControlDontUnrollIndex = 2; |
71 | | |
72 | | // Operand index of the loop control parameter of the OpLoopMerge. |
73 | | constexpr uint32_t kLoopControlIndex = 2; |
74 | | |
75 | | // This utility class encapsulates some of the state we need to maintain between |
76 | | // loop unrolls. Specifically it maintains key blocks and the induction variable |
77 | | // in the current loop duplication step and the blocks from the previous one. |
78 | | // This is because each step of the unroll needs to use data from both the |
79 | | // preceding step and the original loop. |
80 | | struct LoopUnrollState { |
81 | | LoopUnrollState() |
82 | 2.18k | : previous_phi_(nullptr), |
83 | 2.18k | previous_latch_block_(nullptr), |
84 | 2.18k | previous_condition_block_(nullptr), |
85 | 2.18k | new_phi(nullptr), |
86 | 2.18k | new_continue_block(nullptr), |
87 | 2.18k | new_condition_block(nullptr), |
88 | 2.18k | new_header_block(nullptr) {} |
89 | | |
90 | | // Initialize from the loop descriptor class. |
91 | | LoopUnrollState(Instruction* induction, BasicBlock* latch_block, |
92 | | BasicBlock* condition, std::vector<Instruction*>&& phis) |
93 | 2.18k | : previous_phi_(induction), |
94 | 2.18k | previous_latch_block_(latch_block), |
95 | 2.18k | previous_condition_block_(condition), |
96 | 2.18k | new_phi(nullptr), |
97 | 2.18k | new_continue_block(nullptr), |
98 | 2.18k | new_condition_block(nullptr), |
99 | 2.18k | new_header_block(nullptr) { |
100 | 2.18k | previous_phis_ = std::move(phis); |
101 | 2.18k | } |
102 | | |
103 | | // Swap the state so that the new nodes are now the previous nodes. |
104 | 93.0k | void NextIterationState() { |
105 | 93.0k | previous_phi_ = new_phi; |
106 | 93.0k | previous_latch_block_ = new_latch_block; |
107 | 93.0k | previous_condition_block_ = new_condition_block; |
108 | 93.0k | previous_phis_ = std::move(new_phis_); |
109 | | |
110 | | // Clear new nodes. |
111 | 93.0k | new_phi = nullptr; |
112 | 93.0k | new_continue_block = nullptr; |
113 | 93.0k | new_condition_block = nullptr; |
114 | 93.0k | new_header_block = nullptr; |
115 | 93.0k | new_latch_block = nullptr; |
116 | | |
117 | | // Clear new block/instruction maps. |
118 | 93.0k | new_blocks.clear(); |
119 | 93.0k | new_inst.clear(); |
120 | 93.0k | ids_to_new_inst.clear(); |
121 | 93.0k | } |
122 | | |
123 | | // The induction variable from the immediately preceding loop body. |
124 | | Instruction* previous_phi_; |
125 | | |
126 | | // All the phi nodes from the previous loop iteration. |
127 | | std::vector<Instruction*> previous_phis_; |
128 | | |
129 | | std::vector<Instruction*> new_phis_; |
130 | | |
131 | | // The previous latch block. The backedge will be removed from this and |
132 | | // added to the new latch block. |
133 | | BasicBlock* previous_latch_block_; |
134 | | |
135 | | // The previous condition block. This may be folded to flatten the loop. |
136 | | BasicBlock* previous_condition_block_; |
137 | | |
138 | | // The new induction variable. |
139 | | Instruction* new_phi; |
140 | | |
141 | | // The new continue block. |
142 | | BasicBlock* new_continue_block; |
143 | | |
144 | | // The new condition block. |
145 | | BasicBlock* new_condition_block; |
146 | | |
147 | | // The new header block. |
148 | | BasicBlock* new_header_block; |
149 | | |
150 | | // The new latch block. |
151 | | BasicBlock* new_latch_block; |
152 | | |
153 | | // A mapping of new block ids to the original blocks which they were copied |
154 | | // from. |
155 | | std::unordered_map<uint32_t, BasicBlock*> new_blocks; |
156 | | |
157 | | // A mapping of the original instruction ids to the instruction ids to their |
158 | | // copies. |
159 | | std::unordered_map<uint32_t, uint32_t> new_inst; |
160 | | |
161 | | std::unordered_map<uint32_t, Instruction*> ids_to_new_inst; |
162 | | }; |
163 | | |
164 | | // This class implements the actual unrolling. It uses a LoopUnrollState to |
165 | | // maintain the state of the unrolling in between steps. |
166 | | class LoopUnrollerUtilsImpl { |
167 | | public: |
168 | | using BasicBlockListTy = std::vector<std::unique_ptr<BasicBlock>>; |
169 | | |
170 | | LoopUnrollerUtilsImpl(IRContext* c, Function* function) |
171 | 2.18k | : context_(c), |
172 | 2.18k | function_(*function), |
173 | 2.18k | loop_condition_block_(nullptr), |
174 | 2.18k | loop_induction_variable_(nullptr), |
175 | 2.18k | number_of_loop_iterations_(0), |
176 | 2.18k | loop_step_value_(0), |
177 | 2.18k | loop_init_value_(0) {} |
178 | | |
179 | | // Unroll the |loop| by given |factor| by copying the whole body |factor| |
180 | | // times. The resulting basicblock structure will remain a loop. |
181 | | void PartiallyUnroll(Loop*, size_t factor); |
182 | | |
183 | | // If partially unrolling the |loop| would leave the loop with too many bodies |
184 | | // for its number of iterations then this method should be used. This method |
185 | | // will duplicate the |loop| completely, making the duplicated loop the |
186 | | // successor of the original's merge block. The original loop will have its |
187 | | // condition changed to loop over the residual part and the duplicate will be |
188 | | // partially unrolled. The resulting structure will be two loops. |
189 | | void PartiallyUnrollResidualFactor(Loop* loop, size_t factor); |
190 | | |
191 | | // Fully unroll the |loop| by copying the full body by the total number of |
192 | | // loop iterations, folding all conditions, and removing the backedge from the |
193 | | // continue block to the header. |
194 | | void FullyUnroll(Loop* loop); |
195 | | |
196 | | // Get the ID of the variable in the |phi| paired with |label|. |
197 | | uint32_t GetPhiDefID(const Instruction* phi, uint32_t label) const; |
198 | | |
199 | | // Close the loop by removing the OpLoopMerge from the |loop| header block and |
200 | | // making the backedge point to the merge block. |
201 | | void CloseUnrolledLoop(Loop* loop); |
202 | | |
203 | | // Remove the OpConditionalBranch instruction inside |conditional_block| used |
204 | | // to branch to either exit or continue the loop and replace it with an |
205 | | // unconditional OpBranch to block |new_target|. |
206 | | void FoldConditionBlock(BasicBlock* condtion_block, uint32_t new_target); |
207 | | |
208 | | // Add all blocks_to_add_ to function_ at the |insert_point|. |
209 | | void AddBlocksToFunction(const BasicBlock* insert_point); |
210 | | |
211 | | // Duplicates the |old_loop|, cloning each body and remapping the ids without |
212 | | // removing instructions or changing relative structure. Result will be stored |
213 | | // in |new_loop|. |
214 | | void DuplicateLoop(Loop* old_loop, Loop* new_loop); |
215 | | |
216 | 0 | inline size_t GetLoopIterationCount() const { |
217 | 0 | return number_of_loop_iterations_; |
218 | 0 | } |
219 | | |
220 | | // Extracts the initial state information from the |loop|. |
221 | | void Init(Loop* loop); |
222 | | |
223 | | // Replace the uses of each induction variable outside the loop with the final |
224 | | // value of the induction variable before the loop exit. To reflect the proper |
225 | | // state of a fully unrolled loop. |
226 | | void ReplaceInductionUseWithFinalValue(Loop* loop); |
227 | | |
228 | | // Remove all the instructions in the invalidated_instructions_ vector. |
229 | | void RemoveDeadInstructions(); |
230 | | |
231 | | // Replace any use of induction variables outwith the loop with the final |
232 | | // value of the induction variable in the unrolled loop. |
233 | | void ReplaceOutsideLoopUseWithFinalValue(Loop* loop); |
234 | | |
235 | | // Set the LoopControl operand of the OpLoopMerge instruction to be |
236 | | // DontUnroll. |
237 | | void MarkLoopControlAsDontUnroll(Loop* loop) const; |
238 | | |
239 | | private: |
240 | | // Remap all the in |basic_block| to new IDs and keep the mapping of new ids |
241 | | // to old |
242 | | // ids. |loop| is used to identify special loop blocks (header, continue, |
243 | | // etc). |
244 | | void AssignNewResultIds(BasicBlock* basic_block); |
245 | | |
246 | | // Using the map built by AssignNewResultIds, replace the uses in |inst| |
247 | | // by the id that the use maps to. |
248 | | void RemapOperands(Instruction* inst); |
249 | | |
250 | | // Using the map built by AssignNewResultIds, for each instruction in |
251 | | // |basic_block| use |
252 | | // that map to substitute the IDs used by instructions (in the operands) with |
253 | | // the new ids. |
254 | | void RemapOperands(BasicBlock* basic_block); |
255 | | |
256 | | // Copy the whole body of the loop, all blocks dominated by the |loop| header |
257 | | // and not dominated by the |loop| merge. The copied body will be linked to by |
258 | | // the old |loop| continue block and the new body will link to the |loop| |
259 | | // header via the new continue block. |eliminate_conditions| is used to decide |
260 | | // whether or not to fold all the condition blocks other than the last one. |
261 | | void CopyBody(Loop* loop, bool eliminate_conditions); |
262 | | |
263 | | // Copy a given |block_to_copy| in the |loop| and record the mapping of the |
264 | | // old/new ids. |preserve_instructions| determines whether or not the method |
265 | | // will modify (other than result_id) instructions which are copied. |
266 | | void CopyBasicBlock(Loop* loop, const BasicBlock* block_to_copy, |
267 | | bool preserve_instructions); |
268 | | |
269 | | // The actual implementation of the unroll step. Unrolls |loop| by given |
270 | | // |factor| by copying the body by |factor| times. Also propagates the |
271 | | // induction variable value throughout the copies. |
272 | | void Unroll(Loop* loop, size_t factor); |
273 | | |
274 | | // Fills the loop_blocks_inorder_ field with the ordered list of basic blocks |
275 | | // as computed by the method ComputeLoopOrderedBlocks. |
276 | | void ComputeLoopOrderedBlocks(Loop* loop); |
277 | | |
278 | | // Adds the blocks_to_add_ to both the |loop| and to the parent of |loop| if |
279 | | // the parent exists. |
280 | | void AddBlocksToLoop(Loop* loop) const; |
281 | | |
282 | | // After the partially unroll step the phi instructions in the header block |
283 | | // will be in an illegal format. This function makes the phis legal by making |
284 | | // the edge from the latch block come from the new latch block and the value |
285 | | // to be the actual value of the phi at that point. |
286 | | void LinkLastPhisToStart(Loop* loop) const; |
287 | | |
288 | | // Kill all debug declaration instructions from |bb|. |
289 | | void KillDebugDeclares(BasicBlock* bb); |
290 | | |
291 | | // A pointer to the IRContext. Used to add/remove instructions and for usedef |
292 | | // chains. |
293 | | IRContext* context_; |
294 | | |
295 | | // A reference the function the loop is within. |
296 | | Function& function_; |
297 | | |
298 | | // A list of basic blocks to be added to the loop at the end of an unroll |
299 | | // step. |
300 | | BasicBlockListTy blocks_to_add_; |
301 | | |
302 | | // List of instructions which are now dead and can be removed. |
303 | | std::vector<Instruction*> invalidated_instructions_; |
304 | | |
305 | | // Maintains the current state of the transform between calls to unroll. |
306 | | LoopUnrollState state_; |
307 | | |
308 | | // An ordered list containing the loop basic blocks. |
309 | | std::vector<BasicBlock*> loop_blocks_inorder_; |
310 | | |
311 | | // The block containing the condition check which contains a conditional |
312 | | // branch to the merge and continue block. |
313 | | BasicBlock* loop_condition_block_; |
314 | | |
315 | | // The induction variable of the loop. |
316 | | Instruction* loop_induction_variable_; |
317 | | |
318 | | // Phis used in the loop need to be remapped to use the actual result values |
319 | | // and then be remapped at the end. |
320 | | std::vector<Instruction*> loop_phi_instructions_; |
321 | | |
322 | | // The number of loop iterations that the loop would perform pre-unroll. |
323 | | size_t number_of_loop_iterations_; |
324 | | |
325 | | // The amount that the loop steps each iteration. |
326 | | int64_t loop_step_value_; |
327 | | |
328 | | // The value the loop starts stepping from. |
329 | | int64_t loop_init_value_; |
330 | | }; |
331 | | |
332 | | /* |
333 | | * Static helper functions. |
334 | | */ |
335 | | |
336 | | // Retrieve the index of the OpPhi instruction |phi| which corresponds to the |
337 | | // incoming |block| id. |
338 | 0 | uint32_t GetPhiIndexFromLabel(const BasicBlock* block, const Instruction* phi) { |
339 | 0 | for (uint32_t i = 1; i < phi->NumInOperands(); i += 2) { |
340 | 0 | if (block->id() == phi->GetSingleWordInOperand(i)) { |
341 | 0 | return i; |
342 | 0 | } |
343 | 0 | } |
344 | 0 | assert(false && "Could not find operand in instruction."); |
345 | 0 | return 0; |
346 | 0 | } |
347 | | |
348 | 2.18k | void LoopUnrollerUtilsImpl::Init(Loop* loop) { |
349 | 2.18k | loop_condition_block_ = loop->FindConditionBlock(); |
350 | | |
351 | | // When we reinit the second loop during PartiallyUnrollResidualFactor we need |
352 | | // to use the cached value from the duplicate step as the dominator tree |
353 | | // basded solution, loop->FindConditionBlock, requires all the nodes to be |
354 | | // connected up with the correct branches. They won't be at this point. |
355 | 2.18k | if (!loop_condition_block_) { |
356 | 0 | loop_condition_block_ = state_.new_condition_block; |
357 | 0 | } |
358 | 2.18k | assert(loop_condition_block_); |
359 | | |
360 | 2.18k | loop_induction_variable_ = loop->FindConditionVariable(loop_condition_block_); |
361 | 2.18k | assert(loop_induction_variable_); |
362 | | |
363 | 2.18k | bool found = loop->FindNumberOfIterations( |
364 | 2.18k | loop_induction_variable_, &*loop_condition_block_->ctail(), |
365 | 2.18k | &number_of_loop_iterations_, &loop_step_value_, &loop_init_value_); |
366 | 2.18k | (void)found; // To silence unused variable warning on release builds. |
367 | 2.18k | assert(found); |
368 | | |
369 | | // Blocks are stored in an unordered set of ids in the loop class, we need to |
370 | | // create the dominator ordered list. |
371 | 2.18k | ComputeLoopOrderedBlocks(loop); |
372 | 2.18k | } |
373 | | |
374 | | // This function is used to partially unroll the loop when the factor provided |
375 | | // would normally lead to an illegal optimization. Instead of just unrolling the |
376 | | // loop it creates two loops and unrolls one and adjusts the condition on the |
377 | | // other. The end result being that the new loop pair iterates over the correct |
378 | | // number of bodies. |
379 | | void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(Loop* loop, |
380 | 0 | size_t factor) { |
381 | | // TODO(1841): Handle id overflow. |
382 | 0 | std::unique_ptr<Instruction> new_label{new Instruction( |
383 | 0 | context_, spv::Op::OpLabel, 0, context_->TakeNextId(), {})}; |
384 | 0 | std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))}; |
385 | 0 | new_exit_bb->SetParent(&function_); |
386 | | |
387 | | // Save the id of the block before we move it. |
388 | 0 | uint32_t new_merge_id = new_exit_bb->id(); |
389 | | |
390 | | // Add the block the list of blocks to add, we want this merge block to be |
391 | | // right at the start of the new blocks. |
392 | 0 | blocks_to_add_.push_back(std::move(new_exit_bb)); |
393 | 0 | BasicBlock* new_exit_bb_raw = blocks_to_add_[0].get(); |
394 | 0 | Instruction& original_conditional_branch = *loop_condition_block_->tail(); |
395 | | // Duplicate the loop, providing access to the blocks of both loops. |
396 | | // This is a naked new due to the VS2013 requirement of not having unique |
397 | | // pointers in vectors, as it will be inserted into a vector with |
398 | | // loop_descriptor.AddLoop. |
399 | 0 | std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(*loop); |
400 | | |
401 | | // Clear the basic blocks of the new loop. |
402 | 0 | new_loop->ClearBlocks(); |
403 | |
|
404 | 0 | DuplicateLoop(loop, new_loop.get()); |
405 | | |
406 | | // Add the blocks to the function. |
407 | 0 | AddBlocksToFunction(loop->GetMergeBlock()); |
408 | 0 | blocks_to_add_.clear(); |
409 | | |
410 | | // Create a new merge block for the first loop. |
411 | 0 | InstructionBuilder builder{context_, new_exit_bb_raw}; |
412 | | // Make the first loop branch to the second. |
413 | 0 | builder.AddBranch(new_loop->GetHeaderBlock()->id()); |
414 | |
|
415 | 0 | loop_condition_block_ = state_.new_condition_block; |
416 | 0 | loop_induction_variable_ = state_.new_phi; |
417 | | // Unroll the new loop by the factor with the usual -1 to account for the |
418 | | // existing block iteration. |
419 | 0 | Unroll(new_loop.get(), factor); |
420 | |
|
421 | 0 | LinkLastPhisToStart(new_loop.get()); |
422 | 0 | AddBlocksToLoop(new_loop.get()); |
423 | | |
424 | | // Add the new merge block to the back of the list of blocks to be added. It |
425 | | // needs to be the last block added to maintain dominator order in the binary. |
426 | 0 | blocks_to_add_.push_back( |
427 | 0 | std::unique_ptr<BasicBlock>(new_loop->GetMergeBlock())); |
428 | | |
429 | | // Add the blocks to the function. |
430 | 0 | AddBlocksToFunction(loop->GetMergeBlock()); |
431 | | |
432 | | // Reset the usedef analysis. |
433 | 0 | context_->InvalidateAnalysesExceptFor( |
434 | 0 | IRContext::Analysis::kAnalysisLoopAnalysis); |
435 | 0 | analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr(); |
436 | | |
437 | | // The loop condition. |
438 | 0 | Instruction* condition_check = def_use_manager->GetDef( |
439 | 0 | original_conditional_branch.GetSingleWordOperand(0)); |
440 | | |
441 | | // This should have been checked by the LoopUtils::CanPerformUnroll function |
442 | | // before entering this. |
443 | 0 | assert(loop->IsSupportedCondition(condition_check->opcode())); |
444 | | |
445 | | // We need to account for the initial body when calculating the remainder. |
446 | 0 | int64_t remainder = Loop::GetResidualConditionValue( |
447 | 0 | condition_check->opcode(), loop_init_value_, loop_step_value_, |
448 | 0 | number_of_loop_iterations_, factor); |
449 | |
|
450 | 0 | assert(remainder > std::numeric_limits<int32_t>::min() && |
451 | 0 | remainder < std::numeric_limits<int32_t>::max()); |
452 | | |
453 | 0 | Instruction* new_constant = nullptr; |
454 | | |
455 | | // If the remainder is negative then we add a signed constant, otherwise just |
456 | | // add an unsigned constant. |
457 | 0 | if (remainder < 0) { |
458 | 0 | new_constant = builder.GetSintConstant(static_cast<int32_t>(remainder)); |
459 | 0 | } else { |
460 | 0 | new_constant = builder.GetUintConstant(static_cast<int32_t>(remainder)); |
461 | 0 | } |
462 | |
|
463 | 0 | uint32_t constant_id = new_constant->result_id(); |
464 | | |
465 | | // Update the condition check. |
466 | 0 | condition_check->SetInOperand(1, {constant_id}); |
467 | | |
468 | | // Update the next phi node. The phi will have a constant value coming in from |
469 | | // the preheader block. For the duplicated loop we need to update the constant |
470 | | // to be the amount of iterations covered by the first loop and the incoming |
471 | | // block to be the first loops new merge block. |
472 | 0 | std::vector<Instruction*> new_inductions; |
473 | 0 | new_loop->GetInductionVariables(new_inductions); |
474 | |
|
475 | 0 | std::vector<Instruction*> old_inductions; |
476 | 0 | loop->GetInductionVariables(old_inductions); |
477 | 0 | for (size_t index = 0; index < new_inductions.size(); ++index) { |
478 | 0 | Instruction* new_induction = new_inductions[index]; |
479 | 0 | Instruction* old_induction = old_inductions[index]; |
480 | | // Get the index of the loop initalizer, the value coming in from the |
481 | | // preheader. |
482 | 0 | uint32_t initalizer_index = |
483 | 0 | GetPhiIndexFromLabel(new_loop->GetPreHeaderBlock(), old_induction); |
484 | | |
485 | | // Replace the second loop initalizer with the phi from the first |
486 | 0 | new_induction->SetInOperand(initalizer_index - 1, |
487 | 0 | {old_induction->result_id()}); |
488 | 0 | new_induction->SetInOperand(initalizer_index, {new_merge_id}); |
489 | | |
490 | | // If the use of the first loop induction variable is outside of the loop |
491 | | // then replace that use with the second loop induction variable. |
492 | 0 | uint32_t second_loop_induction = new_induction->result_id(); |
493 | 0 | auto replace_use_outside_of_loop = [loop, second_loop_induction]( |
494 | 0 | Instruction* user, |
495 | 0 | uint32_t operand_index) { |
496 | 0 | if (!loop->IsInsideLoop(user)) { |
497 | 0 | user->SetOperand(operand_index, {second_loop_induction}); |
498 | 0 | } |
499 | 0 | }; |
500 | |
|
501 | 0 | context_->get_def_use_mgr()->ForEachUse(old_induction, |
502 | 0 | replace_use_outside_of_loop); |
503 | 0 | } |
504 | |
|
505 | 0 | context_->InvalidateAnalysesExceptFor( |
506 | 0 | IRContext::Analysis::kAnalysisLoopAnalysis); |
507 | |
|
508 | 0 | context_->ReplaceAllUsesWith(loop->GetMergeBlock()->id(), new_merge_id); |
509 | |
|
510 | 0 | LoopDescriptor& loop_descriptor = *context_->GetLoopDescriptor(&function_); |
511 | |
|
512 | 0 | loop_descriptor.AddLoop(std::move(new_loop), loop->GetParent()); |
513 | |
|
514 | 0 | RemoveDeadInstructions(); |
515 | 0 | } |
516 | | |
517 | | // Mark this loop as DontUnroll as it will already be unrolled and it may not |
518 | | // be safe to unroll a previously partially unrolled loop. |
519 | 2.18k | void LoopUnrollerUtilsImpl::MarkLoopControlAsDontUnroll(Loop* loop) const { |
520 | 2.18k | Instruction* loop_merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst(); |
521 | 2.18k | assert(loop_merge_inst && |
522 | 2.18k | "Loop merge instruction could not be found after entering unroller " |
523 | 2.18k | "(should have exited before this)"); |
524 | 2.18k | loop_merge_inst->SetInOperand(kLoopControlIndex, |
525 | 2.18k | {kLoopControlDontUnrollIndex}); |
526 | 2.18k | } |
527 | | |
528 | | // Duplicate the |loop| body |factor| - 1 number of times while keeping the loop |
529 | | // backedge intact. This will leave the loop with |factor| number of bodies |
530 | | // after accounting for the initial body. |
531 | 2.18k | void LoopUnrollerUtilsImpl::Unroll(Loop* loop, size_t factor) { |
532 | | // If we unroll a loop partially it will not be safe to unroll it further. |
533 | | // This is due to the current method of calculating the number of loop |
534 | | // iterations. |
535 | 2.18k | MarkLoopControlAsDontUnroll(loop); |
536 | | |
537 | 2.18k | std::vector<Instruction*> inductions; |
538 | 2.18k | loop->GetInductionVariables(inductions); |
539 | 2.18k | state_ = LoopUnrollState{loop_induction_variable_, loop->GetLatchBlock(), |
540 | 2.18k | loop_condition_block_, std::move(inductions)}; |
541 | 95.2k | for (size_t i = 0; i < factor - 1; ++i) { |
542 | 93.0k | CopyBody(loop, true); |
543 | 93.0k | } |
544 | 2.18k | } |
545 | | |
546 | 2.18k | void LoopUnrollerUtilsImpl::RemoveDeadInstructions() { |
547 | | // Remove the dead instructions. |
548 | 248k | for (Instruction* inst : invalidated_instructions_) { |
549 | 248k | context_->KillInst(inst); |
550 | 248k | } |
551 | 2.18k | } |
552 | | |
553 | 2.18k | void LoopUnrollerUtilsImpl::ReplaceInductionUseWithFinalValue(Loop* loop) { |
554 | 2.18k | context_->InvalidateAnalysesExceptFor( |
555 | 2.18k | IRContext::Analysis::kAnalysisLoopAnalysis | |
556 | 2.18k | IRContext::Analysis::kAnalysisDefUse | |
557 | 2.18k | IRContext::Analysis::kAnalysisInstrToBlockMapping); |
558 | | |
559 | 2.18k | std::vector<Instruction*> inductions; |
560 | 2.18k | loop->GetInductionVariables(inductions); |
561 | | |
562 | 5.81k | for (size_t index = 0; index < inductions.size(); ++index) { |
563 | 3.63k | uint32_t trip_step_id = GetPhiDefID(state_.previous_phis_[index], |
564 | 3.63k | state_.previous_latch_block_->id()); |
565 | 3.63k | context_->ReplaceAllUsesWith(inductions[index]->result_id(), trip_step_id); |
566 | 3.63k | invalidated_instructions_.push_back(inductions[index]); |
567 | 3.63k | } |
568 | 2.18k | } |
569 | | |
570 | | // Fully unroll the loop by partially unrolling it by the number of loop |
571 | | // iterations minus one for the body already accounted for. |
572 | 2.18k | void LoopUnrollerUtilsImpl::FullyUnroll(Loop* loop) { |
573 | | // We unroll the loop by number of iterations in the loop. |
574 | 2.18k | Unroll(loop, number_of_loop_iterations_); |
575 | | |
576 | | // The first condition block is preserved until now so it can be copied. |
577 | 2.18k | FoldConditionBlock(loop_condition_block_, 1); |
578 | | |
579 | | // Delete the OpLoopMerge and remove the backedge to the header. |
580 | 2.18k | CloseUnrolledLoop(loop); |
581 | | |
582 | | // Mark the loop for later deletion. This allows us to preserve the loop |
583 | | // iterators but still disregard dead loops. |
584 | 2.18k | loop->MarkLoopForRemoval(); |
585 | | |
586 | | // If the loop has a parent add the new blocks to the parent. |
587 | 2.18k | if (loop->GetParent()) { |
588 | 472 | AddBlocksToLoop(loop->GetParent()); |
589 | 472 | } |
590 | | |
591 | | // Add the blocks to the function. |
592 | 2.18k | AddBlocksToFunction(loop->GetMergeBlock()); |
593 | | |
594 | 2.18k | ReplaceInductionUseWithFinalValue(loop); |
595 | | |
596 | 2.18k | RemoveDeadInstructions(); |
597 | | // Invalidate all analyses. |
598 | 2.18k | context_->InvalidateAnalysesExceptFor( |
599 | 2.18k | IRContext::Analysis::kAnalysisLoopAnalysis | |
600 | 2.18k | IRContext::Analysis::kAnalysisDefUse); |
601 | 2.18k | } |
602 | | |
603 | 947k | void LoopUnrollerUtilsImpl::KillDebugDeclares(BasicBlock* bb) { |
604 | | // We cannot kill an instruction inside BasicBlock::ForEachInst() |
605 | | // because it will generate dangling pointers. We use |to_be_killed| |
606 | | // to kill them after the loop. |
607 | 947k | std::vector<Instruction*> to_be_killed; |
608 | | |
609 | 5.95M | bb->ForEachInst([&to_be_killed, this](Instruction* inst) { |
610 | 5.95M | if (context_->get_debug_info_mgr()->IsDebugDeclare(inst)) { |
611 | 0 | to_be_killed.push_back(inst); |
612 | 0 | } |
613 | 5.95M | }); |
614 | 947k | for (auto* inst : to_be_killed) context_->KillInst(inst); |
615 | 947k | } |
616 | | |
617 | | // Copy a given basic block, give it a new result_id, and store the new block |
618 | | // and the id mapping in the state. |preserve_instructions| is used to determine |
619 | | // whether or not this function should edit instructions other than the |
620 | | // |result_id|. |
621 | | void LoopUnrollerUtilsImpl::CopyBasicBlock(Loop* loop, const BasicBlock* itr, |
622 | 947k | bool preserve_instructions) { |
623 | | // Clone the block exactly, including the IDs. |
624 | 947k | BasicBlock* basic_block = itr->Clone(context_); |
625 | 947k | basic_block->SetParent(itr->GetParent()); |
626 | | |
627 | | // We do not want to duplicate DebugDeclare. |
628 | 947k | KillDebugDeclares(basic_block); |
629 | | |
630 | | // Assign each result a new unique ID and keep a mapping of the old ids to |
631 | | // the new ones. |
632 | 947k | AssignNewResultIds(basic_block); |
633 | | |
634 | | // If this is the continue block we are copying. |
635 | 947k | if (itr == loop->GetContinueBlock()) { |
636 | | // Make the OpLoopMerge point to this block for the continue. |
637 | 93.0k | if (!preserve_instructions) { |
638 | 93.0k | Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst(); |
639 | 93.0k | merge_inst->SetInOperand(1, {basic_block->id()}); |
640 | 93.0k | context_->UpdateDefUse(merge_inst); |
641 | 93.0k | } |
642 | | |
643 | 93.0k | state_.new_continue_block = basic_block; |
644 | 93.0k | } |
645 | | |
646 | | // If this is the header block we are copying. |
647 | 947k | if (itr == loop->GetHeaderBlock()) { |
648 | 93.0k | state_.new_header_block = basic_block; |
649 | | |
650 | 93.0k | if (!preserve_instructions) { |
651 | | // Remove the loop merge instruction if it exists. |
652 | 93.0k | Instruction* merge_inst = basic_block->GetLoopMergeInst(); |
653 | 93.0k | if (merge_inst) invalidated_instructions_.push_back(merge_inst); |
654 | 93.0k | } |
655 | 93.0k | } |
656 | | |
657 | | // If this is the latch block being copied, record it in the state. |
658 | 947k | if (itr == loop->GetLatchBlock()) state_.new_latch_block = basic_block; |
659 | | |
660 | | // If this is the condition block we are copying. |
661 | 947k | if (itr == loop_condition_block_) { |
662 | 93.0k | state_.new_condition_block = basic_block; |
663 | 93.0k | } |
664 | | |
665 | | // Add this block to the list of blocks to add to the function at the end of |
666 | | // the unrolling process. |
667 | 947k | blocks_to_add_.push_back(std::unique_ptr<BasicBlock>(basic_block)); |
668 | | |
669 | | // Keep tracking the old block via a map. |
670 | 947k | state_.new_blocks[itr->id()] = basic_block; |
671 | 947k | } |
672 | | |
673 | 93.0k | void LoopUnrollerUtilsImpl::CopyBody(Loop* loop, bool eliminate_conditions) { |
674 | | // Copy each basic block in the loop, give them new ids, and save state |
675 | | // information. |
676 | 947k | for (const BasicBlock* itr : loop_blocks_inorder_) { |
677 | 947k | CopyBasicBlock(loop, itr, false); |
678 | 947k | } |
679 | | |
680 | | // Set the previous latch block to point to the new header. |
681 | 93.0k | Instruction* latch_branch = state_.previous_latch_block_->terminator(); |
682 | 93.0k | latch_branch->SetInOperand(0, {state_.new_header_block->id()}); |
683 | 93.0k | context_->UpdateDefUse(latch_branch); |
684 | | |
685 | | // As the algorithm copies the original loop blocks exactly, the tail of the |
686 | | // latch block on iterations after the first one will be a branch to the new |
687 | | // header and not the actual loop header. The last continue block in the loop |
688 | | // should always be a backedge to the global header. |
689 | 93.0k | Instruction* new_latch_branch = state_.new_latch_block->terminator(); |
690 | 93.0k | new_latch_branch->SetInOperand(0, {loop->GetHeaderBlock()->id()}); |
691 | 93.0k | context_->AnalyzeUses(new_latch_branch); |
692 | | |
693 | 93.0k | std::vector<Instruction*> inductions; |
694 | 93.0k | loop->GetInductionVariables(inductions); |
695 | 242k | for (size_t index = 0; index < inductions.size(); ++index) { |
696 | 149k | Instruction* primary_copy = inductions[index]; |
697 | | |
698 | 149k | assert(primary_copy->result_id() != 0); |
699 | 149k | Instruction* induction_clone = |
700 | 149k | state_.ids_to_new_inst[state_.new_inst[primary_copy->result_id()]]; |
701 | | |
702 | 149k | state_.new_phis_.push_back(induction_clone); |
703 | 149k | assert(induction_clone->result_id() != 0); |
704 | | |
705 | 149k | if (!state_.previous_phis_.empty()) { |
706 | 149k | state_.new_inst[primary_copy->result_id()] = GetPhiDefID( |
707 | 149k | state_.previous_phis_[index], state_.previous_latch_block_->id()); |
708 | 149k | } else { |
709 | | // Do not replace the first phi block ids. |
710 | 0 | state_.new_inst[primary_copy->result_id()] = primary_copy->result_id(); |
711 | 0 | } |
712 | 149k | } |
713 | | |
714 | 93.0k | if (eliminate_conditions && |
715 | 93.0k | state_.new_condition_block != loop_condition_block_) { |
716 | 93.0k | FoldConditionBlock(state_.new_condition_block, 1); |
717 | 93.0k | } |
718 | | |
719 | | // Only reference to the header block is the backedge in the latch block, |
720 | | // don't change this. |
721 | 93.0k | state_.new_inst[loop->GetHeaderBlock()->id()] = loop->GetHeaderBlock()->id(); |
722 | | |
723 | 947k | for (auto& pair : state_.new_blocks) { |
724 | 947k | RemapOperands(pair.second); |
725 | 947k | } |
726 | | |
727 | 93.0k | for (Instruction* dead_phi : state_.new_phis_) |
728 | 149k | invalidated_instructions_.push_back(dead_phi); |
729 | | |
730 | | // Swap the state so the new is now the previous. |
731 | 93.0k | state_.NextIterationState(); |
732 | 93.0k | } |
733 | | |
734 | | uint32_t LoopUnrollerUtilsImpl::GetPhiDefID(const Instruction* phi, |
735 | 156k | uint32_t label) const { |
736 | 309k | for (uint32_t operand = 3; operand < phi->NumOperands(); operand += 2) { |
737 | 309k | if (phi->GetSingleWordOperand(operand) == label) { |
738 | 156k | return phi->GetSingleWordOperand(operand - 1); |
739 | 156k | } |
740 | 309k | } |
741 | 0 | assert(false && "Could not find a phi index matching the provided label"); |
742 | 0 | return 0; |
743 | 0 | } |
744 | | |
745 | | void LoopUnrollerUtilsImpl::FoldConditionBlock(BasicBlock* condition_block, |
746 | 95.2k | uint32_t operand_label) { |
747 | | // Remove the old conditional branch to the merge and continue blocks. |
748 | 95.2k | Instruction& old_branch = *condition_block->tail(); |
749 | 95.2k | uint32_t new_target = old_branch.GetSingleWordOperand(operand_label); |
750 | | |
751 | 95.2k | DebugScope scope = old_branch.GetDebugScope(); |
752 | 95.2k | const std::vector<Instruction> lines = old_branch.dbg_line_insts(); |
753 | | |
754 | 95.2k | context_->KillInst(&old_branch); |
755 | | // Add the new unconditional branch to the merge block. |
756 | 95.2k | InstructionBuilder builder( |
757 | 95.2k | context_, condition_block, |
758 | 95.2k | IRContext::Analysis::kAnalysisDefUse | |
759 | 95.2k | IRContext::Analysis::kAnalysisInstrToBlockMapping); |
760 | 95.2k | Instruction* new_branch = builder.AddBranch(new_target); |
761 | | |
762 | 95.2k | if (!lines.empty()) new_branch->AddDebugLine(&lines.back()); |
763 | 95.2k | new_branch->SetDebugScope(scope); |
764 | 95.2k | } |
765 | | |
766 | 2.18k | void LoopUnrollerUtilsImpl::CloseUnrolledLoop(Loop* loop) { |
767 | | // Remove the OpLoopMerge instruction from the function. |
768 | 2.18k | Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst(); |
769 | 2.18k | invalidated_instructions_.push_back(merge_inst); |
770 | | |
771 | | // Remove the final backedge to the header and make it point instead to the |
772 | | // merge block. |
773 | 2.18k | Instruction* latch_instruction = state_.previous_latch_block_->terminator(); |
774 | 2.18k | latch_instruction->SetInOperand(0, {loop->GetMergeBlock()->id()}); |
775 | 2.18k | context_->UpdateDefUse(latch_instruction); |
776 | | |
777 | | // Remove all induction variables as the phis will now be invalid. Replace all |
778 | | // uses with the constant initializer value (all uses of phis will be in |
779 | | // the first iteration with the subsequent phis already having been removed). |
780 | 2.18k | std::vector<Instruction*> inductions; |
781 | 2.18k | loop->GetInductionVariables(inductions); |
782 | | |
783 | | // We can use the state instruction mechanism to replace all internal loop |
784 | | // values within the first loop trip (as the subsequent ones will be updated |
785 | | // by the copy function) with the value coming in from the preheader and then |
786 | | // use context ReplaceAllUsesWith for the uses outside the loop with the final |
787 | | // trip phi value. |
788 | 2.18k | state_.new_inst.clear(); |
789 | 3.63k | for (Instruction* induction : inductions) { |
790 | 3.63k | uint32_t initalizer_id = |
791 | 3.63k | GetPhiDefID(induction, loop->GetPreHeaderBlock()->id()); |
792 | | |
793 | 3.63k | state_.new_inst[induction->result_id()] = initalizer_id; |
794 | 3.63k | } |
795 | | |
796 | 25.2k | for (BasicBlock* block : loop_blocks_inorder_) { |
797 | 25.2k | RemapOperands(block); |
798 | 25.2k | } |
799 | 947k | for (auto& block_itr : blocks_to_add_) { |
800 | 947k | RemapOperands(block_itr.get()); |
801 | 947k | } |
802 | | |
803 | | // Rewrite the last phis, since they may still reference the original phi. |
804 | 3.63k | for (Instruction* last_phi : state_.previous_phis_) { |
805 | 3.63k | RemapOperands(last_phi); |
806 | 3.63k | } |
807 | 2.18k | } |
808 | | |
809 | | // Uses the first loop to create a copy of the loop with new IDs. |
810 | 0 | void LoopUnrollerUtilsImpl::DuplicateLoop(Loop* old_loop, Loop* new_loop) { |
811 | 0 | std::vector<BasicBlock*> new_block_order; |
812 | | |
813 | | // Copy every block in the old loop. |
814 | 0 | for (const BasicBlock* itr : loop_blocks_inorder_) { |
815 | 0 | CopyBasicBlock(old_loop, itr, true); |
816 | 0 | new_block_order.push_back(blocks_to_add_.back().get()); |
817 | 0 | } |
818 | | |
819 | | // Clone the merge block, give it a new id and record it in the state. |
820 | 0 | BasicBlock* new_merge = old_loop->GetMergeBlock()->Clone(context_); |
821 | 0 | new_merge->SetParent(old_loop->GetMergeBlock()->GetParent()); |
822 | 0 | AssignNewResultIds(new_merge); |
823 | 0 | state_.new_blocks[old_loop->GetMergeBlock()->id()] = new_merge; |
824 | | |
825 | | // Remap the operands of every instruction in the loop to point to the new |
826 | | // copies. |
827 | 0 | for (auto& pair : state_.new_blocks) { |
828 | 0 | RemapOperands(pair.second); |
829 | 0 | } |
830 | |
|
831 | 0 | loop_blocks_inorder_ = std::move(new_block_order); |
832 | |
|
833 | 0 | AddBlocksToLoop(new_loop); |
834 | |
|
835 | 0 | new_loop->SetHeaderBlock(state_.new_header_block); |
836 | 0 | new_loop->SetContinueBlock(state_.new_continue_block); |
837 | 0 | new_loop->SetLatchBlock(state_.new_latch_block); |
838 | 0 | new_loop->SetMergeBlock(new_merge); |
839 | 0 | } |
840 | | |
841 | | // Whenever the utility copies a block it stores it in a temporary buffer, this |
842 | | // function adds the buffer into the Function. The blocks will be inserted |
843 | | // after the block |insert_point|. |
844 | | void LoopUnrollerUtilsImpl::AddBlocksToFunction( |
845 | 2.18k | const BasicBlock* insert_point) { |
846 | 2.18k | for (auto basic_block_iterator = function_.begin(); |
847 | 32.4k | basic_block_iterator != function_.end(); ++basic_block_iterator) { |
848 | 32.4k | if (basic_block_iterator->id() == insert_point->id()) { |
849 | 2.18k | basic_block_iterator.InsertBefore(&blocks_to_add_); |
850 | 2.18k | return; |
851 | 2.18k | } |
852 | 32.4k | } |
853 | | |
854 | 0 | assert( |
855 | 0 | false && |
856 | 0 | "Could not add basic blocks to function as insert point was not found."); |
857 | 0 | } |
858 | | |
859 | | // Assign all result_ids in |basic_block| instructions to new IDs and preserve |
860 | | // the mapping of new ids to old ones. |
861 | 947k | void LoopUnrollerUtilsImpl::AssignNewResultIds(BasicBlock* basic_block) { |
862 | 947k | analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr(); |
863 | | |
864 | | // Label instructions aren't covered by normal traversal of the |
865 | | // instructions. |
866 | | // TODO(1841): Handle id overflow. |
867 | 947k | uint32_t new_label_id = context_->TakeNextId(); |
868 | | |
869 | | // Assign a new id to the label. |
870 | 947k | state_.new_inst[basic_block->GetLabelInst()->result_id()] = new_label_id; |
871 | 947k | basic_block->GetLabelInst()->SetResultId(new_label_id); |
872 | 947k | def_use_mgr->AnalyzeInstDefUse(basic_block->GetLabelInst()); |
873 | | |
874 | 5.00M | for (Instruction& inst : *basic_block) { |
875 | | // Do def/use analysis on new lines |
876 | 5.00M | for (auto& line : inst.dbg_line_insts()) |
877 | 592 | def_use_mgr->AnalyzeInstDefUse(&line); |
878 | | |
879 | 5.00M | uint32_t old_id = inst.result_id(); |
880 | | |
881 | | // Ignore stores etc. |
882 | 5.00M | if (old_id == 0) { |
883 | 1.32M | continue; |
884 | 1.32M | } |
885 | | |
886 | | // Give the instruction a new id. |
887 | | // TODO(1841): Handle id overflow. |
888 | 3.68M | inst.SetResultId(context_->TakeNextId()); |
889 | 3.68M | def_use_mgr->AnalyzeInstDef(&inst); |
890 | | |
891 | | // Save the mapping of old_id -> new_id. |
892 | 3.68M | state_.new_inst[old_id] = inst.result_id(); |
893 | | // Check if this instruction is the induction variable. |
894 | 3.68M | if (loop_induction_variable_->result_id() == old_id) { |
895 | | // Save a pointer to the new copy of it. |
896 | 93.0k | state_.new_phi = &inst; |
897 | 93.0k | } |
898 | 3.68M | state_.ids_to_new_inst[inst.result_id()] = &inst; |
899 | 3.68M | } |
900 | 947k | } |
901 | | |
902 | 10.1M | void LoopUnrollerUtilsImpl::RemapOperands(Instruction* inst) { |
903 | 17.7M | auto remap_operands_to_new_ids = [this](uint32_t* id) { |
904 | 17.7M | auto itr = state_.new_inst.find(*id); |
905 | | |
906 | 17.7M | if (itr != state_.new_inst.end()) { |
907 | 5.68M | *id = itr->second; |
908 | 5.68M | } |
909 | 17.7M | }; |
910 | | |
911 | 10.1M | inst->ForEachInId(remap_operands_to_new_ids); |
912 | 10.1M | context_->AnalyzeUses(inst); |
913 | 10.1M | } |
914 | | |
915 | 1.91M | void LoopUnrollerUtilsImpl::RemapOperands(BasicBlock* basic_block) { |
916 | 10.1M | for (Instruction& inst : *basic_block) { |
917 | 10.1M | RemapOperands(&inst); |
918 | 10.1M | } |
919 | 1.91M | } |
920 | | |
921 | | // Generate the ordered list of basic blocks in the |loop| and cache it for |
922 | | // later use. |
923 | 2.18k | void LoopUnrollerUtilsImpl::ComputeLoopOrderedBlocks(Loop* loop) { |
924 | 2.18k | loop_blocks_inorder_.clear(); |
925 | 2.18k | loop->ComputeLoopStructuredOrder(&loop_blocks_inorder_); |
926 | 2.18k | } |
927 | | |
928 | | // Adds the blocks_to_add_ to both the loop and to the parent. |
929 | 472 | void LoopUnrollerUtilsImpl::AddBlocksToLoop(Loop* loop) const { |
930 | | // Add the blocks to this loop. |
931 | 29.3k | for (auto& block_itr : blocks_to_add_) { |
932 | 29.3k | loop->AddBasicBlock(block_itr.get()); |
933 | 29.3k | } |
934 | | |
935 | | // Add the blocks to the parent as well. |
936 | 472 | if (loop->GetParent()) AddBlocksToLoop(loop->GetParent()); |
937 | 472 | } |
938 | | |
939 | 0 | void LoopUnrollerUtilsImpl::LinkLastPhisToStart(Loop* loop) const { |
940 | 0 | std::vector<Instruction*> inductions; |
941 | 0 | loop->GetInductionVariables(inductions); |
942 | |
|
943 | 0 | for (size_t i = 0; i < inductions.size(); ++i) { |
944 | 0 | Instruction* last_phi_in_block = state_.previous_phis_[i]; |
945 | |
|
946 | 0 | uint32_t phi_index = |
947 | 0 | GetPhiIndexFromLabel(state_.previous_latch_block_, last_phi_in_block); |
948 | 0 | uint32_t phi_variable = |
949 | 0 | last_phi_in_block->GetSingleWordInOperand(phi_index - 1); |
950 | 0 | uint32_t phi_label = last_phi_in_block->GetSingleWordInOperand(phi_index); |
951 | |
|
952 | 0 | Instruction* phi = inductions[i]; |
953 | 0 | phi->SetInOperand(phi_index - 1, {phi_variable}); |
954 | 0 | phi->SetInOperand(phi_index, {phi_label}); |
955 | 0 | } |
956 | 0 | } |
957 | | |
958 | | // Duplicate the |loop| body |factor| number of times while keeping the loop |
959 | | // backedge intact. |
960 | 0 | void LoopUnrollerUtilsImpl::PartiallyUnroll(Loop* loop, size_t factor) { |
961 | 0 | Unroll(loop, factor); |
962 | 0 | LinkLastPhisToStart(loop); |
963 | 0 | AddBlocksToLoop(loop); |
964 | 0 | AddBlocksToFunction(loop->GetMergeBlock()); |
965 | 0 | RemoveDeadInstructions(); |
966 | 0 | } |
967 | | |
968 | | /* |
969 | | * End LoopUtilsImpl. |
970 | | */ |
971 | | |
972 | | } // namespace |
973 | | |
974 | | /* |
975 | | * |
976 | | * Begin Utils. |
977 | | * |
978 | | * */ |
979 | | |
980 | 6.01k | bool LoopUtils::CanPerformUnroll() { |
981 | | // The loop is expected to be in structured order. |
982 | 6.01k | if (!loop_->GetHeaderBlock()->GetMergeInst()) { |
983 | 0 | return false; |
984 | 0 | } |
985 | | |
986 | | // Find check the loop has a condition we can find and evaluate. |
987 | 6.01k | const BasicBlock* condition = loop_->FindConditionBlock(); |
988 | 6.01k | if (!condition) return false; |
989 | | |
990 | | // Check that we can find and process the induction variable. |
991 | 5.24k | const Instruction* induction = loop_->FindConditionVariable(condition); |
992 | 5.24k | if (!induction || induction->opcode() != spv::Op::OpPhi) return false; |
993 | | |
994 | | // Check that we can find the number of loop iterations. |
995 | 4.53k | if (!loop_->FindNumberOfIterations(induction, &*condition->ctail(), nullptr)) |
996 | 0 | return false; |
997 | | |
998 | 4.53k | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
999 | | // ClusterFuzz/OSS-Fuzz is likely to yield examples with very high loop |
1000 | | // iteration counts. This can cause timeouts and memouts during fuzzing that |
1001 | | // are not classed as bugs. To avoid this noise, loop unrolling is not applied |
1002 | | // to loops with large iteration counts when fuzzing. |
1003 | 4.53k | constexpr size_t kFuzzerIterationLimit = 100; |
1004 | 4.53k | size_t num_iterations; |
1005 | 4.53k | loop_->FindNumberOfIterations(induction, &*condition->ctail(), |
1006 | 4.53k | &num_iterations); |
1007 | 4.53k | if (num_iterations > kFuzzerIterationLimit) { |
1008 | 152 | return false; |
1009 | 152 | } |
1010 | 4.38k | #endif |
1011 | | |
1012 | | // Make sure the latch block is a unconditional branch to the header |
1013 | | // block. |
1014 | 4.38k | const Instruction& branch = *loop_->GetLatchBlock()->ctail(); |
1015 | 4.38k | bool branching_assumption = |
1016 | 4.38k | branch.opcode() == spv::Op::OpBranch && |
1017 | 4.38k | branch.GetSingleWordInOperand(0) == loop_->GetHeaderBlock()->id(); |
1018 | 4.38k | if (!branching_assumption) { |
1019 | 0 | return false; |
1020 | 0 | } |
1021 | | |
1022 | 4.38k | std::vector<Instruction*> inductions; |
1023 | 4.38k | loop_->GetInductionVariables(inductions); |
1024 | | |
1025 | | // Ban breaks within the loop. |
1026 | 4.38k | const std::vector<uint32_t>& merge_block_preds = |
1027 | 4.38k | context_->cfg()->preds(loop_->GetMergeBlock()->id()); |
1028 | 4.38k | if (merge_block_preds.size() != 1) { |
1029 | 0 | return false; |
1030 | 0 | } |
1031 | | |
1032 | | // Ban continues within the loop. |
1033 | 4.38k | const std::vector<uint32_t>& continue_block_preds = |
1034 | 4.38k | context_->cfg()->preds(loop_->GetContinueBlock()->id()); |
1035 | 4.38k | if (continue_block_preds.size() != 1) { |
1036 | 4 | return false; |
1037 | 4 | } |
1038 | | |
1039 | | // Ban returns in the loop. |
1040 | | // Iterate over all the blocks within the loop and check that none of them |
1041 | | // exit the loop. |
1042 | 50.5k | for (uint32_t label_id : loop_->GetBlocks()) { |
1043 | 50.5k | const BasicBlock* block = context_->cfg()->block(label_id); |
1044 | 50.5k | if (block->ctail()->opcode() == spv::Op::OpKill || |
1045 | 50.5k | block->ctail()->opcode() == spv::Op::OpReturn || |
1046 | 50.5k | block->ctail()->opcode() == spv::Op::OpReturnValue || |
1047 | 50.5k | block->ctail()->opcode() == spv::Op::OpTerminateInvocation) { |
1048 | 0 | return false; |
1049 | 0 | } |
1050 | 50.5k | } |
1051 | | // Can only unroll inner loops. |
1052 | 4.38k | if (!loop_->AreAllChildrenMarkedForRemoval()) { |
1053 | 17 | return false; |
1054 | 17 | } |
1055 | | |
1056 | 4.36k | return true; |
1057 | 4.38k | } |
1058 | | |
1059 | 0 | bool LoopUtils::PartiallyUnroll(size_t factor) { |
1060 | 0 | if (factor == 1 || !CanPerformUnroll()) return false; |
1061 | | |
1062 | | // Create the unroller utility. |
1063 | 0 | LoopUnrollerUtilsImpl unroller{context_, |
1064 | 0 | loop_->GetHeaderBlock()->GetParent()}; |
1065 | 0 | unroller.Init(loop_); |
1066 | | |
1067 | | // If the unrolling factor is larger than or the same size as the loop just |
1068 | | // fully unroll the loop. |
1069 | 0 | if (factor >= unroller.GetLoopIterationCount()) { |
1070 | 0 | unroller.FullyUnroll(loop_); |
1071 | 0 | return true; |
1072 | 0 | } |
1073 | | |
1074 | | // If the loop unrolling factor is an residual number of iterations we need to |
1075 | | // let run the loop for the residual part then let it branch into the unrolled |
1076 | | // remaining part. We add one when calucating the remainder to take into |
1077 | | // account the one iteration already in the loop. |
1078 | 0 | if (unroller.GetLoopIterationCount() % factor != 0) { |
1079 | 0 | unroller.PartiallyUnrollResidualFactor(loop_, factor); |
1080 | 0 | } else { |
1081 | 0 | unroller.PartiallyUnroll(loop_, factor); |
1082 | 0 | } |
1083 | |
|
1084 | 0 | return true; |
1085 | 0 | } |
1086 | | |
1087 | 2.18k | bool LoopUtils::FullyUnroll() { |
1088 | 2.18k | if (!CanPerformUnroll()) return false; |
1089 | | |
1090 | 2.18k | std::vector<Instruction*> inductions; |
1091 | 2.18k | loop_->GetInductionVariables(inductions); |
1092 | | |
1093 | 2.18k | LoopUnrollerUtilsImpl unroller{context_, |
1094 | 2.18k | loop_->GetHeaderBlock()->GetParent()}; |
1095 | | |
1096 | 2.18k | unroller.Init(loop_); |
1097 | 2.18k | unroller.FullyUnroll(loop_); |
1098 | | |
1099 | 2.18k | return true; |
1100 | 2.18k | } |
1101 | | |
1102 | 0 | void LoopUtils::Finalize() { |
1103 | | // Clean up the loop descriptor to preserve the analysis. |
1104 | |
|
1105 | 0 | LoopDescriptor* LD = context_->GetLoopDescriptor(&function_); |
1106 | 0 | LD->PostModificationCleanup(); |
1107 | 0 | } |
1108 | | |
1109 | | /* |
1110 | | * |
1111 | | * Begin Pass. |
1112 | | * |
1113 | | */ |
1114 | | |
1115 | 10.7k | Pass::Status LoopUnroller::Process() { |
1116 | 10.7k | bool changed = false; |
1117 | 10.7k | for (Function& f : *context()->module()) { |
1118 | 9.97k | if (f.IsDeclaration()) { |
1119 | 0 | continue; |
1120 | 0 | } |
1121 | | |
1122 | 9.97k | LoopDescriptor* LD = context()->GetLoopDescriptor(&f); |
1123 | 20.4k | for (Loop& loop : *LD) { |
1124 | 20.4k | LoopUtils loop_utils{context(), &loop}; |
1125 | 20.4k | if (!loop.HasUnrollLoopControl() || !loop_utils.CanPerformUnroll()) { |
1126 | 18.3k | continue; |
1127 | 18.3k | } |
1128 | | |
1129 | 2.18k | if (fully_unroll_) { |
1130 | 2.18k | loop_utils.FullyUnroll(); |
1131 | 2.18k | } else { |
1132 | 0 | loop_utils.PartiallyUnroll(unroll_factor_); |
1133 | 0 | } |
1134 | 2.18k | changed = true; |
1135 | 2.18k | } |
1136 | 9.97k | LD->PostModificationCleanup(); |
1137 | 9.97k | } |
1138 | | |
1139 | 10.7k | return changed ? Status::SuccessWithChange : Status::SuccessWithoutChange; |
1140 | 10.7k | } |
1141 | | |
1142 | | } // namespace opt |
1143 | | } // namespace spvtools |