Coverage Report

Created: 2025-02-05 06:34

/src/spirv-tools/source/opt/loop_unroller.cpp
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2018 Google LLC.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "source/opt/loop_unroller.h"
16
17
#include <limits>
18
#include <memory>
19
#include <unordered_map>
20
#include <utility>
21
#include <vector>
22
23
#include "source/opt/ir_builder.h"
24
#include "source/opt/loop_utils.h"
25
26
// Implements loop util unrolling functionality for fully and partially
27
// unrolling loops. Given a factor it will duplicate the loop that many times,
28
// appending each one to the end of the old loop and removing backedges, to
29
// create a new unrolled loop.
30
//
31
// 1 - User calls LoopUtils::FullyUnroll or LoopUtils::PartiallyUnroll with a
32
// loop they wish to unroll. LoopUtils::CanPerformUnroll is used to
33
// validate that a given loop can be unrolled. That method (along with the
34
// constructor of loop) checks that the IR is in the expected canonicalised
35
// format.
36
//
37
// 2 - The LoopUtils methods create a LoopUnrollerUtilsImpl object to actually
38
// perform the unrolling. This implements helper methods to copy the loop basic
39
// blocks and remap the ids of instructions used inside them.
40
//
41
// 3 - The core of LoopUnrollerUtilsImpl is the Unroll method, this method
42
// actually performs the loop duplication. It does this by creating a
43
// LoopUnrollState object and then copying the loop as given by the factor
44
// parameter. The LoopUnrollState object retains the state of the unroller
45
// between the loop body copies as each iteration needs information on the last
46
// to adjust the phi induction variable, adjust the OpLoopMerge instruction in
47
// the main loop header, and change the previous continue block to point to the
48
// new header and the new continue block to the main loop header.
49
//
50
// 4 - If the loop is to be fully unrolled then it is simply closed after step
51
// 3, with the OpLoopMerge being deleted, the backedge removed, and the
52
// condition blocks folded.
53
//
54
// 5 - If it is being partially unrolled: if the unrolling factor leaves the
55
// loop with an even number of bodies with respect to the number of loop
56
// iterations then step 3 is all that is needed. If it is uneven then we need to
57
// duplicate the loop completely and unroll the duplicated loop to cover the
58
// residual part and adjust the first loop to cover only the "even" part. For
59
// instance if you request an unroll factor of 3 on a loop with 10 iterations
60
// then copying the body three times would leave you with three bodies in the
61
// loop
62
// where the loop still iterates over each 4 times. So we make two loops one
63
// iterating once then a second loop of three iterating 3 times.
64
65
namespace spvtools {
66
namespace opt {
67
namespace {
68
69
// Loop control constant value for DontUnroll flag.
70
constexpr uint32_t kLoopControlDontUnrollIndex = 2;
71
72
// Operand index of the loop control parameter of the OpLoopMerge.
73
constexpr uint32_t kLoopControlIndex = 2;
74
75
// This utility class encapsulates some of the state we need to maintain between
76
// loop unrolls. Specifically it maintains key blocks and the induction variable
77
// in the current loop duplication step and the blocks from the previous one.
78
// This is because each step of the unroll needs to use data from both the
79
// preceding step and the original loop.
80
struct LoopUnrollState {
81
  LoopUnrollState()
82
2.18k
      : previous_phi_(nullptr),
83
2.18k
        previous_latch_block_(nullptr),
84
2.18k
        previous_condition_block_(nullptr),
85
2.18k
        new_phi(nullptr),
86
2.18k
        new_continue_block(nullptr),
87
2.18k
        new_condition_block(nullptr),
88
2.18k
        new_header_block(nullptr) {}
89
90
  // Initialize from the loop descriptor class.
91
  LoopUnrollState(Instruction* induction, BasicBlock* latch_block,
92
                  BasicBlock* condition, std::vector<Instruction*>&& phis)
93
2.18k
      : previous_phi_(induction),
94
2.18k
        previous_latch_block_(latch_block),
95
2.18k
        previous_condition_block_(condition),
96
2.18k
        new_phi(nullptr),
97
2.18k
        new_continue_block(nullptr),
98
2.18k
        new_condition_block(nullptr),
99
2.18k
        new_header_block(nullptr) {
100
2.18k
    previous_phis_ = std::move(phis);
101
2.18k
  }
102
103
  // Swap the state so that the new nodes are now the previous nodes.
104
93.0k
  void NextIterationState() {
105
93.0k
    previous_phi_ = new_phi;
106
93.0k
    previous_latch_block_ = new_latch_block;
107
93.0k
    previous_condition_block_ = new_condition_block;
108
93.0k
    previous_phis_ = std::move(new_phis_);
109
110
    // Clear new nodes.
111
93.0k
    new_phi = nullptr;
112
93.0k
    new_continue_block = nullptr;
113
93.0k
    new_condition_block = nullptr;
114
93.0k
    new_header_block = nullptr;
115
93.0k
    new_latch_block = nullptr;
116
117
    // Clear new block/instruction maps.
118
93.0k
    new_blocks.clear();
119
93.0k
    new_inst.clear();
120
93.0k
    ids_to_new_inst.clear();
121
93.0k
  }
122
123
  // The induction variable from the immediately preceding loop body.
124
  Instruction* previous_phi_;
125
126
  // All the phi nodes from the previous loop iteration.
127
  std::vector<Instruction*> previous_phis_;
128
129
  std::vector<Instruction*> new_phis_;
130
131
  // The previous latch block. The backedge will be removed from this and
132
  // added to the new latch block.
133
  BasicBlock* previous_latch_block_;
134
135
  // The previous condition block. This may be folded to flatten the loop.
136
  BasicBlock* previous_condition_block_;
137
138
  // The new induction variable.
139
  Instruction* new_phi;
140
141
  // The new continue block.
142
  BasicBlock* new_continue_block;
143
144
  // The new condition block.
145
  BasicBlock* new_condition_block;
146
147
  // The new header block.
148
  BasicBlock* new_header_block;
149
150
  // The new latch block.
151
  BasicBlock* new_latch_block;
152
153
  // A mapping of new block ids to the original blocks which they were copied
154
  // from.
155
  std::unordered_map<uint32_t, BasicBlock*> new_blocks;
156
157
  // A mapping of the original instruction ids to the instruction ids to their
158
  // copies.
159
  std::unordered_map<uint32_t, uint32_t> new_inst;
160
161
  std::unordered_map<uint32_t, Instruction*> ids_to_new_inst;
162
};
163
164
// This class implements the actual unrolling. It uses a LoopUnrollState to
165
// maintain the state of the unrolling in between steps.
166
class LoopUnrollerUtilsImpl {
167
 public:
168
  using BasicBlockListTy = std::vector<std::unique_ptr<BasicBlock>>;
169
170
  LoopUnrollerUtilsImpl(IRContext* c, Function* function)
171
2.18k
      : context_(c),
172
2.18k
        function_(*function),
173
2.18k
        loop_condition_block_(nullptr),
174
2.18k
        loop_induction_variable_(nullptr),
175
2.18k
        number_of_loop_iterations_(0),
176
2.18k
        loop_step_value_(0),
177
2.18k
        loop_init_value_(0) {}
178
179
  // Unroll the |loop| by given |factor| by copying the whole body |factor|
180
  // times. The resulting basicblock structure will remain a loop.
181
  void PartiallyUnroll(Loop*, size_t factor);
182
183
  // If partially unrolling the |loop| would leave the loop with too many bodies
184
  // for its number of iterations then this method should be used. This method
185
  // will duplicate the |loop| completely, making the duplicated loop the
186
  // successor of the original's merge block. The original loop will have its
187
  // condition changed to loop over the residual part and the duplicate will be
188
  // partially unrolled. The resulting structure will be two loops.
189
  void PartiallyUnrollResidualFactor(Loop* loop, size_t factor);
190
191
  // Fully unroll the |loop| by copying the full body by the total number of
192
  // loop iterations, folding all conditions, and removing the backedge from the
193
  // continue block to the header.
194
  void FullyUnroll(Loop* loop);
195
196
  // Get the ID of the variable in the |phi| paired with |label|.
197
  uint32_t GetPhiDefID(const Instruction* phi, uint32_t label) const;
198
199
  // Close the loop by removing the OpLoopMerge from the |loop| header block and
200
  // making the backedge point to the merge block.
201
  void CloseUnrolledLoop(Loop* loop);
202
203
  // Remove the OpConditionalBranch instruction inside |conditional_block| used
204
  // to branch to either exit or continue the loop and replace it with an
205
  // unconditional OpBranch to block |new_target|.
206
  void FoldConditionBlock(BasicBlock* condtion_block, uint32_t new_target);
207
208
  // Add all blocks_to_add_ to function_ at the |insert_point|.
209
  void AddBlocksToFunction(const BasicBlock* insert_point);
210
211
  // Duplicates the |old_loop|, cloning each body and remapping the ids without
212
  // removing instructions or changing relative structure. Result will be stored
213
  // in |new_loop|.
214
  void DuplicateLoop(Loop* old_loop, Loop* new_loop);
215
216
0
  inline size_t GetLoopIterationCount() const {
217
0
    return number_of_loop_iterations_;
218
0
  }
219
220
  // Extracts the initial state information from the |loop|.
221
  void Init(Loop* loop);
222
223
  // Replace the uses of each induction variable outside the loop with the final
224
  // value of the induction variable before the loop exit. To reflect the proper
225
  // state of a fully unrolled loop.
226
  void ReplaceInductionUseWithFinalValue(Loop* loop);
227
228
  // Remove all the instructions in the invalidated_instructions_ vector.
229
  void RemoveDeadInstructions();
230
231
  // Replace any use of induction variables outwith the loop with the final
232
  // value of the induction variable in the unrolled loop.
233
  void ReplaceOutsideLoopUseWithFinalValue(Loop* loop);
234
235
  // Set the LoopControl operand of the OpLoopMerge instruction to be
236
  // DontUnroll.
237
  void MarkLoopControlAsDontUnroll(Loop* loop) const;
238
239
 private:
240
  // Remap all the in |basic_block| to new IDs and keep the mapping of new ids
241
  // to old
242
  // ids. |loop| is used to identify special loop blocks (header, continue,
243
  // etc).
244
  void AssignNewResultIds(BasicBlock* basic_block);
245
246
  // Using the map built by AssignNewResultIds, replace the uses in |inst|
247
  // by the id that the use maps to.
248
  void RemapOperands(Instruction* inst);
249
250
  // Using the map built by AssignNewResultIds, for each instruction in
251
  // |basic_block| use
252
  // that map to substitute the IDs used by instructions (in the operands) with
253
  // the new ids.
254
  void RemapOperands(BasicBlock* basic_block);
255
256
  // Copy the whole body of the loop, all blocks dominated by the |loop| header
257
  // and not dominated by the |loop| merge. The copied body will be linked to by
258
  // the old |loop| continue block and the new body will link to the |loop|
259
  // header via the new continue block. |eliminate_conditions| is used to decide
260
  // whether or not to fold all the condition blocks other than the last one.
261
  void CopyBody(Loop* loop, bool eliminate_conditions);
262
263
  // Copy a given |block_to_copy| in the |loop| and record the mapping of the
264
  // old/new ids. |preserve_instructions| determines whether or not the method
265
  // will modify (other than result_id) instructions which are copied.
266
  void CopyBasicBlock(Loop* loop, const BasicBlock* block_to_copy,
267
                      bool preserve_instructions);
268
269
  // The actual implementation of the unroll step. Unrolls |loop| by given
270
  // |factor| by copying the body by |factor| times. Also propagates the
271
  // induction variable value throughout the copies.
272
  void Unroll(Loop* loop, size_t factor);
273
274
  // Fills the loop_blocks_inorder_ field with the ordered list of basic blocks
275
  // as computed by the method ComputeLoopOrderedBlocks.
276
  void ComputeLoopOrderedBlocks(Loop* loop);
277
278
  // Adds the blocks_to_add_ to both the |loop| and to the parent of |loop| if
279
  // the parent exists.
280
  void AddBlocksToLoop(Loop* loop) const;
281
282
  // After the partially unroll step the phi instructions in the header block
283
  // will be in an illegal format. This function makes the phis legal by making
284
  // the edge from the latch block come from the new latch block and the value
285
  // to be the actual value of the phi at that point.
286
  void LinkLastPhisToStart(Loop* loop) const;
287
288
  // Kill all debug declaration instructions from |bb|.
289
  void KillDebugDeclares(BasicBlock* bb);
290
291
  // A pointer to the IRContext. Used to add/remove instructions and for usedef
292
  // chains.
293
  IRContext* context_;
294
295
  // A reference the function the loop is within.
296
  Function& function_;
297
298
  // A list of basic blocks to be added to the loop at the end of an unroll
299
  // step.
300
  BasicBlockListTy blocks_to_add_;
301
302
  // List of instructions which are now dead and can be removed.
303
  std::vector<Instruction*> invalidated_instructions_;
304
305
  // Maintains the current state of the transform between calls to unroll.
306
  LoopUnrollState state_;
307
308
  // An ordered list containing the loop basic blocks.
309
  std::vector<BasicBlock*> loop_blocks_inorder_;
310
311
  // The block containing the condition check which contains a conditional
312
  // branch to the merge and continue block.
313
  BasicBlock* loop_condition_block_;
314
315
  // The induction variable of the loop.
316
  Instruction* loop_induction_variable_;
317
318
  // Phis used in the loop need to be remapped to use the actual result values
319
  // and then be remapped at the end.
320
  std::vector<Instruction*> loop_phi_instructions_;
321
322
  // The number of loop iterations that the loop would perform pre-unroll.
323
  size_t number_of_loop_iterations_;
324
325
  // The amount that the loop steps each iteration.
326
  int64_t loop_step_value_;
327
328
  // The value the loop starts stepping from.
329
  int64_t loop_init_value_;
330
};
331
332
/*
333
 * Static helper functions.
334
 */
335
336
// Retrieve the index of the OpPhi instruction |phi| which corresponds to the
337
// incoming |block| id.
338
0
uint32_t GetPhiIndexFromLabel(const BasicBlock* block, const Instruction* phi) {
339
0
  for (uint32_t i = 1; i < phi->NumInOperands(); i += 2) {
340
0
    if (block->id() == phi->GetSingleWordInOperand(i)) {
341
0
      return i;
342
0
    }
343
0
  }
344
0
  assert(false && "Could not find operand in instruction.");
345
0
  return 0;
346
0
}
347
348
2.18k
void LoopUnrollerUtilsImpl::Init(Loop* loop) {
349
2.18k
  loop_condition_block_ = loop->FindConditionBlock();
350
351
  // When we reinit the second loop during PartiallyUnrollResidualFactor we need
352
  // to use the cached value from the duplicate step as the dominator tree
353
  // basded solution, loop->FindConditionBlock, requires all the nodes to be
354
  // connected up with the correct branches. They won't be at this point.
355
2.18k
  if (!loop_condition_block_) {
356
0
    loop_condition_block_ = state_.new_condition_block;
357
0
  }
358
2.18k
  assert(loop_condition_block_);
359
360
2.18k
  loop_induction_variable_ = loop->FindConditionVariable(loop_condition_block_);
361
2.18k
  assert(loop_induction_variable_);
362
363
2.18k
  bool found = loop->FindNumberOfIterations(
364
2.18k
      loop_induction_variable_, &*loop_condition_block_->ctail(),
365
2.18k
      &number_of_loop_iterations_, &loop_step_value_, &loop_init_value_);
366
2.18k
  (void)found;  // To silence unused variable warning on release builds.
367
2.18k
  assert(found);
368
369
  // Blocks are stored in an unordered set of ids in the loop class, we need to
370
  // create the dominator ordered list.
371
2.18k
  ComputeLoopOrderedBlocks(loop);
372
2.18k
}
373
374
// This function is used to partially unroll the loop when the factor provided
375
// would normally lead to an illegal optimization. Instead of just unrolling the
376
// loop it creates two loops and unrolls one and adjusts the condition on the
377
// other. The end result being that the new loop pair iterates over the correct
378
// number of bodies.
379
void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(Loop* loop,
380
0
                                                          size_t factor) {
381
  // TODO(1841): Handle id overflow.
382
0
  std::unique_ptr<Instruction> new_label{new Instruction(
383
0
      context_, spv::Op::OpLabel, 0, context_->TakeNextId(), {})};
384
0
  std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))};
385
0
  new_exit_bb->SetParent(&function_);
386
387
  // Save the id of the block before we move it.
388
0
  uint32_t new_merge_id = new_exit_bb->id();
389
390
  // Add the block the list of blocks to add, we want this merge block to be
391
  // right at the start of the new blocks.
392
0
  blocks_to_add_.push_back(std::move(new_exit_bb));
393
0
  BasicBlock* new_exit_bb_raw = blocks_to_add_[0].get();
394
0
  Instruction& original_conditional_branch = *loop_condition_block_->tail();
395
  // Duplicate the loop, providing access to the blocks of both loops.
396
  // This is a naked new due to the VS2013 requirement of not having unique
397
  // pointers in vectors, as it will be inserted into a vector with
398
  // loop_descriptor.AddLoop.
399
0
  std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(*loop);
400
401
  // Clear the basic blocks of the new loop.
402
0
  new_loop->ClearBlocks();
403
404
0
  DuplicateLoop(loop, new_loop.get());
405
406
  // Add the blocks to the function.
407
0
  AddBlocksToFunction(loop->GetMergeBlock());
408
0
  blocks_to_add_.clear();
409
410
  // Create a new merge block for the first loop.
411
0
  InstructionBuilder builder{context_, new_exit_bb_raw};
412
  // Make the first loop branch to the second.
413
0
  builder.AddBranch(new_loop->GetHeaderBlock()->id());
414
415
0
  loop_condition_block_ = state_.new_condition_block;
416
0
  loop_induction_variable_ = state_.new_phi;
417
  // Unroll the new loop by the factor with the usual -1 to account for the
418
  // existing block iteration.
419
0
  Unroll(new_loop.get(), factor);
420
421
0
  LinkLastPhisToStart(new_loop.get());
422
0
  AddBlocksToLoop(new_loop.get());
423
424
  // Add the new merge block to the back of the list of blocks to be added. It
425
  // needs to be the last block added to maintain dominator order in the binary.
426
0
  blocks_to_add_.push_back(
427
0
      std::unique_ptr<BasicBlock>(new_loop->GetMergeBlock()));
428
429
  // Add the blocks to the function.
430
0
  AddBlocksToFunction(loop->GetMergeBlock());
431
432
  // Reset the usedef analysis.
433
0
  context_->InvalidateAnalysesExceptFor(
434
0
      IRContext::Analysis::kAnalysisLoopAnalysis);
435
0
  analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
436
437
  // The loop condition.
438
0
  Instruction* condition_check = def_use_manager->GetDef(
439
0
      original_conditional_branch.GetSingleWordOperand(0));
440
441
  // This should have been checked by the LoopUtils::CanPerformUnroll function
442
  // before entering this.
443
0
  assert(loop->IsSupportedCondition(condition_check->opcode()));
444
445
  // We need to account for the initial body when calculating the remainder.
446
0
  int64_t remainder = Loop::GetResidualConditionValue(
447
0
      condition_check->opcode(), loop_init_value_, loop_step_value_,
448
0
      number_of_loop_iterations_, factor);
449
450
0
  assert(remainder > std::numeric_limits<int32_t>::min() &&
451
0
         remainder < std::numeric_limits<int32_t>::max());
452
453
0
  Instruction* new_constant = nullptr;
454
455
  // If the remainder is negative then we add a signed constant, otherwise just
456
  // add an unsigned constant.
457
0
  if (remainder < 0) {
458
0
    new_constant = builder.GetSintConstant(static_cast<int32_t>(remainder));
459
0
  } else {
460
0
    new_constant = builder.GetUintConstant(static_cast<int32_t>(remainder));
461
0
  }
462
463
0
  uint32_t constant_id = new_constant->result_id();
464
465
  // Update the condition check.
466
0
  condition_check->SetInOperand(1, {constant_id});
467
468
  // Update the next phi node. The phi will have a constant value coming in from
469
  // the preheader block. For the duplicated loop we need to update the constant
470
  // to be the amount of iterations covered by the first loop and the incoming
471
  // block to be the first loops new merge block.
472
0
  std::vector<Instruction*> new_inductions;
473
0
  new_loop->GetInductionVariables(new_inductions);
474
475
0
  std::vector<Instruction*> old_inductions;
476
0
  loop->GetInductionVariables(old_inductions);
477
0
  for (size_t index = 0; index < new_inductions.size(); ++index) {
478
0
    Instruction* new_induction = new_inductions[index];
479
0
    Instruction* old_induction = old_inductions[index];
480
    // Get the index of the loop initalizer, the value coming in from the
481
    // preheader.
482
0
    uint32_t initalizer_index =
483
0
        GetPhiIndexFromLabel(new_loop->GetPreHeaderBlock(), old_induction);
484
485
    // Replace the second loop initalizer with the phi from the first
486
0
    new_induction->SetInOperand(initalizer_index - 1,
487
0
                                {old_induction->result_id()});
488
0
    new_induction->SetInOperand(initalizer_index, {new_merge_id});
489
490
    // If the use of the first loop induction variable is outside of the loop
491
    // then replace that use with the second loop induction variable.
492
0
    uint32_t second_loop_induction = new_induction->result_id();
493
0
    auto replace_use_outside_of_loop = [loop, second_loop_induction](
494
0
                                           Instruction* user,
495
0
                                           uint32_t operand_index) {
496
0
      if (!loop->IsInsideLoop(user)) {
497
0
        user->SetOperand(operand_index, {second_loop_induction});
498
0
      }
499
0
    };
500
501
0
    context_->get_def_use_mgr()->ForEachUse(old_induction,
502
0
                                            replace_use_outside_of_loop);
503
0
  }
504
505
0
  context_->InvalidateAnalysesExceptFor(
506
0
      IRContext::Analysis::kAnalysisLoopAnalysis);
507
508
0
  context_->ReplaceAllUsesWith(loop->GetMergeBlock()->id(), new_merge_id);
509
510
0
  LoopDescriptor& loop_descriptor = *context_->GetLoopDescriptor(&function_);
511
512
0
  loop_descriptor.AddLoop(std::move(new_loop), loop->GetParent());
513
514
0
  RemoveDeadInstructions();
515
0
}
516
517
// Mark this loop as DontUnroll as it will already be unrolled and it may not
518
// be safe to unroll a previously partially unrolled loop.
519
2.18k
void LoopUnrollerUtilsImpl::MarkLoopControlAsDontUnroll(Loop* loop) const {
520
2.18k
  Instruction* loop_merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
521
2.18k
  assert(loop_merge_inst &&
522
2.18k
         "Loop merge instruction could not be found after entering unroller "
523
2.18k
         "(should have exited before this)");
524
2.18k
  loop_merge_inst->SetInOperand(kLoopControlIndex,
525
2.18k
                                {kLoopControlDontUnrollIndex});
526
2.18k
}
527
528
// Duplicate the |loop| body |factor| - 1 number of times while keeping the loop
529
// backedge intact. This will leave the loop with |factor| number of bodies
530
// after accounting for the initial body.
531
2.18k
void LoopUnrollerUtilsImpl::Unroll(Loop* loop, size_t factor) {
532
  // If we unroll a loop partially it will not be safe to unroll it further.
533
  // This is due to the current method of calculating the number of loop
534
  // iterations.
535
2.18k
  MarkLoopControlAsDontUnroll(loop);
536
537
2.18k
  std::vector<Instruction*> inductions;
538
2.18k
  loop->GetInductionVariables(inductions);
539
2.18k
  state_ = LoopUnrollState{loop_induction_variable_, loop->GetLatchBlock(),
540
2.18k
                           loop_condition_block_, std::move(inductions)};
541
95.2k
  for (size_t i = 0; i < factor - 1; ++i) {
542
93.0k
    CopyBody(loop, true);
543
93.0k
  }
544
2.18k
}
545
546
2.18k
void LoopUnrollerUtilsImpl::RemoveDeadInstructions() {
547
  // Remove the dead instructions.
548
248k
  for (Instruction* inst : invalidated_instructions_) {
549
248k
    context_->KillInst(inst);
550
248k
  }
551
2.18k
}
552
553
2.18k
void LoopUnrollerUtilsImpl::ReplaceInductionUseWithFinalValue(Loop* loop) {
554
2.18k
  context_->InvalidateAnalysesExceptFor(
555
2.18k
      IRContext::Analysis::kAnalysisLoopAnalysis |
556
2.18k
      IRContext::Analysis::kAnalysisDefUse |
557
2.18k
      IRContext::Analysis::kAnalysisInstrToBlockMapping);
558
559
2.18k
  std::vector<Instruction*> inductions;
560
2.18k
  loop->GetInductionVariables(inductions);
561
562
5.81k
  for (size_t index = 0; index < inductions.size(); ++index) {
563
3.63k
    uint32_t trip_step_id = GetPhiDefID(state_.previous_phis_[index],
564
3.63k
                                        state_.previous_latch_block_->id());
565
3.63k
    context_->ReplaceAllUsesWith(inductions[index]->result_id(), trip_step_id);
566
3.63k
    invalidated_instructions_.push_back(inductions[index]);
567
3.63k
  }
568
2.18k
}
569
570
// Fully unroll the loop by partially unrolling it by the number of loop
571
// iterations minus one for the body already accounted for.
572
2.18k
void LoopUnrollerUtilsImpl::FullyUnroll(Loop* loop) {
573
  // We unroll the loop by number of iterations in the loop.
574
2.18k
  Unroll(loop, number_of_loop_iterations_);
575
576
  // The first condition block is preserved until now so it can be copied.
577
2.18k
  FoldConditionBlock(loop_condition_block_, 1);
578
579
  // Delete the OpLoopMerge and remove the backedge to the header.
580
2.18k
  CloseUnrolledLoop(loop);
581
582
  // Mark the loop for later deletion. This allows us to preserve the loop
583
  // iterators but still disregard dead loops.
584
2.18k
  loop->MarkLoopForRemoval();
585
586
  // If the loop has a parent add the new blocks to the parent.
587
2.18k
  if (loop->GetParent()) {
588
472
    AddBlocksToLoop(loop->GetParent());
589
472
  }
590
591
  // Add the blocks to the function.
592
2.18k
  AddBlocksToFunction(loop->GetMergeBlock());
593
594
2.18k
  ReplaceInductionUseWithFinalValue(loop);
595
596
2.18k
  RemoveDeadInstructions();
597
  // Invalidate all analyses.
598
2.18k
  context_->InvalidateAnalysesExceptFor(
599
2.18k
      IRContext::Analysis::kAnalysisLoopAnalysis |
600
2.18k
      IRContext::Analysis::kAnalysisDefUse);
601
2.18k
}
602
603
947k
void LoopUnrollerUtilsImpl::KillDebugDeclares(BasicBlock* bb) {
604
  // We cannot kill an instruction inside BasicBlock::ForEachInst()
605
  // because it will generate dangling pointers. We use |to_be_killed|
606
  // to kill them after the loop.
607
947k
  std::vector<Instruction*> to_be_killed;
608
609
5.95M
  bb->ForEachInst([&to_be_killed, this](Instruction* inst) {
610
5.95M
    if (context_->get_debug_info_mgr()->IsDebugDeclare(inst)) {
611
0
      to_be_killed.push_back(inst);
612
0
    }
613
5.95M
  });
614
947k
  for (auto* inst : to_be_killed) context_->KillInst(inst);
615
947k
}
616
617
// Copy a given basic block, give it a new result_id, and store the new block
618
// and the id mapping in the state. |preserve_instructions| is used to determine
619
// whether or not this function should edit instructions other than the
620
// |result_id|.
621
void LoopUnrollerUtilsImpl::CopyBasicBlock(Loop* loop, const BasicBlock* itr,
622
947k
                                           bool preserve_instructions) {
623
  // Clone the block exactly, including the IDs.
624
947k
  BasicBlock* basic_block = itr->Clone(context_);
625
947k
  basic_block->SetParent(itr->GetParent());
626
627
  // We do not want to duplicate DebugDeclare.
628
947k
  KillDebugDeclares(basic_block);
629
630
  // Assign each result a new unique ID and keep a mapping of the old ids to
631
  // the new ones.
632
947k
  AssignNewResultIds(basic_block);
633
634
  // If this is the continue block we are copying.
635
947k
  if (itr == loop->GetContinueBlock()) {
636
    // Make the OpLoopMerge point to this block for the continue.
637
93.0k
    if (!preserve_instructions) {
638
93.0k
      Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
639
93.0k
      merge_inst->SetInOperand(1, {basic_block->id()});
640
93.0k
      context_->UpdateDefUse(merge_inst);
641
93.0k
    }
642
643
93.0k
    state_.new_continue_block = basic_block;
644
93.0k
  }
645
646
  // If this is the header block we are copying.
647
947k
  if (itr == loop->GetHeaderBlock()) {
648
93.0k
    state_.new_header_block = basic_block;
649
650
93.0k
    if (!preserve_instructions) {
651
      // Remove the loop merge instruction if it exists.
652
93.0k
      Instruction* merge_inst = basic_block->GetLoopMergeInst();
653
93.0k
      if (merge_inst) invalidated_instructions_.push_back(merge_inst);
654
93.0k
    }
655
93.0k
  }
656
657
  // If this is the latch block being copied, record it in the state.
658
947k
  if (itr == loop->GetLatchBlock()) state_.new_latch_block = basic_block;
659
660
  // If this is the condition block we are copying.
661
947k
  if (itr == loop_condition_block_) {
662
93.0k
    state_.new_condition_block = basic_block;
663
93.0k
  }
664
665
  // Add this block to the list of blocks to add to the function at the end of
666
  // the unrolling process.
667
947k
  blocks_to_add_.push_back(std::unique_ptr<BasicBlock>(basic_block));
668
669
  // Keep tracking the old block via a map.
670
947k
  state_.new_blocks[itr->id()] = basic_block;
671
947k
}
672
673
93.0k
void LoopUnrollerUtilsImpl::CopyBody(Loop* loop, bool eliminate_conditions) {
674
  // Copy each basic block in the loop, give them new ids, and save state
675
  // information.
676
947k
  for (const BasicBlock* itr : loop_blocks_inorder_) {
677
947k
    CopyBasicBlock(loop, itr, false);
678
947k
  }
679
680
  // Set the previous latch block to point to the new header.
681
93.0k
  Instruction* latch_branch = state_.previous_latch_block_->terminator();
682
93.0k
  latch_branch->SetInOperand(0, {state_.new_header_block->id()});
683
93.0k
  context_->UpdateDefUse(latch_branch);
684
685
  // As the algorithm copies the original loop blocks exactly, the tail of the
686
  // latch block on iterations after the first one will be a branch to the new
687
  // header and not the actual loop header. The last continue block in the loop
688
  // should always be a backedge to the global header.
689
93.0k
  Instruction* new_latch_branch = state_.new_latch_block->terminator();
690
93.0k
  new_latch_branch->SetInOperand(0, {loop->GetHeaderBlock()->id()});
691
93.0k
  context_->AnalyzeUses(new_latch_branch);
692
693
93.0k
  std::vector<Instruction*> inductions;
694
93.0k
  loop->GetInductionVariables(inductions);
695
242k
  for (size_t index = 0; index < inductions.size(); ++index) {
696
149k
    Instruction* primary_copy = inductions[index];
697
698
149k
    assert(primary_copy->result_id() != 0);
699
149k
    Instruction* induction_clone =
700
149k
        state_.ids_to_new_inst[state_.new_inst[primary_copy->result_id()]];
701
702
149k
    state_.new_phis_.push_back(induction_clone);
703
149k
    assert(induction_clone->result_id() != 0);
704
705
149k
    if (!state_.previous_phis_.empty()) {
706
149k
      state_.new_inst[primary_copy->result_id()] = GetPhiDefID(
707
149k
          state_.previous_phis_[index], state_.previous_latch_block_->id());
708
149k
    } else {
709
      // Do not replace the first phi block ids.
710
0
      state_.new_inst[primary_copy->result_id()] = primary_copy->result_id();
711
0
    }
712
149k
  }
713
714
93.0k
  if (eliminate_conditions &&
715
93.0k
      state_.new_condition_block != loop_condition_block_) {
716
93.0k
    FoldConditionBlock(state_.new_condition_block, 1);
717
93.0k
  }
718
719
  // Only reference to the header block is the backedge in the latch block,
720
  // don't change this.
721
93.0k
  state_.new_inst[loop->GetHeaderBlock()->id()] = loop->GetHeaderBlock()->id();
722
723
947k
  for (auto& pair : state_.new_blocks) {
724
947k
    RemapOperands(pair.second);
725
947k
  }
726
727
93.0k
  for (Instruction* dead_phi : state_.new_phis_)
728
149k
    invalidated_instructions_.push_back(dead_phi);
729
730
  // Swap the state so the new is now the previous.
731
93.0k
  state_.NextIterationState();
732
93.0k
}
733
734
uint32_t LoopUnrollerUtilsImpl::GetPhiDefID(const Instruction* phi,
735
156k
                                            uint32_t label) const {
736
309k
  for (uint32_t operand = 3; operand < phi->NumOperands(); operand += 2) {
737
309k
    if (phi->GetSingleWordOperand(operand) == label) {
738
156k
      return phi->GetSingleWordOperand(operand - 1);
739
156k
    }
740
309k
  }
741
0
  assert(false && "Could not find a phi index matching the provided label");
742
0
  return 0;
743
0
}
744
745
void LoopUnrollerUtilsImpl::FoldConditionBlock(BasicBlock* condition_block,
746
95.2k
                                               uint32_t operand_label) {
747
  // Remove the old conditional branch to the merge and continue blocks.
748
95.2k
  Instruction& old_branch = *condition_block->tail();
749
95.2k
  uint32_t new_target = old_branch.GetSingleWordOperand(operand_label);
750
751
95.2k
  DebugScope scope = old_branch.GetDebugScope();
752
95.2k
  const std::vector<Instruction> lines = old_branch.dbg_line_insts();
753
754
95.2k
  context_->KillInst(&old_branch);
755
  // Add the new unconditional branch to the merge block.
756
95.2k
  InstructionBuilder builder(
757
95.2k
      context_, condition_block,
758
95.2k
      IRContext::Analysis::kAnalysisDefUse |
759
95.2k
          IRContext::Analysis::kAnalysisInstrToBlockMapping);
760
95.2k
  Instruction* new_branch = builder.AddBranch(new_target);
761
762
95.2k
  if (!lines.empty()) new_branch->AddDebugLine(&lines.back());
763
95.2k
  new_branch->SetDebugScope(scope);
764
95.2k
}
765
766
2.18k
void LoopUnrollerUtilsImpl::CloseUnrolledLoop(Loop* loop) {
767
  // Remove the OpLoopMerge instruction from the function.
768
2.18k
  Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
769
2.18k
  invalidated_instructions_.push_back(merge_inst);
770
771
  // Remove the final backedge to the header and make it point instead to the
772
  // merge block.
773
2.18k
  Instruction* latch_instruction = state_.previous_latch_block_->terminator();
774
2.18k
  latch_instruction->SetInOperand(0, {loop->GetMergeBlock()->id()});
775
2.18k
  context_->UpdateDefUse(latch_instruction);
776
777
  // Remove all induction variables as the phis will now be invalid. Replace all
778
  // uses with the constant initializer value (all uses of phis will be in
779
  // the first iteration with the subsequent phis already having been removed).
780
2.18k
  std::vector<Instruction*> inductions;
781
2.18k
  loop->GetInductionVariables(inductions);
782
783
  // We can use the state instruction mechanism to replace all internal loop
784
  // values within the first loop trip (as the subsequent ones will be updated
785
  // by the copy function) with the value coming in from the preheader and then
786
  // use context ReplaceAllUsesWith for the uses outside the loop with the final
787
  // trip phi value.
788
2.18k
  state_.new_inst.clear();
789
3.63k
  for (Instruction* induction : inductions) {
790
3.63k
    uint32_t initalizer_id =
791
3.63k
        GetPhiDefID(induction, loop->GetPreHeaderBlock()->id());
792
793
3.63k
    state_.new_inst[induction->result_id()] = initalizer_id;
794
3.63k
  }
795
796
25.2k
  for (BasicBlock* block : loop_blocks_inorder_) {
797
25.2k
    RemapOperands(block);
798
25.2k
  }
799
947k
  for (auto& block_itr : blocks_to_add_) {
800
947k
    RemapOperands(block_itr.get());
801
947k
  }
802
803
  // Rewrite the last phis, since they may still reference the original phi.
804
3.63k
  for (Instruction* last_phi : state_.previous_phis_) {
805
3.63k
    RemapOperands(last_phi);
806
3.63k
  }
807
2.18k
}
808
809
// Uses the first loop to create a copy of the loop with new IDs.
810
0
void LoopUnrollerUtilsImpl::DuplicateLoop(Loop* old_loop, Loop* new_loop) {
811
0
  std::vector<BasicBlock*> new_block_order;
812
813
  // Copy every block in the old loop.
814
0
  for (const BasicBlock* itr : loop_blocks_inorder_) {
815
0
    CopyBasicBlock(old_loop, itr, true);
816
0
    new_block_order.push_back(blocks_to_add_.back().get());
817
0
  }
818
819
  // Clone the merge block, give it a new id and record it in the state.
820
0
  BasicBlock* new_merge = old_loop->GetMergeBlock()->Clone(context_);
821
0
  new_merge->SetParent(old_loop->GetMergeBlock()->GetParent());
822
0
  AssignNewResultIds(new_merge);
823
0
  state_.new_blocks[old_loop->GetMergeBlock()->id()] = new_merge;
824
825
  // Remap the operands of every instruction in the loop to point to the new
826
  // copies.
827
0
  for (auto& pair : state_.new_blocks) {
828
0
    RemapOperands(pair.second);
829
0
  }
830
831
0
  loop_blocks_inorder_ = std::move(new_block_order);
832
833
0
  AddBlocksToLoop(new_loop);
834
835
0
  new_loop->SetHeaderBlock(state_.new_header_block);
836
0
  new_loop->SetContinueBlock(state_.new_continue_block);
837
0
  new_loop->SetLatchBlock(state_.new_latch_block);
838
0
  new_loop->SetMergeBlock(new_merge);
839
0
}
840
841
// Whenever the utility copies a block it stores it in a temporary buffer, this
842
// function adds the buffer into the Function. The blocks will be inserted
843
// after the block |insert_point|.
844
void LoopUnrollerUtilsImpl::AddBlocksToFunction(
845
2.18k
    const BasicBlock* insert_point) {
846
2.18k
  for (auto basic_block_iterator = function_.begin();
847
32.4k
       basic_block_iterator != function_.end(); ++basic_block_iterator) {
848
32.4k
    if (basic_block_iterator->id() == insert_point->id()) {
849
2.18k
      basic_block_iterator.InsertBefore(&blocks_to_add_);
850
2.18k
      return;
851
2.18k
    }
852
32.4k
  }
853
854
0
  assert(
855
0
      false &&
856
0
      "Could not add basic blocks to function as insert point was not found.");
857
0
}
858
859
// Assign all result_ids in |basic_block| instructions to new IDs and preserve
860
// the mapping of new ids to old ones.
861
947k
void LoopUnrollerUtilsImpl::AssignNewResultIds(BasicBlock* basic_block) {
862
947k
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
863
864
  // Label instructions aren't covered by normal traversal of the
865
  // instructions.
866
  // TODO(1841): Handle id overflow.
867
947k
  uint32_t new_label_id = context_->TakeNextId();
868
869
  // Assign a new id to the label.
870
947k
  state_.new_inst[basic_block->GetLabelInst()->result_id()] = new_label_id;
871
947k
  basic_block->GetLabelInst()->SetResultId(new_label_id);
872
947k
  def_use_mgr->AnalyzeInstDefUse(basic_block->GetLabelInst());
873
874
5.00M
  for (Instruction& inst : *basic_block) {
875
    // Do def/use analysis on new lines
876
5.00M
    for (auto& line : inst.dbg_line_insts())
877
592
      def_use_mgr->AnalyzeInstDefUse(&line);
878
879
5.00M
    uint32_t old_id = inst.result_id();
880
881
    // Ignore stores etc.
882
5.00M
    if (old_id == 0) {
883
1.32M
      continue;
884
1.32M
    }
885
886
    // Give the instruction a new id.
887
    // TODO(1841): Handle id overflow.
888
3.68M
    inst.SetResultId(context_->TakeNextId());
889
3.68M
    def_use_mgr->AnalyzeInstDef(&inst);
890
891
    // Save the mapping of old_id -> new_id.
892
3.68M
    state_.new_inst[old_id] = inst.result_id();
893
    // Check if this instruction is the induction variable.
894
3.68M
    if (loop_induction_variable_->result_id() == old_id) {
895
      // Save a pointer to the new copy of it.
896
93.0k
      state_.new_phi = &inst;
897
93.0k
    }
898
3.68M
    state_.ids_to_new_inst[inst.result_id()] = &inst;
899
3.68M
  }
900
947k
}
901
902
10.1M
void LoopUnrollerUtilsImpl::RemapOperands(Instruction* inst) {
903
17.7M
  auto remap_operands_to_new_ids = [this](uint32_t* id) {
904
17.7M
    auto itr = state_.new_inst.find(*id);
905
906
17.7M
    if (itr != state_.new_inst.end()) {
907
5.68M
      *id = itr->second;
908
5.68M
    }
909
17.7M
  };
910
911
10.1M
  inst->ForEachInId(remap_operands_to_new_ids);
912
10.1M
  context_->AnalyzeUses(inst);
913
10.1M
}
914
915
1.91M
void LoopUnrollerUtilsImpl::RemapOperands(BasicBlock* basic_block) {
916
10.1M
  for (Instruction& inst : *basic_block) {
917
10.1M
    RemapOperands(&inst);
918
10.1M
  }
919
1.91M
}
920
921
// Generate the ordered list of basic blocks in the |loop| and cache it for
922
// later use.
923
2.18k
void LoopUnrollerUtilsImpl::ComputeLoopOrderedBlocks(Loop* loop) {
924
2.18k
  loop_blocks_inorder_.clear();
925
2.18k
  loop->ComputeLoopStructuredOrder(&loop_blocks_inorder_);
926
2.18k
}
927
928
// Adds the blocks_to_add_ to both the loop and to the parent.
929
472
void LoopUnrollerUtilsImpl::AddBlocksToLoop(Loop* loop) const {
930
  // Add the blocks to this loop.
931
29.3k
  for (auto& block_itr : blocks_to_add_) {
932
29.3k
    loop->AddBasicBlock(block_itr.get());
933
29.3k
  }
934
935
  // Add the blocks to the parent as well.
936
472
  if (loop->GetParent()) AddBlocksToLoop(loop->GetParent());
937
472
}
938
939
0
void LoopUnrollerUtilsImpl::LinkLastPhisToStart(Loop* loop) const {
940
0
  std::vector<Instruction*> inductions;
941
0
  loop->GetInductionVariables(inductions);
942
943
0
  for (size_t i = 0; i < inductions.size(); ++i) {
944
0
    Instruction* last_phi_in_block = state_.previous_phis_[i];
945
946
0
    uint32_t phi_index =
947
0
        GetPhiIndexFromLabel(state_.previous_latch_block_, last_phi_in_block);
948
0
    uint32_t phi_variable =
949
0
        last_phi_in_block->GetSingleWordInOperand(phi_index - 1);
950
0
    uint32_t phi_label = last_phi_in_block->GetSingleWordInOperand(phi_index);
951
952
0
    Instruction* phi = inductions[i];
953
0
    phi->SetInOperand(phi_index - 1, {phi_variable});
954
0
    phi->SetInOperand(phi_index, {phi_label});
955
0
  }
956
0
}
957
958
// Duplicate the |loop| body |factor| number of times while keeping the loop
959
// backedge intact.
960
0
void LoopUnrollerUtilsImpl::PartiallyUnroll(Loop* loop, size_t factor) {
961
0
  Unroll(loop, factor);
962
0
  LinkLastPhisToStart(loop);
963
0
  AddBlocksToLoop(loop);
964
0
  AddBlocksToFunction(loop->GetMergeBlock());
965
0
  RemoveDeadInstructions();
966
0
}
967
968
/*
969
 * End LoopUtilsImpl.
970
 */
971
972
}  // namespace
973
974
/*
975
 *
976
 *  Begin Utils.
977
 *
978
 * */
979
980
6.01k
bool LoopUtils::CanPerformUnroll() {
981
  // The loop is expected to be in structured order.
982
6.01k
  if (!loop_->GetHeaderBlock()->GetMergeInst()) {
983
0
    return false;
984
0
  }
985
986
  // Find check the loop has a condition we can find and evaluate.
987
6.01k
  const BasicBlock* condition = loop_->FindConditionBlock();
988
6.01k
  if (!condition) return false;
989
990
  // Check that we can find and process the induction variable.
991
5.24k
  const Instruction* induction = loop_->FindConditionVariable(condition);
992
5.24k
  if (!induction || induction->opcode() != spv::Op::OpPhi) return false;
993
994
  // Check that we can find the number of loop iterations.
995
4.53k
  if (!loop_->FindNumberOfIterations(induction, &*condition->ctail(), nullptr))
996
0
    return false;
997
998
4.53k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
999
  // ClusterFuzz/OSS-Fuzz is likely to yield examples with very high loop
1000
  // iteration counts. This can cause timeouts and memouts during fuzzing that
1001
  // are not classed as bugs. To avoid this noise, loop unrolling is not applied
1002
  // to loops with large iteration counts when fuzzing.
1003
4.53k
  constexpr size_t kFuzzerIterationLimit = 100;
1004
4.53k
  size_t num_iterations;
1005
4.53k
  loop_->FindNumberOfIterations(induction, &*condition->ctail(),
1006
4.53k
                                &num_iterations);
1007
4.53k
  if (num_iterations > kFuzzerIterationLimit) {
1008
152
    return false;
1009
152
  }
1010
4.38k
#endif
1011
1012
  // Make sure the latch block is a unconditional branch to the header
1013
  // block.
1014
4.38k
  const Instruction& branch = *loop_->GetLatchBlock()->ctail();
1015
4.38k
  bool branching_assumption =
1016
4.38k
      branch.opcode() == spv::Op::OpBranch &&
1017
4.38k
      branch.GetSingleWordInOperand(0) == loop_->GetHeaderBlock()->id();
1018
4.38k
  if (!branching_assumption) {
1019
0
    return false;
1020
0
  }
1021
1022
4.38k
  std::vector<Instruction*> inductions;
1023
4.38k
  loop_->GetInductionVariables(inductions);
1024
1025
  // Ban breaks within the loop.
1026
4.38k
  const std::vector<uint32_t>& merge_block_preds =
1027
4.38k
      context_->cfg()->preds(loop_->GetMergeBlock()->id());
1028
4.38k
  if (merge_block_preds.size() != 1) {
1029
0
    return false;
1030
0
  }
1031
1032
  // Ban continues within the loop.
1033
4.38k
  const std::vector<uint32_t>& continue_block_preds =
1034
4.38k
      context_->cfg()->preds(loop_->GetContinueBlock()->id());
1035
4.38k
  if (continue_block_preds.size() != 1) {
1036
4
    return false;
1037
4
  }
1038
1039
  // Ban returns in the loop.
1040
  // Iterate over all the blocks within the loop and check that none of them
1041
  // exit the loop.
1042
50.5k
  for (uint32_t label_id : loop_->GetBlocks()) {
1043
50.5k
    const BasicBlock* block = context_->cfg()->block(label_id);
1044
50.5k
    if (block->ctail()->opcode() == spv::Op::OpKill ||
1045
50.5k
        block->ctail()->opcode() == spv::Op::OpReturn ||
1046
50.5k
        block->ctail()->opcode() == spv::Op::OpReturnValue ||
1047
50.5k
        block->ctail()->opcode() == spv::Op::OpTerminateInvocation) {
1048
0
      return false;
1049
0
    }
1050
50.5k
  }
1051
  // Can only unroll inner loops.
1052
4.38k
  if (!loop_->AreAllChildrenMarkedForRemoval()) {
1053
17
    return false;
1054
17
  }
1055
1056
4.36k
  return true;
1057
4.38k
}
1058
1059
0
bool LoopUtils::PartiallyUnroll(size_t factor) {
1060
0
  if (factor == 1 || !CanPerformUnroll()) return false;
1061
1062
  // Create the unroller utility.
1063
0
  LoopUnrollerUtilsImpl unroller{context_,
1064
0
                                 loop_->GetHeaderBlock()->GetParent()};
1065
0
  unroller.Init(loop_);
1066
1067
  // If the unrolling factor is larger than or the same size as the loop just
1068
  // fully unroll the loop.
1069
0
  if (factor >= unroller.GetLoopIterationCount()) {
1070
0
    unroller.FullyUnroll(loop_);
1071
0
    return true;
1072
0
  }
1073
1074
  // If the loop unrolling factor is an residual number of iterations we need to
1075
  // let run the loop for the residual part then let it branch into the unrolled
1076
  // remaining part. We add one when calucating the remainder to take into
1077
  // account the one iteration already in the loop.
1078
0
  if (unroller.GetLoopIterationCount() % factor != 0) {
1079
0
    unroller.PartiallyUnrollResidualFactor(loop_, factor);
1080
0
  } else {
1081
0
    unroller.PartiallyUnroll(loop_, factor);
1082
0
  }
1083
1084
0
  return true;
1085
0
}
1086
1087
2.18k
bool LoopUtils::FullyUnroll() {
1088
2.18k
  if (!CanPerformUnroll()) return false;
1089
1090
2.18k
  std::vector<Instruction*> inductions;
1091
2.18k
  loop_->GetInductionVariables(inductions);
1092
1093
2.18k
  LoopUnrollerUtilsImpl unroller{context_,
1094
2.18k
                                 loop_->GetHeaderBlock()->GetParent()};
1095
1096
2.18k
  unroller.Init(loop_);
1097
2.18k
  unroller.FullyUnroll(loop_);
1098
1099
2.18k
  return true;
1100
2.18k
}
1101
1102
0
void LoopUtils::Finalize() {
1103
  // Clean up the loop descriptor to preserve the analysis.
1104
1105
0
  LoopDescriptor* LD = context_->GetLoopDescriptor(&function_);
1106
0
  LD->PostModificationCleanup();
1107
0
}
1108
1109
/*
1110
 *
1111
 * Begin Pass.
1112
 *
1113
 */
1114
1115
10.7k
Pass::Status LoopUnroller::Process() {
1116
10.7k
  bool changed = false;
1117
10.7k
  for (Function& f : *context()->module()) {
1118
9.97k
    if (f.IsDeclaration()) {
1119
0
      continue;
1120
0
    }
1121
1122
9.97k
    LoopDescriptor* LD = context()->GetLoopDescriptor(&f);
1123
20.4k
    for (Loop& loop : *LD) {
1124
20.4k
      LoopUtils loop_utils{context(), &loop};
1125
20.4k
      if (!loop.HasUnrollLoopControl() || !loop_utils.CanPerformUnroll()) {
1126
18.3k
        continue;
1127
18.3k
      }
1128
1129
2.18k
      if (fully_unroll_) {
1130
2.18k
        loop_utils.FullyUnroll();
1131
2.18k
      } else {
1132
0
        loop_utils.PartiallyUnroll(unroll_factor_);
1133
0
      }
1134
2.18k
      changed = true;
1135
2.18k
    }
1136
9.97k
    LD->PostModificationCleanup();
1137
9.97k
  }
1138
1139
10.7k
  return changed ? Status::SuccessWithChange : Status::SuccessWithoutChange;
1140
10.7k
}
1141
1142
}  // namespace opt
1143
}  // namespace spvtools