Coverage Report

Created: 2025-07-23 06:18

/src/spirv-tools/source/opt/loop_unroller.cpp
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2018 Google LLC.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "source/opt/loop_unroller.h"
16
17
#include <limits>
18
#include <memory>
19
#include <unordered_map>
20
#include <utility>
21
#include <vector>
22
23
#include "source/opt/ir_builder.h"
24
#include "source/opt/loop_utils.h"
25
26
// Implements loop util unrolling functionality for fully and partially
27
// unrolling loops. Given a factor it will duplicate the loop that many times,
28
// appending each one to the end of the old loop and removing backedges, to
29
// create a new unrolled loop.
30
//
31
// 1 - User calls LoopUtils::FullyUnroll or LoopUtils::PartiallyUnroll with a
32
// loop they wish to unroll. LoopUtils::CanPerformUnroll is used to
33
// validate that a given loop can be unrolled. That method (along with the
34
// constructor of loop) checks that the IR is in the expected canonicalised
35
// format.
36
//
37
// 2 - The LoopUtils methods create a LoopUnrollerUtilsImpl object to actually
38
// perform the unrolling. This implements helper methods to copy the loop basic
39
// blocks and remap the ids of instructions used inside them.
40
//
41
// 3 - The core of LoopUnrollerUtilsImpl is the Unroll method, this method
42
// actually performs the loop duplication. It does this by creating a
43
// LoopUnrollState object and then copying the loop as given by the factor
44
// parameter. The LoopUnrollState object retains the state of the unroller
45
// between the loop body copies as each iteration needs information on the last
46
// to adjust the phi induction variable, adjust the OpLoopMerge instruction in
47
// the main loop header, and change the previous continue block to point to the
48
// new header and the new continue block to the main loop header.
49
//
50
// 4 - If the loop is to be fully unrolled then it is simply closed after step
51
// 3, with the OpLoopMerge being deleted, the backedge removed, and the
52
// condition blocks folded.
53
//
54
// 5 - If it is being partially unrolled: if the unrolling factor leaves the
55
// loop with an even number of bodies with respect to the number of loop
56
// iterations then step 3 is all that is needed. If it is uneven then we need to
57
// duplicate the loop completely and unroll the duplicated loop to cover the
58
// residual part and adjust the first loop to cover only the "even" part. For
59
// instance if you request an unroll factor of 3 on a loop with 10 iterations
60
// then copying the body three times would leave you with three bodies in the
61
// loop
62
// where the loop still iterates over each 4 times. So we make two loops one
63
// iterating once then a second loop of three iterating 3 times.
64
65
namespace spvtools {
66
namespace opt {
67
namespace {
68
69
// Loop control constant value for DontUnroll flag.
70
constexpr uint32_t kLoopControlDontUnrollIndex = 2;
71
72
// Operand index of the loop control parameter of the OpLoopMerge.
73
constexpr uint32_t kLoopControlIndex = 2;
74
75
// This utility class encapsulates some of the state we need to maintain between
76
// loop unrolls. Specifically it maintains key blocks and the induction variable
77
// in the current loop duplication step and the blocks from the previous one.
78
// This is because each step of the unroll needs to use data from both the
79
// preceding step and the original loop.
80
struct LoopUnrollState {
81
  LoopUnrollState()
82
3.13k
      : previous_phi_(nullptr),
83
3.13k
        previous_latch_block_(nullptr),
84
3.13k
        previous_condition_block_(nullptr),
85
3.13k
        new_phi(nullptr),
86
3.13k
        new_continue_block(nullptr),
87
3.13k
        new_condition_block(nullptr),
88
3.13k
        new_header_block(nullptr) {}
89
90
  // Initialize from the loop descriptor class.
91
  LoopUnrollState(Instruction* induction, BasicBlock* latch_block,
92
                  BasicBlock* condition, std::vector<Instruction*>&& phis)
93
3.13k
      : previous_phi_(induction),
94
3.13k
        previous_latch_block_(latch_block),
95
3.13k
        previous_condition_block_(condition),
96
3.13k
        new_phi(nullptr),
97
3.13k
        new_continue_block(nullptr),
98
3.13k
        new_condition_block(nullptr),
99
3.13k
        new_header_block(nullptr) {
100
3.13k
    previous_phis_ = std::move(phis);
101
3.13k
  }
102
103
  // Swap the state so that the new nodes are now the previous nodes.
104
112k
  void NextIterationState() {
105
112k
    previous_phi_ = new_phi;
106
112k
    previous_latch_block_ = new_latch_block;
107
112k
    previous_condition_block_ = new_condition_block;
108
112k
    previous_phis_ = std::move(new_phis_);
109
110
    // Clear new nodes.
111
112k
    new_phi = nullptr;
112
112k
    new_continue_block = nullptr;
113
112k
    new_condition_block = nullptr;
114
112k
    new_header_block = nullptr;
115
112k
    new_latch_block = nullptr;
116
117
    // Clear new block/instruction maps.
118
112k
    new_blocks.clear();
119
112k
    new_inst.clear();
120
112k
    ids_to_new_inst.clear();
121
112k
  }
122
123
  // The induction variable from the immediately preceding loop body.
124
  Instruction* previous_phi_;
125
126
  // All the phi nodes from the previous loop iteration.
127
  std::vector<Instruction*> previous_phis_;
128
129
  std::vector<Instruction*> new_phis_;
130
131
  // The previous latch block. The backedge will be removed from this and
132
  // added to the new latch block.
133
  BasicBlock* previous_latch_block_;
134
135
  // The previous condition block. This may be folded to flatten the loop.
136
  BasicBlock* previous_condition_block_;
137
138
  // The new induction variable.
139
  Instruction* new_phi;
140
141
  // The new continue block.
142
  BasicBlock* new_continue_block;
143
144
  // The new condition block.
145
  BasicBlock* new_condition_block;
146
147
  // The new header block.
148
  BasicBlock* new_header_block;
149
150
  // The new latch block.
151
  BasicBlock* new_latch_block;
152
153
  // A mapping of new block ids to the original blocks which they were copied
154
  // from.
155
  std::unordered_map<uint32_t, BasicBlock*> new_blocks;
156
157
  // A mapping of the original instruction ids to the instruction ids to their
158
  // copies.
159
  std::unordered_map<uint32_t, uint32_t> new_inst;
160
161
  std::unordered_map<uint32_t, Instruction*> ids_to_new_inst;
162
};
163
164
// This class implements the actual unrolling. It uses a LoopUnrollState to
165
// maintain the state of the unrolling in between steps.
166
class LoopUnrollerUtilsImpl {
167
 public:
168
  using BasicBlockListTy = std::vector<std::unique_ptr<BasicBlock>>;
169
170
  LoopUnrollerUtilsImpl(IRContext* c, Function* function)
171
3.13k
      : context_(c),
172
3.13k
        function_(*function),
173
3.13k
        loop_condition_block_(nullptr),
174
3.13k
        loop_induction_variable_(nullptr),
175
3.13k
        number_of_loop_iterations_(0),
176
3.13k
        loop_step_value_(0),
177
3.13k
        loop_init_value_(0) {}
178
179
  // Unroll the |loop| by given |factor| by copying the whole body |factor|
180
  // times. The resulting basicblock structure will remain a loop.
181
  void PartiallyUnroll(Loop*, size_t factor);
182
183
  // If partially unrolling the |loop| would leave the loop with too many bodies
184
  // for its number of iterations then this method should be used. This method
185
  // will duplicate the |loop| completely, making the duplicated loop the
186
  // successor of the original's merge block. The original loop will have its
187
  // condition changed to loop over the residual part and the duplicate will be
188
  // partially unrolled. The resulting structure will be two loops.
189
  void PartiallyUnrollResidualFactor(Loop* loop, size_t factor);
190
191
  // Fully unroll the |loop| by copying the full body by the total number of
192
  // loop iterations, folding all conditions, and removing the backedge from the
193
  // continue block to the header.
194
  void FullyUnroll(Loop* loop);
195
196
  // Get the ID of the variable in the |phi| paired with |label|.
197
  uint32_t GetPhiDefID(const Instruction* phi, uint32_t label) const;
198
199
  // Close the loop by removing the OpLoopMerge from the |loop| header block and
200
  // making the backedge point to the merge block.
201
  void CloseUnrolledLoop(Loop* loop);
202
203
  // Remove the OpConditionalBranch instruction inside |conditional_block| used
204
  // to branch to either exit or continue the loop and replace it with an
205
  // unconditional OpBranch to block |new_target|.
206
  void FoldConditionBlock(BasicBlock* condtion_block, uint32_t new_target);
207
208
  // Add all blocks_to_add_ to function_ at the |insert_point|.
209
  void AddBlocksToFunction(const BasicBlock* insert_point);
210
211
  // Duplicates the |old_loop|, cloning each body and remapping the ids without
212
  // removing instructions or changing relative structure. Result will be stored
213
  // in |new_loop|.
214
  void DuplicateLoop(Loop* old_loop, Loop* new_loop);
215
216
0
  inline size_t GetLoopIterationCount() const {
217
0
    return number_of_loop_iterations_;
218
0
  }
219
220
  // Extracts the initial state information from the |loop|.
221
  void Init(Loop* loop);
222
223
  // Replace the uses of each induction variable outside the loop with the final
224
  // value of the induction variable before the loop exit. To reflect the proper
225
  // state of a fully unrolled loop.
226
  void ReplaceInductionUseWithFinalValue(Loop* loop);
227
228
  // Remove all the instructions in the invalidated_instructions_ vector.
229
  void RemoveDeadInstructions();
230
231
  // Replace any use of induction variables outwith the loop with the final
232
  // value of the induction variable in the unrolled loop.
233
  void ReplaceOutsideLoopUseWithFinalValue(Loop* loop);
234
235
  // Set the LoopControl operand of the OpLoopMerge instruction to be
236
  // DontUnroll.
237
  void MarkLoopControlAsDontUnroll(Loop* loop) const;
238
239
 private:
240
  // Remap all the in |basic_block| to new IDs and keep the mapping of new ids
241
  // to old
242
  // ids. |loop| is used to identify special loop blocks (header, continue,
243
  // etc).
244
  void AssignNewResultIds(BasicBlock* basic_block);
245
246
  // Using the map built by AssignNewResultIds, replace the uses in |inst|
247
  // by the id that the use maps to.
248
  void RemapOperands(Instruction* inst);
249
250
  // Using the map built by AssignNewResultIds, for each instruction in
251
  // |basic_block| use
252
  // that map to substitute the IDs used by instructions (in the operands) with
253
  // the new ids.
254
  void RemapOperands(BasicBlock* basic_block);
255
256
  // Copy the whole body of the loop, all blocks dominated by the |loop| header
257
  // and not dominated by the |loop| merge. The copied body will be linked to by
258
  // the old |loop| continue block and the new body will link to the |loop|
259
  // header via the new continue block. |eliminate_conditions| is used to decide
260
  // whether or not to fold all the condition blocks other than the last one.
261
  void CopyBody(Loop* loop, bool eliminate_conditions);
262
263
  // Copy a given |block_to_copy| in the |loop| and record the mapping of the
264
  // old/new ids. |preserve_instructions| determines whether or not the method
265
  // will modify (other than result_id) instructions which are copied.
266
  void CopyBasicBlock(Loop* loop, const BasicBlock* block_to_copy,
267
                      bool preserve_instructions);
268
269
  // The actual implementation of the unroll step. Unrolls |loop| by given
270
  // |factor| by copying the body by |factor| times. Also propagates the
271
  // induction variable value throughout the copies.
272
  void Unroll(Loop* loop, size_t factor);
273
274
  // Fills the loop_blocks_inorder_ field with the ordered list of basic blocks
275
  // as computed by the method ComputeLoopOrderedBlocks.
276
  void ComputeLoopOrderedBlocks(Loop* loop);
277
278
  // Adds the blocks_to_add_ to both the |loop| and to the parent of |loop| if
279
  // the parent exists.
280
  void AddBlocksToLoop(Loop* loop) const;
281
282
  // After the partially unroll step the phi instructions in the header block
283
  // will be in an illegal format. This function makes the phis legal by making
284
  // the edge from the latch block come from the new latch block and the value
285
  // to be the actual value of the phi at that point.
286
  void LinkLastPhisToStart(Loop* loop) const;
287
288
  // Kill all debug declaration instructions from |bb|.
289
  void KillDebugDeclares(BasicBlock* bb);
290
291
  // A pointer to the IRContext. Used to add/remove instructions and for usedef
292
  // chains.
293
  IRContext* context_;
294
295
  // A reference the function the loop is within.
296
  Function& function_;
297
298
  // A list of basic blocks to be added to the loop at the end of an unroll
299
  // step.
300
  BasicBlockListTy blocks_to_add_;
301
302
  // List of instructions which are now dead and can be removed.
303
  std::vector<Instruction*> invalidated_instructions_;
304
305
  // Maintains the current state of the transform between calls to unroll.
306
  LoopUnrollState state_;
307
308
  // An ordered list containing the loop basic blocks.
309
  std::vector<BasicBlock*> loop_blocks_inorder_;
310
311
  // The block containing the condition check which contains a conditional
312
  // branch to the merge and continue block.
313
  BasicBlock* loop_condition_block_;
314
315
  // The induction variable of the loop.
316
  Instruction* loop_induction_variable_;
317
318
  // Phis used in the loop need to be remapped to use the actual result values
319
  // and then be remapped at the end.
320
  std::vector<Instruction*> loop_phi_instructions_;
321
322
  // The number of loop iterations that the loop would perform pre-unroll.
323
  size_t number_of_loop_iterations_;
324
325
  // The amount that the loop steps each iteration.
326
  int64_t loop_step_value_;
327
328
  // The value the loop starts stepping from.
329
  int64_t loop_init_value_;
330
};
331
332
/*
333
 * Static helper functions.
334
 */
335
336
// Retrieve the index of the OpPhi instruction |phi| which corresponds to the
337
// incoming |block| id.
338
0
uint32_t GetPhiIndexFromLabel(const BasicBlock* block, const Instruction* phi) {
339
0
  for (uint32_t i = 1; i < phi->NumInOperands(); i += 2) {
340
0
    if (block->id() == phi->GetSingleWordInOperand(i)) {
341
0
      return i;
342
0
    }
343
0
  }
344
0
  assert(false && "Could not find operand in instruction.");
345
0
  return 0;
346
0
}
347
348
3.13k
void LoopUnrollerUtilsImpl::Init(Loop* loop) {
349
3.13k
  loop_condition_block_ = loop->FindConditionBlock();
350
351
  // When we reinit the second loop during PartiallyUnrollResidualFactor we need
352
  // to use the cached value from the duplicate step as the dominator tree
353
  // basded solution, loop->FindConditionBlock, requires all the nodes to be
354
  // connected up with the correct branches. They won't be at this point.
355
3.13k
  if (!loop_condition_block_) {
356
0
    loop_condition_block_ = state_.new_condition_block;
357
0
  }
358
3.13k
  assert(loop_condition_block_);
359
360
3.13k
  loop_induction_variable_ = loop->FindConditionVariable(loop_condition_block_);
361
3.13k
  assert(loop_induction_variable_);
362
363
3.13k
  bool found = loop->FindNumberOfIterations(
364
3.13k
      loop_induction_variable_, &*loop_condition_block_->ctail(),
365
3.13k
      &number_of_loop_iterations_, &loop_step_value_, &loop_init_value_);
366
3.13k
  (void)found;  // To silence unused variable warning on release builds.
367
3.13k
  assert(found);
368
369
  // Blocks are stored in an unordered set of ids in the loop class, we need to
370
  // create the dominator ordered list.
371
3.13k
  ComputeLoopOrderedBlocks(loop);
372
3.13k
}
373
374
// This function is used to partially unroll the loop when the factor provided
375
// would normally lead to an illegal optimization. Instead of just unrolling the
376
// loop it creates two loops and unrolls one and adjusts the condition on the
377
// other. The end result being that the new loop pair iterates over the correct
378
// number of bodies.
379
void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(Loop* loop,
380
0
                                                          size_t factor) {
381
  // TODO(1841): Handle id overflow.
382
0
  std::unique_ptr<Instruction> new_label{new Instruction(
383
0
      context_, spv::Op::OpLabel, 0, context_->TakeNextId(), {})};
384
0
  std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))};
385
0
  new_exit_bb->SetParent(&function_);
386
387
  // Save the id of the block before we move it.
388
0
  uint32_t new_merge_id = new_exit_bb->id();
389
390
  // Add the block the list of blocks to add, we want this merge block to be
391
  // right at the start of the new blocks.
392
0
  blocks_to_add_.push_back(std::move(new_exit_bb));
393
0
  BasicBlock* new_exit_bb_raw = blocks_to_add_[0].get();
394
0
  Instruction& original_conditional_branch = *loop_condition_block_->tail();
395
  // Duplicate the loop, providing access to the blocks of both loops.
396
  // This is a naked new due to the VS2013 requirement of not having unique
397
  // pointers in vectors, as it will be inserted into a vector with
398
  // loop_descriptor.AddLoop.
399
0
  std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(*loop);
400
401
  // Clear the basic blocks of the new loop.
402
0
  new_loop->ClearBlocks();
403
404
0
  DuplicateLoop(loop, new_loop.get());
405
406
  // Add the blocks to the function.
407
0
  AddBlocksToFunction(loop->GetMergeBlock());
408
0
  blocks_to_add_.clear();
409
410
  // Create a new merge block for the first loop.
411
0
  InstructionBuilder builder{context_, new_exit_bb_raw};
412
  // Make the first loop branch to the second.
413
0
  builder.AddBranch(new_loop->GetHeaderBlock()->id());
414
415
0
  loop_condition_block_ = state_.new_condition_block;
416
0
  loop_induction_variable_ = state_.new_phi;
417
  // Unroll the new loop by the factor with the usual -1 to account for the
418
  // existing block iteration.
419
0
  Unroll(new_loop.get(), factor);
420
421
0
  LinkLastPhisToStart(new_loop.get());
422
0
  AddBlocksToLoop(new_loop.get());
423
424
  // Add the new merge block to the back of the list of blocks to be added. It
425
  // needs to be the last block added to maintain dominator order in the binary.
426
0
  blocks_to_add_.push_back(
427
0
      std::unique_ptr<BasicBlock>(new_loop->GetMergeBlock()));
428
429
  // Add the blocks to the function.
430
0
  AddBlocksToFunction(loop->GetMergeBlock());
431
432
  // Reset the usedef analysis.
433
0
  context_->InvalidateAnalysesExceptFor(
434
0
      IRContext::Analysis::kAnalysisLoopAnalysis);
435
0
  analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
436
437
  // The loop condition.
438
0
  Instruction* condition_check = def_use_manager->GetDef(
439
0
      original_conditional_branch.GetSingleWordOperand(0));
440
441
  // This should have been checked by the LoopUtils::CanPerformUnroll function
442
  // before entering this.
443
0
  assert(loop->IsSupportedCondition(condition_check->opcode()));
444
445
  // We need to account for the initial body when calculating the remainder.
446
0
  int64_t remainder = Loop::GetResidualConditionValue(
447
0
      condition_check->opcode(), loop_init_value_, loop_step_value_,
448
0
      number_of_loop_iterations_, factor);
449
450
0
  assert(remainder > std::numeric_limits<int32_t>::min() &&
451
0
         remainder < std::numeric_limits<int32_t>::max());
452
453
0
  Instruction* new_constant = nullptr;
454
455
  // If the remainder is negative then we add a signed constant, otherwise just
456
  // add an unsigned constant.
457
0
  if (remainder < 0) {
458
0
    new_constant = builder.GetSintConstant(static_cast<int32_t>(remainder));
459
0
  } else {
460
0
    new_constant = builder.GetUintConstant(static_cast<int32_t>(remainder));
461
0
  }
462
463
0
  uint32_t constant_id = new_constant->result_id();
464
465
  // Update the condition check.
466
0
  condition_check->SetInOperand(1, {constant_id});
467
468
  // Update the next phi node. The phi will have a constant value coming in from
469
  // the preheader block. For the duplicated loop we need to update the constant
470
  // to be the amount of iterations covered by the first loop and the incoming
471
  // block to be the first loops new merge block.
472
0
  std::vector<Instruction*> new_inductions;
473
0
  new_loop->GetInductionVariables(new_inductions);
474
475
0
  std::vector<Instruction*> old_inductions;
476
0
  loop->GetInductionVariables(old_inductions);
477
0
  for (size_t index = 0; index < new_inductions.size(); ++index) {
478
0
    Instruction* new_induction = new_inductions[index];
479
0
    Instruction* old_induction = old_inductions[index];
480
    // Get the index of the loop initalizer, the value coming in from the
481
    // preheader.
482
0
    uint32_t initalizer_index =
483
0
        GetPhiIndexFromLabel(new_loop->GetPreHeaderBlock(), old_induction);
484
485
    // Replace the second loop initalizer with the phi from the first
486
0
    new_induction->SetInOperand(initalizer_index - 1,
487
0
                                {old_induction->result_id()});
488
0
    new_induction->SetInOperand(initalizer_index, {new_merge_id});
489
490
    // If the use of the first loop induction variable is outside of the loop
491
    // then replace that use with the second loop induction variable.
492
0
    uint32_t second_loop_induction = new_induction->result_id();
493
0
    auto replace_use_outside_of_loop = [loop, second_loop_induction](
494
0
                                           Instruction* user,
495
0
                                           uint32_t operand_index) {
496
0
      if (!loop->IsInsideLoop(user)) {
497
0
        user->SetOperand(operand_index, {second_loop_induction});
498
0
      }
499
0
    };
500
501
0
    context_->get_def_use_mgr()->ForEachUse(old_induction,
502
0
                                            replace_use_outside_of_loop);
503
0
  }
504
505
0
  context_->InvalidateAnalysesExceptFor(
506
0
      IRContext::Analysis::kAnalysisLoopAnalysis);
507
508
0
  context_->ReplaceAllUsesWith(loop->GetMergeBlock()->id(), new_merge_id);
509
510
0
  LoopDescriptor& loop_descriptor = *context_->GetLoopDescriptor(&function_);
511
512
0
  loop_descriptor.AddLoop(std::move(new_loop), loop->GetParent());
513
514
0
  RemoveDeadInstructions();
515
0
}
516
517
// Mark this loop as DontUnroll as it will already be unrolled and it may not
518
// be safe to unroll a previously partially unrolled loop.
519
3.13k
void LoopUnrollerUtilsImpl::MarkLoopControlAsDontUnroll(Loop* loop) const {
520
3.13k
  Instruction* loop_merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
521
3.13k
  assert(loop_merge_inst &&
522
3.13k
         "Loop merge instruction could not be found after entering unroller "
523
3.13k
         "(should have exited before this)");
524
3.13k
  loop_merge_inst->SetInOperand(kLoopControlIndex,
525
3.13k
                                {kLoopControlDontUnrollIndex});
526
3.13k
}
527
528
// Duplicate the |loop| body |factor| - 1 number of times while keeping the loop
529
// backedge intact. This will leave the loop with |factor| number of bodies
530
// after accounting for the initial body.
531
3.13k
void LoopUnrollerUtilsImpl::Unroll(Loop* loop, size_t factor) {
532
  // If we unroll a loop partially it will not be safe to unroll it further.
533
  // This is due to the current method of calculating the number of loop
534
  // iterations.
535
3.13k
  MarkLoopControlAsDontUnroll(loop);
536
537
3.13k
  std::vector<Instruction*> inductions;
538
3.13k
  loop->GetInductionVariables(inductions);
539
3.13k
  state_ = LoopUnrollState{loop_induction_variable_, loop->GetLatchBlock(),
540
3.13k
                           loop_condition_block_, std::move(inductions)};
541
115k
  for (size_t i = 0; i < factor - 1; ++i) {
542
112k
    CopyBody(loop, true);
543
112k
  }
544
3.13k
}
545
546
3.13k
void LoopUnrollerUtilsImpl::RemoveDeadInstructions() {
547
  // Remove the dead instructions.
548
310k
  for (Instruction* inst : invalidated_instructions_) {
549
310k
    context_->KillInst(inst);
550
310k
  }
551
3.13k
}
552
553
3.13k
void LoopUnrollerUtilsImpl::ReplaceInductionUseWithFinalValue(Loop* loop) {
554
3.13k
  context_->InvalidateAnalysesExceptFor(
555
3.13k
      IRContext::Analysis::kAnalysisLoopAnalysis |
556
3.13k
      IRContext::Analysis::kAnalysisDefUse |
557
3.13k
      IRContext::Analysis::kAnalysisInstrToBlockMapping);
558
559
3.13k
  std::vector<Instruction*> inductions;
560
3.13k
  loop->GetInductionVariables(inductions);
561
562
8.75k
  for (size_t index = 0; index < inductions.size(); ++index) {
563
    // We don't want the decorations that applied to the induction variable
564
    // to be applied to the value that replace it.
565
5.61k
    context_->KillNamesAndDecorates(state_.previous_phis_[index]);
566
567
5.61k
    uint32_t trip_step_id = GetPhiDefID(state_.previous_phis_[index],
568
5.61k
                                        state_.previous_latch_block_->id());
569
5.61k
    context_->ReplaceAllUsesWith(inductions[index]->result_id(), trip_step_id);
570
5.61k
    invalidated_instructions_.push_back(inductions[index]);
571
5.61k
  }
572
3.13k
}
573
574
// Fully unroll the loop by partially unrolling it by the number of loop
575
// iterations minus one for the body already accounted for.
576
3.13k
void LoopUnrollerUtilsImpl::FullyUnroll(Loop* loop) {
577
  // We unroll the loop by number of iterations in the loop.
578
3.13k
  Unroll(loop, number_of_loop_iterations_);
579
580
  // The first condition block is preserved until now so it can be copied.
581
3.13k
  FoldConditionBlock(loop_condition_block_, 1);
582
583
  // Delete the OpLoopMerge and remove the backedge to the header.
584
3.13k
  CloseUnrolledLoop(loop);
585
586
  // Mark the loop for later deletion. This allows us to preserve the loop
587
  // iterators but still disregard dead loops.
588
3.13k
  loop->MarkLoopForRemoval();
589
590
  // If the loop has a parent add the new blocks to the parent.
591
3.13k
  if (loop->GetParent()) {
592
699
    AddBlocksToLoop(loop->GetParent());
593
699
  }
594
595
  // Add the blocks to the function.
596
3.13k
  AddBlocksToFunction(loop->GetMergeBlock());
597
598
3.13k
  ReplaceInductionUseWithFinalValue(loop);
599
600
3.13k
  RemoveDeadInstructions();
601
  // Invalidate all analyses.
602
3.13k
  context_->InvalidateAnalysesExceptFor(
603
3.13k
      IRContext::Analysis::kAnalysisLoopAnalysis |
604
3.13k
      IRContext::Analysis::kAnalysisDefUse);
605
3.13k
}
606
607
834k
void LoopUnrollerUtilsImpl::KillDebugDeclares(BasicBlock* bb) {
608
  // We cannot kill an instruction inside BasicBlock::ForEachInst()
609
  // because it will generate dangling pointers. We use |to_be_killed|
610
  // to kill them after the loop.
611
834k
  std::vector<Instruction*> to_be_killed;
612
613
5.02M
  bb->ForEachInst([&to_be_killed, this](Instruction* inst) {
614
5.02M
    if (context_->get_debug_info_mgr()->IsDebugDeclare(inst)) {
615
0
      to_be_killed.push_back(inst);
616
0
    }
617
5.02M
  });
618
834k
  for (auto* inst : to_be_killed) context_->KillInst(inst);
619
834k
}
620
621
// Copy a given basic block, give it a new result_id, and store the new block
622
// and the id mapping in the state. |preserve_instructions| is used to determine
623
// whether or not this function should edit instructions other than the
624
// |result_id|.
625
void LoopUnrollerUtilsImpl::CopyBasicBlock(Loop* loop, const BasicBlock* itr,
626
834k
                                           bool preserve_instructions) {
627
  // Clone the block exactly, including the IDs.
628
834k
  BasicBlock* basic_block = itr->Clone(context_);
629
834k
  basic_block->SetParent(itr->GetParent());
630
631
  // We do not want to duplicate DebugDeclare.
632
834k
  KillDebugDeclares(basic_block);
633
634
  // Assign each result a new unique ID and keep a mapping of the old ids to
635
  // the new ones.
636
834k
  AssignNewResultIds(basic_block);
637
638
  // If this is the continue block we are copying.
639
834k
  if (itr == loop->GetContinueBlock()) {
640
    // Make the OpLoopMerge point to this block for the continue.
641
112k
    if (!preserve_instructions) {
642
112k
      Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
643
112k
      merge_inst->SetInOperand(1, {basic_block->id()});
644
112k
      context_->UpdateDefUse(merge_inst);
645
112k
    }
646
647
112k
    state_.new_continue_block = basic_block;
648
112k
  }
649
650
  // If this is the header block we are copying.
651
834k
  if (itr == loop->GetHeaderBlock()) {
652
112k
    state_.new_header_block = basic_block;
653
654
112k
    if (!preserve_instructions) {
655
      // Remove the loop merge instruction if it exists.
656
112k
      Instruction* merge_inst = basic_block->GetLoopMergeInst();
657
112k
      if (merge_inst) invalidated_instructions_.push_back(merge_inst);
658
112k
    }
659
112k
  }
660
661
  // If this is the latch block being copied, record it in the state.
662
834k
  if (itr == loop->GetLatchBlock()) state_.new_latch_block = basic_block;
663
664
  // If this is the condition block we are copying.
665
834k
  if (itr == loop_condition_block_) {
666
112k
    state_.new_condition_block = basic_block;
667
112k
  }
668
669
  // Add this block to the list of blocks to add to the function at the end of
670
  // the unrolling process.
671
834k
  blocks_to_add_.push_back(std::unique_ptr<BasicBlock>(basic_block));
672
673
  // Keep tracking the old block via a map.
674
834k
  state_.new_blocks[itr->id()] = basic_block;
675
834k
}
676
677
112k
void LoopUnrollerUtilsImpl::CopyBody(Loop* loop, bool eliminate_conditions) {
678
  // Copy each basic block in the loop, give them new ids, and save state
679
  // information.
680
834k
  for (const BasicBlock* itr : loop_blocks_inorder_) {
681
834k
    CopyBasicBlock(loop, itr, false);
682
834k
  }
683
684
  // Set the previous latch block to point to the new header.
685
112k
  Instruction* latch_branch = state_.previous_latch_block_->terminator();
686
112k
  latch_branch->SetInOperand(0, {state_.new_header_block->id()});
687
112k
  context_->UpdateDefUse(latch_branch);
688
689
  // As the algorithm copies the original loop blocks exactly, the tail of the
690
  // latch block on iterations after the first one will be a branch to the new
691
  // header and not the actual loop header. The last continue block in the loop
692
  // should always be a backedge to the global header.
693
112k
  Instruction* new_latch_branch = state_.new_latch_block->terminator();
694
112k
  new_latch_branch->SetInOperand(0, {loop->GetHeaderBlock()->id()});
695
112k
  context_->AnalyzeUses(new_latch_branch);
696
697
112k
  std::vector<Instruction*> inductions;
698
112k
  loop->GetInductionVariables(inductions);
699
301k
  for (size_t index = 0; index < inductions.size(); ++index) {
700
189k
    Instruction* primary_copy = inductions[index];
701
702
189k
    assert(primary_copy->result_id() != 0);
703
189k
    Instruction* induction_clone =
704
189k
        state_.ids_to_new_inst[state_.new_inst[primary_copy->result_id()]];
705
706
189k
    state_.new_phis_.push_back(induction_clone);
707
189k
    assert(induction_clone->result_id() != 0);
708
709
189k
    if (!state_.previous_phis_.empty()) {
710
189k
      state_.new_inst[primary_copy->result_id()] = GetPhiDefID(
711
189k
          state_.previous_phis_[index], state_.previous_latch_block_->id());
712
189k
    } else {
713
      // Do not replace the first phi block ids.
714
0
      state_.new_inst[primary_copy->result_id()] = primary_copy->result_id();
715
0
    }
716
189k
  }
717
718
112k
  if (eliminate_conditions &&
719
112k
      state_.new_condition_block != loop_condition_block_) {
720
112k
    FoldConditionBlock(state_.new_condition_block, 1);
721
112k
  }
722
723
  // Only reference to the header block is the backedge in the latch block,
724
  // don't change this.
725
112k
  state_.new_inst[loop->GetHeaderBlock()->id()] = loop->GetHeaderBlock()->id();
726
727
834k
  for (auto& pair : state_.new_blocks) {
728
834k
    RemapOperands(pair.second);
729
834k
  }
730
731
112k
  for (Instruction* dead_phi : state_.new_phis_)
732
189k
    invalidated_instructions_.push_back(dead_phi);
733
734
  // Swap the state so the new is now the previous.
735
112k
  state_.NextIterationState();
736
112k
}
737
738
uint32_t LoopUnrollerUtilsImpl::GetPhiDefID(const Instruction* phi,
739
200k
                                            uint32_t label) const {
740
394k
  for (uint32_t operand = 3; operand < phi->NumOperands(); operand += 2) {
741
394k
    if (phi->GetSingleWordOperand(operand) == label) {
742
200k
      return phi->GetSingleWordOperand(operand - 1);
743
200k
    }
744
394k
  }
745
0
  assert(false && "Could not find a phi index matching the provided label");
746
0
  return 0;
747
0
}
748
749
void LoopUnrollerUtilsImpl::FoldConditionBlock(BasicBlock* condition_block,
750
115k
                                               uint32_t operand_label) {
751
  // Remove the old conditional branch to the merge and continue blocks.
752
115k
  Instruction& old_branch = *condition_block->tail();
753
115k
  uint32_t new_target = old_branch.GetSingleWordOperand(operand_label);
754
755
115k
  DebugScope scope = old_branch.GetDebugScope();
756
115k
  const std::vector<Instruction> lines = old_branch.dbg_line_insts();
757
758
115k
  context_->KillInst(&old_branch);
759
  // Add the new unconditional branch to the merge block.
760
115k
  InstructionBuilder builder(
761
115k
      context_, condition_block,
762
115k
      IRContext::Analysis::kAnalysisDefUse |
763
115k
          IRContext::Analysis::kAnalysisInstrToBlockMapping);
764
115k
  Instruction* new_branch = builder.AddBranch(new_target);
765
766
115k
  if (!lines.empty()) new_branch->AddDebugLine(&lines.back());
767
115k
  new_branch->SetDebugScope(scope);
768
115k
}
769
770
3.13k
void LoopUnrollerUtilsImpl::CloseUnrolledLoop(Loop* loop) {
771
  // Remove the OpLoopMerge instruction from the function.
772
3.13k
  Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
773
3.13k
  invalidated_instructions_.push_back(merge_inst);
774
775
  // Remove the final backedge to the header and make it point instead to the
776
  // merge block.
777
3.13k
  Instruction* latch_instruction = state_.previous_latch_block_->terminator();
778
3.13k
  latch_instruction->SetInOperand(0, {loop->GetMergeBlock()->id()});
779
3.13k
  context_->UpdateDefUse(latch_instruction);
780
781
  // Remove all induction variables as the phis will now be invalid. Replace all
782
  // uses with the constant initializer value (all uses of phis will be in
783
  // the first iteration with the subsequent phis already having been removed).
784
3.13k
  std::vector<Instruction*> inductions;
785
3.13k
  loop->GetInductionVariables(inductions);
786
787
  // We can use the state instruction mechanism to replace all internal loop
788
  // values within the first loop trip (as the subsequent ones will be updated
789
  // by the copy function) with the value coming in from the preheader and then
790
  // use context ReplaceAllUsesWith for the uses outside the loop with the final
791
  // trip phi value.
792
3.13k
  state_.new_inst.clear();
793
5.61k
  for (Instruction* induction : inductions) {
794
5.61k
    uint32_t initalizer_id =
795
5.61k
        GetPhiDefID(induction, loop->GetPreHeaderBlock()->id());
796
797
5.61k
    state_.new_inst[induction->result_id()] = initalizer_id;
798
5.61k
  }
799
800
26.1k
  for (BasicBlock* block : loop_blocks_inorder_) {
801
26.1k
    RemapOperands(block);
802
26.1k
  }
803
834k
  for (auto& block_itr : blocks_to_add_) {
804
834k
    RemapOperands(block_itr.get());
805
834k
  }
806
807
  // Rewrite the last phis, since they may still reference the original phi.
808
5.61k
  for (Instruction* last_phi : state_.previous_phis_) {
809
5.61k
    RemapOperands(last_phi);
810
5.61k
  }
811
3.13k
}
812
813
// Uses the first loop to create a copy of the loop with new IDs.
814
0
void LoopUnrollerUtilsImpl::DuplicateLoop(Loop* old_loop, Loop* new_loop) {
815
0
  std::vector<BasicBlock*> new_block_order;
816
817
  // Copy every block in the old loop.
818
0
  for (const BasicBlock* itr : loop_blocks_inorder_) {
819
0
    CopyBasicBlock(old_loop, itr, true);
820
0
    new_block_order.push_back(blocks_to_add_.back().get());
821
0
  }
822
823
  // Clone the merge block, give it a new id and record it in the state.
824
0
  BasicBlock* new_merge = old_loop->GetMergeBlock()->Clone(context_);
825
0
  new_merge->SetParent(old_loop->GetMergeBlock()->GetParent());
826
0
  AssignNewResultIds(new_merge);
827
0
  state_.new_blocks[old_loop->GetMergeBlock()->id()] = new_merge;
828
829
  // Remap the operands of every instruction in the loop to point to the new
830
  // copies.
831
0
  for (auto& pair : state_.new_blocks) {
832
0
    RemapOperands(pair.second);
833
0
  }
834
835
0
  loop_blocks_inorder_ = std::move(new_block_order);
836
837
0
  AddBlocksToLoop(new_loop);
838
839
0
  new_loop->SetHeaderBlock(state_.new_header_block);
840
0
  new_loop->SetContinueBlock(state_.new_continue_block);
841
0
  new_loop->SetLatchBlock(state_.new_latch_block);
842
0
  new_loop->SetMergeBlock(new_merge);
843
0
}
844
845
// Whenever the utility copies a block it stores it in a temporary buffer, this
846
// function adds the buffer into the Function. The blocks will be inserted
847
// after the block |insert_point|.
848
void LoopUnrollerUtilsImpl::AddBlocksToFunction(
849
3.13k
    const BasicBlock* insert_point) {
850
3.13k
  for (auto basic_block_iterator = function_.begin();
851
37.6k
       basic_block_iterator != function_.end(); ++basic_block_iterator) {
852
37.6k
    if (basic_block_iterator->id() == insert_point->id()) {
853
3.13k
      basic_block_iterator.InsertBefore(&blocks_to_add_);
854
3.13k
      return;
855
3.13k
    }
856
37.6k
  }
857
858
0
  assert(
859
0
      false &&
860
0
      "Could not add basic blocks to function as insert point was not found.");
861
0
}
862
863
// Assign all result_ids in |basic_block| instructions to new IDs and preserve
864
// the mapping of new ids to old ones.
865
834k
void LoopUnrollerUtilsImpl::AssignNewResultIds(BasicBlock* basic_block) {
866
834k
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
867
868
  // Label instructions aren't covered by normal traversal of the
869
  // instructions.
870
  // TODO(1841): Handle id overflow.
871
834k
  uint32_t new_label_id = context_->TakeNextId();
872
873
  // Assign a new id to the label.
874
834k
  state_.new_inst[basic_block->GetLabelInst()->result_id()] = new_label_id;
875
834k
  basic_block->GetLabelInst()->SetResultId(new_label_id);
876
834k
  def_use_mgr->AnalyzeInstDefUse(basic_block->GetLabelInst());
877
878
4.19M
  for (Instruction& inst : *basic_block) {
879
    // Do def/use analysis on new lines
880
4.19M
    for (auto& line : inst.dbg_line_insts())
881
7.54k
      def_use_mgr->AnalyzeInstDefUse(&line);
882
883
4.19M
    uint32_t old_id = inst.result_id();
884
885
    // Ignore stores etc.
886
4.19M
    if (old_id == 0) {
887
1.21M
      continue;
888
1.21M
    }
889
890
    // Give the instruction a new id.
891
    // TODO(1841): Handle id overflow.
892
2.98M
    inst.SetResultId(context_->TakeNextId());
893
2.98M
    def_use_mgr->AnalyzeInstDef(&inst);
894
895
    // Save the mapping of old_id -> new_id.
896
2.98M
    state_.new_inst[old_id] = inst.result_id();
897
    // Check if this instruction is the induction variable.
898
2.98M
    if (loop_induction_variable_->result_id() == old_id) {
899
      // Save a pointer to the new copy of it.
900
112k
      state_.new_phi = &inst;
901
112k
    }
902
2.98M
    state_.ids_to_new_inst[inst.result_id()] = &inst;
903
2.98M
  }
904
834k
}
905
906
8.53M
void LoopUnrollerUtilsImpl::RemapOperands(Instruction* inst) {
907
15.1M
  auto remap_operands_to_new_ids = [this](uint32_t* id) {
908
15.1M
    auto itr = state_.new_inst.find(*id);
909
910
15.1M
    if (itr != state_.new_inst.end()) {
911
4.74M
      *id = itr->second;
912
4.74M
    }
913
15.1M
  };
914
915
8.53M
  inst->ForEachInId(remap_operands_to_new_ids);
916
8.53M
  context_->AnalyzeUses(inst);
917
8.53M
}
918
919
1.69M
void LoopUnrollerUtilsImpl::RemapOperands(BasicBlock* basic_block) {
920
8.53M
  for (Instruction& inst : *basic_block) {
921
8.53M
    RemapOperands(&inst);
922
8.53M
  }
923
1.69M
}
924
925
// Generate the ordered list of basic blocks in the |loop| and cache it for
926
// later use.
927
3.13k
void LoopUnrollerUtilsImpl::ComputeLoopOrderedBlocks(Loop* loop) {
928
3.13k
  loop_blocks_inorder_.clear();
929
3.13k
  loop->ComputeLoopStructuredOrder(&loop_blocks_inorder_);
930
3.13k
}
931
932
// Adds the blocks_to_add_ to both the loop and to the parent.
933
715
void LoopUnrollerUtilsImpl::AddBlocksToLoop(Loop* loop) const {
934
  // Add the blocks to this loop.
935
43.1k
  for (auto& block_itr : blocks_to_add_) {
936
43.1k
    loop->AddBasicBlock(block_itr.get());
937
43.1k
  }
938
939
  // Add the blocks to the parent as well.
940
715
  if (loop->GetParent()) AddBlocksToLoop(loop->GetParent());
941
715
}
942
943
0
void LoopUnrollerUtilsImpl::LinkLastPhisToStart(Loop* loop) const {
944
0
  std::vector<Instruction*> inductions;
945
0
  loop->GetInductionVariables(inductions);
946
947
0
  for (size_t i = 0; i < inductions.size(); ++i) {
948
0
    Instruction* last_phi_in_block = state_.previous_phis_[i];
949
950
0
    uint32_t phi_index =
951
0
        GetPhiIndexFromLabel(state_.previous_latch_block_, last_phi_in_block);
952
0
    uint32_t phi_variable =
953
0
        last_phi_in_block->GetSingleWordInOperand(phi_index - 1);
954
0
    uint32_t phi_label = last_phi_in_block->GetSingleWordInOperand(phi_index);
955
956
0
    Instruction* phi = inductions[i];
957
0
    phi->SetInOperand(phi_index - 1, {phi_variable});
958
0
    phi->SetInOperand(phi_index, {phi_label});
959
0
  }
960
0
}
961
962
// Duplicate the |loop| body |factor| number of times while keeping the loop
963
// backedge intact.
964
0
void LoopUnrollerUtilsImpl::PartiallyUnroll(Loop* loop, size_t factor) {
965
0
  Unroll(loop, factor);
966
0
  LinkLastPhisToStart(loop);
967
0
  AddBlocksToLoop(loop);
968
0
  AddBlocksToFunction(loop->GetMergeBlock());
969
0
  RemoveDeadInstructions();
970
0
}
971
972
/*
973
 * End LoopUtilsImpl.
974
 */
975
976
}  // namespace
977
978
/*
979
 *
980
 *  Begin Utils.
981
 *
982
 * */
983
984
8.64k
bool LoopUtils::CanPerformUnroll() {
985
  // The loop is expected to be in structured order.
986
8.64k
  if (!loop_->GetHeaderBlock()->GetMergeInst()) {
987
0
    return false;
988
0
  }
989
990
  // Find check the loop has a condition we can find and evaluate.
991
8.64k
  const BasicBlock* condition = loop_->FindConditionBlock();
992
8.64k
  if (!condition) return false;
993
994
  // Check that we can find and process the induction variable.
995
7.57k
  const Instruction* induction = loop_->FindConditionVariable(condition);
996
7.57k
  if (!induction || induction->opcode() != spv::Op::OpPhi) return false;
997
998
  // Check that we can find the number of loop iterations.
999
6.50k
  if (!loop_->FindNumberOfIterations(induction, &*condition->ctail(), nullptr))
1000
0
    return false;
1001
1002
6.50k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1003
  // ClusterFuzz/OSS-Fuzz is likely to yield examples with very high loop
1004
  // iteration counts. This can cause timeouts and memouts during fuzzing that
1005
  // are not classed as bugs. To avoid this noise, loop unrolling is not applied
1006
  // to loops with large iteration counts when fuzzing.
1007
6.50k
  constexpr size_t kFuzzerIterationLimit = 100;
1008
6.50k
  size_t num_iterations;
1009
6.50k
  loop_->FindNumberOfIterations(induction, &*condition->ctail(),
1010
6.50k
                                &num_iterations);
1011
6.50k
  if (num_iterations > kFuzzerIterationLimit) {
1012
187
    return false;
1013
187
  }
1014
6.31k
#endif
1015
1016
  // Make sure the latch block is a unconditional branch to the header
1017
  // block.
1018
6.31k
  const Instruction& branch = *loop_->GetLatchBlock()->ctail();
1019
6.31k
  bool branching_assumption =
1020
6.31k
      branch.opcode() == spv::Op::OpBranch &&
1021
6.31k
      branch.GetSingleWordInOperand(0) == loop_->GetHeaderBlock()->id();
1022
6.31k
  if (!branching_assumption) {
1023
0
    return false;
1024
0
  }
1025
1026
6.31k
  std::vector<Instruction*> inductions;
1027
6.31k
  loop_->GetInductionVariables(inductions);
1028
1029
  // Ban breaks within the loop.
1030
6.31k
  const std::vector<uint32_t>& merge_block_preds =
1031
6.31k
      context_->cfg()->preds(loop_->GetMergeBlock()->id());
1032
6.31k
  if (merge_block_preds.size() != 1) {
1033
0
    return false;
1034
0
  }
1035
1036
  // Ban continues within the loop.
1037
6.31k
  const std::vector<uint32_t>& continue_block_preds =
1038
6.31k
      context_->cfg()->preds(loop_->GetContinueBlock()->id());
1039
6.31k
  if (continue_block_preds.size() != 1) {
1040
16
    return false;
1041
16
  }
1042
1043
  // Ban returns in the loop.
1044
  // Iterate over all the blocks within the loop and check that none of them
1045
  // exit the loop.
1046
52.4k
  for (uint32_t label_id : loop_->GetBlocks()) {
1047
52.4k
    const BasicBlock* block = context_->cfg()->block(label_id);
1048
52.4k
    if (block->ctail()->opcode() == spv::Op::OpKill ||
1049
52.4k
        block->ctail()->opcode() == spv::Op::OpReturn ||
1050
52.4k
        block->ctail()->opcode() == spv::Op::OpReturnValue ||
1051
52.4k
        block->ctail()->opcode() == spv::Op::OpTerminateInvocation) {
1052
0
      return false;
1053
0
    }
1054
52.4k
  }
1055
  // Can only unroll inner loops.
1056
6.29k
  if (!loop_->AreAllChildrenMarkedForRemoval()) {
1057
23
    return false;
1058
23
  }
1059
1060
6.27k
  return true;
1061
6.29k
}
1062
1063
0
bool LoopUtils::PartiallyUnroll(size_t factor) {
1064
0
  if (factor == 1 || !CanPerformUnroll()) return false;
1065
1066
  // Create the unroller utility.
1067
0
  LoopUnrollerUtilsImpl unroller{context_,
1068
0
                                 loop_->GetHeaderBlock()->GetParent()};
1069
0
  unroller.Init(loop_);
1070
1071
  // If the unrolling factor is larger than or the same size as the loop just
1072
  // fully unroll the loop.
1073
0
  if (factor >= unroller.GetLoopIterationCount()) {
1074
0
    unroller.FullyUnroll(loop_);
1075
0
    return true;
1076
0
  }
1077
1078
  // If the loop unrolling factor is an residual number of iterations we need to
1079
  // let run the loop for the residual part then let it branch into the unrolled
1080
  // remaining part. We add one when calucating the remainder to take into
1081
  // account the one iteration already in the loop.
1082
0
  if (unroller.GetLoopIterationCount() % factor != 0) {
1083
0
    unroller.PartiallyUnrollResidualFactor(loop_, factor);
1084
0
  } else {
1085
0
    unroller.PartiallyUnroll(loop_, factor);
1086
0
  }
1087
1088
0
  return true;
1089
0
}
1090
1091
3.13k
bool LoopUtils::FullyUnroll() {
1092
3.13k
  if (!CanPerformUnroll()) return false;
1093
1094
3.13k
  std::vector<Instruction*> inductions;
1095
3.13k
  loop_->GetInductionVariables(inductions);
1096
1097
3.13k
  LoopUnrollerUtilsImpl unroller{context_,
1098
3.13k
                                 loop_->GetHeaderBlock()->GetParent()};
1099
1100
3.13k
  unroller.Init(loop_);
1101
3.13k
  unroller.FullyUnroll(loop_);
1102
1103
3.13k
  return true;
1104
3.13k
}
1105
1106
0
void LoopUtils::Finalize() {
1107
  // Clean up the loop descriptor to preserve the analysis.
1108
1109
0
  LoopDescriptor* LD = context_->GetLoopDescriptor(&function_);
1110
0
  LD->PostModificationCleanup();
1111
0
}
1112
1113
/*
1114
 *
1115
 * Begin Pass.
1116
 *
1117
 */
1118
1119
14.8k
Pass::Status LoopUnroller::Process() {
1120
14.8k
  bool changed = false;
1121
14.8k
  for (Function& f : *context()->module()) {
1122
14.3k
    if (f.IsDeclaration()) {
1123
0
      continue;
1124
0
    }
1125
1126
14.3k
    LoopDescriptor* LD = context()->GetLoopDescriptor(&f);
1127
28.7k
    for (Loop& loop : *LD) {
1128
28.7k
      LoopUtils loop_utils{context(), &loop};
1129
28.7k
      if (!loop.HasUnrollLoopControl() || !loop_utils.CanPerformUnroll()) {
1130
25.5k
        continue;
1131
25.5k
      }
1132
1133
3.13k
      if (fully_unroll_) {
1134
3.13k
        loop_utils.FullyUnroll();
1135
3.13k
      } else {
1136
0
        loop_utils.PartiallyUnroll(unroll_factor_);
1137
0
      }
1138
3.13k
      changed = true;
1139
3.13k
    }
1140
14.3k
    LD->PostModificationCleanup();
1141
14.3k
  }
1142
1143
14.8k
  return changed ? Status::SuccessWithChange : Status::SuccessWithoutChange;
1144
14.8k
}
1145
1146
}  // namespace opt
1147
}  // namespace spvtools