Coverage Report

Created: 2024-09-11 07:09

/src/spirv-tools/source/opt/loop_fission.cpp
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2018 Google LLC.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "source/opt/loop_fission.h"
16
17
#include <set>
18
19
#include "source/opt/register_pressure.h"
20
21
// Implement loop fission with an optional parameter to split only
22
// if the register pressure in a given loop meets a certain criteria. This is
23
// controlled via the constructors of LoopFissionPass.
24
//
25
// 1 - Build a list of loops to be split, these are top level loops (loops
26
// without child loops themselves) which meet the register pressure criteria, as
27
// determined by the ShouldSplitLoop method of LoopFissionPass.
28
//
29
// 2 - For each loop in the list, group each instruction into a set of related
30
// instructions by traversing each instructions users and operands recursively.
31
// We stop if we encounter an instruction we have seen before or an instruction
32
// which we don't consider relevant (i.e OpLoopMerge). We then group these
33
// groups into two different sets, one for the first loop and one for the
34
// second.
35
//
36
// 3 - We then run CanPerformSplit to check that it would be legal to split a
37
// loop using those two sets. We check that we haven't altered the relative
38
// order load/stores appear in the binary and that we aren't breaking any
39
// dependency between load/stores by splitting them into two loops. We also
40
// check that none of the OpBranch instructions are dependent on a load as we
41
// leave control flow structure intact and move only instructions in the body so
42
// we want to avoid any loads with side affects or aliasing.
43
//
44
// 4 - We then split the loop by calling SplitLoop. This function clones the
45
// loop and attaches it to the preheader and connects the new loops merge block
46
// to the current loop header block. We then use the two sets built in step 2 to
47
// remove instructions from each loop. If an instruction appears in the first
48
// set it is removed from the second loop and vice versa.
49
//
50
// 5 - If the multiple split passes flag is set we check if each of the loops
51
// still meet the register pressure criteria. If they do then we add them to the
52
// list of loops to be split (created in step one) to allow for loops to be
53
// split multiple times.
54
//
55
56
namespace spvtools {
57
namespace opt {
58
59
class LoopFissionImpl {
60
 public:
61
  LoopFissionImpl(IRContext* context, Loop* loop)
62
0
      : context_(context), loop_(loop), load_used_in_condition_(false) {}
63
64
  // Group each instruction in the loop into sets of instructions related by
65
  // their usedef chains. An instruction which uses another will appear in the
66
  // same set. Then merge those sets into just two sets. Returns false if there
67
  // was one or less sets created.
68
  bool GroupInstructionsByUseDef();
69
70
  // Check if the sets built by GroupInstructionsByUseDef violate any data
71
  // dependence rules.
72
  bool CanPerformSplit();
73
74
  // Split the loop and return a pointer to the new loop.
75
  Loop* SplitLoop();
76
77
  // Checks if |inst| is safe to move. We can only move instructions which don't
78
  // have any side effects and OpLoads and OpStores.
79
  bool MovableInstruction(const Instruction& inst) const;
80
81
 private:
82
  // Traverse the def use chain of |inst| and add the users and uses of |inst|
83
  // which are in the same loop to the |returned_set|.
84
  void TraverseUseDef(Instruction* inst, std::set<Instruction*>* returned_set,
85
                      bool ignore_phi_users = false, bool report_loads = false);
86
87
  // We group the instructions in the block into two different groups, the
88
  // instructions to be kept in the original loop and the ones to be cloned into
89
  // the new loop. As the cloned loop is attached to the preheader it will be
90
  // the first loop and the second loop will be the original.
91
  std::set<Instruction*> cloned_loop_instructions_;
92
  std::set<Instruction*> original_loop_instructions_;
93
94
  // We need a set of all the instructions to be seen so we can break any
95
  // recursion and also so we can ignore certain instructions by preemptively
96
  // adding them to this set.
97
  std::set<Instruction*> seen_instructions_;
98
99
  // A map of instructions to their relative position in the function.
100
  std::map<Instruction*, size_t> instruction_order_;
101
102
  IRContext* context_;
103
104
  Loop* loop_;
105
106
  // This is set to true by TraverseUseDef when traversing the instructions
107
  // related to the loop condition and any if conditions should any of those
108
  // instructions be a load.
109
  bool load_used_in_condition_;
110
};
111
112
0
bool LoopFissionImpl::MovableInstruction(const Instruction& inst) const {
113
0
  return inst.opcode() == spv::Op::OpLoad ||
114
0
         inst.opcode() == spv::Op::OpStore ||
115
0
         inst.opcode() == spv::Op::OpSelectionMerge ||
116
0
         inst.opcode() == spv::Op::OpPhi || inst.IsOpcodeCodeMotionSafe();
117
0
}
118
119
void LoopFissionImpl::TraverseUseDef(Instruction* inst,
120
                                     std::set<Instruction*>* returned_set,
121
0
                                     bool ignore_phi_users, bool report_loads) {
122
0
  assert(returned_set && "Set to be returned cannot be null.");
123
124
0
  analysis::DefUseManager* def_use = context_->get_def_use_mgr();
125
0
  std::set<Instruction*>& inst_set = *returned_set;
126
127
  // We create this functor to traverse the use def chain to build the
128
  // grouping of related instructions. The lambda captures the std::function
129
  // to allow it to recurse.
130
0
  std::function<void(Instruction*)> traverser_functor;
131
0
  traverser_functor = [this, def_use, &inst_set, &traverser_functor,
132
0
                       ignore_phi_users, report_loads](Instruction* user) {
133
    // If we've seen the instruction before or it is not inside the loop end the
134
    // traversal.
135
0
    if (!user || seen_instructions_.count(user) != 0 ||
136
0
        !context_->get_instr_block(user) ||
137
0
        !loop_->IsInsideLoop(context_->get_instr_block(user))) {
138
0
      return;
139
0
    }
140
141
    // Don't include labels or loop merge instructions in the instruction sets.
142
    // Including them would mean we group instructions related only by using the
143
    // same labels (i.e phis). We already preempt the inclusion of
144
    // OpSelectionMerge by adding related instructions to the seen_instructions_
145
    // set.
146
0
    if (user->opcode() == spv::Op::OpLoopMerge ||
147
0
        user->opcode() == spv::Op::OpLabel)
148
0
      return;
149
150
    // If the |report_loads| flag is set, set the class field
151
    // load_used_in_condition_ to false. This is used to check that none of the
152
    // condition checks in the loop rely on loads.
153
0
    if (user->opcode() == spv::Op::OpLoad && report_loads) {
154
0
      load_used_in_condition_ = true;
155
0
    }
156
157
    // Add the instruction to the set of instructions already seen, this breaks
158
    // recursion and allows us to ignore certain instructions.
159
0
    seen_instructions_.insert(user);
160
161
0
    inst_set.insert(user);
162
163
    // Wrapper functor to traverse the operands of each instruction.
164
0
    auto traverse_operand = [&traverser_functor, def_use](const uint32_t* id) {
165
0
      traverser_functor(def_use->GetDef(*id));
166
0
    };
167
0
    user->ForEachInOperand(traverse_operand);
168
169
    // For the first traversal we want to ignore the users of the phi.
170
0
    if (ignore_phi_users && user->opcode() == spv::Op::OpPhi) return;
171
172
    // Traverse each user with this lambda.
173
0
    def_use->ForEachUser(user, traverser_functor);
174
175
    // Wrapper functor for the use traversal.
176
0
    auto traverse_use = [&traverser_functor](Instruction* use, uint32_t) {
177
0
      traverser_functor(use);
178
0
    };
179
0
    def_use->ForEachUse(user, traverse_use);
180
181
0
  };
182
183
  // We start the traversal of the use def graph by invoking the above
184
  // lambda with the |inst| parameter.
185
0
  traverser_functor(inst);
186
0
}
187
188
0
bool LoopFissionImpl::GroupInstructionsByUseDef() {
189
0
  std::vector<std::set<Instruction*>> sets{};
190
191
  // We want to ignore all the instructions stemming from the loop condition
192
  // instruction.
193
0
  BasicBlock* condition_block = loop_->FindConditionBlock();
194
195
0
  if (!condition_block) return false;
196
0
  Instruction* condition = &*condition_block->tail();
197
198
  // We iterate over the blocks via iterating over all the blocks in the
199
  // function, we do this so we are iterating in the same order which the blocks
200
  // appear in the binary.
201
0
  Function& function = *loop_->GetHeaderBlock()->GetParent();
202
203
  // Create a temporary set to ignore certain groups of instructions within the
204
  // loop. We don't want any instructions related to control flow to be removed
205
  // from either loop only instructions within the control flow bodies.
206
0
  std::set<Instruction*> instructions_to_ignore{};
207
0
  TraverseUseDef(condition, &instructions_to_ignore, true, true);
208
209
  // Traverse control flow instructions to ensure they are added to the
210
  // seen_instructions_ set and will be ignored when it it called with actual
211
  // sets.
212
0
  for (BasicBlock& block : function) {
213
0
    if (!loop_->IsInsideLoop(block.id())) continue;
214
215
0
    for (Instruction& inst : block) {
216
      // Ignore all instructions related to control flow.
217
0
      if (inst.opcode() == spv::Op::OpSelectionMerge || inst.IsBranch()) {
218
0
        TraverseUseDef(&inst, &instructions_to_ignore, true, true);
219
0
      }
220
0
    }
221
0
  }
222
223
  // Traverse the instructions and generate the sets, automatically ignoring any
224
  // instructions in instructions_to_ignore.
225
0
  for (BasicBlock& block : function) {
226
0
    if (!loop_->IsInsideLoop(block.id()) ||
227
0
        loop_->GetHeaderBlock()->id() == block.id())
228
0
      continue;
229
230
0
    for (Instruction& inst : block) {
231
      // Record the order that each load/store is seen.
232
0
      if (inst.opcode() == spv::Op::OpLoad ||
233
0
          inst.opcode() == spv::Op::OpStore) {
234
0
        instruction_order_[&inst] = instruction_order_.size();
235
0
      }
236
237
      // Ignore instructions already seen in a traversal.
238
0
      if (seen_instructions_.count(&inst) != 0) {
239
0
        continue;
240
0
      }
241
242
      // Build the set.
243
0
      std::set<Instruction*> inst_set{};
244
0
      TraverseUseDef(&inst, &inst_set);
245
0
      if (!inst_set.empty()) sets.push_back(std::move(inst_set));
246
0
    }
247
0
  }
248
249
  // If we have one or zero sets return false to indicate that due to
250
  // insufficient instructions we couldn't split the loop into two groups and
251
  // thus the loop can't be split any further.
252
0
  if (sets.size() < 2) {
253
0
    return false;
254
0
  }
255
256
  // Merge the loop sets into two different sets. In CanPerformSplit we will
257
  // validate that we don't break the relative ordering of loads/stores by doing
258
  // this.
259
0
  for (size_t index = 0; index < sets.size() / 2; ++index) {
260
0
    cloned_loop_instructions_.insert(sets[index].begin(), sets[index].end());
261
0
  }
262
0
  for (size_t index = sets.size() / 2; index < sets.size(); ++index) {
263
0
    original_loop_instructions_.insert(sets[index].begin(), sets[index].end());
264
0
  }
265
266
0
  return true;
267
0
}
268
269
0
bool LoopFissionImpl::CanPerformSplit() {
270
  // Return false if any of the condition instructions in the loop depend on a
271
  // load.
272
0
  if (load_used_in_condition_) {
273
0
    return false;
274
0
  }
275
276
  // Build a list of all parent loops of this loop. Loop dependence analysis
277
  // needs this structure.
278
0
  std::vector<const Loop*> loops;
279
0
  Loop* parent_loop = loop_;
280
0
  while (parent_loop) {
281
0
    loops.push_back(parent_loop);
282
0
    parent_loop = parent_loop->GetParent();
283
0
  }
284
285
0
  LoopDependenceAnalysis analysis{context_, loops};
286
287
  // A list of all the stores in the cloned loop.
288
0
  std::vector<Instruction*> set_one_stores{};
289
290
  // A list of all the loads in the cloned loop.
291
0
  std::vector<Instruction*> set_one_loads{};
292
293
  // Populate the above lists.
294
0
  for (Instruction* inst : cloned_loop_instructions_) {
295
0
    if (inst->opcode() == spv::Op::OpStore) {
296
0
      set_one_stores.push_back(inst);
297
0
    } else if (inst->opcode() == spv::Op::OpLoad) {
298
0
      set_one_loads.push_back(inst);
299
0
    }
300
301
    // If we find any instruction which we can't move (such as a barrier),
302
    // return false.
303
0
    if (!MovableInstruction(*inst)) return false;
304
0
  }
305
306
  // We need to calculate the depth of the loop to create the loop dependency
307
  // distance vectors.
308
0
  const size_t loop_depth = loop_->GetDepth();
309
310
  // Check the dependencies between loads in the cloned loop and stores in the
311
  // original and vice versa.
312
0
  for (Instruction* inst : original_loop_instructions_) {
313
    // If we find any instruction which we can't move (such as a barrier),
314
    // return false.
315
0
    if (!MovableInstruction(*inst)) return false;
316
317
    // Look at the dependency between the loads in the original and stores in
318
    // the cloned loops.
319
0
    if (inst->opcode() == spv::Op::OpLoad) {
320
0
      for (Instruction* store : set_one_stores) {
321
0
        DistanceVector vec{loop_depth};
322
323
        // If the store actually should appear after the load, return false.
324
        // This means the store has been placed in the wrong grouping.
325
0
        if (instruction_order_[store] > instruction_order_[inst]) {
326
0
          return false;
327
0
        }
328
        // If not independent check the distance vector.
329
0
        if (!analysis.GetDependence(store, inst, &vec)) {
330
0
          for (DistanceEntry& entry : vec.GetEntries()) {
331
            // A distance greater than zero means that the store in the cloned
332
            // loop has a dependency on the load in the original loop.
333
0
            if (entry.distance > 0) return false;
334
0
          }
335
0
        }
336
0
      }
337
0
    } else if (inst->opcode() == spv::Op::OpStore) {
338
0
      for (Instruction* load : set_one_loads) {
339
0
        DistanceVector vec{loop_depth};
340
341
        // If the load actually should appear after the store, return false.
342
0
        if (instruction_order_[load] > instruction_order_[inst]) {
343
0
          return false;
344
0
        }
345
346
        // If not independent check the distance vector.
347
0
        if (!analysis.GetDependence(inst, load, &vec)) {
348
0
          for (DistanceEntry& entry : vec.GetEntries()) {
349
            // A distance less than zero means the load in the cloned loop is
350
            // dependent on the store instruction in the original loop.
351
0
            if (entry.distance < 0) return false;
352
0
          }
353
0
        }
354
0
      }
355
0
    }
356
0
  }
357
0
  return true;
358
0
}
359
360
0
Loop* LoopFissionImpl::SplitLoop() {
361
  // Clone the loop.
362
0
  LoopUtils util{context_, loop_};
363
0
  LoopUtils::LoopCloningResult clone_results;
364
0
  Loop* cloned_loop = util.CloneAndAttachLoopToHeader(&clone_results);
365
366
  // Update the OpLoopMerge in the cloned loop.
367
0
  cloned_loop->UpdateLoopMergeInst();
368
369
  // Add the loop_ to the module.
370
  // TODO(1841): Handle failure to create pre-header.
371
0
  Function::iterator it =
372
0
      util.GetFunction()->FindBlock(loop_->GetOrCreatePreHeaderBlock()->id());
373
0
  util.GetFunction()->AddBasicBlocks(clone_results.cloned_bb_.begin(),
374
0
                                     clone_results.cloned_bb_.end(), ++it);
375
0
  loop_->SetPreHeaderBlock(cloned_loop->GetMergeBlock());
376
377
0
  std::vector<Instruction*> instructions_to_kill{};
378
379
  // Kill all the instructions which should appear in the cloned loop but not in
380
  // the original loop.
381
0
  for (uint32_t id : loop_->GetBlocks()) {
382
0
    BasicBlock* block = context_->cfg()->block(id);
383
384
0
    for (Instruction& inst : *block) {
385
      // If the instruction appears in the cloned loop instruction group, kill
386
      // it.
387
0
      if (cloned_loop_instructions_.count(&inst) == 1 &&
388
0
          original_loop_instructions_.count(&inst) == 0) {
389
0
        instructions_to_kill.push_back(&inst);
390
0
        if (inst.opcode() == spv::Op::OpPhi) {
391
0
          context_->ReplaceAllUsesWith(
392
0
              inst.result_id(), clone_results.value_map_[inst.result_id()]);
393
0
        }
394
0
      }
395
0
    }
396
0
  }
397
398
  // Kill all instructions which should appear in the original loop and not in
399
  // the cloned loop.
400
0
  for (uint32_t id : cloned_loop->GetBlocks()) {
401
0
    BasicBlock* block = context_->cfg()->block(id);
402
0
    for (Instruction& inst : *block) {
403
0
      Instruction* old_inst = clone_results.ptr_map_[&inst];
404
      // If the instruction belongs to the original loop instruction group, kill
405
      // it.
406
0
      if (cloned_loop_instructions_.count(old_inst) == 0 &&
407
0
          original_loop_instructions_.count(old_inst) == 1) {
408
0
        instructions_to_kill.push_back(&inst);
409
0
      }
410
0
    }
411
0
  }
412
413
0
  for (Instruction* i : instructions_to_kill) {
414
0
    context_->KillInst(i);
415
0
  }
416
417
0
  return cloned_loop;
418
0
}
419
420
LoopFissionPass::LoopFissionPass(const size_t register_threshold_to_split,
421
                                 bool split_multiple_times)
422
0
    : split_multiple_times_(split_multiple_times) {
423
  // Split if the number of registers in the loop exceeds
424
  // |register_threshold_to_split|.
425
0
  split_criteria_ =
426
0
      [register_threshold_to_split](
427
0
          const RegisterLiveness::RegionRegisterLiveness& liveness) {
428
0
        return liveness.used_registers_ > register_threshold_to_split;
429
0
      };
430
0
}
431
432
0
LoopFissionPass::LoopFissionPass() : split_multiple_times_(false) {
433
  // Split by default.
434
0
  split_criteria_ = [](const RegisterLiveness::RegionRegisterLiveness&) {
435
0
    return true;
436
0
  };
437
0
}
438
439
0
bool LoopFissionPass::ShouldSplitLoop(const Loop& loop, IRContext* c) {
440
0
  LivenessAnalysis* analysis = c->GetLivenessAnalysis();
441
442
0
  RegisterLiveness::RegionRegisterLiveness liveness{};
443
444
0
  Function* function = loop.GetHeaderBlock()->GetParent();
445
0
  analysis->Get(function)->ComputeLoopRegisterPressure(loop, &liveness);
446
447
0
  return split_criteria_(liveness);
448
0
}
449
450
0
Pass::Status LoopFissionPass::Process() {
451
0
  bool changed = false;
452
453
0
  for (Function& f : *context()->module()) {
454
    // We collect all the inner most loops in the function and run the loop
455
    // splitting util on each. The reason we do this is to allow us to iterate
456
    // over each, as creating new loops will invalidate the loop iterator.
457
0
    std::vector<Loop*> inner_most_loops{};
458
0
    LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(&f);
459
0
    for (Loop& loop : loop_descriptor) {
460
0
      if (!loop.HasChildren() && ShouldSplitLoop(loop, context())) {
461
0
        inner_most_loops.push_back(&loop);
462
0
      }
463
0
    }
464
465
    // List of new loops which meet the criteria to be split again.
466
0
    std::vector<Loop*> new_loops_to_split{};
467
468
0
    while (!inner_most_loops.empty()) {
469
0
      for (Loop* loop : inner_most_loops) {
470
0
        LoopFissionImpl impl{context(), loop};
471
472
        // Group the instructions in the loop into two different sets of related
473
        // instructions. If we can't group the instructions into the two sets
474
        // then we can't split the loop any further.
475
0
        if (!impl.GroupInstructionsByUseDef()) {
476
0
          continue;
477
0
        }
478
479
0
        if (impl.CanPerformSplit()) {
480
0
          Loop* second_loop = impl.SplitLoop();
481
0
          changed = true;
482
0
          context()->InvalidateAnalysesExceptFor(
483
0
              IRContext::kAnalysisLoopAnalysis);
484
485
          // If the newly created loop meets the criteria to be split, split it
486
          // again.
487
0
          if (ShouldSplitLoop(*second_loop, context()))
488
0
            new_loops_to_split.push_back(second_loop);
489
490
          // If the original loop (now split) still meets the criteria to be
491
          // split, split it again.
492
0
          if (ShouldSplitLoop(*loop, context()))
493
0
            new_loops_to_split.push_back(loop);
494
0
        }
495
0
      }
496
497
      // If the split multiple times flag has been set add the new loops which
498
      // meet the splitting criteria into the list of loops to be split on the
499
      // next iteration.
500
0
      if (split_multiple_times_) {
501
0
        inner_most_loops = std::move(new_loops_to_split);
502
0
      } else {
503
0
        break;
504
0
      }
505
0
    }
506
0
  }
507
508
0
  return changed ? Pass::Status::SuccessWithChange
509
0
                 : Pass::Status::SuccessWithoutChange;
510
0
}
511
512
}  // namespace opt
513
}  // namespace spvtools