Coverage Report

Created: 2025-06-13 06:49

/src/spirv-tools/source/opt/loop_peeling.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2018 Google LLC.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#ifndef SOURCE_OPT_LOOP_PEELING_H_
16
#define SOURCE_OPT_LOOP_PEELING_H_
17
18
#include <algorithm>
19
#include <limits>
20
#include <memory>
21
#include <tuple>
22
#include <unordered_map>
23
#include <unordered_set>
24
#include <utility>
25
#include <vector>
26
27
#include "source/opt/ir_context.h"
28
#include "source/opt/loop_descriptor.h"
29
#include "source/opt/loop_utils.h"
30
#include "source/opt/pass.h"
31
#include "source/opt/scalar_analysis.h"
32
33
namespace spvtools {
34
namespace opt {
35
36
// Utility class to perform the peeling of a given loop.
37
// The loop peeling transformation make a certain amount of a loop iterations to
38
// be executed either before (peel before) or after (peel after) the transformed
39
// loop.
40
//
41
// For peeling cases the transformation does the following steps:
42
//   - It clones the loop and inserts the cloned loop before the original loop;
43
//   - It connects all iterating values of the cloned loop with the
44
//     corresponding original loop values so that the second loop starts with
45
//     the appropriate values.
46
//   - It inserts a new induction variable "i" is inserted into the cloned that
47
//     starts with the value 0 and increment by step of one.
48
//
49
// The last step is specific to each case:
50
//   - Peel before: the transformation is to peel the "N" first iterations.
51
//     The exit condition of the cloned loop is changed so that the loop
52
//     exits when "i < N" becomes false. The original loop is then protected to
53
//     only execute if there is any iteration left to do.
54
//   - Peel after: the transformation is to peel the "N" last iterations,
55
//     then the exit condition of the cloned loop is changed so that the loop
56
//     exits when "i + N < max_iteration" becomes false, where "max_iteration"
57
//     is the upper bound of the loop. The cloned loop is then protected to
58
//     only execute if there is any iteration left to do no covered by the
59
//     second.
60
//
61
// To be peelable:
62
//   - The loop must be in LCSSA form;
63
//   - The loop must not contain any breaks;
64
//   - The loop must not have any ambiguous iterators updates (see
65
//     "CanPeelLoop").
66
// The method "CanPeelLoop" checks that those constrained are met.
67
class LoopPeeling {
68
 public:
69
  // LoopPeeling constructor.
70
  // |loop| is the loop to peel.
71
  // |loop_iteration_count| is the instruction holding the |loop| iteration
72
  // count, must be invariant for |loop| and must be of an int 32 type (signed
73
  // or unsigned).
74
  // |canonical_induction_variable| is an induction variable that can be used to
75
  // count the number of iterations, must be of the same type as
76
  // |loop_iteration_count| and start at 0 and increase by step of one at each
77
  // iteration. The value nullptr is interpreted as no suitable variable exists
78
  // and one will be created.
79
  LoopPeeling(Loop* loop, Instruction* loop_iteration_count,
80
              Instruction* canonical_induction_variable = nullptr)
81
0
      : context_(loop->GetContext()),
82
0
        loop_utils_(loop->GetContext(), loop),
83
0
        loop_(loop),
84
0
        loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count)
85
0
                                  ? loop_iteration_count
86
0
                                  : nullptr),
87
0
        int_type_(nullptr),
88
        original_loop_canonical_induction_variable_(
89
0
            canonical_induction_variable),
90
0
        canonical_induction_variable_(nullptr) {
91
0
    if (loop_iteration_count_) {
92
0
      int_type_ = context_->get_type_mgr()
93
0
                      ->GetType(loop_iteration_count_->type_id())
94
0
                      ->AsInteger();
95
0
      if (canonical_induction_variable_) {
96
0
        assert(canonical_induction_variable_->type_id() ==
97
0
                   loop_iteration_count_->type_id() &&
98
0
               "loop_iteration_count and canonical_induction_variable do not "
99
0
               "have the same type");
100
0
      }
101
0
    }
102
0
    GetIteratingExitValues();
103
0
  }
104
105
  // Returns true if the loop can be peeled.
106
  // To be peelable, all operation involved in the update of the loop iterators
107
  // must not dominates the exit condition. This restriction is a work around to
108
  // not miss compile code like:
109
  //
110
  //   for (int i = 0; i + 1 < N; i++) {}
111
  //   for (int i = 0; ++i < N; i++) {}
112
  //
113
  // The increment will happen before the test on the exit condition leading to
114
  // very look-a-like code.
115
  //
116
  // This restriction will not apply if a loop rotate is applied before (i.e.
117
  // becomes a do-while loop).
118
0
  bool CanPeelLoop() const {
119
0
    CFG& cfg = *context_->cfg();
120
121
0
    if (!loop_iteration_count_) {
122
0
      return false;
123
0
    }
124
0
    if (!int_type_) {
125
0
      return false;
126
0
    }
127
0
    if (int_type_->width() != 32) {
128
0
      return false;
129
0
    }
130
0
    if (!loop_->IsLCSSA()) {
131
0
      return false;
132
0
    }
133
0
    if (!loop_->GetMergeBlock()) {
134
0
      return false;
135
0
    }
136
0
    if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
137
0
      return false;
138
0
    }
139
0
    if (!IsConditionCheckSideEffectFree()) {
140
0
      return false;
141
0
    }
142
143
0
    return !std::any_of(exit_value_.cbegin(), exit_value_.cend(),
144
0
                        [](std::pair<uint32_t, Instruction*> it) {
145
0
                          return it.second == nullptr;
146
0
                        });
147
0
  }
148
149
  // Moves the execution of the |factor| first iterations of the loop into a
150
  // dedicated loop.
151
  void PeelBefore(uint32_t factor);
152
153
  // Moves the execution of the |factor| last iterations of the loop into a
154
  // dedicated loop.
155
  void PeelAfter(uint32_t factor);
156
157
  // Returns the cloned loop.
158
0
  Loop* GetClonedLoop() { return cloned_loop_; }
159
  // Returns the original loop.
160
0
  Loop* GetOriginalLoop() { return loop_; }
161
162
 private:
163
  IRContext* context_;
164
  LoopUtils loop_utils_;
165
  // The original loop.
166
  Loop* loop_;
167
  // The initial |loop_| upper bound.
168
  Instruction* loop_iteration_count_;
169
  // The int type to use for the canonical_induction_variable_.
170
  analysis::Integer* int_type_;
171
  // The cloned loop.
172
  Loop* cloned_loop_;
173
  // This is set to true when the exit and back-edge branch instruction is the
174
  // same.
175
  bool do_while_form_;
176
  // The canonical induction variable from the original loop if it exists.
177
  Instruction* original_loop_canonical_induction_variable_;
178
  // The canonical induction variable of the cloned loop. The induction variable
179
  // is initialized to 0 and incremented by step of 1.
180
  Instruction* canonical_induction_variable_;
181
  // Map between loop iterators and exit values. Loop iterators
182
  std::unordered_map<uint32_t, Instruction*> exit_value_;
183
184
  // Duplicate |loop_| and place the new loop before the cloned loop. Iterating
185
  // values from the cloned loop are then connected to the original loop as
186
  // initializer.
187
  void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results);
188
189
  // Insert the canonical induction variable into the first loop as a simplified
190
  // counter.
191
  void InsertCanonicalInductionVariable(
192
      LoopUtils::LoopCloningResult* clone_results);
193
194
  // Fixes the exit condition of the before loop. The function calls
195
  // |condition_builder| to get the condition to use in the conditional branch
196
  // of the loop exit. The loop will be exited if the condition evaluate to
197
  // true. |condition_builder| takes an Instruction* that represent the
198
  // insertion point.
199
  void FixExitCondition(
200
      const std::function<uint32_t(Instruction*)>& condition_builder);
201
202
  // Gathers all operations involved in the update of |iterator| into
203
  // |operations|.
204
  void GetIteratorUpdateOperations(
205
      const Loop* loop, Instruction* iterator,
206
      std::unordered_set<Instruction*>* operations);
207
208
  // Gathers exiting iterator values. The function builds a map between each
209
  // iterating value in the loop (a phi instruction in the loop header) and its
210
  // SSA value when it exit the loop. If no exit value can be accurately found,
211
  // it is map to nullptr (see comment on CanPeelLoop).
212
  void GetIteratingExitValues();
213
214
  // Returns true if a for-loop has no instruction with effects before the
215
  // condition check.
216
  bool IsConditionCheckSideEffectFree() const;
217
218
  // Creates a new basic block and insert it between |bb| and the predecessor of
219
  // |bb|.
220
  BasicBlock* CreateBlockBefore(BasicBlock* bb);
221
222
  // Inserts code to only execute |loop| only if the given |condition| is true.
223
  // |if_merge| is a suitable basic block to be used by the if condition as
224
  // merge block.
225
  // The function returns the if block protecting the loop.
226
  BasicBlock* ProtectLoop(Loop* loop, Instruction* condition,
227
                          BasicBlock* if_merge);
228
};
229
230
// Implements a loop peeling optimization.
231
// For each loop, the pass will try to peel it if there is conditions that
232
// are true for the "N" first or last iterations of the loop.
233
// To avoid code size explosion, too large loops will not be peeled.
234
class LoopPeelingPass : public Pass {
235
 public:
236
  // Describes the peeling direction.
237
  enum class PeelDirection {
238
    kNone,    // Cannot peel
239
    kBefore,  // Can peel before
240
    kAfter    // Can peel last
241
  };
242
243
  // Holds some statistics about peeled function.
244
  struct LoopPeelingStats {
245
    std::vector<std::tuple<const Loop*, PeelDirection, uint32_t>> peeled_loops_;
246
  };
247
248
0
  LoopPeelingPass(LoopPeelingStats* stats = nullptr) : stats_(stats) {}
249
250
  // Sets the loop peeling growth threshold. If the code size increase is above
251
  // |code_grow_threshold|, the loop will not be peeled. The code size is
252
  // measured in terms of SPIR-V instructions.
253
0
  static void SetLoopPeelingThreshold(size_t code_grow_threshold) {
254
0
    code_grow_threshold_ = code_grow_threshold;
255
0
  }
256
257
  // Returns the loop peeling code growth threshold.
258
0
  static size_t GetLoopPeelingThreshold() { return code_grow_threshold_; }
259
260
0
  const char* name() const override { return "loop-peeling"; }
261
262
  // Processes the given |module|. Returns Status::Failure if errors occur when
263
  // processing. Returns the corresponding Status::Success if processing is
264
  // successful to indicate whether changes have been made to the module.
265
  Pass::Status Process() override;
266
267
 private:
268
  // Describes the peeling direction.
269
  enum class CmpOperator {
270
    kLT,  // less than
271
    kGT,  // greater than
272
    kLE,  // less than or equal
273
    kGE,  // greater than or equal
274
  };
275
276
  class LoopPeelingInfo {
277
   public:
278
    using Direction = std::pair<PeelDirection, uint32_t>;
279
280
    LoopPeelingInfo(Loop* loop, size_t loop_max_iterations,
281
                    ScalarEvolutionAnalysis* scev_analysis)
282
0
        : context_(loop->GetContext()),
283
0
          loop_(loop),
284
0
          scev_analysis_(scev_analysis),
285
0
          loop_max_iterations_(loop_max_iterations) {}
286
287
    // Returns by how much and to which direction a loop should be peeled to
288
    // make the conditional branch of the basic block |bb| an unconditional
289
    // branch. If |bb|'s terminator is not a conditional branch or the condition
290
    // is not workable then it returns PeelDirection::kNone and a 0 factor.
291
    Direction GetPeelingInfo(BasicBlock* bb) const;
292
293
   private:
294
    // Returns the id of the loop invariant operand of the conditional
295
    // expression |condition|. It returns if no operand is invariant.
296
    uint32_t GetFirstLoopInvariantOperand(Instruction* condition) const;
297
    // Returns the id of the non loop invariant operand of the conditional
298
    // expression |condition|. It returns if all operands are invariant.
299
    uint32_t GetFirstNonLoopInvariantOperand(Instruction* condition) const;
300
301
    // Returns the value of |rec| at the first loop iteration.
302
    SExpression GetValueAtFirstIteration(SERecurrentNode* rec) const;
303
    // Returns the value of |rec| at the given |iteration|.
304
    SExpression GetValueAtIteration(SERecurrentNode* rec,
305
                                    int64_t iteration) const;
306
    // Returns the value of |rec| at the last loop iteration.
307
    SExpression GetValueAtLastIteration(SERecurrentNode* rec) const;
308
309
    bool EvalOperator(CmpOperator cmp_op, SExpression lhs, SExpression rhs,
310
                      bool* result) const;
311
312
    Direction HandleEquality(SExpression lhs, SExpression rhs) const;
313
    Direction HandleInequality(CmpOperator cmp_op, SExpression lhs,
314
                               SERecurrentNode* rhs) const;
315
316
0
    static Direction GetNoneDirection() {
317
0
      return Direction{LoopPeelingPass::PeelDirection::kNone, 0};
318
0
    }
319
    IRContext* context_;
320
    Loop* loop_;
321
    ScalarEvolutionAnalysis* scev_analysis_;
322
    size_t loop_max_iterations_;
323
  };
324
  // Peel profitable loops in |f|.
325
  bool ProcessFunction(Function* f);
326
  // Peel |loop| if profitable.
327
  std::pair<bool, Loop*> ProcessLoop(Loop* loop, CodeMetrics* loop_size);
328
329
  static size_t code_grow_threshold_;
330
  LoopPeelingStats* stats_;
331
};
332
333
}  // namespace opt
334
}  // namespace spvtools
335
336
#endif  // SOURCE_OPT_LOOP_PEELING_H_