/src/spirv-tools/source/opt/loop_peeling.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2018 Google LLC. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #ifndef SOURCE_OPT_LOOP_PEELING_H_ |
16 | | #define SOURCE_OPT_LOOP_PEELING_H_ |
17 | | |
18 | | #include <algorithm> |
19 | | #include <limits> |
20 | | #include <memory> |
21 | | #include <tuple> |
22 | | #include <unordered_map> |
23 | | #include <unordered_set> |
24 | | #include <utility> |
25 | | #include <vector> |
26 | | |
27 | | #include "source/opt/ir_context.h" |
28 | | #include "source/opt/loop_descriptor.h" |
29 | | #include "source/opt/loop_utils.h" |
30 | | #include "source/opt/pass.h" |
31 | | #include "source/opt/scalar_analysis.h" |
32 | | |
33 | | namespace spvtools { |
34 | | namespace opt { |
35 | | |
36 | | // Utility class to perform the peeling of a given loop. |
37 | | // The loop peeling transformation make a certain amount of a loop iterations to |
38 | | // be executed either before (peel before) or after (peel after) the transformed |
39 | | // loop. |
40 | | // |
41 | | // For peeling cases the transformation does the following steps: |
42 | | // - It clones the loop and inserts the cloned loop before the original loop; |
43 | | // - It connects all iterating values of the cloned loop with the |
44 | | // corresponding original loop values so that the second loop starts with |
45 | | // the appropriate values. |
46 | | // - It inserts a new induction variable "i" is inserted into the cloned that |
47 | | // starts with the value 0 and increment by step of one. |
48 | | // |
49 | | // The last step is specific to each case: |
50 | | // - Peel before: the transformation is to peel the "N" first iterations. |
51 | | // The exit condition of the cloned loop is changed so that the loop |
52 | | // exits when "i < N" becomes false. The original loop is then protected to |
53 | | // only execute if there is any iteration left to do. |
54 | | // - Peel after: the transformation is to peel the "N" last iterations, |
55 | | // then the exit condition of the cloned loop is changed so that the loop |
56 | | // exits when "i + N < max_iteration" becomes false, where "max_iteration" |
57 | | // is the upper bound of the loop. The cloned loop is then protected to |
58 | | // only execute if there is any iteration left to do no covered by the |
59 | | // second. |
60 | | // |
61 | | // To be peelable: |
62 | | // - The loop must be in LCSSA form; |
63 | | // - The loop must not contain any breaks; |
64 | | // - The loop must not have any ambiguous iterators updates (see |
65 | | // "CanPeelLoop"). |
66 | | // The method "CanPeelLoop" checks that those constrained are met. |
67 | | class LoopPeeling { |
68 | | public: |
69 | | // LoopPeeling constructor. |
70 | | // |loop| is the loop to peel. |
71 | | // |loop_iteration_count| is the instruction holding the |loop| iteration |
72 | | // count, must be invariant for |loop| and must be of an int 32 type (signed |
73 | | // or unsigned). |
74 | | // |canonical_induction_variable| is an induction variable that can be used to |
75 | | // count the number of iterations, must be of the same type as |
76 | | // |loop_iteration_count| and start at 0 and increase by step of one at each |
77 | | // iteration. The value nullptr is interpreted as no suitable variable exists |
78 | | // and one will be created. |
79 | | LoopPeeling(Loop* loop, Instruction* loop_iteration_count, |
80 | | Instruction* canonical_induction_variable = nullptr) |
81 | 0 | : context_(loop->GetContext()), |
82 | 0 | loop_utils_(loop->GetContext(), loop), |
83 | 0 | loop_(loop), |
84 | 0 | loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count) |
85 | 0 | ? loop_iteration_count |
86 | 0 | : nullptr), |
87 | 0 | int_type_(nullptr), |
88 | | original_loop_canonical_induction_variable_( |
89 | 0 | canonical_induction_variable), |
90 | 0 | canonical_induction_variable_(nullptr) { |
91 | 0 | if (loop_iteration_count_) { |
92 | 0 | int_type_ = context_->get_type_mgr() |
93 | 0 | ->GetType(loop_iteration_count_->type_id()) |
94 | 0 | ->AsInteger(); |
95 | 0 | if (canonical_induction_variable_) { |
96 | 0 | assert(canonical_induction_variable_->type_id() == |
97 | 0 | loop_iteration_count_->type_id() && |
98 | 0 | "loop_iteration_count and canonical_induction_variable do not " |
99 | 0 | "have the same type"); |
100 | 0 | } |
101 | 0 | } |
102 | 0 | GetIteratingExitValues(); |
103 | 0 | } |
104 | | |
105 | | // Returns true if the loop can be peeled. |
106 | | // To be peelable, all operation involved in the update of the loop iterators |
107 | | // must not dominates the exit condition. This restriction is a work around to |
108 | | // not miss compile code like: |
109 | | // |
110 | | // for (int i = 0; i + 1 < N; i++) {} |
111 | | // for (int i = 0; ++i < N; i++) {} |
112 | | // |
113 | | // The increment will happen before the test on the exit condition leading to |
114 | | // very look-a-like code. |
115 | | // |
116 | | // This restriction will not apply if a loop rotate is applied before (i.e. |
117 | | // becomes a do-while loop). |
118 | 0 | bool CanPeelLoop() const { |
119 | 0 | CFG& cfg = *context_->cfg(); |
120 | |
|
121 | 0 | if (!loop_iteration_count_) { |
122 | 0 | return false; |
123 | 0 | } |
124 | 0 | if (!int_type_) { |
125 | 0 | return false; |
126 | 0 | } |
127 | 0 | if (int_type_->width() != 32) { |
128 | 0 | return false; |
129 | 0 | } |
130 | 0 | if (!loop_->IsLCSSA()) { |
131 | 0 | return false; |
132 | 0 | } |
133 | 0 | if (!loop_->GetMergeBlock()) { |
134 | 0 | return false; |
135 | 0 | } |
136 | 0 | if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) { |
137 | 0 | return false; |
138 | 0 | } |
139 | 0 | if (!IsConditionCheckSideEffectFree()) { |
140 | 0 | return false; |
141 | 0 | } |
142 | | |
143 | 0 | return !std::any_of(exit_value_.cbegin(), exit_value_.cend(), |
144 | 0 | [](std::pair<uint32_t, Instruction*> it) { |
145 | 0 | return it.second == nullptr; |
146 | 0 | }); |
147 | 0 | } |
148 | | |
149 | | // Moves the execution of the |factor| first iterations of the loop into a |
150 | | // dedicated loop. |
151 | | void PeelBefore(uint32_t factor); |
152 | | |
153 | | // Moves the execution of the |factor| last iterations of the loop into a |
154 | | // dedicated loop. |
155 | | void PeelAfter(uint32_t factor); |
156 | | |
157 | | // Returns the cloned loop. |
158 | 0 | Loop* GetClonedLoop() { return cloned_loop_; } |
159 | | // Returns the original loop. |
160 | 0 | Loop* GetOriginalLoop() { return loop_; } |
161 | | |
162 | | private: |
163 | | IRContext* context_; |
164 | | LoopUtils loop_utils_; |
165 | | // The original loop. |
166 | | Loop* loop_; |
167 | | // The initial |loop_| upper bound. |
168 | | Instruction* loop_iteration_count_; |
169 | | // The int type to use for the canonical_induction_variable_. |
170 | | analysis::Integer* int_type_; |
171 | | // The cloned loop. |
172 | | Loop* cloned_loop_; |
173 | | // This is set to true when the exit and back-edge branch instruction is the |
174 | | // same. |
175 | | bool do_while_form_; |
176 | | // The canonical induction variable from the original loop if it exists. |
177 | | Instruction* original_loop_canonical_induction_variable_; |
178 | | // The canonical induction variable of the cloned loop. The induction variable |
179 | | // is initialized to 0 and incremented by step of 1. |
180 | | Instruction* canonical_induction_variable_; |
181 | | // Map between loop iterators and exit values. Loop iterators |
182 | | std::unordered_map<uint32_t, Instruction*> exit_value_; |
183 | | |
184 | | // Duplicate |loop_| and place the new loop before the cloned loop. Iterating |
185 | | // values from the cloned loop are then connected to the original loop as |
186 | | // initializer. |
187 | | void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results); |
188 | | |
189 | | // Insert the canonical induction variable into the first loop as a simplified |
190 | | // counter. |
191 | | void InsertCanonicalInductionVariable( |
192 | | LoopUtils::LoopCloningResult* clone_results); |
193 | | |
194 | | // Fixes the exit condition of the before loop. The function calls |
195 | | // |condition_builder| to get the condition to use in the conditional branch |
196 | | // of the loop exit. The loop will be exited if the condition evaluate to |
197 | | // true. |condition_builder| takes an Instruction* that represent the |
198 | | // insertion point. |
199 | | void FixExitCondition( |
200 | | const std::function<uint32_t(Instruction*)>& condition_builder); |
201 | | |
202 | | // Gathers all operations involved in the update of |iterator| into |
203 | | // |operations|. |
204 | | void GetIteratorUpdateOperations( |
205 | | const Loop* loop, Instruction* iterator, |
206 | | std::unordered_set<Instruction*>* operations); |
207 | | |
208 | | // Gathers exiting iterator values. The function builds a map between each |
209 | | // iterating value in the loop (a phi instruction in the loop header) and its |
210 | | // SSA value when it exit the loop. If no exit value can be accurately found, |
211 | | // it is map to nullptr (see comment on CanPeelLoop). |
212 | | void GetIteratingExitValues(); |
213 | | |
214 | | // Returns true if a for-loop has no instruction with effects before the |
215 | | // condition check. |
216 | | bool IsConditionCheckSideEffectFree() const; |
217 | | |
218 | | // Creates a new basic block and insert it between |bb| and the predecessor of |
219 | | // |bb|. |
220 | | BasicBlock* CreateBlockBefore(BasicBlock* bb); |
221 | | |
222 | | // Inserts code to only execute |loop| only if the given |condition| is true. |
223 | | // |if_merge| is a suitable basic block to be used by the if condition as |
224 | | // merge block. |
225 | | // The function returns the if block protecting the loop. |
226 | | BasicBlock* ProtectLoop(Loop* loop, Instruction* condition, |
227 | | BasicBlock* if_merge); |
228 | | }; |
229 | | |
230 | | // Implements a loop peeling optimization. |
231 | | // For each loop, the pass will try to peel it if there is conditions that |
232 | | // are true for the "N" first or last iterations of the loop. |
233 | | // To avoid code size explosion, too large loops will not be peeled. |
234 | | class LoopPeelingPass : public Pass { |
235 | | public: |
236 | | // Describes the peeling direction. |
237 | | enum class PeelDirection { |
238 | | kNone, // Cannot peel |
239 | | kBefore, // Can peel before |
240 | | kAfter // Can peel last |
241 | | }; |
242 | | |
243 | | // Holds some statistics about peeled function. |
244 | | struct LoopPeelingStats { |
245 | | std::vector<std::tuple<const Loop*, PeelDirection, uint32_t>> peeled_loops_; |
246 | | }; |
247 | | |
248 | 0 | LoopPeelingPass(LoopPeelingStats* stats = nullptr) : stats_(stats) {} |
249 | | |
250 | | // Sets the loop peeling growth threshold. If the code size increase is above |
251 | | // |code_grow_threshold|, the loop will not be peeled. The code size is |
252 | | // measured in terms of SPIR-V instructions. |
253 | 0 | static void SetLoopPeelingThreshold(size_t code_grow_threshold) { |
254 | 0 | code_grow_threshold_ = code_grow_threshold; |
255 | 0 | } |
256 | | |
257 | | // Returns the loop peeling code growth threshold. |
258 | 0 | static size_t GetLoopPeelingThreshold() { return code_grow_threshold_; } |
259 | | |
260 | 0 | const char* name() const override { return "loop-peeling"; } |
261 | | |
262 | | // Processes the given |module|. Returns Status::Failure if errors occur when |
263 | | // processing. Returns the corresponding Status::Success if processing is |
264 | | // successful to indicate whether changes have been made to the module. |
265 | | Pass::Status Process() override; |
266 | | |
267 | | private: |
268 | | // Describes the peeling direction. |
269 | | enum class CmpOperator { |
270 | | kLT, // less than |
271 | | kGT, // greater than |
272 | | kLE, // less than or equal |
273 | | kGE, // greater than or equal |
274 | | }; |
275 | | |
276 | | class LoopPeelingInfo { |
277 | | public: |
278 | | using Direction = std::pair<PeelDirection, uint32_t>; |
279 | | |
280 | | LoopPeelingInfo(Loop* loop, size_t loop_max_iterations, |
281 | | ScalarEvolutionAnalysis* scev_analysis) |
282 | 0 | : context_(loop->GetContext()), |
283 | 0 | loop_(loop), |
284 | 0 | scev_analysis_(scev_analysis), |
285 | 0 | loop_max_iterations_(loop_max_iterations) {} |
286 | | |
287 | | // Returns by how much and to which direction a loop should be peeled to |
288 | | // make the conditional branch of the basic block |bb| an unconditional |
289 | | // branch. If |bb|'s terminator is not a conditional branch or the condition |
290 | | // is not workable then it returns PeelDirection::kNone and a 0 factor. |
291 | | Direction GetPeelingInfo(BasicBlock* bb) const; |
292 | | |
293 | | private: |
294 | | // Returns the id of the loop invariant operand of the conditional |
295 | | // expression |condition|. It returns if no operand is invariant. |
296 | | uint32_t GetFirstLoopInvariantOperand(Instruction* condition) const; |
297 | | // Returns the id of the non loop invariant operand of the conditional |
298 | | // expression |condition|. It returns if all operands are invariant. |
299 | | uint32_t GetFirstNonLoopInvariantOperand(Instruction* condition) const; |
300 | | |
301 | | // Returns the value of |rec| at the first loop iteration. |
302 | | SExpression GetValueAtFirstIteration(SERecurrentNode* rec) const; |
303 | | // Returns the value of |rec| at the given |iteration|. |
304 | | SExpression GetValueAtIteration(SERecurrentNode* rec, |
305 | | int64_t iteration) const; |
306 | | // Returns the value of |rec| at the last loop iteration. |
307 | | SExpression GetValueAtLastIteration(SERecurrentNode* rec) const; |
308 | | |
309 | | bool EvalOperator(CmpOperator cmp_op, SExpression lhs, SExpression rhs, |
310 | | bool* result) const; |
311 | | |
312 | | Direction HandleEquality(SExpression lhs, SExpression rhs) const; |
313 | | Direction HandleInequality(CmpOperator cmp_op, SExpression lhs, |
314 | | SERecurrentNode* rhs) const; |
315 | | |
316 | 0 | static Direction GetNoneDirection() { |
317 | 0 | return Direction{LoopPeelingPass::PeelDirection::kNone, 0}; |
318 | 0 | } |
319 | | IRContext* context_; |
320 | | Loop* loop_; |
321 | | ScalarEvolutionAnalysis* scev_analysis_; |
322 | | size_t loop_max_iterations_; |
323 | | }; |
324 | | // Peel profitable loops in |f|. |
325 | | bool ProcessFunction(Function* f); |
326 | | // Peel |loop| if profitable. |
327 | | std::pair<bool, Loop*> ProcessLoop(Loop* loop, CodeMetrics* loop_size); |
328 | | |
329 | | static size_t code_grow_threshold_; |
330 | | LoopPeelingStats* stats_; |
331 | | }; |
332 | | |
333 | | } // namespace opt |
334 | | } // namespace spvtools |
335 | | |
336 | | #endif // SOURCE_OPT_LOOP_PEELING_H_ |