/src/spirv-tools/source/opt/loop_utils.cpp
// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "source/cfa.h"
#include "source/opt/cfg.h"
#include "source/opt/ir_builder.h"
#include "source/opt/ir_context.h"
#include "source/opt/loop_descriptor.h"
#include "source/opt/loop_utils.h"

namespace spvtools {
namespace opt {
namespace {
// Returns true if |bb| dominates at least one block in |exits|.
inline bool DominatesAnExit(BasicBlock* bb,
                            const std::unordered_set<BasicBlock*>& exits,
                            const DominatorTree& dom_tree) {
  for (BasicBlock* e_bb : exits)
    if (dom_tree.Dominates(bb, e_bb)) return true;
  return false;
}

// Utility class to rewrite the out-of-loop uses of an in-loop definition in
// terms of phi instructions to achieve LCSSA form.
// For a given definition, the class user registers phi instructions, using
// that definition, in all the loop exit blocks through which the definition
// escapes.
// Then, when rewriting a use of the definition, the rewriter walks the paths
// from the use to the loop exits. At each step, it inserts a phi instruction
// to merge the incoming values according to the exit blocks' definitions.
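//
// Illustrative sketch (hypothetical SPIR-V, not from a real module): before
// rewriting, %def is defined in the loop body and used directly after the
// loop; after rewriting, the out-of-loop use goes through a phi in the exit
// block:
//
//   ; before                        ; after
//   %body = OpLabel                 %body = OpLabel
//   %def  = OpIAdd %int %a %b       %def  = OpIAdd %int %a %b
//   ...                             ...
//   %exit = OpLabel                 %exit = OpLabel
//   ...                             %phi  = OpPhi %int %def %body
//   %use  = OpIMul %int %def %c     %use  = OpIMul %int %phi %c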
class LCSSARewriter {
 public:
  LCSSARewriter(IRContext* context, const DominatorTree& dom_tree,
                const std::unordered_set<BasicBlock*>& exit_bb,
                BasicBlock* merge_block)
      : context_(context),
        cfg_(context_->cfg()),
        dom_tree_(dom_tree),
        exit_bb_(exit_bb),
        merge_block_id_(merge_block ? merge_block->id() : 0) {}

  struct UseRewriter {
    explicit UseRewriter(LCSSARewriter* base, const Instruction& def_insn)
        : base_(base), def_insn_(def_insn) {}
    // Rewrites the use of |def_insn_| by the instruction |user| at the index
    // |operand_index| in terms of phi instructions. This recursively builds
    // new phi instructions from |user| to the loop exit blocks' phis. The use
    // of |def_insn_| in |user| is replaced by the relevant phi instruction at
    // the end of the operation.
    // It is assumed that |user| does not dominate any of the loop exit basic
    // blocks. This operation does not update the def/use manager; instead it
    // records what needs to be updated. The actual update is performed by
    // UpdateManagers.
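    //
    // For example (hypothetical CFG, for illustration only): if two exit
    // blocks %e1 and %e2 both flow into %join where |def_insn_| is used,
    // RewriteUse builds an exit phi in each of %e1 and %e2, then a merging
    // phi in %join, and finally points the use at the %join phi:
    //
    //   %e1:   %p1 = OpPhi %int %def %latch1
    //   %e2:   %p2 = OpPhi %int %def %latch2
    //   %join: %p3 = OpPhi %int %p1 %e1 %p2 %e2
    //          %use = ... %p3 ...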
    void RewriteUse(BasicBlock* bb, Instruction* user, uint32_t operand_index) {
      assert(
          (user->opcode() != spv::Op::OpPhi || bb != GetParent(user)) &&
          "The root basic block must be the incoming edge if |user| is a phi "
          "instruction");
      assert((user->opcode() == spv::Op::OpPhi || bb == GetParent(user)) &&
             "The root basic block must be the instruction parent if |user| "
             "is not a phi instruction");

      Instruction* new_def = GetOrBuildIncoming(bb->id());

      user->SetOperand(operand_index, {new_def->result_id()});
      rewritten_.insert(user);
    }

    // In-place update of some managers (to avoid a full invalidation).
    inline void UpdateManagers() {
      analysis::DefUseManager* def_use_mgr = base_->context_->get_def_use_mgr();
      // Register all new definitions.
      for (Instruction* insn : rewritten_) {
        def_use_mgr->AnalyzeInstDef(insn);
      }
      // Register all new uses.
      for (Instruction* insn : rewritten_) {
        def_use_mgr->AnalyzeInstUse(insn);
      }
    }

   private:
    // Returns the basic block that |instr| belongs to.
    BasicBlock* GetParent(Instruction* instr) {
      return base_->context_->get_instr_block(instr);
    }

    // Builds a phi instruction for the basic block |bb|. The function assumes
    // that |defining_blocks| contains the list of basic blocks that define the
    // usable value for each predecessor of |bb|.
    inline Instruction* CreatePhiInstruction(
        BasicBlock* bb, const std::vector<uint32_t>& defining_blocks) {
      std::vector<uint32_t> incomings;
      const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id());
      assert(bb_preds.size() == defining_blocks.size());
      for (size_t i = 0; i < bb_preds.size(); i++) {
        incomings.push_back(
            GetOrBuildIncoming(defining_blocks[i])->result_id());
        incomings.push_back(bb_preds[i]);
      }
      InstructionBuilder builder(base_->context_, &*bb->begin(),
                                 IRContext::kAnalysisInstrToBlockMapping);
      Instruction* incoming_phi =
          builder.AddPhi(def_insn_.type_id(), incomings);

      rewritten_.insert(incoming_phi);
      return incoming_phi;
    }

    // Builds a phi instruction for the basic block |bb|; all incoming values
    // will be |value|.
    inline Instruction* CreatePhiInstruction(BasicBlock* bb,
                                             const Instruction& value) {
      std::vector<uint32_t> incomings;
      const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id());
      for (size_t i = 0; i < bb_preds.size(); i++) {
        incomings.push_back(value.result_id());
        incomings.push_back(bb_preds[i]);
      }
      InstructionBuilder builder(base_->context_, &*bb->begin(),
                                 IRContext::kAnalysisInstrToBlockMapping);
      Instruction* incoming_phi =
          builder.AddPhi(def_insn_.type_id(), incomings);

      rewritten_.insert(incoming_phi);
      return incoming_phi;
    }

    // Returns the new def to use for the basic block |bb_id|.
    // If |bb_id| does not have a suitable def to use, then we:
    //   - return the common def used by all predecessors;
    //   - if there is no common def, build a new phi instruction at the
    //     beginning of |bb_id| and return this new instruction.
    Instruction* GetOrBuildIncoming(uint32_t bb_id) {
      assert(base_->cfg_->block(bb_id) != nullptr && "Unknown basic block");

      Instruction*& incoming_phi = bb_to_phi_[bb_id];
      if (incoming_phi) {
        return incoming_phi;
      }

      BasicBlock* bb = &*base_->cfg_->block(bb_id);
      // If this is an exit basic block, check whether there already is an
      // eligible phi instruction. An eligible phi has |def_insn_| as all of
      // its incoming values.
      if (base_->exit_bb_.count(bb)) {
        // Look for an eligible phi in this block.
        if (!bb->WhileEachPhiInst([&incoming_phi, this](Instruction* phi) {
              for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
                if (phi->GetSingleWordInOperand(i) != def_insn_.result_id())
                  return true;
              }
              incoming_phi = phi;
              rewritten_.insert(incoming_phi);
              return false;
            })) {
          return incoming_phi;
        }
        incoming_phi = CreatePhiInstruction(bb, def_insn_);
        return incoming_phi;
      }

      // Get the block that defines the value to use for each predecessor.
      // If the vector has a single value, then this block does not need a phi
      // instruction unless |bb_id| is the loop merge block.
      const std::vector<uint32_t>& defining_blocks =
          base_->GetDefiningBlocks(bb_id);

      // Special case for structured loops: the merge block might not be in
      // the exit block set. To maintain the structured properties, later
      // transformations are easier if the merge block also holds a phi
      // instruction like the exit blocks do.
      if (defining_blocks.size() > 1 || bb_id == base_->merge_block_id_) {
        if (defining_blocks.size() > 1) {
          incoming_phi = CreatePhiInstruction(bb, defining_blocks);
        } else {
          assert(bb_id == base_->merge_block_id_);
          incoming_phi =
              CreatePhiInstruction(bb, *GetOrBuildIncoming(defining_blocks[0]));
        }
      } else {
        incoming_phi = GetOrBuildIncoming(defining_blocks[0]);
      }

      return incoming_phi;
    }

    LCSSARewriter* base_;
    const Instruction& def_insn_;
    std::unordered_map<uint32_t, Instruction*> bb_to_phi_;
    std::unordered_set<Instruction*> rewritten_;
  };

 private:
  // Returns the list of basic blocks that hold the definitions reaching
  // |bb_id| (see |bb_to_defining_blocks_| below):
  //   - if a loop exit block dominates |bb_id|, that exit block is the only
  //     defining block;
  //   - otherwise, one defining block is gathered per predecessor; if all
  //     predecessors agree on the same block, the list is collapsed to that
  //     single block (no phi instruction will be needed).
  const std::vector<uint32_t>& GetDefiningBlocks(uint32_t bb_id) {
    assert(cfg_->block(bb_id) != nullptr && "Unknown basic block");
    std::vector<uint32_t>& defining_blocks = bb_to_defining_blocks_[bb_id];

    if (defining_blocks.size()) return defining_blocks;

    // Check if one of the loop exit basic blocks dominates |bb_id|.
    for (const BasicBlock* e_bb : exit_bb_) {
      if (dom_tree_.Dominates(e_bb->id(), bb_id)) {
        defining_blocks.push_back(e_bb->id());
        return defining_blocks;
      }
    }

    // Process the predecessors: each of them returns its suitable blocks.
    // If they are all the same, this basic block is dominated by a common
    // block and we won't need to build a phi instruction.
    for (uint32_t pred_id : cfg_->preds(bb_id)) {
      const std::vector<uint32_t>& pred_blocks = GetDefiningBlocks(pred_id);
      if (pred_blocks.size() == 1)
        defining_blocks.push_back(pred_blocks[0]);
      else
        defining_blocks.push_back(pred_id);
    }
    assert(defining_blocks.size());
    if (std::all_of(defining_blocks.begin(), defining_blocks.end(),
                    [&defining_blocks](uint32_t id) {
                      return id == defining_blocks[0];
                    })) {
      // No need for a phi.
      defining_blocks.resize(1);
    }

    return defining_blocks;
  }

  IRContext* context_;
  CFG* cfg_;
  const DominatorTree& dom_tree_;
  const std::unordered_set<BasicBlock*>& exit_bb_;
  uint32_t merge_block_id_;
  // This map represents the set of known paths. For each key, the vector
  // holds the set of blocks that carry the definition to be used to build
  // the phi instruction:
  //   - if the vector is empty, the path is not known yet and must be built;
  //   - if the vector has one entry, the value defined by that basic block
  //     should be used directly;
  //   - if the vector has more than one entry, a phi node must be created;
  //     the basic block ordering matches the predecessor ordering.
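  //
  // For instance (made-up block ids, for illustration only): if %12 has the
  // predecessors %10 and %11, the entry {12 -> {10, 11}} requests a phi in
  // %12 merging the defs coming from %10 and %11, whereas {12 -> {10}} means
  // %12 can directly reuse the def available in %10.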
  std::unordered_map<uint32_t, std::vector<uint32_t>> bb_to_defining_blocks_;
};

// Makes the set |blocks| closed SSA. The set is closed SSA if all the uses
// outside the set are phi instructions in the exit basic block set (held by
// |lcssa_rewriter|).
inline void MakeSetClosedSSA(IRContext* context, Function* function,
                             const std::unordered_set<uint32_t>& blocks,
                             const std::unordered_set<BasicBlock*>& exit_bb,
                             LCSSARewriter* lcssa_rewriter) {
  CFG& cfg = *context->cfg();
  DominatorTree& dom_tree =
      context->GetDominatorAnalysis(function)->GetDomTree();
  analysis::DefUseManager* def_use_manager = context->get_def_use_mgr();

  for (uint32_t bb_id : blocks) {
    BasicBlock* bb = cfg.block(bb_id);
    // If bb does not dominate an exit block, then it cannot have escaping
    // defs.
    if (!DominatesAnExit(bb, exit_bb, dom_tree)) continue;
    for (Instruction& inst : *bb) {
      LCSSARewriter::UseRewriter rewriter(lcssa_rewriter, inst);
      def_use_manager->ForEachUse(
          &inst, [&blocks, &rewriter, &exit_bb, context](
                     Instruction* use, uint32_t operand_index) {
            BasicBlock* use_parent = context->get_instr_block(use);
            assert(use_parent);
            if (blocks.count(use_parent->id())) return;

            if (use->opcode() == spv::Op::OpPhi) {
              // If the use is a phi instruction in an exit block, then that
              // is already consistent with LCSSA form.
              if (exit_bb.count(use_parent)) {
                return;
              } else {
                // This is not an exit block, but the user is a phi
                // instruction. Consider the incoming branch only.
                use_parent = context->get_instr_block(
                    use->GetSingleWordOperand(operand_index + 1));
              }
            }
            // Rewrite the use. Note that this call does not invalidate the
            // def/use manager, so this operation is safe.
            rewriter.RewriteUse(use_parent, use, operand_index);
          });
      rewriter.UpdateManagers();
    }
  }
}

}  // namespace

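// Hypothetical illustration of the transform below (the block names are made
// up): if an exit block %merge has a predecessor %body inside the loop and a
// predecessor %other outside of it, a new block %exit is created so that all
// of %merge's in-loop predecessors go through it:
//
//   ; before                        ; after
//   %body:  OpBranch %merge         %body:  OpBranch %exit
//   %other: OpBranch %merge         %exit:  OpBranch %merge
//                                   %other: OpBranch %merge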
void LoopUtils::CreateLoopDedicatedExits() {
  Function* function = loop_->GetHeaderBlock()->GetParent();
  LoopDescriptor& loop_desc = *context_->GetLoopDescriptor(function);
  CFG& cfg = *context_->cfg();
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  const IRContext::Analysis PreservedAnalyses =
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping;

  // Gather the set of basic blocks that are not in this loop and have at
  // least one predecessor in the loop and one not in the loop.
  std::unordered_set<uint32_t> exit_bb_set;
  loop_->GetExitBlocks(&exit_bb_set);

  std::unordered_set<BasicBlock*> new_loop_exits;
  bool made_change = false;
  // For each exit block, we create a new block that gathers all the branches
  // from the loop and then falls into the old exit block.
  for (uint32_t non_dedicate_id : exit_bb_set) {
    BasicBlock* non_dedicate = cfg.block(non_dedicate_id);
    const std::vector<uint32_t>& bb_pred = cfg.preds(non_dedicate_id);
    // If all the predecessors are in the loop, the block is already a
    // dedicated exit; keep it as-is.
    if (std::all_of(bb_pred.begin(), bb_pred.end(),
                    [this](uint32_t id) { return loop_->IsInsideLoop(id); })) {
      new_loop_exits.insert(non_dedicate);
      continue;
    }

    made_change = true;
    Function::iterator insert_pt = function->begin();
    for (; insert_pt != function->end() && &*insert_pt != non_dedicate;
         ++insert_pt) {
    }
    assert(insert_pt != function->end() && "Basic Block not found");

    // Create the dedicated exit basic block.
    // TODO(1841): Handle id overflow.
    BasicBlock& exit = *insert_pt.InsertBefore(std::unique_ptr<BasicBlock>(
        new BasicBlock(std::unique_ptr<Instruction>(new Instruction(
            context_, spv::Op::OpLabel, 0, context_->TakeNextId(), {})))));
    exit.SetParent(function);

    // Redirect the in-loop predecessors to the |exit| block.
    for (uint32_t exit_pred_id : bb_pred) {
      if (loop_->IsInsideLoop(exit_pred_id)) {
        BasicBlock* pred_block = cfg.block(exit_pred_id);
        pred_block->ForEachSuccessorLabel([non_dedicate, &exit](uint32_t* id) {
          if (*id == non_dedicate->id()) *id = exit.id();
        });
        // Update the CFG.
        // |non_dedicate|'s predecessor list will be updated at the end of
        // the loop.
        cfg.RegisterBlock(pred_block);
      }
    }

    // Register the label with the def/use manager; this is required for the
    // phi patching below.
    def_use_mgr->AnalyzeInstDefUse(exit.GetLabelInst());
    context_->set_instr_block(exit.GetLabelInst(), &exit);

    InstructionBuilder builder(context_, &exit, PreservedAnalyses);
    // Now jump from our dedicated basic block to the old exit.
    // We also reset the insert point so all instructions are inserted before
    // the branch.
    builder.SetInsertPoint(builder.AddBranch(non_dedicate->id()));
    non_dedicate->ForEachPhiInst(
        [&builder, &exit, def_use_mgr, this](Instruction* phi) {
          // New phi operands for this instruction.
          std::vector<uint32_t> new_phi_op;
          // Phi operands for the dedicated exit block.
          std::vector<uint32_t> exit_phi_op;
          for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
            uint32_t def_id = phi->GetSingleWordInOperand(i);
            uint32_t incoming_id = phi->GetSingleWordInOperand(i + 1);
            if (loop_->IsInsideLoop(incoming_id)) {
              exit_phi_op.push_back(def_id);
              exit_phi_op.push_back(incoming_id);
            } else {
              new_phi_op.push_back(def_id);
              new_phi_op.push_back(incoming_id);
            }
          }

          // Build the new phi instruction in the dedicated exit block.
          Instruction* exit_phi = builder.AddPhi(phi->type_id(), exit_phi_op);
          // Build the new incoming branch.
          new_phi_op.push_back(exit_phi->result_id());
          new_phi_op.push_back(exit.id());
          // Rewrite the operands.
          uint32_t idx = 0;
          for (; idx < new_phi_op.size(); idx++)
            phi->SetInOperand(idx, {new_phi_op[idx]});
          // Remove the extra operands, from last to first (more efficient).
          for (uint32_t j = phi->NumInOperands() - 1; j >= idx; j--)
            phi->RemoveInOperand(j);
          // Update the def/use manager for this |phi|.
          def_use_mgr->AnalyzeInstUse(phi);
        });
    // Update the CFG.
    cfg.RegisterBlock(&exit);
    cfg.RemoveNonExistingEdges(non_dedicate->id());
    new_loop_exits.insert(&exit);
    // If |non_dedicate| is in a loop, add the new dedicated exit to that
    // loop.
    if (Loop* parent_loop = loop_desc[non_dedicate])
      parent_loop->AddBasicBlock(&exit);
  }

  if (new_loop_exits.size() == 1) {
    loop_->SetMergeBlock(*new_loop_exits.begin());
  }

  if (made_change) {
    context_->InvalidateAnalysesExceptFor(
        PreservedAnalyses | IRContext::kAnalysisCFG |
        IRContext::Analysis::kAnalysisLoopAnalysis);
  }
}

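// A minimal usage sketch (hypothetical driver code; assumes a LoopUtils
// instance built from an IRContext and one of its loops):
//
//   LoopUtils utils(context, loop);
//   utils.MakeLoopClosedSSA();
//   // Afterwards, every in-loop def used past the loop goes through a phi
//   // instruction in a dedicated exit block (and in the merge block).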
void LoopUtils::MakeLoopClosedSSA() {
  CreateLoopDedicatedExits();

  Function* function = loop_->GetHeaderBlock()->GetParent();
  CFG& cfg = *context_->cfg();
  DominatorTree& dom_tree =
      context_->GetDominatorAnalysis(function)->GetDomTree();

  std::unordered_set<BasicBlock*> exit_bb;
  {
    std::unordered_set<uint32_t> exit_bb_id;
    loop_->GetExitBlocks(&exit_bb_id);
    for (uint32_t bb_id : exit_bb_id) {
      exit_bb.insert(cfg.block(bb_id));
    }
  }

  LCSSARewriter lcssa_rewriter(context_, dom_tree, exit_bb,
                               loop_->GetMergeBlock());
  MakeSetClosedSSA(context_, function, loop_->GetBlocks(), exit_bb,
                   &lcssa_rewriter);

  // Make sure all defs post-dominated by the merge block have their last use
  // no further than the merge block.
  if (loop_->GetMergeBlock()) {
    std::unordered_set<uint32_t> merging_bb_id;
    loop_->GetMergingBlocks(&merging_bb_id);
    merging_bb_id.erase(loop_->GetMergeBlock()->id());
    // Reset the exit set: now only the merge block is the exit.
    exit_bb.clear();
    exit_bb.insert(loop_->GetMergeBlock());
    // LCSSARewriter is reusable here only because it forces the creation of
    // a phi instruction in the merge block.
    MakeSetClosedSSA(context_, function, merging_bb_id, exit_bb,
                     &lcssa_rewriter);
  }

  context_->InvalidateAnalysesExceptFor(
      IRContext::Analysis::kAnalysisCFG |
      IRContext::Analysis::kAnalysisDominatorAnalysis |
      IRContext::Analysis::kAnalysisLoopAnalysis);
}

Loop* LoopUtils::CloneLoop(LoopCloningResult* cloning_result) const {
  // Compute the structured order of the loop basic blocks and store it in
  // the vector |ordered_loop_blocks|.
  std::vector<BasicBlock*> ordered_loop_blocks;
  loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks);

  // Clone the loop.
  return CloneLoop(cloning_result, ordered_loop_blocks);
}

Loop* LoopUtils::CloneAndAttachLoopToHeader(LoopCloningResult* cloning_result) {
  // Clone the loop.
  Loop* new_loop = CloneLoop(cloning_result);

  // Create a new exit block/label for the new loop.
  // TODO(1841): Handle id overflow.
  std::unique_ptr<Instruction> new_label{new Instruction(
      context_, spv::Op::OpLabel, 0, context_->TakeNextId(), {})};
  std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))};
  new_exit_bb->SetParent(loop_->GetMergeBlock()->GetParent());

  // Create an unconditional branch to the header block.
  InstructionBuilder builder{context_, new_exit_bb.get()};
  builder.AddBranch(loop_->GetHeaderBlock()->id());

  // Save the ids of the new and old merge blocks.
  const uint32_t old_merge_block = loop_->GetMergeBlock()->id();
  const uint32_t new_merge_block = new_exit_bb->id();

  // Replace the uses of the old merge block in the new loop with the new
  // merge block.
  for (std::unique_ptr<BasicBlock>& basic_block : cloning_result->cloned_bb_) {
    for (Instruction& inst : *basic_block) {
      // For each operand in each instruction, check if it is using the old
      // merge block and change it to be the new merge block.
      auto replace_merge_use = [old_merge_block,
                                new_merge_block](uint32_t* id) {
        if (*id == old_merge_block) *id = new_merge_block;
      };
      inst.ForEachInOperand(replace_merge_use);
    }
  }

  const uint32_t old_header = loop_->GetHeaderBlock()->id();
  const uint32_t new_header = new_loop->GetHeaderBlock()->id();
  analysis::DefUseManager* def_use = context_->get_def_use_mgr();

  def_use->ForEachUse(old_header,
                      [new_header, this](Instruction* inst, uint32_t operand) {
                        if (!this->loop_->IsInsideLoop(inst))
                          inst->SetOperand(operand, {new_header});
                      });

  // TODO(1841): Handle failure to create pre-header.
  def_use->ForEachUse(
      loop_->GetOrCreatePreHeaderBlock()->id(),
      [new_merge_block, this](Instruction* inst, uint32_t operand) {
        if (this->loop_->IsInsideLoop(inst))
          inst->SetOperand(operand, {new_merge_block});
      });
  new_loop->SetMergeBlock(new_exit_bb.get());

  new_loop->SetPreHeaderBlock(loop_->GetPreHeaderBlock());

  // Add the new block into the cloned instructions.
  cloning_result->cloned_bb_.push_back(std::move(new_exit_bb));

  return new_loop;
}

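// Illustrative sketch (hypothetical ids): after cloning, the
// LoopCloningResult maps let a caller translate between the two loops. If
// the original header is %10 and its clone was assigned id %42, then:
//
//   LoopCloningResult result;
//   Loop* clone = utils.CloneLoop(&result);  // |utils| as in the sketch above
//   result.value_map_[10];       // == 42 (old id -> new id)
//   result.old_to_new_bb_[10];   // the cloned BasicBlock*
//   result.new_to_old_bb_[42];   // the original BasicBlock*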
Loop* LoopUtils::CloneLoop(
    LoopCloningResult* cloning_result,
    const std::vector<BasicBlock*>& ordered_loop_blocks) const {
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(context_);

  CFG& cfg = *context_->cfg();

  // Clone and place the blocks in a SPIR-V compliant order (dominators
  // first).
  for (BasicBlock* old_bb : ordered_loop_blocks) {
    // For each basic block in the loop, we clone it and register the mapping
    // between old and new ids.
    BasicBlock* new_bb = old_bb->Clone(context_);
    new_bb->SetParent(&function_);
    // TODO(1841): Handle id overflow.
    new_bb->GetLabelInst()->SetResultId(context_->TakeNextId());
    def_use_mgr->AnalyzeInstDef(new_bb->GetLabelInst());
    context_->set_instr_block(new_bb->GetLabelInst(), new_bb);
    cloning_result->cloned_bb_.emplace_back(new_bb);

    cloning_result->old_to_new_bb_[old_bb->id()] = new_bb;
    cloning_result->new_to_old_bb_[new_bb->id()] = old_bb;
    cloning_result->value_map_[old_bb->id()] = new_bb->id();

    if (loop_->IsInsideLoop(old_bb)) new_loop->AddBasicBlock(new_bb);

    for (auto new_inst = new_bb->begin(), old_inst = old_bb->begin();
         new_inst != new_bb->end(); ++new_inst, ++old_inst) {
      cloning_result->ptr_map_[&*new_inst] = &*old_inst;
      if (new_inst->HasResultId()) {
        // TODO(1841): Handle id overflow.
        new_inst->SetResultId(context_->TakeNextId());
        cloning_result->value_map_[old_inst->result_id()] =
            new_inst->result_id();

        // Only look at the defs for now; the uses are not updated yet.
        def_use_mgr->AnalyzeInstDef(&*new_inst);
      }
    }
  }

  // All the instructions (including all the labels) have been cloned; remap
  // the instruction operand ids to the new ones.
  for (std::unique_ptr<BasicBlock>& bb_ref : cloning_result->cloned_bb_) {
    BasicBlock* bb = bb_ref.get();

    for (Instruction& insn : *bb) {
      insn.ForEachInId([cloning_result](uint32_t* old_id) {
        // If the operand is defined in the loop, remap the id.
        auto id_it = cloning_result->value_map_.find(*old_id);
        if (id_it != cloning_result->value_map_.end()) {
          *old_id = id_it->second;
        }
      });
      // Only look at what the instruction uses. All the defs are registered,
      // so the uses should all resolve now.
      def_use_mgr->AnalyzeInstUse(&insn);
      context_->set_instr_block(&insn, bb);
    }
    cfg.RegisterBlock(bb);
  }

  PopulateLoopNest(new_loop.get(), *cloning_result);

  return new_loop.release();
}

void LoopUtils::PopulateLoopNest(
    Loop* new_loop, const LoopCloningResult& cloning_result) const {
  std::unordered_map<Loop*, Loop*> loop_mapping;
  loop_mapping[loop_] = new_loop;

  if (loop_->HasParent()) loop_->GetParent()->AddNestedLoop(new_loop);
  PopulateLoopDesc(new_loop, loop_, cloning_result);

  for (Loop& sub_loop :
       make_range(++TreeDFIterator<Loop>(loop_), TreeDFIterator<Loop>())) {
    Loop* cloned = new Loop(context_);
    if (Loop* parent = loop_mapping[sub_loop.GetParent()])
      parent->AddNestedLoop(cloned);
    loop_mapping[&sub_loop] = cloned;
    PopulateLoopDesc(cloned, &sub_loop, cloning_result);
  }

  loop_desc_->AddLoopNest(std::unique_ptr<Loop>(new_loop));
}

// Populates |new_loop|'s descriptor from |old_loop|'s one.
void LoopUtils::PopulateLoopDesc(
    Loop* new_loop, Loop* old_loop,
    const LoopCloningResult& cloning_result) const {
  for (uint32_t bb_id : old_loop->GetBlocks()) {
    BasicBlock* bb = cloning_result.old_to_new_bb_.at(bb_id);
    new_loop->AddBasicBlock(bb);
  }
  new_loop->SetHeaderBlock(
      cloning_result.old_to_new_bb_.at(old_loop->GetHeaderBlock()->id()));
  if (old_loop->GetLatchBlock())
    new_loop->SetLatchBlock(
        cloning_result.old_to_new_bb_.at(old_loop->GetLatchBlock()->id()));
  if (old_loop->GetContinueBlock())
    new_loop->SetContinueBlock(
        cloning_result.old_to_new_bb_.at(old_loop->GetContinueBlock()->id()));
  if (old_loop->GetMergeBlock()) {
    auto it =
        cloning_result.old_to_new_bb_.find(old_loop->GetMergeBlock()->id());
    BasicBlock* bb = it != cloning_result.old_to_new_bb_.end()
                         ? it->second
                         : old_loop->GetMergeBlock();
    new_loop->SetMergeBlock(bb);
  }
  if (old_loop->GetPreHeaderBlock()) {
    auto it =
        cloning_result.old_to_new_bb_.find(old_loop->GetPreHeaderBlock()->id());
    if (it != cloning_result.old_to_new_bb_.end()) {
      new_loop->SetPreHeaderBlock(it->second);
    }
  }
}

// Gathers some code size metrics for the region of interest: here, the
// blocks of |loop|.
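//
// A worked example (hypothetical block, for illustration): a basic block
// containing
//   %l = OpLabel
//   %p = OpPhi %int %a %pred1 %b %pred2
//   %x = OpIAdd %int %p %c
//        OpBranch %next
// contributes a size of 2 (OpIAdd and OpBranch): labels, phis, and no-ops
// are not counted.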
void CodeMetrics::Analyze(const Loop& loop) {
  CFG& cfg = *loop.GetContext()->cfg();

  roi_size_ = 0;
  block_sizes_.clear();

  for (uint32_t id : loop.GetBlocks()) {
    const BasicBlock* bb = cfg.block(id);
    size_t bb_size = 0;
    bb->ForEachInst([&bb_size](const Instruction* insn) {
      if (insn->opcode() == spv::Op::OpLabel) return;
      if (insn->IsNop()) return;
      if (insn->opcode() == spv::Op::OpPhi) return;
      bb_size++;
    });
    block_sizes_[bb->id()] = bb_size;
    roi_size_ += bb_size;
  }
}

}  // namespace opt
}  // namespace spvtools