/src/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- LoopInterchange.cpp - Loop interchange pass-------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This Pass handles loop interchange transform. |
10 | | // This pass interchanges loops to provide a more cache-friendly memory access |
11 | | // patterns. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "llvm/Transforms/Scalar/LoopInterchange.h" |
16 | | #include "llvm/ADT/STLExtras.h" |
17 | | #include "llvm/ADT/SmallVector.h" |
18 | | #include "llvm/ADT/Statistic.h" |
19 | | #include "llvm/ADT/StringRef.h" |
20 | | #include "llvm/Analysis/DependenceAnalysis.h" |
21 | | #include "llvm/Analysis/LoopCacheAnalysis.h" |
22 | | #include "llvm/Analysis/LoopInfo.h" |
23 | | #include "llvm/Analysis/LoopNestAnalysis.h" |
24 | | #include "llvm/Analysis/LoopPass.h" |
25 | | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
26 | | #include "llvm/Analysis/ScalarEvolution.h" |
27 | | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
28 | | #include "llvm/IR/BasicBlock.h" |
29 | | #include "llvm/IR/Constants.h" |
30 | | #include "llvm/IR/DiagnosticInfo.h" |
31 | | #include "llvm/IR/Dominators.h" |
32 | | #include "llvm/IR/Function.h" |
33 | | #include "llvm/IR/InstrTypes.h" |
34 | | #include "llvm/IR/Instruction.h" |
35 | | #include "llvm/IR/Instructions.h" |
36 | | #include "llvm/IR/User.h" |
37 | | #include "llvm/IR/Value.h" |
38 | | #include "llvm/Support/Casting.h" |
39 | | #include "llvm/Support/CommandLine.h" |
40 | | #include "llvm/Support/Debug.h" |
41 | | #include "llvm/Support/ErrorHandling.h" |
42 | | #include "llvm/Support/raw_ostream.h" |
43 | | #include "llvm/Transforms/Scalar/LoopPassManager.h" |
44 | | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
45 | | #include "llvm/Transforms/Utils/LoopUtils.h" |
46 | | #include <cassert> |
47 | | #include <utility> |
48 | | #include <vector> |
49 | | |
50 | | using namespace llvm; |
51 | | |
52 | 0 | #define DEBUG_TYPE "loop-interchange" |
53 | | |
54 | | STATISTIC(LoopsInterchanged, "Number of loops interchanged"); |
55 | | |
56 | | static cl::opt<int> LoopInterchangeCostThreshold( |
57 | | "loop-interchange-threshold", cl::init(0), cl::Hidden, |
58 | | cl::desc("Interchange if you gain more than this number")); |
59 | | |
60 | | namespace { |
61 | | |
62 | | using LoopVector = SmallVector<Loop *, 8>; |
63 | | |
64 | | // TODO: Check if we can use a sparse matrix here. |
65 | | using CharMatrix = std::vector<std::vector<char>>; |
66 | | |
67 | | } // end anonymous namespace |
68 | | |
69 | | // Maximum number of dependencies that can be handled in the dependency matrix. |
70 | | static const unsigned MaxMemInstrCount = 100; |
71 | | |
72 | | // Maximum loop depth supported. |
73 | | static const unsigned MaxLoopNestDepth = 10; |
74 | | |
75 | | #ifdef DUMP_DEP_MATRICIES |
76 | | static void printDepMatrix(CharMatrix &DepMatrix) { |
77 | | for (auto &Row : DepMatrix) { |
78 | | for (auto D : Row) |
79 | | LLVM_DEBUG(dbgs() << D << " "); |
80 | | LLVM_DEBUG(dbgs() << "\n"); |
81 | | } |
82 | | } |
83 | | #endif |
84 | | |
85 | | static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, |
86 | | Loop *L, DependenceInfo *DI, |
87 | 0 | ScalarEvolution *SE) { |
88 | 0 | using ValueVector = SmallVector<Value *, 16>; |
89 | |
|
90 | 0 | ValueVector MemInstr; |
91 | | |
92 | | // For each block. |
93 | 0 | for (BasicBlock *BB : L->blocks()) { |
94 | | // Scan the BB and collect legal loads and stores. |
95 | 0 | for (Instruction &I : *BB) { |
96 | 0 | if (!isa<Instruction>(I)) |
97 | 0 | return false; |
98 | 0 | if (auto *Ld = dyn_cast<LoadInst>(&I)) { |
99 | 0 | if (!Ld->isSimple()) |
100 | 0 | return false; |
101 | 0 | MemInstr.push_back(&I); |
102 | 0 | } else if (auto *St = dyn_cast<StoreInst>(&I)) { |
103 | 0 | if (!St->isSimple()) |
104 | 0 | return false; |
105 | 0 | MemInstr.push_back(&I); |
106 | 0 | } |
107 | 0 | } |
108 | 0 | } |
109 | | |
110 | 0 | LLVM_DEBUG(dbgs() << "Found " << MemInstr.size() |
111 | 0 | << " Loads and Stores to analyze\n"); |
112 | |
|
113 | 0 | ValueVector::iterator I, IE, J, JE; |
114 | |
|
115 | 0 | for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { |
116 | 0 | for (J = I, JE = MemInstr.end(); J != JE; ++J) { |
117 | 0 | std::vector<char> Dep; |
118 | 0 | Instruction *Src = cast<Instruction>(*I); |
119 | 0 | Instruction *Dst = cast<Instruction>(*J); |
120 | | // Ignore Input dependencies. |
121 | 0 | if (isa<LoadInst>(Src) && isa<LoadInst>(Dst)) |
122 | 0 | continue; |
123 | | // Track Output, Flow, and Anti dependencies. |
124 | 0 | if (auto D = DI->depends(Src, Dst, true)) { |
125 | 0 | assert(D->isOrdered() && "Expected an output, flow or anti dep."); |
126 | | // If the direction vector is negative, normalize it to |
127 | | // make it non-negative. |
128 | 0 | if (D->normalize(SE)) |
129 | 0 | LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n"); |
130 | 0 | LLVM_DEBUG(StringRef DepType = |
131 | 0 | D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; |
132 | 0 | dbgs() << "Found " << DepType |
133 | 0 | << " dependency between Src and Dst\n" |
134 | 0 | << " Src:" << *Src << "\n Dst:" << *Dst << '\n'); |
135 | 0 | unsigned Levels = D->getLevels(); |
136 | 0 | char Direction; |
137 | 0 | for (unsigned II = 1; II <= Levels; ++II) { |
138 | 0 | if (D->isScalar(II)) { |
139 | 0 | Direction = 'S'; |
140 | 0 | Dep.push_back(Direction); |
141 | 0 | } else { |
142 | 0 | unsigned Dir = D->getDirection(II); |
143 | 0 | if (Dir == Dependence::DVEntry::LT || |
144 | 0 | Dir == Dependence::DVEntry::LE) |
145 | 0 | Direction = '<'; |
146 | 0 | else if (Dir == Dependence::DVEntry::GT || |
147 | 0 | Dir == Dependence::DVEntry::GE) |
148 | 0 | Direction = '>'; |
149 | 0 | else if (Dir == Dependence::DVEntry::EQ) |
150 | 0 | Direction = '='; |
151 | 0 | else |
152 | 0 | Direction = '*'; |
153 | 0 | Dep.push_back(Direction); |
154 | 0 | } |
155 | 0 | } |
156 | 0 | while (Dep.size() != Level) { |
157 | 0 | Dep.push_back('I'); |
158 | 0 | } |
159 | |
|
160 | 0 | DepMatrix.push_back(Dep); |
161 | 0 | if (DepMatrix.size() > MaxMemInstrCount) { |
162 | 0 | LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount |
163 | 0 | << " dependencies inside loop\n"); |
164 | 0 | return false; |
165 | 0 | } |
166 | 0 | } |
167 | 0 | } |
168 | 0 | } |
169 | | |
170 | 0 | return true; |
171 | 0 | } |
172 | | |
173 | | // A loop is moved from index 'from' to an index 'to'. Update the Dependence |
174 | | // matrix by exchanging the two columns. |
175 | | static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, |
176 | 0 | unsigned ToIndx) { |
177 | 0 | for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I) |
178 | 0 | std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]); |
179 | 0 | } |
180 | | |
181 | | // After interchanging, check if the direction vector is valid. |
182 | | // [Theorem] A permutation of the loops in a perfect nest is legal if and only |
183 | | // if the direction matrix, after the same permutation is applied to its |
184 | | // columns, has no ">" direction as the leftmost non-"=" direction in any row. |
185 | 0 | static bool isLexicographicallyPositive(std::vector<char> &DV) { |
186 | 0 | for (unsigned char Direction : DV) { |
187 | 0 | if (Direction == '<') |
188 | 0 | return true; |
189 | 0 | if (Direction == '>' || Direction == '*') |
190 | 0 | return false; |
191 | 0 | } |
192 | 0 | return true; |
193 | 0 | } |
194 | | |
195 | | // Checks if it is legal to interchange 2 loops. |
196 | | static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, |
197 | | unsigned InnerLoopId, |
198 | 0 | unsigned OuterLoopId) { |
199 | 0 | unsigned NumRows = DepMatrix.size(); |
200 | 0 | std::vector<char> Cur; |
201 | | // For each row check if it is valid to interchange. |
202 | 0 | for (unsigned Row = 0; Row < NumRows; ++Row) { |
203 | | // Create temporary DepVector check its lexicographical order |
204 | | // before and after swapping OuterLoop vs InnerLoop |
205 | 0 | Cur = DepMatrix[Row]; |
206 | 0 | if (!isLexicographicallyPositive(Cur)) |
207 | 0 | return false; |
208 | 0 | std::swap(Cur[InnerLoopId], Cur[OuterLoopId]); |
209 | 0 | if (!isLexicographicallyPositive(Cur)) |
210 | 0 | return false; |
211 | 0 | } |
212 | 0 | return true; |
213 | 0 | } |
214 | | |
215 | 0 | static void populateWorklist(Loop &L, LoopVector &LoopList) { |
216 | 0 | LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: " |
217 | 0 | << L.getHeader()->getParent()->getName() << " Loop: %" |
218 | 0 | << L.getHeader()->getName() << '\n'); |
219 | 0 | assert(LoopList.empty() && "LoopList should initially be empty!"); |
220 | 0 | Loop *CurrentLoop = &L; |
221 | 0 | const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops(); |
222 | 0 | while (!Vec->empty()) { |
223 | 0 | // The current loop has multiple subloops in it hence it is not tightly |
224 | 0 | // nested. |
225 | 0 | // Discard all loops above it added into Worklist. |
226 | 0 | if (Vec->size() != 1) { |
227 | 0 | LoopList = {}; |
228 | 0 | return; |
229 | 0 | } |
230 | 0 |
|
231 | 0 | LoopList.push_back(CurrentLoop); |
232 | 0 | CurrentLoop = Vec->front(); |
233 | 0 | Vec = &CurrentLoop->getSubLoops(); |
234 | 0 | } |
235 | 0 | LoopList.push_back(CurrentLoop); |
236 | 0 | } |
237 | | |
238 | | namespace { |
239 | | |
240 | | /// LoopInterchangeLegality checks if it is legal to interchange the loop. |
241 | | class LoopInterchangeLegality { |
242 | | public: |
243 | | LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, |
244 | | OptimizationRemarkEmitter *ORE) |
245 | 0 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} |
246 | | |
247 | | /// Check if the loops can be interchanged. |
248 | | bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, |
249 | | CharMatrix &DepMatrix); |
250 | | |
251 | | /// Discover induction PHIs in the header of \p L. Induction |
252 | | /// PHIs are added to \p Inductions. |
253 | | bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions); |
254 | | |
255 | | /// Check if the loop structure is understood. We do not handle triangular |
256 | | /// loops for now. |
257 | | bool isLoopStructureUnderstood(); |
258 | | |
259 | | bool currentLimitations(); |
260 | | |
261 | 0 | const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const { |
262 | 0 | return OuterInnerReductions; |
263 | 0 | } |
264 | | |
265 | 0 | const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const { |
266 | 0 | return InnerLoopInductions; |
267 | 0 | } |
268 | | |
269 | | private: |
270 | | bool tightlyNested(Loop *Outer, Loop *Inner); |
271 | | bool containsUnsafeInstructions(BasicBlock *BB); |
272 | | |
273 | | /// Discover induction and reduction PHIs in the header of \p L. Induction |
274 | | /// PHIs are added to \p Inductions, reductions are added to |
275 | | /// OuterInnerReductions. When the outer loop is passed, the inner loop needs |
276 | | /// to be passed as \p InnerLoop. |
277 | | bool findInductionAndReductions(Loop *L, |
278 | | SmallVector<PHINode *, 8> &Inductions, |
279 | | Loop *InnerLoop); |
280 | | |
281 | | Loop *OuterLoop; |
282 | | Loop *InnerLoop; |
283 | | |
284 | | ScalarEvolution *SE; |
285 | | |
286 | | /// Interface to emit optimization remarks. |
287 | | OptimizationRemarkEmitter *ORE; |
288 | | |
289 | | /// Set of reduction PHIs taking part of a reduction across the inner and |
290 | | /// outer loop. |
291 | | SmallPtrSet<PHINode *, 4> OuterInnerReductions; |
292 | | |
293 | | /// Set of inner loop induction PHIs |
294 | | SmallVector<PHINode *, 8> InnerLoopInductions; |
295 | | }; |
296 | | |
297 | | /// LoopInterchangeProfitability checks if it is profitable to interchange the |
298 | | /// loop. |
299 | | class LoopInterchangeProfitability { |
300 | | public: |
301 | | LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE, |
302 | | OptimizationRemarkEmitter *ORE) |
303 | 0 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} |
304 | | |
305 | | /// Check if the loop interchange is profitable. |
306 | | bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop, |
307 | | unsigned InnerLoopId, unsigned OuterLoopId, |
308 | | CharMatrix &DepMatrix, |
309 | | const DenseMap<const Loop *, unsigned> &CostMap, |
310 | | std::unique_ptr<CacheCost> &CC); |
311 | | |
312 | | private: |
313 | | int getInstrOrderCost(); |
314 | | std::optional<bool> isProfitablePerLoopCacheAnalysis( |
315 | | const DenseMap<const Loop *, unsigned> &CostMap, |
316 | | std::unique_ptr<CacheCost> &CC); |
317 | | std::optional<bool> isProfitablePerInstrOrderCost(); |
318 | | std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId, |
319 | | unsigned OuterLoopId, |
320 | | CharMatrix &DepMatrix); |
321 | | Loop *OuterLoop; |
322 | | Loop *InnerLoop; |
323 | | |
324 | | /// Scev analysis. |
325 | | ScalarEvolution *SE; |
326 | | |
327 | | /// Interface to emit optimization remarks. |
328 | | OptimizationRemarkEmitter *ORE; |
329 | | }; |
330 | | |
331 | | /// LoopInterchangeTransform interchanges the loop. |
332 | | class LoopInterchangeTransform { |
333 | | public: |
334 | | LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, |
335 | | LoopInfo *LI, DominatorTree *DT, |
336 | | const LoopInterchangeLegality &LIL) |
337 | 0 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {} |
338 | | |
339 | | /// Interchange OuterLoop and InnerLoop. |
340 | | bool transform(); |
341 | | void restructureLoops(Loop *NewInner, Loop *NewOuter, |
342 | | BasicBlock *OrigInnerPreHeader, |
343 | | BasicBlock *OrigOuterPreHeader); |
344 | | void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop); |
345 | | |
346 | | private: |
347 | | bool adjustLoopLinks(); |
348 | | bool adjustLoopBranches(); |
349 | | |
350 | | Loop *OuterLoop; |
351 | | Loop *InnerLoop; |
352 | | |
353 | | /// Scev analysis. |
354 | | ScalarEvolution *SE; |
355 | | |
356 | | LoopInfo *LI; |
357 | | DominatorTree *DT; |
358 | | |
359 | | const LoopInterchangeLegality &LIL; |
360 | | }; |
361 | | |
362 | | struct LoopInterchange { |
363 | | ScalarEvolution *SE = nullptr; |
364 | | LoopInfo *LI = nullptr; |
365 | | DependenceInfo *DI = nullptr; |
366 | | DominatorTree *DT = nullptr; |
367 | | std::unique_ptr<CacheCost> CC = nullptr; |
368 | | |
369 | | /// Interface to emit optimization remarks. |
370 | | OptimizationRemarkEmitter *ORE; |
371 | | |
372 | | LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI, |
373 | | DominatorTree *DT, std::unique_ptr<CacheCost> &CC, |
374 | | OptimizationRemarkEmitter *ORE) |
375 | 0 | : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {} |
376 | | |
377 | 0 | bool run(Loop *L) { |
378 | 0 | if (L->getParentLoop()) |
379 | 0 | return false; |
380 | 0 | SmallVector<Loop *, 8> LoopList; |
381 | 0 | populateWorklist(*L, LoopList); |
382 | 0 | return processLoopList(LoopList); |
383 | 0 | } |
384 | | |
385 | 0 | bool run(LoopNest &LN) { |
386 | 0 | SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end()); |
387 | 0 | for (unsigned I = 1; I < LoopList.size(); ++I) |
388 | 0 | if (LoopList[I]->getParentLoop() != LoopList[I - 1]) |
389 | 0 | return false; |
390 | 0 | return processLoopList(LoopList); |
391 | 0 | } |
392 | | |
393 | 0 | bool isComputableLoopNest(ArrayRef<Loop *> LoopList) { |
394 | 0 | for (Loop *L : LoopList) { |
395 | 0 | const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); |
396 | 0 | if (isa<SCEVCouldNotCompute>(ExitCountOuter)) { |
397 | 0 | LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n"); |
398 | 0 | return false; |
399 | 0 | } |
400 | 0 | if (L->getNumBackEdges() != 1) { |
401 | 0 | LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n"); |
402 | 0 | return false; |
403 | 0 | } |
404 | 0 | if (!L->getExitingBlock()) { |
405 | 0 | LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n"); |
406 | 0 | return false; |
407 | 0 | } |
408 | 0 | } |
409 | 0 | return true; |
410 | 0 | } |
411 | | |
412 | 0 | unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) { |
413 | | // TODO: Add a better heuristic to select the loop to be interchanged based |
414 | | // on the dependence matrix. Currently we select the innermost loop. |
415 | 0 | return LoopList.size() - 1; |
416 | 0 | } |
417 | | |
418 | 0 | bool processLoopList(SmallVectorImpl<Loop *> &LoopList) { |
419 | 0 | bool Changed = false; |
420 | 0 | unsigned LoopNestDepth = LoopList.size(); |
421 | 0 | if (LoopNestDepth < 2) { |
422 | 0 | LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n"); |
423 | 0 | return false; |
424 | 0 | } |
425 | 0 | if (LoopNestDepth > MaxLoopNestDepth) { |
426 | 0 | LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than " |
427 | 0 | << MaxLoopNestDepth << "\n"); |
428 | 0 | return false; |
429 | 0 | } |
430 | 0 | if (!isComputableLoopNest(LoopList)) { |
431 | 0 | LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n"); |
432 | 0 | return false; |
433 | 0 | } |
434 | | |
435 | 0 | LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth |
436 | 0 | << "\n"); |
437 | |
|
438 | 0 | CharMatrix DependencyMatrix; |
439 | 0 | Loop *OuterMostLoop = *(LoopList.begin()); |
440 | 0 | if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth, |
441 | 0 | OuterMostLoop, DI, SE)) { |
442 | 0 | LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n"); |
443 | 0 | return false; |
444 | 0 | } |
445 | | #ifdef DUMP_DEP_MATRICIES |
446 | | LLVM_DEBUG(dbgs() << "Dependence before interchange\n"); |
447 | | printDepMatrix(DependencyMatrix); |
448 | | #endif |
449 | | |
450 | | // Get the Outermost loop exit. |
451 | 0 | BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock(); |
452 | 0 | if (!LoopNestExit) { |
453 | 0 | LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block"); |
454 | 0 | return false; |
455 | 0 | } |
456 | | |
457 | 0 | unsigned SelecLoopId = selectLoopForInterchange(LoopList); |
458 | | // Obtain the loop vector returned from loop cache analysis beforehand, |
459 | | // and put each <Loop, index> pair into a map for constant time query |
460 | | // later. Indices in loop vector reprsent the optimal order of the |
461 | | // corresponding loop, e.g., given a loopnest with depth N, index 0 |
462 | | // indicates the loop should be placed as the outermost loop and index N |
463 | | // indicates the loop should be placed as the innermost loop. |
464 | | // |
465 | | // For the old pass manager CacheCost would be null. |
466 | 0 | DenseMap<const Loop *, unsigned> CostMap; |
467 | 0 | if (CC != nullptr) { |
468 | 0 | const auto &LoopCosts = CC->getLoopCosts(); |
469 | 0 | for (unsigned i = 0; i < LoopCosts.size(); i++) { |
470 | 0 | CostMap[LoopCosts[i].first] = i; |
471 | 0 | } |
472 | 0 | } |
473 | | // We try to achieve the globally optimal memory access for the loopnest, |
474 | | // and do interchange based on a bubble-sort fasion. We start from |
475 | | // the innermost loop, move it outwards to the best possible position |
476 | | // and repeat this process. |
477 | 0 | for (unsigned j = SelecLoopId; j > 0; j--) { |
478 | 0 | bool ChangedPerIter = false; |
479 | 0 | for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) { |
480 | 0 | bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1, |
481 | 0 | DependencyMatrix, CostMap); |
482 | 0 | if (!Interchanged) |
483 | 0 | continue; |
484 | | // Loops interchanged, update LoopList accordingly. |
485 | 0 | std::swap(LoopList[i - 1], LoopList[i]); |
486 | | // Update the DependencyMatrix |
487 | 0 | interChangeDependencies(DependencyMatrix, i, i - 1); |
488 | | #ifdef DUMP_DEP_MATRICIES |
489 | | LLVM_DEBUG(dbgs() << "Dependence after interchange\n"); |
490 | | printDepMatrix(DependencyMatrix); |
491 | | #endif |
492 | 0 | ChangedPerIter |= Interchanged; |
493 | 0 | Changed |= Interchanged; |
494 | 0 | } |
495 | | // Early abort if there was no interchange during an entire round of |
496 | | // moving loops outwards. |
497 | 0 | if (!ChangedPerIter) |
498 | 0 | break; |
499 | 0 | } |
500 | 0 | return Changed; |
501 | 0 | } |
502 | | |
503 | | bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId, |
504 | | unsigned OuterLoopId, |
505 | | std::vector<std::vector<char>> &DependencyMatrix, |
506 | 0 | const DenseMap<const Loop *, unsigned> &CostMap) { |
507 | 0 | LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId |
508 | 0 | << " and OuterLoopId = " << OuterLoopId << "\n"); |
509 | 0 | LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE); |
510 | 0 | if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { |
511 | 0 | LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n"); |
512 | 0 | return false; |
513 | 0 | } |
514 | 0 | LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n"); |
515 | 0 | LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); |
516 | 0 | if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId, |
517 | 0 | DependencyMatrix, CostMap, CC)) { |
518 | 0 | LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n"); |
519 | 0 | return false; |
520 | 0 | } |
521 | | |
522 | 0 | ORE->emit([&]() { |
523 | 0 | return OptimizationRemark(DEBUG_TYPE, "Interchanged", |
524 | 0 | InnerLoop->getStartLoc(), |
525 | 0 | InnerLoop->getHeader()) |
526 | 0 | << "Loop interchanged with enclosing loop."; |
527 | 0 | }); |
528 | |
|
529 | 0 | LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL); |
530 | 0 | LIT.transform(); |
531 | 0 | LLVM_DEBUG(dbgs() << "Loops interchanged.\n"); |
532 | 0 | LoopsInterchanged++; |
533 | |
|
534 | 0 | llvm::formLCSSARecursively(*OuterLoop, *DT, LI, SE); |
535 | 0 | return true; |
536 | 0 | } |
537 | | }; |
538 | | |
539 | | } // end anonymous namespace |
540 | | |
541 | 0 | bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) { |
542 | 0 | return any_of(*BB, [](const Instruction &I) { |
543 | 0 | return I.mayHaveSideEffects() || I.mayReadFromMemory(); |
544 | 0 | }); |
545 | 0 | } |
546 | | |
547 | 0 | bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { |
548 | 0 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); |
549 | 0 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
550 | 0 | BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); |
551 | |
|
552 | 0 | LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n"); |
553 | | |
554 | | // A perfectly nested loop will not have any branch in between the outer and |
555 | | // inner block i.e. outer header will branch to either inner preheader and |
556 | | // outerloop latch. |
557 | 0 | BranchInst *OuterLoopHeaderBI = |
558 | 0 | dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); |
559 | 0 | if (!OuterLoopHeaderBI) |
560 | 0 | return false; |
561 | | |
562 | 0 | for (BasicBlock *Succ : successors(OuterLoopHeaderBI)) |
563 | 0 | if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() && |
564 | 0 | Succ != OuterLoopLatch) |
565 | 0 | return false; |
566 | | |
567 | 0 | LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n"); |
568 | | // We do not have any basic block in between now make sure the outer header |
569 | | // and outer loop latch doesn't contain any unsafe instructions. |
570 | 0 | if (containsUnsafeInstructions(OuterLoopHeader) || |
571 | 0 | containsUnsafeInstructions(OuterLoopLatch)) |
572 | 0 | return false; |
573 | | |
574 | | // Also make sure the inner loop preheader does not contain any unsafe |
575 | | // instructions. Note that all instructions in the preheader will be moved to |
576 | | // the outer loop header when interchanging. |
577 | 0 | if (InnerLoopPreHeader != OuterLoopHeader && |
578 | 0 | containsUnsafeInstructions(InnerLoopPreHeader)) |
579 | 0 | return false; |
580 | | |
581 | 0 | BasicBlock *InnerLoopExit = InnerLoop->getExitBlock(); |
582 | | // Ensure the inner loop exit block flows to the outer loop latch possibly |
583 | | // through empty blocks. |
584 | 0 | const BasicBlock &SuccInner = |
585 | 0 | LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch); |
586 | 0 | if (&SuccInner != OuterLoopLatch) { |
587 | 0 | LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit |
588 | 0 | << " does not lead to the outer loop latch.\n";); |
589 | 0 | return false; |
590 | 0 | } |
591 | | // The inner loop exit block does flow to the outer loop latch and not some |
592 | | // other BBs, now make sure it contains safe instructions, since it will be |
593 | | // moved into the (new) inner loop after interchange. |
594 | 0 | if (containsUnsafeInstructions(InnerLoopExit)) |
595 | 0 | return false; |
596 | | |
597 | 0 | LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n"); |
598 | | // We have a perfect loop nest. |
599 | 0 | return true; |
600 | 0 | } |
601 | | |
602 | 0 | bool LoopInterchangeLegality::isLoopStructureUnderstood() { |
603 | 0 | BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); |
604 | 0 | for (PHINode *InnerInduction : InnerLoopInductions) { |
605 | 0 | unsigned Num = InnerInduction->getNumOperands(); |
606 | 0 | for (unsigned i = 0; i < Num; ++i) { |
607 | 0 | Value *Val = InnerInduction->getOperand(i); |
608 | 0 | if (isa<Constant>(Val)) |
609 | 0 | continue; |
610 | 0 | Instruction *I = dyn_cast<Instruction>(Val); |
611 | 0 | if (!I) |
612 | 0 | return false; |
613 | | // TODO: Handle triangular loops. |
614 | | // e.g. for(int i=0;i<N;i++) |
615 | | // for(int j=i;j<N;j++) |
616 | 0 | unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i); |
617 | 0 | if (InnerInduction->getIncomingBlock(IncomBlockIndx) == |
618 | 0 | InnerLoopPreheader && |
619 | 0 | !OuterLoop->isLoopInvariant(I)) { |
620 | 0 | return false; |
621 | 0 | } |
622 | 0 | } |
623 | 0 | } |
624 | | |
625 | | // TODO: Handle triangular loops of another form. |
626 | | // e.g. for(int i=0;i<N;i++) |
627 | | // for(int j=0;j<i;j++) |
628 | | // or, |
629 | | // for(int i=0;i<N;i++) |
630 | | // for(int j=0;j*i<N;j++) |
631 | 0 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); |
632 | 0 | BranchInst *InnerLoopLatchBI = |
633 | 0 | dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); |
634 | 0 | if (!InnerLoopLatchBI->isConditional()) |
635 | 0 | return false; |
636 | 0 | if (CmpInst *InnerLoopCmp = |
637 | 0 | dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) { |
638 | 0 | Value *Op0 = InnerLoopCmp->getOperand(0); |
639 | 0 | Value *Op1 = InnerLoopCmp->getOperand(1); |
640 | | |
641 | | // LHS and RHS of the inner loop exit condition, e.g., |
642 | | // in "for(int j=0;j<i;j++)", LHS is j and RHS is i. |
643 | 0 | Value *Left = nullptr; |
644 | 0 | Value *Right = nullptr; |
645 | | |
646 | | // Check if V only involves inner loop induction variable. |
647 | | // Return true if V is InnerInduction, or a cast from |
648 | | // InnerInduction, or a binary operator that involves |
649 | | // InnerInduction and a constant. |
650 | 0 | std::function<bool(Value *)> IsPathToInnerIndVar; |
651 | 0 | IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool { |
652 | 0 | if (llvm::is_contained(InnerLoopInductions, V)) |
653 | 0 | return true; |
654 | 0 | if (isa<Constant>(V)) |
655 | 0 | return true; |
656 | 0 | const Instruction *I = dyn_cast<Instruction>(V); |
657 | 0 | if (!I) |
658 | 0 | return false; |
659 | 0 | if (isa<CastInst>(I)) |
660 | 0 | return IsPathToInnerIndVar(I->getOperand(0)); |
661 | 0 | if (isa<BinaryOperator>(I)) |
662 | 0 | return IsPathToInnerIndVar(I->getOperand(0)) && |
663 | 0 | IsPathToInnerIndVar(I->getOperand(1)); |
664 | 0 | return false; |
665 | 0 | }; |
666 | | |
667 | | // In case of multiple inner loop indvars, it is okay if LHS and RHS |
668 | | // are both inner indvar related variables. |
669 | 0 | if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1)) |
670 | 0 | return true; |
671 | | |
672 | | // Otherwise we check if the cmp instruction compares an inner indvar |
673 | | // related variable (Left) with a outer loop invariant (Right). |
674 | 0 | if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) { |
675 | 0 | Left = Op0; |
676 | 0 | Right = Op1; |
677 | 0 | } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) { |
678 | 0 | Left = Op1; |
679 | 0 | Right = Op0; |
680 | 0 | } |
681 | |
|
682 | 0 | if (Left == nullptr) |
683 | 0 | return false; |
684 | | |
685 | 0 | const SCEV *S = SE->getSCEV(Right); |
686 | 0 | if (!SE->isLoopInvariant(S, OuterLoop)) |
687 | 0 | return false; |
688 | 0 | } |
689 | | |
690 | 0 | return true; |
691 | 0 | } |
692 | | |
693 | | // If SV is a LCSSA PHI node with a single incoming value, return the incoming |
694 | | // value. |
695 | 0 | static Value *followLCSSA(Value *SV) { |
696 | 0 | PHINode *PHI = dyn_cast<PHINode>(SV); |
697 | 0 | if (!PHI) |
698 | 0 | return SV; |
699 | | |
700 | 0 | if (PHI->getNumIncomingValues() != 1) |
701 | 0 | return SV; |
702 | 0 | return followLCSSA(PHI->getIncomingValue(0)); |
703 | 0 | } |
704 | | |
705 | | // Check V's users to see if it is involved in a reduction in L. |
706 | 0 | static PHINode *findInnerReductionPhi(Loop *L, Value *V) { |
707 | | // Reduction variables cannot be constants. |
708 | 0 | if (isa<Constant>(V)) |
709 | 0 | return nullptr; |
710 | | |
711 | 0 | for (Value *User : V->users()) { |
712 | 0 | if (PHINode *PHI = dyn_cast<PHINode>(User)) { |
713 | 0 | if (PHI->getNumIncomingValues() == 1) |
714 | 0 | continue; |
715 | 0 | RecurrenceDescriptor RD; |
716 | 0 | if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) { |
717 | | // Detect floating point reduction only when it can be reordered. |
718 | 0 | if (RD.getExactFPMathInst() != nullptr) |
719 | 0 | return nullptr; |
720 | 0 | return PHI; |
721 | 0 | } |
722 | 0 | return nullptr; |
723 | 0 | } |
724 | 0 | } |
725 | | |
726 | 0 | return nullptr; |
727 | 0 | } |
728 | | |
729 | | bool LoopInterchangeLegality::findInductionAndReductions( |
730 | 0 | Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) { |
731 | 0 | if (!L->getLoopLatch() || !L->getLoopPredecessor()) |
732 | 0 | return false; |
733 | 0 | for (PHINode &PHI : L->getHeader()->phis()) { |
734 | 0 | InductionDescriptor ID; |
735 | 0 | if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) |
736 | 0 | Inductions.push_back(&PHI); |
737 | 0 | else { |
738 | | // PHIs in inner loops need to be part of a reduction in the outer loop, |
739 | | // discovered when checking the PHIs of the outer loop earlier. |
740 | 0 | if (!InnerLoop) { |
741 | 0 | if (!OuterInnerReductions.count(&PHI)) { |
742 | 0 | LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions " |
743 | 0 | "across the outer loop.\n"); |
744 | 0 | return false; |
745 | 0 | } |
746 | 0 | } else { |
747 | 0 | assert(PHI.getNumIncomingValues() == 2 && |
748 | 0 | "Phis in loop header should have exactly 2 incoming values"); |
749 | | // Check if we have a PHI node in the outer loop that has a reduction |
750 | | // result from the inner loop as an incoming value. |
751 | 0 | Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch())); |
752 | 0 | PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V); |
753 | 0 | if (!InnerRedPhi || |
754 | 0 | !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) { |
755 | 0 | LLVM_DEBUG( |
756 | 0 | dbgs() |
757 | 0 | << "Failed to recognize PHI as an induction or reduction.\n"); |
758 | 0 | return false; |
759 | 0 | } |
760 | 0 | OuterInnerReductions.insert(&PHI); |
761 | 0 | OuterInnerReductions.insert(InnerRedPhi); |
762 | 0 | } |
763 | 0 | } |
764 | 0 | } |
765 | 0 | return true; |
766 | 0 | } |
767 | | |
768 | | // This function indicates the current limitations in the transform as a result |
769 | | // of which we do not proceed. |
770 | 0 | bool LoopInterchangeLegality::currentLimitations() { |
771 | 0 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); |
772 | | |
773 | | // transform currently expects the loop latches to also be the exiting |
774 | | // blocks. |
775 | 0 | if (InnerLoop->getExitingBlock() != InnerLoopLatch || |
776 | 0 | OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() || |
777 | 0 | !isa<BranchInst>(InnerLoopLatch->getTerminator()) || |
778 | 0 | !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) { |
779 | 0 | LLVM_DEBUG( |
780 | 0 | dbgs() << "Loops where the latch is not the exiting block are not" |
781 | 0 | << " supported currently.\n"); |
782 | 0 | ORE->emit([&]() { |
783 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch", |
784 | 0 | OuterLoop->getStartLoc(), |
785 | 0 | OuterLoop->getHeader()) |
786 | 0 | << "Loops where the latch is not the exiting block cannot be" |
787 | 0 | " interchange currently."; |
788 | 0 | }); |
789 | 0 | return true; |
790 | 0 | } |
791 | | |
792 | 0 | SmallVector<PHINode *, 8> Inductions; |
793 | 0 | if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) { |
794 | 0 | LLVM_DEBUG( |
795 | 0 | dbgs() << "Only outer loops with induction or reduction PHI nodes " |
796 | 0 | << "are supported currently.\n"); |
797 | 0 | ORE->emit([&]() { |
798 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter", |
799 | 0 | OuterLoop->getStartLoc(), |
800 | 0 | OuterLoop->getHeader()) |
801 | 0 | << "Only outer loops with induction or reduction PHI nodes can be" |
802 | 0 | " interchanged currently."; |
803 | 0 | }); |
804 | 0 | return true; |
805 | 0 | } |
806 | | |
807 | 0 | Inductions.clear(); |
808 | | // For multi-level loop nests, make sure that all phi nodes for inner loops |
809 | | // at all levels can be recognized as a induction or reduction phi. Bail out |
810 | | // if a phi node at a certain nesting level cannot be properly recognized. |
811 | 0 | Loop *CurLevelLoop = OuterLoop; |
812 | 0 | while (!CurLevelLoop->getSubLoops().empty()) { |
813 | | // We already made sure that the loop nest is tightly nested. |
814 | 0 | CurLevelLoop = CurLevelLoop->getSubLoops().front(); |
815 | 0 | if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) { |
816 | 0 | LLVM_DEBUG( |
817 | 0 | dbgs() << "Only inner loops with induction or reduction PHI nodes " |
818 | 0 | << "are supported currently.\n"); |
819 | 0 | ORE->emit([&]() { |
820 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner", |
821 | 0 | CurLevelLoop->getStartLoc(), |
822 | 0 | CurLevelLoop->getHeader()) |
823 | 0 | << "Only inner loops with induction or reduction PHI nodes can be" |
824 | 0 | " interchange currently."; |
825 | 0 | }); |
826 | 0 | return true; |
827 | 0 | } |
828 | 0 | } |
829 | | |
830 | | // TODO: Triangular loops are not handled for now. |
831 | 0 | if (!isLoopStructureUnderstood()) { |
832 | 0 | LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n"); |
833 | 0 | ORE->emit([&]() { |
834 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner", |
835 | 0 | InnerLoop->getStartLoc(), |
836 | 0 | InnerLoop->getHeader()) |
837 | 0 | << "Inner loop structure not understood currently."; |
838 | 0 | }); |
839 | 0 | return true; |
840 | 0 | } |
841 | | |
842 | 0 | return false; |
843 | 0 | } |
844 | | |
845 | | bool LoopInterchangeLegality::findInductions( |
846 | 0 | Loop *L, SmallVectorImpl<PHINode *> &Inductions) { |
847 | 0 | for (PHINode &PHI : L->getHeader()->phis()) { |
848 | 0 | InductionDescriptor ID; |
849 | 0 | if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) |
850 | 0 | Inductions.push_back(&PHI); |
851 | 0 | } |
852 | 0 | return !Inductions.empty(); |
853 | 0 | } |
854 | | |
855 | | // We currently only support LCSSA PHI nodes in the inner loop exit, if their |
856 | | // users are either reduction PHIs or PHIs outside the outer loop (which means |
857 | | // the we are only interested in the final value after the loop). |
858 | | static bool |
859 | | areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, |
860 | 0 | SmallPtrSetImpl<PHINode *> &Reductions) { |
861 | 0 | BasicBlock *InnerExit = OuterL->getUniqueExitBlock(); |
862 | 0 | for (PHINode &PHI : InnerExit->phis()) { |
863 | | // Reduction lcssa phi will have only 1 incoming block that from loop latch. |
864 | 0 | if (PHI.getNumIncomingValues() > 1) |
865 | 0 | return false; |
866 | 0 | if (any_of(PHI.users(), [&Reductions, OuterL](User *U) { |
867 | 0 | PHINode *PN = dyn_cast<PHINode>(U); |
868 | 0 | return !PN || |
869 | 0 | (!Reductions.count(PN) && OuterL->contains(PN->getParent())); |
870 | 0 | })) { |
871 | 0 | return false; |
872 | 0 | } |
873 | 0 | } |
874 | 0 | return true; |
875 | 0 | } |
876 | | |
877 | | // We currently support LCSSA PHI nodes in the outer loop exit, if their |
878 | | // incoming values do not come from the outer loop latch or if the |
879 | | // outer loop latch has a single predecessor. In that case, the value will |
880 | | // be available if both the inner and outer loop conditions are true, which |
881 | | // will still be true after interchanging. If we have multiple predecessor, |
882 | | // that may not be the case, e.g. because the outer loop latch may be executed |
883 | | // if the inner loop is not executed. |
884 | 0 | static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { |
885 | 0 | BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock(); |
886 | 0 | for (PHINode &PHI : LoopNestExit->phis()) { |
887 | 0 | for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) { |
888 | 0 | Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i)); |
889 | 0 | if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch()) |
890 | 0 | continue; |
891 | | |
892 | | // The incoming value is defined in the outer loop latch. Currently we |
893 | | // only support that in case the outer loop latch has a single predecessor. |
894 | | // This guarantees that the outer loop latch is executed if and only if |
895 | | // the inner loop is executed (because tightlyNested() guarantees that the |
896 | | // outer loop header only branches to the inner loop or the outer loop |
897 | | // latch). |
898 | | // FIXME: We could weaken this logic and allow multiple predecessors, |
899 | | // if the values are produced outside the loop latch. We would need |
900 | | // additional logic to update the PHI nodes in the exit block as |
901 | | // well. |
902 | 0 | if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr) |
903 | 0 | return false; |
904 | 0 | } |
905 | 0 | } |
906 | 0 | return true; |
907 | 0 | } |
908 | | |
909 | | // In case of multi-level nested loops, it may occur that lcssa phis exist in |
910 | | // the latch of InnerLoop, i.e., when defs of the incoming values are further |
911 | | // inside the loopnest. Sometimes those incoming values are not available |
912 | | // after interchange, since the original inner latch will become the new outer |
913 | | // latch which may have predecessor paths that do not include those incoming |
914 | | // values. |
915 | | // TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of |
916 | | // multi-level loop nests. |
917 | 0 | static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { |
918 | 0 | if (InnerLoop->getSubLoops().empty()) |
919 | 0 | return true; |
920 | | // If the original outer latch has only one predecessor, then values defined |
921 | | // further inside the looploop, e.g., in the innermost loop, will be available |
922 | | // at the new outer latch after interchange. |
923 | 0 | if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr) |
924 | 0 | return true; |
925 | | |
926 | | // The outer latch has more than one predecessors, i.e., the inner |
927 | | // exit and the inner header. |
928 | | // PHI nodes in the inner latch are lcssa phis where the incoming values |
929 | | // are defined further inside the loopnest. Check if those phis are used |
930 | | // in the original inner latch. If that is the case then bail out since |
931 | | // those incoming values may not be available at the new outer latch. |
932 | 0 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); |
933 | 0 | for (PHINode &PHI : InnerLoopLatch->phis()) { |
934 | 0 | for (auto *U : PHI.users()) { |
935 | 0 | Instruction *UI = cast<Instruction>(U); |
936 | 0 | if (InnerLoopLatch == UI->getParent()) |
937 | 0 | return false; |
938 | 0 | } |
939 | 0 | } |
940 | 0 | return true; |
941 | 0 | } |
942 | | |
943 | | bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, |
944 | | unsigned OuterLoopId, |
945 | 0 | CharMatrix &DepMatrix) { |
946 | 0 | if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) { |
947 | 0 | LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId |
948 | 0 | << " and OuterLoopId = " << OuterLoopId |
949 | 0 | << " due to dependence\n"); |
950 | 0 | ORE->emit([&]() { |
951 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence", |
952 | 0 | InnerLoop->getStartLoc(), |
953 | 0 | InnerLoop->getHeader()) |
954 | 0 | << "Cannot interchange loops due to dependences."; |
955 | 0 | }); |
956 | 0 | return false; |
957 | 0 | } |
958 | | // Check if outer and inner loop contain legal instructions only. |
959 | 0 | for (auto *BB : OuterLoop->blocks()) |
960 | 0 | for (Instruction &I : BB->instructionsWithoutDebug()) |
961 | 0 | if (CallInst *CI = dyn_cast<CallInst>(&I)) { |
962 | | // readnone functions do not prevent interchanging. |
963 | 0 | if (CI->onlyWritesMemory()) |
964 | 0 | continue; |
965 | 0 | LLVM_DEBUG( |
966 | 0 | dbgs() << "Loops with call instructions cannot be interchanged " |
967 | 0 | << "safely."); |
968 | 0 | ORE->emit([&]() { |
969 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst", |
970 | 0 | CI->getDebugLoc(), |
971 | 0 | CI->getParent()) |
972 | 0 | << "Cannot interchange loops due to call instruction."; |
973 | 0 | }); |
974 | |
|
975 | 0 | return false; |
976 | 0 | } |
977 | | |
978 | 0 | if (!findInductions(InnerLoop, InnerLoopInductions)) { |
979 | 0 | LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n"); |
980 | 0 | return false; |
981 | 0 | } |
982 | | |
983 | 0 | if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) { |
984 | 0 | LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n"); |
985 | 0 | ORE->emit([&]() { |
986 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI", |
987 | 0 | InnerLoop->getStartLoc(), |
988 | 0 | InnerLoop->getHeader()) |
989 | 0 | << "Cannot interchange loops because unsupported PHI nodes found " |
990 | 0 | "in inner loop latch."; |
991 | 0 | }); |
992 | 0 | return false; |
993 | 0 | } |
994 | | |
995 | | // TODO: The loops could not be interchanged due to current limitations in the |
996 | | // transform module. |
997 | 0 | if (currentLimitations()) { |
998 | 0 | LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n"); |
999 | 0 | return false; |
1000 | 0 | } |
1001 | | |
1002 | | // Check if the loops are tightly nested. |
1003 | 0 | if (!tightlyNested(OuterLoop, InnerLoop)) { |
1004 | 0 | LLVM_DEBUG(dbgs() << "Loops not tightly nested\n"); |
1005 | 0 | ORE->emit([&]() { |
1006 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested", |
1007 | 0 | InnerLoop->getStartLoc(), |
1008 | 0 | InnerLoop->getHeader()) |
1009 | 0 | << "Cannot interchange loops because they are not tightly " |
1010 | 0 | "nested."; |
1011 | 0 | }); |
1012 | 0 | return false; |
1013 | 0 | } |
1014 | | |
1015 | 0 | if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop, |
1016 | 0 | OuterInnerReductions)) { |
1017 | 0 | LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n"); |
1018 | 0 | ORE->emit([&]() { |
1019 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI", |
1020 | 0 | InnerLoop->getStartLoc(), |
1021 | 0 | InnerLoop->getHeader()) |
1022 | 0 | << "Found unsupported PHI node in loop exit."; |
1023 | 0 | }); |
1024 | 0 | return false; |
1025 | 0 | } |
1026 | | |
1027 | 0 | if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) { |
1028 | 0 | LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"); |
1029 | 0 | ORE->emit([&]() { |
1030 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI", |
1031 | 0 | OuterLoop->getStartLoc(), |
1032 | 0 | OuterLoop->getHeader()) |
1033 | 0 | << "Found unsupported PHI node in loop exit."; |
1034 | 0 | }); |
1035 | 0 | return false; |
1036 | 0 | } |
1037 | | |
1038 | 0 | return true; |
1039 | 0 | } |
1040 | | |
1041 | 0 | int LoopInterchangeProfitability::getInstrOrderCost() { |
1042 | 0 | unsigned GoodOrder, BadOrder; |
1043 | 0 | BadOrder = GoodOrder = 0; |
1044 | 0 | for (BasicBlock *BB : InnerLoop->blocks()) { |
1045 | 0 | for (Instruction &Ins : *BB) { |
1046 | 0 | if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) { |
1047 | 0 | unsigned NumOp = GEP->getNumOperands(); |
1048 | 0 | bool FoundInnerInduction = false; |
1049 | 0 | bool FoundOuterInduction = false; |
1050 | 0 | for (unsigned i = 0; i < NumOp; ++i) { |
1051 | | // Skip operands that are not SCEV-able. |
1052 | 0 | if (!SE->isSCEVable(GEP->getOperand(i)->getType())) |
1053 | 0 | continue; |
1054 | | |
1055 | 0 | const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i)); |
1056 | 0 | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal); |
1057 | 0 | if (!AR) |
1058 | 0 | continue; |
1059 | | |
1060 | | // If we find the inner induction after an outer induction e.g. |
1061 | | // for(int i=0;i<N;i++) |
1062 | | // for(int j=0;j<N;j++) |
1063 | | // A[i][j] = A[i-1][j-1]+k; |
1064 | | // then it is a good order. |
1065 | 0 | if (AR->getLoop() == InnerLoop) { |
1066 | | // We found an InnerLoop induction after OuterLoop induction. It is |
1067 | | // a good order. |
1068 | 0 | FoundInnerInduction = true; |
1069 | 0 | if (FoundOuterInduction) { |
1070 | 0 | GoodOrder++; |
1071 | 0 | break; |
1072 | 0 | } |
1073 | 0 | } |
1074 | | // If we find the outer induction after an inner induction e.g. |
1075 | | // for(int i=0;i<N;i++) |
1076 | | // for(int j=0;j<N;j++) |
1077 | | // A[j][i] = A[j-1][i-1]+k; |
1078 | | // then it is a bad order. |
1079 | 0 | if (AR->getLoop() == OuterLoop) { |
1080 | | // We found an OuterLoop induction after InnerLoop induction. It is |
1081 | | // a bad order. |
1082 | 0 | FoundOuterInduction = true; |
1083 | 0 | if (FoundInnerInduction) { |
1084 | 0 | BadOrder++; |
1085 | 0 | break; |
1086 | 0 | } |
1087 | 0 | } |
1088 | 0 | } |
1089 | 0 | } |
1090 | 0 | } |
1091 | 0 | } |
1092 | 0 | return GoodOrder - BadOrder; |
1093 | 0 | } |
1094 | | |
1095 | | std::optional<bool> |
1096 | | LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis( |
1097 | | const DenseMap<const Loop *, unsigned> &CostMap, |
1098 | 0 | std::unique_ptr<CacheCost> &CC) { |
1099 | | // This is the new cost model returned from loop cache analysis. |
1100 | | // A smaller index means the loop should be placed an outer loop, and vice |
1101 | | // versa. |
1102 | 0 | if (CostMap.contains(InnerLoop) && CostMap.contains(OuterLoop)) { |
1103 | 0 | unsigned InnerIndex = 0, OuterIndex = 0; |
1104 | 0 | InnerIndex = CostMap.find(InnerLoop)->second; |
1105 | 0 | OuterIndex = CostMap.find(OuterLoop)->second; |
1106 | 0 | LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex |
1107 | 0 | << ", OuterIndex = " << OuterIndex << "\n"); |
1108 | 0 | if (InnerIndex < OuterIndex) |
1109 | 0 | return std::optional<bool>(true); |
1110 | 0 | assert(InnerIndex != OuterIndex && "CostMap should assign unique " |
1111 | 0 | "numbers to each loop"); |
1112 | 0 | if (CC->getLoopCost(*OuterLoop) == CC->getLoopCost(*InnerLoop)) |
1113 | 0 | return std::nullopt; |
1114 | 0 | return std::optional<bool>(false); |
1115 | 0 | } |
1116 | 0 | return std::nullopt; |
1117 | 0 | } |
1118 | | |
1119 | | std::optional<bool> |
1120 | 0 | LoopInterchangeProfitability::isProfitablePerInstrOrderCost() { |
1121 | | // Legacy cost model: this is rough cost estimation algorithm. It counts the |
1122 | | // good and bad order of induction variables in the instruction and allows |
1123 | | // reordering if number of bad orders is more than good. |
1124 | 0 | int Cost = getInstrOrderCost(); |
1125 | 0 | LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n"); |
1126 | 0 | if (Cost < 0 && Cost < LoopInterchangeCostThreshold) |
1127 | 0 | return std::optional<bool>(true); |
1128 | | |
1129 | 0 | return std::nullopt; |
1130 | 0 | } |
1131 | | |
1132 | | std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization( |
1133 | 0 | unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { |
1134 | 0 | for (auto &Row : DepMatrix) { |
1135 | | // If the inner loop is loop independent or doesn't carry any dependency |
1136 | | // it is not profitable to move this to outer position, since we are |
1137 | | // likely able to do inner loop vectorization already. |
1138 | 0 | if (Row[InnerLoopId] == 'I' || Row[InnerLoopId] == '=') |
1139 | 0 | return std::optional<bool>(false); |
1140 | | |
1141 | | // If the outer loop is not loop independent it is not profitable to move |
1142 | | // this to inner position, since doing so would not enable inner loop |
1143 | | // parallelism. |
1144 | 0 | if (Row[OuterLoopId] != 'I' && Row[OuterLoopId] != '=') |
1145 | 0 | return std::optional<bool>(false); |
1146 | 0 | } |
1147 | | // If inner loop has dependence and outer loop is loop independent then it |
1148 | | // is/ profitable to interchange to enable inner loop parallelism. |
1149 | | // If there are no dependences, interchanging will not improve anything. |
1150 | 0 | return std::optional<bool>(!DepMatrix.empty()); |
1151 | 0 | } |
1152 | | |
1153 | | bool LoopInterchangeProfitability::isProfitable( |
1154 | | const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId, |
1155 | | unsigned OuterLoopId, CharMatrix &DepMatrix, |
1156 | | const DenseMap<const Loop *, unsigned> &CostMap, |
1157 | 0 | std::unique_ptr<CacheCost> &CC) { |
1158 | | // isProfitable() is structured to avoid endless loop interchange. |
1159 | | // If loop cache analysis could decide the profitability then, |
1160 | | // profitability check will stop and return the analysis result. |
1161 | | // If cache analysis failed to analyze the loopnest (e.g., |
1162 | | // due to delinearization issues) then only check whether it is |
1163 | | // profitable for InstrOrderCost. Likewise, if InstrOrderCost failed to |
1164 | | // analysis the profitability then only, isProfitableForVectorization |
1165 | | // will decide. |
1166 | 0 | std::optional<bool> shouldInterchange = |
1167 | 0 | isProfitablePerLoopCacheAnalysis(CostMap, CC); |
1168 | 0 | if (!shouldInterchange.has_value()) { |
1169 | 0 | shouldInterchange = isProfitablePerInstrOrderCost(); |
1170 | 0 | if (!shouldInterchange.has_value()) |
1171 | 0 | shouldInterchange = |
1172 | 0 | isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix); |
1173 | 0 | } |
1174 | 0 | if (!shouldInterchange.has_value()) { |
1175 | 0 | ORE->emit([&]() { |
1176 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable", |
1177 | 0 | InnerLoop->getStartLoc(), |
1178 | 0 | InnerLoop->getHeader()) |
1179 | 0 | << "Insufficient information to calculate the cost of loop for " |
1180 | 0 | "interchange."; |
1181 | 0 | }); |
1182 | 0 | return false; |
1183 | 0 | } else if (!shouldInterchange.value()) { |
1184 | 0 | ORE->emit([&]() { |
1185 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable", |
1186 | 0 | InnerLoop->getStartLoc(), |
1187 | 0 | InnerLoop->getHeader()) |
1188 | 0 | << "Interchanging loops is not considered to improve cache " |
1189 | 0 | "locality nor vectorization."; |
1190 | 0 | }); |
1191 | 0 | return false; |
1192 | 0 | } |
1193 | 0 | return true; |
1194 | 0 | } |
1195 | | |
1196 | | void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, |
1197 | 0 | Loop *InnerLoop) { |
1198 | 0 | for (Loop *L : *OuterLoop) |
1199 | 0 | if (L == InnerLoop) { |
1200 | 0 | OuterLoop->removeChildLoop(L); |
1201 | 0 | return; |
1202 | 0 | } |
1203 | 0 | llvm_unreachable("Couldn't find loop"); |
1204 | 0 | } |
1205 | | |
1206 | | /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the |
1207 | | /// new inner and outer loop after interchanging: NewInner is the original |
1208 | | /// outer loop and NewOuter is the original inner loop. |
1209 | | /// |
1210 | | /// Before interchanging, we have the following structure |
1211 | | /// Outer preheader |
1212 | | // Outer header |
1213 | | // Inner preheader |
1214 | | // Inner header |
1215 | | // Inner body |
1216 | | // Inner latch |
1217 | | // outer bbs |
1218 | | // Outer latch |
1219 | | // |
1220 | | // After interchanging: |
1221 | | // Inner preheader |
1222 | | // Inner header |
1223 | | // Outer preheader |
1224 | | // Outer header |
1225 | | // Inner body |
1226 | | // outer bbs |
1227 | | // Outer latch |
1228 | | // Inner latch |
1229 | | void LoopInterchangeTransform::restructureLoops( |
1230 | | Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader, |
1231 | 0 | BasicBlock *OrigOuterPreHeader) { |
1232 | 0 | Loop *OuterLoopParent = OuterLoop->getParentLoop(); |
1233 | | // The original inner loop preheader moves from the new inner loop to |
1234 | | // the parent loop, if there is one. |
1235 | 0 | NewInner->removeBlockFromLoop(OrigInnerPreHeader); |
1236 | 0 | LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent); |
1237 | | |
1238 | | // Switch the loop levels. |
1239 | 0 | if (OuterLoopParent) { |
1240 | | // Remove the loop from its parent loop. |
1241 | 0 | removeChildLoop(OuterLoopParent, NewInner); |
1242 | 0 | removeChildLoop(NewInner, NewOuter); |
1243 | 0 | OuterLoopParent->addChildLoop(NewOuter); |
1244 | 0 | } else { |
1245 | 0 | removeChildLoop(NewInner, NewOuter); |
1246 | 0 | LI->changeTopLevelLoop(NewInner, NewOuter); |
1247 | 0 | } |
1248 | 0 | while (!NewOuter->isInnermost()) |
1249 | 0 | NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin())); |
1250 | 0 | NewOuter->addChildLoop(NewInner); |
1251 | | |
1252 | | // BBs from the original inner loop. |
1253 | 0 | SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks()); |
1254 | | |
1255 | | // Add BBs from the original outer loop to the original inner loop (excluding |
1256 | | // BBs already in inner loop) |
1257 | 0 | for (BasicBlock *BB : NewInner->blocks()) |
1258 | 0 | if (LI->getLoopFor(BB) == NewInner) |
1259 | 0 | NewOuter->addBlockEntry(BB); |
1260 | | |
1261 | | // Now remove inner loop header and latch from the new inner loop and move |
1262 | | // other BBs (the loop body) to the new inner loop. |
1263 | 0 | BasicBlock *OuterHeader = NewOuter->getHeader(); |
1264 | 0 | BasicBlock *OuterLatch = NewOuter->getLoopLatch(); |
1265 | 0 | for (BasicBlock *BB : OrigInnerBBs) { |
1266 | | // Nothing will change for BBs in child loops. |
1267 | 0 | if (LI->getLoopFor(BB) != NewOuter) |
1268 | 0 | continue; |
1269 | | // Remove the new outer loop header and latch from the new inner loop. |
1270 | 0 | if (BB == OuterHeader || BB == OuterLatch) |
1271 | 0 | NewInner->removeBlockFromLoop(BB); |
1272 | 0 | else |
1273 | 0 | LI->changeLoopFor(BB, NewInner); |
1274 | 0 | } |
1275 | | |
1276 | | // The preheader of the original outer loop becomes part of the new |
1277 | | // outer loop. |
1278 | 0 | NewOuter->addBlockEntry(OrigOuterPreHeader); |
1279 | 0 | LI->changeLoopFor(OrigOuterPreHeader, NewOuter); |
1280 | | |
1281 | | // Tell SE that we move the loops around. |
1282 | 0 | SE->forgetLoop(NewOuter); |
1283 | 0 | } |
1284 | | |
1285 | 0 | bool LoopInterchangeTransform::transform() { |
1286 | 0 | bool Transformed = false; |
1287 | |
|
1288 | 0 | if (InnerLoop->getSubLoops().empty()) { |
1289 | 0 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
1290 | 0 | LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n"); |
1291 | 0 | auto &InductionPHIs = LIL.getInnerLoopInductions(); |
1292 | 0 | if (InductionPHIs.empty()) { |
1293 | 0 | LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n"); |
1294 | 0 | return false; |
1295 | 0 | } |
1296 | | |
1297 | 0 | SmallVector<Instruction *, 8> InnerIndexVarList; |
1298 | 0 | for (PHINode *CurInductionPHI : InductionPHIs) { |
1299 | 0 | if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader) |
1300 | 0 | InnerIndexVarList.push_back( |
1301 | 0 | dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1))); |
1302 | 0 | else |
1303 | 0 | InnerIndexVarList.push_back( |
1304 | 0 | dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0))); |
1305 | 0 | } |
1306 | | |
1307 | | // Create a new latch block for the inner loop. We split at the |
1308 | | // current latch's terminator and then move the condition and all |
1309 | | // operands that are not either loop-invariant or the induction PHI into the |
1310 | | // new latch block. |
1311 | 0 | BasicBlock *NewLatch = |
1312 | 0 | SplitBlock(InnerLoop->getLoopLatch(), |
1313 | 0 | InnerLoop->getLoopLatch()->getTerminator(), DT, LI); |
1314 | |
|
1315 | 0 | SmallSetVector<Instruction *, 4> WorkList; |
1316 | 0 | unsigned i = 0; |
1317 | 0 | auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() { |
1318 | 0 | for (; i < WorkList.size(); i++) { |
1319 | | // Duplicate instruction and move it the new latch. Update uses that |
1320 | | // have been moved. |
1321 | 0 | Instruction *NewI = WorkList[i]->clone(); |
1322 | 0 | NewI->insertBefore(NewLatch->getFirstNonPHI()); |
1323 | 0 | assert(!NewI->mayHaveSideEffects() && |
1324 | 0 | "Moving instructions with side-effects may change behavior of " |
1325 | 0 | "the loop nest!"); |
1326 | 0 | for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) { |
1327 | 0 | Instruction *UserI = cast<Instruction>(U.getUser()); |
1328 | 0 | if (!InnerLoop->contains(UserI->getParent()) || |
1329 | 0 | UserI->getParent() == NewLatch || |
1330 | 0 | llvm::is_contained(InductionPHIs, UserI)) |
1331 | 0 | U.set(NewI); |
1332 | 0 | } |
1333 | | // Add operands of moved instruction to the worklist, except if they are |
1334 | | // outside the inner loop or are the induction PHI. |
1335 | 0 | for (Value *Op : WorkList[i]->operands()) { |
1336 | 0 | Instruction *OpI = dyn_cast<Instruction>(Op); |
1337 | 0 | if (!OpI || |
1338 | 0 | this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop || |
1339 | 0 | llvm::is_contained(InductionPHIs, OpI)) |
1340 | 0 | continue; |
1341 | 0 | WorkList.insert(OpI); |
1342 | 0 | } |
1343 | 0 | } |
1344 | 0 | }; |
1345 | | |
1346 | | // FIXME: Should we interchange when we have a constant condition? |
1347 | 0 | Instruction *CondI = dyn_cast<Instruction>( |
1348 | 0 | cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator()) |
1349 | 0 | ->getCondition()); |
1350 | 0 | if (CondI) |
1351 | 0 | WorkList.insert(CondI); |
1352 | 0 | MoveInstructions(); |
1353 | 0 | for (Instruction *InnerIndexVar : InnerIndexVarList) |
1354 | 0 | WorkList.insert(cast<Instruction>(InnerIndexVar)); |
1355 | 0 | MoveInstructions(); |
1356 | 0 | } |
1357 | | |
1358 | | // Ensure the inner loop phi nodes have a separate basic block. |
1359 | 0 | BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); |
1360 | 0 | if (InnerLoopHeader->getFirstNonPHI() != InnerLoopHeader->getTerminator()) { |
1361 | 0 | SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI); |
1362 | 0 | LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n"); |
1363 | 0 | } |
1364 | | |
1365 | | // Instructions in the original inner loop preheader may depend on values |
1366 | | // defined in the outer loop header. Move them there, because the original |
1367 | | // inner loop preheader will become the entry into the interchanged loop nest. |
1368 | | // Currently we move all instructions and rely on LICM to move invariant |
1369 | | // instructions outside the loop nest. |
1370 | 0 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
1371 | 0 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); |
1372 | 0 | if (InnerLoopPreHeader != OuterLoopHeader) { |
1373 | 0 | SmallPtrSet<Instruction *, 4> NeedsMoving; |
1374 | 0 | for (Instruction &I : |
1375 | 0 | make_early_inc_range(make_range(InnerLoopPreHeader->begin(), |
1376 | 0 | std::prev(InnerLoopPreHeader->end())))) |
1377 | 0 | I.moveBeforePreserving(OuterLoopHeader->getTerminator()); |
1378 | 0 | } |
1379 | |
|
1380 | 0 | Transformed |= adjustLoopLinks(); |
1381 | 0 | if (!Transformed) { |
1382 | 0 | LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n"); |
1383 | 0 | return false; |
1384 | 0 | } |
1385 | | |
1386 | 0 | return true; |
1387 | 0 | } |
1388 | | |
1389 | | /// \brief Move all instructions except the terminator from FromBB right before |
1390 | | /// InsertBefore |
1391 | 0 | static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { |
1392 | 0 | BasicBlock *ToBB = InsertBefore->getParent(); |
1393 | |
|
1394 | 0 | ToBB->splice(InsertBefore->getIterator(), FromBB, FromBB->begin(), |
1395 | 0 | FromBB->getTerminator()->getIterator()); |
1396 | 0 | } |
1397 | | |
1398 | | /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact. |
1399 | 0 | static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) { |
1400 | | // Save all non-terminator instructions of BB1 into TempInstrs and unlink them |
1401 | | // from BB1 afterwards. |
1402 | 0 | auto Iter = map_range(*BB1, [](Instruction &I) { return &I; }); |
1403 | 0 | SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end())); |
1404 | 0 | for (Instruction *I : TempInstrs) |
1405 | 0 | I->removeFromParent(); |
1406 | | |
1407 | | // Move instructions from BB2 to BB1. |
1408 | 0 | moveBBContents(BB2, BB1->getTerminator()); |
1409 | | |
1410 | | // Move instructions from TempInstrs to BB2. |
1411 | 0 | for (Instruction *I : TempInstrs) |
1412 | 0 | I->insertBefore(BB2->getTerminator()); |
1413 | 0 | } |
1414 | | |
1415 | | // Update BI to jump to NewBB instead of OldBB. Records updates to the |
1416 | | // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that |
1417 | | // \p OldBB is exactly once in BI's successor list. |
1418 | | static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, |
1419 | | BasicBlock *NewBB, |
1420 | | std::vector<DominatorTree::UpdateType> &DTUpdates, |
1421 | 0 | bool MustUpdateOnce = true) { |
1422 | 0 | assert((!MustUpdateOnce || |
1423 | 0 | llvm::count_if(successors(BI), |
1424 | 0 | [OldBB](BasicBlock *BB) { |
1425 | 0 | return BB == OldBB; |
1426 | 0 | }) == 1) && "BI must jump to OldBB exactly once."); |
1427 | 0 | bool Changed = false; |
1428 | 0 | for (Use &Op : BI->operands()) |
1429 | 0 | if (Op == OldBB) { |
1430 | 0 | Op.set(NewBB); |
1431 | 0 | Changed = true; |
1432 | 0 | } |
1433 | |
|
1434 | 0 | if (Changed) { |
1435 | 0 | DTUpdates.push_back( |
1436 | 0 | {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB}); |
1437 | 0 | DTUpdates.push_back( |
1438 | 0 | {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB}); |
1439 | 0 | } |
1440 | 0 | assert(Changed && "Expected a successor to be updated"); |
1441 | 0 | } |
1442 | | |
1443 | | // Move Lcssa PHIs to the right place. |
1444 | | static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, |
1445 | | BasicBlock *InnerLatch, BasicBlock *OuterHeader, |
1446 | | BasicBlock *OuterLatch, BasicBlock *OuterExit, |
1447 | 0 | Loop *InnerLoop, LoopInfo *LI) { |
1448 | | |
1449 | | // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are |
1450 | | // defined either in the header or latch. Those blocks will become header and |
1451 | | // latch of the new outer loop, and the only possible users can PHI nodes |
1452 | | // in the exit block of the loop nest or the outer loop header (reduction |
1453 | | // PHIs, in that case, the incoming value must be defined in the inner loop |
1454 | | // header). We can just substitute the user with the incoming value and remove |
1455 | | // the PHI. |
1456 | 0 | for (PHINode &P : make_early_inc_range(InnerExit->phis())) { |
1457 | 0 | assert(P.getNumIncomingValues() == 1 && |
1458 | 0 | "Only loops with a single exit are supported!"); |
1459 | | |
1460 | | // Incoming values are guaranteed be instructions currently. |
1461 | 0 | auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch)); |
1462 | | // In case of multi-level nested loops, follow LCSSA to find the incoming |
1463 | | // value defined from the innermost loop. |
1464 | 0 | auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI)); |
1465 | | // Skip phis with incoming values from the inner loop body, excluding the |
1466 | | // header and latch. |
1467 | 0 | if (IncIInnerMost->getParent() != InnerLatch && |
1468 | 0 | IncIInnerMost->getParent() != InnerHeader) |
1469 | 0 | continue; |
1470 | | |
1471 | 0 | assert(all_of(P.users(), |
1472 | 0 | [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { |
1473 | 0 | return (cast<PHINode>(U)->getParent() == OuterHeader && |
1474 | 0 | IncI->getParent() == InnerHeader) || |
1475 | 0 | cast<PHINode>(U)->getParent() == OuterExit; |
1476 | 0 | }) && |
1477 | 0 | "Can only replace phis iff the uses are in the loop nest exit or " |
1478 | 0 | "the incoming value is defined in the inner header (it will " |
1479 | 0 | "dominate all loop blocks after interchanging)"); |
1480 | 0 | P.replaceAllUsesWith(IncI); |
1481 | 0 | P.eraseFromParent(); |
1482 | 0 | } |
1483 | |
|
1484 | 0 | SmallVector<PHINode *, 8> LcssaInnerExit; |
1485 | 0 | for (PHINode &P : InnerExit->phis()) |
1486 | 0 | LcssaInnerExit.push_back(&P); |
1487 | |
|
1488 | 0 | SmallVector<PHINode *, 8> LcssaInnerLatch; |
1489 | 0 | for (PHINode &P : InnerLatch->phis()) |
1490 | 0 | LcssaInnerLatch.push_back(&P); |
1491 | | |
1492 | | // Lcssa PHIs for values used outside the inner loop are in InnerExit. |
1493 | | // If a PHI node has users outside of InnerExit, it has a use outside the |
1494 | | // interchanged loop and we have to preserve it. We move these to |
1495 | | // InnerLatch, which will become the new exit block for the innermost |
1496 | | // loop after interchanging. |
1497 | 0 | for (PHINode *P : LcssaInnerExit) |
1498 | 0 | P->moveBefore(InnerLatch->getFirstNonPHI()); |
1499 | | |
1500 | | // If the inner loop latch contains LCSSA PHIs, those come from a child loop |
1501 | | // and we have to move them to the new inner latch. |
1502 | 0 | for (PHINode *P : LcssaInnerLatch) |
1503 | 0 | P->moveBefore(InnerExit->getFirstNonPHI()); |
1504 | | |
1505 | | // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have |
1506 | | // incoming values defined in the outer loop, we have to add a new PHI |
1507 | | // in the inner loop latch, which became the exit block of the outer loop, |
1508 | | // after interchanging. |
1509 | 0 | if (OuterExit) { |
1510 | 0 | for (PHINode &P : OuterExit->phis()) { |
1511 | 0 | if (P.getNumIncomingValues() != 1) |
1512 | 0 | continue; |
1513 | | // Skip Phis with incoming values defined in the inner loop. Those should |
1514 | | // already have been updated. |
1515 | 0 | auto I = dyn_cast<Instruction>(P.getIncomingValue(0)); |
1516 | 0 | if (!I || LI->getLoopFor(I->getParent()) == InnerLoop) |
1517 | 0 | continue; |
1518 | | |
1519 | 0 | PHINode *NewPhi = dyn_cast<PHINode>(P.clone()); |
1520 | 0 | NewPhi->setIncomingValue(0, P.getIncomingValue(0)); |
1521 | 0 | NewPhi->setIncomingBlock(0, OuterLatch); |
1522 | | // We might have incoming edges from other BBs, i.e., the original outer |
1523 | | // header. |
1524 | 0 | for (auto *Pred : predecessors(InnerLatch)) { |
1525 | 0 | if (Pred == OuterLatch) |
1526 | 0 | continue; |
1527 | 0 | NewPhi->addIncoming(P.getIncomingValue(0), Pred); |
1528 | 0 | } |
1529 | 0 | NewPhi->insertBefore(InnerLatch->getFirstNonPHI()); |
1530 | 0 | P.setIncomingValue(0, NewPhi); |
1531 | 0 | } |
1532 | 0 | } |
1533 | | |
1534 | | // Now adjust the incoming blocks for the LCSSA PHIs. |
1535 | | // For PHIs moved from Inner's exit block, we need to replace Inner's latch |
1536 | | // with the new latch. |
1537 | 0 | InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch); |
1538 | 0 | } |
1539 | | |
1540 | 0 | bool LoopInterchangeTransform::adjustLoopBranches() { |
1541 | 0 | LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n"); |
1542 | 0 | std::vector<DominatorTree::UpdateType> DTUpdates; |
1543 | |
|
1544 | 0 | BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); |
1545 | 0 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
1546 | |
|
1547 | 0 | assert(OuterLoopPreHeader != OuterLoop->getHeader() && |
1548 | 0 | InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && |
1549 | 0 | InnerLoopPreHeader && "Guaranteed by loop-simplify form"); |
1550 | | // Ensure that both preheaders do not contain PHI nodes and have single |
1551 | | // predecessors. This allows us to move them easily. We use |
1552 | | // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing |
1553 | | // preheaders do not satisfy those conditions. |
1554 | 0 | if (isa<PHINode>(OuterLoopPreHeader->begin()) || |
1555 | 0 | !OuterLoopPreHeader->getUniquePredecessor()) |
1556 | 0 | OuterLoopPreHeader = |
1557 | 0 | InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true); |
1558 | 0 | if (InnerLoopPreHeader == OuterLoop->getHeader()) |
1559 | 0 | InnerLoopPreHeader = |
1560 | 0 | InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true); |
1561 | | |
1562 | | // Adjust the loop preheader |
1563 | 0 | BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); |
1564 | 0 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); |
1565 | 0 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); |
1566 | 0 | BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); |
1567 | 0 | BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor(); |
1568 | 0 | BasicBlock *InnerLoopLatchPredecessor = |
1569 | 0 | InnerLoopLatch->getUniquePredecessor(); |
1570 | 0 | BasicBlock *InnerLoopLatchSuccessor; |
1571 | 0 | BasicBlock *OuterLoopLatchSuccessor; |
1572 | |
|
1573 | 0 | BranchInst *OuterLoopLatchBI = |
1574 | 0 | dyn_cast<BranchInst>(OuterLoopLatch->getTerminator()); |
1575 | 0 | BranchInst *InnerLoopLatchBI = |
1576 | 0 | dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); |
1577 | 0 | BranchInst *OuterLoopHeaderBI = |
1578 | 0 | dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); |
1579 | 0 | BranchInst *InnerLoopHeaderBI = |
1580 | 0 | dyn_cast<BranchInst>(InnerLoopHeader->getTerminator()); |
1581 | |
|
1582 | 0 | if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor || |
1583 | 0 | !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI || |
1584 | 0 | !InnerLoopHeaderBI) |
1585 | 0 | return false; |
1586 | | |
1587 | 0 | BranchInst *InnerLoopLatchPredecessorBI = |
1588 | 0 | dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator()); |
1589 | 0 | BranchInst *OuterLoopPredecessorBI = |
1590 | 0 | dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator()); |
1591 | |
|
1592 | 0 | if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) |
1593 | 0 | return false; |
1594 | 0 | BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); |
1595 | 0 | if (!InnerLoopHeaderSuccessor) |
1596 | 0 | return false; |
1597 | | |
1598 | | // Adjust Loop Preheader and headers. |
1599 | | // The branches in the outer loop predecessor and the outer loop header can |
1600 | | // be unconditional branches or conditional branches with duplicates. Consider |
1601 | | // this when updating the successors. |
1602 | 0 | updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader, |
1603 | 0 | InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false); |
1604 | | // The outer loop header might or might not branch to the outer latch. |
1605 | | // We are guaranteed to branch to the inner loop preheader. |
1606 | 0 | if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) { |
1607 | | // In this case the outerLoopHeader should branch to the InnerLoopLatch. |
1608 | 0 | updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch, |
1609 | 0 | DTUpdates, |
1610 | 0 | /*MustUpdateOnce=*/false); |
1611 | 0 | } |
1612 | 0 | updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader, |
1613 | 0 | InnerLoopHeaderSuccessor, DTUpdates, |
1614 | 0 | /*MustUpdateOnce=*/false); |
1615 | | |
1616 | | // Adjust reduction PHI's now that the incoming block has changed. |
1617 | 0 | InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader, |
1618 | 0 | OuterLoopHeader); |
1619 | |
|
1620 | 0 | updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor, |
1621 | 0 | OuterLoopPreHeader, DTUpdates); |
1622 | | |
1623 | | // -------------Adjust loop latches----------- |
1624 | 0 | if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) |
1625 | 0 | InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1); |
1626 | 0 | else |
1627 | 0 | InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); |
1628 | |
|
1629 | 0 | updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch, |
1630 | 0 | InnerLoopLatchSuccessor, DTUpdates); |
1631 | |
|
1632 | 0 | if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) |
1633 | 0 | OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); |
1634 | 0 | else |
1635 | 0 | OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); |
1636 | |
|
1637 | 0 | updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor, |
1638 | 0 | OuterLoopLatchSuccessor, DTUpdates); |
1639 | 0 | updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch, |
1640 | 0 | DTUpdates); |
1641 | |
|
1642 | 0 | DT->applyUpdates(DTUpdates); |
1643 | 0 | restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader, |
1644 | 0 | OuterLoopPreHeader); |
1645 | |
|
1646 | 0 | moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch, |
1647 | 0 | OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(), |
1648 | 0 | InnerLoop, LI); |
1649 | | // For PHIs in the exit block of the outer loop, outer's latch has been |
1650 | | // replaced by Inners'. |
1651 | 0 | OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch); |
1652 | |
|
1653 | 0 | auto &OuterInnerReductions = LIL.getOuterInnerReductions(); |
1654 | | // Now update the reduction PHIs in the inner and outer loop headers. |
1655 | 0 | SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs; |
1656 | 0 | for (PHINode &PHI : InnerLoopHeader->phis()) |
1657 | 0 | if (OuterInnerReductions.contains(&PHI)) |
1658 | 0 | InnerLoopPHIs.push_back(&PHI); |
1659 | |
|
1660 | 0 | for (PHINode &PHI : OuterLoopHeader->phis()) |
1661 | 0 | if (OuterInnerReductions.contains(&PHI)) |
1662 | 0 | OuterLoopPHIs.push_back(&PHI); |
1663 | | |
1664 | | // Now move the remaining reduction PHIs from outer to inner loop header and |
1665 | | // vice versa. The PHI nodes must be part of a reduction across the inner and |
1666 | | // outer loop and all the remains to do is and updating the incoming blocks. |
1667 | 0 | for (PHINode *PHI : OuterLoopPHIs) { |
1668 | 0 | LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump();); |
1669 | 0 | PHI->moveBefore(InnerLoopHeader->getFirstNonPHI()); |
1670 | 0 | assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node"); |
1671 | 0 | } |
1672 | 0 | for (PHINode *PHI : InnerLoopPHIs) { |
1673 | 0 | LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump();); |
1674 | 0 | PHI->moveBefore(OuterLoopHeader->getFirstNonPHI()); |
1675 | 0 | assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node"); |
1676 | 0 | } |
1677 | | |
1678 | | // Update the incoming blocks for moved PHI nodes. |
1679 | 0 | OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader); |
1680 | 0 | OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch); |
1681 | 0 | InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader); |
1682 | 0 | InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch); |
1683 | | |
1684 | | // Values defined in the outer loop header could be used in the inner loop |
1685 | | // latch. In that case, we need to create LCSSA phis for them, because after |
1686 | | // interchanging they will be defined in the new inner loop and used in the |
1687 | | // new outer loop. |
1688 | 0 | SmallVector<Instruction *, 4> MayNeedLCSSAPhis; |
1689 | 0 | for (Instruction &I : |
1690 | 0 | make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end()))) |
1691 | 0 | MayNeedLCSSAPhis.push_back(&I); |
1692 | 0 | formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE); |
1693 | |
|
1694 | 0 | return true; |
1695 | 0 | } |
1696 | | |
1697 | 0 | bool LoopInterchangeTransform::adjustLoopLinks() { |
1698 | | // Adjust all branches in the inner and outer loop. |
1699 | 0 | bool Changed = adjustLoopBranches(); |
1700 | 0 | if (Changed) { |
1701 | | // We have interchanged the preheaders so we need to interchange the data in |
1702 | | // the preheaders as well. This is because the content of the inner |
1703 | | // preheader was previously executed inside the outer loop. |
1704 | 0 | BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); |
1705 | 0 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); |
1706 | 0 | swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader); |
1707 | 0 | } |
1708 | 0 | return Changed; |
1709 | 0 | } |
1710 | | |
1711 | | PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, |
1712 | | LoopAnalysisManager &AM, |
1713 | | LoopStandardAnalysisResults &AR, |
1714 | 0 | LPMUpdater &U) { |
1715 | 0 | Function &F = *LN.getParent(); |
1716 | |
|
1717 | 0 | DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); |
1718 | 0 | std::unique_ptr<CacheCost> CC = |
1719 | 0 | CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI); |
1720 | 0 | OptimizationRemarkEmitter ORE(&F); |
1721 | 0 | if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN)) |
1722 | 0 | return PreservedAnalyses::all(); |
1723 | 0 | U.markLoopNestChanged(true); |
1724 | 0 | return getLoopPassPreservedAnalyses(); |
1725 | 0 | } |