/src/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- SimplifyIndVar.cpp - Induction variable simplification ------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements induction variable simplification. It does |
10 | | // not define any actual pass or policy, but provides a single function to |
11 | | // simplify a loop's induction variables based on ScalarEvolution. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "llvm/Transforms/Utils/SimplifyIndVar.h" |
16 | | #include "llvm/ADT/SmallVector.h" |
17 | | #include "llvm/ADT/Statistic.h" |
18 | | #include "llvm/Analysis/LoopInfo.h" |
19 | | #include "llvm/IR/Dominators.h" |
20 | | #include "llvm/IR/IRBuilder.h" |
21 | | #include "llvm/IR/Instructions.h" |
22 | | #include "llvm/IR/IntrinsicInst.h" |
23 | | #include "llvm/IR/PatternMatch.h" |
24 | | #include "llvm/Support/Debug.h" |
25 | | #include "llvm/Support/raw_ostream.h" |
26 | | #include "llvm/Transforms/Utils/Local.h" |
27 | | #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" |
28 | | |
29 | | using namespace llvm; |
30 | | using namespace llvm::PatternMatch; |
31 | | |
32 | 0 | #define DEBUG_TYPE "indvars" |
33 | | |
34 | | STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); |
35 | | STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); |
36 | | STATISTIC(NumFoldedUser, "Number of IV users folded into a constant"); |
37 | | STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); |
38 | | STATISTIC( |
39 | | NumSimplifiedSDiv, |
40 | | "Number of IV signed division operations converted to unsigned division"); |
41 | | STATISTIC( |
42 | | NumSimplifiedSRem, |
43 | | "Number of IV signed remainder operations converted to unsigned remainder"); |
44 | | STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); |
45 | | |
namespace {
  /// This is a utility for simplifying induction variables
  /// based on ScalarEvolution. It is the primary instrument of the
  /// IndvarSimplify pass, but it may also be directly invoked to cleanup after
  /// other loop passes that preserve SCEV.
  class SimplifyIndvar {
    Loop *L;                          // Loop whose induction variables are simplified.
    LoopInfo *LI;
    ScalarEvolution *SE;
    DominatorTree *DT;
    const TargetTransformInfo *TTI;
    SCEVExpander &Rewriter;           // Shared expander used to materialize SCEVs as IR.
    // Instructions made trivially dead by a simplification; the caller owns
    // the actual deletion.
    SmallVectorImpl<WeakTrackingVH> &DeadInsts;

    bool Changed = false;             // Set once any transformation succeeds.

  public:
    SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
                   LoopInfo *LI, const TargetTransformInfo *TTI,
                   SCEVExpander &Rewriter,
                   SmallVectorImpl<WeakTrackingVH> &Dead)
        : L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter),
          DeadInsts(Dead) {
      assert(LI && "IV simplification requires LoopInfo");
    }

    /// Whether any call so far performed a transformation.
    bool hasChanged() const { return Changed; }

    /// Iteratively perform simplification on a worklist of users of the
    /// specified induction variable. This is the top-level driver that applies
    /// all simplifications to users of an IV.
    void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr);

    /// Fold IVOperand into UseInst when the use ignores its low bits;
    /// returns the exposed operand on success, null otherwise.
    Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);

    // Elimination / replacement helpers; each returns true (or records
    // Changed) when it rewrote IR.
    bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
    bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
    bool replaceFloatIVWithIntegerIV(Instruction *UseInst);

    bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
    bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
    bool eliminateTrunc(TruncInst *TI);
    bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
    bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand);
    void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand);
    void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand,
                             bool IsSigned);
    void replaceRemWithNumerator(BinaryOperator *Rem);
    void replaceRemWithNumeratorOrZero(BinaryOperator *Rem);
    void replaceSRemWithURem(BinaryOperator *Rem);
    bool eliminateSDiv(BinaryOperator *SDiv);
    // Flag-strengthening helpers (nsw/nuw/exact inference on IV arithmetic).
    bool strengthenBinaryOp(BinaryOperator *BO, Instruction *IVOperand);
    bool strengthenOverflowingOperation(BinaryOperator *OBO,
                                        Instruction *IVOperand);
    bool strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand);
  };
}
103 | | |
104 | | /// Find a point in code which dominates all given instructions. We can safely |
105 | | /// assume that, whatever fact we can prove at the found point, this fact is |
106 | | /// also true for each of the given instructions. |
107 | | static Instruction *findCommonDominator(ArrayRef<Instruction *> Instructions, |
108 | 6.66k | DominatorTree &DT) { |
109 | 6.66k | Instruction *CommonDom = nullptr; |
110 | 6.66k | for (auto *Insn : Instructions) |
111 | 8.84k | CommonDom = |
112 | 8.84k | CommonDom ? DT.findNearestCommonDominator(CommonDom, Insn) : Insn; |
113 | 6.66k | assert(CommonDom && "Common dominator not found?"); |
114 | 0 | return CommonDom; |
115 | 6.66k | } |
116 | | |
/// Fold an IV operand into its use. This removes increments of an
/// aligned IV when used by a instruction that ignores the low bits.
///
/// IVOperand is guaranteed SCEVable, but UseInst may not be.
///
/// Return the operand of IVOperand for this induction variable if IVOperand can
/// be folded (in case more folding opportunities have been exposed).
/// Otherwise return null.
Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
  Value *IVSrc = nullptr;
  // For the handled opcodes the IV is always the first operand of the use.
  const unsigned OperIdx = 0;
  const SCEV *FoldedExpr = nullptr;
  bool MustDropExactFlag = false;
  switch (UseInst->getOpcode()) {
  default:
    return nullptr;
  case Instruction::UDiv:
  case Instruction::LShr:
    // We're only interested in the case where we know something about
    // the numerator and have a constant denominator.
    if (IVOperand != UseInst->getOperand(OperIdx) ||
        !isa<ConstantInt>(UseInst->getOperand(1)))
      return nullptr;

    // Attempt to fold a binary operator with constant operand.
    // e.g. ((I + 1) >> 2) => I >> 2
    if (!isa<BinaryOperator>(IVOperand)
        || !isa<ConstantInt>(IVOperand->getOperand(1)))
      return nullptr;

    IVSrc = IVOperand->getOperand(0);
    // IVSrc must be the (SCEVable) IV, since the other operand is const.
    assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");

    ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
    if (UseInst->getOpcode() == Instruction::LShr) {
      // Get a constant for the divisor. See createSCEV.
      uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
      // A shift by >= bitwidth has no well-defined fold; bail out.
      if (D->getValue().uge(BitWidth))
        return nullptr;

      // Model the shift as an unsigned division by 2^shift-amount.
      D = ConstantInt::get(UseInst->getContext(),
                           APInt::getOneBitSet(BitWidth, D->getZExtValue()));
    }
    const auto *LHS = SE->getSCEV(IVSrc);
    const auto *RHS = SE->getSCEV(D);
    FoldedExpr = SE->getUDivExpr(LHS, RHS);
    // We might have 'exact' flag set at this point which will no longer be
    // correct after we make the replacement.
    if (UseInst->isExact() && LHS != SE->getMulExpr(FoldedExpr, RHS))
      MustDropExactFlag = true;
  }
  // We have something that might fold it's operand. Compare SCEVs.
  if (!SE->isSCEVable(UseInst->getType()))
    return nullptr;

  // Bypass the operand if SCEV can prove it has no effect.
  if (SE->getSCEV(UseInst) != FoldedExpr)
    return nullptr;

  LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
                    << " -> " << *UseInst << '\n');

  UseInst->setOperand(OperIdx, IVSrc);
  assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");

  if (MustDropExactFlag)
    UseInst->dropPoisonGeneratingFlags();

  ++NumElimOperand;
  Changed = true;
  // The folded-away operand may now be dead; queue it for cleanup.
  if (IVOperand->use_empty())
    DeadInsts.emplace_back(IVOperand);
  return IVSrc;
}
192 | | |
193 | | bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, |
194 | 3.70k | Instruction *IVOperand) { |
195 | 3.70k | auto *Preheader = L->getLoopPreheader(); |
196 | 3.70k | if (!Preheader) |
197 | 0 | return false; |
198 | 3.70k | unsigned IVOperIdx = 0; |
199 | 3.70k | ICmpInst::Predicate Pred = ICmp->getPredicate(); |
200 | 3.70k | if (IVOperand != ICmp->getOperand(0)) { |
201 | | // Swapped |
202 | 988 | assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); |
203 | 0 | IVOperIdx = 1; |
204 | 988 | Pred = ICmpInst::getSwappedPredicate(Pred); |
205 | 988 | } |
206 | | |
207 | | // Get the SCEVs for the ICmp operands (in the specific context of the |
208 | | // current loop) |
209 | 0 | const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); |
210 | 3.70k | const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); |
211 | 3.70k | const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); |
212 | 3.70k | auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L, ICmp); |
213 | 3.70k | if (!LIP) |
214 | 3.57k | return false; |
215 | 131 | ICmpInst::Predicate InvariantPredicate = LIP->Pred; |
216 | 131 | const SCEV *InvariantLHS = LIP->LHS; |
217 | 131 | const SCEV *InvariantRHS = LIP->RHS; |
218 | | |
219 | | // Do not generate something ridiculous. |
220 | 131 | auto *PHTerm = Preheader->getTerminator(); |
221 | 131 | if (Rewriter.isHighCostExpansion({InvariantLHS, InvariantRHS}, L, |
222 | 131 | 2 * SCEVCheapExpansionBudget, TTI, PHTerm) || |
223 | 131 | !Rewriter.isSafeToExpandAt(InvariantLHS, PHTerm) || |
224 | 131 | !Rewriter.isSafeToExpandAt(InvariantRHS, PHTerm)) |
225 | 0 | return false; |
226 | 131 | auto *NewLHS = |
227 | 131 | Rewriter.expandCodeFor(InvariantLHS, IVOperand->getType(), PHTerm); |
228 | 131 | auto *NewRHS = |
229 | 131 | Rewriter.expandCodeFor(InvariantRHS, IVOperand->getType(), PHTerm); |
230 | 131 | LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); |
231 | 131 | ICmp->setPredicate(InvariantPredicate); |
232 | 131 | ICmp->setOperand(0, NewLHS); |
233 | 131 | ICmp->setOperand(1, NewRHS); |
234 | 131 | return true; |
235 | 131 | } |
236 | | |
/// SimplifyIVUsers helper for eliminating useless
/// comparisons against an induction variable.
void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
                                           Instruction *IVOperand) {
  unsigned IVOperIdx = 0;
  ICmpInst::Predicate Pred = ICmp->getPredicate();
  // Remember the predicate as written in the IR; Pred below may be swapped.
  ICmpInst::Predicate OriginalPred = Pred;
  if (IVOperand != ICmp->getOperand(0)) {
    // Swapped
    assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
    IVOperIdx = 1;
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }

  // Get the SCEVs for the ICmp operands (in the specific context of the
  // current loop)
  const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
  const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
  const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);

  // If the condition is always true or always false in the given context,
  // replace it with a constant value.
  SmallVector<Instruction *, 4> Users;
  for (auto *U : ICmp->users())
    Users.push_back(cast<Instruction>(U));
  // Evaluate at a point dominating every user so the constant-folded result
  // is valid wherever the compare is consumed.
  const Instruction *CtxI = findCommonDominator(Users, *DT);
  if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) {
    SE->forgetValue(ICmp);
    ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev));
    DeadInsts.emplace_back(ICmp);
    LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
  } else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
    // fallthrough to end of function
  } else if (ICmpInst::isSigned(OriginalPred) &&
             SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
    // If we were unable to make anything above, all we can is to canonicalize
    // the comparison hoping that it will open the doors for other
    // optimizations. If we find out that we compare two non-negative values,
    // we turn the instruction's predicate to its unsigned version. Note that
    // we cannot rely on Pred here unless we check if we have swapped it.
    assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
    LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
                      << '\n');
    ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
  } else
    return;

  // All non-return paths above performed a simplification.
  ++NumElimCmp;
  Changed = true;
}
287 | | |
288 | 436 | bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) { |
289 | | // Get the SCEVs for the ICmp operands. |
290 | 436 | auto *N = SE->getSCEV(SDiv->getOperand(0)); |
291 | 436 | auto *D = SE->getSCEV(SDiv->getOperand(1)); |
292 | | |
293 | | // Simplify unnecessary loops away. |
294 | 436 | const Loop *L = LI->getLoopFor(SDiv->getParent()); |
295 | 436 | N = SE->getSCEVAtScope(N, L); |
296 | 436 | D = SE->getSCEVAtScope(D, L); |
297 | | |
298 | | // Replace sdiv by udiv if both of the operands are non-negative |
299 | 436 | if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) { |
300 | 67 | auto *UDiv = BinaryOperator::Create( |
301 | 67 | BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1), |
302 | 67 | SDiv->getName() + ".udiv", SDiv); |
303 | 67 | UDiv->setIsExact(SDiv->isExact()); |
304 | 67 | SDiv->replaceAllUsesWith(UDiv); |
305 | 67 | LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n'); |
306 | 67 | ++NumSimplifiedSDiv; |
307 | 67 | Changed = true; |
308 | 67 | DeadInsts.push_back(SDiv); |
309 | 67 | return true; |
310 | 67 | } |
311 | | |
312 | 369 | return false; |
313 | 436 | } |
314 | | |
315 | | // i %s n -> i %u n if i >= 0 and n >= 0 |
316 | 35 | void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) { |
317 | 35 | auto *N = Rem->getOperand(0), *D = Rem->getOperand(1); |
318 | 35 | auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D, |
319 | 35 | Rem->getName() + ".urem", Rem); |
320 | 35 | Rem->replaceAllUsesWith(URem); |
321 | 35 | LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n'); |
322 | 35 | ++NumSimplifiedSRem; |
323 | 35 | Changed = true; |
324 | 35 | DeadInsts.emplace_back(Rem); |
325 | 35 | } |
326 | | |
327 | | // i % n --> i if i is in [0,n). |
328 | 132 | void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) { |
329 | 132 | Rem->replaceAllUsesWith(Rem->getOperand(0)); |
330 | 132 | LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); |
331 | 132 | ++NumElimRem; |
332 | 132 | Changed = true; |
333 | 132 | DeadInsts.emplace_back(Rem); |
334 | 132 | } |
335 | | |
336 | | // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). |
337 | 79 | void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) { |
338 | 79 | auto *T = Rem->getType(); |
339 | 79 | auto *N = Rem->getOperand(0), *D = Rem->getOperand(1); |
340 | 79 | ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, N, D); |
341 | 79 | SelectInst *Sel = |
342 | 79 | SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem); |
343 | 79 | Rem->replaceAllUsesWith(Sel); |
344 | 79 | LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); |
345 | 79 | ++NumElimRem; |
346 | 79 | Changed = true; |
347 | 79 | DeadInsts.emplace_back(Rem); |
348 | 79 | } |
349 | | |
/// SimplifyIVUsers helper for eliminating useless remainder operations
/// operating on an induction variable or replacing srem by urem.
void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem,
                                         Instruction *IVOperand,
                                         bool IsSigned) {
  auto *NValue = Rem->getOperand(0);
  auto *DValue = Rem->getOperand(1);
  // We're only interested in the case where we know something about
  // the numerator, unless it is a srem, because we want to replace srem by urem
  // in general.
  bool UsedAsNumerator = IVOperand == NValue;
  if (!UsedAsNumerator && !IsSigned)
    return;

  const SCEV *N = SE->getSCEV(NValue);

  // Simplify unnecessary loops away.
  const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
  N = SE->getSCEVAtScope(N, ICmpLoop);

  // srem-to-urem and range-based folds below all require a non-negative
  // numerator; an unsigned rem is non-negative by construction.
  bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N);

  // Do not proceed if the Numerator may be negative
  if (!IsNumeratorNonNegative)
    return;

  const SCEV *D = SE->getSCEV(DValue);
  D = SE->getSCEVAtScope(D, ICmpLoop);

  if (UsedAsNumerator) {
    auto LT = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    // N < D: the remainder is just N.
    if (SE->isKnownPredicate(LT, N, D)) {
      replaceRemWithNumerator(Rem);
      return;
    }

    // N - 1 < D: N can only "wrap" to exactly D, so the remainder is
    // N == D ? 0 : N.
    auto *T = Rem->getType();
    const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T));
    if (SE->isKnownPredicate(LT, NLessOne, D)) {
      replaceRemWithNumeratorOrZero(Rem);
      return;
    }
  }

  // Try to replace SRem with URem, if both N and D are known non-negative.
  // Since we had already check N, we only need to check D now
  if (!IsSigned || !SE->isKnownNonNegative(D))
    return;

  replaceSRemWithURem(Rem);
}
401 | | |
402 | 0 | bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) { |
403 | 0 | const SCEV *LHS = SE->getSCEV(WO->getLHS()); |
404 | 0 | const SCEV *RHS = SE->getSCEV(WO->getRHS()); |
405 | 0 | if (!SE->willNotOverflow(WO->getBinaryOp(), WO->isSigned(), LHS, RHS)) |
406 | 0 | return false; |
407 | | |
408 | | // Proved no overflow, nuke the overflow check and, if possible, the overflow |
409 | | // intrinsic as well. |
410 | | |
411 | 0 | BinaryOperator *NewResult = BinaryOperator::Create( |
412 | 0 | WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO); |
413 | |
|
414 | 0 | if (WO->isSigned()) |
415 | 0 | NewResult->setHasNoSignedWrap(true); |
416 | 0 | else |
417 | 0 | NewResult->setHasNoUnsignedWrap(true); |
418 | |
|
419 | 0 | SmallVector<ExtractValueInst *, 4> ToDelete; |
420 | |
|
421 | 0 | for (auto *U : WO->users()) { |
422 | 0 | if (auto *EVI = dyn_cast<ExtractValueInst>(U)) { |
423 | 0 | if (EVI->getIndices()[0] == 1) |
424 | 0 | EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext())); |
425 | 0 | else { |
426 | 0 | assert(EVI->getIndices()[0] == 0 && "Only two possibilities!"); |
427 | 0 | EVI->replaceAllUsesWith(NewResult); |
428 | 0 | } |
429 | 0 | ToDelete.push_back(EVI); |
430 | 0 | } |
431 | 0 | } |
432 | |
|
433 | 0 | for (auto *EVI : ToDelete) |
434 | 0 | EVI->eraseFromParent(); |
435 | |
|
436 | 0 | if (WO->use_empty()) |
437 | 0 | WO->eraseFromParent(); |
438 | |
|
439 | 0 | Changed = true; |
440 | 0 | return true; |
441 | 0 | } |
442 | | |
443 | 0 | bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) { |
444 | 0 | const SCEV *LHS = SE->getSCEV(SI->getLHS()); |
445 | 0 | const SCEV *RHS = SE->getSCEV(SI->getRHS()); |
446 | 0 | if (!SE->willNotOverflow(SI->getBinaryOp(), SI->isSigned(), LHS, RHS)) |
447 | 0 | return false; |
448 | | |
449 | 0 | BinaryOperator *BO = BinaryOperator::Create( |
450 | 0 | SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI); |
451 | 0 | if (SI->isSigned()) |
452 | 0 | BO->setHasNoSignedWrap(); |
453 | 0 | else |
454 | 0 | BO->setHasNoUnsignedWrap(); |
455 | |
|
456 | 0 | SI->replaceAllUsesWith(BO); |
457 | 0 | DeadInsts.emplace_back(SI); |
458 | 0 | Changed = true; |
459 | 0 | return true; |
460 | 0 | } |
461 | | |
/// Try to eliminate a trunc of an IV whose only users are comparisons against
/// loop-invariant values, by widening the comparisons to the IV's type.
bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
  // It is always legal to replace
  //   icmp <pred> i32 trunc(iv), n
  // with
  //   icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate.
  // Or with
  //   icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate.
  // Or with either of these if pred is an equality predicate.
  //
  // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for
  // every comparison which uses trunc, it means that we can replace each of
  // them with comparison of iv against sext/zext(n). We no longer need trunc
  // after that.
  //
  // TODO: Should we do this if we can widen *some* comparisons, but not all
  // of them? Sometimes it is enough to enable other optimizations, but the
  // trunc instruction will stay in the loop.
  Value *IV = TI->getOperand(0);
  Type *IVTy = IV->getType();
  const SCEV *IVSCEV = SE->getSCEV(IV);
  const SCEV *TISCEV = SE->getSCEV(TI);

  // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can
  // get rid of trunc
  bool DoesSExtCollapse = false;
  bool DoesZExtCollapse = false;
  if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy))
    DoesSExtCollapse = true;
  if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy))
    DoesZExtCollapse = true;

  // If neither sext nor zext does collapse, it is not profitable to do any
  // transform. Bail.
  if (!DoesSExtCollapse && !DoesZExtCollapse)
    return false;

  // Collect users of the trunc that look like comparisons against invariants.
  // Bail if we find something different.
  SmallVector<ICmpInst *, 4> ICmpUsers;
  for (auto *U : TI->users()) {
    // We don't care about users in unreachable blocks.
    if (isa<Instruction>(U) &&
        !DT->isReachableFromEntry(cast<Instruction>(U)->getParent()))
      continue;
    // Any non-icmp user blocks the whole transform.
    ICmpInst *ICI = dyn_cast<ICmpInst>(U);
    if (!ICI) return false;
    assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
    // One side must be the trunc, the other must be loop-invariant.
    if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) &&
        !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0))))
      return false;
    // If we cannot get rid of trunc, bail.
    if (ICI->isSigned() && !DoesSExtCollapse)
      return false;
    if (ICI->isUnsigned() && !DoesZExtCollapse)
      return false;
    // For equality, either signed or unsigned works.
    ICmpUsers.push_back(ICI);
  }

  // Decide per-compare whether the invariant side can be zero-extended.
  auto CanUseZExt = [&](ICmpInst *ICI) {
    // Unsigned comparison can be widened as unsigned.
    if (ICI->isUnsigned())
      return true;
    // Is it profitable to do zext?
    if (!DoesZExtCollapse)
      return false;
    // For equality, we can safely zext both parts.
    if (ICI->isEquality())
      return true;
    // Otherwise we can only use zext when comparing two non-negative or two
    // negative values. But in practice, we will never pass DoesZExtCollapse
    // check for a negative value, because zext(trunc(x)) is non-negative. So
    // it only make sense to check for non-negativity here.
    const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0));
    const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1));
    return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2);
  };
  // Replace all comparisons against trunc with comparisons against IV.
  for (auto *ICI : ICmpUsers) {
    bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0));
    auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1);
    IRBuilder<> Builder(ICI);
    Value *Ext = nullptr;
    // For signed/unsigned predicate, replace the old comparison with comparison
    // of immediate IV against sext/zext of the invariant argument. If we can
    // use either sext or zext (i.e. we are dealing with equality predicate),
    // then prefer zext as a more canonical form.
    // TODO: If we see a signed comparison which can be turned into unsigned,
    // we can do it here for canonicalization purposes.
    ICmpInst::Predicate Pred = ICI->getPredicate();
    if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred);
    if (CanUseZExt(ICI)) {
      assert(DoesZExtCollapse && "Unprofitable zext?");
      Ext = Builder.CreateZExt(Op1, IVTy, "zext");
      Pred = ICmpInst::getUnsignedPredicate(Pred);
    } else {
      assert(DoesSExtCollapse && "Unprofitable sext?");
      Ext = Builder.CreateSExt(Op1, IVTy, "sext");
      assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!");
    }
    // Hoist the extension out of the loop if possible; the local 'Changed'
    // deliberately shadows the member — we don't care about this outcome.
    bool Changed;
    L->makeLoopInvariant(Ext, Changed);
    (void)Changed;
    auto *NewCmp = Builder.CreateICmp(Pred, IV, Ext);
    ICI->replaceAllUsesWith(NewCmp);
    DeadInsts.emplace_back(ICI);
  }

  // Trunc no longer needed.
  TI->replaceAllUsesWith(PoisonValue::get(TI->getType()));
  DeadInsts.emplace_back(TI);
  return true;
}
575 | | |
576 | | /// Eliminate an operation that consumes a simple IV and has no observable |
577 | | /// side-effect given the range of IV values. IVOperand is guaranteed SCEVable, |
578 | | /// but UseInst may not be. |
579 | | bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, |
580 | 423k | Instruction *IVOperand) { |
581 | 423k | if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { |
582 | 6.61k | eliminateIVComparison(ICmp, IVOperand); |
583 | 6.61k | return true; |
584 | 6.61k | } |
585 | 417k | if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) { |
586 | 261k | bool IsSRem = Bin->getOpcode() == Instruction::SRem; |
587 | 261k | if (IsSRem || Bin->getOpcode() == Instruction::URem) { |
588 | 1.35k | simplifyIVRemainder(Bin, IVOperand, IsSRem); |
589 | 1.35k | return true; |
590 | 1.35k | } |
591 | | |
592 | 260k | if (Bin->getOpcode() == Instruction::SDiv) |
593 | 436 | return eliminateSDiv(Bin); |
594 | 260k | } |
595 | | |
596 | 415k | if (auto *WO = dyn_cast<WithOverflowInst>(UseInst)) |
597 | 0 | if (eliminateOverflowIntrinsic(WO)) |
598 | 0 | return true; |
599 | | |
600 | 415k | if (auto *SI = dyn_cast<SaturatingInst>(UseInst)) |
601 | 0 | if (eliminateSaturatingIntrinsic(SI)) |
602 | 0 | return true; |
603 | | |
604 | 415k | if (auto *TI = dyn_cast<TruncInst>(UseInst)) |
605 | 5.75k | if (eliminateTrunc(TI)) |
606 | 302 | return true; |
607 | | |
608 | 415k | if (eliminateIdentitySCEV(UseInst, IVOperand)) |
609 | 3.32k | return true; |
610 | | |
611 | 411k | return false; |
612 | 415k | } |
613 | | |
614 | 1.33k | static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) { |
615 | 1.33k | if (auto *BB = L->getLoopPreheader()) |
616 | 1.33k | return BB->getTerminator(); |
617 | | |
618 | 0 | return Hint; |
619 | 1.33k | } |
620 | | |
621 | | /// Replace the UseInst with a loop invariant expression if it is safe. |
622 | 431k | bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { |
623 | 431k | if (!SE->isSCEVable(I->getType())) |
624 | 5.55k | return false; |
625 | | |
626 | | // Get the symbolic expression for this instruction. |
627 | 426k | const SCEV *S = SE->getSCEV(I); |
628 | | |
629 | 426k | if (!SE->isLoopInvariant(S, L)) |
630 | 424k | return false; |
631 | | |
632 | | // Do not generate something ridiculous even if S is loop invariant. |
633 | 1.52k | if (Rewriter.isHighCostExpansion(S, L, SCEVCheapExpansionBudget, TTI, I)) |
634 | 194 | return false; |
635 | | |
636 | 1.33k | auto *IP = GetLoopInvariantInsertPosition(L, I); |
637 | | |
638 | 1.33k | if (!Rewriter.isSafeToExpandAt(S, IP)) { |
639 | 4 | LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I |
640 | 4 | << " with non-speculable loop invariant: " << *S << '\n'); |
641 | 4 | return false; |
642 | 4 | } |
643 | | |
644 | 1.33k | auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP); |
645 | | |
646 | 1.33k | I->replaceAllUsesWith(Invariant); |
647 | 1.33k | LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I |
648 | 1.33k | << " with loop invariant: " << *S << '\n'); |
649 | 1.33k | ++NumFoldedUser; |
650 | 1.33k | Changed = true; |
651 | 1.33k | DeadInsts.emplace_back(I); |
652 | 1.33k | return true; |
653 | 1.33k | } |
654 | | |
/// Eliminate redundant type cast between integer and float.
bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
  // Only int-to-float conversions of an IV are candidates.
  if (UseInst->getOpcode() != CastInst::SIToFP &&
      UseInst->getOpcode() != CastInst::UIToFP)
    return false;

  Instruction *IVOperand = cast<Instruction>(UseInst->getOperand(0));
  // Get the symbolic expression for this instruction.
  const SCEV *IV = SE->getSCEV(IVOperand);
  // Number of significant bits the integer IV can actually take, per SCEV's
  // range analysis; signedness follows the conversion being examined.
  int MaskBits;
  if (UseInst->getOpcode() == CastInst::SIToFP)
    MaskBits = (int)SE->getSignedRange(IV).getMinSignedBits();
  else
    MaskBits = (int)SE->getUnsignedRange(IV).getActiveBits();
  int DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
  // The int->fp->int round trip is lossless only if the IV's value fits in
  // the float's mantissa.
  if (MaskBits <= DestNumSigBits) {
    for (User *U : UseInst->users()) {
      // Match for fptosi/fptoui of sitofp and with same type.
      auto *CI = dyn_cast<CastInst>(U);
      if (!CI)
        continue;

      CastInst::CastOps Opcode = CI->getOpcode();
      if (Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI)
        continue;

      Value *Conv = nullptr;
      if (IVOperand->getType() != CI->getType()) {
        IRBuilder<> Builder(CI);
        StringRef Name = IVOperand->getName();
        // To match InstCombine logic, we only need sext if both fptosi and
        // sitofp are used. If one of them is unsigned, then we can use zext.
        if (SE->getTypeSizeInBits(IVOperand->getType()) >
            SE->getTypeSizeInBits(CI->getType())) {
          Conv = Builder.CreateTrunc(IVOperand, CI->getType(), Name + ".trunc");
        } else if (Opcode == CastInst::FPToUI ||
                   UseInst->getOpcode() == CastInst::UIToFP) {
          Conv = Builder.CreateZExt(IVOperand, CI->getType(), Name + ".zext");
        } else {
          Conv = Builder.CreateSExt(IVOperand, CI->getType(), Name + ".sext");
        }
      } else
        Conv = IVOperand;

      // The float round trip is redundant; use the integer value directly.
      CI->replaceAllUsesWith(Conv);
      DeadInsts.push_back(CI);
      LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
                        << " with: " << *Conv << '\n');

      ++NumFoldedUser;
      Changed = true;
    }
  }

  // Note: returns the accumulated member flag, not just this call's effect.
  return Changed;
}
711 | | |
712 | | /// Eliminate any operation that SCEV can prove is an identity function.
 | | /// I.e. UseInst computes the same SCEV value (and has the same type) as
 | | /// IVOperand, so all uses of UseInst can be redirected to IVOperand.
713 | | bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
714 | 415k | Instruction *IVOperand) {
715 | 415k | if (!SE->isSCEVable(UseInst->getType()) ||
716 | 415k | (UseInst->getType() != IVOperand->getType()) ||
717 | 415k | (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
718 | 410k | return false;
719 | |
720 | | // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
721 | | // dominator tree, even if X is an operand to Y. For instance, in
722 | | //
723 | | // %iv = phi i32 {0,+,1}
724 | | // br %cond, label %left, label %merge
725 | | //
726 | | // left:
727 | | // %X = add i32 %iv, 0
728 | | // br label %merge
729 | | //
730 | | // merge:
731 | | // %M = phi (%X, %iv)
732 | | //
733 | | // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and
734 | | // %M.replaceAllUsesWith(%X) would be incorrect.
735 | |
736 | 4.63k | if (isa<PHINode>(UseInst))
737 | | // If UseInst is not a PHI node then we know that IVOperand dominates
738 | | // UseInst directly from the legality of SSA.
739 | 2.30k | if (!DT || !DT->dominates(IVOperand, UseInst))
740 | 31 | return false;
741 | |
 | | // Even when dominance holds, the replacement must not break LCSSA form,
 | | // which later loop passes rely on.
742 | 4.59k | if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
743 | 1.27k | return false;
744 | |
745 | 3.32k | LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
746 | |
 | | // Invalidate cached SCEV info for UseInst before mutating its uses.
747 | 3.32k | SE->forgetValue(UseInst);
748 | 3.32k | UseInst->replaceAllUsesWith(IVOperand);
749 | 3.32k | ++NumElimIdentity;
750 | 3.32k | Changed = true;
751 | 3.32k | DeadInsts.emplace_back(UseInst);
752 | 3.32k | return true;
753 | 4.59k | }
754 | | |
 | | /// Try to add nsw/nuw flags (for overflowing binary operators) or 'exact'
 | | /// flags on right shifts fed by a shl of this IV. Returns true if any flag
 | | /// was added. The '||' short-circuits, so the shift strengthening is only
 | | /// attempted when the nsw/nuw strengthening did not fire.
755 | | bool SimplifyIndvar::strengthenBinaryOp(BinaryOperator *BO,
756 | 258k | Instruction *IVOperand) {
757 | 258k | return (isa<OverflowingBinaryOperator>(BO) &&
758 | 258k | strengthenOverflowingOperation(BO, IVOperand)) ||
759 | 258k | (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand));
760 | 258k | }
761 | | |
762 | | /// Annotate BO with nsw / nuw if it provably does not signed-overflow /
763 | | /// unsigned-overflow. Returns true if anything changed, false otherwise.
764 | | bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
765 | 255k | Instruction *IVOperand) {
766 | 255k | auto Flags = SE->getStrengthenedNoWrapFlagsFromBinOp(
767 | 255k | cast<OverflowingBinaryOperator>(BO));
768 | |
769 | 255k | if (!Flags)
770 | 239k | return false;
771 | |
 | | // Flags is the *full* proven flag set; setHasNo*Wrap overwrites (it does
 | | // not OR in), which is safe because the returned set includes any flags
 | | // BO already carried.
772 | 15.0k | BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) ==
773 | 15.0k | SCEV::FlagNUW);
774 | 15.0k | BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) ==
775 | 15.0k | SCEV::FlagNSW);
776 | |
777 | | // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap
778 | | // flags on addrecs while performing zero/sign extensions. We could call
779 | | // forgetValue() here to make sure those flags also propagate to any other
780 | | // SCEV expressions based on the addrec. However, this can have pathological
781 | | // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384.
782 | 15.0k | return true;
783 | 255k | }
784 | | |
785 | | /// Annotate the Shr in (X << IVOperand) >> C as exact using the
786 | | /// information from the IV's range. Returns true if anything changed, false
787 | | /// otherwise.
788 | | bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
789 | 1.67k | Instruction *IVOperand) {
790 | 1.67k | if (BO->getOpcode() == Instruction::Shl) {
 | | // NOTE: this local deliberately shadows the 'Changed' member; only the
 | | // local result is returned to the caller.
791 | 1.67k | bool Changed = false;
792 | 1.67k | ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand));
793 | 2.18k | for (auto *U : BO->users()) {
794 | 2.18k | const APInt *C;
795 | 2.18k | if (match(U,
796 | 2.18k | m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) ||
797 | 2.18k | match(U,
798 | 2.18k | m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) {
799 | 0 | BinaryOperator *Shr = cast<BinaryOperator>(U);
 | | // If the IV shifts left by at least C bits, the low C bits of the
 | | // shl result are zero, so shifting right by C drops no set bits.
800 | 0 | if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) {
801 | 0 | Shr->setIsExact(true);
802 | 0 | Changed = true;
803 | 0 | }
804 | 0 | }
805 | 2.18k | }
806 | 1.67k | return Changed;
807 | 1.67k | }
808 | |
809 | 0 | return false;
810 | 1.67k | }
811 | | |
812 | | /// Add all uses of Def to the current IV's worklist.
 | | /// Skips self-edges, users outside loop L, and users already visited (the
 | | /// 'Simplified' set doubles as the visited set to keep the worklist finite).
813 | | static void pushIVUsers(
814 | | Instruction *Def, Loop *L,
815 | | SmallPtrSet<Instruction*,16> &Simplified,
816 | 436k | SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
817 | |
818 | 797k | for (User *U : Def->users()) {
819 | 797k | Instruction *UI = cast<Instruction>(U);
820 | |
821 | | // Avoid infinite or exponential worklist processing.
822 | | // Also ensure unique worklist users.
823 | | // If Def is a LoopPhi, it may not be in the Simplified set, so check for
824 | | // self edges first.
825 | 797k | if (UI == Def)
826 | 5.07k | continue;
827 | |
828 | | // Only change the current Loop, do not change the other parts (e.g. other
829 | | // Loops).
830 | 792k | if (!L->contains(UI))
831 | 4.31k | continue;
832 | |
833 | | // Do not push the same instruction more than once.
834 | 788k | if (!Simplified.insert(UI).second)
835 | 293k | continue;
836 | |
 | | // Worklist entries are (user, IV-operand-that-reaches-it) pairs.
837 | 494k | SimpleIVUsers.push_back(std::make_pair(UI, Def));
838 | 494k | }
839 | 436k | }
840 | | |
841 | | /// Return true if this instruction generates a simple SCEV
842 | | /// expression in terms of that IV.
843 | | ///
844 | | /// This is similar to IVUsers' isInteresting() but processes each instruction
845 | | /// non-recursively when the operand is already known to be a simpleIVUser.
846 | | ///
847 | 404k | static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
848 | 404k | if (!SE->isSCEVable(I->getType()))
849 | 4.84k | return false;
850 | |
851 | | // Get the symbolic expression for this instruction.
852 | 399k | const SCEV *S = SE->getSCEV(I);
853 | |
854 | | // Only consider affine recurrences.
 | | // (An add-recurrence rooted at a *different* loop is not interesting here.)
855 | 399k | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
856 | 399k | if (AR && AR->getLoop() == L)
857 | 335k | return true;
858 | |
859 | 64.3k | return false;
860 | 399k | }
861 | | |
862 | | /// Iteratively perform simplification on a worklist of users
863 | | /// of the specified induction variable. Each successive simplification may push
864 | | /// more users which may themselves be candidates for simplification.
865 | | ///
866 | | /// This algorithm does not require IVUsers analysis. Instead, it simplifies
867 | | /// instructions in-place during analysis. Rather than rewriting induction
868 | | /// variables bottom-up from their users, it transforms a chain of IVUsers
869 | | /// top-down, updating the IR only when it encounters a clear optimization
870 | | /// opportunity.
871 | | ///
872 | | /// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
873 | | ///
874 | 76.3k | void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
875 | 76.3k | if (!SE->isSCEVable(CurrIV->getType()))
876 | 1.56k | return;
877 | |
878 | | // Instructions processed by SimplifyIndvar for CurrIV.
879 | 74.8k | SmallPtrSet<Instruction*,16> Simplified;
880 | |
881 | | // Use-def pairs if IV users waiting to be processed for CurrIV.
882 | 74.8k | SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
883 | |
884 | | // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
885 | | // called multiple times for the same LoopPhi. This is the proper thing to
886 | | // do for loop header phis that use each other.
887 | 74.8k | pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers);
888 | |
889 | 569k | while (!SimpleIVUsers.empty()) {
890 | 494k | std::pair<Instruction*, Instruction*> UseOper =
891 | 494k | SimpleIVUsers.pop_back_val();
892 | 494k | Instruction *UseInst = UseOper.first;
893 | |
894 | | // If a user of the IndVar is trivially dead, we prefer just to mark it dead
895 | | // rather than try to do some complex analysis or transformation (such as
896 | | // widening) basing on it.
897 | | // TODO: Propagate TLI and pass it here to handle more cases.
898 | 494k | if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) {
899 | 1.60k | DeadInsts.emplace_back(UseInst);
900 | 1.60k | continue;
901 | 1.60k | }
902 | |
903 | | // Bypass back edges to avoid extra work.
904 | 493k | if (UseInst == CurrIV) continue;
905 | |
906 | | // Try to replace UseInst with a loop invariant before any other
907 | | // simplifications.
908 | 425k | if (replaceIVUserWithLoopInvariant(UseInst))
909 | 1.24k | continue;
910 | |
911 | | // Go further for the bitcast 'prtoint ptr to i64' or if the cast is done
912 | | // by truncation
 | | // (One level deep only: try the users of the cast, stopping at the first
 | | // successful replacement.)
913 | 423k | if ((isa<PtrToIntInst>(UseInst)) || (isa<TruncInst>(UseInst)))
914 | 6.68k | for (Use &U : UseInst->uses()) {
915 | 6.68k | Instruction *User = cast<Instruction>(U.getUser());
916 | 6.68k | if (replaceIVUserWithLoopInvariant(User))
917 | 88 | break; // done replacing
918 | 6.68k | }
919 | |
 | | // Repeatedly fold constant IV operands into UseInst; each fold may expose
 | | // a new IV operand. The assert bounds the iteration count.
920 | 423k | Instruction *IVOperand = UseOper.second;
921 | 423k | for (unsigned N = 0; IVOperand; ++N) {
922 | 423k | assert(N <= Simplified.size() && "runaway iteration");
923 | 0 | (void) N;
924 | |
925 | 423k | Value *NewOper = foldIVUser(UseInst, IVOperand);
926 | 423k | if (!NewOper)
927 | 423k | break; // done folding
928 | 0 | IVOperand = dyn_cast<Instruction>(NewOper);
929 | 0 | }
930 | 423k | if (!IVOperand)
931 | 0 | continue;
932 | |
933 | 423k | if (eliminateIVUser(UseInst, IVOperand)) {
934 | 11.6k | pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
935 | 11.6k | continue;
936 | 11.6k | }
937 | |
938 | 412k | if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) {
939 | 258k | if (strengthenBinaryOp(BO, IVOperand)) {
940 | | // re-queue uses of the now modified binary operator and fall
941 | | // through to the checks that remain.
942 | 15.0k | pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
943 | 15.0k | }
944 | 258k | }
945 | |
946 | | // Try to use integer induction for FPToSI of float induction directly.
947 | 412k | if (replaceFloatIVWithIntegerIV(UseInst)) {
948 | | // Re-queue the potentially new direct uses of IVOperand.
949 | 0 | pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
950 | 0 | continue;
951 | 0 | }
952 | |
 | | // Let the visitor (e.g. the widening driver in IndVarSimplify) collect
 | | // interesting casts; it owns any further handling of them.
953 | 412k | CastInst *Cast = dyn_cast<CastInst>(UseInst);
954 | 412k | if (V && Cast) {
955 | 7.88k | V->visitCast(Cast);
956 | 7.88k | continue;
957 | 7.88k | }
958 | 404k | if (isSimpleIVUser(UseInst, L, SE)) {
959 | 335k | pushIVUsers(UseInst, L, Simplified, SimpleIVUsers);
960 | 335k | }
961 | 404k | }
962 | 74.8k | }
963 | | |
964 | | namespace llvm { |
965 | | |
 | | // Out-of-line anchor to pin IVVisitor's vtable to this translation unit.
966 | 0 | void IVVisitor::anchor() { }
967 | |
968 | | /// Simplify instructions that use this induction variable
969 | | /// by using ScalarEvolution to analyze the IV's recurrence.
 | | /// Dead instructions are appended to 'Dead' for the caller to delete.
 | | /// Returns true if any IR change was made.
970 | | bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
971 | | LoopInfo *LI, const TargetTransformInfo *TTI,
972 | | SmallVectorImpl<WeakTrackingVH> &Dead,
973 | 76.3k | SCEVExpander &Rewriter, IVVisitor *V) {
974 | 76.3k | SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI,
975 | 76.3k | Rewriter, Dead);
976 | 76.3k | SIV.simplifyUsers(CurrIV, V);
977 | 76.3k | return SIV.hasChanged();
978 | 76.3k | }
979 | | |
980 | | /// Simplify users of induction variables within this
981 | | /// loop. This does not actually change or add IVs.
 | | /// Runs simplifyUsersOfIV over every header phi of L with a shared expander.
982 | | bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
983 | | LoopInfo *LI, const TargetTransformInfo *TTI,
984 | 0 | SmallVectorImpl<WeakTrackingVH> &Dead) {
985 | 0 | SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars");
986 | 0 | #ifndef NDEBUG
987 | 0 | Rewriter.setDebugType(DEBUG_TYPE);
988 | 0 | #endif
989 | 0 | bool Changed = false;
 | | // Header phis are the loop's candidate induction variables; they are
 | | // grouped at the start of the header block.
990 | 0 | for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
991 | 0 | Changed |=
992 | 0 | simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, TTI, Dead, Rewriter);
993 | 0 | }
994 | 0 | return Changed;
995 | 0 | }
996 | | |
997 | | } // namespace llvm |
998 | | |
999 | | namespace { |
1000 | | //===----------------------------------------------------------------------===// |
1001 | | // Widen Induction Variables - Extend the width of an IV to cover its |
1002 | | // widest uses. |
1003 | | //===----------------------------------------------------------------------===// |
1004 | | |
 | | /// Widens a narrow induction variable (OrigPhi) to WideType, rewriting
 | | /// narrow IV users to use the wide IV where profitable. Created per-IV by
 | | /// the widening driver; createWideIV() is the entry point.
1005 | | class WidenIV {
1006 | | // Parameters
1007 | | PHINode *OrigPhi;
1008 | | Type *WideType;
1009 | |
1010 | | // Context
1011 | | LoopInfo *LI;
1012 | | Loop *L;
1013 | | ScalarEvolution *SE;
1014 | | DominatorTree *DT;
1015 | |
1016 | | // Does the module have any calls to the llvm.experimental.guard intrinsic
1017 | | // at all? If not we can avoid scanning instructions looking for guards.
1018 | | bool HasGuards;
1019 | |
 | | // When false, control-dependent post-increment range info is not used.
1020 | | bool UsePostIncrementRanges;
1021 | |
1022 | | // Statistics
1023 | | unsigned NumElimExt = 0;
1024 | | unsigned NumWidened = 0;
1025 | |
1026 | | // Result
1027 | | PHINode *WidePhi = nullptr;
1028 | | Instruction *WideInc = nullptr;
1029 | | const SCEV *WideIncExpr = nullptr;
1030 | | SmallVectorImpl<WeakTrackingVH> &DeadInsts;
1031 | |
1032 | | SmallPtrSet<Instruction *,16> Widened;
1033 | |
1034 | | enum class ExtendKind { Zero, Sign, Unknown };
1035 | |
1036 | | // A map tracking the kind of extension used to widen each narrow IV
1037 | | // and narrow IV user.
1038 | | // Key: pointer to a narrow IV or IV user.
1039 | | // Value: the kind of extension used to widen this Instruction.
1040 | | DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
1041 | |
1042 | | using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
1043 | |
1044 | | // A map with control-dependent ranges for post increment IV uses. The key is
1045 | | // a pair of IV def and a use of this def denoting the context. The value is
1046 | | // a ConstantRange representing possible values of the def at the given
1047 | | // context.
1048 | | DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
1049 | |
 | | // Look up the recorded range for (Def, UseI); nullopt if none recorded.
1050 | | std::optional<ConstantRange> getPostIncRangeInfo(Value *Def,
1051 | 3.66k | Instruction *UseI) {
1052 | 3.66k | DefUserPair Key(Def, UseI);
1053 | 3.66k | auto It = PostIncRangeInfos.find(Key);
1054 | 3.66k | return It == PostIncRangeInfos.end()
1055 | 3.66k | ? std::optional<ConstantRange>(std::nullopt)
1056 | 3.66k | : std::optional<ConstantRange>(It->second);
1057 | 3.66k | }
1058 | |
1059 | | void calculatePostIncRanges(PHINode *OrigPhi);
1060 | | void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
1061 | |
 | | // Record R for (Def, UseI), intersecting with any existing range so the
 | | // stored range only ever tightens.
1062 | 24 | void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
1063 | 24 | DefUserPair Key(Def, UseI);
1064 | 24 | auto It = PostIncRangeInfos.find(Key);
1065 | 24 | if (It == PostIncRangeInfos.end())
1066 | 24 | PostIncRangeInfos.insert({Key, R});
1067 | 0 | else
1068 | 0 | It->second = R.intersectWith(It->second);
1069 | 24 | }
1070 | |
1071 | | public:
1072 | | /// Record a link in the Narrow IV def-use chain along with the WideIV that
1073 | | /// computes the same value as the Narrow IV def. This avoids caching Use*
1074 | | /// pointers.
1075 | | struct NarrowIVDefUse {
1076 | | Instruction *NarrowDef = nullptr;
1077 | | Instruction *NarrowUse = nullptr;
1078 | | Instruction *WideDef = nullptr;
1079 | |
1080 | | // True if the narrow def is never negative. Tracking this information lets
1081 | | // us use a sign extension instead of a zero extension or vice versa, when
1082 | | // profitable and legal.
1083 | | bool NeverNegative = false;
1084 | |
1085 | | NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
1086 | | bool NeverNegative)
1087 | | : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
1088 | 9.84k | NeverNegative(NeverNegative) {}
1089 | | };
1090 | |
1091 | | WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1092 | | DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
1093 | | bool HasGuards, bool UsePostIncrementRanges = true);
1094 | |
1095 | | PHINode *createWideIV(SCEVExpander &Rewriter);
1096 | |
1097 | 2.02k | unsigned getNumElimExt() { return NumElimExt; };
1098 | 2.02k | unsigned getNumWidened() { return NumWidened; };
1099 | |
1100 | | protected:
1101 | | Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1102 | | Instruction *Use);
1103 | |
1104 | | Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1105 | | Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1106 | | const SCEVAddRecExpr *WideAR);
1107 | | Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1108 | |
1109 | | ExtendKind getExtendKind(Instruction *I);
1110 | |
1111 | | using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1112 | |
1113 | | WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1114 | |
1115 | | WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1116 | |
1117 | | const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1118 | | unsigned OpCode) const;
1119 | |
1120 | | Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
1121 | |
1122 | | bool widenLoopCompare(NarrowIVDefUse DU);
1123 | | bool widenWithVariantUse(NarrowIVDefUse DU);
1124 | |
1125 | | void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1126 | |
1127 | | private:
1128 | | SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
1129 | | };
1130 | | } // namespace |
1131 | | |
1132 | | /// Determine the insertion point for this user. By default, insert immediately
1133 | | /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
1134 | | /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
1135 | | /// common dominator for the incoming blocks. A nullptr can be returned if no
1136 | | /// viable location is found: it may happen if User is a PHI and Def only comes
1137 | | /// to this PHI from unreachable blocks.
1138 | | static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
1139 | 4.97k | DominatorTree *DT, LoopInfo *LI) {
1140 | 4.97k | PHINode *PHI = dyn_cast<PHINode>(User);
1141 | 4.97k | if (!PHI)
1142 | 4.81k | return User;
1143 | |
 | | // For a PHI user, a value is consumed on the edge, so the insertion point
 | | // is the terminator of the (NCD of the) incoming block(s) carrying Def.
1144 | 157 | Instruction *InsertPt = nullptr;
1145 | 421 | for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
1146 | 264 | if (PHI->getIncomingValue(i) != Def)
1147 | 107 | continue;
1148 | |
1149 | 157 | BasicBlock *InsertBB = PHI->getIncomingBlock(i);
1150 | |
1151 | 157 | if (!DT->isReachableFromEntry(InsertBB))
1152 | 0 | continue;
1153 | |
1154 | 157 | if (!InsertPt) {
1155 | 157 | InsertPt = InsertBB->getTerminator();
1156 | 157 | continue;
1157 | 157 | }
1158 | 0 | InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
1159 | 0 | InsertPt = InsertBB->getTerminator();
1160 | 0 | }
1161 | |
1162 | | // If we have skipped all inputs, it means that Def only comes to Phi from
1163 | | // unreachable blocks.
1164 | 157 | if (!InsertPt)
1165 | 0 | return nullptr;
1166 | |
1167 | 157 | auto *DefI = dyn_cast<Instruction>(Def);
1168 | 157 | if (!DefI)
1169 | 0 | return InsertPt;
1170 | |
1171 | 157 | assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
1172 | |
 | | // Walk up the dominator tree until we reach a block at Def's loop level,
 | | // so the chosen point is not buried in an inner loop.
1173 | 0 | auto *L = LI->getLoopFor(DefI->getParent());
1174 | 157 | assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
1175 | |
1176 | 169 | for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
1177 | 169 | if (LI->getLoopFor(DTN->getBlock()) == L)
1178 | 157 | return DTN->getBlock()->getTerminator();
1179 | |
1180 | 0 | llvm_unreachable("DefI dominates InsertPt!");
1181 | 0 | }
1182 | | |
 | | /// Construct a widener for the narrow IV described by WI; seeds the
 | | /// extend-kind map with how the IV itself is to be extended.
1183 | | WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1184 | | DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
1185 | | bool HasGuards, bool UsePostIncrementRanges)
1186 | | : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
1187 | | L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
1188 | | HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges),
1189 | 2.02k | DeadInsts(DI) {
1190 | 2.02k | assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
1191 | 2.02k | ExtendKindMap[OrigPhi] = WI.IsSigned ? ExtendKind::Sign : ExtendKind::Zero;
1192 | 2.02k | }
1193 | | |
 | | /// Create a sext/zext of NarrowOper to WideType, placed before Use but
 | | /// hoisted into the outermost loop preheader for which NarrowOper is
 | | /// invariant.
1194 | | Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1195 | 1.99k | bool IsSigned, Instruction *Use) {
1196 | | // Set the debug location and conservative insertion point.
1197 | 1.99k | IRBuilder<> Builder(Use);
1198 | | // Hoist the insertion point into loop preheaders as far as possible.
1199 | 1.99k | for (const Loop *L = LI->getLoopFor(Use->getParent());
1200 | 3.80k | L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
1201 | 1.99k | L = L->getParentLoop())
1202 | 1.80k | Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
1203 | |
1204 | 1.99k | return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1205 | 1.99k | Builder.CreateZExt(NarrowOper, WideType);
1206 | 1.99k | }
1207 | | |
1208 | | /// Instantiate a wide operation to replace a narrow operation. This only needs
1209 | | /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1210 | | /// 0 for any operation we decide not to clone.
1211 | | Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU,
1212 | 1.82k | const SCEVAddRecExpr *WideAR) {
1213 | 1.82k | unsigned Opcode = DU.NarrowUse->getOpcode();
1214 | 1.82k | switch (Opcode) {
1215 | 155 | default:
1216 | 155 | return nullptr;
 | | // Arithmetic ops need SCEV confirmation of the non-IV operand's extension
 | | // (see cloneArithmeticIVUser); bitwise/shift ops are cloned directly.
1217 | 1.19k | case Instruction::Add:
1218 | 1.34k | case Instruction::Mul:
1219 | 1.34k | case Instruction::UDiv:
1220 | 1.60k | case Instruction::Sub:
1221 | 1.60k | return cloneArithmeticIVUser(DU, WideAR);
1222 | |
1223 | 3 | case Instruction::And:
1224 | 3 | case Instruction::Or:
1225 | 8 | case Instruction::Xor:
1226 | 38 | case Instruction::Shl:
1227 | 44 | case Instruction::LShr:
1228 | 57 | case Instruction::AShr:
1229 | 57 | return cloneBitwiseIVUser(DU);
1230 | 1.82k | }
1231 | 1.82k | }
1232 | | |
 | | /// Clone a bitwise/shift narrow IV user into a wide operation: the narrow
 | | /// IV operand becomes WideDef, the other operand gets an explicit [sz]ext.
1233 | 57 | Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) {
1234 | 57 | Instruction *NarrowUse = DU.NarrowUse;
1235 | 57 | Instruction *NarrowDef = DU.NarrowDef;
1236 | 57 | Instruction *WideDef = DU.WideDef;
1237 | |
1238 | 57 | LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1239 | |
1240 | | // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1241 | | // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1242 | | // invariant and will be folded or hoisted. If it actually comes from a
1243 | | // widened IV, it should be removed during a future call to widenIVUse.
1244 | 57 | bool IsSigned = getExtendKind(NarrowDef) == ExtendKind::Sign;
1245 | 57 | Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1246 | 57 | ? WideDef
1247 | 57 | : createExtendInst(NarrowUse->getOperand(0), WideType,
1248 | 0 | IsSigned, NarrowUse);
1249 | 57 | Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1250 | 57 | ? WideDef
1251 | 57 | : createExtendInst(NarrowUse->getOperand(1), WideType,
1252 | 57 | IsSigned, NarrowUse);
1253 | |
 | | // copyIRFlags carries over nsw/nuw/exact from the narrow op.
1254 | 57 | auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1255 | 57 | auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1256 | 57 | NarrowBO->getName());
1257 | 57 | IRBuilder<> Builder(NarrowUse);
1258 | 57 | Builder.Insert(WideBO);
1259 | 57 | WideBO->copyIRFlags(NarrowBO);
1260 | 57 | return WideBO;
1261 | 57 | }
1262 | | |
 | | /// Clone an add/sub/mul/udiv narrow IV user into a wide operation, choosing
 | | /// sext vs zext for the non-IV operand by checking which choice makes the
 | | /// widened SCEV equal WideAR. Returns nullptr if neither extension works.
1263 | | Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU,
1264 | 1.60k | const SCEVAddRecExpr *WideAR) {
1265 | 1.60k | Instruction *NarrowUse = DU.NarrowUse;
1266 | 1.60k | Instruction *NarrowDef = DU.NarrowDef;
1267 | 1.60k | Instruction *WideDef = DU.WideDef;
1268 | |
1269 | 1.60k | LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1270 | |
1271 | 1.60k | unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1272 | |
1273 | | // We're trying to find X such that
1274 | | //
1275 | | // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1276 | | //
1277 | | // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1278 | | // and check using SCEV if any of them are correct.
1279 | |
1280 | | // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1281 | | // correct solution to X.
1282 | 2.09k | auto GuessNonIVOperand = [&](bool SignExt) {
1283 | 2.09k | const SCEV *WideLHS;
1284 | 2.09k | const SCEV *WideRHS;
1285 | |
1286 | 2.09k | auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1287 | 2.09k | if (SignExt)
1288 | 1.37k | return SE->getSignExtendExpr(S, Ty);
1289 | 724 | return SE->getZeroExtendExpr(S, Ty);
1290 | 2.09k | };
1291 | |
1292 | 2.09k | if (IVOpIdx == 0) {
1293 | 1.25k | WideLHS = SE->getSCEV(WideDef);
1294 | 1.25k | const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1295 | 1.25k | WideRHS = GetExtend(NarrowRHS, WideType);
1296 | 1.25k | } else {
1297 | 838 | const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1298 | 838 | WideLHS = GetExtend(NarrowLHS, WideType);
1299 | 838 | WideRHS = SE->getSCEV(WideDef);
1300 | 838 | }
1301 | |
1302 | | // WideUse is "WideDef `op.wide` X" as described in the comment.
1303 | 2.09k | const SCEV *WideUse =
1304 | 2.09k | getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode());
1305 | |
 | | // Pointer equality is sound: SCEV expressions are uniqued.
1306 | 2.09k | return WideUse == WideAR;
1307 | 2.09k | };
1308 | |
 | | // Try the IV's own extend kind first, then the opposite.
1309 | 1.60k | bool SignExtend = getExtendKind(NarrowDef) == ExtendKind::Sign;
1310 | 1.60k | if (!GuessNonIVOperand(SignExtend)) {
1311 | 489 | SignExtend = !SignExtend;
1312 | 489 | if (!GuessNonIVOperand(SignExtend))
1313 | 370 | return nullptr;
1314 | 489 | }
1315 | |
1316 | 1.23k | Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1317 | 1.23k | ? WideDef
1318 | 1.23k | : createExtendInst(NarrowUse->getOperand(0), WideType,
1319 | 731 | SignExtend, NarrowUse);
1320 | 1.23k | Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1321 | 1.23k | ? WideDef
1322 | 1.23k | : createExtendInst(NarrowUse->getOperand(1), WideType,
1323 | 470 | SignExtend, NarrowUse);
1324 | |
1325 | 1.23k | auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1326 | 1.23k | auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1327 | 1.23k | NarrowBO->getName());
1328 | |
1329 | 1.23k | IRBuilder<> Builder(NarrowUse);
1330 | 1.23k | Builder.Insert(WideBO);
1331 | 1.23k | WideBO->copyIRFlags(NarrowBO);
1332 | 1.23k | return WideBO;
1333 | 1.60k | }
1334 | | |
 | | /// Return the extension kind recorded for I. Asserts that I was already
 | | /// processed (every widened def is entered into ExtendKindMap first).
1335 | 14.0k | WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1336 | 14.0k | auto It = ExtendKindMap.find(I);
1337 | 14.0k | assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1338 | 0 | return It->second;
1339 | 14.0k | }
1340 | | |
 | | /// Build the SCEV expression corresponding to 'LHS OpCode RHS' for the
 | | /// subset of binary opcodes the widener handles (add/sub/mul/udiv).
1341 | | const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1342 | 4.32k | unsigned OpCode) const {
1343 | 4.32k | switch (OpCode) {
1344 | 3.51k | case Instruction::Add:
1345 | 3.51k | return SE->getAddExpr(LHS, RHS);
1346 | 520 | case Instruction::Sub:
1347 | 520 | return SE->getMinusSCEV(LHS, RHS);
1348 | 287 | case Instruction::Mul:
1349 | 287 | return SE->getMulExpr(LHS, RHS);
1350 | 4 | case Instruction::UDiv:
1351 | 4 | return SE->getUDivExpr(LHS, RHS);
1352 | 0 | default:
1353 | 0 | llvm_unreachable("Unsupported opcode.");
1354 | 4.32k | };
1355 | 0 | }
1356 | | |
1357 | | /// No-wrap operations can transfer sign extension of their result to their
1358 | | /// operands. Generate the SCEV value for the widened operation without
1359 | | /// actually modifying the IR yet. If the expression after extending the
1360 | | /// operands is an AddRec for this loop, return the AddRec and the kind of
1361 | | /// extension used.
1362 | | WidenIV::WidenedRecTy
1363 | 8.12k | WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
1364 | | // Handle the common case of add<nsw/nuw>
1365 | 8.12k | const unsigned OpCode = DU.NarrowUse->getOpcode();
1366 | | // Only Add/Sub/Mul instructions supported yet.
1367 | 8.12k | if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1368 | 8.12k | OpCode != Instruction::Mul)
1369 | 3.60k | return {nullptr, ExtendKind::Unknown};
1370 | |
1371 | | // One operand (NarrowDef) has already been extended to WideDef. Now determine
1372 | | // if extending the other will lead to a recurrence.
1373 | 4.52k | const unsigned ExtendOperIdx =
1374 | 4.52k | DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1375 | 4.52k | assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1376 | |
 | | // The extension kind is only legal if the op carries the matching no-wrap
 | | // flag (sext needs nsw, zext needs nuw).
1377 | 0 | const OverflowingBinaryOperator *OBO =
1378 | 4.52k | cast<OverflowingBinaryOperator>(DU.NarrowUse);
1379 | 4.52k | ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1380 | 4.52k | if (!(ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap()) &&
1381 | 4.52k | !(ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap())) {
1382 | 2.38k | ExtKind = ExtendKind::Unknown;
1383 | |
1384 | | // For a non-negative NarrowDef, we can choose either type of
1385 | | // extension. We want to use the current extend kind if legal
1386 | | // (see above), and we only hit this code if we need to check
1387 | | // the opposite case.
1388 | 2.38k | if (DU.NeverNegative) {
1389 | 1.87k | if (OBO->hasNoSignedWrap()) {
1390 | 74 | ExtKind = ExtendKind::Sign;
1391 | 1.80k | } else if (OBO->hasNoUnsignedWrap()) {
1392 | 15 | ExtKind = ExtendKind::Zero;
1393 | 15 | }
1394 | 1.87k | }
1395 | 2.38k | }
1396 | |
1397 | 4.52k | const SCEV *ExtendOperExpr =
1398 | 4.52k | SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx));
1399 | 4.52k | if (ExtKind == ExtendKind::Sign)
1400 | 1.98k | ExtendOperExpr = SE->getSignExtendExpr(ExtendOperExpr, WideType);
1401 | 2.54k | else if (ExtKind == ExtendKind::Zero)
1402 | 250 | ExtendOperExpr = SE->getZeroExtendExpr(ExtendOperExpr, WideType);
1403 | 2.29k | else
1404 | 2.29k | return {nullptr, ExtendKind::Unknown};
1405 | |
1406 | | // When creating this SCEV expr, don't apply the current operations NSW or NUW
1407 | | // flags. This instruction may be guarded by control flow that the no-wrap
1408 | | // behavior depends on. Non-control-equivalent instructions can be mapped to
1409 | | // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1410 | | // semantics to those operations.
1411 | 2.23k | const SCEV *lhs = SE->getSCEV(DU.WideDef);
1412 | 2.23k | const SCEV *rhs = ExtendOperExpr;
1413 | |
1414 | | // Let's swap operands to the initial order for the case of non-commutative
1415 | | // operations, like SUB. See PR21014.
1416 | 2.23k | if (ExtendOperIdx == 0)
1417 | 774 | std::swap(lhs, rhs);
1418 | 2.23k | const SCEVAddRecExpr *AddRec =
1419 | 2.23k | dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1420 | |
1421 | 2.23k | if (!AddRec || AddRec->getLoop() != L)
1422 | 246 | return {nullptr, ExtendKind::Unknown};
1423 | |
1424 | 1.98k | return {AddRec, ExtKind};
1425 | 2.23k | }
1426 | | |
/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop. If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, ExtendKind::Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
  // Only integer-typed uses can be widened (e.g. not pointer-typed ones).
  if (!DU.NarrowUse->getType()->isIntegerTy())
    return {nullptr, ExtendKind::Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, ExtendKind::Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    // A never-negative value sign- and zero-extends to the same value, so try
    // both, preferring whichever SCEV can prove to be an AddRec.
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = ExtendKind::Sign;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ExtendKind::Zero;
    }
  } else if (getExtendKind(DU.NarrowDef) == ExtendKind::Sign) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = ExtendKind::Sign;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ExtendKind::Zero;
  }
  // Only a recurrence on this loop lets us hoist the extend out of it.
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, ExtendKind::Unknown};
  return {AddRec, ExtKind};
}
1466 | | |
1467 | | /// This IV user cannot be widened. Replace this use of the original narrow IV |
1468 | | /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. |
1469 | | static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT, |
1470 | 4.97k | LoopInfo *LI) { |
1471 | 4.97k | auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); |
1472 | 4.97k | if (!InsertPt) |
1473 | 0 | return; |
1474 | 4.97k | LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " |
1475 | 4.97k | << *DU.NarrowUse << "\n"); |
1476 | 4.97k | IRBuilder<> Builder(InsertPt); |
1477 | 4.97k | Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); |
1478 | 4.97k | DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); |
1479 | 4.97k | } |
1480 | | |
1481 | | /// If the narrow use is a compare instruction, then widen the compare |
1482 | | // (and possibly the other operand). The extend operation is hoisted into the |
1483 | | // loop preheader as far as possible. |
1484 | 5.91k | bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) { |
1485 | 5.91k | ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse); |
1486 | 5.91k | if (!Cmp) |
1487 | 4.74k | return false; |
1488 | | |
1489 | | // We can legally widen the comparison in the following two cases: |
1490 | | // |
1491 | | // - The signedness of the IV extension and comparison match |
1492 | | // |
1493 | | // - The narrow IV is always positive (and thus its sign extension is equal |
1494 | | // to its zero extension). For instance, let's say we're zero extending |
1495 | | // %narrow for the following use |
1496 | | // |
1497 | | // icmp slt i32 %narrow, %val ... (A) |
1498 | | // |
1499 | | // and %narrow is always positive. Then |
1500 | | // |
1501 | | // (A) == icmp slt i32 sext(%narrow), sext(%val) |
1502 | | // == icmp slt i32 zext(%narrow), sext(%val) |
1503 | 1.16k | bool IsSigned = getExtendKind(DU.NarrowDef) == ExtendKind::Sign; |
1504 | 1.16k | if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) |
1505 | 443 | return false; |
1506 | | |
1507 | 726 | Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); |
1508 | 726 | unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); |
1509 | 726 | unsigned IVWidth = SE->getTypeSizeInBits(WideType); |
1510 | 726 | assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); |
1511 | | |
1512 | | // Widen the compare instruction. |
1513 | 0 | DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); |
1514 | | |
1515 | | // Widen the other operand of the compare, if necessary. |
1516 | 726 | if (CastWidth < IVWidth) { |
1517 | 726 | Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); |
1518 | 726 | DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); |
1519 | 726 | } |
1520 | 726 | return true; |
1521 | 1.16k | } |
1522 | | |
// The widenIVUse avoids generating trunc by evaluating the use as AddRec, this
// will not work when:
//    1) SCEV traces back to an instruction inside the loop that SCEV can not
//       expand, eg. add %indvar, (load %addr)
//    2) SCEV finds a loop variant, eg. add %indvar, %loopvariant
// While SCEV fails to avoid trunc, we can still try to use instruction
// combining approach to prove trunc is not required. This can be further
// extended with other instruction combining checks, but for now we handle the
// following case (sub can be "add" and "mul", "nsw + sext" can be "nuw + zext")
//
// Src:
//   %c = sub nsw %b, %indvar
//   %d = sext %c to i64
// Dst:
//   %indvar.ext1 = sext %indvar to i64
//   %m = sext %b to i64
//   %d = sub nsw i64 %m, %indvar.ext1
// Therefore, as long as the result of add/sub/mul is extended to wide type, no
// trunc is required regardless of how %b is generated. This pattern is common
// when calculating address in 64 bit architecture
bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  assert((NarrowUse->getOperand(0) == NarrowDef ||
          NarrowUse->getOperand(1) == NarrowDef) &&
         "bad DU");

  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  bool CanSignExtend = ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap();
  bool CanZeroExtend = ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap();
  // The kind used to extend the non-IV operand; may be flipped below when the
  // 'add' is proven to really be a 'sub nuw'.
  auto AnotherOpExtKind = ExtKind;

  // Check that all uses are either:
  // - narrow def (in case of we are widening the IV increment);
  // - single-input LCSSA Phis;
  // - comparison of the chosen type;
  // - extend of the chosen type (raison d'etre).
  SmallVector<Instruction *, 4> ExtUsers;
  SmallVector<PHINode *, 4> LCSSAPhiUsers;
  SmallVector<ICmpInst *, 4> ICmpUsers;
  for (Use &U : NarrowUse->uses()) {
    Instruction *User = cast<Instruction>(U.getUser());
    if (User == NarrowDef)
      continue;
    if (!L->contains(User)) {
      auto *LCSSAPhi = cast<PHINode>(User);
      // Make sure there is only 1 input, so that we don't have to split
      // critical edges.
      if (LCSSAPhi->getNumOperands() != 1)
        return false;
      LCSSAPhiUsers.push_back(LCSSAPhi);
      continue;
    }
    if (auto *ICmp = dyn_cast<ICmpInst>(User)) {
      auto Pred = ICmp->getPredicate();
      // We have 3 types of predicates: signed, unsigned and equality
      // predicates. For equality, it's legal to widen icmp for either sign and
      // zero extend. For sign extend, we can also do so for signed predicates,
      // likewise for zero extend we can widen icmp for unsigned predicates.
      if (ExtKind == ExtendKind::Zero && ICmpInst::isSigned(Pred))
        return false;
      if (ExtKind == ExtendKind::Sign && ICmpInst::isUnsigned(Pred))
        return false;
      ICmpUsers.push_back(ICmp);
      continue;
    }
    // Anything else must be an extend of the chosen kind to the wide type.
    if (ExtKind == ExtendKind::Sign)
      User = dyn_cast<SExtInst>(User);
    else
      User = dyn_cast<ZExtInst>(User);
    if (!User || User->getType() != WideType)
      return false;
    ExtUsers.push_back(User);
  }
  if (ExtUsers.empty()) {
    // No extend user means NarrowUse is only consumed by the narrow def (the
    // IV increment case); it is dead once the IV is widened.
    DeadInsts.emplace_back(NarrowUse);
    return true;
  }

  // We'll prove some facts that should be true in the context of ext users. If
  // there is no users, we are done now. If there are some, pick their common
  // dominator as context.
  const Instruction *CtxI = findCommonDominator(ExtUsers, *DT);

  if (!CanSignExtend && !CanZeroExtend) {
    // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we
    // will most likely not see it. Let's try to prove it.
    if (OpCode != Instruction::Add)
      return false;
    if (ExtKind != ExtendKind::Zero)
      return false;
    const SCEV *LHS = SE->getSCEV(OBO->getOperand(0));
    const SCEV *RHS = SE->getSCEV(OBO->getOperand(1));
    // TODO: Support case for NarrowDef = NarrowUse->getOperand(1).
    if (NarrowUse->getOperand(0) != NarrowDef)
      return false;
    if (!SE->isKnownNegative(RHS))
      return false;
    bool ProvedSubNUW = SE->isKnownPredicateAt(ICmpInst::ICMP_UGE, LHS,
                                               SE->getNegativeSCEV(RHS), CtxI);
    if (!ProvedSubNUW)
      return false;
    // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as
    // neg(zext(neg(op))), which is basically sext(op).
    AnotherOpExtKind = ExtendKind::Sign;
  }

  // Verifying that Defining operand is an AddRec
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generating a widening use instruction.
  Value *LHS =
      (NarrowUse->getOperand(0) == NarrowDef)
          ? WideDef
          : createExtendInst(NarrowUse->getOperand(0), WideType,
                             AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
  Value *RHS =
      (NarrowUse->getOperand(1) == NarrowDef)
          ? WideDef
          : createExtendInst(NarrowUse->getOperand(1), WideType,
                             AnotherOpExtKind == ExtendKind::Sign, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  ExtendKindMap[NarrowUse] = ExtKind;

  // Replace every extend user with the wide operation directly.
  for (Instruction *User : ExtUsers) {
    assert(User->getType() == WideType && "Checked before!");
    LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                      << *WideBO << "\n");
    ++NumElimExt;
    User->replaceAllUsesWith(WideBO);
    DeadInsts.emplace_back(User);
  }

  // For each LCSSA phi user, build a wide phi fed by the wide op and truncate
  // its value back to the narrow type for the phi's remaining users.
  for (PHINode *User : LCSSAPhiUsers) {
    assert(User->getNumOperands() == 1 && "Checked before!");
    Builder.SetInsertPoint(User);
    auto *WidePN =
        Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide");
    BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor();
    assert(LoopExitingBlock && L->contains(LoopExitingBlock) &&
           "Not a LCSSA Phi?");
    WidePN->addIncoming(WideBO, LoopExitingBlock);
    Builder.SetInsertPoint(User->getParent(),
                           User->getParent()->getFirstInsertionPt());
    auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType());
    User->replaceAllUsesWith(TruncPN);
    DeadInsts.emplace_back(User);
  }

  // Rewrite each compare user over extended operands of the same kind.
  for (ICmpInst *User : ICmpUsers) {
    Builder.SetInsertPoint(User);
    auto ExtendedOp = [&](Value * V)->Value * {
      if (V == NarrowUse)
        return WideBO;
      if (ExtKind == ExtendKind::Zero)
        return Builder.CreateZExt(V, WideBO->getType());
      else
        return Builder.CreateSExt(V, WideBO->getType());
    };
    auto Pred = User->getPredicate();
    auto *LHS = ExtendedOp(User->getOperand(0));
    auto *RHS = ExtendedOp(User->getOperand(1));
    auto *WideCmp =
        Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide");
    User->replaceAllUsesWith(WideCmp);
    DeadInsts.emplace_back(User);
  }

  return true;
}
1717 | | |
/// Determine whether an individual user of the narrow IV can be widened. If so,
/// return the wide clone of the user.
Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  assert(ExtendKindMap.count(DU.NarrowDef) &&
         "Should already know the kind of extension used to widen NarrowDef");

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT, LI);
      else {
        // Widening the PHI requires us to insert a trunc. The logical place
        // for this trunc is in the same BB as the PHI. This is not possible if
        // the BB is terminated by a catchswitch.
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        PHINode *WidePhi =
          PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
                          UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        BasicBlock *WidePhiBB = WidePhi->getParent();
        IRBuilder<> Builder(WidePhiBB, WidePhiBB->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
                          << *WidePhi << "\n");
      }
      return nullptr;
    }
  }

  // This narrow use can be widened by a sext if it's non-negative or its narrow
  // def was widened by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero;
  };

  // Our raison d'etre! Eliminate sign and zero extension.
  if ((match(DU.NarrowUse, m_SExtLike(m_Value())) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                          << " not wide enough to subsume " << *DU.NarrowUse
                          << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                        << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  auto tryAddRecExpansion = [&]() -> Instruction* {
    // Does this user itself evaluate to a recurrence after widening?
    WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
    if (!WideAddRec.first)
      WideAddRec = getWideRecurrence(DU);
    assert((WideAddRec.first == nullptr) ==
           (WideAddRec.second == ExtendKind::Unknown));
    if (!WideAddRec.first)
      return nullptr;

    // Reuse the IV increment that SCEVExpander created as long as it dominates
    // NarrowUse.
    Instruction *WideUse = nullptr;
    if (WideAddRec.first == WideIncExpr &&
        Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
      WideUse = WideInc;
    else {
      WideUse = cloneIVUser(DU, WideAddRec.first);
      if (!WideUse)
        return nullptr;
    }
    // Evaluation of WideAddRec ensured that the narrow expression could be
    // extended outside the loop without overflow. This suggests that the wide use
    // evaluates to the same expression as the extended narrow use, but doesn't
    // absolutely guarantee it. Hence the following failsafe check. In rare cases
    // where it fails, we simply throw away the newly created wide use.
    if (WideAddRec.first != SE->getSCEV(WideUse)) {
      LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
                        << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
                        << "\n");
      DeadInsts.emplace_back(WideUse);
      return nullptr;
    };

    // if we reached this point then we are going to replace
    // DU.NarrowUse with WideUse. Reattach DbgValue then.
    replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);

    ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
    // Returning WideUse pushes it on the worklist.
    return WideUse;
  };

  if (auto *I = tryAddRecExpansion())
    return I;

  // If use is a loop condition, try to promote the condition instead of
  // truncating the IV first.
  if (widenLoopCompare(DU))
    return nullptr;

  // We are here about to generate a truncate instruction that may hurt
  // performance because the scalar evolution expression computed earlier
  // in WideAddRec.first does not indicate a polynomial induction expression.
  // In that case, look at the operands of the use instruction to determine
  // if we can still widen the use instead of truncating its operand.
  if (widenWithVariantUse(DU))
    return nullptr;

  // This user does not evaluate to a recurrence after widening, so don't
  // follow it. Instead insert a Trunc to kill off the original use,
  // eventually isolating the original narrow IV so it can be removed.
  truncateIVUse(DU, DT, LI);
  return nullptr;
}
1869 | | |
1870 | | /// Add eligible users of NarrowDef to NarrowIVUsers. |
1871 | 3.43k | void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { |
1872 | 3.43k | const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); |
1873 | 3.43k | bool NonNegativeDef = |
1874 | 3.43k | SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, |
1875 | 3.43k | SE->getZero(NarrowSCEV->getType())); |
1876 | 12.1k | for (User *U : NarrowDef->users()) { |
1877 | 12.1k | Instruction *NarrowUser = cast<Instruction>(U); |
1878 | | |
1879 | | // Handle data flow merges and bizarre phi cycles. |
1880 | 12.1k | if (!Widened.insert(NarrowUser).second) |
1881 | 2.30k | continue; |
1882 | | |
1883 | 9.84k | bool NonNegativeUse = false; |
1884 | 9.84k | if (!NonNegativeDef) { |
1885 | | // We might have a control-dependent range information for this context. |
1886 | 3.66k | if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) |
1887 | 14 | NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); |
1888 | 3.66k | } |
1889 | | |
1890 | 9.84k | NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, |
1891 | 9.84k | NonNegativeDef || NonNegativeUse); |
1892 | 9.84k | } |
1893 | 3.43k | } |
1894 | | |
/// Process a single induction variable. First use the SCEVExpander to create a
/// wide induction variable that evaluates to the same recurrence as the
/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
/// def-use chain. After widenIVUse has processed all interesting IV users, the
/// narrow IV will be isolated for removal by DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return nullptr;

  // Widen the induction variable expression.
  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == ExtendKind::Sign
                               ? SE->getSignExtendExpr(AddRec, WideType)
                               : SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow?
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(
      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
      "Loop header phi recurrence inputs do not dominate the loop");

  // Iterate over IV uses (including transitive ones) looking for IV increments
  // of the form 'add nsw %iv, <const>'. For each increment and each use of
  // the increment calculate control-dependent range information basing on
  // dominating conditions inside of the loop (e.g. a range check inside of the
  // loop). Calculated ranges are stored in PostIncRangeInfos map.
  //
  // Control-dependent range information is later used to prove that a narrow
  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
  // this on demand because when pushNarrowIVUsers needs this information some
  // of the dominating conditions might be already widened.
  if (UsePostIncrementRanges)
    calculatePostIncRanges(OrigPhi);

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
  Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
  // If the wide phi is not a phi node, for example a cast node, like bitcast,
  // inttoptr, ptrtoint, just skip for now.
  if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
    // if the cast node is an inserted instruction without any user, we should
    // remove it to make sure the pass don't touch the function as we can not
    // wide the phi.
    if (ExpandInst->hasNUses(0) &&
        Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
      DeadInsts.emplace_back(ExpandInst);
    return nullptr;
  }

  // Remembering the WideIV increment generated by SCEVExpander allows
  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
      dyn_cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    if (WideInc) {
      WideIncExpr = SE->getSCEV(WideInc);
      // Propagate the debug location associated with the original loop
      // increment to the new (widened) increment.
      auto *OrigInc =
        cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
      WideInc->setDebugLoc(OrigInc->getDebugLoc());
    }
  }

  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
  ++NumWidened;

  // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );

  Widened.insert(OrigPhi);
  pushNarrowIVUsers(OrigPhi, WidePhi);

  while (!NarrowIVUsers.empty()) {
    WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();

    // Process a def-use edge. This may replace the use, so don't hold a
    // use_iterator across it.
    Instruction *WideUse = widenIVUse(DU, Rewriter);

    // Follow all def-use edges from the previous narrow use.
    if (WideUse)
      pushNarrowIVUsers(DU.NarrowUse, WideUse);

    // widenIVUse may have removed the def-use edge.
    if (DU.NarrowDef->use_empty())
      DeadInsts.emplace_back(DU.NarrowDef);
  }

  // Attach any debug information to the new PHI.
  replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);

  return WidePhi;
}
2008 | | |
2009 | | /// Calculates control-dependent range for the given def at the given context |
2010 | | /// by looking at dominating conditions inside of the loop |
2011 | | void WidenIV::calculatePostIncRange(Instruction *NarrowDef, |
2012 | 34.4k | Instruction *NarrowUser) { |
2013 | 34.4k | Value *NarrowDefLHS; |
2014 | 34.4k | const APInt *NarrowDefRHS; |
2015 | 34.4k | if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), |
2016 | 34.4k | m_APInt(NarrowDefRHS))) || |
2017 | 34.4k | !NarrowDefRHS->isNonNegative()) |
2018 | 32.8k | return; |
2019 | | |
2020 | 1.60k | auto UpdateRangeFromCondition = [&] (Value *Condition, |
2021 | 5.75k | bool TrueDest) { |
2022 | 5.75k | CmpInst::Predicate Pred; |
2023 | 5.75k | Value *CmpRHS; |
2024 | 5.75k | if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), |
2025 | 5.75k | m_Value(CmpRHS)))) |
2026 | 5.72k | return; |
2027 | | |
2028 | 24 | CmpInst::Predicate P = |
2029 | 24 | TrueDest ? Pred : CmpInst::getInversePredicate(Pred); |
2030 | | |
2031 | 24 | auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); |
2032 | 24 | auto CmpConstrainedLHSRange = |
2033 | 24 | ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); |
2034 | 24 | auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( |
2035 | 24 | *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); |
2036 | | |
2037 | 24 | updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); |
2038 | 24 | }; |
2039 | | |
2040 | 11.6k | auto UpdateRangeFromGuards = [&](Instruction *Ctx) { |
2041 | 11.6k | if (!HasGuards) |
2042 | 11.6k | return; |
2043 | | |
2044 | 49 | for (Instruction &I : make_range(Ctx->getIterator().getReverse(), |
2045 | 526 | Ctx->getParent()->rend())) { |
2046 | 526 | Value *C = nullptr; |
2047 | 526 | if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) |
2048 | 23 | UpdateRangeFromCondition(C, /*TrueDest=*/true); |
2049 | 526 | } |
2050 | 49 | }; |
2051 | | |
2052 | 1.60k | UpdateRangeFromGuards(NarrowUser); |
2053 | | |
2054 | 1.60k | BasicBlock *NarrowUserBB = NarrowUser->getParent(); |
2055 | | // If NarrowUserBB is statically unreachable asking dominator queries may |
2056 | | // yield surprising results. (e.g. the block may not have a dom tree node) |
2057 | 1.60k | if (!DT->isReachableFromEntry(NarrowUserBB)) |
2058 | 0 | return; |
2059 | | |
2060 | 1.60k | for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); |
2061 | 11.6k | L->contains(DTB->getBlock()); |
2062 | 10.0k | DTB = DTB->getIDom()) { |
2063 | 10.0k | auto *BB = DTB->getBlock(); |
2064 | 10.0k | auto *TI = BB->getTerminator(); |
2065 | 10.0k | UpdateRangeFromGuards(TI); |
2066 | | |
2067 | 10.0k | auto *BI = dyn_cast<BranchInst>(TI); |
2068 | 10.0k | if (!BI || !BI->isConditional()) |
2069 | 4.35k | continue; |
2070 | | |
2071 | 5.73k | auto *TrueSuccessor = BI->getSuccessor(0); |
2072 | 5.73k | auto *FalseSuccessor = BI->getSuccessor(1); |
2073 | | |
2074 | 11.4k | auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { |
2075 | 11.4k | return BBE.isSingleEdge() && |
2076 | 11.4k | DT->dominates(BBE, NarrowUser->getParent()); |
2077 | 11.4k | }; |
2078 | | |
2079 | 5.73k | if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) |
2080 | 1.65k | UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); |
2081 | | |
2082 | 5.73k | if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) |
2083 | 4.07k | UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); |
2084 | 5.73k | } |
2085 | 1.60k | } |
2086 | | |
2087 | | /// Calculates PostIncRangeInfos map for the given IV |
2088 | 1.22k | void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { |
2089 | 1.22k | SmallPtrSet<Instruction *, 16> Visited; |
2090 | 1.22k | SmallVector<Instruction *, 6> Worklist; |
2091 | 1.22k | Worklist.push_back(OrigPhi); |
2092 | 1.22k | Visited.insert(OrigPhi); |
2093 | | |
2094 | 36.8k | while (!Worklist.empty()) { |
2095 | 35.6k | Instruction *NarrowDef = Worklist.pop_back_val(); |
2096 | | |
2097 | 46.1k | for (Use &U : NarrowDef->uses()) { |
2098 | 46.1k | auto *NarrowUser = cast<Instruction>(U.getUser()); |
2099 | | |
2100 | | // Don't go looking outside the current loop. |
2101 | 46.1k | auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; |
2102 | 46.1k | if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) |
2103 | 285 | continue; |
2104 | | |
2105 | 45.8k | if (!Visited.insert(NarrowUser).second) |
2106 | 11.4k | continue; |
2107 | | |
2108 | 34.4k | Worklist.push_back(NarrowUser); |
2109 | | |
2110 | 34.4k | calculatePostIncRange(NarrowDef, NarrowUser); |
2111 | 34.4k | } |
2112 | 35.6k | } |
2113 | 1.22k | } |
2114 | | |
2115 | | PHINode *llvm::createWideIV(const WideIVInfo &WI, |
2116 | | LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, |
2117 | | DominatorTree *DT, SmallVectorImpl<WeakTrackingVH> &DeadInsts, |
2118 | | unsigned &NumElimExt, unsigned &NumWidened, |
2119 | 2.02k | bool HasGuards, bool UsePostIncrementRanges) { |
2120 | 2.02k | WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges); |
2121 | 2.02k | PHINode *WidePHI = Widener.createWideIV(Rewriter); |
2122 | 2.02k | NumElimExt = Widener.getNumElimExt(); |
2123 | 2.02k | NumWidened = Widener.getNumWidened(); |
2124 | 2.02k | return WidePHI; |
2125 | 2.02k | } |