Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
Line
Count
Source
1
//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements induction variable simplification. It does
10
// not define any actual pass or policy, but provides a single function to
11
// simplify a loop's induction variables based on ScalarEvolution.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
16
#include "llvm/ADT/SmallVector.h"
17
#include "llvm/ADT/Statistic.h"
18
#include "llvm/Analysis/LoopInfo.h"
19
#include "llvm/IR/Dominators.h"
20
#include "llvm/IR/IRBuilder.h"
21
#include "llvm/IR/Instructions.h"
22
#include "llvm/IR/IntrinsicInst.h"
23
#include "llvm/IR/PatternMatch.h"
24
#include "llvm/Support/Debug.h"
25
#include "llvm/Support/raw_ostream.h"
26
#include "llvm/Transforms/Utils/Local.h"
27
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
28
29
using namespace llvm;
30
using namespace llvm::PatternMatch;
31
32
0
#define DEBUG_TYPE "indvars"
33
34
STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
35
STATISTIC(NumElimOperand,  "Number of IV operands folded into a use");
36
STATISTIC(NumFoldedUser, "Number of IV users folded into a constant");
37
STATISTIC(NumElimRem     , "Number of IV remainder operations eliminated");
38
STATISTIC(
39
    NumSimplifiedSDiv,
40
    "Number of IV signed division operations converted to unsigned division");
41
STATISTIC(
42
    NumSimplifiedSRem,
43
    "Number of IV signed remainder operations converted to unsigned remainder");
44
STATISTIC(NumElimCmp     , "Number of IV comparisons eliminated");
45
46
namespace {
47
  /// This is a utility for simplifying induction variables
48
  /// based on ScalarEvolution. It is the primary instrument of the
49
  /// IndvarSimplify pass, but it may also be directly invoked to clean up after
50
  /// other loop passes that preserve SCEV.
51
  class SimplifyIndvar {
52
    Loop             *L;
53
    LoopInfo         *LI;
54
    ScalarEvolution  *SE;
55
    DominatorTree    *DT;
56
    const TargetTransformInfo *TTI;
57
    SCEVExpander     &Rewriter;
58
    SmallVectorImpl<WeakTrackingVH> &DeadInsts;
59
60
    bool Changed = false;
61
62
  public:
63
    SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
64
                   LoopInfo *LI, const TargetTransformInfo *TTI,
65
                   SCEVExpander &Rewriter,
66
                   SmallVectorImpl<WeakTrackingVH> &Dead)
67
        : L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter),
68
76.3k
          DeadInsts(Dead) {
69
76.3k
      assert(LI && "IV simplification requires LoopInfo");
70
76.3k
    }
71
72
76.3k
    bool hasChanged() const { return Changed; }
73
74
    /// Iteratively perform simplification on a worklist of users of the
75
    /// specified induction variable. This is the top-level driver that applies
76
    /// all simplifications to users of an IV.
77
    void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr);
78
79
    Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
80
81
    bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
82
    bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
83
    bool replaceFloatIVWithIntegerIV(Instruction *UseInst);
84
85
    bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
86
    bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
87
    bool eliminateTrunc(TruncInst *TI);
88
    bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
89
    bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand);
90
    void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand);
91
    void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand,
92
                             bool IsSigned);
93
    void replaceRemWithNumerator(BinaryOperator *Rem);
94
    void replaceRemWithNumeratorOrZero(BinaryOperator *Rem);
95
    void replaceSRemWithURem(BinaryOperator *Rem);
96
    bool eliminateSDiv(BinaryOperator *SDiv);
97
    bool strengthenBinaryOp(BinaryOperator *BO, Instruction *IVOperand);
98
    bool strengthenOverflowingOperation(BinaryOperator *OBO,
99
                                        Instruction *IVOperand);
100
    bool strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand);
101
  };
102
}
103
104
/// Find a point in code which dominates all given instructions. We can safely
105
/// assume that, whatever fact we can prove at the found point, this fact is
106
/// also true for each of the given instructions.
107
static Instruction *findCommonDominator(ArrayRef<Instruction *> Instructions,
108
6.66k
                                        DominatorTree &DT) {
109
6.66k
  Instruction *CommonDom = nullptr;
110
6.66k
  for (auto *Insn : Instructions)
111
8.84k
    CommonDom =
112
8.84k
        CommonDom ? DT.findNearestCommonDominator(CommonDom, Insn) : Insn;
113
6.66k
  assert(CommonDom && "Common dominator not found?");
114
0
  return CommonDom;
115
6.66k
}
116
117
/// Fold an IV operand into its use.  This removes increments of an
118
/// aligned IV when used by an instruction that ignores the low bits.
119
///
120
/// IVOperand is guaranteed SCEVable, but UseInst may not be.
121
///
122
/// Return the operand of IVOperand for this induction variable if IVOperand can
123
/// be folded (in case more folding opportunities have been exposed).
124
/// Otherwise return null.
125
423k
Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
126
423k
  Value *IVSrc = nullptr;
127
423k
  const unsigned OperIdx = 0;
128
423k
  const SCEV *FoldedExpr = nullptr;
129
423k
  bool MustDropExactFlag = false;
130
423k
  switch (UseInst->getOpcode()) {
131
422k
  default:
132
422k
    return nullptr;
133
1.02k
  case Instruction::UDiv:
134
1.52k
  case Instruction::LShr:
135
    // We're only interested in the case where we know something about
136
    // the numerator and have a constant denominator.
137
1.52k
    if (IVOperand != UseInst->getOperand(OperIdx) ||
138
1.52k
        !isa<ConstantInt>(UseInst->getOperand(1)))
139
1.09k
      return nullptr;
140
141
    // Attempt to fold a binary operator with constant operand.
142
    // e.g. ((I + 1) >> 2) => I >> 2
143
435
    if (!isa<BinaryOperator>(IVOperand)
144
435
        || !isa<ConstantInt>(IVOperand->getOperand(1)))
145
313
      return nullptr;
146
147
122
    IVSrc = IVOperand->getOperand(0);
148
    // IVSrc must be the (SCEVable) IV, since the other operand is const.
149
122
    assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");
150
151
0
    ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
152
122
    if (UseInst->getOpcode() == Instruction::LShr) {
153
      // Get a constant for the divisor. See createSCEV.
154
26
      uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
155
26
      if (D->getValue().uge(BitWidth))
156
4
        return nullptr;
157
158
22
      D = ConstantInt::get(UseInst->getContext(),
159
22
                           APInt::getOneBitSet(BitWidth, D->getZExtValue()));
160
22
    }
161
118
    const auto *LHS = SE->getSCEV(IVSrc);
162
118
    const auto *RHS = SE->getSCEV(D);
163
118
    FoldedExpr = SE->getUDivExpr(LHS, RHS);
164
    // We might have the 'exact' flag set at this point, which will no longer be
165
    // correct after we make the replacement.
166
118
    if (UseInst->isExact() && LHS != SE->getMulExpr(FoldedExpr, RHS))
167
24
      MustDropExactFlag = true;
168
423k
  }
169
  // We have something that might fold its operand. Compare SCEVs.
170
118
  if (!SE->isSCEVable(UseInst->getType()))
171
0
    return nullptr;
172
173
  // Bypass the operand if SCEV can prove it has no effect.
174
118
  if (SE->getSCEV(UseInst) != FoldedExpr)
175
118
    return nullptr;
176
177
0
  LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
178
0
                    << " -> " << *UseInst << '\n');
179
180
0
  UseInst->setOperand(OperIdx, IVSrc);
181
0
  assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
182
183
0
  if (MustDropExactFlag)
184
0
    UseInst->dropPoisonGeneratingFlags();
185
186
0
  ++NumElimOperand;
187
0
  Changed = true;
188
0
  if (IVOperand->use_empty())
189
0
    DeadInsts.emplace_back(IVOperand);
190
0
  return IVSrc;
191
118
}
192
193
bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
194
3.70k
                                               Instruction *IVOperand) {
195
3.70k
  auto *Preheader = L->getLoopPreheader();
196
3.70k
  if (!Preheader)
197
0
    return false;
198
3.70k
  unsigned IVOperIdx = 0;
199
3.70k
  ICmpInst::Predicate Pred = ICmp->getPredicate();
200
3.70k
  if (IVOperand != ICmp->getOperand(0)) {
201
    // Swapped
202
988
    assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
203
0
    IVOperIdx = 1;
204
988
    Pred = ICmpInst::getSwappedPredicate(Pred);
205
988
  }
206
207
  // Get the SCEVs for the ICmp operands (in the specific context of the
208
  // current loop)
209
0
  const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
210
3.70k
  const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
211
3.70k
  const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
212
3.70k
  auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L, ICmp);
213
3.70k
  if (!LIP)
214
3.57k
    return false;
215
131
  ICmpInst::Predicate InvariantPredicate = LIP->Pred;
216
131
  const SCEV *InvariantLHS = LIP->LHS;
217
131
  const SCEV *InvariantRHS = LIP->RHS;
218
219
  // Do not generate something ridiculous.
220
131
  auto *PHTerm = Preheader->getTerminator();
221
131
  if (Rewriter.isHighCostExpansion({InvariantLHS, InvariantRHS}, L,
222
131
                                   2 * SCEVCheapExpansionBudget, TTI, PHTerm) ||
223
131
      !Rewriter.isSafeToExpandAt(InvariantLHS, PHTerm) ||
224
131
      !Rewriter.isSafeToExpandAt(InvariantRHS, PHTerm))
225
0
    return false;
226
131
  auto *NewLHS =
227
131
      Rewriter.expandCodeFor(InvariantLHS, IVOperand->getType(), PHTerm);
228
131
  auto *NewRHS =
229
131
      Rewriter.expandCodeFor(InvariantRHS, IVOperand->getType(), PHTerm);
230
131
  LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');
231
131
  ICmp->setPredicate(InvariantPredicate);
232
131
  ICmp->setOperand(0, NewLHS);
233
131
  ICmp->setOperand(1, NewRHS);
234
131
  return true;
235
131
}
236
237
/// SimplifyIVUsers helper for eliminating useless
238
/// comparisons against an induction variable.
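//
// Illustrative sketch (hypothetical IR, not from this file): with
//   %iv = phi i64 [ 0, %preheader ], [ %iv.next, %latch ]
// and a backedge-taken count SCEV can bound below 100, a check such as
//   %c = icmp ult i64 %iv, 100
// may be proven always true at its users' common dominator; its uses are then
// replaced by the constant 'true' and the icmp is queued in DeadInsts.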
239
void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
240
6.61k
                                           Instruction *IVOperand) {
241
6.61k
  unsigned IVOperIdx = 0;
242
6.61k
  ICmpInst::Predicate Pred = ICmp->getPredicate();
243
6.61k
  ICmpInst::Predicate OriginalPred = Pred;
244
6.61k
  if (IVOperand != ICmp->getOperand(0)) {
245
    // Swapped
246
1.70k
    assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
247
0
    IVOperIdx = 1;
248
1.70k
    Pred = ICmpInst::getSwappedPredicate(Pred);
249
1.70k
  }
250
251
  // Get the SCEVs for the ICmp operands (in the specific context of the
252
  // current loop)
253
0
  const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
254
6.61k
  const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
255
6.61k
  const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
256
257
  // If the condition is always true or always false in the given context,
258
  // replace it with a constant value.
259
6.61k
  SmallVector<Instruction *, 4> Users;
260
6.61k
  for (auto *U : ICmp->users())
261
8.78k
    Users.push_back(cast<Instruction>(U));
262
6.61k
  const Instruction *CtxI = findCommonDominator(Users, *DT);
263
6.61k
  if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) {
264
2.90k
    SE->forgetValue(ICmp);
265
2.90k
    ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev));
266
2.90k
    DeadInsts.emplace_back(ICmp);
267
2.90k
    LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
268
3.70k
  } else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
269
    // fallthrough to end of function
270
3.57k
  } else if (ICmpInst::isSigned(OriginalPred) &&
271
3.57k
             SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
272
    // If we were unable to make anything above, all we can do is canonicalize
273
    // the comparison hoping that it will open the doors for other
274
    // optimizations. If we find out that we compare two non-negative values,
275
    // we turn the instruction's predicate to its unsigned version. Note that
276
    // we cannot rely on Pred here unless we check if we have swapped it.
277
40
    assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
278
40
    LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp
279
40
                      << '\n');
280
40
    ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
281
40
  } else
282
3.53k
    return;
283
284
3.08k
  ++NumElimCmp;
285
3.08k
  Changed = true;
286
3.08k
}
287
288
436
bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
289
  // Get the SCEVs for the SDiv operands.
290
436
  auto *N = SE->getSCEV(SDiv->getOperand(0));
291
436
  auto *D = SE->getSCEV(SDiv->getOperand(1));
292
293
  // Simplify unnecessary loops away.
294
436
  const Loop *L = LI->getLoopFor(SDiv->getParent());
295
436
  N = SE->getSCEVAtScope(N, L);
296
436
  D = SE->getSCEVAtScope(D, L);
297
298
  // Replace sdiv by udiv if both of the operands are non-negative
299
436
  if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) {
300
67
    auto *UDiv = BinaryOperator::Create(
301
67
        BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1),
302
67
        SDiv->getName() + ".udiv", SDiv);
303
67
    UDiv->setIsExact(SDiv->isExact());
304
67
    SDiv->replaceAllUsesWith(UDiv);
305
67
    LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');
306
67
    ++NumSimplifiedSDiv;
307
67
    Changed = true;
308
67
    DeadInsts.push_back(SDiv);
309
67
    return true;
310
67
  }
311
312
369
  return false;
313
436
}
314
315
// i %s n -> i %u n if i >= 0 and n >= 0
316
35
void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) {
317
35
  auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
318
35
  auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D,
319
35
                                      Rem->getName() + ".urem", Rem);
320
35
  Rem->replaceAllUsesWith(URem);
321
35
  LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n');
322
35
  ++NumSimplifiedSRem;
323
35
  Changed = true;
324
35
  DeadInsts.emplace_back(Rem);
325
35
}
326
327
// i % n  -->  i  if i is in [0,n).
328
132
void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) {
329
132
  Rem->replaceAllUsesWith(Rem->getOperand(0));
330
132
  LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
331
132
  ++NumElimRem;
332
132
  Changed = true;
333
132
  DeadInsts.emplace_back(Rem);
334
132
}
335
336
// (i+1) % n  -->  (i+1)==n?0:(i+1)  if i is in [0,n).
337
79
void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) {
338
79
  auto *T = Rem->getType();
339
79
  auto *N = Rem->getOperand(0), *D = Rem->getOperand(1);
340
79
  ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, N, D);
341
79
  SelectInst *Sel =
342
79
      SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem);
343
79
  Rem->replaceAllUsesWith(Sel);
344
79
  LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
345
79
  ++NumElimRem;
346
79
  Changed = true;
347
79
  DeadInsts.emplace_back(Rem);
348
79
}
349
350
/// SimplifyIVUsers helper for eliminating useless remainder operations
351
/// operating on an induction variable or replacing srem by urem.
352
void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem,
353
                                         Instruction *IVOperand,
354
1.35k
                                         bool IsSigned) {
355
1.35k
  auto *NValue = Rem->getOperand(0);
356
1.35k
  auto *DValue = Rem->getOperand(1);
357
  // We're only interested in the case where we know something about
358
  // the numerator, unless it is an srem, because we want to replace srem by urem
359
  // in general.
360
1.35k
  bool UsedAsNumerator = IVOperand == NValue;
361
1.35k
  if (!UsedAsNumerator && !IsSigned)
362
196
    return;
363
364
1.15k
  const SCEV *N = SE->getSCEV(NValue);
365
366
  // Simplify unnecessary loops away.
367
1.15k
  const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
368
1.15k
  N = SE->getSCEVAtScope(N, ICmpLoop);
369
370
1.15k
  bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N);
371
372
  // Do not proceed if the Numerator may be negative
373
1.15k
  if (!IsNumeratorNonNegative)
374
320
    return;
375
376
835
  const SCEV *D = SE->getSCEV(DValue);
377
835
  D = SE->getSCEVAtScope(D, ICmpLoop);
378
379
835
  if (UsedAsNumerator) {
380
810
    auto LT = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
381
810
    if (SE->isKnownPredicate(LT, N, D)) {
382
132
      replaceRemWithNumerator(Rem);
383
132
      return;
384
132
    }
385
386
678
    auto *T = Rem->getType();
387
678
    const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T));
388
678
    if (SE->isKnownPredicate(LT, NLessOne, D)) {
389
79
      replaceRemWithNumeratorOrZero(Rem);
390
79
      return;
391
79
    }
392
678
  }
393
394
  // Try to replace SRem with URem, if both N and D are known non-negative.
395
  // Since we have already checked N, we only need to check D now.
396
624
  if (!IsSigned || !SE->isKnownNonNegative(D))
397
589
    return;
398
399
35
  replaceSRemWithURem(Rem);
400
35
}
401
402
0
bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
403
0
  const SCEV *LHS = SE->getSCEV(WO->getLHS());
404
0
  const SCEV *RHS = SE->getSCEV(WO->getRHS());
405
0
  if (!SE->willNotOverflow(WO->getBinaryOp(), WO->isSigned(), LHS, RHS))
406
0
    return false;
407
408
  // Proved no overflow, nuke the overflow check and, if possible, the overflow
409
  // intrinsic as well.
410
411
0
  BinaryOperator *NewResult = BinaryOperator::Create(
412
0
      WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO);
413
414
0
  if (WO->isSigned())
415
0
    NewResult->setHasNoSignedWrap(true);
416
0
  else
417
0
    NewResult->setHasNoUnsignedWrap(true);
418
419
0
  SmallVector<ExtractValueInst *, 4> ToDelete;
420
421
0
  for (auto *U : WO->users()) {
422
0
    if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
423
0
      if (EVI->getIndices()[0] == 1)
424
0
        EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext()));
425
0
      else {
426
0
        assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
427
0
        EVI->replaceAllUsesWith(NewResult);
428
0
      }
429
0
      ToDelete.push_back(EVI);
430
0
    }
431
0
  }
432
433
0
  for (auto *EVI : ToDelete)
434
0
    EVI->eraseFromParent();
435
436
0
  if (WO->use_empty())
437
0
    WO->eraseFromParent();
438
439
0
  Changed = true;
440
0
  return true;
441
0
}
442
443
0
bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) {
444
0
  const SCEV *LHS = SE->getSCEV(SI->getLHS());
445
0
  const SCEV *RHS = SE->getSCEV(SI->getRHS());
446
0
  if (!SE->willNotOverflow(SI->getBinaryOp(), SI->isSigned(), LHS, RHS))
447
0
    return false;
448
449
0
  BinaryOperator *BO = BinaryOperator::Create(
450
0
      SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
451
0
  if (SI->isSigned())
452
0
    BO->setHasNoSignedWrap();
453
0
  else
454
0
    BO->setHasNoUnsignedWrap();
455
456
0
  SI->replaceAllUsesWith(BO);
457
0
  DeadInsts.emplace_back(SI);
458
0
  Changed = true;
459
0
  return true;
460
0
}
461
462
5.75k
bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
463
  // It is always legal to replace
464
  //   icmp <pred> i32 trunc(iv), n
465
  // with
466
  //   icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate.
467
  // Or with
468
  //   icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate.
469
  // Or with either of these if pred is an equality predicate.
470
  //
471
  // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for
472
  // every comparison which uses trunc, it means that we can replace each of
473
  // them with comparison of iv against sext/zext(n). We no longer need trunc
474
  // after that.
475
  //
476
  // TODO: Should we do this if we can widen *some* comparisons, but not all
477
  // of them? Sometimes it is enough to enable other optimizations, but the
478
  // trunc instruction will stay in the loop.
479
5.75k
  Value *IV = TI->getOperand(0);
480
5.75k
  Type *IVTy = IV->getType();
481
5.75k
  const SCEV *IVSCEV = SE->getSCEV(IV);
482
5.75k
  const SCEV *TISCEV = SE->getSCEV(TI);
483
484
  // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can
485
  // get rid of trunc
486
5.75k
  bool DoesSExtCollapse = false;
487
5.75k
  bool DoesZExtCollapse = false;
488
5.75k
  if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy))
489
4.00k
    DoesSExtCollapse = true;
490
5.75k
  if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy))
491
3.45k
    DoesZExtCollapse = true;
492
493
  // If neither sext nor zext collapses, it is not profitable to do any
494
  // transform. Bail.
495
5.75k
  if (!DoesSExtCollapse && !DoesZExtCollapse)
496
1.01k
    return false;
497
498
  // Collect users of the trunc that look like comparisons against invariants.
499
  // Bail if we find something different.
500
4.73k
  SmallVector<ICmpInst *, 4> ICmpUsers;
501
4.75k
  for (auto *U : TI->users()) {
502
    // We don't care about users in unreachable blocks.
503
4.75k
    if (isa<Instruction>(U) &&
504
4.75k
        !DT->isReachableFromEntry(cast<Instruction>(U)->getParent()))
505
0
      continue;
506
4.75k
    ICmpInst *ICI = dyn_cast<ICmpInst>(U);
507
4.75k
    if (!ICI) return false;
508
556
    assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
509
556
    if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) &&
510
556
        !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0))))
511
211
      return false;
512
    // If we cannot get rid of trunc, bail.
513
345
    if (ICI->isSigned() && !DoesSExtCollapse)
514
7
      return false;
515
338
    if (ICI->isUnsigned() && !DoesZExtCollapse)
516
23
      return false;
517
    // For equality, either signed or unsigned works.
518
315
    ICmpUsers.push_back(ICI);
519
315
  }
520
521
303
  auto CanUseZExt = [&](ICmpInst *ICI) {
522
    // Unsigned comparison can be widened as unsigned.
523
303
    if (ICI->isUnsigned())
524
65
      return true;
525
    // Is it profitable to do zext?
526
238
    if (!DoesZExtCollapse)
527
15
      return false;
528
    // For equality, we can safely zext both parts.
529
223
    if (ICI->isEquality())
530
219
      return true;
531
    // Otherwise we can only use zext when comparing two non-negative or two
532
    // negative values. But in practice, we will never pass DoesZExtCollapse
533
    // check for a negative value, because zext(trunc(x)) is non-negative. So
534
    // it only makes sense to check for non-negativity here.
535
4
    const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0));
536
4
    const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1));
537
4
    return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2);
538
223
  };
539
  // Replace all comparisons against trunc with comparisons against IV.
540
303
  for (auto *ICI : ICmpUsers) {
541
303
    bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0));
542
303
    auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1);
543
303
    IRBuilder<> Builder(ICI);
544
303
    Value *Ext = nullptr;
545
    // For signed/unsigned predicate, replace the old comparison with comparison
546
    // of immediate IV against sext/zext of the invariant argument. If we can
547
    // use either sext or zext (i.e. we are dealing with equality predicate),
548
    // then prefer zext as a more canonical form.
549
    // TODO: If we see a signed comparison which can be turned into unsigned,
550
    // we can do it here for canonicalization purposes.
551
303
    ICmpInst::Predicate Pred = ICI->getPredicate();
552
303
    if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred);
553
303
    if (CanUseZExt(ICI)) {
554
285
      assert(DoesZExtCollapse && "Unprofitable zext?");
555
0
      Ext = Builder.CreateZExt(Op1, IVTy, "zext");
556
285
      Pred = ICmpInst::getUnsignedPredicate(Pred);
557
285
    } else {
558
18
      assert(DoesSExtCollapse && "Unprofitable sext?");
559
0
      Ext = Builder.CreateSExt(Op1, IVTy, "sext");
560
18
      assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!");
561
18
    }
562
0
    bool Changed;
563
303
    L->makeLoopInvariant(Ext, Changed);
564
303
    (void)Changed;
565
303
    auto *NewCmp = Builder.CreateICmp(Pred, IV, Ext);
566
303
    ICI->replaceAllUsesWith(NewCmp);
567
303
    DeadInsts.emplace_back(ICI);
568
303
  }
569
570
  // Trunc no longer needed.
571
302
  TI->replaceAllUsesWith(PoisonValue::get(TI->getType()));
572
302
  DeadInsts.emplace_back(TI);
573
302
  return true;
574
4.73k
}
575
576
/// Eliminate an operation that consumes a simple IV and has no observable
577
/// side-effect given the range of IV values.  IVOperand is guaranteed SCEVable,
578
/// but UseInst may not be.
579
bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
580
423k
                                     Instruction *IVOperand) {
581
423k
  if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
582
6.61k
    eliminateIVComparison(ICmp, IVOperand);
583
6.61k
    return true;
584
6.61k
  }
585
417k
  if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) {
586
261k
    bool IsSRem = Bin->getOpcode() == Instruction::SRem;
587
261k
    if (IsSRem || Bin->getOpcode() == Instruction::URem) {
588
1.35k
      simplifyIVRemainder(Bin, IVOperand, IsSRem);
589
1.35k
      return true;
590
1.35k
    }
591
592
260k
    if (Bin->getOpcode() == Instruction::SDiv)
593
436
      return eliminateSDiv(Bin);
594
260k
  }
595
596
415k
  if (auto *WO = dyn_cast<WithOverflowInst>(UseInst))
597
0
    if (eliminateOverflowIntrinsic(WO))
598
0
      return true;
599
600
415k
  if (auto *SI = dyn_cast<SaturatingInst>(UseInst))
601
0
    if (eliminateSaturatingIntrinsic(SI))
602
0
      return true;
603
604
415k
  if (auto *TI = dyn_cast<TruncInst>(UseInst))
605
5.75k
    if (eliminateTrunc(TI))
606
302
      return true;
607
608
415k
  if (eliminateIdentitySCEV(UseInst, IVOperand))
609
3.32k
    return true;
610
611
411k
  return false;
612
415k
}
613
614
1.33k
static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) {
615
1.33k
  if (auto *BB = L->getLoopPreheader())
616
1.33k
    return BB->getTerminator();
617
618
0
  return Hint;
619
1.33k
}
620
621
/// Replace the UseInst with a loop invariant expression if it is safe.
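//
// Illustrative sketch (hypothetical IR, not from this file): inside a loop
// with %iv = {0,+,1}, the instruction
//   %d = sub i64 %iv.next, %iv
// has the loop-invariant SCEV 1, so it can be expanded in the preheader
// (here just the constant 1) and its uses replaced, provided the expansion
// is cheap and safe to speculate at the insertion point.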
622
431k
bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
623
431k
  if (!SE->isSCEVable(I->getType()))
624
5.55k
    return false;
625
626
  // Get the symbolic expression for this instruction.
627
426k
  const SCEV *S = SE->getSCEV(I);
628
629
426k
  if (!SE->isLoopInvariant(S, L))
630
424k
    return false;
631
632
  // Do not generate something ridiculous even if S is loop invariant.
633
1.52k
  if (Rewriter.isHighCostExpansion(S, L, SCEVCheapExpansionBudget, TTI, I))
634
194
    return false;
635
636
1.33k
  auto *IP = GetLoopInvariantInsertPosition(L, I);
637
638
1.33k
  if (!Rewriter.isSafeToExpandAt(S, IP)) {
639
4
    LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I
640
4
                      << " with non-speculable loop invariant: " << *S << '\n');
641
4
    return false;
642
4
  }
643
644
1.33k
  auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP);
645
646
1.33k
  I->replaceAllUsesWith(Invariant);
647
1.33k
  LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I
648
1.33k
                    << " with loop invariant: " << *S << '\n');
649
1.33k
  ++NumFoldedUser;
650
1.33k
  Changed = true;
651
1.33k
  DeadInsts.emplace_back(I);
652
1.33k
  return true;
653
1.33k
}
654
655
/// Eliminate redundant type cast between integer and float.
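//
// Illustrative sketch (hypothetical IR, not from this file): for
//   %f = sitofp i32 %iv to double
//   %i = fptosi double %f to i32
// the signed range of %iv fits in double's 53-bit mantissa, so the round trip
// is lossless and %i's uses can be rewritten to use %iv directly (or a
// trunc/zext/sext of it when the integer types differ).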
656
412k
bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
657
412k
  if (UseInst->getOpcode() != CastInst::SIToFP &&
658
412k
      UseInst->getOpcode() != CastInst::UIToFP)
659
412k
    return false;
660
661
8
  Instruction *IVOperand = cast<Instruction>(UseInst->getOperand(0));
662
  // Get the symbolic expression for this instruction.
663
8
  const SCEV *IV = SE->getSCEV(IVOperand);
664
8
  int MaskBits;
665
8
  if (UseInst->getOpcode() == CastInst::SIToFP)
666
8
    MaskBits = (int)SE->getSignedRange(IV).getMinSignedBits();
667
0
  else
668
0
    MaskBits = (int)SE->getUnsignedRange(IV).getActiveBits();
669
8
  int DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
670
8
  if (MaskBits <= DestNumSigBits) {
671
8
    for (User *U : UseInst->users()) {
672
      // Match for fptosi/fptoui of sitofp and with same type.
673
8
      auto *CI = dyn_cast<CastInst>(U);
674
8
      if (!CI)
675
8
        continue;
676
677
0
      CastInst::CastOps Opcode = CI->getOpcode();
678
0
      if (Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI)
679
0
        continue;
680
681
0
      Value *Conv = nullptr;
682
0
      if (IVOperand->getType() != CI->getType()) {
683
0
        IRBuilder<> Builder(CI);
684
0
        StringRef Name = IVOperand->getName();
685
        // To match InstCombine logic, we only need sext if both fptosi and
686
        // sitofp are used. If one of them is unsigned, then we can use zext.
687
0
        if (SE->getTypeSizeInBits(IVOperand->getType()) >
688
0
            SE->getTypeSizeInBits(CI->getType())) {
689
0
          Conv = Builder.CreateTrunc(IVOperand, CI->getType(), Name + ".trunc");
690
0
        } else if (Opcode == CastInst::FPToUI ||
691
0
                   UseInst->getOpcode() == CastInst::UIToFP) {
692
0
          Conv = Builder.CreateZExt(IVOperand, CI->getType(), Name + ".zext");
693
0
        } else {
694
0
          Conv = Builder.CreateSExt(IVOperand, CI->getType(), Name + ".sext");
695
0
        }
696
0
      } else
697
0
        Conv = IVOperand;
698
699
0
      CI->replaceAllUsesWith(Conv);
700
0
      DeadInsts.push_back(CI);
701
0
      LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
702
0
                        << " with: " << *Conv << '\n');
703
704
0
      ++NumFoldedUser;
705
0
      Changed = true;
706
0
    }
707
8
  }
708
709
8
  return Changed;
710
412k
}
711
712
/// Eliminate any operation that SCEV can prove is an identity function.
713
bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
714
415k
                                           Instruction *IVOperand) {
715
415k
  if (!SE->isSCEVable(UseInst->getType()) ||
716
415k
      (UseInst->getType() != IVOperand->getType()) ||
717
415k
      (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
718
410k
    return false;
719
720
  // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
721
  // dominator tree, even if X is an operand to Y.  For instance, in
722
  //
723
  //     %iv = phi i32 {0,+,1}
724
  //     br %cond, label %left, label %merge
725
  //
726
  //   left:
727
  //     %X = add i32 %iv, 0
728
  //     br label %merge
729
  //
730
  //   merge:
731
  //     %M = phi (%X, %iv)
732
  //
733
  // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and
734
  // %M.replaceAllUsesWith(%X) would be incorrect.
735
736
4.63k
  if (isa<PHINode>(UseInst))
737
    // If UseInst is not a PHI node then we know that IVOperand dominates
738
    // UseInst directly from the legality of SSA.
739
2.30k
    if (!DT || !DT->dominates(IVOperand, UseInst))
740
31
      return false;
741
742
4.59k
  if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
743
1.27k
    return false;
744
745
3.32k
  LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
746
747
3.32k
  SE->forgetValue(UseInst);
748
3.32k
  UseInst->replaceAllUsesWith(IVOperand);
749
3.32k
  ++NumElimIdentity;
750
3.32k
  Changed = true;
751
3.32k
  DeadInsts.emplace_back(UseInst);
752
3.32k
  return true;
753
4.59k
}
754
755
bool SimplifyIndvar::strengthenBinaryOp(BinaryOperator *BO,
756
258k
                                        Instruction *IVOperand) {
757
258k
  return (isa<OverflowingBinaryOperator>(BO) &&
758
258k
          strengthenOverflowingOperation(BO, IVOperand)) ||
759
258k
         (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand));
760
258k
}
761
762
/// Annotate BO with nsw / nuw if it provably does not signed-overflow /
763
/// unsigned-overflow.  Returns true if anything changed, false otherwise.
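//
// Illustrative sketch (hypothetical IR, not from this file): for an IV that
// SCEV proves stays within [0, 100), the update
//   %iv.next = add i32 %iv, 1
// provably neither signed- nor unsigned-overflows, so it can be re-tagged as
//   %iv.next = add nuw nsw i32 %iv, 1
// based on the flags returned by getStrengthenedNoWrapFlagsFromBinOp().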
764
bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
765
255k
                                                    Instruction *IVOperand) {
766
255k
  auto Flags = SE->getStrengthenedNoWrapFlagsFromBinOp(
767
255k
      cast<OverflowingBinaryOperator>(BO));
768
769
255k
  if (!Flags)
770
239k
    return false;
771
772
15.0k
  BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) ==
773
15.0k
                           SCEV::FlagNUW);
774
15.0k
  BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) ==
775
15.0k
                         SCEV::FlagNSW);
776
777
  // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap
778
  // flags on addrecs while performing zero/sign extensions. We could call
779
  // forgetValue() here to make sure those flags also propagate to any other
780
  // SCEV expressions based on the addrec. However, this can have pathological
781
  // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384.
782
15.0k
  return true;
783
255k
}
784
785
/// Annotate the Shr in (X << IVOperand) >> C as exact using the
786
/// information from the IV's range. Returns true if anything changed, false
787
/// otherwise.
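//
// Illustrative sketch (hypothetical IR, not from this file): if SCEV shows
// the unsigned minimum of %iv is at least 3, then in
//   %shl = shl i32 %x, %iv
//   %shr = lshr i32 %shl, 3
// the low 3 bits of %shl are known zero, so the lshr discards only zeros and
// can be marked 'exact'.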
788
bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
789
1.67k
                                          Instruction *IVOperand) {
790
1.67k
  if (BO->getOpcode() == Instruction::Shl) {
791
1.67k
    bool Changed = false;
792
1.67k
    ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand));
793
2.18k
    for (auto *U : BO->users()) {
794
2.18k
      const APInt *C;
795
2.18k
      if (match(U,
796
2.18k
                m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) ||
797
2.18k
          match(U,
798
2.18k
                m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) {
799
0
        BinaryOperator *Shr = cast<BinaryOperator>(U);
800
0
        if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) {
801
0
          Shr->setIsExact(true);
802
0
          Changed = true;
803
0
        }
804
0
      }
805
2.18k
    }
806
1.67k
    return Changed;
807
1.67k
  }
808
809
0
  return false;
810
1.67k
}
811
812
/// Add all uses of Def to the current IV's worklist.
813
static void pushIVUsers(
814
  Instruction *Def, Loop *L,
815
  SmallPtrSet<Instruction*,16> &Simplified,
816
436k
  SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
817
818
797k
  for (User *U : Def->users()) {
819
797k
    Instruction *UI = cast<Instruction>(U);
820
821
    // Avoid infinite or exponential worklist processing.
822
    // Also ensure unique worklist users.
823
    // If Def is a LoopPhi, it may not be in the Simplified set, so check for
824
    // self edges first.
825
797k
    if (UI == Def)
826
5.07k
      continue;
827
828
    // Only change the current Loop, do not change the other parts (e.g. other
829
    // Loops).
830
792k
    if (!L->contains(UI))
831
4.31k
      continue;
832
833
    // Do not push the same instruction more than once.
834
788k
    if (!Simplified.insert(UI).second)
835
293k
      continue;
836
837
494k
    SimpleIVUsers.push_back(std::make_pair(UI, Def));
838
494k
  }
839
436k
}
840
841
/// Return true if this instruction generates a simple SCEV
842
/// expression in terms of that IV.
843
///
844
/// This is similar to IVUsers' isInteresting() but processes each instruction
845
/// non-recursively when the operand is already known to be a simpleIVUser.
846
///
847
404k
static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
848
404k
  if (!SE->isSCEVable(I->getType()))
849
4.84k
    return false;
850
851
  // Get the symbolic expression for this instruction.
852
399k
  const SCEV *S = SE->getSCEV(I);
853
854
  // Only consider affine recurrences.
855
399k
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
856
399k
  if (AR && AR->getLoop() == L)
857
335k
    return true;
858
859
64.3k
  return false;
860
399k
}
861
862
/// Iteratively perform simplification on a worklist of users
863
/// of the specified induction variable. Each successive simplification may push
864
/// more users which may themselves be candidates for simplification.
865
///
866
/// This algorithm does not require IVUsers analysis. Instead, it simplifies
867
/// instructions in-place during analysis. Rather than rewriting induction
868
/// variables bottom-up from their users, it transforms a chain of IVUsers
869
/// top-down, updating the IR only when it encounters a clear optimization
870
/// opportunity.
871
///
872
/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
873
///
874
76.3k
void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
875
76.3k
  if (!SE->isSCEVable(CurrIV->getType()))
876
1.56k
    return;
877
878
  // Instructions processed by SimplifyIndvar for CurrIV.
879
74.8k
  SmallPtrSet<Instruction*,16> Simplified;
880
881
  // Use-def pairs of IV users waiting to be processed for CurrIV.
882
74.8k
  SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
883
884
  // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
885
  // called multiple times for the same LoopPhi. This is the proper thing to
886
  // do for loop header phis that use each other.
887
74.8k
  pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers);
888
889
569k
  while (!SimpleIVUsers.empty()) {
890
494k
    std::pair<Instruction*, Instruction*> UseOper =
891
494k
      SimpleIVUsers.pop_back_val();
892
494k
    Instruction *UseInst = UseOper.first;
893
894
    // If a user of the IndVar is trivially dead, we prefer just to mark it dead
895
    // rather than try to do some complex analysis or transformation (such as
896
    // widening) based on it.
897
    // TODO: Propagate TLI and pass it here to handle more cases.
898
494k
    if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) {
899
1.60k
      DeadInsts.emplace_back(UseInst);
900
1.60k
      continue;
901
1.60k
    }
902
903
    // Bypass back edges to avoid extra work.
904
493k
    if (UseInst == CurrIV) continue;
905
906
    // Try to replace UseInst with a loop invariant before any other
907
    // simplifications.
908
425k
    if (replaceIVUserWithLoopInvariant(UseInst))
909
1.24k
      continue;
910
911
    // Go further for the cast 'ptrtoint ptr to i64' or if the cast is done
912
    // by truncation
913
423k
    if ((isa<PtrToIntInst>(UseInst)) || (isa<TruncInst>(UseInst)))
914
6.68k
      for (Use &U : UseInst->uses()) {
915
6.68k
        Instruction *User = cast<Instruction>(U.getUser());
916
6.68k
        if (replaceIVUserWithLoopInvariant(User))
917
88
          break; // done replacing
918
6.68k
      }
919
920
423k
    Instruction *IVOperand = UseOper.second;
921
423k
    for (unsigned N = 0; IVOperand; ++N) {
922
423k
      assert(N <= Simplified.size() && "runaway iteration");
923
0
      (void) N;
924
925
423k
      Value *NewOper = foldIVUser(UseInst, IVOperand);
926
423k
      if (!NewOper)
927
423k
        break; // done folding
928
0
      IVOperand = dyn_cast<Instruction>(NewOper);
929
0
    }
930
423k
    if (!IVOperand)
931
0
      continue;
932
933
423k
    if (eliminateIVUser(UseInst, IVOperand)) {
934
11.6k
      pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
935
11.6k
      continue;
936
11.6k
    }
937
938
412k
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) {
939
258k
      if (strengthenBinaryOp(BO, IVOperand)) {
940
        // re-queue uses of the now modified binary operator and fall
941
        // through to the checks that remain.
942
15.0k
        pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
943
15.0k
      }
944
258k
    }
945
946
    // Try to use integer induction for FPToSI of float induction directly.
947
412k
    if (replaceFloatIVWithIntegerIV(UseInst)) {
948
      // Re-queue the potentially new direct uses of IVOperand.
949
0
      pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
950
0
      continue;
951
0
    }
952
953
412k
    CastInst *Cast = dyn_cast<CastInst>(UseInst);
954
412k
    if (V && Cast) {
955
7.88k
      V->visitCast(Cast);
956
7.88k
      continue;
957
7.88k
    }
958
404k
    if (isSimpleIVUser(UseInst, L, SE)) {
959
335k
      pushIVUsers(UseInst, L, Simplified, SimpleIVUsers);
960
335k
    }
961
404k
  }
962
74.8k
}
963
964
namespace llvm {
965
966
0
void IVVisitor::anchor() { }
967
968
/// Simplify instructions that use this induction variable
969
/// by using ScalarEvolution to analyze the IV's recurrence.
970
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
971
                       LoopInfo *LI, const TargetTransformInfo *TTI,
972
                       SmallVectorImpl<WeakTrackingVH> &Dead,
973
76.3k
                       SCEVExpander &Rewriter, IVVisitor *V) {
974
76.3k
  SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI,
975
76.3k
                     Rewriter, Dead);
976
76.3k
  SIV.simplifyUsers(CurrIV, V);
977
76.3k
  return SIV.hasChanged();
978
76.3k
}
979
980
/// Simplify users of induction variables within this
981
/// loop. This does not actually change or add IVs.
982
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
983
                     LoopInfo *LI, const TargetTransformInfo *TTI,
984
0
                     SmallVectorImpl<WeakTrackingVH> &Dead) {
985
0
  SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars");
986
0
#ifndef NDEBUG
987
0
  Rewriter.setDebugType(DEBUG_TYPE);
988
0
#endif
989
0
  bool Changed = false;
990
0
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
991
0
    Changed |=
992
0
        simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, TTI, Dead, Rewriter);
993
0
  }
994
0
  return Changed;
995
0
}
996
997
} // namespace llvm
998
999
namespace {
1000
//===----------------------------------------------------------------------===//
1001
// Widen Induction Variables - Extend the width of an IV to cover its
1002
// widest uses.
1003
//===----------------------------------------------------------------------===//
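// Illustrative sketch (hypothetical IR, not from this file): a narrow IV
//   %iv = phi i32 [ 0, %ph ], [ %iv.next, %latch ]
//   %idx = sext i32 %iv to i64
// can be widened to an i64 IV
//   %iv.wide = phi i64 [ 0, %ph ], [ %iv.wide.next, %latch ]
// so that users such as %idx resolve to the wide IV directly, and the narrow
// IV plus its extensions can be removed once no longer needed.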
1004
1005
class WidenIV {
1006
  // Parameters
1007
  PHINode *OrigPhi;
1008
  Type *WideType;
1009
1010
  // Context
1011
  LoopInfo        *LI;
1012
  Loop            *L;
1013
  ScalarEvolution *SE;
1014
  DominatorTree   *DT;
1015
1016
  // Does the module have any calls to the llvm.experimental.guard intrinsic
1017
  // at all? If not we can avoid scanning instructions looking for guards.
1018
  bool HasGuards;
1019
1020
  bool UsePostIncrementRanges;
1021
1022
  // Statistics
1023
  unsigned NumElimExt = 0;
1024
  unsigned NumWidened = 0;
1025
1026
  // Result
1027
  PHINode *WidePhi = nullptr;
1028
  Instruction *WideInc = nullptr;
1029
  const SCEV *WideIncExpr = nullptr;
1030
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;
1031
1032
  SmallPtrSet<Instruction *,16> Widened;
1033
1034
  enum class ExtendKind { Zero, Sign, Unknown };
1035
1036
  // A map tracking the kind of extension used to widen each narrow IV
1037
  // and narrow IV user.
1038
  // Key: pointer to a narrow IV or IV user.
1039
  // Value: the kind of extension used to widen this Instruction.
1040
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
1041
1042
  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
1043
1044
  // A map with control-dependent ranges for post increment IV uses. The key is
1045
  // a pair of IV def and a use of this def denoting the context. The value is
1046
  // a ConstantRange representing possible values of the def at the given
1047
  // context.
1048
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
1049
1050
  std::optional<ConstantRange> getPostIncRangeInfo(Value *Def,
1051
3.66k
                                                   Instruction *UseI) {
1052
3.66k
    DefUserPair Key(Def, UseI);
1053
3.66k
    auto It = PostIncRangeInfos.find(Key);
1054
3.66k
    return It == PostIncRangeInfos.end()
1055
3.66k
               ? std::optional<ConstantRange>(std::nullopt)
1056
3.66k
               : std::optional<ConstantRange>(It->second);
1057
3.66k
  }
1058
1059
  void calculatePostIncRanges(PHINode *OrigPhi);
1060
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
1061
1062
24
  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
1063
24
    DefUserPair Key(Def, UseI);
1064
24
    auto It = PostIncRangeInfos.find(Key);
1065
24
    if (It == PostIncRangeInfos.end())
1066
24
      PostIncRangeInfos.insert({Key, R});
1067
0
    else
1068
0
      It->second = R.intersectWith(It->second);
1069
24
  }
1070
1071
public:
1072
  /// Record a link in the Narrow IV def-use chain along with the WideIV that
1073
  /// computes the same value as the Narrow IV def.  This avoids caching Use*
1074
  /// pointers.
1075
  struct NarrowIVDefUse {
1076
    Instruction *NarrowDef = nullptr;
1077
    Instruction *NarrowUse = nullptr;
1078
    Instruction *WideDef = nullptr;
1079
1080
    // True if the narrow def is never negative.  Tracking this information lets
1081
    // us use a sign extension instead of a zero extension or vice versa, when
1082
    // profitable and legal.
1083
    bool NeverNegative = false;
1084
1085
    NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
1086
                   bool NeverNegative)
1087
        : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
1088
9.84k
          NeverNegative(NeverNegative) {}
1089
  };
1090
1091
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1092
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
1093
          bool HasGuards, bool UsePostIncrementRanges = true);
1094
1095
  PHINode *createWideIV(SCEVExpander &Rewriter);
1096
1097
2.02k
  unsigned getNumElimExt() { return NumElimExt; };
1098
2.02k
  unsigned getNumWidened() { return NumWidened; };
1099
1100
protected:
1101
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1102
                          Instruction *Use);
1103
1104
  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1105
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1106
                                     const SCEVAddRecExpr *WideAR);
1107
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1108
1109
  ExtendKind getExtendKind(Instruction *I);
1110
1111
  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1112
1113
  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1114
1115
  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1116
1117
  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1118
                              unsigned OpCode) const;
1119
1120
  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
1121
1122
  bool widenLoopCompare(NarrowIVDefUse DU);
1123
  bool widenWithVariantUse(NarrowIVDefUse DU);
1124
1125
  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1126
1127
private:
1128
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
1129
};
1130
} // namespace
1131
1132
/// Determine the insertion point for this user. By default, insert immediately
1133
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
1134
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
1135
/// common dominator for the incoming blocks. A nullptr can be returned if no
1136
/// viable location is found: it may happen if User is a PHI and Def only comes
1137
/// to this PHI from unreachable blocks.
1138
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
1139
4.97k
                                          DominatorTree *DT, LoopInfo *LI) {
1140
4.97k
  PHINode *PHI = dyn_cast<PHINode>(User);
1141
4.97k
  if (!PHI)
1142
4.81k
    return User;
1143
1144
157
  Instruction *InsertPt = nullptr;
1145
421
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
1146
264
    if (PHI->getIncomingValue(i) != Def)
1147
107
      continue;
1148
1149
157
    BasicBlock *InsertBB = PHI->getIncomingBlock(i);
1150
1151
157
    if (!DT->isReachableFromEntry(InsertBB))
1152
0
      continue;
1153
1154
157
    if (!InsertPt) {
1155
157
      InsertPt = InsertBB->getTerminator();
1156
157
      continue;
1157
157
    }
1158
0
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
1159
0
    InsertPt = InsertBB->getTerminator();
1160
0
  }
1161
1162
  // If we have skipped all inputs, it means that Def only comes to Phi from
1163
  // unreachable blocks.
1164
157
  if (!InsertPt)
1165
0
    return nullptr;
1166
1167
157
  auto *DefI = dyn_cast<Instruction>(Def);
1168
157
  if (!DefI)
1169
0
    return InsertPt;
1170
1171
157
  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
1172
1173
0
  auto *L = LI->getLoopFor(DefI->getParent());
1174
157
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
1175
1176
169
  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
1177
169
    if (LI->getLoopFor(DTN->getBlock()) == L)
1178
157
      return DTN->getBlock()->getTerminator();
1179
1180
0
  llvm_unreachable("DefI dominates InsertPt!");
1181
0
}
1182
1183
WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1184
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
1185
          bool HasGuards, bool UsePostIncrementRanges)
1186
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
1187
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
1188
        HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges),
1189
2.02k
        DeadInsts(DI) {
1190
2.02k
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
1191
2.02k
    ExtendKindMap[OrigPhi] = WI.IsSigned ? ExtendKind::Sign : ExtendKind::Zero;
1192
2.02k
}
1193
1194
Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1195
1.99k
                                 bool IsSigned, Instruction *Use) {
1196
  // Set the debug location and conservative insertion point.
1197
1.99k
  IRBuilder<> Builder(Use);
1198
  // Hoist the insertion point into loop preheaders as far as possible.
1199
1.99k
  for (const Loop *L = LI->getLoopFor(Use->getParent());
1200
3.80k
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
1201
1.99k
       L = L->getParentLoop())
1202
1.80k
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
1203
1204
1.99k
  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1205
1.99k
                    Builder.CreateZExt(NarrowOper, WideType);
1206
1.99k
}
1207
1208
/// Instantiate a wide operation to replace a narrow operation. This only needs
1209
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
1210
/// 0 for any operation we decide not to clone.
1211
Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU,
1212
1.82k
                                  const SCEVAddRecExpr *WideAR) {
1213
1.82k
  unsigned Opcode = DU.NarrowUse->getOpcode();
1214
1.82k
  switch (Opcode) {
1215
155
  default:
1216
155
    return nullptr;
1217
1.19k
  case Instruction::Add:
1218
1.34k
  case Instruction::Mul:
1219
1.34k
  case Instruction::UDiv:
1220
1.60k
  case Instruction::Sub:
1221
1.60k
    return cloneArithmeticIVUser(DU, WideAR);
1222
1223
3
  case Instruction::And:
1224
3
  case Instruction::Or:
1225
8
  case Instruction::Xor:
1226
38
  case Instruction::Shl:
1227
44
  case Instruction::LShr:
1228
57
  case Instruction::AShr:
1229
57
    return cloneBitwiseIVUser(DU);
1230
1.82k
  }
1231
1.82k
}
1232
1233
57
Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) {
1234
57
  Instruction *NarrowUse = DU.NarrowUse;
1235
57
  Instruction *NarrowDef = DU.NarrowDef;
1236
57
  Instruction *WideDef = DU.WideDef;
1237
1238
57
  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1239
1240
  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1241
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1242
  // invariant and will be folded or hoisted. If it actually comes from a
1243
  // widened IV, it should be removed during a future call to widenIVUse.
1244
57
  bool IsSigned = getExtendKind(NarrowDef) == ExtendKind::Sign;
1245
57
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1246
57
                   ? WideDef
1247
57
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
1248
0
                                      IsSigned, NarrowUse);
1249
57
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1250
57
                   ? WideDef
1251
57
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
1252
57
                                      IsSigned, NarrowUse);
1253
1254
57
  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1255
57
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1256
57
                                        NarrowBO->getName());
1257
57
  IRBuilder<> Builder(NarrowUse);
1258
57
  Builder.Insert(WideBO);
1259
57
  WideBO->copyIRFlags(NarrowBO);
1260
57
  return WideBO;
1261
57
}
1262
1263
Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU,
1264
1.60k
                                            const SCEVAddRecExpr *WideAR) {
1265
1.60k
  Instruction *NarrowUse = DU.NarrowUse;
1266
1.60k
  Instruction *NarrowDef = DU.NarrowDef;
1267
1.60k
  Instruction *WideDef = DU.WideDef;
1268
1269
1.60k
  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1270
1271
1.60k
  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1272
1273
  // We're trying to find X such that
1274
  //
1275
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1276
  //
1277
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1278
  // and check using SCEV if any of them are correct.
1279
1280
  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1281
  // correct solution to X.
1282
2.09k
  auto GuessNonIVOperand = [&](bool SignExt) {
1283
2.09k
    const SCEV *WideLHS;
1284
2.09k
    const SCEV *WideRHS;
1285
1286
2.09k
    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1287
2.09k
      if (SignExt)
1288
1.37k
        return SE->getSignExtendExpr(S, Ty);
1289
724
      return SE->getZeroExtendExpr(S, Ty);
1290
2.09k
    };
1291
1292
2.09k
    if (IVOpIdx == 0) {
1293
1.25k
      WideLHS = SE->getSCEV(WideDef);
1294
1.25k
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1295
1.25k
      WideRHS = GetExtend(NarrowRHS, WideType);
1296
1.25k
    } else {
1297
838
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1298
838
      WideLHS = GetExtend(NarrowLHS, WideType);
1299
838
      WideRHS = SE->getSCEV(WideDef);
1300
838
    }
1301
1302
    // WideUse is "WideDef `op.wide` X" as described in the comment.
1303
2.09k
    const SCEV *WideUse =
1304
2.09k
      getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode());
1305
1306
2.09k
    return WideUse == WideAR;
1307
2.09k
  };
1308
1309
1.60k
  bool SignExtend = getExtendKind(NarrowDef) == ExtendKind::Sign;
1310
1.60k
  if (!GuessNonIVOperand(SignExtend)) {
1311
489
    SignExtend = !SignExtend;
1312
489
    if (!GuessNonIVOperand(SignExtend))
1313
370
      return nullptr;
1314
489
  }
1315
1316
1.23k
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1317
1.23k
                   ? WideDef
1318
1.23k
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
1319
731
                                      SignExtend, NarrowUse);
1320
1.23k
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1321
1.23k
                   ? WideDef
1322
1.23k
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
1323
470
                                      SignExtend, NarrowUse);
1324
1325
1.23k
  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1326
1.23k
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1327
1.23k
                                        NarrowBO->getName());
1328
1329
1.23k
  IRBuilder<> Builder(NarrowUse);
1330
1.23k
  Builder.Insert(WideBO);
1331
1.23k
  WideBO->copyIRFlags(NarrowBO);
1332
1.23k
  return WideBO;
1333
1.60k
}
1334
1335
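A small illustration of the guess that GuessNonIVOperand makes inside cloneArithmeticIVUser above; the IR names are hypothetical and only sketch the idea:
//   %use = add i32 %iv, %b                  ; narrow use, %iv is NarrowDef
// Candidate widenings, checked against the already-known WideAR via SCEV:
//   add i64 %iv.wide, sext(%b to i64)       ; GuessNonIVOperand(true)
//   add i64 %iv.wide, zext(%b to i64)       ; GuessNonIVOperand(false)
// Only a guess whose SCEV equals WideAR is materialized as WideBO; if neither
// matches, cloneArithmeticIVUser returns nullptr and no IR is emitted.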
14.0k
WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1336
14.0k
  auto It = ExtendKindMap.find(I);
1337
14.0k
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1338
0
  return It->second;
1339
14.0k
}
1340
1341
const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1342
4.32k
                                     unsigned OpCode) const {
1343
4.32k
  switch (OpCode) {
1344
3.51k
  case Instruction::Add:
1345
3.51k
    return SE->getAddExpr(LHS, RHS);
1346
520
  case Instruction::Sub:
1347
520
    return SE->getMinusSCEV(LHS, RHS);
1348
287
  case Instruction::Mul:
1349
287
    return SE->getMulExpr(LHS, RHS);
1350
4
  case Instruction::UDiv:
1351
4
    return SE->getUDivExpr(LHS, RHS);
1352
0
  default:
1353
0
    llvm_unreachable("Unsupported opcode.");
1354
4.32k
  };
1355
0
}
1356
1357
/// No-wrap operations can transfer sign extension of their result to their
1358
/// operands. Generate the SCEV value for the widened operation without
1359
/// actually modifying the IR yet. If the expression after extending the
1360
/// operands is an AddRec for this loop, return the AddRec and the kind of
1361
/// extension used.
1362
WidenIV::WidenedRecTy
1363
8.12k
WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
1364
  // Handle the common case of add<nsw/nuw>
1365
8.12k
  const unsigned OpCode = DU.NarrowUse->getOpcode();
1366
  // Only Add/Sub/Mul instructions are supported so far.
1367
8.12k
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1368
8.12k
      OpCode != Instruction::Mul)
1369
3.60k
    return {nullptr, ExtendKind::Unknown};
1370
1371
  // One operand (NarrowDef) has already been extended to WideDef. Now determine
1372
  // if extending the other will lead to a recurrence.
1373
4.52k
  const unsigned ExtendOperIdx =
1374
4.52k
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1375
4.52k
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1376
1377
0
  const OverflowingBinaryOperator *OBO =
1378
4.52k
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
1379
4.52k
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1380
4.52k
  if (!(ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap()) &&
1381
4.52k
      !(ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap())) {
1382
2.38k
    ExtKind = ExtendKind::Unknown;
1383
1384
    // For a non-negative NarrowDef, we can choose either type of
1385
    // extension.  We want to use the current extend kind if legal
1386
    // (see above), and we only hit this code if we need to check
1387
    // the opposite case.
1388
2.38k
    if (DU.NeverNegative) {
1389
1.87k
      if (OBO->hasNoSignedWrap()) {
1390
74
        ExtKind = ExtendKind::Sign;
1391
1.80k
      } else if (OBO->hasNoUnsignedWrap()) {
1392
15
        ExtKind = ExtendKind::Zero;
1393
15
      }
1394
1.87k
    }
1395
2.38k
  }
1396
1397
4.52k
  const SCEV *ExtendOperExpr =
1398
4.52k
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx));
1399
4.52k
  if (ExtKind == ExtendKind::Sign)
1400
1.98k
    ExtendOperExpr = SE->getSignExtendExpr(ExtendOperExpr, WideType);
1401
2.54k
  else if (ExtKind == ExtendKind::Zero)
1402
250
    ExtendOperExpr = SE->getZeroExtendExpr(ExtendOperExpr, WideType);
1403
2.29k
  else
1404
2.29k
    return {nullptr, ExtendKind::Unknown};
1405
1406
  // When creating this SCEV expr, don't apply the current operation's NSW or NUW
1407
  // flags. This instruction may be guarded by control flow that the no-wrap
1408
  // behavior depends on. Non-control-equivalent instructions can be mapped to
1409
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1410
  // semantics to those operations.
1411
2.23k
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
1412
2.23k
  const SCEV *rhs = ExtendOperExpr;
1413
1414
  // Let's swap operands to the initial order for the case of non-commutative
1415
  // operations, like SUB. See PR21014.
1416
2.23k
  if (ExtendOperIdx == 0)
1417
774
    std::swap(lhs, rhs);
1418
2.23k
  const SCEVAddRecExpr *AddRec =
1419
2.23k
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1420
1421
2.23k
  if (!AddRec || AddRec->getLoop() != L)
1422
246
    return {nullptr, ExtendKind::Unknown};
1423
1424
1.98k
  return {AddRec, ExtKind};
1425
2.23k
}
1426
1427
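A hedged sketch of the case getExtendedOperandRecurrence handles, on hypothetical IR (not taken from any test behind this report):
//   %iv.next = add nsw i32 %iv, %step       ; NarrowUse, with NarrowDef = %iv
// Because NarrowDef was sign-extended and the add is nsw, the extension may be
// transferred to the other operand, so SCEV evaluates
//   (wide IV SCEV) + sext(%step to i64)
// without NSW/NUW flags. If that expression is an AddRec on this loop, the
// pair {AddRec, ExtendKind::Sign} is returned; no IR is emitted at this point.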
/// Is this instruction potentially interesting for further simplification after
1428
/// widening its type? In other words, can the extend be safely hoisted out of
1429
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
1430
/// so, return the extended recurrence and the kind of extension used. Otherwise
1431
/// return {nullptr, ExtendKind::Unknown}.
1432
6.13k
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
1433
6.13k
  if (!DU.NarrowUse->getType()->isIntegerTy())
1434
985
    return {nullptr, ExtendKind::Unknown};
1435
1436
5.15k
  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1437
5.15k
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1438
5.15k
      SE->getTypeSizeInBits(WideType)) {
1439
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1440
    // index. So don't follow this use.
1441
41
    return {nullptr, ExtendKind::Unknown};
1442
41
  }
1443
1444
5.11k
  const SCEV *WideExpr;
1445
5.11k
  ExtendKind ExtKind;
1446
5.11k
  if (DU.NeverNegative) {
1447
3.19k
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1448
3.19k
    if (isa<SCEVAddRecExpr>(WideExpr))
1449
487
      ExtKind = ExtendKind::Sign;
1450
2.70k
    else {
1451
2.70k
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1452
2.70k
      ExtKind = ExtendKind::Zero;
1453
2.70k
    }
1454
3.19k
  } else if (getExtendKind(DU.NarrowDef) == ExtendKind::Sign) {
1455
1.68k
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1456
1.68k
    ExtKind = ExtendKind::Sign;
1457
1.68k
  } else {
1458
234
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1459
234
    ExtKind = ExtendKind::Zero;
1460
234
  }
1461
5.11k
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1462
5.11k
  if (!AddRec || AddRec->getLoop() != L)
1463
4.33k
    return {nullptr, ExtendKind::Unknown};
1464
772
  return {AddRec, ExtKind};
1465
5.11k
}
1466
1467
/// This IV user cannot be widened. Replace this use of the original narrow IV
1468
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1469
static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT,
1470
4.97k
                          LoopInfo *LI) {
1471
4.97k
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1472
4.97k
  if (!InsertPt)
1473
0
    return;
1474
4.97k
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1475
4.97k
                    << *DU.NarrowUse << "\n");
1476
4.97k
  IRBuilder<> Builder(InsertPt);
1477
4.97k
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1478
4.97k
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1479
4.97k
}
1480
1481
/// If the narrow use is a compare instruction, then widen the compare
1482
/// (and possibly the other operand). The extend operation is hoisted into the
1483
/// loop preheader as far as possible.
1484
5.91k
bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) {
1485
5.91k
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1486
5.91k
  if (!Cmp)
1487
4.74k
    return false;
1488
1489
  // We can legally widen the comparison in the following two cases:
1490
  //
1491
  //  - The signedness of the IV extension and comparison match
1492
  //
1493
  //  - The narrow IV is always positive (and thus its sign extension is equal
1494
  //    to its zero extension).  For instance, let's say we're zero extending
1495
  //    %narrow for the following use
1496
  //
1497
  //      icmp slt i32 %narrow, %val   ... (A)
1498
  //
1499
  //    and %narrow is always positive.  Then
1500
  //
1501
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1502
  //          == icmp slt i32 zext(%narrow), sext(%val)
1503
1.16k
  bool IsSigned = getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
1504
1.16k
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1505
443
    return false;
1506
1507
726
  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1508
726
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1509
726
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1510
726
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1511
1512
  // Widen the compare instruction.
1513
0
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1514
1515
  // Widen the other operand of the compare, if necessary.
1516
726
  if (CastWidth < IVWidth) {
1517
726
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1518
726
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1519
726
  }
1520
726
  return true;
1521
1.16k
}
1522
1523
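Roughly what widenLoopCompare above produces, shown on hypothetical IR with a sign-extended IV and a signed predicate:
// Before:
//   %cmp = icmp slt i32 %iv, %limit
// After (the other operand is extended, and createExtendInst hoists that
// extend toward the preheader when the operand is loop invariant):
//   %limit.wide = sext i32 %limit to i64
//   %cmp        = icmp slt i64 %iv.wide, %limit.wide
// so no trunc of %iv.wide is needed to keep the loop condition narrow.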
// widenIVUse avoids generating a trunc by evaluating the use as an AddRec; this
1524
// will not work when:
1525
//    1) SCEV traces back to an instruction inside the loop that SCEV cannot
1526
// expand, e.g. add %indvar, (load %addr)
1527
//    2) SCEV finds a loop variant, e.g. add %indvar, %loopvariant
1528
// When SCEV fails to avoid the trunc, we can still try an instruction-
1529
// combining approach to prove the trunc is not required. This can be further
1530
// extended with other instruction combining checks, but for now we handle the
1531
// following case (sub can be "add" and "mul", "nsw + sext" can be "nuw + zext")
1532
//
1533
// Src:
1534
//   %c = sub nsw %b, %indvar
1535
//   %d = sext %c to i64
1536
// Dst:
1537
//   %indvar.ext1 = sext %indvar to i64
1538
//   %m = sext %b to i64
1539
//   %d = sub nsw i64 %m, %indvar.ext1
1540
// Therefore, as long as the result of add/sub/mul is extended to wide type, no
1541
// trunc is required regardless of how %b is generated. This pattern is common
1542
// when calculating addresses on 64-bit architectures.
1543
5.18k
bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
1544
5.18k
  Instruction *NarrowUse = DU.NarrowUse;
1545
5.18k
  Instruction *NarrowDef = DU.NarrowDef;
1546
5.18k
  Instruction *WideDef = DU.WideDef;
1547
1548
  // Handle the common case of add<nsw/nuw>
1549
5.18k
  const unsigned OpCode = NarrowUse->getOpcode();
1550
  // Only Add/Sub/Mul instructions are supported.
1551
5.18k
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1552
5.18k
      OpCode != Instruction::Mul)
1553
2.82k
    return false;
1554
1555
  // The operand that is not defined by NarrowDef of DU. Let's call it the
1556
  // other operand.
1557
2.36k
  assert((NarrowUse->getOperand(0) == NarrowDef ||
1558
2.36k
          NarrowUse->getOperand(1) == NarrowDef) &&
1559
2.36k
         "bad DU");
1560
1561
0
  const OverflowingBinaryOperator *OBO =
1562
2.36k
    cast<OverflowingBinaryOperator>(NarrowUse);
1563
2.36k
  ExtendKind ExtKind = getExtendKind(NarrowDef);
1564
2.36k
  bool CanSignExtend = ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap();
1565
2.36k
  bool CanZeroExtend = ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap();
1566
2.36k
  auto AnotherOpExtKind = ExtKind;
1567
1568
  // Check that all uses are either:
1569
  // - narrow def (in case we are widening the IV increment);
1570
  // - single-input LCSSA Phis;
1571
  // - comparison of the chosen type;
1572
  // - extend of the chosen type (raison d'etre).
1573
2.36k
  SmallVector<Instruction *, 4> ExtUsers;
1574
2.36k
  SmallVector<PHINode *, 4> LCSSAPhiUsers;
1575
2.36k
  SmallVector<ICmpInst *, 4> ICmpUsers;
1576
2.36k
  for (Use &U : NarrowUse->uses()) {
1577
2.36k
    Instruction *User = cast<Instruction>(U.getUser());
1578
2.36k
    if (User == NarrowDef)
1579
76
      continue;
1580
2.28k
    if (!L->contains(User)) {
1581
13
      auto *LCSSAPhi = cast<PHINode>(User);
1582
      // Make sure there is only 1 input, so that we don't have to split
1583
      // critical edges.
1584
13
      if (LCSSAPhi->getNumOperands() != 1)
1585
0
        return false;
1586
13
      LCSSAPhiUsers.push_back(LCSSAPhi);
1587
13
      continue;
1588
13
    }
1589
2.27k
    if (auto *ICmp = dyn_cast<ICmpInst>(User)) {
1590
102
      auto Pred = ICmp->getPredicate();
1591
      // We have 3 types of predicates: signed, unsigned and equality
1592
      // predicates. For equality, it's legal to widen icmp for either sign or
1593
      // zero extend. For sign extend, we can also do so for signed predicates,
1594
      // likewise for zero extend we can widen icmp for unsigned predicates.
1595
102
      if (ExtKind == ExtendKind::Zero && ICmpInst::isSigned(Pred))
1596
0
        return false;
1597
102
      if (ExtKind == ExtendKind::Sign && ICmpInst::isUnsigned(Pred))
1598
28
        return false;
1599
74
      ICmpUsers.push_back(ICmp);
1600
74
      continue;
1601
102
    }
1602
2.17k
    if (ExtKind == ExtendKind::Sign)
1603
2.05k
      User = dyn_cast<SExtInst>(User);
1604
124
    else
1605
124
      User = dyn_cast<ZExtInst>(User);
1606
2.17k
    if (!User || User->getType() != WideType)
1607
2.07k
      return false;
1608
104
    ExtUsers.push_back(User);
1609
104
  }
1610
264
  if (ExtUsers.empty()) {
1611
209
    DeadInsts.emplace_back(NarrowUse);
1612
209
    return true;
1613
209
  }
1614
1615
  // We'll prove some facts that should be true in the context of ext users. If
1616
  // there are no users, we are done now. If there are some, pick their common
1617
  // dominator as context.
1618
55
  const Instruction *CtxI = findCommonDominator(ExtUsers, *DT);
1619
1620
55
  if (!CanSignExtend && !CanZeroExtend) {
1621
    // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we
1622
    // will most likely not see it. Let's try to prove it.
1623
39
    if (OpCode != Instruction::Add)
1624
9
      return false;
1625
30
    if (ExtKind != ExtendKind::Zero)
1626
30
      return false;
1627
0
    const SCEV *LHS = SE->getSCEV(OBO->getOperand(0));
1628
0
    const SCEV *RHS = SE->getSCEV(OBO->getOperand(1));
1629
    // TODO: Support case for NarrowDef = NarrowUse->getOperand(1).
1630
0
    if (NarrowUse->getOperand(0) != NarrowDef)
1631
0
      return false;
1632
0
    if (!SE->isKnownNegative(RHS))
1633
0
      return false;
1634
0
    bool ProvedSubNUW = SE->isKnownPredicateAt(ICmpInst::ICMP_UGE, LHS,
1635
0
                                               SE->getNegativeSCEV(RHS), CtxI);
1636
0
    if (!ProvedSubNUW)
1637
0
      return false;
1638
    // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as
1639
    // neg(zext(neg(op))), which is basically sext(op).
1640
0
    AnotherOpExtKind = ExtendKind::Sign;
1641
0
  }
1642
1643
  // Verify that the defining operand is an AddRec.
1644
16
  const SCEV *Op1 = SE->getSCEV(WideDef);
1645
16
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1646
16
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1647
1
    return false;
1648
1649
15
  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1650
1651
  // Generate the widened use instruction.
1652
15
  Value *LHS =
1653
15
      (NarrowUse->getOperand(0) == NarrowDef)
1654
15
          ? WideDef
1655
15
          : createExtendInst(NarrowUse->getOperand(0), WideType,
1656
1
                             AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
1657
15
  Value *RHS =
1658
15
      (NarrowUse->getOperand(1) == NarrowDef)
1659
15
          ? WideDef
1660
15
          : createExtendInst(NarrowUse->getOperand(1), WideType,
1661
14
                             AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
1662
1663
15
  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1664
15
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1665
15
                                        NarrowBO->getName());
1666
15
  IRBuilder<> Builder(NarrowUse);
1667
15
  Builder.Insert(WideBO);
1668
15
  WideBO->copyIRFlags(NarrowBO);
1669
15
  ExtendKindMap[NarrowUse] = ExtKind;
1670
1671
16
  for (Instruction *User : ExtUsers) {
1672
16
    assert(User->getType() == WideType && "Checked before!");
1673
16
    LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1674
16
                      << *WideBO << "\n");
1675
16
    ++NumElimExt;
1676
16
    User->replaceAllUsesWith(WideBO);
1677
16
    DeadInsts.emplace_back(User);
1678
16
  }
1679
1680
15
  for (PHINode *User : LCSSAPhiUsers) {
1681
0
    assert(User->getNumOperands() == 1 && "Checked before!");
1682
0
    Builder.SetInsertPoint(User);
1683
0
    auto *WidePN =
1684
0
        Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide");
1685
0
    BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor();
1686
0
    assert(LoopExitingBlock && L->contains(LoopExitingBlock) &&
1687
0
           "Not a LCSSA Phi?");
1688
0
    WidePN->addIncoming(WideBO, LoopExitingBlock);
1689
0
    Builder.SetInsertPoint(User->getParent(),
1690
0
                           User->getParent()->getFirstInsertionPt());
1691
0
    auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType());
1692
0
    User->replaceAllUsesWith(TruncPN);
1693
0
    DeadInsts.emplace_back(User);
1694
0
  }
1695
1696
15
  for (ICmpInst *User : ICmpUsers) {
1697
9
    Builder.SetInsertPoint(User);
1698
18
    auto ExtendedOp = [&](Value * V)->Value * {
1699
18
      if (V == NarrowUse)
1700
15
        return WideBO;
1701
3
      if (ExtKind == ExtendKind::Zero)
1702
0
        return Builder.CreateZExt(V, WideBO->getType());
1703
3
      else
1704
3
        return Builder.CreateSExt(V, WideBO->getType());
1705
3
    };
1706
9
    auto Pred = User->getPredicate();
1707
9
    auto *LHS = ExtendedOp(User->getOperand(0));
1708
9
    auto *RHS = ExtendedOp(User->getOperand(1));
1709
9
    auto *WideCmp =
1710
9
        Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide");
1711
9
    User->replaceAllUsesWith(WideCmp);
1712
9
    DeadInsts.emplace_back(User);
1713
9
  }
1714
1715
15
  return true;
1716
16
}
1717
1718
/// Determine whether an individual user of the narrow IV can be widened. If so,
1719
/// return the wide clone of the user.
1720
9.84k
Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1721
9.84k
  assert(ExtendKindMap.count(DU.NarrowDef) &&
1722
9.84k
         "Should already know the kind of extension used to widen NarrowDef");
1723
1724
  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1725
9.84k
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1726
315
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
1727
      // For LCSSA phis, sink the truncate outside the loop.
1728
      // After SimplifyCFG most loop exit targets have a single predecessor.
1729
      // Otherwise fall back to a truncate within the loop.
1730
163
      if (UsePhi->getNumOperands() != 1)
1731
7
        truncateIVUse(DU, DT, LI);
1732
156
      else {
1733
        // Widening the PHI requires us to insert a trunc.  The logical place
1734
        // for this trunc is in the same BB as the PHI.  This is not possible if
1735
        // the BB is terminated by a catchswitch.
1736
156
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1737
0
          return nullptr;
1738
1739
156
        PHINode *WidePhi =
1740
156
          PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1741
156
                          UsePhi);
1742
156
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1743
156
        BasicBlock *WidePhiBB = WidePhi->getParent();
1744
156
        IRBuilder<> Builder(WidePhiBB, WidePhiBB->getFirstInsertionPt());
1745
156
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1746
156
        UsePhi->replaceAllUsesWith(Trunc);
1747
156
        DeadInsts.emplace_back(UsePhi);
1748
156
        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1749
156
                          << *WidePhi << "\n");
1750
156
      }
1751
163
      return nullptr;
1752
163
    }
1753
315
  }
1754
1755
  // This narrow use can be widened by a sext if it's non-negative or its narrow
1756
  // def was widened by a sext. Same for zext.
1757
9.68k
  auto canWidenBySExt = [&]() {
1758
1.32k
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
1759
1.32k
  };
1760
9.68k
  auto canWidenByZExt = [&]() {
1761
277
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero;
1762
277
  };
1763
1764
  // Our raison d'etre! Eliminate sign and zero extension.
1765
9.68k
  if ((match(DU.NarrowUse, m_SExtLike(m_Value())) && canWidenBySExt()) ||
1766
9.68k
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1767
1.56k
    Value *NewDef = DU.WideDef;
1768
1.56k
    if (DU.NarrowUse->getType() != WideType) {
1769
1
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1770
1
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1771
1
      if (CastWidth < IVWidth) {
1772
        // The cast isn't as wide as the IV, so insert a Trunc.
1773
1
        IRBuilder<> Builder(DU.NarrowUse);
1774
1
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1775
1
      }
1776
0
      else {
1777
        // A wider extend was hidden behind a narrower one. This may induce
1778
        // another round of IV widening in which the intermediate IV becomes
1779
        // dead. It should be very rare.
1780
0
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1781
0
                          << " not wide enough to subsume " << *DU.NarrowUse
1782
0
                          << "\n");
1783
0
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1784
0
        NewDef = DU.NarrowUse;
1785
0
      }
1786
1
    }
1787
1.56k
    if (NewDef != DU.NarrowUse) {
1788
1.56k
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1789
1.56k
                        << " replaced by " << *DU.WideDef << "\n");
1790
1.56k
      ++NumElimExt;
1791
1.56k
      DU.NarrowUse->replaceAllUsesWith(NewDef);
1792
1.56k
      DeadInsts.emplace_back(DU.NarrowUse);
1793
1.56k
    }
1794
    // Now that the extend is gone, we want to expose its uses for potential
1795
    // further simplification. We don't need to directly inform SimplifyIVUsers
1796
    // of the new users, because their parent IV will be processed later as a
1797
    // new loop phi. If we preserved IVUsers analysis, we would also want to
1798
    // push the uses of WideDef here.
1799
1800
    // No further widening is needed. The deceased [sz]ext had done it for us.
1801
1.56k
    return nullptr;
1802
1.56k
  }
1803
1804
8.12k
  auto tryAddRecExpansion = [&]() -> Instruction* {
1805
    // Does this user itself evaluate to a recurrence after widening?
1806
8.12k
    WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1807
8.12k
    if (!WideAddRec.first)
1808
6.13k
      WideAddRec = getWideRecurrence(DU);
1809
8.12k
    assert((WideAddRec.first == nullptr) ==
1810
8.12k
           (WideAddRec.second == ExtendKind::Unknown));
1811
8.12k
    if (!WideAddRec.first)
1812
5.36k
      return nullptr;
1813
1814
    // Reuse the IV increment that SCEVExpander created as long as it dominates
1815
    // NarrowUse.
1816
2.75k
    Instruction *WideUse = nullptr;
1817
2.75k
    if (WideAddRec.first == WideIncExpr &&
1818
2.75k
        Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1819
936
      WideUse = WideInc;
1820
1.82k
    else {
1821
1.82k
      WideUse = cloneIVUser(DU, WideAddRec.first);
1822
1.82k
      if (!WideUse)
1823
525
        return nullptr;
1824
1.82k
    }
1825
    // Evaluation of WideAddRec ensured that the narrow expression could be
1826
    // extended outside the loop without overflow. This suggests that the wide use
1827
    // evaluates to the same expression as the extended narrow use, but doesn't
1828
    // absolutely guarantee it. Hence the following failsafe check. In rare cases
1829
    // where it fails, we simply throw away the newly created wide use.
1830
2.23k
    if (WideAddRec.first != SE->getSCEV(WideUse)) {
1831
23
      LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1832
23
                 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1833
23
                 << "\n");
1834
23
      DeadInsts.emplace_back(WideUse);
1835
23
      return nullptr;
1836
2.20k
    };
1837
1838
    // if we reached this point then we are going to replace
1839
    // DU.NarrowUse with WideUse. Reattach DbgValue then.
1840
2.20k
    replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1841
1842
2.20k
    ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1843
    // Returning WideUse pushes it on the worklist.
1844
2.20k
    return WideUse;
1845
2.23k
  };
1846
1847
8.12k
  if (auto *I = tryAddRecExpansion())
1848
2.20k
    return I;
1849
1850
  // If use is a loop condition, try to promote the condition instead of
1851
  // truncating the IV first.
1852
5.91k
  if (widenLoopCompare(DU))
1853
726
    return nullptr;
1854
1855
  // We are about to generate a truncate instruction that may hurt
1856
  // performance because the scalar evolution expression computed earlier
1857
  // in WideAddRec.first does not indicate a polynomial induction expression.
1858
  // In that case, look at the operands of the use instruction to determine
1859
  // if we can still widen the use instead of truncating its operand.
1860
5.18k
  if (widenWithVariantUse(DU))
1861
224
    return nullptr;
1862
1863
  // This user does not evaluate to a recurrence after widening, so don't
1864
  // follow it. Instead insert a Trunc to kill off the original use,
1865
  // eventually isolating the original narrow IV so it can be removed.
1866
4.96k
  truncateIVUse(DU, DT, LI);
1867
4.96k
  return nullptr;
1868
5.18k
}
1869
1870
/// Add eligible users of NarrowDef to NarrowIVUsers.
1871
3.43k
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1872
3.43k
  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1873
3.43k
  bool NonNegativeDef =
1874
3.43k
      SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1875
3.43k
                           SE->getZero(NarrowSCEV->getType()));
1876
12.1k
  for (User *U : NarrowDef->users()) {
1877
12.1k
    Instruction *NarrowUser = cast<Instruction>(U);
1878
1879
    // Handle data flow merges and bizarre phi cycles.
1880
12.1k
    if (!Widened.insert(NarrowUser).second)
1881
2.30k
      continue;
1882
1883
9.84k
    bool NonNegativeUse = false;
1884
9.84k
    if (!NonNegativeDef) {
1885
      // We might have control-dependent range information for this context.
1886
3.66k
      if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1887
14
        NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1888
3.66k
    }
1889
1890
9.84k
    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1891
9.84k
                               NonNegativeDef || NonNegativeUse);
1892
9.84k
  }
1893
3.43k
}
1894
1895
/// Process a single induction variable. First use the SCEVExpander to create a
1896
/// wide induction variable that evaluates to the same recurrence as the
1897
/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1898
/// def-use chain. After widenIVUse has processed all interesting IV users, the
1899
/// narrow IV will be isolated for removal by DeleteDeadPHIs.
1900
///
1901
/// It would be simpler to delete uses as they are processed, but we must avoid
1902
/// invalidating SCEV expressions.
1903
2.02k
PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1904
  // Is this phi an induction variable?
1905
2.02k
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1906
2.02k
  if (!AddRec)
1907
258
    return nullptr;
1908
1909
  // Widen the induction variable expression.
1910
1.76k
  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == ExtendKind::Sign
1911
1.76k
                               ? SE->getSignExtendExpr(AddRec, WideType)
1912
1.76k
                               : SE->getZeroExtendExpr(AddRec, WideType);
1913
1914
1.76k
  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1915
1.76k
         "Expect the new IV expression to preserve its type");
1916
1917
  // Can the IV be extended outside the loop without overflow?
1918
0
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1919
1.76k
  if (!AddRec || AddRec->getLoop() != L)
1920
543
    return nullptr;
1921
1922
  // An AddRec must have loop-invariant operands. Since this AddRec is
1923
  // materialized by a loop header phi, the expression cannot have any post-loop
1924
  // operands, so they must dominate the loop header.
1925
1.22k
  assert(
1926
1.22k
      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1927
1.22k
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1928
1.22k
      "Loop header phi recurrence inputs do not dominate the loop");
1929
1930
  // Iterate over IV uses (including transitive ones) looking for IV increments
1931
  // of the form 'add nsw %iv, <const>'. For each increment and each use of
1932
  // the increment calculate control-dependent range information based on
1933
  // dominating conditions inside of the loop (e.g. a range check inside of the
1934
  // loop). Calculated ranges are stored in PostIncRangeInfos map.
1935
  //
1936
  // Control-dependent range information is later used to prove that a narrow
1937
  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1938
  // this on demand because when pushNarrowIVUsers needs this information some
1939
  // of the dominating conditions might be already widened.
1940
1.22k
  if (UsePostIncrementRanges)
1941
1.22k
    calculatePostIncRanges(OrigPhi);
1942
1943
  // The rewriter provides a value for the desired IV expression. This may
1944
  // either find an existing phi or materialize a new one. Either way, we
1945
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1946
  // of the phi-SCC dominates the loop entry.
1947
1.22k
  Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
1948
1.22k
  Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
1949
  // If the expanded wide IV is not a phi node (for example a cast such as
1950
  // bitcast, inttoptr, or ptrtoint), just skip it for now.
1951
1.22k
  if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
1952
    // If the cast node is an inserted instruction without any users, remove it
1953
    // so that the pass does not touch the function, since we cannot widen the
1954
    // phi.
1955
0
    if (ExpandInst->hasNUses(0) &&
1956
0
        Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
1957
0
      DeadInsts.emplace_back(ExpandInst);
1958
0
    return nullptr;
1959
0
  }
1960
1961
  // Remembering the WideIV increment generated by SCEVExpander allows
1962
  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1963
  // employ a general reuse mechanism because the call above is the only call to
1964
  // SCEVExpander. Hence, we produce 1-to-1 narrow-to-wide uses.
1965
1.22k
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1966
1.22k
    WideInc =
1967
1.22k
        dyn_cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1968
1.22k
    if (WideInc) {
1969
1.17k
      WideIncExpr = SE->getSCEV(WideInc);
1970
      // Propagate the debug location associated with the original loop
1971
      // increment to the new (widened) increment.
1972
1.17k
      auto *OrigInc =
1973
1.17k
          cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1974
1.17k
      WideInc->setDebugLoc(OrigInc->getDebugLoc());
1975
1.17k
    }
1976
1.22k
  }
1977
1978
1.22k
  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1979
1.22k
  ++NumWidened;
1980
1981
  // Traverse the def-use chain using a worklist starting at the original IV.
1982
1.22k
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1983
1984
0
  Widened.insert(OrigPhi);
1985
1.22k
  pushNarrowIVUsers(OrigPhi, WidePhi);
1986
1987
11.0k
  while (!NarrowIVUsers.empty()) {
1988
9.84k
    WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1989
1990
    // Process a def-use edge. This may replace the use, so don't hold a
1991
    // use_iterator across it.
1992
9.84k
    Instruction *WideUse = widenIVUse(DU, Rewriter);
1993
1994
    // Follow all def-use edges from the previous narrow use.
1995
9.84k
    if (WideUse)
1996
2.20k
      pushNarrowIVUsers(DU.NarrowUse, WideUse);
1997
1998
    // widenIVUse may have removed the def-use edge.
1999
9.84k
    if (DU.NarrowDef->use_empty())
2000
386
      DeadInsts.emplace_back(DU.NarrowDef);
2001
9.84k
  }
2002
2003
  // Attach any debug information to the new PHI.
2004
1.22k
  replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
2005
2006
1.22k
  return WidePhi;
2007
1.22k
}
2008
2009
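Putting the pieces together, an end-to-end sketch of what createWideIV plus widenIVUse achieve for a hypothetical i32 counter feeding 64-bit addressing:
// Before widening:
//   loop:
//     %iv      = phi i32 [ 0, %ph ], [ %iv.next, %loop ]
//     %iv.ext  = sext i32 %iv to i64
//     %addr    = getelementptr inbounds i32, ptr %base, i64 %iv.ext
//     %iv.next = add nsw i32 %iv, 1
// After widening and dead-code cleanup, the sext is gone and the narrow IV has
// been isolated for removal:
//   loop:
//     %iv.wide      = phi i64 [ 0, %ph ], [ %iv.wide.next, %loop ]
//     %addr         = getelementptr inbounds i32, ptr %base, i64 %iv.wide
//     %iv.wide.next = add nsw i64 %iv.wide, 1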
/// Calculates control-dependent range for the given def at the given context
2010
/// by looking at dominating conditions inside of the loop
2011
void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
2012
34.4k
                                    Instruction *NarrowUser) {
2013
34.4k
  Value *NarrowDefLHS;
2014
34.4k
  const APInt *NarrowDefRHS;
2015
34.4k
  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
2016
34.4k
                                 m_APInt(NarrowDefRHS))) ||
2017
34.4k
      !NarrowDefRHS->isNonNegative())
2018
32.8k
    return;
2019
2020
1.60k
  auto UpdateRangeFromCondition = [&] (Value *Condition,
2021
5.75k
                                       bool TrueDest) {
2022
5.75k
    CmpInst::Predicate Pred;
2023
5.75k
    Value *CmpRHS;
2024
5.75k
    if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
2025
5.75k
                                 m_Value(CmpRHS))))
2026
5.72k
      return;
2027
2028
24
    CmpInst::Predicate P =
2029
24
            TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
2030
2031
24
    auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
2032
24
    auto CmpConstrainedLHSRange =
2033
24
            ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
2034
24
    auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
2035
24
        *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
2036
2037
24
    updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
2038
24
  };
2039
2040
11.6k
  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
2041
11.6k
    if (!HasGuards)
2042
11.6k
      return;
2043
2044
49
    for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
2045
526
                                     Ctx->getParent()->rend())) {
2046
526
      Value *C = nullptr;
2047
526
      if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
2048
23
        UpdateRangeFromCondition(C, /*TrueDest=*/true);
2049
526
    }
2050
49
  };
2051
2052
1.60k
  UpdateRangeFromGuards(NarrowUser);
2053
2054
1.60k
  BasicBlock *NarrowUserBB = NarrowUser->getParent();
2055
  // If NarrowUserBB is statically unreachable, asking dominator queries may
2056
  // yield surprising results (e.g. the block may not have a dom tree node).
2057
1.60k
  if (!DT->isReachableFromEntry(NarrowUserBB))
2058
0
    return;
2059
2060
1.60k
  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
2061
11.6k
       L->contains(DTB->getBlock());
2062
10.0k
       DTB = DTB->getIDom()) {
2063
10.0k
    auto *BB = DTB->getBlock();
2064
10.0k
    auto *TI = BB->getTerminator();
2065
10.0k
    UpdateRangeFromGuards(TI);
2066
2067
10.0k
    auto *BI = dyn_cast<BranchInst>(TI);
2068
10.0k
    if (!BI || !BI->isConditional())
2069
4.35k
      continue;
2070
2071
5.73k
    auto *TrueSuccessor = BI->getSuccessor(0);
2072
5.73k
    auto *FalseSuccessor = BI->getSuccessor(1);
2073
2074
11.4k
    auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
2075
11.4k
      return BBE.isSingleEdge() &&
2076
11.4k
             DT->dominates(BBE, NarrowUser->getParent());
2077
11.4k
    };
2078
2079
5.73k
    if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
2080
1.65k
      UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
2081
2082
5.73k
    if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
2083
4.07k
      UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
2084
5.73k
  }
2085
1.60k
}
2086
2087
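The kind of pattern calculatePostIncRange recognizes, again on hypothetical IR:
//   %iv.next = add nsw i32 %iv, 1           ; NarrowDef, NarrowDefLHS = %iv
//   %in      = icmp slt i32 %iv, %n         ; condition dominating NarrowUser
//   br i1 %in, label %guarded, label %exit
// On the %guarded path the branch bounds %iv; adding the non-negative constant
// with no signed wrap gives a range for %iv.next at that user, recorded via
// updatePostIncRangeInfo. pushNarrowIVUsers later consults this range to treat
// the use as non-negative even when the def itself is not known non-negative.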
/// Calculates PostIncRangeInfos map for the given IV
2088
1.22k
void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
2089
1.22k
  SmallPtrSet<Instruction *, 16> Visited;
2090
1.22k
  SmallVector<Instruction *, 6> Worklist;
2091
1.22k
  Worklist.push_back(OrigPhi);
2092
1.22k
  Visited.insert(OrigPhi);
2093
2094
36.8k
  while (!Worklist.empty()) {
2095
35.6k
    Instruction *NarrowDef = Worklist.pop_back_val();
2096
2097
46.1k
    for (Use &U : NarrowDef->uses()) {
2098
46.1k
      auto *NarrowUser = cast<Instruction>(U.getUser());
2099
2100
      // Don't go looking outside the current loop.
2101
46.1k
      auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
2102
46.1k
      if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
2103
285
        continue;
2104
2105
45.8k
      if (!Visited.insert(NarrowUser).second)
2106
11.4k
        continue;
2107
2108
34.4k
      Worklist.push_back(NarrowUser);
2109
2110
34.4k
      calculatePostIncRange(NarrowDef, NarrowUser);
2111
34.4k
    }
2112
35.6k
  }
2113
1.22k
}
2114
2115
PHINode *llvm::createWideIV(const WideIVInfo &WI,
2116
    LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter,
2117
    DominatorTree *DT, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
2118
    unsigned &NumElimExt, unsigned &NumWidened,
2119
2.02k
    bool HasGuards, bool UsePostIncrementRanges) {
2120
2.02k
  WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges);
2121
2.02k
  PHINode *WidePHI = Widener.createWideIV(Rewriter);
2122
2.02k
  NumElimExt = Widener.getNumElimExt();
2123
2.02k
  NumWidened = Widener.getNumWidened();
2124
2.02k
  return WidePHI;
2125
2.02k
}
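A minimal calling sketch for the createWideIV entry point above. It assumes the WideIVInfo fields declared in SimplifyIndVar.h (NarrowIV, WidestNativeType, IsSigned) and the usual LLVM utility headers; the helper tryWidenIV and its parameters are hypothetical, not part of this file.

// Hypothetical caller (llvm:: qualifiers omitted); analysis plumbing elided.
static bool tryWidenIV(PHINode *NarrowPhi, Type *WideTy, bool IsSigned,
                       LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT) {
  WideIVInfo WI;
  WI.NarrowIV = NarrowPhi;        // narrow induction phi to widen
  WI.WidestNativeType = WideTy;   // e.g. i64 on a 64-bit target
  WI.IsSigned = IsSigned;         // whether uses are sign- or zero-extended

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  SCEVExpander Rewriter(*SE, NarrowPhi->getModule()->getDataLayout(),
                        "widen-iv");
  unsigned NumElimExt = 0, NumWidened = 0;
  PHINode *WidePhi =
      createWideIV(WI, LI, SE, Rewriter, DT, DeadInsts, NumElimExt, NumWidened,
                   /*HasGuards=*/false, /*UsePostIncrementRanges=*/true);

  // createWideIV only queues replaced instructions; the caller cleans them up.
  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts);
  return WidePhi != nullptr;
}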