Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This transformation analyzes and transforms the induction variables (and
10
// computations derived from them) into simpler forms suitable for subsequent
11
// analysis and transformation.
12
//
13
// If the trip count of a loop is computable, this pass also makes the following
14
// changes:
15
//   1. The exit condition for the loop is canonicalized to compare the
16
//      induction value against the exit value.  This turns loops like:
17
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18
//   2. Any use outside of the loop of an expression derived from the indvar
19
//      is changed to compute the derived value outside of the loop, eliminating
20
//      the dependence on the exit value of the induction variable.  If the only
21
//      purpose of the loop is to compute the exit value of some derived
22
//      expression, this transformation will make the loop dead.
23
//
24
//===----------------------------------------------------------------------===//
25
26
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
27
#include "llvm/ADT/APFloat.h"
28
#include "llvm/ADT/ArrayRef.h"
29
#include "llvm/ADT/STLExtras.h"
30
#include "llvm/ADT/SmallPtrSet.h"
31
#include "llvm/ADT/SmallSet.h"
32
#include "llvm/ADT/SmallVector.h"
33
#include "llvm/ADT/Statistic.h"
34
#include "llvm/ADT/iterator_range.h"
35
#include "llvm/Analysis/LoopInfo.h"
36
#include "llvm/Analysis/LoopPass.h"
37
#include "llvm/Analysis/MemorySSA.h"
38
#include "llvm/Analysis/MemorySSAUpdater.h"
39
#include "llvm/Analysis/ScalarEvolution.h"
40
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
41
#include "llvm/Analysis/TargetLibraryInfo.h"
42
#include "llvm/Analysis/TargetTransformInfo.h"
43
#include "llvm/Analysis/ValueTracking.h"
44
#include "llvm/IR/BasicBlock.h"
45
#include "llvm/IR/Constant.h"
46
#include "llvm/IR/ConstantRange.h"
47
#include "llvm/IR/Constants.h"
48
#include "llvm/IR/DataLayout.h"
49
#include "llvm/IR/DerivedTypes.h"
50
#include "llvm/IR/Dominators.h"
51
#include "llvm/IR/Function.h"
52
#include "llvm/IR/IRBuilder.h"
53
#include "llvm/IR/InstrTypes.h"
54
#include "llvm/IR/Instruction.h"
55
#include "llvm/IR/Instructions.h"
56
#include "llvm/IR/IntrinsicInst.h"
57
#include "llvm/IR/Intrinsics.h"
58
#include "llvm/IR/Module.h"
59
#include "llvm/IR/Operator.h"
60
#include "llvm/IR/PassManager.h"
61
#include "llvm/IR/PatternMatch.h"
62
#include "llvm/IR/Type.h"
63
#include "llvm/IR/Use.h"
64
#include "llvm/IR/User.h"
65
#include "llvm/IR/Value.h"
66
#include "llvm/IR/ValueHandle.h"
67
#include "llvm/Support/Casting.h"
68
#include "llvm/Support/CommandLine.h"
69
#include "llvm/Support/Compiler.h"
70
#include "llvm/Support/Debug.h"
71
#include "llvm/Support/MathExtras.h"
72
#include "llvm/Support/raw_ostream.h"
73
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
74
#include "llvm/Transforms/Utils/Local.h"
75
#include "llvm/Transforms/Utils/LoopUtils.h"
76
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
77
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
78
#include <cassert>
79
#include <cstdint>
80
#include <utility>
81
82
using namespace llvm;
83
using namespace PatternMatch;
84
85
35.1k
#define DEBUG_TYPE "indvars"
86
87
STATISTIC(NumWidened     , "Number of indvars widened");
88
STATISTIC(NumReplaced    , "Number of exit values replaced");
89
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
90
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
91
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
92
93
static cl::opt<ReplaceExitVal> ReplaceExitValue(
94
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
95
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
96
    cl::values(
97
        clEnumValN(NeverRepl, "never", "never replace exit value"),
98
        clEnumValN(OnlyCheapRepl, "cheap",
99
                   "only replace exit value when the cost is cheap"),
100
        clEnumValN(
101
            UnusedIndVarInLoop, "unusedindvarinloop",
102
            "only replace exit value when it is an unused "
103
            "induction variable in the loop and has cheap replacement cost"),
104
        clEnumValN(NoHardUse, "noharduse",
105
                   "only replace exit values when loop def likely dead"),
106
        clEnumValN(AlwaysRepl, "always",
107
                   "always replace exit value whenever possible")));
108
109
static cl::opt<bool> UsePostIncrementRanges(
110
  "indvars-post-increment-ranges", cl::Hidden,
111
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
112
  cl::init(true));
113
114
static cl::opt<bool>
115
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
116
            cl::desc("Disable Linear Function Test Replace optimization"));
117
118
static cl::opt<bool>
119
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
120
                cl::desc("Predicate conditions in read only loops"));
121
122
static cl::opt<bool>
123
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
124
                cl::desc("Allow widening of indvars to eliminate s/zext"));
125
126
namespace {
127
128
class IndVarSimplify {
129
  LoopInfo *LI;
130
  ScalarEvolution *SE;
131
  DominatorTree *DT;
132
  const DataLayout &DL;
133
  TargetLibraryInfo *TLI;
134
  const TargetTransformInfo *TTI;
135
  std::unique_ptr<MemorySSAUpdater> MSSAU;
136
137
  SmallVector<WeakTrackingVH, 16> DeadInsts;
138
  bool WidenIndVars;
139
140
  bool handleFloatingPointIV(Loop *L, PHINode *PH);
141
  bool rewriteNonIntegerIVs(Loop *L);
142
143
  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
144
  /// Try to improve our exit conditions by converting condition from signed
145
  /// to unsigned or rotating computation out of the loop.
146
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
147
  bool canonicalizeExitCondition(Loop *L);
148
  /// Try to eliminate loop exits based on analyzeable exit counts
149
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
150
  /// Try to form loop invariant tests for loop exits by changing how many
151
  /// iterations of the loop run when that is unobservable.
152
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
153
154
  bool rewriteFirstIterationLoopExitValues(Loop *L);
155
156
  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
157
                                 const SCEV *ExitCount,
158
                                 PHINode *IndVar, SCEVExpander &Rewriter);
159
160
  bool sinkUnusedInvariants(Loop *L);
161
162
public:
163
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
164
                 const DataLayout &DL, TargetLibraryInfo *TLI,
165
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
166
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
167
35.1k
        WidenIndVars(WidenIndVars) {
168
35.1k
    if (MSSA)
169
0
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
170
35.1k
  }
171
172
  bool run(Loop *L);
173
};
174
175
} // end anonymous namespace
176
177
//===----------------------------------------------------------------------===//
178
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
179
//===----------------------------------------------------------------------===//
180
181
/// Convert APF to an integer, if possible.
182
2
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
183
2
  bool isExact = false;
184
  // See if we can convert this to an int64_t
185
2
  uint64_t UIntVal;
186
2
  if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
187
2
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
188
2
      !isExact)
189
0
    return false;
190
2
  IntVal = UIntVal;
191
2
  return true;
192
2
}
193
194
/// If the loop has floating induction variable then insert corresponding
195
/// integer induction variable if possible.
196
/// For example,
197
/// for(double i = 0; i < 10000; ++i)
198
///   bar(i)
199
/// is converted into
200
/// for(int i = 0; i < 10000; ++i)
201
///   bar((double)i);
202
75.1k
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
203
75.1k
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
204
75.1k
  unsigned BackEdge     = IncomingEdge^1;
205
206
  // Check incoming value.
207
75.1k
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
208
209
75.1k
  int64_t InitValue;
210
75.1k
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
211
75.1k
    return false;
212
213
  // Check IV increment. Reject this PN if increment operation is not
214
  // an add or increment value can not be represented by an integer.
215
1
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
216
1
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
217
218
  // If this is not an add of the PHI with a constantfp, or if the constant fp
219
  // is not an integer, bail out.
220
1
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
221
1
  int64_t IncValue;
222
1
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
223
1
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
224
0
    return false;
225
226
  // Check Incr uses. One user is PN and the other user is an exit condition
227
  // used by the conditional terminator.
228
1
  Value::user_iterator IncrUse = Incr->user_begin();
229
1
  Instruction *U1 = cast<Instruction>(*IncrUse++);
230
1
  if (IncrUse == Incr->user_end()) return false;
231
1
  Instruction *U2 = cast<Instruction>(*IncrUse++);
232
1
  if (IncrUse != Incr->user_end()) return false;
233
234
  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
235
  // only used by a branch, we can't transform it.
236
0
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
237
0
  if (!Compare)
238
0
    Compare = dyn_cast<FCmpInst>(U2);
239
0
  if (!Compare || !Compare->hasOneUse() ||
240
0
      !isa<BranchInst>(Compare->user_back()))
241
0
    return false;
242
243
0
  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
244
245
  // We need to verify that the branch actually controls the iteration count
246
  // of the loop.  If not, the new IV can overflow and no one will notice.
247
  // The branch block must be in the loop and one of the successors must be out
248
  // of the loop.
249
0
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
250
0
  if (!L->contains(TheBr->getParent()) ||
251
0
      (L->contains(TheBr->getSuccessor(0)) &&
252
0
       L->contains(TheBr->getSuccessor(1))))
253
0
    return false;
254
255
  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
256
  // transform it.
257
0
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
258
0
  int64_t ExitValue;
259
0
  if (ExitValueVal == nullptr ||
260
0
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
261
0
    return false;
262
263
  // Find new predicate for integer comparison.
264
0
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
265
0
  switch (Compare->getPredicate()) {
266
0
  default: return false;  // Unknown comparison.
267
0
  case CmpInst::FCMP_OEQ:
268
0
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
269
0
  case CmpInst::FCMP_ONE:
270
0
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
271
0
  case CmpInst::FCMP_OGT:
272
0
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
273
0
  case CmpInst::FCMP_OGE:
274
0
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
275
0
  case CmpInst::FCMP_OLT:
276
0
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
277
0
  case CmpInst::FCMP_OLE:
278
0
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
279
0
  }
280
281
  // We convert the floating point induction variable to a signed i32 value if
282
  // we can.  This is only safe if the comparison will not overflow in a way
283
  // that won't be trapped by the integer equivalent operations.  Check for this
284
  // now.
285
  // TODO: We could use i64 if it is native and the range requires it.
286
287
  // The start/stride/exit values must all fit in signed i32.
288
0
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
289
0
    return false;
290
291
  // If not actually striding (add x, 0.0), avoid touching the code.
292
0
  if (IncValue == 0)
293
0
    return false;
294
295
  // Positive and negative strides have different safety conditions.
296
0
  if (IncValue > 0) {
297
    // If we have a positive stride, we require the init to be less than the
298
    // exit value.
299
0
    if (InitValue >= ExitValue)
300
0
      return false;
301
302
0
    uint32_t Range = uint32_t(ExitValue-InitValue);
303
    // Check for infinite loop, either:
304
    // while (i <= Exit) or until (i > Exit)
305
0
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
306
0
      if (++Range == 0) return false;  // Range overflows.
307
0
    }
308
309
0
    unsigned Leftover = Range % uint32_t(IncValue);
310
311
    // If this is an equality comparison, we require that the strided value
312
    // exactly land on the exit value, otherwise the IV condition will wrap
313
    // around and do things the fp IV wouldn't.
314
0
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
315
0
        Leftover != 0)
316
0
      return false;
317
318
    // If the stride would wrap around the i32 before exiting, we can't
319
    // transform the IV.
320
0
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
321
0
      return false;
322
0
  } else {
323
    // If we have a negative stride, we require the init to be greater than the
324
    // exit value.
325
0
    if (InitValue <= ExitValue)
326
0
      return false;
327
328
0
    uint32_t Range = uint32_t(InitValue-ExitValue);
329
    // Check for infinite loop, either:
330
    // while (i >= Exit) or until (i < Exit)
331
0
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
332
0
      if (++Range == 0) return false;  // Range overflows.
333
0
    }
334
335
0
    unsigned Leftover = Range % uint32_t(-IncValue);
336
337
    // If this is an equality comparison, we require that the strided value
338
    // exactly land on the exit value, otherwise the IV condition will wrap
339
    // around and do things the fp IV wouldn't.
340
0
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
341
0
        Leftover != 0)
342
0
      return false;
343
344
    // If the stride would wrap around the i32 before exiting, we can't
345
    // transform the IV.
346
0
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
347
0
      return false;
348
0
  }
349
350
0
  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
351
352
  // Insert new integer induction variable.
353
0
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
354
0
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
355
0
                      PN->getIncomingBlock(IncomingEdge));
356
357
0
  Value *NewAdd =
358
0
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
359
0
                              Incr->getName()+".int", Incr);
360
0
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
361
362
0
  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
363
0
                                      ConstantInt::get(Int32Ty, ExitValue),
364
0
                                      Compare->getName());
365
366
  // In the following deletions, PN may become dead and may be deleted.
367
  // Use a WeakTrackingVH to observe whether this happens.
368
0
  WeakTrackingVH WeakPH = PN;
369
370
  // Delete the old floating point exit comparison.  The branch starts using the
371
  // new comparison.
372
0
  NewCompare->takeName(Compare);
373
0
  Compare->replaceAllUsesWith(NewCompare);
374
0
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
375
376
  // Delete the old floating point increment.
377
0
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
378
0
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
379
380
  // If the FP induction variable still has uses, this is because something else
381
  // in the loop uses its value.  In order to canonicalize the induction
382
  // variable, we chose to eliminate the IV and rewrite it in terms of an
383
  // int->fp cast.
384
  //
385
  // We give preference to sitofp over uitofp because it is faster on most
386
  // platforms.
387
0
  if (WeakPH) {
388
0
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
389
0
                                 &*PN->getParent()->getFirstInsertionPt());
390
0
    PN->replaceAllUsesWith(Conv);
391
0
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
392
0
  }
393
0
  return true;
394
0
}
395
396
35.1k
bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
397
  // First step.  Check to see if there are any floating-point recurrences.
398
  // If there are, change them into integer recurrences, permitting analysis by
399
  // the SCEV routines.
400
35.1k
  BasicBlock *Header = L->getHeader();
401
402
35.1k
  SmallVector<WeakTrackingVH, 8> PHIs;
403
35.1k
  for (PHINode &PN : Header->phis())
404
75.1k
    PHIs.push_back(&PN);
405
406
35.1k
  bool Changed = false;
407
35.1k
  for (WeakTrackingVH &PHI : PHIs)
408
75.1k
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
409
75.1k
      Changed |= handleFloatingPointIV(L, PN);
410
411
  // If the loop previously had floating-point IV, ScalarEvolution
412
  // may not have been able to compute a trip count. Now that we've done some
413
  // re-writing, the trip count may be computable.
414
35.1k
  if (Changed)
415
0
    SE->forgetLoop(L);
416
35.1k
  return Changed;
417
35.1k
}
418
419
//===---------------------------------------------------------------------===//
420
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
421
// they will exit at the first iteration.
422
//===---------------------------------------------------------------------===//
423
424
/// Check to see if this loop has loop invariant conditions which lead to loop
425
/// exits. If so, we know that if the exit path is taken, it is at the first
426
/// loop iteration. This lets us predict exit values of PHI nodes that live in
427
/// loop header.
428
35.1k
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
429
  // Verify the input to the pass is already in LCSSA form.
430
35.1k
  assert(L->isLCSSAForm(*DT));
431
432
0
  SmallVector<BasicBlock *, 8> ExitBlocks;
433
35.1k
  L->getUniqueExitBlocks(ExitBlocks);
434
435
35.1k
  bool MadeAnyChanges = false;
436
36.4k
  for (auto *ExitBB : ExitBlocks) {
437
    // If there are no more PHI nodes in this exit block, then no more
438
    // values defined inside the loop are used on this path.
439
37.8k
    for (PHINode &PN : ExitBB->phis()) {
440
37.8k
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
441
76.7k
           IncomingValIdx != E; ++IncomingValIdx) {
442
38.9k
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
443
444
        // Can we prove that the exit must run on the first iteration if it
445
        // runs at all?  (i.e. early exits are fine for our purposes, but
446
        // traces which lead to this exit being taken on the 2nd iteration
447
        // aren't.)  Note that this is about whether the exit branch is
448
        // executed, not about whether it is taken.
449
38.9k
        if (!L->getLoopLatch() ||
450
38.9k
            !DT->dominates(IncomingBB, L->getLoopLatch()))
451
19.4k
          continue;
452
453
        // Get condition that leads to the exit path.
454
19.5k
        auto *TermInst = IncomingBB->getTerminator();
455
456
19.5k
        Value *Cond = nullptr;
457
19.5k
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
458
          // Must be a conditional branch, otherwise the block
459
          // should not be in the loop.
460
19.5k
          Cond = BI->getCondition();
461
19.5k
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
462
0
          Cond = SI->getCondition();
463
0
        else
464
0
          continue;
465
466
19.5k
        if (!L->isLoopInvariant(Cond))
467
10.8k
          continue;
468
469
8.61k
        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
470
471
        // Only deal with PHIs in the loop header.
472
8.61k
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
473
8.57k
          continue;
474
475
        // If ExitVal is a PHI on the loop header, then we know its
476
        // value along this exit because the exit can only be taken
477
        // on the first iteration.
478
46
        auto *LoopPreheader = L->getLoopPreheader();
479
46
        assert(LoopPreheader && "Invalid loop");
480
0
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
481
46
        if (PreheaderIdx != -1) {
482
46
          assert(ExitVal->getParent() == L->getHeader() &&
483
46
                 "ExitVal must be in loop header");
484
0
          MadeAnyChanges = true;
485
46
          PN.setIncomingValue(IncomingValIdx,
486
46
                              ExitVal->getIncomingValue(PreheaderIdx));
487
46
          SE->forgetValue(&PN);
488
46
        }
489
46
      }
490
37.8k
    }
491
36.4k
  }
492
35.1k
  return MadeAnyChanges;
493
35.1k
}
494
495
//===----------------------------------------------------------------------===//
496
//  IV Widening - Extend the width of an IV to cover its widest uses.
497
//===----------------------------------------------------------------------===//
498
499
/// Update information about the induction variable that is extended by this
500
/// sign or zero extend operation. This is used to determine the final width of
501
/// the IV before actually widening it.
502
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
503
                        ScalarEvolution *SE,
504
7.88k
                        const TargetTransformInfo *TTI) {
505
7.88k
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
506
7.88k
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
507
5.46k
    return;
508
509
2.42k
  Type *Ty = Cast->getType();
510
2.42k
  uint64_t Width = SE->getTypeSizeInBits(Ty);
511
2.42k
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
512
8
    return;
513
514
  // Check that `Cast` actually extends the induction variable (we rely on this
515
  // later).  This takes care of cases where `Cast` is extending a truncation of
516
  // the narrow induction variable, and thus can end up being narrower than the
517
  // "narrow" induction variable.
518
2.41k
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
519
2.41k
  if (NarrowIVWidth >= Width)
520
0
    return;
521
522
  // Cast is either an sext or zext up to this point.
523
  // We should not widen an indvar if arithmetics on the wider indvar are more
524
  // expensive than those on the narrower indvar. We check only the cost of ADD
525
  // because at least an ADD is required to increment the induction variable. We
526
  // could compute more comprehensively the cost of all instructions on the
527
  // induction variable when necessary.
528
2.41k
  if (TTI &&
529
2.41k
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
530
2.41k
          TTI->getArithmeticInstrCost(Instruction::Add,
531
2.41k
                                      Cast->getOperand(0)->getType())) {
532
0
    return;
533
0
  }
534
535
2.41k
  if (!WI.WidestNativeType ||
536
2.41k
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
537
2.02k
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
538
2.02k
    WI.IsSigned = IsSigned;
539
2.02k
    return;
540
2.02k
  }
541
542
  // We extend the IV to satisfy the sign of its user(s), or 'signed'
543
  // if there are multiple users with both sign- and zero extensions,
544
  // in order not to introduce nondeterministic behaviour based on the
545
  // unspecified order of a PHI nodes' users-iterator.
546
392
  WI.IsSigned |= IsSigned;
547
392
}
548
549
//===----------------------------------------------------------------------===//
550
//  Live IV Reduction - Minimize IVs live across the loop.
551
//===----------------------------------------------------------------------===//
552
553
//===----------------------------------------------------------------------===//
554
//  Simplification of IV users based on SCEV evaluation.
555
//===----------------------------------------------------------------------===//
556
557
namespace {
558
559
class IndVarSimplifyVisitor : public IVVisitor {
560
  ScalarEvolution *SE;
561
  const TargetTransformInfo *TTI;
562
  PHINode *IVPhi;
563
564
public:
565
  WideIVInfo WI;
566
567
  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
568
                        const TargetTransformInfo *TTI,
569
                        const DominatorTree *DTree)
570
76.3k
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
571
76.3k
    DT = DTree;
572
76.3k
    WI.NarrowIV = IVPhi;
573
76.3k
  }
574
575
  // Implement the interface used by simplifyUsersOfIV.
576
7.88k
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
577
};
578
579
} // end anonymous namespace
580
581
/// Iteratively perform simplification on a worklist of IV users. Each
582
/// successive simplification may push more users which may themselves be
583
/// candidates for simplification.
584
///
585
/// Sign/Zero extend elimination is interleaved with IV simplification.
586
bool IndVarSimplify::simplifyAndExtend(Loop *L,
587
                                       SCEVExpander &Rewriter,
588
35.1k
                                       LoopInfo *LI) {
589
35.1k
  SmallVector<WideIVInfo, 8> WideIVs;
590
591
35.1k
  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
592
35.1k
          Intrinsic::getName(Intrinsic::experimental_guard));
593
35.1k
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
594
595
35.1k
  SmallVector<PHINode *, 8> LoopPhis;
596
35.1k
  for (PHINode &PN : L->getHeader()->phis())
597
75.1k
    LoopPhis.push_back(&PN);
598
599
  // Each round of simplification iterates through the SimplifyIVUsers worklist
600
  // for all current phis, then determines whether any IVs can be
601
  // widened. Widening adds new phis to LoopPhis, inducing another round of
602
  // simplification on the wide IVs.
603
35.1k
  bool Changed = false;
604
43.9k
  while (!LoopPhis.empty()) {
605
    // Evaluate as many IV expressions as possible before widening any IVs. This
606
    // forces SCEV to set no-wrap flags before evaluating sign/zero
607
    // extension. The first time SCEV attempts to normalize sign/zero extension,
608
    // the result becomes final. So for the most predictable results, we delay
609
    // evaluation of sign/zero extend evaluation until needed, and avoid running
610
    // other SCEV based analysis prior to simplifyAndExtend.
611
76.3k
    do {
612
76.3k
      PHINode *CurrIV = LoopPhis.pop_back_val();
613
614
      // Information about sign/zero extensions of CurrIV.
615
76.3k
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
616
617
76.3k
      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
618
76.3k
                                   &Visitor);
619
620
76.3k
      if (Visitor.WI.WidestNativeType) {
621
2.02k
        WideIVs.push_back(Visitor.WI);
622
2.02k
      }
623
76.3k
    } while(!LoopPhis.empty());
624
625
    // Continue if we disallowed widening.
626
8.75k
    if (!WidenIndVars)
627
0
      continue;
628
629
10.7k
    for (; !WideIVs.empty(); WideIVs.pop_back()) {
630
2.02k
      unsigned ElimExt;
631
2.02k
      unsigned Widened;
632
2.02k
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
633
2.02k
                                          DT, DeadInsts, ElimExt, Widened,
634
2.02k
                                          HasGuards, UsePostIncrementRanges)) {
635
1.22k
        NumElimExt += ElimExt;
636
1.22k
        NumWidened += Widened;
637
1.22k
        Changed = true;
638
1.22k
        LoopPhis.push_back(WidePhi);
639
1.22k
      }
640
2.02k
    }
641
8.75k
  }
642
35.1k
  return Changed;
643
35.1k
}
644
645
//===----------------------------------------------------------------------===//
646
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
647
//===----------------------------------------------------------------------===//
648
649
/// Given an Value which is hoped to be part of an add recurance in the given
650
/// loop, return the associated Phi node if so.  Otherwise, return null.  Note
651
/// that this is less general than SCEVs AddRec checking.
652
3.50k
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
653
3.50k
  Instruction *IncI = dyn_cast<Instruction>(IncV);
654
3.50k
  if (!IncI)
655
839
    return nullptr;
656
657
2.66k
  switch (IncI->getOpcode()) {
658
2.11k
  case Instruction::Add:
659
2.12k
  case Instruction::Sub:
660
2.12k
    break;
661
9
  case Instruction::GetElementPtr:
662
    // An IV counter must preserve its type.
663
9
    if (IncI->getNumOperands() == 2)
664
9
      break;
665
9
    [[fallthrough]];
666
540
  default:
667
540
    return nullptr;
668
2.66k
  }
669
670
2.12k
  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
671
2.12k
  if (Phi && Phi->getParent() == L->getHeader()) {
672
2.09k
    if (L->isLoopInvariant(IncI->getOperand(1)))
673
2.08k
      return Phi;
674
6
    return nullptr;
675
2.09k
  }
676
39
  if (IncI->getOpcode() == Instruction::GetElementPtr)
677
0
    return nullptr;
678
679
  // Allow add/sub to be commuted.
680
39
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
681
39
  if (Phi && Phi->getParent() == L->getHeader()) {
682
9
    if (L->isLoopInvariant(IncI->getOperand(0)))
683
9
      return Phi;
684
9
  }
685
30
  return nullptr;
686
39
}
687
688
/// Whether the current loop exit test is based on this value.  Currently this
689
/// is limited to a direct use in the loop condition.
690
96
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
691
96
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
692
96
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
693
  // TODO: Allow non-icmp loop test.
694
96
  if (!ICmp)
695
28
    return false;
696
697
  // TODO: Allow indirect use.
698
68
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
699
96
}
700
701
/// linearFunctionTestReplace policy. Return true unless we can show that the
702
/// current exit test is already sufficiently canonical.
703
36.5k
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
704
36.5k
  assert(L->getLoopLatch() && "Must be in simplified form");
705
706
  // Avoid converting a constant or loop invariant test back to a runtime
707
  // test.  This is critical for when SCEV's cached ExitCount is less precise
708
  // than the current IR (such as after we've proven a particular exit is
709
  // actually dead and thus the BE count never reaches our ExitCount.)
710
0
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
711
36.5k
  if (L->isLoopInvariant(BI->getCondition()))
712
24.3k
    return false;
713
714
  // Do LFTR to simplify the exit condition to an ICMP.
715
12.2k
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
716
12.2k
  if (!Cond)
717
5.65k
    return true;
718
719
  // Do LFTR to simplify the exit ICMP to EQ/NE
720
6.61k
  ICmpInst::Predicate Pred = Cond->getPredicate();
721
6.61k
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
722
4.36k
    return true;
723
724
  // Look for a loop invariant RHS
725
2.24k
  Value *LHS = Cond->getOperand(0);
726
2.24k
  Value *RHS = Cond->getOperand(1);
727
2.24k
  if (!L->isLoopInvariant(RHS)) {
728
307
    if (!L->isLoopInvariant(LHS))
729
218
      return true;
730
89
    std::swap(LHS, RHS);
731
89
  }
732
  // Look for a simple IV counter LHS
733
2.02k
  PHINode *Phi = dyn_cast<PHINode>(LHS);
734
2.02k
  if (!Phi)
735
1.96k
    Phi = getLoopPhiForCounter(LHS, L);
736
737
2.02k
  if (!Phi)
738
1.34k
    return true;
739
740
  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
741
686
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
742
686
  if (Idx < 0)
743
20
    return true;
744
745
  // Do LFTR if the exit condition's IV is *not* a simple counter.
746
666
  Value *IncV = Phi->getIncomingValue(Idx);
747
666
  return Phi != getLoopPhiForCounter(IncV, L);
748
686
}
749
750
/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
751
/// down to checking that all operands are constant and listing instructions
752
/// that may hide undef.
753
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
754
1.42k
                               unsigned Depth) {
755
1.42k
  if (isa<Constant>(V))
756
609
    return !isa<UndefValue>(V);
757
758
816
  if (Depth >= 6)
759
1
    return false;
760
761
  // Conservatively handle non-constant non-instructions. For example, Arguments
762
  // may be undef.
763
815
  Instruction *I = dyn_cast<Instruction>(V);
764
815
  if (!I)
765
16
    return false;
766
767
  // Load and return values may be undef.
768
799
  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
769
7
    return false;
770
771
  // Optimistically handle other instructions.
772
1.50k
  for (Value *Op : I->operands()) {
773
1.50k
    if (!Visited.insert(Op).second)
774
404
      continue;
775
1.09k
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
776
119
      return false;
777
1.09k
  }
778
673
  return true;
779
792
}
780
781
/// Return true if the given value is concrete. We must prove that undef can
782
/// never reach it.
783
///
784
/// TODO: If we decide that this is a good approach to checking for undef, we
785
/// may factor it into a common location.
786
327
static bool hasConcreteDef(Value *V) {
787
327
  SmallPtrSet<Value*, 8> Visited;
788
327
  Visited.insert(V);
789
327
  return hasConcreteDefImpl(V, Visited, 0);
790
327
}
791
792
/// Return true if the given phi is a "counter" in L.  A counter is an
793
/// add recurance (of integer or pointer type) with an arbitrary start, and a
794
/// step of 1.  Note that L must have exactly one latch.
795
static bool isLoopCounter(PHINode* Phi, Loop *L,
796
4.73k
                          ScalarEvolution *SE) {
797
4.73k
  assert(Phi->getParent() == L->getHeader());
798
0
  assert(L->getLoopLatch());
799
800
4.73k
  if (!SE->isSCEVable(Phi->getType()))
801
1
    return false;
802
803
4.73k
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
804
4.73k
  if (!AR || AR->getLoop() != L || !AR->isAffine())
805
2.72k
    return false;
806
807
2.01k
  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
808
2.01k
  if (!Step || !Step->isOne())
809
1.13k
    return false;
810
811
878
  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
812
878
  Value *IncV = Phi->getIncomingValue(LatchIdx);
813
878
  return (getLoopPhiForCounter(IncV, L) == Phi &&
814
878
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
815
2.01k
}
816
817
/// Search the loop header for a loop counter (anadd rec w/step of one)
818
/// suitable for use by LFTR.  If multiple counters are available, select the
819
/// "best" one based profitable heuristics.
820
///
821
/// BECount may be an i8* pointer type. The pointer difference is already
822
/// valid count without scaling the address stride, so it remains a pointer
823
/// expression as far as SCEV is concerned.
824
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
825
                                const SCEV *BECount,
826
370
                                ScalarEvolution *SE, DominatorTree *DT) {
827
370
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
828
829
370
  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
830
831
  // Loop over all of the PHI nodes, looking for a simple counter.
832
370
  PHINode *BestPhi = nullptr;
833
370
  const SCEV *BestInit = nullptr;
834
370
  BasicBlock *LatchBlock = L->getLoopLatch();
835
370
  assert(LatchBlock && "Must be in simplified form");
836
0
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
837
838
4.66k
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
839
4.29k
    PHINode *Phi = cast<PHINode>(I);
840
4.29k
    if (!isLoopCounter(Phi, L, SE))
841
3.91k
      continue;
842
843
380
    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
844
845
    // AR may be a pointer type, while BECount is an integer type.
846
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
847
    // AR may not be a narrower type, or we may never exit.
848
380
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
849
380
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
850
53
      continue;
851
852
    // Avoid reusing a potentially undef value to compute other values that may
853
    // have originally had a concrete definition.
854
327
    if (!hasConcreteDef(Phi)) {
855
      // We explicitly allow unknown phis as long as they are already used by
856
      // the loop exit test.  This is legal since performing LFTR could not
857
      // increase the number of undef users.
858
53
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
859
53
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
860
53
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
861
22
        continue;
862
53
    }
863
864
    // Avoid introducing undefined behavior due to poison which didn't exist in
865
    // the original program.  (Annoyingly, the rules for poison and undef
866
    // propagation are distinct, so this does NOT cover the undef case above.)
867
    // We have to ensure that we don't introduce UB by introducing a use on an
868
    // iteration where said IV produces poison.  Our strategy here differs for
869
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
870
    // see code in linearFunctionTestReplace.  For pointers, we restrict
871
    // transforms as there is no good way to reinfer inbounds once lost.
872
305
    if (!Phi->getType()->isIntegerTy() &&
873
305
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
874
3
      continue;
875
876
302
    const SCEV *Init = AR->getStart();
877
878
302
    if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
879
      // Don't force a live loop counter if another IV can be used.
880
17
      if (isAlmostDeadIV(Phi, LatchBlock, Cond))
881
0
        continue;
882
883
      // Prefer to count-from-zero. This is a more "canonical" counter form. It
884
      // also prefers integer to pointer IVs.
885
17
      if (BestInit->isZero() != Init->isZero()) {
886
16
        if (BestInit->isZero())
887
15
          continue;
888
16
      }
889
      // If two IVs both count from zero or both count from nonzero then the
890
      // narrower is likely a dead phi that has been widened. Use the wider phi
891
      // to allow the other to be eliminated.
892
1
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
893
1
        continue;
894
17
    }
895
286
    BestPhi = Phi;
896
286
    BestInit = Init;
897
286
  }
898
370
  return BestPhi;
899
370
}
900
901
/// Insert an IR expression which computes the value held by the IV IndVar
902
/// (which must be an loop counter w/unit stride) after the backedge of loop L
903
/// is taken ExitCount times.
904
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
905
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
906
222
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
907
222
  assert(isLoopCounter(IndVar, L, SE));
908
0
  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
909
0
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
910
222
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
911
912
  // For integer IVs, truncate the IV before computing the limit unless we
913
  // know apriori that the limit must be a constant when evaluated in the
914
  // bitwidth of the IV.  We prefer (potentially) keeping a truncate of the
915
  // IV in the loop over a (potentially) expensive expansion of the widened
916
  // exit count add(zext(add)) expression.
917
222
  if (IndVar->getType()->isIntegerTy() &&
918
222
      SE->getTypeSizeInBits(AR->getType()) >
919
220
      SE->getTypeSizeInBits(ExitCount->getType())) {
920
80
    const SCEV *IVInit = AR->getStart();
921
80
    if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
922
41
      AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
923
80
  }
924
925
222
  const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
926
222
  const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
927
222
  assert(SE->isLoopInvariant(IVLimit, L) &&
928
222
         "Computed iteration count is not loop invariant!");
929
0
  return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
930
222
                                ExitingBB->getTerminator());
931
222
}
932
933
/// This method rewrites the exit condition of the loop to be a canonical !=
934
/// comparison against the incremented loop induction variable.  This pass is
935
/// able to rewrite the exit tests of any loop where the SCEV analysis can
936
/// determine a loop-invariant trip count of the loop, which is actually a much
937
/// broader range than just linear tests.
938
bool IndVarSimplify::
939
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
940
                          const SCEV *ExitCount,
941
222
                          PHINode *IndVar, SCEVExpander &Rewriter) {
942
222
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
943
0
  assert(isLoopCounter(IndVar, L, SE));
944
0
  Instruction * const IncVar =
945
222
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
946
947
  // Initialize CmpIndVar to the preincremented IV.
948
222
  Value *CmpIndVar = IndVar;
949
222
  bool UsePostInc = false;
950
951
  // If the exiting block is the same as the backedge block, we prefer to
952
  // compare against the post-incremented value, otherwise we must compare
953
  // against the preincremented value.
954
222
  if (ExitingBB == L->getLoopLatch()) {
955
    // For pointer IVs, we chose to not strip inbounds which requires us not
956
    // to add a potentially UB introducing use.  We need to either a) show
957
    // the loop test we're modifying is already in post-inc form, or b) show
958
    // that adding a use must not introduce UB.
959
188
    bool SafeToPostInc =
960
188
        IndVar->getType()->isIntegerTy() ||
961
188
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
962
188
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
963
188
    if (SafeToPostInc) {
964
188
      UsePostInc = true;
965
188
      CmpIndVar = IncVar;
966
188
    }
967
188
  }
968
969
  // It may be necessary to drop nowrap flags on the incrementing instruction
970
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
971
  // case the increment might have previously been poison on the last iteration
972
  // only) or if LFTR switches to a different IV that was previously dynamically
973
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
974
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
975
  // check), because the pre-inc addrec flags may be adopted from the original
976
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
977
  // TODO: This handling is inaccurate for one case: If we switch to a
978
  // dynamically dead IV that wraps on the first loop iteration only, which is
979
  // not covered by the post-inc addrec. (If the new IV was not dynamically
980
  // dead, it could not be poison on the first iteration in the first place.)
981
222
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
982
220
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
983
220
    if (BO->hasNoUnsignedWrap())
984
212
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
985
220
    if (BO->hasNoSignedWrap())
986
186
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
987
220
  }
988
989
222
  Value *ExitCnt = genLoopLimit(
990
222
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
991
222
  assert(ExitCnt->getType()->isPointerTy() ==
992
222
             IndVar->getType()->isPointerTy() &&
993
222
         "genLoopLimit missed a cast");
994
995
  // Insert a new icmp_ne or icmp_eq instruction before the branch.
996
0
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
997
222
  ICmpInst::Predicate P;
998
222
  if (L->contains(BI->getSuccessor(0)))
999
155
    P = ICmpInst::ICMP_NE;
1000
67
  else
1001
67
    P = ICmpInst::ICMP_EQ;
1002
1003
222
  IRBuilder<> Builder(BI);
1004
1005
  // The new loop exit condition should reuse the debug location of the
1006
  // original loop exit condition.
1007
222
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1008
222
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1009
1010
  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1011
  // avoid the expensive expansion of the limit expression in the wider type,
1012
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
1013
  // since we know (from the exit count bitwidth), that we can't self-wrap in
1014
  // the narrower type.
1015
222
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1016
222
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1017
222
  if (CmpIndVarSize > ExitCntSize) {
1018
41
    assert(!CmpIndVar->getType()->isPointerTy() &&
1019
41
           !ExitCnt->getType()->isPointerTy());
1020
1021
    // Before resorting to actually inserting the truncate, use the same
1022
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1023
    // the other side of the comparison instead.  We still evaluate the limit
1024
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1025
    // a truncate within in.
1026
0
    bool Extended = false;
1027
41
    const SCEV *IV = SE->getSCEV(CmpIndVar);
1028
41
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
1029
41
    const SCEV *ZExtTrunc =
1030
41
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1031
1032
41
    if (ZExtTrunc == IV) {
1033
12
      Extended = true;
1034
12
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1035
12
                                   "wide.trip.count");
1036
29
    } else {
1037
29
      const SCEV *SExtTrunc =
1038
29
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1039
29
      if (SExtTrunc == IV) {
1040
0
        Extended = true;
1041
0
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1042
0
                                     "wide.trip.count");
1043
0
      }
1044
29
    }
1045
1046
41
    if (Extended) {
1047
12
      bool Discard;
1048
12
      L->makeLoopInvariant(ExitCnt, Discard);
1049
12
    } else
1050
29
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1051
29
                                      "lftr.wideiv");
1052
41
  }
1053
222
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1054
222
                    << "      LHS:" << *CmpIndVar << '\n'
1055
222
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1056
222
                    << "\n"
1057
222
                    << "      RHS:\t" << *ExitCnt << "\n"
1058
222
                    << "ExitCount:\t" << *ExitCount << "\n"
1059
222
                    << "  was: " << *BI->getCondition() << "\n");
1060
1061
222
  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1062
222
  Value *OrigCond = BI->getCondition();
1063
  // It's tempting to use replaceAllUsesWith here to fully replace the old
1064
  // comparison, but that's not immediately safe, since users of the old
1065
  // comparison may not be dominated by the new comparison. Instead, just
1066
  // update the branch to use the new comparison; in the common case this
1067
  // will make old comparison dead.
1068
222
  BI->setCondition(Cond);
1069
222
  DeadInsts.emplace_back(OrigCond);
1070
1071
222
  ++NumLFTR;
1072
222
  return true;
1073
222
}
1074
1075
//===----------------------------------------------------------------------===//
1076
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1077
//===----------------------------------------------------------------------===//
1078
1079
/// If there's a single exit block, sink any loop-invariant values that
1080
/// were defined in the preheader but not used inside the loop into the
1081
/// exit block to reduce register pressure in the loop.
1082
35.1k
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1083
35.1k
  BasicBlock *ExitBlock = L->getExitBlock();
1084
35.1k
  if (!ExitBlock) return false;
1085
1086
34.2k
  BasicBlock *Preheader = L->getLoopPreheader();
1087
34.2k
  if (!Preheader) return false;
1088
1089
34.2k
  bool MadeAnyChanges = false;
1090
34.2k
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1091
34.2k
  BasicBlock::iterator I(Preheader->getTerminator());
1092
161k
  while (I != Preheader->begin()) {
1093
133k
    --I;
1094
    // New instructions were inserted at the end of the preheader.
1095
133k
    if (isa<PHINode>(I))
1096
5.76k
      break;
1097
1098
    // Don't move instructions which might have side effects, since the side
1099
    // effects need to complete before instructions inside the loop.  Also don't
1100
    // move instructions which might read memory, since the loop may modify
1101
    // memory. Note that it's okay if the instruction might have undefined
1102
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
1103
    // block.
1104
128k
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1105
18.7k
      continue;
1106
1107
    // Skip debug info intrinsics.
1108
109k
    if (isa<DbgInfoIntrinsic>(I))
1109
3
      continue;
1110
1111
    // Skip eh pad instructions.
1112
109k
    if (I->isEHPad())
1113
1
      continue;
1114
1115
    // Don't sink alloca: we never want to sink static alloca's out of the
1116
    // entry block, and correctly sinking dynamic alloca's requires
1117
    // checks for stacksave/stackrestore intrinsics.
1118
    // FIXME: Refactor this check somehow?
1119
109k
    if (isa<AllocaInst>(I))
1120
5.98k
      continue;
1121
1122
    // Determine if there is a use in or before the loop (direct or
1123
    // otherwise).
1124
103k
    bool UsedInLoop = false;
1125
105k
    for (Use &U : I->uses()) {
1126
105k
      Instruction *User = cast<Instruction>(U.getUser());
1127
105k
      BasicBlock *UseBB = User->getParent();
1128
105k
      if (PHINode *P = dyn_cast<PHINode>(User)) {
1129
7.47k
        unsigned i =
1130
7.47k
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1131
7.47k
        UseBB = P->getIncomingBlock(i);
1132
7.47k
      }
1133
105k
      if (UseBB == Preheader || L->contains(UseBB)) {
1134
99.7k
        UsedInLoop = true;
1135
99.7k
        break;
1136
99.7k
      }
1137
105k
    }
1138
1139
    // If there is, the def must remain in the preheader.
1140
103k
    if (UsedInLoop)
1141
99.7k
      continue;
1142
1143
    // Otherwise, sink it to the exit block.
1144
3.60k
    Instruction *ToMove = &*I;
1145
3.60k
    bool Done = false;
1146
1147
3.60k
    if (I != Preheader->begin()) {
1148
      // Skip debug info intrinsics.
1149
3.04k
      do {
1150
3.04k
        --I;
1151
3.04k
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1152
1153
3.04k
      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1154
0
        Done = true;
1155
3.04k
    } else {
1156
559
      Done = true;
1157
559
    }
1158
1159
3.60k
    MadeAnyChanges = true;
1160
3.60k
    ToMove->moveBefore(*ExitBlock, InsertPt);
1161
3.60k
    SE->forgetValue(ToMove);
1162
3.60k
    if (Done) break;
1163
3.04k
    InsertPt = ToMove->getIterator();
1164
3.04k
  }
1165
1166
34.2k
  return MadeAnyChanges;
1167
34.2k
}
1168
1169
static void replaceExitCond(BranchInst *BI, Value *NewCond,
1170
3.03k
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1171
3.03k
  auto *OldCond = BI->getCondition();
1172
3.03k
  LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1173
3.03k
                    << " with " << *NewCond << "\n");
1174
3.03k
  BI->setCondition(NewCond);
1175
3.03k
  if (OldCond->use_empty())
1176
2.20k
    DeadInsts.emplace_back(OldCond);
1177
3.03k
}
1178
1179
static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1180
3.17k
                                      bool IsTaken) {
1181
3.17k
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1182
3.17k
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1183
3.17k
  auto *OldCond = BI->getCondition();
1184
3.17k
  return ConstantInt::get(OldCond->getType(),
1185
3.17k
                          IsTaken ? ExitIfTrue : !ExitIfTrue);
1186
3.17k
}
1187
1188
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1189
3.03k
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1190
3.03k
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1191
3.03k
  auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1192
3.03k
  replaceExitCond(BI, NewCond, DeadInsts);
1193
3.03k
}
1194
1195
static void replaceLoopPHINodesWithPreheaderValues(
1196
    LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1197
9.25k
    ScalarEvolution &SE) {
1198
9.25k
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1199
0
  auto *LoopPreheader = L->getLoopPreheader();
1200
9.25k
  auto *LoopHeader = L->getHeader();
1201
9.25k
  SmallVector<Instruction *> Worklist;
1202
34.8k
  for (auto &PN : LoopHeader->phis()) {
1203
34.8k
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1204
34.8k
    for (User *U : PN.users())
1205
66.6k
      Worklist.push_back(cast<Instruction>(U));
1206
34.8k
    SE.forgetValue(&PN);
1207
34.8k
    PN.replaceAllUsesWith(PreheaderIncoming);
1208
34.8k
    DeadInsts.emplace_back(&PN);
1209
34.8k
  }
1210
1211
  // Replacing with the preheader value will often allow IV users to simplify
1212
  // (especially if the preheader value is a constant).
1213
9.25k
  SmallPtrSet<Instruction *, 16> Visited;
1214
134k
  while (!Worklist.empty()) {
1215
125k
    auto *I = cast<Instruction>(Worklist.pop_back_val());
1216
125k
    if (!Visited.insert(I).second)
1217
29.7k
      continue;
1218
1219
    // Don't simplify instructions outside the loop.
1220
95.4k
    if (!L->contains(I))
1221
569
      continue;
1222
1223
94.8k
    Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
1224
94.8k
    if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1225
41.7k
      for (User *U : I->users())
1226
58.5k
        Worklist.push_back(cast<Instruction>(U));
1227
41.7k
      I->replaceAllUsesWith(Res);
1228
41.7k
      DeadInsts.emplace_back(I);
1229
41.7k
    }
1230
94.8k
  }
1231
9.25k
}
1232
1233
static Value *
1234
createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
1235
                    const ScalarEvolution::LoopInvariantPredicate &LIP,
1236
0
                    SCEVExpander &Rewriter) {
1237
0
  ICmpInst::Predicate InvariantPred = LIP.Pred;
1238
0
  BasicBlock *Preheader = L->getLoopPreheader();
1239
0
  assert(Preheader && "Preheader doesn't exist");
1240
0
  Rewriter.setInsertPoint(Preheader->getTerminator());
1241
0
  auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1242
0
  auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1243
0
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1244
0
  if (ExitIfTrue)
1245
0
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1246
0
  IRBuilder<> Builder(Preheader->getTerminator());
1247
0
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1248
0
  return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1249
0
                            BI->getCondition()->getName());
1250
0
}
1251
1252
static std::optional<Value *>
1253
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1254
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1255
239
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
1256
239
  ICmpInst::Predicate Pred = ICmp->getPredicate();
1257
239
  Value *LHS = ICmp->getOperand(0);
1258
239
  Value *RHS = ICmp->getOperand(1);
1259
1260
  // 'LHS pred RHS' should now mean that we stay in loop.
1261
239
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1262
239
  if (Inverted)
1263
86
    Pred = CmpInst::getInversePredicate(Pred);
1264
1265
239
  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1266
239
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1267
  // Can we prove it to be trivially true or false?
1268
239
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1269
136
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1270
1271
103
  auto *ARTy = LHSS->getType();
1272
103
  auto *MaxIterTy = MaxIter->getType();
1273
  // If possible, adjust types.
1274
103
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1275
11
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1276
92
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1277
43
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
1278
43
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1279
43
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1280
0
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1281
43
  }
1282
1283
103
  if (SkipLastIter) {
1284
    // Semantically skip last iter is "subtract 1, do not bother about unsigned
1285
    // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1286
    // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1287
    // So we manually construct umin(a - 1, b - 1).
1288
2
    SmallVector<const SCEV *, 4> Elements;
1289
2
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1290
0
      for (auto *Op : UMin->operands())
1291
0
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1292
0
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1293
0
    } else
1294
2
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1295
2
  }
1296
1297
  // Check if there is a loop-invariant predicate equivalent to our check.
1298
103
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1299
103
                                                               L, BI, MaxIter);
1300
103
  if (!LIP)
1301
103
    return std::nullopt;
1302
1303
  // Can we prove it to be trivially true?
1304
0
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1305
0
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1306
0
  else
1307
0
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1308
0
}
1309
1310
static bool optimizeLoopExitWithUnknownExitCount(
1311
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1312
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1313
538
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1314
538
  assert(
1315
538
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1316
538
      "Not a loop exit!");
1317
1318
  // For branch that stays in loop by TRUE condition, go through AND. For branch
1319
  // that stays in loop by FALSE condition, go through OR. Both gives the
1320
  // similar logic: "stay in loop iff all conditions are true(false)".
1321
0
  bool Inverted = L->contains(BI->getSuccessor(1));
1322
538
  SmallVector<ICmpInst *, 4> LeafConditions;
1323
538
  SmallVector<Value *, 4> Worklist;
1324
538
  SmallPtrSet<Value *, 4> Visited;
1325
538
  Value *OldCond = BI->getCondition();
1326
538
  Visited.insert(OldCond);
1327
538
  Worklist.push_back(OldCond);
1328
1329
538
  auto GoThrough = [&](Value *V) {
1330
451
    Value *LHS = nullptr, *RHS = nullptr;
1331
451
    if (Inverted) {
1332
137
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1333
135
        return false;
1334
314
    } else {
1335
314
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1336
243
        return false;
1337
314
    }
1338
73
    if (Visited.insert(LHS).second)
1339
72
      Worklist.push_back(LHS);
1340
73
    if (Visited.insert(RHS).second)
1341
70
      Worklist.push_back(RHS);
1342
73
    return true;
1343
451
  };
1344
1345
680
  do {
1346
680
    Value *Curr = Worklist.pop_back_val();
1347
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1348
    // those with one use, to avoid instruction duplication.
1349
680
    if (Curr->hasOneUse())
1350
451
      if (!GoThrough(Curr))
1351
378
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1352
239
          LeafConditions.push_back(ICmp);
1353
680
  } while (!Worklist.empty());
1354
1355
  // If the current basic block has the same exit count as the whole loop, and
1356
  // it consists of multiple icmp's, try to collect all icmp's that give exact
1357
  // same exit count. For all other icmp's, we could use one less iteration,
1358
  // because their value on the last iteration doesn't really matter.
1359
538
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1360
538
  if (!SkipLastIter && LeafConditions.size() > 1 &&
1361
538
      SE->getExitCount(L, ExitingBB,
1362
18
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1363
18
          MaxIter)
1364
36
    for (auto *ICmp : LeafConditions) {
1365
36
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1366
36
                                             /*ControlsExit*/ false);
1367
36
      auto *ExitMax = EL.SymbolicMaxNotTaken;
1368
36
      if (isa<SCEVCouldNotCompute>(ExitMax))
1369
16
        continue;
1370
      // They could be of different types (specifically this happens after
1371
      // IV widening).
1372
20
      auto *WiderType =
1373
20
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1374
20
      auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1375
20
      auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1376
20
      if (WideExitMax == WideMaxIter)
1377
15
        ICmpsFailingOnLastIter.insert(ICmp);
1378
20
    }
1379
1380
538
  bool Changed = false;
1381
538
  for (auto *OldCond : LeafConditions) {
1382
    // Skip last iteration for this icmp under one of two conditions:
1383
    // - We do it for all conditions;
1384
    // - There is another ICmp that would fail on last iter, so this one doesn't
1385
    // really matter.
1386
239
    bool OptimisticSkipLastIter = SkipLastIter;
1387
239
    if (!OptimisticSkipLastIter) {
1388
238
      if (ICmpsFailingOnLastIter.size() > 1)
1389
0
        OptimisticSkipLastIter = true;
1390
238
      else if (ICmpsFailingOnLastIter.size() == 1)
1391
26
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1392
238
    }
1393
239
    if (auto Replaced =
1394
239
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1395
239
                              OptimisticSkipLastIter, SE, Rewriter)) {
1396
136
      Changed = true;
1397
136
      auto *NewCond = *Replaced;
1398
136
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1399
0
        NCI->setName(OldCond->getName() + ".first_iter");
1400
0
      }
1401
136
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1402
136
                        << " with " << *NewCond << "\n");
1403
136
      assert(OldCond->hasOneUse() && "Must be!");
1404
0
      OldCond->replaceAllUsesWith(NewCond);
1405
136
      DeadInsts.push_back(OldCond);
1406
      // Make sure we no longer consider this condition as failing on last
1407
      // iteration.
1408
136
      ICmpsFailingOnLastIter.erase(OldCond);
1409
136
    }
1410
239
  }
1411
538
  return Changed;
1412
538
}
1413
1414
35.1k
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1415
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1416
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1417
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
1418
  // it already has flags.  The alternative to this would be to extending the
1419
  // set of "interesting" IV users to include the icmp, but doing that
1420
  // regresses results in practice by querying SCEVs before trip counts which
1421
  // rely on them which results in SCEV caching sub-optimal answers.  The
1422
  // concern about caching sub-optimal results is why we only query SCEVs of
1423
  // the loop invariant RHS here.
1424
35.1k
  SmallVector<BasicBlock*, 16> ExitingBlocks;
1425
35.1k
  L->getExitingBlocks(ExitingBlocks);
1426
35.1k
  bool Changed = false;
1427
37.1k
  for (auto *ExitingBB : ExitingBlocks) {
1428
37.1k
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1429
37.1k
    if (!BI)
1430
0
      continue;
1431
37.1k
    assert(BI->isConditional() && "exit branch must be conditional");
1432
1433
0
    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1434
37.1k
    if (!ICmp || !ICmp->hasOneUse())
1435
29.8k
      continue;
1436
1437
7.28k
    auto *LHS = ICmp->getOperand(0);
1438
7.28k
    auto *RHS = ICmp->getOperand(1);
1439
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
1440
    // poisoning cache with sub-optimal results.  For the must-execute case,
1441
    // this is a neccessary precondition for correctness.
1442
7.28k
    if (!L->isLoopInvariant(RHS)) {
1443
1.78k
      if (!L->isLoopInvariant(LHS))
1444
1.30k
        continue;
1445
      // Same logic applies for the inverse case
1446
482
      std::swap(LHS, RHS);
1447
482
    }
1448
1449
    // Match (icmp signed-cond zext, RHS)
1450
5.97k
    Value *LHSOp = nullptr;
1451
5.97k
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1452
5.96k
      continue;
1453
1454
11
    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1455
11
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1456
11
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1457
11
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
1458
11
    FullCR = FullCR.zeroExtend(OuterBitWidth);
1459
11
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1460
11
    if (FullCR.contains(RHSCR)) {
1461
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1462
      // replace the signed condition with the unsigned version.
1463
1
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
1464
1
      Changed = true;
1465
      // Note: No SCEV invalidation needed.  We've changed the predicate, but
1466
      // have not changed exit counts, or the values produced by the compare.
1467
1
      continue;
1468
1
    }
1469
11
  }
1470
1471
  // Now that we've canonicalized the condition to match the extend,
1472
  // see if we can rotate the extend out of the loop.
1473
37.1k
  for (auto *ExitingBB : ExitingBlocks) {
1474
37.1k
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1475
37.1k
    if (!BI)
1476
0
      continue;
1477
37.1k
    assert(BI->isConditional() && "exit branch must be conditional");
1478
1479
0
    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1480
37.1k
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1481
34.7k
      continue;
1482
1483
2.37k
    bool Swapped = false;
1484
2.37k
    auto *LHS = ICmp->getOperand(0);
1485
2.37k
    auto *RHS = ICmp->getOperand(1);
1486
2.37k
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1487
      // Nothing to rotate
1488
1.79k
      continue;
1489
581
    if (L->isLoopInvariant(LHS)) {
1490
      // Same logic applies for the inverse case until we actually pick
1491
      // which operand of the compare to update.
1492
197
      Swapped = true;
1493
197
      std::swap(LHS, RHS);
1494
197
    }
1495
581
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1496
1497
    // Match (icmp unsigned-cond zext, RHS)
1498
    // TODO: Extend to handle corresponding sext/signed-cmp case
1499
    // TODO: Extend to other invertible functions
1500
0
    Value *LHSOp = nullptr;
1501
581
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1502
574
      continue;
1503
1504
    // In general, we only rotate if we can do so without increasing the number
1505
    // of instructions.  The exception is when we have an zext(add-rec).  The
1506
    // reason for allowing this exception is that we know we need to get rid
1507
    // of the zext for SCEV to be able to compute a trip count for said loops;
1508
    // we consider the new trip count valuable enough to increase instruction
1509
    // count by one.
1510
7
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1511
0
      continue;
1512
1513
    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1514
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1515
    // when zext is loop varying and RHS is loop invariant.  This converts
1516
    // loop varying work to loop-invariant work.
1517
7
    auto doRotateTransform = [&]() {
1518
7
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
1519
0
      auto *NewRHS =
1520
7
        CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
1521
7
                         L->getLoopPreheader()->getTerminator());
1522
7
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1523
7
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1524
7
      if (LHS->use_empty())
1525
0
        DeadInsts.push_back(LHS);
1526
7
    };
1527
1528
1529
7
    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1530
7
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1531
7
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1532
7
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
1533
7
    FullCR = FullCR.zeroExtend(OuterBitWidth);
1534
7
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1535
7
    if (FullCR.contains(RHSCR)) {
1536
7
      doRotateTransform();
1537
7
      Changed = true;
1538
      // Note, we are leaving SCEV in an unfortunately imprecise case here
1539
      // as rotation tends to reveal information about trip counts not
1540
      // previously visible.
1541
7
      continue;
1542
7
    }
1543
7
  }
1544
1545
35.1k
  return Changed;
1546
35.1k
}
1547
1548
35.1k
bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1549
35.1k
  SmallVector<BasicBlock*, 16> ExitingBlocks;
1550
35.1k
  L->getExitingBlocks(ExitingBlocks);
1551
1552
  // Remove all exits which aren't both rewriteable and execute on every
1553
  // iteration.
1554
37.1k
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1555
    // If our exitting block exits multiple loops, we can only rewrite the
1556
    // innermost one.  Otherwise, we're changing how many times the innermost
1557
    // loop runs before it exits.
1558
37.1k
    if (LI->getLoopFor(ExitingBB) != L)
1559
577
      return true;
1560
1561
    // Can't rewrite non-branch yet.
1562
36.5k
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1563
36.5k
    if (!BI)
1564
0
      return true;
1565
1566
    // Likewise, the loop latch must be dominated by the exiting BB.
1567
36.5k
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1568
10.4k
      return true;
1569
1570
26.0k
    if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1571
      // If already constant, nothing to do. However, if this is an
1572
      // unconditional exit, we can still replace header phis with their
1573
      // preheader value.
1574
13.0k
      if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1575
6.22k
        replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1576
13.0k
      return true;
1577
13.0k
    }
1578
1579
13.0k
    return false;
1580
26.0k
  });
1581
1582
35.1k
  if (ExitingBlocks.empty())
1583
22.4k
    return false;
1584
1585
  // Get a symbolic upper bound on the loop backedge taken count.
1586
12.6k
  const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1587
12.6k
  if (isa<SCEVCouldNotCompute>(MaxBECount))
1588
8.29k
    return false;
1589
1590
  // Visit our exit blocks in order of dominance. We know from the fact that
1591
  // all exits must dominate the latch, so there is a total dominance order
1592
  // between them.
1593
4.36k
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1594
               // std::sort sorts in ascending order, so we want the inverse of
1595
               // the normal dominance relation.
1596
274
               if (A == B) return false;
1597
274
               if (DT->properlyDominates(A, B))
1598
0
                 return true;
1599
274
               else {
1600
274
                 assert(DT->properlyDominates(B, A) &&
1601
274
                        "expected total dominance order!");
1602
0
                 return false;
1603
274
               }
1604
274
  });
1605
#ifdef ASSERT
1606
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1607
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1608
  }
1609
#endif
1610
1611
4.36k
  bool Changed = false;
1612
4.36k
  bool SkipLastIter = false;
1613
4.36k
  const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1614
4.64k
  auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1615
4.64k
    if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1616
413
      return;
1617
4.22k
    if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1618
4.20k
      CurrMaxExit = MaxExitCount;
1619
18
    else
1620
18
      CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1621
    // If the loop has more than 1 iteration, all further checks will be
1622
    // executed 1 iteration less.
1623
4.22k
    if (CurrMaxExit == MaxBECount)
1624
4.20k
      SkipLastIter = true;
1625
4.22k
  };
1626
4.36k
  SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1627
4.64k
  for (BasicBlock *ExitingBB : ExitingBlocks) {
1628
4.64k
    const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1629
4.64k
    const SCEV *MaxExitCount = SE->getExitCount(
1630
4.64k
        L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1631
4.64k
    if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1632
      // Okay, we do not know the exit count here. Can we at least prove that it
1633
      // will remain the same within iteration space?
1634
491
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1635
538
      auto OptimizeCond = [&](bool SkipLastIter) {
1636
538
        return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1637
538
                                                    MaxBECount, SkipLastIter,
1638
538
                                                    SE, Rewriter, DeadInsts);
1639
538
      };
1640
1641
      // TODO: We might have proved that we can skip the last iteration for
1642
      // this check. In this case, we only want to check the condition on the
1643
      // pre-last iteration (MaxBECount - 1). However, there is a nasty
1644
      // corner case:
1645
      //
1646
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1647
      //
1648
      // If we could not prove that len != 0, then we also could not prove that
1649
      // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1650
      // OptimizeCond will likely not prove anything for it, even if it could
1651
      // prove the same fact for len.
1652
      //
1653
      // As a temporary solution, we query both last and pre-last iterations in
1654
      // hope that we will be able to prove triviality for at least one of
1655
      // them. We can stop querying MaxBECount for this case once SCEV
1656
      // understands that (MaxBECount - 1) will not overflow here.
1657
491
      if (OptimizeCond(false))
1658
132
        Changed = true;
1659
359
      else if (SkipLastIter && OptimizeCond(true))
1660
0
        Changed = true;
1661
491
      UpdateSkipLastIter(MaxExitCount);
1662
491
      continue;
1663
491
    }
1664
1665
4.14k
    UpdateSkipLastIter(ExactExitCount);
1666
1667
    // If we know we'd exit on the first iteration, rewrite the exit to
1668
    // reflect this.  This does not imply the loop must exit through this
1669
    // exit; there may be an earlier one taken on the first iteration.
1670
    // We know that the backedge can't be taken, so we replace all
1671
    // the header PHIs with values coming from the preheader.
1672
4.14k
    if (ExactExitCount->isZero()) {
1673
3.03k
      foldExit(L, ExitingBB, true, DeadInsts);
1674
3.03k
      replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1675
3.03k
      Changed = true;
1676
3.03k
      continue;
1677
3.03k
    }
1678
1679
1.11k
    assert(ExactExitCount->getType()->isIntegerTy() &&
1680
1.11k
           MaxBECount->getType()->isIntegerTy() &&
1681
1.11k
           "Exit counts must be integers");
1682
1683
0
    Type *WiderType =
1684
1.11k
        SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1685
1.11k
    ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1686
1.11k
    MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1687
1.11k
    assert(MaxBECount->getType() == ExactExitCount->getType());
1688
1689
    // Can we prove that some other exit must be taken strictly before this
1690
    // one?
1691
1.11k
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1692
1.11k
                                     ExactExitCount)) {
1693
6
      foldExit(L, ExitingBB, false, DeadInsts);
1694
6
      Changed = true;
1695
6
      continue;
1696
6
    }
1697
1698
    // As we run, keep track of which exit counts we've encountered.  If we
1699
    // find a duplicate, we've found an exit which would have exited on the
1700
    // exiting iteration, but (from the visit order) strictly follows another
1701
    // which does the same and is thus dead.
1702
1.11k
    if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1703
0
      foldExit(L, ExitingBB, false, DeadInsts);
1704
0
      Changed = true;
1705
0
      continue;
1706
0
    }
1707
1708
    // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1709
    // here.  If we kept track of the min of dominanting exits so far, we could
1710
    // discharge exits with EC >= MDEC. This is less powerful than the existing
1711
    // transform (since later exits aren't considered), but potentially more
1712
    // powerful for any case where SCEV can prove a >=u b, but neither a == b
1713
    // or a >u b.  Such a case is not currently known.
1714
1.11k
  }
1715
4.36k
  return Changed;
1716
12.6k
}
1717
1718
35.1k
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1719
35.1k
  SmallVector<BasicBlock*, 16> ExitingBlocks;
1720
35.1k
  L->getExitingBlocks(ExitingBlocks);
1721
1722
  // Finally, see if we can rewrite our exit conditions into a loop invariant
1723
  // form. If we have a read-only loop, and we can tell that we must exit down
1724
  // a path which does not need any of the values computed within the loop, we
1725
  // can rewrite the loop to exit on the first iteration.  Note that this
1726
  // doesn't either a) tell us the loop exits on the first iteration (unless
1727
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1728
  // This transformation looks a lot like a restricted form of dead loop
1729
  // elimination, but restricted to read-only loops and without neccesssarily
1730
  // needing to kill the loop entirely.
1731
35.1k
  if (!LoopPredication)
1732
0
    return false;
1733
1734
  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1735
  // through *explicit* control flow.  We have to eliminate the possibility of
1736
  // implicit exits (see below) before we know it's truly exact.
1737
35.1k
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1738
35.1k
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1739
25.1k
    return false;
1740
1741
9.97k
  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1742
0
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1743
1744
9.96k
  auto BadExit = [&](BasicBlock *ExitingBB) {
1745
    // If our exiting block exits multiple loops, we can only rewrite the
1746
    // innermost one.  Otherwise, we're changing how many times the innermost
1747
    // loop runs before it exits.
1748
9.96k
    if (LI->getLoopFor(ExitingBB) != L)
1749
13
      return true;
1750
1751
    // Can't rewrite non-branch yet.
1752
9.94k
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1753
9.94k
    if (!BI)
1754
0
      return true;
1755
1756
    // If already constant, nothing to do.
1757
9.94k
    if (isa<Constant>(BI->getCondition()))
1758
8.87k
      return true;
1759
1760
    // If the exit block has phis, we need to be able to compute the values
1761
    // within the loop which contains them.  This assumes trivially lcssa phis
1762
    // have already been removed; TODO: generalize
1763
1.07k
    BasicBlock *ExitBlock =
1764
1.07k
    BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1765
1.07k
    if (!ExitBlock->phis().empty())
1766
698
      return true;
1767
1768
373
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1769
373
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
1770
373
        !Rewriter.isSafeToExpand(ExitCount))
1771
0
      return true;
1772
1773
373
    assert(SE->isLoopInvariant(ExitCount, L) &&
1774
373
           "Exit count must be loop invariant");
1775
0
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1776
0
    return false;
1777
373
  };
1778
1779
  // If we have any exits which can't be predicated themselves, than we can't
1780
  // predicate any exit which isn't guaranteed to execute before it.  Consider
1781
  // two exits (a) and (b) which would both exit on the same iteration.  If we
1782
  // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1783
  // we could convert a loop from exiting through (a) to one exiting through
1784
  // (b).  Note that this problem exists only for exits with the same exit
1785
  // count, and we could be more aggressive when exit counts are known inequal.
1786
9.97k
  llvm::sort(ExitingBlocks,
1787
9.97k
            [&](BasicBlock *A, BasicBlock *B) {
1788
              // std::sort sorts in ascending order, so we want the inverse of
1789
              // the normal dominance relation, plus a tie breaker for blocks
1790
              // unordered by dominance.
1791
512
              if (DT->properlyDominates(A, B)) return true;
1792
512
              if (DT->properlyDominates(B, A)) return false;
1793
10
              return A->getName() < B->getName();
1794
512
            });
1795
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
1796
  // exits before the backedge).  If they aren't, reasoning about reachability
1797
  // is complicated and we choose not to for now.
1798
10.4k
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1799
512
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1800
10
      return false;
1801
1802
  // Given our sorted total order, we know that exit[j] must be evaluated
1803
  // after all exit[i] such j > i.
1804
10.3k
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1805
9.96k
    if (BadExit(ExitingBlocks[i])) {
1806
9.58k
      ExitingBlocks.resize(i);
1807
9.58k
      break;
1808
9.58k
    }
1809
1810
9.96k
  if (ExitingBlocks.empty())
1811
9.58k
    return false;
1812
1813
  // We rely on not being able to reach an exiting block on a later iteration
1814
  // then it's statically compute exit count.  The implementaton of
1815
  // getExitCount currently has this invariant, but assert it here so that
1816
  // breakage is obvious if this ever changes..
1817
373
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1818
373
        return DT->dominates(ExitingBB, L->getLoopLatch());
1819
373
      }));
1820
1821
  // At this point, ExitingBlocks consists of only those blocks which are
1822
  // predicatable.  Given that, we know we have at least one exit we can
1823
  // predicate if the loop is doesn't have side effects and doesn't have any
1824
  // implicit exits (because then our exact BTC isn't actually exact).
1825
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
1826
  // suggestions on how to improve this?  I can obviously bail out for outer
1827
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
1828
  // is that enough for *all* side effects?
1829
0
  for (BasicBlock *BB : L->blocks())
1830
387
    for (auto &I : *BB)
1831
      // TODO:isGuaranteedToTransfer
1832
3.60k
      if (I.mayHaveSideEffects())
1833
265
        return false;
1834
1835
108
  bool Changed = false;
1836
  // Finally, do the actual predication for all predicatable blocks.  A couple
1837
  // of notes here:
1838
  // 1) We don't bother to constant fold dominated exits with identical exit
1839
  //    counts; that's simply a form of CSE/equality propagation and we leave
1840
  //    it for dedicated passes.
1841
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
1842
  //    legality constraints and we leave that to dedicated logic.  We want to
1843
  //    predicate even if we can't insert a loop invariant expression as
1844
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
1845
  //    varying check.
1846
108
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1847
108
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1848
108
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
1849
108
  for (BasicBlock *ExitingBB : ExitingBlocks) {
1850
108
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1851
1852
108
    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1853
108
    Value *NewCond;
1854
108
    if (ExitCount == ExactBTC) {
1855
108
      NewCond = L->contains(BI->getSuccessor(0)) ?
1856
96
        B.getFalse() : B.getTrue();
1857
108
    } else {
1858
0
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
1859
0
      if (!ExactBTCV)
1860
0
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1861
0
      Value *RHS = ExactBTCV;
1862
0
      if (ECV->getType() != RHS->getType()) {
1863
0
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1864
0
        ECV = B.CreateZExt(ECV, WiderTy);
1865
0
        RHS = B.CreateZExt(RHS, WiderTy);
1866
0
      }
1867
0
      auto Pred = L->contains(BI->getSuccessor(0)) ?
1868
0
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1869
0
      NewCond = B.CreateICmp(Pred, ECV, RHS);
1870
0
    }
1871
108
    Value *OldCond = BI->getCondition();
1872
108
    BI->setCondition(NewCond);
1873
108
    if (OldCond->use_empty())
1874
107
      DeadInsts.emplace_back(OldCond);
1875
108
    Changed = true;
1876
108
  }
1877
1878
108
  return Changed;
1879
373
}
1880
1881
//===----------------------------------------------------------------------===//
1882
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
1883
//===----------------------------------------------------------------------===//
1884
1885
35.1k
bool IndVarSimplify::run(Loop *L) {
1886
  // We need (and expect!) the incoming loop to be in LCSSA.
1887
35.1k
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1888
35.1k
         "LCSSA required to run indvars!");
1889
1890
  // If LoopSimplify form is not available, stay out of trouble. Some notes:
1891
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
1892
  //    canonicalization can be a pessimization without LSR to "clean up"
1893
  //    afterwards.
1894
  //  - We depend on having a preheader; in particular,
1895
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
1896
  //    and we're in trouble if we can't find the induction variable even when
1897
  //    we've manually inserted one.
1898
  //  - LFTR relies on having a single backedge.
1899
35.1k
  if (!L->isLoopSimplifyForm())
1900
33
    return false;
1901
1902
35.1k
  bool Changed = false;
1903
  // If there are any floating-point recurrences, attempt to
1904
  // transform them to use integer recurrences.
1905
35.1k
  Changed |= rewriteNonIntegerIVs(L);
1906
1907
  // Create a rewriter object which we'll use to transform the code with.
1908
35.1k
  SCEVExpander Rewriter(*SE, DL, "indvars");
1909
35.1k
#ifndef NDEBUG
1910
35.1k
  Rewriter.setDebugType(DEBUG_TYPE);
1911
35.1k
#endif
1912
1913
  // Eliminate redundant IV users.
1914
  //
1915
  // Simplification works best when run before other consumers of SCEV. We
1916
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1917
  // other expressions involving loop IVs have been evaluated. This helps SCEV
1918
  // set no-wrap flags before normalizing sign/zero extension.
1919
35.1k
  Rewriter.disableCanonicalMode();
1920
35.1k
  Changed |= simplifyAndExtend(L, Rewriter, LI);
1921
1922
  // Check to see if we can compute the final value of any expressions
1923
  // that are recurrent in the loop, and substitute the exit values from the
1924
  // loop into any instructions outside of the loop that use the final values
1925
  // of the current expressions.
1926
35.1k
  if (ReplaceExitValue != NeverRepl) {
1927
35.1k
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1928
35.1k
                                             ReplaceExitValue, DeadInsts)) {
1929
6.05k
      NumReplaced += Rewrites;
1930
6.05k
      Changed = true;
1931
6.05k
    }
1932
35.1k
  }
1933
1934
  // Eliminate redundant IV cycles.
1935
35.1k
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1936
1937
  // Try to convert exit conditions to unsigned and rotate computation
1938
  // out of the loop.  Note: Handles invalidation internally if needed.
1939
35.1k
  Changed |= canonicalizeExitCondition(L);
1940
1941
  // Try to eliminate loop exits based on analyzeable exit counts
1942
35.1k
  if (optimizeLoopExits(L, Rewriter))  {
1943
3.11k
    Changed = true;
1944
    // Given we've changed exit counts, notify SCEV
1945
    // Some nested loops may share same folded exit basic block,
1946
    // thus we need to notify top most loop.
1947
3.11k
    SE->forgetTopmostLoop(L);
1948
3.11k
  }
1949
1950
  // Try to form loop invariant tests for loop exits by changing how many
1951
  // iterations of the loop run when that is unobservable.
1952
35.1k
  if (predicateLoopExits(L, Rewriter)) {
1953
108
    Changed = true;
1954
    // Given we've changed exit counts, notify SCEV
1955
108
    SE->forgetLoop(L);
1956
108
  }
1957
1958
  // If we have a trip count expression, rewrite the loop's exit condition
1959
  // using it.
1960
35.1k
  if (!DisableLFTR) {
1961
35.1k
    BasicBlock *PreHeader = L->getLoopPreheader();
1962
1963
35.1k
    SmallVector<BasicBlock*, 16> ExitingBlocks;
1964
35.1k
    L->getExitingBlocks(ExitingBlocks);
1965
37.1k
    for (BasicBlock *ExitingBB : ExitingBlocks) {
1966
      // Can't rewrite non-branch yet.
1967
37.1k
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
1968
0
        continue;
1969
1970
      // If our exitting block exits multiple loops, we can only rewrite the
1971
      // innermost one.  Otherwise, we're changing how many times the innermost
1972
      // loop runs before it exits.
1973
37.1k
      if (LI->getLoopFor(ExitingBB) != L)
1974
577
        continue;
1975
1976
36.5k
      if (!needsLFTR(L, ExitingBB))
1977
24.9k
        continue;
1978
1979
11.6k
      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1980
11.6k
      if (isa<SCEVCouldNotCompute>(ExitCount))
1981
11.2k
        continue;
1982
1983
      // This was handled above, but as we form SCEVs, we can sometimes refine
1984
      // existing ones; this allows exit counts to be folded to zero which
1985
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
1986
      // until stable to handle cases like this better.
1987
371
      if (ExitCount->isZero())
1988
1
        continue;
1989
1990
370
      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
1991
370
      if (!IndVar)
1992
87
        continue;
1993
1994
      // Avoid high cost expansions.  Note: This heuristic is questionable in
1995
      // that our definition of "high cost" is not exactly principled.
1996
283
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
1997
283
                                       TTI, PreHeader->getTerminator()))
1998
61
        continue;
1999
2000
222
      if (!Rewriter.isSafeToExpand(ExitCount))
2001
0
        continue;
2002
2003
222
      Changed |= linearFunctionTestReplace(L, ExitingBB,
2004
222
                                           ExitCount, IndVar,
2005
222
                                           Rewriter);
2006
222
    }
2007
35.1k
  }
2008
  // Clear the rewriter cache, because values that are in the rewriter's cache
2009
  // can be deleted in the loop below, causing the AssertingVH in the cache to
2010
  // trigger.
2011
35.1k
  Rewriter.clear();
2012
2013
  // Now that we're done iterating through lists, clean up any instructions
2014
  // which are now dead.
2015
150k
  while (!DeadInsts.empty()) {
2016
115k
    Value *V = DeadInsts.pop_back_val();
2017
2018
115k
    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2019
45.3k
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2020
69.9k
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2021
60.2k
      Changed |=
2022
60.2k
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2023
115k
  }
2024
2025
  // The Rewriter may not be used from this point on.
2026
2027
  // Loop-invariant instructions in the preheader that aren't used in the
2028
  // loop may be sunk below the loop to reduce register pressure.
2029
35.1k
  Changed |= sinkUnusedInvariants(L);
2030
2031
  // rewriteFirstIterationLoopExitValues does not rely on the computation of
2032
  // trip count and therefore can further simplify exit values in addition to
2033
  // rewriteLoopExitValues.
2034
35.1k
  Changed |= rewriteFirstIterationLoopExitValues(L);
2035
2036
  // Clean up dead instructions.
2037
35.1k
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2038
2039
  // Check a post-condition.
2040
35.1k
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2041
35.1k
         "Indvars did not preserve LCSSA!");
2042
35.1k
  if (VerifyMemorySSA && MSSAU)
2043
0
    MSSAU->getMemorySSA()->verifyMemorySSA();
2044
2045
35.1k
  return Changed;
2046
35.1k
}
2047
2048
PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2049
                                          LoopStandardAnalysisResults &AR,
2050
35.1k
                                          LPMUpdater &) {
2051
35.1k
  Function *F = L.getHeader()->getParent();
2052
35.1k
  const DataLayout &DL = F->getParent()->getDataLayout();
2053
2054
35.1k
  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2055
35.1k
                     WidenIndVars && AllowIVWidening);
2056
35.1k
  if (!IVS.run(&L))
2057
22.3k
    return PreservedAnalyses::all();
2058
2059
12.8k
  auto PA = getLoopPassPreservedAnalyses();
2060
12.8k
  PA.preserveSet<CFGAnalyses>();
2061
12.8k
  if (AR.MSSA)
2062
0
    PA.preserve<MemorySSAAnalysis>();
2063
12.8k
  return PA;
2064
35.1k
}