Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- InstCombineCompares.cpp --------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the visitICmp and visitFCmp functions.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "InstCombineInternal.h"
14
#include "llvm/ADT/APSInt.h"
15
#include "llvm/ADT/ScopeExit.h"
16
#include "llvm/ADT/SetVector.h"
17
#include "llvm/ADT/Statistic.h"
18
#include "llvm/Analysis/CaptureTracking.h"
19
#include "llvm/Analysis/CmpInstAnalysis.h"
20
#include "llvm/Analysis/ConstantFolding.h"
21
#include "llvm/Analysis/InstructionSimplify.h"
22
#include "llvm/Analysis/Utils/Local.h"
23
#include "llvm/Analysis/VectorUtils.h"
24
#include "llvm/IR/ConstantRange.h"
25
#include "llvm/IR/DataLayout.h"
26
#include "llvm/IR/IntrinsicInst.h"
27
#include "llvm/IR/PatternMatch.h"
28
#include "llvm/Support/KnownBits.h"
29
#include "llvm/Transforms/InstCombine/InstCombiner.h"
30
#include <bitset>
31
32
using namespace llvm;
33
using namespace PatternMatch;
34
35
#define DEBUG_TYPE "instcombine"
36
37
// How many times is a select replaced by one of its operands?
38
STATISTIC(NumSel, "Number of select opts");
39
40
41
/// Compute Result = In1+In2, returning true if the result overflowed for this
42
/// type.
43
static bool addWithOverflow(APInt &Result, const APInt &In1,
44
151
                            const APInt &In2, bool IsSigned = false) {
45
151
  bool Overflow;
46
151
  if (IsSigned)
47
57
    Result = In1.sadd_ov(In2, Overflow);
48
94
  else
49
94
    Result = In1.uadd_ov(In2, Overflow);
50
51
151
  return Overflow;
52
151
}
53
54
/// Compute Result = In1-In2, returning true if the result overflowed for this
55
/// type.
56
static bool subWithOverflow(APInt &Result, const APInt &In1,
57
32
                            const APInt &In2, bool IsSigned = false) {
58
32
  bool Overflow;
59
32
  if (IsSigned)
60
24
    Result = In1.ssub_ov(In2, Overflow);
61
8
  else
62
8
    Result = In1.usub_ov(In2, Overflow);
63
64
32
  return Overflow;
65
32
}
66
67
/// Given an icmp instruction, return true if any use of this comparison is a
68
/// branch on sign bit comparison.
69
7
static bool hasBranchUse(ICmpInst &I) {
70
7
  for (auto *U : I.users())
71
7
    if (isa<BranchInst>(U))
72
2
      return true;
73
5
  return false;
74
7
}
75
76
/// Returns true if the exploded icmp can be expressed as a signed comparison
77
/// to zero and updates the predicate accordingly.
78
/// The signedness of the comparison is preserved.
79
/// TODO: Refactor with decomposeBitTestICmp()?
80
530
static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
81
530
  if (!ICmpInst::isSigned(Pred))
82
218
    return false;
83
84
312
  if (C.isZero())
85
151
    return ICmpInst::isRelational(Pred);
86
87
161
  if (C.isOne()) {
88
33
    if (Pred == ICmpInst::ICMP_SLT) {
89
20
      Pred = ICmpInst::ICMP_SLE;
90
20
      return true;
91
20
    }
92
128
  } else if (C.isAllOnes()) {
93
49
    if (Pred == ICmpInst::ICMP_SGT) {
94
29
      Pred = ICmpInst::ICMP_SGE;
95
29
      return true;
96
29
    }
97
49
  }
98
99
112
  return false;
100
161
}
101
102
/// This is called when we see this pattern:
103
///   cmp pred (load (gep GV, ...)), cmpcst
104
/// where GV is a global variable with a constant initializer. Try to simplify
105
/// this into some simple computation that does not need the load. For example
106
/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
107
///
108
/// If AndCst is non-null, then the loaded value is masked with that constant
109
/// before doing the comparison. This handles cases like "A[i]&4 == 0".
110
Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
111
    LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
112
177
    ConstantInt *AndCst) {
113
177
  if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
114
177
      GV->getValueType() != GEP->getSourceElementType() || !GV->isConstant() ||
115
177
      !GV->hasDefinitiveInitializer())
116
84
    return nullptr;
117
118
93
  Constant *Init = GV->getInitializer();
119
93
  if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
120
32
    return nullptr;
121
122
61
  uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
123
  // Don't blow up on huge arrays.
124
61
  if (ArrayElementCount > MaxArraySizeForCombine)
125
0
    return nullptr;
126
127
  // There are many forms of this optimization we can handle, for now, just do
128
  // the simple index into a single-dimensional array.
129
  //
130
  // Require: GEP GV, 0, i {{, constant indices}}
131
61
  if (GEP->getNumOperands() < 3 || !isa<ConstantInt>(GEP->getOperand(1)) ||
132
61
      !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
133
61
      isa<Constant>(GEP->getOperand(2)))
134
0
    return nullptr;
135
136
  // Check that indices after the variable are constants and in-range for the
137
  // type they index.  Collect the indices.  This is typically for arrays of
138
  // structs.
139
61
  SmallVector<unsigned, 4> LaterIndices;
140
141
61
  Type *EltTy = Init->getType()->getArrayElementType();
142
82
  for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
143
21
    ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
144
21
    if (!Idx)
145
0
      return nullptr; // Variable index.
146
147
21
    uint64_t IdxVal = Idx->getZExtValue();
148
21
    if ((unsigned)IdxVal != IdxVal)
149
0
      return nullptr; // Too large array index.
150
151
21
    if (StructType *STy = dyn_cast<StructType>(EltTy))
152
21
      EltTy = STy->getElementType(IdxVal);
153
0
    else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
154
0
      if (IdxVal >= ATy->getNumElements())
155
0
        return nullptr;
156
0
      EltTy = ATy->getElementType();
157
0
    } else {
158
0
      return nullptr; // Unknown type.
159
0
    }
160
161
21
    LaterIndices.push_back(IdxVal);
162
21
  }
163
164
61
  enum { Overdefined = -3, Undefined = -2 };
165
166
  // Variables for our state machines.
167
168
  // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
169
  // "i == 47 | i == 87", where 47 is the first index the condition is true for,
170
  // and 87 is the second (and last) index.  FirstTrueElement is -2 when
171
  // undefined, otherwise set to the first true element.  SecondTrueElement is
172
  // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
173
61
  int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
174
175
  // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
176
  // form "i != 47 & i != 87".  Same state transitions as for true elements.
177
61
  int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
178
179
  /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
180
  /// define a state machine that triggers for ranges of values that the index
181
  /// is true or false for.  This triggers on things like "abbbbc"[i] == 'b'.
182
  /// This is -2 when undefined, -3 when overdefined, and otherwise the last
183
  /// index in the range (inclusive).  We use -2 for undefined here because we
184
  /// use relative comparisons and don't want 0-1 to match -1.
185
61
  int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
186
187
  // MagicBitvector - This is a magic bitvector where we set a bit if the
188
  // comparison is true for element 'i'.  If there are 64 elements or less in
189
  // the array, this will fully represent all the comparison results.
190
61
  uint64_t MagicBitvector = 0;
191
192
  // Scan the array and see if one of our patterns matches.
193
61
  Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
194
545
  for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
195
484
    Constant *Elt = Init->getAggregateElement(i);
196
484
    if (!Elt)
197
0
      return nullptr;
198
199
    // If this is indexing an array of structures, get the structure element.
200
484
    if (!LaterIndices.empty()) {
201
84
      Elt = ConstantFoldExtractValueInstruction(Elt, LaterIndices);
202
84
      if (!Elt)
203
0
        return nullptr;
204
84
    }
205
206
    // If the element is masked, handle it.
207
484
    if (AndCst) {
208
70
      Elt = ConstantFoldBinaryOpOperands(Instruction::And, Elt, AndCst, DL);
209
70
      if (!Elt)
210
0
        return nullptr;
211
70
    }
212
213
    // Find out if the comparison would be true or false for the i'th element.
214
484
    Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
215
484
                                                  CompareRHS, DL, &TLI);
216
    // If the result is undef for this element, ignore it.
217
484
    if (isa<UndefValue>(C)) {
218
      // Extend range state machines to cover this element in case there is an
219
      // undef in the middle of the range.
220
0
      if (TrueRangeEnd == (int)i - 1)
221
0
        TrueRangeEnd = i;
222
0
      if (FalseRangeEnd == (int)i - 1)
223
0
        FalseRangeEnd = i;
224
0
      continue;
225
0
    }
226
227
    // If we can't compute the result for any of the elements, we have to give
228
    // up evaluating the entire conditional.
229
484
    if (!isa<ConstantInt>(C))
230
0
      return nullptr;
231
232
    // Otherwise, we know if the comparison is true or false for this element,
233
    // update our state machines.
234
484
    bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
235
236
    // State machine for single/double/range index comparison.
237
484
    if (IsTrueForElt) {
238
      // Update the TrueElement state machine.
239
216
      if (FirstTrueElement == Undefined)
240
61
        FirstTrueElement = TrueRangeEnd = i; // First true element.
241
155
      else {
242
        // Update double-compare state machine.
243
155
        if (SecondTrueElement == Undefined)
244
45
          SecondTrueElement = i;
245
110
        else
246
110
          SecondTrueElement = Overdefined;
247
248
        // Update range state machine.
249
155
        if (TrueRangeEnd == (int)i - 1)
250
21
          TrueRangeEnd = i;
251
134
        else
252
134
          TrueRangeEnd = Overdefined;
253
155
      }
254
268
    } else {
255
      // Update the FalseElement state machine.
256
268
      if (FirstFalseElement == Undefined)
257
61
        FirstFalseElement = FalseRangeEnd = i; // First false element.
258
207
      else {
259
        // Update double-compare state machine.
260
207
        if (SecondFalseElement == Undefined)
261
33
          SecondFalseElement = i;
262
174
        else
263
174
          SecondFalseElement = Overdefined;
264
265
        // Update range state machine.
266
207
        if (FalseRangeEnd == (int)i - 1)
267
135
          FalseRangeEnd = i;
268
72
        else
269
72
          FalseRangeEnd = Overdefined;
270
207
      }
271
268
    }
272
273
    // If this element is in range, update our magic bitvector.
274
484
    if (i < 64 && IsTrueForElt)
275
216
      MagicBitvector |= 1ULL << i;
276
277
    // If all of our states become overdefined, bail out early.  Since the
278
    // predicate is expensive, only check it every 8 elements.  This is only
279
    // really useful for really huge arrays.
280
484
    if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
281
484
        SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
282
484
        FalseRangeEnd == Overdefined)
283
0
      return nullptr;
284
484
  }
285
286
  // Now that we've scanned the entire array, emit our new comparison(s).  We
287
  // order the state machines in complexity of the generated code.
288
61
  Value *Idx = GEP->getOperand(2);
289
290
  // If the index is larger than the pointer offset size of the target, truncate
291
  // the index down like the GEP would do implicitly.  We don't have to do this
292
  // for an inbounds GEP because the index can't be out of range.
293
61
  if (!GEP->isInBounds()) {
294
22
    Type *PtrIdxTy = DL.getIndexType(GEP->getType());
295
22
    unsigned OffsetSize = PtrIdxTy->getIntegerBitWidth();
296
22
    if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > OffsetSize)
297
0
      Idx = Builder.CreateTrunc(Idx, PtrIdxTy);
298
22
  }
299
300
  // If inbounds keyword is not present, Idx * ElementSize can overflow.
301
  // Let's assume that ElementSize is 2 and the wanted value is at offset 0.
302
  // Then, there are two possible values for Idx to match offset 0:
303
  // 0x00..00, 0x80..00.
304
  // Emitting 'icmp eq Idx, 0' isn't correct in this case because the
305
  // comparison is false if Idx was 0x80..00.
306
  // We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
307
61
  unsigned ElementSize =
308
61
      DL.getTypeAllocSize(Init->getType()->getArrayElementType());
309
61
  auto MaskIdx = [&](Value *Idx) {
310
61
    if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) {
311
22
      Value *Mask = ConstantInt::get(Idx->getType(), -1);
312
22
      Mask = Builder.CreateLShr(Mask, llvm::countr_zero(ElementSize));
313
22
      Idx = Builder.CreateAnd(Idx, Mask);
314
22
    }
315
61
    return Idx;
316
61
  };
317
318
  // If the comparison is only true for one or two elements, emit direct
319
  // comparisons.
320
61
  if (SecondTrueElement != Overdefined) {
321
23
    Idx = MaskIdx(Idx);
322
    // None true -> false.
323
23
    if (FirstTrueElement == Undefined)
324
0
      return replaceInstUsesWith(ICI, Builder.getFalse());
325
326
23
    Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
327
328
    // True for one element -> 'i == 47'.
329
23
    if (SecondTrueElement == Undefined)
330
16
      return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
331
332
    // True for two elements -> 'i == 47 | i == 72'.
333
7
    Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx);
334
7
    Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
335
7
    Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx);
336
7
    return BinaryOperator::CreateOr(C1, C2);
337
23
  }
338
339
  // If the comparison is only false for one or two elements, emit direct
340
  // comparisons.
341
38
  if (SecondFalseElement != Overdefined) {
342
28
    Idx = MaskIdx(Idx);
343
    // None false -> true.
344
28
    if (FirstFalseElement == Undefined)
345
0
      return replaceInstUsesWith(ICI, Builder.getTrue());
346
347
28
    Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
348
349
    // False for one element -> 'i != 47'.
350
28
    if (SecondFalseElement == Undefined)
351
28
      return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
352
353
    // False for two elements -> 'i != 47 & i != 72'.
354
0
    Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
355
0
    Value *SecondFalseIdx =
356
0
        ConstantInt::get(Idx->getType(), SecondFalseElement);
357
0
    Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx);
358
0
    return BinaryOperator::CreateAnd(C1, C2);
359
28
  }
360
361
  // If the comparison can be replaced with a range comparison for the elements
362
  // where it is true, emit the range check.
363
10
  if (TrueRangeEnd != Overdefined) {
364
0
    assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
365
0
    Idx = MaskIdx(Idx);
366
367
    // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
368
0
    if (FirstTrueElement) {
369
0
      Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
370
0
      Idx = Builder.CreateAdd(Idx, Offs);
371
0
    }
372
373
0
    Value *End =
374
0
        ConstantInt::get(Idx->getType(), TrueRangeEnd - FirstTrueElement + 1);
375
0
    return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
376
0
  }
377
378
  // False range check.
379
10
  if (FalseRangeEnd != Overdefined) {
380
0
    assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
381
0
    Idx = MaskIdx(Idx);
382
    // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
383
0
    if (FirstFalseElement) {
384
0
      Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
385
0
      Idx = Builder.CreateAdd(Idx, Offs);
386
0
    }
387
388
0
    Value *End =
389
0
        ConstantInt::get(Idx->getType(), FalseRangeEnd - FirstFalseElement);
390
0
    return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
391
0
  }
392
393
  // If a magic bitvector captures the entire comparison state
394
  // of this load, replace it with computation that does:
395
  //   ((magic_cst >> i) & 1) != 0
396
10
  {
397
10
    Type *Ty = nullptr;
398
399
    // Look for an appropriate type:
400
    // - The type of Idx if the magic fits
401
    // - The smallest fitting legal type
402
10
    if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
403
10
      Ty = Idx->getType();
404
0
    else
405
0
      Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
406
407
10
    if (Ty) {
408
10
      Idx = MaskIdx(Idx);
409
10
      Value *V = Builder.CreateIntCast(Idx, Ty, false);
410
10
      V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
411
10
      V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);
412
10
      return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
413
10
    }
414
10
  }
415
416
0
  return nullptr;
417
10
}
418
419
/// Returns true if we can rewrite Start as a GEP with pointer Base
420
/// and some integer offset. The nodes that need to be re-written
421
/// for this transformation will be added to Explored.
422
static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
423
                                  const DataLayout &DL,
424
265
                                  SetVector<Value *> &Explored) {
425
265
  SmallVector<Value *, 16> WorkList(1, Start);
426
265
  Explored.insert(Base);
427
428
  // The following traversal gives us an order which can be used
429
  // when doing the final transformation. Since in the final
430
  // transformation we create the PHI replacement instructions first,
431
  // we don't have to get them in any particular order.
432
  //
433
  // However, for other instructions we will have to traverse the
434
  // operands of an instruction first, which means that we have to
435
  // do a post-order traversal.
436
450
  while (!WorkList.empty()) {
437
393
    SetVector<PHINode *> PHIs;
438
439
657
    while (!WorkList.empty()) {
440
472
      if (Explored.size() >= 100)
441
0
        return false;
442
443
472
      Value *V = WorkList.back();
444
445
472
      if (Explored.contains(V)) {
446
0
        WorkList.pop_back();
447
0
        continue;
448
0
      }
449
450
472
      if (!isa<GetElementPtrInst>(V) && !isa<PHINode>(V))
451
        // We've found some value that we can't explore which is different from
452
        // the base. Therefore we can't do this transformation.
453
90
        return false;
454
455
382
      if (auto *GEP = dyn_cast<GEPOperator>(V)) {
456
        // Only allow inbounds GEPs with at most one variable offset.
457
247
        auto IsNonConst = [](Value *V) { return !isa<ConstantInt>(V); };
458
247
        if (!GEP->isInBounds() || count_if(GEP->indices(), IsNonConst) > 1)
459
111
          return false;
460
461
136
        if (!Explored.contains(GEP->getOperand(0)))
462
18
          WorkList.push_back(GEP->getOperand(0));
463
136
      }
464
465
271
      if (WorkList.back() == V) {
466
253
        WorkList.pop_back();
467
        // We've finished visiting this node, mark it as such.
468
253
        Explored.insert(V);
469
253
      }
470
471
271
      if (auto *PN = dyn_cast<PHINode>(V)) {
472
        // We cannot transform PHIs on unsplittable basic blocks.
473
135
        if (isa<CatchSwitchInst>(PN->getParent()->getTerminator()))
474
7
          return false;
475
128
        Explored.insert(PN);
476
128
        PHIs.insert(PN);
477
128
      }
478
271
    }
479
480
    // Explore the PHI nodes further.
481
185
    for (auto *PN : PHIs)
482
128
      for (Value *Op : PN->incoming_values())
483
259
        if (!Explored.contains(Op))
484
249
          WorkList.push_back(Op);
485
185
  }
486
487
  // Make sure that we can do this. Since we can't insert GEPs in a basic
488
  // block before a PHI node, we can't easily do this transformation if
489
  // we have PHI node users of transformed instructions.
490
226
  for (Value *Val : Explored) {
491
438
    for (Value *Use : Val->uses()) {
492
493
438
      auto *PHI = dyn_cast<PHINode>(Use);
494
438
      auto *Inst = dyn_cast<Instruction>(Val);
495
496
438
      if (Inst == Base || Inst == PHI || !Inst || !PHI ||
497
438
          !Explored.contains(PHI))
498
438
        continue;
499
500
0
      if (PHI->getParent() == Inst->getParent())
501
0
        return false;
502
0
    }
503
226
  }
504
57
  return true;
505
57
}
506
507
// Sets the appropriate insert point on Builder where we can add
508
// a replacement Instruction for V (if that is possible).
509
static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
510
282
                              bool Before = true) {
511
282
  if (auto *PHI = dyn_cast<PHINode>(V)) {
512
56
    BasicBlock *Parent = PHI->getParent();
513
56
    Builder.SetInsertPoint(Parent, Parent->getFirstInsertionPt());
514
56
    return;
515
56
  }
516
226
  if (auto *I = dyn_cast<Instruction>(V)) {
517
226
    if (!Before)
518
113
      I = &*std::next(I->getIterator());
519
226
    Builder.SetInsertPoint(I);
520
226
    return;
521
226
  }
522
0
  if (auto *A = dyn_cast<Argument>(V)) {
523
    // Set the insertion point in the entry block.
524
0
    BasicBlock &Entry = A->getParent()->getEntryBlock();
525
0
    Builder.SetInsertPoint(&Entry, Entry.getFirstInsertionPt());
526
0
    return;
527
0
  }
528
  // Otherwise, this is a constant and we don't need to set a new
529
  // insertion point.
530
0
  assert(isa<Constant>(V) && "Setting insertion point for unknown value!");
531
0
}
532
533
/// Returns a re-written value of Start as an indexed GEP using Base as a
534
/// pointer.
535
static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
536
                                 const DataLayout &DL,
537
                                 SetVector<Value *> &Explored,
538
57
                                 InstCombiner &IC) {
539
  // Perform all the substitutions. This is a bit tricky because we can
540
  // have cycles in our use-def chains.
541
  // 1. Create the PHI nodes without any incoming values.
542
  // 2. Create all the other values.
543
  // 3. Add the edges for the PHI nodes.
544
  // 4. Emit GEPs to get the original pointers.
545
  // 5. Remove the original instructions.
546
57
  Type *IndexType = IntegerType::get(
547
57
      Base->getContext(), DL.getIndexTypeSizeInBits(Start->getType()));
548
549
57
  DenseMap<Value *, Value *> NewInsts;
550
57
  NewInsts[Base] = ConstantInt::getNullValue(IndexType);
551
552
  // Create the new PHI nodes, without adding any incoming values.
553
226
  for (Value *Val : Explored) {
554
226
    if (Val == Base)
555
57
      continue;
556
    // Create empty phi nodes. This avoids cyclic dependencies when creating
557
    // the remaining instructions.
558
169
    if (auto *PHI = dyn_cast<PHINode>(Val))
559
56
      NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(),
560
56
                                      PHI->getName() + ".idx", PHI);
561
169
  }
562
57
  IRBuilder<> Builder(Base->getContext());
563
564
  // Create all the other instructions.
565
226
  for (Value *Val : Explored) {
566
226
    if (NewInsts.contains(Val))
567
113
      continue;
568
569
113
    if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
570
113
      setInsertionPoint(Builder, GEP);
571
113
      Value *Op = NewInsts[GEP->getOperand(0)];
572
113
      Value *OffsetV = emitGEPOffset(&Builder, DL, GEP);
573
113
      if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
574
57
        NewInsts[GEP] = OffsetV;
575
56
      else
576
56
        NewInsts[GEP] = Builder.CreateNSWAdd(
577
56
            Op, OffsetV, GEP->getOperand(0)->getName() + ".add");
578
113
      continue;
579
113
    }
580
0
    if (isa<PHINode>(Val))
581
0
      continue;
582
583
0
    llvm_unreachable("Unexpected instruction type");
584
0
  }
585
586
  // Add the incoming values to the PHI nodes.
587
226
  for (Value *Val : Explored) {
588
226
    if (Val == Base)
589
57
      continue;
590
    // All the instructions have been created, we can now add edges to the
591
    // phi nodes.
592
169
    if (auto *PHI = dyn_cast<PHINode>(Val)) {
593
56
      PHINode *NewPhi = static_cast<PHINode *>(NewInsts[PHI]);
594
168
      for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) {
595
112
        Value *NewIncoming = PHI->getIncomingValue(I);
596
597
112
        if (NewInsts.contains(NewIncoming))
598
112
          NewIncoming = NewInsts[NewIncoming];
599
600
112
        NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I));
601
112
      }
602
56
    }
603
169
  }
604
605
226
  for (Value *Val : Explored) {
606
226
    if (Val == Base)
607
57
      continue;
608
609
169
    setInsertionPoint(Builder, Val, false);
610
    // Create GEP for external users.
611
169
    Value *NewVal = Builder.CreateInBoundsGEP(
612
169
        Builder.getInt8Ty(), Base, NewInsts[Val], Val->getName() + ".ptr");
613
169
    IC.replaceInstUsesWith(*cast<Instruction>(Val), NewVal);
614
    // Add old instruction to worklist for DCE. We don't directly remove it
615
    // here because the original compare is one of the users.
616
169
    IC.addToWorklist(cast<Instruction>(Val));
617
169
  }
618
619
57
  return NewInsts[Start];
620
57
}
621
622
/// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
623
/// We can look through PHIs, GEPs and casts in order to determine a common base
624
/// between GEPLHS and RHS.
625
static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
626
                                              ICmpInst::Predicate Cond,
627
                                              const DataLayout &DL,
628
474
                                              InstCombiner &IC) {
629
  // FIXME: Support vector of pointers.
630
474
  if (GEPLHS->getType()->isVectorTy())
631
24
    return nullptr;
632
633
450
  if (!GEPLHS->hasAllConstantIndices())
634
185
    return nullptr;
635
636
265
  APInt Offset(DL.getIndexTypeSizeInBits(GEPLHS->getType()), 0);
637
265
  Value *PtrBase =
638
265
      GEPLHS->stripAndAccumulateConstantOffsets(DL, Offset,
639
265
                                                /*AllowNonInbounds*/ false);
640
641
  // Bail if we looked through addrspacecast.
642
265
  if (PtrBase->getType() != GEPLHS->getType())
643
0
    return nullptr;
644
645
  // The set of nodes that will take part in this transformation.
646
265
  SetVector<Value *> Nodes;
647
648
265
  if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes))
649
208
    return nullptr;
650
651
  // We know we can re-write this as
652
  //  ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)
653
  // Since we've only looked through inbouds GEPs we know that we
654
  // can't have overflow on either side. We can therefore re-write
655
  // this as:
656
  //   OFFSET1 cmp OFFSET2
657
57
  Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes, IC);
658
659
  // RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written
660
  // GEP having PtrBase as the pointer base, and has returned in NewRHS the
661
  // offset. Since Index is the offset of LHS to the base pointer, we will now
662
  // compare the offsets instead of comparing the pointers.
663
57
  return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
664
57
                      IC.Builder.getInt(Offset), NewRHS);
665
265
}
666
667
/// Fold comparisons between a GEP instruction and something else. At this point
668
/// we know that the GEP is on the LHS of the comparison.
669
Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
670
                                           ICmpInst::Predicate Cond,
671
674
                                           Instruction &I) {
672
  // Don't transform signed compares of GEPs into index compares. Even if the
673
  // GEP is inbounds, the final add of the base pointer can have signed overflow
674
  // and would change the result of the icmp.
675
  // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
676
  // the maximum signed value for the pointer type.
677
674
  if (ICmpInst::isSigned(Cond))
678
32
    return nullptr;
679
680
  // Look through bitcasts and addrspacecasts. We do not however want to remove
681
  // 0 GEPs.
682
642
  if (!isa<GetElementPtrInst>(RHS))
683
446
    RHS = RHS->stripPointerCasts();
684
685
642
  Value *PtrBase = GEPLHS->getOperand(0);
686
642
  if (PtrBase == RHS && (GEPLHS->isInBounds() || ICmpInst::isEquality(Cond))) {
687
    // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
688
78
    Value *Offset = EmitGEPOffset(GEPLHS);
689
78
    return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
690
78
                        Constant::getNullValue(Offset->getType()));
691
78
  }
692
693
564
  if (GEPLHS->isInBounds() && ICmpInst::isEquality(Cond) &&
694
564
      isa<Constant>(RHS) && cast<Constant>(RHS)->isNullValue() &&
695
564
      !NullPointerIsDefined(I.getFunction(),
696
46
                            RHS->getType()->getPointerAddressSpace())) {
697
    // For most address spaces, an allocation can't be placed at null, but null
698
    // itself is treated as a 0 size allocation in the in bounds rules.  Thus,
699
    // the only valid inbounds address derived from null, is null itself.
700
    // Thus, we have four cases to consider:
701
    // 1) Base == nullptr, Offset == 0 -> inbounds, null
702
    // 2) Base == nullptr, Offset != 0 -> poison as the result is out of bounds
703
    // 3) Base != nullptr, Offset == (-base) -> poison (crossing allocations)
704
    // 4) Base != nullptr, Offset != (-base) -> nonnull (and possibly poison)
705
    //
706
    // (Note if we're indexing a type of size 0, that simply collapses into one
707
    //  of the buckets above.)
708
    //
709
    // In general, we're allowed to make values less poison (i.e. remove
710
    //   sources of full UB), so in this case, we just select between the two
711
    //   non-poison cases (1 and 4 above).
712
    //
713
    // For vectors, we apply the same reasoning on a per-lane basis.
714
43
    auto *Base = GEPLHS->getPointerOperand();
715
43
    if (GEPLHS->getType()->isVectorTy() && Base->getType()->isPointerTy()) {
716
30
      auto EC = cast<VectorType>(GEPLHS->getType())->getElementCount();
717
30
      Base = Builder.CreateVectorSplat(EC, Base);
718
30
    }
719
43
    return new ICmpInst(Cond, Base,
720
43
                        ConstantExpr::getPointerBitCastOrAddrSpaceCast(
721
43
                            cast<Constant>(RHS), Base->getType()));
722
521
  } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
723
    // If the base pointers are different, but the indices are the same, just
724
    // compare the base pointer.
725
205
    if (PtrBase != GEPRHS->getOperand(0)) {
726
96
      bool IndicesTheSame =
727
96
          GEPLHS->getNumOperands() == GEPRHS->getNumOperands() &&
728
96
          GEPLHS->getPointerOperand()->getType() ==
729
88
              GEPRHS->getPointerOperand()->getType() &&
730
96
          GEPLHS->getSourceElementType() == GEPRHS->getSourceElementType();
731
96
      if (IndicesTheSame)
732
70
        for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
733
65
          if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
734
57
            IndicesTheSame = false;
735
57
            break;
736
57
          }
737
738
      // If all indices are the same, just compare the base pointers.
739
96
      Type *BaseType = GEPLHS->getOperand(0)->getType();
740
96
      if (IndicesTheSame && CmpInst::makeCmpResultType(BaseType) == I.getType())
741
5
        return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
742
743
      // If we're comparing GEPs with two base pointers that only differ in type
744
      // and both GEPs have only constant indices or just one use, then fold
745
      // the compare with the adjusted indices.
746
      // FIXME: Support vector of pointers.
747
91
      if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
748
91
          (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
749
91
          (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
750
91
          PtrBase->stripPointerCasts() ==
751
27
              GEPRHS->getOperand(0)->stripPointerCasts() &&
752
91
          !GEPLHS->getType()->isVectorTy()) {
753
5
        Value *LOffset = EmitGEPOffset(GEPLHS);
754
5
        Value *ROffset = EmitGEPOffset(GEPRHS);
755
756
        // If we looked through an addrspacecast between different sized address
757
        // spaces, the LHS and RHS pointers are different sized
758
        // integers. Truncate to the smaller one.
759
5
        Type *LHSIndexTy = LOffset->getType();
760
5
        Type *RHSIndexTy = ROffset->getType();
761
5
        if (LHSIndexTy != RHSIndexTy) {
762
0
          if (LHSIndexTy->getPrimitiveSizeInBits().getFixedValue() <
763
0
              RHSIndexTy->getPrimitiveSizeInBits().getFixedValue()) {
764
0
            ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy);
765
0
          } else
766
0
            LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy);
767
0
        }
768
769
5
        Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond),
770
5
                                        LOffset, ROffset);
771
5
        return replaceInstUsesWith(I, Cmp);
772
5
      }
773
774
      // Otherwise, the base pointers are different and the indices are
775
      // different. Try convert this to an indexed compare by looking through
776
      // PHIs/casts.
777
86
      return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this);
778
91
    }
779
780
109
    bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
781
109
    if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands() &&
782
109
        GEPLHS->getSourceElementType() == GEPRHS->getSourceElementType()) {
783
      // If the GEPs only differ by one index, compare it.
784
55
      unsigned NumDifferences = 0;  // Keep track of # differences.
785
55
      unsigned DiffOperand = 0;     // The operand that differs.
786
119
      for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
787
65
        if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
788
44
          Type *LHSType = GEPLHS->getOperand(i)->getType();
789
44
          Type *RHSType = GEPRHS->getOperand(i)->getType();
790
          // FIXME: Better support for vector of pointers.
791
44
          if (LHSType->getPrimitiveSizeInBits() !=
792
44
                   RHSType->getPrimitiveSizeInBits() ||
793
44
              (GEPLHS->getType()->isVectorTy() &&
794
44
               (!LHSType->isVectorTy() || !RHSType->isVectorTy()))) {
795
            // Irreconcilable differences.
796
1
            NumDifferences = 2;
797
1
            break;
798
1
          }
799
800
43
          if (NumDifferences++) break;
801
43
          DiffOperand = i;
802
43
        }
803
804
55
      if (NumDifferences == 0)   // SAME GEP?
805
11
        return replaceInstUsesWith(I, // No comparison is needed here.
806
11
          ConstantInt::get(I.getType(), ICmpInst::isTrueWhenEqual(Cond)));
807
808
44
      else if (NumDifferences == 1 && GEPsInBounds) {
809
14
        Value *LHSV = GEPLHS->getOperand(DiffOperand);
810
14
        Value *RHSV = GEPRHS->getOperand(DiffOperand);
811
        // Make sure we do a signed comparison here.
812
14
        return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
813
14
      }
814
55
    }
815
816
    // Only lower this if the icmp is the only user of the GEP or if we expect
817
    // the result to fold to a constant!
818
84
    if ((GEPsInBounds || CmpInst::isEquality(Cond)) &&
819
84
        (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
820
84
        (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse())) {
821
      // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)  --->  (OFFSET1 cmp OFFSET2)
822
12
      Value *L = EmitGEPOffset(GEPLHS);
823
12
      Value *R = EmitGEPOffset(GEPRHS);
824
12
      return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
825
12
    }
826
84
  }
827
828
  // Try convert this to an indexed compare by looking through PHIs/casts as a
829
  // last resort.
830
388
  return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this);
831
564
}
832
833
47
bool InstCombinerImpl::foldAllocaCmp(AllocaInst *Alloca) {
  // Folding every comparison between an alloca and an unrelated pointer (an
  // argument, say) looks attractive, but is not sound in general: pointers
  // that cannot alias may nevertheless compare equal.
  //
  // LLVM does not say where an alloca gets its memory from, though. As long as
  // the alloca does not escape, nobody can guess its address, so every such
  // guess may be treated as wrong.
  //
  // That reasoning only holds if it is applied consistently. Folding one
  // comparison to false while leaving another comparison against the same
  // value in place (which might then be true at runtime) would be a
  // contradiction. Hence all comparisons are folded together, and only when
  // the alloca has no other escapes.

  struct EqualityCmpTracker : public CaptureTracker {
    AllocaInst *Alloca;
    bool Captured = false;
    /// For every collected icmp, a bit mask recording which of its operands
    /// the alloca feeds into.
    SmallMapVector<ICmpInst *, unsigned, 4> ICmps;

    EqualityCmpTracker(AllocaInst *Alloca) : Alloca(Alloca) {}

    void tooManyUses() override { Captured = true; }

    bool captured(const Use *U) override {
      auto *Cmp = dyn_cast<ICmpInst>(U->getUser());
      // The use has to be based *only* on the alloca, with no contribution
      // from some other select/phi operand.
      // TODO: We could check whether getUnderlyingObjects() reduces to one
      // object, which would allow looking through phi nodes.
      const bool IsAllocaOnlyEquality =
          Cmp && Cmp->isEquality() && getUnderlyingObject(*U) == Alloca;
      if (!IsAllocaOnlyEquality) {
        Captured = true;
        return true;
      }

      // Equality icmps of the alloca are recorded rather than counted as
      // captures.
      ICmps.insert({Cmp, 0}).first->second |= 1u << U->getOperandNo();
      return false;
    }
  };

  EqualityCmpTracker Tracker(Alloca);
  PointerMayBeCaptured(Alloca, &Tracker);
  if (Tracker.Captured)
    return false;

  bool Changed = false;
  for (auto [Cmp, OperandMask] : Tracker.ICmps) {
    // Mask 3: both operands derive from the alloca. Such a compare looks at
    // pointer offsets only and reveals nothing about the alloca's address, so
    // it is left alone.
    if (OperandMask == 3)
      continue;

    if (OperandMask != 1 && OperandMask != 2)
      llvm_unreachable("Cannot happen");

    // The alloca reaches exactly one operand: assume the pointers differ.
    auto *Folded = ConstantInt::get(
        Cmp->getType(), Cmp->getPredicate() == ICmpInst::ICMP_NE);
    replaceInstUsesWith(*Cmp, Folded);
    eraseInstFromFunction(*Cmp);
    Changed = true;
  }

  return Changed;
}
909
910
/// Fold "icmp pred (X+C), X".
911
Instruction *InstCombinerImpl::foldICmpAddOpConst(Value *X, const APInt &C,
912
117
                                                  ICmpInst::Predicate Pred) {
913
  // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
914
  // so the values can never be equal.  Similarly for all other "or equals"
915
  // operators.
916
117
  assert(!!C && "C should not be zero!");
917
918
  // (X+1) <u X        --> X >u (MAXUINT-1)        --> X == 255
919
  // (X+2) <u X        --> X >u (MAXUINT-2)        --> X > 253
920
  // (X+MAXUINT) <u X  --> X >u (MAXUINT-MAXUINT)  --> X != 0
921
117
  if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
922
21
    Constant *R = ConstantInt::get(X->getType(),
923
21
                                   APInt::getMaxValue(C.getBitWidth()) - C);
924
21
    return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
925
21
  }
926
927
  // (X+1) >u X        --> X <u (0-1)        --> X != 255
928
  // (X+2) >u X        --> X <u (0-2)        --> X <u 254
929
  // (X+MAXUINT) >u X  --> X <u (0-MAXUINT)  --> X <u 1  --> X == 0
930
96
  if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
931
43
    return new ICmpInst(ICmpInst::ICMP_ULT, X,
932
43
                        ConstantInt::get(X->getType(), -C));
933
934
53
  APInt SMax = APInt::getSignedMaxValue(C.getBitWidth());
935
936
  // (X+ 1) <s X       --> X >s (MAXSINT-1)          --> X == 127
937
  // (X+ 2) <s X       --> X >s (MAXSINT-2)          --> X >s 125
938
  // (X+MAXSINT) <s X  --> X >s (MAXSINT-MAXSINT)    --> X >s 0
939
  // (X+MINSINT) <s X  --> X >s (MAXSINT-MINSINT)    --> X >s -1
940
  // (X+ -2) <s X      --> X >s (MAXSINT- -2)        --> X >s 126
941
  // (X+ -1) <s X      --> X >s (MAXSINT- -1)        --> X != 127
942
53
  if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
943
27
    return new ICmpInst(ICmpInst::ICMP_SGT, X,
944
27
                        ConstantInt::get(X->getType(), SMax - C));
945
946
  // (X+ 1) >s X       --> X <s (MAXSINT-(1-1))       --> X != 127
947
  // (X+ 2) >s X       --> X <s (MAXSINT-(2-1))       --> X <s 126
948
  // (X+MAXSINT) >s X  --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
949
  // (X+MINSINT) >s X  --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
950
  // (X+ -2) >s X      --> X <s (MAXSINT-(-2-1))      --> X <s -126
951
  // (X+ -1) >s X      --> X <s (MAXSINT-(-1-1))      --> X == -128
952
953
26
  assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
954
0
  return new ICmpInst(ICmpInst::ICMP_SLT, X,
955
26
                      ConstantInt::get(X->getType(), SMax - (C - 1)));
956
53
}
957
958
/// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
959
/// (icmp eq/ne A, Log2(AP2/AP1)) ->
960
/// (icmp eq/ne A, Log2(AP2) - Log2(AP1)).
961
Instruction *InstCombinerImpl::foldICmpShrConstConst(ICmpInst &I, Value *A,
962
                                                     const APInt &AP1,
963
38
                                                     const APInt &AP2) {
964
38
  assert(I.isEquality() && "Cannot fold icmp gt/lt");
965
966
36
  auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
967
36
    if (I.getPredicate() == I.ICMP_NE)
968
20
      Pred = CmpInst::getInversePredicate(Pred);
969
36
    return new ICmpInst(Pred, LHS, RHS);
970
36
  };
971
972
  // Don't bother doing any work for cases which InstSimplify handles.
973
38
  if (AP2.isZero())
974
0
    return nullptr;
975
976
38
  bool IsAShr = isa<AShrOperator>(I.getOperand(0));
977
38
  if (IsAShr) {
978
3
    if (AP2.isAllOnes())
979
0
      return nullptr;
980
3
    if (AP2.isNegative() != AP1.isNegative())
981
0
      return nullptr;
982
3
    if (AP2.sgt(AP1))
983
0
      return nullptr;
984
3
  }
985
986
38
  if (!AP1)
987
    // 'A' must be large enough to shift out the highest set bit.
988
22
    return getICmp(I.ICMP_UGT, A,
989
22
                   ConstantInt::get(A->getType(), AP2.logBase2()));
990
991
16
  if (AP1 == AP2)
992
9
    return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
993
994
7
  int Shift;
995
7
  if (IsAShr && AP1.isNegative())
996
2
    Shift = AP1.countl_one() - AP2.countl_one();
997
5
  else
998
5
    Shift = AP1.countl_zero() - AP2.countl_zero();
999
1000
7
  if (Shift > 0) {
1001
5
    if (IsAShr && AP1 == AP2.ashr(Shift)) {
1002
      // There are multiple solutions if we are comparing against -1 and the LHS
1003
      // of the ashr is not a power of two.
1004
2
      if (AP1.isAllOnes() && !AP2.isPowerOf2())
1005
1
        return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
1006
1
      return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1007
3
    } else if (AP1 == AP2.lshr(Shift)) {
1008
3
      return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1009
3
    }
1010
5
  }
1011
1012
  // Shifting const2 will never be equal to const1.
1013
  // FIXME: This should always be handled by InstSimplify?
1014
2
  auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
1015
2
  return replaceInstUsesWith(I, TorF);
1016
7
}
1017
1018
/// Handle "(icmp eq/ne (shl AP2, A), AP1)" ->
1019
/// (icmp eq/ne A, TrailingZeros(AP1) - TrailingZeros(AP2)).
1020
Instruction *InstCombinerImpl::foldICmpShlConstConst(ICmpInst &I, Value *A,
1021
                                                     const APInt &AP1,
1022
44
                                                     const APInt &AP2) {
1023
44
  assert(I.isEquality() && "Cannot fold icmp gt/lt");
1024
1025
41
  auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
1026
41
    if (I.getPredicate() == I.ICMP_NE)
1027
21
      Pred = CmpInst::getInversePredicate(Pred);
1028
41
    return new ICmpInst(Pred, LHS, RHS);
1029
41
  };
1030
1031
  // Don't bother doing any work for cases which InstSimplify handles.
1032
44
  if (AP2.isZero())
1033
0
    return nullptr;
1034
1035
44
  unsigned AP2TrailingZeros = AP2.countr_zero();
1036
1037
44
  if (!AP1 && AP2TrailingZeros != 0)
1038
15
    return getICmp(
1039
15
        I.ICMP_UGE, A,
1040
15
        ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros));
1041
1042
29
  if (AP1 == AP2)
1043
17
    return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
1044
1045
  // Get the distance between the lowest bits that are set.
1046
12
  int Shift = AP1.countr_zero() - AP2TrailingZeros;
1047
1048
12
  if (Shift > 0 && AP2.shl(Shift) == AP1)
1049
9
    return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1050
1051
  // Shifting const2 will never be equal to const1.
1052
  // FIXME: This should always be handled by InstSimplify?
1053
3
  auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
1054
3
  return replaceInstUsesWith(I, TorF);
1055
12
}
1056
1057
/// The caller has matched a pattern of the form:
1058
///   I = icmp ugt (add (add A, B), CI2), CI1
1059
/// If this is of the form:
1060
///   sum = a + b
1061
///   if (sum+128 >u 255)
1062
/// Then replace it with llvm.sadd.with.overflow.i8.
1063
///
1064
static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
1065
                                          ConstantInt *CI2, ConstantInt *CI1,
1066
10
                                          InstCombinerImpl &IC) {
1067
  // The transformation we're trying to do here is to transform this into an
1068
  // llvm.sadd.with.overflow.  To do this, we have to replace the original add
1069
  // with a narrower add, and discard the add-with-constant that is part of the
1070
  // range check (if we can't eliminate it, this isn't profitable).
1071
1072
  // In order to eliminate the add-with-constant, the compare can be its only
1073
  // use.
1074
10
  Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
1075
10
  if (!AddWithCst->hasOneUse())
1076
7
    return nullptr;
1077
1078
  // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
1079
3
  if (!CI2->getValue().isPowerOf2())
1080
0
    return nullptr;
1081
3
  unsigned NewWidth = CI2->getValue().countr_zero();
1082
3
  if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31)
1083
3
    return nullptr;
1084
1085
  // The width of the new add formed is 1 more than the bias.
1086
0
  ++NewWidth;
1087
1088
  // Check to see that CI1 is an all-ones value with NewWidth bits.
1089
0
  if (CI1->getBitWidth() == NewWidth ||
1090
0
      CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
1091
0
    return nullptr;
1092
1093
  // This is only really a signed overflow check if the inputs have been
1094
  // sign-extended; check for that condition. For example, if CI2 is 2^31 and
1095
  // the operands of the add are 64 bits wide, we need at least 33 sign bits.
1096
0
  if (IC.ComputeMaxSignificantBits(A, 0, &I) > NewWidth ||
1097
0
      IC.ComputeMaxSignificantBits(B, 0, &I) > NewWidth)
1098
0
    return nullptr;
1099
1100
  // In order to replace the original add with a narrower
1101
  // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
1102
  // and truncates that discard the high bits of the add.  Verify that this is
1103
  // the case.
1104
0
  Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
1105
0
  for (User *U : OrigAdd->users()) {
1106
0
    if (U == AddWithCst)
1107
0
      continue;
1108
1109
    // Only accept truncates for now.  We would really like a nice recursive
1110
    // predicate like SimplifyDemandedBits, but which goes downwards the use-def
1111
    // chain to see which bits of a value are actually demanded.  If the
1112
    // original add had another add which was then immediately truncated, we
1113
    // could still do the transformation.
1114
0
    TruncInst *TI = dyn_cast<TruncInst>(U);
1115
0
    if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth)
1116
0
      return nullptr;
1117
0
  }
1118
1119
  // If the pattern matches, truncate the inputs to the narrower type and
1120
  // use the sadd_with_overflow intrinsic to efficiently compute both the
1121
  // result and the overflow bit.
1122
0
  Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
1123
0
  Function *F = Intrinsic::getDeclaration(
1124
0
      I.getModule(), Intrinsic::sadd_with_overflow, NewType);
1125
1126
0
  InstCombiner::BuilderTy &Builder = IC.Builder;
1127
1128
  // Put the new code above the original add, in case there are any uses of the
1129
  // add between the add and the compare.
1130
0
  Builder.SetInsertPoint(OrigAdd);
1131
1132
0
  Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc");
1133
0
  Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc");
1134
0
  CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd");
1135
0
  Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result");
1136
0
  Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType());
1137
1138
  // The inner add was the result of the narrow add, zero extended to the
1139
  // wider type.  Replace it with the result computed by the intrinsic.
1140
0
  IC.replaceInstUsesWith(*OrigAdd, ZExt);
1141
0
  IC.eraseInstFromFunction(*OrigAdd);
1142
1143
  // The original icmp gets replaced with the overflow value.
1144
0
  return ExtractValueInst::Create(Call, 1, "sadd.overflow");
1145
0
}
1146
1147
/// If we have:
1148
///   icmp eq/ne (urem/srem %x, %y), 0
1149
/// iff %y is a power-of-two, we can replace this with a bit test:
1150
///   icmp eq/ne (and %x, (add %y, -1)), 0
1151
22.1k
Instruction *InstCombinerImpl::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) {
1152
  // This fold is only valid for equality predicates.
1153
22.1k
  if (!I.isEquality())
1154
6.18k
    return nullptr;
1155
15.9k
  ICmpInst::Predicate Pred;
1156
15.9k
  Value *X, *Y, *Zero;
1157
15.9k
  if (!match(&I, m_ICmp(Pred, m_OneUse(m_IRem(m_Value(X), m_Value(Y))),
1158
15.9k
                        m_CombineAnd(m_Zero(), m_Value(Zero)))))
1159
15.9k
    return nullptr;
1160
43
  if (!isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, 0, &I))
1161
42
    return nullptr;
1162
  // This may increase instruction count, we don't enforce that Y is a constant.
1163
1
  Value *Mask = Builder.CreateAdd(Y, Constant::getAllOnesValue(Y->getType()));
1164
1
  Value *Masked = Builder.CreateAnd(X, Mask);
1165
1
  return ICmpInst::Create(Instruction::ICmp, Pred, Masked, Zero);
1166
43
}
1167
1168
/// Fold equality-comparison between zero and any (maybe truncated) right-shift
1169
/// by one-less-than-bitwidth into a sign test on the original value.
1170
78.4k
Instruction *InstCombinerImpl::foldSignBitTest(ICmpInst &I) {
1171
78.4k
  Instruction *Val;
1172
78.4k
  ICmpInst::Predicate Pred;
1173
78.4k
  if (!I.isEquality() || !match(&I, m_ICmp(Pred, m_Instruction(Val), m_Zero())))
1174
68.3k
    return nullptr;
1175
1176
10.0k
  Value *X;
1177
10.0k
  Type *XTy;
1178
1179
10.0k
  Constant *C;
1180
10.0k
  if (match(Val, m_TruncOrSelf(m_Shr(m_Value(X), m_Constant(C))))) {
1181
27
    XTy = X->getType();
1182
27
    unsigned XBitWidth = XTy->getScalarSizeInBits();
1183
27
    if (!match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
1184
27
                                     APInt(XBitWidth, XBitWidth - 1))))
1185
27
      return nullptr;
1186
10.0k
  } else if (isa<BinaryOperator>(Val) &&
1187
10.0k
             (X = reassociateShiftAmtsOfTwoSameDirectionShifts(
1188
6.68k
                  cast<BinaryOperator>(Val), SQ.getWithInstruction(Val),
1189
6.68k
                  /*AnalyzeForSignBitExtraction=*/true))) {
1190
0
    XTy = X->getType();
1191
0
  } else
1192
10.0k
    return nullptr;
1193
1194
0
  return ICmpInst::Create(Instruction::ICmp,
1195
0
                          Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_SGE
1196
0
                                                    : ICmpInst::ICMP_SLT,
1197
0
                          X, ConstantInt::getNullValue(XTy));
1198
10.0k
}
1199
1200
// Handle  icmp pred X, 0
1201
83.5k
Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
1202
83.5k
  CmpInst::Predicate Pred = Cmp.getPredicate();
1203
83.5k
  if (!match(Cmp.getOperand(1), m_Zero()))
1204
61.3k
    return nullptr;
1205
1206
  // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
1207
22.2k
  if (Pred == ICmpInst::ICMP_SGT) {
1208
2.08k
    Value *A, *B;
1209
2.08k
    if (match(Cmp.getOperand(0), m_SMin(m_Value(A), m_Value(B)))) {
1210
72
      if (isKnownPositive(A, SQ.getWithInstruction(&Cmp)))
1211
49
        return new ICmpInst(Pred, B, Cmp.getOperand(1));
1212
23
      if (isKnownPositive(B, SQ.getWithInstruction(&Cmp)))
1213
2
        return new ICmpInst(Pred, A, Cmp.getOperand(1));
1214
23
    }
1215
2.08k
  }
1216
1217
22.1k
  if (Instruction *New = foldIRemByPowerOfTwoToBitTest(Cmp))
1218
1
    return New;
1219
1220
  // Given:
1221
  //   icmp eq/ne (urem %x, %y), 0
1222
  // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
1223
  //   icmp eq/ne %x, 0
1224
22.1k
  Value *X, *Y;
1225
22.1k
  if (match(Cmp.getOperand(0), m_URem(m_Value(X), m_Value(Y))) &&
1226
22.1k
      ICmpInst::isEquality(Pred)) {
1227
34
    KnownBits XKnown = computeKnownBits(X, 0, &Cmp);
1228
34
    KnownBits YKnown = computeKnownBits(Y, 0, &Cmp);
1229
34
    if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
1230
0
      return new ICmpInst(Pred, X, Cmp.getOperand(1));
1231
34
  }
1232
1233
  // (icmp eq/ne (mul X Y)) -> (icmp eq/ne X/Y) if we know about whether X/Y are
1234
  // odd/non-zero/there is no overflow.
1235
22.1k
  if (match(Cmp.getOperand(0), m_Mul(m_Value(X), m_Value(Y))) &&
1236
22.1k
      ICmpInst::isEquality(Pred)) {
1237
1238
165
    KnownBits XKnown = computeKnownBits(X, 0, &Cmp);
1239
    // if X % 2 != 0
1240
    //    (icmp eq/ne Y)
1241
165
    if (XKnown.countMaxTrailingZeros() == 0)
1242
0
      return new ICmpInst(Pred, Y, Cmp.getOperand(1));
1243
1244
165
    KnownBits YKnown = computeKnownBits(Y, 0, &Cmp);
1245
    // if Y % 2 != 0
1246
    //    (icmp eq/ne X)
1247
165
    if (YKnown.countMaxTrailingZeros() == 0)
1248
16
      return new ICmpInst(Pred, X, Cmp.getOperand(1));
1249
1250
149
    auto *BO0 = cast<OverflowingBinaryOperator>(Cmp.getOperand(0));
1251
149
    if (BO0->hasNoUnsignedWrap() || BO0->hasNoSignedWrap()) {
1252
51
      const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
1253
      // `isKnownNonZero` does more analysis than just `!KnownBits.One.isZero()`
1254
      // but to avoid unnecessary work, first just if this is an obvious case.
1255
1256
      // if X non-zero and NoOverflow(X * Y)
1257
      //    (icmp eq/ne Y)
1258
51
      if (!XKnown.One.isZero() || isKnownNonZero(X, DL, 0, Q.AC, Q.CxtI, Q.DT))
1259
1
        return new ICmpInst(Pred, Y, Cmp.getOperand(1));
1260
1261
      // if Y non-zero and NoOverflow(X * Y)
1262
      //    (icmp eq/ne X)
1263
50
      if (!YKnown.One.isZero() || isKnownNonZero(Y, DL, 0, Q.AC, Q.CxtI, Q.DT))
1264
16
        return new ICmpInst(Pred, X, Cmp.getOperand(1));
1265
50
    }
1266
    // Note, we are skipping cases:
1267
    //      if Y % 2 != 0 AND X % 2 != 0
1268
    //          (false/true)
1269
    //      if X non-zero and Y non-zero and NoOverflow(X * Y)
1270
    //          (false/true)
1271
    // Those can be simplified later as we would have already replaced the (icmp
1272
    // eq/ne (mul X, Y)) with (icmp eq/ne X/Y) and if X/Y is known non-zero that
1273
    // will fold to a constant elsewhere.
1274
149
  }
1275
22.1k
  return nullptr;
1276
22.1k
}
1277
1278
/// Fold icmp Pred X, C.
1279
/// TODO: This code structure does not make sense. The saturating add fold
1280
/// should be moved to some other helper and extended as noted below (it is also
1281
/// possible that code has been made unnecessary - do we canonicalize IR to
1282
/// overflow/saturating intrinsics or not?).
1283
92.1k
Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) {
1284
  // Match the following pattern, which is a common idiom when writing
1285
  // overflow-safe integer arithmetic functions. The source performs an addition
1286
  // in wider type and explicitly checks for overflow using comparisons against
1287
  // INT_MIN and INT_MAX. Simplify by using the sadd_with_overflow intrinsic.
1288
  //
1289
  // TODO: This could probably be generalized to handle other overflow-safe
1290
  // operations if we worked out the formulas to compute the appropriate magic
1291
  // constants.
1292
  //
1293
  // sum = a + b
1294
  // if (sum+128 >u 255)  ...  -> llvm.sadd.with.overflow.i8
1295
92.1k
  CmpInst::Predicate Pred = Cmp.getPredicate();
1296
92.1k
  Value *Op0 = Cmp.getOperand(0), *Op1 = Cmp.getOperand(1);
1297
92.1k
  Value *A, *B;
1298
92.1k
  ConstantInt *CI, *CI2; // I = icmp ugt (add (add A, B), CI2), CI
1299
92.1k
  if (Pred == ICmpInst::ICMP_UGT && match(Op1, m_ConstantInt(CI)) &&
1300
92.1k
      match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
1301
10
    if (Instruction *Res = processUGT_ADDCST_ADD(Cmp, A, B, CI2, CI, *this))
1302
0
      return Res;
1303
1304
  // icmp(phi(C1, C2, ...), C) -> phi(icmp(C1, C), icmp(C2, C), ...).
1305
92.1k
  Constant *C = dyn_cast<Constant>(Op1);
1306
92.1k
  if (!C)
1307
32.5k
    return nullptr;
1308
1309
59.6k
  if (auto *Phi = dyn_cast<PHINode>(Op0))
1310
519
    if (all_of(Phi->operands(), [](Value *V) { return isa<Constant>(V); })) {
1311
39
      SmallVector<Constant *> Ops;
1312
83
      for (Value *V : Phi->incoming_values()) {
1313
83
        Constant *Res =
1314
83
            ConstantFoldCompareInstOperands(Pred, cast<Constant>(V), C, DL);
1315
83
        if (!Res)
1316
0
          return nullptr;
1317
83
        Ops.push_back(Res);
1318
83
      }
1319
39
      Builder.SetInsertPoint(Phi);
1320
39
      PHINode *NewPhi = Builder.CreatePHI(Cmp.getType(), Phi->getNumOperands());
1321
39
      for (auto [V, Pred] : zip(Ops, Phi->blocks()))
1322
83
        NewPhi->addIncoming(V, Pred);
1323
39
      return replaceInstUsesWith(Cmp, NewPhi);
1324
39
    }
1325
1326
59.5k
  if (Instruction *R = tryFoldInstWithCtpopWithNot(&Cmp))
1327
0
    return R;
1328
1329
59.5k
  return nullptr;
1330
59.5k
}
1331
1332
/// Canonicalize icmp instructions based on dominating conditions.
///
/// Handles `Cmp = icmp Pred X, C` where operand 1 matches m_APInt. For each
/// branch returned by DC.conditionsFor(X) that has an outgoing edge dominating
/// Cmp's block, the branch condition (as-is for successor 0, inverted for
/// successor 1) is compared against Cmp using constant ranges.
///
/// \returns the result of replacing \p Cmp with a constant true/false, a new
/// eq/ne compare of X against a single constant, or nullptr if no fold applies.
1333
92.1k
Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
1334
  // We already checked simple implication in InstSimplify, only handle complex
1335
  // cases here.
1336
92.1k
  Value *X = Cmp.getOperand(0), *Y = Cmp.getOperand(1);
1337
92.1k
  ICmpInst::Predicate DomPred;
1338
92.1k
  const APInt *C;
1339
92.1k
  if (!match(Y, m_APInt(C)))
1340
35.4k
    return nullptr;
1341
1342
56.6k
  CmpInst::Predicate Pred = Cmp.getPredicate();
  // CR is the region makeExactICmpRegion computes for Cmp's own predicate and
  // constant; it is compared against the dominating condition's region below.
1343
56.6k
  ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, *C);
1344
1345
  // Tries to simplify Cmp given that DomCond is known to be CondIsTrue on
  // entry to Cmp's block. Returns nullptr when DomCond is not a compare of the
  // same X against an APInt constant, or when no simplification is performed.
56.6k
  auto handleDomCond = [&](Value *DomCond, bool CondIsTrue) -> Instruction * {
1346
130
    const APInt *DomC;
1347
130
    if (!match(DomCond, m_ICmp(DomPred, m_Specific(X), m_APInt(DomC))))
1348
14
      return nullptr;
1349
    // We have 2 compares of a variable with constants. Calculate the constant
1350
    // ranges of those compares to see if we can transform the 2nd compare:
1351
    // DomBB:
1352
    //   DomCond = icmp DomPred X, DomC
1353
    //   br DomCond, CmpBB, FalseBB
1354
    // CmpBB:
1355
    //   Cmp = icmp Pred X, C
    // When the dominating edge is the one taken for a false condition, reason
    // about the inverse predicate instead.
1356
116
    if (!CondIsTrue)
1357
82
      DomPred = CmpInst::getInversePredicate(DomPred);
1358
116
    ConstantRange DominatingCR =
1359
116
        ConstantRange::makeExactICmpRegion(DomPred, *DomC);
    // Intersection: X values permitted by both the dominating condition and
    // Cmp. Difference: X values permitted by the dominating condition but not
    // by Cmp. An empty Intersection folds Cmp to false; an empty Difference
    // folds it to true.
1360
116
    ConstantRange Intersection = DominatingCR.intersectWith(CR);
1361
116
    ConstantRange Difference = DominatingCR.difference(CR);
1362
116
    if (Intersection.isEmptySet())
1363
0
      return replaceInstUsesWith(Cmp, Builder.getFalse());
1364
116
    if (Difference.isEmptySet())
1365
0
      return replaceInstUsesWith(Cmp, Builder.getTrue());
1366
1367
    // Canonicalizing a sign bit comparison that gets used in a branch,
1368
    // pessimizes codegen by generating branch on zero instruction instead
1369
    // of a test and branch. So we avoid canonicalizing in such situations
1370
    // because test and branch instruction has better branch displacement
1371
    // than compare and branch instruction.
    // Equality compares are also left alone: the rewrites below only ever
    // produce eq/ne compares.
1372
116
    bool UnusedBit;
1373
116
    bool IsSignBit = isSignBitCheck(Pred, *C, UnusedBit);
1374
116
    if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
1375
49
      return nullptr;
1376
1377
    // Avoid an infinite loop with min/max canonicalization.
1378
    // TODO: This will be unnecessary if we canonicalize to min/max intrinsics.
1379
67
    if (Cmp.hasOneUse() &&
1380
67
        match(Cmp.user_back(), m_MaxOrMin(m_Value(), m_Value())))
1381
9
      return nullptr;
1382
1383
    // If exactly one value remains in the Intersection (Difference), rewrite
    // Cmp as an equality (inequality) test of X against that value.
58
    if (const APInt *EqC = Intersection.getSingleElement())
1384
5
      return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*EqC));
1385
53
    if (const APInt *NeC = Difference.getSingleElement())
1386
7
      return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*NeC));
1387
46
    return nullptr;
1388
53
  };
1389
1390
  // Try each candidate branch: successor 0 is checked first with the condition
  // assumed true; only if that edge does not dominate Cmp's block is
  // successor 1 tried, with the condition assumed false.
56.6k
  for (BranchInst *BI : DC.conditionsFor(X)) {
1391
435
    auto *Cond = BI->getCondition();
1392
435
    BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
1393
435
    if (DT.dominates(Edge0, Cmp.getParent())) {
1394
48
      if (auto *V = handleDomCond(Cond, true))
1395
3
        return V;
1396
387
    } else {
1397
387
      BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
1398
387
      if (DT.dominates(Edge1, Cmp.getParent()))
1399
82
        if (auto *V = handleDomCond(Cond, false))
1400
9
          return V;
1401
387
    }
1402
435
  }
1403
1404
56.6k
  return nullptr;
1405
56.6k
}
1406
1407
/// Fold icmp (trunc X), C.
///
/// \p Trunc supplies the pre-truncation value X (its operand 0) and the
/// destination bit width; \p C is the constant from the pattern above.
/// The folds are tried in order and the first match wins.
///
/// \returns a new compare instruction, or nullptr if no fold applies.
1408
Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
1409
                                                     TruncInst *Trunc,
1410
739
                                                     const APInt &C) {
1411
739
  ICmpInst::Predicate Pred = Cmp.getPredicate();
1412
739
  Value *X = Trunc->getOperand(0);
  // The signum fold is only attempted when C is wider than one bit.
1413
739
  if (C.isOne() && C.getBitWidth() > 1) {
1414
    // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
1415
34
    Value *V = nullptr;
1416
34
    if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V))))
1417
0
      return new ICmpInst(ICmpInst::ICMP_SLT, V,
1418
0
                          ConstantInt::get(V->getType(), 1));
1419
34
  }
1420
1421
739
  Type *SrcTy = X->getType();
1422
739
  unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
1423
739
           SrcBits = SrcTy->getScalarSizeInBits();
1424
1425
  // TODO: Handle any shifted constant by subtracting trailing zeros.
1426
  // TODO: Handle non-equality predicates.
1427
739
  Value *Y;
1428
739
  if (Cmp.isEquality() && match(X, m_Shl(m_One(), m_Value(Y)))) {
1429
    // (trunc (1 << Y) to iN) == 0 --> Y u>= N
1430
    // (trunc (1 << Y) to iN) != 0 --> Y u<  N
1431
0
    if (C.isZero()) {
1432
0
      auto NewPred = (Pred == Cmp.ICMP_EQ) ? Cmp.ICMP_UGE : Cmp.ICMP_ULT;
1433
0
      return new ICmpInst(NewPred, Y, ConstantInt::get(SrcTy, DstBits));
1434
0
    }
1435
    // (trunc (1 << Y) to iN) == 2**C --> Y == C
1436
    // (trunc (1 << Y) to iN) != 2**C --> Y != C
1437
0
    if (C.isPowerOf2())
1438
0
      return new ICmpInst(Pred, Y, ConstantInt::get(SrcTy, C.logBase2()));
1439
0
  }
1440
1441
739
  if (Cmp.isEquality() && Trunc->hasOneUse()) {
1442
    // Canonicalize to a mask and wider compare if the wide type is suitable:
1443
    // (trunc X to i8) == C --> (X & 0xff) == (zext C)
1444
350
    if (!SrcTy->isVectorTy() && shouldChangeType(DstBits, SrcBits)) {
      // Mask keeps only the low DstBits bits of X, i.e. the bits the trunc
      // would have kept.
1445
317
      Constant *Mask =
1446
317
          ConstantInt::get(SrcTy, APInt::getLowBitsSet(SrcBits, DstBits));
1447
317
      Value *And = Builder.CreateAnd(X, Mask);
1448
317
      Constant *WideC = ConstantInt::get(SrcTy, C.zext(SrcBits));
1449
317
      return new ICmpInst(Pred, And, WideC);
1450
317
    }
1451
1452
    // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
1453
    // of the high bits truncated out of x are known.
1454
33
    KnownBits Known = computeKnownBits(X, 0, &Cmp);
1455
1456
    // If all the high bits are known, we can do this xform.
    // (Zero | One) marks every known bit; its leading-ones count must cover
    // the SrcBits - DstBits bits that the trunc discards.
1457
33
    if ((Known.Zero | Known.One).countl_one() >= SrcBits - DstBits) {
1458
      // Pull in the high bits from known-ones set.
1459
0
      APInt NewRHS = C.zext(SrcBits);
1460
0
      NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits);
1461
0
      return new ICmpInst(Pred, X, ConstantInt::get(SrcTy, NewRHS));
1462
0
    }
1463
33
  }
1464
1465
  // Look through truncated right-shift of the sign-bit for a sign-bit check:
1466
  // trunc iN (ShOp >> ShAmtC) to i[N - ShAmtC] < 0  --> ShOp <  0
1467
  // trunc iN (ShOp >> ShAmtC) to i[N - ShAmtC] > -1 --> ShOp > -1
  // Only applies when the shift amount equals the number of bits dropped by
  // the trunc (DstBits == SrcBits - ShAmtC).
1468
422
  Value *ShOp;
1469
422
  const APInt *ShAmtC;
1470
422
  bool TrueIfSigned;
1471
422
  if (isSignBitCheck(Pred, C, TrueIfSigned) &&
1472
422
      match(X, m_Shr(m_Value(ShOp), m_APInt(ShAmtC))) &&
1473
422
      DstBits == SrcBits - ShAmtC->getZExtValue()) {
1474
8
    return TrueIfSigned ? new ICmpInst(ICmpInst::ICMP_SLT, ShOp,
1475
6
                                       ConstantInt::getNullValue(SrcTy))
1476
8
                        : new ICmpInst(ICmpInst::ICMP_SGT, ShOp,
1477
2
                                       ConstantInt::getAllOnesValue(SrcTy));
1478
8
  }
1479
1480
414
  return nullptr;
1481
422
}
1482
1483
/// Fold icmp (trunc X), (trunc Y).
/// Fold icmp (trunc X), (zext Y).
///
/// Signed predicates are rejected up front. The fold fires only when known
/// bits (computed with \p Q) show that X, and Y when Y is also truncated, has
/// no active bits above the truncated width. The compare is then redone on X
/// and on Y zero-extended or truncated to X's type.
///
/// \returns the new compare, or nullptr if the fold does not apply.
1485
Instruction *
1486
InstCombinerImpl::foldICmpTruncWithTruncOrExt(ICmpInst &Cmp,
1487
88.3k
                                              const SimplifyQuery &Q) {
1488
88.3k
  if (Cmp.isSigned())
1489
27.1k
    return nullptr;
1490
1491
61.2k
  Value *X, *Y;
1492
61.2k
  ICmpInst::Predicate Pred;
1493
61.2k
  bool YIsZext = false;
1494
  // Try to match icmp (trunc X), (trunc Y)
1495
61.2k
  if (match(&Cmp, m_ICmp(Pred, m_Trunc(m_Value(X)), m_Trunc(m_Value(Y))))) {
    // With differing source types, both truncs must be single-use.
1496
3.97k
    if (X->getType() != Y->getType() &&
1497
3.97k
        (!Cmp.getOperand(0)->hasOneUse() || !Cmp.getOperand(1)->hasOneUse()))
1498
0
      return nullptr;
    // Make X the operand with the desirable integer width when only Y has
    // one; X's type becomes the type of the new compare.
1499
3.97k
    if (!isDesirableIntType(X->getType()->getScalarSizeInBits()) &&
1500
3.97k
        isDesirableIntType(Y->getType()->getScalarSizeInBits())) {
1501
0
      std::swap(X, Y);
1502
0
      Pred = Cmp.getSwappedPredicate(Pred);
1503
0
    }
1504
3.97k
  }
1505
  // Try to match icmp (trunc X), (zext Y)
1506
57.2k
  else if (match(&Cmp, m_c_ICmp(Pred, m_Trunc(m_Value(X)),
1507
57.2k
                                m_OneUse(m_ZExt(m_Value(Y))))))
1508
1509
4
    YIsZext = true;
1510
57.2k
  else
1511
57.2k
    return nullptr;
1512
1513
3.98k
  Type *TruncTy = Cmp.getOperand(0)->getType();
1514
3.98k
  unsigned TruncBits = TruncTy->getScalarSizeInBits();
1515
1516
  // If this transform will end up changing from desirable types -> undesirable
1517
  // types skip it.
1518
3.98k
  if (isDesirableIntType(TruncBits) &&
1519
3.98k
      !isDesirableIntType(X->getType()->getScalarSizeInBits()))
1520
1
    return nullptr;
1521
1522
  // Check if the trunc is unneeded.
1523
3.98k
  KnownBits KnownX = llvm::computeKnownBits(X, /*Depth*/ 0, Q);
1524
3.98k
  if (KnownX.countMaxActiveBits() > TruncBits)
1525
3.79k
    return nullptr;
1526
1527
183
  if (!YIsZext) {
1528
    // If Y is also a trunc, make sure it is unneeded.
1529
183
    KnownBits KnownY = llvm::computeKnownBits(Y, /*Depth*/ 0, Q);
1530
183
    if (KnownY.countMaxActiveBits() > TruncBits)
1531
143
      return nullptr;
1532
183
  }
1533
1534
  // Bring Y to X's type and compare the untruncated values directly.
40
  Value *NewY = Builder.CreateZExtOrTrunc(Y, X->getType());
1535
40
  return new ICmpInst(Pred, X, NewY);
1536
183
}
1537
1538
/// Fold icmp (xor X, Y), C.
///
/// First defers to foldICmpXorShiftConst. All remaining folds require the
/// xor's second operand to match m_APInt (bound to XorC below).
///
/// \returns a new compare, the result of replacing operand 0 of \p Cmp, or
/// nullptr if no fold applies.
1539
Instruction *InstCombinerImpl::foldICmpXorConstant(ICmpInst &Cmp,
1540
                                                   BinaryOperator *Xor,
1541
775
                                                   const APInt &C) {
1542
775
  if (Instruction *I = foldICmpXorShiftConst(Cmp, Xor, C))
1543
0
    return I;
1544
1545
775
  Value *X = Xor->getOperand(0);
1546
775
  Value *Y = Xor->getOperand(1);
1547
775
  const APInt *XorC;
1548
775
  if (!match(Y, m_APInt(XorC)))
1549
273
    return nullptr;
1550
1551
  // If this is a comparison that tests the signbit (X < 0) or (x > -1),
1552
  // fold the xor.
1553
502
  ICmpInst::Predicate Pred = Cmp.getPredicate();
1554
502
  bool TrueIfSigned = false;
1555
502
  if (isSignBitCheck(Cmp.getPredicate(), C, TrueIfSigned)) {
1556
1557
    // If the sign bit of the XorCst is not set, there is no change to
1558
    // the operation, just stop using the Xor.
1559
48
    if (!XorC->isNegative())
1560
5
      return replaceOperand(Cmp, 0, X);
1561
1562
    // Emit the opposite comparison.
1563
43
    if (TrueIfSigned)
1564
19
      return new ICmpInst(ICmpInst::ICMP_SGT, X,
1565
19
                          ConstantInt::getAllOnesValue(X->getType()));
1566
24
    else
1567
24
      return new ICmpInst(ICmpInst::ICMP_SLT, X,
1568
24
                          ConstantInt::getNullValue(X->getType()));
1569
43
  }
1570
1571
  // The two sign-mask folds below only fire for a single-use xor and a
  // non-equality predicate.
454
  if (Xor->hasOneUse()) {
1572
    // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask))
1573
120
    if (!Cmp.isEquality() && XorC->isSignMask()) {
1574
21
      Pred = Cmp.getFlippedSignednessPredicate();
1575
21
      return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
1576
21
    }
1577
1578
    // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask))
    // Unlike the SignMask case, the predicate is also swapped here.
1579
99
    if (!Cmp.isEquality() && XorC->isMaxSignedValue()) {
1580
15
      Pred = Cmp.getFlippedSignednessPredicate();
1581
15
      Pred = Cmp.getSwappedPredicate(Pred);
1582
15
      return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
1583
15
    }
1584
99
  }
1585
1586
  // Mask constant magic can eliminate an 'xor' with unsigned compares.
1587
418
  if (Pred == ICmpInst::ICMP_UGT) {
1588
    // (xor X, ~C) >u C --> X <u ~C (when C+1 is a power of 2)
1589
58
    if (*XorC == ~C && (C + 1).isPowerOf2())
1590
0
      return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
1591
    // (xor X, C) >u C --> X >u C (when C+1 is a power of 2)
1592
58
    if (*XorC == C && (C + 1).isPowerOf2())
1593
5
      return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
1594
58
  }
1595
413
  if (Pred == ICmpInst::ICMP_ULT) {
1596
    // (xor X, -C) <u C --> X >u ~C (when C is a power of 2)
1597
95
    if (*XorC == -C && C.isPowerOf2())
1598
2
      return new ICmpInst(ICmpInst::ICMP_UGT, X,
1599
2
                          ConstantInt::get(X->getType(), ~C));
1600
    // (xor X, C) <u C --> X >u ~C (when -C is a power of 2)
1601
93
    if (*XorC == C && (-C).isPowerOf2())
1602
1
      return new ICmpInst(ICmpInst::ICMP_UGT, X,
1603
1
                          ConstantInt::get(X->getType(), ~C));
1604
93
  }
1605
410
  return nullptr;
1606
413
}
1607
1608
/// For power-of-2 C:
/// ((X s>> ShiftC) ^ X) u< C --> (X + C) u< (C << 1)
/// ((X s>> ShiftC) ^ X) u> (C - 1) --> (X + C) u> ((C << 1) - 1)
///
/// \returns the new compare, or nullptr when the predicate is not ult/ugt,
/// the bound is not a power of two, \p Xor is not a single-use xor of X with
/// an ashr of X by a constant, the shift amount is zero, or the bound is the
/// minimum signed value.
1611
Instruction *InstCombinerImpl::foldICmpXorShiftConst(ICmpInst &Cmp,
1612
                                                     BinaryOperator *Xor,
1613
775
                                                     const APInt &C) {
1614
775
  CmpInst::Predicate Pred = Cmp.getPredicate();
  // Normalise both forms to one bound: C itself for ult, C + 1 for ugt (an
  // all-ones C is excluded before adding 1).
1615
775
  APInt PowerOf2;
1616
775
  if (Pred == ICmpInst::ICMP_ULT)
1617
115
    PowerOf2 = C;
1618
660
  else if (Pred == ICmpInst::ICMP_UGT && !C.isMaxValue())
1619
165
    PowerOf2 = C + 1;
1620
495
  else
1621
495
    return nullptr;
1622
280
  if (!PowerOf2.isPowerOf2())
1623
135
    return nullptr;
1624
145
  Value *X;
1625
145
  const APInt *ShiftC;
  // m_Deferred(X) requires the ashr's shifted operand to be the same X that
  // the other xor operand bound.
1626
145
  if (!match(Xor, m_OneUse(m_c_Xor(m_Value(X),
1627
145
                                   m_AShr(m_Deferred(X), m_APInt(ShiftC))))))
1628
145
    return nullptr;
1629
0
  uint64_t Shift = ShiftC->getLimitedValue();
1630
0
  Type *XType = X->getType();
1631
0
  if (Shift == 0 || PowerOf2.isMinSignedValue())
1632
0
    return nullptr;
1633
0
  Value *Add = Builder.CreateAdd(X, ConstantInt::get(XType, PowerOf2));
  // The new bound is twice the power of two for ult, and one less than that
  // for ugt, matching the two forms in the header comment.
1634
0
  APInt Bound =
1635
0
      Pred == ICmpInst::ICMP_ULT ? PowerOf2 << 1 : ((PowerOf2 << 1) - 1);
1636
0
  return new ICmpInst(Pred, Add, ConstantInt::get(XType, Bound));
1637
0
}
1638
1639
/// Fold icmp (and (sh X, Y), C2), C1.
///
/// Operand 0 of \p And must be a shift BinaryOperator, otherwise nullptr is
/// returned. Two strategies are tried: moving a constant shift amount into
/// the mask and compare constants, and, for a variable shift amount compared
/// for equality with zero, shifting the mask instead of X.
///
/// \returns a new compare, a constant replacement for \p Cmp, the result of
/// replacing operand 0 of \p Cmp, or nullptr.
1640
Instruction *InstCombinerImpl::foldICmpAndShift(ICmpInst &Cmp,
1641
                                                BinaryOperator *And,
1642
                                                const APInt &C1,
1643
5.72k
                                                const APInt &C2) {
1644
5.72k
  BinaryOperator *Shift = dyn_cast<BinaryOperator>(And->getOperand(0));
1645
5.72k
  if (!Shift || !Shift->isShift())
1646
5.34k
    return nullptr;
1647
1648
  // If this is: (X >> C3) & C2 != C1 (where any shift and any compare could
1649
  // exist), turn it into (X & (C2 << C3)) != (C1 << C3). This happens a LOT in
1650
  // code produced by the clang front-end, for bitfield access.
1651
  // This seemingly simple opportunity to fold away a shift turns out to be
1652
  // rather complicated. See PR17827 for details.
1653
387
  unsigned ShiftOpcode = Shift->getOpcode();
1654
387
  bool IsShl = ShiftOpcode == Instruction::Shl;
1655
387
  const APInt *C3;
1656
387
  if (match(Shift->getOperand(1), m_APInt(C3))) {
    // NewAndCst / NewCmpCst are C2 / C1 shifted in the direction opposite to
    // the original shift. AnyCmpCstBitsShiftedOut records whether re-applying
    // the original shift to NewCmpCst fails to reproduce C1.
1657
326
    APInt NewAndCst, NewCmpCst;
1658
326
    bool AnyCmpCstBitsShiftedOut;
1659
326
    if (ShiftOpcode == Instruction::Shl) {
1660
      // For a left shift, we can fold if the comparison is not signed. We can
1661
      // also fold a signed comparison if the mask value and comparison value
1662
      // are not negative. These constraints may not be obvious, but we can
1663
      // prove that they are correct using an SMT solver.
1664
16
      if (Cmp.isSigned() && (C2.isNegative() || C1.isNegative()))
1665
10
        return nullptr;
1666
1667
6
      NewCmpCst = C1.lshr(*C3);
1668
6
      NewAndCst = C2.lshr(*C3);
1669
6
      AnyCmpCstBitsShiftedOut = NewCmpCst.shl(*C3) != C1;
1670
310
    } else if (ShiftOpcode == Instruction::LShr) {
1671
      // For a logical right shift, we can fold if the comparison is not signed.
1672
      // We can also fold a signed comparison if the shifted mask value and the
1673
      // shifted comparison value are not negative. These constraints may not be
1674
      // obvious, but we can prove that they are correct using an SMT solver.
1675
278
      NewCmpCst = C1.shl(*C3);
1676
278
      NewAndCst = C2.shl(*C3);
1677
278
      AnyCmpCstBitsShiftedOut = NewCmpCst.lshr(*C3) != C1;
1678
278
      if (Cmp.isSigned() && (NewAndCst.isNegative() || NewCmpCst.isNegative()))
1679
0
        return nullptr;
1680
278
    } else {
1681
      // For an arithmetic shift, check that both constants don't use (in a
1682
      // signed sense) the top bits being shifted out.
1683
32
      assert(ShiftOpcode == Instruction::AShr && "Unknown shift opcode");
1684
0
      NewCmpCst = C1.shl(*C3);
1685
32
      NewAndCst = C2.shl(*C3);
1686
32
      AnyCmpCstBitsShiftedOut = NewCmpCst.ashr(*C3) != C1;
1687
32
      if (NewAndCst.ashr(*C3) != C2)
1688
8
        return nullptr;
1689
32
    }
1690
1691
308
    if (AnyCmpCstBitsShiftedOut) {
1692
      // If we shifted bits out, the fold is not going to work out. As a
1693
      // special case, check to see if this means that the result is always
1694
      // true or false now.
      // Predicates other than eq/ne fall through to the variable-shift fold
      // below.
1695
3
      if (Cmp.getPredicate() == ICmpInst::ICMP_EQ)
1696
1
        return replaceInstUsesWith(Cmp, ConstantInt::getFalse(Cmp.getType()));
1697
2
      if (Cmp.getPredicate() == ICmpInst::ICMP_NE)
1698
1
        return replaceInstUsesWith(Cmp, ConstantInt::getTrue(Cmp.getType()));
1699
305
    } else {
1700
305
      Value *NewAnd = Builder.CreateAnd(
1701
305
          Shift->getOperand(0), ConstantInt::get(And->getType(), NewAndCst));
1702
305
      return new ICmpInst(Cmp.getPredicate(),
1703
305
          NewAnd, ConstantInt::get(And->getType(), NewCmpCst));
1704
305
    }
1705
308
  }
1706
1707
  // Turn ((X >> Y) & C2) == 0  into  (X & (C2 << Y)) == 0.  The latter is
1708
  // preferable because it allows the C2 << Y expression to be hoisted out of a
1709
  // loop if Y is invariant and X is not.
1710
62
  if (Shift->hasOneUse() && C1.isZero() && Cmp.isEquality() &&
1711
62
      !Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) {
1712
    // Compute C2 << Y.
    // More precisely, the mask is shifted in the direction opposite to the
    // original shift: lshr when the original was shl, shl otherwise.
1713
8
    Value *NewShift =
1714
8
        IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1))
1715
8
              : Builder.CreateShl(And->getOperand(1), Shift->getOperand(1));
1716
1717
    // Compute X & (C2 << Y).
1718
8
    Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift);
1719
8
    return replaceOperand(Cmp, 0, NewAnd);
1720
8
  }
1721
1722
54
  return nullptr;
1723
62
}
1724
1725
/// Fold icmp (and X, C2), C1.
///
/// \p C1 is the compare constant; C2 is matched from \p And via m_APInt.
/// Apart from the vector `and X, 1` != 0 fold at the top, every fold here
/// requires \p And to have exactly one use.
///
/// \returns a replacement instruction, or nullptr if no fold applies.
1726
Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
1727
                                                     BinaryOperator *And,
1728
7.86k
                                                     const APInt &C1) {
1729
7.86k
  bool isICMP_NE = Cmp.getPredicate() == ICmpInst::ICMP_NE;
1730
1731
  // For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
1732
  // TODO: We canonicalize to the longer form for scalars because we have
1733
  // better analysis/folds for icmp, and codegen may be better with icmp.
1734
7.86k
  if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isZero() &&
1735
7.86k
      match(And->getOperand(1), m_One()))
1736
67
    return new TruncInst(And->getOperand(0), Cmp.getType());
1737
1738
7.80k
  const APInt *C2;
1739
7.80k
  Value *X;
1740
7.80k
  if (!match(And, m_And(m_Value(X), m_APInt(C2))))
1741
1.30k
    return nullptr;
1742
1743
  // Don't perform the following transforms if the AND has multiple uses
1744
6.49k
  if (!And->hasOneUse())
1745
660
    return nullptr;
1746
1747
5.83k
  if (Cmp.isEquality() && C1.isZero()) {
1748
    // Restrict this fold to single-use 'and' (PR10267).
1749
    // Replace (and X, (1 << size(X)-1) != 0) with X s< 0
1750
3.83k
    if (C2->isSignMask()) {
1751
46
      Constant *Zero = Constant::getNullValue(X->getType());
1752
46
      auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
1753
46
      return new ICmpInst(NewPred, X, Zero);
1754
46
    }
1755
1756
3.79k
    APInt NewC2 = *C2;
1757
3.79k
    KnownBits Know = computeKnownBits(And->getOperand(0), 0, And);
1758
    // Set high zeros of C2 to allow matching negated power-of-2.
    // The bits set are the top Know.countMinLeadingZeros() bits, i.e. bits of
    // the 'and' LHS that are known to be zero.
1759
3.79k
    NewC2 = *C2 | APInt::getHighBitsSet(C2->getBitWidth(),
1760
3.79k
                                        Know.countMinLeadingZeros());
1761
1762
    // Restrict this fold only for single-use 'and' (PR10267).
1763
    // ((%x & C) == 0) --> %x u< (-C)  iff (-C) is power of two.
1764
3.79k
    if (NewC2.isNegatedPowerOf2()) {
1765
54
      Constant *NegBOC = ConstantInt::get(And->getType(), -NewC2);
1766
54
      auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
1767
54
      return new ICmpInst(NewPred, X, NegBOC);
1768
54
    }
1769
3.79k
  }
1770
1771
  // If the LHS is an 'and' of a truncate and we can widen the and/compare to
1772
  // the input width without changing the value produced, eliminate the cast:
1773
  //
1774
  // icmp (and (trunc W), C2), C1 -> icmp (and W, C2'), C1'
1775
  //
1776
  // We can do this transformation if the constants do not have their sign bits
1777
  // set or if it is an equality comparison. Extending a relational comparison
1778
  // when we're checking the sign bit would not work.
1779
5.73k
  Value *W;
1780
5.73k
  if (match(And->getOperand(0), m_OneUse(m_Trunc(m_Value(W)))) &&
1781
5.73k
      (Cmp.isEquality() || (!C1.isNegative() && !C2->isNegative()))) {
1782
    // TODO: Is this a good transform for vectors? Wider types may reduce
1783
    // throughput. Should this transform be limited (even for scalars) by using
1784
    // shouldChangeType()?
1785
10
    if (!Cmp.getType()->isVectorTy()) {
1786
10
      Type *WideType = W->getType();
1787
10
      unsigned WideScalarBits = WideType->getScalarSizeInBits();
      // Both constants are zero-extended to the width of W.
1788
10
      Constant *ZextC1 = ConstantInt::get(WideType, C1.zext(WideScalarBits));
1789
10
      Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits));
1790
10
      Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName());
1791
10
      return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1);
1792
10
    }
1793
10
  }
1794
1795
5.72k
  if (Instruction *I = foldICmpAndShift(Cmp, And, C1, *C2))
1796
315
    return I;
1797
1798
  // (icmp pred (and (or (lshr A, B), A), 1), 0) -->
1799
  // (icmp pred (and A, (or (shl 1, B), 1), 0))
1800
  //
1801
  // iff pred isn't signed
1802
5.41k
  if (!Cmp.isSigned() && C1.isZero() && And->getOperand(0)->hasOneUse() &&
1803
5.41k
      match(And->getOperand(1), m_One())) {
1804
626
    Constant *One = cast<Constant>(And->getOperand(1));
1805
626
    Value *Or = And->getOperand(0);
1806
626
    Value *A, *B, *LShr;
1807
626
    if (match(Or, m_Or(m_Value(LShr), m_Value(A))) &&
1808
626
        match(LShr, m_LShr(m_Specific(A), m_Value(B)))) {
      // Count how many of the and/or/lshr instructions have a single use.
      // (And was already required to be single-use earlier in this function,
      // and Or by the enclosing condition.)
1809
3
      unsigned UsesRemoved = 0;
1810
3
      if (And->hasOneUse())
1811
3
        ++UsesRemoved;
1812
3
      if (Or->hasOneUse())
1813
3
        ++UsesRemoved;
1814
3
      if (LShr->hasOneUse())
1815
3
        ++UsesRemoved;
1816
1817
      // Compute A & ((1 << B) | 1)
      // With an immediate-constant B one single-use instruction suffices;
      // otherwise all three must be single-use.
1818
3
      unsigned RequireUsesRemoved = match(B, m_ImmConstant()) ? 1 : 3;
1819
3
      if (UsesRemoved >= RequireUsesRemoved) {
1820
3
        Value *NewOr =
1821
3
            Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(),
1822
3
                                               /*HasNUW=*/true),
1823
3
                             One, Or->getName());
1824
3
        Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName());
1825
3
        return replaceOperand(Cmp, 0, NewAnd);
1826
3
      }
1827
3
    }
1828
626
  }
1829
1830
5.41k
  return nullptr;
1831
5.41k
}
1832
1833
/// Fold icmp (and X, Y), C.
///
/// Tries foldICmpAndConstConst first, then sign-bit-check folds, then a
/// load-from-indexed-global fold. Everything after that point applies to
/// equality predicates only.
///
/// \returns a replacement instruction, or nullptr if no fold applies.
1834
Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
1835
                                                   BinaryOperator *And,
1836
7.86k
                                                   const APInt &C) {
1837
7.86k
  if (Instruction *I = foldICmpAndConstConst(Cmp, And, C))
1838
495
    return I;
1839
1840
7.37k
  const ICmpInst::Predicate Pred = Cmp.getPredicate();
1841
7.37k
  bool TrueIfNeg;
1842
7.37k
  if (isSignBitCheck(Pred, C, TrueIfNeg)) {
1843
    // ((X - 1) & ~X) <  0 --> X == 0
1844
    // ((X - 1) & ~X) >= 0 --> X != 0
1845
28
    Value *X;
1846
28
    if (match(And->getOperand(0), m_Add(m_Value(X), m_AllOnes())) &&
1847
28
        match(And->getOperand(1), m_Not(m_Specific(X)))) {
1848
3
      auto NewPred = TrueIfNeg ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
1849
3
      return new ICmpInst(NewPred, X, ConstantInt::getNullValue(X->getType()));
1850
3
    }
1851
    // (X & -X) <  0 --> X == MinSignedC
1852
    // (X & -X) > -1 --> X != MinSignedC
1853
25
    if (match(And, m_c_And(m_Neg(m_Value(X)), m_Deferred(X)))) {
1854
0
      Constant *MinSignedC = ConstantInt::get(
1855
0
          X->getType(),
1856
0
          APInt::getSignedMinValue(X->getType()->getScalarSizeInBits()));
1857
0
      auto NewPred = TrueIfNeg ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
1858
0
      return new ICmpInst(NewPred, X, MinSignedC);
1859
0
    }
1860
25
  }
1861
1862
  // TODO: These all require that Y is constant too, so refactor with the above.
1863
1864
  // Try to optimize things like "A[i] & 42 == 0" to index computations.
  // Requires the shape: and (load (gep GlobalVariable, ...)), ConstantInt.
1865
7.36k
  Value *X = And->getOperand(0);
1866
7.36k
  Value *Y = And->getOperand(1);
1867
7.36k
  if (auto *C2 = dyn_cast<ConstantInt>(Y))
1868
5.68k
    if (auto *LI = dyn_cast<LoadInst>(X))
1869
468
      if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
1870
62
        if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
1871
10
          if (Instruction *Res =
1872
10
                  foldCmpLoadFromIndexedGlobal(LI, GEP, GV, Cmp, C2))
1873
7
            return Res;
1874
1875
  // Everything below handles eq/ne only.
7.36k
  if (!Cmp.isEquality())
1876
234
    return nullptr;
1877
1878
  // X & -C == -C -> X >  u ~C
1879
  // X & -C != -C -> X <= u ~C
1880
  //   iff C is a power of 2
  // The identity check requires the compare's RHS and the mask to be the very
  // same Value.
1881
7.12k
  if (Cmp.getOperand(1) == Y && C.isNegatedPowerOf2()) {
1882
26
    auto NewPred =
1883
26
        Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1884
26
    return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
1885
26
  }
1886
1887
  // If we are testing the intersection of 2 select-of-nonzero-constants with no
1888
  // common bits set, it's the same as checking if exactly one select condition
1889
  // is set:
1890
  // ((A ? TC : FC) & (B ? TC : FC)) == 0 --> xor A, B
1891
  // ((A ? TC : FC) & (B ? TC : FC)) != 0 --> not(xor A, B)
1892
  // TODO: Generalize for non-constant values.
1893
  // TODO: Handle signed/unsigned predicates.
1894
  // TODO: Handle other bitwise logic connectors.
1895
  // TODO: Extend to handle a non-zero compare constant.
  // The 'ne' form creates an extra 'not', so it additionally requires a
  // single-use 'and'.
1896
7.10k
  if (C.isZero() && (Pred == CmpInst::ICMP_EQ || And->hasOneUse())) {
1897
5.03k
    assert(Cmp.isEquality() && "Not expecting non-equality predicates");
1898
0
    Value *A, *B;
1899
5.03k
    const APInt *TC, *FC;
1900
5.03k
    if (match(X, m_Select(m_Value(A), m_APInt(TC), m_APInt(FC))) &&
1901
5.03k
        match(Y,
1902
0
              m_Select(m_Value(B), m_SpecificInt(*TC), m_SpecificInt(*FC))) &&
1903
5.03k
        !TC->isZero() && !FC->isZero() && !TC->intersects(*FC)) {
1904
0
      Value *R = Builder.CreateXor(A, B);
1905
0
      if (Pred == CmpInst::ICMP_NE)
1906
0
        R = Builder.CreateNot(R);
1907
0
      return replaceInstUsesWith(Cmp, R);
1908
0
    }
1909
5.03k
  }
1910
1911
  // ((zext i1 X) & Y) == 0 --> !((trunc Y) & X)
1912
  // ((zext i1 X) & Y) != 0 -->  ((trunc Y) & X)
1913
  // ((zext i1 X) & Y) == 1 -->  ((trunc Y) & X)
1914
  // ((zext i1 X) & Y) != 1 --> !((trunc Y) & X)
  // Note: the match below rebinds X and Y (X becomes the i1 source of the
  // zext, Y the other 'and' operand).
1915
7.10k
  if (match(And, m_OneUse(m_c_And(m_OneUse(m_ZExt(m_Value(X))), m_Value(Y)))) &&
1916
7.10k
      X->getType()->isIntOrIntVectorTy(1) && (C.isZero() || C.isOne())) {
1917
0
    Value *TruncY = Builder.CreateTrunc(Y, X->getType());
    // The negated form is needed for "== 0" and "!= 1".
1918
0
    if (C.isZero() ^ (Pred == CmpInst::ICMP_NE)) {
1919
0
      Value *And = Builder.CreateAnd(TruncY, X);
1920
0
      return BinaryOperator::CreateNot(And);
1921
0
    }
1922
0
    return BinaryOperator::CreateAnd(TruncY, X);
1923
0
  }
1924
1925
7.10k
  return nullptr;
1926
7.10k
}
1927
1928
/// Fold icmp eq/ne (or (xor/sub (X1, X2), xor/sub (X3, X4))), 0.
///
/// Walks the tree of 'or' instructions rooted at \p Or. Every leaf must be a
/// single-use xor or sub; if any leaf is something else, nullptr is returned.
/// On success the new compares and the and/or instructions joining them are
/// created through \p Builder.
///
/// \returns the value of the combined compare chain, or nullptr.
1929
static Value *foldICmpOrXorSubChain(ICmpInst &Cmp, BinaryOperator *Or,
1930
62
                                    InstCombiner::BuilderTy &Builder) {
1931
  // Are we using xors or subs to bitwise check for a pair or pairs of
1932
  // (in)equalities? Convert to a shorter form that has more potential to be
1933
  // folded even further.
1934
  // ((X1 ^/- X2) || (X3 ^/- X4)) == 0 --> (X1 == X2) && (X3 == X4)
1935
  // ((X1 ^/- X2) || (X3 ^/- X4)) != 0 --> (X1 != X2) || (X3 != X4)
1936
  // ((X1 ^/- X2) || (X3 ^/- X4) || (X5 ^/- X6)) == 0 -->
1937
  // (X1 == X2) && (X3 == X4) && (X5 == X6)
1938
  // ((X1 ^/- X2) || (X3 ^/- X4) || (X5 ^/- X6)) != 0 -->
1939
  // (X1 != X2) || (X3 != X4) || (X5 != X6)
  // CmpValues collects the operand pair of every xor/sub leaf; WorkList holds
  // values that still have to be decomposed as an 'or'.
1940
62
  SmallVector<std::pair<Value *, Value *>, 2> CmpValues;
1941
62
  SmallVector<Value *, 16> WorkList(1, Or);
1942
1943
124
  while (!WorkList.empty()) {
    // Records the operand pair if the argument is a single-use xor or sub;
    // otherwise queues the argument so it is later required to be an 'or'.
1944
124
    auto MatchOrOperatorArgument = [&](Value *OrOperatorArgument) {
1945
124
      Value *Lhs, *Rhs;
1946
1947
124
      if (match(OrOperatorArgument,
1948
124
                m_OneUse(m_Xor(m_Value(Lhs), m_Value(Rhs))))) {
1949
6
        CmpValues.emplace_back(Lhs, Rhs);
1950
6
        return;
1951
6
      }
1952
1953
118
      if (match(OrOperatorArgument,
1954
118
                m_OneUse(m_Sub(m_Value(Lhs), m_Value(Rhs))))) {
1955
1
        CmpValues.emplace_back(Lhs, Rhs);
1956
1
        return;
1957
1
      }
1958
1959
117
      WorkList.push_back(OrOperatorArgument);
1960
117
    };
1961
1962
121
    Value *CurrentValue = WorkList.pop_back_val();
1963
121
    Value *OrOperatorLhs, *OrOperatorRhs;
1964
1965
    // Anything on the worklist that is not an 'or' aborts the whole fold.
121
    if (!match(CurrentValue,
1966
121
               m_Or(m_Value(OrOperatorLhs), m_Value(OrOperatorRhs)))) {
1967
59
      return nullptr;
1968
59
    }
1969
1970
62
    MatchOrOperatorArgument(OrOperatorRhs);
1971
62
    MatchOrOperatorArgument(OrOperatorLhs);
1972
62
  }
1973
1974
  // Combine the per-pair compares with 'and' for eq and 'or' otherwise,
  // emitting them in reverse order of collection.
3
  ICmpInst::Predicate Pred = Cmp.getPredicate();
1975
3
  auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
1976
3
  Value *LhsCmp = Builder.CreateICmp(Pred, CmpValues.rbegin()->first,
1977
3
                                     CmpValues.rbegin()->second);
1978
1979
6
  for (auto It = CmpValues.rbegin() + 1; It != CmpValues.rend(); ++It) {
1980
3
    Value *RhsCmp = Builder.CreateICmp(Pred, It->first, It->second);
1981
3
    LhsCmp = Builder.CreateBinOp(BOpc, LhsCmp, RhsCmp);
1982
3
  }
1983
1984
3
  return LhsCmp;
1985
62
}
1986
1987
/// Fold icmp (or X, Y), C.
1988
Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp,
1989
                                                  BinaryOperator *Or,
1990
393
                                                  const APInt &C) {
1991
393
  ICmpInst::Predicate Pred = Cmp.getPredicate();
1992
393
  if (C.isOne()) {
1993
    // icmp slt signum(V) 1 --> icmp slt V, 1
1994
27
    Value *V = nullptr;
1995
27
    if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V))))
1996
0
      return new ICmpInst(ICmpInst::ICMP_SLT, V,
1997
0
                          ConstantInt::get(V->getType(), 1));
1998
27
  }
1999
2000
393
  Value *OrOp0 = Or->getOperand(0), *OrOp1 = Or->getOperand(1);
2001
393
  const APInt *MaskC;
2002
393
  if (match(OrOp1, m_APInt(MaskC)) && Cmp.isEquality()) {
2003
83
    if (*MaskC == C && (C + 1).isPowerOf2()) {
2004
      // X | C == C --> X <=u C
2005
      // X | C != C --> X  >u C
2006
      //   iff C+1 is a power of 2 (C is a bitmask of the low bits)
2007
19
      Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
2008
19
      return new ICmpInst(Pred, OrOp0, OrOp1);
2009
19
    }
2010
2011
    // More general: canonicalize 'equality with set bits mask' to
2012
    // 'equality with clear bits mask'.
2013
    // (X | MaskC) == C --> (X & ~MaskC) == C ^ MaskC
2014
    // (X | MaskC) != C --> (X & ~MaskC) != C ^ MaskC
2015
64
    if (Or->hasOneUse()) {
2016
28
      Value *And = Builder.CreateAnd(OrOp0, ~(*MaskC));
2017
28
      Constant *NewC = ConstantInt::get(Or->getType(), C ^ (*MaskC));
2018
28
      return new ICmpInst(Pred, And, NewC);
2019
28
    }
2020
64
  }
2021
2022
  // (X | (X-1)) s<  0 --> X s< 1
2023
  // (X | (X-1)) s> -1 --> X s> 0
2024
346
  Value *X;
2025
346
  bool TrueIfSigned;
2026
346
  if (isSignBitCheck(Pred, C, TrueIfSigned) &&
2027
346
      match(Or, m_c_Or(m_Add(m_Value(X), m_AllOnes()), m_Deferred(X)))) {
2028
0
    auto NewPred = TrueIfSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGT;
2029
0
    Constant *NewC = ConstantInt::get(X->getType(), TrueIfSigned ? 1 : 0);
2030
0
    return new ICmpInst(NewPred, X, NewC);
2031
0
  }
2032
2033
346
  const APInt *OrC;
2034
  // icmp(X | OrC, C) --> icmp(X, 0)
2035
346
  if (C.isNonNegative() && match(Or, m_Or(m_Value(X), m_APInt(OrC)))) {
2036
107
    switch (Pred) {
2037
    // X | OrC s< C --> X s< 0 iff OrC s>= C s>= 0
2038
11
    case ICmpInst::ICMP_SLT:
2039
    // X | OrC s>= C --> X s>= 0 iff OrC s>= C s>= 0
2040
11
    case ICmpInst::ICMP_SGE:
2041
11
      if (OrC->sge(C))
2042
10
        return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
2043
1
      break;
2044
    // X | OrC s<= C --> X s< 0 iff OrC s> C s>= 0
2045
1
    case ICmpInst::ICMP_SLE:
2046
    // X | OrC s> C --> X s>= 0 iff OrC s> C s>= 0
2047
14
    case ICmpInst::ICMP_SGT:
2048
14
      if (OrC->sgt(C))
2049
13
        return new ICmpInst(ICmpInst::getFlippedStrictnessPredicate(Pred), X,
2050
13
                            ConstantInt::getNullValue(X->getType()));
2051
1
      break;
2052
82
    default:
2053
82
      break;
2054
107
    }
2055
107
  }
2056
2057
323
  if (!Cmp.isEquality() || !C.isZero() || !Or->hasOneUse())
2058
259
    return nullptr;
2059
2060
64
  Value *P, *Q;
2061
64
  if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
2062
    // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
2063
    // -> and (icmp eq P, null), (icmp eq Q, null).
2064
2
    Value *CmpP =
2065
2
        Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType()));
2066
2
    Value *CmpQ =
2067
2
        Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType()));
2068
2
    auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
2069
2
    return BinaryOperator::Create(BOpc, CmpP, CmpQ);
2070
2
  }
2071
2072
62
  if (Value *V = foldICmpOrXorSubChain(Cmp, Or, Builder))
2073
3
    return replaceInstUsesWith(Cmp, V);
2074
2075
59
  return nullptr;
2076
62
}
2077
2078
/// Fold icmp (mul X, Y), C.
2079
Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp,
2080
                                                   BinaryOperator *Mul,
2081
1.20k
                                                   const APInt &C) {
2082
1.20k
  ICmpInst::Predicate Pred = Cmp.getPredicate();
2083
1.20k
  Type *MulTy = Mul->getType();
2084
1.20k
  Value *X = Mul->getOperand(0);
2085
2086
  // If there's no overflow:
2087
  // X * X == 0 --> X == 0
2088
  // X * X != 0 --> X != 0
2089
1.20k
  if (Cmp.isEquality() && C.isZero() && X == Mul->getOperand(1) &&
2090
1.20k
      (Mul->hasNoUnsignedWrap() || Mul->hasNoSignedWrap()))
2091
33
    return new ICmpInst(Pred, X, ConstantInt::getNullValue(MulTy));
2092
2093
1.17k
  const APInt *MulC;
2094
1.17k
  if (!match(Mul->getOperand(1), m_APInt(MulC)))
2095
645
    return nullptr;
2096
2097
  // If this is a test of the sign bit and the multiply is sign-preserving with
2098
  // a constant operand, use the multiply LHS operand instead:
2099
  // (X * +MulC) < 0 --> X < 0
2100
  // (X * -MulC) < 0 --> X > 0
2101
530
  if (isSignTest(Pred, C) && Mul->hasNoSignedWrap()) {
2102
15
    if (MulC->isNegative())
2103
10
      Pred = ICmpInst::getSwappedPredicate(Pred);
2104
15
    return new ICmpInst(Pred, X, ConstantInt::getNullValue(MulTy));
2105
15
  }
2106
2107
515
  if (MulC->isZero())
2108
0
    return nullptr;
2109
2110
  // If the multiply does not wrap or the constant is odd, try to divide the
2111
  // compare constant by the multiplication factor.
2112
515
  if (Cmp.isEquality()) {
2113
    // (mul nsw X, MulC) eq/ne C --> X eq/ne C /s MulC
2114
128
    if (Mul->hasNoSignedWrap() && C.srem(*MulC).isZero()) {
2115
3
      Constant *NewC = ConstantInt::get(MulTy, C.sdiv(*MulC));
2116
3
      return new ICmpInst(Pred, X, NewC);
2117
3
    }
2118
2119
    // C % MulC == 0 is weaker than we could use if MulC is odd because it is
2120
    // correct to transform if MulC * N == C including overflow. I.e. with i8
2121
    // (icmp eq (mul X, 5), 101) -> (icmp eq X, 225) but since 101 % 5 != 0, we
2122
    // miss that case.
2123
125
    if (C.urem(*MulC).isZero()) {
2124
      // (mul nuw X, MulC) eq/ne C --> X eq/ne C /u MulC
2125
      // (mul X, OddC) eq/ne N * C --> X eq/ne N
2126
33
      if ((*MulC & 1).isOne() || Mul->hasNoUnsignedWrap()) {
2127
10
        Constant *NewC = ConstantInt::get(MulTy, C.udiv(*MulC));
2128
10
        return new ICmpInst(Pred, X, NewC);
2129
10
      }
2130
33
    }
2131
125
  }
2132
2133
  // With a matching no-overflow guarantee, fold the constants:
2134
  // (X * MulC) < C --> X < (C / MulC)
2135
  // (X * MulC) > C --> X > (C / MulC)
2136
  // TODO: Assert that Pred is not equal to SGE, SLE, UGE, ULE?
2137
502
  Constant *NewC = nullptr;
2138
502
  if (Mul->hasNoSignedWrap() && ICmpInst::isSigned(Pred)) {
2139
    // MININT / -1 --> overflow.
2140
3
    if (C.isMinSignedValue() && MulC->isAllOnes())
2141
0
      return nullptr;
2142
3
    if (MulC->isNegative())
2143
1
      Pred = ICmpInst::getSwappedPredicate(Pred);
2144
2145
3
    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) {
2146
1
      NewC = ConstantInt::get(
2147
1
          MulTy, APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::UP));
2148
2
    } else {
2149
2
      assert((Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_SGT) &&
2150
2
             "Unexpected predicate");
2151
0
      NewC = ConstantInt::get(
2152
2
          MulTy, APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::DOWN));
2153
2
    }
2154
499
  } else if (Mul->hasNoUnsignedWrap() && ICmpInst::isUnsigned(Pred)) {
2155
7
    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE) {
2156
3
      NewC = ConstantInt::get(
2157
3
          MulTy, APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::UP));
2158
4
    } else {
2159
4
      assert((Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) &&
2160
4
             "Unexpected predicate");
2161
0
      NewC = ConstantInt::get(
2162
4
          MulTy, APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::DOWN));
2163
4
    }
2164
7
  }
2165
2166
502
  return NewC ? new ICmpInst(Pred, X, NewC) : nullptr;
2167
502
}
2168
2169
/// Fold icmp (shl 1, Y), C.
2170
static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
2171
228
                                   const APInt &C) {
2172
228
  Value *Y;
2173
228
  if (!match(Shl, m_Shl(m_One(), m_Value(Y))))
2174
182
    return nullptr;
2175
2176
46
  Type *ShiftType = Shl->getType();
2177
46
  unsigned TypeBits = C.getBitWidth();
2178
46
  bool CIsPowerOf2 = C.isPowerOf2();
2179
46
  ICmpInst::Predicate Pred = Cmp.getPredicate();
2180
46
  if (Cmp.isUnsigned()) {
2181
    // (1 << Y) pred C -> Y pred Log2(C)
2182
26
    if (!CIsPowerOf2) {
2183
      // (1 << Y) <  30 -> Y <= 4
2184
      // (1 << Y) <= 30 -> Y <= 4
2185
      // (1 << Y) >= 30 -> Y >  4
2186
      // (1 << Y) >  30 -> Y >  4
2187
13
      if (Pred == ICmpInst::ICMP_ULT)
2188
7
        Pred = ICmpInst::ICMP_ULE;
2189
6
      else if (Pred == ICmpInst::ICMP_UGE)
2190
0
        Pred = ICmpInst::ICMP_UGT;
2191
13
    }
2192
2193
26
    unsigned CLog2 = C.logBase2();
2194
26
    return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2));
2195
26
  } else if (Cmp.isSigned()) {
2196
20
    Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1);
2197
    // (1 << Y) >  0 -> Y != 31
2198
    // (1 << Y) >  C -> Y != 31 if C is negative.
2199
20
    if (Pred == ICmpInst::ICMP_SGT && C.sle(0))
2200
5
      return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
2201
2202
    // (1 << Y) <  0 -> Y == 31
2203
    // (1 << Y) <  1 -> Y == 31
2204
    // (1 << Y) <  C -> Y == 31 if C is negative and not signed min.
2205
    // Exclude signed min by subtracting 1 and lower the upper bound to 0.
2206
15
    if (Pred == ICmpInst::ICMP_SLT && (C-1).sle(0))
2207
7
      return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
2208
15
  }
2209
2210
8
  return nullptr;
2211
46
}
2212
2213
/// Fold icmp (shl X, Y), C.
2214
Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
2215
                                                   BinaryOperator *Shl,
2216
1.12k
                                                   const APInt &C) {
2217
1.12k
  const APInt *ShiftVal;
2218
1.12k
  if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal)))
2219
44
    return foldICmpShlConstConst(Cmp, Shl->getOperand(1), C, *ShiftVal);
2220
2221
1.08k
  ICmpInst::Predicate Pred = Cmp.getPredicate();
2222
  // (icmp pred (shl nuw&nsw X, Y), Csle0)
2223
  //      -> (icmp pred X, Csle0)
2224
  //
2225
  // The idea is that nuw/nsw essentially freeze the sign bit for the shift op,
2226
  // so X's sign bit must be what is used.
2227
1.08k
  if (C.sle(0) && Shl->hasNoUnsignedWrap() && Shl->hasNoSignedWrap())
2228
54
    return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1));
2229
2230
  // (icmp eq/ne (shl nuw|nsw X, Y), 0)
2231
  //      -> (icmp eq/ne X, 0)
2232
1.03k
  if (ICmpInst::isEquality(Pred) && C.isZero() &&
2233
1.03k
      (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap()))
2234
61
    return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1));
2235
2236
  // (icmp slt (shl nsw X, Y), 0/1)
2237
  //      -> (icmp slt X, 0/1)
2238
  // (icmp sgt (shl nsw X, Y), 0/-1)
2239
  //      -> (icmp sgt X, 0/-1)
2240
  //
2241
  // NB: sge/sle with a constant will canonicalize to sgt/slt.
2242
970
  if (Shl->hasNoSignedWrap() &&
2243
970
      (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT))
2244
63
    if (C.isZero() || (Pred == ICmpInst::ICMP_SGT ? C.isAllOnes() : C.isOne()))
2245
42
      return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1));
2246
2247
928
  const APInt *ShiftAmt;
2248
928
  if (!match(Shl->getOperand(1), m_APInt(ShiftAmt)))
2249
228
    return foldICmpShlOne(Cmp, Shl, C);
2250
2251
  // Check that the shift amount is in range. If not, don't perform undefined
2252
  // shifts. When the shift is visited, it will be simplified.
2253
700
  unsigned TypeBits = C.getBitWidth();
2254
700
  if (ShiftAmt->uge(TypeBits))
2255
0
    return nullptr;
2256
2257
700
  Value *X = Shl->getOperand(0);
2258
700
  Type *ShType = Shl->getType();
2259
2260
  // NSW guarantees that we are only shifting out sign bits from the high bits,
2261
  // so we can ASHR the compare constant without needing a mask and eliminate
2262
  // the shift.
2263
700
  if (Shl->hasNoSignedWrap()) {
2264
106
    if (Pred == ICmpInst::ICMP_SGT) {
2265
      // icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt)
2266
14
      APInt ShiftedC = C.ashr(*ShiftAmt);
2267
14
      return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2268
14
    }
2269
92
    if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
2270
92
        C.ashr(*ShiftAmt).shl(*ShiftAmt) == C) {
2271
49
      APInt ShiftedC = C.ashr(*ShiftAmt);
2272
49
      return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2273
49
    }
2274
43
    if (Pred == ICmpInst::ICMP_SLT) {
2275
      // SLE is the same as above, but SLE is canonicalized to SLT, so convert:
2276
      // (X << S) <=s C is equiv to X <=s (C >> S) for all C
2277
      // (X << S) <s (C + 1) is equiv to X <s (C >> S) + 1 if C <s SMAX
2278
      // (X << S) <s C is equiv to X <s ((C - 1) >> S) + 1 if C >s SMIN
2279
7
      assert(!C.isMinSignedValue() && "Unexpected icmp slt");
2280
0
      APInt ShiftedC = (C - 1).ashr(*ShiftAmt) + 1;
2281
7
      return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2282
7
    }
2283
43
  }
2284
2285
  // NUW guarantees that we are only shifting out zero bits from the high bits,
2286
  // so we can LSHR the compare constant without needing a mask and eliminate
2287
  // the shift.
2288
630
  if (Shl->hasNoUnsignedWrap()) {
2289
161
    if (Pred == ICmpInst::ICMP_UGT) {
2290
      // icmp Pred (shl nuw X, ShiftAmt), C --> icmp Pred X, (C >>u ShiftAmt)
2291
29
      APInt ShiftedC = C.lshr(*ShiftAmt);
2292
29
      return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2293
29
    }
2294
132
    if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
2295
132
        C.lshr(*ShiftAmt).shl(*ShiftAmt) == C) {
2296
7
      APInt ShiftedC = C.lshr(*ShiftAmt);
2297
7
      return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2298
7
    }
2299
125
    if (Pred == ICmpInst::ICMP_ULT) {
2300
      // ULE is the same as above, but ULE is canonicalized to ULT, so convert:
2301
      // (X << S) <=u C is equiv to X <=u (C >> S) for all C
2302
      // (X << S) <u (C + 1) is equiv to X <u (C >> S) + 1 if C <u ~0u
2303
      // (X << S) <u C is equiv to X <u ((C - 1) >> S) + 1 if C >u 0
2304
32
      assert(C.ugt(0) && "ult 0 should have been eliminated");
2305
0
      APInt ShiftedC = (C - 1).lshr(*ShiftAmt) + 1;
2306
32
      return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2307
32
    }
2308
125
  }
2309
2310
562
  if (Cmp.isEquality() && Shl->hasOneUse()) {
2311
    // Strength-reduce the shift into an 'and'.
2312
73
    Constant *Mask = ConstantInt::get(
2313
73
        ShType,
2314
73
        APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));
2315
73
    Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
2316
73
    Constant *LShrC = ConstantInt::get(ShType, C.lshr(*ShiftAmt));
2317
73
    return new ICmpInst(Pred, And, LShrC);
2318
73
  }
2319
2320
  // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
2321
489
  bool TrueIfSigned = false;
2322
489
  if (Shl->hasOneUse() && isSignBitCheck(Pred, C, TrueIfSigned)) {
2323
    // (X << 31) <s 0  --> (X & 1) != 0
2324
31
    Constant *Mask = ConstantInt::get(
2325
31
        ShType,
2326
31
        APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1));
2327
31
    Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
2328
31
    return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
2329
31
                        And, Constant::getNullValue(ShType));
2330
31
  }
2331
2332
  // Simplify 'shl' inequality test into 'and' equality test.
2333
458
  if (Cmp.isUnsigned() && Shl->hasOneUse()) {
2334
    // (X l<< C2) u<=/u> C1 iff C1+1 is power of two -> X & (~C1 l>> C2) ==/!= 0
2335
20
    if ((C + 1).isPowerOf2() &&
2336
20
        (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT)) {
2337
1
      Value *And = Builder.CreateAnd(X, (~C).lshr(ShiftAmt->getZExtValue()));
2338
1
      return new ICmpInst(Pred == ICmpInst::ICMP_ULE ? ICmpInst::ICMP_EQ
2339
1
                                                     : ICmpInst::ICMP_NE,
2340
1
                          And, Constant::getNullValue(ShType));
2341
1
    }
2342
    // (X l<< C2) u</u>= C1 iff C1 is power of two -> X & (-C1 l>> C2) ==/!= 0
2343
19
    if (C.isPowerOf2() &&
2344
19
        (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) {
2345
7
      Value *And =
2346
7
          Builder.CreateAnd(X, (~(C - 1)).lshr(ShiftAmt->getZExtValue()));
2347
7
      return new ICmpInst(Pred == ICmpInst::ICMP_ULT ? ICmpInst::ICMP_EQ
2348
7
                                                     : ICmpInst::ICMP_NE,
2349
7
                          And, Constant::getNullValue(ShType));
2350
7
    }
2351
19
  }
2352
2353
  // Transform (icmp pred iM (shl iM %v, N), C)
2354
  // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N)))
2355
  // Transform the shl to a trunc if (trunc (C>>N)) has no loss and i(M-N) is
2356
  // legal. This enables us to get rid of the shift in favor of a trunc that
2357
  // may be free on the target. It has the additional benefit of comparing to
2358
  // a smaller constant that may be more target-friendly.
2359
450
  unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1);
2360
450
  if (Shl->hasOneUse() && Amt != 0 && C.countr_zero() >= Amt &&
2361
450
      DL.isLegalInteger(TypeBits - Amt)) {
2362
19
    Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt);
2363
19
    if (auto *ShVTy = dyn_cast<VectorType>(ShType))
2364
4
      TruncTy = VectorType::get(TruncTy, ShVTy->getElementCount());
2365
19
    Constant *NewC =
2366
19
        ConstantInt::get(TruncTy, C.ashr(*ShiftAmt).trunc(TypeBits - Amt));
2367
19
    return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC);
2368
19
  }
2369
2370
431
  return nullptr;
2371
450
}
2372
2373
/// Fold icmp ({al}shr X, Y), C.
2374
Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
2375
                                                   BinaryOperator *Shr,
2376
736
                                                   const APInt &C) {
2377
  // An exact shr only shifts out zero bits, so:
2378
  // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0
2379
736
  Value *X = Shr->getOperand(0);
2380
736
  CmpInst::Predicate Pred = Cmp.getPredicate();
2381
736
  if (Cmp.isEquality() && Shr->isExact() && C.isZero())
2382
36
    return new ICmpInst(Pred, X, Cmp.getOperand(1));
2383
2384
700
  bool IsAShr = Shr->getOpcode() == Instruction::AShr;
2385
700
  const APInt *ShiftValC;
2386
700
  if (match(X, m_APInt(ShiftValC))) {
2387
56
    if (Cmp.isEquality())
2388
38
      return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftValC);
2389
2390
    // (ShiftValC >> Y) >s -1 --> Y != 0 with ShiftValC < 0
2391
    // (ShiftValC >> Y) <s  0 --> Y == 0 with ShiftValC < 0
2392
18
    bool TrueIfSigned;
2393
18
    if (!IsAShr && ShiftValC->isNegative() &&
2394
18
        isSignBitCheck(Pred, C, TrueIfSigned))
2395
1
      return new ICmpInst(TrueIfSigned ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE,
2396
1
                          Shr->getOperand(1),
2397
1
                          ConstantInt::getNullValue(X->getType()));
2398
2399
    // If the shifted constant is a power-of-2, test the shift amount directly:
2400
    // (ShiftValC >> Y) >u C --> Y <u (LZ(C) - LZ(ShiftValC))
2401
    // (ShiftValC >> Y) <u C --> Y >=u (LZ(C-1) - LZ(ShiftValC))
2402
17
    if (!IsAShr && ShiftValC->isPowerOf2() &&
2403
17
        (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_ULT)) {
2404
3
      bool IsUGT = Pred == CmpInst::ICMP_UGT;
2405
3
      assert(ShiftValC->uge(C) && "Expected simplify of compare");
2406
0
      assert((IsUGT || !C.isZero()) && "Expected X u< 0 to simplify");
2407
2408
3
      unsigned CmpLZ = IsUGT ? C.countl_zero() : (C - 1).countl_zero();
2409
3
      unsigned ShiftLZ = ShiftValC->countl_zero();
2410
3
      Constant *NewC = ConstantInt::get(Shr->getType(), CmpLZ - ShiftLZ);
2411
3
      auto NewPred = IsUGT ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
2412
3
      return new ICmpInst(NewPred, Shr->getOperand(1), NewC);
2413
3
    }
2414
17
  }
2415
2416
658
  const APInt *ShiftAmtC;
2417
658
  if (!match(Shr->getOperand(1), m_APInt(ShiftAmtC)))
2418
340
    return nullptr;
2419
2420
  // Check that the shift amount is in range. If not, don't perform undefined
2421
  // shifts. When the shift is visited it will be simplified.
2422
318
  unsigned TypeBits = C.getBitWidth();
2423
318
  unsigned ShAmtVal = ShiftAmtC->getLimitedValue(TypeBits);
2424
318
  if (ShAmtVal >= TypeBits || ShAmtVal == 0)
2425
0
    return nullptr;
2426
2427
318
  bool IsExact = Shr->isExact();
2428
318
  Type *ShrTy = Shr->getType();
2429
  // TODO: If we could guarantee that InstSimplify would handle all of the
2430
  // constant-value-based preconditions in the folds below, then we could assert
2431
  // those conditions rather than checking them. This is difficult because of
2432
  // undef/poison (PR34838).
2433
318
  if (IsAShr && Shr->hasOneUse()) {
2434
63
    if (IsExact || Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) {
2435
      // When C can be shifted losslessly:
2436
      // icmp PRED (ashr exact X, ShAmtC), C --> icmp PRED X, (C << ShAmtC)
2437
      // icmp slt/ult (ashr X, ShAmtC), C --> icmp slt/ult X, (C << ShAmtC)
2438
14
      APInt ShiftedC = C.shl(ShAmtVal);
2439
14
      if (ShiftedC.ashr(ShAmtVal) == C)
2440
10
        return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2441
14
    }
2442
53
    if (Pred == CmpInst::ICMP_SGT) {
2443
      // icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1
2444
6
      APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
2445
6
      if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() &&
2446
6
          (ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
2447
6
        return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2448
6
    }
2449
47
    if (Pred == CmpInst::ICMP_UGT) {
2450
      // icmp ugt (ashr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
2451
      // 'C + 1 << ShAmtC' can overflow as a signed number, so the 2nd
2452
      // clause accounts for that pattern.
2453
1
      APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
2454
1
      if ((ShiftedC + 1).ashr(ShAmtVal) == (C + 1) ||
2455
1
          (C + 1).shl(ShAmtVal).isMinSignedValue())
2456
1
        return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2457
1
    }
2458
2459
    // If the compare constant has significant bits above the lowest sign-bit,
2460
    // then convert an unsigned cmp to a test of the sign-bit:
2461
    // (ashr X, ShiftC) u> C --> X s< 0
2462
    // (ashr X, ShiftC) u< C --> X s> -1
2463
46
    if (C.getBitWidth() > 2 && C.getNumSignBits() <= ShAmtVal) {
2464
4
      if (Pred == CmpInst::ICMP_UGT) {
2465
0
        return new ICmpInst(CmpInst::ICMP_SLT, X,
2466
0
                            ConstantInt::getNullValue(ShrTy));
2467
0
      }
2468
4
      if (Pred == CmpInst::ICMP_ULT) {
2469
4
        return new ICmpInst(CmpInst::ICMP_SGT, X,
2470
4
                            ConstantInt::getAllOnesValue(ShrTy));
2471
4
      }
2472
4
    }
2473
255
  } else if (!IsAShr) {
2474
137
    if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
2475
      // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)
2476
      // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC)
2477
50
      APInt ShiftedC = C.shl(ShAmtVal);
2478
50
      if (ShiftedC.lshr(ShAmtVal) == C)
2479
50
        return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2480
50
    }
2481
87
    if (Pred == CmpInst::ICMP_UGT) {
2482
      // icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
2483
15
      APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
2484
15
      if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1))
2485
15
        return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2486
15
    }
2487
87
  }
2488
2489
232
  if (!Cmp.isEquality())
2490
87
    return nullptr;
2491
2492
  // Handle equality comparisons of shift-by-constant.
2493
2494
  // If the comparison constant changes with the shift, the comparison cannot
2495
  // succeed (bits of the comparison constant cannot match the shifted value).
2496
  // This should be known by InstSimplify and already be folded to true/false.
2497
145
  assert(((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) ||
2498
145
          (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) &&
2499
145
         "Expected icmp+shr simplify did not occur.");
2500
2501
  // If the bits shifted out are known zero, compare the unshifted value:
2502
  //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
2503
145
  if (Shr->isExact())
2504
5
    return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal));
2505
2506
140
  if (C.isZero()) {
2507
    // == 0 is u< 1.
2508
82
    if (Pred == CmpInst::ICMP_EQ)
2509
57
      return new ICmpInst(CmpInst::ICMP_ULT, X,
2510
57
                          ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal)));
2511
25
    else
2512
25
      return new ICmpInst(CmpInst::ICMP_UGT, X,
2513
25
                          ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal) - 1));
2514
82
  }
2515
2516
58
  if (Shr->hasOneUse()) {
2517
    // Canonicalize the shift into an 'and':
2518
    // icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt)
2519
37
    APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
2520
37
    Constant *Mask = ConstantInt::get(ShrTy, Val);
2521
37
    Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask");
2522
37
    return new ICmpInst(Pred, And, ConstantInt::get(ShrTy, C << ShAmtVal));
2523
37
  }
2524
2525
21
  return nullptr;
2526
58
}
2527
2528
Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
2529
                                                    BinaryOperator *SRem,
2530
195
                                                    const APInt &C) {
2531
  // Match an 'is positive' or 'is negative' comparison of remainder by a
2532
  // constant power-of-2 value:
2533
  // (X % pow2C) sgt/slt 0
2534
195
  const ICmpInst::Predicate Pred = Cmp.getPredicate();
2535
195
  if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT &&
2536
195
      Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
2537
43
    return nullptr;
2538
2539
  // TODO: The one-use check is standard because we do not typically want to
2540
  //       create longer instruction sequences, but this might be a special-case
2541
  //       because srem is not good for analysis or codegen.
2542
152
  if (!SRem->hasOneUse())
2543
96
    return nullptr;
2544
2545
56
  const APInt *DivisorC;
2546
56
  if (!match(SRem->getOperand(1), m_Power2(DivisorC)))
2547
37
    return nullptr;
2548
2549
  // For cmp_sgt/cmp_slt only zero valued C is handled.
2550
  // For cmp_eq/cmp_ne only positive valued C is handled.
2551
19
  if (((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT) &&
2552
19
       !C.isZero()) ||
2553
19
      ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
2554
9
       !C.isStrictlyPositive()))
2555
13
    return nullptr;
2556
2557
  // Mask off the sign bit and the modulo bits (low-bits).
2558
6
  Type *Ty = SRem->getType();
2559
6
  APInt SignMask = APInt::getSignMask(Ty->getScalarSizeInBits());
2560
6
  Constant *MaskC = ConstantInt::get(Ty, SignMask | (*DivisorC - 1));
2561
6
  Value *And = Builder.CreateAnd(SRem->getOperand(0), MaskC);
2562
2563
6
  if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)
2564
1
    return new ICmpInst(Pred, And, ConstantInt::get(Ty, C));
2565
2566
  // For 'is positive?' check that the sign-bit is clear and at least 1 masked
2567
  // bit is set. Example:
2568
  // (i8 X % 32) s> 0 --> (X & 159) s> 0
2569
5
  if (Pred == ICmpInst::ICMP_SGT)
2570
1
    return new ICmpInst(ICmpInst::ICMP_SGT, And, ConstantInt::getNullValue(Ty));
2571
2572
  // For 'is negative?' check that the sign-bit is set and at least 1 masked
2573
  // bit is set. Example:
2574
  // (i16 X % 4) s< 0 --> (X & 32771) u> 32768
2575
4
  return new ICmpInst(ICmpInst::ICMP_UGT, And, ConstantInt::get(Ty, SignMask));
2576
5
}
2577
2578
/// Fold icmp (udiv X, Y), C.
2579
Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp,
2580
                                                    BinaryOperator *UDiv,
2581
265
                                                    const APInt &C) {
2582
265
  ICmpInst::Predicate Pred = Cmp.getPredicate();
2583
265
  Value *X = UDiv->getOperand(0);
2584
265
  Value *Y = UDiv->getOperand(1);
2585
265
  Type *Ty = UDiv->getType();
2586
2587
265
  const APInt *C2;
2588
265
  if (!match(X, m_APInt(C2)))
2589
148
    return nullptr;
2590
2591
117
  assert(*C2 != 0 && "udiv 0, X should have been simplified already.");
2592
2593
  // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1))
2594
117
  if (Pred == ICmpInst::ICMP_UGT) {
2595
28
    assert(!C.isMaxValue() &&
2596
28
           "icmp ugt X, UINT_MAX should have been simplified already.");
2597
0
    return new ICmpInst(ICmpInst::ICMP_ULE, Y,
2598
28
                        ConstantInt::get(Ty, C2->udiv(C + 1)));
2599
28
  }
2600
2601
  // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C)
2602
89
  if (Pred == ICmpInst::ICMP_ULT) {
2603
26
    assert(C != 0 && "icmp ult X, 0 should have been simplified already.");
2604
0
    return new ICmpInst(ICmpInst::ICMP_UGT, Y,
2605
26
                        ConstantInt::get(Ty, C2->udiv(C)));
2606
26
  }
2607
2608
63
  return nullptr;
2609
89
}
2610
2611
/// Fold icmp ({su}div X, Y), C.
2612
Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
2613
                                                   BinaryOperator *Div,
2614
514
                                                   const APInt &C) {
2615
514
  ICmpInst::Predicate Pred = Cmp.getPredicate();
2616
514
  Value *X = Div->getOperand(0);
2617
514
  Value *Y = Div->getOperand(1);
2618
514
  Type *Ty = Div->getType();
2619
514
  bool DivIsSigned = Div->getOpcode() == Instruction::SDiv;
2620
2621
  // If unsigned division and the compare constant is bigger than
2622
  // UMAX/2 (negative), there's only one pair of values that satisfies an
2623
  // equality check, so eliminate the division:
2624
  // (X u/ Y) == C --> (X == C) && (Y == 1)
2625
  // (X u/ Y) != C --> (X != C) || (Y != 1)
2626
  // Similarly, if signed division and the compare constant is exactly SMIN:
2627
  // (X s/ Y) == SMIN --> (X == SMIN) && (Y == 1)
2628
  // (X s/ Y) != SMIN --> (X != SMIN) || (Y != 1)
2629
514
  if (Cmp.isEquality() && Div->hasOneUse() && C.isSignBitSet() &&
2630
514
      (!DivIsSigned || C.isMinSignedValue()))   {
2631
6
    Value *XBig = Builder.CreateICmp(Pred, X, ConstantInt::get(Ty, C));
2632
6
    Value *YOne = Builder.CreateICmp(Pred, Y, ConstantInt::get(Ty, 1));
2633
6
    auto Logic = Pred == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
2634
6
    return BinaryOperator::Create(Logic, XBig, YOne);
2635
6
  }
2636
2637
  // Fold: icmp pred ([us]div X, C2), C -> range test
2638
  // Fold this div into the comparison, producing a range check.
2639
  // Determine, based on the divide type, what the range is being
2640
  // checked.  If there is an overflow on the low or high side, remember
2641
  // it, otherwise compute the range [low, hi) bounding the new value.
2642
  // See: InsertRangeTest above for the kinds of replacements possible.
2643
508
  const APInt *C2;
2644
508
  if (!match(Y, m_APInt(C2)))
2645
282
    return nullptr;
2646
2647
  // FIXME: If the operand types don't match the type of the divide
2648
  // then don't attempt this transform. The code below doesn't have the
2649
  // logic to deal with a signed divide and an unsigned compare (and
2650
  // vice versa). This is because (x /s C2) <s C  produces different
2651
  // results than (x /s C2) <u C or (x /u C2) <s C or even
2652
  // (x /u C2) <u C.  Simply casting the operands and result won't
2653
  // work. :(  The if statement below tests that condition and bails
2654
  // if it finds it.
2655
226
  if (!Cmp.isEquality() && DivIsSigned != Cmp.isSigned())
2656
29
    return nullptr;
2657
2658
  // The ProdOV computation fails on divide by 0 and divide by -1. Cases with
2659
  // INT_MIN will also fail if the divisor is 1. Although folds of all these
2660
  // division-by-constant cases should be present, we can not assert that they
2661
  // have happened before we reach this icmp instruction.
2662
197
  if (C2->isZero() || C2->isOne() || (DivIsSigned && C2->isAllOnes()))
2663
0
    return nullptr;
2664
2665
  // Compute Prod = C * C2. We are essentially solving an equation of
2666
  // form X / C2 = C. We solve for X by multiplying C2 and C.
2667
  // By solving for X, we can turn this into a range check instead of computing
2668
  // a divide.
2669
197
  APInt Prod = C * *C2;
2670
2671
  // Determine if the product overflows by seeing if the product is not equal to
2672
  // the divide. Make sure we do the same kind of divide as in the LHS
2673
  // instruction that we're folding.
2674
197
  bool ProdOV = (DivIsSigned ? Prod.sdiv(*C2) : Prod.udiv(*C2)) != C;
2675
2676
  // If the division is known to be exact, then there is no remainder from the
2677
  // divide, so the covered range size is unit, otherwise it is the divisor.
2678
197
  APInt RangeSize = Div->isExact() ? APInt(C2->getBitWidth(), 1) : *C2;
2679
2680
  // Figure out the interval that is being checked.  For example, a comparison
2681
  // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
2682
  // Compute this interval based on the constants involved and the signedness of
2683
  // the compare/divide.  This computes a half-open interval, keeping track of
2684
  // whether either value in the interval overflows.  After analysis each
2685
  // overflow variable is set to 0 if it's corresponding bound variable is valid
2686
  // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
2687
197
  int LoOverflow = 0, HiOverflow = 0;
2688
197
  APInt LoBound, HiBound;
2689
2690
197
  if (!DivIsSigned) { // udiv
2691
    // e.g. X/5 op 3  --> [15, 20)
2692
94
    LoBound = Prod;
2693
94
    HiOverflow = LoOverflow = ProdOV;
2694
94
    if (!HiOverflow) {
2695
      // If this is not an exact divide, then many values in the range collapse
2696
      // to the same result value.
2697
94
      HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false);
2698
94
    }
2699
103
  } else if (C2->isStrictlyPositive()) { // Divisor is > 0.
2700
87
    if (C.isZero()) {                    // (X / pos) op 0
2701
      // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
2702
33
      LoBound = -(RangeSize - 1);
2703
33
      HiBound = RangeSize;
2704
54
    } else if (C.isStrictlyPositive()) { // (X / pos) op pos
2705
30
      LoBound = Prod;                    // e.g.   X/5 op 3 --> [15, 20)
2706
30
      HiOverflow = LoOverflow = ProdOV;
2707
30
      if (!HiOverflow)
2708
30
        HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true);
2709
30
    } else { // (X / pos) op neg
2710
      // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
2711
24
      HiBound = Prod + 1;
2712
24
      LoOverflow = HiOverflow = ProdOV ? -1 : 0;
2713
24
      if (!LoOverflow) {
2714
24
        APInt DivNeg = -RangeSize;
2715
24
        LoOverflow = addWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
2716
24
      }
2717
24
    }
2718
87
  } else if (C2->isNegative()) { // Divisor is < 0.
2719
16
    if (Div->isExact())
2720
1
      RangeSize.negate();
2721
16
    if (C.isZero()) { // (X / neg) op 0
2722
      // e.g. X/-5 op 0  --> [-4, 5)
2723
1
      LoBound = RangeSize + 1;
2724
1
      HiBound = -RangeSize;
2725
1
      if (HiBound == *C2) { // -INTMIN = INTMIN
2726
0
        HiOverflow = 1;     // [INTMIN+1, overflow)
2727
0
        HiBound = APInt();  // e.g. X/INTMIN = 0 --> X > INTMIN
2728
0
      }
2729
15
    } else if (C.isStrictlyPositive()) { // (X / neg) op pos
2730
      // e.g. X/-5 op 3  --> [-19, -14)
2731
3
      HiBound = Prod + 1;
2732
3
      HiOverflow = LoOverflow = ProdOV ? -1 : 0;
2733
3
      if (!LoOverflow)
2734
3
        LoOverflow =
2735
3
            addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1 : 0;
2736
12
    } else {          // (X / neg) op neg
2737
12
      LoBound = Prod; // e.g. X/-5 op -3  --> [15, 20)
2738
12
      LoOverflow = HiOverflow = ProdOV;
2739
12
      if (!HiOverflow)
2740
12
        HiOverflow = subWithOverflow(HiBound, Prod, RangeSize, true);
2741
12
    }
2742
2743
    // Dividing by a negative swaps the condition.  LT <-> GT
2744
16
    Pred = ICmpInst::getSwappedPredicate(Pred);
2745
16
  }
2746
2747
197
  switch (Pred) {
2748
0
  default:
2749
0
    llvm_unreachable("Unhandled icmp predicate!");
2750
92
  case ICmpInst::ICMP_EQ:
2751
92
    if (LoOverflow && HiOverflow)
2752
0
      return replaceInstUsesWith(Cmp, Builder.getFalse());
2753
92
    if (HiOverflow)
2754
15
      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
2755
15
                          X, ConstantInt::get(Ty, LoBound));
2756
77
    if (LoOverflow)
2757
5
      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
2758
5
                          X, ConstantInt::get(Ty, HiBound));
2759
72
    return replaceInstUsesWith(
2760
72
        Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true));
2761
52
  case ICmpInst::ICMP_NE:
2762
52
    if (LoOverflow && HiOverflow)
2763
0
      return replaceInstUsesWith(Cmp, Builder.getTrue());
2764
52
    if (HiOverflow)
2765
20
      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
2766
20
                          X, ConstantInt::get(Ty, LoBound));
2767
32
    if (LoOverflow)
2768
0
      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
2769
0
                          X, ConstantInt::get(Ty, HiBound));
2770
32
    return replaceInstUsesWith(
2771
32
        Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, false));
2772
6
  case ICmpInst::ICMP_ULT:
2773
26
  case ICmpInst::ICMP_SLT:
2774
26
    if (LoOverflow == +1) // Low bound is greater than input range.
2775
0
      return replaceInstUsesWith(Cmp, Builder.getTrue());
2776
26
    if (LoOverflow == -1) // Low bound is less than input range.
2777
0
      return replaceInstUsesWith(Cmp, Builder.getFalse());
2778
26
    return new ICmpInst(Pred, X, ConstantInt::get(Ty, LoBound));
2779
5
  case ICmpInst::ICMP_UGT:
2780
27
  case ICmpInst::ICMP_SGT:
2781
27
    if (HiOverflow == +1) // High bound greater than input range.
2782
0
      return replaceInstUsesWith(Cmp, Builder.getFalse());
2783
27
    if (HiOverflow == -1) // High bound less than input range.
2784
0
      return replaceInstUsesWith(Cmp, Builder.getTrue());
2785
27
    if (Pred == ICmpInst::ICMP_UGT)
2786
5
      return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, HiBound));
2787
22
    return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, HiBound));
2788
197
  }
2789
2790
0
  return nullptr;
2791
197
}
2792
2793
/// Fold icmp (sub X, Y), C.
2794
Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp,
2795
                                                   BinaryOperator *Sub,
2796
1.20k
                                                   const APInt &C) {
2797
1.20k
  Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1);
2798
1.20k
  ICmpInst::Predicate Pred = Cmp.getPredicate();
2799
1.20k
  Type *Ty = Sub->getType();
2800
2801
  // (SubC - Y) == C) --> Y == (SubC - C)
2802
  // (SubC - Y) != C) --> Y != (SubC - C)
2803
1.20k
  Constant *SubC;
2804
1.20k
  if (Cmp.isEquality() && match(X, m_ImmConstant(SubC))) {
2805
206
    return new ICmpInst(Pred, Y,
2806
206
                        ConstantExpr::getSub(SubC, ConstantInt::get(Ty, C)));
2807
206
  }
2808
2809
  // (icmp P (sub nuw|nsw C2, Y), C) -> (icmp swap(P) Y, C2-C)
2810
1.00k
  const APInt *C2;
2811
1.00k
  APInt SubResult;
2812
1.00k
  ICmpInst::Predicate SwappedPred = Cmp.getSwappedPredicate();
2813
1.00k
  bool HasNSW = Sub->hasNoSignedWrap();
2814
1.00k
  bool HasNUW = Sub->hasNoUnsignedWrap();
2815
1.00k
  if (match(X, m_APInt(C2)) &&
2816
1.00k
      ((Cmp.isUnsigned() && HasNUW) || (Cmp.isSigned() && HasNSW)) &&
2817
1.00k
      !subWithOverflow(SubResult, *C2, C, Cmp.isSigned()))
2818
19
    return new ICmpInst(SwappedPred, Y, ConstantInt::get(Ty, SubResult));
2819
2820
  // X - Y == 0 --> X == Y.
2821
  // X - Y != 0 --> X != Y.
2822
  // TODO: We allow this with multiple uses as long as the other uses are not
2823
  //       in phis. The phi use check is guarding against a codegen regression
2824
  //       for a loop test. If the backend could undo this (and possibly
2825
  //       subsequent transforms), we would not need this hack.
2826
984
  if (Cmp.isEquality() && C.isZero() &&
2827
984
      none_of((Sub->users()), [](const User *U) { return isa<PHINode>(U); }))
2828
57
    return new ICmpInst(Pred, X, Y);
2829
2830
  // The following transforms are only worth it if the only user of the subtract
2831
  // is the icmp.
2832
  // TODO: This is an artificial restriction for all of the transforms below
2833
  //       that only need a single replacement icmp. Can these use the phi test
2834
  //       like the transform above here?
2835
927
  if (!Sub->hasOneUse())
2836
751
    return nullptr;
2837
2838
176
  if (Sub->hasNoSignedWrap()) {
2839
    // (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y)
2840
40
    if (Pred == ICmpInst::ICMP_SGT && C.isAllOnes())
2841
5
      return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
2842
2843
    // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y)
2844
35
    if (Pred == ICmpInst::ICMP_SGT && C.isZero())
2845
5
      return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
2846
2847
    // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y)
2848
30
    if (Pred == ICmpInst::ICMP_SLT && C.isZero())
2849
17
      return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
2850
2851
    // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y)
2852
13
    if (Pred == ICmpInst::ICMP_SLT && C.isOne())
2853
5
      return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
2854
13
  }
2855
2856
144
  if (!match(X, m_APInt(C2)))
2857
79
    return nullptr;
2858
2859
  // C2 - Y <u C -> (Y | (C - 1)) == C2
2860
  //   iff (C2 & (C - 1)) == C - 1 and C is a power of 2
2861
65
  if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() &&
2862
65
      (*C2 & (C - 1)) == (C - 1))
2863
3
    return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, C - 1), X);
2864
2865
  // C2 - Y >u C -> (Y | C) != C2
2866
  //   iff C2 & C == C and C + 1 is a power of 2
2867
62
  if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == C)
2868
3
    return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, C), X);
2869
2870
  // We have handled special cases that reduce.
2871
  // Canonicalize any remaining sub to add as:
2872
  // (C2 - Y) > C --> (Y + ~C2) < ~C
2873
59
  Value *Add = Builder.CreateAdd(Y, ConstantInt::get(Ty, ~(*C2)), "notsub",
2874
59
                                 HasNUW, HasNSW);
2875
59
  return new ICmpInst(SwappedPred, Add, ConstantInt::get(Ty, ~C));
2876
62
}
2877
2878
/// Materialize the two-input boolean function encoded by \p Table.
///
/// Bit (2 * a + b) of \p Table holds the function's result for Op0 == a and
/// Op1 == b. Constant functions yield a true/false constant (splatted when
/// Op0 has a vector type). Functions that would need two new instructions
/// are only built when \p HasOneUse is set; otherwise nullptr is returned.
static Value *createLogicFromTable(const std::bitset<4> &Table, Value *Op0,
                                   Value *Op1, IRBuilderBase &Builder,
                                   bool HasOneUse) {
  // Produce an i1 (or vector-of-i1 splat) constant matching Op0's shape.
  auto MakeBoolConstant = [&](bool Val) {
    Constant *BoolC = Builder.getFalse();
    if (Val)
      BoolC = Builder.getTrue();
    Type *OpTy = Op0->getType();
    if (!OpTy->isVectorTy())
      return BoolC;
    return ConstantVector::getSplat(
        cast<VectorType>(OpTy)->getElementCount(), BoolC);
  };

  const unsigned long Encoding = Table.to_ulong();
  switch (Encoding) {
  case 0: // 0 0 0 0
    return MakeBoolConstant(false);
  case 1: { // 0 0 0 1
    if (!HasOneUse)
      return nullptr;
    Value *Either = Builder.CreateOr(Op0, Op1);
    return Builder.CreateNot(Either);
  }
  case 2: { // 0 0 1 0
    if (!HasOneUse)
      return nullptr;
    Value *NotOp0 = Builder.CreateNot(Op0);
    return Builder.CreateAnd(NotOp0, Op1);
  }
  case 3: // 0 0 1 1
    return Builder.CreateNot(Op0);
  case 4: { // 0 1 0 0
    if (!HasOneUse)
      return nullptr;
    Value *NotOp1 = Builder.CreateNot(Op1);
    return Builder.CreateAnd(Op0, NotOp1);
  }
  case 5: // 0 1 0 1
    return Builder.CreateNot(Op1);
  case 6: // 0 1 1 0
    return Builder.CreateXor(Op0, Op1);
  case 7: { // 0 1 1 1
    if (!HasOneUse)
      return nullptr;
    Value *Both = Builder.CreateAnd(Op0, Op1);
    return Builder.CreateNot(Both);
  }
  case 8: // 1 0 0 0
    return Builder.CreateAnd(Op0, Op1);
  case 9: { // 1 0 0 1
    if (!HasOneUse)
      return nullptr;
    Value *Differ = Builder.CreateXor(Op0, Op1);
    return Builder.CreateNot(Differ);
  }
  case 10: // 1 0 1 0
    return Op1;
  case 11: { // 1 0 1 1
    if (!HasOneUse)
      return nullptr;
    Value *NotOp0 = Builder.CreateNot(Op0);
    return Builder.CreateOr(NotOp0, Op1);
  }
  case 12: // 1 1 0 0
    return Op0;
  case 13: { // 1 1 0 1
    if (!HasOneUse)
      return nullptr;
    Value *NotOp1 = Builder.CreateNot(Op1);
    return Builder.CreateOr(Op0, NotOp1);
  }
  case 14: // 1 1 1 0
    return Builder.CreateOr(Op0, Op1);
  case 15: // 1 1 1 1
    return MakeBoolConstant(true);
  default:
    llvm_unreachable("Invalid Operation");
  }
  return nullptr;
}
2927
2928
/// Fold icmp (add X, Y), C.
2929
Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
2930
                                                   BinaryOperator *Add,
2931
3.84k
                                                   const APInt &C) {
2932
3.84k
  Value *Y = Add->getOperand(1);
2933
3.84k
  Value *X = Add->getOperand(0);
2934
2935
3.84k
  Value *Op0, *Op1;
2936
3.84k
  Instruction *Ext0, *Ext1;
2937
3.84k
  const CmpInst::Predicate Pred = Cmp.getPredicate();
2938
3.84k
  if (match(Add,
2939
3.84k
            m_Add(m_CombineAnd(m_Instruction(Ext0), m_ZExtOrSExt(m_Value(Op0))),
2940
3.84k
                  m_CombineAnd(m_Instruction(Ext1),
2941
3.84k
                               m_ZExtOrSExt(m_Value(Op1))))) &&
2942
3.84k
      Op0->getType()->isIntOrIntVectorTy(1) &&
2943
3.84k
      Op1->getType()->isIntOrIntVectorTy(1)) {
2944
0
    unsigned BW = C.getBitWidth();
2945
0
    std::bitset<4> Table;
2946
0
    auto ComputeTable = [&](bool Op0Val, bool Op1Val) {
2947
0
      int Res = 0;
2948
0
      if (Op0Val)
2949
0
        Res += isa<ZExtInst>(Ext0) ? 1 : -1;
2950
0
      if (Op1Val)
2951
0
        Res += isa<ZExtInst>(Ext1) ? 1 : -1;
2952
0
      return ICmpInst::compare(APInt(BW, Res, true), C, Pred);
2953
0
    };
2954
2955
0
    Table[0] = ComputeTable(false, false);
2956
0
    Table[1] = ComputeTable(false, true);
2957
0
    Table[2] = ComputeTable(true, false);
2958
0
    Table[3] = ComputeTable(true, true);
2959
0
    if (auto *Cond =
2960
0
            createLogicFromTable(Table, Op0, Op1, Builder, Add->hasOneUse()))
2961
0
      return replaceInstUsesWith(Cmp, Cond);
2962
0
  }
2963
3.84k
  const APInt *C2;
2964
3.84k
  if (Cmp.isEquality() || !match(Y, m_APInt(C2)))
2965
1.54k
    return nullptr;
2966
2967
  // Fold icmp pred (add X, C2), C.
2968
2.30k
  Type *Ty = Add->getType();
2969
2970
  // If the add does not wrap, we can always adjust the compare by subtracting
2971
  // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE
2972
  // are canonicalized to SGT/SLT/UGT/ULT.
2973
2.30k
  if ((Add->hasNoSignedWrap() &&
2974
2.30k
       (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) ||
2975
2.30k
      (Add->hasNoUnsignedWrap() &&
2976
2.26k
       (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT))) {
2977
85
    bool Overflow;
2978
85
    APInt NewC =
2979
85
        Cmp.isSigned() ? C.ssub_ov(*C2, Overflow) : C.usub_ov(*C2, Overflow);
2980
    // If there is overflow, the result must be true or false.
2981
    // TODO: Can we assert there is no overflow because InstSimplify always
2982
    // handles those cases?
2983
85
    if (!Overflow)
2984
      // icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2)
2985
85
      return new ICmpInst(Pred, X, ConstantInt::get(Ty, NewC));
2986
85
  }
2987
2988
2.22k
  auto CR = ConstantRange::makeExactICmpRegion(Pred, C).subtract(*C2);
2989
2.22k
  const APInt &Upper = CR.getUpper();
2990
2.22k
  const APInt &Lower = CR.getLower();
2991
2.22k
  if (Cmp.isSigned()) {
2992
272
    if (Lower.isSignMask())
2993
3
      return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper));
2994
269
    if (Upper.isSignMask())
2995
4
      return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower));
2996
1.94k
  } else {
2997
1.94k
    if (Lower.isMinValue())
2998
20
      return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantInt::get(Ty, Upper));
2999
1.92k
    if (Upper.isMinValue())
3000
1
      return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower));
3001
1.92k
  }
3002
3003
  // This set of folds is intentionally placed after folds that use no-wrapping
3004
  // flags because those folds are likely better for later analysis/codegen.
3005
2.19k
  const APInt SMax = APInt::getSignedMaxValue(Ty->getScalarSizeInBits());
3006
2.19k
  const APInt SMin = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
3007
3008
  // Fold compare with offset to opposite sign compare if it eliminates offset:
3009
  // (X + C2) >u C --> X <s -C2 (if C == C2 + SMAX)
3010
2.19k
  if (Pred == CmpInst::ICMP_UGT && C == *C2 + SMax)
3011
7
    return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, -(*C2)));
3012
3013
  // (X + C2) <u C --> X >s ~C2 (if C == C2 + SMIN)
3014
2.18k
  if (Pred == CmpInst::ICMP_ULT && C == *C2 + SMin)
3015
2
    return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantInt::get(Ty, ~(*C2)));
3016
3017
  // (X + C2) >s C --> X <u (SMAX - C) (if C == C2 - 1)
3018
2.18k
  if (Pred == CmpInst::ICMP_SGT && C == *C2 - 1)
3019
15
    return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantInt::get(Ty, SMax - C));
3020
3021
  // (X + C2) <s C --> X >u (C ^ SMAX) (if C == C2)
3022
2.16k
  if (Pred == CmpInst::ICMP_SLT && C == *C2)
3023
11
    return new ICmpInst(ICmpInst::ICMP_UGT, X, ConstantInt::get(Ty, C ^ SMax));
3024
3025
  // (X + -1) <u C --> X <=u C (if X is never null)
3026
2.15k
  if (Pred == CmpInst::ICMP_ULT && C2->isAllOnes()) {
3027
95
    const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
3028
95
    if (llvm::isKnownNonZero(X, DL, 0, Q.AC, Q.CxtI, Q.DT))
3029
7
      return new ICmpInst(ICmpInst::ICMP_ULE, X, ConstantInt::get(Ty, C));
3030
95
  }
3031
3032
2.15k
  if (!Add->hasOneUse())
3033
295
    return nullptr;
3034
3035
  // X+C <u C2 -> (X & -C2) == C
3036
  //   iff C & (C2-1) == 0
3037
  //       C2 is a power of 2
3038
1.85k
  if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() && (*C2 & (C - 1)) == 0)
3039
17
    return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -C),
3040
17
                        ConstantExpr::getNeg(cast<Constant>(Y)));
3041
3042
  // X+C >u C2 -> (X & ~C2) != C
3043
  //   iff C & C2 == 0
3044
  //       C2+1 is a power of 2
3045
1.83k
  if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == 0)
3046
12
    return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~C),
3047
12
                        ConstantExpr::getNeg(cast<Constant>(Y)));
3048
3049
  // The range test idiom can use either ult or ugt. Arbitrarily canonicalize
3050
  // to the ult form.
3051
  // X+C2 >u C -> X+(C2-C-1) <u ~C
3052
1.82k
  if (Pred == ICmpInst::ICMP_UGT)
3053
75
    return new ICmpInst(ICmpInst::ICMP_ULT,
3054
75
                        Builder.CreateAdd(X, ConstantInt::get(Ty, *C2 - C - 1)),
3055
75
                        ConstantInt::get(Ty, ~C));
3056
3057
1.75k
  return nullptr;
3058
1.82k
}
3059
3060
bool InstCombinerImpl::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
3061
                                               Value *&RHS, ConstantInt *&Less,
3062
                                               ConstantInt *&Equal,
3063
541
                                               ConstantInt *&Greater) {
3064
  // TODO: Generalize this to work with other comparison idioms or ensure
3065
  // they get canonicalized into this form.
3066
3067
  // select i1 (a == b),
3068
  //        i32 Equal,
3069
  //        i32 (select i1 (a < b), i32 Less, i32 Greater)
3070
  // where Equal, Less and Greater are placeholders for any three constants.
3071
541
  ICmpInst::Predicate PredA;
3072
541
  if (!match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) ||
3073
541
      !ICmpInst::isEquality(PredA))
3074
411
    return false;
3075
130
  Value *EqualVal = SI->getTrueValue();
3076
130
  Value *UnequalVal = SI->getFalseValue();
3077
  // We still can get non-canonical predicate here, so canonicalize.
3078
130
  if (PredA == ICmpInst::ICMP_NE)
3079
46
    std::swap(EqualVal, UnequalVal);
3080
130
  if (!match(EqualVal, m_ConstantInt(Equal)))
3081
20
    return false;
3082
110
  ICmpInst::Predicate PredB;
3083
110
  Value *LHS2, *RHS2;
3084
110
  if (!match(UnequalVal, m_Select(m_ICmp(PredB, m_Value(LHS2), m_Value(RHS2)),
3085
110
                                  m_ConstantInt(Less), m_ConstantInt(Greater))))
3086
90
    return false;
3087
  // We can get predicate mismatch here, so canonicalize if possible:
3088
  // First, ensure that 'LHS' match.
3089
20
  if (LHS2 != LHS) {
3090
    // x sgt y <--> y slt x
3091
1
    std::swap(LHS2, RHS2);
3092
1
    PredB = ICmpInst::getSwappedPredicate(PredB);
3093
1
  }
3094
20
  if (LHS2 != LHS)
3095
0
    return false;
3096
  // We also need to canonicalize 'RHS'.
3097
20
  if (PredB == ICmpInst::ICMP_SGT && isa<Constant>(RHS2)) {
3098
    // x sgt C-1  <-->  x sge C  <-->  not(x slt C)
3099
4
    auto FlippedStrictness =
3100
4
        InstCombiner::getFlippedStrictnessPredicateAndConstant(
3101
4
            PredB, cast<Constant>(RHS2));
3102
4
    if (!FlippedStrictness)
3103
0
      return false;
3104
4
    assert(FlippedStrictness->first == ICmpInst::ICMP_SGE &&
3105
4
           "basic correctness failure");
3106
0
    RHS2 = FlippedStrictness->second;
3107
    // And kind-of perform the result swap.
3108
4
    std::swap(Less, Greater);
3109
4
    PredB = ICmpInst::ICMP_SLT;
3110
4
  }
3111
20
  return PredB == ICmpInst::ICMP_SLT && RHS == RHS2;
3112
20
}
3113
3114
/// Fold an icmp of a three-way-comparison select against the constant \p C.
///
/// If \p Select matches the pattern recognized by matchThreeWayIntCompare,
/// the compare of its result with \p C is expressed directly in terms of the
/// two values that were originally compared. Returns the result of replacing
/// all uses of \p Cmp, or nullptr if \p Cmp has more than one use or the
/// select does not match.
Instruction *InstCombinerImpl::foldICmpSelectConstant(ICmpInst &Cmp,
                                                      SelectInst *Select,
                                                      ConstantInt *C) {
  assert(C && "Cmp RHS should be a constant int!");

  // If we're testing a constant value against the result of a three way
  // comparison, the result can be expressed directly in terms of the
  // original values being compared.  Note: We could possibly be more
  // aggressive here and remove the hasOneUse test. The original select is
  // really likely to simplify or sink when we remove a test of the result.
  if (!Cmp.hasOneUse())
    return nullptr;

  Value *OrigLHS = nullptr, *OrigRHS = nullptr;
  ConstantInt *C1LessThan = nullptr, *C2Equal = nullptr,
              *C3GreaterThan = nullptr;
  if (!matchThreeWayIntCompare(Select, OrigLHS, OrigRHS, C1LessThan, C2Equal,
                               C3GreaterThan))
    return nullptr;
  assert(C1LessThan && C2Equal && C3GreaterThan);

  // Evaluate the original predicate for each of the three possible select
  // results. Comparing the APInt values directly avoids building a constant
  // expression just to inspect it.
  const ICmpInst::Predicate Pred = Cmp.getPredicate();
  const APInt &RHSVal = C->getValue();
  const bool TrueWhenLessThan =
      ICmpInst::compare(C1LessThan->getValue(), RHSVal, Pred);
  const bool TrueWhenEqual =
      ICmpInst::compare(C2Equal->getValue(), RHSVal, Pred);
  const bool TrueWhenGreaterThan =
      ICmpInst::compare(C3GreaterThan->getValue(), RHSVal, Pred);

  // This generates the new instruction that will replace the original Cmp
  // Instruction. Instead of enumerating the various combinations when
  // TrueWhenLessThan, TrueWhenEqual and TrueWhenGreaterThan are true versus
  // false, we rely on chaining of ORs and future passes of InstCombine to
  // simplify the OR further (i.e. a s< b || a == b becomes a s<= b).

  // When none of the three constants satisfy the predicate for the RHS (C),
  // the entire original Cmp can be simplified to a false.
  Value *Cond = Builder.getFalse();
  if (TrueWhenLessThan)
    Cond = Builder.CreateOr(
        Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS));
  if (TrueWhenEqual)
    Cond = Builder.CreateOr(
        Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS));
  if (TrueWhenGreaterThan)
    Cond = Builder.CreateOr(
        Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS));

  return replaceInstUsesWith(Cmp, Cond);
}
3164
3165
77.4k
/// Fold an icmp whose first operand is a bitcast.
///
/// Handles compares that see through int-to-fp conversions and fp
/// extend/truncate for zero and sign-bit tests, all-ones / all-zero tests of
/// a bitcast vector, and compares of a bitcast splat shuffle against a splat
/// constant. Returns the replacement compare, or nullptr if operand 0 is not
/// a bitcast or no fold applies.
Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
  auto *Bitcast = dyn_cast<BitCastInst>(Cmp.getOperand(0));
  if (!Bitcast)
    return nullptr;

  const ICmpInst::Predicate Pred = Cmp.getPredicate();
  Value *CmpRHS = Cmp.getOperand(1);
  Value *CastSrc = Bitcast->getOperand(0);
  Type *SrcType = Bitcast->getSrcTy();
  Type *DstType = Bitcast->getType();

  // Make sure the bitcast doesn't change between scalar and vector and
  // doesn't change the number of vector elements.
  const bool SameShape =
      SrcType->isVectorTy() == DstType->isVectorTy() &&
      SrcType->getScalarSizeInBits() == DstType->getScalarSizeInBits();
  if (SameShape) {
    Value *FPSrc;

    // Zero-equality and sign-bit checks are preserved through sitofp + bitcast.
    if (match(CastSrc, m_SIToFP(m_Value(FPSrc)))) {
      Type *IntTy = FPSrc->getType();
      const bool IsSLT = Pred == ICmpInst::ICMP_SLT;
      const bool IsSGT = Pred == ICmpInst::ICMP_SGT;
      Constant *NewRHS = nullptr;
      // icmp  eq (bitcast (sitofp X)), 0 --> icmp  eq X, 0
      // icmp  ne (bitcast (sitofp X)), 0 --> icmp  ne X, 0
      // icmp slt (bitcast (sitofp X)), 0 --> icmp slt X, 0
      // icmp sgt (bitcast (sitofp X)), 0 --> icmp sgt X, 0
      if ((Cmp.isEquality() || IsSLT || IsSGT) && match(CmpRHS, m_Zero()))
        NewRHS = ConstantInt::getNullValue(IntTy);
      // icmp slt (bitcast (sitofp X)), 1 --> icmp slt X, 1
      else if (IsSLT && match(CmpRHS, m_One()))
        NewRHS = ConstantInt::get(IntTy, 1);
      // icmp sgt (bitcast (sitofp X)), -1 --> icmp sgt X, -1
      else if (IsSGT && match(CmpRHS, m_AllOnes()))
        NewRHS = ConstantInt::getAllOnesValue(IntTy);

      if (NewRHS)
        return new ICmpInst(Pred, FPSrc, NewRHS);
    }

    // Zero-equality checks are preserved through unsigned floating-point casts:
    // icmp eq (bitcast (uitofp X)), 0 --> icmp eq X, 0
    // icmp ne (bitcast (uitofp X)), 0 --> icmp ne X, 0
    if (match(CastSrc, m_UIToFP(m_Value(FPSrc))) && Cmp.isEquality() &&
        match(CmpRHS, m_Zero()))
      return new ICmpInst(Pred, FPSrc,
                          ConstantInt::getNullValue(FPSrc->getType()));

    // If this is a sign-bit test of a bitcast of a casted FP value, eliminate
    // the FP extend/truncate because that cast does not change the sign-bit.
    // This is true for all standard IEEE-754 types and the X86 80-bit type.
    // The sign-bit is always the most significant bit in those types.
    const APInt *SignC;
    bool TrueIfSigned;
    if (match(CmpRHS, m_APInt(SignC)) && Bitcast->hasOneUse() &&
        isSignBitCheck(Pred, *SignC, TrueIfSigned) &&
        (match(CastSrc, m_FPExt(m_Value(FPSrc))) ||
         match(CastSrc, m_FPTrunc(m_Value(FPSrc))))) {
      // (bitcast (fpext/fptrunc X)) to iX) < 0 --> (bitcast X to iY) < 0
      // (bitcast (fpext/fptrunc X)) to iX) > -1 --> (bitcast X to iY) > -1
      Type *FPSrcTy = FPSrc->getType();

      // We can't currently handle Power style floating point operations here.
      if (!FPSrcTy->isPPC_FP128Ty() && !SrcType->isPPC_FP128Ty()) {
        Type *NarrowIntTy = Builder.getIntNTy(FPSrcTy->getScalarSizeInBits());
        if (auto *FPVecTy = dyn_cast<VectorType>(FPSrcTy))
          NarrowIntTy = VectorType::get(NarrowIntTy, FPVecTy->getElementCount());
        Value *NarrowCast = Builder.CreateBitCast(FPSrc, NarrowIntTy);
        if (TrueIfSigned)
          return new ICmpInst(ICmpInst::ICMP_SLT, NarrowCast,
                              ConstantInt::getNullValue(NarrowIntTy));
        return new ICmpInst(ICmpInst::ICMP_SGT, NarrowCast,
                            ConstantInt::getAllOnesValue(NarrowIntTy));
      }
    }
  }

  // The remaining folds compare a scalar integer, obtained by bitcasting an
  // integer (vector) source, against a constant.
  const APInt *C;
  if (!match(CmpRHS, m_APInt(C)))
    return nullptr;
  if (!DstType->isIntegerTy() || !SrcType->isIntOrIntVectorTy())
    return nullptr;

  const bool OneUseEquality = Cmp.isEquality() && Bitcast->hasOneUse();

  // If this is checking if all elements of a vector compare are set or not,
  // invert the casted vector equality compare and test if all compare
  // elements are clear or not. Compare against zero is generally easier for
  // analysis and codegen.
  // icmp eq/ne (bitcast (not X) to iN), -1 --> icmp eq/ne (bitcast X to iN), 0
  // Example: are all elements equal? --> are zero elements not equal?
  // TODO: Try harder to reduce compare of 2 freely invertible operands?
  if (OneUseEquality && C->isAllOnes()) {
    if (Value *Inverted =
            getFreelyInverted(CastSrc, CastSrc->hasOneUse(), &Builder)) {
      Value *InvertedCast = Builder.CreateBitCast(Inverted, DstType);
      return new ICmpInst(Pred, InvertedCast,
                          ConstantInt::getNullValue(DstType));
    }
  }

  // If this is checking if all elements of an extended vector are clear or not,
  // compare in a narrow type to eliminate the extend:
  // icmp eq/ne (bitcast (ext X) to iN), 0 --> icmp eq/ne (bitcast X to iM), 0
  Value *ExtSrc;
  if (OneUseEquality && C->isZero() &&
      match(CastSrc, m_ZExtOrSExt(m_Value(ExtSrc)))) {
    if (auto *NarrowVecTy = dyn_cast<FixedVectorType>(ExtSrc->getType())) {
      Type *NarrowIntTy =
          Builder.getIntNTy(NarrowVecTy->getPrimitiveSizeInBits());
      Value *NarrowCast = Builder.CreateBitCast(ExtSrc, NarrowIntTy);
      return new ICmpInst(Pred, NarrowCast,
                          ConstantInt::getNullValue(NarrowIntTy));
    }
  }

  // Folding: icmp <pred> iN X, C
  //  where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
  //    and C is a splat of a K-bit pattern
  //    and SC is a constant vector = <C', C', C', ..., C'>
  // Into:
  //   %E = extractelement <M x iK> %vec, i32 C'
  //   icmp <pred> iK %E, trunc(C)
  Value *ShufSrc;
  ArrayRef<int> Mask;
  // The shuffle must pick the same element for every lane.
  if (match(CastSrc, m_Shuffle(m_Value(ShufSrc), m_Undef(), m_Mask(Mask))) &&
      all_equal(Mask)) {
    auto *SrcVecTy = cast<VectorType>(SrcType);
    auto *EltTy = cast<IntegerType>(SrcVecTy->getElementType());
    const unsigned EltBits = EltTy->getBitWidth();
    // C must be copies of one element-sized bit pattern; then a single
    // extracted element can be compared against that pattern.
    if (C->isSplat(EltBits)) {
      Value *LaneIdx = Builder.getInt32(Mask[0]);
      Value *Lane = Builder.CreateExtractElement(ShufSrc, LaneIdx);
      Value *LaneC = ConstantInt::get(EltTy, C->trunc(EltBits));
      return new ICmpInst(Pred, Lane, LaneC);
    }
  }

  return nullptr;
}
3302
3303
/// Try to fold integer comparisons with a constant operand: icmp Pred X, C
3304
/// where X is some kind of instruction.
3305
81.6k
Instruction *InstCombinerImpl::foldICmpInstWithConstant(ICmpInst &Cmp) {
3306
81.6k
  const APInt *C;
3307
3308
81.6k
  if (match(Cmp.getOperand(1), m_APInt(C))) {
3309
50.3k
    if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0)))
3310
18.0k
      if (Instruction *I = foldICmpBinOpWithConstant(Cmp, BO, *C))
3311
2.69k
        return I;
3312
3313
47.6k
    if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0)))
3314
      // For now, we only support constant integers while folding the
3315
      // ICMP(SELECT)) pattern. We can extend this to support vector of integers
3316
      // similar to the cases handled by binary ops above.
3317
999
      if (auto *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1)))
3318
984
        if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS))
3319
20
          return I;
3320
3321
47.6k
    if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0)))
3322
739
      if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C))
3323
325
        return I;
3324
3325
47.3k
    if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)))
3326
1.32k
      if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C))
3327
239
        return I;
3328
3329
    // (extractval ([s/u]subo X, Y), 0) == 0 --> X == Y
3330
    // (extractval ([s/u]subo X, Y), 0) != 0 --> X != Y
3331
    // TODO: This checks one-use, but that is not strictly necessary.
3332
47.0k
    Value *Cmp0 = Cmp.getOperand(0);
3333
47.0k
    Value *X, *Y;
3334
47.0k
    if (C->isZero() && Cmp.isEquality() && Cmp0->hasOneUse() &&
3335
47.0k
        (match(Cmp0,
3336
7.79k
               m_ExtractValue<0>(m_Intrinsic<Intrinsic::ssub_with_overflow>(
3337
7.79k
                   m_Value(X), m_Value(Y)))) ||
3338
7.79k
         match(Cmp0,
3339
7.79k
               m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
3340
7.79k
                   m_Value(X), m_Value(Y))))))
3341
0
      return new ICmpInst(Cmp.getPredicate(), X, Y);
3342
47.0k
  }
3343
3344
78.4k
  if (match(Cmp.getOperand(1), m_APIntAllowUndef(C)))
3345
47.3k
    return foldICmpInstWithConstantAllowUndef(Cmp, *C);
3346
3347
31.0k
  return nullptr;
3348
78.4k
}
3349
3350
/// Fold an icmp equality instruction with binary operator LHS and constant RHS:
3351
/// icmp eq/ne BO, C.
3352
Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
3353
15.5k
    ICmpInst &Cmp, BinaryOperator *BO, const APInt &C) {
3354
  // TODO: Some of these folds could work with arbitrary constants, but this
3355
  // function is limited to scalar and vector splat constants.
3356
15.5k
  if (!Cmp.isEquality())
3357
5.41k
    return nullptr;
3358
3359
10.1k
  ICmpInst::Predicate Pred = Cmp.getPredicate();
3360
10.1k
  bool isICMP_NE = Pred == ICmpInst::ICMP_NE;
3361
10.1k
  Constant *RHS = cast<Constant>(Cmp.getOperand(1));
3362
10.1k
  Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
3363
3364
10.1k
  switch (BO->getOpcode()) {
3365
110
  case Instruction::SRem:
3366
    // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
3367
110
    if (C.isZero() && BO->hasOneUse()) {
3368
26
      const APInt *BOC;
3369
26
      if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) {
3370
0
        Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName());
3371
0
        return new ICmpInst(Pred, NewRem,
3372
0
                            Constant::getNullValue(BO->getType()));
3373
0
      }
3374
26
    }
3375
110
    break;
3376
1.22k
  case Instruction::Add: {
3377
    // (A + C2) == C --> A == (C - C2)
3378
    // (A + C2) != C --> A != (C - C2)
3379
    // TODO: Remove the one-use limitation? See discussion in D58633.
3380
1.22k
    if (Constant *C2 = dyn_cast<Constant>(BOp1)) {
3381
409
      if (BO->hasOneUse())
3382
86
        return new ICmpInst(Pred, BOp0, ConstantExpr::getSub(RHS, C2));
3383
814
    } else if (C.isZero()) {
3384
      // Replace ((add A, B) != 0) with (A != -B) if A or B is
3385
      // efficiently invertible, or if the add has just this one use.
3386
569
      if (Value *NegVal = dyn_castNegVal(BOp1))
3387
0
        return new ICmpInst(Pred, BOp0, NegVal);
3388
569
      if (Value *NegVal = dyn_castNegVal(BOp0))
3389
0
        return new ICmpInst(Pred, NegVal, BOp1);
3390
569
      if (BO->hasOneUse()) {
3391
19
        Value *Neg = Builder.CreateNeg(BOp1);
3392
19
        Neg->takeName(BO);
3393
19
        return new ICmpInst(Pred, BOp0, Neg);
3394
19
      }
3395
569
    }
3396
1.11k
    break;
3397
1.22k
  }
3398
1.11k
  case Instruction::Xor:
3399
323
    if (BO->hasOneUse()) {
3400
122
      if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
3401
        // For the xor case, we can xor two constants together, eliminating
3402
        // the explicit xor.
3403
50
        return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
3404
72
      } else if (C.isZero()) {
3405
        // Replace ((xor A, B) != 0) with (A != B)
3406
14
        return new ICmpInst(Pred, BOp0, BOp1);
3407
14
      }
3408
122
    }
3409
259
    break;
3410
259
  case Instruction::Or: {
3411
163
    const APInt *BOC;
3412
163
    if (match(BOp1, m_APInt(BOC)) && BO->hasOneUse() && RHS->isAllOnesValue()) {
3413
      // Comparing if all bits outside of a constant mask are set?
3414
      // Replace (X | C) == -1 with (X & ~C) == ~C.
3415
      // This removes the -1 constant.
3416
0
      Constant *NotBOC = ConstantExpr::getNot(cast<Constant>(BOp1));
3417
0
      Value *And = Builder.CreateAnd(BOp0, NotBOC);
3418
0
      return new ICmpInst(Pred, And, NotBOC);
3419
0
    }
3420
163
    break;
3421
163
  }
3422
163
  case Instruction::UDiv:
3423
186
  case Instruction::SDiv:
3424
186
    if (BO->isExact()) {
3425
      // div exact X, Y eq/ne 0 -> X eq/ne 0
3426
      // div exact X, Y eq/ne 1 -> X eq/ne Y
3427
      // div exact X, Y eq/ne C ->
3428
      //    if Y * C never-overflow && OneUse:
3429
      //      -> Y * C eq/ne X
3430
9
      if (C.isZero())
3431
0
        return new ICmpInst(Pred, BOp0, Constant::getNullValue(BO->getType()));
3432
9
      else if (C.isOne())
3433
0
        return new ICmpInst(Pred, BOp0, BOp1);
3434
9
      else if (BO->hasOneUse()) {
3435
0
        OverflowResult OR = computeOverflow(
3436
0
            Instruction::Mul, BO->getOpcode() == Instruction::SDiv, BOp1,
3437
0
            Cmp.getOperand(1), BO);
3438
0
        if (OR == OverflowResult::NeverOverflows) {
3439
0
          Value *YC =
3440
0
              Builder.CreateMul(BOp1, ConstantInt::get(BO->getType(), C));
3441
0
          return new ICmpInst(Pred, YC, BOp0);
3442
0
        }
3443
0
      }
3444
9
    }
3445
186
    if (BO->getOpcode() == Instruction::UDiv && C.isZero()) {
3446
      // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
3447
56
      auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
3448
56
      return new ICmpInst(NewPred, BOp1, BOp0);
3449
56
    }
3450
130
    break;
3451
8.14k
  default:
3452
8.14k
    break;
3453
10.1k
  }
3454
9.92k
  return nullptr;
3455
10.1k
}
3456
3457
/// Simplify a power-of-two style test on ctpop(X) when known-bits analysis
/// proves that at least one bit of X is set.
///
/// Recognised tests:
///    isPow2OrZero : ctpop(X) u< 2
///    isPow2       : ctpop(X) == 1
///    NotPow2OrZero: ctpop(X) u> 1
///    NotPow2      : ctpop(X) != 1
/// With one bit of X known to be set they become:
///    IsPow2       : X & (~Bit) == 0
///    NotPow2      : X & (~Bit) != 0
///
/// Returns the new compare, or nullptr if the pattern does not apply.
static Instruction *foldCtpopPow2Test(ICmpInst &I, IntrinsicInst *CtpopLhs,
                                      const APInt &CRhs,
                                      InstCombiner::BuilderTy &Builder,
                                      const SimplifyQuery &Q) {
  assert(CtpopLhs->getIntrinsicID() == Intrinsic::ctpop &&
         "Non-ctpop intrin in ctpop fold");

  // The fold creates an extra 'and', so the ctpop must have no other user.
  if (!CtpopLhs->hasOneUse())
    return nullptr;

  const ICmpInst::Predicate Pred = I.getPredicate();
  const bool ComparesAgainstOne =
      (I.isEquality() || Pred == ICmpInst::ICMP_UGT) && CRhs == 1;
  const bool IsPow2StyleTest =
      ComparesAgainstOne || (Pred == ICmpInst::ICMP_ULT && CRhs == 2);
  if (!IsPow2StyleTest)
    return nullptr;

  Value *Src = CtpopLhs->getArgOperand(0);
  KnownBits SrcKnown = computeKnownBits(Src, Q.DL,
                                        /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT);

  // A minimum population above 1 is not checked here; that case should have
  // been constant folded already.
  if (SrcKnown.countMinPopulation() != 1)
    return nullptr;

  // Mask away the bit that is known to be set; the value is a power of two
  // exactly when nothing else remains.
  Value *OtherBits = Builder.CreateAnd(
      Src, Constant::getIntegerValue(Src->getType(), ~(SrcKnown.One)));
  const bool TestsIsPow2 =
      Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_ULT;
  return new ICmpInst(TestsIsPow2 ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
                      OtherBits, Constant::getNullValue(Src->getType()));
}
3494
3495
/// Fold an equality icmp with LLVM intrinsic and constant operand.
3496
Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
3497
590
    ICmpInst &Cmp, IntrinsicInst *II, const APInt &C) {
3498
590
  Type *Ty = II->getType();
3499
590
  unsigned BitWidth = C.getBitWidth();
3500
590
  const ICmpInst::Predicate Pred = Cmp.getPredicate();
3501
3502
590
  switch (II->getIntrinsicID()) {
3503
35
  case Intrinsic::abs:
3504
    // abs(A) == 0  ->  A == 0
3505
    // abs(A) == INT_MIN  ->  A == INT_MIN
3506
35
    if (C.isZero() || C.isMinSignedValue())
3507
35
      return new ICmpInst(Pred, II->getArgOperand(0), ConstantInt::get(Ty, C));
3508
0
    break;
3509
3510
21
  case Intrinsic::bswap:
3511
    // bswap(A) == C  ->  A == bswap(C)
3512
21
    return new ICmpInst(Pred, II->getArgOperand(0),
3513
21
                        ConstantInt::get(Ty, C.byteSwap()));
3514
3515
6
  case Intrinsic::bitreverse:
3516
    // bitreverse(A) == C  ->  A == bitreverse(C)
3517
6
    return new ICmpInst(Pred, II->getArgOperand(0),
3518
6
                        ConstantInt::get(Ty, C.reverseBits()));
3519
3520
54
  case Intrinsic::ctlz:
3521
111
  case Intrinsic::cttz: {
3522
    // ctz(A) == bitwidth(A)  ->  A == 0 and likewise for !=
3523
111
    if (C == BitWidth)
3524
15
      return new ICmpInst(Pred, II->getArgOperand(0),
3525
15
                          ConstantInt::getNullValue(Ty));
3526
3527
    // ctz(A) == C -> A & Mask1 == Mask2, where Mask2 only has bit C set
3528
    // and Mask1 has bits 0..C+1 set. Similar for ctl, but for high bits.
3529
    // Limit to one use to ensure we don't increase instruction count.
3530
96
    unsigned Num = C.getLimitedValue(BitWidth);
3531
96
    if (Num != BitWidth && II->hasOneUse()) {
3532
79
      bool IsTrailing = II->getIntrinsicID() == Intrinsic::cttz;
3533
79
      APInt Mask1 = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num + 1)
3534
79
                               : APInt::getHighBitsSet(BitWidth, Num + 1);
3535
79
      APInt Mask2 = IsTrailing
3536
79
        ? APInt::getOneBitSet(BitWidth, Num)
3537
79
        : APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
3538
79
      return new ICmpInst(Pred, Builder.CreateAnd(II->getArgOperand(0), Mask1),
3539
79
                          ConstantInt::get(Ty, Mask2));
3540
79
    }
3541
17
    break;
3542
96
  }
3543
3544
195
  case Intrinsic::ctpop: {
3545
    // popcount(A) == 0  ->  A == 0 and likewise for !=
3546
    // popcount(A) == bitwidth(A)  ->  A == -1 and likewise for !=
3547
195
    bool IsZero = C.isZero();
3548
195
    if (IsZero || C == BitWidth)
3549
17
      return new ICmpInst(Pred, II->getArgOperand(0),
3550
17
                          IsZero ? Constant::getNullValue(Ty)
3551
17
                                 : Constant::getAllOnesValue(Ty));
3552
3553
178
    break;
3554
195
  }
3555
3556
178
  case Intrinsic::fshl:
3557
15
  case Intrinsic::fshr:
3558
15
    if (II->getArgOperand(0) == II->getArgOperand(1)) {
3559
2
      const APInt *RotAmtC;
3560
      // ror(X, RotAmtC) == C --> X == rol(C, RotAmtC)
3561
      // rol(X, RotAmtC) == C --> X == ror(C, RotAmtC)
3562
2
      if (match(II->getArgOperand(2), m_APInt(RotAmtC)))
3563
2
        return new ICmpInst(Pred, II->getArgOperand(0),
3564
2
                            II->getIntrinsicID() == Intrinsic::fshl
3565
2
                                ? ConstantInt::get(Ty, C.rotr(*RotAmtC))
3566
2
                                : ConstantInt::get(Ty, C.rotl(*RotAmtC)));
3567
2
    }
3568
13
    break;
3569
3570
30
  case Intrinsic::umax:
3571
39
  case Intrinsic::uadd_sat: {
3572
    // uadd.sat(a, b) == 0  ->  (a | b) == 0
3573
    // umax(a, b) == 0  ->  (a | b) == 0
3574
39
    if (C.isZero() && II->hasOneUse()) {
3575
7
      Value *Or = Builder.CreateOr(II->getArgOperand(0), II->getArgOperand(1));
3576
7
      return new ICmpInst(Pred, Or, Constant::getNullValue(Ty));
3577
7
    }
3578
32
    break;
3579
39
  }
3580
3581
32
  case Intrinsic::ssub_sat:
3582
    // ssub.sat(a, b) == 0 -> a == b
3583
5
    if (C.isZero())
3584
0
      return new ICmpInst(Pred, II->getArgOperand(0), II->getArgOperand(1));
3585
5
    break;
3586
8
  case Intrinsic::usub_sat: {
3587
    // usub.sat(a, b) == 0  ->  a <= b
3588
8
    if (C.isZero()) {
3589
8
      ICmpInst::Predicate NewPred =
3590
8
          Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
3591
8
      return new ICmpInst(NewPred, II->getArgOperand(0), II->getArgOperand(1));
3592
8
    }
3593
0
    break;
3594
8
  }
3595
155
  default:
3596
155
    break;
3597
590
  }
3598
3599
400
  return nullptr;
3600
590
}
3601
3602
/// Fold an icmp with LLVM intrinsics
3603
static Instruction *
3604
foldICmpIntrinsicWithIntrinsic(ICmpInst &Cmp,
3605
34.9k
                               InstCombiner::BuilderTy &Builder) {
3606
34.9k
  assert(Cmp.isEquality());
3607
3608
0
  ICmpInst::Predicate Pred = Cmp.getPredicate();
3609
34.9k
  Value *Op0 = Cmp.getOperand(0);
3610
34.9k
  Value *Op1 = Cmp.getOperand(1);
3611
34.9k
  const auto *IIOp0 = dyn_cast<IntrinsicInst>(Op0);
3612
34.9k
  const auto *IIOp1 = dyn_cast<IntrinsicInst>(Op1);
3613
34.9k
  if (!IIOp0 || !IIOp1 || IIOp0->getIntrinsicID() != IIOp1->getIntrinsicID())
3614
34.9k
    return nullptr;
3615
3616
22
  switch (IIOp0->getIntrinsicID()) {
3617
4
  case Intrinsic::bswap:
3618
8
  case Intrinsic::bitreverse:
3619
    // If both operands are byte-swapped or bit-reversed, just compare the
3620
    // original values.
3621
8
    return new ICmpInst(Pred, IIOp0->getOperand(0), IIOp1->getOperand(0));
3622
0
  case Intrinsic::fshl:
3623
0
  case Intrinsic::fshr: {
3624
    // If both operands are rotated by same amount, just compare the
3625
    // original values.
3626
0
    if (IIOp0->getOperand(0) != IIOp0->getOperand(1))
3627
0
      break;
3628
0
    if (IIOp1->getOperand(0) != IIOp1->getOperand(1))
3629
0
      break;
3630
0
    if (IIOp0->getOperand(2) == IIOp1->getOperand(2))
3631
0
      return new ICmpInst(Pred, IIOp0->getOperand(0), IIOp1->getOperand(0));
3632
3633
    // rotate(X, AmtX) == rotate(Y, AmtY)
3634
    //  -> rotate(X, AmtX - AmtY) == Y
3635
    // Do this if either both rotates have one use or if only one has one use
3636
    // and AmtX/AmtY are constants.
3637
0
    unsigned OneUses = IIOp0->hasOneUse() + IIOp1->hasOneUse();
3638
0
    if (OneUses == 2 ||
3639
0
        (OneUses == 1 && match(IIOp0->getOperand(2), m_ImmConstant()) &&
3640
0
         match(IIOp1->getOperand(2), m_ImmConstant()))) {
3641
0
      Value *SubAmt =
3642
0
          Builder.CreateSub(IIOp0->getOperand(2), IIOp1->getOperand(2));
3643
0
      Value *CombinedRotate = Builder.CreateIntrinsic(
3644
0
          Op0->getType(), IIOp0->getIntrinsicID(),
3645
0
          {IIOp0->getOperand(0), IIOp0->getOperand(0), SubAmt});
3646
0
      return new ICmpInst(Pred, IIOp1->getOperand(0), CombinedRotate);
3647
0
    }
3648
0
  } break;
3649
14
  default:
3650
14
    break;
3651
22
  }
3652
3653
14
  return nullptr;
3654
22
}
3655
3656
/// Try to fold integer comparisons with a constant operand: icmp Pred X, C
3657
/// where X is some kind of instruction and C is AllowUndef.
3658
/// TODO: Move more folds which allow undef to this function.
3659
Instruction *
3660
InstCombinerImpl::foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp,
3661
47.3k
                                                     const APInt &C) {
3662
47.3k
  const ICmpInst::Predicate Pred = Cmp.getPredicate();
3663
47.3k
  if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0))) {
3664
1.08k
    switch (II->getIntrinsicID()) {
3665
1.05k
    default:
3666
1.05k
      break;
3667
1.05k
    case Intrinsic::fshl:
3668
26
    case Intrinsic::fshr:
3669
26
      if (Cmp.isEquality() && II->getArgOperand(0) == II->getArgOperand(1)) {
3670
        // (rot X, ?) == 0/-1 --> X == 0/-1
3671
0
        if (C.isZero() || C.isAllOnes())
3672
0
          return new ICmpInst(Pred, II->getArgOperand(0), Cmp.getOperand(1));
3673
0
      }
3674
26
      break;
3675
1.08k
    }
3676
1.08k
  }
3677
3678
47.3k
  return nullptr;
3679
47.3k
}
3680
3681
/// Fold an icmp with BinaryOp and constant operand: icmp Pred BO, C.
3682
Instruction *InstCombinerImpl::foldICmpBinOpWithConstant(ICmpInst &Cmp,
3683
                                                         BinaryOperator *BO,
3684
18.0k
                                                         const APInt &C) {
3685
18.0k
  switch (BO->getOpcode()) {
3686
775
  case Instruction::Xor:
3687
775
    if (Instruction *I = foldICmpXorConstant(Cmp, BO, C))
3688
92
      return I;
3689
683
    break;
3690
7.86k
  case Instruction::And:
3691
7.86k
    if (Instruction *I = foldICmpAndConstant(Cmp, BO, C))
3692
531
      return I;
3693
7.33k
    break;
3694
7.33k
  case Instruction::Or:
3695
393
    if (Instruction *I = foldICmpOrConstant(Cmp, BO, C))
3696
75
      return I;
3697
318
    break;
3698
1.20k
  case Instruction::Mul:
3699
1.20k
    if (Instruction *I = foldICmpMulConstant(Cmp, BO, C))
3700
71
      return I;
3701
1.13k
    break;
3702
1.13k
  case Instruction::Shl:
3703
1.12k
    if (Instruction *I = foldICmpShlConstant(Cmp, BO, C))
3704
508
      return I;
3705
621
    break;
3706
621
  case Instruction::LShr:
3707
736
  case Instruction::AShr:
3708
736
    if (Instruction *I = foldICmpShrConstant(Cmp, BO, C))
3709
288
      return I;
3710
448
    break;
3711
448
  case Instruction::SRem:
3712
195
    if (Instruction *I = foldICmpSRemConstant(Cmp, BO, C))
3713
6
      return I;
3714
189
    break;
3715
265
  case Instruction::UDiv:
3716
265
    if (Instruction *I = foldICmpUDivConstant(Cmp, BO, C))
3717
54
      return I;
3718
265
    [[fallthrough]];
3719
514
  case Instruction::SDiv:
3720
514
    if (Instruction *I = foldICmpDivConstant(Cmp, BO, C))
3721
203
      return I;
3722
311
    break;
3723
1.20k
  case Instruction::Sub:
3724
1.20k
    if (Instruction *I = foldICmpSubConstant(Cmp, BO, C))
3725
379
      return I;
3726
830
    break;
3727
3.84k
  case Instruction::Add:
3728
3.84k
    if (Instruction *I = foldICmpAddConstant(Cmp, BO, C))
3729
259
      return I;
3730
3.59k
    break;
3731
3.59k
  default:
3732
98
    break;
3733
18.0k
  }
3734
3735
  // TODO: These folds could be refactored to be part of the above calls.
3736
15.5k
  return foldICmpBinOpEqualityWithConstant(Cmp, BO, C);
3737
18.0k
}
3738
3739
/// Fold `icmp Pred ([uadd|usub].sat X, C1), C` into a single range check on X
/// when the saturating intrinsic has one use and a constant second operand.
/// Returns the new compare, or nullptr if the fold is not possible.
static Instruction *
foldICmpUSubSatOrUAddSatWithConstant(ICmpInst::Predicate Pred,
                                     SaturatingInst *II, const APInt &C,
                                     InstCombiner::BuilderTy &Builder) {
  // More than one instruction may be produced for the intrinsic, so require
  // that the compare is its only user.
  if (!II->hasOneUse())
    return nullptr;

  // Let Y        = [add/sub]_sat(X, C) pred C2
  //     SatVal   = The saturating value for the operation
  //     WillWrap = Whether or not the operation will underflow / overflow
  // => Y = (WillWrap ? SatVal : (X binop C)) pred C2
  // => Y = WillWrap ? (SatVal pred C2) : ((X binop C) pred C2)
  //
  // If (SatVal pred C2) is true:
  //    Y = WillWrap ? true : ((X binop C) pred C2)
  // => Y = WillWrap || ((X binop C) pred C2)
  // otherwise:
  //    Y =  WillWrap ? false : ((X binop C) pred C2)
  // => Y = !WillWrap ?  ((X binop C) pred C2) : false
  // => Y = !WillWrap && ((X binop C) pred C2)
  Value *X = II->getOperand(0);
  Value *SatArg = II->getOperand(1);

  // The second operand must be an integer constant or a splat vector.
  const APInt *SatArgC;
  if (!match(SatArg, m_APInt(SatArgC)))
    return nullptr;

  APInt SatVal;
  switch (II->getIntrinsicID()) {
  case Intrinsic::uadd_sat:
    SatVal = APInt::getAllOnes(C.getBitWidth());
    break;
  case Intrinsic::usub_sat:
    SatVal = APInt::getZero(C.getBitWidth());
    break;
  default:
    llvm_unreachable(
        "This function only works with usub_sat and uadd_sat for now!");
  }

  // (SatVal pred C2): decides whether the two conditions are or'ed or and'ed.
  const bool SatValPasses = ICmpInst::compare(SatVal, C, Pred);
  const bool IsAdd = II->getBinaryOp() == Instruction::Add;

  // Values of X for which the operation does not wrap (!WillWrap); inverted
  // to the WillWrap set when the saturated value satisfies the compare.
  ConstantRange NoWrapRange = ConstantRange::makeExactNoWrapRegion(
      II->getBinaryOp(), *SatArgC, II->getNoWrapKind());
  ConstantRange WrapCond = SatValPasses ? NoWrapRange.inverse() : NoWrapRange;

  // Values of X for which the plain (X binop C) satisfies the compare.
  ConstantRange ResultRange = ConstantRange::makeExactICmpRegion(Pred, C);
  ConstantRange CmpCond =
      IsAdd ? ResultRange.sub(*SatArgC) : ResultRange.add(*SatArgC);

  // WillWrap || cmp is a union, !WillWrap && cmp is an intersection. Give up
  // unless the combination is exactly representable as one range.
  std::optional<ConstantRange> Combined =
      SatValPasses ? WrapCond.exactUnionWith(CmpCond)
                   : WrapCond.exactIntersectWith(CmpCond);
  if (!Combined)
    return nullptr;

  CmpInst::Predicate RangePred;
  APInt RangeRHS;
  APInt RangeOffset;
  Combined->getEquivalentICmp(RangePred, RangeRHS, RangeOffset);

  Value *Shifted =
      Builder.CreateAdd(X, ConstantInt::get(SatArg->getType(), RangeOffset));
  return new ICmpInst(RangePred, Shifted,
                      ConstantInt::get(SatArg->getType(), RangeRHS));
}
3823
3824
/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
3825
Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
3826
                                                             IntrinsicInst *II,
3827
1.32k
                                                             const APInt &C) {
3828
1.32k
  ICmpInst::Predicate Pred = Cmp.getPredicate();
3829
3830
  // Handle folds that apply for any kind of icmp.
3831
1.32k
  switch (II->getIntrinsicID()) {
3832
684
  default:
3833
684
    break;
3834
684
  case Intrinsic::uadd_sat:
3835
18
  case Intrinsic::usub_sat:
3836
18
    if (auto *Folded = foldICmpUSubSatOrUAddSatWithConstant(
3837
18
            Pred, cast<SaturatingInst>(II), C, Builder))
3838
0
      return Folded;
3839
18
    break;
3840
622
  case Intrinsic::ctpop: {
3841
622
    const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
3842
622
    if (Instruction *R = foldCtpopPow2Test(Cmp, II, C, Builder, Q))
3843
0
      return R;
3844
622
  } break;
3845
1.32k
  }
3846
3847
1.32k
  if (Cmp.isEquality())
3848
590
    return foldICmpEqIntrinsicWithConstant(Cmp, II, C);
3849
3850
734
  Type *Ty = II->getType();
3851
734
  unsigned BitWidth = C.getBitWidth();
3852
734
  switch (II->getIntrinsicID()) {
3853
427
  case Intrinsic::ctpop: {
3854
    // (ctpop X > BitWidth - 1) --> X == -1
3855
427
    Value *X = II->getArgOperand(0);
3856
427
    if (C == BitWidth - 1 && Pred == ICmpInst::ICMP_UGT)
3857
5
      return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, X,
3858
5
                             ConstantInt::getAllOnesValue(Ty));
3859
    // (ctpop X < BitWidth) --> X != -1
3860
422
    if (C == BitWidth && Pred == ICmpInst::ICMP_ULT)
3861
0
      return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE, X,
3862
0
                             ConstantInt::getAllOnesValue(Ty));
3863
422
    break;
3864
422
  }
3865
422
  case Intrinsic::ctlz: {
3866
    // ctlz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX < 0b00010000
3867
20
    if (Pred == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
3868
15
      unsigned Num = C.getLimitedValue();
3869
15
      APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
3870
15
      return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT,
3871
15
                             II->getArgOperand(0), ConstantInt::get(Ty, Limit));
3872
15
    }
3873
3874
    // ctlz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX > 0b00011111
3875
5
    if (Pred == ICmpInst::ICMP_ULT && C.uge(1) && C.ule(BitWidth)) {
3876
5
      unsigned Num = C.getLimitedValue();
3877
5
      APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num);
3878
5
      return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT,
3879
5
                             II->getArgOperand(0), ConstantInt::get(Ty, Limit));
3880
5
    }
3881
0
    break;
3882
5
  }
3883
38
  case Intrinsic::cttz: {
3884
    // Limit to one use to ensure we don't increase instruction count.
3885
38
    if (!II->hasOneUse())
3886
14
      return nullptr;
3887
3888
    // cttz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX & 0b00001111 == 0
3889
24
    if (Pred == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
3890
15
      APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1);
3891
15
      return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ,
3892
15
                             Builder.CreateAnd(II->getArgOperand(0), Mask),
3893
15
                             ConstantInt::getNullValue(Ty));
3894
15
    }
3895
3896
    // cttz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX & 0b00000111 != 0
3897
9
    if (Pred == ICmpInst::ICMP_ULT && C.uge(1) && C.ule(BitWidth)) {
3898
9
      APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue());
3899
9
      return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE,
3900
9
                             Builder.CreateAnd(II->getArgOperand(0), Mask),
3901
9
                             ConstantInt::getNullValue(Ty));
3902
9
    }
3903
0
    break;
3904
9
  }
3905
0
  case Intrinsic::ssub_sat:
3906
    // ssub.sat(a, b) spred 0 -> a spred b
3907
0
    if (ICmpInst::isSigned(Pred)) {
3908
0
      if (C.isZero())
3909
0
        return new ICmpInst(Pred, II->getArgOperand(0), II->getArgOperand(1));
3910
      // X s<= 0 is cannonicalized to X s< 1
3911
0
      if (Pred == ICmpInst::ICMP_SLT && C.isOne())
3912
0
        return new ICmpInst(ICmpInst::ICMP_SLE, II->getArgOperand(0),
3913
0
                            II->getArgOperand(1));
3914
      // X s>= 0 is cannonicalized to X s> -1
3915
0
      if (Pred == ICmpInst::ICMP_SGT && C.isAllOnes())
3916
0
        return new ICmpInst(ICmpInst::ICMP_SGE, II->getArgOperand(0),
3917
0
                            II->getArgOperand(1));
3918
0
    }
3919
0
    break;
3920
249
  default:
3921
249
    break;
3922
734
  }
3923
3924
671
  return nullptr;
3925
734
}
3926
3927
/// Handle icmp with constant (but not simple integer constant) RHS.
3928
78.4k
Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) {
3929
78.4k
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
3930
78.4k
  Constant *RHSC = dyn_cast<Constant>(Op1);
3931
78.4k
  Instruction *LHSI = dyn_cast<Instruction>(Op0);
3932
78.4k
  if (!RHSC || !LHSI)
3933
47.4k
    return nullptr;
3934
3935
30.9k
  switch (LHSI->getOpcode()) {
3936
336
  case Instruction::PHI:
3937
336
    if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
3938
4
      return NV;
3939
332
    break;
3940
332
  case Instruction::IntToPtr:
3941
    // icmp pred inttoptr(X), null -> icmp pred X, 0
3942
19
    if (RHSC->isNullValue() &&
3943
19
        DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
3944
19
      return new ICmpInst(
3945
19
          I.getPredicate(), LHSI->getOperand(0),
3946
19
          Constant::getNullValue(LHSI->getOperand(0)->getType()));
3947
0
    break;
3948
3949
8.37k
  case Instruction::Load:
3950
    // Try to optimize things like "A[i] > 4" to index computations.
3951
8.37k
    if (GetElementPtrInst *GEP =
3952
8.37k
            dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
3953
554
      if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
3954
161
        if (Instruction *Res =
3955
161
                foldCmpLoadFromIndexedGlobal(cast<LoadInst>(LHSI), GEP, GV, I))
3956
54
          return Res;
3957
8.32k
    break;
3958
30.9k
  }
3959
3960
30.8k
  return nullptr;
3961
30.9k
}
3962
3963
/// Push the comparison `icmp Pred SI, RHS` into both arms of the select
/// \p SI, producing `select Cond, (icmp Pred TrueVal, RHS), (icmp Pred
/// FalseVal, RHS)`, which is later converted into a logical and/or.
/// \p I is the original compare. Returns the new select, or nullptr when the
/// transform would not be profitable.
Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred,
                                              SelectInst *SI, Value *RHS,
                                              const ICmpInst &I) {
  // Try to reduce the compare of one select arm to an existing value, either
  // by plain simplification or because the select condition implies the
  // result.
  auto FoldArm = [&](Value *Arm, bool CondIsTrue) -> Value * {
    if (Value *Simplified = simplifyICmpInst(Pred, Arm, RHS, SQ))
      return Simplified;
    std::optional<bool> Implied = isImpliedCondition(
        SI->getCondition(), Pred, Arm, RHS, DL, CondIsTrue);
    if (Implied)
      return ConstantInt::get(I.getType(), *Implied);
    return nullptr;
  };

  Value *TrueCmp = FoldArm(SI->getOperand(1), true);
  Value *FalseCmp = FoldArm(SI->getOperand(2), false);
  if (!TrueCmp && !FalseCmp)
    return nullptr;

  // The transform must not create additional code. That holds when
  //  * both arms folded (the icmp turns into a select that will usually
  //    simplify further), or
  //  * the icmp is the only user of the select (a select+icmp is traded for a
  //    simpler select+icmp), or
  //  * every use of the select can be replaced using dominance information
  //    (the "global" case).
  const bool BothArmsFolded = TrueCmp && FalseCmp;
  if (!BothArmsFolded && !SI->hasOneUse()) {
    // Global case: the arm that folded has to be a non-zero ConstantInt.
    Value *FoldedArm = FalseCmp ? FalseCmp : TrueCmp;
    auto *FoldedC = dyn_cast<ConstantInt>(FoldedArm);
    if (!FoldedC || FoldedC->isZero())
      return nullptr;
    // If the true arm folded, try replacing the select with its second
    // operand; otherwise the false arm folded, so try the first operand.
    if (!replacedSelectWithOperand(SI, &I, TrueCmp ? 2 : 1))
      return nullptr;
  }

  // Materialise a compare for whichever arm could not be folded.
  if (!TrueCmp)
    TrueCmp = Builder.CreateICmp(Pred, SI->getOperand(1), RHS, I.getName());
  if (!FalseCmp)
    FalseCmp = Builder.CreateICmp(Pred, SI->getOperand(2), RHS, I.getName());
  return SelectInst::Create(SI->getOperand(0), TrueCmp, FalseCmp);
}
4017
4018
/// Some comparisons can be simplified.
4019
/// In this case, we are looking for comparisons that look like
4020
/// a check for a lossy truncation.
4021
/// Folds:
4022
///   icmp SrcPred (x & Mask), x    to    icmp DstPred x, Mask
4023
/// Where Mask is some pattern that produces all-ones in low bits:
4024
///    (-1 >> y)
4025
///    ((-1 << y) >> y)     <- non-canonical, has extra uses
4026
///   ~(-1 << y)
4027
///    ((1 << y) + (-1))    <- non-canonical, has extra uses
4028
/// The Mask can be a constant, too.
4029
/// For some predicates, the operands are commutative.
4030
/// For others, x can only be on a specific side.
4031
static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
4032
29.9k
                                          InstCombiner::BuilderTy &Builder) {
4033
29.9k
  ICmpInst::Predicate SrcPred;
4034
29.9k
  Value *X, *M, *Y;
4035
29.9k
  auto m_VariableMask = m_CombineOr(
4036
29.9k
      m_CombineOr(m_Not(m_Shl(m_AllOnes(), m_Value())),
4037
29.9k
                  m_Add(m_Shl(m_One(), m_Value()), m_AllOnes())),
4038
29.9k
      m_CombineOr(m_LShr(m_AllOnes(), m_Value()),
4039
29.9k
                  m_LShr(m_Shl(m_AllOnes(), m_Value(Y)), m_Deferred(Y))));
4040
29.9k
  auto m_Mask = m_CombineOr(m_VariableMask, m_LowBitMask());
4041
29.9k
  if (!match(&I, m_c_ICmp(SrcPred,
4042
29.9k
                          m_c_And(m_CombineAnd(m_Mask, m_Value(M)), m_Value(X)),
4043
29.9k
                          m_Deferred(X))))
4044
29.7k
    return nullptr;
4045
4046
187
  ICmpInst::Predicate DstPred;
4047
187
  switch (SrcPred) {
4048
33
  case ICmpInst::Predicate::ICMP_EQ:
4049
    //  x & (-1 >> y) == x    ->    x u<= (-1 >> y)
4050
33
    DstPred = ICmpInst::Predicate::ICMP_ULE;
4051
33
    break;
4052
52
  case ICmpInst::Predicate::ICMP_NE:
4053
    //  x & (-1 >> y) != x    ->    x u> (-1 >> y)
4054
52
    DstPred = ICmpInst::Predicate::ICMP_UGT;
4055
52
    break;
4056
8
  case ICmpInst::Predicate::ICMP_ULT:
4057
    //  x & (-1 >> y) u< x    ->    x u> (-1 >> y)
4058
    //  x u> x & (-1 >> y)    ->    x u> (-1 >> y)
4059
8
    DstPred = ICmpInst::Predicate::ICMP_UGT;
4060
8
    break;
4061
19
  case ICmpInst::Predicate::ICMP_UGE:
4062
    //  x & (-1 >> y) u>= x    ->    x u<= (-1 >> y)
4063
    //  x u<= x & (-1 >> y)    ->    x u<= (-1 >> y)
4064
19
    DstPred = ICmpInst::Predicate::ICMP_ULE;
4065
19
    break;
4066
17
  case ICmpInst::Predicate::ICMP_SLT:
4067
    //  x & (-1 >> y) s< x    ->    x s> (-1 >> y)
4068
    //  x s> x & (-1 >> y)    ->    x s> (-1 >> y)
4069
17
    if (!match(M, m_Constant())) // Can not do this fold with non-constant.
4070
0
      return nullptr;
4071
17
    if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
4072
0
      return nullptr;
4073
17
    DstPred = ICmpInst::Predicate::ICMP_SGT;
4074
17
    break;
4075
10
  case ICmpInst::Predicate::ICMP_SGE:
4076
    //  x & (-1 >> y) s>= x    ->    x s<= (-1 >> y)
4077
    //  x s<= x & (-1 >> y)    ->    x s<= (-1 >> y)
4078
10
    if (!match(M, m_Constant())) // Can not do this fold with non-constant.
4079
0
      return nullptr;
4080
10
    if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
4081
1
      return nullptr;
4082
9
    DstPred = ICmpInst::Predicate::ICMP_SLE;
4083
9
    break;
4084
19
  case ICmpInst::Predicate::ICMP_SGT:
4085
48
  case ICmpInst::Predicate::ICMP_SLE:
4086
48
    return nullptr;
4087
0
  case ICmpInst::Predicate::ICMP_UGT:
4088
0
  case ICmpInst::Predicate::ICMP_ULE:
4089
0
    llvm_unreachable("Instsimplify took care of commut. variant");
4090
0
    break;
4091
0
  default:
4092
0
    llvm_unreachable("All possible folds are handled.");
4093
187
  }
4094
4095
  // The mask value may be a vector constant that has undefined elements. But it
4096
  // may not be safe to propagate those undefs into the new compare, so replace
4097
  // those elements by copying an existing, defined, and safe scalar constant.
4098
138
  Type *OpTy = M->getType();
4099
138
  auto *VecC = dyn_cast<Constant>(M);
4100
138
  auto *OpVTy = dyn_cast<FixedVectorType>(OpTy);
4101
138
  if (OpVTy && VecC && VecC->containsUndefOrPoisonElement()) {
4102
11
    Constant *SafeReplacementConstant = nullptr;
4103
11
    for (unsigned i = 0, e = OpVTy->getNumElements(); i != e; ++i) {
4104
11
      if (!isa<UndefValue>(VecC->getAggregateElement(i))) {
4105
11
        SafeReplacementConstant = VecC->getAggregateElement(i);
4106
11
        break;
4107
11
      }
4108
11
    }
4109
11
    assert(SafeReplacementConstant && "Failed to find undef replacement");
4110
0
    M = Constant::replaceUndefsWith(VecC, SafeReplacementConstant);
4111
11
  }
4112
4113
0
  return Builder.CreateICmp(DstPred, X, M);
4114
187
}
4115
4116
/// Some comparisons can be simplified.
4117
/// In this case, we are looking for comparisons that look like
4118
/// a check for a lossy signed truncation.
4119
/// Folds:   (MaskedBits is a constant.)
4120
///   ((%x << MaskedBits) a>> MaskedBits) SrcPred %x
4121
/// Into:
4122
///   (add %x, (1 << (KeptBits-1))) DstPred (1 << KeptBits)
4123
/// Where  KeptBits = bitwidth(%x) - MaskedBits
4124
static Value *
4125
foldICmpWithTruncSignExtendedVal(ICmpInst &I,
4126
29.7k
                                 InstCombiner::BuilderTy &Builder) {
4127
29.7k
  ICmpInst::Predicate SrcPred;
4128
29.7k
  Value *X;
4129
29.7k
  const APInt *C0, *C1; // FIXME: non-splats, potentially with undef.
4130
  // We are ok with 'shl' having multiple uses, but 'ashr' must be one-use.
4131
29.7k
  if (!match(&I, m_c_ICmp(SrcPred,
4132
29.7k
                          m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C0)),
4133
29.7k
                                          m_APInt(C1))),
4134
29.7k
                          m_Deferred(X))))
4135
29.7k
    return nullptr;
4136
4137
  // Potential handling of non-splats: for each element:
4138
  //  * if both are undef, replace with constant 0.
4139
  //    Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0.
4140
  //  * if both are not undef, and are different, bailout.
4141
  //  * else, only one is undef, then pick the non-undef one.
4142
4143
  // The shift amount must be equal.
4144
48
  if (*C0 != *C1)
4145
5
    return nullptr;
4146
43
  const APInt &MaskedBits = *C0;
4147
43
  assert(MaskedBits != 0 && "shift by zero should be folded away already.");
4148
4149
0
  ICmpInst::Predicate DstPred;
4150
43
  switch (SrcPred) {
4151
39
  case ICmpInst::Predicate::ICMP_EQ:
4152
    // ((%x << MaskedBits) a>> MaskedBits) == %x
4153
    //   =>
4154
    // (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
4155
39
    DstPred = ICmpInst::Predicate::ICMP_ULT;
4156
39
    break;
4157
0
  case ICmpInst::Predicate::ICMP_NE:
4158
    // ((%x << MaskedBits) a>> MaskedBits) != %x
4159
    //   =>
4160
    // (add %x, (1 << (KeptBits-1))) u>= (1 << KeptBits)
4161
0
    DstPred = ICmpInst::Predicate::ICMP_UGE;
4162
0
    break;
4163
  // FIXME: are more folds possible?
4164
4
  default:
4165
4
    return nullptr;
4166
43
  }
4167
4168
39
  auto *XType = X->getType();
4169
39
  const unsigned XBitWidth = XType->getScalarSizeInBits();
4170
39
  const APInt BitWidth = APInt(XBitWidth, XBitWidth);
4171
39
  assert(BitWidth.ugt(MaskedBits) && "shifts should leave some bits untouched");
4172
4173
  // KeptBits = bitwidth(%x) - MaskedBits
4174
0
  const APInt KeptBits = BitWidth - MaskedBits;
4175
39
  assert(KeptBits.ugt(0) && KeptBits.ult(BitWidth) && "unreachable");
4176
  // ICmpCst = (1 << KeptBits)
4177
0
  const APInt ICmpCst = APInt(XBitWidth, 1).shl(KeptBits);
4178
39
  assert(ICmpCst.isPowerOf2());
4179
  // AddCst = (1 << (KeptBits-1))
4180
0
  const APInt AddCst = ICmpCst.lshr(1);
4181
39
  assert(AddCst.ult(ICmpCst) && AddCst.isPowerOf2());
4182
4183
  // T0 = add %x, AddCst
4184
0
  Value *T0 = Builder.CreateAdd(X, ConstantInt::get(XType, AddCst));
4185
  // T1 = T0 DstPred ICmpCst
4186
39
  Value *T1 = Builder.CreateICmp(DstPred, T0, ConstantInt::get(XType, ICmpCst));
4187
4188
39
  return T1;
4189
43
}
4190
4191
// Given pattern:
4192
//   icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
4193
// we should move shifts to the same hand of 'and', i.e. rewrite as
4194
//   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
4195
// We are only interested in opposite logical shifts here.
4196
// One of the shifts can be truncated.
4197
// If we can, we want to end up creating 'lshr' shift.
4198
static Value *
4199
foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
4200
29.7k
                                           InstCombiner::BuilderTy &Builder) {
4201
29.7k
  if (!I.isEquality() || !match(I.getOperand(1), m_Zero()) ||
4202
29.7k
      !I.getOperand(0)->hasOneUse())
4203
23.8k
    return nullptr;
4204
4205
5.82k
  auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value());
4206
4207
  // Look for an 'and' of two logical shifts, one of which may be truncated.
4208
  // We use m_TruncOrSelf() on the RHS to correctly handle commutative case.
4209
5.82k
  Instruction *XShift, *MaybeTruncation, *YShift;
4210
5.82k
  if (!match(
4211
5.82k
          I.getOperand(0),
4212
5.82k
          m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)),
4213
5.82k
                  m_CombineAnd(m_TruncOrSelf(m_CombineAnd(
4214
5.82k
                                   m_AnyLogicalShift, m_Instruction(YShift))),
4215
5.82k
                               m_Instruction(MaybeTruncation)))))
4216
5.60k
    return nullptr;
4217
4218
  // We potentially looked past 'trunc', but only when matching YShift,
4219
  // therefore YShift must have the widest type.
4220
214
  Instruction *WidestShift = YShift;
4221
  // Therefore XShift must have the shallowest type.
4222
  // Or they both have identical types if there was no truncation.
4223
214
  Instruction *NarrowestShift = XShift;
4224
4225
214
  Type *WidestTy = WidestShift->getType();
4226
214
  Type *NarrowestTy = NarrowestShift->getType();
4227
214
  assert(NarrowestTy == I.getOperand(0)->getType() &&
4228
214
         "We did not look past any shifts while matching XShift though.");
4229
0
  bool HadTrunc = WidestTy != I.getOperand(0)->getType();
4230
4231
  // If YShift is a 'lshr', swap the shifts around.
4232
214
  if (match(YShift, m_LShr(m_Value(), m_Value())))
4233
151
    std::swap(XShift, YShift);
4234
4235
  // The shifts must be in opposite directions.
4236
214
  auto XShiftOpcode = XShift->getOpcode();
4237
214
  if (XShiftOpcode == YShift->getOpcode())
4238
0
    return nullptr; // Do not care about same-direction shifts here.
4239
4240
214
  Value *X, *XShAmt, *Y, *YShAmt;
4241
214
  match(XShift, m_BinOp(m_Value(X), m_ZExtOrSelf(m_Value(XShAmt))));
4242
214
  match(YShift, m_BinOp(m_Value(Y), m_ZExtOrSelf(m_Value(YShAmt))));
4243
4244
  // If one of the values being shifted is a constant, then we will end with
4245
  // and+icmp, and [zext+]shift instrs will be constant-folded. If they are not,
4246
  // however, we will need to ensure that we won't increase instruction count.
4247
214
  if (!isa<Constant>(X) && !isa<Constant>(Y)) {
4248
    // At least one of the hands of the 'and' should be one-use shift.
4249
165
    if (!match(I.getOperand(0),
4250
165
               m_c_And(m_OneUse(m_AnyLogicalShift), m_Value())))
4251
20
      return nullptr;
4252
145
    if (HadTrunc) {
4253
      // Due to the 'trunc', we will need to widen X. For that either the old
4254
      // 'trunc' or the shift amt in the non-truncated shift should be one-use.
4255
28
      if (!MaybeTruncation->hasOneUse() &&
4256
28
          !NarrowestShift->getOperand(1)->hasOneUse())
4257
5
        return nullptr;
4258
28
    }
4259
145
  }
4260
4261
  // We have two shift amounts from two different shifts. The types of those
4262
  // shift amounts may not match. If that's the case let's bailout now.
4263
189
  if (XShAmt->getType() != YShAmt->getType())
4264
4
    return nullptr;
4265
4266
  // As input, we have the following pattern:
4267
  //   icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
4268
  // We want to rewrite that as:
4269
  //   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
4270
  // While we know that originally (Q+K) would not overflow
4271
  // (because  2 * (N-1) u<= iN -1), we have looked past extensions of
4272
  // shift amounts. so it may now overflow in smaller bitwidth.
4273
  // To ensure that does not happen, we need to ensure that the total maximal
4274
  // shift amount is still representable in that smaller bit width.
4275
185
  unsigned MaximalPossibleTotalShiftAmount =
4276
185
      (WidestTy->getScalarSizeInBits() - 1) +
4277
185
      (NarrowestTy->getScalarSizeInBits() - 1);
4278
185
  APInt MaximalRepresentableShiftAmount =
4279
185
      APInt::getAllOnes(XShAmt->getType()->getScalarSizeInBits());
4280
185
  if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount))
4281
1
    return nullptr;
4282
4283
  // Can we fold (XShAmt+YShAmt) ?
4284
184
  auto *NewShAmt = dyn_cast_or_null<Constant>(
4285
184
      simplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false,
4286
184
                      /*isNUW=*/false, SQ.getWithInstruction(&I)));
4287
184
  if (!NewShAmt)
4288
24
    return nullptr;
4289
160
  if (NewShAmt->getType() != WidestTy) {
4290
54
    NewShAmt =
4291
54
        ConstantFoldCastOperand(Instruction::ZExt, NewShAmt, WidestTy, SQ.DL);
4292
54
    if (!NewShAmt)
4293
0
      return nullptr;
4294
54
  }
4295
160
  unsigned WidestBitWidth = WidestTy->getScalarSizeInBits();
4296
4297
  // Is the new shift amount smaller than the bit width?
4298
  // FIXME: could also rely on ConstantRange.
4299
160
  if (!match(NewShAmt,
4300
160
             m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
4301
160
                                APInt(WidestBitWidth, WidestBitWidth))))
4302
10
    return nullptr;
4303
4304
  // An extra legality check is needed if we had trunc-of-lshr.
4305
150
  if (HadTrunc && match(WidestShift, m_LShr(m_Value(), m_Value()))) {
4306
43
    auto CanFold = [NewShAmt, WidestBitWidth, NarrowestShift, SQ,
4307
43
                    WidestShift]() {
4308
      // It isn't obvious whether it's worth it to analyze non-constants here.
4309
      // Also, let's basically give up on non-splat cases, pessimizing vectors.
4310
      // If *any* of these preconditions matches we can perform the fold.
4311
43
      Constant *NewShAmtSplat = NewShAmt->getType()->isVectorTy()
4312
43
                                    ? NewShAmt->getSplatValue()
4313
43
                                    : NewShAmt;
4314
      // If it's edge-case shift (by 0 or by WidestBitWidth-1) we can fold.
4315
43
      if (NewShAmtSplat &&
4316
43
          (NewShAmtSplat->isNullValue() ||
4317
43
           NewShAmtSplat->getUniqueInteger() == WidestBitWidth - 1))
4318
5
        return true;
4319
      // We consider *min* leading zeros so a single outlier
4320
      // blocks the transform as opposed to allowing it.
4321
38
      if (auto *C = dyn_cast<Constant>(NarrowestShift->getOperand(0))) {
4322
11
        KnownBits Known = computeKnownBits(C, SQ.DL);
4323
11
        unsigned MinLeadZero = Known.countMinLeadingZeros();
4324
        // If the value being shifted has at most lowest bit set we can fold.
4325
11
        unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
4326
11
        if (MaxActiveBits <= 1)
4327
6
          return true;
4328
        // Precondition:  NewShAmt u<= countLeadingZeros(C)
4329
5
        if (NewShAmtSplat && NewShAmtSplat->getUniqueInteger().ule(MinLeadZero))
4330
5
          return true;
4331
5
      }
4332
27
      if (auto *C = dyn_cast<Constant>(WidestShift->getOperand(0))) {
4333
19
        KnownBits Known = computeKnownBits(C, SQ.DL);
4334
19
        unsigned MinLeadZero = Known.countMinLeadingZeros();
4335
        // If the value being shifted has at most lowest bit set we can fold.
4336
19
        unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
4337
19
        if (MaxActiveBits <= 1)
4338
1
          return true;
4339
        // Precondition:  ((WidestBitWidth-1)-NewShAmt) u<= countLeadingZeros(C)
4340
18
        if (NewShAmtSplat) {
4341
18
          APInt AdjNewShAmt =
4342
18
              (WidestBitWidth - 1) - NewShAmtSplat->getUniqueInteger();
4343
18
          if (AdjNewShAmt.ule(MinLeadZero))
4344
8
            return true;
4345
18
        }
4346
18
      }
4347
18
      return false; // Can't tell if it's ok.
4348
27
    };
4349
43
    if (!CanFold())
4350
18
      return nullptr;
4351
43
  }
4352
4353
  // All good, we can do this fold.
4354
132
  X = Builder.CreateZExt(X, WidestTy);
4355
132
  Y = Builder.CreateZExt(Y, WidestTy);
4356
  // The shift is the same that was for X.
4357
132
  Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
4358
132
                  ? Builder.CreateLShr(X, NewShAmt)
4359
132
                  : Builder.CreateShl(X, NewShAmt);
4360
132
  Value *T1 = Builder.CreateAnd(T0, Y);
4361
132
  return Builder.CreateICmp(I.getPredicate(), T1,
4362
132
                            Constant::getNullValue(WidestTy));
4363
150
}
4364
4365
/// Fold
4366
///   (-1 u/ x) u< y
4367
///   ((x * y) ?/ x) != y
4368
/// to
4369
///   @llvm.?mul.with.overflow(x, y) plus extraction of overflow bit
4370
/// Note that the comparison is commutative, while inverted (u>=, ==) predicate
4371
/// will mean that we are looking for the opposite answer.
4372
29.9k
Value *InstCombinerImpl::foldMultiplicationOverflowCheck(ICmpInst &I) {
4373
29.9k
  ICmpInst::Predicate Pred;
4374
29.9k
  Value *X, *Y;
4375
29.9k
  Instruction *Mul;
4376
29.9k
  Instruction *Div;
4377
29.9k
  bool NeedNegation;
4378
  // Look for: (-1 u/ x) u</u>= y
4379
29.9k
  if (!I.isEquality() &&
4380
29.9k
      match(&I, m_c_ICmp(Pred,
4381
14.8k
                         m_CombineAnd(m_OneUse(m_UDiv(m_AllOnes(), m_Value(X))),
4382
14.8k
                                      m_Instruction(Div)),
4383
14.8k
                         m_Value(Y)))) {
4384
56
    Mul = nullptr;
4385
4386
    // Are we checking that overflow does not happen, or does happen?
4387
56
    switch (Pred) {
4388
6
    case ICmpInst::Predicate::ICMP_ULT:
4389
6
      NeedNegation = false;
4390
6
      break; // OK
4391
11
    case ICmpInst::Predicate::ICMP_UGE:
4392
11
      NeedNegation = true;
4393
11
      break; // OK
4394
39
    default:
4395
39
      return nullptr; // Wrong predicate.
4396
56
    }
4397
56
  } else // Look for: ((x * y) / x) !=/== y
4398
29.9k
      if (I.isEquality() &&
4399
29.9k
          match(&I,
4400
15.0k
                m_c_ICmp(Pred, m_Value(Y),
4401
15.0k
                         m_CombineAnd(
4402
15.0k
                             m_OneUse(m_IDiv(m_CombineAnd(m_c_Mul(m_Deferred(Y),
4403
15.0k
                                                                  m_Value(X)),
4404
15.0k
                                                          m_Instruction(Mul)),
4405
15.0k
                                             m_Deferred(X))),
4406
15.0k
                             m_Instruction(Div))))) {
4407
30
    NeedNegation = Pred == ICmpInst::Predicate::ICMP_EQ;
4408
30
  } else
4409
29.8k
    return nullptr;
4410
4411
47
  BuilderTy::InsertPointGuard Guard(Builder);
4412
  // If the pattern included (x * y), we'll want to insert new instructions
4413
  // right before that original multiplication so that we can replace it.
4414
47
  bool MulHadOtherUses = Mul && !Mul->hasOneUse();
4415
47
  if (MulHadOtherUses)
4416
15
    Builder.SetInsertPoint(Mul);
4417
4418
47
  Function *F = Intrinsic::getDeclaration(I.getModule(),
4419
47
                                          Div->getOpcode() == Instruction::UDiv
4420
47
                                              ? Intrinsic::umul_with_overflow
4421
47
                                              : Intrinsic::smul_with_overflow,
4422
47
                                          X->getType());
4423
47
  CallInst *Call = Builder.CreateCall(F, {X, Y}, "mul");
4424
4425
  // If the multiplication was used elsewhere, to ensure that we don't leave
4426
  // "duplicate" instructions, replace uses of that original multiplication
4427
  // with the multiplication result from the with.overflow intrinsic.
4428
47
  if (MulHadOtherUses)
4429
15
    replaceInstUsesWith(*Mul, Builder.CreateExtractValue(Call, 0, "mul.val"));
4430
4431
47
  Value *Res = Builder.CreateExtractValue(Call, 1, "mul.ov");
4432
47
  if (NeedNegation) // This technically increases instruction count.
4433
12
    Res = Builder.CreateNot(Res, "mul.not.ov");
4434
4435
  // If we replaced the mul, erase it. Do this after all uses of Builder,
4436
  // as the mul is used as insertion point.
4437
47
  if (MulHadOtherUses)
4438
15
    eraseInstFromFunction(*Mul);
4439
4440
47
  return Res;
4441
29.9k
}
4442
4443
/// Fold a comparison between a value and its own negation.
///   (nsw neg X) pred X  -->  X pred' 0, where a signed pred is swapped, an
///                            unsigned pred is replaced by its signed
///                            counterpart, and eq/ne is kept as is.
///   (neg X) eq/ne X     -->  (X & MaxSignedVal) eq/ne 0, for a one-use neg.
/// Returns the replacement compare, or nullptr if neither form matched.
static Instruction *foldICmpXNegX(ICmpInst &I,
                                  InstCombiner::BuilderTy &Builder) {
  CmpInst::Predicate Pred;
  Value *X;

  if (match(&I, m_c_ICmp(Pred, m_NSWNeg(m_Value(X)), m_Deferred(X)))) {
    // For equality-comparisons the predicate is left untouched.
    if (ICmpInst::isSigned(Pred))
      Pred = ICmpInst::getSwappedPredicate(Pred);
    else if (ICmpInst::isUnsigned(Pred))
      Pred = ICmpInst::getSignedPredicate(Pred);

    Constant *Zero = Constant::getNullValue(X->getType());
    return ICmpInst::Create(Instruction::ICmp, Pred, X, Zero, I.getName());
  }

  // A value is not equal to its negation unless that value is 0 or
  // MinSignedValue, ie: a != -a --> (a & MaxSignedVal) != 0
  const bool IsEqOfOneUseNeg =
      match(&I, m_c_ICmp(Pred, m_OneUse(m_Neg(m_Value(X))), m_Deferred(X))) &&
      ICmpInst::isEquality(Pred);
  if (!IsEqOfOneUseNeg)
    return nullptr;

  Type *Ty = X->getType();
  uint32_t BitWidth = Ty->getScalarSizeInBits();
  Constant *MaxSignedVal =
      ConstantInt::get(Ty, APInt::getSignedMaxValue(BitWidth));
  Value *And = Builder.CreateAnd(X, MaxSignedVal);
  return CmpInst::Create(Instruction::ICmp, Pred, And,
                         Constant::getNullValue(Ty));
}
4474
4475
/// Fold an unsigned comparison of (X & Y) against X into an equality test:
///   icmp (X & Y) u<  X --> (X & Y) != X
///   icmp (X & Y) u>= X --> (X & Y) == X
/// The 'and' may be on either side of the compare. \p Q and \p IC are not
/// used. Returns the new compare, or nullptr if nothing was folded.
static Instruction *foldICmpAndXX(ICmpInst &I, const SimplifyQuery &Q,
                                  InstCombinerImpl &IC) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  CmpInst::Predicate Pred = I.getPredicate();

  // Normalize so that the 'and' is operand 0 and X is operand 1.
  if (match(Op1, m_c_And(m_Specific(Op0), m_Value()))) {
    Pred = ICmpInst::getSwappedPredicate(Pred);
    std::swap(Op0, Op1);
  } else if (!match(Op0, m_c_And(m_Specific(Op1), m_Value()))) {
    return nullptr;
  }

  switch (Pred) {
  case ICmpInst::ICMP_ULT:
    // (icmp (X & Y) u< X --> (X & Y) != X
    return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
  case ICmpInst::ICMP_UGE:
    // (icmp (X & Y) u>= X --> (X & Y) == X
    return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
  default:
    return nullptr;
  }
}
4498
4499
/// Fold comparisons of (X | Y) against one of the 'or' operands:
///   icmp (X | Y) u<= X --> (X | Y) == X
///   icmp (X | Y) u>  X --> (X | Y) != X
///   icmp (X | Y) eq/ne Y --> (X & ~Y) eq/ne 0   if Y is freely invertible
///   icmp (X | Y) eq/ne Y --> (~X | Y) eq/ne -1  if X is freely invertible
/// The 'or' may be on either side of the compare. The eq/ne folds require the
/// 'or' to be one-use. Returns the new compare, or nullptr if nothing folded.
static Instruction *foldICmpOrXX(ICmpInst &I, const SimplifyQuery &Q,
                                 InstCombinerImpl &IC) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A;
  CmpInst::Predicate Pred = I.getPredicate();

  // Normalize so that the 'or' is operand 0; A is then the 'or' operand that
  // is not compared against.
  if (match(Op1, m_c_Or(m_Specific(Op0), m_Value()))) {
    Pred = ICmpInst::getSwappedPredicate(Pred);
    std::swap(Op0, Op1);
  }
  if (!match(Op0, m_c_Or(m_Specific(Op1), m_Value(A))))
    return nullptr;

  switch (Pred) {
  case ICmpInst::ICMP_ULE:
    // icmp (X | Y) u<= X --> (X | Y) == X
    return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
  case ICmpInst::ICMP_UGT:
    // icmp (X | Y) u> X --> (X | Y) != X
    return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
  default:
    break;
  }

  if (!ICmpInst::isEquality(Pred) || !Op0->hasOneUse())
    return nullptr;

  // icmp (X | Y) eq/ne Y --> (X & ~Y) eq/ne 0 if Y is freely invertible
  if (Value *NotOp1 =
          IC.getFreelyInverted(Op1, Op1->hasOneUse(), &IC.Builder)) {
    Value *AndNot = IC.Builder.CreateAnd(A, NotOp1);
    return new ICmpInst(Pred, AndNot, Constant::getNullValue(Op1->getType()));
  }

  // icmp (X | Y) eq/ne Y --> (~X | Y) eq/ne -1 if X  is freely invertible.
  if (Value *NotA = IC.getFreelyInverted(A, A->hasOneUse(), &IC.Builder)) {
    Value *OrNot = IC.Builder.CreateOr(Op1, NotA);
    return new ICmpInst(Pred, OrNot,
                        Constant::getAllOnesValue(Op1->getType()));
  }

  return nullptr;
}
4533
4534
/// Turn a non-strict comparison of (X ^ Y) against X into the strict one when
/// Y is known to be non-zero:
///   icmp (X ^ Y_NonZero) u>= X --> icmp (X ^ Y_NonZero) u> X
///   icmp (X ^ Y_NonZero) u<= X --> icmp (X ^ Y_NonZero) u< X
///   icmp (X ^ Y_NonZero) s>= X --> icmp (X ^ Y_NonZero) s> X
///   icmp (X ^ Y_NonZero) s<= X --> icmp (X ^ Y_NonZero) s< X
/// The 'xor' may be on either side of the compare. \p IC is not used.
/// Returns the new compare, or nullptr if nothing was folded.
static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q,
                                  InstCombinerImpl &IC) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A;
  CmpInst::Predicate Pred = I.getPredicate();

  // Normalize so that the 'xor' is operand 0; A is the 'xor' operand that is
  // not compared against.
  if (match(Op1, m_c_Xor(m_Specific(Op0), m_Value(A)))) {
    Pred = ICmpInst::getSwappedPredicate(Pred);
    std::swap(Op0, Op1);
  } else if (!match(Op0, m_c_Xor(m_Specific(Op1), m_Value(A)))) {
    return nullptr;
  }

  // Strict predicates map to themselves, so there is nothing to do for them.
  const CmpInst::Predicate PredOut = CmpInst::getStrictPredicate(Pred);
  if (PredOut == Pred)
    return nullptr;

  if (!isKnownNonZero(A, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
    return nullptr;

  return new ICmpInst(PredOut, Op0, Op1);
}
4557
4558
/// Try to fold icmp (binop), X or icmp X, (binop).
///
/// The folds below are tried in order and the first one that applies wins.
///
/// \param I  The compare being visited.
/// \param SQ Base simplify query; a copy bound to \p I is used for the
///           known-bits / known-non-zero queries made here.
/// \returns A replacement instruction (a new ICmpInst or the result of
///          replaceInstUsesWith), or nullptr when neither operand is a
///          BinaryOperator or no fold applies.
///
4559
/// TODO: A large part of this logic is duplicated in InstSimplify's
4560
/// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
4561
/// duplication.
4562
Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
4563
82.8k
                                             const SimplifyQuery &SQ) {
4564
82.8k
  const SimplifyQuery Q = SQ.getWithInstruction(&I);
4565
82.8k
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
4566
4567
  // Special logic for binary operators.
4568
82.8k
  BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
4569
82.8k
  BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
4570
82.8k
  if (!BO0 && !BO1)
4571
52.1k
    return nullptr;
4572
4573
30.7k
  if (Instruction *NewICmp = foldICmpXNegX(I, Builder))
4574
55
    return NewICmp;
4575
4576
30.6k
  const CmpInst::Predicate Pred = I.getPredicate();
4577
30.6k
  Value *X;
4578
4579
  // Convert add-with-unsigned-overflow comparisons into a 'not' with compare.
4580
  // (Op1 + X) u</u>= Op1 --> ~Op1 u</u>= X
4581
30.6k
  if (match(Op0, m_OneUse(m_c_Add(m_Specific(Op1), m_Value(X)))) &&
4582
30.6k
      (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE))
4583
41
    return new ICmpInst(Pred, Builder.CreateNot(Op1), X);
4584
  // Op0 u>/u<= (Op0 + X) --> X u>/u<= ~Op0
4585
30.6k
  if (match(Op1, m_OneUse(m_c_Add(m_Specific(Op0), m_Value(X)))) &&
4586
30.6k
      (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE))
4587
15
    return new ICmpInst(Pred, X, Builder.CreateNot(Op0));
4588
4589
30.5k
  {
4590
    // (Op1 + X) + C u</u>= Op1 --> ~C - X u</u>= Op1
4591
30.5k
    Constant *C;
4592
30.5k
    if (match(Op0, m_OneUse(m_Add(m_c_Add(m_Specific(Op1), m_Value(X)),
4593
30.5k
                                  m_ImmConstant(C)))) &&
4594
30.5k
        (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) {
4595
15
      Constant *C2 = ConstantExpr::getNot(C);
4596
15
      return new ICmpInst(Pred, Builder.CreateSub(C2, X), Op1);
4597
15
    }
4598
    // Op0 u>/u<= (Op0 + X) + C --> Op0 u>/u<= ~C - X
4599
30.5k
    if (match(Op1, m_OneUse(m_Add(m_c_Add(m_Specific(Op0), m_Value(X)),
4600
30.5k
                                  m_ImmConstant(C)))) &&
4601
30.5k
        (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE)) {
4602
0
      Constant *C2 = ConstantExpr::getNot(C);
4603
0
      return new ICmpInst(Pred, Op0, Builder.CreateSub(C2, X));
4604
0
    }
4605
30.5k
  }
4606
4607
30.5k
  {
4608
    // Similar to above: an unsigned overflow comparison may use offset + mask:
4609
    // ((Op1 + C) & C) u<  Op1 --> Op1 != 0
4610
    // ((Op1 + C) & C) u>= Op1 --> Op1 == 0
4611
    // Op0 u>  ((Op0 + C) & C) --> Op0 != 0
4612
    // Op0 u<= ((Op0 + C) & C) --> Op0 == 0
4613
30.5k
    BinaryOperator *BO;
4614
30.5k
    const APInt *C;
4615
30.5k
    if ((Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE) &&
4616
30.5k
        match(Op0, m_And(m_BinOp(BO), m_LowBitMask(C))) &&
4617
30.5k
        match(BO, m_Add(m_Specific(Op1), m_SpecificIntAllowUndef(*C)))) {
4618
0
      CmpInst::Predicate NewPred =
4619
0
          Pred == ICmpInst::ICMP_ULT ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
4620
0
      Constant *Zero = ConstantInt::getNullValue(Op1->getType());
4621
0
      return new ICmpInst(NewPred, Op1, Zero);
4622
0
    }
4623
4624
30.5k
    if ((Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE) &&
4625
30.5k
        match(Op1, m_And(m_BinOp(BO), m_LowBitMask(C))) &&
4626
30.5k
        match(BO, m_Add(m_Specific(Op0), m_SpecificIntAllowUndef(*C)))) {
4627
0
      CmpInst::Predicate NewPred =
4628
0
          Pred == ICmpInst::ICMP_UGT ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
4629
0
      Constant *Zero = ConstantInt::getNullValue(Op1->getType());
4630
0
      return new ICmpInst(NewPred, Op0, Zero);
4631
0
    }
4632
30.5k
  }
4633
4634
30.5k
  bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
4635
30.5k
  bool Op0HasNUW = false, Op1HasNUW = false;
4636
30.5k
  bool Op0HasNSW = false, Op1HasNSW = false;
4637
  // Analyze the case when either Op0 or Op1 is an add instruction.
4638
  // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
  //
  // hasNoWrapProblem records the nuw/nsw flags of BO in HasNUW/HasNSW and
  // reports whether they are sufficient for Pred: always for equality
  // predicates, nuw for unsigned ones, nsw for signed ones. An `or` is
  // reported as having both flags; for any other non-overflowing operator the
  // out-parameters are left untouched and false is returned.
4639
30.5k
  auto hasNoWrapProblem = [](const BinaryOperator &BO, CmpInst::Predicate Pred,
4640
34.4k
                             bool &HasNSW, bool &HasNUW) -> bool {
4641
34.4k
    if (isa<OverflowingBinaryOperator>(BO)) {
4642
16.0k
      HasNUW = BO.hasNoUnsignedWrap();
4643
16.0k
      HasNSW = BO.hasNoSignedWrap();
4644
16.0k
      return ICmpInst::isEquality(Pred) ||
4645
16.0k
             (CmpInst::isUnsigned(Pred) && HasNUW) ||
4646
16.0k
             (CmpInst::isSigned(Pred) && HasNSW);
4647
18.4k
    } else if (BO.getOpcode() == Instruction::Or) {
4648
1.28k
      HasNUW = true;
4649
1.28k
      HasNSW = true;
4650
1.28k
      return true;
4651
17.1k
    } else {
4652
17.1k
      return false;
4653
17.1k
    }
4654
34.4k
  };
4655
30.5k
  Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
4656
4657
  // A/B (resp. C/D) stay null when the operand does not match m_AddLike; the
  // wrap flags are still recorded for whatever binary operator is present and
  // are reused by the sub/mul/shl folds further down.
30.5k
  if (BO0) {
4658
28.7k
    match(BO0, m_AddLike(m_Value(A), m_Value(B)));
4659
28.7k
    NoOp0WrapProblem = hasNoWrapProblem(*BO0, Pred, Op0HasNSW, Op0HasNUW);
4660
28.7k
  }
4661
30.5k
  if (BO1) {
4662
5.73k
    match(BO1, m_AddLike(m_Value(C), m_Value(D)));
4663
5.73k
    NoOp1WrapProblem = hasNoWrapProblem(*BO1, Pred, Op1HasNSW, Op1HasNUW);
4664
5.73k
  }
4665
4666
  // icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow.
4667
  // icmp (A+B), B -> icmp A, 0 for equalities or if there is no overflow.
4668
30.5k
  if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
4669
106
    return new ICmpInst(Pred, A == Op1 ? B : A,
4670
106
                        Constant::getNullValue(Op1->getType()));
4671
4672
  // icmp C, (C+D) -> icmp 0, D for equalities or if there is no overflow.
4673
  // icmp D, (C+D) -> icmp 0, C for equalities or if there is no overflow.
4674
30.4k
  if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
4675
14
    return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
4676
14
                        C == Op0 ? D : C);
4677
4678
  // icmp (A+B), (A+D) -> icmp B, D for equalities or if there is no overflow.
4679
30.4k
  if (A && C && (A == C || A == D || B == C || B == D) && NoOp0WrapProblem &&
4680
30.4k
      NoOp1WrapProblem) {
4681
    // Determine Y and Z in the form icmp (X+Y), (X+Z).
4682
16
    Value *Y, *Z;
4683
16
    if (A == C) {
4684
      // C + B == C + D  ->  B == D
4685
5
      Y = B;
4686
5
      Z = D;
4687
11
    } else if (A == D) {
4688
      // D + B == C + D  ->  B == C
4689
1
      Y = B;
4690
1
      Z = C;
4691
10
    } else if (B == C) {
4692
      // A + C == C + D  ->  A == D
4693
2
      Y = A;
4694
2
      Z = D;
4695
8
    } else {
4696
8
      assert(B == D);
4697
      // A + D == C + D  ->  A == C
4698
0
      Y = A;
4699
8
      Z = C;
4700
8
    }
4701
0
    return new ICmpInst(Pred, Y, Z);
4702
16
  }
4703
4704
  // icmp slt (A + -1), Op1 -> icmp sle A, Op1
4705
30.4k
  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
4706
30.4k
      match(B, m_AllOnes()))
4707
12
    return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
4708
4709
  // icmp sge (A + -1), Op1 -> icmp sgt A, Op1
4710
30.4k
  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
4711
30.4k
      match(B, m_AllOnes()))
4712
3
    return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
4713
4714
  // icmp sle (A + 1), Op1 -> icmp slt A, Op1
4715
30.4k
  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One()))
4716
9
    return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
4717
4718
  // icmp sgt (A + 1), Op1 -> icmp sge A, Op1
4719
30.4k
  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One()))
4720
16
    return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
4721
4722
  // icmp sgt Op0, (C + -1) -> icmp sge Op0, C
4723
30.3k
  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
4724
30.3k
      match(D, m_AllOnes()))
4725
0
    return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
4726
4727
  // icmp sle Op0, (C + -1) -> icmp slt Op0, C
4728
30.3k
  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
4729
30.3k
      match(D, m_AllOnes()))
4730
1
    return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
4731
4732
  // icmp sge Op0, (C + 1) -> icmp sgt Op0, C
4733
30.3k
  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One()))
4734
5
    return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
4735
4736
  // icmp slt Op0, (C + 1) -> icmp sle Op0, C
4737
30.3k
  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One()))
4738
3
    return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
4739
4740
  // TODO: The subtraction-related identities shown below also hold, but
4741
  // canonicalization from (X -nuw 1) to (X + -1) means that the combinations
4742
  // wouldn't happen even if they were implemented.
4743
  //
4744
  // icmp ult (A - 1), Op1 -> icmp ule A, Op1
4745
  // icmp uge (A - 1), Op1 -> icmp ugt A, Op1
4746
  // icmp ugt Op0, (C - 1) -> icmp uge Op0, C
4747
  // icmp ule Op0, (C - 1) -> icmp ult Op0, C
4748
4749
  // icmp ule (A + 1), Op1 -> icmp ult A, Op1
4750
30.3k
  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One()))
4751
8
    return new ICmpInst(CmpInst::ICMP_ULT, A, Op1);
4752
4753
  // icmp ugt (A + 1), Op1 -> icmp uge A, Op1
4754
30.3k
  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One()))
4755
18
    return new ICmpInst(CmpInst::ICMP_UGE, A, Op1);
4756
4757
  // icmp uge Op0, (C + 1) -> icmp ugt Op0, C
4758
30.3k
  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One()))
4759
4
    return new ICmpInst(CmpInst::ICMP_UGT, Op0, C);
4760
4761
  // icmp ult Op0, (C + 1) -> icmp ule Op0, C
4762
30.3k
  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One()))
4763
1
    return new ICmpInst(CmpInst::ICMP_ULE, Op0, C);
4764
4765
  // if C1 has greater magnitude than C2:
4766
  //  icmp (A + C1), (C + C2) -> icmp (A + C3), C
4767
  //  s.t. C3 = C1 - C2
4768
  //
4769
  // if C2 has greater magnitude than C1:
4770
  //  icmp (A + C1), (C + C2) -> icmp A, (C + C3)
4771
  //  s.t. C3 = C2 - C1
4772
30.3k
  if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
4773
30.3k
      (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) {
4774
27
    const APInt *AP1, *AP2;
4775
    // TODO: Support non-uniform vectors.
4776
    // TODO: Allow undef passthrough if B AND D's element is undef.
4777
27
    if (match(B, m_APIntAllowUndef(AP1)) && match(D, m_APIntAllowUndef(AP2)) &&
4778
27
        AP1->isNegative() == AP2->isNegative()) {
4779
21
      APInt AP1Abs = AP1->abs();
4780
21
      APInt AP2Abs = AP2->abs();
4781
21
      if (AP1Abs.uge(AP2Abs)) {
4782
9
        APInt Diff = *AP1 - *AP2;
4783
9
        Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
4784
9
        Value *NewAdd = Builder.CreateAdd(
4785
9
            A, C3, "", Op0HasNUW && Diff.ule(*AP1), Op0HasNSW);
4786
9
        return new ICmpInst(Pred, NewAdd, C);
4787
12
      } else {
4788
12
        APInt Diff = *AP2 - *AP1;
4789
12
        Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
4790
12
        Value *NewAdd = Builder.CreateAdd(
4791
12
            C, C3, "", Op1HasNUW && Diff.ule(*AP2), Op1HasNSW);
4792
12
        return new ICmpInst(Pred, A, NewAdd);
4793
12
      }
4794
21
    }
4795
6
    Constant *Cst1, *Cst2;
4796
6
    if (match(B, m_ImmConstant(Cst1)) && match(D, m_ImmConstant(Cst2)) &&
4797
6
        ICmpInst::isEquality(Pred)) {
4798
1
      Constant *Diff = ConstantExpr::getSub(Cst2, Cst1);
4799
1
      Value *NewAdd = Builder.CreateAdd(C, Diff);
4800
1
      return new ICmpInst(Pred, A, NewAdd);
4801
1
    }
4802
6
  }
4803
4804
  // Analyze the case when either Op0 or Op1 is a sub instruction.
4805
  // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
4806
30.3k
  A = nullptr;
4807
30.3k
  B = nullptr;
4808
30.3k
  C = nullptr;
4809
30.3k
  D = nullptr;
4810
30.3k
  if (BO0 && BO0->getOpcode() == Instruction::Sub) {
4811
2.76k
    A = BO0->getOperand(0);
4812
2.76k
    B = BO0->getOperand(1);
4813
2.76k
  }
4814
30.3k
  if (BO1 && BO1->getOpcode() == Instruction::Sub) {
4815
1.19k
    C = BO1->getOperand(0);
4816
1.19k
    D = BO1->getOperand(1);
4817
1.19k
  }
4818
4819
  // icmp (A-B), A -> icmp 0, B for equalities or if there is no overflow.
4820
30.3k
  if (A == Op1 && NoOp0WrapProblem)
4821
78
    return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
4822
  // icmp C, (C-D) -> icmp D, 0 for equalities or if there is no overflow.
4823
30.2k
  if (C == Op0 && NoOp1WrapProblem)
4824
6
    return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
4825
4826
  // Convert sub-with-unsigned-overflow comparisons into a comparison of args.
4827
  // (A - B) u>/u<= A --> B u>/u<= A
4828
30.2k
  if (A == Op1 && (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE))
4829
16
    return new ICmpInst(Pred, B, A);
4830
  // C u</u>= (C - D) --> C u</u>= D
4831
30.2k
  if (C == Op0 && (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE))
4832
7
    return new ICmpInst(Pred, C, D);
4833
  // (A - B) u>=/u< A --> B u>/u<= A  iff B != 0
4834
30.2k
  if (A == Op1 && (Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_ULT) &&
4835
30.2k
      isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
4836
2
    return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), B, A);
4837
  // C u<=/u> (C - D) --> C u</u>= D  iff D != 0
4838
30.2k
  if (C == Op0 && (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) &&
4839
30.2k
      isKnownNonZero(D, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
4840
1
    return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), C, D);
4841
4842
  // icmp (A-B), (C-B) -> icmp A, C for equalities or if there is no overflow.
4843
30.2k
  if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem)
4844
15
    return new ICmpInst(Pred, A, C);
4845
4846
  // icmp (A-B), (A-D) -> icmp D, B for equalities or if there is no overflow.
4847
30.2k
  if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem)
4848
24
    return new ICmpInst(Pred, D, B);
4849
4850
  // icmp (0-X) < cst --> X > -cst
  // Handles every signed predicate by using the swapped predicate; the fold is
  // only done when RHSC->isNotMinSignedValue() holds.
4851
30.1k
  if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) {
4852
1.03k
    Value *X;
4853
1.03k
    if (match(BO0, m_Neg(m_Value(X))))
4854
26
      if (Constant *RHSC = dyn_cast<Constant>(Op1))
4855
22
        if (RHSC->isNotMinSignedValue())
4856
22
          return new ICmpInst(I.getSwappedPredicate(), X,
4857
22
                              ConstantExpr::getNeg(RHSC));
4858
1.03k
  }
4859
4860
30.1k
  if (Instruction * R = foldICmpXorXX(I, Q, *this))
4861
40
    return R;
4862
30.1k
  if (Instruction *R = foldICmpOrXX(I, Q, *this))
4863
56
    return R;
4864
4865
30.0k
  {
4866
    // Try to remove shared multiplier from comparison:
4867
    // X * Z u{lt/le/gt/ge}/eq/ne Y * Z
4868
30.0k
    Value *X, *Y, *Z;
4869
30.0k
    if (Pred == ICmpInst::getUnsignedPredicate(Pred) &&
4870
30.0k
        ((match(Op0, m_Mul(m_Value(X), m_Value(Z))) &&
4871
23.4k
          match(Op1, m_c_Mul(m_Specific(Z), m_Value(Y)))) ||
4872
23.4k
         (match(Op0, m_Mul(m_Value(Z), m_Value(X))) &&
4873
23.3k
          match(Op1, m_c_Mul(m_Specific(Z), m_Value(Y)))))) {
4874
83
      bool NonZero;
4875
83
      if (ICmpInst::isEquality(Pred)) {
4876
51
        KnownBits ZKnown = computeKnownBits(Z, 0, &I);
4877
        // if Z % 2 != 0
4878
        //    X * Z eq/ne Y * Z -> X eq/ne Y
4879
51
        if (ZKnown.countMaxTrailingZeros() == 0)
4880
2
          return new ICmpInst(Pred, X, Y);
4881
49
        NonZero = !ZKnown.One.isZero() ||
4882
49
                  isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
4883
        // if Z != 0 and nsw(X * Z) and nsw(Y * Z)
4884
        //    X * Z eq/ne Y * Z -> X eq/ne Y
4885
49
        if (NonZero && BO0 && BO1 && Op0HasNSW && Op1HasNSW)
4886
0
          return new ICmpInst(Pred, X, Y);
4887
49
      } else
4888
32
        NonZero = isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
4889
4890
      // If Z != 0 and nuw(X * Z) and nuw(Y * Z)
4891
      //    X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y
4892
81
      if (NonZero && BO0 && BO1 && Op0HasNUW && Op1HasNUW)
4893
1
        return new ICmpInst(Pred, X, Y);
4894
81
    }
4895
30.0k
  }
4896
4897
30.0k
  BinaryOperator *SRem = nullptr;
4898
  // icmp (srem X, Y), Y
4899
30.0k
  if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1))
4900
47
    SRem = BO0;
4901
  // icmp Y, (srem X, Y)
4902
30.0k
  else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
4903
30.0k
           Op0 == BO1->getOperand(1))
4904
26
    SRem = BO1;
4905
30.0k
  if (SRem) {
4906
    // We don't check hasOneUse to avoid increasing register pressure because
4907
    // the value we use is the same value this instruction was already using.
    // The switch is written for the `icmp Y, (srem X, Y)` orientation, so the
    // predicate is swapped when the srem is operand 0.
4908
73
    switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
4909
46
    default:
4910
46
      break;
4911
46
    case ICmpInst::ICMP_EQ:
4912
1
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4913
3
    case ICmpInst::ICMP_NE:
4914
3
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4915
7
    case ICmpInst::ICMP_SGT:
4916
11
    case ICmpInst::ICMP_SGE:
4917
11
      return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
4918
11
                          Constant::getAllOnesValue(SRem->getType()));
4919
8
    case ICmpInst::ICMP_SLT:
4920
12
    case ICmpInst::ICMP_SLE:
4921
12
      return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
4922
12
                          Constant::getNullValue(SRem->getType()));
4923
73
    }
4924
73
  }
4925
4926
  // Both sides are binary operators with the same opcode and the same operand
  // 1, and at least one of them has a single use: try to compare their
  // operand 0 values directly.
30.0k
  if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() &&
4927
30.0k
      (BO0->hasOneUse() || BO1->hasOneUse()) &&
4928
30.0k
      BO0->getOperand(1) == BO1->getOperand(1)) {
4929
188
    switch (BO0->getOpcode()) {
4930
6
    default:
4931
6
      break;
4932
6
    case Instruction::Add:
4933
10
    case Instruction::Sub:
4934
55
    case Instruction::Xor: {
4935
55
      if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
4936
6
        return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
4937
4938
49
      const APInt *C;
4939
49
      if (match(BO0->getOperand(1), m_APInt(C))) {
4940
        // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
4941
43
        if (C->isSignMask()) {
4942
18
          ICmpInst::Predicate NewPred = I.getFlippedSignednessPredicate();
4943
18
          return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
4944
18
        }
4945
4946
        // icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b
4947
25
        if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) {
4948
4
          ICmpInst::Predicate NewPred = I.getFlippedSignednessPredicate();
4949
4
          NewPred = I.getSwappedPredicate(NewPred);
4950
4
          return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
4951
4
        }
4952
25
      }
4953
27
      break;
4954
49
    }
4955
27
    case Instruction::Mul: {
4956
15
      if (!I.isEquality())
4957
4
        break;
4958
4959
11
      const APInt *C;
4960
11
      if (match(BO0->getOperand(1), m_APInt(C)) && !C->isZero() &&
4961
11
          !C->isOne()) {
4962
        // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
4963
        // Mask = -1 >> count-trailing-zeros(C).
4964
1
        if (unsigned TZs = C->countr_zero()) {
4965
1
          Constant *Mask = ConstantInt::get(
4966
1
              BO0->getType(),
4967
1
              APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs));
4968
1
          Value *And1 = Builder.CreateAnd(BO0->getOperand(0), Mask);
4969
1
          Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask);
4970
1
          return new ICmpInst(Pred, And1, And2);
4971
1
        }
4972
1
      }
4973
10
      break;
4974
11
    }
4975
10
    case Instruction::UDiv:
4976
51
    case Instruction::LShr:
4977
51
      if (I.isSigned() || !BO0->isExact() || !BO1->isExact())
4978
51
        break;
4979
0
      return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
4980
4981
10
    case Instruction::SDiv:
4982
10
      if (!(I.isEquality() || match(BO0->getOperand(1), m_NonNegative())) ||
4983
10
          !BO0->isExact() || !BO1->isExact())
4984
9
        break;
4985
1
      return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
4986
4987
8
    case Instruction::AShr:
4988
8
      if (!BO0->isExact() || !BO1->isExact())
4989
3
        break;
4990
5
      return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
4991
4992
43
    case Instruction::Shl: {
4993
43
      bool NUW = Op0HasNUW && Op1HasNUW;
4994
43
      bool NSW = Op0HasNSW && Op1HasNSW;
4995
43
      if (!NUW && !NSW)
4996
43
        break;
4997
0
      if (!NSW && I.isSigned())
4998
0
        break;
4999
0
      return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
5000
0
    }
5001
188
    }
5002
188
  }
5003
5004
30.0k
  if (BO0) {
5005
    // Transform  A & (L - 1) `ult` L --> L != 0
5006
28.2k
    auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
5007
28.2k
    auto BitwiseAnd = m_c_And(m_Value(), LSubOne);
5008
5009
28.2k
    if (match(BO0, BitwiseAnd) && Pred == ICmpInst::ICMP_ULT) {
5010
5
      auto *Zero = Constant::getNullValue(BO0->getType());
5011
5
      return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
5012
5
    }
5013
28.2k
  }
5014
5015
  // For unsigned predicates / eq / ne:
5016
  // icmp pred (x << 1), x --> icmp getSignedPredicate(pred) x, 0
5017
  // icmp pred x, (x << 1) --> icmp getSignedPredicate(pred) 0, x
5018
30.0k
  if (!ICmpInst::isSigned(Pred)) {
5019
23.3k
    if (match(Op0, m_Shl(m_Specific(Op1), m_One())))
5020
18
      return new ICmpInst(ICmpInst::getSignedPredicate(Pred), Op1,
5021
18
                          Constant::getNullValue(Op1->getType()));
5022
23.3k
    else if (match(Op1, m_Shl(m_Specific(Op0), m_One())))
5023
19
      return new ICmpInst(ICmpInst::getSignedPredicate(Pred),
5024
19
                          Constant::getNullValue(Op0->getType()), Op0);
5025
23.3k
  }
5026
5027
  // Remaining folds are delegated to helpers; each one is tried in turn.
29.9k
  if (Value *V = foldMultiplicationOverflowCheck(I))
5028
47
    return replaceInstUsesWith(I, V);
5029
5030
29.9k
  if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
5031
138
    return replaceInstUsesWith(I, V);
5032
5033
29.7k
  if (Instruction *R = foldICmpAndXX(I, Q, *this))
5034
26
    return R;
5035
5036
29.7k
  if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
5037
39
    return replaceInstUsesWith(I, V);
5038
5039
29.7k
  if (Value *V = foldShiftIntoShiftInAnotherHandOfAndInICmp(I, SQ, Builder))
5040
132
    return replaceInstUsesWith(I, V);
5041
5042
29.5k
  return nullptr;
5043
29.7k
}
5044
5045
/// Fold icmp Pred min|max(X, Y), Z.
///
/// The fold asks simplifyICmpInst whether `X Pred Z` and/or `Y Pred Z` reduce
/// to a constant and uses whichever is known to either decide the whole
/// compare or to drop the min/max and compare the remaining operand directly.
///
/// \param I      The instruction being folded; used as the query context and
///               as the target of replaceInstUsesWith for constant results.
/// \param MinMax The min/max intrinsic whose LHS/RHS play the role of X and Y.
/// \param Z      The value the min/max result is compared against.
/// \param Pred   The comparison predicate (taken by value; it may be flipped
///               to its other signedness locally).
/// \returns A replacement instruction, or nullptr when the signedness of
///          \p Pred and \p MinMax cannot be reconciled or nothing is known
///          about either operand's comparison with Z.
5046
Instruction *InstCombinerImpl::foldICmpWithMinMax(Instruction &I,
5047
                                                  MinMaxIntrinsic *MinMax,
5048
                                                  Value *Z,
5049
1.09k
                                                  ICmpInst::Predicate Pred) {
5050
1.09k
  Value *X = MinMax->getLHS();
5051
1.09k
  Value *Y = MinMax->getRHS();
  // A signed predicate on an unsigned min/max is not handled.
5052
1.09k
  if (ICmpInst::isSigned(Pred) && !MinMax->isSigned())
5053
117
    return nullptr;
5054
973
  if (ICmpInst::isUnsigned(Pred) && MinMax->isSigned()) {
5055
    // Revert the transform signed pred -> unsigned pred
5056
    // TODO: We can flip the signedness of predicate if both operands of icmp
5057
    // are negative.
5058
130
    if (isKnownNonNegative(Z, SQ.getWithInstruction(&I)) &&
5059
130
        isKnownNonNegative(MinMax, SQ.getWithInstruction(&I))) {
5060
3
      Pred = ICmpInst::getFlippedSignednessPredicate(Pred);
5061
3
    } else
5062
127
      return nullptr;
5063
130
  }
5064
846
  SimplifyQuery Q = SQ.getWithInstruction(&I);
  // Turns a simplifyICmpInst result into a tri-state: nullopt when there is no
  // result or it matches neither m_One nor m_Zero, otherwise true for m_One
  // and false for m_Zero.
5065
1.71k
  auto IsCondKnownTrue = [](Value *Val) -> std::optional<bool> {
5066
1.71k
    if (!Val)
5067
1.46k
      return std::nullopt;
5068
246
    if (match(Val, m_One()))
5069
86
      return true;
5070
160
    if (match(Val, m_Zero()))
5071
160
      return false;
5072
0
    return std::nullopt;
5073
160
  };
5074
846
  auto CmpXZ = IsCondKnownTrue(simplifyICmpInst(Pred, X, Z, Q));
5075
846
  auto CmpYZ = IsCondKnownTrue(simplifyICmpInst(Pred, Y, Z, Q));
5076
846
  if (!CmpXZ.has_value() && !CmpYZ.has_value())
5077
650
    return nullptr;
  // From here on X names an operand whose comparison with Z is known; swap
  // the roles of X and Y if only the Y comparison simplified.
5078
196
  if (!CmpXZ.has_value()) {
5079
116
    std::swap(X, Y);
5080
116
    std::swap(CmpXZ, CmpYZ);
5081
116
  }
5082
5083
  // Used when the result equals `Y Pred Z`: emit the constant if that
  // comparison is already known, otherwise build the compare itself.
196
  auto FoldIntoCmpYZ = [&]() -> Instruction * {
5084
145
    if (CmpYZ.has_value())
5085
27
      return replaceInstUsesWith(I, ConstantInt::getBool(I.getType(), *CmpYZ));
5086
118
    return ICmpInst::Create(Instruction::ICmp, Pred, Y, Z);
5087
145
  };
5088
5089
196
  switch (Pred) {
5090
39
  case ICmpInst::ICMP_EQ:
5091
68
  case ICmpInst::ICMP_NE: {
5092
    // If X == Z:
5093
    //     Expr       Result
5094
    // min(X, Y) == Z X <= Y
5095
    // max(X, Y) == Z X >= Y
5096
    // min(X, Y) != Z X > Y
5097
    // max(X, Y) != Z X < Y
    // (Pred == EQ) == *CmpXZ is true exactly when X == Z is the known fact,
    // whichever of eq/ne the predicate is.
5098
68
    if ((Pred == ICmpInst::ICMP_EQ) == *CmpXZ) {
5099
45
      ICmpInst::Predicate NewPred =
5100
45
          ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
5101
45
      if (Pred == ICmpInst::ICMP_NE)
5102
21
        NewPred = ICmpInst::getInversePredicate(NewPred);
5103
45
      return ICmpInst::Create(Instruction::ICmp, NewPred, X, Y);
5104
45
    }
5105
    // Otherwise (X != Z):
5106
23
    ICmpInst::Predicate NewPred = MinMax->getPredicate();
5107
23
    auto MinMaxCmpXZ = IsCondKnownTrue(simplifyICmpInst(NewPred, X, Z, Q));
    // If the min/max predicate says nothing about X vs Z, retry with the
    // other operand, provided it also satisfies the X != Z pre-condition.
5108
23
    if (!MinMaxCmpXZ.has_value()) {
5109
6
      std::swap(X, Y);
5110
6
      std::swap(CmpXZ, CmpYZ);
5111
      // Re-check pre-condition X != Z
5112
6
      if (!CmpXZ.has_value() || (Pred == ICmpInst::ICMP_EQ) == *CmpXZ)
5113
6
        break;
5114
0
      MinMaxCmpXZ = IsCondKnownTrue(simplifyICmpInst(NewPred, X, Z, Q));
5115
0
    }
5116
17
    if (!MinMaxCmpXZ.has_value())
5117
0
      break;
5118
17
    if (*MinMaxCmpXZ) {
5119
      //    Expr         Fact    Result
5120
      // min(X, Y) == Z  X < Z   false
5121
      // max(X, Y) == Z  X > Z   false
5122
      // min(X, Y) != Z  X < Z    true
5123
      // max(X, Y) != Z  X > Z    true
5124
0
      return replaceInstUsesWith(
5125
0
          I, ConstantInt::getBool(I.getType(), Pred == ICmpInst::ICMP_NE));
5126
17
    } else {
5127
      //    Expr         Fact    Result
5128
      // min(X, Y) == Z  X > Z   Y == Z
5129
      // max(X, Y) == Z  X < Z   Y == Z
5130
      // min(X, Y) != Z  X > Z   Y != Z
5131
      // max(X, Y) != Z  X < Z   Y != Z
5132
17
      return FoldIntoCmpYZ();
5133
17
    }
5134
0
    break;
5135
17
  }
5136
43
  case ICmpInst::ICMP_SLT:
5137
67
  case ICmpInst::ICMP_ULT:
5138
70
  case ICmpInst::ICMP_SLE:
5139
71
  case ICmpInst::ICMP_ULE:
5140
105
  case ICmpInst::ICMP_SGT:
5141
118
  case ICmpInst::ICMP_UGT:
5142
125
  case ICmpInst::ICMP_SGE:
5143
128
  case ICmpInst::ICMP_UGE: {
    // IsSame: the min/max's own predicate equals the strict form of Pred
    // (the min-with-</<= and max-with->/>= rows of the tables below).
5144
128
    bool IsSame = MinMax->getPredicate() == ICmpInst::getStrictPredicate(Pred);
5145
128
    if (*CmpXZ) {
5146
51
      if (IsSame) {
5147
        //      Expr        Fact    Result
5148
        // min(X, Y) < Z    X < Z   true
5149
        // min(X, Y) <= Z   X <= Z  true
5150
        // max(X, Y) > Z    X > Z   true
5151
        // max(X, Y) >= Z   X >= Z  true
5152
0
        return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
5153
51
      } else {
5154
        //      Expr        Fact    Result
5155
        // max(X, Y) < Z    X < Z   Y < Z
5156
        // max(X, Y) <= Z   X <= Z  Y <= Z
5157
        // min(X, Y) > Z    X > Z   Y > Z
5158
        // min(X, Y) >= Z   X >= Z  Y >= Z
5159
51
        return FoldIntoCmpYZ();
5160
51
      }
5161
77
    } else {
5162
77
      if (IsSame) {
5163
        //      Expr        Fact    Result
5164
        // min(X, Y) < Z    X >= Z  Y < Z
5165
        // min(X, Y) <= Z   X > Z   Y <= Z
5166
        // max(X, Y) > Z    X <= Z  Y > Z
5167
        // max(X, Y) >= Z   X < Z   Y >= Z
5168
77
        return FoldIntoCmpYZ();
5169
77
      } else {
5170
        //      Expr        Fact    Result
5171
        // max(X, Y) < Z    X >= Z  false
5172
        // max(X, Y) <= Z   X > Z   false
5173
        // min(X, Y) > Z    X <= Z  false
5174
        // min(X, Y) >= Z   X < Z   false
5175
0
        return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
5176
0
      }
5177
77
    }
5178
0
    break;
5179
128
  }
5180
0
  default:
5181
0
    break;
5182
196
  }
5183
5184
6
  return nullptr;
5185
196
}
5186
5187
// Canonicalize checking for a power-of-2-or-zero value:
5188
static Instruction *foldICmpPow2Test(ICmpInst &I,
5189
76.0k
                                     InstCombiner::BuilderTy &Builder) {
5190
76.0k
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
5191
76.0k
  const CmpInst::Predicate Pred = I.getPredicate();
5192
76.0k
  Value *A = nullptr;
5193
76.0k
  bool CheckIs;
5194
76.0k
  if (I.isEquality()) {
5195
    // (A & (A-1)) == 0 --> ctpop(A) < 2 (two commuted variants)
5196
    // ((A-1) & A) != 0 --> ctpop(A) > 1 (two commuted variants)
5197
34.9k
    if (!match(Op0, m_OneUse(m_c_And(m_Add(m_Value(A), m_AllOnes()),
5198
34.9k
                                     m_Deferred(A)))) ||
5199
34.9k
        !match(Op1, m_ZeroInt()))
5200
34.8k
      A = nullptr;
5201
5202
    // (A & -A) == A --> ctpop(A) < 2 (four commuted variants)
5203
    // (-A & A) != A --> ctpop(A) > 1 (four commuted variants)
5204
34.9k
    if (match(Op0, m_OneUse(m_c_And(m_Neg(m_Specific(Op1)), m_Specific(Op1)))))
5205
95
      A = Op1;
5206
34.8k
    else if (match(Op1,
5207
34.8k
                   m_OneUse(m_c_And(m_Neg(m_Specific(Op0)), m_Specific(Op0)))))
5208
9
      A = Op0;
5209
5210
34.9k
    CheckIs = Pred == ICmpInst::ICMP_EQ;
5211
41.1k
  } else if (ICmpInst::isUnsigned(Pred)) {
5212
    // (A ^ (A-1)) u>= A --> ctpop(A) < 2 (two commuted variants)
5213
    // ((A-1) ^ A) u< A --> ctpop(A) > 1 (two commuted variants)
5214
5215
18.0k
    if ((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_ULT) &&
5216
18.0k
        match(Op0, m_OneUse(m_c_Xor(m_Add(m_Specific(Op1), m_AllOnes()),
5217
10.9k
                                    m_Specific(Op1))))) {
5218
0
      A = Op1;
5219
0
      CheckIs = Pred == ICmpInst::ICMP_UGE;
5220
18.0k
    } else if ((Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE) &&
5221
18.0k
               match(Op1, m_OneUse(m_c_Xor(m_Add(m_Specific(Op0), m_AllOnes()),
5222
7.12k
                                           m_Specific(Op0))))) {
5223
0
      A = Op0;
5224
0
      CheckIs = Pred == ICmpInst::ICMP_ULE;
5225
0
    }
5226
18.0k
  }
5227
5228
76.0k
  if (A) {
5229
177
    Type *Ty = A->getType();
5230
177
    CallInst *CtPop = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, A);
5231
177
    return CheckIs ? new ICmpInst(ICmpInst::ICMP_ULT, CtPop,
5232
104
                                  ConstantInt::get(Ty, 2))
5233
177
                   : new ICmpInst(ICmpInst::ICMP_UGT, CtPop,
5234
73
                                  ConstantInt::get(Ty, 1));
5235
177
  }
5236
5237
75.8k
  return nullptr;
5238
76.0k
}
5239
5240
76.2k
/// Try a sequence of folds that only apply to equality compares (eq/ne).
/// Each fold below is tried in order; the first one that matches returns the
/// replacement compare. Returns nullptr if \p I is not an equality compare or
/// if no fold applies.
Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
  if (!I.isEquality())
    return nullptr;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  const CmpInst::Predicate Pred = I.getPredicate();
  Value *A, *B, *C, *D;
  if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
    if (A == Op1 || B == Op1) { // (A^B) == A  ->  B == 0
      Value *OtherVal = A == Op1 ? B : A;
      return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
    }

    if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
      // A^c1 == C^c2 --> A == C^(c1^c2)
      ConstantInt *C1, *C2;
      if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) &&
          Op1->hasOneUse()) {
        Constant *NC = Builder.getInt(C1->getValue() ^ C2->getValue());
        Value *Xor = Builder.CreateXor(C, NC);
        return new ICmpInst(Pred, A, Xor);
      }

      // A^B == A^D -> B == D
      if (A == C)
        return new ICmpInst(Pred, B, D);
      if (A == D)
        return new ICmpInst(Pred, B, C);
      if (B == C)
        return new ICmpInst(Pred, A, D);
      if (B == D)
        return new ICmpInst(Pred, A, C);
    }
  }

  // Canonicalize:
  // (icmp eq/ne (and X, C), X)
  //    -> (icmp eq/ne (and X, ~C), 0)
  {
    Constant *CMask;
    A = nullptr;
    if (match(Op0, m_OneUse(m_And(m_Specific(Op1), m_ImmConstant(CMask)))))
      A = Op1;
    else if (match(Op1, m_OneUse(m_And(m_Specific(Op0), m_ImmConstant(CMask)))))
      A = Op0;
    if (A)
      return new ICmpInst(Pred, Builder.CreateAnd(A, Builder.CreateNot(CMask)),
                          Constant::getNullValue(A->getType()));
  }

  if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) {
    // A == (A^B)  ->  B == 0
    Value *OtherVal = A == Op0 ? B : A;
    return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
  }

  // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
  if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
      match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
    Value *X = nullptr, *Y = nullptr, *Z = nullptr;

    // Find the operand shared by both 'and's; it becomes the mask Z.
    if (A == C) {
      X = B;
      Y = D;
      Z = A;
    } else if (A == D) {
      X = B;
      Y = C;
      Z = A;
    } else if (B == C) {
      X = A;
      Y = D;
      Z = B;
    } else if (B == D) {
      X = A;
      Y = C;
      Z = B;
    }

    if (X) { // Build (X^Y) & Z
      Op1 = Builder.CreateXor(X, Y);
      Op1 = Builder.CreateAnd(Op1, Z);
      return new ICmpInst(Pred, Op1, Constant::getNullValue(Op1->getType()));
    }
  }

  {
    // Similar to above, but specialized for constant because invert is needed:
    // (X | C) == (Y | C) --> (X ^ Y) & ~C == 0
    Value *X, *Y;
    Constant *C;
    if (match(Op0, m_OneUse(m_Or(m_Value(X), m_Constant(C)))) &&
        match(Op1, m_OneUse(m_Or(m_Value(Y), m_Specific(C))))) {
      Value *Xor = Builder.CreateXor(X, Y);
      Value *And = Builder.CreateAnd(Xor, ConstantExpr::getNot(C));
      return new ICmpInst(Pred, And, Constant::getNullValue(And->getType()));
    }
  }

  if (match(Op1, m_ZExt(m_Value(A))) &&
      (Op0->hasOneUse() || Op1->hasOneUse())) {
    // (B & (Pow2C-1)) == zext A --> A == trunc B
    // (B & (Pow2C-1)) != zext A --> A != trunc B
    const APInt *MaskC;
    if (match(Op0, m_And(m_Value(B), m_LowBitMask(MaskC))) &&
        MaskC->countr_one() == A->getType()->getScalarSizeInBits())
      return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType()));
  }

  // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
  // For lshr and ashr pairs.
  const APInt *AP1, *AP2;
  if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_APIntAllowUndef(AP1)))) &&
       match(Op1, m_OneUse(m_LShr(m_Value(B), m_APIntAllowUndef(AP2))))) ||
      (match(Op0, m_OneUse(m_AShr(m_Value(A), m_APIntAllowUndef(AP1)))) &&
       match(Op1, m_OneUse(m_AShr(m_Value(B), m_APIntAllowUndef(AP2)))))) {
    // Compare the shift amounts by value, not by address. Both operands of
    // the compare have the same type, so the two amounts have equal bit
    // width and the APInt comparison is well defined.
    if (*AP1 != *AP2)
      return nullptr;
    unsigned TypeBits = AP1->getBitWidth();
    unsigned ShAmt = AP1->getLimitedValue(TypeBits);
    if (ShAmt < TypeBits && ShAmt != 0) {
      ICmpInst::Predicate NewPred =
          Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
      Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
      APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
      return new ICmpInst(NewPred, Xor, ConstantInt::get(A->getType(), CmpVal));
    }
  }

  // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
  ConstantInt *Cst1;
  if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
      match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
    unsigned TypeBits = Cst1->getBitWidth();
    unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
    if (ShAmt < TypeBits && ShAmt != 0) {
      Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
      APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
      Value *And = Builder.CreateAnd(Xor, Builder.getInt(AndVal),
                                      I.getName() + ".mask");
      return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType()));
    }
  }

  // Transform "icmp eq (trunc (lshr X, cst1)), cst" to
  // "icmp (and X, mask), cst"
  uint64_t ShAmt = 0;
  if (Op0->hasOneUse() &&
      match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), m_ConstantInt(ShAmt))))) &&
      match(Op1, m_ConstantInt(Cst1)) &&
      // Only do this when A has multiple uses.  This is most important to do
      // when it exposes other optimizations.
      !A->hasOneUse()) {
    unsigned ASize = cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();

    if (ShAmt < ASize) {
      // Mask selects the bits of A that survive the lshr + trunc.
      APInt MaskV =
          APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
      MaskV <<= ShAmt;

      // Move the compared constant up to the same bit positions.
      APInt CmpV = Cst1->getValue().zext(ASize);
      CmpV <<= ShAmt;

      Value *Mask = Builder.CreateAnd(A, Builder.getInt(MaskV));
      return new ICmpInst(Pred, Mask, Builder.getInt(CmpV));
    }
  }

  if (Instruction *ICmp = foldICmpIntrinsicWithIntrinsic(I, Builder))
    return ICmp;

  // Match icmp eq (trunc (lshr A, BW), (ashr (trunc A), BW-1)), which checks the
  // top BW/2 + 1 bits are all the same. Create "A >=s INT_MIN && A <=s INT_MAX",
  // which we generate as "icmp ult (add A, 2^(BW-1)), 2^BW" to skip a few steps
  // of instcombine.
  unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
  if (match(Op0, m_AShr(m_Trunc(m_Value(A)), m_SpecificInt(BitWidth - 1))) &&
      match(Op1, m_Trunc(m_LShr(m_Specific(A), m_SpecificInt(BitWidth)))) &&
      A->getType()->getScalarSizeInBits() == BitWidth * 2 &&
      (I.getOperand(0)->hasOneUse() || I.getOperand(1)->hasOneUse())) {
    APInt C = APInt::getOneBitSet(BitWidth * 2, BitWidth - 1);
    Value *Add = Builder.CreateAdd(A, ConstantInt::get(A->getType(), C));
    return new ICmpInst(Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULT
                                                  : ICmpInst::ICMP_UGE,
                        Add, ConstantInt::get(A->getType(), C.shl(1)));
  }

  // Canonicalize:
  // Assume B_Pow2 != 0
  // 1. A & B_Pow2 != B_Pow2 -> A & B_Pow2 == 0
  // 2. A & B_Pow2 == B_Pow2 -> A & B_Pow2 != 0
  if (match(Op0, m_c_And(m_Specific(Op1), m_Value())) &&
      isKnownToBeAPowerOfTwo(Op1, /* OrZero */ false, 0, &I))
    return new ICmpInst(CmpInst::getInversePredicate(Pred), Op0,
                        ConstantInt::getNullValue(Op0->getType()));

  if (match(Op1, m_c_And(m_Specific(Op0), m_Value())) &&
      isKnownToBeAPowerOfTwo(Op0, /* OrZero */ false, 0, &I))
    return new ICmpInst(CmpInst::getInversePredicate(Pred), Op1,
                        ConstantInt::getNullValue(Op1->getType()));

  // Canonicalize:
  // icmp eq/ne X, OneUse(rotate-right(X))
  //    -> icmp eq/ne X, rotate-left(X)
  // We generally try to convert rotate-right -> rotate-left, this just
  // canonicalizes another case.
  // PredUnused only receives the predicate written back by the matchers; the
  // replacement compares below are always built with the original Pred.
  CmpInst::Predicate PredUnused = Pred;
  if (match(&I, m_c_ICmp(PredUnused, m_Value(A),
                         m_OneUse(m_Intrinsic<Intrinsic::fshr>(
                             m_Deferred(A), m_Deferred(A), m_Value(B))))))
    return new ICmpInst(
        Pred, A,
        Builder.CreateIntrinsic(Op0->getType(), Intrinsic::fshl, {A, A, B}));

  // Canonicalize:
  // icmp eq/ne OneUse(A ^ Cst), B --> icmp eq/ne (A ^ B), Cst
  Constant *Cst;
  if (match(&I, m_c_ICmp(PredUnused,
                         m_OneUse(m_Xor(m_Value(A), m_ImmConstant(Cst))),
                         m_CombineAnd(m_Value(B), m_Unless(m_ImmConstant())))))
    return new ICmpInst(Pred, Builder.CreateXor(A, B), Cst);

  {
    // (icmp eq/ne (and (add/sub/xor X, P2), P2), P2)
    auto m_Matcher =
        m_CombineOr(m_CombineOr(m_c_Add(m_Value(B), m_Deferred(A)),
                                m_c_Xor(m_Value(B), m_Deferred(A))),
                    m_Sub(m_Value(B), m_Deferred(A)));
    // Unset: no pattern matched. false: compared against P2. true: compared
    // against zero.
    std::optional<bool> IsZero = std::nullopt;
    if (match(&I, m_c_ICmp(PredUnused, m_OneUse(m_c_And(m_Value(A), m_Matcher)),
                           m_Deferred(A))))
      IsZero = false;
    // (icmp eq/ne (and (add/sub/xor X, P2), P2), 0)
    else if (match(&I,
                   m_ICmp(PredUnused, m_OneUse(m_c_And(m_Value(A), m_Matcher)),
                          m_Zero())))
      IsZero = true;

    if (IsZero && isKnownToBeAPowerOfTwo(A, /* OrZero */ true, /*Depth*/ 0, &I))
      // (icmp eq/ne (and (add/sub/xor X, P2), P2), P2)
      //    -> (icmp eq/ne (and X, P2), 0)
      // (icmp eq/ne (and (add/sub/xor X, P2), P2), 0)
      //    -> (icmp eq/ne (and X, P2), P2)
      return new ICmpInst(Pred, Builder.CreateAnd(B, A),
                          *IsZero ? A
                                  : ConstantInt::getNullValue(A->getType()));
  }

  return nullptr;
}
5490
5491
5.81k
/// Fold "icmp Pred (trunc X), C" where the trunc has one use and C is a
/// constant integer (or splat), by rewriting it as a mask-and-compare on the
/// wide value X or by forwarding to the cttz/ctlz-with-constant fold.
/// Returns nullptr when nothing applies.
Instruction *InstCombinerImpl::foldICmpWithTrunc(ICmpInst &ICmp) {
  ICmpInst::Predicate Pred = ICmp.getPredicate();
  Value *TruncV = ICmp.getOperand(0);
  Value *RHS = ICmp.getOperand(1);

  // Try to canonicalize trunc + compare-to-constant into a mask + cmp.
  // The trunc masks high bits while the compare may effectively mask low bits.
  Value *X;
  const APInt *C;
  const bool IsTruncVsConst =
      match(TruncV, m_OneUse(m_Trunc(m_Value(X)))) && match(RHS, m_APInt(C));
  if (!IsTruncVsConst)
    return nullptr;

  // This matches patterns corresponding to tests of the signbit as well as:
  // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?)
  // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?)
  APInt Mask;
  if (decomposeBitTestICmp(TruncV, RHS, Pred, X, Mask, true /* WithTrunc */)) {
    Value *Masked = Builder.CreateAnd(X, Mask);
    return new ICmpInst(Pred, Masked, ConstantInt::getNullValue(X->getType()));
  }

  const unsigned SrcBits = X->getType()->getScalarSizeInBits();

  // Emit "(X & zext(NarrowMask)) NewPred zext(NarrowMask)".
  auto CmpMaskedWithMask = [&](ICmpInst::Predicate NewPred,
                               const APInt &NarrowMask) -> Instruction * {
    Constant *MaskC = ConstantInt::get(X->getType(), NarrowMask.zext(SrcBits));
    Value *Masked = Builder.CreateAnd(X, MaskC);
    return new ICmpInst(NewPred, Masked, MaskC);
  };

  // If C is a negative power-of-2 (high-bit mask):
  // (trunc X) u< C --> (X & C) != C (are any masked-high-bits clear?)
  if (Pred == ICmpInst::ICMP_ULT && C->isNegatedPowerOf2())
    return CmpMaskedWithMask(ICmpInst::ICMP_NE, *C);

  // If C is not-of-power-of-2 (one clear bit):
  // (trunc X) u> C --> (X & (C+1)) == C+1 (are all masked-high-bits set?)
  if (Pred == ICmpInst::ICMP_UGT && (~*C).isPowerOf2())
    return CmpMaskedWithMask(ICmpInst::ICMP_EQ, *C + 1);

  // What remains only applies to a truncated cttz/ctlz.
  auto *II = dyn_cast<IntrinsicInst>(X);
  if (!II)
    return nullptr;
  const Intrinsic::ID IID = II->getIntrinsicID();
  if (IID != Intrinsic::cttz && IID != Intrinsic::ctlz)
    return nullptr;

  // If the "is_zero_poison" argument is set, then we know at least one bit is
  // set in the input, so the result is always at least one less than the full
  // bitwidth of that input.
  const unsigned MaxRet =
      match(II->getArgOperand(1), m_One()) ? SrcBits - 1 : SrcBits;

  // The destination must be wide enough to hold the largest output of the
  // intrinsic; otherwise the trunc may drop set bits.
  const unsigned DstBits = TruncV->getType()->getScalarSizeInBits();
  if (llvm::Log2_32(MaxRet) + 1 > DstBits)
    return nullptr;

  return foldICmpIntrinsicWithConstant(ICmp, II, C->zext(SrcBits));
}
5550
5551
5.70k
/// Fold an icmp whose operand 0 is a zext or sext. Two shapes are handled:
///   icmp Pred (ext X), (ext Y)  --> compare X and Y in the narrow type
///   icmp Pred (ext X), C        --> compare X with a narrowed constant
/// Operand 0 must be a cast instruction (asserted). Returns nullptr when no
/// fold applies.
Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) {
  assert(isa<CastInst>(ICmp.getOperand(0)) && "Expected cast for operand 0");
  auto *CastOp0 = cast<CastInst>(ICmp.getOperand(0));
  Value *X;
  if (!match(CastOp0, m_ZExtOrSExt(m_Value(X))))
    return nullptr;

  Value *const RHS = ICmp.getOperand(1);
  const ICmpInst::Predicate Pred = ICmp.getPredicate();
  const bool IsSignedCmp = ICmp.isSigned();
  bool IsSignedExt = CastOp0->getOpcode() == Instruction::SExt;

  // Build the narrow compare. Equality compares, and signed compares of
  // sign-extended values, keep their predicate; the other three combinations
  // all fold into an unsigned comparison.
  auto CmpNarrow = [&](Value *L, Value *R) -> Instruction * {
    if (ICmp.isEquality() || (IsSignedCmp && IsSignedExt))
      return new ICmpInst(Pred, L, R);
    return new ICmpInst(ICmp.getUnsignedPredicate(), L, R);
  };

  // icmp Pred (ext X), (ext Y)
  Value *Y;
  if (match(RHS, m_ZExtOrSExt(m_Value(Y)))) {
    const bool LHSIsZExt = isa<ZExtInst>(ICmp.getOperand(0));
    const bool RHSIsZExt = isa<ZExtInst>(RHS);

    if (LHSIsZExt != RHSIsZExt) {
      // If X and Y are both i1:
      // (icmp eq/ne (zext X) (sext Y))
      //      eq -> (icmp eq (or X, Y), 0)
      //      ne -> (icmp ne (or X, Y), 0)
      const bool BothI1 = X->getType()->isIntOrIntVectorTy(1) &&
                          Y->getType()->isIntOrIntVectorTy(1);
      if (ICmp.isEquality() && BothI1)
        return new ICmpInst(Pred, Builder.CreateOr(X, Y),
                            Constant::getNullValue(X->getType()));

      // With mismatched casts, a zext carrying the nneg flag can be treated
      // as a sext. Without that flag there is nothing to fold.
      auto HasNNegFlag = [](Value *V) {
        auto *NNI = dyn_cast<PossiblyNonNegInst>(V);
        return NNI && NNI->hasNonNeg();
      };
      const bool ZExtIsNNeg =
          (LHSIsZExt && HasNNegFlag(ICmp.getOperand(0))) ||
          (RHSIsZExt && HasNNegFlag(RHS));
      if (!ZExtIsNNeg)
        return nullptr;
      IsSignedExt = true;
    }

    // Not an extension from the same type?
    Type *XTy = X->getType(), *YTy = Y->getType();
    if (XTy != YTy) {
      // One of the casts must have one use because we are creating a new cast.
      if (!(ICmp.getOperand(0)->hasOneUse() || RHS->hasOneUse()))
        return nullptr;

      const unsigned XBits = XTy->getScalarSizeInBits();
      const unsigned YBits = YTy->getScalarSizeInBits();
      if (XBits == YBits)
        return nullptr;

      // Extend the narrower operand to the type of the wider operand.
      const CastInst::CastOps ExtOpcode =
          IsSignedExt ? Instruction::SExt : Instruction::ZExt;
      if (XBits < YBits)
        X = Builder.CreateCast(ExtOpcode, X, YTy);
      else
        Y = Builder.CreateCast(ExtOpcode, Y, XTy);
    }

    // (zext X) == (zext Y) --> X == Y
    // (sext X) == (sext Y) --> X == Y
    // and the ordered predicates as described on CmpNarrow.
    return CmpNarrow(X, Y);
  }

  // Below here, we are only folding a compare with constant.
  auto *C = dyn_cast<Constant>(RHS);
  if (!C)
    return nullptr;

  // If a lossless truncate is possible, compare in the source type.
  Type *SrcTy = CastOp0->getSrcTy();
  if (Constant *NarrowC = getLosslessTrunc(C, SrcTy, CastOp0->getOpcode()))
    return CmpNarrow(X, NarrowC);

  // The re-extended constant changed, partly changed (in the case of a vector),
  // or could not be determined to be equal (in the case of a constant
  // expression), so the constant cannot be represented in the shorter type.
  // All the cases that fold to true or false will have already been handled
  // by simplifyICmpInst, so only deal with the tricky case: an unsigned
  // compare of a sext against a ConstantInt.
  const bool IsTrickyCase = !IsSignedCmp && IsSignedExt && isa<ConstantInt>(C);
  if (!IsTrickyCase)
    return nullptr;

  // Is source op positive?
  // icmp ult (sext X), C --> icmp sgt X, -1
  if (Pred == ICmpInst::ICMP_ULT)
    return new ICmpInst(CmpInst::ICMP_SGT, X, Constant::getAllOnesValue(SrcTy));

  // Is source op negative?
  // icmp ugt (sext X), C --> icmp slt X, 0
  assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
  return new ICmpInst(CmpInst::ICMP_SLT, X, Constant::getNullValue(SrcTy));
}
5662
5663
/// Handle icmp (cast x), (cast or constant).
5664
77.1k
Instruction *InstCombinerImpl::foldICmpWithCastOp(ICmpInst &ICmp) {
5665
  // If any operand of ICmp is a inttoptr roundtrip cast then remove it as
5666
  // icmp compares only pointer's value.
5667
  // icmp (inttoptr (ptrtoint p1)), p2 --> icmp p1, p2.
5668
77.1k
  Value *SimplifiedOp0 = simplifyIntToPtrRoundTripCast(ICmp.getOperand(0));
5669
77.1k
  Value *SimplifiedOp1 = simplifyIntToPtrRoundTripCast(ICmp.getOperand(1));
5670
77.1k
  if (SimplifiedOp0 || SimplifiedOp1)
5671
0
    return new ICmpInst(ICmp.getPredicate(),
5672
0
                        SimplifiedOp0 ? SimplifiedOp0 : ICmp.getOperand(0),
5673
0
                        SimplifiedOp1 ? SimplifiedOp1 : ICmp.getOperand(1));
5674
5675
77.1k
  auto *CastOp0 = dyn_cast<CastInst>(ICmp.getOperand(0));
5676
77.1k
  if (!CastOp0)
5677
70.8k
    return nullptr;
5678
6.24k
  if (!isa<Constant>(ICmp.getOperand(1)) && !isa<CastInst>(ICmp.getOperand(1)))
5679
356
    return nullptr;
5680
5681
5.88k
  Value *Op0Src = CastOp0->getOperand(0);
5682
5.88k
  Type *SrcTy = CastOp0->getSrcTy();
5683
5.88k
  Type *DestTy = CastOp0->getDestTy();
5684
5685
  // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
5686
  // integer type is the same size as the pointer type.
5687
5.88k
  auto CompatibleSizes = [&](Type *SrcTy, Type *DestTy) {
5688
69
    if (isa<VectorType>(SrcTy)) {
5689
1
      SrcTy = cast<VectorType>(SrcTy)->getElementType();
5690
1
      DestTy = cast<VectorType>(DestTy)->getElementType();
5691
1
    }
5692
69
    return DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth();
5693
69
  };
5694
5.88k
  if (CastOp0->getOpcode() == Instruction::PtrToInt &&
5695
5.88k
      CompatibleSizes(SrcTy, DestTy)) {
5696
69
    Value *NewOp1 = nullptr;
5697
69
    if (auto *PtrToIntOp1 = dyn_cast<PtrToIntOperator>(ICmp.getOperand(1))) {
5698
13
      Value *PtrSrc = PtrToIntOp1->getOperand(0);
5699
13
      if (PtrSrc->getType() == Op0Src->getType())
5700
13
        NewOp1 = PtrToIntOp1->getOperand(0);
5701
56
    } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) {
5702
56
      NewOp1 = ConstantExpr::getIntToPtr(RHSC, SrcTy);
5703
56
    }
5704
5705
69
    if (NewOp1)
5706
69
      return new ICmpInst(ICmp.getPredicate(), Op0Src, NewOp1);
5707
69
  }
5708
5709
5.81k
  if (Instruction *R = foldICmpWithTrunc(ICmp))
5710
110
    return R;
5711
5712
5.70k
  return foldICmpWithZextOrSext(ICmp);
5713
5.81k
}
5714
5715
2.86k
/// Return true if \p RHS matches the identity value of \p BinaryOp: zero for
/// add/sub, one for mul. A signed i1 (or vector of i1) multiply is never
/// treated as having a neutral right-hand side. Only Add, Sub and Mul are
/// supported.
static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS, bool IsSigned) {
  if (BinaryOp == Instruction::Add || BinaryOp == Instruction::Sub)
    return match(RHS, m_Zero());

  if (BinaryOp == Instruction::Mul) {
    const bool IsSignedI1 = IsSigned && RHS->getType()->isIntOrIntVectorTy(1);
    if (IsSignedI1)
      return false;
    return match(RHS, m_One());
  }

  llvm_unreachable("Unsupported binary op");
}
5727
5728
/// Dispatch to the signed or unsigned overflow analysis that corresponds to
/// \p BinaryOp (Add, Sub or Mul) for operands \p LHS and \p RHS, using
/// \p CxtI as the context instruction. Any other opcode is unsupported.
OverflowResult
InstCombinerImpl::computeOverflow(Instruction::BinaryOps BinaryOp,
                                  bool IsSigned, Value *LHS, Value *RHS,
                                  Instruction *CxtI) const {
  switch (BinaryOp) {
  case Instruction::Add:
    return IsSigned ? computeOverflowForSignedAdd(LHS, RHS, CxtI)
                    : computeOverflowForUnsignedAdd(LHS, RHS, CxtI);
  case Instruction::Sub:
    return IsSigned ? computeOverflowForSignedSub(LHS, RHS, CxtI)
                    : computeOverflowForUnsignedSub(LHS, RHS, CxtI);
  case Instruction::Mul:
    return IsSigned ? computeOverflowForSignedMul(LHS, RHS, CxtI)
                    : computeOverflowForUnsignedMul(LHS, RHS, CxtI);
  default:
    llvm_unreachable("Unsupported binary op");
  }
}
5752
5753
bool InstCombinerImpl::OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp,
5754
                                             bool IsSigned, Value *LHS,
5755
                                             Value *RHS, Instruction &OrigI,
5756
                                             Value *&Result,
5757
2.86k
                                             Constant *&Overflow) {
5758
2.86k
  if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
5759
0
    std::swap(LHS, RHS);
5760
5761
  // If the overflow check was an add followed by a compare, the insertion point
5762
  // may be pointing to the compare.  We want to insert the new instructions
5763
  // before the add in case there are uses of the add between the add and the
5764
  // compare.
5765
2.86k
  Builder.SetInsertPoint(&OrigI);
5766
5767
2.86k
  Type *OverflowTy = Type::getInt1Ty(LHS->getContext());
5768
2.86k
  if (auto *LHSTy = dyn_cast<VectorType>(LHS->getType()))
5769
250
    OverflowTy = VectorType::get(OverflowTy, LHSTy->getElementCount());
5770
5771
2.86k
  if (isNeutralValue(BinaryOp, RHS, IsSigned)) {
5772
45
    Result = LHS;
5773
45
    Overflow = ConstantInt::getFalse(OverflowTy);
5774
45
    return true;
5775
45
  }
5776
5777
2.82k
  switch (computeOverflow(BinaryOp, IsSigned, LHS, RHS, &OrigI)) {
5778
2.65k
    case OverflowResult::MayOverflow:
5779
2.65k
      return false;
5780
16
    case OverflowResult::AlwaysOverflowsLow:
5781
56
    case OverflowResult::AlwaysOverflowsHigh:
5782
56
      Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
5783
56
      Result->takeName(&OrigI);
5784
56
      Overflow = ConstantInt::getTrue(OverflowTy);
5785
56
      return true;
5786
110
    case OverflowResult::NeverOverflows:
5787
110
      Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
5788
110
      Result->takeName(&OrigI);
5789
110
      Overflow = ConstantInt::getFalse(OverflowTy);
5790
110
      if (auto *Inst = dyn_cast<Instruction>(Result)) {
5791
89
        if (IsSigned)
5792
57
          Inst->setHasNoSignedWrap();
5793
32
        else
5794
32
          Inst->setHasNoUnsignedWrap();
5795
89
      }
5796
110
      return true;
5797
2.82k
  }
5798
5799
0
  llvm_unreachable("Unexpected overflow result");
5800
0
}
5801
5802
/// Recognize and process idiom involving test for multiplication
5803
/// overflow.
5804
///
5805
/// The caller has matched a pattern of the form:
5806
///   I = cmp u (mul(zext A, zext B)), V
5807
/// The function checks if this is a test for overflow and if so replaces
5808
/// multiplication with call to 'mul.with.overflow' intrinsic.
5809
///
5810
/// \param I Compare instruction.
5811
/// \param MulVal Result of 'mul' instruction.  It is one of the arguments of
5812
///               the compare instruction.  Must be of integer type.
5813
/// \param OtherVal The other argument of compare instruction.
5814
/// \returns Instruction which must replace the compare instruction, NULL if no
5815
///          replacement required.
5816
static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
5817
                                         const APInt *OtherVal,
5818
53
                                         InstCombinerImpl &IC) {
5819
  // Don't bother doing this transformation for pointers, don't do it for
5820
  // vectors.
5821
53
  if (!isa<IntegerType>(MulVal->getType()))
5822
0
    return nullptr;
5823
5824
53
  auto *MulInstr = dyn_cast<Instruction>(MulVal);
5825
53
  if (!MulInstr)
5826
0
    return nullptr;
5827
53
  assert(MulInstr->getOpcode() == Instruction::Mul);
5828
5829
0
  auto *LHS = cast<ZExtInst>(MulInstr->getOperand(0)),
5830
53
       *RHS = cast<ZExtInst>(MulInstr->getOperand(1));
5831
53
  assert(LHS->getOpcode() == Instruction::ZExt);
5832
0
  assert(RHS->getOpcode() == Instruction::ZExt);
5833
0
  Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
5834
5835
  // Calculate type and width of the result produced by mul.with.overflow.
5836
53
  Type *TyA = A->getType(), *TyB = B->getType();
5837
53
  unsigned WidthA = TyA->getPrimitiveSizeInBits(),
5838
53
           WidthB = TyB->getPrimitiveSizeInBits();
5839
53
  unsigned MulWidth;
5840
53
  Type *MulType;
5841
53
  if (WidthB > WidthA) {
5842
0
    MulWidth = WidthB;
5843
0
    MulType = TyB;
5844
53
  } else {
5845
53
    MulWidth = WidthA;
5846
53
    MulType = TyA;
5847
53
  }
5848
5849
  // In order to replace the original mul with a narrower mul.with.overflow,
5850
  // all uses must ignore upper bits of the product.  The number of used low
5851
  // bits must be not greater than the width of mul.with.overflow.
5852
53
  if (MulVal->hasNUsesOrMore(2))
5853
83
    for (User *U : MulVal->users()) {
5854
83
      if (U == &I)
5855
38
        continue;
5856
45
      if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
5857
        // Check if truncation ignores bits above MulWidth.
5858
5
        unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits();
5859
5
        if (TruncWidth > MulWidth)
5860
0
          return nullptr;
5861
40
      } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
5862
        // Check if AND ignores bits above MulWidth.
5863
34
        if (BO->getOpcode() != Instruction::And)
5864
8
          return nullptr;
5865
26
        if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
5866
13
          const APInt &CVal = CI->getValue();
5867
13
          if (CVal.getBitWidth() - CVal.countl_zero() > MulWidth)
5868
11
            return nullptr;
5869
13
        } else {
5870
          // In this case we could have the operand of the binary operation
5871
          // being defined in another block, and performing the replacement
5872
          // could break the dominance relation.
5873
13
          return nullptr;
5874
13
        }
5875
26
      } else {
5876
        // Other uses prohibit this transformation.
5877
6
        return nullptr;
5878
6
      }
5879
45
    }
5880
5881
  // Recognize patterns
5882
15
  switch (I.getPredicate()) {
5883
6
  case ICmpInst::ICMP_UGT: {
5884
    // Recognize pattern:
5885
    //   mulval = mul(zext A, zext B)
5886
    //   cmp ugt mulval, max
5887
6
    APInt MaxVal = APInt::getMaxValue(MulWidth);
5888
6
    MaxVal = MaxVal.zext(OtherVal->getBitWidth());
5889
6
    if (MaxVal.eq(*OtherVal))
5890
6
      break; // Recognized
5891
0
    return nullptr;
5892
6
  }
5893
5894
3
  case ICmpInst::ICMP_ULT: {
5895
    // Recognize pattern:
5896
    //   mulval = mul(zext A, zext B)
5897
    //   cmp ult mulval, max + 1
5898
3
    APInt MaxVal = APInt::getOneBitSet(OtherVal->getBitWidth(), MulWidth);
5899
3
    if (MaxVal.eq(*OtherVal))
5900
3
      break; // Recognized
5901
0
    return nullptr;
5902
3
  }
5903
5904
6
  default:
5905
6
    return nullptr;
5906
15
  }
5907
5908
9
  InstCombiner::BuilderTy &Builder = IC.Builder;
5909
9
  Builder.SetInsertPoint(MulInstr);
5910
5911
  // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
5912
9
  Value *MulA = A, *MulB = B;
5913
9
  if (WidthA < MulWidth)
5914
0
    MulA = Builder.CreateZExt(A, MulType);
5915
9
  if (WidthB < MulWidth)
5916
0
    MulB = Builder.CreateZExt(B, MulType);
5917
9
  Function *F = Intrinsic::getDeclaration(
5918
9
      I.getModule(), Intrinsic::umul_with_overflow, MulType);
5919
9
  CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul");
5920
9
  IC.addToWorklist(MulInstr);
5921
5922
  // If there are uses of mul result other than the comparison, we know that
5923
  // they are truncation or binary AND. Change them to use result of
5924
  // mul.with.overflow and adjust properly mask/size.
5925
9
  if (MulVal->hasNUsesOrMore(2)) {
5926
5
    Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value");
5927
10
    for (User *U : make_early_inc_range(MulVal->users())) {
5928
10
      if (U == &I)
5929
5
        continue;
5930
5
      if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
5931
5
        if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
5932
3
          IC.replaceInstUsesWith(*TI, Mul);
5933
2
        else
5934
2
          TI->setOperand(0, Mul);
5935
5
      } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
5936
0
        assert(BO->getOpcode() == Instruction::And);
5937
        // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
5938
0
        ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
5939
0
        APInt ShortMask = CI->getValue().trunc(MulWidth);
5940
0
        Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask);
5941
0
        Value *Zext = Builder.CreateZExt(ShortAnd, BO->getType());
5942
0
        IC.replaceInstUsesWith(*BO, Zext);
5943
0
      } else {
5944
0
        llvm_unreachable("Unexpected Binary operation");
5945
0
      }
5946
0
      IC.addToWorklist(cast<Instruction>(U));
5947
5
    }
5948
5
  }
5949
5950
  // The original icmp gets replaced with the overflow value, maybe inverted
5951
  // depending on predicate.
5952
9
  if (I.getPredicate() == ICmpInst::ICMP_ULT) {
5953
3
    Value *Res = Builder.CreateExtractValue(Call, 1);
5954
3
    return BinaryOperator::CreateNot(Res);
5955
3
  }
5956
5957
6
  return ExtractValueInst::Create(Call, 1);
5958
9
}
5959
5960
/// When performing a comparison against a constant, it is possible that not all
5961
/// the bits in the LHS are demanded. This helper method computes the mask that
5962
/// IS demanded.
5963
92.0k
static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) {
5964
92.0k
  const APInt *RHS;
5965
92.0k
  if (!match(I.getOperand(1), m_APInt(RHS)))
5966
35.4k
    return APInt::getAllOnes(BitWidth);
5967
5968
  // If this is a normal comparison, it demands all bits. If it is a sign bit
5969
  // comparison, it only demands the sign bit.
5970
56.6k
  bool UnusedBit;
5971
56.6k
  if (InstCombiner::isSignBitCheck(I.getPredicate(), *RHS, UnusedBit))
5972
7.56k
    return APInt::getSignMask(BitWidth);
5973
5974
49.0k
  switch (I.getPredicate()) {
5975
  // For a UGT comparison, we don't care about any bits that
5976
  // correspond to the trailing ones of the comparand.  The value of these
5977
  // bits doesn't impact the outcome of the comparison, because any value
5978
  // greater than the RHS must differ in a bit higher than these due to carry.
5979
4.24k
  case ICmpInst::ICMP_UGT:
5980
4.24k
    return APInt::getBitsSetFrom(BitWidth, RHS->countr_one());
5981
5982
  // Similarly, for a ULT comparison, we don't care about the trailing zeros.
5983
  // Any value less than the RHS must differ in a higher bit because of carries.
5984
6.67k
  case ICmpInst::ICMP_ULT:
5985
6.67k
    return APInt::getBitsSetFrom(BitWidth, RHS->countr_zero());
5986
5987
38.1k
  default:
5988
38.1k
    return APInt::getAllOnes(BitWidth);
5989
49.0k
  }
5990
49.0k
}
5991
5992
/// Check that one use is in the same block as the definition and all
5993
/// other uses are in blocks dominated by a given block.
5994
///
5995
/// \param DI Definition
5996
/// \param UI Use
5997
/// \param DB Block that must dominate all uses of \p DI outside
5998
///           the parent block
5999
/// \return true when \p UI is the only use of \p DI in the parent block
6000
/// and all other uses of \p DI are in blocks dominated by \p DB.
6001
///
6002
bool InstCombinerImpl::dominatesAllUses(const Instruction *DI,
6003
                                        const Instruction *UI,
6004
49
                                        const BasicBlock *DB) const {
6005
49
  assert(DI && UI && "Instruction not defined\n");
6006
  // Ignore incomplete definitions.
6007
49
  if (!DI->getParent())
6008
0
    return false;
6009
  // DI and UI must be in the same block.
6010
49
  if (DI->getParent() != UI->getParent())
6011
2
    return false;
6012
  // Protect from self-referencing blocks.
6013
47
  if (DI->getParent() == DB)
6014
0
    return false;
6015
96
  for (const User *U : DI->users()) {
6016
96
    auto *Usr = cast<Instruction>(U);
6017
96
    if (Usr != UI && !DT.dominates(DB, Usr->getParent()))
6018
7
      return false;
6019
96
  }
6020
40
  return true;
6021
47
}
6022
6023
/// Return true when the instruction sequence within a block is select-cmp-br.
6024
634
static bool isChainSelectCmpBranch(const SelectInst *SI) {
6025
634
  const BasicBlock *BB = SI->getParent();
6026
634
  if (!BB)
6027
0
    return false;
6028
634
  auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator());
6029
634
  if (!BI || BI->getNumSuccessors() != 2)
6030
502
    return false;
6031
132
  auto *IC = dyn_cast<ICmpInst>(BI->getCondition());
6032
132
  if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI))
6033
42
    return false;
6034
90
  return true;
6035
132
}
6036
6037
/// True when a select result is replaced by one of its operands
6038
/// in select-icmp sequence. This will eventually result in the elimination
6039
/// of the select.
6040
///
6041
/// \param SI    Select instruction
6042
/// \param Icmp  Compare instruction
6043
/// \param SIOpd Operand that replaces the select
6044
///
6045
/// Notes:
6046
/// - The replacement is global and requires dominator information
6047
/// - The caller is responsible for the actual replacement
6048
///
6049
/// Example:
6050
///
6051
/// entry:
6052
///  %4 = select i1 %3, %C* %0, %C* null
6053
///  %5 = icmp eq %C* %4, null
6054
///  br i1 %5, label %9, label %7
6055
///  ...
6056
///  ; <label>:7                                       ; preds = %entry
6057
///  %8 = getelementptr inbounds %C* %4, i64 0, i32 0
6058
///  ...
6059
///
6060
/// can be transformed to
6061
///
6062
///  %5 = icmp eq %C* %0, null
6063
///  %6 = select i1 %3, i1 %5, i1 true
6064
///  br i1 %6, label %9, label %7
6065
///  ...
6066
///  ; <label>:7                                       ; preds = %entry
6067
///  %8 = getelementptr inbounds %C* %0, i64 0, i32 0  // replace by %0!
6068
///
6069
/// Similar when the first operand of the select is a constant or/and
6070
/// the compare is for not equal rather than equal.
6071
///
6072
/// NOTE: The function is only called when the select and compare constants
6073
/// are equal; the optimization can work only for EQ predicates. This is not a
6074
/// major restriction since a NE compare should be 'normalized' to an equal
6075
/// compare, which usually happens in the combiner and test case
6076
/// select-cmp-br.ll checks for it.
6077
bool InstCombinerImpl::replacedSelectWithOperand(SelectInst *SI,
6078
                                                 const ICmpInst *Icmp,
6079
634
                                                 const unsigned SIOpd) {
6080
634
  assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!");
6081
634
  if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) {
6082
90
    BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1);
6083
    // The check for the single predecessor is not the best that can be
6084
    // done. But it protects efficiently against cases like when SI's
6085
    // home block has two successors, Succ and Succ1, and Succ1 predecessor
6086
    // of Succ. Then SI can't be replaced by SIOpd because the use that gets
6087
    // replaced can be reached on either path. So the uniqueness check
6088
    // guarantees that the path all uses of SI (outside SI's parent) are on
6089
    // is disjoint from all other paths out of SI. But that information
6090
    // is more expensive to compute, and the trade-off here is in favor
6091
    // of compile-time. It should also be noticed that we check for a single
6092
    // predecessor and not only uniqueness. This is to handle the situation when
6093
    // Succ and Succ1 point to the same basic block.
6094
90
    if (Succ->getSinglePredecessor() && dominatesAllUses(SI, Icmp, Succ)) {
6095
40
      NumSel++;
6096
40
      SI->replaceUsesOutsideBlock(SI->getOperand(SIOpd), SI->getParent());
6097
40
      return true;
6098
40
    }
6099
90
  }
6100
594
  return false;
6101
634
}
6102
6103
/// Try to fold the comparison based on range information we can get by checking
6104
/// whether bits are known to be zero or one in the inputs.
6105
92.0k
Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
6106
92.0k
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
6107
92.0k
  Type *Ty = Op0->getType();
6108
92.0k
  ICmpInst::Predicate Pred = I.getPredicate();
6109
6110
  // Get scalar or pointer size.
6111
92.0k
  unsigned BitWidth = Ty->isIntOrIntVectorTy()
6112
92.0k
                          ? Ty->getScalarSizeInBits()
6113
92.0k
                          : DL.getPointerTypeSizeInBits(Ty->getScalarType());
6114
6115
92.0k
  if (!BitWidth)
6116
0
    return nullptr;
6117
6118
92.0k
  KnownBits Op0Known(BitWidth);
6119
92.0k
  KnownBits Op1Known(BitWidth);
6120
6121
92.0k
  {
6122
    // Don't use dominating conditions when folding icmp using known bits. This
6123
    // may convert signed into unsigned predicates in ways that other passes
6124
    // (especially IndVarSimplify) may not be able to reliably undo.
6125
92.0k
    SQ.DC = nullptr;
6126
92.0k
    auto _ = make_scope_exit([&]() { SQ.DC = &DC; });
6127
92.0k
    if (SimplifyDemandedBits(&I, 0, getDemandedBitsLHSMask(I, BitWidth),
6128
92.0k
                             Op0Known, 0))
6129
154
      return &I;
6130
6131
91.9k
    if (SimplifyDemandedBits(&I, 1, APInt::getAllOnes(BitWidth), Op1Known, 0))
6132
17
      return &I;
6133
91.9k
  }
6134
6135
  // Given the known and unknown bits, compute a range that the LHS could be
6136
  // in.  Compute the Min, Max and RHS values based on the known bits. For the
6137
  // EQ and NE we use unsigned values.
6138
91.9k
  APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
6139
91.9k
  APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
6140
91.9k
  if (I.isSigned()) {
6141
28.4k
    Op0Min = Op0Known.getSignedMinValue();
6142
28.4k
    Op0Max = Op0Known.getSignedMaxValue();
6143
28.4k
    Op1Min = Op1Known.getSignedMinValue();
6144
28.4k
    Op1Max = Op1Known.getSignedMaxValue();
6145
63.5k
  } else {
6146
63.5k
    Op0Min = Op0Known.getMinValue();
6147
63.5k
    Op0Max = Op0Known.getMaxValue();
6148
63.5k
    Op1Min = Op1Known.getMinValue();
6149
63.5k
    Op1Max = Op1Known.getMaxValue();
6150
63.5k
  }
6151
6152
  // If Min and Max are known to be the same, then SimplifyDemandedBits figured
6153
  // out that the LHS or RHS is a constant. Constant fold this now, so that
6154
  // code below can assume that Min != Max.
6155
91.9k
  if (!isa<Constant>(Op0) && Op0Min == Op0Max)
6156
5
    return new ICmpInst(Pred, ConstantExpr::getIntegerValue(Ty, Op0Min), Op1);
6157
91.9k
  if (!isa<Constant>(Op1) && Op1Min == Op1Max)
6158
0
    return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min));
6159
6160
  // Don't break up a clamp pattern -- (min(max X, Y), Z) -- by replacing a
6161
  // min/max canonical compare with some other compare. That could lead to
6162
  // conflict with select canonicalization and infinite looping.
6163
  // FIXME: This constraint may go away if min/max intrinsics are canonical.
6164
91.9k
  auto isMinMaxCmp = [&](Instruction &Cmp) {
6165
91.9k
    if (!Cmp.hasOneUse())
6166
18.3k
      return false;
6167
73.5k
    Value *A, *B;
6168
73.5k
    SelectPatternFlavor SPF = matchSelectPattern(Cmp.user_back(), A, B).Flavor;
6169
73.5k
    if (!SelectPatternResult::isMinOrMax(SPF))
6170
69.1k
      return false;
6171
4.43k
    return match(Op0, m_MaxOrMin(m_Value(), m_Value())) ||
6172
4.43k
           match(Op1, m_MaxOrMin(m_Value(), m_Value()));
6173
73.5k
  };
6174
91.9k
  if (!isMinMaxCmp(I)) {
6175
91.0k
    switch (Pred) {
6176
47.1k
    default:
6177
47.1k
      break;
6178
47.1k
    case ICmpInst::ICMP_ULT: {
6179
11.4k
      if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
6180
404
        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
6181
11.0k
      const APInt *CmpC;
6182
11.0k
      if (match(Op1, m_APInt(CmpC))) {
6183
        // A <u C -> A == C-1 if min(A)+1 == C
6184
6.46k
        if (*CmpC == Op0Min + 1)
6185
551
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
6186
551
                              ConstantInt::get(Op1->getType(), *CmpC - 1));
6187
        // X <u C --> X == 0, if the number of zero bits in the bottom of X
6188
        // exceeds the log2 of C.
6189
5.91k
        if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
6190
48
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
6191
48
                              Constant::getNullValue(Op1->getType()));
6192
5.91k
      }
6193
10.4k
      break;
6194
11.0k
    }
6195
10.4k
    case ICmpInst::ICMP_UGT: {
6196
8.08k
      if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
6197
414
        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
6198
7.66k
      const APInt *CmpC;
6199
7.66k
      if (match(Op1, m_APInt(CmpC))) {
6200
        // A >u C -> A == C+1 if max(A)-1 == C
6201
4.28k
        if (*CmpC == Op0Max - 1)
6202
492
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
6203
492
                              ConstantInt::get(Op1->getType(), *CmpC + 1));
6204
        // X >u C --> X != 0, if the number of zero bits in the bottom of X
6205
        // exceeds the log2 of C.
6206
3.79k
        if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
6207
62
          return new ICmpInst(ICmpInst::ICMP_NE, Op0,
6208
62
                              Constant::getNullValue(Op1->getType()));
6209
3.79k
      }
6210
7.11k
      break;
6211
7.66k
    }
6212
12.9k
    case ICmpInst::ICMP_SLT: {
6213
12.9k
      if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
6214
143
        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
6215
12.7k
      const APInt *CmpC;
6216
12.7k
      if (match(Op1, m_APInt(CmpC))) {
6217
8.70k
        if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
6218
164
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
6219
164
                              ConstantInt::get(Op1->getType(), *CmpC - 1));
6220
8.70k
      }
6221
12.6k
      break;
6222
12.7k
    }
6223
12.6k
    case ICmpInst::ICMP_SGT: {
6224
11.4k
      if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
6225
303
        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
6226
11.1k
      const APInt *CmpC;
6227
11.1k
      if (match(Op1, m_APInt(CmpC))) {
6228
8.16k
        if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
6229
137
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
6230
137
                              ConstantInt::get(Op1->getType(), *CmpC + 1));
6231
8.16k
      }
6232
11.0k
      break;
6233
11.1k
    }
6234
91.0k
    }
6235
91.0k
  }
6236
6237
  // Based on the range information we know about the LHS, see if we can
6238
  // simplify this comparison.  For example, (x&4) < 8 is always true.
6239
89.1k
  switch (Pred) {
6240
0
  default:
6241
0
    llvm_unreachable("Unknown icmp opcode!");
6242
23.1k
  case ICmpInst::ICMP_EQ:
6243
39.2k
  case ICmpInst::ICMP_NE: {
6244
39.2k
    if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
6245
1
      return replaceInstUsesWith(
6246
1
          I, ConstantInt::getBool(I.getType(), Pred == CmpInst::ICMP_NE));
6247
6248
    // If all bits are known zero except for one, then we know at most one bit
6249
    // is set. If the comparison is against zero, then this is a check to see if
6250
    // *that* bit is set.
6251
39.2k
    APInt Op0KnownZeroInverted = ~Op0Known.Zero;
6252
39.2k
    if (Op1Known.isZero()) {
6253
      // If the LHS is an AND with the same constant, look through it.
6254
15.8k
      Value *LHS = nullptr;
6255
15.8k
      const APInt *LHSC;
6256
15.8k
      if (!match(Op0, m_And(m_Value(LHS), m_APInt(LHSC))) ||
6257
15.8k
          *LHSC != Op0KnownZeroInverted)
6258
11.3k
        LHS = Op0;
6259
6260
15.8k
      Value *X;
6261
15.8k
      const APInt *C1;
6262
15.8k
      if (match(LHS, m_Shl(m_Power2(C1), m_Value(X)))) {
6263
40
        Type *XTy = X->getType();
6264
40
        unsigned Log2C1 = C1->countr_zero();
6265
40
        APInt C2 = Op0KnownZeroInverted;
6266
40
        APInt C2Pow2 = (C2 & ~(*C1 - 1)) + *C1;
6267
40
        if (C2Pow2.isPowerOf2()) {
6268
          // iff (C1 is pow2) & (((C2 & ~(C1-1)) + C1) is pow2):
6269
          // ((C1 << X) & C2) == 0 -> X >= (Log2(C2+C1) - Log2(C1))
6270
          // ((C1 << X) & C2) != 0 -> X  < (Log2(C2+C1) - Log2(C1))
6271
13
          unsigned Log2C2 = C2Pow2.countr_zero();
6272
13
          auto *CmpC = ConstantInt::get(XTy, Log2C2 - Log2C1);
6273
13
          auto NewPred =
6274
13
              Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGE : CmpInst::ICMP_ULT;
6275
13
          return new ICmpInst(NewPred, X, CmpC);
6276
13
        }
6277
40
      }
6278
15.8k
    }
6279
6280
    // Op0 eq C_Pow2 -> Op0 ne 0 if Op0 is known to be C_Pow2 or zero.
6281
39.2k
    if (Op1Known.isConstant() && Op1Known.getConstant().isPowerOf2() &&
6282
39.2k
        (Op0Known & Op1Known) == Op0Known)
6283
205
      return new ICmpInst(CmpInst::getInversePredicate(Pred), Op0,
6284
205
                          ConstantInt::getNullValue(Op1->getType()));
6285
39.0k
    break;
6286
39.2k
  }
6287
39.0k
  case ICmpInst::ICMP_ULT: {
6288
10.7k
    if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
6289
7
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6290
10.6k
    if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
6291
10
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6292
10.6k
    break;
6293
10.6k
  }
6294
10.6k
  case ICmpInst::ICMP_UGT: {
6295
7.27k
    if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
6296
9
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6297
7.26k
    if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
6298
19
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6299
7.25k
    break;
6300
7.26k
  }
6301
12.7k
  case ICmpInst::ICMP_SLT: {
6302
12.7k
    if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(B)
6303
18
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6304
12.7k
    if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B)
6305
19
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6306
12.7k
    break;
6307
12.7k
  }
6308
12.7k
  case ICmpInst::ICMP_SGT: {
6309
11.3k
    if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
6310
10
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6311
11.2k
    if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
6312
42
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6313
11.2k
    break;
6314
11.2k
  }
6315
11.2k
  case ICmpInst::ICMP_SGE:
6316
1.85k
    assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
6317
1.85k
    if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
6318
13
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6319
1.83k
    if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
6320
2
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6321
1.83k
    if (Op1Min == Op0Max) // A >=s B -> A == B if max(A) == min(B)
6322
2
      return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
6323
1.83k
    break;
6324
1.83k
  case ICmpInst::ICMP_SLE:
6325
1.73k
    assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
6326
1.73k
    if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
6327
4
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6328
1.73k
    if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
6329
7
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6330
1.72k
    if (Op1Max == Op0Min) // A <=s B -> A == B if min(A) == max(B)
6331
3
      return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
6332
1.72k
    break;
6333
2.31k
  case ICmpInst::ICMP_UGE:
6334
2.31k
    assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
6335
2.31k
    if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
6336
12
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6337
2.30k
    if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
6338
10
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6339
2.29k
    if (Op1Min == Op0Max) // A >=u B -> A == B if max(A) == min(B)
6340
5
      return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
6341
2.29k
    break;
6342
2.29k
  case ICmpInst::ICMP_ULE:
6343
1.99k
    assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
6344
1.99k
    if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
6345
15
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
6346
1.97k
    if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
6347
5
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
6348
1.97k
    if (Op1Max == Op0Min) // A <=u B -> A == B if min(A) == max(B)
6349
5
      return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
6350
1.96k
    break;
6351
89.1k
  }
6352
6353
  // Turn a signed comparison into an unsigned one if both operands are known to
6354
  // have the same sign.
6355
88.7k
  if (I.isSigned() &&
6356
88.7k
      ((Op0Known.Zero.isNegative() && Op1Known.Zero.isNegative()) ||
6357
27.5k
       (Op0Known.One.isNegative() && Op1Known.One.isNegative())))
6358
392
    return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
6359
6360
88.3k
  return nullptr;
6361
88.7k
}
6362
6363
/// If one operand of an icmp is effectively a bool (value range of {0,1}),
6364
/// then try to reduce patterns based on that limit.
6365
92.0k
Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
6366
92.0k
  Value *X, *Y;
6367
92.0k
  ICmpInst::Predicate Pred;
6368
6369
  // X must be 0 and bool must be true for "ULT":
6370
  // X <u (zext i1 Y) --> (X == 0) & Y
6371
92.0k
  if (match(&I, m_c_ICmp(Pred, m_Value(X), m_OneUse(m_ZExt(m_Value(Y))))) &&
6372
92.0k
      Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULT)
6373
12
    return BinaryOperator::CreateAnd(Builder.CreateIsNull(X), Y);
6374
6375
  // X must be 0 or bool must be true for "ULE":
6376
  // X <=u (sext i1 Y) --> (X == 0) | Y
6377
92.0k
  if (match(&I, m_c_ICmp(Pred, m_Value(X), m_OneUse(m_SExt(m_Value(Y))))) &&
6378
92.0k
      Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE)
6379
0
    return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y);
6380
6381
  // icmp eq/ne X, (zext/sext (icmp eq/ne X, C))
6382
92.0k
  ICmpInst::Predicate Pred1, Pred2;
6383
92.0k
  const APInt *C;
6384
92.0k
  Instruction *ExtI;
6385
92.0k
  if (match(&I, m_c_ICmp(Pred1, m_Value(X),
6386
92.0k
                         m_CombineAnd(m_Instruction(ExtI),
6387
92.0k
                                      m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X),
6388
92.0k
                                                          m_APInt(C)))))) &&
6389
92.0k
      ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) {
6390
11
    bool IsSExt = ExtI->getOpcode() == Instruction::SExt;
6391
11
    bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse();
6392
11
    auto CreateRangeCheck = [&] {
6393
1
      Value *CmpV1 =
6394
1
          Builder.CreateICmp(Pred1, X, Constant::getNullValue(X->getType()));
6395
1
      Value *CmpV2 = Builder.CreateICmp(
6396
1
          Pred1, X, ConstantInt::getSigned(X->getType(), IsSExt ? -1 : 1));
6397
1
      return BinaryOperator::Create(
6398
1
          Pred1 == ICmpInst::ICMP_EQ ? Instruction::Or : Instruction::And,
6399
1
          CmpV1, CmpV2);
6400
1
    };
6401
11
    if (C->isZero()) {
6402
0
      if (Pred2 == ICmpInst::ICMP_EQ) {
6403
        // icmp eq X, (zext/sext (icmp eq X, 0)) --> false
6404
        // icmp ne X, (zext/sext (icmp eq X, 0)) --> true
6405
0
        return replaceInstUsesWith(
6406
0
            I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE));
6407
0
      } else if (!IsSExt || HasOneUse) {
6408
        // icmp eq X, (zext (icmp ne X, 0)) --> X == 0 || X == 1
6409
        // icmp ne X, (zext (icmp ne X, 0)) --> X != 0 && X != 1
6410
        // icmp eq X, (sext (icmp ne X, 0)) --> X == 0 || X == -1
6411
        // icmp ne X, (sext (icmp ne X, 0)) --> X != 0 && X != -1
6412
0
        return CreateRangeCheck();
6413
0
      }
6414
11
    } else if (IsSExt ? C->isAllOnes() : C->isOne()) {
6415
5
      if (Pred2 == ICmpInst::ICMP_NE) {
6416
        // icmp eq X, (zext (icmp ne X, 1)) --> false
6417
        // icmp ne X, (zext (icmp ne X, 1)) --> true
6418
        // icmp eq X, (sext (icmp ne X, -1)) --> false
6419
        // icmp ne X, (sext (icmp ne X, -1)) --> true
6420
4
        return replaceInstUsesWith(
6421
4
            I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE));
6422
4
      } else if (!IsSExt || HasOneUse) {
6423
        // icmp eq X, (zext (icmp eq X, 1)) --> X == 0 || X == 1
6424
        // icmp ne X, (zext (icmp eq X, 1)) --> X != 0 && X != 1
6425
        // icmp eq X, (sext (icmp eq X, -1)) --> X == 0 || X == -1
6426
        // icmp ne X, (sext (icmp eq X, -1)) --> X != 0 && X != -1
6427
1
        return CreateRangeCheck();
6428
1
      }
6429
6
    } else {
6430
      // when C != 0 && C != 1:
6431
      //   icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0
6432
      //   icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1
6433
      //   icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0
6434
      //   icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1
6435
      // when C != 0 && C != -1:
6436
      //   icmp eq X, (sext (icmp eq X, C)) --> icmp eq X, 0
6437
      //   icmp eq X, (sext (icmp ne X, C)) --> icmp eq X, -1
6438
      //   icmp ne X, (sext (icmp eq X, C)) --> icmp ne X, 0
6439
      //   icmp ne X, (sext (icmp ne X, C)) --> icmp ne X, -1
6440
6
      return ICmpInst::Create(
6441
6
          Instruction::ICmp, Pred1, X,
6442
6
          ConstantInt::getSigned(X->getType(), Pred2 == ICmpInst::ICMP_NE
6443
6
                                                   ? (IsSExt ? -1 : 1)
6444
6
                                                   : 0));
6445
6
    }
6446
11
  }
6447
6448
92.0k
  return nullptr;
6449
92.0k
}
6450
6451
std::optional<std::pair<CmpInst::Predicate, Constant *>>
6452
InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
6453
7.24k
                                                       Constant *C) {
6454
7.24k
  assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) &&
6455
7.24k
         "Only for relational integer predicates.");
6456
6457
0
  Type *Type = C->getType();
6458
7.24k
  bool IsSigned = ICmpInst::isSigned(Pred);
6459
6460
7.24k
  CmpInst::Predicate UnsignedPred = ICmpInst::getUnsignedPredicate(Pred);
6461
7.24k
  bool WillIncrement =
6462
7.24k
      UnsignedPred == ICmpInst::ICMP_ULE || UnsignedPred == ICmpInst::ICMP_UGT;
6463
6464
  // Check if the constant operand can be safely incremented/decremented
6465
  // without overflowing/underflowing.
6466
8.34k
  auto ConstantIsOk = [WillIncrement, IsSigned](ConstantInt *C) {
6467
8.34k
    return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned);
6468
8.34k
  };
6469
6470
7.24k
  Constant *SafeReplacementConstant = nullptr;
6471
7.24k
  if (auto *CI = dyn_cast<ConstantInt>(C)) {
6472
    // Bail out if the constant can't be safely incremented/decremented.
6473
6.62k
    if (!ConstantIsOk(CI))
6474
0
      return std::nullopt;
6475
6.62k
  } else if (auto *FVTy = dyn_cast<FixedVectorType>(Type)) {
6476
497
    unsigned NumElts = FVTy->getNumElements();
6477
2.21k
    for (unsigned i = 0; i != NumElts; ++i) {
6478
1.76k
      Constant *Elt = C->getAggregateElement(i);
6479
1.76k
      if (!Elt)
6480
7
        return std::nullopt;
6481
6482
1.75k
      if (isa<UndefValue>(Elt))
6483
28
        continue;
6484
6485
      // Bail out if we can't determine if this constant is min/max or if we
6486
      // know that this constant is min/max.
6487
1.72k
      auto *CI = dyn_cast<ConstantInt>(Elt);
6488
1.72k
      if (!CI || !ConstantIsOk(CI))
6489
35
        return std::nullopt;
6490
6491
1.69k
      if (!SafeReplacementConstant)
6492
474
        SafeReplacementConstant = CI;
6493
1.69k
    }
6494
497
  } else {
6495
    // ConstantExpr?
6496
127
    return std::nullopt;
6497
127
  }
6498
6499
  // It may not be safe to change a compare predicate in the presence of
6500
  // undefined elements, so replace those elements with the first safe constant
6501
  // that we found.
6502
  // TODO: in case of poison, it is safe; let's replace undefs only.
6503
7.07k
  if (C->containsUndefOrPoisonElement()) {
6504
14
    assert(SafeReplacementConstant && "Replacement constant not set");
6505
0
    C = Constant::replaceUndefsWith(C, SafeReplacementConstant);
6506
14
  }
6507
6508
0
  CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(Pred);
6509
6510
  // Increment or decrement the constant.
6511
7.07k
  Constant *OneOrNegOne = ConstantInt::get(Type, WillIncrement ? 1 : -1, true);
6512
7.07k
  Constant *NewC = ConstantExpr::getAdd(C, OneOrNegOne);
6513
6514
7.07k
  return std::make_pair(NewPred, NewC);
6515
7.24k
}
6516
6517
/// If we have an icmp le or icmp ge instruction with a constant operand, turn
6518
/// it into the appropriate icmp lt or icmp gt instruction. This transform
6519
/// allows them to be folded in visitICmpInst.
6520
99.5k
static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
6521
99.5k
  ICmpInst::Predicate Pred = I.getPredicate();
6522
99.5k
  if (ICmpInst::isEquality(Pred) || !ICmpInst::isIntPredicate(Pred) ||
6523
99.5k
      InstCombiner::isCanonicalPredicate(Pred))
6524
85.7k
    return nullptr;
6525
6526
13.8k
  Value *Op0 = I.getOperand(0);
6527
13.8k
  Value *Op1 = I.getOperand(1);
6528
13.8k
  auto *Op1C = dyn_cast<Constant>(Op1);
6529
13.8k
  if (!Op1C)
6530
8.12k
    return nullptr;
6531
6532
5.68k
  auto FlippedStrictness =
6533
5.68k
      InstCombiner::getFlippedStrictnessPredicateAndConstant(Pred, Op1C);
6534
5.68k
  if (!FlippedStrictness)
6535
165
    return nullptr;
6536
6537
5.51k
  return new ICmpInst(FlippedStrictness->first, Op0, FlippedStrictness->second);
6538
5.68k
}
6539
6540
/// If we have a comparison with a non-canonical predicate, if we can update
6541
/// all the users, invert the predicate and adjust all the users.
6542
94.0k
CmpInst *InstCombinerImpl::canonicalizeICmpPredicate(CmpInst &I) {
6543
  // Is the predicate already canonical?
6544
94.0k
  CmpInst::Predicate Pred = I.getPredicate();
6545
94.0k
  if (InstCombiner::isCanonicalPredicate(Pred))
6546
68.1k
    return nullptr;
6547
6548
  // Can all users be adjusted to predicate inversion?
6549
25.9k
  if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr))
6550
24.0k
    return nullptr;
6551
6552
  // Ok, we can canonicalize comparison!
6553
  // Let's first invert the comparison's predicate.
6554
1.91k
  I.setPredicate(CmpInst::getInversePredicate(Pred));
6555
1.91k
  I.setName(I.getName() + ".not");
6556
6557
  // And, adapt users.
6558
1.91k
  freelyInvertAllUsersOf(&I);
6559
6560
1.91k
  return &I;
6561
25.9k
}
6562
6563
/// Integer compare with boolean values can always be turned into bitwise ops.
6564
static Instruction *canonicalizeICmpBool(ICmpInst &I,
6565
6.26k
                                         InstCombiner::BuilderTy &Builder) {
6566
6.26k
  Value *A = I.getOperand(0), *B = I.getOperand(1);
6567
6.26k
  assert(A->getType()->isIntOrIntVectorTy(1) && "Bools only");
6568
6569
  // A boolean compared to true/false can be simplified to Op0/true/false in
6570
  // 14 out of the 20 (10 predicates * 2 constants) possible combinations.
6571
  // Cases not handled by InstSimplify are always 'not' of Op0.
6572
6.26k
  if (match(B, m_Zero())) {
6573
650
    switch (I.getPredicate()) {
6574
248
      case CmpInst::ICMP_EQ:  // A ==   0 -> !A
6575
428
      case CmpInst::ICMP_ULE: // A <=u  0 -> !A
6576
650
      case CmpInst::ICMP_SGE: // A >=s  0 -> !A
6577
650
        return BinaryOperator::CreateNot(A);
6578
0
      default:
6579
0
        llvm_unreachable("ICmp i1 X, C not simplified as expected.");
6580
650
    }
6581
5.61k
  } else if (match(B, m_One())) {
6582
541
    switch (I.getPredicate()) {
6583
148
      case CmpInst::ICMP_NE:  // A !=  1 -> !A
6584
363
      case CmpInst::ICMP_ULT: // A <u  1 -> !A
6585
541
      case CmpInst::ICMP_SGT: // A >s -1 -> !A
6586
541
        return BinaryOperator::CreateNot(A);
6587
0
      default:
6588
0
        llvm_unreachable("ICmp i1 X, C not simplified as expected.");
6589
541
    }
6590
541
  }
6591
6592
5.07k
  switch (I.getPredicate()) {
6593
0
  default:
6594
0
    llvm_unreachable("Invalid icmp instruction!");
6595
591
  case ICmpInst::ICMP_EQ:
6596
    // icmp eq i1 A, B -> ~(A ^ B)
6597
591
    return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
6598
6599
303
  case ICmpInst::ICMP_NE:
6600
    // icmp ne i1 A, B -> A ^ B
6601
303
    return BinaryOperator::CreateXor(A, B);
6602
6603
491
  case ICmpInst::ICMP_UGT:
6604
    // icmp ugt -> icmp ult
6605
491
    std::swap(A, B);
6606
491
    [[fallthrough]];
6607
1.08k
  case ICmpInst::ICMP_ULT:
6608
    // icmp ult i1 A, B -> ~A & B
6609
1.08k
    return BinaryOperator::CreateAnd(Builder.CreateNot(A), B);
6610
6611
559
  case ICmpInst::ICMP_SGT:
6612
    // icmp sgt -> icmp slt
6613
559
    std::swap(A, B);
6614
559
    [[fallthrough]];
6615
970
  case ICmpInst::ICMP_SLT:
6616
    // icmp slt i1 A, B -> A & ~B
6617
970
    return BinaryOperator::CreateAnd(Builder.CreateNot(B), A);
6618
6619
499
  case ICmpInst::ICMP_UGE:
6620
    // icmp uge -> icmp ule
6621
499
    std::swap(A, B);
6622
499
    [[fallthrough]];
6623
1.06k
  case ICmpInst::ICMP_ULE:
6624
    // icmp ule i1 A, B -> ~A | B
6625
1.06k
    return BinaryOperator::CreateOr(Builder.CreateNot(A), B);
6626
6627
506
  case ICmpInst::ICMP_SGE:
6628
    // icmp sge -> icmp sle
6629
506
    std::swap(A, B);
6630
506
    [[fallthrough]];
6631
1.06k
  case ICmpInst::ICMP_SLE:
6632
    // icmp sle i1 A, B -> A | ~B
6633
1.06k
    return BinaryOperator::CreateOr(Builder.CreateNot(B), A);
6634
5.07k
  }
6635
5.07k
}
6636
6637
// Transform pattern like:
6638
//   (1 << Y) u<= X  or  ~(-1 << Y) u<  X  or  ((1 << Y)+(-1)) u<  X
6639
//   (1 << Y) u>  X  or  ~(-1 << Y) u>= X  or  ((1 << Y)+(-1)) u>= X
6640
// Into:
6641
//   (X l>> Y) != 0
6642
//   (X l>> Y) == 0
6643
static Instruction *foldICmpWithHighBitMask(ICmpInst &Cmp,
6644
75.8k
                                            InstCombiner::BuilderTy &Builder) {
6645
75.8k
  ICmpInst::Predicate Pred, NewPred;
6646
75.8k
  Value *X, *Y;
6647
75.8k
  if (match(&Cmp,
6648
75.8k
            m_c_ICmp(Pred, m_OneUse(m_Shl(m_One(), m_Value(Y))), m_Value(X)))) {
6649
132
    switch (Pred) {
6650
52
    case ICmpInst::ICMP_ULE:
6651
52
      NewPred = ICmpInst::ICMP_NE;
6652
52
      break;
6653
2
    case ICmpInst::ICMP_UGT:
6654
2
      NewPred = ICmpInst::ICMP_EQ;
6655
2
      break;
6656
78
    default:
6657
78
      return nullptr;
6658
132
    }
6659
75.6k
  } else if (match(&Cmp, m_c_ICmp(Pred,
6660
75.6k
                                  m_OneUse(m_CombineOr(
6661
75.6k
                                      m_Not(m_Shl(m_AllOnes(), m_Value(Y))),
6662
75.6k
                                      m_Add(m_Shl(m_One(), m_Value(Y)),
6663
75.6k
                                            m_AllOnes()))),
6664
75.6k
                                  m_Value(X)))) {
6665
    // The variant with 'add' is not canonical, (the variant with 'not' is)
6666
    // we only get it because it has extra uses, and can't be canonicalized,
6667
6668
8
    switch (Pred) {
6669
7
    case ICmpInst::ICMP_ULT:
6670
7
      NewPred = ICmpInst::ICMP_NE;
6671
7
      break;
6672
1
    case ICmpInst::ICMP_UGE:
6673
1
      NewPred = ICmpInst::ICMP_EQ;
6674
1
      break;
6675
0
    default:
6676
0
      return nullptr;
6677
8
    }
6678
8
  } else
6679
75.6k
    return nullptr;
6680
6681
62
  Value *NewX = Builder.CreateLShr(X, Y, X->getName() + ".highbits");
6682
62
  Constant *Zero = Constant::getNullValue(NewX->getType());
6683
62
  return CmpInst::Create(Instruction::ICmp, NewPred, NewX, Zero);
6684
75.8k
}
6685
6686
/// Folds for vector compares whose operands are vector reverses or
/// single-source shuffles: perform the compare on the un-permuted values and
/// apply the reverse/shuffle to the compare result instead.
/// Returns the replacement instruction, or nullptr if no fold applies.
static Instruction *foldVectorCmp(CmpInst &Cmp,
                                  InstCombiner::BuilderTy &Builder) {
  const CmpInst::Predicate Pred = Cmp.getPredicate();
  Value *LHS = Cmp.getOperand(0);
  Value *RHS = Cmp.getOperand(1);
  Value *V1, *V2;

  // Build 'rev(cmp Pred, A, B)': the new compare takes the name and IR flags
  // of the original one and is fed to experimental.vector.reverse.
  auto EmitReversedCmp = [&](Value *A, Value *B) -> Instruction * {
    Value *NewCmp = Builder.CreateCmp(Pred, A, B, Cmp.getName());
    if (auto *NewCmpInst = dyn_cast<Instruction>(NewCmp))
      NewCmpInst->copyIRFlags(&Cmp);
    Function *ReverseFn = Intrinsic::getDeclaration(
        Cmp.getModule(), Intrinsic::experimental_vector_reverse,
        NewCmp->getType());
    return CallInst::Create(ReverseFn, NewCmp);
  };

  if (match(LHS, m_VecReverse(m_Value(V1)))) {
    // cmp Pred, rev(V1), rev(V2) --> rev(cmp Pred, V1, V2)
    const bool RHSIsReverse = match(RHS, m_VecReverse(m_Value(V2)));
    if (RHSIsReverse && (LHS->hasOneUse() || RHS->hasOneUse()))
      return EmitReversedCmp(V1, V2);

    // cmp Pred, rev(V1), RHSSplat --> rev(cmp Pred, V1, RHSSplat)
    if (LHS->hasOneUse() && isSplatValue(RHS))
      return EmitReversedCmp(V1, RHS);
  } else if (isSplatValue(LHS) &&
             match(RHS, m_OneUse(m_VecReverse(m_Value(V2))))) {
    // cmp Pred, LHSSplat, rev(V2) --> rev(cmp Pred, LHSSplat, V2)
    return EmitReversedCmp(LHS, V2);
  }

  // Everything below requires the LHS to be a shuffle of a single vector.
  ArrayRef<int> Mask;
  if (!match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))))
    return nullptr;

  Type *SrcTy = V1->getType();

  // If both arguments of the cmp are shuffles that use the same mask and
  // shuffle within a single vector, move the shuffle after the cmp:
  // cmp (shuffle V1, M), (shuffle V2, M) --> shuffle (cmp V1, V2), M
  if (match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
      SrcTy == V2->getType() && (LHS->hasOneUse() || RHS->hasOneUse()))
    return new ShuffleVectorInst(Builder.CreateCmp(Pred, V1, V2), Mask);

  // Try to canonicalize compare with splatted operand and splat constant.
  // TODO: We could generalize this for more than splats. See/use the code in
  //       InstCombiner::foldVectorBinop().
  Constant *RHSC;
  if (!LHS->hasOneUse() || !match(RHS, m_Constant(RHSC)))
    return nullptr;

  // Length-changing splats are ok, so adjust the constants as needed:
  // cmp (shuffle V1, M), C --> shuffle (cmp V1, C'), M
  Constant *SplatElt = RHSC->getSplatValue(/* AllowUndefs */ true);
  int SplatIdx;
  if (!SplatElt || !match(Mask, m_SplatOrUndefMask(SplatIdx)))
    return nullptr;

  // We allow undefs in matching, but this transform removes those for safety.
  // Demanded elements analysis should be able to recover some/all of that.
  Constant *SrcSplat = ConstantVector::getSplat(
      cast<VectorType>(SrcTy)->getElementCount(), SplatElt);
  SmallVector<int, 8> SplatMask(Mask.size(), SplatIdx);
  Value *SrcCmp = Builder.CreateCmp(Pred, V1, SrcSplat);
  return new ShuffleVectorInst(SrcCmp, SplatMask);
}
6753
6754
// extract(uadd.with.overflow(A, B), 0) ult A
6755
//  -> extract(uadd.with.overflow(A, B), 1)
6756
75.8k
static Instruction *foldICmpOfUAddOv(ICmpInst &I) {
6757
75.8k
  CmpInst::Predicate Pred = I.getPredicate();
6758
75.8k
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
6759
6760
75.8k
  Value *UAddOv;
6761
75.8k
  Value *A, *B;
6762
75.8k
  auto UAddOvResultPat = m_ExtractValue<0>(
6763
75.8k
      m_Intrinsic<Intrinsic::uadd_with_overflow>(m_Value(A), m_Value(B)));
6764
75.8k
  if (match(Op0, UAddOvResultPat) &&
6765
75.8k
      ((Pred == ICmpInst::ICMP_ULT && (Op1 == A || Op1 == B)) ||
6766
23
       (Pred == ICmpInst::ICMP_EQ && match(Op1, m_ZeroInt()) &&
6767
14
        (match(A, m_One()) || match(B, m_One()))) ||
6768
23
       (Pred == ICmpInst::ICMP_NE && match(Op1, m_AllOnes()) &&
6769
10
        (match(A, m_AllOnes()) || match(B, m_AllOnes())))))
6770
    // extract(uadd.with.overflow(A, B), 0) < A
6771
    // extract(uadd.with.overflow(A, 1), 0) == 0
6772
    // extract(uadd.with.overflow(A, -1), 0) != -1
6773
17
    UAddOv = cast<ExtractValueInst>(Op0)->getAggregateOperand();
6774
75.8k
  else if (match(Op1, UAddOvResultPat) &&
6775
75.8k
           Pred == ICmpInst::ICMP_UGT && (Op0 == A || Op0 == B))
6776
    // A > extract(uadd.with.overflow(A, B), 0)
6777
10
    UAddOv = cast<ExtractValueInst>(Op1)->getAggregateOperand();
6778
75.8k
  else
6779
75.8k
    return nullptr;
6780
6781
27
  return ExtractValueInst::Create(UAddOv, 1);
6782
75.8k
}
6783
6784
75.6k
/// Fold a pointer compare against null whose left operand is a
/// launder/strip.invariant.group instruction into the same compare on that
/// instruction's first operand. Nothing is done when the operand is not a
/// pointer or when null is a defined pointer in its address space.
static Instruction *foldICmpInvariantGroup(ICmpInst &I) {
  Value *Ptr = I.getOperand(0);
  Type *PtrTy = Ptr->getType();
  if (!PtrTy->isPointerTy())
    return nullptr;
  if (NullPointerIsDefined(I.getParent()->getParent(),
                           PtrTy->getPointerAddressSpace()))
    return nullptr;

  auto *PtrInst = dyn_cast<Instruction>(Ptr);
  if (!PtrInst || !match(I.getOperand(1), m_Zero()) ||
      !PtrInst->isLaunderOrStripInvariantGroup())
    return nullptr;

  return ICmpInst::Create(Instruction::ICmp, I.getPredicate(),
                          PtrInst->getOperand(0), I.getOperand(1));
}
6800
6801
/// This function folds patterns produced by lowering of reduce idioms, such as
6802
/// llvm.vector.reduce.and which are lowered into instruction chains. This code
6803
/// attempts to generate fewer number of scalar comparisons instead of vector
6804
/// comparisons when possible.
6805
static Instruction *foldReductionIdiom(ICmpInst &I,
6806
                                       InstCombiner::BuilderTy &Builder,
6807
75.6k
                                       const DataLayout &DL) {
6808
75.6k
  if (I.getType()->isVectorTy())
6809
6.94k
    return nullptr;
6810
68.7k
  ICmpInst::Predicate OuterPred, InnerPred;
6811
68.7k
  Value *LHS, *RHS;
6812
6813
  // Match lowering of @llvm.vector.reduce.and. Turn
6814
  ///   %vec_ne = icmp ne <8 x i8> %lhs, %rhs
6815
  ///   %scalar_ne = bitcast <8 x i1> %vec_ne to i8
6816
  ///   %res = icmp <pred> i8 %scalar_ne, 0
6817
  ///
6818
  /// into
6819
  ///
6820
  ///   %lhs.scalar = bitcast <8 x i8> %lhs to i64
6821
  ///   %rhs.scalar = bitcast <8 x i8> %rhs to i64
6822
  ///   %res = icmp <pred> i64 %lhs.scalar, %rhs.scalar
6823
  ///
6824
  /// for <pred> in {ne, eq}.
6825
68.7k
  if (!match(&I, m_ICmp(OuterPred,
6826
68.7k
                        m_OneUse(m_BitCast(m_OneUse(
6827
68.7k
                            m_ICmp(InnerPred, m_Value(LHS), m_Value(RHS))))),
6828
68.7k
                        m_Zero())))
6829
68.6k
    return nullptr;
6830
62
  auto *LHSTy = dyn_cast<FixedVectorType>(LHS->getType());
6831
62
  if (!LHSTy || !LHSTy->getElementType()->isIntegerTy())
6832
0
    return nullptr;
6833
62
  unsigned NumBits =
6834
62
      LHSTy->getNumElements() * LHSTy->getElementType()->getIntegerBitWidth();
6835
  // TODO: Relax this to "not wider than max legal integer type"?
6836
62
  if (!DL.isLegalInteger(NumBits))
6837
32
    return nullptr;
6838
6839
30
  if (ICmpInst::isEquality(OuterPred) && InnerPred == ICmpInst::ICMP_NE) {
6840
4
    auto *ScalarTy = Builder.getIntNTy(NumBits);
6841
4
    LHS = Builder.CreateBitCast(LHS, ScalarTy, LHS->getName() + ".scalar");
6842
4
    RHS = Builder.CreateBitCast(RHS, ScalarTy, RHS->getName() + ".scalar");
6843
4
    return ICmpInst::Create(Instruction::ICmp, OuterPred, LHS, RHS,
6844
4
                            I.getName());
6845
4
  }
6846
6847
26
  return nullptr;
6848
30
}
6849
6850
// This helper will be called with icmp operands in both orders.
6851
Instruction *InstCombinerImpl::foldICmpCommutative(ICmpInst::Predicate Pred,
6852
                                                   Value *Op0, Value *Op1,
6853
155k
                                                   ICmpInst &CxtI) {
6854
  // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'.
6855
155k
  if (auto *GEP = dyn_cast<GEPOperator>(Op0))
6856
674
    if (Instruction *NI = foldGEPICmp(GEP, Op1, Pred, CxtI))
6857
225
      return NI;
6858
6859
155k
  if (auto *SI = dyn_cast<SelectInst>(Op0))
6860
2.16k
    if (Instruction *NI = foldSelectICmp(Pred, SI, Op1, CxtI))
6861
341
      return NI;
6862
6863
155k
  if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op0))
6864
1.09k
    if (Instruction *Res = foldICmpWithMinMax(CxtI, MinMax, Op1, Pred))
6865
190
      return Res;
6866
6867
155k
  {
6868
155k
    Value *X;
6869
155k
    const APInt *C;
6870
    // icmp X+Cst, X
6871
155k
    if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X)
6872
117
      return foldICmpAddOpConst(X, *C, Pred);
6873
155k
  }
6874
6875
  // abs(X) >=  X --> true
6876
  // abs(X) u<= X --> true
6877
  // abs(X) <   X --> false
6878
  // abs(X) u>  X --> false
6879
  // abs(X) u>= X --> IsIntMinPosion ? `X > -1`: `X u<= INTMIN`
6880
  // abs(X) <=  X --> IsIntMinPosion ? `X > -1`: `X u<= INTMIN`
6881
  // abs(X) ==  X --> IsIntMinPosion ? `X > -1`: `X u<= INTMIN`
6882
  // abs(X) u<  X --> IsIntMinPosion ? `X < 0` : `X >   INTMIN`
6883
  // abs(X) >   X --> IsIntMinPosion ? `X < 0` : `X >   INTMIN`
6884
  // abs(X) !=  X --> IsIntMinPosion ? `X < 0` : `X >   INTMIN`
6885
155k
  {
6886
155k
    Value *X;
6887
155k
    Constant *C;
6888
155k
    if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X), m_Constant(C))) &&
6889
155k
        match(Op1, m_Specific(X))) {
6890
7
      Value *NullValue = Constant::getNullValue(X->getType());
6891
7
      Value *AllOnesValue = Constant::getAllOnesValue(X->getType());
6892
7
      const APInt SMin =
6893
7
          APInt::getSignedMinValue(X->getType()->getScalarSizeInBits());
6894
7
      bool IsIntMinPosion = C->isAllOnesValue();
6895
7
      switch (Pred) {
6896
5
      case CmpInst::ICMP_ULE:
6897
5
      case CmpInst::ICMP_SGE:
6898
5
        return replaceInstUsesWith(CxtI, ConstantInt::getTrue(CxtI.getType()));
6899
0
      case CmpInst::ICMP_UGT:
6900
0
      case CmpInst::ICMP_SLT:
6901
0
        return replaceInstUsesWith(CxtI, ConstantInt::getFalse(CxtI.getType()));
6902
0
      case CmpInst::ICMP_UGE:
6903
0
      case CmpInst::ICMP_SLE:
6904
2
      case CmpInst::ICMP_EQ: {
6905
2
        return replaceInstUsesWith(
6906
2
            CxtI, IsIntMinPosion
6907
2
                      ? Builder.CreateICmpSGT(X, AllOnesValue)
6908
2
                      : Builder.CreateICmpULT(
6909
2
                            X, ConstantInt::get(X->getType(), SMin + 1)));
6910
0
      }
6911
0
      case CmpInst::ICMP_ULT:
6912
0
      case CmpInst::ICMP_SGT:
6913
0
      case CmpInst::ICMP_NE: {
6914
0
        return replaceInstUsesWith(
6915
0
            CxtI, IsIntMinPosion
6916
0
                      ? Builder.CreateICmpSLT(X, NullValue)
6917
0
                      : Builder.CreateICmpUGT(
6918
0
                            X, ConstantInt::get(X->getType(), SMin)));
6919
0
      }
6920
0
      default:
6921
0
        llvm_unreachable("Invalid predicate!");
6922
7
      }
6923
7
    }
6924
155k
  }
6925
6926
155k
  return nullptr;
6927
155k
}
6928
6929
116k
/// Visit an icmp instruction: canonicalize it (operand order, predicate form)
/// and then try the individual folds below one after another. The order of
/// the folds is significant; several of them are deliberately placed after
/// the min/max bailout.
///
/// \returns the result of the first fold that applies; otherwise \p I itself
/// if its operands were swapped, or nullptr.
Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
  const SimplifyQuery Q = SQ.getWithInstruction(&I);
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);

  // Order the operands of the compare so that they are listed from most
  // complex to least complex, i.e. the operand with the lower complexity
  // rank ends up on the right-hand side.
  const bool SwappedOperands = getComplexity(Op0) < getComplexity(Op1);
  if (SwappedOperands) {
    I.swapOperands();
    std::swap(Op0, Op1);
  }

  if (Value *Simplified = simplifyICmpInst(I.getPredicate(), Op0, Op1, Q))
    return replaceInstUsesWith(I, Simplified);

  // Comparing -val or val with non-zero is the same as just comparing val
  // ie, abs(val) != 0 -> val != 0
  if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) {
    Value *TrueVal, *FalseVal;
    if (match(Op0, m_Select(m_Value(), m_Value(TrueVal), m_Value(FalseVal)))) {
      // Look at the true arm first; the false arm is only inspected when the
      // true arm is not a negation at all.
      Value *Negated = dyn_castNegVal(TrueVal);
      Value *OtherArm = FalseVal;
      if (!Negated) {
        Negated = dyn_castNegVal(FalseVal);
        OtherArm = TrueVal;
      }
      if (Negated && Negated == OtherArm)
        return CmpInst::Create(Instruction::ICmp, I.getPredicate(), Negated,
                               Op1);
    }
  }

  if (Op0->getType()->isIntOrIntVectorTy(1)) {
    if (Instruction *Folded = canonicalizeICmpBool(I, Builder))
      return Folded;
  }

  if (Instruction *Folded = canonicalizeCmpWithConstant(I))
    return Folded;

  if (Instruction *Folded = canonicalizeICmpPredicate(I))
    return Folded;

  if (Instruction *Folded = foldICmpWithConstant(I))
    return Folded;

  if (Instruction *Folded = foldICmpWithDominatingICmp(I))
    return Folded;

  if (Instruction *Folded = foldICmpUsingBoolRange(I))
    return Folded;

  if (Instruction *Folded = foldICmpUsingKnownBits(I))
    return Folded;

  if (Instruction *Folded = foldICmpTruncWithTruncOrExt(I, Q))
    return Folded;

  // Test if the ICmpInst instruction is used exclusively by a select as
  // part of a minimum or maximum operation. If so, refrain from doing
  // any other folding. This helps out other analyses which understand
  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
  // and CodeGen. And in this case, at least one of the comparison
  // operands has at least one user besides the compare (the select),
  // which would often largely negate the benefit of folding anyway.
  //
  // Do the same for the other patterns recognized by matchSelectPattern.
  if (I.hasOneUse()) {
    if (auto *UserSel = dyn_cast<SelectInst>(I.user_back())) {
      Value *SelLHS, *SelRHS;
      if (matchSelectPattern(UserSel, SelLHS, SelRHS).Flavor != SPF_UNKNOWN)
        return nullptr;
    }
  }

  // Do this after checking for min/max to prevent infinite looping.
  if (Instruction *Folded = foldICmpWithZero(I))
    return Folded;

  // FIXME: We only do this after checking for min/max to prevent infinite
  // looping caused by a reverse canonicalization of these patterns for min/max.
  // FIXME: The organization of folds is a mess. These would naturally go into
  // canonicalizeCmpWithConstant(), but we can't move all of the above folds
  // down here after the min/max restriction.
  const ICmpInst::Predicate Pred = I.getPredicate();
  const APInt *C;
  if (match(Op1, m_APInt(C))) {
    // For i32: x >u 2147483647 -> x <s 0  -> true if sign bit set
    if (Pred == ICmpInst::ICMP_UGT && C->isMaxSignedValue())
      return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
                          Constant::getNullValue(Op0->getType()));

    // For i32: x <u 2147483648 -> x >s -1  -> true if sign bit clear
    if (Pred == ICmpInst::ICMP_ULT && C->isMinSignedValue())
      return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
                          Constant::getAllOnesValue(Op0->getType()));
  }

  // The folds in here may rely on wrapping flags and special constants, so
  // they can break up min/max idioms in some cases but not seemingly similar
  // patterns.
  // FIXME: It may be possible to enhance select folding to make this
  //        unnecessary. It may also be moot if we canonicalize to min/max
  //        intrinsics.
  if (Instruction *Folded = foldICmpBinOp(I, Q))
    return Folded;

  if (Instruction *Folded = foldICmpInstWithConstant(I))
    return Folded;

  // Try to match comparison as a sign bit test. Intentionally do this after
  // foldICmpInstWithConstant() to potentially let other folds to happen first.
  if (Instruction *Folded = foldSignBitTest(I))
    return Folded;

  if (Instruction *Folded = foldICmpInstWithConstantNotInt(I))
    return Folded;

  // Operand-specific folds, tried for both operand orders.
  if (Instruction *Folded =
          foldICmpCommutative(I.getPredicate(), Op0, Op1, I))
    return Folded;
  if (Instruction *Folded =
          foldICmpCommutative(I.getSwappedPredicate(), Op1, Op0, I))
    return Folded;

  // In case of a comparison with two select instructions having the same
  // condition, check whether one of the resulting branches can be simplified.
  // If so, just compare the other branch and select the appropriate result.
  // For example:
  //   %tmp1 = select i1 %cmp, i32 %y, i32 %x
  //   %tmp2 = select i1 %cmp, i32 %z, i32 %x
  //   %cmp2 = icmp slt i32 %tmp2, %tmp1
  // The icmp will result false for the false value of selects and the result
  // will depend upon the comparison of true values of selects if %cmp is
  // true. Thus, transform this into:
  //   %cmp = icmp slt i32 %y, %z
  //   %sel = select i1 %cond, i1 %cmp, i1 false
  // This handles similar cases to transform.
  {
    Value *SelCond, *TrueL, *FalseL, *TrueR, *FalseR;
    if (match(Op0,
              m_Select(m_Value(SelCond), m_Value(TrueL), m_Value(FalseL))) &&
        match(Op1,
              m_Select(m_Specific(SelCond), m_Value(TrueR), m_Value(FalseR))) &&
        (Op0->hasOneUse() || Op1->hasOneUse())) {
      // Check whether comparison of TrueValues can be simplified
      if (Value *TrueRes = simplifyICmpInst(Pred, TrueL, TrueR, SQ)) {
        Value *FalseCmp = Builder.CreateICmp(Pred, FalseL, FalseR);
        return SelectInst::Create(SelCond, TrueRes, FalseCmp);
      }
      // Check whether comparison of FalseValues can be simplified
      if (Value *FalseRes = simplifyICmpInst(Pred, FalseL, FalseR, SQ)) {
        Value *TrueCmp = Builder.CreateICmp(Pred, TrueL, TrueR);
        return SelectInst::Create(SelCond, TrueCmp, FalseRes);
      }
    }
  }

  // Try to optimize equality comparisons against alloca-based pointers.
  if (Op0->getType()->isPointerTy() && I.isEquality()) {
    assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
    if (auto *LHSAlloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op0));
        LHSAlloca && foldAllocaCmp(LHSAlloca))
      return nullptr;
    if (auto *RHSAlloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op1));
        RHSAlloca && foldAllocaCmp(RHSAlloca))
      return nullptr;
  }

  if (Instruction *Folded = foldICmpBitCast(I))
    return Folded;

  // TODO: Hoist this above the min/max bailout.
  if (Instruction *Folded = foldICmpWithCastOp(I))
    return Folded;

  {
    Value *X, *Y;
    // Transform (X & ~Y) == 0 --> (X & Y) != 0
    // and       (X & ~Y) != 0 --> (X & Y) == 0
    // if X is a power of 2.
    if (match(Op0, m_And(m_Value(X), m_Not(m_Value(Y)))) &&
        match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(X, false, 0, &I) &&
        I.isEquality()) {
      Value *Masked = Builder.CreateAnd(X, Y);
      return new ICmpInst(I.getInversePredicate(), Masked, Op1);
    }

    // Op0 pred Op1 -> ~Op1 pred ~Op0, if this allows us to drop an instruction.
    if (Op0->getType()->isIntOrIntVectorTy()) {
      bool ConsumesLHS, ConsumesRHS;
      if (isFreeToInvert(Op0, Op0->hasOneUse(), ConsumesLHS) &&
          isFreeToInvert(Op1, Op1->hasOneUse(), ConsumesRHS) &&
          (ConsumesLHS || ConsumesRHS)) {
        Value *NotLHS = getFreelyInverted(Op0, Op0->hasOneUse(), &Builder);
        Value *NotRHS = getFreelyInverted(Op1, Op1->hasOneUse(), &Builder);
        assert(NotLHS && NotRHS &&
               "Mismatch between isFreeToInvert and getFreelyInverted");
        return new ICmpInst(I.getSwappedPredicate(), NotLHS, NotRHS);
      }
    }

    // Unsigned add overflow check.
    // m_UAddWithOverflow can match patterns that do not include an explicit
    // "add" instruction, so check the opcode of the matched op.
    Instruction *AddI = nullptr;
    Value *AddResult;
    Constant *AddOverflow;
    if (match(&I, m_UAddWithOverflow(m_Value(X), m_Value(Y),
                                     m_Instruction(AddI))) &&
        isa<IntegerType>(X->getType()) &&
        AddI->getOpcode() == Instruction::Add &&
        OptimizeOverflowCheck(Instruction::Add, /*Signed*/ false, X, Y, *AddI,
                              AddResult, AddOverflow)) {
      replaceInstUsesWith(*AddI, AddResult);
      eraseInstFromFunction(*AddI);
      return replaceInstUsesWith(I, AddOverflow);
    }

    // (zext X) * (zext Y)  --> llvm.umul.with.overflow.
    if (match(Op0, m_NUWMul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
        match(Op1, m_APInt(C))) {
      if (Instruction *Folded = processUMulZExtIdiom(I, Op0, C, *this))
        return Folded;
    }

    // Signbit test folds
    // Fold (X u>> BitWidth - 1 Pred ZExt(i1))  -->  X s< 0 Pred i1
    // Fold (X s>> BitWidth - 1 Pred SExt(i1))  -->  X s< 0 Pred i1
    Instruction *ExtI;
    if ((I.isUnsigned() || I.isEquality()) &&
        match(Op1,
              m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_Value(Y)))) &&
        Y->getType()->getScalarSizeInBits() == 1 &&
        (Op0->hasOneUse() || Op1->hasOneUse())) {
      const unsigned OpWidth = Op0->getType()->getScalarSizeInBits();
      Instruction *ShiftI;
      if (match(Op0, m_CombineAnd(m_Instruction(ShiftI),
                                  m_Shr(m_Value(X), m_SpecificIntAllowUndef(
                                                        OpWidth - 1))))) {
        // The kind of extension has to agree with the kind of shift.
        const unsigned ExtOpc = ExtI->getOpcode();
        const unsigned ShiftOpc = ShiftI->getOpcode();
        const bool IsLogicalPair =
            ExtOpc == Instruction::ZExt && ShiftOpc == Instruction::LShr;
        const bool IsArithmeticPair =
            ExtOpc == Instruction::SExt && ShiftOpc == Instruction::AShr;
        if (IsLogicalPair || IsArithmeticPair) {
          Value *IsNegative =
              Builder.CreateICmpSLT(X, Constant::getNullValue(X->getType()));
          Value *NewCmp = Builder.CreateICmp(Pred, IsNegative, Y, I.getName());
          return replaceInstUsesWith(I, NewCmp);
        }
      }
    }
  }

  if (Instruction *Folded = foldICmpEquality(I))
    return Folded;

  if (Instruction *Folded = foldICmpPow2Test(I, Builder))
    return Folded;

  if (Instruction *Folded = foldICmpOfUAddOv(I))
    return Folded;

  // The 'cmpxchg' instruction returns an aggregate containing the old value and
  // an i1 which indicates whether or not we successfully did the swap.
  //
  // Replace comparisons between the old value and the expected value with the
  // indicator that 'cmpxchg' returns.
  //
  // N.B.  This transform is only valid when the 'cmpxchg' is not permitted to
  // spuriously fail.  In those cases, the old value may equal the expected
  // value but it is possible for the swap to not occur.
  if (I.getPredicate() == ICmpInst::ICMP_EQ) {
    if (auto *OldValExtract = dyn_cast<ExtractValueInst>(Op0)) {
      auto *CmpXchg =
          dyn_cast<AtomicCmpXchgInst>(OldValExtract->getAggregateOperand());
      if (CmpXchg && OldValExtract->getIndices()[0] == 0 &&
          CmpXchg->getCompareOperand() == Op1 && !CmpXchg->isWeak())
        return ExtractValueInst::Create(CmpXchg, 1);
    }
  }

  if (Instruction *Folded = foldICmpWithHighBitMask(I, Builder))
    return Folded;

  if (I.getType()->isVectorTy()) {
    if (Instruction *Folded = foldVectorCmp(I, Builder))
      return Folded;
  }

  if (Instruction *Folded = foldICmpInvariantGroup(I))
    return Folded;

  if (Instruction *Folded = foldReductionIdiom(I, Builder, DL))
    return Folded;

  // No fold applied; report the compare as changed only if we swapped its
  // operands at the top.
  return SwappedOperands ? &I : nullptr;
}
7223
7224
/// Fold fcmp ([us]itofp x, cst) if possible.
7225
Instruction *InstCombinerImpl::foldFCmpIntToFPConst(FCmpInst &I,
7226
                                                    Instruction *LHSI,
7227
513
                                                    Constant *RHSC) {
7228
513
  if (!isa<ConstantFP>(RHSC)) return nullptr;
7229
500
  const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
7230
7231
  // Get the width of the mantissa.  We don't want to hack on conversions that
7232
  // might lose information from the integer, e.g. "i64 -> float"
7233
500
  int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
7234
500
  if (MantissaWidth == -1) return nullptr;  // Unknown.
7235
7236
487
  IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
7237
7238
487
  bool LHSUnsigned = isa<UIToFPInst>(LHSI);
7239
7240
487
  if (I.isEquality()) {
7241
348
    FCmpInst::Predicate P = I.getPredicate();
7242
348
    bool IsExact = false;
7243
348
    APSInt RHSCvt(IntTy->getBitWidth(), LHSUnsigned);
7244
348
    RHS.convertToInteger(RHSCvt, APFloat::rmNearestTiesToEven, &IsExact);
7245
7246
    // If the floating point constant isn't an integer value, we know if we will
7247
    // ever compare equal / not equal to it.
7248
348
    if (!IsExact) {
7249
      // TODO: Can never be -0.0 and other non-representable values
7250
135
      APFloat RHSRoundInt(RHS);
7251
135
      RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven);
7252
135
      if (RHS != RHSRoundInt) {
7253
74
        if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ)
7254
30
          return replaceInstUsesWith(I, Builder.getFalse());
7255
7256
44
        assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE);
7257
0
        return replaceInstUsesWith(I, Builder.getTrue());
7258
74
      }
7259
135
    }
7260
7261
    // TODO: If the constant is exactly representable, is it always OK to do
7262
    // equality compares as integer?
7263
348
  }
7264
7265
  // Check to see that the input is converted from an integer type that is small
7266
  // enough that preserves all bits.  TODO: check here for "known" sign bits.
7267
  // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
7268
413
  unsigned InputSize = IntTy->getScalarSizeInBits();
7269
7270
  // Following test does NOT adjust InputSize downwards for signed inputs,
7271
  // because the most negative value still requires all the mantissa bits
7272
  // to distinguish it from one less than that value.
7273
413
  if ((int)InputSize > MantissaWidth) {
7274
    // Conversion would lose accuracy. Check if loss can impact comparison.
7275
361
    int Exp = ilogb(RHS);
7276
361
    if (Exp == APFloat::IEK_Inf) {
7277
10
      int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics()));
7278
10
      if (MaxExponent < (int)InputSize - !LHSUnsigned)
7279
        // Conversion could create infinity.
7280
10
        return nullptr;
7281
351
    } else {
7282
      // Note that if RHS is zero or NaN, then Exp is negative
7283
      // and first condition is trivially false.
7284
351
      if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned)
7285
        // Conversion could affect comparison.
7286
53
        return nullptr;
7287
351
    }
7288
361
  }
7289
7290
  // Otherwise, we can potentially simplify the comparison.  We know that it
7291
  // will always come through as an integer value and we know the constant is
7292
  // not a NAN (it would have been previously simplified).
7293
350
  assert(!RHS.isNaN() && "NaN comparison not already folded!");
7294
7295
0
  ICmpInst::Predicate Pred;
7296
350
  switch (I.getPredicate()) {
7297
0
  default: llvm_unreachable("Unexpected predicate!");
7298
34
  case FCmpInst::FCMP_UEQ:
7299
133
  case FCmpInst::FCMP_OEQ:
7300
133
    Pred = ICmpInst::ICMP_EQ;
7301
133
    break;
7302
17
  case FCmpInst::FCMP_UGT:
7303
53
  case FCmpInst::FCMP_OGT:
7304
53
    Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
7305
53
    break;
7306
9
  case FCmpInst::FCMP_UGE:
7307
15
  case FCmpInst::FCMP_OGE:
7308
15
    Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
7309
15
    break;
7310
18
  case FCmpInst::FCMP_ULT:
7311
31
  case FCmpInst::FCMP_OLT:
7312
31
    Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
7313
31
    break;
7314
12
  case FCmpInst::FCMP_ULE:
7315
40
  case FCmpInst::FCMP_OLE:
7316
40
    Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
7317
40
    break;
7318
40
  case FCmpInst::FCMP_UNE:
7319
78
  case FCmpInst::FCMP_ONE:
7320
78
    Pred = ICmpInst::ICMP_NE;
7321
78
    break;
7322
0
  case FCmpInst::FCMP_ORD:
7323
0
    return replaceInstUsesWith(I, Builder.getTrue());
7324
0
  case FCmpInst::FCMP_UNO:
7325
0
    return replaceInstUsesWith(I, Builder.getFalse());
7326
350
  }
7327
7328
  // Now we know that the APFloat is a normal number, zero or inf.
7329
7330
  // See if the FP constant is too large for the integer.  For example,
7331
  // comparing an i8 to 300.0.
7332
350
  unsigned IntWidth = IntTy->getScalarSizeInBits();
7333
7334
350
  if (!LHSUnsigned) {
7335
    // If the RHS value is > SignedMax, fold the comparison.  This handles +INF
7336
    // and large values.
7337
217
    APFloat SMax(RHS.getSemantics());
7338
217
    SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
7339
217
                          APFloat::rmNearestTiesToEven);
7340
217
    if (SMax < RHS) { // smax < 13123.0
7341
31
      if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_SLT ||
7342
31
          Pred == ICmpInst::ICMP_SLE)
7343
18
        return replaceInstUsesWith(I, Builder.getTrue());
7344
13
      return replaceInstUsesWith(I, Builder.getFalse());
7345
31
    }
7346
217
  } else {
7347
    // If the RHS value is > UnsignedMax, fold the comparison. This handles
7348
    // +INF and large values.
7349
133
    APFloat UMax(RHS.getSemantics());
7350
133
    UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
7351
133
                          APFloat::rmNearestTiesToEven);
7352
133
    if (UMax < RHS) { // umax < 13123.0
7353
10
      if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_ULT ||
7354
10
          Pred == ICmpInst::ICMP_ULE)
7355
1
        return replaceInstUsesWith(I, Builder.getTrue());
7356
9
      return replaceInstUsesWith(I, Builder.getFalse());
7357
10
    }
7358
133
  }
7359
7360
309
  if (!LHSUnsigned) {
7361
    // See if the RHS value is < SignedMin.
7362
186
    APFloat SMin(RHS.getSemantics());
7363
186
    SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
7364
186
                          APFloat::rmNearestTiesToEven);
7365
186
    if (SMin > RHS) { // smin > 12312.0
7366
20
      if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
7367
20
          Pred == ICmpInst::ICMP_SGE)
7368
11
        return replaceInstUsesWith(I, Builder.getTrue());
7369
9
      return replaceInstUsesWith(I, Builder.getFalse());
7370
20
    }
7371
186
  } else {
7372
    // See if the RHS value is < UnsignedMin.
7373
123
    APFloat UMin(RHS.getSemantics());
7374
123
    UMin.convertFromAPInt(APInt::getMinValue(IntWidth), false,
7375
123
                          APFloat::rmNearestTiesToEven);
7376
123
    if (UMin > RHS) { // umin > 12312.0
7377
2
      if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT ||
7378
2
          Pred == ICmpInst::ICMP_UGE)
7379
2
        return replaceInstUsesWith(I, Builder.getTrue());
7380
0
      return replaceInstUsesWith(I, Builder.getFalse());
7381
2
    }
7382
123
  }
7383
7384
  // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
7385
  // [0, UMAX], but it may still be fractional. Check whether this is the case
7386
  // using the IsExact flag.
7387
  // Don't do this for zero, because -0.0 is not fractional.
7388
287
  APSInt RHSInt(IntWidth, LHSUnsigned);
7389
287
  bool IsExact;
7390
287
  RHS.convertToInteger(RHSInt, APFloat::rmTowardZero, &IsExact);
7391
287
  if (!RHS.isZero()) {
7392
66
    if (!IsExact) {
7393
      // If we had a comparison against a fractional value, we have to adjust
7394
      // the compare predicate and sometimes the value.  RHSC is rounded towards
7395
      // zero at this point.
7396
37
      switch (Pred) {
7397
0
      default: llvm_unreachable("Unexpected integer comparison!");
7398
0
      case ICmpInst::ICMP_NE:  // (float)int != 4.4   --> true
7399
0
        return replaceInstUsesWith(I, Builder.getTrue());
7400
0
      case ICmpInst::ICMP_EQ:  // (float)int == 4.4   --> false
7401
0
        return replaceInstUsesWith(I, Builder.getFalse());
7402
4
      case ICmpInst::ICMP_ULE:
7403
        // (float)int <= 4.4   --> int <= 4
7404
        // (float)int <= -4.4  --> false
7405
4
        if (RHS.isNegative())
7406
0
          return replaceInstUsesWith(I, Builder.getFalse());
7407
4
        break;
7408
6
      case ICmpInst::ICMP_SLE:
7409
        // (float)int <= 4.4   --> int <= 4
7410
        // (float)int <= -4.4  --> int < -4
7411
6
        if (RHS.isNegative())
7412
0
          Pred = ICmpInst::ICMP_SLT;
7413
6
        break;
7414
1
      case ICmpInst::ICMP_ULT:
7415
        // (float)int < -4.4   --> false
7416
        // (float)int < 4.4    --> int <= 4
7417
1
        if (RHS.isNegative())
7418
0
          return replaceInstUsesWith(I, Builder.getFalse());
7419
1
        Pred = ICmpInst::ICMP_ULE;
7420
1
        break;
7421
3
      case ICmpInst::ICMP_SLT:
7422
        // (float)int < -4.4   --> int < -4
7423
        // (float)int < 4.4    --> int <= 4
7424
3
        if (!RHS.isNegative())
7425
3
          Pred = ICmpInst::ICMP_SLE;
7426
3
        break;
7427
2
      case ICmpInst::ICMP_UGT:
7428
        // (float)int > 4.4    --> int > 4
7429
        // (float)int > -4.4   --> true
7430
2
        if (RHS.isNegative())
7431
0
          return replaceInstUsesWith(I, Builder.getTrue());
7432
2
        break;
7433
10
      case ICmpInst::ICMP_SGT:
7434
        // (float)int > 4.4    --> int > 4
7435
        // (float)int > -4.4   --> int >= -4
7436
10
        if (RHS.isNegative())
7437
0
          Pred = ICmpInst::ICMP_SGE;
7438
10
        break;
7439
6
      case ICmpInst::ICMP_UGE:
7440
        // (float)int >= -4.4   --> true
7441
        // (float)int >= 4.4    --> int > 4
7442
6
        if (RHS.isNegative())
7443
0
          return replaceInstUsesWith(I, Builder.getTrue());
7444
6
        Pred = ICmpInst::ICMP_UGT;
7445
6
        break;
7446
5
      case ICmpInst::ICMP_SGE:
7447
        // (float)int >= -4.4   --> int >= -4
7448
        // (float)int >= 4.4    --> int > 4
7449
5
        if (!RHS.isNegative())
7450
5
          Pred = ICmpInst::ICMP_SGT;
7451
5
        break;
7452
37
      }
7453
37
    }
7454
66
  }
7455
7456
  // Lower this FP comparison into an appropriate integer version of the
7457
  // comparison.
7458
287
  return new ICmpInst(Pred, LHSI->getOperand(0), Builder.getInt(RHSInt));
7459
287
}
7460
7461
/// Fold (C / X) < 0.0 --> X < 0.0 if possible. Swap predicate if necessary.
7462
static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
7463
106
                                              Constant *RHSC) {
7464
  // When C is not 0.0 and infinities are not allowed:
7465
  // (C / X) < 0.0 is a sign-bit test of X
7466
  // (C / X) < 0.0 --> X < 0.0 (if C is positive)
7467
  // (C / X) < 0.0 --> X > 0.0 (if C is negative, swap the predicate)
7468
  //
7469
  // Proof:
7470
  // Multiply (C / X) < 0.0 by X * X / C.
7471
  // - X is non zero, if it is the flag 'ninf' is violated.
7472
  // - C defines the sign of X * X * C. Thus it also defines whether to swap
7473
  //   the predicate. C is also non zero by definition.
7474
  //
7475
  // Thus X * X / C is non zero and the transformation is valid. [qed]
7476
7477
106
  FCmpInst::Predicate Pred = I.getPredicate();
7478
7479
  // Check that predicates are valid.
7480
106
  if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) &&
7481
106
      (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
7482
58
    return nullptr;
7483
7484
  // Check that RHS operand is zero.
7485
48
  if (!match(RHSC, m_AnyZeroFP()))
7486
27
    return nullptr;
7487
7488
  // Check fastmath flags ('ninf').
7489
21
  if (!LHSI->hasNoInfs() || !I.hasNoInfs())
7490
18
    return nullptr;
7491
7492
  // Check the properties of the dividend. It must not be zero to avoid a
7493
  // division by zero (see Proof).
7494
3
  const APFloat *C;
7495
3
  if (!match(LHSI->getOperand(0), m_APFloat(C)))
7496
0
    return nullptr;
7497
7498
3
  if (C->isZero())
7499
0
    return nullptr;
7500
7501
  // Get swapped predicate if necessary.
7502
3
  if (C->isNegative())
7503
3
    Pred = I.getSwappedPredicate();
7504
7505
3
  return new FCmpInst(Pred, LHSI->getOperand(1), RHSC, "", &I);
7506
3
}
7507
7508
/// Optimize fabs(X) compared with zero.
7509
13.9k
static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) {
7510
13.9k
  Value *X;
7511
13.9k
  if (!match(I.getOperand(0), m_FAbs(m_Value(X))))
7512
13.8k
    return nullptr;
7513
7514
117
  const APFloat *C;
7515
117
  if (!match(I.getOperand(1), m_APFloat(C)))
7516
34
    return nullptr;
7517
7518
83
  if (!C->isPosZero()) {
7519
12
    if (!C->isSmallestNormalized())
7520
12
      return nullptr;
7521
7522
0
    const Function *F = I.getFunction();
7523
0
    DenormalMode Mode = F->getDenormalMode(C->getSemantics());
7524
0
    if (Mode.Input == DenormalMode::PreserveSign ||
7525
0
        Mode.Input == DenormalMode::PositiveZero) {
7526
7527
0
      auto replaceFCmp = [](FCmpInst *I, FCmpInst::Predicate P, Value *X) {
7528
0
        Constant *Zero = ConstantFP::getZero(X->getType());
7529
0
        return new FCmpInst(P, X, Zero, "", I);
7530
0
      };
7531
7532
0
      switch (I.getPredicate()) {
7533
0
      case FCmpInst::FCMP_OLT:
7534
        // fcmp olt fabs(x), smallest_normalized_number -> fcmp oeq x, 0.0
7535
0
        return replaceFCmp(&I, FCmpInst::FCMP_OEQ, X);
7536
0
      case FCmpInst::FCMP_UGE:
7537
        // fcmp uge fabs(x), smallest_normalized_number -> fcmp une x, 0.0
7538
0
        return replaceFCmp(&I, FCmpInst::FCMP_UNE, X);
7539
0
      case FCmpInst::FCMP_OGE:
7540
        // fcmp oge fabs(x), smallest_normalized_number -> fcmp one x, 0.0
7541
0
        return replaceFCmp(&I, FCmpInst::FCMP_ONE, X);
7542
0
      case FCmpInst::FCMP_ULT:
7543
        // fcmp ult fabs(x), smallest_normalized_number -> fcmp ueq x, 0.0
7544
0
        return replaceFCmp(&I, FCmpInst::FCMP_UEQ, X);
7545
0
      default:
7546
0
        break;
7547
0
      }
7548
0
    }
7549
7550
0
    return nullptr;
7551
0
  }
7552
7553
71
  auto replacePredAndOp0 = [&IC](FCmpInst *I, FCmpInst::Predicate P, Value *X) {
7554
71
    I->setPredicate(P);
7555
71
    return IC.replaceOperand(*I, 0, X);
7556
71
  };
7557
7558
71
  switch (I.getPredicate()) {
7559
0
  case FCmpInst::FCMP_UGE:
7560
0
  case FCmpInst::FCMP_OLT:
7561
    // fabs(X) >= 0.0 --> true
7562
    // fabs(X) <  0.0 --> false
7563
0
    llvm_unreachable("fcmp should have simplified");
7564
7565
4
  case FCmpInst::FCMP_OGT:
7566
    // fabs(X) > 0.0 --> X != 0.0
7567
4
    return replacePredAndOp0(&I, FCmpInst::FCMP_ONE, X);
7568
7569
2
  case FCmpInst::FCMP_UGT:
7570
    // fabs(X) u> 0.0 --> X u!= 0.0
7571
2
    return replacePredAndOp0(&I, FCmpInst::FCMP_UNE, X);
7572
7573
4
  case FCmpInst::FCMP_OLE:
7574
    // fabs(X) <= 0.0 --> X == 0.0
7575
4
    return replacePredAndOp0(&I, FCmpInst::FCMP_OEQ, X);
7576
7577
0
  case FCmpInst::FCMP_ULE:
7578
    // fabs(X) u<= 0.0 --> X u== 0.0
7579
0
    return replacePredAndOp0(&I, FCmpInst::FCMP_UEQ, X);
7580
7581
3
  case FCmpInst::FCMP_OGE:
7582
    // fabs(X) >= 0.0 --> !isnan(X)
7583
3
    assert(!I.hasNoNaNs() && "fcmp should have simplified");
7584
0
    return replacePredAndOp0(&I, FCmpInst::FCMP_ORD, X);
7585
7586
2
  case FCmpInst::FCMP_ULT:
7587
    // fabs(X) u< 0.0 --> isnan(X)
7588
2
    assert(!I.hasNoNaNs() && "fcmp should have simplified");
7589
0
    return replacePredAndOp0(&I, FCmpInst::FCMP_UNO, X);
7590
7591
17
  case FCmpInst::FCMP_OEQ:
7592
18
  case FCmpInst::FCMP_UEQ:
7593
20
  case FCmpInst::FCMP_ONE:
7594
31
  case FCmpInst::FCMP_UNE:
7595
39
  case FCmpInst::FCMP_ORD:
7596
56
  case FCmpInst::FCMP_UNO:
7597
    // Look through the fabs() because it doesn't change anything but the sign.
7598
    // fabs(X) == 0.0 --> X == 0.0,
7599
    // fabs(X) != 0.0 --> X != 0.0
7600
    // isnan(fabs(X)) --> isnan(X)
7601
    // !isnan(fabs(X) --> !isnan(X)
7602
56
    return replacePredAndOp0(&I, I.getPredicate(), X);
7603
7604
0
  default:
7605
0
    return nullptr;
7606
71
  }
7607
71
}
7608
7609
15.4k
/// Fold a compare of a value against its own negation:
///   fcmp Pred X, (fneg X) --> fcmp Pred X, 0.0
/// The commuted form is handled by swapping the operands and the predicate.
/// Returns the new compare (taking its flags from \p I) or nullptr if the
/// pattern does not match.
static Instruction *foldFCmpFNegCommonOp(FCmpInst &I) {
  Value *Base = I.getOperand(0);
  Value *Negated = I.getOperand(1);
  CmpInst::Predicate Pred = I.getPredicate();

  // Canonicalize so that a lone fneg sits on the right-hand side.
  const bool LHSIsFNeg = match(Base, m_FNeg(m_Value()));
  if (LHSIsFNeg && !match(Negated, m_FNeg(m_Value()))) {
    std::swap(Base, Negated);
    Pred = I.getSwappedPredicate();
  }

  // The right-hand side has to be the negation of exactly the left-hand side.
  if (!match(Negated, m_FNeg(m_Specific(Base))))
    return nullptr;

  // Replace the negated operand with 0.0:
  // fcmp Pred Op0, -Op0 --> fcmp Pred Op0, 0.0
  return new FCmpInst(Pred, Base, ConstantFP::getZero(Base->getType()), "",
                      &I);
}
7627
7628
18.3k
/// Visit an fcmp instruction and try to simplify or canonicalize it.
///
/// Returns a replacement instruction, \p I itself when it was modified in
/// place, or nullptr when nothing changed.
Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
  bool Changed = false;

  // Order the operands of the compare so that the more complex one (as ranked
  // by getComplexity) comes first. The constant folds below all expect a
  // constant, if any, on the right-hand side.
  if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
    I.swapOperands();
    Changed = true;
  }

  const CmpInst::Predicate Pred = I.getPredicate();
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  if (Value *V = simplifyFCmpInst(Pred, Op0, Op1, I.getFastMathFlags(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  // Simplify 'fcmp pred X, X'
  Type *OpType = Op0->getType();
  assert(OpType == Op1->getType() && "fcmp with different-typed operands?");
  if (Op0 == Op1) {
    switch (Pred) {
    default:
      break;
    case FCmpInst::FCMP_UNO:    // True if unordered: isnan(X) | isnan(Y)
    case FCmpInst::FCMP_ULT:    // True if unordered or less than
    case FCmpInst::FCMP_UGT:    // True if unordered or greater than
    case FCmpInst::FCMP_UNE:    // True if unordered or not equal
      // Canonicalize these to be 'fcmp uno %X, 0.0'.
      I.setPredicate(FCmpInst::FCMP_UNO);
      I.setOperand(1, Constant::getNullValue(OpType));
      return &I;

    case FCmpInst::FCMP_ORD:    // True if ordered (no nans)
    case FCmpInst::FCMP_OEQ:    // True if ordered and equal
    case FCmpInst::FCMP_OGE:    // True if ordered and greater than or equal
    case FCmpInst::FCMP_OLE:    // True if ordered and less than or equal
      // Canonicalize these to be 'fcmp ord %X, 0.0'.
      I.setPredicate(FCmpInst::FCMP_ORD);
      I.setOperand(1, Constant::getNullValue(OpType));
      return &I;
    }
  }

  // If we're just checking for a NaN (ORD/UNO) and have a non-NaN operand,
  // then canonicalize the operand to 0.0.
  if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
    if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0, DL, &TLI, 0,
                                                      &AC, &I, &DT))
      return replaceOperand(I, 0, ConstantFP::getZero(OpType));

    if (!match(Op1, m_PosZeroFP()) &&
        isKnownNeverNaN(Op1, DL, &TLI, 0, &AC, &I, &DT))
      return replaceOperand(I, 1, ConstantFP::getZero(OpType));
  }

  // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y
  Value *X, *Y;
  if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
    return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);

  if (Instruction *R = foldFCmpFNegCommonOp(I))
    return R;

  // Test if the FCmpInst instruction is used exclusively by a select as
  // part of a minimum or maximum operation. If so, refrain from doing
  // any other folding. This helps out other analyses which understand
  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
  // and CodeGen. And in this case, at least one of the comparison
  // operands has at least one user besides the compare (the select),
  // which would often largely negate the benefit of folding anyway.
  if (I.hasOneUse())
    if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) {
      Value *A, *B;
      SelectPatternResult SPR = matchSelectPattern(SI, A, B);
      if (SPR.Flavor != SPF_UNKNOWN)
        // The operands may already have been swapped above; report that
        // in-place change instead of claiming that nothing happened.
        return Changed ? &I : nullptr;
    }

  // The sign of 0.0 is ignored by fcmp, so canonicalize to +0.0:
  // fcmp Pred X, -0.0 --> fcmp Pred X, 0.0
  if (match(Op1, m_AnyZeroFP()) && !match(Op1, m_PosZeroFP()))
    return replaceOperand(I, 1, ConstantFP::getZero(OpType));

  // Ignore signbit of bitcasted int when comparing equality to FP 0.0:
  // fcmp oeq/une (bitcast X), 0.0 --> (and X, SignMaskC) ==/!= 0
  if (match(Op1, m_PosZeroFP()) &&
      match(Op0, m_OneUse(m_BitCast(m_Value(X)))) &&
      X->getType()->isVectorTy() == OpType->isVectorTy() &&
      X->getType()->getScalarSizeInBits() == OpType->getScalarSizeInBits()) {
    ICmpInst::Predicate IntPred = ICmpInst::BAD_ICMP_PREDICATE;
    if (Pred == FCmpInst::FCMP_OEQ)
      IntPred = ICmpInst::ICMP_EQ;
    else if (Pred == FCmpInst::FCMP_UNE)
      IntPred = ICmpInst::ICMP_NE;

    if (IntPred != ICmpInst::BAD_ICMP_PREDICATE) {
      Type *IntTy = X->getType();
      // Note: this is the complement of the sign mask, i.e. every bit except
      // the sign bit is set.
      const APInt &SignMask = ~APInt::getSignMask(IntTy->getScalarSizeInBits());
      Value *MaskX = Builder.CreateAnd(X, ConstantInt::get(IntTy, SignMask));
      return new ICmpInst(IntPred, MaskX, ConstantInt::getNullValue(IntTy));
    }
  }

  // Handle fcmp with instruction LHS and constant RHS.
  Instruction *LHSI;
  Constant *RHSC;
  if (match(Op0, m_Instruction(LHSI)) && match(Op1, m_Constant(RHSC))) {
    switch (LHSI->getOpcode()) {
    case Instruction::PHI:
      if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
        return NV;
      break;
    case Instruction::SIToFP:
    case Instruction::UIToFP:
      if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC))
        return NV;
      break;
    case Instruction::FDiv:
      if (Instruction *NV = foldFCmpReciprocalAndZero(I, LHSI, RHSC))
        return NV;
      break;
    case Instruction::Load:
      if (auto *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
        if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
          if (Instruction *Res = foldCmpLoadFromIndexedGlobal(
                  cast<LoadInst>(LHSI), GEP, GV, I))
            return Res;
      break;
    }
  }

  if (Instruction *R = foldFabsWithFcmpZero(I, *this))
    return R;

  if (match(Op0, m_FNeg(m_Value(X)))) {
    // fcmp pred (fneg X), C --> fcmp swap(pred) X, -C
    Constant *C;
    if (match(Op1, m_Constant(C)))
      if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
        return new FCmpInst(I.getSwappedPredicate(), X, NegC, "", &I);
  }

  if (match(Op0, m_FPExt(m_Value(X)))) {
    // fcmp (fpext X), (fpext Y) -> fcmp X, Y
    if (match(Op1, m_FPExt(m_Value(Y))) && X->getType() == Y->getType())
      return new FCmpInst(Pred, X, Y, "", &I);

    const APFloat *C;
    if (match(Op1, m_APFloat(C))) {
      const fltSemantics &FPSem =
          X->getType()->getScalarType()->getFltSemantics();
      bool Lossy;
      APFloat TruncC = *C;
      TruncC.convert(FPSem, APFloat::rmNearestTiesToEven, &Lossy);

      if (Lossy) {
        // X can't possibly equal the higher-precision constant, so reduce any
        // equality comparison.
        // TODO: Other predicates can be handled via getFCmpCode().
        switch (Pred) {
        case FCmpInst::FCMP_OEQ:
          // X is ordered and equal to an impossible constant --> false
          return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
        case FCmpInst::FCMP_ONE:
          // X is ordered and not equal to an impossible constant --> ordered
          return new FCmpInst(FCmpInst::FCMP_ORD, X,
                              ConstantFP::getZero(X->getType()));
        case FCmpInst::FCMP_UEQ:
          // X is unordered or equal to an impossible constant --> unordered
          return new FCmpInst(FCmpInst::FCMP_UNO, X,
                              ConstantFP::getZero(X->getType()));
        case FCmpInst::FCMP_UNE:
          // X is unordered or not equal to an impossible constant --> true
          return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
        default:
          break;
        }
      }

      // fcmp (fpext X), C -> fcmp X, (fptrunc C) if fptrunc is lossless
      // Avoid lossy conversions and denormals.
      // Zero is a special case that's OK to convert.
      APFloat Fabs = TruncC;
      Fabs.clearSign();
      if (!Lossy &&
          (Fabs.isZero() || !(Fabs < APFloat::getSmallestNormalized(FPSem)))) {
        Constant *NewC = ConstantFP::get(X->getType(), TruncC);
        return new FCmpInst(Pred, X, NewC, "", &I);
      }
    }
  }

  // Convert a sign-bit test of an FP value into a cast and integer compare.
  // TODO: Simplify if the copysign constant is 0.0 or NaN.
  // TODO: Handle non-zero compare constants.
  // TODO: Handle other predicates.
  const APFloat *C;
  if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::copysign>(m_APFloat(C),
                                                           m_Value(X)))) &&
      match(Op1, m_AnyZeroFP()) && !C->isZero() && !C->isNaN()) {
    Type *IntType = Builder.getIntNTy(X->getType()->getScalarSizeInBits());
    if (auto *VecTy = dyn_cast<VectorType>(OpType))
      IntType = VectorType::get(IntType, VecTy->getElementCount());

    // copysign(non-zero constant, X) < 0.0 --> (bitcast X) < 0
    if (Pred == FCmpInst::FCMP_OLT) {
      Value *IntX = Builder.CreateBitCast(X, IntType);
      return new ICmpInst(ICmpInst::ICMP_SLT, IntX,
                          ConstantInt::getNullValue(IntType));
    }
  }

  {
    // CanonLHS/CanonRHS stay null unless the corresponding operand is a call
    // to the canonicalize intrinsic.
    Value *CanonLHS = nullptr, *CanonRHS = nullptr;
    match(Op0, m_Intrinsic<Intrinsic::canonicalize>(m_Value(CanonLHS)));
    match(Op1, m_Intrinsic<Intrinsic::canonicalize>(m_Value(CanonRHS)));

    // (canonicalize(x) == x) => (x == x)
    if (CanonLHS == Op1)
      return new FCmpInst(Pred, Op1, Op1, "", &I);

    // (x == canonicalize(x)) => (x == x)
    if (CanonRHS == Op0)
      return new FCmpInst(Pred, Op0, Op0, "", &I);

    // (canonicalize(x) == canonicalize(y)) => (x == y)
    if (CanonLHS && CanonRHS)
      return new FCmpInst(Pred, CanonLHS, CanonRHS, "", &I);
  }

  if (I.getType()->isVectorTy())
    if (Instruction *Res = foldVectorCmp(I, Builder))
      return Res;

  return Changed ? &I : nullptr;
}