/src/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- InstCombineCompares.cpp --------------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements the visitICmp and visitFCmp functions. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "InstCombineInternal.h" |
14 | | #include "llvm/ADT/APSInt.h" |
15 | | #include "llvm/ADT/ScopeExit.h" |
16 | | #include "llvm/ADT/SetVector.h" |
17 | | #include "llvm/ADT/Statistic.h" |
18 | | #include "llvm/Analysis/CaptureTracking.h" |
19 | | #include "llvm/Analysis/CmpInstAnalysis.h" |
20 | | #include "llvm/Analysis/ConstantFolding.h" |
21 | | #include "llvm/Analysis/InstructionSimplify.h" |
22 | | #include "llvm/Analysis/Utils/Local.h" |
23 | | #include "llvm/Analysis/VectorUtils.h" |
24 | | #include "llvm/IR/ConstantRange.h" |
25 | | #include "llvm/IR/DataLayout.h" |
26 | | #include "llvm/IR/IntrinsicInst.h" |
27 | | #include "llvm/IR/PatternMatch.h" |
28 | | #include "llvm/Support/KnownBits.h" |
29 | | #include "llvm/Transforms/InstCombine/InstCombiner.h" |
30 | | #include <bitset> |
31 | | |
32 | | using namespace llvm; |
33 | | using namespace PatternMatch; |
34 | | |
35 | | #define DEBUG_TYPE "instcombine" |
36 | | |
37 | | // How many times is a select replaced by one of its operands? |
38 | | STATISTIC(NumSel, "Number of select opts"); |
39 | | |
40 | | |
41 | | /// Compute Result = In1+In2, returning true if the addition overflowed for this
42 | | /// type. The add is signed (sadd_ov) when IsSigned is true and unsigned (uadd_ov) otherwise.
43 | | static bool addWithOverflow(APInt &Result, const APInt &In1,
44 | 151 | const APInt &In2, bool IsSigned = false) {
45 | 151 | bool Overflow;
46 | 151 | if (IsSigned)
47 | 57 | Result = In1.sadd_ov(In2, Overflow);
48 | 94 | else
49 | 94 | Result = In1.uadd_ov(In2, Overflow);
50 | | 
51 | 151 | return Overflow;
52 | 151 | }
53 | | |
54 | | /// Compute Result = In1-In2, returning true if the subtraction overflowed for this
55 | | /// type. The subtract is signed (ssub_ov) when IsSigned is true and unsigned (usub_ov) otherwise.
56 | | static bool subWithOverflow(APInt &Result, const APInt &In1,
57 | 32 | const APInt &In2, bool IsSigned = false) {
58 | 32 | bool Overflow;
59 | 32 | if (IsSigned)
60 | 24 | Result = In1.ssub_ov(In2, Overflow);
61 | 8 | else
62 | 8 | Result = In1.usub_ov(In2, Overflow);
63 | | 
64 | 32 | return Overflow;
65 | 32 | }
66 | | |
67 | | /// Given an icmp instruction, return true if any user of this comparison is a
68 | | /// branch instruction (BranchInst); no other property of the user is checked.
69 | 7 | static bool hasBranchUse(ICmpInst &I) {
70 | 7 | for (auto *U : I.users())
71 | 7 | if (isa<BranchInst>(U))
72 | 2 | return true;
73 | 5 | return false;
74 | 7 | }
75 | | |
76 | | /// Returns true if the exploded icmp (predicate Pred, constant C) can be expressed as a
77 | | /// signed comparison to zero and updates the predicate accordingly: "slt 1" becomes SLE and "sgt -1" becomes SGE.
78 | | /// The signedness of the comparison is preserved; unsigned and equality predicates are rejected up front. For C == 0, Pred is left unchanged.
79 | | /// TODO: Refactor with decomposeBitTestICmp()?
80 | 530 | static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
81 | 530 | if (!ICmpInst::isSigned(Pred))
82 | 218 | return false;
83 | | 
84 | 312 | if (C.isZero())
85 | 151 | return ICmpInst::isRelational(Pred);
86 | | 
87 | 161 | if (C.isOne()) {
88 | 33 | if (Pred == ICmpInst::ICMP_SLT) {
89 | 20 | Pred = ICmpInst::ICMP_SLE;
90 | 20 | return true;
91 | 20 | }
92 | 128 | } else if (C.isAllOnes()) {
93 | 49 | if (Pred == ICmpInst::ICMP_SGT) {
94 | 29 | Pred = ICmpInst::ICMP_SGE;
95 | 29 | return true;
96 | 29 | }
97 | 49 | }
98 | | 
99 | 112 | return false;
100 | 161 | }
101 | | |
102 | | /// This is called when we see this pattern:
103 | | /// cmp pred (load (gep GV, ...)), cmpcst
104 | | /// where GV is a global variable with a constant initializer. Try to simplify
105 | | /// this into some simple computation that does not need the load. For example
106 | | /// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
107 | | ///
108 | | /// If AndCst is non-null, then the loaded value is masked with that constant
109 | | /// before doing the comparison. This handles cases like "A[i]&4 == 0". Returns nullptr if no fold applies.
110 | | Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
111 | | LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
112 | 177 | ConstantInt *AndCst) {
113 | 177 | if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
114 | 177 | GV->getValueType() != GEP->getSourceElementType() || !GV->isConstant() ||
115 | 177 | !GV->hasDefinitiveInitializer())
116 | 84 | return nullptr;
117 | | 
118 | 93 | Constant *Init = GV->getInitializer();
119 | 93 | if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
120 | 32 | return nullptr;
121 | | 
122 | 61 | uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
123 | | // Don't blow up on huge arrays: every element is constant-folded in the scan loop below.
124 | 61 | if (ArrayElementCount > MaxArraySizeForCombine)
125 | 0 | return nullptr;
126 | | 
127 | | // There are many forms of this optimization we can handle, for now, just do
128 | | // the simple index into a single-dimensional array.
129 | | //
130 | | // Require: GEP GV, 0, i {{, constant indices}}
131 | 61 | if (GEP->getNumOperands() < 3 || !isa<ConstantInt>(GEP->getOperand(1)) ||
132 | 61 | !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
133 | 61 | isa<Constant>(GEP->getOperand(2)))
134 | 0 | return nullptr;
135 | | 
136 | | // Check that indices after the variable are constants and in-range for the
137 | | // type they index. Collect the indices. This is typically for arrays of
138 | | // structs.
139 | 61 | SmallVector<unsigned, 4> LaterIndices;
140 | | 
141 | 61 | Type *EltTy = Init->getType()->getArrayElementType();
142 | 82 | for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
143 | 21 | ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
144 | 21 | if (!Idx)
145 | 0 | return nullptr; // Variable index.
146 | | 
147 | 21 | uint64_t IdxVal = Idx->getZExtValue();
148 | 21 | if ((unsigned)IdxVal != IdxVal)
149 | 0 | return nullptr; // Too large array index.
150 | | 
151 | 21 | if (StructType *STy = dyn_cast<StructType>(EltTy))
152 | 21 | EltTy = STy->getElementType(IdxVal);
153 | 0 | else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
154 | 0 | if (IdxVal >= ATy->getNumElements())
155 | 0 | return nullptr;
156 | 0 | EltTy = ATy->getElementType();
157 | 0 | } else {
158 | 0 | return nullptr; // Unknown type.
159 | 0 | }
160 | | 
161 | 21 | LaterIndices.push_back(IdxVal);
162 | 21 | }
163 | | 
164 | 61 | enum { Overdefined = -3, Undefined = -2 };
165 | | 
166 | | // Variables for our state machines.
167 | | 
168 | | // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
169 | | // "i == 47 | i == 87", where 47 is the first index the condition is true for,
170 | | // and 87 is the second (and last) index. FirstTrueElement is -2 when
171 | | // undefined, otherwise set to the first true element. SecondTrueElement is
172 | | // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
173 | 61 | int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
174 | | 
175 | | // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
176 | | // form "i != 47 & i != 87". Same state transitions as for true elements.
177 | 61 | int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
178 | | 
179 | | /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
180 | | /// define a state machine that triggers for ranges of values that the index
181 | | /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
182 | | /// This is -2 when undefined, -3 when overdefined, and otherwise the last
183 | | /// index in the range (inclusive). We use -2 for undefined here because we
184 | | /// use relative comparisons and don't want 0-1 to match -1.
185 | 61 | int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
186 | | 
187 | | // MagicBitvector - This is a magic bitvector where we set a bit if the
188 | | // comparison is true for element 'i'. If there are 64 elements or less in
189 | | // the array, this will fully represent all the comparison results.
190 | 61 | uint64_t MagicBitvector = 0;
191 | | 
192 | | // Scan the array and see if one of our patterns matches.
193 | 61 | Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
194 | 545 | for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
195 | 484 | Constant *Elt = Init->getAggregateElement(i);
196 | 484 | if (!Elt)
197 | 0 | return nullptr;
198 | | 
199 | | // If this is indexing an array of structures, get the structure element.
200 | 484 | if (!LaterIndices.empty()) {
201 | 84 | Elt = ConstantFoldExtractValueInstruction(Elt, LaterIndices);
202 | 84 | if (!Elt)
203 | 0 | return nullptr;
204 | 84 | }
205 | | 
206 | | // If the element is masked, handle it.
207 | 484 | if (AndCst) {
208 | 70 | Elt = ConstantFoldBinaryOpOperands(Instruction::And, Elt, AndCst, DL);
209 | 70 | if (!Elt)
210 | 0 | return nullptr;
211 | 70 | }
212 | | 
213 | | // Find out if the comparison would be true or false for the i'th element.
214 | 484 | Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
215 | 484 | CompareRHS, DL, &TLI);
216 | | // If the result is undef for this element, ignore it.
217 | 484 | if (isa<UndefValue>(C)) {
218 | | // Extend range state machines to cover this element in case there is an
219 | | // undef in the middle of the range.
220 | 0 | if (TrueRangeEnd == (int)i - 1)
221 | 0 | TrueRangeEnd = i;
222 | 0 | if (FalseRangeEnd == (int)i - 1)
223 | 0 | FalseRangeEnd = i;
224 | 0 | continue;
225 | 0 | }
226 | | 
227 | | // If we can't compute the result for any of the elements, we have to give
228 | | // up evaluating the entire conditional.
229 | 484 | if (!isa<ConstantInt>(C))
230 | 0 | return nullptr;
231 | | 
232 | | // Otherwise, we know if the comparison is true or false for this element,
233 | | // update our state machines.
234 | 484 | bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
235 | | 
236 | | // State machine for single/double/range index comparison.
237 | 484 | if (IsTrueForElt) {
238 | | // Update the TrueElement state machine.
239 | 216 | if (FirstTrueElement == Undefined)
240 | 61 | FirstTrueElement = TrueRangeEnd = i; // First true element.
241 | 155 | else {
242 | | // Update double-compare state machine.
243 | 155 | if (SecondTrueElement == Undefined)
244 | 45 | SecondTrueElement = i;
245 | 110 | else
246 | 110 | SecondTrueElement = Overdefined;
247 | | 
248 | | // Update range state machine.
249 | 155 | if (TrueRangeEnd == (int)i - 1)
250 | 21 | TrueRangeEnd = i;
251 | 134 | else
252 | 134 | TrueRangeEnd = Overdefined;
253 | 155 | }
254 | 268 | } else {
255 | | // Update the FalseElement state machine.
256 | 268 | if (FirstFalseElement == Undefined)
257 | 61 | FirstFalseElement = FalseRangeEnd = i; // First false element.
258 | 207 | else {
259 | | // Update double-compare state machine.
260 | 207 | if (SecondFalseElement == Undefined)
261 | 33 | SecondFalseElement = i;
262 | 174 | else
263 | 174 | SecondFalseElement = Overdefined;
264 | | 
265 | | // Update range state machine.
266 | 207 | if (FalseRangeEnd == (int)i - 1)
267 | 135 | FalseRangeEnd = i;
268 | 72 | else
269 | 72 | FalseRangeEnd = Overdefined;
270 | 207 | }
271 | 268 | }
272 | | 
273 | | // If this element is in range, update our magic bitvector.
274 | 484 | if (i < 64 && IsTrueForElt)
275 | 216 | MagicBitvector |= 1ULL << i;
276 | | 
277 | | // If all of our states become overdefined, bail out early. Since the
278 | | // predicate is expensive, only check it every 8 elements. This is only
279 | | // really useful for really huge arrays. NOTE(review): `(i & 8) == 0` tests a single bit, so it holds for 8 consecutive indices out of every 16 rather than once every 8 elements - confirm intent before changing, since moving the bail-out points can alter which arrays reach the code below.
280 | 484 | if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
281 | 484 | SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
282 | 484 | FalseRangeEnd == Overdefined)
283 | 0 | return nullptr;
284 | 484 | }
285 | | 
286 | | // Now that we've scanned the entire array, emit our new comparison(s). We
287 | | // order the state machines in complexity of the generated code.
288 | 61 | Value *Idx = GEP->getOperand(2);
289 | | 
290 | | // If the index is larger than the pointer offset size of the target, truncate
291 | | // the index down like the GEP would do implicitly. We don't have to do this
292 | | // for an inbounds GEP because the index can't be out of range.
293 | 61 | if (!GEP->isInBounds()) {
294 | 22 | Type *PtrIdxTy = DL.getIndexType(GEP->getType());
295 | 22 | unsigned OffsetSize = PtrIdxTy->getIntegerBitWidth();
296 | 22 | if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > OffsetSize)
297 | 0 | Idx = Builder.CreateTrunc(Idx, PtrIdxTy);
298 | 22 | }
299 | | 
300 | | // If inbounds keyword is not present, Idx * ElementSize can overflow.
301 | | // Let's assume that ElementSize is 2 and the wanted value is at offset 0.
302 | | // Then, there are two possible values for Idx to match offset 0:
303 | | // 0x00..00, 0x80..00.
304 | | // Emitting 'icmp eq Idx, 0' isn't correct in this case because the
305 | | // comparison is false if Idx was 0x80..00.
306 | | // We need to erase the highest countr_zero(ElementSize) bits of Idx; MaskIdx below does this by AND-ing with (-1 >> countr_zero(ElementSize)).
307 | 61 | unsigned ElementSize =
308 | 61 | DL.getTypeAllocSize(Init->getType()->getArrayElementType());
309 | 61 | auto MaskIdx = [&](Value *Idx) {
310 | 61 | if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) {
311 | 22 | Value *Mask = ConstantInt::get(Idx->getType(), -1);
312 | 22 | Mask = Builder.CreateLShr(Mask, llvm::countr_zero(ElementSize));
313 | 22 | Idx = Builder.CreateAnd(Idx, Mask);
314 | 22 | }
315 | 61 | return Idx;
316 | 61 | };
317 | | 
318 | | // If the comparison is only true for one or two elements, emit direct
319 | | // comparisons.
320 | 61 | if (SecondTrueElement != Overdefined) {
321 | 23 | Idx = MaskIdx(Idx);
322 | | // None true -> false.
323 | 23 | if (FirstTrueElement == Undefined)
324 | 0 | return replaceInstUsesWith(ICI, Builder.getFalse());
325 | | 
326 | 23 | Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
327 | | 
328 | | // True for one element -> 'i == 47'.
329 | 23 | if (SecondTrueElement == Undefined)
330 | 16 | return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
331 | | 
332 | | // True for two elements -> 'i == 47 | i == 72'.
333 | 7 | Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx);
334 | 7 | Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
335 | 7 | Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx);
336 | 7 | return BinaryOperator::CreateOr(C1, C2);
337 | 23 | }
338 | | 
339 | | // If the comparison is only false for one or two elements, emit direct
340 | | // comparisons.
341 | 38 | if (SecondFalseElement != Overdefined) {
342 | 28 | Idx = MaskIdx(Idx);
343 | | // None false -> true.
344 | 28 | if (FirstFalseElement == Undefined)
345 | 0 | return replaceInstUsesWith(ICI, Builder.getTrue());
346 | | 
347 | 28 | Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
348 | | 
349 | | // False for one element -> 'i != 47'.
350 | 28 | if (SecondFalseElement == Undefined)
351 | 28 | return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
352 | | 
353 | | // False for two elements -> 'i != 47 & i != 72'.
354 | 0 | Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
355 | 0 | Value *SecondFalseIdx =
356 | 0 | ConstantInt::get(Idx->getType(), SecondFalseElement);
357 | 0 | Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx);
358 | 0 | return BinaryOperator::CreateAnd(C1, C2);
359 | 28 | }
360 | | 
361 | | // If the comparison can be replaced with a range comparison for the elements
362 | | // where it is true, emit the range check.
363 | 10 | if (TrueRangeEnd != Overdefined) {
364 | 0 | assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
365 | 0 | Idx = MaskIdx(Idx);
366 | | 
367 | | // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
368 | 0 | if (FirstTrueElement) {
369 | 0 | Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
370 | 0 | Idx = Builder.CreateAdd(Idx, Offs);
371 | 0 | }
372 |

373 | 0 | Value *End =
374 | 0 | ConstantInt::get(Idx->getType(), TrueRangeEnd - FirstTrueElement + 1);
375 | 0 | return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
376 | 0 | }
377 | | 
378 | | // False range check.
379 | 10 | if (FalseRangeEnd != Overdefined) {
380 | 0 | assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
381 | 0 | Idx = MaskIdx(Idx);
382 | | // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
383 | 0 | if (FirstFalseElement) {
384 | 0 | Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
385 | 0 | Idx = Builder.CreateAdd(Idx, Offs);
386 | 0 | }
387 |

388 | 0 | Value *End =
389 | 0 | ConstantInt::get(Idx->getType(), FalseRangeEnd - FirstFalseElement);
390 | 0 | return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
391 | 0 | }
392 | | 
393 | | // If a magic bitvector captures the entire comparison state
394 | | // of this load, replace it with computation that does:
395 | | // ((magic_cst >> i) & 1) != 0
396 | 10 | {
397 | 10 | Type *Ty = nullptr;
398 | | 
399 | | // Look for an appropriate type:
400 | | // - The type of Idx if the magic fits
401 | | // - The smallest fitting legal type
402 | 10 | if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
403 | 10 | Ty = Idx->getType();
404 | 0 | else
405 | 0 | Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
406 | | 
407 | 10 | if (Ty) {
408 | 10 | Idx = MaskIdx(Idx);
409 | 10 | Value *V = Builder.CreateIntCast(Idx, Ty, false);
410 | 10 | V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
411 | 10 | V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);
412 | 10 | return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
413 | 10 | }
414 | 10 | }
415 | | 
416 | 0 | return nullptr;
417 | 10 | }
418 | | |
419 | | /// Returns true if we can rewrite Start as a GEP with pointer Base
420 | | /// and some integer offset. The nodes that need to be re-written
421 | | /// for this transformation will be added to Explored (Base itself is inserted first). Gives up and returns false once Explored holds 100 or more nodes. DL is not referenced in the body.
422 | | static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
423 | | const DataLayout &DL,
424 | 265 | SetVector<Value *> &Explored) {
425 | 265 | SmallVector<Value *, 16> WorkList(1, Start);
426 | 265 | Explored.insert(Base);
427 | | 
428 | | // The following traversal gives us an order which can be used
429 | | // when doing the final transformation. Since in the final
430 | | // transformation we create the PHI replacement instructions first,
431 | | // we don't have to get them in any particular order.
432 | | //
433 | | // However, for other instructions we will have to traverse the
434 | | // operands of an instruction first, which means that we have to
435 | | // do a post-order traversal.
436 | 450 | while (!WorkList.empty()) {
437 | 393 | SetVector<PHINode *> PHIs;
438 | | 
439 | 657 | while (!WorkList.empty()) {
440 | 472 | if (Explored.size() >= 100)
441 | 0 | return false;
442 | | 
443 | 472 | Value *V = WorkList.back();
444 | | 
445 | 472 | if (Explored.contains(V)) {
446 | 0 | WorkList.pop_back();
447 | 0 | continue;
448 | 0 | }
449 | | 
450 | 472 | if (!isa<GetElementPtrInst>(V) && !isa<PHINode>(V))
451 | | // We've found some value that we can't explore which is different from
452 | | // the base. Therefore we can't do this transformation.
453 | 90 | return false;
454 | | 
455 | 382 | if (auto *GEP = dyn_cast<GEPOperator>(V)) {
456 | | // Only allow inbounds GEPs with at most one variable offset.
457 | 247 | auto IsNonConst = [](Value *V) { return !isa<ConstantInt>(V); };
458 | 247 | if (!GEP->isInBounds() || count_if(GEP->indices(), IsNonConst) > 1)
459 | 111 | return false;
460 | | 
461 | 136 | if (!Explored.contains(GEP->getOperand(0)))
462 | 18 | WorkList.push_back(GEP->getOperand(0));
463 | 136 | }
464 | | 
465 | 271 | if (WorkList.back() == V) {
466 | 253 | WorkList.pop_back();
467 | | // We've finished visiting this node, mark it as such.
468 | 253 | Explored.insert(V);
469 | 253 | }
470 | | 
471 | 271 | if (auto *PN = dyn_cast<PHINode>(V)) {
472 | | // We cannot transform PHIs on unsplittable basic blocks.
473 | 135 | if (isa<CatchSwitchInst>(PN->getParent()->getTerminator()))
474 | 7 | return false;
475 | 128 | Explored.insert(PN);
476 | 128 | PHIs.insert(PN);
477 | 128 | }
478 | 271 | }
479 | | 
480 | | // Explore the PHI nodes further.
481 | 185 | for (auto *PN : PHIs)
482 | 128 | for (Value *Op : PN->incoming_values())
483 | 259 | if (!Explored.contains(Op))
484 | 249 | WorkList.push_back(Op);
485 | 185 | }
486 | | 
487 | | // Make sure that we can do this. Since we can't insert GEPs in a basic
488 | | // block before a PHI node, we can't easily do this transformation if
489 | | // we have PHI node users of transformed instructions. NOTE(review): binding an element of Val->uses() to a `Value *` appears to yield the used value (Val itself) rather than the user, which would make every iteration `continue`; the recorded counts agree (438 continues, 0 executions of the rows after it) - confirm whether Val->users() was intended.
490 | 226 | for (Value *Val : Explored) {
491 | 438 | for (Value *Use : Val->uses()) {
492 | | 
493 | 438 | auto *PHI = dyn_cast<PHINode>(Use);
494 | 438 | auto *Inst = dyn_cast<Instruction>(Val);
495 | | 
496 | 438 | if (Inst == Base || Inst == PHI || !Inst || !PHI ||
497 | 438 | !Explored.contains(PHI))
498 | 438 | continue;
499 | | 
500 | 0 | if (PHI->getParent() == Inst->getParent())
501 | 0 | return false;
502 | 0 | }
503 | 226 | }
504 | 57 | return true;
505 | 57 | }
506 | | |
507 | | // Sets the appropriate insert point on Builder where we can add
508 | | // a replacement Instruction for V (if that is possible): the first insertion point of the block for a PHI, before V (or before the instruction following V when Before is false) for any other instruction, and the entry block's first insertion point for an Argument.
509 | | static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
510 | 282 | bool Before = true) {
511 | 282 | if (auto *PHI = dyn_cast<PHINode>(V)) {
512 | 56 | BasicBlock *Parent = PHI->getParent();
513 | 56 | Builder.SetInsertPoint(Parent, Parent->getFirstInsertionPt());
514 | 56 | return;
515 | 56 | }
516 | 226 | if (auto *I = dyn_cast<Instruction>(V)) {
517 | 226 | if (!Before)
518 | 113 | I = &*std::next(I->getIterator());
519 | 226 | Builder.SetInsertPoint(I);
520 | 226 | return;
521 | 226 | }
522 | 0 | if (auto *A = dyn_cast<Argument>(V)) {
523 | | // Set the insertion point in the entry block.
524 | 0 | BasicBlock &Entry = A->getParent()->getEntryBlock();
525 | 0 | Builder.SetInsertPoint(&Entry, Entry.getFirstInsertionPt());
526 | 0 | return;
527 | 0 | }
528 | | // Otherwise, this is a constant and we don't need to set a new
529 | | // insertion point; the Builder is left untouched.
530 | 0 | assert(isa<Constant>(V) && "Setting insertion point for unknown value!");
531 | 0 | }
532 | | |
533 | | /// Returns a re-written value of Start as an indexed GEP using Base as a
534 | | /// pointer. The value returned is the integer offset computed for Start (NewInsts[Start]); uses of every explored value other than Base are replaced with an i8 inbounds GEP off Base.
535 | | static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
536 | | const DataLayout &DL,
537 | | SetVector<Value *> &Explored,
538 | 57 | InstCombiner &IC) {
539 | | // Perform all the substitutions. This is a bit tricky because we can
540 | | // have cycles in our use-def chains.
541 | | // 1. Create the PHI nodes without any incoming values.
542 | | // 2. Create all the other values.
543 | | // 3. Add the edges for the PHI nodes.
544 | | // 4. Emit GEPs to get the original pointers.
545 | | // 5. Queue the original instructions for DCE (they are not erased here).
546 | 57 | Type *IndexType = IntegerType::get(
547 | 57 | Base->getContext(), DL.getIndexTypeSizeInBits(Start->getType()));
548 | | 
549 | 57 | DenseMap<Value *, Value *> NewInsts;
550 | 57 | NewInsts[Base] = ConstantInt::getNullValue(IndexType);
551 | | 
552 | | // Create the new PHI nodes, without adding any incoming values.
553 | 226 | for (Value *Val : Explored) {
554 | 226 | if (Val == Base)
555 | 57 | continue;
556 | | // Create empty phi nodes. This avoids cyclic dependencies when creating
557 | | // the remaining instructions.
558 | 169 | if (auto *PHI = dyn_cast<PHINode>(Val))
559 | 56 | NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(),
560 | 56 | PHI->getName() + ".idx", PHI);
561 | 169 | }
562 | 57 | IRBuilder<> Builder(Base->getContext());
563 | | 
564 | | // Create all the other instructions (Base and the PHIs already have entries in NewInsts and are skipped).
565 | 226 | for (Value *Val : Explored) {
566 | 226 | if (NewInsts.contains(Val))
567 | 113 | continue;
568 | | 
569 | 113 | if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
570 | 113 | setInsertionPoint(Builder, GEP);
571 | 113 | Value *Op = NewInsts[GEP->getOperand(0)];
572 | 113 | Value *OffsetV = emitGEPOffset(&Builder, DL, GEP);
573 | 113 | if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
574 | 57 | NewInsts[GEP] = OffsetV;
575 | 56 | else
576 | 56 | NewInsts[GEP] = Builder.CreateNSWAdd(
577 | 56 | Op, OffsetV, GEP->getOperand(0)->getName() + ".add");
578 | 113 | continue;
579 | 113 | }
580 | 0 | if (isa<PHINode>(Val))
581 | 0 | continue;
582 | | 
583 | 0 | llvm_unreachable("Unexpected instruction type");
584 | 0 | }
585 | | 
586 | | // Add the incoming values to the PHI nodes.
587 | 226 | for (Value *Val : Explored) {
588 | 226 | if (Val == Base)
589 | 57 | continue;
590 | | // All the instructions have been created, we can now add edges to the
591 | | // phi nodes.
592 | 169 | if (auto *PHI = dyn_cast<PHINode>(Val)) {
593 | 56 | PHINode *NewPhi = static_cast<PHINode *>(NewInsts[PHI]);
594 | 168 | for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) {
595 | 112 | Value *NewIncoming = PHI->getIncomingValue(I);
596 | | 
597 | 112 | if (NewInsts.contains(NewIncoming))
598 | 112 | NewIncoming = NewInsts[NewIncoming];
599 | | 
600 | 112 | NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I));
601 | 112 | }
602 | 56 | }
603 | 169 | }
604 | | 
605 | 226 | for (Value *Val : Explored) {
606 | 226 | if (Val == Base)
607 | 57 | continue;
608 | | 
609 | 169 | setInsertionPoint(Builder, Val, false);
610 | | // Create GEP for external users: an i8 inbounds GEP of Base by the new integer offset.
611 | 169 | Value *NewVal = Builder.CreateInBoundsGEP(
612 | 169 | Builder.getInt8Ty(), Base, NewInsts[Val], Val->getName() + ".ptr");
613 | 169 | IC.replaceInstUsesWith(*cast<Instruction>(Val), NewVal);
614 | | // Add old instruction to worklist for DCE. We don't directly remove it
615 | | // here because the original compare is one of the users.
616 | 169 | IC.addToWorklist(cast<Instruction>(Val));
617 | 169 | }
618 | | 
619 | 57 | return NewInsts[Start];
620 | 57 | }
621 | | |
622 | | /// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
623 | | /// We can look through PHIs, GEPs and casts in order to determine a common base
624 | | /// between GEPLHS and RHS. Returns nullptr when GEPLHS is a vector GEP, has a non-constant index, or RHS cannot be rewritten relative to the base.
625 | | static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
626 | | ICmpInst::Predicate Cond,
627 | | const DataLayout &DL,
628 | 474 | InstCombiner &IC) {
629 | | // FIXME: Support vector of pointers.
630 | 474 | if (GEPLHS->getType()->isVectorTy())
631 | 24 | return nullptr;
632 | | 
633 | 450 | if (!GEPLHS->hasAllConstantIndices())
634 | 185 | return nullptr;
635 | | 
636 | 265 | APInt Offset(DL.getIndexTypeSizeInBits(GEPLHS->getType()), 0);
637 | 265 | Value *PtrBase =
638 | 265 | GEPLHS->stripAndAccumulateConstantOffsets(DL, Offset,
639 | 265 | /*AllowNonInbounds*/ false);
640 | | 
641 | | // Bail if we looked through addrspacecast.
642 | 265 | if (PtrBase->getType() != GEPLHS->getType())
643 | 0 | return nullptr;
644 | | 
645 | | // The set of nodes that will take part in this transformation.
646 | 265 | SetVector<Value *> Nodes;
647 | | 
648 | 265 | if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes))
649 | 208 | return nullptr;
650 | | 
651 | | // We know we can re-write this as
652 | | // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2))
653 | | // Since we've only looked through inbounds GEPs we know that we
654 | | // can't have overflow on either side. We can therefore re-write
655 | | // this as:
656 | | // OFFSET1 cmp OFFSET2
657 | 57 | Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes, IC);
658 | | 
659 | | // rewriteGEPAsOffset has replaced RHS and all of its uses with a re-written
660 | | // GEP having PtrBase as the pointer base, and has returned in NewRHS the
661 | | // offset. Since Offset is the offset of LHS to the base pointer, we will now
662 | | // compare the offsets instead of comparing the pointers.
663 | 57 | return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
664 | 57 | IC.Builder.getInt(Offset), NewRHS);
665 | 265 | }
666 | | |
667 | | /// Fold comparisons between a GEP instruction and something else. At this point |
668 | | /// we know that the GEP is on the LHS of the comparison. |
669 | | Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, |
670 | | ICmpInst::Predicate Cond, |
671 | 674 | Instruction &I) { |
672 | | // Don't transform signed compares of GEPs into index compares. Even if the |
673 | | // GEP is inbounds, the final add of the base pointer can have signed overflow |
674 | | // and would change the result of the icmp. |
675 | | // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be |
676 | | // the maximum signed value for the pointer type. |
677 | 674 | if (ICmpInst::isSigned(Cond)) |
678 | 32 | return nullptr; |
679 | | |
680 | | // Look through bitcasts and addrspacecasts. We do not however want to remove |
681 | | // 0 GEPs. |
682 | 642 | if (!isa<GetElementPtrInst>(RHS)) |
683 | 446 | RHS = RHS->stripPointerCasts(); |
684 | | |
685 | 642 | Value *PtrBase = GEPLHS->getOperand(0); |
686 | 642 | if (PtrBase == RHS && (GEPLHS->isInBounds() || ICmpInst::isEquality(Cond))) { |
687 | | // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). |
688 | 78 | Value *Offset = EmitGEPOffset(GEPLHS); |
689 | 78 | return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, |
690 | 78 | Constant::getNullValue(Offset->getType())); |
691 | 78 | } |
692 | | |
693 | 564 | if (GEPLHS->isInBounds() && ICmpInst::isEquality(Cond) && |
694 | 564 | isa<Constant>(RHS) && cast<Constant>(RHS)->isNullValue() && |
695 | 564 | !NullPointerIsDefined(I.getFunction(), |
696 | 46 | RHS->getType()->getPointerAddressSpace())) { |
697 | | // For most address spaces, an allocation can't be placed at null, but null |
698 | | // itself is treated as a 0 size allocation in the in bounds rules. Thus, |
699 | | // the only valid inbounds address derived from null, is null itself. |
700 | | // Thus, we have four cases to consider: |
701 | | // 1) Base == nullptr, Offset == 0 -> inbounds, null |
702 | | // 2) Base == nullptr, Offset != 0 -> poison as the result is out of bounds |
703 | | // 3) Base != nullptr, Offset == (-base) -> poison (crossing allocations) |
704 | | // 4) Base != nullptr, Offset != (-base) -> nonnull (and possibly poison) |
705 | | // |
706 | | // (Note if we're indexing a type of size 0, that simply collapses into one |
707 | | // of the buckets above.) |
708 | | // |
709 | | // In general, we're allowed to make values less poison (i.e. remove |
710 | | // sources of full UB), so in this case, we just select between the two |
711 | | // non-poison cases (1 and 4 above). |
712 | | // |
713 | | // For vectors, we apply the same reasoning on a per-lane basis. |
714 | 43 | auto *Base = GEPLHS->getPointerOperand(); |
715 | 43 | if (GEPLHS->getType()->isVectorTy() && Base->getType()->isPointerTy()) { |
716 | 30 | auto EC = cast<VectorType>(GEPLHS->getType())->getElementCount(); |
717 | 30 | Base = Builder.CreateVectorSplat(EC, Base); |
718 | 30 | } |
719 | 43 | return new ICmpInst(Cond, Base, |
720 | 43 | ConstantExpr::getPointerBitCastOrAddrSpaceCast( |
721 | 43 | cast<Constant>(RHS), Base->getType())); |
722 | 521 | } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) { |
723 | | // If the base pointers are different, but the indices are the same, just |
724 | | // compare the base pointer. |
725 | 205 | if (PtrBase != GEPRHS->getOperand(0)) { |
726 | 96 | bool IndicesTheSame = |
727 | 96 | GEPLHS->getNumOperands() == GEPRHS->getNumOperands() && |
728 | 96 | GEPLHS->getPointerOperand()->getType() == |
729 | 88 | GEPRHS->getPointerOperand()->getType() && |
730 | 96 | GEPLHS->getSourceElementType() == GEPRHS->getSourceElementType(); |
731 | 96 | if (IndicesTheSame) |
732 | 70 | for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) |
733 | 65 | if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { |
734 | 57 | IndicesTheSame = false; |
735 | 57 | break; |
736 | 57 | } |
737 | | |
738 | | // If all indices are the same, just compare the base pointers. |
739 | 96 | Type *BaseType = GEPLHS->getOperand(0)->getType(); |
740 | 96 | if (IndicesTheSame && CmpInst::makeCmpResultType(BaseType) == I.getType()) |
741 | 5 | return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0)); |
742 | | |
743 | | // If we're comparing GEPs with two base pointers that only differ in type |
744 | | // and both GEPs have only constant indices or just one use, then fold |
745 | | // the compare with the adjusted indices. |
746 | | // FIXME: Support vector of pointers. |
747 | 91 | if (GEPLHS->isInBounds() && GEPRHS->isInBounds() && |
748 | 91 | (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && |
749 | 91 | (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) && |
750 | 91 | PtrBase->stripPointerCasts() == |
751 | 27 | GEPRHS->getOperand(0)->stripPointerCasts() && |
752 | 91 | !GEPLHS->getType()->isVectorTy()) { |
753 | 5 | Value *LOffset = EmitGEPOffset(GEPLHS); |
754 | 5 | Value *ROffset = EmitGEPOffset(GEPRHS); |
755 | | |
756 | | // If we looked through an addrspacecast between different sized address |
757 | | // spaces, the LHS and RHS pointers are different sized |
758 | | // integers. Truncate to the smaller one. |
759 | 5 | Type *LHSIndexTy = LOffset->getType(); |
760 | 5 | Type *RHSIndexTy = ROffset->getType(); |
761 | 5 | if (LHSIndexTy != RHSIndexTy) { |
762 | 0 | if (LHSIndexTy->getPrimitiveSizeInBits().getFixedValue() < |
763 | 0 | RHSIndexTy->getPrimitiveSizeInBits().getFixedValue()) { |
764 | 0 | ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy); |
765 | 0 | } else |
766 | 0 | LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy); |
767 | 0 | } |
768 | | |
769 | 5 | Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond), |
770 | 5 | LOffset, ROffset); |
771 | 5 | return replaceInstUsesWith(I, Cmp); |
772 | 5 | } |
773 | | |
774 | | // Otherwise, the base pointers are different and the indices are |
775 | | // different. Try convert this to an indexed compare by looking through |
776 | | // PHIs/casts. |
777 | 86 | return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this); |
778 | 91 | } |
779 | | |
780 | 109 | bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); |
781 | 109 | if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands() && |
782 | 109 | GEPLHS->getSourceElementType() == GEPRHS->getSourceElementType()) { |
783 | | // If the GEPs only differ by one index, compare it. |
784 | 55 | unsigned NumDifferences = 0; // Keep track of # differences. |
785 | 55 | unsigned DiffOperand = 0; // The operand that differs. |
786 | 119 | for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) |
787 | 65 | if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { |
788 | 44 | Type *LHSType = GEPLHS->getOperand(i)->getType(); |
789 | 44 | Type *RHSType = GEPRHS->getOperand(i)->getType(); |
790 | | // FIXME: Better support for vector of pointers. |
791 | 44 | if (LHSType->getPrimitiveSizeInBits() != |
792 | 44 | RHSType->getPrimitiveSizeInBits() || |
793 | 44 | (GEPLHS->getType()->isVectorTy() && |
794 | 44 | (!LHSType->isVectorTy() || !RHSType->isVectorTy()))) { |
795 | | // Irreconcilable differences. |
796 | 1 | NumDifferences = 2; |
797 | 1 | break; |
798 | 1 | } |
799 | | |
800 | 43 | if (NumDifferences++) break; |
801 | 43 | DiffOperand = i; |
802 | 43 | } |
803 | | |
804 | 55 | if (NumDifferences == 0) // SAME GEP? |
805 | 11 | return replaceInstUsesWith(I, // No comparison is needed here. |
806 | 11 | ConstantInt::get(I.getType(), ICmpInst::isTrueWhenEqual(Cond))); |
807 | | |
808 | 44 | else if (NumDifferences == 1 && GEPsInBounds) { |
809 | 14 | Value *LHSV = GEPLHS->getOperand(DiffOperand); |
810 | 14 | Value *RHSV = GEPRHS->getOperand(DiffOperand); |
811 | | // Make sure we do a signed comparison here. |
812 | 14 | return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV); |
813 | 14 | } |
814 | 55 | } |
815 | | |
816 | | // Only lower this if the icmp is the only user of the GEP or if we expect |
817 | | // the result to fold to a constant! |
818 | 84 | if ((GEPsInBounds || CmpInst::isEquality(Cond)) && |
819 | 84 | (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && |
820 | 84 | (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse())) { |
821 | | // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) |
822 | 12 | Value *L = EmitGEPOffset(GEPLHS); |
823 | 12 | Value *R = EmitGEPOffset(GEPRHS); |
824 | 12 | return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); |
825 | 12 | } |
826 | 84 | } |
827 | | |
828 | | // Try convert this to an indexed compare by looking through PHIs/casts as a |
829 | | // last resort. |
830 | 388 | return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this); |
831 | 564 | } |
832 | | |
833 | 47 | bool InstCombinerImpl::foldAllocaCmp(AllocaInst *Alloca) { |
834 | | // It would be tempting to fold away comparisons between allocas and any |
835 | | // pointer not based on that alloca (e.g. an argument). However, even |
836 | | // though such pointers cannot alias, they can still compare equal. |
837 | | // |
838 | | // But LLVM doesn't specify where allocas get their memory, so if the alloca |
839 | | // doesn't escape we can argue that it's impossible to guess its value, and we |
840 | | // can therefore act as if any such guesses are wrong. |
841 | | // |
842 | | // However, we need to ensure that this folding is consistent: We can't fold |
843 | | // one comparison to false, and then leave a different comparison against the |
844 | | // same value alone (as it might evaluate to true at runtime, leading to a |
845 | | // contradiction). As such, this code ensures that all comparisons are folded |
846 | | // at the same time, and there are no other escapes. |
847 | | |
848 | 47 | struct CmpCaptureTracker : public CaptureTracker { |
849 | 47 | AllocaInst *Alloca; |
850 | 47 | bool Captured = false; |
851 | | /// The value of the map is a bit mask of which icmp operands the alloca is |
852 | | /// used in. |
853 | 47 | SmallMapVector<ICmpInst *, unsigned, 4> ICmps; |
854 | | |
855 | 47 | CmpCaptureTracker(AllocaInst *Alloca) : Alloca(Alloca) {} |
856 | | |
857 | 47 | void tooManyUses() override { Captured = true; } |
858 | | |
859 | 61 | bool captured(const Use *U) override { |
860 | 61 | auto *ICmp = dyn_cast<ICmpInst>(U->getUser()); |
861 | | // We need to check that U is based *only* on the alloca, and doesn't |
862 | | // have other contributions from a select/phi operand. |
863 | | // TODO: We could check whether getUnderlyingObjects() reduces to one |
864 | | // object, which would allow looking through phi nodes. |
865 | 61 | if (ICmp && ICmp->isEquality() && getUnderlyingObject(*U) == Alloca) { |
866 | | // Collect equality icmps of the alloca, and don't treat them as |
867 | | // captures. |
868 | 21 | auto Res = ICmps.insert({ICmp, 0}); |
869 | 21 | Res.first->second |= 1u << U->getOperandNo(); |
870 | 21 | return false; |
871 | 21 | } |
872 | | |
873 | 40 | Captured = true; |
874 | 40 | return true; |
875 | 61 | } |
876 | 47 | }; |
877 | | |
878 | 47 | CmpCaptureTracker Tracker(Alloca); |
879 | 47 | PointerMayBeCaptured(Alloca, &Tracker); |
880 | 47 | if (Tracker.Captured) |
881 | 40 | return false; |
882 | | |
883 | 7 | bool Changed = false; |
884 | 7 | for (auto [ICmp, Operands] : Tracker.ICmps) { |
885 | 7 | switch (Operands) { |
886 | 1 | case 1: |
887 | 7 | case 2: { |
888 | | // The alloca is only used in one icmp operand. Assume that the |
889 | | // equality is false. |
890 | 7 | auto *Res = ConstantInt::get( |
891 | 7 | ICmp->getType(), ICmp->getPredicate() == ICmpInst::ICMP_NE); |
892 | 7 | replaceInstUsesWith(*ICmp, Res); |
893 | 7 | eraseInstFromFunction(*ICmp); |
894 | 7 | Changed = true; |
895 | 7 | break; |
896 | 1 | } |
897 | 0 | case 3: |
898 | | // Both icmp operands are based on the alloca, so this is comparing |
899 | | // pointer offsets, without leaking any information about the address |
900 | | // of the alloca. Ignore such comparisons. |
901 | 0 | break; |
902 | 0 | default: |
903 | 0 | llvm_unreachable("Cannot happen"); |
904 | 7 | } |
905 | 7 | } |
906 | | |
907 | 7 | return Changed; |
908 | 7 | } |
909 | | |
910 | | /// Fold "icmp pred (X+C), X". |
911 | | Instruction *InstCombinerImpl::foldICmpAddOpConst(Value *X, const APInt &C, |
912 | 117 | ICmpInst::Predicate Pred) { |
913 | | // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, |
914 | | // so the values can never be equal. Similarly for all other "or equals" |
915 | | // operators. |
916 | 117 | assert(!!C && "C should not be zero!"); |
917 | | |
918 | | // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255 |
919 | | // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253 |
920 | | // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0 |
921 | 117 | if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { |
922 | 21 | Constant *R = ConstantInt::get(X->getType(), |
923 | 21 | APInt::getMaxValue(C.getBitWidth()) - C); |
924 | 21 | return new ICmpInst(ICmpInst::ICMP_UGT, X, R); |
925 | 21 | } |
926 | | |
927 | | // (X+1) >u X --> X <u (0-1) --> X != 255 |
928 | | // (X+2) >u X --> X <u (0-2) --> X <u 254 |
929 | | // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0 |
930 | 96 | if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) |
931 | 43 | return new ICmpInst(ICmpInst::ICMP_ULT, X, |
932 | 43 | ConstantInt::get(X->getType(), -C)); |
933 | | |
934 | 53 | APInt SMax = APInt::getSignedMaxValue(C.getBitWidth()); |
935 | | |
936 | | // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127 |
937 | | // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125 |
938 | | // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0 |
939 | | // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1 |
940 | | // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126 |
941 | | // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127 |
942 | 53 | if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) |
943 | 27 | return new ICmpInst(ICmpInst::ICMP_SGT, X, |
944 | 27 | ConstantInt::get(X->getType(), SMax - C)); |
945 | | |
946 | | // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127 |
947 | | // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126 |
948 | | // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1 |
949 | | // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2 |
950 | | // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126 |
951 | | // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 |
952 | | |
953 | 26 | assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); |
954 | 0 | return new ICmpInst(ICmpInst::ICMP_SLT, X, |
955 | 26 | ConstantInt::get(X->getType(), SMax - (C - 1))); |
956 | 53 | } |
957 | | |
958 | | /// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" -> |
959 | | /// (icmp eq/ne A, Log2(AP2/AP1)) -> |
960 | | /// (icmp eq/ne A, Log2(AP2) - Log2(AP1)). |
961 | | Instruction *InstCombinerImpl::foldICmpShrConstConst(ICmpInst &I, Value *A, |
962 | | const APInt &AP1, |
963 | 38 | const APInt &AP2) { |
964 | 38 | assert(I.isEquality() && "Cannot fold icmp gt/lt"); |
965 | | |
966 | 36 | auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) { |
967 | 36 | if (I.getPredicate() == I.ICMP_NE) |
968 | 20 | Pred = CmpInst::getInversePredicate(Pred); |
969 | 36 | return new ICmpInst(Pred, LHS, RHS); |
970 | 36 | }; |
971 | | |
972 | | // Don't bother doing any work for cases which InstSimplify handles. |
973 | 38 | if (AP2.isZero()) |
974 | 0 | return nullptr; |
975 | | |
976 | 38 | bool IsAShr = isa<AShrOperator>(I.getOperand(0)); |
977 | 38 | if (IsAShr) { |
978 | 3 | if (AP2.isAllOnes()) |
979 | 0 | return nullptr; |
980 | 3 | if (AP2.isNegative() != AP1.isNegative()) |
981 | 0 | return nullptr; |
982 | 3 | if (AP2.sgt(AP1)) |
983 | 0 | return nullptr; |
984 | 3 | } |
985 | | |
986 | 38 | if (!AP1) |
987 | | // 'A' must be large enough to shift out the highest set bit. |
988 | 22 | return getICmp(I.ICMP_UGT, A, |
989 | 22 | ConstantInt::get(A->getType(), AP2.logBase2())); |
990 | | |
991 | 16 | if (AP1 == AP2) |
992 | 9 | return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType())); |
993 | | |
994 | 7 | int Shift; |
995 | 7 | if (IsAShr && AP1.isNegative()) |
996 | 2 | Shift = AP1.countl_one() - AP2.countl_one(); |
997 | 5 | else |
998 | 5 | Shift = AP1.countl_zero() - AP2.countl_zero(); |
999 | | |
1000 | 7 | if (Shift > 0) { |
1001 | 5 | if (IsAShr && AP1 == AP2.ashr(Shift)) { |
1002 | | // There are multiple solutions if we are comparing against -1 and the LHS |
1003 | | // of the ashr is not a power of two. |
1004 | 2 | if (AP1.isAllOnes() && !AP2.isPowerOf2()) |
1005 | 1 | return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift)); |
1006 | 1 | return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); |
1007 | 3 | } else if (AP1 == AP2.lshr(Shift)) { |
1008 | 3 | return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); |
1009 | 3 | } |
1010 | 5 | } |
1011 | | |
1012 | | // Shifting const2 will never be equal to const1. |
1013 | | // FIXME: This should always be handled by InstSimplify? |
1014 | 2 | auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE); |
1015 | 2 | return replaceInstUsesWith(I, TorF); |
1016 | 7 | } |
1017 | | |
1018 | | /// Handle "(icmp eq/ne (shl AP2, A), AP1)" -> |
1019 | | /// (icmp eq/ne A, TrailingZeros(AP1) - TrailingZeros(AP2)). |
1020 | | Instruction *InstCombinerImpl::foldICmpShlConstConst(ICmpInst &I, Value *A, |
1021 | | const APInt &AP1, |
1022 | 44 | const APInt &AP2) { |
1023 | 44 | assert(I.isEquality() && "Cannot fold icmp gt/lt"); |
1024 | | |
1025 | 41 | auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) { |
1026 | 41 | if (I.getPredicate() == I.ICMP_NE) |
1027 | 21 | Pred = CmpInst::getInversePredicate(Pred); |
1028 | 41 | return new ICmpInst(Pred, LHS, RHS); |
1029 | 41 | }; |
1030 | | |
1031 | | // Don't bother doing any work for cases which InstSimplify handles. |
1032 | 44 | if (AP2.isZero()) |
1033 | 0 | return nullptr; |
1034 | | |
1035 | 44 | unsigned AP2TrailingZeros = AP2.countr_zero(); |
1036 | | |
1037 | 44 | if (!AP1 && AP2TrailingZeros != 0) |
1038 | 15 | return getICmp( |
1039 | 15 | I.ICMP_UGE, A, |
1040 | 15 | ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros)); |
1041 | | |
1042 | 29 | if (AP1 == AP2) |
1043 | 17 | return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType())); |
1044 | | |
1045 | | // Get the distance between the lowest bits that are set. |
1046 | 12 | int Shift = AP1.countr_zero() - AP2TrailingZeros; |
1047 | | |
1048 | 12 | if (Shift > 0 && AP2.shl(Shift) == AP1) |
1049 | 9 | return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); |
1050 | | |
1051 | | // Shifting const2 will never be equal to const1. |
1052 | | // FIXME: This should always be handled by InstSimplify? |
1053 | 3 | auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE); |
1054 | 3 | return replaceInstUsesWith(I, TorF); |
1055 | 12 | } |
1056 | | |
1057 | | /// The caller has matched a pattern of the form: |
1058 | | /// I = icmp ugt (add (add A, B), CI2), CI1 |
1059 | | /// If this is of the form: |
1060 | | /// sum = a + b |
1061 | | /// if (sum+128 >u 255) |
1062 | | /// Then replace it with llvm.sadd.with.overflow.i8. |
1063 | | /// |
1064 | | static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, |
1065 | | ConstantInt *CI2, ConstantInt *CI1, |
1066 | 10 | InstCombinerImpl &IC) { |
1067 | | // The transformation we're trying to do here is to transform this into an |
1068 | | // llvm.sadd.with.overflow. To do this, we have to replace the original add |
1069 | | // with a narrower add, and discard the add-with-constant that is part of the |
1070 | | // range check (if we can't eliminate it, this isn't profitable). |
1071 | | |
1072 | | // In order to eliminate the add-with-constant, the compare can be its only |
1073 | | // use. |
1074 | 10 | Instruction *AddWithCst = cast<Instruction>(I.getOperand(0)); |
1075 | 10 | if (!AddWithCst->hasOneUse()) |
1076 | 7 | return nullptr; |
1077 | | |
1078 | | // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. |
1079 | 3 | if (!CI2->getValue().isPowerOf2()) |
1080 | 0 | return nullptr; |
1081 | 3 | unsigned NewWidth = CI2->getValue().countr_zero(); |
1082 | 3 | if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) |
1083 | 3 | return nullptr; |
1084 | | |
1085 | | // The width of the new add formed is 1 more than the bias. |
1086 | 0 | ++NewWidth; |
1087 | | |
1088 | | // Check to see that CI1 is an all-ones value with NewWidth bits. |
1089 | 0 | if (CI1->getBitWidth() == NewWidth || |
1090 | 0 | CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) |
1091 | 0 | return nullptr; |
1092 | | |
1093 | | // This is only really a signed overflow check if the inputs have been |
1094 | | // sign-extended; check for that condition. For example, if CI2 is 2^31 and |
1095 | | // the operands of the add are 64 bits wide, we need at least 33 sign bits. |
1096 | 0 | if (IC.ComputeMaxSignificantBits(A, 0, &I) > NewWidth || |
1097 | 0 | IC.ComputeMaxSignificantBits(B, 0, &I) > NewWidth) |
1098 | 0 | return nullptr; |
1099 | | |
1100 | | // In order to replace the original add with a narrower |
1101 | | // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant |
1102 | | // and truncates that discard the high bits of the add. Verify that this is |
1103 | | // the case. |
1104 | 0 | Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0)); |
1105 | 0 | for (User *U : OrigAdd->users()) { |
1106 | 0 | if (U == AddWithCst) |
1107 | 0 | continue; |
1108 | | |
1109 | | // Only accept truncates for now. We would really like a nice recursive |
1110 | | // predicate like SimplifyDemandedBits, but which goes downwards the use-def |
1111 | | // chain to see which bits of a value are actually demanded. If the |
1112 | | // original add had another add which was then immediately truncated, we |
1113 | | // could still do the transformation. |
1114 | 0 | TruncInst *TI = dyn_cast<TruncInst>(U); |
1115 | 0 | if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth) |
1116 | 0 | return nullptr; |
1117 | 0 | } |
1118 | | |
1119 | | // If the pattern matches, truncate the inputs to the narrower type and |
1120 | | // use the sadd_with_overflow intrinsic to efficiently compute both the |
1121 | | // result and the overflow bit. |
1122 | 0 | Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); |
1123 | 0 | Function *F = Intrinsic::getDeclaration( |
1124 | 0 | I.getModule(), Intrinsic::sadd_with_overflow, NewType); |
1125 | |
|
1126 | 0 | InstCombiner::BuilderTy &Builder = IC.Builder; |
1127 | | |
1128 | | // Put the new code above the original add, in case there are any uses of the |
1129 | | // add between the add and the compare. |
1130 | 0 | Builder.SetInsertPoint(OrigAdd); |
1131 | |
|
1132 | 0 | Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc"); |
1133 | 0 | Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc"); |
1134 | 0 | CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd"); |
1135 | 0 | Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result"); |
1136 | 0 | Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType()); |
1137 | | |
1138 | | // The inner add was the result of the narrow add, zero extended to the |
1139 | | // wider type. Replace it with the result computed by the intrinsic. |
1140 | 0 | IC.replaceInstUsesWith(*OrigAdd, ZExt); |
1141 | 0 | IC.eraseInstFromFunction(*OrigAdd); |
1142 | | |
1143 | | // The original icmp gets replaced with the overflow value. |
1144 | 0 | return ExtractValueInst::Create(Call, 1, "sadd.overflow"); |
1145 | 0 | } |
1146 | | |
1147 | | /// If we have: |
1148 | | /// icmp eq/ne (urem/srem %x, %y), 0 |
1149 | | /// iff %y is a power-of-two, we can replace this with a bit test: |
1150 | | /// icmp eq/ne (and %x, (add %y, -1)), 0 |
1151 | 22.1k | Instruction *InstCombinerImpl::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) { |
1152 | | // This fold is only valid for equality predicates. |
1153 | 22.1k | if (!I.isEquality()) |
1154 | 6.18k | return nullptr; |
1155 | 15.9k | ICmpInst::Predicate Pred; |
1156 | 15.9k | Value *X, *Y, *Zero; |
1157 | 15.9k | if (!match(&I, m_ICmp(Pred, m_OneUse(m_IRem(m_Value(X), m_Value(Y))), |
1158 | 15.9k | m_CombineAnd(m_Zero(), m_Value(Zero))))) |
1159 | 15.9k | return nullptr; |
1160 | 43 | if (!isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, 0, &I)) |
1161 | 42 | return nullptr; |
1162 | | // This may increase instruction count, we don't enforce that Y is a constant. |
1163 | 1 | Value *Mask = Builder.CreateAdd(Y, Constant::getAllOnesValue(Y->getType())); |
1164 | 1 | Value *Masked = Builder.CreateAnd(X, Mask); |
1165 | 1 | return ICmpInst::Create(Instruction::ICmp, Pred, Masked, Zero); |
1166 | 43 | } |
1167 | | |
1168 | | /// Fold equality-comparison between zero and any (maybe truncated) right-shift |
1169 | | /// by one-less-than-bitwidth into a sign test on the original value. |
1170 | 78.4k | Instruction *InstCombinerImpl::foldSignBitTest(ICmpInst &I) { |
1171 | 78.4k | Instruction *Val; |
1172 | 78.4k | ICmpInst::Predicate Pred; |
1173 | 78.4k | if (!I.isEquality() || !match(&I, m_ICmp(Pred, m_Instruction(Val), m_Zero()))) |
1174 | 68.3k | return nullptr; |
1175 | | |
1176 | 10.0k | Value *X; |
1177 | 10.0k | Type *XTy; |
1178 | | |
1179 | 10.0k | Constant *C; |
1180 | 10.0k | if (match(Val, m_TruncOrSelf(m_Shr(m_Value(X), m_Constant(C))))) { |
1181 | 27 | XTy = X->getType(); |
1182 | 27 | unsigned XBitWidth = XTy->getScalarSizeInBits(); |
1183 | 27 | if (!match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, |
1184 | 27 | APInt(XBitWidth, XBitWidth - 1)))) |
1185 | 27 | return nullptr; |
1186 | 10.0k | } else if (isa<BinaryOperator>(Val) && |
1187 | 10.0k | (X = reassociateShiftAmtsOfTwoSameDirectionShifts( |
1188 | 6.68k | cast<BinaryOperator>(Val), SQ.getWithInstruction(Val), |
1189 | 6.68k | /*AnalyzeForSignBitExtraction=*/true))) { |
1190 | 0 | XTy = X->getType(); |
1191 | 0 | } else |
1192 | 10.0k | return nullptr; |
1193 | | |
1194 | 0 | return ICmpInst::Create(Instruction::ICmp, |
1195 | 0 | Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_SGE |
1196 | 0 | : ICmpInst::ICMP_SLT, |
1197 | 0 | X, ConstantInt::getNullValue(XTy)); |
1198 | 10.0k | } |
1199 | | |
1200 | | // Handle icmp pred X, 0 |
1201 | 83.5k | Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) { |
1202 | 83.5k | CmpInst::Predicate Pred = Cmp.getPredicate(); |
1203 | 83.5k | if (!match(Cmp.getOperand(1), m_Zero())) |
1204 | 61.3k | return nullptr; |
1205 | | |
1206 | | // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0) |
1207 | 22.2k | if (Pred == ICmpInst::ICMP_SGT) { |
1208 | 2.08k | Value *A, *B; |
1209 | 2.08k | if (match(Cmp.getOperand(0), m_SMin(m_Value(A), m_Value(B)))) { |
1210 | 72 | if (isKnownPositive(A, SQ.getWithInstruction(&Cmp))) |
1211 | 49 | return new ICmpInst(Pred, B, Cmp.getOperand(1)); |
1212 | 23 | if (isKnownPositive(B, SQ.getWithInstruction(&Cmp))) |
1213 | 2 | return new ICmpInst(Pred, A, Cmp.getOperand(1)); |
1214 | 23 | } |
1215 | 2.08k | } |
1216 | | |
1217 | 22.1k | if (Instruction *New = foldIRemByPowerOfTwoToBitTest(Cmp)) |
1218 | 1 | return New; |
1219 | | |
1220 | | // Given: |
1221 | | // icmp eq/ne (urem %x, %y), 0 |
1222 | | // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem': |
1223 | | // icmp eq/ne %x, 0 |
1224 | 22.1k | Value *X, *Y; |
1225 | 22.1k | if (match(Cmp.getOperand(0), m_URem(m_Value(X), m_Value(Y))) && |
1226 | 22.1k | ICmpInst::isEquality(Pred)) { |
1227 | 34 | KnownBits XKnown = computeKnownBits(X, 0, &Cmp); |
1228 | 34 | KnownBits YKnown = computeKnownBits(Y, 0, &Cmp); |
1229 | 34 | if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2) |
1230 | 0 | return new ICmpInst(Pred, X, Cmp.getOperand(1)); |
1231 | 34 | } |
1232 | | |
1233 | | // (icmp eq/ne (mul X Y)) -> (icmp eq/ne X/Y) if we know about whether X/Y are |
1234 | | // odd/non-zero/there is no overflow. |
1235 | 22.1k | if (match(Cmp.getOperand(0), m_Mul(m_Value(X), m_Value(Y))) && |
1236 | 22.1k | ICmpInst::isEquality(Pred)) { |
1237 | | |
1238 | 165 | KnownBits XKnown = computeKnownBits(X, 0, &Cmp); |
1239 | | // if X % 2 != 0 |
1240 | | // (icmp eq/ne Y) |
1241 | 165 | if (XKnown.countMaxTrailingZeros() == 0) |
1242 | 0 | return new ICmpInst(Pred, Y, Cmp.getOperand(1)); |
1243 | | |
1244 | 165 | KnownBits YKnown = computeKnownBits(Y, 0, &Cmp); |
1245 | | // if Y % 2 != 0 |
1246 | | // (icmp eq/ne X) |
1247 | 165 | if (YKnown.countMaxTrailingZeros() == 0) |
1248 | 16 | return new ICmpInst(Pred, X, Cmp.getOperand(1)); |
1249 | | |
1250 | 149 | auto *BO0 = cast<OverflowingBinaryOperator>(Cmp.getOperand(0)); |
1251 | 149 | if (BO0->hasNoUnsignedWrap() || BO0->hasNoSignedWrap()) { |
1252 | 51 | const SimplifyQuery Q = SQ.getWithInstruction(&Cmp); |
1253 | | // `isKnownNonZero` does more analysis than just `!KnownBits.One.isZero()` |
1254 | | // but to avoid unnecessary work, first just if this is an obvious case. |
1255 | | |
1256 | | // if X non-zero and NoOverflow(X * Y) |
1257 | | // (icmp eq/ne Y) |
1258 | 51 | if (!XKnown.One.isZero() || isKnownNonZero(X, DL, 0, Q.AC, Q.CxtI, Q.DT)) |
1259 | 1 | return new ICmpInst(Pred, Y, Cmp.getOperand(1)); |
1260 | | |
1261 | | // if Y non-zero and NoOverflow(X * Y) |
1262 | | // (icmp eq/ne X) |
1263 | 50 | if (!YKnown.One.isZero() || isKnownNonZero(Y, DL, 0, Q.AC, Q.CxtI, Q.DT)) |
1264 | 16 | return new ICmpInst(Pred, X, Cmp.getOperand(1)); |
1265 | 50 | } |
1266 | | // Note, we are skipping cases: |
1267 | | // if Y % 2 != 0 AND X % 2 != 0 |
1268 | | // (false/true) |
1269 | | // if X non-zero and Y non-zero and NoOverflow(X * Y) |
1270 | | // (false/true) |
1271 | | // Those can be simplified later as we would have already replaced the (icmp |
1272 | | // eq/ne (mul X, Y)) with (icmp eq/ne X/Y) and if X/Y is known non-zero that |
1273 | | // will fold to a constant elsewhere. |
1274 | 149 | } |
1275 | 22.1k | return nullptr; |
1276 | 22.1k | } |
1277 | | |
1278 | | /// Fold icmp Pred X, C. |
1279 | | /// TODO: This code structure does not make sense. The saturating add fold |
1280 | | /// should be moved to some other helper and extended as noted below (it is also |
1281 | | /// possible that code has been made unnecessary - do we canonicalize IR to |
1282 | | /// overflow/saturating intrinsics or not?). |
1283 | 92.1k | Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) { |
1284 | | // Match the following pattern, which is a common idiom when writing |
1285 | | // overflow-safe integer arithmetic functions. The source performs an addition |
1286 | | // in wider type and explicitly checks for overflow using comparisons against |
1287 | | // INT_MIN and INT_MAX. Simplify by using the sadd_with_overflow intrinsic. |
1288 | | // |
1289 | | // TODO: This could probably be generalized to handle other overflow-safe |
1290 | | // operations if we worked out the formulas to compute the appropriate magic |
1291 | | // constants. |
1292 | | // |
1293 | | // sum = a + b |
1294 | | // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8 |
1295 | 92.1k | CmpInst::Predicate Pred = Cmp.getPredicate(); |
1296 | 92.1k | Value *Op0 = Cmp.getOperand(0), *Op1 = Cmp.getOperand(1); |
1297 | 92.1k | Value *A, *B; |
1298 | 92.1k | ConstantInt *CI, *CI2; // I = icmp ugt (add (add A, B), CI2), CI |
1299 | 92.1k | if (Pred == ICmpInst::ICMP_UGT && match(Op1, m_ConstantInt(CI)) && |
1300 | 92.1k | match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2)))) |
1301 | 10 | if (Instruction *Res = processUGT_ADDCST_ADD(Cmp, A, B, CI2, CI, *this)) |
1302 | 0 | return Res; |
1303 | | |
1304 | | // icmp(phi(C1, C2, ...), C) -> phi(icmp(C1, C), icmp(C2, C), ...). |
1305 | 92.1k | Constant *C = dyn_cast<Constant>(Op1); |
1306 | 92.1k | if (!C) |
1307 | 32.5k | return nullptr; |
1308 | | |
1309 | 59.6k | if (auto *Phi = dyn_cast<PHINode>(Op0)) |
1310 | 519 | if (all_of(Phi->operands(), [](Value *V) { return isa<Constant>(V); })) { |
1311 | 39 | SmallVector<Constant *> Ops; |
1312 | 83 | for (Value *V : Phi->incoming_values()) { |
1313 | 83 | Constant *Res = |
1314 | 83 | ConstantFoldCompareInstOperands(Pred, cast<Constant>(V), C, DL); |
1315 | 83 | if (!Res) |
1316 | 0 | return nullptr; |
1317 | 83 | Ops.push_back(Res); |
1318 | 83 | } |
1319 | 39 | Builder.SetInsertPoint(Phi); |
1320 | 39 | PHINode *NewPhi = Builder.CreatePHI(Cmp.getType(), Phi->getNumOperands()); |
1321 | 39 | for (auto [V, Pred] : zip(Ops, Phi->blocks())) |
1322 | 83 | NewPhi->addIncoming(V, Pred); |
1323 | 39 | return replaceInstUsesWith(Cmp, NewPhi); |
1324 | 39 | } |
1325 | | |
1326 | 59.5k | if (Instruction *R = tryFoldInstWithCtpopWithNot(&Cmp)) |
1327 | 0 | return R; |
1328 | | |
1329 | 59.5k | return nullptr; |
1330 | 59.5k | } |
1331 | | |
1332 | | /// Canonicalize icmp instructions based on dominating conditions. |
1333 | 92.1k | Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) { |
1334 | | // We already checked simple implication in InstSimplify, only handle complex |
1335 | | // cases here. |
1336 | 92.1k | Value *X = Cmp.getOperand(0), *Y = Cmp.getOperand(1); |
1337 | 92.1k | ICmpInst::Predicate DomPred; |
1338 | 92.1k | const APInt *C; |
1339 | 92.1k | if (!match(Y, m_APInt(C))) |
1340 | 35.4k | return nullptr; |
1341 | | |
1342 | 56.6k | CmpInst::Predicate Pred = Cmp.getPredicate(); |
1343 | 56.6k | ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, *C); |
1344 | | |
1345 | 56.6k | auto handleDomCond = [&](Value *DomCond, bool CondIsTrue) -> Instruction * { |
1346 | 130 | const APInt *DomC; |
1347 | 130 | if (!match(DomCond, m_ICmp(DomPred, m_Specific(X), m_APInt(DomC)))) |
1348 | 14 | return nullptr; |
1349 | | // We have 2 compares of a variable with constants. Calculate the constant |
1350 | | // ranges of those compares to see if we can transform the 2nd compare: |
1351 | | // DomBB: |
1352 | | // DomCond = icmp DomPred X, DomC |
1353 | | // br DomCond, CmpBB, FalseBB |
1354 | | // CmpBB: |
1355 | | // Cmp = icmp Pred X, C |
1356 | 116 | if (!CondIsTrue) |
1357 | 82 | DomPred = CmpInst::getInversePredicate(DomPred); |
1358 | 116 | ConstantRange DominatingCR = |
1359 | 116 | ConstantRange::makeExactICmpRegion(DomPred, *DomC); |
1360 | 116 | ConstantRange Intersection = DominatingCR.intersectWith(CR); |
1361 | 116 | ConstantRange Difference = DominatingCR.difference(CR); |
1362 | 116 | if (Intersection.isEmptySet()) |
1363 | 0 | return replaceInstUsesWith(Cmp, Builder.getFalse()); |
1364 | 116 | if (Difference.isEmptySet()) |
1365 | 0 | return replaceInstUsesWith(Cmp, Builder.getTrue()); |
1366 | | |
1367 | | // Canonicalizing a sign-bit comparison that is used in a branch |
1368 | | // pessimizes codegen by generating a branch-on-zero instruction instead |
1369 | | // of a test-and-branch. So we avoid canonicalizing in such situations |
1370 | | // because a test-and-branch instruction has better branch displacement |
1371 | | // than a compare-and-branch instruction. |
1372 | 116 | bool UnusedBit; |
1373 | 116 | bool IsSignBit = isSignBitCheck(Pred, *C, UnusedBit); |
1374 | 116 | if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp))) |
1375 | 49 | return nullptr; |
1376 | | |
1377 | | // Avoid an infinite loop with min/max canonicalization. |
1378 | | // TODO: This will be unnecessary if we canonicalize to min/max intrinsics. |
1379 | 67 | if (Cmp.hasOneUse() && |
1380 | 67 | match(Cmp.user_back(), m_MaxOrMin(m_Value(), m_Value()))) |
1381 | 9 | return nullptr; |
1382 | | |
1383 | 58 | if (const APInt *EqC = Intersection.getSingleElement()) |
1384 | 5 | return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*EqC)); |
1385 | 53 | if (const APInt *NeC = Difference.getSingleElement()) |
1386 | 7 | return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*NeC)); |
1387 | 46 | return nullptr; |
1388 | 53 | }; |
1389 | | |
1390 | 56.6k | for (BranchInst *BI : DC.conditionsFor(X)) { |
1391 | 435 | auto *Cond = BI->getCondition(); |
1392 | 435 | BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0)); |
1393 | 435 | if (DT.dominates(Edge0, Cmp.getParent())) { |
1394 | 48 | if (auto *V = handleDomCond(Cond, true)) |
1395 | 3 | return V; |
1396 | 387 | } else { |
1397 | 387 | BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1)); |
1398 | 387 | if (DT.dominates(Edge1, Cmp.getParent())) |
1399 | 82 | if (auto *V = handleDomCond(Cond, false)) |
1400 | 9 | return V; |
1401 | 387 | } |
1402 | 435 | } |
1403 | | |
1404 | 56.6k | return nullptr; |
1405 | 56.6k | } |
1406 | | |
1407 | | /// Fold icmp (trunc X), C. |
1408 | | Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp, |
1409 | | TruncInst *Trunc, |
1410 | 739 | const APInt &C) { |
1411 | 739 | ICmpInst::Predicate Pred = Cmp.getPredicate(); |
1412 | 739 | Value *X = Trunc->getOperand(0); |
1413 | 739 | if (C.isOne() && C.getBitWidth() > 1) { |
1414 | | // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1 |
1415 | 34 | Value *V = nullptr; |
1416 | 34 | if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V)))) |
1417 | 0 | return new ICmpInst(ICmpInst::ICMP_SLT, V, |
1418 | 0 | ConstantInt::get(V->getType(), 1)); |
1419 | 34 | } |
1420 | | |
1421 | 739 | Type *SrcTy = X->getType(); |
1422 | 739 | unsigned DstBits = Trunc->getType()->getScalarSizeInBits(), |
1423 | 739 | SrcBits = SrcTy->getScalarSizeInBits(); |
1424 | | |
1425 | | // TODO: Handle any shifted constant by subtracting trailing zeros. |
1426 | | // TODO: Handle non-equality predicates. |
1427 | 739 | Value *Y; |
1428 | 739 | if (Cmp.isEquality() && match(X, m_Shl(m_One(), m_Value(Y)))) { |
1429 | | // (trunc (1 << Y) to iN) == 0 --> Y u>= N |
1430 | | // (trunc (1 << Y) to iN) != 0 --> Y u< N |
1431 | 0 | if (C.isZero()) { |
1432 | 0 | auto NewPred = (Pred == Cmp.ICMP_EQ) ? Cmp.ICMP_UGE : Cmp.ICMP_ULT; |
1433 | 0 | return new ICmpInst(NewPred, Y, ConstantInt::get(SrcTy, DstBits)); |
1434 | 0 | } |
1435 | | // (trunc (1 << Y) to iN) == 2**C --> Y == C |
1436 | | // (trunc (1 << Y) to iN) != 2**C --> Y != C |
1437 | 0 | if (C.isPowerOf2()) |
1438 | 0 | return new ICmpInst(Pred, Y, ConstantInt::get(SrcTy, C.logBase2())); |
1439 | 0 | } |
1440 | | |
1441 | 739 | if (Cmp.isEquality() && Trunc->hasOneUse()) { |
1442 | | // Canonicalize to a mask and wider compare if the wide type is suitable: |
1443 | | // (trunc X to i8) == C --> (X & 0xff) == (zext C) |
1444 | 350 | if (!SrcTy->isVectorTy() && shouldChangeType(DstBits, SrcBits)) { |
1445 | 317 | Constant *Mask = |
1446 | 317 | ConstantInt::get(SrcTy, APInt::getLowBitsSet(SrcBits, DstBits)); |
1447 | 317 | Value *And = Builder.CreateAnd(X, Mask); |
1448 | 317 | Constant *WideC = ConstantInt::get(SrcTy, C.zext(SrcBits)); |
1449 | 317 | return new ICmpInst(Pred, And, WideC); |
1450 | 317 | } |
1451 | | |
1452 | | // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all |
1453 | | // of the high bits truncated out of x are known. |
1454 | 33 | KnownBits Known = computeKnownBits(X, 0, &Cmp); |
1455 | | |
1456 | | // If all the high bits are known, we can do this xform. |
1457 | 33 | if ((Known.Zero | Known.One).countl_one() >= SrcBits - DstBits) { |
1458 | | // Pull in the high bits from known-ones set. |
1459 | 0 | APInt NewRHS = C.zext(SrcBits); |
1460 | 0 | NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits); |
1461 | 0 | return new ICmpInst(Pred, X, ConstantInt::get(SrcTy, NewRHS)); |
1462 | 0 | } |
1463 | 33 | } |
1464 | | |
1465 | | // Look through truncated right-shift of the sign-bit for a sign-bit check: |
1466 | | // trunc iN (ShOp >> ShAmtC) to i[N - ShAmtC] < 0 --> ShOp < 0 |
1467 | | // trunc iN (ShOp >> ShAmtC) to i[N - ShAmtC] > -1 --> ShOp > -1 |
1468 | 422 | Value *ShOp; |
1469 | 422 | const APInt *ShAmtC; |
1470 | 422 | bool TrueIfSigned; |
1471 | 422 | if (isSignBitCheck(Pred, C, TrueIfSigned) && |
1472 | 422 | match(X, m_Shr(m_Value(ShOp), m_APInt(ShAmtC))) && |
1473 | 422 | DstBits == SrcBits - ShAmtC->getZExtValue()) { |
1474 | 8 | return TrueIfSigned ? new ICmpInst(ICmpInst::ICMP_SLT, ShOp, |
1475 | 6 | ConstantInt::getNullValue(SrcTy)) |
1476 | 8 | : new ICmpInst(ICmpInst::ICMP_SGT, ShOp, |
1477 | 2 | ConstantInt::getAllOnesValue(SrcTy)); |
1478 | 8 | } |
1479 | | |
1480 | 414 | return nullptr; |
1481 | 422 | } |
1482 | | |
1483 | | /// Fold icmp (trunc X), (trunc Y). |
1484 | | /// Fold icmp (trunc X), (zext Y). |
1485 | | Instruction * |
1486 | | InstCombinerImpl::foldICmpTruncWithTruncOrExt(ICmpInst &Cmp, |
1487 | 88.3k | const SimplifyQuery &Q) { |
1488 | 88.3k | if (Cmp.isSigned()) |
1489 | 27.1k | return nullptr; |
1490 | | |
1491 | 61.2k | Value *X, *Y; |
1492 | 61.2k | ICmpInst::Predicate Pred; |
1493 | 61.2k | bool YIsZext = false; |
1494 | | // Try to match icmp (trunc X), (trunc Y) |
1495 | 61.2k | if (match(&Cmp, m_ICmp(Pred, m_Trunc(m_Value(X)), m_Trunc(m_Value(Y))))) { |
1496 | 3.97k | if (X->getType() != Y->getType() && |
1497 | 3.97k | (!Cmp.getOperand(0)->hasOneUse() || !Cmp.getOperand(1)->hasOneUse())) |
1498 | 0 | return nullptr; |
1499 | 3.97k | if (!isDesirableIntType(X->getType()->getScalarSizeInBits()) && |
1500 | 3.97k | isDesirableIntType(Y->getType()->getScalarSizeInBits())) { |
1501 | 0 | std::swap(X, Y); |
1502 | 0 | Pred = Cmp.getSwappedPredicate(Pred); |
1503 | 0 | } |
1504 | 3.97k | } |
1505 | | // Try to match icmp (trunc X), (zext Y) |
1506 | 57.2k | else if (match(&Cmp, m_c_ICmp(Pred, m_Trunc(m_Value(X)), |
1507 | 57.2k | m_OneUse(m_ZExt(m_Value(Y)))))) |
1508 | | |
1509 | 4 | YIsZext = true; |
1510 | 57.2k | else |
1511 | 57.2k | return nullptr; |
1512 | | |
1513 | 3.98k | Type *TruncTy = Cmp.getOperand(0)->getType(); |
1514 | 3.98k | unsigned TruncBits = TruncTy->getScalarSizeInBits(); |
1515 | | |
1516 | | // If this transform will end up changing from desirable types -> undesirable |
1517 | | // types skip it. |
1518 | 3.98k | if (isDesirableIntType(TruncBits) && |
1519 | 3.98k | !isDesirableIntType(X->getType()->getScalarSizeInBits())) |
1520 | 1 | return nullptr; |
1521 | | |
1522 | | // Check if the trunc is unneeded. |
1523 | 3.98k | KnownBits KnownX = llvm::computeKnownBits(X, /*Depth*/ 0, Q); |
1524 | 3.98k | if (KnownX.countMaxActiveBits() > TruncBits) |
1525 | 3.79k | return nullptr; |
1526 | | |
1527 | 183 | if (!YIsZext) { |
1528 | | // If Y is also a trunc, make sure it is unneeded. |
1529 | 183 | KnownBits KnownY = llvm::computeKnownBits(Y, /*Depth*/ 0, Q); |
1530 | 183 | if (KnownY.countMaxActiveBits() > TruncBits) |
1531 | 143 | return nullptr; |
1532 | 183 | } |
1533 | | |
1534 | 40 | Value *NewY = Builder.CreateZExtOrTrunc(Y, X->getType()); |
1535 | 40 | return new ICmpInst(Pred, X, NewY); |
1536 | 183 | } |
1537 | | |
1538 | | /// Fold icmp (xor X, Y), C. |
1539 | | Instruction *InstCombinerImpl::foldICmpXorConstant(ICmpInst &Cmp, |
1540 | | BinaryOperator *Xor, |
1541 | 775 | const APInt &C) { |
1542 | 775 | if (Instruction *I = foldICmpXorShiftConst(Cmp, Xor, C)) |
1543 | 0 | return I; |
1544 | | |
1545 | 775 | Value *X = Xor->getOperand(0); |
1546 | 775 | Value *Y = Xor->getOperand(1); |
1547 | 775 | const APInt *XorC; |
1548 | 775 | if (!match(Y, m_APInt(XorC))) |
1549 | 273 | return nullptr; |
1550 | | |
1551 | | // If this is a comparison that tests the signbit (X < 0) or (x > -1), |
1552 | | // fold the xor. |
1553 | 502 | ICmpInst::Predicate Pred = Cmp.getPredicate(); |
1554 | 502 | bool TrueIfSigned = false; |
1555 | 502 | if (isSignBitCheck(Cmp.getPredicate(), C, TrueIfSigned)) { |
1556 | | |
1557 | | // If the sign bit of the XorCst is not set, there is no change to |
1558 | | // the operation, just stop using the Xor. |
1559 | 48 | if (!XorC->isNegative()) |
1560 | 5 | return replaceOperand(Cmp, 0, X); |
1561 | | |
1562 | | // Emit the opposite comparison. |
1563 | 43 | if (TrueIfSigned) |
1564 | 19 | return new ICmpInst(ICmpInst::ICMP_SGT, X, |
1565 | 19 | ConstantInt::getAllOnesValue(X->getType())); |
1566 | 24 | else |
1567 | 24 | return new ICmpInst(ICmpInst::ICMP_SLT, X, |
1568 | 24 | ConstantInt::getNullValue(X->getType())); |
1569 | 43 | } |
1570 | | |
1571 | 454 | if (Xor->hasOneUse()) { |
1572 | | // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask)) |
1573 | 120 | if (!Cmp.isEquality() && XorC->isSignMask()) { |
1574 | 21 | Pred = Cmp.getFlippedSignednessPredicate(); |
1575 | 21 | return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC)); |
1576 | 21 | } |
1577 | | |
1578 | | // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask)) |
1579 | 99 | if (!Cmp.isEquality() && XorC->isMaxSignedValue()) { |
1580 | 15 | Pred = Cmp.getFlippedSignednessPredicate(); |
1581 | 15 | Pred = Cmp.getSwappedPredicate(Pred); |
1582 | 15 | return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC)); |
1583 | 15 | } |
1584 | 99 | } |
1585 | | |
1586 | | // Mask constant magic can eliminate an 'xor' with unsigned compares. |
1587 | 418 | if (Pred == ICmpInst::ICMP_UGT) { |
1588 | | // (xor X, ~C) >u C --> X <u ~C (when C+1 is a power of 2) |
1589 | 58 | if (*XorC == ~C && (C + 1).isPowerOf2()) |
1590 | 0 | return new ICmpInst(ICmpInst::ICMP_ULT, X, Y); |
1591 | | // (xor X, C) >u C --> X >u C (when C+1 is a power of 2) |
1592 | 58 | if (*XorC == C && (C + 1).isPowerOf2()) |
1593 | 5 | return new ICmpInst(ICmpInst::ICMP_UGT, X, Y); |
1594 | 58 | } |
1595 | 413 | if (Pred == ICmpInst::ICMP_ULT) { |
1596 | | // (xor X, -C) <u C --> X >u ~C (when C is a power of 2) |
1597 | 95 | if (*XorC == -C && C.isPowerOf2()) |
1598 | 2 | return new ICmpInst(ICmpInst::ICMP_UGT, X, |
1599 | 2 | ConstantInt::get(X->getType(), ~C)); |
1600 | | // (xor X, C) <u C --> X >u ~C (when -C is a power of 2) |
1601 | 93 | if (*XorC == C && (-C).isPowerOf2()) |
1602 | 1 | return new ICmpInst(ICmpInst::ICMP_UGT, X, |
1603 | 1 | ConstantInt::get(X->getType(), ~C)); |
1604 | 93 | } |
1605 | 410 | return nullptr; |
1606 | 413 | } |
1607 | | |
1608 | | /// For power-of-2 C: |
1609 | | /// ((X s>> ShiftC) ^ X) u< C --> (X + C) u< (C << 1) |
1610 | | /// ((X s>> ShiftC) ^ X) u> (C - 1) --> (X + C) u> ((C << 1) - 1) |
1611 | | Instruction *InstCombinerImpl::foldICmpXorShiftConst(ICmpInst &Cmp, |
1612 | | BinaryOperator *Xor, |
1613 | 775 | const APInt &C) { |
1614 | 775 | CmpInst::Predicate Pred = Cmp.getPredicate(); |
1615 | 775 | APInt PowerOf2; |
1616 | 775 | if (Pred == ICmpInst::ICMP_ULT) |
1617 | 115 | PowerOf2 = C; |
1618 | 660 | else if (Pred == ICmpInst::ICMP_UGT && !C.isMaxValue()) |
1619 | 165 | PowerOf2 = C + 1; |
1620 | 495 | else |
1621 | 495 | return nullptr; |
1622 | 280 | if (!PowerOf2.isPowerOf2()) |
1623 | 135 | return nullptr; |
1624 | 145 | Value *X; |
1625 | 145 | const APInt *ShiftC; |
1626 | 145 | if (!match(Xor, m_OneUse(m_c_Xor(m_Value(X), |
1627 | 145 | m_AShr(m_Deferred(X), m_APInt(ShiftC)))))) |
1628 | 145 | return nullptr; |
1629 | 0 | uint64_t Shift = ShiftC->getLimitedValue(); |
1630 | 0 | Type *XType = X->getType(); |
1631 | 0 | if (Shift == 0 || PowerOf2.isMinSignedValue()) |
1632 | 0 | return nullptr; |
1633 | 0 | Value *Add = Builder.CreateAdd(X, ConstantInt::get(XType, PowerOf2)); |
1634 | 0 | APInt Bound = |
1635 | 0 | Pred == ICmpInst::ICMP_ULT ? PowerOf2 << 1 : ((PowerOf2 << 1) - 1); |
1636 | 0 | return new ICmpInst(Pred, Add, ConstantInt::get(XType, Bound)); |
1637 | 0 | } |
1638 | | |
1639 | | /// Fold icmp (and (sh X, Y), C2), C1. |
1640 | | Instruction *InstCombinerImpl::foldICmpAndShift(ICmpInst &Cmp, |
1641 | | BinaryOperator *And, |
1642 | | const APInt &C1, |
1643 | 5.72k | const APInt &C2) { |
1644 | 5.72k | BinaryOperator *Shift = dyn_cast<BinaryOperator>(And->getOperand(0)); |
1645 | 5.72k | if (!Shift || !Shift->isShift()) |
1646 | 5.34k | return nullptr; |
1647 | | |
1648 | | // If this is: (X >> C3) & C2 != C1 (where any shift and any compare could |
1649 | | // exist), turn it into (X & (C2 << C3)) != (C1 << C3). This happens a LOT in |
1650 | | // code produced by the clang front-end, for bitfield access. |
1651 | | // This seemingly simple opportunity to fold away a shift turns out to be |
1652 | | // rather complicated. See PR17827 for details. |
1653 | 387 | unsigned ShiftOpcode = Shift->getOpcode(); |
1654 | 387 | bool IsShl = ShiftOpcode == Instruction::Shl; |
1655 | 387 | const APInt *C3; |
1656 | 387 | if (match(Shift->getOperand(1), m_APInt(C3))) { |
1657 | 326 | APInt NewAndCst, NewCmpCst; |
1658 | 326 | bool AnyCmpCstBitsShiftedOut; |
1659 | 326 | if (ShiftOpcode == Instruction::Shl) { |
1660 | | // For a left shift, we can fold if the comparison is not signed. We can |
1661 | | // also fold a signed comparison if the mask value and comparison value |
1662 | | // are not negative. These constraints may not be obvious, but we can |
1663 | | // prove that they are correct using an SMT solver. |
1664 | 16 | if (Cmp.isSigned() && (C2.isNegative() || C1.isNegative())) |
1665 | 10 | return nullptr; |
1666 | | |
1667 | 6 | NewCmpCst = C1.lshr(*C3); |
1668 | 6 | NewAndCst = C2.lshr(*C3); |
1669 | 6 | AnyCmpCstBitsShiftedOut = NewCmpCst.shl(*C3) != C1; |
1670 | 310 | } else if (ShiftOpcode == Instruction::LShr) { |
1671 | | // For a logical right shift, we can fold if the comparison is not signed. |
1672 | | // We can also fold a signed comparison if the shifted mask value and the |
1673 | | // shifted comparison value are not negative. These constraints may not be |
1674 | | // obvious, but we can prove that they are correct using an SMT solver. |
1675 | 278 | NewCmpCst = C1.shl(*C3); |
1676 | 278 | NewAndCst = C2.shl(*C3); |
1677 | 278 | AnyCmpCstBitsShiftedOut = NewCmpCst.lshr(*C3) != C1; |
1678 | 278 | if (Cmp.isSigned() && (NewAndCst.isNegative() || NewCmpCst.isNegative())) |
1679 | 0 | return nullptr; |
1680 | 278 | } else { |
1681 | | // For an arithmetic shift, check that both constants don't use (in a |
1682 | | // signed sense) the top bits being shifted out. |
1683 | 32 | assert(ShiftOpcode == Instruction::AShr && "Unknown shift opcode"); |
1684 | 0 | NewCmpCst = C1.shl(*C3); |
1685 | 32 | NewAndCst = C2.shl(*C3); |
1686 | 32 | AnyCmpCstBitsShiftedOut = NewCmpCst.ashr(*C3) != C1; |
1687 | 32 | if (NewAndCst.ashr(*C3) != C2) |
1688 | 8 | return nullptr; |
1689 | 32 | } |
1690 | | |
1691 | 308 | if (AnyCmpCstBitsShiftedOut) { |
1692 | | // If we shifted bits out, the fold is not going to work out. As a |
1693 | | // special case, check to see if this means that the result is always |
1694 | | // true or false now. |
1695 | 3 | if (Cmp.getPredicate() == ICmpInst::ICMP_EQ) |
1696 | 1 | return replaceInstUsesWith(Cmp, ConstantInt::getFalse(Cmp.getType())); |
1697 | 2 | if (Cmp.getPredicate() == ICmpInst::ICMP_NE) |
1698 | 1 | return replaceInstUsesWith(Cmp, ConstantInt::getTrue(Cmp.getType())); |
1699 | 305 | } else { |
1700 | 305 | Value *NewAnd = Builder.CreateAnd( |
1701 | 305 | Shift->getOperand(0), ConstantInt::get(And->getType(), NewAndCst)); |
1702 | 305 | return new ICmpInst(Cmp.getPredicate(), |
1703 | 305 | NewAnd, ConstantInt::get(And->getType(), NewCmpCst)); |
1704 | 305 | } |
1705 | 308 | } |
1706 | | |
1707 | | // Turn ((X >> Y) & C2) == 0 into (X & (C2 << Y)) == 0. The latter is |
1708 | | // preferable because it allows the C2 << Y expression to be hoisted out of a |
1709 | | // loop if Y is invariant and X is not. |
1710 | 62 | if (Shift->hasOneUse() && C1.isZero() && Cmp.isEquality() && |
1711 | 62 | !Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) { |
1712 | | // Compute C2 << Y (or C2 u>> Y when the original shift is a shl). |
1713 | 8 | Value *NewShift = |
1714 | 8 | IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1)) |
1715 | 8 | : Builder.CreateShl(And->getOperand(1), Shift->getOperand(1)); |
1716 | | |
1717 | | // Compute X & (C2 << Y). |
1718 | 8 | Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift); |
1719 | 8 | return replaceOperand(Cmp, 0, NewAnd); |
1720 | 8 | } |
1721 | | |
1722 | 54 | return nullptr; |
1723 | 62 | } |
1724 | | |
1725 | | /// Fold icmp (and X, C2), C1. |
1726 | | Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp, |
1727 | | BinaryOperator *And, |
1728 | 7.86k | const APInt &C1) { |
1729 | 7.86k | bool isICMP_NE = Cmp.getPredicate() == ICmpInst::ICMP_NE; |
1730 | | |
1731 | | // For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1 |
1732 | | // TODO: We canonicalize to the longer form for scalars because we have |
1733 | | // better analysis/folds for icmp, and codegen may be better with icmp. |
1734 | 7.86k | if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isZero() && |
1735 | 7.86k | match(And->getOperand(1), m_One())) |
1736 | 67 | return new TruncInst(And->getOperand(0), Cmp.getType()); |
1737 | | |
1738 | 7.80k | const APInt *C2; |
1739 | 7.80k | Value *X; |
1740 | 7.80k | if (!match(And, m_And(m_Value(X), m_APInt(C2)))) |
1741 | 1.30k | return nullptr; |
1742 | | |
1743 | | // Don't perform the following transforms if the AND has multiple uses |
1744 | 6.49k | if (!And->hasOneUse()) |
1745 | 660 | return nullptr; |
1746 | | |
1747 | 5.83k | if (Cmp.isEquality() && C1.isZero()) { |
1748 | | // Restrict this fold to single-use 'and' (PR10267). |
1749 | | // Replace (and X, (1 << (size(X)-1))) != 0 with X s< 0 |
1750 | 3.83k | if (C2->isSignMask()) { |
1751 | 46 | Constant *Zero = Constant::getNullValue(X->getType()); |
1752 | 46 | auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; |
1753 | 46 | return new ICmpInst(NewPred, X, Zero); |
1754 | 46 | } |
1755 | | |
1756 | 3.79k | APInt NewC2 = *C2; |
1757 | 3.79k | KnownBits Know = computeKnownBits(And->getOperand(0), 0, And); |
1758 | | // Set high zeros of C2 to allow matching negated power-of-2. |
1759 | 3.79k | NewC2 = *C2 | APInt::getHighBitsSet(C2->getBitWidth(), |
1760 | 3.79k | Know.countMinLeadingZeros()); |
1761 | | |
1762 | | // Restrict this fold only for single-use 'and' (PR10267). |
1763 | | // ((%x & C) == 0) --> %x u< (-C) iff (-C) is power of two. |
1764 | 3.79k | if (NewC2.isNegatedPowerOf2()) { |
1765 | 54 | Constant *NegBOC = ConstantInt::get(And->getType(), -NewC2); |
1766 | 54 | auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; |
1767 | 54 | return new ICmpInst(NewPred, X, NegBOC); |
1768 | 54 | } |
1769 | 3.79k | } |
1770 | | |
1771 | | // If the LHS is an 'and' of a truncate and we can widen the and/compare to |
1772 | | // the input width without changing the value produced, eliminate the cast: |
1773 | | // |
1774 | | // icmp (and (trunc W), C2), C1 -> icmp (and W, C2'), C1' |
1775 | | // |
1776 | | // We can do this transformation if the constants do not have their sign bits |
1777 | | // set or if it is an equality comparison. Extending a relational comparison |
1778 | | // when we're checking the sign bit would not work. |
1779 | 5.73k | Value *W; |
1780 | 5.73k | if (match(And->getOperand(0), m_OneUse(m_Trunc(m_Value(W)))) && |
1781 | 5.73k | (Cmp.isEquality() || (!C1.isNegative() && !C2->isNegative()))) { |
1782 | | // TODO: Is this a good transform for vectors? Wider types may reduce |
1783 | | // throughput. Should this transform be limited (even for scalars) by using |
1784 | | // shouldChangeType()? |
1785 | 10 | if (!Cmp.getType()->isVectorTy()) { |
1786 | 10 | Type *WideType = W->getType(); |
1787 | 10 | unsigned WideScalarBits = WideType->getScalarSizeInBits(); |
1788 | 10 | Constant *ZextC1 = ConstantInt::get(WideType, C1.zext(WideScalarBits)); |
1789 | 10 | Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits)); |
1790 | 10 | Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName()); |
1791 | 10 | return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1); |
1792 | 10 | } |
1793 | 10 | } |
1794 | | |
1795 | 5.72k | if (Instruction *I = foldICmpAndShift(Cmp, And, C1, *C2)) |
1796 | 315 | return I; |
1797 | | |
1798 | | // (icmp pred (and (or (lshr A, B), A), 1), 0) --> |
1799 | | // (icmp pred (and A, (or (shl 1, B), 1)), 0) |
1800 | | // |
1801 | | // iff pred isn't signed |
1802 | 5.41k | if (!Cmp.isSigned() && C1.isZero() && And->getOperand(0)->hasOneUse() && |
1803 | 5.41k | match(And->getOperand(1), m_One())) { |
1804 | 626 | Constant *One = cast<Constant>(And->getOperand(1)); |
1805 | 626 | Value *Or = And->getOperand(0); |
1806 | 626 | Value *A, *B, *LShr; |
1807 | 626 | if (match(Or, m_Or(m_Value(LShr), m_Value(A))) && |
1808 | 626 | match(LShr, m_LShr(m_Specific(A), m_Value(B)))) { |
1809 | 3 | unsigned UsesRemoved = 0; |
1810 | 3 | if (And->hasOneUse()) |
1811 | 3 | ++UsesRemoved; |
1812 | 3 | if (Or->hasOneUse()) |
1813 | 3 | ++UsesRemoved; |
1814 | 3 | if (LShr->hasOneUse()) |
1815 | 3 | ++UsesRemoved; |
1816 | | |
1817 | | // Compute A & ((1 << B) | 1) |
1818 | 3 | unsigned RequireUsesRemoved = match(B, m_ImmConstant()) ? 1 : 3; |
1819 | 3 | if (UsesRemoved >= RequireUsesRemoved) { |
1820 | 3 | Value *NewOr = |
1821 | 3 | Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(), |
1822 | 3 | /*HasNUW=*/true), |
1823 | 3 | One, Or->getName()); |
1824 | 3 | Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName()); |
1825 | 3 | return replaceOperand(Cmp, 0, NewAnd); |
1826 | 3 | } |
1827 | 3 | } |
1828 | 626 | } |
1829 | | |
1830 | 5.41k | return nullptr; |
1831 | 5.41k | } |
1832 | | |
1833 | | /// Fold icmp (and X, Y), C. |
1834 | | Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp, |
1835 | | BinaryOperator *And, |
1836 | 7.86k | const APInt &C) { |
1837 | 7.86k | if (Instruction *I = foldICmpAndConstConst(Cmp, And, C)) |
1838 | 495 | return I; |
1839 | | |
1840 | 7.37k | const ICmpInst::Predicate Pred = Cmp.getPredicate(); |
1841 | 7.37k | bool TrueIfNeg; |
1842 | 7.37k | if (isSignBitCheck(Pred, C, TrueIfNeg)) { |
1843 | | // ((X - 1) & ~X) < 0 --> X == 0 |
1844 | | // ((X - 1) & ~X) >= 0 --> X != 0 |
1845 | 28 | Value *X; |
1846 | 28 | if (match(And->getOperand(0), m_Add(m_Value(X), m_AllOnes())) && |
1847 | 28 | match(And->getOperand(1), m_Not(m_Specific(X)))) { |
1848 | 3 | auto NewPred = TrueIfNeg ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE; |
1849 | 3 | return new ICmpInst(NewPred, X, ConstantInt::getNullValue(X->getType())); |
1850 | 3 | } |
1851 | | // (X & -X) < 0 --> X == MinSignedC |
1852 | | // (X & -X) > -1 --> X != MinSignedC |
1853 | 25 | if (match(And, m_c_And(m_Neg(m_Value(X)), m_Deferred(X)))) { |
1854 | 0 | Constant *MinSignedC = ConstantInt::get( |
1855 | 0 | X->getType(), |
1856 | 0 | APInt::getSignedMinValue(X->getType()->getScalarSizeInBits())); |
1857 | 0 | auto NewPred = TrueIfNeg ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE; |
1858 | 0 | return new ICmpInst(NewPred, X, MinSignedC); |
1859 | 0 | } |
1860 | 25 | } |
1861 | | |
1862 | | // TODO: These all require that Y is constant too, so refactor with the above. |
1863 | | |
1864 | | // Try to optimize things like "A[i] & 42 == 0" to index computations. |
1865 | 7.36k | Value *X = And->getOperand(0); |
1866 | 7.36k | Value *Y = And->getOperand(1); |
1867 | 7.36k | if (auto *C2 = dyn_cast<ConstantInt>(Y)) |
1868 | 5.68k | if (auto *LI = dyn_cast<LoadInst>(X)) |
1869 | 468 | if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) |
1870 | 62 | if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) |
1871 | 10 | if (Instruction *Res = |
1872 | 10 | foldCmpLoadFromIndexedGlobal(LI, GEP, GV, Cmp, C2)) |
1873 | 7 | return Res; |
1874 | | |
1875 | 7.36k | if (!Cmp.isEquality()) |
1876 | 234 | return nullptr; |
1877 | | |
1878 | | // X & -C == -C -> X > u ~C |
1879 | | // X & -C != -C -> X <= u ~C |
1880 | | // iff C is a power of 2 |
1881 | 7.12k | if (Cmp.getOperand(1) == Y && C.isNegatedPowerOf2()) { |
1882 | 26 | auto NewPred = |
1883 | 26 | Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE; |
1884 | 26 | return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1)))); |
1885 | 26 | } |
1886 | | |
1887 | | // If we are testing the intersection of 2 select-of-nonzero-constants with no |
1888 | | // common bits set, it's the same as checking if exactly one select condition |
1889 | | // is set: |
1890 | | // ((A ? TC : FC) & (B ? TC : FC)) == 0 --> xor A, B |
1891 | | // ((A ? TC : FC) & (B ? TC : FC)) != 0 --> not(xor A, B) |
1892 | | // TODO: Generalize for non-constant values. |
1893 | | // TODO: Handle signed/unsigned predicates. |
1894 | | // TODO: Handle other bitwise logic connectors. |
1895 | | // TODO: Extend to handle a non-zero compare constant. |
1896 | 7.10k | if (C.isZero() && (Pred == CmpInst::ICMP_EQ || And->hasOneUse())) { |
1897 | 5.03k | assert(Cmp.isEquality() && "Not expecting non-equality predicates"); |
1898 | 0 | Value *A, *B; |
1899 | 5.03k | const APInt *TC, *FC; |
1900 | 5.03k | if (match(X, m_Select(m_Value(A), m_APInt(TC), m_APInt(FC))) && |
1901 | 5.03k | match(Y, |
1902 | 0 | m_Select(m_Value(B), m_SpecificInt(*TC), m_SpecificInt(*FC))) && |
1903 | 5.03k | !TC->isZero() && !FC->isZero() && !TC->intersects(*FC)) { |
1904 | 0 | Value *R = Builder.CreateXor(A, B); |
1905 | 0 | if (Pred == CmpInst::ICMP_NE) |
1906 | 0 | R = Builder.CreateNot(R); |
1907 | 0 | return replaceInstUsesWith(Cmp, R); |
1908 | 0 | } |
1909 | 5.03k | } |
1910 | | |
1911 | | // ((zext i1 X) & Y) == 0 --> !((trunc Y) & X) |
1912 | | // ((zext i1 X) & Y) != 0 --> ((trunc Y) & X) |
1913 | | // ((zext i1 X) & Y) == 1 --> ((trunc Y) & X) |
1914 | | // ((zext i1 X) & Y) != 1 --> !((trunc Y) & X) |
1915 | 7.10k | if (match(And, m_OneUse(m_c_And(m_OneUse(m_ZExt(m_Value(X))), m_Value(Y)))) && |
1916 | 7.10k | X->getType()->isIntOrIntVectorTy(1) && (C.isZero() || C.isOne())) { |
1917 | 0 | Value *TruncY = Builder.CreateTrunc(Y, X->getType()); |
1918 | 0 | if (C.isZero() ^ (Pred == CmpInst::ICMP_NE)) { |
1919 | 0 | Value *And = Builder.CreateAnd(TruncY, X); |
1920 | 0 | return BinaryOperator::CreateNot(And); |
1921 | 0 | } |
1922 | 0 | return BinaryOperator::CreateAnd(TruncY, X); |
1923 | 0 | } |
1924 | | |
1925 | 7.10k | return nullptr; |
1926 | 7.10k | } |
1927 | | |
1928 | | /// Fold icmp eq/ne (or (xor/sub (X1, X2), xor/sub (X3, X4))), 0. |
1929 | | static Value *foldICmpOrXorSubChain(ICmpInst &Cmp, BinaryOperator *Or, |
1930 | 62 | InstCombiner::BuilderTy &Builder) { |
1931 | | // Are we using xors or subs to bitwise check for a pair or pairs of |
1932 | | // (in)equalities? Convert to a shorter form that has more potential to be |
1933 | | // folded even further. |
1934 | | // ((X1 ^/- X2) || (X3 ^/- X4)) == 0 --> (X1 == X2) && (X3 == X4) |
1935 | | // ((X1 ^/- X2) || (X3 ^/- X4)) != 0 --> (X1 != X2) || (X3 != X4) |
1936 | | // ((X1 ^/- X2) || (X3 ^/- X4) || (X5 ^/- X6)) == 0 --> |
1937 | | // (X1 == X2) && (X3 == X4) && (X5 == X6) |
1938 | | // ((X1 ^/- X2) || (X3 ^/- X4) || (X5 ^/- X6)) != 0 --> |
1939 | | // (X1 != X2) || (X3 != X4) || (X5 != X6) |
1940 | 62 | SmallVector<std::pair<Value *, Value *>, 2> CmpValues; |
1941 | 62 | SmallVector<Value *, 16> WorkList(1, Or); |
1942 | | |
1943 | 124 | while (!WorkList.empty()) { |
1944 | 124 | auto MatchOrOperatorArgument = [&](Value *OrOperatorArgument) { |
1945 | 124 | Value *Lhs, *Rhs; |
1946 | | |
1947 | 124 | if (match(OrOperatorArgument, |
1948 | 124 | m_OneUse(m_Xor(m_Value(Lhs), m_Value(Rhs))))) { |
1949 | 6 | CmpValues.emplace_back(Lhs, Rhs); |
1950 | 6 | return; |
1951 | 6 | } |
1952 | | |
1953 | 118 | if (match(OrOperatorArgument, |
1954 | 118 | m_OneUse(m_Sub(m_Value(Lhs), m_Value(Rhs))))) { |
1955 | 1 | CmpValues.emplace_back(Lhs, Rhs); |
1956 | 1 | return; |
1957 | 1 | } |
1958 | | |
1959 | 117 | WorkList.push_back(OrOperatorArgument); |
1960 | 117 | }; |
1961 | | |
1962 | 121 | Value *CurrentValue = WorkList.pop_back_val(); |
1963 | 121 | Value *OrOperatorLhs, *OrOperatorRhs; |
1964 | | |
1965 | 121 | if (!match(CurrentValue, |
1966 | 121 | m_Or(m_Value(OrOperatorLhs), m_Value(OrOperatorRhs)))) { |
1967 | 59 | return nullptr; |
1968 | 59 | } |
1969 | | |
1970 | 62 | MatchOrOperatorArgument(OrOperatorRhs); |
1971 | 62 | MatchOrOperatorArgument(OrOperatorLhs); |
1972 | 62 | } |
1973 | | |
1974 | 3 | ICmpInst::Predicate Pred = Cmp.getPredicate(); |
1975 | 3 | auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; |
1976 | 3 | Value *LhsCmp = Builder.CreateICmp(Pred, CmpValues.rbegin()->first, |
1977 | 3 | CmpValues.rbegin()->second); |
1978 | | |
1979 | 6 | for (auto It = CmpValues.rbegin() + 1; It != CmpValues.rend(); ++It) { |
1980 | 3 | Value *RhsCmp = Builder.CreateICmp(Pred, It->first, It->second); |
1981 | 3 | LhsCmp = Builder.CreateBinOp(BOpc, LhsCmp, RhsCmp); |
1982 | 3 | } |
1983 | | |
1984 | 3 | return LhsCmp; |
1985 | 62 | } |
1986 | | |
1987 | | /// Fold icmp (or X, Y), C. |
1988 | | Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp, |
1989 | | BinaryOperator *Or, |
1990 | 393 | const APInt &C) { |
1991 | 393 | ICmpInst::Predicate Pred = Cmp.getPredicate(); |
1992 | 393 | if (C.isOne()) { |
1993 | | // icmp slt signum(V) 1 --> icmp slt V, 1 |
1994 | 27 | Value *V = nullptr; |
1995 | 27 | if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V)))) |
1996 | 0 | return new ICmpInst(ICmpInst::ICMP_SLT, V, |
1997 | 0 | ConstantInt::get(V->getType(), 1)); |
1998 | 27 | } |
1999 | | |
2000 | 393 | Value *OrOp0 = Or->getOperand(0), *OrOp1 = Or->getOperand(1); |
2001 | 393 | const APInt *MaskC; |
2002 | 393 | if (match(OrOp1, m_APInt(MaskC)) && Cmp.isEquality()) { |
2003 | 83 | if (*MaskC == C && (C + 1).isPowerOf2()) { |
2004 | | // X | C == C --> X <=u C |
2005 | | // X | C != C --> X >u C |
2006 | | // iff C+1 is a power of 2 (C is a bitmask of the low bits) |
2007 | 19 | Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; |
2008 | 19 | return new ICmpInst(Pred, OrOp0, OrOp1); |
2009 | 19 | } |
2010 | | |
2011 | | // More general: canonicalize 'equality with set bits mask' to |
2012 | | // 'equality with clear bits mask'. |
2013 | | // (X | MaskC) == C --> (X & ~MaskC) == C ^ MaskC |
2014 | | // (X | MaskC) != C --> (X & ~MaskC) != C ^ MaskC |
2015 | 64 | if (Or->hasOneUse()) { |
2016 | 28 | Value *And = Builder.CreateAnd(OrOp0, ~(*MaskC)); |
2017 | 28 | Constant *NewC = ConstantInt::get(Or->getType(), C ^ (*MaskC)); |
2018 | 28 | return new ICmpInst(Pred, And, NewC); |
2019 | 28 | } |
2020 | 64 | } |
2021 | | |
2022 | | // (X | (X-1)) s< 0 --> X s< 1 |
2023 | | // (X | (X-1)) s> -1 --> X s> 0 |
2024 | 346 | Value *X; |
2025 | 346 | bool TrueIfSigned; |
2026 | 346 | if (isSignBitCheck(Pred, C, TrueIfSigned) && |
2027 | 346 | match(Or, m_c_Or(m_Add(m_Value(X), m_AllOnes()), m_Deferred(X)))) { |
2028 | 0 | auto NewPred = TrueIfSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGT; |
2029 | 0 | Constant *NewC = ConstantInt::get(X->getType(), TrueIfSigned ? 1 : 0); |
2030 | 0 | return new ICmpInst(NewPred, X, NewC); |
2031 | 0 | } |
2032 | | |
2033 | 346 | const APInt *OrC; |
2034 | | // icmp(X | OrC, C) --> icmp(X, 0) |
2035 | 346 | if (C.isNonNegative() && match(Or, m_Or(m_Value(X), m_APInt(OrC)))) { |
2036 | 107 | switch (Pred) { |
2037 | | // X | OrC s< C --> X s< 0 iff OrC s>= C s>= 0 |
2038 | 11 | case ICmpInst::ICMP_SLT: |
2039 | | // X | OrC s>= C --> X s>= 0 iff OrC s>= C s>= 0 |
2040 | 11 | case ICmpInst::ICMP_SGE: |
2041 | 11 | if (OrC->sge(C)) |
2042 | 10 | return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType())); |
2043 | 1 | break; |
2044 | | // X | OrC s<= C --> X s< 0 iff OrC s> C s>= 0 |
2045 | 1 | case ICmpInst::ICMP_SLE: |
2046 | | // X | OrC s> C --> X s>= 0 iff OrC s> C s>= 0 |
2047 | 14 | case ICmpInst::ICMP_SGT: |
2048 | 14 | if (OrC->sgt(C)) |
2049 | 13 | return new ICmpInst(ICmpInst::getFlippedStrictnessPredicate(Pred), X, |
2050 | 13 | ConstantInt::getNullValue(X->getType())); |
2051 | 1 | break; |
2052 | 82 | default: |
2053 | 82 | break; |
2054 | 107 | } |
2055 | 107 | } |
2056 | | |
2057 | 323 | if (!Cmp.isEquality() || !C.isZero() || !Or->hasOneUse()) |
2058 | 259 | return nullptr; |
2059 | | |
2060 | 64 | Value *P, *Q; |
2061 | 64 | if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) { |
2062 | | // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 |
2063 | | // -> and (icmp eq P, null), (icmp eq Q, null). |
2064 | 2 | Value *CmpP = |
2065 | 2 | Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType())); |
2066 | 2 | Value *CmpQ = |
2067 | 2 | Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType())); |
2068 | 2 | auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; |
2069 | 2 | return BinaryOperator::Create(BOpc, CmpP, CmpQ); |
2070 | 2 | } |
2071 | | |
2072 | 62 | if (Value *V = foldICmpOrXorSubChain(Cmp, Or, Builder)) |
2073 | 3 | return replaceInstUsesWith(Cmp, V); |
2074 | | |
2075 | 59 | return nullptr; |
2076 | 62 | } |
2077 | | |
2078 | | /// Fold icmp (mul X, Y), C. |
2079 | | Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp, |
2080 | | BinaryOperator *Mul, |
2081 | 1.20k | const APInt &C) { |
2082 | 1.20k | ICmpInst::Predicate Pred = Cmp.getPredicate(); |
2083 | 1.20k | Type *MulTy = Mul->getType(); |
2084 | 1.20k | Value *X = Mul->getOperand(0); |
2085 | | |
2086 | | // If there's no overflow: |
2087 | | // X * X == 0 --> X == 0 |
2088 | | // X * X != 0 --> X != 0 |
2089 | 1.20k | if (Cmp.isEquality() && C.isZero() && X == Mul->getOperand(1) && |
2090 | 1.20k | (Mul->hasNoUnsignedWrap() || Mul->hasNoSignedWrap())) |
2091 | 33 | return new ICmpInst(Pred, X, ConstantInt::getNullValue(MulTy)); |
2092 | | |
2093 | 1.17k | const APInt *MulC; |
2094 | 1.17k | if (!match(Mul->getOperand(1), m_APInt(MulC))) |
2095 | 645 | return nullptr; |
2096 | | |
2097 | | // If this is a test of the sign bit and the multiply is sign-preserving with |
2098 | | // a constant operand, use the multiply LHS operand instead: |
2099 | | // (X * +MulC) < 0 --> X < 0 |
2100 | | // (X * -MulC) < 0 --> X > 0 |
2101 | 530 | if (isSignTest(Pred, C) && Mul->hasNoSignedWrap()) { |
2102 | 15 | if (MulC->isNegative()) |
2103 | 10 | Pred = ICmpInst::getSwappedPredicate(Pred); |
2104 | 15 | return new ICmpInst(Pred, X, ConstantInt::getNullValue(MulTy)); |
2105 | 15 | } |
2106 | | |
2107 | 515 | if (MulC->isZero()) |
2108 | 0 | return nullptr; |
2109 | | |
2110 | | // If the multiply does not wrap or the constant is odd, try to divide the |
2111 | | // compare constant by the multiplication factor. |
2112 | 515 | if (Cmp.isEquality()) { |
2113 | | // (mul nsw X, MulC) eq/ne C --> X eq/ne C /s MulC |
2114 | 128 | if (Mul->hasNoSignedWrap() && C.srem(*MulC).isZero()) { |
2115 | 3 | Constant *NewC = ConstantInt::get(MulTy, C.sdiv(*MulC)); |
2116 | 3 | return new ICmpInst(Pred, X, NewC); |
2117 | 3 | } |
2118 | | |
2119 | | // C % MulC == 0 is weaker than we could use if MulC is odd because it |
2120 | | // correct to transform if MulC * N == C including overflow. I.e with i8 |
2121 | | // (icmp eq (mul X, 5), 101) -> (icmp eq X, 225) but since 101 % 5 != 0, we |
2122 | | // miss that case. |
2123 | 125 | if (C.urem(*MulC).isZero()) { |
2124 | | // (mul nuw X, MulC) eq/ne C --> X eq/ne C /u MulC |
2125 | | // (mul X, OddC) eq/ne N * C --> X eq/ne N |
2126 | 33 | if ((*MulC & 1).isOne() || Mul->hasNoUnsignedWrap()) { |
2127 | 10 | Constant *NewC = ConstantInt::get(MulTy, C.udiv(*MulC)); |
2128 | 10 | return new ICmpInst(Pred, X, NewC); |
2129 | 10 | } |
2130 | 33 | } |
2131 | 125 | } |
2132 | | |
2133 | | // With a matching no-overflow guarantee, fold the constants: |
2134 | | // (X * MulC) < C --> X < (C / MulC) |
2135 | | // (X * MulC) > C --> X > (C / MulC) |
2136 | | // TODO: Assert that Pred is not equal to SGE, SLE, UGE, ULE? |
2137 | 502 | Constant *NewC = nullptr; |
2138 | 502 | if (Mul->hasNoSignedWrap() && ICmpInst::isSigned(Pred)) { |
2139 | | // MININT / -1 --> overflow. |
2140 | 3 | if (C.isMinSignedValue() && MulC->isAllOnes()) |
2141 | 0 | return nullptr; |
2142 | 3 | if (MulC->isNegative()) |
2143 | 1 | Pred = ICmpInst::getSwappedPredicate(Pred); |
2144 | | |
2145 | 3 | if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) { |
2146 | 1 | NewC = ConstantInt::get( |
2147 | 1 | MulTy, APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::UP)); |
2148 | 2 | } else { |
2149 | 2 | assert((Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_SGT) && |
2150 | 2 | "Unexpected predicate"); |
2151 | 0 | NewC = ConstantInt::get( |
2152 | 2 | MulTy, APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::DOWN)); |
2153 | 2 | } |
2154 | 499 | } else if (Mul->hasNoUnsignedWrap() && ICmpInst::isUnsigned(Pred)) { |
2155 | 7 | if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE) { |
2156 | 3 | NewC = ConstantInt::get( |
2157 | 3 | MulTy, APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::UP)); |
2158 | 4 | } else { |
2159 | 4 | assert((Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) && |
2160 | 4 | "Unexpected predicate"); |
2161 | 0 | NewC = ConstantInt::get( |
2162 | 4 | MulTy, APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::DOWN)); |
2163 | 4 | } |
2164 | 7 | } |
2165 | | |
2166 | 502 | return NewC ? new ICmpInst(Pred, X, NewC) : nullptr; |
2167 | 502 | } |
2168 | | |
2169 | | /// Fold icmp (shl 1, Y), C. |
2170 | | static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl, |
2171 | 228 | const APInt &C) { |
2172 | 228 | Value *Y; |
2173 | 228 | if (!match(Shl, m_Shl(m_One(), m_Value(Y)))) |
2174 | 182 | return nullptr; |
2175 | | |
2176 | 46 | Type *ShiftType = Shl->getType(); |
2177 | 46 | unsigned TypeBits = C.getBitWidth(); |
2178 | 46 | bool CIsPowerOf2 = C.isPowerOf2(); |
2179 | 46 | ICmpInst::Predicate Pred = Cmp.getPredicate(); |
2180 | 46 | if (Cmp.isUnsigned()) { |
2181 | | // (1 << Y) pred C -> Y pred Log2(C) |
2182 | 26 | if (!CIsPowerOf2) { |
2183 | | // (1 << Y) < 30 -> Y <= 4 |
2184 | | // (1 << Y) <= 30 -> Y <= 4 |
2185 | | // (1 << Y) >= 30 -> Y > 4 |
2186 | | // (1 << Y) > 30 -> Y > 4 |
2187 | 13 | if (Pred == ICmpInst::ICMP_ULT) |
2188 | 7 | Pred = ICmpInst::ICMP_ULE; |
2189 | 6 | else if (Pred == ICmpInst::ICMP_UGE) |
2190 | 0 | Pred = ICmpInst::ICMP_UGT; |
2191 | 13 | } |
2192 | | |
2193 | 26 | unsigned CLog2 = C.logBase2(); |
2194 | 26 | return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2)); |
2195 | 26 | } else if (Cmp.isSigned()) { |
2196 | 20 | Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1); |
2197 | | // (1 << Y) > 0 -> Y != 31 |
2198 | | // (1 << Y) > C -> Y != 31 if C is negative. |
2199 | 20 | if (Pred == ICmpInst::ICMP_SGT && C.sle(0)) |
2200 | 5 | return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne); |
2201 | | |
2202 | | // (1 << Y) < 0 -> Y == 31 |
2203 | | // (1 << Y) < 1 -> Y == 31 |
2204 | | // (1 << Y) < C -> Y == 31 if C is negative and not signed min. |
2205 | | // Exclude signed min by subtracting 1 and lower the upper bound to 0. |
2206 | 15 | if (Pred == ICmpInst::ICMP_SLT && (C-1).sle(0)) |
2207 | 7 | return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne); |
2208 | 15 | } |
2209 | | |
2210 | 8 | return nullptr; |
2211 | 46 | } |
2212 | | |
2213 | | /// Fold icmp (shl X, Y), C. |
2214 | | Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp, |
2215 | | BinaryOperator *Shl, |
2216 | 1.12k | const APInt &C) { |
2217 | 1.12k | const APInt *ShiftVal; |
2218 | 1.12k | if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal))) |
2219 | 44 | return foldICmpShlConstConst(Cmp, Shl->getOperand(1), C, *ShiftVal); |
2220 | | |
2221 | 1.08k | ICmpInst::Predicate Pred = Cmp.getPredicate(); |
2222 | | // (icmp pred (shl nuw&nsw X, Y), Csle0) |
2223 | | // -> (icmp pred X, Csle0) |
2224 | | // |
2225 | | // The idea is the nuw/nsw essentially freeze the sign bit for the shift op |
2226 | | // so X's must be what is used. |
2227 | 1.08k | if (C.sle(0) && Shl->hasNoUnsignedWrap() && Shl->hasNoSignedWrap()) |
2228 | 54 | return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1)); |
2229 | | |
2230 | | // (icmp eq/ne (shl nuw|nsw X, Y), 0) |
2231 | | // -> (icmp eq/ne X, 0) |
2232 | 1.03k | if (ICmpInst::isEquality(Pred) && C.isZero() && |
2233 | 1.03k | (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap())) |
2234 | 61 | return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1)); |
2235 | | |
2236 | | // (icmp slt (shl nsw X, Y), 0/1) |
2237 | | // -> (icmp slt X, 0/1) |
2238 | | // (icmp sgt (shl nsw X, Y), 0/-1) |
2239 | | // -> (icmp sgt X, 0/-1) |
2240 | | // |
2241 | | // NB: sge/sle with a constant will canonicalize to sgt/slt. |
2242 | 970 | if (Shl->hasNoSignedWrap() && |
2243 | 970 | (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) |
2244 | 63 | if (C.isZero() || (Pred == ICmpInst::ICMP_SGT ? C.isAllOnes() : C.isOne())) |
2245 | 42 | return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1)); |
2246 | | |
2247 | 928 | const APInt *ShiftAmt; |
2248 | 928 | if (!match(Shl->getOperand(1), m_APInt(ShiftAmt))) |
2249 | 228 | return foldICmpShlOne(Cmp, Shl, C); |
2250 | | |
2251 | | // Check that the shift amount is in range. If not, don't perform undefined |
2252 | | // shifts. When the shift is visited, it will be simplified. |
2253 | 700 | unsigned TypeBits = C.getBitWidth(); |
2254 | 700 | if (ShiftAmt->uge(TypeBits)) |
2255 | 0 | return nullptr; |
2256 | | |
2257 | 700 | Value *X = Shl->getOperand(0); |
2258 | 700 | Type *ShType = Shl->getType(); |
2259 | | |
2260 | | // NSW guarantees that we are only shifting out sign bits from the high bits, |
2261 | | // so we can ASHR the compare constant without needing a mask and eliminate |
2262 | | // the shift. |
2263 | 700 | if (Shl->hasNoSignedWrap()) { |
2264 | 106 | if (Pred == ICmpInst::ICMP_SGT) { |
2265 | | // icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt) |
2266 | 14 | APInt ShiftedC = C.ashr(*ShiftAmt); |
2267 | 14 | return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); |
2268 | 14 | } |
2269 | 92 | if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && |
2270 | 92 | C.ashr(*ShiftAmt).shl(*ShiftAmt) == C) { |
2271 | 49 | APInt ShiftedC = C.ashr(*ShiftAmt); |
2272 | 49 | return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); |
2273 | 49 | } |
2274 | 43 | if (Pred == ICmpInst::ICMP_SLT) { |
2275 | | // SLE is the same as above, but SLE is canonicalized to SLT, so convert: |
2276 | | // (X << S) <=s C is equiv to X <=s (C >> S) for all C |
2277 | | // (X << S) <s (C + 1) is equiv to X <s (C >> S) + 1 if C <s SMAX |
2278 | | // (X << S) <s C is equiv to X <s ((C - 1) >> S) + 1 if C >s SMIN |
2279 | 7 | assert(!C.isMinSignedValue() && "Unexpected icmp slt"); |
2280 | 0 | APInt ShiftedC = (C - 1).ashr(*ShiftAmt) + 1; |
2281 | 7 | return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); |
2282 | 7 | } |
2283 | 43 | } |
2284 | | |
2285 | | // NUW guarantees that we are only shifting out zero bits from the high bits, |
2286 | | // so we can LSHR the compare constant without needing a mask and eliminate |
2287 | | // the shift. |
2288 | 630 | if (Shl->hasNoUnsignedWrap()) { |
2289 | 161 | if (Pred == ICmpInst::ICMP_UGT) { |
2290 | | // icmp Pred (shl nuw X, ShiftAmt), C --> icmp Pred X, (C >>u ShiftAmt) |
2291 | 29 | APInt ShiftedC = C.lshr(*ShiftAmt); |
2292 | 29 | return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); |
2293 | 29 | } |
2294 | 132 | if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && |
2295 | 132 | C.lshr(*ShiftAmt).shl(*ShiftAmt) == C) { |
2296 | 7 | APInt ShiftedC = C.lshr(*ShiftAmt); |
2297 | 7 | return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); |
2298 | 7 | } |
2299 | 125 | if (Pred == ICmpInst::ICMP_ULT) { |
2300 | | // ULE is the same as above, but ULE is canonicalized to ULT, so convert: |
2301 | | // (X << S) <=u C is equiv to X <=u (C >> S) for all C |
2302 | | // (X << S) <u (C + 1) is equiv to X <u (C >> S) + 1 if C <u ~0u |
2303 | | // (X << S) <u C is equiv to X <u ((C - 1) >> S) + 1 if C >u 0 |
2304 | 32 | assert(C.ugt(0) && "ult 0 should have been eliminated"); |
2305 | 0 | APInt ShiftedC = (C - 1).lshr(*ShiftAmt) + 1; |
2306 | 32 | return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); |
2307 | 32 | } |
2308 | 125 | } |
2309 | | |
2310 | 562 | if (Cmp.isEquality() && Shl->hasOneUse()) { |
2311 | | // Strength-reduce the shift into an 'and'. |
2312 | 73 | Constant *Mask = ConstantInt::get( |
2313 | 73 | ShType, |
2314 | 73 | APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue())); |
2315 | 73 | Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask"); |
2316 | 73 | Constant *LShrC = ConstantInt::get(ShType, C.lshr(*ShiftAmt)); |
2317 | 73 | return new ICmpInst(Pred, And, LShrC); |
2318 | 73 | } |
2319 | | |
2320 | | // Otherwise, if this is a comparison of the sign bit, simplify to and/test. |
2321 | 489 | bool TrueIfSigned = false; |
2322 | 489 | if (Shl->hasOneUse() && isSignBitCheck(Pred, C, TrueIfSigned)) { |
2323 | | // (X << 31) <s 0 --> (X & 1) != 0 |
2324 | 31 | Constant *Mask = ConstantInt::get( |
2325 | 31 | ShType, |
2326 | 31 | APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1)); |
2327 | 31 | Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask"); |
2328 | 31 | return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, |
2329 | 31 | And, Constant::getNullValue(ShType)); |
2330 | 31 | } |
2331 | | |
2332 | | // Simplify 'shl' inequality test into 'and' equality test. |
2333 | 458 | if (Cmp.isUnsigned() && Shl->hasOneUse()) { |
2334 | | // (X l<< C2) u<=/u> C1 iff C1+1 is power of two -> X & (~C1 l>> C2) ==/!= 0 |
2335 | 20 | if ((C + 1).isPowerOf2() && |
2336 | 20 | (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT)) { |
2337 | 1 | Value *And = Builder.CreateAnd(X, (~C).lshr(ShiftAmt->getZExtValue())); |
2338 | 1 | return new ICmpInst(Pred == ICmpInst::ICMP_ULE ? ICmpInst::ICMP_EQ |
2339 | 1 | : ICmpInst::ICMP_NE, |
2340 | 1 | And, Constant::getNullValue(ShType)); |
2341 | 1 | } |
2342 | | // (X l<< C2) u</u>= C1 iff C1 is power of two -> X & (-C1 l>> C2) ==/!= 0 |
2343 | 19 | if (C.isPowerOf2() && |
2344 | 19 | (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) { |
2345 | 7 | Value *And = |
2346 | 7 | Builder.CreateAnd(X, (~(C - 1)).lshr(ShiftAmt->getZExtValue())); |
2347 | 7 | return new ICmpInst(Pred == ICmpInst::ICMP_ULT ? ICmpInst::ICMP_EQ |
2348 | 7 | : ICmpInst::ICMP_NE, |
2349 | 7 | And, Constant::getNullValue(ShType)); |
2350 | 7 | } |
2351 | 19 | } |
2352 | | |
2353 | | // Transform (icmp pred iM (shl iM %v, N), C) |
2354 | | // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N)) |
2355 | | // Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N. |
2356 | | // This enables us to get rid of the shift in favor of a trunc that may be |
2357 | | // free on the target. It has the additional benefit of comparing to a |
2358 | | // smaller constant that may be more target-friendly. |
2359 | 450 | unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1); |
2360 | 450 | if (Shl->hasOneUse() && Amt != 0 && C.countr_zero() >= Amt && |
2361 | 450 | DL.isLegalInteger(TypeBits - Amt)) { |
2362 | 19 | Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt); |
2363 | 19 | if (auto *ShVTy = dyn_cast<VectorType>(ShType)) |
2364 | 4 | TruncTy = VectorType::get(TruncTy, ShVTy->getElementCount()); |
2365 | 19 | Constant *NewC = |
2366 | 19 | ConstantInt::get(TruncTy, C.ashr(*ShiftAmt).trunc(TypeBits - Amt)); |
2367 | 19 | return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC); |
2368 | 19 | } |
2369 | | |
2370 | 431 | return nullptr; |
2371 | 450 | } |
2372 | | |
2373 | | /// Fold icmp ({al}shr X, Y), C. |
2374 | | Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp, |
2375 | | BinaryOperator *Shr, |
2376 | 736 | const APInt &C) { |
2377 | | // An exact shr only shifts out zero bits, so: |
2378 | | // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0 |
2379 | 736 | Value *X = Shr->getOperand(0); |
2380 | 736 | CmpInst::Predicate Pred = Cmp.getPredicate(); |
2381 | 736 | if (Cmp.isEquality() && Shr->isExact() && C.isZero()) |
2382 | 36 | return new ICmpInst(Pred, X, Cmp.getOperand(1)); |
2383 | | |
2384 | 700 | bool IsAShr = Shr->getOpcode() == Instruction::AShr; |
2385 | 700 | const APInt *ShiftValC; |
2386 | 700 | if (match(X, m_APInt(ShiftValC))) { |
2387 | 56 | if (Cmp.isEquality()) |
2388 | 38 | return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftValC); |
2389 | | |
2390 | | // (ShiftValC >> Y) >s -1 --> Y != 0 with ShiftValC < 0 |
2391 | | // (ShiftValC >> Y) <s 0 --> Y == 0 with ShiftValC < 0 |
2392 | 18 | bool TrueIfSigned; |
2393 | 18 | if (!IsAShr && ShiftValC->isNegative() && |
2394 | 18 | isSignBitCheck(Pred, C, TrueIfSigned)) |
2395 | 1 | return new ICmpInst(TrueIfSigned ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE, |
2396 | 1 | Shr->getOperand(1), |
2397 | 1 | ConstantInt::getNullValue(X->getType())); |
2398 | | |
2399 | | // If the shifted constant is a power-of-2, test the shift amount directly: |
2400 | | // (ShiftValC >> Y) >u C --> X <u (LZ(C) - LZ(ShiftValC)) |
2401 | | // (ShiftValC >> Y) <u C --> X >=u (LZ(C-1) - LZ(ShiftValC)) |
2402 | 17 | if (!IsAShr && ShiftValC->isPowerOf2() && |
2403 | 17 | (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_ULT)) { |
2404 | 3 | bool IsUGT = Pred == CmpInst::ICMP_UGT; |
2405 | 3 | assert(ShiftValC->uge(C) && "Expected simplify of compare"); |
2406 | 0 | assert((IsUGT || !C.isZero()) && "Expected X u< 0 to simplify"); |
2407 | | |
2408 | 3 | unsigned CmpLZ = IsUGT ? C.countl_zero() : (C - 1).countl_zero(); |
2409 | 3 | unsigned ShiftLZ = ShiftValC->countl_zero(); |
2410 | 3 | Constant *NewC = ConstantInt::get(Shr->getType(), CmpLZ - ShiftLZ); |
2411 | 3 | auto NewPred = IsUGT ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE; |
2412 | 3 | return new ICmpInst(NewPred, Shr->getOperand(1), NewC); |
2413 | 3 | } |
2414 | 17 | } |
2415 | | |
2416 | 658 | const APInt *ShiftAmtC; |
2417 | 658 | if (!match(Shr->getOperand(1), m_APInt(ShiftAmtC))) |
2418 | 340 | return nullptr; |
2419 | | |
2420 | | // Check that the shift amount is in range. If not, don't perform undefined |
2421 | | // shifts. When the shift is visited it will be simplified. |
2422 | 318 | unsigned TypeBits = C.getBitWidth(); |
2423 | 318 | unsigned ShAmtVal = ShiftAmtC->getLimitedValue(TypeBits); |
2424 | 318 | if (ShAmtVal >= TypeBits || ShAmtVal == 0) |
2425 | 0 | return nullptr; |
2426 | | |
2427 | 318 | bool IsExact = Shr->isExact(); |
2428 | 318 | Type *ShrTy = Shr->getType(); |
2429 | | // TODO: If we could guarantee that InstSimplify would handle all of the |
2430 | | // constant-value-based preconditions in the folds below, then we could assert |
2431 | | // those conditions rather than checking them. This is difficult because of |
2432 | | // undef/poison (PR34838). |
2433 | 318 | if (IsAShr && Shr->hasOneUse()) { |
2434 | 63 | if (IsExact || Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) { |
2435 | | // When ShAmtC can be shifted losslessly: |
2436 | | // icmp PRED (ashr exact X, ShAmtC), C --> icmp PRED X, (C << ShAmtC) |
2437 | | // icmp slt/ult (ashr X, ShAmtC), C --> icmp slt/ult X, (C << ShAmtC) |
2438 | 14 | APInt ShiftedC = C.shl(ShAmtVal); |
2439 | 14 | if (ShiftedC.ashr(ShAmtVal) == C) |
2440 | 10 | return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); |
2441 | 14 | } |
2442 | 53 | if (Pred == CmpInst::ICMP_SGT) { |
2443 | | // icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1 |
2444 | 6 | APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1; |
2445 | 6 | if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() && |
2446 | 6 | (ShiftedC + 1).ashr(ShAmtVal) == (C + 1)) |
2447 | 6 | return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); |
2448 | 6 | } |
2449 | 47 | if (Pred == CmpInst::ICMP_UGT) { |
2450 | | // icmp ugt (ashr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1 |
2451 | | // 'C + 1 << ShAmtC' can overflow as a signed number, so the 2nd |
2452 | | // clause accounts for that pattern. |
2453 | 1 | APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1; |
2454 | 1 | if ((ShiftedC + 1).ashr(ShAmtVal) == (C + 1) || |
2455 | 1 | (C + 1).shl(ShAmtVal).isMinSignedValue()) |
2456 | 1 | return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); |
2457 | 1 | } |
2458 | | |
2459 | | // If the compare constant has significant bits above the lowest sign-bit, |
2460 | | // then convert an unsigned cmp to a test of the sign-bit: |
2461 | | // (ashr X, ShiftC) u> C --> X s< 0 |
2462 | | // (ashr X, ShiftC) u< C --> X s> -1 |
2463 | 46 | if (C.getBitWidth() > 2 && C.getNumSignBits() <= ShAmtVal) { |
2464 | 4 | if (Pred == CmpInst::ICMP_UGT) { |
2465 | 0 | return new ICmpInst(CmpInst::ICMP_SLT, X, |
2466 | 0 | ConstantInt::getNullValue(ShrTy)); |
2467 | 0 | } |
2468 | 4 | if (Pred == CmpInst::ICMP_ULT) { |
2469 | 4 | return new ICmpInst(CmpInst::ICMP_SGT, X, |
2470 | 4 | ConstantInt::getAllOnesValue(ShrTy)); |
2471 | 4 | } |
2472 | 4 | } |
2473 | 255 | } else if (!IsAShr) { |
2474 | 137 | if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) { |
2475 | | // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC) |
2476 | | // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC) |
2477 | 50 | APInt ShiftedC = C.shl(ShAmtVal); |
2478 | 50 | if (ShiftedC.lshr(ShAmtVal) == C) |
2479 | 50 | return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); |
2480 | 50 | } |
2481 | 87 | if (Pred == CmpInst::ICMP_UGT) { |
2482 | | // icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1 |
2483 | 15 | APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1; |
2484 | 15 | if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1)) |
2485 | 15 | return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); |
2486 | 15 | } |
2487 | 87 | } |
2488 | | |
2489 | 232 | if (!Cmp.isEquality()) |
2490 | 87 | return nullptr; |
2491 | | |
2492 | | // Handle equality comparisons of shift-by-constant. |
2493 | | |
2494 | | // If the comparison constant changes with the shift, the comparison cannot |
2495 | | // succeed (bits of the comparison constant cannot match the shifted value). |
2496 | | // This should be known by InstSimplify and already be folded to true/false. |
2497 | 145 | assert(((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) || |
2498 | 145 | (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) && |
2499 | 145 | "Expected icmp+shr simplify did not occur."); |
2500 | | |
2501 | | // If the bits shifted out are known zero, compare the unshifted value: |
2502 | | // (X & 4) >> 1 == 2 --> (X & 4) == 4. |
2503 | 145 | if (Shr->isExact()) |
2504 | 5 | return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal)); |
2505 | | |
2506 | 140 | if (C.isZero()) { |
2507 | | // == 0 is u< 1. |
2508 | 82 | if (Pred == CmpInst::ICMP_EQ) |
2509 | 57 | return new ICmpInst(CmpInst::ICMP_ULT, X, |
2510 | 57 | ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal))); |
2511 | 25 | else |
2512 | 25 | return new ICmpInst(CmpInst::ICMP_UGT, X, |
2513 | 25 | ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal) - 1)); |
2514 | 82 | } |
2515 | | |
2516 | 58 | if (Shr->hasOneUse()) { |
2517 | | // Canonicalize the shift into an 'and': |
2518 | | // icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt) |
2519 | 37 | APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); |
2520 | 37 | Constant *Mask = ConstantInt::get(ShrTy, Val); |
2521 | 37 | Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask"); |
2522 | 37 | return new ICmpInst(Pred, And, ConstantInt::get(ShrTy, C << ShAmtVal)); |
2523 | 37 | } |
2524 | | |
2525 | 21 | return nullptr; |
2526 | 58 | } |
2527 | | |
2528 | | Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp, |
2529 | | BinaryOperator *SRem, |
2530 | 195 | const APInt &C) { |
2531 | | // Match an 'is positive' or 'is negative' comparison of remainder by a |
2532 | | // constant power-of-2 value: |
2533 | | // (X % pow2C) sgt/slt 0 |
2534 | 195 | const ICmpInst::Predicate Pred = Cmp.getPredicate(); |
2535 | 195 | if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT && |
2536 | 195 | Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE) |
2537 | 43 | return nullptr; |
2538 | | |
2539 | | // TODO: The one-use check is standard because we do not typically want to |
2540 | | // create longer instruction sequences, but this might be a special-case |
2541 | | // because srem is not good for analysis or codegen. |
2542 | 152 | if (!SRem->hasOneUse()) |
2543 | 96 | return nullptr; |
2544 | | |
2545 | 56 | const APInt *DivisorC; |
2546 | 56 | if (!match(SRem->getOperand(1), m_Power2(DivisorC))) |
2547 | 37 | return nullptr; |
2548 | | |
2549 | | // For cmp_sgt/cmp_slt only zero valued C is handled. |
2550 | | // For cmp_eq/cmp_ne only positive valued C is handled. |
2551 | 19 | if (((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT) && |
2552 | 19 | !C.isZero()) || |
2553 | 19 | ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && |
2554 | 9 | !C.isStrictlyPositive())) |
2555 | 13 | return nullptr; |
2556 | | |
2557 | | // Mask off the sign bit and the modulo bits (low-bits). |
2558 | 6 | Type *Ty = SRem->getType(); |
2559 | 6 | APInt SignMask = APInt::getSignMask(Ty->getScalarSizeInBits()); |
2560 | 6 | Constant *MaskC = ConstantInt::get(Ty, SignMask | (*DivisorC - 1)); |
2561 | 6 | Value *And = Builder.CreateAnd(SRem->getOperand(0), MaskC); |
2562 | | |
2563 | 6 | if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) |
2564 | 1 | return new ICmpInst(Pred, And, ConstantInt::get(Ty, C)); |
2565 | | |
2566 | | // For 'is positive?' check that the sign-bit is clear and at least 1 masked |
2567 | | // bit is set. Example: |
2568 | | // (i8 X % 32) s> 0 --> (X & 159) s> 0 |
2569 | 5 | if (Pred == ICmpInst::ICMP_SGT) |
2570 | 1 | return new ICmpInst(ICmpInst::ICMP_SGT, And, ConstantInt::getNullValue(Ty)); |
2571 | | |
2572 | | // For 'is negative?' check that the sign-bit is set and at least 1 masked |
2573 | | // bit is set. Example: |
2574 | | // (i16 X % 4) s< 0 --> (X & 32771) u> 32768 |
2575 | 4 | return new ICmpInst(ICmpInst::ICMP_UGT, And, ConstantInt::get(Ty, SignMask)); |
2576 | 5 | } |
2577 | | |
2578 | | /// Fold icmp (udiv X, Y), C. |
2579 | | Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp, |
2580 | | BinaryOperator *UDiv, |
2581 | 265 | const APInt &C) { |
2582 | 265 | ICmpInst::Predicate Pred = Cmp.getPredicate(); |
2583 | 265 | Value *X = UDiv->getOperand(0); |
2584 | 265 | Value *Y = UDiv->getOperand(1); |
2585 | 265 | Type *Ty = UDiv->getType(); |
2586 | | |
2587 | 265 | const APInt *C2; |
2588 | 265 | if (!match(X, m_APInt(C2))) |
2589 | 148 | return nullptr; |
2590 | | |
2591 | 117 | assert(*C2 != 0 && "udiv 0, X should have been simplified already."); |
2592 | | |
2593 | | // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1)) |
2594 | 117 | if (Pred == ICmpInst::ICMP_UGT) { |
2595 | 28 | assert(!C.isMaxValue() && |
2596 | 28 | "icmp ugt X, UINT_MAX should have been simplified already."); |
2597 | 0 | return new ICmpInst(ICmpInst::ICMP_ULE, Y, |
2598 | 28 | ConstantInt::get(Ty, C2->udiv(C + 1))); |
2599 | 28 | } |
2600 | | |
2601 | | // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C) |
2602 | 89 | if (Pred == ICmpInst::ICMP_ULT) { |
2603 | 26 | assert(C != 0 && "icmp ult X, 0 should have been simplified already."); |
2604 | 0 | return new ICmpInst(ICmpInst::ICMP_UGT, Y, |
2605 | 26 | ConstantInt::get(Ty, C2->udiv(C))); |
2606 | 26 | } |
2607 | | |
2608 | 63 | return nullptr; |
2609 | 89 | } |
2610 | | |
2611 | | /// Fold icmp ({su}div X, Y), C. |
2612 | | Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp, |
2613 | | BinaryOperator *Div, |
2614 | 514 | const APInt &C) { |
2615 | 514 | ICmpInst::Predicate Pred = Cmp.getPredicate(); |
2616 | 514 | Value *X = Div->getOperand(0); |
2617 | 514 | Value *Y = Div->getOperand(1); |
2618 | 514 | Type *Ty = Div->getType(); |
2619 | 514 | bool DivIsSigned = Div->getOpcode() == Instruction::SDiv; |
2620 | | |
2621 | | // If unsigned division and the compare constant is bigger than |
2622 | | // UMAX/2 (negative), there's only one pair of values that satisfies an |
2623 | | // equality check, so eliminate the division: |
2624 | | // (X u/ Y) == C --> (X == C) && (Y == 1) |
2625 | | // (X u/ Y) != C --> (X != C) || (Y != 1) |
2626 | | // Similarly, if signed division and the compare constant is exactly SMIN: |
2627 | | // (X s/ Y) == SMIN --> (X == SMIN) && (Y == 1) |
2628 | | // (X s/ Y) != SMIN --> (X != SMIN) || (Y != 1) |
2629 | 514 | if (Cmp.isEquality() && Div->hasOneUse() && C.isSignBitSet() && |
2630 | 514 | (!DivIsSigned || C.isMinSignedValue())) { |
2631 | 6 | Value *XBig = Builder.CreateICmp(Pred, X, ConstantInt::get(Ty, C)); |
2632 | 6 | Value *YOne = Builder.CreateICmp(Pred, Y, ConstantInt::get(Ty, 1)); |
2633 | 6 | auto Logic = Pred == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; |
2634 | 6 | return BinaryOperator::Create(Logic, XBig, YOne); |
2635 | 6 | } |
2636 | | |
2637 | | // Fold: icmp pred ([us]div X, C2), C -> range test |
2638 | | // Fold this div into the comparison, producing a range check. |
2639 | | // Determine, based on the divide type, what the range is being |
2640 | | // checked. If there is an overflow on the low or high side, remember |
2641 | | // it, otherwise compute the range [low, hi) bounding the new value. |
2642 | | // See: InsertRangeTest above for the kinds of replacements possible. |
2643 | 508 | const APInt *C2; |
2644 | 508 | if (!match(Y, m_APInt(C2))) |
2645 | 282 | return nullptr; |
2646 | | |
2647 | | // FIXME: If the operand types don't match the type of the divide |
2648 | | // then don't attempt this transform. The code below doesn't have the |
2649 | | // logic to deal with a signed divide and an unsigned compare (and |
2650 | | // vice versa). This is because (x /s C2) <s C produces different |
2651 | | // results than (x /s C2) <u C or (x /u C2) <s C or even |
2652 | | // (x /u C2) <u C. Simply casting the operands and result won't |
2653 | | // work. :( The if statement below tests that condition and bails |
2654 | | // if it finds it. |
2655 | 226 | if (!Cmp.isEquality() && DivIsSigned != Cmp.isSigned()) |
2656 | 29 | return nullptr; |
2657 | | |
2658 | | // The ProdOV computation fails on divide by 0 and divide by -1. Cases with |
2659 | | // INT_MIN will also fail if the divisor is 1. Although folds of all these |
2660 | | // division-by-constant cases should be present, we can not assert that they |
2661 | | // have happened before we reach this icmp instruction. |
2662 | 197 | if (C2->isZero() || C2->isOne() || (DivIsSigned && C2->isAllOnes())) |
2663 | 0 | return nullptr; |
2664 | | |
2665 | | // Compute Prod = C * C2. We are essentially solving an equation of |
2666 | | // form X / C2 = C. We solve for X by multiplying C2 and C. |
2667 | | // By solving for X, we can turn this into a range check instead of computing |
2668 | | // a divide. |
2669 | 197 | APInt Prod = C * *C2; |
2670 | | |
2671 | | // Determine if the product overflows by seeing if the product is not equal to |
2672 | | // the divide. Make sure we do the same kind of divide as in the LHS |
2673 | | // instruction that we're folding. |
2674 | 197 | bool ProdOV = (DivIsSigned ? Prod.sdiv(*C2) : Prod.udiv(*C2)) != C; |
2675 | | |
2676 | | // If the division is known to be exact, then there is no remainder from the |
2677 | | // divide, so the covered range size is unit, otherwise it is the divisor. |
2678 | 197 | APInt RangeSize = Div->isExact() ? APInt(C2->getBitWidth(), 1) : *C2; |
2679 | | |
2680 | | // Figure out the interval that is being checked. For example, a comparison |
2681 | | // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). |
2682 | | // Compute this interval based on the constants involved and the signedness of |
2683 | | // the compare/divide. This computes a half-open interval, keeping track of |
2684 | | // whether either value in the interval overflows. After analysis each |
2685 | | // overflow variable is set to 0 if it's corresponding bound variable is valid |
2686 | | // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. |
2687 | 197 | int LoOverflow = 0, HiOverflow = 0; |
2688 | 197 | APInt LoBound, HiBound; |
2689 | | |
2690 | 197 | if (!DivIsSigned) { // udiv |
2691 | | // e.g. X/5 op 3 --> [15, 20) |
2692 | 94 | LoBound = Prod; |
2693 | 94 | HiOverflow = LoOverflow = ProdOV; |
2694 | 94 | if (!HiOverflow) { |
2695 | | // If this is not an exact divide, then many values in the range collapse |
2696 | | // to the same result value. |
2697 | 94 | HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false); |
2698 | 94 | } |
2699 | 103 | } else if (C2->isStrictlyPositive()) { // Divisor is > 0. |
2700 | 87 | if (C.isZero()) { // (X / pos) op 0 |
2701 | | // Can't overflow. e.g. X/2 op 0 --> [-1, 2) |
2702 | 33 | LoBound = -(RangeSize - 1); |
2703 | 33 | HiBound = RangeSize; |
2704 | 54 | } else if (C.isStrictlyPositive()) { // (X / pos) op pos |
2705 | 30 | LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) |
2706 | 30 | HiOverflow = LoOverflow = ProdOV; |
2707 | 30 | if (!HiOverflow) |
2708 | 30 | HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true); |
2709 | 30 | } else { // (X / pos) op neg |
2710 | | // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) |
2711 | 24 | HiBound = Prod + 1; |
2712 | 24 | LoOverflow = HiOverflow = ProdOV ? -1 : 0; |
2713 | 24 | if (!LoOverflow) { |
2714 | 24 | APInt DivNeg = -RangeSize; |
2715 | 24 | LoOverflow = addWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; |
2716 | 24 | } |
2717 | 24 | } |
2718 | 87 | } else if (C2->isNegative()) { // Divisor is < 0. |
2719 | 16 | if (Div->isExact()) |
2720 | 1 | RangeSize.negate(); |
2721 | 16 | if (C.isZero()) { // (X / neg) op 0 |
2722 | | // e.g. X/-5 op 0 --> [-4, 5) |
2723 | 1 | LoBound = RangeSize + 1; |
2724 | 1 | HiBound = -RangeSize; |
2725 | 1 | if (HiBound == *C2) { // -INTMIN = INTMIN |
2726 | 0 | HiOverflow = 1; // [INTMIN+1, overflow) |
2727 | 0 | HiBound = APInt(); // e.g. X/INTMIN = 0 --> X > INTMIN |
2728 | 0 | } |
2729 | 15 | } else if (C.isStrictlyPositive()) { // (X / neg) op pos |
2730 | | // e.g. X/-5 op 3 --> [-19, -14) |
2731 | 3 | HiBound = Prod + 1; |
2732 | 3 | HiOverflow = LoOverflow = ProdOV ? -1 : 0; |
2733 | 3 | if (!LoOverflow) |
2734 | 3 | LoOverflow = |
2735 | 3 | addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1 : 0; |
2736 | 12 | } else { // (X / neg) op neg |
2737 | 12 | LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) |
2738 | 12 | LoOverflow = HiOverflow = ProdOV; |
2739 | 12 | if (!HiOverflow) |
2740 | 12 | HiOverflow = subWithOverflow(HiBound, Prod, RangeSize, true); |
2741 | 12 | } |
2742 | | |
2743 | | // Dividing by a negative swaps the condition. LT <-> GT |
2744 | 16 | Pred = ICmpInst::getSwappedPredicate(Pred); |
2745 | 16 | } |
2746 | | |
2747 | 197 | switch (Pred) { |
2748 | 0 | default: |
2749 | 0 | llvm_unreachable("Unhandled icmp predicate!"); |
2750 | 92 | case ICmpInst::ICMP_EQ: |
2751 | 92 | if (LoOverflow && HiOverflow) |
2752 | 0 | return replaceInstUsesWith(Cmp, Builder.getFalse()); |
2753 | 92 | if (HiOverflow) |
2754 | 15 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, |
2755 | 15 | X, ConstantInt::get(Ty, LoBound)); |
2756 | 77 | if (LoOverflow) |
2757 | 5 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, |
2758 | 5 | X, ConstantInt::get(Ty, HiBound)); |
2759 | 72 | return replaceInstUsesWith( |
2760 | 72 | Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true)); |
2761 | 52 | case ICmpInst::ICMP_NE: |
2762 | 52 | if (LoOverflow && HiOverflow) |
2763 | 0 | return replaceInstUsesWith(Cmp, Builder.getTrue()); |
2764 | 52 | if (HiOverflow) |
2765 | 20 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, |
2766 | 20 | X, ConstantInt::get(Ty, LoBound)); |
2767 | 32 | if (LoOverflow) |
2768 | 0 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, |
2769 | 0 | X, ConstantInt::get(Ty, HiBound)); |
2770 | 32 | return replaceInstUsesWith( |
2771 | 32 | Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, false)); |
2772 | 6 | case ICmpInst::ICMP_ULT: |
2773 | 26 | case ICmpInst::ICMP_SLT: |
2774 | 26 | if (LoOverflow == +1) // Low bound is greater than input range. |
2775 | 0 | return replaceInstUsesWith(Cmp, Builder.getTrue()); |
2776 | 26 | if (LoOverflow == -1) // Low bound is less than input range. |
2777 | 0 | return replaceInstUsesWith(Cmp, Builder.getFalse()); |
2778 | 26 | return new ICmpInst(Pred, X, ConstantInt::get(Ty, LoBound)); |
2779 | 5 | case ICmpInst::ICMP_UGT: |
2780 | 27 | case ICmpInst::ICMP_SGT: |
2781 | 27 | if (HiOverflow == +1) // High bound greater than input range. |
2782 | 0 | return replaceInstUsesWith(Cmp, Builder.getFalse()); |
2783 | 27 | if (HiOverflow == -1) // High bound less than input range. |
2784 | 0 | return replaceInstUsesWith(Cmp, Builder.getTrue()); |
2785 | 27 | if (Pred == ICmpInst::ICMP_UGT) |
2786 | 5 | return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, HiBound)); |
2787 | 22 | return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, HiBound)); |
2788 | 197 | } |
2789 | | |
2790 | 0 | return nullptr; |
2791 | 197 | } |
2792 | | |
2793 | | /// Fold icmp (sub X, Y), C. |
///
/// X and Y are operands 0 and 1 of \p Sub. The folds below are tried in order,
/// from the ones that apply regardless of how many users \p Sub has to the ones
/// that require \p Sub to have a single use and X to be a constant.
///
/// \param Cmp the compare being folded.
/// \param Sub the subtraction that \p Cmp compares against \p C.
/// \param C   the constant on the other side of the compare.
/// \returns the replacement for \p Cmp, or nullptr when no fold applies.
2794 | | Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp, |
2795 | | BinaryOperator *Sub, |
2796 | 1.20k | const APInt &C) { |
2797 | 1.20k | Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1); |
2798 | 1.20k | ICmpInst::Predicate Pred = Cmp.getPredicate(); |
2799 | 1.20k | Type *Ty = Sub->getType(); |
2800 | | |
2801 | | // (SubC - Y) == C --> Y == (SubC - C) |
2802 | | // (SubC - Y) != C --> Y != (SubC - C) |
2803 | 1.20k | Constant *SubC; |
2804 | 1.20k | if (Cmp.isEquality() && match(X, m_ImmConstant(SubC))) { |
2805 | 206 | return new ICmpInst(Pred, Y, |
2806 | 206 | ConstantExpr::getSub(SubC, ConstantInt::get(Ty, C))); |
2807 | 206 | } |
2808 | | |
2809 | | // (icmp P (sub nuw|nsw C2, Y), C) -> (icmp swap(P) Y, C2-C) |
// The no-wrap flag has to match the signedness of the compare, and C2 - C
// itself must not overflow in that signedness.
2810 | 1.00k | const APInt *C2; |
2811 | 1.00k | APInt SubResult; |
2812 | 1.00k | ICmpInst::Predicate SwappedPred = Cmp.getSwappedPredicate(); |
2813 | 1.00k | bool HasNSW = Sub->hasNoSignedWrap(); |
2814 | 1.00k | bool HasNUW = Sub->hasNoUnsignedWrap(); |
2815 | 1.00k | if (match(X, m_APInt(C2)) && |
2816 | 1.00k | ((Cmp.isUnsigned() && HasNUW) || (Cmp.isSigned() && HasNSW)) && |
2817 | 1.00k | !subWithOverflow(SubResult, *C2, C, Cmp.isSigned())) |
2818 | 19 | return new ICmpInst(SwappedPred, Y, ConstantInt::get(Ty, SubResult)); |
2819 | | |
2820 | | // X - Y == 0 --> X == Y. |
2821 | | // X - Y != 0 --> X != Y. |
2822 | | // TODO: We allow this with multiple uses as long as the other uses are not |
2823 | | // in phis. The phi use check is guarding against a codegen regression |
2824 | | // for a loop test. If the backend could undo this (and possibly |
2825 | | // subsequent transforms), we would not need this hack. |
2826 | 984 | if (Cmp.isEquality() && C.isZero() && |
2827 | 984 | none_of((Sub->users()), [](const User *U) { return isa<PHINode>(U); })) |
2828 | 57 | return new ICmpInst(Pred, X, Y); |
2829 | | |
2830 | | // The following transforms are only worth it if the only user of the subtract |
2831 | | // is the icmp. |
2832 | | // TODO: This is an artificial restriction for all of the transforms below |
2833 | | // that only need a single replacement icmp. Can these use the phi test |
2834 | | // like the transform above here? |
2835 | 927 | if (!Sub->hasOneUse()) |
2836 | 751 | return nullptr; |
2837 | | |
// With nsw, a signed compare of the difference against -1, 0 or 1 is a direct
// signed compare of X and Y.
2838 | 176 | if (Sub->hasNoSignedWrap()) { |
2839 | | // (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y) |
2840 | 40 | if (Pred == ICmpInst::ICMP_SGT && C.isAllOnes()) |
2841 | 5 | return new ICmpInst(ICmpInst::ICMP_SGE, X, Y); |
2842 | | |
2843 | | // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y) |
2844 | 35 | if (Pred == ICmpInst::ICMP_SGT && C.isZero()) |
2845 | 5 | return new ICmpInst(ICmpInst::ICMP_SGT, X, Y); |
2846 | | |
2847 | | // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y) |
2848 | 30 | if (Pred == ICmpInst::ICMP_SLT && C.isZero()) |
2849 | 17 | return new ICmpInst(ICmpInst::ICMP_SLT, X, Y); |
2850 | | |
2851 | | // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y) |
2852 | 13 | if (Pred == ICmpInst::ICMP_SLT && C.isOne()) |
2853 | 5 | return new ICmpInst(ICmpInst::ICMP_SLE, X, Y); |
2854 | 13 | } |
2855 | | |
// Every fold from here on needs X to be a constant, C2.
2856 | 144 | if (!match(X, m_APInt(C2))) |
2857 | 79 | return nullptr; |
2858 | | |
2859 | | // C2 - Y <u C -> (Y | (C - 1)) == C2 |
2860 | | // iff (C2 & (C - 1)) == C - 1 and C is a power of 2 |
2861 | 65 | if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() && |
2862 | 65 | (*C2 & (C - 1)) == (C - 1)) |
2863 | 3 | return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, C - 1), X); |
2864 | | |
2865 | | // C2 - Y >u C -> (Y | C) != C2 |
2866 | | // iff C2 & C == C and C + 1 is a power of 2 |
2867 | 62 | if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == C) |
2868 | 3 | return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, C), X); |
2869 | | |
2870 | | // We have handled special cases that reduce. |
2871 | | // Canonicalize any remaining sub to add as: |
2872 | | // (C2 - Y) > C --> (Y + ~C2) < ~C |
// HasNUW/HasNSW, read from Sub above, are handed to CreateAdd for the new add.
2873 | 59 | Value *Add = Builder.CreateAdd(Y, ConstantInt::get(Ty, ~(*C2)), "notsub", |
2874 | 59 | HasNUW, HasNSW); |
2875 | 59 | return new ICmpInst(SwappedPred, Add, ConstantInt::get(Ty, ~C)); |
2876 | 62 | } |
2877 | | |
/// Build the boolean expression over \p Op0 and \p Op1 described by a
/// four-entry truth table.
///
/// Bit (2 * Op0 + Op1) of \p Table is the wanted result for that pair of input
/// values: bit 3 is the (true, true) entry and bit 0 the (false, false) entry.
/// The "a b c d" note on each case below lists bits 3 down to 0.
///
/// Tables that need at most one new instruction are always materialized.
/// Tables that need two (a 'not' combined with a binary operator) are only
/// materialized when \p HasOneUse is true; otherwise nullptr is returned.
///
/// \returns a constant, one of the two operands, an expression created through
/// \p Builder, or nullptr as described above.
2878 | | static Value *createLogicFromTable(const std::bitset<4> &Table, Value *Op0, |
2879 | | Value *Op1, IRBuilderBase &Builder, |
2880 | 0 | bool HasOneUse) { |
// Produces the constant true/false, splatted to Op0's element count when Op0
// has a vector type.
2881 | 0 | auto FoldConstant = [&](bool Val) { |
2882 | 0 | Constant *Res = Val ? Builder.getTrue() : Builder.getFalse(); |
2883 | 0 | if (Op0->getType()->isVectorTy()) |
2884 | 0 | Res = ConstantVector::getSplat( |
2885 | 0 | cast<VectorType>(Op0->getType())->getElementCount(), Res); |
2886 | 0 | return Res; |
2887 | 0 | }; |
2888 |

2889 | 0 | switch (Table.to_ulong()) { |
2890 | 0 | case 0: // 0 0 0 0 |
2891 | 0 | return FoldConstant(false); |
2892 | 0 | case 1: // 0 0 0 1 |
2893 | 0 | return HasOneUse ? Builder.CreateNot(Builder.CreateOr(Op0, Op1)) : nullptr; |
2894 | 0 | case 2: // 0 0 1 0 |
2895 | 0 | return HasOneUse ? Builder.CreateAnd(Builder.CreateNot(Op0), Op1) : nullptr; |
2896 | 0 | case 3: // 0 0 1 1 |
2897 | 0 | return Builder.CreateNot(Op0); |
2898 | 0 | case 4: // 0 1 0 0 |
2899 | 0 | return HasOneUse ? Builder.CreateAnd(Op0, Builder.CreateNot(Op1)) : nullptr; |
2900 | 0 | case 5: // 0 1 0 1 |
2901 | 0 | return Builder.CreateNot(Op1); |
2902 | 0 | case 6: // 0 1 1 0 |
2903 | 0 | return Builder.CreateXor(Op0, Op1); |
2904 | 0 | case 7: // 0 1 1 1 |
2905 | 0 | return HasOneUse ? Builder.CreateNot(Builder.CreateAnd(Op0, Op1)) : nullptr; |
2906 | 0 | case 8: // 1 0 0 0 |
2907 | 0 | return Builder.CreateAnd(Op0, Op1); |
2908 | 0 | case 9: // 1 0 0 1 |
2909 | 0 | return HasOneUse ? Builder.CreateNot(Builder.CreateXor(Op0, Op1)) : nullptr; |
2910 | 0 | case 10: // 1 0 1 0 |
2911 | 0 | return Op1; |
2912 | 0 | case 11: // 1 0 1 1 |
2913 | 0 | return HasOneUse ? Builder.CreateOr(Builder.CreateNot(Op0), Op1) : nullptr; |
2914 | 0 | case 12: // 1 1 0 0 |
2915 | 0 | return Op0; |
2916 | 0 | case 13: // 1 1 0 1 |
2917 | 0 | return HasOneUse ? Builder.CreateOr(Op0, Builder.CreateNot(Op1)) : nullptr; |
2918 | 0 | case 14: // 1 1 1 0 |
2919 | 0 | return Builder.CreateOr(Op0, Op1); |
2920 | 0 | case 15: // 1 1 1 1 |
2921 | 0 | return FoldConstant(true); |
2922 | 0 | default: |
2923 | 0 | llvm_unreachable("Invalid Operation"); |
2924 | 0 | } |
// Not reached: every case above either returns or is marked unreachable.
2925 | 0 | return nullptr; |
2926 | 0 | } |
2927 | | |
2928 | | /// Fold icmp (add X, Y), C. |
///
/// X and Y are operands 0 and 1 of \p Add. Two families of folds are tried:
///  - both addends are zext/sext of i1 values (any predicate): the compare is
///    replaced by a logic expression over the two i1 values;
///  - Y is a constant C2 and the predicate is not an equality: the compare is
///    rewritten as a compare of X (or of a mask/offset of X) with a constant.
///
/// \param Cmp the compare being folded.
/// \param Add the addition that \p Cmp compares against \p C.
/// \param C   the constant on the other side of the compare.
/// \returns the replacement for \p Cmp, or nullptr when no fold applies.
2929 | | Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp, |
2930 | | BinaryOperator *Add, |
2931 | 3.84k | const APInt &C) { |
2932 | 3.84k | Value *Y = Add->getOperand(1); |
2933 | 3.84k | Value *X = Add->getOperand(0); |
2934 | | |
// icmp Pred (add (ext i1 Op0), (ext i1 Op1)), C
// Both inputs are booleans, so evaluate the compare for each of the four
// input combinations and emit the logic expression with that truth table.
2935 | 3.84k | Value *Op0, *Op1; |
2936 | 3.84k | Instruction *Ext0, *Ext1; |
2937 | 3.84k | const CmpInst::Predicate Pred = Cmp.getPredicate(); |
2938 | 3.84k | if (match(Add, |
2939 | 3.84k | m_Add(m_CombineAnd(m_Instruction(Ext0), m_ZExtOrSExt(m_Value(Op0))), |
2940 | 3.84k | m_CombineAnd(m_Instruction(Ext1), |
2941 | 3.84k | m_ZExtOrSExt(m_Value(Op1))))) && |
2942 | 3.84k | Op0->getType()->isIntOrIntVectorTy(1) && |
2943 | 3.84k | Op1->getType()->isIntOrIntVectorTy(1)) { |
2944 | 0 | unsigned BW = C.getBitWidth(); |
2945 | 0 | std::bitset<4> Table; |
// Value of the compare for one assignment of the two booleans: a true input
// contributes +1 to the sum through a zext and -1 through a sext.
2946 | 0 | auto ComputeTable = [&](bool Op0Val, bool Op1Val) { |
2947 | 0 | int Res = 0; |
2948 | 0 | if (Op0Val) |
2949 | 0 | Res += isa<ZExtInst>(Ext0) ? 1 : -1; |
2950 | 0 | if (Op1Val) |
2951 | 0 | Res += isa<ZExtInst>(Ext1) ? 1 : -1; |
2952 | 0 | return ICmpInst::compare(APInt(BW, Res, true), C, Pred); |
2953 | 0 | }; |
2954 |

// Table index is 2 * Op0Val + Op1Val.
2955 | 0 | Table[0] = ComputeTable(false, false); |
2956 | 0 | Table[1] = ComputeTable(false, true); |
2957 | 0 | Table[2] = ComputeTable(true, false); |
2958 | 0 | Table[3] = ComputeTable(true, true); |
2959 | 0 | if (auto *Cond = |
2960 | 0 | createLogicFromTable(Table, Op0, Op1, Builder, Add->hasOneUse())) |
2961 | 0 | return replaceInstUsesWith(Cmp, Cond); |
2962 | 0 | } |
// The remaining folds only handle non-equality predicates with a constant
// addend C2.
2963 | 3.84k | const APInt *C2; |
2964 | 3.84k | if (Cmp.isEquality() || !match(Y, m_APInt(C2))) |
2965 | 1.54k | return nullptr; |
2966 | | |
2967 | | // Fold icmp pred (add X, C2), C. |
2968 | 2.30k | Type *Ty = Add->getType(); |
2969 | | |
2970 | | // If the add does not wrap, we can always adjust the compare by subtracting |
2971 | | // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE |
2972 | | // are canonicalized to SGT/SLT/UGT/ULT. |
2973 | 2.30k | if ((Add->hasNoSignedWrap() && |
2974 | 2.30k | (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) || |
2975 | 2.30k | (Add->hasNoUnsignedWrap() && |
2976 | 2.26k | (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT))) { |
2977 | 85 | bool Overflow; |
2978 | 85 | APInt NewC = |
2979 | 85 | Cmp.isSigned() ? C.ssub_ov(*C2, Overflow) : C.usub_ov(*C2, Overflow); |
2980 | | // If there is overflow, the result must be true or false. |
2981 | | // TODO: Can we assert there is no overflow because InstSimplify always |
2982 | | // handles those cases? |
2983 | 85 | if (!Overflow) |
2984 | | // icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2) |
2985 | 85 | return new ICmpInst(Pred, X, ConstantInt::get(Ty, NewC)); |
2986 | 85 | } |
2987 | | |
// CR is the region for `Pred C` with C2 subtracted, i.e. a range for X. When
// one of its bounds is the minimum value for the compare's signedness, a
// single compare of X against the other bound is enough.
2988 | 2.22k | auto CR = ConstantRange::makeExactICmpRegion(Pred, C).subtract(*C2); |
2989 | 2.22k | const APInt &Upper = CR.getUpper(); |
2990 | 2.22k | const APInt &Lower = CR.getLower(); |
2991 | 2.22k | if (Cmp.isSigned()) { |
2992 | 272 | if (Lower.isSignMask()) |
2993 | 3 | return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper)); |
2994 | 269 | if (Upper.isSignMask()) |
2995 | 4 | return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower)); |
2996 | 1.94k | } else { |
2997 | 1.94k | if (Lower.isMinValue()) |
2998 | 20 | return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantInt::get(Ty, Upper)); |
2999 | 1.92k | if (Upper.isMinValue()) |
3000 | 1 | return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower)); |
3001 | 1.92k | } |
3002 | | |
3003 | | // This set of folds is intentionally placed after folds that use no-wrapping |
3004 | | // flags because those folds are likely better for later analysis/codegen. |
3005 | 2.19k | const APInt SMax = APInt::getSignedMaxValue(Ty->getScalarSizeInBits()); |
3006 | 2.19k | const APInt SMin = APInt::getSignedMinValue(Ty->getScalarSizeInBits()); |
3007 | | |
3008 | | // Fold compare with offset to opposite sign compare if it eliminates offset: |
3009 | | // (X + C2) >u C --> X <s -C2 (if C == C2 + SMAX) |
3010 | 2.19k | if (Pred == CmpInst::ICMP_UGT && C == *C2 + SMax) |
3011 | 7 | return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, -(*C2))); |
3012 | | |
3013 | | // (X + C2) <u C --> X >s ~C2 (if C == C2 + SMIN) |
3014 | 2.18k | if (Pred == CmpInst::ICMP_ULT && C == *C2 + SMin) |
3015 | 2 | return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantInt::get(Ty, ~(*C2))); |
3016 | | |
3017 | | // (X + C2) >s C --> X <u (SMAX - C) (if C == C2 - 1) |
3018 | 2.18k | if (Pred == CmpInst::ICMP_SGT && C == *C2 - 1) |
3019 | 15 | return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantInt::get(Ty, SMax - C)); |
3020 | | |
3021 | | // (X + C2) <s C --> X >u (C ^ SMAX) (if C == C2) |
3022 | 2.16k | if (Pred == CmpInst::ICMP_SLT && C == *C2) |
3023 | 11 | return new ICmpInst(ICmpInst::ICMP_UGT, X, ConstantInt::get(Ty, C ^ SMax)); |
3024 | | |
3025 | | // (X + -1) <u C --> X <=u C (if X is never null) |
3026 | 2.15k | if (Pred == CmpInst::ICMP_ULT && C2->isAllOnes()) { |
3027 | 95 | const SimplifyQuery Q = SQ.getWithInstruction(&Cmp); |
3028 | 95 | if (llvm::isKnownNonZero(X, DL, 0, Q.AC, Q.CxtI, Q.DT)) |
3029 | 7 | return new ICmpInst(ICmpInst::ICMP_ULE, X, ConstantInt::get(Ty, C)); |
3030 | 95 | } |
3031 | | |
// The folds below create an extra instruction, so they are only done when the
// add has no other user.
3032 | 2.15k | if (!Add->hasOneUse()) |
3033 | 295 | return nullptr; |
3034 | | |
3035 | | // X+C2 <u C -> (X & -C) == -C2 |
3036 | | // iff C2 & (C-1) == 0 |
3037 | | // C is a power of 2 |
3038 | 1.85k | if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() && (*C2 & (C - 1)) == 0) |
3039 | 17 | return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -C), |
3040 | 17 | ConstantExpr::getNeg(cast<Constant>(Y))); |
3041 | | |
3042 | | // X+C2 >u C -> (X & ~C) != -C2 |
3043 | | // iff C2 & C == 0 |
3044 | | // C+1 is a power of 2 |
3045 | 1.83k | if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == 0) |
3046 | 12 | return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~C), |
3047 | 12 | ConstantExpr::getNeg(cast<Constant>(Y))); |
3048 | | |
3049 | | // The range test idiom can use either ult or ugt. Arbitrarily canonicalize |
3050 | | // to the ult form. |
3051 | | // X+C2 >u C -> X+(C2-C-1) <u ~C |
3052 | 1.82k | if (Pred == ICmpInst::ICMP_UGT) |
3053 | 75 | return new ICmpInst(ICmpInst::ICMP_ULT, |
3054 | 75 | Builder.CreateAdd(X, ConstantInt::get(Ty, *C2 - C - 1)), |
3055 | 75 | ConstantInt::get(Ty, ~C)); |
3056 | | |
3057 | 1.75k | return nullptr; |
3058 | 1.82k | } |
3059 | | |
/// Match a three-way integer comparison written as two nested selects rooted
/// at \p SI (the exact shape is shown in the body).
///
/// On success:
///  - \p LHS and \p RHS are the two values compared by the outer equality
///    compare;
///  - \p Equal is the constant selected when LHS == RHS;
///  - \p Less is the constant selected when LHS <s RHS;
///  - \p Greater is the constant selected otherwise.
///
/// The out-parameters are assigned while matching, so they may hold partial
/// results when false is returned; only rely on them after a true return.
///
/// \returns true if \p SI has the expected shape and its inner compare is, or
/// can be canonicalized to, `LHS slt RHS`.
3060 | | bool InstCombinerImpl::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, |
3061 | | Value *&RHS, ConstantInt *&Less, |
3062 | | ConstantInt *&Equal, |
3063 | 541 | ConstantInt *&Greater) { |
3064 | | // TODO: Generalize this to work with other comparison idioms or ensure |
3065 | | // they get canonicalized into this form. |
3066 | | |
3067 | | // select i1 (a == b), |
3068 | | // i32 Equal, |
3069 | | // i32 (select i1 (a < b), i32 Less, i32 Greater) |
3070 | | // where Equal, Less and Greater are placeholders for any three constants. |
3071 | 541 | ICmpInst::Predicate PredA; |
3072 | 541 | if (!match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) || |
3073 | 541 | !ICmpInst::isEquality(PredA)) |
3074 | 411 | return false; |
3075 | 130 | Value *EqualVal = SI->getTrueValue(); |
3076 | 130 | Value *UnequalVal = SI->getFalseValue(); |
3077 | | // We still can get non-canonical predicate here, so canonicalize. |
3078 | 130 | if (PredA == ICmpInst::ICMP_NE) |
3079 | 46 | std::swap(EqualVal, UnequalVal); |
3080 | 130 | if (!match(EqualVal, m_ConstantInt(Equal))) |
3081 | 20 | return false; |
// The arm taken when the values differ must itself be a select of two
// constants on a second compare.
3082 | 110 | ICmpInst::Predicate PredB; |
3083 | 110 | Value *LHS2, *RHS2; |
3084 | 110 | if (!match(UnequalVal, m_Select(m_ICmp(PredB, m_Value(LHS2), m_Value(RHS2)), |
3085 | 110 | m_ConstantInt(Less), m_ConstantInt(Greater)))) |
3086 | 90 | return false; |
3087 | | // We can get predicate mismatch here, so canonicalize if possible: |
3088 | | // First, ensure that 'LHS' match. |
3089 | 20 | if (LHS2 != LHS) { |
3090 | | // x sgt y <--> y slt x |
3091 | 1 | std::swap(LHS2, RHS2); |
3092 | 1 | PredB = ICmpInst::getSwappedPredicate(PredB); |
3093 | 1 | } |
3094 | 20 | if (LHS2 != LHS) |
3095 | 0 | return false; |
3096 | | // We also need to canonicalize 'RHS'. |
3097 | 20 | if (PredB == ICmpInst::ICMP_SGT && isa<Constant>(RHS2)) { |
3098 | | // x sgt C-1 <--> x sge C <--> not(x slt C) |
3099 | 4 | auto FlippedStrictness = |
3100 | 4 | InstCombiner::getFlippedStrictnessPredicateAndConstant( |
3101 | 4 | PredB, cast<Constant>(RHS2)); |
3102 | 4 | if (!FlippedStrictness) |
3103 | 0 | return false; |
3104 | 4 | assert(FlippedStrictness->first == ICmpInst::ICMP_SGE && |
3105 | 4 | "basic correctness failure"); |
3106 | 0 | RHS2 = FlippedStrictness->second; |
3107 | | // And kind-of perform the result swap. |
3108 | 4 | std::swap(Less, Greater); |
3109 | 4 | PredB = ICmpInst::ICMP_SLT; |
3110 | 4 | } |
// Only a signed less-than on the same two operands is accepted.
3111 | 20 | return PredB == ICmpInst::ICMP_SLT && RHS == RHS2; |
3112 | 20 | } |
3113 | | |
/// Fold icmp (select ...), C when the select is a three-way comparison.
///
/// If \p Cmp has a single use and \p Select is matched by
/// matchThreeWayIntCompare, the predicate of \p Cmp is evaluated against each
/// of the three result constants and \p C. \p Cmp is then replaced by the 'or'
/// of the signed compares (slt / eq / sgt) of the original operands for which
/// that evaluation is true, or by false if it is true for none of them.
///
/// \param Cmp    the compare being folded.
/// \param Select the select whose result \p Cmp tests.
/// \param C      the constant \p Cmp compares against; must not be null.
/// \returns the result of replaceInstUsesWith on \p Cmp, or nullptr if the
/// pattern does not match.
3114 | | Instruction *InstCombinerImpl::foldICmpSelectConstant(ICmpInst &Cmp, |
3115 | | SelectInst *Select, |
3116 | 984 | ConstantInt *C) { |
3117 | | |
3118 | 984 | assert(C && "Cmp RHS should be a constant int!"); |
3119 | | // If we're testing a constant value against the result of a three way |
3120 | | // comparison, the result can be expressed directly in terms of the |
3121 | | // original values being compared. Note: We could possibly be more |
3122 | | // aggressive here and remove the hasOneUse test. The original select is |
3123 | | // really likely to simplify or sink when we remove a test of the result. |
3124 | 0 | Value *OrigLHS, *OrigRHS; |
3125 | 984 | ConstantInt *C1LessThan, *C2Equal, *C3GreaterThan; |
3126 | 984 | if (Cmp.hasOneUse() && |
3127 | 984 | matchThreeWayIntCompare(Select, OrigLHS, OrigRHS, C1LessThan, C2Equal, |
3128 | 541 | C3GreaterThan)) { |
3129 | 20 | assert(C1LessThan && C2Equal && C3GreaterThan); |
3130 | | |
// For each of the three possible select results, does `result Pred C` hold?
3131 | 0 | bool TrueWhenLessThan = |
3132 | 20 | ConstantExpr::getCompare(Cmp.getPredicate(), C1LessThan, C) |
3133 | 20 | ->isAllOnesValue(); |
3134 | 20 | bool TrueWhenEqual = |
3135 | 20 | ConstantExpr::getCompare(Cmp.getPredicate(), C2Equal, C) |
3136 | 20 | ->isAllOnesValue(); |
3137 | 20 | bool TrueWhenGreaterThan = |
3138 | 20 | ConstantExpr::getCompare(Cmp.getPredicate(), C3GreaterThan, C) |
3139 | 20 | ->isAllOnesValue(); |
3140 | | |
3141 | | // This generates the new instruction that will replace the original Cmp |
3142 | | // Instruction. Instead of enumerating the various combinations when |
3143 | | // TrueWhenLessThan, TrueWhenEqual and TrueWhenGreaterThan are true versus |
3144 | | // false, we rely on chaining of ORs and future passes of InstCombine to |
3145 | | // simplify the OR further (i.e. a s< b || a == b becomes a s<= b). |
3146 | | |
3147 | | // When none of the three constants satisfy the predicate for the RHS (C), |
3148 | | // the entire original Cmp can be simplified to a false. |
3149 | 20 | Value *Cond = Builder.getFalse(); |
3150 | 20 | if (TrueWhenLessThan) |
3151 | 0 | Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, |
3152 | 0 | OrigLHS, OrigRHS)); |
3153 | 20 | if (TrueWhenEqual) |
3154 | 0 | Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, |
3155 | 0 | OrigLHS, OrigRHS)); |
3156 | 20 | if (TrueWhenGreaterThan) |
3157 | 20 | Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, |
3158 | 20 | OrigLHS, OrigRHS)); |
3159 | | |
3160 | 20 | return replaceInstUsesWith(Cmp, Cond); |
3161 | 20 | } |
3162 | 964 | return nullptr; |
3163 | 984 | } |
3164 | | |
/// Fold an icmp whose first operand is a bitcast.
///
/// Folds tried, in order:
///  - when the cast keeps scalar/vector-ness and the scalar bit width: look
///    through sitofp / uitofp for zero and sign tests, and through
///    fpext / fptrunc for sign-bit tests;
///  - when an integer or integer-vector value is cast to a scalar integer and
///    compared with a constant: invert an all-ones equality test into a zero
///    test, narrow a zero test of an extended vector, and turn a compare of a
///    splat shuffle into a compare of one extracted element.
///
/// \returns the replacement for \p Cmp, or nullptr when operand 0 is not a
/// bitcast or no fold applies.
3165 | 77.4k | Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) { |
3166 | 77.4k | auto *Bitcast = dyn_cast<BitCastInst>(Cmp.getOperand(0)); |
3167 | 77.4k | if (!Bitcast) |
3168 | 76.3k | return nullptr; |
3169 | | |
3170 | 1.13k | ICmpInst::Predicate Pred = Cmp.getPredicate(); |
3171 | 1.13k | Value *Op1 = Cmp.getOperand(1); |
3172 | 1.13k | Value *BCSrcOp = Bitcast->getOperand(0); |
3173 | 1.13k | Type *SrcType = Bitcast->getSrcTy(); |
3174 | 1.13k | Type *DstType = Bitcast->getType(); |
3175 | | |
3176 | | // Make sure the bitcast doesn't change between scalar and vector and |
3177 | | // doesn't change the number of vector elements. |
3178 | 1.13k | if (SrcType->isVectorTy() == DstType->isVectorTy() && |
3179 | 1.13k | SrcType->getScalarSizeInBits() == DstType->getScalarSizeInBits()) { |
3180 | | // Zero-equality and sign-bit checks are preserved through sitofp + bitcast. |
3181 | 620 | Value *X; |
3182 | 620 | if (match(BCSrcOp, m_SIToFP(m_Value(X)))) { |
3183 | | // icmp eq (bitcast (sitofp X)), 0 --> icmp eq X, 0 |
3184 | | // icmp ne (bitcast (sitofp X)), 0 --> icmp ne X, 0 |
3185 | | // icmp slt (bitcast (sitofp X)), 0 --> icmp slt X, 0 |
3186 | | // icmp sgt (bitcast (sitofp X)), 0 --> icmp sgt X, 0 |
3187 | 181 | if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_SLT || |
3188 | 181 | Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT) && |
3189 | 181 | match(Op1, m_Zero())) |
3190 | 123 | return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType())); |
3191 | | |
3192 | | // icmp slt (bitcast (sitofp X)), 1 --> icmp slt X, 1 |
3193 | 58 | if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_One())) |
3194 | 30 | return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), 1)); |
3195 | | |
3196 | | // icmp sgt (bitcast (sitofp X)), -1 --> icmp sgt X, -1 |
3197 | 28 | if (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes())) |
3198 | 28 | return new ICmpInst(Pred, X, |
3199 | 28 | ConstantInt::getAllOnesValue(X->getType())); |
3200 | 28 | } |
3201 | | |
3202 | | // Zero-equality checks are preserved through unsigned floating-point casts: |
3203 | | // icmp eq (bitcast (uitofp X)), 0 --> icmp eq X, 0 |
3204 | | // icmp ne (bitcast (uitofp X)), 0 --> icmp ne X, 0 |
3205 | 439 | if (match(BCSrcOp, m_UIToFP(m_Value(X)))) |
3206 | 71 | if (Cmp.isEquality() && match(Op1, m_Zero())) |
3207 | 52 | return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType())); |
3208 | | |
3209 | | // If this is a sign-bit test of a bitcast of a casted FP value, eliminate |
3210 | | // the FP extend/truncate because that cast does not change the sign-bit. |
3211 | | // This is true for all standard IEEE-754 types and the X86 80-bit type. |
3212 | | // The sign-bit is always the most significant bit in those types. |
3213 | 387 | const APInt *C; |
3214 | 387 | bool TrueIfSigned; |
3215 | 387 | if (match(Op1, m_APInt(C)) && Bitcast->hasOneUse() && |
3216 | 387 | isSignBitCheck(Pred, *C, TrueIfSigned)) { |
3217 | 143 | if (match(BCSrcOp, m_FPExt(m_Value(X))) || |
3218 | 143 | match(BCSrcOp, m_FPTrunc(m_Value(X)))) { |
3219 | | // (bitcast (fpext/fptrunc X)) to iX) < 0 --> (bitcast X to iY) < 0 |
3220 | | // (bitcast (fpext/fptrunc X)) to iX) > -1 --> (bitcast X to iY) > -1 |
3221 | 23 | Type *XType = X->getType(); |
3222 | | |
3223 | | // We can't currently handle Power style floating point operations here. |
3224 | 23 | if (!(XType->isPPC_FP128Ty() || SrcType->isPPC_FP128Ty())) { |
// Bitcast X to an integer (or integer vector) as wide as X's own scalar
// type and test the sign bit there.
3225 | 21 | Type *NewType = Builder.getIntNTy(XType->getScalarSizeInBits()); |
3226 | 21 | if (auto *XVTy = dyn_cast<VectorType>(XType)) |
3227 | 19 | NewType = VectorType::get(NewType, XVTy->getElementCount()); |
3228 | 21 | Value *NewBitcast = Builder.CreateBitCast(X, NewType); |
3229 | 21 | if (TrueIfSigned) |
3230 | 20 | return new ICmpInst(ICmpInst::ICMP_SLT, NewBitcast, |
3231 | 20 | ConstantInt::getNullValue(NewType)); |
3232 | 1 | else |
3233 | 1 | return new ICmpInst(ICmpInst::ICMP_SGT, NewBitcast, |
3234 | 1 | ConstantInt::getAllOnesValue(NewType)); |
3235 | 21 | } |
3236 | 23 | } |
3237 | 143 | } |
3238 | 387 | } |
3239 | | |
// The remaining folds need a constant right-hand side, a scalar integer
// destination type and an integer or integer-vector source type.
3240 | 881 | const APInt *C; |
3241 | 881 | if (!match(Cmp.getOperand(1), m_APInt(C)) || !DstType->isIntegerTy() || |
3242 | 881 | !SrcType->isIntOrIntVectorTy()) |
3243 | 465 | return nullptr; |
3244 | | |
3245 | | // If this is checking if all elements of a vector compare are set or not, |
3246 | | // invert the casted vector equality compare and test if all compare |
3247 | | // elements are clear or not. Compare against zero is generally easier for |
3248 | | // analysis and codegen. |
3249 | | // icmp eq/ne (bitcast (not X) to iN), -1 --> icmp eq/ne (bitcast X to iN), 0 |
3250 | | // Example: are all elements equal? --> are zero elements not equal? |
3251 | | // TODO: Try harder to reduce compare of 2 freely invertible operands? |
3252 | 416 | if (Cmp.isEquality() && C->isAllOnes() && Bitcast->hasOneUse()) { |
3253 | 173 | if (Value *NotBCSrcOp = |
3254 | 173 | getFreelyInverted(BCSrcOp, BCSrcOp->hasOneUse(), &Builder)) { |
3255 | 51 | Value *Cast = Builder.CreateBitCast(NotBCSrcOp, DstType); |
3256 | 51 | return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(DstType)); |
3257 | 51 | } |
3258 | 173 | } |
3259 | | |
3260 | | // If this is checking if all elements of an extended vector are clear or not, |
3261 | | // compare in a narrow type to eliminate the extend: |
3262 | | // icmp eq/ne (bitcast (ext X) to iN), 0 --> icmp eq/ne (bitcast X to iM), 0 |
3263 | 365 | Value *X; |
3264 | 365 | if (Cmp.isEquality() && C->isZero() && Bitcast->hasOneUse() && |
3265 | 365 | match(BCSrcOp, m_ZExtOrSExt(m_Value(X)))) { |
3266 | 0 | if (auto *VecTy = dyn_cast<FixedVectorType>(X->getType())) { |
3267 | 0 | Type *NewType = Builder.getIntNTy(VecTy->getPrimitiveSizeInBits()); |
3268 | 0 | Value *NewCast = Builder.CreateBitCast(X, NewType); |
3269 | 0 | return new ICmpInst(Pred, NewCast, ConstantInt::getNullValue(NewType)); |
3270 | 0 | } |
3271 | 0 | } |
3272 | | |
3273 | | // Folding: icmp <pred> iN X, C |
3274 | | // where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC) to iN |
3275 | | // and C is a splat of a K-bit pattern |
3276 | | // and SC is a constant vector = <C', C', C', ..., C'> |
3277 | | // Into: |
3278 | | // %E = extractelement <M x iK> %vec, i32 C' |
3279 | | // icmp <pred> iK %E, trunc(C) |
3280 | 365 | Value *Vec; |
3281 | 365 | ArrayRef<int> Mask; |
3282 | 365 | if (match(BCSrcOp, m_Shuffle(m_Value(Vec), m_Undef(), m_Mask(Mask)))) { |
3283 | | // Check whether every element of Mask is the same constant |
3284 | 74 | if (all_equal(Mask)) { |
3285 | 32 | auto *VecTy = cast<VectorType>(SrcType); |
3286 | 32 | auto *EltTy = cast<IntegerType>(VecTy->getElementType()); |
3287 | 32 | if (C->isSplat(EltTy->getBitWidth())) { |
3288 | | // Fold the icmp based on the value of C |
3289 | | // If C is M copies of an iK sized bit pattern, |
3290 | | // then: |
3291 | | // => %E = extractelement <N x iK> %vec, i32 Elem |
3292 | | // icmp <pred> iK %E, <pattern> |
3293 | 19 | Value *Elem = Builder.getInt32(Mask[0]); |
3294 | 19 | Value *Extract = Builder.CreateExtractElement(Vec, Elem); |
3295 | 19 | Value *NewC = ConstantInt::get(EltTy, C->trunc(EltTy->getBitWidth())); |
3296 | 19 | return new ICmpInst(Pred, Extract, NewC); |
3297 | 19 | } |
3298 | 32 | } |
3299 | 74 | } |
3300 | 346 | return nullptr; |
3301 | 365 | } |
3302 | | |
3303 | | /// Try to fold integer comparisons with a constant operand: icmp Pred X, C |
3304 | | /// where X is some kind of instruction. When operand 1 matches m_APInt, the binary-operator, select, trunc and intrinsic folds are tried in that order, followed by the subo-extractvalue fold; afterwards the AllowUndef folds are tried. Returns the replacement instruction, or nullptr if nothing applied. |
3305 | 81.6k | Instruction *InstCombinerImpl::foldICmpInstWithConstant(ICmpInst &Cmp) { |
3306 | 81.6k | const APInt *C; |
3307 | | |
3308 | 81.6k | if (match(Cmp.getOperand(1), m_APInt(C))) { |
3309 | 50.3k | if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0))) |
3310 | 18.0k | if (Instruction *I = foldICmpBinOpWithConstant(Cmp, BO, *C)) |
3311 | 2.69k | return I; |
3312 | | |
3313 | 47.6k | if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0))) |
3314 | | // For now, we only support constant integers while folding the |
3315 | | // ICMP(SELECT) pattern. We can extend this to support vector of integers |
3316 | | // similar to the cases handled by binary ops above. |
3317 | 999 | if (auto *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1))) |
3318 | 984 | if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS)) |
3319 | 20 | return I; |
3320 | | |
3321 | 47.6k | if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0))) |
3322 | 739 | if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C)) |
3323 | 325 | return I; |
3324 | | |
3325 | 47.3k | if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0))) |
3326 | 1.32k | if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C)) |
3327 | 239 | return I; |
3328 | | |
3329 | | // (extractval ([s/u]subo X, Y), 0) == 0 --> X == Y |
3330 | | // (extractval ([s/u]subo X, Y), 0) != 0 --> X != Y |
3331 | | // TODO: This checks one-use, but that is not strictly necessary. |
3332 | 47.0k | Value *Cmp0 = Cmp.getOperand(0); |
3333 | 47.0k | Value *X, *Y; |
3334 | 47.0k | if (C->isZero() && Cmp.isEquality() && Cmp0->hasOneUse() && |
3335 | 47.0k | (match(Cmp0, |
3336 | 7.79k | m_ExtractValue<0>(m_Intrinsic<Intrinsic::ssub_with_overflow>( |
3337 | 7.79k | m_Value(X), m_Value(Y)))) || |
3338 | 7.79k | match(Cmp0, |
3339 | 7.79k | m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>( |
3340 | 7.79k | m_Value(X), m_Value(Y)))))) |
3341 | 0 | return new ICmpInst(Cmp.getPredicate(), X, Y); |
3342 | 47.0k | } |
3343 | | |
3344 | 78.4k | if (match(Cmp.getOperand(1), m_APIntAllowUndef(C))) |
3345 | 47.3k | return foldICmpInstWithConstantAllowUndef(Cmp, *C); |
3346 | | |
3347 | 31.0k | return nullptr; |
3348 | 78.4k | } |
3349 | | |
3350 | | /// Fold an icmp equality instruction with binary operator LHS and constant RHS: |
3351 | | /// icmp eq/ne BO, C. Returns nullptr for non-equality predicates or when none of the per-opcode folds below applies. |
3352 | | Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant( |
3353 | 15.5k | ICmpInst &Cmp, BinaryOperator *BO, const APInt &C) { |
3354 | | // TODO: Some of these folds could work with arbitrary constants, but this |
3355 | | // function is limited to scalar and vector splat constants. |
3356 | 15.5k | if (!Cmp.isEquality()) |
3357 | 5.41k | return nullptr; |
3358 | | |
3359 | 10.1k | ICmpInst::Predicate Pred = Cmp.getPredicate(); |
3360 | 10.1k | bool isICMP_NE = Pred == ICmpInst::ICMP_NE; |
3361 | 10.1k | Constant *RHS = cast<Constant>(Cmp.getOperand(1)); |
3362 | 10.1k | Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); |
3363 | | |
3364 | 10.1k | switch (BO->getOpcode()) { |
3365 | 110 | case Instruction::SRem: |
3366 | | // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. |
3367 | 110 | if (C.isZero() && BO->hasOneUse()) { |
3368 | 26 | const APInt *BOC; |
3369 | 26 | if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) { |
3370 | 0 | Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName()); |
3371 | 0 | return new ICmpInst(Pred, NewRem, |
3372 | 0 | Constant::getNullValue(BO->getType())); |
3373 | 0 | } |
3374 | 26 | } |
3375 | 110 | break; |
3376 | 1.22k | case Instruction::Add: { |
3377 | | // (A + C2) == C --> A == (C - C2) |
3378 | | // (A + C2) != C --> A != (C - C2) |
3379 | | // TODO: Remove the one-use limitation? See discussion in D58633. |
3380 | 1.22k | if (Constant *C2 = dyn_cast<Constant>(BOp1)) { |
3381 | 409 | if (BO->hasOneUse()) |
3382 | 86 | return new ICmpInst(Pred, BOp0, ConstantExpr::getSub(RHS, C2)); |
3383 | 814 | } else if (C.isZero()) { |
3384 | | // Replace ((add A, B) != 0) with (A != -B) if A or B is |
3385 | | // efficiently invertible, or if the add has just this one use. |
3386 | 569 | if (Value *NegVal = dyn_castNegVal(BOp1)) |
3387 | 0 | return new ICmpInst(Pred, BOp0, NegVal); |
3388 | 569 | if (Value *NegVal = dyn_castNegVal(BOp0)) |
3389 | 0 | return new ICmpInst(Pred, NegVal, BOp1); |
3390 | 569 | if (BO->hasOneUse()) { |
3391 | 19 | Value *Neg = Builder.CreateNeg(BOp1); |
3392 | 19 | Neg->takeName(BO); |
3393 | 19 | return new ICmpInst(Pred, BOp0, Neg); |
3394 | 19 | } |
3395 | 569 | } |
3396 | 1.11k | break; |
3397 | 1.22k | } |
3398 | 1.11k | case Instruction::Xor: |
3399 | 323 | if (BO->hasOneUse()) { |
3400 | 122 | if (Constant *BOC = dyn_cast<Constant>(BOp1)) { |
3401 | | // For the xor case, we can xor two constants together, eliminating |
3402 | | // the explicit xor. |
3403 | 50 | return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC)); |
3404 | 72 | } else if (C.isZero()) { |
3405 | | // Replace ((xor A, B) != 0) with (A != B) |
3406 | 14 | return new ICmpInst(Pred, BOp0, BOp1); |
3407 | 14 | } |
3408 | 122 | } |
3409 | 259 | break; |
3410 | 259 | case Instruction::Or: { |
3411 | 163 | const APInt *BOC; |
3412 | 163 | if (match(BOp1, m_APInt(BOC)) && BO->hasOneUse() && RHS->isAllOnesValue()) { |
3413 | | // Comparing if all bits outside of a constant mask are set? |
3414 | | // Replace (X | C) == -1 with (X & ~C) == ~C. |
3415 | | // This removes the -1 constant. |
3416 | 0 | Constant *NotBOC = ConstantExpr::getNot(cast<Constant>(BOp1)); |
3417 | 0 | Value *And = Builder.CreateAnd(BOp0, NotBOC); |
3418 | 0 | return new ICmpInst(Pred, And, NotBOC); |
3419 | 0 | } |
3420 | 163 | break; |
3421 | 163 | } |
3422 | 163 | case Instruction::UDiv: |
3423 | 186 | case Instruction::SDiv: |
3424 | 186 | if (BO->isExact()) { |
3425 | | // div exact X, Y eq/ne 0 -> X eq/ne 0 |
3426 | | // div exact X, Y eq/ne 1 -> X eq/ne Y |
3427 | | // div exact X, Y eq/ne C -> |
3428 | | // if Y * C never-overflow && OneUse: |
3429 | | // -> Y * C eq/ne X |
3430 | 9 | if (C.isZero()) |
3431 | 0 | return new ICmpInst(Pred, BOp0, Constant::getNullValue(BO->getType())); |
3432 | 9 | else if (C.isOne()) |
3433 | 0 | return new ICmpInst(Pred, BOp0, BOp1); |
3434 | 9 | else if (BO->hasOneUse()) { |
3435 | 0 | OverflowResult OR = computeOverflow( |
3436 | 0 | Instruction::Mul, BO->getOpcode() == Instruction::SDiv, BOp1, |
3437 | 0 | Cmp.getOperand(1), BO); |
3438 | 0 | if (OR == OverflowResult::NeverOverflows) { |
3439 | 0 | Value *YC = |
3440 | 0 | Builder.CreateMul(BOp1, ConstantInt::get(BO->getType(), C)); |
3441 | 0 | return new ICmpInst(Pred, YC, BOp0); |
3442 | 0 | } |
3443 | 0 | } |
3444 | 9 | } |
3445 | 186 | if (BO->getOpcode() == Instruction::UDiv && C.isZero()) { |
3446 | | // (icmp eq (udiv A, B), 0) -> (icmp ugt B, A); (icmp ne (udiv A, B), 0) -> (icmp ule B, A) |
3447 | 56 | auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT; |
3448 | 56 | return new ICmpInst(NewPred, BOp1, BOp0); |
3449 | 56 | } |
3450 | 130 | break; |
3451 | 8.14k | default: |
3452 | 8.14k | break; |
3453 | 10.1k | } |
3454 | 9.92k | return nullptr; |
3455 | 10.1k | } |
3456 | | |
3457 | | static Instruction *foldCtpopPow2Test(ICmpInst &I, IntrinsicInst *CtpopLhs, |
3458 | | const APInt &CRhs, |
3459 | | InstCombiner::BuilderTy &Builder, |
3460 | 622 | const SimplifyQuery &Q) { |
3461 | 622 | assert(CtpopLhs->getIntrinsicID() == Intrinsic::ctpop && |
3462 | 622 | "Non-ctpop intrin in ctpop fold"); |
3463 | 622 | if (!CtpopLhs->hasOneUse()) |
3464 | 57 | return nullptr; |
3465 | | |
3466 | | // Power of 2 test: |
3467 | | // isPow2OrZero : ctpop(X) u< 2 |
3468 | | // isPow2 : ctpop(X) == 1 |
3469 | | // NotPow2OrZero: ctpop(X) u> 1 |
3470 | | // NotPow2 : ctpop(X) != 1 |
3471 | | // If exactly one bit of X (Bit) is known to be set, the test can be folded to: |
3472 | | // IsPow2 : X & (~Bit) == 0 |
3473 | | // NotPow2 : X & (~Bit) != 0 |
3474 | 565 | const ICmpInst::Predicate Pred = I.getPredicate(); |
3475 | 565 | if (((I.isEquality() || Pred == ICmpInst::ICMP_UGT) && CRhs == 1) || |
3476 | 565 | (Pred == ICmpInst::ICMP_ULT && CRhs == 2)) { |
3477 | 481 | Value *Op = CtpopLhs->getArgOperand(0); |
3478 | 481 | KnownBits OpKnown = computeKnownBits(Op, Q.DL, |
3479 | 481 | /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT); |
3480 | | // No need to check for count > 1, that should be already constant folded. |
3481 | 481 | if (OpKnown.countMinPopulation() == 1) { |
3482 | 0 | Value *And = Builder.CreateAnd( |
3483 | 0 | Op, Constant::getIntegerValue(Op->getType(), ~(OpKnown.One))); |
3484 | 0 | return new ICmpInst( |
3485 | 0 | (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_ULT) |
3486 | 0 | ? ICmpInst::ICMP_EQ |
3487 | 0 | : ICmpInst::ICMP_NE, |
3488 | 0 | And, Constant::getNullValue(Op->getType())); |
3489 | 0 | } |
3490 | 481 | } |
3491 | | |
3492 | 565 | return nullptr; |
3493 | 565 | } |
3494 | | |
3495 | | /// Fold an equality icmp with LLVM intrinsic and constant operand: icmp eq/ne II, C. Returns the replacement compare, or nullptr if no fold applies. |
3496 | | Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant( |
3497 | 590 | ICmpInst &Cmp, IntrinsicInst *II, const APInt &C) { |
3498 | 590 | Type *Ty = II->getType(); |
3499 | 590 | unsigned BitWidth = C.getBitWidth(); |
3500 | 590 | const ICmpInst::Predicate Pred = Cmp.getPredicate(); |
3501 | | |
3502 | 590 | switch (II->getIntrinsicID()) { |
3503 | 35 | case Intrinsic::abs: |
3504 | | // abs(A) == 0 -> A == 0 |
3505 | | // abs(A) == INT_MIN -> A == INT_MIN |
3506 | 35 | if (C.isZero() || C.isMinSignedValue()) |
3507 | 35 | return new ICmpInst(Pred, II->getArgOperand(0), ConstantInt::get(Ty, C)); |
3508 | 0 | break; |
3509 | | |
3510 | 21 | case Intrinsic::bswap: |
3511 | | // bswap(A) == C -> A == bswap(C) |
3512 | 21 | return new ICmpInst(Pred, II->getArgOperand(0), |
3513 | 21 | ConstantInt::get(Ty, C.byteSwap())); |
3514 | | |
3515 | 6 | case Intrinsic::bitreverse: |
3516 | | // bitreverse(A) == C -> A == bitreverse(C) |
3517 | 6 | return new ICmpInst(Pred, II->getArgOperand(0), |
3518 | 6 | ConstantInt::get(Ty, C.reverseBits())); |
3519 | | |
3520 | 54 | case Intrinsic::ctlz: |
3521 | 111 | case Intrinsic::cttz: { |
3522 | | // ctlz/cttz(A) == bitwidth(A) -> A == 0 and likewise for != |
3523 | 111 | if (C == BitWidth) |
3524 | 15 | return new ICmpInst(Pred, II->getArgOperand(0), |
3525 | 15 | ConstantInt::getNullValue(Ty)); |
3526 | | |
3527 | | // cttz(A) == C -> A & Mask1 == Mask2, where Mask2 only has bit C set |
3528 | | // and Mask1 has the low C+1 bits (bits 0..C) set. Similar for ctlz, but for high bits. |
3529 | | // Limit to one use to ensure we don't increase instruction count. |
3530 | 96 | unsigned Num = C.getLimitedValue(BitWidth); |
3531 | 96 | if (Num != BitWidth && II->hasOneUse()) { |
3532 | 79 | bool IsTrailing = II->getIntrinsicID() == Intrinsic::cttz; |
3533 | 79 | APInt Mask1 = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num + 1) |
3534 | 79 | : APInt::getHighBitsSet(BitWidth, Num + 1); |
3535 | 79 | APInt Mask2 = IsTrailing |
3536 | 79 | ? APInt::getOneBitSet(BitWidth, Num) |
3537 | 79 | : APInt::getOneBitSet(BitWidth, BitWidth - Num - 1); |
3538 | 79 | return new ICmpInst(Pred, Builder.CreateAnd(II->getArgOperand(0), Mask1), |
3539 | 79 | ConstantInt::get(Ty, Mask2)); |
3540 | 79 | } |
3541 | 17 | break; |
3542 | 96 | } |
3543 | | |
3544 | 195 | case Intrinsic::ctpop: { |
3545 | | // popcount(A) == 0 -> A == 0 and likewise for != |
3546 | | // popcount(A) == bitwidth(A) -> A == -1 and likewise for != |
3547 | 195 | bool IsZero = C.isZero(); |
3548 | 195 | if (IsZero || C == BitWidth) |
3549 | 17 | return new ICmpInst(Pred, II->getArgOperand(0), |
3550 | 17 | IsZero ? Constant::getNullValue(Ty) |
3551 | 17 | : Constant::getAllOnesValue(Ty)); |
3552 | | |
3553 | 178 | break; |
3554 | 195 | } |
3555 | | |
3556 | 178 | case Intrinsic::fshl: |
3557 | 15 | case Intrinsic::fshr: |
3558 | 15 | if (II->getArgOperand(0) == II->getArgOperand(1)) { |
3559 | 2 | const APInt *RotAmtC; |
3560 | | // ror(X, RotAmtC) == C --> X == rol(C, RotAmtC) |
3561 | | // rol(X, RotAmtC) == C --> X == ror(C, RotAmtC) |
3562 | 2 | if (match(II->getArgOperand(2), m_APInt(RotAmtC))) |
3563 | 2 | return new ICmpInst(Pred, II->getArgOperand(0), |
3564 | 2 | II->getIntrinsicID() == Intrinsic::fshl |
3565 | 2 | ? ConstantInt::get(Ty, C.rotr(*RotAmtC)) |
3566 | 2 | : ConstantInt::get(Ty, C.rotl(*RotAmtC))); |
3567 | 2 | } |
3568 | 13 | break; |
3569 | | |
3570 | 30 | case Intrinsic::umax: |
3571 | 39 | case Intrinsic::uadd_sat: { |
3572 | | // uadd.sat(a, b) == 0 -> (a | b) == 0 |
3573 | | // umax(a, b) == 0 -> (a | b) == 0 |
3574 | 39 | if (C.isZero() && II->hasOneUse()) { |
3575 | 7 | Value *Or = Builder.CreateOr(II->getArgOperand(0), II->getArgOperand(1)); |
3576 | 7 | return new ICmpInst(Pred, Or, Constant::getNullValue(Ty)); |
3577 | 7 | } |
3578 | 32 | break; |
3579 | 39 | } |
3580 | | |
3581 | 32 | case Intrinsic::ssub_sat: |
3582 | | // ssub.sat(a, b) == 0 -> a == b |
3583 | 5 | if (C.isZero()) |
3584 | 0 | return new ICmpInst(Pred, II->getArgOperand(0), II->getArgOperand(1)); |
3585 | 5 | break; |
3586 | 8 | case Intrinsic::usub_sat: { |
3587 | | // usub.sat(a, b) == 0 -> a u<= b; usub.sat(a, b) != 0 -> a u> b |
3588 | 8 | if (C.isZero()) { |
3589 | 8 | ICmpInst::Predicate NewPred = |
3590 | 8 | Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT; |
3591 | 8 | return new ICmpInst(NewPred, II->getArgOperand(0), II->getArgOperand(1)); |
3592 | 8 | } |
3593 | 0 | break; |
3594 | 8 | } |
3595 | 155 | default: |
3596 | 155 | break; |
3597 | 590 | } |
3598 | | |
3599 | 400 | return nullptr; |
3600 | 590 | } |
3601 | | |
3602 | | /// Fold an equality icmp whose two operands are calls to the same LLVM intrinsic. Handles bswap/bitreverse pairs and fshl/fshr pairs whose first two operands are equal (rotates); returns nullptr for anything else. Cmp must be an equality compare (asserted). |
3603 | | static Instruction * |
3604 | | foldICmpIntrinsicWithIntrinsic(ICmpInst &Cmp, |
3605 | 34.9k | InstCombiner::BuilderTy &Builder) { |
3606 | 34.9k | assert(Cmp.isEquality()); |
3607 | | |
3608 | 0 | ICmpInst::Predicate Pred = Cmp.getPredicate(); |
3609 | 34.9k | Value *Op0 = Cmp.getOperand(0); |
3610 | 34.9k | Value *Op1 = Cmp.getOperand(1); |
3611 | 34.9k | const auto *IIOp0 = dyn_cast<IntrinsicInst>(Op0); |
3612 | 34.9k | const auto *IIOp1 = dyn_cast<IntrinsicInst>(Op1); |
3613 | 34.9k | if (!IIOp0 || !IIOp1 || IIOp0->getIntrinsicID() != IIOp1->getIntrinsicID()) |
3614 | 34.9k | return nullptr; |
3615 | | |
3616 | 22 | switch (IIOp0->getIntrinsicID()) { |
3617 | 4 | case Intrinsic::bswap: |
3618 | 8 | case Intrinsic::bitreverse: |
3619 | | // If both operands are byte-swapped or bit-reversed, just compare the |
3620 | | // original values. |
3621 | 8 | return new ICmpInst(Pred, IIOp0->getOperand(0), IIOp1->getOperand(0)); |
3622 | 0 | case Intrinsic::fshl: |
3623 | 0 | case Intrinsic::fshr: { |
3624 | | // If both operands are rotated by same amount, just compare the |
3625 | | // original values. |
3626 | 0 | if (IIOp0->getOperand(0) != IIOp0->getOperand(1)) |
3627 | 0 | break; |
3628 | 0 | if (IIOp1->getOperand(0) != IIOp1->getOperand(1)) |
3629 | 0 | break; |
3630 | 0 | if (IIOp0->getOperand(2) == IIOp1->getOperand(2)) |
3631 | 0 | return new ICmpInst(Pred, IIOp0->getOperand(0), IIOp1->getOperand(0)); |
3632 | | |
3633 | | // rotate(X, AmtX) == rotate(Y, AmtY) |
3634 | | // -> rotate(X, AmtX - AmtY) == Y |
3635 | | // Do this if either both rotates have one use or if only one has one use |
3636 | | // and AmtX/AmtY are constants. |
3637 | 0 | unsigned OneUses = IIOp0->hasOneUse() + IIOp1->hasOneUse(); |
3638 | 0 | if (OneUses == 2 || |
3639 | 0 | (OneUses == 1 && match(IIOp0->getOperand(2), m_ImmConstant()) && |
3640 | 0 | match(IIOp1->getOperand(2), m_ImmConstant()))) { |
3641 | 0 | Value *SubAmt = |
3642 | 0 | Builder.CreateSub(IIOp0->getOperand(2), IIOp1->getOperand(2)); |
3643 | 0 | Value *CombinedRotate = Builder.CreateIntrinsic( |
3644 | 0 | Op0->getType(), IIOp0->getIntrinsicID(), |
3645 | 0 | {IIOp0->getOperand(0), IIOp0->getOperand(0), SubAmt}); |
3646 | 0 | return new ICmpInst(Pred, IIOp1->getOperand(0), CombinedRotate); |
3647 | 0 | } |
3648 | 0 | } break; |
3649 | 14 | default: |
3650 | 14 | break; |
3651 | 22 | } |
3652 | | |
3653 | 14 | return nullptr; |
3654 | 22 | } |
3655 | | |
3656 | | /// Try to fold integer comparisons with a constant operand: icmp Pred X, C |
3657 | | /// where X is some kind of instruction and C is AllowUndef (the caller obtains it via m_APIntAllowUndef). Currently only equality compares of a rotate (fshl/fshr with equal first two operands) against 0 or -1 are folded; everything else returns nullptr. |
3658 | | /// TODO: Move more folds which allow undef to this function. |
3659 | | Instruction * |
3660 | | InstCombinerImpl::foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp, |
3661 | 47.3k | const APInt &C) { |
3662 | 47.3k | const ICmpInst::Predicate Pred = Cmp.getPredicate(); |
3663 | 47.3k | if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0))) { |
3664 | 1.08k | switch (II->getIntrinsicID()) { |
3665 | 1.05k | default: |
3666 | 1.05k | break; |
3667 | 1.05k | case Intrinsic::fshl: |
3668 | 26 | case Intrinsic::fshr: |
3669 | 26 | if (Cmp.isEquality() && II->getArgOperand(0) == II->getArgOperand(1)) { |
3670 | | // (rot X, ?) == 0/-1 --> X == 0/-1 |
3671 | 0 | if (C.isZero() || C.isAllOnes()) |
3672 | 0 | return new ICmpInst(Pred, II->getArgOperand(0), Cmp.getOperand(1)); |
3673 | 0 | } |
3674 | 26 | break; |
3675 | 1.08k | } |
3676 | 1.08k | } |
3677 | | |
3678 | 47.3k | return nullptr; |
3679 | 47.3k | } |
3680 | | |
3681 | | /// Fold an icmp with BinaryOp and constant operand: icmp Pred BO, C. Dispatches on BO's opcode to the matching per-opcode fold (UDiv additionally falls through to the generic div fold); if that yields nothing, falls back to foldICmpBinOpEqualityWithConstant. |
3682 | | Instruction *InstCombinerImpl::foldICmpBinOpWithConstant(ICmpInst &Cmp, |
3683 | | BinaryOperator *BO, |
3684 | 18.0k | const APInt &C) { |
3685 | 18.0k | switch (BO->getOpcode()) { |
3686 | 775 | case Instruction::Xor: |
3687 | 775 | if (Instruction *I = foldICmpXorConstant(Cmp, BO, C)) |
3688 | 92 | return I; |
3689 | 683 | break; |
3690 | 7.86k | case Instruction::And: |
3691 | 7.86k | if (Instruction *I = foldICmpAndConstant(Cmp, BO, C)) |
3692 | 531 | return I; |
3693 | 7.33k | break; |
3694 | 7.33k | case Instruction::Or: |
3695 | 393 | if (Instruction *I = foldICmpOrConstant(Cmp, BO, C)) |
3696 | 75 | return I; |
3697 | 318 | break; |
3698 | 1.20k | case Instruction::Mul: |
3699 | 1.20k | if (Instruction *I = foldICmpMulConstant(Cmp, BO, C)) |
3700 | 71 | return I; |
3701 | 1.13k | break; |
3702 | 1.13k | case Instruction::Shl: |
3703 | 1.12k | if (Instruction *I = foldICmpShlConstant(Cmp, BO, C)) |
3704 | 508 | return I; |
3705 | 621 | break; |
3706 | 621 | case Instruction::LShr: |
3707 | 736 | case Instruction::AShr: |
3708 | 736 | if (Instruction *I = foldICmpShrConstant(Cmp, BO, C)) |
3709 | 288 | return I; |
3710 | 448 | break; |
3711 | 448 | case Instruction::SRem: |
3712 | 195 | if (Instruction *I = foldICmpSRemConstant(Cmp, BO, C)) |
3713 | 6 | return I; |
3714 | 189 | break; |
3715 | 265 | case Instruction::UDiv: |
3716 | 265 | if (Instruction *I = foldICmpUDivConstant(Cmp, BO, C)) |
3717 | 54 | return I; |
3718 | 265 | [[fallthrough]]; |
3719 | 514 | case Instruction::SDiv: |
3720 | 514 | if (Instruction *I = foldICmpDivConstant(Cmp, BO, C)) |
3721 | 203 | return I; |
3722 | 311 | break; |
3723 | 1.20k | case Instruction::Sub: |
3724 | 1.20k | if (Instruction *I = foldICmpSubConstant(Cmp, BO, C)) |
3725 | 379 | return I; |
3726 | 830 | break; |
3727 | 3.84k | case Instruction::Add: |
3728 | 3.84k | if (Instruction *I = foldICmpAddConstant(Cmp, BO, C)) |
3729 | 259 | return I; |
3730 | 3.59k | break; |
3731 | 3.59k | default: |
3732 | 98 | break; |
3733 | 18.0k | } |
3734 | | |
3735 | | // TODO: These folds could be refactored to be part of the above calls. |
3736 | 15.5k | return foldICmpBinOpEqualityWithConstant(Cmp, BO, C); |
3737 | 18.0k | } |
3738 | | |
3739 | | static Instruction * |
3740 | | foldICmpUSubSatOrUAddSatWithConstant(ICmpInst::Predicate Pred, |
3741 | | SaturatingInst *II, const APInt &C, |
3742 | 18 | InstCombiner::BuilderTy &Builder) { |
3743 | | // This transform may end up producing more than one instruction for the |
3744 | | // intrinsic, so limit it to one user of the intrinsic. |
3745 | 18 | if (!II->hasOneUse()) |
3746 | 6 | return nullptr; |
3747 | | |
3748 | | // Let Y = [add/sub]_sat(X, C) pred C2 (in the code below, the intrinsic's constant operand C is *COp1 and the compared constant C2 is the parameter C) |
3749 | | // SatVal = The saturating value for the operation |
3750 | | // WillWrap = Whether or not the operation will underflow / overflow |
3751 | | // => Y = (WillWrap ? SatVal : (X binop C)) pred C2 |
3752 | | // => Y = WillWrap ? (SatVal pred C2) : ((X binop C) pred C2) |
3753 | | // |
3754 | | // When (SatVal pred C2) is true, then |
3755 | | // Y = WillWrap ? true : ((X binop C) pred C2) |
3756 | | // => Y = WillWrap || ((X binop C) pred C2) |
3757 | | // else |
3758 | | // Y = WillWrap ? false : ((X binop C) pred C2) |
3759 | | // => Y = !WillWrap ? ((X binop C) pred C2) : false |
3760 | | // => Y = !WillWrap && ((X binop C) pred C2) |
3761 | 12 | Value *Op0 = II->getOperand(0); |
3762 | 12 | Value *Op1 = II->getOperand(1); |
3763 | | |
3764 | 12 | const APInt *COp1; |
3765 | | // This transform only works when the intrinsic has an integral constant or |
3766 | | // splat vector as the second operand. |
3767 | 12 | if (!match(Op1, m_APInt(COp1))) |
3768 | 12 | return nullptr; |
3769 | | |
3770 | 0 | APInt SatVal; |
3771 | 0 | switch (II->getIntrinsicID()) { |
3772 | 0 | default: |
3773 | 0 | llvm_unreachable( |
3774 | 0 | "This function only works with usub_sat and uadd_sat for now!"); |
3775 | 0 | case Intrinsic::uadd_sat: |
3776 | 0 | SatVal = APInt::getAllOnes(C.getBitWidth()); |
3777 | 0 | break; |
3778 | 0 | case Intrinsic::usub_sat: |
3779 | 0 | SatVal = APInt::getZero(C.getBitWidth()); |
3780 | 0 | break; |
3781 | 0 | } |
3782 | | |
3783 | | // Check (SatVal pred C2) |
3784 | 0 | bool SatValCheck = ICmpInst::compare(SatVal, C, Pred); |
3785 | | |
3786 | | // !WillWrap. |
3787 | 0 | ConstantRange C1 = ConstantRange::makeExactNoWrapRegion( |
3788 | 0 | II->getBinaryOp(), *COp1, II->getNoWrapKind()); |
3789 | | |
3790 | | // WillWrap. |
3791 | 0 | if (SatValCheck) |
3792 | 0 | C1 = C1.inverse(); |
3793 |

3794 | 0 | ConstantRange C2 = ConstantRange::makeExactICmpRegion(Pred, C); |
3795 | 0 | if (II->getBinaryOp() == Instruction::Add) |
3796 | 0 | C2 = C2.sub(*COp1); |
3797 | 0 | else |
3798 | 0 | C2 = C2.add(*COp1); |
3799 |

3800 | 0 | Instruction::BinaryOps CombiningOp = |
3801 | 0 | SatValCheck ? Instruction::BinaryOps::Or : Instruction::BinaryOps::And; |
3802 |

3803 | 0 | std::optional<ConstantRange> Combination; |
3804 | 0 | if (CombiningOp == Instruction::BinaryOps::Or) |
3805 | 0 | Combination = C1.exactUnionWith(C2); |
3806 | 0 | else /* CombiningOp == Instruction::BinaryOps::And */ |
3807 | 0 | Combination = C1.exactIntersectWith(C2); |
3808 |

3809 | 0 | if (!Combination) |
3810 | 0 | return nullptr; |
3811 | | |
3812 | 0 | CmpInst::Predicate EquivPred; |
3813 | 0 | APInt EquivInt; |
3814 | 0 | APInt EquivOffset; |
3815 |

3816 | 0 | Combination->getEquivalentICmp(EquivPred, EquivInt, EquivOffset); |
3817 |

3818 | 0 | return new ICmpInst( |
3819 | 0 | EquivPred, |
3820 | 0 | Builder.CreateAdd(Op0, ConstantInt::get(Op1->getType(), EquivOffset)), |
3821 | 0 | ConstantInt::get(Op1->getType(), EquivInt)); |
3822 | 0 | } |
3823 | | |
3824 | | /// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C. First tries the folds valid for any predicate (uadd_sat/usub_sat, ctpop power-of-2 test); equality compares are then forwarded to foldICmpEqIntrinsicWithConstant, and the remaining relational folds are handled in the second switch. Returns nullptr if nothing applies. |
3825 | | Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, |
3826 | | IntrinsicInst *II, |
3827 | 1.32k | const APInt &C) { |
3828 | 1.32k | ICmpInst::Predicate Pred = Cmp.getPredicate(); |
3829 | | |
3830 | | // Handle folds that apply for any kind of icmp. |
3831 | 1.32k | switch (II->getIntrinsicID()) { |
3832 | 684 | default: |
3833 | 684 | break; |
3834 | 684 | case Intrinsic::uadd_sat: |
3835 | 18 | case Intrinsic::usub_sat: |
3836 | 18 | if (auto *Folded = foldICmpUSubSatOrUAddSatWithConstant( |
3837 | 18 | Pred, cast<SaturatingInst>(II), C, Builder)) |
3838 | 0 | return Folded; |
3839 | 18 | break; |
3840 | 622 | case Intrinsic::ctpop: { |
3841 | 622 | const SimplifyQuery Q = SQ.getWithInstruction(&Cmp); |
3842 | 622 | if (Instruction *R = foldCtpopPow2Test(Cmp, II, C, Builder, Q)) |
3843 | 0 | return R; |
3844 | 622 | } break; |
3845 | 1.32k | } |
3846 | | |
3847 | 1.32k | if (Cmp.isEquality()) |
3848 | 590 | return foldICmpEqIntrinsicWithConstant(Cmp, II, C); |
3849 | | |
3850 | 734 | Type *Ty = II->getType(); |
3851 | 734 | unsigned BitWidth = C.getBitWidth(); |
3852 | 734 | switch (II->getIntrinsicID()) { |
3853 | 427 | case Intrinsic::ctpop: { |
3854 | | // (ctpop X > BitWidth - 1) --> X == -1 |
3855 | 427 | Value *X = II->getArgOperand(0); |
3856 | 427 | if (C == BitWidth - 1 && Pred == ICmpInst::ICMP_UGT) |
3857 | 5 | return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, X, |
3858 | 5 | ConstantInt::getAllOnesValue(Ty)); |
3859 | | // (ctpop X < BitWidth) --> X != -1 |
3860 | 422 | if (C == BitWidth && Pred == ICmpInst::ICMP_ULT) |
3861 | 0 | return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE, X, |
3862 | 0 | ConstantInt::getAllOnesValue(Ty)); |
3863 | 422 | break; |
3864 | 422 | } |
3865 | 422 | case Intrinsic::ctlz: { |
3866 | | // ctlz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX < 0b00010000 |
3867 | 20 | if (Pred == ICmpInst::ICMP_UGT && C.ult(BitWidth)) { |
3868 | 15 | unsigned Num = C.getLimitedValue(); |
3869 | 15 | APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1); |
3870 | 15 | return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT, |
3871 | 15 | II->getArgOperand(0), ConstantInt::get(Ty, Limit)); |
3872 | 15 | } |
3873 | | |
3874 | | // ctlz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX > 0b00011111 |
3875 | 5 | if (Pred == ICmpInst::ICMP_ULT && C.uge(1) && C.ule(BitWidth)) { |
3876 | 5 | unsigned Num = C.getLimitedValue(); |
3877 | 5 | APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num); |
3878 | 5 | return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT, |
3879 | 5 | II->getArgOperand(0), ConstantInt::get(Ty, Limit)); |
3880 | 5 | } |
3881 | 0 | break; |
3882 | 5 | } |
3883 | 38 | case Intrinsic::cttz: { |
3884 | | // Limit to one use to ensure we don't increase instruction count. |
3885 | 38 | if (!II->hasOneUse()) |
3886 | 14 | return nullptr; |
3887 | | |
3888 | | // cttz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX & 0b00001111 == 0 |
3889 | 24 | if (Pred == ICmpInst::ICMP_UGT && C.ult(BitWidth)) { |
3890 | 15 | APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1); |
3891 | 15 | return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, |
3892 | 15 | Builder.CreateAnd(II->getArgOperand(0), Mask), |
3893 | 15 | ConstantInt::getNullValue(Ty)); |
3894 | 15 | } |
3895 | | |
3896 | | // cttz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX & 0b00000111 != 0 |
3897 | 9 | if (Pred == ICmpInst::ICMP_ULT && C.uge(1) && C.ule(BitWidth)) { |
3898 | 9 | APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue()); |
3899 | 9 | return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE, |
3900 | 9 | Builder.CreateAnd(II->getArgOperand(0), Mask), |
3901 | 9 | ConstantInt::getNullValue(Ty)); |
3902 | 9 | } |
3903 | 0 | break; |
3904 | 9 | } |
3905 | 0 | case Intrinsic::ssub_sat: |
3906 | | // ssub.sat(a, b) spred 0 -> a spred b |
3907 | 0 | if (ICmpInst::isSigned(Pred)) { |
3908 | 0 | if (C.isZero()) |
3909 | 0 | return new ICmpInst(Pred, II->getArgOperand(0), II->getArgOperand(1)); |
3910 | | // X s<= 0 is canonicalized to X s< 1 |
3911 | 0 | if (Pred == ICmpInst::ICMP_SLT && C.isOne()) |
3912 | 0 | return new ICmpInst(ICmpInst::ICMP_SLE, II->getArgOperand(0), |
3913 | 0 | II->getArgOperand(1)); |
3914 | | // X s>= 0 is canonicalized to X s> -1 |
3915 | 0 | if (Pred == ICmpInst::ICMP_SGT && C.isAllOnes()) |
3916 | 0 | return new ICmpInst(ICmpInst::ICMP_SGE, II->getArgOperand(0), |
3917 | 0 | II->getArgOperand(1)); |
3918 | 0 | } |
3919 | 0 | break; |
3920 | 249 | default: |
3921 | 249 | break; |
3922 | 734 | } |
3923 | | |
3924 | 671 | return nullptr; |
3925 | 734 | } |
3926 | | |
3927 | | /// Handle icmp with constant (but not simple integer constant) RHS. Handles a PHI, inttoptr or load on the LHS; returns nullptr if operand 1 is not a Constant, operand 0 is not an Instruction, or no fold applies. |
3928 | 78.4k | Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) { |
3929 | 78.4k | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); |
3930 | 78.4k | Constant *RHSC = dyn_cast<Constant>(Op1); |
3931 | 78.4k | Instruction *LHSI = dyn_cast<Instruction>(Op0); |
3932 | 78.4k | if (!RHSC || !LHSI) |
3933 | 47.4k | return nullptr; |
3934 | | |
3935 | 30.9k | switch (LHSI->getOpcode()) { |
3936 | 336 | case Instruction::PHI: |
3937 | 336 | if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI))) |
3938 | 4 | return NV; |
3939 | 332 | break; |
3940 | 332 | case Instruction::IntToPtr: |
3941 | | // icmp pred inttoptr(X), null -> icmp pred X, 0 (only when X already has the pointer-sized integer type) |
3942 | 19 | if (RHSC->isNullValue() && |
3943 | 19 | DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType()) |
3944 | 19 | return new ICmpInst( |
3945 | 19 | I.getPredicate(), LHSI->getOperand(0), |
3946 | 19 | Constant::getNullValue(LHSI->getOperand(0)->getType())); |
3947 | 0 | break; |
3948 | | |
3949 | 8.37k | case Instruction::Load: |
3950 | | // Try to optimize things like "A[i] > 4" to index computations. |
3951 | 8.37k | if (GetElementPtrInst *GEP = |
3952 | 8.37k | dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) |
3953 | 554 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) |
3954 | 161 | if (Instruction *Res = |
3955 | 161 | foldCmpLoadFromIndexedGlobal(cast<LoadInst>(LHSI), GEP, GV, I)) |
3956 | 54 | return Res; |
3957 | 8.32k | break; |
3958 | 30.9k | } |
3959 | | |
3960 | 30.8k | return nullptr; |
3961 | 30.9k | } |
3962 | | |
3963 | | Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred, |
3964 | | SelectInst *SI, Value *RHS, |
3965 | 2.16k | const ICmpInst &I) { |
3966 | | // Try to fold the comparison into the select arms, which will cause the |
3967 | | // select to be converted into a logical and/or. SimplifyOp returns the simplified "Op Pred RHS" for one arm, falling back to what the select condition (assumed true or false for that arm) implies, or nullptr if neither produces a value. |
3968 | 4.32k | auto SimplifyOp = [&](Value *Op, bool SelectCondIsTrue) -> Value * { |
3969 | 4.32k | if (Value *Res = simplifyICmpInst(Pred, Op, RHS, SQ)) |
3970 | 1.48k | return Res; |
3971 | 2.83k | if (std::optional<bool> Impl = isImpliedCondition( |
3972 | 2.83k | SI->getCondition(), Pred, Op, RHS, DL, SelectCondIsTrue)) |
3973 | 42 | return ConstantInt::get(I.getType(), *Impl); |
3974 | 2.79k | return nullptr; |
3975 | 2.83k | }; |
3976 | | |
3977 | 2.16k | ConstantInt *CI = nullptr; |
3978 | 2.16k | Value *Op1 = SimplifyOp(SI->getOperand(1), true); |
3979 | 2.16k | if (Op1) |
3980 | 551 | CI = dyn_cast<ConstantInt>(Op1); |
3981 | | |
3982 | 2.16k | Value *Op2 = SimplifyOp(SI->getOperand(2), false); |
3983 | 2.16k | if (Op2) |
3984 | 979 | CI = dyn_cast<ConstantInt>(Op2); |
3985 | | |
3986 | | // We only want to perform this transformation if it will not lead to |
3987 | | // additional code. This is true if either both sides of the select |
3988 | | // fold to a constant (in which case the icmp is replaced with a select |
3989 | | // which will usually simplify) or this is the only user of the |
3990 | | // select (in which case we are trading a select+icmp for a simpler |
3991 | | // select+icmp) or all uses of the select can be replaced based on |
3992 | | // dominance information ("Global cases"). |
3993 | 2.16k | bool Transform = false; |
3994 | 2.16k | if (Op1 && Op2) |
3995 | 65 | Transform = true; |
3996 | 2.09k | else if (Op1 || Op2) { |
3997 | | // Local case |
3998 | 1.40k | if (SI->hasOneUse()) |
3999 | 236 | Transform = true; |
4000 | | // Global cases |
4001 | 1.16k | else if (CI && !CI->isZero()) |
4002 | | // When Op1 is constant try replacing select with second operand. |
4003 | | // Otherwise Op2 is constant and try replacing select with first |
4004 | | // operand. |
4005 | 634 | Transform = replacedSelectWithOperand(SI, &I, Op1 ? 2 : 1); |
4006 | 1.40k | } |
4007 | 2.16k | if (Transform) { |
4008 | 341 | if (!Op1) |
4009 | 188 | Op1 = Builder.CreateICmp(Pred, SI->getOperand(1), RHS, I.getName()); |
4010 | 341 | if (!Op2) |
4011 | 88 | Op2 = Builder.CreateICmp(Pred, SI->getOperand(2), RHS, I.getName()); |
4012 | 341 | return SelectInst::Create(SI->getOperand(0), Op1, Op2); |
4013 | 341 | } |
4014 | | |
4015 | 1.82k | return nullptr; |
4016 | 2.16k | } |
4017 | | |
4018 | | /// Some comparisons can be simplified. |
4019 | | /// In this case, we are looking for comparisons that look like |
4020 | | /// a check for a lossy truncation. |
4021 | | /// Folds: |
4022 | | /// icmp SrcPred (x & Mask), x to icmp DstPred x, Mask |
4023 | | /// Where Mask is some pattern that produces all-ones in low bits: |
4024 | | /// (-1 >> y) |
4025 | | /// ((-1 << y) >> y) <- non-canonical, has extra uses |
4026 | | /// ~(-1 << y) |
4027 | | /// ((1 << y) + (-1)) <- non-canonical, has extra uses |
4028 | | /// The Mask can be a constant, too. |
4029 | | /// For some predicates, the operands are commutative. |
4030 | | /// For others, x can only be on a specific side. |
4031 | | static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I, |
4032 | 29.9k | InstCombiner::BuilderTy &Builder) { |
4033 | 29.9k | ICmpInst::Predicate SrcPred; |
4034 | 29.9k | Value *X, *M, *Y; |
4035 | 29.9k | auto m_VariableMask = m_CombineOr( |
4036 | 29.9k | m_CombineOr(m_Not(m_Shl(m_AllOnes(), m_Value())), |
4037 | 29.9k | m_Add(m_Shl(m_One(), m_Value()), m_AllOnes())), |
4038 | 29.9k | m_CombineOr(m_LShr(m_AllOnes(), m_Value()), |
4039 | 29.9k | m_LShr(m_Shl(m_AllOnes(), m_Value(Y)), m_Deferred(Y)))); |
4040 | 29.9k | auto m_Mask = m_CombineOr(m_VariableMask, m_LowBitMask()); |
4041 | 29.9k | if (!match(&I, m_c_ICmp(SrcPred, |
4042 | 29.9k | m_c_And(m_CombineAnd(m_Mask, m_Value(M)), m_Value(X)), |
4043 | 29.9k | m_Deferred(X)))) |
4044 | 29.7k | return nullptr; |
4045 | | |
4046 | 187 | ICmpInst::Predicate DstPred; |
4047 | 187 | switch (SrcPred) { |
4048 | 33 | case ICmpInst::Predicate::ICMP_EQ: |
4049 | | // x & (-1 >> y) == x -> x u<= (-1 >> y) |
4050 | 33 | DstPred = ICmpInst::Predicate::ICMP_ULE; |
4051 | 33 | break; |
4052 | 52 | case ICmpInst::Predicate::ICMP_NE: |
4053 | | // x & (-1 >> y) != x -> x u> (-1 >> y) |
4054 | 52 | DstPred = ICmpInst::Predicate::ICMP_UGT; |
4055 | 52 | break; |
4056 | 8 | case ICmpInst::Predicate::ICMP_ULT: |
4057 | | // x & (-1 >> y) u< x -> x u> (-1 >> y) |
4058 | | // x u> x & (-1 >> y) -> x u> (-1 >> y) |
4059 | 8 | DstPred = ICmpInst::Predicate::ICMP_UGT; |
4060 | 8 | break; |
4061 | 19 | case ICmpInst::Predicate::ICMP_UGE: |
4062 | | // x & (-1 >> y) u>= x -> x u<= (-1 >> y) |
4063 | | // x u<= x & (-1 >> y) -> x u<= (-1 >> y) |
4064 | 19 | DstPred = ICmpInst::Predicate::ICMP_ULE; |
4065 | 19 | break; |
4066 | 17 | case ICmpInst::Predicate::ICMP_SLT: |
4067 | | // x & (-1 >> y) s< x -> x s> (-1 >> y) |
4068 | | // x s> x & (-1 >> y) -> x s> (-1 >> y) |
4069 | 17 | if (!match(M, m_Constant())) // Can not do this fold with non-constant. |
4070 | 0 | return nullptr; |
4071 | 17 | if (!match(M, m_NonNegative())) // Must not have any -1 vector elements. |
4072 | 0 | return nullptr; |
4073 | 17 | DstPred = ICmpInst::Predicate::ICMP_SGT; |
4074 | 17 | break; |
4075 | 10 | case ICmpInst::Predicate::ICMP_SGE: |
4076 | | // x & (-1 >> y) s>= x -> x s<= (-1 >> y) |
4077 | | // x s<= x & (-1 >> y) -> x s<= (-1 >> y) |
4078 | 10 | if (!match(M, m_Constant())) // Can not do this fold with non-constant. |
4079 | 0 | return nullptr; |
4080 | 10 | if (!match(M, m_NonNegative())) // Must not have any -1 vector elements. |
4081 | 1 | return nullptr; |
4082 | 9 | DstPred = ICmpInst::Predicate::ICMP_SLE; |
4083 | 9 | break; |
4084 | 19 | case ICmpInst::Predicate::ICMP_SGT: |
4085 | 48 | case ICmpInst::Predicate::ICMP_SLE: |
4086 | 48 | return nullptr; |
4087 | 0 | case ICmpInst::Predicate::ICMP_UGT: |
4088 | 0 | case ICmpInst::Predicate::ICMP_ULE: |
4089 | 0 | llvm_unreachable("Instsimplify took care of commut. variant"); |
4090 | 0 | break; |
4091 | 0 | default: |
4092 | 0 | llvm_unreachable("All possible folds are handled."); |
4093 | 187 | } |
4094 | | |
4095 | | // The mask value may be a vector constant that has undefined elements. But it |
4096 | | // may not be safe to propagate those undefs into the new compare, so replace |
4097 | | // those elements by copying an existing, defined, and safe scalar constant. |
4098 | 138 | Type *OpTy = M->getType(); |
4099 | 138 | auto *VecC = dyn_cast<Constant>(M); |
4100 | 138 | auto *OpVTy = dyn_cast<FixedVectorType>(OpTy); |
4101 | 138 | if (OpVTy && VecC && VecC->containsUndefOrPoisonElement()) { |
4102 | 11 | Constant *SafeReplacementConstant = nullptr; |
4103 | 11 | for (unsigned i = 0, e = OpVTy->getNumElements(); i != e; ++i) { |
4104 | 11 | if (!isa<UndefValue>(VecC->getAggregateElement(i))) { |
4105 | 11 | SafeReplacementConstant = VecC->getAggregateElement(i); |
4106 | 11 | break; |
4107 | 11 | } |
4108 | 11 | } |
4109 | 11 | assert(SafeReplacementConstant && "Failed to find undef replacement"); |
4110 | 0 | M = Constant::replaceUndefsWith(VecC, SafeReplacementConstant); |
4111 | 11 | } |
4112 | | |
4113 | 0 | return Builder.CreateICmp(DstPred, X, M); |
4114 | 187 | } |
4115 | | |
4116 | | /// Some comparisons can be simplified. |
4117 | | /// In this case, we are looking for comparisons that look like |
4118 | | /// a check for a lossy signed truncation. |
4119 | | /// Folds: (MaskedBits is a constant.) |
4120 | | /// ((%x << MaskedBits) a>> MaskedBits) SrcPred %x |
4121 | | /// Into: |
4122 | | /// (add %x, (1 << (KeptBits-1))) DstPred (1 << KeptBits) |
4123 | | /// Where KeptBits = bitwidth(%x) - MaskedBits |
4124 | | static Value * |
4125 | | foldICmpWithTruncSignExtendedVal(ICmpInst &I, |
4126 | 29.7k | InstCombiner::BuilderTy &Builder) { |
4127 | 29.7k | ICmpInst::Predicate SrcPred; |
4128 | 29.7k | Value *X; |
4129 | 29.7k | const APInt *C0, *C1; // FIXME: non-splats, potentially with undef. |
4130 | | // We are ok with 'shl' having multiple uses, but 'ashr' must be one-use. |
4131 | 29.7k | if (!match(&I, m_c_ICmp(SrcPred, |
4132 | 29.7k | m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C0)), |
4133 | 29.7k | m_APInt(C1))), |
4134 | 29.7k | m_Deferred(X)))) |
4135 | 29.7k | return nullptr; |
4136 | | |
4137 | | // Potential handling of non-splats: for each element: |
4138 | | // * if both are undef, replace with constant 0. |
4139 | | // Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0. |
4140 | | // * if both are not undef, and are different, bailout. |
4141 | | // * else, only one is undef, then pick the non-undef one. |
4142 | | |
4143 | | // The shift amount must be equal. |
4144 | 48 | if (*C0 != *C1) |
4145 | 5 | return nullptr; |
4146 | 43 | const APInt &MaskedBits = *C0; |
4147 | 43 | assert(MaskedBits != 0 && "shift by zero should be folded away already."); |
4148 | | |
4149 | 0 | ICmpInst::Predicate DstPred; |
4150 | 43 | switch (SrcPred) { |
4151 | 39 | case ICmpInst::Predicate::ICMP_EQ: |
4152 | | // ((%x << MaskedBits) a>> MaskedBits) == %x |
4153 | | // => |
4154 | | // (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits) |
4155 | 39 | DstPred = ICmpInst::Predicate::ICMP_ULT; |
4156 | 39 | break; |
4157 | 0 | case ICmpInst::Predicate::ICMP_NE: |
4158 | | // ((%x << MaskedBits) a>> MaskedBits) != %x |
4159 | | // => |
4160 | | // (add %x, (1 << (KeptBits-1))) u>= (1 << KeptBits) |
4161 | 0 | DstPred = ICmpInst::Predicate::ICMP_UGE; |
4162 | 0 | break; |
4163 | | // FIXME: are more folds possible? |
4164 | 4 | default: |
4165 | 4 | return nullptr; |
4166 | 43 | } |
4167 | | |
4168 | 39 | auto *XType = X->getType(); |
4169 | 39 | const unsigned XBitWidth = XType->getScalarSizeInBits(); |
4170 | 39 | const APInt BitWidth = APInt(XBitWidth, XBitWidth); |
4171 | 39 | assert(BitWidth.ugt(MaskedBits) && "shifts should leave some bits untouched"); |
4172 | | |
4173 | | // KeptBits = bitwidth(%x) - MaskedBits |
4174 | 0 | const APInt KeptBits = BitWidth - MaskedBits; |
4175 | 39 | assert(KeptBits.ugt(0) && KeptBits.ult(BitWidth) && "unreachable"); |
4176 | | // ICmpCst = (1 << KeptBits) |
4177 | 0 | const APInt ICmpCst = APInt(XBitWidth, 1).shl(KeptBits); |
4178 | 39 | assert(ICmpCst.isPowerOf2()); |
4179 | | // AddCst = (1 << (KeptBits-1)) |
4180 | 0 | const APInt AddCst = ICmpCst.lshr(1); |
4181 | 39 | assert(AddCst.ult(ICmpCst) && AddCst.isPowerOf2()); |
4182 | | |
4183 | | // T0 = add %x, AddCst |
4184 | 0 | Value *T0 = Builder.CreateAdd(X, ConstantInt::get(XType, AddCst)); |
4185 | | // T1 = T0 DstPred ICmpCst |
4186 | 39 | Value *T1 = Builder.CreateICmp(DstPred, T0, ConstantInt::get(XType, ICmpCst)); |
4187 | | |
4188 | 39 | return T1; |
4189 | 43 | } |
4190 | | |
// Given pattern:
//   icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
// we should move shifts to the same hand of 'and', i.e. rewrite as
//   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
// We are only interested in opposite logical shifts here.
// One of the shifts can be truncated.
// If we can, we want to end up creating 'lshr' shift.
//
// I is the compare to inspect, SQ is the query used to simplify the sum of
// the two shift amounts and to compute known bits, and Builder is used to
// create the replacement instructions.
// Returns the newly built compare, or nullptr if the pattern is absent or one
// of the legality / instruction-count checks below fails.
static Value *
foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
                                           InstCombiner::BuilderTy &Builder) {
  // Only an equality comparison of a one-use value against zero qualifies.
  if (!I.isEquality() || !match(I.getOperand(1), m_Zero()) ||
      !I.getOperand(0)->hasOneUse())
    return nullptr;

  auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value());

  // Look for an 'and' of two logical shifts, one of which may be truncated.
  // We use m_TruncOrSelf() on the RHS to correctly handle commutative case.
  Instruction *XShift, *MaybeTruncation, *YShift;
  if (!match(
          I.getOperand(0),
          m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)),
                  m_CombineAnd(m_TruncOrSelf(m_CombineAnd(
                                   m_AnyLogicalShift, m_Instruction(YShift))),
                               m_Instruction(MaybeTruncation)))))
    return nullptr;

  // We potentially looked past 'trunc', but only when matching YShift,
  // therefore YShift must have the widest type.
  // Note: WidestShift/NarrowestShift are captured here, before XShift and
  // YShift are possibly swapped below, so they keep referring to the
  // instructions as originally matched.
  Instruction *WidestShift = YShift;
  // Therefore XShift must have the shallowest type.
  // Or they both have identical types if there was no truncation.
  Instruction *NarrowestShift = XShift;

  Type *WidestTy = WidestShift->getType();
  Type *NarrowestTy = NarrowestShift->getType();
  assert(NarrowestTy == I.getOperand(0)->getType() &&
         "We did not look past any shifts while matching XShift though.");
  // A type mismatch between the widest shift and the 'and' means that a
  // 'trunc' was looked through.
  bool HadTrunc = WidestTy != I.getOperand(0)->getType();

  // If YShift is a 'lshr', swap the shifts around.
  if (match(YShift, m_LShr(m_Value(), m_Value())))
    std::swap(XShift, YShift);

  // The shifts must be in opposite directions.
  auto XShiftOpcode = XShift->getOpcode();
  if (XShiftOpcode == YShift->getOpcode())
    return nullptr; // Do not care about same-direction shifts here.

  // Extract the shifted values and the shift amounts, looking through a 'zext'
  // of each shift amount. The match() results are not checked here.
  Value *X, *XShAmt, *Y, *YShAmt;
  match(XShift, m_BinOp(m_Value(X), m_ZExtOrSelf(m_Value(XShAmt))));
  match(YShift, m_BinOp(m_Value(Y), m_ZExtOrSelf(m_Value(YShAmt))));

  // If one of the values being shifted is a constant, then we will end with
  // and+icmp, and [zext+]shift instrs will be constant-folded. If they are not,
  // however, we will need to ensure that we won't increase instruction count.
  if (!isa<Constant>(X) && !isa<Constant>(Y)) {
    // At least one of the hands of the 'and' should be one-use shift.
    if (!match(I.getOperand(0),
               m_c_And(m_OneUse(m_AnyLogicalShift), m_Value())))
      return nullptr;
    if (HadTrunc) {
      // Due to the 'trunc', we will need to widen X. For that either the old
      // 'trunc' or the shift amt in the non-truncated shift should be one-use.
      if (!MaybeTruncation->hasOneUse() &&
          !NarrowestShift->getOperand(1)->hasOneUse())
        return nullptr;
    }
  }

  // We have two shift amounts from two different shifts. The types of those
  // shift amounts may not match. If that's the case let's bailout now.
  if (XShAmt->getType() != YShAmt->getType())
    return nullptr;

  // As input, we have the following pattern:
  //   icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
  // We want to rewrite that as:
  //   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
  // While we know that originally (Q+K) would not overflow
  // (because  2 * (N-1) u<= iN -1), we have looked past extensions of
  // shift amounts, so it may now overflow in smaller bitwidth.
  // To ensure that does not happen, we need to ensure that the total maximal
  // shift amount is still representable in that smaller bit width.
  unsigned MaximalPossibleTotalShiftAmount =
      (WidestTy->getScalarSizeInBits() - 1) +
      (NarrowestTy->getScalarSizeInBits() - 1);
  APInt MaximalRepresentableShiftAmount =
      APInt::getAllOnes(XShAmt->getType()->getScalarSizeInBits());
  if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount))
    return nullptr;

  // Can we fold (XShAmt+YShAmt) ?
  // Only a sum that simplifies to a constant is accepted.
  auto *NewShAmt = dyn_cast_or_null<Constant>(
      simplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false,
                      /*isNUW=*/false, SQ.getWithInstruction(&I)));
  if (!NewShAmt)
    return nullptr;
  // Bring the combined shift amount to the type the new shift will use.
  if (NewShAmt->getType() != WidestTy) {
    NewShAmt =
        ConstantFoldCastOperand(Instruction::ZExt, NewShAmt, WidestTy, SQ.DL);
    if (!NewShAmt)
      return nullptr;
  }
  unsigned WidestBitWidth = WidestTy->getScalarSizeInBits();

  // Is the new shift amount smaller than the bit width?
  // FIXME: could also rely on ConstantRange.
  if (!match(NewShAmt,
             m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
                                APInt(WidestBitWidth, WidestBitWidth))))
    return nullptr;

  // An extra legality check is needed if we had trunc-of-lshr.
  if (HadTrunc && match(WidestShift, m_LShr(m_Value(), m_Value()))) {
    // Returns true if any one of the sufficient preconditions below holds.
    auto CanFold = [NewShAmt, WidestBitWidth, NarrowestShift, SQ,
                    WidestShift]() {
      // It isn't obvious whether it's worth it to analyze non-constants here.
      // Also, let's basically give up on non-splat cases, pessimizing vectors.
      // If *any* of these preconditions matches we can perform the fold.
      Constant *NewShAmtSplat = NewShAmt->getType()->isVectorTy()
                                    ? NewShAmt->getSplatValue()
                                    : NewShAmt;
      // If it's edge-case shift (by 0 or by WidestBitWidth-1) we can fold.
      if (NewShAmtSplat &&
          (NewShAmtSplat->isNullValue() ||
           NewShAmtSplat->getUniqueInteger() == WidestBitWidth - 1))
        return true;
      // We consider *min* leading zeros so a single outlier
      // blocks the transform as opposed to allowing it.
      if (auto *C = dyn_cast<Constant>(NarrowestShift->getOperand(0))) {
        KnownBits Known = computeKnownBits(C, SQ.DL);
        unsigned MinLeadZero = Known.countMinLeadingZeros();
        // If the value being shifted has at most lowest bit set we can fold.
        unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
        if (MaxActiveBits <= 1)
          return true;
        // Precondition:  NewShAmt u<= countLeadingZeros(C)
        if (NewShAmtSplat && NewShAmtSplat->getUniqueInteger().ule(MinLeadZero))
          return true;
      }
      if (auto *C = dyn_cast<Constant>(WidestShift->getOperand(0))) {
        KnownBits Known = computeKnownBits(C, SQ.DL);
        unsigned MinLeadZero = Known.countMinLeadingZeros();
        // If the value being shifted has at most lowest bit set we can fold.
        unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
        if (MaxActiveBits <= 1)
          return true;
        // Precondition:  ((WidestBitWidth-1)-NewShAmt) u<= countLeadingZeros(C)
        if (NewShAmtSplat) {
          APInt AdjNewShAmt =
              (WidestBitWidth - 1) - NewShAmtSplat->getUniqueInteger();
          if (AdjNewShAmt.ule(MinLeadZero))
            return true;
        }
      }
      return false; // Can't tell if it's ok.
    };
    if (!CanFold())
      return nullptr;
  }

  // All good, we can do this fold.
  // Both shifted values are zero-extended to the widest type first.
  X = Builder.CreateZExt(X, WidestTy);
  Y = Builder.CreateZExt(Y, WidestTy);
  // The shift is the same that was for X.
  Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
                  ? Builder.CreateLShr(X, NewShAmt)
                  : Builder.CreateShl(X, NewShAmt);
  Value *T1 = Builder.CreateAnd(T0, Y);
  // Keep the original eq/ne predicate and compare against zero of WidestTy.
  return Builder.CreateICmp(I.getPredicate(), T1,
                            Constant::getNullValue(WidestTy));
}
4364 | | |
4365 | | /// Fold |
4366 | | /// (-1 u/ x) u< y |
4367 | | /// ((x * y) ?/ x) != y |
4368 | | /// to |
4369 | | /// @llvm.?mul.with.overflow(x, y) plus extraction of overflow bit |
4370 | | /// Note that the comparison is commutative, while inverted (u>=, ==) predicate |
4371 | | /// will mean that we are looking for the opposite answer. |
4372 | 29.9k | Value *InstCombinerImpl::foldMultiplicationOverflowCheck(ICmpInst &I) { |
4373 | 29.9k | ICmpInst::Predicate Pred; |
4374 | 29.9k | Value *X, *Y; |
4375 | 29.9k | Instruction *Mul; |
4376 | 29.9k | Instruction *Div; |
4377 | 29.9k | bool NeedNegation; |
4378 | | // Look for: (-1 u/ x) u</u>= y |
4379 | 29.9k | if (!I.isEquality() && |
4380 | 29.9k | match(&I, m_c_ICmp(Pred, |
4381 | 14.8k | m_CombineAnd(m_OneUse(m_UDiv(m_AllOnes(), m_Value(X))), |
4382 | 14.8k | m_Instruction(Div)), |
4383 | 14.8k | m_Value(Y)))) { |
4384 | 56 | Mul = nullptr; |
4385 | | |
4386 | | // Are we checking that overflow does not happen, or does happen? |
4387 | 56 | switch (Pred) { |
4388 | 6 | case ICmpInst::Predicate::ICMP_ULT: |
4389 | 6 | NeedNegation = false; |
4390 | 6 | break; // OK |
4391 | 11 | case ICmpInst::Predicate::ICMP_UGE: |
4392 | 11 | NeedNegation = true; |
4393 | 11 | break; // OK |
4394 | 39 | default: |
4395 | 39 | return nullptr; // Wrong predicate. |
4396 | 56 | } |
4397 | 56 | } else // Look for: ((x * y) / x) !=/== y |
4398 | 29.9k | if (I.isEquality() && |
4399 | 29.9k | match(&I, |
4400 | 15.0k | m_c_ICmp(Pred, m_Value(Y), |
4401 | 15.0k | m_CombineAnd( |
4402 | 15.0k | m_OneUse(m_IDiv(m_CombineAnd(m_c_Mul(m_Deferred(Y), |
4403 | 15.0k | m_Value(X)), |
4404 | 15.0k | m_Instruction(Mul)), |
4405 | 15.0k | m_Deferred(X))), |
4406 | 15.0k | m_Instruction(Div))))) { |
4407 | 30 | NeedNegation = Pred == ICmpInst::Predicate::ICMP_EQ; |
4408 | 30 | } else |
4409 | 29.8k | return nullptr; |
4410 | | |
4411 | 47 | BuilderTy::InsertPointGuard Guard(Builder); |
4412 | | // If the pattern included (x * y), we'll want to insert new instructions |
4413 | | // right before that original multiplication so that we can replace it. |
4414 | 47 | bool MulHadOtherUses = Mul && !Mul->hasOneUse(); |
4415 | 47 | if (MulHadOtherUses) |
4416 | 15 | Builder.SetInsertPoint(Mul); |
4417 | | |
4418 | 47 | Function *F = Intrinsic::getDeclaration(I.getModule(), |
4419 | 47 | Div->getOpcode() == Instruction::UDiv |
4420 | 47 | ? Intrinsic::umul_with_overflow |
4421 | 47 | : Intrinsic::smul_with_overflow, |
4422 | 47 | X->getType()); |
4423 | 47 | CallInst *Call = Builder.CreateCall(F, {X, Y}, "mul"); |
4424 | | |
4425 | | // If the multiplication was used elsewhere, to ensure that we don't leave |
4426 | | // "duplicate" instructions, replace uses of that original multiplication |
4427 | | // with the multiplication result from the with.overflow intrinsic. |
4428 | 47 | if (MulHadOtherUses) |
4429 | 15 | replaceInstUsesWith(*Mul, Builder.CreateExtractValue(Call, 0, "mul.val")); |
4430 | | |
4431 | 47 | Value *Res = Builder.CreateExtractValue(Call, 1, "mul.ov"); |
4432 | 47 | if (NeedNegation) // This technically increases instruction count. |
4433 | 12 | Res = Builder.CreateNot(Res, "mul.not.ov"); |
4434 | | |
4435 | | // If we replaced the mul, erase it. Do this after all uses of Builder, |
4436 | | // as the mul is used as insertion point. |
4437 | 47 | if (MulHadOtherUses) |
4438 | 15 | eraseInstFromFunction(*Mul); |
4439 | | |
4440 | 47 | return Res; |
4441 | 29.9k | } |
4442 | | |
4443 | | static Instruction *foldICmpXNegX(ICmpInst &I, |
4444 | 30.7k | InstCombiner::BuilderTy &Builder) { |
4445 | 30.7k | CmpInst::Predicate Pred; |
4446 | 30.7k | Value *X; |
4447 | 30.7k | if (match(&I, m_c_ICmp(Pred, m_NSWNeg(m_Value(X)), m_Deferred(X)))) { |
4448 | | |
4449 | 38 | if (ICmpInst::isSigned(Pred)) |
4450 | 14 | Pred = ICmpInst::getSwappedPredicate(Pred); |
4451 | 24 | else if (ICmpInst::isUnsigned(Pred)) |
4452 | 18 | Pred = ICmpInst::getSignedPredicate(Pred); |
4453 | | // else for equality-comparisons just keep the predicate. |
4454 | | |
4455 | 38 | return ICmpInst::Create(Instruction::ICmp, Pred, X, |
4456 | 38 | Constant::getNullValue(X->getType()), I.getName()); |
4457 | 38 | } |
4458 | | |
4459 | | // A value is not equal to its negation unless that value is 0 or |
4460 | | // MinSignedValue, ie: a != -a --> (a & MaxSignedVal) != 0 |
4461 | 30.6k | if (match(&I, m_c_ICmp(Pred, m_OneUse(m_Neg(m_Value(X))), m_Deferred(X))) && |
4462 | 30.6k | ICmpInst::isEquality(Pred)) { |
4463 | 17 | Type *Ty = X->getType(); |
4464 | 17 | uint32_t BitWidth = Ty->getScalarSizeInBits(); |
4465 | 17 | Constant *MaxSignedVal = |
4466 | 17 | ConstantInt::get(Ty, APInt::getSignedMaxValue(BitWidth)); |
4467 | 17 | Value *And = Builder.CreateAnd(X, MaxSignedVal); |
4468 | 17 | Constant *Zero = Constant::getNullValue(Ty); |
4469 | 17 | return CmpInst::Create(Instruction::ICmp, Pred, And, Zero); |
4470 | 17 | } |
4471 | | |
4472 | 30.6k | return nullptr; |
4473 | 30.6k | } |
4474 | | |
4475 | | static Instruction *foldICmpAndXX(ICmpInst &I, const SimplifyQuery &Q, |
4476 | 29.7k | InstCombinerImpl &IC) { |
4477 | 29.7k | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A; |
4478 | | // Normalize and operand as operand 0. |
4479 | 29.7k | CmpInst::Predicate Pred = I.getPredicate(); |
4480 | 29.7k | if (match(Op1, m_c_And(m_Specific(Op0), m_Value()))) { |
4481 | 81 | std::swap(Op0, Op1); |
4482 | 81 | Pred = ICmpInst::getSwappedPredicate(Pred); |
4483 | 81 | } |
4484 | | |
4485 | 29.7k | if (!match(Op0, m_c_And(m_Specific(Op1), m_Value(A)))) |
4486 | 28.5k | return nullptr; |
4487 | | |
4488 | | // (icmp (X & Y) u< X --> (X & Y) != X |
4489 | 1.19k | if (Pred == ICmpInst::ICMP_ULT) |
4490 | 14 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); |
4491 | | |
4492 | | // (icmp (X & Y) u>= X --> (X & Y) == X |
4493 | 1.18k | if (Pred == ICmpInst::ICMP_UGE) |
4494 | 12 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1); |
4495 | | |
4496 | 1.16k | return nullptr; |
4497 | 1.18k | } |
4498 | | |
4499 | | static Instruction *foldICmpOrXX(ICmpInst &I, const SimplifyQuery &Q, |
4500 | 30.1k | InstCombinerImpl &IC) { |
4501 | 30.1k | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A; |
4502 | | |
4503 | | // Normalize or operand as operand 0. |
4504 | 30.1k | CmpInst::Predicate Pred = I.getPredicate(); |
4505 | 30.1k | if (match(Op1, m_c_Or(m_Specific(Op0), m_Value(A)))) { |
4506 | 70 | std::swap(Op0, Op1); |
4507 | 70 | Pred = ICmpInst::getSwappedPredicate(Pred); |
4508 | 30.0k | } else if (!match(Op0, m_c_Or(m_Specific(Op1), m_Value(A)))) { |
4509 | 29.8k | return nullptr; |
4510 | 29.8k | } |
4511 | | |
4512 | | // icmp (X | Y) u<= X --> (X | Y) == X |
4513 | 242 | if (Pred == ICmpInst::ICMP_ULE) |
4514 | 9 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1); |
4515 | | |
4516 | | // icmp (X | Y) u> X --> (X | Y) != X |
4517 | 233 | if (Pred == ICmpInst::ICMP_UGT) |
4518 | 14 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); |
4519 | | |
4520 | 219 | if (ICmpInst::isEquality(Pred) && Op0->hasOneUse()) { |
4521 | | // icmp (X | Y) eq/ne Y --> (X & ~Y) eq/ne 0 if Y is freely invertible |
4522 | 57 | if (Value *NotOp1 = |
4523 | 57 | IC.getFreelyInverted(Op1, Op1->hasOneUse(), &IC.Builder)) |
4524 | 20 | return new ICmpInst(Pred, IC.Builder.CreateAnd(A, NotOp1), |
4525 | 20 | Constant::getNullValue(Op1->getType())); |
4526 | | // icmp (X | Y) eq/ne Y --> (~X | Y) eq/ne -1 if X is freely invertible. |
4527 | 37 | if (Value *NotA = IC.getFreelyInverted(A, A->hasOneUse(), &IC.Builder)) |
4528 | 13 | return new ICmpInst(Pred, IC.Builder.CreateOr(Op1, NotA), |
4529 | 13 | Constant::getAllOnesValue(Op1->getType())); |
4530 | 37 | } |
4531 | 186 | return nullptr; |
4532 | 219 | } |
4533 | | |
4534 | | static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q, |
4535 | 30.1k | InstCombinerImpl &IC) { |
4536 | 30.1k | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A; |
4537 | | // Normalize xor operand as operand 0. |
4538 | 30.1k | CmpInst::Predicate Pred = I.getPredicate(); |
4539 | 30.1k | if (match(Op1, m_c_Xor(m_Specific(Op0), m_Value()))) { |
4540 | 109 | std::swap(Op0, Op1); |
4541 | 109 | Pred = ICmpInst::getSwappedPredicate(Pred); |
4542 | 109 | } |
4543 | 30.1k | if (!match(Op0, m_c_Xor(m_Specific(Op1), m_Value(A)))) |
4544 | 29.7k | return nullptr; |
4545 | | |
4546 | | // icmp (X ^ Y_NonZero) u>= X --> icmp (X ^ Y_NonZero) u> X |
4547 | | // icmp (X ^ Y_NonZero) u<= X --> icmp (X ^ Y_NonZero) u< X |
4548 | | // icmp (X ^ Y_NonZero) s>= X --> icmp (X ^ Y_NonZero) s> X |
4549 | | // icmp (X ^ Y_NonZero) s<= X --> icmp (X ^ Y_NonZero) s< X |
4550 | 419 | CmpInst::Predicate PredOut = CmpInst::getStrictPredicate(Pred); |
4551 | 419 | if (PredOut != Pred && |
4552 | 419 | isKnownNonZero(A, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) |
4553 | 40 | return new ICmpInst(PredOut, Op0, Op1); |
4554 | | |
4555 | 379 | return nullptr; |
4556 | 419 | } |
4557 | | |
4558 | | /// Try to fold icmp (binop), X or icmp X, (binop). |
4559 | | /// TODO: A large part of this logic is duplicated in InstSimplify's |
4560 | | /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code |
4561 | | /// duplication. |
4562 | | Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, |
4563 | 82.8k | const SimplifyQuery &SQ) { |
4564 | 82.8k | const SimplifyQuery Q = SQ.getWithInstruction(&I); |
4565 | 82.8k | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); |
4566 | | |
4567 | | // Special logic for binary operators. |
4568 | 82.8k | BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0); |
4569 | 82.8k | BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1); |
4570 | 82.8k | if (!BO0 && !BO1) |
4571 | 52.1k | return nullptr; |
4572 | | |
4573 | 30.7k | if (Instruction *NewICmp = foldICmpXNegX(I, Builder)) |
4574 | 55 | return NewICmp; |
4575 | | |
4576 | 30.6k | const CmpInst::Predicate Pred = I.getPredicate(); |
4577 | 30.6k | Value *X; |
4578 | | |
4579 | | // Convert add-with-unsigned-overflow comparisons into a 'not' with compare. |
4580 | | // (Op1 + X) u</u>= Op1 --> ~Op1 u</u>= X |
4581 | 30.6k | if (match(Op0, m_OneUse(m_c_Add(m_Specific(Op1), m_Value(X)))) && |
4582 | 30.6k | (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) |
4583 | 41 | return new ICmpInst(Pred, Builder.CreateNot(Op1), X); |
4584 | | // Op0 u>/u<= (Op0 + X) --> X u>/u<= ~Op0 |
4585 | 30.6k | if (match(Op1, m_OneUse(m_c_Add(m_Specific(Op0), m_Value(X)))) && |
4586 | 30.6k | (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE)) |
4587 | 15 | return new ICmpInst(Pred, X, Builder.CreateNot(Op0)); |
4588 | | |
4589 | 30.5k | { |
4590 | | // (Op1 + X) + C u</u>= Op1 --> ~C - X u</u>= Op1 |
4591 | 30.5k | Constant *C; |
4592 | 30.5k | if (match(Op0, m_OneUse(m_Add(m_c_Add(m_Specific(Op1), m_Value(X)), |
4593 | 30.5k | m_ImmConstant(C)))) && |
4594 | 30.5k | (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) { |
4595 | 15 | Constant *C2 = ConstantExpr::getNot(C); |
4596 | 15 | return new ICmpInst(Pred, Builder.CreateSub(C2, X), Op1); |
4597 | 15 | } |
4598 | | // Op0 u>/u<= (Op0 + X) + C --> Op0 u>/u<= ~C - X |
4599 | 30.5k | if (match(Op1, m_OneUse(m_Add(m_c_Add(m_Specific(Op0), m_Value(X)), |
4600 | 30.5k | m_ImmConstant(C)))) && |
4601 | 30.5k | (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE)) { |
4602 | 0 | Constant *C2 = ConstantExpr::getNot(C); |
4603 | 0 | return new ICmpInst(Pred, Op0, Builder.CreateSub(C2, X)); |
4604 | 0 | } |
4605 | 30.5k | } |
4606 | | |
4607 | 30.5k | { |
4608 | | // Similar to above: an unsigned overflow comparison may use offset + mask: |
4609 | | // ((Op1 + C) & C) u< Op1 --> Op1 != 0 |
4610 | | // ((Op1 + C) & C) u>= Op1 --> Op1 == 0 |
4611 | | // Op0 u> ((Op0 + C) & C) --> Op0 != 0 |
4612 | | // Op0 u<= ((Op0 + C) & C) --> Op0 == 0 |
4613 | 30.5k | BinaryOperator *BO; |
4614 | 30.5k | const APInt *C; |
4615 | 30.5k | if ((Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE) && |
4616 | 30.5k | match(Op0, m_And(m_BinOp(BO), m_LowBitMask(C))) && |
4617 | 30.5k | match(BO, m_Add(m_Specific(Op1), m_SpecificIntAllowUndef(*C)))) { |
4618 | 0 | CmpInst::Predicate NewPred = |
4619 | 0 | Pred == ICmpInst::ICMP_ULT ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; |
4620 | 0 | Constant *Zero = ConstantInt::getNullValue(Op1->getType()); |
4621 | 0 | return new ICmpInst(NewPred, Op1, Zero); |
4622 | 0 | } |
4623 | | |
4624 | 30.5k | if ((Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE) && |
4625 | 30.5k | match(Op1, m_And(m_BinOp(BO), m_LowBitMask(C))) && |
4626 | 30.5k | match(BO, m_Add(m_Specific(Op0), m_SpecificIntAllowUndef(*C)))) { |
4627 | 0 | CmpInst::Predicate NewPred = |
4628 | 0 | Pred == ICmpInst::ICMP_UGT ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; |
4629 | 0 | Constant *Zero = ConstantInt::getNullValue(Op1->getType()); |
4630 | 0 | return new ICmpInst(NewPred, Op0, Zero); |
4631 | 0 | } |
4632 | 30.5k | } |
4633 | | |
4634 | 30.5k | bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; |
4635 | 30.5k | bool Op0HasNUW = false, Op1HasNUW = false; |
4636 | 30.5k | bool Op0HasNSW = false, Op1HasNSW = false; |
4637 | | // Analyze the case when either Op0 or Op1 is an add instruction. |
4638 | | // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). |
4639 | 30.5k | auto hasNoWrapProblem = [](const BinaryOperator &BO, CmpInst::Predicate Pred, |
4640 | 34.4k | bool &HasNSW, bool &HasNUW) -> bool { |
4641 | 34.4k | if (isa<OverflowingBinaryOperator>(BO)) { |
4642 | 16.0k | HasNUW = BO.hasNoUnsignedWrap(); |
4643 | 16.0k | HasNSW = BO.hasNoSignedWrap(); |
4644 | 16.0k | return ICmpInst::isEquality(Pred) || |
4645 | 16.0k | (CmpInst::isUnsigned(Pred) && HasNUW) || |
4646 | 16.0k | (CmpInst::isSigned(Pred) && HasNSW); |
4647 | 18.4k | } else if (BO.getOpcode() == Instruction::Or) { |
4648 | 1.28k | HasNUW = true; |
4649 | 1.28k | HasNSW = true; |
4650 | 1.28k | return true; |
4651 | 17.1k | } else { |
4652 | 17.1k | return false; |
4653 | 17.1k | } |
4654 | 34.4k | }; |
4655 | 30.5k | Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; |
4656 | | |
4657 | 30.5k | if (BO0) { |
4658 | 28.7k | match(BO0, m_AddLike(m_Value(A), m_Value(B))); |
4659 | 28.7k | NoOp0WrapProblem = hasNoWrapProblem(*BO0, Pred, Op0HasNSW, Op0HasNUW); |
4660 | 28.7k | } |
4661 | 30.5k | if (BO1) { |
4662 | 5.73k | match(BO1, m_AddLike(m_Value(C), m_Value(D))); |
4663 | 5.73k | NoOp1WrapProblem = hasNoWrapProblem(*BO1, Pred, Op1HasNSW, Op1HasNUW); |
4664 | 5.73k | } |
4665 | | |
4666 | | // icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow. |
4667 | | // icmp (A+B), B -> icmp A, 0 for equalities or if there is no overflow. |
4668 | 30.5k | if ((A == Op1 || B == Op1) && NoOp0WrapProblem) |
4669 | 106 | return new ICmpInst(Pred, A == Op1 ? B : A, |
4670 | 106 | Constant::getNullValue(Op1->getType())); |
4671 | | |
4672 | | // icmp C, (C+D) -> icmp 0, D for equalities or if there is no overflow. |
4673 | | // icmp D, (C+D) -> icmp 0, C for equalities or if there is no overflow. |
4674 | 30.4k | if ((C == Op0 || D == Op0) && NoOp1WrapProblem) |
4675 | 14 | return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()), |
4676 | 14 | C == Op0 ? D : C); |
4677 | | |
4678 | | // icmp (A+B), (A+D) -> icmp B, D for equalities or if there is no overflow. |
4679 | 30.4k | if (A && C && (A == C || A == D || B == C || B == D) && NoOp0WrapProblem && |
4680 | 30.4k | NoOp1WrapProblem) { |
4681 | | // Determine Y and Z in the form icmp (X+Y), (X+Z). |
4682 | 16 | Value *Y, *Z; |
4683 | 16 | if (A == C) { |
4684 | | // C + B == C + D -> B == D |
4685 | 5 | Y = B; |
4686 | 5 | Z = D; |
4687 | 11 | } else if (A == D) { |
4688 | | // D + B == C + D -> B == C |
4689 | 1 | Y = B; |
4690 | 1 | Z = C; |
4691 | 10 | } else if (B == C) { |
4692 | | // A + C == C + D -> A == D |
4693 | 2 | Y = A; |
4694 | 2 | Z = D; |
4695 | 8 | } else { |
4696 | 8 | assert(B == D); |
4697 | | // A + D == C + D -> A == C |
4698 | 0 | Y = A; |
4699 | 8 | Z = C; |
4700 | 8 | } |
4701 | 0 | return new ICmpInst(Pred, Y, Z); |
4702 | 16 | } |
4703 | | |
4704 | | // icmp slt (A + -1), Op1 -> icmp sle A, Op1 |
4705 | 30.4k | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT && |
4706 | 30.4k | match(B, m_AllOnes())) |
4707 | 12 | return new ICmpInst(CmpInst::ICMP_SLE, A, Op1); |
4708 | | |
4709 | | // icmp sge (A + -1), Op1 -> icmp sgt A, Op1 |
4710 | 30.4k | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE && |
4711 | 30.4k | match(B, m_AllOnes())) |
4712 | 3 | return new ICmpInst(CmpInst::ICMP_SGT, A, Op1); |
4713 | | |
4714 | | // icmp sle (A + 1), Op1 -> icmp slt A, Op1 |
4715 | 30.4k | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One())) |
4716 | 9 | return new ICmpInst(CmpInst::ICMP_SLT, A, Op1); |
4717 | | |
4718 | | // icmp sgt (A + 1), Op1 -> icmp sge A, Op1 |
4719 | 30.4k | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One())) |
4720 | 16 | return new ICmpInst(CmpInst::ICMP_SGE, A, Op1); |
4721 | | |
4722 | | // icmp sgt Op0, (C + -1) -> icmp sge Op0, C |
4723 | 30.3k | if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT && |
4724 | 30.3k | match(D, m_AllOnes())) |
4725 | 0 | return new ICmpInst(CmpInst::ICMP_SGE, Op0, C); |
4726 | | |
4727 | | // icmp sle Op0, (C + -1) -> icmp slt Op0, C |
4728 | 30.3k | if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE && |
4729 | 30.3k | match(D, m_AllOnes())) |
4730 | 1 | return new ICmpInst(CmpInst::ICMP_SLT, Op0, C); |
4731 | | |
4732 | | // icmp sge Op0, (C + 1) -> icmp sgt Op0, C |
4733 | 30.3k | if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One())) |
4734 | 5 | return new ICmpInst(CmpInst::ICMP_SGT, Op0, C); |
4735 | | |
4736 | | // icmp slt Op0, (C + 1) -> icmp sle Op0, C |
4737 | 30.3k | if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One())) |
4738 | 3 | return new ICmpInst(CmpInst::ICMP_SLE, Op0, C); |
4739 | | |
4740 | | // TODO: The subtraction-related identities shown below also hold, but |
4741 | | // canonicalization from (X -nuw 1) to (X + -1) means that the combinations |
4742 | | // wouldn't happen even if they were implemented. |
4743 | | // |
4744 | | // icmp ult (A - 1), Op1 -> icmp ule A, Op1 |
4745 | | // icmp uge (A - 1), Op1 -> icmp ugt A, Op1 |
4746 | | // icmp ugt Op0, (C - 1) -> icmp uge Op0, C |
4747 | | // icmp ule Op0, (C - 1) -> icmp ult Op0, C |
4748 | | |
4749 | | // icmp ule (A + 1), Op1 -> icmp ult A, Op1 |
4750 | 30.3k | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One())) |
4751 | 8 | return new ICmpInst(CmpInst::ICMP_ULT, A, Op1); |
4752 | | |
4753 | | // icmp ugt (A + 1), Op1 -> icmp uge A, Op1 |
4754 | 30.3k | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One())) |
4755 | 18 | return new ICmpInst(CmpInst::ICMP_UGE, A, Op1); |
4756 | | |
4757 | | // icmp uge Op0, (C + 1) -> icmp ugt Op0, C |
4758 | 30.3k | if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One())) |
4759 | 4 | return new ICmpInst(CmpInst::ICMP_UGT, Op0, C); |
4760 | | |
4761 | | // icmp ult Op0, (C + 1) -> icmp ule Op0, C |
4762 | 30.3k | if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One())) |
4763 | 1 | return new ICmpInst(CmpInst::ICMP_ULE, Op0, C); |
4764 | | |
4765 | | // if C1 has greater magnitude than C2: |
4766 | | // icmp (A + C1), (C + C2) -> icmp (A + C3), C |
4767 | | // s.t. C3 = C1 - C2 |
4768 | | // |
4769 | | // if C2 has greater magnitude than C1: |
4770 | | // icmp (A + C1), (C + C2) -> icmp A, (C + C3) |
4771 | | // s.t. C3 = C2 - C1 |
4772 | 30.3k | if (A && C && NoOp0WrapProblem && NoOp1WrapProblem && |
4773 | 30.3k | (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) { |
4774 | 27 | const APInt *AP1, *AP2; |
4775 | | // TODO: Support non-uniform vectors. |
4776 | | // TODO: Allow undef passthrough if B AND D's element is undef. |
4777 | 27 | if (match(B, m_APIntAllowUndef(AP1)) && match(D, m_APIntAllowUndef(AP2)) && |
4778 | 27 | AP1->isNegative() == AP2->isNegative()) { |
4779 | 21 | APInt AP1Abs = AP1->abs(); |
4780 | 21 | APInt AP2Abs = AP2->abs(); |
4781 | 21 | if (AP1Abs.uge(AP2Abs)) { |
4782 | 9 | APInt Diff = *AP1 - *AP2; |
4783 | 9 | Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff); |
4784 | 9 | Value *NewAdd = Builder.CreateAdd( |
4785 | 9 | A, C3, "", Op0HasNUW && Diff.ule(*AP1), Op0HasNSW); |
4786 | 9 | return new ICmpInst(Pred, NewAdd, C); |
4787 | 12 | } else { |
4788 | 12 | APInt Diff = *AP2 - *AP1; |
4789 | 12 | Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff); |
4790 | 12 | Value *NewAdd = Builder.CreateAdd( |
4791 | 12 | C, C3, "", Op1HasNUW && Diff.ule(*AP2), Op1HasNSW); |
4792 | 12 | return new ICmpInst(Pred, A, NewAdd); |
4793 | 12 | } |
4794 | 21 | } |
4795 | 6 | Constant *Cst1, *Cst2; |
4796 | 6 | if (match(B, m_ImmConstant(Cst1)) && match(D, m_ImmConstant(Cst2)) && |
4797 | 6 | ICmpInst::isEquality(Pred)) { |
4798 | 1 | Constant *Diff = ConstantExpr::getSub(Cst2, Cst1); |
4799 | 1 | Value *NewAdd = Builder.CreateAdd(C, Diff); |
4800 | 1 | return new ICmpInst(Pred, A, NewAdd); |
4801 | 1 | } |
4802 | 6 | } |
4803 | | |
4804 | | // Analyze the case when either Op0 or Op1 is a sub instruction. |
4805 | | // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). |
4806 | 30.3k | A = nullptr; |
4807 | 30.3k | B = nullptr; |
4808 | 30.3k | C = nullptr; |
4809 | 30.3k | D = nullptr; |
4810 | 30.3k | if (BO0 && BO0->getOpcode() == Instruction::Sub) { |
4811 | 2.76k | A = BO0->getOperand(0); |
4812 | 2.76k | B = BO0->getOperand(1); |
4813 | 2.76k | } |
4814 | 30.3k | if (BO1 && BO1->getOpcode() == Instruction::Sub) { |
4815 | 1.19k | C = BO1->getOperand(0); |
4816 | 1.19k | D = BO1->getOperand(1); |
4817 | 1.19k | } |
4818 | | |
4819 | | // icmp (A-B), A -> icmp 0, B for equalities or if there is no overflow. |
4820 | 30.3k | if (A == Op1 && NoOp0WrapProblem) |
4821 | 78 | return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B); |
4822 | | // icmp C, (C-D) -> icmp D, 0 for equalities or if there is no overflow. |
4823 | 30.2k | if (C == Op0 && NoOp1WrapProblem) |
4824 | 6 | return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType())); |
4825 | | |
4826 | | // Convert sub-with-unsigned-overflow comparisons into a comparison of args. |
4827 | | // (A - B) u>/u<= A --> B u>/u<= A |
4828 | 30.2k | if (A == Op1 && (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE)) |
4829 | 16 | return new ICmpInst(Pred, B, A); |
4830 | | // C u</u>= (C - D) --> C u</u>= D |
4831 | 30.2k | if (C == Op0 && (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) |
4832 | 7 | return new ICmpInst(Pred, C, D); |
4833 | | // (A - B) u>=/u< A --> B u>/u<= A iff B != 0 |
4834 | 30.2k | if (A == Op1 && (Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_ULT) && |
4835 | 30.2k | isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) |
4836 | 2 | return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), B, A); |
4837 | | // C u<=/u> (C - D) --> C u</u>= D iff D != 0 |
4838 | 30.2k | if (C == Op0 && (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) && |
4839 | 30.2k | isKnownNonZero(D, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) |
4840 | 1 | return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), C, D); |
4841 | | |
4842 | | // icmp (A-B), (C-B) -> icmp A, C for equalities or if there is no overflow. |
4843 | 30.2k | if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem) |
4844 | 15 | return new ICmpInst(Pred, A, C); |
4845 | | |
4846 | | // icmp (A-B), (A-D) -> icmp D, B for equalities or if there is no overflow. |
4847 | 30.2k | if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem) |
4848 | 24 | return new ICmpInst(Pred, D, B); |
4849 | | |
4850 | | // icmp (0-X) < cst --> x > -cst |
4851 | 30.1k | if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) { |
4852 | 1.03k | Value *X; |
4853 | 1.03k | if (match(BO0, m_Neg(m_Value(X)))) |
4854 | 26 | if (Constant *RHSC = dyn_cast<Constant>(Op1)) |
4855 | 22 | if (RHSC->isNotMinSignedValue()) |
4856 | 22 | return new ICmpInst(I.getSwappedPredicate(), X, |
4857 | 22 | ConstantExpr::getNeg(RHSC)); |
4858 | 1.03k | } |
4859 | | |
4860 | 30.1k | if (Instruction * R = foldICmpXorXX(I, Q, *this)) |
4861 | 40 | return R; |
4862 | 30.1k | if (Instruction *R = foldICmpOrXX(I, Q, *this)) |
4863 | 56 | return R; |
4864 | | |
4865 | 30.0k | { |
4866 | | // Try to remove shared multiplier from comparison: |
4867 | | // X * Z u{lt/le/gt/ge}/eq/ne Y * Z |
4868 | 30.0k | Value *X, *Y, *Z; |
4869 | 30.0k | if (Pred == ICmpInst::getUnsignedPredicate(Pred) && |
4870 | 30.0k | ((match(Op0, m_Mul(m_Value(X), m_Value(Z))) && |
4871 | 23.4k | match(Op1, m_c_Mul(m_Specific(Z), m_Value(Y)))) || |
4872 | 23.4k | (match(Op0, m_Mul(m_Value(Z), m_Value(X))) && |
4873 | 23.3k | match(Op1, m_c_Mul(m_Specific(Z), m_Value(Y)))))) { |
4874 | 83 | bool NonZero; |
4875 | 83 | if (ICmpInst::isEquality(Pred)) { |
4876 | 51 | KnownBits ZKnown = computeKnownBits(Z, 0, &I); |
4877 | | // if Z % 2 != 0 |
4878 | | // X * Z eq/ne Y * Z -> X eq/ne Y |
4879 | 51 | if (ZKnown.countMaxTrailingZeros() == 0) |
4880 | 2 | return new ICmpInst(Pred, X, Y); |
4881 | 49 | NonZero = !ZKnown.One.isZero() || |
4882 | 49 | isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); |
4883 | | // if Z != 0 and nsw(X * Z) and nsw(Y * Z) |
4884 | | // X * Z eq/ne Y * Z -> X eq/ne Y |
4885 | 49 | if (NonZero && BO0 && BO1 && Op0HasNSW && Op1HasNSW) |
4886 | 0 | return new ICmpInst(Pred, X, Y); |
4887 | 49 | } else |
4888 | 32 | NonZero = isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); |
4889 | | |
4890 | | // If Z != 0 and nuw(X * Z) and nuw(Y * Z) |
4891 | | // X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y |
4892 | 81 | if (NonZero && BO0 && BO1 && Op0HasNUW && Op1HasNUW) |
4893 | 1 | return new ICmpInst(Pred, X, Y); |
4894 | 81 | } |
4895 | 30.0k | } |
4896 | | |
4897 | 30.0k | BinaryOperator *SRem = nullptr; |
4898 | | // icmp (srem X, Y), Y |
4899 | 30.0k | if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1)) |
4900 | 47 | SRem = BO0; |
4901 | | // icmp Y, (srem X, Y) |
4902 | 30.0k | else if (BO1 && BO1->getOpcode() == Instruction::SRem && |
4903 | 30.0k | Op0 == BO1->getOperand(1)) |
4904 | 26 | SRem = BO1; |
4905 | 30.0k | if (SRem) { |
4906 | | // We don't check hasOneUse to avoid increasing register pressure because |
4907 | | // the value we use is the same value this instruction was already using. |
4908 | 73 | switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) { |
4909 | 46 | default: |
4910 | 46 | break; |
4911 | 46 | case ICmpInst::ICMP_EQ: |
4912 | 1 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); |
4913 | 3 | case ICmpInst::ICMP_NE: |
4914 | 3 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); |
4915 | 7 | case ICmpInst::ICMP_SGT: |
4916 | 11 | case ICmpInst::ICMP_SGE: |
4917 | 11 | return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1), |
4918 | 11 | Constant::getAllOnesValue(SRem->getType())); |
4919 | 8 | case ICmpInst::ICMP_SLT: |
4920 | 12 | case ICmpInst::ICMP_SLE: |
4921 | 12 | return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1), |
4922 | 12 | Constant::getNullValue(SRem->getType())); |
4923 | 73 | } |
4924 | 73 | } |
4925 | | |
4926 | 30.0k | if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && |
4927 | 30.0k | (BO0->hasOneUse() || BO1->hasOneUse()) && |
4928 | 30.0k | BO0->getOperand(1) == BO1->getOperand(1)) { |
4929 | 188 | switch (BO0->getOpcode()) { |
4930 | 6 | default: |
4931 | 6 | break; |
4932 | 6 | case Instruction::Add: |
4933 | 10 | case Instruction::Sub: |
4934 | 55 | case Instruction::Xor: { |
4935 | 55 | if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b |
4936 | 6 | return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); |
4937 | | |
4938 | 49 | const APInt *C; |
4939 | 49 | if (match(BO0->getOperand(1), m_APInt(C))) { |
4940 | | // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b |
4941 | 43 | if (C->isSignMask()) { |
4942 | 18 | ICmpInst::Predicate NewPred = I.getFlippedSignednessPredicate(); |
4943 | 18 | return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0)); |
4944 | 18 | } |
4945 | | |
4946 | | // icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b |
4947 | 25 | if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) { |
4948 | 4 | ICmpInst::Predicate NewPred = I.getFlippedSignednessPredicate(); |
4949 | 4 | NewPred = I.getSwappedPredicate(NewPred); |
4950 | 4 | return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0)); |
4951 | 4 | } |
4952 | 25 | } |
4953 | 27 | break; |
4954 | 49 | } |
4955 | 27 | case Instruction::Mul: { |
4956 | 15 | if (!I.isEquality()) |
4957 | 4 | break; |
4958 | | |
4959 | 11 | const APInt *C; |
4960 | 11 | if (match(BO0->getOperand(1), m_APInt(C)) && !C->isZero() && |
4961 | 11 | !C->isOne()) { |
4962 | | // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask) |
4963 | | // Mask = -1 >> count-trailing-zeros(C). |
4964 | 1 | if (unsigned TZs = C->countr_zero()) { |
4965 | 1 | Constant *Mask = ConstantInt::get( |
4966 | 1 | BO0->getType(), |
4967 | 1 | APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs)); |
4968 | 1 | Value *And1 = Builder.CreateAnd(BO0->getOperand(0), Mask); |
4969 | 1 | Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask); |
4970 | 1 | return new ICmpInst(Pred, And1, And2); |
4971 | 1 | } |
4972 | 1 | } |
4973 | 10 | break; |
4974 | 11 | } |
4975 | 10 | case Instruction::UDiv: |
4976 | 51 | case Instruction::LShr: |
4977 | 51 | if (I.isSigned() || !BO0->isExact() || !BO1->isExact()) |
4978 | 51 | break; |
4979 | 0 | return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); |
4980 | | |
4981 | 10 | case Instruction::SDiv: |
4982 | 10 | if (!(I.isEquality() || match(BO0->getOperand(1), m_NonNegative())) || |
4983 | 10 | !BO0->isExact() || !BO1->isExact()) |
4984 | 9 | break; |
4985 | 1 | return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); |
4986 | | |
4987 | 8 | case Instruction::AShr: |
4988 | 8 | if (!BO0->isExact() || !BO1->isExact()) |
4989 | 3 | break; |
4990 | 5 | return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); |
4991 | | |
4992 | 43 | case Instruction::Shl: { |
4993 | 43 | bool NUW = Op0HasNUW && Op1HasNUW; |
4994 | 43 | bool NSW = Op0HasNSW && Op1HasNSW; |
4995 | 43 | if (!NUW && !NSW) |
4996 | 43 | break; |
4997 | 0 | if (!NSW && I.isSigned()) |
4998 | 0 | break; |
4999 | 0 | return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); |
5000 | 0 | } |
5001 | 188 | } |
5002 | 188 | } |
5003 | | |
5004 | 30.0k | if (BO0) { |
5005 | | // Transform A & (L - 1) `ult` L --> L != 0 |
5006 | 28.2k | auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes()); |
5007 | 28.2k | auto BitwiseAnd = m_c_And(m_Value(), LSubOne); |
5008 | | |
5009 | 28.2k | if (match(BO0, BitwiseAnd) && Pred == ICmpInst::ICMP_ULT) { |
5010 | 5 | auto *Zero = Constant::getNullValue(BO0->getType()); |
5011 | 5 | return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero); |
5012 | 5 | } |
5013 | 28.2k | } |
5014 | | |
5015 | | // For unsigned predicates / eq / ne: |
5016 | | // icmp pred (x << 1), x --> icmp getSignedPredicate(pred) x, 0 |
5017 | | // icmp pred x, (x << 1) --> icmp getSignedPredicate(pred) 0, x |
5018 | 30.0k | if (!ICmpInst::isSigned(Pred)) { |
5019 | 23.3k | if (match(Op0, m_Shl(m_Specific(Op1), m_One()))) |
5020 | 18 | return new ICmpInst(ICmpInst::getSignedPredicate(Pred), Op1, |
5021 | 18 | Constant::getNullValue(Op1->getType())); |
5022 | 23.3k | else if (match(Op1, m_Shl(m_Specific(Op0), m_One()))) |
5023 | 19 | return new ICmpInst(ICmpInst::getSignedPredicate(Pred), |
5024 | 19 | Constant::getNullValue(Op0->getType()), Op0); |
5025 | 23.3k | } |
5026 | | |
5027 | 29.9k | if (Value *V = foldMultiplicationOverflowCheck(I)) |
5028 | 47 | return replaceInstUsesWith(I, V); |
5029 | | |
5030 | 29.9k | if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder)) |
5031 | 138 | return replaceInstUsesWith(I, V); |
5032 | | |
5033 | 29.7k | if (Instruction *R = foldICmpAndXX(I, Q, *this)) |
5034 | 26 | return R; |
5035 | | |
5036 | 29.7k | if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder)) |
5037 | 39 | return replaceInstUsesWith(I, V); |
5038 | | |
5039 | 29.7k | if (Value *V = foldShiftIntoShiftInAnotherHandOfAndInICmp(I, SQ, Builder)) |
5040 | 132 | return replaceInstUsesWith(I, V); |
5041 | | |
5042 | 29.5k | return nullptr; |
5043 | 29.7k | } |
5044 | | |
5045 | | /// Fold icmp Pred min|max(X, Y), Z. |
5046 | | Instruction *InstCombinerImpl::foldICmpWithMinMax(Instruction &I, |
5047 | | MinMaxIntrinsic *MinMax, |
5048 | | Value *Z, |
5049 | 1.09k | ICmpInst::Predicate Pred) { |
5050 | 1.09k | Value *X = MinMax->getLHS(); |
5051 | 1.09k | Value *Y = MinMax->getRHS(); |
5052 | 1.09k | if (ICmpInst::isSigned(Pred) && !MinMax->isSigned()) |
5053 | 117 | return nullptr; |
5054 | 973 | if (ICmpInst::isUnsigned(Pred) && MinMax->isSigned()) { |
5055 | | // Revert the transform signed pred -> unsigned pred |
5056 | | // TODO: We can flip the signedness of predicate if both operands of icmp |
5057 | | // are negative. |
5058 | 130 | if (isKnownNonNegative(Z, SQ.getWithInstruction(&I)) && |
5059 | 130 | isKnownNonNegative(MinMax, SQ.getWithInstruction(&I))) { |
5060 | 3 | Pred = ICmpInst::getFlippedSignednessPredicate(Pred); |
5061 | 3 | } else |
5062 | 127 | return nullptr; |
5063 | 130 | } |
5064 | 846 | SimplifyQuery Q = SQ.getWithInstruction(&I); |
5065 | 1.71k | auto IsCondKnownTrue = [](Value *Val) -> std::optional<bool> { |
5066 | 1.71k | if (!Val) |
5067 | 1.46k | return std::nullopt; |
5068 | 246 | if (match(Val, m_One())) |
5069 | 86 | return true; |
5070 | 160 | if (match(Val, m_Zero())) |
5071 | 160 | return false; |
5072 | 0 | return std::nullopt; |
5073 | 160 | }; |
5074 | 846 | auto CmpXZ = IsCondKnownTrue(simplifyICmpInst(Pred, X, Z, Q)); |
5075 | 846 | auto CmpYZ = IsCondKnownTrue(simplifyICmpInst(Pred, Y, Z, Q)); |
5076 | 846 | if (!CmpXZ.has_value() && !CmpYZ.has_value()) |
5077 | 650 | return nullptr; |
5078 | 196 | if (!CmpXZ.has_value()) { |
5079 | 116 | std::swap(X, Y); |
5080 | 116 | std::swap(CmpXZ, CmpYZ); |
5081 | 116 | } |
5082 | | |
5083 | 196 | auto FoldIntoCmpYZ = [&]() -> Instruction * { |
5084 | 145 | if (CmpYZ.has_value()) |
5085 | 27 | return replaceInstUsesWith(I, ConstantInt::getBool(I.getType(), *CmpYZ)); |
5086 | 118 | return ICmpInst::Create(Instruction::ICmp, Pred, Y, Z); |
5087 | 145 | }; |
5088 | | |
5089 | 196 | switch (Pred) { |
5090 | 39 | case ICmpInst::ICMP_EQ: |
5091 | 68 | case ICmpInst::ICMP_NE: { |
5092 | | // If X == Z: |
5093 | | // Expr Result |
5094 | | // min(X, Y) == Z X <= Y |
5095 | | // max(X, Y) == Z X >= Y |
5096 | | // min(X, Y) != Z X > Y |
5097 | | // max(X, Y) != Z X < Y |
5098 | 68 | if ((Pred == ICmpInst::ICMP_EQ) == *CmpXZ) { |
5099 | 45 | ICmpInst::Predicate NewPred = |
5100 | 45 | ICmpInst::getNonStrictPredicate(MinMax->getPredicate()); |
5101 | 45 | if (Pred == ICmpInst::ICMP_NE) |
5102 | 21 | NewPred = ICmpInst::getInversePredicate(NewPred); |
5103 | 45 | return ICmpInst::Create(Instruction::ICmp, NewPred, X, Y); |
5104 | 45 | } |
5105 | | // Otherwise (X != Z): |
5106 | 23 | ICmpInst::Predicate NewPred = MinMax->getPredicate(); |
5107 | 23 | auto MinMaxCmpXZ = IsCondKnownTrue(simplifyICmpInst(NewPred, X, Z, Q)); |
5108 | 23 | if (!MinMaxCmpXZ.has_value()) { |
5109 | 6 | std::swap(X, Y); |
5110 | 6 | std::swap(CmpXZ, CmpYZ); |
5111 | | // Re-check pre-condition X != Z |
5112 | 6 | if (!CmpXZ.has_value() || (Pred == ICmpInst::ICMP_EQ) == *CmpXZ) |
5113 | 6 | break; |
5114 | 0 | MinMaxCmpXZ = IsCondKnownTrue(simplifyICmpInst(NewPred, X, Z, Q)); |
5115 | 0 | } |
5116 | 17 | if (!MinMaxCmpXZ.has_value()) |
5117 | 0 | break; |
5118 | 17 | if (*MinMaxCmpXZ) { |
5119 | | // Expr Fact Result |
5120 | | // min(X, Y) == Z X < Z false |
5121 | | // max(X, Y) == Z X > Z false |
5122 | | // min(X, Y) != Z X < Z true |
5123 | | // max(X, Y) != Z X > Z true |
5124 | 0 | return replaceInstUsesWith( |
5125 | 0 | I, ConstantInt::getBool(I.getType(), Pred == ICmpInst::ICMP_NE)); |
5126 | 17 | } else { |
5127 | | // Expr Fact Result |
5128 | | // min(X, Y) == Z X > Z Y == Z |
5129 | | // max(X, Y) == Z X < Z Y == Z |
5130 | | // min(X, Y) != Z X > Z Y != Z |
5131 | | // max(X, Y) != Z X < Z Y != Z |
5132 | 17 | return FoldIntoCmpYZ(); |
5133 | 17 | } |
5134 | 0 | break; |
5135 | 17 | } |
5136 | 43 | case ICmpInst::ICMP_SLT: |
5137 | 67 | case ICmpInst::ICMP_ULT: |
5138 | 70 | case ICmpInst::ICMP_SLE: |
5139 | 71 | case ICmpInst::ICMP_ULE: |
5140 | 105 | case ICmpInst::ICMP_SGT: |
5141 | 118 | case ICmpInst::ICMP_UGT: |
5142 | 125 | case ICmpInst::ICMP_SGE: |
5143 | 128 | case ICmpInst::ICMP_UGE: { |
5144 | 128 | bool IsSame = MinMax->getPredicate() == ICmpInst::getStrictPredicate(Pred); |
5145 | 128 | if (*CmpXZ) { |
5146 | 51 | if (IsSame) { |
5147 | | // Expr Fact Result |
5148 | | // min(X, Y) < Z X < Z true |
5149 | | // min(X, Y) <= Z X <= Z true |
5150 | | // max(X, Y) > Z X > Z true |
5151 | | // max(X, Y) >= Z X >= Z true |
5152 | 0 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); |
5153 | 51 | } else { |
5154 | | // Expr Fact Result |
5155 | | // max(X, Y) < Z X < Z Y < Z |
5156 | | // max(X, Y) <= Z X <= Z Y <= Z |
5157 | | // min(X, Y) > Z X > Z Y > Z |
5158 | | // min(X, Y) >= Z X >= Z Y >= Z |
5159 | 51 | return FoldIntoCmpYZ(); |
5160 | 51 | } |
5161 | 77 | } else { |
5162 | 77 | if (IsSame) { |
5163 | | // Expr Fact Result |
5164 | | // min(X, Y) < Z X >= Z Y < Z |
5165 | | // min(X, Y) <= Z X > Z Y <= Z |
5166 | | // max(X, Y) > Z X <= Z Y > Z |
5167 | | // max(X, Y) >= Z X < Z Y >= Z |
5168 | 77 | return FoldIntoCmpYZ(); |
5169 | 77 | } else { |
5170 | | // Expr Fact Result |
5171 | | // max(X, Y) < Z X >= Z false |
5172 | | // max(X, Y) <= Z X > Z false |
5173 | | // min(X, Y) > Z X <= Z false |
5174 | | // min(X, Y) >= Z X < Z false |
5175 | 0 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); |
5176 | 0 | } |
5177 | 77 | } |
5178 | 0 | break; |
5179 | 128 | } |
5180 | 0 | default: |
5181 | 0 | break; |
5182 | 196 | } |
5183 | | |
5184 | 6 | return nullptr; |
5185 | 196 | } |
5186 | | |
5187 | | // Canonicalize checking for a power-of-2-or-zero value: |
5188 | | static Instruction *foldICmpPow2Test(ICmpInst &I, |
5189 | 76.0k | InstCombiner::BuilderTy &Builder) { |
5190 | 76.0k | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); |
5191 | 76.0k | const CmpInst::Predicate Pred = I.getPredicate(); |
5192 | 76.0k | Value *A = nullptr; |
5193 | 76.0k | bool CheckIs; |
5194 | 76.0k | if (I.isEquality()) { |
5195 | | // (A & (A-1)) == 0 --> ctpop(A) < 2 (two commuted variants) |
5196 | | // ((A-1) & A) != 0 --> ctpop(A) > 1 (two commuted variants) |
5197 | 34.9k | if (!match(Op0, m_OneUse(m_c_And(m_Add(m_Value(A), m_AllOnes()), |
5198 | 34.9k | m_Deferred(A)))) || |
5199 | 34.9k | !match(Op1, m_ZeroInt())) |
5200 | 34.8k | A = nullptr; |
5201 | | |
5202 | | // (A & -A) == A --> ctpop(A) < 2 (four commuted variants) |
5203 | | // (-A & A) != A --> ctpop(A) > 1 (four commuted variants) |
5204 | 34.9k | if (match(Op0, m_OneUse(m_c_And(m_Neg(m_Specific(Op1)), m_Specific(Op1))))) |
5205 | 95 | A = Op1; |
5206 | 34.8k | else if (match(Op1, |
5207 | 34.8k | m_OneUse(m_c_And(m_Neg(m_Specific(Op0)), m_Specific(Op0))))) |
5208 | 9 | A = Op0; |
5209 | | |
5210 | 34.9k | CheckIs = Pred == ICmpInst::ICMP_EQ; |
5211 | 41.1k | } else if (ICmpInst::isUnsigned(Pred)) { |
5212 | | // (A ^ (A-1)) u>= A --> ctpop(A) < 2 (two commuted variants) |
5213 | | // ((A-1) ^ A) u< A --> ctpop(A) > 1 (two commuted variants) |
5214 | | |
5215 | 18.0k | if ((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_ULT) && |
5216 | 18.0k | match(Op0, m_OneUse(m_c_Xor(m_Add(m_Specific(Op1), m_AllOnes()), |
5217 | 10.9k | m_Specific(Op1))))) { |
5218 | 0 | A = Op1; |
5219 | 0 | CheckIs = Pred == ICmpInst::ICMP_UGE; |
5220 | 18.0k | } else if ((Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE) && |
5221 | 18.0k | match(Op1, m_OneUse(m_c_Xor(m_Add(m_Specific(Op0), m_AllOnes()), |
5222 | 7.12k | m_Specific(Op0))))) { |
5223 | 0 | A = Op0; |
5224 | 0 | CheckIs = Pred == ICmpInst::ICMP_ULE; |
5225 | 0 | } |
5226 | 18.0k | } |
5227 | | |
5228 | 76.0k | if (A) { |
5229 | 177 | Type *Ty = A->getType(); |
5230 | 177 | CallInst *CtPop = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, A); |
5231 | 177 | return CheckIs ? new ICmpInst(ICmpInst::ICMP_ULT, CtPop, |
5232 | 104 | ConstantInt::get(Ty, 2)) |
5233 | 177 | : new ICmpInst(ICmpInst::ICMP_UGT, CtPop, |
5234 | 73 | ConstantInt::get(Ty, 1)); |
5235 | 177 | } |
5236 | | |
5237 | 75.8k | return nullptr; |
5238 | 76.0k | } |
5239 | | |
5240 | 76.2k | Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) { |
5241 | 76.2k | if (!I.isEquality()) |
5242 | 41.1k | return nullptr; |
5243 | | |
5244 | 35.1k | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); |
5245 | 35.1k | const CmpInst::Predicate Pred = I.getPredicate(); |
5246 | 35.1k | Value *A, *B, *C, *D; |
5247 | 35.1k | if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { |
5248 | 300 | if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 |
5249 | 56 | Value *OtherVal = A == Op1 ? B : A; |
5250 | 56 | return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType())); |
5251 | 56 | } |
5252 | | |
5253 | 244 | if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) { |
5254 | | // A^c1 == C^c2 --> A == C^(c1^c2) |
5255 | 28 | ConstantInt *C1, *C2; |
5256 | 28 | if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) && |
5257 | 28 | Op1->hasOneUse()) { |
5258 | 1 | Constant *NC = Builder.getInt(C1->getValue() ^ C2->getValue()); |
5259 | 1 | Value *Xor = Builder.CreateXor(C, NC); |
5260 | 1 | return new ICmpInst(Pred, A, Xor); |
5261 | 1 | } |
5262 | | |
5263 | | // A^B == A^D -> B == D |
5264 | 27 | if (A == C) |
5265 | 2 | return new ICmpInst(Pred, B, D); |
5266 | 25 | if (A == D) |
5267 | 0 | return new ICmpInst(Pred, B, C); |
5268 | 25 | if (B == C) |
5269 | 1 | return new ICmpInst(Pred, A, D); |
5270 | 24 | if (B == D) |
5271 | 0 | return new ICmpInst(Pred, A, C); |
5272 | 24 | } |
5273 | 244 | } |
5274 | | |
5275 | | // canonicalize: |
5276 | | // (icmp eq/ne (and X, C), X) |
5277 | | // -> (icmp eq/ne (and X, ~C), 0) |
5278 | 35.0k | { |
5279 | 35.0k | Constant *CMask; |
5280 | 35.0k | A = nullptr; |
5281 | 35.0k | if (match(Op0, m_OneUse(m_And(m_Specific(Op1), m_ImmConstant(CMask))))) |
5282 | 56 | A = Op1; |
5283 | 35.0k | else if (match(Op1, m_OneUse(m_And(m_Specific(Op0), m_ImmConstant(CMask))))) |
5284 | 0 | A = Op0; |
5285 | 35.0k | if (A) |
5286 | 56 | return new ICmpInst(Pred, Builder.CreateAnd(A, Builder.CreateNot(CMask)), |
5287 | 56 | Constant::getNullValue(A->getType())); |
5288 | 35.0k | } |
5289 | | |
5290 | 35.0k | if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) { |
5291 | | // A == (A^B) -> B == 0 |
5292 | 18 | Value *OtherVal = A == Op0 ? B : A; |
5293 | 18 | return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType())); |
5294 | 18 | } |
5295 | | |
5296 | | // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 |
5297 | 35.0k | if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && |
5298 | 35.0k | match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { |
5299 | 8 | Value *X = nullptr, *Y = nullptr, *Z = nullptr; |
5300 | | |
5301 | 8 | if (A == C) { |
5302 | 1 | X = B; |
5303 | 1 | Y = D; |
5304 | 1 | Z = A; |
5305 | 7 | } else if (A == D) { |
5306 | 0 | X = B; |
5307 | 0 | Y = C; |
5308 | 0 | Z = A; |
5309 | 7 | } else if (B == C) { |
5310 | 0 | X = A; |
5311 | 0 | Y = D; |
5312 | 0 | Z = B; |
5313 | 7 | } else if (B == D) { |
5314 | 3 | X = A; |
5315 | 3 | Y = C; |
5316 | 3 | Z = B; |
5317 | 3 | } |
5318 | | |
5319 | 8 | if (X) { // Build (X^Y) & Z |
5320 | 4 | Op1 = Builder.CreateXor(X, Y); |
5321 | 4 | Op1 = Builder.CreateAnd(Op1, Z); |
5322 | 4 | return new ICmpInst(Pred, Op1, Constant::getNullValue(Op1->getType())); |
5323 | 4 | } |
5324 | 8 | } |
5325 | | |
5326 | 35.0k | { |
5327 | | // Similar to above, but specialized for constant because invert is needed: |
5328 | | // (X | C) == (Y | C) --> (X ^ Y) & ~C == 0 |
5329 | 35.0k | Value *X, *Y; |
5330 | 35.0k | Constant *C; |
5331 | 35.0k | if (match(Op0, m_OneUse(m_Or(m_Value(X), m_Constant(C)))) && |
5332 | 35.0k | match(Op1, m_OneUse(m_Or(m_Value(Y), m_Specific(C))))) { |
5333 | 0 | Value *Xor = Builder.CreateXor(X, Y); |
5334 | 0 | Value *And = Builder.CreateAnd(Xor, ConstantExpr::getNot(C)); |
5335 | 0 | return new ICmpInst(Pred, And, Constant::getNullValue(And->getType())); |
5336 | 0 | } |
5337 | 35.0k | } |
5338 | | |
5339 | 35.0k | if (match(Op1, m_ZExt(m_Value(A))) && |
5340 | 35.0k | (Op0->hasOneUse() || Op1->hasOneUse())) { |
5341 | | // (B & (Pow2C-1)) == zext A --> A == trunc B |
5342 | | // (B & (Pow2C-1)) != zext A --> A != trunc B |
5343 | 52 | const APInt *MaskC; |
5344 | 52 | if (match(Op0, m_And(m_Value(B), m_LowBitMask(MaskC))) && |
5345 | 52 | MaskC->countr_one() == A->getType()->getScalarSizeInBits()) |
5346 | 4 | return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType())); |
5347 | 52 | } |
5348 | | |
5349 | | // (A >> C) == (B >> C) --> (A^B) u< (1 << C) |
5350 | | // For lshr and ashr pairs. |
5351 | 35.0k | const APInt *AP1, *AP2; |
5352 | 35.0k | if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_APIntAllowUndef(AP1)))) && |
5353 | 35.0k | match(Op1, m_OneUse(m_LShr(m_Value(B), m_APIntAllowUndef(AP2))))) || |
5354 | 35.0k | (match(Op0, m_OneUse(m_AShr(m_Value(A), m_APIntAllowUndef(AP1)))) && |
5355 | 34.9k | match(Op1, m_OneUse(m_AShr(m_Value(B), m_APIntAllowUndef(AP2)))))) { |
5356 | 41 | if (AP1 != AP2) |
5357 | 0 | return nullptr; |
5358 | 41 | unsigned TypeBits = AP1->getBitWidth(); |
5359 | 41 | unsigned ShAmt = AP1->getLimitedValue(TypeBits); |
5360 | 41 | if (ShAmt < TypeBits && ShAmt != 0) { |
5361 | 41 | ICmpInst::Predicate NewPred = |
5362 | 41 | Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; |
5363 | 41 | Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted"); |
5364 | 41 | APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt); |
5365 | 41 | return new ICmpInst(NewPred, Xor, ConstantInt::get(A->getType(), CmpVal)); |
5366 | 41 | } |
5367 | 41 | } |
5368 | | |
5369 | | // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0 |
5370 | 34.9k | ConstantInt *Cst1; |
5371 | 34.9k | if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) && |
5372 | 34.9k | match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) { |
5373 | 3 | unsigned TypeBits = Cst1->getBitWidth(); |
5374 | 3 | unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); |
5375 | 3 | if (ShAmt < TypeBits && ShAmt != 0) { |
5376 | 3 | Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted"); |
5377 | 3 | APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt); |
5378 | 3 | Value *And = Builder.CreateAnd(Xor, Builder.getInt(AndVal), |
5379 | 3 | I.getName() + ".mask"); |
5380 | 3 | return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType())); |
5381 | 3 | } |
5382 | 3 | } |
5383 | | |
5384 | | // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to |
5385 | | // "icmp (and X, mask), cst" |
5386 | 34.9k | uint64_t ShAmt = 0; |
5387 | 34.9k | if (Op0->hasOneUse() && |
5388 | 34.9k | match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), m_ConstantInt(ShAmt))))) && |
5389 | 34.9k | match(Op1, m_ConstantInt(Cst1)) && |
5390 | | // Only do this when A has multiple uses. This is most important to do |
5391 | | // when it exposes other optimizations. |
5392 | 34.9k | !A->hasOneUse()) { |
5393 | 5 | unsigned ASize = cast<IntegerType>(A->getType())->getPrimitiveSizeInBits(); |
5394 | | |
5395 | 5 | if (ShAmt < ASize) { |
5396 | 5 | APInt MaskV = |
5397 | 5 | APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits()); |
5398 | 5 | MaskV <<= ShAmt; |
5399 | | |
5400 | 5 | APInt CmpV = Cst1->getValue().zext(ASize); |
5401 | 5 | CmpV <<= ShAmt; |
5402 | | |
5403 | 5 | Value *Mask = Builder.CreateAnd(A, Builder.getInt(MaskV)); |
5404 | 5 | return new ICmpInst(Pred, Mask, Builder.getInt(CmpV)); |
5405 | 5 | } |
5406 | 5 | } |
5407 | | |
5408 | 34.9k | if (Instruction *ICmp = foldICmpIntrinsicWithIntrinsic(I, Builder)) |
5409 | 8 | return ICmp; |
5410 | | |
5411 | | // Match icmp eq (trunc (lshr A, BW), (ashr (trunc A), BW-1)), which checks the |
5412 | | // top BW/2 + 1 bits are all the same. Create "A >=s INT_MIN && A <=s INT_MAX", |
5413 | | // which we generate as "icmp ult (add A, 2^(BW-1)), 2^BW" to skip a few steps |
5414 | | // of instcombine. |
5415 | 34.9k | unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); |
5416 | 34.9k | if (match(Op0, m_AShr(m_Trunc(m_Value(A)), m_SpecificInt(BitWidth - 1))) && |
5417 | 34.9k | match(Op1, m_Trunc(m_LShr(m_Specific(A), m_SpecificInt(BitWidth)))) && |
5418 | 34.9k | A->getType()->getScalarSizeInBits() == BitWidth * 2 && |
5419 | 34.9k | (I.getOperand(0)->hasOneUse() || I.getOperand(1)->hasOneUse())) { |
5420 | 0 | APInt C = APInt::getOneBitSet(BitWidth * 2, BitWidth - 1); |
5421 | 0 | Value *Add = Builder.CreateAdd(A, ConstantInt::get(A->getType(), C)); |
5422 | 0 | return new ICmpInst(Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULT |
5423 | 0 | : ICmpInst::ICMP_UGE, |
5424 | 0 | Add, ConstantInt::get(A->getType(), C.shl(1))); |
5425 | 0 | } |
5426 | | |
5427 | | // Canonicalize: |
5428 | | // Assume B_Pow2 != 0 |
5429 | | // 1. A & B_Pow2 != B_Pow2 -> A & B_Pow2 == 0 |
5430 | | // 2. A & B_Pow2 == B_Pow2 -> A & B_Pow2 != 0 |
5431 | 34.9k | if (match(Op0, m_c_And(m_Specific(Op1), m_Value())) && |
5432 | 34.9k | isKnownToBeAPowerOfTwo(Op1, /* OrZero */ false, 0, &I)) |
5433 | 0 | return new ICmpInst(CmpInst::getInversePredicate(Pred), Op0, |
5434 | 0 | ConstantInt::getNullValue(Op0->getType())); |
5435 | | |
5436 | 34.9k | if (match(Op1, m_c_And(m_Specific(Op0), m_Value())) && |
5437 | 34.9k | isKnownToBeAPowerOfTwo(Op0, /* OrZero */ false, 0, &I)) |
5438 | 0 | return new ICmpInst(CmpInst::getInversePredicate(Pred), Op1, |
5439 | 0 | ConstantInt::getNullValue(Op1->getType())); |
5440 | | |
5441 | | // Canonicalize: |
5442 | | // icmp eq/ne X, OneUse(rotate-right(X)) |
5443 | | // -> icmp eq/ne X, rotate-left(X) |
5444 | | // We generally try to convert rotate-right -> rotate-left, this just |
5445 | | // canonicalizes another case. |
5446 | 34.9k | CmpInst::Predicate PredUnused = Pred; |
5447 | 34.9k | if (match(&I, m_c_ICmp(PredUnused, m_Value(A), |
5448 | 34.9k | m_OneUse(m_Intrinsic<Intrinsic::fshr>( |
5449 | 34.9k | m_Deferred(A), m_Deferred(A), m_Value(B)))))) |
5450 | 0 | return new ICmpInst( |
5451 | 0 | Pred, A, |
5452 | 0 | Builder.CreateIntrinsic(Op0->getType(), Intrinsic::fshl, {A, A, B})); |
5453 | | |
5454 | | // Canonicalize: |
5455 | | // icmp eq/ne OneUse(A ^ Cst), B --> icmp eq/ne (A ^ B), Cst |
5456 | 34.9k | Constant *Cst; |
5457 | 34.9k | if (match(&I, m_c_ICmp(PredUnused, |
5458 | 34.9k | m_OneUse(m_Xor(m_Value(A), m_ImmConstant(Cst))), |
5459 | 34.9k | m_CombineAnd(m_Value(B), m_Unless(m_ImmConstant()))))) |
5460 | 21 | return new ICmpInst(Pred, Builder.CreateXor(A, B), Cst); |
5461 | | |
5462 | 34.9k | { |
5463 | | // (icmp eq/ne (and (add/sub/xor X, P2), P2), P2) |
5464 | 34.9k | auto m_Matcher = |
5465 | 34.9k | m_CombineOr(m_CombineOr(m_c_Add(m_Value(B), m_Deferred(A)), |
5466 | 34.9k | m_c_Xor(m_Value(B), m_Deferred(A))), |
5467 | 34.9k | m_Sub(m_Value(B), m_Deferred(A))); |
5468 | 34.9k | std::optional<bool> IsZero = std::nullopt; |
5469 | 34.9k | if (match(&I, m_c_ICmp(PredUnused, m_OneUse(m_c_And(m_Value(A), m_Matcher)), |
5470 | 34.9k | m_Deferred(A)))) |
5471 | 104 | IsZero = false; |
5472 | | // (icmp eq/ne (and (add/sub/xor X, P2), P2), 0) |
5473 | 34.8k | else if (match(&I, |
5474 | 34.8k | m_ICmp(PredUnused, m_OneUse(m_c_And(m_Value(A), m_Matcher)), |
5475 | 34.8k | m_Zero()))) |
5476 | 83 | IsZero = true; |
5477 | | |
5478 | 34.9k | if (IsZero && isKnownToBeAPowerOfTwo(A, /* OrZero */ true, /*Depth*/ 0, &I)) |
5479 | | // (icmp eq/ne (and (add/sub/xor X, P2), P2), P2) |
5480 | | // -> (icmp eq/ne (and X, P2), 0) |
5481 | | // (icmp eq/ne (and (add/sub/xor X, P2), P2), 0) |
5482 | | // -> (icmp eq/ne (and X, P2), P2) |
5483 | 2 | return new ICmpInst(Pred, Builder.CreateAnd(B, A), |
5484 | 2 | *IsZero ? A |
5485 | 2 | : ConstantInt::getNullValue(A->getType())); |
5486 | 34.9k | } |
5487 | | |
5488 | 34.9k | return nullptr; |
5489 | 34.9k | } |
5490 | | |
5491 | 5.81k | Instruction *InstCombinerImpl::foldICmpWithTrunc(ICmpInst &ICmp) { |
5492 | 5.81k | ICmpInst::Predicate Pred = ICmp.getPredicate(); |
5493 | 5.81k | Value *Op0 = ICmp.getOperand(0), *Op1 = ICmp.getOperand(1); |
5494 | | |
5495 | | // Try to canonicalize trunc + compare-to-constant into a mask + cmp. |
5496 | | // The trunc masks high bits while the compare may effectively mask low bits. |
5497 | 5.81k | Value *X; |
5498 | 5.81k | const APInt *C; |
5499 | 5.81k | if (!match(Op0, m_OneUse(m_Trunc(m_Value(X)))) || !match(Op1, m_APInt(C))) |
5500 | 5.63k | return nullptr; |
5501 | | |
5502 | | // This matches patterns corresponding to tests of the signbit as well as: |
5503 | | // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?) |
5504 | | // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?) |
5505 | 180 | APInt Mask; |
5506 | 180 | if (decomposeBitTestICmp(Op0, Op1, Pred, X, Mask, true /* WithTrunc */)) { |
5507 | 110 | Value *And = Builder.CreateAnd(X, Mask); |
5508 | 110 | Constant *Zero = ConstantInt::getNullValue(X->getType()); |
5509 | 110 | return new ICmpInst(Pred, And, Zero); |
5510 | 110 | } |
5511 | | |
5512 | 70 | unsigned SrcBits = X->getType()->getScalarSizeInBits(); |
5513 | 70 | if (Pred == ICmpInst::ICMP_ULT && C->isNegatedPowerOf2()) { |
5514 | | // If C is a negative power-of-2 (high-bit mask): |
5515 | | // (trunc X) u< C --> (X & C) != C (are any masked-high-bits clear?) |
5516 | 0 | Constant *MaskC = ConstantInt::get(X->getType(), C->zext(SrcBits)); |
5517 | 0 | Value *And = Builder.CreateAnd(X, MaskC); |
5518 | 0 | return new ICmpInst(ICmpInst::ICMP_NE, And, MaskC); |
5519 | 0 | } |
5520 | | |
5521 | 70 | if (Pred == ICmpInst::ICMP_UGT && (~*C).isPowerOf2()) { |
5522 | | // If C is not-of-power-of-2 (one clear bit): |
5523 | | // (trunc X) u> C --> (X & (C+1)) == C+1 (are all masked-high-bits set?) |
5524 | 0 | Constant *MaskC = ConstantInt::get(X->getType(), (*C + 1).zext(SrcBits)); |
5525 | 0 | Value *And = Builder.CreateAnd(X, MaskC); |
5526 | 0 | return new ICmpInst(ICmpInst::ICMP_EQ, And, MaskC); |
5527 | 0 | } |
5528 | | |
5529 | 70 | if (auto *II = dyn_cast<IntrinsicInst>(X)) { |
5530 | 0 | if (II->getIntrinsicID() == Intrinsic::cttz || |
5531 | 0 | II->getIntrinsicID() == Intrinsic::ctlz) { |
5532 | 0 | unsigned MaxRet = SrcBits; |
5533 | | // If the "is_zero_poison" argument is set, then we know at least |
5534 | | // one bit is set in the input, so the result is always at least one |
5535 | | // less than the full bitwidth of that input. |
5536 | 0 | if (match(II->getArgOperand(1), m_One())) |
5537 | 0 | MaxRet--; |
5538 | | |
5539 | | // Make sure the destination is wide enough to hold the largest output of |
5540 | | // the intrinsic. |
5541 | 0 | if (llvm::Log2_32(MaxRet) + 1 <= Op0->getType()->getScalarSizeInBits()) |
5542 | 0 | if (Instruction *I = |
5543 | 0 | foldICmpIntrinsicWithConstant(ICmp, II, C->zext(SrcBits))) |
5544 | 0 | return I; |
5545 | 0 | } |
5546 | 0 | } |
5547 | | |
5548 | 70 | return nullptr; |
5549 | 70 | } |
5550 | | |
5551 | 5.70k | Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) { |
5552 | 5.70k | assert(isa<CastInst>(ICmp.getOperand(0)) && "Expected cast for operand 0"); |
5553 | 0 | auto *CastOp0 = cast<CastInst>(ICmp.getOperand(0)); |
5554 | 5.70k | Value *X; |
5555 | 5.70k | if (!match(CastOp0, m_ZExtOrSExt(m_Value(X)))) |
5556 | 5.15k | return nullptr; |
5557 | | |
5558 | 549 | bool IsSignedExt = CastOp0->getOpcode() == Instruction::SExt; |
5559 | 549 | bool IsSignedCmp = ICmp.isSigned(); |
5560 | | |
5561 | | // icmp Pred (ext X), (ext Y) |
5562 | 549 | Value *Y; |
5563 | 549 | if (match(ICmp.getOperand(1), m_ZExtOrSExt(m_Value(Y)))) { |
5564 | 166 | bool IsZext0 = isa<ZExtInst>(ICmp.getOperand(0)); |
5565 | 166 | bool IsZext1 = isa<ZExtInst>(ICmp.getOperand(1)); |
5566 | | |
5567 | 166 | if (IsZext0 != IsZext1) { |
5568 | | // If X and Y and both i1 |
5569 | | // (icmp eq/ne (zext X) (sext Y)) |
5570 | | // eq -> (icmp eq (or X, Y), 0) |
5571 | | // ne -> (icmp ne (or X, Y), 0) |
5572 | 43 | if (ICmp.isEquality() && X->getType()->isIntOrIntVectorTy(1) && |
5573 | 43 | Y->getType()->isIntOrIntVectorTy(1)) |
5574 | 4 | return new ICmpInst(ICmp.getPredicate(), Builder.CreateOr(X, Y), |
5575 | 4 | Constant::getNullValue(X->getType())); |
5576 | | |
5577 | | // If we have mismatched casts and zext has the nneg flag, we can |
5578 | | // treat the "zext nneg" as "sext". Otherwise, we cannot fold and quit. |
5579 | | |
5580 | 39 | auto *NonNegInst0 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(0)); |
5581 | 39 | auto *NonNegInst1 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(1)); |
5582 | | |
5583 | 39 | bool IsNonNeg0 = NonNegInst0 && NonNegInst0->hasNonNeg(); |
5584 | 39 | bool IsNonNeg1 = NonNegInst1 && NonNegInst1->hasNonNeg(); |
5585 | | |
5586 | 39 | if ((IsZext0 && IsNonNeg0) || (IsZext1 && IsNonNeg1)) |
5587 | 3 | IsSignedExt = true; |
5588 | 36 | else |
5589 | 36 | return nullptr; |
5590 | 39 | } |
5591 | | |
5592 | | // Not an extension from the same type? |
5593 | 126 | Type *XTy = X->getType(), *YTy = Y->getType(); |
5594 | 126 | if (XTy != YTy) { |
5595 | | // One of the casts must have one use because we are creating a new cast. |
5596 | 94 | if (!ICmp.getOperand(0)->hasOneUse() && !ICmp.getOperand(1)->hasOneUse()) |
5597 | 18 | return nullptr; |
5598 | | // Extend the narrower operand to the type of the wider operand. |
5599 | 76 | CastInst::CastOps CastOpcode = |
5600 | 76 | IsSignedExt ? Instruction::SExt : Instruction::ZExt; |
5601 | 76 | if (XTy->getScalarSizeInBits() < YTy->getScalarSizeInBits()) |
5602 | 22 | X = Builder.CreateCast(CastOpcode, X, YTy); |
5603 | 54 | else if (YTy->getScalarSizeInBits() < XTy->getScalarSizeInBits()) |
5604 | 54 | Y = Builder.CreateCast(CastOpcode, Y, XTy); |
5605 | 0 | else |
5606 | 0 | return nullptr; |
5607 | 76 | } |
5608 | | |
5609 | | // (zext X) == (zext Y) --> X == Y |
5610 | | // (sext X) == (sext Y) --> X == Y |
5611 | 108 | if (ICmp.isEquality()) |
5612 | 25 | return new ICmpInst(ICmp.getPredicate(), X, Y); |
5613 | | |
5614 | | // A signed comparison of sign extended values simplifies into a |
5615 | | // signed comparison. |
5616 | 83 | if (IsSignedCmp && IsSignedExt) |
5617 | 19 | return new ICmpInst(ICmp.getPredicate(), X, Y); |
5618 | | |
5619 | | // The other three cases all fold into an unsigned comparison. |
5620 | 64 | return new ICmpInst(ICmp.getUnsignedPredicate(), X, Y); |
5621 | 83 | } |
5622 | | |
5623 | | // Below here, we are only folding a compare with constant. |
5624 | 383 | auto *C = dyn_cast<Constant>(ICmp.getOperand(1)); |
5625 | 383 | if (!C) |
5626 | 6 | return nullptr; |
5627 | | |
5628 | | // If a lossless truncate is possible... |
5629 | 377 | Type *SrcTy = CastOp0->getSrcTy(); |
5630 | 377 | Constant *Res = getLosslessTrunc(C, SrcTy, CastOp0->getOpcode()); |
5631 | 377 | if (Res) { |
5632 | 357 | if (ICmp.isEquality()) |
5633 | 211 | return new ICmpInst(ICmp.getPredicate(), X, Res); |
5634 | | |
5635 | | // A signed comparison of sign extended values simplifies into a |
5636 | | // signed comparison. |
5637 | 146 | if (IsSignedExt && IsSignedCmp) |
5638 | 91 | return new ICmpInst(ICmp.getPredicate(), X, Res); |
5639 | | |
5640 | | // The other three cases all fold into an unsigned comparison. |
5641 | 55 | return new ICmpInst(ICmp.getUnsignedPredicate(), X, Res); |
5642 | 146 | } |
5643 | | |
5644 | | // The re-extended constant changed, partly changed (in the case of a vector), |
5645 | | // or could not be determined to be equal (in the case of a constant |
5646 | | // expression), so the constant cannot be represented in the shorter type. |
5647 | | // All the cases that fold to true or false will have already been handled |
5648 | | // by simplifyICmpInst, so only deal with the tricky case. |
5649 | 20 | if (IsSignedCmp || !IsSignedExt || !isa<ConstantInt>(C)) |
5650 | 8 | return nullptr; |
5651 | | |
5652 | | // Is source op positive? |
5653 | | // icmp ult (sext X), C --> icmp sgt X, -1 |
5654 | 12 | if (ICmp.getPredicate() == ICmpInst::ICMP_ULT) |
5655 | 10 | return new ICmpInst(CmpInst::ICMP_SGT, X, Constant::getAllOnesValue(SrcTy)); |
5656 | | |
5657 | | // Is source op negative? |
5658 | | // icmp ugt (sext X), C --> icmp slt X, 0 |
5659 | 2 | assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!"); |
5660 | 0 | return new ICmpInst(CmpInst::ICMP_SLT, X, Constant::getNullValue(SrcTy)); |
5661 | 12 | } |
5662 | | |
5663 | | /// Handle icmp (cast x), (cast or constant). |
5664 | 77.1k | Instruction *InstCombinerImpl::foldICmpWithCastOp(ICmpInst &ICmp) { |
5665 | | // If any operand of ICmp is a inttoptr roundtrip cast then remove it as |
5666 | | // icmp compares only pointer's value. |
5667 | | // icmp (inttoptr (ptrtoint p1)), p2 --> icmp p1, p2. |
5668 | 77.1k | Value *SimplifiedOp0 = simplifyIntToPtrRoundTripCast(ICmp.getOperand(0)); |
5669 | 77.1k | Value *SimplifiedOp1 = simplifyIntToPtrRoundTripCast(ICmp.getOperand(1)); |
5670 | 77.1k | if (SimplifiedOp0 || SimplifiedOp1) |
5671 | 0 | return new ICmpInst(ICmp.getPredicate(), |
5672 | 0 | SimplifiedOp0 ? SimplifiedOp0 : ICmp.getOperand(0), |
5673 | 0 | SimplifiedOp1 ? SimplifiedOp1 : ICmp.getOperand(1)); |
5674 | | |
5675 | 77.1k | auto *CastOp0 = dyn_cast<CastInst>(ICmp.getOperand(0)); |
5676 | 77.1k | if (!CastOp0) |
5677 | 70.8k | return nullptr; |
5678 | 6.24k | if (!isa<Constant>(ICmp.getOperand(1)) && !isa<CastInst>(ICmp.getOperand(1))) |
5679 | 356 | return nullptr; |
5680 | | |
5681 | 5.88k | Value *Op0Src = CastOp0->getOperand(0); |
5682 | 5.88k | Type *SrcTy = CastOp0->getSrcTy(); |
5683 | 5.88k | Type *DestTy = CastOp0->getDestTy(); |
5684 | | |
5685 | | // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the |
5686 | | // integer type is the same size as the pointer type. |
5687 | 5.88k | auto CompatibleSizes = [&](Type *SrcTy, Type *DestTy) { |
5688 | 69 | if (isa<VectorType>(SrcTy)) { |
5689 | 1 | SrcTy = cast<VectorType>(SrcTy)->getElementType(); |
5690 | 1 | DestTy = cast<VectorType>(DestTy)->getElementType(); |
5691 | 1 | } |
5692 | 69 | return DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth(); |
5693 | 69 | }; |
5694 | 5.88k | if (CastOp0->getOpcode() == Instruction::PtrToInt && |
5695 | 5.88k | CompatibleSizes(SrcTy, DestTy)) { |
5696 | 69 | Value *NewOp1 = nullptr; |
5697 | 69 | if (auto *PtrToIntOp1 = dyn_cast<PtrToIntOperator>(ICmp.getOperand(1))) { |
5698 | 13 | Value *PtrSrc = PtrToIntOp1->getOperand(0); |
5699 | 13 | if (PtrSrc->getType() == Op0Src->getType()) |
5700 | 13 | NewOp1 = PtrToIntOp1->getOperand(0); |
5701 | 56 | } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) { |
5702 | 56 | NewOp1 = ConstantExpr::getIntToPtr(RHSC, SrcTy); |
5703 | 56 | } |
5704 | | |
5705 | 69 | if (NewOp1) |
5706 | 69 | return new ICmpInst(ICmp.getPredicate(), Op0Src, NewOp1); |
5707 | 69 | } |
5708 | | |
5709 | 5.81k | if (Instruction *R = foldICmpWithTrunc(ICmp)) |
5710 | 110 | return R; |
5711 | | |
5712 | 5.70k | return foldICmpWithZextOrSext(ICmp); |
5713 | 5.81k | } |
5714 | | |
5715 | 2.86k | static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS, bool IsSigned) { |
5716 | 2.86k | switch (BinaryOp) { |
5717 | 0 | default: |
5718 | 0 | llvm_unreachable("Unsupported binary op"); |
5719 | 1.57k | case Instruction::Add: |
5720 | 2.46k | case Instruction::Sub: |
5721 | 2.46k | return match(RHS, m_Zero()); |
5722 | 404 | case Instruction::Mul: |
5723 | 404 | return !(RHS->getType()->isIntOrIntVectorTy(1) && IsSigned) && |
5724 | 404 | match(RHS, m_One()); |
5725 | 2.86k | } |
5726 | 2.86k | } |
5727 | | |
5728 | | OverflowResult |
5729 | | InstCombinerImpl::computeOverflow(Instruction::BinaryOps BinaryOp, |
5730 | | bool IsSigned, Value *LHS, Value *RHS, |
5731 | 4.49k | Instruction *CxtI) const { |
5732 | 4.49k | switch (BinaryOp) { |
5733 | 0 | default: |
5734 | 0 | llvm_unreachable("Unsupported binary op"); |
5735 | 2.63k | case Instruction::Add: |
5736 | 2.63k | if (IsSigned) |
5737 | 1.28k | return computeOverflowForSignedAdd(LHS, RHS, CxtI); |
5738 | 1.35k | else |
5739 | 1.35k | return computeOverflowForUnsignedAdd(LHS, RHS, CxtI); |
5740 | 1.46k | case Instruction::Sub: |
5741 | 1.46k | if (IsSigned) |
5742 | 1.05k | return computeOverflowForSignedSub(LHS, RHS, CxtI); |
5743 | 404 | else |
5744 | 404 | return computeOverflowForUnsignedSub(LHS, RHS, CxtI); |
5745 | 395 | case Instruction::Mul: |
5746 | 395 | if (IsSigned) |
5747 | 89 | return computeOverflowForSignedMul(LHS, RHS, CxtI); |
5748 | 306 | else |
5749 | 306 | return computeOverflowForUnsignedMul(LHS, RHS, CxtI); |
5750 | 4.49k | } |
5751 | 4.49k | } |
5752 | | |
5753 | | bool InstCombinerImpl::OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp, |
5754 | | bool IsSigned, Value *LHS, |
5755 | | Value *RHS, Instruction &OrigI, |
5756 | | Value *&Result, |
5757 | 2.86k | Constant *&Overflow) { |
5758 | 2.86k | if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS)) |
5759 | 0 | std::swap(LHS, RHS); |
5760 | | |
5761 | | // If the overflow check was an add followed by a compare, the insertion point |
5762 | | // may be pointing to the compare. We want to insert the new instructions |
5763 | | // before the add in case there are uses of the add between the add and the |
5764 | | // compare. |
5765 | 2.86k | Builder.SetInsertPoint(&OrigI); |
5766 | | |
5767 | 2.86k | Type *OverflowTy = Type::getInt1Ty(LHS->getContext()); |
5768 | 2.86k | if (auto *LHSTy = dyn_cast<VectorType>(LHS->getType())) |
5769 | 250 | OverflowTy = VectorType::get(OverflowTy, LHSTy->getElementCount()); |
5770 | | |
5771 | 2.86k | if (isNeutralValue(BinaryOp, RHS, IsSigned)) { |
5772 | 45 | Result = LHS; |
5773 | 45 | Overflow = ConstantInt::getFalse(OverflowTy); |
5774 | 45 | return true; |
5775 | 45 | } |
5776 | | |
5777 | 2.82k | switch (computeOverflow(BinaryOp, IsSigned, LHS, RHS, &OrigI)) { |
5778 | 2.65k | case OverflowResult::MayOverflow: |
5779 | 2.65k | return false; |
5780 | 16 | case OverflowResult::AlwaysOverflowsLow: |
5781 | 56 | case OverflowResult::AlwaysOverflowsHigh: |
5782 | 56 | Result = Builder.CreateBinOp(BinaryOp, LHS, RHS); |
5783 | 56 | Result->takeName(&OrigI); |
5784 | 56 | Overflow = ConstantInt::getTrue(OverflowTy); |
5785 | 56 | return true; |
5786 | 110 | case OverflowResult::NeverOverflows: |
5787 | 110 | Result = Builder.CreateBinOp(BinaryOp, LHS, RHS); |
5788 | 110 | Result->takeName(&OrigI); |
5789 | 110 | Overflow = ConstantInt::getFalse(OverflowTy); |
5790 | 110 | if (auto *Inst = dyn_cast<Instruction>(Result)) { |
5791 | 89 | if (IsSigned) |
5792 | 57 | Inst->setHasNoSignedWrap(); |
5793 | 32 | else |
5794 | 32 | Inst->setHasNoUnsignedWrap(); |
5795 | 89 | } |
5796 | 110 | return true; |
5797 | 2.82k | } |
5798 | | |
5799 | 0 | llvm_unreachable("Unexpected overflow result"); |
5800 | 0 | } |
5801 | | |
5802 | | /// Recognize and process idiom involving test for multiplication |
5803 | | /// overflow. |
5804 | | /// |
5805 | | /// The caller has matched a pattern of the form: |
5806 | | /// I = cmp u (mul(zext A, zext B), V |
5807 | | /// The function checks if this is a test for overflow and if so replaces |
5808 | | /// multiplication with call to 'mul.with.overflow' intrinsic. |
5809 | | /// |
5810 | | /// \param I Compare instruction. |
5811 | | /// \param MulVal Result of 'mult' instruction. It is one of the arguments of |
5812 | | /// the compare instruction. Must be of integer type. |
5813 | | /// \param OtherVal The other argument of compare instruction. |
5814 | | /// \returns Instruction which must replace the compare instruction, NULL if no |
5815 | | /// replacement required. |
5816 | | static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, |
5817 | | const APInt *OtherVal, |
5818 | 53 | InstCombinerImpl &IC) { |
5819 | | // Don't bother doing this transformation for pointers, don't do it for |
5820 | | // vectors. |
5821 | 53 | if (!isa<IntegerType>(MulVal->getType())) |
5822 | 0 | return nullptr; |
5823 | | |
5824 | 53 | auto *MulInstr = dyn_cast<Instruction>(MulVal); |
5825 | 53 | if (!MulInstr) |
5826 | 0 | return nullptr; |
5827 | 53 | assert(MulInstr->getOpcode() == Instruction::Mul); |
5828 | | |
5829 | 0 | auto *LHS = cast<ZExtInst>(MulInstr->getOperand(0)), |
5830 | 53 | *RHS = cast<ZExtInst>(MulInstr->getOperand(1)); |
5831 | 53 | assert(LHS->getOpcode() == Instruction::ZExt); |
5832 | 0 | assert(RHS->getOpcode() == Instruction::ZExt); |
5833 | 0 | Value *A = LHS->getOperand(0), *B = RHS->getOperand(0); |
5834 | | |
5835 | | // Calculate type and width of the result produced by mul.with.overflow. |
5836 | 53 | Type *TyA = A->getType(), *TyB = B->getType(); |
5837 | 53 | unsigned WidthA = TyA->getPrimitiveSizeInBits(), |
5838 | 53 | WidthB = TyB->getPrimitiveSizeInBits(); |
5839 | 53 | unsigned MulWidth; |
5840 | 53 | Type *MulType; |
5841 | 53 | if (WidthB > WidthA) { |
5842 | 0 | MulWidth = WidthB; |
5843 | 0 | MulType = TyB; |
5844 | 53 | } else { |
5845 | 53 | MulWidth = WidthA; |
5846 | 53 | MulType = TyA; |
5847 | 53 | } |
5848 | | |
5849 | | // In order to replace the original mul with a narrower mul.with.overflow, |
5850 | | // all uses must ignore upper bits of the product. The number of used low |
5851 | | // bits must be not greater than the width of mul.with.overflow. |
5852 | 53 | if (MulVal->hasNUsesOrMore(2)) |
5853 | 83 | for (User *U : MulVal->users()) { |
5854 | 83 | if (U == &I) |
5855 | 38 | continue; |
5856 | 45 | if (TruncInst *TI = dyn_cast<TruncInst>(U)) { |
5857 | | // Check if truncation ignores bits above MulWidth. |
5858 | 5 | unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits(); |
5859 | 5 | if (TruncWidth > MulWidth) |
5860 | 0 | return nullptr; |
5861 | 40 | } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { |
5862 | | // Check if AND ignores bits above MulWidth. |
5863 | 34 | if (BO->getOpcode() != Instruction::And) |
5864 | 8 | return nullptr; |
5865 | 26 | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { |
5866 | 13 | const APInt &CVal = CI->getValue(); |
5867 | 13 | if (CVal.getBitWidth() - CVal.countl_zero() > MulWidth) |
5868 | 11 | return nullptr; |
5869 | 13 | } else { |
5870 | | // In this case we could have the operand of the binary operation |
5871 | | // being defined in another block, and performing the replacement |
5872 | | // could break the dominance relation. |
5873 | 13 | return nullptr; |
5874 | 13 | } |
5875 | 26 | } else { |
5876 | | // Other uses prohibit this transformation. |
5877 | 6 | return nullptr; |
5878 | 6 | } |
5879 | 45 | } |
5880 | | |
5881 | | // Recognize patterns |
5882 | 15 | switch (I.getPredicate()) { |
5883 | 6 | case ICmpInst::ICMP_UGT: { |
5884 | | // Recognize pattern: |
5885 | | // mulval = mul(zext A, zext B) |
5886 | | // cmp ugt mulval, max |
5887 | 6 | APInt MaxVal = APInt::getMaxValue(MulWidth); |
5888 | 6 | MaxVal = MaxVal.zext(OtherVal->getBitWidth()); |
5889 | 6 | if (MaxVal.eq(*OtherVal)) |
5890 | 6 | break; // Recognized |
5891 | 0 | return nullptr; |
5892 | 6 | } |
5893 | | |
5894 | 3 | case ICmpInst::ICMP_ULT: { |
5895 | | // Recognize pattern: |
5896 | | // mulval = mul(zext A, zext B) |
5897 | | // cmp ule mulval, max + 1 |
5898 | 3 | APInt MaxVal = APInt::getOneBitSet(OtherVal->getBitWidth(), MulWidth); |
5899 | 3 | if (MaxVal.eq(*OtherVal)) |
5900 | 3 | break; // Recognized |
5901 | 0 | return nullptr; |
5902 | 3 | } |
5903 | | |
5904 | 6 | default: |
5905 | 6 | return nullptr; |
5906 | 15 | } |
5907 | | |
5908 | 9 | InstCombiner::BuilderTy &Builder = IC.Builder; |
5909 | 9 | Builder.SetInsertPoint(MulInstr); |
5910 | | |
5911 | | // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B) |
5912 | 9 | Value *MulA = A, *MulB = B; |
5913 | 9 | if (WidthA < MulWidth) |
5914 | 0 | MulA = Builder.CreateZExt(A, MulType); |
5915 | 9 | if (WidthB < MulWidth) |
5916 | 0 | MulB = Builder.CreateZExt(B, MulType); |
5917 | 9 | Function *F = Intrinsic::getDeclaration( |
5918 | 9 | I.getModule(), Intrinsic::umul_with_overflow, MulType); |
5919 | 9 | CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul"); |
5920 | 9 | IC.addToWorklist(MulInstr); |
5921 | | |
5922 | | // If there are uses of mul result other than the comparison, we know that |
5923 | | // they are truncation or binary AND. Change them to use result of |
5924 | | // mul.with.overflow and adjust properly mask/size. |
5925 | 9 | if (MulVal->hasNUsesOrMore(2)) { |
5926 | 5 | Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value"); |
5927 | 10 | for (User *U : make_early_inc_range(MulVal->users())) { |
5928 | 10 | if (U == &I) |
5929 | 5 | continue; |
5930 | 5 | if (TruncInst *TI = dyn_cast<TruncInst>(U)) { |
5931 | 5 | if (TI->getType()->getPrimitiveSizeInBits() == MulWidth) |
5932 | 3 | IC.replaceInstUsesWith(*TI, Mul); |
5933 | 2 | else |
5934 | 2 | TI->setOperand(0, Mul); |
5935 | 5 | } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { |
5936 | 0 | assert(BO->getOpcode() == Instruction::And); |
5937 | | // Replace (mul & mask) --> zext (mul.with.overflow & short_mask) |
5938 | 0 | ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); |
5939 | 0 | APInt ShortMask = CI->getValue().trunc(MulWidth); |
5940 | 0 | Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask); |
5941 | 0 | Value *Zext = Builder.CreateZExt(ShortAnd, BO->getType()); |
5942 | 0 | IC.replaceInstUsesWith(*BO, Zext); |
5943 | 0 | } else { |
5944 | 0 | llvm_unreachable("Unexpected Binary operation"); |
5945 | 0 | } |
5946 | 0 | IC.addToWorklist(cast<Instruction>(U)); |
5947 | 5 | } |
5948 | 5 | } |
5949 | | |
5950 | | // The original icmp gets replaced with the overflow value, maybe inverted |
5951 | | // depending on predicate. |
5952 | 9 | if (I.getPredicate() == ICmpInst::ICMP_ULT) { |
5953 | 3 | Value *Res = Builder.CreateExtractValue(Call, 1); |
5954 | 3 | return BinaryOperator::CreateNot(Res); |
5955 | 3 | } |
5956 | | |
5957 | 6 | return ExtractValueInst::Create(Call, 1); |
5958 | 9 | } |
5959 | | |
5960 | | /// When performing a comparison against a constant, it is possible that not all |
5961 | | /// the bits in the LHS are demanded. This helper method computes the mask that |
5962 | | /// IS demanded. |
5963 | 92.0k | static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) { |
5964 | 92.0k | const APInt *RHS; |
5965 | 92.0k | if (!match(I.getOperand(1), m_APInt(RHS))) |
5966 | 35.4k | return APInt::getAllOnes(BitWidth); |
5967 | | |
5968 | | // If this is a normal comparison, it demands all bits. If it is a sign bit |
5969 | | // comparison, it only demands the sign bit. |
5970 | 56.6k | bool UnusedBit; |
5971 | 56.6k | if (InstCombiner::isSignBitCheck(I.getPredicate(), *RHS, UnusedBit)) |
5972 | 7.56k | return APInt::getSignMask(BitWidth); |
5973 | | |
5974 | 49.0k | switch (I.getPredicate()) { |
5975 | | // For a UGT comparison, we don't care about any bits that |
5976 | | // correspond to the trailing ones of the comparand. The value of these |
5977 | | // bits doesn't impact the outcome of the comparison, because any value |
5978 | | // greater than the RHS must differ in a bit higher than these due to carry. |
5979 | 4.24k | case ICmpInst::ICMP_UGT: |
5980 | 4.24k | return APInt::getBitsSetFrom(BitWidth, RHS->countr_one()); |
5981 | | |
5982 | | // Similarly, for a ULT comparison, we don't care about the trailing zeros. |
5983 | | // Any value less than the RHS must differ in a higher bit because of carries. |
5984 | 6.67k | case ICmpInst::ICMP_ULT: |
5985 | 6.67k | return APInt::getBitsSetFrom(BitWidth, RHS->countr_zero()); |
5986 | | |
5987 | 38.1k | default: |
5988 | 38.1k | return APInt::getAllOnes(BitWidth); |
5989 | 49.0k | } |
5990 | 49.0k | } |
5991 | | |
5992 | | /// Check that one use is in the same block as the definition and all |
5993 | | /// other uses are in blocks dominated by a given block. |
5994 | | /// |
5995 | | /// \param DI Definition |
5996 | | /// \param UI Use |
5997 | | /// \param DB Block that must dominate all uses of \p DI outside |
5998 | | /// the parent block |
5999 | | /// \return true when \p UI is the only use of \p DI in the parent block |
6000 | | /// and all other uses of \p DI are in blocks dominated by \p DB. |
6001 | | /// |
6002 | | bool InstCombinerImpl::dominatesAllUses(const Instruction *DI, |
6003 | | const Instruction *UI, |
6004 | 49 | const BasicBlock *DB) const { |
6005 | 49 | assert(DI && UI && "Instruction not defined\n"); |
6006 | | // Ignore incomplete definitions. |
6007 | 49 | if (!DI->getParent()) |
6008 | 0 | return false; |
6009 | | // DI and UI must be in the same block. |
6010 | 49 | if (DI->getParent() != UI->getParent()) |
6011 | 2 | return false; |
6012 | | // Protect from self-referencing blocks. |
6013 | 47 | if (DI->getParent() == DB) |
6014 | 0 | return false; |
6015 | 96 | for (const User *U : DI->users()) { |
6016 | 96 | auto *Usr = cast<Instruction>(U); |
6017 | 96 | if (Usr != UI && !DT.dominates(DB, Usr->getParent())) |
6018 | 7 | return false; |
6019 | 96 | } |
6020 | 40 | return true; |
6021 | 47 | } |
6022 | | |
6023 | | /// Return true when the instruction sequence within a block is select-cmp-br. |
6024 | 634 | static bool isChainSelectCmpBranch(const SelectInst *SI) { |
6025 | 634 | const BasicBlock *BB = SI->getParent(); |
6026 | 634 | if (!BB) |
6027 | 0 | return false; |
6028 | 634 | auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator()); |
6029 | 634 | if (!BI || BI->getNumSuccessors() != 2) |
6030 | 502 | return false; |
6031 | 132 | auto *IC = dyn_cast<ICmpInst>(BI->getCondition()); |
6032 | 132 | if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI)) |
6033 | 42 | return false; |
6034 | 90 | return true; |
6035 | 132 | } |
6036 | | |
6037 | | /// True when a select result is replaced by one of its operands |
6038 | | /// in select-icmp sequence. This will eventually result in the elimination |
6039 | | /// of the select. |
6040 | | /// |
6041 | | /// \param SI Select instruction |
6042 | | /// \param Icmp Compare instruction |
6043 | | /// \param SIOpd Operand that replaces the select |
6044 | | /// |
6045 | | /// Notes: |
6046 | | /// - The replacement is global and requires dominator information |
6047 | | /// - The caller is responsible for the actual replacement |
6048 | | /// |
6049 | | /// Example: |
6050 | | /// |
6051 | | /// entry: |
6052 | | /// %4 = select i1 %3, %C* %0, %C* null |
6053 | | /// %5 = icmp eq %C* %4, null |
6054 | | /// br i1 %5, label %9, label %7 |
6055 | | /// ... |
6056 | | /// ; <label>:7 ; preds = %entry |
6057 | | /// %8 = getelementptr inbounds %C* %4, i64 0, i32 0 |
6058 | | /// ... |
6059 | | /// |
6060 | | /// can be transformed to |
6061 | | /// |
6062 | | /// %5 = icmp eq %C* %0, null |
6063 | | /// %6 = select i1 %3, i1 %5, i1 true |
6064 | | /// br i1 %6, label %9, label %7 |
6065 | | /// ... |
6066 | | /// ; <label>:7 ; preds = %entry |
6067 | | /// %8 = getelementptr inbounds %C* %0, i64 0, i32 0 // replace by %0! |
6068 | | /// |
6069 | | /// Similar when the first operand of the select is a constant or/and |
6070 | | /// the compare is for not equal rather than equal. |
6071 | | /// |
6072 | | /// NOTE: The function is only called when the select and compare constants |
6073 | | /// are equal, the optimization can work only for EQ predicates. This is not a |
6074 | | /// major restriction since a NE compare should be 'normalized' to an equal |
6075 | | /// compare, which usually happens in the combiner and test case |
6076 | | /// select-cmp-br.ll checks for it. |
6077 | | bool InstCombinerImpl::replacedSelectWithOperand(SelectInst *SI, |
6078 | | const ICmpInst *Icmp, |
6079 | 634 | const unsigned SIOpd) { |
6080 | 634 | assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!"); |
6081 | 634 | if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) { |
6082 | 90 | BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1); |
6083 | | // The check for the single predecessor is not the best that can be |
6084 | | // done. But it protects efficiently against cases like when SI's |
6085 | | // home block has two successors, Succ and Succ1, and Succ1 predecessor |
6086 | | // of Succ. Then SI can't be replaced by SIOpd because the use that gets |
6087 | | // replaced can be reached on either path. So the uniqueness check |
6088 | | // guarantees that the path all uses of SI (outside SI's parent) are on |
6089 | | // is disjoint from all other paths out of SI. But that information |
6090 | | // is more expensive to compute, and the trade-off here is in favor |
6091 | | // of compile-time. It should also be noticed that we check for a single |
6092 | | // predecessor and not only uniqueness. This to handle the situation when |
6093 | | // Succ and Succ1 points to the same basic block. |
6094 | 90 | if (Succ->getSinglePredecessor() && dominatesAllUses(SI, Icmp, Succ)) { |
6095 | 40 | NumSel++; |
6096 | 40 | SI->replaceUsesOutsideBlock(SI->getOperand(SIOpd), SI->getParent()); |
6097 | 40 | return true; |
6098 | 40 | } |
6099 | 90 | } |
6100 | 594 | return false; |
6101 | 634 | } |
6102 | | |
6103 | | /// Try to fold the comparison based on range information we can get by checking |
6104 | | /// whether bits are known to be zero or one in the inputs. |
6105 | 92.0k | Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { |
6106 | 92.0k | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); |
6107 | 92.0k | Type *Ty = Op0->getType(); |
6108 | 92.0k | ICmpInst::Predicate Pred = I.getPredicate(); |
6109 | | |
6110 | | // Get scalar or pointer size. |
6111 | 92.0k | unsigned BitWidth = Ty->isIntOrIntVectorTy() |
6112 | 92.0k | ? Ty->getScalarSizeInBits() |
6113 | 92.0k | : DL.getPointerTypeSizeInBits(Ty->getScalarType()); |
6114 | | |
6115 | 92.0k | if (!BitWidth) |
6116 | 0 | return nullptr; |
6117 | | |
6118 | 92.0k | KnownBits Op0Known(BitWidth); |
6119 | 92.0k | KnownBits Op1Known(BitWidth); |
6120 | | |
6121 | 92.0k | { |
6122 | | // Don't use dominating conditions when folding icmp using known bits. This |
6123 | | // may convert signed into unsigned predicates in ways that other passes |
6124 | | // (especially IndVarSimplify) may not be able to reliably undo. |
6125 | 92.0k | SQ.DC = nullptr; |
6126 | 92.0k | auto _ = make_scope_exit([&]() { SQ.DC = &DC; }); |
6127 | 92.0k | if (SimplifyDemandedBits(&I, 0, getDemandedBitsLHSMask(I, BitWidth), |
6128 | 92.0k | Op0Known, 0)) |
6129 | 154 | return &I; |
6130 | | |
6131 | 91.9k | if (SimplifyDemandedBits(&I, 1, APInt::getAllOnes(BitWidth), Op1Known, 0)) |
6132 | 17 | return &I; |
6133 | 91.9k | } |
6134 | | |
6135 | | // Given the known and unknown bits, compute a range that the LHS could be |
6136 | | // in. Compute the Min, Max and RHS values based on the known bits. For the |
6137 | | // EQ and NE we use unsigned values. |
6138 | 91.9k | APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0); |
6139 | 91.9k | APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0); |
6140 | 91.9k | if (I.isSigned()) { |
6141 | 28.4k | Op0Min = Op0Known.getSignedMinValue(); |
6142 | 28.4k | Op0Max = Op0Known.getSignedMaxValue(); |
6143 | 28.4k | Op1Min = Op1Known.getSignedMinValue(); |
6144 | 28.4k | Op1Max = Op1Known.getSignedMaxValue(); |
6145 | 63.5k | } else { |
6146 | 63.5k | Op0Min = Op0Known.getMinValue(); |
6147 | 63.5k | Op0Max = Op0Known.getMaxValue(); |
6148 | 63.5k | Op1Min = Op1Known.getMinValue(); |
6149 | 63.5k | Op1Max = Op1Known.getMaxValue(); |
6150 | 63.5k | } |
6151 | | |
6152 | | // If Min and Max are known to be the same, then SimplifyDemandedBits figured |
6153 | | // out that the LHS or RHS is a constant. Constant fold this now, so that |
6154 | | // code below can assume that Min != Max. |
6155 | 91.9k | if (!isa<Constant>(Op0) && Op0Min == Op0Max) |
6156 | 5 | return new ICmpInst(Pred, ConstantExpr::getIntegerValue(Ty, Op0Min), Op1); |
6157 | 91.9k | if (!isa<Constant>(Op1) && Op1Min == Op1Max) |
6158 | 0 | return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min)); |
6159 | | |
6160 | | // Don't break up a clamp pattern -- (min(max X, Y), Z) -- by replacing a |
6161 | | // min/max canonical compare with some other compare. That could lead to |
6162 | | // conflict with select canonicalization and infinite looping. |
6163 | | // FIXME: This constraint may go away if min/max intrinsics are canonical. |
6164 | 91.9k | auto isMinMaxCmp = [&](Instruction &Cmp) { |
6165 | 91.9k | if (!Cmp.hasOneUse()) |
6166 | 18.3k | return false; |
6167 | 73.5k | Value *A, *B; |
6168 | 73.5k | SelectPatternFlavor SPF = matchSelectPattern(Cmp.user_back(), A, B).Flavor; |
6169 | 73.5k | if (!SelectPatternResult::isMinOrMax(SPF)) |
6170 | 69.1k | return false; |
6171 | 4.43k | return match(Op0, m_MaxOrMin(m_Value(), m_Value())) || |
6172 | 4.43k | match(Op1, m_MaxOrMin(m_Value(), m_Value())); |
6173 | 73.5k | }; |
6174 | 91.9k | if (!isMinMaxCmp(I)) { |
6175 | 91.0k | switch (Pred) { |
6176 | 47.1k | default: |
6177 | 47.1k | break; |
6178 | 47.1k | case ICmpInst::ICMP_ULT: { |
6179 | 11.4k | if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) |
6180 | 404 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); |
6181 | 11.0k | const APInt *CmpC; |
6182 | 11.0k | if (match(Op1, m_APInt(CmpC))) { |
6183 | | // A <u C -> A == C-1 if min(A)+1 == C |
6184 | 6.46k | if (*CmpC == Op0Min + 1) |
6185 | 551 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, |
6186 | 551 | ConstantInt::get(Op1->getType(), *CmpC - 1)); |
6187 | | // X <u C --> X == 0, if the number of zero bits in the bottom of X |
6188 | | // exceeds the log2 of C. |
6189 | 5.91k | if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2()) |
6190 | 48 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, |
6191 | 48 | Constant::getNullValue(Op1->getType())); |
6192 | 5.91k | } |
6193 | 10.4k | break; |
6194 | 11.0k | } |
6195 | 10.4k | case ICmpInst::ICMP_UGT: { |
6196 | 8.08k | if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) |
6197 | 414 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); |
6198 | 7.66k | const APInt *CmpC; |
6199 | 7.66k | if (match(Op1, m_APInt(CmpC))) { |
6200 | | // A >u C -> A == C+1 if max(a)-1 == C |
6201 | 4.28k | if (*CmpC == Op0Max - 1) |
6202 | 492 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, |
6203 | 492 | ConstantInt::get(Op1->getType(), *CmpC + 1)); |
6204 | | // X >u C --> X != 0, if the number of zero bits in the bottom of X |
6205 | | // exceeds the log2 of C. |
6206 | 3.79k | if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits()) |
6207 | 62 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, |
6208 | 62 | Constant::getNullValue(Op1->getType())); |
6209 | 3.79k | } |
6210 | 7.11k | break; |
6211 | 7.66k | } |
6212 | 12.9k | case ICmpInst::ICMP_SLT: { |
6213 | 12.9k | if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) |
6214 | 143 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); |
6215 | 12.7k | const APInt *CmpC; |
6216 | 12.7k | if (match(Op1, m_APInt(CmpC))) { |
6217 | 8.70k | if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C |
6218 | 164 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, |
6219 | 164 | ConstantInt::get(Op1->getType(), *CmpC - 1)); |
6220 | 8.70k | } |
6221 | 12.6k | break; |
6222 | 12.7k | } |
6223 | 12.6k | case ICmpInst::ICMP_SGT: { |
6224 | 11.4k | if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) |
6225 | 303 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); |
6226 | 11.1k | const APInt *CmpC; |
6227 | 11.1k | if (match(Op1, m_APInt(CmpC))) { |
6228 | 8.16k | if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C |
6229 | 137 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, |
6230 | 137 | ConstantInt::get(Op1->getType(), *CmpC + 1)); |
6231 | 8.16k | } |
6232 | 11.0k | break; |
6233 | 11.1k | } |
6234 | 91.0k | } |
6235 | 91.0k | } |
6236 | | |
6237 | | // Based on the range information we know about the LHS, see if we can |
6238 | | // simplify this comparison. For example, (x&4) < 8 is always true. |
6239 | 89.1k | switch (Pred) { |
6240 | 0 | default: |
6241 | 0 | llvm_unreachable("Unknown icmp opcode!"); |
6242 | 23.1k | case ICmpInst::ICMP_EQ: |
6243 | 39.2k | case ICmpInst::ICMP_NE: { |
6244 | 39.2k | if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) |
6245 | 1 | return replaceInstUsesWith( |
6246 | 1 | I, ConstantInt::getBool(I.getType(), Pred == CmpInst::ICMP_NE)); |
6247 | | |
6248 | | // If all bits are known zero except for one, then we know at most one bit |
6249 | | // is set. If the comparison is against zero, then this is a check to see if |
6250 | | // *that* bit is set. |
6251 | 39.2k | APInt Op0KnownZeroInverted = ~Op0Known.Zero; |
6252 | 39.2k | if (Op1Known.isZero()) { |
6253 | | // If the LHS is an AND with the same constant, look through it. |
6254 | 15.8k | Value *LHS = nullptr; |
6255 | 15.8k | const APInt *LHSC; |
6256 | 15.8k | if (!match(Op0, m_And(m_Value(LHS), m_APInt(LHSC))) || |
6257 | 15.8k | *LHSC != Op0KnownZeroInverted) |
6258 | 11.3k | LHS = Op0; |
6259 | | |
6260 | 15.8k | Value *X; |
6261 | 15.8k | const APInt *C1; |
6262 | 15.8k | if (match(LHS, m_Shl(m_Power2(C1), m_Value(X)))) { |
6263 | 40 | Type *XTy = X->getType(); |
6264 | 40 | unsigned Log2C1 = C1->countr_zero(); |
6265 | 40 | APInt C2 = Op0KnownZeroInverted; |
6266 | 40 | APInt C2Pow2 = (C2 & ~(*C1 - 1)) + *C1; |
6267 | 40 | if (C2Pow2.isPowerOf2()) { |
6268 | | // iff (C1 is pow2) & ((C2 & ~(C1-1)) + C1) is pow2): |
6269 | | // ((C1 << X) & C2) == 0 -> X >= (Log2(C2+C1) - Log2(C1)) |
6270 | | // ((C1 << X) & C2) != 0 -> X < (Log2(C2+C1) - Log2(C1)) |
6271 | 13 | unsigned Log2C2 = C2Pow2.countr_zero(); |
6272 | 13 | auto *CmpC = ConstantInt::get(XTy, Log2C2 - Log2C1); |
6273 | 13 | auto NewPred = |
6274 | 13 | Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGE : CmpInst::ICMP_ULT; |
6275 | 13 | return new ICmpInst(NewPred, X, CmpC); |
6276 | 13 | } |
6277 | 40 | } |
6278 | 15.8k | } |
6279 | | |
6280 | | // Op0 eq C_Pow2 -> Op0 ne 0 if Op0 is known to be C_Pow2 or zero. |
6281 | 39.2k | if (Op1Known.isConstant() && Op1Known.getConstant().isPowerOf2() && |
6282 | 39.2k | (Op0Known & Op1Known) == Op0Known) |
6283 | 205 | return new ICmpInst(CmpInst::getInversePredicate(Pred), Op0, |
6284 | 205 | ConstantInt::getNullValue(Op1->getType())); |
6285 | 39.0k | break; |
6286 | 39.2k | } |
6287 | 39.0k | case ICmpInst::ICMP_ULT: { |
6288 | 10.7k | if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B) |
6289 | 7 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); |
6290 | 10.6k | if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) |
6291 | 10 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); |
6292 | 10.6k | break; |
6293 | 10.6k | } |
6294 | 10.6k | case ICmpInst::ICMP_UGT: { |
6295 | 7.27k | if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) |
6296 | 9 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); |
6297 | 7.26k | if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) |
6298 | 19 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); |
6299 | 7.25k | break; |
6300 | 7.26k | } |
6301 | 12.7k | case ICmpInst::ICMP_SLT: { |
6302 | 12.7k | if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C) |
6303 | 18 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); |
6304 | 12.7k | if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C) |
6305 | 19 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); |
6306 | 12.7k | break; |
6307 | 12.7k | } |
6308 | 12.7k | case ICmpInst::ICMP_SGT: { |
6309 | 11.3k | if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) |
6310 | 10 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); |
6311 | 11.2k | if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) |
6312 | 42 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); |
6313 | 11.2k | break; |
6314 | 11.2k | } |
6315 | 11.2k | case ICmpInst::ICMP_SGE: |
6316 | 1.85k | assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!"); |
6317 | 1.85k | if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) |
6318 | 13 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); |
6319 | 1.83k | if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) |
6320 | 2 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); |
6321 | 1.83k | if (Op1Min == Op0Max) // A >=s B -> A == B if max(A) == min(B) |
6322 | 2 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1); |
6323 | 1.83k | break; |
6324 | 1.83k | case ICmpInst::ICMP_SLE: |
6325 | 1.73k | assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!"); |
6326 | 1.73k | if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) |
6327 | 4 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); |
6328 | 1.73k | if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) |
6329 | 7 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); |
6330 | 1.72k | if (Op1Max == Op0Min) // A <=s B -> A == B if min(A) == max(B) |
6331 | 3 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1); |
6332 | 1.72k | break; |
6333 | 2.31k | case ICmpInst::ICMP_UGE: |
6334 | 2.31k | assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!"); |
6335 | 2.31k | if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) |
6336 | 12 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); |
6337 | 2.30k | if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) |
6338 | 10 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); |
6339 | 2.29k | if (Op1Min == Op0Max) // A >=u B -> A == B if max(A) == min(B) |
6340 | 5 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1); |
6341 | 2.29k | break; |
6342 | 2.29k | case ICmpInst::ICMP_ULE: |
6343 | 1.99k | assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!"); |
6344 | 1.99k | if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) |
6345 | 15 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); |
6346 | 1.97k | if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) |
6347 | 5 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); |
6348 | 1.97k | if (Op1Max == Op0Min) // A <=u B -> A == B if min(A) == max(B) |
6349 | 5 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1); |
6350 | 1.96k | break; |
6351 | 89.1k | } |
6352 | | |
6353 | | // Turn a signed comparison into an unsigned one if both operands are known to |
6354 | | // have the same sign. |
6355 | 88.7k | if (I.isSigned() && |
6356 | 88.7k | ((Op0Known.Zero.isNegative() && Op1Known.Zero.isNegative()) || |
6357 | 27.5k | (Op0Known.One.isNegative() && Op1Known.One.isNegative()))) |
6358 | 392 | return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1); |
6359 | | |
6360 | 88.3k | return nullptr; |
6361 | 88.7k | } |
6362 | | |
6363 | | /// If one operand of an icmp is effectively a bool (value range of {0,1}), |
6364 | | /// then try to reduce patterns based on that limit. |
6365 | 92.0k | Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) { |
6366 | 92.0k | Value *X, *Y; |
6367 | 92.0k | ICmpInst::Predicate Pred; |
6368 | | |
6369 | | // X must be 0 and bool must be true for "ULT": |
6370 | | // X <u (zext i1 Y) --> (X == 0) & Y |
6371 | 92.0k | if (match(&I, m_c_ICmp(Pred, m_Value(X), m_OneUse(m_ZExt(m_Value(Y))))) && |
6372 | 92.0k | Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULT) |
6373 | 12 | return BinaryOperator::CreateAnd(Builder.CreateIsNull(X), Y); |
6374 | | |
6375 | | // X must be 0 or bool must be true for "ULE": |
6376 | | // X <=u (sext i1 Y) --> (X == 0) | Y |
6377 | 92.0k | if (match(&I, m_c_ICmp(Pred, m_Value(X), m_OneUse(m_SExt(m_Value(Y))))) && |
6378 | 92.0k | Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE) |
6379 | 0 | return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y); |
6380 | | |
6381 | | // icmp eq/ne X, (zext/sext (icmp eq/ne X, C)) |
6382 | 92.0k | ICmpInst::Predicate Pred1, Pred2; |
6383 | 92.0k | const APInt *C; |
6384 | 92.0k | Instruction *ExtI; |
6385 | 92.0k | if (match(&I, m_c_ICmp(Pred1, m_Value(X), |
6386 | 92.0k | m_CombineAnd(m_Instruction(ExtI), |
6387 | 92.0k | m_ZExtOrSExt(m_ICmp(Pred2, m_Deferred(X), |
6388 | 92.0k | m_APInt(C)))))) && |
6389 | 92.0k | ICmpInst::isEquality(Pred1) && ICmpInst::isEquality(Pred2)) { |
6390 | 11 | bool IsSExt = ExtI->getOpcode() == Instruction::SExt; |
6391 | 11 | bool HasOneUse = ExtI->hasOneUse() && ExtI->getOperand(0)->hasOneUse(); |
6392 | 11 | auto CreateRangeCheck = [&] { |
6393 | 1 | Value *CmpV1 = |
6394 | 1 | Builder.CreateICmp(Pred1, X, Constant::getNullValue(X->getType())); |
6395 | 1 | Value *CmpV2 = Builder.CreateICmp( |
6396 | 1 | Pred1, X, ConstantInt::getSigned(X->getType(), IsSExt ? -1 : 1)); |
6397 | 1 | return BinaryOperator::Create( |
6398 | 1 | Pred1 == ICmpInst::ICMP_EQ ? Instruction::Or : Instruction::And, |
6399 | 1 | CmpV1, CmpV2); |
6400 | 1 | }; |
6401 | 11 | if (C->isZero()) { |
6402 | 0 | if (Pred2 == ICmpInst::ICMP_EQ) { |
6403 | | // icmp eq X, (zext/sext (icmp eq X, 0)) --> false |
6404 | | // icmp ne X, (zext/sext (icmp eq X, 0)) --> true |
6405 | 0 | return replaceInstUsesWith( |
6406 | 0 | I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); |
6407 | 0 | } else if (!IsSExt || HasOneUse) { |
6408 | | // icmp eq X, (zext (icmp ne X, 0)) --> X == 0 || X == 1 |
6409 | | // icmp ne X, (zext (icmp ne X, 0)) --> X != 0 && X != 1 |
6410 | | // icmp eq X, (sext (icmp ne X, 0)) --> X == 0 || X == -1 |
6411 | | // icmp ne X, (sext (icmp ne X, 0)) --> X != 0 && X == -1 |
6412 | 0 | return CreateRangeCheck(); |
6413 | 0 | } |
6414 | 11 | } else if (IsSExt ? C->isAllOnes() : C->isOne()) { |
6415 | 5 | if (Pred2 == ICmpInst::ICMP_NE) { |
6416 | | // icmp eq X, (zext (icmp ne X, 1)) --> false |
6417 | | // icmp ne X, (zext (icmp ne X, 1)) --> true |
6418 | | // icmp eq X, (sext (icmp ne X, -1)) --> false |
6419 | | // icmp ne X, (sext (icmp ne X, -1)) --> true |
6420 | 4 | return replaceInstUsesWith( |
6421 | 4 | I, ConstantInt::getBool(I.getType(), Pred1 == ICmpInst::ICMP_NE)); |
6422 | 4 | } else if (!IsSExt || HasOneUse) { |
6423 | | // icmp eq X, (zext (icmp eq X, 1)) --> X == 0 || X == 1 |
6424 | | // icmp ne X, (zext (icmp eq X, 1)) --> X != 0 && X != 1 |
6425 | | // icmp eq X, (sext (icmp eq X, -1)) --> X == 0 || X == -1 |
6426 | | // icmp ne X, (sext (icmp eq X, -1)) --> X != 0 && X == -1 |
6427 | 1 | return CreateRangeCheck(); |
6428 | 1 | } |
6429 | 6 | } else { |
6430 | | // when C != 0 && C != 1: |
6431 | | // icmp eq X, (zext (icmp eq X, C)) --> icmp eq X, 0 |
6432 | | // icmp eq X, (zext (icmp ne X, C)) --> icmp eq X, 1 |
6433 | | // icmp ne X, (zext (icmp eq X, C)) --> icmp ne X, 0 |
6434 | | // icmp ne X, (zext (icmp ne X, C)) --> icmp ne X, 1 |
6435 | | // when C != 0 && C != -1: |
6436 | | // icmp eq X, (sext (icmp eq X, C)) --> icmp eq X, 0 |
6437 | | // icmp eq X, (sext (icmp ne X, C)) --> icmp eq X, -1 |
6438 | | // icmp ne X, (sext (icmp eq X, C)) --> icmp ne X, 0 |
6439 | | // icmp ne X, (sext (icmp ne X, C)) --> icmp ne X, -1 |
6440 | 6 | return ICmpInst::Create( |
6441 | 6 | Instruction::ICmp, Pred1, X, |
6442 | 6 | ConstantInt::getSigned(X->getType(), Pred2 == ICmpInst::ICMP_NE |
6443 | 6 | ? (IsSExt ? -1 : 1) |
6444 | 6 | : 0)); |
6445 | 6 | } |
6446 | 11 | } |
6447 | | |
6448 | 92.0k | return nullptr; |
6449 | 92.0k | } |
6450 | | |
6451 | | std::optional<std::pair<CmpInst::Predicate, Constant *>> |
6452 | | InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, |
6453 | 7.24k | Constant *C) { |
6454 | 7.24k | assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) && |
6455 | 7.24k | "Only for relational integer predicates."); |
6456 | | |
6457 | 0 | Type *Type = C->getType(); |
6458 | 7.24k | bool IsSigned = ICmpInst::isSigned(Pred); |
6459 | | |
6460 | 7.24k | CmpInst::Predicate UnsignedPred = ICmpInst::getUnsignedPredicate(Pred); |
6461 | 7.24k | bool WillIncrement = |
6462 | 7.24k | UnsignedPred == ICmpInst::ICMP_ULE || UnsignedPred == ICmpInst::ICMP_UGT; |
6463 | | |
6464 | | // Check if the constant operand can be safely incremented/decremented |
6465 | | // without overflowing/underflowing. |
6466 | 8.34k | auto ConstantIsOk = [WillIncrement, IsSigned](ConstantInt *C) { |
6467 | 8.34k | return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned); |
6468 | 8.34k | }; |
6469 | | |
6470 | 7.24k | Constant *SafeReplacementConstant = nullptr; |
6471 | 7.24k | if (auto *CI = dyn_cast<ConstantInt>(C)) { |
6472 | | // Bail out if the constant can't be safely incremented/decremented. |
6473 | 6.62k | if (!ConstantIsOk(CI)) |
6474 | 0 | return std::nullopt; |
6475 | 6.62k | } else if (auto *FVTy = dyn_cast<FixedVectorType>(Type)) { |
6476 | 497 | unsigned NumElts = FVTy->getNumElements(); |
6477 | 2.21k | for (unsigned i = 0; i != NumElts; ++i) { |
6478 | 1.76k | Constant *Elt = C->getAggregateElement(i); |
6479 | 1.76k | if (!Elt) |
6480 | 7 | return std::nullopt; |
6481 | | |
6482 | 1.75k | if (isa<UndefValue>(Elt)) |
6483 | 28 | continue; |
6484 | | |
6485 | | // Bail out if we can't determine if this constant is min/max or if we |
6486 | | // know that this constant is min/max. |
6487 | 1.72k | auto *CI = dyn_cast<ConstantInt>(Elt); |
6488 | 1.72k | if (!CI || !ConstantIsOk(CI)) |
6489 | 35 | return std::nullopt; |
6490 | | |
6491 | 1.69k | if (!SafeReplacementConstant) |
6492 | 474 | SafeReplacementConstant = CI; |
6493 | 1.69k | } |
6494 | 497 | } else { |
6495 | | // ConstantExpr? |
6496 | 127 | return std::nullopt; |
6497 | 127 | } |
6498 | | |
6499 | | // It may not be safe to change a compare predicate in the presence of |
6500 | | // undefined elements, so replace those elements with the first safe constant |
6501 | | // that we found. |
6502 | | // TODO: in case of poison, it is safe; let's replace undefs only. |
6503 | 7.07k | if (C->containsUndefOrPoisonElement()) { |
6504 | 14 | assert(SafeReplacementConstant && "Replacement constant not set"); |
6505 | 0 | C = Constant::replaceUndefsWith(C, SafeReplacementConstant); |
6506 | 14 | } |
6507 | | |
6508 | 0 | CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(Pred); |
6509 | | |
6510 | | // Increment or decrement the constant. |
6511 | 7.07k | Constant *OneOrNegOne = ConstantInt::get(Type, WillIncrement ? 1 : -1, true); |
6512 | 7.07k | Constant *NewC = ConstantExpr::getAdd(C, OneOrNegOne); |
6513 | | |
6514 | 7.07k | return std::make_pair(NewPred, NewC); |
6515 | 7.24k | } |
6516 | | |
6517 | | /// If we have an icmp le or icmp ge instruction with a constant operand, turn |
6518 | | /// it into the appropriate icmp lt or icmp gt instruction. This transform |
6519 | | /// allows them to be folded in visitICmpInst. |
6520 | 99.5k | static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) { |
6521 | 99.5k | ICmpInst::Predicate Pred = I.getPredicate(); |
6522 | 99.5k | if (ICmpInst::isEquality(Pred) || !ICmpInst::isIntPredicate(Pred) || |
6523 | 99.5k | InstCombiner::isCanonicalPredicate(Pred)) |
6524 | 85.7k | return nullptr; |
6525 | | |
6526 | 13.8k | Value *Op0 = I.getOperand(0); |
6527 | 13.8k | Value *Op1 = I.getOperand(1); |
6528 | 13.8k | auto *Op1C = dyn_cast<Constant>(Op1); |
6529 | 13.8k | if (!Op1C) |
6530 | 8.12k | return nullptr; |
6531 | | |
6532 | 5.68k | auto FlippedStrictness = |
6533 | 5.68k | InstCombiner::getFlippedStrictnessPredicateAndConstant(Pred, Op1C); |
6534 | 5.68k | if (!FlippedStrictness) |
6535 | 165 | return nullptr; |
6536 | | |
6537 | 5.51k | return new ICmpInst(FlippedStrictness->first, Op0, FlippedStrictness->second); |
6538 | 5.68k | } |
6539 | | |
6540 | | /// If we have a comparison with a non-canonical predicate, if we can update |
6541 | | /// all the users, invert the predicate and adjust all the users. |
6542 | 94.0k | CmpInst *InstCombinerImpl::canonicalizeICmpPredicate(CmpInst &I) { |
6543 | | // Is the predicate already canonical? |
6544 | 94.0k | CmpInst::Predicate Pred = I.getPredicate(); |
6545 | 94.0k | if (InstCombiner::isCanonicalPredicate(Pred)) |
6546 | 68.1k | return nullptr; |
6547 | | |
6548 | | // Can all users be adjusted to predicate inversion? |
6549 | 25.9k | if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr)) |
6550 | 24.0k | return nullptr; |
6551 | | |
6552 | | // Ok, we can canonicalize comparison! |
6553 | | // Let's first invert the comparison's predicate. |
6554 | 1.91k | I.setPredicate(CmpInst::getInversePredicate(Pred)); |
6555 | 1.91k | I.setName(I.getName() + ".not"); |
6556 | | |
6557 | | // And, adapt users. |
6558 | 1.91k | freelyInvertAllUsersOf(&I); |
6559 | | |
6560 | 1.91k | return &I; |
6561 | 25.9k | } |
6562 | | |
6563 | | /// Integer compare with boolean values can always be turned into bitwise ops. |
6564 | | static Instruction *canonicalizeICmpBool(ICmpInst &I, |
6565 | 6.26k | InstCombiner::BuilderTy &Builder) { |
6566 | 6.26k | Value *A = I.getOperand(0), *B = I.getOperand(1); |
6567 | 6.26k | assert(A->getType()->isIntOrIntVectorTy(1) && "Bools only"); |
6568 | | |
6569 | | // A boolean compared to true/false can be simplified to Op0/true/false in |
6570 | | // 14 out of the 20 (10 predicates * 2 constants) possible combinations. |
6571 | | // Cases not handled by InstSimplify are always 'not' of Op0. |
6572 | 6.26k | if (match(B, m_Zero())) { |
6573 | 650 | switch (I.getPredicate()) { |
6574 | 248 | case CmpInst::ICMP_EQ: // A == 0 -> !A |
6575 | 428 | case CmpInst::ICMP_ULE: // A <=u 0 -> !A |
6576 | 650 | case CmpInst::ICMP_SGE: // A >=s 0 -> !A |
6577 | 650 | return BinaryOperator::CreateNot(A); |
6578 | 0 | default: |
6579 | 0 | llvm_unreachable("ICmp i1 X, C not simplified as expected."); |
6580 | 650 | } |
6581 | 5.61k | } else if (match(B, m_One())) { |
6582 | 541 | switch (I.getPredicate()) { |
6583 | 148 | case CmpInst::ICMP_NE: // A != 1 -> !A |
6584 | 363 | case CmpInst::ICMP_ULT: // A <u 1 -> !A |
6585 | 541 | case CmpInst::ICMP_SGT: // A >s -1 -> !A |
6586 | 541 | return BinaryOperator::CreateNot(A); |
6587 | 0 | default: |
6588 | 0 | llvm_unreachable("ICmp i1 X, C not simplified as expected."); |
6589 | 541 | } |
6590 | 541 | } |
6591 | | |
6592 | 5.07k | switch (I.getPredicate()) { |
6593 | 0 | default: |
6594 | 0 | llvm_unreachable("Invalid icmp instruction!"); |
6595 | 591 | case ICmpInst::ICMP_EQ: |
6596 | | // icmp eq i1 A, B -> ~(A ^ B) |
6597 | 591 | return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); |
6598 | | |
6599 | 303 | case ICmpInst::ICMP_NE: |
6600 | | // icmp ne i1 A, B -> A ^ B |
6601 | 303 | return BinaryOperator::CreateXor(A, B); |
6602 | | |
6603 | 491 | case ICmpInst::ICMP_UGT: |
6604 | | // icmp ugt -> icmp ult |
6605 | 491 | std::swap(A, B); |
6606 | 491 | [[fallthrough]]; |
6607 | 1.08k | case ICmpInst::ICMP_ULT: |
6608 | | // icmp ult i1 A, B -> ~A & B |
6609 | 1.08k | return BinaryOperator::CreateAnd(Builder.CreateNot(A), B); |
6610 | | |
6611 | 559 | case ICmpInst::ICMP_SGT: |
6612 | | // icmp sgt -> icmp slt |
6613 | 559 | std::swap(A, B); |
6614 | 559 | [[fallthrough]]; |
6615 | 970 | case ICmpInst::ICMP_SLT: |
6616 | | // icmp slt i1 A, B -> A & ~B |
6617 | 970 | return BinaryOperator::CreateAnd(Builder.CreateNot(B), A); |
6618 | | |
6619 | 499 | case ICmpInst::ICMP_UGE: |
6620 | | // icmp uge -> icmp ule |
6621 | 499 | std::swap(A, B); |
6622 | 499 | [[fallthrough]]; |
6623 | 1.06k | case ICmpInst::ICMP_ULE: |
6624 | | // icmp ule i1 A, B -> ~A | B |
6625 | 1.06k | return BinaryOperator::CreateOr(Builder.CreateNot(A), B); |
6626 | | |
6627 | 506 | case ICmpInst::ICMP_SGE: |
6628 | | // icmp sge -> icmp sle |
6629 | 506 | std::swap(A, B); |
6630 | 506 | [[fallthrough]]; |
6631 | 1.06k | case ICmpInst::ICMP_SLE: |
6632 | | // icmp sle i1 A, B -> A | ~B |
6633 | 1.06k | return BinaryOperator::CreateOr(Builder.CreateNot(B), A); |
6634 | 5.07k | } |
6635 | 5.07k | } |
6636 | | |
6637 | | // Transform pattern like: |
6638 | | // (1 << Y) u<= X or ~(-1 << Y) u< X or ((1 << Y)+(-1)) u< X |
6639 | | // (1 << Y) u> X or ~(-1 << Y) u>= X or ((1 << Y)+(-1)) u>= X |
6640 | | // Into: |
6641 | | // (X l>> Y) != 0 |
6642 | | // (X l>> Y) == 0 |
6643 | | static Instruction *foldICmpWithHighBitMask(ICmpInst &Cmp, |
6644 | 75.8k | InstCombiner::BuilderTy &Builder) { |
6645 | 75.8k | ICmpInst::Predicate Pred, NewPred; |
6646 | 75.8k | Value *X, *Y; |
6647 | 75.8k | if (match(&Cmp, |
6648 | 75.8k | m_c_ICmp(Pred, m_OneUse(m_Shl(m_One(), m_Value(Y))), m_Value(X)))) { |
6649 | 132 | switch (Pred) { |
6650 | 52 | case ICmpInst::ICMP_ULE: |
6651 | 52 | NewPred = ICmpInst::ICMP_NE; |
6652 | 52 | break; |
6653 | 2 | case ICmpInst::ICMP_UGT: |
6654 | 2 | NewPred = ICmpInst::ICMP_EQ; |
6655 | 2 | break; |
6656 | 78 | default: |
6657 | 78 | return nullptr; |
6658 | 132 | } |
6659 | 75.6k | } else if (match(&Cmp, m_c_ICmp(Pred, |
6660 | 75.6k | m_OneUse(m_CombineOr( |
6661 | 75.6k | m_Not(m_Shl(m_AllOnes(), m_Value(Y))), |
6662 | 75.6k | m_Add(m_Shl(m_One(), m_Value(Y)), |
6663 | 75.6k | m_AllOnes()))), |
6664 | 75.6k | m_Value(X)))) { |
6665 | | // The variant with 'add' is not canonical, (the variant with 'not' is) |
6666 | | // we only get it because it has extra uses, and can't be canonicalized, |
6667 | | |
6668 | 8 | switch (Pred) { |
6669 | 7 | case ICmpInst::ICMP_ULT: |
6670 | 7 | NewPred = ICmpInst::ICMP_NE; |
6671 | 7 | break; |
6672 | 1 | case ICmpInst::ICMP_UGE: |
6673 | 1 | NewPred = ICmpInst::ICMP_EQ; |
6674 | 1 | break; |
6675 | 0 | default: |
6676 | 0 | return nullptr; |
6677 | 8 | } |
6678 | 8 | } else |
6679 | 75.6k | return nullptr; |
6680 | | |
6681 | 62 | Value *NewX = Builder.CreateLShr(X, Y, X->getName() + ".highbits"); |
6682 | 62 | Constant *Zero = Constant::getNullValue(NewX->getType()); |
6683 | 62 | return CmpInst::Create(Instruction::ICmp, NewPred, NewX, Zero); |
6684 | 75.8k | } |
6685 | | |
6686 | | static Instruction *foldVectorCmp(CmpInst &Cmp, |
6687 | 8.17k | InstCombiner::BuilderTy &Builder) { |
6688 | 8.17k | const CmpInst::Predicate Pred = Cmp.getPredicate(); |
6689 | 8.17k | Value *LHS = Cmp.getOperand(0), *RHS = Cmp.getOperand(1); |
6690 | 8.17k | Value *V1, *V2; |
6691 | | |
6692 | 8.17k | auto createCmpReverse = [&](CmpInst::Predicate Pred, Value *X, Value *Y) { |
6693 | 0 | Value *V = Builder.CreateCmp(Pred, X, Y, Cmp.getName()); |
6694 | 0 | if (auto *I = dyn_cast<Instruction>(V)) |
6695 | 0 | I->copyIRFlags(&Cmp); |
6696 | 0 | Module *M = Cmp.getModule(); |
6697 | 0 | Function *F = Intrinsic::getDeclaration( |
6698 | 0 | M, Intrinsic::experimental_vector_reverse, V->getType()); |
6699 | 0 | return CallInst::Create(F, V); |
6700 | 0 | }; |
6701 | | |
6702 | 8.17k | if (match(LHS, m_VecReverse(m_Value(V1)))) { |
6703 | | // cmp Pred, rev(V1), rev(V2) --> rev(cmp Pred, V1, V2) |
6704 | 0 | if (match(RHS, m_VecReverse(m_Value(V2))) && |
6705 | 0 | (LHS->hasOneUse() || RHS->hasOneUse())) |
6706 | 0 | return createCmpReverse(Pred, V1, V2); |
6707 | | |
6708 | | // cmp Pred, rev(V1), RHSSplat --> rev(cmp Pred, V1, RHSSplat) |
6709 | 0 | if (LHS->hasOneUse() && isSplatValue(RHS)) |
6710 | 0 | return createCmpReverse(Pred, V1, RHS); |
6711 | 0 | } |
6712 | | // cmp Pred, LHSSplat, rev(V2) --> rev(cmp Pred, LHSSplat, V2) |
6713 | 8.17k | else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2))))) |
6714 | 0 | return createCmpReverse(Pred, LHS, V2); |
6715 | | |
6716 | 8.17k | ArrayRef<int> M; |
6717 | 8.17k | if (!match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(M)))) |
6718 | 7.99k | return nullptr; |
6719 | | |
6720 | | // If both arguments of the cmp are shuffles that use the same mask and |
6721 | | // shuffle within a single vector, move the shuffle after the cmp: |
6722 | | // cmp (shuffle V1, M), (shuffle V2, M) --> shuffle (cmp V1, V2), M |
6723 | 179 | Type *V1Ty = V1->getType(); |
6724 | 179 | if (match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(M))) && |
6725 | 179 | V1Ty == V2->getType() && (LHS->hasOneUse() || RHS->hasOneUse())) { |
6726 | 22 | Value *NewCmp = Builder.CreateCmp(Pred, V1, V2); |
6727 | 22 | return new ShuffleVectorInst(NewCmp, M); |
6728 | 22 | } |
6729 | | |
6730 | | // Try to canonicalize compare with splatted operand and splat constant. |
6731 | | // TODO: We could generalize this for more than splats. See/use the code in |
6732 | | // InstCombiner::foldVectorBinop(). |
6733 | 157 | Constant *C; |
6734 | 157 | if (!LHS->hasOneUse() || !match(RHS, m_Constant(C))) |
6735 | 82 | return nullptr; |
6736 | | |
6737 | | // Length-changing splats are ok, so adjust the constants as needed: |
6738 | | // cmp (shuffle V1, M), C --> shuffle (cmp V1, C'), M |
6739 | 75 | Constant *ScalarC = C->getSplatValue(/* AllowUndefs */ true); |
6740 | 75 | int MaskSplatIndex; |
6741 | 75 | if (ScalarC && match(M, m_SplatOrUndefMask(MaskSplatIndex))) { |
6742 | | // We allow undefs in matching, but this transform removes those for safety. |
6743 | | // Demanded elements analysis should be able to recover some/all of that. |
6744 | 54 | C = ConstantVector::getSplat(cast<VectorType>(V1Ty)->getElementCount(), |
6745 | 54 | ScalarC); |
6746 | 54 | SmallVector<int, 8> NewM(M.size(), MaskSplatIndex); |
6747 | 54 | Value *NewCmp = Builder.CreateCmp(Pred, V1, C); |
6748 | 54 | return new ShuffleVectorInst(NewCmp, NewM); |
6749 | 54 | } |
6750 | | |
6751 | 21 | return nullptr; |
6752 | 75 | } |
6753 | | |
6754 | | // extract(uadd.with.overflow(A, B), 0) ult A |
6755 | | // -> extract(uadd.with.overflow(A, B), 1) |
6756 | 75.8k | static Instruction *foldICmpOfUAddOv(ICmpInst &I) { |
6757 | 75.8k | CmpInst::Predicate Pred = I.getPredicate(); |
6758 | 75.8k | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); |
6759 | | |
6760 | 75.8k | Value *UAddOv; |
6761 | 75.8k | Value *A, *B; |
6762 | 75.8k | auto UAddOvResultPat = m_ExtractValue<0>( |
6763 | 75.8k | m_Intrinsic<Intrinsic::uadd_with_overflow>(m_Value(A), m_Value(B))); |
6764 | 75.8k | if (match(Op0, UAddOvResultPat) && |
6765 | 75.8k | ((Pred == ICmpInst::ICMP_ULT && (Op1 == A || Op1 == B)) || |
6766 | 23 | (Pred == ICmpInst::ICMP_EQ && match(Op1, m_ZeroInt()) && |
6767 | 14 | (match(A, m_One()) || match(B, m_One()))) || |
6768 | 23 | (Pred == ICmpInst::ICMP_NE && match(Op1, m_AllOnes()) && |
6769 | 10 | (match(A, m_AllOnes()) || match(B, m_AllOnes()))))) |
6770 | | // extract(uadd.with.overflow(A, B), 0) < A |
6771 | | // extract(uadd.with.overflow(A, 1), 0) == 0 |
6772 | | // extract(uadd.with.overflow(A, -1), 0) != -1 |
6773 | 17 | UAddOv = cast<ExtractValueInst>(Op0)->getAggregateOperand(); |
6774 | 75.8k | else if (match(Op1, UAddOvResultPat) && |
6775 | 75.8k | Pred == ICmpInst::ICMP_UGT && (Op0 == A || Op0 == B)) |
6776 | | // A > extract(uadd.with.overflow(A, B), 0) |
6777 | 10 | UAddOv = cast<ExtractValueInst>(Op1)->getAggregateOperand(); |
6778 | 75.8k | else |
6779 | 75.8k | return nullptr; |
6780 | | |
6781 | 27 | return ExtractValueInst::Create(UAddOv, 1); |
6782 | 75.8k | } |
6783 | | |
6784 | 75.6k | static Instruction *foldICmpInvariantGroup(ICmpInst &I) { |
6785 | 75.6k | if (!I.getOperand(0)->getType()->isPointerTy() || |
6786 | 75.6k | NullPointerIsDefined( |
6787 | 3.84k | I.getParent()->getParent(), |
6788 | 71.8k | I.getOperand(0)->getType()->getPointerAddressSpace())) { |
6789 | 71.8k | return nullptr; |
6790 | 71.8k | } |
6791 | 3.83k | Instruction *Op; |
6792 | 3.83k | if (match(I.getOperand(0), m_Instruction(Op)) && |
6793 | 3.83k | match(I.getOperand(1), m_Zero()) && |
6794 | 3.83k | Op->isLaunderOrStripInvariantGroup()) { |
6795 | 0 | return ICmpInst::Create(Instruction::ICmp, I.getPredicate(), |
6796 | 0 | Op->getOperand(0), I.getOperand(1)); |
6797 | 0 | } |
6798 | 3.83k | return nullptr; |
6799 | 3.83k | } |
6800 | | |
6801 | | /// This function folds patterns produced by lowering of reduce idioms, such as |
6802 | | /// llvm.vector.reduce.and which are lowered into instruction chains. This code |
6803 | | /// attempts to generate fewer number of scalar comparisons instead of vector |
6804 | | /// comparisons when possible. |
6805 | | static Instruction *foldReductionIdiom(ICmpInst &I, |
6806 | | InstCombiner::BuilderTy &Builder, |
6807 | 75.6k | const DataLayout &DL) { |
6808 | 75.6k | if (I.getType()->isVectorTy()) |
6809 | 6.94k | return nullptr; |
6810 | 68.7k | ICmpInst::Predicate OuterPred, InnerPred; |
6811 | 68.7k | Value *LHS, *RHS; |
6812 | | |
6813 | | // Match lowering of @llvm.vector.reduce.and. Turn |
6814 | | /// %vec_ne = icmp ne <8 x i8> %lhs, %rhs |
6815 | | /// %scalar_ne = bitcast <8 x i1> %vec_ne to i8 |
6816 | | /// %res = icmp <pred> i8 %scalar_ne, 0 |
6817 | | /// |
6818 | | /// into |
6819 | | /// |
6820 | | /// %lhs.scalar = bitcast <8 x i8> %lhs to i64 |
6821 | | /// %rhs.scalar = bitcast <8 x i8> %rhs to i64 |
6822 | | /// %res = icmp <pred> i64 %lhs.scalar, %rhs.scalar |
6823 | | /// |
6824 | | /// for <pred> in {ne, eq}. |
6825 | 68.7k | if (!match(&I, m_ICmp(OuterPred, |
6826 | 68.7k | m_OneUse(m_BitCast(m_OneUse( |
6827 | 68.7k | m_ICmp(InnerPred, m_Value(LHS), m_Value(RHS))))), |
6828 | 68.7k | m_Zero()))) |
6829 | 68.6k | return nullptr; |
6830 | 62 | auto *LHSTy = dyn_cast<FixedVectorType>(LHS->getType()); |
6831 | 62 | if (!LHSTy || !LHSTy->getElementType()->isIntegerTy()) |
6832 | 0 | return nullptr; |
6833 | 62 | unsigned NumBits = |
6834 | 62 | LHSTy->getNumElements() * LHSTy->getElementType()->getIntegerBitWidth(); |
6835 | | // TODO: Relax this to "not wider than max legal integer type"? |
6836 | 62 | if (!DL.isLegalInteger(NumBits)) |
6837 | 32 | return nullptr; |
6838 | | |
6839 | 30 | if (ICmpInst::isEquality(OuterPred) && InnerPred == ICmpInst::ICMP_NE) { |
6840 | 4 | auto *ScalarTy = Builder.getIntNTy(NumBits); |
6841 | 4 | LHS = Builder.CreateBitCast(LHS, ScalarTy, LHS->getName() + ".scalar"); |
6842 | 4 | RHS = Builder.CreateBitCast(RHS, ScalarTy, RHS->getName() + ".scalar"); |
6843 | 4 | return ICmpInst::Create(Instruction::ICmp, OuterPred, LHS, RHS, |
6844 | 4 | I.getName()); |
6845 | 4 | } |
6846 | | |
6847 | 26 | return nullptr; |
6848 | 30 | } |
6849 | | |
6850 | | // This helper will be called with icmp operands in both orders. |
6851 | | Instruction *InstCombinerImpl::foldICmpCommutative(ICmpInst::Predicate Pred, |
6852 | | Value *Op0, Value *Op1, |
6853 | 155k | ICmpInst &CxtI) { |
6854 | | // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'. |
6855 | 155k | if (auto *GEP = dyn_cast<GEPOperator>(Op0)) |
6856 | 674 | if (Instruction *NI = foldGEPICmp(GEP, Op1, Pred, CxtI)) |
6857 | 225 | return NI; |
6858 | | |
6859 | 155k | if (auto *SI = dyn_cast<SelectInst>(Op0)) |
6860 | 2.16k | if (Instruction *NI = foldSelectICmp(Pred, SI, Op1, CxtI)) |
6861 | 341 | return NI; |
6862 | | |
6863 | 155k | if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op0)) |
6864 | 1.09k | if (Instruction *Res = foldICmpWithMinMax(CxtI, MinMax, Op1, Pred)) |
6865 | 190 | return Res; |
6866 | | |
6867 | 155k | { |
6868 | 155k | Value *X; |
6869 | 155k | const APInt *C; |
6870 | | // icmp X+Cst, X |
6871 | 155k | if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X) |
6872 | 117 | return foldICmpAddOpConst(X, *C, Pred); |
6873 | 155k | } |
6874 | | |
6875 | | // abs(X) >= X --> true |
6876 | | // abs(X) u<= X --> true |
6877 | | // abs(X) < X --> false |
6878 | | // abs(X) u> X --> false |
6879 | | // abs(X) u>= X --> IsIntMinPosion ? `X > -1`: `X u<= INTMIN` |
6880 | | // abs(X) <= X --> IsIntMinPosion ? `X > -1`: `X u<= INTMIN` |
6881 | | // abs(X) == X --> IsIntMinPosion ? `X > -1`: `X u<= INTMIN` |
6882 | | // abs(X) u< X --> IsIntMinPosion ? `X < 0` : `X > INTMIN` |
6883 | | // abs(X) > X --> IsIntMinPosion ? `X < 0` : `X > INTMIN` |
6884 | | // abs(X) != X --> IsIntMinPosion ? `X < 0` : `X > INTMIN` |
6885 | 155k | { |
6886 | 155k | Value *X; |
6887 | 155k | Constant *C; |
6888 | 155k | if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X), m_Constant(C))) && |
6889 | 155k | match(Op1, m_Specific(X))) { |
6890 | 7 | Value *NullValue = Constant::getNullValue(X->getType()); |
6891 | 7 | Value *AllOnesValue = Constant::getAllOnesValue(X->getType()); |
6892 | 7 | const APInt SMin = |
6893 | 7 | APInt::getSignedMinValue(X->getType()->getScalarSizeInBits()); |
6894 | 7 | bool IsIntMinPosion = C->isAllOnesValue(); |
6895 | 7 | switch (Pred) { |
6896 | 5 | case CmpInst::ICMP_ULE: |
6897 | 5 | case CmpInst::ICMP_SGE: |
6898 | 5 | return replaceInstUsesWith(CxtI, ConstantInt::getTrue(CxtI.getType())); |
6899 | 0 | case CmpInst::ICMP_UGT: |
6900 | 0 | case CmpInst::ICMP_SLT: |
6901 | 0 | return replaceInstUsesWith(CxtI, ConstantInt::getFalse(CxtI.getType())); |
6902 | 0 | case CmpInst::ICMP_UGE: |
6903 | 0 | case CmpInst::ICMP_SLE: |
6904 | 2 | case CmpInst::ICMP_EQ: { |
6905 | 2 | return replaceInstUsesWith( |
6906 | 2 | CxtI, IsIntMinPosion |
6907 | 2 | ? Builder.CreateICmpSGT(X, AllOnesValue) |
6908 | 2 | : Builder.CreateICmpULT( |
6909 | 2 | X, ConstantInt::get(X->getType(), SMin + 1))); |
6910 | 0 | } |
6911 | 0 | case CmpInst::ICMP_ULT: |
6912 | 0 | case CmpInst::ICMP_SGT: |
6913 | 0 | case CmpInst::ICMP_NE: { |
6914 | 0 | return replaceInstUsesWith( |
6915 | 0 | CxtI, IsIntMinPosion |
6916 | 0 | ? Builder.CreateICmpSLT(X, NullValue) |
6917 | 0 | : Builder.CreateICmpUGT( |
6918 | 0 | X, ConstantInt::get(X->getType(), SMin))); |
6919 | 0 | } |
6920 | 0 | default: |
6921 | 0 | llvm_unreachable("Invalid predicate!"); |
6922 | 7 | } |
6923 | 7 | } |
6924 | 155k | } |
6925 | | |
6926 | 155k | return nullptr; |
6927 | 155k | } |
6928 | | |
/// Entry point for combining an integer compare. Canonicalizes the operand
/// order, tries to simplify the compare to an existing value, and then tries
/// each icmp fold below in sequence, returning the result of the first one
/// that fires. If no fold fires, returns &I when the operands were swapped and
/// nullptr otherwise.
/// NOTE(review): the min/max-select bailout and the foldAllocaCmp paths return
/// nullptr even if the operands were swapped earlier (Changed == true).
6929 | 116k | Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { |
6930 | 116k | bool Changed = false; |
6931 | 116k | const SimplifyQuery Q = SQ.getWithInstruction(&I); |
6932 | 116k | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); |
6933 | 116k | unsigned Op0Cplxity = getComplexity(Op0); |
6934 | 116k | unsigned Op1Cplxity = getComplexity(Op1); |
6935 | | |
6936 | | /// Order the operands of the compare from most complex to least complex: |
6937 | | /// swap when Op0 ranks below Op1. The complexity ranking puts constants |
6938 | | /// before unary operators, before binary operators. |
6939 | 116k | if (Op0Cplxity < Op1Cplxity) { |
6940 | 8.17k | I.swapOperands(); |
6941 | 8.17k | std::swap(Op0, Op1); |
6942 | 8.17k | Changed = true; |
6943 | 8.17k | } |
6944 | | |
6945 | 116k | if (Value *V = simplifyICmpInst(I.getPredicate(), Op0, Op1, Q)) |
6946 | 10.5k | return replaceInstUsesWith(I, V); |
6947 | | |
6948 | | // Comparing a select between val and -val against zero is the same as |
6949 | | // comparing val itself, i.e. (select c, -val, val) != 0 -> val != 0 |
6950 | 105k | if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) { |
6951 | 7.62k | Value *Cond, *SelectTrue, *SelectFalse; |
6952 | 7.62k | if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue), |
6953 | 7.62k | m_Value(SelectFalse)))) { |
6954 | 208 | if (Value *V = dyn_castNegVal(SelectTrue)) { |
6955 | 15 | if (V == SelectFalse) |
6956 | 0 | return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); |
6957 | 15 | } |
6958 | 193 | else if (Value *V = dyn_castNegVal(SelectFalse)) { |
6959 | 88 | if (V == SelectTrue) |
6960 | 0 | return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); |
6961 | 88 | } |
6962 | 208 | } |
6963 | 7.62k | } |
6964 | | |
6965 | 105k | if (Op0->getType()->isIntOrIntVectorTy(1)) |
6966 | 6.26k | if (Instruction *Res = canonicalizeICmpBool(I, Builder)) |
6967 | 6.26k | return Res; |
6968 | | |
6969 | 99.5k | if (Instruction *Res = canonicalizeCmpWithConstant(I)) |
6970 | 5.51k | return Res; |
6971 | | |
6972 | 94.0k | if (Instruction *Res = canonicalizeICmpPredicate(I)) |
6973 | 1.91k | return Res; |
6974 | | |
6975 | 92.1k | if (Instruction *Res = foldICmpWithConstant(I)) |
6976 | 39 | return Res; |
6977 | | |
6978 | 92.1k | if (Instruction *Res = foldICmpWithDominatingICmp(I)) |
6979 | 12 | return Res; |
6980 | | |
6981 | 92.0k | if (Instruction *Res = foldICmpUsingBoolRange(I)) |
6982 | 23 | return Res; |
6983 | | |
6984 | 92.0k | if (Instruction *Res = foldICmpUsingKnownBits(I)) |
6985 | 3.72k | return Res; |
6986 | | |
6987 | 88.3k | if (Instruction *Res = foldICmpTruncWithTruncOrExt(I, Q)) |
6988 | 40 | return Res; |
6989 | | |
6990 | | // Test if the ICmpInst instruction is used exclusively by a select as |
6991 | | // part of a minimum or maximum operation. If so, refrain from doing |
6992 | | // any other folding. This helps out other analyses which understand |
6993 | | // non-obfuscated minimum and maximum idioms, such as ScalarEvolution |
6994 | | // and CodeGen. And in this case, at least one of the comparison |
6995 | | // operands has at least one user besides the compare (the select), |
6996 | | // which would often largely negate the benefit of folding anyway. |
6997 | | // |
6998 | | // Do the same for the other patterns recognized by matchSelectPattern. |
// NOTE(review): this bailout returns nullptr even when the operands were
// swapped above (Changed == true).
6999 | 88.3k | if (I.hasOneUse()) |
7000 | 70.7k | if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) { |
7001 | 19.2k | Value *A, *B; |
7002 | 19.2k | SelectPatternResult SPR = matchSelectPattern(SI, A, B); |
7003 | 19.2k | if (SPR.Flavor != SPF_UNKNOWN) |
7004 | 4.75k | return nullptr; |
7005 | 19.2k | } |
7006 | | |
7007 | | // Do this after checking for min/max to prevent infinite looping. |
7008 | 83.5k | if (Instruction *Res = foldICmpWithZero(I)) |
7009 | 85 | return Res; |
7010 | | |
7011 | | // FIXME: We only do this after checking for min/max to prevent infinite |
7012 | | // looping caused by a reverse canonicalization of these patterns for min/max. |
7013 | | // FIXME: The organization of folds is a mess. These would naturally go into |
7014 | | // canonicalizeCmpWithConstant(), but we can't move all of the above folds |
7015 | | // down here after the min/max restriction. |
7016 | 83.4k | ICmpInst::Predicate Pred = I.getPredicate(); |
7017 | 83.4k | const APInt *C; |
7018 | 83.4k | if (match(Op1, m_APInt(C))) { |
7019 | | // For i32: x >u 2147483647 -> x <s 0 -> true if sign bit set |
7020 | 51.3k | if (Pred == ICmpInst::ICMP_UGT && C->isMaxSignedValue()) { |
7021 | 443 | Constant *Zero = Constant::getNullValue(Op0->getType()); |
7022 | 443 | return new ICmpInst(ICmpInst::ICMP_SLT, Op0, Zero); |
7023 | 443 | } |
7024 | | |
7025 | | // For i32: x <u 2147483648 -> x >s -1 -> true if sign bit clear |
7026 | 50.8k | if (Pred == ICmpInst::ICMP_ULT && C->isMinSignedValue()) { |
7027 | 213 | Constant *AllOnes = Constant::getAllOnesValue(Op0->getType()); |
7028 | 213 | return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes); |
7029 | 213 | } |
7030 | 50.8k | } |
7031 | | |
7032 | | // The folds in here may rely on wrapping flags and special constants, so |
7033 | | // they can break up min/max idioms in some cases but not seemingly similar |
7034 | | // patterns. |
7035 | | // FIXME: It may be possible to enhance select folding to make this |
7036 | | // unnecessary. It may also be moot if we canonicalize to min/max |
7037 | | // intrinsics. |
7038 | 82.8k | if (Instruction *Res = foldICmpBinOp(I, Q)) |
7039 | 1.12k | return Res; |
7040 | | |
7041 | 81.6k | if (Instruction *Res = foldICmpInstWithConstant(I)) |
7042 | 3.27k | return Res; |
7043 | | |
7044 | | // Try to match comparison as a sign bit test. Intentionally do this after |
7045 | | // foldICmpInstWithConstant() to potentially let other folds to happen first. |
7046 | 78.4k | if (Instruction *New = foldSignBitTest(I)) |
7047 | 0 | return New; |
7048 | | |
7049 | 78.4k | if (Instruction *Res = foldICmpInstWithConstantNotInt(I)) |
7050 | 77 | return Res; |
7051 | | |
// Run the operand-order-agnostic folds in both directions: once as written,
// once with the operands exchanged and the predicate swapped.
7052 | 78.3k | if (Instruction *Res = foldICmpCommutative(I.getPredicate(), Op0, Op1, I)) |
7053 | 764 | return Res; |
7054 | 77.5k | if (Instruction *Res = |
7055 | 77.5k | foldICmpCommutative(I.getSwappedPredicate(), Op1, Op0, I)) |
7056 | 116 | return Res; |
7057 | | |
7058 | | // In case of a comparison with two select instructions having the same |
7059 | | // condition, check whether one of the resulting branches can be simplified. |
7060 | | // If so, just compare the other branch and select the appropriate result. |
7061 | | // For example: |
7062 | | // %tmp1 = select i1 %cond, i32 %y, i32 %x |
7063 | | // %tmp2 = select i1 %cond, i32 %z, i32 %x |
7064 | | // %cmp2 = icmp slt i32 %tmp2, %tmp1 |
7065 | | // The icmp will result false for the false value of selects and the result |
7066 | | // will depend upon the comparison of true values of selects if %cond is |
7067 | | // true. Thus, transform this into: |
7068 | | // %cmp = icmp slt i32 %z, %y |
7069 | | // %sel = select i1 %cond, i1 %cmp, i1 false |
7070 | | // This handles similar cases to transform. |
7071 | 77.4k | { |
7072 | 77.4k | Value *Cond, *A, *B, *C, *D; |
7073 | 77.4k | if (match(Op0, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) && |
7074 | 77.4k | match(Op1, m_Select(m_Specific(Cond), m_Value(C), m_Value(D))) && |
7075 | 77.4k | (Op0->hasOneUse() || Op1->hasOneUse())) { |
7076 | | // Check whether comparison of TrueValues can be simplified |
7077 | 11 | if (Value *Res = simplifyICmpInst(Pred, A, C, SQ)) { |
7078 | 9 | Value *NewICMP = Builder.CreateICmp(Pred, B, D); |
7079 | 9 | return SelectInst::Create(Cond, Res, NewICMP); |
7080 | 9 | } |
7081 | | // Check whether comparison of FalseValues can be simplified |
7082 | 2 | if (Value *Res = simplifyICmpInst(Pred, B, D, SQ)) { |
7083 | 0 | Value *NewICMP = Builder.CreateICmp(Pred, A, C); |
7084 | 0 | return SelectInst::Create(Cond, NewICMP, Res); |
7085 | 0 | } |
7086 | 2 | } |
7087 | 77.4k | } |
7088 | | |
7089 | | // Try to optimize equality comparisons against alloca-based pointers. |
7090 | 77.4k | if (Op0->getType()->isPointerTy() && I.isEquality()) { |
7091 | 2.20k | assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?"); |
7092 | 2.20k | if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op0))) |
7093 | 25 | if (foldAllocaCmp(Alloca)) |
7094 | 1 | return nullptr; |
7095 | 2.20k | if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op1))) |
7096 | 22 | if (foldAllocaCmp(Alloca)) |
7097 | 6 | return nullptr; |
7098 | 2.20k | } |
7099 | | |
7100 | 77.4k | if (Instruction *Res = foldICmpBitCast(I)) |
7101 | 324 | return Res; |
7102 | | |
7103 | | // TODO: Hoist this above the min/max bailout. |
7104 | 77.1k | if (Instruction *R = foldICmpWithCastOp(I)) |
7105 | 660 | return R; |
7106 | | |
7107 | 76.4k | { |
7108 | 76.4k | Value *X, *Y; |
7109 | | // Transform (X & ~Y) == 0 --> (X & Y) != 0 |
7110 | | // and (X & ~Y) != 0 --> (X & Y) == 0 |
7111 | | // if X is a power of 2. |
7112 | 76.4k | if (match(Op0, m_And(m_Value(X), m_Not(m_Value(Y)))) && |
7113 | 76.4k | match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(X, false, 0, &I) && |
7114 | 76.4k | I.isEquality()) |
7115 | 2 | return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(X, Y), |
7116 | 2 | Op1); |
7117 | | |
7118 | | // Op0 pred Op1 -> ~Op1 pred ~Op0, if this allows us to drop an instruction. |
7119 | 76.4k | if (Op0->getType()->isIntOrIntVectorTy()) { |
7120 | 72.4k | bool ConsumesOp0, ConsumesOp1; |
7121 | 72.4k | if (isFreeToInvert(Op0, Op0->hasOneUse(), ConsumesOp0) && |
7122 | 72.4k | isFreeToInvert(Op1, Op1->hasOneUse(), ConsumesOp1) && |
7123 | 72.4k | (ConsumesOp0 || ConsumesOp1)) { |
7124 | 200 | Value *InvOp0 = getFreelyInverted(Op0, Op0->hasOneUse(), &Builder); |
7125 | 200 | Value *InvOp1 = getFreelyInverted(Op1, Op1->hasOneUse(), &Builder); |
7126 | 200 | assert(InvOp0 && InvOp1 && |
7127 | 200 | "Mismatch between isFreeToInvert and getFreelyInverted"); |
7128 | 0 | return new ICmpInst(I.getSwappedPredicate(), InvOp0, InvOp1); |
7129 | 200 | } |
7130 | 72.4k | } |
7131 | | |
7132 | 76.2k | Instruction *AddI = nullptr; |
7133 | 76.2k | if (match(&I, m_UAddWithOverflow(m_Value(X), m_Value(Y), |
7134 | 76.2k | m_Instruction(AddI))) && |
7135 | 76.2k | isa<IntegerType>(X->getType())) { |
7136 | 474 | Value *Result; |
7137 | 474 | Constant *Overflow; |
7138 | | // m_UAddWithOverflow can match patterns that do not include an explicit |
7139 | | // "add" instruction, so check the opcode of the matched op. |
7140 | 474 | if (AddI->getOpcode() == Instruction::Add && |
7141 | 474 | OptimizeOverflowCheck(Instruction::Add, /*Signed*/ false, X, Y, *AddI, |
7142 | 413 | Result, Overflow)) { |
7143 | 0 | replaceInstUsesWith(*AddI, Result); |
7144 | 0 | eraseInstFromFunction(*AddI); |
7145 | 0 | return replaceInstUsesWith(I, Overflow); |
7146 | 0 | } |
7147 | 474 | } |
7148 | | |
7149 | | // (zext X) * (zext Y) --> llvm.umul.with.overflow. |
7150 | 76.2k | if (match(Op0, m_NUWMul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) && |
7151 | 76.2k | match(Op1, m_APInt(C))) { |
7152 | 53 | if (Instruction *R = processUMulZExtIdiom(I, Op0, C, *this)) |
7153 | 9 | return R; |
7154 | 53 | } |
7155 | | |
7156 | | // Signbit test folds |
7157 | | // Fold (X u>> BitWidth - 1 Pred ZExt(i1)) --> X s< 0 Pred i1 |
7158 | | // Fold (X s>> BitWidth - 1 Pred SExt(i1)) --> X s< 0 Pred i1 |
7159 | 76.2k | Instruction *ExtI; |
7160 | 76.2k | if ((I.isUnsigned() || I.isEquality()) && |
7161 | 76.2k | match(Op1, |
7162 | 53.2k | m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_Value(Y)))) && |
7163 | 76.2k | Y->getType()->getScalarSizeInBits() == 1 && |
7164 | 76.2k | (Op0->hasOneUse() || Op1->hasOneUse())) { |
7165 | 146 | unsigned OpWidth = Op0->getType()->getScalarSizeInBits(); |
7166 | 146 | Instruction *ShiftI; |
7167 | 146 | if (match(Op0, m_CombineAnd(m_Instruction(ShiftI), |
7168 | 146 | m_Shr(m_Value(X), m_SpecificIntAllowUndef( |
7169 | 146 | OpWidth - 1))))) { |
7170 | 0 | unsigned ExtOpc = ExtI->getOpcode(); |
7171 | 0 | unsigned ShiftOpc = ShiftI->getOpcode(); |
7172 | 0 | if ((ExtOpc == Instruction::ZExt && ShiftOpc == Instruction::LShr) || |
7173 | 0 | (ExtOpc == Instruction::SExt && ShiftOpc == Instruction::AShr)) { |
7174 | 0 | Value *SLTZero = |
7175 | 0 | Builder.CreateICmpSLT(X, Constant::getNullValue(X->getType())); |
7176 | 0 | Value *Cmp = Builder.CreateICmp(Pred, SLTZero, Y, I.getName()); |
7177 | 0 | return replaceInstUsesWith(I, Cmp); |
7178 | 0 | } |
7179 | 0 | } |
7180 | 146 | } |
7181 | 76.2k | } |
7182 | | |
7183 | 76.2k | if (Instruction *Res = foldICmpEquality(I)) |
7184 | 222 | return Res; |
7185 | | |
7186 | 76.0k | if (Instruction *Res = foldICmpPow2Test(I, Builder)) |
7187 | 177 | return Res; |
7188 | | |
7189 | 75.8k | if (Instruction *Res = foldICmpOfUAddOv(I)) |
7190 | 27 | return Res; |
7191 | | |
7192 | | // The 'cmpxchg' instruction returns an aggregate containing the old value and |
7193 | | // an i1 which indicates whether or not we successfully did the swap. |
7194 | | // |
7195 | | // Replace comparisons between the old value and the expected value with the |
7196 | | // indicator that 'cmpxchg' returns. |
7197 | | // |
7198 | | // N.B. This transform is only valid when the 'cmpxchg' is not permitted to |
7199 | | // spuriously fail. In those cases, the old value may equal the expected |
7200 | | // value but it is possible for the swap to not occur. |
7201 | 75.8k | if (I.getPredicate() == ICmpInst::ICMP_EQ) |
7202 | 20.6k | if (auto *EVI = dyn_cast<ExtractValueInst>(Op0)) |
7203 | 26 | if (auto *ACXI = dyn_cast<AtomicCmpXchgInst>(EVI->getAggregateOperand())) |
7204 | 4 | if (EVI->getIndices()[0] == 0 && ACXI->getCompareOperand() == Op1 && |
7205 | 4 | !ACXI->isWeak()) |
7206 | 2 | return ExtractValueInst::Create(ACXI, 1); |
7207 | | |
7208 | 75.8k | if (Instruction *Res = foldICmpWithHighBitMask(I, Builder)) |
7209 | 62 | return Res; |
7210 | | |
7211 | 75.7k | if (I.getType()->isVectorTy()) |
7212 | 7.00k | if (Instruction *Res = foldVectorCmp(I, Builder)) |
7213 | 65 | return Res; |
7214 | | |
7215 | 75.6k | if (Instruction *Res = foldICmpInvariantGroup(I)) |
7216 | 0 | return Res; |
7217 | | |
7218 | 75.6k | if (Instruction *Res = foldReductionIdiom(I, Builder, DL)) |
7219 | 4 | return Res; |
7220 | | |
// No fold fired: report the instruction as changed only if the operands were
// swapped during canonicalization.
7221 | 75.6k | return Changed ? &I : nullptr; |
7222 | 75.6k | } |
7223 | | |
7224 | | /// Fold fcmp ([us]itofp x, cst) if possible. |
7225 | | Instruction *InstCombinerImpl::foldFCmpIntToFPConst(FCmpInst &I, |
7226 | | Instruction *LHSI, |
7227 | 513 | Constant *RHSC) { |
7228 | 513 | if (!isa<ConstantFP>(RHSC)) return nullptr; |
7229 | 500 | const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); |
7230 | | |
7231 | | // Get the width of the mantissa. We don't want to hack on conversions that |
7232 | | // might lose information from the integer, e.g. "i64 -> float" |
7233 | 500 | int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); |
7234 | 500 | if (MantissaWidth == -1) return nullptr; // Unknown. |
7235 | | |
7236 | 487 | IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); |
7237 | | |
7238 | 487 | bool LHSUnsigned = isa<UIToFPInst>(LHSI); |
7239 | | |
7240 | 487 | if (I.isEquality()) { |
7241 | 348 | FCmpInst::Predicate P = I.getPredicate(); |
7242 | 348 | bool IsExact = false; |
7243 | 348 | APSInt RHSCvt(IntTy->getBitWidth(), LHSUnsigned); |
7244 | 348 | RHS.convertToInteger(RHSCvt, APFloat::rmNearestTiesToEven, &IsExact); |
7245 | | |
7246 | | // If the floating point constant isn't an integer value, we know if we will |
7247 | | // ever compare equal / not equal to it. |
7248 | 348 | if (!IsExact) { |
7249 | | // TODO: Can never be -0.0 and other non-representable values |
7250 | 135 | APFloat RHSRoundInt(RHS); |
7251 | 135 | RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven); |
7252 | 135 | if (RHS != RHSRoundInt) { |
7253 | 74 | if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ) |
7254 | 30 | return replaceInstUsesWith(I, Builder.getFalse()); |
7255 | | |
7256 | 44 | assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE); |
7257 | 0 | return replaceInstUsesWith(I, Builder.getTrue()); |
7258 | 74 | } |
7259 | 135 | } |
7260 | | |
7261 | | // TODO: If the constant is exactly representable, is it always OK to do |
7262 | | // equality compares as integer? |
7263 | 348 | } |
7264 | | |
7265 | | // Check to see that the input is converted from an integer type that is small |
7266 | | // enough that preserves all bits. TODO: check here for "known" sign bits. |
7267 | | // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. |
7268 | 413 | unsigned InputSize = IntTy->getScalarSizeInBits(); |
7269 | | |
7270 | | // Following test does NOT adjust InputSize downwards for signed inputs, |
7271 | | // because the most negative value still requires all the mantissa bits |
7272 | | // to distinguish it from one less than that value. |
7273 | 413 | if ((int)InputSize > MantissaWidth) { |
7274 | | // Conversion would lose accuracy. Check if loss can impact comparison. |
7275 | 361 | int Exp = ilogb(RHS); |
7276 | 361 | if (Exp == APFloat::IEK_Inf) { |
7277 | 10 | int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics())); |
7278 | 10 | if (MaxExponent < (int)InputSize - !LHSUnsigned) |
7279 | | // Conversion could create infinity. |
7280 | 10 | return nullptr; |
7281 | 351 | } else { |
7282 | | // Note that if RHS is zero or NaN, then Exp is negative |
7283 | | // and first condition is trivially false. |
7284 | 351 | if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned) |
7285 | | // Conversion could affect comparison. |
7286 | 53 | return nullptr; |
7287 | 351 | } |
7288 | 361 | } |
7289 | | |
7290 | | // Otherwise, we can potentially simplify the comparison. We know that it |
7291 | | // will always come through as an integer value and we know the constant is |
7292 | | // not a NAN (it would have been previously simplified). |
7293 | 350 | assert(!RHS.isNaN() && "NaN comparison not already folded!"); |
7294 | | |
7295 | 0 | ICmpInst::Predicate Pred; |
7296 | 350 | switch (I.getPredicate()) { |
7297 | 0 | default: llvm_unreachable("Unexpected predicate!"); |
7298 | 34 | case FCmpInst::FCMP_UEQ: |
7299 | 133 | case FCmpInst::FCMP_OEQ: |
7300 | 133 | Pred = ICmpInst::ICMP_EQ; |
7301 | 133 | break; |
7302 | 17 | case FCmpInst::FCMP_UGT: |
7303 | 53 | case FCmpInst::FCMP_OGT: |
7304 | 53 | Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT; |
7305 | 53 | break; |
7306 | 9 | case FCmpInst::FCMP_UGE: |
7307 | 15 | case FCmpInst::FCMP_OGE: |
7308 | 15 | Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE; |
7309 | 15 | break; |
7310 | 18 | case FCmpInst::FCMP_ULT: |
7311 | 31 | case FCmpInst::FCMP_OLT: |
7312 | 31 | Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT; |
7313 | 31 | break; |
7314 | 12 | case FCmpInst::FCMP_ULE: |
7315 | 40 | case FCmpInst::FCMP_OLE: |
7316 | 40 | Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE; |
7317 | 40 | break; |
7318 | 40 | case FCmpInst::FCMP_UNE: |
7319 | 78 | case FCmpInst::FCMP_ONE: |
7320 | 78 | Pred = ICmpInst::ICMP_NE; |
7321 | 78 | break; |
7322 | 0 | case FCmpInst::FCMP_ORD: |
7323 | 0 | return replaceInstUsesWith(I, Builder.getTrue()); |
7324 | 0 | case FCmpInst::FCMP_UNO: |
7325 | 0 | return replaceInstUsesWith(I, Builder.getFalse()); |
7326 | 350 | } |
7327 | | |
7328 | | // Now we know that the APFloat is a normal number, zero or inf. |
7329 | | |
7330 | | // See if the FP constant is too large for the integer. For example, |
7331 | | // comparing an i8 to 300.0. |
7332 | 350 | unsigned IntWidth = IntTy->getScalarSizeInBits(); |
7333 | | |
7334 | 350 | if (!LHSUnsigned) { |
7335 | | // If the RHS value is > SignedMax, fold the comparison. This handles +INF |
7336 | | // and large values. |
7337 | 217 | APFloat SMax(RHS.getSemantics()); |
7338 | 217 | SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true, |
7339 | 217 | APFloat::rmNearestTiesToEven); |
7340 | 217 | if (SMax < RHS) { // smax < 13123.0 |
7341 | 31 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || |
7342 | 31 | Pred == ICmpInst::ICMP_SLE) |
7343 | 18 | return replaceInstUsesWith(I, Builder.getTrue()); |
7344 | 13 | return replaceInstUsesWith(I, Builder.getFalse()); |
7345 | 31 | } |
7346 | 217 | } else { |
7347 | | // If the RHS value is > UnsignedMax, fold the comparison. This handles |
7348 | | // +INF and large values. |
7349 | 133 | APFloat UMax(RHS.getSemantics()); |
7350 | 133 | UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false, |
7351 | 133 | APFloat::rmNearestTiesToEven); |
7352 | 133 | if (UMax < RHS) { // umax < 13123.0 |
7353 | 10 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || |
7354 | 10 | Pred == ICmpInst::ICMP_ULE) |
7355 | 1 | return replaceInstUsesWith(I, Builder.getTrue()); |
7356 | 9 | return replaceInstUsesWith(I, Builder.getFalse()); |
7357 | 10 | } |
7358 | 133 | } |
7359 | | |
7360 | 309 | if (!LHSUnsigned) { |
7361 | | // See if the RHS value is < SignedMin. |
7362 | 186 | APFloat SMin(RHS.getSemantics()); |
7363 | 186 | SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true, |
7364 | 186 | APFloat::rmNearestTiesToEven); |
7365 | 186 | if (SMin > RHS) { // smin > 12312.0 |
7366 | 20 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || |
7367 | 20 | Pred == ICmpInst::ICMP_SGE) |
7368 | 11 | return replaceInstUsesWith(I, Builder.getTrue()); |
7369 | 9 | return replaceInstUsesWith(I, Builder.getFalse()); |
7370 | 20 | } |
7371 | 186 | } else { |
7372 | | // See if the RHS value is < UnsignedMin. |
7373 | 123 | APFloat UMin(RHS.getSemantics()); |
7374 | 123 | UMin.convertFromAPInt(APInt::getMinValue(IntWidth), false, |
7375 | 123 | APFloat::rmNearestTiesToEven); |
7376 | 123 | if (UMin > RHS) { // umin > 12312.0 |
7377 | 2 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT || |
7378 | 2 | Pred == ICmpInst::ICMP_UGE) |
7379 | 2 | return replaceInstUsesWith(I, Builder.getTrue()); |
7380 | 0 | return replaceInstUsesWith(I, Builder.getFalse()); |
7381 | 2 | } |
7382 | 123 | } |
7383 | | |
7384 | | // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or |
7385 | | // [0, UMAX], but it may still be fractional. Check whether this is the case |
7386 | | // using the IsExact flag. |
7387 | | // Don't do this for zero, because -0.0 is not fractional. |
7388 | 287 | APSInt RHSInt(IntWidth, LHSUnsigned); |
7389 | 287 | bool IsExact; |
7390 | 287 | RHS.convertToInteger(RHSInt, APFloat::rmTowardZero, &IsExact); |
7391 | 287 | if (!RHS.isZero()) { |
7392 | 66 | if (!IsExact) { |
7393 | | // If we had a comparison against a fractional value, we have to adjust |
7394 | | // the compare predicate and sometimes the value. RHSC is rounded towards |
7395 | | // zero at this point. |
7396 | 37 | switch (Pred) { |
7397 | 0 | default: llvm_unreachable("Unexpected integer comparison!"); |
7398 | 0 | case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true |
7399 | 0 | return replaceInstUsesWith(I, Builder.getTrue()); |
7400 | 0 | case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false |
7401 | 0 | return replaceInstUsesWith(I, Builder.getFalse()); |
7402 | 4 | case ICmpInst::ICMP_ULE: |
7403 | | // (float)int <= 4.4 --> int <= 4 |
7404 | | // (float)int <= -4.4 --> false |
7405 | 4 | if (RHS.isNegative()) |
7406 | 0 | return replaceInstUsesWith(I, Builder.getFalse()); |
7407 | 4 | break; |
7408 | 6 | case ICmpInst::ICMP_SLE: |
7409 | | // (float)int <= 4.4 --> int <= 4 |
7410 | | // (float)int <= -4.4 --> int < -4 |
7411 | 6 | if (RHS.isNegative()) |
7412 | 0 | Pred = ICmpInst::ICMP_SLT; |
7413 | 6 | break; |
7414 | 1 | case ICmpInst::ICMP_ULT: |
7415 | | // (float)int < -4.4 --> false |
7416 | | // (float)int < 4.4 --> int <= 4 |
7417 | 1 | if (RHS.isNegative()) |
7418 | 0 | return replaceInstUsesWith(I, Builder.getFalse()); |
7419 | 1 | Pred = ICmpInst::ICMP_ULE; |
7420 | 1 | break; |
7421 | 3 | case ICmpInst::ICMP_SLT: |
7422 | | // (float)int < -4.4 --> int < -4 |
7423 | | // (float)int < 4.4 --> int <= 4 |
7424 | 3 | if (!RHS.isNegative()) |
7425 | 3 | Pred = ICmpInst::ICMP_SLE; |
7426 | 3 | break; |
7427 | 2 | case ICmpInst::ICMP_UGT: |
7428 | | // (float)int > 4.4 --> int > 4 |
7429 | | // (float)int > -4.4 --> true |
7430 | 2 | if (RHS.isNegative()) |
7431 | 0 | return replaceInstUsesWith(I, Builder.getTrue()); |
7432 | 2 | break; |
7433 | 10 | case ICmpInst::ICMP_SGT: |
7434 | | // (float)int > 4.4 --> int > 4 |
7435 | | // (float)int > -4.4 --> int >= -4 |
7436 | 10 | if (RHS.isNegative()) |
7437 | 0 | Pred = ICmpInst::ICMP_SGE; |
7438 | 10 | break; |
7439 | 6 | case ICmpInst::ICMP_UGE: |
7440 | | // (float)int >= -4.4 --> true |
7441 | | // (float)int >= 4.4 --> int > 4 |
7442 | 6 | if (RHS.isNegative()) |
7443 | 0 | return replaceInstUsesWith(I, Builder.getTrue()); |
7444 | 6 | Pred = ICmpInst::ICMP_UGT; |
7445 | 6 | break; |
7446 | 5 | case ICmpInst::ICMP_SGE: |
7447 | | // (float)int >= -4.4 --> int >= -4 |
7448 | | // (float)int >= 4.4 --> int > 4 |
7449 | 5 | if (!RHS.isNegative()) |
7450 | 5 | Pred = ICmpInst::ICMP_SGT; |
7451 | 5 | break; |
7452 | 37 | } |
7453 | 37 | } |
7454 | 66 | } |
7455 | | |
7456 | | // Lower this FP comparison into an appropriate integer version of the |
7457 | | // comparison. |
7458 | 287 | return new ICmpInst(Pred, LHSI->getOperand(0), Builder.getInt(RHSInt)); |
7459 | 287 | } |
7460 | | |
7461 | | /// Fold (C / X) < 0.0 --> X < 0.0 if possible. Swap predicate if necessary. |
7462 | | static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI, |
7463 | 106 | Constant *RHSC) { |
7464 | | // When C is not 0.0 and infinities are not allowed: |
7465 | | // (C / X) < 0.0 is a sign-bit test of X |
7466 | | // (C / X) < 0.0 --> X < 0.0 (if C is positive) |
7467 | | // (C / X) < 0.0 --> X > 0.0 (if C is negative, swap the predicate) |
7468 | | // |
7469 | | // Proof: |
7470 | | // Multiply (C / X) < 0.0 by X * X / C. |
7471 | | // - X is non zero, if it is the flag 'ninf' is violated. |
7472 | | // - C defines the sign of X * X * C. Thus it also defines whether to swap |
7473 | | // the predicate. C is also non zero by definition. |
7474 | | // |
7475 | | // Thus X * X / C is non zero and the transformation is valid. [qed] |
7476 | | |
7477 | 106 | FCmpInst::Predicate Pred = I.getPredicate(); |
7478 | | |
7479 | | // Check that predicates are valid. |
7480 | 106 | if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) && |
7481 | 106 | (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE)) |
7482 | 58 | return nullptr; |
7483 | | |
7484 | | // Check that RHS operand is zero. |
7485 | 48 | if (!match(RHSC, m_AnyZeroFP())) |
7486 | 27 | return nullptr; |
7487 | | |
7488 | | // Check fastmath flags ('ninf'). |
7489 | 21 | if (!LHSI->hasNoInfs() || !I.hasNoInfs()) |
7490 | 18 | return nullptr; |
7491 | | |
7492 | | // Check the properties of the dividend. It must not be zero to avoid a |
7493 | | // division by zero (see Proof). |
7494 | 3 | const APFloat *C; |
7495 | 3 | if (!match(LHSI->getOperand(0), m_APFloat(C))) |
7496 | 0 | return nullptr; |
7497 | | |
7498 | 3 | if (C->isZero()) |
7499 | 0 | return nullptr; |
7500 | | |
7501 | | // Get swapped predicate if necessary. |
7502 | 3 | if (C->isNegative()) |
7503 | 3 | Pred = I.getSwappedPredicate(); |
7504 | | |
7505 | 3 | return new FCmpInst(Pred, LHSI->getOperand(1), RHSC, "", &I); |
7506 | 3 | } |
7507 | | |
7508 | | /// Optimize fabs(X) compared with zero. |
7509 | 13.9k | static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) { |
7510 | 13.9k | Value *X; |
7511 | 13.9k | if (!match(I.getOperand(0), m_FAbs(m_Value(X)))) |
7512 | 13.8k | return nullptr; |
7513 | | |
7514 | 117 | const APFloat *C; |
7515 | 117 | if (!match(I.getOperand(1), m_APFloat(C))) |
7516 | 34 | return nullptr; |
7517 | | |
7518 | 83 | if (!C->isPosZero()) { |
7519 | 12 | if (!C->isSmallestNormalized()) |
7520 | 12 | return nullptr; |
7521 | | |
7522 | 0 | const Function *F = I.getFunction(); |
7523 | 0 | DenormalMode Mode = F->getDenormalMode(C->getSemantics()); |
7524 | 0 | if (Mode.Input == DenormalMode::PreserveSign || |
7525 | 0 | Mode.Input == DenormalMode::PositiveZero) { |
7526 | |
|
7527 | 0 | auto replaceFCmp = [](FCmpInst *I, FCmpInst::Predicate P, Value *X) { |
7528 | 0 | Constant *Zero = ConstantFP::getZero(X->getType()); |
7529 | 0 | return new FCmpInst(P, X, Zero, "", I); |
7530 | 0 | }; |
7531 | |
|
7532 | 0 | switch (I.getPredicate()) { |
7533 | 0 | case FCmpInst::FCMP_OLT: |
7534 | | // fcmp olt fabs(x), smallest_normalized_number -> fcmp oeq x, 0.0 |
7535 | 0 | return replaceFCmp(&I, FCmpInst::FCMP_OEQ, X); |
7536 | 0 | case FCmpInst::FCMP_UGE: |
7537 | | // fcmp uge fabs(x), smallest_normalized_number -> fcmp une x, 0.0 |
7538 | 0 | return replaceFCmp(&I, FCmpInst::FCMP_UNE, X); |
7539 | 0 | case FCmpInst::FCMP_OGE: |
7540 | | // fcmp oge fabs(x), smallest_normalized_number -> fcmp one x, 0.0 |
7541 | 0 | return replaceFCmp(&I, FCmpInst::FCMP_ONE, X); |
7542 | 0 | case FCmpInst::FCMP_ULT: |
7543 | | // fcmp ult fabs(x), smallest_normalized_number -> fcmp ueq x, 0.0 |
7544 | 0 | return replaceFCmp(&I, FCmpInst::FCMP_UEQ, X); |
7545 | 0 | default: |
7546 | 0 | break; |
7547 | 0 | } |
7548 | 0 | } |
7549 | | |
7550 | 0 | return nullptr; |
7551 | 0 | } |
7552 | | |
7553 | 71 | auto replacePredAndOp0 = [&IC](FCmpInst *I, FCmpInst::Predicate P, Value *X) { |
7554 | 71 | I->setPredicate(P); |
7555 | 71 | return IC.replaceOperand(*I, 0, X); |
7556 | 71 | }; |
7557 | | |
7558 | 71 | switch (I.getPredicate()) { |
7559 | 0 | case FCmpInst::FCMP_UGE: |
7560 | 0 | case FCmpInst::FCMP_OLT: |
7561 | | // fabs(X) >= 0.0 --> true |
7562 | | // fabs(X) < 0.0 --> false |
7563 | 0 | llvm_unreachable("fcmp should have simplified"); |
7564 | |
|
7565 | 4 | case FCmpInst::FCMP_OGT: |
7566 | | // fabs(X) > 0.0 --> X != 0.0 |
7567 | 4 | return replacePredAndOp0(&I, FCmpInst::FCMP_ONE, X); |
7568 | | |
7569 | 2 | case FCmpInst::FCMP_UGT: |
7570 | | // fabs(X) u> 0.0 --> X u!= 0.0 |
7571 | 2 | return replacePredAndOp0(&I, FCmpInst::FCMP_UNE, X); |
7572 | | |
7573 | 4 | case FCmpInst::FCMP_OLE: |
7574 | | // fabs(X) <= 0.0 --> X == 0.0 |
7575 | 4 | return replacePredAndOp0(&I, FCmpInst::FCMP_OEQ, X); |
7576 | | |
7577 | 0 | case FCmpInst::FCMP_ULE: |
7578 | | // fabs(X) u<= 0.0 --> X u== 0.0 |
7579 | 0 | return replacePredAndOp0(&I, FCmpInst::FCMP_UEQ, X); |
7580 | | |
7581 | 3 | case FCmpInst::FCMP_OGE: |
7582 | | // fabs(X) >= 0.0 --> !isnan(X) |
7583 | 3 | assert(!I.hasNoNaNs() && "fcmp should have simplified"); |
7584 | 0 | return replacePredAndOp0(&I, FCmpInst::FCMP_ORD, X); |
7585 | | |
7586 | 2 | case FCmpInst::FCMP_ULT: |
7587 | | // fabs(X) u< 0.0 --> isnan(X) |
7588 | 2 | assert(!I.hasNoNaNs() && "fcmp should have simplified"); |
7589 | 0 | return replacePredAndOp0(&I, FCmpInst::FCMP_UNO, X); |
7590 | | |
7591 | 17 | case FCmpInst::FCMP_OEQ: |
7592 | 18 | case FCmpInst::FCMP_UEQ: |
7593 | 20 | case FCmpInst::FCMP_ONE: |
7594 | 31 | case FCmpInst::FCMP_UNE: |
7595 | 39 | case FCmpInst::FCMP_ORD: |
7596 | 56 | case FCmpInst::FCMP_UNO: |
7597 | | // Look through the fabs() because it doesn't change anything but the sign. |
7598 | | // fabs(X) == 0.0 --> X == 0.0, |
7599 | | // fabs(X) != 0.0 --> X != 0.0 |
7600 | | // isnan(fabs(X)) --> isnan(X) |
7601 | | // !isnan(fabs(X) --> !isnan(X) |
7602 | 56 | return replacePredAndOp0(&I, I.getPredicate(), X); |
7603 | | |
7604 | 0 | default: |
7605 | 0 | return nullptr; |
7606 | 71 | } |
7607 | 71 | } |
7608 | | |
7609 | 15.4k | static Instruction *foldFCmpFNegCommonOp(FCmpInst &I) { |
7610 | 15.4k | CmpInst::Predicate Pred = I.getPredicate(); |
7611 | 15.4k | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); |
7612 | | |
7613 | | // Canonicalize fneg as Op1. |
7614 | 15.4k | if (match(Op0, m_FNeg(m_Value())) && !match(Op1, m_FNeg(m_Value()))) { |
7615 | 137 | std::swap(Op0, Op1); |
7616 | 137 | Pred = I.getSwappedPredicate(); |
7617 | 137 | } |
7618 | | |
7619 | 15.4k | if (!match(Op1, m_FNeg(m_Specific(Op0)))) |
7620 | 15.4k | return nullptr; |
7621 | | |
7622 | | // Replace the negated operand with 0.0: |
7623 | | // fcmp Pred Op0, -Op0 --> fcmp Pred Op0, 0.0 |
7624 | 10 | Constant *Zero = ConstantFP::getZero(Op0->getType()); |
7625 | 10 | return new FCmpInst(Pred, Op0, Zero, "", &I); |
7626 | 15.4k | } |
7627 | | |
7628 | 18.3k | Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) { |
7629 | 18.3k | bool Changed = false; |
7630 | | |
7631 | | /// Orders the operands of the compare so that they are listed from most |
7632 | | /// complex to least complex. This puts constants before unary operators, |
7633 | | /// before binary operators. |
7634 | 18.3k | if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { |
7635 | 1.34k | I.swapOperands(); |
7636 | 1.34k | Changed = true; |
7637 | 1.34k | } |
7638 | | |
7639 | 18.3k | const CmpInst::Predicate Pred = I.getPredicate(); |
7640 | 18.3k | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); |
7641 | 18.3k | if (Value *V = simplifyFCmpInst(Pred, Op0, Op1, I.getFastMathFlags(), |
7642 | 18.3k | SQ.getWithInstruction(&I))) |
7643 | 1.95k | return replaceInstUsesWith(I, V); |
7644 | | |
7645 | | // Simplify 'fcmp pred X, X' |
7646 | 16.4k | Type *OpType = Op0->getType(); |
7647 | 16.4k | assert(OpType == Op1->getType() && "fcmp with different-typed operands?"); |
7648 | 16.4k | if (Op0 == Op1) { |
7649 | 692 | switch (Pred) { |
7650 | 0 | default: break; |
7651 | 97 | case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y) |
7652 | 171 | case FCmpInst::FCMP_ULT: // True if unordered or less than |
7653 | 276 | case FCmpInst::FCMP_UGT: // True if unordered or greater than |
7654 | 351 | case FCmpInst::FCMP_UNE: // True if unordered or not equal |
7655 | | // Canonicalize these to be 'fcmp uno %X, 0.0'. |
7656 | 351 | I.setPredicate(FCmpInst::FCMP_UNO); |
7657 | 351 | I.setOperand(1, Constant::getNullValue(OpType)); |
7658 | 351 | return &I; |
7659 | | |
7660 | 77 | case FCmpInst::FCMP_ORD: // True if ordered (no nans) |
7661 | 191 | case FCmpInst::FCMP_OEQ: // True if ordered and equal |
7662 | 270 | case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal |
7663 | 341 | case FCmpInst::FCMP_OLE: // True if ordered and less than or equal |
7664 | | // Canonicalize these to be 'fcmp ord %X, 0.0'. |
7665 | 341 | I.setPredicate(FCmpInst::FCMP_ORD); |
7666 | 341 | I.setOperand(1, Constant::getNullValue(OpType)); |
7667 | 341 | return &I; |
7668 | 692 | } |
7669 | 692 | } |
7670 | | |
7671 | | // If we're just checking for a NaN (ORD/UNO) and have a non-NaN operand, |
7672 | | // then canonicalize the operand to 0.0. |
7673 | 15.7k | if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) { |
7674 | 3.35k | if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0, DL, &TLI, 0, |
7675 | 3.35k | &AC, &I, &DT)) |
7676 | 25 | return replaceOperand(I, 0, ConstantFP::getZero(OpType)); |
7677 | | |
7678 | 3.33k | if (!match(Op1, m_PosZeroFP()) && |
7679 | 3.33k | isKnownNeverNaN(Op1, DL, &TLI, 0, &AC, &I, &DT)) |
7680 | 244 | return replaceOperand(I, 1, ConstantFP::getZero(OpType)); |
7681 | 3.33k | } |
7682 | | |
7683 | | // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y |
7684 | 15.4k | Value *X, *Y; |
7685 | 15.4k | if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y)))) |
7686 | 48 | return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I); |
7687 | | |
7688 | 15.4k | if (Instruction *R = foldFCmpFNegCommonOp(I)) |
7689 | 10 | return R; |
7690 | | |
7691 | | // Test if the FCmpInst instruction is used exclusively by a select as |
7692 | | // part of a minimum or maximum operation. If so, refrain from doing |
7693 | | // any other folding. This helps out other analyses which understand |
7694 | | // non-obfuscated minimum and maximum idioms, such as ScalarEvolution |
7695 | | // and CodeGen. And in this case, at least one of the comparison |
7696 | | // operands has at least one user besides the compare (the select), |
7697 | | // which would often largely negate the benefit of folding anyway. |
7698 | 15.4k | if (I.hasOneUse()) |
7699 | 13.6k | if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) { |
7700 | 3.50k | Value *A, *B; |
7701 | 3.50k | SelectPatternResult SPR = matchSelectPattern(SI, A, B); |
7702 | 3.50k | if (SPR.Flavor != SPF_UNKNOWN) |
7703 | 819 | return nullptr; |
7704 | 3.50k | } |
7705 | | |
7706 | | // The sign of 0.0 is ignored by fcmp, so canonicalize to +0.0: |
7707 | | // fcmp Pred X, -0.0 --> fcmp Pred X, 0.0 |
7708 | 14.6k | if (match(Op1, m_AnyZeroFP()) && !match(Op1, m_PosZeroFP())) |
7709 | 198 | return replaceOperand(I, 1, ConstantFP::getZero(OpType)); |
7710 | | |
7711 | | // Ignore signbit of bitcasted int when comparing equality to FP 0.0: |
7712 | | // fcmp oeq/une (bitcast X), 0.0 --> (and X, SignMaskC) ==/!= 0 |
7713 | 14.4k | if (match(Op1, m_PosZeroFP()) && |
7714 | 14.4k | match(Op0, m_OneUse(m_BitCast(m_Value(X)))) && |
7715 | 14.4k | X->getType()->isVectorTy() == OpType->isVectorTy() && |
7716 | 14.4k | X->getType()->getScalarSizeInBits() == OpType->getScalarSizeInBits()) { |
7717 | 14 | ICmpInst::Predicate IntPred = ICmpInst::BAD_ICMP_PREDICATE; |
7718 | 14 | if (Pred == FCmpInst::FCMP_OEQ) |
7719 | 0 | IntPred = ICmpInst::ICMP_EQ; |
7720 | 14 | else if (Pred == FCmpInst::FCMP_UNE) |
7721 | 1 | IntPred = ICmpInst::ICMP_NE; |
7722 | | |
7723 | 14 | if (IntPred != ICmpInst::BAD_ICMP_PREDICATE) { |
7724 | 1 | Type *IntTy = X->getType(); |
7725 | 1 | const APInt &SignMask = ~APInt::getSignMask(IntTy->getScalarSizeInBits()); |
7726 | 1 | Value *MaskX = Builder.CreateAnd(X, ConstantInt::get(IntTy, SignMask)); |
7727 | 1 | return new ICmpInst(IntPred, MaskX, ConstantInt::getNullValue(IntTy)); |
7728 | 1 | } |
7729 | 14 | } |
7730 | | |
7731 | | // Handle fcmp with instruction LHS and constant RHS. |
7732 | 14.4k | Instruction *LHSI; |
7733 | 14.4k | Constant *RHSC; |
7734 | 14.4k | if (match(Op0, m_Instruction(LHSI)) && match(Op1, m_Constant(RHSC))) { |
7735 | 4.26k | switch (LHSI->getOpcode()) { |
7736 | 0 | case Instruction::PHI: |
7737 | 0 | if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI))) |
7738 | 0 | return NV; |
7739 | 0 | break; |
7740 | 277 | case Instruction::SIToFP: |
7741 | 513 | case Instruction::UIToFP: |
7742 | 513 | if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC)) |
7743 | 424 | return NV; |
7744 | 89 | break; |
7745 | 106 | case Instruction::FDiv: |
7746 | 106 | if (Instruction *NV = foldFCmpReciprocalAndZero(I, LHSI, RHSC)) |
7747 | 3 | return NV; |
7748 | 103 | break; |
7749 | 2.29k | case Instruction::Load: |
7750 | 2.29k | if (auto *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) |
7751 | 100 | if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) |
7752 | 6 | if (Instruction *Res = foldCmpLoadFromIndexedGlobal( |
7753 | 6 | cast<LoadInst>(LHSI), GEP, GV, I)) |
7754 | 0 | return Res; |
7755 | 2.29k | break; |
7756 | 4.26k | } |
7757 | 4.26k | } |
7758 | | |
7759 | 13.9k | if (Instruction *R = foldFabsWithFcmpZero(I, *this)) |
7760 | 71 | return R; |
7761 | | |
7762 | 13.9k | if (match(Op0, m_FNeg(m_Value(X)))) { |
7763 | | // fcmp pred (fneg X), C --> fcmp swap(pred) X, -C |
7764 | 127 | Constant *C; |
7765 | 127 | if (match(Op1, m_Constant(C))) |
7766 | 96 | if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)) |
7767 | 96 | return new FCmpInst(I.getSwappedPredicate(), X, NegC, "", &I); |
7768 | 127 | } |
7769 | | |
7770 | 13.8k | if (match(Op0, m_FPExt(m_Value(X)))) { |
7771 | | // fcmp (fpext X), (fpext Y) -> fcmp X, Y |
7772 | 169 | if (match(Op1, m_FPExt(m_Value(Y))) && X->getType() == Y->getType()) |
7773 | 22 | return new FCmpInst(Pred, X, Y, "", &I); |
7774 | | |
7775 | 147 | const APFloat *C; |
7776 | 147 | if (match(Op1, m_APFloat(C))) { |
7777 | 143 | const fltSemantics &FPSem = |
7778 | 143 | X->getType()->getScalarType()->getFltSemantics(); |
7779 | 143 | bool Lossy; |
7780 | 143 | APFloat TruncC = *C; |
7781 | 143 | TruncC.convert(FPSem, APFloat::rmNearestTiesToEven, &Lossy); |
7782 | | |
7783 | 143 | if (Lossy) { |
7784 | | // X can't possibly equal the higher-precision constant, so reduce any |
7785 | | // equality comparison. |
7786 | | // TODO: Other predicates can be handled via getFCmpCode(). |
7787 | 38 | switch (Pred) { |
7788 | 2 | case FCmpInst::FCMP_OEQ: |
7789 | | // X is ordered and equal to an impossible constant --> false |
7790 | 2 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); |
7791 | 10 | case FCmpInst::FCMP_ONE: |
7792 | | // X is ordered and not equal to an impossible constant --> ordered |
7793 | 10 | return new FCmpInst(FCmpInst::FCMP_ORD, X, |
7794 | 10 | ConstantFP::getZero(X->getType())); |
7795 | 2 | case FCmpInst::FCMP_UEQ: |
7796 | | // X is unordered or equal to an impossible constant --> unordered |
7797 | 2 | return new FCmpInst(FCmpInst::FCMP_UNO, X, |
7798 | 2 | ConstantFP::getZero(X->getType())); |
7799 | 0 | case FCmpInst::FCMP_UNE: |
7800 | | // X is unordered or not equal to an impossible constant --> true |
7801 | 0 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); |
7802 | 24 | default: |
7803 | 24 | break; |
7804 | 38 | } |
7805 | 38 | } |
7806 | | |
7807 | | // fcmp (fpext X), C -> fcmp X, (fptrunc C) if fptrunc is lossless |
7808 | | // Avoid lossy conversions and denormals. |
7809 | | // Zero is a special case that's OK to convert. |
7810 | 129 | APFloat Fabs = TruncC; |
7811 | 129 | Fabs.clearSign(); |
7812 | 129 | if (!Lossy && |
7813 | 129 | (Fabs.isZero() || !(Fabs < APFloat::getSmallestNormalized(FPSem)))) { |
7814 | 101 | Constant *NewC = ConstantFP::get(X->getType(), TruncC); |
7815 | 101 | return new FCmpInst(Pred, X, NewC, "", &I); |
7816 | 101 | } |
7817 | 129 | } |
7818 | 147 | } |
7819 | | |
7820 | | // Convert a sign-bit test of an FP value into a cast and integer compare. |
7821 | | // TODO: Simplify if the copysign constant is 0.0 or NaN. |
7822 | | // TODO: Handle non-zero compare constants. |
7823 | | // TODO: Handle other predicates. |
7824 | 13.6k | const APFloat *C; |
7825 | 13.6k | if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::copysign>(m_APFloat(C), |
7826 | 13.6k | m_Value(X)))) && |
7827 | 13.6k | match(Op1, m_AnyZeroFP()) && !C->isZero() && !C->isNaN()) { |
7828 | 0 | Type *IntType = Builder.getIntNTy(X->getType()->getScalarSizeInBits()); |
7829 | 0 | if (auto *VecTy = dyn_cast<VectorType>(OpType)) |
7830 | 0 | IntType = VectorType::get(IntType, VecTy->getElementCount()); |
7831 | | |
7832 | | // copysign(non-zero constant, X) < 0.0 --> (bitcast X) < 0 |
7833 | 0 | if (Pred == FCmpInst::FCMP_OLT) { |
7834 | 0 | Value *IntX = Builder.CreateBitCast(X, IntType); |
7835 | 0 | return new ICmpInst(ICmpInst::ICMP_SLT, IntX, |
7836 | 0 | ConstantInt::getNullValue(IntType)); |
7837 | 0 | } |
7838 | 0 | } |
7839 | | |
7840 | 13.6k | { |
7841 | 13.6k | Value *CanonLHS = nullptr, *CanonRHS = nullptr; |
7842 | 13.6k | match(Op0, m_Intrinsic<Intrinsic::canonicalize>(m_Value(CanonLHS))); |
7843 | 13.6k | match(Op1, m_Intrinsic<Intrinsic::canonicalize>(m_Value(CanonRHS))); |
7844 | | |
7845 | | // (canonicalize(x) == x) => (x == x) |
7846 | 13.6k | if (CanonLHS == Op1) |
7847 | 0 | return new FCmpInst(Pred, Op1, Op1, "", &I); |
7848 | | |
7849 | | // (x == canonicalize(x)) => (x == x) |
7850 | 13.6k | if (CanonRHS == Op0) |
7851 | 0 | return new FCmpInst(Pred, Op0, Op0, "", &I); |
7852 | | |
7853 | | // (canonicalize(x) == canonicalize(y)) => (x == y) |
7854 | 13.6k | if (CanonLHS && CanonRHS) |
7855 | 0 | return new FCmpInst(Pred, CanonLHS, CanonRHS, "", &I); |
7856 | 13.6k | } |
7857 | | |
7858 | 13.6k | if (I.getType()->isVectorTy()) |
7859 | 1.17k | if (Instruction *Res = foldVectorCmp(I, Builder)) |
7860 | 11 | return Res; |
7861 | | |
7862 | 13.6k | return Changed ? &I : nullptr; |
7863 | 13.6k | } |