xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- InstCombineMulDivRem.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the visit functions for mul, fmul, sdiv, udiv, fdiv,
10 // srem, urem, frem.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/SmallPtrSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Analysis/InstructionSimplify.h"
19 #include "llvm/Analysis/ValueTracking.h"
20 #include "llvm/IR/BasicBlock.h"
21 #include "llvm/IR/Constant.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/InstrTypes.h"
24 #include "llvm/IR/Instruction.h"
25 #include "llvm/IR/Instructions.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/Operator.h"
29 #include "llvm/IR/PatternMatch.h"
30 #include "llvm/IR/Type.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Transforms/InstCombine/InstCombiner.h"
35 #include "llvm/Transforms/Utils/BuildLibCalls.h"
36 #include <cassert>
37 
38 #define DEBUG_TYPE "instcombine"
39 #include "llvm/Transforms/Utils/InstructionWorklist.h"
40 
41 using namespace llvm;
42 using namespace PatternMatch;
43 
44 /// The specific integer value is used in a context where it is known to be
45 /// non-zero.  If this allows us to simplify the computation, do so and return
46 /// the new operand, otherwise return null.
simplifyValueKnownNonZero(Value * V,InstCombinerImpl & IC,Instruction & CxtI)47 static Value *simplifyValueKnownNonZero(Value *V, InstCombinerImpl &IC,
48                                         Instruction &CxtI) {
49   // If V has multiple uses, then we would have to do more analysis to determine
50   // if this is safe.  For example, the use could be in dynamically unreached
51   // code.
52   if (!V->hasOneUse()) return nullptr;
53 
54   bool MadeChange = false;
55 
56   // ((1 << A) >>u B) --> (1 << (A-B))
57   // Because V cannot be zero, we know that B is less than A.
58   Value *A = nullptr, *B = nullptr, *One = nullptr;
59   if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(One), m_Value(A))), m_Value(B))) &&
60       match(One, m_One())) {
61     A = IC.Builder.CreateSub(A, B);
62     return IC.Builder.CreateShl(One, A);
63   }
64 
65   // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
66   // inexact.  Similarly for <<.
67   BinaryOperator *I = dyn_cast<BinaryOperator>(V);
68   if (I && I->isLogicalShift() &&
69       IC.isKnownToBeAPowerOfTwo(I->getOperand(0), false, &CxtI)) {
70     // We know that this is an exact/nuw shift and that the input is a
71     // non-zero context as well.
72     if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC, CxtI)) {
73       IC.replaceOperand(*I, 0, V2);
74       MadeChange = true;
75     }
76 
77     if (I->getOpcode() == Instruction::LShr && !I->isExact()) {
78       I->setIsExact();
79       MadeChange = true;
80     }
81 
82     if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) {
83       I->setHasNoUnsignedWrap();
84       MadeChange = true;
85     }
86   }
87 
88   // TODO: Lots more we could do here:
89   //    If V is a phi node, we can call this on each of its operands.
90   //    "select cond, X, 0" can simplify to "X".
91 
92   return MadeChange ? V : nullptr;
93 }
94 
95 // TODO: This is a specific form of a much more general pattern.
96 //       We could detect a select with any binop identity constant, or we
97 //       could use SimplifyBinOp to see if either arm of the select reduces.
98 //       But that needs to be done carefully and/or while removing potential
99 //       reverse canonicalizations as in InstCombiner::foldSelectIntoOp().
foldMulSelectToNegate(BinaryOperator & I,InstCombiner::BuilderTy & Builder)100 static Value *foldMulSelectToNegate(BinaryOperator &I,
101                                     InstCombiner::BuilderTy &Builder) {
102   Value *Cond, *OtherOp;
103 
104   // mul (select Cond, 1, -1), OtherOp --> select Cond, OtherOp, -OtherOp
105   // mul OtherOp, (select Cond, 1, -1) --> select Cond, OtherOp, -OtherOp
106   if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_One(), m_AllOnes())),
107                         m_Value(OtherOp)))) {
108     bool HasAnyNoWrap = I.hasNoSignedWrap() || I.hasNoUnsignedWrap();
109     Value *Neg = Builder.CreateNeg(OtherOp, "", HasAnyNoWrap);
110     return Builder.CreateSelect(Cond, OtherOp, Neg);
111   }
112   // mul (select Cond, -1, 1), OtherOp --> select Cond, -OtherOp, OtherOp
113   // mul OtherOp, (select Cond, -1, 1) --> select Cond, -OtherOp, OtherOp
114   if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_AllOnes(), m_One())),
115                         m_Value(OtherOp)))) {
116     bool HasAnyNoWrap = I.hasNoSignedWrap() || I.hasNoUnsignedWrap();
117     Value *Neg = Builder.CreateNeg(OtherOp, "", HasAnyNoWrap);
118     return Builder.CreateSelect(Cond, Neg, OtherOp);
119   }
120 
121   // fmul (select Cond, 1.0, -1.0), OtherOp --> select Cond, OtherOp, -OtherOp
122   // fmul OtherOp, (select Cond, 1.0, -1.0) --> select Cond, OtherOp, -OtherOp
123   if (match(&I, m_c_FMul(m_OneUse(m_Select(m_Value(Cond), m_SpecificFP(1.0),
124                                            m_SpecificFP(-1.0))),
125                          m_Value(OtherOp))))
126     return Builder.CreateSelectFMF(Cond, OtherOp,
127                                    Builder.CreateFNegFMF(OtherOp, &I), &I);
128 
129   // fmul (select Cond, -1.0, 1.0), OtherOp --> select Cond, -OtherOp, OtherOp
130   // fmul OtherOp, (select Cond, -1.0, 1.0) --> select Cond, -OtherOp, OtherOp
131   if (match(&I, m_c_FMul(m_OneUse(m_Select(m_Value(Cond), m_SpecificFP(-1.0),
132                                            m_SpecificFP(1.0))),
133                          m_Value(OtherOp))))
134     return Builder.CreateSelectFMF(Cond, Builder.CreateFNegFMF(OtherOp, &I),
135                                    OtherOp, &I);
136 
137   return nullptr;
138 }
139 
140 /// Reduce integer multiplication patterns that contain a (+/-1 << Z) factor.
141 /// Callers are expected to call this twice to handle commuted patterns.
foldMulShl1(BinaryOperator & Mul,bool CommuteOperands,InstCombiner::BuilderTy & Builder)142 static Value *foldMulShl1(BinaryOperator &Mul, bool CommuteOperands,
143                           InstCombiner::BuilderTy &Builder) {
144   Value *X = Mul.getOperand(0), *Y = Mul.getOperand(1);
145   if (CommuteOperands)
146     std::swap(X, Y);
147 
148   const bool HasNSW = Mul.hasNoSignedWrap();
149   const bool HasNUW = Mul.hasNoUnsignedWrap();
150 
151   // X * (1 << Z) --> X << Z
152   Value *Z;
153   if (match(Y, m_Shl(m_One(), m_Value(Z)))) {
154     bool PropagateNSW = HasNSW && cast<ShlOperator>(Y)->hasNoSignedWrap();
155     return Builder.CreateShl(X, Z, Mul.getName(), HasNUW, PropagateNSW);
156   }
157 
158   // Similar to above, but an increment of the shifted value becomes an add:
159   // X * ((1 << Z) + 1) --> (X * (1 << Z)) + X --> (X << Z) + X
160   // This increases uses of X, so it may require a freeze, but that is still
161   // expected to be an improvement because it removes the multiply.
162   BinaryOperator *Shift;
163   if (match(Y, m_OneUse(m_Add(m_BinOp(Shift), m_One()))) &&
164       match(Shift, m_OneUse(m_Shl(m_One(), m_Value(Z))))) {
165     bool PropagateNSW = HasNSW && Shift->hasNoSignedWrap();
166     Value *FrX = X;
167     if (!isGuaranteedNotToBeUndef(X))
168       FrX = Builder.CreateFreeze(X, X->getName() + ".fr");
169     Value *Shl = Builder.CreateShl(FrX, Z, "mulshl", HasNUW, PropagateNSW);
170     return Builder.CreateAdd(Shl, FrX, Mul.getName(), HasNUW, PropagateNSW);
171   }
172 
173   // Similar to above, but a decrement of the shifted value is disguised as
174   // 'not' and becomes a sub:
175   // X * (~(-1 << Z)) --> X * ((1 << Z) - 1) --> (X << Z) - X
176   // This increases uses of X, so it may require a freeze, but that is still
177   // expected to be an improvement because it removes the multiply.
178   if (match(Y, m_OneUse(m_Not(m_OneUse(m_Shl(m_AllOnes(), m_Value(Z))))))) {
179     Value *FrX = X;
180     if (!isGuaranteedNotToBeUndef(X))
181       FrX = Builder.CreateFreeze(X, X->getName() + ".fr");
182     Value *Shl = Builder.CreateShl(FrX, Z, "mulshl");
183     return Builder.CreateSub(Shl, FrX, Mul.getName());
184   }
185 
186   return nullptr;
187 }
188 
visitMul(BinaryOperator & I)189 Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
190   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
191   if (Value *V =
192           simplifyMulInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
193                           SQ.getWithInstruction(&I)))
194     return replaceInstUsesWith(I, V);
195 
196   if (SimplifyAssociativeOrCommutative(I))
197     return &I;
198 
199   if (Instruction *X = foldVectorBinop(I))
200     return X;
201 
202   if (Instruction *Phi = foldBinopWithPhiOperands(I))
203     return Phi;
204 
205   if (Value *V = foldUsingDistributiveLaws(I))
206     return replaceInstUsesWith(I, V);
207 
208   Type *Ty = I.getType();
209   const unsigned BitWidth = Ty->getScalarSizeInBits();
210   const bool HasNSW = I.hasNoSignedWrap();
211   const bool HasNUW = I.hasNoUnsignedWrap();
212 
213   // X * -1 --> 0 - X
214   if (match(Op1, m_AllOnes())) {
215     return HasNSW ? BinaryOperator::CreateNSWNeg(Op0)
216                   : BinaryOperator::CreateNeg(Op0);
217   }
218 
219   // Also allow combining multiply instructions on vectors.
220   {
221     Value *NewOp;
222     Constant *C1, *C2;
223     const APInt *IVal;
224     if (match(&I, m_Mul(m_Shl(m_Value(NewOp), m_ImmConstant(C2)),
225                         m_ImmConstant(C1))) &&
226         match(C1, m_APInt(IVal))) {
227       // ((X << C2)*C1) == (X * (C1 << C2))
228       Constant *Shl =
229           ConstantFoldBinaryOpOperands(Instruction::Shl, C1, C2, DL);
230       assert(Shl && "Constant folding of immediate constants failed");
231       BinaryOperator *Mul = cast<BinaryOperator>(I.getOperand(0));
232       BinaryOperator *BO = BinaryOperator::CreateMul(NewOp, Shl);
233       if (HasNUW && Mul->hasNoUnsignedWrap())
234         BO->setHasNoUnsignedWrap();
235       if (HasNSW && Mul->hasNoSignedWrap() && Shl->isNotMinSignedValue())
236         BO->setHasNoSignedWrap();
237       return BO;
238     }
239 
240     if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
241       // Replace X*(2^C) with X << C, where C is either a scalar or a vector.
242       if (Constant *NewCst = ConstantExpr::getExactLogBase2(C1)) {
243         BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
244 
245         if (HasNUW)
246           Shl->setHasNoUnsignedWrap();
247         if (HasNSW) {
248           const APInt *V;
249           if (match(NewCst, m_APInt(V)) && *V != V->getBitWidth() - 1)
250             Shl->setHasNoSignedWrap();
251         }
252 
253         return Shl;
254       }
255     }
256   }
257 
258   // mul (shr exact X, N), (2^N + 1) -> add (X, shr exact (X, N))
259   {
260     Value *NewOp;
261     const APInt *ShiftC;
262     const APInt *MulAP;
263     if (BitWidth > 2 &&
264         match(&I, m_Mul(m_Exact(m_Shr(m_Value(NewOp), m_APInt(ShiftC))),
265                         m_APInt(MulAP))) &&
266         (*MulAP - 1).isPowerOf2() && *ShiftC == MulAP->logBase2()) {
267       Value *BinOp = Op0;
268       BinaryOperator *OpBO = cast<BinaryOperator>(Op0);
269 
270       // mul nuw (ashr exact X, N) -> add nuw (X, lshr exact (X, N))
271       if (HasNUW && OpBO->getOpcode() == Instruction::AShr && OpBO->hasOneUse())
272         BinOp = Builder.CreateLShr(NewOp, ConstantInt::get(Ty, *ShiftC), "",
273                                    /*isExact=*/true);
274 
275       auto *NewAdd = BinaryOperator::CreateAdd(NewOp, BinOp);
276       if (HasNSW && (HasNUW || OpBO->getOpcode() == Instruction::LShr ||
277                      ShiftC->getZExtValue() < BitWidth - 1))
278         NewAdd->setHasNoSignedWrap(true);
279 
280       NewAdd->setHasNoUnsignedWrap(HasNUW);
281       return NewAdd;
282     }
283   }
284 
285   if (Op0->hasOneUse() && match(Op1, m_NegatedPower2())) {
286     // Interpret  X * (-1<<C)  as  (-X) * (1<<C)  and try to sink the negation.
287     // The "* (1<<C)" thus becomes a potential shifting opportunity.
288     if (Value *NegOp0 =
289             Negator::Negate(/*IsNegation*/ true, HasNSW, Op0, *this)) {
290       auto *Op1C = cast<Constant>(Op1);
291       return replaceInstUsesWith(
292           I, Builder.CreateMul(NegOp0, ConstantExpr::getNeg(Op1C), "",
293                                /*HasNUW=*/false,
294                                HasNSW && Op1C->isNotMinSignedValue()));
295     }
296 
297     // Try to convert multiply of extended operand to narrow negate and shift
298     // for better analysis.
299     // This is valid if the shift amount (trailing zeros in the multiplier
300     // constant) clears more high bits than the bitwidth difference between
301     // source and destination types:
302     // ({z/s}ext X) * (-1<<C) --> (zext (-X)) << C
303     const APInt *NegPow2C;
304     Value *X;
305     if (match(Op0, m_ZExtOrSExt(m_Value(X))) &&
306         match(Op1, m_APIntAllowPoison(NegPow2C))) {
307       unsigned SrcWidth = X->getType()->getScalarSizeInBits();
308       unsigned ShiftAmt = NegPow2C->countr_zero();
309       if (ShiftAmt >= BitWidth - SrcWidth) {
310         Value *N = Builder.CreateNeg(X, X->getName() + ".neg");
311         Value *Z = Builder.CreateZExt(N, Ty, N->getName() + ".z");
312         return BinaryOperator::CreateShl(Z, ConstantInt::get(Ty, ShiftAmt));
313       }
314     }
315   }
316 
317   if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I))
318     return FoldedMul;
319 
320   if (Value *FoldedMul = foldMulSelectToNegate(I, Builder))
321     return replaceInstUsesWith(I, FoldedMul);
322 
323   // Simplify mul instructions with a constant RHS.
324   Constant *MulC;
325   if (match(Op1, m_ImmConstant(MulC))) {
326     // Canonicalize (X+C1)*MulC -> X*MulC+C1*MulC.
327     // Canonicalize (X|C1)*MulC -> X*MulC+C1*MulC.
328     Value *X;
329     Constant *C1;
330     if (match(Op0, m_OneUse(m_AddLike(m_Value(X), m_ImmConstant(C1))))) {
331       // C1*MulC simplifies to a tidier constant.
332       Value *NewC = Builder.CreateMul(C1, MulC);
333       auto *BOp0 = cast<BinaryOperator>(Op0);
334       bool Op0NUW =
335           (BOp0->getOpcode() == Instruction::Or || BOp0->hasNoUnsignedWrap());
336       Value *NewMul = Builder.CreateMul(X, MulC);
337       auto *BO = BinaryOperator::CreateAdd(NewMul, NewC);
338       if (HasNUW && Op0NUW) {
339         // If NewMulBO is constant we also can set BO to nuw.
340         if (auto *NewMulBO = dyn_cast<BinaryOperator>(NewMul))
341           NewMulBO->setHasNoUnsignedWrap();
342         BO->setHasNoUnsignedWrap();
343       }
344       return BO;
345     }
346   }
347 
348   // abs(X) * abs(X) -> X * X
349   Value *X;
350   if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
351     return BinaryOperator::CreateMul(X, X);
352 
353   {
354     Value *Y;
355     // abs(X) * abs(Y) -> abs(X * Y)
356     if (I.hasNoSignedWrap() &&
357         match(Op0,
358               m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(X), m_One()))) &&
359         match(Op1, m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(Y), m_One()))))
360       return replaceInstUsesWith(
361           I, Builder.CreateBinaryIntrinsic(Intrinsic::abs,
362                                            Builder.CreateNSWMul(X, Y),
363                                            Builder.getTrue()));
364   }
365 
366   // -X * C --> X * -C
367   Value *Y;
368   Constant *Op1C;
369   if (match(Op0, m_Neg(m_Value(X))) && match(Op1, m_Constant(Op1C)))
370     return BinaryOperator::CreateMul(X, ConstantExpr::getNeg(Op1C));
371 
372   // -X * -Y --> X * Y
373   if (match(Op0, m_Neg(m_Value(X))) && match(Op1, m_Neg(m_Value(Y)))) {
374     auto *NewMul = BinaryOperator::CreateMul(X, Y);
375     if (HasNSW && cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap() &&
376         cast<OverflowingBinaryOperator>(Op1)->hasNoSignedWrap())
377       NewMul->setHasNoSignedWrap();
378     return NewMul;
379   }
380 
381   // -X * Y --> -(X * Y)
382   // X * -Y --> -(X * Y)
383   if (match(&I, m_c_Mul(m_OneUse(m_Neg(m_Value(X))), m_Value(Y))))
384     return BinaryOperator::CreateNeg(Builder.CreateMul(X, Y));
385 
386   // (-X * Y) * -X --> (X * Y) * X
387   // (-X << Y) * -X --> (X << Y) * X
388   if (match(Op1, m_Neg(m_Value(X)))) {
389     if (Value *NegOp0 = Negator::Negate(false, /*IsNSW*/ false, Op0, *this))
390       return BinaryOperator::CreateMul(NegOp0, X);
391   }
392 
393   if (Op0->hasOneUse()) {
394     // (mul (div exact X, C0), C1)
395     //    -> (div exact X, C0 / C1)
396     // iff C0 % C1 == 0 and X / (C0 / C1) doesn't create UB.
397     const APInt *C1;
398     auto UDivCheck = [&C1](const APInt &C) { return C.urem(*C1).isZero(); };
399     auto SDivCheck = [&C1](const APInt &C) {
400       APInt Quot, Rem;
401       APInt::sdivrem(C, *C1, Quot, Rem);
402       return Rem.isZero() && !Quot.isAllOnes();
403     };
404     if (match(Op1, m_APInt(C1)) &&
405         (match(Op0, m_Exact(m_UDiv(m_Value(X), m_CheckedInt(UDivCheck)))) ||
406          match(Op0, m_Exact(m_SDiv(m_Value(X), m_CheckedInt(SDivCheck)))))) {
407       auto BOpc = cast<BinaryOperator>(Op0)->getOpcode();
408       return BinaryOperator::CreateExact(
409           BOpc, X,
410           Builder.CreateBinOp(BOpc, cast<BinaryOperator>(Op0)->getOperand(1),
411                               Op1));
412     }
413   }
414 
415   // (X / Y) *  Y = X - (X % Y)
416   // (X / Y) * -Y = (X % Y) - X
417   {
418     Value *Y = Op1;
419     BinaryOperator *Div = dyn_cast<BinaryOperator>(Op0);
420     if (!Div || (Div->getOpcode() != Instruction::UDiv &&
421                  Div->getOpcode() != Instruction::SDiv)) {
422       Y = Op0;
423       Div = dyn_cast<BinaryOperator>(Op1);
424     }
425     Value *Neg = dyn_castNegVal(Y);
426     if (Div && Div->hasOneUse() &&
427         (Div->getOperand(1) == Y || Div->getOperand(1) == Neg) &&
428         (Div->getOpcode() == Instruction::UDiv ||
429          Div->getOpcode() == Instruction::SDiv)) {
430       Value *X = Div->getOperand(0), *DivOp1 = Div->getOperand(1);
431 
432       // If the division is exact, X % Y is zero, so we end up with X or -X.
433       if (Div->isExact()) {
434         if (DivOp1 == Y)
435           return replaceInstUsesWith(I, X);
436         return BinaryOperator::CreateNeg(X);
437       }
438 
439       auto RemOpc = Div->getOpcode() == Instruction::UDiv ? Instruction::URem
440                                                           : Instruction::SRem;
441       // X must be frozen because we are increasing its number of uses.
442       Value *XFreeze = X;
443       if (!isGuaranteedNotToBeUndef(X))
444         XFreeze = Builder.CreateFreeze(X, X->getName() + ".fr");
445       Value *Rem = Builder.CreateBinOp(RemOpc, XFreeze, DivOp1);
446       if (DivOp1 == Y)
447         return BinaryOperator::CreateSub(XFreeze, Rem);
448       return BinaryOperator::CreateSub(Rem, XFreeze);
449     }
450   }
451 
452   // Fold the following two scenarios:
453   //   1) i1 mul -> i1 and.
454   //   2) X * Y --> X & Y, iff X, Y can be only {0,1}.
455   // Note: We could use known bits to generalize this and related patterns with
456   // shifts/truncs
457   if (Ty->isIntOrIntVectorTy(1) ||
458       (match(Op0, m_And(m_Value(), m_One())) &&
459        match(Op1, m_And(m_Value(), m_One()))))
460     return BinaryOperator::CreateAnd(Op0, Op1);
461 
462   if (Value *R = foldMulShl1(I, /* CommuteOperands */ false, Builder))
463     return replaceInstUsesWith(I, R);
464   if (Value *R = foldMulShl1(I, /* CommuteOperands */ true, Builder))
465     return replaceInstUsesWith(I, R);
466 
467   // (zext bool X) * (zext bool Y) --> zext (and X, Y)
468   // (sext bool X) * (sext bool Y) --> zext (and X, Y)
469   // Note: -1 * -1 == 1 * 1 == 1 (if the extends match, the result is the same)
470   if (((match(Op0, m_ZExt(m_Value(X))) && match(Op1, m_ZExt(m_Value(Y)))) ||
471        (match(Op0, m_SExt(m_Value(X))) && match(Op1, m_SExt(m_Value(Y))))) &&
472       X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() &&
473       (Op0->hasOneUse() || Op1->hasOneUse() || X == Y)) {
474     Value *And = Builder.CreateAnd(X, Y, "mulbool");
475     return CastInst::Create(Instruction::ZExt, And, Ty);
476   }
477   // (sext bool X) * (zext bool Y) --> sext (and X, Y)
478   // (zext bool X) * (sext bool Y) --> sext (and X, Y)
479   // Note: -1 * 1 == 1 * -1  == -1
480   if (((match(Op0, m_SExt(m_Value(X))) && match(Op1, m_ZExt(m_Value(Y)))) ||
481        (match(Op0, m_ZExt(m_Value(X))) && match(Op1, m_SExt(m_Value(Y))))) &&
482       X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() &&
483       (Op0->hasOneUse() || Op1->hasOneUse())) {
484     Value *And = Builder.CreateAnd(X, Y, "mulbool");
485     return CastInst::Create(Instruction::SExt, And, Ty);
486   }
487 
488   // (zext bool X) * Y --> X ? Y : 0
489   // Y * (zext bool X) --> X ? Y : 0
490   if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
491     return SelectInst::Create(X, Op1, ConstantInt::getNullValue(Ty));
492   if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
493     return SelectInst::Create(X, Op0, ConstantInt::getNullValue(Ty));
494 
495   // mul (sext X), Y -> select X, -Y, 0
496   // mul Y, (sext X) -> select X, -Y, 0
497   if (match(&I, m_c_Mul(m_OneUse(m_SExt(m_Value(X))), m_Value(Y))) &&
498       X->getType()->isIntOrIntVectorTy(1))
499     return SelectInst::Create(X, Builder.CreateNeg(Y, "", I.hasNoSignedWrap()),
500                               ConstantInt::getNullValue(Op0->getType()));
501 
502   Constant *ImmC;
503   if (match(Op1, m_ImmConstant(ImmC))) {
504     // (sext bool X) * C --> X ? -C : 0
505     if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
506       Constant *NegC = ConstantExpr::getNeg(ImmC);
507       return SelectInst::Create(X, NegC, ConstantInt::getNullValue(Ty));
508     }
509 
510     // (ashr i32 X, 31) * C --> (X < 0) ? -C : 0
511     const APInt *C;
512     if (match(Op0, m_OneUse(m_AShr(m_Value(X), m_APInt(C)))) &&
513         *C == C->getBitWidth() - 1) {
514       Constant *NegC = ConstantExpr::getNeg(ImmC);
515       Value *IsNeg = Builder.CreateIsNeg(X, "isneg");
516       return SelectInst::Create(IsNeg, NegC, ConstantInt::getNullValue(Ty));
517     }
518   }
519 
520   // (lshr X, 31) * Y --> (X < 0) ? Y : 0
521   // TODO: We are not checking one-use because the elimination of the multiply
522   //       is better for analysis?
523   const APInt *C;
524   if (match(&I, m_c_BinOp(m_LShr(m_Value(X), m_APInt(C)), m_Value(Y))) &&
525       *C == C->getBitWidth() - 1) {
526     Value *IsNeg = Builder.CreateIsNeg(X, "isneg");
527     return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(Ty));
528   }
529 
530   // (and X, 1) * Y --> (trunc X) ? Y : 0
531   if (match(&I, m_c_BinOp(m_OneUse(m_And(m_Value(X), m_One())), m_Value(Y)))) {
532     Value *Tr = Builder.CreateTrunc(X, CmpInst::makeCmpResultType(Ty));
533     return SelectInst::Create(Tr, Y, ConstantInt::getNullValue(Ty));
534   }
535 
536   // ((ashr X, 31) | 1) * X --> abs(X)
537   // X * ((ashr X, 31) | 1) --> abs(X)
538   if (match(&I, m_c_BinOp(m_Or(m_AShr(m_Value(X),
539                                       m_SpecificIntAllowPoison(BitWidth - 1)),
540                                m_One()),
541                           m_Deferred(X)))) {
542     Value *Abs = Builder.CreateBinaryIntrinsic(
543         Intrinsic::abs, X, ConstantInt::getBool(I.getContext(), HasNSW));
544     Abs->takeName(&I);
545     return replaceInstUsesWith(I, Abs);
546   }
547 
548   if (Instruction *Ext = narrowMathIfNoOverflow(I))
549     return Ext;
550 
551   if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
552     return Res;
553 
554   // (mul Op0 Op1):
555   //    if Log2(Op0) folds away ->
556   //        (shl Op1, Log2(Op0))
557   //    if Log2(Op1) folds away ->
558   //        (shl Op0, Log2(Op1))
559   if (Value *Res = tryGetLog2(Op0, /*AssumeNonZero=*/false)) {
560     BinaryOperator *Shl = BinaryOperator::CreateShl(Op1, Res);
561     // We can only propegate nuw flag.
562     Shl->setHasNoUnsignedWrap(HasNUW);
563     return Shl;
564   }
565   if (Value *Res = tryGetLog2(Op1, /*AssumeNonZero=*/false)) {
566     BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, Res);
567     // We can only propegate nuw flag.
568     Shl->setHasNoUnsignedWrap(HasNUW);
569     return Shl;
570   }
571 
572   bool Changed = false;
573   if (!HasNSW && willNotOverflowSignedMul(Op0, Op1, I)) {
574     Changed = true;
575     I.setHasNoSignedWrap(true);
576   }
577 
578   if (!HasNUW && willNotOverflowUnsignedMul(Op0, Op1, I, I.hasNoSignedWrap())) {
579     Changed = true;
580     I.setHasNoUnsignedWrap(true);
581   }
582 
583   return Changed ? &I : nullptr;
584 }
585 
foldFPSignBitOps(BinaryOperator & I)586 Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) {
587   BinaryOperator::BinaryOps Opcode = I.getOpcode();
588   assert((Opcode == Instruction::FMul || Opcode == Instruction::FDiv) &&
589          "Expected fmul or fdiv");
590 
591   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
592   Value *X, *Y;
593 
594   // -X * -Y --> X * Y
595   // -X / -Y --> X / Y
596   if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
597     return BinaryOperator::CreateWithCopiedFlags(Opcode, X, Y, &I);
598 
599   // fabs(X) * fabs(X) -> X * X
600   // fabs(X) / fabs(X) -> X / X
601   if (Op0 == Op1 && match(Op0, m_FAbs(m_Value(X))))
602     return BinaryOperator::CreateWithCopiedFlags(Opcode, X, X, &I);
603 
604   // fabs(X) * fabs(Y) --> fabs(X * Y)
605   // fabs(X) / fabs(Y) --> fabs(X / Y)
606   if (match(Op0, m_FAbs(m_Value(X))) && match(Op1, m_FAbs(m_Value(Y))) &&
607       (Op0->hasOneUse() || Op1->hasOneUse())) {
608     Value *XY = Builder.CreateBinOpFMF(Opcode, X, Y, &I);
609     Value *Fabs =
610         Builder.CreateUnaryIntrinsic(Intrinsic::fabs, XY, &I, I.getName());
611     return replaceInstUsesWith(I, Fabs);
612   }
613 
614   return nullptr;
615 }
616 
foldPowiReassoc(BinaryOperator & I)617 Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
618   auto createPowiExpr = [](BinaryOperator &I, InstCombinerImpl &IC, Value *X,
619                            Value *Y, Value *Z) {
620     InstCombiner::BuilderTy &Builder = IC.Builder;
621     Value *YZ = Builder.CreateAdd(Y, Z);
622     Instruction *NewPow = Builder.CreateIntrinsic(
623         Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I);
624 
625     return NewPow;
626   };
627 
628   Value *X, *Y, *Z;
629   unsigned Opcode = I.getOpcode();
630   assert((Opcode == Instruction::FMul || Opcode == Instruction::FDiv) &&
631          "Unexpected opcode");
632 
633   // powi(X, Y) * X --> powi(X, Y+1)
634   // X * powi(X, Y) --> powi(X, Y+1)
635   if (match(&I, m_c_FMul(m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
636                              m_Value(X), m_Value(Y)))),
637                          m_Deferred(X)))) {
638     Constant *One = ConstantInt::get(Y->getType(), 1);
639     if (willNotOverflowSignedAdd(Y, One, I)) {
640       Instruction *NewPow = createPowiExpr(I, *this, X, Y, One);
641       return replaceInstUsesWith(I, NewPow);
642     }
643   }
644 
645   // powi(x, y) * powi(x, z) -> powi(x, y + z)
646   Value *Op0 = I.getOperand(0);
647   Value *Op1 = I.getOperand(1);
648   if (Opcode == Instruction::FMul && I.isOnlyUserOfAnyOperand() &&
649       match(Op0, m_AllowReassoc(
650                      m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y)))) &&
651       match(Op1, m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(m_Specific(X),
652                                                              m_Value(Z)))) &&
653       Y->getType() == Z->getType()) {
654     Instruction *NewPow = createPowiExpr(I, *this, X, Y, Z);
655     return replaceInstUsesWith(I, NewPow);
656   }
657 
658   if (Opcode == Instruction::FDiv && I.hasAllowReassoc() && I.hasNoNaNs()) {
659     // powi(X, Y) / X --> powi(X, Y-1)
660     // This is legal when (Y - 1) can't wraparound, in which case reassoc and
661     // nnan are required.
662     // TODO: Multi-use may be also better off creating Powi(x,y-1)
663     if (match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
664                        m_Specific(Op1), m_Value(Y))))) &&
665         willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
666       Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
667       Instruction *NewPow = createPowiExpr(I, *this, Op1, Y, NegOne);
668       return replaceInstUsesWith(I, NewPow);
669     }
670 
671     // powi(X, Y) / (X * Z) --> powi(X, Y-1) / Z
672     // This is legal when (Y - 1) can't wraparound, in which case reassoc and
673     // nnan are required.
674     // TODO: Multi-use may be also better off creating Powi(x,y-1)
675     if (match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
676                        m_Value(X), m_Value(Y))))) &&
677         match(Op1, m_AllowReassoc(m_c_FMul(m_Specific(X), m_Value(Z)))) &&
678         willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
679       Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
680       auto *NewPow = createPowiExpr(I, *this, X, Y, NegOne);
681       return BinaryOperator::CreateFDivFMF(NewPow, Z, &I);
682     }
683   }
684 
685   return nullptr;
686 }
687 
688 // If we have the following pattern,
689 // X = 1.0/sqrt(a)
690 // R1 = X * X
691 // R2 = a/sqrt(a)
692 // then this method collects all the instructions that match R1 and R2.
getFSqrtDivOptPattern(Instruction * Div,SmallPtrSetImpl<Instruction * > & R1,SmallPtrSetImpl<Instruction * > & R2)693 static bool getFSqrtDivOptPattern(Instruction *Div,
694                                   SmallPtrSetImpl<Instruction *> &R1,
695                                   SmallPtrSetImpl<Instruction *> &R2) {
696   Value *A;
697   if (match(Div, m_FDiv(m_FPOne(), m_Sqrt(m_Value(A)))) ||
698       match(Div, m_FDiv(m_SpecificFP(-1.0), m_Sqrt(m_Value(A))))) {
699     for (User *U : Div->users()) {
700       Instruction *I = cast<Instruction>(U);
701       if (match(I, m_FMul(m_Specific(Div), m_Specific(Div))))
702         R1.insert(I);
703     }
704 
705     CallInst *CI = cast<CallInst>(Div->getOperand(1));
706     for (User *U : CI->users()) {
707       Instruction *I = cast<Instruction>(U);
708       if (match(I, m_FDiv(m_Specific(A), m_Sqrt(m_Specific(A)))))
709         R2.insert(I);
710     }
711   }
712   return !R1.empty() && !R2.empty();
713 }
714 
715 // Check legality for transforming
716 // x = 1.0/sqrt(a)
717 // r1 = x * x;
718 // r2 = a/sqrt(a);
719 //
720 // TO
721 //
722 // r1 = 1/a
723 // r2 = sqrt(a)
724 // x = r1 * r2
725 // This transform works only when 'a' is known positive.
isFSqrtDivToFMulLegal(Instruction * X,SmallPtrSetImpl<Instruction * > & R1,SmallPtrSetImpl<Instruction * > & R2)726 static bool isFSqrtDivToFMulLegal(Instruction *X,
727                                   SmallPtrSetImpl<Instruction *> &R1,
728                                   SmallPtrSetImpl<Instruction *> &R2) {
729   // Check if the required pattern for the transformation exists.
730   if (!getFSqrtDivOptPattern(X, R1, R2))
731     return false;
732 
733   BasicBlock *BBx = X->getParent();
734   BasicBlock *BBr1 = (*R1.begin())->getParent();
735   BasicBlock *BBr2 = (*R2.begin())->getParent();
736 
737   CallInst *FSqrt = cast<CallInst>(X->getOperand(1));
738   if (!FSqrt->hasAllowReassoc() || !FSqrt->hasNoNaNs() ||
739       !FSqrt->hasNoSignedZeros() || !FSqrt->hasNoInfs())
740     return false;
741 
742   // We change x = 1/sqrt(a) to x = sqrt(a) * 1/a . This change isn't allowed
743   // by recip fp as it is strictly meant to transform ops of type a/b to
744   // a * 1/b. So, this can be considered as algebraic rewrite and reassoc flag
745   // has been used(rather abused)in the past for algebraic rewrites.
746   if (!X->hasAllowReassoc() || !X->hasAllowReciprocal() || !X->hasNoInfs())
747     return false;
748 
749   // Check the constraints on X, R1 and R2 combined.
750   // fdiv instruction and one of the multiplications must reside in the same
751   // block. If not, the optimized code may execute more ops than before and
752   // this may hamper the performance.
753   if (BBx != BBr1 && BBx != BBr2)
754     return false;
755 
756   // Check the constraints on instructions in R1.
757   if (any_of(R1, [BBr1](Instruction *I) {
758         // When you have multiple instructions residing in R1 and R2
759         // respectively, it's difficult to generate combinations of (R1,R2) and
760         // then check if we have the required pattern. So, for now, just be
761         // conservative.
762         return (I->getParent() != BBr1 || !I->hasAllowReassoc());
763       }))
764     return false;
765 
766   // Check the constraints on instructions in R2.
767   return all_of(R2, [BBr2](Instruction *I) {
768     // When you have multiple instructions residing in R1 and R2
769     // respectively, it's difficult to generate combination of (R1,R2) and
770     // then check if we have the required pattern. So, for now, just be
771     // conservative.
772     return (I->getParent() == BBr2 && I->hasAllowReassoc());
773   });
774 }
775 
foldFMulReassoc(BinaryOperator & I)776 Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
777   Value *Op0 = I.getOperand(0);
778   Value *Op1 = I.getOperand(1);
779   Value *X, *Y;
780   Constant *C;
781   BinaryOperator *Op0BinOp;
782 
783   // Reassociate constant RHS with another constant to form constant
784   // expression.
785   if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP() &&
786       match(Op0, m_AllowReassoc(m_BinOp(Op0BinOp)))) {
787     // Everything in this scope folds I with Op0, intersecting their FMF.
788     FastMathFlags FMF = I.getFastMathFlags() & Op0BinOp->getFastMathFlags();
789     Constant *C1;
790     if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) {
791       // (C1 / X) * C --> (C * C1) / X
792       Constant *CC1 =
793           ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL);
794       if (CC1 && CC1->isNormalFP())
795         return BinaryOperator::CreateFDivFMF(CC1, X, FMF);
796     }
797     if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
798       // FIXME: This seems like it should also be checking for arcp
799       // (X / C1) * C --> X * (C / C1)
800       Constant *CDivC1 =
801           ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C1, DL);
802       if (CDivC1 && CDivC1->isNormalFP())
803         return BinaryOperator::CreateFMulFMF(X, CDivC1, FMF);
804 
805       // If the constant was a denormal, try reassociating differently.
806       // (X / C1) * C --> X / (C1 / C)
807       Constant *C1DivC =
808           ConstantFoldBinaryOpOperands(Instruction::FDiv, C1, C, DL);
809       if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP())
810         return BinaryOperator::CreateFDivFMF(X, C1DivC, FMF);
811     }
812 
813     // We do not need to match 'fadd C, X' and 'fsub X, C' because they are
814     // canonicalized to 'fadd X, C'. Distributing the multiply may allow
815     // further folds and (X * C) + C2 is 'fma'.
816     if (match(Op0, m_OneUse(m_FAdd(m_Value(X), m_Constant(C1))))) {
817       // (X + C1) * C --> (X * C) + (C * C1)
818       if (Constant *CC1 =
819               ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) {
820         Value *XC = Builder.CreateFMulFMF(X, C, FMF);
821         return BinaryOperator::CreateFAddFMF(XC, CC1, FMF);
822       }
823     }
824     if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) {
825       // (C1 - X) * C --> (C * C1) - (X * C)
826       if (Constant *CC1 =
827               ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) {
828         Value *XC = Builder.CreateFMulFMF(X, C, FMF);
829         return BinaryOperator::CreateFSubFMF(CC1, XC, FMF);
830       }
831     }
832   }
833 
834   Value *Z;
835   if (match(&I,
836             m_c_FMul(m_AllowReassoc(m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))),
837                      m_Value(Z)))) {
838     BinaryOperator *DivOp = cast<BinaryOperator>(((Z == Op0) ? Op1 : Op0));
839     FastMathFlags FMF = I.getFastMathFlags() & DivOp->getFastMathFlags();
840     if (FMF.allowReassoc()) {
841       // Sink division: (X / Y) * Z --> (X * Z) / Y
842       auto *NewFMul = Builder.CreateFMulFMF(X, Z, FMF);
843       return BinaryOperator::CreateFDivFMF(NewFMul, Y, FMF);
844     }
845   }
846 
847   // sqrt(X) * sqrt(Y) -> sqrt(X * Y)
848   // nnan disallows the possibility of returning a number if both operands are
849   // negative (in that case, we should return NaN).
850   if (I.hasNoNaNs() && match(Op0, m_OneUse(m_Sqrt(m_Value(X)))) &&
851       match(Op1, m_OneUse(m_Sqrt(m_Value(Y))))) {
852     Value *XY = Builder.CreateFMulFMF(X, Y, &I);
853     Value *Sqrt = Builder.CreateUnaryIntrinsic(Intrinsic::sqrt, XY, &I);
854     return replaceInstUsesWith(I, Sqrt);
855   }
856 
857   // The following transforms are done irrespective of the number of uses
858   // for the expression "1.0/sqrt(X)".
859   //  1) 1.0/sqrt(X) * X -> X/sqrt(X)
860   //  2) X * 1.0/sqrt(X) -> X/sqrt(X)
861   // We always expect the backend to reduce X/sqrt(X) to sqrt(X), if it
862   // has the necessary (reassoc) fast-math-flags.
863   if (I.hasNoSignedZeros() &&
864       match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) &&
865       match(Y, m_Sqrt(m_Value(X))) && Op1 == X)
866     return BinaryOperator::CreateFDivFMF(X, Y, &I);
867   if (I.hasNoSignedZeros() &&
868       match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) &&
869       match(Y, m_Sqrt(m_Value(X))) && Op0 == X)
870     return BinaryOperator::CreateFDivFMF(X, Y, &I);
871 
872   // Like the similar transform in instsimplify, this requires 'nsz' because
873   // sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0.
874   if (I.hasNoNaNs() && I.hasNoSignedZeros() && Op0 == Op1 && Op0->hasNUses(2)) {
875     // Peek through fdiv to find squaring of square root:
876     // (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y
877     if (match(Op0, m_FDiv(m_Value(X), m_Sqrt(m_Value(Y))))) {
878       Value *XX = Builder.CreateFMulFMF(X, X, &I);
879       return BinaryOperator::CreateFDivFMF(XX, Y, &I);
880     }
881     // (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X)
882     if (match(Op0, m_FDiv(m_Sqrt(m_Value(Y)), m_Value(X)))) {
883       Value *XX = Builder.CreateFMulFMF(X, X, &I);
884       return BinaryOperator::CreateFDivFMF(Y, XX, &I);
885     }
886   }
887 
888   // pow(X, Y) * X --> pow(X, Y+1)
889   // X * pow(X, Y) --> pow(X, Y+1)
890   if (match(&I, m_c_FMul(m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Value(X),
891                                                               m_Value(Y))),
892                          m_Deferred(X)))) {
893     Value *Y1 = Builder.CreateFAddFMF(Y, ConstantFP::get(I.getType(), 1.0), &I);
894     Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, Y1, &I);
895     return replaceInstUsesWith(I, Pow);
896   }
897 
898   if (Instruction *FoldedPowi = foldPowiReassoc(I))
899     return FoldedPowi;
900 
901   if (I.isOnlyUserOfAnyOperand()) {
902     // pow(X, Y) * pow(X, Z) -> pow(X, Y + Z)
903     if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) &&
904         match(Op1, m_Intrinsic<Intrinsic::pow>(m_Specific(X), m_Value(Z)))) {
905       auto *YZ = Builder.CreateFAddFMF(Y, Z, &I);
906       auto *NewPow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, YZ, &I);
907       return replaceInstUsesWith(I, NewPow);
908     }
909     // pow(X, Y) * pow(Z, Y) -> pow(X * Z, Y)
910     if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) &&
911         match(Op1, m_Intrinsic<Intrinsic::pow>(m_Value(Z), m_Specific(Y)))) {
912       auto *XZ = Builder.CreateFMulFMF(X, Z, &I);
913       auto *NewPow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, XZ, Y, &I);
914       return replaceInstUsesWith(I, NewPow);
915     }
916 
917     // exp(X) * exp(Y) -> exp(X + Y)
918     if (match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))) &&
919         match(Op1, m_Intrinsic<Intrinsic::exp>(m_Value(Y)))) {
920       Value *XY = Builder.CreateFAddFMF(X, Y, &I);
921       Value *Exp = Builder.CreateUnaryIntrinsic(Intrinsic::exp, XY, &I);
922       return replaceInstUsesWith(I, Exp);
923     }
924 
925     // exp2(X) * exp2(Y) -> exp2(X + Y)
926     if (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) &&
927         match(Op1, m_Intrinsic<Intrinsic::exp2>(m_Value(Y)))) {
928       Value *XY = Builder.CreateFAddFMF(X, Y, &I);
929       Value *Exp2 = Builder.CreateUnaryIntrinsic(Intrinsic::exp2, XY, &I);
930       return replaceInstUsesWith(I, Exp2);
931     }
932   }
933 
934   // (X*Y) * X => (X*X) * Y where Y != X
935   //  The purpose is two-fold:
936   //   1) to form a power expression (of X).
937   //   2) potentially shorten the critical path: After transformation, the
938   //  latency of the instruction Y is amortized by the expression of X*X,
939   //  and therefore Y is in a "less critical" position compared to what it
940   //  was before the transformation.
941   if (match(Op0, m_OneUse(m_c_FMul(m_Specific(Op1), m_Value(Y)))) && Op1 != Y) {
942     Value *XX = Builder.CreateFMulFMF(Op1, Op1, &I);
943     return BinaryOperator::CreateFMulFMF(XX, Y, &I);
944   }
945   if (match(Op1, m_OneUse(m_c_FMul(m_Specific(Op0), m_Value(Y)))) && Op0 != Y) {
946     Value *XX = Builder.CreateFMulFMF(Op0, Op0, &I);
947     return BinaryOperator::CreateFMulFMF(XX, Y, &I);
948   }
949 
950   return nullptr;
951 }
952 
visitFMul(BinaryOperator & I)953 Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
954   if (Value *V = simplifyFMulInst(I.getOperand(0), I.getOperand(1),
955                                   I.getFastMathFlags(),
956                                   SQ.getWithInstruction(&I)))
957     return replaceInstUsesWith(I, V);
958 
959   if (SimplifyAssociativeOrCommutative(I))
960     return &I;
961 
962   if (Instruction *X = foldVectorBinop(I))
963     return X;
964 
965   if (Instruction *Phi = foldBinopWithPhiOperands(I))
966     return Phi;
967 
968   if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I))
969     return FoldedMul;
970 
971   if (Value *FoldedMul = foldMulSelectToNegate(I, Builder))
972     return replaceInstUsesWith(I, FoldedMul);
973 
974   if (Instruction *R = foldFPSignBitOps(I))
975     return R;
976 
977   if (Instruction *R = foldFBinOpOfIntCasts(I))
978     return R;
979 
980   // X * -1.0 --> -X
981   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
982   if (match(Op1, m_SpecificFP(-1.0)))
983     return UnaryOperator::CreateFNegFMF(Op0, &I);
984 
985   // With no-nans/no-infs:
986   // X * 0.0 --> copysign(0.0, X)
987   // X * -0.0 --> copysign(0.0, -X)
988   const APFloat *FPC;
989   if (match(Op1, m_APFloatAllowPoison(FPC)) && FPC->isZero() &&
990       ((I.hasNoInfs() && isKnownNeverNaN(Op0, SQ.getWithInstruction(&I))) ||
991        isKnownNeverNaN(&I, SQ.getWithInstruction(&I)))) {
992     if (FPC->isNegative())
993       Op0 = Builder.CreateFNegFMF(Op0, &I);
994     CallInst *CopySign = Builder.CreateIntrinsic(Intrinsic::copysign,
995                                                  {I.getType()}, {Op1, Op0}, &I);
996     return replaceInstUsesWith(I, CopySign);
997   }
998 
999   // -X * C --> X * -C
1000   Value *X, *Y;
1001   Constant *C;
1002   if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Constant(C)))
1003     if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
1004       return BinaryOperator::CreateFMulFMF(X, NegC, &I);
1005 
1006   if (I.hasNoNaNs() && I.hasNoSignedZeros()) {
1007     // (uitofp bool X) * Y --> X ? Y : 0
1008     // Y * (uitofp bool X) --> X ? Y : 0
1009     // Note INF * 0 is NaN.
1010     if (match(Op0, m_UIToFP(m_Value(X))) &&
1011         X->getType()->isIntOrIntVectorTy(1)) {
1012       auto *SI = SelectInst::Create(X, Op1, ConstantFP::get(I.getType(), 0.0));
1013       SI->copyFastMathFlags(I.getFastMathFlags());
1014       return SI;
1015     }
1016     if (match(Op1, m_UIToFP(m_Value(X))) &&
1017         X->getType()->isIntOrIntVectorTy(1)) {
1018       auto *SI = SelectInst::Create(X, Op0, ConstantFP::get(I.getType(), 0.0));
1019       SI->copyFastMathFlags(I.getFastMathFlags());
1020       return SI;
1021     }
1022   }
1023 
1024   // (select A, B, C) * (select A, D, E) --> select A, (B*D), (C*E)
1025   if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
1026     return replaceInstUsesWith(I, V);
1027 
1028   if (I.hasAllowReassoc())
1029     if (Instruction *FoldedMul = foldFMulReassoc(I))
1030       return FoldedMul;
1031 
1032   // log2(X * 0.5) * Y = log2(X) * Y - Y
1033   if (I.isFast()) {
1034     IntrinsicInst *Log2 = nullptr;
1035     if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::log2>(
1036             m_OneUse(m_FMul(m_Value(X), m_SpecificFP(0.5))))))) {
1037       Log2 = cast<IntrinsicInst>(Op0);
1038       Y = Op1;
1039     }
1040     if (match(Op1, m_OneUse(m_Intrinsic<Intrinsic::log2>(
1041             m_OneUse(m_FMul(m_Value(X), m_SpecificFP(0.5))))))) {
1042       Log2 = cast<IntrinsicInst>(Op1);
1043       Y = Op0;
1044     }
1045     if (Log2) {
1046       Value *Log2 = Builder.CreateUnaryIntrinsic(Intrinsic::log2, X, &I);
1047       Value *LogXTimesY = Builder.CreateFMulFMF(Log2, Y, &I);
1048       return BinaryOperator::CreateFSubFMF(LogXTimesY, Y, &I);
1049     }
1050   }
1051 
1052   // Simplify FMUL recurrences starting with 0.0 to 0.0 if nnan and nsz are set.
1053   // Given a phi node with entry value as 0 and it used in fmul operation,
1054   // we can replace fmul with 0 safely and eleminate loop operation.
1055   PHINode *PN = nullptr;
1056   Value *Start = nullptr, *Step = nullptr;
1057   if (matchSimpleRecurrence(&I, PN, Start, Step) && I.hasNoNaNs() &&
1058       I.hasNoSignedZeros() && match(Start, m_Zero()))
1059     return replaceInstUsesWith(I, Start);
1060 
1061   // minimum(X, Y) * maximum(X, Y) => X * Y.
1062   if (match(&I,
1063             m_c_FMul(m_Intrinsic<Intrinsic::maximum>(m_Value(X), m_Value(Y)),
1064                      m_c_Intrinsic<Intrinsic::minimum>(m_Deferred(X),
1065                                                        m_Deferred(Y))))) {
1066     BinaryOperator *Result = BinaryOperator::CreateFMulFMF(X, Y, &I);
1067     // We cannot preserve ninf if nnan flag is not set.
1068     // If X is NaN and Y is Inf then in original program we had NaN * NaN,
1069     // while in optimized version NaN * Inf and this is a poison with ninf flag.
1070     if (!Result->hasNoNaNs())
1071       Result->setHasNoInfs(false);
1072     return Result;
1073   }
1074 
1075   // tan(X) * cos(X) -> sin(X)
1076   if (I.hasAllowContract() &&
1077       match(&I,
1078             m_c_FMul(m_OneUse(m_Intrinsic<Intrinsic::tan>(m_Value(X))),
1079                      m_OneUse(m_Intrinsic<Intrinsic::cos>(m_Deferred(X)))))) {
1080     auto *Sin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, &I);
1081     if (auto *Metadata = I.getMetadata(LLVMContext::MD_fpmath)) {
1082       Sin->setMetadata(LLVMContext::MD_fpmath, Metadata);
1083     }
1084     return replaceInstUsesWith(I, Sin);
1085   }
1086 
1087   return nullptr;
1088 }
1089 
1090 /// Fold a divide or remainder with a select instruction divisor when one of the
1091 /// select operands is zero. In that case, we can use the other select operand
1092 /// because div/rem by zero is undefined.
simplifyDivRemOfSelectWithZeroOp(BinaryOperator & I)1093 bool InstCombinerImpl::simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I) {
1094   SelectInst *SI = dyn_cast<SelectInst>(I.getOperand(1));
1095   if (!SI)
1096     return false;
1097 
1098   int NonNullOperand;
1099   if (match(SI->getTrueValue(), m_Zero()))
1100     // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
1101     NonNullOperand = 2;
1102   else if (match(SI->getFalseValue(), m_Zero()))
1103     // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
1104     NonNullOperand = 1;
1105   else
1106     return false;
1107 
1108   // Change the div/rem to use 'Y' instead of the select.
1109   replaceOperand(I, 1, SI->getOperand(NonNullOperand));
1110 
1111   // Okay, we know we replace the operand of the div/rem with 'Y' with no
1112   // problem.  However, the select, or the condition of the select may have
1113   // multiple uses.  Based on our knowledge that the operand must be non-zero,
1114   // propagate the known value for the select into other uses of it, and
1115   // propagate a known value of the condition into its other users.
1116 
1117   // If the select and condition only have a single use, don't bother with this,
1118   // early exit.
1119   Value *SelectCond = SI->getCondition();
1120   if (SI->use_empty() && SelectCond->hasOneUse())
1121     return true;
1122 
1123   // Scan the current block backward, looking for other uses of SI.
1124   BasicBlock::iterator BBI = I.getIterator(), BBFront = I.getParent()->begin();
1125   Type *CondTy = SelectCond->getType();
1126   while (BBI != BBFront) {
1127     --BBI;
1128     // If we found an instruction that we can't assume will return, so
1129     // information from below it cannot be propagated above it.
1130     if (!isGuaranteedToTransferExecutionToSuccessor(&*BBI))
1131       break;
1132 
1133     // Replace uses of the select or its condition with the known values.
1134     for (Use &Op : BBI->operands()) {
1135       if (Op == SI) {
1136         replaceUse(Op, SI->getOperand(NonNullOperand));
1137         Worklist.push(&*BBI);
1138       } else if (Op == SelectCond) {
1139         replaceUse(Op, NonNullOperand == 1 ? ConstantInt::getTrue(CondTy)
1140                                            : ConstantInt::getFalse(CondTy));
1141         Worklist.push(&*BBI);
1142       }
1143     }
1144 
1145     // If we past the instruction, quit looking for it.
1146     if (&*BBI == SI)
1147       SI = nullptr;
1148     if (&*BBI == SelectCond)
1149       SelectCond = nullptr;
1150 
1151     // If we ran out of things to eliminate, break out of the loop.
1152     if (!SelectCond && !SI)
1153       break;
1154 
1155   }
1156   return true;
1157 }
1158 
1159 /// True if the multiply can not be expressed in an int this size.
multiplyOverflows(const APInt & C1,const APInt & C2,APInt & Product,bool IsSigned)1160 static bool multiplyOverflows(const APInt &C1, const APInt &C2, APInt &Product,
1161                               bool IsSigned) {
1162   bool Overflow;
1163   Product = IsSigned ? C1.smul_ov(C2, Overflow) : C1.umul_ov(C2, Overflow);
1164   return Overflow;
1165 }
1166 
1167 /// True if C1 is a multiple of C2. Quotient contains C1/C2.
isMultiple(const APInt & C1,const APInt & C2,APInt & Quotient,bool IsSigned)1168 static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
1169                        bool IsSigned) {
1170   assert(C1.getBitWidth() == C2.getBitWidth() && "Constant widths not equal");
1171 
1172   // Bail if we will divide by zero.
1173   if (C2.isZero())
1174     return false;
1175 
1176   // Bail if we would divide INT_MIN by -1.
1177   if (IsSigned && C1.isMinSignedValue() && C2.isAllOnes())
1178     return false;
1179 
1180   APInt Remainder(C1.getBitWidth(), /*val=*/0ULL, IsSigned);
1181   if (IsSigned)
1182     APInt::sdivrem(C1, C2, Quotient, Remainder);
1183   else
1184     APInt::udivrem(C1, C2, Quotient, Remainder);
1185 
1186   return Remainder.isMinValue();
1187 }
1188 
foldIDivShl(BinaryOperator & I,InstCombiner::BuilderTy & Builder)1189 static Value *foldIDivShl(BinaryOperator &I, InstCombiner::BuilderTy &Builder) {
1190   assert((I.getOpcode() == Instruction::SDiv ||
1191           I.getOpcode() == Instruction::UDiv) &&
1192          "Expected integer divide");
1193 
1194   bool IsSigned = I.getOpcode() == Instruction::SDiv;
1195   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1196   Type *Ty = I.getType();
1197 
1198   Value *X, *Y, *Z;
1199 
1200   // With appropriate no-wrap constraints, remove a common factor in the
1201   // dividend and divisor that is disguised as a left-shifted value.
1202   if (match(Op1, m_Shl(m_Value(X), m_Value(Z))) &&
1203       match(Op0, m_c_Mul(m_Specific(X), m_Value(Y)))) {
1204     // Both operands must have the matching no-wrap for this kind of division.
1205     auto *Mul = cast<OverflowingBinaryOperator>(Op0);
1206     auto *Shl = cast<OverflowingBinaryOperator>(Op1);
1207     bool HasNUW = Mul->hasNoUnsignedWrap() && Shl->hasNoUnsignedWrap();
1208     bool HasNSW = Mul->hasNoSignedWrap() && Shl->hasNoSignedWrap();
1209 
1210     // (X * Y) u/ (X << Z) --> Y u>> Z
1211     if (!IsSigned && HasNUW)
1212       return Builder.CreateLShr(Y, Z, "", I.isExact());
1213 
1214     // (X * Y) s/ (X << Z) --> Y s/ (1 << Z)
1215     if (IsSigned && HasNSW && (Op0->hasOneUse() || Op1->hasOneUse())) {
1216       Value *Shl = Builder.CreateShl(ConstantInt::get(Ty, 1), Z);
1217       return Builder.CreateSDiv(Y, Shl, "", I.isExact());
1218     }
1219   }
1220 
1221   // With appropriate no-wrap constraints, remove a common factor in the
1222   // dividend and divisor that is disguised as a left-shift amount.
1223   if (match(Op0, m_Shl(m_Value(X), m_Value(Z))) &&
1224       match(Op1, m_Shl(m_Value(Y), m_Specific(Z)))) {
1225     auto *Shl0 = cast<OverflowingBinaryOperator>(Op0);
1226     auto *Shl1 = cast<OverflowingBinaryOperator>(Op1);
1227 
1228     // For unsigned div, we need 'nuw' on both shifts or
1229     // 'nsw' on both shifts + 'nuw' on the dividend.
1230     // (X << Z) / (Y << Z) --> X / Y
1231     if (!IsSigned &&
1232         ((Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap()) ||
1233          (Shl0->hasNoUnsignedWrap() && Shl0->hasNoSignedWrap() &&
1234           Shl1->hasNoSignedWrap())))
1235       return Builder.CreateUDiv(X, Y, "", I.isExact());
1236 
1237     // For signed div, we need 'nsw' on both shifts + 'nuw' on the divisor.
1238     // (X << Z) / (Y << Z) --> X / Y
1239     if (IsSigned && Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap() &&
1240         Shl1->hasNoUnsignedWrap())
1241       return Builder.CreateSDiv(X, Y, "", I.isExact());
1242   }
1243 
1244   // If X << Y and X << Z does not overflow, then:
1245   // (X << Y) / (X << Z) -> (1 << Y) / (1 << Z) -> 1 << Y >> Z
1246   if (match(Op0, m_Shl(m_Value(X), m_Value(Y))) &&
1247       match(Op1, m_Shl(m_Specific(X), m_Value(Z)))) {
1248     auto *Shl0 = cast<OverflowingBinaryOperator>(Op0);
1249     auto *Shl1 = cast<OverflowingBinaryOperator>(Op1);
1250 
1251     if (IsSigned ? (Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap())
1252                  : (Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap())) {
1253       Constant *One = ConstantInt::get(X->getType(), 1);
1254       // Only preserve the nsw flag if dividend has nsw
1255       // or divisor has nsw and operator is sdiv.
1256       Value *Dividend = Builder.CreateShl(
1257           One, Y, "shl.dividend",
1258           /*HasNUW=*/true,
1259           /*HasNSW=*/
1260           IsSigned ? (Shl0->hasNoUnsignedWrap() || Shl1->hasNoUnsignedWrap())
1261                    : Shl0->hasNoSignedWrap());
1262       return Builder.CreateLShr(Dividend, Z, "", I.isExact());
1263     }
1264   }
1265 
1266   return nullptr;
1267 }
1268 
1269 /// Common integer divide/remainder transforms
commonIDivRemTransforms(BinaryOperator & I)1270 Instruction *InstCombinerImpl::commonIDivRemTransforms(BinaryOperator &I) {
1271   assert(I.isIntDivRem() && "Unexpected instruction");
1272   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1273 
1274   // If any element of a constant divisor fixed width vector is zero or undef
1275   // the behavior is undefined and we can fold the whole op to poison.
1276   auto *Op1C = dyn_cast<Constant>(Op1);
1277   Type *Ty = I.getType();
1278   auto *VTy = dyn_cast<FixedVectorType>(Ty);
1279   if (Op1C && VTy) {
1280     unsigned NumElts = VTy->getNumElements();
1281     for (unsigned i = 0; i != NumElts; ++i) {
1282       Constant *Elt = Op1C->getAggregateElement(i);
1283       if (Elt && (Elt->isNullValue() || isa<UndefValue>(Elt)))
1284         return replaceInstUsesWith(I, PoisonValue::get(Ty));
1285     }
1286   }
1287 
1288   if (Instruction *Phi = foldBinopWithPhiOperands(I))
1289     return Phi;
1290 
1291   // The RHS is known non-zero.
1292   if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I))
1293     return replaceOperand(I, 1, V);
1294 
1295   // Handle cases involving: div/rem X, (select Cond, Y, Z)
1296   if (simplifyDivRemOfSelectWithZeroOp(I))
1297     return &I;
1298 
1299   // If the divisor is a select-of-constants, try to constant fold all div ops:
1300   // C div/rem (select Cond, TrueC, FalseC) --> select Cond, (C div/rem TrueC),
1301   // (C div/rem FalseC)
1302   // TODO: Adapt simplifyDivRemOfSelectWithZeroOp to allow this and other folds.
1303   if (match(Op0, m_ImmConstant()) &&
1304       match(Op1, m_Select(m_Value(), m_ImmConstant(), m_ImmConstant()))) {
1305     if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1),
1306                                           /*FoldWithMultiUse*/ true))
1307       return R;
1308   }
1309 
1310   return nullptr;
1311 }
1312 
1313 /// This function implements the transforms common to both integer division
1314 /// instructions (udiv and sdiv). It is called by the visitors to those integer
1315 /// division instructions.
1316 /// Common integer divide transforms
commonIDivTransforms(BinaryOperator & I)1317 Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
1318   if (Instruction *Res = commonIDivRemTransforms(I))
1319     return Res;
1320 
1321   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1322   bool IsSigned = I.getOpcode() == Instruction::SDiv;
1323   Type *Ty = I.getType();
1324 
1325   const APInt *C2;
1326   if (match(Op1, m_APInt(C2))) {
1327     Value *X;
1328     const APInt *C1;
1329 
1330     // (X / C1) / C2  -> X / (C1*C2)
1331     if ((IsSigned && match(Op0, m_SDiv(m_Value(X), m_APInt(C1)))) ||
1332         (!IsSigned && match(Op0, m_UDiv(m_Value(X), m_APInt(C1))))) {
1333       APInt Product(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
1334       if (!multiplyOverflows(*C1, *C2, Product, IsSigned))
1335         return BinaryOperator::Create(I.getOpcode(), X,
1336                                       ConstantInt::get(Ty, Product));
1337     }
1338 
1339     APInt Quotient(C2->getBitWidth(), /*val=*/0ULL, IsSigned);
1340     if ((IsSigned && match(Op0, m_NSWMul(m_Value(X), m_APInt(C1)))) ||
1341         (!IsSigned && match(Op0, m_NUWMul(m_Value(X), m_APInt(C1))))) {
1342 
1343       // (X * C1) / C2 -> X / (C2 / C1) if C2 is a multiple of C1.
1344       if (isMultiple(*C2, *C1, Quotient, IsSigned)) {
1345         auto *NewDiv = BinaryOperator::Create(I.getOpcode(), X,
1346                                               ConstantInt::get(Ty, Quotient));
1347         NewDiv->setIsExact(I.isExact());
1348         return NewDiv;
1349       }
1350 
1351       // (X * C1) / C2 -> X * (C1 / C2) if C1 is a multiple of C2.
1352       if (isMultiple(*C1, *C2, Quotient, IsSigned)) {
1353         auto *Mul = BinaryOperator::Create(Instruction::Mul, X,
1354                                            ConstantInt::get(Ty, Quotient));
1355         auto *OBO = cast<OverflowingBinaryOperator>(Op0);
1356         Mul->setHasNoUnsignedWrap(!IsSigned && OBO->hasNoUnsignedWrap());
1357         Mul->setHasNoSignedWrap(OBO->hasNoSignedWrap());
1358         return Mul;
1359       }
1360     }
1361 
1362     if ((IsSigned && match(Op0, m_NSWShl(m_Value(X), m_APInt(C1))) &&
1363          C1->ult(C1->getBitWidth() - 1)) ||
1364         (!IsSigned && match(Op0, m_NUWShl(m_Value(X), m_APInt(C1))) &&
1365          C1->ult(C1->getBitWidth()))) {
1366       APInt C1Shifted = APInt::getOneBitSet(
1367           C1->getBitWidth(), static_cast<unsigned>(C1->getZExtValue()));
1368 
1369       // (X << C1) / C2 -> X / (C2 >> C1) if C2 is a multiple of 1 << C1.
1370       if (isMultiple(*C2, C1Shifted, Quotient, IsSigned)) {
1371         auto *BO = BinaryOperator::Create(I.getOpcode(), X,
1372                                           ConstantInt::get(Ty, Quotient));
1373         BO->setIsExact(I.isExact());
1374         return BO;
1375       }
1376 
1377       // (X << C1) / C2 -> X * ((1 << C1) / C2) if 1 << C1 is a multiple of C2.
1378       if (isMultiple(C1Shifted, *C2, Quotient, IsSigned)) {
1379         auto *Mul = BinaryOperator::Create(Instruction::Mul, X,
1380                                            ConstantInt::get(Ty, Quotient));
1381         auto *OBO = cast<OverflowingBinaryOperator>(Op0);
1382         Mul->setHasNoUnsignedWrap(!IsSigned && OBO->hasNoUnsignedWrap());
1383         Mul->setHasNoSignedWrap(OBO->hasNoSignedWrap());
1384         return Mul;
1385       }
1386     }
1387 
1388     // Distribute div over add to eliminate a matching div/mul pair:
1389     // ((X * C2) + C1) / C2 --> X + C1/C2
1390     // We need a multiple of the divisor for a signed add constant, but
1391     // unsigned is fine with any constant pair.
1392     if (IsSigned &&
1393         match(Op0, m_NSWAddLike(m_NSWMul(m_Value(X), m_SpecificInt(*C2)),
1394                                 m_APInt(C1))) &&
1395         isMultiple(*C1, *C2, Quotient, IsSigned)) {
1396       return BinaryOperator::CreateNSWAdd(X, ConstantInt::get(Ty, Quotient));
1397     }
1398     if (!IsSigned &&
1399         match(Op0, m_NUWAddLike(m_NUWMul(m_Value(X), m_SpecificInt(*C2)),
1400                                 m_APInt(C1)))) {
1401       return BinaryOperator::CreateNUWAdd(X,
1402                                           ConstantInt::get(Ty, C1->udiv(*C2)));
1403     }
1404 
1405     if (!C2->isZero()) // avoid X udiv 0
1406       if (Instruction *FoldedDiv = foldBinOpIntoSelectOrPhi(I))
1407         return FoldedDiv;
1408   }
1409 
1410   if (match(Op0, m_One())) {
1411     assert(!Ty->isIntOrIntVectorTy(1) && "i1 divide not removed?");
1412     if (IsSigned) {
1413       // 1 / 0 --> undef ; 1 / 1 --> 1 ; 1 / -1 --> -1 ; 1 / anything else --> 0
1414       // (Op1 + 1) u< 3 ? Op1 : 0
1415       // Op1 must be frozen because we are increasing its number of uses.
1416       Value *F1 = Op1;
1417       if (!isGuaranteedNotToBeUndef(Op1))
1418         F1 = Builder.CreateFreeze(Op1, Op1->getName() + ".fr");
1419       Value *Inc = Builder.CreateAdd(F1, Op0);
1420       Value *Cmp = Builder.CreateICmpULT(Inc, ConstantInt::get(Ty, 3));
1421       return SelectInst::Create(Cmp, F1, ConstantInt::get(Ty, 0));
1422     } else {
1423       // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the
1424       // result is one, otherwise it's zero.
1425       return new ZExtInst(Builder.CreateICmpEQ(Op1, Op0), Ty);
1426     }
1427   }
1428 
1429   // See if we can fold away this div instruction.
1430   if (SimplifyDemandedInstructionBits(I))
1431     return &I;
1432 
1433   // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y
1434   Value *X, *Z;
1435   if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) // (X - Z) / Y; Y = Op1
1436     if ((IsSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) ||
1437         (!IsSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1)))))
1438       return BinaryOperator::Create(I.getOpcode(), X, Op1);
1439 
1440   // (X << Y) / X -> 1 << Y
1441   Value *Y;
1442   if (IsSigned && match(Op0, m_NSWShl(m_Specific(Op1), m_Value(Y))))
1443     return BinaryOperator::CreateNSWShl(ConstantInt::get(Ty, 1), Y);
1444   if (!IsSigned && match(Op0, m_NUWShl(m_Specific(Op1), m_Value(Y))))
1445     return BinaryOperator::CreateNUWShl(ConstantInt::get(Ty, 1), Y);
1446 
1447   // X / (X * Y) -> 1 / Y if the multiplication does not overflow.
1448   if (match(Op1, m_c_Mul(m_Specific(Op0), m_Value(Y)))) {
1449     bool HasNSW = cast<OverflowingBinaryOperator>(Op1)->hasNoSignedWrap();
1450     bool HasNUW = cast<OverflowingBinaryOperator>(Op1)->hasNoUnsignedWrap();
1451     if ((IsSigned && HasNSW) || (!IsSigned && HasNUW)) {
1452       replaceOperand(I, 0, ConstantInt::get(Ty, 1));
1453       replaceOperand(I, 1, Y);
1454       return &I;
1455     }
1456   }
1457 
1458   // (X << Z) / (X * Y) -> (1 << Z) / Y
1459   // TODO: Handle sdiv.
1460   if (!IsSigned && Op1->hasOneUse() &&
1461       match(Op0, m_NUWShl(m_Value(X), m_Value(Z))) &&
1462       match(Op1, m_c_Mul(m_Specific(X), m_Value(Y))))
1463     if (cast<OverflowingBinaryOperator>(Op1)->hasNoUnsignedWrap()) {
1464       Instruction *NewDiv = BinaryOperator::CreateUDiv(
1465           Builder.CreateShl(ConstantInt::get(Ty, 1), Z, "", /*NUW*/ true), Y);
1466       NewDiv->setIsExact(I.isExact());
1467       return NewDiv;
1468     }
1469 
1470   if (Value *R = foldIDivShl(I, Builder))
1471     return replaceInstUsesWith(I, R);
1472 
1473   // With the appropriate no-wrap constraint, remove a multiply by the divisor
1474   // after peeking through another divide:
1475   // ((Op1 * X) / Y) / Op1 --> X / Y
1476   if (match(Op0, m_BinOp(I.getOpcode(), m_c_Mul(m_Specific(Op1), m_Value(X)),
1477                          m_Value(Y)))) {
1478     auto *InnerDiv = cast<PossiblyExactOperator>(Op0);
1479     auto *Mul = cast<OverflowingBinaryOperator>(InnerDiv->getOperand(0));
1480     Instruction *NewDiv = nullptr;
1481     if (!IsSigned && Mul->hasNoUnsignedWrap())
1482       NewDiv = BinaryOperator::CreateUDiv(X, Y);
1483     else if (IsSigned && Mul->hasNoSignedWrap())
1484       NewDiv = BinaryOperator::CreateSDiv(X, Y);
1485 
1486     // Exact propagates only if both of the original divides are exact.
1487     if (NewDiv) {
1488       NewDiv->setIsExact(I.isExact() && InnerDiv->isExact());
1489       return NewDiv;
1490     }
1491   }
1492 
1493   // (X * Y) / (X * Z) --> Y / Z (and commuted variants)
1494   if (match(Op0, m_Mul(m_Value(X), m_Value(Y)))) {
1495     auto OB0HasNSW = cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap();
1496     auto OB0HasNUW = cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap();
1497 
1498     auto CreateDivOrNull = [&](Value *A, Value *B) -> Instruction * {
1499       auto OB1HasNSW = cast<OverflowingBinaryOperator>(Op1)->hasNoSignedWrap();
1500       auto OB1HasNUW =
1501           cast<OverflowingBinaryOperator>(Op1)->hasNoUnsignedWrap();
1502       const APInt *C1, *C2;
1503       if (IsSigned && OB0HasNSW) {
1504         if (OB1HasNSW && match(B, m_APInt(C1)) && !C1->isAllOnes())
1505           return BinaryOperator::CreateSDiv(A, B);
1506       }
1507       if (!IsSigned && OB0HasNUW) {
1508         if (OB1HasNUW)
1509           return BinaryOperator::CreateUDiv(A, B);
1510         if (match(A, m_APInt(C1)) && match(B, m_APInt(C2)) && C2->ule(*C1))
1511           return BinaryOperator::CreateUDiv(A, B);
1512       }
1513       return nullptr;
1514     };
1515 
1516     if (match(Op1, m_c_Mul(m_Specific(X), m_Value(Z)))) {
1517       if (auto *Val = CreateDivOrNull(Y, Z))
1518         return Val;
1519     }
1520     if (match(Op1, m_c_Mul(m_Specific(Y), m_Value(Z)))) {
1521       if (auto *Val = CreateDivOrNull(X, Z))
1522         return Val;
1523     }
1524   }
1525   return nullptr;
1526 }
1527 
takeLog2(Value * Op,unsigned Depth,bool AssumeNonZero,bool DoFold)1528 Value *InstCombinerImpl::takeLog2(Value *Op, unsigned Depth, bool AssumeNonZero,
1529                                   bool DoFold) {
1530   auto IfFold = [DoFold](function_ref<Value *()> Fn) {
1531     if (!DoFold)
1532       return reinterpret_cast<Value *>(-1);
1533     return Fn();
1534   };
1535 
1536   // FIXME: assert that Op1 isn't/doesn't contain undef.
1537 
1538   // log2(2^C) -> C
1539   if (match(Op, m_Power2()))
1540     return IfFold([&]() {
1541       Constant *C = ConstantExpr::getExactLogBase2(cast<Constant>(Op));
1542       if (!C)
1543         llvm_unreachable("Failed to constant fold udiv -> logbase2");
1544       return C;
1545     });
1546 
1547   // The remaining tests are all recursive, so bail out if we hit the limit.
1548   if (Depth++ == MaxAnalysisRecursionDepth)
1549     return nullptr;
1550 
1551   // log2(zext X) -> zext log2(X)
1552   // FIXME: Require one use?
1553   Value *X, *Y;
1554   if (match(Op, m_ZExt(m_Value(X))))
1555     if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
1556       return IfFold([&]() { return Builder.CreateZExt(LogX, Op->getType()); });
1557 
1558   // log2(trunc x) -> trunc log2(X)
1559   // FIXME: Require one use?
1560   if (match(Op, m_Trunc(m_Value(X)))) {
1561     auto *TI = cast<TruncInst>(Op);
1562     if (AssumeNonZero || TI->hasNoUnsignedWrap())
1563       if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
1564         return IfFold([&]() {
1565           return Builder.CreateTrunc(LogX, Op->getType(), "",
1566                                      /*IsNUW=*/TI->hasNoUnsignedWrap());
1567         });
1568   }
1569 
1570   // log2(X << Y) -> log2(X) + Y
1571   // FIXME: Require one use unless X is 1?
1572   if (match(Op, m_Shl(m_Value(X), m_Value(Y)))) {
1573     auto *BO = cast<OverflowingBinaryOperator>(Op);
1574     // nuw will be set if the `shl` is trivially non-zero.
1575     if (AssumeNonZero || BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap())
1576       if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
1577         return IfFold([&]() { return Builder.CreateAdd(LogX, Y); });
1578   }
1579 
1580   // log2(X >>u Y) -> log2(X) - Y
1581   // FIXME: Require one use?
1582   if (match(Op, m_LShr(m_Value(X), m_Value(Y)))) {
1583     auto *PEO = cast<PossiblyExactOperator>(Op);
1584     if (AssumeNonZero || PEO->isExact())
1585       if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
1586         return IfFold([&]() { return Builder.CreateSub(LogX, Y); });
1587   }
1588 
1589   // log2(X & Y) -> either log2(X) or log2(Y)
1590   // This requires `AssumeNonZero` as `X & Y` may be zero when X != Y.
1591   if (AssumeNonZero && match(Op, m_And(m_Value(X), m_Value(Y)))) {
1592     if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
1593       return IfFold([&]() { return LogX; });
1594     if (Value *LogY = takeLog2(Y, Depth, AssumeNonZero, DoFold))
1595       return IfFold([&]() { return LogY; });
1596   }
1597 
1598   // log2(Cond ? X : Y) -> Cond ? log2(X) : log2(Y)
1599   // FIXME: Require one use?
1600   if (SelectInst *SI = dyn_cast<SelectInst>(Op))
1601     if (Value *LogX = takeLog2(SI->getOperand(1), Depth, AssumeNonZero, DoFold))
1602       if (Value *LogY =
1603               takeLog2(SI->getOperand(2), Depth, AssumeNonZero, DoFold))
1604         return IfFold([&]() {
1605           return Builder.CreateSelect(SI->getOperand(0), LogX, LogY);
1606         });
1607 
1608   // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
1609   // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
1610   auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op);
1611   if (MinMax && MinMax->hasOneUse() && !MinMax->isSigned()) {
1612     // Use AssumeNonZero as false here. Otherwise we can hit case where
1613     // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because overflow).
1614     if (Value *LogX = takeLog2(MinMax->getLHS(), Depth,
1615                                /*AssumeNonZero*/ false, DoFold))
1616       if (Value *LogY = takeLog2(MinMax->getRHS(), Depth,
1617                                  /*AssumeNonZero*/ false, DoFold))
1618         return IfFold([&]() {
1619           return Builder.CreateBinaryIntrinsic(MinMax->getIntrinsicID(), LogX,
1620                                                LogY);
1621         });
1622   }
1623 
1624   return nullptr;
1625 }
1626 
1627 /// If we have zero-extended operands of an unsigned div or rem, we may be able
1628 /// to narrow the operation (sink the zext below the math).
narrowUDivURem(BinaryOperator & I,InstCombinerImpl & IC)1629 static Instruction *narrowUDivURem(BinaryOperator &I,
1630                                    InstCombinerImpl &IC) {
1631   Instruction::BinaryOps Opcode = I.getOpcode();
1632   Value *N = I.getOperand(0);
1633   Value *D = I.getOperand(1);
1634   Type *Ty = I.getType();
1635   Value *X, *Y;
1636   if (match(N, m_ZExt(m_Value(X))) && match(D, m_ZExt(m_Value(Y))) &&
1637       X->getType() == Y->getType() && (N->hasOneUse() || D->hasOneUse())) {
1638     // udiv (zext X), (zext Y) --> zext (udiv X, Y)
1639     // urem (zext X), (zext Y) --> zext (urem X, Y)
1640     Value *NarrowOp = IC.Builder.CreateBinOp(Opcode, X, Y);
1641     return new ZExtInst(NarrowOp, Ty);
1642   }
1643 
1644   Constant *C;
1645   if (isa<Instruction>(N) && match(N, m_OneUse(m_ZExt(m_Value(X)))) &&
1646       match(D, m_Constant(C))) {
1647     // If the constant is the same in the smaller type, use the narrow version.
1648     Constant *TruncC = IC.getLosslessUnsignedTrunc(C, X->getType());
1649     if (!TruncC)
1650       return nullptr;
1651 
1652     // udiv (zext X), C --> zext (udiv X, C')
1653     // urem (zext X), C --> zext (urem X, C')
1654     return new ZExtInst(IC.Builder.CreateBinOp(Opcode, X, TruncC), Ty);
1655   }
1656   if (isa<Instruction>(D) && match(D, m_OneUse(m_ZExt(m_Value(X)))) &&
1657       match(N, m_Constant(C))) {
1658     // If the constant is the same in the smaller type, use the narrow version.
1659     Constant *TruncC = IC.getLosslessUnsignedTrunc(C, X->getType());
1660     if (!TruncC)
1661       return nullptr;
1662 
1663     // udiv C, (zext X) --> zext (udiv C', X)
1664     // urem C, (zext X) --> zext (urem C', X)
1665     return new ZExtInst(IC.Builder.CreateBinOp(Opcode, TruncC, X), Ty);
1666   }
1667 
1668   return nullptr;
1669 }
1670 
visitUDiv(BinaryOperator & I)1671 Instruction *InstCombinerImpl::visitUDiv(BinaryOperator &I) {
1672   if (Value *V = simplifyUDivInst(I.getOperand(0), I.getOperand(1), I.isExact(),
1673                                   SQ.getWithInstruction(&I)))
1674     return replaceInstUsesWith(I, V);
1675 
1676   if (Instruction *X = foldVectorBinop(I))
1677     return X;
1678 
1679   // Handle the integer div common cases
1680   if (Instruction *Common = commonIDivTransforms(I))
1681     return Common;
1682 
1683   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1684   Value *X;
1685   const APInt *C1, *C2;
1686   if (match(Op0, m_LShr(m_Value(X), m_APInt(C1))) && match(Op1, m_APInt(C2))) {
1687     // (X lshr C1) udiv C2 --> X udiv (C2 << C1)
1688     bool Overflow;
1689     APInt C2ShlC1 = C2->ushl_ov(*C1, Overflow);
1690     if (!Overflow) {
1691       bool IsExact = I.isExact() && match(Op0, m_Exact(m_Value()));
1692       BinaryOperator *BO = BinaryOperator::CreateUDiv(
1693           X, ConstantInt::get(X->getType(), C2ShlC1));
1694       if (IsExact)
1695         BO->setIsExact();
1696       return BO;
1697     }
1698   }
1699 
1700   // Op0 / C where C is large (negative) --> zext (Op0 >= C)
1701   // TODO: Could use isKnownNegative() to handle non-constant values.
1702   Type *Ty = I.getType();
1703   if (match(Op1, m_Negative())) {
1704     Value *Cmp = Builder.CreateICmpUGE(Op0, Op1);
1705     return CastInst::CreateZExtOrBitCast(Cmp, Ty);
1706   }
1707   // Op0 / (sext i1 X) --> zext (Op0 == -1) (if X is 0, the div is undefined)
1708   if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
1709     Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::getAllOnesValue(Ty));
1710     return CastInst::CreateZExtOrBitCast(Cmp, Ty);
1711   }
1712 
1713   if (Instruction *NarrowDiv = narrowUDivURem(I, *this))
1714     return NarrowDiv;
1715 
1716   Value *A, *B;
1717 
1718   // Look through a right-shift to find the common factor:
1719   // ((Op1 *nuw A) >> B) / Op1 --> A >> B
1720   if (match(Op0, m_LShr(m_NUWMul(m_Specific(Op1), m_Value(A)), m_Value(B))) ||
1721       match(Op0, m_LShr(m_NUWMul(m_Value(A), m_Specific(Op1)), m_Value(B)))) {
1722     Instruction *Lshr = BinaryOperator::CreateLShr(A, B);
1723     if (I.isExact() && cast<PossiblyExactOperator>(Op0)->isExact())
1724       Lshr->setIsExact();
1725     return Lshr;
1726   }
1727 
1728   auto GetShiftableDenom = [&](Value *Denom) -> Value * {
1729     // Op0 udiv Op1 -> Op0 lshr log2(Op1), if log2() folds away.
1730     if (Value *Log2 = tryGetLog2(Op1, /*AssumeNonZero=*/true))
1731       return Log2;
1732 
1733     // Op0 udiv Op1 -> Op0 lshr cttz(Op1), if Op1 is a power of 2.
1734     if (isKnownToBeAPowerOfTwo(Denom, /*OrZero=*/true, &I))
1735       // This will increase instruction count but it's okay
1736       // since bitwise operations are substantially faster than
1737       // division.
1738       return Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Denom,
1739                                            Builder.getTrue());
1740 
1741     return nullptr;
1742   };
1743 
1744   if (auto *Res = GetShiftableDenom(Op1))
1745     return replaceInstUsesWith(
1746         I, Builder.CreateLShr(Op0, Res, I.getName(), I.isExact()));
1747 
1748   return nullptr;
1749 }
1750 
visitSDiv(BinaryOperator & I)1751 Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
1752   if (Value *V = simplifySDivInst(I.getOperand(0), I.getOperand(1), I.isExact(),
1753                                   SQ.getWithInstruction(&I)))
1754     return replaceInstUsesWith(I, V);
1755 
1756   if (Instruction *X = foldVectorBinop(I))
1757     return X;
1758 
1759   // Handle the integer div common cases
1760   if (Instruction *Common = commonIDivTransforms(I))
1761     return Common;
1762 
1763   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1764   Type *Ty = I.getType();
1765   Value *X;
1766   // sdiv Op0, -1 --> -Op0
1767   // sdiv Op0, (sext i1 X) --> -Op0 (because if X is 0, the op is undefined)
1768   if (match(Op1, m_AllOnes()) ||
1769       (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
1770     return BinaryOperator::CreateNSWNeg(Op0);
1771 
1772   // X / INT_MIN --> X == INT_MIN
1773   if (match(Op1, m_SignMask()))
1774     return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), Ty);
1775 
1776   if (I.isExact()) {
1777     // sdiv exact X, 1<<C --> ashr exact X, C   iff  1<<C  is non-negative
1778     if (match(Op1, m_Power2()) && match(Op1, m_NonNegative())) {
1779       Constant *C = ConstantExpr::getExactLogBase2(cast<Constant>(Op1));
1780       return BinaryOperator::CreateExactAShr(Op0, C);
1781     }
1782 
1783     // sdiv exact X, (1<<ShAmt) --> ashr exact X, ShAmt (if shl is non-negative)
1784     Value *ShAmt;
1785     if (match(Op1, m_NSWShl(m_One(), m_Value(ShAmt))))
1786       return BinaryOperator::CreateExactAShr(Op0, ShAmt);
1787 
1788     // sdiv exact X, -1<<C --> -(ashr exact X, C)
1789     if (match(Op1, m_NegatedPower2())) {
1790       Constant *NegPow2C = ConstantExpr::getNeg(cast<Constant>(Op1));
1791       Constant *C = ConstantExpr::getExactLogBase2(NegPow2C);
1792       Value *Ashr = Builder.CreateAShr(Op0, C, I.getName() + ".neg", true);
1793       return BinaryOperator::CreateNSWNeg(Ashr);
1794     }
1795   }
1796 
1797   const APInt *Op1C;
1798   if (match(Op1, m_APInt(Op1C))) {
1799     // If the dividend is sign-extended and the constant divisor is small enough
1800     // to fit in the source type, shrink the division to the narrower type:
1801     // (sext X) sdiv C --> sext (X sdiv C)
1802     Value *Op0Src;
1803     if (match(Op0, m_OneUse(m_SExt(m_Value(Op0Src)))) &&
1804         Op0Src->getType()->getScalarSizeInBits() >=
1805             Op1C->getSignificantBits()) {
1806 
1807       // In the general case, we need to make sure that the dividend is not the
1808       // minimum signed value because dividing that by -1 is UB. But here, we
1809       // know that the -1 divisor case is already handled above.
1810 
1811       Constant *NarrowDivisor =
1812           ConstantExpr::getTrunc(cast<Constant>(Op1), Op0Src->getType());
1813       Value *NarrowOp = Builder.CreateSDiv(Op0Src, NarrowDivisor);
1814       return new SExtInst(NarrowOp, Ty);
1815     }
1816 
1817     // -X / C --> X / -C (if the negation doesn't overflow).
1818     // TODO: This could be enhanced to handle arbitrary vector constants by
1819     //       checking if all elements are not the min-signed-val.
1820     if (!Op1C->isMinSignedValue() && match(Op0, m_NSWNeg(m_Value(X)))) {
1821       Constant *NegC = ConstantInt::get(Ty, -(*Op1C));
1822       Instruction *BO = BinaryOperator::CreateSDiv(X, NegC);
1823       BO->setIsExact(I.isExact());
1824       return BO;
1825     }
1826   }
1827 
1828   // -X / Y --> -(X / Y)
1829   Value *Y;
1830   if (match(&I, m_SDiv(m_OneUse(m_NSWNeg(m_Value(X))), m_Value(Y))))
1831     return BinaryOperator::CreateNSWNeg(
1832         Builder.CreateSDiv(X, Y, I.getName(), I.isExact()));
1833 
1834   // abs(X) / X --> X > -1 ? 1 : -1
1835   // X / abs(X) --> X > -1 ? 1 : -1
1836   if (match(&I, m_c_BinOp(
1837                     m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(X), m_One())),
1838                     m_Deferred(X)))) {
1839     Value *Cond = Builder.CreateIsNotNeg(X);
1840     return SelectInst::Create(Cond, ConstantInt::get(Ty, 1),
1841                               ConstantInt::getAllOnesValue(Ty));
1842   }
1843 
1844   KnownBits KnownDividend = computeKnownBits(Op0, &I);
1845   if (!I.isExact() &&
1846       (match(Op1, m_Power2(Op1C)) || match(Op1, m_NegatedPower2(Op1C))) &&
1847       KnownDividend.countMinTrailingZeros() >= Op1C->countr_zero()) {
1848     I.setIsExact();
1849     return &I;
1850   }
1851 
1852   if (KnownDividend.isNonNegative()) {
1853     // If both operands are unsigned, turn this into a udiv.
1854     if (isKnownNonNegative(Op1, SQ.getWithInstruction(&I))) {
1855       auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
1856       BO->setIsExact(I.isExact());
1857       return BO;
1858     }
1859 
1860     if (match(Op1, m_NegatedPower2())) {
1861       // X sdiv (-(1 << C)) -> -(X sdiv (1 << C)) ->
1862       //                    -> -(X udiv (1 << C)) -> -(X u>> C)
1863       Constant *CNegLog2 = ConstantExpr::getExactLogBase2(
1864           ConstantExpr::getNeg(cast<Constant>(Op1)));
1865       Value *Shr = Builder.CreateLShr(Op0, CNegLog2, I.getName(), I.isExact());
1866       return BinaryOperator::CreateNeg(Shr);
1867     }
1868 
1869     if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, &I)) {
1870       // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
1871       // Safe because the only negative value (1 << Y) can take on is
1872       // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
1873       // the sign bit set.
1874       auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
1875       BO->setIsExact(I.isExact());
1876       return BO;
1877     }
1878   }
1879 
1880   // -X / X --> X == INT_MIN ? 1 : -1
1881   if (isKnownNegation(Op0, Op1)) {
1882     APInt MinVal = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
1883     Value *Cond = Builder.CreateICmpEQ(Op0, ConstantInt::get(Ty, MinVal));
1884     return SelectInst::Create(Cond, ConstantInt::get(Ty, 1),
1885                               ConstantInt::getAllOnesValue(Ty));
1886   }
1887   return nullptr;
1888 }
1889 
1890 /// Remove negation and try to convert division into multiplication.
foldFDivConstantDivisor(BinaryOperator & I)1891 Instruction *InstCombinerImpl::foldFDivConstantDivisor(BinaryOperator &I) {
1892   Constant *C;
1893   if (!match(I.getOperand(1), m_Constant(C)))
1894     return nullptr;
1895 
1896   // -X / C --> X / -C
1897   Value *X;
1898   const DataLayout &DL = I.getDataLayout();
1899   if (match(I.getOperand(0), m_FNeg(m_Value(X))))
1900     if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
1901       return BinaryOperator::CreateFDivFMF(X, NegC, &I);
1902 
1903   // nnan X / +0.0 -> copysign(inf, X)
1904   // nnan nsz X / -0.0 -> copysign(inf, X)
1905   if (I.hasNoNaNs() &&
1906       (match(I.getOperand(1), m_PosZeroFP()) ||
1907        (I.hasNoSignedZeros() && match(I.getOperand(1), m_AnyZeroFP())))) {
1908     IRBuilder<> B(&I);
1909     CallInst *CopySign = B.CreateIntrinsic(
1910         Intrinsic::copysign, {C->getType()},
1911         {ConstantFP::getInfinity(I.getType()), I.getOperand(0)}, &I);
1912     CopySign->takeName(&I);
1913     return replaceInstUsesWith(I, CopySign);
1914   }
1915 
1916   // If the constant divisor has an exact inverse, this is always safe. If not,
1917   // then we can still create a reciprocal if fast-math-flags allow it and the
1918   // constant is a regular number (not zero, infinite, or denormal).
1919   if (!(C->hasExactInverseFP() || (I.hasAllowReciprocal() && C->isNormalFP())))
1920     return nullptr;
1921 
1922   // Disallow denormal constants because we don't know what would happen
1923   // on all targets.
1924   // TODO: Use Intrinsic::canonicalize or let function attributes tell us that
1925   // denorms are flushed?
1926   auto *RecipC = ConstantFoldBinaryOpOperands(
1927       Instruction::FDiv, ConstantFP::get(I.getType(), 1.0), C, DL);
1928   if (!RecipC || !RecipC->isNormalFP())
1929     return nullptr;
1930 
1931   // X / C --> X * (1 / C)
1932   return BinaryOperator::CreateFMulFMF(I.getOperand(0), RecipC, &I);
1933 }
1934 
1935 /// Remove negation and try to reassociate constant math.
foldFDivConstantDividend(BinaryOperator & I)1936 static Instruction *foldFDivConstantDividend(BinaryOperator &I) {
1937   Constant *C;
1938   if (!match(I.getOperand(0), m_Constant(C)))
1939     return nullptr;
1940 
1941   // C / -X --> -C / X
1942   Value *X;
1943   const DataLayout &DL = I.getDataLayout();
1944   if (match(I.getOperand(1), m_FNeg(m_Value(X))))
1945     if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
1946       return BinaryOperator::CreateFDivFMF(NegC, X, &I);
1947 
1948   if (!I.hasAllowReassoc() || !I.hasAllowReciprocal())
1949     return nullptr;
1950 
1951   // Try to reassociate C / X expressions where X includes another constant.
1952   Constant *C2, *NewC = nullptr;
1953   if (match(I.getOperand(1), m_FMul(m_Value(X), m_Constant(C2)))) {
1954     // C / (X * C2) --> (C / C2) / X
1955     NewC = ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C2, DL);
1956   } else if (match(I.getOperand(1), m_FDiv(m_Value(X), m_Constant(C2)))) {
1957     // C / (X / C2) --> (C * C2) / X
1958     NewC = ConstantFoldBinaryOpOperands(Instruction::FMul, C, C2, DL);
1959   }
1960   // Disallow denormal constants because we don't know what would happen
1961   // on all targets.
1962   // TODO: Use Intrinsic::canonicalize or let function attributes tell us that
1963   // denorms are flushed?
1964   if (!NewC || !NewC->isNormalFP())
1965     return nullptr;
1966 
1967   return BinaryOperator::CreateFDivFMF(NewC, X, &I);
1968 }
1969 
1970 /// Negate the exponent of pow/exp to fold division-by-pow() into multiply.
foldFDivPowDivisor(BinaryOperator & I,InstCombiner::BuilderTy & Builder)1971 static Instruction *foldFDivPowDivisor(BinaryOperator &I,
1972                                        InstCombiner::BuilderTy &Builder) {
1973   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1974   auto *II = dyn_cast<IntrinsicInst>(Op1);
1975   if (!II || !II->hasOneUse() || !I.hasAllowReassoc() ||
1976       !I.hasAllowReciprocal())
1977     return nullptr;
1978 
1979   // Z / pow(X, Y) --> Z * pow(X, -Y)
1980   // Z / exp{2}(Y) --> Z * exp{2}(-Y)
1981   // In the general case, this creates an extra instruction, but fmul allows
1982   // for better canonicalization and optimization than fdiv.
1983   Intrinsic::ID IID = II->getIntrinsicID();
1984   SmallVector<Value *> Args;
1985   switch (IID) {
1986   case Intrinsic::pow:
1987     Args.push_back(II->getArgOperand(0));
1988     Args.push_back(Builder.CreateFNegFMF(II->getArgOperand(1), &I));
1989     break;
1990   case Intrinsic::powi: {
1991     // Require 'ninf' assuming that makes powi(X, -INT_MIN) acceptable.
1992     // That is, X ** (huge negative number) is 0.0, ~1.0, or INF and so
1993     // dividing by that is INF, ~1.0, or 0.0. Code that uses powi allows
1994     // non-standard results, so this corner case should be acceptable if the
1995     // code rules out INF values.
1996     if (!I.hasNoInfs())
1997       return nullptr;
1998     Args.push_back(II->getArgOperand(0));
1999     Args.push_back(Builder.CreateNeg(II->getArgOperand(1)));
2000     Type *Tys[] = {I.getType(), II->getArgOperand(1)->getType()};
2001     Value *Pow = Builder.CreateIntrinsic(IID, Tys, Args, &I);
2002     return BinaryOperator::CreateFMulFMF(Op0, Pow, &I);
2003   }
2004   case Intrinsic::exp:
2005   case Intrinsic::exp2:
2006     Args.push_back(Builder.CreateFNegFMF(II->getArgOperand(0), &I));
2007     break;
2008   default:
2009     return nullptr;
2010   }
2011   Value *Pow = Builder.CreateIntrinsic(IID, I.getType(), Args, &I);
2012   return BinaryOperator::CreateFMulFMF(Op0, Pow, &I);
2013 }
2014 
2015 /// Convert div to mul if we have an sqrt divisor iff sqrt's operand is a fdiv
2016 /// instruction.
foldFDivSqrtDivisor(BinaryOperator & I,InstCombiner::BuilderTy & Builder)2017 static Instruction *foldFDivSqrtDivisor(BinaryOperator &I,
2018                                         InstCombiner::BuilderTy &Builder) {
2019   // X / sqrt(Y / Z) -->  X * sqrt(Z / Y)
2020   if (!I.hasAllowReassoc() || !I.hasAllowReciprocal())
2021     return nullptr;
2022   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2023   auto *II = dyn_cast<IntrinsicInst>(Op1);
2024   if (!II || II->getIntrinsicID() != Intrinsic::sqrt || !II->hasOneUse() ||
2025       !II->hasAllowReassoc() || !II->hasAllowReciprocal())
2026     return nullptr;
2027 
2028   Value *Y, *Z;
2029   auto *DivOp = dyn_cast<Instruction>(II->getOperand(0));
2030   if (!DivOp)
2031     return nullptr;
2032   if (!match(DivOp, m_FDiv(m_Value(Y), m_Value(Z))))
2033     return nullptr;
2034   if (!DivOp->hasAllowReassoc() || !I.hasAllowReciprocal() ||
2035       !DivOp->hasOneUse())
2036     return nullptr;
2037   Value *SwapDiv = Builder.CreateFDivFMF(Z, Y, DivOp);
2038   Value *NewSqrt =
2039       Builder.CreateUnaryIntrinsic(II->getIntrinsicID(), SwapDiv, II);
2040   return BinaryOperator::CreateFMulFMF(Op0, NewSqrt, &I);
2041 }
2042 
2043 // Change
2044 // X = 1/sqrt(a)
2045 // R1 = X * X
2046 // R2 = a * X
2047 //
2048 // TO
2049 //
2050 // FDiv = 1/a
2051 // FSqrt = sqrt(a)
2052 // FMul = FDiv * FSqrt
2053 // Replace Uses Of R1 With FDiv
2054 // Replace Uses Of R2 With FSqrt
2055 // Replace Uses Of X With FMul
2056 static Instruction *
convertFSqrtDivIntoFMul(CallInst * CI,Instruction * X,const SmallPtrSetImpl<Instruction * > & R1,const SmallPtrSetImpl<Instruction * > & R2,InstCombiner::BuilderTy & B,InstCombinerImpl * IC)2057 convertFSqrtDivIntoFMul(CallInst *CI, Instruction *X,
2058                         const SmallPtrSetImpl<Instruction *> &R1,
2059                         const SmallPtrSetImpl<Instruction *> &R2,
2060                         InstCombiner::BuilderTy &B, InstCombinerImpl *IC) {
2061 
2062   B.SetInsertPoint(X);
2063 
2064   // Have an instruction that is representative of all of instructions in R1 and
2065   // get the most common fpmath metadata and fast-math flags on it.
2066   Value *SqrtOp = CI->getArgOperand(0);
2067   auto *FDiv = cast<Instruction>(
2068       B.CreateFDiv(ConstantFP::get(X->getType(), 1.0), SqrtOp));
2069   auto *R1FPMathMDNode = (*R1.begin())->getMetadata(LLVMContext::MD_fpmath);
2070   FastMathFlags R1FMF = (*R1.begin())->getFastMathFlags(); // Common FMF
2071   for (Instruction *I : R1) {
2072     R1FPMathMDNode = MDNode::getMostGenericFPMath(
2073         R1FPMathMDNode, I->getMetadata(LLVMContext::MD_fpmath));
2074     R1FMF &= I->getFastMathFlags();
2075     IC->replaceInstUsesWith(*I, FDiv);
2076     IC->eraseInstFromFunction(*I);
2077   }
2078   FDiv->setMetadata(LLVMContext::MD_fpmath, R1FPMathMDNode);
2079   FDiv->copyFastMathFlags(R1FMF);
2080 
2081   // Have a single sqrt call instruction that is representative of all of
2082   // instructions in R2 and get the most common fpmath metadata and fast-math
2083   // flags on it.
2084   auto *FSqrt = cast<CallInst>(CI->clone());
2085   FSqrt->insertBefore(CI->getIterator());
2086   auto *R2FPMathMDNode = (*R2.begin())->getMetadata(LLVMContext::MD_fpmath);
2087   FastMathFlags R2FMF = (*R2.begin())->getFastMathFlags(); // Common FMF
2088   for (Instruction *I : R2) {
2089     R2FPMathMDNode = MDNode::getMostGenericFPMath(
2090         R2FPMathMDNode, I->getMetadata(LLVMContext::MD_fpmath));
2091     R2FMF &= I->getFastMathFlags();
2092     IC->replaceInstUsesWith(*I, FSqrt);
2093     IC->eraseInstFromFunction(*I);
2094   }
2095   FSqrt->setMetadata(LLVMContext::MD_fpmath, R2FPMathMDNode);
2096   FSqrt->copyFastMathFlags(R2FMF);
2097 
2098   Instruction *FMul;
2099   // If X = -1/sqrt(a) initially,then FMul = -(FDiv * FSqrt)
2100   if (match(X, m_FDiv(m_SpecificFP(-1.0), m_Specific(CI)))) {
2101     Value *Mul = B.CreateFMul(FDiv, FSqrt);
2102     FMul = cast<Instruction>(B.CreateFNeg(Mul));
2103   } else
2104     FMul = cast<Instruction>(B.CreateFMul(FDiv, FSqrt));
2105   FMul->copyMetadata(*X);
2106   FMul->copyFastMathFlags(FastMathFlags::intersectRewrite(R1FMF, R2FMF) |
2107                           FastMathFlags::unionValue(R1FMF, R2FMF));
2108   return IC->replaceInstUsesWith(*X, FMul);
2109 }
2110 
visitFDiv(BinaryOperator & I)2111 Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
2112   Module *M = I.getModule();
2113 
2114   if (Value *V = simplifyFDivInst(I.getOperand(0), I.getOperand(1),
2115                                   I.getFastMathFlags(),
2116                                   SQ.getWithInstruction(&I)))
2117     return replaceInstUsesWith(I, V);
2118 
2119   if (Instruction *X = foldVectorBinop(I))
2120     return X;
2121 
2122   if (Instruction *Phi = foldBinopWithPhiOperands(I))
2123     return Phi;
2124 
2125   if (Instruction *R = foldFDivConstantDivisor(I))
2126     return R;
2127 
2128   if (Instruction *R = foldFDivConstantDividend(I))
2129     return R;
2130 
2131   if (Instruction *R = foldFPSignBitOps(I))
2132     return R;
2133 
2134   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2135 
2136   // Convert
2137   // x = 1.0/sqrt(a)
2138   // r1 = x * x;
2139   // r2 = a/sqrt(a);
2140   //
2141   // TO
2142   //
2143   // r1 = 1/a
2144   // r2 = sqrt(a)
2145   // x = r1 * r2
2146   SmallPtrSet<Instruction *, 2> R1, R2;
2147   if (isFSqrtDivToFMulLegal(&I, R1, R2)) {
2148     CallInst *CI = cast<CallInst>(I.getOperand(1));
2149     if (Instruction *D = convertFSqrtDivIntoFMul(CI, &I, R1, R2, Builder, this))
2150       return D;
2151   }
2152 
2153   if (isa<Constant>(Op0))
2154     if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
2155       if (Instruction *R = FoldOpIntoSelect(I, SI))
2156         return R;
2157 
2158   if (isa<Constant>(Op1))
2159     if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
2160       if (Instruction *R = FoldOpIntoSelect(I, SI))
2161         return R;
2162 
2163   if (I.hasAllowReassoc() && I.hasAllowReciprocal()) {
2164     Value *X, *Y;
2165     if (match(Op0, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))) &&
2166         (!isa<Constant>(Y) || !isa<Constant>(Op1))) {
2167       // (X / Y) / Z => X / (Y * Z)
2168       Value *YZ = Builder.CreateFMulFMF(Y, Op1, &I);
2169       return BinaryOperator::CreateFDivFMF(X, YZ, &I);
2170     }
2171     if (match(Op1, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))) &&
2172         (!isa<Constant>(Y) || !isa<Constant>(Op0))) {
2173       // Z / (X / Y) => (Y * Z) / X
2174       Value *YZ = Builder.CreateFMulFMF(Y, Op0, &I);
2175       return BinaryOperator::CreateFDivFMF(YZ, X, &I);
2176     }
2177     // Z / (1.0 / Y) => (Y * Z)
2178     //
2179     // This is a special case of Z / (X / Y) => (Y * Z) / X, with X = 1.0. The
2180     // m_OneUse check is avoided because even in the case of the multiple uses
2181     // for 1.0/Y, the number of instructions remain the same and a division is
2182     // replaced by a multiplication.
2183     if (match(Op1, m_FDiv(m_SpecificFP(1.0), m_Value(Y))))
2184       return BinaryOperator::CreateFMulFMF(Y, Op0, &I);
2185   }
2186 
2187   if (I.hasAllowReassoc() && Op0->hasOneUse() && Op1->hasOneUse()) {
2188     // sin(X) / cos(X) -> tan(X)
2189     // cos(X) / sin(X) -> 1/tan(X) (cotangent)
2190     Value *X;
2191     bool IsTan = match(Op0, m_Intrinsic<Intrinsic::sin>(m_Value(X))) &&
2192                  match(Op1, m_Intrinsic<Intrinsic::cos>(m_Specific(X)));
2193     bool IsCot =
2194         !IsTan && match(Op0, m_Intrinsic<Intrinsic::cos>(m_Value(X))) &&
2195                   match(Op1, m_Intrinsic<Intrinsic::sin>(m_Specific(X)));
2196 
2197     if ((IsTan || IsCot) && hasFloatFn(M, &TLI, I.getType(), LibFunc_tan,
2198                                        LibFunc_tanf, LibFunc_tanl)) {
2199       IRBuilder<> B(&I);
2200       IRBuilder<>::FastMathFlagGuard FMFGuard(B);
2201       B.setFastMathFlags(I.getFastMathFlags());
2202       AttributeList Attrs =
2203           cast<CallBase>(Op0)->getCalledFunction()->getAttributes();
2204       Value *Res = emitUnaryFloatFnCall(X, &TLI, LibFunc_tan, LibFunc_tanf,
2205                                         LibFunc_tanl, B, Attrs);
2206       if (IsCot)
2207         Res = B.CreateFDiv(ConstantFP::get(I.getType(), 1.0), Res);
2208       return replaceInstUsesWith(I, Res);
2209     }
2210   }
2211 
2212   // X / (X * Y) --> 1.0 / Y
2213   // Reassociate to (X / X -> 1.0) is legal when NaNs are not allowed.
2214   // We can ignore the possibility that X is infinity because INF/INF is NaN.
2215   Value *X, *Y;
2216   if (I.hasNoNaNs() && I.hasAllowReassoc() &&
2217       match(Op1, m_c_FMul(m_Specific(Op0), m_Value(Y)))) {
2218     replaceOperand(I, 0, ConstantFP::get(I.getType(), 1.0));
2219     replaceOperand(I, 1, Y);
2220     return &I;
2221   }
2222 
2223   // X / fabs(X) -> copysign(1.0, X)
2224   // fabs(X) / X -> copysign(1.0, X)
2225   if (I.hasNoNaNs() && I.hasNoInfs() &&
2226       (match(&I, m_FDiv(m_Value(X), m_FAbs(m_Deferred(X)))) ||
2227        match(&I, m_FDiv(m_FAbs(m_Value(X)), m_Deferred(X))))) {
2228     Value *V = Builder.CreateBinaryIntrinsic(
2229         Intrinsic::copysign, ConstantFP::get(I.getType(), 1.0), X, &I);
2230     return replaceInstUsesWith(I, V);
2231   }
2232 
2233   if (Instruction *Mul = foldFDivPowDivisor(I, Builder))
2234     return Mul;
2235 
2236   if (Instruction *Mul = foldFDivSqrtDivisor(I, Builder))
2237     return Mul;
2238 
2239   // pow(X, Y) / X --> pow(X, Y-1)
2240   if (I.hasAllowReassoc() &&
2241       match(Op0, m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Specific(Op1),
2242                                                       m_Value(Y))))) {
2243     Value *Y1 =
2244         Builder.CreateFAddFMF(Y, ConstantFP::get(I.getType(), -1.0), &I);
2245     Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, Op1, Y1, &I);
2246     return replaceInstUsesWith(I, Pow);
2247   }
2248 
2249   if (Instruction *FoldedPowi = foldPowiReassoc(I))
2250     return FoldedPowi;
2251 
2252   return nullptr;
2253 }
2254 
2255 // Variety of transform for:
2256 //  (urem/srem (mul X, Y), (mul X, Z))
2257 //  (urem/srem (shl X, Y), (shl X, Z))
2258 //  (urem/srem (shl Y, X), (shl Z, X))
2259 // NB: The shift cases are really just extensions of the mul case. We treat
2260 // shift as Val * (1 << Amt).
simplifyIRemMulShl(BinaryOperator & I,InstCombinerImpl & IC)2261 static Instruction *simplifyIRemMulShl(BinaryOperator &I,
2262                                        InstCombinerImpl &IC) {
2263   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *X = nullptr;
2264   APInt Y, Z;
2265   bool ShiftByX = false;
2266 
2267   // If V is not nullptr, it will be matched using m_Specific.
2268   auto MatchShiftOrMulXC = [](Value *Op, Value *&V, APInt &C,
2269                               bool &PreserveNSW) -> bool {
2270     const APInt *Tmp = nullptr;
2271     if ((!V && match(Op, m_Mul(m_Value(V), m_APInt(Tmp)))) ||
2272         (V && match(Op, m_Mul(m_Specific(V), m_APInt(Tmp)))))
2273       C = *Tmp;
2274     else if ((!V && match(Op, m_Shl(m_Value(V), m_APInt(Tmp)))) ||
2275              (V && match(Op, m_Shl(m_Specific(V), m_APInt(Tmp))))) {
2276       C = APInt(Tmp->getBitWidth(), 1) << *Tmp;
2277       // We cannot preserve NSW when shifting by BW - 1.
2278       PreserveNSW = Tmp->ult(Tmp->getBitWidth() - 1);
2279     }
2280     if (Tmp != nullptr)
2281       return true;
2282 
2283     // Reset `V` so we don't start with specific value on next match attempt.
2284     V = nullptr;
2285     return false;
2286   };
2287 
2288   auto MatchShiftCX = [](Value *Op, APInt &C, Value *&V) -> bool {
2289     const APInt *Tmp = nullptr;
2290     if ((!V && match(Op, m_Shl(m_APInt(Tmp), m_Value(V)))) ||
2291         (V && match(Op, m_Shl(m_APInt(Tmp), m_Specific(V))))) {
2292       C = *Tmp;
2293       return true;
2294     }
2295 
2296     // Reset `V` so we don't start with specific value on next match attempt.
2297     V = nullptr;
2298     return false;
2299   };
2300 
2301   bool Op0PreserveNSW = true, Op1PreserveNSW = true;
2302   if (MatchShiftOrMulXC(Op0, X, Y, Op0PreserveNSW) &&
2303       MatchShiftOrMulXC(Op1, X, Z, Op1PreserveNSW)) {
2304     // pass
2305   } else if (MatchShiftCX(Op0, Y, X) && MatchShiftCX(Op1, Z, X)) {
2306     ShiftByX = true;
2307   } else {
2308     return nullptr;
2309   }
2310 
2311   bool IsSRem = I.getOpcode() == Instruction::SRem;
2312 
2313   OverflowingBinaryOperator *BO0 = cast<OverflowingBinaryOperator>(Op0);
2314   // TODO: We may be able to deduce more about nsw/nuw of BO0/BO1 based on Y >=
2315   // Z or Z >= Y.
2316   bool BO0HasNSW = Op0PreserveNSW && BO0->hasNoSignedWrap();
2317   bool BO0HasNUW = BO0->hasNoUnsignedWrap();
2318   bool BO0NoWrap = IsSRem ? BO0HasNSW : BO0HasNUW;
2319 
2320   APInt RemYZ = IsSRem ? Y.srem(Z) : Y.urem(Z);
2321   // (rem (mul nuw/nsw X, Y), (mul X, Z))
2322   //      if (rem Y, Z) == 0
2323   //          -> 0
2324   if (RemYZ.isZero() && BO0NoWrap)
2325     return IC.replaceInstUsesWith(I, ConstantInt::getNullValue(I.getType()));
2326 
2327   // Helper function to emit either (RemSimplificationC << X) or
2328   // (RemSimplificationC * X) depending on whether we matched Op0/Op1 as
2329   // (shl V, X) or (mul V, X) respectively.
2330   auto CreateMulOrShift =
2331       [&](const APInt &RemSimplificationC) -> BinaryOperator * {
2332     Value *RemSimplification =
2333         ConstantInt::get(I.getType(), RemSimplificationC);
2334     return ShiftByX ? BinaryOperator::CreateShl(RemSimplification, X)
2335                     : BinaryOperator::CreateMul(X, RemSimplification);
2336   };
2337 
2338   OverflowingBinaryOperator *BO1 = cast<OverflowingBinaryOperator>(Op1);
2339   bool BO1HasNSW = Op1PreserveNSW && BO1->hasNoSignedWrap();
2340   bool BO1HasNUW = BO1->hasNoUnsignedWrap();
2341   bool BO1NoWrap = IsSRem ? BO1HasNSW : BO1HasNUW;
2342   // (rem (mul X, Y), (mul nuw/nsw X, Z))
2343   //      if (rem Y, Z) == Y
2344   //          -> (mul nuw/nsw X, Y)
2345   if (RemYZ == Y && BO1NoWrap) {
2346     BinaryOperator *BO = CreateMulOrShift(Y);
2347     // Copy any overflow flags from Op0.
2348     BO->setHasNoSignedWrap(IsSRem || BO0HasNSW);
2349     BO->setHasNoUnsignedWrap(!IsSRem || BO0HasNUW);
2350     return BO;
2351   }
2352 
2353   // (rem (mul nuw/nsw X, Y), (mul {nsw} X, Z))
2354   //      if Y >= Z
2355   //          -> (mul {nuw} nsw X, (rem Y, Z))
2356   if (Y.uge(Z) && (IsSRem ? (BO0HasNSW && BO1HasNSW) : BO0HasNUW)) {
2357     BinaryOperator *BO = CreateMulOrShift(RemYZ);
2358     BO->setHasNoSignedWrap();
2359     BO->setHasNoUnsignedWrap(BO0HasNUW);
2360     return BO;
2361   }
2362 
2363   return nullptr;
2364 }
2365 
2366 /// This function implements the transforms common to both integer remainder
2367 /// instructions (urem and srem). It is called by the visitors to those integer
2368 /// remainder instructions.
2369 /// Common integer remainder transforms
commonIRemTransforms(BinaryOperator & I)2370 Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) {
2371   if (Instruction *Res = commonIDivRemTransforms(I))
2372     return Res;
2373 
2374   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2375 
2376   if (isa<Constant>(Op1)) {
2377     if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
2378       if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
2379         if (Instruction *R = FoldOpIntoSelect(I, SI))
2380           return R;
2381       } else if (auto *PN = dyn_cast<PHINode>(Op0I)) {
2382         const APInt *Op1Int;
2383         if (match(Op1, m_APInt(Op1Int)) && !Op1Int->isMinValue() &&
2384             (I.getOpcode() == Instruction::URem ||
2385              !Op1Int->isMinSignedValue())) {
2386           // foldOpIntoPhi will speculate instructions to the end of the PHI's
2387           // predecessor blocks, so do this only if we know the srem or urem
2388           // will not fault.
2389           if (Instruction *NV = foldOpIntoPhi(I, PN))
2390             return NV;
2391         }
2392       }
2393 
2394       // See if we can fold away this rem instruction.
2395       if (SimplifyDemandedInstructionBits(I))
2396         return &I;
2397     }
2398   }
2399 
2400   if (Instruction *R = simplifyIRemMulShl(I, *this))
2401     return R;
2402 
2403   return nullptr;
2404 }
2405 
visitURem(BinaryOperator & I)2406 Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) {
2407   if (Value *V = simplifyURemInst(I.getOperand(0), I.getOperand(1),
2408                                   SQ.getWithInstruction(&I)))
2409     return replaceInstUsesWith(I, V);
2410 
2411   if (Instruction *X = foldVectorBinop(I))
2412     return X;
2413 
2414   if (Instruction *common = commonIRemTransforms(I))
2415     return common;
2416 
2417   if (Instruction *NarrowRem = narrowUDivURem(I, *this))
2418     return NarrowRem;
2419 
2420   // X urem Y -> X and Y-1, where Y is a power of 2,
2421   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2422   Type *Ty = I.getType();
2423   if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, &I)) {
2424     // This may increase instruction count, we don't enforce that Y is a
2425     // constant.
2426     Constant *N1 = Constant::getAllOnesValue(Ty);
2427     Value *Add = Builder.CreateAdd(Op1, N1);
2428     return BinaryOperator::CreateAnd(Op0, Add);
2429   }
2430 
2431   // 1 urem X -> zext(X != 1)
2432   if (match(Op0, m_One())) {
2433     Value *Cmp = Builder.CreateICmpNE(Op1, ConstantInt::get(Ty, 1));
2434     return CastInst::CreateZExtOrBitCast(Cmp, Ty);
2435   }
2436 
2437   // Op0 urem C -> Op0 < C ? Op0 : Op0 - C, where C >= signbit.
2438   // Op0 must be frozen because we are increasing its number of uses.
2439   if (match(Op1, m_Negative())) {
2440     Value *F0 = Op0;
2441     if (!isGuaranteedNotToBeUndef(Op0))
2442       F0 = Builder.CreateFreeze(Op0, Op0->getName() + ".fr");
2443     Value *Cmp = Builder.CreateICmpULT(F0, Op1);
2444     Value *Sub = Builder.CreateSub(F0, Op1);
2445     return SelectInst::Create(Cmp, F0, Sub);
2446   }
2447 
2448   // If the divisor is a sext of a boolean, then the divisor must be max
2449   // unsigned value (-1). Therefore, the remainder is Op0 unless Op0 is also
2450   // max unsigned value. In that case, the remainder is 0:
2451   // urem Op0, (sext i1 X) --> (Op0 == -1) ? 0 : Op0
2452   Value *X;
2453   if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
2454     Value *FrozenOp0 = Op0;
2455     if (!isGuaranteedNotToBeUndef(Op0))
2456       FrozenOp0 = Builder.CreateFreeze(Op0, Op0->getName() + ".frozen");
2457     Value *Cmp =
2458         Builder.CreateICmpEQ(FrozenOp0, ConstantInt::getAllOnesValue(Ty));
2459     return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), FrozenOp0);
2460   }
2461 
2462   // For "(X + 1) % Op1" and if (X u< Op1) => (X + 1) == Op1 ? 0 : X + 1 .
2463   if (match(Op0, m_Add(m_Value(X), m_One()))) {
2464     Value *Val =
2465         simplifyICmpInst(ICmpInst::ICMP_ULT, X, Op1, SQ.getWithInstruction(&I));
2466     if (Val && match(Val, m_One())) {
2467       Value *FrozenOp0 = Op0;
2468       if (!isGuaranteedNotToBeUndef(Op0))
2469         FrozenOp0 = Builder.CreateFreeze(Op0, Op0->getName() + ".frozen");
2470       Value *Cmp = Builder.CreateICmpEQ(FrozenOp0, Op1);
2471       return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), FrozenOp0);
2472     }
2473   }
2474 
2475   return nullptr;
2476 }
2477 
visitSRem(BinaryOperator & I)2478 Instruction *InstCombinerImpl::visitSRem(BinaryOperator &I) {
2479   if (Value *V = simplifySRemInst(I.getOperand(0), I.getOperand(1),
2480                                   SQ.getWithInstruction(&I)))
2481     return replaceInstUsesWith(I, V);
2482 
2483   if (Instruction *X = foldVectorBinop(I))
2484     return X;
2485 
2486   // Handle the integer rem common cases
2487   if (Instruction *Common = commonIRemTransforms(I))
2488     return Common;
2489 
2490   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2491   {
2492     const APInt *Y;
2493     // X % -Y -> X % Y
2494     if (match(Op1, m_Negative(Y)) && !Y->isMinSignedValue())
2495       return replaceOperand(I, 1, ConstantInt::get(I.getType(), -*Y));
2496   }
2497 
2498   // -X srem Y --> -(X srem Y)
2499   Value *X, *Y;
2500   if (match(&I, m_SRem(m_OneUse(m_NSWNeg(m_Value(X))), m_Value(Y))))
2501     return BinaryOperator::CreateNSWNeg(Builder.CreateSRem(X, Y));
2502 
2503   // If the sign bits of both operands are zero (i.e. we can prove they are
2504   // unsigned inputs), turn this into a urem.
2505   APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits()));
2506   if (MaskedValueIsZero(Op1, Mask, &I) && MaskedValueIsZero(Op0, Mask, &I)) {
2507     // X srem Y -> X urem Y, iff X and Y don't have sign bit set
2508     return BinaryOperator::CreateURem(Op0, Op1, I.getName());
2509   }
2510 
2511   // If it's a constant vector, flip any negative values positive.
2512   if (isa<ConstantVector>(Op1) || isa<ConstantDataVector>(Op1)) {
2513     Constant *C = cast<Constant>(Op1);
2514     unsigned VWidth = cast<FixedVectorType>(C->getType())->getNumElements();
2515 
2516     bool hasNegative = false;
2517     bool hasMissing = false;
2518     for (unsigned i = 0; i != VWidth; ++i) {
2519       Constant *Elt = C->getAggregateElement(i);
2520       if (!Elt) {
2521         hasMissing = true;
2522         break;
2523       }
2524 
2525       if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elt))
2526         if (RHS->isNegative())
2527           hasNegative = true;
2528     }
2529 
2530     if (hasNegative && !hasMissing) {
2531       SmallVector<Constant *, 16> Elts(VWidth);
2532       for (unsigned i = 0; i != VWidth; ++i) {
2533         Elts[i] = C->getAggregateElement(i);  // Handle undef, etc.
2534         if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elts[i])) {
2535           if (RHS->isNegative())
2536             Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
2537         }
2538       }
2539 
2540       Constant *NewRHSV = ConstantVector::get(Elts);
2541       if (NewRHSV != C)  // Don't loop on -MININT
2542         return replaceOperand(I, 1, NewRHSV);
2543     }
2544   }
2545 
2546   return nullptr;
2547 }
2548 
visitFRem(BinaryOperator & I)2549 Instruction *InstCombinerImpl::visitFRem(BinaryOperator &I) {
2550   if (Value *V = simplifyFRemInst(I.getOperand(0), I.getOperand(1),
2551                                   I.getFastMathFlags(),
2552                                   SQ.getWithInstruction(&I)))
2553     return replaceInstUsesWith(I, V);
2554 
2555   if (Instruction *X = foldVectorBinop(I))
2556     return X;
2557 
2558   if (Instruction *Phi = foldBinopWithPhiOperands(I))
2559     return Phi;
2560 
2561   return nullptr;
2562 }
2563