1 //===- InstCombineSimplifyDemanded.cpp ------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains logic for simplifying instructions based on information 10 // about how they are used. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "InstCombineInternal.h" 15 #include "llvm/Analysis/ValueTracking.h" 16 #include "llvm/IR/GetElementPtrTypeIterator.h" 17 #include "llvm/IR/IntrinsicInst.h" 18 #include "llvm/IR/PatternMatch.h" 19 #include "llvm/Support/KnownBits.h" 20 #include "llvm/Transforms/InstCombine/InstCombiner.h" 21 22 using namespace llvm; 23 using namespace llvm::PatternMatch; 24 25 #define DEBUG_TYPE "instcombine" 26 27 /// Check to see if the specified operand of the specified instruction is a 28 /// constant integer. If so, check to see if there are any bits set in the 29 /// constant that are not demanded. If so, shrink the constant and return true. 30 static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, 31 const APInt &Demanded) { 32 assert(I && "No instruction?"); 33 assert(OpNo < I->getNumOperands() && "Operand index too large"); 34 35 // The operand must be a constant integer or splat integer. 36 Value *Op = I->getOperand(OpNo); 37 const APInt *C; 38 if (!match(Op, m_APInt(C))) 39 return false; 40 41 // If there are no bits set that aren't demanded, nothing to do. 42 if (C->isSubsetOf(Demanded)) 43 return false; 44 45 // This instruction is producing bits that are not demanded. Shrink the RHS. 46 I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded)); 47 48 return true; 49 } 50 51 52 53 /// Inst is an integer instruction that SimplifyDemandedBits knows about. 
See if 54 /// the instruction has any properties that allow us to simplify its operands. 55 bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) { 56 unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); 57 KnownBits Known(BitWidth); 58 APInt DemandedMask(APInt::getAllOnes(BitWidth)); 59 60 Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known, 61 0, &Inst); 62 if (!V) return false; 63 if (V == &Inst) return true; 64 replaceInstUsesWith(Inst, V); 65 return true; 66 } 67 68 /// This form of SimplifyDemandedBits simplifies the specified instruction 69 /// operand if possible, updating it in place. It returns true if it made any 70 /// change and false otherwise. 71 bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo, 72 const APInt &DemandedMask, 73 KnownBits &Known, unsigned Depth) { 74 Use &U = I->getOperandUse(OpNo); 75 Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known, 76 Depth, I); 77 if (!NewVal) return false; 78 if (Instruction* OpInst = dyn_cast<Instruction>(U)) 79 salvageDebugInfo(*OpInst); 80 81 replaceUse(U, NewVal); 82 return true; 83 } 84 85 /// This function attempts to replace V with a simpler value based on the 86 /// demanded bits. When this function is called, it is known that only the bits 87 /// set in DemandedMask of the result of V are ever used downstream. 88 /// Consequently, depending on the mask and V, it may be possible to replace V 89 /// with a constant or one of its operands. In such cases, this function does 90 /// the replacement and returns true. In all other cases, it returns false after 91 /// analyzing the expression and setting KnownOne and known to be one in the 92 /// expression. Known.Zero contains all the bits that are known to be zero in 93 /// the expression. These are provided to potentially allow the caller (which 94 /// might recursively be SimplifyDemandedBits itself) to simplify the 95 /// expression. 
96 /// Known.One and Known.Zero always follow the invariant that: 97 /// Known.One & Known.Zero == 0. 98 /// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and 99 /// Known.Zero may only be accurate for those bits set in DemandedMask. Note 100 /// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all 101 /// be the same. 102 /// 103 /// This returns null if it did not change anything and it permits no 104 /// simplification. This returns V itself if it did some simplification of V's 105 /// operands based on the information about what bits are demanded. This returns 106 /// some other non-null value if it found out that V is equal to another value 107 /// in the context where the specified bits are demanded, but not for all users. 108 Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, 109 KnownBits &Known, 110 unsigned Depth, 111 Instruction *CxtI) { 112 assert(V != nullptr && "Null pointer of Value???"); 113 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 114 uint32_t BitWidth = DemandedMask.getBitWidth(); 115 Type *VTy = V->getType(); 116 assert( 117 (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && 118 Known.getBitWidth() == BitWidth && 119 "Value *V, DemandedMask and Known must have same BitWidth"); 120 121 if (isa<Constant>(V)) { 122 computeKnownBits(V, Known, Depth, CxtI); 123 return nullptr; 124 } 125 126 Known.resetAll(); 127 if (DemandedMask.isZero()) // Not demanding any bits from V. 128 return UndefValue::get(VTy); 129 130 if (Depth == MaxAnalysisRecursionDepth) 131 return nullptr; 132 133 Instruction *I = dyn_cast<Instruction>(V); 134 if (!I) { 135 computeKnownBits(V, Known, Depth, CxtI); 136 return nullptr; // Only analyze instructions. 
137 } 138 139 // If there are multiple uses of this value and we aren't at the root, then 140 // we can't do any simplifications of the operands, because DemandedMask 141 // only reflects the bits demanded by *one* of the users. 142 if (Depth != 0 && !I->hasOneUse()) 143 return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI); 144 145 KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth); 146 147 // If this is the root being simplified, allow it to have multiple uses, 148 // just set the DemandedMask to all bits so that we can try to simplify the 149 // operands. This allows visitTruncInst (for example) to simplify the 150 // operand of a trunc without duplicating all the logic below. 151 if (Depth == 0 && !V->hasOneUse()) 152 DemandedMask.setAllBits(); 153 154 // Update flags after simplifying an operand based on the fact that some high 155 // order bits are not demanded. 156 auto disableWrapFlagsBasedOnUnusedHighBits = [](Instruction *I, 157 unsigned NLZ) { 158 if (NLZ > 0) { 159 // Disable the nsw and nuw flags here: We can no longer guarantee that 160 // we won't wrap after simplification. Removing the nsw/nuw flags is 161 // legal here because the top bit is not demanded. 162 I->setHasNoSignedWrap(false); 163 I->setHasNoUnsignedWrap(false); 164 } 165 return I; 166 }; 167 168 // If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care 169 // about the high bits of the operands. 170 auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) { 171 unsigned NLZ = DemandedMask.countl_zero(); 172 // Right fill the mask of bits for the operands to demand the most 173 // significant bit and all those below it. 
174 DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ); 175 if (ShrinkDemandedConstant(I, 0, DemandedFromOps) || 176 SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) || 177 ShrinkDemandedConstant(I, 1, DemandedFromOps) || 178 SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) { 179 disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); 180 return true; 181 } 182 return false; 183 }; 184 185 switch (I->getOpcode()) { 186 default: 187 computeKnownBits(I, Known, Depth, CxtI); 188 break; 189 case Instruction::And: { 190 // If either the LHS or the RHS are Zero, the result is zero. 191 if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) || 192 SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown, 193 Depth + 1)) 194 return I; 195 assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); 196 assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); 197 198 Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown, 199 Depth, DL, &AC, CxtI, &DT); 200 201 // If the client is only demanding bits that we know, return the known 202 // constant. 203 if (DemandedMask.isSubsetOf(Known.Zero | Known.One)) 204 return Constant::getIntegerValue(VTy, Known.One); 205 206 // If all of the demanded bits are known 1 on one side, return the other. 207 // These bits cannot contribute to the result of the 'and'. 208 if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One)) 209 return I->getOperand(0); 210 if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) 211 return I->getOperand(1); 212 213 // If the RHS is a constant, see if we can simplify it. 214 if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero)) 215 return I; 216 217 break; 218 } 219 case Instruction::Or: { 220 // If either the LHS or the RHS are One, the result is One. 
221 if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) || 222 SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown, 223 Depth + 1)) 224 return I; 225 assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); 226 assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); 227 228 Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown, 229 Depth, DL, &AC, CxtI, &DT); 230 231 // If the client is only demanding bits that we know, return the known 232 // constant. 233 if (DemandedMask.isSubsetOf(Known.Zero | Known.One)) 234 return Constant::getIntegerValue(VTy, Known.One); 235 236 // If all of the demanded bits are known zero on one side, return the other. 237 // These bits cannot contribute to the result of the 'or'. 238 if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero)) 239 return I->getOperand(0); 240 if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) 241 return I->getOperand(1); 242 243 // If the RHS is a constant, see if we can simplify it. 
244 if (ShrinkDemandedConstant(I, 1, DemandedMask)) 245 return I; 246 247 break; 248 } 249 case Instruction::Xor: { 250 if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) || 251 SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1)) 252 return I; 253 Value *LHS, *RHS; 254 if (DemandedMask == 1 && 255 match(I->getOperand(0), m_Intrinsic<Intrinsic::ctpop>(m_Value(LHS))) && 256 match(I->getOperand(1), m_Intrinsic<Intrinsic::ctpop>(m_Value(RHS)))) { 257 // (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1 258 IRBuilderBase::InsertPointGuard Guard(Builder); 259 Builder.SetInsertPoint(I); 260 auto *Xor = Builder.CreateXor(LHS, RHS); 261 return Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Xor); 262 } 263 264 assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); 265 assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); 266 267 Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown, 268 Depth, DL, &AC, CxtI, &DT); 269 270 // If the client is only demanding bits that we know, return the known 271 // constant. 272 if (DemandedMask.isSubsetOf(Known.Zero | Known.One)) 273 return Constant::getIntegerValue(VTy, Known.One); 274 275 // If all of the demanded bits are known zero on one side, return the other. 276 // These bits cannot contribute to the result of the 'xor'. 277 if (DemandedMask.isSubsetOf(RHSKnown.Zero)) 278 return I->getOperand(0); 279 if (DemandedMask.isSubsetOf(LHSKnown.Zero)) 280 return I->getOperand(1); 281 282 // If all of the demanded bits are known to be zero on one side or the 283 // other, turn this into an *inclusive* or. 284 // e.g. 
(A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 285 if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) { 286 Instruction *Or = 287 BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), 288 I->getName()); 289 return InsertNewInstWith(Or, *I); 290 } 291 292 // If all of the demanded bits on one side are known, and all of the set 293 // bits on that side are also known to be set on the other side, turn this 294 // into an AND, as we know the bits will be cleared. 295 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 296 if (DemandedMask.isSubsetOf(RHSKnown.Zero|RHSKnown.One) && 297 RHSKnown.One.isSubsetOf(LHSKnown.One)) { 298 Constant *AndC = Constant::getIntegerValue(VTy, 299 ~RHSKnown.One & DemandedMask); 300 Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); 301 return InsertNewInstWith(And, *I); 302 } 303 304 // If the RHS is a constant, see if we can change it. Don't alter a -1 305 // constant because that's a canonical 'not' op, and that is better for 306 // combining, SCEV, and codegen. 307 const APInt *C; 308 if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnes()) { 309 if ((*C | ~DemandedMask).isAllOnes()) { 310 // Force bits to 1 to create a 'not' op. 311 I->setOperand(1, ConstantInt::getAllOnesValue(VTy)); 312 return I; 313 } 314 // If we can't turn this into a 'not', try to shrink the constant. 315 if (ShrinkDemandedConstant(I, 1, DemandedMask)) 316 return I; 317 } 318 319 // If our LHS is an 'and' and if it has one use, and if any of the bits we 320 // are flipping are known to be set, then the xor is just resetting those 321 // bits to zero. We can just knock out bits from the 'and' and the 'xor', 322 // simplifying both of them. 
323 if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) { 324 ConstantInt *AndRHS, *XorRHS; 325 if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && 326 match(I->getOperand(1), m_ConstantInt(XorRHS)) && 327 match(LHSInst->getOperand(1), m_ConstantInt(AndRHS)) && 328 (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) { 329 APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask); 330 331 Constant *AndC = ConstantInt::get(VTy, NewMask & AndRHS->getValue()); 332 Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC); 333 InsertNewInstWith(NewAnd, *I); 334 335 Constant *XorC = ConstantInt::get(VTy, NewMask & XorRHS->getValue()); 336 Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC); 337 return InsertNewInstWith(NewXor, *I); 338 } 339 } 340 break; 341 } 342 case Instruction::Select: { 343 if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) || 344 SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1)) 345 return I; 346 assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); 347 assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); 348 349 // If the operands are constants, see if we can simplify them. 350 // This is similar to ShrinkDemandedConstant, but for a select we want to 351 // try to keep the selected constants the same as icmp value constants, if 352 // we can. This helps not break apart (or helps put back together) 353 // canonical patterns like min and max. 354 auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo, 355 const APInt &DemandedMask) { 356 const APInt *SelC; 357 if (!match(I->getOperand(OpNo), m_APInt(SelC))) 358 return false; 359 360 // Get the constant out of the ICmp, if there is one. 361 // Only try this when exactly 1 operand is a constant (if both operands 362 // are constant, the icmp should eventually simplify). Otherwise, we may 363 // invert the transform that reduces set bits and infinite-loop. 
364 Value *X; 365 const APInt *CmpC; 366 ICmpInst::Predicate Pred; 367 if (!match(I->getOperand(0), m_ICmp(Pred, m_Value(X), m_APInt(CmpC))) || 368 isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth()) 369 return ShrinkDemandedConstant(I, OpNo, DemandedMask); 370 371 // If the constant is already the same as the ICmp, leave it as-is. 372 if (*CmpC == *SelC) 373 return false; 374 // If the constants are not already the same, but can be with the demand 375 // mask, use the constant value from the ICmp. 376 if ((*CmpC & DemandedMask) == (*SelC & DemandedMask)) { 377 I->setOperand(OpNo, ConstantInt::get(I->getType(), *CmpC)); 378 return true; 379 } 380 return ShrinkDemandedConstant(I, OpNo, DemandedMask); 381 }; 382 if (CanonicalizeSelectConstant(I, 1, DemandedMask) || 383 CanonicalizeSelectConstant(I, 2, DemandedMask)) 384 return I; 385 386 // Only known if known in both the LHS and RHS. 387 Known = LHSKnown.intersectWith(RHSKnown); 388 break; 389 } 390 case Instruction::Trunc: { 391 // If we do not demand the high bits of a right-shifted and truncated value, 392 // then we may be able to truncate it before the shift. 393 Value *X; 394 const APInt *C; 395 if (match(I->getOperand(0), m_OneUse(m_LShr(m_Value(X), m_APInt(C))))) { 396 // The shift amount must be valid (not poison) in the narrow type, and 397 // it must not be greater than the high bits demanded of the result. 
398 if (C->ult(VTy->getScalarSizeInBits()) && 399 C->ule(DemandedMask.countl_zero())) { 400 // trunc (lshr X, C) --> lshr (trunc X), C 401 IRBuilderBase::InsertPointGuard Guard(Builder); 402 Builder.SetInsertPoint(I); 403 Value *Trunc = Builder.CreateTrunc(X, VTy); 404 return Builder.CreateLShr(Trunc, C->getZExtValue()); 405 } 406 } 407 } 408 [[fallthrough]]; 409 case Instruction::ZExt: { 410 unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); 411 412 APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth); 413 KnownBits InputKnown(SrcBitWidth); 414 if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1)) 415 return I; 416 assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?"); 417 Known = InputKnown.zextOrTrunc(BitWidth); 418 assert(!Known.hasConflict() && "Bits known to be one AND zero?"); 419 break; 420 } 421 case Instruction::BitCast: 422 if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) 423 return nullptr; // vector->int or fp->int? 424 425 if (auto *DstVTy = dyn_cast<VectorType>(VTy)) { 426 if (auto *SrcVTy = dyn_cast<VectorType>(I->getOperand(0)->getType())) { 427 if (isa<ScalableVectorType>(DstVTy) || 428 isa<ScalableVectorType>(SrcVTy) || 429 cast<FixedVectorType>(DstVTy)->getNumElements() != 430 cast<FixedVectorType>(SrcVTy)->getNumElements()) 431 // Don't touch a bitcast between vectors of different element counts. 432 return nullptr; 433 } else 434 // Don't touch a scalar-to-vector bitcast. 435 return nullptr; 436 } else if (I->getOperand(0)->getType()->isVectorTy()) 437 // Don't touch a vector-to-scalar bitcast. 438 return nullptr; 439 440 if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) 441 return I; 442 assert(!Known.hasConflict() && "Bits known to be one AND zero?"); 443 break; 444 case Instruction::SExt: { 445 // Compute the bits in the result that are not present in the input. 
446 unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); 447 448 APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth); 449 450 // If any of the sign extended bits are demanded, we know that the sign 451 // bit is demanded. 452 if (DemandedMask.getActiveBits() > SrcBitWidth) 453 InputDemandedBits.setBit(SrcBitWidth-1); 454 455 KnownBits InputKnown(SrcBitWidth); 456 if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Depth + 1)) 457 return I; 458 459 // If the input sign bit is known zero, or if the NewBits are not demanded 460 // convert this into a zero extension. 461 if (InputKnown.isNonNegative() || 462 DemandedMask.getActiveBits() <= SrcBitWidth) { 463 // Convert to ZExt cast. 464 CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); 465 return InsertNewInstWith(NewCast, *I); 466 } 467 468 // If the sign bit of the input is known set or clear, then we know the 469 // top bits of the result. 470 Known = InputKnown.sext(BitWidth); 471 assert(!Known.hasConflict() && "Bits known to be one AND zero?"); 472 break; 473 } 474 case Instruction::Add: { 475 if ((DemandedMask & 1) == 0) { 476 // If we do not need the low bit, try to convert bool math to logic: 477 // add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN 478 Value *X, *Y; 479 if (match(I, m_c_Add(m_OneUse(m_ZExt(m_Value(X))), 480 m_OneUse(m_SExt(m_Value(Y))))) && 481 X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) { 482 // Truth table for inputs and output signbits: 483 // X:0 | X:1 484 // ---------- 485 // Y:0 | 0 | 0 | 486 // Y:1 | -1 | 0 | 487 // ---------- 488 IRBuilderBase::InsertPointGuard Guard(Builder); 489 Builder.SetInsertPoint(I); 490 Value *AndNot = Builder.CreateAnd(Builder.CreateNot(X), Y); 491 return Builder.CreateSExt(AndNot, VTy); 492 } 493 494 // add iN (sext i1 X), (sext i1 Y) --> sext (X | Y) to iN 495 // TODO: Relax the one-use checks because we are removing an instruction? 
496 if (match(I, m_Add(m_OneUse(m_SExt(m_Value(X))), 497 m_OneUse(m_SExt(m_Value(Y))))) && 498 X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) { 499 // Truth table for inputs and output signbits: 500 // X:0 | X:1 501 // ----------- 502 // Y:0 | -1 | -1 | 503 // Y:1 | -1 | 0 | 504 // ----------- 505 IRBuilderBase::InsertPointGuard Guard(Builder); 506 Builder.SetInsertPoint(I); 507 Value *Or = Builder.CreateOr(X, Y); 508 return Builder.CreateSExt(Or, VTy); 509 } 510 } 511 512 // Right fill the mask of bits for the operands to demand the most 513 // significant bit and all those below it. 514 unsigned NLZ = DemandedMask.countl_zero(); 515 APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ); 516 if (ShrinkDemandedConstant(I, 1, DemandedFromOps) || 517 SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) 518 return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); 519 520 // If low order bits are not demanded and known to be zero in one operand, 521 // then we don't need to demand them from the other operand, since they 522 // can't cause overflow into any bits that are demanded in the result. 523 unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one(); 524 APInt DemandedFromLHS = DemandedFromOps; 525 DemandedFromLHS.clearLowBits(NTZ); 526 if (ShrinkDemandedConstant(I, 0, DemandedFromLHS) || 527 SimplifyDemandedBits(I, 0, DemandedFromLHS, LHSKnown, Depth + 1)) 528 return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); 529 530 // If we are known to be adding zeros to every bit below 531 // the highest demanded bit, we just return the other side. 532 if (DemandedFromOps.isSubsetOf(RHSKnown.Zero)) 533 return I->getOperand(0); 534 if (DemandedFromOps.isSubsetOf(LHSKnown.Zero)) 535 return I->getOperand(1); 536 537 // Otherwise just compute the known bits of the result. 
538 bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); 539 Known = KnownBits::computeForAddSub(true, NSW, LHSKnown, RHSKnown); 540 break; 541 } 542 case Instruction::Sub: { 543 // Right fill the mask of bits for the operands to demand the most 544 // significant bit and all those below it. 545 unsigned NLZ = DemandedMask.countl_zero(); 546 APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ); 547 if (ShrinkDemandedConstant(I, 1, DemandedFromOps) || 548 SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) 549 return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); 550 551 // If low order bits are not demanded and are known to be zero in RHS, 552 // then we don't need to demand them from LHS, since they can't cause a 553 // borrow from any bits that are demanded in the result. 554 unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one(); 555 APInt DemandedFromLHS = DemandedFromOps; 556 DemandedFromLHS.clearLowBits(NTZ); 557 if (ShrinkDemandedConstant(I, 0, DemandedFromLHS) || 558 SimplifyDemandedBits(I, 0, DemandedFromLHS, LHSKnown, Depth + 1)) 559 return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); 560 561 // If we are known to be subtracting zeros from every bit below 562 // the highest demanded bit, we just return the other side. 563 if (DemandedFromOps.isSubsetOf(RHSKnown.Zero)) 564 return I->getOperand(0); 565 // We can't do this with the LHS for subtraction, unless we are only 566 // demanding the LSB. 567 if (DemandedFromOps.isOne() && DemandedFromOps.isSubsetOf(LHSKnown.Zero)) 568 return I->getOperand(1); 569 570 // Otherwise just compute the known bits of the result. 
571 bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); 572 Known = KnownBits::computeForAddSub(false, NSW, LHSKnown, RHSKnown); 573 break; 574 } 575 case Instruction::Mul: { 576 APInt DemandedFromOps; 577 if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps)) 578 return I; 579 580 if (DemandedMask.isPowerOf2()) { 581 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1. 582 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is 583 // odd (has LSB set), then the left-shifted low bit of X is the answer. 584 unsigned CTZ = DemandedMask.countr_zero(); 585 const APInt *C; 586 if (match(I->getOperand(1), m_APInt(C)) && C->countr_zero() == CTZ) { 587 Constant *ShiftC = ConstantInt::get(VTy, CTZ); 588 Instruction *Shl = BinaryOperator::CreateShl(I->getOperand(0), ShiftC); 589 return InsertNewInstWith(Shl, *I); 590 } 591 } 592 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because: 593 // X * X is odd iff X is odd. 594 // 'Quadratic Reciprocity': X * X -> 0 for bit[1] 595 if (I->getOperand(0) == I->getOperand(1) && DemandedMask.ult(4)) { 596 Constant *One = ConstantInt::get(VTy, 1); 597 Instruction *And1 = BinaryOperator::CreateAnd(I->getOperand(0), One); 598 return InsertNewInstWith(And1, *I); 599 } 600 601 computeKnownBits(I, Known, Depth, CxtI); 602 break; 603 } 604 case Instruction::Shl: { 605 const APInt *SA; 606 if (match(I->getOperand(1), m_APInt(SA))) { 607 const APInt *ShrAmt; 608 if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt)))) 609 if (Instruction *Shr = dyn_cast<Instruction>(I->getOperand(0))) 610 if (Value *R = simplifyShrShlDemandedBits(Shr, *ShrAmt, I, *SA, 611 DemandedMask, Known)) 612 return R; 613 614 // TODO: If we only want bits that already match the signbit then we don't 615 // need to shift. 
616 617 // If we can pre-shift a right-shifted constant to the left without 618 // losing any high bits amd we don't demand the low bits, then eliminate 619 // the left-shift: 620 // (C >> X) << LeftShiftAmtC --> (C << RightShiftAmtC) >> X 621 uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); 622 Value *X; 623 Constant *C; 624 if (DemandedMask.countr_zero() >= ShiftAmt && 625 match(I->getOperand(0), m_LShr(m_ImmConstant(C), m_Value(X)))) { 626 Constant *LeftShiftAmtC = ConstantInt::get(VTy, ShiftAmt); 627 Constant *NewC = ConstantExpr::getShl(C, LeftShiftAmtC); 628 if (ConstantExpr::getLShr(NewC, LeftShiftAmtC) == C) { 629 Instruction *Lshr = BinaryOperator::CreateLShr(NewC, X); 630 return InsertNewInstWith(Lshr, *I); 631 } 632 } 633 634 APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); 635 636 // If the shift is NUW/NSW, then it does demand the high bits. 637 ShlOperator *IOp = cast<ShlOperator>(I); 638 if (IOp->hasNoSignedWrap()) 639 DemandedMaskIn.setHighBits(ShiftAmt+1); 640 else if (IOp->hasNoUnsignedWrap()) 641 DemandedMaskIn.setHighBits(ShiftAmt); 642 643 if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) 644 return I; 645 assert(!Known.hasConflict() && "Bits known to be one AND zero?"); 646 647 Known = KnownBits::shl(Known, 648 KnownBits::makeConstant(APInt(BitWidth, ShiftAmt)), 649 /* NUW */ IOp->hasNoUnsignedWrap(), 650 /* NSW */ IOp->hasNoSignedWrap()); 651 } else { 652 // This is a variable shift, so we can't shift the demand mask by a known 653 // amount. But if we are not demanding high bits, then we are not 654 // demanding those bits from the pre-shifted operand either. 655 if (unsigned CTLZ = DemandedMask.countl_zero()) { 656 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ)); 657 if (SimplifyDemandedBits(I, 0, DemandedFromOp, Known, Depth + 1)) { 658 // We can't guarantee that nsw/nuw hold after simplifying the operand. 
659 I->dropPoisonGeneratingFlags(); 660 return I; 661 } 662 } 663 computeKnownBits(I, Known, Depth, CxtI); 664 } 665 break; 666 } 667 case Instruction::LShr: { 668 const APInt *SA; 669 if (match(I->getOperand(1), m_APInt(SA))) { 670 uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); 671 672 // If we are just demanding the shifted sign bit and below, then this can 673 // be treated as an ASHR in disguise. 674 if (DemandedMask.countl_zero() >= ShiftAmt) { 675 // If we only want bits that already match the signbit then we don't 676 // need to shift. 677 unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); 678 unsigned SignBits = 679 ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI); 680 if (SignBits >= NumHiDemandedBits) 681 return I->getOperand(0); 682 683 // If we can pre-shift a left-shifted constant to the right without 684 // losing any low bits (we already know we don't demand the high bits), 685 // then eliminate the right-shift: 686 // (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X 687 Value *X; 688 Constant *C; 689 if (match(I->getOperand(0), m_Shl(m_ImmConstant(C), m_Value(X)))) { 690 Constant *RightShiftAmtC = ConstantInt::get(VTy, ShiftAmt); 691 Constant *NewC = ConstantExpr::getLShr(C, RightShiftAmtC); 692 if (ConstantExpr::getShl(NewC, RightShiftAmtC) == C) { 693 Instruction *Shl = BinaryOperator::CreateShl(NewC, X); 694 return InsertNewInstWith(Shl, *I); 695 } 696 } 697 } 698 699 // Unsigned shift right. 700 APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); 701 702 // If the shift is exact, then it does demand the low bits (and knows that 703 // they are zero). 
704 if (cast<LShrOperator>(I)->isExact()) 705 DemandedMaskIn.setLowBits(ShiftAmt); 706 707 if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) 708 return I; 709 assert(!Known.hasConflict() && "Bits known to be one AND zero?"); 710 Known.Zero.lshrInPlace(ShiftAmt); 711 Known.One.lshrInPlace(ShiftAmt); 712 if (ShiftAmt) 713 Known.Zero.setHighBits(ShiftAmt); // high bits known zero. 714 } else { 715 computeKnownBits(I, Known, Depth, CxtI); 716 } 717 break; 718 } 719 case Instruction::AShr: { 720 unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI); 721 722 // If we only want bits that already match the signbit then we don't need 723 // to shift. 724 unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); 725 if (SignBits >= NumHiDemandedBits) 726 return I->getOperand(0); 727 728 // If this is an arithmetic shift right and only the low-bit is set, we can 729 // always convert this into a logical shr, even if the shift amount is 730 // variable. The low bit of the shift cannot be an input sign bit unless 731 // the shift amount is >= the size of the datatype, which is undefined. 732 if (DemandedMask.isOne()) { 733 // Perform the logical shift right. 734 Instruction *NewVal = BinaryOperator::CreateLShr( 735 I->getOperand(0), I->getOperand(1), I->getName()); 736 return InsertNewInstWith(NewVal, *I); 737 } 738 739 const APInt *SA; 740 if (match(I->getOperand(1), m_APInt(SA))) { 741 uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1); 742 743 // Signed shift right. 744 APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); 745 // If any of the high bits are demanded, we should set the sign bit as 746 // demanded. 747 if (DemandedMask.countl_zero() <= ShiftAmt) 748 DemandedMaskIn.setSignBit(); 749 750 // If the shift is exact, then it does demand the low bits (and knows that 751 // they are zero). 
752 if (cast<AShrOperator>(I)->isExact()) 753 DemandedMaskIn.setLowBits(ShiftAmt); 754 755 if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) 756 return I; 757 758 assert(!Known.hasConflict() && "Bits known to be one AND zero?"); 759 // Compute the new bits that are at the top now plus sign bits. 760 APInt HighBits(APInt::getHighBitsSet( 761 BitWidth, std::min(SignBits + ShiftAmt - 1, BitWidth))); 762 Known.Zero.lshrInPlace(ShiftAmt); 763 Known.One.lshrInPlace(ShiftAmt); 764 765 // If the input sign bit is known to be zero, or if none of the top bits 766 // are demanded, turn this into an unsigned shift right. 767 assert(BitWidth > ShiftAmt && "Shift amount not saturated?"); 768 if (Known.Zero[BitWidth-ShiftAmt-1] || 769 !DemandedMask.intersects(HighBits)) { 770 BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0), 771 I->getOperand(1)); 772 LShr->setIsExact(cast<BinaryOperator>(I)->isExact()); 773 return InsertNewInstWith(LShr, *I); 774 } else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one. 775 Known.One |= HighBits; 776 } 777 } else { 778 computeKnownBits(I, Known, Depth, CxtI); 779 } 780 break; 781 } 782 case Instruction::UDiv: { 783 // UDiv doesn't demand low bits that are zero in the divisor. 784 const APInt *SA; 785 if (match(I->getOperand(1), m_APInt(SA))) { 786 // TODO: Take the demanded mask of the result into account. 787 unsigned RHSTrailingZeros = SA->countr_zero(); 788 APInt DemandedMaskIn = 789 APInt::getHighBitsSet(BitWidth, BitWidth - RHSTrailingZeros); 790 if (SimplifyDemandedBits(I, 0, DemandedMaskIn, LHSKnown, Depth + 1)) { 791 // We can't guarantee that "exact" is still true after changing the 792 // the dividend. 
793 I->dropPoisonGeneratingFlags(); 794 return I; 795 } 796 797 Known = KnownBits::udiv(LHSKnown, KnownBits::makeConstant(*SA), 798 cast<BinaryOperator>(I)->isExact()); 799 } else { 800 computeKnownBits(I, Known, Depth, CxtI); 801 } 802 break; 803 } 804 case Instruction::SRem: { 805 const APInt *Rem; 806 if (match(I->getOperand(1), m_APInt(Rem))) { 807 // X % -1 demands all the bits because we don't want to introduce 808 // INT_MIN % -1 (== undef) by accident. 809 if (Rem->isAllOnes()) 810 break; 811 APInt RA = Rem->abs(); 812 if (RA.isPowerOf2()) { 813 if (DemandedMask.ult(RA)) // srem won't affect demanded bits 814 return I->getOperand(0); 815 816 APInt LowBits = RA - 1; 817 APInt Mask2 = LowBits | APInt::getSignMask(BitWidth); 818 if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Depth + 1)) 819 return I; 820 821 // The low bits of LHS are unchanged by the srem. 822 Known.Zero = LHSKnown.Zero & LowBits; 823 Known.One = LHSKnown.One & LowBits; 824 825 // If LHS is non-negative or has all low bits zero, then the upper bits 826 // are all zero. 827 if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(LHSKnown.Zero)) 828 Known.Zero |= ~LowBits; 829 830 // If LHS is negative and not all low bits are zero, then the upper bits 831 // are all one. 
832 if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One)) 833 Known.One |= ~LowBits; 834 835 assert(!Known.hasConflict() && "Bits known to be one AND zero?"); 836 break; 837 } 838 } 839 840 computeKnownBits(I, Known, Depth, CxtI); 841 break; 842 } 843 case Instruction::URem: { 844 APInt AllOnes = APInt::getAllOnes(BitWidth); 845 if (SimplifyDemandedBits(I, 0, AllOnes, LHSKnown, Depth + 1) || 846 SimplifyDemandedBits(I, 1, AllOnes, RHSKnown, Depth + 1)) 847 return I; 848 849 Known = KnownBits::urem(LHSKnown, RHSKnown); 850 break; 851 } 852 case Instruction::Call: { 853 bool KnownBitsComputed = false; 854 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 855 switch (II->getIntrinsicID()) { 856 case Intrinsic::abs: { 857 if (DemandedMask == 1) 858 return II->getArgOperand(0); 859 break; 860 } 861 case Intrinsic::ctpop: { 862 // Checking if the number of clear bits is odd (parity)? If the type has 863 // an even number of bits, that's the same as checking if the number of 864 // set bits is odd, so we can eliminate the 'not' op. 865 Value *X; 866 if (DemandedMask == 1 && VTy->getScalarSizeInBits() % 2 == 0 && 867 match(II->getArgOperand(0), m_Not(m_Value(X)))) { 868 Function *Ctpop = Intrinsic::getDeclaration( 869 II->getModule(), Intrinsic::ctpop, VTy); 870 return InsertNewInstWith(CallInst::Create(Ctpop, {X}), *I); 871 } 872 break; 873 } 874 case Intrinsic::bswap: { 875 // If the only bits demanded come from one byte of the bswap result, 876 // just shift the input byte into position to eliminate the bswap. 877 unsigned NLZ = DemandedMask.countl_zero(); 878 unsigned NTZ = DemandedMask.countr_zero(); 879 880 // Round NTZ down to the next byte. If we have 11 trailing zeros, then 881 // we need all the bits down to bit 8. Likewise, round NLZ. If we 882 // have 14 leading zeros, round to 8. 883 NLZ = alignDown(NLZ, 8); 884 NTZ = alignDown(NTZ, 8); 885 // If we need exactly one byte, we can do this transformation. 
886 if (BitWidth - NLZ - NTZ == 8) { 887 // Replace this with either a left or right shift to get the byte into 888 // the right place. 889 Instruction *NewVal; 890 if (NLZ > NTZ) 891 NewVal = BinaryOperator::CreateLShr( 892 II->getArgOperand(0), ConstantInt::get(VTy, NLZ - NTZ)); 893 else 894 NewVal = BinaryOperator::CreateShl( 895 II->getArgOperand(0), ConstantInt::get(VTy, NTZ - NLZ)); 896 NewVal->takeName(I); 897 return InsertNewInstWith(NewVal, *I); 898 } 899 break; 900 } 901 case Intrinsic::fshr: 902 case Intrinsic::fshl: { 903 const APInt *SA; 904 if (!match(I->getOperand(2), m_APInt(SA))) 905 break; 906 907 // Normalize to funnel shift left. APInt shifts of BitWidth are well- 908 // defined, so no need to special-case zero shifts here. 909 uint64_t ShiftAmt = SA->urem(BitWidth); 910 if (II->getIntrinsicID() == Intrinsic::fshr) 911 ShiftAmt = BitWidth - ShiftAmt; 912 913 APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt)); 914 APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt)); 915 if (I->getOperand(0) != I->getOperand(1)) { 916 if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, 917 Depth + 1) || 918 SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1)) 919 return I; 920 } else { // fshl is a rotate 921 // Avoid converting rotate into funnel shift. 922 // Only simplify if one operand is constant. 
923 LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, I); 924 if (DemandedMaskLHS.isSubsetOf(LHSKnown.Zero | LHSKnown.One) && 925 !match(I->getOperand(0), m_SpecificInt(LHSKnown.One))) { 926 replaceOperand(*I, 0, Constant::getIntegerValue(VTy, LHSKnown.One)); 927 return I; 928 } 929 930 RHSKnown = computeKnownBits(I->getOperand(1), Depth + 1, I); 931 if (DemandedMaskRHS.isSubsetOf(RHSKnown.Zero | RHSKnown.One) && 932 !match(I->getOperand(1), m_SpecificInt(RHSKnown.One))) { 933 replaceOperand(*I, 1, Constant::getIntegerValue(VTy, RHSKnown.One)); 934 return I; 935 } 936 } 937 938 Known.Zero = LHSKnown.Zero.shl(ShiftAmt) | 939 RHSKnown.Zero.lshr(BitWidth - ShiftAmt); 940 Known.One = LHSKnown.One.shl(ShiftAmt) | 941 RHSKnown.One.lshr(BitWidth - ShiftAmt); 942 KnownBitsComputed = true; 943 break; 944 } 945 case Intrinsic::umax: { 946 // UMax(A, C) == A if ... 947 // The lowest non-zero bit of DemandMask is higher than the highest 948 // non-zero bit of C. 949 const APInt *C; 950 unsigned CTZ = DemandedMask.countr_zero(); 951 if (match(II->getArgOperand(1), m_APInt(C)) && 952 CTZ >= C->getActiveBits()) 953 return II->getArgOperand(0); 954 break; 955 } 956 case Intrinsic::umin: { 957 // UMin(A, C) == A if ... 958 // The lowest non-zero bit of DemandMask is higher than the highest 959 // non-one bit of C. 960 // This comes from using DeMorgans on the above umax example. 
        const APInt *C;
        unsigned CTZ = DemandedMask.countr_zero();
        // All demanded bits sit strictly above the highest bit of C that is
        // not one, so min(A, C) and A agree on every demanded bit.
        if (match(II->getArgOperand(1), m_APInt(C)) &&
            CTZ >= C->getBitWidth() - C->countl_one())
          return II->getArgOperand(0);
        break;
      }
      default: {
        // Handle target specific intrinsics.
        std::optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
            *II, DemandedMask, Known, KnownBitsComputed);
        if (V)
          return *V;
        break;
      }
      }
    }

    // Fall back to a plain known-bits query if no case above filled it in.
    if (!KnownBitsComputed)
      computeKnownBits(V, Known, Depth, CxtI);
    break;
  }
  }

  // If the client is only demanding bits that we know, return the known
  // constant.
  if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
    return Constant::getIntegerValue(VTy, Known.One);
  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It computes Known
/// bits. It also tries to handle simplifications that can be done based on
/// DemandedMask, but without modifying the Instruction.
///
/// Unlike SimplifyDemandedUseBits, this never mutates \p I or its operands:
/// \p I may have users other than the one this query is made on behalf of, so
/// the only legal simplification is returning an existing value (an operand or
/// a constant) that is equivalent to \p I for the demanded bits.
///
/// \param I            The multi-use instruction being analyzed.
/// \param DemandedMask Bits of the result that the one interested user reads.
/// \param Known        [out] Known-zero/one bits of \p I's result.
/// \param Depth        Current recursion depth for the known-bits walk.
/// \param CxtI         Context instruction for context-sensitive queries
///                     (e.g. assumes).
/// \returns A replacement value valid in the user's context, or null.
Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
    Instruction *I, const APInt &DemandedMask, KnownBits &Known, unsigned Depth,
    Instruction *CxtI) {
  unsigned BitWidth = DemandedMask.getBitWidth();
  Type *ITy = I->getType();

  KnownBits LHSKnown(BitWidth);
  KnownBits RHSKnown(BitWidth);

  // Despite the fact that we can't simplify this instruction in all User's
  // context, we can at least compute the known bits, and we can
  // do simplifications that apply to *just* the one user if we know that
  // this instruction has a simpler value in that context.
  switch (I->getOpcode()) {
  case Instruction::And: {
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
    Known = LHSKnown & RHSKnown;
    computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this context.
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    break;
  }
  case Instruction::Or: {
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
    Known = LHSKnown | RHSKnown;
    computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // We can simplify (X|Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or' in this context.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    break;
  }
  case Instruction::Xor: {
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
    Known = LHSKnown ^ RHSKnown;
    computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // We can simplify (X^Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    // If all of the demanded bits are known zero on one side, return the other.
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    break;
  }
  case Instruction::Add: {
    unsigned NLZ = DemandedMask.countl_zero();
    // Bits above the highest demanded bit cannot influence demanded bits of
    // an add, so only demand from the operands below that point.
    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);

    // If an operand adds zeros to every bit below the highest demanded bit,
    // that operand doesn't change the result. Return the other side.
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);

    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
    if (DemandedFromOps.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    Known = KnownBits::computeForAddSub(/*Add*/ true, NSW, LHSKnown, RHSKnown);
    computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));
    break;
  }
  case Instruction::Sub: {
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);

    // If an operand subtracts zeros from every bit below the highest demanded
    // bit, that operand doesn't change the result. Return the other side.
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);

    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
    Known = KnownBits::computeForAddSub(/*Add*/ false, NSW, LHSKnown, RHSKnown);
    computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));
    break;
  }
  case Instruction::AShr: {
    // Compute the Known bits to simplify things downstream.
    computeKnownBits(I, Known, Depth, CxtI);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If the right shift operand 0 is a result of a left shift by the same
    // amount, this is probably a zero/sign extension, which may be unnecessary,
    // if we do not demand any of the new sign bits. So, return the original
    // operand instead.
    const APInt *ShiftRC;
    const APInt *ShiftLC;
    Value *X;
    unsigned BitWidth = DemandedMask.getBitWidth();
    // NOTE(review): ShiftLC == ShiftRC compares APInt pointers, which appears
    // to rely on equal shift-amount constants being uniqued so that matched
    // APInts share storage — confirm this invariant holds for all inputs
    // (including vector splats) before relying on it.
    if (match(I,
              m_AShr(m_Shl(m_Value(X), m_APInt(ShiftLC)), m_APInt(ShiftRC))) &&
        ShiftLC == ShiftRC && ShiftLC->ult(BitWidth) &&
        DemandedMask.isSubsetOf(APInt::getLowBitsSet(
            BitWidth, BitWidth - ShiftRC->getZExtValue()))) {
      return X;
    }

    break;
  }
  default:
    // Compute the Known bits to simplify things downstream.
    computeKnownBits(I, Known, Depth, CxtI);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    break;
  }

  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
/// of "C2-C1".
///
/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
/// ..., bn}, without considering the specific value X is holding.
/// This transformation is legal iff one of the following conditions holds:
/// 1) All the bits in S are 0, in this case E1 == E2.
/// 2) We don't care about those bits in S, per the input DemandedMask.
/// 3) Combination of 1) and 2). Some bits in S are 0, and we don't care about
///    the rest of the bits.
///
/// Currently we only test condition 2).
///
/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
/// not successful.
Value *InstCombinerImpl::simplifyShrShlDemandedBits(
    Instruction *Shr, const APInt &ShrOp1, Instruction *Shl,
    const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) {
  // Zero shift amounts on either side mean there is nothing to fold here.
  if (!ShlOp1 || !ShrOp1)
    return nullptr; // No-op.

  Value *VarX = Shr->getOperand(0);
  Type *Ty = VarX->getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  // Over-wide shift amounts produce poison; leave those alone.
  if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
    return nullptr; // Undef.

  unsigned ShlAmt = ShlOp1.getZExtValue();
  unsigned ShrAmt = ShrOp1.getZExtValue();

  // The final shl forces the low bits of the result to zero; record a
  // conservative subset of them (restricted to the demanded bits).
  Known.One.clearAllBits();
  Known.Zero.setLowBits(ShlAmt - 1);
  Known.Zero &= DemandedMask;

  APInt BitMask1(APInt::getAllOnes(BitWidth));
  APInt BitMask2(APInt::getAllOnes(BitWidth));

  // BitMask1 marks the bits of E1 = (X >> C1) << C2 that come from X;
  // BitMask2 marks the bits of the proposed replacement E2 that come from X.
  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
                      (BitMask1.ashr(ShrAmt) << ShlAmt);

  if (ShrAmt <= ShlAmt) {
    BitMask2 <<= (ShlAmt - ShrAmt);
  } else {
    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
                        BitMask2.ashr(ShrAmt - ShlAmt);
  }

  // Check if condition-2 (see the comment to this function) is satisfied:
  // E1 and E2 can only differ in bits nobody demands.
  if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
    if (ShrAmt == ShlAmt)
      return VarX;

    // Creating a replacement shift only pays off if the shr dies with it.
    if (!Shr->hasOneUse())
      return nullptr;

    BinaryOperator *New;
    if (ShrAmt < ShlAmt) {
      // Net left shift; carry over the wrap flags from the original shl.
      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
      New = BinaryOperator::CreateShl(VarX, Amt);
      BinaryOperator *Orig = cast<BinaryOperator>(Shl);
      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
    } else {
      // Net right shift of the same kind; preserve exactness of the original.
      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
                     BinaryOperator::CreateAShr(VarX, Amt);
      if (cast<BinaryOperator>(Shr)->isExact())
        New->setIsExact(true);
    }

    return InsertNewInstWith(New, *Shl);
  }

  return nullptr;
}

/// The specified value produces a vector with any number of elements.
1229 /// This method analyzes which elements of the operand are undef or poison and 1230 /// returns that information in UndefElts. 1231 /// 1232 /// DemandedElts contains the set of elements that are actually used by the 1233 /// caller, and by default (AllowMultipleUsers equals false) the value is 1234 /// simplified only if it has a single caller. If AllowMultipleUsers is set 1235 /// to true, DemandedElts refers to the union of sets of elements that are 1236 /// used by all callers. 1237 /// 1238 /// If the information about demanded elements can be used to simplify the 1239 /// operation, the operation is simplified, then the resultant value is 1240 /// returned. This returns null if no change was made. 1241 Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, 1242 APInt DemandedElts, 1243 APInt &UndefElts, 1244 unsigned Depth, 1245 bool AllowMultipleUsers) { 1246 // Cannot analyze scalable type. The number of vector elements is not a 1247 // compile-time constant. 1248 if (isa<ScalableVectorType>(V->getType())) 1249 return nullptr; 1250 1251 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements(); 1252 APInt EltMask(APInt::getAllOnes(VWidth)); 1253 assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!"); 1254 1255 if (match(V, m_Undef())) { 1256 // If the entire vector is undef or poison, just return this info. 1257 UndefElts = EltMask; 1258 return nullptr; 1259 } 1260 1261 if (DemandedElts.isZero()) { // If nothing is demanded, provide poison. 1262 UndefElts = EltMask; 1263 return PoisonValue::get(V->getType()); 1264 } 1265 1266 UndefElts = 0; 1267 1268 if (auto *C = dyn_cast<Constant>(V)) { 1269 // Check if this is identity. If so, return 0 since we are not simplifying 1270 // anything. 
1271 if (DemandedElts.isAllOnes()) 1272 return nullptr; 1273 1274 Type *EltTy = cast<VectorType>(V->getType())->getElementType(); 1275 Constant *Poison = PoisonValue::get(EltTy); 1276 SmallVector<Constant*, 16> Elts; 1277 for (unsigned i = 0; i != VWidth; ++i) { 1278 if (!DemandedElts[i]) { // If not demanded, set to poison. 1279 Elts.push_back(Poison); 1280 UndefElts.setBit(i); 1281 continue; 1282 } 1283 1284 Constant *Elt = C->getAggregateElement(i); 1285 if (!Elt) return nullptr; 1286 1287 Elts.push_back(Elt); 1288 if (isa<UndefValue>(Elt)) // Already undef or poison. 1289 UndefElts.setBit(i); 1290 } 1291 1292 // If we changed the constant, return it. 1293 Constant *NewCV = ConstantVector::get(Elts); 1294 return NewCV != C ? NewCV : nullptr; 1295 } 1296 1297 // Limit search depth. 1298 if (Depth == 10) 1299 return nullptr; 1300 1301 if (!AllowMultipleUsers) { 1302 // If multiple users are using the root value, proceed with 1303 // simplification conservatively assuming that all elements 1304 // are needed. 1305 if (!V->hasOneUse()) { 1306 // Quit if we find multiple users of a non-root value though. 1307 // They'll be handled when it's their turn to be visited by 1308 // the main instcombine process. 1309 if (Depth != 0) 1310 // TODO: Just compute the UndefElts information recursively. 1311 return nullptr; 1312 1313 // Conservatively assume that all elements are needed. 1314 DemandedElts = EltMask; 1315 } 1316 } 1317 1318 Instruction *I = dyn_cast<Instruction>(V); 1319 if (!I) return nullptr; // Only analyze instructions. 1320 1321 bool MadeChange = false; 1322 auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum, 1323 APInt Demanded, APInt &Undef) { 1324 auto *II = dyn_cast<IntrinsicInst>(Inst); 1325 Value *Op = II ? 
II->getArgOperand(OpNum) : Inst->getOperand(OpNum); 1326 if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) { 1327 replaceOperand(*Inst, OpNum, V); 1328 MadeChange = true; 1329 } 1330 }; 1331 1332 APInt UndefElts2(VWidth, 0); 1333 APInt UndefElts3(VWidth, 0); 1334 switch (I->getOpcode()) { 1335 default: break; 1336 1337 case Instruction::GetElementPtr: { 1338 // The LangRef requires that struct geps have all constant indices. As 1339 // such, we can't convert any operand to partial undef. 1340 auto mayIndexStructType = [](GetElementPtrInst &GEP) { 1341 for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP); 1342 I != E; I++) 1343 if (I.isStruct()) 1344 return true; 1345 return false; 1346 }; 1347 if (mayIndexStructType(cast<GetElementPtrInst>(*I))) 1348 break; 1349 1350 // Conservatively track the demanded elements back through any vector 1351 // operands we may have. We know there must be at least one, or we 1352 // wouldn't have a vector result to get here. Note that we intentionally 1353 // merge the undef bits here since gepping with either an poison base or 1354 // index results in poison. 1355 for (unsigned i = 0; i < I->getNumOperands(); i++) { 1356 if (i == 0 ? match(I->getOperand(i), m_Undef()) 1357 : match(I->getOperand(i), m_Poison())) { 1358 // If the entire vector is undefined, just return this info. 1359 UndefElts = EltMask; 1360 return nullptr; 1361 } 1362 if (I->getOperand(i)->getType()->isVectorTy()) { 1363 APInt UndefEltsOp(VWidth, 0); 1364 simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp); 1365 // gep(x, undef) is not undef, so skip considering idx ops here 1366 // Note that we could propagate poison, but we can't distinguish between 1367 // undef & poison bits ATM 1368 if (i == 0) 1369 UndefElts |= UndefEltsOp; 1370 } 1371 } 1372 1373 break; 1374 } 1375 case Instruction::InsertElement: { 1376 // If this is a variable index, we don't know which element it overwrites. 1377 // demand exactly the same input as we produce. 
1378 ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2)); 1379 if (!Idx) { 1380 // Note that we can't propagate undef elt info, because we don't know 1381 // which elt is getting updated. 1382 simplifyAndSetOp(I, 0, DemandedElts, UndefElts2); 1383 break; 1384 } 1385 1386 // The element inserted overwrites whatever was there, so the input demanded 1387 // set is simpler than the output set. 1388 unsigned IdxNo = Idx->getZExtValue(); 1389 APInt PreInsertDemandedElts = DemandedElts; 1390 if (IdxNo < VWidth) 1391 PreInsertDemandedElts.clearBit(IdxNo); 1392 1393 // If we only demand the element that is being inserted and that element 1394 // was extracted from the same index in another vector with the same type, 1395 // replace this insert with that other vector. 1396 // Note: This is attempted before the call to simplifyAndSetOp because that 1397 // may change UndefElts to a value that does not match with Vec. 1398 Value *Vec; 1399 if (PreInsertDemandedElts == 0 && 1400 match(I->getOperand(1), 1401 m_ExtractElt(m_Value(Vec), m_SpecificInt(IdxNo))) && 1402 Vec->getType() == I->getType()) { 1403 return Vec; 1404 } 1405 1406 simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts); 1407 1408 // If this is inserting an element that isn't demanded, remove this 1409 // insertelement. 1410 if (IdxNo >= VWidth || !DemandedElts[IdxNo]) { 1411 Worklist.push(I); 1412 return I->getOperand(0); 1413 } 1414 1415 // The inserted element is defined. 1416 UndefElts.clearBit(IdxNo); 1417 break; 1418 } 1419 case Instruction::ShuffleVector: { 1420 auto *Shuffle = cast<ShuffleVectorInst>(I); 1421 assert(Shuffle->getOperand(0)->getType() == 1422 Shuffle->getOperand(1)->getType() && 1423 "Expected shuffle operands to have same type"); 1424 unsigned OpWidth = cast<FixedVectorType>(Shuffle->getOperand(0)->getType()) 1425 ->getNumElements(); 1426 // Handle trivial case of a splat. Only check the first element of LHS 1427 // operand. 
1428 if (all_of(Shuffle->getShuffleMask(), [](int Elt) { return Elt == 0; }) && 1429 DemandedElts.isAllOnes()) { 1430 if (!match(I->getOperand(1), m_Undef())) { 1431 I->setOperand(1, PoisonValue::get(I->getOperand(1)->getType())); 1432 MadeChange = true; 1433 } 1434 APInt LeftDemanded(OpWidth, 1); 1435 APInt LHSUndefElts(OpWidth, 0); 1436 simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts); 1437 if (LHSUndefElts[0]) 1438 UndefElts = EltMask; 1439 else 1440 UndefElts.clearAllBits(); 1441 break; 1442 } 1443 1444 APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0); 1445 for (unsigned i = 0; i < VWidth; i++) { 1446 if (DemandedElts[i]) { 1447 unsigned MaskVal = Shuffle->getMaskValue(i); 1448 if (MaskVal != -1u) { 1449 assert(MaskVal < OpWidth * 2 && 1450 "shufflevector mask index out of range!"); 1451 if (MaskVal < OpWidth) 1452 LeftDemanded.setBit(MaskVal); 1453 else 1454 RightDemanded.setBit(MaskVal - OpWidth); 1455 } 1456 } 1457 } 1458 1459 APInt LHSUndefElts(OpWidth, 0); 1460 simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts); 1461 1462 APInt RHSUndefElts(OpWidth, 0); 1463 simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts); 1464 1465 // If this shuffle does not change the vector length and the elements 1466 // demanded by this shuffle are an identity mask, then this shuffle is 1467 // unnecessary. 1468 // 1469 // We are assuming canonical form for the mask, so the source vector is 1470 // operand 0 and operand 1 is not used. 1471 // 1472 // Note that if an element is demanded and this shuffle mask is undefined 1473 // for that element, then the shuffle is not considered an identity 1474 // operation. The shuffle prevents poison from the operand vector from 1475 // leaking to the result by replacing poison with an undefined value. 
1476 if (VWidth == OpWidth) { 1477 bool IsIdentityShuffle = true; 1478 for (unsigned i = 0; i < VWidth; i++) { 1479 unsigned MaskVal = Shuffle->getMaskValue(i); 1480 if (DemandedElts[i] && i != MaskVal) { 1481 IsIdentityShuffle = false; 1482 break; 1483 } 1484 } 1485 if (IsIdentityShuffle) 1486 return Shuffle->getOperand(0); 1487 } 1488 1489 bool NewUndefElts = false; 1490 unsigned LHSIdx = -1u, LHSValIdx = -1u; 1491 unsigned RHSIdx = -1u, RHSValIdx = -1u; 1492 bool LHSUniform = true; 1493 bool RHSUniform = true; 1494 for (unsigned i = 0; i < VWidth; i++) { 1495 unsigned MaskVal = Shuffle->getMaskValue(i); 1496 if (MaskVal == -1u) { 1497 UndefElts.setBit(i); 1498 } else if (!DemandedElts[i]) { 1499 NewUndefElts = true; 1500 UndefElts.setBit(i); 1501 } else if (MaskVal < OpWidth) { 1502 if (LHSUndefElts[MaskVal]) { 1503 NewUndefElts = true; 1504 UndefElts.setBit(i); 1505 } else { 1506 LHSIdx = LHSIdx == -1u ? i : OpWidth; 1507 LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth; 1508 LHSUniform = LHSUniform && (MaskVal == i); 1509 } 1510 } else { 1511 if (RHSUndefElts[MaskVal - OpWidth]) { 1512 NewUndefElts = true; 1513 UndefElts.setBit(i); 1514 } else { 1515 RHSIdx = RHSIdx == -1u ? i : OpWidth; 1516 RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth; 1517 RHSUniform = RHSUniform && (MaskVal - OpWidth == i); 1518 } 1519 } 1520 } 1521 1522 // Try to transform shuffle with constant vector and single element from 1523 // this constant vector to single insertelement instruction. 1524 // shufflevector V, C, <v1, v2, .., ci, .., vm> -> 1525 // insertelement V, C[ci], ci-n 1526 if (OpWidth == 1527 cast<FixedVectorType>(Shuffle->getType())->getNumElements()) { 1528 Value *Op = nullptr; 1529 Constant *Value = nullptr; 1530 unsigned Idx = -1u; 1531 1532 // Find constant vector with the single element in shuffle (LHS or RHS). 
1533 if (LHSIdx < OpWidth && RHSUniform) { 1534 if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) { 1535 Op = Shuffle->getOperand(1); 1536 Value = CV->getOperand(LHSValIdx); 1537 Idx = LHSIdx; 1538 } 1539 } 1540 if (RHSIdx < OpWidth && LHSUniform) { 1541 if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) { 1542 Op = Shuffle->getOperand(0); 1543 Value = CV->getOperand(RHSValIdx); 1544 Idx = RHSIdx; 1545 } 1546 } 1547 // Found constant vector with single element - convert to insertelement. 1548 if (Op && Value) { 1549 Instruction *New = InsertElementInst::Create( 1550 Op, Value, ConstantInt::get(Type::getInt64Ty(I->getContext()), Idx), 1551 Shuffle->getName()); 1552 InsertNewInstWith(New, *Shuffle); 1553 return New; 1554 } 1555 } 1556 if (NewUndefElts) { 1557 // Add additional discovered undefs. 1558 SmallVector<int, 16> Elts; 1559 for (unsigned i = 0; i < VWidth; ++i) { 1560 if (UndefElts[i]) 1561 Elts.push_back(PoisonMaskElem); 1562 else 1563 Elts.push_back(Shuffle->getMaskValue(i)); 1564 } 1565 Shuffle->setShuffleMask(Elts); 1566 MadeChange = true; 1567 } 1568 break; 1569 } 1570 case Instruction::Select: { 1571 // If this is a vector select, try to transform the select condition based 1572 // on the current demanded elements. 1573 SelectInst *Sel = cast<SelectInst>(I); 1574 if (Sel->getCondition()->getType()->isVectorTy()) { 1575 // TODO: We are not doing anything with UndefElts based on this call. 1576 // It is overwritten below based on the other select operands. If an 1577 // element of the select condition is known undef, then we are free to 1578 // choose the output value from either arm of the select. If we know that 1579 // one of those values is undef, then the output can be undef. 1580 simplifyAndSetOp(I, 0, DemandedElts, UndefElts); 1581 } 1582 1583 // Next, see if we can transform the arms of the select. 
1584 APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts); 1585 if (auto *CV = dyn_cast<ConstantVector>(Sel->getCondition())) { 1586 for (unsigned i = 0; i < VWidth; i++) { 1587 // isNullValue() always returns false when called on a ConstantExpr. 1588 // Skip constant expressions to avoid propagating incorrect information. 1589 Constant *CElt = CV->getAggregateElement(i); 1590 if (isa<ConstantExpr>(CElt)) 1591 continue; 1592 // TODO: If a select condition element is undef, we can demand from 1593 // either side. If one side is known undef, choosing that side would 1594 // propagate undef. 1595 if (CElt->isNullValue()) 1596 DemandedLHS.clearBit(i); 1597 else 1598 DemandedRHS.clearBit(i); 1599 } 1600 } 1601 1602 simplifyAndSetOp(I, 1, DemandedLHS, UndefElts2); 1603 simplifyAndSetOp(I, 2, DemandedRHS, UndefElts3); 1604 1605 // Output elements are undefined if the element from each arm is undefined. 1606 // TODO: This can be improved. See comment in select condition handling. 1607 UndefElts = UndefElts2 & UndefElts3; 1608 break; 1609 } 1610 case Instruction::BitCast: { 1611 // Vector->vector casts only. 1612 VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); 1613 if (!VTy) break; 1614 unsigned InVWidth = cast<FixedVectorType>(VTy)->getNumElements(); 1615 APInt InputDemandedElts(InVWidth, 0); 1616 UndefElts2 = APInt(InVWidth, 0); 1617 unsigned Ratio; 1618 1619 if (VWidth == InVWidth) { 1620 // If we are converting from <4 x i32> -> <4 x f32>, we demand the same 1621 // elements as are demanded of us. 1622 Ratio = 1; 1623 InputDemandedElts = DemandedElts; 1624 } else if ((VWidth % InVWidth) == 0) { 1625 // If the number of elements in the output is a multiple of the number of 1626 // elements in the input then an input element is live if any of the 1627 // corresponding output elements are live. 
1628 Ratio = VWidth / InVWidth; 1629 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) 1630 if (DemandedElts[OutIdx]) 1631 InputDemandedElts.setBit(OutIdx / Ratio); 1632 } else if ((InVWidth % VWidth) == 0) { 1633 // If the number of elements in the input is a multiple of the number of 1634 // elements in the output then an input element is live if the 1635 // corresponding output element is live. 1636 Ratio = InVWidth / VWidth; 1637 for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) 1638 if (DemandedElts[InIdx / Ratio]) 1639 InputDemandedElts.setBit(InIdx); 1640 } else { 1641 // Unsupported so far. 1642 break; 1643 } 1644 1645 simplifyAndSetOp(I, 0, InputDemandedElts, UndefElts2); 1646 1647 if (VWidth == InVWidth) { 1648 UndefElts = UndefElts2; 1649 } else if ((VWidth % InVWidth) == 0) { 1650 // If the number of elements in the output is a multiple of the number of 1651 // elements in the input then an output element is undef if the 1652 // corresponding input element is undef. 1653 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) 1654 if (UndefElts2[OutIdx / Ratio]) 1655 UndefElts.setBit(OutIdx); 1656 } else if ((InVWidth % VWidth) == 0) { 1657 // If the number of elements in the input is a multiple of the number of 1658 // elements in the output then an output element is undef if all of the 1659 // corresponding input elements are undef. 
1660 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { 1661 APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio); 1662 if (SubUndef.popcount() == Ratio) 1663 UndefElts.setBit(OutIdx); 1664 } 1665 } else { 1666 llvm_unreachable("Unimp"); 1667 } 1668 break; 1669 } 1670 case Instruction::FPTrunc: 1671 case Instruction::FPExt: 1672 simplifyAndSetOp(I, 0, DemandedElts, UndefElts); 1673 break; 1674 1675 case Instruction::Call: { 1676 IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); 1677 if (!II) break; 1678 switch (II->getIntrinsicID()) { 1679 case Intrinsic::masked_gather: // fallthrough 1680 case Intrinsic::masked_load: { 1681 // Subtlety: If we load from a pointer, the pointer must be valid 1682 // regardless of whether the element is demanded. Doing otherwise risks 1683 // segfaults which didn't exist in the original program. 1684 APInt DemandedPtrs(APInt::getAllOnes(VWidth)), 1685 DemandedPassThrough(DemandedElts); 1686 if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2))) 1687 for (unsigned i = 0; i < VWidth; i++) { 1688 Constant *CElt = CV->getAggregateElement(i); 1689 if (CElt->isNullValue()) 1690 DemandedPtrs.clearBit(i); 1691 else if (CElt->isAllOnesValue()) 1692 DemandedPassThrough.clearBit(i); 1693 } 1694 if (II->getIntrinsicID() == Intrinsic::masked_gather) 1695 simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2); 1696 simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3); 1697 1698 // Output elements are undefined if the element from both sources are. 1699 // TODO: can strengthen via mask as well. 
1700 UndefElts = UndefElts2 & UndefElts3; 1701 break; 1702 } 1703 default: { 1704 // Handle target specific intrinsics 1705 std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic( 1706 *II, DemandedElts, UndefElts, UndefElts2, UndefElts3, 1707 simplifyAndSetOp); 1708 if (V) 1709 return *V; 1710 break; 1711 } 1712 } // switch on IntrinsicID 1713 break; 1714 } // case Call 1715 } // switch on Opcode 1716 1717 // TODO: We bail completely on integer div/rem and shifts because they have 1718 // UB/poison potential, but that should be refined. 1719 BinaryOperator *BO; 1720 if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) { 1721 Value *X = BO->getOperand(0); 1722 Value *Y = BO->getOperand(1); 1723 1724 // Look for an equivalent binop except that one operand has been shuffled. 1725 // If the demand for this binop only includes elements that are the same as 1726 // the other binop, then we may be able to replace this binop with a use of 1727 // the earlier one. 1728 // 1729 // Example: 1730 // %other_bo = bo (shuf X, {0}), Y 1731 // %this_extracted_bo = extelt (bo X, Y), 0 1732 // --> 1733 // %other_bo = bo (shuf X, {0}), Y 1734 // %this_extracted_bo = extelt %other_bo, 0 1735 // 1736 // TODO: Handle demand of an arbitrary single element or more than one 1737 // element instead of just element 0. 1738 // TODO: Unlike general demanded elements transforms, this should be safe 1739 // for any (div/rem/shift) opcode too. 1740 if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() && 1741 BO->hasOneUse() ) { 1742 1743 auto findShufBO = [&](bool MatchShufAsOp0) -> User * { 1744 // Try to use shuffle-of-operand in place of an operand: 1745 // bo X, Y --> bo (shuf X), Y 1746 // bo X, Y --> bo X, (shuf Y) 1747 BinaryOperator::BinaryOps Opcode = BO->getOpcode(); 1748 Value *ShufOp = MatchShufAsOp0 ? X : Y; 1749 Value *OtherOp = MatchShufAsOp0 ? 
Y : X; 1750 for (User *U : OtherOp->users()) { 1751 auto Shuf = m_Shuffle(m_Specific(ShufOp), m_Value(), m_ZeroMask()); 1752 if (BO->isCommutative() 1753 ? match(U, m_c_BinOp(Opcode, Shuf, m_Specific(OtherOp))) 1754 : MatchShufAsOp0 1755 ? match(U, m_BinOp(Opcode, Shuf, m_Specific(OtherOp))) 1756 : match(U, m_BinOp(Opcode, m_Specific(OtherOp), Shuf))) 1757 if (DT.dominates(U, I)) 1758 return U; 1759 } 1760 return nullptr; 1761 }; 1762 1763 if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ true)) 1764 return ShufBO; 1765 if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ false)) 1766 return ShufBO; 1767 } 1768 1769 simplifyAndSetOp(I, 0, DemandedElts, UndefElts); 1770 simplifyAndSetOp(I, 1, DemandedElts, UndefElts2); 1771 1772 // Output elements are undefined if both are undefined. Consider things 1773 // like undef & 0. The result is known zero, not undef. 1774 UndefElts &= UndefElts2; 1775 } 1776 1777 // If we've proven all of the lanes undef, return an undef value. 1778 // TODO: Intersect w/demanded lanes 1779 if (UndefElts.isAllOnes()) 1780 return UndefValue::get(I->getType()); 1781 1782 return MadeChange ? I : nullptr; 1783 } 1784