//===- ValueTracking.cpp - Walk computations to compute properties --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains routines that help analyze properties that chains of
// computations have.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomConditionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Analysis/WithCache.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/RISCVTargetParser.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
                                              cl::Hidden, cl::init(20));


/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
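/// For example (illustrative), i32 and <4 x i32> both report 32, while a
/// pointer falls back to the DataLayout's pointer size for its type.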
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
  if (unsigned BitWidth = Ty->getScalarSizeInBits())
    return BitWidth;

  return DL.getPointerTypeSizeInBits(Ty);
}

// Given the provided Value and, potentially, a context instruction, return
// the preferred context instruction (if any).
static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
  // If we've been provided with a context instruction, then use that (provided
  // it has been inserted).
  if (CxtI && CxtI->getParent())
    return CxtI;

  // If the value is really an already-inserted instruction, then use that.
  CxtI = dyn_cast<Instruction>(V);
  if (CxtI && CxtI->getParent())
    return CxtI;

  return nullptr;
}

static const Instruction *safeCxtI(const Value *V1, const Value *V2,
                                   const Instruction *CxtI) {
  // If we've been provided with a context instruction, then use that (provided
  // it has been inserted).
  if (CxtI && CxtI->getParent())
    return CxtI;

  // If the value is really an already-inserted instruction, then use that.
  CxtI = dyn_cast<Instruction>(V1);
  if (CxtI && CxtI->getParent())
    return CxtI;

  CxtI = dyn_cast<Instruction>(V2);
  if (CxtI && CxtI->getParent())
    return CxtI;

  return nullptr;
}

static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
                                   const APInt &DemandedElts,
                                   APInt &DemandedLHS, APInt &DemandedRHS) {
  if (isa<ScalableVectorType>(Shuf->getType())) {
    assert(DemandedElts == APInt(1,1));
    DemandedLHS = DemandedRHS = DemandedElts;
    return true;
  }

  int NumElts =
      cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
  return llvm::getShuffleDemandedElts(NumElts, Shuf->getShuffleMask(),
                                      DemandedElts, DemandedLHS, DemandedRHS);
}

static void computeKnownBits(const Value *V, const APInt &DemandedElts,
                             KnownBits &Known, unsigned Depth,
                             const SimplifyQuery &Q);

void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
                            const SimplifyQuery &Q) {
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
}

void llvm::computeKnownBits(const Value *V, KnownBits &Known,
                            const DataLayout &DL, unsigned Depth,
                            AssumptionCache *AC, const Instruction *CxtI,
                            const DominatorTree *DT, bool UseInstrInfo) {
  computeKnownBits(
      V, Known, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
                                 unsigned Depth, AssumptionCache *AC,
                                 const Instruction *CxtI,
                                 const DominatorTree *DT, bool UseInstrInfo) {
  return computeKnownBits(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
                                 const DataLayout &DL, unsigned Depth,
                                 AssumptionCache *AC, const Instruction *CxtI,
                                 const DominatorTree *DT, bool UseInstrInfo) {
  return computeKnownBits(
      V, DemandedElts, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
                                            const SimplifyQuery &SQ) {
  // Look for an inverted mask: (X & ~M) op (Y & M).
  {
    Value *M;
    if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
        match(RHS, m_c_And(m_Specific(M), m_Value())) &&
        isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT))
      return true;
  }

  // X op (Y & ~X)
  if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) &&
      isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
  // for constant Y.
  Value *Y;
  if (match(RHS,
            m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) &&
      isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) &&
      isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // Peek through extends to find a 'not' of the other side:
  // (ext Y) op ext(~Y)
  if (match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
      match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) &&
      isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // Look for: (A & B) op ~(A | B)
  {
    Value *A, *B;
    if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
        match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) &&
        isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) &&
        isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT))
      return true;
  }

  return false;
}

bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
                               const WithCache<const Value *> &RHSCache,
                               const SimplifyQuery &SQ) {
  const Value *LHS = LHSCache.getValue();
  const Value *RHS = RHSCache.getValue();

  assert(LHS->getType() == RHS->getType() &&
         "LHS and RHS should have the same type");
  assert(LHS->getType()->isIntOrIntVectorTy() &&
         "LHS and RHS should be integers");

  if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
      haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ))
    return true;

  return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ),
                                        RHSCache.getKnownBits(SQ));
}

bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
  return !I->user_empty() && all_of(I->users(), [](const User *U) {
    ICmpInst::Predicate P;
    return match(U, m_ICmp(P, m_Value(), m_Zero()));
  });
}

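// Illustrative example: a call whose only user is 'icmp eq i32 %c, 0'
// satisfies the predicate below, while a user such as 'icmp slt i32 %c, 0'
// does not, because slt is not an equality comparison.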
bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
  return !I->user_empty() && all_of(I->users(), [](const User *U) {
    ICmpInst::Predicate P;
    return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
  });
}

static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
                                   const SimplifyQuery &Q);

bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
                                  bool OrZero, unsigned Depth,
                                  AssumptionCache *AC, const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownToBeAPowerOfTwo(
      V, OrZero, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
                           const SimplifyQuery &Q, unsigned Depth);

bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
                              unsigned Depth) {
  return computeKnownBits(V, Depth, SQ).isNonNegative();
}

bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
                           unsigned Depth) {
  if (auto *CI = dyn_cast<ConstantInt>(V))
    return CI->getValue().isStrictlyPositive();

  // If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
  // this updated.
  KnownBits Known = computeKnownBits(V, Depth, SQ);
  return Known.isNonNegative() &&
         (Known.isNonZero() || isKnownNonZero(V, SQ, Depth));
}

bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
                           unsigned Depth) {
  return computeKnownBits(V, Depth, SQ).isNegative();
}

static bool isKnownNonEqual(const Value *V1, const Value *V2,
                            const APInt &DemandedElts, unsigned Depth,
                            const SimplifyQuery &Q);

bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
                           const DataLayout &DL, AssumptionCache *AC,
                           const Instruction *CxtI, const DominatorTree *DT,
                           bool UseInstrInfo) {
  assert(V1->getType() == V2->getType() &&
         "Testing equality of non-equal types!");
  auto *FVTy = dyn_cast<FixedVectorType>(V1->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  return ::isKnownNonEqual(
      V1, V2, DemandedElts, 0,
      SimplifyQuery(DL, DT, AC, safeCxtI(V2, V1, CxtI), UseInstrInfo));
}

bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
                             const SimplifyQuery &SQ, unsigned Depth) {
  KnownBits Known(Mask.getBitWidth());
  computeKnownBits(V, Known, Depth, SQ);
  return Mask.isSubsetOf(Known.Zero);
}

static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
                                   unsigned Depth, const SimplifyQuery &Q);

static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
                                   const SimplifyQuery &Q) {
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  return ComputeNumSignBits(V, DemandedElts, Depth, Q);
}

unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
                                  unsigned Depth, AssumptionCache *AC,
                                  const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::ComputeNumSignBits(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
                                         unsigned Depth, AssumptionCache *AC,
                                         const Instruction *CxtI,
                                         const DominatorTree *DT) {
  unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
  return V->getType()->getScalarSizeInBits() - SignBits + 1;
}

static void computeKnownBitsAddSub(bool Add, const Value *Op0,
                                   const Value *Op1, bool NSW, bool NUW,
                                   const APInt &DemandedElts,
                                   KnownBits &KnownOut, KnownBits &Known2,
                                   unsigned Depth, const SimplifyQuery &Q) {
  computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);

  // If one operand is unknown and we have no nowrap information,
  // the result will be unknown independently of the second operand.
  if (KnownOut.isUnknown() && !NSW && !NUW)
    return;

  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
  KnownOut = KnownBits::computeForAddSub(Add, NSW, NUW, Known2, KnownOut);
}

static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
                                const APInt &DemandedElts, KnownBits &Known,
                                KnownBits &Known2, unsigned Depth,
                                const SimplifyQuery &Q) {
  computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);

  bool isKnownNegative = false;
  bool isKnownNonNegative = false;
  // If the multiplication is known not to overflow, compute the sign bit.
  if (NSW) {
    if (Op0 == Op1) {
      // The product of a number with itself is non-negative.
      isKnownNonNegative = true;
    } else {
      bool isKnownNonNegativeOp1 = Known.isNonNegative();
      bool isKnownNonNegativeOp0 = Known2.isNonNegative();
      bool isKnownNegativeOp1 = Known.isNegative();
      bool isKnownNegativeOp0 = Known2.isNegative();
      // The product of two numbers with the same sign is non-negative.
      isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
                           (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
      // The product of a negative number and a non-negative number is either
      // negative or zero.
      if (!isKnownNonNegative)
        isKnownNegative =
            (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
             Known2.isNonZero()) ||
            (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
    }
  }

  bool SelfMultiply = Op0 == Op1;
  if (SelfMultiply)
    SelfMultiply &=
        isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
  Known = KnownBits::mul(Known, Known2, SelfMultiply);

  // Only make use of no-wrap flags if we failed to compute the sign bit
  // directly. This matters if the multiplication always overflows, in
  // which case we prefer to follow the result of the direct computation,
  // though as the program is invoking undefined behaviour we can choose
  // whatever we like here.
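  // E.g. (illustrative): for 'mul nsw i8 %x, %x' the self-multiply check above
  // set isKnownNonNegative, so the result is marked non-negative below even
  // when nothing else is known about %x.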
  if (isKnownNonNegative && !Known.isNegative())
    Known.makeNonNegative();
  else if (isKnownNegative && !Known.isNonNegative())
    Known.makeNegative();
}

void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
                                             KnownBits &Known) {
  unsigned BitWidth = Known.getBitWidth();
  unsigned NumRanges = Ranges.getNumOperands() / 2;
  assert(NumRanges >= 1);

  Known.Zero.setAllBits();
  Known.One.setAllBits();

  for (unsigned i = 0; i < NumRanges; ++i) {
    ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
    ConstantInt *Upper =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
    ConstantRange Range(Lower->getValue(), Upper->getValue());

    // The first CommonPrefixBits of all values in Range are equal.
    unsigned CommonPrefixBits =
        (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
    APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
    APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
    Known.One &= UnsignedMax & Mask;
    Known.Zero &= ~UnsignedMax & Mask;
  }
}

static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
  SmallVector<const Value *, 16> WorkSet(1, I);
  SmallPtrSet<const Value *, 32> Visited;
  SmallPtrSet<const Value *, 16> EphValues;

  // The instruction defining an assumption's condition itself is always
  // considered ephemeral to that assumption (even if it has other
  // non-ephemeral users). See r246696's test case for an example.
  if (is_contained(I->operands(), E))
    return true;

  while (!WorkSet.empty()) {
    const Value *V = WorkSet.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    // If all uses of this value are ephemeral, then so is this value.
    if (llvm::all_of(V->users(), [&](const User *U) {
          return EphValues.count(U);
        })) {
      if (V == E)
        return true;

      if (V == I || (isa<Instruction>(V) &&
                     !cast<Instruction>(V)->mayHaveSideEffects() &&
                     !cast<Instruction>(V)->isTerminator())) {
        EphValues.insert(V);
        if (const User *U = dyn_cast<User>(V))
          append_range(WorkSet, U->operands());
      }
    }
  }

  return false;
}

// Is this an intrinsic that cannot be speculated but also cannot trap?
bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
  if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
    return CI->isAssumeLikeIntrinsic();

  return false;
}

bool llvm::isValidAssumeForContext(const Instruction *Inv,
                                   const Instruction *CxtI,
                                   const DominatorTree *DT,
                                   bool AllowEphemerals) {
  // There are two restrictions on the use of an assume:
  //  1. The assume must dominate the context (or the control flow must
  //     reach the assume whenever it reaches the context).
  //  2. The context must not be in the assume's set of ephemeral values
  //     (otherwise we will use the assume to prove that the condition
  //     feeding the assume is trivially true, thus causing the removal of
  //     the assume).

  if (Inv->getParent() == CxtI->getParent()) {
    // If Inv and CxtI are in the same block, check if the assume (Inv) is first
    // in the BB.
    if (Inv->comesBefore(CxtI))
      return true;

    // Don't let an assume affect itself - this would cause the problems
    // `isEphemeralValueOf` is trying to prevent, and it would also make
    // the loop below go out of bounds.
    if (!AllowEphemerals && Inv == CxtI)
      return false;

    // The context comes first, but they're both in the same block.
    // Make sure there is nothing in between that might interrupt
    // the control flow, not even CxtI itself.
    // We limit the scan distance between the assume and its context instruction
    // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
    // it can be adjusted if needed (could be turned into a cl::opt).
    auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
    if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
      return false;

    return AllowEphemerals || !isEphemeralValueOf(Inv, CxtI);
  }

  // Inv and CxtI are in different blocks.
  if (DT) {
    if (DT->dominates(Inv, CxtI))
      return true;
  } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
    // We don't have a DT, but this trivially dominates.
    return true;
  }

  return false;
}

// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
// we still have enough information about `RHS` to conclude non-zero. For
// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
// so the extra compile time may not be worth it, but possibly a second API
// should be created for use outside of loops.
static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
  // v u> y implies v != 0.
  if (Pred == ICmpInst::ICMP_UGT)
    return true;

  // Special-case v != 0 to also handle v != null.
  if (Pred == ICmpInst::ICMP_NE)
    return match(RHS, m_Zero());

  // All other predicates - rely on generic ConstantRange handling.
  const APInt *C;
  auto Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits());
  if (match(RHS, m_APInt(C))) {
    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
    return !TrueValues.contains(Zero);
  }

  auto *VC = dyn_cast<ConstantDataVector>(RHS);
  if (VC == nullptr)
    return false;

  for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
       ++ElemIdx) {
    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
        Pred, VC->getElementAsAPInt(ElemIdx));
    if (TrueValues.contains(Zero))
      return false;
  }
  return true;
}

static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
  // Use of assumptions is context-sensitive. If we don't have a context, we
  // cannot use them!
  if (!Q.AC || !Q.CxtI)
    return false;

  for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
    if (!Elem.Assume)
      continue;

    AssumeInst *I = cast<AssumeInst>(Elem.Assume);
    assert(I->getFunction() == Q.CxtI->getFunction() &&
           "Got assumption for the wrong function!");

    if (Elem.Index != AssumptionCache::ExprResultIdx) {
      if (!V->getType()->isPointerTy())
        continue;
      if (RetainedKnowledge RK = getKnowledgeFromBundle(
              *I, I->bundle_op_info_begin()[Elem.Index])) {
        if (RK.WasOn == V &&
            (RK.AttrKind == Attribute::NonNull ||
             (RK.AttrKind == Attribute::Dereferenceable &&
              !NullPointerIsDefined(Q.CxtI->getFunction(),
                                    V->getType()->getPointerAddressSpace()))) &&
            isValidAssumeForContext(I, Q.CxtI, Q.DT))
          return true;
      }
      continue;
    }

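    // Illustrative example: for 'call void @llvm.assume(i1 %c)' with
    // '%c = icmp ugt i64 %v, 7', the predicate/RHS pair matched below lets
    // cmpExcludesZero conclude that %v is non-zero at valid contexts.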
    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried resulting in a
    // runtime of ~O(#assumes * #values).

    Value *RHS;
    CmpInst::Predicate Pred;
    auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
    if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
      return false;

    if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
      return true;
  }

  return false;
}

static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
                                    Value *LHS, Value *RHS, KnownBits &Known,
                                    const SimplifyQuery &Q) {
  if (RHS->getType()->isPointerTy()) {
    // Handle comparison of pointer to null explicitly, as it will not be
    // covered by the m_APInt() logic below.
    if (LHS == V && match(RHS, m_Zero())) {
      switch (Pred) {
      case ICmpInst::ICMP_EQ:
        Known.setAllZero();
        break;
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_SGT:
        Known.makeNonNegative();
        break;
      case ICmpInst::ICMP_SLT:
        Known.makeNegative();
        break;
      default:
        break;
      }
    }
    return;
  }

  unsigned BitWidth = Known.getBitWidth();
  auto m_V =
      m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));

  Value *Y;
  const APInt *Mask, *C;
  uint64_t ShAmt;
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    // assume(V = C)
    if (match(LHS, m_V) && match(RHS, m_APInt(C))) {
      Known = Known.unionWith(KnownBits::makeConstant(*C));
      // assume(V & Mask = C)
    } else if (match(LHS, m_c_And(m_V, m_Value(Y))) &&
               match(RHS, m_APInt(C))) {
      // For one bits in Mask, we can propagate bits from C to V.
      Known.One |= *C;
      if (match(Y, m_APInt(Mask)))
        Known.Zero |= ~*C & *Mask;
      // assume(V | Mask = C)
    } else if (match(LHS, m_c_Or(m_V, m_Value(Y))) && match(RHS, m_APInt(C))) {
      // For zero bits in Mask, we can propagate bits from C to V.
      Known.Zero |= ~*C;
      if (match(Y, m_APInt(Mask)))
        Known.One |= *C & ~*Mask;
      // assume(V ^ Mask = C)
    } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) &&
               match(RHS, m_APInt(C))) {
      // Equivalent to assume(V == Mask ^ C)
      Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask));
      // assume(V << ShAmt = C)
    } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
               match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
      // For those bits in C that are known, we can propagate them to known
      // bits in V shifted to the right by ShAmt.
      KnownBits RHSKnown = KnownBits::makeConstant(*C);
      RHSKnown.Zero.lshrInPlace(ShAmt);
      RHSKnown.One.lshrInPlace(ShAmt);
      Known = Known.unionWith(RHSKnown);
      // assume(V >> ShAmt = C)
    } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
               match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
      KnownBits RHSKnown = KnownBits::makeConstant(*C);
      // For those bits in RHS that are known, we can propagate them to known
      // bits in V shifted to the left by ShAmt.
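      // Illustrative example: for i8 %v, assume((%v u>> 4) == 0x0A) pins the
      // top four bits of %v, i.e. %v is known to be 0b1010????.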
      Known.Zero |= RHSKnown.Zero << ShAmt;
      Known.One |= RHSKnown.One << ShAmt;
    }
    break;
  case ICmpInst::ICMP_NE: {
    // assume (V & B != 0) where B is a power of 2
    const APInt *BPow2;
    if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero()))
      Known.One |= *BPow2;
    break;
  }
  default:
    if (match(RHS, m_APInt(C))) {
      const APInt *Offset = nullptr;
      if (match(LHS, m_CombineOr(m_V, m_AddLike(m_V, m_APInt(Offset))))) {
        ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
        if (Offset)
          LHSRange = LHSRange.sub(*Offset);
        Known = Known.unionWith(LHSRange.toKnownBits());
      }
      if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
        // X & Y u> C -> X u> C && Y u> C
        // X nuw- Y u> C -> X u> C
        if (match(LHS, m_c_And(m_V, m_Value())) ||
            match(LHS, m_NUWSub(m_V, m_Value())))
          Known.One.setHighBits(
              (*C + (Pred == ICmpInst::ICMP_UGT)).countLeadingOnes());
      }
      if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
        // X | Y u< C -> X u< C && Y u< C
        // X nuw+ Y u< C -> X u< C && Y u< C
        if (match(LHS, m_c_Or(m_V, m_Value())) ||
            match(LHS, m_c_NUWAdd(m_V, m_Value()))) {
          Known.Zero.setHighBits(
              (*C - (Pred == ICmpInst::ICMP_ULT)).countLeadingZeros());
        }
      }
    }
    break;
  }
}

static void computeKnownBitsFromICmpCond(const Value *V, ICmpInst *Cmp,
                                         KnownBits &Known,
                                         const SimplifyQuery &SQ, bool Invert) {
  ICmpInst::Predicate Pred =
      Invert ? Cmp->getInversePredicate() : Cmp->getPredicate();
  Value *LHS = Cmp->getOperand(0);
  Value *RHS = Cmp->getOperand(1);

  // Handle icmp pred (trunc V), C
  if (match(LHS, m_Trunc(m_Specific(V)))) {
    KnownBits DstKnown(LHS->getType()->getScalarSizeInBits());
    computeKnownBitsFromCmp(LHS, Pred, LHS, RHS, DstKnown, SQ);
    Known = Known.unionWith(DstKnown.anyext(Known.getBitWidth()));
    return;
  }

  computeKnownBitsFromCmp(V, Pred, LHS, RHS, Known, SQ);
}

static void computeKnownBitsFromCond(const Value *V, Value *Cond,
                                     KnownBits &Known, unsigned Depth,
                                     const SimplifyQuery &SQ, bool Invert) {
  Value *A, *B;
  if (Depth < MaxAnalysisRecursionDepth &&
      match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))) {
    KnownBits Known2(Known.getBitWidth());
    KnownBits Known3(Known.getBitWidth());
    computeKnownBitsFromCond(V, A, Known2, Depth + 1, SQ, Invert);
    computeKnownBitsFromCond(V, B, Known3, Depth + 1, SQ, Invert);
    if (Invert ? match(Cond, m_LogicalOr(m_Value(), m_Value()))
               : match(Cond, m_LogicalAnd(m_Value(), m_Value())))
      Known2 = Known2.unionWith(Known3);
    else
      Known2 = Known2.intersectWith(Known3);
    Known = Known.unionWith(Known2);
  }

  if (auto *Cmp = dyn_cast<ICmpInst>(Cond))
    computeKnownBitsFromICmpCond(V, Cmp, Known, SQ, Invert);
}

void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
                                       unsigned Depth, const SimplifyQuery &Q) {
  // Handle injected condition.
  if (Q.CC && Q.CC->AffectedValues.contains(V))
    computeKnownBitsFromCond(V, Q.CC->Cond, Known, Depth, Q, Q.CC->Invert);

  if (!Q.CxtI)
    return;

  if (Q.DC && Q.DT) {
    // Handle dominating conditions.
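    // Illustrative example: after 'br i1 (icmp ult i32 %x, 16), ...', code
    // dominated by the true edge learns below that the upper 28 bits of %x
    // are known zero.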
    for (BranchInst *BI : Q.DC->conditionsFor(V)) {
      BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
      if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
        computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
                                 /*Invert*/ false);

      BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
      if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
        computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
                                 /*Invert*/ true);
    }

    if (Known.hasConflict())
      Known.resetAll();
  }

  if (!Q.AC)
    return;

  unsigned BitWidth = Known.getBitWidth();

  // Note that the patterns below need to be kept in sync with the code
  // in AssumptionCache::updateAffectedValues.

  for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
    if (!Elem.Assume)
      continue;

    AssumeInst *I = cast<AssumeInst>(Elem.Assume);
    assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
           "Got assumption for the wrong function!");

    if (Elem.Index != AssumptionCache::ExprResultIdx) {
      if (!V->getType()->isPointerTy())
        continue;
      if (RetainedKnowledge RK = getKnowledgeFromBundle(
              *I, I->bundle_op_info_begin()[Elem.Index])) {
        if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
            isPowerOf2_64(RK.ArgValue) &&
            isValidAssumeForContext(I, Q.CxtI, Q.DT))
          Known.Zero.setLowBits(Log2_64(RK.ArgValue));
      }
      continue;
    }

    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried resulting in a
    // runtime of ~O(#assumes * #values).

    Value *Arg = I->getArgOperand(0);

    if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      (void)BitWidth;
      Known.setAllOnes();
      return;
    }
    if (match(Arg, m_Not(m_Specific(V))) &&
        isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      (void)BitWidth;
      Known.setAllZero();
      return;
    }

    // The remaining tests are all recursive, so bail out if we hit the limit.
    if (Depth == MaxAnalysisRecursionDepth)
      continue;

    ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
    if (!Cmp)
      continue;

    if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
      continue;

    computeKnownBitsFromICmpCond(V, Cmp, Known, Q, /*Invert=*/false);
  }

  // Conflicting assumption: Undefined behavior will occur on this execution
  // path.
  if (Known.hasConflict())
    Known.resetAll();
}

/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is
/// a pre-allocated temporary with the same bit width as Known and, on return,
/// contains the known bits of the shift value source. KF is an
/// operator-specific function that, given the known-bits and a shift amount,
/// computes the implied known-bits of the shift operator's result for that
/// shift amount. The results from calling KF are conservatively combined for
/// all permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
    const Operator *I, const APInt &DemandedElts, KnownBits &Known,
    KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
    function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
  // To limit compile-time impact, only query isKnownNonZero() if we know at
  // least something about the shift amount.
  bool ShAmtNonZero =
      Known.isNonZero() ||
      (Known.getMaxValue().ult(Known.getBitWidth()) &&
       isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth + 1));
  Known = KF(Known2, Known, ShAmtNonZero);
}

static KnownBits
getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
                         const KnownBits &KnownLHS, const KnownBits &KnownRHS,
                         unsigned Depth, const SimplifyQuery &Q) {
  unsigned BitWidth = KnownLHS.getBitWidth();
  KnownBits KnownOut(BitWidth);
  bool IsAnd = false;
  bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
  Value *X = nullptr, *Y = nullptr;

  switch (I->getOpcode()) {
  case Instruction::And:
    KnownOut = KnownLHS & KnownRHS;
    IsAnd = true;
    // and(x, -x) is a common idiom that will clear all but the lowest set
    // bit. If we have a single known bit in x, we can clear all bits
    // above it.
    // TODO: instcombine often reassociates independent `and` which can hide
    // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
    if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
      // -(-x) == x so using whichever (LHS/RHS) gets us a better result.
      if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
        KnownOut = KnownLHS.blsi();
      else
        KnownOut = KnownRHS.blsi();
    }
    break;
  case Instruction::Or:
    KnownOut = KnownLHS | KnownRHS;
    break;
  case Instruction::Xor:
    KnownOut = KnownLHS ^ KnownRHS;
    // xor(x, x-1) is a common idiom that will clear all but the lowest set
    // bit. If we have a single known bit in x, we can clear all bits
    // above it.
    // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C !=
    // -1 but for the purpose of demanded bits (xor(x, x-C) &
    // Demanded) == (xor(x, x-1) & Demanded). Extend the xor pattern
    // to use arbitrary C if xor(x, x-C) is the same as xor(x, x-1).
    if (HasKnownOne &&
        match(I, m_c_Xor(m_Value(X), m_Add(m_Deferred(X), m_AllOnes())))) {
      const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
      KnownOut = XBits.blsmsk();
    }
    break;
  default:
    llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
  }

  // and(x, add (x, -1)) is a common idiom that always clears the low bit;
  // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
  // Here we handle the more general case of adding any odd number by
  // matching the form and/xor/or(x, add(x, y)) where y is odd.
  // TODO: This could be generalized to clearing any bit set in y where the
  // following bit is known to be unset in y.
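  // E.g. (illustrative): in 'or i8 %x, (add i8 %x, 1)', %x and %x + 1 always
  // differ in bit 0, so the low bit of the result is known to be one.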
  if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
      (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) {
    KnownBits KnownY(BitWidth);
    computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q);
    if (KnownY.countMinTrailingOnes() > 0) {
      if (IsAnd)
        KnownOut.Zero.setBit(0);
      else
        KnownOut.One.setBit(0);
    }
  }
  return KnownOut;
}

static KnownBits computeKnownBitsForHorizontalOperation(
    const Operator *I, const APInt &DemandedElts, unsigned Depth,
    const SimplifyQuery &Q,
    const function_ref<KnownBits(const KnownBits &, const KnownBits &)>
        KnownBitsFunc) {
  APInt DemandedEltsLHS, DemandedEltsRHS;
  getHorizDemandedEltsForFirstOperand(Q.DL.getTypeSizeInBits(I->getType()),
                                      DemandedElts, DemandedEltsLHS,
                                      DemandedEltsRHS);

  const auto ComputeForSingleOpFunc =
      [Depth, &Q, KnownBitsFunc](const Value *Op, APInt &DemandedEltsOp) {
        return KnownBitsFunc(
            computeKnownBits(Op, DemandedEltsOp, Depth + 1, Q),
            computeKnownBits(Op, DemandedEltsOp << 1, Depth + 1, Q));
      };

  if (DemandedEltsRHS.isZero())
    return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS);
  if (DemandedEltsLHS.isZero())
    return ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS);

  return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS)
      .intersectWith(ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS));
}

// Public so this can be used in `SimplifyDemandedUseBits`.
KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
                                             const KnownBits &KnownLHS,
                                             const KnownBits &KnownRHS,
                                             unsigned Depth,
                                             const SimplifyQuery &SQ) {
  auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);

  return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
                                  SQ);
}

ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
  Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
  // Without vscale_range, we only know that vscale is non-zero.
  if (!Attr.isValid())
    return ConstantRange(APInt(BitWidth, 1), APInt::getZero(BitWidth));

  unsigned AttrMin = Attr.getVScaleRangeMin();
  // Minimum is larger than vscale width, result is always poison.
  if ((unsigned)llvm::bit_width(AttrMin) > BitWidth)
    return ConstantRange::getEmpty(BitWidth);

  APInt Min(BitWidth, AttrMin);
  std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
  if (!AttrMax || (unsigned)llvm::bit_width(*AttrMax) > BitWidth)
    return ConstantRange(Min, APInt::getZero(BitWidth));

  return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
}

void llvm::adjustKnownBitsForSelectArm(KnownBits &Known, Value *Cond,
                                       Value *Arm, bool Invert, unsigned Depth,
                                       const SimplifyQuery &Q) {
  // If we have a constant arm, we are done.
  if (Known.isConstant())
    return;

  // See what condition implies about the bits of the select arm.
  KnownBits CondRes(Known.getBitWidth());
  computeKnownBitsFromCond(Arm, Cond, CondRes, Depth + 1, Q, Invert);
  // If we don't get any information from the condition, no reason to
  // proceed.
  if (CondRes.isUnknown())
    return;

  // We can have conflict if the condition is dead. I.e., if we have
  // (x | 64) < 32 ? (x | 64) : y
  // we will have conflict at bit 6 from the condition/the `or`.
  // In that case just return. It's not particularly important
  // what we do, as this select is going to be simplified soon.
  CondRes = CondRes.unionWith(Known);
  if (CondRes.hasConflict())
    return;

  // Finally make sure the information we found is valid. This is relatively
  // expensive so it's left for the very end.
  if (!isGuaranteedNotToBeUndef(Arm, Q.AC, Q.CxtI, Q.DT, Depth + 1))
    return;

  // Finally, we know we get information from the condition and it's valid,
  // so return it.
  Known = CondRes;
}

static void computeKnownBitsFromOperator(const Operator *I,
                                         const APInt &DemandedElts,
                                         KnownBits &Known, unsigned Depth,
                                         const SimplifyQuery &Q) {
  unsigned BitWidth = Known.getBitWidth();

  KnownBits Known2(BitWidth);
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    if (MDNode *MD =
            Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);
    break;
  case Instruction::And:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Or:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Xor:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Mul: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts,
                        Known, Known2, Depth, Q);
    break;
  }
  case Instruction::UDiv: {
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
    Known =
        KnownBits::udiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
    break;
  }
  case Instruction::SDiv: {
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
    Known =
        KnownBits::sdiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
    break;
  }
  case Instruction::Select: {
    auto ComputeForArm = [&](Value *Arm, bool Invert) {
      KnownBits Res(Known.getBitWidth());
      computeKnownBits(Arm, DemandedElts, Res, Depth + 1, Q);
      adjustKnownBitsForSelectArm(Res, I->getOperand(0), Arm, Invert, Depth, Q);
      return Res;
    };
    // Only known if known in both the LHS and RHS.
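    // E.g. (illustrative): 'select i1 %c, i8 4, i8 6' intersects 0b00000100
    // with 0b00000110, so every bit except bit 1 is known.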
    Known =
        ComputeForArm(I->getOperand(1), /*Invert=*/false)
            .intersectWith(ComputeForArm(I->getOperand(2), /*Invert=*/true));
    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
    break; // Can't work with floating point.
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case Instruction::ZExt:
  case Instruction::Trunc: {
    Type *SrcTy = I->getOperand(0)->getType();

    unsigned SrcBitWidth;
    // Note that we handle pointer operands here because of inttoptr/ptrtoint
    // which fall through here.
    Type *ScalarTy = SrcTy->getScalarType();
    SrcBitWidth = ScalarTy->isPointerTy() ?
      Q.DL.getPointerTypeSizeInBits(ScalarTy) :
      Q.DL.getTypeSizeInBits(ScalarTy);

    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    Known = Known.anyextOrTrunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
    if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I);
        Inst && Inst->hasNonNeg() && !Known.isNegative())
      Known.makeNonNegative();
    Known = Known.zextOrTrunc(BitWidth);
    break;
  }
  case Instruction::BitCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    if (SrcTy->isIntOrPtrTy() &&
        // TODO: For now, not handling conversions like:
        // (bitcast i64 %x to <2 x i32>)
        !I->getType()->isVectorTy()) {
      computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
      break;
    }

    const Value *V;
    // Handle bitcast from floating point to integer.
    if (match(I, m_ElementWiseBitCast(m_Value(V))) &&
        V->getType()->isFPOrFPVectorTy()) {
      Type *FPType = V->getType()->getScalarType();
      KnownFPClass Result =
          computeKnownFPClass(V, DemandedElts, fcAllFlags, Depth + 1, Q);
      FPClassTest FPClasses = Result.KnownFPClasses;

      // TODO: Treat it as zero/poison if the use of I is unreachable.
      if (FPClasses == fcNone)
        break;

      if (Result.isKnownNever(fcNormal | fcSubnormal | fcNan)) {
        Known.Zero.setAllBits();
        Known.One.setAllBits();

        if (FPClasses & fcInf)
          Known = Known.intersectWith(KnownBits::makeConstant(
              APFloat::getInf(FPType->getFltSemantics()).bitcastToAPInt()));

        if (FPClasses & fcZero)
          Known = Known.intersectWith(KnownBits::makeConstant(
              APInt::getZero(FPType->getScalarSizeInBits())));

        Known.Zero.clearSignBit();
        Known.One.clearSignBit();
      }

      if (Result.SignBit) {
        if (*Result.SignBit)
          Known.makeNegative();
        else
          Known.makeNonNegative();
      }

      break;
    }

    // Handle cast from vector integer type to scalar or vector integer.
    auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
    if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
        !I->getType()->isIntOrIntVectorTy() ||
        isa<ScalableVectorType>(I->getType()))
      break;

    // Look through a cast from narrow vector elements to wider type.
    // Examples: v4i32 -> v2i64, v3i8 -> i24
    unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
    if (BitWidth % SubBitWidth == 0) {
      // Known bits are automatically intersected across demanded elements of a
      // vector. So for example, if a bit is computed as known zero, it must be
      // zero across all demanded elements of the vector.
      //
      // For this bitcast, each demanded element of the output is sub-divided
      // across a set of smaller vector elements in the source vector. To get
      // the known bits for an entire element of the output, compute the known
      // bits for each sub-element sequentially. This is done by shifting the
      // one-set-bit demanded elements parameter across the sub-elements for
      // consecutive calls to computeKnownBits. We are using the demanded
      // elements parameter as a mask operator.
      //
      // The known bits of each sub-element are then inserted into place
      // (dependent on endian) to form the full result of known bits.
      unsigned NumElts = DemandedElts.getBitWidth();
      unsigned SubScale = BitWidth / SubBitWidth;
      APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i])
          SubDemandedElts.setBit(i * SubScale);
      }

      KnownBits KnownSrc(SubBitWidth);
      for (unsigned i = 0; i != SubScale; ++i) {
        computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
                         Depth + 1, Q);
        unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
        Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
      }
    }
    break;
  }
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    Known = Known.trunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    Known = Known.sext(BitWidth);
    break;
  }
  case Instruction::Shl: {
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                         bool ShAmtNonZero) {
      return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    // Trailing zeros of a left-shifted constant never decrease.
    const APInt *C;
    if (match(I->getOperand(0), m_APInt(C)))
      Known.Zero.setLowBits(C->countr_zero());
    break;
  }
  case Instruction::LShr: {
    bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
    auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                      bool ShAmtNonZero) {
      return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    // Leading zeros of a right-shifted constant never decrease.
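    // E.g. (illustrative): 'lshr i8 48, %s' always keeps the top two bits
    // clear, because 48 = 0b00110000 already has two leading zeros.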
    const APInt *C;
    if (match(I->getOperand(0), m_APInt(C)))
      Known.Zero.setHighBits(C->countl_zero());
    break;
  }
  case Instruction::AShr: {
    bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
    auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                      bool ShAmtNonZero) {
      return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    break;
  }
  case Instruction::Sub: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, NUW,
                           DemandedElts, Known, Known2, Depth, Q);
    break;
  }
  case Instruction::Add: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, NUW,
                           DemandedElts, Known, Known2, Depth, Q);
    break;
  }
  case Instruction::SRem:
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
    Known = KnownBits::srem(Known, Known2);
    break;

  case Instruction::URem:
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
    Known = KnownBits::urem(Known, Known2);
    break;
  case Instruction::Alloca:
    Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
    break;
  case Instruction::GetElementPtr: {
    // Analyze all of the subscripts of this getelementptr instruction
    // to determine if we can prove known low zero bits.
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    // Accumulate the constant indices in a separate variable
    // to minimize the number of calls to computeForAddSub.
    APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);

    gep_type_iterator GTI = gep_type_begin(I);
    for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
      // TrailZ can only become smaller, short-circuit if we hit zero.
      if (Known.isUnknown())
        break;

      Value *Index = I->getOperand(i);

      // Handle case when index is zero.
      Constant *CIndex = dyn_cast<Constant>(Index);
      if (CIndex && CIndex->isZeroValue())
        continue;

      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // Handle struct member offset arithmetic.

        assert(CIndex &&
               "Access to structure field must be known at compile time");

        if (CIndex->getType()->isVectorTy())
          Index = CIndex->getSplatValue();

        unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
        const StructLayout *SL = Q.DL.getStructLayout(STy);
        uint64_t Offset = SL->getElementOffset(Idx);
        AccConstIndices += Offset;
        continue;
      }

      // Handle array index arithmetic.
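      // E.g. (illustrative): for 'getelementptr i32, ptr %p, i64 %i' the index
      // is scaled by 4 below, so the offset contributes two known-zero low
      // bits.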
      Type *IndexedTy = GTI.getIndexedType();
      if (!IndexedTy->isSized()) {
        Known.resetAll();
        break;
      }

      unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
      KnownBits IndexBits(IndexBitWidth);
      computeKnownBits(Index, IndexBits, Depth + 1, Q);
      TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
      uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
      KnownBits ScalingFactor(IndexBitWidth);
      // Multiply by current sizeof type.
      // &A[i] == A + i * sizeof(*A[i]).
      if (IndexTypeSize.isScalable()) {
        // For scalable types the only thing we know about sizeof is
        // that this is a multiple of the minimum size.
        ScalingFactor.Zero.setLowBits(llvm::countr_zero(TypeSizeInBytes));
      } else if (IndexBits.isConstant()) {
        APInt IndexConst = IndexBits.getConstant();
        APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
        IndexConst *= ScalingFactor;
        AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
        continue;
      } else {
        ScalingFactor =
            KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes));
      }
      IndexBits = KnownBits::mul(IndexBits, ScalingFactor);

      // If the offsets have a different width from the pointer, according
      // to the language reference we need to sign-extend or truncate them
      // to the width of the pointer.
      IndexBits = IndexBits.sextOrTrunc(BitWidth);

      // Note that inbounds does *not* guarantee nsw for the addition, as only
      // the offset is signed, while the base address is unsigned.
      Known = KnownBits::computeForAddSub(
          /*Add=*/true, /*NSW=*/false, /* NUW=*/false, Known, IndexBits);
    }
    if (!Known.isUnknown() && !AccConstIndices.isZero()) {
      KnownBits Index = KnownBits::makeConstant(AccConstIndices);
      Known = KnownBits::computeForAddSub(
          /*Add=*/true, /*NSW=*/false, /* NUW=*/false, Known, Index);
    }
    break;
  }
  case Instruction::PHI: {
    const PHINode *P = cast<PHINode>(I);
    BinaryOperator *BO = nullptr;
    Value *R = nullptr, *L = nullptr;
    if (matchSimpleRecurrence(P, BO, R, L)) {
      // Handle the case of a simple two-predecessor recurrence PHI.
      // There's a lot more that could theoretically be done here, but
      // this is sufficient to catch some interesting cases.
      unsigned Opcode = BO->getOpcode();

      // If this is a shift recurrence, we know the bits being shifted in.
      // We can combine that with information about the start value of the
      // recurrence to conclude facts about the result.
      if ((Opcode == Instruction::LShr || Opcode == Instruction::AShr ||
           Opcode == Instruction::Shl) &&
          BO->getOperand(0) == I) {

        // We have matched a recurrence of the form:
        // %iv = [R, %entry], [%iv.next, %backedge]
        // %iv.next = shift_op %iv, L

        // Recurse with the phi context to avoid concern about whether facts
        // inferred hold at original context instruction. TODO: It may be
        // correct to use the original context. If warranted, explore and
        // add sufficient tests to cover.
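        // Illustrative example: for %iv starting at 8 with
        // '%iv.next = lshr i32 %iv, 1', the 28 leading zeros of the start
        // value are known to persist through every iteration.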
        SimplifyQuery RecQ = Q.getWithoutCondContext();
        RecQ.CxtI = P;
        computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
        switch (Opcode) {
        case Instruction::Shl:
          // A shl recurrence will only increase the trailing zeros
          Known.Zero.setLowBits(Known2.countMinTrailingZeros());
          break;
        case Instruction::LShr:
          // A lshr recurrence will preserve the leading zeros of the
          // start value
          Known.Zero.setHighBits(Known2.countMinLeadingZeros());
          break;
        case Instruction::AShr:
          // An ashr recurrence will extend the initial sign bit
          Known.Zero.setHighBits(Known2.countMinLeadingZeros());
          Known.One.setHighBits(Known2.countMinLeadingOnes());
          break;
        };
      }

      // Check for operations that have the property that if
      // both their operands have low zero bits, the result
      // will have low zero bits.
      if (Opcode == Instruction::Add ||
          Opcode == Instruction::Sub ||
          Opcode == Instruction::And ||
          Opcode == Instruction::Or ||
          Opcode == Instruction::Mul) {
        // Change the context instruction to the "edge" that flows into the
        // phi. This is important because that is where the value is actually
        // "evaluated" even though it is used later somewhere else. (see also
        // D69571).
        SimplifyQuery RecQ = Q.getWithoutCondContext();

        unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
        Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
        Instruction *LInst = P->getIncomingBlock(1 - OpNum)->getTerminator();

        // Ok, we have a PHI of the form L op= R. Check for low
        // zero bits.
        RecQ.CxtI = RInst;
        computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);

        // We need to take the minimum number of known bits
        KnownBits Known3(BitWidth);
        RecQ.CxtI = LInst;
        computeKnownBits(L, DemandedElts, Known3, Depth + 1, RecQ);

        Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
                                       Known3.countMinTrailingZeros()));

        auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
        if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
          // If initial value of recurrence is nonnegative, and we are adding
          // a nonnegative number with nsw, the result can only be nonnegative
          // or poison value regardless of the number of times we execute the
          // add in phi recurrence. If initial value is negative and we are
          // adding a negative number with nsw, the result can only be
          // negative or poison value. Similar arguments apply to sub and mul.
1498 // 1499 // (add non-negative, non-negative) --> non-negative 1500 // (add negative, negative) --> negative 1501 if (Opcode == Instruction::Add) { 1502 if (Known2.isNonNegative() && Known3.isNonNegative()) 1503 Known.makeNonNegative(); 1504 else if (Known2.isNegative() && Known3.isNegative()) 1505 Known.makeNegative(); 1506 } 1507 1508 // (sub nsw non-negative, negative) --> non-negative 1509 // (sub nsw negative, non-negative) --> negative 1510 else if (Opcode == Instruction::Sub && BO->getOperand(0) == I) { 1511 if (Known2.isNonNegative() && Known3.isNegative()) 1512 Known.makeNonNegative(); 1513 else if (Known2.isNegative() && Known3.isNonNegative()) 1514 Known.makeNegative(); 1515 } 1516 1517 // (mul nsw non-negative, non-negative) --> non-negative 1518 else if (Opcode == Instruction::Mul && Known2.isNonNegative() && 1519 Known3.isNonNegative()) 1520 Known.makeNonNegative(); 1521 } 1522 1523 break; 1524 } 1525 } 1526 1527 // Unreachable blocks may have zero-operand PHI nodes. 1528 if (P->getNumIncomingValues() == 0) 1529 break; 1530 1531 // Otherwise take the unions of the known bit sets of the operands, 1532 // taking conservative care to avoid excessive recursion. 1533 if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) { 1534 // Skip if every incoming value references to ourself. 1535 if (isa_and_nonnull<UndefValue>(P->hasConstantValue())) 1536 break; 1537 1538 Known.Zero.setAllBits(); 1539 Known.One.setAllBits(); 1540 for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) { 1541 Value *IncValue = P->getIncomingValue(u); 1542 // Skip direct self references. 1543 if (IncValue == P) continue; 1544 1545 // Change the context instruction to the "edge" that flows into the 1546 // phi. This is important because that is where the value is actually 1547 // "evaluated" even though it is used later somewhere else. (see also 1548 // D69571). 1549 SimplifyQuery RecQ = Q.getWithoutCondContext(); 1550 RecQ.CxtI = P->getIncomingBlock(u)->getTerminator(); 1551 1552 Known2 = KnownBits(BitWidth); 1553 1554 // Recurse, but cap the recursion to one level, because we don't 1555 // want to waste time spinning around in loops. 1556 // TODO: See if we can base recursion limiter on number of incoming phi 1557 // edges so we don't overly clamp analysis. 1558 computeKnownBits(IncValue, DemandedElts, Known2, 1559 MaxAnalysisRecursionDepth - 1, RecQ); 1560 1561 // See if we can further use a conditional branch into the phi 1562 // to help us determine the range of the value. 1563 if (!Known2.isConstant()) { 1564 ICmpInst::Predicate Pred; 1565 const APInt *RHSC; 1566 BasicBlock *TrueSucc, *FalseSucc; 1567 // TODO: Use RHS Value and compute range from its known bits. 1568 if (match(RecQ.CxtI, 1569 m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)), 1570 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) { 1571 // Check for cases of duplicate successors. 1572 if ((TrueSucc == P->getParent()) != (FalseSucc == P->getParent())) { 1573 // If we're using the false successor, invert the predicate. 1574 if (FalseSucc == P->getParent()) 1575 Pred = CmpInst::getInversePredicate(Pred); 1576 // Get the knownbits implied by the incoming phi condition. 1577 auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC); 1578 KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits()); 1579 // We can have conflicts here if we are analyzing deadcode (its 1580 // impossible for us reach this BB based the icmp). 
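// Illustrative example (not part of the original comments): if the incoming
// edge is guarded by (icmp ult i8 %inc, 16), makeExactICmpRegion gives the
// range [0, 16), whose known bits have the top four bits zero; unioning that
// with Known2 can only add information, so a conflict here means the edge is
// dead.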
1581 if (KnownUnion.hasConflict()) { 1582 // No reason to continue analyzing in a known dead region, so 1583 // just resetAll and break. This will cause us to also exit the 1584 // outer loop. 1585 Known.resetAll(); 1586 break; 1587 } 1588 Known2 = KnownUnion; 1589 } 1590 } 1591 } 1592 1593 Known = Known.intersectWith(Known2); 1594 // If all bits have been ruled out, there's no need to check 1595 // more operands. 1596 if (Known.isUnknown()) 1597 break; 1598 } 1599 } 1600 break; 1601 } 1602 case Instruction::Call: 1603 case Instruction::Invoke: { 1604 // If range metadata is attached to this call, set known bits from that, 1605 // and then intersect with known bits based on other properties of the 1606 // function. 1607 if (MDNode *MD = 1608 Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range)) 1609 computeKnownBitsFromRangeMetadata(*MD, Known); 1610 1611 const auto *CB = cast<CallBase>(I); 1612 1613 if (std::optional<ConstantRange> Range = CB->getRange()) 1614 Known = Known.unionWith(Range->toKnownBits()); 1615 1616 if (const Value *RV = CB->getReturnedArgOperand()) { 1617 if (RV->getType() == I->getType()) { 1618 computeKnownBits(RV, Known2, Depth + 1, Q); 1619 Known = Known.unionWith(Known2); 1620 // If the function doesn't return properly for all input values 1621 // (e.g. unreachable exits) then there might be conflicts between the 1622 // argument value and the range metadata. Simply discard the known bits 1623 // in case of conflicts. 1624 if (Known.hasConflict()) 1625 Known.resetAll(); 1626 } 1627 } 1628 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 1629 switch (II->getIntrinsicID()) { 1630 default: 1631 break; 1632 case Intrinsic::abs: { 1633 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); 1634 bool IntMinIsPoison = match(II->getArgOperand(1), m_One()); 1635 Known = Known2.abs(IntMinIsPoison); 1636 break; 1637 } 1638 case Intrinsic::bitreverse: 1639 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); 1640 Known.Zero |= Known2.Zero.reverseBits(); 1641 Known.One |= Known2.One.reverseBits(); 1642 break; 1643 case Intrinsic::bswap: 1644 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); 1645 Known.Zero |= Known2.Zero.byteSwap(); 1646 Known.One |= Known2.One.byteSwap(); 1647 break; 1648 case Intrinsic::ctlz: { 1649 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); 1650 // If we have a known 1, its position is our upper bound. 1651 unsigned PossibleLZ = Known2.countMaxLeadingZeros(); 1652 // If this call is poison for 0 input, the result will be less than 2^n. 1653 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) 1654 PossibleLZ = std::min(PossibleLZ, BitWidth - 1); 1655 unsigned LowBits = llvm::bit_width(PossibleLZ); 1656 Known.Zero.setBitsFrom(LowBits); 1657 break; 1658 } 1659 case Intrinsic::cttz: { 1660 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); 1661 // If we have a known 1, its position is our upper bound. 1662 unsigned PossibleTZ = Known2.countMaxTrailingZeros(); 1663 // If this call is poison for 0 input, the result will be less than 2^n. 1664 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) 1665 PossibleTZ = std::min(PossibleTZ, BitWidth - 1); 1666 unsigned LowBits = llvm::bit_width(PossibleTZ); 1667 Known.Zero.setBitsFrom(LowBits); 1668 break; 1669 } 1670 case Intrinsic::ctpop: { 1671 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); 1672 // We can bound the space the count needs. 
Also, bits known to be zero 1673 // can't contribute to the population. 1674 unsigned BitsPossiblySet = Known2.countMaxPopulation(); 1675 unsigned LowBits = llvm::bit_width(BitsPossiblySet); 1676 Known.Zero.setBitsFrom(LowBits); 1677 // TODO: we could bound KnownOne using the lower bound on the number 1678 // of bits which might be set provided by popcnt KnownOne2. 1679 break; 1680 } 1681 case Intrinsic::fshr: 1682 case Intrinsic::fshl: { 1683 const APInt *SA; 1684 if (!match(I->getOperand(2), m_APInt(SA))) 1685 break; 1686 1687 // Normalize to funnel shift left. 1688 uint64_t ShiftAmt = SA->urem(BitWidth); 1689 if (II->getIntrinsicID() == Intrinsic::fshr) 1690 ShiftAmt = BitWidth - ShiftAmt; 1691 1692 KnownBits Known3(BitWidth); 1693 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); 1694 computeKnownBits(I->getOperand(1), DemandedElts, Known3, Depth + 1, Q); 1695 1696 Known.Zero = 1697 Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt); 1698 Known.One = 1699 Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt); 1700 break; 1701 } 1702 case Intrinsic::uadd_sat: 1703 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q); 1704 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q); 1705 Known = KnownBits::uadd_sat(Known, Known2); 1706 break; 1707 case Intrinsic::usub_sat: 1708 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q); 1709 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q); 1710 Known = KnownBits::usub_sat(Known, Known2); 1711 break; 1712 case Intrinsic::sadd_sat: 1713 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q); 1714 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q); 1715 Known = KnownBits::sadd_sat(Known, Known2); 1716 break; 1717 case Intrinsic::ssub_sat: 1718 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q); 1719 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q); 1720 Known = KnownBits::ssub_sat(Known, Known2); 1721 break; 1722 // Vec reverse preserves bits from input vec. 1723 case Intrinsic::vector_reverse: 1724 computeKnownBits(I->getOperand(0), DemandedElts.reverseBits(), Known, 1725 Depth + 1, Q); 1726 break; 1727 // for min/max/and/or reduce, any bit common to each element in the 1728 // input vec is set in the output. 1729 case Intrinsic::vector_reduce_and: 1730 case Intrinsic::vector_reduce_or: 1731 case Intrinsic::vector_reduce_umax: 1732 case Intrinsic::vector_reduce_umin: 1733 case Intrinsic::vector_reduce_smax: 1734 case Intrinsic::vector_reduce_smin: 1735 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1736 break; 1737 case Intrinsic::vector_reduce_xor: { 1738 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1739 // The zeros common to all vecs are zero in the output. 1740 // If the number of elements is odd, then the common ones remain. If the 1741 // number of elements is even, then the common ones becomes zeros. 1742 auto *VecTy = cast<VectorType>(I->getOperand(0)->getType()); 1743 // Even, so the ones become zeros. 1744 bool EvenCnt = VecTy->getElementCount().isKnownEven(); 1745 if (EvenCnt) 1746 Known.Zero |= Known.One; 1747 // Maybe even element count so need to clear ones. 
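// Illustrative example (not part of the original comments): xor-reducing a
// <4 x i8> whose lanes all have bit 0 known one gives 1 ^ 1 ^ 1 ^ 1 = 0, so
// with a provably even element count the common one bits become known zeros;
// for a scalable vector the parity is unknown, so they merely become unknown.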
1748 if (VecTy->isScalableTy() || EvenCnt) 1749 Known.One.clearAllBits(); 1750 break; 1751 } 1752 case Intrinsic::umin: 1753 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q); 1754 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q); 1755 Known = KnownBits::umin(Known, Known2); 1756 break; 1757 case Intrinsic::umax: 1758 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q); 1759 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q); 1760 Known = KnownBits::umax(Known, Known2); 1761 break; 1762 case Intrinsic::smin: 1763 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q); 1764 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q); 1765 Known = KnownBits::smin(Known, Known2); 1766 break; 1767 case Intrinsic::smax: 1768 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q); 1769 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q); 1770 Known = KnownBits::smax(Known, Known2); 1771 break; 1772 case Intrinsic::ptrmask: { 1773 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q); 1774 1775 const Value *Mask = I->getOperand(1); 1776 Known2 = KnownBits(Mask->getType()->getScalarSizeInBits()); 1777 computeKnownBits(Mask, DemandedElts, Known2, Depth + 1, Q); 1778 // TODO: 1-extend would be more precise. 1779 Known &= Known2.anyextOrTrunc(BitWidth); 1780 break; 1781 } 1782 case Intrinsic::x86_sse2_pmulh_w: 1783 case Intrinsic::x86_avx2_pmulh_w: 1784 case Intrinsic::x86_avx512_pmulh_w_512: 1785 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q); 1786 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q); 1787 Known = KnownBits::mulhs(Known, Known2); 1788 break; 1789 case Intrinsic::x86_sse2_pmulhu_w: 1790 case Intrinsic::x86_avx2_pmulhu_w: 1791 case Intrinsic::x86_avx512_pmulhu_w_512: 1792 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q); 1793 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q); 1794 Known = KnownBits::mulhu(Known, Known2); 1795 break; 1796 case Intrinsic::x86_sse42_crc32_64_64: 1797 Known.Zero.setBitsFrom(32); 1798 break; 1799 case Intrinsic::x86_ssse3_phadd_d_128: 1800 case Intrinsic::x86_ssse3_phadd_w_128: 1801 case Intrinsic::x86_avx2_phadd_d: 1802 case Intrinsic::x86_avx2_phadd_w: { 1803 Known = computeKnownBitsForHorizontalOperation( 1804 I, DemandedElts, Depth, Q, 1805 [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) { 1806 return KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false, 1807 /*NUW=*/false, KnownLHS, 1808 KnownRHS); 1809 }); 1810 break; 1811 } 1812 case Intrinsic::x86_ssse3_phadd_sw_128: 1813 case Intrinsic::x86_avx2_phadd_sw: { 1814 Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth, 1815 Q, KnownBits::sadd_sat); 1816 break; 1817 } 1818 case Intrinsic::x86_ssse3_phsub_d_128: 1819 case Intrinsic::x86_ssse3_phsub_w_128: 1820 case Intrinsic::x86_avx2_phsub_d: 1821 case Intrinsic::x86_avx2_phsub_w: { 1822 Known = computeKnownBitsForHorizontalOperation( 1823 I, DemandedElts, Depth, Q, 1824 [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) { 1825 return KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false, 1826 /*NUW=*/false, KnownLHS, 1827 KnownRHS); 1828 }); 1829 break; 1830 } 1831 case Intrinsic::x86_ssse3_phsub_sw_128: 1832 case Intrinsic::x86_avx2_phsub_sw: { 1833 Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth, 1834 Q, KnownBits::ssub_sat); 1835 break; 1836 } 1837 case 
Intrinsic::riscv_vsetvli: 1838 case Intrinsic::riscv_vsetvlimax: { 1839 bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli; 1840 const ConstantRange Range = getVScaleRange(II->getFunction(), BitWidth); 1841 uint64_t SEW = RISCVVType::decodeVSEW( 1842 cast<ConstantInt>(II->getArgOperand(HasAVL))->getZExtValue()); 1843 RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>( 1844 cast<ConstantInt>(II->getArgOperand(1 + HasAVL))->getZExtValue()); 1845 uint64_t MaxVLEN = 1846 Range.getUnsignedMax().getZExtValue() * RISCV::RVVBitsPerBlock; 1847 uint64_t MaxVL = MaxVLEN / RISCVVType::getSEWLMULRatio(SEW, VLMUL); 1848 1849 // Result of vsetvli must be not larger than AVL. 1850 if (HasAVL) 1851 if (auto *CI = dyn_cast<ConstantInt>(II->getArgOperand(0))) 1852 MaxVL = std::min(MaxVL, CI->getZExtValue()); 1853 1854 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1; 1855 if (BitWidth > KnownZeroFirstBit) 1856 Known.Zero.setBitsFrom(KnownZeroFirstBit); 1857 break; 1858 } 1859 case Intrinsic::vscale: { 1860 if (!II->getParent() || !II->getFunction()) 1861 break; 1862 1863 Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits(); 1864 break; 1865 } 1866 } 1867 } 1868 break; 1869 } 1870 case Instruction::ShuffleVector: { 1871 auto *Shuf = dyn_cast<ShuffleVectorInst>(I); 1872 // FIXME: Do we need to handle ConstantExpr involving shufflevectors? 1873 if (!Shuf) { 1874 Known.resetAll(); 1875 return; 1876 } 1877 // For undef elements, we don't know anything about the common state of 1878 // the shuffle result. 1879 APInt DemandedLHS, DemandedRHS; 1880 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) { 1881 Known.resetAll(); 1882 return; 1883 } 1884 Known.One.setAllBits(); 1885 Known.Zero.setAllBits(); 1886 if (!!DemandedLHS) { 1887 const Value *LHS = Shuf->getOperand(0); 1888 computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q); 1889 // If we don't know any bits, early out. 1890 if (Known.isUnknown()) 1891 break; 1892 } 1893 if (!!DemandedRHS) { 1894 const Value *RHS = Shuf->getOperand(1); 1895 computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q); 1896 Known = Known.intersectWith(Known2); 1897 } 1898 break; 1899 } 1900 case Instruction::InsertElement: { 1901 if (isa<ScalableVectorType>(I->getType())) { 1902 Known.resetAll(); 1903 return; 1904 } 1905 const Value *Vec = I->getOperand(0); 1906 const Value *Elt = I->getOperand(1); 1907 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2)); 1908 unsigned NumElts = DemandedElts.getBitWidth(); 1909 APInt DemandedVecElts = DemandedElts; 1910 bool NeedsElt = true; 1911 // If we know the index we are inserting too, clear it from Vec check. 1912 if (CIdx && CIdx->getValue().ult(NumElts)) { 1913 DemandedVecElts.clearBit(CIdx->getZExtValue()); 1914 NeedsElt = DemandedElts[CIdx->getZExtValue()]; 1915 } 1916 1917 Known.One.setAllBits(); 1918 Known.Zero.setAllBits(); 1919 if (NeedsElt) { 1920 computeKnownBits(Elt, Known, Depth + 1, Q); 1921 // If we don't know any bits, early out. 1922 if (Known.isUnknown()) 1923 break; 1924 } 1925 1926 if (!DemandedVecElts.isZero()) { 1927 computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q); 1928 Known = Known.intersectWith(Known2); 1929 } 1930 break; 1931 } 1932 case Instruction::ExtractElement: { 1933 // Look through extract element. If the index is non-constant or 1934 // out-of-range demand all elements, otherwise just the extracted element. 
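// Illustrative example (not part of the original comments):
//   extractelement <4 x i32> %v, i64 2
// only needs the known bits of lane 2, so DemandedVecElts becomes 0b0100,
// while a variable or out-of-range index demands all four lanes (0b1111).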
1935 const Value *Vec = I->getOperand(0); 1936 const Value *Idx = I->getOperand(1); 1937 auto *CIdx = dyn_cast<ConstantInt>(Idx); 1938 if (isa<ScalableVectorType>(Vec->getType())) { 1939 // FIXME: there's probably *something* we can do with scalable vectors 1940 Known.resetAll(); 1941 break; 1942 } 1943 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements(); 1944 APInt DemandedVecElts = APInt::getAllOnes(NumElts); 1945 if (CIdx && CIdx->getValue().ult(NumElts)) 1946 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); 1947 computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q); 1948 break; 1949 } 1950 case Instruction::ExtractValue: 1951 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) { 1952 const ExtractValueInst *EVI = cast<ExtractValueInst>(I); 1953 if (EVI->getNumIndices() != 1) break; 1954 if (EVI->getIndices()[0] == 0) { 1955 switch (II->getIntrinsicID()) { 1956 default: break; 1957 case Intrinsic::uadd_with_overflow: 1958 case Intrinsic::sadd_with_overflow: 1959 computeKnownBitsAddSub( 1960 true, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false, 1961 /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q); 1962 break; 1963 case Intrinsic::usub_with_overflow: 1964 case Intrinsic::ssub_with_overflow: 1965 computeKnownBitsAddSub( 1966 false, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false, 1967 /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q); 1968 break; 1969 case Intrinsic::umul_with_overflow: 1970 case Intrinsic::smul_with_overflow: 1971 computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false, 1972 DemandedElts, Known, Known2, Depth, Q); 1973 break; 1974 } 1975 } 1976 } 1977 break; 1978 case Instruction::Freeze: 1979 if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT, 1980 Depth + 1)) 1981 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1982 break; 1983 } 1984 } 1985 1986 /// Determine which bits of V are known to be either zero or one and return 1987 /// them. 1988 KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, 1989 unsigned Depth, const SimplifyQuery &Q) { 1990 KnownBits Known(getBitWidth(V->getType(), Q.DL)); 1991 ::computeKnownBits(V, DemandedElts, Known, Depth, Q); 1992 return Known; 1993 } 1994 1995 /// Determine which bits of V are known to be either zero or one and return 1996 /// them. 1997 KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth, 1998 const SimplifyQuery &Q) { 1999 KnownBits Known(getBitWidth(V->getType(), Q.DL)); 2000 computeKnownBits(V, Known, Depth, Q); 2001 return Known; 2002 } 2003 2004 /// Determine which bits of V are known to be either zero or one and return 2005 /// them in the Known bit set. 2006 /// 2007 /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that 2008 /// we cannot optimize based on the assumption that it is zero without changing 2009 /// it to be an explicit zero. If we don't change it to zero, other code could 2010 /// optimized based on the contradictory assumption that it is non-zero. 2011 /// Because instcombine aggressively folds operations with undef args anyway, 2012 /// this won't lose us code quality. 2013 /// 2014 /// This function is defined on values with integer type, values with pointer 2015 /// type, and vectors of integers. 
In the case 2016 /// where V is a vector, known zero, and known one values are the 2017 /// same width as the vector element, and the bit is set only if it is true 2018 /// for all of the demanded elements in the vector specified by DemandedElts. 2019 void computeKnownBits(const Value *V, const APInt &DemandedElts, 2020 KnownBits &Known, unsigned Depth, 2021 const SimplifyQuery &Q) { 2022 if (!DemandedElts) { 2023 // No demanded elts, better to assume we don't know anything. 2024 Known.resetAll(); 2025 return; 2026 } 2027 2028 assert(V && "No Value?"); 2029 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 2030 2031 #ifndef NDEBUG 2032 Type *Ty = V->getType(); 2033 unsigned BitWidth = Known.getBitWidth(); 2034 2035 assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) && 2036 "Not integer or pointer type!"); 2037 2038 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { 2039 assert( 2040 FVTy->getNumElements() == DemandedElts.getBitWidth() && 2041 "DemandedElt width should equal the fixed vector number of elements"); 2042 } else { 2043 assert(DemandedElts == APInt(1, 1) && 2044 "DemandedElt width should be 1 for scalars or scalable vectors"); 2045 } 2046 2047 Type *ScalarTy = Ty->getScalarType(); 2048 if (ScalarTy->isPointerTy()) { 2049 assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) && 2050 "V and Known should have same BitWidth"); 2051 } else { 2052 assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) && 2053 "V and Known should have same BitWidth"); 2054 } 2055 #endif 2056 2057 const APInt *C; 2058 if (match(V, m_APInt(C))) { 2059 // We know all of the bits for a scalar constant or a splat vector constant! 2060 Known = KnownBits::makeConstant(*C); 2061 return; 2062 } 2063 // Null and aggregate-zero are all-zeros. 2064 if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) { 2065 Known.setAllZero(); 2066 return; 2067 } 2068 // Handle a constant vector by taking the intersection of the known bits of 2069 // each element. 2070 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) { 2071 assert(!isa<ScalableVectorType>(V->getType())); 2072 // We know that CDV must be a vector of integers. Take the intersection of 2073 // each element. 2074 Known.Zero.setAllBits(); Known.One.setAllBits(); 2075 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) { 2076 if (!DemandedElts[i]) 2077 continue; 2078 APInt Elt = CDV->getElementAsAPInt(i); 2079 Known.Zero &= ~Elt; 2080 Known.One &= Elt; 2081 } 2082 if (Known.hasConflict()) 2083 Known.resetAll(); 2084 return; 2085 } 2086 2087 if (const auto *CV = dyn_cast<ConstantVector>(V)) { 2088 assert(!isa<ScalableVectorType>(V->getType())); 2089 // We know that CV must be a vector of integers. Take the intersection of 2090 // each element. 2091 Known.Zero.setAllBits(); Known.One.setAllBits(); 2092 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { 2093 if (!DemandedElts[i]) 2094 continue; 2095 Constant *Element = CV->getAggregateElement(i); 2096 if (isa<PoisonValue>(Element)) 2097 continue; 2098 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element); 2099 if (!ElementCI) { 2100 Known.resetAll(); 2101 return; 2102 } 2103 const APInt &Elt = ElementCI->getValue(); 2104 Known.Zero &= ~Elt; 2105 Known.One &= Elt; 2106 } 2107 if (Known.hasConflict()) 2108 Known.resetAll(); 2109 return; 2110 } 2111 2112 // Start out not knowing anything. 2113 Known.resetAll(); 2114 2115 // We can't imply anything about undefs. 
2116 if (isa<UndefValue>(V)) 2117 return; 2118 2119 // There's no point in looking through other users of ConstantData for 2120 // assumptions. Confirm that we've handled them all. 2121 assert(!isa<ConstantData>(V) && "Unhandled constant data!"); 2122 2123 if (const auto *A = dyn_cast<Argument>(V)) 2124 if (std::optional<ConstantRange> Range = A->getRange()) 2125 Known = Range->toKnownBits(); 2126 2127 // All recursive calls that increase depth must come after this. 2128 if (Depth == MaxAnalysisRecursionDepth) 2129 return; 2130 2131 // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has 2132 // the bits of its aliasee. 2133 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { 2134 if (!GA->isInterposable()) 2135 computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q); 2136 return; 2137 } 2138 2139 if (const Operator *I = dyn_cast<Operator>(V)) 2140 computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q); 2141 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 2142 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) 2143 Known = CR->toKnownBits(); 2144 } 2145 2146 // Aligned pointers have trailing zeros - refine Known.Zero set 2147 if (isa<PointerType>(V->getType())) { 2148 Align Alignment = V->getPointerAlignment(Q.DL); 2149 Known.Zero.setLowBits(Log2(Alignment)); 2150 } 2151 2152 // computeKnownBitsFromContext strictly refines Known. 2153 // Therefore, we run them after computeKnownBitsFromOperator. 2154 2155 // Check whether we can determine known bits from context such as assumes. 2156 computeKnownBitsFromContext(V, Known, Depth, Q); 2157 } 2158 2159 /// Try to detect a recurrence that the value of the induction variable is 2160 /// always a power of two (or zero). 2161 static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero, 2162 unsigned Depth, SimplifyQuery &Q) { 2163 BinaryOperator *BO = nullptr; 2164 Value *Start = nullptr, *Step = nullptr; 2165 if (!matchSimpleRecurrence(PN, BO, Start, Step)) 2166 return false; 2167 2168 // Initial value must be a power of two. 2169 for (const Use &U : PN->operands()) { 2170 if (U.get() == Start) { 2171 // Initial value comes from a different BB, need to adjust context 2172 // instruction for analysis. 2173 Q.CxtI = PN->getIncomingBlock(U)->getTerminator(); 2174 if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q)) 2175 return false; 2176 } 2177 } 2178 2179 // Except for Mul, the induction variable must be on the left side of the 2180 // increment expression, otherwise its value can be arbitrary. 2181 if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step) 2182 return false; 2183 2184 Q.CxtI = BO->getParent()->getTerminator(); 2185 switch (BO->getOpcode()) { 2186 case Instruction::Mul: 2187 // Power of two is closed under multiplication. 2188 return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || 2189 Q.IIQ.hasNoSignedWrap(BO)) && 2190 isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q); 2191 case Instruction::SDiv: 2192 // Start value must not be signmask for signed division, so simply being a 2193 // power of two is not sufficient, and it has to be a constant. 2194 if (!match(Start, m_Power2()) || match(Start, m_SignMask())) 2195 return false; 2196 [[fallthrough]]; 2197 case Instruction::UDiv: 2198 // Divisor must be a power of two. 2199 // If OrZero is false, cannot guarantee induction variable is non-zero after 2200 // division, same for Shr, unless it is exact division. 
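// Illustrative example (not part of the original comments): the udiv
// recurrence 8, 4, 2, 1, 0, 0, ... starts as a power of two but eventually
// reaches zero, so when OrZero is false we additionally require the division
// to be exact (1 /u 2 would then be poison rather than 0).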
2201 return (OrZero || Q.IIQ.isExact(BO)) && 2202 isKnownToBeAPowerOfTwo(Step, false, Depth, Q); 2203 case Instruction::Shl: 2204 return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO); 2205 case Instruction::AShr: 2206 if (!match(Start, m_Power2()) || match(Start, m_SignMask())) 2207 return false; 2208 [[fallthrough]]; 2209 case Instruction::LShr: 2210 return OrZero || Q.IIQ.isExact(BO); 2211 default: 2212 return false; 2213 } 2214 } 2215 2216 /// Return true if the given value is known to have exactly one 2217 /// bit set when defined. For vectors return true if every element is known to 2218 /// be a power of two when defined. Supports values with integer or pointer 2219 /// types and vectors of integers. 2220 bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, 2221 const SimplifyQuery &Q) { 2222 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 2223 2224 if (isa<Constant>(V)) 2225 return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2()); 2226 2227 // i1 is by definition a power of 2 or zero. 2228 if (OrZero && V->getType()->getScalarSizeInBits() == 1) 2229 return true; 2230 2231 auto *I = dyn_cast<Instruction>(V); 2232 if (!I) 2233 return false; 2234 2235 if (Q.CxtI && match(V, m_VScale())) { 2236 const Function *F = Q.CxtI->getFunction(); 2237 // The vscale_range indicates vscale is a power-of-two. 2238 return F->hasFnAttribute(Attribute::VScaleRange); 2239 } 2240 2241 // 1 << X is clearly a power of two if the one is not shifted off the end. If 2242 // it is shifted off the end then the result is undefined. 2243 if (match(I, m_Shl(m_One(), m_Value()))) 2244 return true; 2245 2246 // (signmask) >>l X is clearly a power of two if the one is not shifted off 2247 // the bottom. If it is shifted off the bottom then the result is undefined. 2248 if (match(I, m_LShr(m_SignMask(), m_Value()))) 2249 return true; 2250 2251 // The remaining tests are all recursive, so bail out if we hit the limit. 2252 if (Depth++ == MaxAnalysisRecursionDepth) 2253 return false; 2254 2255 switch (I->getOpcode()) { 2256 case Instruction::ZExt: 2257 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 2258 case Instruction::Trunc: 2259 return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 2260 case Instruction::Shl: 2261 if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I)) 2262 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 2263 return false; 2264 case Instruction::LShr: 2265 if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I))) 2266 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 2267 return false; 2268 case Instruction::UDiv: 2269 if (Q.IIQ.isExact(cast<BinaryOperator>(I))) 2270 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 2271 return false; 2272 case Instruction::Mul: 2273 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) && 2274 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) && 2275 (OrZero || isKnownNonZero(I, Q, Depth)); 2276 case Instruction::And: 2277 // A power of two and'd with anything is a power of two or zero. 2278 if (OrZero && 2279 (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) || 2280 isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q))) 2281 return true; 2282 // X & (-X) is always a power of two or zero. 
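// Illustrative example (not part of the original comments): for
// X = 0b0110100 we get -X = 0b1001100 and X & -X = 0b0000100, i.e. the
// lowest set bit of X; when X == 0 the result is 0, hence "or zero".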
2283 if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) || 2284 match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0))))) 2285 return OrZero || isKnownNonZero(I->getOperand(0), Q, Depth); 2286 return false; 2287 case Instruction::Add: { 2288 // Adding a power-of-two or zero to the same power-of-two or zero yields 2289 // either the original power-of-two, a larger power-of-two or zero. 2290 const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V); 2291 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) || 2292 Q.IIQ.hasNoSignedWrap(VOBO)) { 2293 if (match(I->getOperand(0), 2294 m_c_And(m_Specific(I->getOperand(1)), m_Value())) && 2295 isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q)) 2296 return true; 2297 if (match(I->getOperand(1), 2298 m_c_And(m_Specific(I->getOperand(0)), m_Value())) && 2299 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q)) 2300 return true; 2301 2302 unsigned BitWidth = V->getType()->getScalarSizeInBits(); 2303 KnownBits LHSBits(BitWidth); 2304 computeKnownBits(I->getOperand(0), LHSBits, Depth, Q); 2305 2306 KnownBits RHSBits(BitWidth); 2307 computeKnownBits(I->getOperand(1), RHSBits, Depth, Q); 2308 // If i8 V is a power of two or zero: 2309 // ZeroBits: 1 1 1 0 1 1 1 1 2310 // ~ZeroBits: 0 0 0 1 0 0 0 0 2311 if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2()) 2312 // If OrZero isn't set, we cannot give back a zero result. 2313 // Make sure either the LHS or RHS has a bit set. 2314 if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue()) 2315 return true; 2316 } 2317 2318 // LShr(UINT_MAX, Y) + 1 is a power of two (if add is nuw) or zero. 2319 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO)) 2320 if (match(I, m_Add(m_LShr(m_AllOnes(), m_Value()), m_One()))) 2321 return true; 2322 return false; 2323 } 2324 case Instruction::Select: 2325 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) && 2326 isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q); 2327 case Instruction::PHI: { 2328 // A PHI node is power of two if all incoming values are power of two, or if 2329 // it is an induction variable where in each step its value is a power of 2330 // two. 2331 auto *PN = cast<PHINode>(I); 2332 SimplifyQuery RecQ = Q.getWithoutCondContext(); 2333 2334 // Check if it is an induction variable and always power of two. 2335 if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ)) 2336 return true; 2337 2338 // Recursively check all incoming values. Limit recursion to 2 levels, so 2339 // that search complexity is limited to number of operands^2. 2340 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); 2341 return llvm::all_of(PN->operands(), [&](const Use &U) { 2342 // Value is power of 2 if it is coming from PHI node itself by induction. 2343 if (U.get() == PN) 2344 return true; 2345 2346 // Change the context instruction to the incoming block where it is 2347 // evaluated. 
2348 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); 2349 return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ); 2350 }); 2351 } 2352 case Instruction::Invoke: 2353 case Instruction::Call: { 2354 if (auto *II = dyn_cast<IntrinsicInst>(I)) { 2355 switch (II->getIntrinsicID()) { 2356 case Intrinsic::umax: 2357 case Intrinsic::smax: 2358 case Intrinsic::umin: 2359 case Intrinsic::smin: 2360 return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) && 2361 isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); 2362 // bswap/bitreverse just move around bits, but don't change any 1s/0s 2363 // thus dont change pow2/non-pow2 status. 2364 case Intrinsic::bitreverse: 2365 case Intrinsic::bswap: 2366 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); 2367 case Intrinsic::fshr: 2368 case Intrinsic::fshl: 2369 // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x) 2370 if (II->getArgOperand(0) == II->getArgOperand(1)) 2371 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); 2372 break; 2373 default: 2374 break; 2375 } 2376 } 2377 return false; 2378 } 2379 default: 2380 return false; 2381 } 2382 } 2383 2384 /// Test whether a GEP's result is known to be non-null. 2385 /// 2386 /// Uses properties inherent in a GEP to try to determine whether it is known 2387 /// to be non-null. 2388 /// 2389 /// Currently this routine does not support vector GEPs. 2390 static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, 2391 const SimplifyQuery &Q) { 2392 const Function *F = nullptr; 2393 if (const Instruction *I = dyn_cast<Instruction>(GEP)) 2394 F = I->getFunction(); 2395 2396 // If the gep is nuw or inbounds with invalid null pointer, then the GEP 2397 // may be null iff the base pointer is null and the offset is zero. 2398 if (!GEP->hasNoUnsignedWrap() && 2399 !(GEP->isInBounds() && 2400 !NullPointerIsDefined(F, GEP->getPointerAddressSpace()))) 2401 return false; 2402 2403 // FIXME: Support vector-GEPs. 2404 assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP"); 2405 2406 // If the base pointer is non-null, we cannot walk to a null address with an 2407 // inbounds GEP in address space zero. 2408 if (isKnownNonZero(GEP->getPointerOperand(), Q, Depth)) 2409 return true; 2410 2411 // Walk the GEP operands and see if any operand introduces a non-zero offset. 2412 // If so, then the GEP cannot produce a null pointer, as doing so would 2413 // inherently violate the inbounds contract within address space zero. 2414 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); 2415 GTI != GTE; ++GTI) { 2416 // Struct types are easy -- they must always be indexed by a constant. 2417 if (StructType *STy = GTI.getStructTypeOrNull()) { 2418 ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand()); 2419 unsigned ElementIdx = OpC->getZExtValue(); 2420 const StructLayout *SL = Q.DL.getStructLayout(STy); 2421 uint64_t ElementOffset = SL->getElementOffset(ElementIdx); 2422 if (ElementOffset > 0) 2423 return true; 2424 continue; 2425 } 2426 2427 // If we have a zero-sized type, the index doesn't matter. Keep looping. 2428 if (GTI.getSequentialElementStride(Q.DL).isZero()) 2429 continue; 2430 2431 // Fast path the constant operand case both for efficiency and so we don't 2432 // increment Depth when just zipping down an all-constant GEP. 
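// Illustrative example (not part of the original comments): for
//   getelementptr inbounds i32, ptr %p, i64 1
// in address space 0, the constant index alone contributes a non-zero
// offset, so the GEP cannot produce null even if nothing is known about %p.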
2433 if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) { 2434 if (!OpC->isZero()) 2435 return true; 2436 continue; 2437 } 2438 2439 // We post-increment Depth here because while isKnownNonZero increments it 2440 // as well, when we pop back up that increment won't persist. We don't want 2441 // to recurse 10k times just because we have 10k GEP operands. We don't 2442 // bail completely out because we want to handle constant GEPs regardless 2443 // of depth. 2444 if (Depth++ >= MaxAnalysisRecursionDepth) 2445 continue; 2446 2447 if (isKnownNonZero(GTI.getOperand(), Q, Depth)) 2448 return true; 2449 } 2450 2451 return false; 2452 } 2453 2454 static bool isKnownNonNullFromDominatingCondition(const Value *V, 2455 const Instruction *CtxI, 2456 const DominatorTree *DT) { 2457 assert(!isa<Constant>(V) && "Called for constant?"); 2458 2459 if (!CtxI || !DT) 2460 return false; 2461 2462 unsigned NumUsesExplored = 0; 2463 for (const auto *U : V->users()) { 2464 // Avoid massive lists 2465 if (NumUsesExplored >= DomConditionsMaxUses) 2466 break; 2467 NumUsesExplored++; 2468 2469 // If the value is used as an argument to a call or invoke, then argument 2470 // attributes may provide an answer about null-ness. 2471 if (const auto *CB = dyn_cast<CallBase>(U)) 2472 if (auto *CalledFunc = CB->getCalledFunction()) 2473 for (const Argument &Arg : CalledFunc->args()) 2474 if (CB->getArgOperand(Arg.getArgNo()) == V && 2475 Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) && 2476 DT->dominates(CB, CtxI)) 2477 return true; 2478 2479 // If the value is used as a load/store, then the pointer must be non null. 2480 if (V == getLoadStorePointerOperand(U)) { 2481 const Instruction *I = cast<Instruction>(U); 2482 if (!NullPointerIsDefined(I->getFunction(), 2483 V->getType()->getPointerAddressSpace()) && 2484 DT->dominates(I, CtxI)) 2485 return true; 2486 } 2487 2488 if ((match(U, m_IDiv(m_Value(), m_Specific(V))) || 2489 match(U, m_IRem(m_Value(), m_Specific(V)))) && 2490 isValidAssumeForContext(cast<Instruction>(U), CtxI, DT)) 2491 return true; 2492 2493 // Consider only compare instructions uniquely controlling a branch 2494 Value *RHS; 2495 CmpInst::Predicate Pred; 2496 if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS)))) 2497 continue; 2498 2499 bool NonNullIfTrue; 2500 if (cmpExcludesZero(Pred, RHS)) 2501 NonNullIfTrue = true; 2502 else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS)) 2503 NonNullIfTrue = false; 2504 else 2505 continue; 2506 2507 SmallVector<const User *, 4> WorkList; 2508 SmallPtrSet<const User *, 4> Visited; 2509 for (const auto *CmpU : U->users()) { 2510 assert(WorkList.empty() && "Should be!"); 2511 if (Visited.insert(CmpU).second) 2512 WorkList.push_back(CmpU); 2513 2514 while (!WorkList.empty()) { 2515 auto *Curr = WorkList.pop_back_val(); 2516 2517 // If a user is an AND, add all its users to the work list. We only 2518 // propagate "pred != null" condition through AND because it is only 2519 // correct to assume that all conditions of AND are met in true branch. 2520 // TODO: Support similar logic of OR and EQ predicate? 2521 if (NonNullIfTrue) 2522 if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) { 2523 for (const auto *CurrU : Curr->users()) 2524 if (Visited.insert(CurrU).second) 2525 WorkList.push_back(CurrU); 2526 continue; 2527 } 2528 2529 if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) { 2530 assert(BI->isConditional() && "uses a comparison!"); 2531 2532 BasicBlock *NonNullSuccessor = 2533 BI->getSuccessor(NonNullIfTrue ? 
0 : 1); 2534 BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); 2535 if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) 2536 return true; 2537 } else if (NonNullIfTrue && isGuard(Curr) && 2538 DT->dominates(cast<Instruction>(Curr), CtxI)) { 2539 return true; 2540 } 2541 } 2542 } 2543 } 2544 2545 return false; 2546 } 2547 2548 /// Does the 'Range' metadata (which must be a valid MD_range operand list) 2549 /// ensure that the value it's attached to is never Value? 'RangeType' is 2550 /// is the type of the value described by the range. 2551 static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) { 2552 const unsigned NumRanges = Ranges->getNumOperands() / 2; 2553 assert(NumRanges >= 1); 2554 for (unsigned i = 0; i < NumRanges; ++i) { 2555 ConstantInt *Lower = 2556 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0)); 2557 ConstantInt *Upper = 2558 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1)); 2559 ConstantRange Range(Lower->getValue(), Upper->getValue()); 2560 if (Range.contains(Value)) 2561 return false; 2562 } 2563 return true; 2564 } 2565 2566 /// Try to detect a recurrence that monotonically increases/decreases from a 2567 /// non-zero starting value. These are common as induction variables. 2568 static bool isNonZeroRecurrence(const PHINode *PN) { 2569 BinaryOperator *BO = nullptr; 2570 Value *Start = nullptr, *Step = nullptr; 2571 const APInt *StartC, *StepC; 2572 if (!matchSimpleRecurrence(PN, BO, Start, Step) || 2573 !match(Start, m_APInt(StartC)) || StartC->isZero()) 2574 return false; 2575 2576 switch (BO->getOpcode()) { 2577 case Instruction::Add: 2578 // Starting from non-zero and stepping away from zero can never wrap back 2579 // to zero. 2580 return BO->hasNoUnsignedWrap() || 2581 (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) && 2582 StartC->isNegative() == StepC->isNegative()); 2583 case Instruction::Mul: 2584 return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) && 2585 match(Step, m_APInt(StepC)) && !StepC->isZero(); 2586 case Instruction::Shl: 2587 return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap(); 2588 case Instruction::AShr: 2589 case Instruction::LShr: 2590 return BO->isExact(); 2591 default: 2592 return false; 2593 } 2594 } 2595 2596 static bool matchOpWithOpEqZero(Value *Op0, Value *Op1) { 2597 ICmpInst::Predicate Pred; 2598 return (match(Op0, m_ZExtOrSExt(m_ICmp(Pred, m_Specific(Op1), m_Zero()))) || 2599 match(Op1, m_ZExtOrSExt(m_ICmp(Pred, m_Specific(Op0), m_Zero())))) && 2600 Pred == ICmpInst::ICMP_EQ; 2601 } 2602 2603 static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth, 2604 const SimplifyQuery &Q, unsigned BitWidth, Value *X, 2605 Value *Y, bool NSW, bool NUW) { 2606 // (X + (X != 0)) is non zero 2607 if (matchOpWithOpEqZero(X, Y)) 2608 return true; 2609 2610 if (NUW) 2611 return isKnownNonZero(Y, DemandedElts, Q, Depth) || 2612 isKnownNonZero(X, DemandedElts, Q, Depth); 2613 2614 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q); 2615 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q); 2616 2617 // If X and Y are both non-negative (as signed values) then their sum is not 2618 // zero unless both X and Y are zero. 2619 if (XKnown.isNonNegative() && YKnown.isNonNegative()) 2620 if (isKnownNonZero(Y, DemandedElts, Q, Depth) || 2621 isKnownNonZero(X, DemandedElts, Q, Depth)) 2622 return true; 2623 2624 // If X and Y are both negative (as signed values) then their sum is not 2625 // zero unless both X and Y equal INT_MIN. 
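// Illustrative example (not part of the original comments): for i8,
// (-128) + (-128) wraps to 0, but any other pair of negative values has a
// non-zero sum, which is why it suffices to rule out INT_MIN on one side.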
2626 if (XKnown.isNegative() && YKnown.isNegative()) {
2627 APInt Mask = APInt::getSignedMaxValue(BitWidth);
2628 // The sign bit of X is set. If some other bit is set then X is not equal
2629 // to INT_MIN.
2630 if (XKnown.One.intersects(Mask))
2631 return true;
2632 // The sign bit of Y is set. If some other bit is set then Y is not equal
2633 // to INT_MIN.
2634 if (YKnown.One.intersects(Mask))
2635 return true;
2636 }
2637
2638 // The sum of a non-negative number and a power of two is not zero.
2639 if (XKnown.isNonNegative() &&
2640 isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
2641 return true;
2642 if (YKnown.isNonNegative() &&
2643 isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
2644 return true;
2645
2646 return KnownBits::computeForAddSub(/*Add=*/true, NSW, NUW, XKnown, YKnown)
2647 .isNonZero();
2648 }
2649
2650 static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
2651 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2652 Value *Y) {
2653 // (X - (X != 0)) is non zero
2654 // ((X != 0) - X) is non zero
2655 if (matchOpWithOpEqZero(X, Y))
2656 return true;
2657
2658 // TODO: Move this case into isKnownNonEqual().
2659 if (auto *C = dyn_cast<Constant>(X))
2660 if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Q, Depth))
2661 return true;
2662
2663 return ::isKnownNonEqual(X, Y, DemandedElts, Depth, Q);
2664 }
2665
2666 static bool isNonZeroMul(const APInt &DemandedElts, unsigned Depth,
2667 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2668 Value *Y, bool NSW, bool NUW) {
2669 // If X and Y are non-zero then so is X * Y as long as the multiplication
2670 // does not overflow.
2671 if (NSW || NUW)
2672 return isKnownNonZero(X, DemandedElts, Q, Depth) &&
2673 isKnownNonZero(Y, DemandedElts, Q, Depth);
2674
2675 // If either X or Y is odd, then if the other is non-zero the result can't
2676 // be zero.
2677 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2678 if (XKnown.One[0])
2679 return isKnownNonZero(Y, DemandedElts, Q, Depth);
2680
2681 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2682 if (YKnown.One[0])
2683 return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Q, Depth);
2684
2685 // If there exists any subset of X (sX) and subset of Y (sY) s.t. sX * sY is
2686 // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
2687 // the lowest known One of X and Y. If they are non-zero, the result
2688 // must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by doing
2689 // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth.
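// Illustrative example (not part of the original comments): for i8, if X has
// a known one at bit 3 and Y a known one at bit 4, then the product's lowest
// set bit is at position tz(X) + tz(Y) <= 3 + 4 = 7 < 8, so X * Y cannot
// wrap to zero.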
2690 return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) < 2691 BitWidth; 2692 } 2693 2694 static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts, 2695 unsigned Depth, const SimplifyQuery &Q, 2696 const KnownBits &KnownVal) { 2697 auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) { 2698 switch (I->getOpcode()) { 2699 case Instruction::Shl: 2700 return Lhs.shl(Rhs); 2701 case Instruction::LShr: 2702 return Lhs.lshr(Rhs); 2703 case Instruction::AShr: 2704 return Lhs.ashr(Rhs); 2705 default: 2706 llvm_unreachable("Unknown Shift Opcode"); 2707 } 2708 }; 2709 2710 auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) { 2711 switch (I->getOpcode()) { 2712 case Instruction::Shl: 2713 return Lhs.lshr(Rhs); 2714 case Instruction::LShr: 2715 case Instruction::AShr: 2716 return Lhs.shl(Rhs); 2717 default: 2718 llvm_unreachable("Unknown Shift Opcode"); 2719 } 2720 }; 2721 2722 if (KnownVal.isUnknown()) 2723 return false; 2724 2725 KnownBits KnownCnt = 2726 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q); 2727 APInt MaxShift = KnownCnt.getMaxValue(); 2728 unsigned NumBits = KnownVal.getBitWidth(); 2729 if (MaxShift.uge(NumBits)) 2730 return false; 2731 2732 if (!ShiftOp(KnownVal.One, MaxShift).isZero()) 2733 return true; 2734 2735 // If all of the bits shifted out are known to be zero, and Val is known 2736 // non-zero then at least one non-zero bit must remain. 2737 if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift) 2738 .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) && 2739 isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth)) 2740 return true; 2741 2742 return false; 2743 } 2744 2745 static bool isKnownNonZeroFromOperator(const Operator *I, 2746 const APInt &DemandedElts, 2747 unsigned Depth, const SimplifyQuery &Q) { 2748 unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL); 2749 switch (I->getOpcode()) { 2750 case Instruction::Alloca: 2751 // Alloca never returns null, malloc might. 2752 return I->getType()->getPointerAddressSpace() == 0; 2753 case Instruction::GetElementPtr: 2754 if (I->getType()->isPointerTy()) 2755 return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q); 2756 break; 2757 case Instruction::BitCast: { 2758 // We need to be a bit careful here. We can only peek through the bitcast 2759 // if the scalar size of elements in the operand are smaller than and a 2760 // multiple of the size they are casting too. Take three cases: 2761 // 2762 // 1) Unsafe: 2763 // bitcast <2 x i16> %NonZero to <4 x i8> 2764 // 2765 // %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a 2766 // <4 x i8> requires that all 4 i8 elements be non-zero which isn't 2767 // guranteed (imagine just sign bit set in the 2 i16 elements). 2768 // 2769 // 2) Unsafe: 2770 // bitcast <4 x i3> %NonZero to <3 x i4> 2771 // 2772 // Even though the scalar size of the src (`i3`) is smaller than the 2773 // scalar size of the dst `i4`, because `i3` is not a multiple of `i4` 2774 // its possible for the `3 x i4` elements to be zero because there are 2775 // some elements in the destination that don't contain any full src 2776 // element. 2777 // 2778 // 3) Safe: 2779 // bitcast <4 x i8> %NonZero to <2 x i16> 2780 // 2781 // This is always safe as non-zero in the 4 i8 elements implies 2782 // non-zero in the combination of any two adjacent ones. Since i8 is a 2783 // multiple of i16, each i16 is guranteed to have 2 full i8 elements. 2784 // This all implies the 2 i16 elements are non-zero. 
2785 Type *FromTy = I->getOperand(0)->getType(); 2786 if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) && 2787 (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0) 2788 return isKnownNonZero(I->getOperand(0), Q, Depth); 2789 } break; 2790 case Instruction::IntToPtr: 2791 // Note that we have to take special care to avoid looking through 2792 // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well 2793 // as casts that can alter the value, e.g., AddrSpaceCasts. 2794 if (!isa<ScalableVectorType>(I->getType()) && 2795 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <= 2796 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue()) 2797 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth); 2798 break; 2799 case Instruction::PtrToInt: 2800 // Similar to int2ptr above, we can look through ptr2int here if the cast 2801 // is a no-op or an extend and not a truncate. 2802 if (!isa<ScalableVectorType>(I->getType()) && 2803 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <= 2804 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue()) 2805 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth); 2806 break; 2807 case Instruction::Trunc: 2808 // nuw/nsw trunc preserves zero/non-zero status of input. 2809 if (auto *TI = dyn_cast<TruncInst>(I)) 2810 if (TI->hasNoSignedWrap() || TI->hasNoUnsignedWrap()) 2811 return isKnownNonZero(TI->getOperand(0), DemandedElts, Q, Depth); 2812 break; 2813 2814 case Instruction::Sub: 2815 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0), 2816 I->getOperand(1)); 2817 case Instruction::Xor: 2818 // (X ^ (X != 0)) is non zero 2819 if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1))) 2820 return true; 2821 break; 2822 case Instruction::Or: 2823 // (X | (X != 0)) is non zero 2824 if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1))) 2825 return true; 2826 // X | Y != 0 if X != 0 or Y != 0. 2827 return isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth) || 2828 isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth); 2829 case Instruction::SExt: 2830 case Instruction::ZExt: 2831 // ext X != 0 if X != 0. 2832 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth); 2833 2834 case Instruction::Shl: { 2835 // shl nsw/nuw can't remove any non-zero bits. 2836 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I); 2837 if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO)) 2838 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth); 2839 2840 // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined 2841 // if the lowest bit is shifted off the end. 2842 KnownBits Known(BitWidth); 2843 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth, Q); 2844 if (Known.One[0]) 2845 return true; 2846 2847 return isNonZeroShift(I, DemandedElts, Depth, Q, Known); 2848 } 2849 case Instruction::LShr: 2850 case Instruction::AShr: { 2851 // shr exact can only shift out zero bits. 2852 const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I); 2853 if (BO->isExact()) 2854 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth); 2855 2856 // shr X, Y != 0 if X is negative. Note that the value of the shift is not 2857 // defined if the sign bit is shifted off the end. 
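// Illustrative example (not part of the original comments): for i8
// %x = 0b10100000 (negative), lshr %x, %y is non-zero for every in-range
// shift amount 0..7 because the sign bit never leaves the value; a shift of
// 8 or more yields poison rather than 0.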
2858 KnownBits Known = 2859 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q); 2860 if (Known.isNegative()) 2861 return true; 2862 2863 return isNonZeroShift(I, DemandedElts, Depth, Q, Known); 2864 } 2865 case Instruction::UDiv: 2866 case Instruction::SDiv: { 2867 // X / Y 2868 // div exact can only produce a zero if the dividend is zero. 2869 if (cast<PossiblyExactOperator>(I)->isExact()) 2870 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth); 2871 2872 KnownBits XKnown = 2873 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q); 2874 // If X is fully unknown we won't be able to figure anything out so don't 2875 // both computing knownbits for Y. 2876 if (XKnown.isUnknown()) 2877 return false; 2878 2879 KnownBits YKnown = 2880 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q); 2881 if (I->getOpcode() == Instruction::SDiv) { 2882 // For signed division need to compare abs value of the operands. 2883 XKnown = XKnown.abs(/*IntMinIsPoison*/ false); 2884 YKnown = YKnown.abs(/*IntMinIsPoison*/ false); 2885 } 2886 // If X u>= Y then div is non zero (0/0 is UB). 2887 std::optional<bool> XUgeY = KnownBits::uge(XKnown, YKnown); 2888 // If X is total unknown or X u< Y we won't be able to prove non-zero 2889 // with compute known bits so just return early. 2890 return XUgeY && *XUgeY; 2891 } 2892 case Instruction::Add: { 2893 // X + Y. 2894 2895 // If Add has nuw wrap flag, then if either X or Y is non-zero the result is 2896 // non-zero. 2897 auto *BO = cast<OverflowingBinaryOperator>(I); 2898 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, I->getOperand(0), 2899 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO), 2900 Q.IIQ.hasNoUnsignedWrap(BO)); 2901 } 2902 case Instruction::Mul: { 2903 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I); 2904 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth, I->getOperand(0), 2905 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO), 2906 Q.IIQ.hasNoUnsignedWrap(BO)); 2907 } 2908 case Instruction::Select: { 2909 // (C ? X : Y) != 0 if X != 0 and Y != 0. 2910 2911 // First check if the arm is non-zero using `isKnownNonZero`. If that fails, 2912 // then see if the select condition implies the arm is non-zero. For example 2913 // (X != 0 ? X : Y), we know the true arm is non-zero as the `X` "return" is 2914 // dominated by `X != 0`. 2915 auto SelectArmIsNonZero = [&](bool IsTrueArm) { 2916 Value *Op; 2917 Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2); 2918 // Op is trivially non-zero. 2919 if (isKnownNonZero(Op, DemandedElts, Q, Depth)) 2920 return true; 2921 2922 // The condition of the select dominates the true/false arm. Check if the 2923 // condition implies that a given arm is non-zero. 2924 Value *X; 2925 CmpInst::Predicate Pred; 2926 if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X)))) 2927 return false; 2928 2929 if (!IsTrueArm) 2930 Pred = ICmpInst::getInversePredicate(Pred); 2931 2932 return cmpExcludesZero(Pred, X); 2933 }; 2934 2935 if (SelectArmIsNonZero(/* IsTrueArm */ true) && 2936 SelectArmIsNonZero(/* IsTrueArm */ false)) 2937 return true; 2938 break; 2939 } 2940 case Instruction::PHI: { 2941 auto *PN = cast<PHINode>(I); 2942 if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN)) 2943 return true; 2944 2945 // Check if all incoming values are non-zero using recursion. 
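// Illustrative example (not part of the original comments): for
//   %p = phi i32 [ %a, %bb ], ...
// where %bb ends in br i1 (icmp ne i32 %a, 0), label %p.block, label %other
// and %p.block is the phi's block, the branch on the edge already excludes
// zero for %a, so that incoming value needs no further recursion.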
2946 SimplifyQuery RecQ = Q.getWithoutCondContext(); 2947 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); 2948 return llvm::all_of(PN->operands(), [&](const Use &U) { 2949 if (U.get() == PN) 2950 return true; 2951 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); 2952 // Check if the branch on the phi excludes zero. 2953 ICmpInst::Predicate Pred; 2954 Value *X; 2955 BasicBlock *TrueSucc, *FalseSucc; 2956 if (match(RecQ.CxtI, 2957 m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)), 2958 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) { 2959 // Check for cases of duplicate successors. 2960 if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) { 2961 // If we're using the false successor, invert the predicate. 2962 if (FalseSucc == PN->getParent()) 2963 Pred = CmpInst::getInversePredicate(Pred); 2964 if (cmpExcludesZero(Pred, X)) 2965 return true; 2966 } 2967 } 2968 // Finally recurse on the edge and check it directly. 2969 return isKnownNonZero(U.get(), DemandedElts, RecQ, NewDepth); 2970 }); 2971 } 2972 case Instruction::InsertElement: { 2973 if (isa<ScalableVectorType>(I->getType())) 2974 break; 2975 2976 const Value *Vec = I->getOperand(0); 2977 const Value *Elt = I->getOperand(1); 2978 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2)); 2979 2980 unsigned NumElts = DemandedElts.getBitWidth(); 2981 APInt DemandedVecElts = DemandedElts; 2982 bool SkipElt = false; 2983 // If we know the index we are inserting too, clear it from Vec check. 2984 if (CIdx && CIdx->getValue().ult(NumElts)) { 2985 DemandedVecElts.clearBit(CIdx->getZExtValue()); 2986 SkipElt = !DemandedElts[CIdx->getZExtValue()]; 2987 } 2988 2989 // Result is zero if Elt is non-zero and rest of the demanded elts in Vec 2990 // are non-zero. 2991 return (SkipElt || isKnownNonZero(Elt, Q, Depth)) && 2992 (DemandedVecElts.isZero() || 2993 isKnownNonZero(Vec, DemandedVecElts, Q, Depth)); 2994 } 2995 case Instruction::ExtractElement: 2996 if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) { 2997 const Value *Vec = EEI->getVectorOperand(); 2998 const Value *Idx = EEI->getIndexOperand(); 2999 auto *CIdx = dyn_cast<ConstantInt>(Idx); 3000 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) { 3001 unsigned NumElts = VecTy->getNumElements(); 3002 APInt DemandedVecElts = APInt::getAllOnes(NumElts); 3003 if (CIdx && CIdx->getValue().ult(NumElts)) 3004 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); 3005 return isKnownNonZero(Vec, DemandedVecElts, Q, Depth); 3006 } 3007 } 3008 break; 3009 case Instruction::ShuffleVector: { 3010 auto *Shuf = dyn_cast<ShuffleVectorInst>(I); 3011 if (!Shuf) 3012 break; 3013 APInt DemandedLHS, DemandedRHS; 3014 // For undef elements, we don't know anything about the common state of 3015 // the shuffle result. 3016 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) 3017 break; 3018 // If demanded elements for both vecs are non-zero, the shuffle is non-zero. 3019 return (DemandedRHS.isZero() || 3020 isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Q, Depth)) && 3021 (DemandedLHS.isZero() || 3022 isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Q, Depth)); 3023 } 3024 case Instruction::Freeze: 3025 return isKnownNonZero(I->getOperand(0), Q, Depth) && 3026 isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT, 3027 Depth); 3028 case Instruction::Load: { 3029 auto *LI = cast<LoadInst>(I); 3030 // A Load tagged with nonnull or dereferenceable with null pointer undefined 3031 // is never null. 
3032 if (auto *PtrT = dyn_cast<PointerType>(I->getType())) { 3033 if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) || 3034 (Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable) && 3035 !NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace()))) 3036 return true; 3037 } else if (MDNode *Ranges = Q.IIQ.getMetadata(LI, LLVMContext::MD_range)) { 3038 return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth)); 3039 } 3040 3041 // No need to fall through to computeKnownBits as range metadata is already 3042 // handled in isKnownNonZero. 3043 return false; 3044 } 3045 case Instruction::ExtractValue: { 3046 const WithOverflowInst *WO; 3047 if (match(I, m_ExtractValue<0>(m_WithOverflowInst(WO)))) { 3048 switch (WO->getBinaryOp()) { 3049 default: 3050 break; 3051 case Instruction::Add: 3052 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, 3053 WO->getArgOperand(0), WO->getArgOperand(1), 3054 /*NSW=*/false, 3055 /*NUW=*/false); 3056 case Instruction::Sub: 3057 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, 3058 WO->getArgOperand(0), WO->getArgOperand(1)); 3059 case Instruction::Mul: 3060 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth, 3061 WO->getArgOperand(0), WO->getArgOperand(1), 3062 /*NSW=*/false, /*NUW=*/false); 3063 break; 3064 } 3065 } 3066 break; 3067 } 3068 case Instruction::Call: 3069 case Instruction::Invoke: { 3070 const auto *Call = cast<CallBase>(I); 3071 if (I->getType()->isPointerTy()) { 3072 if (Call->isReturnNonNull()) 3073 return true; 3074 if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) 3075 return isKnownNonZero(RP, Q, Depth); 3076 } else { 3077 if (MDNode *Ranges = Q.IIQ.getMetadata(Call, LLVMContext::MD_range)) 3078 return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth)); 3079 if (std::optional<ConstantRange> Range = Call->getRange()) { 3080 const APInt ZeroValue(Range->getBitWidth(), 0); 3081 if (!Range->contains(ZeroValue)) 3082 return true; 3083 } 3084 if (const Value *RV = Call->getReturnedArgOperand()) 3085 if (RV->getType() == I->getType() && isKnownNonZero(RV, Q, Depth)) 3086 return true; 3087 } 3088 3089 if (auto *II = dyn_cast<IntrinsicInst>(I)) { 3090 switch (II->getIntrinsicID()) { 3091 case Intrinsic::sshl_sat: 3092 case Intrinsic::ushl_sat: 3093 case Intrinsic::abs: 3094 case Intrinsic::bitreverse: 3095 case Intrinsic::bswap: 3096 case Intrinsic::ctpop: 3097 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth); 3098 // NB: We don't do usub_sat here, as in any case where we can prove it's 3099 // non-zero we will fold it to `sub nuw` in InstCombine. 3100 case Intrinsic::ssub_sat: 3101 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, 3102 II->getArgOperand(0), II->getArgOperand(1)); 3103 case Intrinsic::sadd_sat: 3104 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, 3105 II->getArgOperand(0), II->getArgOperand(1), 3106 /*NSW=*/true, /*NUW=*/false); 3107 // Vec reverse preserves zero/non-zero status from input vec. 3108 case Intrinsic::vector_reverse: 3109 return isKnownNonZero(II->getArgOperand(0), DemandedElts.reverseBits(), 3110 Q, Depth); 3111 // umax/umin/smax/smin/or of all non-zero elements is always non-zero.
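// For example, `llvm.vector.reduce.umin` can only return zero if some lane of
// its operand is zero, so non-zero-ness of the whole input vector carries over.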
3112 case Intrinsic::vector_reduce_or: 3113 case Intrinsic::vector_reduce_umax: 3114 case Intrinsic::vector_reduce_umin: 3115 case Intrinsic::vector_reduce_smax: 3116 case Intrinsic::vector_reduce_smin: 3117 return isKnownNonZero(II->getArgOperand(0), Q, Depth); 3118 case Intrinsic::umax: 3119 case Intrinsic::uadd_sat: 3120 // umax(X, (X != 0)) is non zero 3121 // X +usat (X != 0) is non zero 3122 if (matchOpWithOpEqZero(II->getArgOperand(0), II->getArgOperand(1))) 3123 return true; 3124 3125 return isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth) || 3126 isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth); 3127 case Intrinsic::smax: { 3128 // If either arg is strictly positive the result is non-zero. Otherwise 3129 // the result is non-zero if both ops are non-zero. 3130 auto IsNonZero = [&](Value *Op, std::optional<bool> &OpNonZero, 3131 const KnownBits &OpKnown) { 3132 if (!OpNonZero.has_value()) 3133 OpNonZero = OpKnown.isNonZero() || 3134 isKnownNonZero(Op, DemandedElts, Q, Depth); 3135 return *OpNonZero; 3136 }; 3137 // Avoid re-computing isKnownNonZero. 3138 std::optional<bool> Op0NonZero, Op1NonZero; 3139 KnownBits Op1Known = 3140 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q); 3141 if (Op1Known.isNonNegative() && 3142 IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known)) 3143 return true; 3144 KnownBits Op0Known = 3145 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q); 3146 if (Op0Known.isNonNegative() && 3147 IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known)) 3148 return true; 3149 return IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known) && 3150 IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known); 3151 } 3152 case Intrinsic::smin: { 3153 // If either arg is negative the result is non-zero. Otherwise 3154 // the result is non-zero if both ops are non-zero. 3155 KnownBits Op1Known = 3156 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q); 3157 if (Op1Known.isNegative()) 3158 return true; 3159 KnownBits Op0Known = 3160 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q); 3161 if (Op0Known.isNegative()) 3162 return true; 3163 3164 if (Op1Known.isNonZero() && Op0Known.isNonZero()) 3165 return true; 3166 } 3167 [[fallthrough]]; 3168 case Intrinsic::umin: 3169 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth) && 3170 isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth); 3171 case Intrinsic::cttz: 3172 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q) 3173 .Zero[0]; 3174 case Intrinsic::ctlz: 3175 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q) 3176 .isNonNegative(); 3177 case Intrinsic::fshr: 3178 case Intrinsic::fshl: 3179 // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0. 3180 if (II->getArgOperand(0) == II->getArgOperand(1)) 3181 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth); 3182 break; 3183 case Intrinsic::vscale: 3184 return true; 3185 case Intrinsic::experimental_get_vector_length: 3186 return isKnownNonZero(I->getOperand(0), Q, Depth); 3187 default: 3188 break; 3189 } 3190 break; 3191 } 3192 3193 return false; 3194 } 3195 } 3196 3197 KnownBits Known(BitWidth); 3198 computeKnownBits(I, DemandedElts, Known, Depth, Q); 3199 return Known.One != 0; 3200 } 3201 3202 /// Return true if the given value is known to be non-zero when defined. For 3203 /// vectors, return true if every demanded element is known to be non-zero when 3204 /// defined. 
For pointers, if the context instruction and dominator tree are 3205 /// specified, perform context-sensitive analysis and return true if the 3206 /// pointer couldn't possibly be null at the specified instruction. 3207 /// Supports values with integer or pointer type and vectors of integers. 3208 bool isKnownNonZero(const Value *V, const APInt &DemandedElts, 3209 const SimplifyQuery &Q, unsigned Depth) { 3210 Type *Ty = V->getType(); 3211 3212 #ifndef NDEBUG 3213 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 3214 3215 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { 3216 assert( 3217 FVTy->getNumElements() == DemandedElts.getBitWidth() && 3218 "DemandedElt width should equal the fixed vector number of elements"); 3219 } else { 3220 assert(DemandedElts == APInt(1, 1) && 3221 "DemandedElt width should be 1 for scalars"); 3222 } 3223 #endif 3224 3225 if (auto *C = dyn_cast<Constant>(V)) { 3226 if (C->isNullValue()) 3227 return false; 3228 if (isa<ConstantInt>(C)) 3229 // Must be non-zero due to null test above. 3230 return true; 3231 3232 // For constant vectors, check that all elements are poison or known 3233 // non-zero to determine that the whole vector is known non-zero. 3234 if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) { 3235 for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) { 3236 if (!DemandedElts[i]) 3237 continue; 3238 Constant *Elt = C->getAggregateElement(i); 3239 if (!Elt || Elt->isNullValue()) 3240 return false; 3241 if (!isa<PoisonValue>(Elt) && !isa<ConstantInt>(Elt)) 3242 return false; 3243 } 3244 return true; 3245 } 3246 3247 // Constant ptrauth can be null iff the base pointer can be. 3248 if (auto *CPA = dyn_cast<ConstantPtrAuth>(V)) 3249 return isKnownNonZero(CPA->getPointer(), DemandedElts, Q, Depth); 3250 3251 // A global variable in address space 0 is non-null unless it is extern weak 3252 // or an absolute symbol reference. Other address spaces may have null as a 3253 // valid address for a global, so we can't assume anything. 3254 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 3255 if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() && 3256 GV->getType()->getAddressSpace() == 0) 3257 return true; 3258 } 3259 3260 // For constant expressions, fall through to the Operator code below. 3261 if (!isa<ConstantExpr>(V)) 3262 return false; 3263 } 3264 3265 if (const auto *A = dyn_cast<Argument>(V)) 3266 if (std::optional<ConstantRange> Range = A->getRange()) { 3267 const APInt ZeroValue(Range->getBitWidth(), 0); 3268 if (!Range->contains(ZeroValue)) 3269 return true; 3270 } 3271 3272 if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q)) 3273 return true; 3274 3275 // Some of the tests below are recursive, so bail out if we hit the limit. 3276 if (Depth++ >= MaxAnalysisRecursionDepth) 3277 return false; 3278 3279 // Check for pointer simplifications. 3280 3281 if (PointerType *PtrTy = dyn_cast<PointerType>(Ty)) { 3282 // A byval or inalloca argument may not be null in a non-default address 3283 // space. A nonnull argument is assumed never 0.
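// For example, `ptr byval(i32) %p` in address space 0 of a function where null
// is not a defined address is known non-null, as is any argument carrying an
// explicit `nonnull` attribute.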
3284 if (const Argument *A = dyn_cast<Argument>(V)) { 3285 if (((A->hasPassPointeeByValueCopyAttr() && 3286 !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) || 3287 A->hasNonNullAttr())) 3288 return true; 3289 } 3290 } 3291 3292 if (const auto *I = dyn_cast<Operator>(V)) 3293 if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q)) 3294 return true; 3295 3296 if (!isa<Constant>(V) && 3297 isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) 3298 return true; 3299 3300 return false; 3301 } 3302 3303 bool llvm::isKnownNonZero(const Value *V, const SimplifyQuery &Q, 3304 unsigned Depth) { 3305 auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); 3306 APInt DemandedElts = 3307 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); 3308 return ::isKnownNonZero(V, DemandedElts, Q, Depth); 3309 } 3310 3311 /// If the pair of operators are the same invertible function, return the 3312 /// the operands of the function corresponding to each input. Otherwise, 3313 /// return std::nullopt. An invertible function is one that is 1-to-1 and maps 3314 /// every input value to exactly one output value. This is equivalent to 3315 /// saying that Op1 and Op2 are equal exactly when the specified pair of 3316 /// operands are equal, (except that Op1 and Op2 may be poison more often.) 3317 static std::optional<std::pair<Value*, Value*>> 3318 getInvertibleOperands(const Operator *Op1, 3319 const Operator *Op2) { 3320 if (Op1->getOpcode() != Op2->getOpcode()) 3321 return std::nullopt; 3322 3323 auto getOperands = [&](unsigned OpNum) -> auto { 3324 return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum)); 3325 }; 3326 3327 switch (Op1->getOpcode()) { 3328 default: 3329 break; 3330 case Instruction::Or: 3331 if (!cast<PossiblyDisjointInst>(Op1)->isDisjoint() || 3332 !cast<PossiblyDisjointInst>(Op2)->isDisjoint()) 3333 break; 3334 [[fallthrough]]; 3335 case Instruction::Xor: 3336 case Instruction::Add: { 3337 Value *Other; 3338 if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(0)), m_Value(Other)))) 3339 return std::make_pair(Op1->getOperand(1), Other); 3340 if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(1)), m_Value(Other)))) 3341 return std::make_pair(Op1->getOperand(0), Other); 3342 break; 3343 } 3344 case Instruction::Sub: 3345 if (Op1->getOperand(0) == Op2->getOperand(0)) 3346 return getOperands(1); 3347 if (Op1->getOperand(1) == Op2->getOperand(1)) 3348 return getOperands(0); 3349 break; 3350 case Instruction::Mul: { 3351 // invertible if A * B == (A * B) mod 2^N where A, and B are integers 3352 // and N is the bitwdith. The nsw case is non-obvious, but proven by 3353 // alive2: https://alive2.llvm.org/ce/z/Z6D5qK 3354 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1); 3355 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2); 3356 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) && 3357 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap())) 3358 break; 3359 3360 // Assume operand order has been canonicalized 3361 if (Op1->getOperand(1) == Op2->getOperand(1) && 3362 isa<ConstantInt>(Op1->getOperand(1)) && 3363 !cast<ConstantInt>(Op1->getOperand(1))->isZero()) 3364 return getOperands(0); 3365 break; 3366 } 3367 case Instruction::Shl: { 3368 // Same as multiplies, with the difference that we don't need to check 3369 // for a non-zero multiply. Shifts always multiply by non-zero. 
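// E.g. with matching flags and the same shift amount C, `shl nuw X, C` equals
// `shl nuw Y, C` only when X == Y, since the flag guarantees no set bits were
// shifted out.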
3370 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1); 3371 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2); 3372 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) && 3373 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap())) 3374 break; 3375 3376 if (Op1->getOperand(1) == Op2->getOperand(1)) 3377 return getOperands(0); 3378 break; 3379 } 3380 case Instruction::AShr: 3381 case Instruction::LShr: { 3382 auto *PEO1 = cast<PossiblyExactOperator>(Op1); 3383 auto *PEO2 = cast<PossiblyExactOperator>(Op2); 3384 if (!PEO1->isExact() || !PEO2->isExact()) 3385 break; 3386 3387 if (Op1->getOperand(1) == Op2->getOperand(1)) 3388 return getOperands(0); 3389 break; 3390 } 3391 case Instruction::SExt: 3392 case Instruction::ZExt: 3393 if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType()) 3394 return getOperands(0); 3395 break; 3396 case Instruction::PHI: { 3397 const PHINode *PN1 = cast<PHINode>(Op1); 3398 const PHINode *PN2 = cast<PHINode>(Op2); 3399 3400 // If PN1 and PN2 are both recurrences, can we prove the entire recurrences 3401 // are a single invertible function of the start values? Note that repeated 3402 // application of an invertible function is also invertible 3403 BinaryOperator *BO1 = nullptr; 3404 Value *Start1 = nullptr, *Step1 = nullptr; 3405 BinaryOperator *BO2 = nullptr; 3406 Value *Start2 = nullptr, *Step2 = nullptr; 3407 if (PN1->getParent() != PN2->getParent() || 3408 !matchSimpleRecurrence(PN1, BO1, Start1, Step1) || 3409 !matchSimpleRecurrence(PN2, BO2, Start2, Step2)) 3410 break; 3411 3412 auto Values = getInvertibleOperands(cast<Operator>(BO1), 3413 cast<Operator>(BO2)); 3414 if (!Values) 3415 break; 3416 3417 // We have to be careful of mutually defined recurrences here. Ex: 3418 // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V 3419 // * X_i = Y_i = X_(i-1) OP Y_(i-1) 3420 // The invertibility of these is complicated, and not worth reasoning 3421 // about (yet?). 3422 if (Values->first != PN1 || Values->second != PN2) 3423 break; 3424 3425 return std::make_pair(Start1, Start2); 3426 } 3427 } 3428 return std::nullopt; 3429 } 3430 3431 /// Return true if V1 == (binop V2, X), where X is known non-zero. 3432 /// Only handle a small subset of binops where (binop V2, X) with non-zero X 3433 /// implies V2 != V1. 3434 static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2, 3435 const APInt &DemandedElts, unsigned Depth, 3436 const SimplifyQuery &Q) { 3437 const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1); 3438 if (!BO) 3439 return false; 3440 switch (BO->getOpcode()) { 3441 default: 3442 break; 3443 case Instruction::Or: 3444 if (!cast<PossiblyDisjointInst>(V1)->isDisjoint()) 3445 break; 3446 [[fallthrough]]; 3447 case Instruction::Xor: 3448 case Instruction::Add: 3449 Value *Op = nullptr; 3450 if (V2 == BO->getOperand(0)) 3451 Op = BO->getOperand(1); 3452 else if (V2 == BO->getOperand(1)) 3453 Op = BO->getOperand(0); 3454 else 3455 return false; 3456 return isKnownNonZero(Op, DemandedElts, Q, Depth + 1); 3457 } 3458 return false; 3459 } 3460 3461 /// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and 3462 /// the multiplication is nuw or nsw. 
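/// Under those constraints the product cannot wrap back onto V1 itself (that
/// would require C == 1 or V1 == 0), so V1 != V2.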
3463 static bool isNonEqualMul(const Value *V1, const Value *V2, 3464 const APInt &DemandedElts, unsigned Depth, 3465 const SimplifyQuery &Q) { 3466 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) { 3467 const APInt *C; 3468 return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) && 3469 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) && 3470 !C->isZero() && !C->isOne() && 3471 isKnownNonZero(V1, DemandedElts, Q, Depth + 1); 3472 } 3473 return false; 3474 } 3475 3476 /// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and 3477 /// the shift is nuw or nsw. 3478 static bool isNonEqualShl(const Value *V1, const Value *V2, 3479 const APInt &DemandedElts, unsigned Depth, 3480 const SimplifyQuery &Q) { 3481 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) { 3482 const APInt *C; 3483 return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) && 3484 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) && 3485 !C->isZero() && isKnownNonZero(V1, DemandedElts, Q, Depth + 1); 3486 } 3487 return false; 3488 } 3489 3490 static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2, 3491 const APInt &DemandedElts, unsigned Depth, 3492 const SimplifyQuery &Q) { 3493 // Check two PHIs are in same block. 3494 if (PN1->getParent() != PN2->getParent()) 3495 return false; 3496 3497 SmallPtrSet<const BasicBlock *, 8> VisitedBBs; 3498 bool UsedFullRecursion = false; 3499 for (const BasicBlock *IncomBB : PN1->blocks()) { 3500 if (!VisitedBBs.insert(IncomBB).second) 3501 continue; // Don't reprocess blocks that we have dealt with already. 3502 const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB); 3503 const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB); 3504 const APInt *C1, *C2; 3505 if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2) 3506 continue; 3507 3508 // Only one pair of phi operands is allowed for full recursion. 3509 if (UsedFullRecursion) 3510 return false; 3511 3512 SimplifyQuery RecQ = Q.getWithoutCondContext(); 3513 RecQ.CxtI = IncomBB->getTerminator(); 3514 if (!isKnownNonEqual(IV1, IV2, DemandedElts, Depth + 1, RecQ)) 3515 return false; 3516 UsedFullRecursion = true; 3517 } 3518 return true; 3519 } 3520 3521 static bool isNonEqualSelect(const Value *V1, const Value *V2, 3522 const APInt &DemandedElts, unsigned Depth, 3523 const SimplifyQuery &Q) { 3524 const SelectInst *SI1 = dyn_cast<SelectInst>(V1); 3525 if (!SI1) 3526 return false; 3527 3528 if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) { 3529 const Value *Cond1 = SI1->getCondition(); 3530 const Value *Cond2 = SI2->getCondition(); 3531 if (Cond1 == Cond2) 3532 return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(), 3533 DemandedElts, Depth + 1, Q) && 3534 isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(), 3535 DemandedElts, Depth + 1, Q); 3536 } 3537 return isKnownNonEqual(SI1->getTrueValue(), V2, DemandedElts, Depth + 1, Q) && 3538 isKnownNonEqual(SI1->getFalseValue(), V2, DemandedElts, Depth + 1, Q); 3539 } 3540 3541 // Check to see if A is both a GEP and is the incoming value for a PHI in the 3542 // loop, and B is either a ptr or another GEP. If the PHI has 2 incoming values, 3543 // one of them being the recursive GEP A and the other a ptr at same base and at 3544 // the same/higher offset than B we are only incrementing the pointer further in 3545 // loop if offset of recursive GEP is greater than 0. 
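// Informally: if the recurring pointer starts at or above B's offset and each
// iteration strictly increases it (or starts at or below and strictly
// decreases), it can never land exactly on B.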
3546 static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B, 3547 const SimplifyQuery &Q) { 3548 if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy()) 3549 return false; 3550 3551 auto *GEPA = dyn_cast<GEPOperator>(A); 3552 if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin())) 3553 return false; 3554 3555 // Handle 2 incoming PHI values with one being a recursive GEP. 3556 auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand()); 3557 if (!PN || PN->getNumIncomingValues() != 2) 3558 return false; 3559 3560 // Search for the recursive GEP as an incoming operand, and record that as 3561 // Step. 3562 Value *Start = nullptr; 3563 Value *Step = const_cast<Value *>(A); 3564 if (PN->getIncomingValue(0) == Step) 3565 Start = PN->getIncomingValue(1); 3566 else if (PN->getIncomingValue(1) == Step) 3567 Start = PN->getIncomingValue(0); 3568 else 3569 return false; 3570 3571 // Other incoming node base should match the B base. 3572 // StartOffset >= OffsetB && StepOffset > 0? 3573 // StartOffset <= OffsetB && StepOffset < 0? 3574 // Is non-equal if above are true. 3575 // We use stripAndAccumulateInBoundsConstantOffsets to restrict the 3576 // optimisation to inbounds GEPs only. 3577 unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType()); 3578 APInt StartOffset(IndexWidth, 0); 3579 Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset); 3580 APInt StepOffset(IndexWidth, 0); 3581 Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset); 3582 3583 // Check if Base Pointer of Step matches the PHI. 3584 if (Step != PN) 3585 return false; 3586 APInt OffsetB(IndexWidth, 0); 3587 B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB); 3588 return Start == B && 3589 ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) || 3590 (StartOffset.sle(OffsetB) && StepOffset.isNegative())); 3591 } 3592 3593 /// Return true if it is known that V1 != V2. 3594 static bool isKnownNonEqual(const Value *V1, const Value *V2, 3595 const APInt &DemandedElts, unsigned Depth, 3596 const SimplifyQuery &Q) { 3597 if (V1 == V2) 3598 return false; 3599 if (V1->getType() != V2->getType()) 3600 // We can't look through casts yet. 3601 return false; 3602 3603 if (Depth >= MaxAnalysisRecursionDepth) 3604 return false; 3605 3606 // See if we can recurse through (exactly one of) our operands. This 3607 // requires our operation be 1-to-1 and map every input value to exactly 3608 // one output value. Such an operation is invertible. 3609 auto *O1 = dyn_cast<Operator>(V1); 3610 auto *O2 = dyn_cast<Operator>(V2); 3611 if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) { 3612 if (auto Values = getInvertibleOperands(O1, O2)) 3613 return isKnownNonEqual(Values->first, Values->second, DemandedElts, 3614 Depth + 1, Q); 3615 3616 if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) { 3617 const PHINode *PN2 = cast<PHINode>(V2); 3618 // FIXME: This is missing a generalization to handle the case where one is 3619 // a PHI and another one isn't. 
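// Two phis in the same block compare non-equal if, for every incoming block,
// the incoming values are distinct constants (at most one pair may instead be
// proved unequal by full recursion).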
3620 if (isNonEqualPHIs(PN1, PN2, DemandedElts, Depth, Q)) 3621 return true; 3622 }; 3623 } 3624 3625 if (isModifyingBinopOfNonZero(V1, V2, DemandedElts, Depth, Q) || 3626 isModifyingBinopOfNonZero(V2, V1, DemandedElts, Depth, Q)) 3627 return true; 3628 3629 if (isNonEqualMul(V1, V2, DemandedElts, Depth, Q) || 3630 isNonEqualMul(V2, V1, DemandedElts, Depth, Q)) 3631 return true; 3632 3633 if (isNonEqualShl(V1, V2, DemandedElts, Depth, Q) || 3634 isNonEqualShl(V2, V1, DemandedElts, Depth, Q)) 3635 return true; 3636 3637 if (V1->getType()->isIntOrIntVectorTy()) { 3638 // Are any known bits in V1 contradictory to known bits in V2? If V1 3639 // has a known zero where V2 has a known one, they must not be equal. 3640 KnownBits Known1 = computeKnownBits(V1, DemandedElts, Depth, Q); 3641 if (!Known1.isUnknown()) { 3642 KnownBits Known2 = computeKnownBits(V2, DemandedElts, Depth, Q); 3643 if (Known1.Zero.intersects(Known2.One) || 3644 Known2.Zero.intersects(Known1.One)) 3645 return true; 3646 } 3647 } 3648 3649 if (isNonEqualSelect(V1, V2, DemandedElts, Depth, Q) || 3650 isNonEqualSelect(V2, V1, DemandedElts, Depth, Q)) 3651 return true; 3652 3653 if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) || 3654 isNonEqualPointersWithRecursiveGEP(V2, V1, Q)) 3655 return true; 3656 3657 Value *A, *B; 3658 // PtrToInts are NonEqual if their Ptrs are NonEqual. 3659 // Check PtrToInt type matches the pointer size. 3660 if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) && 3661 match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B)))) 3662 return isKnownNonEqual(A, B, DemandedElts, Depth + 1, Q); 3663 3664 return false; 3665 } 3666 3667 // Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow). 3668 // Returns the input and lower/upper bounds. 3669 static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, 3670 const APInt *&CLow, const APInt *&CHigh) { 3671 assert(isa<Operator>(Select) && 3672 cast<Operator>(Select)->getOpcode() == Instruction::Select && 3673 "Input should be a Select!"); 3674 3675 const Value *LHS = nullptr, *RHS = nullptr; 3676 SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor; 3677 if (SPF != SPF_SMAX && SPF != SPF_SMIN) 3678 return false; 3679 3680 if (!match(RHS, m_APInt(CLow))) 3681 return false; 3682 3683 const Value *LHS2 = nullptr, *RHS2 = nullptr; 3684 SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor; 3685 if (getInverseMinMaxFlavor(SPF) != SPF2) 3686 return false; 3687 3688 if (!match(RHS2, m_APInt(CHigh))) 3689 return false; 3690 3691 if (SPF == SPF_SMIN) 3692 std::swap(CLow, CHigh); 3693 3694 In = LHS2; 3695 return CLow->sle(*CHigh); 3696 } 3697 3698 static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II, 3699 const APInt *&CLow, 3700 const APInt *&CHigh) { 3701 assert((II->getIntrinsicID() == Intrinsic::smin || 3702 II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax"); 3703 3704 Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID()); 3705 auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0)); 3706 if (!InnerII || InnerII->getIntrinsicID() != InverseID || 3707 !match(II->getArgOperand(1), m_APInt(CLow)) || 3708 !match(InnerII->getArgOperand(1), m_APInt(CHigh))) 3709 return false; 3710 3711 if (II->getIntrinsicID() == Intrinsic::smin) 3712 std::swap(CLow, CHigh); 3713 return CLow->sle(*CHigh); 3714 } 3715 3716 /// For vector constants, loop over the elements and find the constant with the 3717 /// minimum number of sign bits. 
Return 0 if the value is not a vector constant 3718 /// or if any element was not analyzed; otherwise, return the count for the 3719 /// element with the minimum number of sign bits. 3720 static unsigned computeNumSignBitsVectorConstant(const Value *V, 3721 const APInt &DemandedElts, 3722 unsigned TyBits) { 3723 const auto *CV = dyn_cast<Constant>(V); 3724 if (!CV || !isa<FixedVectorType>(CV->getType())) 3725 return 0; 3726 3727 unsigned MinSignBits = TyBits; 3728 unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements(); 3729 for (unsigned i = 0; i != NumElts; ++i) { 3730 if (!DemandedElts[i]) 3731 continue; 3732 // If we find a non-ConstantInt, bail out. 3733 auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i)); 3734 if (!Elt) 3735 return 0; 3736 3737 MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits()); 3738 } 3739 3740 return MinSignBits; 3741 } 3742 3743 static unsigned ComputeNumSignBitsImpl(const Value *V, 3744 const APInt &DemandedElts, 3745 unsigned Depth, const SimplifyQuery &Q); 3746 3747 static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts, 3748 unsigned Depth, const SimplifyQuery &Q) { 3749 unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q); 3750 assert(Result > 0 && "At least one sign bit needs to be present!"); 3751 return Result; 3752 } 3753 3754 /// Return the number of times the sign bit of the register is replicated into 3755 /// the other bits. We know that at least 1 bit is always equal to the sign bit 3756 /// (itself), but other cases can give us information. For example, immediately 3757 /// after an "ashr X, 2", we know that the top 3 bits are all equal to each 3758 /// other, so we return 3. For vectors, return the number of sign bits for the 3759 /// vector element with the minimum number of known sign bits of the demanded 3760 /// elements in the vector specified by DemandedElts. 3761 static unsigned ComputeNumSignBitsImpl(const Value *V, 3762 const APInt &DemandedElts, 3763 unsigned Depth, const SimplifyQuery &Q) { 3764 Type *Ty = V->getType(); 3765 #ifndef NDEBUG 3766 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 3767 3768 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { 3769 assert( 3770 FVTy->getNumElements() == DemandedElts.getBitWidth() && 3771 "DemandedElt width should equal the fixed vector number of elements"); 3772 } else { 3773 assert(DemandedElts == APInt(1, 1) && 3774 "DemandedElt width should be 1 for scalars"); 3775 } 3776 #endif 3777 3778 // We return the minimum number of sign bits that are guaranteed to be present 3779 // in V, so for undef we have to conservatively return 1. We don't have the 3780 // same behavior for poison though -- that's a FIXME today. 3781 3782 Type *ScalarTy = Ty->getScalarType(); 3783 unsigned TyBits = ScalarTy->isPointerTy() ? 3784 Q.DL.getPointerTypeSizeInBits(ScalarTy) : 3785 Q.DL.getTypeSizeInBits(ScalarTy); 3786 3787 unsigned Tmp, Tmp2; 3788 unsigned FirstAnswer = 1; 3789 3790 // Note that ConstantInt is handled by the general computeKnownBits case 3791 // below. 
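// For example, i8 -1 (0b11111111) has 8 sign bits, i8 1 has 7, and i8 -128
// (0b10000000) has only 1.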
3792 3793 if (Depth == MaxAnalysisRecursionDepth) 3794 return 1; 3795 3796 if (auto *U = dyn_cast<Operator>(V)) { 3797 switch (Operator::getOpcode(V)) { 3798 default: break; 3799 case Instruction::SExt: 3800 Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); 3801 return ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q) + 3802 Tmp; 3803 3804 case Instruction::SDiv: { 3805 const APInt *Denominator; 3806 // sdiv X, C -> adds log(C) sign bits. 3807 if (match(U->getOperand(1), m_APInt(Denominator))) { 3808 3809 // Ignore non-positive denominator. 3810 if (!Denominator->isStrictlyPositive()) 3811 break; 3812 3813 // Calculate the incoming numerator bits. 3814 unsigned NumBits = 3815 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q); 3816 3817 // Add floor(log(C)) bits to the numerator bits. 3818 return std::min(TyBits, NumBits + Denominator->logBase2()); 3819 } 3820 break; 3821 } 3822 3823 case Instruction::SRem: { 3824 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q); 3825 3826 const APInt *Denominator; 3827 // srem X, C -> we know that the result is within [-C+1,C) when C is a 3828 // positive constant. This let us put a lower bound on the number of sign 3829 // bits. 3830 if (match(U->getOperand(1), m_APInt(Denominator))) { 3831 3832 // Ignore non-positive denominator. 3833 if (Denominator->isStrictlyPositive()) { 3834 // Calculate the leading sign bit constraints by examining the 3835 // denominator. Given that the denominator is positive, there are two 3836 // cases: 3837 // 3838 // 1. The numerator is positive. The result range is [0,C) and 3839 // [0,C) u< (1 << ceilLogBase2(C)). 3840 // 3841 // 2. The numerator is negative. Then the result range is (-C,0] and 3842 // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). 3843 // 3844 // Thus a lower bound on the number of sign bits is `TyBits - 3845 // ceilLogBase2(C)`. 3846 3847 unsigned ResBits = TyBits - Denominator->ceilLogBase2(); 3848 Tmp = std::max(Tmp, ResBits); 3849 } 3850 } 3851 return Tmp; 3852 } 3853 3854 case Instruction::AShr: { 3855 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q); 3856 // ashr X, C -> adds C sign bits. Vectors too. 3857 const APInt *ShAmt; 3858 if (match(U->getOperand(1), m_APInt(ShAmt))) { 3859 if (ShAmt->uge(TyBits)) 3860 break; // Bad shift. 3861 unsigned ShAmtLimited = ShAmt->getZExtValue(); 3862 Tmp += ShAmtLimited; 3863 if (Tmp > TyBits) Tmp = TyBits; 3864 } 3865 return Tmp; 3866 } 3867 case Instruction::Shl: { 3868 const APInt *ShAmt; 3869 Value *X = nullptr; 3870 if (match(U->getOperand(1), m_APInt(ShAmt))) { 3871 // shl destroys sign bits. 3872 if (ShAmt->uge(TyBits)) 3873 break; // Bad shift. 3874 // We can look through a zext (more or less treating it as a sext) if 3875 // all extended bits are shifted out. 3876 if (match(U->getOperand(0), m_ZExt(m_Value(X))) && 3877 ShAmt->uge(TyBits - X->getType()->getScalarSizeInBits())) { 3878 Tmp = ComputeNumSignBits(X, DemandedElts, Depth + 1, Q); 3879 Tmp += TyBits - X->getType()->getScalarSizeInBits(); 3880 } else 3881 Tmp = 3882 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q); 3883 if (ShAmt->uge(Tmp)) 3884 break; // Shifted all sign bits out. 3885 Tmp2 = ShAmt->getZExtValue(); 3886 return Tmp - Tmp2; 3887 } 3888 break; 3889 } 3890 case Instruction::And: 3891 case Instruction::Or: 3892 case Instruction::Xor: // NOT is handled here. 3893 // Logical binary ops preserve the number of sign bits at the worst. 
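// E.g. if the top N bits of each operand all equal that operand's sign bit,
// then the top N bits of the AND/OR/XOR are likewise all equal, so the result
// has at least min(N0, N1) sign bits.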
3894 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q); 3895 if (Tmp != 1) { 3896 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q); 3897 FirstAnswer = std::min(Tmp, Tmp2); 3898 // We computed what we know about the sign bits as our first 3899 // answer. Now proceed to the generic code that uses 3900 // computeKnownBits, and pick whichever answer is better. 3901 } 3902 break; 3903 3904 case Instruction::Select: { 3905 // If we have a clamp pattern, we know that the number of sign bits will 3906 // be the minimum of the clamp min/max range. 3907 const Value *X; 3908 const APInt *CLow, *CHigh; 3909 if (isSignedMinMaxClamp(U, X, CLow, CHigh)) 3910 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); 3911 3912 Tmp = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q); 3913 if (Tmp == 1) 3914 break; 3915 Tmp2 = ComputeNumSignBits(U->getOperand(2), DemandedElts, Depth + 1, Q); 3916 return std::min(Tmp, Tmp2); 3917 } 3918 3919 case Instruction::Add: 3920 // Add can have at most one carry bit. Thus we know that the output 3921 // is, at worst, one more bit than the inputs. 3922 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3923 if (Tmp == 1) break; 3924 3925 // Special case decrementing a value (ADD X, -1): 3926 if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) 3927 if (CRHS->isAllOnesValue()) { 3928 KnownBits Known(TyBits); 3929 computeKnownBits(U->getOperand(0), DemandedElts, Known, Depth + 1, Q); 3930 3931 // If the input is known to be 0 or 1, the output is 0/-1, which is 3932 // all sign bits set. 3933 if ((Known.Zero | 1).isAllOnes()) 3934 return TyBits; 3935 3936 // If we are subtracting one from a positive number, there is no carry 3937 // out of the result. 3938 if (Known.isNonNegative()) 3939 return Tmp; 3940 } 3941 3942 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q); 3943 if (Tmp2 == 1) 3944 break; 3945 return std::min(Tmp, Tmp2) - 1; 3946 3947 case Instruction::Sub: 3948 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q); 3949 if (Tmp2 == 1) 3950 break; 3951 3952 // Handle NEG. 3953 if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) 3954 if (CLHS->isNullValue()) { 3955 KnownBits Known(TyBits); 3956 computeKnownBits(U->getOperand(1), DemandedElts, Known, Depth + 1, Q); 3957 // If the input is known to be 0 or 1, the output is 0/-1, which is 3958 // all sign bits set. 3959 if ((Known.Zero | 1).isAllOnes()) 3960 return TyBits; 3961 3962 // If the input is known to be positive (the sign bit is known clear), 3963 // the output of the NEG has the same number of sign bits as the 3964 // input. 3965 if (Known.isNonNegative()) 3966 return Tmp2; 3967 3968 // Otherwise, we treat this like a SUB. 3969 } 3970 3971 // Sub can have at most one carry bit. Thus we know that the output 3972 // is, at worst, one more bit than the inputs. 3973 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q); 3974 if (Tmp == 1) 3975 break; 3976 return std::min(Tmp, Tmp2) - 1; 3977 3978 case Instruction::Mul: { 3979 // The output of the Mul can be at most twice the valid bits in the 3980 // inputs. 
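// E.g. for i16 operands each with 11 sign bits (6 significant bits apiece),
// the product needs at most 12 significant bits, leaving at least
// 16 - 12 + 1 = 5 sign bits.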
3981 unsigned SignBitsOp0 = 3982 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q); 3983 if (SignBitsOp0 == 1) 3984 break; 3985 unsigned SignBitsOp1 = 3986 ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q); 3987 if (SignBitsOp1 == 1) 3988 break; 3989 unsigned OutValidBits = 3990 (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); 3991 return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; 3992 } 3993 3994 case Instruction::PHI: { 3995 const PHINode *PN = cast<PHINode>(U); 3996 unsigned NumIncomingValues = PN->getNumIncomingValues(); 3997 // Don't analyze large in-degree PHIs. 3998 if (NumIncomingValues > 4) break; 3999 // Unreachable blocks may have zero-operand PHI nodes. 4000 if (NumIncomingValues == 0) break; 4001 4002 // Take the minimum of all incoming values. This can't infinitely loop 4003 // because of our depth threshold. 4004 SimplifyQuery RecQ = Q.getWithoutCondContext(); 4005 Tmp = TyBits; 4006 for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) { 4007 if (Tmp == 1) return Tmp; 4008 RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator(); 4009 Tmp = std::min(Tmp, ComputeNumSignBits(PN->getIncomingValue(i), 4010 DemandedElts, Depth + 1, RecQ)); 4011 } 4012 return Tmp; 4013 } 4014 4015 case Instruction::Trunc: { 4016 // If the input contained enough sign bits that some remain after the 4017 // truncation, then we can make use of that. Otherwise we don't know 4018 // anything. 4019 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 4020 unsigned OperandTyBits = U->getOperand(0)->getType()->getScalarSizeInBits(); 4021 if (Tmp > (OperandTyBits - TyBits)) 4022 return Tmp - (OperandTyBits - TyBits); 4023 4024 return 1; 4025 } 4026 4027 case Instruction::ExtractElement: 4028 // Look through extract element. At the moment we keep this simple and 4029 // skip tracking the specific element. But at least we might find 4030 // information valid for all elements of the vector (for example if vector 4031 // is sign extended, shifted, etc). 4032 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 4033 4034 case Instruction::ShuffleVector: { 4035 // Collect the minimum number of sign bits that are shared by every vector 4036 // element referenced by the shuffle. 4037 auto *Shuf = dyn_cast<ShuffleVectorInst>(U); 4038 if (!Shuf) { 4039 // FIXME: Add support for shufflevector constant expressions. 4040 return 1; 4041 } 4042 APInt DemandedLHS, DemandedRHS; 4043 // For undef elements, we don't know anything about the common state of 4044 // the shuffle result. 4045 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) 4046 return 1; 4047 Tmp = std::numeric_limits<unsigned>::max(); 4048 if (!!DemandedLHS) { 4049 const Value *LHS = Shuf->getOperand(0); 4050 Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q); 4051 } 4052 // If we don't know anything, early out and try computeKnownBits 4053 // fall-back. 4054 if (Tmp == 1) 4055 break; 4056 if (!!DemandedRHS) { 4057 const Value *RHS = Shuf->getOperand(1); 4058 Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q); 4059 Tmp = std::min(Tmp, Tmp2); 4060 } 4061 // If we don't know anything, early out and try computeKnownBits 4062 // fall-back. 
4063 if (Tmp == 1) 4064 break; 4065 assert(Tmp <= TyBits && "Failed to determine minimum sign bits"); 4066 return Tmp; 4067 } 4068 case Instruction::Call: { 4069 if (const auto *II = dyn_cast<IntrinsicInst>(U)) { 4070 switch (II->getIntrinsicID()) { 4071 default: 4072 break; 4073 case Intrinsic::abs: 4074 Tmp = 4075 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q); 4076 if (Tmp == 1) 4077 break; 4078 4079 // Absolute value reduces number of sign bits by at most 1. 4080 return Tmp - 1; 4081 case Intrinsic::smin: 4082 case Intrinsic::smax: { 4083 const APInt *CLow, *CHigh; 4084 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh)) 4085 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); 4086 } 4087 } 4088 } 4089 } 4090 } 4091 } 4092 4093 // Finally, if we can prove that the top bits of the result are 0's or 1's, 4094 // use this information. 4095 4096 // If we can examine all elements of a vector constant successfully, we're 4097 // done (we can't do any better than that). If not, keep trying. 4098 if (unsigned VecSignBits = 4099 computeNumSignBitsVectorConstant(V, DemandedElts, TyBits)) 4100 return VecSignBits; 4101 4102 KnownBits Known(TyBits); 4103 computeKnownBits(V, DemandedElts, Known, Depth, Q); 4104 4105 // If we know that the sign bit is either zero or one, determine the number of 4106 // identical bits in the top of the input value. 4107 return std::max(FirstAnswer, Known.countMinSignBits()); 4108 } 4109 4110 Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB, 4111 const TargetLibraryInfo *TLI) { 4112 const Function *F = CB.getCalledFunction(); 4113 if (!F) 4114 return Intrinsic::not_intrinsic; 4115 4116 if (F->isIntrinsic()) 4117 return F->getIntrinsicID(); 4118 4119 // We are going to infer semantics of a library function based on mapping it 4120 // to an LLVM intrinsic. Check that the library function is available from 4121 // this callbase and in this environment. 
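// For example, a read-only call to `sinf` resolved by TLI maps to llvm.sin,
// while a call to a local (internal-linkage) function of the same name does
// not.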
4122 LibFunc Func; 4123 if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) || 4124 !CB.onlyReadsMemory()) 4125 return Intrinsic::not_intrinsic; 4126 4127 switch (Func) { 4128 default: 4129 break; 4130 case LibFunc_sin: 4131 case LibFunc_sinf: 4132 case LibFunc_sinl: 4133 return Intrinsic::sin; 4134 case LibFunc_cos: 4135 case LibFunc_cosf: 4136 case LibFunc_cosl: 4137 return Intrinsic::cos; 4138 case LibFunc_tan: 4139 case LibFunc_tanf: 4140 case LibFunc_tanl: 4141 return Intrinsic::tan; 4142 case LibFunc_exp: 4143 case LibFunc_expf: 4144 case LibFunc_expl: 4145 return Intrinsic::exp; 4146 case LibFunc_exp2: 4147 case LibFunc_exp2f: 4148 case LibFunc_exp2l: 4149 return Intrinsic::exp2; 4150 case LibFunc_log: 4151 case LibFunc_logf: 4152 case LibFunc_logl: 4153 return Intrinsic::log; 4154 case LibFunc_log10: 4155 case LibFunc_log10f: 4156 case LibFunc_log10l: 4157 return Intrinsic::log10; 4158 case LibFunc_log2: 4159 case LibFunc_log2f: 4160 case LibFunc_log2l: 4161 return Intrinsic::log2; 4162 case LibFunc_fabs: 4163 case LibFunc_fabsf: 4164 case LibFunc_fabsl: 4165 return Intrinsic::fabs; 4166 case LibFunc_fmin: 4167 case LibFunc_fminf: 4168 case LibFunc_fminl: 4169 return Intrinsic::minnum; 4170 case LibFunc_fmax: 4171 case LibFunc_fmaxf: 4172 case LibFunc_fmaxl: 4173 return Intrinsic::maxnum; 4174 case LibFunc_copysign: 4175 case LibFunc_copysignf: 4176 case LibFunc_copysignl: 4177 return Intrinsic::copysign; 4178 case LibFunc_floor: 4179 case LibFunc_floorf: 4180 case LibFunc_floorl: 4181 return Intrinsic::floor; 4182 case LibFunc_ceil: 4183 case LibFunc_ceilf: 4184 case LibFunc_ceill: 4185 return Intrinsic::ceil; 4186 case LibFunc_trunc: 4187 case LibFunc_truncf: 4188 case LibFunc_truncl: 4189 return Intrinsic::trunc; 4190 case LibFunc_rint: 4191 case LibFunc_rintf: 4192 case LibFunc_rintl: 4193 return Intrinsic::rint; 4194 case LibFunc_nearbyint: 4195 case LibFunc_nearbyintf: 4196 case LibFunc_nearbyintl: 4197 return Intrinsic::nearbyint; 4198 case LibFunc_round: 4199 case LibFunc_roundf: 4200 case LibFunc_roundl: 4201 return Intrinsic::round; 4202 case LibFunc_roundeven: 4203 case LibFunc_roundevenf: 4204 case LibFunc_roundevenl: 4205 return Intrinsic::roundeven; 4206 case LibFunc_pow: 4207 case LibFunc_powf: 4208 case LibFunc_powl: 4209 return Intrinsic::pow; 4210 case LibFunc_sqrt: 4211 case LibFunc_sqrtf: 4212 case LibFunc_sqrtl: 4213 return Intrinsic::sqrt; 4214 } 4215 4216 return Intrinsic::not_intrinsic; 4217 } 4218 4219 /// Return true if it's possible to assume IEEE treatment of input denormals in 4220 /// \p F for \p Val. 
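/// That is, \p F's denormal mode for the value's float semantics has IEEE
/// (non-flushing) input treatment.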
4221 static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) { 4222 Ty = Ty->getScalarType(); 4223 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE; 4224 } 4225 4226 static bool inputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) { 4227 Ty = Ty->getScalarType(); 4228 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics()); 4229 return Mode.Input == DenormalMode::IEEE || 4230 Mode.Input == DenormalMode::PositiveZero; 4231 } 4232 4233 static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) { 4234 Ty = Ty->getScalarType(); 4235 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics()); 4236 return Mode.Output == DenormalMode::IEEE || 4237 Mode.Output == DenormalMode::PositiveZero; 4238 } 4239 4240 bool KnownFPClass::isKnownNeverLogicalZero(const Function &F, Type *Ty) const { 4241 return isKnownNeverZero() && 4242 (isKnownNeverSubnormal() || inputDenormalIsIEEE(F, Ty)); 4243 } 4244 4245 bool KnownFPClass::isKnownNeverLogicalNegZero(const Function &F, 4246 Type *Ty) const { 4247 return isKnownNeverNegZero() && 4248 (isKnownNeverNegSubnormal() || inputDenormalIsIEEEOrPosZero(F, Ty)); 4249 } 4250 4251 bool KnownFPClass::isKnownNeverLogicalPosZero(const Function &F, 4252 Type *Ty) const { 4253 if (!isKnownNeverPosZero()) 4254 return false; 4255 4256 // If we know there are no denormals, nothing can be flushed to zero. 4257 if (isKnownNeverSubnormal()) 4258 return true; 4259 4260 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics()); 4261 switch (Mode.Input) { 4262 case DenormalMode::IEEE: 4263 return true; 4264 case DenormalMode::PreserveSign: 4265 // Negative subnormal won't flush to +0 4266 return isKnownNeverPosSubnormal(); 4267 case DenormalMode::PositiveZero: 4268 default: 4269 // Both positive and negative subnormal could flush to +0 4270 return false; 4271 } 4272 4273 llvm_unreachable("covered switch over denormal mode"); 4274 } 4275 4276 void KnownFPClass::propagateDenormal(const KnownFPClass &Src, const Function &F, 4277 Type *Ty) { 4278 KnownFPClasses = Src.KnownFPClasses; 4279 // If we aren't assuming the source can't be a zero, we don't have to check if 4280 // a denormal input could be flushed. 4281 if (!Src.isKnownNeverPosZero() && !Src.isKnownNeverNegZero()) 4282 return; 4283 4284 // If we know the input can't be a denormal, it can't be flushed to 0. 4285 if (Src.isKnownNeverSubnormal()) 4286 return; 4287 4288 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics()); 4289 4290 if (!Src.isKnownNeverPosSubnormal() && Mode != DenormalMode::getIEEE()) 4291 KnownFPClasses |= fcPosZero; 4292 4293 if (!Src.isKnownNeverNegSubnormal() && Mode != DenormalMode::getIEEE()) { 4294 if (Mode != DenormalMode::getPositiveZero()) 4295 KnownFPClasses |= fcNegZero; 4296 4297 if (Mode.Input == DenormalMode::PositiveZero || 4298 Mode.Output == DenormalMode::PositiveZero || 4299 Mode.Input == DenormalMode::Dynamic || 4300 Mode.Output == DenormalMode::Dynamic) 4301 KnownFPClasses |= fcPosZero; 4302 } 4303 } 4304 4305 void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src, 4306 const Function &F, Type *Ty) { 4307 propagateDenormal(Src, F, Ty); 4308 propagateNaN(Src, /*PreserveSign=*/true); 4309 } 4310 4311 /// Given an exploded icmp instruction, return true if the comparison only 4312 /// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if 4313 /// the result of the comparison is true when the input value is signed. 
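/// For example, `icmp ult X, 0x80000000` on i32 is true exactly when X's sign
/// bit is clear, so it is a sign-bit check with TrueIfSigned == false.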
4314 bool llvm::isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, 4315 bool &TrueIfSigned) { 4316 switch (Pred) { 4317 case ICmpInst::ICMP_SLT: // True if LHS s< 0 4318 TrueIfSigned = true; 4319 return RHS.isZero(); 4320 case ICmpInst::ICMP_SLE: // True if LHS s<= -1 4321 TrueIfSigned = true; 4322 return RHS.isAllOnes(); 4323 case ICmpInst::ICMP_SGT: // True if LHS s> -1 4324 TrueIfSigned = false; 4325 return RHS.isAllOnes(); 4326 case ICmpInst::ICMP_SGE: // True if LHS s>= 0 4327 TrueIfSigned = false; 4328 return RHS.isZero(); 4329 case ICmpInst::ICMP_UGT: 4330 // True if LHS u> RHS and RHS == sign-bit-mask - 1 4331 TrueIfSigned = true; 4332 return RHS.isMaxSignedValue(); 4333 case ICmpInst::ICMP_UGE: 4334 // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) 4335 TrueIfSigned = true; 4336 return RHS.isMinSignedValue(); 4337 case ICmpInst::ICMP_ULT: 4338 // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) 4339 TrueIfSigned = false; 4340 return RHS.isMinSignedValue(); 4341 case ICmpInst::ICMP_ULE: 4342 // True if LHS u<= RHS and RHS == sign-bit-mask - 1 4343 TrueIfSigned = false; 4344 return RHS.isMaxSignedValue(); 4345 default: 4346 return false; 4347 } 4348 } 4349 4350 /// Returns a pair of values, which if passed to llvm.is.fpclass, returns the 4351 /// same result as an fcmp with the given operands. 4352 std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred, 4353 const Function &F, 4354 Value *LHS, Value *RHS, 4355 bool LookThroughSrc) { 4356 const APFloat *ConstRHS; 4357 if (!match(RHS, m_APFloatAllowPoison(ConstRHS))) 4358 return {nullptr, fcAllFlags}; 4359 4360 return fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc); 4361 } 4362 4363 std::pair<Value *, FPClassTest> 4364 llvm::fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, 4365 const APFloat *ConstRHS, bool LookThroughSrc) { 4366 4367 auto [Src, ClassIfTrue, ClassIfFalse] = 4368 fcmpImpliesClass(Pred, F, LHS, *ConstRHS, LookThroughSrc); 4369 if (Src && ClassIfTrue == ~ClassIfFalse) 4370 return {Src, ClassIfTrue}; 4371 return {nullptr, fcAllFlags}; 4372 } 4373 4374 /// Return the return value for fcmpImpliesClass for a compare that produces an 4375 /// exact class test. 4376 static std::tuple<Value *, FPClassTest, FPClassTest> exactClass(Value *V, 4377 FPClassTest M) { 4378 return {V, M, ~M}; 4379 } 4380 4381 std::tuple<Value *, FPClassTest, FPClassTest> 4382 llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS, 4383 FPClassTest RHSClass, bool LookThroughSrc) { 4384 assert(RHSClass != fcNone); 4385 Value *Src = LHS; 4386 4387 if (Pred == FCmpInst::FCMP_TRUE) 4388 return exactClass(Src, fcAllFlags); 4389 4390 if (Pred == FCmpInst::FCMP_FALSE) 4391 return exactClass(Src, fcNone); 4392 4393 const FPClassTest OrigClass = RHSClass; 4394 4395 const bool IsNegativeRHS = (RHSClass & fcNegative) == RHSClass; 4396 const bool IsPositiveRHS = (RHSClass & fcPositive) == RHSClass; 4397 const bool IsNaN = (RHSClass & ~fcNan) == fcNone; 4398 4399 if (IsNaN) { 4400 // fcmp o__ x, nan -> false 4401 // fcmp u__ x, nan -> true 4402 return exactClass(Src, CmpInst::isOrdered(Pred) ? 
fcNone : fcAllFlags); 4403 } 4404 4405 // fcmp ord x, zero|normal|subnormal|inf -> ~fcNan 4406 if (Pred == FCmpInst::FCMP_ORD) 4407 return exactClass(Src, ~fcNan); 4408 4409 // fcmp uno x, zero|normal|subnormal|inf -> fcNan 4410 if (Pred == FCmpInst::FCMP_UNO) 4411 return exactClass(Src, fcNan); 4412 4413 const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src))); 4414 if (IsFabs) 4415 RHSClass = llvm::inverse_fabs(RHSClass); 4416 4417 const bool IsZero = (OrigClass & fcZero) == OrigClass; 4418 if (IsZero) { 4419 assert(Pred != FCmpInst::FCMP_ORD && Pred != FCmpInst::FCMP_UNO); 4420 // Compares with fcNone are only exactly equal to fcZero if input denormals 4421 // are not flushed. 4422 // TODO: Handle DAZ by expanding masks to cover subnormal cases. 4423 if (!inputDenormalIsIEEE(F, LHS->getType())) 4424 return {nullptr, fcAllFlags, fcAllFlags}; 4425 4426 switch (Pred) { 4427 case FCmpInst::FCMP_OEQ: // Match x == 0.0 4428 return exactClass(Src, fcZero); 4429 case FCmpInst::FCMP_UEQ: // Match isnan(x) || (x == 0.0) 4430 return exactClass(Src, fcZero | fcNan); 4431 case FCmpInst::FCMP_UNE: // Match (x != 0.0) 4432 return exactClass(Src, ~fcZero); 4433 case FCmpInst::FCMP_ONE: // Match !isnan(x) && x != 0.0 4434 return exactClass(Src, ~fcNan & ~fcZero); 4435 case FCmpInst::FCMP_ORD: 4436 // Canonical form of ord/uno is with a zero. We could also handle 4437 // non-canonical other non-NaN constants or LHS == RHS. 4438 return exactClass(Src, ~fcNan); 4439 case FCmpInst::FCMP_UNO: 4440 return exactClass(Src, fcNan); 4441 case FCmpInst::FCMP_OGT: // x > 0 4442 return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf); 4443 case FCmpInst::FCMP_UGT: // isnan(x) || x > 0 4444 return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf | fcNan); 4445 case FCmpInst::FCMP_OGE: // x >= 0 4446 return exactClass(Src, fcPositive | fcNegZero); 4447 case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0 4448 return exactClass(Src, fcPositive | fcNegZero | fcNan); 4449 case FCmpInst::FCMP_OLT: // x < 0 4450 return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf); 4451 case FCmpInst::FCMP_ULT: // isnan(x) || x < 0 4452 return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf | fcNan); 4453 case FCmpInst::FCMP_OLE: // x <= 0 4454 return exactClass(Src, fcNegative | fcPosZero); 4455 case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0 4456 return exactClass(Src, fcNegative | fcPosZero | fcNan); 4457 default: 4458 llvm_unreachable("all compare types are handled"); 4459 } 4460 4461 return {nullptr, fcAllFlags, fcAllFlags}; 4462 } 4463 4464 const bool IsDenormalRHS = (OrigClass & fcSubnormal) == OrigClass; 4465 4466 const bool IsInf = (OrigClass & fcInf) == OrigClass; 4467 if (IsInf) { 4468 FPClassTest Mask = fcAllFlags; 4469 4470 switch (Pred) { 4471 case FCmpInst::FCMP_OEQ: 4472 case FCmpInst::FCMP_UNE: { 4473 // Match __builtin_isinf patterns 4474 // 4475 // fcmp oeq x, +inf -> is_fpclass x, fcPosInf 4476 // fcmp oeq fabs(x), +inf -> is_fpclass x, fcInf 4477 // fcmp oeq x, -inf -> is_fpclass x, fcNegInf 4478 // fcmp oeq fabs(x), -inf -> is_fpclass x, 0 -> false 4479 // 4480 // fcmp une x, +inf -> is_fpclass x, ~fcPosInf 4481 // fcmp une fabs(x), +inf -> is_fpclass x, ~fcInf 4482 // fcmp une x, -inf -> is_fpclass x, ~fcNegInf 4483 // fcmp une fabs(x), -inf -> is_fpclass x, fcAllFlags -> true 4484 if (IsNegativeRHS) { 4485 Mask = fcNegInf; 4486 if (IsFabs) 4487 Mask = fcNone; 4488 } else { 4489 Mask = fcPosInf; 4490 if (IsFabs) 4491 Mask |= fcNegInf; 4492 } 4493 break; 4494 } 4495 case 
FCmpInst::FCMP_ONE:
4496   case FCmpInst::FCMP_UEQ: {
4497     // Match __builtin_isinf patterns
4498     //   fcmp one x, -inf -> is_fpclass x, ~fcNegInf & ~fcNan
4499     //   fcmp one fabs(x), -inf -> is_fpclass x, ~fcNan
4500     //   fcmp one x, +inf -> is_fpclass x, ~fcPosInf & ~fcNan
4501     //   fcmp one fabs(x), +inf -> is_fpclass x, ~fcInf & ~fcNan
4502     //
4503     //   fcmp ueq x, +inf -> is_fpclass x, fcPosInf|fcNan
4504     //   fcmp ueq (fabs x), +inf -> is_fpclass x, fcInf|fcNan
4505     //   fcmp ueq x, -inf -> is_fpclass x, fcNegInf|fcNan
4506     //   fcmp ueq fabs(x), -inf -> is_fpclass x, fcNan
4507     if (IsNegativeRHS) {
4508       Mask = ~fcNegInf & ~fcNan;
4509       if (IsFabs)
4510         Mask = ~fcNan;
4511     } else {
4512       Mask = ~fcPosInf & ~fcNan;
4513       if (IsFabs)
4514         Mask &= ~fcNegInf;
4515     }
4516
4517     break;
4518   }
4519   case FCmpInst::FCMP_OLT:
4520   case FCmpInst::FCMP_UGE: {
4521     if (IsNegativeRHS) {
4522       // No value is ordered and less than negative infinity.
4523       // All values are unordered with or at least negative infinity.
4524       // fcmp olt x, -inf -> false
4525       // fcmp uge x, -inf -> true
4526       Mask = fcNone;
4527       break;
4528     }
4529
4530     // fcmp olt fabs(x), +inf -> fcFinite
4531     // fcmp uge fabs(x), +inf -> ~fcFinite
4532     // fcmp olt x, +inf -> fcFinite|fcNegInf
4533     // fcmp uge x, +inf -> ~(fcFinite|fcNegInf)
4534     Mask = fcFinite;
4535     if (!IsFabs)
4536       Mask |= fcNegInf;
4537     break;
4538   }
4539   case FCmpInst::FCMP_OGE:
4540   case FCmpInst::FCMP_ULT: {
4541     if (IsNegativeRHS) {
4542       // fcmp oge x, -inf -> ~fcNan
4543       // fcmp oge fabs(x), -inf -> ~fcNan
4544       // fcmp ult x, -inf -> fcNan
4545       // fcmp ult fabs(x), -inf -> fcNan
4546       Mask = ~fcNan;
4547       break;
4548     }
4549
4550     // fcmp oge fabs(x), +inf -> fcInf
4551     // fcmp oge x, +inf -> fcPosInf
4552     // fcmp ult fabs(x), +inf -> ~fcInf
4553     // fcmp ult x, +inf -> ~fcPosInf
4554     Mask = fcPosInf;
4555     if (IsFabs)
4556       Mask |= fcNegInf;
4557     break;
4558   }
4559   case FCmpInst::FCMP_OGT:
4560   case FCmpInst::FCMP_ULE: {
4561     if (IsNegativeRHS) {
4562       // fcmp ogt x, -inf -> fcmp one x, -inf
4563       // fcmp ogt fabs(x), -inf -> fcmp ord x, x
4564       // fcmp ule x, -inf -> fcmp ueq x, -inf
4565       // fcmp ule fabs(x), -inf -> fcmp uno x, x
4566       Mask = IsFabs ? ~fcNan : ~(fcNegInf | fcNan);
4567       break;
4568     }
4569
4570     // No value is ordered and greater than infinity.
4571     Mask = fcNone;
4572     break;
4573   }
4574   case FCmpInst::FCMP_OLE:
4575   case FCmpInst::FCMP_UGT: {
4576     if (IsNegativeRHS) {
4577       Mask = IsFabs ? fcNone : fcNegInf;
4578       break;
4579     }
4580
4581     // fcmp ole x, +inf -> fcmp ord x, x
4582     // fcmp ole fabs(x), +inf -> fcmp ord x, x
4583     // fcmp ole x, -inf -> fcmp oeq x, -inf
4584     // fcmp ole fabs(x), -inf -> false
4585     Mask = ~fcNan;
4586     break;
4587   }
4588   default:
4589     llvm_unreachable("all compare types are handled");
4590   }
4591
4592   // Invert the comparison for the unordered cases.
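// For example, the mask computed above for fcmp olt x, +inf is
// fcFinite|fcNegInf; flipping it here yields ~(fcFinite|fcNegInf) for the
// unordered twin fcmp uge x, +inf, matching the table in the OLT/UGE case.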
4593 if (FCmpInst::isUnordered(Pred)) 4594 Mask = ~Mask; 4595 4596 return exactClass(Src, Mask); 4597 } 4598 4599 if (Pred == FCmpInst::FCMP_OEQ) 4600 return {Src, RHSClass, fcAllFlags}; 4601 4602 if (Pred == FCmpInst::FCMP_UEQ) { 4603 FPClassTest Class = RHSClass | fcNan; 4604 return {Src, Class, ~fcNan}; 4605 } 4606 4607 if (Pred == FCmpInst::FCMP_ONE) 4608 return {Src, ~fcNan, RHSClass | fcNan}; 4609 4610 if (Pred == FCmpInst::FCMP_UNE) 4611 return {Src, fcAllFlags, RHSClass}; 4612 4613 assert((RHSClass == fcNone || RHSClass == fcPosNormal || 4614 RHSClass == fcNegNormal || RHSClass == fcNormal || 4615 RHSClass == fcPosSubnormal || RHSClass == fcNegSubnormal || 4616 RHSClass == fcSubnormal) && 4617 "should have been recognized as an exact class test"); 4618 4619 if (IsNegativeRHS) { 4620 // TODO: Handle fneg(fabs) 4621 if (IsFabs) { 4622 // fabs(x) o> -k -> fcmp ord x, x 4623 // fabs(x) u> -k -> true 4624 // fabs(x) o< -k -> false 4625 // fabs(x) u< -k -> fcmp uno x, x 4626 switch (Pred) { 4627 case FCmpInst::FCMP_OGT: 4628 case FCmpInst::FCMP_OGE: 4629 return {Src, ~fcNan, fcNan}; 4630 case FCmpInst::FCMP_UGT: 4631 case FCmpInst::FCMP_UGE: 4632 return {Src, fcAllFlags, fcNone}; 4633 case FCmpInst::FCMP_OLT: 4634 case FCmpInst::FCMP_OLE: 4635 return {Src, fcNone, fcAllFlags}; 4636 case FCmpInst::FCMP_ULT: 4637 case FCmpInst::FCMP_ULE: 4638 return {Src, fcNan, ~fcNan}; 4639 default: 4640 break; 4641 } 4642 4643 return {nullptr, fcAllFlags, fcAllFlags}; 4644 } 4645 4646 FPClassTest ClassesLE = fcNegInf | fcNegNormal; 4647 FPClassTest ClassesGE = fcPositive | fcNegZero | fcNegSubnormal; 4648 4649 if (IsDenormalRHS) 4650 ClassesLE |= fcNegSubnormal; 4651 else 4652 ClassesGE |= fcNegNormal; 4653 4654 switch (Pred) { 4655 case FCmpInst::FCMP_OGT: 4656 case FCmpInst::FCMP_OGE: 4657 return {Src, ClassesGE, ~ClassesGE | RHSClass}; 4658 case FCmpInst::FCMP_UGT: 4659 case FCmpInst::FCMP_UGE: 4660 return {Src, ClassesGE | fcNan, ~(ClassesGE | fcNan) | RHSClass}; 4661 case FCmpInst::FCMP_OLT: 4662 case FCmpInst::FCMP_OLE: 4663 return {Src, ClassesLE, ~ClassesLE | RHSClass}; 4664 case FCmpInst::FCMP_ULT: 4665 case FCmpInst::FCMP_ULE: 4666 return {Src, ClassesLE | fcNan, ~(ClassesLE | fcNan) | RHSClass}; 4667 default: 4668 break; 4669 } 4670 } else if (IsPositiveRHS) { 4671 FPClassTest ClassesGE = fcPosNormal | fcPosInf; 4672 FPClassTest ClassesLE = fcNegative | fcPosZero | fcPosSubnormal; 4673 if (IsDenormalRHS) 4674 ClassesGE |= fcPosSubnormal; 4675 else 4676 ClassesLE |= fcPosNormal; 4677 4678 if (IsFabs) { 4679 ClassesGE = llvm::inverse_fabs(ClassesGE); 4680 ClassesLE = llvm::inverse_fabs(ClassesLE); 4681 } 4682 4683 switch (Pred) { 4684 case FCmpInst::FCMP_OGT: 4685 case FCmpInst::FCMP_OGE: 4686 return {Src, ClassesGE, ~ClassesGE | RHSClass}; 4687 case FCmpInst::FCMP_UGT: 4688 case FCmpInst::FCMP_UGE: 4689 return {Src, ClassesGE | fcNan, ~(ClassesGE | fcNan) | RHSClass}; 4690 case FCmpInst::FCMP_OLT: 4691 case FCmpInst::FCMP_OLE: 4692 return {Src, ClassesLE, ~ClassesLE | RHSClass}; 4693 case FCmpInst::FCMP_ULT: 4694 case FCmpInst::FCMP_ULE: 4695 return {Src, ClassesLE | fcNan, ~(ClassesLE | fcNan) | RHSClass}; 4696 default: 4697 break; 4698 } 4699 } 4700 4701 return {nullptr, fcAllFlags, fcAllFlags}; 4702 } 4703 4704 std::tuple<Value *, FPClassTest, FPClassTest> 4705 llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS, 4706 const APFloat &ConstRHS, bool LookThroughSrc) { 4707 // We can refine checks against smallest normal / largest denormal to an 4708 // exact class 
test. 4709 if (!ConstRHS.isNegative() && ConstRHS.isSmallestNormalized()) { 4710 Value *Src = LHS; 4711 const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src))); 4712 4713 FPClassTest Mask; 4714 // Match pattern that's used in __builtin_isnormal. 4715 switch (Pred) { 4716 case FCmpInst::FCMP_OLT: 4717 case FCmpInst::FCMP_UGE: { 4718 // fcmp olt x, smallest_normal -> fcNegInf|fcNegNormal|fcSubnormal|fcZero 4719 // fcmp olt fabs(x), smallest_normal -> fcSubnormal|fcZero 4720 // fcmp uge x, smallest_normal -> fcNan|fcPosNormal|fcPosInf 4721 // fcmp uge fabs(x), smallest_normal -> ~(fcSubnormal|fcZero) 4722 Mask = fcZero | fcSubnormal; 4723 if (!IsFabs) 4724 Mask |= fcNegNormal | fcNegInf; 4725 4726 break; 4727 } 4728 case FCmpInst::FCMP_OGE: 4729 case FCmpInst::FCMP_ULT: { 4730 // fcmp oge x, smallest_normal -> fcPosNormal | fcPosInf 4731 // fcmp oge fabs(x), smallest_normal -> fcInf | fcNormal 4732 // fcmp ult x, smallest_normal -> ~(fcPosNormal | fcPosInf) 4733 // fcmp ult fabs(x), smallest_normal -> ~(fcInf | fcNormal) 4734 Mask = fcPosInf | fcPosNormal; 4735 if (IsFabs) 4736 Mask |= fcNegInf | fcNegNormal; 4737 break; 4738 } 4739 default: 4740 return fcmpImpliesClass(Pred, F, LHS, ConstRHS.classify(), 4741 LookThroughSrc); 4742 } 4743 4744 // Invert the comparison for the unordered cases. 4745 if (FCmpInst::isUnordered(Pred)) 4746 Mask = ~Mask; 4747 4748 return exactClass(Src, Mask); 4749 } 4750 4751 return fcmpImpliesClass(Pred, F, LHS, ConstRHS.classify(), LookThroughSrc); 4752 } 4753 4754 std::tuple<Value *, FPClassTest, FPClassTest> 4755 llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS, 4756 Value *RHS, bool LookThroughSrc) { 4757 const APFloat *ConstRHS; 4758 if (!match(RHS, m_APFloatAllowPoison(ConstRHS))) 4759 return {nullptr, fcAllFlags, fcAllFlags}; 4760 4761 // TODO: Just call computeKnownFPClass for RHS to handle non-constants. 4762 return fcmpImpliesClass(Pred, F, LHS, *ConstRHS, LookThroughSrc); 4763 } 4764 4765 static void computeKnownFPClassFromCond(const Value *V, Value *Cond, 4766 bool CondIsTrue, 4767 const Instruction *CxtI, 4768 KnownFPClass &KnownFromContext) { 4769 CmpInst::Predicate Pred; 4770 Value *LHS; 4771 uint64_t ClassVal = 0; 4772 const APFloat *CRHS; 4773 const APInt *RHS; 4774 if (match(Cond, m_FCmp(Pred, m_Value(LHS), m_APFloat(CRHS)))) { 4775 auto [CmpVal, MaskIfTrue, MaskIfFalse] = fcmpImpliesClass( 4776 Pred, *CxtI->getParent()->getParent(), LHS, *CRHS, LHS != V); 4777 if (CmpVal == V) 4778 KnownFromContext.knownNot(~(CondIsTrue ? MaskIfTrue : MaskIfFalse)); 4779 } else if (match(Cond, m_Intrinsic<Intrinsic::is_fpclass>( 4780 m_Value(LHS), m_ConstantInt(ClassVal)))) { 4781 FPClassTest Mask = static_cast<FPClassTest>(ClassVal); 4782 KnownFromContext.knownNot(CondIsTrue ? ~Mask : Mask); 4783 } else if (match(Cond, m_ICmp(Pred, m_ElementWiseBitCast(m_Value(LHS)), 4784 m_APInt(RHS)))) { 4785 bool TrueIfSigned; 4786 if (!isSignBitCheck(Pred, *RHS, TrueIfSigned)) 4787 return; 4788 if (TrueIfSigned == CondIsTrue) 4789 KnownFromContext.signBitMustBeOne(); 4790 else 4791 KnownFromContext.signBitMustBeZero(); 4792 } 4793 } 4794 4795 static KnownFPClass computeKnownFPClassFromContext(const Value *V, 4796 const SimplifyQuery &Q) { 4797 KnownFPClass KnownFromContext; 4798 4799 if (!Q.CxtI) 4800 return KnownFromContext; 4801 4802 if (Q.DC && Q.DT) { 4803 // Handle dominating conditions. 
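// For example, given
//   %cmp = fcmp ogt float %x, 1.0
//   br i1 %cmp, label %if.then, label %if.else
// a query for %x with a context instruction in %if.then can rule out nan,
// negative values and zeroes, because the true edge dominates that block.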
4804 for (BranchInst *BI : Q.DC->conditionsFor(V)) { 4805 Value *Cond = BI->getCondition(); 4806 4807 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0)); 4808 if (Q.DT->dominates(Edge0, Q.CxtI->getParent())) 4809 computeKnownFPClassFromCond(V, Cond, /*CondIsTrue=*/true, Q.CxtI, 4810 KnownFromContext); 4811 4812 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1)); 4813 if (Q.DT->dominates(Edge1, Q.CxtI->getParent())) 4814 computeKnownFPClassFromCond(V, Cond, /*CondIsTrue=*/false, Q.CxtI, 4815 KnownFromContext); 4816 } 4817 } 4818 4819 if (!Q.AC) 4820 return KnownFromContext; 4821 4822 // Try to restrict the floating-point classes based on information from 4823 // assumptions. 4824 for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { 4825 if (!AssumeVH) 4826 continue; 4827 CallInst *I = cast<CallInst>(AssumeVH); 4828 4829 assert(I->getFunction() == Q.CxtI->getParent()->getParent() && 4830 "Got assumption for the wrong function!"); 4831 assert(I->getIntrinsicID() == Intrinsic::assume && 4832 "must be an assume intrinsic"); 4833 4834 if (!isValidAssumeForContext(I, Q.CxtI, Q.DT)) 4835 continue; 4836 4837 computeKnownFPClassFromCond(V, I->getArgOperand(0), /*CondIsTrue=*/true, 4838 Q.CxtI, KnownFromContext); 4839 } 4840 4841 return KnownFromContext; 4842 } 4843 4844 void computeKnownFPClass(const Value *V, const APInt &DemandedElts, 4845 FPClassTest InterestedClasses, KnownFPClass &Known, 4846 unsigned Depth, const SimplifyQuery &Q); 4847 4848 static void computeKnownFPClass(const Value *V, KnownFPClass &Known, 4849 FPClassTest InterestedClasses, unsigned Depth, 4850 const SimplifyQuery &Q) { 4851 auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); 4852 APInt DemandedElts = 4853 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); 4854 computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Depth, Q); 4855 } 4856 4857 static void computeKnownFPClassForFPTrunc(const Operator *Op, 4858 const APInt &DemandedElts, 4859 FPClassTest InterestedClasses, 4860 KnownFPClass &Known, unsigned Depth, 4861 const SimplifyQuery &Q) { 4862 if ((InterestedClasses & 4863 (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == fcNone) 4864 return; 4865 4866 KnownFPClass KnownSrc; 4867 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, 4868 KnownSrc, Depth + 1, Q); 4869 4870 // Sign should be preserved 4871 // TODO: Handle cannot be ordered greater than zero 4872 if (KnownSrc.cannotBeOrderedLessThanZero()) 4873 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4874 4875 Known.propagateNaN(KnownSrc, true); 4876 4877 // Infinity needs a range check. 4878 } 4879 4880 void computeKnownFPClass(const Value *V, const APInt &DemandedElts, 4881 FPClassTest InterestedClasses, KnownFPClass &Known, 4882 unsigned Depth, const SimplifyQuery &Q) { 4883 assert(Known.isUnknown() && "should not be called with known information"); 4884 4885 if (!DemandedElts) { 4886 // No demanded elts, better to assume we don't know anything. 
4887 Known.resetAll(); 4888 return; 4889 } 4890 4891 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 4892 4893 if (auto *CFP = dyn_cast<ConstantFP>(V)) { 4894 Known.KnownFPClasses = CFP->getValueAPF().classify(); 4895 Known.SignBit = CFP->isNegative(); 4896 return; 4897 } 4898 4899 if (isa<ConstantAggregateZero>(V)) { 4900 Known.KnownFPClasses = fcPosZero; 4901 Known.SignBit = false; 4902 return; 4903 } 4904 4905 if (isa<PoisonValue>(V)) { 4906 Known.KnownFPClasses = fcNone; 4907 Known.SignBit = false; 4908 return; 4909 } 4910 4911 // Try to handle fixed width vector constants 4912 auto *VFVTy = dyn_cast<FixedVectorType>(V->getType()); 4913 const Constant *CV = dyn_cast<Constant>(V); 4914 if (VFVTy && CV) { 4915 Known.KnownFPClasses = fcNone; 4916 bool SignBitAllZero = true; 4917 bool SignBitAllOne = true; 4918 4919 // For vectors, verify that each element is not NaN. 4920 unsigned NumElts = VFVTy->getNumElements(); 4921 for (unsigned i = 0; i != NumElts; ++i) { 4922 if (!DemandedElts[i]) 4923 continue; 4924 4925 Constant *Elt = CV->getAggregateElement(i); 4926 if (!Elt) { 4927 Known = KnownFPClass(); 4928 return; 4929 } 4930 if (isa<PoisonValue>(Elt)) 4931 continue; 4932 auto *CElt = dyn_cast<ConstantFP>(Elt); 4933 if (!CElt) { 4934 Known = KnownFPClass(); 4935 return; 4936 } 4937 4938 const APFloat &C = CElt->getValueAPF(); 4939 Known.KnownFPClasses |= C.classify(); 4940 if (C.isNegative()) 4941 SignBitAllZero = false; 4942 else 4943 SignBitAllOne = false; 4944 } 4945 if (SignBitAllOne != SignBitAllZero) 4946 Known.SignBit = SignBitAllOne; 4947 return; 4948 } 4949 4950 FPClassTest KnownNotFromFlags = fcNone; 4951 if (const auto *CB = dyn_cast<CallBase>(V)) 4952 KnownNotFromFlags |= CB->getRetNoFPClass(); 4953 else if (const auto *Arg = dyn_cast<Argument>(V)) 4954 KnownNotFromFlags |= Arg->getNoFPClass(); 4955 4956 const Operator *Op = dyn_cast<Operator>(V); 4957 if (const FPMathOperator *FPOp = dyn_cast_or_null<FPMathOperator>(Op)) { 4958 if (FPOp->hasNoNaNs()) 4959 KnownNotFromFlags |= fcNan; 4960 if (FPOp->hasNoInfs()) 4961 KnownNotFromFlags |= fcInf; 4962 } 4963 4964 KnownFPClass AssumedClasses = computeKnownFPClassFromContext(V, Q); 4965 KnownNotFromFlags |= ~AssumedClasses.KnownFPClasses; 4966 4967 // We no longer need to find out about these bits from inputs if we can 4968 // assume this from flags/attributes. 4969 InterestedClasses &= ~KnownNotFromFlags; 4970 4971 auto ClearClassesFromFlags = make_scope_exit([=, &Known] { 4972 Known.knownNot(KnownNotFromFlags); 4973 if (!Known.SignBit && AssumedClasses.SignBit) { 4974 if (*AssumedClasses.SignBit) 4975 Known.signBitMustBeOne(); 4976 else 4977 Known.signBitMustBeZero(); 4978 } 4979 }); 4980 4981 if (!Op) 4982 return; 4983 4984 // All recursive calls that increase depth must come after this. 
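// Note that returning here (or anywhere below) still applies the
// flag/attribute/assumption facts collected above, because the
// ClearClassesFromFlags scope_exit runs on every exit path.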
4985 if (Depth == MaxAnalysisRecursionDepth) 4986 return; 4987 4988 const unsigned Opc = Op->getOpcode(); 4989 switch (Opc) { 4990 case Instruction::FNeg: { 4991 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, 4992 Known, Depth + 1, Q); 4993 Known.fneg(); 4994 break; 4995 } 4996 case Instruction::Select: { 4997 Value *Cond = Op->getOperand(0); 4998 Value *LHS = Op->getOperand(1); 4999 Value *RHS = Op->getOperand(2); 5000 5001 FPClassTest FilterLHS = fcAllFlags; 5002 FPClassTest FilterRHS = fcAllFlags; 5003 5004 Value *TestedValue = nullptr; 5005 FPClassTest MaskIfTrue = fcAllFlags; 5006 FPClassTest MaskIfFalse = fcAllFlags; 5007 uint64_t ClassVal = 0; 5008 const Function *F = cast<Instruction>(Op)->getFunction(); 5009 CmpInst::Predicate Pred; 5010 Value *CmpLHS, *CmpRHS; 5011 if (F && match(Cond, m_FCmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) { 5012 // If the select filters out a value based on the class, it no longer 5013 // participates in the class of the result 5014 5015 // TODO: In some degenerate cases we can infer something if we try again 5016 // without looking through sign operations. 5017 bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS; 5018 std::tie(TestedValue, MaskIfTrue, MaskIfFalse) = 5019 fcmpImpliesClass(Pred, *F, CmpLHS, CmpRHS, LookThroughFAbsFNeg); 5020 } else if (match(Cond, 5021 m_Intrinsic<Intrinsic::is_fpclass>( 5022 m_Value(TestedValue), m_ConstantInt(ClassVal)))) { 5023 FPClassTest TestedMask = static_cast<FPClassTest>(ClassVal); 5024 MaskIfTrue = TestedMask; 5025 MaskIfFalse = ~TestedMask; 5026 } 5027 5028 if (TestedValue == LHS) { 5029 // match !isnan(x) ? x : y 5030 FilterLHS = MaskIfTrue; 5031 } else if (TestedValue == RHS) { // && IsExactClass 5032 // match !isnan(x) ? y : x 5033 FilterRHS = MaskIfFalse; 5034 } 5035 5036 KnownFPClass Known2; 5037 computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known, 5038 Depth + 1, Q); 5039 Known.KnownFPClasses &= FilterLHS; 5040 5041 computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS, 5042 Known2, Depth + 1, Q); 5043 Known2.KnownFPClasses &= FilterRHS; 5044 5045 Known |= Known2; 5046 break; 5047 } 5048 case Instruction::Call: { 5049 const CallInst *II = cast<CallInst>(Op); 5050 const Intrinsic::ID IID = II->getIntrinsicID(); 5051 switch (IID) { 5052 case Intrinsic::fabs: { 5053 if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) { 5054 // If we only care about the sign bit we don't need to inspect the 5055 // operand. 5056 computeKnownFPClass(II->getArgOperand(0), DemandedElts, 5057 InterestedClasses, Known, Depth + 1, Q); 5058 } 5059 5060 Known.fabs(); 5061 break; 5062 } 5063 case Intrinsic::copysign: { 5064 KnownFPClass KnownSign; 5065 5066 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 5067 Known, Depth + 1, Q); 5068 computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses, 5069 KnownSign, Depth + 1, Q); 5070 Known.copysign(KnownSign); 5071 break; 5072 } 5073 case Intrinsic::fma: 5074 case Intrinsic::fmuladd: { 5075 if ((InterestedClasses & fcNegative) == fcNone) 5076 break; 5077 5078 if (II->getArgOperand(0) != II->getArgOperand(1)) 5079 break; 5080 5081 // The multiply cannot be -0 and therefore the add can't be -0 5082 Known.knownNot(fcNegZero); 5083 5084 // x * x + y is non-negative if y is non-negative. 
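// For example, llvm.fmuladd(x, x, 2.0) can never be negative (not even
// -0.0): x * x is never ordered-less-than-zero, and adding a non-negative
// addend cannot produce a negative result.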
5085 KnownFPClass KnownAddend; 5086 computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses, 5087 KnownAddend, Depth + 1, Q); 5088 5089 if (KnownAddend.cannotBeOrderedLessThanZero()) 5090 Known.knownNot(fcNegative); 5091 break; 5092 } 5093 case Intrinsic::sqrt: 5094 case Intrinsic::experimental_constrained_sqrt: { 5095 KnownFPClass KnownSrc; 5096 FPClassTest InterestedSrcs = InterestedClasses; 5097 if (InterestedClasses & fcNan) 5098 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; 5099 5100 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 5101 KnownSrc, Depth + 1, Q); 5102 5103 if (KnownSrc.isKnownNeverPosInfinity()) 5104 Known.knownNot(fcPosInf); 5105 if (KnownSrc.isKnownNever(fcSNan)) 5106 Known.knownNot(fcSNan); 5107 5108 // Any negative value besides -0 returns a nan. 5109 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) 5110 Known.knownNot(fcNan); 5111 5112 // The only negative value that can be returned is -0 for -0 inputs. 5113 Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal); 5114 5115 // If the input denormal mode could be PreserveSign, a negative 5116 // subnormal input could produce a negative zero output. 5117 const Function *F = II->getFunction(); 5118 if (Q.IIQ.hasNoSignedZeros(II) || 5119 (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))) 5120 Known.knownNot(fcNegZero); 5121 5122 break; 5123 } 5124 case Intrinsic::sin: 5125 case Intrinsic::cos: { 5126 // Return NaN on infinite inputs. 5127 KnownFPClass KnownSrc; 5128 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 5129 KnownSrc, Depth + 1, Q); 5130 Known.knownNot(fcInf); 5131 if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity()) 5132 Known.knownNot(fcNan); 5133 break; 5134 } 5135 case Intrinsic::maxnum: 5136 case Intrinsic::minnum: 5137 case Intrinsic::minimum: 5138 case Intrinsic::maximum: { 5139 KnownFPClass KnownLHS, KnownRHS; 5140 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 5141 KnownLHS, Depth + 1, Q); 5142 computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses, 5143 KnownRHS, Depth + 1, Q); 5144 5145 bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN(); 5146 Known = KnownLHS | KnownRHS; 5147 5148 // If either operand is not NaN, the result is not NaN. 5149 if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)) 5150 Known.knownNot(fcNan); 5151 5152 if (IID == Intrinsic::maxnum) { 5153 // If at least one operand is known to be positive, the result must be 5154 // positive. 5155 if ((KnownLHS.cannotBeOrderedLessThanZero() && 5156 KnownLHS.isKnownNeverNaN()) || 5157 (KnownRHS.cannotBeOrderedLessThanZero() && 5158 KnownRHS.isKnownNeverNaN())) 5159 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 5160 } else if (IID == Intrinsic::maximum) { 5161 // If at least one operand is known to be positive, the result must be 5162 // positive. 5163 if (KnownLHS.cannotBeOrderedLessThanZero() || 5164 KnownRHS.cannotBeOrderedLessThanZero()) 5165 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 5166 } else if (IID == Intrinsic::minnum) { 5167 // If at least one operand is known to be negative, the result must be 5168 // negative. 
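// ("Negative" here really means never ordered-greater-than-zero; the
// isKnownNeverNaN() requirement below matters because minnum returns the
// other operand when one input is nan, so only an operand that is itself
// never nan can bound the result.)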
5169 if ((KnownLHS.cannotBeOrderedGreaterThanZero() && 5170 KnownLHS.isKnownNeverNaN()) || 5171 (KnownRHS.cannotBeOrderedGreaterThanZero() && 5172 KnownRHS.isKnownNeverNaN())) 5173 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 5174 } else { 5175 // If at least one operand is known to be negative, the result must be 5176 // negative. 5177 if (KnownLHS.cannotBeOrderedGreaterThanZero() || 5178 KnownRHS.cannotBeOrderedGreaterThanZero()) 5179 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 5180 } 5181 5182 // Fixup zero handling if denormals could be returned as a zero. 5183 // 5184 // As there's no spec for denormal flushing, be conservative with the 5185 // treatment of denormals that could be flushed to zero. For older 5186 // subtargets on AMDGPU the min/max instructions would not flush the 5187 // output and return the original value. 5188 // 5189 if ((Known.KnownFPClasses & fcZero) != fcNone && 5190 !Known.isKnownNeverSubnormal()) { 5191 const Function *Parent = II->getFunction(); 5192 if (!Parent) 5193 break; 5194 5195 DenormalMode Mode = Parent->getDenormalMode( 5196 II->getType()->getScalarType()->getFltSemantics()); 5197 if (Mode != DenormalMode::getIEEE()) 5198 Known.KnownFPClasses |= fcZero; 5199 } 5200 5201 if (Known.isKnownNeverNaN()) { 5202 if (KnownLHS.SignBit && KnownRHS.SignBit && 5203 *KnownLHS.SignBit == *KnownRHS.SignBit) { 5204 if (*KnownLHS.SignBit) 5205 Known.signBitMustBeOne(); 5206 else 5207 Known.signBitMustBeZero(); 5208 } else if ((IID == Intrinsic::maximum || IID == Intrinsic::minimum) || 5209 ((KnownLHS.isKnownNeverNegZero() || 5210 KnownRHS.isKnownNeverPosZero()) && 5211 (KnownLHS.isKnownNeverPosZero() || 5212 KnownRHS.isKnownNeverNegZero()))) { 5213 if ((IID == Intrinsic::maximum || IID == Intrinsic::maxnum) && 5214 (KnownLHS.SignBit == false || KnownRHS.SignBit == false)) 5215 Known.signBitMustBeZero(); 5216 else if ((IID == Intrinsic::minimum || IID == Intrinsic::minnum) && 5217 (KnownLHS.SignBit == true || KnownRHS.SignBit == true)) 5218 Known.signBitMustBeOne(); 5219 } 5220 } 5221 break; 5222 } 5223 case Intrinsic::canonicalize: { 5224 KnownFPClass KnownSrc; 5225 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 5226 KnownSrc, Depth + 1, Q); 5227 5228 // This is essentially a stronger form of 5229 // propagateCanonicalizingSrc. Other "canonicalizing" operations don't 5230 // actually have an IR canonicalization guarantee. 5231 5232 // Canonicalize may flush denormals to zero, so we have to consider the 5233 // denormal mode to preserve known-not-0 knowledge. 5234 Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan; 5235 5236 // Stronger version of propagateNaN 5237 // Canonicalize is guaranteed to quiet signaling nans. 5238 if (KnownSrc.isKnownNeverNaN()) 5239 Known.knownNot(fcNan); 5240 else 5241 Known.knownNot(fcSNan); 5242 5243 const Function *F = II->getFunction(); 5244 if (!F) 5245 break; 5246 5247 // If the parent function flushes denormals, the canonical output cannot 5248 // be a denormal. 
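// For example, under "denormal-fp-math"="preserve-sign,preserve-sign" a
// subnormal operand of llvm.canonicalize is flushed to a zero of the same
// sign, so fcSubnormal can be cleared below.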
5249 const fltSemantics &FPType = 5250 II->getType()->getScalarType()->getFltSemantics(); 5251 DenormalMode DenormMode = F->getDenormalMode(FPType); 5252 if (DenormMode == DenormalMode::getIEEE()) { 5253 if (KnownSrc.isKnownNever(fcPosZero)) 5254 Known.knownNot(fcPosZero); 5255 if (KnownSrc.isKnownNever(fcNegZero)) 5256 Known.knownNot(fcNegZero); 5257 break; 5258 } 5259 5260 if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero()) 5261 Known.knownNot(fcSubnormal); 5262 5263 if (DenormMode.Input == DenormalMode::PositiveZero || 5264 (DenormMode.Output == DenormalMode::PositiveZero && 5265 DenormMode.Input == DenormalMode::IEEE)) 5266 Known.knownNot(fcNegZero); 5267 5268 break; 5269 } 5270 case Intrinsic::vector_reduce_fmax: 5271 case Intrinsic::vector_reduce_fmin: 5272 case Intrinsic::vector_reduce_fmaximum: 5273 case Intrinsic::vector_reduce_fminimum: { 5274 // reduce min/max will choose an element from one of the vector elements, 5275 // so we can infer and class information that is common to all elements. 5276 Known = computeKnownFPClass(II->getArgOperand(0), II->getFastMathFlags(), 5277 InterestedClasses, Depth + 1, Q); 5278 // Can only propagate sign if output is never NaN. 5279 if (!Known.isKnownNeverNaN()) 5280 Known.SignBit.reset(); 5281 break; 5282 } 5283 // reverse preserves all characteristics of the input vec's element. 5284 case Intrinsic::vector_reverse: 5285 Known = computeKnownFPClass( 5286 II->getArgOperand(0), DemandedElts.reverseBits(), 5287 II->getFastMathFlags(), InterestedClasses, Depth + 1, Q); 5288 break; 5289 case Intrinsic::trunc: 5290 case Intrinsic::floor: 5291 case Intrinsic::ceil: 5292 case Intrinsic::rint: 5293 case Intrinsic::nearbyint: 5294 case Intrinsic::round: 5295 case Intrinsic::roundeven: { 5296 KnownFPClass KnownSrc; 5297 FPClassTest InterestedSrcs = InterestedClasses; 5298 if (InterestedSrcs & fcPosFinite) 5299 InterestedSrcs |= fcPosFinite; 5300 if (InterestedSrcs & fcNegFinite) 5301 InterestedSrcs |= fcNegFinite; 5302 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 5303 KnownSrc, Depth + 1, Q); 5304 5305 // Integer results cannot be subnormal. 5306 Known.knownNot(fcSubnormal); 5307 5308 Known.propagateNaN(KnownSrc, true); 5309 5310 // Pass through infinities, except PPC_FP128 is a special case for 5311 // intrinsics other than trunc. 
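// (ppc_fp128 is an unevaluated pair of doubles rather than a single IEEE
// format, which is why isMultiUnitFPType() guards the infinity propagation
// for everything except trunc.)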
5312 if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) { 5313 if (KnownSrc.isKnownNeverPosInfinity()) 5314 Known.knownNot(fcPosInf); 5315 if (KnownSrc.isKnownNeverNegInfinity()) 5316 Known.knownNot(fcNegInf); 5317 } 5318 5319 // Negative round ups to 0 produce -0 5320 if (KnownSrc.isKnownNever(fcPosFinite)) 5321 Known.knownNot(fcPosFinite); 5322 if (KnownSrc.isKnownNever(fcNegFinite)) 5323 Known.knownNot(fcNegFinite); 5324 5325 break; 5326 } 5327 case Intrinsic::exp: 5328 case Intrinsic::exp2: 5329 case Intrinsic::exp10: { 5330 Known.knownNot(fcNegative); 5331 if ((InterestedClasses & fcNan) == fcNone) 5332 break; 5333 5334 KnownFPClass KnownSrc; 5335 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 5336 KnownSrc, Depth + 1, Q); 5337 if (KnownSrc.isKnownNeverNaN()) { 5338 Known.knownNot(fcNan); 5339 Known.signBitMustBeZero(); 5340 } 5341 5342 break; 5343 } 5344 case Intrinsic::fptrunc_round: { 5345 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known, 5346 Depth, Q); 5347 break; 5348 } 5349 case Intrinsic::log: 5350 case Intrinsic::log10: 5351 case Intrinsic::log2: 5352 case Intrinsic::experimental_constrained_log: 5353 case Intrinsic::experimental_constrained_log10: 5354 case Intrinsic::experimental_constrained_log2: { 5355 // log(+inf) -> +inf 5356 // log([+-]0.0) -> -inf 5357 // log(-inf) -> nan 5358 // log(-x) -> nan 5359 if ((InterestedClasses & (fcNan | fcInf)) == fcNone) 5360 break; 5361 5362 FPClassTest InterestedSrcs = InterestedClasses; 5363 if ((InterestedClasses & fcNegInf) != fcNone) 5364 InterestedSrcs |= fcZero | fcSubnormal; 5365 if ((InterestedClasses & fcNan) != fcNone) 5366 InterestedSrcs |= fcNan | (fcNegative & ~fcNan); 5367 5368 KnownFPClass KnownSrc; 5369 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 5370 KnownSrc, Depth + 1, Q); 5371 5372 if (KnownSrc.isKnownNeverPosInfinity()) 5373 Known.knownNot(fcPosInf); 5374 5375 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) 5376 Known.knownNot(fcNan); 5377 5378 const Function *F = II->getFunction(); 5379 if (F && KnownSrc.isKnownNeverLogicalZero(*F, II->getType())) 5380 Known.knownNot(fcNegInf); 5381 5382 break; 5383 } 5384 case Intrinsic::powi: { 5385 if ((InterestedClasses & fcNegative) == fcNone) 5386 break; 5387 5388 const Value *Exp = II->getArgOperand(1); 5389 Type *ExpTy = Exp->getType(); 5390 unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth(); 5391 KnownBits ExponentKnownBits(BitWidth); 5392 computeKnownBits(Exp, isa<VectorType>(ExpTy) ? DemandedElts : APInt(1, 1), 5393 ExponentKnownBits, Depth + 1, Q); 5394 5395 if (ExponentKnownBits.Zero[0]) { // Is even 5396 Known.knownNot(fcNegative); 5397 break; 5398 } 5399 5400 // Given that exp is an integer, here are the 5401 // ways that pow can return a negative value: 5402 // 5403 // pow(-x, exp) --> negative if exp is odd and x is negative. 5404 // pow(-0, exp) --> -inf if exp is negative odd. 5405 // pow(-0, exp) --> -0 if exp is positive odd. 5406 // pow(-inf, exp) --> -0 if exp is negative odd. 5407 // pow(-inf, exp) --> -inf if exp is positive odd. 
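// Every case above requires a negative, -0 or -inf base, so once the base
// is known never to be in fcNegative the result cannot be negative either.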
5408 KnownFPClass KnownSrc; 5409 computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative, 5410 KnownSrc, Depth + 1, Q); 5411 if (KnownSrc.isKnownNever(fcNegative)) 5412 Known.knownNot(fcNegative); 5413 break; 5414 } 5415 case Intrinsic::ldexp: { 5416 KnownFPClass KnownSrc; 5417 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 5418 KnownSrc, Depth + 1, Q); 5419 Known.propagateNaN(KnownSrc, /*PropagateSign=*/true); 5420 5421 // Sign is preserved, but underflows may produce zeroes. 5422 if (KnownSrc.isKnownNever(fcNegative)) 5423 Known.knownNot(fcNegative); 5424 else if (KnownSrc.cannotBeOrderedLessThanZero()) 5425 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 5426 5427 if (KnownSrc.isKnownNever(fcPositive)) 5428 Known.knownNot(fcPositive); 5429 else if (KnownSrc.cannotBeOrderedGreaterThanZero()) 5430 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 5431 5432 // Can refine inf/zero handling based on the exponent operand. 5433 const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf; 5434 if ((InterestedClasses & ExpInfoMask) == fcNone) 5435 break; 5436 if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone) 5437 break; 5438 5439 const fltSemantics &Flt = 5440 II->getType()->getScalarType()->getFltSemantics(); 5441 unsigned Precision = APFloat::semanticsPrecision(Flt); 5442 const Value *ExpArg = II->getArgOperand(1); 5443 ConstantRange ExpRange = computeConstantRange( 5444 ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1); 5445 5446 const int MantissaBits = Precision - 1; 5447 if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits))) 5448 Known.knownNot(fcSubnormal); 5449 5450 const Function *F = II->getFunction(); 5451 const APInt *ConstVal = ExpRange.getSingleElement(); 5452 if (ConstVal && ConstVal->isZero()) { 5453 // ldexp(x, 0) -> x, so propagate everything. 5454 Known.propagateCanonicalizingSrc(KnownSrc, *F, II->getType()); 5455 } else if (ExpRange.isAllNegative()) { 5456 // If we know the power is <= 0, can't introduce inf 5457 if (KnownSrc.isKnownNeverPosInfinity()) 5458 Known.knownNot(fcPosInf); 5459 if (KnownSrc.isKnownNeverNegInfinity()) 5460 Known.knownNot(fcNegInf); 5461 } else if (ExpRange.isAllNonNegative()) { 5462 // If we know the power is >= 0, can't introduce subnormal or zero 5463 if (KnownSrc.isKnownNeverPosSubnormal()) 5464 Known.knownNot(fcPosSubnormal); 5465 if (KnownSrc.isKnownNeverNegSubnormal()) 5466 Known.knownNot(fcNegSubnormal); 5467 if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, II->getType())) 5468 Known.knownNot(fcPosZero); 5469 if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType())) 5470 Known.knownNot(fcNegZero); 5471 } 5472 5473 break; 5474 } 5475 case Intrinsic::arithmetic_fence: { 5476 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 5477 Known, Depth + 1, Q); 5478 break; 5479 } 5480 case Intrinsic::experimental_constrained_sitofp: 5481 case Intrinsic::experimental_constrained_uitofp: 5482 // Cannot produce nan 5483 Known.knownNot(fcNan); 5484 5485 // sitofp and uitofp turn into +0.0 for zero. 
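// (This mirrors the handling of the plain SIToFP/UIToFP instructions in the
// opcode switch further below, minus the infinity range check noted in the
// TODO.)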
5486 Known.knownNot(fcNegZero); 5487 5488 // Integers cannot be subnormal 5489 Known.knownNot(fcSubnormal); 5490 5491 if (IID == Intrinsic::experimental_constrained_uitofp) 5492 Known.signBitMustBeZero(); 5493 5494 // TODO: Copy inf handling from instructions 5495 break; 5496 default: 5497 break; 5498 } 5499 5500 break; 5501 } 5502 case Instruction::FAdd: 5503 case Instruction::FSub: { 5504 KnownFPClass KnownLHS, KnownRHS; 5505 bool WantNegative = 5506 Op->getOpcode() == Instruction::FAdd && 5507 (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone; 5508 bool WantNaN = (InterestedClasses & fcNan) != fcNone; 5509 bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone; 5510 5511 if (!WantNaN && !WantNegative && !WantNegZero) 5512 break; 5513 5514 FPClassTest InterestedSrcs = InterestedClasses; 5515 if (WantNegative) 5516 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; 5517 if (InterestedClasses & fcNan) 5518 InterestedSrcs |= fcInf; 5519 computeKnownFPClass(Op->getOperand(1), DemandedElts, InterestedSrcs, 5520 KnownRHS, Depth + 1, Q); 5521 5522 if ((WantNaN && KnownRHS.isKnownNeverNaN()) || 5523 (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) || 5524 WantNegZero || Opc == Instruction::FSub) { 5525 5526 // RHS is canonically cheaper to compute. Skip inspecting the LHS if 5527 // there's no point. 5528 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedSrcs, 5529 KnownLHS, Depth + 1, Q); 5530 // Adding positive and negative infinity produces NaN. 5531 // TODO: Check sign of infinities. 5532 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 5533 (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity())) 5534 Known.knownNot(fcNan); 5535 5536 // FIXME: Context function should always be passed in separately 5537 const Function *F = cast<Instruction>(Op)->getFunction(); 5538 5539 if (Op->getOpcode() == Instruction::FAdd) { 5540 if (KnownLHS.cannotBeOrderedLessThanZero() && 5541 KnownRHS.cannotBeOrderedLessThanZero()) 5542 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 5543 if (!F) 5544 break; 5545 5546 // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0. 5547 if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) || 5548 KnownRHS.isKnownNeverLogicalNegZero(*F, Op->getType())) && 5549 // Make sure output negative denormal can't flush to -0 5550 outputDenormalIsIEEEOrPosZero(*F, Op->getType())) 5551 Known.knownNot(fcNegZero); 5552 } else { 5553 if (!F) 5554 break; 5555 5556 // Only fsub -0, +0 can return -0 5557 if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) || 5558 KnownRHS.isKnownNeverLogicalPosZero(*F, Op->getType())) && 5559 // Make sure output negative denormal can't flush to -0 5560 outputDenormalIsIEEEOrPosZero(*F, Op->getType())) 5561 Known.knownNot(fcNegZero); 5562 } 5563 } 5564 5565 break; 5566 } 5567 case Instruction::FMul: { 5568 // X * X is always non-negative or a NaN. 5569 if (Op->getOperand(0) == Op->getOperand(1)) 5570 Known.knownNot(fcNegative); 5571 5572 if ((InterestedClasses & fcNan) != fcNan) 5573 break; 5574 5575 // fcSubnormal is only needed in case of DAZ. 
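// (If the mode treats denormal inputs as zero, a subnormal operand can
// behave like 0.0, and 0.0 * inf is the only way fmul produces a nan from
// non-nan inputs.)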
5576 const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal; 5577 5578 KnownFPClass KnownLHS, KnownRHS; 5579 computeKnownFPClass(Op->getOperand(1), DemandedElts, NeedForNan, KnownRHS, 5580 Depth + 1, Q); 5581 if (!KnownRHS.isKnownNeverNaN()) 5582 break; 5583 5584 computeKnownFPClass(Op->getOperand(0), DemandedElts, NeedForNan, KnownLHS, 5585 Depth + 1, Q); 5586 if (!KnownLHS.isKnownNeverNaN()) 5587 break; 5588 5589 if (KnownLHS.SignBit && KnownRHS.SignBit) { 5590 if (*KnownLHS.SignBit == *KnownRHS.SignBit) 5591 Known.signBitMustBeZero(); 5592 else 5593 Known.signBitMustBeOne(); 5594 } 5595 5596 // If 0 * +/-inf produces NaN. 5597 if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) { 5598 Known.knownNot(fcNan); 5599 break; 5600 } 5601 5602 const Function *F = cast<Instruction>(Op)->getFunction(); 5603 if (!F) 5604 break; 5605 5606 if ((KnownRHS.isKnownNeverInfinity() || 5607 KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) && 5608 (KnownLHS.isKnownNeverInfinity() || 5609 KnownRHS.isKnownNeverLogicalZero(*F, Op->getType()))) 5610 Known.knownNot(fcNan); 5611 5612 break; 5613 } 5614 case Instruction::FDiv: 5615 case Instruction::FRem: { 5616 if (Op->getOperand(0) == Op->getOperand(1)) { 5617 // TODO: Could filter out snan if we inspect the operand 5618 if (Op->getOpcode() == Instruction::FDiv) { 5619 // X / X is always exactly 1.0 or a NaN. 5620 Known.KnownFPClasses = fcNan | fcPosNormal; 5621 } else { 5622 // X % X is always exactly [+-]0.0 or a NaN. 5623 Known.KnownFPClasses = fcNan | fcZero; 5624 } 5625 5626 break; 5627 } 5628 5629 const bool WantNan = (InterestedClasses & fcNan) != fcNone; 5630 const bool WantNegative = (InterestedClasses & fcNegative) != fcNone; 5631 const bool WantPositive = 5632 Opc == Instruction::FRem && (InterestedClasses & fcPositive) != fcNone; 5633 if (!WantNan && !WantNegative && !WantPositive) 5634 break; 5635 5636 KnownFPClass KnownLHS, KnownRHS; 5637 5638 computeKnownFPClass(Op->getOperand(1), DemandedElts, 5639 fcNan | fcInf | fcZero | fcNegative, KnownRHS, 5640 Depth + 1, Q); 5641 5642 bool KnowSomethingUseful = 5643 KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative); 5644 5645 if (KnowSomethingUseful || WantPositive) { 5646 const FPClassTest InterestedLHS = 5647 WantPositive ? fcAllFlags 5648 : fcNan | fcInf | fcZero | fcSubnormal | fcNegative; 5649 5650 computeKnownFPClass(Op->getOperand(0), DemandedElts, 5651 InterestedClasses & InterestedLHS, KnownLHS, 5652 Depth + 1, Q); 5653 } 5654 5655 const Function *F = cast<Instruction>(Op)->getFunction(); 5656 5657 if (Op->getOpcode() == Instruction::FDiv) { 5658 // Only 0/0, Inf/Inf produce NaN. 5659 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 5660 (KnownLHS.isKnownNeverInfinity() || 5661 KnownRHS.isKnownNeverInfinity()) && 5662 ((F && KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) || 5663 (F && KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))) { 5664 Known.knownNot(fcNan); 5665 } 5666 5667 // X / -0.0 is -Inf (or NaN). 5668 // +X / +X is +X 5669 if (KnownLHS.isKnownNever(fcNegative) && KnownRHS.isKnownNever(fcNegative)) 5670 Known.knownNot(fcNegative); 5671 } else { 5672 // Inf REM x and x REM 0 produce NaN. 5673 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 5674 KnownLHS.isKnownNeverInfinity() && F && 5675 KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())) { 5676 Known.knownNot(fcNan); 5677 } 5678 5679 // The sign for frem is the same as the first operand. 
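// For example, frem -5.5, 2.0 is -1.5 while frem 5.5, -2.0 is 1.5,
// following fmod semantics.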
5680 if (KnownLHS.cannotBeOrderedLessThanZero()) 5681 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 5682 if (KnownLHS.cannotBeOrderedGreaterThanZero()) 5683 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 5684 5685 // See if we can be more aggressive about the sign of 0. 5686 if (KnownLHS.isKnownNever(fcNegative)) 5687 Known.knownNot(fcNegative); 5688 if (KnownLHS.isKnownNever(fcPositive)) 5689 Known.knownNot(fcPositive); 5690 } 5691 5692 break; 5693 } 5694 case Instruction::FPExt: { 5695 // Infinity, nan and zero propagate from source. 5696 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, 5697 Known, Depth + 1, Q); 5698 5699 const fltSemantics &DstTy = 5700 Op->getType()->getScalarType()->getFltSemantics(); 5701 const fltSemantics &SrcTy = 5702 Op->getOperand(0)->getType()->getScalarType()->getFltSemantics(); 5703 5704 // All subnormal inputs should be in the normal range in the result type. 5705 if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy)) { 5706 if (Known.KnownFPClasses & fcPosSubnormal) 5707 Known.KnownFPClasses |= fcPosNormal; 5708 if (Known.KnownFPClasses & fcNegSubnormal) 5709 Known.KnownFPClasses |= fcNegNormal; 5710 Known.knownNot(fcSubnormal); 5711 } 5712 5713 // Sign bit of a nan isn't guaranteed. 5714 if (!Known.isKnownNeverNaN()) 5715 Known.SignBit = std::nullopt; 5716 break; 5717 } 5718 case Instruction::FPTrunc: { 5719 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known, 5720 Depth, Q); 5721 break; 5722 } 5723 case Instruction::SIToFP: 5724 case Instruction::UIToFP: { 5725 // Cannot produce nan 5726 Known.knownNot(fcNan); 5727 5728 // Integers cannot be subnormal 5729 Known.knownNot(fcSubnormal); 5730 5731 // sitofp and uitofp turn into +0.0 for zero. 5732 Known.knownNot(fcNegZero); 5733 if (Op->getOpcode() == Instruction::UIToFP) 5734 Known.signBitMustBeZero(); 5735 5736 if (InterestedClasses & fcInf) { 5737 // Get width of largest magnitude integer (remove a bit if signed). 5738 // This still works for a signed minimum value because the largest FP 5739 // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx). 5740 int IntSize = Op->getOperand(0)->getType()->getScalarSizeInBits(); 5741 if (Op->getOpcode() == Instruction::SIToFP) 5742 --IntSize; 5743 5744 // If the exponent of the largest finite FP value can hold the largest 5745 // integer, the result of the cast must be finite. 5746 Type *FPTy = Op->getType()->getScalarType(); 5747 if (ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize) 5748 Known.knownNot(fcInf); 5749 } 5750 5751 break; 5752 } 5753 case Instruction::ExtractElement: { 5754 // Look through extract element. If the index is non-constant or 5755 // out-of-range demand all elements, otherwise just the extracted element. 
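// For example, extractelement <4 x float> %v, i64 2 only needs the class
// of lane 2, so DemandedVecElts becomes 0b0100 below.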
5756 const Value *Vec = Op->getOperand(0); 5757 const Value *Idx = Op->getOperand(1); 5758 auto *CIdx = dyn_cast<ConstantInt>(Idx); 5759 5760 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) { 5761 unsigned NumElts = VecTy->getNumElements(); 5762 APInt DemandedVecElts = APInt::getAllOnes(NumElts); 5763 if (CIdx && CIdx->getValue().ult(NumElts)) 5764 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); 5765 return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known, 5766 Depth + 1, Q); 5767 } 5768 5769 break; 5770 } 5771 case Instruction::InsertElement: { 5772 if (isa<ScalableVectorType>(Op->getType())) 5773 return; 5774 5775 const Value *Vec = Op->getOperand(0); 5776 const Value *Elt = Op->getOperand(1); 5777 auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2)); 5778 unsigned NumElts = DemandedElts.getBitWidth(); 5779 APInt DemandedVecElts = DemandedElts; 5780 bool NeedsElt = true; 5781 // If we know the index we are inserting to, clear it from Vec check. 5782 if (CIdx && CIdx->getValue().ult(NumElts)) { 5783 DemandedVecElts.clearBit(CIdx->getZExtValue()); 5784 NeedsElt = DemandedElts[CIdx->getZExtValue()]; 5785 } 5786 5787 // Do we demand the inserted element? 5788 if (NeedsElt) { 5789 computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q); 5790 // If we don't know any bits, early out. 5791 if (Known.isUnknown()) 5792 break; 5793 } else { 5794 Known.KnownFPClasses = fcNone; 5795 } 5796 5797 // Do we need anymore elements from Vec? 5798 if (!DemandedVecElts.isZero()) { 5799 KnownFPClass Known2; 5800 computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2, 5801 Depth + 1, Q); 5802 Known |= Known2; 5803 } 5804 5805 break; 5806 } 5807 case Instruction::ShuffleVector: { 5808 // For undef elements, we don't know anything about the common state of 5809 // the shuffle result. 5810 APInt DemandedLHS, DemandedRHS; 5811 auto *Shuf = dyn_cast<ShuffleVectorInst>(Op); 5812 if (!Shuf || !getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) 5813 return; 5814 5815 if (!!DemandedLHS) { 5816 const Value *LHS = Shuf->getOperand(0); 5817 computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known, 5818 Depth + 1, Q); 5819 5820 // If we don't know any bits, early out. 
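// (The merge with the second shuffle operand below uses operator|=, which
// can only add possible classes, so an already-unknown result cannot be
// improved.)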
5821 if (Known.isUnknown()) 5822 break; 5823 } else { 5824 Known.KnownFPClasses = fcNone; 5825 } 5826 5827 if (!!DemandedRHS) { 5828 KnownFPClass Known2; 5829 const Value *RHS = Shuf->getOperand(1); 5830 computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2, 5831 Depth + 1, Q); 5832 Known |= Known2; 5833 } 5834 5835 break; 5836 } 5837 case Instruction::ExtractValue: { 5838 const ExtractValueInst *Extract = cast<ExtractValueInst>(Op); 5839 ArrayRef<unsigned> Indices = Extract->getIndices(); 5840 const Value *Src = Extract->getAggregateOperand(); 5841 if (isa<StructType>(Src->getType()) && Indices.size() == 1 && 5842 Indices[0] == 0) { 5843 if (const auto *II = dyn_cast<IntrinsicInst>(Src)) { 5844 switch (II->getIntrinsicID()) { 5845 case Intrinsic::frexp: { 5846 Known.knownNot(fcSubnormal); 5847 5848 KnownFPClass KnownSrc; 5849 computeKnownFPClass(II->getArgOperand(0), DemandedElts, 5850 InterestedClasses, KnownSrc, Depth + 1, Q); 5851 5852 const Function *F = cast<Instruction>(Op)->getFunction(); 5853 5854 if (KnownSrc.isKnownNever(fcNegative)) 5855 Known.knownNot(fcNegative); 5856 else { 5857 if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, Op->getType())) 5858 Known.knownNot(fcNegZero); 5859 if (KnownSrc.isKnownNever(fcNegInf)) 5860 Known.knownNot(fcNegInf); 5861 } 5862 5863 if (KnownSrc.isKnownNever(fcPositive)) 5864 Known.knownNot(fcPositive); 5865 else { 5866 if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, Op->getType())) 5867 Known.knownNot(fcPosZero); 5868 if (KnownSrc.isKnownNever(fcPosInf)) 5869 Known.knownNot(fcPosInf); 5870 } 5871 5872 Known.propagateNaN(KnownSrc); 5873 return; 5874 } 5875 default: 5876 break; 5877 } 5878 } 5879 } 5880 5881 computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1, 5882 Q); 5883 break; 5884 } 5885 case Instruction::PHI: { 5886 const PHINode *P = cast<PHINode>(Op); 5887 // Unreachable blocks may have zero-operand PHI nodes. 5888 if (P->getNumIncomingValues() == 0) 5889 break; 5890 5891 // Otherwise take the unions of the known bit sets of the operands, 5892 // taking conservative care to avoid excessive recursion. 5893 const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2; 5894 5895 if (Depth < PhiRecursionLimit) { 5896 // Skip if every incoming value references to ourself. 5897 if (isa_and_nonnull<UndefValue>(P->hasConstantValue())) 5898 break; 5899 5900 bool First = true; 5901 5902 for (const Use &U : P->operands()) { 5903 Value *IncValue = U.get(); 5904 // Skip direct self references. 5905 if (IncValue == P) 5906 continue; 5907 5908 KnownFPClass KnownSrc; 5909 // Recurse, but cap the recursion to two levels, because we don't want 5910 // to waste time spinning around in loops. We need at least depth 2 to 5911 // detect known sign bits. 
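// (Note the call below passes PhiRecursionLimit as the depth instead of
// Depth + 1, which is what enforces the two-level cap regardless of the
// current Depth.)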
5912 computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc, 5913 PhiRecursionLimit, 5914 Q.getWithoutCondContext().getWithInstruction( 5915 P->getIncomingBlock(U)->getTerminator())); 5916 5917 if (First) { 5918 Known = KnownSrc; 5919 First = false; 5920 } else { 5921 Known |= KnownSrc; 5922 } 5923 5924 if (Known.KnownFPClasses == fcAllFlags) 5925 break; 5926 } 5927 } 5928 5929 break; 5930 } 5931 default: 5932 break; 5933 } 5934 } 5935 5936 KnownFPClass llvm::computeKnownFPClass(const Value *V, 5937 const APInt &DemandedElts, 5938 FPClassTest InterestedClasses, 5939 unsigned Depth, 5940 const SimplifyQuery &SQ) { 5941 KnownFPClass KnownClasses; 5942 ::computeKnownFPClass(V, DemandedElts, InterestedClasses, KnownClasses, Depth, 5943 SQ); 5944 return KnownClasses; 5945 } 5946 5947 KnownFPClass llvm::computeKnownFPClass(const Value *V, 5948 FPClassTest InterestedClasses, 5949 unsigned Depth, 5950 const SimplifyQuery &SQ) { 5951 KnownFPClass Known; 5952 ::computeKnownFPClass(V, Known, InterestedClasses, Depth, SQ); 5953 return Known; 5954 } 5955 5956 Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { 5957 5958 // All byte-wide stores are splatable, even of arbitrary variables. 5959 if (V->getType()->isIntegerTy(8)) 5960 return V; 5961 5962 LLVMContext &Ctx = V->getContext(); 5963 5964 // Undef don't care. 5965 auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx)); 5966 if (isa<UndefValue>(V)) 5967 return UndefInt8; 5968 5969 // Return Undef for zero-sized type. 5970 if (DL.getTypeStoreSize(V->getType()).isZero()) 5971 return UndefInt8; 5972 5973 Constant *C = dyn_cast<Constant>(V); 5974 if (!C) { 5975 // Conceptually, we could handle things like: 5976 // %a = zext i8 %X to i16 5977 // %b = shl i16 %a, 8 5978 // %c = or i16 %a, %b 5979 // but until there is an example that actually needs this, it doesn't seem 5980 // worth worrying about. 5981 return nullptr; 5982 } 5983 5984 // Handle 'null' ConstantArrayZero etc. 5985 if (C->isNullValue()) 5986 return Constant::getNullValue(Type::getInt8Ty(Ctx)); 5987 5988 // Constant floating-point values can be handled as integer values if the 5989 // corresponding integer value is "byteable". An important case is 0.0. 5990 if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { 5991 Type *Ty = nullptr; 5992 if (CFP->getType()->isHalfTy()) 5993 Ty = Type::getInt16Ty(Ctx); 5994 else if (CFP->getType()->isFloatTy()) 5995 Ty = Type::getInt32Ty(Ctx); 5996 else if (CFP->getType()->isDoubleTy()) 5997 Ty = Type::getInt64Ty(Ctx); 5998 // Don't handle long double formats, which have strange constraints. 5999 return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL) 6000 : nullptr; 6001 } 6002 6003 // We can handle constant integers that are multiple of 8 bits. 
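// For example, i32 0xAAAAAAAA yields the byte value 0xAA, while a
// non-splat constant such as i32 0x01020304 returns nullptr below.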
6004 if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) { 6005 if (CI->getBitWidth() % 8 == 0) { 6006 assert(CI->getBitWidth() > 8 && "8 bits should be handled above!"); 6007 if (!CI->getValue().isSplat(8)) 6008 return nullptr; 6009 return ConstantInt::get(Ctx, CI->getValue().trunc(8)); 6010 } 6011 } 6012 6013 if (auto *CE = dyn_cast<ConstantExpr>(C)) { 6014 if (CE->getOpcode() == Instruction::IntToPtr) { 6015 if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) { 6016 unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace()); 6017 if (Constant *Op = ConstantFoldIntegerCast( 6018 CE->getOperand(0), Type::getIntNTy(Ctx, BitWidth), false, DL)) 6019 return isBytewiseValue(Op, DL); 6020 } 6021 } 6022 } 6023 6024 auto Merge = [&](Value *LHS, Value *RHS) -> Value * { 6025 if (LHS == RHS) 6026 return LHS; 6027 if (!LHS || !RHS) 6028 return nullptr; 6029 if (LHS == UndefInt8) 6030 return RHS; 6031 if (RHS == UndefInt8) 6032 return LHS; 6033 return nullptr; 6034 }; 6035 6036 if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) { 6037 Value *Val = UndefInt8; 6038 for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I) 6039 if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL)))) 6040 return nullptr; 6041 return Val; 6042 } 6043 6044 if (isa<ConstantAggregate>(C)) { 6045 Value *Val = UndefInt8; 6046 for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) 6047 if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL)))) 6048 return nullptr; 6049 return Val; 6050 } 6051 6052 // Don't try to handle the handful of other constants. 6053 return nullptr; 6054 } 6055 6056 // This is the recursive version of BuildSubAggregate. It takes a few different 6057 // arguments. Idxs is the index within the nested struct From that we are 6058 // looking at now (which is of type IndexedType). IdxSkip is the number of 6059 // indices from Idxs that should be left out when inserting into the resulting 6060 // struct. To is the result struct built so far, new insertvalue instructions 6061 // build on that. 6062 static Value *BuildSubAggregate(Value *From, Value *To, Type *IndexedType, 6063 SmallVectorImpl<unsigned> &Idxs, 6064 unsigned IdxSkip, 6065 BasicBlock::iterator InsertBefore) { 6066 StructType *STy = dyn_cast<StructType>(IndexedType); 6067 if (STy) { 6068 // Save the original To argument so we can modify it 6069 Value *OrigTo = To; 6070 // General case, the type indexed by Idxs is a struct 6071 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 6072 // Process each struct element recursively 6073 Idxs.push_back(i); 6074 Value *PrevTo = To; 6075 To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip, 6076 InsertBefore); 6077 Idxs.pop_back(); 6078 if (!To) { 6079 // Couldn't find any inserted value for this index? Cleanup 6080 while (PrevTo != OrigTo) { 6081 InsertValueInst* Del = cast<InsertValueInst>(PrevTo); 6082 PrevTo = Del->getAggregateOperand(); 6083 Del->eraseFromParent(); 6084 } 6085 // Stop processing elements 6086 break; 6087 } 6088 } 6089 // If we successfully found a value for each of our subaggregates 6090 if (To) 6091 return To; 6092 } 6093 // Base case, the type indexed by SourceIdxs is not a struct, or not all of 6094 // the struct's elements had a value that was inserted directly. In the latter 6095 // case, perhaps we can't determine each of the subelements individually, but 6096 // we might be able to find the complete struct somewhere. 
6097 6098 // Find the value that is at that particular spot 6099 Value *V = FindInsertedValue(From, Idxs); 6100 6101 if (!V) 6102 return nullptr; 6103 6104 // Insert the value in the new (sub) aggregate 6105 return InsertValueInst::Create(To, V, ArrayRef(Idxs).slice(IdxSkip), "tmp", 6106 InsertBefore); 6107 } 6108 6109 // This helper takes a nested struct and extracts a part of it (which is again a 6110 // struct) into a new value. For example, given the struct: 6111 // { a, { b, { c, d }, e } } 6112 // and the indices "1, 1" this returns 6113 // { c, d }. 6114 // 6115 // It does this by inserting an insertvalue for each element in the resulting 6116 // struct, as opposed to just inserting a single struct. This will only work if 6117 // each of the elements of the substruct are known (ie, inserted into From by an 6118 // insertvalue instruction somewhere). 6119 // 6120 // All inserted insertvalue instructions are inserted before InsertBefore 6121 static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range, 6122 BasicBlock::iterator InsertBefore) { 6123 Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(), 6124 idx_range); 6125 Value *To = PoisonValue::get(IndexedType); 6126 SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end()); 6127 unsigned IdxSkip = Idxs.size(); 6128 6129 return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore); 6130 } 6131 6132 /// Given an aggregate and a sequence of indices, see if the scalar value 6133 /// indexed is already around as a register, for example if it was inserted 6134 /// directly into the aggregate. 6135 /// 6136 /// If InsertBefore is not null, this function will duplicate (modified) 6137 /// insertvalues when a part of a nested struct is extracted. 6138 Value * 6139 llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, 6140 std::optional<BasicBlock::iterator> InsertBefore) { 6141 // Nothing to index? Just return V then (this is useful at the end of our 6142 // recursion). 6143 if (idx_range.empty()) 6144 return V; 6145 // We have indices, so V should have an indexable type. 6146 assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) && 6147 "Not looking at a struct or array?"); 6148 assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) && 6149 "Invalid indices for type?"); 6150 6151 if (Constant *C = dyn_cast<Constant>(V)) { 6152 C = C->getAggregateElement(idx_range[0]); 6153 if (!C) return nullptr; 6154 return FindInsertedValue(C, idx_range.slice(1), InsertBefore); 6155 } 6156 6157 if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) { 6158 // Loop the indices for the insertvalue instruction in parallel with the 6159 // requested indices 6160 const unsigned *req_idx = idx_range.begin(); 6161 for (const unsigned *i = I->idx_begin(), *e = I->idx_end(); 6162 i != e; ++i, ++req_idx) { 6163 if (req_idx == idx_range.end()) { 6164 // We can't handle this without inserting insertvalues 6165 if (!InsertBefore) 6166 return nullptr; 6167 6168 // The requested index identifies a part of a nested aggregate. Handle 6169 // this specially. For example, 6170 // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0 6171 // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1 6172 // %C = extractvalue {i32, { i32, i32 } } %B, 1 6173 // This can be changed into 6174 // %A = insertvalue {i32, i32 } undef, i32 10, 0 6175 // %C = insertvalue {i32, i32 } %A, i32 11, 1 6176 // which allows the unused 0,0 element from the nested struct to be 6177 // removed. 
6178 return BuildSubAggregate(V, ArrayRef(idx_range.begin(), req_idx), 6179 *InsertBefore); 6180 } 6181 6182 // This insert value inserts something else than what we are looking for. 6183 // See if the (aggregate) value inserted into has the value we are 6184 // looking for, then. 6185 if (*req_idx != *i) 6186 return FindInsertedValue(I->getAggregateOperand(), idx_range, 6187 InsertBefore); 6188 } 6189 // If we end up here, the indices of the insertvalue match with those 6190 // requested (though possibly only partially). Now we recursively look at 6191 // the inserted value, passing any remaining indices. 6192 return FindInsertedValue(I->getInsertedValueOperand(), 6193 ArrayRef(req_idx, idx_range.end()), InsertBefore); 6194 } 6195 6196 if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) { 6197 // If we're extracting a value from an aggregate that was extracted from 6198 // something else, we can extract from that something else directly instead. 6199 // However, we will need to chain I's indices with the requested indices. 6200 6201 // Calculate the number of indices required 6202 unsigned size = I->getNumIndices() + idx_range.size(); 6203 // Allocate some space to put the new indices in 6204 SmallVector<unsigned, 5> Idxs; 6205 Idxs.reserve(size); 6206 // Add indices from the extract value instruction 6207 Idxs.append(I->idx_begin(), I->idx_end()); 6208 6209 // Add requested indices 6210 Idxs.append(idx_range.begin(), idx_range.end()); 6211 6212 assert(Idxs.size() == size 6213 && "Number of indices added not correct?"); 6214 6215 return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore); 6216 } 6217 // Otherwise, we don't know (such as, extracting from a function return value 6218 // or load instruction) 6219 return nullptr; 6220 } 6221 6222 bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP, 6223 unsigned CharSize) { 6224 // Make sure the GEP has exactly three arguments. 6225 if (GEP->getNumOperands() != 3) 6226 return false; 6227 6228 // Make sure the index-ee is a pointer to array of \p CharSize integers. 6229 // CharSize. 6230 ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType()); 6231 if (!AT || !AT->getElementType()->isIntegerTy(CharSize)) 6232 return false; 6233 6234 // Check to make sure that the first operand of the GEP is an integer and 6235 // has value 0 so that we are sure we're indexing into the initializer. 6236 const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); 6237 if (!FirstIdx || !FirstIdx->isZero()) 6238 return false; 6239 6240 return true; 6241 } 6242 6243 // If V refers to an initialized global constant, set Slice either to 6244 // its initializer if the size of its elements equals ElementSize, or, 6245 // for ElementSize == 8, to its representation as an array of unsiged 6246 // char. Return true on success. 6247 // Offset is in the unit "nr of ElementSize sized elements". 6248 bool llvm::getConstantDataArrayInfo(const Value *V, 6249 ConstantDataArraySlice &Slice, 6250 unsigned ElementSize, uint64_t Offset) { 6251 assert(V && "V should not be null."); 6252 assert((ElementSize % 8) == 0 && 6253 "ElementSize expected to be a multiple of the size of a byte."); 6254 unsigned ElementSizeInBytes = ElementSize / 8; 6255 6256 // Drill down into the pointer expression V, ignoring any intervening 6257 // casts, and determine the identity of the object it references along 6258 // with the cumulative byte offset into it. 
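  // For example, if V is "getelementptr inbounds i8, ptr @g, i64 4", the code
  // below identifies @g as the referenced object and accumulates a constant
  // byte offset of 4, so StartIdx becomes 4.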
6259 const GlobalVariable *GV = 6260 dyn_cast<GlobalVariable>(getUnderlyingObject(V)); 6261 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) 6262 // Fail if V is not based on constant global object. 6263 return false; 6264 6265 const DataLayout &DL = GV->getDataLayout(); 6266 APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0); 6267 6268 if (GV != V->stripAndAccumulateConstantOffsets(DL, Off, 6269 /*AllowNonInbounds*/ true)) 6270 // Fail if a constant offset could not be determined. 6271 return false; 6272 6273 uint64_t StartIdx = Off.getLimitedValue(); 6274 if (StartIdx == UINT64_MAX) 6275 // Fail if the constant offset is excessive. 6276 return false; 6277 6278 // Off/StartIdx is in the unit of bytes. So we need to convert to number of 6279 // elements. Simply bail out if that isn't possible. 6280 if ((StartIdx % ElementSizeInBytes) != 0) 6281 return false; 6282 6283 Offset += StartIdx / ElementSizeInBytes; 6284 ConstantDataArray *Array = nullptr; 6285 ArrayType *ArrayTy = nullptr; 6286 6287 if (GV->getInitializer()->isNullValue()) { 6288 Type *GVTy = GV->getValueType(); 6289 uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedValue(); 6290 uint64_t Length = SizeInBytes / ElementSizeInBytes; 6291 6292 Slice.Array = nullptr; 6293 Slice.Offset = 0; 6294 // Return an empty Slice for undersized constants to let callers 6295 // transform even undefined library calls into simpler, well-defined 6296 // expressions. This is preferable to making the calls although it 6297 // prevents sanitizers from detecting such calls. 6298 Slice.Length = Length < Offset ? 0 : Length - Offset; 6299 return true; 6300 } 6301 6302 auto *Init = const_cast<Constant *>(GV->getInitializer()); 6303 if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) { 6304 Type *InitElTy = ArrayInit->getElementType(); 6305 if (InitElTy->isIntegerTy(ElementSize)) { 6306 // If Init is an initializer for an array of the expected type 6307 // and size, use it as is. 6308 Array = ArrayInit; 6309 ArrayTy = ArrayInit->getType(); 6310 } 6311 } 6312 6313 if (!Array) { 6314 if (ElementSize != 8) 6315 // TODO: Handle conversions to larger integral types. 6316 return false; 6317 6318 // Otherwise extract the portion of the initializer starting 6319 // at Offset as an array of bytes, and reset Offset. 6320 Init = ReadByteArrayFromGlobal(GV, Offset); 6321 if (!Init) 6322 return false; 6323 6324 Offset = 0; 6325 Array = dyn_cast<ConstantDataArray>(Init); 6326 ArrayTy = dyn_cast<ArrayType>(Init->getType()); 6327 } 6328 6329 uint64_t NumElts = ArrayTy->getArrayNumElements(); 6330 if (Offset > NumElts) 6331 return false; 6332 6333 Slice.Array = Array; 6334 Slice.Offset = Offset; 6335 Slice.Length = NumElts - Offset; 6336 return true; 6337 } 6338 6339 /// Extract bytes from the initializer of the constant array V, which need 6340 /// not be a nul-terminated string. On success, store the bytes in Str and 6341 /// return true. When TrimAtNul is set, Str will contain only the bytes up 6342 /// to but not including the first nul. Return false on failure. 6343 bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, 6344 bool TrimAtNul) { 6345 ConstantDataArraySlice Slice; 6346 if (!getConstantDataArrayInfo(V, Slice, 8)) 6347 return false; 6348 6349 if (Slice.Array == nullptr) { 6350 if (TrimAtNul) { 6351 // Return a nul-terminated string even for an empty Slice. 
This is 6352 // safe because all existing SimplifyLibcalls callers require string 6353 // arguments and the behavior of the functions they fold is undefined 6354 // otherwise. Folding the calls this way is preferable to making 6355 // the undefined library calls, even though it prevents sanitizers 6356 // from reporting such calls. 6357 Str = StringRef(); 6358 return true; 6359 } 6360 if (Slice.Length == 1) { 6361 Str = StringRef("", 1); 6362 return true; 6363 } 6364 // We cannot instantiate a StringRef as we do not have an appropriate string 6365 // of 0s at hand. 6366 return false; 6367 } 6368 6369 // Start out with the entire array in the StringRef. 6370 Str = Slice.Array->getAsString(); 6371 // Skip over 'offset' bytes. 6372 Str = Str.substr(Slice.Offset); 6373 6374 if (TrimAtNul) { 6375 // Trim off the \0 and anything after it. If the array is not nul 6376 // terminated, we just return the whole end of string. The client may know 6377 // some other way that the string is length-bound. 6378 Str = Str.substr(0, Str.find('\0')); 6379 } 6380 return true; 6381 } 6382 6383 // These next two are very similar to the above, but also look through PHI 6384 // nodes. 6385 // TODO: See if we can integrate these two together. 6386 6387 /// If we can compute the length of the string pointed to by 6388 /// the specified pointer, return 'len+1'. If we can't, return 0. 6389 static uint64_t GetStringLengthH(const Value *V, 6390 SmallPtrSetImpl<const PHINode*> &PHIs, 6391 unsigned CharSize) { 6392 // Look through noop bitcast instructions. 6393 V = V->stripPointerCasts(); 6394 6395 // If this is a PHI node, there are two cases: either we have already seen it 6396 // or we haven't. 6397 if (const PHINode *PN = dyn_cast<PHINode>(V)) { 6398 if (!PHIs.insert(PN).second) 6399 return ~0ULL; // already in the set. 6400 6401 // If it was new, see if all the input strings are the same length. 6402 uint64_t LenSoFar = ~0ULL; 6403 for (Value *IncValue : PN->incoming_values()) { 6404 uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize); 6405 if (Len == 0) return 0; // Unknown length -> unknown. 6406 6407 if (Len == ~0ULL) continue; 6408 6409 if (Len != LenSoFar && LenSoFar != ~0ULL) 6410 return 0; // Disagree -> unknown. 6411 LenSoFar = Len; 6412 } 6413 6414 // Success, all agree. 6415 return LenSoFar; 6416 } 6417 6418 // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) 6419 if (const SelectInst *SI = dyn_cast<SelectInst>(V)) { 6420 uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize); 6421 if (Len1 == 0) return 0; 6422 uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize); 6423 if (Len2 == 0) return 0; 6424 if (Len1 == ~0ULL) return Len2; 6425 if (Len2 == ~0ULL) return Len1; 6426 if (Len1 != Len2) return 0; 6427 return Len1; 6428 } 6429 6430 // Otherwise, see if we can read the string. 6431 ConstantDataArraySlice Slice; 6432 if (!getConstantDataArrayInfo(V, Slice, CharSize)) 6433 return 0; 6434 6435 if (Slice.Array == nullptr) 6436 // Zeroinitializer (including an empty one). 6437 return 1; 6438 6439 // Search for the first nul character. Return a conservative result even 6440 // when there is no nul. This is safe since otherwise the string function 6441 // being folded such as strlen is undefined, and can be preferable to 6442 // making the undefined library call. 
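  // For example, with CharSize == 8 and an initializer of c"ab\00cd", the scan
  // below stops at index 2 and we return 3 (strlen("ab") + 1); with c"abcd",
  // which has no nul at all, we still return Slice.Length + 1 == 5.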
6443 unsigned NullIndex = 0; 6444 for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) { 6445 if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0) 6446 break; 6447 } 6448 6449 return NullIndex + 1; 6450 } 6451 6452 /// If we can compute the length of the string pointed to by 6453 /// the specified pointer, return 'len+1'. If we can't, return 0. 6454 uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) { 6455 if (!V->getType()->isPointerTy()) 6456 return 0; 6457 6458 SmallPtrSet<const PHINode*, 32> PHIs; 6459 uint64_t Len = GetStringLengthH(V, PHIs, CharSize); 6460 // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return 6461 // an empty string as a length. 6462 return Len == ~0ULL ? 1 : Len; 6463 } 6464 6465 const Value * 6466 llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call, 6467 bool MustPreserveNullness) { 6468 assert(Call && 6469 "getArgumentAliasingToReturnedPointer only works on nonnull calls"); 6470 if (const Value *RV = Call->getReturnedArgOperand()) 6471 return RV; 6472 // This can be used only as a aliasing property. 6473 if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( 6474 Call, MustPreserveNullness)) 6475 return Call->getArgOperand(0); 6476 return nullptr; 6477 } 6478 6479 bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( 6480 const CallBase *Call, bool MustPreserveNullness) { 6481 switch (Call->getIntrinsicID()) { 6482 case Intrinsic::launder_invariant_group: 6483 case Intrinsic::strip_invariant_group: 6484 case Intrinsic::aarch64_irg: 6485 case Intrinsic::aarch64_tagp: 6486 // The amdgcn_make_buffer_rsrc function does not alter the address of the 6487 // input pointer (and thus preserve null-ness for the purposes of escape 6488 // analysis, which is where the MustPreserveNullness flag comes in to play). 6489 // However, it will not necessarily map ptr addrspace(N) null to ptr 6490 // addrspace(8) null, aka the "null descriptor", which has "all loads return 6491 // 0, all stores are dropped" semantics. Given the context of this intrinsic 6492 // list, no one should be relying on such a strict interpretation of 6493 // MustPreserveNullness (and, at time of writing, they are not), but we 6494 // document this fact out of an abundance of caution. 6495 case Intrinsic::amdgcn_make_buffer_rsrc: 6496 return true; 6497 case Intrinsic::ptrmask: 6498 return !MustPreserveNullness; 6499 case Intrinsic::threadlocal_address: 6500 // The underlying variable changes with thread ID. The Thread ID may change 6501 // at coroutine suspend points. 6502 return !Call->getParent()->getParent()->isPresplitCoroutine(); 6503 default: 6504 return false; 6505 } 6506 } 6507 6508 /// \p PN defines a loop-variant pointer to an object. Check if the 6509 /// previous iteration of the loop was referring to the same object as \p PN. 6510 static bool isSameUnderlyingObjectInLoop(const PHINode *PN, 6511 const LoopInfo *LI) { 6512 // Find the loop-defined value. 6513 Loop *L = LI->getLoopFor(PN->getParent()); 6514 if (PN->getNumIncomingValues() != 2) 6515 return true; 6516 6517 // Find the value from previous iteration. 
6518 auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0)); 6519 if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) 6520 PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1)); 6521 if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) 6522 return true; 6523 6524 // If a new pointer is loaded in the loop, the pointer references a different 6525 // object in every iteration. E.g.: 6526 // for (i) 6527 // int *p = a[i]; 6528 // ... 6529 if (auto *Load = dyn_cast<LoadInst>(PrevValue)) 6530 if (!L->isLoopInvariant(Load->getPointerOperand())) 6531 return false; 6532 return true; 6533 } 6534 6535 const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) { 6536 if (!V->getType()->isPointerTy()) 6537 return V; 6538 for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { 6539 if (auto *GEP = dyn_cast<GEPOperator>(V)) { 6540 V = GEP->getPointerOperand(); 6541 } else if (Operator::getOpcode(V) == Instruction::BitCast || 6542 Operator::getOpcode(V) == Instruction::AddrSpaceCast) { 6543 Value *NewV = cast<Operator>(V)->getOperand(0); 6544 if (!NewV->getType()->isPointerTy()) 6545 return V; 6546 V = NewV; 6547 } else if (auto *GA = dyn_cast<GlobalAlias>(V)) { 6548 if (GA->isInterposable()) 6549 return V; 6550 V = GA->getAliasee(); 6551 } else { 6552 if (auto *PHI = dyn_cast<PHINode>(V)) { 6553 // Look through single-arg phi nodes created by LCSSA. 6554 if (PHI->getNumIncomingValues() == 1) { 6555 V = PHI->getIncomingValue(0); 6556 continue; 6557 } 6558 } else if (auto *Call = dyn_cast<CallBase>(V)) { 6559 // CaptureTracking can know about special capturing properties of some 6560 // intrinsics like launder.invariant.group, that can't be expressed with 6561 // the attributes, but have properties like returning aliasing pointer. 6562 // Because some analysis may assume that nocaptured pointer is not 6563 // returned from some special intrinsic (because function would have to 6564 // be marked with returns attribute), it is crucial to use this function 6565 // because it should be in sync with CaptureTracking. Not using it may 6566 // cause weird miscompilations where 2 aliasing pointers are assumed to 6567 // noalias. 6568 if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) { 6569 V = RP; 6570 continue; 6571 } 6572 } 6573 6574 return V; 6575 } 6576 assert(V->getType()->isPointerTy() && "Unexpected operand type!"); 6577 } 6578 return V; 6579 } 6580 6581 void llvm::getUnderlyingObjects(const Value *V, 6582 SmallVectorImpl<const Value *> &Objects, 6583 LoopInfo *LI, unsigned MaxLookup) { 6584 SmallPtrSet<const Value *, 4> Visited; 6585 SmallVector<const Value *, 4> Worklist; 6586 Worklist.push_back(V); 6587 do { 6588 const Value *P = Worklist.pop_back_val(); 6589 P = getUnderlyingObject(P, MaxLookup); 6590 6591 if (!Visited.insert(P).second) 6592 continue; 6593 6594 if (auto *SI = dyn_cast<SelectInst>(P)) { 6595 Worklist.push_back(SI->getTrueValue()); 6596 Worklist.push_back(SI->getFalseValue()); 6597 continue; 6598 } 6599 6600 if (auto *PN = dyn_cast<PHINode>(P)) { 6601 // If this PHI changes the underlying object in every iteration of the 6602 // loop, don't look through it. Consider: 6603 // int **A; 6604 // for (i) { 6605 // Prev = Curr; // Prev = PHI (Prev_0, Curr) 6606 // Curr = A[i]; 6607 // *Prev, *Curr; 6608 // 6609 // Prev is tracking Curr one iteration behind so they refer to different 6610 // underlying objects. 
6611 if (!LI || !LI->isLoopHeader(PN->getParent()) || 6612 isSameUnderlyingObjectInLoop(PN, LI)) 6613 append_range(Worklist, PN->incoming_values()); 6614 else 6615 Objects.push_back(P); 6616 continue; 6617 } 6618 6619 Objects.push_back(P); 6620 } while (!Worklist.empty()); 6621 } 6622 6623 const Value *llvm::getUnderlyingObjectAggressive(const Value *V) { 6624 const unsigned MaxVisited = 8; 6625 6626 SmallPtrSet<const Value *, 8> Visited; 6627 SmallVector<const Value *, 8> Worklist; 6628 Worklist.push_back(V); 6629 const Value *Object = nullptr; 6630 // Used as fallback if we can't find a common underlying object through 6631 // recursion. 6632 bool First = true; 6633 const Value *FirstObject = getUnderlyingObject(V); 6634 do { 6635 const Value *P = Worklist.pop_back_val(); 6636 P = First ? FirstObject : getUnderlyingObject(P); 6637 First = false; 6638 6639 if (!Visited.insert(P).second) 6640 continue; 6641 6642 if (Visited.size() == MaxVisited) 6643 return FirstObject; 6644 6645 if (auto *SI = dyn_cast<SelectInst>(P)) { 6646 Worklist.push_back(SI->getTrueValue()); 6647 Worklist.push_back(SI->getFalseValue()); 6648 continue; 6649 } 6650 6651 if (auto *PN = dyn_cast<PHINode>(P)) { 6652 append_range(Worklist, PN->incoming_values()); 6653 continue; 6654 } 6655 6656 if (!Object) 6657 Object = P; 6658 else if (Object != P) 6659 return FirstObject; 6660 } while (!Worklist.empty()); 6661 6662 return Object; 6663 } 6664 6665 /// This is the function that does the work of looking through basic 6666 /// ptrtoint+arithmetic+inttoptr sequences. 6667 static const Value *getUnderlyingObjectFromInt(const Value *V) { 6668 do { 6669 if (const Operator *U = dyn_cast<Operator>(V)) { 6670 // If we find a ptrtoint, we can transfer control back to the 6671 // regular getUnderlyingObjectFromInt. 6672 if (U->getOpcode() == Instruction::PtrToInt) 6673 return U->getOperand(0); 6674 // If we find an add of a constant, a multiplied value, or a phi, it's 6675 // likely that the other operand will lead us to the base 6676 // object. We don't have to worry about the case where the 6677 // object address is somehow being computed by the multiply, 6678 // because our callers only care when the result is an 6679 // identifiable object. 6680 if (U->getOpcode() != Instruction::Add || 6681 (!isa<ConstantInt>(U->getOperand(1)) && 6682 Operator::getOpcode(U->getOperand(1)) != Instruction::Mul && 6683 !isa<PHINode>(U->getOperand(1)))) 6684 return V; 6685 V = U->getOperand(0); 6686 } else { 6687 return V; 6688 } 6689 assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); 6690 } while (true); 6691 } 6692 6693 /// This is a wrapper around getUnderlyingObjects and adds support for basic 6694 /// ptrtoint+arithmetic+inttoptr sequences. 6695 /// It returns false if unidentified object is found in getUnderlyingObjects. 
6696 bool llvm::getUnderlyingObjectsForCodeGen(const Value *V, 6697 SmallVectorImpl<Value *> &Objects) { 6698 SmallPtrSet<const Value *, 16> Visited; 6699 SmallVector<const Value *, 4> Working(1, V); 6700 do { 6701 V = Working.pop_back_val(); 6702 6703 SmallVector<const Value *, 4> Objs; 6704 getUnderlyingObjects(V, Objs); 6705 6706 for (const Value *V : Objs) { 6707 if (!Visited.insert(V).second) 6708 continue; 6709 if (Operator::getOpcode(V) == Instruction::IntToPtr) { 6710 const Value *O = 6711 getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); 6712 if (O->getType()->isPointerTy()) { 6713 Working.push_back(O); 6714 continue; 6715 } 6716 } 6717 // If getUnderlyingObjects fails to find an identifiable object, 6718 // getUnderlyingObjectsForCodeGen also fails for safety. 6719 if (!isIdentifiedObject(V)) { 6720 Objects.clear(); 6721 return false; 6722 } 6723 Objects.push_back(const_cast<Value *>(V)); 6724 } 6725 } while (!Working.empty()); 6726 return true; 6727 } 6728 6729 AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) { 6730 AllocaInst *Result = nullptr; 6731 SmallPtrSet<Value *, 4> Visited; 6732 SmallVector<Value *, 4> Worklist; 6733 6734 auto AddWork = [&](Value *V) { 6735 if (Visited.insert(V).second) 6736 Worklist.push_back(V); 6737 }; 6738 6739 AddWork(V); 6740 do { 6741 V = Worklist.pop_back_val(); 6742 assert(Visited.count(V)); 6743 6744 if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 6745 if (Result && Result != AI) 6746 return nullptr; 6747 Result = AI; 6748 } else if (CastInst *CI = dyn_cast<CastInst>(V)) { 6749 AddWork(CI->getOperand(0)); 6750 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 6751 for (Value *IncValue : PN->incoming_values()) 6752 AddWork(IncValue); 6753 } else if (auto *SI = dyn_cast<SelectInst>(V)) { 6754 AddWork(SI->getTrueValue()); 6755 AddWork(SI->getFalseValue()); 6756 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { 6757 if (OffsetZero && !GEP->hasAllZeroIndices()) 6758 return nullptr; 6759 AddWork(GEP->getPointerOperand()); 6760 } else if (CallBase *CB = dyn_cast<CallBase>(V)) { 6761 Value *Returned = CB->getReturnedArgOperand(); 6762 if (Returned) 6763 AddWork(Returned); 6764 else 6765 return nullptr; 6766 } else { 6767 return nullptr; 6768 } 6769 } while (!Worklist.empty()); 6770 6771 return Result; 6772 } 6773 6774 static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6775 const Value *V, bool AllowLifetime, bool AllowDroppable) { 6776 for (const User *U : V->users()) { 6777 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); 6778 if (!II) 6779 return false; 6780 6781 if (AllowLifetime && II->isLifetimeStartOrEnd()) 6782 continue; 6783 6784 if (AllowDroppable && II->isDroppable()) 6785 continue; 6786 6787 return false; 6788 } 6789 return true; 6790 } 6791 6792 bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { 6793 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6794 V, /* AllowLifetime */ true, /* AllowDroppable */ false); 6795 } 6796 bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) { 6797 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6798 V, /* AllowLifetime */ true, /* AllowDroppable */ true); 6799 } 6800 6801 bool llvm::mustSuppressSpeculation(const LoadInst &LI) { 6802 if (!LI.isUnordered()) 6803 return true; 6804 const Function &F = *LI.getFunction(); 6805 // Speculative load may create a race that did not exist in the source. 6806 return F.hasFnAttribute(Attribute::SanitizeThread) || 6807 // Speculative load may load data from dirty regions. 
6808 F.hasFnAttribute(Attribute::SanitizeAddress) || 6809 F.hasFnAttribute(Attribute::SanitizeHWAddress); 6810 } 6811 6812 bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst, 6813 const Instruction *CtxI, 6814 AssumptionCache *AC, 6815 const DominatorTree *DT, 6816 const TargetLibraryInfo *TLI, 6817 bool UseVariableInfo) { 6818 return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, 6819 AC, DT, TLI, UseVariableInfo); 6820 } 6821 6822 bool llvm::isSafeToSpeculativelyExecuteWithOpcode( 6823 unsigned Opcode, const Instruction *Inst, const Instruction *CtxI, 6824 AssumptionCache *AC, const DominatorTree *DT, const TargetLibraryInfo *TLI, 6825 bool UseVariableInfo) { 6826 #ifndef NDEBUG 6827 if (Inst->getOpcode() != Opcode) { 6828 // Check that the operands are actually compatible with the Opcode override. 6829 auto hasEqualReturnAndLeadingOperandTypes = 6830 [](const Instruction *Inst, unsigned NumLeadingOperands) { 6831 if (Inst->getNumOperands() < NumLeadingOperands) 6832 return false; 6833 const Type *ExpectedType = Inst->getType(); 6834 for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp) 6835 if (Inst->getOperand(ItOp)->getType() != ExpectedType) 6836 return false; 6837 return true; 6838 }; 6839 assert(!Instruction::isBinaryOp(Opcode) || 6840 hasEqualReturnAndLeadingOperandTypes(Inst, 2)); 6841 assert(!Instruction::isUnaryOp(Opcode) || 6842 hasEqualReturnAndLeadingOperandTypes(Inst, 1)); 6843 } 6844 #endif 6845 6846 switch (Opcode) { 6847 default: 6848 return true; 6849 case Instruction::UDiv: 6850 case Instruction::URem: { 6851 // x / y is undefined if y == 0. 6852 const APInt *V; 6853 if (match(Inst->getOperand(1), m_APInt(V))) 6854 return *V != 0; 6855 return false; 6856 } 6857 case Instruction::SDiv: 6858 case Instruction::SRem: { 6859 // x / y is undefined if y == 0 or x == INT_MIN and y == -1 6860 const APInt *Numerator, *Denominator; 6861 if (!match(Inst->getOperand(1), m_APInt(Denominator))) 6862 return false; 6863 // We cannot hoist this division if the denominator is 0. 6864 if (*Denominator == 0) 6865 return false; 6866 // It's safe to hoist if the denominator is not 0 or -1. 6867 if (!Denominator->isAllOnes()) 6868 return true; 6869 // At this point we know that the denominator is -1. It is safe to hoist as 6870 // long we know that the numerator is not INT_MIN. 6871 if (match(Inst->getOperand(0), m_APInt(Numerator))) 6872 return !Numerator->isMinSignedValue(); 6873 // The numerator *might* be MinSignedValue. 6874 return false; 6875 } 6876 case Instruction::Load: { 6877 if (!UseVariableInfo) 6878 return false; 6879 6880 const LoadInst *LI = dyn_cast<LoadInst>(Inst); 6881 if (!LI) 6882 return false; 6883 if (mustSuppressSpeculation(*LI)) 6884 return false; 6885 const DataLayout &DL = LI->getDataLayout(); 6886 return isDereferenceableAndAlignedPointer(LI->getPointerOperand(), 6887 LI->getType(), LI->getAlign(), DL, 6888 CtxI, AC, DT, TLI); 6889 } 6890 case Instruction::Call: { 6891 auto *CI = dyn_cast<const CallInst>(Inst); 6892 if (!CI) 6893 return false; 6894 const Function *Callee = CI->getCalledFunction(); 6895 6896 // The called function could have undefined behavior or side-effects, even 6897 // if marked readnone nounwind. 
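    // For example, a callee marked readnone nounwind may still divide by zero;
    // only the speculatable attribute provides the guarantee needed here.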
6898 return Callee && Callee->isSpeculatable(); 6899 } 6900 case Instruction::VAArg: 6901 case Instruction::Alloca: 6902 case Instruction::Invoke: 6903 case Instruction::CallBr: 6904 case Instruction::PHI: 6905 case Instruction::Store: 6906 case Instruction::Ret: 6907 case Instruction::Br: 6908 case Instruction::IndirectBr: 6909 case Instruction::Switch: 6910 case Instruction::Unreachable: 6911 case Instruction::Fence: 6912 case Instruction::AtomicRMW: 6913 case Instruction::AtomicCmpXchg: 6914 case Instruction::LandingPad: 6915 case Instruction::Resume: 6916 case Instruction::CatchSwitch: 6917 case Instruction::CatchPad: 6918 case Instruction::CatchRet: 6919 case Instruction::CleanupPad: 6920 case Instruction::CleanupRet: 6921 return false; // Misc instructions which have effects 6922 } 6923 } 6924 6925 bool llvm::mayHaveNonDefUseDependency(const Instruction &I) { 6926 if (I.mayReadOrWriteMemory()) 6927 // Memory dependency possible 6928 return true; 6929 if (!isSafeToSpeculativelyExecute(&I)) 6930 // Can't move above a maythrow call or infinite loop. Or if an 6931 // inalloca alloca, above a stacksave call. 6932 return true; 6933 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 6934 // 1) Can't reorder two inf-loop calls, even if readonly 6935 // 2) Also can't reorder an inf-loop call below a instruction which isn't 6936 // safe to speculative execute. (Inverse of above) 6937 return true; 6938 return false; 6939 } 6940 6941 /// Convert ConstantRange OverflowResult into ValueTracking OverflowResult. 6942 static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) { 6943 switch (OR) { 6944 case ConstantRange::OverflowResult::MayOverflow: 6945 return OverflowResult::MayOverflow; 6946 case ConstantRange::OverflowResult::AlwaysOverflowsLow: 6947 return OverflowResult::AlwaysOverflowsLow; 6948 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: 6949 return OverflowResult::AlwaysOverflowsHigh; 6950 case ConstantRange::OverflowResult::NeverOverflows: 6951 return OverflowResult::NeverOverflows; 6952 } 6953 llvm_unreachable("Unknown OverflowResult"); 6954 } 6955 6956 /// Combine constant ranges from computeConstantRange() and computeKnownBits(). 6957 ConstantRange 6958 llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V, 6959 bool ForSigned, 6960 const SimplifyQuery &SQ) { 6961 ConstantRange CR1 = 6962 ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned); 6963 ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo); 6964 ConstantRange::PreferredRangeType RangeType = 6965 ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned; 6966 return CR1.intersectWith(CR2, RangeType); 6967 } 6968 6969 OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, 6970 const Value *RHS, 6971 const SimplifyQuery &SQ, 6972 bool IsNSW) { 6973 KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ); 6974 KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ); 6975 6976 // mul nsw of two non-negative numbers is also nuw. 
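  // (If both operands are non-negative, the exact product is non-negative as
  // well; nsw then guarantees it fits in [0, 2^(BitWidth-1) - 1], which is a
  // subset of the unsigned range, so the multiply cannot wrap unsigned
  // either.)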
6977 if (IsNSW && LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) 6978 return OverflowResult::NeverOverflows; 6979 6980 ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false); 6981 ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false); 6982 return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange)); 6983 } 6984 6985 OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS, 6986 const Value *RHS, 6987 const SimplifyQuery &SQ) { 6988 // Multiplying n * m significant bits yields a result of n + m significant 6989 // bits. If the total number of significant bits does not exceed the 6990 // result bit width (minus 1), there is no overflow. 6991 // This means if we have enough leading sign bits in the operands 6992 // we can guarantee that the result does not overflow. 6993 // Ref: "Hacker's Delight" by Henry Warren 6994 unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); 6995 6996 // Note that underestimating the number of sign bits gives a more 6997 // conservative answer. 6998 unsigned SignBits = 6999 ::ComputeNumSignBits(LHS, 0, SQ) + ::ComputeNumSignBits(RHS, 0, SQ); 7000 7001 // First handle the easy case: if we have enough sign bits there's 7002 // definitely no overflow. 7003 if (SignBits > BitWidth + 1) 7004 return OverflowResult::NeverOverflows; 7005 7006 // There are two ambiguous cases where there can be no overflow: 7007 // SignBits == BitWidth + 1 and 7008 // SignBits == BitWidth 7009 // The second case is difficult to check, therefore we only handle the 7010 // first case. 7011 if (SignBits == BitWidth + 1) { 7012 // It overflows only when both arguments are negative and the true 7013 // product is exactly the minimum negative number. 7014 // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000 7015 // For simplicity we just check if at least one side is not negative. 7016 KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ); 7017 KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ); 7018 if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) 7019 return OverflowResult::NeverOverflows; 7020 } 7021 return OverflowResult::MayOverflow; 7022 } 7023 7024 OverflowResult 7025 llvm::computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS, 7026 const WithCache<const Value *> &RHS, 7027 const SimplifyQuery &SQ) { 7028 ConstantRange LHSRange = 7029 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ); 7030 ConstantRange RHSRange = 7031 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ); 7032 return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange)); 7033 } 7034 7035 static OverflowResult 7036 computeOverflowForSignedAdd(const WithCache<const Value *> &LHS, 7037 const WithCache<const Value *> &RHS, 7038 const AddOperator *Add, const SimplifyQuery &SQ) { 7039 if (Add && Add->hasNoSignedWrap()) { 7040 return OverflowResult::NeverOverflows; 7041 } 7042 7043 // If LHS and RHS each have at least two sign bits, the addition will look 7044 // like 7045 // 7046 // XX..... + 7047 // YY..... 7048 // 7049 // If the carry into the most significant position is 0, X and Y can't both 7050 // be 1 and therefore the carry out of the addition is also 0. 7051 // 7052 // If the carry into the most significant position is 1, X and Y can't both 7053 // be 0 and therefore the carry out of the addition is also 1. 7054 // 7055 // Since the carry into the most significant position is always equal to 7056 // the carry out of the addition, there is no signed overflow. 
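  // For example, with i8 operands that each have at least two sign bits, both
  // values lie in [-64, 63], so the exact sum lies in [-128, 126] and always
  // fits in i8.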
7057 if (::ComputeNumSignBits(LHS, 0, SQ) > 1 && 7058 ::ComputeNumSignBits(RHS, 0, SQ) > 1) 7059 return OverflowResult::NeverOverflows; 7060 7061 ConstantRange LHSRange = 7062 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ); 7063 ConstantRange RHSRange = 7064 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ); 7065 OverflowResult OR = 7066 mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange)); 7067 if (OR != OverflowResult::MayOverflow) 7068 return OR; 7069 7070 // The remaining code needs Add to be available. Early returns if not so. 7071 if (!Add) 7072 return OverflowResult::MayOverflow; 7073 7074 // If the sign of Add is the same as at least one of the operands, this add 7075 // CANNOT overflow. If this can be determined from the known bits of the 7076 // operands the above signedAddMayOverflow() check will have already done so. 7077 // The only other way to improve on the known bits is from an assumption, so 7078 // call computeKnownBitsFromContext() directly. 7079 bool LHSOrRHSKnownNonNegative = 7080 (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative()); 7081 bool LHSOrRHSKnownNegative = 7082 (LHSRange.isAllNegative() || RHSRange.isAllNegative()); 7083 if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { 7084 KnownBits AddKnown(LHSRange.getBitWidth()); 7085 computeKnownBitsFromContext(Add, AddKnown, /*Depth=*/0, SQ); 7086 if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || 7087 (AddKnown.isNegative() && LHSOrRHSKnownNegative)) 7088 return OverflowResult::NeverOverflows; 7089 } 7090 7091 return OverflowResult::MayOverflow; 7092 } 7093 7094 OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS, 7095 const Value *RHS, 7096 const SimplifyQuery &SQ) { 7097 // X - (X % ?) 7098 // The remainder of a value can't have greater magnitude than itself, 7099 // so the subtraction can't overflow. 7100 7101 // X - (X -nuw ?) 7102 // In the minimal case, this would simplify to "?", so there's no subtract 7103 // at all. But if this analysis is used to peek through casts, for example, 7104 // then determining no-overflow may allow other transforms. 7105 7106 // TODO: There are other patterns like this. 7107 // See simplifyICmpWithBinOpOnLHS() for candidates. 7108 if (match(RHS, m_URem(m_Specific(LHS), m_Value())) || 7109 match(RHS, m_NUWSub(m_Specific(LHS), m_Value()))) 7110 if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)) 7111 return OverflowResult::NeverOverflows; 7112 7113 // Checking for conditions implied by dominating conditions may be expensive. 7114 // Limit it to usub_with_overflow calls for now. 7115 if (match(SQ.CxtI, 7116 m_Intrinsic<Intrinsic::usub_with_overflow>(m_Value(), m_Value()))) 7117 if (auto C = isImpliedByDomCondition(CmpInst::ICMP_UGE, LHS, RHS, SQ.CxtI, 7118 SQ.DL)) { 7119 if (*C) 7120 return OverflowResult::NeverOverflows; 7121 return OverflowResult::AlwaysOverflowsLow; 7122 } 7123 ConstantRange LHSRange = 7124 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ); 7125 ConstantRange RHSRange = 7126 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ); 7127 return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange)); 7128 } 7129 7130 OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS, 7131 const Value *RHS, 7132 const SimplifyQuery &SQ) { 7133 // X - (X % ?) 7134 // The remainder of a value can't have greater magnitude than itself, 7135 // so the subtraction can't overflow. 7136 7137 // X - (X -nsw ?) 
7138 // In the minimal case, this would simplify to "?", so there's no subtract 7139 // at all. But if this analysis is used to peek through casts, for example, 7140 // then determining no-overflow may allow other transforms. 7141 if (match(RHS, m_SRem(m_Specific(LHS), m_Value())) || 7142 match(RHS, m_NSWSub(m_Specific(LHS), m_Value()))) 7143 if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)) 7144 return OverflowResult::NeverOverflows; 7145 7146 // If LHS and RHS each have at least two sign bits, the subtraction 7147 // cannot overflow. 7148 if (::ComputeNumSignBits(LHS, 0, SQ) > 1 && 7149 ::ComputeNumSignBits(RHS, 0, SQ) > 1) 7150 return OverflowResult::NeverOverflows; 7151 7152 ConstantRange LHSRange = 7153 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ); 7154 ConstantRange RHSRange = 7155 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ); 7156 return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange)); 7157 } 7158 7159 bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO, 7160 const DominatorTree &DT) { 7161 SmallVector<const BranchInst *, 2> GuardingBranches; 7162 SmallVector<const ExtractValueInst *, 2> Results; 7163 7164 for (const User *U : WO->users()) { 7165 if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) { 7166 assert(EVI->getNumIndices() == 1 && "Obvious from CI's type"); 7167 7168 if (EVI->getIndices()[0] == 0) 7169 Results.push_back(EVI); 7170 else { 7171 assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type"); 7172 7173 for (const auto *U : EVI->users()) 7174 if (const auto *B = dyn_cast<BranchInst>(U)) { 7175 assert(B->isConditional() && "How else is it using an i1?"); 7176 GuardingBranches.push_back(B); 7177 } 7178 } 7179 } else { 7180 // We are using the aggregate directly in a way we don't want to analyze 7181 // here (storing it to a global, say). 7182 return false; 7183 } 7184 } 7185 7186 auto AllUsesGuardedByBranch = [&](const BranchInst *BI) { 7187 BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1)); 7188 if (!NoWrapEdge.isSingleEdge()) 7189 return false; 7190 7191 // Check if all users of the add are provably no-wrap. 7192 for (const auto *Result : Results) { 7193 // If the extractvalue itself is not executed on overflow, the we don't 7194 // need to check each use separately, since domination is transitive. 7195 if (DT.dominates(NoWrapEdge, Result->getParent())) 7196 continue; 7197 7198 for (const auto &RU : Result->uses()) 7199 if (!DT.dominates(NoWrapEdge, RU)) 7200 return false; 7201 } 7202 7203 return true; 7204 }; 7205 7206 return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch); 7207 } 7208 7209 /// Shifts return poison if shiftwidth is larger than the bitwidth. 7210 static bool shiftAmountKnownInRange(const Value *ShiftAmount) { 7211 auto *C = dyn_cast<Constant>(ShiftAmount); 7212 if (!C) 7213 return false; 7214 7215 // Shifts return poison if shiftwidth is larger than the bitwidth. 
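  // For example, "shl i8 %x, 8" yields poison while "shl i8 %x, 7" does not.
  // For vector shifts, every constant lane of the shift amount must be known
  // to be in range; a non-constant or out-of-range lane makes us answer false.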
7216 SmallVector<const Constant *, 4> ShiftAmounts; 7217 if (auto *FVTy = dyn_cast<FixedVectorType>(C->getType())) { 7218 unsigned NumElts = FVTy->getNumElements(); 7219 for (unsigned i = 0; i < NumElts; ++i) 7220 ShiftAmounts.push_back(C->getAggregateElement(i)); 7221 } else if (isa<ScalableVectorType>(C->getType())) 7222 return false; // Can't tell, just return false to be safe 7223 else 7224 ShiftAmounts.push_back(C); 7225 7226 bool Safe = llvm::all_of(ShiftAmounts, [](const Constant *C) { 7227 auto *CI = dyn_cast_or_null<ConstantInt>(C); 7228 return CI && CI->getValue().ult(C->getType()->getIntegerBitWidth()); 7229 }); 7230 7231 return Safe; 7232 } 7233 7234 enum class UndefPoisonKind { 7235 PoisonOnly = (1 << 0), 7236 UndefOnly = (1 << 1), 7237 UndefOrPoison = PoisonOnly | UndefOnly, 7238 }; 7239 7240 static bool includesPoison(UndefPoisonKind Kind) { 7241 return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0; 7242 } 7243 7244 static bool includesUndef(UndefPoisonKind Kind) { 7245 return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0; 7246 } 7247 7248 static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind, 7249 bool ConsiderFlagsAndMetadata) { 7250 7251 if (ConsiderFlagsAndMetadata && includesPoison(Kind) && 7252 Op->hasPoisonGeneratingAnnotations()) 7253 return true; 7254 7255 unsigned Opcode = Op->getOpcode(); 7256 7257 // Check whether opcode is a poison/undef-generating operation 7258 switch (Opcode) { 7259 case Instruction::Shl: 7260 case Instruction::AShr: 7261 case Instruction::LShr: 7262 return includesPoison(Kind) && !shiftAmountKnownInRange(Op->getOperand(1)); 7263 case Instruction::FPToSI: 7264 case Instruction::FPToUI: 7265 // fptosi/ui yields poison if the resulting value does not fit in the 7266 // destination type. 7267 return true; 7268 case Instruction::Call: 7269 if (auto *II = dyn_cast<IntrinsicInst>(Op)) { 7270 switch (II->getIntrinsicID()) { 7271 // TODO: Add more intrinsics. 
7272 case Intrinsic::ctlz: 7273 case Intrinsic::cttz: 7274 case Intrinsic::abs: 7275 if (cast<ConstantInt>(II->getArgOperand(1))->isNullValue()) 7276 return false; 7277 break; 7278 case Intrinsic::ctpop: 7279 case Intrinsic::bswap: 7280 case Intrinsic::bitreverse: 7281 case Intrinsic::fshl: 7282 case Intrinsic::fshr: 7283 case Intrinsic::smax: 7284 case Intrinsic::smin: 7285 case Intrinsic::umax: 7286 case Intrinsic::umin: 7287 case Intrinsic::ptrmask: 7288 case Intrinsic::fptoui_sat: 7289 case Intrinsic::fptosi_sat: 7290 case Intrinsic::sadd_with_overflow: 7291 case Intrinsic::ssub_with_overflow: 7292 case Intrinsic::smul_with_overflow: 7293 case Intrinsic::uadd_with_overflow: 7294 case Intrinsic::usub_with_overflow: 7295 case Intrinsic::umul_with_overflow: 7296 case Intrinsic::sadd_sat: 7297 case Intrinsic::uadd_sat: 7298 case Intrinsic::ssub_sat: 7299 case Intrinsic::usub_sat: 7300 return false; 7301 case Intrinsic::sshl_sat: 7302 case Intrinsic::ushl_sat: 7303 return includesPoison(Kind) && 7304 !shiftAmountKnownInRange(II->getArgOperand(1)); 7305 case Intrinsic::fma: 7306 case Intrinsic::fmuladd: 7307 case Intrinsic::sqrt: 7308 case Intrinsic::powi: 7309 case Intrinsic::sin: 7310 case Intrinsic::cos: 7311 case Intrinsic::pow: 7312 case Intrinsic::log: 7313 case Intrinsic::log10: 7314 case Intrinsic::log2: 7315 case Intrinsic::exp: 7316 case Intrinsic::exp2: 7317 case Intrinsic::exp10: 7318 case Intrinsic::fabs: 7319 case Intrinsic::copysign: 7320 case Intrinsic::floor: 7321 case Intrinsic::ceil: 7322 case Intrinsic::trunc: 7323 case Intrinsic::rint: 7324 case Intrinsic::nearbyint: 7325 case Intrinsic::round: 7326 case Intrinsic::roundeven: 7327 case Intrinsic::fptrunc_round: 7328 case Intrinsic::canonicalize: 7329 case Intrinsic::arithmetic_fence: 7330 case Intrinsic::minnum: 7331 case Intrinsic::maxnum: 7332 case Intrinsic::minimum: 7333 case Intrinsic::maximum: 7334 case Intrinsic::is_fpclass: 7335 case Intrinsic::ldexp: 7336 case Intrinsic::frexp: 7337 return false; 7338 case Intrinsic::lround: 7339 case Intrinsic::llround: 7340 case Intrinsic::lrint: 7341 case Intrinsic::llrint: 7342 // If the value doesn't fit an unspecified value is returned (but this 7343 // is not poison). 7344 return false; 7345 } 7346 } 7347 [[fallthrough]]; 7348 case Instruction::CallBr: 7349 case Instruction::Invoke: { 7350 const auto *CB = cast<CallBase>(Op); 7351 return !CB->hasRetAttr(Attribute::NoUndef); 7352 } 7353 case Instruction::InsertElement: 7354 case Instruction::ExtractElement: { 7355 // If index exceeds the length of the vector, it returns poison 7356 auto *VTy = cast<VectorType>(Op->getOperand(0)->getType()); 7357 unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1; 7358 auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp)); 7359 if (includesPoison(Kind)) 7360 return !Idx || 7361 Idx->getValue().uge(VTy->getElementCount().getKnownMinValue()); 7362 return false; 7363 } 7364 case Instruction::ShuffleVector: { 7365 ArrayRef<int> Mask = isa<ConstantExpr>(Op) 7366 ? 
cast<ConstantExpr>(Op)->getShuffleMask() 7367 : cast<ShuffleVectorInst>(Op)->getShuffleMask(); 7368 return includesPoison(Kind) && is_contained(Mask, PoisonMaskElem); 7369 } 7370 case Instruction::FNeg: 7371 case Instruction::PHI: 7372 case Instruction::Select: 7373 case Instruction::URem: 7374 case Instruction::SRem: 7375 case Instruction::ExtractValue: 7376 case Instruction::InsertValue: 7377 case Instruction::Freeze: 7378 case Instruction::ICmp: 7379 case Instruction::FCmp: 7380 case Instruction::FAdd: 7381 case Instruction::FSub: 7382 case Instruction::FMul: 7383 case Instruction::FDiv: 7384 case Instruction::FRem: 7385 return false; 7386 case Instruction::GetElementPtr: 7387 // inbounds is handled above 7388 // TODO: what about inrange on constexpr? 7389 return false; 7390 default: { 7391 const auto *CE = dyn_cast<ConstantExpr>(Op); 7392 if (isa<CastInst>(Op) || (CE && CE->isCast())) 7393 return false; 7394 else if (Instruction::isBinaryOp(Opcode)) 7395 return false; 7396 // Be conservative and return true. 7397 return true; 7398 } 7399 } 7400 } 7401 7402 bool llvm::canCreateUndefOrPoison(const Operator *Op, 7403 bool ConsiderFlagsAndMetadata) { 7404 return ::canCreateUndefOrPoison(Op, UndefPoisonKind::UndefOrPoison, 7405 ConsiderFlagsAndMetadata); 7406 } 7407 7408 bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata) { 7409 return ::canCreateUndefOrPoison(Op, UndefPoisonKind::PoisonOnly, 7410 ConsiderFlagsAndMetadata); 7411 } 7412 7413 static bool directlyImpliesPoison(const Value *ValAssumedPoison, const Value *V, 7414 unsigned Depth) { 7415 if (ValAssumedPoison == V) 7416 return true; 7417 7418 const unsigned MaxDepth = 2; 7419 if (Depth >= MaxDepth) 7420 return false; 7421 7422 if (const auto *I = dyn_cast<Instruction>(V)) { 7423 if (any_of(I->operands(), [=](const Use &Op) { 7424 return propagatesPoison(Op) && 7425 directlyImpliesPoison(ValAssumedPoison, Op, Depth + 1); 7426 })) 7427 return true; 7428 7429 // V = extractvalue V0, idx 7430 // V2 = extractvalue V0, idx2 7431 // V0's elements are all poison or not. 
(e.g., add_with_overflow) 7432 const WithOverflowInst *II; 7433 if (match(I, m_ExtractValue(m_WithOverflowInst(II))) && 7434 (match(ValAssumedPoison, m_ExtractValue(m_Specific(II))) || 7435 llvm::is_contained(II->args(), ValAssumedPoison))) 7436 return true; 7437 } 7438 return false; 7439 } 7440 7441 static bool impliesPoison(const Value *ValAssumedPoison, const Value *V, 7442 unsigned Depth) { 7443 if (isGuaranteedNotToBePoison(ValAssumedPoison)) 7444 return true; 7445 7446 if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0)) 7447 return true; 7448 7449 const unsigned MaxDepth = 2; 7450 if (Depth >= MaxDepth) 7451 return false; 7452 7453 const auto *I = dyn_cast<Instruction>(ValAssumedPoison); 7454 if (I && !canCreatePoison(cast<Operator>(I))) { 7455 return all_of(I->operands(), [=](const Value *Op) { 7456 return impliesPoison(Op, V, Depth + 1); 7457 }); 7458 } 7459 return false; 7460 } 7461 7462 bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) { 7463 return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0); 7464 } 7465 7466 static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly); 7467 7468 static bool isGuaranteedNotToBeUndefOrPoison( 7469 const Value *V, AssumptionCache *AC, const Instruction *CtxI, 7470 const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) { 7471 if (Depth >= MaxAnalysisRecursionDepth) 7472 return false; 7473 7474 if (isa<MetadataAsValue>(V)) 7475 return false; 7476 7477 if (const auto *A = dyn_cast<Argument>(V)) { 7478 if (A->hasAttribute(Attribute::NoUndef) || 7479 A->hasAttribute(Attribute::Dereferenceable) || 7480 A->hasAttribute(Attribute::DereferenceableOrNull)) 7481 return true; 7482 } 7483 7484 if (auto *C = dyn_cast<Constant>(V)) { 7485 if (isa<PoisonValue>(C)) 7486 return !includesPoison(Kind); 7487 7488 if (isa<UndefValue>(C)) 7489 return !includesUndef(Kind); 7490 7491 if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(V) || 7492 isa<ConstantPointerNull>(C) || isa<Function>(C)) 7493 return true; 7494 7495 if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C)) { 7496 if (includesUndef(Kind) && C->containsUndefElement()) 7497 return false; 7498 if (includesPoison(Kind) && C->containsPoisonElement()) 7499 return false; 7500 return !C->containsConstantExpression(); 7501 } 7502 } 7503 7504 // Strip cast operations from a pointer value. 7505 // Note that stripPointerCastsSameRepresentation can strip off getelementptr 7506 // inbounds with zero offset. To guarantee that the result isn't poison, the 7507 // stripped pointer is checked as it has to be pointing into an allocated 7508 // object or be null `null` to ensure `inbounds` getelement pointers with a 7509 // zero offset could not produce poison. 7510 // It can strip off addrspacecast that do not change bit representation as 7511 // well. We believe that such addrspacecast is equivalent to no-op. 7512 auto *StrippedV = V->stripPointerCastsSameRepresentation(); 7513 if (isa<AllocaInst>(StrippedV) || isa<GlobalVariable>(StrippedV) || 7514 isa<Function>(StrippedV) || isa<ConstantPointerNull>(StrippedV)) 7515 return true; 7516 7517 auto OpCheck = [&](const Value *V) { 7518 return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1, Kind); 7519 }; 7520 7521 if (auto *Opr = dyn_cast<Operator>(V)) { 7522 // If the value is a freeze instruction, then it can never 7523 // be undef or poison. 
7524 if (isa<FreezeInst>(V)) 7525 return true; 7526 7527 if (const auto *CB = dyn_cast<CallBase>(V)) { 7528 if (CB->hasRetAttr(Attribute::NoUndef) || 7529 CB->hasRetAttr(Attribute::Dereferenceable) || 7530 CB->hasRetAttr(Attribute::DereferenceableOrNull)) 7531 return true; 7532 } 7533 7534 if (const auto *PN = dyn_cast<PHINode>(V)) { 7535 unsigned Num = PN->getNumIncomingValues(); 7536 bool IsWellDefined = true; 7537 for (unsigned i = 0; i < Num; ++i) { 7538 auto *TI = PN->getIncomingBlock(i)->getTerminator(); 7539 if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI, 7540 DT, Depth + 1, Kind)) { 7541 IsWellDefined = false; 7542 break; 7543 } 7544 } 7545 if (IsWellDefined) 7546 return true; 7547 } else if (!::canCreateUndefOrPoison(Opr, Kind, 7548 /*ConsiderFlagsAndMetadata*/ true) && 7549 all_of(Opr->operands(), OpCheck)) 7550 return true; 7551 } 7552 7553 if (auto *I = dyn_cast<LoadInst>(V)) 7554 if (I->hasMetadata(LLVMContext::MD_noundef) || 7555 I->hasMetadata(LLVMContext::MD_dereferenceable) || 7556 I->hasMetadata(LLVMContext::MD_dereferenceable_or_null)) 7557 return true; 7558 7559 if (programUndefinedIfUndefOrPoison(V, !includesUndef(Kind))) 7560 return true; 7561 7562 // CxtI may be null or a cloned instruction. 7563 if (!CtxI || !CtxI->getParent() || !DT) 7564 return false; 7565 7566 auto *DNode = DT->getNode(CtxI->getParent()); 7567 if (!DNode) 7568 // Unreachable block 7569 return false; 7570 7571 // If V is used as a branch condition before reaching CtxI, V cannot be 7572 // undef or poison. 7573 // br V, BB1, BB2 7574 // BB1: 7575 // CtxI ; V cannot be undef or poison here 7576 auto *Dominator = DNode->getIDom(); 7577 // This check is purely for compile time reasons: we can skip the IDom walk 7578 // if what we are checking for includes undef and the value is not an integer. 
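  // Branching on undef or poison is immediate UB, so if V (or, in the
  // poison-only case, a value that propagates poison from V) was the condition
  // of a branch or switch dominating CtxI, the fact that execution reached
  // CtxI shows V is well defined.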
7579 if (!includesUndef(Kind) || V->getType()->isIntegerTy()) 7580 while (Dominator) { 7581 auto *TI = Dominator->getBlock()->getTerminator(); 7582 7583 Value *Cond = nullptr; 7584 if (auto BI = dyn_cast_or_null<BranchInst>(TI)) { 7585 if (BI->isConditional()) 7586 Cond = BI->getCondition(); 7587 } else if (auto SI = dyn_cast_or_null<SwitchInst>(TI)) { 7588 Cond = SI->getCondition(); 7589 } 7590 7591 if (Cond) { 7592 if (Cond == V) 7593 return true; 7594 else if (!includesUndef(Kind) && isa<Operator>(Cond)) { 7595 // For poison, we can analyze further 7596 auto *Opr = cast<Operator>(Cond); 7597 if (any_of(Opr->operands(), [V](const Use &U) { 7598 return V == U && propagatesPoison(U); 7599 })) 7600 return true; 7601 } 7602 } 7603 7604 Dominator = Dominator->getIDom(); 7605 } 7606 7607 if (getKnowledgeValidInContext(V, {Attribute::NoUndef}, CtxI, DT, AC)) 7608 return true; 7609 7610 return false; 7611 } 7612 7613 bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC, 7614 const Instruction *CtxI, 7615 const DominatorTree *DT, 7616 unsigned Depth) { 7617 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, 7618 UndefPoisonKind::UndefOrPoison); 7619 } 7620 7621 bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC, 7622 const Instruction *CtxI, 7623 const DominatorTree *DT, unsigned Depth) { 7624 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, 7625 UndefPoisonKind::PoisonOnly); 7626 } 7627 7628 bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC, 7629 const Instruction *CtxI, 7630 const DominatorTree *DT, unsigned Depth) { 7631 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, 7632 UndefPoisonKind::UndefOnly); 7633 } 7634 7635 /// Return true if undefined behavior would provably be executed on the path to 7636 /// OnPathTo if Root produced a posion result. Note that this doesn't say 7637 /// anything about whether OnPathTo is actually executed or whether Root is 7638 /// actually poison. This can be used to assess whether a new use of Root can 7639 /// be added at a location which is control equivalent with OnPathTo (such as 7640 /// immediately before it) without introducing UB which didn't previously 7641 /// exist. Note that a false result conveys no information. 7642 bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root, 7643 Instruction *OnPathTo, 7644 DominatorTree *DT) { 7645 // Basic approach is to assume Root is poison, propagate poison forward 7646 // through all users we can easily track, and then check whether any of those 7647 // users are provable UB and must execute before out exiting block might 7648 // exit. 7649 7650 // The set of all recursive users we've visited (which are assumed to all be 7651 // poison because of said visit) 7652 SmallSet<const Value *, 16> KnownPoison; 7653 SmallVector<const Instruction*, 16> Worklist; 7654 Worklist.push_back(Root); 7655 while (!Worklist.empty()) { 7656 const Instruction *I = Worklist.pop_back_val(); 7657 7658 // If we know this must trigger UB on a path leading our target. 7659 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) 7660 return true; 7661 7662 // If we can't analyze propagation through this instruction, just skip it 7663 // and transitive users. Safe as false is a conservative result. 
7664 if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) { 7665 return KnownPoison.contains(U) && propagatesPoison(U); 7666 })) 7667 continue; 7668 7669 if (KnownPoison.insert(I).second) 7670 for (const User *User : I->users()) 7671 Worklist.push_back(cast<Instruction>(User)); 7672 } 7673 7674 // Might be non-UB, or might have a path we couldn't prove must execute on 7675 // way to exiting bb. 7676 return false; 7677 } 7678 7679 OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, 7680 const SimplifyQuery &SQ) { 7681 return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), 7682 Add, SQ); 7683 } 7684 7685 OverflowResult 7686 llvm::computeOverflowForSignedAdd(const WithCache<const Value *> &LHS, 7687 const WithCache<const Value *> &RHS, 7688 const SimplifyQuery &SQ) { 7689 return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, SQ); 7690 } 7691 7692 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { 7693 // Note: An atomic operation isn't guaranteed to return in a reasonable amount 7694 // of time because it's possible for another thread to interfere with it for an 7695 // arbitrary length of time, but programs aren't allowed to rely on that. 7696 7697 // If there is no successor, then execution can't transfer to it. 7698 if (isa<ReturnInst>(I)) 7699 return false; 7700 if (isa<UnreachableInst>(I)) 7701 return false; 7702 7703 // Note: Do not add new checks here; instead, change Instruction::mayThrow or 7704 // Instruction::willReturn. 7705 // 7706 // FIXME: Move this check into Instruction::willReturn. 7707 if (isa<CatchPadInst>(I)) { 7708 switch (classifyEHPersonality(I->getFunction()->getPersonalityFn())) { 7709 default: 7710 // A catchpad may invoke exception object constructors and such, which 7711 // in some languages can be arbitrary code, so be conservative by default. 7712 return false; 7713 case EHPersonality::CoreCLR: 7714 // For CoreCLR, it just involves a type test. 7715 return true; 7716 } 7717 } 7718 7719 // An instruction that returns without throwing must transfer control flow 7720 // to a successor. 7721 return !I->mayThrow() && I->willReturn(); 7722 } 7723 7724 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) { 7725 // TODO: This is slightly conservative for invoke instruction since exiting 7726 // via an exception *is* normal control for them. 7727 for (const Instruction &I : *BB) 7728 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7729 return false; 7730 return true; 7731 } 7732 7733 bool llvm::isGuaranteedToTransferExecutionToSuccessor( 7734 BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, 7735 unsigned ScanLimit) { 7736 return isGuaranteedToTransferExecutionToSuccessor(make_range(Begin, End), 7737 ScanLimit); 7738 } 7739 7740 bool llvm::isGuaranteedToTransferExecutionToSuccessor( 7741 iterator_range<BasicBlock::const_iterator> Range, unsigned ScanLimit) { 7742 assert(ScanLimit && "scan limit must be non-zero"); 7743 for (const Instruction &I : Range) { 7744 if (isa<DbgInfoIntrinsic>(I)) 7745 continue; 7746 if (--ScanLimit == 0) 7747 return false; 7748 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7749 return false; 7750 } 7751 return true; 7752 } 7753 7754 bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, 7755 const Loop *L) { 7756 // The loop header is guaranteed to be executed for every iteration. 
7757 // 7758 // FIXME: Relax this constraint to cover all basic blocks that are 7759 // guaranteed to be executed at every iteration. 7760 if (I->getParent() != L->getHeader()) return false; 7761 7762 for (const Instruction &LI : *L->getHeader()) { 7763 if (&LI == I) return true; 7764 if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false; 7765 } 7766 llvm_unreachable("Instruction not contained in its own parent basic block."); 7767 } 7768 7769 bool llvm::propagatesPoison(const Use &PoisonOp) { 7770 const Operator *I = cast<Operator>(PoisonOp.getUser()); 7771 switch (I->getOpcode()) { 7772 case Instruction::Freeze: 7773 case Instruction::PHI: 7774 case Instruction::Invoke: 7775 return false; 7776 case Instruction::Select: 7777 return PoisonOp.getOperandNo() == 0; 7778 case Instruction::Call: 7779 if (auto *II = dyn_cast<IntrinsicInst>(I)) { 7780 switch (II->getIntrinsicID()) { 7781 // TODO: Add more intrinsics. 7782 case Intrinsic::sadd_with_overflow: 7783 case Intrinsic::ssub_with_overflow: 7784 case Intrinsic::smul_with_overflow: 7785 case Intrinsic::uadd_with_overflow: 7786 case Intrinsic::usub_with_overflow: 7787 case Intrinsic::umul_with_overflow: 7788 // If an input is a vector containing a poison element, the 7789 // two output vectors (calculated results, overflow bits)' 7790 // corresponding lanes are poison. 7791 return true; 7792 case Intrinsic::ctpop: 7793 case Intrinsic::ctlz: 7794 case Intrinsic::cttz: 7795 case Intrinsic::abs: 7796 case Intrinsic::smax: 7797 case Intrinsic::smin: 7798 case Intrinsic::umax: 7799 case Intrinsic::umin: 7800 case Intrinsic::bitreverse: 7801 case Intrinsic::bswap: 7802 case Intrinsic::sadd_sat: 7803 case Intrinsic::ssub_sat: 7804 case Intrinsic::sshl_sat: 7805 case Intrinsic::uadd_sat: 7806 case Intrinsic::usub_sat: 7807 case Intrinsic::ushl_sat: 7808 return true; 7809 } 7810 } 7811 return false; 7812 case Instruction::ICmp: 7813 case Instruction::FCmp: 7814 case Instruction::GetElementPtr: 7815 return true; 7816 default: 7817 if (isa<BinaryOperator>(I) || isa<UnaryOperator>(I) || isa<CastInst>(I)) 7818 return true; 7819 7820 // Be conservative and return false. 7821 return false; 7822 } 7823 } 7824 7825 /// Enumerates all operands of \p I that are guaranteed to not be undef or 7826 /// poison. If the callback \p Handle returns true, stop processing and return 7827 /// true. Otherwise, return false. 
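/// For example (illustrative operands), for 'store i32 %v, ptr %p' only the
/// pointer %p is reported: the pointer operand of a store must be
/// well-defined, while the stored value %v need not be.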
7828 template <typename CallableT> 7829 static bool handleGuaranteedWellDefinedOps(const Instruction *I, 7830 const CallableT &Handle) { 7831 switch (I->getOpcode()) { 7832 case Instruction::Store: 7833 if (Handle(cast<StoreInst>(I)->getPointerOperand())) 7834 return true; 7835 break; 7836 7837 case Instruction::Load: 7838 if (Handle(cast<LoadInst>(I)->getPointerOperand())) 7839 return true; 7840 break; 7841 7842 // Since dereferenceable attribute imply noundef, atomic operations 7843 // also implicitly have noundef pointers too 7844 case Instruction::AtomicCmpXchg: 7845 if (Handle(cast<AtomicCmpXchgInst>(I)->getPointerOperand())) 7846 return true; 7847 break; 7848 7849 case Instruction::AtomicRMW: 7850 if (Handle(cast<AtomicRMWInst>(I)->getPointerOperand())) 7851 return true; 7852 break; 7853 7854 case Instruction::Call: 7855 case Instruction::Invoke: { 7856 const CallBase *CB = cast<CallBase>(I); 7857 if (CB->isIndirectCall() && Handle(CB->getCalledOperand())) 7858 return true; 7859 for (unsigned i = 0; i < CB->arg_size(); ++i) 7860 if ((CB->paramHasAttr(i, Attribute::NoUndef) || 7861 CB->paramHasAttr(i, Attribute::Dereferenceable) || 7862 CB->paramHasAttr(i, Attribute::DereferenceableOrNull)) && 7863 Handle(CB->getArgOperand(i))) 7864 return true; 7865 break; 7866 } 7867 case Instruction::Ret: 7868 if (I->getFunction()->hasRetAttribute(Attribute::NoUndef) && 7869 Handle(I->getOperand(0))) 7870 return true; 7871 break; 7872 case Instruction::Switch: 7873 if (Handle(cast<SwitchInst>(I)->getCondition())) 7874 return true; 7875 break; 7876 case Instruction::Br: { 7877 auto *BR = cast<BranchInst>(I); 7878 if (BR->isConditional() && Handle(BR->getCondition())) 7879 return true; 7880 break; 7881 } 7882 default: 7883 break; 7884 } 7885 7886 return false; 7887 } 7888 7889 void llvm::getGuaranteedWellDefinedOps( 7890 const Instruction *I, SmallVectorImpl<const Value *> &Operands) { 7891 handleGuaranteedWellDefinedOps(I, [&](const Value *V) { 7892 Operands.push_back(V); 7893 return false; 7894 }); 7895 } 7896 7897 /// Enumerates all operands of \p I that are guaranteed to not be poison. 7898 template <typename CallableT> 7899 static bool handleGuaranteedNonPoisonOps(const Instruction *I, 7900 const CallableT &Handle) { 7901 if (handleGuaranteedWellDefinedOps(I, Handle)) 7902 return true; 7903 switch (I->getOpcode()) { 7904 // Divisors of these operations are allowed to be partially undef. 7905 case Instruction::UDiv: 7906 case Instruction::SDiv: 7907 case Instruction::URem: 7908 case Instruction::SRem: 7909 return Handle(I->getOperand(1)); 7910 default: 7911 return false; 7912 } 7913 } 7914 7915 void llvm::getGuaranteedNonPoisonOps(const Instruction *I, 7916 SmallVectorImpl<const Value *> &Operands) { 7917 handleGuaranteedNonPoisonOps(I, [&](const Value *V) { 7918 Operands.push_back(V); 7919 return false; 7920 }); 7921 } 7922 7923 bool llvm::mustTriggerUB(const Instruction *I, 7924 const SmallPtrSetImpl<const Value *> &KnownPoison) { 7925 return handleGuaranteedNonPoisonOps( 7926 I, [&](const Value *V) { return KnownPoison.count(V); }); 7927 } 7928 7929 static bool programUndefinedIfUndefOrPoison(const Value *V, 7930 bool PoisonOnly) { 7931 // We currently only look for uses of values within the same basic 7932 // block, as that makes it easier to guarantee that the uses will be 7933 // executed given that Inst is executed. 7934 // 7935 // FIXME: Expand this to consider uses beyond the same basic block. 
To do 7936 // this, look out for the distinction between post-dominance and strong 7937 // post-dominance. 7938 const BasicBlock *BB = nullptr; 7939 BasicBlock::const_iterator Begin; 7940 if (const auto *Inst = dyn_cast<Instruction>(V)) { 7941 BB = Inst->getParent(); 7942 Begin = Inst->getIterator(); 7943 Begin++; 7944 } else if (const auto *Arg = dyn_cast<Argument>(V)) { 7945 if (Arg->getParent()->isDeclaration()) 7946 return false; 7947 BB = &Arg->getParent()->getEntryBlock(); 7948 Begin = BB->begin(); 7949 } else { 7950 return false; 7951 } 7952 7953 // Limit number of instructions we look at, to avoid scanning through large 7954 // blocks. The current limit is chosen arbitrarily. 7955 unsigned ScanLimit = 32; 7956 BasicBlock::const_iterator End = BB->end(); 7957 7958 if (!PoisonOnly) { 7959 // Since undef does not propagate eagerly, be conservative & just check 7960 // whether a value is directly passed to an instruction that must take 7961 // well-defined operands. 7962 7963 for (const auto &I : make_range(Begin, End)) { 7964 if (isa<DbgInfoIntrinsic>(I)) 7965 continue; 7966 if (--ScanLimit == 0) 7967 break; 7968 7969 if (handleGuaranteedWellDefinedOps(&I, [V](const Value *WellDefinedOp) { 7970 return WellDefinedOp == V; 7971 })) 7972 return true; 7973 7974 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7975 break; 7976 } 7977 return false; 7978 } 7979 7980 // Set of instructions that we have proved will yield poison if Inst 7981 // does. 7982 SmallSet<const Value *, 16> YieldsPoison; 7983 SmallSet<const BasicBlock *, 4> Visited; 7984 7985 YieldsPoison.insert(V); 7986 Visited.insert(BB); 7987 7988 while (true) { 7989 for (const auto &I : make_range(Begin, End)) { 7990 if (isa<DbgInfoIntrinsic>(I)) 7991 continue; 7992 if (--ScanLimit == 0) 7993 return false; 7994 if (mustTriggerUB(&I, YieldsPoison)) 7995 return true; 7996 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7997 return false; 7998 7999 // If an operand is poison and propagates it, mark I as yielding poison. 8000 for (const Use &Op : I.operands()) { 8001 if (YieldsPoison.count(Op) && propagatesPoison(Op)) { 8002 YieldsPoison.insert(&I); 8003 break; 8004 } 8005 } 8006 8007 // Special handling for select, which returns poison if its operand 0 is 8008 // poison (handled in the loop above) *or* if both its true/false operands 8009 // are poison (handled here). 
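      // For example (illustrative operands), 'select i1 %c, i32 %a, i32 %b' is
      // poison whenever both %a and %b are poison, no matter which operand %c
      // picks.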
8010       if (I.getOpcode() == Instruction::Select &&
8011           YieldsPoison.count(I.getOperand(1)) &&
8012           YieldsPoison.count(I.getOperand(2))) {
8013         YieldsPoison.insert(&I);
8014       }
8015     }
8016 
8017     BB = BB->getSingleSuccessor();
8018     if (!BB || !Visited.insert(BB).second)
8019       break;
8020 
8021     Begin = BB->getFirstNonPHI()->getIterator();
8022     End = BB->end();
8023   }
8024   return false;
8025 }
8026 
8027 bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) {
8028   return ::programUndefinedIfUndefOrPoison(Inst, false);
8029 }
8030 
8031 bool llvm::programUndefinedIfPoison(const Instruction *Inst) {
8032   return ::programUndefinedIfUndefOrPoison(Inst, true);
8033 }
8034 
8035 static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
8036   if (FMF.noNaNs())
8037     return true;
8038 
8039   if (auto *C = dyn_cast<ConstantFP>(V))
8040     return !C->isNaN();
8041 
8042   if (auto *C = dyn_cast<ConstantDataVector>(V)) {
8043     if (!C->getElementType()->isFloatingPointTy())
8044       return false;
8045     for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
8046       if (C->getElementAsAPFloat(I).isNaN())
8047         return false;
8048     }
8049     return true;
8050   }
8051 
8052   if (isa<ConstantAggregateZero>(V))
8053     return true;
8054 
8055   return false;
8056 }
8057 
8058 static bool isKnownNonZero(const Value *V) {
8059   if (auto *C = dyn_cast<ConstantFP>(V))
8060     return !C->isZero();
8061 
8062   if (auto *C = dyn_cast<ConstantDataVector>(V)) {
8063     if (!C->getElementType()->isFloatingPointTy())
8064       return false;
8065     for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
8066       if (C->getElementAsAPFloat(I).isZero())
8067         return false;
8068     }
8069     return true;
8070   }
8071 
8072   return false;
8073 }
8074 
8075 /// Match clamp pattern for float types without caring about NaNs or signed zeros.
8076 /// Given a non-min/max outer cmp/select from the clamp pattern, this
8077 /// function recognizes if it can be substituted by a "canonical" min/max
8078 /// pattern.
8079 static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred,
8080                                                Value *CmpLHS, Value *CmpRHS,
8081                                                Value *TrueVal, Value *FalseVal,
8082                                                Value *&LHS, Value *&RHS) {
8083   // Try to match
8084   //   X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2))
8085   //   X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2))
8086   // and return description of the outer Max/Min.
8087 
8088   // First, check if select has inverse order:
8089   if (CmpRHS == FalseVal) {
8090     std::swap(TrueVal, FalseVal);
8091     Pred = CmpInst::getInversePredicate(Pred);
8092   }
8093 
8094   // Assume success now. If there's no match, callers should not use these anyway.
8095 LHS = TrueVal; 8096 RHS = FalseVal; 8097 8098 const APFloat *FC1; 8099 if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite()) 8100 return {SPF_UNKNOWN, SPNB_NA, false}; 8101 8102 const APFloat *FC2; 8103 switch (Pred) { 8104 case CmpInst::FCMP_OLT: 8105 case CmpInst::FCMP_OLE: 8106 case CmpInst::FCMP_ULT: 8107 case CmpInst::FCMP_ULE: 8108 if (match(FalseVal, 8109 m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)), 8110 m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) && 8111 *FC1 < *FC2) 8112 return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false}; 8113 break; 8114 case CmpInst::FCMP_OGT: 8115 case CmpInst::FCMP_OGE: 8116 case CmpInst::FCMP_UGT: 8117 case CmpInst::FCMP_UGE: 8118 if (match(FalseVal, 8119 m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)), 8120 m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) && 8121 *FC1 > *FC2) 8122 return {SPF_FMINNUM, SPNB_RETURNS_ANY, false}; 8123 break; 8124 default: 8125 break; 8126 } 8127 8128 return {SPF_UNKNOWN, SPNB_NA, false}; 8129 } 8130 8131 /// Recognize variations of: 8132 /// CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v))) 8133 static SelectPatternResult matchClamp(CmpInst::Predicate Pred, 8134 Value *CmpLHS, Value *CmpRHS, 8135 Value *TrueVal, Value *FalseVal) { 8136 // Swap the select operands and predicate to match the patterns below. 8137 if (CmpRHS != TrueVal) { 8138 Pred = ICmpInst::getSwappedPredicate(Pred); 8139 std::swap(TrueVal, FalseVal); 8140 } 8141 const APInt *C1; 8142 if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) { 8143 const APInt *C2; 8144 // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1) 8145 if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) && 8146 C1->slt(*C2) && Pred == CmpInst::ICMP_SLT) 8147 return {SPF_SMAX, SPNB_NA, false}; 8148 8149 // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1) 8150 if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) && 8151 C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT) 8152 return {SPF_SMIN, SPNB_NA, false}; 8153 8154 // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1) 8155 if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) && 8156 C1->ult(*C2) && Pred == CmpInst::ICMP_ULT) 8157 return {SPF_UMAX, SPNB_NA, false}; 8158 8159 // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1) 8160 if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) && 8161 C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT) 8162 return {SPF_UMIN, SPNB_NA, false}; 8163 } 8164 return {SPF_UNKNOWN, SPNB_NA, false}; 8165 } 8166 8167 /// Recognize variations of: 8168 /// a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c)) 8169 static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, 8170 Value *CmpLHS, Value *CmpRHS, 8171 Value *TVal, Value *FVal, 8172 unsigned Depth) { 8173 // TODO: Allow FP min/max with nnan/nsz. 8174 assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison"); 8175 8176 Value *A = nullptr, *B = nullptr; 8177 SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1); 8178 if (!SelectPatternResult::isMinOrMax(L.Flavor)) 8179 return {SPF_UNKNOWN, SPNB_NA, false}; 8180 8181 Value *C = nullptr, *D = nullptr; 8182 SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1); 8183 if (L.Flavor != R.Flavor) 8184 return {SPF_UNKNOWN, SPNB_NA, false}; 8185 8186 // We have something like: x Pred y ? min(a, b) : min(c, d). 8187 // Try to match the compare to the min/max operations of the select operands. 
8188 // First, make sure we have the right compare predicate. 8189 switch (L.Flavor) { 8190 case SPF_SMIN: 8191 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) { 8192 Pred = ICmpInst::getSwappedPredicate(Pred); 8193 std::swap(CmpLHS, CmpRHS); 8194 } 8195 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) 8196 break; 8197 return {SPF_UNKNOWN, SPNB_NA, false}; 8198 case SPF_SMAX: 8199 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { 8200 Pred = ICmpInst::getSwappedPredicate(Pred); 8201 std::swap(CmpLHS, CmpRHS); 8202 } 8203 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) 8204 break; 8205 return {SPF_UNKNOWN, SPNB_NA, false}; 8206 case SPF_UMIN: 8207 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { 8208 Pred = ICmpInst::getSwappedPredicate(Pred); 8209 std::swap(CmpLHS, CmpRHS); 8210 } 8211 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) 8212 break; 8213 return {SPF_UNKNOWN, SPNB_NA, false}; 8214 case SPF_UMAX: 8215 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { 8216 Pred = ICmpInst::getSwappedPredicate(Pred); 8217 std::swap(CmpLHS, CmpRHS); 8218 } 8219 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) 8220 break; 8221 return {SPF_UNKNOWN, SPNB_NA, false}; 8222 default: 8223 return {SPF_UNKNOWN, SPNB_NA, false}; 8224 } 8225 8226 // If there is a common operand in the already matched min/max and the other 8227 // min/max operands match the compare operands (either directly or inverted), 8228 // then this is min/max of the same flavor. 8229 8230 // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) 8231 // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) 8232 if (D == B) { 8233 if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && 8234 match(A, m_Not(m_Specific(CmpRHS))))) 8235 return {L.Flavor, SPNB_NA, false}; 8236 } 8237 // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) 8238 // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) 8239 if (C == B) { 8240 if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && 8241 match(A, m_Not(m_Specific(CmpRHS))))) 8242 return {L.Flavor, SPNB_NA, false}; 8243 } 8244 // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) 8245 // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) 8246 if (D == A) { 8247 if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && 8248 match(B, m_Not(m_Specific(CmpRHS))))) 8249 return {L.Flavor, SPNB_NA, false}; 8250 } 8251 // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) 8252 // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) 8253 if (C == A) { 8254 if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && 8255 match(B, m_Not(m_Specific(CmpRHS))))) 8256 return {L.Flavor, SPNB_NA, false}; 8257 } 8258 8259 return {SPF_UNKNOWN, SPNB_NA, false}; 8260 } 8261 8262 /// If the input value is the result of a 'not' op, constant integer, or vector 8263 /// splat of a constant integer, return the bitwise-not source value. 8264 /// TODO: This could be extended to handle non-splat vector integer constants. 8265 static Value *getNotValue(Value *V) { 8266 Value *NotV; 8267 if (match(V, m_Not(m_Value(NotV)))) 8268 return NotV; 8269 8270 const APInt *C; 8271 if (match(V, m_APInt(C))) 8272 return ConstantInt::get(V->getType(), ~(*C)); 8273 8274 return nullptr; 8275 } 8276 8277 /// Match non-obvious integer minimum and maximum sequences. 
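/// For example (illustrative operands), (x >s y) ? ~x : ~y picks the smaller
/// of ~x and ~y, i.e. an smin hidden behind 'not' operations.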
8278 static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, 8279 Value *CmpLHS, Value *CmpRHS, 8280 Value *TrueVal, Value *FalseVal, 8281 Value *&LHS, Value *&RHS, 8282 unsigned Depth) { 8283 // Assume success. If there's no match, callers should not use these anyway. 8284 LHS = TrueVal; 8285 RHS = FalseVal; 8286 8287 SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal); 8288 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) 8289 return SPR; 8290 8291 SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth); 8292 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) 8293 return SPR; 8294 8295 // Look through 'not' ops to find disguised min/max. 8296 // (X > Y) ? ~X : ~Y ==> (~X < ~Y) ? ~X : ~Y ==> MIN(~X, ~Y) 8297 // (X < Y) ? ~X : ~Y ==> (~X > ~Y) ? ~X : ~Y ==> MAX(~X, ~Y) 8298 if (CmpLHS == getNotValue(TrueVal) && CmpRHS == getNotValue(FalseVal)) { 8299 switch (Pred) { 8300 case CmpInst::ICMP_SGT: return {SPF_SMIN, SPNB_NA, false}; 8301 case CmpInst::ICMP_SLT: return {SPF_SMAX, SPNB_NA, false}; 8302 case CmpInst::ICMP_UGT: return {SPF_UMIN, SPNB_NA, false}; 8303 case CmpInst::ICMP_ULT: return {SPF_UMAX, SPNB_NA, false}; 8304 default: break; 8305 } 8306 } 8307 8308 // (X > Y) ? ~Y : ~X ==> (~X < ~Y) ? ~Y : ~X ==> MAX(~Y, ~X) 8309 // (X < Y) ? ~Y : ~X ==> (~X > ~Y) ? ~Y : ~X ==> MIN(~Y, ~X) 8310 if (CmpLHS == getNotValue(FalseVal) && CmpRHS == getNotValue(TrueVal)) { 8311 switch (Pred) { 8312 case CmpInst::ICMP_SGT: return {SPF_SMAX, SPNB_NA, false}; 8313 case CmpInst::ICMP_SLT: return {SPF_SMIN, SPNB_NA, false}; 8314 case CmpInst::ICMP_UGT: return {SPF_UMAX, SPNB_NA, false}; 8315 case CmpInst::ICMP_ULT: return {SPF_UMIN, SPNB_NA, false}; 8316 default: break; 8317 } 8318 } 8319 8320 if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) 8321 return {SPF_UNKNOWN, SPNB_NA, false}; 8322 8323 const APInt *C1; 8324 if (!match(CmpRHS, m_APInt(C1))) 8325 return {SPF_UNKNOWN, SPNB_NA, false}; 8326 8327 // An unsigned min/max can be written with a signed compare. 8328 const APInt *C2; 8329 if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) || 8330 (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) { 8331 // Is the sign bit set? 8332 // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX 8333 // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN 8334 if (Pred == CmpInst::ICMP_SLT && C1->isZero() && C2->isMaxSignedValue()) 8335 return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; 8336 8337 // Is the sign bit clear? 8338 // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX 8339 // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN 8340 if (Pred == CmpInst::ICMP_SGT && C1->isAllOnes() && C2->isMinSignedValue()) 8341 return {CmpLHS == FalseVal ? 
SPF_UMAX : SPF_UMIN, SPNB_NA, false}; 8342 } 8343 8344 return {SPF_UNKNOWN, SPNB_NA, false}; 8345 } 8346 8347 bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW, 8348 bool AllowPoison) { 8349 assert(X && Y && "Invalid operand"); 8350 8351 auto IsNegationOf = [&](const Value *X, const Value *Y) { 8352 if (!match(X, m_Neg(m_Specific(Y)))) 8353 return false; 8354 8355 auto *BO = cast<BinaryOperator>(X); 8356 if (NeedNSW && !BO->hasNoSignedWrap()) 8357 return false; 8358 8359 auto *Zero = cast<Constant>(BO->getOperand(0)); 8360 if (!AllowPoison && !Zero->isNullValue()) 8361 return false; 8362 8363 return true; 8364 }; 8365 8366 // X = -Y or Y = -X 8367 if (IsNegationOf(X, Y) || IsNegationOf(Y, X)) 8368 return true; 8369 8370 // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A) 8371 Value *A, *B; 8372 return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) && 8373 match(Y, m_Sub(m_Specific(B), m_Specific(A))))) || 8374 (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) && 8375 match(Y, m_NSWSub(m_Specific(B), m_Specific(A))))); 8376 } 8377 8378 bool llvm::isKnownInversion(const Value *X, const Value *Y) { 8379 // Handle X = icmp pred A, B, Y = icmp pred A, C. 8380 Value *A, *B, *C; 8381 ICmpInst::Predicate Pred1, Pred2; 8382 if (!match(X, m_ICmp(Pred1, m_Value(A), m_Value(B))) || 8383 !match(Y, m_c_ICmp(Pred2, m_Specific(A), m_Value(C)))) 8384 return false; 8385 8386 if (B == C) 8387 return Pred1 == ICmpInst::getInversePredicate(Pred2); 8388 8389 // Try to infer the relationship from constant ranges. 8390 const APInt *RHSC1, *RHSC2; 8391 if (!match(B, m_APInt(RHSC1)) || !match(C, m_APInt(RHSC2))) 8392 return false; 8393 8394 const auto CR1 = ConstantRange::makeExactICmpRegion(Pred1, *RHSC1); 8395 const auto CR2 = ConstantRange::makeExactICmpRegion(Pred2, *RHSC2); 8396 8397 return CR1.inverse() == CR2; 8398 } 8399 8400 static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, 8401 FastMathFlags FMF, 8402 Value *CmpLHS, Value *CmpRHS, 8403 Value *TrueVal, Value *FalseVal, 8404 Value *&LHS, Value *&RHS, 8405 unsigned Depth) { 8406 bool HasMismatchedZeros = false; 8407 if (CmpInst::isFPPredicate(Pred)) { 8408 // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one 8409 // 0.0 operand, set the compare's 0.0 operands to that same value for the 8410 // purpose of identifying min/max. Disregard vector constants with undefined 8411 // elements because those can not be back-propagated for analysis. 8412 Value *OutputZeroVal = nullptr; 8413 if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) && 8414 !cast<Constant>(TrueVal)->containsUndefOrPoisonElement()) 8415 OutputZeroVal = TrueVal; 8416 else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) && 8417 !cast<Constant>(FalseVal)->containsUndefOrPoisonElement()) 8418 OutputZeroVal = FalseVal; 8419 8420 if (OutputZeroVal) { 8421 if (match(CmpLHS, m_AnyZeroFP()) && CmpLHS != OutputZeroVal) { 8422 HasMismatchedZeros = true; 8423 CmpLHS = OutputZeroVal; 8424 } 8425 if (match(CmpRHS, m_AnyZeroFP()) && CmpRHS != OutputZeroVal) { 8426 HasMismatchedZeros = true; 8427 CmpRHS = OutputZeroVal; 8428 } 8429 } 8430 } 8431 8432 LHS = CmpLHS; 8433 RHS = CmpRHS; 8434 8435 // Signed zero may return inconsistent results between implementations. 8436 // (0.0 <= -0.0) ? 
0.0 : -0.0 // Returns 0.0 8437 // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1) 8438 // Therefore, we behave conservatively and only proceed if at least one of the 8439 // operands is known to not be zero or if we don't care about signed zero. 8440 switch (Pred) { 8441 default: break; 8442 case CmpInst::FCMP_OGT: case CmpInst::FCMP_OLT: 8443 case CmpInst::FCMP_UGT: case CmpInst::FCMP_ULT: 8444 if (!HasMismatchedZeros) 8445 break; 8446 [[fallthrough]]; 8447 case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE: 8448 case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE: 8449 if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && 8450 !isKnownNonZero(CmpRHS)) 8451 return {SPF_UNKNOWN, SPNB_NA, false}; 8452 } 8453 8454 SelectPatternNaNBehavior NaNBehavior = SPNB_NA; 8455 bool Ordered = false; 8456 8457 // When given one NaN and one non-NaN input: 8458 // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input. 8459 // - A simple C99 (a < b ? a : b) construction will return 'b' (as the 8460 // ordered comparison fails), which could be NaN or non-NaN. 8461 // so here we discover exactly what NaN behavior is required/accepted. 8462 if (CmpInst::isFPPredicate(Pred)) { 8463 bool LHSSafe = isKnownNonNaN(CmpLHS, FMF); 8464 bool RHSSafe = isKnownNonNaN(CmpRHS, FMF); 8465 8466 if (LHSSafe && RHSSafe) { 8467 // Both operands are known non-NaN. 8468 NaNBehavior = SPNB_RETURNS_ANY; 8469 } else if (CmpInst::isOrdered(Pred)) { 8470 // An ordered comparison will return false when given a NaN, so it 8471 // returns the RHS. 8472 Ordered = true; 8473 if (LHSSafe) 8474 // LHS is non-NaN, so if RHS is NaN then NaN will be returned. 8475 NaNBehavior = SPNB_RETURNS_NAN; 8476 else if (RHSSafe) 8477 NaNBehavior = SPNB_RETURNS_OTHER; 8478 else 8479 // Completely unsafe. 8480 return {SPF_UNKNOWN, SPNB_NA, false}; 8481 } else { 8482 Ordered = false; 8483 // An unordered comparison will return true when given a NaN, so it 8484 // returns the LHS. 8485 if (LHSSafe) 8486 // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned. 8487 NaNBehavior = SPNB_RETURNS_OTHER; 8488 else if (RHSSafe) 8489 NaNBehavior = SPNB_RETURNS_NAN; 8490 else 8491 // Completely unsafe. 8492 return {SPF_UNKNOWN, SPNB_NA, false}; 8493 } 8494 } 8495 8496 if (TrueVal == CmpRHS && FalseVal == CmpLHS) { 8497 std::swap(CmpLHS, CmpRHS); 8498 Pred = CmpInst::getSwappedPredicate(Pred); 8499 if (NaNBehavior == SPNB_RETURNS_NAN) 8500 NaNBehavior = SPNB_RETURNS_OTHER; 8501 else if (NaNBehavior == SPNB_RETURNS_OTHER) 8502 NaNBehavior = SPNB_RETURNS_NAN; 8503 Ordered = !Ordered; 8504 } 8505 8506 // ([if]cmp X, Y) ? X : Y 8507 if (TrueVal == CmpLHS && FalseVal == CmpRHS) { 8508 switch (Pred) { 8509 default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality. 
8510 case ICmpInst::ICMP_UGT: 8511 case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false}; 8512 case ICmpInst::ICMP_SGT: 8513 case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false}; 8514 case ICmpInst::ICMP_ULT: 8515 case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false}; 8516 case ICmpInst::ICMP_SLT: 8517 case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false}; 8518 case FCmpInst::FCMP_UGT: 8519 case FCmpInst::FCMP_UGE: 8520 case FCmpInst::FCMP_OGT: 8521 case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered}; 8522 case FCmpInst::FCMP_ULT: 8523 case FCmpInst::FCMP_ULE: 8524 case FCmpInst::FCMP_OLT: 8525 case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered}; 8526 } 8527 } 8528 8529 if (isKnownNegation(TrueVal, FalseVal)) { 8530 // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can 8531 // match against either LHS or sext(LHS). 8532 auto MaybeSExtCmpLHS = 8533 m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS))); 8534 auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes()); 8535 auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One()); 8536 if (match(TrueVal, MaybeSExtCmpLHS)) { 8537 // Set the return values. If the compare uses the negated value (-X >s 0), 8538 // swap the return values because the negated value is always 'RHS'. 8539 LHS = TrueVal; 8540 RHS = FalseVal; 8541 if (match(CmpLHS, m_Neg(m_Specific(FalseVal)))) 8542 std::swap(LHS, RHS); 8543 8544 // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X) 8545 // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X) 8546 if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes)) 8547 return {SPF_ABS, SPNB_NA, false}; 8548 8549 // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X) 8550 if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne)) 8551 return {SPF_ABS, SPNB_NA, false}; 8552 8553 // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X) 8554 // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X) 8555 if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne)) 8556 return {SPF_NABS, SPNB_NA, false}; 8557 } 8558 else if (match(FalseVal, MaybeSExtCmpLHS)) { 8559 // Set the return values. If the compare uses the negated value (-X >s 0), 8560 // swap the return values because the negated value is always 'RHS'. 8561 LHS = FalseVal; 8562 RHS = TrueVal; 8563 if (match(CmpLHS, m_Neg(m_Specific(TrueVal)))) 8564 std::swap(LHS, RHS); 8565 8566 // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X) 8567 // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X) 8568 if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes)) 8569 return {SPF_NABS, SPNB_NA, false}; 8570 8571 // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X) 8572 // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X) 8573 if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne)) 8574 return {SPF_ABS, SPNB_NA, false}; 8575 } 8576 } 8577 8578 if (CmpInst::isIntPredicate(Pred)) 8579 return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth); 8580 8581 // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar 8582 // may return either -0.0 or 0.0, so fcmp/select pair has stricter 8583 // semantics than minNum. Be conservative in such case. 
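  // For example, with CmpLHS == -0.0 and CmpRHS == 0.0 (illustrative
  // constants), 'fcmp olt' is false because signed zeros compare equal, so the
  // select picks its false operand, whereas minnum(-0.0, 0.0) may return
  // either zero.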
8584 if (NaNBehavior != SPNB_RETURNS_ANY || 8585 (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && 8586 !isKnownNonZero(CmpRHS))) 8587 return {SPF_UNKNOWN, SPNB_NA, false}; 8588 8589 return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); 8590 } 8591 8592 /// Helps to match a select pattern in case of a type mismatch. 8593 /// 8594 /// The function processes the case when type of true and false values of a 8595 /// select instruction differs from type of the cmp instruction operands because 8596 /// of a cast instruction. The function checks if it is legal to move the cast 8597 /// operation after "select". If yes, it returns the new second value of 8598 /// "select" (with the assumption that cast is moved): 8599 /// 1. As operand of cast instruction when both values of "select" are same cast 8600 /// instructions. 8601 /// 2. As restored constant (by applying reverse cast operation) when the first 8602 /// value of the "select" is a cast operation and the second value is a 8603 /// constant. 8604 /// NOTE: We return only the new second value because the first value could be 8605 /// accessed as operand of cast instruction. 8606 static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, 8607 Instruction::CastOps *CastOp) { 8608 auto *Cast1 = dyn_cast<CastInst>(V1); 8609 if (!Cast1) 8610 return nullptr; 8611 8612 *CastOp = Cast1->getOpcode(); 8613 Type *SrcTy = Cast1->getSrcTy(); 8614 if (auto *Cast2 = dyn_cast<CastInst>(V2)) { 8615 // If V1 and V2 are both the same cast from the same type, look through V1. 8616 if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy()) 8617 return Cast2->getOperand(0); 8618 return nullptr; 8619 } 8620 8621 auto *C = dyn_cast<Constant>(V2); 8622 if (!C) 8623 return nullptr; 8624 8625 const DataLayout &DL = CmpI->getDataLayout(); 8626 Constant *CastedTo = nullptr; 8627 switch (*CastOp) { 8628 case Instruction::ZExt: 8629 if (CmpI->isUnsigned()) 8630 CastedTo = ConstantExpr::getTrunc(C, SrcTy); 8631 break; 8632 case Instruction::SExt: 8633 if (CmpI->isSigned()) 8634 CastedTo = ConstantExpr::getTrunc(C, SrcTy, true); 8635 break; 8636 case Instruction::Trunc: 8637 Constant *CmpConst; 8638 if (match(CmpI->getOperand(1), m_Constant(CmpConst)) && 8639 CmpConst->getType() == SrcTy) { 8640 // Here we have the following case: 8641 // 8642 // %cond = cmp iN %x, CmpConst 8643 // %tr = trunc iN %x to iK 8644 // %narrowsel = select i1 %cond, iK %t, iK C 8645 // 8646 // We can always move trunc after select operation: 8647 // 8648 // %cond = cmp iN %x, CmpConst 8649 // %widesel = select i1 %cond, iN %x, iN CmpConst 8650 // %tr = trunc iN %widesel to iK 8651 // 8652 // Note that C could be extended in any way because we don't care about 8653 // upper bits after truncation. It can't be abs pattern, because it would 8654 // look like: 8655 // 8656 // select i1 %cond, x, -x. 8657 // 8658 // So only min/max pattern could be matched. Such match requires widened C 8659 // == CmpConst. That is why set widened C = CmpConst, condition trunc 8660 // CmpConst == C is checked below. 8661 CastedTo = CmpConst; 8662 } else { 8663 unsigned ExtOp = CmpI->isSigned() ? 
Instruction::SExt : Instruction::ZExt; 8664 CastedTo = ConstantFoldCastOperand(ExtOp, C, SrcTy, DL); 8665 } 8666 break; 8667 case Instruction::FPTrunc: 8668 CastedTo = ConstantFoldCastOperand(Instruction::FPExt, C, SrcTy, DL); 8669 break; 8670 case Instruction::FPExt: 8671 CastedTo = ConstantFoldCastOperand(Instruction::FPTrunc, C, SrcTy, DL); 8672 break; 8673 case Instruction::FPToUI: 8674 CastedTo = ConstantFoldCastOperand(Instruction::UIToFP, C, SrcTy, DL); 8675 break; 8676 case Instruction::FPToSI: 8677 CastedTo = ConstantFoldCastOperand(Instruction::SIToFP, C, SrcTy, DL); 8678 break; 8679 case Instruction::UIToFP: 8680 CastedTo = ConstantFoldCastOperand(Instruction::FPToUI, C, SrcTy, DL); 8681 break; 8682 case Instruction::SIToFP: 8683 CastedTo = ConstantFoldCastOperand(Instruction::FPToSI, C, SrcTy, DL); 8684 break; 8685 default: 8686 break; 8687 } 8688 8689 if (!CastedTo) 8690 return nullptr; 8691 8692 // Make sure the cast doesn't lose any information. 8693 Constant *CastedBack = 8694 ConstantFoldCastOperand(*CastOp, CastedTo, C->getType(), DL); 8695 if (CastedBack && CastedBack != C) 8696 return nullptr; 8697 8698 return CastedTo; 8699 } 8700 8701 SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, 8702 Instruction::CastOps *CastOp, 8703 unsigned Depth) { 8704 if (Depth >= MaxAnalysisRecursionDepth) 8705 return {SPF_UNKNOWN, SPNB_NA, false}; 8706 8707 SelectInst *SI = dyn_cast<SelectInst>(V); 8708 if (!SI) return {SPF_UNKNOWN, SPNB_NA, false}; 8709 8710 CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition()); 8711 if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false}; 8712 8713 Value *TrueVal = SI->getTrueValue(); 8714 Value *FalseVal = SI->getFalseValue(); 8715 8716 return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS, 8717 CastOp, Depth); 8718 } 8719 8720 SelectPatternResult llvm::matchDecomposedSelectPattern( 8721 CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS, 8722 Instruction::CastOps *CastOp, unsigned Depth) { 8723 CmpInst::Predicate Pred = CmpI->getPredicate(); 8724 Value *CmpLHS = CmpI->getOperand(0); 8725 Value *CmpRHS = CmpI->getOperand(1); 8726 FastMathFlags FMF; 8727 if (isa<FPMathOperator>(CmpI)) 8728 FMF = CmpI->getFastMathFlags(); 8729 8730 // Bail out early. 8731 if (CmpI->isEquality()) 8732 return {SPF_UNKNOWN, SPNB_NA, false}; 8733 8734 // Deal with type mismatches. 8735 if (CastOp && CmpLHS->getType() != TrueVal->getType()) { 8736 if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) { 8737 // If this is a potential fmin/fmax with a cast to integer, then ignore 8738 // -0.0 because there is no corresponding integer value. 8739 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI) 8740 FMF.setNoSignedZeros(); 8741 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, 8742 cast<CastInst>(TrueVal)->getOperand(0), C, 8743 LHS, RHS, Depth); 8744 } 8745 if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) { 8746 // If this is a potential fmin/fmax with a cast to integer, then ignore 8747 // -0.0 because there is no corresponding integer value. 
8748       if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
8749         FMF.setNoSignedZeros();
8750       return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
8751                                   C, cast<CastInst>(FalseVal)->getOperand(0),
8752                                   LHS, RHS, Depth);
8753     }
8754   }
8755   return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
8756                               LHS, RHS, Depth);
8757 }
8758 
8759 CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) {
8760   if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT;
8761   if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT;
8762   if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT;
8763   if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT;
8764   if (SPF == SPF_FMINNUM)
8765     return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT;
8766   if (SPF == SPF_FMAXNUM)
8767     return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
8768   llvm_unreachable("unhandled!");
8769 }
8770 
8771 SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) {
8772   if (SPF == SPF_SMIN) return SPF_SMAX;
8773   if (SPF == SPF_UMIN) return SPF_UMAX;
8774   if (SPF == SPF_SMAX) return SPF_SMIN;
8775   if (SPF == SPF_UMAX) return SPF_UMIN;
8776   llvm_unreachable("unhandled!");
8777 }
8778 
8779 Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
8780   switch (MinMaxID) {
8781   case Intrinsic::smax: return Intrinsic::smin;
8782   case Intrinsic::smin: return Intrinsic::smax;
8783   case Intrinsic::umax: return Intrinsic::umin;
8784   case Intrinsic::umin: return Intrinsic::umax;
8785   // Note that the next four intrinsics may produce the same result for the
8786   // original and inverted cases even if X != Y, because NaN is handled specially.
8787   case Intrinsic::maximum: return Intrinsic::minimum;
8788   case Intrinsic::minimum: return Intrinsic::maximum;
8789   case Intrinsic::maxnum: return Intrinsic::minnum;
8790   case Intrinsic::minnum: return Intrinsic::maxnum;
8791   default: llvm_unreachable("Unexpected intrinsic");
8792   }
8793 }
8794 
8795 APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
8796   switch (SPF) {
8797   case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth);
8798   case SPF_SMIN: return APInt::getSignedMinValue(BitWidth);
8799   case SPF_UMAX: return APInt::getMaxValue(BitWidth);
8800   case SPF_UMIN: return APInt::getMinValue(BitWidth);
8801   default: llvm_unreachable("Unexpected flavor");
8802   }
8803 }
8804 
8805 std::pair<Intrinsic::ID, bool>
8806 llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
8807   // Check if VL contains select instructions that can be folded into a min/max
8808   // vector intrinsic and return the intrinsic if it is possible.
8809   // TODO: Support floating point min/max.
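  // For example (illustrative), if every value in VL is a select matching
  // smin(x, y), the result is {Intrinsic::smin, AllCmpSingleUse}, where the
  // flag records whether each select's compare had a single use.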
8810 bool AllCmpSingleUse = true; 8811 SelectPatternResult SelectPattern; 8812 SelectPattern.Flavor = SPF_UNKNOWN; 8813 if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) { 8814 Value *LHS, *RHS; 8815 auto CurrentPattern = matchSelectPattern(I, LHS, RHS); 8816 if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor)) 8817 return false; 8818 if (SelectPattern.Flavor != SPF_UNKNOWN && 8819 SelectPattern.Flavor != CurrentPattern.Flavor) 8820 return false; 8821 SelectPattern = CurrentPattern; 8822 AllCmpSingleUse &= 8823 match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value())); 8824 return true; 8825 })) { 8826 switch (SelectPattern.Flavor) { 8827 case SPF_SMIN: 8828 return {Intrinsic::smin, AllCmpSingleUse}; 8829 case SPF_UMIN: 8830 return {Intrinsic::umin, AllCmpSingleUse}; 8831 case SPF_SMAX: 8832 return {Intrinsic::smax, AllCmpSingleUse}; 8833 case SPF_UMAX: 8834 return {Intrinsic::umax, AllCmpSingleUse}; 8835 case SPF_FMAXNUM: 8836 return {Intrinsic::maxnum, AllCmpSingleUse}; 8837 case SPF_FMINNUM: 8838 return {Intrinsic::minnum, AllCmpSingleUse}; 8839 default: 8840 llvm_unreachable("unexpected select pattern flavor"); 8841 } 8842 } 8843 return {Intrinsic::not_intrinsic, false}; 8844 } 8845 8846 bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, 8847 Value *&Start, Value *&Step) { 8848 // Handle the case of a simple two-predecessor recurrence PHI. 8849 // There's a lot more that could theoretically be done here, but 8850 // this is sufficient to catch some interesting cases. 8851 if (P->getNumIncomingValues() != 2) 8852 return false; 8853 8854 for (unsigned i = 0; i != 2; ++i) { 8855 Value *L = P->getIncomingValue(i); 8856 Value *R = P->getIncomingValue(!i); 8857 auto *LU = dyn_cast<BinaryOperator>(L); 8858 if (!LU) 8859 continue; 8860 unsigned Opcode = LU->getOpcode(); 8861 8862 switch (Opcode) { 8863 default: 8864 continue; 8865 // TODO: Expand list -- xor, div, gep, uaddo, etc.. 8866 case Instruction::LShr: 8867 case Instruction::AShr: 8868 case Instruction::Shl: 8869 case Instruction::Add: 8870 case Instruction::Sub: 8871 case Instruction::And: 8872 case Instruction::Or: 8873 case Instruction::Mul: 8874 case Instruction::FMul: { 8875 Value *LL = LU->getOperand(0); 8876 Value *LR = LU->getOperand(1); 8877 // Find a recurrence. 8878 if (LL == P) 8879 L = LR; 8880 else if (LR == P) 8881 L = LL; 8882 else 8883 continue; // Check for recurrence with L and R flipped. 8884 8885 break; // Match! 8886 } 8887 }; 8888 8889 // We have matched a recurrence of the form: 8890 // %iv = [R, %entry], [%iv.next, %backedge] 8891 // %iv.next = binop %iv, L 8892 // OR 8893 // %iv = [R, %entry], [%iv.next, %backedge] 8894 // %iv.next = binop L, %iv 8895 BO = LU; 8896 Start = R; 8897 Step = L; 8898 return true; 8899 } 8900 return false; 8901 } 8902 8903 bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P, 8904 Value *&Start, Value *&Step) { 8905 BinaryOperator *BO = nullptr; 8906 P = dyn_cast<PHINode>(I->getOperand(0)); 8907 if (!P) 8908 P = dyn_cast<PHINode>(I->getOperand(1)); 8909 return P && matchSimpleRecurrence(P, BO, Start, Step) && BO == I; 8910 } 8911 8912 /// Return true if "icmp Pred LHS RHS" is always true. 
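/// For example (illustrative operands), 'x ule (x | y)' always holds, and
/// 'x sle (x +nsw C)' holds whenever C >= 0.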
8913 static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, 8914 const Value *RHS) { 8915 if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) 8916 return true; 8917 8918 switch (Pred) { 8919 default: 8920 return false; 8921 8922 case CmpInst::ICMP_SLE: { 8923 const APInt *C; 8924 8925 // LHS s<= LHS +_{nsw} C if C >= 0 8926 // LHS s<= LHS | C if C >= 0 8927 if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))) || 8928 match(RHS, m_Or(m_Specific(LHS), m_APInt(C)))) 8929 return !C->isNegative(); 8930 8931 // LHS s<= smax(LHS, V) for any V 8932 if (match(RHS, m_c_SMax(m_Specific(LHS), m_Value()))) 8933 return true; 8934 8935 // smin(RHS, V) s<= RHS for any V 8936 if (match(LHS, m_c_SMin(m_Specific(RHS), m_Value()))) 8937 return true; 8938 8939 // Match A to (X +_{nsw} CA) and B to (X +_{nsw} CB) 8940 const Value *X; 8941 const APInt *CLHS, *CRHS; 8942 if (match(LHS, m_NSWAddLike(m_Value(X), m_APInt(CLHS))) && 8943 match(RHS, m_NSWAddLike(m_Specific(X), m_APInt(CRHS)))) 8944 return CLHS->sle(*CRHS); 8945 8946 return false; 8947 } 8948 8949 case CmpInst::ICMP_ULE: { 8950 // LHS u<= LHS +_{nuw} V for any V 8951 if (match(RHS, m_c_Add(m_Specific(LHS), m_Value())) && 8952 cast<OverflowingBinaryOperator>(RHS)->hasNoUnsignedWrap()) 8953 return true; 8954 8955 // LHS u<= LHS | V for any V 8956 if (match(RHS, m_c_Or(m_Specific(LHS), m_Value()))) 8957 return true; 8958 8959 // LHS u<= umax(LHS, V) for any V 8960 if (match(RHS, m_c_UMax(m_Specific(LHS), m_Value()))) 8961 return true; 8962 8963 // RHS >> V u<= RHS for any V 8964 if (match(LHS, m_LShr(m_Specific(RHS), m_Value()))) 8965 return true; 8966 8967 // RHS u/ C_ugt_1 u<= RHS 8968 const APInt *C; 8969 if (match(LHS, m_UDiv(m_Specific(RHS), m_APInt(C))) && C->ugt(1)) 8970 return true; 8971 8972 // RHS & V u<= RHS for any V 8973 if (match(LHS, m_c_And(m_Specific(RHS), m_Value()))) 8974 return true; 8975 8976 // umin(RHS, V) u<= RHS for any V 8977 if (match(LHS, m_c_UMin(m_Specific(RHS), m_Value()))) 8978 return true; 8979 8980 // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB) 8981 const Value *X; 8982 const APInt *CLHS, *CRHS; 8983 if (match(LHS, m_NUWAddLike(m_Value(X), m_APInt(CLHS))) && 8984 match(RHS, m_NUWAddLike(m_Specific(X), m_APInt(CRHS)))) 8985 return CLHS->ule(*CRHS); 8986 8987 return false; 8988 } 8989 } 8990 } 8991 8992 /// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred 8993 /// ALHS ARHS" is true. Otherwise, return std::nullopt. 
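/// For example (illustrative operands), with Pred == ult, 'x u< y' implies
/// 'x u< (y +nuw 1)' because x u<= x and y u<= (y +nuw 1).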
8994 static std::optional<bool> 8995 isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS, 8996 const Value *ARHS, const Value *BLHS, const Value *BRHS) { 8997 switch (Pred) { 8998 default: 8999 return std::nullopt; 9000 9001 case CmpInst::ICMP_SLT: 9002 case CmpInst::ICMP_SLE: 9003 if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS) && 9004 isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS)) 9005 return true; 9006 return std::nullopt; 9007 9008 case CmpInst::ICMP_SGT: 9009 case CmpInst::ICMP_SGE: 9010 if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS) && 9011 isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS)) 9012 return true; 9013 return std::nullopt; 9014 9015 case CmpInst::ICMP_ULT: 9016 case CmpInst::ICMP_ULE: 9017 if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS) && 9018 isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS)) 9019 return true; 9020 return std::nullopt; 9021 9022 case CmpInst::ICMP_UGT: 9023 case CmpInst::ICMP_UGE: 9024 if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS) && 9025 isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS)) 9026 return true; 9027 return std::nullopt; 9028 } 9029 } 9030 9031 /// Return true if "icmp1 LPred X, Y" implies "icmp2 RPred X, Y" is true. 9032 /// Return false if "icmp1 LPred X, Y" implies "icmp2 RPred X, Y" is false. 9033 /// Otherwise, return std::nullopt if we can't infer anything. 9034 static std::optional<bool> 9035 isImpliedCondMatchingOperands(CmpInst::Predicate LPred, 9036 CmpInst::Predicate RPred) { 9037 if (CmpInst::isImpliedTrueByMatchingCmp(LPred, RPred)) 9038 return true; 9039 if (CmpInst::isImpliedFalseByMatchingCmp(LPred, RPred)) 9040 return false; 9041 9042 return std::nullopt; 9043 } 9044 9045 /// Return true if "icmp LPred X, LCR" implies "icmp RPred X, RCR" is true. 9046 /// Return false if "icmp LPred X, LCR" implies "icmp RPred X, RCR" is false. 9047 /// Otherwise, return std::nullopt if we can't infer anything. 9048 static std::optional<bool> isImpliedCondCommonOperandWithCR( 9049 CmpInst::Predicate LPred, const ConstantRange &LCR, 9050 CmpInst::Predicate RPred, const ConstantRange &RCR) { 9051 ConstantRange DomCR = ConstantRange::makeAllowedICmpRegion(LPred, LCR); 9052 // If all true values for lhs and true for rhs, lhs implies rhs 9053 if (DomCR.icmp(RPred, RCR)) 9054 return true; 9055 9056 // If there is no overlap, lhs implies not rhs 9057 if (DomCR.icmp(CmpInst::getInversePredicate(RPred), RCR)) 9058 return false; 9059 return std::nullopt; 9060 } 9061 9062 /// Return true if LHS implies RHS (expanded to its components as "R0 RPred R1") 9063 /// is true. Return false if LHS implies RHS is false. Otherwise, return 9064 /// std::nullopt if we can't infer anything. 9065 static std::optional<bool> isImpliedCondICmps(const ICmpInst *LHS, 9066 CmpInst::Predicate RPred, 9067 const Value *R0, const Value *R1, 9068 const DataLayout &DL, 9069 bool LHSIsTrue) { 9070 Value *L0 = LHS->getOperand(0); 9071 Value *L1 = LHS->getOperand(1); 9072 9073 // The rest of the logic assumes the LHS condition is true. If that's not the 9074 // case, invert the predicate to make it so. 9075 CmpInst::Predicate LPred = 9076 LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate(); 9077 9078 // We can have non-canonical operands, so try to normalize any common operand 9079 // to L0/R0. 
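  // For example (illustrative), if LHS is 'icmp ult %a, %b' and the RHS
  // compare is over (%x, %a), swap R0/R1 (and RPred) so the shared %a lands
  // in R0.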
9080 if (L0 == R1) { 9081 std::swap(R0, R1); 9082 RPred = ICmpInst::getSwappedPredicate(RPred); 9083 } 9084 if (R0 == L1) { 9085 std::swap(L0, L1); 9086 LPred = ICmpInst::getSwappedPredicate(LPred); 9087 } 9088 if (L1 == R1) { 9089 // If we have L0 == R0 and L1 == R1, then make L1/R1 the constants. 9090 if (L0 != R0 || match(L0, m_ImmConstant())) { 9091 std::swap(L0, L1); 9092 LPred = ICmpInst::getSwappedPredicate(LPred); 9093 std::swap(R0, R1); 9094 RPred = ICmpInst::getSwappedPredicate(RPred); 9095 } 9096 } 9097 9098 // See if we can infer anything if operand-0 matches and we have at least one 9099 // constant. 9100 const APInt *Unused; 9101 if (L0 == R0 && (match(L1, m_APInt(Unused)) || match(R1, m_APInt(Unused)))) { 9102 // Potential TODO: We could also further use the constant range of L0/R0 to 9103 // further constraint the constant ranges. At the moment this leads to 9104 // several regressions related to not transforming `multi_use(A + C0) eq/ne 9105 // C1` (see discussion: D58633). 9106 ConstantRange LCR = computeConstantRange( 9107 L1, ICmpInst::isSigned(LPred), /* UseInstrInfo=*/true, /*AC=*/nullptr, 9108 /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1); 9109 ConstantRange RCR = computeConstantRange( 9110 R1, ICmpInst::isSigned(RPred), /* UseInstrInfo=*/true, /*AC=*/nullptr, 9111 /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1); 9112 // Even if L1/R1 are not both constant, we can still sometimes deduce 9113 // relationship from a single constant. For example X u> Y implies X != 0. 9114 if (auto R = isImpliedCondCommonOperandWithCR(LPred, LCR, RPred, RCR)) 9115 return R; 9116 // If both L1/R1 were exact constant ranges and we didn't get anything 9117 // here, we won't be able to deduce this. 9118 if (match(L1, m_APInt(Unused)) && match(R1, m_APInt(Unused))) 9119 return std::nullopt; 9120 } 9121 9122 // Can we infer anything when the two compares have matching operands? 9123 if (L0 == R0 && L1 == R1) 9124 return isImpliedCondMatchingOperands(LPred, RPred); 9125 9126 // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 <u L1 implies R0 <u R1 9127 if (L0 == R0 && 9128 (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) && 9129 (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) && 9130 match(L0, m_c_Add(m_Specific(L1), m_Specific(R1)))) 9131 return LPred == RPred; 9132 9133 if (LPred == RPred) 9134 return isImpliedCondOperands(LPred, L0, L1, R0, R1); 9135 9136 return std::nullopt; 9137 } 9138 9139 /// Return true if LHS implies RHS is true. Return false if LHS implies RHS is 9140 /// false. Otherwise, return std::nullopt if we can't infer anything. We 9141 /// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select' 9142 /// instruction. 9143 static std::optional<bool> 9144 isImpliedCondAndOr(const Instruction *LHS, CmpInst::Predicate RHSPred, 9145 const Value *RHSOp0, const Value *RHSOp1, 9146 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { 9147 // The LHS must be an 'or', 'and', or a 'select' instruction. 9148 assert((LHS->getOpcode() == Instruction::And || 9149 LHS->getOpcode() == Instruction::Or || 9150 LHS->getOpcode() == Instruction::Select) && 9151 "Expected LHS to be 'and', 'or', or 'select'."); 9152 9153 assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit"); 9154 9155 // If the result of an 'or' is false, then we know both legs of the 'or' are 9156 // false. Similarly, if the result of an 'and' is true, then we know both 9157 // legs of the 'and' are true. 
9158 const Value *ALHS, *ARHS; 9159 if ((!LHSIsTrue && match(LHS, m_LogicalOr(m_Value(ALHS), m_Value(ARHS)))) || 9160 (LHSIsTrue && match(LHS, m_LogicalAnd(m_Value(ALHS), m_Value(ARHS))))) { 9161 // FIXME: Make this non-recursion. 9162 if (std::optional<bool> Implication = isImpliedCondition( 9163 ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1)) 9164 return Implication; 9165 if (std::optional<bool> Implication = isImpliedCondition( 9166 ARHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1)) 9167 return Implication; 9168 return std::nullopt; 9169 } 9170 return std::nullopt; 9171 } 9172 9173 std::optional<bool> 9174 llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred, 9175 const Value *RHSOp0, const Value *RHSOp1, 9176 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { 9177 // Bail out when we hit the limit. 9178 if (Depth == MaxAnalysisRecursionDepth) 9179 return std::nullopt; 9180 9181 // A mismatch occurs when we compare a scalar cmp to a vector cmp, for 9182 // example. 9183 if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy()) 9184 return std::nullopt; 9185 9186 assert(LHS->getType()->isIntOrIntVectorTy(1) && 9187 "Expected integer type only!"); 9188 9189 // Match not 9190 if (match(LHS, m_Not(m_Value(LHS)))) 9191 LHSIsTrue = !LHSIsTrue; 9192 9193 // Both LHS and RHS are icmps. 9194 const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS); 9195 if (LHSCmp) 9196 return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue); 9197 9198 /// The LHS should be an 'or', 'and', or a 'select' instruction. We expect 9199 /// the RHS to be an icmp. 9200 /// FIXME: Add support for and/or/select on the RHS. 9201 if (const Instruction *LHSI = dyn_cast<Instruction>(LHS)) { 9202 if ((LHSI->getOpcode() == Instruction::And || 9203 LHSI->getOpcode() == Instruction::Or || 9204 LHSI->getOpcode() == Instruction::Select)) 9205 return isImpliedCondAndOr(LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, 9206 Depth); 9207 } 9208 return std::nullopt; 9209 } 9210 9211 std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, 9212 const DataLayout &DL, 9213 bool LHSIsTrue, unsigned Depth) { 9214 // LHS ==> RHS by definition 9215 if (LHS == RHS) 9216 return LHSIsTrue; 9217 9218 // Match not 9219 bool InvertRHS = false; 9220 if (match(RHS, m_Not(m_Value(RHS)))) { 9221 if (LHS == RHS) 9222 return !LHSIsTrue; 9223 InvertRHS = true; 9224 } 9225 9226 if (const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS)) { 9227 if (auto Implied = isImpliedCondition( 9228 LHS, RHSCmp->getPredicate(), RHSCmp->getOperand(0), 9229 RHSCmp->getOperand(1), DL, LHSIsTrue, Depth)) 9230 return InvertRHS ? 
!*Implied : *Implied; 9231 return std::nullopt; 9232 } 9233 9234 if (Depth == MaxAnalysisRecursionDepth) 9235 return std::nullopt; 9236 9237 // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2 9238 // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2 9239 const Value *RHS1, *RHS2; 9240 if (match(RHS, m_LogicalOr(m_Value(RHS1), m_Value(RHS2)))) { 9241 if (std::optional<bool> Imp = 9242 isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1)) 9243 if (*Imp == true) 9244 return !InvertRHS; 9245 if (std::optional<bool> Imp = 9246 isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1)) 9247 if (*Imp == true) 9248 return !InvertRHS; 9249 } 9250 if (match(RHS, m_LogicalAnd(m_Value(RHS1), m_Value(RHS2)))) { 9251 if (std::optional<bool> Imp = 9252 isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1)) 9253 if (*Imp == false) 9254 return InvertRHS; 9255 if (std::optional<bool> Imp = 9256 isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1)) 9257 if (*Imp == false) 9258 return InvertRHS; 9259 } 9260 9261 return std::nullopt; 9262 } 9263 9264 // Returns a pair (Condition, ConditionIsTrue), where Condition is a branch 9265 // condition dominating ContextI or nullptr, if no condition is found. 9266 static std::pair<Value *, bool> 9267 getDomPredecessorCondition(const Instruction *ContextI) { 9268 if (!ContextI || !ContextI->getParent()) 9269 return {nullptr, false}; 9270 9271 // TODO: This is a poor/cheap way to determine dominance. Should we use a 9272 // dominator tree (eg, from a SimplifyQuery) instead? 9273 const BasicBlock *ContextBB = ContextI->getParent(); 9274 const BasicBlock *PredBB = ContextBB->getSinglePredecessor(); 9275 if (!PredBB) 9276 return {nullptr, false}; 9277 9278 // We need a conditional branch in the predecessor. 9279 Value *PredCond; 9280 BasicBlock *TrueBB, *FalseBB; 9281 if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB))) 9282 return {nullptr, false}; 9283 9284 // The branch should get simplified. Don't bother simplifying this condition. 9285 if (TrueBB == FalseBB) 9286 return {nullptr, false}; 9287 9288 assert((TrueBB == ContextBB || FalseBB == ContextBB) && 9289 "Predecessor block does not point to successor?"); 9290 9291 // Is this condition implied by the predecessor condition? 
9292 return {PredCond, TrueBB == ContextBB}; 9293 } 9294 9295 std::optional<bool> llvm::isImpliedByDomCondition(const Value *Cond, 9296 const Instruction *ContextI, 9297 const DataLayout &DL) { 9298 assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool"); 9299 auto PredCond = getDomPredecessorCondition(ContextI); 9300 if (PredCond.first) 9301 return isImpliedCondition(PredCond.first, Cond, DL, PredCond.second); 9302 return std::nullopt; 9303 } 9304 9305 std::optional<bool> llvm::isImpliedByDomCondition(CmpInst::Predicate Pred, 9306 const Value *LHS, 9307 const Value *RHS, 9308 const Instruction *ContextI, 9309 const DataLayout &DL) { 9310 auto PredCond = getDomPredecessorCondition(ContextI); 9311 if (PredCond.first) 9312 return isImpliedCondition(PredCond.first, Pred, LHS, RHS, DL, 9313 PredCond.second); 9314 return std::nullopt; 9315 } 9316 9317 static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, 9318 APInt &Upper, const InstrInfoQuery &IIQ, 9319 bool PreferSignedRange) { 9320 unsigned Width = Lower.getBitWidth(); 9321 const APInt *C; 9322 switch (BO.getOpcode()) { 9323 case Instruction::Add: 9324 if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) { 9325 bool HasNSW = IIQ.hasNoSignedWrap(&BO); 9326 bool HasNUW = IIQ.hasNoUnsignedWrap(&BO); 9327 9328 // If the caller expects a signed compare, then try to use a signed range. 9329 // Otherwise if both no-wraps are set, use the unsigned range because it 9330 // is never larger than the signed range. Example: 9331 // "add nuw nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125]. 9332 if (PreferSignedRange && HasNSW && HasNUW) 9333 HasNUW = false; 9334 9335 if (HasNUW) { 9336 // 'add nuw x, C' produces [C, UINT_MAX]. 9337 Lower = *C; 9338 } else if (HasNSW) { 9339 if (C->isNegative()) { 9340 // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C]. 9341 Lower = APInt::getSignedMinValue(Width); 9342 Upper = APInt::getSignedMaxValue(Width) + *C + 1; 9343 } else { 9344 // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX]. 9345 Lower = APInt::getSignedMinValue(Width) + *C; 9346 Upper = APInt::getSignedMaxValue(Width) + 1; 9347 } 9348 } 9349 } 9350 break; 9351 9352 case Instruction::And: 9353 if (match(BO.getOperand(1), m_APInt(C))) 9354 // 'and x, C' produces [0, C]. 9355 Upper = *C + 1; 9356 // X & -X is a power of two or zero. So we can cap the value at max power of 9357 // two. 9358 if (match(BO.getOperand(0), m_Neg(m_Specific(BO.getOperand(1)))) || 9359 match(BO.getOperand(1), m_Neg(m_Specific(BO.getOperand(0))))) 9360 Upper = APInt::getSignedMinValue(Width) + 1; 9361 break; 9362 9363 case Instruction::Or: 9364 if (match(BO.getOperand(1), m_APInt(C))) 9365 // 'or x, C' produces [C, UINT_MAX]. 9366 Lower = *C; 9367 break; 9368 9369 case Instruction::AShr: 9370 if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) { 9371 // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C]. 
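      // For example, for i8 and C == 2 this is [-128 >> 2, 127 >> 2], i.e.
      // [-32, 31] (arithmetic shift).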
      Lower = APInt::getSignedMinValue(Width).ashr(*C);
      Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      if (C->isNegative()) {
        // 'ashr C, x' produces [C, C >> (Width-1)]
        Lower = *C;
        Upper = C->ashr(ShiftAmount) + 1;
      } else {
        // 'ashr C, x' produces [C >> (Width-1), C]
        Lower = C->ashr(ShiftAmount);
        Upper = *C + 1;
      }
    }
    break;

  case Instruction::LShr:
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'lshr x, C' produces [0, UINT_MAX >> C].
      Upper = APInt::getAllOnes(Width).lshr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      // 'lshr C, x' produces [C >> (Width-1), C].
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      Lower = C->lshr(ShiftAmount);
      Upper = *C + 1;
    }
    break;

  case Instruction::Shl:
    if (match(BO.getOperand(0), m_APInt(C))) {
      if (IIQ.hasNoUnsignedWrap(&BO)) {
        // 'shl nuw C, x' produces [C, C << CLZ(C)]
        Lower = *C;
        Upper = Lower.shl(Lower.countl_zero()) + 1;
      } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
        if (C->isNegative()) {
          // 'shl nsw C, x' produces [C << CLO(C)-1, C]
          unsigned ShiftAmount = C->countl_one() - 1;
          Lower = C->shl(ShiftAmount);
          Upper = *C + 1;
        } else {
          // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
          unsigned ShiftAmount = C->countl_zero() - 1;
          Lower = *C;
          Upper = C->shl(ShiftAmount) + 1;
        }
      } else {
        // If lowbit is set, value can never be zero.
        if ((*C)[0])
          Lower = APInt::getOneBitSet(Width, 0);
        // If we are shifting a constant, the largest it can be is if the
        // longest sequence of consecutive ones is shifted to the high bits
        // (breaking ties for which sequence is higher). At the moment we take
        // a liberal upper bound on this by just popcounting the constant.
        // TODO: There may be a bitwise trick for the longest/highest
        // consecutive sequence of ones (naive method is O(Width) loop).
        Upper = APInt::getHighBitsSet(Width, C->popcount()) + 1;
      }
    } else if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      Upper = APInt::getBitsSetFrom(Width, C->getZExtValue()) + 1;
    }
    break;

  case Instruction::SDiv:
    if (match(BO.getOperand(1), m_APInt(C))) {
      APInt IntMin = APInt::getSignedMinValue(Width);
      APInt IntMax = APInt::getSignedMaxValue(Width);
      if (C->isAllOnes()) {
        // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
        //    where C != -1 and C != 0 and C != 1
        Lower = IntMin + 1;
        Upper = IntMax + 1;
      } else if (C->countl_zero() < Width - 1) {
        // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
        //    where C != -1 and C != 0 and C != 1
        Lower = IntMin.sdiv(*C);
        Upper = IntMax.sdiv(*C);
        if (Lower.sgt(Upper))
          std::swap(Lower, Upper);
        Upper = Upper + 1;
        assert(Upper != Lower && "Upper part of range has wrapped!");
      }
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      if (C->isMinSignedValue()) {
        // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
        Lower = *C;
        Upper = Lower.lshr(1) + 1;
      } else {
        // 'sdiv C, x' produces [-|C|, |C|].
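        // For example, C == 5 can only produce results in -5..5 (x == 1 gives
        // 5, x == -1 gives -5, |x| > 5 gives 0), so Lower = -5 and Upper = 6.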
        Upper = C->abs() + 1;
        Lower = (-Upper) + 1;
      }
    }
    break;

  case Instruction::UDiv:
    if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
      // 'udiv x, C' produces [0, UINT_MAX / C].
      Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      // 'udiv C, x' produces [0, C].
      Upper = *C + 1;
    }
    break;

  case Instruction::SRem:
    if (match(BO.getOperand(1), m_APInt(C))) {
      // 'srem x, C' produces (-|C|, |C|).
      Upper = C->abs();
      Lower = (-Upper) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      if (C->isNegative()) {
        // 'srem -|C|, x' produces [-|C|, 0].
        Upper = 1;
        Lower = *C;
      } else {
        // 'srem |C|, x' produces [0, |C|].
        Upper = *C + 1;
      }
    }
    break;

  case Instruction::URem:
    if (match(BO.getOperand(1), m_APInt(C)))
      // 'urem x, C' produces [0, C).
      Upper = *C;
    else if (match(BO.getOperand(0), m_APInt(C)))
      // 'urem C, x' produces [0, C].
      Upper = *C + 1;
    break;

  default:
    break;
  }
}

static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II) {
  unsigned Width = II.getType()->getScalarSizeInBits();
  const APInt *C;
  switch (II.getIntrinsicID()) {
  case Intrinsic::ctpop:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    // Maximum of set/clear bits is the bit width.
    return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                      APInt(Width, Width + 1));
  case Intrinsic::uadd_sat:
    // uadd.sat(x, C) produces [C, UINT_MAX].
    if (match(II.getOperand(0), m_APInt(C)) ||
        match(II.getOperand(1), m_APInt(C)))
      return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
    break;
  case Intrinsic::sadd_sat:
    if (match(II.getOperand(0), m_APInt(C)) ||
        match(II.getOperand(1), m_APInt(C))) {
      if (C->isNegative())
        // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
        return ConstantRange::getNonEmpty(
            APInt::getSignedMinValue(Width),
            APInt::getSignedMaxValue(Width) + *C + 1);

      // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
      return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) + *C,
                                        APInt::getSignedMaxValue(Width) + 1);
    }
    break;
  case Intrinsic::usub_sat:
    // usub.sat(C, x) produces [0, C].
    if (match(II.getOperand(0), m_APInt(C)))
      return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);

    // usub.sat(x, C) produces [0, UINT_MAX - C].
    if (match(II.getOperand(1), m_APInt(C)))
      return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                        APInt::getMaxValue(Width) - *C + 1);
    break;
  case Intrinsic::ssub_sat:
    if (match(II.getOperand(0), m_APInt(C))) {
      if (C->isNegative())
        // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
        return ConstantRange::getNonEmpty(
            APInt::getSignedMinValue(Width),
            *C - APInt::getSignedMinValue(Width) + 1);

      // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
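      // For example, for i8 and C == +5: the smallest result is 5 - 127 = -122
      // and subtracting SINT_MIN saturates at 127, giving [-122, 127].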
      return ConstantRange::getNonEmpty(*C - APInt::getSignedMaxValue(Width),
                                        APInt::getSignedMaxValue(Width) + 1);
    } else if (match(II.getOperand(1), m_APInt(C))) {
      if (C->isNegative())
        // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX].
        return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) - *C,
                                          APInt::getSignedMaxValue(Width) + 1);

      // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
      return ConstantRange::getNonEmpty(
          APInt::getSignedMinValue(Width),
          APInt::getSignedMaxValue(Width) - *C + 1);
    }
    break;
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax:
    if (!match(II.getOperand(0), m_APInt(C)) &&
        !match(II.getOperand(1), m_APInt(C)))
      break;

    switch (II.getIntrinsicID()) {
    case Intrinsic::umin:
      return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);
    case Intrinsic::umax:
      return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
    case Intrinsic::smin:
      return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                        *C + 1);
    case Intrinsic::smax:
      return ConstantRange::getNonEmpty(*C,
                                        APInt::getSignedMaxValue(Width) + 1);
    default:
      llvm_unreachable("Must be min/max intrinsic");
    }
    break;
  case Intrinsic::abs:
    // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX],
    // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
    if (match(II.getOperand(1), m_One()))
      return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                        APInt::getSignedMaxValue(Width) + 1);

    return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                      APInt::getSignedMinValue(Width) + 1);
  case Intrinsic::vscale:
    if (!II.getParent() || !II.getFunction())
      break;
    return getVScaleRange(II.getFunction(), Width);
  case Intrinsic::scmp:
  case Intrinsic::ucmp:
    return ConstantRange::getNonEmpty(APInt::getAllOnes(Width),
                                      APInt(Width, 2));
  default:
    break;
  }

  return ConstantRange::getFull(Width);
}

static ConstantRange getRangeForSelectPattern(const SelectInst &SI,
                                              const InstrInfoQuery &IIQ) {
  unsigned BitWidth = SI.getType()->getScalarSizeInBits();
  const Value *LHS = nullptr, *RHS = nullptr;
  SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS);
  if (R.Flavor == SPF_UNKNOWN)
    return ConstantRange::getFull(BitWidth);

  if (R.Flavor == SelectPatternFlavor::SPF_ABS) {
    // If the negation part of the abs (in RHS) has the NSW flag,
    // then the result of abs(X) is [0..SIGNED_MAX],
    // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
    if (match(RHS, m_Neg(m_Specific(LHS))) &&
        IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
      return ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
                                        APInt::getSignedMaxValue(BitWidth) + 1);

    return ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
                                      APInt::getSignedMinValue(BitWidth) + 1);
  }

  if (R.Flavor == SelectPatternFlavor::SPF_NABS) {
    // The result of -abs(X) is <= 0.
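    // The half-open range [SINT_MIN, 1) covers exactly the values <= 0; note
    // that -abs(SINT_MIN) wraps back to SINT_MIN, so SINT_MIN stays included.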
    return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth),
                                      APInt(BitWidth, 1));
  }

  const APInt *C;
  if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C)))
    return ConstantRange::getFull(BitWidth);

  switch (R.Flavor) {
  case SPF_UMIN:
    return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), *C + 1);
  case SPF_UMAX:
    return ConstantRange::getNonEmpty(*C, APInt::getZero(BitWidth));
  case SPF_SMIN:
    return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth),
                                      *C + 1);
  case SPF_SMAX:
    return ConstantRange::getNonEmpty(*C,
                                      APInt::getSignedMaxValue(BitWidth) + 1);
  default:
    return ConstantRange::getFull(BitWidth);
  }
}

static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) {
  // The maximum representable value of a half is 65504. For floats the maximum
  // value is 3.4e38 which requires roughly 129 bits.
  unsigned BitWidth = I->getType()->getScalarSizeInBits();
  if (!I->getOperand(0)->getType()->getScalarType()->isHalfTy())
    return;
  if (isa<FPToSIInst>(I) && BitWidth >= 17) {
    Lower = APInt(BitWidth, -65504);
    Upper = APInt(BitWidth, 65505);
  }

  if (isa<FPToUIInst>(I) && BitWidth >= 16) {
    // For a fptoui the lower limit is left as 0.
    Upper = APInt(BitWidth, 65505);
  }
}

ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
                                         bool UseInstrInfo, AssumptionCache *AC,
                                         const Instruction *CtxI,
                                         const DominatorTree *DT,
                                         unsigned Depth) {
  assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");

  if (Depth == MaxAnalysisRecursionDepth)
    return ConstantRange::getFull(V->getType()->getScalarSizeInBits());

  if (auto *C = dyn_cast<Constant>(V))
    return C->toConstantRange();

  unsigned BitWidth = V->getType()->getScalarSizeInBits();
  InstrInfoQuery IIQ(UseInstrInfo);
  ConstantRange CR = ConstantRange::getFull(BitWidth);
  if (auto *BO = dyn_cast<BinaryOperator>(V)) {
    APInt Lower = APInt(BitWidth, 0);
    APInt Upper = APInt(BitWidth, 0);
    // TODO: Return ConstantRange.
    setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned);
    CR = ConstantRange::getNonEmpty(Lower, Upper);
  } else if (auto *II = dyn_cast<IntrinsicInst>(V))
    CR = getRangeForIntrinsic(*II);
  else if (auto *SI = dyn_cast<SelectInst>(V)) {
    ConstantRange CRTrue = computeConstantRange(
        SI->getTrueValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
    ConstantRange CRFalse = computeConstantRange(
        SI->getFalseValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
    CR = CRTrue.unionWith(CRFalse);
    CR = CR.intersectWith(getRangeForSelectPattern(*SI, IIQ));
  } else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V)) {
    APInt Lower = APInt(BitWidth, 0);
    APInt Upper = APInt(BitWidth, 0);
    // TODO: Return ConstantRange.
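    // For example, 'fptosi half %h to i32' yields [-65504, 65505) here, since
    // a half can represent magnitudes of at most 65504; sources that are not
    // half are left at the full range. (%h is an illustrative operand name.)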
    setLimitForFPToI(cast<Instruction>(V), Lower, Upper);
    CR = ConstantRange::getNonEmpty(Lower, Upper);
  } else if (const auto *A = dyn_cast<Argument>(V))
    if (std::optional<ConstantRange> Range = A->getRange())
      CR = *Range;

  if (auto *I = dyn_cast<Instruction>(V)) {
    if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range))
      CR = CR.intersectWith(getConstantRangeFromMetadata(*Range));

    if (const auto *CB = dyn_cast<CallBase>(V))
      if (std::optional<ConstantRange> Range = CB->getRange())
        CR = CR.intersectWith(*Range);
  }

  if (CtxI && AC) {
    // Try to restrict the range based on information from assumptions.
    for (auto &AssumeVH : AC->assumptionsFor(V)) {
      if (!AssumeVH)
        continue;
      CallInst *I = cast<CallInst>(AssumeVH);
      assert(I->getParent()->getParent() == CtxI->getParent()->getParent() &&
             "Got assumption for the wrong function!");
      assert(I->getIntrinsicID() == Intrinsic::assume &&
             "must be an assume intrinsic");

      if (!isValidAssumeForContext(I, CtxI, DT))
        continue;
      Value *Arg = I->getArgOperand(0);
      ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
      // Currently we just use information from comparisons.
      if (!Cmp || Cmp->getOperand(0) != V)
        continue;
      // TODO: Set "ForSigned" parameter via Cmp->isSigned()?
      ConstantRange RHS =
          computeConstantRange(Cmp->getOperand(1), /* ForSigned */ false,
                               UseInstrInfo, AC, I, DT, Depth + 1);
      CR = CR.intersectWith(
          ConstantRange::makeAllowedICmpRegion(Cmp->getPredicate(), RHS));
    }
  }

  return CR;
}

static void
addValueAffectedByCondition(Value *V,
                            function_ref<void(Value *)> InsertAffected) {
  assert(V != nullptr);
  if (isa<Argument>(V) || isa<GlobalValue>(V)) {
    InsertAffected(V);
  } else if (auto *I = dyn_cast<Instruction>(V)) {
    InsertAffected(V);

    // Peek through unary operators to find the source of the condition.
    Value *Op;
    if (match(I, m_CombineOr(m_PtrToInt(m_Value(Op)), m_Trunc(m_Value(Op))))) {
      if (isa<Instruction>(Op) || isa<Argument>(Op))
        InsertAffected(Op);
    }
  }
}

void llvm::findValuesAffectedByCondition(
    Value *Cond, bool IsAssume, function_ref<void(Value *)> InsertAffected) {
  auto AddAffected = [&InsertAffected](Value *V) {
    addValueAffectedByCondition(V, InsertAffected);
  };

  auto AddCmpOperands = [&AddAffected, IsAssume](Value *LHS, Value *RHS) {
    if (IsAssume) {
      AddAffected(LHS);
      AddAffected(RHS);
    } else if (match(RHS, m_Constant()))
      AddAffected(LHS);
  };

  SmallVector<Value *, 8> Worklist;
  SmallPtrSet<Value *, 8> Visited;
  Worklist.push_back(Cond);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    CmpInst::Predicate Pred;
    Value *A, *B, *X;

    if (IsAssume) {
      AddAffected(V);
      if (match(V, m_Not(m_Value(X))))
        AddAffected(X);
    }

    if (match(V, m_LogicalOp(m_Value(A), m_Value(B)))) {
      // assume(A && B) is split to -> assume(A); assume(B);
      // assume(!(A || B)) is split to -> assume(!A); assume(!B);
      // Finally, assume(A || B) / assume(!(A && B)) generally don't provide
      // enough information to be worth handling (intersection of information
      // as opposed to union).
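      // For example, a branch on (icmp ult %x, 8) && (icmp ult %y, 16) pushes
      // both compares onto the worklist, so the illustrative values %x and %y
      // are both recorded as affected.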
      if (!IsAssume) {
        Worklist.push_back(A);
        Worklist.push_back(B);
      }
    } else if (match(V, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
      AddCmpOperands(A, B);

      if (ICmpInst::isEquality(Pred)) {
        if (match(B, m_ConstantInt())) {
          Value *Y;
          // (X & C) or (X | C) or (X ^ C).
          // (X << C) or (X >>_s C) or (X >>_u C).
          if (match(A, m_BitwiseLogic(m_Value(X), m_ConstantInt())) ||
              match(A, m_Shift(m_Value(X), m_ConstantInt())))
            AddAffected(X);
          else if (match(A, m_And(m_Value(X), m_Value(Y))) ||
                   match(A, m_Or(m_Value(X), m_Value(Y)))) {
            AddAffected(X);
            AddAffected(Y);
          }
        }
      } else {
        if (match(B, m_ConstantInt())) {
          // Handle (A + C1) u< C2, which is the canonical form of
          // A > C3 && A < C4.
          if (match(A, m_AddLike(m_Value(X), m_ConstantInt())))
            AddAffected(X);

          if (ICmpInst::isUnsigned(Pred)) {
            Value *Y;
            // X & Y u> C -> X >u C && Y >u C
            // X | Y u< C -> X u< C && Y u< C
            // X nuw+ Y u< C -> X u< C && Y u< C
            if (match(A, m_And(m_Value(X), m_Value(Y))) ||
                match(A, m_Or(m_Value(X), m_Value(Y))) ||
                match(A, m_NUWAdd(m_Value(X), m_Value(Y)))) {
              AddAffected(X);
              AddAffected(Y);
            }
            // X nuw- Y u> C -> X u> C
            if (match(A, m_NUWSub(m_Value(X), m_Value())))
              AddAffected(X);
          }
        }

        // Handle icmp slt/sgt (bitcast X to int), 0/-1, which is supported
        // by computeKnownFPClass().
        if (match(A, m_ElementWiseBitCast(m_Value(X)))) {
          if (Pred == ICmpInst::ICMP_SLT && match(B, m_Zero()))
            InsertAffected(X);
          else if (Pred == ICmpInst::ICMP_SGT && match(B, m_AllOnes()))
            InsertAffected(X);
        }
      }
    } else if (match(V, m_FCmp(Pred, m_Value(A), m_Value(B)))) {
      AddCmpOperands(A, B);

      // fcmp fneg(x), y
      // fcmp fabs(x), y
      // fcmp fneg(fabs(x)), y
      if (match(A, m_FNeg(m_Value(A))))
        AddAffected(A);
      if (match(A, m_FAbs(m_Value(A))))
        AddAffected(A);

    } else if (match(V, m_Intrinsic<Intrinsic::is_fpclass>(m_Value(A),
                                                           m_Value()))) {
      // Handle patterns that computeKnownFPClass() supports.
      AddAffected(A);
    }
  }
}