//===- ValueTracking.cpp - Walk computations to compute properties --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains routines that help analyze properties that chains of
// computations have.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomConditionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Analysis/WithCache.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
                                              cl::Hidden, cl::init(20));


/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
  if (unsigned BitWidth = Ty->getScalarSizeInBits())
    return BitWidth;

  return DL.getPointerTypeSizeInBits(Ty);
}

// Given the provided Value and, potentially, a context instruction, return
// the preferred context instruction (if any).
static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
  // If we've been provided with a context instruction, then use that (provided
  // it has been inserted).
  if (CxtI && CxtI->getParent())
    return CxtI;

  // If the value is really an already-inserted instruction, then use that.
  CxtI = dyn_cast<Instruction>(V);
  if (CxtI && CxtI->getParent())
    return CxtI;

  return nullptr;
}

static const Instruction *safeCxtI(const Value *V1, const Value *V2,
                                   const Instruction *CxtI) {
  // If we've been provided with a context instruction, then use that (provided
  // it has been inserted).
  if (CxtI && CxtI->getParent())
    return CxtI;

  // If the value is really an already-inserted instruction, then use that.
  CxtI = dyn_cast<Instruction>(V1);
  if (CxtI && CxtI->getParent())
    return CxtI;

  CxtI = dyn_cast<Instruction>(V2);
  if (CxtI && CxtI->getParent())
    return CxtI;

  return nullptr;
}

static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
                                   const APInt &DemandedElts,
                                   APInt &DemandedLHS, APInt &DemandedRHS) {
  if (isa<ScalableVectorType>(Shuf->getType())) {
    assert(DemandedElts == APInt(1,1));
    DemandedLHS = DemandedRHS = DemandedElts;
    return true;
  }

  int NumElts =
      cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
  return llvm::getShuffleDemandedElts(NumElts, Shuf->getShuffleMask(),
                                      DemandedElts, DemandedLHS, DemandedRHS);
}
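// For example, for a shuffle of two <4 x i32> operands with mask
// <0, 1, 4, 5> and all four result elements demanded, the helper above
// reports elements {0,1} of the first operand and elements {0,1} of the
// second operand as demanded.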
static void computeKnownBits(const Value *V, const APInt &DemandedElts,
                             KnownBits &Known, unsigned Depth,
                             const SimplifyQuery &Q);

void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
                            const SimplifyQuery &Q) {
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
}

void llvm::computeKnownBits(const Value *V, KnownBits &Known,
                            const DataLayout &DL, unsigned Depth,
                            AssumptionCache *AC, const Instruction *CxtI,
                            const DominatorTree *DT, bool UseInstrInfo) {
  computeKnownBits(
      V, Known, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
                                 unsigned Depth, AssumptionCache *AC,
                                 const Instruction *CxtI,
                                 const DominatorTree *DT, bool UseInstrInfo) {
  return computeKnownBits(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
                                 const DataLayout &DL, unsigned Depth,
                                 AssumptionCache *AC, const Instruction *CxtI,
                                 const DominatorTree *DT, bool UseInstrInfo) {
  return computeKnownBits(
      V, DemandedElts, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
                                            const SimplifyQuery &SQ) {
  // Look for an inverted mask: (X & ~M) op (Y & M).
  {
    Value *M;
    if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
        match(RHS, m_c_And(m_Specific(M), m_Value())) &&
        isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT))
      return true;
  }

  // X op (Y & ~X)
  if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) &&
      isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
  // for constant Y.
  Value *Y;
  if (match(RHS,
            m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) &&
      isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) &&
      isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // Peek through extends to find a 'not' of the other side:
  // (ext Y) op ext(~Y)
  if (match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
      match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) &&
      isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // Look for: (A & B) op ~(A | B)
  {
    Value *A, *B;
    if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
        match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) &&
        isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) &&
        isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT))
      return true;
  }

  return false;
}

bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
                               const WithCache<const Value *> &RHSCache,
                               const SimplifyQuery &SQ) {
  const Value *LHS = LHSCache.getValue();
  const Value *RHS = RHSCache.getValue();

  assert(LHS->getType() == RHS->getType() &&
         "LHS and RHS should have the same type");
  assert(LHS->getType()->isIntOrIntVectorTy() &&
         "LHS and RHS should be integers");

  if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
      haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ))
    return true;

  return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ),
                                        RHSCache.getKnownBits(SQ));
}

bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
  return !I->user_empty() && all_of(I->users(), [](const User *U) {
    ICmpInst::Predicate P;
    return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
  });
}
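// For example, this returns true for an instruction %v whose only users are
// of the form 'icmp eq i32 %v, 0' or 'icmp ne i32 %v, 0'.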
static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
                                   const SimplifyQuery &Q);

bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
                                  bool OrZero, unsigned Depth,
                                  AssumptionCache *AC, const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownToBeAPowerOfTwo(
      V, OrZero, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
                           unsigned Depth, const SimplifyQuery &Q);

static bool isKnownNonZero(const Value *V, unsigned Depth,
                           const SimplifyQuery &Q);

bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
                          AssumptionCache *AC, const Instruction *CxtI,
                          const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownNonZero(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
                              unsigned Depth) {
  return computeKnownBits(V, Depth, SQ).isNonNegative();
}

bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
                           unsigned Depth) {
  if (auto *CI = dyn_cast<ConstantInt>(V))
    return CI->getValue().isStrictlyPositive();

  // TODO: We're doing two recursive queries here. We should factor this such
  // that only a single query is needed.
  return isKnownNonNegative(V, SQ, Depth) && ::isKnownNonZero(V, Depth, SQ);
}

bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
                           unsigned Depth) {
  return computeKnownBits(V, Depth, SQ).isNegative();
}

static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
                            const SimplifyQuery &Q);

bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
                           const DataLayout &DL, AssumptionCache *AC,
                           const Instruction *CxtI, const DominatorTree *DT,
                           bool UseInstrInfo) {
  return ::isKnownNonEqual(
      V1, V2, 0,
      SimplifyQuery(DL, DT, AC, safeCxtI(V2, V1, CxtI), UseInstrInfo));
}

bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
                             const SimplifyQuery &SQ, unsigned Depth) {
  KnownBits Known(Mask.getBitWidth());
  computeKnownBits(V, Known, Depth, SQ);
  return Mask.isSubsetOf(Known.Zero);
}
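// For example, MaskedValueIsZero(V, 0xFF00, ...) returns true when bits
// 8..15 of V are known to be zero, such as when V is 'zext i8 %x to i32'.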
static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
                                   unsigned Depth, const SimplifyQuery &Q);

static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
                                   const SimplifyQuery &Q) {
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  return ComputeNumSignBits(V, DemandedElts, Depth, Q);
}

unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
                                  unsigned Depth, AssumptionCache *AC,
                                  const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::ComputeNumSignBits(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
                                         unsigned Depth, AssumptionCache *AC,
                                         const Instruction *CxtI,
                                         const DominatorTree *DT) {
  unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
  return V->getType()->getScalarSizeInBits() - SignBits + 1;
}

static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
                                   bool NSW, const APInt &DemandedElts,
                                   KnownBits &KnownOut, KnownBits &Known2,
                                   unsigned Depth, const SimplifyQuery &Q) {
  computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);

  // If one operand is unknown and we have no nowrap information,
  // the result will be unknown independently of the second operand.
  if (KnownOut.isUnknown() && !NSW)
    return;

  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
  KnownOut = KnownBits::computeForAddSub(Add, NSW, Known2, KnownOut);
}

static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
                                const APInt &DemandedElts, KnownBits &Known,
                                KnownBits &Known2, unsigned Depth,
                                const SimplifyQuery &Q) {
  computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);

  bool isKnownNegative = false;
  bool isKnownNonNegative = false;
  // If the multiplication is known not to overflow, compute the sign bit.
  if (NSW) {
    if (Op0 == Op1) {
      // The product of a number with itself is non-negative.
      isKnownNonNegative = true;
    } else {
      bool isKnownNonNegativeOp1 = Known.isNonNegative();
      bool isKnownNonNegativeOp0 = Known2.isNonNegative();
      bool isKnownNegativeOp1 = Known.isNegative();
      bool isKnownNegativeOp0 = Known2.isNegative();
      // The product of two numbers with the same sign is non-negative.
      isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
                           (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
      // The product of a negative number and a non-negative number is either
      // negative or zero.
      if (!isKnownNonNegative)
        isKnownNegative =
            (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
             Known2.isNonZero()) ||
            (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
    }
  }

  bool SelfMultiply = Op0 == Op1;
  if (SelfMultiply)
    SelfMultiply &=
        isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
  Known = KnownBits::mul(Known, Known2, SelfMultiply);

  // Only make use of no-wrap flags if we failed to compute the sign bit
  // directly. This matters if the multiplication always overflows, in
  // which case we prefer to follow the result of the direct computation,
  // though as the program is invoking undefined behaviour we can choose
  // whatever we like here.
  if (isKnownNonNegative && !Known.isNegative())
    Known.makeNonNegative();
  else if (isKnownNegative && !Known.isNonNegative())
    Known.makeNegative();
}

void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
                                             KnownBits &Known) {
  unsigned BitWidth = Known.getBitWidth();
  unsigned NumRanges = Ranges.getNumOperands() / 2;
  assert(NumRanges >= 1);

  Known.Zero.setAllBits();
  Known.One.setAllBits();

  for (unsigned i = 0; i < NumRanges; ++i) {
    ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
    ConstantInt *Upper =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
    ConstantRange Range(Lower->getValue(), Upper->getValue());

    // The first CommonPrefixBits of all values in Range are equal.
    unsigned CommonPrefixBits =
        (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
    APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
    APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
    Known.One &= UnsignedMax & Mask;
    Known.Zero &= ~UnsignedMax & Mask;
  }
}

static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
  SmallVector<const Value *, 16> WorkSet(1, I);
  SmallPtrSet<const Value *, 32> Visited;
  SmallPtrSet<const Value *, 16> EphValues;

  // The instruction defining an assumption's condition itself is always
  // considered ephemeral to that assumption (even if it has other
  // non-ephemeral users). See r246696's test case for an example.
  if (is_contained(I->operands(), E))
    return true;

  while (!WorkSet.empty()) {
    const Value *V = WorkSet.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    // If all uses of this value are ephemeral, then so is this value.
    if (llvm::all_of(V->users(), [&](const User *U) {
          return EphValues.count(U);
        })) {
      if (V == E)
        return true;

      if (V == I || (isa<Instruction>(V) &&
                     !cast<Instruction>(V)->mayHaveSideEffects() &&
                     !cast<Instruction>(V)->isTerminator())) {
        EphValues.insert(V);
        if (const User *U = dyn_cast<User>(V))
          append_range(WorkSet, U->operands());
      }
    }
  }

  return false;
}

// Is this an intrinsic that cannot be speculated but also cannot trap?
bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
  if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
    return CI->isAssumeLikeIntrinsic();

  return false;
}
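// llvm.assume itself is the canonical example; the authoritative list is
// whatever IntrinsicInst::isAssumeLikeIntrinsic() accepts.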
bool llvm::isValidAssumeForContext(const Instruction *Inv,
                                   const Instruction *CxtI,
                                   const DominatorTree *DT,
                                   bool AllowEphemerals) {
  // There are two restrictions on the use of an assume:
  //  1. The assume must dominate the context (or the control flow must
  //     reach the assume whenever it reaches the context).
  //  2. The context must not be in the assume's set of ephemeral values
  //     (otherwise we will use the assume to prove that the condition
  //     feeding the assume is trivially true, thus causing the removal of
  //     the assume).

  if (Inv->getParent() == CxtI->getParent()) {
    // If Inv and CxtI are in the same block, check whether the assume (Inv)
    // comes before the context instruction.
    if (Inv->comesBefore(CxtI))
      return true;

    // Don't let an assume affect itself - this would cause the problems
    // `isEphemeralValueOf` is trying to prevent, and it would also make
    // the loop below go out of bounds.
    if (!AllowEphemerals && Inv == CxtI)
      return false;

    // The context comes first, but they're both in the same block.
    // Make sure there is nothing in between that might interrupt
    // the control flow, not even CxtI itself.
    // We limit the scan distance between the assume and its context instruction
    // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
    // it can be adjusted if needed (could be turned into a cl::opt).
    auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
    if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
      return false;

    return AllowEphemerals || !isEphemeralValueOf(Inv, CxtI);
  }

  // Inv and CxtI are in different blocks.
  if (DT) {
    if (DT->dominates(Inv, CxtI))
      return true;
  } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
    // We don't have a DT, but this trivially dominates.
    return true;
  }

  return false;
}

// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
// we still have enough information about `RHS` to conclude non-zero. For
// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
// so the extra compile time may not be worth it, but possibly a second API
// should be created for use outside of loops.
static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
  // v u> y implies v != 0.
  if (Pred == ICmpInst::ICMP_UGT)
    return true;

  // Special-case v != 0 to also handle v != null.
  if (Pred == ICmpInst::ICMP_NE)
    return match(RHS, m_Zero());

  // All other predicates - rely on generic ConstantRange handling.
  const APInt *C;
  auto Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits());
  if (match(RHS, m_APInt(C))) {
    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
    return !TrueValues.contains(Zero);
  }

  auto *VC = dyn_cast<ConstantDataVector>(RHS);
  if (VC == nullptr)
    return false;

  for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
       ++ElemIdx) {
    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
        Pred, VC->getElementAsAPInt(ElemIdx));
    if (TrueValues.contains(Zero))
      return false;
  }
  return true;
}
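// For example, 'icmp ugt %v, %y' excludes zero for any %y, and
// 'icmp sgt %v, 5' excludes zero because the exact region [6, signed max]
// does not contain 0, whereas 'icmp sgt %v, -1' does not exclude zero.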
static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
  // Use of assumptions is context-sensitive. If we don't have a context, we
  // cannot use them!
  if (!Q.AC || !Q.CxtI)
    return false;

  for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
    if (!Elem.Assume)
      continue;

    AssumeInst *I = cast<AssumeInst>(Elem.Assume);
    assert(I->getFunction() == Q.CxtI->getFunction() &&
           "Got assumption for the wrong function!");

    if (Elem.Index != AssumptionCache::ExprResultIdx) {
      if (!V->getType()->isPointerTy())
        continue;
      if (RetainedKnowledge RK = getKnowledgeFromBundle(
              *I, I->bundle_op_info_begin()[Elem.Index])) {
        if (RK.WasOn == V &&
            (RK.AttrKind == Attribute::NonNull ||
             (RK.AttrKind == Attribute::Dereferenceable &&
              !NullPointerIsDefined(Q.CxtI->getFunction(),
                                    V->getType()->getPointerAddressSpace()))) &&
            isValidAssumeForContext(I, Q.CxtI, Q.DT))
          return true;
      }
      continue;
    }

    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
    // runtime of ~O(#assumes * #values).

    Value *RHS;
    CmpInst::Predicate Pred;
    auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
    if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
      return false;

    if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
      return true;
  }

  return false;
}

static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
                                    Value *LHS, Value *RHS, KnownBits &Known,
                                    const SimplifyQuery &Q) {
  if (RHS->getType()->isPointerTy()) {
    // Handle comparison of pointer to null explicitly, as it will not be
    // covered by the m_APInt() logic below.
    if (LHS == V && match(RHS, m_Zero())) {
      switch (Pred) {
      case ICmpInst::ICMP_EQ:
        Known.setAllZero();
        break;
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_SGT:
        Known.makeNonNegative();
        break;
      case ICmpInst::ICMP_SLT:
        Known.makeNegative();
        break;
      default:
        break;
      }
    }
    return;
  }

  unsigned BitWidth = Known.getBitWidth();
  auto m_V =
      m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));

  const APInt *Mask, *C;
  uint64_t ShAmt;
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    // assume(V = C)
    if (match(LHS, m_V) && match(RHS, m_APInt(C))) {
      Known = Known.unionWith(KnownBits::makeConstant(*C));
      // assume(V & Mask = C)
    } else if (match(LHS, m_And(m_V, m_APInt(Mask))) &&
               match(RHS, m_APInt(C))) {
      // For one bits in Mask, we can propagate bits from C to V.
      Known.Zero |= ~*C & *Mask;
      Known.One |= *C & *Mask;
      // assume(V | Mask = C)
    } else if (match(LHS, m_Or(m_V, m_APInt(Mask))) && match(RHS, m_APInt(C))) {
      // For zero bits in Mask, we can propagate bits from C to V.
      Known.Zero |= ~*C & ~*Mask;
      Known.One |= *C & ~*Mask;
      // assume(V ^ Mask = C)
    } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) &&
               match(RHS, m_APInt(C))) {
      // Equivalent to assume(V == Mask ^ C)
      Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask));
      // assume(V << ShAmt = C)
    } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
               match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
      // For those bits in C that are known, we can propagate them to known
      // bits in V shifted to the right by ShAmt.
      KnownBits RHSKnown = KnownBits::makeConstant(*C);
      RHSKnown.Zero.lshrInPlace(ShAmt);
      RHSKnown.One.lshrInPlace(ShAmt);
      Known = Known.unionWith(RHSKnown);
      // assume(V >> ShAmt = C)
    } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
               match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
      KnownBits RHSKnown = KnownBits::makeConstant(*C);
      // For those bits in RHS that are known, we can propagate them to known
      // bits in V shifted to the right by C.
      Known.Zero |= RHSKnown.Zero << ShAmt;
      Known.One |= RHSKnown.One << ShAmt;
    }
    break;
  case ICmpInst::ICMP_NE: {
    // assume (V & B != 0) where B is a power of 2
    const APInt *BPow2;
    if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero()))
      Known.One |= *BPow2;
    break;
  }
  default:
    const APInt *Offset = nullptr;
    if (match(LHS, m_CombineOr(m_V, m_Add(m_V, m_APInt(Offset)))) &&
        match(RHS, m_APInt(C))) {
      ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
      if (Offset)
        LHSRange = LHSRange.sub(*Offset);
      Known = Known.unionWith(LHSRange.toKnownBits());
    }
    break;
  }
}
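// For example, given the condition '(X & 0xF0) == 0x50', the ICMP_EQ case
// above concludes that bits 4..7 of X are 0b0101, i.e. Known.One gains 0x50
// and Known.Zero gains 0xA0.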
void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
                                       unsigned Depth, const SimplifyQuery &Q) {
  if (!Q.CxtI)
    return;

  if (Q.DC && Q.DT) {
    // Handle dominating conditions.
    for (BranchInst *BI : Q.DC->conditionsFor(V)) {
      auto *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
      if (!Cmp)
        continue;

      BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
      if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
        computeKnownBitsFromCmp(V, Cmp->getPredicate(), Cmp->getOperand(0),
                                Cmp->getOperand(1), Known, Q);

      BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
      if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
        computeKnownBitsFromCmp(V, Cmp->getInversePredicate(),
                                Cmp->getOperand(0), Cmp->getOperand(1), Known,
                                Q);
    }

    if (Known.hasConflict())
      Known.resetAll();
  }

  if (!Q.AC)
    return;

  unsigned BitWidth = Known.getBitWidth();

  // Note that the patterns below need to be kept in sync with the code
  // in AssumptionCache::updateAffectedValues.

  for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
    if (!Elem.Assume)
      continue;

    AssumeInst *I = cast<AssumeInst>(Elem.Assume);
    assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
           "Got assumption for the wrong function!");

    if (Elem.Index != AssumptionCache::ExprResultIdx) {
      if (!V->getType()->isPointerTy())
        continue;
      if (RetainedKnowledge RK = getKnowledgeFromBundle(
              *I, I->bundle_op_info_begin()[Elem.Index])) {
        if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
            isPowerOf2_64(RK.ArgValue) &&
            isValidAssumeForContext(I, Q.CxtI, Q.DT))
          Known.Zero.setLowBits(Log2_64(RK.ArgValue));
      }
      continue;
    }

    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
    // runtime of ~O(#assumes * #values).

    Value *Arg = I->getArgOperand(0);

    if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      (void)BitWidth;
      Known.setAllOnes();
      return;
    }
    if (match(Arg, m_Not(m_Specific(V))) &&
        isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      (void)BitWidth;
      Known.setAllZero();
      return;
    }

    // The remaining tests are all recursive, so bail out if we hit the limit.
    if (Depth == MaxAnalysisRecursionDepth)
      continue;

    ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
    if (!Cmp)
      continue;

    if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
      continue;

    computeKnownBitsFromCmp(V, Cmp->getPredicate(), Cmp->getOperand(0),
                            Cmp->getOperand(1), Known, Q);
  }

  // Conflicting assumption: Undefined behavior will occur on this execution
  // path.
  if (Known.hasConflict())
    Known.resetAll();
}
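// For example, an alignment assumption bundle such as
//   call void @llvm.assume(i1 true) [ "align"(ptr %p, i64 16) ]
// makes the low Log2(16) = 4 bits of %p known zero at every context for
// which the assumption is valid.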
/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is a
/// pre-allocated temporary with the same bit width as Known and on return
/// contains the known bits of the shift value source. KF is an
/// operator-specific function that, given the known bits and a shift amount,
/// computes the implied known bits of the shift operator's result for that
/// shift amount. The results from calling KF are conservatively combined for
/// all permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
    const Operator *I, const APInt &DemandedElts, KnownBits &Known,
    KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
    function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
  // To limit compile-time impact, only query isKnownNonZero() if we know at
  // least something about the shift amount.
  bool ShAmtNonZero =
      Known.isNonZero() ||
      (Known.getMaxValue().ult(Known.getBitWidth()) &&
       isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q));
  Known = KF(Known2, Known, ShAmtNonZero);
}

static KnownBits
getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
                         const KnownBits &KnownLHS, const KnownBits &KnownRHS,
                         unsigned Depth, const SimplifyQuery &Q) {
  unsigned BitWidth = KnownLHS.getBitWidth();
  KnownBits KnownOut(BitWidth);
  bool IsAnd = false;
  bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
  Value *X = nullptr, *Y = nullptr;

  switch (I->getOpcode()) {
  case Instruction::And:
    KnownOut = KnownLHS & KnownRHS;
    IsAnd = true;
    // and(x, -x) is a common idiom that will clear all but the lowest set
    // bit. If we have a single known bit in x, we can clear all bits
    // above it.
    // TODO: instcombine often reassociates independent `and` which can hide
    // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
    if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
      // -(-x) == x so using whichever (LHS/RHS) gets us a better result.
      if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
        KnownOut = KnownLHS.blsi();
      else
        KnownOut = KnownRHS.blsi();
    }
    break;
  case Instruction::Or:
    KnownOut = KnownLHS | KnownRHS;
    break;
  case Instruction::Xor:
    KnownOut = KnownLHS ^ KnownRHS;
    // xor(x, x-1) is a common idiom that yields a mask of the bits up to and
    // including the lowest set bit of x. If we have a single known bit in x,
    // we can clear all bits above it.
    // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C !=
    // -1 but for the purpose of demanded bits (xor(x, x-C) &
    // Demanded) == (xor(x, x-1) & Demanded). Extend the xor pattern
    // to use arbitrary C if xor(x, x-C) is the same as xor(x, x-1).
    if (HasKnownOne &&
        match(I, m_c_Xor(m_Value(X), m_c_Add(m_Deferred(X), m_AllOnes())))) {
      const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
      KnownOut = XBits.blsmsk();
    }
    break;
  default:
    llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
  }

  // and(x, add (x, -1)) is a common idiom that always clears the low bit;
  // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
  // Here we handle the more general case of adding any odd number by
  // matching the form and/xor/or(x, add(x, y)) where y is odd.
  // TODO: This could be generalized to clearing any bit set in y where the
  // following bit is known to be unset in y.
  if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
      (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) {
    KnownBits KnownY(BitWidth);
    computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q);
    if (KnownY.countMinTrailingOnes() > 0) {
      if (IsAnd)
        KnownOut.Zero.setBit(0);
      else
        KnownOut.One.setBit(0);
    }
  }
  return KnownOut;
}

// Public so this can be used in `SimplifyDemandedUseBits`.
KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
                                             const KnownBits &KnownLHS,
                                             const KnownBits &KnownRHS,
                                             unsigned Depth,
                                             const SimplifyQuery &SQ) {
  auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);

  return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
                                  SQ);
}
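// As a concrete example of the idioms handled above: if the low bit of x is
// known to be set, then and(x, -x) is known to be exactly 1, and
// xor(x, x - 1) is also known to be exactly 1.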
ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
  Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
  // Without vscale_range, we only know that vscale is non-zero.
  if (!Attr.isValid())
    return ConstantRange(APInt(BitWidth, 1), APInt::getZero(BitWidth));

  unsigned AttrMin = Attr.getVScaleRangeMin();
  // Minimum is larger than vscale width, result is always poison.
  if ((unsigned)llvm::bit_width(AttrMin) > BitWidth)
    return ConstantRange::getEmpty(BitWidth);

  APInt Min(BitWidth, AttrMin);
  std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
  if (!AttrMax || (unsigned)llvm::bit_width(*AttrMax) > BitWidth)
    return ConstantRange(Min, APInt::getZero(BitWidth));

  return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
}

static void computeKnownBitsFromOperator(const Operator *I,
                                         const APInt &DemandedElts,
                                         KnownBits &Known, unsigned Depth,
                                         const SimplifyQuery &Q) {
  unsigned BitWidth = Known.getBitWidth();

  KnownBits Known2(BitWidth);
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    if (MDNode *MD =
            Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);
    break;
  case Instruction::And:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Or:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Xor:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Mul: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts,
                        Known, Known2, Depth, Q);
    break;
  }
  case Instruction::UDiv: {
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    Known =
        KnownBits::udiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
    break;
  }
  case Instruction::SDiv: {
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    Known =
        KnownBits::sdiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
    break;
  }
  case Instruction::Select: {
    computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(Known2);
    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
    break; // Can't work with floating point.
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case Instruction::ZExt:
  case Instruction::Trunc: {
    Type *SrcTy = I->getOperand(0)->getType();

    unsigned SrcBitWidth;
    // Note that we handle pointer operands here because of inttoptr/ptrtoint
    // which fall through here.
    Type *ScalarTy = SrcTy->getScalarType();
    SrcBitWidth = ScalarTy->isPointerTy() ?
      Q.DL.getPointerTypeSizeInBits(ScalarTy) :
      Q.DL.getTypeSizeInBits(ScalarTy);

    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    Known = Known.anyextOrTrunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I);
        Inst && Inst->hasNonNeg() && !Known.isNegative())
      Known.makeNonNegative();
    Known = Known.zextOrTrunc(BitWidth);
    break;
  }
  case Instruction::BitCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    if (SrcTy->isIntOrPtrTy() &&
        // TODO: For now, not handling conversions like:
        // (bitcast i64 %x to <2 x i32>)
        !I->getType()->isVectorTy()) {
      computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
      break;
    }

    // Handle cast from vector integer type to scalar or vector integer.
    auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
    if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
        !I->getType()->isIntOrIntVectorTy() ||
        isa<ScalableVectorType>(I->getType()))
      break;

    // Look through a cast from narrow vector elements to wider type.
    // Examples: v4i32 -> v2i64, v3i8 -> i24
    unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
    if (BitWidth % SubBitWidth == 0) {
      // Known bits are automatically intersected across demanded elements of a
      // vector. So for example, if a bit is computed as known zero, it must be
      // zero across all demanded elements of the vector.
      //
      // For this bitcast, each demanded element of the output is sub-divided
      // across a set of smaller vector elements in the source vector. To get
      // the known bits for an entire element of the output, compute the known
      // bits for each sub-element sequentially. This is done by shifting the
      // one-set-bit demanded elements parameter across the sub-elements for
      // consecutive calls to computeKnownBits. We are using the demanded
      // elements parameter as a mask operator.
      //
      // The known bits of each sub-element are then inserted into place
      // (dependent on endian) to form the full result of known bits.
      unsigned NumElts = DemandedElts.getBitWidth();
      unsigned SubScale = BitWidth / SubBitWidth;
      APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i])
          SubDemandedElts.setBit(i * SubScale);
      }

      KnownBits KnownSrc(SubBitWidth);
      for (unsigned i = 0; i != SubScale; ++i) {
        computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
                         Depth + 1, Q);
        unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
        Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
      }
    }
    break;
  }
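  // For example, for a bitcast of <2 x i32> to i64 on a little-endian target,
  // the known bits of source element 0 land in result bits [0, 32) and those
  // of element 1 in bits [32, 64); on a big-endian target the order flips.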
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    Known = Known.trunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    Known = Known.sext(BitWidth);
    break;
  }
  case Instruction::Shl: {
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                         bool ShAmtNonZero) {
      return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    // Trailing zeros of a left-shifted constant never decrease.
    const APInt *C;
    if (match(I->getOperand(0), m_APInt(C)))
      Known.Zero.setLowBits(C->countr_zero());
    break;
  }
  case Instruction::LShr: {
    auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                 bool ShAmtNonZero) {
      return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    // Leading zeros of a right-shifted constant never decrease.
    const APInt *C;
    if (match(I->getOperand(0), m_APInt(C)))
      Known.Zero.setHighBits(C->countl_zero());
    break;
  }
  case Instruction::AShr: {
    auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                 bool ShAmtNonZero) {
      return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    break;
  }
  case Instruction::Sub: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
                           DemandedElts, Known, Known2, Depth, Q);
    break;
  }
  case Instruction::Add: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
                           DemandedElts, Known, Known2, Depth, Q);
    break;
  }
  case Instruction::SRem:
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    Known = KnownBits::srem(Known, Known2);
    break;

  case Instruction::URem:
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    Known = KnownBits::urem(Known, Known2);
    break;
  case Instruction::Alloca:
    Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
    break;
  case Instruction::GetElementPtr: {
    // Analyze all of the subscripts of this getelementptr instruction
    // to determine if we can prove known low zero bits.
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    // Accumulate the constant indices in a separate variable
    // to minimize the number of calls to computeForAddSub.
    APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);

    gep_type_iterator GTI = gep_type_begin(I);
    for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
      // TrailZ can only become smaller, short-circuit if we hit zero.
      if (Known.isUnknown())
        break;

      Value *Index = I->getOperand(i);

      // Handle case when index is zero.
      Constant *CIndex = dyn_cast<Constant>(Index);
      if (CIndex && CIndex->isZeroValue())
        continue;

      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // Handle struct member offset arithmetic.

        assert(CIndex &&
               "Access to structure field must be known at compile time");

        if (CIndex->getType()->isVectorTy())
          Index = CIndex->getSplatValue();

        unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
        const StructLayout *SL = Q.DL.getStructLayout(STy);
        uint64_t Offset = SL->getElementOffset(Idx);
        AccConstIndices += Offset;
        continue;
      }

      // Handle array index arithmetic.
      Type *IndexedTy = GTI.getIndexedType();
      if (!IndexedTy->isSized()) {
        Known.resetAll();
        break;
      }

      unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
      KnownBits IndexBits(IndexBitWidth);
      computeKnownBits(Index, IndexBits, Depth + 1, Q);
      TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
      uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
      KnownBits ScalingFactor(IndexBitWidth);
      // Multiply by current sizeof type.
      // &A[i] == A + i * sizeof(*A[i]).
      if (IndexTypeSize.isScalable()) {
        // For scalable types the only thing we know about sizeof is
        // that this is a multiple of the minimum size.
        ScalingFactor.Zero.setLowBits(llvm::countr_zero(TypeSizeInBytes));
      } else if (IndexBits.isConstant()) {
        APInt IndexConst = IndexBits.getConstant();
        APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
        IndexConst *= ScalingFactor;
        AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
        continue;
      } else {
        ScalingFactor =
            KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes));
      }
      IndexBits = KnownBits::mul(IndexBits, ScalingFactor);

      // If the offsets have a different width from the pointer, according
      // to the language reference we need to sign-extend or truncate them
      // to the width of the pointer.
      IndexBits = IndexBits.sextOrTrunc(BitWidth);

      // Note that inbounds does *not* guarantee nsw for the addition, as only
      // the offset is signed, while the base address is unsigned.
      Known = KnownBits::computeForAddSub(
          /*Add=*/true, /*NSW=*/false, Known, IndexBits);
    }
    if (!Known.isUnknown() && !AccConstIndices.isZero()) {
      KnownBits Index = KnownBits::makeConstant(AccConstIndices);
      Known = KnownBits::computeForAddSub(
          /*Add=*/true, /*NSW=*/false, Known, Index);
    }
    break;
  }
  case Instruction::PHI: {
    const PHINode *P = cast<PHINode>(I);
    BinaryOperator *BO = nullptr;
    Value *R = nullptr, *L = nullptr;
    if (matchSimpleRecurrence(P, BO, R, L)) {
      // Handle the case of a simple two-predecessor recurrence PHI.
      // There's a lot more that could theoretically be done here, but
      // this is sufficient to catch some interesting cases.
      unsigned Opcode = BO->getOpcode();

      // If this is a shift recurrence, we know the bits being shifted in.
      // We can combine that with information about the start value of the
      // recurrence to conclude facts about the result.
      if ((Opcode == Instruction::LShr || Opcode == Instruction::AShr ||
           Opcode == Instruction::Shl) &&
          BO->getOperand(0) == I) {

        // We have matched a recurrence of the form:
        // %iv = [R, %entry], [%iv.next, %backedge]
        // %iv.next = shift_op %iv, L

        // Recurse with the phi context to avoid concern about whether facts
        // inferred hold at original context instruction. TODO: It may be
        // correct to use the original context.
        // If warranted, explore and add sufficient tests to cover.
        SimplifyQuery RecQ = Q;
        RecQ.CxtI = P;
        computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
        switch (Opcode) {
        case Instruction::Shl:
          // A shl recurrence will only increase the trailing zeros
          Known.Zero.setLowBits(Known2.countMinTrailingZeros());
          break;
        case Instruction::LShr:
          // A lshr recurrence will preserve the leading zeros of the
          // start value
          Known.Zero.setHighBits(Known2.countMinLeadingZeros());
          break;
        case Instruction::AShr:
          // An ashr recurrence will extend the initial sign bit
          Known.Zero.setHighBits(Known2.countMinLeadingZeros());
          Known.One.setHighBits(Known2.countMinLeadingOnes());
          break;
        };
      }

      // Check for operations that have the property that if
      // both their operands have low zero bits, the result
      // will have low zero bits.
      if (Opcode == Instruction::Add ||
          Opcode == Instruction::Sub ||
          Opcode == Instruction::And ||
          Opcode == Instruction::Or ||
          Opcode == Instruction::Mul) {
        // Change the context instruction to the "edge" that flows into the
        // phi. This is important because that is where the value is actually
        // "evaluated" even though it is used later somewhere else. (see also
        // D69571).
        SimplifyQuery RecQ = Q;

        unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
        Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
        Instruction *LInst = P->getIncomingBlock(1-OpNum)->getTerminator();

        // Ok, we have a PHI of the form L op= R. Check for low
        // zero bits.
        RecQ.CxtI = RInst;
        computeKnownBits(R, Known2, Depth + 1, RecQ);

        // We need to take the minimum number of known bits
        KnownBits Known3(BitWidth);
        RecQ.CxtI = LInst;
        computeKnownBits(L, Known3, Depth + 1, RecQ);

        Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
                                       Known3.countMinTrailingZeros()));

        auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
        if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
          // If initial value of recurrence is nonnegative, and we are adding
          // a nonnegative number with nsw, the result can only be nonnegative
          // or poison value regardless of the number of times we execute the
          // add in phi recurrence. If initial value is negative and we are
          // adding a negative number with nsw, the result can only be
          // negative or poison value. Similar arguments apply to sub and mul.
          //
          // (add non-negative, non-negative) --> non-negative
          // (add negative, negative) --> negative
          if (Opcode == Instruction::Add) {
            if (Known2.isNonNegative() && Known3.isNonNegative())
              Known.makeNonNegative();
            else if (Known2.isNegative() && Known3.isNegative())
              Known.makeNegative();
          }

          // (sub nsw non-negative, negative) --> non-negative
          // (sub nsw negative, non-negative) --> negative
          else if (Opcode == Instruction::Sub && BO->getOperand(0) == I) {
            if (Known2.isNonNegative() && Known3.isNegative())
              Known.makeNonNegative();
            else if (Known2.isNegative() && Known3.isNonNegative())
              Known.makeNegative();
          }

          // (mul nsw non-negative, non-negative) --> non-negative
          else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
                   Known3.isNonNegative())
            Known.makeNonNegative();
        }

        break;
      }
    }

    // Unreachable blocks may have zero-operand PHI nodes.
    if (P->getNumIncomingValues() == 0)
      break;

    // Otherwise take the unions of the known bit sets of the operands,
    // taking conservative care to avoid excessive recursion.
    if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
      // Skip if every incoming value references the PHI itself.
      if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
        break;

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) {
        Value *IncValue = P->getIncomingValue(u);
        // Skip direct self references.
        if (IncValue == P) continue;

        // Change the context instruction to the "edge" that flows into the
        // phi. This is important because that is where the value is actually
        // "evaluated" even though it is used later somewhere else. (see also
        // D69571).
        SimplifyQuery RecQ = Q;
        RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();

        Known2 = KnownBits(BitWidth);

        // Recurse, but cap the recursion to one level, because we don't
        // want to waste time spinning around in loops.
        // TODO: See if we can base recursion limiter on number of incoming phi
        // edges so we don't overly clamp analysis.
        computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ);

        // See if we can further use a conditional branch into the phi
        // to help us determine the range of the value.
        if (!Known2.isConstant()) {
          ICmpInst::Predicate Pred;
          const APInt *RHSC;
          BasicBlock *TrueSucc, *FalseSucc;
          // TODO: Use RHS Value and compute range from its known bits.
          if (match(RecQ.CxtI,
                    m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
                         m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
            // Check for cases of duplicate successors.
            if ((TrueSucc == P->getParent()) != (FalseSucc == P->getParent())) {
              // If we're using the false successor, invert the predicate.
              if (FalseSucc == P->getParent())
                Pred = CmpInst::getInversePredicate(Pred);
              // Get the knownbits implied by the incoming phi condition.
              auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
              KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits());
              // We can have conflicts here if we are analyzing dead code (it
              // is impossible for us to reach this BB based on the icmp).
              if (KnownUnion.hasConflict()) {
                // No reason to continue analyzing in a known dead region, so
                // just resetAll and break. This will cause us to also exit the
                // outer loop.
                Known.resetAll();
                break;
              }
              Known2 = KnownUnion;
            }
          }
        }

        Known = Known.intersectWith(Known2);
        // If all bits have been ruled out, there's no need to check
        // more operands.
        if (Known.isUnknown())
          break;
      }
    }
    break;
  }
  case Instruction::Call:
  case Instruction::Invoke:
    // If range metadata is attached to this call, set known bits from that,
    // and then intersect with known bits based on other properties of the
    // function.
    if (MDNode *MD =
            Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);
    if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) {
      if (RV->getType() == I->getType()) {
        computeKnownBits(RV, Known2, Depth + 1, Q);
        Known = Known.unionWith(Known2);
      }
    }
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::abs: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
        Known = Known2.abs(IntMinIsPoison);
        break;
      }
      case Intrinsic::bitreverse:
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        Known.Zero |= Known2.Zero.reverseBits();
        Known.One |= Known2.One.reverseBits();
        break;
      case Intrinsic::bswap:
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        Known.Zero |= Known2.Zero.byteSwap();
        Known.One |= Known2.One.byteSwap();
        break;
      case Intrinsic::ctlz: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        // If we have a known 1, its position is our upper bound.
        unsigned PossibleLZ = Known2.countMaxLeadingZeros();
        // If this call is poison for 0 input, the result will be less than 2^n.
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
          PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
        unsigned LowBits = llvm::bit_width(PossibleLZ);
        Known.Zero.setBitsFrom(LowBits);
        break;
      }
      case Intrinsic::cttz: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        // If we have a known 1, its position is our upper bound.
        unsigned PossibleTZ = Known2.countMaxTrailingZeros();
        // If this call is poison for 0 input, the result will be less than 2^n.
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
          PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
        unsigned LowBits = llvm::bit_width(PossibleTZ);
        Known.Zero.setBitsFrom(LowBits);
        break;
      }
      case Intrinsic::ctpop: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        // We can bound the space the count needs. Also, bits known to be zero
        // can't contribute to the population.
        unsigned BitsPossiblySet = Known2.countMaxPopulation();
        unsigned LowBits = llvm::bit_width(BitsPossiblySet);
        Known.Zero.setBitsFrom(LowBits);
        // TODO: we could bound KnownOne using the lower bound on the number
        // of bits which might be set provided by popcnt KnownOne2.
        break;
      }
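      // To illustrate the population-count style bounds above: if at most 5
      // bits of the ctpop operand can be set, the result is at most 5, so it
      // fits in bit_width(5) = 3 bits and all higher result bits are known
      // zero.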
1499 uint64_t ShiftAmt = SA->urem(BitWidth); 1500 if (II->getIntrinsicID() == Intrinsic::fshr) 1501 ShiftAmt = BitWidth - ShiftAmt; 1502 1503 KnownBits Known3(BitWidth); 1504 computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); 1505 computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q); 1506 1507 Known.Zero = 1508 Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt); 1509 Known.One = 1510 Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt); 1511 break; 1512 } 1513 case Intrinsic::uadd_sat: 1514 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1515 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1516 Known = KnownBits::uadd_sat(Known, Known2); 1517 break; 1518 case Intrinsic::usub_sat: 1519 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1520 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1521 Known = KnownBits::usub_sat(Known, Known2); 1522 break; 1523 case Intrinsic::sadd_sat: 1524 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1525 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1526 Known = KnownBits::sadd_sat(Known, Known2); 1527 break; 1528 case Intrinsic::ssub_sat: 1529 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1530 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1531 Known = KnownBits::ssub_sat(Known, Known2); 1532 break; 1533 case Intrinsic::umin: 1534 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1535 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1536 Known = KnownBits::umin(Known, Known2); 1537 break; 1538 case Intrinsic::umax: 1539 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1540 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1541 Known = KnownBits::umax(Known, Known2); 1542 break; 1543 case Intrinsic::smin: 1544 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1545 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1546 Known = KnownBits::smin(Known, Known2); 1547 break; 1548 case Intrinsic::smax: 1549 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1550 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1551 Known = KnownBits::smax(Known, Known2); 1552 break; 1553 case Intrinsic::ptrmask: { 1554 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1555 1556 const Value *Mask = I->getOperand(1); 1557 Known2 = KnownBits(Mask->getType()->getScalarSizeInBits()); 1558 computeKnownBits(Mask, Known2, Depth + 1, Q); 1559 // TODO: 1-extend would be more precise. 1560 Known &= Known2.anyextOrTrunc(BitWidth); 1561 break; 1562 } 1563 case Intrinsic::x86_sse42_crc32_64_64: 1564 Known.Zero.setBitsFrom(32); 1565 break; 1566 case Intrinsic::riscv_vsetvli: 1567 case Intrinsic::riscv_vsetvlimax: 1568 // Assume that VL output is <= 65536. 1569 // TODO: Take SEW and LMUL into account. 1570 if (BitWidth > 17) 1571 Known.Zero.setBitsFrom(17); 1572 break; 1573 case Intrinsic::vscale: { 1574 if (!II->getParent() || !II->getFunction()) 1575 break; 1576 1577 Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits(); 1578 break; 1579 } 1580 } 1581 } 1582 break; 1583 case Instruction::ShuffleVector: { 1584 auto *Shuf = dyn_cast<ShuffleVectorInst>(I); 1585 // FIXME: Do we need to handle ConstantExpr involving shufflevectors? 1586 if (!Shuf) { 1587 Known.resetAll(); 1588 return; 1589 } 1590 // For undef elements, we don't know anything about the common state of 1591 // the shuffle result. 
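// For example (illustrative IR), in
//   shufflevector <2 x i8> %a, <2 x i8> %b, <2 x i32> <i32 0, i32 2>
// only lane 0 of %a and lane 0 of %b are demanded, and the result's known
// bits are the intersection of the known bits of those two lanes.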
1592 APInt DemandedLHS, DemandedRHS; 1593 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) { 1594 Known.resetAll(); 1595 return; 1596 } 1597 Known.One.setAllBits(); 1598 Known.Zero.setAllBits(); 1599 if (!!DemandedLHS) { 1600 const Value *LHS = Shuf->getOperand(0); 1601 computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q); 1602 // If we don't know any bits, early out. 1603 if (Known.isUnknown()) 1604 break; 1605 } 1606 if (!!DemandedRHS) { 1607 const Value *RHS = Shuf->getOperand(1); 1608 computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q); 1609 Known = Known.intersectWith(Known2); 1610 } 1611 break; 1612 } 1613 case Instruction::InsertElement: { 1614 if (isa<ScalableVectorType>(I->getType())) { 1615 Known.resetAll(); 1616 return; 1617 } 1618 const Value *Vec = I->getOperand(0); 1619 const Value *Elt = I->getOperand(1); 1620 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2)); 1621 // Early out if the index is non-constant or out-of-range. 1622 unsigned NumElts = DemandedElts.getBitWidth(); 1623 if (!CIdx || CIdx->getValue().uge(NumElts)) { 1624 Known.resetAll(); 1625 return; 1626 } 1627 Known.One.setAllBits(); 1628 Known.Zero.setAllBits(); 1629 unsigned EltIdx = CIdx->getZExtValue(); 1630 // Do we demand the inserted element? 1631 if (DemandedElts[EltIdx]) { 1632 computeKnownBits(Elt, Known, Depth + 1, Q); 1633 // If we don't know any bits, early out. 1634 if (Known.isUnknown()) 1635 break; 1636 } 1637 // We don't need the base vector element that has been inserted. 1638 APInt DemandedVecElts = DemandedElts; 1639 DemandedVecElts.clearBit(EltIdx); 1640 if (!!DemandedVecElts) { 1641 computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q); 1642 Known = Known.intersectWith(Known2); 1643 } 1644 break; 1645 } 1646 case Instruction::ExtractElement: { 1647 // Look through extract element. If the index is non-constant or 1648 // out-of-range demand all elements, otherwise just the extracted element. 
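// For example, extractelement <4 x i32> %v, i64 2 demands only element 2
// (a one-bit-set mask), whereas a variable or out-of-range index
// conservatively demands all four elements.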
1649 const Value *Vec = I->getOperand(0); 1650 const Value *Idx = I->getOperand(1); 1651 auto *CIdx = dyn_cast<ConstantInt>(Idx); 1652 if (isa<ScalableVectorType>(Vec->getType())) { 1653 // FIXME: there's probably *something* we can do with scalable vectors 1654 Known.resetAll(); 1655 break; 1656 } 1657 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements(); 1658 APInt DemandedVecElts = APInt::getAllOnes(NumElts); 1659 if (CIdx && CIdx->getValue().ult(NumElts)) 1660 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); 1661 computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q); 1662 break; 1663 } 1664 case Instruction::ExtractValue: 1665 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) { 1666 const ExtractValueInst *EVI = cast<ExtractValueInst>(I); 1667 if (EVI->getNumIndices() != 1) break; 1668 if (EVI->getIndices()[0] == 0) { 1669 switch (II->getIntrinsicID()) { 1670 default: break; 1671 case Intrinsic::uadd_with_overflow: 1672 case Intrinsic::sadd_with_overflow: 1673 computeKnownBitsAddSub(true, II->getArgOperand(0), 1674 II->getArgOperand(1), false, DemandedElts, 1675 Known, Known2, Depth, Q); 1676 break; 1677 case Intrinsic::usub_with_overflow: 1678 case Intrinsic::ssub_with_overflow: 1679 computeKnownBitsAddSub(false, II->getArgOperand(0), 1680 II->getArgOperand(1), false, DemandedElts, 1681 Known, Known2, Depth, Q); 1682 break; 1683 case Intrinsic::umul_with_overflow: 1684 case Intrinsic::smul_with_overflow: 1685 computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false, 1686 DemandedElts, Known, Known2, Depth, Q); 1687 break; 1688 } 1689 } 1690 } 1691 break; 1692 case Instruction::Freeze: 1693 if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT, 1694 Depth + 1)) 1695 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1696 break; 1697 } 1698 } 1699 1700 /// Determine which bits of V are known to be either zero or one and return 1701 /// them. 1702 KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, 1703 unsigned Depth, const SimplifyQuery &Q) { 1704 KnownBits Known(getBitWidth(V->getType(), Q.DL)); 1705 ::computeKnownBits(V, DemandedElts, Known, Depth, Q); 1706 return Known; 1707 } 1708 1709 /// Determine which bits of V are known to be either zero or one and return 1710 /// them. 1711 KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth, 1712 const SimplifyQuery &Q) { 1713 KnownBits Known(getBitWidth(V->getType(), Q.DL)); 1714 computeKnownBits(V, Known, Depth, Q); 1715 return Known; 1716 } 1717 1718 /// Determine which bits of V are known to be either zero or one and return 1719 /// them in the Known bit set. 1720 /// 1721 /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that 1722 /// we cannot optimize based on the assumption that it is zero without changing 1723 /// it to be an explicit zero. If we don't change it to zero, other code could 1724 /// optimized based on the contradictory assumption that it is non-zero. 1725 /// Because instcombine aggressively folds operations with undef args anyway, 1726 /// this won't lose us code quality. 1727 /// 1728 /// This function is defined on values with integer type, values with pointer 1729 /// type, and vectors of integers. In the case 1730 /// where V is a vector, known zero, and known one values are the 1731 /// same width as the vector element, and the bit is set only if it is true 1732 /// for all of the demanded elements in the vector specified by DemandedElts. 
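///
/// For example, when V is a <4 x i32> value and only lanes 0 and 2 matter to
/// the caller, DemandedElts is the 4-bit mask 0b0101; a bit ends up known in
/// the result only if it is known for both demanded lanes.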
1733 void computeKnownBits(const Value *V, const APInt &DemandedElts, 1734 KnownBits &Known, unsigned Depth, 1735 const SimplifyQuery &Q) { 1736 if (!DemandedElts) { 1737 // No demanded elts, better to assume we don't know anything. 1738 Known.resetAll(); 1739 return; 1740 } 1741 1742 assert(V && "No Value?"); 1743 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 1744 1745 #ifndef NDEBUG 1746 Type *Ty = V->getType(); 1747 unsigned BitWidth = Known.getBitWidth(); 1748 1749 assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) && 1750 "Not integer or pointer type!"); 1751 1752 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { 1753 assert( 1754 FVTy->getNumElements() == DemandedElts.getBitWidth() && 1755 "DemandedElt width should equal the fixed vector number of elements"); 1756 } else { 1757 assert(DemandedElts == APInt(1, 1) && 1758 "DemandedElt width should be 1 for scalars or scalable vectors"); 1759 } 1760 1761 Type *ScalarTy = Ty->getScalarType(); 1762 if (ScalarTy->isPointerTy()) { 1763 assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) && 1764 "V and Known should have same BitWidth"); 1765 } else { 1766 assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) && 1767 "V and Known should have same BitWidth"); 1768 } 1769 #endif 1770 1771 const APInt *C; 1772 if (match(V, m_APInt(C))) { 1773 // We know all of the bits for a scalar constant or a splat vector constant! 1774 Known = KnownBits::makeConstant(*C); 1775 return; 1776 } 1777 // Null and aggregate-zero are all-zeros. 1778 if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) { 1779 Known.setAllZero(); 1780 return; 1781 } 1782 // Handle a constant vector by taking the intersection of the known bits of 1783 // each element. 1784 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) { 1785 assert(!isa<ScalableVectorType>(V->getType())); 1786 // We know that CDV must be a vector of integers. Take the intersection of 1787 // each element. 1788 Known.Zero.setAllBits(); Known.One.setAllBits(); 1789 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) { 1790 if (!DemandedElts[i]) 1791 continue; 1792 APInt Elt = CDV->getElementAsAPInt(i); 1793 Known.Zero &= ~Elt; 1794 Known.One &= Elt; 1795 } 1796 if (Known.hasConflict()) 1797 Known.resetAll(); 1798 return; 1799 } 1800 1801 if (const auto *CV = dyn_cast<ConstantVector>(V)) { 1802 assert(!isa<ScalableVectorType>(V->getType())); 1803 // We know that CV must be a vector of integers. Take the intersection of 1804 // each element. 1805 Known.Zero.setAllBits(); Known.One.setAllBits(); 1806 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { 1807 if (!DemandedElts[i]) 1808 continue; 1809 Constant *Element = CV->getAggregateElement(i); 1810 if (isa<PoisonValue>(Element)) 1811 continue; 1812 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element); 1813 if (!ElementCI) { 1814 Known.resetAll(); 1815 return; 1816 } 1817 const APInt &Elt = ElementCI->getValue(); 1818 Known.Zero &= ~Elt; 1819 Known.One &= Elt; 1820 } 1821 if (Known.hasConflict()) 1822 Known.resetAll(); 1823 return; 1824 } 1825 1826 // Start out not knowing anything. 1827 Known.resetAll(); 1828 1829 // We can't imply anything about undefs. 1830 if (isa<UndefValue>(V)) 1831 return; 1832 1833 // There's no point in looking through other users of ConstantData for 1834 // assumptions. Confirm that we've handled them all. 
1835 assert(!isa<ConstantData>(V) && "Unhandled constant data!"); 1836 1837 // All recursive calls that increase depth must come after this. 1838 if (Depth == MaxAnalysisRecursionDepth) 1839 return; 1840 1841 // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has 1842 // the bits of its aliasee. 1843 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { 1844 if (!GA->isInterposable()) 1845 computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q); 1846 return; 1847 } 1848 1849 if (const Operator *I = dyn_cast<Operator>(V)) 1850 computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q); 1851 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 1852 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) 1853 Known = CR->toKnownBits(); 1854 } 1855 1856 // Aligned pointers have trailing zeros - refine Known.Zero set 1857 if (isa<PointerType>(V->getType())) { 1858 Align Alignment = V->getPointerAlignment(Q.DL); 1859 Known.Zero.setLowBits(Log2(Alignment)); 1860 } 1861 1862 // computeKnownBitsFromContext strictly refines Known. 1863 // Therefore, we run them after computeKnownBitsFromOperator. 1864 1865 // Check whether we can determine known bits from context such as assumes. 1866 computeKnownBitsFromContext(V, Known, Depth, Q); 1867 1868 assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); 1869 } 1870 1871 /// Try to detect a recurrence that the value of the induction variable is 1872 /// always a power of two (or zero). 1873 static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero, 1874 unsigned Depth, SimplifyQuery &Q) { 1875 BinaryOperator *BO = nullptr; 1876 Value *Start = nullptr, *Step = nullptr; 1877 if (!matchSimpleRecurrence(PN, BO, Start, Step)) 1878 return false; 1879 1880 // Initial value must be a power of two. 1881 for (const Use &U : PN->operands()) { 1882 if (U.get() == Start) { 1883 // Initial value comes from a different BB, need to adjust context 1884 // instruction for analysis. 1885 Q.CxtI = PN->getIncomingBlock(U)->getTerminator(); 1886 if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q)) 1887 return false; 1888 } 1889 } 1890 1891 // Except for Mul, the induction variable must be on the left side of the 1892 // increment expression, otherwise its value can be arbitrary. 1893 if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step) 1894 return false; 1895 1896 Q.CxtI = BO->getParent()->getTerminator(); 1897 switch (BO->getOpcode()) { 1898 case Instruction::Mul: 1899 // Power of two is closed under multiplication. 1900 return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || 1901 Q.IIQ.hasNoSignedWrap(BO)) && 1902 isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q); 1903 case Instruction::SDiv: 1904 // Start value must not be signmask for signed division, so simply being a 1905 // power of two is not sufficient, and it has to be a constant. 1906 if (!match(Start, m_Power2()) || match(Start, m_SignMask())) 1907 return false; 1908 [[fallthrough]]; 1909 case Instruction::UDiv: 1910 // Divisor must be a power of two. 1911 // If OrZero is false, cannot guarantee induction variable is non-zero after 1912 // division, same for Shr, unless it is exact division. 
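// For example, the udiv recurrence 8, 2, 0, ... (start 8, step 4) reaches
// zero, so without the exact flag we can only conclude "power of two or
// zero".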
1913 return (OrZero || Q.IIQ.isExact(BO)) && 1914 isKnownToBeAPowerOfTwo(Step, false, Depth, Q); 1915 case Instruction::Shl: 1916 return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO); 1917 case Instruction::AShr: 1918 if (!match(Start, m_Power2()) || match(Start, m_SignMask())) 1919 return false; 1920 [[fallthrough]]; 1921 case Instruction::LShr: 1922 return OrZero || Q.IIQ.isExact(BO); 1923 default: 1924 return false; 1925 } 1926 } 1927 1928 /// Return true if the given value is known to have exactly one 1929 /// bit set when defined. For vectors return true if every element is known to 1930 /// be a power of two when defined. Supports values with integer or pointer 1931 /// types and vectors of integers. 1932 bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, 1933 const SimplifyQuery &Q) { 1934 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 1935 1936 if (isa<Constant>(V)) 1937 return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2()); 1938 1939 // i1 is by definition a power of 2 or zero. 1940 if (OrZero && V->getType()->getScalarSizeInBits() == 1) 1941 return true; 1942 1943 auto *I = dyn_cast<Instruction>(V); 1944 if (!I) 1945 return false; 1946 1947 if (Q.CxtI && match(V, m_VScale())) { 1948 const Function *F = Q.CxtI->getFunction(); 1949 // The vscale_range indicates vscale is a power-of-two. 1950 return F->hasFnAttribute(Attribute::VScaleRange); 1951 } 1952 1953 // 1 << X is clearly a power of two if the one is not shifted off the end. If 1954 // it is shifted off the end then the result is undefined. 1955 if (match(I, m_Shl(m_One(), m_Value()))) 1956 return true; 1957 1958 // (signmask) >>l X is clearly a power of two if the one is not shifted off 1959 // the bottom. If it is shifted off the bottom then the result is undefined. 1960 if (match(I, m_LShr(m_SignMask(), m_Value()))) 1961 return true; 1962 1963 // The remaining tests are all recursive, so bail out if we hit the limit. 1964 if (Depth++ == MaxAnalysisRecursionDepth) 1965 return false; 1966 1967 switch (I->getOpcode()) { 1968 case Instruction::ZExt: 1969 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1970 case Instruction::Trunc: 1971 return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1972 case Instruction::Shl: 1973 if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I)) 1974 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1975 return false; 1976 case Instruction::LShr: 1977 if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I))) 1978 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1979 return false; 1980 case Instruction::UDiv: 1981 if (Q.IIQ.isExact(cast<BinaryOperator>(I))) 1982 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1983 return false; 1984 case Instruction::Mul: 1985 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) && 1986 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) && 1987 (OrZero || isKnownNonZero(I, Depth, Q)); 1988 case Instruction::And: 1989 // A power of two and'd with anything is a power of two or zero. 1990 if (OrZero && 1991 (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) || 1992 isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q))) 1993 return true; 1994 // X & (-X) is always a power of two or zero. 
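// For example, x = 0b01100100 gives -x = 0b10011100 and x & -x = 0b00000100,
// i.e. exactly the lowest set bit of x (or zero when x is zero).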
1995 if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) || 1996 match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0))))) 1997 return OrZero || isKnownNonZero(I->getOperand(0), Depth, Q); 1998 return false; 1999 case Instruction::Add: { 2000 // Adding a power-of-two or zero to the same power-of-two or zero yields 2001 // either the original power-of-two, a larger power-of-two or zero. 2002 const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V); 2003 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) || 2004 Q.IIQ.hasNoSignedWrap(VOBO)) { 2005 if (match(I->getOperand(0), 2006 m_c_And(m_Specific(I->getOperand(1)), m_Value())) && 2007 isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q)) 2008 return true; 2009 if (match(I->getOperand(1), 2010 m_c_And(m_Specific(I->getOperand(0)), m_Value())) && 2011 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q)) 2012 return true; 2013 2014 unsigned BitWidth = V->getType()->getScalarSizeInBits(); 2015 KnownBits LHSBits(BitWidth); 2016 computeKnownBits(I->getOperand(0), LHSBits, Depth, Q); 2017 2018 KnownBits RHSBits(BitWidth); 2019 computeKnownBits(I->getOperand(1), RHSBits, Depth, Q); 2020 // If i8 V is a power of two or zero: 2021 // ZeroBits: 1 1 1 0 1 1 1 1 2022 // ~ZeroBits: 0 0 0 1 0 0 0 0 2023 if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2()) 2024 // If OrZero isn't set, we cannot give back a zero result. 2025 // Make sure either the LHS or RHS has a bit set. 2026 if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue()) 2027 return true; 2028 } 2029 return false; 2030 } 2031 case Instruction::Select: 2032 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) && 2033 isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q); 2034 case Instruction::PHI: { 2035 // A PHI node is power of two if all incoming values are power of two, or if 2036 // it is an induction variable where in each step its value is a power of 2037 // two. 2038 auto *PN = cast<PHINode>(I); 2039 SimplifyQuery RecQ = Q; 2040 2041 // Check if it is an induction variable and always power of two. 2042 if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ)) 2043 return true; 2044 2045 // Recursively check all incoming values. Limit recursion to 2 levels, so 2046 // that search complexity is limited to number of operands^2. 2047 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); 2048 return llvm::all_of(PN->operands(), [&](const Use &U) { 2049 // Value is power of 2 if it is coming from PHI node itself by induction. 2050 if (U.get() == PN) 2051 return true; 2052 2053 // Change the context instruction to the incoming block where it is 2054 // evaluated. 2055 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); 2056 return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ); 2057 }); 2058 } 2059 case Instruction::Invoke: 2060 case Instruction::Call: { 2061 if (auto *II = dyn_cast<IntrinsicInst>(I)) { 2062 switch (II->getIntrinsicID()) { 2063 case Intrinsic::umax: 2064 case Intrinsic::smax: 2065 case Intrinsic::umin: 2066 case Intrinsic::smin: 2067 return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) && 2068 isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); 2069 // bswap/bitreverse just move around bits, but don't change any 1s/0s 2070 // thus dont change pow2/non-pow2 status. 
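// For example, bitreverse(i8 0b00010000) == 0b00001000 and
// bswap(i16 0x0100) == 0x0001: a single set bit stays a single set bit.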
2071 case Intrinsic::bitreverse: 2072 case Intrinsic::bswap: 2073 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); 2074 case Intrinsic::fshr: 2075 case Intrinsic::fshl: 2076 // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x) 2077 if (II->getArgOperand(0) == II->getArgOperand(1)) 2078 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); 2079 break; 2080 default: 2081 break; 2082 } 2083 } 2084 return false; 2085 } 2086 default: 2087 return false; 2088 } 2089 } 2090 2091 /// Test whether a GEP's result is known to be non-null. 2092 /// 2093 /// Uses properties inherent in a GEP to try to determine whether it is known 2094 /// to be non-null. 2095 /// 2096 /// Currently this routine does not support vector GEPs. 2097 static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, 2098 const SimplifyQuery &Q) { 2099 const Function *F = nullptr; 2100 if (const Instruction *I = dyn_cast<Instruction>(GEP)) 2101 F = I->getFunction(); 2102 2103 if (!GEP->isInBounds() || 2104 NullPointerIsDefined(F, GEP->getPointerAddressSpace())) 2105 return false; 2106 2107 // FIXME: Support vector-GEPs. 2108 assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP"); 2109 2110 // If the base pointer is non-null, we cannot walk to a null address with an 2111 // inbounds GEP in address space zero. 2112 if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q)) 2113 return true; 2114 2115 // Walk the GEP operands and see if any operand introduces a non-zero offset. 2116 // If so, then the GEP cannot produce a null pointer, as doing so would 2117 // inherently violate the inbounds contract within address space zero. 2118 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); 2119 GTI != GTE; ++GTI) { 2120 // Struct types are easy -- they must always be indexed by a constant. 2121 if (StructType *STy = GTI.getStructTypeOrNull()) { 2122 ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand()); 2123 unsigned ElementIdx = OpC->getZExtValue(); 2124 const StructLayout *SL = Q.DL.getStructLayout(STy); 2125 uint64_t ElementOffset = SL->getElementOffset(ElementIdx); 2126 if (ElementOffset > 0) 2127 return true; 2128 continue; 2129 } 2130 2131 // If we have a zero-sized type, the index doesn't matter. Keep looping. 2132 if (GTI.getSequentialElementStride(Q.DL).isZero()) 2133 continue; 2134 2135 // Fast path the constant operand case both for efficiency and so we don't 2136 // increment Depth when just zipping down an all-constant GEP. 2137 if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) { 2138 if (!OpC->isZero()) 2139 return true; 2140 continue; 2141 } 2142 2143 // We post-increment Depth here because while isKnownNonZero increments it 2144 // as well, when we pop back up that increment won't persist. We don't want 2145 // to recurse 10k times just because we have 10k GEP operands. We don't 2146 // bail completely out because we want to handle constant GEPs regardless 2147 // of depth. 
2148 if (Depth++ >= MaxAnalysisRecursionDepth) 2149 continue; 2150 2151 if (isKnownNonZero(GTI.getOperand(), Depth, Q)) 2152 return true; 2153 } 2154 2155 return false; 2156 } 2157 2158 static bool isKnownNonNullFromDominatingCondition(const Value *V, 2159 const Instruction *CtxI, 2160 const DominatorTree *DT) { 2161 assert(!isa<Constant>(V) && "Called for constant?"); 2162 2163 if (!CtxI || !DT) 2164 return false; 2165 2166 unsigned NumUsesExplored = 0; 2167 for (const auto *U : V->users()) { 2168 // Avoid massive lists 2169 if (NumUsesExplored >= DomConditionsMaxUses) 2170 break; 2171 NumUsesExplored++; 2172 2173 // If the value is used as an argument to a call or invoke, then argument 2174 // attributes may provide an answer about null-ness. 2175 if (const auto *CB = dyn_cast<CallBase>(U)) 2176 if (auto *CalledFunc = CB->getCalledFunction()) 2177 for (const Argument &Arg : CalledFunc->args()) 2178 if (CB->getArgOperand(Arg.getArgNo()) == V && 2179 Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) && 2180 DT->dominates(CB, CtxI)) 2181 return true; 2182 2183 // If the value is used as a load/store, then the pointer must be non null. 2184 if (V == getLoadStorePointerOperand(U)) { 2185 const Instruction *I = cast<Instruction>(U); 2186 if (!NullPointerIsDefined(I->getFunction(), 2187 V->getType()->getPointerAddressSpace()) && 2188 DT->dominates(I, CtxI)) 2189 return true; 2190 } 2191 2192 if ((match(U, m_IDiv(m_Value(), m_Specific(V))) || 2193 match(U, m_IRem(m_Value(), m_Specific(V)))) && 2194 isValidAssumeForContext(cast<Instruction>(U), CtxI, DT)) 2195 return true; 2196 2197 // Consider only compare instructions uniquely controlling a branch 2198 Value *RHS; 2199 CmpInst::Predicate Pred; 2200 if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS)))) 2201 continue; 2202 2203 bool NonNullIfTrue; 2204 if (cmpExcludesZero(Pred, RHS)) 2205 NonNullIfTrue = true; 2206 else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS)) 2207 NonNullIfTrue = false; 2208 else 2209 continue; 2210 2211 SmallVector<const User *, 4> WorkList; 2212 SmallPtrSet<const User *, 4> Visited; 2213 for (const auto *CmpU : U->users()) { 2214 assert(WorkList.empty() && "Should be!"); 2215 if (Visited.insert(CmpU).second) 2216 WorkList.push_back(CmpU); 2217 2218 while (!WorkList.empty()) { 2219 auto *Curr = WorkList.pop_back_val(); 2220 2221 // If a user is an AND, add all its users to the work list. We only 2222 // propagate "pred != null" condition through AND because it is only 2223 // correct to assume that all conditions of AND are met in true branch. 2224 // TODO: Support similar logic of OR and EQ predicate? 2225 if (NonNullIfTrue) 2226 if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) { 2227 for (const auto *CurrU : Curr->users()) 2228 if (Visited.insert(CurrU).second) 2229 WorkList.push_back(CurrU); 2230 continue; 2231 } 2232 2233 if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) { 2234 assert(BI->isConditional() && "uses a comparison!"); 2235 2236 BasicBlock *NonNullSuccessor = 2237 BI->getSuccessor(NonNullIfTrue ? 0 : 1); 2238 BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); 2239 if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) 2240 return true; 2241 } else if (NonNullIfTrue && isGuard(Curr) && 2242 DT->dominates(cast<Instruction>(Curr), CtxI)) { 2243 return true; 2244 } 2245 } 2246 } 2247 } 2248 2249 return false; 2250 } 2251 2252 /// Does the 'Range' metadata (which must be a valid MD_range operand list) 2253 /// ensure that the value it's attached to is never Value? 
'RangeType' is 2254 /// is the type of the value described by the range. 2255 static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) { 2256 const unsigned NumRanges = Ranges->getNumOperands() / 2; 2257 assert(NumRanges >= 1); 2258 for (unsigned i = 0; i < NumRanges; ++i) { 2259 ConstantInt *Lower = 2260 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0)); 2261 ConstantInt *Upper = 2262 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1)); 2263 ConstantRange Range(Lower->getValue(), Upper->getValue()); 2264 if (Range.contains(Value)) 2265 return false; 2266 } 2267 return true; 2268 } 2269 2270 /// Try to detect a recurrence that monotonically increases/decreases from a 2271 /// non-zero starting value. These are common as induction variables. 2272 static bool isNonZeroRecurrence(const PHINode *PN) { 2273 BinaryOperator *BO = nullptr; 2274 Value *Start = nullptr, *Step = nullptr; 2275 const APInt *StartC, *StepC; 2276 if (!matchSimpleRecurrence(PN, BO, Start, Step) || 2277 !match(Start, m_APInt(StartC)) || StartC->isZero()) 2278 return false; 2279 2280 switch (BO->getOpcode()) { 2281 case Instruction::Add: 2282 // Starting from non-zero and stepping away from zero can never wrap back 2283 // to zero. 2284 return BO->hasNoUnsignedWrap() || 2285 (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) && 2286 StartC->isNegative() == StepC->isNegative()); 2287 case Instruction::Mul: 2288 return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) && 2289 match(Step, m_APInt(StepC)) && !StepC->isZero(); 2290 case Instruction::Shl: 2291 return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap(); 2292 case Instruction::AShr: 2293 case Instruction::LShr: 2294 return BO->isExact(); 2295 default: 2296 return false; 2297 } 2298 } 2299 2300 static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth, 2301 const SimplifyQuery &Q, unsigned BitWidth, Value *X, 2302 Value *Y, bool NSW) { 2303 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q); 2304 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q); 2305 2306 // If X and Y are both non-negative (as signed values) then their sum is not 2307 // zero unless both X and Y are zero. 2308 if (XKnown.isNonNegative() && YKnown.isNonNegative()) 2309 if (isKnownNonZero(Y, DemandedElts, Depth, Q) || 2310 isKnownNonZero(X, DemandedElts, Depth, Q)) 2311 return true; 2312 2313 // If X and Y are both negative (as signed values) then their sum is not 2314 // zero unless both X and Y equal INT_MIN. 2315 if (XKnown.isNegative() && YKnown.isNegative()) { 2316 APInt Mask = APInt::getSignedMaxValue(BitWidth); 2317 // The sign bit of X is set. If some other bit is set then X is not equal 2318 // to INT_MIN. 2319 if (XKnown.One.intersects(Mask)) 2320 return true; 2321 // The sign bit of Y is set. If some other bit is set then Y is not equal 2322 // to INT_MIN. 2323 if (YKnown.One.intersects(Mask)) 2324 return true; 2325 } 2326 2327 // The sum of a non-negative number and a power of two is not zero. 
2328 if (XKnown.isNonNegative() && 2329 isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q)) 2330 return true; 2331 if (YKnown.isNonNegative() && 2332 isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q)) 2333 return true; 2334 2335 return KnownBits::computeForAddSub(/*Add*/ true, NSW, XKnown, YKnown) 2336 .isNonZero(); 2337 } 2338 2339 static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth, 2340 const SimplifyQuery &Q, unsigned BitWidth, Value *X, 2341 Value *Y) { 2342 // TODO: Move this case into isKnownNonEqual(). 2343 if (auto *C = dyn_cast<Constant>(X)) 2344 if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Depth, Q)) 2345 return true; 2346 2347 return ::isKnownNonEqual(X, Y, Depth, Q); 2348 } 2349 2350 static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts, 2351 unsigned Depth, const SimplifyQuery &Q, 2352 const KnownBits &KnownVal) { 2353 auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) { 2354 switch (I->getOpcode()) { 2355 case Instruction::Shl: 2356 return Lhs.shl(Rhs); 2357 case Instruction::LShr: 2358 return Lhs.lshr(Rhs); 2359 case Instruction::AShr: 2360 return Lhs.ashr(Rhs); 2361 default: 2362 llvm_unreachable("Unknown Shift Opcode"); 2363 } 2364 }; 2365 2366 auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) { 2367 switch (I->getOpcode()) { 2368 case Instruction::Shl: 2369 return Lhs.lshr(Rhs); 2370 case Instruction::LShr: 2371 case Instruction::AShr: 2372 return Lhs.shl(Rhs); 2373 default: 2374 llvm_unreachable("Unknown Shift Opcode"); 2375 } 2376 }; 2377 2378 if (KnownVal.isUnknown()) 2379 return false; 2380 2381 KnownBits KnownCnt = 2382 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q); 2383 APInt MaxShift = KnownCnt.getMaxValue(); 2384 unsigned NumBits = KnownVal.getBitWidth(); 2385 if (MaxShift.uge(NumBits)) 2386 return false; 2387 2388 if (!ShiftOp(KnownVal.One, MaxShift).isZero()) 2389 return true; 2390 2391 // If all of the bits shifted out are known to be zero, and Val is known 2392 // non-zero then at least one non-zero bit must remain. 2393 if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift) 2394 .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) && 2395 isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q)) 2396 return true; 2397 2398 return false; 2399 } 2400 2401 static bool isKnownNonZeroFromOperator(const Operator *I, 2402 const APInt &DemandedElts, 2403 unsigned Depth, const SimplifyQuery &Q) { 2404 unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL); 2405 switch (I->getOpcode()) { 2406 case Instruction::Alloca: 2407 // Alloca never returns null, malloc might. 2408 return I->getType()->getPointerAddressSpace() == 0; 2409 case Instruction::GetElementPtr: 2410 if (I->getType()->isPointerTy()) 2411 return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q); 2412 break; 2413 case Instruction::BitCast: { 2414 // We need to be a bit careful here. We can only peek through the bitcast 2415 // if the scalar size of elements in the operand are smaller than and a 2416 // multiple of the size they are casting too. Take three cases: 2417 // 2418 // 1) Unsafe: 2419 // bitcast <2 x i16> %NonZero to <4 x i8> 2420 // 2421 // %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a 2422 // <4 x i8> requires that all 4 i8 elements be non-zero which isn't 2423 // guranteed (imagine just sign bit set in the 2 i16 elements). 
2424 // 2425 // 2) Unsafe: 2426 // bitcast <4 x i3> %NonZero to <3 x i4> 2427 // 2428 // Even though the scalar size of the src (`i3`) is smaller than the 2429 // scalar size of the dst `i4`, because `i3` is not a multiple of `i4` 2430 // its possible for the `3 x i4` elements to be zero because there are 2431 // some elements in the destination that don't contain any full src 2432 // element. 2433 // 2434 // 3) Safe: 2435 // bitcast <4 x i8> %NonZero to <2 x i16> 2436 // 2437 // This is always safe as non-zero in the 4 i8 elements implies 2438 // non-zero in the combination of any two adjacent ones. Since i8 is a 2439 // multiple of i16, each i16 is guranteed to have 2 full i8 elements. 2440 // This all implies the 2 i16 elements are non-zero. 2441 Type *FromTy = I->getOperand(0)->getType(); 2442 if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) && 2443 (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0) 2444 return isKnownNonZero(I->getOperand(0), Depth, Q); 2445 } break; 2446 case Instruction::IntToPtr: 2447 // Note that we have to take special care to avoid looking through 2448 // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well 2449 // as casts that can alter the value, e.g., AddrSpaceCasts. 2450 if (!isa<ScalableVectorType>(I->getType()) && 2451 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <= 2452 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue()) 2453 return isKnownNonZero(I->getOperand(0), Depth, Q); 2454 break; 2455 case Instruction::PtrToInt: 2456 // Similar to int2ptr above, we can look through ptr2int here if the cast 2457 // is a no-op or an extend and not a truncate. 2458 if (!isa<ScalableVectorType>(I->getType()) && 2459 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <= 2460 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue()) 2461 return isKnownNonZero(I->getOperand(0), Depth, Q); 2462 break; 2463 case Instruction::Sub: 2464 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0), 2465 I->getOperand(1)); 2466 case Instruction::Or: 2467 // X | Y != 0 if X != 0 or Y != 0. 2468 return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q) || 2469 isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q); 2470 case Instruction::SExt: 2471 case Instruction::ZExt: 2472 // ext X != 0 if X != 0. 2473 return isKnownNonZero(I->getOperand(0), Depth, Q); 2474 2475 case Instruction::Shl: { 2476 // shl nsw/nuw can't remove any non-zero bits. 2477 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I); 2478 if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO)) 2479 return isKnownNonZero(I->getOperand(0), Depth, Q); 2480 2481 // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined 2482 // if the lowest bit is shifted off the end. 2483 KnownBits Known(BitWidth); 2484 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth, Q); 2485 if (Known.One[0]) 2486 return true; 2487 2488 return isNonZeroShift(I, DemandedElts, Depth, Q, Known); 2489 } 2490 case Instruction::LShr: 2491 case Instruction::AShr: { 2492 // shr exact can only shift out zero bits. 2493 const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I); 2494 if (BO->isExact()) 2495 return isKnownNonZero(I->getOperand(0), Depth, Q); 2496 2497 // shr X, Y != 0 if X is negative. Note that the value of the shift is not 2498 // defined if the sign bit is shifted off the end. 
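// For example, for i8 x with the sign bit known set, x lshr 3 still has bit 4
// set; an in-range shift only moves the sign bit down (or, for ashr,
// replicates it), so the result stays non-zero.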
2499 KnownBits Known =
2500 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2501 if (Known.isNegative())
2502 return true;
2503
2504 return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
2505 }
2506 case Instruction::UDiv:
2507 case Instruction::SDiv: {
2508 // X / Y
2509 // div exact can only produce a zero if the dividend is zero.
2510 if (cast<PossiblyExactOperator>(I)->isExact())
2511 return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
2512
2513 std::optional<bool> XUgeY;
2514 KnownBits XKnown =
2515 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2516 // If X is fully unknown we won't be able to figure anything out, so don't
2517 // bother computing known bits for Y.
2518 if (XKnown.isUnknown())
2519 return false;
2520
2521 KnownBits YKnown =
2522 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2523 if (I->getOpcode() == Instruction::SDiv) {
2524 // For signed division we need to compare the absolute values of the operands.
2525 XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
2526 YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
2527 }
2528 // If X u>= Y then the div is non-zero (0/0 is UB).
2529 XUgeY = KnownBits::uge(XKnown, YKnown);
2530 // If X is totally unknown or X u< Y we won't be able to prove non-zero
2531 // with computed known bits, so just return early.
2532 return XUgeY && *XUgeY;
2533 }
2534 case Instruction::Add: {
2535 // X + Y.
2536
2537 // If Add has the nuw flag, then if either X or Y is non-zero the result is
2538 // non-zero.
2539 auto *BO = cast<OverflowingBinaryOperator>(I);
2540 if (Q.IIQ.hasNoUnsignedWrap(BO))
2541 return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q) ||
2542 isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
2543
2544 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2545 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO));
2546 }
2547 case Instruction::Mul: {
2548 // If X and Y are non-zero then so is X * Y as long as the multiplication
2549 // does not overflow.
2550 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
2551 if (Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO))
2552 return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q) &&
2553 isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q);
2554
2555 // If either X or Y is odd, then if the other is non-zero the result can't
2556 // be zero.
2557 KnownBits XKnown =
2558 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2559 if (XKnown.One[0])
2560 return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q);
2561
2562 KnownBits YKnown =
2563 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2564 if (YKnown.One[0])
2565 return XKnown.isNonZero() ||
2566 isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
2567
2568 // If there exists any subset of X (sX) and subset of Y (sY) s.t. sX * sY is
2569 // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
2570 // the lowest known One of X and Y. If they are non-zero, the result
2571 // must be non-zero. We can check that LSB(X) * LSB(Y) != 0 by checking
2572 // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth.
2573 return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
2574 BitWidth;
2575 }
2576 case Instruction::Select: {
2577 // (C ? X : Y) != 0 if X != 0 and Y != 0.
2578
2579 // First check if the arm is non-zero using `isKnownNonZero`. If that fails,
2580 // then see if the select condition implies the arm is non-zero. For example
2581 // (X != 0 ?
X : Y), we know the true arm is non-zero as the `X` "return" is 2582 // dominated by `X != 0`. 2583 auto SelectArmIsNonZero = [&](bool IsTrueArm) { 2584 Value *Op; 2585 Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2); 2586 // Op is trivially non-zero. 2587 if (isKnownNonZero(Op, DemandedElts, Depth, Q)) 2588 return true; 2589 2590 // The condition of the select dominates the true/false arm. Check if the 2591 // condition implies that a given arm is non-zero. 2592 Value *X; 2593 CmpInst::Predicate Pred; 2594 if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X)))) 2595 return false; 2596 2597 if (!IsTrueArm) 2598 Pred = ICmpInst::getInversePredicate(Pred); 2599 2600 return cmpExcludesZero(Pred, X); 2601 }; 2602 2603 if (SelectArmIsNonZero(/* IsTrueArm */ true) && 2604 SelectArmIsNonZero(/* IsTrueArm */ false)) 2605 return true; 2606 break; 2607 } 2608 case Instruction::PHI: { 2609 auto *PN = cast<PHINode>(I); 2610 if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN)) 2611 return true; 2612 2613 // Check if all incoming values are non-zero using recursion. 2614 SimplifyQuery RecQ = Q; 2615 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); 2616 return llvm::all_of(PN->operands(), [&](const Use &U) { 2617 if (U.get() == PN) 2618 return true; 2619 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); 2620 // Check if the branch on the phi excludes zero. 2621 ICmpInst::Predicate Pred; 2622 Value *X; 2623 BasicBlock *TrueSucc, *FalseSucc; 2624 if (match(RecQ.CxtI, 2625 m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)), 2626 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) { 2627 // Check for cases of duplicate successors. 2628 if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) { 2629 // If we're using the false successor, invert the predicate. 2630 if (FalseSucc == PN->getParent()) 2631 Pred = CmpInst::getInversePredicate(Pred); 2632 if (cmpExcludesZero(Pred, X)) 2633 return true; 2634 } 2635 } 2636 // Finally recurse on the edge and check it directly. 2637 return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ); 2638 }); 2639 } 2640 case Instruction::ExtractElement: 2641 if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) { 2642 const Value *Vec = EEI->getVectorOperand(); 2643 const Value *Idx = EEI->getIndexOperand(); 2644 auto *CIdx = dyn_cast<ConstantInt>(Idx); 2645 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) { 2646 unsigned NumElts = VecTy->getNumElements(); 2647 APInt DemandedVecElts = APInt::getAllOnes(NumElts); 2648 if (CIdx && CIdx->getValue().ult(NumElts)) 2649 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); 2650 return isKnownNonZero(Vec, DemandedVecElts, Depth, Q); 2651 } 2652 } 2653 break; 2654 case Instruction::Freeze: 2655 return isKnownNonZero(I->getOperand(0), Depth, Q) && 2656 isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT, 2657 Depth); 2658 case Instruction::Load: { 2659 auto *LI = cast<LoadInst>(I); 2660 // A Load tagged with nonnull or dereferenceable with null pointer undefined 2661 // is never null. 2662 if (auto *PtrT = dyn_cast<PointerType>(I->getType())) 2663 if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) || 2664 (Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable) && 2665 !NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace()))) 2666 return true; 2667 2668 // No need to fall through to computeKnownBits as range metadata is already 2669 // handled in isKnownNonZero. 
2670 return false; 2671 } 2672 case Instruction::Call: 2673 case Instruction::Invoke: 2674 if (I->getType()->isPointerTy()) { 2675 const auto *Call = cast<CallBase>(I); 2676 if (Call->isReturnNonNull()) 2677 return true; 2678 if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) 2679 return isKnownNonZero(RP, Depth, Q); 2680 } else if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) { 2681 if (RV->getType() == I->getType() && isKnownNonZero(RV, Depth, Q)) 2682 return true; 2683 } 2684 2685 if (auto *II = dyn_cast<IntrinsicInst>(I)) { 2686 switch (II->getIntrinsicID()) { 2687 case Intrinsic::sshl_sat: 2688 case Intrinsic::ushl_sat: 2689 case Intrinsic::abs: 2690 case Intrinsic::bitreverse: 2691 case Intrinsic::bswap: 2692 case Intrinsic::ctpop: 2693 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q); 2694 case Intrinsic::ssub_sat: 2695 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, 2696 II->getArgOperand(0), II->getArgOperand(1)); 2697 case Intrinsic::sadd_sat: 2698 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, 2699 II->getArgOperand(0), II->getArgOperand(1), 2700 /*NSW*/ true); 2701 case Intrinsic::umax: 2702 case Intrinsic::uadd_sat: 2703 return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) || 2704 isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q); 2705 case Intrinsic::smin: 2706 case Intrinsic::smax: { 2707 auto KnownOpImpliesNonZero = [&](const KnownBits &K) { 2708 return II->getIntrinsicID() == Intrinsic::smin 2709 ? K.isNegative() 2710 : K.isStrictlyPositive(); 2711 }; 2712 KnownBits XKnown = 2713 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q); 2714 if (KnownOpImpliesNonZero(XKnown)) 2715 return true; 2716 KnownBits YKnown = 2717 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q); 2718 if (KnownOpImpliesNonZero(YKnown)) 2719 return true; 2720 2721 if (XKnown.isNonZero() && YKnown.isNonZero()) 2722 return true; 2723 } 2724 [[fallthrough]]; 2725 case Intrinsic::umin: 2726 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q) && 2727 isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q); 2728 case Intrinsic::cttz: 2729 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q) 2730 .Zero[0]; 2731 case Intrinsic::ctlz: 2732 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q) 2733 .isNonNegative(); 2734 case Intrinsic::fshr: 2735 case Intrinsic::fshl: 2736 // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0. 2737 if (II->getArgOperand(0) == II->getArgOperand(1)) 2738 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q); 2739 break; 2740 case Intrinsic::vscale: 2741 return true; 2742 default: 2743 break; 2744 } 2745 break; 2746 } 2747 2748 return false; 2749 } 2750 2751 KnownBits Known(BitWidth); 2752 computeKnownBits(I, DemandedElts, Known, Depth, Q); 2753 return Known.One != 0; 2754 } 2755 2756 /// Return true if the given value is known to be non-zero when defined. For 2757 /// vectors, return true if every demanded element is known to be non-zero when 2758 /// defined. For pointers, if the context instruction and dominator tree are 2759 /// specified, perform context-sensitive analysis and return true if the 2760 /// pointer couldn't possibly be null at the specified instruction. 2761 /// Supports values with integer or pointer type and vectors of integers. 
2762 bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, 2763 const SimplifyQuery &Q) { 2764 2765 #ifndef NDEBUG 2766 Type *Ty = V->getType(); 2767 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 2768 2769 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { 2770 assert( 2771 FVTy->getNumElements() == DemandedElts.getBitWidth() && 2772 "DemandedElt width should equal the fixed vector number of elements"); 2773 } else { 2774 assert(DemandedElts == APInt(1, 1) && 2775 "DemandedElt width should be 1 for scalars"); 2776 } 2777 #endif 2778 2779 if (auto *C = dyn_cast<Constant>(V)) { 2780 if (C->isNullValue()) 2781 return false; 2782 if (isa<ConstantInt>(C)) 2783 // Must be non-zero due to null test above. 2784 return true; 2785 2786 // For constant vectors, check that all elements are undefined or known 2787 // non-zero to determine that the whole vector is known non-zero. 2788 if (auto *VecTy = dyn_cast<FixedVectorType>(C->getType())) { 2789 for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) { 2790 if (!DemandedElts[i]) 2791 continue; 2792 Constant *Elt = C->getAggregateElement(i); 2793 if (!Elt || Elt->isNullValue()) 2794 return false; 2795 if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt)) 2796 return false; 2797 } 2798 return true; 2799 } 2800 2801 // A global variable in address space 0 is non null unless extern weak 2802 // or an absolute symbol reference. Other address spaces may have null as a 2803 // valid address for a global, so we can't assume anything. 2804 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 2805 if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() && 2806 GV->getType()->getAddressSpace() == 0) 2807 return true; 2808 } 2809 2810 // For constant expressions, fall through to the Operator code below. 2811 if (!isa<ConstantExpr>(V)) 2812 return false; 2813 } 2814 2815 if (auto *I = dyn_cast<Instruction>(V)) { 2816 if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) { 2817 // If the possible ranges don't contain zero, then the value is 2818 // definitely non-zero. 2819 if (auto *Ty = dyn_cast<IntegerType>(V->getType())) { 2820 const APInt ZeroValue(Ty->getBitWidth(), 0); 2821 if (rangeMetadataExcludesValue(Ranges, ZeroValue)) 2822 return true; 2823 } 2824 } 2825 } 2826 2827 if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q)) 2828 return true; 2829 2830 // Some of the tests below are recursive, so bail out if we hit the limit. 2831 if (Depth++ >= MaxAnalysisRecursionDepth) 2832 return false; 2833 2834 // Check for pointer simplifications. 2835 2836 if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) { 2837 // A byval, inalloca may not be null in a non-default addres space. A 2838 // nonnull argument is assumed never 0. 2839 if (const Argument *A = dyn_cast<Argument>(V)) { 2840 if (((A->hasPassPointeeByValueCopyAttr() && 2841 !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) || 2842 A->hasNonNullAttr())) 2843 return true; 2844 } 2845 } 2846 2847 if (const auto *I = dyn_cast<Operator>(V)) 2848 if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q)) 2849 return true; 2850 2851 if (!isa<Constant>(V) && 2852 isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) 2853 return true; 2854 2855 return false; 2856 } 2857 2858 bool isKnownNonZero(const Value *V, unsigned Depth, const SimplifyQuery &Q) { 2859 auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); 2860 APInt DemandedElts = 2861 FVTy ? 
APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
2862 return isKnownNonZero(V, DemandedElts, Depth, Q);
2863 }
2864
2865 /// If the pair of operators are the same invertible function, return the
2866 /// operands of the function corresponding to each input. Otherwise,
2867 /// return std::nullopt. An invertible function is one that is 1-to-1 and maps
2868 /// every input value to exactly one output value. This is equivalent to
2869 /// saying that Op1 and Op2 are equal exactly when the specified pair of
2870 /// operands are equal (except that Op1 and Op2 may be poison more often).
2871 static std::optional<std::pair<Value*, Value*>>
2872 getInvertibleOperands(const Operator *Op1,
2873 const Operator *Op2) {
2874 if (Op1->getOpcode() != Op2->getOpcode())
2875 return std::nullopt;
2876
2877 auto getOperands = [&](unsigned OpNum) -> auto {
2878 return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum));
2879 };
2880
2881 switch (Op1->getOpcode()) {
2882 default:
2883 break;
2884 case Instruction::Add:
2885 case Instruction::Sub:
2886 if (Op1->getOperand(0) == Op2->getOperand(0))
2887 return getOperands(1);
2888 if (Op1->getOperand(1) == Op2->getOperand(1))
2889 return getOperands(0);
2890 break;
2891 case Instruction::Mul: {
2892 // Invertible if A * B == (A * B) mod 2^N where A and B are integers
2893 // and N is the bitwidth. The nsw case is non-obvious, but proven by
2894 // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
2895 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
2896 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
2897 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
2898 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
2899 break;
2900
2901 // Assume operand order has been canonicalized.
2902 if (Op1->getOperand(1) == Op2->getOperand(1) &&
2903 isa<ConstantInt>(Op1->getOperand(1)) &&
2904 !cast<ConstantInt>(Op1->getOperand(1))->isZero())
2905 return getOperands(0);
2906 break;
2907 }
2908 case Instruction::Shl: {
2909 // Same as multiplies, with the difference that we don't need to check
2910 // for a non-zero multiply. Shifts always multiply by non-zero.
2911 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
2912 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
2913 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
2914 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
2915 break;
2916
2917 if (Op1->getOperand(1) == Op2->getOperand(1))
2918 return getOperands(0);
2919 break;
2920 }
2921 case Instruction::AShr:
2922 case Instruction::LShr: {
2923 auto *PEO1 = cast<PossiblyExactOperator>(Op1);
2924 auto *PEO2 = cast<PossiblyExactOperator>(Op2);
2925 if (!PEO1->isExact() || !PEO2->isExact())
2926 break;
2927
2928 if (Op1->getOperand(1) == Op2->getOperand(1))
2929 return getOperands(0);
2930 break;
2931 }
2932 case Instruction::SExt:
2933 case Instruction::ZExt:
2934 if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType())
2935 return getOperands(0);
2936 break;
2937 case Instruction::PHI: {
2938 const PHINode *PN1 = cast<PHINode>(Op1);
2939 const PHINode *PN2 = cast<PHINode>(Op2);
2940
2941 // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
2942 // are a single invertible function of the start values?
Note that repeated 2943 // application of an invertible function is also invertible 2944 BinaryOperator *BO1 = nullptr; 2945 Value *Start1 = nullptr, *Step1 = nullptr; 2946 BinaryOperator *BO2 = nullptr; 2947 Value *Start2 = nullptr, *Step2 = nullptr; 2948 if (PN1->getParent() != PN2->getParent() || 2949 !matchSimpleRecurrence(PN1, BO1, Start1, Step1) || 2950 !matchSimpleRecurrence(PN2, BO2, Start2, Step2)) 2951 break; 2952 2953 auto Values = getInvertibleOperands(cast<Operator>(BO1), 2954 cast<Operator>(BO2)); 2955 if (!Values) 2956 break; 2957 2958 // We have to be careful of mutually defined recurrences here. Ex: 2959 // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V 2960 // * X_i = Y_i = X_(i-1) OP Y_(i-1) 2961 // The invertibility of these is complicated, and not worth reasoning 2962 // about (yet?). 2963 if (Values->first != PN1 || Values->second != PN2) 2964 break; 2965 2966 return std::make_pair(Start1, Start2); 2967 } 2968 } 2969 return std::nullopt; 2970 } 2971 2972 /// Return true if V2 == V1 + X, where X is known non-zero. 2973 static bool isAddOfNonZero(const Value *V1, const Value *V2, unsigned Depth, 2974 const SimplifyQuery &Q) { 2975 const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1); 2976 if (!BO || BO->getOpcode() != Instruction::Add) 2977 return false; 2978 Value *Op = nullptr; 2979 if (V2 == BO->getOperand(0)) 2980 Op = BO->getOperand(1); 2981 else if (V2 == BO->getOperand(1)) 2982 Op = BO->getOperand(0); 2983 else 2984 return false; 2985 return isKnownNonZero(Op, Depth + 1, Q); 2986 } 2987 2988 /// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and 2989 /// the multiplication is nuw or nsw. 2990 static bool isNonEqualMul(const Value *V1, const Value *V2, unsigned Depth, 2991 const SimplifyQuery &Q) { 2992 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) { 2993 const APInt *C; 2994 return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) && 2995 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) && 2996 !C->isZero() && !C->isOne() && isKnownNonZero(V1, Depth + 1, Q); 2997 } 2998 return false; 2999 } 3000 3001 /// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and 3002 /// the shift is nuw or nsw. 3003 static bool isNonEqualShl(const Value *V1, const Value *V2, unsigned Depth, 3004 const SimplifyQuery &Q) { 3005 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) { 3006 const APInt *C; 3007 return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) && 3008 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) && 3009 !C->isZero() && isKnownNonZero(V1, Depth + 1, Q); 3010 } 3011 return false; 3012 } 3013 3014 static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2, 3015 unsigned Depth, const SimplifyQuery &Q) { 3016 // Check two PHIs are in same block. 3017 if (PN1->getParent() != PN2->getParent()) 3018 return false; 3019 3020 SmallPtrSet<const BasicBlock *, 8> VisitedBBs; 3021 bool UsedFullRecursion = false; 3022 for (const BasicBlock *IncomBB : PN1->blocks()) { 3023 if (!VisitedBBs.insert(IncomBB).second) 3024 continue; // Don't reprocess blocks that we have dealt with already. 3025 const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB); 3026 const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB); 3027 const APInt *C1, *C2; 3028 if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2) 3029 continue; 3030 3031 // Only one pair of phi operands is allowed for full recursion. 
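// For example (illustrative IR), for %p = phi i8 [ 1, %bb0 ], [ %a, %bb1 ] and
// %q = phi i8 [ 2, %bb0 ], [ %b, %bb1 ], the constant pair 1 vs 2 is decided
// directly, and the single full-recursion budget is spent on proving %a != %b.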
3032 if (UsedFullRecursion) 3033 return false; 3034 3035 SimplifyQuery RecQ = Q; 3036 RecQ.CxtI = IncomBB->getTerminator(); 3037 if (!isKnownNonEqual(IV1, IV2, Depth + 1, RecQ)) 3038 return false; 3039 UsedFullRecursion = true; 3040 } 3041 return true; 3042 } 3043 3044 static bool isNonEqualSelect(const Value *V1, const Value *V2, unsigned Depth, 3045 const SimplifyQuery &Q) { 3046 const SelectInst *SI1 = dyn_cast<SelectInst>(V1); 3047 if (!SI1) 3048 return false; 3049 3050 if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) { 3051 const Value *Cond1 = SI1->getCondition(); 3052 const Value *Cond2 = SI2->getCondition(); 3053 if (Cond1 == Cond2) 3054 return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(), 3055 Depth + 1, Q) && 3056 isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(), 3057 Depth + 1, Q); 3058 } 3059 return isKnownNonEqual(SI1->getTrueValue(), V2, Depth + 1, Q) && 3060 isKnownNonEqual(SI1->getFalseValue(), V2, Depth + 1, Q); 3061 } 3062 3063 // Check to see if A is both a GEP and is the incoming value for a PHI in the 3064 // loop, and B is either a ptr or another GEP. If the PHI has 2 incoming values, 3065 // one of them being the recursive GEP A and the other a ptr at same base and at 3066 // the same/higher offset than B we are only incrementing the pointer further in 3067 // loop if offset of recursive GEP is greater than 0. 3068 static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B, 3069 const SimplifyQuery &Q) { 3070 if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy()) 3071 return false; 3072 3073 auto *GEPA = dyn_cast<GEPOperator>(A); 3074 if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin())) 3075 return false; 3076 3077 // Handle 2 incoming PHI values with one being a recursive GEP. 3078 auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand()); 3079 if (!PN || PN->getNumIncomingValues() != 2) 3080 return false; 3081 3082 // Search for the recursive GEP as an incoming operand, and record that as 3083 // Step. 3084 Value *Start = nullptr; 3085 Value *Step = const_cast<Value *>(A); 3086 if (PN->getIncomingValue(0) == Step) 3087 Start = PN->getIncomingValue(1); 3088 else if (PN->getIncomingValue(1) == Step) 3089 Start = PN->getIncomingValue(0); 3090 else 3091 return false; 3092 3093 // Other incoming node base should match the B base. 3094 // StartOffset >= OffsetB && StepOffset > 0? 3095 // StartOffset <= OffsetB && StepOffset < 0? 3096 // Is non-equal if above are true. 3097 // We use stripAndAccumulateInBoundsConstantOffsets to restrict the 3098 // optimisation to inbounds GEPs only. 3099 unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType()); 3100 APInt StartOffset(IndexWidth, 0); 3101 Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset); 3102 APInt StepOffset(IndexWidth, 0); 3103 Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset); 3104 3105 // Check if Base Pointer of Step matches the PHI. 3106 if (Step != PN) 3107 return false; 3108 APInt OffsetB(IndexWidth, 0); 3109 B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB); 3110 return Start == B && 3111 ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) || 3112 (StartOffset.sle(OffsetB) && StepOffset.isNegative())); 3113 } 3114 3115 /// Return true if it is known that V1 != V2. 
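/// For example, `%y = add i32 %x, %c` is known non-equal to `%x` whenever %c
/// is known non-zero (see isAddOfNonZero above), and two values whose known
/// bits contradict each other are likewise non-equal.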
3116 static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth, 3117 const SimplifyQuery &Q) { 3118 if (V1 == V2) 3119 return false; 3120 if (V1->getType() != V2->getType()) 3121 // We can't look through casts yet. 3122 return false; 3123 3124 if (Depth >= MaxAnalysisRecursionDepth) 3125 return false; 3126 3127 // See if we can recurse through (exactly one of) our operands. This 3128 // requires our operation be 1-to-1 and map every input value to exactly 3129 // one output value. Such an operation is invertible. 3130 auto *O1 = dyn_cast<Operator>(V1); 3131 auto *O2 = dyn_cast<Operator>(V2); 3132 if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) { 3133 if (auto Values = getInvertibleOperands(O1, O2)) 3134 return isKnownNonEqual(Values->first, Values->second, Depth + 1, Q); 3135 3136 if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) { 3137 const PHINode *PN2 = cast<PHINode>(V2); 3138 // FIXME: This is missing a generalization to handle the case where one is 3139 // a PHI and another one isn't. 3140 if (isNonEqualPHIs(PN1, PN2, Depth, Q)) 3141 return true; 3142 }; 3143 } 3144 3145 if (isAddOfNonZero(V1, V2, Depth, Q) || isAddOfNonZero(V2, V1, Depth, Q)) 3146 return true; 3147 3148 if (isNonEqualMul(V1, V2, Depth, Q) || isNonEqualMul(V2, V1, Depth, Q)) 3149 return true; 3150 3151 if (isNonEqualShl(V1, V2, Depth, Q) || isNonEqualShl(V2, V1, Depth, Q)) 3152 return true; 3153 3154 if (V1->getType()->isIntOrIntVectorTy()) { 3155 // Are any known bits in V1 contradictory to known bits in V2? If V1 3156 // has a known zero where V2 has a known one, they must not be equal. 3157 KnownBits Known1 = computeKnownBits(V1, Depth, Q); 3158 if (!Known1.isUnknown()) { 3159 KnownBits Known2 = computeKnownBits(V2, Depth, Q); 3160 if (Known1.Zero.intersects(Known2.One) || 3161 Known2.Zero.intersects(Known1.One)) 3162 return true; 3163 } 3164 } 3165 3166 if (isNonEqualSelect(V1, V2, Depth, Q) || isNonEqualSelect(V2, V1, Depth, Q)) 3167 return true; 3168 3169 if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) || 3170 isNonEqualPointersWithRecursiveGEP(V2, V1, Q)) 3171 return true; 3172 3173 Value *A, *B; 3174 // PtrToInts are NonEqual if their Ptrs are NonEqual. 3175 // Check PtrToInt type matches the pointer size. 3176 if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) && 3177 match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B)))) 3178 return isKnownNonEqual(A, B, Depth + 1, Q); 3179 3180 return false; 3181 } 3182 3183 // Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow). 3184 // Returns the input and lower/upper bounds. 
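// For example, smax(smin(X, 42), -7) yields In = X, CLow = -7, CHigh = 42;
// the match only succeeds when CLow <= CHigh, i.e. the clamp range is
// non-empty.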
3185 static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, 3186 const APInt *&CLow, const APInt *&CHigh) { 3187 assert(isa<Operator>(Select) && 3188 cast<Operator>(Select)->getOpcode() == Instruction::Select && 3189 "Input should be a Select!"); 3190 3191 const Value *LHS = nullptr, *RHS = nullptr; 3192 SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor; 3193 if (SPF != SPF_SMAX && SPF != SPF_SMIN) 3194 return false; 3195 3196 if (!match(RHS, m_APInt(CLow))) 3197 return false; 3198 3199 const Value *LHS2 = nullptr, *RHS2 = nullptr; 3200 SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor; 3201 if (getInverseMinMaxFlavor(SPF) != SPF2) 3202 return false; 3203 3204 if (!match(RHS2, m_APInt(CHigh))) 3205 return false; 3206 3207 if (SPF == SPF_SMIN) 3208 std::swap(CLow, CHigh); 3209 3210 In = LHS2; 3211 return CLow->sle(*CHigh); 3212 } 3213 3214 static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II, 3215 const APInt *&CLow, 3216 const APInt *&CHigh) { 3217 assert((II->getIntrinsicID() == Intrinsic::smin || 3218 II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax"); 3219 3220 Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID()); 3221 auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0)); 3222 if (!InnerII || InnerII->getIntrinsicID() != InverseID || 3223 !match(II->getArgOperand(1), m_APInt(CLow)) || 3224 !match(InnerII->getArgOperand(1), m_APInt(CHigh))) 3225 return false; 3226 3227 if (II->getIntrinsicID() == Intrinsic::smin) 3228 std::swap(CLow, CHigh); 3229 return CLow->sle(*CHigh); 3230 } 3231 3232 /// For vector constants, loop over the elements and find the constant with the 3233 /// minimum number of sign bits. Return 0 if the value is not a vector constant 3234 /// or if any element was not analyzed; otherwise, return the count for the 3235 /// element with the minimum number of sign bits. 3236 static unsigned computeNumSignBitsVectorConstant(const Value *V, 3237 const APInt &DemandedElts, 3238 unsigned TyBits) { 3239 const auto *CV = dyn_cast<Constant>(V); 3240 if (!CV || !isa<FixedVectorType>(CV->getType())) 3241 return 0; 3242 3243 unsigned MinSignBits = TyBits; 3244 unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements(); 3245 for (unsigned i = 0; i != NumElts; ++i) { 3246 if (!DemandedElts[i]) 3247 continue; 3248 // If we find a non-ConstantInt, bail out. 3249 auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i)); 3250 if (!Elt) 3251 return 0; 3252 3253 MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits()); 3254 } 3255 3256 return MinSignBits; 3257 } 3258 3259 static unsigned ComputeNumSignBitsImpl(const Value *V, 3260 const APInt &DemandedElts, 3261 unsigned Depth, const SimplifyQuery &Q); 3262 3263 static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts, 3264 unsigned Depth, const SimplifyQuery &Q) { 3265 unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q); 3266 assert(Result > 0 && "At least one sign bit needs to be present!"); 3267 return Result; 3268 } 3269 3270 /// Return the number of times the sign bit of the register is replicated into 3271 /// the other bits. We know that at least 1 bit is always equal to the sign bit 3272 /// (itself), but other cases can give us information. For example, immediately 3273 /// after an "ashr X, 2", we know that the top 3 bits are all equal to each 3274 /// other, so we return 3. 
For vectors, return the number of sign bits for the 3275 /// vector element with the minimum number of known sign bits of the demanded 3276 /// elements in the vector specified by DemandedElts. 3277 static unsigned ComputeNumSignBitsImpl(const Value *V, 3278 const APInt &DemandedElts, 3279 unsigned Depth, const SimplifyQuery &Q) { 3280 Type *Ty = V->getType(); 3281 #ifndef NDEBUG 3282 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 3283 3284 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { 3285 assert( 3286 FVTy->getNumElements() == DemandedElts.getBitWidth() && 3287 "DemandedElt width should equal the fixed vector number of elements"); 3288 } else { 3289 assert(DemandedElts == APInt(1, 1) && 3290 "DemandedElt width should be 1 for scalars"); 3291 } 3292 #endif 3293 3294 // We return the minimum number of sign bits that are guaranteed to be present 3295 // in V, so for undef we have to conservatively return 1. We don't have the 3296 // same behavior for poison though -- that's a FIXME today. 3297 3298 Type *ScalarTy = Ty->getScalarType(); 3299 unsigned TyBits = ScalarTy->isPointerTy() ? 3300 Q.DL.getPointerTypeSizeInBits(ScalarTy) : 3301 Q.DL.getTypeSizeInBits(ScalarTy); 3302 3303 unsigned Tmp, Tmp2; 3304 unsigned FirstAnswer = 1; 3305 3306 // Note that ConstantInt is handled by the general computeKnownBits case 3307 // below. 3308 3309 if (Depth == MaxAnalysisRecursionDepth) 3310 return 1; 3311 3312 if (auto *U = dyn_cast<Operator>(V)) { 3313 switch (Operator::getOpcode(V)) { 3314 default: break; 3315 case Instruction::SExt: 3316 Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); 3317 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp; 3318 3319 case Instruction::SDiv: { 3320 const APInt *Denominator; 3321 // sdiv X, C -> adds log(C) sign bits. 3322 if (match(U->getOperand(1), m_APInt(Denominator))) { 3323 3324 // Ignore non-positive denominator. 3325 if (!Denominator->isStrictlyPositive()) 3326 break; 3327 3328 // Calculate the incoming numerator bits. 3329 unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3330 3331 // Add floor(log(C)) bits to the numerator bits. 3332 return std::min(TyBits, NumBits + Denominator->logBase2()); 3333 } 3334 break; 3335 } 3336 3337 case Instruction::SRem: { 3338 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3339 3340 const APInt *Denominator; 3341 // srem X, C -> we know that the result is within [-C+1,C) when C is a 3342 // positive constant. This let us put a lower bound on the number of sign 3343 // bits. 3344 if (match(U->getOperand(1), m_APInt(Denominator))) { 3345 3346 // Ignore non-positive denominator. 3347 if (Denominator->isStrictlyPositive()) { 3348 // Calculate the leading sign bit constraints by examining the 3349 // denominator. Given that the denominator is positive, there are two 3350 // cases: 3351 // 3352 // 1. The numerator is positive. The result range is [0,C) and 3353 // [0,C) u< (1 << ceilLogBase2(C)). 3354 // 3355 // 2. The numerator is negative. Then the result range is (-C,0] and 3356 // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). 3357 // 3358 // Thus a lower bound on the number of sign bits is `TyBits - 3359 // ceilLogBase2(C)`. 3360 3361 unsigned ResBits = TyBits - Denominator->ceilLogBase2(); 3362 Tmp = std::max(Tmp, ResBits); 3363 } 3364 } 3365 return Tmp; 3366 } 3367 3368 case Instruction::AShr: { 3369 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3370 // ashr X, C -> adds C sign bits. Vectors too. 
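      // For example, if X is already known to have 2 sign bits, `ashr X, 3`
      // has at least 5, clamped to the bit width below.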
3371 const APInt *ShAmt; 3372 if (match(U->getOperand(1), m_APInt(ShAmt))) { 3373 if (ShAmt->uge(TyBits)) 3374 break; // Bad shift. 3375 unsigned ShAmtLimited = ShAmt->getZExtValue(); 3376 Tmp += ShAmtLimited; 3377 if (Tmp > TyBits) Tmp = TyBits; 3378 } 3379 return Tmp; 3380 } 3381 case Instruction::Shl: { 3382 const APInt *ShAmt; 3383 if (match(U->getOperand(1), m_APInt(ShAmt))) { 3384 // shl destroys sign bits. 3385 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3386 if (ShAmt->uge(TyBits) || // Bad shift. 3387 ShAmt->uge(Tmp)) break; // Shifted all sign bits out. 3388 Tmp2 = ShAmt->getZExtValue(); 3389 return Tmp - Tmp2; 3390 } 3391 break; 3392 } 3393 case Instruction::And: 3394 case Instruction::Or: 3395 case Instruction::Xor: // NOT is handled here. 3396 // Logical binary ops preserve the number of sign bits at the worst. 3397 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3398 if (Tmp != 1) { 3399 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3400 FirstAnswer = std::min(Tmp, Tmp2); 3401 // We computed what we know about the sign bits as our first 3402 // answer. Now proceed to the generic code that uses 3403 // computeKnownBits, and pick whichever answer is better. 3404 } 3405 break; 3406 3407 case Instruction::Select: { 3408 // If we have a clamp pattern, we know that the number of sign bits will 3409 // be the minimum of the clamp min/max range. 3410 const Value *X; 3411 const APInt *CLow, *CHigh; 3412 if (isSignedMinMaxClamp(U, X, CLow, CHigh)) 3413 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); 3414 3415 Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3416 if (Tmp == 1) break; 3417 Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); 3418 return std::min(Tmp, Tmp2); 3419 } 3420 3421 case Instruction::Add: 3422 // Add can have at most one carry bit. Thus we know that the output 3423 // is, at worst, one more bit than the inputs. 3424 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3425 if (Tmp == 1) break; 3426 3427 // Special case decrementing a value (ADD X, -1): 3428 if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) 3429 if (CRHS->isAllOnesValue()) { 3430 KnownBits Known(TyBits); 3431 computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); 3432 3433 // If the input is known to be 0 or 1, the output is 0/-1, which is 3434 // all sign bits set. 3435 if ((Known.Zero | 1).isAllOnes()) 3436 return TyBits; 3437 3438 // If we are subtracting one from a positive number, there is no carry 3439 // out of the result. 3440 if (Known.isNonNegative()) 3441 return Tmp; 3442 } 3443 3444 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3445 if (Tmp2 == 1) break; 3446 return std::min(Tmp, Tmp2) - 1; 3447 3448 case Instruction::Sub: 3449 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3450 if (Tmp2 == 1) break; 3451 3452 // Handle NEG. 3453 if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) 3454 if (CLHS->isNullValue()) { 3455 KnownBits Known(TyBits); 3456 computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); 3457 // If the input is known to be 0 or 1, the output is 0/-1, which is 3458 // all sign bits set. 3459 if ((Known.Zero | 1).isAllOnes()) 3460 return TyBits; 3461 3462 // If the input is known to be positive (the sign bit is known clear), 3463 // the output of the NEG has the same number of sign bits as the 3464 // input. 3465 if (Known.isNonNegative()) 3466 return Tmp2; 3467 3468 // Otherwise, we treat this like a SUB. 
3469 } 3470 3471 // Sub can have at most one carry bit. Thus we know that the output 3472 // is, at worst, one more bit than the inputs. 3473 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3474 if (Tmp == 1) break; 3475 return std::min(Tmp, Tmp2) - 1; 3476 3477 case Instruction::Mul: { 3478 // The output of the Mul can be at most twice the valid bits in the 3479 // inputs. 3480 unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3481 if (SignBitsOp0 == 1) break; 3482 unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3483 if (SignBitsOp1 == 1) break; 3484 unsigned OutValidBits = 3485 (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); 3486 return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; 3487 } 3488 3489 case Instruction::PHI: { 3490 const PHINode *PN = cast<PHINode>(U); 3491 unsigned NumIncomingValues = PN->getNumIncomingValues(); 3492 // Don't analyze large in-degree PHIs. 3493 if (NumIncomingValues > 4) break; 3494 // Unreachable blocks may have zero-operand PHI nodes. 3495 if (NumIncomingValues == 0) break; 3496 3497 // Take the minimum of all incoming values. This can't infinitely loop 3498 // because of our depth threshold. 3499 SimplifyQuery RecQ = Q; 3500 Tmp = TyBits; 3501 for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) { 3502 if (Tmp == 1) return Tmp; 3503 RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator(); 3504 Tmp = std::min( 3505 Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, RecQ)); 3506 } 3507 return Tmp; 3508 } 3509 3510 case Instruction::Trunc: { 3511 // If the input contained enough sign bits that some remain after the 3512 // truncation, then we can make use of that. Otherwise we don't know 3513 // anything. 3514 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3515 unsigned OperandTyBits = U->getOperand(0)->getType()->getScalarSizeInBits(); 3516 if (Tmp > (OperandTyBits - TyBits)) 3517 return Tmp - (OperandTyBits - TyBits); 3518 3519 return 1; 3520 } 3521 3522 case Instruction::ExtractElement: 3523 // Look through extract element. At the moment we keep this simple and 3524 // skip tracking the specific element. But at least we might find 3525 // information valid for all elements of the vector (for example if vector 3526 // is sign extended, shifted, etc). 3527 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3528 3529 case Instruction::ShuffleVector: { 3530 // Collect the minimum number of sign bits that are shared by every vector 3531 // element referenced by the shuffle. 3532 auto *Shuf = dyn_cast<ShuffleVectorInst>(U); 3533 if (!Shuf) { 3534 // FIXME: Add support for shufflevector constant expressions. 3535 return 1; 3536 } 3537 APInt DemandedLHS, DemandedRHS; 3538 // For undef elements, we don't know anything about the common state of 3539 // the shuffle result. 3540 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) 3541 return 1; 3542 Tmp = std::numeric_limits<unsigned>::max(); 3543 if (!!DemandedLHS) { 3544 const Value *LHS = Shuf->getOperand(0); 3545 Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q); 3546 } 3547 // If we don't know anything, early out and try computeKnownBits 3548 // fall-back. 3549 if (Tmp == 1) 3550 break; 3551 if (!!DemandedRHS) { 3552 const Value *RHS = Shuf->getOperand(1); 3553 Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q); 3554 Tmp = std::min(Tmp, Tmp2); 3555 } 3556 // If we don't know anything, early out and try computeKnownBits 3557 // fall-back. 
3558 if (Tmp == 1) 3559 break; 3560 assert(Tmp <= TyBits && "Failed to determine minimum sign bits"); 3561 return Tmp; 3562 } 3563 case Instruction::Call: { 3564 if (const auto *II = dyn_cast<IntrinsicInst>(U)) { 3565 switch (II->getIntrinsicID()) { 3566 default: break; 3567 case Intrinsic::abs: 3568 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3569 if (Tmp == 1) break; 3570 3571 // Absolute value reduces number of sign bits by at most 1. 3572 return Tmp - 1; 3573 case Intrinsic::smin: 3574 case Intrinsic::smax: { 3575 const APInt *CLow, *CHigh; 3576 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh)) 3577 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); 3578 } 3579 } 3580 } 3581 } 3582 } 3583 } 3584 3585 // Finally, if we can prove that the top bits of the result are 0's or 1's, 3586 // use this information. 3587 3588 // If we can examine all elements of a vector constant successfully, we're 3589 // done (we can't do any better than that). If not, keep trying. 3590 if (unsigned VecSignBits = 3591 computeNumSignBitsVectorConstant(V, DemandedElts, TyBits)) 3592 return VecSignBits; 3593 3594 KnownBits Known(TyBits); 3595 computeKnownBits(V, DemandedElts, Known, Depth, Q); 3596 3597 // If we know that the sign bit is either zero or one, determine the number of 3598 // identical bits in the top of the input value. 3599 return std::max(FirstAnswer, Known.countMinSignBits()); 3600 } 3601 3602 Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB, 3603 const TargetLibraryInfo *TLI) { 3604 const Function *F = CB.getCalledFunction(); 3605 if (!F) 3606 return Intrinsic::not_intrinsic; 3607 3608 if (F->isIntrinsic()) 3609 return F->getIntrinsicID(); 3610 3611 // We are going to infer semantics of a library function based on mapping it 3612 // to an LLVM intrinsic. Check that the library function is available from 3613 // this callbase and in this environment. 
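  // For example, a readonly call to `sinf` that TLI recognizes maps to
  // Intrinsic::sin via the switch below; callees with local linkage or calls
  // that may write memory are rejected here.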
3614 LibFunc Func; 3615 if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) || 3616 !CB.onlyReadsMemory()) 3617 return Intrinsic::not_intrinsic; 3618 3619 switch (Func) { 3620 default: 3621 break; 3622 case LibFunc_sin: 3623 case LibFunc_sinf: 3624 case LibFunc_sinl: 3625 return Intrinsic::sin; 3626 case LibFunc_cos: 3627 case LibFunc_cosf: 3628 case LibFunc_cosl: 3629 return Intrinsic::cos; 3630 case LibFunc_exp: 3631 case LibFunc_expf: 3632 case LibFunc_expl: 3633 return Intrinsic::exp; 3634 case LibFunc_exp2: 3635 case LibFunc_exp2f: 3636 case LibFunc_exp2l: 3637 return Intrinsic::exp2; 3638 case LibFunc_log: 3639 case LibFunc_logf: 3640 case LibFunc_logl: 3641 return Intrinsic::log; 3642 case LibFunc_log10: 3643 case LibFunc_log10f: 3644 case LibFunc_log10l: 3645 return Intrinsic::log10; 3646 case LibFunc_log2: 3647 case LibFunc_log2f: 3648 case LibFunc_log2l: 3649 return Intrinsic::log2; 3650 case LibFunc_fabs: 3651 case LibFunc_fabsf: 3652 case LibFunc_fabsl: 3653 return Intrinsic::fabs; 3654 case LibFunc_fmin: 3655 case LibFunc_fminf: 3656 case LibFunc_fminl: 3657 return Intrinsic::minnum; 3658 case LibFunc_fmax: 3659 case LibFunc_fmaxf: 3660 case LibFunc_fmaxl: 3661 return Intrinsic::maxnum; 3662 case LibFunc_copysign: 3663 case LibFunc_copysignf: 3664 case LibFunc_copysignl: 3665 return Intrinsic::copysign; 3666 case LibFunc_floor: 3667 case LibFunc_floorf: 3668 case LibFunc_floorl: 3669 return Intrinsic::floor; 3670 case LibFunc_ceil: 3671 case LibFunc_ceilf: 3672 case LibFunc_ceill: 3673 return Intrinsic::ceil; 3674 case LibFunc_trunc: 3675 case LibFunc_truncf: 3676 case LibFunc_truncl: 3677 return Intrinsic::trunc; 3678 case LibFunc_rint: 3679 case LibFunc_rintf: 3680 case LibFunc_rintl: 3681 return Intrinsic::rint; 3682 case LibFunc_nearbyint: 3683 case LibFunc_nearbyintf: 3684 case LibFunc_nearbyintl: 3685 return Intrinsic::nearbyint; 3686 case LibFunc_round: 3687 case LibFunc_roundf: 3688 case LibFunc_roundl: 3689 return Intrinsic::round; 3690 case LibFunc_roundeven: 3691 case LibFunc_roundevenf: 3692 case LibFunc_roundevenl: 3693 return Intrinsic::roundeven; 3694 case LibFunc_pow: 3695 case LibFunc_powf: 3696 case LibFunc_powl: 3697 return Intrinsic::pow; 3698 case LibFunc_sqrt: 3699 case LibFunc_sqrtf: 3700 case LibFunc_sqrtl: 3701 return Intrinsic::sqrt; 3702 } 3703 3704 return Intrinsic::not_intrinsic; 3705 } 3706 3707 /// Deprecated, use computeKnownFPClass instead. 3708 /// 3709 /// If \p SignBitOnly is true, test for a known 0 sign bit rather than a 3710 /// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign 3711 /// bit despite comparing equal. 3712 static bool cannotBeOrderedLessThanZeroImpl(const Value *V, 3713 const DataLayout &DL, 3714 const TargetLibraryInfo *TLI, 3715 bool SignBitOnly, unsigned Depth) { 3716 // TODO: This function does not do the right thing when SignBitOnly is true 3717 // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform 3718 // which flips the sign bits of NaNs. See 3719 // https://llvm.org/bugs/show_bug.cgi?id=31702. 3720 3721 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { 3722 return !CFP->getValueAPF().isNegative() || 3723 (!SignBitOnly && CFP->getValueAPF().isZero()); 3724 } 3725 3726 // Handle vector of constants. 
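  // For example, <2 x float> <float 0.0, float 2.5> passes this check: every
  // element is a non-negative ConstantFP.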
3727 if (auto *CV = dyn_cast<Constant>(V)) { 3728 if (auto *CVFVTy = dyn_cast<FixedVectorType>(CV->getType())) { 3729 unsigned NumElts = CVFVTy->getNumElements(); 3730 for (unsigned i = 0; i != NumElts; ++i) { 3731 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i)); 3732 if (!CFP) 3733 return false; 3734 if (CFP->getValueAPF().isNegative() && 3735 (SignBitOnly || !CFP->getValueAPF().isZero())) 3736 return false; 3737 } 3738 3739 // All non-negative ConstantFPs. 3740 return true; 3741 } 3742 } 3743 3744 if (Depth == MaxAnalysisRecursionDepth) 3745 return false; 3746 3747 const Operator *I = dyn_cast<Operator>(V); 3748 if (!I) 3749 return false; 3750 3751 switch (I->getOpcode()) { 3752 default: 3753 break; 3754 // Unsigned integers are always nonnegative. 3755 case Instruction::UIToFP: 3756 return true; 3757 case Instruction::FDiv: 3758 // X / X is always exactly 1.0 or a NaN. 3759 if (I->getOperand(0) == I->getOperand(1) && 3760 (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs())) 3761 return true; 3762 3763 // Set SignBitOnly for RHS, because X / -0.0 is -Inf (or NaN). 3764 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3765 SignBitOnly, Depth + 1) && 3766 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3767 /*SignBitOnly*/ true, Depth + 1); 3768 case Instruction::FMul: 3769 // X * X is always non-negative or a NaN. 3770 if (I->getOperand(0) == I->getOperand(1) && 3771 (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs())) 3772 return true; 3773 3774 [[fallthrough]]; 3775 case Instruction::FAdd: 3776 case Instruction::FRem: 3777 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3778 SignBitOnly, Depth + 1) && 3779 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3780 SignBitOnly, Depth + 1); 3781 case Instruction::Select: 3782 return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3783 SignBitOnly, Depth + 1) && 3784 cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI, 3785 SignBitOnly, Depth + 1); 3786 case Instruction::FPExt: 3787 case Instruction::FPTrunc: 3788 // Widening/narrowing never change sign. 3789 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3790 SignBitOnly, Depth + 1); 3791 case Instruction::ExtractElement: 3792 // Look through extract element. At the moment we keep this simple and skip 3793 // tracking the specific element. But at least we might find information 3794 // valid for all elements of the vector. 3795 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3796 SignBitOnly, Depth + 1); 3797 case Instruction::Call: 3798 const auto *CI = cast<CallInst>(I); 3799 Intrinsic::ID IID = getIntrinsicForCallSite(*CI, TLI); 3800 switch (IID) { 3801 default: 3802 break; 3803 case Intrinsic::canonicalize: 3804 case Intrinsic::arithmetic_fence: 3805 case Intrinsic::floor: 3806 case Intrinsic::ceil: 3807 case Intrinsic::trunc: 3808 case Intrinsic::rint: 3809 case Intrinsic::nearbyint: 3810 case Intrinsic::round: 3811 case Intrinsic::roundeven: 3812 case Intrinsic::fptrunc_round: 3813 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3814 SignBitOnly, Depth + 1); 3815 case Intrinsic::maxnum: { 3816 Value *V0 = I->getOperand(0), *V1 = I->getOperand(1); 3817 auto isPositiveNum = [&](Value *V) { 3818 if (SignBitOnly) { 3819 // With SignBitOnly, this is tricky because the result of 3820 // maxnum(+0.0, -0.0) is unspecified. Just check if the operand is 3821 // a constant strictly greater than 0.0. 
3822 const APFloat *C; 3823 return match(V, m_APFloat(C)) && 3824 *C > APFloat::getZero(C->getSemantics()); 3825 } 3826 3827 // -0.0 compares equal to 0.0, so if this operand is at least -0.0, 3828 // maxnum can't be ordered-less-than-zero. 3829 return isKnownNeverNaN(V, DL, TLI) && 3830 cannotBeOrderedLessThanZeroImpl(V, DL, TLI, false, Depth + 1); 3831 }; 3832 3833 // TODO: This could be improved. We could also check that neither operand 3834 // has its sign bit set (and at least 1 is not-NAN?). 3835 return isPositiveNum(V0) || isPositiveNum(V1); 3836 } 3837 3838 case Intrinsic::maximum: 3839 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3840 SignBitOnly, Depth + 1) || 3841 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3842 SignBitOnly, Depth + 1); 3843 case Intrinsic::minnum: 3844 case Intrinsic::minimum: 3845 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3846 SignBitOnly, Depth + 1) && 3847 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3848 SignBitOnly, Depth + 1); 3849 case Intrinsic::exp: 3850 case Intrinsic::exp2: 3851 case Intrinsic::fabs: 3852 return true; 3853 case Intrinsic::copysign: 3854 // Only the sign operand matters. 3855 return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, true, 3856 Depth + 1); 3857 case Intrinsic::sqrt: 3858 // sqrt(x) is always >= -0 or NaN. Moreover, sqrt(x) == -0 iff x == -0. 3859 if (!SignBitOnly) 3860 return true; 3861 return CI->hasNoNaNs() && 3862 (CI->hasNoSignedZeros() || 3863 cannotBeNegativeZero(CI->getOperand(0), DL, TLI)); 3864 3865 case Intrinsic::powi: 3866 if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) { 3867 // powi(x,n) is non-negative if n is even. 3868 if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0) 3869 return true; 3870 } 3871 // TODO: This is not correct. Given that exp is an integer, here are the 3872 // ways that pow can return a negative value: 3873 // 3874 // pow(x, exp) --> negative if exp is odd and x is negative. 3875 // pow(-0, exp) --> -inf if exp is negative odd. 3876 // pow(-0, exp) --> -0 if exp is positive odd. 3877 // pow(-inf, exp) --> -0 if exp is negative odd. 3878 // pow(-inf, exp) --> -inf if exp is positive odd. 3879 // 3880 // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN, 3881 // but we must return false if x == -0. Unfortunately we do not currently 3882 // have a way of expressing this constraint. See details in 3883 // https://llvm.org/bugs/show_bug.cgi?id=31702. 3884 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3885 SignBitOnly, Depth + 1); 3886 3887 case Intrinsic::fma: 3888 case Intrinsic::fmuladd: 3889 // x*x+y is non-negative if y is non-negative. 3890 return I->getOperand(0) == I->getOperand(1) && 3891 (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) && 3892 cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI, 3893 SignBitOnly, Depth + 1); 3894 } 3895 break; 3896 } 3897 return false; 3898 } 3899 3900 bool llvm::SignBitMustBeZero(const Value *V, const DataLayout &DL, 3901 const TargetLibraryInfo *TLI) { 3902 // FIXME: Use computeKnownFPClass and pass all arguments 3903 return cannotBeOrderedLessThanZeroImpl(V, DL, TLI, true, 0); 3904 } 3905 3906 /// Return true if it's possible to assume IEEE treatment of input denormals in 3907 /// \p F for \p Val. 
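/// For example, this holds under the default IEEE mode (e.g. the
/// "denormal-fp-math"="ieee,ieee" function attribute), but not when the input
/// mode is preserve-sign or positive-zero.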
3908 static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) { 3909 Ty = Ty->getScalarType(); 3910 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE; 3911 } 3912 3913 static bool inputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) { 3914 Ty = Ty->getScalarType(); 3915 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics()); 3916 return Mode.Input == DenormalMode::IEEE || 3917 Mode.Input == DenormalMode::PositiveZero; 3918 } 3919 3920 static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) { 3921 Ty = Ty->getScalarType(); 3922 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics()); 3923 return Mode.Output == DenormalMode::IEEE || 3924 Mode.Output == DenormalMode::PositiveZero; 3925 } 3926 3927 bool KnownFPClass::isKnownNeverLogicalZero(const Function &F, Type *Ty) const { 3928 return isKnownNeverZero() && 3929 (isKnownNeverSubnormal() || inputDenormalIsIEEE(F, Ty)); 3930 } 3931 3932 bool KnownFPClass::isKnownNeverLogicalNegZero(const Function &F, 3933 Type *Ty) const { 3934 return isKnownNeverNegZero() && 3935 (isKnownNeverNegSubnormal() || inputDenormalIsIEEEOrPosZero(F, Ty)); 3936 } 3937 3938 bool KnownFPClass::isKnownNeverLogicalPosZero(const Function &F, 3939 Type *Ty) const { 3940 if (!isKnownNeverPosZero()) 3941 return false; 3942 3943 // If we know there are no denormals, nothing can be flushed to zero. 3944 if (isKnownNeverSubnormal()) 3945 return true; 3946 3947 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics()); 3948 switch (Mode.Input) { 3949 case DenormalMode::IEEE: 3950 return true; 3951 case DenormalMode::PreserveSign: 3952 // Negative subnormal won't flush to +0 3953 return isKnownNeverPosSubnormal(); 3954 case DenormalMode::PositiveZero: 3955 default: 3956 // Both positive and negative subnormal could flush to +0 3957 return false; 3958 } 3959 3960 llvm_unreachable("covered switch over denormal mode"); 3961 } 3962 3963 void KnownFPClass::propagateDenormal(const KnownFPClass &Src, const Function &F, 3964 Type *Ty) { 3965 KnownFPClasses = Src.KnownFPClasses; 3966 // If we aren't assuming the source can't be a zero, we don't have to check if 3967 // a denormal input could be flushed. 3968 if (!Src.isKnownNeverPosZero() && !Src.isKnownNeverNegZero()) 3969 return; 3970 3971 // If we know the input can't be a denormal, it can't be flushed to 0. 3972 if (Src.isKnownNeverSubnormal()) 3973 return; 3974 3975 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics()); 3976 3977 if (!Src.isKnownNeverPosSubnormal() && Mode != DenormalMode::getIEEE()) 3978 KnownFPClasses |= fcPosZero; 3979 3980 if (!Src.isKnownNeverNegSubnormal() && Mode != DenormalMode::getIEEE()) { 3981 if (Mode != DenormalMode::getPositiveZero()) 3982 KnownFPClasses |= fcNegZero; 3983 3984 if (Mode.Input == DenormalMode::PositiveZero || 3985 Mode.Output == DenormalMode::PositiveZero || 3986 Mode.Input == DenormalMode::Dynamic || 3987 Mode.Output == DenormalMode::Dynamic) 3988 KnownFPClasses |= fcPosZero; 3989 } 3990 } 3991 3992 void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src, 3993 const Function &F, Type *Ty) { 3994 propagateDenormal(Src, F, Ty); 3995 propagateNaN(Src, /*PreserveSign=*/true); 3996 } 3997 3998 /// Returns a pair of values, which if passed to llvm.is.fpclass, returns the 3999 /// same result as an fcmp with the given operands. 
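/// For example, `fcmp ogt double %x, 0.0` corresponds to the class test
/// (fcPosSubnormal | fcPosNormal | fcPosInf) on %x, and `fcmp uno double %x,
/// 0.0` corresponds to fcNan.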
4000 std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred, 4001 const Function &F, 4002 Value *LHS, Value *RHS, 4003 bool LookThroughSrc) { 4004 const APFloat *ConstRHS; 4005 if (!match(RHS, m_APFloatAllowUndef(ConstRHS))) 4006 return {nullptr, fcAllFlags}; 4007 4008 return fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc); 4009 } 4010 4011 std::pair<Value *, FPClassTest> 4012 llvm::fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, 4013 const APFloat *ConstRHS, bool LookThroughSrc) { 4014 // fcmp ord x, zero|normal|subnormal|inf -> ~fcNan 4015 if (Pred == FCmpInst::FCMP_ORD && !ConstRHS->isNaN()) 4016 return {LHS, ~fcNan}; 4017 4018 // fcmp uno x, zero|normal|subnormal|inf -> fcNan 4019 if (Pred == FCmpInst::FCMP_UNO && !ConstRHS->isNaN()) 4020 return {LHS, fcNan}; 4021 4022 if (ConstRHS->isZero()) { 4023 // Compares with fcNone are only exactly equal to fcZero if input denormals 4024 // are not flushed. 4025 // TODO: Handle DAZ by expanding masks to cover subnormal cases. 4026 if (Pred != FCmpInst::FCMP_ORD && Pred != FCmpInst::FCMP_UNO && 4027 !inputDenormalIsIEEE(F, LHS->getType())) 4028 return {nullptr, fcAllFlags}; 4029 4030 switch (Pred) { 4031 case FCmpInst::FCMP_OEQ: // Match x == 0.0 4032 return {LHS, fcZero}; 4033 case FCmpInst::FCMP_UEQ: // Match isnan(x) || (x == 0.0) 4034 return {LHS, fcZero | fcNan}; 4035 case FCmpInst::FCMP_UNE: // Match (x != 0.0) 4036 return {LHS, ~fcZero}; 4037 case FCmpInst::FCMP_ONE: // Match !isnan(x) && x != 0.0 4038 return {LHS, ~fcNan & ~fcZero}; 4039 case FCmpInst::FCMP_ORD: 4040 // Canonical form of ord/uno is with a zero. We could also handle 4041 // non-canonical other non-NaN constants or LHS == RHS. 4042 return {LHS, ~fcNan}; 4043 case FCmpInst::FCMP_UNO: 4044 return {LHS, fcNan}; 4045 case FCmpInst::FCMP_OGT: // x > 0 4046 return {LHS, fcPosSubnormal | fcPosNormal | fcPosInf}; 4047 case FCmpInst::FCMP_UGT: // isnan(x) || x > 0 4048 return {LHS, fcPosSubnormal | fcPosNormal | fcPosInf | fcNan}; 4049 case FCmpInst::FCMP_OGE: // x >= 0 4050 return {LHS, fcPositive | fcNegZero}; 4051 case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0 4052 return {LHS, fcPositive | fcNegZero | fcNan}; 4053 case FCmpInst::FCMP_OLT: // x < 0 4054 return {LHS, fcNegSubnormal | fcNegNormal | fcNegInf}; 4055 case FCmpInst::FCMP_ULT: // isnan(x) || x < 0 4056 return {LHS, fcNegSubnormal | fcNegNormal | fcNegInf | fcNan}; 4057 case FCmpInst::FCMP_OLE: // x <= 0 4058 return {LHS, fcNegative | fcPosZero}; 4059 case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0 4060 return {LHS, fcNegative | fcPosZero | fcNan}; 4061 default: 4062 break; 4063 } 4064 4065 return {nullptr, fcAllFlags}; 4066 } 4067 4068 Value *Src = LHS; 4069 const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src))); 4070 4071 // Compute the test mask that would return true for the ordered comparisons. 
FPClassTest Mask; 4073 4074 if (ConstRHS->isInfinity()) { 4075 switch (Pred) { 4076 case FCmpInst::FCMP_OEQ: 4077 case FCmpInst::FCMP_UNE: { 4078 // Match __builtin_isinf patterns 4079 // 4080 // fcmp oeq x, +inf -> is_fpclass x, fcPosInf 4081 // fcmp oeq fabs(x), +inf -> is_fpclass x, fcInf 4082 // fcmp oeq x, -inf -> is_fpclass x, fcNegInf 4083 // fcmp oeq fabs(x), -inf -> is_fpclass x, 0 -> false 4084 // 4085 // fcmp une x, +inf -> is_fpclass x, ~fcPosInf 4086 // fcmp une fabs(x), +inf -> is_fpclass x, ~fcInf 4087 // fcmp une x, -inf -> is_fpclass x, ~fcNegInf 4088 // fcmp une fabs(x), -inf -> is_fpclass x, fcAllFlags -> true 4089 4090 if (ConstRHS->isNegative()) { 4091 Mask = fcNegInf; 4092 if (IsFabs) 4093 Mask = fcNone; 4094 } else { 4095 Mask = fcPosInf; 4096 if (IsFabs) 4097 Mask |= fcNegInf; 4098 } 4099 4100 break; 4101 } 4102 case FCmpInst::FCMP_ONE: 4103 case FCmpInst::FCMP_UEQ: { 4104 // Match __builtin_isinf patterns 4105 // fcmp one x, -inf -> is_fpclass x, ~fcNegInf & ~fcNan 4106 // fcmp one fabs(x), -inf -> is_fpclass x, ~fcNan 4107 // fcmp one x, +inf -> is_fpclass x, ~fcPosInf & ~fcNan 4108 // fcmp one fabs(x), +inf -> is_fpclass x, ~fcInf & ~fcNan 4109 // 4110 // fcmp ueq x, +inf -> is_fpclass x, fcPosInf|fcNan 4111 // fcmp ueq (fabs x), +inf -> is_fpclass x, fcInf|fcNan 4112 // fcmp ueq x, -inf -> is_fpclass x, fcNegInf|fcNan 4113 // fcmp ueq fabs(x), -inf -> is_fpclass x, fcNan 4114 if (ConstRHS->isNegative()) { 4115 Mask = ~fcNegInf & ~fcNan; 4116 if (IsFabs) 4117 Mask = ~fcNan; 4118 } else { 4119 Mask = ~fcPosInf & ~fcNan; 4120 if (IsFabs) 4121 Mask &= ~fcNegInf; 4122 } 4123 4124 break; 4125 } 4126 case FCmpInst::FCMP_OLT: 4127 case FCmpInst::FCMP_UGE: { 4128 if (ConstRHS->isNegative()) { 4129 // No value is ordered and less than negative infinity. 4130 // All values are unordered with or at least negative infinity. 4131 // fcmp olt x, -inf -> false 4132 // fcmp uge x, -inf -> true 4133 Mask = fcNone; 4134 break; 4135 } 4136 4137 // fcmp olt fabs(x), +inf -> fcFinite 4138 // fcmp uge fabs(x), +inf -> ~fcFinite 4139 // fcmp olt x, +inf -> fcFinite|fcNegInf 4140 // fcmp uge x, +inf -> ~(fcFinite|fcNegInf) 4141 Mask = fcFinite; 4142 if (!IsFabs) 4143 Mask |= fcNegInf; 4144 break; 4145 } 4146 case FCmpInst::FCMP_OGE: 4147 case FCmpInst::FCMP_ULT: { 4148 if (ConstRHS->isNegative()) { 4149 // fcmp oge x, -inf -> ~fcNan 4150 // fcmp oge fabs(x), -inf -> ~fcNan 4151 // fcmp ult x, -inf -> fcNan 4152 // fcmp ult fabs(x), -inf -> fcNan 4153 Mask = ~fcNan; 4154 break; 4155 } 4156 4157 // fcmp oge fabs(x), +inf -> fcInf 4158 // fcmp oge x, +inf -> fcPosInf 4159 // fcmp ult fabs(x), +inf -> ~fcInf 4160 // fcmp ult x, +inf -> ~fcPosInf 4161 Mask = fcPosInf; 4162 if (IsFabs) 4163 Mask |= fcNegInf; 4164 break; 4165 } 4166 case FCmpInst::FCMP_OGT: 4167 case FCmpInst::FCMP_ULE: { 4168 if (ConstRHS->isNegative()) { 4169 // fcmp ogt x, -inf -> fcmp one x, -inf 4170 // fcmp ogt fabs(x), -inf -> fcmp ord x, x 4171 // fcmp ule x, -inf -> fcmp ueq x, -inf 4172 // fcmp ule fabs(x), -inf -> fcmp uno x, x 4173 Mask = IsFabs ? ~fcNan : ~(fcNegInf | fcNan); 4174 break; 4175 } 4176 4177 // No value is ordered and greater than infinity. 4178 Mask = fcNone; 4179 break; 4180 } 4181 default: 4182 return {nullptr, fcAllFlags}; 4183 } 4184 } else if (ConstRHS->isSmallestNormalized() && !ConstRHS->isNegative()) { 4185 // Match pattern that's used in __builtin_isnormal.
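    // For example, an `oge` compare of fabs(x) against the smallest
    // normalized value (2^-126 for single precision) tests fcNormal | fcInf
    // on x.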
4186 switch (Pred) { 4187 case FCmpInst::FCMP_OLT: 4188 case FCmpInst::FCMP_UGE: { 4189 // fcmp olt x, smallest_normal -> fcNegInf|fcNegNormal|fcSubnormal|fcZero 4190 // fcmp olt fabs(x), smallest_normal -> fcSubnormal|fcZero 4191 // fcmp uge x, smallest_normal -> fcNan|fcPosNormal|fcPosInf 4192 // fcmp uge fabs(x), smallest_normal -> ~(fcSubnormal|fcZero) 4193 Mask = fcZero | fcSubnormal; 4194 if (!IsFabs) 4195 Mask |= fcNegNormal | fcNegInf; 4196 4197 break; 4198 } 4199 case FCmpInst::FCMP_OGE: 4200 case FCmpInst::FCMP_ULT: { 4201 // fcmp oge x, smallest_normal -> fcPosNormal | fcPosInf 4202 // fcmp oge fabs(x), smallest_normal -> fcInf | fcNormal 4203 // fcmp ult x, smallest_normal -> ~(fcPosNormal | fcPosInf) 4204 // fcmp ult fabs(x), smallest_normal -> ~(fcInf | fcNormal) 4205 Mask = fcPosInf | fcPosNormal; 4206 if (IsFabs) 4207 Mask |= fcNegInf | fcNegNormal; 4208 break; 4209 } 4210 default: 4211 return {nullptr, fcAllFlags}; 4212 } 4213 } else if (ConstRHS->isNaN()) { 4214 // fcmp o__ x, nan -> false 4215 // fcmp u__ x, nan -> true 4216 Mask = fcNone; 4217 } else 4218 return {nullptr, fcAllFlags}; 4219 4220 // Invert the comparison for the unordered cases. 4221 if (FCmpInst::isUnordered(Pred)) 4222 Mask = ~Mask; 4223 4224 return {Src, Mask}; 4225 } 4226 4227 static FPClassTest computeKnownFPClassFromAssumes(const Value *V, 4228 const SimplifyQuery &Q) { 4229 FPClassTest KnownFromAssume = fcAllFlags; 4230 4231 // Try to restrict the floating-point classes based on information from 4232 // assumptions. 4233 for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { 4234 if (!AssumeVH) 4235 continue; 4236 CallInst *I = cast<CallInst>(AssumeVH); 4237 const Function *F = I->getFunction(); 4238 4239 assert(F == Q.CxtI->getParent()->getParent() && 4240 "Got assumption for the wrong function!"); 4241 assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && 4242 "must be an assume intrinsic"); 4243 4244 if (!isValidAssumeForContext(I, Q.CxtI, Q.DT)) 4245 continue; 4246 4247 CmpInst::Predicate Pred; 4248 Value *LHS, *RHS; 4249 uint64_t ClassVal = 0; 4250 if (match(I->getArgOperand(0), m_FCmp(Pred, m_Value(LHS), m_Value(RHS)))) { 4251 auto [TestedValue, TestedMask] = 4252 fcmpToClassTest(Pred, *F, LHS, RHS, true); 4253 // First see if we can fold in fabs/fneg into the test. 4254 if (TestedValue == V) 4255 KnownFromAssume &= TestedMask; 4256 else { 4257 // Try again without the lookthrough if we found a different source 4258 // value. 4259 auto [TestedValue, TestedMask] = 4260 fcmpToClassTest(Pred, *F, LHS, RHS, false); 4261 if (TestedValue == V) 4262 KnownFromAssume &= TestedMask; 4263 } 4264 } else if (match(I->getArgOperand(0), 4265 m_Intrinsic<Intrinsic::is_fpclass>( 4266 m_Value(LHS), m_ConstantInt(ClassVal)))) { 4267 KnownFromAssume &= static_cast<FPClassTest>(ClassVal); 4268 } 4269 } 4270 4271 return KnownFromAssume; 4272 } 4273 4274 void computeKnownFPClass(const Value *V, const APInt &DemandedElts, 4275 FPClassTest InterestedClasses, KnownFPClass &Known, 4276 unsigned Depth, const SimplifyQuery &Q); 4277 4278 static void computeKnownFPClass(const Value *V, KnownFPClass &Known, 4279 FPClassTest InterestedClasses, unsigned Depth, 4280 const SimplifyQuery &Q) { 4281 auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); 4282 APInt DemandedElts = 4283 FVTy ? 
APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); 4284 computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Depth, Q); 4285 } 4286 4287 static void computeKnownFPClassForFPTrunc(const Operator *Op, 4288 const APInt &DemandedElts, 4289 FPClassTest InterestedClasses, 4290 KnownFPClass &Known, unsigned Depth, 4291 const SimplifyQuery &Q) { 4292 if ((InterestedClasses & 4293 (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == fcNone) 4294 return; 4295 4296 KnownFPClass KnownSrc; 4297 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, 4298 KnownSrc, Depth + 1, Q); 4299 4300 // Sign should be preserved 4301 // TODO: Handle cannot be ordered greater than zero 4302 if (KnownSrc.cannotBeOrderedLessThanZero()) 4303 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4304 4305 Known.propagateNaN(KnownSrc, true); 4306 4307 // Infinity needs a range check. 4308 } 4309 4310 // TODO: Merge implementation of cannotBeOrderedLessThanZero into here. 4311 void computeKnownFPClass(const Value *V, const APInt &DemandedElts, 4312 FPClassTest InterestedClasses, KnownFPClass &Known, 4313 unsigned Depth, const SimplifyQuery &Q) { 4314 assert(Known.isUnknown() && "should not be called with known information"); 4315 4316 if (!DemandedElts) { 4317 // No demanded elts, better to assume we don't know anything. 4318 Known.resetAll(); 4319 return; 4320 } 4321 4322 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 4323 4324 if (auto *CFP = dyn_cast_or_null<ConstantFP>(V)) { 4325 Known.KnownFPClasses = CFP->getValueAPF().classify(); 4326 Known.SignBit = CFP->isNegative(); 4327 return; 4328 } 4329 4330 // Try to handle fixed width vector constants 4331 auto *VFVTy = dyn_cast<FixedVectorType>(V->getType()); 4332 const Constant *CV = dyn_cast<Constant>(V); 4333 if (VFVTy && CV) { 4334 Known.KnownFPClasses = fcNone; 4335 4336 // For vectors, verify that each element is not NaN. 4337 unsigned NumElts = VFVTy->getNumElements(); 4338 for (unsigned i = 0; i != NumElts; ++i) { 4339 Constant *Elt = CV->getAggregateElement(i); 4340 if (!Elt) { 4341 Known = KnownFPClass(); 4342 return; 4343 } 4344 if (isa<UndefValue>(Elt)) 4345 continue; 4346 auto *CElt = dyn_cast<ConstantFP>(Elt); 4347 if (!CElt) { 4348 Known = KnownFPClass(); 4349 return; 4350 } 4351 4352 KnownFPClass KnownElt{CElt->getValueAPF().classify(), CElt->isNegative()}; 4353 Known |= KnownElt; 4354 } 4355 4356 return; 4357 } 4358 4359 FPClassTest KnownNotFromFlags = fcNone; 4360 if (const auto *CB = dyn_cast<CallBase>(V)) 4361 KnownNotFromFlags |= CB->getRetNoFPClass(); 4362 else if (const auto *Arg = dyn_cast<Argument>(V)) 4363 KnownNotFromFlags |= Arg->getNoFPClass(); 4364 4365 const Operator *Op = dyn_cast<Operator>(V); 4366 if (const FPMathOperator *FPOp = dyn_cast_or_null<FPMathOperator>(Op)) { 4367 if (FPOp->hasNoNaNs()) 4368 KnownNotFromFlags |= fcNan; 4369 if (FPOp->hasNoInfs()) 4370 KnownNotFromFlags |= fcInf; 4371 } 4372 4373 if (Q.AC) { 4374 FPClassTest AssumedClasses = computeKnownFPClassFromAssumes(V, Q); 4375 KnownNotFromFlags |= ~AssumedClasses; 4376 } 4377 4378 // We no longer need to find out about these bits from inputs if we can 4379 // assume this from flags/attributes. 4380 InterestedClasses &= ~KnownNotFromFlags; 4381 4382 auto ClearClassesFromFlags = make_scope_exit([=, &Known] { 4383 Known.knownNot(KnownNotFromFlags); 4384 }); 4385 4386 if (!Op) 4387 return; 4388 4389 // All recursive calls that increase depth must come after this. 
4390 if (Depth == MaxAnalysisRecursionDepth) 4391 return; 4392 4393 const unsigned Opc = Op->getOpcode(); 4394 switch (Opc) { 4395 case Instruction::FNeg: { 4396 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, 4397 Known, Depth + 1, Q); 4398 Known.fneg(); 4399 break; 4400 } 4401 case Instruction::Select: { 4402 Value *Cond = Op->getOperand(0); 4403 Value *LHS = Op->getOperand(1); 4404 Value *RHS = Op->getOperand(2); 4405 4406 FPClassTest FilterLHS = fcAllFlags; 4407 FPClassTest FilterRHS = fcAllFlags; 4408 4409 Value *TestedValue = nullptr; 4410 FPClassTest TestedMask = fcNone; 4411 uint64_t ClassVal = 0; 4412 const Function *F = cast<Instruction>(Op)->getFunction(); 4413 CmpInst::Predicate Pred; 4414 Value *CmpLHS, *CmpRHS; 4415 if (F && match(Cond, m_FCmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) { 4416 // If the select filters out a value based on the class, it no longer 4417 // participates in the class of the result 4418 4419 // TODO: In some degenerate cases we can infer something if we try again 4420 // without looking through sign operations. 4421 bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS; 4422 std::tie(TestedValue, TestedMask) = 4423 fcmpToClassTest(Pred, *F, CmpLHS, CmpRHS, LookThroughFAbsFNeg); 4424 } else if (match(Cond, 4425 m_Intrinsic<Intrinsic::is_fpclass>( 4426 m_Value(TestedValue), m_ConstantInt(ClassVal)))) { 4427 TestedMask = static_cast<FPClassTest>(ClassVal); 4428 } 4429 4430 if (TestedValue == LHS) { 4431 // match !isnan(x) ? x : y 4432 FilterLHS = TestedMask; 4433 } else if (TestedValue == RHS) { 4434 // match !isnan(x) ? y : x 4435 FilterRHS = ~TestedMask; 4436 } 4437 4438 KnownFPClass Known2; 4439 computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known, 4440 Depth + 1, Q); 4441 Known.KnownFPClasses &= FilterLHS; 4442 4443 computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS, 4444 Known2, Depth + 1, Q); 4445 Known2.KnownFPClasses &= FilterRHS; 4446 4447 Known |= Known2; 4448 break; 4449 } 4450 case Instruction::Call: { 4451 const CallInst *II = cast<CallInst>(Op); 4452 const Intrinsic::ID IID = II->getIntrinsicID(); 4453 switch (IID) { 4454 case Intrinsic::fabs: { 4455 if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) { 4456 // If we only care about the sign bit we don't need to inspect the 4457 // operand. 4458 computeKnownFPClass(II->getArgOperand(0), DemandedElts, 4459 InterestedClasses, Known, Depth + 1, Q); 4460 } 4461 4462 Known.fabs(); 4463 break; 4464 } 4465 case Intrinsic::copysign: { 4466 KnownFPClass KnownSign; 4467 4468 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4469 Known, Depth + 1, Q); 4470 computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses, 4471 KnownSign, Depth + 1, Q); 4472 Known.copysign(KnownSign); 4473 break; 4474 } 4475 case Intrinsic::fma: 4476 case Intrinsic::fmuladd: { 4477 if ((InterestedClasses & fcNegative) == fcNone) 4478 break; 4479 4480 if (II->getArgOperand(0) != II->getArgOperand(1)) 4481 break; 4482 4483 // The multiply cannot be -0 and therefore the add can't be -0 4484 Known.knownNot(fcNegZero); 4485 4486 // x * x + y is non-negative if y is non-negative. 
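      // For example, `llvm.fmuladd(%a, %a, 1.0)` is known not to be in
      // fcNegative (though it may still be a NaN).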
4487 KnownFPClass KnownAddend; 4488 computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses, 4489 KnownAddend, Depth + 1, Q); 4490 4491 // TODO: Known sign bit with no nans 4492 if (KnownAddend.cannotBeOrderedLessThanZero()) 4493 Known.knownNot(fcNegative); 4494 break; 4495 } 4496 case Intrinsic::sqrt: 4497 case Intrinsic::experimental_constrained_sqrt: { 4498 KnownFPClass KnownSrc; 4499 FPClassTest InterestedSrcs = InterestedClasses; 4500 if (InterestedClasses & fcNan) 4501 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; 4502 4503 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 4504 KnownSrc, Depth + 1, Q); 4505 4506 if (KnownSrc.isKnownNeverPosInfinity()) 4507 Known.knownNot(fcPosInf); 4508 if (KnownSrc.isKnownNever(fcSNan)) 4509 Known.knownNot(fcSNan); 4510 4511 // Any negative value besides -0 returns a nan. 4512 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) 4513 Known.knownNot(fcNan); 4514 4515 // The only negative value that can be returned is -0 for -0 inputs. 4516 Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal); 4517 4518 // If the input denormal mode could be PreserveSign, a negative 4519 // subnormal input could produce a negative zero output. 4520 const Function *F = II->getFunction(); 4521 if (Q.IIQ.hasNoSignedZeros(II) || 4522 (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))) { 4523 Known.knownNot(fcNegZero); 4524 if (KnownSrc.isKnownNeverNaN()) 4525 Known.SignBit = false; 4526 } 4527 4528 break; 4529 } 4530 case Intrinsic::sin: 4531 case Intrinsic::cos: { 4532 // Return NaN on infinite inputs. 4533 KnownFPClass KnownSrc; 4534 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4535 KnownSrc, Depth + 1, Q); 4536 Known.knownNot(fcInf); 4537 if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity()) 4538 Known.knownNot(fcNan); 4539 break; 4540 } 4541 case Intrinsic::maxnum: 4542 case Intrinsic::minnum: 4543 case Intrinsic::minimum: 4544 case Intrinsic::maximum: { 4545 KnownFPClass KnownLHS, KnownRHS; 4546 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4547 KnownLHS, Depth + 1, Q); 4548 computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses, 4549 KnownRHS, Depth + 1, Q); 4550 4551 bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN(); 4552 Known = KnownLHS | KnownRHS; 4553 4554 // If either operand is not NaN, the result is not NaN. 4555 if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)) 4556 Known.knownNot(fcNan); 4557 4558 if (IID == Intrinsic::maxnum) { 4559 // If at least one operand is known to be positive, the result must be 4560 // positive. 4561 if ((KnownLHS.cannotBeOrderedLessThanZero() && 4562 KnownLHS.isKnownNeverNaN()) || 4563 (KnownRHS.cannotBeOrderedLessThanZero() && 4564 KnownRHS.isKnownNeverNaN())) 4565 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4566 } else if (IID == Intrinsic::maximum) { 4567 // If at least one operand is known to be positive, the result must be 4568 // positive. 4569 if (KnownLHS.cannotBeOrderedLessThanZero() || 4570 KnownRHS.cannotBeOrderedLessThanZero()) 4571 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4572 } else if (IID == Intrinsic::minnum) { 4573 // If at least one operand is known to be negative, the result must be 4574 // negative. 
4575 if ((KnownLHS.cannotBeOrderedGreaterThanZero() && 4576 KnownLHS.isKnownNeverNaN()) || 4577 (KnownRHS.cannotBeOrderedGreaterThanZero() && 4578 KnownRHS.isKnownNeverNaN())) 4579 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 4580 } else { 4581 // If at least one operand is known to be negative, the result must be 4582 // negative. 4583 if (KnownLHS.cannotBeOrderedGreaterThanZero() || 4584 KnownRHS.cannotBeOrderedGreaterThanZero()) 4585 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 4586 } 4587 4588 // Fixup zero handling if denormals could be returned as a zero. 4589 // 4590 // As there's no spec for denormal flushing, be conservative with the 4591 // treatment of denormals that could be flushed to zero. For older 4592 // subtargets on AMDGPU the min/max instructions would not flush the 4593 // output and return the original value. 4594 // 4595 // TODO: This could be refined based on the sign 4596 if ((Known.KnownFPClasses & fcZero) != fcNone && 4597 !Known.isKnownNeverSubnormal()) { 4598 const Function *Parent = II->getFunction(); 4599 if (!Parent) 4600 break; 4601 4602 DenormalMode Mode = Parent->getDenormalMode( 4603 II->getType()->getScalarType()->getFltSemantics()); 4604 if (Mode != DenormalMode::getIEEE()) 4605 Known.KnownFPClasses |= fcZero; 4606 } 4607 4608 break; 4609 } 4610 case Intrinsic::canonicalize: { 4611 KnownFPClass KnownSrc; 4612 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4613 KnownSrc, Depth + 1, Q); 4614 4615 // This is essentially a stronger form of 4616 // propagateCanonicalizingSrc. Other "canonicalizing" operations don't 4617 // actually have an IR canonicalization guarantee. 4618 4619 // Canonicalize may flush denormals to zero, so we have to consider the 4620 // denormal mode to preserve known-not-0 knowledge. 4621 Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan; 4622 4623 // Stronger version of propagateNaN 4624 // Canonicalize is guaranteed to quiet signaling nans. 4625 if (KnownSrc.isKnownNeverNaN()) 4626 Known.knownNot(fcNan); 4627 else 4628 Known.knownNot(fcSNan); 4629 4630 const Function *F = II->getFunction(); 4631 if (!F) 4632 break; 4633 4634 // If the parent function flushes denormals, the canonical output cannot 4635 // be a denormal. 
4636 const fltSemantics &FPType = 4637 II->getType()->getScalarType()->getFltSemantics(); 4638 DenormalMode DenormMode = F->getDenormalMode(FPType); 4639 if (DenormMode == DenormalMode::getIEEE()) { 4640 if (KnownSrc.isKnownNever(fcPosZero)) 4641 Known.knownNot(fcPosZero); 4642 if (KnownSrc.isKnownNever(fcNegZero)) 4643 Known.knownNot(fcNegZero); 4644 break; 4645 } 4646 4647 if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero()) 4648 Known.knownNot(fcSubnormal); 4649 4650 if (DenormMode.Input == DenormalMode::PositiveZero || 4651 (DenormMode.Output == DenormalMode::PositiveZero && 4652 DenormMode.Input == DenormalMode::IEEE)) 4653 Known.knownNot(fcNegZero); 4654 4655 break; 4656 } 4657 case Intrinsic::trunc: 4658 case Intrinsic::floor: 4659 case Intrinsic::ceil: 4660 case Intrinsic::rint: 4661 case Intrinsic::nearbyint: 4662 case Intrinsic::round: 4663 case Intrinsic::roundeven: { 4664 KnownFPClass KnownSrc; 4665 FPClassTest InterestedSrcs = InterestedClasses; 4666 if (InterestedSrcs & fcPosFinite) 4667 InterestedSrcs |= fcPosFinite; 4668 if (InterestedSrcs & fcNegFinite) 4669 InterestedSrcs |= fcNegFinite; 4670 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 4671 KnownSrc, Depth + 1, Q); 4672 4673 // Integer results cannot be subnormal. 4674 Known.knownNot(fcSubnormal); 4675 4676 Known.propagateNaN(KnownSrc, true); 4677 4678 // Pass through infinities, except PPC_FP128 is a special case for 4679 // intrinsics other than trunc. 4680 if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) { 4681 if (KnownSrc.isKnownNeverPosInfinity()) 4682 Known.knownNot(fcPosInf); 4683 if (KnownSrc.isKnownNeverNegInfinity()) 4684 Known.knownNot(fcNegInf); 4685 } 4686 4687 // Negative round ups to 0 produce -0 4688 if (KnownSrc.isKnownNever(fcPosFinite)) 4689 Known.knownNot(fcPosFinite); 4690 if (KnownSrc.isKnownNever(fcNegFinite)) 4691 Known.knownNot(fcNegFinite); 4692 4693 break; 4694 } 4695 case Intrinsic::exp: 4696 case Intrinsic::exp2: 4697 case Intrinsic::exp10: { 4698 Known.knownNot(fcNegative); 4699 if ((InterestedClasses & fcNan) == fcNone) 4700 break; 4701 4702 KnownFPClass KnownSrc; 4703 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4704 KnownSrc, Depth + 1, Q); 4705 if (KnownSrc.isKnownNeverNaN()) { 4706 Known.knownNot(fcNan); 4707 Known.SignBit = false; 4708 } 4709 4710 break; 4711 } 4712 case Intrinsic::fptrunc_round: { 4713 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known, 4714 Depth, Q); 4715 break; 4716 } 4717 case Intrinsic::log: 4718 case Intrinsic::log10: 4719 case Intrinsic::log2: 4720 case Intrinsic::experimental_constrained_log: 4721 case Intrinsic::experimental_constrained_log10: 4722 case Intrinsic::experimental_constrained_log2: { 4723 // log(+inf) -> +inf 4724 // log([+-]0.0) -> -inf 4725 // log(-inf) -> nan 4726 // log(-x) -> nan 4727 if ((InterestedClasses & (fcNan | fcInf)) == fcNone) 4728 break; 4729 4730 FPClassTest InterestedSrcs = InterestedClasses; 4731 if ((InterestedClasses & fcNegInf) != fcNone) 4732 InterestedSrcs |= fcZero | fcSubnormal; 4733 if ((InterestedClasses & fcNan) != fcNone) 4734 InterestedSrcs |= fcNan | (fcNegative & ~fcNan); 4735 4736 KnownFPClass KnownSrc; 4737 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 4738 KnownSrc, Depth + 1, Q); 4739 4740 if (KnownSrc.isKnownNeverPosInfinity()) 4741 Known.knownNot(fcPosInf); 4742 4743 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) 4744 Known.knownNot(fcNan); 4745 4746 
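// log of a (logical) zero is -inf, so if the source can never be a zero once
// the function's denormal mode is accounted for, the result cannot be -inf.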
const Function *F = II->getFunction(); 4747 if (F && KnownSrc.isKnownNeverLogicalZero(*F, II->getType())) 4748 Known.knownNot(fcNegInf); 4749 4750 break; 4751 } 4752 case Intrinsic::powi: { 4753 if ((InterestedClasses & fcNegative) == fcNone) 4754 break; 4755 4756 const Value *Exp = II->getArgOperand(1); 4757 Type *ExpTy = Exp->getType(); 4758 unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth(); 4759 KnownBits ExponentKnownBits(BitWidth); 4760 computeKnownBits(Exp, isa<VectorType>(ExpTy) ? DemandedElts : APInt(1, 1), 4761 ExponentKnownBits, Depth + 1, Q); 4762 4763 if (ExponentKnownBits.Zero[0]) { // Is even 4764 Known.knownNot(fcNegative); 4765 break; 4766 } 4767 4768 // Given that exp is an integer, here are the 4769 // ways that pow can return a negative value: 4770 // 4771 // pow(-x, exp) --> negative if exp is odd and x is negative. 4772 // pow(-0, exp) --> -inf if exp is negative odd. 4773 // pow(-0, exp) --> -0 if exp is positive odd. 4774 // pow(-inf, exp) --> -0 if exp is negative odd. 4775 // pow(-inf, exp) --> -inf if exp is positive odd. 4776 KnownFPClass KnownSrc; 4777 computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative, 4778 KnownSrc, Depth + 1, Q); 4779 if (KnownSrc.isKnownNever(fcNegative)) 4780 Known.knownNot(fcNegative); 4781 break; 4782 } 4783 case Intrinsic::ldexp: { 4784 KnownFPClass KnownSrc; 4785 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4786 KnownSrc, Depth + 1, Q); 4787 Known.propagateNaN(KnownSrc, /*PropagateSign=*/true); 4788 4789 // Sign is preserved, but underflows may produce zeroes. 4790 if (KnownSrc.isKnownNever(fcNegative)) 4791 Known.knownNot(fcNegative); 4792 else if (KnownSrc.cannotBeOrderedLessThanZero()) 4793 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4794 4795 if (KnownSrc.isKnownNever(fcPositive)) 4796 Known.knownNot(fcPositive); 4797 else if (KnownSrc.cannotBeOrderedGreaterThanZero()) 4798 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 4799 4800 // Can refine inf/zero handling based on the exponent operand. 4801 const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf; 4802 if ((InterestedClasses & ExpInfoMask) == fcNone) 4803 break; 4804 if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone) 4805 break; 4806 4807 const fltSemantics &Flt = 4808 II->getType()->getScalarType()->getFltSemantics(); 4809 unsigned Precision = APFloat::semanticsPrecision(Flt); 4810 const Value *ExpArg = II->getArgOperand(1); 4811 ConstantRange ExpRange = computeConstantRange( 4812 ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1); 4813 4814 const int MantissaBits = Precision - 1; 4815 if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits))) 4816 Known.knownNot(fcSubnormal); 4817 4818 const Function *F = II->getFunction(); 4819 const APInt *ConstVal = ExpRange.getSingleElement(); 4820 if (ConstVal && ConstVal->isZero()) { 4821 // ldexp(x, 0) -> x, so propagate everything. 
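// e.g. ldexp(8.0, -2) == 2.0 and ldexp(-0.0, 0) == -0.0; with a known-zero
// exponent every source class carries over (modulo the canonicalization
// handling in propagateCanonicalizingSrc).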
4822 Known.propagateCanonicalizingSrc(KnownSrc, *F, II->getType()); 4823 } else if (ExpRange.isAllNegative()) { 4824 // If we know the power is <= 0, can't introduce inf 4825 if (KnownSrc.isKnownNeverPosInfinity()) 4826 Known.knownNot(fcPosInf); 4827 if (KnownSrc.isKnownNeverNegInfinity()) 4828 Known.knownNot(fcNegInf); 4829 } else if (ExpRange.isAllNonNegative()) { 4830 // If we know the power is >= 0, can't introduce subnormal or zero 4831 if (KnownSrc.isKnownNeverPosSubnormal()) 4832 Known.knownNot(fcPosSubnormal); 4833 if (KnownSrc.isKnownNeverNegSubnormal()) 4834 Known.knownNot(fcNegSubnormal); 4835 if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, II->getType())) 4836 Known.knownNot(fcPosZero); 4837 if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType())) 4838 Known.knownNot(fcNegZero); 4839 } 4840 4841 break; 4842 } 4843 case Intrinsic::arithmetic_fence: { 4844 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4845 Known, Depth + 1, Q); 4846 break; 4847 } 4848 case Intrinsic::experimental_constrained_sitofp: 4849 case Intrinsic::experimental_constrained_uitofp: 4850 // Cannot produce nan 4851 Known.knownNot(fcNan); 4852 4853 // sitofp and uitofp turn into +0.0 for zero. 4854 Known.knownNot(fcNegZero); 4855 4856 // Integers cannot be subnormal 4857 Known.knownNot(fcSubnormal); 4858 4859 if (IID == Intrinsic::experimental_constrained_uitofp) 4860 Known.signBitMustBeZero(); 4861 4862 // TODO: Copy inf handling from instructions 4863 break; 4864 default: 4865 break; 4866 } 4867 4868 break; 4869 } 4870 case Instruction::FAdd: 4871 case Instruction::FSub: { 4872 KnownFPClass KnownLHS, KnownRHS; 4873 bool WantNegative = 4874 Op->getOpcode() == Instruction::FAdd && 4875 (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone; 4876 bool WantNaN = (InterestedClasses & fcNan) != fcNone; 4877 bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone; 4878 4879 if (!WantNaN && !WantNegative && !WantNegZero) 4880 break; 4881 4882 FPClassTest InterestedSrcs = InterestedClasses; 4883 if (WantNegative) 4884 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; 4885 if (InterestedClasses & fcNan) 4886 InterestedSrcs |= fcInf; 4887 computeKnownFPClass(Op->getOperand(1), DemandedElts, InterestedSrcs, 4888 KnownRHS, Depth + 1, Q); 4889 4890 if ((WantNaN && KnownRHS.isKnownNeverNaN()) || 4891 (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) || 4892 WantNegZero || Opc == Instruction::FSub) { 4893 4894 // RHS is canonically cheaper to compute. Skip inspecting the LHS if 4895 // there's no point. 4896 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedSrcs, 4897 KnownLHS, Depth + 1, Q); 4898 // Adding positive and negative infinity produces NaN. 4899 // TODO: Check sign of infinities. 4900 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 4901 (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity())) 4902 Known.knownNot(fcNan); 4903 4904 // FIXME: Context function should always be passed in separately 4905 const Function *F = cast<Instruction>(Op)->getFunction(); 4906 4907 if (Op->getOpcode() == Instruction::FAdd) { 4908 if (KnownLHS.cannotBeOrderedLessThanZero() && 4909 KnownRHS.cannotBeOrderedLessThanZero()) 4910 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4911 if (!F) 4912 break; 4913 4914 // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0. 
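// e.g. -0.0 + 0.0 == +0.0 and 1.0 + (-1.0) == +0.0 under the default
// rounding mode; a -0.0 sum requires both addends to be -0.0 (or a negative
// denormal result flushed to -0.0, which outputDenormalIsIEEEOrPosZero
// rules out below).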
4915 if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) || 4916 KnownRHS.isKnownNeverLogicalNegZero(*F, Op->getType())) && 4917 // Make sure output negative denormal can't flush to -0 4918 outputDenormalIsIEEEOrPosZero(*F, Op->getType())) 4919 Known.knownNot(fcNegZero); 4920 } else { 4921 if (!F) 4922 break; 4923 4924 // Only fsub -0, +0 can return -0 4925 if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) || 4926 KnownRHS.isKnownNeverLogicalPosZero(*F, Op->getType())) && 4927 // Make sure output negative denormal can't flush to -0 4928 outputDenormalIsIEEEOrPosZero(*F, Op->getType())) 4929 Known.knownNot(fcNegZero); 4930 } 4931 } 4932 4933 break; 4934 } 4935 case Instruction::FMul: { 4936 // X * X is always non-negative or a NaN. 4937 if (Op->getOperand(0) == Op->getOperand(1)) 4938 Known.knownNot(fcNegative); 4939 4940 if ((InterestedClasses & fcNan) != fcNan) 4941 break; 4942 4943 // fcSubnormal is only needed in case of DAZ. 4944 const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal; 4945 4946 KnownFPClass KnownLHS, KnownRHS; 4947 computeKnownFPClass(Op->getOperand(1), DemandedElts, NeedForNan, KnownRHS, 4948 Depth + 1, Q); 4949 if (!KnownRHS.isKnownNeverNaN()) 4950 break; 4951 4952 computeKnownFPClass(Op->getOperand(0), DemandedElts, NeedForNan, KnownLHS, 4953 Depth + 1, Q); 4954 if (!KnownLHS.isKnownNeverNaN()) 4955 break; 4956 4957 // If 0 * +/-inf produces NaN. 4958 if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) { 4959 Known.knownNot(fcNan); 4960 break; 4961 } 4962 4963 const Function *F = cast<Instruction>(Op)->getFunction(); 4964 if (!F) 4965 break; 4966 4967 if ((KnownRHS.isKnownNeverInfinity() || 4968 KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) && 4969 (KnownLHS.isKnownNeverInfinity() || 4970 KnownRHS.isKnownNeverLogicalZero(*F, Op->getType()))) 4971 Known.knownNot(fcNan); 4972 4973 break; 4974 } 4975 case Instruction::FDiv: 4976 case Instruction::FRem: { 4977 if (Op->getOperand(0) == Op->getOperand(1)) { 4978 // TODO: Could filter out snan if we inspect the operand 4979 if (Op->getOpcode() == Instruction::FDiv) { 4980 // X / X is always exactly 1.0 or a NaN. 4981 Known.KnownFPClasses = fcNan | fcPosNormal; 4982 } else { 4983 // X % X is always exactly [+-]0.0 or a NaN. 4984 Known.KnownFPClasses = fcNan | fcZero; 4985 } 4986 4987 break; 4988 } 4989 4990 const bool WantNan = (InterestedClasses & fcNan) != fcNone; 4991 const bool WantNegative = (InterestedClasses & fcNegative) != fcNone; 4992 const bool WantPositive = 4993 Opc == Instruction::FRem && (InterestedClasses & fcPositive) != fcNone; 4994 if (!WantNan && !WantNegative && !WantPositive) 4995 break; 4996 4997 KnownFPClass KnownLHS, KnownRHS; 4998 4999 computeKnownFPClass(Op->getOperand(1), DemandedElts, 5000 fcNan | fcInf | fcZero | fcNegative, KnownRHS, 5001 Depth + 1, Q); 5002 5003 bool KnowSomethingUseful = 5004 KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative); 5005 5006 if (KnowSomethingUseful || WantPositive) { 5007 const FPClassTest InterestedLHS = 5008 WantPositive ? fcAllFlags 5009 : fcNan | fcInf | fcZero | fcSubnormal | fcNegative; 5010 5011 computeKnownFPClass(Op->getOperand(0), DemandedElts, 5012 InterestedClasses & InterestedLHS, KnownLHS, 5013 Depth + 1, Q); 5014 } 5015 5016 const Function *F = cast<Instruction>(Op)->getFunction(); 5017 5018 if (Op->getOpcode() == Instruction::FDiv) { 5019 // Only 0/0, Inf/Inf produce NaN. 
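// i.e. the quotient is known NaN-free when both operands are NaN-free, at
// least one operand is not an infinity, and at least one operand is not a
// (logical, denormal-mode-aware) zero.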
5020 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 5021 (KnownLHS.isKnownNeverInfinity() || 5022 KnownRHS.isKnownNeverInfinity()) && 5023 ((F && KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) || 5024 (F && KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))) { 5025 Known.knownNot(fcNan); 5026 } 5027 5028 // X / -0.0 is -Inf (or NaN). 5029 // +X / +X is +X 5030 if (KnownLHS.isKnownNever(fcNegative) && KnownRHS.isKnownNever(fcNegative)) 5031 Known.knownNot(fcNegative); 5032 } else { 5033 // Inf REM x and x REM 0 produce NaN. 5034 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 5035 KnownLHS.isKnownNeverInfinity() && F && 5036 KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())) { 5037 Known.knownNot(fcNan); 5038 } 5039 5040 // The sign for frem is the same as the first operand. 5041 if (KnownLHS.cannotBeOrderedLessThanZero()) 5042 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 5043 if (KnownLHS.cannotBeOrderedGreaterThanZero()) 5044 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 5045 5046 // See if we can be more aggressive about the sign of 0. 5047 if (KnownLHS.isKnownNever(fcNegative)) 5048 Known.knownNot(fcNegative); 5049 if (KnownLHS.isKnownNever(fcPositive)) 5050 Known.knownNot(fcPositive); 5051 } 5052 5053 break; 5054 } 5055 case Instruction::FPExt: { 5056 // Infinity, nan and zero propagate from source. 5057 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, 5058 Known, Depth + 1, Q); 5059 5060 const fltSemantics &DstTy = 5061 Op->getType()->getScalarType()->getFltSemantics(); 5062 const fltSemantics &SrcTy = 5063 Op->getOperand(0)->getType()->getScalarType()->getFltSemantics(); 5064 5065 // All subnormal inputs should be in the normal range in the result type. 5066 if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy)) 5067 Known.knownNot(fcSubnormal); 5068 5069 // Sign bit of a nan isn't guaranteed. 5070 if (!Known.isKnownNeverNaN()) 5071 Known.SignBit = std::nullopt; 5072 break; 5073 } 5074 case Instruction::FPTrunc: { 5075 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known, 5076 Depth, Q); 5077 break; 5078 } 5079 case Instruction::SIToFP: 5080 case Instruction::UIToFP: { 5081 // Cannot produce nan 5082 Known.knownNot(fcNan); 5083 5084 // Integers cannot be subnormal 5085 Known.knownNot(fcSubnormal); 5086 5087 // sitofp and uitofp turn into +0.0 for zero. 5088 Known.knownNot(fcNegZero); 5089 if (Op->getOpcode() == Instruction::UIToFP) 5090 Known.signBitMustBeZero(); 5091 5092 if (InterestedClasses & fcInf) { 5093 // Get width of largest magnitude integer (remove a bit if signed). 5094 // This still works for a signed minimum value because the largest FP 5095 // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx). 5096 int IntSize = Op->getOperand(0)->getType()->getScalarSizeInBits(); 5097 if (Op->getOpcode() == Instruction::SIToFP) 5098 --IntSize; 5099 5100 // If the exponent of the largest finite FP value can hold the largest 5101 // integer, the result of the cast must be finite. 5102 Type *FPTy = Op->getType()->getScalarType(); 5103 if (ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize) 5104 Known.knownNot(fcInf); 5105 } 5106 5107 break; 5108 } 5109 case Instruction::ExtractElement: { 5110 // Look through extract element. If the index is non-constant or 5111 // out-of-range demand all elements, otherwise just the extracted element. 
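// e.g. for extractelement <4 x float> %v, i32 2 only lane 2 matters, so
// DemandedVecElts below becomes just bit 2 rather than all ones.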
5112 const Value *Vec = Op->getOperand(0); 5113 const Value *Idx = Op->getOperand(1); 5114 auto *CIdx = dyn_cast<ConstantInt>(Idx); 5115 5116 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) { 5117 unsigned NumElts = VecTy->getNumElements(); 5118 APInt DemandedVecElts = APInt::getAllOnes(NumElts); 5119 if (CIdx && CIdx->getValue().ult(NumElts)) 5120 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); 5121 return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known, 5122 Depth + 1, Q); 5123 } 5124 5125 break; 5126 } 5127 case Instruction::InsertElement: { 5128 if (isa<ScalableVectorType>(Op->getType())) 5129 return; 5130 5131 const Value *Vec = Op->getOperand(0); 5132 const Value *Elt = Op->getOperand(1); 5133 auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2)); 5134 // Early out if the index is non-constant or out-of-range. 5135 unsigned NumElts = DemandedElts.getBitWidth(); 5136 if (!CIdx || CIdx->getValue().uge(NumElts)) 5137 return; 5138 5139 unsigned EltIdx = CIdx->getZExtValue(); 5140 // Do we demand the inserted element? 5141 if (DemandedElts[EltIdx]) { 5142 computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q); 5143 // If we don't know any bits, early out. 5144 if (Known.isUnknown()) 5145 break; 5146 } else { 5147 Known.KnownFPClasses = fcNone; 5148 } 5149 5150 // We don't need the base vector element that has been inserted. 5151 APInt DemandedVecElts = DemandedElts; 5152 DemandedVecElts.clearBit(EltIdx); 5153 if (!!DemandedVecElts) { 5154 KnownFPClass Known2; 5155 computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2, 5156 Depth + 1, Q); 5157 Known |= Known2; 5158 } 5159 5160 break; 5161 } 5162 case Instruction::ShuffleVector: { 5163 // For undef elements, we don't know anything about the common state of 5164 // the shuffle result. 5165 APInt DemandedLHS, DemandedRHS; 5166 auto *Shuf = dyn_cast<ShuffleVectorInst>(Op); 5167 if (!Shuf || !getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) 5168 return; 5169 5170 if (!!DemandedLHS) { 5171 const Value *LHS = Shuf->getOperand(0); 5172 computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known, 5173 Depth + 1, Q); 5174 5175 // If we don't know any bits, early out. 
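// (once every class is possible and the sign bit is unknown, merging in the
// RHS cannot add information, so there is no point querying it.)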
5176 if (Known.isUnknown()) 5177 break; 5178 } else { 5179 Known.KnownFPClasses = fcNone; 5180 } 5181 5182 if (!!DemandedRHS) { 5183 KnownFPClass Known2; 5184 const Value *RHS = Shuf->getOperand(1); 5185 computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2, 5186 Depth + 1, Q); 5187 Known |= Known2; 5188 } 5189 5190 break; 5191 } 5192 case Instruction::ExtractValue: { 5193 const ExtractValueInst *Extract = cast<ExtractValueInst>(Op); 5194 ArrayRef<unsigned> Indices = Extract->getIndices(); 5195 const Value *Src = Extract->getAggregateOperand(); 5196 if (isa<StructType>(Src->getType()) && Indices.size() == 1 && 5197 Indices[0] == 0) { 5198 if (const auto *II = dyn_cast<IntrinsicInst>(Src)) { 5199 switch (II->getIntrinsicID()) { 5200 case Intrinsic::frexp: { 5201 Known.knownNot(fcSubnormal); 5202 5203 KnownFPClass KnownSrc; 5204 computeKnownFPClass(II->getArgOperand(0), DemandedElts, 5205 InterestedClasses, KnownSrc, Depth + 1, Q); 5206 5207 const Function *F = cast<Instruction>(Op)->getFunction(); 5208 5209 if (KnownSrc.isKnownNever(fcNegative)) 5210 Known.knownNot(fcNegative); 5211 else { 5212 if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, Op->getType())) 5213 Known.knownNot(fcNegZero); 5214 if (KnownSrc.isKnownNever(fcNegInf)) 5215 Known.knownNot(fcNegInf); 5216 } 5217 5218 if (KnownSrc.isKnownNever(fcPositive)) 5219 Known.knownNot(fcPositive); 5220 else { 5221 if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, Op->getType())) 5222 Known.knownNot(fcPosZero); 5223 if (KnownSrc.isKnownNever(fcPosInf)) 5224 Known.knownNot(fcPosInf); 5225 } 5226 5227 Known.propagateNaN(KnownSrc); 5228 return; 5229 } 5230 default: 5231 break; 5232 } 5233 } 5234 } 5235 5236 computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1, 5237 Q); 5238 break; 5239 } 5240 case Instruction::PHI: { 5241 const PHINode *P = cast<PHINode>(Op); 5242 // Unreachable blocks may have zero-operand PHI nodes. 5243 if (P->getNumIncomingValues() == 0) 5244 break; 5245 5246 // Otherwise take the unions of the known bit sets of the operands, 5247 // taking conservative care to avoid excessive recursion. 5248 const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2; 5249 5250 if (Depth < PhiRecursionLimit) { 5251 // Skip if every incoming value references to ourself. 5252 if (isa_and_nonnull<UndefValue>(P->hasConstantValue())) 5253 break; 5254 5255 bool First = true; 5256 5257 for (Value *IncValue : P->incoming_values()) { 5258 // Skip direct self references. 5259 if (IncValue == P) 5260 continue; 5261 5262 KnownFPClass KnownSrc; 5263 // Recurse, but cap the recursion to two levels, because we don't want 5264 // to waste time spinning around in loops. We need at least depth 2 to 5265 // detect known sign bits. 
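// Note that PhiRecursionLimit is passed as the starting Depth, so each
// incoming value gets only MaxAnalysisRecursionDepth - PhiRecursionLimit
// (i.e. two) further levels of recursion.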
5266 computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc, 5267 PhiRecursionLimit, Q); 5268 5269 if (First) { 5270 Known = KnownSrc; 5271 First = false; 5272 } else { 5273 Known |= KnownSrc; 5274 } 5275 5276 if (Known.KnownFPClasses == fcAllFlags) 5277 break; 5278 } 5279 } 5280 5281 break; 5282 } 5283 default: 5284 break; 5285 } 5286 } 5287 5288 KnownFPClass llvm::computeKnownFPClass( 5289 const Value *V, const APInt &DemandedElts, const DataLayout &DL, 5290 FPClassTest InterestedClasses, unsigned Depth, const TargetLibraryInfo *TLI, 5291 AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, 5292 bool UseInstrInfo) { 5293 KnownFPClass KnownClasses; 5294 ::computeKnownFPClass( 5295 V, DemandedElts, InterestedClasses, KnownClasses, Depth, 5296 SimplifyQuery(DL, TLI, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); 5297 return KnownClasses; 5298 } 5299 5300 KnownFPClass llvm::computeKnownFPClass( 5301 const Value *V, const DataLayout &DL, FPClassTest InterestedClasses, 5302 unsigned Depth, const TargetLibraryInfo *TLI, AssumptionCache *AC, 5303 const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { 5304 KnownFPClass Known; 5305 ::computeKnownFPClass( 5306 V, Known, InterestedClasses, Depth, 5307 SimplifyQuery(DL, TLI, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); 5308 return Known; 5309 } 5310 5311 Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { 5312 5313 // All byte-wide stores are splatable, even of arbitrary variables. 5314 if (V->getType()->isIntegerTy(8)) 5315 return V; 5316 5317 LLVMContext &Ctx = V->getContext(); 5318 5319 // Undef don't care. 5320 auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx)); 5321 if (isa<UndefValue>(V)) 5322 return UndefInt8; 5323 5324 // Return Undef for zero-sized type. 5325 if (DL.getTypeStoreSize(V->getType()).isZero()) 5326 return UndefInt8; 5327 5328 Constant *C = dyn_cast<Constant>(V); 5329 if (!C) { 5330 // Conceptually, we could handle things like: 5331 // %a = zext i8 %X to i16 5332 // %b = shl i16 %a, 8 5333 // %c = or i16 %a, %b 5334 // but until there is an example that actually needs this, it doesn't seem 5335 // worth worrying about. 5336 return nullptr; 5337 } 5338 5339 // Handle 'null' ConstantArrayZero etc. 5340 if (C->isNullValue()) 5341 return Constant::getNullValue(Type::getInt8Ty(Ctx)); 5342 5343 // Constant floating-point values can be handled as integer values if the 5344 // corresponding integer value is "byteable". An important case is 0.0. 5345 if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { 5346 Type *Ty = nullptr; 5347 if (CFP->getType()->isHalfTy()) 5348 Ty = Type::getInt16Ty(Ctx); 5349 else if (CFP->getType()->isFloatTy()) 5350 Ty = Type::getInt32Ty(Ctx); 5351 else if (CFP->getType()->isDoubleTy()) 5352 Ty = Type::getInt64Ty(Ctx); 5353 // Don't handle long double formats, which have strange constraints. 5354 return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL) 5355 : nullptr; 5356 } 5357 5358 // We can handle constant integers that are multiple of 8 bits. 
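// e.g. an i32 with value 0xAAAAAAAA is the byte 0xAA repeated four times and
// splats to that byte; i32 0x01020304 does not splat, so nullptr is returned.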
5359 if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) { 5360 if (CI->getBitWidth() % 8 == 0) { 5361 assert(CI->getBitWidth() > 8 && "8 bits should be handled above!"); 5362 if (!CI->getValue().isSplat(8)) 5363 return nullptr; 5364 return ConstantInt::get(Ctx, CI->getValue().trunc(8)); 5365 } 5366 } 5367 5368 if (auto *CE = dyn_cast<ConstantExpr>(C)) { 5369 if (CE->getOpcode() == Instruction::IntToPtr) { 5370 if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) { 5371 unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace()); 5372 if (Constant *Op = ConstantFoldIntegerCast( 5373 CE->getOperand(0), Type::getIntNTy(Ctx, BitWidth), false, DL)) 5374 return isBytewiseValue(Op, DL); 5375 } 5376 } 5377 } 5378 5379 auto Merge = [&](Value *LHS, Value *RHS) -> Value * { 5380 if (LHS == RHS) 5381 return LHS; 5382 if (!LHS || !RHS) 5383 return nullptr; 5384 if (LHS == UndefInt8) 5385 return RHS; 5386 if (RHS == UndefInt8) 5387 return LHS; 5388 return nullptr; 5389 }; 5390 5391 if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) { 5392 Value *Val = UndefInt8; 5393 for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I) 5394 if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL)))) 5395 return nullptr; 5396 return Val; 5397 } 5398 5399 if (isa<ConstantAggregate>(C)) { 5400 Value *Val = UndefInt8; 5401 for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) 5402 if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL)))) 5403 return nullptr; 5404 return Val; 5405 } 5406 5407 // Don't try to handle the handful of other constants. 5408 return nullptr; 5409 } 5410 5411 // This is the recursive version of BuildSubAggregate. It takes a few different 5412 // arguments. Idxs is the index within the nested struct From that we are 5413 // looking at now (which is of type IndexedType). IdxSkip is the number of 5414 // indices from Idxs that should be left out when inserting into the resulting 5415 // struct. To is the result struct built so far, new insertvalue instructions 5416 // build on that. 5417 static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, 5418 SmallVectorImpl<unsigned> &Idxs, 5419 unsigned IdxSkip, 5420 Instruction *InsertBefore) { 5421 StructType *STy = dyn_cast<StructType>(IndexedType); 5422 if (STy) { 5423 // Save the original To argument so we can modify it 5424 Value *OrigTo = To; 5425 // General case, the type indexed by Idxs is a struct 5426 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 5427 // Process each struct element recursively 5428 Idxs.push_back(i); 5429 Value *PrevTo = To; 5430 To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip, 5431 InsertBefore); 5432 Idxs.pop_back(); 5433 if (!To) { 5434 // Couldn't find any inserted value for this index? Cleanup 5435 while (PrevTo != OrigTo) { 5436 InsertValueInst* Del = cast<InsertValueInst>(PrevTo); 5437 PrevTo = Del->getAggregateOperand(); 5438 Del->eraseFromParent(); 5439 } 5440 // Stop processing elements 5441 break; 5442 } 5443 } 5444 // If we successfully found a value for each of our subaggregates 5445 if (To) 5446 return To; 5447 } 5448 // Base case, the type indexed by SourceIdxs is not a struct, or not all of 5449 // the struct's elements had a value that was inserted directly. In the latter 5450 // case, perhaps we can't determine each of the subelements individually, but 5451 // we might be able to find the complete struct somewhere. 
5452 5453 // Find the value that is at that particular spot 5454 Value *V = FindInsertedValue(From, Idxs); 5455 5456 if (!V) 5457 return nullptr; 5458 5459 // Insert the value in the new (sub) aggregate 5460 return InsertValueInst::Create(To, V, ArrayRef(Idxs).slice(IdxSkip), "tmp", 5461 InsertBefore); 5462 } 5463 5464 // This helper takes a nested struct and extracts a part of it (which is again a 5465 // struct) into a new value. For example, given the struct: 5466 // { a, { b, { c, d }, e } } 5467 // and the indices "1, 1" this returns 5468 // { c, d }. 5469 // 5470 // It does this by inserting an insertvalue for each element in the resulting 5471 // struct, as opposed to just inserting a single struct. This will only work if 5472 // each of the elements of the substruct are known (ie, inserted into From by an 5473 // insertvalue instruction somewhere). 5474 // 5475 // All inserted insertvalue instructions are inserted before InsertBefore 5476 static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range, 5477 Instruction *InsertBefore) { 5478 assert(InsertBefore && "Must have someplace to insert!"); 5479 Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(), 5480 idx_range); 5481 Value *To = PoisonValue::get(IndexedType); 5482 SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end()); 5483 unsigned IdxSkip = Idxs.size(); 5484 5485 return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore); 5486 } 5487 5488 /// Given an aggregate and a sequence of indices, see if the scalar value 5489 /// indexed is already around as a register, for example if it was inserted 5490 /// directly into the aggregate. 5491 /// 5492 /// If InsertBefore is not null, this function will duplicate (modified) 5493 /// insertvalues when a part of a nested struct is extracted. 5494 Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, 5495 Instruction *InsertBefore) { 5496 // Nothing to index? Just return V then (this is useful at the end of our 5497 // recursion). 5498 if (idx_range.empty()) 5499 return V; 5500 // We have indices, so V should have an indexable type. 5501 assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) && 5502 "Not looking at a struct or array?"); 5503 assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) && 5504 "Invalid indices for type?"); 5505 5506 if (Constant *C = dyn_cast<Constant>(V)) { 5507 C = C->getAggregateElement(idx_range[0]); 5508 if (!C) return nullptr; 5509 return FindInsertedValue(C, idx_range.slice(1), InsertBefore); 5510 } 5511 5512 if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) { 5513 // Loop the indices for the insertvalue instruction in parallel with the 5514 // requested indices 5515 const unsigned *req_idx = idx_range.begin(); 5516 for (const unsigned *i = I->idx_begin(), *e = I->idx_end(); 5517 i != e; ++i, ++req_idx) { 5518 if (req_idx == idx_range.end()) { 5519 // We can't handle this without inserting insertvalues 5520 if (!InsertBefore) 5521 return nullptr; 5522 5523 // The requested index identifies a part of a nested aggregate. Handle 5524 // this specially. 
For example,
5525 // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
5526 // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
5527 // %C = extractvalue {i32, { i32, i32 } } %B, 1
5528 // This can be changed into
5529 // %A = insertvalue {i32, i32 } undef, i32 10, 0
5530 // %C = insertvalue {i32, i32 } %A, i32 11, 1
5531 // which allows the unused 0,0 element from the nested struct to be
5532 // removed.
5533 return BuildSubAggregate(V, ArrayRef(idx_range.begin(), req_idx),
5534 InsertBefore);
5535 }
5536
5537 // This insertvalue inserts something other than what we are looking for.
5538 // See if the (aggregate) value inserted into has the value we are
5539 // looking for, then.
5540 if (*req_idx != *i)
5541 return FindInsertedValue(I->getAggregateOperand(), idx_range,
5542 InsertBefore);
5543 }
5544 // If we end up here, the indices of the insertvalue match with those
5545 // requested (though possibly only partially). Now we recursively look at
5546 // the inserted value, passing any remaining indices.
5547 return FindInsertedValue(I->getInsertedValueOperand(),
5548 ArrayRef(req_idx, idx_range.end()), InsertBefore);
5549 }
5550
5551 if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
5552 // If we're extracting a value from an aggregate that was extracted from
5553 // something else, we can extract from that something else directly instead.
5554 // However, we will need to chain I's indices with the requested indices.
5555
5556 // Calculate the number of indices required
5557 unsigned size = I->getNumIndices() + idx_range.size();
5558 // Allocate some space to put the new indices in
5559 SmallVector<unsigned, 5> Idxs;
5560 Idxs.reserve(size);
5561 // Add indices from the extract value instruction
5562 Idxs.append(I->idx_begin(), I->idx_end());
5563
5564 // Add requested indices
5565 Idxs.append(idx_range.begin(), idx_range.end());
5566
5567 assert(Idxs.size() == size
5568 && "Number of indices added not correct?");
5569
5570 return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
5571 }
5572 // Otherwise, we don't know (such as when extracting from a function return
5573 // value or a load instruction).
5574 return nullptr;
5575 }
5576
5577 bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
5578 unsigned CharSize) {
5579 // Make sure the GEP has exactly three arguments.
5580 if (GEP->getNumOperands() != 3)
5581 return false;
5582
5583 // Make sure the pointer being indexed points to an array of integers that
5584 // are \p CharSize bits wide.
5585 ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
5586 if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
5587 return false;
5588
5589 // Check to make sure that the first operand of the GEP is an integer and
5590 // has value 0 so that we are sure we're indexing into the initializer.
5591 const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
5592 if (!FirstIdx || !FirstIdx->isZero())
5593 return false;
5594
5595 return true;
5596 }
5597
5598 // If V refers to an initialized global constant, set Slice either to
5599 // its initializer if the size of its elements equals ElementSize, or,
5600 // for ElementSize == 8, to its representation as an array of unsigned
5601 // char. Return true on success.
5602 // Offset is in units of ElementSize-sized elements.
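// For example (illustrative only): with ElementSize == 8 and Offset == 2, a
// global initialized to c"hi!\00" yields Slice.Offset == 2 and
// Slice.Length == 2, covering the bytes '!' and '\0'.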
5603 bool llvm::getConstantDataArrayInfo(const Value *V, 5604 ConstantDataArraySlice &Slice, 5605 unsigned ElementSize, uint64_t Offset) { 5606 assert(V && "V should not be null."); 5607 assert((ElementSize % 8) == 0 && 5608 "ElementSize expected to be a multiple of the size of a byte."); 5609 unsigned ElementSizeInBytes = ElementSize / 8; 5610 5611 // Drill down into the pointer expression V, ignoring any intervening 5612 // casts, and determine the identity of the object it references along 5613 // with the cumulative byte offset into it. 5614 const GlobalVariable *GV = 5615 dyn_cast<GlobalVariable>(getUnderlyingObject(V)); 5616 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) 5617 // Fail if V is not based on constant global object. 5618 return false; 5619 5620 const DataLayout &DL = GV->getParent()->getDataLayout(); 5621 APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0); 5622 5623 if (GV != V->stripAndAccumulateConstantOffsets(DL, Off, 5624 /*AllowNonInbounds*/ true)) 5625 // Fail if a constant offset could not be determined. 5626 return false; 5627 5628 uint64_t StartIdx = Off.getLimitedValue(); 5629 if (StartIdx == UINT64_MAX) 5630 // Fail if the constant offset is excessive. 5631 return false; 5632 5633 // Off/StartIdx is in the unit of bytes. So we need to convert to number of 5634 // elements. Simply bail out if that isn't possible. 5635 if ((StartIdx % ElementSizeInBytes) != 0) 5636 return false; 5637 5638 Offset += StartIdx / ElementSizeInBytes; 5639 ConstantDataArray *Array = nullptr; 5640 ArrayType *ArrayTy = nullptr; 5641 5642 if (GV->getInitializer()->isNullValue()) { 5643 Type *GVTy = GV->getValueType(); 5644 uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedValue(); 5645 uint64_t Length = SizeInBytes / ElementSizeInBytes; 5646 5647 Slice.Array = nullptr; 5648 Slice.Offset = 0; 5649 // Return an empty Slice for undersized constants to let callers 5650 // transform even undefined library calls into simpler, well-defined 5651 // expressions. This is preferable to making the calls although it 5652 // prevents sanitizers from detecting such calls. 5653 Slice.Length = Length < Offset ? 0 : Length - Offset; 5654 return true; 5655 } 5656 5657 auto *Init = const_cast<Constant *>(GV->getInitializer()); 5658 if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) { 5659 Type *InitElTy = ArrayInit->getElementType(); 5660 if (InitElTy->isIntegerTy(ElementSize)) { 5661 // If Init is an initializer for an array of the expected type 5662 // and size, use it as is. 5663 Array = ArrayInit; 5664 ArrayTy = ArrayInit->getType(); 5665 } 5666 } 5667 5668 if (!Array) { 5669 if (ElementSize != 8) 5670 // TODO: Handle conversions to larger integral types. 5671 return false; 5672 5673 // Otherwise extract the portion of the initializer starting 5674 // at Offset as an array of bytes, and reset Offset. 5675 Init = ReadByteArrayFromGlobal(GV, Offset); 5676 if (!Init) 5677 return false; 5678 5679 Offset = 0; 5680 Array = dyn_cast<ConstantDataArray>(Init); 5681 ArrayTy = dyn_cast<ArrayType>(Init->getType()); 5682 } 5683 5684 uint64_t NumElts = ArrayTy->getArrayNumElements(); 5685 if (Offset > NumElts) 5686 return false; 5687 5688 Slice.Array = Array; 5689 Slice.Offset = Offset; 5690 Slice.Length = NumElts - Offset; 5691 return true; 5692 } 5693 5694 /// Extract bytes from the initializer of the constant array V, which need 5695 /// not be a nul-terminated string. On success, store the bytes in Str and 5696 /// return true. 
When TrimAtNul is set, Str will contain only the bytes up 5697 /// to but not including the first nul. Return false on failure. 5698 bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, 5699 bool TrimAtNul) { 5700 ConstantDataArraySlice Slice; 5701 if (!getConstantDataArrayInfo(V, Slice, 8)) 5702 return false; 5703 5704 if (Slice.Array == nullptr) { 5705 if (TrimAtNul) { 5706 // Return a nul-terminated string even for an empty Slice. This is 5707 // safe because all existing SimplifyLibcalls callers require string 5708 // arguments and the behavior of the functions they fold is undefined 5709 // otherwise. Folding the calls this way is preferable to making 5710 // the undefined library calls, even though it prevents sanitizers 5711 // from reporting such calls. 5712 Str = StringRef(); 5713 return true; 5714 } 5715 if (Slice.Length == 1) { 5716 Str = StringRef("", 1); 5717 return true; 5718 } 5719 // We cannot instantiate a StringRef as we do not have an appropriate string 5720 // of 0s at hand. 5721 return false; 5722 } 5723 5724 // Start out with the entire array in the StringRef. 5725 Str = Slice.Array->getAsString(); 5726 // Skip over 'offset' bytes. 5727 Str = Str.substr(Slice.Offset); 5728 5729 if (TrimAtNul) { 5730 // Trim off the \0 and anything after it. If the array is not nul 5731 // terminated, we just return the whole end of string. The client may know 5732 // some other way that the string is length-bound. 5733 Str = Str.substr(0, Str.find('\0')); 5734 } 5735 return true; 5736 } 5737 5738 // These next two are very similar to the above, but also look through PHI 5739 // nodes. 5740 // TODO: See if we can integrate these two together. 5741 5742 /// If we can compute the length of the string pointed to by 5743 /// the specified pointer, return 'len+1'. If we can't, return 0. 5744 static uint64_t GetStringLengthH(const Value *V, 5745 SmallPtrSetImpl<const PHINode*> &PHIs, 5746 unsigned CharSize) { 5747 // Look through noop bitcast instructions. 5748 V = V->stripPointerCasts(); 5749 5750 // If this is a PHI node, there are two cases: either we have already seen it 5751 // or we haven't. 5752 if (const PHINode *PN = dyn_cast<PHINode>(V)) { 5753 if (!PHIs.insert(PN).second) 5754 return ~0ULL; // already in the set. 5755 5756 // If it was new, see if all the input strings are the same length. 5757 uint64_t LenSoFar = ~0ULL; 5758 for (Value *IncValue : PN->incoming_values()) { 5759 uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize); 5760 if (Len == 0) return 0; // Unknown length -> unknown. 5761 5762 if (Len == ~0ULL) continue; 5763 5764 if (Len != LenSoFar && LenSoFar != ~0ULL) 5765 return 0; // Disagree -> unknown. 5766 LenSoFar = Len; 5767 } 5768 5769 // Success, all agree. 5770 return LenSoFar; 5771 } 5772 5773 // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) 5774 if (const SelectInst *SI = dyn_cast<SelectInst>(V)) { 5775 uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize); 5776 if (Len1 == 0) return 0; 5777 uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize); 5778 if (Len2 == 0) return 0; 5779 if (Len1 == ~0ULL) return Len2; 5780 if (Len2 == ~0ULL) return Len1; 5781 if (Len1 != Len2) return 0; 5782 return Len1; 5783 } 5784 5785 // Otherwise, see if we can read the string. 5786 ConstantDataArraySlice Slice; 5787 if (!getConstantDataArrayInfo(V, Slice, CharSize)) 5788 return 0; 5789 5790 if (Slice.Array == nullptr) 5791 // Zeroinitializer (including an empty one). 5792 return 1; 5793 5794 // Search for the first nul character. 
Return a conservative result even
5795 // when there is no nul. This is safe since otherwise the string function
5796 // being folded, such as strlen, has undefined behavior, and folding is
5797 // preferable to making the undefined library call.
5798 unsigned NullIndex = 0;
5799 for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
5800 if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
5801 break;
5802 }
5803
5804 return NullIndex + 1;
5805 }
5806
5807 /// If we can compute the length of the string pointed to by
5808 /// the specified pointer, return 'len+1'. If we can't, return 0.
5809 uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
5810 if (!V->getType()->isPointerTy())
5811 return 0;
5812
5813 SmallPtrSet<const PHINode*, 32> PHIs;
5814 uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
5815 // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
5816 // an empty string as a length.
5817 return Len == ~0ULL ? 1 : Len;
5818 }
5819
5820 const Value *
5821 llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call,
5822 bool MustPreserveNullness) {
5823 assert(Call &&
5824 "getArgumentAliasingToReturnedPointer only works on nonnull calls");
5825 if (const Value *RV = Call->getReturnedArgOperand())
5826 return RV;
5827 // This can be used only as an aliasing property.
5828 if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
5829 Call, MustPreserveNullness))
5830 return Call->getArgOperand(0);
5831 return nullptr;
5832 }
5833
5834 bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
5835 const CallBase *Call, bool MustPreserveNullness) {
5836 switch (Call->getIntrinsicID()) {
5837 case Intrinsic::launder_invariant_group:
5838 case Intrinsic::strip_invariant_group:
5839 case Intrinsic::aarch64_irg:
5840 case Intrinsic::aarch64_tagp:
5841 // The amdgcn_make_buffer_rsrc function does not alter the address of the
5842 // input pointer (and thus preserves null-ness for the purposes of escape
5843 // analysis, which is where the MustPreserveNullness flag comes into play).
5844 // However, it will not necessarily map ptr addrspace(N) null to ptr
5845 // addrspace(8) null, aka the "null descriptor", which has "all loads return
5846 // 0, all stores are dropped" semantics. Given the context of this intrinsic
5847 // list, no one should be relying on such a strict interpretation of
5848 // MustPreserveNullness (and, at time of writing, they are not), but we
5849 // document this fact out of an abundance of caution.
5850 case Intrinsic::amdgcn_make_buffer_rsrc:
5851 return true;
5852 case Intrinsic::ptrmask:
5853 return !MustPreserveNullness;
5854 default:
5855 return false;
5856 }
5857 }
5858
5859 /// \p PN defines a loop-variant pointer to an object. Check if the
5860 /// previous iteration of the loop was referring to the same object as \p PN.
5861 static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
5862 const LoopInfo *LI) {
5863 // Find the loop-defined value.
5864 Loop *L = LI->getLoopFor(PN->getParent());
5865 if (PN->getNumIncomingValues() != 2)
5866 return true;
5867
5868 // Find the value from the previous iteration.
5869 auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0)); 5870 if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) 5871 PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1)); 5872 if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) 5873 return true; 5874 5875 // If a new pointer is loaded in the loop, the pointer references a different 5876 // object in every iteration. E.g.: 5877 // for (i) 5878 // int *p = a[i]; 5879 // ... 5880 if (auto *Load = dyn_cast<LoadInst>(PrevValue)) 5881 if (!L->isLoopInvariant(Load->getPointerOperand())) 5882 return false; 5883 return true; 5884 } 5885 5886 const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) { 5887 if (!V->getType()->isPointerTy()) 5888 return V; 5889 for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { 5890 if (auto *GEP = dyn_cast<GEPOperator>(V)) { 5891 V = GEP->getPointerOperand(); 5892 } else if (Operator::getOpcode(V) == Instruction::BitCast || 5893 Operator::getOpcode(V) == Instruction::AddrSpaceCast) { 5894 V = cast<Operator>(V)->getOperand(0); 5895 if (!V->getType()->isPointerTy()) 5896 return V; 5897 } else if (auto *GA = dyn_cast<GlobalAlias>(V)) { 5898 if (GA->isInterposable()) 5899 return V; 5900 V = GA->getAliasee(); 5901 } else { 5902 if (auto *PHI = dyn_cast<PHINode>(V)) { 5903 // Look through single-arg phi nodes created by LCSSA. 5904 if (PHI->getNumIncomingValues() == 1) { 5905 V = PHI->getIncomingValue(0); 5906 continue; 5907 } 5908 } else if (auto *Call = dyn_cast<CallBase>(V)) { 5909 // CaptureTracking can know about special capturing properties of some 5910 // intrinsics like launder.invariant.group, that can't be expressed with 5911 // the attributes, but have properties like returning aliasing pointer. 5912 // Because some analysis may assume that nocaptured pointer is not 5913 // returned from some special intrinsic (because function would have to 5914 // be marked with returns attribute), it is crucial to use this function 5915 // because it should be in sync with CaptureTracking. Not using it may 5916 // cause weird miscompilations where 2 aliasing pointers are assumed to 5917 // noalias. 5918 if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) { 5919 V = RP; 5920 continue; 5921 } 5922 } 5923 5924 return V; 5925 } 5926 assert(V->getType()->isPointerTy() && "Unexpected operand type!"); 5927 } 5928 return V; 5929 } 5930 5931 void llvm::getUnderlyingObjects(const Value *V, 5932 SmallVectorImpl<const Value *> &Objects, 5933 LoopInfo *LI, unsigned MaxLookup) { 5934 SmallPtrSet<const Value *, 4> Visited; 5935 SmallVector<const Value *, 4> Worklist; 5936 Worklist.push_back(V); 5937 do { 5938 const Value *P = Worklist.pop_back_val(); 5939 P = getUnderlyingObject(P, MaxLookup); 5940 5941 if (!Visited.insert(P).second) 5942 continue; 5943 5944 if (auto *SI = dyn_cast<SelectInst>(P)) { 5945 Worklist.push_back(SI->getTrueValue()); 5946 Worklist.push_back(SI->getFalseValue()); 5947 continue; 5948 } 5949 5950 if (auto *PN = dyn_cast<PHINode>(P)) { 5951 // If this PHI changes the underlying object in every iteration of the 5952 // loop, don't look through it. Consider: 5953 // int **A; 5954 // for (i) { 5955 // Prev = Curr; // Prev = PHI (Prev_0, Curr) 5956 // Curr = A[i]; 5957 // *Prev, *Curr; 5958 // 5959 // Prev is tracking Curr one iteration behind so they refer to different 5960 // underlying objects. 
5961 if (!LI || !LI->isLoopHeader(PN->getParent()) || 5962 isSameUnderlyingObjectInLoop(PN, LI)) 5963 append_range(Worklist, PN->incoming_values()); 5964 continue; 5965 } 5966 5967 Objects.push_back(P); 5968 } while (!Worklist.empty()); 5969 } 5970 5971 /// This is the function that does the work of looking through basic 5972 /// ptrtoint+arithmetic+inttoptr sequences. 5973 static const Value *getUnderlyingObjectFromInt(const Value *V) { 5974 do { 5975 if (const Operator *U = dyn_cast<Operator>(V)) { 5976 // If we find a ptrtoint, we can transfer control back to the 5977 // regular getUnderlyingObjectFromInt. 5978 if (U->getOpcode() == Instruction::PtrToInt) 5979 return U->getOperand(0); 5980 // If we find an add of a constant, a multiplied value, or a phi, it's 5981 // likely that the other operand will lead us to the base 5982 // object. We don't have to worry about the case where the 5983 // object address is somehow being computed by the multiply, 5984 // because our callers only care when the result is an 5985 // identifiable object. 5986 if (U->getOpcode() != Instruction::Add || 5987 (!isa<ConstantInt>(U->getOperand(1)) && 5988 Operator::getOpcode(U->getOperand(1)) != Instruction::Mul && 5989 !isa<PHINode>(U->getOperand(1)))) 5990 return V; 5991 V = U->getOperand(0); 5992 } else { 5993 return V; 5994 } 5995 assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); 5996 } while (true); 5997 } 5998 5999 /// This is a wrapper around getUnderlyingObjects and adds support for basic 6000 /// ptrtoint+arithmetic+inttoptr sequences. 6001 /// It returns false if unidentified object is found in getUnderlyingObjects. 6002 bool llvm::getUnderlyingObjectsForCodeGen(const Value *V, 6003 SmallVectorImpl<Value *> &Objects) { 6004 SmallPtrSet<const Value *, 16> Visited; 6005 SmallVector<const Value *, 4> Working(1, V); 6006 do { 6007 V = Working.pop_back_val(); 6008 6009 SmallVector<const Value *, 4> Objs; 6010 getUnderlyingObjects(V, Objs); 6011 6012 for (const Value *V : Objs) { 6013 if (!Visited.insert(V).second) 6014 continue; 6015 if (Operator::getOpcode(V) == Instruction::IntToPtr) { 6016 const Value *O = 6017 getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); 6018 if (O->getType()->isPointerTy()) { 6019 Working.push_back(O); 6020 continue; 6021 } 6022 } 6023 // If getUnderlyingObjects fails to find an identifiable object, 6024 // getUnderlyingObjectsForCodeGen also fails for safety. 
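// (Identified objects are e.g. allocas, non-alias globals, noalias calls and
// noalias/byval arguments; an arbitrary inttoptr result does not qualify.)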
6025 if (!isIdentifiedObject(V)) { 6026 Objects.clear(); 6027 return false; 6028 } 6029 Objects.push_back(const_cast<Value *>(V)); 6030 } 6031 } while (!Working.empty()); 6032 return true; 6033 } 6034 6035 AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) { 6036 AllocaInst *Result = nullptr; 6037 SmallPtrSet<Value *, 4> Visited; 6038 SmallVector<Value *, 4> Worklist; 6039 6040 auto AddWork = [&](Value *V) { 6041 if (Visited.insert(V).second) 6042 Worklist.push_back(V); 6043 }; 6044 6045 AddWork(V); 6046 do { 6047 V = Worklist.pop_back_val(); 6048 assert(Visited.count(V)); 6049 6050 if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 6051 if (Result && Result != AI) 6052 return nullptr; 6053 Result = AI; 6054 } else if (CastInst *CI = dyn_cast<CastInst>(V)) { 6055 AddWork(CI->getOperand(0)); 6056 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 6057 for (Value *IncValue : PN->incoming_values()) 6058 AddWork(IncValue); 6059 } else if (auto *SI = dyn_cast<SelectInst>(V)) { 6060 AddWork(SI->getTrueValue()); 6061 AddWork(SI->getFalseValue()); 6062 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { 6063 if (OffsetZero && !GEP->hasAllZeroIndices()) 6064 return nullptr; 6065 AddWork(GEP->getPointerOperand()); 6066 } else if (CallBase *CB = dyn_cast<CallBase>(V)) { 6067 Value *Returned = CB->getReturnedArgOperand(); 6068 if (Returned) 6069 AddWork(Returned); 6070 else 6071 return nullptr; 6072 } else { 6073 return nullptr; 6074 } 6075 } while (!Worklist.empty()); 6076 6077 return Result; 6078 } 6079 6080 static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6081 const Value *V, bool AllowLifetime, bool AllowDroppable) { 6082 for (const User *U : V->users()) { 6083 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); 6084 if (!II) 6085 return false; 6086 6087 if (AllowLifetime && II->isLifetimeStartOrEnd()) 6088 continue; 6089 6090 if (AllowDroppable && II->isDroppable()) 6091 continue; 6092 6093 return false; 6094 } 6095 return true; 6096 } 6097 6098 bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { 6099 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6100 V, /* AllowLifetime */ true, /* AllowDroppable */ false); 6101 } 6102 bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) { 6103 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6104 V, /* AllowLifetime */ true, /* AllowDroppable */ true); 6105 } 6106 6107 bool llvm::mustSuppressSpeculation(const LoadInst &LI) { 6108 if (!LI.isUnordered()) 6109 return true; 6110 const Function &F = *LI.getFunction(); 6111 // Speculative load may create a race that did not exist in the source. 6112 return F.hasFnAttribute(Attribute::SanitizeThread) || 6113 // Speculative load may load data from dirty regions. 6114 F.hasFnAttribute(Attribute::SanitizeAddress) || 6115 F.hasFnAttribute(Attribute::SanitizeHWAddress); 6116 } 6117 6118 bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst, 6119 const Instruction *CtxI, 6120 AssumptionCache *AC, 6121 const DominatorTree *DT, 6122 const TargetLibraryInfo *TLI) { 6123 return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, 6124 AC, DT, TLI); 6125 } 6126 6127 bool llvm::isSafeToSpeculativelyExecuteWithOpcode( 6128 unsigned Opcode, const Instruction *Inst, const Instruction *CtxI, 6129 AssumptionCache *AC, const DominatorTree *DT, 6130 const TargetLibraryInfo *TLI) { 6131 #ifndef NDEBUG 6132 if (Inst->getOpcode() != Opcode) { 6133 // Check that the operands are actually compatible with the Opcode override. 
6134 auto hasEqualReturnAndLeadingOperandTypes = 6135 [](const Instruction *Inst, unsigned NumLeadingOperands) { 6136 if (Inst->getNumOperands() < NumLeadingOperands) 6137 return false; 6138 const Type *ExpectedType = Inst->getType(); 6139 for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp) 6140 if (Inst->getOperand(ItOp)->getType() != ExpectedType) 6141 return false; 6142 return true; 6143 }; 6144 assert(!Instruction::isBinaryOp(Opcode) || 6145 hasEqualReturnAndLeadingOperandTypes(Inst, 2)); 6146 assert(!Instruction::isUnaryOp(Opcode) || 6147 hasEqualReturnAndLeadingOperandTypes(Inst, 1)); 6148 } 6149 #endif 6150 6151 switch (Opcode) { 6152 default: 6153 return true; 6154 case Instruction::UDiv: 6155 case Instruction::URem: { 6156 // x / y is undefined if y == 0. 6157 const APInt *V; 6158 if (match(Inst->getOperand(1), m_APInt(V))) 6159 return *V != 0; 6160 return false; 6161 } 6162 case Instruction::SDiv: 6163 case Instruction::SRem: { 6164 // x / y is undefined if y == 0 or x == INT_MIN and y == -1 6165 const APInt *Numerator, *Denominator; 6166 if (!match(Inst->getOperand(1), m_APInt(Denominator))) 6167 return false; 6168 // We cannot hoist this division if the denominator is 0. 6169 if (*Denominator == 0) 6170 return false; 6171 // It's safe to hoist if the denominator is not 0 or -1. 6172 if (!Denominator->isAllOnes()) 6173 return true; 6174 // At this point we know that the denominator is -1. It is safe to hoist as 6175 // long we know that the numerator is not INT_MIN. 6176 if (match(Inst->getOperand(0), m_APInt(Numerator))) 6177 return !Numerator->isMinSignedValue(); 6178 // The numerator *might* be MinSignedValue. 6179 return false; 6180 } 6181 case Instruction::Load: { 6182 const LoadInst *LI = dyn_cast<LoadInst>(Inst); 6183 if (!LI) 6184 return false; 6185 if (mustSuppressSpeculation(*LI)) 6186 return false; 6187 const DataLayout &DL = LI->getModule()->getDataLayout(); 6188 return isDereferenceableAndAlignedPointer(LI->getPointerOperand(), 6189 LI->getType(), LI->getAlign(), DL, 6190 CtxI, AC, DT, TLI); 6191 } 6192 case Instruction::Call: { 6193 auto *CI = dyn_cast<const CallInst>(Inst); 6194 if (!CI) 6195 return false; 6196 const Function *Callee = CI->getCalledFunction(); 6197 6198 // The called function could have undefined behavior or side-effects, even 6199 // if marked readnone nounwind. 6200 return Callee && Callee->isSpeculatable(); 6201 } 6202 case Instruction::VAArg: 6203 case Instruction::Alloca: 6204 case Instruction::Invoke: 6205 case Instruction::CallBr: 6206 case Instruction::PHI: 6207 case Instruction::Store: 6208 case Instruction::Ret: 6209 case Instruction::Br: 6210 case Instruction::IndirectBr: 6211 case Instruction::Switch: 6212 case Instruction::Unreachable: 6213 case Instruction::Fence: 6214 case Instruction::AtomicRMW: 6215 case Instruction::AtomicCmpXchg: 6216 case Instruction::LandingPad: 6217 case Instruction::Resume: 6218 case Instruction::CatchSwitch: 6219 case Instruction::CatchPad: 6220 case Instruction::CatchRet: 6221 case Instruction::CleanupPad: 6222 case Instruction::CleanupRet: 6223 return false; // Misc instructions which have effects 6224 } 6225 } 6226 6227 bool llvm::mayHaveNonDefUseDependency(const Instruction &I) { 6228 if (I.mayReadOrWriteMemory()) 6229 // Memory dependency possible 6230 return true; 6231 if (!isSafeToSpeculativelyExecute(&I)) 6232 // Can't move above a maythrow call or infinite loop. Or if an 6233 // inalloca alloca, above a stacksave call. 
6234 return true; 6235 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 6236 // 1) Can't reorder two inf-loop calls, even if readonly 6237 // 2) Also can't reorder an inf-loop call below a instruction which isn't 6238 // safe to speculative execute. (Inverse of above) 6239 return true; 6240 return false; 6241 } 6242 6243 /// Convert ConstantRange OverflowResult into ValueTracking OverflowResult. 6244 static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) { 6245 switch (OR) { 6246 case ConstantRange::OverflowResult::MayOverflow: 6247 return OverflowResult::MayOverflow; 6248 case ConstantRange::OverflowResult::AlwaysOverflowsLow: 6249 return OverflowResult::AlwaysOverflowsLow; 6250 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: 6251 return OverflowResult::AlwaysOverflowsHigh; 6252 case ConstantRange::OverflowResult::NeverOverflows: 6253 return OverflowResult::NeverOverflows; 6254 } 6255 llvm_unreachable("Unknown OverflowResult"); 6256 } 6257 6258 /// Combine constant ranges from computeConstantRange() and computeKnownBits(). 6259 ConstantRange 6260 llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V, 6261 bool ForSigned, 6262 const SimplifyQuery &SQ) { 6263 ConstantRange CR1 = 6264 ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned); 6265 ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo); 6266 ConstantRange::PreferredRangeType RangeType = 6267 ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned; 6268 return CR1.intersectWith(CR2, RangeType); 6269 } 6270 6271 OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, 6272 const Value *RHS, 6273 const SimplifyQuery &SQ) { 6274 KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ); 6275 KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ); 6276 ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false); 6277 ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false); 6278 return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange)); 6279 } 6280 6281 OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS, 6282 const Value *RHS, 6283 const SimplifyQuery &SQ) { 6284 // Multiplying n * m significant bits yields a result of n + m significant 6285 // bits. If the total number of significant bits does not exceed the 6286 // result bit width (minus 1), there is no overflow. 6287 // This means if we have enough leading sign bits in the operands 6288 // we can guarantee that the result does not overflow. 6289 // Ref: "Hacker's Delight" by Henry Warren 6290 unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); 6291 6292 // Note that underestimating the number of sign bits gives a more 6293 // conservative answer. 6294 unsigned SignBits = 6295 ::ComputeNumSignBits(LHS, 0, SQ) + ::ComputeNumSignBits(RHS, 0, SQ); 6296 6297 // First handle the easy case: if we have enough sign bits there's 6298 // definitely no overflow. 6299 if (SignBits > BitWidth + 1) 6300 return OverflowResult::NeverOverflows; 6301 6302 // There are two ambiguous cases where there can be no overflow: 6303 // SignBits == BitWidth + 1 and 6304 // SignBits == BitWidth 6305 // The second case is difficult to check, therefore we only handle the 6306 // first case. 6307 if (SignBits == BitWidth + 1) { 6308 // It overflows only when both arguments are negative and the true 6309 // product is exactly the minimum negative number. 6310 // E.g. 
mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000 6311 // For simplicity we just check if at least one side is not negative. 6312 KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ); 6313 KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ); 6314 if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) 6315 return OverflowResult::NeverOverflows; 6316 } 6317 return OverflowResult::MayOverflow; 6318 } 6319 6320 OverflowResult 6321 llvm::computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS, 6322 const WithCache<const Value *> &RHS, 6323 const SimplifyQuery &SQ) { 6324 ConstantRange LHSRange = 6325 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ); 6326 ConstantRange RHSRange = 6327 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ); 6328 return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange)); 6329 } 6330 6331 static OverflowResult 6332 computeOverflowForSignedAdd(const WithCache<const Value *> &LHS, 6333 const WithCache<const Value *> &RHS, 6334 const AddOperator *Add, const SimplifyQuery &SQ) { 6335 if (Add && Add->hasNoSignedWrap()) { 6336 return OverflowResult::NeverOverflows; 6337 } 6338 6339 // If LHS and RHS each have at least two sign bits, the addition will look 6340 // like 6341 // 6342 // XX..... + 6343 // YY..... 6344 // 6345 // If the carry into the most significant position is 0, X and Y can't both 6346 // be 1 and therefore the carry out of the addition is also 0. 6347 // 6348 // If the carry into the most significant position is 1, X and Y can't both 6349 // be 0 and therefore the carry out of the addition is also 1. 6350 // 6351 // Since the carry into the most significant position is always equal to 6352 // the carry out of the addition, there is no signed overflow. 6353 if (::ComputeNumSignBits(LHS, 0, SQ) > 1 && 6354 ::ComputeNumSignBits(RHS, 0, SQ) > 1) 6355 return OverflowResult::NeverOverflows; 6356 6357 ConstantRange LHSRange = 6358 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ); 6359 ConstantRange RHSRange = 6360 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ); 6361 OverflowResult OR = 6362 mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange)); 6363 if (OR != OverflowResult::MayOverflow) 6364 return OR; 6365 6366 // The remaining code needs Add to be available. Early returns if not so. 6367 if (!Add) 6368 return OverflowResult::MayOverflow; 6369 6370 // If the sign of Add is the same as at least one of the operands, this add 6371 // CANNOT overflow. If this can be determined from the known bits of the 6372 // operands the above signedAddMayOverflow() check will have already done so. 6373 // The only other way to improve on the known bits is from an assumption, so 6374 // call computeKnownBitsFromContext() directly. 
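  // Sketch of the idea (hypothetical IR): suppose %x is known non-negative and
  // an assume lets computeKnownBitsFromContext() prove the sum is too:
  //   %s = add i8 %x, %y
  // With %x >= 0 a negative overflow is impossible, and a positive overflow
  // would wrap %s to a negative value, contradicting %s >= 0. Hence the add
  // cannot overflow.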
6375 bool LHSOrRHSKnownNonNegative = 6376 (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative()); 6377 bool LHSOrRHSKnownNegative = 6378 (LHSRange.isAllNegative() || RHSRange.isAllNegative()); 6379 if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { 6380 KnownBits AddKnown(LHSRange.getBitWidth()); 6381 computeKnownBitsFromContext(Add, AddKnown, /*Depth=*/0, SQ); 6382 if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || 6383 (AddKnown.isNegative() && LHSOrRHSKnownNegative)) 6384 return OverflowResult::NeverOverflows; 6385 } 6386 6387 return OverflowResult::MayOverflow; 6388 } 6389 6390 OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS, 6391 const Value *RHS, 6392 const SimplifyQuery &SQ) { 6393 // X - (X % ?) 6394 // The remainder of a value can't have greater magnitude than itself, 6395 // so the subtraction can't overflow. 6396 6397 // X - (X -nuw ?) 6398 // In the minimal case, this would simplify to "?", so there's no subtract 6399 // at all. But if this analysis is used to peek through casts, for example, 6400 // then determining no-overflow may allow other transforms. 6401 6402 // TODO: There are other patterns like this. 6403 // See simplifyICmpWithBinOpOnLHS() for candidates. 6404 if (match(RHS, m_URem(m_Specific(LHS), m_Value())) || 6405 match(RHS, m_NUWSub(m_Specific(LHS), m_Value()))) 6406 if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)) 6407 return OverflowResult::NeverOverflows; 6408 6409 // Checking for conditions implied by dominating conditions may be expensive. 6410 // Limit it to usub_with_overflow calls for now. 6411 if (match(SQ.CxtI, 6412 m_Intrinsic<Intrinsic::usub_with_overflow>(m_Value(), m_Value()))) 6413 if (auto C = isImpliedByDomCondition(CmpInst::ICMP_UGE, LHS, RHS, SQ.CxtI, 6414 SQ.DL)) { 6415 if (*C) 6416 return OverflowResult::NeverOverflows; 6417 return OverflowResult::AlwaysOverflowsLow; 6418 } 6419 ConstantRange LHSRange = 6420 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ); 6421 ConstantRange RHSRange = 6422 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ); 6423 return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange)); 6424 } 6425 6426 OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS, 6427 const Value *RHS, 6428 const SimplifyQuery &SQ) { 6429 // X - (X % ?) 6430 // The remainder of a value can't have greater magnitude than itself, 6431 // so the subtraction can't overflow. 6432 6433 // X - (X -nsw ?) 6434 // In the minimal case, this would simplify to "?", so there's no subtract 6435 // at all. But if this analysis is used to peek through casts, for example, 6436 // then determining no-overflow may allow other transforms. 6437 if (match(RHS, m_SRem(m_Specific(LHS), m_Value())) || 6438 match(RHS, m_NSWSub(m_Specific(LHS), m_Value()))) 6439 if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)) 6440 return OverflowResult::NeverOverflows; 6441 6442 // If LHS and RHS each have at least two sign bits, the subtraction 6443 // cannot overflow. 
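  // E.g. for i8, two sign bits on each side means both values lie in
  // [-64, 63], so the difference lies in [-127, 127] and always fits.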
6444 if (::ComputeNumSignBits(LHS, 0, SQ) > 1 && 6445 ::ComputeNumSignBits(RHS, 0, SQ) > 1) 6446 return OverflowResult::NeverOverflows; 6447 6448 ConstantRange LHSRange = 6449 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ); 6450 ConstantRange RHSRange = 6451 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ); 6452 return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange)); 6453 } 6454 6455 bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO, 6456 const DominatorTree &DT) { 6457 SmallVector<const BranchInst *, 2> GuardingBranches; 6458 SmallVector<const ExtractValueInst *, 2> Results; 6459 6460 for (const User *U : WO->users()) { 6461 if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) { 6462 assert(EVI->getNumIndices() == 1 && "Obvious from CI's type"); 6463 6464 if (EVI->getIndices()[0] == 0) 6465 Results.push_back(EVI); 6466 else { 6467 assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type"); 6468 6469 for (const auto *U : EVI->users()) 6470 if (const auto *B = dyn_cast<BranchInst>(U)) { 6471 assert(B->isConditional() && "How else is it using an i1?"); 6472 GuardingBranches.push_back(B); 6473 } 6474 } 6475 } else { 6476 // We are using the aggregate directly in a way we don't want to analyze 6477 // here (storing it to a global, say). 6478 return false; 6479 } 6480 } 6481 6482 auto AllUsesGuardedByBranch = [&](const BranchInst *BI) { 6483 BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1)); 6484 if (!NoWrapEdge.isSingleEdge()) 6485 return false; 6486 6487 // Check if all users of the add are provably no-wrap. 6488 for (const auto *Result : Results) { 6489 // If the extractvalue itself is not executed on overflow, the we don't 6490 // need to check each use separately, since domination is transitive. 6491 if (DT.dominates(NoWrapEdge, Result->getParent())) 6492 continue; 6493 6494 for (const auto &RU : Result->uses()) 6495 if (!DT.dominates(NoWrapEdge, RU)) 6496 return false; 6497 } 6498 6499 return true; 6500 }; 6501 6502 return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch); 6503 } 6504 6505 /// Shifts return poison if shiftwidth is larger than the bitwidth. 6506 static bool shiftAmountKnownInRange(const Value *ShiftAmount) { 6507 auto *C = dyn_cast<Constant>(ShiftAmount); 6508 if (!C) 6509 return false; 6510 6511 // Shifts return poison if shiftwidth is larger than the bitwidth. 
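  // For vectors every lane has to be checked, e.g. (hypothetical IR)
  //   shl <2 x i8> %x, <i8 1, i8 9>
  // is poison in lane 1 because the shift amount 9 exceeds the bit width 8.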
6512 SmallVector<const Constant *, 4> ShiftAmounts; 6513 if (auto *FVTy = dyn_cast<FixedVectorType>(C->getType())) { 6514 unsigned NumElts = FVTy->getNumElements(); 6515 for (unsigned i = 0; i < NumElts; ++i) 6516 ShiftAmounts.push_back(C->getAggregateElement(i)); 6517 } else if (isa<ScalableVectorType>(C->getType())) 6518 return false; // Can't tell, just return false to be safe 6519 else 6520 ShiftAmounts.push_back(C); 6521 6522 bool Safe = llvm::all_of(ShiftAmounts, [](const Constant *C) { 6523 auto *CI = dyn_cast_or_null<ConstantInt>(C); 6524 return CI && CI->getValue().ult(C->getType()->getIntegerBitWidth()); 6525 }); 6526 6527 return Safe; 6528 } 6529 6530 enum class UndefPoisonKind { 6531 PoisonOnly = (1 << 0), 6532 UndefOnly = (1 << 1), 6533 UndefOrPoison = PoisonOnly | UndefOnly, 6534 }; 6535 6536 static bool includesPoison(UndefPoisonKind Kind) { 6537 return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0; 6538 } 6539 6540 static bool includesUndef(UndefPoisonKind Kind) { 6541 return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0; 6542 } 6543 6544 static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind, 6545 bool ConsiderFlagsAndMetadata) { 6546 6547 if (ConsiderFlagsAndMetadata && includesPoison(Kind) && 6548 Op->hasPoisonGeneratingFlagsOrMetadata()) 6549 return true; 6550 6551 unsigned Opcode = Op->getOpcode(); 6552 6553 // Check whether opcode is a poison/undef-generating operation 6554 switch (Opcode) { 6555 case Instruction::Shl: 6556 case Instruction::AShr: 6557 case Instruction::LShr: 6558 return includesPoison(Kind) && !shiftAmountKnownInRange(Op->getOperand(1)); 6559 case Instruction::FPToSI: 6560 case Instruction::FPToUI: 6561 // fptosi/ui yields poison if the resulting value does not fit in the 6562 // destination type. 6563 return true; 6564 case Instruction::Call: 6565 if (auto *II = dyn_cast<IntrinsicInst>(Op)) { 6566 switch (II->getIntrinsicID()) { 6567 // TODO: Add more intrinsics. 
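      // Note that ctlz/cttz/abs below only avoid poison when their immarg
      // flag is 0, e.g. (hypothetical IR)
      //   call i32 @llvm.ctlz.i32(i32 %x, i1 true)
      // is poison for %x == 0, whereas an i1 false flag makes a zero input
      // well defined.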
6568 case Intrinsic::ctlz: 6569 case Intrinsic::cttz: 6570 case Intrinsic::abs: 6571 if (cast<ConstantInt>(II->getArgOperand(1))->isNullValue()) 6572 return false; 6573 break; 6574 case Intrinsic::ctpop: 6575 case Intrinsic::bswap: 6576 case Intrinsic::bitreverse: 6577 case Intrinsic::fshl: 6578 case Intrinsic::fshr: 6579 case Intrinsic::smax: 6580 case Intrinsic::smin: 6581 case Intrinsic::umax: 6582 case Intrinsic::umin: 6583 case Intrinsic::ptrmask: 6584 case Intrinsic::fptoui_sat: 6585 case Intrinsic::fptosi_sat: 6586 case Intrinsic::sadd_with_overflow: 6587 case Intrinsic::ssub_with_overflow: 6588 case Intrinsic::smul_with_overflow: 6589 case Intrinsic::uadd_with_overflow: 6590 case Intrinsic::usub_with_overflow: 6591 case Intrinsic::umul_with_overflow: 6592 case Intrinsic::sadd_sat: 6593 case Intrinsic::uadd_sat: 6594 case Intrinsic::ssub_sat: 6595 case Intrinsic::usub_sat: 6596 return false; 6597 case Intrinsic::sshl_sat: 6598 case Intrinsic::ushl_sat: 6599 return includesPoison(Kind) && 6600 !shiftAmountKnownInRange(II->getArgOperand(1)); 6601 case Intrinsic::fma: 6602 case Intrinsic::fmuladd: 6603 case Intrinsic::sqrt: 6604 case Intrinsic::powi: 6605 case Intrinsic::sin: 6606 case Intrinsic::cos: 6607 case Intrinsic::pow: 6608 case Intrinsic::log: 6609 case Intrinsic::log10: 6610 case Intrinsic::log2: 6611 case Intrinsic::exp: 6612 case Intrinsic::exp2: 6613 case Intrinsic::exp10: 6614 case Intrinsic::fabs: 6615 case Intrinsic::copysign: 6616 case Intrinsic::floor: 6617 case Intrinsic::ceil: 6618 case Intrinsic::trunc: 6619 case Intrinsic::rint: 6620 case Intrinsic::nearbyint: 6621 case Intrinsic::round: 6622 case Intrinsic::roundeven: 6623 case Intrinsic::fptrunc_round: 6624 case Intrinsic::canonicalize: 6625 case Intrinsic::arithmetic_fence: 6626 case Intrinsic::minnum: 6627 case Intrinsic::maxnum: 6628 case Intrinsic::minimum: 6629 case Intrinsic::maximum: 6630 case Intrinsic::is_fpclass: 6631 case Intrinsic::ldexp: 6632 case Intrinsic::frexp: 6633 return false; 6634 case Intrinsic::lround: 6635 case Intrinsic::llround: 6636 case Intrinsic::lrint: 6637 case Intrinsic::llrint: 6638 // If the value doesn't fit an unspecified value is returned (but this 6639 // is not poison). 6640 return false; 6641 } 6642 } 6643 [[fallthrough]]; 6644 case Instruction::CallBr: 6645 case Instruction::Invoke: { 6646 const auto *CB = cast<CallBase>(Op); 6647 return !CB->hasRetAttr(Attribute::NoUndef); 6648 } 6649 case Instruction::InsertElement: 6650 case Instruction::ExtractElement: { 6651 // If index exceeds the length of the vector, it returns poison 6652 auto *VTy = cast<VectorType>(Op->getOperand(0)->getType()); 6653 unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1; 6654 auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp)); 6655 if (includesPoison(Kind)) 6656 return !Idx || 6657 Idx->getValue().uge(VTy->getElementCount().getKnownMinValue()); 6658 return false; 6659 } 6660 case Instruction::ShuffleVector: { 6661 ArrayRef<int> Mask = isa<ConstantExpr>(Op) 6662 ? 
cast<ConstantExpr>(Op)->getShuffleMask() 6663 : cast<ShuffleVectorInst>(Op)->getShuffleMask(); 6664 return includesPoison(Kind) && is_contained(Mask, PoisonMaskElem); 6665 } 6666 case Instruction::FNeg: 6667 case Instruction::PHI: 6668 case Instruction::Select: 6669 case Instruction::URem: 6670 case Instruction::SRem: 6671 case Instruction::ExtractValue: 6672 case Instruction::InsertValue: 6673 case Instruction::Freeze: 6674 case Instruction::ICmp: 6675 case Instruction::FCmp: 6676 case Instruction::FAdd: 6677 case Instruction::FSub: 6678 case Instruction::FMul: 6679 case Instruction::FDiv: 6680 case Instruction::FRem: 6681 return false; 6682 case Instruction::GetElementPtr: 6683 // inbounds is handled above 6684 // TODO: what about inrange on constexpr? 6685 return false; 6686 default: { 6687 const auto *CE = dyn_cast<ConstantExpr>(Op); 6688 if (isa<CastInst>(Op) || (CE && CE->isCast())) 6689 return false; 6690 else if (Instruction::isBinaryOp(Opcode)) 6691 return false; 6692 // Be conservative and return true. 6693 return true; 6694 } 6695 } 6696 } 6697 6698 bool llvm::canCreateUndefOrPoison(const Operator *Op, 6699 bool ConsiderFlagsAndMetadata) { 6700 return ::canCreateUndefOrPoison(Op, UndefPoisonKind::UndefOrPoison, 6701 ConsiderFlagsAndMetadata); 6702 } 6703 6704 bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata) { 6705 return ::canCreateUndefOrPoison(Op, UndefPoisonKind::PoisonOnly, 6706 ConsiderFlagsAndMetadata); 6707 } 6708 6709 static bool directlyImpliesPoison(const Value *ValAssumedPoison, const Value *V, 6710 unsigned Depth) { 6711 if (ValAssumedPoison == V) 6712 return true; 6713 6714 const unsigned MaxDepth = 2; 6715 if (Depth >= MaxDepth) 6716 return false; 6717 6718 if (const auto *I = dyn_cast<Instruction>(V)) { 6719 if (any_of(I->operands(), [=](const Use &Op) { 6720 return propagatesPoison(Op) && 6721 directlyImpliesPoison(ValAssumedPoison, Op, Depth + 1); 6722 })) 6723 return true; 6724 6725 // V = extractvalue V0, idx 6726 // V2 = extractvalue V0, idx2 6727 // V0's elements are all poison or not. 
(e.g., add_with_overflow) 6728 const WithOverflowInst *II; 6729 if (match(I, m_ExtractValue(m_WithOverflowInst(II))) && 6730 (match(ValAssumedPoison, m_ExtractValue(m_Specific(II))) || 6731 llvm::is_contained(II->args(), ValAssumedPoison))) 6732 return true; 6733 } 6734 return false; 6735 } 6736 6737 static bool impliesPoison(const Value *ValAssumedPoison, const Value *V, 6738 unsigned Depth) { 6739 if (isGuaranteedNotToBePoison(ValAssumedPoison)) 6740 return true; 6741 6742 if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0)) 6743 return true; 6744 6745 const unsigned MaxDepth = 2; 6746 if (Depth >= MaxDepth) 6747 return false; 6748 6749 const auto *I = dyn_cast<Instruction>(ValAssumedPoison); 6750 if (I && !canCreatePoison(cast<Operator>(I))) { 6751 return all_of(I->operands(), [=](const Value *Op) { 6752 return impliesPoison(Op, V, Depth + 1); 6753 }); 6754 } 6755 return false; 6756 } 6757 6758 bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) { 6759 return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0); 6760 } 6761 6762 static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly); 6763 6764 static bool isGuaranteedNotToBeUndefOrPoison( 6765 const Value *V, AssumptionCache *AC, const Instruction *CtxI, 6766 const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) { 6767 if (Depth >= MaxAnalysisRecursionDepth) 6768 return false; 6769 6770 if (isa<MetadataAsValue>(V)) 6771 return false; 6772 6773 if (const auto *A = dyn_cast<Argument>(V)) { 6774 if (A->hasAttribute(Attribute::NoUndef) || 6775 A->hasAttribute(Attribute::Dereferenceable) || 6776 A->hasAttribute(Attribute::DereferenceableOrNull)) 6777 return true; 6778 } 6779 6780 if (auto *C = dyn_cast<Constant>(V)) { 6781 if (isa<PoisonValue>(C)) 6782 return !includesPoison(Kind); 6783 6784 if (isa<UndefValue>(C)) 6785 return !includesUndef(Kind); 6786 6787 if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(V) || 6788 isa<ConstantPointerNull>(C) || isa<Function>(C)) 6789 return true; 6790 6791 if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C)) 6792 return (!includesUndef(Kind) ? !C->containsPoisonElement() 6793 : !C->containsUndefOrPoisonElement()) && 6794 !C->containsConstantExpression(); 6795 } 6796 6797 // Strip cast operations from a pointer value. 6798 // Note that stripPointerCastsSameRepresentation can strip off getelementptr 6799 // inbounds with zero offset. To guarantee that the result isn't poison, the 6800 // stripped pointer is checked as it has to be pointing into an allocated 6801 // object or be null `null` to ensure `inbounds` getelement pointers with a 6802 // zero offset could not produce poison. 6803 // It can strip off addrspacecast that do not change bit representation as 6804 // well. We believe that such addrspacecast is equivalent to no-op. 6805 auto *StrippedV = V->stripPointerCastsSameRepresentation(); 6806 if (isa<AllocaInst>(StrippedV) || isa<GlobalVariable>(StrippedV) || 6807 isa<Function>(StrippedV) || isa<ConstantPointerNull>(StrippedV)) 6808 return true; 6809 6810 auto OpCheck = [&](const Value *V) { 6811 return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1, Kind); 6812 }; 6813 6814 if (auto *Opr = dyn_cast<Operator>(V)) { 6815 // If the value is a freeze instruction, then it can never 6816 // be undef or poison. 
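    // E.g. (hypothetical IR)
    //   %f = freeze i32 %maybe.poison
    // %f is some fixed, well-defined value regardless of its operand.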
6817 if (isa<FreezeInst>(V)) 6818 return true; 6819 6820 if (const auto *CB = dyn_cast<CallBase>(V)) { 6821 if (CB->hasRetAttr(Attribute::NoUndef) || 6822 CB->hasRetAttr(Attribute::Dereferenceable) || 6823 CB->hasRetAttr(Attribute::DereferenceableOrNull)) 6824 return true; 6825 } 6826 6827 if (const auto *PN = dyn_cast<PHINode>(V)) { 6828 unsigned Num = PN->getNumIncomingValues(); 6829 bool IsWellDefined = true; 6830 for (unsigned i = 0; i < Num; ++i) { 6831 auto *TI = PN->getIncomingBlock(i)->getTerminator(); 6832 if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI, 6833 DT, Depth + 1, Kind)) { 6834 IsWellDefined = false; 6835 break; 6836 } 6837 } 6838 if (IsWellDefined) 6839 return true; 6840 } else if (!::canCreateUndefOrPoison(Opr, Kind, 6841 /*ConsiderFlagsAndMetadata*/ true) && 6842 all_of(Opr->operands(), OpCheck)) 6843 return true; 6844 } 6845 6846 if (auto *I = dyn_cast<LoadInst>(V)) 6847 if (I->hasMetadata(LLVMContext::MD_noundef) || 6848 I->hasMetadata(LLVMContext::MD_dereferenceable) || 6849 I->hasMetadata(LLVMContext::MD_dereferenceable_or_null)) 6850 return true; 6851 6852 if (programUndefinedIfUndefOrPoison(V, !includesUndef(Kind))) 6853 return true; 6854 6855 // CxtI may be null or a cloned instruction. 6856 if (!CtxI || !CtxI->getParent() || !DT) 6857 return false; 6858 6859 auto *DNode = DT->getNode(CtxI->getParent()); 6860 if (!DNode) 6861 // Unreachable block 6862 return false; 6863 6864 // If V is used as a branch condition before reaching CtxI, V cannot be 6865 // undef or poison. 6866 // br V, BB1, BB2 6867 // BB1: 6868 // CtxI ; V cannot be undef or poison here 6869 auto *Dominator = DNode->getIDom(); 6870 while (Dominator) { 6871 auto *TI = Dominator->getBlock()->getTerminator(); 6872 6873 Value *Cond = nullptr; 6874 if (auto BI = dyn_cast_or_null<BranchInst>(TI)) { 6875 if (BI->isConditional()) 6876 Cond = BI->getCondition(); 6877 } else if (auto SI = dyn_cast_or_null<SwitchInst>(TI)) { 6878 Cond = SI->getCondition(); 6879 } 6880 6881 if (Cond) { 6882 if (Cond == V) 6883 return true; 6884 else if (!includesUndef(Kind) && isa<Operator>(Cond)) { 6885 // For poison, we can analyze further 6886 auto *Opr = cast<Operator>(Cond); 6887 if (any_of(Opr->operands(), 6888 [V](const Use &U) { return V == U && propagatesPoison(U); })) 6889 return true; 6890 } 6891 } 6892 6893 Dominator = Dominator->getIDom(); 6894 } 6895 6896 if (getKnowledgeValidInContext(V, {Attribute::NoUndef}, CtxI, DT, AC)) 6897 return true; 6898 6899 return false; 6900 } 6901 6902 bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC, 6903 const Instruction *CtxI, 6904 const DominatorTree *DT, 6905 unsigned Depth) { 6906 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, 6907 UndefPoisonKind::UndefOrPoison); 6908 } 6909 6910 bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC, 6911 const Instruction *CtxI, 6912 const DominatorTree *DT, unsigned Depth) { 6913 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, 6914 UndefPoisonKind::PoisonOnly); 6915 } 6916 6917 bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC, 6918 const Instruction *CtxI, 6919 const DominatorTree *DT, unsigned Depth) { 6920 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, 6921 UndefPoisonKind::UndefOnly); 6922 } 6923 6924 /// Return true if undefined behavior would provably be executed on the path to 6925 /// OnPathTo if Root produced a posion result. 
Note that this doesn't say 6926 /// anything about whether OnPathTo is actually executed or whether Root is 6927 /// actually poison. This can be used to assess whether a new use of Root can 6928 /// be added at a location which is control equivalent with OnPathTo (such as 6929 /// immediately before it) without introducing UB which didn't previously 6930 /// exist. Note that a false result conveys no information. 6931 bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root, 6932 Instruction *OnPathTo, 6933 DominatorTree *DT) { 6934 // Basic approach is to assume Root is poison, propagate poison forward 6935 // through all users we can easily track, and then check whether any of those 6936 // users are provable UB and must execute before out exiting block might 6937 // exit. 6938 6939 // The set of all recursive users we've visited (which are assumed to all be 6940 // poison because of said visit) 6941 SmallSet<const Value *, 16> KnownPoison; 6942 SmallVector<const Instruction*, 16> Worklist; 6943 Worklist.push_back(Root); 6944 while (!Worklist.empty()) { 6945 const Instruction *I = Worklist.pop_back_val(); 6946 6947 // If we know this must trigger UB on a path leading our target. 6948 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) 6949 return true; 6950 6951 // If we can't analyze propagation through this instruction, just skip it 6952 // and transitive users. Safe as false is a conservative result. 6953 if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) { 6954 return KnownPoison.contains(U) && propagatesPoison(U); 6955 })) 6956 continue; 6957 6958 if (KnownPoison.insert(I).second) 6959 for (const User *User : I->users()) 6960 Worklist.push_back(cast<Instruction>(User)); 6961 } 6962 6963 // Might be non-UB, or might have a path we couldn't prove must execute on 6964 // way to exiting bb. 6965 return false; 6966 } 6967 6968 OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, 6969 const SimplifyQuery &SQ) { 6970 return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), 6971 Add, SQ); 6972 } 6973 6974 OverflowResult 6975 llvm::computeOverflowForSignedAdd(const WithCache<const Value *> &LHS, 6976 const WithCache<const Value *> &RHS, 6977 const SimplifyQuery &SQ) { 6978 return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, SQ); 6979 } 6980 6981 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { 6982 // Note: An atomic operation isn't guaranteed to return in a reasonable amount 6983 // of time because it's possible for another thread to interfere with it for an 6984 // arbitrary length of time, but programs aren't allowed to rely on that. 6985 6986 // If there is no successor, then execution can't transfer to it. 6987 if (isa<ReturnInst>(I)) 6988 return false; 6989 if (isa<UnreachableInst>(I)) 6990 return false; 6991 6992 // Note: Do not add new checks here; instead, change Instruction::mayThrow or 6993 // Instruction::willReturn. 6994 // 6995 // FIXME: Move this check into Instruction::willReturn. 6996 if (isa<CatchPadInst>(I)) { 6997 switch (classifyEHPersonality(I->getFunction()->getPersonalityFn())) { 6998 default: 6999 // A catchpad may invoke exception object constructors and such, which 7000 // in some languages can be arbitrary code, so be conservative by default. 7001 return false; 7002 case EHPersonality::CoreCLR: 7003 // For CoreCLR, it just involves a type test. 
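      // That test neither throws nor runs arbitrary user code, so the
      // catchpad is guaranteed to transfer execution to its successor.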
7004 return true; 7005 } 7006 } 7007 7008 // An instruction that returns without throwing must transfer control flow 7009 // to a successor. 7010 return !I->mayThrow() && I->willReturn(); 7011 } 7012 7013 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) { 7014 // TODO: This is slightly conservative for invoke instruction since exiting 7015 // via an exception *is* normal control for them. 7016 for (const Instruction &I : *BB) 7017 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7018 return false; 7019 return true; 7020 } 7021 7022 bool llvm::isGuaranteedToTransferExecutionToSuccessor( 7023 BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, 7024 unsigned ScanLimit) { 7025 return isGuaranteedToTransferExecutionToSuccessor(make_range(Begin, End), 7026 ScanLimit); 7027 } 7028 7029 bool llvm::isGuaranteedToTransferExecutionToSuccessor( 7030 iterator_range<BasicBlock::const_iterator> Range, unsigned ScanLimit) { 7031 assert(ScanLimit && "scan limit must be non-zero"); 7032 for (const Instruction &I : Range) { 7033 if (isa<DbgInfoIntrinsic>(I)) 7034 continue; 7035 if (--ScanLimit == 0) 7036 return false; 7037 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7038 return false; 7039 } 7040 return true; 7041 } 7042 7043 bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, 7044 const Loop *L) { 7045 // The loop header is guaranteed to be executed for every iteration. 7046 // 7047 // FIXME: Relax this constraint to cover all basic blocks that are 7048 // guaranteed to be executed at every iteration. 7049 if (I->getParent() != L->getHeader()) return false; 7050 7051 for (const Instruction &LI : *L->getHeader()) { 7052 if (&LI == I) return true; 7053 if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false; 7054 } 7055 llvm_unreachable("Instruction not contained in its own parent basic block."); 7056 } 7057 7058 bool llvm::propagatesPoison(const Use &PoisonOp) { 7059 const Operator *I = cast<Operator>(PoisonOp.getUser()); 7060 switch (I->getOpcode()) { 7061 case Instruction::Freeze: 7062 case Instruction::PHI: 7063 case Instruction::Invoke: 7064 return false; 7065 case Instruction::Select: 7066 return PoisonOp.getOperandNo() == 0; 7067 case Instruction::Call: 7068 if (auto *II = dyn_cast<IntrinsicInst>(I)) { 7069 switch (II->getIntrinsicID()) { 7070 // TODO: Add more intrinsics. 7071 case Intrinsic::sadd_with_overflow: 7072 case Intrinsic::ssub_with_overflow: 7073 case Intrinsic::smul_with_overflow: 7074 case Intrinsic::uadd_with_overflow: 7075 case Intrinsic::usub_with_overflow: 7076 case Intrinsic::umul_with_overflow: 7077 // If an input is a vector containing a poison element, the 7078 // two output vectors (calculated results, overflow bits)' 7079 // corresponding lanes are poison. 7080 return true; 7081 case Intrinsic::ctpop: 7082 return true; 7083 } 7084 } 7085 return false; 7086 case Instruction::ICmp: 7087 case Instruction::FCmp: 7088 case Instruction::GetElementPtr: 7089 return true; 7090 default: 7091 if (isa<BinaryOperator>(I) || isa<UnaryOperator>(I) || isa<CastInst>(I)) 7092 return true; 7093 7094 // Be conservative and return false. 
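    // For example, a load falls through to here: poison in its pointer
    // operand is modeled as immediate UB (see getGuaranteedWellDefinedOps)
    // rather than as a poison result, so the use is not a propagating one.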
7095 return false; 7096 } 7097 } 7098 7099 void llvm::getGuaranteedWellDefinedOps( 7100 const Instruction *I, SmallVectorImpl<const Value *> &Operands) { 7101 switch (I->getOpcode()) { 7102 case Instruction::Store: 7103 Operands.push_back(cast<StoreInst>(I)->getPointerOperand()); 7104 break; 7105 7106 case Instruction::Load: 7107 Operands.push_back(cast<LoadInst>(I)->getPointerOperand()); 7108 break; 7109 7110 // Since dereferenceable attribute imply noundef, atomic operations 7111 // also implicitly have noundef pointers too 7112 case Instruction::AtomicCmpXchg: 7113 Operands.push_back(cast<AtomicCmpXchgInst>(I)->getPointerOperand()); 7114 break; 7115 7116 case Instruction::AtomicRMW: 7117 Operands.push_back(cast<AtomicRMWInst>(I)->getPointerOperand()); 7118 break; 7119 7120 case Instruction::Call: 7121 case Instruction::Invoke: { 7122 const CallBase *CB = cast<CallBase>(I); 7123 if (CB->isIndirectCall()) 7124 Operands.push_back(CB->getCalledOperand()); 7125 for (unsigned i = 0; i < CB->arg_size(); ++i) { 7126 if (CB->paramHasAttr(i, Attribute::NoUndef) || 7127 CB->paramHasAttr(i, Attribute::Dereferenceable) || 7128 CB->paramHasAttr(i, Attribute::DereferenceableOrNull)) 7129 Operands.push_back(CB->getArgOperand(i)); 7130 } 7131 break; 7132 } 7133 case Instruction::Ret: 7134 if (I->getFunction()->hasRetAttribute(Attribute::NoUndef)) 7135 Operands.push_back(I->getOperand(0)); 7136 break; 7137 case Instruction::Switch: 7138 Operands.push_back(cast<SwitchInst>(I)->getCondition()); 7139 break; 7140 case Instruction::Br: { 7141 auto *BR = cast<BranchInst>(I); 7142 if (BR->isConditional()) 7143 Operands.push_back(BR->getCondition()); 7144 break; 7145 } 7146 default: 7147 break; 7148 } 7149 } 7150 7151 void llvm::getGuaranteedNonPoisonOps(const Instruction *I, 7152 SmallVectorImpl<const Value *> &Operands) { 7153 getGuaranteedWellDefinedOps(I, Operands); 7154 switch (I->getOpcode()) { 7155 // Divisors of these operations are allowed to be partially undef. 7156 case Instruction::UDiv: 7157 case Instruction::SDiv: 7158 case Instruction::URem: 7159 case Instruction::SRem: 7160 Operands.push_back(I->getOperand(1)); 7161 break; 7162 default: 7163 break; 7164 } 7165 } 7166 7167 bool llvm::mustTriggerUB(const Instruction *I, 7168 const SmallPtrSetImpl<const Value *> &KnownPoison) { 7169 SmallVector<const Value *, 4> NonPoisonOps; 7170 getGuaranteedNonPoisonOps(I, NonPoisonOps); 7171 7172 for (const auto *V : NonPoisonOps) 7173 if (KnownPoison.count(V)) 7174 return true; 7175 7176 return false; 7177 } 7178 7179 static bool programUndefinedIfUndefOrPoison(const Value *V, 7180 bool PoisonOnly) { 7181 // We currently only look for uses of values within the same basic 7182 // block, as that makes it easier to guarantee that the uses will be 7183 // executed given that Inst is executed. 7184 // 7185 // FIXME: Expand this to consider uses beyond the same basic block. To do 7186 // this, look out for the distinction between post-dominance and strong 7187 // post-dominance. 7188 const BasicBlock *BB = nullptr; 7189 BasicBlock::const_iterator Begin; 7190 if (const auto *Inst = dyn_cast<Instruction>(V)) { 7191 BB = Inst->getParent(); 7192 Begin = Inst->getIterator(); 7193 Begin++; 7194 } else if (const auto *Arg = dyn_cast<Argument>(V)) { 7195 if (Arg->getParent()->isDeclaration()) 7196 return false; 7197 BB = &Arg->getParent()->getEntryBlock(); 7198 Begin = BB->begin(); 7199 } else { 7200 return false; 7201 } 7202 7203 // Limit number of instructions we look at, to avoid scanning through large 7204 // blocks. 
The current limit is chosen arbitrarily. 7205 unsigned ScanLimit = 32; 7206 BasicBlock::const_iterator End = BB->end(); 7207 7208 if (!PoisonOnly) { 7209 // Since undef does not propagate eagerly, be conservative & just check 7210 // whether a value is directly passed to an instruction that must take 7211 // well-defined operands. 7212 7213 for (const auto &I : make_range(Begin, End)) { 7214 if (isa<DbgInfoIntrinsic>(I)) 7215 continue; 7216 if (--ScanLimit == 0) 7217 break; 7218 7219 SmallVector<const Value *, 4> WellDefinedOps; 7220 getGuaranteedWellDefinedOps(&I, WellDefinedOps); 7221 if (is_contained(WellDefinedOps, V)) 7222 return true; 7223 7224 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7225 break; 7226 } 7227 return false; 7228 } 7229 7230 // Set of instructions that we have proved will yield poison if Inst 7231 // does. 7232 SmallSet<const Value *, 16> YieldsPoison; 7233 SmallSet<const BasicBlock *, 4> Visited; 7234 7235 YieldsPoison.insert(V); 7236 Visited.insert(BB); 7237 7238 while (true) { 7239 for (const auto &I : make_range(Begin, End)) { 7240 if (isa<DbgInfoIntrinsic>(I)) 7241 continue; 7242 if (--ScanLimit == 0) 7243 return false; 7244 if (mustTriggerUB(&I, YieldsPoison)) 7245 return true; 7246 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7247 return false; 7248 7249 // If an operand is poison and propagates it, mark I as yielding poison. 7250 for (const Use &Op : I.operands()) { 7251 if (YieldsPoison.count(Op) && propagatesPoison(Op)) { 7252 YieldsPoison.insert(&I); 7253 break; 7254 } 7255 } 7256 7257 // Special handling for select, which returns poison if its operand 0 is 7258 // poison (handled in the loop above) *or* if both its true/false operands 7259 // are poison (handled here). 7260 if (I.getOpcode() == Instruction::Select && 7261 YieldsPoison.count(I.getOperand(1)) && 7262 YieldsPoison.count(I.getOperand(2))) { 7263 YieldsPoison.insert(&I); 7264 } 7265 } 7266 7267 BB = BB->getSingleSuccessor(); 7268 if (!BB || !Visited.insert(BB).second) 7269 break; 7270 7271 Begin = BB->getFirstNonPHI()->getIterator(); 7272 End = BB->end(); 7273 } 7274 return false; 7275 } 7276 7277 bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) { 7278 return ::programUndefinedIfUndefOrPoison(Inst, false); 7279 } 7280 7281 bool llvm::programUndefinedIfPoison(const Instruction *Inst) { 7282 return ::programUndefinedIfUndefOrPoison(Inst, true); 7283 } 7284 7285 static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) { 7286 if (FMF.noNaNs()) 7287 return true; 7288 7289 if (auto *C = dyn_cast<ConstantFP>(V)) 7290 return !C->isNaN(); 7291 7292 if (auto *C = dyn_cast<ConstantDataVector>(V)) { 7293 if (!C->getElementType()->isFloatingPointTy()) 7294 return false; 7295 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) { 7296 if (C->getElementAsAPFloat(I).isNaN()) 7297 return false; 7298 } 7299 return true; 7300 } 7301 7302 if (isa<ConstantAggregateZero>(V)) 7303 return true; 7304 7305 return false; 7306 } 7307 7308 static bool isKnownNonZero(const Value *V) { 7309 if (auto *C = dyn_cast<ConstantFP>(V)) 7310 return !C->isZero(); 7311 7312 if (auto *C = dyn_cast<ConstantDataVector>(V)) { 7313 if (!C->getElementType()->isFloatingPointTy()) 7314 return false; 7315 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) { 7316 if (C->getElementAsAPFloat(I).isZero()) 7317 return false; 7318 } 7319 return true; 7320 } 7321 7322 return false; 7323 } 7324 7325 /// Match clamp pattern for float types without care about NaNs or signed zeros. 
7326 /// Given non-min/max outer cmp/select from the clamp pattern this 7327 /// function recognizes if it can be substitued by a "canonical" min/max 7328 /// pattern. 7329 static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred, 7330 Value *CmpLHS, Value *CmpRHS, 7331 Value *TrueVal, Value *FalseVal, 7332 Value *&LHS, Value *&RHS) { 7333 // Try to match 7334 // X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2)) 7335 // X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2)) 7336 // and return description of the outer Max/Min. 7337 7338 // First, check if select has inverse order: 7339 if (CmpRHS == FalseVal) { 7340 std::swap(TrueVal, FalseVal); 7341 Pred = CmpInst::getInversePredicate(Pred); 7342 } 7343 7344 // Assume success now. If there's no match, callers should not use these anyway. 7345 LHS = TrueVal; 7346 RHS = FalseVal; 7347 7348 const APFloat *FC1; 7349 if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite()) 7350 return {SPF_UNKNOWN, SPNB_NA, false}; 7351 7352 const APFloat *FC2; 7353 switch (Pred) { 7354 case CmpInst::FCMP_OLT: 7355 case CmpInst::FCMP_OLE: 7356 case CmpInst::FCMP_ULT: 7357 case CmpInst::FCMP_ULE: 7358 if (match(FalseVal, 7359 m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)), 7360 m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) && 7361 *FC1 < *FC2) 7362 return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false}; 7363 break; 7364 case CmpInst::FCMP_OGT: 7365 case CmpInst::FCMP_OGE: 7366 case CmpInst::FCMP_UGT: 7367 case CmpInst::FCMP_UGE: 7368 if (match(FalseVal, 7369 m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)), 7370 m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) && 7371 *FC1 > *FC2) 7372 return {SPF_FMINNUM, SPNB_RETURNS_ANY, false}; 7373 break; 7374 default: 7375 break; 7376 } 7377 7378 return {SPF_UNKNOWN, SPNB_NA, false}; 7379 } 7380 7381 /// Recognize variations of: 7382 /// CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v))) 7383 static SelectPatternResult matchClamp(CmpInst::Predicate Pred, 7384 Value *CmpLHS, Value *CmpRHS, 7385 Value *TrueVal, Value *FalseVal) { 7386 // Swap the select operands and predicate to match the patterns below. 7387 if (CmpRHS != TrueVal) { 7388 Pred = ICmpInst::getSwappedPredicate(Pred); 7389 std::swap(TrueVal, FalseVal); 7390 } 7391 const APInt *C1; 7392 if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) { 7393 const APInt *C2; 7394 // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1) 7395 if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) && 7396 C1->slt(*C2) && Pred == CmpInst::ICMP_SLT) 7397 return {SPF_SMAX, SPNB_NA, false}; 7398 7399 // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1) 7400 if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) && 7401 C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT) 7402 return {SPF_SMIN, SPNB_NA, false}; 7403 7404 // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1) 7405 if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) && 7406 C1->ult(*C2) && Pred == CmpInst::ICMP_ULT) 7407 return {SPF_UMAX, SPNB_NA, false}; 7408 7409 // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1) 7410 if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) && 7411 C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT) 7412 return {SPF_UMIN, SPNB_NA, false}; 7413 } 7414 return {SPF_UNKNOWN, SPNB_NA, false}; 7415 } 7416 7417 /// Recognize variations of: 7418 /// a < c ? 
min(a,b) : min(b,c) ==> min(min(a,b),min(b,c)) 7419 static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, 7420 Value *CmpLHS, Value *CmpRHS, 7421 Value *TVal, Value *FVal, 7422 unsigned Depth) { 7423 // TODO: Allow FP min/max with nnan/nsz. 7424 assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison"); 7425 7426 Value *A = nullptr, *B = nullptr; 7427 SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1); 7428 if (!SelectPatternResult::isMinOrMax(L.Flavor)) 7429 return {SPF_UNKNOWN, SPNB_NA, false}; 7430 7431 Value *C = nullptr, *D = nullptr; 7432 SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1); 7433 if (L.Flavor != R.Flavor) 7434 return {SPF_UNKNOWN, SPNB_NA, false}; 7435 7436 // We have something like: x Pred y ? min(a, b) : min(c, d). 7437 // Try to match the compare to the min/max operations of the select operands. 7438 // First, make sure we have the right compare predicate. 7439 switch (L.Flavor) { 7440 case SPF_SMIN: 7441 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) { 7442 Pred = ICmpInst::getSwappedPredicate(Pred); 7443 std::swap(CmpLHS, CmpRHS); 7444 } 7445 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) 7446 break; 7447 return {SPF_UNKNOWN, SPNB_NA, false}; 7448 case SPF_SMAX: 7449 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { 7450 Pred = ICmpInst::getSwappedPredicate(Pred); 7451 std::swap(CmpLHS, CmpRHS); 7452 } 7453 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) 7454 break; 7455 return {SPF_UNKNOWN, SPNB_NA, false}; 7456 case SPF_UMIN: 7457 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { 7458 Pred = ICmpInst::getSwappedPredicate(Pred); 7459 std::swap(CmpLHS, CmpRHS); 7460 } 7461 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) 7462 break; 7463 return {SPF_UNKNOWN, SPNB_NA, false}; 7464 case SPF_UMAX: 7465 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { 7466 Pred = ICmpInst::getSwappedPredicate(Pred); 7467 std::swap(CmpLHS, CmpRHS); 7468 } 7469 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) 7470 break; 7471 return {SPF_UNKNOWN, SPNB_NA, false}; 7472 default: 7473 return {SPF_UNKNOWN, SPNB_NA, false}; 7474 } 7475 7476 // If there is a common operand in the already matched min/max and the other 7477 // min/max operands match the compare operands (either directly or inverted), 7478 // then this is min/max of the same flavor. 7479 7480 // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) 7481 // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) 7482 if (D == B) { 7483 if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && 7484 match(A, m_Not(m_Specific(CmpRHS))))) 7485 return {L.Flavor, SPNB_NA, false}; 7486 } 7487 // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) 7488 // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) 7489 if (C == B) { 7490 if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && 7491 match(A, m_Not(m_Specific(CmpRHS))))) 7492 return {L.Flavor, SPNB_NA, false}; 7493 } 7494 // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) 7495 // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) 7496 if (D == A) { 7497 if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && 7498 match(B, m_Not(m_Specific(CmpRHS))))) 7499 return {L.Flavor, SPNB_NA, false}; 7500 } 7501 // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) 7502 // ~d pred ~b ? 
m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) 7503 if (C == A) { 7504 if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && 7505 match(B, m_Not(m_Specific(CmpRHS))))) 7506 return {L.Flavor, SPNB_NA, false}; 7507 } 7508 7509 return {SPF_UNKNOWN, SPNB_NA, false}; 7510 } 7511 7512 /// If the input value is the result of a 'not' op, constant integer, or vector 7513 /// splat of a constant integer, return the bitwise-not source value. 7514 /// TODO: This could be extended to handle non-splat vector integer constants. 7515 static Value *getNotValue(Value *V) { 7516 Value *NotV; 7517 if (match(V, m_Not(m_Value(NotV)))) 7518 return NotV; 7519 7520 const APInt *C; 7521 if (match(V, m_APInt(C))) 7522 return ConstantInt::get(V->getType(), ~(*C)); 7523 7524 return nullptr; 7525 } 7526 7527 /// Match non-obvious integer minimum and maximum sequences. 7528 static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, 7529 Value *CmpLHS, Value *CmpRHS, 7530 Value *TrueVal, Value *FalseVal, 7531 Value *&LHS, Value *&RHS, 7532 unsigned Depth) { 7533 // Assume success. If there's no match, callers should not use these anyway. 7534 LHS = TrueVal; 7535 RHS = FalseVal; 7536 7537 SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal); 7538 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) 7539 return SPR; 7540 7541 SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth); 7542 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) 7543 return SPR; 7544 7545 // Look through 'not' ops to find disguised min/max. 7546 // (X > Y) ? ~X : ~Y ==> (~X < ~Y) ? ~X : ~Y ==> MIN(~X, ~Y) 7547 // (X < Y) ? ~X : ~Y ==> (~X > ~Y) ? ~X : ~Y ==> MAX(~X, ~Y) 7548 if (CmpLHS == getNotValue(TrueVal) && CmpRHS == getNotValue(FalseVal)) { 7549 switch (Pred) { 7550 case CmpInst::ICMP_SGT: return {SPF_SMIN, SPNB_NA, false}; 7551 case CmpInst::ICMP_SLT: return {SPF_SMAX, SPNB_NA, false}; 7552 case CmpInst::ICMP_UGT: return {SPF_UMIN, SPNB_NA, false}; 7553 case CmpInst::ICMP_ULT: return {SPF_UMAX, SPNB_NA, false}; 7554 default: break; 7555 } 7556 } 7557 7558 // (X > Y) ? ~Y : ~X ==> (~X < ~Y) ? ~Y : ~X ==> MAX(~Y, ~X) 7559 // (X < Y) ? ~Y : ~X ==> (~X > ~Y) ? ~Y : ~X ==> MIN(~Y, ~X) 7560 if (CmpLHS == getNotValue(FalseVal) && CmpRHS == getNotValue(TrueVal)) { 7561 switch (Pred) { 7562 case CmpInst::ICMP_SGT: return {SPF_SMAX, SPNB_NA, false}; 7563 case CmpInst::ICMP_SLT: return {SPF_SMIN, SPNB_NA, false}; 7564 case CmpInst::ICMP_UGT: return {SPF_UMAX, SPNB_NA, false}; 7565 case CmpInst::ICMP_ULT: return {SPF_UMIN, SPNB_NA, false}; 7566 default: break; 7567 } 7568 } 7569 7570 if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) 7571 return {SPF_UNKNOWN, SPNB_NA, false}; 7572 7573 const APInt *C1; 7574 if (!match(CmpRHS, m_APInt(C1))) 7575 return {SPF_UNKNOWN, SPNB_NA, false}; 7576 7577 // An unsigned min/max can be written with a signed compare. 7578 const APInt *C2; 7579 if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) || 7580 (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) { 7581 // Is the sign bit set? 7582 // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX 7583 // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN 7584 if (Pred == CmpInst::ICMP_SLT && C1->isZero() && C2->isMaxSignedValue()) 7585 return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; 7586 7587 // Is the sign bit clear? 7588 // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX 7589 // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? 
X : MINVAL ==> UMIN 7590 if (Pred == CmpInst::ICMP_SGT && C1->isAllOnes() && C2->isMinSignedValue()) 7591 return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; 7592 } 7593 7594 return {SPF_UNKNOWN, SPNB_NA, false}; 7595 } 7596 7597 bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW) { 7598 assert(X && Y && "Invalid operand"); 7599 7600 // X = sub (0, Y) || X = sub nsw (0, Y) 7601 if ((!NeedNSW && match(X, m_Sub(m_ZeroInt(), m_Specific(Y)))) || 7602 (NeedNSW && match(X, m_NSWSub(m_ZeroInt(), m_Specific(Y))))) 7603 return true; 7604 7605 // Y = sub (0, X) || Y = sub nsw (0, X) 7606 if ((!NeedNSW && match(Y, m_Sub(m_ZeroInt(), m_Specific(X)))) || 7607 (NeedNSW && match(Y, m_NSWSub(m_ZeroInt(), m_Specific(X))))) 7608 return true; 7609 7610 // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A) 7611 Value *A, *B; 7612 return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) && 7613 match(Y, m_Sub(m_Specific(B), m_Specific(A))))) || 7614 (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) && 7615 match(Y, m_NSWSub(m_Specific(B), m_Specific(A))))); 7616 } 7617 7618 static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, 7619 FastMathFlags FMF, 7620 Value *CmpLHS, Value *CmpRHS, 7621 Value *TrueVal, Value *FalseVal, 7622 Value *&LHS, Value *&RHS, 7623 unsigned Depth) { 7624 bool HasMismatchedZeros = false; 7625 if (CmpInst::isFPPredicate(Pred)) { 7626 // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one 7627 // 0.0 operand, set the compare's 0.0 operands to that same value for the 7628 // purpose of identifying min/max. Disregard vector constants with undefined 7629 // elements because those can not be back-propagated for analysis. 7630 Value *OutputZeroVal = nullptr; 7631 if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) && 7632 !cast<Constant>(TrueVal)->containsUndefOrPoisonElement()) 7633 OutputZeroVal = TrueVal; 7634 else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) && 7635 !cast<Constant>(FalseVal)->containsUndefOrPoisonElement()) 7636 OutputZeroVal = FalseVal; 7637 7638 if (OutputZeroVal) { 7639 if (match(CmpLHS, m_AnyZeroFP()) && CmpLHS != OutputZeroVal) { 7640 HasMismatchedZeros = true; 7641 CmpLHS = OutputZeroVal; 7642 } 7643 if (match(CmpRHS, m_AnyZeroFP()) && CmpRHS != OutputZeroVal) { 7644 HasMismatchedZeros = true; 7645 CmpRHS = OutputZeroVal; 7646 } 7647 } 7648 } 7649 7650 LHS = CmpLHS; 7651 RHS = CmpRHS; 7652 7653 // Signed zero may return inconsistent results between implementations. 7654 // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0 7655 // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1) 7656 // Therefore, we behave conservatively and only proceed if at least one of the 7657 // operands is known to not be zero or if we don't care about signed zero. 7658 switch (Pred) { 7659 default: break; 7660 case CmpInst::FCMP_OGT: case CmpInst::FCMP_OLT: 7661 case CmpInst::FCMP_UGT: case CmpInst::FCMP_ULT: 7662 if (!HasMismatchedZeros) 7663 break; 7664 [[fallthrough]]; 7665 case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE: 7666 case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE: 7667 if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && 7668 !isKnownNonZero(CmpRHS)) 7669 return {SPF_UNKNOWN, SPNB_NA, false}; 7670 } 7671 7672 SelectPatternNaNBehavior NaNBehavior = SPNB_NA; 7673 bool Ordered = false; 7674 7675 // When given one NaN and one non-NaN input: 7676 // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input. 
7677 // - A simple C99 (a < b ? a : b) construction will return 'b' (as the 7678 // ordered comparison fails), which could be NaN or non-NaN. 7679 // so here we discover exactly what NaN behavior is required/accepted. 7680 if (CmpInst::isFPPredicate(Pred)) { 7681 bool LHSSafe = isKnownNonNaN(CmpLHS, FMF); 7682 bool RHSSafe = isKnownNonNaN(CmpRHS, FMF); 7683 7684 if (LHSSafe && RHSSafe) { 7685 // Both operands are known non-NaN. 7686 NaNBehavior = SPNB_RETURNS_ANY; 7687 } else if (CmpInst::isOrdered(Pred)) { 7688 // An ordered comparison will return false when given a NaN, so it 7689 // returns the RHS. 7690 Ordered = true; 7691 if (LHSSafe) 7692 // LHS is non-NaN, so if RHS is NaN then NaN will be returned. 7693 NaNBehavior = SPNB_RETURNS_NAN; 7694 else if (RHSSafe) 7695 NaNBehavior = SPNB_RETURNS_OTHER; 7696 else 7697 // Completely unsafe. 7698 return {SPF_UNKNOWN, SPNB_NA, false}; 7699 } else { 7700 Ordered = false; 7701 // An unordered comparison will return true when given a NaN, so it 7702 // returns the LHS. 7703 if (LHSSafe) 7704 // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned. 7705 NaNBehavior = SPNB_RETURNS_OTHER; 7706 else if (RHSSafe) 7707 NaNBehavior = SPNB_RETURNS_NAN; 7708 else 7709 // Completely unsafe. 7710 return {SPF_UNKNOWN, SPNB_NA, false}; 7711 } 7712 } 7713 7714 if (TrueVal == CmpRHS && FalseVal == CmpLHS) { 7715 std::swap(CmpLHS, CmpRHS); 7716 Pred = CmpInst::getSwappedPredicate(Pred); 7717 if (NaNBehavior == SPNB_RETURNS_NAN) 7718 NaNBehavior = SPNB_RETURNS_OTHER; 7719 else if (NaNBehavior == SPNB_RETURNS_OTHER) 7720 NaNBehavior = SPNB_RETURNS_NAN; 7721 Ordered = !Ordered; 7722 } 7723 7724 // ([if]cmp X, Y) ? X : Y 7725 if (TrueVal == CmpLHS && FalseVal == CmpRHS) { 7726 switch (Pred) { 7727 default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality. 7728 case ICmpInst::ICMP_UGT: 7729 case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false}; 7730 case ICmpInst::ICMP_SGT: 7731 case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false}; 7732 case ICmpInst::ICMP_ULT: 7733 case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false}; 7734 case ICmpInst::ICMP_SLT: 7735 case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false}; 7736 case FCmpInst::FCMP_UGT: 7737 case FCmpInst::FCMP_UGE: 7738 case FCmpInst::FCMP_OGT: 7739 case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered}; 7740 case FCmpInst::FCMP_ULT: 7741 case FCmpInst::FCMP_ULE: 7742 case FCmpInst::FCMP_OLT: 7743 case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered}; 7744 } 7745 } 7746 7747 if (isKnownNegation(TrueVal, FalseVal)) { 7748 // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can 7749 // match against either LHS or sext(LHS). 7750 auto MaybeSExtCmpLHS = 7751 m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS))); 7752 auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes()); 7753 auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One()); 7754 if (match(TrueVal, MaybeSExtCmpLHS)) { 7755 // Set the return values. If the compare uses the negated value (-X >s 0), 7756 // swap the return values because the negated value is always 'RHS'. 7757 LHS = TrueVal; 7758 RHS = FalseVal; 7759 if (match(CmpLHS, m_Neg(m_Specific(FalseVal)))) 7760 std::swap(LHS, RHS); 7761 7762 // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X) 7763 // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X) 7764 if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes)) 7765 return {SPF_ABS, SPNB_NA, false}; 7766 7767 // (X >=s 0) ? X : -X or (X >=s 1) ? 
X : -X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne))
        return {SPF_ABS, SPNB_NA, false};

      // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
      // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
      if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
        return {SPF_NABS, SPNB_NA, false};
    }
    else if (match(FalseVal, MaybeSExtCmpLHS)) {
      // Set the return values. If the compare uses the negated value (-X >s 0),
      // swap the return values because the negated value is always 'RHS'.
      LHS = FalseVal;
      RHS = TrueVal;
      if (match(CmpLHS, m_Neg(m_Specific(TrueVal))))
        std::swap(LHS, RHS);

      // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X)
      // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X)
      if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
        return {SPF_NABS, SPNB_NA, false};

      // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X)
      // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
        return {SPF_ABS, SPNB_NA, false};
    }
  }

  if (CmpInst::isIntPredicate(Pred))
    return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth);

  // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar
  // may return either -0.0 or 0.0, so the fcmp/select pair has stricter
  // semantics than minNum. Be conservative in such a case.
  if (NaNBehavior != SPNB_RETURNS_ANY ||
      (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
       !isKnownNonZero(CmpRHS)))
    return {SPF_UNKNOWN, SPNB_NA, false};

  return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
}

/// Helps to match a select pattern in the presence of a type mismatch.
///
/// This handles the case where the types of the true and false values of a
/// select instruction differ from the types of the cmp instruction's operands
/// because of an intervening cast. The function checks whether it is legal to
/// move the cast after the "select"; if so, it returns the new second value of
/// the "select" (under the assumption that the cast has been moved):
/// 1. The operand of the cast instruction, when both select values are the
///    same kind of cast instruction.
/// 2. The restored constant (obtained by applying the reverse cast operation),
///    when the first select value is a cast operation and the second value is
///    a constant.
/// NOTE: Only the new second value is returned, because the first value can be
/// accessed as the operand of its cast instruction.
static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
                              Instruction::CastOps *CastOp) {
  auto *Cast1 = dyn_cast<CastInst>(V1);
  if (!Cast1)
    return nullptr;

  *CastOp = Cast1->getOpcode();
  Type *SrcTy = Cast1->getSrcTy();
  if (auto *Cast2 = dyn_cast<CastInst>(V2)) {
    // If V1 and V2 are both the same cast from the same type, look through V1.
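    // Illustrative IR for this case (operand names are hypothetical):
    //   %a.ext = sext i8 %a to i32
    //   %b.ext = sext i8 %b to i32
    //   %cmp   = icmp slt i32 %a.ext, %b.ext
    //   %sel   = select i1 %cmp, i32 %a.ext, i32 %b.ext
    // Both select arms are the same kind of cast from i8, so the pattern can
    // be re-expressed on the narrow operands with the sext applied afterwards.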
    if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
      return Cast2->getOperand(0);
    return nullptr;
  }

  auto *C = dyn_cast<Constant>(V2);
  if (!C)
    return nullptr;

  const DataLayout &DL = CmpI->getModule()->getDataLayout();
  Constant *CastedTo = nullptr;
  switch (*CastOp) {
  case Instruction::ZExt:
    if (CmpI->isUnsigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy);
    break;
  case Instruction::SExt:
    if (CmpI->isSigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
    break;
  case Instruction::Trunc:
    Constant *CmpConst;
    if (match(CmpI->getOperand(1), m_Constant(CmpConst)) &&
        CmpConst->getType() == SrcTy) {
      // Here we have the following case:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %tr = trunc iN %x to iK
      //   %narrowsel = select i1 %cond, iK %tr, iK C
      //
      // We can always move the trunc after the select operation:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %widesel = select i1 %cond, iN %x, iN CmpConst
      //   %tr = trunc iN %widesel to iK
      //
      // Note that C could be extended in any way because we don't care about
      // the upper bits after truncation. This cannot be an abs pattern,
      // because that would look like:
      //
      //   select i1 %cond, x, -x.
      //
      // So only a min/max pattern can be matched here. Such a match requires
      // the widened C to equal CmpConst, so we set the widened C to CmpConst;
      // the condition "trunc CmpConst == C" is checked below.
      CastedTo = CmpConst;
    } else {
      unsigned ExtOp = CmpI->isSigned() ? Instruction::SExt : Instruction::ZExt;
      CastedTo = ConstantFoldCastOperand(ExtOp, C, SrcTy, DL);
    }
    break;
  case Instruction::FPTrunc:
    CastedTo = ConstantFoldCastOperand(Instruction::FPExt, C, SrcTy, DL);
    break;
  case Instruction::FPExt:
    CastedTo = ConstantFoldCastOperand(Instruction::FPTrunc, C, SrcTy, DL);
    break;
  case Instruction::FPToUI:
    CastedTo = ConstantFoldCastOperand(Instruction::UIToFP, C, SrcTy, DL);
    break;
  case Instruction::FPToSI:
    CastedTo = ConstantFoldCastOperand(Instruction::SIToFP, C, SrcTy, DL);
    break;
  case Instruction::UIToFP:
    CastedTo = ConstantFoldCastOperand(Instruction::FPToUI, C, SrcTy, DL);
    break;
  case Instruction::SIToFP:
    CastedTo = ConstantFoldCastOperand(Instruction::FPToSI, C, SrcTy, DL);
    break;
  default:
    break;
  }

  if (!CastedTo)
    return nullptr;

  // Make sure the cast doesn't lose any information.
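  // For instance (an illustrative zext case): with SrcTy == i8 and C == i32
  // 300, the candidate CastedTo is trunc(i32 300) == i8 44, but zext(i8 44)
  // gives 44 rather than 300, so the round-trip check below rejects the fold.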
7911 Constant *CastedBack = 7912 ConstantFoldCastOperand(*CastOp, CastedTo, C->getType(), DL); 7913 if (CastedBack && CastedBack != C) 7914 return nullptr; 7915 7916 return CastedTo; 7917 } 7918 7919 SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, 7920 Instruction::CastOps *CastOp, 7921 unsigned Depth) { 7922 if (Depth >= MaxAnalysisRecursionDepth) 7923 return {SPF_UNKNOWN, SPNB_NA, false}; 7924 7925 SelectInst *SI = dyn_cast<SelectInst>(V); 7926 if (!SI) return {SPF_UNKNOWN, SPNB_NA, false}; 7927 7928 CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition()); 7929 if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false}; 7930 7931 Value *TrueVal = SI->getTrueValue(); 7932 Value *FalseVal = SI->getFalseValue(); 7933 7934 return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS, 7935 CastOp, Depth); 7936 } 7937 7938 SelectPatternResult llvm::matchDecomposedSelectPattern( 7939 CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS, 7940 Instruction::CastOps *CastOp, unsigned Depth) { 7941 CmpInst::Predicate Pred = CmpI->getPredicate(); 7942 Value *CmpLHS = CmpI->getOperand(0); 7943 Value *CmpRHS = CmpI->getOperand(1); 7944 FastMathFlags FMF; 7945 if (isa<FPMathOperator>(CmpI)) 7946 FMF = CmpI->getFastMathFlags(); 7947 7948 // Bail out early. 7949 if (CmpI->isEquality()) 7950 return {SPF_UNKNOWN, SPNB_NA, false}; 7951 7952 // Deal with type mismatches. 7953 if (CastOp && CmpLHS->getType() != TrueVal->getType()) { 7954 if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) { 7955 // If this is a potential fmin/fmax with a cast to integer, then ignore 7956 // -0.0 because there is no corresponding integer value. 7957 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI) 7958 FMF.setNoSignedZeros(); 7959 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, 7960 cast<CastInst>(TrueVal)->getOperand(0), C, 7961 LHS, RHS, Depth); 7962 } 7963 if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) { 7964 // If this is a potential fmin/fmax with a cast to integer, then ignore 7965 // -0.0 because there is no corresponding integer value. 7966 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI) 7967 FMF.setNoSignedZeros(); 7968 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, 7969 C, cast<CastInst>(FalseVal)->getOperand(0), 7970 LHS, RHS, Depth); 7971 } 7972 } 7973 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal, 7974 LHS, RHS, Depth); 7975 } 7976 7977 CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) { 7978 if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT; 7979 if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT; 7980 if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT; 7981 if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT; 7982 if (SPF == SPF_FMINNUM) 7983 return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; 7984 if (SPF == SPF_FMAXNUM) 7985 return Ordered ? 
FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; 7986 llvm_unreachable("unhandled!"); 7987 } 7988 7989 SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) { 7990 if (SPF == SPF_SMIN) return SPF_SMAX; 7991 if (SPF == SPF_UMIN) return SPF_UMAX; 7992 if (SPF == SPF_SMAX) return SPF_SMIN; 7993 if (SPF == SPF_UMAX) return SPF_UMIN; 7994 llvm_unreachable("unhandled!"); 7995 } 7996 7997 Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) { 7998 switch (MinMaxID) { 7999 case Intrinsic::smax: return Intrinsic::smin; 8000 case Intrinsic::smin: return Intrinsic::smax; 8001 case Intrinsic::umax: return Intrinsic::umin; 8002 case Intrinsic::umin: return Intrinsic::umax; 8003 // Please note that next four intrinsics may produce the same result for 8004 // original and inverted case even if X != Y due to NaN is handled specially. 8005 case Intrinsic::maximum: return Intrinsic::minimum; 8006 case Intrinsic::minimum: return Intrinsic::maximum; 8007 case Intrinsic::maxnum: return Intrinsic::minnum; 8008 case Intrinsic::minnum: return Intrinsic::maxnum; 8009 default: llvm_unreachable("Unexpected intrinsic"); 8010 } 8011 } 8012 8013 APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) { 8014 switch (SPF) { 8015 case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth); 8016 case SPF_SMIN: return APInt::getSignedMinValue(BitWidth); 8017 case SPF_UMAX: return APInt::getMaxValue(BitWidth); 8018 case SPF_UMIN: return APInt::getMinValue(BitWidth); 8019 default: llvm_unreachable("Unexpected flavor"); 8020 } 8021 } 8022 8023 std::pair<Intrinsic::ID, bool> 8024 llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) { 8025 // Check if VL contains select instructions that can be folded into a min/max 8026 // vector intrinsic and return the intrinsic if it is possible. 8027 // TODO: Support floating point min/max. 8028 bool AllCmpSingleUse = true; 8029 SelectPatternResult SelectPattern; 8030 SelectPattern.Flavor = SPF_UNKNOWN; 8031 if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) { 8032 Value *LHS, *RHS; 8033 auto CurrentPattern = matchSelectPattern(I, LHS, RHS); 8034 if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor) || 8035 CurrentPattern.Flavor == SPF_FMINNUM || 8036 CurrentPattern.Flavor == SPF_FMAXNUM || 8037 !I->getType()->isIntOrIntVectorTy()) 8038 return false; 8039 if (SelectPattern.Flavor != SPF_UNKNOWN && 8040 SelectPattern.Flavor != CurrentPattern.Flavor) 8041 return false; 8042 SelectPattern = CurrentPattern; 8043 AllCmpSingleUse &= 8044 match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value())); 8045 return true; 8046 })) { 8047 switch (SelectPattern.Flavor) { 8048 case SPF_SMIN: 8049 return {Intrinsic::smin, AllCmpSingleUse}; 8050 case SPF_UMIN: 8051 return {Intrinsic::umin, AllCmpSingleUse}; 8052 case SPF_SMAX: 8053 return {Intrinsic::smax, AllCmpSingleUse}; 8054 case SPF_UMAX: 8055 return {Intrinsic::umax, AllCmpSingleUse}; 8056 default: 8057 llvm_unreachable("unexpected select pattern flavor"); 8058 } 8059 } 8060 return {Intrinsic::not_intrinsic, false}; 8061 } 8062 8063 bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, 8064 Value *&Start, Value *&Step) { 8065 // Handle the case of a simple two-predecessor recurrence PHI. 8066 // There's a lot more that could theoretically be done here, but 8067 // this is sufficient to catch some interesting cases. 
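  // An illustrative recurrence this matches (IR names are hypothetical):
  //   loop:
  //     %iv = phi i32 [ %init, %entry ], [ %iv.next, %loop ]
  //     %iv.next = add i32 %iv, %stride
  // On success, BO is %iv.next, Start is %init, and Step is %stride.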
8068 if (P->getNumIncomingValues() != 2) 8069 return false; 8070 8071 for (unsigned i = 0; i != 2; ++i) { 8072 Value *L = P->getIncomingValue(i); 8073 Value *R = P->getIncomingValue(!i); 8074 auto *LU = dyn_cast<BinaryOperator>(L); 8075 if (!LU) 8076 continue; 8077 unsigned Opcode = LU->getOpcode(); 8078 8079 switch (Opcode) { 8080 default: 8081 continue; 8082 // TODO: Expand list -- xor, div, gep, uaddo, etc.. 8083 case Instruction::LShr: 8084 case Instruction::AShr: 8085 case Instruction::Shl: 8086 case Instruction::Add: 8087 case Instruction::Sub: 8088 case Instruction::And: 8089 case Instruction::Or: 8090 case Instruction::Mul: 8091 case Instruction::FMul: { 8092 Value *LL = LU->getOperand(0); 8093 Value *LR = LU->getOperand(1); 8094 // Find a recurrence. 8095 if (LL == P) 8096 L = LR; 8097 else if (LR == P) 8098 L = LL; 8099 else 8100 continue; // Check for recurrence with L and R flipped. 8101 8102 break; // Match! 8103 } 8104 }; 8105 8106 // We have matched a recurrence of the form: 8107 // %iv = [R, %entry], [%iv.next, %backedge] 8108 // %iv.next = binop %iv, L 8109 // OR 8110 // %iv = [R, %entry], [%iv.next, %backedge] 8111 // %iv.next = binop L, %iv 8112 BO = LU; 8113 Start = R; 8114 Step = L; 8115 return true; 8116 } 8117 return false; 8118 } 8119 8120 bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P, 8121 Value *&Start, Value *&Step) { 8122 BinaryOperator *BO = nullptr; 8123 P = dyn_cast<PHINode>(I->getOperand(0)); 8124 if (!P) 8125 P = dyn_cast<PHINode>(I->getOperand(1)); 8126 return P && matchSimpleRecurrence(P, BO, Start, Step) && BO == I; 8127 } 8128 8129 /// Return true if "icmp Pred LHS RHS" is always true. 8130 static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, 8131 const Value *RHS, const DataLayout &DL, 8132 unsigned Depth) { 8133 if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) 8134 return true; 8135 8136 switch (Pred) { 8137 default: 8138 return false; 8139 8140 case CmpInst::ICMP_SLE: { 8141 const APInt *C; 8142 8143 // LHS s<= LHS +_{nsw} C if C >= 0 8144 if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C)))) 8145 return !C->isNegative(); 8146 return false; 8147 } 8148 8149 case CmpInst::ICMP_ULE: { 8150 // LHS u<= LHS +_{nuw} V for any V 8151 if (match(RHS, m_c_Add(m_Specific(LHS), m_Value())) && 8152 cast<OverflowingBinaryOperator>(RHS)->hasNoUnsignedWrap()) 8153 return true; 8154 8155 // RHS >> V u<= RHS for any V 8156 if (match(LHS, m_LShr(m_Specific(RHS), m_Value()))) 8157 return true; 8158 8159 // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB) 8160 auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B, 8161 const Value *&X, 8162 const APInt *&CA, const APInt *&CB) { 8163 if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) && 8164 match(B, m_NUWAdd(m_Specific(X), m_APInt(CB)))) 8165 return true; 8166 8167 // If X & C == 0 then (X | C) == X +_{nuw} C 8168 if (match(A, m_Or(m_Value(X), m_APInt(CA))) && 8169 match(B, m_Or(m_Specific(X), m_APInt(CB)))) { 8170 KnownBits Known(CA->getBitWidth()); 8171 computeKnownBits(X, Known, DL, Depth + 1, /*AC*/ nullptr, 8172 /*CxtI*/ nullptr, /*DT*/ nullptr); 8173 if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero)) 8174 return true; 8175 } 8176 8177 return false; 8178 }; 8179 8180 const Value *X; 8181 const APInt *CLHS, *CRHS; 8182 if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS)) 8183 return CLHS->ule(*CRHS); 8184 8185 return false; 8186 } 8187 } 8188 } 8189 8190 /// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred 8191 /// ALHS ARHS" is true. 
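/// For example (an illustrative nsw case): "icmp sle (add nsw i32 %x, 1), %y"
/// being true implies that "icmp sle %x, (add nsw i32 %y, 2)" is also true,
/// because %x s<= %x + 1 and %y s<= %y + 2.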
Otherwise, return std::nullopt. 8192 static std::optional<bool> 8193 isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS, 8194 const Value *ARHS, const Value *BLHS, const Value *BRHS, 8195 const DataLayout &DL, unsigned Depth) { 8196 switch (Pred) { 8197 default: 8198 return std::nullopt; 8199 8200 case CmpInst::ICMP_SLT: 8201 case CmpInst::ICMP_SLE: 8202 if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth) && 8203 isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth)) 8204 return true; 8205 return std::nullopt; 8206 8207 case CmpInst::ICMP_SGT: 8208 case CmpInst::ICMP_SGE: 8209 if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS, DL, Depth) && 8210 isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS, DL, Depth)) 8211 return true; 8212 return std::nullopt; 8213 8214 case CmpInst::ICMP_ULT: 8215 case CmpInst::ICMP_ULE: 8216 if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth) && 8217 isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth)) 8218 return true; 8219 return std::nullopt; 8220 8221 case CmpInst::ICMP_UGT: 8222 case CmpInst::ICMP_UGE: 8223 if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS, DL, Depth) && 8224 isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS, DL, Depth)) 8225 return true; 8226 return std::nullopt; 8227 } 8228 } 8229 8230 /// Return true if the operands of two compares (expanded as "L0 pred L1" and 8231 /// "R0 pred R1") match. IsSwappedOps is true when the operands match, but are 8232 /// swapped. 8233 static bool areMatchingOperands(const Value *L0, const Value *L1, const Value *R0, 8234 const Value *R1, bool &AreSwappedOps) { 8235 bool AreMatchingOps = (L0 == R0 && L1 == R1); 8236 AreSwappedOps = (L0 == R1 && L1 == R0); 8237 return AreMatchingOps || AreSwappedOps; 8238 } 8239 8240 /// Return true if "icmp1 LPred X, Y" implies "icmp2 RPred X, Y" is true. 8241 /// Return false if "icmp1 LPred X, Y" implies "icmp2 RPred X, Y" is false. 8242 /// Otherwise, return std::nullopt if we can't infer anything. 8243 static std::optional<bool> 8244 isImpliedCondMatchingOperands(CmpInst::Predicate LPred, 8245 CmpInst::Predicate RPred, bool AreSwappedOps) { 8246 // Canonicalize the predicate as if the operands were not commuted. 8247 if (AreSwappedOps) 8248 RPred = ICmpInst::getSwappedPredicate(RPred); 8249 8250 if (CmpInst::isImpliedTrueByMatchingCmp(LPred, RPred)) 8251 return true; 8252 if (CmpInst::isImpliedFalseByMatchingCmp(LPred, RPred)) 8253 return false; 8254 8255 return std::nullopt; 8256 } 8257 8258 /// Return true if "icmp LPred X, LC" implies "icmp RPred X, RC" is true. 8259 /// Return false if "icmp LPred X, LC" implies "icmp RPred X, RC" is false. 8260 /// Otherwise, return std::nullopt if we can't infer anything. 8261 static std::optional<bool> isImpliedCondCommonOperandWithConstants( 8262 CmpInst::Predicate LPred, const APInt &LC, CmpInst::Predicate RPred, 8263 const APInt &RC) { 8264 ConstantRange DomCR = ConstantRange::makeExactICmpRegion(LPred, LC); 8265 ConstantRange CR = ConstantRange::makeExactICmpRegion(RPred, RC); 8266 ConstantRange Intersection = DomCR.intersectWith(CR); 8267 ConstantRange Difference = DomCR.difference(CR); 8268 if (Intersection.isEmptySet()) 8269 return false; 8270 if (Difference.isEmptySet()) 8271 return true; 8272 return std::nullopt; 8273 } 8274 8275 /// Return true if LHS implies RHS (expanded to its components as "R0 RPred R1") 8276 /// is true. Return false if LHS implies RHS is false. Otherwise, return 8277 /// std::nullopt if we can't infer anything. 
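/// For instance, if LHS is "icmp ult i32 %x, 8" and is known true, then
/// "icmp ult i32 %x, 16" is implied true (the region [0,8) is contained in
/// [0,16)), while "icmp ugt i32 %x, 20" is implied false (the two regions are
/// disjoint).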
8278 static std::optional<bool> isImpliedCondICmps(const ICmpInst *LHS, 8279 CmpInst::Predicate RPred, 8280 const Value *R0, const Value *R1, 8281 const DataLayout &DL, 8282 bool LHSIsTrue, unsigned Depth) { 8283 Value *L0 = LHS->getOperand(0); 8284 Value *L1 = LHS->getOperand(1); 8285 8286 // The rest of the logic assumes the LHS condition is true. If that's not the 8287 // case, invert the predicate to make it so. 8288 CmpInst::Predicate LPred = 8289 LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate(); 8290 8291 // Can we infer anything when the 0-operands match and the 1-operands are 8292 // constants (not necessarily matching)? 8293 const APInt *LC, *RC; 8294 if (L0 == R0 && match(L1, m_APInt(LC)) && match(R1, m_APInt(RC))) 8295 return isImpliedCondCommonOperandWithConstants(LPred, *LC, RPred, *RC); 8296 8297 // Can we infer anything when the two compares have matching operands? 8298 bool AreSwappedOps; 8299 if (areMatchingOperands(L0, L1, R0, R1, AreSwappedOps)) 8300 return isImpliedCondMatchingOperands(LPred, RPred, AreSwappedOps); 8301 8302 // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 <u L1 implies R0 <u R1 8303 if (ICmpInst::isUnsigned(LPred) && ICmpInst::isUnsigned(RPred)) { 8304 if (L0 == R1) { 8305 std::swap(R0, R1); 8306 RPred = ICmpInst::getSwappedPredicate(RPred); 8307 } 8308 if (L1 == R0) { 8309 std::swap(L0, L1); 8310 LPred = ICmpInst::getSwappedPredicate(LPred); 8311 } 8312 if (L1 == R1) { 8313 std::swap(L0, L1); 8314 LPred = ICmpInst::getSwappedPredicate(LPred); 8315 std::swap(R0, R1); 8316 RPred = ICmpInst::getSwappedPredicate(RPred); 8317 } 8318 if (L0 == R0 && 8319 (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) && 8320 (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) && 8321 match(L0, m_c_Add(m_Specific(L1), m_Specific(R1)))) 8322 return LPred == RPred; 8323 } 8324 8325 if (LPred == RPred) 8326 return isImpliedCondOperands(LPred, L0, L1, R0, R1, DL, Depth); 8327 8328 return std::nullopt; 8329 } 8330 8331 /// Return true if LHS implies RHS is true. Return false if LHS implies RHS is 8332 /// false. Otherwise, return std::nullopt if we can't infer anything. We 8333 /// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select' 8334 /// instruction. 8335 static std::optional<bool> 8336 isImpliedCondAndOr(const Instruction *LHS, CmpInst::Predicate RHSPred, 8337 const Value *RHSOp0, const Value *RHSOp1, 8338 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { 8339 // The LHS must be an 'or', 'and', or a 'select' instruction. 8340 assert((LHS->getOpcode() == Instruction::And || 8341 LHS->getOpcode() == Instruction::Or || 8342 LHS->getOpcode() == Instruction::Select) && 8343 "Expected LHS to be 'and', 'or', or 'select'."); 8344 8345 assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit"); 8346 8347 // If the result of an 'or' is false, then we know both legs of the 'or' are 8348 // false. Similarly, if the result of an 'and' is true, then we know both 8349 // legs of the 'and' are true. 8350 const Value *ALHS, *ARHS; 8351 if ((!LHSIsTrue && match(LHS, m_LogicalOr(m_Value(ALHS), m_Value(ARHS)))) || 8352 (LHSIsTrue && match(LHS, m_LogicalAnd(m_Value(ALHS), m_Value(ARHS))))) { 8353 // FIXME: Make this non-recursion. 
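    // Illustrative case: if "%c = and i1 %a, %b" is known true, then %a and %b
    // are each true individually, so e.g. %a being "icmp ult i32 %x, 8" is
    // enough for either recursive query below to establish
    // "icmp ult i32 %x, 16".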
8354 if (std::optional<bool> Implication = isImpliedCondition( 8355 ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1)) 8356 return Implication; 8357 if (std::optional<bool> Implication = isImpliedCondition( 8358 ARHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1)) 8359 return Implication; 8360 return std::nullopt; 8361 } 8362 return std::nullopt; 8363 } 8364 8365 std::optional<bool> 8366 llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred, 8367 const Value *RHSOp0, const Value *RHSOp1, 8368 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { 8369 // Bail out when we hit the limit. 8370 if (Depth == MaxAnalysisRecursionDepth) 8371 return std::nullopt; 8372 8373 // A mismatch occurs when we compare a scalar cmp to a vector cmp, for 8374 // example. 8375 if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy()) 8376 return std::nullopt; 8377 8378 assert(LHS->getType()->isIntOrIntVectorTy(1) && 8379 "Expected integer type only!"); 8380 8381 // Both LHS and RHS are icmps. 8382 const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS); 8383 if (LHSCmp) 8384 return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, 8385 Depth); 8386 8387 /// The LHS should be an 'or', 'and', or a 'select' instruction. We expect 8388 /// the RHS to be an icmp. 8389 /// FIXME: Add support for and/or/select on the RHS. 8390 if (const Instruction *LHSI = dyn_cast<Instruction>(LHS)) { 8391 if ((LHSI->getOpcode() == Instruction::And || 8392 LHSI->getOpcode() == Instruction::Or || 8393 LHSI->getOpcode() == Instruction::Select)) 8394 return isImpliedCondAndOr(LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, 8395 Depth); 8396 } 8397 return std::nullopt; 8398 } 8399 8400 std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, 8401 const DataLayout &DL, 8402 bool LHSIsTrue, unsigned Depth) { 8403 // LHS ==> RHS by definition 8404 if (LHS == RHS) 8405 return LHSIsTrue; 8406 8407 if (const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS)) 8408 return isImpliedCondition(LHS, RHSCmp->getPredicate(), 8409 RHSCmp->getOperand(0), RHSCmp->getOperand(1), DL, 8410 LHSIsTrue, Depth); 8411 8412 if (Depth == MaxAnalysisRecursionDepth) 8413 return std::nullopt; 8414 8415 // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2 8416 // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2 8417 const Value *RHS1, *RHS2; 8418 if (match(RHS, m_LogicalOr(m_Value(RHS1), m_Value(RHS2)))) { 8419 if (std::optional<bool> Imp = 8420 isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1)) 8421 if (*Imp == true) 8422 return true; 8423 if (std::optional<bool> Imp = 8424 isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1)) 8425 if (*Imp == true) 8426 return true; 8427 } 8428 if (match(RHS, m_LogicalAnd(m_Value(RHS1), m_Value(RHS2)))) { 8429 if (std::optional<bool> Imp = 8430 isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1)) 8431 if (*Imp == false) 8432 return false; 8433 if (std::optional<bool> Imp = 8434 isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1)) 8435 if (*Imp == false) 8436 return false; 8437 } 8438 8439 return std::nullopt; 8440 } 8441 8442 // Returns a pair (Condition, ConditionIsTrue), where Condition is a branch 8443 // condition dominating ContextI or nullptr, if no condition is found. 8444 static std::pair<Value *, bool> 8445 getDomPredecessorCondition(const Instruction *ContextI) { 8446 if (!ContextI || !ContextI->getParent()) 8447 return {nullptr, false}; 8448 8449 // TODO: This is a poor/cheap way to determine dominance. 
Should we use a 8450 // dominator tree (eg, from a SimplifyQuery) instead? 8451 const BasicBlock *ContextBB = ContextI->getParent(); 8452 const BasicBlock *PredBB = ContextBB->getSinglePredecessor(); 8453 if (!PredBB) 8454 return {nullptr, false}; 8455 8456 // We need a conditional branch in the predecessor. 8457 Value *PredCond; 8458 BasicBlock *TrueBB, *FalseBB; 8459 if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB))) 8460 return {nullptr, false}; 8461 8462 // The branch should get simplified. Don't bother simplifying this condition. 8463 if (TrueBB == FalseBB) 8464 return {nullptr, false}; 8465 8466 assert((TrueBB == ContextBB || FalseBB == ContextBB) && 8467 "Predecessor block does not point to successor?"); 8468 8469 // Is this condition implied by the predecessor condition? 8470 return {PredCond, TrueBB == ContextBB}; 8471 } 8472 8473 std::optional<bool> llvm::isImpliedByDomCondition(const Value *Cond, 8474 const Instruction *ContextI, 8475 const DataLayout &DL) { 8476 assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool"); 8477 auto PredCond = getDomPredecessorCondition(ContextI); 8478 if (PredCond.first) 8479 return isImpliedCondition(PredCond.first, Cond, DL, PredCond.second); 8480 return std::nullopt; 8481 } 8482 8483 std::optional<bool> llvm::isImpliedByDomCondition(CmpInst::Predicate Pred, 8484 const Value *LHS, 8485 const Value *RHS, 8486 const Instruction *ContextI, 8487 const DataLayout &DL) { 8488 auto PredCond = getDomPredecessorCondition(ContextI); 8489 if (PredCond.first) 8490 return isImpliedCondition(PredCond.first, Pred, LHS, RHS, DL, 8491 PredCond.second); 8492 return std::nullopt; 8493 } 8494 8495 static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, 8496 APInt &Upper, const InstrInfoQuery &IIQ, 8497 bool PreferSignedRange) { 8498 unsigned Width = Lower.getBitWidth(); 8499 const APInt *C; 8500 switch (BO.getOpcode()) { 8501 case Instruction::Add: 8502 if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) { 8503 bool HasNSW = IIQ.hasNoSignedWrap(&BO); 8504 bool HasNUW = IIQ.hasNoUnsignedWrap(&BO); 8505 8506 // If the caller expects a signed compare, then try to use a signed range. 8507 // Otherwise if both no-wraps are set, use the unsigned range because it 8508 // is never larger than the signed range. Example: 8509 // "add nuw nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125]. 8510 if (PreferSignedRange && HasNSW && HasNUW) 8511 HasNUW = false; 8512 8513 if (HasNUW) { 8514 // 'add nuw x, C' produces [C, UINT_MAX]. 8515 Lower = *C; 8516 } else if (HasNSW) { 8517 if (C->isNegative()) { 8518 // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C]. 8519 Lower = APInt::getSignedMinValue(Width); 8520 Upper = APInt::getSignedMaxValue(Width) + *C + 1; 8521 } else { 8522 // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX]. 8523 Lower = APInt::getSignedMinValue(Width) + *C; 8524 Upper = APInt::getSignedMaxValue(Width) + 1; 8525 } 8526 } 8527 } 8528 break; 8529 8530 case Instruction::And: 8531 if (match(BO.getOperand(1), m_APInt(C))) 8532 // 'and x, C' produces [0, C]. 8533 Upper = *C + 1; 8534 // X & -X is a power of two or zero. So we can cap the value at max power of 8535 // two. 8536 if (match(BO.getOperand(0), m_Neg(m_Specific(BO.getOperand(1)))) || 8537 match(BO.getOperand(1), m_Neg(m_Specific(BO.getOperand(0))))) 8538 Upper = APInt::getSignedMinValue(Width) + 1; 8539 break; 8540 8541 case Instruction::Or: 8542 if (match(BO.getOperand(1), m_APInt(C))) 8543 // 'or x, C' produces [C, UINT_MAX]. 
      Lower = *C;
    break;

  case Instruction::AShr:
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
      Lower = APInt::getSignedMinValue(Width).ashr(*C);
      Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      if (C->isNegative()) {
        // 'ashr C, x' produces [C, C >> (Width-1)]
        Lower = *C;
        Upper = C->ashr(ShiftAmount) + 1;
      } else {
        // 'ashr C, x' produces [C >> (Width-1), C]
        Lower = C->ashr(ShiftAmount);
        Upper = *C + 1;
      }
    }
    break;

  case Instruction::LShr:
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'lshr x, C' produces [0, UINT_MAX >> C].
      Upper = APInt::getAllOnes(Width).lshr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      // 'lshr C, x' produces [C >> (Width-1), C].
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      Lower = C->lshr(ShiftAmount);
      Upper = *C + 1;
    }
    break;

  case Instruction::Shl:
    if (match(BO.getOperand(0), m_APInt(C))) {
      if (IIQ.hasNoUnsignedWrap(&BO)) {
        // 'shl nuw C, x' produces [C, C << CLZ(C)]
        Lower = *C;
        Upper = Lower.shl(Lower.countl_zero()) + 1;
      } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
        if (C->isNegative()) {
          // 'shl nsw C, x' produces [C << CLO(C)-1, C]
          unsigned ShiftAmount = C->countl_one() - 1;
          Lower = C->shl(ShiftAmount);
          Upper = *C + 1;
        } else {
          // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
          unsigned ShiftAmount = C->countl_zero() - 1;
          Lower = *C;
          Upper = C->shl(ShiftAmount) + 1;
        }
      } else {
        // If the low bit is set, the value can never be zero.
        if ((*C)[0])
          Lower = APInt::getOneBitSet(Width, 0);
        // If we are shifting a constant, the largest it can be is when the
        // longest run of consecutive ones is shifted into the high bits
        // (breaking ties toward the higher run). At the moment we take a
        // liberal upper bound on this by just popcounting the constant.
        // TODO: There may be a bitwise trick for finding the longest/highest
        // run of consecutive ones (the naive method is an O(Width) loop).
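        // Worked example (illustrative, i8): C = 0b01101001 has popcount 4, so
        // the bound below is 0b11110000 + 1, while the true maximum over all
        // shift amounts is 0b11010010 (C << 1); the bound safely covers it.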
8610 Upper = APInt::getHighBitsSet(Width, C->popcount()) + 1; 8611 } 8612 } else if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) { 8613 Upper = APInt::getBitsSetFrom(Width, C->getZExtValue()) + 1; 8614 } 8615 break; 8616 8617 case Instruction::SDiv: 8618 if (match(BO.getOperand(1), m_APInt(C))) { 8619 APInt IntMin = APInt::getSignedMinValue(Width); 8620 APInt IntMax = APInt::getSignedMaxValue(Width); 8621 if (C->isAllOnes()) { 8622 // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX] 8623 // where C != -1 and C != 0 and C != 1 8624 Lower = IntMin + 1; 8625 Upper = IntMax + 1; 8626 } else if (C->countl_zero() < Width - 1) { 8627 // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C] 8628 // where C != -1 and C != 0 and C != 1 8629 Lower = IntMin.sdiv(*C); 8630 Upper = IntMax.sdiv(*C); 8631 if (Lower.sgt(Upper)) 8632 std::swap(Lower, Upper); 8633 Upper = Upper + 1; 8634 assert(Upper != Lower && "Upper part of range has wrapped!"); 8635 } 8636 } else if (match(BO.getOperand(0), m_APInt(C))) { 8637 if (C->isMinSignedValue()) { 8638 // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2]. 8639 Lower = *C; 8640 Upper = Lower.lshr(1) + 1; 8641 } else { 8642 // 'sdiv C, x' produces [-|C|, |C|]. 8643 Upper = C->abs() + 1; 8644 Lower = (-Upper) + 1; 8645 } 8646 } 8647 break; 8648 8649 case Instruction::UDiv: 8650 if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) { 8651 // 'udiv x, C' produces [0, UINT_MAX / C]. 8652 Upper = APInt::getMaxValue(Width).udiv(*C) + 1; 8653 } else if (match(BO.getOperand(0), m_APInt(C))) { 8654 // 'udiv C, x' produces [0, C]. 8655 Upper = *C + 1; 8656 } 8657 break; 8658 8659 case Instruction::SRem: 8660 if (match(BO.getOperand(1), m_APInt(C))) { 8661 // 'srem x, C' produces (-|C|, |C|). 8662 Upper = C->abs(); 8663 Lower = (-Upper) + 1; 8664 } 8665 break; 8666 8667 case Instruction::URem: 8668 if (match(BO.getOperand(1), m_APInt(C))) 8669 // 'urem x, C' produces [0, C). 8670 Upper = *C; 8671 break; 8672 8673 default: 8674 break; 8675 } 8676 } 8677 8678 static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II) { 8679 unsigned Width = II.getType()->getScalarSizeInBits(); 8680 const APInt *C; 8681 switch (II.getIntrinsicID()) { 8682 case Intrinsic::ctpop: 8683 case Intrinsic::ctlz: 8684 case Intrinsic::cttz: 8685 // Maximum of set/clear bits is the bit width. 8686 return ConstantRange::getNonEmpty(APInt::getZero(Width), 8687 APInt(Width, Width + 1)); 8688 case Intrinsic::uadd_sat: 8689 // uadd.sat(x, C) produces [C, UINT_MAX]. 8690 if (match(II.getOperand(0), m_APInt(C)) || 8691 match(II.getOperand(1), m_APInt(C))) 8692 return ConstantRange::getNonEmpty(*C, APInt::getZero(Width)); 8693 break; 8694 case Intrinsic::sadd_sat: 8695 if (match(II.getOperand(0), m_APInt(C)) || 8696 match(II.getOperand(1), m_APInt(C))) { 8697 if (C->isNegative()) 8698 // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)]. 8699 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width), 8700 APInt::getSignedMaxValue(Width) + *C + 8701 1); 8702 8703 // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX]. 8704 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) + *C, 8705 APInt::getSignedMaxValue(Width) + 1); 8706 } 8707 break; 8708 case Intrinsic::usub_sat: 8709 // usub.sat(C, x) produces [0, C]. 8710 if (match(II.getOperand(0), m_APInt(C))) 8711 return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1); 8712 8713 // usub.sat(x, C) produces [0, UINT_MAX - C]. 
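    // (Illustrative i8 cases: usub.sat(20, %x) yields values in [0, 20], and
    //  usub.sat(%x, 10) yields values in [0, 245].)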
8714 if (match(II.getOperand(1), m_APInt(C))) 8715 return ConstantRange::getNonEmpty(APInt::getZero(Width), 8716 APInt::getMaxValue(Width) - *C + 1); 8717 break; 8718 case Intrinsic::ssub_sat: 8719 if (match(II.getOperand(0), m_APInt(C))) { 8720 if (C->isNegative()) 8721 // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)]. 8722 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width), 8723 *C - APInt::getSignedMinValue(Width) + 8724 1); 8725 8726 // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX]. 8727 return ConstantRange::getNonEmpty(*C - APInt::getSignedMaxValue(Width), 8728 APInt::getSignedMaxValue(Width) + 1); 8729 } else if (match(II.getOperand(1), m_APInt(C))) { 8730 if (C->isNegative()) 8731 // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]: 8732 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) - *C, 8733 APInt::getSignedMaxValue(Width) + 1); 8734 8735 // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C]. 8736 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width), 8737 APInt::getSignedMaxValue(Width) - *C + 8738 1); 8739 } 8740 break; 8741 case Intrinsic::umin: 8742 case Intrinsic::umax: 8743 case Intrinsic::smin: 8744 case Intrinsic::smax: 8745 if (!match(II.getOperand(0), m_APInt(C)) && 8746 !match(II.getOperand(1), m_APInt(C))) 8747 break; 8748 8749 switch (II.getIntrinsicID()) { 8750 case Intrinsic::umin: 8751 return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1); 8752 case Intrinsic::umax: 8753 return ConstantRange::getNonEmpty(*C, APInt::getZero(Width)); 8754 case Intrinsic::smin: 8755 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width), 8756 *C + 1); 8757 case Intrinsic::smax: 8758 return ConstantRange::getNonEmpty(*C, 8759 APInt::getSignedMaxValue(Width) + 1); 8760 default: 8761 llvm_unreachable("Must be min/max intrinsic"); 8762 } 8763 break; 8764 case Intrinsic::abs: 8765 // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX], 8766 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN. 8767 if (match(II.getOperand(1), m_One())) 8768 return ConstantRange::getNonEmpty(APInt::getZero(Width), 8769 APInt::getSignedMaxValue(Width) + 1); 8770 8771 return ConstantRange::getNonEmpty(APInt::getZero(Width), 8772 APInt::getSignedMinValue(Width) + 1); 8773 case Intrinsic::vscale: 8774 if (!II.getParent() || !II.getFunction()) 8775 break; 8776 return getVScaleRange(II.getFunction(), Width); 8777 default: 8778 break; 8779 } 8780 8781 return ConstantRange::getFull(Width); 8782 } 8783 8784 static ConstantRange getRangeForSelectPattern(const SelectInst &SI, 8785 const InstrInfoQuery &IIQ) { 8786 unsigned BitWidth = SI.getType()->getScalarSizeInBits(); 8787 const Value *LHS = nullptr, *RHS = nullptr; 8788 SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS); 8789 if (R.Flavor == SPF_UNKNOWN) 8790 return ConstantRange::getFull(BitWidth); 8791 8792 if (R.Flavor == SelectPatternFlavor::SPF_ABS) { 8793 // If the negation part of the abs (in RHS) has the NSW flag, 8794 // then the result of abs(X) is [0..SIGNED_MAX], 8795 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN. 
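    // (Illustrative i8 case: without nsw on the negation, "sub i8 0, -128"
    // wraps back to -128, so SIGNED_MIN remains a possible abs result; with
    // nsw that input is poison and the range tightens to [0, 127].)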
8796 if (match(RHS, m_Neg(m_Specific(LHS))) && 8797 IIQ.hasNoSignedWrap(cast<Instruction>(RHS))) 8798 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), 8799 APInt::getSignedMaxValue(BitWidth) + 1); 8800 8801 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), 8802 APInt::getSignedMinValue(BitWidth) + 1); 8803 } 8804 8805 if (R.Flavor == SelectPatternFlavor::SPF_NABS) { 8806 // The result of -abs(X) is <= 0. 8807 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth), 8808 APInt(BitWidth, 1)); 8809 } 8810 8811 const APInt *C; 8812 if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C))) 8813 return ConstantRange::getFull(BitWidth); 8814 8815 switch (R.Flavor) { 8816 case SPF_UMIN: 8817 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), *C + 1); 8818 case SPF_UMAX: 8819 return ConstantRange::getNonEmpty(*C, APInt::getZero(BitWidth)); 8820 case SPF_SMIN: 8821 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth), 8822 *C + 1); 8823 case SPF_SMAX: 8824 return ConstantRange::getNonEmpty(*C, 8825 APInt::getSignedMaxValue(BitWidth) + 1); 8826 default: 8827 return ConstantRange::getFull(BitWidth); 8828 } 8829 } 8830 8831 static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) { 8832 // The maximum representable value of a half is 65504. For floats the maximum 8833 // value is 3.4e38 which requires roughly 129 bits. 8834 unsigned BitWidth = I->getType()->getScalarSizeInBits(); 8835 if (!I->getOperand(0)->getType()->getScalarType()->isHalfTy()) 8836 return; 8837 if (isa<FPToSIInst>(I) && BitWidth >= 17) { 8838 Lower = APInt(BitWidth, -65504); 8839 Upper = APInt(BitWidth, 65505); 8840 } 8841 8842 if (isa<FPToUIInst>(I) && BitWidth >= 16) { 8843 // For a fptoui the lower limit is left as 0. 8844 Upper = APInt(BitWidth, 65505); 8845 } 8846 } 8847 8848 ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned, 8849 bool UseInstrInfo, AssumptionCache *AC, 8850 const Instruction *CtxI, 8851 const DominatorTree *DT, 8852 unsigned Depth) { 8853 assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction"); 8854 8855 if (Depth == MaxAnalysisRecursionDepth) 8856 return ConstantRange::getFull(V->getType()->getScalarSizeInBits()); 8857 8858 const APInt *C; 8859 if (match(V, m_APInt(C))) 8860 return ConstantRange(*C); 8861 unsigned BitWidth = V->getType()->getScalarSizeInBits(); 8862 8863 if (auto *VC = dyn_cast<ConstantDataVector>(V)) { 8864 ConstantRange CR = ConstantRange::getEmpty(BitWidth); 8865 for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem; 8866 ++ElemIdx) 8867 CR = CR.unionWith(VC->getElementAsAPInt(ElemIdx)); 8868 return CR; 8869 } 8870 8871 InstrInfoQuery IIQ(UseInstrInfo); 8872 ConstantRange CR = ConstantRange::getFull(BitWidth); 8873 if (auto *BO = dyn_cast<BinaryOperator>(V)) { 8874 APInt Lower = APInt(BitWidth, 0); 8875 APInt Upper = APInt(BitWidth, 0); 8876 // TODO: Return ConstantRange. 
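    // Illustrative results (i8): 'udiv i8 %x, 3' gives [0, 86), i.e. 0..85,
    // and 'and i8 %x, 15' gives [0, 16).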
8877 setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned); 8878 CR = ConstantRange::getNonEmpty(Lower, Upper); 8879 } else if (auto *II = dyn_cast<IntrinsicInst>(V)) 8880 CR = getRangeForIntrinsic(*II); 8881 else if (auto *SI = dyn_cast<SelectInst>(V)) { 8882 ConstantRange CRTrue = computeConstantRange( 8883 SI->getTrueValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1); 8884 ConstantRange CRFalse = computeConstantRange( 8885 SI->getFalseValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1); 8886 CR = CRTrue.unionWith(CRFalse); 8887 CR = CR.intersectWith(getRangeForSelectPattern(*SI, IIQ)); 8888 } else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V)) { 8889 APInt Lower = APInt(BitWidth, 0); 8890 APInt Upper = APInt(BitWidth, 0); 8891 // TODO: Return ConstantRange. 8892 setLimitForFPToI(cast<Instruction>(V), Lower, Upper); 8893 CR = ConstantRange::getNonEmpty(Lower, Upper); 8894 } 8895 8896 if (auto *I = dyn_cast<Instruction>(V)) 8897 if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range)) 8898 CR = CR.intersectWith(getConstantRangeFromMetadata(*Range)); 8899 8900 if (CtxI && AC) { 8901 // Try to restrict the range based on information from assumptions. 8902 for (auto &AssumeVH : AC->assumptionsFor(V)) { 8903 if (!AssumeVH) 8904 continue; 8905 CallInst *I = cast<CallInst>(AssumeVH); 8906 assert(I->getParent()->getParent() == CtxI->getParent()->getParent() && 8907 "Got assumption for the wrong function!"); 8908 assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && 8909 "must be an assume intrinsic"); 8910 8911 if (!isValidAssumeForContext(I, CtxI, DT)) 8912 continue; 8913 Value *Arg = I->getArgOperand(0); 8914 ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg); 8915 // Currently we just use information from comparisons. 8916 if (!Cmp || Cmp->getOperand(0) != V) 8917 continue; 8918 // TODO: Set "ForSigned" parameter via Cmp->isSigned()? 8919 ConstantRange RHS = 8920 computeConstantRange(Cmp->getOperand(1), /* ForSigned */ false, 8921 UseInstrInfo, AC, I, DT, Depth + 1); 8922 CR = CR.intersectWith( 8923 ConstantRange::makeAllowedICmpRegion(Cmp->getPredicate(), RHS)); 8924 } 8925 } 8926 8927 return CR; 8928 } 8929
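// Illustrative use of the API above (a sketch only; the surrounding pass and
// variable names are hypothetical):
//   ConstantRange CR = computeConstantRange(V, /*ForSigned=*/false,
//                                           /*UseInstrInfo=*/true, AC, CtxI, DT);
//   if (CR.getUnsignedMax().ult(16))
//     ; // V is provably < 16 for every execution reaching CtxI.
// AC, CtxI and DT may be null; when both CtxI and AC are provided, dominating
// llvm.assume comparisons on V are used to narrow the returned range further.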