//===- ValueTracking.cpp - Walk computations to compute properties --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains routines that help analyze properties that chains of
// computations have.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomConditionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Analysis/WithCache.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
                                              cl::Hidden, cl::init(20));

/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
  if (unsigned BitWidth = Ty->getScalarSizeInBits())
    return BitWidth;

  return DL.getPointerTypeSizeInBits(Ty);
}
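// For example, getBitWidth(i32, DL) == 32 and getBitWidth(<4 x i8>, DL) == 8,
// while for a pointer type the width comes from the DataLayout (typically 64
// on 64-bit targets).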

// Given the provided Value and, potentially, a context instruction, return
// the preferred context instruction (if any).
static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
  // If we've been provided with a context instruction, then use that (provided
  // it has been inserted).
  if (CxtI && CxtI->getParent())
    return CxtI;

  // If the value is really an already-inserted instruction, then use that.
  CxtI = dyn_cast<Instruction>(V);
  if (CxtI && CxtI->getParent())
    return CxtI;

  return nullptr;
}

static const Instruction *safeCxtI(const Value *V1, const Value *V2,
                                   const Instruction *CxtI) {
  // If we've been provided with a context instruction, then use that (provided
  // it has been inserted).
  if (CxtI && CxtI->getParent())
    return CxtI;

  // If the value is really an already-inserted instruction, then use that.
  CxtI = dyn_cast<Instruction>(V1);
  if (CxtI && CxtI->getParent())
    return CxtI;

  CxtI = dyn_cast<Instruction>(V2);
  if (CxtI && CxtI->getParent())
    return CxtI;

  return nullptr;
}

static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
                                   const APInt &DemandedElts,
                                   APInt &DemandedLHS, APInt &DemandedRHS) {
  if (isa<ScalableVectorType>(Shuf->getType())) {
    assert(DemandedElts == APInt(1, 1));
    DemandedLHS = DemandedRHS = DemandedElts;
    return true;
  }

  int NumElts =
      cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
  return llvm::getShuffleDemandedElts(NumElts, Shuf->getShuffleMask(),
                                      DemandedElts, DemandedLHS, DemandedRHS);
}

static void computeKnownBits(const Value *V, const APInt &DemandedElts,
                             KnownBits &Known, unsigned Depth,
                             const SimplifyQuery &Q);

void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
                            const SimplifyQuery &Q) {
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
}

void llvm::computeKnownBits(const Value *V, KnownBits &Known,
                            const DataLayout &DL, unsigned Depth,
                            AssumptionCache *AC, const Instruction *CxtI,
                            const DominatorTree *DT, bool UseInstrInfo) {
  computeKnownBits(
      V, Known, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
                                 unsigned Depth, AssumptionCache *AC,
                                 const Instruction *CxtI,
                                 const DominatorTree *DT, bool UseInstrInfo) {
  return computeKnownBits(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
                                 const DataLayout &DL, unsigned Depth,
                                 AssumptionCache *AC, const Instruction *CxtI,
                                 const DominatorTree *DT, bool UseInstrInfo) {
  return computeKnownBits(
      V, DemandedElts, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
                                            const SimplifyQuery &SQ) {
  // Look for an inverted mask: (X & ~M) op (Y & M).
  {
    Value *M;
    if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
        match(RHS, m_c_And(m_Specific(M), m_Value())) &&
        isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT))
      return true;
  }

  // X op (Y & ~X)
  if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) &&
      isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
  // for constant Y.
  Value *Y;
  if (match(RHS,
            m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) &&
      isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) &&
      isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // Peek through extends to find a 'not' of the other side:
  // (ext Y) op ext(~Y)
  if (match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
      match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) &&
      isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // Look for: (A & B) op ~(A | B)
  {
    Value *A, *B;
    if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
        match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) &&
        isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) &&
        isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT))
      return true;
  }

  return false;
}

bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
                               const WithCache<const Value *> &RHSCache,
                               const SimplifyQuery &SQ) {
  const Value *LHS = LHSCache.getValue();
  const Value *RHS = RHSCache.getValue();

  assert(LHS->getType() == RHS->getType() &&
         "LHS and RHS should have the same type");
  assert(LHS->getType()->isIntOrIntVectorTy() &&
         "LHS and RHS should be integers");

  if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
      haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ))
    return true;

  return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ),
                                        RHSCache.getKnownBits(SQ));
}
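// When haveNoCommonBitsSet returns true, adding LHS and RHS cannot produce a
// carry, so LHS + RHS == LHS | RHS == LHS ^ RHS. For example, with
// LHS = (a & ~m) and RHS = (b & m), the inverted-mask special case above lets
// callers rewrite the addition as a disjoint or.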

bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
  return !I->user_empty() && all_of(I->users(), [](const User *U) {
    ICmpInst::Predicate P;
    return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
  });
}

static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
                                   const SimplifyQuery &Q);

bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
                                  bool OrZero, unsigned Depth,
                                  AssumptionCache *AC, const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownToBeAPowerOfTwo(
      V, OrZero, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
                           unsigned Depth, const SimplifyQuery &Q);

static bool isKnownNonZero(const Value *V, unsigned Depth,
                           const SimplifyQuery &Q);

bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
                          AssumptionCache *AC, const Instruction *CxtI,
                          const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownNonZero(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
                              unsigned Depth) {
  return computeKnownBits(V, Depth, SQ).isNonNegative();
}

bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
                           unsigned Depth) {
  if (auto *CI = dyn_cast<ConstantInt>(V))
    return CI->getValue().isStrictlyPositive();

  // TODO: We're doing two recursive queries here. We should factor this such
  // that only a single query is needed.
  return isKnownNonNegative(V, SQ, Depth) && ::isKnownNonZero(V, Depth, SQ);
}

bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
                           unsigned Depth) {
  return computeKnownBits(V, Depth, SQ).isNegative();
}

static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
                            const SimplifyQuery &Q);

bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
                           const DataLayout &DL, AssumptionCache *AC,
                           const Instruction *CxtI, const DominatorTree *DT,
                           bool UseInstrInfo) {
  return ::isKnownNonEqual(
      V1, V2, 0,
      SimplifyQuery(DL, DT, AC, safeCxtI(V2, V1, CxtI), UseInstrInfo));
}

bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
                             const SimplifyQuery &SQ, unsigned Depth) {
  KnownBits Known(Mask.getBitWidth());
  computeKnownBits(V, Known, Depth, SQ);
  return Mask.isSubsetOf(Known.Zero);
}

static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
                                   unsigned Depth, const SimplifyQuery &Q);

static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
                                   const SimplifyQuery &Q) {
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  return ComputeNumSignBits(V, DemandedElts, Depth, Q);
}

unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
                                  unsigned Depth, AssumptionCache *AC,
                                  const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::ComputeNumSignBits(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
                                         unsigned Depth, AssumptionCache *AC,
                                         const Instruction *CxtI,
                                         const DominatorTree *DT) {
  unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
  return V->getType()->getScalarSizeInBits() - SignBits + 1;
}
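// Example of the relationship above: an i32 value produced by sign-extending
// an i8 has 25 known sign bits, so ComputeMaxSignificantBits reports
// 32 - 25 + 1 = 8 significant bits.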

static void computeKnownBitsAddSub(bool Add, const Value *Op0,
                                   const Value *Op1, bool NSW,
                                   const APInt &DemandedElts,
                                   KnownBits &KnownOut, KnownBits &Known2,
                                   unsigned Depth, const SimplifyQuery &Q) {
  computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);

  // If one operand is unknown and we have no nowrap information,
  // the result will be unknown independently of the second operand.
  if (KnownOut.isUnknown() && !NSW)
    return;

  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
  KnownOut = KnownBits::computeForAddSub(Add, NSW, Known2, KnownOut);
}

static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
                                const APInt &DemandedElts, KnownBits &Known,
                                KnownBits &Known2, unsigned Depth,
                                const SimplifyQuery &Q) {
  computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);

  bool isKnownNegative = false;
  bool isKnownNonNegative = false;
  // If the multiplication is known not to overflow, compute the sign bit.
  if (NSW) {
    if (Op0 == Op1) {
      // The product of a number with itself is non-negative.
      isKnownNonNegative = true;
    } else {
      bool isKnownNonNegativeOp1 = Known.isNonNegative();
      bool isKnownNonNegativeOp0 = Known2.isNonNegative();
      bool isKnownNegativeOp1 = Known.isNegative();
      bool isKnownNegativeOp0 = Known2.isNegative();
      // The product of two numbers with the same sign is non-negative.
      isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
                           (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
      // The product of a negative number and a non-negative number is either
      // negative or zero.
      if (!isKnownNonNegative)
        isKnownNegative =
            (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
             Known2.isNonZero()) ||
            (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
    }
  }

  bool SelfMultiply = Op0 == Op1;
  if (SelfMultiply)
    SelfMultiply &=
        isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
  Known = KnownBits::mul(Known, Known2, SelfMultiply);

  // Only make use of no-wrap flags if we failed to compute the sign bit
  // directly. This matters if the multiplication always overflows, in
  // which case we prefer to follow the result of the direct computation,
  // though as the program is invoking undefined behaviour we can choose
  // whatever we like here.
  if (isKnownNonNegative && !Known.isNegative())
    Known.makeNonNegative();
  else if (isKnownNegative && !Known.isNonNegative())
    Known.makeNegative();
}

void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
                                             KnownBits &Known) {
  unsigned BitWidth = Known.getBitWidth();
  unsigned NumRanges = Ranges.getNumOperands() / 2;
  assert(NumRanges >= 1);

  Known.Zero.setAllBits();
  Known.One.setAllBits();

  for (unsigned i = 0; i < NumRanges; ++i) {
    ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
    ConstantInt *Upper =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
    ConstantRange Range(Lower->getValue(), Upper->getValue());

    // The first CommonPrefixBits of all values in Range are equal.
    unsigned CommonPrefixBits =
        (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
    APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
    APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
    Known.One &= UnsignedMax & Mask;
    Known.Zero &= ~UnsignedMax & Mask;
  }
}
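// Worked example: a load annotated with !range metadata for [32, 48) on an i8
// has min 0b00100000 and max 0b00101111; their xor has four leading zeros, so
// the top four bits are known and the value is known to look like 0b0010????.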

static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
  SmallVector<const Value *, 16> WorkSet(1, I);
  SmallPtrSet<const Value *, 32> Visited;
  SmallPtrSet<const Value *, 16> EphValues;

  // The instruction defining an assumption's condition itself is always
  // considered ephemeral to that assumption (even if it has other
  // non-ephemeral users). See r246696's test case for an example.
  if (is_contained(I->operands(), E))
    return true;

  while (!WorkSet.empty()) {
    const Value *V = WorkSet.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    // If all uses of this value are ephemeral, then so is this value.
    if (llvm::all_of(V->users(), [&](const User *U) {
          return EphValues.count(U);
        })) {
      if (V == E)
        return true;

      if (V == I || (isa<Instruction>(V) &&
                     !cast<Instruction>(V)->mayHaveSideEffects() &&
                     !cast<Instruction>(V)->isTerminator())) {
        EphValues.insert(V);
        if (const User *U = dyn_cast<User>(V))
          append_range(WorkSet, U->operands());
      }
    }
  }

  return false;
}

// Is this an intrinsic that cannot be speculated but also cannot trap?
bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
  if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
    return CI->isAssumeLikeIntrinsic();

  return false;
}

bool llvm::isValidAssumeForContext(const Instruction *Inv,
                                   const Instruction *CxtI,
                                   const DominatorTree *DT,
                                   bool AllowEphemerals) {
  // There are two restrictions on the use of an assume:
  //  1. The assume must dominate the context (or the control flow must
  //     reach the assume whenever it reaches the context).
  //  2. The context must not be in the assume's set of ephemeral values
  //     (otherwise we will use the assume to prove that the condition
  //     feeding the assume is trivially true, thus causing the removal of
  //     the assume).

  if (Inv->getParent() == CxtI->getParent()) {
    // If Inv and CtxI are in the same block, check if the assume (Inv) is first
    // in the BB.
    if (Inv->comesBefore(CxtI))
      return true;

    // Don't let an assume affect itself - this would cause the problems
    // `isEphemeralValueOf` is trying to prevent, and it would also make
    // the loop below go out of bounds.
    if (!AllowEphemerals && Inv == CxtI)
      return false;

    // The context comes first, but they're both in the same block.
    // Make sure there is nothing in between that might interrupt
    // the control flow, not even CxtI itself.
    // We limit the scan distance between the assume and its context instruction
    // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
    // it can be adjusted if needed (could be turned into a cl::opt).
    auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
    if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
      return false;

    return AllowEphemerals || !isEphemeralValueOf(Inv, CxtI);
  }

  // Inv and CxtI are in different blocks.
  if (DT) {
    if (DT->dominates(Inv, CxtI))
      return true;
  } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
    // We don't have a DT, but this trivially dominates.
    return true;
  }

  return false;
}

// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
// we still have enough information about `RHS` to conclude non-zero. For
// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
// so the extra compile time may not be worth it, but possibly a second API
// should be created for use outside of loops.
static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
  // v u> y implies v != 0.
  if (Pred == ICmpInst::ICMP_UGT)
    return true;

  // Special-case v != 0 to also handle v != null.
  if (Pred == ICmpInst::ICMP_NE)
    return match(RHS, m_Zero());

  // All other predicates - rely on generic ConstantRange handling.
  const APInt *C;
  auto Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits());
  if (match(RHS, m_APInt(C))) {
    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
    return !TrueValues.contains(Zero);
  }

  auto *VC = dyn_cast<ConstantDataVector>(RHS);
  if (VC == nullptr)
    return false;

  for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
       ++ElemIdx) {
    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
        Pred, VC->getElementAsAPInt(ElemIdx));
    if (TrueValues.contains(Zero))
      return false;
  }
  return true;
}
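// For example, "icmp ugt %v, %y" excludes zero for any %y, while for
// "icmp ult %v, 8" the exact region is [0, 8), which still contains zero, so
// the comparison does not prove %v is non-zero.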

static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
  // Use of assumptions is context-sensitive. If we don't have a context, we
  // cannot use them!
  if (!Q.AC || !Q.CxtI)
    return false;

  for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
    if (!Elem.Assume)
      continue;

    AssumeInst *I = cast<AssumeInst>(Elem.Assume);
    assert(I->getFunction() == Q.CxtI->getFunction() &&
           "Got assumption for the wrong function!");

    if (Elem.Index != AssumptionCache::ExprResultIdx) {
      if (!V->getType()->isPointerTy())
        continue;
      if (RetainedKnowledge RK = getKnowledgeFromBundle(
              *I, I->bundle_op_info_begin()[Elem.Index])) {
        if (RK.WasOn == V &&
            (RK.AttrKind == Attribute::NonNull ||
             (RK.AttrKind == Attribute::Dereferenceable &&
              !NullPointerIsDefined(Q.CxtI->getFunction(),
                                    V->getType()->getPointerAddressSpace()))) &&
            isValidAssumeForContext(I, Q.CxtI, Q.DT))
          return true;
      }
      continue;
    }

    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
    // runtime of ~O(#assumes * #values).

    Value *RHS;
    CmpInst::Predicate Pred;
    auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
    if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
      return false;

    if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
      return true;
  }

  return false;
}

static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
                                    Value *LHS, Value *RHS, KnownBits &Known,
                                    const SimplifyQuery &Q) {
  if (RHS->getType()->isPointerTy()) {
    // Handle comparison of pointer to null explicitly, as it will not be
    // covered by the m_APInt() logic below.
    if (LHS == V && match(RHS, m_Zero())) {
      switch (Pred) {
      case ICmpInst::ICMP_EQ:
        Known.setAllZero();
        break;
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_SGT:
        Known.makeNonNegative();
        break;
      case ICmpInst::ICMP_SLT:
        Known.makeNegative();
        break;
      default:
        break;
      }
    }
    return;
  }

  unsigned BitWidth = Known.getBitWidth();
  auto m_V =
      m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));

  const APInt *Mask, *C;
  uint64_t ShAmt;
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    // assume(V = C)
    if (match(LHS, m_V) && match(RHS, m_APInt(C))) {
      Known = Known.unionWith(KnownBits::makeConstant(*C));
      // assume(V & Mask = C)
    } else if (match(LHS, m_And(m_V, m_APInt(Mask))) &&
               match(RHS, m_APInt(C))) {
      // For one bits in Mask, we can propagate bits from C to V.
      Known.Zero |= ~*C & *Mask;
      Known.One |= *C & *Mask;
      // assume(V | Mask = C)
    } else if (match(LHS, m_Or(m_V, m_APInt(Mask))) && match(RHS, m_APInt(C))) {
      // For zero bits in Mask, we can propagate bits from C to V.
      Known.Zero |= ~*C & ~*Mask;
      Known.One |= *C & ~*Mask;
      // assume(V ^ Mask = C)
    } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) &&
               match(RHS, m_APInt(C))) {
      // Equivalent to assume(V == Mask ^ C)
      Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask));
      // assume(V << ShAmt = C)
    } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
               match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
      // For those bits in C that are known, we can propagate them to known
      // bits in V shifted to the right by ShAmt.
      KnownBits RHSKnown = KnownBits::makeConstant(*C);
      RHSKnown.Zero.lshrInPlace(ShAmt);
      RHSKnown.One.lshrInPlace(ShAmt);
      Known = Known.unionWith(RHSKnown);
      // assume(V >> ShAmt = C)
    } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
               match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
      KnownBits RHSKnown = KnownBits::makeConstant(*C);
      // For those bits in RHS that are known, we can propagate them to known
      // bits in V shifted to the left by ShAmt.
      Known.Zero |= RHSKnown.Zero << ShAmt;
      Known.One |= RHSKnown.One << ShAmt;
    }
    break;
  case ICmpInst::ICMP_NE: {
    // assume (V & B != 0) where B is a power of 2
    const APInt *BPow2;
    if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero()))
      Known.One |= *BPow2;
    break;
  }
  default:
    const APInt *Offset = nullptr;
    if (match(LHS, m_CombineOr(m_V, m_Add(m_V, m_APInt(Offset)))) &&
        match(RHS, m_APInt(C))) {
      ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
      if (Offset)
        LHSRange = LHSRange.sub(*Offset);
      Known = Known.unionWith(LHSRange.toKnownBits());
    }
    break;
  }
}
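// Worked example: given assume(icmp eq (and i8 %v, 0xF0), 0x30), the ICMP_EQ
// handling above records the masked bits, so %v becomes known as 0b0011????
// (Known.One = 0x30, Known.Zero = 0xC0).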

void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
                                       unsigned Depth, const SimplifyQuery &Q) {
  if (!Q.CxtI)
    return;

  if (Q.DC && Q.DT) {
    // Handle dominating conditions.
    for (BranchInst *BI : Q.DC->conditionsFor(V)) {
      auto *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
      if (!Cmp)
        continue;

      BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
      if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
        computeKnownBitsFromCmp(V, Cmp->getPredicate(), Cmp->getOperand(0),
                                Cmp->getOperand(1), Known, Q);

      BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
      if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
        computeKnownBitsFromCmp(V, Cmp->getInversePredicate(),
                                Cmp->getOperand(0), Cmp->getOperand(1), Known,
                                Q);
    }

    if (Known.hasConflict())
      Known.resetAll();
  }

  if (!Q.AC)
    return;

  unsigned BitWidth = Known.getBitWidth();

  // Note that the patterns below need to be kept in sync with the code
  // in AssumptionCache::updateAffectedValues.

  for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
    if (!Elem.Assume)
      continue;

    AssumeInst *I = cast<AssumeInst>(Elem.Assume);
    assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
           "Got assumption for the wrong function!");

    if (Elem.Index != AssumptionCache::ExprResultIdx) {
      if (!V->getType()->isPointerTy())
        continue;
      if (RetainedKnowledge RK = getKnowledgeFromBundle(
              *I, I->bundle_op_info_begin()[Elem.Index])) {
        if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
            isPowerOf2_64(RK.ArgValue) &&
            isValidAssumeForContext(I, Q.CxtI, Q.DT))
          Known.Zero.setLowBits(Log2_64(RK.ArgValue));
      }
      continue;
    }

    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
    // runtime of ~O(#assumes * #values).

    Value *Arg = I->getArgOperand(0);

    if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      (void)BitWidth;
      Known.setAllOnes();
      return;
    }
    if (match(Arg, m_Not(m_Specific(V))) &&
        isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      (void)BitWidth;
      Known.setAllZero();
      return;
    }

    // The remaining tests are all recursive, so bail out if we hit the limit.
    if (Depth == MaxAnalysisRecursionDepth)
      continue;

    ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
    if (!Cmp)
      continue;

    if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
      continue;

    computeKnownBitsFromCmp(V, Cmp->getPredicate(), Cmp->getOperand(0),
                            Cmp->getOperand(1), Known, Q);
  }

  // Conflicting assumption: Undefined behavior will occur on this execution
  // path.
  if (Known.hasConflict())
    Known.resetAll();
}

/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is a
/// pre-allocated temporary with the same bit width as Known and on return
/// contains the known bits of the shift value source. KF is an
/// operator-specific function that, given the known-bits and a shift amount,
/// computes the implied known-bits of the shift operator's result for that
/// shift amount. The results from calling KF are conservatively combined for
/// all permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
    const Operator *I, const APInt &DemandedElts, KnownBits &Known,
    KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
    function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
  // To limit compile-time impact, only query isKnownNonZero() if we know at
  // least something about the shift amount.
  bool ShAmtNonZero =
      Known.isNonZero() ||
      (Known.getMaxValue().ult(Known.getBitWidth()) &&
       isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q));
  Known = KF(Known2, Known, ShAmtNonZero);
}

static KnownBits
getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
                         const KnownBits &KnownLHS, const KnownBits &KnownRHS,
                         unsigned Depth, const SimplifyQuery &Q) {
  unsigned BitWidth = KnownLHS.getBitWidth();
  KnownBits KnownOut(BitWidth);
  bool IsAnd = false;
  bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
  Value *X = nullptr, *Y = nullptr;

  switch (I->getOpcode()) {
  case Instruction::And:
    KnownOut = KnownLHS & KnownRHS;
    IsAnd = true;
    // and(x, -x) is a common idiom that will clear all but the lowest set
    // bit. If we have a single known bit in x, we can clear all bits
    // above it.
    // TODO: instcombine often reassociates independent `and` which can hide
    // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
    if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
      // -(-x) == x so using whichever (LHS/RHS) gets us a better result.
      if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
        KnownOut = KnownLHS.blsi();
      else
        KnownOut = KnownRHS.blsi();
    }
    break;
  case Instruction::Or:
    KnownOut = KnownLHS | KnownRHS;
    break;
  case Instruction::Xor:
    KnownOut = KnownLHS ^ KnownRHS;
    // xor(x, x-1) is a common idiom that will clear all but the lowest set
    // bit. If we have a single known bit in x, we can clear all bits
    // above it.
    // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C !=
    // -1 but for the purpose of demanded bits (xor(x, x-C) &
    // Demanded) == (xor(x, x-1) & Demanded). Extend the xor pattern
    // to use arbitrary C if xor(x, x-C) is the same as xor(x, x-1).
    if (HasKnownOne &&
        match(I, m_c_Xor(m_Value(X), m_c_Add(m_Deferred(X), m_AllOnes())))) {
      const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
      KnownOut = XBits.blsmsk();
    }
    break;
  default:
    llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
  }

  // and(x, add (x, -1)) is a common idiom that always clears the low bit;
  // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
  // Here we handle the more general case of adding any odd number by
  // matching the form and/xor/or(x, add(x, y)) where y is odd.
  // TODO: This could be generalized to clearing any bit set in y where the
  // following bit is known to be unset in y.
  if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
      (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) {
    KnownBits KnownY(BitWidth);
    computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q);
    if (KnownY.countMinTrailingOnes() > 0) {
      if (IsAnd)
        KnownOut.Zero.setBit(0);
      else
        KnownOut.One.setBit(0);
    }
  }
  return KnownOut;
}
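// Example of the blsi/blsmsk reasoning above: if x is known to have the form
// 0b????1000 (bit 3 known one, bits 0-2 known zero), then x & -x is exactly
// 0b00001000, and x ^ (x - 1) is exactly 0b00001111.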

// Public so this can be used in `SimplifyDemandedUseBits`.
KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
                                             const KnownBits &KnownLHS,
                                             const KnownBits &KnownRHS,
                                             unsigned Depth,
                                             const SimplifyQuery &SQ) {
  auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);

  return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
                                  SQ);
}

ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
  Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
  // Without vscale_range, we only know that vscale is non-zero.
  if (!Attr.isValid())
    return ConstantRange(APInt(BitWidth, 1), APInt::getZero(BitWidth));

  unsigned AttrMin = Attr.getVScaleRangeMin();
  // Minimum is larger than vscale width, result is always poison.
  if ((unsigned)llvm::bit_width(AttrMin) > BitWidth)
    return ConstantRange::getEmpty(BitWidth);

  APInt Min(BitWidth, AttrMin);
  std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
  if (!AttrMax || (unsigned)llvm::bit_width(*AttrMax) > BitWidth)
    return ConstantRange(Min, APInt::getZero(BitWidth));

  return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
}
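// For example, on a function with vscale_range(2,16) this returns the
// half-open range [2, 17) at the requested bit width, and with no attribute it
// returns the wrapped range [1, 0), i.e. "any non-zero value".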

static void computeKnownBitsFromOperator(const Operator *I,
                                         const APInt &DemandedElts,
                                         KnownBits &Known, unsigned Depth,
                                         const SimplifyQuery &Q) {
  unsigned BitWidth = Known.getBitWidth();

  KnownBits Known2(BitWidth);
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    if (MDNode *MD =
            Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);
    break;
  case Instruction::And:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Or:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Xor:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Mul: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts,
                        Known, Known2, Depth, Q);
    break;
  }
  case Instruction::UDiv: {
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    Known =
        KnownBits::udiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
    break;
  }
  case Instruction::SDiv: {
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    Known =
        KnownBits::sdiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
    break;
  }
  case Instruction::Select: {
    computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(Known2);
    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
    break; // Can't work with floating point.
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case Instruction::ZExt:
  case Instruction::Trunc: {
    Type *SrcTy = I->getOperand(0)->getType();

    unsigned SrcBitWidth;
    // Note that we handle pointer operands here because of inttoptr/ptrtoint
    // which fall through here.
    Type *ScalarTy = SrcTy->getScalarType();
    SrcBitWidth = ScalarTy->isPointerTy()
                      ? Q.DL.getPointerTypeSizeInBits(ScalarTy)
                      : Q.DL.getTypeSizeInBits(ScalarTy);

    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    Known = Known.anyextOrTrunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I);
        Inst && Inst->hasNonNeg() && !Known.isNegative())
      Known.makeNonNegative();
    Known = Known.zextOrTrunc(BitWidth);
    break;
  }
  case Instruction::BitCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    if (SrcTy->isIntOrPtrTy() &&
        // TODO: For now, not handling conversions like:
        // (bitcast i64 %x to <2 x i32>)
        !I->getType()->isVectorTy()) {
      computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
      break;
    }

    // Handle cast from vector integer type to scalar or vector integer.
    auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
    if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
        !I->getType()->isIntOrIntVectorTy() ||
        isa<ScalableVectorType>(I->getType()))
      break;

    // Look through a cast from narrow vector elements to wider type.
    // Examples: v4i32 -> v2i64, v3i8 -> i24
    unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
    if (BitWidth % SubBitWidth == 0) {
      // Known bits are automatically intersected across demanded elements of a
      // vector. So for example, if a bit is computed as known zero, it must be
      // zero across all demanded elements of the vector.
      //
      // For this bitcast, each demanded element of the output is sub-divided
      // across a set of smaller vector elements in the source vector. To get
      // the known bits for an entire element of the output, compute the known
      // bits for each sub-element sequentially. This is done by shifting the
      // one-set-bit demanded elements parameter across the sub-elements for
      // consecutive calls to computeKnownBits. We are using the demanded
      // elements parameter as a mask operator.
      //
      // The known bits of each sub-element are then inserted into place
      // (dependent on endian) to form the full result of known bits.
      unsigned NumElts = DemandedElts.getBitWidth();
      unsigned SubScale = BitWidth / SubBitWidth;
      APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i])
          SubDemandedElts.setBit(i * SubScale);
      }

      KnownBits KnownSrc(SubBitWidth);
      for (unsigned i = 0; i != SubScale; ++i) {
        computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
                         Depth + 1, Q);
        unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
        Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
      }
    }
    break;
  }
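  // Endianness example for the BitCast case above: for (bitcast <4 x i32> to
  // <2 x i64>) on a little-endian target, result element 0 takes source
  // element 0 as its low 32 bits and source element 1 as its high 32 bits; a
  // big-endian target inserts the two sub-elements in the opposite order.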
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    Known = Known.trunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    Known = Known.sext(BitWidth);
    break;
  }
  case Instruction::Shl: {
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                         bool ShAmtNonZero) {
      return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    // Trailing zeros of a left-shifted constant never decrease.
    const APInt *C;
    if (match(I->getOperand(0), m_APInt(C)))
      Known.Zero.setLowBits(C->countr_zero());
    break;
  }
  case Instruction::LShr: {
    auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                 bool ShAmtNonZero) {
      return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    // Leading zeros of a right-shifted constant never decrease.
    const APInt *C;
    if (match(I->getOperand(0), m_APInt(C)))
      Known.Zero.setHighBits(C->countl_zero());
    break;
  }
  case Instruction::AShr: {
    auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                 bool ShAmtNonZero) {
      return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    break;
  }
  case Instruction::Sub: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
                           DemandedElts, Known, Known2, Depth, Q);
    break;
  }
  case Instruction::Add: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
                           DemandedElts, Known, Known2, Depth, Q);
    break;
  }
  case Instruction::SRem:
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    Known = KnownBits::srem(Known, Known2);
    break;

  case Instruction::URem:
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    Known = KnownBits::urem(Known, Known2);
    break;
  case Instruction::Alloca:
    Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
    break;
  case Instruction::GetElementPtr: {
    // Analyze all of the subscripts of this getelementptr instruction
    // to determine if we can prove known low zero bits.
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    // Accumulate the constant indices in a separate variable
    // to minimize the number of calls to computeForAddSub.
    APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);

    gep_type_iterator GTI = gep_type_begin(I);
    for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
      // TrailZ can only become smaller, short-circuit if we hit zero.
      if (Known.isUnknown())
        break;

      Value *Index = I->getOperand(i);

      // Handle case when index is zero.
      Constant *CIndex = dyn_cast<Constant>(Index);
      if (CIndex && CIndex->isZeroValue())
        continue;

      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // Handle struct member offset arithmetic.

        assert(CIndex &&
               "Access to structure field must be known at compile time");

        if (CIndex->getType()->isVectorTy())
          Index = CIndex->getSplatValue();

        unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
        const StructLayout *SL = Q.DL.getStructLayout(STy);
        uint64_t Offset = SL->getElementOffset(Idx);
        AccConstIndices += Offset;
        continue;
      }

      // Handle array index arithmetic.
      Type *IndexedTy = GTI.getIndexedType();
      if (!IndexedTy->isSized()) {
        Known.resetAll();
        break;
      }

      unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
      KnownBits IndexBits(IndexBitWidth);
      computeKnownBits(Index, IndexBits, Depth + 1, Q);
      TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
      uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
      KnownBits ScalingFactor(IndexBitWidth);
      // Multiply by current sizeof type.
      // &A[i] == A + i * sizeof(*A[i]).
      if (IndexTypeSize.isScalable()) {
        // For scalable types the only thing we know about sizeof is
        // that this is a multiple of the minimum size.
        ScalingFactor.Zero.setLowBits(llvm::countr_zero(TypeSizeInBytes));
      } else if (IndexBits.isConstant()) {
        APInt IndexConst = IndexBits.getConstant();
        APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
        IndexConst *= ScalingFactor;
        AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
        continue;
      } else {
        ScalingFactor =
            KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes));
      }
      IndexBits = KnownBits::mul(IndexBits, ScalingFactor);

      // If the offsets have a different width from the pointer, according
      // to the language reference we need to sign-extend or truncate them
      // to the width of the pointer.
      IndexBits = IndexBits.sextOrTrunc(BitWidth);

      // Note that inbounds does *not* guarantee nsw for the addition, as only
      // the offset is signed, while the base address is unsigned.
      Known = KnownBits::computeForAddSub(
          /*Add=*/true, /*NSW=*/false, Known, IndexBits);
    }
    if (!Known.isUnknown() && !AccConstIndices.isZero()) {
      KnownBits Index = KnownBits::makeConstant(AccConstIndices);
      Known = KnownBits::computeForAddSub(
          /*Add=*/true, /*NSW=*/false, Known, Index);
    }
    break;
  }
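  // Example for the GetElementPtr case above: indexing into an array of i32
  // scales each index by 4, so the scaled offset always has at least two
  // trailing zero bits; if the base pointer is known 4-byte aligned, the low
  // two bits of the GEP result therefore remain known zero.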
  case Instruction::PHI: {
    const PHINode *P = cast<PHINode>(I);
    BinaryOperator *BO = nullptr;
    Value *R = nullptr, *L = nullptr;
    if (matchSimpleRecurrence(P, BO, R, L)) {
      // Handle the case of a simple two-predecessor recurrence PHI.
      // There's a lot more that could theoretically be done here, but
      // this is sufficient to catch some interesting cases.
      unsigned Opcode = BO->getOpcode();

      // If this is a shift recurrence, we know the bits being shifted in.
      // We can combine that with information about the start value of the
      // recurrence to conclude facts about the result.
      if ((Opcode == Instruction::LShr || Opcode == Instruction::AShr ||
           Opcode == Instruction::Shl) &&
          BO->getOperand(0) == I) {

        // We have matched a recurrence of the form:
        // %iv = [R, %entry], [%iv.next, %backedge]
        // %iv.next = shift_op %iv, L

        // Recurse with the phi context to avoid concern about whether facts
        // inferred hold at original context instruction. TODO: It may be
        // correct to use the original context. If warranted, explore and
        // add sufficient tests to cover.
        SimplifyQuery RecQ = Q;
        RecQ.CxtI = P;
        computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
        switch (Opcode) {
        case Instruction::Shl:
          // A shl recurrence will only increase the trailing zeros
          Known.Zero.setLowBits(Known2.countMinTrailingZeros());
          break;
        case Instruction::LShr:
          // A lshr recurrence will preserve the leading zeros of the
          // start value
          Known.Zero.setHighBits(Known2.countMinLeadingZeros());
          break;
        case Instruction::AShr:
          // An ashr recurrence will extend the initial sign bit
          Known.Zero.setHighBits(Known2.countMinLeadingZeros());
          Known.One.setHighBits(Known2.countMinLeadingOnes());
          break;
        };
      }
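      // For instance, with %iv = phi i32 [ 8, %entry ], [ %iv.next, %loop ]
      // and %iv.next = lshr i32 %iv, 1, the 28 leading zeros of the start
      // value 8 are preserved, so %iv is known to be less than 16.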

      // Check for operations that have the property that if
      // both their operands have low zero bits, the result
      // will have low zero bits.
      if (Opcode == Instruction::Add ||
          Opcode == Instruction::Sub ||
          Opcode == Instruction::And ||
          Opcode == Instruction::Or ||
          Opcode == Instruction::Mul) {
        // Change the context instruction to the "edge" that flows into the
        // phi. This is important because that is where the value is actually
        // "evaluated" even though it is used later somewhere else. (see also
        // D69571).
        SimplifyQuery RecQ = Q;

        unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
        Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
        Instruction *LInst = P->getIncomingBlock(1 - OpNum)->getTerminator();

        // Ok, we have a PHI of the form L op= R. Check for low
        // zero bits.
        RecQ.CxtI = RInst;
        computeKnownBits(R, Known2, Depth + 1, RecQ);

        // We need to take the minimum number of known bits
        KnownBits Known3(BitWidth);
        RecQ.CxtI = LInst;
        computeKnownBits(L, Known3, Depth + 1, RecQ);

        Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
                                       Known3.countMinTrailingZeros()));

        auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
        if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
          // If initial value of recurrence is nonnegative, and we are adding
          // a nonnegative number with nsw, the result can only be nonnegative
          // or poison value regardless of the number of times we execute the
          // add in phi recurrence. If initial value is negative and we are
          // adding a negative number with nsw, the result can only be
          // negative or poison value. Similar arguments apply to sub and mul.
          //
          // (add non-negative, non-negative) --> non-negative
          // (add negative, negative) --> negative
          if (Opcode == Instruction::Add) {
            if (Known2.isNonNegative() && Known3.isNonNegative())
              Known.makeNonNegative();
            else if (Known2.isNegative() && Known3.isNegative())
              Known.makeNegative();
          }

          // (sub nsw non-negative, negative) --> non-negative
          // (sub nsw negative, non-negative) --> negative
          else if (Opcode == Instruction::Sub && BO->getOperand(0) == I) {
            if (Known2.isNonNegative() && Known3.isNegative())
              Known.makeNonNegative();
            else if (Known2.isNegative() && Known3.isNonNegative())
              Known.makeNegative();
          }

          // (mul nsw non-negative, non-negative) --> non-negative
          else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
                   Known3.isNonNegative())
            Known.makeNonNegative();
        }

        break;
      }
    }

    // Unreachable blocks may have zero-operand PHI nodes.
    if (P->getNumIncomingValues() == 0)
      break;

    // Otherwise take the unions of the known bit sets of the operands,
    // taking conservative care to avoid excessive recursion.
    if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
      // Skip if every incoming value references to ourself.
      if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
        break;

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) {
        Value *IncValue = P->getIncomingValue(u);
        // Skip direct self references.
        if (IncValue == P) continue;

        // Change the context instruction to the "edge" that flows into the
        // phi. This is important because that is where the value is actually
        // "evaluated" even though it is used later somewhere else. (see also
        // D69571).
        SimplifyQuery RecQ = Q;
        RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();

        Known2 = KnownBits(BitWidth);

        // Recurse, but cap the recursion to one level, because we don't
        // want to waste time spinning around in loops.
        // TODO: See if we can base recursion limiter on number of incoming phi
        // edges so we don't overly clamp analysis.
        computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ);

        // See if we can further use a conditional branch into the phi
        // to help us determine the range of the value.
        if (!Known2.isConstant()) {
          ICmpInst::Predicate Pred;
          const APInt *RHSC;
          BasicBlock *TrueSucc, *FalseSucc;
          // TODO: Use RHS Value and compute range from its known bits.
          if (match(RecQ.CxtI,
                    m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
                         m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
            // Check for cases of duplicate successors.
            if ((TrueSucc == P->getParent()) != (FalseSucc == P->getParent())) {
              // If we're using the false successor, invert the predicate.
              if (FalseSucc == P->getParent())
                Pred = CmpInst::getInversePredicate(Pred);
              // Get the knownbits implied by the incoming phi condition.
              auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
              KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits());
              // We can have conflicts here if we are analyzing dead code (it's
              // impossible for us to reach this BB based on the icmp).
              if (KnownUnion.hasConflict()) {
                // No reason to continue analyzing in a known dead region, so
                // just resetAll and break. This will cause us to also exit the
                // outer loop.
                Known.resetAll();
                break;
              }
              Known2 = KnownUnion;
            }
          }
        }

        Known = Known.intersectWith(Known2);
        // If all bits have been ruled out, there's no need to check
        // more operands.
        if (Known.isUnknown())
          break;
      }
    }
    break;
  }
  case Instruction::Call:
  case Instruction::Invoke:
    // If range metadata is attached to this call, set known bits from that,
    // and then intersect with known bits based on other properties of the
    // function.
    if (MDNode *MD =
            Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);
    if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) {
      if (RV->getType() == I->getType()) {
        computeKnownBits(RV, Known2, Depth + 1, Q);
        Known = Known.unionWith(Known2);
      }
    }
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::abs: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
        Known = Known2.abs(IntMinIsPoison);
        break;
      }
      case Intrinsic::bitreverse:
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        Known.Zero |= Known2.Zero.reverseBits();
        Known.One |= Known2.One.reverseBits();
        break;
      case Intrinsic::bswap:
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        Known.Zero |= Known2.Zero.byteSwap();
        Known.One |= Known2.One.byteSwap();
        break;
      case Intrinsic::ctlz: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        // If we have a known 1, its position is our upper bound.
        unsigned PossibleLZ = Known2.countMaxLeadingZeros();
        // If this call is poison for 0 input, the result will be less than 2^n.
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
          PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
        unsigned LowBits = llvm::bit_width(PossibleLZ);
        Known.Zero.setBitsFrom(LowBits);
        break;
      }
      case Intrinsic::cttz: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        // If we have a known 1, its position is our upper bound.
        unsigned PossibleTZ = Known2.countMaxTrailingZeros();
        // If this call is poison for 0 input, the result will be less than 2^n.
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
          PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
        unsigned LowBits = llvm::bit_width(PossibleTZ);
        Known.Zero.setBitsFrom(LowBits);
        break;
      }
      case Intrinsic::ctpop: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        // We can bound the space the count needs. Also, bits known to be zero
        // can't contribute to the population.
        unsigned BitsPossiblySet = Known2.countMaxPopulation();
        unsigned LowBits = llvm::bit_width(BitsPossiblySet);
        Known.Zero.setBitsFrom(LowBits);
        // TODO: we could bound KnownOne using the lower bound on the number
        // of bits which might be set provided by popcnt KnownOne2.
        break;
      }
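      // Example of the bounds above: for an i32 ctpop whose operand has its
      // top 24 bits known zero, at most 8 bits can be set, so bit_width(8) == 4
      // and bits 4 and up of the result are known zero. The ctlz/cttz cases
      // bound the result the same way.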
1499 uint64_t ShiftAmt = SA->urem(BitWidth); 1500 if (II->getIntrinsicID() == Intrinsic::fshr) 1501 ShiftAmt = BitWidth - ShiftAmt; 1502 1503 KnownBits Known3(BitWidth); 1504 computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); 1505 computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q); 1506 1507 Known.Zero = 1508 Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt); 1509 Known.One = 1510 Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt); 1511 break; 1512 } 1513 case Intrinsic::uadd_sat: 1514 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1515 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1516 Known = KnownBits::uadd_sat(Known, Known2); 1517 break; 1518 case Intrinsic::usub_sat: 1519 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1520 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1521 Known = KnownBits::usub_sat(Known, Known2); 1522 break; 1523 case Intrinsic::sadd_sat: 1524 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1525 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1526 Known = KnownBits::sadd_sat(Known, Known2); 1527 break; 1528 case Intrinsic::ssub_sat: 1529 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1530 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1531 Known = KnownBits::ssub_sat(Known, Known2); 1532 break; 1533 case Intrinsic::umin: 1534 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1535 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1536 Known = KnownBits::umin(Known, Known2); 1537 break; 1538 case Intrinsic::umax: 1539 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1540 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1541 Known = KnownBits::umax(Known, Known2); 1542 break; 1543 case Intrinsic::smin: 1544 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1545 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1546 Known = KnownBits::smin(Known, Known2); 1547 break; 1548 case Intrinsic::smax: 1549 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1550 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1551 Known = KnownBits::smax(Known, Known2); 1552 break; 1553 case Intrinsic::ptrmask: { 1554 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1555 1556 const Value *Mask = I->getOperand(1); 1557 Known2 = KnownBits(Mask->getType()->getScalarSizeInBits()); 1558 computeKnownBits(Mask, Known2, Depth + 1, Q); 1559 // TODO: 1-extend would be more precise. 1560 Known &= Known2.anyextOrTrunc(BitWidth); 1561 break; 1562 } 1563 case Intrinsic::x86_sse42_crc32_64_64: 1564 Known.Zero.setBitsFrom(32); 1565 break; 1566 case Intrinsic::riscv_vsetvli: 1567 case Intrinsic::riscv_vsetvlimax: 1568 // Assume that VL output is <= 65536. 1569 // TODO: Take SEW and LMUL into account. 1570 if (BitWidth > 17) 1571 Known.Zero.setBitsFrom(17); 1572 break; 1573 case Intrinsic::vscale: { 1574 if (!II->getParent() || !II->getFunction()) 1575 break; 1576 1577 Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits(); 1578 break; 1579 } 1580 } 1581 } 1582 break; 1583 case Instruction::ShuffleVector: { 1584 auto *Shuf = dyn_cast<ShuffleVectorInst>(I); 1585 // FIXME: Do we need to handle ConstantExpr involving shufflevectors? 1586 if (!Shuf) { 1587 Known.resetAll(); 1588 return; 1589 } 1590 // For undef elements, we don't know anything about the common state of 1591 // the shuffle result. 
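    // For instance (illustrative):
    //   %r = shufflevector <2 x i8> %a, <2 x i8> %b, <2 x i32> <i32 0, i32 2>
    // demands lane 0 of %a and lane 0 of %b, so the result's known bits are
    // the intersection of whatever is known about those two source lanes.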
1592 APInt DemandedLHS, DemandedRHS; 1593 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) { 1594 Known.resetAll(); 1595 return; 1596 } 1597 Known.One.setAllBits(); 1598 Known.Zero.setAllBits(); 1599 if (!!DemandedLHS) { 1600 const Value *LHS = Shuf->getOperand(0); 1601 computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q); 1602 // If we don't know any bits, early out. 1603 if (Known.isUnknown()) 1604 break; 1605 } 1606 if (!!DemandedRHS) { 1607 const Value *RHS = Shuf->getOperand(1); 1608 computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q); 1609 Known = Known.intersectWith(Known2); 1610 } 1611 break; 1612 } 1613 case Instruction::InsertElement: { 1614 if (isa<ScalableVectorType>(I->getType())) { 1615 Known.resetAll(); 1616 return; 1617 } 1618 const Value *Vec = I->getOperand(0); 1619 const Value *Elt = I->getOperand(1); 1620 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2)); 1621 // Early out if the index is non-constant or out-of-range. 1622 unsigned NumElts = DemandedElts.getBitWidth(); 1623 if (!CIdx || CIdx->getValue().uge(NumElts)) { 1624 Known.resetAll(); 1625 return; 1626 } 1627 Known.One.setAllBits(); 1628 Known.Zero.setAllBits(); 1629 unsigned EltIdx = CIdx->getZExtValue(); 1630 // Do we demand the inserted element? 1631 if (DemandedElts[EltIdx]) { 1632 computeKnownBits(Elt, Known, Depth + 1, Q); 1633 // If we don't know any bits, early out. 1634 if (Known.isUnknown()) 1635 break; 1636 } 1637 // We don't need the base vector element that has been inserted. 1638 APInt DemandedVecElts = DemandedElts; 1639 DemandedVecElts.clearBit(EltIdx); 1640 if (!!DemandedVecElts) { 1641 computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q); 1642 Known = Known.intersectWith(Known2); 1643 } 1644 break; 1645 } 1646 case Instruction::ExtractElement: { 1647 // Look through extract element. If the index is non-constant or 1648 // out-of-range demand all elements, otherwise just the extracted element. 
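    // E.g. (illustrative) extractelement <4 x i32> %v, i64 2 only needs the
    // known bits of lane 2, while a variable or out-of-range index
    // conservatively demands all four lanes of %v.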
1649 const Value *Vec = I->getOperand(0); 1650 const Value *Idx = I->getOperand(1); 1651 auto *CIdx = dyn_cast<ConstantInt>(Idx); 1652 if (isa<ScalableVectorType>(Vec->getType())) { 1653 // FIXME: there's probably *something* we can do with scalable vectors 1654 Known.resetAll(); 1655 break; 1656 } 1657 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements(); 1658 APInt DemandedVecElts = APInt::getAllOnes(NumElts); 1659 if (CIdx && CIdx->getValue().ult(NumElts)) 1660 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); 1661 computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q); 1662 break; 1663 } 1664 case Instruction::ExtractValue: 1665 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) { 1666 const ExtractValueInst *EVI = cast<ExtractValueInst>(I); 1667 if (EVI->getNumIndices() != 1) break; 1668 if (EVI->getIndices()[0] == 0) { 1669 switch (II->getIntrinsicID()) { 1670 default: break; 1671 case Intrinsic::uadd_with_overflow: 1672 case Intrinsic::sadd_with_overflow: 1673 computeKnownBitsAddSub(true, II->getArgOperand(0), 1674 II->getArgOperand(1), false, DemandedElts, 1675 Known, Known2, Depth, Q); 1676 break; 1677 case Intrinsic::usub_with_overflow: 1678 case Intrinsic::ssub_with_overflow: 1679 computeKnownBitsAddSub(false, II->getArgOperand(0), 1680 II->getArgOperand(1), false, DemandedElts, 1681 Known, Known2, Depth, Q); 1682 break; 1683 case Intrinsic::umul_with_overflow: 1684 case Intrinsic::smul_with_overflow: 1685 computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false, 1686 DemandedElts, Known, Known2, Depth, Q); 1687 break; 1688 } 1689 } 1690 } 1691 break; 1692 case Instruction::Freeze: 1693 if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT, 1694 Depth + 1)) 1695 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1696 break; 1697 } 1698 } 1699 1700 /// Determine which bits of V are known to be either zero or one and return 1701 /// them. 1702 KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, 1703 unsigned Depth, const SimplifyQuery &Q) { 1704 KnownBits Known(getBitWidth(V->getType(), Q.DL)); 1705 ::computeKnownBits(V, DemandedElts, Known, Depth, Q); 1706 return Known; 1707 } 1708 1709 /// Determine which bits of V are known to be either zero or one and return 1710 /// them. 1711 KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth, 1712 const SimplifyQuery &Q) { 1713 KnownBits Known(getBitWidth(V->getType(), Q.DL)); 1714 computeKnownBits(V, Known, Depth, Q); 1715 return Known; 1716 } 1717 1718 /// Determine which bits of V are known to be either zero or one and return 1719 /// them in the Known bit set. 1720 /// 1721 /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that 1722 /// we cannot optimize based on the assumption that it is zero without changing 1723 /// it to be an explicit zero. If we don't change it to zero, other code could 1724 /// optimized based on the contradictory assumption that it is non-zero. 1725 /// Because instcombine aggressively folds operations with undef args anyway, 1726 /// this won't lose us code quality. 1727 /// 1728 /// This function is defined on values with integer type, values with pointer 1729 /// type, and vectors of integers. In the case 1730 /// where V is a vector, known zero, and known one values are the 1731 /// same width as the vector element, and the bit is set only if it is true 1732 /// for all of the demanded elements in the vector specified by DemandedElts. 
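/// For example (illustrative): for a <4 x i8> value with DemandedElts ==
/// 0b0101, a bit is reported as known zero (or one) only if it is known zero
/// (or one) in both lane 0 and lane 2; lanes 1 and 3 are ignored entirely.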
1733 void computeKnownBits(const Value *V, const APInt &DemandedElts, 1734 KnownBits &Known, unsigned Depth, 1735 const SimplifyQuery &Q) { 1736 if (!DemandedElts) { 1737 // No demanded elts, better to assume we don't know anything. 1738 Known.resetAll(); 1739 return; 1740 } 1741 1742 assert(V && "No Value?"); 1743 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 1744 1745 #ifndef NDEBUG 1746 Type *Ty = V->getType(); 1747 unsigned BitWidth = Known.getBitWidth(); 1748 1749 assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) && 1750 "Not integer or pointer type!"); 1751 1752 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { 1753 assert( 1754 FVTy->getNumElements() == DemandedElts.getBitWidth() && 1755 "DemandedElt width should equal the fixed vector number of elements"); 1756 } else { 1757 assert(DemandedElts == APInt(1, 1) && 1758 "DemandedElt width should be 1 for scalars or scalable vectors"); 1759 } 1760 1761 Type *ScalarTy = Ty->getScalarType(); 1762 if (ScalarTy->isPointerTy()) { 1763 assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) && 1764 "V and Known should have same BitWidth"); 1765 } else { 1766 assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) && 1767 "V and Known should have same BitWidth"); 1768 } 1769 #endif 1770 1771 const APInt *C; 1772 if (match(V, m_APInt(C))) { 1773 // We know all of the bits for a scalar constant or a splat vector constant! 1774 Known = KnownBits::makeConstant(*C); 1775 return; 1776 } 1777 // Null and aggregate-zero are all-zeros. 1778 if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) { 1779 Known.setAllZero(); 1780 return; 1781 } 1782 // Handle a constant vector by taking the intersection of the known bits of 1783 // each element. 1784 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) { 1785 assert(!isa<ScalableVectorType>(V->getType())); 1786 // We know that CDV must be a vector of integers. Take the intersection of 1787 // each element. 1788 Known.Zero.setAllBits(); Known.One.setAllBits(); 1789 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) { 1790 if (!DemandedElts[i]) 1791 continue; 1792 APInt Elt = CDV->getElementAsAPInt(i); 1793 Known.Zero &= ~Elt; 1794 Known.One &= Elt; 1795 } 1796 if (Known.hasConflict()) 1797 Known.resetAll(); 1798 return; 1799 } 1800 1801 if (const auto *CV = dyn_cast<ConstantVector>(V)) { 1802 assert(!isa<ScalableVectorType>(V->getType())); 1803 // We know that CV must be a vector of integers. Take the intersection of 1804 // each element. 1805 Known.Zero.setAllBits(); Known.One.setAllBits(); 1806 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { 1807 if (!DemandedElts[i]) 1808 continue; 1809 Constant *Element = CV->getAggregateElement(i); 1810 if (isa<PoisonValue>(Element)) 1811 continue; 1812 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element); 1813 if (!ElementCI) { 1814 Known.resetAll(); 1815 return; 1816 } 1817 const APInt &Elt = ElementCI->getValue(); 1818 Known.Zero &= ~Elt; 1819 Known.One &= Elt; 1820 } 1821 if (Known.hasConflict()) 1822 Known.resetAll(); 1823 return; 1824 } 1825 1826 // Start out not knowing anything. 1827 Known.resetAll(); 1828 1829 // We can't imply anything about undefs. 1830 if (isa<UndefValue>(V)) 1831 return; 1832 1833 // There's no point in looking through other users of ConstantData for 1834 // assumptions. Confirm that we've handled them all. 
1835 assert(!isa<ConstantData>(V) && "Unhandled constant data!"); 1836 1837 // All recursive calls that increase depth must come after this. 1838 if (Depth == MaxAnalysisRecursionDepth) 1839 return; 1840 1841 // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has 1842 // the bits of its aliasee. 1843 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { 1844 if (!GA->isInterposable()) 1845 computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q); 1846 return; 1847 } 1848 1849 if (const Operator *I = dyn_cast<Operator>(V)) 1850 computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q); 1851 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 1852 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) 1853 Known = CR->toKnownBits(); 1854 } 1855 1856 // Aligned pointers have trailing zeros - refine Known.Zero set 1857 if (isa<PointerType>(V->getType())) { 1858 Align Alignment = V->getPointerAlignment(Q.DL); 1859 Known.Zero.setLowBits(Log2(Alignment)); 1860 } 1861 1862 // computeKnownBitsFromContext strictly refines Known. 1863 // Therefore, we run them after computeKnownBitsFromOperator. 1864 1865 // Check whether we can determine known bits from context such as assumes. 1866 computeKnownBitsFromContext(V, Known, Depth, Q); 1867 1868 assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); 1869 } 1870 1871 /// Try to detect a recurrence that the value of the induction variable is 1872 /// always a power of two (or zero). 1873 static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero, 1874 unsigned Depth, SimplifyQuery &Q) { 1875 BinaryOperator *BO = nullptr; 1876 Value *Start = nullptr, *Step = nullptr; 1877 if (!matchSimpleRecurrence(PN, BO, Start, Step)) 1878 return false; 1879 1880 // Initial value must be a power of two. 1881 for (const Use &U : PN->operands()) { 1882 if (U.get() == Start) { 1883 // Initial value comes from a different BB, need to adjust context 1884 // instruction for analysis. 1885 Q.CxtI = PN->getIncomingBlock(U)->getTerminator(); 1886 if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q)) 1887 return false; 1888 } 1889 } 1890 1891 // Except for Mul, the induction variable must be on the left side of the 1892 // increment expression, otherwise its value can be arbitrary. 1893 if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step) 1894 return false; 1895 1896 Q.CxtI = BO->getParent()->getTerminator(); 1897 switch (BO->getOpcode()) { 1898 case Instruction::Mul: 1899 // Power of two is closed under multiplication. 1900 return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || 1901 Q.IIQ.hasNoSignedWrap(BO)) && 1902 isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q); 1903 case Instruction::SDiv: 1904 // Start value must not be signmask for signed division, so simply being a 1905 // power of two is not sufficient, and it has to be a constant. 1906 if (!match(Start, m_Power2()) || match(Start, m_SignMask())) 1907 return false; 1908 [[fallthrough]]; 1909 case Instruction::UDiv: 1910 // Divisor must be a power of two. 1911 // If OrZero is false, cannot guarantee induction variable is non-zero after 1912 // division, same for Shr, unless it is exact division. 
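    // E.g. (illustrative) a recurrence 8, 4, 2, 1, 0, 0, ... produced by
    // repeated udiv by 2 eventually reaches zero, so without the exact flag we
    // can only conclude "power of two or zero" here.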
1913 return (OrZero || Q.IIQ.isExact(BO)) && 1914 isKnownToBeAPowerOfTwo(Step, false, Depth, Q); 1915 case Instruction::Shl: 1916 return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO); 1917 case Instruction::AShr: 1918 if (!match(Start, m_Power2()) || match(Start, m_SignMask())) 1919 return false; 1920 [[fallthrough]]; 1921 case Instruction::LShr: 1922 return OrZero || Q.IIQ.isExact(BO); 1923 default: 1924 return false; 1925 } 1926 } 1927 1928 /// Return true if the given value is known to have exactly one 1929 /// bit set when defined. For vectors return true if every element is known to 1930 /// be a power of two when defined. Supports values with integer or pointer 1931 /// types and vectors of integers. 1932 bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, 1933 const SimplifyQuery &Q) { 1934 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 1935 1936 if (isa<Constant>(V)) 1937 return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2()); 1938 1939 // i1 is by definition a power of 2 or zero. 1940 if (OrZero && V->getType()->getScalarSizeInBits() == 1) 1941 return true; 1942 1943 auto *I = dyn_cast<Instruction>(V); 1944 if (!I) 1945 return false; 1946 1947 if (Q.CxtI && match(V, m_VScale())) { 1948 const Function *F = Q.CxtI->getFunction(); 1949 // The vscale_range indicates vscale is a power-of-two. 1950 return F->hasFnAttribute(Attribute::VScaleRange); 1951 } 1952 1953 // 1 << X is clearly a power of two if the one is not shifted off the end. If 1954 // it is shifted off the end then the result is undefined. 1955 if (match(I, m_Shl(m_One(), m_Value()))) 1956 return true; 1957 1958 // (signmask) >>l X is clearly a power of two if the one is not shifted off 1959 // the bottom. If it is shifted off the bottom then the result is undefined. 1960 if (match(I, m_LShr(m_SignMask(), m_Value()))) 1961 return true; 1962 1963 // The remaining tests are all recursive, so bail out if we hit the limit. 1964 if (Depth++ == MaxAnalysisRecursionDepth) 1965 return false; 1966 1967 switch (I->getOpcode()) { 1968 case Instruction::ZExt: 1969 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1970 case Instruction::Trunc: 1971 return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1972 case Instruction::Shl: 1973 if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I)) 1974 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1975 return false; 1976 case Instruction::LShr: 1977 if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I))) 1978 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1979 return false; 1980 case Instruction::UDiv: 1981 if (Q.IIQ.isExact(cast<BinaryOperator>(I))) 1982 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1983 return false; 1984 case Instruction::Mul: 1985 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) && 1986 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) && 1987 (OrZero || isKnownNonZero(I, Depth, Q)); 1988 case Instruction::And: 1989 // A power of two and'd with anything is a power of two or zero. 1990 if (OrZero && 1991 (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) || 1992 isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q))) 1993 return true; 1994 // X & (-X) is always a power of two or zero. 
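    // Illustrative bit pattern (i8): X = 0b00110100, -X = 0b11001100, and
    // X & -X == 0b00000100 -- only the lowest set bit of X survives, and the
    // result is zero exactly when X itself is zero.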
1995 if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) || 1996 match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0))))) 1997 return OrZero || isKnownNonZero(I->getOperand(0), Depth, Q); 1998 return false; 1999 case Instruction::Add: { 2000 // Adding a power-of-two or zero to the same power-of-two or zero yields 2001 // either the original power-of-two, a larger power-of-two or zero. 2002 const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V); 2003 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) || 2004 Q.IIQ.hasNoSignedWrap(VOBO)) { 2005 if (match(I->getOperand(0), 2006 m_c_And(m_Specific(I->getOperand(1)), m_Value())) && 2007 isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q)) 2008 return true; 2009 if (match(I->getOperand(1), 2010 m_c_And(m_Specific(I->getOperand(0)), m_Value())) && 2011 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q)) 2012 return true; 2013 2014 unsigned BitWidth = V->getType()->getScalarSizeInBits(); 2015 KnownBits LHSBits(BitWidth); 2016 computeKnownBits(I->getOperand(0), LHSBits, Depth, Q); 2017 2018 KnownBits RHSBits(BitWidth); 2019 computeKnownBits(I->getOperand(1), RHSBits, Depth, Q); 2020 // If i8 V is a power of two or zero: 2021 // ZeroBits: 1 1 1 0 1 1 1 1 2022 // ~ZeroBits: 0 0 0 1 0 0 0 0 2023 if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2()) 2024 // If OrZero isn't set, we cannot give back a zero result. 2025 // Make sure either the LHS or RHS has a bit set. 2026 if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue()) 2027 return true; 2028 } 2029 return false; 2030 } 2031 case Instruction::Select: 2032 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) && 2033 isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q); 2034 case Instruction::PHI: { 2035 // A PHI node is power of two if all incoming values are power of two, or if 2036 // it is an induction variable where in each step its value is a power of 2037 // two. 2038 auto *PN = cast<PHINode>(I); 2039 SimplifyQuery RecQ = Q; 2040 2041 // Check if it is an induction variable and always power of two. 2042 if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ)) 2043 return true; 2044 2045 // Recursively check all incoming values. Limit recursion to 2 levels, so 2046 // that search complexity is limited to number of operands^2. 2047 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); 2048 return llvm::all_of(PN->operands(), [&](const Use &U) { 2049 // Value is power of 2 if it is coming from PHI node itself by induction. 2050 if (U.get() == PN) 2051 return true; 2052 2053 // Change the context instruction to the incoming block where it is 2054 // evaluated. 2055 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); 2056 return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ); 2057 }); 2058 } 2059 case Instruction::Invoke: 2060 case Instruction::Call: { 2061 if (auto *II = dyn_cast<IntrinsicInst>(I)) { 2062 switch (II->getIntrinsicID()) { 2063 case Intrinsic::umax: 2064 case Intrinsic::smax: 2065 case Intrinsic::umin: 2066 case Intrinsic::smin: 2067 return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) && 2068 isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); 2069 // bswap/bitreverse just move around bits, but don't change any 1s/0s 2070 // thus dont change pow2/non-pow2 status. 
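      // E.g. (illustrative) bitreverse(i8 0b00100000) == 0b00000100 and
      // bswap(i16 0x0100) == 0x0001: exactly one bit remains set, so the
      // power-of-two (or zero) property carries over unchanged.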
2071 case Intrinsic::bitreverse: 2072 case Intrinsic::bswap: 2073 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); 2074 case Intrinsic::fshr: 2075 case Intrinsic::fshl: 2076 // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x) 2077 if (II->getArgOperand(0) == II->getArgOperand(1)) 2078 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); 2079 break; 2080 default: 2081 break; 2082 } 2083 } 2084 return false; 2085 } 2086 default: 2087 return false; 2088 } 2089 } 2090 2091 /// Test whether a GEP's result is known to be non-null. 2092 /// 2093 /// Uses properties inherent in a GEP to try to determine whether it is known 2094 /// to be non-null. 2095 /// 2096 /// Currently this routine does not support vector GEPs. 2097 static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, 2098 const SimplifyQuery &Q) { 2099 const Function *F = nullptr; 2100 if (const Instruction *I = dyn_cast<Instruction>(GEP)) 2101 F = I->getFunction(); 2102 2103 if (!GEP->isInBounds() || 2104 NullPointerIsDefined(F, GEP->getPointerAddressSpace())) 2105 return false; 2106 2107 // FIXME: Support vector-GEPs. 2108 assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP"); 2109 2110 // If the base pointer is non-null, we cannot walk to a null address with an 2111 // inbounds GEP in address space zero. 2112 if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q)) 2113 return true; 2114 2115 // Walk the GEP operands and see if any operand introduces a non-zero offset. 2116 // If so, then the GEP cannot produce a null pointer, as doing so would 2117 // inherently violate the inbounds contract within address space zero. 2118 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); 2119 GTI != GTE; ++GTI) { 2120 // Struct types are easy -- they must always be indexed by a constant. 2121 if (StructType *STy = GTI.getStructTypeOrNull()) { 2122 ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand()); 2123 unsigned ElementIdx = OpC->getZExtValue(); 2124 const StructLayout *SL = Q.DL.getStructLayout(STy); 2125 uint64_t ElementOffset = SL->getElementOffset(ElementIdx); 2126 if (ElementOffset > 0) 2127 return true; 2128 continue; 2129 } 2130 2131 // If we have a zero-sized type, the index doesn't matter. Keep looping. 2132 if (GTI.getSequentialElementStride(Q.DL).isZero()) 2133 continue; 2134 2135 // Fast path the constant operand case both for efficiency and so we don't 2136 // increment Depth when just zipping down an all-constant GEP. 2137 if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) { 2138 if (!OpC->isZero()) 2139 return true; 2140 continue; 2141 } 2142 2143 // We post-increment Depth here because while isKnownNonZero increments it 2144 // as well, when we pop back up that increment won't persist. We don't want 2145 // to recurse 10k times just because we have 10k GEP operands. We don't 2146 // bail completely out because we want to handle constant GEPs regardless 2147 // of depth. 
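    // In practice (illustrative): each non-constant index consumes one unit of
    // Depth for the remainder of this walk, so only roughly the first
    // MaxAnalysisRecursionDepth variable indices get the recursive
    // isKnownNonZero treatment; later variable indices are skipped, while
    // constant indices are always handled by the cheap checks above.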
2148 if (Depth++ >= MaxAnalysisRecursionDepth) 2149 continue; 2150 2151 if (isKnownNonZero(GTI.getOperand(), Depth, Q)) 2152 return true; 2153 } 2154 2155 return false; 2156 } 2157 2158 static bool isKnownNonNullFromDominatingCondition(const Value *V, 2159 const Instruction *CtxI, 2160 const DominatorTree *DT) { 2161 assert(!isa<Constant>(V) && "Called for constant?"); 2162 2163 if (!CtxI || !DT) 2164 return false; 2165 2166 unsigned NumUsesExplored = 0; 2167 for (const auto *U : V->users()) { 2168 // Avoid massive lists 2169 if (NumUsesExplored >= DomConditionsMaxUses) 2170 break; 2171 NumUsesExplored++; 2172 2173 // If the value is used as an argument to a call or invoke, then argument 2174 // attributes may provide an answer about null-ness. 2175 if (const auto *CB = dyn_cast<CallBase>(U)) 2176 if (auto *CalledFunc = CB->getCalledFunction()) 2177 for (const Argument &Arg : CalledFunc->args()) 2178 if (CB->getArgOperand(Arg.getArgNo()) == V && 2179 Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) && 2180 DT->dominates(CB, CtxI)) 2181 return true; 2182 2183 // If the value is used as a load/store, then the pointer must be non null. 2184 if (V == getLoadStorePointerOperand(U)) { 2185 const Instruction *I = cast<Instruction>(U); 2186 if (!NullPointerIsDefined(I->getFunction(), 2187 V->getType()->getPointerAddressSpace()) && 2188 DT->dominates(I, CtxI)) 2189 return true; 2190 } 2191 2192 if ((match(U, m_IDiv(m_Value(), m_Specific(V))) || 2193 match(U, m_IRem(m_Value(), m_Specific(V)))) && 2194 isValidAssumeForContext(cast<Instruction>(U), CtxI, DT)) 2195 return true; 2196 2197 // Consider only compare instructions uniquely controlling a branch 2198 Value *RHS; 2199 CmpInst::Predicate Pred; 2200 if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS)))) 2201 continue; 2202 2203 bool NonNullIfTrue; 2204 if (cmpExcludesZero(Pred, RHS)) 2205 NonNullIfTrue = true; 2206 else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS)) 2207 NonNullIfTrue = false; 2208 else 2209 continue; 2210 2211 SmallVector<const User *, 4> WorkList; 2212 SmallPtrSet<const User *, 4> Visited; 2213 for (const auto *CmpU : U->users()) { 2214 assert(WorkList.empty() && "Should be!"); 2215 if (Visited.insert(CmpU).second) 2216 WorkList.push_back(CmpU); 2217 2218 while (!WorkList.empty()) { 2219 auto *Curr = WorkList.pop_back_val(); 2220 2221 // If a user is an AND, add all its users to the work list. We only 2222 // propagate "pred != null" condition through AND because it is only 2223 // correct to assume that all conditions of AND are met in true branch. 2224 // TODO: Support similar logic of OR and EQ predicate? 2225 if (NonNullIfTrue) 2226 if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) { 2227 for (const auto *CurrU : Curr->users()) 2228 if (Visited.insert(CurrU).second) 2229 WorkList.push_back(CurrU); 2230 continue; 2231 } 2232 2233 if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) { 2234 assert(BI->isConditional() && "uses a comparison!"); 2235 2236 BasicBlock *NonNullSuccessor = 2237 BI->getSuccessor(NonNullIfTrue ? 0 : 1); 2238 BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); 2239 if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) 2240 return true; 2241 } else if (NonNullIfTrue && isGuard(Curr) && 2242 DT->dominates(cast<Instruction>(Curr), CtxI)) { 2243 return true; 2244 } 2245 } 2246 } 2247 } 2248 2249 return false; 2250 } 2251 2252 /// Does the 'Range' metadata (which must be a valid MD_range operand list) 2253 /// ensure that the value it's attached to is never Value? 
'Value' is
2254 /// checked against each [Lower, Upper) operand pair of the metadata in turn.
2255 static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
2256   const unsigned NumRanges = Ranges->getNumOperands() / 2;
2257   assert(NumRanges >= 1);
2258   for (unsigned i = 0; i < NumRanges; ++i) {
2259     ConstantInt *Lower =
2260         mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
2261     ConstantInt *Upper =
2262         mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
2263     ConstantRange Range(Lower->getValue(), Upper->getValue());
2264     if (Range.contains(Value))
2265       return false;
2266   }
2267   return true;
2268 }
2269
2270 /// Try to detect a recurrence that monotonically increases/decreases from a
2271 /// non-zero starting value. These are common as induction variables.
2272 static bool isNonZeroRecurrence(const PHINode *PN) {
2273   BinaryOperator *BO = nullptr;
2274   Value *Start = nullptr, *Step = nullptr;
2275   const APInt *StartC, *StepC;
2276   if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
2277       !match(Start, m_APInt(StartC)) || StartC->isZero())
2278     return false;
2279
2280   switch (BO->getOpcode()) {
2281   case Instruction::Add:
2282     // Starting from non-zero and stepping away from zero can never wrap back
2283     // to zero.
2284     return BO->hasNoUnsignedWrap() ||
2285            (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) &&
2286             StartC->isNegative() == StepC->isNegative());
2287   case Instruction::Mul:
2288     return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
2289            match(Step, m_APInt(StepC)) && !StepC->isZero();
2290   case Instruction::Shl:
2291     return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
2292   case Instruction::AShr:
2293   case Instruction::LShr:
2294     return BO->isExact();
2295   default:
2296     return false;
2297   }
2298 }
2299
2300 static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
2301                          const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2302                          Value *Y, bool NSW) {
2303   KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2304   KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2305
2306   // If X and Y are both non-negative (as signed values) then their sum is not
2307   // zero unless both X and Y are zero.
2308   if (XKnown.isNonNegative() && YKnown.isNonNegative())
2309     if (isKnownNonZero(Y, DemandedElts, Depth, Q) ||
2310         isKnownNonZero(X, DemandedElts, Depth, Q))
2311       return true;
2312
2313   // If X and Y are both negative (as signed values) then their sum is not
2314   // zero unless both X and Y equal INT_MIN.
2315   if (XKnown.isNegative() && YKnown.isNegative()) {
2316     APInt Mask = APInt::getSignedMaxValue(BitWidth);
2317     // The sign bit of X is set. If some other bit is set then X is not equal
2318     // to INT_MIN.
2319     if (XKnown.One.intersects(Mask))
2320       return true;
2321     // The sign bit of Y is set. If some other bit is set then Y is not equal
2322     // to INT_MIN.
2323     if (YKnown.One.intersects(Mask))
2324       return true;
2325   }
2326
2327   // The sum of a non-negative number and a power of two is not zero.
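  // Sketch of the reasoning: if Y == 2^k and X + Y wrapped to zero mod 2^n,
  // then X would have to equal 2^n - 2^k, i.e. have bits k..n-1 all set; any
  // such value has its sign bit set, contradicting X being non-negative.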
2328 if (XKnown.isNonNegative() && 2329 isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q)) 2330 return true; 2331 if (YKnown.isNonNegative() && 2332 isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q)) 2333 return true; 2334 2335 return KnownBits::computeForAddSub(/*Add*/ true, NSW, XKnown, YKnown) 2336 .isNonZero(); 2337 } 2338 2339 static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth, 2340 const SimplifyQuery &Q, unsigned BitWidth, Value *X, 2341 Value *Y) { 2342 // TODO: Move this case into isKnownNonEqual(). 2343 if (auto *C = dyn_cast<Constant>(X)) 2344 if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Depth, Q)) 2345 return true; 2346 2347 return ::isKnownNonEqual(X, Y, Depth, Q); 2348 } 2349 2350 static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts, 2351 unsigned Depth, const SimplifyQuery &Q, 2352 const KnownBits &KnownVal) { 2353 auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) { 2354 switch (I->getOpcode()) { 2355 case Instruction::Shl: 2356 return Lhs.shl(Rhs); 2357 case Instruction::LShr: 2358 return Lhs.lshr(Rhs); 2359 case Instruction::AShr: 2360 return Lhs.ashr(Rhs); 2361 default: 2362 llvm_unreachable("Unknown Shift Opcode"); 2363 } 2364 }; 2365 2366 auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) { 2367 switch (I->getOpcode()) { 2368 case Instruction::Shl: 2369 return Lhs.lshr(Rhs); 2370 case Instruction::LShr: 2371 case Instruction::AShr: 2372 return Lhs.shl(Rhs); 2373 default: 2374 llvm_unreachable("Unknown Shift Opcode"); 2375 } 2376 }; 2377 2378 if (KnownVal.isUnknown()) 2379 return false; 2380 2381 KnownBits KnownCnt = 2382 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q); 2383 APInt MaxShift = KnownCnt.getMaxValue(); 2384 unsigned NumBits = KnownVal.getBitWidth(); 2385 if (MaxShift.uge(NumBits)) 2386 return false; 2387 2388 if (!ShiftOp(KnownVal.One, MaxShift).isZero()) 2389 return true; 2390 2391 // If all of the bits shifted out are known to be zero, and Val is known 2392 // non-zero then at least one non-zero bit must remain. 2393 if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift) 2394 .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) && 2395 isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q)) 2396 return true; 2397 2398 return false; 2399 } 2400 2401 static bool isKnownNonZeroFromOperator(const Operator *I, 2402 const APInt &DemandedElts, 2403 unsigned Depth, const SimplifyQuery &Q) { 2404 unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL); 2405 switch (I->getOpcode()) { 2406 case Instruction::Alloca: 2407 // Alloca never returns null, malloc might. 2408 return I->getType()->getPointerAddressSpace() == 0; 2409 case Instruction::GetElementPtr: 2410 if (I->getType()->isPointerTy()) 2411 return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q); 2412 break; 2413 case Instruction::BitCast: { 2414 // We need to be a bit careful here. We can only peek through the bitcast 2415 // if the scalar size of elements in the operand are smaller than and a 2416 // multiple of the size they are casting too. Take three cases: 2417 // 2418 // 1) Unsafe: 2419 // bitcast <2 x i16> %NonZero to <4 x i8> 2420 // 2421 // %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a 2422 // <4 x i8> requires that all 4 i8 elements be non-zero which isn't 2423 // guranteed (imagine just sign bit set in the 2 i16 elements). 
2424 // 2425 // 2) Unsafe: 2426 // bitcast <4 x i3> %NonZero to <3 x i4> 2427 // 2428 // Even though the scalar size of the src (`i3`) is smaller than the 2429 // scalar size of the dst `i4`, because `i3` is not a multiple of `i4` 2430 // its possible for the `3 x i4` elements to be zero because there are 2431 // some elements in the destination that don't contain any full src 2432 // element. 2433 // 2434 // 3) Safe: 2435 // bitcast <4 x i8> %NonZero to <2 x i16> 2436 // 2437 // This is always safe as non-zero in the 4 i8 elements implies 2438 // non-zero in the combination of any two adjacent ones. Since i8 is a 2439 // multiple of i16, each i16 is guranteed to have 2 full i8 elements. 2440 // This all implies the 2 i16 elements are non-zero. 2441 Type *FromTy = I->getOperand(0)->getType(); 2442 if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) && 2443 (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0) 2444 return isKnownNonZero(I->getOperand(0), Depth, Q); 2445 } break; 2446 case Instruction::IntToPtr: 2447 // Note that we have to take special care to avoid looking through 2448 // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well 2449 // as casts that can alter the value, e.g., AddrSpaceCasts. 2450 if (!isa<ScalableVectorType>(I->getType()) && 2451 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <= 2452 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue()) 2453 return isKnownNonZero(I->getOperand(0), Depth, Q); 2454 break; 2455 case Instruction::PtrToInt: 2456 // Similar to int2ptr above, we can look through ptr2int here if the cast 2457 // is a no-op or an extend and not a truncate. 2458 if (!isa<ScalableVectorType>(I->getType()) && 2459 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <= 2460 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue()) 2461 return isKnownNonZero(I->getOperand(0), Depth, Q); 2462 break; 2463 case Instruction::Sub: 2464 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0), 2465 I->getOperand(1)); 2466 case Instruction::Or: 2467 // X | Y != 0 if X != 0 or Y != 0. 2468 return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q) || 2469 isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q); 2470 case Instruction::SExt: 2471 case Instruction::ZExt: 2472 // ext X != 0 if X != 0. 2473 return isKnownNonZero(I->getOperand(0), Depth, Q); 2474 2475 case Instruction::Shl: { 2476 // shl nsw/nuw can't remove any non-zero bits. 2477 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I); 2478 if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO)) 2479 return isKnownNonZero(I->getOperand(0), Depth, Q); 2480 2481 // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined 2482 // if the lowest bit is shifted off the end. 2483 KnownBits Known(BitWidth); 2484 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth, Q); 2485 if (Known.One[0]) 2486 return true; 2487 2488 return isNonZeroShift(I, DemandedElts, Depth, Q, Known); 2489 } 2490 case Instruction::LShr: 2491 case Instruction::AShr: { 2492 // shr exact can only shift out zero bits. 2493 const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I); 2494 if (BO->isExact()) 2495 return isKnownNonZero(I->getOperand(0), Depth, Q); 2496 2497 // shr X, Y != 0 if X is negative. Note that the value of the shift is not 2498 // defined if the sign bit is shifted off the end. 
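    // E.g. (i8, illustrative) if X has its sign bit set, lshr moves that 1 to
    // a lower bit position (it is only lost for out-of-range shift amounts,
    // which are undefined anyway), and ashr of a negative value stays
    // negative, so in either case the result remains non-zero.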
2499     KnownBits Known =
2500         computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2501     if (Known.isNegative())
2502       return true;
2503
2504     return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
2505   }
2506   case Instruction::UDiv:
2507   case Instruction::SDiv: {
2508     // X / Y
2509     // div exact can only produce a zero if the dividend is zero.
2510     if (cast<PossiblyExactOperator>(I)->isExact())
2511       return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
2512
2513     std::optional<bool> XUgeY;
2514     KnownBits XKnown =
2515         computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2516     // If X is fully unknown we won't be able to figure anything out so don't
2517     // bother computing known bits for Y.
2518     if (XKnown.isUnknown())
2519       return false;
2520
2521     KnownBits YKnown =
2522         computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2523     if (I->getOpcode() == Instruction::SDiv) {
2524       // For signed division we need to compare the operands' absolute values.
2525       XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
2526       YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
2527     }
2528     // If X u>= Y then the div is non-zero (0/0 is UB).
2529     XUgeY = KnownBits::uge(XKnown, YKnown);
2530     // If X is totally unknown or X u< Y we won't be able to prove non-zero
2531     // with known bits alone, so just return early.
2532     return XUgeY && *XUgeY;
2533   }
2534   case Instruction::Add: {
2535     // X + Y.
2536
2537     // If Add has the nuw flag, then if either X or Y is non-zero the result is
2538     // non-zero.
2539     auto *BO = cast<OverflowingBinaryOperator>(I);
2540     if (Q.IIQ.hasNoUnsignedWrap(BO))
2541       return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q) ||
2542              isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
2543
2544     return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2545                         I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO));
2546   }
2547   case Instruction::Mul: {
2548     // If X and Y are non-zero then so is X * Y as long as the multiplication
2549     // does not overflow.
2550     const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
2551     if (Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO))
2552       return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q) &&
2553              isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q);
2554
2555     // If either X or Y is odd, then if the other is non-zero the result can't
2556     // be zero.
2557     KnownBits XKnown =
2558         computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2559     if (XKnown.One[0])
2560       return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q);
2561
2562     KnownBits YKnown =
2563         computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2564     if (YKnown.One[0])
2565       return XKnown.isNonZero() ||
2566              isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
2567
2568     // If there exists any subset of X (sX) and subset of Y (sY) s.t. sX * sY is
2569     // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
2570     // the lowest known One of X and Y. If they are non-zero, the result
2571     // must be non-zero. We can check that LSB(X) * LSB(Y) != 0 by checking
2572     // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth.
2573     return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
2574            BitWidth;
2575   }
2576   case Instruction::Select: {
2577     // (C ? X : Y) != 0 if X != 0 and Y != 0.
2578
2579     // First check if the arm is non-zero using `isKnownNonZero`. If that fails,
2580     // then see if the select condition implies the arm is non-zero. For example
2581     // (X != 0 ?
X : Y), we know the true arm is non-zero as the `X` "return" is 2582 // dominated by `X != 0`. 2583 auto SelectArmIsNonZero = [&](bool IsTrueArm) { 2584 Value *Op; 2585 Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2); 2586 // Op is trivially non-zero. 2587 if (isKnownNonZero(Op, DemandedElts, Depth, Q)) 2588 return true; 2589 2590 // The condition of the select dominates the true/false arm. Check if the 2591 // condition implies that a given arm is non-zero. 2592 Value *X; 2593 CmpInst::Predicate Pred; 2594 if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X)))) 2595 return false; 2596 2597 if (!IsTrueArm) 2598 Pred = ICmpInst::getInversePredicate(Pred); 2599 2600 return cmpExcludesZero(Pred, X); 2601 }; 2602 2603 if (SelectArmIsNonZero(/* IsTrueArm */ true) && 2604 SelectArmIsNonZero(/* IsTrueArm */ false)) 2605 return true; 2606 break; 2607 } 2608 case Instruction::PHI: { 2609 auto *PN = cast<PHINode>(I); 2610 if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN)) 2611 return true; 2612 2613 // Check if all incoming values are non-zero using recursion. 2614 SimplifyQuery RecQ = Q; 2615 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); 2616 return llvm::all_of(PN->operands(), [&](const Use &U) { 2617 if (U.get() == PN) 2618 return true; 2619 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); 2620 // Check if the branch on the phi excludes zero. 2621 ICmpInst::Predicate Pred; 2622 Value *X; 2623 BasicBlock *TrueSucc, *FalseSucc; 2624 if (match(RecQ.CxtI, 2625 m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)), 2626 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) { 2627 // Check for cases of duplicate successors. 2628 if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) { 2629 // If we're using the false successor, invert the predicate. 2630 if (FalseSucc == PN->getParent()) 2631 Pred = CmpInst::getInversePredicate(Pred); 2632 if (cmpExcludesZero(Pred, X)) 2633 return true; 2634 } 2635 } 2636 // Finally recurse on the edge and check it directly. 2637 return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ); 2638 }); 2639 } 2640 case Instruction::ExtractElement: 2641 if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) { 2642 const Value *Vec = EEI->getVectorOperand(); 2643 const Value *Idx = EEI->getIndexOperand(); 2644 auto *CIdx = dyn_cast<ConstantInt>(Idx); 2645 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) { 2646 unsigned NumElts = VecTy->getNumElements(); 2647 APInt DemandedVecElts = APInt::getAllOnes(NumElts); 2648 if (CIdx && CIdx->getValue().ult(NumElts)) 2649 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); 2650 return isKnownNonZero(Vec, DemandedVecElts, Depth, Q); 2651 } 2652 } 2653 break; 2654 case Instruction::Freeze: 2655 return isKnownNonZero(I->getOperand(0), Depth, Q) && 2656 isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT, 2657 Depth); 2658 case Instruction::Load: { 2659 auto *LI = cast<LoadInst>(I); 2660 // A Load tagged with nonnull or dereferenceable with null pointer undefined 2661 // is never null. 2662 if (auto *PtrT = dyn_cast<PointerType>(I->getType())) 2663 if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) || 2664 (Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable) && 2665 !NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace()))) 2666 return true; 2667 2668 // No need to fall through to computeKnownBits as range metadata is already 2669 // handled in isKnownNonZero. 
2670 return false; 2671 } 2672 case Instruction::Call: 2673 case Instruction::Invoke: 2674 if (I->getType()->isPointerTy()) { 2675 const auto *Call = cast<CallBase>(I); 2676 if (Call->isReturnNonNull()) 2677 return true; 2678 if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) 2679 return isKnownNonZero(RP, Depth, Q); 2680 } else if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) { 2681 if (RV->getType() == I->getType() && isKnownNonZero(RV, Depth, Q)) 2682 return true; 2683 } 2684 2685 if (auto *II = dyn_cast<IntrinsicInst>(I)) { 2686 switch (II->getIntrinsicID()) { 2687 case Intrinsic::sshl_sat: 2688 case Intrinsic::ushl_sat: 2689 case Intrinsic::abs: 2690 case Intrinsic::bitreverse: 2691 case Intrinsic::bswap: 2692 case Intrinsic::ctpop: 2693 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q); 2694 case Intrinsic::ssub_sat: 2695 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, 2696 II->getArgOperand(0), II->getArgOperand(1)); 2697 case Intrinsic::sadd_sat: 2698 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, 2699 II->getArgOperand(0), II->getArgOperand(1), 2700 /*NSW*/ true); 2701 case Intrinsic::umax: 2702 case Intrinsic::uadd_sat: 2703 return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) || 2704 isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q); 2705 case Intrinsic::smin: 2706 case Intrinsic::smax: { 2707 auto KnownOpImpliesNonZero = [&](const KnownBits &K) { 2708 return II->getIntrinsicID() == Intrinsic::smin 2709 ? K.isNegative() 2710 : K.isStrictlyPositive(); 2711 }; 2712 KnownBits XKnown = 2713 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q); 2714 if (KnownOpImpliesNonZero(XKnown)) 2715 return true; 2716 KnownBits YKnown = 2717 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q); 2718 if (KnownOpImpliesNonZero(YKnown)) 2719 return true; 2720 2721 if (XKnown.isNonZero() && YKnown.isNonZero()) 2722 return true; 2723 } 2724 [[fallthrough]]; 2725 case Intrinsic::umin: 2726 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q) && 2727 isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q); 2728 case Intrinsic::cttz: 2729 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q) 2730 .Zero[0]; 2731 case Intrinsic::ctlz: 2732 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q) 2733 .isNonNegative(); 2734 case Intrinsic::fshr: 2735 case Intrinsic::fshl: 2736 // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0. 2737 if (II->getArgOperand(0) == II->getArgOperand(1)) 2738 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q); 2739 break; 2740 case Intrinsic::vscale: 2741 return true; 2742 default: 2743 break; 2744 } 2745 break; 2746 } 2747 2748 return false; 2749 } 2750 2751 KnownBits Known(BitWidth); 2752 computeKnownBits(I, DemandedElts, Known, Depth, Q); 2753 return Known.One != 0; 2754 } 2755 2756 /// Return true if the given value is known to be non-zero when defined. For 2757 /// vectors, return true if every demanded element is known to be non-zero when 2758 /// defined. For pointers, if the context instruction and dominator tree are 2759 /// specified, perform context-sensitive analysis and return true if the 2760 /// pointer couldn't possibly be null at the specified instruction. 2761 /// Supports values with integer or pointer type and vectors of integers. 
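/// For example (illustrative): a function argument marked nonnull, an alloca
/// in address space 0, or an integer load tagged with !range metadata that
/// excludes zero will all be reported as known non-zero, while a plain
/// unannotated load typically will not.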
2762 bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, 2763 const SimplifyQuery &Q) { 2764 2765 #ifndef NDEBUG 2766 Type *Ty = V->getType(); 2767 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 2768 2769 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { 2770 assert( 2771 FVTy->getNumElements() == DemandedElts.getBitWidth() && 2772 "DemandedElt width should equal the fixed vector number of elements"); 2773 } else { 2774 assert(DemandedElts == APInt(1, 1) && 2775 "DemandedElt width should be 1 for scalars"); 2776 } 2777 #endif 2778 2779 if (auto *C = dyn_cast<Constant>(V)) { 2780 if (C->isNullValue()) 2781 return false; 2782 if (isa<ConstantInt>(C)) 2783 // Must be non-zero due to null test above. 2784 return true; 2785 2786 // For constant vectors, check that all elements are undefined or known 2787 // non-zero to determine that the whole vector is known non-zero. 2788 if (auto *VecTy = dyn_cast<FixedVectorType>(C->getType())) { 2789 for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) { 2790 if (!DemandedElts[i]) 2791 continue; 2792 Constant *Elt = C->getAggregateElement(i); 2793 if (!Elt || Elt->isNullValue()) 2794 return false; 2795 if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt)) 2796 return false; 2797 } 2798 return true; 2799 } 2800 2801 // A global variable in address space 0 is non null unless extern weak 2802 // or an absolute symbol reference. Other address spaces may have null as a 2803 // valid address for a global, so we can't assume anything. 2804 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 2805 if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() && 2806 GV->getType()->getAddressSpace() == 0) 2807 return true; 2808 } 2809 2810 // For constant expressions, fall through to the Operator code below. 2811 if (!isa<ConstantExpr>(V)) 2812 return false; 2813 } 2814 2815 if (auto *I = dyn_cast<Instruction>(V)) { 2816 if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) { 2817 // If the possible ranges don't contain zero, then the value is 2818 // definitely non-zero. 2819 if (auto *Ty = dyn_cast<IntegerType>(V->getType())) { 2820 const APInt ZeroValue(Ty->getBitWidth(), 0); 2821 if (rangeMetadataExcludesValue(Ranges, ZeroValue)) 2822 return true; 2823 } 2824 } 2825 } 2826 2827 if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q)) 2828 return true; 2829 2830 // Some of the tests below are recursive, so bail out if we hit the limit. 2831 if (Depth++ >= MaxAnalysisRecursionDepth) 2832 return false; 2833 2834 // Check for pointer simplifications. 2835 2836 if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) { 2837 // A byval, inalloca may not be null in a non-default addres space. A 2838 // nonnull argument is assumed never 0. 2839 if (const Argument *A = dyn_cast<Argument>(V)) { 2840 if (((A->hasPassPointeeByValueCopyAttr() && 2841 !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) || 2842 A->hasNonNullAttr())) 2843 return true; 2844 } 2845 } 2846 2847 if (const auto *I = dyn_cast<Operator>(V)) 2848 if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q)) 2849 return true; 2850 2851 if (!isa<Constant>(V) && 2852 isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) 2853 return true; 2854 2855 return false; 2856 } 2857 2858 bool isKnownNonZero(const Value *V, unsigned Depth, const SimplifyQuery &Q) { 2859 auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); 2860 APInt DemandedElts = 2861 FVTy ? 
APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); 2862 return isKnownNonZero(V, DemandedElts, Depth, Q); 2863 } 2864 2865 /// If the pair of operators are the same invertible function, return the 2866 /// the operands of the function corresponding to each input. Otherwise, 2867 /// return std::nullopt. An invertible function is one that is 1-to-1 and maps 2868 /// every input value to exactly one output value. This is equivalent to 2869 /// saying that Op1 and Op2 are equal exactly when the specified pair of 2870 /// operands are equal, (except that Op1 and Op2 may be poison more often.) 2871 static std::optional<std::pair<Value*, Value*>> 2872 getInvertibleOperands(const Operator *Op1, 2873 const Operator *Op2) { 2874 if (Op1->getOpcode() != Op2->getOpcode()) 2875 return std::nullopt; 2876 2877 auto getOperands = [&](unsigned OpNum) -> auto { 2878 return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum)); 2879 }; 2880 2881 switch (Op1->getOpcode()) { 2882 default: 2883 break; 2884 case Instruction::Add: 2885 case Instruction::Sub: 2886 if (Op1->getOperand(0) == Op2->getOperand(0)) 2887 return getOperands(1); 2888 if (Op1->getOperand(1) == Op2->getOperand(1)) 2889 return getOperands(0); 2890 break; 2891 case Instruction::Mul: { 2892 // invertible if A * B == (A * B) mod 2^N where A, and B are integers 2893 // and N is the bitwdith. The nsw case is non-obvious, but proven by 2894 // alive2: https://alive2.llvm.org/ce/z/Z6D5qK 2895 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1); 2896 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2); 2897 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) && 2898 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap())) 2899 break; 2900 2901 // Assume operand order has been canonicalized 2902 if (Op1->getOperand(1) == Op2->getOperand(1) && 2903 isa<ConstantInt>(Op1->getOperand(1)) && 2904 !cast<ConstantInt>(Op1->getOperand(1))->isZero()) 2905 return getOperands(0); 2906 break; 2907 } 2908 case Instruction::Shl: { 2909 // Same as multiplies, with the difference that we don't need to check 2910 // for a non-zero multiply. Shifts always multiply by non-zero. 2911 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1); 2912 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2); 2913 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) && 2914 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap())) 2915 break; 2916 2917 if (Op1->getOperand(1) == Op2->getOperand(1)) 2918 return getOperands(0); 2919 break; 2920 } 2921 case Instruction::AShr: 2922 case Instruction::LShr: { 2923 auto *PEO1 = cast<PossiblyExactOperator>(Op1); 2924 auto *PEO2 = cast<PossiblyExactOperator>(Op2); 2925 if (!PEO1->isExact() || !PEO2->isExact()) 2926 break; 2927 2928 if (Op1->getOperand(1) == Op2->getOperand(1)) 2929 return getOperands(0); 2930 break; 2931 } 2932 case Instruction::SExt: 2933 case Instruction::ZExt: 2934 if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType()) 2935 return getOperands(0); 2936 break; 2937 case Instruction::PHI: { 2938 const PHINode *PN1 = cast<PHINode>(Op1); 2939 const PHINode *PN2 = cast<PHINode>(Op2); 2940 2941 // If PN1 and PN2 are both recurrences, can we prove the entire recurrences 2942 // are a single invertible function of the start values? 
Note that repeated 2943 // application of an invertible function is also invertible 2944 BinaryOperator *BO1 = nullptr; 2945 Value *Start1 = nullptr, *Step1 = nullptr; 2946 BinaryOperator *BO2 = nullptr; 2947 Value *Start2 = nullptr, *Step2 = nullptr; 2948 if (PN1->getParent() != PN2->getParent() || 2949 !matchSimpleRecurrence(PN1, BO1, Start1, Step1) || 2950 !matchSimpleRecurrence(PN2, BO2, Start2, Step2)) 2951 break; 2952 2953 auto Values = getInvertibleOperands(cast<Operator>(BO1), 2954 cast<Operator>(BO2)); 2955 if (!Values) 2956 break; 2957 2958 // We have to be careful of mutually defined recurrences here. Ex: 2959 // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V 2960 // * X_i = Y_i = X_(i-1) OP Y_(i-1) 2961 // The invertibility of these is complicated, and not worth reasoning 2962 // about (yet?). 2963 if (Values->first != PN1 || Values->second != PN2) 2964 break; 2965 2966 return std::make_pair(Start1, Start2); 2967 } 2968 } 2969 return std::nullopt; 2970 } 2971 2972 /// Return true if V2 == V1 + X, where X is known non-zero. 2973 static bool isAddOfNonZero(const Value *V1, const Value *V2, unsigned Depth, 2974 const SimplifyQuery &Q) { 2975 const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1); 2976 if (!BO || BO->getOpcode() != Instruction::Add) 2977 return false; 2978 Value *Op = nullptr; 2979 if (V2 == BO->getOperand(0)) 2980 Op = BO->getOperand(1); 2981 else if (V2 == BO->getOperand(1)) 2982 Op = BO->getOperand(0); 2983 else 2984 return false; 2985 return isKnownNonZero(Op, Depth + 1, Q); 2986 } 2987 2988 /// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and 2989 /// the multiplication is nuw or nsw. 2990 static bool isNonEqualMul(const Value *V1, const Value *V2, unsigned Depth, 2991 const SimplifyQuery &Q) { 2992 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) { 2993 const APInt *C; 2994 return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) && 2995 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) && 2996 !C->isZero() && !C->isOne() && isKnownNonZero(V1, Depth + 1, Q); 2997 } 2998 return false; 2999 } 3000 3001 /// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and 3002 /// the shift is nuw or nsw. 3003 static bool isNonEqualShl(const Value *V1, const Value *V2, unsigned Depth, 3004 const SimplifyQuery &Q) { 3005 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) { 3006 const APInt *C; 3007 return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) && 3008 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) && 3009 !C->isZero() && isKnownNonZero(V1, Depth + 1, Q); 3010 } 3011 return false; 3012 } 3013 3014 static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2, 3015 unsigned Depth, const SimplifyQuery &Q) { 3016 // Check two PHIs are in same block. 3017 if (PN1->getParent() != PN2->getParent()) 3018 return false; 3019 3020 SmallPtrSet<const BasicBlock *, 8> VisitedBBs; 3021 bool UsedFullRecursion = false; 3022 for (const BasicBlock *IncomBB : PN1->blocks()) { 3023 if (!VisitedBBs.insert(IncomBB).second) 3024 continue; // Don't reprocess blocks that we have dealt with already. 3025 const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB); 3026 const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB); 3027 const APInt *C1, *C2; 3028 if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2) 3029 continue; 3030 3031 // Only one pair of phi operands is allowed for full recursion. 
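    // This keeps the cost of comparing two N-input phis roughly linear: pairs
    // of distinct integer constants are discharged inline above, and at most
    // one incoming pair is allowed to trigger a full isKnownNonEqual
    // recursion.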
3032 if (UsedFullRecursion) 3033 return false; 3034 3035 SimplifyQuery RecQ = Q; 3036 RecQ.CxtI = IncomBB->getTerminator(); 3037 if (!isKnownNonEqual(IV1, IV2, Depth + 1, RecQ)) 3038 return false; 3039 UsedFullRecursion = true; 3040 } 3041 return true; 3042 } 3043 3044 static bool isNonEqualSelect(const Value *V1, const Value *V2, unsigned Depth, 3045 const SimplifyQuery &Q) { 3046 const SelectInst *SI1 = dyn_cast<SelectInst>(V1); 3047 if (!SI1) 3048 return false; 3049 3050 if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) { 3051 const Value *Cond1 = SI1->getCondition(); 3052 const Value *Cond2 = SI2->getCondition(); 3053 if (Cond1 == Cond2) 3054 return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(), 3055 Depth + 1, Q) && 3056 isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(), 3057 Depth + 1, Q); 3058 } 3059 return isKnownNonEqual(SI1->getTrueValue(), V2, Depth + 1, Q) && 3060 isKnownNonEqual(SI1->getFalseValue(), V2, Depth + 1, Q); 3061 } 3062 3063 // Check to see if A is both a GEP and is the incoming value for a PHI in the 3064 // loop, and B is either a ptr or another GEP. If the PHI has 2 incoming values, 3065 // one of them being the recursive GEP A and the other a ptr at same base and at 3066 // the same/higher offset than B we are only incrementing the pointer further in 3067 // loop if offset of recursive GEP is greater than 0. 3068 static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B, 3069 const SimplifyQuery &Q) { 3070 if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy()) 3071 return false; 3072 3073 auto *GEPA = dyn_cast<GEPOperator>(A); 3074 if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin())) 3075 return false; 3076 3077 // Handle 2 incoming PHI values with one being a recursive GEP. 3078 auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand()); 3079 if (!PN || PN->getNumIncomingValues() != 2) 3080 return false; 3081 3082 // Search for the recursive GEP as an incoming operand, and record that as 3083 // Step. 3084 Value *Start = nullptr; 3085 Value *Step = const_cast<Value *>(A); 3086 if (PN->getIncomingValue(0) == Step) 3087 Start = PN->getIncomingValue(1); 3088 else if (PN->getIncomingValue(1) == Step) 3089 Start = PN->getIncomingValue(0); 3090 else 3091 return false; 3092 3093 // Other incoming node base should match the B base. 3094 // StartOffset >= OffsetB && StepOffset > 0? 3095 // StartOffset <= OffsetB && StepOffset < 0? 3096 // Is non-equal if above are true. 3097 // We use stripAndAccumulateInBoundsConstantOffsets to restrict the 3098 // optimisation to inbounds GEPs only. 3099 unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType()); 3100 APInt StartOffset(IndexWidth, 0); 3101 Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset); 3102 APInt StepOffset(IndexWidth, 0); 3103 Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset); 3104 3105 // Check if Base Pointer of Step matches the PHI. 3106 if (Step != PN) 3107 return false; 3108 APInt OffsetB(IndexWidth, 0); 3109 B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB); 3110 return Start == B && 3111 ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) || 3112 (StartOffset.sle(OffsetB) && StepOffset.isNegative())); 3113 } 3114 3115 /// Return true if it is known that V1 != V2. 
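/// Non-equality can be established, for example, by showing V2 == V1 + X with X known non-zero, by recursing through a matching pair of invertible operations, or by finding a bit position where one value is known zero and the other known one.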
3116 static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth, 3117 const SimplifyQuery &Q) { 3118 if (V1 == V2) 3119 return false; 3120 if (V1->getType() != V2->getType()) 3121 // We can't look through casts yet. 3122 return false; 3123 3124 if (Depth >= MaxAnalysisRecursionDepth) 3125 return false; 3126 3127 // See if we can recurse through (exactly one of) our operands. This 3128 // requires our operation be 1-to-1 and map every input value to exactly 3129 // one output value. Such an operation is invertible. 3130 auto *O1 = dyn_cast<Operator>(V1); 3131 auto *O2 = dyn_cast<Operator>(V2); 3132 if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) { 3133 if (auto Values = getInvertibleOperands(O1, O2)) 3134 return isKnownNonEqual(Values->first, Values->second, Depth + 1, Q); 3135 3136 if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) { 3137 const PHINode *PN2 = cast<PHINode>(V2); 3138 // FIXME: This is missing a generalization to handle the case where one is 3139 // a PHI and another one isn't. 3140 if (isNonEqualPHIs(PN1, PN2, Depth, Q)) 3141 return true; 3142 }; 3143 } 3144 3145 if (isAddOfNonZero(V1, V2, Depth, Q) || isAddOfNonZero(V2, V1, Depth, Q)) 3146 return true; 3147 3148 if (isNonEqualMul(V1, V2, Depth, Q) || isNonEqualMul(V2, V1, Depth, Q)) 3149 return true; 3150 3151 if (isNonEqualShl(V1, V2, Depth, Q) || isNonEqualShl(V2, V1, Depth, Q)) 3152 return true; 3153 3154 if (V1->getType()->isIntOrIntVectorTy()) { 3155 // Are any known bits in V1 contradictory to known bits in V2? If V1 3156 // has a known zero where V2 has a known one, they must not be equal. 3157 KnownBits Known1 = computeKnownBits(V1, Depth, Q); 3158 if (!Known1.isUnknown()) { 3159 KnownBits Known2 = computeKnownBits(V2, Depth, Q); 3160 if (Known1.Zero.intersects(Known2.One) || 3161 Known2.Zero.intersects(Known1.One)) 3162 return true; 3163 } 3164 } 3165 3166 if (isNonEqualSelect(V1, V2, Depth, Q) || isNonEqualSelect(V2, V1, Depth, Q)) 3167 return true; 3168 3169 if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) || 3170 isNonEqualPointersWithRecursiveGEP(V2, V1, Q)) 3171 return true; 3172 3173 Value *A, *B; 3174 // PtrToInts are NonEqual if their Ptrs are NonEqual. 3175 // Check PtrToInt type matches the pointer size. 3176 if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) && 3177 match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B)))) 3178 return isKnownNonEqual(A, B, Depth + 1, Q); 3179 3180 return false; 3181 } 3182 3183 // Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow). 3184 // Returns the input and lower/upper bounds. 
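// For example, smax(smin(%x, 255), 20) clamps %x to [20, 255], giving CLow = 20 and CHigh = 255; the match is rejected if CLow > CHigh.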
3185 static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, 3186 const APInt *&CLow, const APInt *&CHigh) { 3187 assert(isa<Operator>(Select) && 3188 cast<Operator>(Select)->getOpcode() == Instruction::Select && 3189 "Input should be a Select!"); 3190 3191 const Value *LHS = nullptr, *RHS = nullptr; 3192 SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor; 3193 if (SPF != SPF_SMAX && SPF != SPF_SMIN) 3194 return false; 3195 3196 if (!match(RHS, m_APInt(CLow))) 3197 return false; 3198 3199 const Value *LHS2 = nullptr, *RHS2 = nullptr; 3200 SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor; 3201 if (getInverseMinMaxFlavor(SPF) != SPF2) 3202 return false; 3203 3204 if (!match(RHS2, m_APInt(CHigh))) 3205 return false; 3206 3207 if (SPF == SPF_SMIN) 3208 std::swap(CLow, CHigh); 3209 3210 In = LHS2; 3211 return CLow->sle(*CHigh); 3212 } 3213 3214 static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II, 3215 const APInt *&CLow, 3216 const APInt *&CHigh) { 3217 assert((II->getIntrinsicID() == Intrinsic::smin || 3218 II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax"); 3219 3220 Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID()); 3221 auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0)); 3222 if (!InnerII || InnerII->getIntrinsicID() != InverseID || 3223 !match(II->getArgOperand(1), m_APInt(CLow)) || 3224 !match(InnerII->getArgOperand(1), m_APInt(CHigh))) 3225 return false; 3226 3227 if (II->getIntrinsicID() == Intrinsic::smin) 3228 std::swap(CLow, CHigh); 3229 return CLow->sle(*CHigh); 3230 } 3231 3232 /// For vector constants, loop over the elements and find the constant with the 3233 /// minimum number of sign bits. Return 0 if the value is not a vector constant 3234 /// or if any element was not analyzed; otherwise, return the count for the 3235 /// element with the minimum number of sign bits. 3236 static unsigned computeNumSignBitsVectorConstant(const Value *V, 3237 const APInt &DemandedElts, 3238 unsigned TyBits) { 3239 const auto *CV = dyn_cast<Constant>(V); 3240 if (!CV || !isa<FixedVectorType>(CV->getType())) 3241 return 0; 3242 3243 unsigned MinSignBits = TyBits; 3244 unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements(); 3245 for (unsigned i = 0; i != NumElts; ++i) { 3246 if (!DemandedElts[i]) 3247 continue; 3248 // If we find a non-ConstantInt, bail out. 3249 auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i)); 3250 if (!Elt) 3251 return 0; 3252 3253 MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits()); 3254 } 3255 3256 return MinSignBits; 3257 } 3258 3259 static unsigned ComputeNumSignBitsImpl(const Value *V, 3260 const APInt &DemandedElts, 3261 unsigned Depth, const SimplifyQuery &Q); 3262 3263 static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts, 3264 unsigned Depth, const SimplifyQuery &Q) { 3265 unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q); 3266 assert(Result > 0 && "At least one sign bit needs to be present!"); 3267 return Result; 3268 } 3269 3270 /// Return the number of times the sign bit of the register is replicated into 3271 /// the other bits. We know that at least 1 bit is always equal to the sign bit 3272 /// (itself), but other cases can give us information. For example, immediately 3273 /// after an "ashr X, 2", we know that the top 3 bits are all equal to each 3274 /// other, so we return 3. 
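/// Likewise, immediately after a "sext i16 %x to i32" at least 17 bits are known to equal the sign bit.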
For vectors, return the number of sign bits for the 3275 /// vector element with the minimum number of known sign bits of the demanded 3276 /// elements in the vector specified by DemandedElts. 3277 static unsigned ComputeNumSignBitsImpl(const Value *V, 3278 const APInt &DemandedElts, 3279 unsigned Depth, const SimplifyQuery &Q) { 3280 Type *Ty = V->getType(); 3281 #ifndef NDEBUG 3282 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 3283 3284 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { 3285 assert( 3286 FVTy->getNumElements() == DemandedElts.getBitWidth() && 3287 "DemandedElt width should equal the fixed vector number of elements"); 3288 } else { 3289 assert(DemandedElts == APInt(1, 1) && 3290 "DemandedElt width should be 1 for scalars"); 3291 } 3292 #endif 3293 3294 // We return the minimum number of sign bits that are guaranteed to be present 3295 // in V, so for undef we have to conservatively return 1. We don't have the 3296 // same behavior for poison though -- that's a FIXME today. 3297 3298 Type *ScalarTy = Ty->getScalarType(); 3299 unsigned TyBits = ScalarTy->isPointerTy() ? 3300 Q.DL.getPointerTypeSizeInBits(ScalarTy) : 3301 Q.DL.getTypeSizeInBits(ScalarTy); 3302 3303 unsigned Tmp, Tmp2; 3304 unsigned FirstAnswer = 1; 3305 3306 // Note that ConstantInt is handled by the general computeKnownBits case 3307 // below. 3308 3309 if (Depth == MaxAnalysisRecursionDepth) 3310 return 1; 3311 3312 if (auto *U = dyn_cast<Operator>(V)) { 3313 switch (Operator::getOpcode(V)) { 3314 default: break; 3315 case Instruction::SExt: 3316 Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); 3317 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp; 3318 3319 case Instruction::SDiv: { 3320 const APInt *Denominator; 3321 // sdiv X, C -> adds log(C) sign bits. 3322 if (match(U->getOperand(1), m_APInt(Denominator))) { 3323 3324 // Ignore non-positive denominator. 3325 if (!Denominator->isStrictlyPositive()) 3326 break; 3327 3328 // Calculate the incoming numerator bits. 3329 unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3330 3331 // Add floor(log(C)) bits to the numerator bits. 3332 return std::min(TyBits, NumBits + Denominator->logBase2()); 3333 } 3334 break; 3335 } 3336 3337 case Instruction::SRem: { 3338 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3339 3340 const APInt *Denominator; 3341 // srem X, C -> we know that the result is within [-C+1,C) when C is a 3342 // positive constant. This let us put a lower bound on the number of sign 3343 // bits. 3344 if (match(U->getOperand(1), m_APInt(Denominator))) { 3345 3346 // Ignore non-positive denominator. 3347 if (Denominator->isStrictlyPositive()) { 3348 // Calculate the leading sign bit constraints by examining the 3349 // denominator. Given that the denominator is positive, there are two 3350 // cases: 3351 // 3352 // 1. The numerator is positive. The result range is [0,C) and 3353 // [0,C) u< (1 << ceilLogBase2(C)). 3354 // 3355 // 2. The numerator is negative. Then the result range is (-C,0] and 3356 // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). 3357 // 3358 // Thus a lower bound on the number of sign bits is `TyBits - 3359 // ceilLogBase2(C)`. 3360 3361 unsigned ResBits = TyBits - Denominator->ceilLogBase2(); 3362 Tmp = std::max(Tmp, ResBits); 3363 } 3364 } 3365 return Tmp; 3366 } 3367 3368 case Instruction::AShr: { 3369 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3370 // ashr X, C -> adds C sign bits. Vectors too. 
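// e.g. if X is already known to have 2 sign bits, "ashr X, 3" has min(2 + 3, TyBits) of them.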
3371 const APInt *ShAmt; 3372 if (match(U->getOperand(1), m_APInt(ShAmt))) { 3373 if (ShAmt->uge(TyBits)) 3374 break; // Bad shift. 3375 unsigned ShAmtLimited = ShAmt->getZExtValue(); 3376 Tmp += ShAmtLimited; 3377 if (Tmp > TyBits) Tmp = TyBits; 3378 } 3379 return Tmp; 3380 } 3381 case Instruction::Shl: { 3382 const APInt *ShAmt; 3383 if (match(U->getOperand(1), m_APInt(ShAmt))) { 3384 // shl destroys sign bits. 3385 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3386 if (ShAmt->uge(TyBits) || // Bad shift. 3387 ShAmt->uge(Tmp)) break; // Shifted all sign bits out. 3388 Tmp2 = ShAmt->getZExtValue(); 3389 return Tmp - Tmp2; 3390 } 3391 break; 3392 } 3393 case Instruction::And: 3394 case Instruction::Or: 3395 case Instruction::Xor: // NOT is handled here. 3396 // Logical binary ops preserve the number of sign bits at the worst. 3397 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3398 if (Tmp != 1) { 3399 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3400 FirstAnswer = std::min(Tmp, Tmp2); 3401 // We computed what we know about the sign bits as our first 3402 // answer. Now proceed to the generic code that uses 3403 // computeKnownBits, and pick whichever answer is better. 3404 } 3405 break; 3406 3407 case Instruction::Select: { 3408 // If we have a clamp pattern, we know that the number of sign bits will 3409 // be the minimum of the clamp min/max range. 3410 const Value *X; 3411 const APInt *CLow, *CHigh; 3412 if (isSignedMinMaxClamp(U, X, CLow, CHigh)) 3413 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); 3414 3415 Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3416 if (Tmp == 1) break; 3417 Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); 3418 return std::min(Tmp, Tmp2); 3419 } 3420 3421 case Instruction::Add: 3422 // Add can have at most one carry bit. Thus we know that the output 3423 // is, at worst, one more bit than the inputs. 3424 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3425 if (Tmp == 1) break; 3426 3427 // Special case decrementing a value (ADD X, -1): 3428 if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) 3429 if (CRHS->isAllOnesValue()) { 3430 KnownBits Known(TyBits); 3431 computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); 3432 3433 // If the input is known to be 0 or 1, the output is 0/-1, which is 3434 // all sign bits set. 3435 if ((Known.Zero | 1).isAllOnes()) 3436 return TyBits; 3437 3438 // If we are subtracting one from a positive number, there is no carry 3439 // out of the result. 3440 if (Known.isNonNegative()) 3441 return Tmp; 3442 } 3443 3444 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3445 if (Tmp2 == 1) break; 3446 return std::min(Tmp, Tmp2) - 1; 3447 3448 case Instruction::Sub: 3449 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3450 if (Tmp2 == 1) break; 3451 3452 // Handle NEG. 3453 if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) 3454 if (CLHS->isNullValue()) { 3455 KnownBits Known(TyBits); 3456 computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); 3457 // If the input is known to be 0 or 1, the output is 0/-1, which is 3458 // all sign bits set. 3459 if ((Known.Zero | 1).isAllOnes()) 3460 return TyBits; 3461 3462 // If the input is known to be positive (the sign bit is known clear), 3463 // the output of the NEG has the same number of sign bits as the 3464 // input. 3465 if (Known.isNonNegative()) 3466 return Tmp2; 3467 3468 // Otherwise, we treat this like a SUB. 
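// e.g. operands with at least 3 sign bits each leave at least 2 in the difference, since the borrow can consume only one of them.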
3469 } 3470 3471 // Sub can have at most one carry bit. Thus we know that the output 3472 // is, at worst, one more bit than the inputs. 3473 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3474 if (Tmp == 1) break; 3475 return std::min(Tmp, Tmp2) - 1; 3476 3477 case Instruction::Mul: { 3478 // The output of the Mul can be at most twice the valid bits in the 3479 // inputs. 3480 unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3481 if (SignBitsOp0 == 1) break; 3482 unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3483 if (SignBitsOp1 == 1) break; 3484 unsigned OutValidBits = 3485 (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); 3486 return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; 3487 } 3488 3489 case Instruction::PHI: { 3490 const PHINode *PN = cast<PHINode>(U); 3491 unsigned NumIncomingValues = PN->getNumIncomingValues(); 3492 // Don't analyze large in-degree PHIs. 3493 if (NumIncomingValues > 4) break; 3494 // Unreachable blocks may have zero-operand PHI nodes. 3495 if (NumIncomingValues == 0) break; 3496 3497 // Take the minimum of all incoming values. This can't infinitely loop 3498 // because of our depth threshold. 3499 SimplifyQuery RecQ = Q; 3500 Tmp = TyBits; 3501 for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) { 3502 if (Tmp == 1) return Tmp; 3503 RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator(); 3504 Tmp = std::min( 3505 Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, RecQ)); 3506 } 3507 return Tmp; 3508 } 3509 3510 case Instruction::Trunc: { 3511 // If the input contained enough sign bits that some remain after the 3512 // truncation, then we can make use of that. Otherwise we don't know 3513 // anything. 3514 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3515 unsigned OperandTyBits = U->getOperand(0)->getType()->getScalarSizeInBits(); 3516 if (Tmp > (OperandTyBits - TyBits)) 3517 return Tmp - (OperandTyBits - TyBits); 3518 3519 return 1; 3520 } 3521 3522 case Instruction::ExtractElement: 3523 // Look through extract element. At the moment we keep this simple and 3524 // skip tracking the specific element. But at least we might find 3525 // information valid for all elements of the vector (for example if vector 3526 // is sign extended, shifted, etc). 3527 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3528 3529 case Instruction::ShuffleVector: { 3530 // Collect the minimum number of sign bits that are shared by every vector 3531 // element referenced by the shuffle. 3532 auto *Shuf = dyn_cast<ShuffleVectorInst>(U); 3533 if (!Shuf) { 3534 // FIXME: Add support for shufflevector constant expressions. 3535 return 1; 3536 } 3537 APInt DemandedLHS, DemandedRHS; 3538 // For undef elements, we don't know anything about the common state of 3539 // the shuffle result. 3540 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) 3541 return 1; 3542 Tmp = std::numeric_limits<unsigned>::max(); 3543 if (!!DemandedLHS) { 3544 const Value *LHS = Shuf->getOperand(0); 3545 Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q); 3546 } 3547 // If we don't know anything, early out and try computeKnownBits 3548 // fall-back. 3549 if (Tmp == 1) 3550 break; 3551 if (!!DemandedRHS) { 3552 const Value *RHS = Shuf->getOperand(1); 3553 Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q); 3554 Tmp = std::min(Tmp, Tmp2); 3555 } 3556 // If we don't know anything, early out and try computeKnownBits 3557 // fall-back. 
3558 if (Tmp == 1) 3559 break; 3560 assert(Tmp <= TyBits && "Failed to determine minimum sign bits"); 3561 return Tmp; 3562 } 3563 case Instruction::Call: { 3564 if (const auto *II = dyn_cast<IntrinsicInst>(U)) { 3565 switch (II->getIntrinsicID()) { 3566 default: break; 3567 case Intrinsic::abs: 3568 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3569 if (Tmp == 1) break; 3570 3571 // Absolute value reduces number of sign bits by at most 1. 3572 return Tmp - 1; 3573 case Intrinsic::smin: 3574 case Intrinsic::smax: { 3575 const APInt *CLow, *CHigh; 3576 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh)) 3577 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); 3578 } 3579 } 3580 } 3581 } 3582 } 3583 } 3584 3585 // Finally, if we can prove that the top bits of the result are 0's or 1's, 3586 // use this information. 3587 3588 // If we can examine all elements of a vector constant successfully, we're 3589 // done (we can't do any better than that). If not, keep trying. 3590 if (unsigned VecSignBits = 3591 computeNumSignBitsVectorConstant(V, DemandedElts, TyBits)) 3592 return VecSignBits; 3593 3594 KnownBits Known(TyBits); 3595 computeKnownBits(V, DemandedElts, Known, Depth, Q); 3596 3597 // If we know that the sign bit is either zero or one, determine the number of 3598 // identical bits in the top of the input value. 3599 return std::max(FirstAnswer, Known.countMinSignBits()); 3600 } 3601 3602 Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB, 3603 const TargetLibraryInfo *TLI) { 3604 const Function *F = CB.getCalledFunction(); 3605 if (!F) 3606 return Intrinsic::not_intrinsic; 3607 3608 if (F->isIntrinsic()) 3609 return F->getIntrinsicID(); 3610 3611 // We are going to infer semantics of a library function based on mapping it 3612 // to an LLVM intrinsic. Check that the library function is available from 3613 // this callbase and in this environment. 
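// The checks below require a TLI mapping for the callee, non-local linkage, and a call that only reads memory; e.g. an ordinary call to sinf() is then treated as llvm.sin for the purposes of this analysis.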
3614 LibFunc Func; 3615 if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) || 3616 !CB.onlyReadsMemory()) 3617 return Intrinsic::not_intrinsic; 3618 3619 switch (Func) { 3620 default: 3621 break; 3622 case LibFunc_sin: 3623 case LibFunc_sinf: 3624 case LibFunc_sinl: 3625 return Intrinsic::sin; 3626 case LibFunc_cos: 3627 case LibFunc_cosf: 3628 case LibFunc_cosl: 3629 return Intrinsic::cos; 3630 case LibFunc_exp: 3631 case LibFunc_expf: 3632 case LibFunc_expl: 3633 return Intrinsic::exp; 3634 case LibFunc_exp2: 3635 case LibFunc_exp2f: 3636 case LibFunc_exp2l: 3637 return Intrinsic::exp2; 3638 case LibFunc_log: 3639 case LibFunc_logf: 3640 case LibFunc_logl: 3641 return Intrinsic::log; 3642 case LibFunc_log10: 3643 case LibFunc_log10f: 3644 case LibFunc_log10l: 3645 return Intrinsic::log10; 3646 case LibFunc_log2: 3647 case LibFunc_log2f: 3648 case LibFunc_log2l: 3649 return Intrinsic::log2; 3650 case LibFunc_fabs: 3651 case LibFunc_fabsf: 3652 case LibFunc_fabsl: 3653 return Intrinsic::fabs; 3654 case LibFunc_fmin: 3655 case LibFunc_fminf: 3656 case LibFunc_fminl: 3657 return Intrinsic::minnum; 3658 case LibFunc_fmax: 3659 case LibFunc_fmaxf: 3660 case LibFunc_fmaxl: 3661 return Intrinsic::maxnum; 3662 case LibFunc_copysign: 3663 case LibFunc_copysignf: 3664 case LibFunc_copysignl: 3665 return Intrinsic::copysign; 3666 case LibFunc_floor: 3667 case LibFunc_floorf: 3668 case LibFunc_floorl: 3669 return Intrinsic::floor; 3670 case LibFunc_ceil: 3671 case LibFunc_ceilf: 3672 case LibFunc_ceill: 3673 return Intrinsic::ceil; 3674 case LibFunc_trunc: 3675 case LibFunc_truncf: 3676 case LibFunc_truncl: 3677 return Intrinsic::trunc; 3678 case LibFunc_rint: 3679 case LibFunc_rintf: 3680 case LibFunc_rintl: 3681 return Intrinsic::rint; 3682 case LibFunc_nearbyint: 3683 case LibFunc_nearbyintf: 3684 case LibFunc_nearbyintl: 3685 return Intrinsic::nearbyint; 3686 case LibFunc_round: 3687 case LibFunc_roundf: 3688 case LibFunc_roundl: 3689 return Intrinsic::round; 3690 case LibFunc_roundeven: 3691 case LibFunc_roundevenf: 3692 case LibFunc_roundevenl: 3693 return Intrinsic::roundeven; 3694 case LibFunc_pow: 3695 case LibFunc_powf: 3696 case LibFunc_powl: 3697 return Intrinsic::pow; 3698 case LibFunc_sqrt: 3699 case LibFunc_sqrtf: 3700 case LibFunc_sqrtl: 3701 return Intrinsic::sqrt; 3702 } 3703 3704 return Intrinsic::not_intrinsic; 3705 } 3706 3707 /// Deprecated, use computeKnownFPClass instead. 3708 /// 3709 /// If \p SignBitOnly is true, test for a known 0 sign bit rather than a 3710 /// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign 3711 /// bit despite comparing equal. 3712 static bool cannotBeOrderedLessThanZeroImpl(const Value *V, 3713 const DataLayout &DL, 3714 const TargetLibraryInfo *TLI, 3715 bool SignBitOnly, unsigned Depth) { 3716 // TODO: This function does not do the right thing when SignBitOnly is true 3717 // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform 3718 // which flips the sign bits of NaNs. See 3719 // https://llvm.org/bugs/show_bug.cgi?id=31702. 3720 3721 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { 3722 return !CFP->getValueAPF().isNegative() || 3723 (!SignBitOnly && CFP->getValueAPF().isZero()); 3724 } 3725 3726 // Handle vector of constants. 
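// e.g. <2 x float> <float 1.0, float 0.0> is accepted in both modes; a -0.0 element is rejected only when SignBitOnly is set, and any element that is not a ConstantFP bails out.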
3727 if (auto *CV = dyn_cast<Constant>(V)) { 3728 if (auto *CVFVTy = dyn_cast<FixedVectorType>(CV->getType())) { 3729 unsigned NumElts = CVFVTy->getNumElements(); 3730 for (unsigned i = 0; i != NumElts; ++i) { 3731 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i)); 3732 if (!CFP) 3733 return false; 3734 if (CFP->getValueAPF().isNegative() && 3735 (SignBitOnly || !CFP->getValueAPF().isZero())) 3736 return false; 3737 } 3738 3739 // All non-negative ConstantFPs. 3740 return true; 3741 } 3742 } 3743 3744 if (Depth == MaxAnalysisRecursionDepth) 3745 return false; 3746 3747 const Operator *I = dyn_cast<Operator>(V); 3748 if (!I) 3749 return false; 3750 3751 switch (I->getOpcode()) { 3752 default: 3753 break; 3754 // Unsigned integers are always nonnegative. 3755 case Instruction::UIToFP: 3756 return true; 3757 case Instruction::FDiv: 3758 // X / X is always exactly 1.0 or a NaN. 3759 if (I->getOperand(0) == I->getOperand(1) && 3760 (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs())) 3761 return true; 3762 3763 // Set SignBitOnly for RHS, because X / -0.0 is -Inf (or NaN). 3764 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3765 SignBitOnly, Depth + 1) && 3766 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3767 /*SignBitOnly*/ true, Depth + 1); 3768 case Instruction::FMul: 3769 // X * X is always non-negative or a NaN. 3770 if (I->getOperand(0) == I->getOperand(1) && 3771 (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs())) 3772 return true; 3773 3774 [[fallthrough]]; 3775 case Instruction::FAdd: 3776 case Instruction::FRem: 3777 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3778 SignBitOnly, Depth + 1) && 3779 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3780 SignBitOnly, Depth + 1); 3781 case Instruction::Select: 3782 return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3783 SignBitOnly, Depth + 1) && 3784 cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI, 3785 SignBitOnly, Depth + 1); 3786 case Instruction::FPExt: 3787 case Instruction::FPTrunc: 3788 // Widening/narrowing never change sign. 3789 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3790 SignBitOnly, Depth + 1); 3791 case Instruction::ExtractElement: 3792 // Look through extract element. At the moment we keep this simple and skip 3793 // tracking the specific element. But at least we might find information 3794 // valid for all elements of the vector. 3795 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3796 SignBitOnly, Depth + 1); 3797 case Instruction::Call: 3798 const auto *CI = cast<CallInst>(I); 3799 Intrinsic::ID IID = getIntrinsicForCallSite(*CI, TLI); 3800 switch (IID) { 3801 default: 3802 break; 3803 case Intrinsic::canonicalize: 3804 case Intrinsic::arithmetic_fence: 3805 case Intrinsic::floor: 3806 case Intrinsic::ceil: 3807 case Intrinsic::trunc: 3808 case Intrinsic::rint: 3809 case Intrinsic::nearbyint: 3810 case Intrinsic::round: 3811 case Intrinsic::roundeven: 3812 case Intrinsic::fptrunc_round: 3813 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3814 SignBitOnly, Depth + 1); 3815 case Intrinsic::maxnum: { 3816 Value *V0 = I->getOperand(0), *V1 = I->getOperand(1); 3817 auto isPositiveNum = [&](Value *V) { 3818 if (SignBitOnly) { 3819 // With SignBitOnly, this is tricky because the result of 3820 // maxnum(+0.0, -0.0) is unspecified. Just check if the operand is 3821 // a constant strictly greater than 0.0. 
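// e.g. maxnum(x, 1.0) always has a clear sign bit, but maxnum(x, +0.0) may not, since maxnum(+0.0, -0.0) may return either zero.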
3822 const APFloat *C; 3823 return match(V, m_APFloat(C)) && 3824 *C > APFloat::getZero(C->getSemantics()); 3825 } 3826 3827 // -0.0 compares equal to 0.0, so if this operand is at least -0.0, 3828 // maxnum can't be ordered-less-than-zero. 3829 return isKnownNeverNaN(V, DL, TLI) && 3830 cannotBeOrderedLessThanZeroImpl(V, DL, TLI, false, Depth + 1); 3831 }; 3832 3833 // TODO: This could be improved. We could also check that neither operand 3834 // has its sign bit set (and at least 1 is not-NAN?). 3835 return isPositiveNum(V0) || isPositiveNum(V1); 3836 } 3837 3838 case Intrinsic::maximum: 3839 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3840 SignBitOnly, Depth + 1) || 3841 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3842 SignBitOnly, Depth + 1); 3843 case Intrinsic::minnum: 3844 case Intrinsic::minimum: 3845 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3846 SignBitOnly, Depth + 1) && 3847 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3848 SignBitOnly, Depth + 1); 3849 case Intrinsic::exp: 3850 case Intrinsic::exp2: 3851 case Intrinsic::fabs: 3852 return true; 3853 case Intrinsic::copysign: 3854 // Only the sign operand matters. 3855 return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, true, 3856 Depth + 1); 3857 case Intrinsic::sqrt: 3858 // sqrt(x) is always >= -0 or NaN. Moreover, sqrt(x) == -0 iff x == -0. 3859 if (!SignBitOnly) 3860 return true; 3861 return CI->hasNoNaNs() && 3862 (CI->hasNoSignedZeros() || 3863 cannotBeNegativeZero(CI->getOperand(0), DL, TLI)); 3864 3865 case Intrinsic::powi: 3866 if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) { 3867 // powi(x,n) is non-negative if n is even. 3868 if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0) 3869 return true; 3870 } 3871 // TODO: This is not correct. Given that exp is an integer, here are the 3872 // ways that pow can return a negative value: 3873 // 3874 // pow(x, exp) --> negative if exp is odd and x is negative. 3875 // pow(-0, exp) --> -inf if exp is negative odd. 3876 // pow(-0, exp) --> -0 if exp is positive odd. 3877 // pow(-inf, exp) --> -0 if exp is negative odd. 3878 // pow(-inf, exp) --> -inf if exp is positive odd. 3879 // 3880 // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN, 3881 // but we must return false if x == -0. Unfortunately we do not currently 3882 // have a way of expressing this constraint. See details in 3883 // https://llvm.org/bugs/show_bug.cgi?id=31702. 3884 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3885 SignBitOnly, Depth + 1); 3886 3887 case Intrinsic::fma: 3888 case Intrinsic::fmuladd: 3889 // x*x+y is non-negative if y is non-negative. 3890 return I->getOperand(0) == I->getOperand(1) && 3891 (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) && 3892 cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI, 3893 SignBitOnly, Depth + 1); 3894 } 3895 break; 3896 } 3897 return false; 3898 } 3899 3900 bool llvm::SignBitMustBeZero(const Value *V, const DataLayout &DL, 3901 const TargetLibraryInfo *TLI) { 3902 // FIXME: Use computeKnownFPClass and pass all arguments 3903 return cannotBeOrderedLessThanZeroImpl(V, DL, TLI, true, 0); 3904 } 3905 3906 /// Return true if it's possible to assume IEEE treatment of input denormals in 3907 /// \p F for \p Val. 
3908 static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) { 3909 Ty = Ty->getScalarType(); 3910 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE; 3911 } 3912 3913 static bool inputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) { 3914 Ty = Ty->getScalarType(); 3915 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics()); 3916 return Mode.Input == DenormalMode::IEEE || 3917 Mode.Input == DenormalMode::PositiveZero; 3918 } 3919 3920 static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) { 3921 Ty = Ty->getScalarType(); 3922 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics()); 3923 return Mode.Output == DenormalMode::IEEE || 3924 Mode.Output == DenormalMode::PositiveZero; 3925 } 3926 3927 bool KnownFPClass::isKnownNeverLogicalZero(const Function &F, Type *Ty) const { 3928 return isKnownNeverZero() && 3929 (isKnownNeverSubnormal() || inputDenormalIsIEEE(F, Ty)); 3930 } 3931 3932 bool KnownFPClass::isKnownNeverLogicalNegZero(const Function &F, 3933 Type *Ty) const { 3934 return isKnownNeverNegZero() && 3935 (isKnownNeverNegSubnormal() || inputDenormalIsIEEEOrPosZero(F, Ty)); 3936 } 3937 3938 bool KnownFPClass::isKnownNeverLogicalPosZero(const Function &F, 3939 Type *Ty) const { 3940 if (!isKnownNeverPosZero()) 3941 return false; 3942 3943 // If we know there are no denormals, nothing can be flushed to zero. 3944 if (isKnownNeverSubnormal()) 3945 return true; 3946 3947 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics()); 3948 switch (Mode.Input) { 3949 case DenormalMode::IEEE: 3950 return true; 3951 case DenormalMode::PreserveSign: 3952 // Negative subnormal won't flush to +0 3953 return isKnownNeverPosSubnormal(); 3954 case DenormalMode::PositiveZero: 3955 default: 3956 // Both positive and negative subnormal could flush to +0 3957 return false; 3958 } 3959 3960 llvm_unreachable("covered switch over denormal mode"); 3961 } 3962 3963 void KnownFPClass::propagateDenormal(const KnownFPClass &Src, const Function &F, 3964 Type *Ty) { 3965 KnownFPClasses = Src.KnownFPClasses; 3966 // If we aren't assuming the source can't be a zero, we don't have to check if 3967 // a denormal input could be flushed. 3968 if (!Src.isKnownNeverPosZero() && !Src.isKnownNeverNegZero()) 3969 return; 3970 3971 // If we know the input can't be a denormal, it can't be flushed to 0. 3972 if (Src.isKnownNeverSubnormal()) 3973 return; 3974 3975 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics()); 3976 3977 if (!Src.isKnownNeverPosSubnormal() && Mode != DenormalMode::getIEEE()) 3978 KnownFPClasses |= fcPosZero; 3979 3980 if (!Src.isKnownNeverNegSubnormal() && Mode != DenormalMode::getIEEE()) { 3981 if (Mode != DenormalMode::getPositiveZero()) 3982 KnownFPClasses |= fcNegZero; 3983 3984 if (Mode.Input == DenormalMode::PositiveZero || 3985 Mode.Output == DenormalMode::PositiveZero || 3986 Mode.Input == DenormalMode::Dynamic || 3987 Mode.Output == DenormalMode::Dynamic) 3988 KnownFPClasses |= fcPosZero; 3989 } 3990 } 3991 3992 void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src, 3993 const Function &F, Type *Ty) { 3994 propagateDenormal(Src, F, Ty); 3995 propagateNaN(Src, /*PreserveSign=*/true); 3996 } 3997 3998 /// Returns a pair of values, which if passed to llvm.is.fpclass, returns the 3999 /// same result as an fcmp with the given operands. 
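/// For example, "fcmp oeq %x, 0.0" corresponds to {%x, fcZero} (provided input denormals are not flushed), and "fcmp uno %x, 0.0" corresponds to {%x, fcNan}.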
4000 std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred, 4001 const Function &F, 4002 Value *LHS, Value *RHS, 4003 bool LookThroughSrc) { 4004 const APFloat *ConstRHS; 4005 if (!match(RHS, m_APFloatAllowUndef(ConstRHS))) 4006 return {nullptr, fcAllFlags}; 4007 4008 return fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc); 4009 } 4010 4011 std::pair<Value *, FPClassTest> 4012 llvm::fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, 4013 const APFloat *ConstRHS, bool LookThroughSrc) { 4014 // fcmp ord x, zero|normal|subnormal|inf -> ~fcNan 4015 if (Pred == FCmpInst::FCMP_ORD && !ConstRHS->isNaN()) 4016 return {LHS, ~fcNan}; 4017 4018 // fcmp uno x, zero|normal|subnormal|inf -> fcNan 4019 if (Pred == FCmpInst::FCMP_UNO && !ConstRHS->isNaN()) 4020 return {LHS, fcNan}; 4021 4022 if (Pred == FCmpInst::FCMP_TRUE) 4023 return {LHS, fcAllFlags}; 4024 4025 if (Pred == FCmpInst::FCMP_FALSE) 4026 return {LHS, fcNone}; 4027 4028 if (ConstRHS->isZero()) { 4029 // Compares with fcNone are only exactly equal to fcZero if input denormals 4030 // are not flushed. 4031 // TODO: Handle DAZ by expanding masks to cover subnormal cases. 4032 if (Pred != FCmpInst::FCMP_ORD && Pred != FCmpInst::FCMP_UNO && 4033 !inputDenormalIsIEEE(F, LHS->getType())) 4034 return {nullptr, fcAllFlags}; 4035 4036 switch (Pred) { 4037 case FCmpInst::FCMP_OEQ: // Match x == 0.0 4038 return {LHS, fcZero}; 4039 case FCmpInst::FCMP_UEQ: // Match isnan(x) || (x == 0.0) 4040 return {LHS, fcZero | fcNan}; 4041 case FCmpInst::FCMP_UNE: // Match (x != 0.0) 4042 return {LHS, ~fcZero}; 4043 case FCmpInst::FCMP_ONE: // Match !isnan(x) && x != 0.0 4044 return {LHS, ~fcNan & ~fcZero}; 4045 case FCmpInst::FCMP_ORD: 4046 // Canonical form of ord/uno is with a zero. We could also handle 4047 // non-canonical other non-NaN constants or LHS == RHS. 4048 return {LHS, ~fcNan}; 4049 case FCmpInst::FCMP_UNO: 4050 return {LHS, fcNan}; 4051 case FCmpInst::FCMP_OGT: // x > 0 4052 return {LHS, fcPosSubnormal | fcPosNormal | fcPosInf}; 4053 case FCmpInst::FCMP_UGT: // isnan(x) || x > 0 4054 return {LHS, fcPosSubnormal | fcPosNormal | fcPosInf | fcNan}; 4055 case FCmpInst::FCMP_OGE: // x >= 0 4056 return {LHS, fcPositive | fcNegZero}; 4057 case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0 4058 return {LHS, fcPositive | fcNegZero | fcNan}; 4059 case FCmpInst::FCMP_OLT: // x < 0 4060 return {LHS, fcNegSubnormal | fcNegNormal | fcNegInf}; 4061 case FCmpInst::FCMP_ULT: // isnan(x) || x < 0 4062 return {LHS, fcNegSubnormal | fcNegNormal | fcNegInf | fcNan}; 4063 case FCmpInst::FCMP_OLE: // x <= 0 4064 return {LHS, fcNegative | fcPosZero}; 4065 case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0 4066 return {LHS, fcNegative | fcPosZero | fcNan}; 4067 default: 4068 llvm_unreachable("all compare types are handled"); 4069 } 4070 4071 return {nullptr, fcAllFlags}; 4072 } 4073 4074 Value *Src = LHS; 4075 const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src))); 4076 4077 // Compute the test mask that would return true for the ordered comparisons. 
4078 FPClassTest Mask; 4079 4080 if (ConstRHS->isInfinity()) { 4081 switch (Pred) { 4082 case FCmpInst::FCMP_OEQ: 4083 case FCmpInst::FCMP_UNE: { 4084 // Match __builtin_isinf patterns 4085 // 4086 // fcmp oeq x, +inf -> is_fpclass x, fcPosInf 4087 // fcmp oeq fabs(x), +inf -> is_fpclass x, fcInf 4088 // fcmp oeq x, -inf -> is_fpclass x, fcNegInf 4089 // fcmp oeq fabs(x), -inf -> is_fpclass x, 0 -> false 4090 // 4091 // fcmp une x, +inf -> is_fpclass x, ~fcPosInf 4092 // fcmp une fabs(x), +inf -> is_fpclass x, ~fcInf 4093 // fcmp une x, -inf -> is_fpclass x, ~fcNegInf 4094 // fcmp une fabs(x), -inf -> is_fpclass x, fcAllFlags -> true 4095 4096 if (ConstRHS->isNegative()) { 4097 Mask = fcNegInf; 4098 if (IsFabs) 4099 Mask = fcNone; 4100 } else { 4101 Mask = fcPosInf; 4102 if (IsFabs) 4103 Mask |= fcNegInf; 4104 } 4105 4106 break; 4107 } 4108 case FCmpInst::FCMP_ONE: 4109 case FCmpInst::FCMP_UEQ: { 4110 // Match __builtin_isinf patterns 4111 // fcmp one x, -inf -> is_fpclass x, fcNegInf 4112 // fcmp one fabs(x), -inf -> is_fpclass x, ~fcNegInf & ~fcNan 4113 // fcmp one x, +inf -> is_fpclass x, ~fcNegInf & ~fcNan 4114 // fcmp one fabs(x), +inf -> is_fpclass x, ~fcInf & fcNan 4115 // 4116 // fcmp ueq x, +inf -> is_fpclass x, fcPosInf|fcNan 4117 // fcmp ueq (fabs x), +inf -> is_fpclass x, fcInf|fcNan 4118 // fcmp ueq x, -inf -> is_fpclass x, fcNegInf|fcNan 4119 // fcmp ueq fabs(x), -inf -> is_fpclass x, fcNan 4120 if (ConstRHS->isNegative()) { 4121 Mask = ~fcNegInf & ~fcNan; 4122 if (IsFabs) 4123 Mask = ~fcNan; 4124 } else { 4125 Mask = ~fcPosInf & ~fcNan; 4126 if (IsFabs) 4127 Mask &= ~fcNegInf; 4128 } 4129 4130 break; 4131 } 4132 case FCmpInst::FCMP_OLT: 4133 case FCmpInst::FCMP_UGE: { 4134 if (ConstRHS->isNegative()) { 4135 // No value is ordered and less than negative infinity. 4136 // All values are unordered with or at least negative infinity. 4137 // fcmp olt x, -inf -> false 4138 // fcmp uge x, -inf -> true 4139 Mask = fcNone; 4140 break; 4141 } 4142 4143 // fcmp olt fabs(x), +inf -> fcFinite 4144 // fcmp uge fabs(x), +inf -> ~fcFinite 4145 // fcmp olt x, +inf -> fcFinite|fcNegInf 4146 // fcmp uge x, +inf -> ~(fcFinite|fcNegInf) 4147 Mask = fcFinite; 4148 if (!IsFabs) 4149 Mask |= fcNegInf; 4150 break; 4151 } 4152 case FCmpInst::FCMP_OGE: 4153 case FCmpInst::FCMP_ULT: { 4154 if (ConstRHS->isNegative()) { 4155 // fcmp oge x, -inf -> ~fcNan 4156 // fcmp oge fabs(x), -inf -> ~fcNan 4157 // fcmp ult x, -inf -> fcNan 4158 // fcmp ult fabs(x), -inf -> fcNan 4159 Mask = ~fcNan; 4160 break; 4161 } 4162 4163 // fcmp oge fabs(x), +inf -> fcInf 4164 // fcmp oge x, +inf -> fcPosInf 4165 // fcmp ult fabs(x), +inf -> ~fcInf 4166 // fcmp ult x, +inf -> ~fcPosInf 4167 Mask = fcPosInf; 4168 if (IsFabs) 4169 Mask |= fcNegInf; 4170 break; 4171 } 4172 case FCmpInst::FCMP_OGT: 4173 case FCmpInst::FCMP_ULE: { 4174 if (ConstRHS->isNegative()) { 4175 // fcmp ogt x, -inf -> fcmp one x, -inf 4176 // fcmp ogt fabs(x), -inf -> fcmp ord x, x 4177 // fcmp ule x, -inf -> fcmp ueq x, -inf 4178 // fcmp ule fabs(x), -inf -> fcmp uno x, x 4179 Mask = IsFabs ? ~fcNan : ~(fcNegInf | fcNan); 4180 break; 4181 } 4182 4183 // No value is ordered and greater than infinity. 4184 Mask = fcNone; 4185 break; 4186 } 4187 case FCmpInst::FCMP_OLE: 4188 case FCmpInst::FCMP_UGT: { 4189 if (ConstRHS->isNegative()) { 4190 Mask = IsFabs ? 
fcNone : fcNegInf; 4191 break; 4192 } 4193 4194 // fcmp ole x, +inf -> fcmp ord x, x 4195 // fcmp ole fabs(x), +inf -> fcmp ord x, x 4196 // fcmp ole x, -inf -> fcmp oeq x, -inf 4197 // fcmp ole fabs(x), -inf -> false 4198 Mask = ~fcNan; 4199 break; 4200 } 4201 default: 4202 llvm_unreachable("all compare types are handled"); 4203 } 4204 } else if (ConstRHS->isSmallestNormalized() && !ConstRHS->isNegative()) { 4205 // Match pattern that's used in __builtin_isnormal. 4206 switch (Pred) { 4207 case FCmpInst::FCMP_OLT: 4208 case FCmpInst::FCMP_UGE: { 4209 // fcmp olt x, smallest_normal -> fcNegInf|fcNegNormal|fcSubnormal|fcZero 4210 // fcmp olt fabs(x), smallest_normal -> fcSubnormal|fcZero 4211 // fcmp uge x, smallest_normal -> fcNan|fcPosNormal|fcPosInf 4212 // fcmp uge fabs(x), smallest_normal -> ~(fcSubnormal|fcZero) 4213 Mask = fcZero | fcSubnormal; 4214 if (!IsFabs) 4215 Mask |= fcNegNormal | fcNegInf; 4216 4217 break; 4218 } 4219 case FCmpInst::FCMP_OGE: 4220 case FCmpInst::FCMP_ULT: { 4221 // fcmp oge x, smallest_normal -> fcPosNormal | fcPosInf 4222 // fcmp oge fabs(x), smallest_normal -> fcInf | fcNormal 4223 // fcmp ult x, smallest_normal -> ~(fcPosNormal | fcPosInf) 4224 // fcmp ult fabs(x), smallest_normal -> ~(fcInf | fcNormal) 4225 Mask = fcPosInf | fcPosNormal; 4226 if (IsFabs) 4227 Mask |= fcNegInf | fcNegNormal; 4228 break; 4229 } 4230 default: 4231 return {nullptr, fcAllFlags}; 4232 } 4233 } else if (ConstRHS->isNaN()) { 4234 // fcmp o__ x, nan -> false 4235 // fcmp u__ x, nan -> true 4236 Mask = fcNone; 4237 } else 4238 return {nullptr, fcAllFlags}; 4239 4240 // Invert the comparison for the unordered cases. 4241 if (FCmpInst::isUnordered(Pred)) 4242 Mask = ~Mask; 4243 4244 return {Src, Mask}; 4245 } 4246 4247 static FPClassTest computeKnownFPClassFromAssumes(const Value *V, 4248 const SimplifyQuery &Q) { 4249 FPClassTest KnownFromAssume = fcAllFlags; 4250 4251 // Try to restrict the floating-point classes based on information from 4252 // assumptions. 4253 for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { 4254 if (!AssumeVH) 4255 continue; 4256 CallInst *I = cast<CallInst>(AssumeVH); 4257 const Function *F = I->getFunction(); 4258 4259 assert(F == Q.CxtI->getParent()->getParent() && 4260 "Got assumption for the wrong function!"); 4261 assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && 4262 "must be an assume intrinsic"); 4263 4264 if (!isValidAssumeForContext(I, Q.CxtI, Q.DT)) 4265 continue; 4266 4267 CmpInst::Predicate Pred; 4268 Value *LHS, *RHS; 4269 uint64_t ClassVal = 0; 4270 if (match(I->getArgOperand(0), m_FCmp(Pred, m_Value(LHS), m_Value(RHS)))) { 4271 auto [TestedValue, TestedMask] = 4272 fcmpToClassTest(Pred, *F, LHS, RHS, true); 4273 // First see if we can fold in fabs/fneg into the test. 4274 if (TestedValue == V) 4275 KnownFromAssume &= TestedMask; 4276 else { 4277 // Try again without the lookthrough if we found a different source 4278 // value. 
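// e.g. if the assume tests fcmp(fabs(%x), C) and V is the fabs call itself, the first attempt returned %x; retrying without the look-through lets the fabs match V directly.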
4279 auto [TestedValue, TestedMask] = 4280 fcmpToClassTest(Pred, *F, LHS, RHS, false); 4281 if (TestedValue == V) 4282 KnownFromAssume &= TestedMask; 4283 } 4284 } else if (match(I->getArgOperand(0), 4285 m_Intrinsic<Intrinsic::is_fpclass>( 4286 m_Value(LHS), m_ConstantInt(ClassVal)))) { 4287 KnownFromAssume &= static_cast<FPClassTest>(ClassVal); 4288 } 4289 } 4290 4291 return KnownFromAssume; 4292 } 4293 4294 void computeKnownFPClass(const Value *V, const APInt &DemandedElts, 4295 FPClassTest InterestedClasses, KnownFPClass &Known, 4296 unsigned Depth, const SimplifyQuery &Q); 4297 4298 static void computeKnownFPClass(const Value *V, KnownFPClass &Known, 4299 FPClassTest InterestedClasses, unsigned Depth, 4300 const SimplifyQuery &Q) { 4301 auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); 4302 APInt DemandedElts = 4303 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); 4304 computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Depth, Q); 4305 } 4306 4307 static void computeKnownFPClassForFPTrunc(const Operator *Op, 4308 const APInt &DemandedElts, 4309 FPClassTest InterestedClasses, 4310 KnownFPClass &Known, unsigned Depth, 4311 const SimplifyQuery &Q) { 4312 if ((InterestedClasses & 4313 (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == fcNone) 4314 return; 4315 4316 KnownFPClass KnownSrc; 4317 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, 4318 KnownSrc, Depth + 1, Q); 4319 4320 // Sign should be preserved 4321 // TODO: Handle cannot be ordered greater than zero 4322 if (KnownSrc.cannotBeOrderedLessThanZero()) 4323 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4324 4325 Known.propagateNaN(KnownSrc, true); 4326 4327 // Infinity needs a range check. 4328 } 4329 4330 // TODO: Merge implementation of cannotBeOrderedLessThanZero into here. 4331 void computeKnownFPClass(const Value *V, const APInt &DemandedElts, 4332 FPClassTest InterestedClasses, KnownFPClass &Known, 4333 unsigned Depth, const SimplifyQuery &Q) { 4334 assert(Known.isUnknown() && "should not be called with known information"); 4335 4336 if (!DemandedElts) { 4337 // No demanded elts, better to assume we don't know anything. 4338 Known.resetAll(); 4339 return; 4340 } 4341 4342 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 4343 4344 if (auto *CFP = dyn_cast_or_null<ConstantFP>(V)) { 4345 Known.KnownFPClasses = CFP->getValueAPF().classify(); 4346 Known.SignBit = CFP->isNegative(); 4347 return; 4348 } 4349 4350 // Try to handle fixed width vector constants 4351 auto *VFVTy = dyn_cast<FixedVectorType>(V->getType()); 4352 const Constant *CV = dyn_cast<Constant>(V); 4353 if (VFVTy && CV) { 4354 Known.KnownFPClasses = fcNone; 4355 4356 // For vectors, verify that each element is not NaN. 
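// (More precisely: accumulate the union of the classes of all elements, skipping undef lanes, and bail out to unknown if any element is missing or is not a ConstantFP.)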
4357 unsigned NumElts = VFVTy->getNumElements(); 4358 for (unsigned i = 0; i != NumElts; ++i) { 4359 Constant *Elt = CV->getAggregateElement(i); 4360 if (!Elt) { 4361 Known = KnownFPClass(); 4362 return; 4363 } 4364 if (isa<UndefValue>(Elt)) 4365 continue; 4366 auto *CElt = dyn_cast<ConstantFP>(Elt); 4367 if (!CElt) { 4368 Known = KnownFPClass(); 4369 return; 4370 } 4371 4372 KnownFPClass KnownElt{CElt->getValueAPF().classify(), CElt->isNegative()}; 4373 Known |= KnownElt; 4374 } 4375 4376 return; 4377 } 4378 4379 FPClassTest KnownNotFromFlags = fcNone; 4380 if (const auto *CB = dyn_cast<CallBase>(V)) 4381 KnownNotFromFlags |= CB->getRetNoFPClass(); 4382 else if (const auto *Arg = dyn_cast<Argument>(V)) 4383 KnownNotFromFlags |= Arg->getNoFPClass(); 4384 4385 const Operator *Op = dyn_cast<Operator>(V); 4386 if (const FPMathOperator *FPOp = dyn_cast_or_null<FPMathOperator>(Op)) { 4387 if (FPOp->hasNoNaNs()) 4388 KnownNotFromFlags |= fcNan; 4389 if (FPOp->hasNoInfs()) 4390 KnownNotFromFlags |= fcInf; 4391 } 4392 4393 if (Q.AC) { 4394 FPClassTest AssumedClasses = computeKnownFPClassFromAssumes(V, Q); 4395 KnownNotFromFlags |= ~AssumedClasses; 4396 } 4397 4398 // We no longer need to find out about these bits from inputs if we can 4399 // assume this from flags/attributes. 4400 InterestedClasses &= ~KnownNotFromFlags; 4401 4402 auto ClearClassesFromFlags = make_scope_exit([=, &Known] { 4403 Known.knownNot(KnownNotFromFlags); 4404 }); 4405 4406 if (!Op) 4407 return; 4408 4409 // All recursive calls that increase depth must come after this. 4410 if (Depth == MaxAnalysisRecursionDepth) 4411 return; 4412 4413 const unsigned Opc = Op->getOpcode(); 4414 switch (Opc) { 4415 case Instruction::FNeg: { 4416 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, 4417 Known, Depth + 1, Q); 4418 Known.fneg(); 4419 break; 4420 } 4421 case Instruction::Select: { 4422 Value *Cond = Op->getOperand(0); 4423 Value *LHS = Op->getOperand(1); 4424 Value *RHS = Op->getOperand(2); 4425 4426 FPClassTest FilterLHS = fcAllFlags; 4427 FPClassTest FilterRHS = fcAllFlags; 4428 4429 Value *TestedValue = nullptr; 4430 FPClassTest TestedMask = fcNone; 4431 uint64_t ClassVal = 0; 4432 const Function *F = cast<Instruction>(Op)->getFunction(); 4433 CmpInst::Predicate Pred; 4434 Value *CmpLHS, *CmpRHS; 4435 if (F && match(Cond, m_FCmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) { 4436 // If the select filters out a value based on the class, it no longer 4437 // participates in the class of the result 4438 4439 // TODO: In some degenerate cases we can infer something if we try again 4440 // without looking through sign operations. 4441 bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS; 4442 std::tie(TestedValue, TestedMask) = 4443 fcmpToClassTest(Pred, *F, CmpLHS, CmpRHS, LookThroughFAbsFNeg); 4444 } else if (match(Cond, 4445 m_Intrinsic<Intrinsic::is_fpclass>( 4446 m_Value(TestedValue), m_ConstantInt(ClassVal)))) { 4447 TestedMask = static_cast<FPClassTest>(ClassVal); 4448 } 4449 4450 if (TestedValue == LHS) { 4451 // match !isnan(x) ? x : y 4452 FilterLHS = TestedMask; 4453 } else if (TestedValue == RHS) { 4454 // match !isnan(x) ? 
y : x 4455 FilterRHS = ~TestedMask; 4456 } 4457 4458 KnownFPClass Known2; 4459 computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known, 4460 Depth + 1, Q); 4461 Known.KnownFPClasses &= FilterLHS; 4462 4463 computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS, 4464 Known2, Depth + 1, Q); 4465 Known2.KnownFPClasses &= FilterRHS; 4466 4467 Known |= Known2; 4468 break; 4469 } 4470 case Instruction::Call: { 4471 const CallInst *II = cast<CallInst>(Op); 4472 const Intrinsic::ID IID = II->getIntrinsicID(); 4473 switch (IID) { 4474 case Intrinsic::fabs: { 4475 if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) { 4476 // If we only care about the sign bit we don't need to inspect the 4477 // operand. 4478 computeKnownFPClass(II->getArgOperand(0), DemandedElts, 4479 InterestedClasses, Known, Depth + 1, Q); 4480 } 4481 4482 Known.fabs(); 4483 break; 4484 } 4485 case Intrinsic::copysign: { 4486 KnownFPClass KnownSign; 4487 4488 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4489 Known, Depth + 1, Q); 4490 computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses, 4491 KnownSign, Depth + 1, Q); 4492 Known.copysign(KnownSign); 4493 break; 4494 } 4495 case Intrinsic::fma: 4496 case Intrinsic::fmuladd: { 4497 if ((InterestedClasses & fcNegative) == fcNone) 4498 break; 4499 4500 if (II->getArgOperand(0) != II->getArgOperand(1)) 4501 break; 4502 4503 // The multiply cannot be -0 and therefore the add can't be -0 4504 Known.knownNot(fcNegZero); 4505 4506 // x * x + y is non-negative if y is non-negative. 4507 KnownFPClass KnownAddend; 4508 computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses, 4509 KnownAddend, Depth + 1, Q); 4510 4511 // TODO: Known sign bit with no nans 4512 if (KnownAddend.cannotBeOrderedLessThanZero()) 4513 Known.knownNot(fcNegative); 4514 break; 4515 } 4516 case Intrinsic::sqrt: 4517 case Intrinsic::experimental_constrained_sqrt: { 4518 KnownFPClass KnownSrc; 4519 FPClassTest InterestedSrcs = InterestedClasses; 4520 if (InterestedClasses & fcNan) 4521 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; 4522 4523 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 4524 KnownSrc, Depth + 1, Q); 4525 4526 if (KnownSrc.isKnownNeverPosInfinity()) 4527 Known.knownNot(fcPosInf); 4528 if (KnownSrc.isKnownNever(fcSNan)) 4529 Known.knownNot(fcSNan); 4530 4531 // Any negative value besides -0 returns a nan. 4532 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) 4533 Known.knownNot(fcNan); 4534 4535 // The only negative value that can be returned is -0 for -0 inputs. 4536 Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal); 4537 4538 // If the input denormal mode could be PreserveSign, a negative 4539 // subnormal input could produce a negative zero output. 4540 const Function *F = II->getFunction(); 4541 if (Q.IIQ.hasNoSignedZeros(II) || 4542 (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))) { 4543 Known.knownNot(fcNegZero); 4544 if (KnownSrc.isKnownNeverNaN()) 4545 Known.SignBit = false; 4546 } 4547 4548 break; 4549 } 4550 case Intrinsic::sin: 4551 case Intrinsic::cos: { 4552 // Return NaN on infinite inputs. 
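// For any finite, non-NaN input the result lies in [-1, 1], so the result is never an infinity; hence fcInf is cleared unconditionally below.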
4553 KnownFPClass KnownSrc; 4554 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4555 KnownSrc, Depth + 1, Q); 4556 Known.knownNot(fcInf); 4557 if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity()) 4558 Known.knownNot(fcNan); 4559 break; 4560 } 4561 case Intrinsic::maxnum: 4562 case Intrinsic::minnum: 4563 case Intrinsic::minimum: 4564 case Intrinsic::maximum: { 4565 KnownFPClass KnownLHS, KnownRHS; 4566 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4567 KnownLHS, Depth + 1, Q); 4568 computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses, 4569 KnownRHS, Depth + 1, Q); 4570 4571 bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN(); 4572 Known = KnownLHS | KnownRHS; 4573 4574 // If either operand is not NaN, the result is not NaN. 4575 if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)) 4576 Known.knownNot(fcNan); 4577 4578 if (IID == Intrinsic::maxnum) { 4579 // If at least one operand is known to be positive, the result must be 4580 // positive. 4581 if ((KnownLHS.cannotBeOrderedLessThanZero() && 4582 KnownLHS.isKnownNeverNaN()) || 4583 (KnownRHS.cannotBeOrderedLessThanZero() && 4584 KnownRHS.isKnownNeverNaN())) 4585 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4586 } else if (IID == Intrinsic::maximum) { 4587 // If at least one operand is known to be positive, the result must be 4588 // positive. 4589 if (KnownLHS.cannotBeOrderedLessThanZero() || 4590 KnownRHS.cannotBeOrderedLessThanZero()) 4591 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4592 } else if (IID == Intrinsic::minnum) { 4593 // If at least one operand is known to be negative, the result must be 4594 // negative. 4595 if ((KnownLHS.cannotBeOrderedGreaterThanZero() && 4596 KnownLHS.isKnownNeverNaN()) || 4597 (KnownRHS.cannotBeOrderedGreaterThanZero() && 4598 KnownRHS.isKnownNeverNaN())) 4599 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 4600 } else { 4601 // If at least one operand is known to be negative, the result must be 4602 // negative. 4603 if (KnownLHS.cannotBeOrderedGreaterThanZero() || 4604 KnownRHS.cannotBeOrderedGreaterThanZero()) 4605 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 4606 } 4607 4608 // Fixup zero handling if denormals could be returned as a zero. 4609 // 4610 // As there's no spec for denormal flushing, be conservative with the 4611 // treatment of denormals that could be flushed to zero. For older 4612 // subtargets on AMDGPU the min/max instructions would not flush the 4613 // output and return the original value. 4614 // 4615 // TODO: This could be refined based on the sign 4616 if ((Known.KnownFPClasses & fcZero) != fcNone && 4617 !Known.isKnownNeverSubnormal()) { 4618 const Function *Parent = II->getFunction(); 4619 if (!Parent) 4620 break; 4621 4622 DenormalMode Mode = Parent->getDenormalMode( 4623 II->getType()->getScalarType()->getFltSemantics()); 4624 if (Mode != DenormalMode::getIEEE()) 4625 Known.KnownFPClasses |= fcZero; 4626 } 4627 4628 break; 4629 } 4630 case Intrinsic::canonicalize: { 4631 KnownFPClass KnownSrc; 4632 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4633 KnownSrc, Depth + 1, Q); 4634 4635 // This is essentially a stronger form of 4636 // propagateCanonicalizingSrc. Other "canonicalizing" operations don't 4637 // actually have an IR canonicalization guarantee. 
4638 4639 // Canonicalize may flush denormals to zero, so we have to consider the 4640 // denormal mode to preserve known-not-0 knowledge. 4641 Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan; 4642 4643 // Stronger version of propagateNaN 4644 // Canonicalize is guaranteed to quiet signaling nans. 4645 if (KnownSrc.isKnownNeverNaN()) 4646 Known.knownNot(fcNan); 4647 else 4648 Known.knownNot(fcSNan); 4649 4650 const Function *F = II->getFunction(); 4651 if (!F) 4652 break; 4653 4654 // If the parent function flushes denormals, the canonical output cannot 4655 // be a denormal. 4656 const fltSemantics &FPType = 4657 II->getType()->getScalarType()->getFltSemantics(); 4658 DenormalMode DenormMode = F->getDenormalMode(FPType); 4659 if (DenormMode == DenormalMode::getIEEE()) { 4660 if (KnownSrc.isKnownNever(fcPosZero)) 4661 Known.knownNot(fcPosZero); 4662 if (KnownSrc.isKnownNever(fcNegZero)) 4663 Known.knownNot(fcNegZero); 4664 break; 4665 } 4666 4667 if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero()) 4668 Known.knownNot(fcSubnormal); 4669 4670 if (DenormMode.Input == DenormalMode::PositiveZero || 4671 (DenormMode.Output == DenormalMode::PositiveZero && 4672 DenormMode.Input == DenormalMode::IEEE)) 4673 Known.knownNot(fcNegZero); 4674 4675 break; 4676 } 4677 case Intrinsic::trunc: 4678 case Intrinsic::floor: 4679 case Intrinsic::ceil: 4680 case Intrinsic::rint: 4681 case Intrinsic::nearbyint: 4682 case Intrinsic::round: 4683 case Intrinsic::roundeven: { 4684 KnownFPClass KnownSrc; 4685 FPClassTest InterestedSrcs = InterestedClasses; 4686 if (InterestedSrcs & fcPosFinite) 4687 InterestedSrcs |= fcPosFinite; 4688 if (InterestedSrcs & fcNegFinite) 4689 InterestedSrcs |= fcNegFinite; 4690 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 4691 KnownSrc, Depth + 1, Q); 4692 4693 // Integer results cannot be subnormal. 4694 Known.knownNot(fcSubnormal); 4695 4696 Known.propagateNaN(KnownSrc, true); 4697 4698 // Pass through infinities, except PPC_FP128 is a special case for 4699 // intrinsics other than trunc. 
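    // For example, floor(+inf) == +inf and ceil(-inf) == -inf, so a source
    // known not to be +/-inf gives a result known not to be +/-inf (modulo
    // the multi-unit ppc_fp128 case handled below).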
4700 if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) { 4701 if (KnownSrc.isKnownNeverPosInfinity()) 4702 Known.knownNot(fcPosInf); 4703 if (KnownSrc.isKnownNeverNegInfinity()) 4704 Known.knownNot(fcNegInf); 4705 } 4706 4707 // Negative round ups to 0 produce -0 4708 if (KnownSrc.isKnownNever(fcPosFinite)) 4709 Known.knownNot(fcPosFinite); 4710 if (KnownSrc.isKnownNever(fcNegFinite)) 4711 Known.knownNot(fcNegFinite); 4712 4713 break; 4714 } 4715 case Intrinsic::exp: 4716 case Intrinsic::exp2: 4717 case Intrinsic::exp10: { 4718 Known.knownNot(fcNegative); 4719 if ((InterestedClasses & fcNan) == fcNone) 4720 break; 4721 4722 KnownFPClass KnownSrc; 4723 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4724 KnownSrc, Depth + 1, Q); 4725 if (KnownSrc.isKnownNeverNaN()) { 4726 Known.knownNot(fcNan); 4727 Known.SignBit = false; 4728 } 4729 4730 break; 4731 } 4732 case Intrinsic::fptrunc_round: { 4733 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known, 4734 Depth, Q); 4735 break; 4736 } 4737 case Intrinsic::log: 4738 case Intrinsic::log10: 4739 case Intrinsic::log2: 4740 case Intrinsic::experimental_constrained_log: 4741 case Intrinsic::experimental_constrained_log10: 4742 case Intrinsic::experimental_constrained_log2: { 4743 // log(+inf) -> +inf 4744 // log([+-]0.0) -> -inf 4745 // log(-inf) -> nan 4746 // log(-x) -> nan 4747 if ((InterestedClasses & (fcNan | fcInf)) == fcNone) 4748 break; 4749 4750 FPClassTest InterestedSrcs = InterestedClasses; 4751 if ((InterestedClasses & fcNegInf) != fcNone) 4752 InterestedSrcs |= fcZero | fcSubnormal; 4753 if ((InterestedClasses & fcNan) != fcNone) 4754 InterestedSrcs |= fcNan | (fcNegative & ~fcNan); 4755 4756 KnownFPClass KnownSrc; 4757 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 4758 KnownSrc, Depth + 1, Q); 4759 4760 if (KnownSrc.isKnownNeverPosInfinity()) 4761 Known.knownNot(fcPosInf); 4762 4763 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) 4764 Known.knownNot(fcNan); 4765 4766 const Function *F = II->getFunction(); 4767 if (F && KnownSrc.isKnownNeverLogicalZero(*F, II->getType())) 4768 Known.knownNot(fcNegInf); 4769 4770 break; 4771 } 4772 case Intrinsic::powi: { 4773 if ((InterestedClasses & fcNegative) == fcNone) 4774 break; 4775 4776 const Value *Exp = II->getArgOperand(1); 4777 Type *ExpTy = Exp->getType(); 4778 unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth(); 4779 KnownBits ExponentKnownBits(BitWidth); 4780 computeKnownBits(Exp, isa<VectorType>(ExpTy) ? DemandedElts : APInt(1, 1), 4781 ExponentKnownBits, Depth + 1, Q); 4782 4783 if (ExponentKnownBits.Zero[0]) { // Is even 4784 Known.knownNot(fcNegative); 4785 break; 4786 } 4787 4788 // Given that exp is an integer, here are the 4789 // ways that pow can return a negative value: 4790 // 4791 // pow(-x, exp) --> negative if exp is odd and x is negative. 4792 // pow(-0, exp) --> -inf if exp is negative odd. 4793 // pow(-0, exp) --> -0 if exp is positive odd. 4794 // pow(-inf, exp) --> -0 if exp is negative odd. 4795 // pow(-inf, exp) --> -inf if exp is positive odd. 
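    // Worked example: powi(-2.0, 3) == -8.0 (an odd exponent preserves the
    // sign of the base) while powi(-2.0, 4) == 16.0. The parity check above
    // only needs the low bit of the exponent's known bits; for a possibly odd
    // exponent we fall through to asking whether the base can be negative.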
4796 KnownFPClass KnownSrc; 4797 computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative, 4798 KnownSrc, Depth + 1, Q); 4799 if (KnownSrc.isKnownNever(fcNegative)) 4800 Known.knownNot(fcNegative); 4801 break; 4802 } 4803 case Intrinsic::ldexp: { 4804 KnownFPClass KnownSrc; 4805 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4806 KnownSrc, Depth + 1, Q); 4807 Known.propagateNaN(KnownSrc, /*PropagateSign=*/true); 4808 4809 // Sign is preserved, but underflows may produce zeroes. 4810 if (KnownSrc.isKnownNever(fcNegative)) 4811 Known.knownNot(fcNegative); 4812 else if (KnownSrc.cannotBeOrderedLessThanZero()) 4813 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4814 4815 if (KnownSrc.isKnownNever(fcPositive)) 4816 Known.knownNot(fcPositive); 4817 else if (KnownSrc.cannotBeOrderedGreaterThanZero()) 4818 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 4819 4820 // Can refine inf/zero handling based on the exponent operand. 4821 const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf; 4822 if ((InterestedClasses & ExpInfoMask) == fcNone) 4823 break; 4824 if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone) 4825 break; 4826 4827 const fltSemantics &Flt = 4828 II->getType()->getScalarType()->getFltSemantics(); 4829 unsigned Precision = APFloat::semanticsPrecision(Flt); 4830 const Value *ExpArg = II->getArgOperand(1); 4831 ConstantRange ExpRange = computeConstantRange( 4832 ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1); 4833 4834 const int MantissaBits = Precision - 1; 4835 if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits))) 4836 Known.knownNot(fcSubnormal); 4837 4838 const Function *F = II->getFunction(); 4839 const APInt *ConstVal = ExpRange.getSingleElement(); 4840 if (ConstVal && ConstVal->isZero()) { 4841 // ldexp(x, 0) -> x, so propagate everything. 4842 Known.propagateCanonicalizingSrc(KnownSrc, *F, II->getType()); 4843 } else if (ExpRange.isAllNegative()) { 4844 // If we know the power is <= 0, can't introduce inf 4845 if (KnownSrc.isKnownNeverPosInfinity()) 4846 Known.knownNot(fcPosInf); 4847 if (KnownSrc.isKnownNeverNegInfinity()) 4848 Known.knownNot(fcNegInf); 4849 } else if (ExpRange.isAllNonNegative()) { 4850 // If we know the power is >= 0, can't introduce subnormal or zero 4851 if (KnownSrc.isKnownNeverPosSubnormal()) 4852 Known.knownNot(fcPosSubnormal); 4853 if (KnownSrc.isKnownNeverNegSubnormal()) 4854 Known.knownNot(fcNegSubnormal); 4855 if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, II->getType())) 4856 Known.knownNot(fcPosZero); 4857 if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType())) 4858 Known.knownNot(fcNegZero); 4859 } 4860 4861 break; 4862 } 4863 case Intrinsic::arithmetic_fence: { 4864 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4865 Known, Depth + 1, Q); 4866 break; 4867 } 4868 case Intrinsic::experimental_constrained_sitofp: 4869 case Intrinsic::experimental_constrained_uitofp: 4870 // Cannot produce nan 4871 Known.knownNot(fcNan); 4872 4873 // sitofp and uitofp turn into +0.0 for zero. 
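    // For example, sitofp of i32 0 yields +0.0 (never -0.0) and sitofp of
    // i32 -7 yields -7.0; the conversion can never produce a NaN or a
    // subnormal, and for the unsigned variant the sign bit is always clear.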
4874 Known.knownNot(fcNegZero); 4875 4876 // Integers cannot be subnormal 4877 Known.knownNot(fcSubnormal); 4878 4879 if (IID == Intrinsic::experimental_constrained_uitofp) 4880 Known.signBitMustBeZero(); 4881 4882 // TODO: Copy inf handling from instructions 4883 break; 4884 default: 4885 break; 4886 } 4887 4888 break; 4889 } 4890 case Instruction::FAdd: 4891 case Instruction::FSub: { 4892 KnownFPClass KnownLHS, KnownRHS; 4893 bool WantNegative = 4894 Op->getOpcode() == Instruction::FAdd && 4895 (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone; 4896 bool WantNaN = (InterestedClasses & fcNan) != fcNone; 4897 bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone; 4898 4899 if (!WantNaN && !WantNegative && !WantNegZero) 4900 break; 4901 4902 FPClassTest InterestedSrcs = InterestedClasses; 4903 if (WantNegative) 4904 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; 4905 if (InterestedClasses & fcNan) 4906 InterestedSrcs |= fcInf; 4907 computeKnownFPClass(Op->getOperand(1), DemandedElts, InterestedSrcs, 4908 KnownRHS, Depth + 1, Q); 4909 4910 if ((WantNaN && KnownRHS.isKnownNeverNaN()) || 4911 (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) || 4912 WantNegZero || Opc == Instruction::FSub) { 4913 4914 // RHS is canonically cheaper to compute. Skip inspecting the LHS if 4915 // there's no point. 4916 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedSrcs, 4917 KnownLHS, Depth + 1, Q); 4918 // Adding positive and negative infinity produces NaN. 4919 // TODO: Check sign of infinities. 4920 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 4921 (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity())) 4922 Known.knownNot(fcNan); 4923 4924 // FIXME: Context function should always be passed in separately 4925 const Function *F = cast<Instruction>(Op)->getFunction(); 4926 4927 if (Op->getOpcode() == Instruction::FAdd) { 4928 if (KnownLHS.cannotBeOrderedLessThanZero() && 4929 KnownRHS.cannotBeOrderedLessThanZero()) 4930 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4931 if (!F) 4932 break; 4933 4934 // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0. 4935 if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) || 4936 KnownRHS.isKnownNeverLogicalNegZero(*F, Op->getType())) && 4937 // Make sure output negative denormal can't flush to -0 4938 outputDenormalIsIEEEOrPosZero(*F, Op->getType())) 4939 Known.knownNot(fcNegZero); 4940 } else { 4941 if (!F) 4942 break; 4943 4944 // Only fsub -0, +0 can return -0 4945 if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) || 4946 KnownRHS.isKnownNeverLogicalPosZero(*F, Op->getType())) && 4947 // Make sure output negative denormal can't flush to -0 4948 outputDenormalIsIEEEOrPosZero(*F, Op->getType())) 4949 Known.knownNot(fcNegZero); 4950 } 4951 } 4952 4953 break; 4954 } 4955 case Instruction::FMul: { 4956 // X * X is always non-negative or a NaN. 4957 if (Op->getOperand(0) == Op->getOperand(1)) 4958 Known.knownNot(fcNegative); 4959 4960 if ((InterestedClasses & fcNan) != fcNan) 4961 break; 4962 4963 // fcSubnormal is only needed in case of DAZ. 
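    // e.g. with denormal inputs treated as zero (DAZ), a subnormal operand
    // effectively becomes 0.0, and 0.0 * inf is NaN, so subnormals must be
    // ruled out along with zeroes and infinities before clearing fcNan.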
4964 const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal; 4965 4966 KnownFPClass KnownLHS, KnownRHS; 4967 computeKnownFPClass(Op->getOperand(1), DemandedElts, NeedForNan, KnownRHS, 4968 Depth + 1, Q); 4969 if (!KnownRHS.isKnownNeverNaN()) 4970 break; 4971 4972 computeKnownFPClass(Op->getOperand(0), DemandedElts, NeedForNan, KnownLHS, 4973 Depth + 1, Q); 4974 if (!KnownLHS.isKnownNeverNaN()) 4975 break; 4976 4977 // If 0 * +/-inf produces NaN. 4978 if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) { 4979 Known.knownNot(fcNan); 4980 break; 4981 } 4982 4983 const Function *F = cast<Instruction>(Op)->getFunction(); 4984 if (!F) 4985 break; 4986 4987 if ((KnownRHS.isKnownNeverInfinity() || 4988 KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) && 4989 (KnownLHS.isKnownNeverInfinity() || 4990 KnownRHS.isKnownNeverLogicalZero(*F, Op->getType()))) 4991 Known.knownNot(fcNan); 4992 4993 break; 4994 } 4995 case Instruction::FDiv: 4996 case Instruction::FRem: { 4997 if (Op->getOperand(0) == Op->getOperand(1)) { 4998 // TODO: Could filter out snan if we inspect the operand 4999 if (Op->getOpcode() == Instruction::FDiv) { 5000 // X / X is always exactly 1.0 or a NaN. 5001 Known.KnownFPClasses = fcNan | fcPosNormal; 5002 } else { 5003 // X % X is always exactly [+-]0.0 or a NaN. 5004 Known.KnownFPClasses = fcNan | fcZero; 5005 } 5006 5007 break; 5008 } 5009 5010 const bool WantNan = (InterestedClasses & fcNan) != fcNone; 5011 const bool WantNegative = (InterestedClasses & fcNegative) != fcNone; 5012 const bool WantPositive = 5013 Opc == Instruction::FRem && (InterestedClasses & fcPositive) != fcNone; 5014 if (!WantNan && !WantNegative && !WantPositive) 5015 break; 5016 5017 KnownFPClass KnownLHS, KnownRHS; 5018 5019 computeKnownFPClass(Op->getOperand(1), DemandedElts, 5020 fcNan | fcInf | fcZero | fcNegative, KnownRHS, 5021 Depth + 1, Q); 5022 5023 bool KnowSomethingUseful = 5024 KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative); 5025 5026 if (KnowSomethingUseful || WantPositive) { 5027 const FPClassTest InterestedLHS = 5028 WantPositive ? fcAllFlags 5029 : fcNan | fcInf | fcZero | fcSubnormal | fcNegative; 5030 5031 computeKnownFPClass(Op->getOperand(0), DemandedElts, 5032 InterestedClasses & InterestedLHS, KnownLHS, 5033 Depth + 1, Q); 5034 } 5035 5036 const Function *F = cast<Instruction>(Op)->getFunction(); 5037 5038 if (Op->getOpcode() == Instruction::FDiv) { 5039 // Only 0/0, Inf/Inf produce NaN. 5040 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 5041 (KnownLHS.isKnownNeverInfinity() || 5042 KnownRHS.isKnownNeverInfinity()) && 5043 ((F && KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) || 5044 (F && KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))) { 5045 Known.knownNot(fcNan); 5046 } 5047 5048 // X / -0.0 is -Inf (or NaN). 5049 // +X / +X is +X 5050 if (KnownLHS.isKnownNever(fcNegative) && KnownRHS.isKnownNever(fcNegative)) 5051 Known.knownNot(fcNegative); 5052 } else { 5053 // Inf REM x and x REM 0 produce NaN. 5054 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 5055 KnownLHS.isKnownNeverInfinity() && F && 5056 KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())) { 5057 Known.knownNot(fcNan); 5058 } 5059 5060 // The sign for frem is the same as the first operand. 
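    // For example, frem 5.5, 3.0 == 2.5 while frem -5.5, 3.0 == -2.5; like
    // fmod, the result takes the sign of the dividend, which is what lets the
    // ordered-less/greater-than-zero facts of the LHS be copied below.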
5061 if (KnownLHS.cannotBeOrderedLessThanZero()) 5062 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 5063 if (KnownLHS.cannotBeOrderedGreaterThanZero()) 5064 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 5065 5066 // See if we can be more aggressive about the sign of 0. 5067 if (KnownLHS.isKnownNever(fcNegative)) 5068 Known.knownNot(fcNegative); 5069 if (KnownLHS.isKnownNever(fcPositive)) 5070 Known.knownNot(fcPositive); 5071 } 5072 5073 break; 5074 } 5075 case Instruction::FPExt: { 5076 // Infinity, nan and zero propagate from source. 5077 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, 5078 Known, Depth + 1, Q); 5079 5080 const fltSemantics &DstTy = 5081 Op->getType()->getScalarType()->getFltSemantics(); 5082 const fltSemantics &SrcTy = 5083 Op->getOperand(0)->getType()->getScalarType()->getFltSemantics(); 5084 5085 // All subnormal inputs should be in the normal range in the result type. 5086 if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy)) { 5087 if (Known.KnownFPClasses & fcPosSubnormal) 5088 Known.KnownFPClasses |= fcPosNormal; 5089 if (Known.KnownFPClasses & fcNegSubnormal) 5090 Known.KnownFPClasses |= fcNegNormal; 5091 Known.knownNot(fcSubnormal); 5092 } 5093 5094 // Sign bit of a nan isn't guaranteed. 5095 if (!Known.isKnownNeverNaN()) 5096 Known.SignBit = std::nullopt; 5097 break; 5098 } 5099 case Instruction::FPTrunc: { 5100 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known, 5101 Depth, Q); 5102 break; 5103 } 5104 case Instruction::SIToFP: 5105 case Instruction::UIToFP: { 5106 // Cannot produce nan 5107 Known.knownNot(fcNan); 5108 5109 // Integers cannot be subnormal 5110 Known.knownNot(fcSubnormal); 5111 5112 // sitofp and uitofp turn into +0.0 for zero. 5113 Known.knownNot(fcNegZero); 5114 if (Op->getOpcode() == Instruction::UIToFP) 5115 Known.signBitMustBeZero(); 5116 5117 if (InterestedClasses & fcInf) { 5118 // Get width of largest magnitude integer (remove a bit if signed). 5119 // This still works for a signed minimum value because the largest FP 5120 // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx). 5121 int IntSize = Op->getOperand(0)->getType()->getScalarSizeInBits(); 5122 if (Op->getOpcode() == Instruction::SIToFP) 5123 --IntSize; 5124 5125 // If the exponent of the largest finite FP value can hold the largest 5126 // integer, the result of the cast must be finite. 5127 Type *FPTy = Op->getType()->getScalarType(); 5128 if (ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize) 5129 Known.knownNot(fcInf); 5130 } 5131 5132 break; 5133 } 5134 case Instruction::ExtractElement: { 5135 // Look through extract element. If the index is non-constant or 5136 // out-of-range demand all elements, otherwise just the extracted element. 
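    // For example, for extractelement <4 x float> %v, i32 2 only lane 2 of
    // %v matters, so the demanded set becomes just that one element; a
    // variable or out-of-range index keeps all four lanes demanded.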
5137 const Value *Vec = Op->getOperand(0); 5138 const Value *Idx = Op->getOperand(1); 5139 auto *CIdx = dyn_cast<ConstantInt>(Idx); 5140 5141 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) { 5142 unsigned NumElts = VecTy->getNumElements(); 5143 APInt DemandedVecElts = APInt::getAllOnes(NumElts); 5144 if (CIdx && CIdx->getValue().ult(NumElts)) 5145 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); 5146 return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known, 5147 Depth + 1, Q); 5148 } 5149 5150 break; 5151 } 5152 case Instruction::InsertElement: { 5153 if (isa<ScalableVectorType>(Op->getType())) 5154 return; 5155 5156 const Value *Vec = Op->getOperand(0); 5157 const Value *Elt = Op->getOperand(1); 5158 auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2)); 5159 // Early out if the index is non-constant or out-of-range. 5160 unsigned NumElts = DemandedElts.getBitWidth(); 5161 if (!CIdx || CIdx->getValue().uge(NumElts)) 5162 return; 5163 5164 unsigned EltIdx = CIdx->getZExtValue(); 5165 // Do we demand the inserted element? 5166 if (DemandedElts[EltIdx]) { 5167 computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q); 5168 // If we don't know any bits, early out. 5169 if (Known.isUnknown()) 5170 break; 5171 } else { 5172 Known.KnownFPClasses = fcNone; 5173 } 5174 5175 // We don't need the base vector element that has been inserted. 5176 APInt DemandedVecElts = DemandedElts; 5177 DemandedVecElts.clearBit(EltIdx); 5178 if (!!DemandedVecElts) { 5179 KnownFPClass Known2; 5180 computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2, 5181 Depth + 1, Q); 5182 Known |= Known2; 5183 } 5184 5185 break; 5186 } 5187 case Instruction::ShuffleVector: { 5188 // For undef elements, we don't know anything about the common state of 5189 // the shuffle result. 5190 APInt DemandedLHS, DemandedRHS; 5191 auto *Shuf = dyn_cast<ShuffleVectorInst>(Op); 5192 if (!Shuf || !getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) 5193 return; 5194 5195 if (!!DemandedLHS) { 5196 const Value *LHS = Shuf->getOperand(0); 5197 computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known, 5198 Depth + 1, Q); 5199 5200 // If we don't know any bits, early out. 
5201 if (Known.isUnknown()) 5202 break; 5203 } else { 5204 Known.KnownFPClasses = fcNone; 5205 } 5206 5207 if (!!DemandedRHS) { 5208 KnownFPClass Known2; 5209 const Value *RHS = Shuf->getOperand(1); 5210 computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2, 5211 Depth + 1, Q); 5212 Known |= Known2; 5213 } 5214 5215 break; 5216 } 5217 case Instruction::ExtractValue: { 5218 const ExtractValueInst *Extract = cast<ExtractValueInst>(Op); 5219 ArrayRef<unsigned> Indices = Extract->getIndices(); 5220 const Value *Src = Extract->getAggregateOperand(); 5221 if (isa<StructType>(Src->getType()) && Indices.size() == 1 && 5222 Indices[0] == 0) { 5223 if (const auto *II = dyn_cast<IntrinsicInst>(Src)) { 5224 switch (II->getIntrinsicID()) { 5225 case Intrinsic::frexp: { 5226 Known.knownNot(fcSubnormal); 5227 5228 KnownFPClass KnownSrc; 5229 computeKnownFPClass(II->getArgOperand(0), DemandedElts, 5230 InterestedClasses, KnownSrc, Depth + 1, Q); 5231 5232 const Function *F = cast<Instruction>(Op)->getFunction(); 5233 5234 if (KnownSrc.isKnownNever(fcNegative)) 5235 Known.knownNot(fcNegative); 5236 else { 5237 if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, Op->getType())) 5238 Known.knownNot(fcNegZero); 5239 if (KnownSrc.isKnownNever(fcNegInf)) 5240 Known.knownNot(fcNegInf); 5241 } 5242 5243 if (KnownSrc.isKnownNever(fcPositive)) 5244 Known.knownNot(fcPositive); 5245 else { 5246 if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, Op->getType())) 5247 Known.knownNot(fcPosZero); 5248 if (KnownSrc.isKnownNever(fcPosInf)) 5249 Known.knownNot(fcPosInf); 5250 } 5251 5252 Known.propagateNaN(KnownSrc); 5253 return; 5254 } 5255 default: 5256 break; 5257 } 5258 } 5259 } 5260 5261 computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1, 5262 Q); 5263 break; 5264 } 5265 case Instruction::PHI: { 5266 const PHINode *P = cast<PHINode>(Op); 5267 // Unreachable blocks may have zero-operand PHI nodes. 5268 if (P->getNumIncomingValues() == 0) 5269 break; 5270 5271 // Otherwise take the unions of the known bit sets of the operands, 5272 // taking conservative care to avoid excessive recursion. 5273 const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2; 5274 5275 if (Depth < PhiRecursionLimit) { 5276 // Skip if every incoming value references to ourself. 5277 if (isa_and_nonnull<UndefValue>(P->hasConstantValue())) 5278 break; 5279 5280 bool First = true; 5281 5282 for (Value *IncValue : P->incoming_values()) { 5283 // Skip direct self references. 5284 if (IncValue == P) 5285 continue; 5286 5287 KnownFPClass KnownSrc; 5288 // Recurse, but cap the recursion to two levels, because we don't want 5289 // to waste time spinning around in loops. We need at least depth 2 to 5290 // detect known sign bits. 
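      // For instance, if an incoming value is (fneg (fabs %x)), one level
      // reaches the fneg and a second reaches the fabs, enough to conclude
      // the incoming value has its sign bit set; anything deeper simply
      // falls back to "unknown" instead of recursing further.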
5291 computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc, 5292 PhiRecursionLimit, Q); 5293 5294 if (First) { 5295 Known = KnownSrc; 5296 First = false; 5297 } else { 5298 Known |= KnownSrc; 5299 } 5300 5301 if (Known.KnownFPClasses == fcAllFlags) 5302 break; 5303 } 5304 } 5305 5306 break; 5307 } 5308 default: 5309 break; 5310 } 5311 } 5312 5313 KnownFPClass llvm::computeKnownFPClass( 5314 const Value *V, const APInt &DemandedElts, const DataLayout &DL, 5315 FPClassTest InterestedClasses, unsigned Depth, const TargetLibraryInfo *TLI, 5316 AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, 5317 bool UseInstrInfo) { 5318 KnownFPClass KnownClasses; 5319 ::computeKnownFPClass( 5320 V, DemandedElts, InterestedClasses, KnownClasses, Depth, 5321 SimplifyQuery(DL, TLI, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); 5322 return KnownClasses; 5323 } 5324 5325 KnownFPClass llvm::computeKnownFPClass( 5326 const Value *V, const DataLayout &DL, FPClassTest InterestedClasses, 5327 unsigned Depth, const TargetLibraryInfo *TLI, AssumptionCache *AC, 5328 const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { 5329 KnownFPClass Known; 5330 ::computeKnownFPClass( 5331 V, Known, InterestedClasses, Depth, 5332 SimplifyQuery(DL, TLI, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); 5333 return Known; 5334 } 5335 5336 Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { 5337 5338 // All byte-wide stores are splatable, even of arbitrary variables. 5339 if (V->getType()->isIntegerTy(8)) 5340 return V; 5341 5342 LLVMContext &Ctx = V->getContext(); 5343 5344 // Undef don't care. 5345 auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx)); 5346 if (isa<UndefValue>(V)) 5347 return UndefInt8; 5348 5349 // Return Undef for zero-sized type. 5350 if (DL.getTypeStoreSize(V->getType()).isZero()) 5351 return UndefInt8; 5352 5353 Constant *C = dyn_cast<Constant>(V); 5354 if (!C) { 5355 // Conceptually, we could handle things like: 5356 // %a = zext i8 %X to i16 5357 // %b = shl i16 %a, 8 5358 // %c = or i16 %a, %b 5359 // but until there is an example that actually needs this, it doesn't seem 5360 // worth worrying about. 5361 return nullptr; 5362 } 5363 5364 // Handle 'null' ConstantArrayZero etc. 5365 if (C->isNullValue()) 5366 return Constant::getNullValue(Type::getInt8Ty(Ctx)); 5367 5368 // Constant floating-point values can be handled as integer values if the 5369 // corresponding integer value is "byteable". An important case is 0.0. 5370 if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { 5371 Type *Ty = nullptr; 5372 if (CFP->getType()->isHalfTy()) 5373 Ty = Type::getInt16Ty(Ctx); 5374 else if (CFP->getType()->isFloatTy()) 5375 Ty = Type::getInt32Ty(Ctx); 5376 else if (CFP->getType()->isDoubleTy()) 5377 Ty = Type::getInt64Ty(Ctx); 5378 // Don't handle long double formats, which have strange constraints. 5379 return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL) 5380 : nullptr; 5381 } 5382 5383 // We can handle constant integers that are multiple of 8 bits. 
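  // For example, i32 0xABABABAB is the byte 0xAB repeated four times and is
  // returned as i8 0xAB, whereas i32 0x12345678 has no single repeating byte
  // and yields nullptr.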
5384 if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) { 5385 if (CI->getBitWidth() % 8 == 0) { 5386 assert(CI->getBitWidth() > 8 && "8 bits should be handled above!"); 5387 if (!CI->getValue().isSplat(8)) 5388 return nullptr; 5389 return ConstantInt::get(Ctx, CI->getValue().trunc(8)); 5390 } 5391 } 5392 5393 if (auto *CE = dyn_cast<ConstantExpr>(C)) { 5394 if (CE->getOpcode() == Instruction::IntToPtr) { 5395 if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) { 5396 unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace()); 5397 if (Constant *Op = ConstantFoldIntegerCast( 5398 CE->getOperand(0), Type::getIntNTy(Ctx, BitWidth), false, DL)) 5399 return isBytewiseValue(Op, DL); 5400 } 5401 } 5402 } 5403 5404 auto Merge = [&](Value *LHS, Value *RHS) -> Value * { 5405 if (LHS == RHS) 5406 return LHS; 5407 if (!LHS || !RHS) 5408 return nullptr; 5409 if (LHS == UndefInt8) 5410 return RHS; 5411 if (RHS == UndefInt8) 5412 return LHS; 5413 return nullptr; 5414 }; 5415 5416 if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) { 5417 Value *Val = UndefInt8; 5418 for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I) 5419 if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL)))) 5420 return nullptr; 5421 return Val; 5422 } 5423 5424 if (isa<ConstantAggregate>(C)) { 5425 Value *Val = UndefInt8; 5426 for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) 5427 if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL)))) 5428 return nullptr; 5429 return Val; 5430 } 5431 5432 // Don't try to handle the handful of other constants. 5433 return nullptr; 5434 } 5435 5436 // This is the recursive version of BuildSubAggregate. It takes a few different 5437 // arguments. Idxs is the index within the nested struct From that we are 5438 // looking at now (which is of type IndexedType). IdxSkip is the number of 5439 // indices from Idxs that should be left out when inserting into the resulting 5440 // struct. To is the result struct built so far, new insertvalue instructions 5441 // build on that. 5442 static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, 5443 SmallVectorImpl<unsigned> &Idxs, 5444 unsigned IdxSkip, 5445 Instruction *InsertBefore) { 5446 StructType *STy = dyn_cast<StructType>(IndexedType); 5447 if (STy) { 5448 // Save the original To argument so we can modify it 5449 Value *OrigTo = To; 5450 // General case, the type indexed by Idxs is a struct 5451 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 5452 // Process each struct element recursively 5453 Idxs.push_back(i); 5454 Value *PrevTo = To; 5455 To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip, 5456 InsertBefore); 5457 Idxs.pop_back(); 5458 if (!To) { 5459 // Couldn't find any inserted value for this index? Cleanup 5460 while (PrevTo != OrigTo) { 5461 InsertValueInst* Del = cast<InsertValueInst>(PrevTo); 5462 PrevTo = Del->getAggregateOperand(); 5463 Del->eraseFromParent(); 5464 } 5465 // Stop processing elements 5466 break; 5467 } 5468 } 5469 // If we successfully found a value for each of our subaggregates 5470 if (To) 5471 return To; 5472 } 5473 // Base case, the type indexed by SourceIdxs is not a struct, or not all of 5474 // the struct's elements had a value that was inserted directly. In the latter 5475 // case, perhaps we can't determine each of the subelements individually, but 5476 // we might be able to find the complete struct somewhere. 
5477 5478 // Find the value that is at that particular spot 5479 Value *V = FindInsertedValue(From, Idxs); 5480 5481 if (!V) 5482 return nullptr; 5483 5484 // Insert the value in the new (sub) aggregate 5485 return InsertValueInst::Create(To, V, ArrayRef(Idxs).slice(IdxSkip), "tmp", 5486 InsertBefore); 5487 } 5488 5489 // This helper takes a nested struct and extracts a part of it (which is again a 5490 // struct) into a new value. For example, given the struct: 5491 // { a, { b, { c, d }, e } } 5492 // and the indices "1, 1" this returns 5493 // { c, d }. 5494 // 5495 // It does this by inserting an insertvalue for each element in the resulting 5496 // struct, as opposed to just inserting a single struct. This will only work if 5497 // each of the elements of the substruct are known (ie, inserted into From by an 5498 // insertvalue instruction somewhere). 5499 // 5500 // All inserted insertvalue instructions are inserted before InsertBefore 5501 static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range, 5502 Instruction *InsertBefore) { 5503 assert(InsertBefore && "Must have someplace to insert!"); 5504 Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(), 5505 idx_range); 5506 Value *To = PoisonValue::get(IndexedType); 5507 SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end()); 5508 unsigned IdxSkip = Idxs.size(); 5509 5510 return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore); 5511 } 5512 5513 /// Given an aggregate and a sequence of indices, see if the scalar value 5514 /// indexed is already around as a register, for example if it was inserted 5515 /// directly into the aggregate. 5516 /// 5517 /// If InsertBefore is not null, this function will duplicate (modified) 5518 /// insertvalues when a part of a nested struct is extracted. 5519 Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, 5520 Instruction *InsertBefore) { 5521 // Nothing to index? Just return V then (this is useful at the end of our 5522 // recursion). 5523 if (idx_range.empty()) 5524 return V; 5525 // We have indices, so V should have an indexable type. 5526 assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) && 5527 "Not looking at a struct or array?"); 5528 assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) && 5529 "Invalid indices for type?"); 5530 5531 if (Constant *C = dyn_cast<Constant>(V)) { 5532 C = C->getAggregateElement(idx_range[0]); 5533 if (!C) return nullptr; 5534 return FindInsertedValue(C, idx_range.slice(1), InsertBefore); 5535 } 5536 5537 if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) { 5538 // Loop the indices for the insertvalue instruction in parallel with the 5539 // requested indices 5540 const unsigned *req_idx = idx_range.begin(); 5541 for (const unsigned *i = I->idx_begin(), *e = I->idx_end(); 5542 i != e; ++i, ++req_idx) { 5543 if (req_idx == idx_range.end()) { 5544 // We can't handle this without inserting insertvalues 5545 if (!InsertBefore) 5546 return nullptr; 5547 5548 // The requested index identifies a part of a nested aggregate. Handle 5549 // this specially. 
For example,
5550 // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
5551 // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
5552 // %C = extractvalue {i32, { i32, i32 } } %B, 1
5553 // This can be changed into
5554 // %A = insertvalue {i32, i32 } undef, i32 10, 0
5555 // %C = insertvalue {i32, i32 } %A, i32 11, 1
5556 // which allows the unused 0,0 element from the nested struct to be
5557 // removed.
5558 return BuildSubAggregate(V, ArrayRef(idx_range.begin(), req_idx),
5559 InsertBefore);
5560 }
5561
5562 // This insertvalue inserts something other than what we are looking for.
5563 // See if the (aggregate) value inserted into has the value we are
5564 // looking for, then.
5565 if (*req_idx != *i)
5566 return FindInsertedValue(I->getAggregateOperand(), idx_range,
5567 InsertBefore);
5568 }
5569 // If we end up here, the indices of the insertvalue match with those
5570 // requested (though possibly only partially). Now we recursively look at
5571 // the inserted value, passing any remaining indices.
5572 return FindInsertedValue(I->getInsertedValueOperand(),
5573 ArrayRef(req_idx, idx_range.end()), InsertBefore);
5574 }
5575
5576 if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
5577 // If we're extracting a value from an aggregate that was extracted from
5578 // something else, we can extract from that something else directly instead.
5579 // However, we will need to chain I's indices with the requested indices.
5580
5581 // Calculate the number of indices required
5582 unsigned size = I->getNumIndices() + idx_range.size();
5583 // Allocate some space to put the new indices in
5584 SmallVector<unsigned, 5> Idxs;
5585 Idxs.reserve(size);
5586 // Add indices from the extract value instruction
5587 Idxs.append(I->idx_begin(), I->idx_end());
5588
5589 // Add requested indices
5590 Idxs.append(idx_range.begin(), idx_range.end());
5591
5592 assert(Idxs.size() == size
5593 && "Number of indices added not correct?");
5594
5595 return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
5596 }
5597 // Otherwise, we don't know (such as when extracting from a function return
5598 // value or a load instruction).
5599 return nullptr;
5600 }
5601
5602 bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
5603 unsigned CharSize) {
5604 // Make sure the GEP has exactly three arguments.
5605 if (GEP->getNumOperands() != 3)
5606 return false;
5607
5608 // Make sure the GEP's source element type is an array of \p CharSize-bit
5609 // integers.
5610 ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
5611 if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
5612 return false;
5613
5614 // Check to make sure that the first operand of the GEP is an integer and
5615 // has value 0 so that we are sure we're indexing into the initializer.
5616 const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
5617 if (!FirstIdx || !FirstIdx->isZero())
5618 return false;
5619
5620 return true;
5621 }
5622
5623 // If V refers to an initialized global constant, set Slice either to
5624 // its initializer if the size of its elements equals ElementSize, or,
5625 // for ElementSize == 8, to its representation as an array of unsigned
5626 // char. Return true on success.
5627 // Offset is in the unit "number of ElementSize-sized elements".
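//
// For example, given an illustrative global such as
//   @s = private constant [4 x i8] c"abc\00"
// a call with ElementSize == 8 and Offset == 1 sets Slice.Array to the
// initializer, Slice.Offset to 1 and Slice.Length to 3 (the bytes "bc\00").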
5628 bool llvm::getConstantDataArrayInfo(const Value *V, 5629 ConstantDataArraySlice &Slice, 5630 unsigned ElementSize, uint64_t Offset) { 5631 assert(V && "V should not be null."); 5632 assert((ElementSize % 8) == 0 && 5633 "ElementSize expected to be a multiple of the size of a byte."); 5634 unsigned ElementSizeInBytes = ElementSize / 8; 5635 5636 // Drill down into the pointer expression V, ignoring any intervening 5637 // casts, and determine the identity of the object it references along 5638 // with the cumulative byte offset into it. 5639 const GlobalVariable *GV = 5640 dyn_cast<GlobalVariable>(getUnderlyingObject(V)); 5641 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) 5642 // Fail if V is not based on constant global object. 5643 return false; 5644 5645 const DataLayout &DL = GV->getParent()->getDataLayout(); 5646 APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0); 5647 5648 if (GV != V->stripAndAccumulateConstantOffsets(DL, Off, 5649 /*AllowNonInbounds*/ true)) 5650 // Fail if a constant offset could not be determined. 5651 return false; 5652 5653 uint64_t StartIdx = Off.getLimitedValue(); 5654 if (StartIdx == UINT64_MAX) 5655 // Fail if the constant offset is excessive. 5656 return false; 5657 5658 // Off/StartIdx is in the unit of bytes. So we need to convert to number of 5659 // elements. Simply bail out if that isn't possible. 5660 if ((StartIdx % ElementSizeInBytes) != 0) 5661 return false; 5662 5663 Offset += StartIdx / ElementSizeInBytes; 5664 ConstantDataArray *Array = nullptr; 5665 ArrayType *ArrayTy = nullptr; 5666 5667 if (GV->getInitializer()->isNullValue()) { 5668 Type *GVTy = GV->getValueType(); 5669 uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedValue(); 5670 uint64_t Length = SizeInBytes / ElementSizeInBytes; 5671 5672 Slice.Array = nullptr; 5673 Slice.Offset = 0; 5674 // Return an empty Slice for undersized constants to let callers 5675 // transform even undefined library calls into simpler, well-defined 5676 // expressions. This is preferable to making the calls although it 5677 // prevents sanitizers from detecting such calls. 5678 Slice.Length = Length < Offset ? 0 : Length - Offset; 5679 return true; 5680 } 5681 5682 auto *Init = const_cast<Constant *>(GV->getInitializer()); 5683 if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) { 5684 Type *InitElTy = ArrayInit->getElementType(); 5685 if (InitElTy->isIntegerTy(ElementSize)) { 5686 // If Init is an initializer for an array of the expected type 5687 // and size, use it as is. 5688 Array = ArrayInit; 5689 ArrayTy = ArrayInit->getType(); 5690 } 5691 } 5692 5693 if (!Array) { 5694 if (ElementSize != 8) 5695 // TODO: Handle conversions to larger integral types. 5696 return false; 5697 5698 // Otherwise extract the portion of the initializer starting 5699 // at Offset as an array of bytes, and reset Offset. 5700 Init = ReadByteArrayFromGlobal(GV, Offset); 5701 if (!Init) 5702 return false; 5703 5704 Offset = 0; 5705 Array = dyn_cast<ConstantDataArray>(Init); 5706 ArrayTy = dyn_cast<ArrayType>(Init->getType()); 5707 } 5708 5709 uint64_t NumElts = ArrayTy->getArrayNumElements(); 5710 if (Offset > NumElts) 5711 return false; 5712 5713 Slice.Array = Array; 5714 Slice.Offset = Offset; 5715 Slice.Length = NumElts - Offset; 5716 return true; 5717 } 5718 5719 /// Extract bytes from the initializer of the constant array V, which need 5720 /// not be a nul-terminated string. On success, store the bytes in Str and 5721 /// return true. 
When TrimAtNul is set, Str will contain only the bytes up 5722 /// to but not including the first nul. Return false on failure. 5723 bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, 5724 bool TrimAtNul) { 5725 ConstantDataArraySlice Slice; 5726 if (!getConstantDataArrayInfo(V, Slice, 8)) 5727 return false; 5728 5729 if (Slice.Array == nullptr) { 5730 if (TrimAtNul) { 5731 // Return a nul-terminated string even for an empty Slice. This is 5732 // safe because all existing SimplifyLibcalls callers require string 5733 // arguments and the behavior of the functions they fold is undefined 5734 // otherwise. Folding the calls this way is preferable to making 5735 // the undefined library calls, even though it prevents sanitizers 5736 // from reporting such calls. 5737 Str = StringRef(); 5738 return true; 5739 } 5740 if (Slice.Length == 1) { 5741 Str = StringRef("", 1); 5742 return true; 5743 } 5744 // We cannot instantiate a StringRef as we do not have an appropriate string 5745 // of 0s at hand. 5746 return false; 5747 } 5748 5749 // Start out with the entire array in the StringRef. 5750 Str = Slice.Array->getAsString(); 5751 // Skip over 'offset' bytes. 5752 Str = Str.substr(Slice.Offset); 5753 5754 if (TrimAtNul) { 5755 // Trim off the \0 and anything after it. If the array is not nul 5756 // terminated, we just return the whole end of string. The client may know 5757 // some other way that the string is length-bound. 5758 Str = Str.substr(0, Str.find('\0')); 5759 } 5760 return true; 5761 } 5762 5763 // These next two are very similar to the above, but also look through PHI 5764 // nodes. 5765 // TODO: See if we can integrate these two together. 5766 5767 /// If we can compute the length of the string pointed to by 5768 /// the specified pointer, return 'len+1'. If we can't, return 0. 5769 static uint64_t GetStringLengthH(const Value *V, 5770 SmallPtrSetImpl<const PHINode*> &PHIs, 5771 unsigned CharSize) { 5772 // Look through noop bitcast instructions. 5773 V = V->stripPointerCasts(); 5774 5775 // If this is a PHI node, there are two cases: either we have already seen it 5776 // or we haven't. 5777 if (const PHINode *PN = dyn_cast<PHINode>(V)) { 5778 if (!PHIs.insert(PN).second) 5779 return ~0ULL; // already in the set. 5780 5781 // If it was new, see if all the input strings are the same length. 5782 uint64_t LenSoFar = ~0ULL; 5783 for (Value *IncValue : PN->incoming_values()) { 5784 uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize); 5785 if (Len == 0) return 0; // Unknown length -> unknown. 5786 5787 if (Len == ~0ULL) continue; 5788 5789 if (Len != LenSoFar && LenSoFar != ~0ULL) 5790 return 0; // Disagree -> unknown. 5791 LenSoFar = Len; 5792 } 5793 5794 // Success, all agree. 5795 return LenSoFar; 5796 } 5797 5798 // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) 5799 if (const SelectInst *SI = dyn_cast<SelectInst>(V)) { 5800 uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize); 5801 if (Len1 == 0) return 0; 5802 uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize); 5803 if (Len2 == 0) return 0; 5804 if (Len1 == ~0ULL) return Len2; 5805 if (Len2 == ~0ULL) return Len1; 5806 if (Len1 != Len2) return 0; 5807 return Len1; 5808 } 5809 5810 // Otherwise, see if we can read the string. 5811 ConstantDataArraySlice Slice; 5812 if (!getConstantDataArrayInfo(V, Slice, CharSize)) 5813 return 0; 5814 5815 if (Slice.Array == nullptr) 5816 // Zeroinitializer (including an empty one). 5817 return 1; 5818 5819 // Search for the first nul character. 
Return a conservative result even 5820 // when there is no nul. This is safe since otherwise the string function 5821 // being folded such as strlen is undefined, and can be preferable to 5822 // making the undefined library call. 5823 unsigned NullIndex = 0; 5824 for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) { 5825 if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0) 5826 break; 5827 } 5828 5829 return NullIndex + 1; 5830 } 5831 5832 /// If we can compute the length of the string pointed to by 5833 /// the specified pointer, return 'len+1'. If we can't, return 0. 5834 uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) { 5835 if (!V->getType()->isPointerTy()) 5836 return 0; 5837 5838 SmallPtrSet<const PHINode*, 32> PHIs; 5839 uint64_t Len = GetStringLengthH(V, PHIs, CharSize); 5840 // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return 5841 // an empty string as a length. 5842 return Len == ~0ULL ? 1 : Len; 5843 } 5844 5845 const Value * 5846 llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call, 5847 bool MustPreserveNullness) { 5848 assert(Call && 5849 "getArgumentAliasingToReturnedPointer only works on nonnull calls"); 5850 if (const Value *RV = Call->getReturnedArgOperand()) 5851 return RV; 5852 // This can be used only as a aliasing property. 5853 if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( 5854 Call, MustPreserveNullness)) 5855 return Call->getArgOperand(0); 5856 return nullptr; 5857 } 5858 5859 bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( 5860 const CallBase *Call, bool MustPreserveNullness) { 5861 switch (Call->getIntrinsicID()) { 5862 case Intrinsic::launder_invariant_group: 5863 case Intrinsic::strip_invariant_group: 5864 case Intrinsic::aarch64_irg: 5865 case Intrinsic::aarch64_tagp: 5866 // The amdgcn_make_buffer_rsrc function does not alter the address of the 5867 // input pointer (and thus preserve null-ness for the purposes of escape 5868 // analysis, which is where the MustPreserveNullness flag comes in to play). 5869 // However, it will not necessarily map ptr addrspace(N) null to ptr 5870 // addrspace(8) null, aka the "null descriptor", which has "all loads return 5871 // 0, all stores are dropped" semantics. Given the context of this intrinsic 5872 // list, no one should be relying on such a strict interpretation of 5873 // MustPreserveNullness (and, at time of writing, they are not), but we 5874 // document this fact out of an abundance of caution. 5875 case Intrinsic::amdgcn_make_buffer_rsrc: 5876 return true; 5877 case Intrinsic::ptrmask: 5878 return !MustPreserveNullness; 5879 default: 5880 return false; 5881 } 5882 } 5883 5884 /// \p PN defines a loop-variant pointer to an object. Check if the 5885 /// previous iteration of the loop was referring to the same object as \p PN. 5886 static bool isSameUnderlyingObjectInLoop(const PHINode *PN, 5887 const LoopInfo *LI) { 5888 // Find the loop-defined value. 5889 Loop *L = LI->getLoopFor(PN->getParent()); 5890 if (PN->getNumIncomingValues() != 2) 5891 return true; 5892 5893 // Find the value from previous iteration. 
5894 auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0)); 5895 if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) 5896 PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1)); 5897 if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) 5898 return true; 5899 5900 // If a new pointer is loaded in the loop, the pointer references a different 5901 // object in every iteration. E.g.: 5902 // for (i) 5903 // int *p = a[i]; 5904 // ... 5905 if (auto *Load = dyn_cast<LoadInst>(PrevValue)) 5906 if (!L->isLoopInvariant(Load->getPointerOperand())) 5907 return false; 5908 return true; 5909 } 5910 5911 const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) { 5912 if (!V->getType()->isPointerTy()) 5913 return V; 5914 for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { 5915 if (auto *GEP = dyn_cast<GEPOperator>(V)) { 5916 V = GEP->getPointerOperand(); 5917 } else if (Operator::getOpcode(V) == Instruction::BitCast || 5918 Operator::getOpcode(V) == Instruction::AddrSpaceCast) { 5919 V = cast<Operator>(V)->getOperand(0); 5920 if (!V->getType()->isPointerTy()) 5921 return V; 5922 } else if (auto *GA = dyn_cast<GlobalAlias>(V)) { 5923 if (GA->isInterposable()) 5924 return V; 5925 V = GA->getAliasee(); 5926 } else { 5927 if (auto *PHI = dyn_cast<PHINode>(V)) { 5928 // Look through single-arg phi nodes created by LCSSA. 5929 if (PHI->getNumIncomingValues() == 1) { 5930 V = PHI->getIncomingValue(0); 5931 continue; 5932 } 5933 } else if (auto *Call = dyn_cast<CallBase>(V)) { 5934 // CaptureTracking can know about special capturing properties of some 5935 // intrinsics like launder.invariant.group, that can't be expressed with 5936 // the attributes, but have properties like returning aliasing pointer. 5937 // Because some analysis may assume that nocaptured pointer is not 5938 // returned from some special intrinsic (because function would have to 5939 // be marked with returns attribute), it is crucial to use this function 5940 // because it should be in sync with CaptureTracking. Not using it may 5941 // cause weird miscompilations where 2 aliasing pointers are assumed to 5942 // noalias. 5943 if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) { 5944 V = RP; 5945 continue; 5946 } 5947 } 5948 5949 return V; 5950 } 5951 assert(V->getType()->isPointerTy() && "Unexpected operand type!"); 5952 } 5953 return V; 5954 } 5955 5956 void llvm::getUnderlyingObjects(const Value *V, 5957 SmallVectorImpl<const Value *> &Objects, 5958 LoopInfo *LI, unsigned MaxLookup) { 5959 SmallPtrSet<const Value *, 4> Visited; 5960 SmallVector<const Value *, 4> Worklist; 5961 Worklist.push_back(V); 5962 do { 5963 const Value *P = Worklist.pop_back_val(); 5964 P = getUnderlyingObject(P, MaxLookup); 5965 5966 if (!Visited.insert(P).second) 5967 continue; 5968 5969 if (auto *SI = dyn_cast<SelectInst>(P)) { 5970 Worklist.push_back(SI->getTrueValue()); 5971 Worklist.push_back(SI->getFalseValue()); 5972 continue; 5973 } 5974 5975 if (auto *PN = dyn_cast<PHINode>(P)) { 5976 // If this PHI changes the underlying object in every iteration of the 5977 // loop, don't look through it. Consider: 5978 // int **A; 5979 // for (i) { 5980 // Prev = Curr; // Prev = PHI (Prev_0, Curr) 5981 // Curr = A[i]; 5982 // *Prev, *Curr; 5983 // 5984 // Prev is tracking Curr one iteration behind so they refer to different 5985 // underlying objects. 
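      // Conversely, if the loop-carried value is e.g. a GEP off the same base
      // pointer, every iteration still refers to the same underlying object
      // and it is fine to keep looking through the phi; the check below
      // distinguishes the two situations.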
5986 if (!LI || !LI->isLoopHeader(PN->getParent()) || 5987 isSameUnderlyingObjectInLoop(PN, LI)) 5988 append_range(Worklist, PN->incoming_values()); 5989 else 5990 Objects.push_back(P); 5991 continue; 5992 } 5993 5994 Objects.push_back(P); 5995 } while (!Worklist.empty()); 5996 } 5997 5998 /// This is the function that does the work of looking through basic 5999 /// ptrtoint+arithmetic+inttoptr sequences. 6000 static const Value *getUnderlyingObjectFromInt(const Value *V) { 6001 do { 6002 if (const Operator *U = dyn_cast<Operator>(V)) { 6003 // If we find a ptrtoint, we can transfer control back to the 6004 // regular getUnderlyingObjectFromInt. 6005 if (U->getOpcode() == Instruction::PtrToInt) 6006 return U->getOperand(0); 6007 // If we find an add of a constant, a multiplied value, or a phi, it's 6008 // likely that the other operand will lead us to the base 6009 // object. We don't have to worry about the case where the 6010 // object address is somehow being computed by the multiply, 6011 // because our callers only care when the result is an 6012 // identifiable object. 6013 if (U->getOpcode() != Instruction::Add || 6014 (!isa<ConstantInt>(U->getOperand(1)) && 6015 Operator::getOpcode(U->getOperand(1)) != Instruction::Mul && 6016 !isa<PHINode>(U->getOperand(1)))) 6017 return V; 6018 V = U->getOperand(0); 6019 } else { 6020 return V; 6021 } 6022 assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); 6023 } while (true); 6024 } 6025 6026 /// This is a wrapper around getUnderlyingObjects and adds support for basic 6027 /// ptrtoint+arithmetic+inttoptr sequences. 6028 /// It returns false if unidentified object is found in getUnderlyingObjects. 6029 bool llvm::getUnderlyingObjectsForCodeGen(const Value *V, 6030 SmallVectorImpl<Value *> &Objects) { 6031 SmallPtrSet<const Value *, 16> Visited; 6032 SmallVector<const Value *, 4> Working(1, V); 6033 do { 6034 V = Working.pop_back_val(); 6035 6036 SmallVector<const Value *, 4> Objs; 6037 getUnderlyingObjects(V, Objs); 6038 6039 for (const Value *V : Objs) { 6040 if (!Visited.insert(V).second) 6041 continue; 6042 if (Operator::getOpcode(V) == Instruction::IntToPtr) { 6043 const Value *O = 6044 getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); 6045 if (O->getType()->isPointerTy()) { 6046 Working.push_back(O); 6047 continue; 6048 } 6049 } 6050 // If getUnderlyingObjects fails to find an identifiable object, 6051 // getUnderlyingObjectsForCodeGen also fails for safety. 
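      // "Identifiable" here means things like an alloca, a non-alias global,
      // or a noalias call/argument; a pointer loaded from memory or a plain
      // function argument is not, and makes the whole query bail out below.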
6052 if (!isIdentifiedObject(V)) { 6053 Objects.clear(); 6054 return false; 6055 } 6056 Objects.push_back(const_cast<Value *>(V)); 6057 } 6058 } while (!Working.empty()); 6059 return true; 6060 } 6061 6062 AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) { 6063 AllocaInst *Result = nullptr; 6064 SmallPtrSet<Value *, 4> Visited; 6065 SmallVector<Value *, 4> Worklist; 6066 6067 auto AddWork = [&](Value *V) { 6068 if (Visited.insert(V).second) 6069 Worklist.push_back(V); 6070 }; 6071 6072 AddWork(V); 6073 do { 6074 V = Worklist.pop_back_val(); 6075 assert(Visited.count(V)); 6076 6077 if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 6078 if (Result && Result != AI) 6079 return nullptr; 6080 Result = AI; 6081 } else if (CastInst *CI = dyn_cast<CastInst>(V)) { 6082 AddWork(CI->getOperand(0)); 6083 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 6084 for (Value *IncValue : PN->incoming_values()) 6085 AddWork(IncValue); 6086 } else if (auto *SI = dyn_cast<SelectInst>(V)) { 6087 AddWork(SI->getTrueValue()); 6088 AddWork(SI->getFalseValue()); 6089 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { 6090 if (OffsetZero && !GEP->hasAllZeroIndices()) 6091 return nullptr; 6092 AddWork(GEP->getPointerOperand()); 6093 } else if (CallBase *CB = dyn_cast<CallBase>(V)) { 6094 Value *Returned = CB->getReturnedArgOperand(); 6095 if (Returned) 6096 AddWork(Returned); 6097 else 6098 return nullptr; 6099 } else { 6100 return nullptr; 6101 } 6102 } while (!Worklist.empty()); 6103 6104 return Result; 6105 } 6106 6107 static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6108 const Value *V, bool AllowLifetime, bool AllowDroppable) { 6109 for (const User *U : V->users()) { 6110 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); 6111 if (!II) 6112 return false; 6113 6114 if (AllowLifetime && II->isLifetimeStartOrEnd()) 6115 continue; 6116 6117 if (AllowDroppable && II->isDroppable()) 6118 continue; 6119 6120 return false; 6121 } 6122 return true; 6123 } 6124 6125 bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { 6126 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6127 V, /* AllowLifetime */ true, /* AllowDroppable */ false); 6128 } 6129 bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) { 6130 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6131 V, /* AllowLifetime */ true, /* AllowDroppable */ true); 6132 } 6133 6134 bool llvm::mustSuppressSpeculation(const LoadInst &LI) { 6135 if (!LI.isUnordered()) 6136 return true; 6137 const Function &F = *LI.getFunction(); 6138 // Speculative load may create a race that did not exist in the source. 6139 return F.hasFnAttribute(Attribute::SanitizeThread) || 6140 // Speculative load may load data from dirty regions. 6141 F.hasFnAttribute(Attribute::SanitizeAddress) || 6142 F.hasFnAttribute(Attribute::SanitizeHWAddress); 6143 } 6144 6145 bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst, 6146 const Instruction *CtxI, 6147 AssumptionCache *AC, 6148 const DominatorTree *DT, 6149 const TargetLibraryInfo *TLI) { 6150 return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, 6151 AC, DT, TLI); 6152 } 6153 6154 bool llvm::isSafeToSpeculativelyExecuteWithOpcode( 6155 unsigned Opcode, const Instruction *Inst, const Instruction *CtxI, 6156 AssumptionCache *AC, const DominatorTree *DT, 6157 const TargetLibraryInfo *TLI) { 6158 #ifndef NDEBUG 6159 if (Inst->getOpcode() != Opcode) { 6160 // Check that the operands are actually compatible with the Opcode override. 
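    // For example, a caller may want to know whether the operands of an
    // existing 'sub' would also be safe to speculate as an 'add'; these
    // debug-only checks just verify that the leading operand types are
    // consistent with the overriding opcode.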
6161 auto hasEqualReturnAndLeadingOperandTypes = 6162 [](const Instruction *Inst, unsigned NumLeadingOperands) { 6163 if (Inst->getNumOperands() < NumLeadingOperands) 6164 return false; 6165 const Type *ExpectedType = Inst->getType(); 6166 for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp) 6167 if (Inst->getOperand(ItOp)->getType() != ExpectedType) 6168 return false; 6169 return true; 6170 }; 6171 assert(!Instruction::isBinaryOp(Opcode) || 6172 hasEqualReturnAndLeadingOperandTypes(Inst, 2)); 6173 assert(!Instruction::isUnaryOp(Opcode) || 6174 hasEqualReturnAndLeadingOperandTypes(Inst, 1)); 6175 } 6176 #endif 6177 6178 switch (Opcode) { 6179 default: 6180 return true; 6181 case Instruction::UDiv: 6182 case Instruction::URem: { 6183 // x / y is undefined if y == 0. 6184 const APInt *V; 6185 if (match(Inst->getOperand(1), m_APInt(V))) 6186 return *V != 0; 6187 return false; 6188 } 6189 case Instruction::SDiv: 6190 case Instruction::SRem: { 6191 // x / y is undefined if y == 0 or x == INT_MIN and y == -1 6192 const APInt *Numerator, *Denominator; 6193 if (!match(Inst->getOperand(1), m_APInt(Denominator))) 6194 return false; 6195 // We cannot hoist this division if the denominator is 0. 6196 if (*Denominator == 0) 6197 return false; 6198 // It's safe to hoist if the denominator is not 0 or -1. 6199 if (!Denominator->isAllOnes()) 6200 return true; 6201 // At this point we know that the denominator is -1. It is safe to hoist as 6202 // long we know that the numerator is not INT_MIN. 6203 if (match(Inst->getOperand(0), m_APInt(Numerator))) 6204 return !Numerator->isMinSignedValue(); 6205 // The numerator *might* be MinSignedValue. 6206 return false; 6207 } 6208 case Instruction::Load: { 6209 const LoadInst *LI = dyn_cast<LoadInst>(Inst); 6210 if (!LI) 6211 return false; 6212 if (mustSuppressSpeculation(*LI)) 6213 return false; 6214 const DataLayout &DL = LI->getModule()->getDataLayout(); 6215 return isDereferenceableAndAlignedPointer(LI->getPointerOperand(), 6216 LI->getType(), LI->getAlign(), DL, 6217 CtxI, AC, DT, TLI); 6218 } 6219 case Instruction::Call: { 6220 auto *CI = dyn_cast<const CallInst>(Inst); 6221 if (!CI) 6222 return false; 6223 const Function *Callee = CI->getCalledFunction(); 6224 6225 // The called function could have undefined behavior or side-effects, even 6226 // if marked readnone nounwind. 6227 return Callee && Callee->isSpeculatable(); 6228 } 6229 case Instruction::VAArg: 6230 case Instruction::Alloca: 6231 case Instruction::Invoke: 6232 case Instruction::CallBr: 6233 case Instruction::PHI: 6234 case Instruction::Store: 6235 case Instruction::Ret: 6236 case Instruction::Br: 6237 case Instruction::IndirectBr: 6238 case Instruction::Switch: 6239 case Instruction::Unreachable: 6240 case Instruction::Fence: 6241 case Instruction::AtomicRMW: 6242 case Instruction::AtomicCmpXchg: 6243 case Instruction::LandingPad: 6244 case Instruction::Resume: 6245 case Instruction::CatchSwitch: 6246 case Instruction::CatchPad: 6247 case Instruction::CatchRet: 6248 case Instruction::CleanupPad: 6249 case Instruction::CleanupRet: 6250 return false; // Misc instructions which have effects 6251 } 6252 } 6253 6254 bool llvm::mayHaveNonDefUseDependency(const Instruction &I) { 6255 if (I.mayReadOrWriteMemory()) 6256 // Memory dependency possible 6257 return true; 6258 if (!isSafeToSpeculativelyExecute(&I)) 6259 // Can't move above a maythrow call or infinite loop. Or if an 6260 // inalloca alloca, above a stacksave call. 
6261 return true;
6262 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
6263 // 1) Can't reorder two inf-loop calls, even if readonly
6264 // 2) Also can't reorder an inf-loop call below an instruction which isn't
6265 // safe to speculatively execute. (Inverse of above)
6266 return true;
6267 return false;
6268 }
6269 
6270 /// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
6271 static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
6272 switch (OR) {
6273 case ConstantRange::OverflowResult::MayOverflow:
6274 return OverflowResult::MayOverflow;
6275 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
6276 return OverflowResult::AlwaysOverflowsLow;
6277 case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
6278 return OverflowResult::AlwaysOverflowsHigh;
6279 case ConstantRange::OverflowResult::NeverOverflows:
6280 return OverflowResult::NeverOverflows;
6281 }
6282 llvm_unreachable("Unknown OverflowResult");
6283 }
6284 
6285 /// Combine constant ranges from computeConstantRange() and computeKnownBits().
6286 ConstantRange
6287 llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
6288 bool ForSigned,
6289 const SimplifyQuery &SQ) {
6290 ConstantRange CR1 =
6291 ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
6292 ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
6293 ConstantRange::PreferredRangeType RangeType =
6294 ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
6295 return CR1.intersectWith(CR2, RangeType);
6296 }
6297 
6298 OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
6299 const Value *RHS,
6300 const SimplifyQuery &SQ) {
6301 KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ);
6302 KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ);
6303 ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
6304 ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
6305 return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
6306 }
6307 
6308 OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
6309 const Value *RHS,
6310 const SimplifyQuery &SQ) {
6311 // Multiplying n * m significant bits yields a result of n + m significant
6312 // bits. If the total number of significant bits does not exceed the
6313 // result bit width (minus 1), there is no overflow.
6314 // This means if we have enough leading sign bits in the operands
6315 // we can guarantee that the result does not overflow.
6316 // Ref: "Hacker's Delight" by Henry Warren
6317 unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
6318 
6319 // Note that underestimating the number of sign bits gives a more
6320 // conservative answer.
6321 unsigned SignBits =
6322 ::ComputeNumSignBits(LHS, 0, SQ) + ::ComputeNumSignBits(RHS, 0, SQ);
6323 
6324 // First handle the easy case: if we have enough sign bits there's
6325 // definitely no overflow.
6326 if (SignBits > BitWidth + 1)
6327 return OverflowResult::NeverOverflows;
6328 
6329 // There are two ambiguous cases where there can be no overflow:
6330 // SignBits == BitWidth + 1 and
6331 // SignBits == BitWidth
6332 // The second case is difficult to check, therefore we only handle the
6333 // first case.
6334 if (SignBits == BitWidth + 1) {
6335 // It overflows only when both arguments are negative and the true
6336 // product is exactly the minimum negative number.
6337 // E.g.
mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000 6338 // For simplicity we just check if at least one side is not negative. 6339 KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ); 6340 KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ); 6341 if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) 6342 return OverflowResult::NeverOverflows; 6343 } 6344 return OverflowResult::MayOverflow; 6345 } 6346 6347 OverflowResult 6348 llvm::computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS, 6349 const WithCache<const Value *> &RHS, 6350 const SimplifyQuery &SQ) { 6351 ConstantRange LHSRange = 6352 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ); 6353 ConstantRange RHSRange = 6354 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ); 6355 return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange)); 6356 } 6357 6358 static OverflowResult 6359 computeOverflowForSignedAdd(const WithCache<const Value *> &LHS, 6360 const WithCache<const Value *> &RHS, 6361 const AddOperator *Add, const SimplifyQuery &SQ) { 6362 if (Add && Add->hasNoSignedWrap()) { 6363 return OverflowResult::NeverOverflows; 6364 } 6365 6366 // If LHS and RHS each have at least two sign bits, the addition will look 6367 // like 6368 // 6369 // XX..... + 6370 // YY..... 6371 // 6372 // If the carry into the most significant position is 0, X and Y can't both 6373 // be 1 and therefore the carry out of the addition is also 0. 6374 // 6375 // If the carry into the most significant position is 1, X and Y can't both 6376 // be 0 and therefore the carry out of the addition is also 1. 6377 // 6378 // Since the carry into the most significant position is always equal to 6379 // the carry out of the addition, there is no signed overflow. 6380 if (::ComputeNumSignBits(LHS, 0, SQ) > 1 && 6381 ::ComputeNumSignBits(RHS, 0, SQ) > 1) 6382 return OverflowResult::NeverOverflows; 6383 6384 ConstantRange LHSRange = 6385 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ); 6386 ConstantRange RHSRange = 6387 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ); 6388 OverflowResult OR = 6389 mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange)); 6390 if (OR != OverflowResult::MayOverflow) 6391 return OR; 6392 6393 // The remaining code needs Add to be available. Early returns if not so. 6394 if (!Add) 6395 return OverflowResult::MayOverflow; 6396 6397 // If the sign of Add is the same as at least one of the operands, this add 6398 // CANNOT overflow. If this can be determined from the known bits of the 6399 // operands the above signedAddMayOverflow() check will have already done so. 6400 // The only other way to improve on the known bits is from an assumption, so 6401 // call computeKnownBitsFromContext() directly. 
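// Worked example (illustrative, i8): if RHS is known non-negative and an
// assume in the context lets computeKnownBitsFromContext() prove the sum is
// also non-negative, the add cannot have wrapped: wrapping above 127 needs
// both inputs non-negative and produces a negative bit pattern, while
// wrapping below -128 needs both inputs negative.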
6402 bool LHSOrRHSKnownNonNegative = 6403 (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative()); 6404 bool LHSOrRHSKnownNegative = 6405 (LHSRange.isAllNegative() || RHSRange.isAllNegative()); 6406 if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { 6407 KnownBits AddKnown(LHSRange.getBitWidth()); 6408 computeKnownBitsFromContext(Add, AddKnown, /*Depth=*/0, SQ); 6409 if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || 6410 (AddKnown.isNegative() && LHSOrRHSKnownNegative)) 6411 return OverflowResult::NeverOverflows; 6412 } 6413 6414 return OverflowResult::MayOverflow; 6415 } 6416 6417 OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS, 6418 const Value *RHS, 6419 const SimplifyQuery &SQ) { 6420 // X - (X % ?) 6421 // The remainder of a value can't have greater magnitude than itself, 6422 // so the subtraction can't overflow. 6423 6424 // X - (X -nuw ?) 6425 // In the minimal case, this would simplify to "?", so there's no subtract 6426 // at all. But if this analysis is used to peek through casts, for example, 6427 // then determining no-overflow may allow other transforms. 6428 6429 // TODO: There are other patterns like this. 6430 // See simplifyICmpWithBinOpOnLHS() for candidates. 6431 if (match(RHS, m_URem(m_Specific(LHS), m_Value())) || 6432 match(RHS, m_NUWSub(m_Specific(LHS), m_Value()))) 6433 if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)) 6434 return OverflowResult::NeverOverflows; 6435 6436 // Checking for conditions implied by dominating conditions may be expensive. 6437 // Limit it to usub_with_overflow calls for now. 6438 if (match(SQ.CxtI, 6439 m_Intrinsic<Intrinsic::usub_with_overflow>(m_Value(), m_Value()))) 6440 if (auto C = isImpliedByDomCondition(CmpInst::ICMP_UGE, LHS, RHS, SQ.CxtI, 6441 SQ.DL)) { 6442 if (*C) 6443 return OverflowResult::NeverOverflows; 6444 return OverflowResult::AlwaysOverflowsLow; 6445 } 6446 ConstantRange LHSRange = 6447 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ); 6448 ConstantRange RHSRange = 6449 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ); 6450 return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange)); 6451 } 6452 6453 OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS, 6454 const Value *RHS, 6455 const SimplifyQuery &SQ) { 6456 // X - (X % ?) 6457 // The remainder of a value can't have greater magnitude than itself, 6458 // so the subtraction can't overflow. 6459 6460 // X - (X -nsw ?) 6461 // In the minimal case, this would simplify to "?", so there's no subtract 6462 // at all. But if this analysis is used to peek through casts, for example, 6463 // then determining no-overflow may allow other transforms. 6464 if (match(RHS, m_SRem(m_Specific(LHS), m_Value())) || 6465 match(RHS, m_NSWSub(m_Specific(LHS), m_Value()))) 6466 if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)) 6467 return OverflowResult::NeverOverflows; 6468 6469 // If LHS and RHS each have at least two sign bits, the subtraction 6470 // cannot overflow. 
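// For example (illustrative, i32): two sign bits means each value fits in
// [-2^30, 2^30 - 1], so their difference lies within [-(2^31 - 1), 2^31 - 1]
// and is representable in 32 bits.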
6471 if (::ComputeNumSignBits(LHS, 0, SQ) > 1 &&
6472 ::ComputeNumSignBits(RHS, 0, SQ) > 1)
6473 return OverflowResult::NeverOverflows;
6474 
6475 ConstantRange LHSRange =
6476 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ);
6477 ConstantRange RHSRange =
6478 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ);
6479 return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange));
6480 }
6481 
6482 bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
6483 const DominatorTree &DT) {
6484 SmallVector<const BranchInst *, 2> GuardingBranches;
6485 SmallVector<const ExtractValueInst *, 2> Results;
6486 
6487 for (const User *U : WO->users()) {
6488 if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
6489 assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
6490 
6491 if (EVI->getIndices()[0] == 0)
6492 Results.push_back(EVI);
6493 else {
6494 assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");
6495 
6496 for (const auto *U : EVI->users())
6497 if (const auto *B = dyn_cast<BranchInst>(U)) {
6498 assert(B->isConditional() && "How else is it using an i1?");
6499 GuardingBranches.push_back(B);
6500 }
6501 }
6502 } else {
6503 // We are using the aggregate directly in a way we don't want to analyze
6504 // here (storing it to a global, say).
6505 return false;
6506 }
6507 }
6508 
6509 auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
6510 BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1));
6511 if (!NoWrapEdge.isSingleEdge())
6512 return false;
6513 
6514 // Check if all users of the add are provably no-wrap.
6515 for (const auto *Result : Results) {
6516 // If the extractvalue itself is not executed on overflow, then we don't
6517 // need to check each use separately, since domination is transitive.
6518 if (DT.dominates(NoWrapEdge, Result->getParent()))
6519 continue;
6520 
6521 for (const auto &RU : Result->uses())
6522 if (!DT.dominates(NoWrapEdge, RU))
6523 return false;
6524 }
6525 
6526 return true;
6527 };
6528 
6529 return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
6530 }
6531 
6532 /// Shifts return poison if the shift amount is equal to or larger than the bitwidth.
6533 static bool shiftAmountKnownInRange(const Value *ShiftAmount) {
6534 auto *C = dyn_cast<Constant>(ShiftAmount);
6535 if (!C)
6536 return false;
6537 
6538 // Shifts return poison if the shift amount is equal to or larger than the bitwidth.
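// For example (illustrative IR):
//   %p = shl i8 %x, 8   ; poison for every %x (the amount equals the bit width)
//   %q = shl i8 %x, 7   ; shift amount always in range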
6539 SmallVector<const Constant *, 4> ShiftAmounts; 6540 if (auto *FVTy = dyn_cast<FixedVectorType>(C->getType())) { 6541 unsigned NumElts = FVTy->getNumElements(); 6542 for (unsigned i = 0; i < NumElts; ++i) 6543 ShiftAmounts.push_back(C->getAggregateElement(i)); 6544 } else if (isa<ScalableVectorType>(C->getType())) 6545 return false; // Can't tell, just return false to be safe 6546 else 6547 ShiftAmounts.push_back(C); 6548 6549 bool Safe = llvm::all_of(ShiftAmounts, [](const Constant *C) { 6550 auto *CI = dyn_cast_or_null<ConstantInt>(C); 6551 return CI && CI->getValue().ult(C->getType()->getIntegerBitWidth()); 6552 }); 6553 6554 return Safe; 6555 } 6556 6557 enum class UndefPoisonKind { 6558 PoisonOnly = (1 << 0), 6559 UndefOnly = (1 << 1), 6560 UndefOrPoison = PoisonOnly | UndefOnly, 6561 }; 6562 6563 static bool includesPoison(UndefPoisonKind Kind) { 6564 return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0; 6565 } 6566 6567 static bool includesUndef(UndefPoisonKind Kind) { 6568 return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0; 6569 } 6570 6571 static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind, 6572 bool ConsiderFlagsAndMetadata) { 6573 6574 if (ConsiderFlagsAndMetadata && includesPoison(Kind) && 6575 Op->hasPoisonGeneratingFlagsOrMetadata()) 6576 return true; 6577 6578 unsigned Opcode = Op->getOpcode(); 6579 6580 // Check whether opcode is a poison/undef-generating operation 6581 switch (Opcode) { 6582 case Instruction::Shl: 6583 case Instruction::AShr: 6584 case Instruction::LShr: 6585 return includesPoison(Kind) && !shiftAmountKnownInRange(Op->getOperand(1)); 6586 case Instruction::FPToSI: 6587 case Instruction::FPToUI: 6588 // fptosi/ui yields poison if the resulting value does not fit in the 6589 // destination type. 6590 return true; 6591 case Instruction::Call: 6592 if (auto *II = dyn_cast<IntrinsicInst>(Op)) { 6593 switch (II->getIntrinsicID()) { 6594 // TODO: Add more intrinsics. 
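// The cases below return false for intrinsics that do not introduce undef or
// poison on their own when given well-defined operands (e.g., ctpop is just a
// bit count); ctlz, cttz and abs depend on their poison-flag operand, and the
// saturating shifts on their shift amount.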
6595 case Intrinsic::ctlz: 6596 case Intrinsic::cttz: 6597 case Intrinsic::abs: 6598 if (cast<ConstantInt>(II->getArgOperand(1))->isNullValue()) 6599 return false; 6600 break; 6601 case Intrinsic::ctpop: 6602 case Intrinsic::bswap: 6603 case Intrinsic::bitreverse: 6604 case Intrinsic::fshl: 6605 case Intrinsic::fshr: 6606 case Intrinsic::smax: 6607 case Intrinsic::smin: 6608 case Intrinsic::umax: 6609 case Intrinsic::umin: 6610 case Intrinsic::ptrmask: 6611 case Intrinsic::fptoui_sat: 6612 case Intrinsic::fptosi_sat: 6613 case Intrinsic::sadd_with_overflow: 6614 case Intrinsic::ssub_with_overflow: 6615 case Intrinsic::smul_with_overflow: 6616 case Intrinsic::uadd_with_overflow: 6617 case Intrinsic::usub_with_overflow: 6618 case Intrinsic::umul_with_overflow: 6619 case Intrinsic::sadd_sat: 6620 case Intrinsic::uadd_sat: 6621 case Intrinsic::ssub_sat: 6622 case Intrinsic::usub_sat: 6623 return false; 6624 case Intrinsic::sshl_sat: 6625 case Intrinsic::ushl_sat: 6626 return includesPoison(Kind) && 6627 !shiftAmountKnownInRange(II->getArgOperand(1)); 6628 case Intrinsic::fma: 6629 case Intrinsic::fmuladd: 6630 case Intrinsic::sqrt: 6631 case Intrinsic::powi: 6632 case Intrinsic::sin: 6633 case Intrinsic::cos: 6634 case Intrinsic::pow: 6635 case Intrinsic::log: 6636 case Intrinsic::log10: 6637 case Intrinsic::log2: 6638 case Intrinsic::exp: 6639 case Intrinsic::exp2: 6640 case Intrinsic::exp10: 6641 case Intrinsic::fabs: 6642 case Intrinsic::copysign: 6643 case Intrinsic::floor: 6644 case Intrinsic::ceil: 6645 case Intrinsic::trunc: 6646 case Intrinsic::rint: 6647 case Intrinsic::nearbyint: 6648 case Intrinsic::round: 6649 case Intrinsic::roundeven: 6650 case Intrinsic::fptrunc_round: 6651 case Intrinsic::canonicalize: 6652 case Intrinsic::arithmetic_fence: 6653 case Intrinsic::minnum: 6654 case Intrinsic::maxnum: 6655 case Intrinsic::minimum: 6656 case Intrinsic::maximum: 6657 case Intrinsic::is_fpclass: 6658 case Intrinsic::ldexp: 6659 case Intrinsic::frexp: 6660 return false; 6661 case Intrinsic::lround: 6662 case Intrinsic::llround: 6663 case Intrinsic::lrint: 6664 case Intrinsic::llrint: 6665 // If the value doesn't fit an unspecified value is returned (but this 6666 // is not poison). 6667 return false; 6668 } 6669 } 6670 [[fallthrough]]; 6671 case Instruction::CallBr: 6672 case Instruction::Invoke: { 6673 const auto *CB = cast<CallBase>(Op); 6674 return !CB->hasRetAttr(Attribute::NoUndef); 6675 } 6676 case Instruction::InsertElement: 6677 case Instruction::ExtractElement: { 6678 // If index exceeds the length of the vector, it returns poison 6679 auto *VTy = cast<VectorType>(Op->getOperand(0)->getType()); 6680 unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1; 6681 auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp)); 6682 if (includesPoison(Kind)) 6683 return !Idx || 6684 Idx->getValue().uge(VTy->getElementCount().getKnownMinValue()); 6685 return false; 6686 } 6687 case Instruction::ShuffleVector: { 6688 ArrayRef<int> Mask = isa<ConstantExpr>(Op) 6689 ? 
cast<ConstantExpr>(Op)->getShuffleMask() 6690 : cast<ShuffleVectorInst>(Op)->getShuffleMask(); 6691 return includesPoison(Kind) && is_contained(Mask, PoisonMaskElem); 6692 } 6693 case Instruction::FNeg: 6694 case Instruction::PHI: 6695 case Instruction::Select: 6696 case Instruction::URem: 6697 case Instruction::SRem: 6698 case Instruction::ExtractValue: 6699 case Instruction::InsertValue: 6700 case Instruction::Freeze: 6701 case Instruction::ICmp: 6702 case Instruction::FCmp: 6703 case Instruction::FAdd: 6704 case Instruction::FSub: 6705 case Instruction::FMul: 6706 case Instruction::FDiv: 6707 case Instruction::FRem: 6708 return false; 6709 case Instruction::GetElementPtr: 6710 // inbounds is handled above 6711 // TODO: what about inrange on constexpr? 6712 return false; 6713 default: { 6714 const auto *CE = dyn_cast<ConstantExpr>(Op); 6715 if (isa<CastInst>(Op) || (CE && CE->isCast())) 6716 return false; 6717 else if (Instruction::isBinaryOp(Opcode)) 6718 return false; 6719 // Be conservative and return true. 6720 return true; 6721 } 6722 } 6723 } 6724 6725 bool llvm::canCreateUndefOrPoison(const Operator *Op, 6726 bool ConsiderFlagsAndMetadata) { 6727 return ::canCreateUndefOrPoison(Op, UndefPoisonKind::UndefOrPoison, 6728 ConsiderFlagsAndMetadata); 6729 } 6730 6731 bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata) { 6732 return ::canCreateUndefOrPoison(Op, UndefPoisonKind::PoisonOnly, 6733 ConsiderFlagsAndMetadata); 6734 } 6735 6736 static bool directlyImpliesPoison(const Value *ValAssumedPoison, const Value *V, 6737 unsigned Depth) { 6738 if (ValAssumedPoison == V) 6739 return true; 6740 6741 const unsigned MaxDepth = 2; 6742 if (Depth >= MaxDepth) 6743 return false; 6744 6745 if (const auto *I = dyn_cast<Instruction>(V)) { 6746 if (any_of(I->operands(), [=](const Use &Op) { 6747 return propagatesPoison(Op) && 6748 directlyImpliesPoison(ValAssumedPoison, Op, Depth + 1); 6749 })) 6750 return true; 6751 6752 // V = extractvalue V0, idx 6753 // V2 = extractvalue V0, idx2 6754 // V0's elements are all poison or not. 
(e.g., add_with_overflow) 6755 const WithOverflowInst *II; 6756 if (match(I, m_ExtractValue(m_WithOverflowInst(II))) && 6757 (match(ValAssumedPoison, m_ExtractValue(m_Specific(II))) || 6758 llvm::is_contained(II->args(), ValAssumedPoison))) 6759 return true; 6760 } 6761 return false; 6762 } 6763 6764 static bool impliesPoison(const Value *ValAssumedPoison, const Value *V, 6765 unsigned Depth) { 6766 if (isGuaranteedNotToBePoison(ValAssumedPoison)) 6767 return true; 6768 6769 if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0)) 6770 return true; 6771 6772 const unsigned MaxDepth = 2; 6773 if (Depth >= MaxDepth) 6774 return false; 6775 6776 const auto *I = dyn_cast<Instruction>(ValAssumedPoison); 6777 if (I && !canCreatePoison(cast<Operator>(I))) { 6778 return all_of(I->operands(), [=](const Value *Op) { 6779 return impliesPoison(Op, V, Depth + 1); 6780 }); 6781 } 6782 return false; 6783 } 6784 6785 bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) { 6786 return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0); 6787 } 6788 6789 static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly); 6790 6791 static bool isGuaranteedNotToBeUndefOrPoison( 6792 const Value *V, AssumptionCache *AC, const Instruction *CtxI, 6793 const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) { 6794 if (Depth >= MaxAnalysisRecursionDepth) 6795 return false; 6796 6797 if (isa<MetadataAsValue>(V)) 6798 return false; 6799 6800 if (const auto *A = dyn_cast<Argument>(V)) { 6801 if (A->hasAttribute(Attribute::NoUndef) || 6802 A->hasAttribute(Attribute::Dereferenceable) || 6803 A->hasAttribute(Attribute::DereferenceableOrNull)) 6804 return true; 6805 } 6806 6807 if (auto *C = dyn_cast<Constant>(V)) { 6808 if (isa<PoisonValue>(C)) 6809 return !includesPoison(Kind); 6810 6811 if (isa<UndefValue>(C)) 6812 return !includesUndef(Kind); 6813 6814 if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(V) || 6815 isa<ConstantPointerNull>(C) || isa<Function>(C)) 6816 return true; 6817 6818 if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C)) 6819 return (!includesUndef(Kind) ? !C->containsPoisonElement() 6820 : !C->containsUndefOrPoisonElement()) && 6821 !C->containsConstantExpression(); 6822 } 6823 6824 // Strip cast operations from a pointer value. 6825 // Note that stripPointerCastsSameRepresentation can strip off getelementptr 6826 // inbounds with zero offset. To guarantee that the result isn't poison, the 6827 // stripped pointer is checked as it has to be pointing into an allocated 6828 // object or be null `null` to ensure `inbounds` getelement pointers with a 6829 // zero offset could not produce poison. 6830 // It can strip off addrspacecast that do not change bit representation as 6831 // well. We believe that such addrspacecast is equivalent to no-op. 6832 auto *StrippedV = V->stripPointerCastsSameRepresentation(); 6833 if (isa<AllocaInst>(StrippedV) || isa<GlobalVariable>(StrippedV) || 6834 isa<Function>(StrippedV) || isa<ConstantPointerNull>(StrippedV)) 6835 return true; 6836 6837 auto OpCheck = [&](const Value *V) { 6838 return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1, Kind); 6839 }; 6840 6841 if (auto *Opr = dyn_cast<Operator>(V)) { 6842 // If the value is a freeze instruction, then it can never 6843 // be undef or poison. 
6844 if (isa<FreezeInst>(V)) 6845 return true; 6846 6847 if (const auto *CB = dyn_cast<CallBase>(V)) { 6848 if (CB->hasRetAttr(Attribute::NoUndef) || 6849 CB->hasRetAttr(Attribute::Dereferenceable) || 6850 CB->hasRetAttr(Attribute::DereferenceableOrNull)) 6851 return true; 6852 } 6853 6854 if (const auto *PN = dyn_cast<PHINode>(V)) { 6855 unsigned Num = PN->getNumIncomingValues(); 6856 bool IsWellDefined = true; 6857 for (unsigned i = 0; i < Num; ++i) { 6858 auto *TI = PN->getIncomingBlock(i)->getTerminator(); 6859 if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI, 6860 DT, Depth + 1, Kind)) { 6861 IsWellDefined = false; 6862 break; 6863 } 6864 } 6865 if (IsWellDefined) 6866 return true; 6867 } else if (!::canCreateUndefOrPoison(Opr, Kind, 6868 /*ConsiderFlagsAndMetadata*/ true) && 6869 all_of(Opr->operands(), OpCheck)) 6870 return true; 6871 } 6872 6873 if (auto *I = dyn_cast<LoadInst>(V)) 6874 if (I->hasMetadata(LLVMContext::MD_noundef) || 6875 I->hasMetadata(LLVMContext::MD_dereferenceable) || 6876 I->hasMetadata(LLVMContext::MD_dereferenceable_or_null)) 6877 return true; 6878 6879 if (programUndefinedIfUndefOrPoison(V, !includesUndef(Kind))) 6880 return true; 6881 6882 // CxtI may be null or a cloned instruction. 6883 if (!CtxI || !CtxI->getParent() || !DT) 6884 return false; 6885 6886 auto *DNode = DT->getNode(CtxI->getParent()); 6887 if (!DNode) 6888 // Unreachable block 6889 return false; 6890 6891 // If V is used as a branch condition before reaching CtxI, V cannot be 6892 // undef or poison. 6893 // br V, BB1, BB2 6894 // BB1: 6895 // CtxI ; V cannot be undef or poison here 6896 auto *Dominator = DNode->getIDom(); 6897 while (Dominator) { 6898 auto *TI = Dominator->getBlock()->getTerminator(); 6899 6900 Value *Cond = nullptr; 6901 if (auto BI = dyn_cast_or_null<BranchInst>(TI)) { 6902 if (BI->isConditional()) 6903 Cond = BI->getCondition(); 6904 } else if (auto SI = dyn_cast_or_null<SwitchInst>(TI)) { 6905 Cond = SI->getCondition(); 6906 } 6907 6908 if (Cond) { 6909 if (Cond == V) 6910 return true; 6911 else if (!includesUndef(Kind) && isa<Operator>(Cond)) { 6912 // For poison, we can analyze further 6913 auto *Opr = cast<Operator>(Cond); 6914 if (any_of(Opr->operands(), 6915 [V](const Use &U) { return V == U && propagatesPoison(U); })) 6916 return true; 6917 } 6918 } 6919 6920 Dominator = Dominator->getIDom(); 6921 } 6922 6923 if (getKnowledgeValidInContext(V, {Attribute::NoUndef}, CtxI, DT, AC)) 6924 return true; 6925 6926 return false; 6927 } 6928 6929 bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC, 6930 const Instruction *CtxI, 6931 const DominatorTree *DT, 6932 unsigned Depth) { 6933 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, 6934 UndefPoisonKind::UndefOrPoison); 6935 } 6936 6937 bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC, 6938 const Instruction *CtxI, 6939 const DominatorTree *DT, unsigned Depth) { 6940 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, 6941 UndefPoisonKind::PoisonOnly); 6942 } 6943 6944 bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC, 6945 const Instruction *CtxI, 6946 const DominatorTree *DT, unsigned Depth) { 6947 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, 6948 UndefPoisonKind::UndefOnly); 6949 } 6950 6951 /// Return true if undefined behavior would provably be executed on the path to 6952 /// OnPathTo if Root produced a posion result. 
Note that this doesn't say
6953 /// anything about whether OnPathTo is actually executed or whether Root is
6954 /// actually poison. This can be used to assess whether a new use of Root can
6955 /// be added at a location which is control equivalent with OnPathTo (such as
6956 /// immediately before it) without introducing UB which didn't previously
6957 /// exist. Note that a false result conveys no information.
6958 bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
6959 Instruction *OnPathTo,
6960 DominatorTree *DT) {
6961 // The basic approach is to assume Root is poison, propagate poison forward
6962 // through all users we can easily track, and then check whether any of those
6963 // users are provable UB and must execute before our exiting block might
6964 // exit.
6965 
6966 // The set of all recursive users we've visited (which are assumed to all be
6967 // poison because of said visit)
6968 SmallSet<const Value *, 16> KnownPoison;
6969 SmallVector<const Instruction*, 16> Worklist;
6970 Worklist.push_back(Root);
6971 while (!Worklist.empty()) {
6972 const Instruction *I = Worklist.pop_back_val();
6973 
6974 // If we know this must trigger UB on a path leading to our target, we are done.
6975 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
6976 return true;
6977 
6978 // If we can't analyze propagation through this instruction, just skip it
6979 // and transitive users. Safe as false is a conservative result.
6980 if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) {
6981 return KnownPoison.contains(U) && propagatesPoison(U);
6982 }))
6983 continue;
6984 
6985 if (KnownPoison.insert(I).second)
6986 for (const User *User : I->users())
6987 Worklist.push_back(cast<Instruction>(User));
6988 }
6989 
6990 // Might be non-UB, or might have a path we couldn't prove must execute on
6991 // the way to the exiting bb.
6992 return false;
6993 }
6994 
6995 OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
6996 const SimplifyQuery &SQ) {
6997 return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
6998 Add, SQ);
6999 }
7000 
7001 OverflowResult
7002 llvm::computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
7003 const WithCache<const Value *> &RHS,
7004 const SimplifyQuery &SQ) {
7005 return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, SQ);
7006 }
7007 
7008 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
7009 // Note: An atomic operation isn't guaranteed to return in a reasonable amount
7010 // of time because it's possible for another thread to interfere with it for an
7011 // arbitrary length of time, but programs aren't allowed to rely on that.
7012 
7013 // If there is no successor, then execution can't transfer to it.
7014 if (isa<ReturnInst>(I))
7015 return false;
7016 if (isa<UnreachableInst>(I))
7017 return false;
7018 
7019 // Note: Do not add new checks here; instead, change Instruction::mayThrow or
7020 // Instruction::willReturn.
7021 //
7022 // FIXME: Move this check into Instruction::willReturn.
7023 if (isa<CatchPadInst>(I)) {
7024 switch (classifyEHPersonality(I->getFunction()->getPersonalityFn())) {
7025 default:
7026 // A catchpad may invoke exception object constructors and such, which
7027 // in some languages can be arbitrary code, so be conservative by default.
7028 return false;
7029 case EHPersonality::CoreCLR:
7030 // For CoreCLR, it just involves a type test.
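// (i.e., unlike the default case above, it is not expected to run arbitrary
// user code or throw, so control still reaches a successor)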
7031 return true; 7032 } 7033 } 7034 7035 // An instruction that returns without throwing must transfer control flow 7036 // to a successor. 7037 return !I->mayThrow() && I->willReturn(); 7038 } 7039 7040 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) { 7041 // TODO: This is slightly conservative for invoke instruction since exiting 7042 // via an exception *is* normal control for them. 7043 for (const Instruction &I : *BB) 7044 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7045 return false; 7046 return true; 7047 } 7048 7049 bool llvm::isGuaranteedToTransferExecutionToSuccessor( 7050 BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, 7051 unsigned ScanLimit) { 7052 return isGuaranteedToTransferExecutionToSuccessor(make_range(Begin, End), 7053 ScanLimit); 7054 } 7055 7056 bool llvm::isGuaranteedToTransferExecutionToSuccessor( 7057 iterator_range<BasicBlock::const_iterator> Range, unsigned ScanLimit) { 7058 assert(ScanLimit && "scan limit must be non-zero"); 7059 for (const Instruction &I : Range) { 7060 if (isa<DbgInfoIntrinsic>(I)) 7061 continue; 7062 if (--ScanLimit == 0) 7063 return false; 7064 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7065 return false; 7066 } 7067 return true; 7068 } 7069 7070 bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, 7071 const Loop *L) { 7072 // The loop header is guaranteed to be executed for every iteration. 7073 // 7074 // FIXME: Relax this constraint to cover all basic blocks that are 7075 // guaranteed to be executed at every iteration. 7076 if (I->getParent() != L->getHeader()) return false; 7077 7078 for (const Instruction &LI : *L->getHeader()) { 7079 if (&LI == I) return true; 7080 if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false; 7081 } 7082 llvm_unreachable("Instruction not contained in its own parent basic block."); 7083 } 7084 7085 bool llvm::propagatesPoison(const Use &PoisonOp) { 7086 const Operator *I = cast<Operator>(PoisonOp.getUser()); 7087 switch (I->getOpcode()) { 7088 case Instruction::Freeze: 7089 case Instruction::PHI: 7090 case Instruction::Invoke: 7091 return false; 7092 case Instruction::Select: 7093 return PoisonOp.getOperandNo() == 0; 7094 case Instruction::Call: 7095 if (auto *II = dyn_cast<IntrinsicInst>(I)) { 7096 switch (II->getIntrinsicID()) { 7097 // TODO: Add more intrinsics. 7098 case Intrinsic::sadd_with_overflow: 7099 case Intrinsic::ssub_with_overflow: 7100 case Intrinsic::smul_with_overflow: 7101 case Intrinsic::uadd_with_overflow: 7102 case Intrinsic::usub_with_overflow: 7103 case Intrinsic::umul_with_overflow: 7104 // If an input is a vector containing a poison element, the 7105 // two output vectors (calculated results, overflow bits)' 7106 // corresponding lanes are poison. 7107 return true; 7108 case Intrinsic::ctpop: 7109 return true; 7110 } 7111 } 7112 return false; 7113 case Instruction::ICmp: 7114 case Instruction::FCmp: 7115 case Instruction::GetElementPtr: 7116 return true; 7117 default: 7118 if (isa<BinaryOperator>(I) || isa<UnaryOperator>(I) || isa<CastInst>(I)) 7119 return true; 7120 7121 // Be conservative and return false. 
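// (For an operator this switch does not recognize, we cannot assume that a
// poison operand necessarily makes the result poison, so no claim is made.)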
7122 return false; 7123 } 7124 } 7125 7126 void llvm::getGuaranteedWellDefinedOps( 7127 const Instruction *I, SmallVectorImpl<const Value *> &Operands) { 7128 switch (I->getOpcode()) { 7129 case Instruction::Store: 7130 Operands.push_back(cast<StoreInst>(I)->getPointerOperand()); 7131 break; 7132 7133 case Instruction::Load: 7134 Operands.push_back(cast<LoadInst>(I)->getPointerOperand()); 7135 break; 7136 7137 // Since dereferenceable attribute imply noundef, atomic operations 7138 // also implicitly have noundef pointers too 7139 case Instruction::AtomicCmpXchg: 7140 Operands.push_back(cast<AtomicCmpXchgInst>(I)->getPointerOperand()); 7141 break; 7142 7143 case Instruction::AtomicRMW: 7144 Operands.push_back(cast<AtomicRMWInst>(I)->getPointerOperand()); 7145 break; 7146 7147 case Instruction::Call: 7148 case Instruction::Invoke: { 7149 const CallBase *CB = cast<CallBase>(I); 7150 if (CB->isIndirectCall()) 7151 Operands.push_back(CB->getCalledOperand()); 7152 for (unsigned i = 0; i < CB->arg_size(); ++i) { 7153 if (CB->paramHasAttr(i, Attribute::NoUndef) || 7154 CB->paramHasAttr(i, Attribute::Dereferenceable) || 7155 CB->paramHasAttr(i, Attribute::DereferenceableOrNull)) 7156 Operands.push_back(CB->getArgOperand(i)); 7157 } 7158 break; 7159 } 7160 case Instruction::Ret: 7161 if (I->getFunction()->hasRetAttribute(Attribute::NoUndef)) 7162 Operands.push_back(I->getOperand(0)); 7163 break; 7164 case Instruction::Switch: 7165 Operands.push_back(cast<SwitchInst>(I)->getCondition()); 7166 break; 7167 case Instruction::Br: { 7168 auto *BR = cast<BranchInst>(I); 7169 if (BR->isConditional()) 7170 Operands.push_back(BR->getCondition()); 7171 break; 7172 } 7173 default: 7174 break; 7175 } 7176 } 7177 7178 void llvm::getGuaranteedNonPoisonOps(const Instruction *I, 7179 SmallVectorImpl<const Value *> &Operands) { 7180 getGuaranteedWellDefinedOps(I, Operands); 7181 switch (I->getOpcode()) { 7182 // Divisors of these operations are allowed to be partially undef. 7183 case Instruction::UDiv: 7184 case Instruction::SDiv: 7185 case Instruction::URem: 7186 case Instruction::SRem: 7187 Operands.push_back(I->getOperand(1)); 7188 break; 7189 default: 7190 break; 7191 } 7192 } 7193 7194 bool llvm::mustTriggerUB(const Instruction *I, 7195 const SmallPtrSetImpl<const Value *> &KnownPoison) { 7196 SmallVector<const Value *, 4> NonPoisonOps; 7197 getGuaranteedNonPoisonOps(I, NonPoisonOps); 7198 7199 for (const auto *V : NonPoisonOps) 7200 if (KnownPoison.count(V)) 7201 return true; 7202 7203 return false; 7204 } 7205 7206 static bool programUndefinedIfUndefOrPoison(const Value *V, 7207 bool PoisonOnly) { 7208 // We currently only look for uses of values within the same basic 7209 // block, as that makes it easier to guarantee that the uses will be 7210 // executed given that Inst is executed. 7211 // 7212 // FIXME: Expand this to consider uses beyond the same basic block. To do 7213 // this, look out for the distinction between post-dominance and strong 7214 // post-dominance. 7215 const BasicBlock *BB = nullptr; 7216 BasicBlock::const_iterator Begin; 7217 if (const auto *Inst = dyn_cast<Instruction>(V)) { 7218 BB = Inst->getParent(); 7219 Begin = Inst->getIterator(); 7220 Begin++; 7221 } else if (const auto *Arg = dyn_cast<Argument>(V)) { 7222 if (Arg->getParent()->isDeclaration()) 7223 return false; 7224 BB = &Arg->getParent()->getEntryBlock(); 7225 Begin = BB->begin(); 7226 } else { 7227 return false; 7228 } 7229 7230 // Limit number of instructions we look at, to avoid scanning through large 7231 // blocks. 
The current limit is chosen arbitrarily. 7232 unsigned ScanLimit = 32; 7233 BasicBlock::const_iterator End = BB->end(); 7234 7235 if (!PoisonOnly) { 7236 // Since undef does not propagate eagerly, be conservative & just check 7237 // whether a value is directly passed to an instruction that must take 7238 // well-defined operands. 7239 7240 for (const auto &I : make_range(Begin, End)) { 7241 if (isa<DbgInfoIntrinsic>(I)) 7242 continue; 7243 if (--ScanLimit == 0) 7244 break; 7245 7246 SmallVector<const Value *, 4> WellDefinedOps; 7247 getGuaranteedWellDefinedOps(&I, WellDefinedOps); 7248 if (is_contained(WellDefinedOps, V)) 7249 return true; 7250 7251 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7252 break; 7253 } 7254 return false; 7255 } 7256 7257 // Set of instructions that we have proved will yield poison if Inst 7258 // does. 7259 SmallSet<const Value *, 16> YieldsPoison; 7260 SmallSet<const BasicBlock *, 4> Visited; 7261 7262 YieldsPoison.insert(V); 7263 Visited.insert(BB); 7264 7265 while (true) { 7266 for (const auto &I : make_range(Begin, End)) { 7267 if (isa<DbgInfoIntrinsic>(I)) 7268 continue; 7269 if (--ScanLimit == 0) 7270 return false; 7271 if (mustTriggerUB(&I, YieldsPoison)) 7272 return true; 7273 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7274 return false; 7275 7276 // If an operand is poison and propagates it, mark I as yielding poison. 7277 for (const Use &Op : I.operands()) { 7278 if (YieldsPoison.count(Op) && propagatesPoison(Op)) { 7279 YieldsPoison.insert(&I); 7280 break; 7281 } 7282 } 7283 7284 // Special handling for select, which returns poison if its operand 0 is 7285 // poison (handled in the loop above) *or* if both its true/false operands 7286 // are poison (handled here). 7287 if (I.getOpcode() == Instruction::Select && 7288 YieldsPoison.count(I.getOperand(1)) && 7289 YieldsPoison.count(I.getOperand(2))) { 7290 YieldsPoison.insert(&I); 7291 } 7292 } 7293 7294 BB = BB->getSingleSuccessor(); 7295 if (!BB || !Visited.insert(BB).second) 7296 break; 7297 7298 Begin = BB->getFirstNonPHI()->getIterator(); 7299 End = BB->end(); 7300 } 7301 return false; 7302 } 7303 7304 bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) { 7305 return ::programUndefinedIfUndefOrPoison(Inst, false); 7306 } 7307 7308 bool llvm::programUndefinedIfPoison(const Instruction *Inst) { 7309 return ::programUndefinedIfUndefOrPoison(Inst, true); 7310 } 7311 7312 static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) { 7313 if (FMF.noNaNs()) 7314 return true; 7315 7316 if (auto *C = dyn_cast<ConstantFP>(V)) 7317 return !C->isNaN(); 7318 7319 if (auto *C = dyn_cast<ConstantDataVector>(V)) { 7320 if (!C->getElementType()->isFloatingPointTy()) 7321 return false; 7322 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) { 7323 if (C->getElementAsAPFloat(I).isNaN()) 7324 return false; 7325 } 7326 return true; 7327 } 7328 7329 if (isa<ConstantAggregateZero>(V)) 7330 return true; 7331 7332 return false; 7333 } 7334 7335 static bool isKnownNonZero(const Value *V) { 7336 if (auto *C = dyn_cast<ConstantFP>(V)) 7337 return !C->isZero(); 7338 7339 if (auto *C = dyn_cast<ConstantDataVector>(V)) { 7340 if (!C->getElementType()->isFloatingPointTy()) 7341 return false; 7342 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) { 7343 if (C->getElementAsAPFloat(I).isZero()) 7344 return false; 7345 } 7346 return true; 7347 } 7348 7349 return false; 7350 } 7351 7352 /// Match clamp pattern for float types without care about NaNs or signed zeros. 
7353 /// Given non-min/max outer cmp/select from the clamp pattern this 7354 /// function recognizes if it can be substitued by a "canonical" min/max 7355 /// pattern. 7356 static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred, 7357 Value *CmpLHS, Value *CmpRHS, 7358 Value *TrueVal, Value *FalseVal, 7359 Value *&LHS, Value *&RHS) { 7360 // Try to match 7361 // X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2)) 7362 // X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2)) 7363 // and return description of the outer Max/Min. 7364 7365 // First, check if select has inverse order: 7366 if (CmpRHS == FalseVal) { 7367 std::swap(TrueVal, FalseVal); 7368 Pred = CmpInst::getInversePredicate(Pred); 7369 } 7370 7371 // Assume success now. If there's no match, callers should not use these anyway. 7372 LHS = TrueVal; 7373 RHS = FalseVal; 7374 7375 const APFloat *FC1; 7376 if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite()) 7377 return {SPF_UNKNOWN, SPNB_NA, false}; 7378 7379 const APFloat *FC2; 7380 switch (Pred) { 7381 case CmpInst::FCMP_OLT: 7382 case CmpInst::FCMP_OLE: 7383 case CmpInst::FCMP_ULT: 7384 case CmpInst::FCMP_ULE: 7385 if (match(FalseVal, 7386 m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)), 7387 m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) && 7388 *FC1 < *FC2) 7389 return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false}; 7390 break; 7391 case CmpInst::FCMP_OGT: 7392 case CmpInst::FCMP_OGE: 7393 case CmpInst::FCMP_UGT: 7394 case CmpInst::FCMP_UGE: 7395 if (match(FalseVal, 7396 m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)), 7397 m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) && 7398 *FC1 > *FC2) 7399 return {SPF_FMINNUM, SPNB_RETURNS_ANY, false}; 7400 break; 7401 default: 7402 break; 7403 } 7404 7405 return {SPF_UNKNOWN, SPNB_NA, false}; 7406 } 7407 7408 /// Recognize variations of: 7409 /// CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v))) 7410 static SelectPatternResult matchClamp(CmpInst::Predicate Pred, 7411 Value *CmpLHS, Value *CmpRHS, 7412 Value *TrueVal, Value *FalseVal) { 7413 // Swap the select operands and predicate to match the patterns below. 7414 if (CmpRHS != TrueVal) { 7415 Pred = ICmpInst::getSwappedPredicate(Pred); 7416 std::swap(TrueVal, FalseVal); 7417 } 7418 const APInt *C1; 7419 if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) { 7420 const APInt *C2; 7421 // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1) 7422 if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) && 7423 C1->slt(*C2) && Pred == CmpInst::ICMP_SLT) 7424 return {SPF_SMAX, SPNB_NA, false}; 7425 7426 // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1) 7427 if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) && 7428 C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT) 7429 return {SPF_SMIN, SPNB_NA, false}; 7430 7431 // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1) 7432 if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) && 7433 C1->ult(*C2) && Pred == CmpInst::ICMP_ULT) 7434 return {SPF_UMAX, SPNB_NA, false}; 7435 7436 // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1) 7437 if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) && 7438 C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT) 7439 return {SPF_UMIN, SPNB_NA, false}; 7440 } 7441 return {SPF_UNKNOWN, SPNB_NA, false}; 7442 } 7443 7444 /// Recognize variations of: 7445 /// a < c ? 
min(a,b) : min(b,c) ==> min(min(a,b),min(b,c)) 7446 static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, 7447 Value *CmpLHS, Value *CmpRHS, 7448 Value *TVal, Value *FVal, 7449 unsigned Depth) { 7450 // TODO: Allow FP min/max with nnan/nsz. 7451 assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison"); 7452 7453 Value *A = nullptr, *B = nullptr; 7454 SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1); 7455 if (!SelectPatternResult::isMinOrMax(L.Flavor)) 7456 return {SPF_UNKNOWN, SPNB_NA, false}; 7457 7458 Value *C = nullptr, *D = nullptr; 7459 SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1); 7460 if (L.Flavor != R.Flavor) 7461 return {SPF_UNKNOWN, SPNB_NA, false}; 7462 7463 // We have something like: x Pred y ? min(a, b) : min(c, d). 7464 // Try to match the compare to the min/max operations of the select operands. 7465 // First, make sure we have the right compare predicate. 7466 switch (L.Flavor) { 7467 case SPF_SMIN: 7468 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) { 7469 Pred = ICmpInst::getSwappedPredicate(Pred); 7470 std::swap(CmpLHS, CmpRHS); 7471 } 7472 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) 7473 break; 7474 return {SPF_UNKNOWN, SPNB_NA, false}; 7475 case SPF_SMAX: 7476 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { 7477 Pred = ICmpInst::getSwappedPredicate(Pred); 7478 std::swap(CmpLHS, CmpRHS); 7479 } 7480 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) 7481 break; 7482 return {SPF_UNKNOWN, SPNB_NA, false}; 7483 case SPF_UMIN: 7484 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { 7485 Pred = ICmpInst::getSwappedPredicate(Pred); 7486 std::swap(CmpLHS, CmpRHS); 7487 } 7488 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) 7489 break; 7490 return {SPF_UNKNOWN, SPNB_NA, false}; 7491 case SPF_UMAX: 7492 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { 7493 Pred = ICmpInst::getSwappedPredicate(Pred); 7494 std::swap(CmpLHS, CmpRHS); 7495 } 7496 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) 7497 break; 7498 return {SPF_UNKNOWN, SPNB_NA, false}; 7499 default: 7500 return {SPF_UNKNOWN, SPNB_NA, false}; 7501 } 7502 7503 // If there is a common operand in the already matched min/max and the other 7504 // min/max operands match the compare operands (either directly or inverted), 7505 // then this is min/max of the same flavor. 7506 7507 // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) 7508 // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) 7509 if (D == B) { 7510 if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && 7511 match(A, m_Not(m_Specific(CmpRHS))))) 7512 return {L.Flavor, SPNB_NA, false}; 7513 } 7514 // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) 7515 // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) 7516 if (C == B) { 7517 if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && 7518 match(A, m_Not(m_Specific(CmpRHS))))) 7519 return {L.Flavor, SPNB_NA, false}; 7520 } 7521 // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) 7522 // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) 7523 if (D == A) { 7524 if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && 7525 match(B, m_Not(m_Specific(CmpRHS))))) 7526 return {L.Flavor, SPNB_NA, false}; 7527 } 7528 // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) 7529 // ~d pred ~b ? 
m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) 7530 if (C == A) { 7531 if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && 7532 match(B, m_Not(m_Specific(CmpRHS))))) 7533 return {L.Flavor, SPNB_NA, false}; 7534 } 7535 7536 return {SPF_UNKNOWN, SPNB_NA, false}; 7537 } 7538 7539 /// If the input value is the result of a 'not' op, constant integer, or vector 7540 /// splat of a constant integer, return the bitwise-not source value. 7541 /// TODO: This could be extended to handle non-splat vector integer constants. 7542 static Value *getNotValue(Value *V) { 7543 Value *NotV; 7544 if (match(V, m_Not(m_Value(NotV)))) 7545 return NotV; 7546 7547 const APInt *C; 7548 if (match(V, m_APInt(C))) 7549 return ConstantInt::get(V->getType(), ~(*C)); 7550 7551 return nullptr; 7552 } 7553 7554 /// Match non-obvious integer minimum and maximum sequences. 7555 static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, 7556 Value *CmpLHS, Value *CmpRHS, 7557 Value *TrueVal, Value *FalseVal, 7558 Value *&LHS, Value *&RHS, 7559 unsigned Depth) { 7560 // Assume success. If there's no match, callers should not use these anyway. 7561 LHS = TrueVal; 7562 RHS = FalseVal; 7563 7564 SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal); 7565 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) 7566 return SPR; 7567 7568 SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth); 7569 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) 7570 return SPR; 7571 7572 // Look through 'not' ops to find disguised min/max. 7573 // (X > Y) ? ~X : ~Y ==> (~X < ~Y) ? ~X : ~Y ==> MIN(~X, ~Y) 7574 // (X < Y) ? ~X : ~Y ==> (~X > ~Y) ? ~X : ~Y ==> MAX(~X, ~Y) 7575 if (CmpLHS == getNotValue(TrueVal) && CmpRHS == getNotValue(FalseVal)) { 7576 switch (Pred) { 7577 case CmpInst::ICMP_SGT: return {SPF_SMIN, SPNB_NA, false}; 7578 case CmpInst::ICMP_SLT: return {SPF_SMAX, SPNB_NA, false}; 7579 case CmpInst::ICMP_UGT: return {SPF_UMIN, SPNB_NA, false}; 7580 case CmpInst::ICMP_ULT: return {SPF_UMAX, SPNB_NA, false}; 7581 default: break; 7582 } 7583 } 7584 7585 // (X > Y) ? ~Y : ~X ==> (~X < ~Y) ? ~Y : ~X ==> MAX(~Y, ~X) 7586 // (X < Y) ? ~Y : ~X ==> (~X > ~Y) ? ~Y : ~X ==> MIN(~Y, ~X) 7587 if (CmpLHS == getNotValue(FalseVal) && CmpRHS == getNotValue(TrueVal)) { 7588 switch (Pred) { 7589 case CmpInst::ICMP_SGT: return {SPF_SMAX, SPNB_NA, false}; 7590 case CmpInst::ICMP_SLT: return {SPF_SMIN, SPNB_NA, false}; 7591 case CmpInst::ICMP_UGT: return {SPF_UMAX, SPNB_NA, false}; 7592 case CmpInst::ICMP_ULT: return {SPF_UMIN, SPNB_NA, false}; 7593 default: break; 7594 } 7595 } 7596 7597 if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) 7598 return {SPF_UNKNOWN, SPNB_NA, false}; 7599 7600 const APInt *C1; 7601 if (!match(CmpRHS, m_APInt(C1))) 7602 return {SPF_UNKNOWN, SPNB_NA, false}; 7603 7604 // An unsigned min/max can be written with a signed compare. 7605 const APInt *C2; 7606 if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) || 7607 (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) { 7608 // Is the sign bit set? 7609 // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX 7610 // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN 7611 if (Pred == CmpInst::ICMP_SLT && C1->isZero() && C2->isMaxSignedValue()) 7612 return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; 7613 7614 // Is the sign bit clear? 7615 // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX 7616 // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? 
X : MINVAL ==> UMIN 7617 if (Pred == CmpInst::ICMP_SGT && C1->isAllOnes() && C2->isMinSignedValue()) 7618 return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; 7619 } 7620 7621 return {SPF_UNKNOWN, SPNB_NA, false}; 7622 } 7623 7624 bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW) { 7625 assert(X && Y && "Invalid operand"); 7626 7627 // X = sub (0, Y) || X = sub nsw (0, Y) 7628 if ((!NeedNSW && match(X, m_Sub(m_ZeroInt(), m_Specific(Y)))) || 7629 (NeedNSW && match(X, m_NSWSub(m_ZeroInt(), m_Specific(Y))))) 7630 return true; 7631 7632 // Y = sub (0, X) || Y = sub nsw (0, X) 7633 if ((!NeedNSW && match(Y, m_Sub(m_ZeroInt(), m_Specific(X)))) || 7634 (NeedNSW && match(Y, m_NSWSub(m_ZeroInt(), m_Specific(X))))) 7635 return true; 7636 7637 // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A) 7638 Value *A, *B; 7639 return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) && 7640 match(Y, m_Sub(m_Specific(B), m_Specific(A))))) || 7641 (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) && 7642 match(Y, m_NSWSub(m_Specific(B), m_Specific(A))))); 7643 } 7644 7645 static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, 7646 FastMathFlags FMF, 7647 Value *CmpLHS, Value *CmpRHS, 7648 Value *TrueVal, Value *FalseVal, 7649 Value *&LHS, Value *&RHS, 7650 unsigned Depth) { 7651 bool HasMismatchedZeros = false; 7652 if (CmpInst::isFPPredicate(Pred)) { 7653 // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one 7654 // 0.0 operand, set the compare's 0.0 operands to that same value for the 7655 // purpose of identifying min/max. Disregard vector constants with undefined 7656 // elements because those can not be back-propagated for analysis. 7657 Value *OutputZeroVal = nullptr; 7658 if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) && 7659 !cast<Constant>(TrueVal)->containsUndefOrPoisonElement()) 7660 OutputZeroVal = TrueVal; 7661 else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) && 7662 !cast<Constant>(FalseVal)->containsUndefOrPoisonElement()) 7663 OutputZeroVal = FalseVal; 7664 7665 if (OutputZeroVal) { 7666 if (match(CmpLHS, m_AnyZeroFP()) && CmpLHS != OutputZeroVal) { 7667 HasMismatchedZeros = true; 7668 CmpLHS = OutputZeroVal; 7669 } 7670 if (match(CmpRHS, m_AnyZeroFP()) && CmpRHS != OutputZeroVal) { 7671 HasMismatchedZeros = true; 7672 CmpRHS = OutputZeroVal; 7673 } 7674 } 7675 } 7676 7677 LHS = CmpLHS; 7678 RHS = CmpRHS; 7679 7680 // Signed zero may return inconsistent results between implementations. 7681 // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0 7682 // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1) 7683 // Therefore, we behave conservatively and only proceed if at least one of the 7684 // operands is known to not be zero or if we don't care about signed zero. 7685 switch (Pred) { 7686 default: break; 7687 case CmpInst::FCMP_OGT: case CmpInst::FCMP_OLT: 7688 case CmpInst::FCMP_UGT: case CmpInst::FCMP_ULT: 7689 if (!HasMismatchedZeros) 7690 break; 7691 [[fallthrough]]; 7692 case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE: 7693 case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE: 7694 if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && 7695 !isKnownNonZero(CmpRHS)) 7696 return {SPF_UNKNOWN, SPNB_NA, false}; 7697 } 7698 7699 SelectPatternNaNBehavior NaNBehavior = SPNB_NA; 7700 bool Ordered = false; 7701 7702 // When given one NaN and one non-NaN input: 7703 // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input. 
7704 // - A simple C99 (a < b ? a : b) construction will return 'b' (as the 7705 // ordered comparison fails), which could be NaN or non-NaN. 7706 // so here we discover exactly what NaN behavior is required/accepted. 7707 if (CmpInst::isFPPredicate(Pred)) { 7708 bool LHSSafe = isKnownNonNaN(CmpLHS, FMF); 7709 bool RHSSafe = isKnownNonNaN(CmpRHS, FMF); 7710 7711 if (LHSSafe && RHSSafe) { 7712 // Both operands are known non-NaN. 7713 NaNBehavior = SPNB_RETURNS_ANY; 7714 } else if (CmpInst::isOrdered(Pred)) { 7715 // An ordered comparison will return false when given a NaN, so it 7716 // returns the RHS. 7717 Ordered = true; 7718 if (LHSSafe) 7719 // LHS is non-NaN, so if RHS is NaN then NaN will be returned. 7720 NaNBehavior = SPNB_RETURNS_NAN; 7721 else if (RHSSafe) 7722 NaNBehavior = SPNB_RETURNS_OTHER; 7723 else 7724 // Completely unsafe. 7725 return {SPF_UNKNOWN, SPNB_NA, false}; 7726 } else { 7727 Ordered = false; 7728 // An unordered comparison will return true when given a NaN, so it 7729 // returns the LHS. 7730 if (LHSSafe) 7731 // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned. 7732 NaNBehavior = SPNB_RETURNS_OTHER; 7733 else if (RHSSafe) 7734 NaNBehavior = SPNB_RETURNS_NAN; 7735 else 7736 // Completely unsafe. 7737 return {SPF_UNKNOWN, SPNB_NA, false}; 7738 } 7739 } 7740 7741 if (TrueVal == CmpRHS && FalseVal == CmpLHS) { 7742 std::swap(CmpLHS, CmpRHS); 7743 Pred = CmpInst::getSwappedPredicate(Pred); 7744 if (NaNBehavior == SPNB_RETURNS_NAN) 7745 NaNBehavior = SPNB_RETURNS_OTHER; 7746 else if (NaNBehavior == SPNB_RETURNS_OTHER) 7747 NaNBehavior = SPNB_RETURNS_NAN; 7748 Ordered = !Ordered; 7749 } 7750 7751 // ([if]cmp X, Y) ? X : Y 7752 if (TrueVal == CmpLHS && FalseVal == CmpRHS) { 7753 switch (Pred) { 7754 default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality. 7755 case ICmpInst::ICMP_UGT: 7756 case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false}; 7757 case ICmpInst::ICMP_SGT: 7758 case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false}; 7759 case ICmpInst::ICMP_ULT: 7760 case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false}; 7761 case ICmpInst::ICMP_SLT: 7762 case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false}; 7763 case FCmpInst::FCMP_UGT: 7764 case FCmpInst::FCMP_UGE: 7765 case FCmpInst::FCMP_OGT: 7766 case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered}; 7767 case FCmpInst::FCMP_ULT: 7768 case FCmpInst::FCMP_ULE: 7769 case FCmpInst::FCMP_OLT: 7770 case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered}; 7771 } 7772 } 7773 7774 if (isKnownNegation(TrueVal, FalseVal)) { 7775 // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can 7776 // match against either LHS or sext(LHS). 7777 auto MaybeSExtCmpLHS = 7778 m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS))); 7779 auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes()); 7780 auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One()); 7781 if (match(TrueVal, MaybeSExtCmpLHS)) { 7782 // Set the return values. If the compare uses the negated value (-X >s 0), 7783 // swap the return values because the negated value is always 'RHS'. 7784 LHS = TrueVal; 7785 RHS = FalseVal; 7786 if (match(CmpLHS, m_Neg(m_Specific(FalseVal)))) 7787 std::swap(LHS, RHS); 7788 7789 // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X) 7790 // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X) 7791 if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes)) 7792 return {SPF_ABS, SPNB_NA, false}; 7793 7794 // (X >=s 0) ? X : -X or (X >=s 1) ? 
X : -X --> ABS(X) 7795 if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne)) 7796 return {SPF_ABS, SPNB_NA, false}; 7797 7798 // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X) 7799 // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X) 7800 if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne)) 7801 return {SPF_NABS, SPNB_NA, false}; 7802 } 7803 else if (match(FalseVal, MaybeSExtCmpLHS)) { 7804 // Set the return values. If the compare uses the negated value (-X >s 0), 7805 // swap the return values because the negated value is always 'RHS'. 7806 LHS = FalseVal; 7807 RHS = TrueVal; 7808 if (match(CmpLHS, m_Neg(m_Specific(TrueVal)))) 7809 std::swap(LHS, RHS); 7810 7811 // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X) 7812 // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X) 7813 if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes)) 7814 return {SPF_NABS, SPNB_NA, false}; 7815 7816 // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X) 7817 // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X) 7818 if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne)) 7819 return {SPF_ABS, SPNB_NA, false}; 7820 } 7821 } 7822 7823 if (CmpInst::isIntPredicate(Pred)) 7824 return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth); 7825 7826 // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar 7827 // may return either -0.0 or 0.0, so fcmp/select pair has stricter 7828 // semantics than minNum. Be conservative in such case. 7829 if (NaNBehavior != SPNB_RETURNS_ANY || 7830 (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && 7831 !isKnownNonZero(CmpRHS))) 7832 return {SPF_UNKNOWN, SPNB_NA, false}; 7833 7834 return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); 7835 } 7836 7837 /// Helps to match a select pattern in case of a type mismatch. 7838 /// 7839 /// The function processes the case when type of true and false values of a 7840 /// select instruction differs from type of the cmp instruction operands because 7841 /// of a cast instruction. The function checks if it is legal to move the cast 7842 /// operation after "select". If yes, it returns the new second value of 7843 /// "select" (with the assumption that cast is moved): 7844 /// 1. As operand of cast instruction when both values of "select" are same cast 7845 /// instructions. 7846 /// 2. As restored constant (by applying reverse cast operation) when the first 7847 /// value of the "select" is a cast operation and the second value is a 7848 /// constant. 7849 /// NOTE: We return only the new second value because the first value could be 7850 /// accessed as operand of cast instruction. 7851 static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, 7852 Instruction::CastOps *CastOp) { 7853 auto *Cast1 = dyn_cast<CastInst>(V1); 7854 if (!Cast1) 7855 return nullptr; 7856 7857 *CastOp = Cast1->getOpcode(); 7858 Type *SrcTy = Cast1->getSrcTy(); 7859 if (auto *Cast2 = dyn_cast<CastInst>(V2)) { 7860 // If V1 and V2 are both the same cast from the same type, look through V1. 
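    // (Illustrative example: V1 = sext i8 %a to i32 and V2 = sext i8 %b to
    // i32; the select can then be performed on the narrow i8 values and the
    // sext applied to its result instead.)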
    if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
      return Cast2->getOperand(0);
    return nullptr;
  }

  auto *C = dyn_cast<Constant>(V2);
  if (!C)
    return nullptr;

  const DataLayout &DL = CmpI->getModule()->getDataLayout();
  Constant *CastedTo = nullptr;
  switch (*CastOp) {
  case Instruction::ZExt:
    if (CmpI->isUnsigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy);
    break;
  case Instruction::SExt:
    if (CmpI->isSigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
    break;
  case Instruction::Trunc:
    Constant *CmpConst;
    if (match(CmpI->getOperand(1), m_Constant(CmpConst)) &&
        CmpConst->getType() == SrcTy) {
      // Here we have the following case:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %tr = trunc iN %x to iK
      //   %narrowsel = select i1 %cond, iK %tr, iK C
      //
      // We can always move trunc after select operation:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %widesel = select i1 %cond, iN %x, iN CmpConst
      //   %tr = trunc iN %widesel to iK
      //
      // Note that C could be extended in any way because we don't care about
      // upper bits after truncation. This cannot be an abs pattern, because
      // that would look like:
      //
      //   select i1 %cond, x, -x.
      //
      // So only a min/max pattern can be matched. Such a match requires the
      // widened C to equal CmpConst. That is why we set the widened C to
      // CmpConst; the condition trunc(CmpConst) == C is checked below.
      CastedTo = CmpConst;
    } else {
      unsigned ExtOp = CmpI->isSigned() ? Instruction::SExt : Instruction::ZExt;
      CastedTo = ConstantFoldCastOperand(ExtOp, C, SrcTy, DL);
    }
    break;
  case Instruction::FPTrunc:
    CastedTo = ConstantFoldCastOperand(Instruction::FPExt, C, SrcTy, DL);
    break;
  case Instruction::FPExt:
    CastedTo = ConstantFoldCastOperand(Instruction::FPTrunc, C, SrcTy, DL);
    break;
  case Instruction::FPToUI:
    CastedTo = ConstantFoldCastOperand(Instruction::UIToFP, C, SrcTy, DL);
    break;
  case Instruction::FPToSI:
    CastedTo = ConstantFoldCastOperand(Instruction::SIToFP, C, SrcTy, DL);
    break;
  case Instruction::UIToFP:
    CastedTo = ConstantFoldCastOperand(Instruction::FPToUI, C, SrcTy, DL);
    break;
  case Instruction::SIToFP:
    CastedTo = ConstantFoldCastOperand(Instruction::FPToSI, C, SrcTy, DL);
    break;
  default:
    break;
  }

  if (!CastedTo)
    return nullptr;

  // Make sure the cast doesn't lose any information.
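  // (Illustrative: if *CastOp is a zext from i8 to i32 and C = 300, then
  // CastedTo = trunc(300) = 44, but zext(44) = 44 != 300, so the round-trip
  // check below rejects the transformation.)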
7938 Constant *CastedBack = 7939 ConstantFoldCastOperand(*CastOp, CastedTo, C->getType(), DL); 7940 if (CastedBack && CastedBack != C) 7941 return nullptr; 7942 7943 return CastedTo; 7944 } 7945 7946 SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, 7947 Instruction::CastOps *CastOp, 7948 unsigned Depth) { 7949 if (Depth >= MaxAnalysisRecursionDepth) 7950 return {SPF_UNKNOWN, SPNB_NA, false}; 7951 7952 SelectInst *SI = dyn_cast<SelectInst>(V); 7953 if (!SI) return {SPF_UNKNOWN, SPNB_NA, false}; 7954 7955 CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition()); 7956 if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false}; 7957 7958 Value *TrueVal = SI->getTrueValue(); 7959 Value *FalseVal = SI->getFalseValue(); 7960 7961 return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS, 7962 CastOp, Depth); 7963 } 7964 7965 SelectPatternResult llvm::matchDecomposedSelectPattern( 7966 CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS, 7967 Instruction::CastOps *CastOp, unsigned Depth) { 7968 CmpInst::Predicate Pred = CmpI->getPredicate(); 7969 Value *CmpLHS = CmpI->getOperand(0); 7970 Value *CmpRHS = CmpI->getOperand(1); 7971 FastMathFlags FMF; 7972 if (isa<FPMathOperator>(CmpI)) 7973 FMF = CmpI->getFastMathFlags(); 7974 7975 // Bail out early. 7976 if (CmpI->isEquality()) 7977 return {SPF_UNKNOWN, SPNB_NA, false}; 7978 7979 // Deal with type mismatches. 7980 if (CastOp && CmpLHS->getType() != TrueVal->getType()) { 7981 if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) { 7982 // If this is a potential fmin/fmax with a cast to integer, then ignore 7983 // -0.0 because there is no corresponding integer value. 7984 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI) 7985 FMF.setNoSignedZeros(); 7986 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, 7987 cast<CastInst>(TrueVal)->getOperand(0), C, 7988 LHS, RHS, Depth); 7989 } 7990 if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) { 7991 // If this is a potential fmin/fmax with a cast to integer, then ignore 7992 // -0.0 because there is no corresponding integer value. 7993 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI) 7994 FMF.setNoSignedZeros(); 7995 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, 7996 C, cast<CastInst>(FalseVal)->getOperand(0), 7997 LHS, RHS, Depth); 7998 } 7999 } 8000 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal, 8001 LHS, RHS, Depth); 8002 } 8003 8004 CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) { 8005 if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT; 8006 if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT; 8007 if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT; 8008 if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT; 8009 if (SPF == SPF_FMINNUM) 8010 return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; 8011 if (SPF == SPF_FMAXNUM) 8012 return Ordered ? 
FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
  llvm_unreachable("unhandled!");
}

SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) {
  if (SPF == SPF_SMIN) return SPF_SMAX;
  if (SPF == SPF_UMIN) return SPF_UMAX;
  if (SPF == SPF_SMAX) return SPF_SMIN;
  if (SPF == SPF_UMAX) return SPF_UMIN;
  llvm_unreachable("unhandled!");
}

Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
  switch (MinMaxID) {
  case Intrinsic::smax: return Intrinsic::smin;
  case Intrinsic::smin: return Intrinsic::smax;
  case Intrinsic::umax: return Intrinsic::umin;
  case Intrinsic::umin: return Intrinsic::umax;
  // Note that the next four intrinsics may produce the same result for the
  // original and the inverted case even if X != Y, because NaN is handled
  // specially.
  case Intrinsic::maximum: return Intrinsic::minimum;
  case Intrinsic::minimum: return Intrinsic::maximum;
  case Intrinsic::maxnum: return Intrinsic::minnum;
  case Intrinsic::minnum: return Intrinsic::maxnum;
  default: llvm_unreachable("Unexpected intrinsic");
  }
}

APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
  switch (SPF) {
  case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth);
  case SPF_SMIN: return APInt::getSignedMinValue(BitWidth);
  case SPF_UMAX: return APInt::getMaxValue(BitWidth);
  case SPF_UMIN: return APInt::getMinValue(BitWidth);
  default: llvm_unreachable("Unexpected flavor");
  }
}

std::pair<Intrinsic::ID, bool>
llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
  // Check if VL contains select instructions that can be folded into a min/max
  // vector intrinsic and return the intrinsic if it is possible.
  // TODO: Support floating point min/max.
  bool AllCmpSingleUse = true;
  SelectPatternResult SelectPattern;
  SelectPattern.Flavor = SPF_UNKNOWN;
  if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) {
        Value *LHS, *RHS;
        auto CurrentPattern = matchSelectPattern(I, LHS, RHS);
        if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor) ||
            CurrentPattern.Flavor == SPF_FMINNUM ||
            CurrentPattern.Flavor == SPF_FMAXNUM ||
            !I->getType()->isIntOrIntVectorTy())
          return false;
        if (SelectPattern.Flavor != SPF_UNKNOWN &&
            SelectPattern.Flavor != CurrentPattern.Flavor)
          return false;
        SelectPattern = CurrentPattern;
        AllCmpSingleUse &=
            match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value()));
        return true;
      })) {
    switch (SelectPattern.Flavor) {
    case SPF_SMIN:
      return {Intrinsic::smin, AllCmpSingleUse};
    case SPF_UMIN:
      return {Intrinsic::umin, AllCmpSingleUse};
    case SPF_SMAX:
      return {Intrinsic::smax, AllCmpSingleUse};
    case SPF_UMAX:
      return {Intrinsic::umax, AllCmpSingleUse};
    default:
      llvm_unreachable("unexpected select pattern flavor");
    }
  }
  return {Intrinsic::not_intrinsic, false};
}

bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
                                 Value *&Start, Value *&Step) {
  // Handle the case of a simple two-predecessor recurrence PHI.
  // There's a lot more that could theoretically be done here, but
  // this is sufficient to catch some interesting cases.
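  // For example (illustrative IR):
  //   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  //   %iv.next = add i32 %iv, 1
  // matches with BO = %iv.next, Start = 0, and Step = 1.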
8095 if (P->getNumIncomingValues() != 2) 8096 return false; 8097 8098 for (unsigned i = 0; i != 2; ++i) { 8099 Value *L = P->getIncomingValue(i); 8100 Value *R = P->getIncomingValue(!i); 8101 auto *LU = dyn_cast<BinaryOperator>(L); 8102 if (!LU) 8103 continue; 8104 unsigned Opcode = LU->getOpcode(); 8105 8106 switch (Opcode) { 8107 default: 8108 continue; 8109 // TODO: Expand list -- xor, div, gep, uaddo, etc.. 8110 case Instruction::LShr: 8111 case Instruction::AShr: 8112 case Instruction::Shl: 8113 case Instruction::Add: 8114 case Instruction::Sub: 8115 case Instruction::And: 8116 case Instruction::Or: 8117 case Instruction::Mul: 8118 case Instruction::FMul: { 8119 Value *LL = LU->getOperand(0); 8120 Value *LR = LU->getOperand(1); 8121 // Find a recurrence. 8122 if (LL == P) 8123 L = LR; 8124 else if (LR == P) 8125 L = LL; 8126 else 8127 continue; // Check for recurrence with L and R flipped. 8128 8129 break; // Match! 8130 } 8131 }; 8132 8133 // We have matched a recurrence of the form: 8134 // %iv = [R, %entry], [%iv.next, %backedge] 8135 // %iv.next = binop %iv, L 8136 // OR 8137 // %iv = [R, %entry], [%iv.next, %backedge] 8138 // %iv.next = binop L, %iv 8139 BO = LU; 8140 Start = R; 8141 Step = L; 8142 return true; 8143 } 8144 return false; 8145 } 8146 8147 bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P, 8148 Value *&Start, Value *&Step) { 8149 BinaryOperator *BO = nullptr; 8150 P = dyn_cast<PHINode>(I->getOperand(0)); 8151 if (!P) 8152 P = dyn_cast<PHINode>(I->getOperand(1)); 8153 return P && matchSimpleRecurrence(P, BO, Start, Step) && BO == I; 8154 } 8155 8156 /// Return true if "icmp Pred LHS RHS" is always true. 8157 static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, 8158 const Value *RHS, const DataLayout &DL, 8159 unsigned Depth) { 8160 if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) 8161 return true; 8162 8163 switch (Pred) { 8164 default: 8165 return false; 8166 8167 case CmpInst::ICMP_SLE: { 8168 const APInt *C; 8169 8170 // LHS s<= LHS +_{nsw} C if C >= 0 8171 if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C)))) 8172 return !C->isNegative(); 8173 return false; 8174 } 8175 8176 case CmpInst::ICMP_ULE: { 8177 // LHS u<= LHS +_{nuw} V for any V 8178 if (match(RHS, m_c_Add(m_Specific(LHS), m_Value())) && 8179 cast<OverflowingBinaryOperator>(RHS)->hasNoUnsignedWrap()) 8180 return true; 8181 8182 // RHS >> V u<= RHS for any V 8183 if (match(LHS, m_LShr(m_Specific(RHS), m_Value()))) 8184 return true; 8185 8186 // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB) 8187 auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B, 8188 const Value *&X, 8189 const APInt *&CA, const APInt *&CB) { 8190 if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) && 8191 match(B, m_NUWAdd(m_Specific(X), m_APInt(CB)))) 8192 return true; 8193 8194 // If X & C == 0 then (X | C) == X +_{nuw} C 8195 if (match(A, m_Or(m_Value(X), m_APInt(CA))) && 8196 match(B, m_Or(m_Specific(X), m_APInt(CB)))) { 8197 KnownBits Known(CA->getBitWidth()); 8198 computeKnownBits(X, Known, DL, Depth + 1, /*AC*/ nullptr, 8199 /*CxtI*/ nullptr, /*DT*/ nullptr); 8200 if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero)) 8201 return true; 8202 } 8203 8204 return false; 8205 }; 8206 8207 const Value *X; 8208 const APInt *CLHS, *CRHS; 8209 if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS)) 8210 return CLHS->ule(*CRHS); 8211 8212 return false; 8213 } 8214 } 8215 } 8216 8217 /// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred 8218 /// ALHS ARHS" is true. 
/// Otherwise, return std::nullopt.
static std::optional<bool>
isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
                      const Value *ARHS, const Value *BLHS, const Value *BRHS,
                      const DataLayout &DL, unsigned Depth) {
  switch (Pred) {
  default:
    return std::nullopt;

  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SLE:
    if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth) &&
        isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_SGT:
  case CmpInst::ICMP_SGE:
    if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS, DL, Depth) &&
        isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS, DL, Depth))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_ULE:
    if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth) &&
        isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
    if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS, DL, Depth) &&
        isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS, DL, Depth))
      return true;
    return std::nullopt;
  }
}

/// Return true if the operands of two compares (expanded as "L0 pred L1" and
/// "R0 pred R1") match. AreSwappedOps is set to true when the operands match,
/// but are swapped.
static bool areMatchingOperands(const Value *L0, const Value *L1,
                                const Value *R0, const Value *R1,
                                bool &AreSwappedOps) {
  bool AreMatchingOps = (L0 == R0 && L1 == R1);
  AreSwappedOps = (L0 == R1 && L1 == R0);
  return AreMatchingOps || AreSwappedOps;
}

/// Return true if "icmp1 LPred X, Y" implies "icmp2 RPred X, Y" is true.
/// Return false if "icmp1 LPred X, Y" implies "icmp2 RPred X, Y" is false.
/// Otherwise, return std::nullopt if we can't infer anything.
static std::optional<bool>
isImpliedCondMatchingOperands(CmpInst::Predicate LPred,
                              CmpInst::Predicate RPred, bool AreSwappedOps) {
  // Canonicalize the predicate as if the operands were not commuted.
  if (AreSwappedOps)
    RPred = ICmpInst::getSwappedPredicate(RPred);

  if (CmpInst::isImpliedTrueByMatchingCmp(LPred, RPred))
    return true;
  if (CmpInst::isImpliedFalseByMatchingCmp(LPred, RPred))
    return false;

  return std::nullopt;
}

/// Return true if "icmp LPred X, LC" implies "icmp RPred X, RC" is true.
/// Return false if "icmp LPred X, LC" implies "icmp RPred X, RC" is false.
/// Otherwise, return std::nullopt if we can't infer anything.
static std::optional<bool> isImpliedCondCommonOperandWithConstants(
    CmpInst::Predicate LPred, const APInt &LC, CmpInst::Predicate RPred,
    const APInt &RC) {
  ConstantRange DomCR = ConstantRange::makeExactICmpRegion(LPred, LC);
  ConstantRange CR = ConstantRange::makeExactICmpRegion(RPred, RC);
  ConstantRange Intersection = DomCR.intersectWith(CR);
  ConstantRange Difference = DomCR.difference(CR);
  if (Intersection.isEmptySet())
    return false;
  if (Difference.isEmptySet())
    return true;
  return std::nullopt;
}

/// Return true if LHS implies RHS (expanded to its components as "R0 RPred R1")
/// is true. Return false if LHS implies RHS is false. Otherwise, return
/// std::nullopt if we can't infer anything.
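/// For example (illustrative): "x u< 5" implies "x u< 10" is true, implies
/// "x u> 20" is false, and implies nothing about an unrelated "y u< 10".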
static std::optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
                                              CmpInst::Predicate RPred,
                                              const Value *R0, const Value *R1,
                                              const DataLayout &DL,
                                              bool LHSIsTrue, unsigned Depth) {
  Value *L0 = LHS->getOperand(0);
  Value *L1 = LHS->getOperand(1);

  // The rest of the logic assumes the LHS condition is true. If that's not the
  // case, invert the predicate to make it so.
  CmpInst::Predicate LPred =
      LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate();

  // Can we infer anything when the 0-operands match and the 1-operands are
  // constants (not necessarily matching)?
  const APInt *LC, *RC;
  if (L0 == R0 && match(L1, m_APInt(LC)) && match(R1, m_APInt(RC)))
    return isImpliedCondCommonOperandWithConstants(LPred, *LC, RPred, *RC);

  // Can we infer anything when the two compares have matching operands?
  bool AreSwappedOps;
  if (areMatchingOperands(L0, L1, R0, R1, AreSwappedOps))
    return isImpliedCondMatchingOperands(LPred, RPred, AreSwappedOps);

  // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 <u L1 implies R0 <u R1
  if (ICmpInst::isUnsigned(LPred) && ICmpInst::isUnsigned(RPred)) {
    if (L0 == R1) {
      std::swap(R0, R1);
      RPred = ICmpInst::getSwappedPredicate(RPred);
    }
    if (L1 == R0) {
      std::swap(L0, L1);
      LPred = ICmpInst::getSwappedPredicate(LPred);
    }
    if (L1 == R1) {
      std::swap(L0, L1);
      LPred = ICmpInst::getSwappedPredicate(LPred);
      std::swap(R0, R1);
      RPred = ICmpInst::getSwappedPredicate(RPred);
    }
    if (L0 == R0 &&
        (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) &&
        (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) &&
        match(L0, m_c_Add(m_Specific(L1), m_Specific(R1))))
      return LPred == RPred;
  }

  if (LPred == RPred)
    return isImpliedCondOperands(LPred, L0, L1, R0, R1, DL, Depth);

  return std::nullopt;
}

/// Return true if LHS implies RHS is true. Return false if LHS implies RHS is
/// false. Otherwise, return std::nullopt if we can't infer anything. We
/// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select'
/// instruction.
static std::optional<bool>
isImpliedCondAndOr(const Instruction *LHS, CmpInst::Predicate RHSPred,
                   const Value *RHSOp0, const Value *RHSOp1,
                   const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
  // The LHS must be an 'or', 'and', or a 'select' instruction.
  assert((LHS->getOpcode() == Instruction::And ||
          LHS->getOpcode() == Instruction::Or ||
          LHS->getOpcode() == Instruction::Select) &&
         "Expected LHS to be 'and', 'or', or 'select'.");

  assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit");

  // If the result of an 'or' is false, then we know both legs of the 'or' are
  // false. Similarly, if the result of an 'and' is true, then we know both
  // legs of the 'and' are true.
  const Value *ALHS, *ARHS;
  if ((!LHSIsTrue && match(LHS, m_LogicalOr(m_Value(ALHS), m_Value(ARHS)))) ||
      (LHSIsTrue && match(LHS, m_LogicalAnd(m_Value(ALHS), m_Value(ARHS))))) {
    // FIXME: Make this non-recursive.
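    // (Illustrative: if "(x u< 4 | y == 0)" is known false, then "x u< 4" is
    // false on its own, which in turn implies e.g. "x u< 2" is also false.)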
8381 if (std::optional<bool> Implication = isImpliedCondition( 8382 ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1)) 8383 return Implication; 8384 if (std::optional<bool> Implication = isImpliedCondition( 8385 ARHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1)) 8386 return Implication; 8387 return std::nullopt; 8388 } 8389 return std::nullopt; 8390 } 8391 8392 std::optional<bool> 8393 llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred, 8394 const Value *RHSOp0, const Value *RHSOp1, 8395 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { 8396 // Bail out when we hit the limit. 8397 if (Depth == MaxAnalysisRecursionDepth) 8398 return std::nullopt; 8399 8400 // A mismatch occurs when we compare a scalar cmp to a vector cmp, for 8401 // example. 8402 if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy()) 8403 return std::nullopt; 8404 8405 assert(LHS->getType()->isIntOrIntVectorTy(1) && 8406 "Expected integer type only!"); 8407 8408 // Both LHS and RHS are icmps. 8409 const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS); 8410 if (LHSCmp) 8411 return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, 8412 Depth); 8413 8414 /// The LHS should be an 'or', 'and', or a 'select' instruction. We expect 8415 /// the RHS to be an icmp. 8416 /// FIXME: Add support for and/or/select on the RHS. 8417 if (const Instruction *LHSI = dyn_cast<Instruction>(LHS)) { 8418 if ((LHSI->getOpcode() == Instruction::And || 8419 LHSI->getOpcode() == Instruction::Or || 8420 LHSI->getOpcode() == Instruction::Select)) 8421 return isImpliedCondAndOr(LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, 8422 Depth); 8423 } 8424 return std::nullopt; 8425 } 8426 8427 std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, 8428 const DataLayout &DL, 8429 bool LHSIsTrue, unsigned Depth) { 8430 // LHS ==> RHS by definition 8431 if (LHS == RHS) 8432 return LHSIsTrue; 8433 8434 if (const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS)) 8435 return isImpliedCondition(LHS, RHSCmp->getPredicate(), 8436 RHSCmp->getOperand(0), RHSCmp->getOperand(1), DL, 8437 LHSIsTrue, Depth); 8438 8439 if (Depth == MaxAnalysisRecursionDepth) 8440 return std::nullopt; 8441 8442 // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2 8443 // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2 8444 const Value *RHS1, *RHS2; 8445 if (match(RHS, m_LogicalOr(m_Value(RHS1), m_Value(RHS2)))) { 8446 if (std::optional<bool> Imp = 8447 isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1)) 8448 if (*Imp == true) 8449 return true; 8450 if (std::optional<bool> Imp = 8451 isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1)) 8452 if (*Imp == true) 8453 return true; 8454 } 8455 if (match(RHS, m_LogicalAnd(m_Value(RHS1), m_Value(RHS2)))) { 8456 if (std::optional<bool> Imp = 8457 isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1)) 8458 if (*Imp == false) 8459 return false; 8460 if (std::optional<bool> Imp = 8461 isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1)) 8462 if (*Imp == false) 8463 return false; 8464 } 8465 8466 return std::nullopt; 8467 } 8468 8469 // Returns a pair (Condition, ConditionIsTrue), where Condition is a branch 8470 // condition dominating ContextI or nullptr, if no condition is found. 8471 static std::pair<Value *, bool> 8472 getDomPredecessorCondition(const Instruction *ContextI) { 8473 if (!ContextI || !ContextI->getParent()) 8474 return {nullptr, false}; 8475 8476 // TODO: This is a poor/cheap way to determine dominance. 
Should we use a 8477 // dominator tree (eg, from a SimplifyQuery) instead? 8478 const BasicBlock *ContextBB = ContextI->getParent(); 8479 const BasicBlock *PredBB = ContextBB->getSinglePredecessor(); 8480 if (!PredBB) 8481 return {nullptr, false}; 8482 8483 // We need a conditional branch in the predecessor. 8484 Value *PredCond; 8485 BasicBlock *TrueBB, *FalseBB; 8486 if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB))) 8487 return {nullptr, false}; 8488 8489 // The branch should get simplified. Don't bother simplifying this condition. 8490 if (TrueBB == FalseBB) 8491 return {nullptr, false}; 8492 8493 assert((TrueBB == ContextBB || FalseBB == ContextBB) && 8494 "Predecessor block does not point to successor?"); 8495 8496 // Is this condition implied by the predecessor condition? 8497 return {PredCond, TrueBB == ContextBB}; 8498 } 8499 8500 std::optional<bool> llvm::isImpliedByDomCondition(const Value *Cond, 8501 const Instruction *ContextI, 8502 const DataLayout &DL) { 8503 assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool"); 8504 auto PredCond = getDomPredecessorCondition(ContextI); 8505 if (PredCond.first) 8506 return isImpliedCondition(PredCond.first, Cond, DL, PredCond.second); 8507 return std::nullopt; 8508 } 8509 8510 std::optional<bool> llvm::isImpliedByDomCondition(CmpInst::Predicate Pred, 8511 const Value *LHS, 8512 const Value *RHS, 8513 const Instruction *ContextI, 8514 const DataLayout &DL) { 8515 auto PredCond = getDomPredecessorCondition(ContextI); 8516 if (PredCond.first) 8517 return isImpliedCondition(PredCond.first, Pred, LHS, RHS, DL, 8518 PredCond.second); 8519 return std::nullopt; 8520 } 8521 8522 static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, 8523 APInt &Upper, const InstrInfoQuery &IIQ, 8524 bool PreferSignedRange) { 8525 unsigned Width = Lower.getBitWidth(); 8526 const APInt *C; 8527 switch (BO.getOpcode()) { 8528 case Instruction::Add: 8529 if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) { 8530 bool HasNSW = IIQ.hasNoSignedWrap(&BO); 8531 bool HasNUW = IIQ.hasNoUnsignedWrap(&BO); 8532 8533 // If the caller expects a signed compare, then try to use a signed range. 8534 // Otherwise if both no-wraps are set, use the unsigned range because it 8535 // is never larger than the signed range. Example: 8536 // "add nuw nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125]. 8537 if (PreferSignedRange && HasNSW && HasNUW) 8538 HasNUW = false; 8539 8540 if (HasNUW) { 8541 // 'add nuw x, C' produces [C, UINT_MAX]. 8542 Lower = *C; 8543 } else if (HasNSW) { 8544 if (C->isNegative()) { 8545 // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C]. 8546 Lower = APInt::getSignedMinValue(Width); 8547 Upper = APInt::getSignedMaxValue(Width) + *C + 1; 8548 } else { 8549 // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX]. 8550 Lower = APInt::getSignedMinValue(Width) + *C; 8551 Upper = APInt::getSignedMaxValue(Width) + 1; 8552 } 8553 } 8554 } 8555 break; 8556 8557 case Instruction::And: 8558 if (match(BO.getOperand(1), m_APInt(C))) 8559 // 'and x, C' produces [0, C]. 8560 Upper = *C + 1; 8561 // X & -X is a power of two or zero. So we can cap the value at max power of 8562 // two. 8563 if (match(BO.getOperand(0), m_Neg(m_Specific(BO.getOperand(1)))) || 8564 match(BO.getOperand(1), m_Neg(m_Specific(BO.getOperand(0))))) 8565 Upper = APInt::getSignedMinValue(Width) + 1; 8566 break; 8567 8568 case Instruction::Or: 8569 if (match(BO.getOperand(1), m_APInt(C))) 8570 // 'or x, C' produces [C, UINT_MAX]. 
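      // (Every bit set in C is also set in x | C, so the result is u>= C;
      // illustrative: "or i8 %x, -128" is always >= 128 unsigned.)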
      Lower = *C;
    break;

  case Instruction::AShr:
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
      Lower = APInt::getSignedMinValue(Width).ashr(*C);
      Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      if (C->isNegative()) {
        // 'ashr C, x' produces [C, C >> (Width-1)]
        Lower = *C;
        Upper = C->ashr(ShiftAmount) + 1;
      } else {
        // 'ashr C, x' produces [C >> (Width-1), C]
        Lower = C->ashr(ShiftAmount);
        Upper = *C + 1;
      }
    }
    break;

  case Instruction::LShr:
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'lshr x, C' produces [0, UINT_MAX >> C].
      Upper = APInt::getAllOnes(Width).lshr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      // 'lshr C, x' produces [C >> (Width-1), C].
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      Lower = C->lshr(ShiftAmount);
      Upper = *C + 1;
    }
    break;

  case Instruction::Shl:
    if (match(BO.getOperand(0), m_APInt(C))) {
      if (IIQ.hasNoUnsignedWrap(&BO)) {
        // 'shl nuw C, x' produces [C, C << CLZ(C)]
        Lower = *C;
        Upper = Lower.shl(Lower.countl_zero()) + 1;
      } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
        if (C->isNegative()) {
          // 'shl nsw C, x' produces [C << CLO(C)-1, C]
          unsigned ShiftAmount = C->countl_one() - 1;
          Lower = C->shl(ShiftAmount);
          Upper = *C + 1;
        } else {
          // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
          unsigned ShiftAmount = C->countl_zero() - 1;
          Lower = *C;
          Upper = C->shl(ShiftAmount) + 1;
        }
      } else {
        // If lowbit is set, value can never be zero.
        if ((*C)[0])
          Lower = APInt::getOneBitSet(Width, 0);
        // If we are shifting a constant, the largest the result can be is when
        // the longest sequence of consecutive ones is shifted into the high
        // bits (breaking ties toward the sequence that lands higher). For now
        // we take a liberal upper bound by just popcounting the constant.
        // TODO: There may be a bitwise trick to find the longest/highest
        // consecutive sequence of ones (the naive method is an O(Width) loop).
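        // (Illustrative for i8 with C = 0b01100101: popcount is 4, so the
        // inclusive upper bound below is 0b11110000, which covers every
        // possible shift of C.)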
8637 Upper = APInt::getHighBitsSet(Width, C->popcount()) + 1; 8638 } 8639 } else if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) { 8640 Upper = APInt::getBitsSetFrom(Width, C->getZExtValue()) + 1; 8641 } 8642 break; 8643 8644 case Instruction::SDiv: 8645 if (match(BO.getOperand(1), m_APInt(C))) { 8646 APInt IntMin = APInt::getSignedMinValue(Width); 8647 APInt IntMax = APInt::getSignedMaxValue(Width); 8648 if (C->isAllOnes()) { 8649 // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX] 8650 // where C != -1 and C != 0 and C != 1 8651 Lower = IntMin + 1; 8652 Upper = IntMax + 1; 8653 } else if (C->countl_zero() < Width - 1) { 8654 // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C] 8655 // where C != -1 and C != 0 and C != 1 8656 Lower = IntMin.sdiv(*C); 8657 Upper = IntMax.sdiv(*C); 8658 if (Lower.sgt(Upper)) 8659 std::swap(Lower, Upper); 8660 Upper = Upper + 1; 8661 assert(Upper != Lower && "Upper part of range has wrapped!"); 8662 } 8663 } else if (match(BO.getOperand(0), m_APInt(C))) { 8664 if (C->isMinSignedValue()) { 8665 // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2]. 8666 Lower = *C; 8667 Upper = Lower.lshr(1) + 1; 8668 } else { 8669 // 'sdiv C, x' produces [-|C|, |C|]. 8670 Upper = C->abs() + 1; 8671 Lower = (-Upper) + 1; 8672 } 8673 } 8674 break; 8675 8676 case Instruction::UDiv: 8677 if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) { 8678 // 'udiv x, C' produces [0, UINT_MAX / C]. 8679 Upper = APInt::getMaxValue(Width).udiv(*C) + 1; 8680 } else if (match(BO.getOperand(0), m_APInt(C))) { 8681 // 'udiv C, x' produces [0, C]. 8682 Upper = *C + 1; 8683 } 8684 break; 8685 8686 case Instruction::SRem: 8687 if (match(BO.getOperand(1), m_APInt(C))) { 8688 // 'srem x, C' produces (-|C|, |C|). 8689 Upper = C->abs(); 8690 Lower = (-Upper) + 1; 8691 } 8692 break; 8693 8694 case Instruction::URem: 8695 if (match(BO.getOperand(1), m_APInt(C))) 8696 // 'urem x, C' produces [0, C). 8697 Upper = *C; 8698 break; 8699 8700 default: 8701 break; 8702 } 8703 } 8704 8705 static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II) { 8706 unsigned Width = II.getType()->getScalarSizeInBits(); 8707 const APInt *C; 8708 switch (II.getIntrinsicID()) { 8709 case Intrinsic::ctpop: 8710 case Intrinsic::ctlz: 8711 case Intrinsic::cttz: 8712 // Maximum of set/clear bits is the bit width. 8713 return ConstantRange::getNonEmpty(APInt::getZero(Width), 8714 APInt(Width, Width + 1)); 8715 case Intrinsic::uadd_sat: 8716 // uadd.sat(x, C) produces [C, UINT_MAX]. 8717 if (match(II.getOperand(0), m_APInt(C)) || 8718 match(II.getOperand(1), m_APInt(C))) 8719 return ConstantRange::getNonEmpty(*C, APInt::getZero(Width)); 8720 break; 8721 case Intrinsic::sadd_sat: 8722 if (match(II.getOperand(0), m_APInt(C)) || 8723 match(II.getOperand(1), m_APInt(C))) { 8724 if (C->isNegative()) 8725 // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)]. 8726 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width), 8727 APInt::getSignedMaxValue(Width) + *C + 8728 1); 8729 8730 // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX]. 8731 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) + *C, 8732 APInt::getSignedMaxValue(Width) + 1); 8733 } 8734 break; 8735 case Intrinsic::usub_sat: 8736 // usub.sat(C, x) produces [0, C]. 8737 if (match(II.getOperand(0), m_APInt(C))) 8738 return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1); 8739 8740 // usub.sat(x, C) produces [0, UINT_MAX - C]. 
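    // (x is at most UINT_MAX and the subtraction saturates at 0; illustrative
    // for i8: usub.sat(%x, 200) always lies in [0, 55].)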
8741 if (match(II.getOperand(1), m_APInt(C))) 8742 return ConstantRange::getNonEmpty(APInt::getZero(Width), 8743 APInt::getMaxValue(Width) - *C + 1); 8744 break; 8745 case Intrinsic::ssub_sat: 8746 if (match(II.getOperand(0), m_APInt(C))) { 8747 if (C->isNegative()) 8748 // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)]. 8749 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width), 8750 *C - APInt::getSignedMinValue(Width) + 8751 1); 8752 8753 // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX]. 8754 return ConstantRange::getNonEmpty(*C - APInt::getSignedMaxValue(Width), 8755 APInt::getSignedMaxValue(Width) + 1); 8756 } else if (match(II.getOperand(1), m_APInt(C))) { 8757 if (C->isNegative()) 8758 // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]: 8759 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) - *C, 8760 APInt::getSignedMaxValue(Width) + 1); 8761 8762 // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C]. 8763 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width), 8764 APInt::getSignedMaxValue(Width) - *C + 8765 1); 8766 } 8767 break; 8768 case Intrinsic::umin: 8769 case Intrinsic::umax: 8770 case Intrinsic::smin: 8771 case Intrinsic::smax: 8772 if (!match(II.getOperand(0), m_APInt(C)) && 8773 !match(II.getOperand(1), m_APInt(C))) 8774 break; 8775 8776 switch (II.getIntrinsicID()) { 8777 case Intrinsic::umin: 8778 return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1); 8779 case Intrinsic::umax: 8780 return ConstantRange::getNonEmpty(*C, APInt::getZero(Width)); 8781 case Intrinsic::smin: 8782 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width), 8783 *C + 1); 8784 case Intrinsic::smax: 8785 return ConstantRange::getNonEmpty(*C, 8786 APInt::getSignedMaxValue(Width) + 1); 8787 default: 8788 llvm_unreachable("Must be min/max intrinsic"); 8789 } 8790 break; 8791 case Intrinsic::abs: 8792 // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX], 8793 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN. 8794 if (match(II.getOperand(1), m_One())) 8795 return ConstantRange::getNonEmpty(APInt::getZero(Width), 8796 APInt::getSignedMaxValue(Width) + 1); 8797 8798 return ConstantRange::getNonEmpty(APInt::getZero(Width), 8799 APInt::getSignedMinValue(Width) + 1); 8800 case Intrinsic::vscale: 8801 if (!II.getParent() || !II.getFunction()) 8802 break; 8803 return getVScaleRange(II.getFunction(), Width); 8804 default: 8805 break; 8806 } 8807 8808 return ConstantRange::getFull(Width); 8809 } 8810 8811 static ConstantRange getRangeForSelectPattern(const SelectInst &SI, 8812 const InstrInfoQuery &IIQ) { 8813 unsigned BitWidth = SI.getType()->getScalarSizeInBits(); 8814 const Value *LHS = nullptr, *RHS = nullptr; 8815 SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS); 8816 if (R.Flavor == SPF_UNKNOWN) 8817 return ConstantRange::getFull(BitWidth); 8818 8819 if (R.Flavor == SelectPatternFlavor::SPF_ABS) { 8820 // If the negation part of the abs (in RHS) has the NSW flag, 8821 // then the result of abs(X) is [0..SIGNED_MAX], 8822 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN. 
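    // (Illustrative for i8: without nsw on the negation, abs of -128 wraps
    // back to -128, so SIGNED_MIN must remain in the conservative range.)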
8823 if (match(RHS, m_Neg(m_Specific(LHS))) && 8824 IIQ.hasNoSignedWrap(cast<Instruction>(RHS))) 8825 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), 8826 APInt::getSignedMaxValue(BitWidth) + 1); 8827 8828 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), 8829 APInt::getSignedMinValue(BitWidth) + 1); 8830 } 8831 8832 if (R.Flavor == SelectPatternFlavor::SPF_NABS) { 8833 // The result of -abs(X) is <= 0. 8834 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth), 8835 APInt(BitWidth, 1)); 8836 } 8837 8838 const APInt *C; 8839 if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C))) 8840 return ConstantRange::getFull(BitWidth); 8841 8842 switch (R.Flavor) { 8843 case SPF_UMIN: 8844 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), *C + 1); 8845 case SPF_UMAX: 8846 return ConstantRange::getNonEmpty(*C, APInt::getZero(BitWidth)); 8847 case SPF_SMIN: 8848 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth), 8849 *C + 1); 8850 case SPF_SMAX: 8851 return ConstantRange::getNonEmpty(*C, 8852 APInt::getSignedMaxValue(BitWidth) + 1); 8853 default: 8854 return ConstantRange::getFull(BitWidth); 8855 } 8856 } 8857 8858 static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) { 8859 // The maximum representable value of a half is 65504. For floats the maximum 8860 // value is 3.4e38 which requires roughly 129 bits. 8861 unsigned BitWidth = I->getType()->getScalarSizeInBits(); 8862 if (!I->getOperand(0)->getType()->getScalarType()->isHalfTy()) 8863 return; 8864 if (isa<FPToSIInst>(I) && BitWidth >= 17) { 8865 Lower = APInt(BitWidth, -65504); 8866 Upper = APInt(BitWidth, 65505); 8867 } 8868 8869 if (isa<FPToUIInst>(I) && BitWidth >= 16) { 8870 // For a fptoui the lower limit is left as 0. 8871 Upper = APInt(BitWidth, 65505); 8872 } 8873 } 8874 8875 ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned, 8876 bool UseInstrInfo, AssumptionCache *AC, 8877 const Instruction *CtxI, 8878 const DominatorTree *DT, 8879 unsigned Depth) { 8880 assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction"); 8881 8882 if (Depth == MaxAnalysisRecursionDepth) 8883 return ConstantRange::getFull(V->getType()->getScalarSizeInBits()); 8884 8885 const APInt *C; 8886 if (match(V, m_APInt(C))) 8887 return ConstantRange(*C); 8888 unsigned BitWidth = V->getType()->getScalarSizeInBits(); 8889 8890 if (auto *VC = dyn_cast<ConstantDataVector>(V)) { 8891 ConstantRange CR = ConstantRange::getEmpty(BitWidth); 8892 for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem; 8893 ++ElemIdx) 8894 CR = CR.unionWith(VC->getElementAsAPInt(ElemIdx)); 8895 return CR; 8896 } 8897 8898 InstrInfoQuery IIQ(UseInstrInfo); 8899 ConstantRange CR = ConstantRange::getFull(BitWidth); 8900 if (auto *BO = dyn_cast<BinaryOperator>(V)) { 8901 APInt Lower = APInt(BitWidth, 0); 8902 APInt Upper = APInt(BitWidth, 0); 8903 // TODO: Return ConstantRange. 
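    // (If setLimitsForBinOp leaves Lower == Upper == 0 untouched, getNonEmpty
    // below degrades gracefully to the full range.)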
8904 setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned); 8905 CR = ConstantRange::getNonEmpty(Lower, Upper); 8906 } else if (auto *II = dyn_cast<IntrinsicInst>(V)) 8907 CR = getRangeForIntrinsic(*II); 8908 else if (auto *SI = dyn_cast<SelectInst>(V)) { 8909 ConstantRange CRTrue = computeConstantRange( 8910 SI->getTrueValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1); 8911 ConstantRange CRFalse = computeConstantRange( 8912 SI->getFalseValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1); 8913 CR = CRTrue.unionWith(CRFalse); 8914 CR = CR.intersectWith(getRangeForSelectPattern(*SI, IIQ)); 8915 } else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V)) { 8916 APInt Lower = APInt(BitWidth, 0); 8917 APInt Upper = APInt(BitWidth, 0); 8918 // TODO: Return ConstantRange. 8919 setLimitForFPToI(cast<Instruction>(V), Lower, Upper); 8920 CR = ConstantRange::getNonEmpty(Lower, Upper); 8921 } 8922 8923 if (auto *I = dyn_cast<Instruction>(V)) 8924 if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range)) 8925 CR = CR.intersectWith(getConstantRangeFromMetadata(*Range)); 8926 8927 if (CtxI && AC) { 8928 // Try to restrict the range based on information from assumptions. 8929 for (auto &AssumeVH : AC->assumptionsFor(V)) { 8930 if (!AssumeVH) 8931 continue; 8932 CallInst *I = cast<CallInst>(AssumeVH); 8933 assert(I->getParent()->getParent() == CtxI->getParent()->getParent() && 8934 "Got assumption for the wrong function!"); 8935 assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && 8936 "must be an assume intrinsic"); 8937 8938 if (!isValidAssumeForContext(I, CtxI, DT)) 8939 continue; 8940 Value *Arg = I->getArgOperand(0); 8941 ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg); 8942 // Currently we just use information from comparisons. 8943 if (!Cmp || Cmp->getOperand(0) != V) 8944 continue; 8945 // TODO: Set "ForSigned" parameter via Cmp->isSigned()? 8946 ConstantRange RHS = 8947 computeConstantRange(Cmp->getOperand(1), /* ForSigned */ false, 8948 UseInstrInfo, AC, I, DT, Depth + 1); 8949 CR = CR.intersectWith( 8950 ConstantRange::makeAllowedICmpRegion(Cmp->getPredicate(), RHS)); 8951 } 8952 } 8953 8954 return CR; 8955 } 8956
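// Usage sketch (illustrative only, not part of this file's API surface): a
// caller that wants to prove a value can never be zero might combine this
// with ConstantRange along these lines:
//
//   ConstantRange CR = computeConstantRange(V, /*ForSigned=*/false,
//                                           /*UseInstrInfo=*/true);
//   if (!CR.contains(APInt::getZero(CR.getBitWidth())))
//     ...; // V is known non-zero under this range model.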