//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines routines for folding instructions into constants.
//
// Also, to supplement the basic IR ConstantExpr simplifications,
// this file defines some additional folding routines that can make use of
// DataLayout information. These functions cannot go in IR due to library
// dependency issues.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Config/config.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantFold.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/NVVMIntrinsicUtils.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cerrno>
#include <cfenv>
#include <cmath>
#include <cstdint>

using namespace llvm;

static cl::opt<bool> DisableFPCallFolding(
    "disable-fp-call-folding",
    cl::desc("Disable constant-folding of FP intrinsics and libcalls."),
    cl::init(false), cl::Hidden);

namespace {

//===----------------------------------------------------------------------===//
// Constant Folding internal helper functions
//===----------------------------------------------------------------------===//

static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
                                        Constant *C, Type *SrcEltTy,
                                        unsigned NumSrcElts,
                                        const DataLayout &DL) {
  // Now that we know that the input value is a vector of integers, just shift
  // and insert them into our result.
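  // Illustrative example (not from the original source): on a little-endian
  // target, bitcasting <2 x i16> <i16 0x1122, i16 0x3344> to i32 processes
  // element 1 first and element 0 last, so element 0 ends up in the low bits
  // and the result is 0x33441122; on a big-endian target the elements are
  // processed in order and the result is 0x11223344.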
  unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
  for (unsigned i = 0; i != NumSrcElts; ++i) {
    Constant *Element;
    if (DL.isLittleEndian())
      Element = C->getAggregateElement(NumSrcElts - i - 1);
    else
      Element = C->getAggregateElement(i);

    if (isa_and_nonnull<UndefValue>(Element)) {
      Result <<= BitShift;
      continue;
    }

    auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
    if (!ElementCI)
      return ConstantExpr::getBitCast(C, DestTy);

    Result <<= BitShift;
    Result |= ElementCI->getValue().zext(Result.getBitWidth());
  }

  return nullptr;
}

/// Constant fold bitcast, symbolically evaluating it with DataLayout.
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
  assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) &&
         "Invalid constantexpr bitcast!");

  // Catch the obvious splat cases.
  if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy, DL))
    return Res;

  if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
    // Handle a vector->scalar integer/fp cast.
    if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
      unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements();
      Type *SrcEltTy = VTy->getElementType();

      // If this is a vector of floating-point values, convert it to a vector
      // of integers to simplify things.
      if (SrcEltTy->isFloatingPointTy()) {
        unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
        auto *SrcIVTy = FixedVectorType::get(
            IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
        // Ask IR to do the conversion now that #elts line up.
        C = ConstantExpr::getBitCast(C, SrcIVTy);
      }

      APInt Result(DL.getTypeSizeInBits(DestTy), 0);
      if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
                                                SrcEltTy, NumSrcElts, DL))
        return CE;

      if (isa<IntegerType>(DestTy))
        return ConstantInt::get(DestTy, Result);

      APFloat FP(DestTy->getFltSemantics(), Result);
      return ConstantFP::get(DestTy->getContext(), FP);
    }
  }

  // The code below only handles casts to vectors currently.
  auto *DestVTy = dyn_cast<VectorType>(DestTy);
  if (!DestVTy)
    return ConstantExpr::getBitCast(C, DestTy);

  // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
  // vector so the code below can handle it uniformly.
  if (!isa<VectorType>(C->getType()) &&
      (isa<ConstantFP>(C) || isa<ConstantInt>(C))) {
    Constant *Ops = C; // don't take the address of C!
    return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
  }

  // Some of what follows may extend to cover scalable vectors but the current
  // implementation is fixed-length specific.
  if (!isa<FixedVectorType>(C->getType()))
    return ConstantExpr::getBitCast(C, DestTy);

  // If this is a bitcast from constant vector -> vector, fold it.
  if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C) &&
      !isa<ConstantInt>(C) && !isa<ConstantFP>(C))
    return ConstantExpr::getBitCast(C, DestTy);

  // If the element counts match, IR can fold it element-wise.
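  // For example, bitcast <4 x float> %v to <4 x i32> simply bitcasts each
  // element in place, so no endianness-aware re-slicing is needed.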
  unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements();
  unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements();
  if (NumDstElt == NumSrcElt)
    return ConstantExpr::getBitCast(C, DestTy);

  Type *SrcEltTy = cast<VectorType>(C->getType())->getElementType();
  Type *DstEltTy = DestVTy->getElementType();

  // Otherwise, we're changing the number of elements in a vector, which
  // requires endianness information to do the right thing. For example,
  //    bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
  // folds to (little endian):
  //    <4 x i32> <i32 0, i32 0, i32 1, i32 0>
  // and to (big endian):
  //    <4 x i32> <i32 0, i32 0, i32 0, i32 1>

  // First things first: we only want to think about integers here, so if
  // we have something in FP form, recast it as integer.
  if (DstEltTy->isFloatingPointTy()) {
    // Fold to a vector of integers with the same size as our FP type.
    unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
    auto *DestIVTy = FixedVectorType::get(
        IntegerType::get(C->getContext(), FPWidth), NumDstElt);
    // Recursively handle this integer conversion, if possible.
    C = FoldBitCast(C, DestIVTy, DL);

    // Finally, IR can handle this now that #elts line up.
    return ConstantExpr::getBitCast(C, DestTy);
  }

  // Okay, we know the destination is integer; if the input is FP, convert
  // it to integer first.
  if (SrcEltTy->isFloatingPointTy()) {
    unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
    auto *SrcIVTy = FixedVectorType::get(
        IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
    // Ask IR to do the conversion now that #elts line up.
    C = ConstantExpr::getBitCast(C, SrcIVTy);
    assert((isa<ConstantVector>(C) || // FIXME: Remove ConstantVector.
            isa<ConstantDataVector>(C) || isa<ConstantInt>(C)) &&
           "Constant folding cannot fail for plain fp->int bitcast!");
  }

  // Now we know that the input and output vectors are both integer vectors
  // of the same size, and that their #elements is not the same. Do the
  // conversion here, which depends on whether the input or output has
  // more elements.
  bool isLittleEndian = DL.isLittleEndian();

  SmallVector<Constant*, 32> Result;
  if (NumDstElt < NumSrcElt) {
    // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
    Constant *Zero = Constant::getNullValue(DstEltTy);
    unsigned Ratio = NumSrcElt/NumDstElt;
    unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
    unsigned SrcElt = 0;
    for (unsigned i = 0; i != NumDstElt; ++i) {
      // Build each element of the result.
      Constant *Elt = Zero;
      unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
      for (unsigned j = 0; j != Ratio; ++j) {
        Constant *Src = C->getAggregateElement(SrcElt++);
        if (isa_and_nonnull<UndefValue>(Src))
          Src = Constant::getNullValue(
              cast<VectorType>(C->getType())->getElementType());
        else
          Src = dyn_cast_or_null<ConstantInt>(Src);
        if (!Src) // Reject constantexpr elements.
          return ConstantExpr::getBitCast(C, DestTy);

        // Zero extend the element to the right size.
        Src = ConstantFoldCastOperand(Instruction::ZExt, Src, Elt->getType(),
                                      DL);
        assert(Src && "Constant folding cannot fail on plain integers");

        // Shift it to the right place, depending on endianness.
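        // E.g. when merging <4 x i32> into <2 x i64> on a little-endian
        // target, the first source element of each pair is shifted by 0 and
        // the second by 32; on big-endian the shift amounts run in reverse
        // (32, then 0).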
        Src = ConstantFoldBinaryOpOperands(
            Instruction::Shl, Src, ConstantInt::get(Src->getType(), ShiftAmt),
            DL);
        assert(Src && "Constant folding cannot fail on plain integers");

        ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;

        // Mix it in.
        Elt = ConstantFoldBinaryOpOperands(Instruction::Or, Elt, Src, DL);
        assert(Elt && "Constant folding cannot fail on plain integers");
      }
      Result.push_back(Elt);
    }
    return ConstantVector::get(Result);
  }

  // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
  unsigned Ratio = NumDstElt/NumSrcElt;
  unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);

  // Loop over each source value, expanding into multiple results.
  for (unsigned i = 0; i != NumSrcElt; ++i) {
    auto *Element = C->getAggregateElement(i);

    if (!Element) // Reject constantexpr elements.
      return ConstantExpr::getBitCast(C, DestTy);

    if (isa<UndefValue>(Element)) {
      // Correctly propagate undef values.
      Result.append(Ratio, UndefValue::get(DstEltTy));
      continue;
    }

    auto *Src = dyn_cast<ConstantInt>(Element);
    if (!Src)
      return ConstantExpr::getBitCast(C, DestTy);

    unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
    for (unsigned j = 0; j != Ratio; ++j) {
      // Shift the piece of the value into the right place, depending on
      // endianness.
      APInt Elt = Src->getValue().lshr(ShiftAmt);
      ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;

      // Truncate and remember this piece.
      Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize)));
    }
  }

  return ConstantVector::get(Result);
}

} // end anonymous namespace

/// If this constant is a constant offset from a global, return the global and
/// the constant. Because of constantexprs, this function is recursive.
bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
                                      APInt &Offset, const DataLayout &DL,
                                      DSOLocalEquivalent **DSOEquiv) {
  if (DSOEquiv)
    *DSOEquiv = nullptr;

  // Trivial case, constant is the global.
  if ((GV = dyn_cast<GlobalValue>(C))) {
    unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
    Offset = APInt(BitWidth, 0);
    return true;
  }

  if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) {
    if (DSOEquiv)
      *DSOEquiv = FoundDSOEquiv;
    GV = FoundDSOEquiv->getGlobalValue();
    unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
    Offset = APInt(BitWidth, 0);
    return true;
  }

  // Otherwise, if this isn't a constant expr, bail out.
  auto *CE = dyn_cast<ConstantExpr>(C);
  if (!CE) return false;

  // Look through ptr->int and ptr->ptr casts.
  if (CE->getOpcode() == Instruction::PtrToInt ||
      CE->getOpcode() == Instruction::BitCast)
    return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL,
                                      DSOEquiv);

  // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
  auto *GEP = dyn_cast<GEPOperator>(CE);
  if (!GEP)
    return false;

  unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
  APInt TmpOffset(BitWidth, 0);

  // If the base isn't a global+constant, we aren't either.
  if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL,
                                  DSOEquiv))
    return false;

  // Otherwise, add any offset that our operands provide.
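  // For example, getelementptr ([5 x i32], ptr @a, i64 0, i64 3) accumulates
  // an offset of 12 bytes (three 4-byte elements) on top of @a.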
  if (!GEP->accumulateConstantOffset(DL, TmpOffset))
    return false;

  Offset = TmpOffset;
  return true;
}

Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
                                               const DataLayout &DL) {
  do {
    Type *SrcTy = C->getType();
    if (SrcTy == DestTy)
      return C;

    TypeSize DestSize = DL.getTypeSizeInBits(DestTy);
    TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy);
    if (!TypeSize::isKnownGE(SrcSize, DestSize))
      return nullptr;

    // Catch the obvious splat cases (since all-zeros can coerce non-integral
    // pointers legally).
    if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy, DL))
      return Res;

    // If the type sizes are the same and a cast is legal, just directly
    // cast the constant.
    // But be careful not to coerce non-integral pointers illegally.
    if (SrcSize == DestSize &&
        DL.isNonIntegralPointerType(SrcTy->getScalarType()) ==
            DL.isNonIntegralPointerType(DestTy->getScalarType())) {
      Instruction::CastOps Cast = Instruction::BitCast;
      // If we are going from a pointer to int or vice versa, we spell the cast
      // differently.
      if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
        Cast = Instruction::IntToPtr;
      else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
        Cast = Instruction::PtrToInt;

      if (CastInst::castIsValid(Cast, C, DestTy))
        return ConstantFoldCastOperand(Cast, C, DestTy, DL);
    }

    // If this isn't an aggregate type, there is nothing we can do to drill
    // down and find a bitcastable constant.
    if (!SrcTy->isAggregateType() && !SrcTy->isVectorTy())
      return nullptr;

    // We're simulating a load through a pointer that was bitcast to point to
    // a different type, so we can try to walk down through the initial
    // elements of an aggregate to see if some part of the aggregate is
    // castable to implement the "load" semantic model.
    if (SrcTy->isStructTy()) {
      // Struct types might have leading zero-length elements like [0 x i32],
      // which are certainly not what we are looking for, so skip them.
      unsigned Elem = 0;
      Constant *ElemC;
      do {
        ElemC = C->getAggregateElement(Elem++);
      } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero());
      C = ElemC;
    } else {
      // For non-byte-sized vector elements, the first element is not
      // necessarily located at the vector base address.
      if (auto *VT = dyn_cast<VectorType>(SrcTy))
        if (!DL.typeSizeEqualsStoreSize(VT->getElementType()))
          return nullptr;

      C = C->getAggregateElement(0u);
    }
  } while (C);

  return nullptr;
}

namespace {

/// Recursive helper to read bits out of a global. C is the constant being
/// copied out of. ByteOffset is an offset into C. CurPtr is the pointer to
/// copy results into and BytesLeft is the number of bytes left in the CurPtr
/// buffer. DL is the DataLayout.
bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
                        unsigned BytesLeft, const DataLayout &DL) {
  assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
         "Out of range access");

  // Reading type padding, return zero.
  if (ByteOffset >= DL.getTypeStoreSize(C->getType()))
    return true;

  // If this element is zero or undefined, we can just return since *CurPtr is
  // zero initialized.
  if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
    return true;

  if (auto *CI = dyn_cast<ConstantInt>(C)) {
    if ((CI->getBitWidth() & 7) != 0)
      return false;
    const APInt &Val = CI->getValue();
    unsigned IntBytes = unsigned(CI->getBitWidth()/8);

    for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
      unsigned n = ByteOffset;
      if (!DL.isLittleEndian())
        n = IntBytes - n - 1;
      CurPtr[i] = Val.extractBits(8, n * 8).getZExtValue();
      ++ByteOffset;
    }
    return true;
  }

  if (auto *CFP = dyn_cast<ConstantFP>(C)) {
    if (CFP->getType()->isDoubleTy()) {
      C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    if (CFP->getType()->isFloatTy()){
      C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    if (CFP->getType()->isHalfTy()){
      C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    return false;
  }

  if (auto *CS = dyn_cast<ConstantStruct>(C)) {
    const StructLayout *SL = DL.getStructLayout(CS->getType());
    unsigned Index = SL->getElementContainingOffset(ByteOffset);
    uint64_t CurEltOffset = SL->getElementOffset(Index);
    ByteOffset -= CurEltOffset;

    while (true) {
      // If the element access is to the element itself and not to tail
      // padding, read the bytes from the element.
      uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());

      if (ByteOffset < EltSize &&
          !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
                              BytesLeft, DL))
        return false;

      ++Index;

      // Check to see if we read from the last struct element; if so, we're
      // done.
      if (Index == CS->getType()->getNumElements())
        return true;

      // If we read all of the bytes we needed from this element we're done.
      uint64_t NextEltOffset = SL->getElementOffset(Index);

      if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
        return true;

      // Move to the next element of the struct.
      CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
      BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
      ByteOffset = 0;
      CurEltOffset = NextEltOffset;
    }
    // not reached.
  }

  if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
      isa<ConstantDataSequential>(C)) {
    uint64_t NumElts, EltSize;
    Type *EltTy;
    if (auto *AT = dyn_cast<ArrayType>(C->getType())) {
      NumElts = AT->getNumElements();
      EltTy = AT->getElementType();
      EltSize = DL.getTypeAllocSize(EltTy);
    } else {
      NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
      EltTy = cast<FixedVectorType>(C->getType())->getElementType();
      // TODO: For non-byte-sized vectors, the current implementation assumes
      // there is padding to the next byte boundary between elements.
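      // E.g. an <8 x i1> initializer is rejected below, since an i1 element's
      // type size (1 bit) differs from its store size (1 byte).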
      if (!DL.typeSizeEqualsStoreSize(EltTy))
        return false;

      EltSize = DL.getTypeStoreSize(EltTy);
    }
    uint64_t Index = ByteOffset / EltSize;
    uint64_t Offset = ByteOffset - Index * EltSize;

    for (; Index != NumElts; ++Index) {
      if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
                              BytesLeft, DL))
        return false;

      uint64_t BytesWritten = EltSize - Offset;
      assert(BytesWritten <= EltSize && "Not indexing into this element?");
      if (BytesWritten >= BytesLeft)
        return true;

      Offset = 0;
      BytesLeft -= BytesWritten;
      CurPtr += BytesWritten;
    }
    return true;
  }

  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
    if (CE->getOpcode() == Instruction::IntToPtr &&
        CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
      return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
                                BytesLeft, DL);
    }
  }

  // Otherwise, unknown initializer type.
  return false;
}

Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
                                       int64_t Offset, const DataLayout &DL) {
  // Bail out early. We do not expect to load from a scalable global variable.
  if (isa<ScalableVectorType>(LoadTy))
    return nullptr;

  auto *IntType = dyn_cast<IntegerType>(LoadTy);

  // If this isn't an integer load we can't fold it directly.
  if (!IntType) {
    // If this is a non-integer load, we can try folding it as an int load and
    // then bitcast the result. This can be useful for union cases. Note
    // that address spaces don't matter here since we're not going to produce
    // an actual new load.
    if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() &&
        !LoadTy->isVectorTy())
      return nullptr;

    Type *MapTy = Type::getIntNTy(C->getContext(),
                                  DL.getTypeSizeInBits(LoadTy).getFixedValue());
    if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) {
      if (Res->isNullValue() && !LoadTy->isX86_AMXTy())
        // Materializing a zero can be done trivially without a bitcast.
        return Constant::getNullValue(LoadTy);
      Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy)
                                                  : LoadTy;
      Res = FoldBitCast(Res, CastTy, DL);
      if (LoadTy->isPtrOrPtrVectorTy()) {
        // For a vector of pointers, we first convert to a vector of integers,
        // then do a vector inttoptr.
        if (Res->isNullValue() && !LoadTy->isX86_AMXTy())
          return Constant::getNullValue(LoadTy);
        if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
          // Be careful not to replace a load of an addrspace value with an
          // inttoptr here.
          return nullptr;
        Res = ConstantExpr::getIntToPtr(Res, LoadTy);
      }
      return Res;
    }
    return nullptr;
  }

  unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
  if (BytesLoaded > 32 || BytesLoaded == 0)
    return nullptr;

  // If we're not accessing anything in this constant, the result is undefined.
  if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
    return PoisonValue::get(IntType);

  // TODO: We should be able to support scalable types.
  TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());
  if (InitializerSize.isScalable())
    return nullptr;

  // If we're not accessing anything in this constant, the result is undefined.
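  // E.g. loading 4 bytes at offset 16 from a 16-byte initializer reads no
  // initialized bytes at all, so the load folds to poison below.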
  if (Offset >= (int64_t)InitializerSize.getFixedValue())
    return PoisonValue::get(IntType);

  unsigned char RawBytes[32] = {0};
  unsigned char *CurPtr = RawBytes;
  unsigned BytesLeft = BytesLoaded;

  // If we're loading off the beginning of the global, some bytes may be valid.
  if (Offset < 0) {
    CurPtr += -Offset;
    BytesLeft += Offset;
    Offset = 0;
  }

  if (!ReadDataFromGlobal(C, Offset, CurPtr, BytesLeft, DL))
    return nullptr;

  APInt ResultVal = APInt(IntType->getBitWidth(), 0);
  if (DL.isLittleEndian()) {
    ResultVal = RawBytes[BytesLoaded - 1];
    for (unsigned i = 1; i != BytesLoaded; ++i) {
      ResultVal <<= 8;
      ResultVal |= RawBytes[BytesLoaded - 1 - i];
    }
  } else {
    ResultVal = RawBytes[0];
    for (unsigned i = 1; i != BytesLoaded; ++i) {
      ResultVal <<= 8;
      ResultVal |= RawBytes[i];
    }
  }

  return ConstantInt::get(IntType->getContext(), ResultVal);
}

} // anonymous namespace

// If GV is a constant with an initializer, read its representation starting
// at Offset and return it as a constant array of unsigned char. Otherwise
// return null.
Constant *llvm::ReadByteArrayFromGlobal(const GlobalVariable *GV,
                                        uint64_t Offset) {
  if (!GV->isConstant() || !GV->hasDefinitiveInitializer())
    return nullptr;

  const DataLayout &DL = GV->getDataLayout();
  Constant *Init = const_cast<Constant *>(GV->getInitializer());
  TypeSize InitSize = DL.getTypeAllocSize(Init->getType());
  if (InitSize < Offset)
    return nullptr;

  uint64_t NBytes = InitSize - Offset;
  if (NBytes > UINT16_MAX)
    // Bail for large initializers in excess of 64K to avoid allocating
    // too much memory.
    // Offset is assumed to be less than or equal to InitSize (this
    // is enforced in ReadDataFromGlobal).
    return nullptr;

  SmallVector<unsigned char, 256> RawBytes(static_cast<size_t>(NBytes));
  unsigned char *CurPtr = RawBytes.data();

  if (!ReadDataFromGlobal(Init, Offset, CurPtr, NBytes, DL))
    return nullptr;

  return ConstantDataArray::get(GV->getContext(), RawBytes);
}

/// If this Offset points exactly to the start of an aggregate element, return
/// that element, otherwise return nullptr.
Constant *getConstantAtOffset(Constant *Base, APInt Offset,
                              const DataLayout &DL) {
  if (Offset.isZero())
    return Base;

  if (!isa<ConstantAggregate>(Base) && !isa<ConstantDataSequential>(Base))
    return nullptr;

  Type *ElemTy = Base->getType();
  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
  if (!Offset.isZero() || !Indices[0].isZero())
    return nullptr;

  Constant *C = Base;
  for (const APInt &Index : drop_begin(Indices)) {
    if (Index.isNegative() || Index.getActiveBits() >= 32)
      return nullptr;

    C = C->getAggregateElement(Index.getZExtValue());
    if (!C)
      return nullptr;
  }

  return C;
}

Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
                                          const APInt &Offset,
                                          const DataLayout &DL) {
  if (Constant *AtOffset = getConstantAtOffset(C, Offset, DL))
    if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL))
      return Result;

  // Explicitly check for out-of-bounds access, so we return poison even if the
  // constant is a uniform value.
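  // E.g. a load of i32 at offset 64 from a [4 x i32] zeroinitializer global
  // folds to poison here rather than to zero via the uniform-value fold.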
  TypeSize Size = DL.getTypeAllocSize(C->getType());
  if (!Size.isScalable() && Offset.sge(Size.getFixedValue()))
    return PoisonValue::get(Ty);

  // Try an offset-independent fold of a uniform value.
  if (Constant *Result = ConstantFoldLoadFromUniformValue(C, Ty, DL))
    return Result;

  // Try hard to fold loads from bitcasted strange and non-type-safe things.
  if (Offset.getSignificantBits() <= 64)
    if (Constant *Result =
            FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL))
      return Result;

  return nullptr;
}

Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
                                          const DataLayout &DL) {
  return ConstantFoldLoadFromConst(C, Ty, APInt(64, 0), DL);
}

Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
                                             APInt Offset,
                                             const DataLayout &DL) {
  // We can only fold loads from constant globals with a definitive
  // initializer. Check this upfront, to skip expensive offset calculations.
  auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    return nullptr;

  C = cast<Constant>(C->stripAndAccumulateConstantOffsets(
      DL, Offset, /* AllowNonInbounds */ true));

  if (C == GV)
    if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty,
                                                     Offset, DL))
      return Result;

  // If this load comes from anywhere in a uniform constant global, the value
  // is always the same, regardless of the loaded offset.
  return ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty, DL);
}

Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
                                             const DataLayout &DL) {
  APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 0);
  return ConstantFoldLoadFromConstPtr(C, Ty, std::move(Offset), DL);
}

Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty,
                                                 const DataLayout &DL) {
  if (isa<PoisonValue>(C))
    return PoisonValue::get(Ty);
  if (isa<UndefValue>(C))
    return UndefValue::get(Ty);
  // If padding is needed when storing C to memory, then it isn't considered as
  // uniform.
  if (!DL.typeSizeEqualsStoreSize(C->getType()))
    return nullptr;
  if (C->isNullValue() && !Ty->isX86_AMXTy())
    return Constant::getNullValue(Ty);
  if (C->isAllOnesValue() &&
      (Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy()))
    return Constant::getAllOnesValue(Ty);
  return nullptr;
}

namespace {

/// One of Op0/Op1 is a constant expression.
/// Attempt to symbolically evaluate the result of a binary operator merging
/// these together, using DL to answer layout queries.
Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
                                    const DataLayout &DL) {
  // SROA

  // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
  // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
  // bits.

  if (Opc == Instruction::And) {
    KnownBits Known0 = computeKnownBits(Op0, DL);
    KnownBits Known1 = computeKnownBits(Op1, DL);
    if ((Known1.One | Known0.Zero).isAllOnes()) {
      // All the bits of Op0 that the 'and' could be masking are already zero.
      return Op0;
    }
    if ((Known0.One | Known1.Zero).isAllOnes()) {
      // All the bits of Op1 that the 'and' could be masking are already zero.
      return Op1;
    }

    Known0 &= Known1;
    if (Known0.isConstant())
      return ConstantInt::get(Op0->getType(), Known0.getConstant());
  }

  // If the constant expr is something like &A[123] - &A[4].f, fold this into a
  // constant. This happens frequently when iterating over a global array.
  if (Opc == Instruction::Sub) {
    GlobalValue *GV1, *GV2;
    APInt Offs1, Offs2;

    if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
      if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
        unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());

        // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
        // PtrToInt may change the bitwidth so we have to convert to the right
        // size first.
        return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -
                                                Offs2.zextOrTrunc(OpSize));
      }
  }

  return nullptr;
}

/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly cast by the getelementptr.
Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
                         Type *ResultTy, GEPNoWrapFlags NW,
                         std::optional<ConstantRange> InRange,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
  Type *IntIdxTy = DL.getIndexType(ResultTy);
  Type *IntIdxScalarTy = IntIdxTy->getScalarType();

  bool Any = false;
  SmallVector<Constant*, 32> NewIdxs;
  for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
    if ((i == 1 ||
         !isa<StructType>(GetElementPtrInst::getIndexedType(
             SrcElemTy, Ops.slice(1, i - 1)))) &&
        Ops[i]->getType()->getScalarType() != IntIdxScalarTy) {
      Any = true;
      Type *NewType =
          Ops[i]->getType()->isVectorTy() ? IntIdxTy : IntIdxScalarTy;
      Constant *NewIdx = ConstantFoldCastOperand(
          CastInst::getCastOpcode(Ops[i], true, NewType, true), Ops[i], NewType,
          DL);
      if (!NewIdx)
        return nullptr;
      NewIdxs.push_back(NewIdx);
    } else
      NewIdxs.push_back(Ops[i]);
  }

  if (!Any)
    return nullptr;

  Constant *C =
      ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], NewIdxs, NW, InRange);
  return ConstantFoldConstant(C, DL, TLI);
}

/// If we can symbolically evaluate the GEP constant expression, do so.
Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
                                  ArrayRef<Constant *> Ops,
                                  const DataLayout &DL,
                                  const TargetLibraryInfo *TLI) {
  Type *SrcElemTy = GEP->getSourceElementType();
  Type *ResTy = GEP->getType();
  if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))
    return nullptr;

  if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, GEP->getNoWrapFlags(),
                                   GEP->getInRange(), DL, TLI))
    return C;

  Constant *Ptr = Ops[0];
  if (!Ptr->getType()->isPointerTy())
    return nullptr;

  Type *IntIdxTy = DL.getIndexType(Ptr->getType());

  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    if (!isa<ConstantInt>(Ops[i]) || !Ops[i]->getType()->isIntegerTy())
      return nullptr;

  unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy);
  APInt Offset = APInt(
      BitWidth,
      DL.getIndexedOffsetInType(
          SrcElemTy, ArrayRef((Value *const *)Ops.data() + 1, Ops.size() - 1)),
      /*isSigned=*/true, /*implicitTrunc=*/true);

  std::optional<ConstantRange> InRange = GEP->getInRange();
  if (InRange)
    InRange = InRange->sextOrTrunc(BitWidth);

  // If this is a GEP of a GEP, fold it all into a single GEP.
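  // E.g. getelementptr i8, ptr (getelementptr i8, ptr @g, i64 4), i64 2
  // accumulates into a single getelementptr i8, ptr @g, i64 6.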
  GEPNoWrapFlags NW = GEP->getNoWrapFlags();
  bool Overflow = false;
  while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
    NW &= GEP->getNoWrapFlags();

    SmallVector<Value *, 4> NestedOps(llvm::drop_begin(GEP->operands()));

    // Do not try to incorporate the sub-GEP if some index is not a constant
    // integer.
    bool AllConstantInt = true;
    for (Value *NestedOp : NestedOps)
      if (!isa<ConstantInt>(NestedOp)) {
        AllConstantInt = false;
        break;
      }
    if (!AllConstantInt)
      break;

    // TODO: Try to intersect two inrange attributes?
    if (!InRange) {
      InRange = GEP->getInRange();
      if (InRange)
        // Adjust inrange by the offset accumulated so far.
        InRange = InRange->sextOrTrunc(BitWidth).subtract(Offset);
    }

    Ptr = cast<Constant>(GEP->getOperand(0));
    SrcElemTy = GEP->getSourceElementType();
    Offset = Offset.sadd_ov(
        APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps),
              /*isSigned=*/true, /*implicitTrunc=*/true),
        Overflow);
  }

  // Preserving nusw (without inbounds) also requires that the offset
  // additions did not overflow.
  if (NW.hasNoUnsignedSignedWrap() && !NW.isInBounds() && Overflow)
    NW = NW.withoutNoUnsignedSignedWrap();

  // If the base value for this address is a literal integer value, fold the
  // getelementptr to the resulting integer value cast to the pointer type.
  APInt BasePtr(DL.getPointerTypeSizeInBits(Ptr->getType()), 0);
  if (auto *CE = dyn_cast<ConstantExpr>(Ptr)) {
    if (CE->getOpcode() == Instruction::IntToPtr) {
      if (auto *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
        BasePtr = Base->getValue().zextOrTrunc(BasePtr.getBitWidth());
    }
  }

  auto *PTy = cast<PointerType>(Ptr->getType());
  if ((Ptr->isNullValue() || BasePtr != 0) &&
      !DL.isNonIntegralPointerType(PTy)) {
    // If the index size is smaller than the pointer size, add to the low
    // bits only.
    BasePtr.insertBits(BasePtr.trunc(BitWidth) + Offset, 0);
    Constant *C = ConstantInt::get(Ptr->getContext(), BasePtr);
    return ConstantExpr::getIntToPtr(C, ResTy);
  }

  // Try to infer inbounds for GEPs of globals.
  if (!NW.isInBounds() && Offset.isNonNegative()) {
    bool CanBeNull, CanBeFreed;
    uint64_t DerefBytes =
        Ptr->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
    if (DerefBytes != 0 && !CanBeNull && Offset.sle(DerefBytes))
      NW |= GEPNoWrapFlags::inBounds();
  }

  // nusw + nneg -> nuw
  if (NW.hasNoUnsignedSignedWrap() && Offset.isNonNegative())
    NW |= GEPNoWrapFlags::noUnsignedWrap();

  // Otherwise canonicalize this to a single ptradd.
  LLVMContext &Ctx = Ptr->getContext();
  return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ctx), Ptr,
                                        ConstantInt::get(Ctx, Offset), NW,
                                        InRange);
}

/// Attempt to constant fold an instruction with the
/// specified opcode and operands. If successful, the constant result is
/// returned; if not, null is returned. Note that this function can fail when
/// attempting to fold instructions like loads and stores, which have no
/// constant expression form.
Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
                                       ArrayRef<Constant *> Ops,
                                       const DataLayout &DL,
                                       const TargetLibraryInfo *TLI,
                                       bool AllowNonDeterministic) {
  Type *DestTy = InstOrCE->getType();

  if (Instruction::isUnaryOp(Opcode))
    return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);

  if (Instruction::isBinaryOp(Opcode)) {
    switch (Opcode) {
    default:
      break;
    case Instruction::FAdd:
    case Instruction::FSub:
    case Instruction::FMul:
    case Instruction::FDiv:
    case Instruction::FRem:
      // Handle floating point instructions separately to account for denormals
      // TODO: If a constant expression is being folded rather than an
      // instruction, denormals will not be flushed/treated as zero
      if (const auto *I = dyn_cast<Instruction>(InstOrCE)) {
        return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I,
                                          AllowNonDeterministic);
      }
    }
    return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
  }

  if (Instruction::isCast(Opcode))
    return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);

  if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) {
    Type *SrcElemTy = GEP->getSourceElementType();
    if (!ConstantExpr::isSupportedGetElementPtr(SrcElemTy))
      return nullptr;

    if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
      return C;

    return ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], Ops.slice(1),
                                          GEP->getNoWrapFlags(),
                                          GEP->getInRange());
  }

  if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE))
    return CE->getWithOperands(Ops);

  switch (Opcode) {
  default: return nullptr;
  case Instruction::ICmp:
  case Instruction::FCmp: {
    auto *C = cast<CmpInst>(InstOrCE);
    return ConstantFoldCompareInstOperands(C->getPredicate(), Ops[0], Ops[1],
                                           DL, TLI, C);
  }
  case Instruction::Freeze:
    return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr;
  case Instruction::Call:
    if (auto *F = dyn_cast<Function>(Ops.back())) {
      const auto *Call = cast<CallBase>(InstOrCE);
      if (canConstantFoldCallTo(Call, F))
        return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI,
                                AllowNonDeterministic);
    }
    return nullptr;
  case Instruction::Select:
    return ConstantFoldSelectInstruction(Ops[0], Ops[1], Ops[2]);
  case Instruction::ExtractElement:
    return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
  case Instruction::ExtractValue:
    return ConstantFoldExtractValueInstruction(
        Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
  case Instruction::InsertElement:
    return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
  case Instruction::InsertValue:
    return ConstantFoldInsertValueInstruction(
        Ops[0], Ops[1], cast<InsertValueInst>(InstOrCE)->getIndices());
  case Instruction::ShuffleVector:
    return ConstantExpr::getShuffleVector(
        Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask());
  case Instruction::Load: {
    const auto *LI = dyn_cast<LoadInst>(InstOrCE);
    if (LI->isVolatile())
      return nullptr;
    return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);
  }
  }
}

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// Constant Folding public APIs
//===----------------------------------------------------------------------===//

namespace {

Constant *
ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
                         const TargetLibraryInfo *TLI,
                         SmallDenseMap<Constant *, Constant *> &FoldedOps) {
  if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))
    return const_cast<Constant *>(C);

  SmallVector<Constant *, 8> Ops;
  for (const Use &OldU : C->operands()) {
    Constant *OldC = cast<Constant>(&OldU);
    Constant *NewC = OldC;
    // Recursively fold the ConstantExpr's operands. If we have already folded
    // a ConstantExpr, we don't have to process it again.
    if (isa<ConstantVector>(OldC) || isa<ConstantExpr>(OldC)) {
      auto It = FoldedOps.find(OldC);
      if (It == FoldedOps.end()) {
        NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps);
        FoldedOps.insert({OldC, NewC});
      } else {
        NewC = It->second;
      }
    }
    Ops.push_back(NewC);
  }

  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
    if (Constant *Res = ConstantFoldInstOperandsImpl(
            CE, CE->getOpcode(), Ops, DL, TLI, /*AllowNonDeterministic=*/true))
      return Res;
    return const_cast<Constant *>(C);
  }

  assert(isa<ConstantVector>(C));
  return ConstantVector::get(Ops);
}

} // end anonymous namespace

Constant *llvm::ConstantFoldInstruction(const Instruction *I,
                                        const DataLayout &DL,
                                        const TargetLibraryInfo *TLI) {
  // Handle PHI nodes quickly here...
  if (auto *PN = dyn_cast<PHINode>(I)) {
    Constant *CommonValue = nullptr;

    SmallDenseMap<Constant *, Constant *> FoldedOps;
    for (Value *Incoming : PN->incoming_values()) {
      // If the incoming value is undef then skip it. Note that while we could
      // skip the value if it is equal to the phi node itself, we choose not to
      // because that would break the rule that constant folding only applies
      // if all operands are constants.
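      // E.g. phi i32 [ undef, %bb0 ], [ 42, %bb1 ], [ 42, %bb2 ] folds to
      // i32 42, while phi i32 [ 1, %bb0 ], [ 2, %bb1 ] does not fold.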
      if (isa<UndefValue>(Incoming))
        continue;
      // If the incoming value is not a constant, then give up.
      auto *C = dyn_cast<Constant>(Incoming);
      if (!C)
        return nullptr;
      // Fold the PHI's operands.
      C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
      // If the incoming value is a different constant from
      // the one we saw previously, then give up.
      if (CommonValue && C != CommonValue)
        return nullptr;
      CommonValue = C;
    }

    // If we reach here, all incoming values are the same constant or undef.
    return CommonValue ? CommonValue : UndefValue::get(PN->getType());
  }

  // Scan the operand list, checking to see if they are all constants, if so,
  // hand off to ConstantFoldInstOperandsImpl.
  if (!all_of(I->operands(), [](const Use &U) { return isa<Constant>(U); }))
    return nullptr;

  SmallDenseMap<Constant *, Constant *> FoldedOps;
  SmallVector<Constant *, 8> Ops;
  for (const Use &OpU : I->operands()) {
    auto *Op = cast<Constant>(&OpU);
    // Fold the Instruction's operands.
    Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps);
    Ops.push_back(Op);
  }

  return ConstantFoldInstOperands(I, Ops, DL, TLI);
}

Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
                                     const TargetLibraryInfo *TLI) {
  SmallDenseMap<Constant *, Constant *> FoldedOps;
  return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
}

Constant *llvm::ConstantFoldInstOperands(const Instruction *I,
                                         ArrayRef<Constant *> Ops,
                                         const DataLayout &DL,
                                         const TargetLibraryInfo *TLI,
                                         bool AllowNonDeterministic) {
  return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI,
                                      AllowNonDeterministic);
}

Constant *llvm::ConstantFoldCompareInstOperands(
    unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL,
    const TargetLibraryInfo *TLI, const Instruction *I) {
  CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate;
  // fold: icmp (inttoptr x), null         -> icmp x, 0
  // fold: icmp null, (inttoptr x)         -> icmp 0, x
  // fold: icmp (ptrtoint x), 0            -> icmp x, null
  // fold: icmp 0, (ptrtoint x)            -> icmp null, x
  // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
  // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
  //
  // FIXME: The following comment is out of date and the DataLayout is here now.
  // ConstantExpr::getCompare cannot do this, because it doesn't have DL
  // around to know if bit truncation is happening.
  if (auto *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
    if (Ops1->isNullValue()) {
      if (CE0->getOpcode() == Instruction::IntToPtr) {
        Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
        // Convert the integer value to the right size to ensure we get the
        // proper extension or truncation.
        if (Constant *C = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy,
                                                  /*IsSigned*/ false, DL)) {
          Constant *Null = Constant::getNullValue(C->getType());
          return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
        }
      }

      // Only do this transformation if the int is intptrty in size, otherwise
      // there is a truncation or extension that we aren't modeling.
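      // E.g. with 64-bit pointers, icmp eq (ptrtoint ptr @g to i64), 0 becomes
      // icmp eq ptr @g, null, whereas a ptrtoint to i32 truncates and is left
      // alone.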
      if (CE0->getOpcode() == Instruction::PtrToInt) {
        Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
        if (CE0->getType() == IntPtrTy) {
          Constant *C = CE0->getOperand(0);
          Constant *Null = Constant::getNullValue(C->getType());
          return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
        }
      }
    }

    if (auto *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
      if (CE0->getOpcode() == CE1->getOpcode()) {
        if (CE0->getOpcode() == Instruction::IntToPtr) {
          Type *IntPtrTy = DL.getIntPtrType(CE0->getType());

          // Convert the integer value to the right size to ensure we get the
          // proper extension or truncation.
          Constant *C0 = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy,
                                                 /*IsSigned*/ false, DL);
          Constant *C1 = ConstantFoldIntegerCast(CE1->getOperand(0), IntPtrTy,
                                                 /*IsSigned*/ false, DL);
          if (C0 && C1)
            return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
        }

        // Only do this transformation if the int is intptrty in size, otherwise
        // there is a truncation or extension that we aren't modeling.
        if (CE0->getOpcode() == Instruction::PtrToInt) {
          Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
          if (CE0->getType() == IntPtrTy &&
              CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
            return ConstantFoldCompareInstOperands(
                Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
          }
        }
      }
    }

    // Convert pointer comparison (base+offset1) pred (base+offset2) into
    // offset1 pred offset2, for the case where the offset is inbounds. This
    // only works for equality and unsigned comparison, as inbounds permits
    // crossing the sign boundary. However, the offset comparison itself is
    // signed.
    if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) {
      unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType());
      APInt Offset0(IndexWidth, 0);
      bool IsEqPred = ICmpInst::isEquality(Predicate);
      Value *Stripped0 = Ops0->stripAndAccumulateConstantOffsets(
          DL, Offset0, /*AllowNonInbounds=*/IsEqPred,
          /*AllowInvariantGroup=*/false, /*ExternalAnalysis=*/nullptr,
          /*LookThroughIntToPtr=*/IsEqPred);
      APInt Offset1(IndexWidth, 0);
      Value *Stripped1 = Ops1->stripAndAccumulateConstantOffsets(
          DL, Offset1, /*AllowNonInbounds=*/IsEqPred,
          /*AllowInvariantGroup=*/false, /*ExternalAnalysis=*/nullptr,
          /*LookThroughIntToPtr=*/IsEqPred);
      if (Stripped0 == Stripped1)
        return ConstantInt::getBool(
            Ops0->getContext(),
            ICmpInst::compare(Offset0, Offset1,
                              ICmpInst::getSignedPredicate(Predicate)));
    }
  } else if (isa<ConstantExpr>(Ops1)) {
    // If RHS is a constant expression, but the left side isn't, swap the
    // operands and try again.
    Predicate = ICmpInst::getSwappedPredicate(Predicate);
    return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
  }

  if (CmpInst::isFPPredicate(Predicate)) {
    // Flush any denormal constant float input according to denormal handling
    // mode.
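    // E.g. under an attribute such as "denormal-fp-math"="preserve-sign", a
    // denormal input like -0x1p-149 (the smallest single-precision denormal)
    // is compared as if it were -0.0.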
    Ops0 = FlushFPConstant(Ops0, I, /*IsOutput=*/false);
    if (!Ops0)
      return nullptr;
    Ops1 = FlushFPConstant(Ops1, I, /*IsOutput=*/false);
    if (!Ops1)
      return nullptr;
  }

  return ConstantFoldCompareInstruction(Predicate, Ops0, Ops1);
}

Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
                                           const DataLayout &DL) {
  assert(Instruction::isUnaryOp(Opcode));

  return ConstantFoldUnaryInstruction(Opcode, Op);
}

Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
                                             Constant *RHS,
                                             const DataLayout &DL) {
  assert(Instruction::isBinaryOp(Opcode));
  if (isa<ConstantExpr>(LHS) || isa<ConstantExpr>(RHS))
    if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL))
      return C;

  if (ConstantExpr::isDesirableBinOp(Opcode))
    return ConstantExpr::get(Opcode, LHS, RHS);
  return ConstantFoldBinaryInstruction(Opcode, LHS, RHS);
}

static ConstantFP *flushDenormalConstant(Type *Ty, const APFloat &APF,
                                         DenormalMode::DenormalModeKind Mode) {
  switch (Mode) {
  case DenormalMode::Dynamic:
    return nullptr;
  case DenormalMode::IEEE:
    return ConstantFP::get(Ty->getContext(), APF);
  case DenormalMode::PreserveSign:
    return ConstantFP::get(
        Ty->getContext(),
        APFloat::getZero(APF.getSemantics(), APF.isNegative()));
  case DenormalMode::PositiveZero:
    return ConstantFP::get(Ty->getContext(),
                           APFloat::getZero(APF.getSemantics(), false));
  default:
    break;
  }

  llvm_unreachable("unknown denormal mode");
}

/// Return the denormal mode that can be assumed when executing a floating point
/// operation at \p CtxI.
static DenormalMode getInstrDenormalMode(const Instruction *CtxI, Type *Ty) {
  if (!CtxI || !CtxI->getParent() || !CtxI->getFunction())
    return DenormalMode::getDynamic();
  return CtxI->getFunction()->getDenormalMode(Ty->getFltSemantics());
}

static ConstantFP *flushDenormalConstantFP(ConstantFP *CFP,
                                           const Instruction *Inst,
                                           bool IsOutput) {
  const APFloat &APF = CFP->getValueAPF();
  if (!APF.isDenormal())
    return CFP;

  DenormalMode Mode = getInstrDenormalMode(Inst, CFP->getType());
  return flushDenormalConstant(CFP->getType(), APF,
                               IsOutput ? Mode.Output : Mode.Input);
}

Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *Inst,
                                bool IsOutput) {
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(Operand))
    return flushDenormalConstantFP(CFP, Inst, IsOutput);

  if (isa<ConstantAggregateZero, UndefValue, ConstantExpr>(Operand))
    return Operand;

  Type *Ty = Operand->getType();
  VectorType *VecTy = dyn_cast<VectorType>(Ty);
  if (VecTy) {
    if (auto *Splat = dyn_cast_or_null<ConstantFP>(Operand->getSplatValue())) {
      ConstantFP *Folded = flushDenormalConstantFP(Splat, Inst, IsOutput);
      if (!Folded)
        return nullptr;
      return ConstantVector::getSplat(VecTy->getElementCount(), Folded);
    }

    Ty = VecTy->getElementType();
  }

  if (const auto *CV = dyn_cast<ConstantVector>(Operand)) {
    SmallVector<Constant *, 16> NewElts;
    for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
      Constant *Element = CV->getAggregateElement(i);
      if (isa<UndefValue>(Element)) {
        NewElts.push_back(Element);
        continue;
      }

      ConstantFP *CFP = dyn_cast<ConstantFP>(Element);
      if (!CFP)
        return nullptr;

      ConstantFP *Folded = flushDenormalConstantFP(CFP, Inst, IsOutput);
      if (!Folded)
        return nullptr;
      NewElts.push_back(Folded);
    }

    return ConstantVector::get(NewElts);
  }

  if (const auto *CDV = dyn_cast<ConstantDataVector>(Operand)) {
    SmallVector<Constant *, 16> NewElts;
    for (unsigned I = 0, E = CDV->getNumElements(); I < E; ++I) {
      const APFloat &Elt = CDV->getElementAsAPFloat(I);
      if (!Elt.isDenormal()) {
        NewElts.push_back(ConstantFP::get(Ty, Elt));
      } else {
        DenormalMode Mode = getInstrDenormalMode(Inst, Ty);
        ConstantFP *Folded =
            flushDenormalConstant(Ty, Elt, IsOutput ? Mode.Output : Mode.Input);
        if (!Folded)
          return nullptr;
        NewElts.push_back(Folded);
      }
    }

    return ConstantVector::get(NewElts);
  }

  return nullptr;
}

Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
                                           Constant *RHS, const DataLayout &DL,
                                           const Instruction *I,
                                           bool AllowNonDeterministic) {
  if (Instruction::isBinaryOp(Opcode)) {
    // Flush denormal inputs if needed.
    Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false);
    if (!Op0)
      return nullptr;
    Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false);
    if (!Op1)
      return nullptr;

    // If nsz or an algebraic FMF flag is set, the result of the FP operation
    // may change due to future optimization. Don't constant fold them if
    // non-deterministic results are not allowed.
    if (!AllowNonDeterministic)
      if (auto *FP = dyn_cast_or_null<FPMathOperator>(I))
        if (FP->hasNoSignedZeros() || FP->hasAllowReassoc() ||
            FP->hasAllowContract() || FP->hasAllowReciprocal())
          return nullptr;

    // Calculate constant result.
    Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL);
    if (!C)
      return nullptr;

    // Flush denormal output if needed.
    C = FlushFPConstant(C, I, /* IsOutput */ true);
    if (!C)
      return nullptr;

    // The precise NaN value is non-deterministic.
    if (!AllowNonDeterministic && C->isNaN())
      return nullptr;

    return C;
  }
  // If instruction lacks a parent/function and the denormal mode cannot be
  // determined, use the default (IEEE).
  return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
}

Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
                                        Type *DestTy, const DataLayout &DL) {
  assert(Instruction::isCast(Opcode));
  switch (Opcode) {
  default:
    llvm_unreachable("Missing case");
  case Instruction::PtrToInt:
    if (auto *CE = dyn_cast<ConstantExpr>(C)) {
      Constant *FoldedValue = nullptr;
      // If the input is an inttoptr, eliminate the pair. This requires knowing
      // the width of a pointer, so it can't be done in ConstantExpr::getCast.
      if (CE->getOpcode() == Instruction::IntToPtr) {
        // zext/trunc the inttoptr to pointer size.
        FoldedValue = ConstantFoldIntegerCast(CE->getOperand(0),
                                              DL.getIntPtrType(CE->getType()),
                                              /*IsSigned=*/false, DL);
      } else if (auto *GEP = dyn_cast<GEPOperator>(CE)) {
        // If we have GEP, we can perform the following folds:
        // (ptrtoint (gep null, x)) -> x
        // (ptrtoint (gep (gep null, x), y)) -> x + y, etc.
        unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
        APInt BaseOffset(BitWidth, 0);
        auto *Base = cast<Constant>(GEP->stripAndAccumulateConstantOffsets(
            DL, BaseOffset, /*AllowNonInbounds=*/true));
        if (Base->isNullValue()) {
          FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
        } else {
          // ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V
          if (GEP->getNumIndices() == 1 &&
              GEP->getSourceElementType()->isIntegerTy(8)) {
            auto *Ptr = cast<Constant>(GEP->getPointerOperand());
            auto *Sub = dyn_cast<ConstantExpr>(GEP->getOperand(1));
            Type *IntIdxTy = DL.getIndexType(Ptr->getType());
            if (Sub && Sub->getType() == IntIdxTy &&
                Sub->getOpcode() == Instruction::Sub &&
                Sub->getOperand(0)->isNullValue())
              FoldedValue = ConstantExpr::getSub(
                  ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1));
          }
        }
      }
      if (FoldedValue) {
        // Do a zext or trunc to get to the ptrtoint dest size.
        return ConstantFoldIntegerCast(FoldedValue, DestTy, /*IsSigned=*/false,
                                       DL);
      }
    }
    break;
  case Instruction::IntToPtr:
    // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
    // the int size is >= the ptr size and the address spaces are the same.
    // This requires knowing the width of a pointer, so it can't be done in
    // ConstantExpr::getCast.
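    // E.g. with 64-bit pointers, inttoptr (ptrtoint ptr @g to i64) to ptr
    // folds to @g below, but a round trip through i32 would truncate the
    // pointer and is left alone.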
    if (auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (CE->getOpcode() == Instruction::PtrToInt) {
        Constant *SrcPtr = CE->getOperand(0);
        unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
        unsigned MidIntSize = CE->getType()->getScalarSizeInBits();

        if (MidIntSize >= SrcPtrSize) {
          unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
          if (SrcAS == DestTy->getPointerAddressSpace())
            return FoldBitCast(CE->getOperand(0), DestTy, DL);
        }
      }
    }
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::AddrSpaceCast:
    break;
  case Instruction::BitCast:
    return FoldBitCast(C, DestTy, DL);
  }

  if (ConstantExpr::isDesirableCastOp(Opcode))
    return ConstantExpr::getCast(Opcode, C, DestTy);
  return ConstantFoldCastInstruction(Opcode, C, DestTy);
}

Constant *llvm::ConstantFoldIntegerCast(Constant *C, Type *DestTy,
                                        bool IsSigned, const DataLayout &DL) {
  Type *SrcTy = C->getType();
  if (SrcTy == DestTy)
    return C;
  if (SrcTy->getScalarSizeInBits() > DestTy->getScalarSizeInBits())
    return ConstantFoldCastOperand(Instruction::Trunc, C, DestTy, DL);
  if (IsSigned)
    return ConstantFoldCastOperand(Instruction::SExt, C, DestTy, DL);
  return ConstantFoldCastOperand(Instruction::ZExt, C, DestTy, DL);
}

//===----------------------------------------------------------------------===//
//  Constant Folding for Calls
//

bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
  if (Call->isNoBuiltin())
    return false;
  if (Call->getFunctionType() != F->getFunctionType())
    return false;

  // Allow FP calls (both libcalls and intrinsics) to remain unfolded.
  // This can be useful for GPU targets or in cross-compilation scenarios
  // when the exact target FP behaviour is required, and the host compiler's
  // behaviour may be slightly different from the device's run-time behaviour.
  if (DisableFPCallFolding && (F->getReturnType()->isFloatingPointTy() ||
                               any_of(F->args(), [](const Argument &Arg) {
                                 return Arg.getType()->isFloatingPointTy();
                               })))
    return false;

  switch (F->getIntrinsicID()) {
  // Operations that do not operate on floating-point numbers and do not
  // depend on the FP environment can be folded even in strictfp functions.
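  // For example, llvm.bswap.i32(0x12345678) folds to 0x78563412 regardless of
  // the function's strictfp attribute, since no FP state is involved.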
  case Intrinsic::bswap:
  case Intrinsic::ctpop:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::fshl:
  case Intrinsic::fshr:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
  case Intrinsic::masked_load:
  case Intrinsic::get_active_lane_mask:
  case Intrinsic::abs:
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin:
  case Intrinsic::scmp:
  case Intrinsic::ucmp:
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
  case Intrinsic::sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::bitreverse:
  case Intrinsic::is_constant:
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert:
  case Intrinsic::vector_interleave2:
  case Intrinsic::vector_deinterleave2:
  // Target intrinsics
  case Intrinsic::amdgcn_perm:
  case Intrinsic::amdgcn_wave_reduce_umin:
  case Intrinsic::amdgcn_wave_reduce_umax:
  case Intrinsic::amdgcn_s_wqm:
  case Intrinsic::amdgcn_s_quadmask:
  case Intrinsic::amdgcn_s_bitreplicate:
  case Intrinsic::arm_mve_vctp8:
  case Intrinsic::arm_mve_vctp16:
  case Intrinsic::arm_mve_vctp32:
  case Intrinsic::arm_mve_vctp64:
  case Intrinsic::aarch64_sve_convert_from_svbool:
  case Intrinsic::wasm_alltrue:
  case Intrinsic::wasm_anytrue:
  // WebAssembly float semantics are always known
  case Intrinsic::wasm_trunc_signed:
  case Intrinsic::wasm_trunc_unsigned:
    return true;

  // Floating-point operations cannot be folded in strictfp functions in the
  // general case. They can be folded if the FP environment is known to the
  // compiler.
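  // For example, llvm.sqrt.f64(2.0) is folded via the host libm in a
  // default-environment function, but is left alone under strictfp.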
1667 case Intrinsic::minnum: 1668 case Intrinsic::maxnum: 1669 case Intrinsic::minimum: 1670 case Intrinsic::maximum: 1671 case Intrinsic::minimumnum: 1672 case Intrinsic::maximumnum: 1673 case Intrinsic::log: 1674 case Intrinsic::log2: 1675 case Intrinsic::log10: 1676 case Intrinsic::exp: 1677 case Intrinsic::exp2: 1678 case Intrinsic::exp10: 1679 case Intrinsic::sqrt: 1680 case Intrinsic::sin: 1681 case Intrinsic::cos: 1682 case Intrinsic::sincos: 1683 case Intrinsic::sinh: 1684 case Intrinsic::cosh: 1685 case Intrinsic::atan: 1686 case Intrinsic::pow: 1687 case Intrinsic::powi: 1688 case Intrinsic::ldexp: 1689 case Intrinsic::fma: 1690 case Intrinsic::fmuladd: 1691 case Intrinsic::frexp: 1692 case Intrinsic::fptoui_sat: 1693 case Intrinsic::fptosi_sat: 1694 case Intrinsic::convert_from_fp16: 1695 case Intrinsic::convert_to_fp16: 1696 case Intrinsic::amdgcn_cos: 1697 case Intrinsic::amdgcn_cubeid: 1698 case Intrinsic::amdgcn_cubema: 1699 case Intrinsic::amdgcn_cubesc: 1700 case Intrinsic::amdgcn_cubetc: 1701 case Intrinsic::amdgcn_fmul_legacy: 1702 case Intrinsic::amdgcn_fma_legacy: 1703 case Intrinsic::amdgcn_fract: 1704 case Intrinsic::amdgcn_sin: 1705 // The intrinsics below depend on rounding mode in MXCSR. 1706 case Intrinsic::x86_sse_cvtss2si: 1707 case Intrinsic::x86_sse_cvtss2si64: 1708 case Intrinsic::x86_sse_cvttss2si: 1709 case Intrinsic::x86_sse_cvttss2si64: 1710 case Intrinsic::x86_sse2_cvtsd2si: 1711 case Intrinsic::x86_sse2_cvtsd2si64: 1712 case Intrinsic::x86_sse2_cvttsd2si: 1713 case Intrinsic::x86_sse2_cvttsd2si64: 1714 case Intrinsic::x86_avx512_vcvtss2si32: 1715 case Intrinsic::x86_avx512_vcvtss2si64: 1716 case Intrinsic::x86_avx512_cvttss2si: 1717 case Intrinsic::x86_avx512_cvttss2si64: 1718 case Intrinsic::x86_avx512_vcvtsd2si32: 1719 case Intrinsic::x86_avx512_vcvtsd2si64: 1720 case Intrinsic::x86_avx512_cvttsd2si: 1721 case Intrinsic::x86_avx512_cvttsd2si64: 1722 case Intrinsic::x86_avx512_vcvtss2usi32: 1723 case Intrinsic::x86_avx512_vcvtss2usi64: 1724 case Intrinsic::x86_avx512_cvttss2usi: 1725 case Intrinsic::x86_avx512_cvttss2usi64: 1726 case Intrinsic::x86_avx512_vcvtsd2usi32: 1727 case Intrinsic::x86_avx512_vcvtsd2usi64: 1728 case Intrinsic::x86_avx512_cvttsd2usi: 1729 case Intrinsic::x86_avx512_cvttsd2usi64: 1730 1731 // NVVM FMax intrinsics 1732 case Intrinsic::nvvm_fmax_d: 1733 case Intrinsic::nvvm_fmax_f: 1734 case Intrinsic::nvvm_fmax_ftz_f: 1735 case Intrinsic::nvvm_fmax_ftz_nan_f: 1736 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f: 1737 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f: 1738 case Intrinsic::nvvm_fmax_nan_f: 1739 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f: 1740 case Intrinsic::nvvm_fmax_xorsign_abs_f: 1741 1742 // NVVM FMin intrinsics 1743 case Intrinsic::nvvm_fmin_d: 1744 case Intrinsic::nvvm_fmin_f: 1745 case Intrinsic::nvvm_fmin_ftz_f: 1746 case Intrinsic::nvvm_fmin_ftz_nan_f: 1747 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f: 1748 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f: 1749 case Intrinsic::nvvm_fmin_nan_f: 1750 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f: 1751 case Intrinsic::nvvm_fmin_xorsign_abs_f: 1752 1753 // NVVM float/double to int32/uint32 conversion intrinsics 1754 case Intrinsic::nvvm_f2i_rm: 1755 case Intrinsic::nvvm_f2i_rn: 1756 case Intrinsic::nvvm_f2i_rp: 1757 case Intrinsic::nvvm_f2i_rz: 1758 case Intrinsic::nvvm_f2i_rm_ftz: 1759 case Intrinsic::nvvm_f2i_rn_ftz: 1760 case Intrinsic::nvvm_f2i_rp_ftz: 1761 case Intrinsic::nvvm_f2i_rz_ftz: 1762 case Intrinsic::nvvm_f2ui_rm: 1763 case Intrinsic::nvvm_f2ui_rn: 1764 
  case Intrinsic::nvvm_f2ui_rp:
  case Intrinsic::nvvm_f2ui_rz:
  case Intrinsic::nvvm_f2ui_rm_ftz:
  case Intrinsic::nvvm_f2ui_rn_ftz:
  case Intrinsic::nvvm_f2ui_rp_ftz:
  case Intrinsic::nvvm_f2ui_rz_ftz:
  case Intrinsic::nvvm_d2i_rm:
  case Intrinsic::nvvm_d2i_rn:
  case Intrinsic::nvvm_d2i_rp:
  case Intrinsic::nvvm_d2i_rz:
  case Intrinsic::nvvm_d2ui_rm:
  case Intrinsic::nvvm_d2ui_rn:
  case Intrinsic::nvvm_d2ui_rp:
  case Intrinsic::nvvm_d2ui_rz:

  // NVVM float/double to int64/uint64 conversion intrinsics
  case Intrinsic::nvvm_f2ll_rm:
  case Intrinsic::nvvm_f2ll_rn:
  case Intrinsic::nvvm_f2ll_rp:
  case Intrinsic::nvvm_f2ll_rz:
  case Intrinsic::nvvm_f2ll_rm_ftz:
  case Intrinsic::nvvm_f2ll_rn_ftz:
  case Intrinsic::nvvm_f2ll_rp_ftz:
  case Intrinsic::nvvm_f2ll_rz_ftz:
  case Intrinsic::nvvm_f2ull_rm:
  case Intrinsic::nvvm_f2ull_rn:
  case Intrinsic::nvvm_f2ull_rp:
  case Intrinsic::nvvm_f2ull_rz:
  case Intrinsic::nvvm_f2ull_rm_ftz:
  case Intrinsic::nvvm_f2ull_rn_ftz:
  case Intrinsic::nvvm_f2ull_rp_ftz:
  case Intrinsic::nvvm_f2ull_rz_ftz:
  case Intrinsic::nvvm_d2ll_rm:
  case Intrinsic::nvvm_d2ll_rn:
  case Intrinsic::nvvm_d2ll_rp:
  case Intrinsic::nvvm_d2ll_rz:
  case Intrinsic::nvvm_d2ull_rm:
  case Intrinsic::nvvm_d2ull_rn:
  case Intrinsic::nvvm_d2ull_rp:
  case Intrinsic::nvvm_d2ull_rz:
    return !Call->isStrictFP();

  // Sign operations are actually bitwise operations; they do not raise
  // exceptions even for SNaNs.
  case Intrinsic::fabs:
  case Intrinsic::copysign:
  case Intrinsic::is_fpclass:
  // Non-constrained variants of rounding operations imply the default FP
  // environment, so they can be folded in any case.
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::trunc:
  case Intrinsic::nearbyint:
  case Intrinsic::rint:
  case Intrinsic::canonicalize:
  // Constrained intrinsics can be folded if the FP environment is known to
  // the compiler.
  case Intrinsic::experimental_constrained_fma:
  case Intrinsic::experimental_constrained_fmuladd:
  case Intrinsic::experimental_constrained_fadd:
  case Intrinsic::experimental_constrained_fsub:
  case Intrinsic::experimental_constrained_fmul:
  case Intrinsic::experimental_constrained_fdiv:
  case Intrinsic::experimental_constrained_frem:
  case Intrinsic::experimental_constrained_ceil:
  case Intrinsic::experimental_constrained_floor:
  case Intrinsic::experimental_constrained_round:
  case Intrinsic::experimental_constrained_roundeven:
  case Intrinsic::experimental_constrained_trunc:
  case Intrinsic::experimental_constrained_nearbyint:
  case Intrinsic::experimental_constrained_rint:
  case Intrinsic::experimental_constrained_fcmp:
  case Intrinsic::experimental_constrained_fcmps:
    return true;
  default:
    return false;
  case Intrinsic::not_intrinsic:
    break;
  }

  if (!F->hasName() || Call->isStrictFP())
    return false;

  // In these cases, the check of the length is required. We don't want to
  // return true for a name like "cos\0blah", which strcmp would consider
  // equal to "cos" but which has length 8.
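  // StringRef equality compares lengths as well as contents, so the
  // comparisons below handle such embedded NULs correctly.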
1851 StringRef Name = F->getName(); 1852 switch (Name[0]) { 1853 default: 1854 return false; 1855 case 'a': 1856 return Name == "acos" || Name == "acosf" || 1857 Name == "asin" || Name == "asinf" || 1858 Name == "atan" || Name == "atanf" || 1859 Name == "atan2" || Name == "atan2f"; 1860 case 'c': 1861 return Name == "ceil" || Name == "ceilf" || 1862 Name == "cos" || Name == "cosf" || 1863 Name == "cosh" || Name == "coshf"; 1864 case 'e': 1865 return Name == "exp" || Name == "expf" || Name == "exp2" || 1866 Name == "exp2f" || Name == "erf" || Name == "erff"; 1867 case 'f': 1868 return Name == "fabs" || Name == "fabsf" || 1869 Name == "floor" || Name == "floorf" || 1870 Name == "fmod" || Name == "fmodf"; 1871 case 'i': 1872 return Name == "ilogb" || Name == "ilogbf"; 1873 case 'l': 1874 return Name == "log" || Name == "logf" || Name == "logl" || 1875 Name == "log2" || Name == "log2f" || Name == "log10" || 1876 Name == "log10f" || Name == "logb" || Name == "logbf" || 1877 Name == "log1p" || Name == "log1pf"; 1878 case 'n': 1879 return Name == "nearbyint" || Name == "nearbyintf"; 1880 case 'p': 1881 return Name == "pow" || Name == "powf"; 1882 case 'r': 1883 return Name == "remainder" || Name == "remainderf" || 1884 Name == "rint" || Name == "rintf" || 1885 Name == "round" || Name == "roundf"; 1886 case 's': 1887 return Name == "sin" || Name == "sinf" || 1888 Name == "sinh" || Name == "sinhf" || 1889 Name == "sqrt" || Name == "sqrtf"; 1890 case 't': 1891 return Name == "tan" || Name == "tanf" || 1892 Name == "tanh" || Name == "tanhf" || 1893 Name == "trunc" || Name == "truncf"; 1894 case '_': 1895 // Check for various function names that get used for the math functions 1896 // when the header files are preprocessed with the macro 1897 // __FINITE_MATH_ONLY__ enabled. 1898 // The '12' here is the length of the shortest name that can match. 1899 // We need to check the size before looking at Name[1] and Name[2] 1900 // so we may as well check a limit that will eliminate mismatches. 
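    // For example, "__pow_finite" and "__log_finite" are exactly 12
    // characters, the shortest names accepted in this group.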
1901 if (Name.size() < 12 || Name[1] != '_') 1902 return false; 1903 switch (Name[2]) { 1904 default: 1905 return false; 1906 case 'a': 1907 return Name == "__acos_finite" || Name == "__acosf_finite" || 1908 Name == "__asin_finite" || Name == "__asinf_finite" || 1909 Name == "__atan2_finite" || Name == "__atan2f_finite"; 1910 case 'c': 1911 return Name == "__cosh_finite" || Name == "__coshf_finite"; 1912 case 'e': 1913 return Name == "__exp_finite" || Name == "__expf_finite" || 1914 Name == "__exp2_finite" || Name == "__exp2f_finite"; 1915 case 'l': 1916 return Name == "__log_finite" || Name == "__logf_finite" || 1917 Name == "__log10_finite" || Name == "__log10f_finite"; 1918 case 'p': 1919 return Name == "__pow_finite" || Name == "__powf_finite"; 1920 case 's': 1921 return Name == "__sinh_finite" || Name == "__sinhf_finite"; 1922 } 1923 } 1924 } 1925 1926 namespace { 1927 1928 Constant *GetConstantFoldFPValue(double V, Type *Ty) { 1929 if (Ty->isHalfTy() || Ty->isFloatTy()) { 1930 APFloat APF(V); 1931 bool unused; 1932 APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused); 1933 return ConstantFP::get(Ty->getContext(), APF); 1934 } 1935 if (Ty->isDoubleTy()) 1936 return ConstantFP::get(Ty->getContext(), APFloat(V)); 1937 llvm_unreachable("Can only constant fold half/float/double"); 1938 } 1939 1940 #if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128) 1941 Constant *GetConstantFoldFPValue128(float128 V, Type *Ty) { 1942 if (Ty->isFP128Ty()) 1943 return ConstantFP::get(Ty, V); 1944 llvm_unreachable("Can only constant fold fp128"); 1945 } 1946 #endif 1947 1948 /// Clear the floating-point exception state. 1949 inline void llvm_fenv_clearexcept() { 1950 #if HAVE_DECL_FE_ALL_EXCEPT 1951 feclearexcept(FE_ALL_EXCEPT); 1952 #endif 1953 errno = 0; 1954 } 1955 1956 /// Test if a floating-point exception was raised. 
1957 inline bool llvm_fenv_testexcept() { 1958 int errno_val = errno; 1959 if (errno_val == ERANGE || errno_val == EDOM) 1960 return true; 1961 #if HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT 1962 if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT)) 1963 return true; 1964 #endif 1965 return false; 1966 } 1967 1968 static APFloat FTZPreserveSign(const APFloat &V) { 1969 if (V.isDenormal()) 1970 return APFloat::getZero(V.getSemantics(), V.isNegative()); 1971 return V; 1972 } 1973 1974 Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, 1975 Type *Ty) { 1976 llvm_fenv_clearexcept(); 1977 double Result = NativeFP(V.convertToDouble()); 1978 if (llvm_fenv_testexcept()) { 1979 llvm_fenv_clearexcept(); 1980 return nullptr; 1981 } 1982 1983 return GetConstantFoldFPValue(Result, Ty); 1984 } 1985 1986 #if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128) 1987 Constant *ConstantFoldFP128(float128 (*NativeFP)(float128), const APFloat &V, 1988 Type *Ty) { 1989 llvm_fenv_clearexcept(); 1990 float128 Result = NativeFP(V.convertToQuad()); 1991 if (llvm_fenv_testexcept()) { 1992 llvm_fenv_clearexcept(); 1993 return nullptr; 1994 } 1995 1996 return GetConstantFoldFPValue128(Result, Ty); 1997 } 1998 #endif 1999 2000 Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), 2001 const APFloat &V, const APFloat &W, Type *Ty) { 2002 llvm_fenv_clearexcept(); 2003 double Result = NativeFP(V.convertToDouble(), W.convertToDouble()); 2004 if (llvm_fenv_testexcept()) { 2005 llvm_fenv_clearexcept(); 2006 return nullptr; 2007 } 2008 2009 return GetConstantFoldFPValue(Result, Ty); 2010 } 2011 2012 Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) { 2013 FixedVectorType *VT = dyn_cast<FixedVectorType>(Op->getType()); 2014 if (!VT) 2015 return nullptr; 2016 2017 // This isn't strictly necessary, but handle the special/common case of zero: 2018 // all integer reductions of a zero input produce zero. 2019 if (isa<ConstantAggregateZero>(Op)) 2020 return ConstantInt::get(VT->getElementType(), 0); 2021 2022 // This is the same as the underlying binops - poison propagates. 2023 if (isa<PoisonValue>(Op) || Op->containsPoisonElement()) 2024 return PoisonValue::get(VT->getElementType()); 2025 2026 // TODO: Handle undef. 
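  // For example, vector_reduce_add on <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  // accumulates to the scalar i32 10 in the loop below.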
2027 if (!isa<ConstantVector>(Op) && !isa<ConstantDataVector>(Op)) 2028 return nullptr; 2029 2030 auto *EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(0U)); 2031 if (!EltC) 2032 return nullptr; 2033 2034 APInt Acc = EltC->getValue(); 2035 for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) { 2036 if (!(EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(I)))) 2037 return nullptr; 2038 const APInt &X = EltC->getValue(); 2039 switch (IID) { 2040 case Intrinsic::vector_reduce_add: 2041 Acc = Acc + X; 2042 break; 2043 case Intrinsic::vector_reduce_mul: 2044 Acc = Acc * X; 2045 break; 2046 case Intrinsic::vector_reduce_and: 2047 Acc = Acc & X; 2048 break; 2049 case Intrinsic::vector_reduce_or: 2050 Acc = Acc | X; 2051 break; 2052 case Intrinsic::vector_reduce_xor: 2053 Acc = Acc ^ X; 2054 break; 2055 case Intrinsic::vector_reduce_smin: 2056 Acc = APIntOps::smin(Acc, X); 2057 break; 2058 case Intrinsic::vector_reduce_smax: 2059 Acc = APIntOps::smax(Acc, X); 2060 break; 2061 case Intrinsic::vector_reduce_umin: 2062 Acc = APIntOps::umin(Acc, X); 2063 break; 2064 case Intrinsic::vector_reduce_umax: 2065 Acc = APIntOps::umax(Acc, X); 2066 break; 2067 } 2068 } 2069 2070 return ConstantInt::get(Op->getContext(), Acc); 2071 } 2072 2073 /// Attempt to fold an SSE floating point to integer conversion of a constant 2074 /// floating point. If roundTowardZero is false, the default IEEE rounding is 2075 /// used (toward nearest, ties to even). This matches the behavior of the 2076 /// non-truncating SSE instructions in the default rounding mode. The desired 2077 /// integer type Ty is used to select how many bits are available for the 2078 /// result. Returns null if the conversion cannot be performed, otherwise 2079 /// returns the Constant value resulting from the conversion. 2080 Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero, 2081 Type *Ty, bool IsSigned) { 2082 // All of these conversion intrinsics form an integer of at most 64bits. 2083 unsigned ResultWidth = Ty->getIntegerBitWidth(); 2084 assert(ResultWidth <= 64 && 2085 "Can only constant fold conversions to 64 and 32 bit ints"); 2086 2087 uint64_t UIntVal; 2088 bool isExact = false; 2089 APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero 2090 : APFloat::rmNearestTiesToEven; 2091 APFloat::opStatus status = 2092 Val.convertToInteger(MutableArrayRef(UIntVal), ResultWidth, 2093 IsSigned, mode, &isExact); 2094 if (status != APFloat::opOK && 2095 (!roundTowardZero || status != APFloat::opInexact)) 2096 return nullptr; 2097 return ConstantInt::get(Ty, UIntVal, IsSigned); 2098 } 2099 2100 double getValueAsDouble(ConstantFP *Op) { 2101 Type *Ty = Op->getType(); 2102 2103 if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) 2104 return Op->getValueAPF().convertToDouble(); 2105 2106 bool unused; 2107 APFloat APF = Op->getValueAPF(); 2108 APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused); 2109 return APF.convertToDouble(); 2110 } 2111 2112 static bool getConstIntOrUndef(Value *Op, const APInt *&C) { 2113 if (auto *CI = dyn_cast<ConstantInt>(Op)) { 2114 C = &CI->getValue(); 2115 return true; 2116 } 2117 if (isa<UndefValue>(Op)) { 2118 C = nullptr; 2119 return true; 2120 } 2121 return false; 2122 } 2123 2124 /// Checks if the given intrinsic call, which evaluates to constant, is allowed 2125 /// to be folded. 2126 /// 2127 /// \param CI Constrained intrinsic call. 2128 /// \param St Exception flags raised during constant evaluation. 
static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
                               APFloat::opStatus St) {
  std::optional<RoundingMode> ORM = CI->getRoundingMode();
  std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();

  // If the operation does not change exception status flags, it is safe
  // to fold.
  if (St == APFloat::opStatus::opOK)
    return true;

  // If evaluation raised an FP exception, the result can depend on the
  // rounding mode. If the latter is unknown, folding is not possible.
  if (ORM == RoundingMode::Dynamic)
    return false;

  // If FP exceptions are ignored, fold the call, even if such an exception is
  // raised.
  if (EB && *EB != fp::ExceptionBehavior::ebStrict)
    return true;

  // Leave the calculation for runtime so that exception flags are correctly
  // set in hardware.
  return false;
}

/// Returns the rounding mode that should be used for constant evaluation.
static RoundingMode
getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
  std::optional<RoundingMode> ORM = CI->getRoundingMode();
  if (!ORM || *ORM == RoundingMode::Dynamic)
    // Even if the rounding mode is unknown, try evaluating the operation.
    // If it does not raise an inexact exception, rounding was not applied,
    // so the result is exact and does not depend on the rounding mode.
    // Whether other FP exceptions are raised does not depend on the rounding
    // mode either.
    return RoundingMode::NearestTiesToEven;
  return *ORM;
}

/// Try to constant fold llvm.canonicalize for the given caller and value.
static Constant *constantFoldCanonicalize(const Type *Ty, const CallBase *CI,
                                          const APFloat &Src) {
  // Zero, positive and negative, is always OK to fold.
  if (Src.isZero()) {
    // Get a fresh 0, since ppc_fp128 does have non-canonical zeros.
    return ConstantFP::get(
        CI->getContext(),
        APFloat::getZero(Src.getSemantics(), Src.isNegative()));
  }

  if (!Ty->isIEEELikeFPTy())
    return nullptr;

  // Zero is always canonical and the sign must be preserved.
  //
  // Denorms and nans may have special encodings, but it should be OK to fold a
  // totally average number.
  if (Src.isNormal() || Src.isInfinity())
    return ConstantFP::get(CI->getContext(), Src);

  if (Src.isDenormal() && CI->getParent() && CI->getFunction()) {
    DenormalMode DenormMode =
        CI->getFunction()->getDenormalMode(Src.getSemantics());

    if (DenormMode == DenormalMode::getIEEE())
      return ConstantFP::get(CI->getContext(), Src);

    if (DenormMode.Input == DenormalMode::Dynamic)
      return nullptr;

    // If we know whether either the input or the output is flushed, we can
    // fold.
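    // For example, under "denormal-fp-math"="preserve-sign,preserve-sign",
    // canonicalize(-0x1p-149f) folds to -0.0 below, while under "ieee,ieee"
    // the denormal input was already returned unchanged above.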
2199 if ((DenormMode.Input == DenormalMode::Dynamic && 2200 DenormMode.Output == DenormalMode::IEEE) || 2201 (DenormMode.Input == DenormalMode::IEEE && 2202 DenormMode.Output == DenormalMode::Dynamic)) 2203 return nullptr; 2204 2205 bool IsPositive = 2206 (!Src.isNegative() || DenormMode.Input == DenormalMode::PositiveZero || 2207 (DenormMode.Output == DenormalMode::PositiveZero && 2208 DenormMode.Input == DenormalMode::IEEE)); 2209 2210 return ConstantFP::get(CI->getContext(), 2211 APFloat::getZero(Src.getSemantics(), !IsPositive)); 2212 } 2213 2214 return nullptr; 2215 } 2216 2217 static Constant *ConstantFoldScalarCall1(StringRef Name, 2218 Intrinsic::ID IntrinsicID, 2219 Type *Ty, 2220 ArrayRef<Constant *> Operands, 2221 const TargetLibraryInfo *TLI, 2222 const CallBase *Call) { 2223 assert(Operands.size() == 1 && "Wrong number of operands."); 2224 2225 if (IntrinsicID == Intrinsic::is_constant) { 2226 // We know we have a "Constant" argument. But we want to only 2227 // return true for manifest constants, not those that depend on 2228 // constants with unknowable values, e.g. GlobalValue or BlockAddress. 2229 if (Operands[0]->isManifestConstant()) 2230 return ConstantInt::getTrue(Ty->getContext()); 2231 return nullptr; 2232 } 2233 2234 if (isa<UndefValue>(Operands[0])) { 2235 // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN. 2236 // ctpop() is between 0 and bitwidth, pick 0 for undef. 2237 // fptoui.sat and fptosi.sat can always fold to zero (for a zero input). 2238 if (IntrinsicID == Intrinsic::cos || 2239 IntrinsicID == Intrinsic::ctpop || 2240 IntrinsicID == Intrinsic::fptoui_sat || 2241 IntrinsicID == Intrinsic::fptosi_sat || 2242 IntrinsicID == Intrinsic::canonicalize) 2243 return Constant::getNullValue(Ty); 2244 if (IntrinsicID == Intrinsic::bswap || 2245 IntrinsicID == Intrinsic::bitreverse || 2246 IntrinsicID == Intrinsic::launder_invariant_group || 2247 IntrinsicID == Intrinsic::strip_invariant_group) 2248 return Operands[0]; 2249 } 2250 2251 if (isa<ConstantPointerNull>(Operands[0])) { 2252 // launder(null) == null == strip(null) iff in addrspace 0 2253 if (IntrinsicID == Intrinsic::launder_invariant_group || 2254 IntrinsicID == Intrinsic::strip_invariant_group) { 2255 // If instruction is not yet put in a basic block (e.g. when cloning 2256 // a function during inlining), Call's caller may not be available. 2257 // So check Call's BB first before querying Call->getCaller. 2258 const Function *Caller = 2259 Call->getParent() ? 
Call->getCaller() : nullptr; 2260 if (Caller && 2261 !NullPointerIsDefined( 2262 Caller, Operands[0]->getType()->getPointerAddressSpace())) { 2263 return Operands[0]; 2264 } 2265 return nullptr; 2266 } 2267 } 2268 2269 if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) { 2270 if (IntrinsicID == Intrinsic::convert_to_fp16) { 2271 APFloat Val(Op->getValueAPF()); 2272 2273 bool lost = false; 2274 Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost); 2275 2276 return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt()); 2277 } 2278 2279 APFloat U = Op->getValueAPF(); 2280 2281 if (IntrinsicID == Intrinsic::wasm_trunc_signed || 2282 IntrinsicID == Intrinsic::wasm_trunc_unsigned) { 2283 bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed; 2284 2285 if (U.isNaN()) 2286 return nullptr; 2287 2288 unsigned Width = Ty->getIntegerBitWidth(); 2289 APSInt Int(Width, !Signed); 2290 bool IsExact = false; 2291 APFloat::opStatus Status = 2292 U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact); 2293 2294 if (Status == APFloat::opOK || Status == APFloat::opInexact) 2295 return ConstantInt::get(Ty, Int); 2296 2297 return nullptr; 2298 } 2299 2300 if (IntrinsicID == Intrinsic::fptoui_sat || 2301 IntrinsicID == Intrinsic::fptosi_sat) { 2302 // convertToInteger() already has the desired saturation semantics. 2303 APSInt Int(Ty->getIntegerBitWidth(), 2304 IntrinsicID == Intrinsic::fptoui_sat); 2305 bool IsExact; 2306 U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact); 2307 return ConstantInt::get(Ty, Int); 2308 } 2309 2310 if (IntrinsicID == Intrinsic::canonicalize) 2311 return constantFoldCanonicalize(Ty, Call, U); 2312 2313 #if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128) 2314 if (Ty->isFP128Ty()) { 2315 if (IntrinsicID == Intrinsic::log) { 2316 float128 Result = logf128(Op->getValueAPF().convertToQuad()); 2317 return GetConstantFoldFPValue128(Result, Ty); 2318 } 2319 2320 LibFunc Fp128Func = NotLibFunc; 2321 if (TLI && TLI->getLibFunc(Name, Fp128Func) && TLI->has(Fp128Func) && 2322 Fp128Func == LibFunc_logl) 2323 return ConstantFoldFP128(logf128, Op->getValueAPF(), Ty); 2324 } 2325 #endif 2326 2327 if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy() && 2328 !Ty->isIntegerTy()) 2329 return nullptr; 2330 2331 // Use internal versions of these intrinsics. 
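    // For example, llvm.floor.f32(2.5) folds to 2.0, llvm.round.f32(2.5) to
    // 3.0 (ties away from zero), and llvm.roundeven.f32(2.5) to 2.0 (ties to
    // even).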
2332 2333 if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) { 2334 U.roundToIntegral(APFloat::rmNearestTiesToEven); 2335 return ConstantFP::get(Ty->getContext(), U); 2336 } 2337 2338 if (IntrinsicID == Intrinsic::round) { 2339 U.roundToIntegral(APFloat::rmNearestTiesToAway); 2340 return ConstantFP::get(Ty->getContext(), U); 2341 } 2342 2343 if (IntrinsicID == Intrinsic::roundeven) { 2344 U.roundToIntegral(APFloat::rmNearestTiesToEven); 2345 return ConstantFP::get(Ty->getContext(), U); 2346 } 2347 2348 if (IntrinsicID == Intrinsic::ceil) { 2349 U.roundToIntegral(APFloat::rmTowardPositive); 2350 return ConstantFP::get(Ty->getContext(), U); 2351 } 2352 2353 if (IntrinsicID == Intrinsic::floor) { 2354 U.roundToIntegral(APFloat::rmTowardNegative); 2355 return ConstantFP::get(Ty->getContext(), U); 2356 } 2357 2358 if (IntrinsicID == Intrinsic::trunc) { 2359 U.roundToIntegral(APFloat::rmTowardZero); 2360 return ConstantFP::get(Ty->getContext(), U); 2361 } 2362 2363 if (IntrinsicID == Intrinsic::fabs) { 2364 U.clearSign(); 2365 return ConstantFP::get(Ty->getContext(), U); 2366 } 2367 2368 if (IntrinsicID == Intrinsic::amdgcn_fract) { 2369 // The v_fract instruction behaves like the OpenCL spec, which defines 2370 // fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is 2371 // there to prevent fract(-small) from returning 1.0. It returns the 2372 // largest positive floating-point number less than 1.0." 2373 APFloat FloorU(U); 2374 FloorU.roundToIntegral(APFloat::rmTowardNegative); 2375 APFloat FractU(U - FloorU); 2376 APFloat AlmostOne(U.getSemantics(), 1); 2377 AlmostOne.next(/*nextDown*/ true); 2378 return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne)); 2379 } 2380 2381 // Rounding operations (floor, trunc, ceil, round and nearbyint) do not 2382 // raise FP exceptions, unless the argument is signaling NaN. 
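    // For example, llvm.experimental.constrained.trunc(2.7) folds to 2.0 even
    // under "fpexcept.strict", because truncation raises no exception here,
    // whereas llvm.experimental.constrained.rint(2.7) is not folded under
    // "fpexcept.strict" since rint raises an inexact exception.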
2383 2384 std::optional<APFloat::roundingMode> RM; 2385 switch (IntrinsicID) { 2386 default: 2387 break; 2388 case Intrinsic::experimental_constrained_nearbyint: 2389 case Intrinsic::experimental_constrained_rint: { 2390 auto CI = cast<ConstrainedFPIntrinsic>(Call); 2391 RM = CI->getRoundingMode(); 2392 if (!RM || *RM == RoundingMode::Dynamic) 2393 return nullptr; 2394 break; 2395 } 2396 case Intrinsic::experimental_constrained_round: 2397 RM = APFloat::rmNearestTiesToAway; 2398 break; 2399 case Intrinsic::experimental_constrained_ceil: 2400 RM = APFloat::rmTowardPositive; 2401 break; 2402 case Intrinsic::experimental_constrained_floor: 2403 RM = APFloat::rmTowardNegative; 2404 break; 2405 case Intrinsic::experimental_constrained_trunc: 2406 RM = APFloat::rmTowardZero; 2407 break; 2408 } 2409 if (RM) { 2410 auto CI = cast<ConstrainedFPIntrinsic>(Call); 2411 if (U.isFinite()) { 2412 APFloat::opStatus St = U.roundToIntegral(*RM); 2413 if (IntrinsicID == Intrinsic::experimental_constrained_rint && 2414 St == APFloat::opInexact) { 2415 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior(); 2416 if (EB == fp::ebStrict) 2417 return nullptr; 2418 } 2419 } else if (U.isSignaling()) { 2420 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior(); 2421 if (EB && *EB != fp::ebIgnore) 2422 return nullptr; 2423 U = APFloat::getQNaN(U.getSemantics()); 2424 } 2425 return ConstantFP::get(Ty->getContext(), U); 2426 } 2427 2428 // NVVM float/double to signed/unsigned int32/int64 conversions: 2429 switch (IntrinsicID) { 2430 // f2i 2431 case Intrinsic::nvvm_f2i_rm: 2432 case Intrinsic::nvvm_f2i_rn: 2433 case Intrinsic::nvvm_f2i_rp: 2434 case Intrinsic::nvvm_f2i_rz: 2435 case Intrinsic::nvvm_f2i_rm_ftz: 2436 case Intrinsic::nvvm_f2i_rn_ftz: 2437 case Intrinsic::nvvm_f2i_rp_ftz: 2438 case Intrinsic::nvvm_f2i_rz_ftz: 2439 // f2ui 2440 case Intrinsic::nvvm_f2ui_rm: 2441 case Intrinsic::nvvm_f2ui_rn: 2442 case Intrinsic::nvvm_f2ui_rp: 2443 case Intrinsic::nvvm_f2ui_rz: 2444 case Intrinsic::nvvm_f2ui_rm_ftz: 2445 case Intrinsic::nvvm_f2ui_rn_ftz: 2446 case Intrinsic::nvvm_f2ui_rp_ftz: 2447 case Intrinsic::nvvm_f2ui_rz_ftz: 2448 // d2i 2449 case Intrinsic::nvvm_d2i_rm: 2450 case Intrinsic::nvvm_d2i_rn: 2451 case Intrinsic::nvvm_d2i_rp: 2452 case Intrinsic::nvvm_d2i_rz: 2453 // d2ui 2454 case Intrinsic::nvvm_d2ui_rm: 2455 case Intrinsic::nvvm_d2ui_rn: 2456 case Intrinsic::nvvm_d2ui_rp: 2457 case Intrinsic::nvvm_d2ui_rz: 2458 // f2ll 2459 case Intrinsic::nvvm_f2ll_rm: 2460 case Intrinsic::nvvm_f2ll_rn: 2461 case Intrinsic::nvvm_f2ll_rp: 2462 case Intrinsic::nvvm_f2ll_rz: 2463 case Intrinsic::nvvm_f2ll_rm_ftz: 2464 case Intrinsic::nvvm_f2ll_rn_ftz: 2465 case Intrinsic::nvvm_f2ll_rp_ftz: 2466 case Intrinsic::nvvm_f2ll_rz_ftz: 2467 // f2ull 2468 case Intrinsic::nvvm_f2ull_rm: 2469 case Intrinsic::nvvm_f2ull_rn: 2470 case Intrinsic::nvvm_f2ull_rp: 2471 case Intrinsic::nvvm_f2ull_rz: 2472 case Intrinsic::nvvm_f2ull_rm_ftz: 2473 case Intrinsic::nvvm_f2ull_rn_ftz: 2474 case Intrinsic::nvvm_f2ull_rp_ftz: 2475 case Intrinsic::nvvm_f2ull_rz_ftz: 2476 // d2ll 2477 case Intrinsic::nvvm_d2ll_rm: 2478 case Intrinsic::nvvm_d2ll_rn: 2479 case Intrinsic::nvvm_d2ll_rp: 2480 case Intrinsic::nvvm_d2ll_rz: 2481 // d2ull 2482 case Intrinsic::nvvm_d2ull_rm: 2483 case Intrinsic::nvvm_d2ull_rn: 2484 case Intrinsic::nvvm_d2ull_rp: 2485 case Intrinsic::nvvm_d2ull_rz: { 2486 // In float-to-integer conversion, NaN inputs are converted to 0. 
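      // For example, nvvm.f2i.rm(-1.5f) folds to -2 (round toward negative
      // infinity), while nvvm.f2i.rz(-1.5f) folds to -1 (round toward zero).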
2487 if (U.isNaN()) 2488 return ConstantInt::get(Ty, 0); 2489 2490 APFloat::roundingMode RMode = 2491 nvvm::GetFPToIntegerRoundingMode(IntrinsicID); 2492 bool IsFTZ = nvvm::FPToIntegerIntrinsicShouldFTZ(IntrinsicID); 2493 bool IsSigned = nvvm::FPToIntegerIntrinsicResultIsSigned(IntrinsicID); 2494 2495 APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned); 2496 auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U; 2497 2498 bool IsExact = false; 2499 APFloat::opStatus Status = 2500 FloatToRound.convertToInteger(ResInt, RMode, &IsExact); 2501 2502 if (Status != APFloat::opInvalidOp) 2503 return ConstantInt::get(Ty, ResInt); 2504 return nullptr; 2505 } 2506 } 2507 2508 /// We only fold functions with finite arguments. Folding NaN and inf is 2509 /// likely to be aborted with an exception anyway, and some host libms 2510 /// have known errors raising exceptions. 2511 if (!U.isFinite()) 2512 return nullptr; 2513 2514 /// Currently APFloat versions of these functions do not exist, so we use 2515 /// the host native double versions. Float versions are not called 2516 /// directly but for all these it is true (float)(f((double)arg)) == 2517 /// f(arg). Long double not supported yet. 2518 const APFloat &APF = Op->getValueAPF(); 2519 2520 switch (IntrinsicID) { 2521 default: break; 2522 case Intrinsic::log: 2523 return ConstantFoldFP(log, APF, Ty); 2524 case Intrinsic::log2: 2525 // TODO: What about hosts that lack a C99 library? 2526 return ConstantFoldFP(log2, APF, Ty); 2527 case Intrinsic::log10: 2528 // TODO: What about hosts that lack a C99 library? 2529 return ConstantFoldFP(log10, APF, Ty); 2530 case Intrinsic::exp: 2531 return ConstantFoldFP(exp, APF, Ty); 2532 case Intrinsic::exp2: 2533 // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. 2534 return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty); 2535 case Intrinsic::exp10: 2536 // Fold exp10(x) as pow(10, x), in case the host lacks a C99 library. 2537 return ConstantFoldBinaryFP(pow, APFloat(10.0), APF, Ty); 2538 case Intrinsic::sin: 2539 return ConstantFoldFP(sin, APF, Ty); 2540 case Intrinsic::cos: 2541 return ConstantFoldFP(cos, APF, Ty); 2542 case Intrinsic::sinh: 2543 return ConstantFoldFP(sinh, APF, Ty); 2544 case Intrinsic::cosh: 2545 return ConstantFoldFP(cosh, APF, Ty); 2546 case Intrinsic::atan: 2547 // Implement optional behavior from C's Annex F for +/-0.0. 2548 if (U.isZero()) 2549 return ConstantFP::get(Ty->getContext(), U); 2550 return ConstantFoldFP(atan, APF, Ty); 2551 case Intrinsic::sqrt: 2552 return ConstantFoldFP(sqrt, APF, Ty); 2553 case Intrinsic::amdgcn_cos: 2554 case Intrinsic::amdgcn_sin: { 2555 double V = getValueAsDouble(Op); 2556 if (V < -256.0 || V > 256.0) 2557 // The gfx8 and gfx9 architectures handle arguments outside the range 2558 // [-256, 256] differently. This should be a rare case so bail out 2559 // rather than trying to handle the difference. 2560 return nullptr; 2561 bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos; 2562 double V4 = V * 4.0; 2563 if (V4 == floor(V4)) { 2564 // Force exact results for quarter-integer inputs. 2565 const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 }; 2566 V = SinVals[((int)V4 + (IsCos ? 
1 : 0)) & 3]; 2567 } else { 2568 if (IsCos) 2569 V = cos(V * 2.0 * numbers::pi); 2570 else 2571 V = sin(V * 2.0 * numbers::pi); 2572 } 2573 return GetConstantFoldFPValue(V, Ty); 2574 } 2575 } 2576 2577 if (!TLI) 2578 return nullptr; 2579 2580 LibFunc Func = NotLibFunc; 2581 if (!TLI->getLibFunc(Name, Func)) 2582 return nullptr; 2583 2584 switch (Func) { 2585 default: 2586 break; 2587 case LibFunc_acos: 2588 case LibFunc_acosf: 2589 case LibFunc_acos_finite: 2590 case LibFunc_acosf_finite: 2591 if (TLI->has(Func)) 2592 return ConstantFoldFP(acos, APF, Ty); 2593 break; 2594 case LibFunc_asin: 2595 case LibFunc_asinf: 2596 case LibFunc_asin_finite: 2597 case LibFunc_asinf_finite: 2598 if (TLI->has(Func)) 2599 return ConstantFoldFP(asin, APF, Ty); 2600 break; 2601 case LibFunc_atan: 2602 case LibFunc_atanf: 2603 // Implement optional behavior from C's Annex F for +/-0.0. 2604 if (U.isZero()) 2605 return ConstantFP::get(Ty->getContext(), U); 2606 if (TLI->has(Func)) 2607 return ConstantFoldFP(atan, APF, Ty); 2608 break; 2609 case LibFunc_ceil: 2610 case LibFunc_ceilf: 2611 if (TLI->has(Func)) { 2612 U.roundToIntegral(APFloat::rmTowardPositive); 2613 return ConstantFP::get(Ty->getContext(), U); 2614 } 2615 break; 2616 case LibFunc_cos: 2617 case LibFunc_cosf: 2618 if (TLI->has(Func)) 2619 return ConstantFoldFP(cos, APF, Ty); 2620 break; 2621 case LibFunc_cosh: 2622 case LibFunc_coshf: 2623 case LibFunc_cosh_finite: 2624 case LibFunc_coshf_finite: 2625 if (TLI->has(Func)) 2626 return ConstantFoldFP(cosh, APF, Ty); 2627 break; 2628 case LibFunc_exp: 2629 case LibFunc_expf: 2630 case LibFunc_exp_finite: 2631 case LibFunc_expf_finite: 2632 if (TLI->has(Func)) 2633 return ConstantFoldFP(exp, APF, Ty); 2634 break; 2635 case LibFunc_exp2: 2636 case LibFunc_exp2f: 2637 case LibFunc_exp2_finite: 2638 case LibFunc_exp2f_finite: 2639 if (TLI->has(Func)) 2640 // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. 2641 return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty); 2642 break; 2643 case LibFunc_fabs: 2644 case LibFunc_fabsf: 2645 if (TLI->has(Func)) { 2646 U.clearSign(); 2647 return ConstantFP::get(Ty->getContext(), U); 2648 } 2649 break; 2650 case LibFunc_floor: 2651 case LibFunc_floorf: 2652 if (TLI->has(Func)) { 2653 U.roundToIntegral(APFloat::rmTowardNegative); 2654 return ConstantFP::get(Ty->getContext(), U); 2655 } 2656 break; 2657 case LibFunc_log: 2658 case LibFunc_logf: 2659 case LibFunc_log_finite: 2660 case LibFunc_logf_finite: 2661 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func)) 2662 return ConstantFoldFP(log, APF, Ty); 2663 break; 2664 case LibFunc_log2: 2665 case LibFunc_log2f: 2666 case LibFunc_log2_finite: 2667 case LibFunc_log2f_finite: 2668 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func)) 2669 // TODO: What about hosts that lack a C99 library? 2670 return ConstantFoldFP(log2, APF, Ty); 2671 break; 2672 case LibFunc_log10: 2673 case LibFunc_log10f: 2674 case LibFunc_log10_finite: 2675 case LibFunc_log10f_finite: 2676 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func)) 2677 // TODO: What about hosts that lack a C99 library? 
2678 return ConstantFoldFP(log10, APF, Ty); 2679 break; 2680 case LibFunc_ilogb: 2681 case LibFunc_ilogbf: 2682 if (!APF.isZero() && TLI->has(Func)) 2683 return ConstantInt::get(Ty, ilogb(APF), true); 2684 break; 2685 case LibFunc_logb: 2686 case LibFunc_logbf: 2687 if (!APF.isZero() && TLI->has(Func)) 2688 return ConstantFoldFP(logb, APF, Ty); 2689 break; 2690 case LibFunc_log1p: 2691 case LibFunc_log1pf: 2692 // Implement optional behavior from C's Annex F for +/-0.0. 2693 if (U.isZero()) 2694 return ConstantFP::get(Ty->getContext(), U); 2695 if (APF > APFloat::getOne(APF.getSemantics(), true) && TLI->has(Func)) 2696 return ConstantFoldFP(log1p, APF, Ty); 2697 break; 2698 case LibFunc_logl: 2699 return nullptr; 2700 case LibFunc_erf: 2701 case LibFunc_erff: 2702 if (TLI->has(Func)) 2703 return ConstantFoldFP(erf, APF, Ty); 2704 break; 2705 case LibFunc_nearbyint: 2706 case LibFunc_nearbyintf: 2707 case LibFunc_rint: 2708 case LibFunc_rintf: 2709 if (TLI->has(Func)) { 2710 U.roundToIntegral(APFloat::rmNearestTiesToEven); 2711 return ConstantFP::get(Ty->getContext(), U); 2712 } 2713 break; 2714 case LibFunc_round: 2715 case LibFunc_roundf: 2716 if (TLI->has(Func)) { 2717 U.roundToIntegral(APFloat::rmNearestTiesToAway); 2718 return ConstantFP::get(Ty->getContext(), U); 2719 } 2720 break; 2721 case LibFunc_sin: 2722 case LibFunc_sinf: 2723 if (TLI->has(Func)) 2724 return ConstantFoldFP(sin, APF, Ty); 2725 break; 2726 case LibFunc_sinh: 2727 case LibFunc_sinhf: 2728 case LibFunc_sinh_finite: 2729 case LibFunc_sinhf_finite: 2730 if (TLI->has(Func)) 2731 return ConstantFoldFP(sinh, APF, Ty); 2732 break; 2733 case LibFunc_sqrt: 2734 case LibFunc_sqrtf: 2735 if (!APF.isNegative() && TLI->has(Func)) 2736 return ConstantFoldFP(sqrt, APF, Ty); 2737 break; 2738 case LibFunc_tan: 2739 case LibFunc_tanf: 2740 if (TLI->has(Func)) 2741 return ConstantFoldFP(tan, APF, Ty); 2742 break; 2743 case LibFunc_tanh: 2744 case LibFunc_tanhf: 2745 if (TLI->has(Func)) 2746 return ConstantFoldFP(tanh, APF, Ty); 2747 break; 2748 case LibFunc_trunc: 2749 case LibFunc_truncf: 2750 if (TLI->has(Func)) { 2751 U.roundToIntegral(APFloat::rmTowardZero); 2752 return ConstantFP::get(Ty->getContext(), U); 2753 } 2754 break; 2755 } 2756 return nullptr; 2757 } 2758 2759 if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) { 2760 switch (IntrinsicID) { 2761 case Intrinsic::bswap: 2762 return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap()); 2763 case Intrinsic::ctpop: 2764 return ConstantInt::get(Ty, Op->getValue().popcount()); 2765 case Intrinsic::bitreverse: 2766 return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits()); 2767 case Intrinsic::convert_from_fp16: { 2768 APFloat Val(APFloat::IEEEhalf(), Op->getValue()); 2769 2770 bool lost = false; 2771 APFloat::opStatus status = Val.convert( 2772 Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost); 2773 2774 // Conversion is always precise. 
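      // For example, convert_from_fp16(i16 0x3C00) yields 1.0 exactly, since
      // 0x3C00 is the IEEE half-precision encoding of 1.0.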
2775 (void)status; 2776 assert(status != APFloat::opInexact && !lost && 2777 "Precision lost during fp16 constfolding"); 2778 2779 return ConstantFP::get(Ty->getContext(), Val); 2780 } 2781 2782 case Intrinsic::amdgcn_s_wqm: { 2783 uint64_t Val = Op->getZExtValue(); 2784 Val |= (Val & 0x5555555555555555ULL) << 1 | 2785 ((Val >> 1) & 0x5555555555555555ULL); 2786 Val |= (Val & 0x3333333333333333ULL) << 2 | 2787 ((Val >> 2) & 0x3333333333333333ULL); 2788 return ConstantInt::get(Ty, Val); 2789 } 2790 2791 case Intrinsic::amdgcn_s_quadmask: { 2792 uint64_t Val = Op->getZExtValue(); 2793 uint64_t QuadMask = 0; 2794 for (unsigned I = 0; I < Op->getBitWidth() / 4; ++I, Val >>= 4) { 2795 if (!(Val & 0xF)) 2796 continue; 2797 2798 QuadMask |= (1ULL << I); 2799 } 2800 return ConstantInt::get(Ty, QuadMask); 2801 } 2802 2803 case Intrinsic::amdgcn_s_bitreplicate: { 2804 uint64_t Val = Op->getZExtValue(); 2805 Val = (Val & 0x000000000000FFFFULL) | (Val & 0x00000000FFFF0000ULL) << 16; 2806 Val = (Val & 0x000000FF000000FFULL) | (Val & 0x0000FF000000FF00ULL) << 8; 2807 Val = (Val & 0x000F000F000F000FULL) | (Val & 0x00F000F000F000F0ULL) << 4; 2808 Val = (Val & 0x0303030303030303ULL) | (Val & 0x0C0C0C0C0C0C0C0CULL) << 2; 2809 Val = (Val & 0x1111111111111111ULL) | (Val & 0x2222222222222222ULL) << 1; 2810 Val = Val | Val << 1; 2811 return ConstantInt::get(Ty, Val); 2812 } 2813 2814 default: 2815 return nullptr; 2816 } 2817 } 2818 2819 switch (IntrinsicID) { 2820 default: break; 2821 case Intrinsic::vector_reduce_add: 2822 case Intrinsic::vector_reduce_mul: 2823 case Intrinsic::vector_reduce_and: 2824 case Intrinsic::vector_reduce_or: 2825 case Intrinsic::vector_reduce_xor: 2826 case Intrinsic::vector_reduce_smin: 2827 case Intrinsic::vector_reduce_smax: 2828 case Intrinsic::vector_reduce_umin: 2829 case Intrinsic::vector_reduce_umax: 2830 if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0])) 2831 return C; 2832 break; 2833 } 2834 2835 // Support ConstantVector in case we have an Undef in the top. 2836 if (isa<ConstantVector>(Operands[0]) || 2837 isa<ConstantDataVector>(Operands[0]) || 2838 isa<ConstantAggregateZero>(Operands[0])) { 2839 auto *Op = cast<Constant>(Operands[0]); 2840 switch (IntrinsicID) { 2841 default: break; 2842 case Intrinsic::x86_sse_cvtss2si: 2843 case Intrinsic::x86_sse_cvtss2si64: 2844 case Intrinsic::x86_sse2_cvtsd2si: 2845 case Intrinsic::x86_sse2_cvtsd2si64: 2846 if (ConstantFP *FPOp = 2847 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) 2848 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), 2849 /*roundTowardZero=*/false, Ty, 2850 /*IsSigned*/true); 2851 break; 2852 case Intrinsic::x86_sse_cvttss2si: 2853 case Intrinsic::x86_sse_cvttss2si64: 2854 case Intrinsic::x86_sse2_cvttsd2si: 2855 case Intrinsic::x86_sse2_cvttsd2si64: 2856 if (ConstantFP *FPOp = 2857 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) 2858 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), 2859 /*roundTowardZero=*/true, Ty, 2860 /*IsSigned*/true); 2861 break; 2862 2863 case Intrinsic::wasm_anytrue: 2864 return Op->isZeroValue() ? 
ConstantInt::get(Ty, 0) 2865 : ConstantInt::get(Ty, 1); 2866 2867 case Intrinsic::wasm_alltrue: 2868 // Check each element individually 2869 unsigned E = cast<FixedVectorType>(Op->getType())->getNumElements(); 2870 for (unsigned I = 0; I != E; ++I) 2871 if (Constant *Elt = Op->getAggregateElement(I)) 2872 if (Elt->isZeroValue()) 2873 return ConstantInt::get(Ty, 0); 2874 2875 return ConstantInt::get(Ty, 1); 2876 } 2877 } 2878 2879 return nullptr; 2880 } 2881 2882 static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2, 2883 const ConstrainedFPIntrinsic *Call) { 2884 APFloat::opStatus St = APFloat::opOK; 2885 auto *FCmp = cast<ConstrainedFPCmpIntrinsic>(Call); 2886 FCmpInst::Predicate Cond = FCmp->getPredicate(); 2887 if (FCmp->isSignaling()) { 2888 if (Op1.isNaN() || Op2.isNaN()) 2889 St = APFloat::opInvalidOp; 2890 } else { 2891 if (Op1.isSignaling() || Op2.isSignaling()) 2892 St = APFloat::opInvalidOp; 2893 } 2894 bool Result = FCmpInst::compare(Op1, Op2, Cond); 2895 if (mayFoldConstrained(const_cast<ConstrainedFPCmpIntrinsic *>(FCmp), St)) 2896 return ConstantInt::get(Call->getType()->getScalarType(), Result); 2897 return nullptr; 2898 } 2899 2900 static Constant *ConstantFoldLibCall2(StringRef Name, Type *Ty, 2901 ArrayRef<Constant *> Operands, 2902 const TargetLibraryInfo *TLI) { 2903 if (!TLI) 2904 return nullptr; 2905 2906 LibFunc Func = NotLibFunc; 2907 if (!TLI->getLibFunc(Name, Func)) 2908 return nullptr; 2909 2910 const auto *Op1 = dyn_cast<ConstantFP>(Operands[0]); 2911 if (!Op1) 2912 return nullptr; 2913 2914 const auto *Op2 = dyn_cast<ConstantFP>(Operands[1]); 2915 if (!Op2) 2916 return nullptr; 2917 2918 const APFloat &Op1V = Op1->getValueAPF(); 2919 const APFloat &Op2V = Op2->getValueAPF(); 2920 2921 switch (Func) { 2922 default: 2923 break; 2924 case LibFunc_pow: 2925 case LibFunc_powf: 2926 case LibFunc_pow_finite: 2927 case LibFunc_powf_finite: 2928 if (TLI->has(Func)) 2929 return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); 2930 break; 2931 case LibFunc_fmod: 2932 case LibFunc_fmodf: 2933 if (TLI->has(Func)) { 2934 APFloat V = Op1->getValueAPF(); 2935 if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF())) 2936 return ConstantFP::get(Ty->getContext(), V); 2937 } 2938 break; 2939 case LibFunc_remainder: 2940 case LibFunc_remainderf: 2941 if (TLI->has(Func)) { 2942 APFloat V = Op1->getValueAPF(); 2943 if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF())) 2944 return ConstantFP::get(Ty->getContext(), V); 2945 } 2946 break; 2947 case LibFunc_atan2: 2948 case LibFunc_atan2f: 2949 // atan2(+/-0.0, +/-0.0) is known to raise an exception on some libm 2950 // (Solaris), so we do not assume a known result for that. 2951 if (Op1V.isZero() && Op2V.isZero()) 2952 return nullptr; 2953 [[fallthrough]]; 2954 case LibFunc_atan2_finite: 2955 case LibFunc_atan2f_finite: 2956 if (TLI->has(Func)) 2957 return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); 2958 break; 2959 } 2960 2961 return nullptr; 2962 } 2963 2964 static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty, 2965 ArrayRef<Constant *> Operands, 2966 const CallBase *Call) { 2967 assert(Operands.size() == 2 && "Wrong number of operands."); 2968 2969 if (Ty->isFloatingPointTy()) { 2970 // TODO: We should have undef handling for all of the FP intrinsics that 2971 // are attempted to be folded in this function. 
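    // For example, maxnum(float undef, float 3.0) folds to 3.0 below,
    // matching the "return the other operand" semantics of these intrinsics.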
2972 bool IsOp0Undef = isa<UndefValue>(Operands[0]); 2973 bool IsOp1Undef = isa<UndefValue>(Operands[1]); 2974 switch (IntrinsicID) { 2975 case Intrinsic::maxnum: 2976 case Intrinsic::minnum: 2977 case Intrinsic::maximum: 2978 case Intrinsic::minimum: 2979 case Intrinsic::maximumnum: 2980 case Intrinsic::minimumnum: 2981 case Intrinsic::nvvm_fmax_d: 2982 case Intrinsic::nvvm_fmin_d: 2983 // If one argument is undef, return the other argument. 2984 if (IsOp0Undef) 2985 return Operands[1]; 2986 if (IsOp1Undef) 2987 return Operands[0]; 2988 break; 2989 2990 case Intrinsic::nvvm_fmax_f: 2991 case Intrinsic::nvvm_fmax_ftz_f: 2992 case Intrinsic::nvvm_fmax_ftz_nan_f: 2993 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f: 2994 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f: 2995 case Intrinsic::nvvm_fmax_nan_f: 2996 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f: 2997 case Intrinsic::nvvm_fmax_xorsign_abs_f: 2998 2999 case Intrinsic::nvvm_fmin_f: 3000 case Intrinsic::nvvm_fmin_ftz_f: 3001 case Intrinsic::nvvm_fmin_ftz_nan_f: 3002 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f: 3003 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f: 3004 case Intrinsic::nvvm_fmin_nan_f: 3005 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f: 3006 case Intrinsic::nvvm_fmin_xorsign_abs_f: 3007 // If one arg is undef, the other arg can be returned only if it is 3008 // constant, as we may need to flush it to sign-preserving zero or 3009 // canonicalize the NaN. 3010 if (!IsOp0Undef && !IsOp1Undef) 3011 break; 3012 if (auto *Op = dyn_cast<ConstantFP>(Operands[IsOp0Undef ? 1 : 0])) { 3013 if (Op->isNaN()) { 3014 APInt NVCanonicalNaN(32, 0x7fffffff); 3015 return ConstantFP::get( 3016 Ty, APFloat(Ty->getFltSemantics(), NVCanonicalNaN)); 3017 } 3018 if (nvvm::FMinFMaxShouldFTZ(IntrinsicID)) 3019 return ConstantFP::get(Ty, FTZPreserveSign(Op->getValueAPF())); 3020 else 3021 return Op; 3022 } 3023 break; 3024 } 3025 } 3026 3027 if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) { 3028 const APFloat &Op1V = Op1->getValueAPF(); 3029 3030 if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) { 3031 if (Op2->getType() != Op1->getType()) 3032 return nullptr; 3033 const APFloat &Op2V = Op2->getValueAPF(); 3034 3035 if (const auto *ConstrIntr = 3036 dyn_cast_if_present<ConstrainedFPIntrinsic>(Call)) { 3037 RoundingMode RM = getEvaluationRoundingMode(ConstrIntr); 3038 APFloat Res = Op1V; 3039 APFloat::opStatus St; 3040 switch (IntrinsicID) { 3041 default: 3042 return nullptr; 3043 case Intrinsic::experimental_constrained_fadd: 3044 St = Res.add(Op2V, RM); 3045 break; 3046 case Intrinsic::experimental_constrained_fsub: 3047 St = Res.subtract(Op2V, RM); 3048 break; 3049 case Intrinsic::experimental_constrained_fmul: 3050 St = Res.multiply(Op2V, RM); 3051 break; 3052 case Intrinsic::experimental_constrained_fdiv: 3053 St = Res.divide(Op2V, RM); 3054 break; 3055 case Intrinsic::experimental_constrained_frem: 3056 St = Res.mod(Op2V); 3057 break; 3058 case Intrinsic::experimental_constrained_fcmp: 3059 case Intrinsic::experimental_constrained_fcmps: 3060 return evaluateCompare(Op1V, Op2V, ConstrIntr); 3061 } 3062 if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), 3063 St)) 3064 return ConstantFP::get(Ty->getContext(), Res); 3065 return nullptr; 3066 } 3067 3068 switch (IntrinsicID) { 3069 default: 3070 break; 3071 case Intrinsic::copysign: 3072 return ConstantFP::get(Ty->getContext(), APFloat::copySign(Op1V, Op2V)); 3073 case Intrinsic::minnum: 3074 return ConstantFP::get(Ty->getContext(), minnum(Op1V, Op2V)); 3075 case 
Intrinsic::maxnum: 3076 return ConstantFP::get(Ty->getContext(), maxnum(Op1V, Op2V)); 3077 case Intrinsic::minimum: 3078 return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V)); 3079 case Intrinsic::maximum: 3080 return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V)); 3081 case Intrinsic::minimumnum: 3082 return ConstantFP::get(Ty->getContext(), minimumnum(Op1V, Op2V)); 3083 case Intrinsic::maximumnum: 3084 return ConstantFP::get(Ty->getContext(), maximumnum(Op1V, Op2V)); 3085 3086 case Intrinsic::nvvm_fmax_d: 3087 case Intrinsic::nvvm_fmax_f: 3088 case Intrinsic::nvvm_fmax_ftz_f: 3089 case Intrinsic::nvvm_fmax_ftz_nan_f: 3090 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f: 3091 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f: 3092 case Intrinsic::nvvm_fmax_nan_f: 3093 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f: 3094 case Intrinsic::nvvm_fmax_xorsign_abs_f: 3095 3096 case Intrinsic::nvvm_fmin_d: 3097 case Intrinsic::nvvm_fmin_f: 3098 case Intrinsic::nvvm_fmin_ftz_f: 3099 case Intrinsic::nvvm_fmin_ftz_nan_f: 3100 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f: 3101 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f: 3102 case Intrinsic::nvvm_fmin_nan_f: 3103 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f: 3104 case Intrinsic::nvvm_fmin_xorsign_abs_f: { 3105 3106 bool ShouldCanonicalizeNaNs = !(IntrinsicID == Intrinsic::nvvm_fmax_d || 3107 IntrinsicID == Intrinsic::nvvm_fmin_d); 3108 bool IsFTZ = nvvm::FMinFMaxShouldFTZ(IntrinsicID); 3109 bool IsNaNPropagating = nvvm::FMinFMaxPropagatesNaNs(IntrinsicID); 3110 bool IsXorSignAbs = nvvm::FMinFMaxIsXorSignAbs(IntrinsicID); 3111 3112 APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V; 3113 APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V; 3114 3115 bool XorSign = false; 3116 if (IsXorSignAbs) { 3117 XorSign = A.isNegative() ^ B.isNegative(); 3118 A = abs(A); 3119 B = abs(B); 3120 } 3121 3122 bool IsFMax = false; 3123 switch (IntrinsicID) { 3124 case Intrinsic::nvvm_fmax_d: 3125 case Intrinsic::nvvm_fmax_f: 3126 case Intrinsic::nvvm_fmax_ftz_f: 3127 case Intrinsic::nvvm_fmax_ftz_nan_f: 3128 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f: 3129 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f: 3130 case Intrinsic::nvvm_fmax_nan_f: 3131 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f: 3132 case Intrinsic::nvvm_fmax_xorsign_abs_f: 3133 IsFMax = true; 3134 break; 3135 } 3136 APFloat Res = IsFMax ? maximum(A, B) : minimum(A, B); 3137 3138 if (ShouldCanonicalizeNaNs) { 3139 APFloat NVCanonicalNaN(Res.getSemantics(), APInt(32, 0x7fffffff)); 3140 if (A.isNaN() && B.isNaN()) 3141 return ConstantFP::get(Ty, NVCanonicalNaN); 3142 else if (IsNaNPropagating && (A.isNaN() || B.isNaN())) 3143 return ConstantFP::get(Ty, NVCanonicalNaN); 3144 } 3145 3146 if (A.isNaN() && B.isNaN()) 3147 return Operands[1]; 3148 else if (A.isNaN()) 3149 Res = B; 3150 else if (B.isNaN()) 3151 Res = A; 3152 3153 if (IsXorSignAbs && XorSign != Res.isNegative()) 3154 Res.changeSign(); 3155 3156 return ConstantFP::get(Ty->getContext(), Res); 3157 } 3158 } 3159 3160 if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) 3161 return nullptr; 3162 3163 switch (IntrinsicID) { 3164 default: 3165 break; 3166 case Intrinsic::pow: 3167 return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); 3168 case Intrinsic::amdgcn_fmul_legacy: 3169 // The legacy behaviour is that multiplying +/- 0.0 by anything, even 3170 // NaN or infinity, gives +0.0. 
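        // For example, amdgcn.fmul.legacy(+0.0, NaN) folds to +0.0 here,
        // where a regular fmul would fold to NaN.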
        bool XorSign = false;
        if (IsXorSignAbs) {
          XorSign = A.isNegative() ^ B.isNegative();
          A = abs(A);
          B = abs(B);
        }

        bool IsFMax = false;
        switch (IntrinsicID) {
        case Intrinsic::nvvm_fmax_d:
        case Intrinsic::nvvm_fmax_f:
        case Intrinsic::nvvm_fmax_ftz_f:
        case Intrinsic::nvvm_fmax_ftz_nan_f:
        case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
        case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
        case Intrinsic::nvvm_fmax_nan_f:
        case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
        case Intrinsic::nvvm_fmax_xorsign_abs_f:
          IsFMax = true;
          break;
        }
        APFloat Res = IsFMax ? maximum(A, B) : minimum(A, B);

        if (ShouldCanonicalizeNaNs) {
          APFloat NVCanonicalNaN(Res.getSemantics(), APInt(32, 0x7fffffff));
          if (A.isNaN() && B.isNaN())
            return ConstantFP::get(Ty, NVCanonicalNaN);
          else if (IsNaNPropagating && (A.isNaN() || B.isNaN()))
            return ConstantFP::get(Ty, NVCanonicalNaN);
        }

        if (A.isNaN() && B.isNaN())
          return Operands[1];
        else if (A.isNaN())
          Res = B;
        else if (B.isNaN())
          Res = A;

        if (IsXorSignAbs && XorSign != Res.isNegative())
          Res.changeSign();

        return ConstantFP::get(Ty->getContext(), Res);
      }
      }

      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
        return nullptr;

      switch (IntrinsicID) {
      default:
        break;
      case Intrinsic::pow:
        return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
      case Intrinsic::amdgcn_fmul_legacy:
        // The legacy behaviour is that multiplying +/- 0.0 by anything, even
        // NaN or infinity, gives +0.0.
        if (Op1V.isZero() || Op2V.isZero())
          return ConstantFP::getZero(Ty);
        return ConstantFP::get(Ty->getContext(), Op1V * Op2V);
      }

    } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
      switch (IntrinsicID) {
      case Intrinsic::ldexp: {
        return ConstantFP::get(
            Ty->getContext(),
            scalbn(Op1V, Op2C->getSExtValue(), APFloat::rmNearestTiesToEven));
      }
      case Intrinsic::is_fpclass: {
        FPClassTest Mask = static_cast<FPClassTest>(Op2C->getZExtValue());
        bool Result =
            ((Mask & fcSNan) && Op1V.isNaN() && Op1V.isSignaling()) ||
            ((Mask & fcQNan) && Op1V.isNaN() && !Op1V.isSignaling()) ||
            ((Mask & fcNegInf) && Op1V.isNegInfinity()) ||
            ((Mask & fcNegNormal) && Op1V.isNormal() && Op1V.isNegative()) ||
            ((Mask & fcNegSubnormal) && Op1V.isDenormal() && Op1V.isNegative()) ||
            ((Mask & fcNegZero) && Op1V.isZero() && Op1V.isNegative()) ||
            ((Mask & fcPosZero) && Op1V.isZero() && !Op1V.isNegative()) ||
            ((Mask & fcPosSubnormal) && Op1V.isDenormal() && !Op1V.isNegative()) ||
            ((Mask & fcPosNormal) && Op1V.isNormal() && !Op1V.isNegative()) ||
            ((Mask & fcPosInf) && Op1V.isPosInfinity());
        return ConstantInt::get(Ty, Result);
      }
      case Intrinsic::powi: {
        int Exp = static_cast<int>(Op2C->getSExtValue());
        switch (Ty->getTypeID()) {
        case Type::HalfTyID:
        case Type::FloatTyID: {
          APFloat Res(static_cast<float>(std::pow(Op1V.convertToFloat(), Exp)));
          if (Ty->isHalfTy()) {
            bool Unused;
            Res.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
                        &Unused);
          }
          return ConstantFP::get(Ty->getContext(), Res);
        }
        case Type::DoubleTyID:
          return ConstantFP::get(Ty, std::pow(Op1V.convertToDouble(), Exp));
        default:
          return nullptr;
        }
      }
      default:
        break;
      }
    }
    return nullptr;
  }

  if (Operands[0]->getType()->isIntegerTy() &&
      Operands[1]->getType()->isIntegerTy()) {
    const APInt *C0, *C1;
    if (!getConstIntOrUndef(Operands[0], C0) ||
        !getConstIntOrUndef(Operands[1], C1))
      return nullptr;

    switch (IntrinsicID) {
    default: break;
    case Intrinsic::smax:
    case Intrinsic::smin:
    case Intrinsic::umax:
    case Intrinsic::umin:
      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return MinMaxIntrinsic::getSaturationPoint(IntrinsicID, Ty);
      return ConstantInt::get(
          Ty, ICmpInst::compare(*C0, *C1,
                                MinMaxIntrinsic::getPredicate(IntrinsicID))
                  ? *C0
                  : *C1);

    case Intrinsic::scmp:
    case Intrinsic::ucmp:
      if (!C0 || !C1)
        return ConstantInt::get(Ty, 0);
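
      // scmp/ucmp produce +1, -1, or 0 in a signed result type; e.g.
      // scmp(i32 -5, i32 7) folds to -1 because -5 <s 7.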
      int Res;
      if (IntrinsicID == Intrinsic::scmp)
        Res = C0->sgt(*C1) ? 1 : C0->slt(*C1) ? -1 : 0;
      else
        Res = C0->ugt(*C1) ? 1 : C0->ult(*C1) ? -1 : 0;
      return ConstantInt::get(Ty, Res, /*IsSigned=*/true);

    case Intrinsic::usub_with_overflow:
    case Intrinsic::ssub_with_overflow:
      // X - undef -> { 0, false }
      // undef - X -> { 0, false }
      if (!C0 || !C1)
        return Constant::getNullValue(Ty);
      [[fallthrough]];
    case Intrinsic::uadd_with_overflow:
    case Intrinsic::sadd_with_overflow:
      // X + undef -> { -1, false }
      // undef + X -> { -1, false }
      if (!C0 || !C1) {
        return ConstantStruct::get(
            cast<StructType>(Ty),
            {Constant::getAllOnesValue(Ty->getStructElementType(0)),
             Constant::getNullValue(Ty->getStructElementType(1))});
      }
      [[fallthrough]];
    case Intrinsic::smul_with_overflow:
    case Intrinsic::umul_with_overflow: {
      // undef * X -> { 0, false }
      // X * undef -> { 0, false }
      if (!C0 || !C1)
        return Constant::getNullValue(Ty);

      APInt Res;
      bool Overflow;
      switch (IntrinsicID) {
      default: llvm_unreachable("Invalid case");
      case Intrinsic::sadd_with_overflow:
        Res = C0->sadd_ov(*C1, Overflow);
        break;
      case Intrinsic::uadd_with_overflow:
        Res = C0->uadd_ov(*C1, Overflow);
        break;
      case Intrinsic::ssub_with_overflow:
        Res = C0->ssub_ov(*C1, Overflow);
        break;
      case Intrinsic::usub_with_overflow:
        Res = C0->usub_ov(*C1, Overflow);
        break;
      case Intrinsic::smul_with_overflow:
        Res = C0->smul_ov(*C1, Overflow);
        break;
      case Intrinsic::umul_with_overflow:
        Res = C0->umul_ov(*C1, Overflow);
        break;
      }
      Constant *Ops[] = {
        ConstantInt::get(Ty->getContext(), Res),
        ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
      };
      return ConstantStruct::get(cast<StructType>(Ty), Ops);
    }
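    // Saturating arithmetic clamps to the type's range instead of wrapping;
    // e.g. uadd.sat(i8 200, i8 100) folds to 255 and ssub.sat(i8 -100, i8 100)
    // folds to -128.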
    case Intrinsic::uadd_sat:
    case Intrinsic::sadd_sat:
      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return Constant::getAllOnesValue(Ty);
      if (IntrinsicID == Intrinsic::uadd_sat)
        return ConstantInt::get(Ty, C0->uadd_sat(*C1));
      else
        return ConstantInt::get(Ty, C0->sadd_sat(*C1));
    case Intrinsic::usub_sat:
    case Intrinsic::ssub_sat:
      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return Constant::getNullValue(Ty);
      if (IntrinsicID == Intrinsic::usub_sat)
        return ConstantInt::get(Ty, C0->usub_sat(*C1));
      else
        return ConstantInt::get(Ty, C0->ssub_sat(*C1));
    case Intrinsic::cttz:
    case Intrinsic::ctlz:
      assert(C1 && "Must be constant int");

      // cttz(0, 1) and ctlz(0, 1) are poison.
      if (C1->isOne() && (!C0 || C0->isZero()))
        return PoisonValue::get(Ty);
      if (!C0)
        return Constant::getNullValue(Ty);
      if (IntrinsicID == Intrinsic::cttz)
        return ConstantInt::get(Ty, C0->countr_zero());
      else
        return ConstantInt::get(Ty, C0->countl_zero());

    case Intrinsic::abs:
      assert(C1 && "Must be constant int");
      assert((C1->isOne() || C1->isZero()) && "Must be 0 or 1");

      // Undef or minimum val operand with poison min --> poison
      if (C1->isOne() && (!C0 || C0->isMinSignedValue()))
        return PoisonValue::get(Ty);

      // Undef operand with no poison min --> 0 (sign bit must be clear)
      if (!C0)
        return Constant::getNullValue(Ty);

      return ConstantInt::get(Ty, C0->abs());
    case Intrinsic::amdgcn_wave_reduce_umin:
    case Intrinsic::amdgcn_wave_reduce_umax:
      return dyn_cast<Constant>(Operands[0]);
    }

    return nullptr;
  }

  // Also accept a ConstantVector here: a vector constant containing an undef
  // element is represented as a ConstantVector, not a ConstantDataVector.
  if ((isa<ConstantVector>(Operands[0]) ||
       isa<ConstantDataVector>(Operands[0])) &&
      // Check for default rounding mode.
      // FIXME: Support other rounding modes?
      isa<ConstantInt>(Operands[1]) &&
      cast<ConstantInt>(Operands[1])->getValue() == 4) {
    auto *Op = cast<Constant>(Operands[0]);
    switch (IntrinsicID) {
    default: break;
    case Intrinsic::x86_avx512_vcvtss2si32:
    case Intrinsic::x86_avx512_vcvtss2si64:
    case Intrinsic::x86_avx512_vcvtsd2si32:
    case Intrinsic::x86_avx512_vcvtsd2si64:
      if (ConstantFP *FPOp =
              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                           /*roundTowardZero=*/false, Ty,
                                           /*IsSigned=*/true);
      break;
    case Intrinsic::x86_avx512_vcvtss2usi32:
    case Intrinsic::x86_avx512_vcvtss2usi64:
    case Intrinsic::x86_avx512_vcvtsd2usi32:
    case Intrinsic::x86_avx512_vcvtsd2usi64:
      if (ConstantFP *FPOp =
              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                           /*roundTowardZero=*/false, Ty,
                                           /*IsSigned=*/false);
      break;
    case Intrinsic::x86_avx512_cvttss2si:
    case Intrinsic::x86_avx512_cvttss2si64:
    case Intrinsic::x86_avx512_cvttsd2si:
    case Intrinsic::x86_avx512_cvttsd2si64:
      if (ConstantFP *FPOp =
              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                           /*roundTowardZero=*/true, Ty,
                                           /*IsSigned=*/true);
      break;
    case Intrinsic::x86_avx512_cvttss2usi:
    case Intrinsic::x86_avx512_cvttss2usi64:
    case Intrinsic::x86_avx512_cvttsd2usi:
    case Intrinsic::x86_avx512_cvttsd2usi64:
      if (ConstantFP *FPOp =
              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                           /*roundTowardZero=*/true, Ty,
                                           /*IsSigned=*/false);
      break;
    }
  }
  return nullptr;
}

static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
                                               const APFloat &S0,
                                               const APFloat &S1,
                                               const APFloat &S2) {
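  // This mirrors the cube-map face selection below: the major axis is the
  // input with the largest magnitude, and the face ID is 2 * axis + (1 if the
  // major-axis value is negative), with axis 0 = S0, 1 = S1, 2 = S2.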
  unsigned ID;
  const fltSemantics &Sem = S0.getSemantics();
  APFloat MA(Sem), SC(Sem), TC(Sem);
  if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) {
    if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) {
      // S2 < 0
      ID = 5;
      SC = -S0;
    } else {
      ID = 4;
      SC = S0;
    }
    MA = S2;
    TC = -S1;
  } else if (abs(S1) >= abs(S0)) {
    if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) {
      // S1 < 0
      ID = 3;
      TC = -S2;
    } else {
      ID = 2;
      TC = S2;
    }
    MA = S1;
    SC = S0;
  } else {
    if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) {
      // S0 < 0
      ID = 1;
      SC = S2;
    } else {
      ID = 0;
      SC = -S2;
    }
    MA = S0;
    TC = -S1;
  }
  switch (IntrinsicID) {
  default:
    llvm_unreachable("unhandled amdgcn cube intrinsic");
  case Intrinsic::amdgcn_cubeid:
    return APFloat(Sem, ID);
  case Intrinsic::amdgcn_cubema:
    return MA + MA;
  case Intrinsic::amdgcn_cubesc:
    return SC;
  case Intrinsic::amdgcn_cubetc:
    return TC;
  }
}

static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands,
                                                 Type *Ty) {
  const APInt *C0, *C1, *C2;
  if (!getConstIntOrUndef(Operands[0], C0) ||
      !getConstIntOrUndef(Operands[1], C1) ||
      !getConstIntOrUndef(Operands[2], C2))
    return nullptr;

  if (!C2)
    return UndefValue::get(Ty);

  APInt Val(32, 0);
  unsigned NumUndefBytes = 0;
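
  // Each selector byte of C2 produces one result byte: selectors 0-7 pick
  // byte (Sel & 3) of C1 (selectors 0-3) or C0 (selectors 4-7), 8-11
  // replicate bit 15 or bit 31 of a source across the byte, 12 yields 0x00,
  // and 13 and above yield 0xff.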
  for (unsigned I = 0; I < 32; I += 8) {
    unsigned Sel = C2->extractBitsAsZExtValue(8, I);
    unsigned B = 0;

    if (Sel >= 13)
      B = 0xff;
    else if (Sel == 12)
      B = 0x00;
    else {
      const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1;
      if (!Src)
        ++NumUndefBytes;
      else if (Sel < 8)
        B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8);
      else
        B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 31 : 15) * 0xff;
    }

    Val.insertBits(B, I, 8);
  }

  if (NumUndefBytes == 4)
    return UndefValue::get(Ty);

  return ConstantInt::get(Ty, Val);
}

static Constant *ConstantFoldScalarCall3(StringRef Name,
                                         Intrinsic::ID IntrinsicID,
                                         Type *Ty,
                                         ArrayRef<Constant *> Operands,
                                         const TargetLibraryInfo *TLI,
                                         const CallBase *Call) {
  assert(Operands.size() == 3 && "Wrong number of operands.");

  if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
    if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
      if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) {
        const APFloat &C1 = Op1->getValueAPF();
        const APFloat &C2 = Op2->getValueAPF();
        const APFloat &C3 = Op3->getValueAPF();

        if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
          RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
          APFloat Res = C1;
          APFloat::opStatus St;
          switch (IntrinsicID) {
          default:
            return nullptr;
          case Intrinsic::experimental_constrained_fma:
          case Intrinsic::experimental_constrained_fmuladd:
            St = Res.fusedMultiplyAdd(C2, C3, RM);
            break;
          }
          if (mayFoldConstrained(
                  const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
            return ConstantFP::get(Ty->getContext(), Res);
          return nullptr;
        }

        switch (IntrinsicID) {
        default: break;
        case Intrinsic::amdgcn_fma_legacy: {
          // The legacy behaviour is that multiplying +/- 0.0 by anything, even
          // NaN or infinity, gives +0.0.
          if (C1.isZero() || C2.isZero()) {
            // It's tempting to just return C3 here, but that would give the
            // wrong result if C3 was -0.0.
            return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3);
          }
          [[fallthrough]];
        }
        case Intrinsic::fma:
        case Intrinsic::fmuladd: {
          APFloat V = C1;
          V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven);
          return ConstantFP::get(Ty->getContext(), V);
        }
        case Intrinsic::amdgcn_cubeid:
        case Intrinsic::amdgcn_cubema:
        case Intrinsic::amdgcn_cubesc:
        case Intrinsic::amdgcn_cubetc: {
          APFloat V = ConstantFoldAMDGCNCubeIntrinsic(IntrinsicID, C1, C2, C3);
          return ConstantFP::get(Ty->getContext(), V);
        }
        }
      }
    }
  }

  if (IntrinsicID == Intrinsic::smul_fix ||
      IntrinsicID == Intrinsic::smul_fix_sat) {
    const APInt *C0, *C1;
    if (!getConstIntOrUndef(Operands[0], C0) ||
        !getConstIntOrUndef(Operands[1], C1))
      return nullptr;

    // undef * C -> 0
    // C * undef -> 0
    if (!C0 || !C1)
      return Constant::getNullValue(Ty);
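
    // The operands and result share the same fixed-point scale; e.g. with
    // Scale == 2, smul.fix(i8 6, i8 10, 2) is 1.5 * 2.5: (6 * 10) >> 2
    // folds to 15, which encodes 3.75.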
    // This code performs rounding towards negative infinity in case the result
    // cannot be represented exactly for the given scale. Targets that do care
    // about rounding should use a target hook for specifying how rounding
    // should be done, and provide their own folding to be consistent with
    // rounding. This is the same approach as used by
    // DAGTypeLegalizer::ExpandIntRes_MULFIX.
    unsigned Scale = cast<ConstantInt>(Operands[2])->getZExtValue();
    unsigned Width = C0->getBitWidth();
    assert(Scale < Width && "Illegal scale.");
    unsigned ExtendedWidth = Width * 2;
    APInt Product =
        (C0->sext(ExtendedWidth) * C1->sext(ExtendedWidth)).ashr(Scale);
    if (IntrinsicID == Intrinsic::smul_fix_sat) {
      APInt Max = APInt::getSignedMaxValue(Width).sext(ExtendedWidth);
      APInt Min = APInt::getSignedMinValue(Width).sext(ExtendedWidth);
      Product = APIntOps::smin(Product, Max);
      Product = APIntOps::smax(Product, Min);
    }
    return ConstantInt::get(Ty->getContext(), Product.sextOrTrunc(Width));
  }

  if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
    const APInt *C0, *C1, *C2;
    if (!getConstIntOrUndef(Operands[0], C0) ||
        !getConstIntOrUndef(Operands[1], C1) ||
        !getConstIntOrUndef(Operands[2], C2))
      return nullptr;

    bool IsRight = IntrinsicID == Intrinsic::fshr;
    if (!C2)
      return Operands[IsRight ? 1 : 0];
    if (!C0 && !C1)
      return UndefValue::get(Ty);

    // The shift amount is interpreted as modulo the bitwidth. If the shift
    // amount is effectively 0, avoid UB due to oversized inverse shift below.
    unsigned BitWidth = C2->getBitWidth();
    unsigned ShAmt = C2->urem(BitWidth);
    if (!ShAmt)
      return Operands[IsRight ? 1 : 0];
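
    // The funnel shift concatenates C0 (high) with C1 (low) and extracts a
    // BitWidth-wide field; e.g. fshl(i8 0x81, i8 0x7f, 3) folds to
    // (0x81 << 3) | (0x7f >> 5) == 0x08 | 0x03 == 0x0b.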
    // (C0 << ShlAmt) | (C1 >> LshrAmt)
    unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
    unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
    if (!C0)
      return ConstantInt::get(Ty, C1->lshr(LshrAmt));
    if (!C1)
      return ConstantInt::get(Ty, C0->shl(ShlAmt));
    return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
  }

  if (IntrinsicID == Intrinsic::amdgcn_perm)
    return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty);

  return nullptr;
}

static Constant *ConstantFoldScalarCall(StringRef Name,
                                        Intrinsic::ID IntrinsicID,
                                        Type *Ty,
                                        ArrayRef<Constant *> Operands,
                                        const TargetLibraryInfo *TLI,
                                        const CallBase *Call) {
  if (IntrinsicID != Intrinsic::not_intrinsic &&
      any_of(Operands, IsaPred<PoisonValue>) &&
      intrinsicPropagatesPoison(IntrinsicID))
    return PoisonValue::get(Ty);

  if (Operands.size() == 1)
    return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);

  if (Operands.size() == 2) {
    if (Constant *FoldedLibCall =
            ConstantFoldLibCall2(Name, Ty, Operands, TLI)) {
      return FoldedLibCall;
    }
    return ConstantFoldIntrinsicCall2(IntrinsicID, Ty, Operands, Call);
  }

  if (Operands.size() == 3)
    return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);

  return nullptr;
}

static Constant *ConstantFoldFixedVectorCall(
    StringRef Name, Intrinsic::ID IntrinsicID, FixedVectorType *FVTy,
    ArrayRef<Constant *> Operands, const DataLayout &DL,
    const TargetLibraryInfo *TLI, const CallBase *Call) {
  SmallVector<Constant *, 4> Result(FVTy->getNumElements());
  SmallVector<Constant *, 4> Lane(Operands.size());
  Type *Ty = FVTy->getElementType();

  switch (IntrinsicID) {
  case Intrinsic::masked_load: {
    auto *SrcPtr = Operands[0];
    auto *Mask = Operands[2];
    auto *Passthru = Operands[3];

    Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL);

    SmallVector<Constant *, 32> NewElements;
    for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
      auto *MaskElt = Mask->getAggregateElement(I);
      if (!MaskElt)
        break;
      auto *PassthruElt = Passthru->getAggregateElement(I);
      auto *VecElt = VecData ? VecData->getAggregateElement(I) : nullptr;
      if (isa<UndefValue>(MaskElt)) {
        // An undef mask lane may take either value; prefer the passthru.
        // Continue so the null/one checks below don't reject the undef lane.
        if (PassthruElt)
          NewElements.push_back(PassthruElt);
        else if (VecElt)
          NewElements.push_back(VecElt);
        else
          return nullptr;
        continue;
      }
      if (MaskElt->isNullValue()) {
        if (!PassthruElt)
          return nullptr;
        NewElements.push_back(PassthruElt);
      } else if (MaskElt->isOneValue()) {
        if (!VecElt)
          return nullptr;
        NewElements.push_back(VecElt);
      } else {
        return nullptr;
      }
    }
    if (NewElements.size() != FVTy->getNumElements())
      return nullptr;
    return ConstantVector::get(NewElements);
  }
  case Intrinsic::arm_mve_vctp8:
  case Intrinsic::arm_mve_vctp16:
  case Intrinsic::arm_mve_vctp32:
  case Intrinsic::arm_mve_vctp64: {
    if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
      unsigned Lanes = FVTy->getNumElements();
      uint64_t Limit = Op->getZExtValue();

      SmallVector<Constant *, 16> NCs;
      for (unsigned i = 0; i < Lanes; i++) {
        if (i < Limit)
          NCs.push_back(ConstantInt::getTrue(Ty));
        else
          NCs.push_back(ConstantInt::getFalse(Ty));
      }
      return ConstantVector::get(NCs);
    }
    return nullptr;
  }
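  // e.g. get_active_lane_mask(i32 2, i32 5) on <4 x i1> folds to
  // <i1 1, i1 1, i1 1, i1 0>: lane i is active while 2 + i < 5.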
  case Intrinsic::get_active_lane_mask: {
    auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
    auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
    if (Op0 && Op1) {
      unsigned Lanes = FVTy->getNumElements();
      uint64_t Base = Op0->getZExtValue();
      uint64_t Limit = Op1->getZExtValue();

      SmallVector<Constant *, 16> NCs;
      for (unsigned i = 0; i < Lanes; i++) {
        if (Base + i < Limit)
          NCs.push_back(ConstantInt::getTrue(Ty));
        else
          NCs.push_back(ConstantInt::getFalse(Ty));
      }
      return ConstantVector::get(NCs);
    }
    return nullptr;
  }
  case Intrinsic::vector_extract: {
    auto *Idx = dyn_cast<ConstantInt>(Operands[1]);
    Constant *Vec = Operands[0];
    if (!Idx || !isa<FixedVectorType>(Vec->getType()))
      return nullptr;

    unsigned NumElements = FVTy->getNumElements();
    unsigned VecNumElements =
        cast<FixedVectorType>(Vec->getType())->getNumElements();
    unsigned StartingIndex = Idx->getZExtValue();

    // Extracting the entire vector is a no-op.
    if (NumElements == VecNumElements && StartingIndex == 0)
      return Vec;

    for (unsigned I = StartingIndex, E = StartingIndex + NumElements; I < E;
         ++I) {
      Constant *Elt = Vec->getAggregateElement(I);
      if (!Elt)
        return nullptr;
      Result[I - StartingIndex] = Elt;
    }

    return ConstantVector::get(Result);
  }
  case Intrinsic::vector_insert: {
    Constant *Vec = Operands[0];
    Constant *SubVec = Operands[1];
    auto *Idx = dyn_cast<ConstantInt>(Operands[2]);
    if (!Idx || !isa<FixedVectorType>(Vec->getType()))
      return nullptr;

    unsigned SubVecNumElements =
        cast<FixedVectorType>(SubVec->getType())->getNumElements();
    unsigned VecNumElements =
        cast<FixedVectorType>(Vec->getType())->getNumElements();
    unsigned IdxN = Idx->getZExtValue();
    // Replacing the entire vector with the subvector is a no-op.
    if (SubVecNumElements == VecNumElements && IdxN == 0)
      return SubVec;

    for (unsigned I = 0; I < VecNumElements; ++I) {
      Constant *Elt;
      // Lanes in [IdxN, IdxN + SubVecNumElements) come from the subvector;
      // the lower-bound check also keeps I - IdxN from wrapping.
      if (I >= IdxN && I < IdxN + SubVecNumElements)
        Elt = SubVec->getAggregateElement(I - IdxN);
      else
        Elt = Vec->getAggregateElement(I);
      if (!Elt)
        return nullptr;
      Result[I] = Elt;
    }
    return ConstantVector::get(Result);
  }
  case Intrinsic::vector_interleave2: {
    unsigned NumElements =
        cast<FixedVectorType>(Operands[0]->getType())->getNumElements();
    for (unsigned I = 0; I < NumElements; ++I) {
      Constant *Elt0 = Operands[0]->getAggregateElement(I);
      Constant *Elt1 = Operands[1]->getAggregateElement(I);
      if (!Elt0 || !Elt1)
        return nullptr;
      Result[2 * I] = Elt0;
      Result[2 * I + 1] = Elt1;
    }
    return ConstantVector::get(Result);
  }
  default:
    break;
  }

  for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
    // Gather a column of constants.
    for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
      // Some intrinsics use a scalar type for certain arguments.
      if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J, /*TTI=*/nullptr)) {
        Lane[J] = Operands[J];
        continue;
      }

      Constant *Agg = Operands[J]->getAggregateElement(I);
      if (!Agg)
        return nullptr;

      Lane[J] = Agg;
    }

    // Use the regular scalar folding to simplify this column.
    Constant *Folded =
        ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call);
    if (!Folded)
      return nullptr;
    Result[I] = Folded;
  }

  return ConstantVector::get(Result);
}

static Constant *ConstantFoldScalableVectorCall(
    StringRef Name, Intrinsic::ID IntrinsicID, ScalableVectorType *SVTy,
    ArrayRef<Constant *> Operands, const DataLayout &DL,
    const TargetLibraryInfo *TLI, const CallBase *Call) {
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_convert_from_svbool: {
    auto *Src = dyn_cast<Constant>(Operands[0]);
    if (!Src || !Src->isNullValue())
      break;

    return ConstantInt::getFalse(SVTy);
  }
  default:
    break;
  }

  // If trivially vectorizable, try folding it via the scalar call if all
  // operands are splats.

  // TODO: ConstantFoldFixedVectorCall should probably check this too?
  if (!isTriviallyVectorizable(IntrinsicID))
    return nullptr;

  SmallVector<Constant *, 4> SplatOps;
  for (auto [I, Op] : enumerate(Operands)) {
    if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, I, /*TTI=*/nullptr)) {
      SplatOps.push_back(Op);
      continue;
    }
    Constant *Splat = Op->getSplatValue();
    if (!Splat)
      return nullptr;
    SplatOps.push_back(Splat);
  }
  Constant *Folded = ConstantFoldScalarCall(
      Name, IntrinsicID, SVTy->getElementType(), SplatOps, TLI, Call);
  if (!Folded)
    return nullptr;
  return ConstantVector::getSplat(SVTy->getElementCount(), Folded);
}
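
// frexp splits a value into a mantissa in [0.5, 1.0) and an integral power of
// two; e.g. frexp(8.0) folds to {0.5, 4} because 8.0 == 0.5 * 2^4.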
static std::pair<Constant *, Constant *>
ConstantFoldScalarFrexpCall(Constant *Op, Type *IntTy) {
  if (isa<PoisonValue>(Op))
    return {Op, PoisonValue::get(IntTy)};

  auto *ConstFP = dyn_cast<ConstantFP>(Op);
  if (!ConstFP)
    return {};

  const APFloat &U = ConstFP->getValueAPF();
  int FrexpExp;
  APFloat FrexpMant = frexp(U, FrexpExp, APFloat::rmNearestTiesToEven);
  Constant *Result0 = ConstantFP::get(ConstFP->getType(), FrexpMant);

  // The exponent is an "unspecified value" for inf/nan. We use zero to avoid
  // using undef.
  Constant *Result1 = FrexpMant.isFinite()
                          ? ConstantInt::getSigned(IntTy, FrexpExp)
                          : ConstantInt::getNullValue(IntTy);
  return {Result0, Result1};
}

/// Handle intrinsics that return tuples, which may be tuples of vectors.
static Constant *
ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
                       StructType *StTy, ArrayRef<Constant *> Operands,
                       const DataLayout &DL, const TargetLibraryInfo *TLI,
                       const CallBase *Call) {

  switch (IntrinsicID) {
  case Intrinsic::frexp: {
    Type *Ty0 = StTy->getContainedType(0);
    Type *Ty1 = StTy->getContainedType(1)->getScalarType();

    if (auto *FVTy0 = dyn_cast<FixedVectorType>(Ty0)) {
      SmallVector<Constant *, 4> Results0(FVTy0->getNumElements());
      SmallVector<Constant *, 4> Results1(FVTy0->getNumElements());

      for (unsigned I = 0, E = FVTy0->getNumElements(); I != E; ++I) {
        Constant *Lane = Operands[0]->getAggregateElement(I);
        std::tie(Results0[I], Results1[I]) =
            ConstantFoldScalarFrexpCall(Lane, Ty1);
        if (!Results0[I])
          return nullptr;
      }

      return ConstantStruct::get(StTy, ConstantVector::get(Results0),
                                 ConstantVector::get(Results1));
    }

    auto [Result0, Result1] = ConstantFoldScalarFrexpCall(Operands[0], Ty1);
    if (!Result0)
      return nullptr;
    return ConstantStruct::get(StTy, Result0, Result1);
  }
  case Intrinsic::sincos: {
    Type *Ty = StTy->getContainedType(0);
    Type *TyScalar = Ty->getScalarType();

    auto ConstantFoldScalarSincosCall =
        [&](Constant *Op) -> std::pair<Constant *, Constant *> {
      Constant *SinResult =
          ConstantFoldScalarCall(Name, Intrinsic::sin, TyScalar, Op, TLI, Call);
      Constant *CosResult =
          ConstantFoldScalarCall(Name, Intrinsic::cos, TyScalar, Op, TLI, Call);
      return std::make_pair(SinResult, CosResult);
    };

    if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
      SmallVector<Constant *> SinResults(FVTy->getNumElements());
      SmallVector<Constant *> CosResults(FVTy->getNumElements());

      for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
        Constant *Lane = Operands[0]->getAggregateElement(I);
        std::tie(SinResults[I], CosResults[I]) =
            ConstantFoldScalarSincosCall(Lane);
        if (!SinResults[I] || !CosResults[I])
          return nullptr;
      }

      return ConstantStruct::get(StTy, ConstantVector::get(SinResults),
                                 ConstantVector::get(CosResults));
    }

    auto [SinResult, CosResult] = ConstantFoldScalarSincosCall(Operands[0]);
    if (!SinResult || !CosResult)
      return nullptr;
    return ConstantStruct::get(StTy, SinResult, CosResult);
  }
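  // deinterleave2 is the inverse of interleave2: even lanes go to the first
  // result and odd lanes to the second, so <a, b, c, d> folds to the pair
  // { <a, c>, <b, d> }.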
  case Intrinsic::vector_deinterleave2: {
    auto *Vec = Operands[0];
    auto *VecTy = cast<VectorType>(Vec->getType());

    if (auto *EltC = Vec->getSplatValue()) {
      ElementCount HalfEC = VecTy->getElementCount().divideCoefficientBy(2);
      auto *HalfVec = ConstantVector::getSplat(HalfEC, EltC);
      return ConstantStruct::get(StTy, HalfVec, HalfVec);
    }

    if (!isa<FixedVectorType>(Vec->getType()))
      return nullptr;

    unsigned NumElements = VecTy->getElementCount().getFixedValue() / 2;
    SmallVector<Constant *, 4> Res0(NumElements), Res1(NumElements);
    for (unsigned I = 0; I < NumElements; ++I) {
      Constant *Elt0 = Vec->getAggregateElement(2 * I);
      Constant *Elt1 = Vec->getAggregateElement(2 * I + 1);
      if (!Elt0 || !Elt1)
        return nullptr;
      Res0[I] = Elt0;
      Res1[I] = Elt1;
    }
    return ConstantStruct::get(StTy, ConstantVector::get(Res0),
                               ConstantVector::get(Res1));
  }
  default:
    // TODO: Constant folding of vector intrinsics that fall through here does
    // not work (e.g. overflow intrinsics).
    return ConstantFoldScalarCall(Name, IntrinsicID, StTy, Operands, TLI, Call);
  }

  return nullptr;
}

} // end anonymous namespace

Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
                                            Constant *RHS, Type *Ty,
                                            Instruction *FMFSource) {
  auto *Call = dyn_cast_if_present<CallBase>(FMFSource);
  // Ensure we check flags like StrictFP that might prevent this from getting
  // folded before generating a result.
  if (Call && !canConstantFoldCallTo(Call, Call->getCalledFunction()))
    return nullptr;
  return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, Call);
}

Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
                                 ArrayRef<Constant *> Operands,
                                 const TargetLibraryInfo *TLI,
                                 bool AllowNonDeterministic) {
  if (Call->isNoBuiltin())
    return nullptr;
  if (!F->hasName())
    return nullptr;

  // If this is not an intrinsic and not recognized as a library call, bail
  // out.
  Intrinsic::ID IID = F->getIntrinsicID();
  if (IID == Intrinsic::not_intrinsic) {
    if (!TLI)
      return nullptr;
    LibFunc LibF;
    if (!TLI->getLibFunc(*F, LibF))
      return nullptr;
  }

  // Conservatively assume that floating-point libcalls may be
  // non-deterministic.
  Type *Ty = F->getReturnType();
  if (!AllowNonDeterministic && Ty->isFPOrFPVectorTy())
    return nullptr;

  StringRef Name = F->getName();
  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
    return ConstantFoldFixedVectorCall(
        Name, IID, FVTy, Operands, F->getDataLayout(), TLI, Call);

  if (auto *SVTy = dyn_cast<ScalableVectorType>(Ty))
    return ConstantFoldScalableVectorCall(
        Name, IID, SVTy, Operands, F->getDataLayout(), TLI, Call);

  if (auto *StTy = dyn_cast<StructType>(Ty))
    return ConstantFoldStructCall(Name, IID, StTy, Operands,
                                  F->getDataLayout(), TLI, Call);

  // TODO: If this is a library function, we already discovered that above,
  // so we should pass the LibFunc, not the name (and it might be better
  // still to separate intrinsic handling from libcalls).
  return ConstantFoldScalarCall(Name, IID, Ty, Operands, TLI, Call);
}

bool llvm::isMathLibCallNoop(const CallBase *Call,
                             const TargetLibraryInfo *TLI) {
  // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
  // (and to some extent ConstantFoldScalarCall).
  if (Call->isNoBuiltin() || Call->isStrictFP())
    return false;
  Function *F = Call->getCalledFunction();
  if (!F)
    return false;

  LibFunc Func;
  if (!TLI || !TLI->getLibFunc(*F, Func))
    return false;

  if (Call->arg_size() == 1) {
    if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
      const APFloat &Op = OpC->getValueAPF();
      switch (Func) {
      case LibFunc_logl:
      case LibFunc_log:
      case LibFunc_logf:
      case LibFunc_log2l:
      case LibFunc_log2:
      case LibFunc_log2f:
      case LibFunc_log10l:
      case LibFunc_log10:
      case LibFunc_log10f:
        return Op.isNaN() || (!Op.isZero() && !Op.isNegative());

      case LibFunc_ilogb:
        return !Op.isNaN() && !Op.isZero() && !Op.isInfinity();

      case LibFunc_expl:
      case LibFunc_exp:
      case LibFunc_expf:
        // FIXME: These boundaries are slightly conservative.
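        // The thresholds approximate where exp over/underflows the type: for
        // doubles, ln(DBL_MAX) is roughly 709.78 and the result fully
        // underflows a little below -745, so inputs within [-745.0, 709.0]
        // are assumed not to set errno.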
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-745.0) || Op > APFloat(709.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f));
        break;

      case LibFunc_exp2l:
      case LibFunc_exp2:
      case LibFunc_exp2f:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f));
        break;

      case LibFunc_sinl:
      case LibFunc_sin:
      case LibFunc_sinf:
      case LibFunc_cosl:
      case LibFunc_cos:
      case LibFunc_cosf:
        return !Op.isInfinity();

      case LibFunc_tanl:
      case LibFunc_tan:
      case LibFunc_tanf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = OpC->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy())
          return ConstantFoldFP(tan, OpC->getValueAPF(), Ty) != nullptr;
        break;
      }

      case LibFunc_atan:
      case LibFunc_atanf:
      case LibFunc_atanl:
        // Per POSIX, this MAY fail if Op is denormal. We choose to assume it
        // does not fail.
        return true;

      case LibFunc_asinl:
      case LibFunc_asin:
      case LibFunc_asinf:
      case LibFunc_acosl:
      case LibFunc_acos:
      case LibFunc_acosf:
        return !(Op < APFloat::getOne(Op.getSemantics(), true) ||
                 Op > APFloat::getOne(Op.getSemantics()));

      case LibFunc_sinh:
      case LibFunc_cosh:
      case LibFunc_sinhf:
      case LibFunc_coshf:
      case LibFunc_sinhl:
      case LibFunc_coshl:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-710.0) || Op > APFloat(710.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f));
        break;

      case LibFunc_sqrtl:
      case LibFunc_sqrt:
      case LibFunc_sqrtf:
        return Op.isNaN() || Op.isZero() || !Op.isNegative();

      // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
      // maybe others?
      default:
        break;
      }
    }
  }

  if (Call->arg_size() == 2) {
    ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
    ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
    if (Op0C && Op1C) {
      const APFloat &Op0 = Op0C->getValueAPF();
      const APFloat &Op1 = Op1C->getValueAPF();

      switch (Func) {
      case LibFunc_powl:
      case LibFunc_pow:
      case LibFunc_powf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = Op0C->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
          if (Ty == Op1C->getType())
            return ConstantFoldBinaryFP(pow, Op0, Op1, Ty) != nullptr;
        }
        break;
      }

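      // fmod/remainder raise a domain error only for an infinite dividend or
      // a zero divisor; e.g. fmod(5.5, 2.0) folds to a safe 1.5.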
      case LibFunc_fmodl:
      case LibFunc_fmod:
      case LibFunc_fmodf:
      case LibFunc_remainderl:
      case LibFunc_remainder:
      case LibFunc_remainderf:
        return Op0.isNaN() || Op1.isNaN() ||
               (!Op0.isInfinity() && !Op1.isZero());

      case LibFunc_atan2:
      case LibFunc_atan2f:
      case LibFunc_atan2l:
        // Although IEEE-754 says atan2(+/-0.0, +/-0.0) are well-defined, and
        // GLIBC and MSVC do not appear to raise an error on those, we cannot
        // rely on that behavior. POSIX and C11 say that a domain error may
        // occur, so allow for that possibility.
        return !Op0.isZero() || !Op1.isZero();

      default:
        break;
      }
    }
  }

  return false;
}

void TargetFolder::anchor() {}