//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines routines for folding instructions into constants.
//
// Also, to supplement the basic IR ConstantExpr simplifications,
// this file defines some additional folding routines that can make use of
// DataLayout information. These functions cannot go in IR due to library
// dependency issues.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Config/config.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantFold.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cerrno>
#include <cfenv>
#include <cmath>
#include <cstdint>

using namespace llvm;

namespace {

//===----------------------------------------------------------------------===//
// Constant Folding internal helper functions
//===----------------------------------------------------------------------===//

static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
                                        Constant *C, Type *SrcEltTy,
                                        unsigned NumSrcElts,
                                        const DataLayout &DL) {
  // Now that we know that the input value is a vector of integers, just shift
  // and insert them into our result.
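  // Elements are visited most-significant first: on little-endian targets the
  // highest-indexed element carries the most significant bits of the combined
  // value, on big-endian targets the lowest-indexed element does, so each
  // iteration shifts the accumulated result left and ORs in the next element.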
  unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
  for (unsigned i = 0; i != NumSrcElts; ++i) {
    Constant *Element;
    if (DL.isLittleEndian())
      Element = C->getAggregateElement(NumSrcElts - i - 1);
    else
      Element = C->getAggregateElement(i);

    if (Element && isa<UndefValue>(Element)) {
      Result <<= BitShift;
      continue;
    }

    auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
    if (!ElementCI)
      return ConstantExpr::getBitCast(C, DestTy);

    Result <<= BitShift;
    Result |= ElementCI->getValue().zext(Result.getBitWidth());
  }

  return nullptr;
}

/// Constant fold bitcast, symbolically evaluating it with DataLayout.
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
  assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) &&
         "Invalid constantexpr bitcast!");

  // Catch the obvious splat cases.
  if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy))
    return Res;

  if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
    // Handle a vector->scalar integer/fp cast.
    if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
      unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements();
      Type *SrcEltTy = VTy->getElementType();

      // If the vector is a vector of floating point, convert it to vector of int
      // to simplify things.
      if (SrcEltTy->isFloatingPointTy()) {
        unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
        auto *SrcIVTy = FixedVectorType::get(
            IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
        // Ask IR to do the conversion now that #elts line up.
        C = ConstantExpr::getBitCast(C, SrcIVTy);
      }

      APInt Result(DL.getTypeSizeInBits(DestTy), 0);
      if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
                                                SrcEltTy, NumSrcElts, DL))
        return CE;

      if (isa<IntegerType>(DestTy))
        return ConstantInt::get(DestTy, Result);

      APFloat FP(DestTy->getFltSemantics(), Result);
      return ConstantFP::get(DestTy->getContext(), FP);
    }
  }

  // The code below only handles casts to vectors currently.
  auto *DestVTy = dyn_cast<VectorType>(DestTy);
  if (!DestVTy)
    return ConstantExpr::getBitCast(C, DestTy);

  // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
  // vector so the code below can handle it uniformly.
  if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
    Constant *Ops = C; // don't take the address of C!
    return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
  }

  // If this is a bitcast from constant vector -> vector, fold it.
  if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
    return ConstantExpr::getBitCast(C, DestTy);

  // If the element types match, IR can fold it.
  unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements();
  unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements();
  if (NumDstElt == NumSrcElt)
    return ConstantExpr::getBitCast(C, DestTy);

  Type *SrcEltTy = cast<VectorType>(C->getType())->getElementType();
  Type *DstEltTy = DestVTy->getElementType();

  // Otherwise, we're changing the number of elements in a vector, which
  // requires endianness information to do the right thing.
  // For example,
  //   bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
  // folds to (little endian):
  //   <4 x i32> <i32 0, i32 0, i32 1, i32 0>
  // and to (big endian):
  //   <4 x i32> <i32 0, i32 0, i32 0, i32 1>

  // First things first: we only want to think about integers here, so if
  // we have something in FP form, recast it as integer.
  if (DstEltTy->isFloatingPointTy()) {
    // Fold to a vector of integers with the same size as our FP type.
    unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
    auto *DestIVTy = FixedVectorType::get(
        IntegerType::get(C->getContext(), FPWidth), NumDstElt);
    // Recursively handle this integer conversion, if possible.
    C = FoldBitCast(C, DestIVTy, DL);

    // Finally, IR can handle this now that #elts line up.
    return ConstantExpr::getBitCast(C, DestTy);
  }

  // Okay, we know the destination is integer, if the input is FP, convert
  // it to integer first.
  if (SrcEltTy->isFloatingPointTy()) {
    unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
    auto *SrcIVTy = FixedVectorType::get(
        IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
    // Ask IR to do the conversion now that #elts line up.
    C = ConstantExpr::getBitCast(C, SrcIVTy);
    // If IR wasn't able to fold it, bail out.
    if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector.
        !isa<ConstantDataVector>(C))
      return C;
  }

  // Now we know that the input and output vectors are both integer vectors
  // of the same size, and that their #elements is not the same. Do the
  // conversion here, which depends on whether the input or output has
  // more elements.
  bool isLittleEndian = DL.isLittleEndian();

  SmallVector<Constant*, 32> Result;
  if (NumDstElt < NumSrcElt) {
    // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
    Constant *Zero = Constant::getNullValue(DstEltTy);
    unsigned Ratio = NumSrcElt/NumDstElt;
    unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
    unsigned SrcElt = 0;
    for (unsigned i = 0; i != NumDstElt; ++i) {
      // Build each element of the result.
      Constant *Elt = Zero;
      unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
      for (unsigned j = 0; j != Ratio; ++j) {
        Constant *Src = C->getAggregateElement(SrcElt++);
        if (Src && isa<UndefValue>(Src))
          Src = Constant::getNullValue(
              cast<VectorType>(C->getType())->getElementType());
        else
          Src = dyn_cast_or_null<ConstantInt>(Src);
        if (!Src) // Reject constantexpr elements.
          return ConstantExpr::getBitCast(C, DestTy);

        // Zero extend the element to the right size.
        Src = ConstantExpr::getZExt(Src, Elt->getType());

        // Shift it to the right place, depending on endianness.
        Src = ConstantExpr::getShl(Src,
                                   ConstantInt::get(Src->getType(), ShiftAmt));
        ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;

        // Mix it in.
        Elt = ConstantFoldBinaryOpOperands(Instruction::Or, Elt, Src, DL);
        assert(Elt && "Constant folding cannot fail on plain integers");
      }
      Result.push_back(Elt);
    }
    return ConstantVector::get(Result);
  }

  // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
  unsigned Ratio = NumDstElt/NumSrcElt;
  unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);

  // Loop over each source value, expanding into multiple results.
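  // Each source element is split into Ratio destination elements, extracted
  // by shifting the source value and truncating to the destination element
  // width, with the shift order again determined by endianness.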
  for (unsigned i = 0; i != NumSrcElt; ++i) {
    auto *Element = C->getAggregateElement(i);

    if (!Element) // Reject constantexpr elements.
      return ConstantExpr::getBitCast(C, DestTy);

    if (isa<UndefValue>(Element)) {
      // Correctly propagate undef values.
      Result.append(Ratio, UndefValue::get(DstEltTy));
      continue;
    }

    auto *Src = dyn_cast<ConstantInt>(Element);
    if (!Src)
      return ConstantExpr::getBitCast(C, DestTy);

    unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
    for (unsigned j = 0; j != Ratio; ++j) {
      // Shift the piece of the value into the right place, depending on
      // endianness.
      Constant *Elt = ConstantExpr::getLShr(Src,
                              ConstantInt::get(Src->getType(), ShiftAmt));
      ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;

      // Truncate the element to an integer with the same pointer size and
      // convert the element back to a pointer using an inttoptr.
      if (DstEltTy->isPointerTy()) {
        IntegerType *DstIntTy = Type::getIntNTy(C->getContext(), DstBitSize);
        Constant *CE = ConstantExpr::getTrunc(Elt, DstIntTy);
        Result.push_back(ConstantExpr::getIntToPtr(CE, DstEltTy));
        continue;
      }

      // Truncate and remember this piece.
      Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
    }
  }

  return ConstantVector::get(Result);
}

} // end anonymous namespace

/// If this constant is a constant offset from a global, return the global and
/// the constant. Because of constantexprs, this function is recursive.
bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
                                      APInt &Offset, const DataLayout &DL,
                                      DSOLocalEquivalent **DSOEquiv) {
  if (DSOEquiv)
    *DSOEquiv = nullptr;

  // Trivial case, constant is the global.
  if ((GV = dyn_cast<GlobalValue>(C))) {
    unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
    Offset = APInt(BitWidth, 0);
    return true;
  }

  if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) {
    if (DSOEquiv)
      *DSOEquiv = FoundDSOEquiv;
    GV = FoundDSOEquiv->getGlobalValue();
    unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
    Offset = APInt(BitWidth, 0);
    return true;
  }

  // Otherwise, if this isn't a constant expr, bail out.
  auto *CE = dyn_cast<ConstantExpr>(C);
  if (!CE) return false;

  // Look through ptr->int and ptr->ptr casts.
  if (CE->getOpcode() == Instruction::PtrToInt ||
      CE->getOpcode() == Instruction::BitCast)
    return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL,
                                      DSOEquiv);

  // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
  auto *GEP = dyn_cast<GEPOperator>(CE);
  if (!GEP)
    return false;

  unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
  APInt TmpOffset(BitWidth, 0);

  // If the base isn't a global+constant, we aren't either.
  if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL,
                                  DSOEquiv))
    return false;

  // Otherwise, add any offset that our operands provide.
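  // Note: accumulateConstantOffset can only succeed when every index of the
  // GEP is a constant integer, so a failure here means the total offset is
  // not a known compile-time constant.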
  if (!GEP->accumulateConstantOffset(DL, TmpOffset))
    return false;

  Offset = TmpOffset;
  return true;
}

Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
                                               const DataLayout &DL) {
  do {
    Type *SrcTy = C->getType();
    if (SrcTy == DestTy)
      return C;

    TypeSize DestSize = DL.getTypeSizeInBits(DestTy);
    TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy);
    if (!TypeSize::isKnownGE(SrcSize, DestSize))
      return nullptr;

    // Catch the obvious splat cases (since all-zeros can coerce non-integral
    // pointers legally).
    if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy))
      return Res;

    // If the type sizes are the same and a cast is legal, just directly
    // cast the constant.
    // But be careful not to coerce non-integral pointers illegally.
    if (SrcSize == DestSize &&
        DL.isNonIntegralPointerType(SrcTy->getScalarType()) ==
            DL.isNonIntegralPointerType(DestTy->getScalarType())) {
      Instruction::CastOps Cast = Instruction::BitCast;
      // If we are going from a pointer to int or vice versa, we spell the cast
      // differently.
      if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
        Cast = Instruction::IntToPtr;
      else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
        Cast = Instruction::PtrToInt;

      if (CastInst::castIsValid(Cast, C, DestTy))
        return ConstantExpr::getCast(Cast, C, DestTy);
    }

    // If this isn't an aggregate type, there is nothing we can do to drill down
    // and find a bitcastable constant.
    if (!SrcTy->isAggregateType() && !SrcTy->isVectorTy())
      return nullptr;

    // We're simulating a load through a pointer that was bitcast to point to
    // a different type, so we can try to walk down through the initial
    // elements of an aggregate to see if some part of the aggregate is
    // castable to implement the "load" semantic model.
    if (SrcTy->isStructTy()) {
      // Struct types might have leading zero-length elements like [0 x i32],
      // which are certainly not what we are looking for, so skip them.
      unsigned Elem = 0;
      Constant *ElemC;
      do {
        ElemC = C->getAggregateElement(Elem++);
      } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero());
      C = ElemC;
    } else {
      // For non-byte-sized vector elements, the first element is not
      // necessarily located at the vector base address.
      if (auto *VT = dyn_cast<VectorType>(SrcTy))
        if (!DL.typeSizeEqualsStoreSize(VT->getElementType()))
          return nullptr;

      C = C->getAggregateElement(0u);
    }
  } while (C);

  return nullptr;
}

namespace {

/// Recursive helper to read bits out of a global. C is the constant being
/// copied out of. ByteOffset is an offset into C. CurPtr is the pointer to
/// copy results into and BytesLeft is the number of bytes left in
/// the CurPtr buffer. DL is the DataLayout.
bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
                        unsigned BytesLeft, const DataLayout &DL) {
  assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
         "Out of range access");

  // If this element is zero or undefined, we can just return since *CurPtr is
  // zero initialized.
  if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
    return true;

  if (auto *CI = dyn_cast<ConstantInt>(C)) {
    if ((CI->getBitWidth() & 7) != 0)
      return false;
    const APInt &Val = CI->getValue();
    unsigned IntBytes = unsigned(CI->getBitWidth()/8);

    for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
      unsigned n = ByteOffset;
      if (!DL.isLittleEndian())
        n = IntBytes - n - 1;
      CurPtr[i] = Val.extractBits(8, n * 8).getZExtValue();
      ++ByteOffset;
    }
    return true;
  }

  if (auto *CFP = dyn_cast<ConstantFP>(C)) {
    if (CFP->getType()->isDoubleTy()) {
      C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    if (CFP->getType()->isFloatTy()){
      C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    if (CFP->getType()->isHalfTy()){
      C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    return false;
  }

  if (auto *CS = dyn_cast<ConstantStruct>(C)) {
    const StructLayout *SL = DL.getStructLayout(CS->getType());
    unsigned Index = SL->getElementContainingOffset(ByteOffset);
    uint64_t CurEltOffset = SL->getElementOffset(Index);
    ByteOffset -= CurEltOffset;

    while (true) {
      // If the element access is to the element itself and not to tail padding,
      // read the bytes from the element.
      uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());

      if (ByteOffset < EltSize &&
          !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
                              BytesLeft, DL))
        return false;

      ++Index;

      // Check to see if we read from the last struct element; if so, we're done.
      if (Index == CS->getType()->getNumElements())
        return true;

      // If we read all of the bytes we needed from this element we're done.
      uint64_t NextEltOffset = SL->getElementOffset(Index);

      if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
        return true;

      // Move to the next element of the struct.
      CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
      BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
      ByteOffset = 0;
      CurEltOffset = NextEltOffset;
    }
    // not reached.
  }

  if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
      isa<ConstantDataSequential>(C)) {
    uint64_t NumElts, EltSize;
    Type *EltTy;
    if (auto *AT = dyn_cast<ArrayType>(C->getType())) {
      NumElts = AT->getNumElements();
      EltTy = AT->getElementType();
      EltSize = DL.getTypeAllocSize(EltTy);
    } else {
      NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
      EltTy = cast<FixedVectorType>(C->getType())->getElementType();
      // TODO: For non-byte-sized vectors, the current implementation assumes
      // there is padding to the next byte boundary between elements.
      if (!DL.typeSizeEqualsStoreSize(EltTy))
        return false;

      EltSize = DL.getTypeStoreSize(EltTy);
    }
    uint64_t Index = ByteOffset / EltSize;
    uint64_t Offset = ByteOffset - Index * EltSize;

    for (; Index != NumElts; ++Index) {
      if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
                              BytesLeft, DL))
        return false;

      uint64_t BytesWritten = EltSize - Offset;
      assert(BytesWritten <= EltSize && "Not indexing into this element?");
      if (BytesWritten >= BytesLeft)
        return true;

      Offset = 0;
      BytesLeft -= BytesWritten;
      CurPtr += BytesWritten;
    }
    return true;
  }

  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
    if (CE->getOpcode() == Instruction::IntToPtr &&
        CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
      return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
                                BytesLeft, DL);
    }
  }

  // Otherwise, unknown initializer type.
  return false;
}

Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
                                       int64_t Offset, const DataLayout &DL) {
  // Bail out early. We do not expect to load from a scalable global variable.
  if (isa<ScalableVectorType>(LoadTy))
    return nullptr;

  auto *IntType = dyn_cast<IntegerType>(LoadTy);

  // If this isn't an integer load we can't fold it directly.
  if (!IntType) {
    // If this is a non-integer load, we can try folding it as an int load and
    // then bitcast the result. This can be useful for union cases. Note
    // that address spaces don't matter here since we're not going to result in
    // an actual new load.
    if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() &&
        !LoadTy->isVectorTy())
      return nullptr;

    Type *MapTy = Type::getIntNTy(C->getContext(),
                                  DL.getTypeSizeInBits(LoadTy).getFixedValue());
    if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) {
      if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
          !LoadTy->isX86_AMXTy())
        // Materializing a zero can be done trivially without a bitcast
        return Constant::getNullValue(LoadTy);
      Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy;
      Res = FoldBitCast(Res, CastTy, DL);
      if (LoadTy->isPtrOrPtrVectorTy()) {
        // For a vector of pointers, we need to first convert to a vector of
        // integers, then do a vector inttoptr.
        if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
            !LoadTy->isX86_AMXTy())
          return Constant::getNullValue(LoadTy);
        if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
          // Be careful not to replace a load of an addrspace value with an
          // inttoptr here.
          return nullptr;
        Res = ConstantExpr::getCast(Instruction::IntToPtr, Res, LoadTy);
      }
      return Res;
    }
    return nullptr;
  }

  unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
  if (BytesLoaded > 32 || BytesLoaded == 0)
    return nullptr;

  // If we're not accessing anything in this constant, the result is undefined.
  if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
    return PoisonValue::get(IntType);

  // TODO: We should be able to support scalable types.
  TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());
  if (InitializerSize.isScalable())
    return nullptr;

  // If we're not accessing anything in this constant, the result is undefined.
  if (Offset >= (int64_t)InitializerSize.getFixedValue())
    return PoisonValue::get(IntType);

  unsigned char RawBytes[32] = {0};
  unsigned char *CurPtr = RawBytes;
  unsigned BytesLeft = BytesLoaded;

  // If we're loading off the beginning of the global, some bytes may be valid.
  if (Offset < 0) {
    CurPtr += -Offset;
    BytesLeft += Offset;
    Offset = 0;
  }

  if (!ReadDataFromGlobal(C, Offset, CurPtr, BytesLeft, DL))
    return nullptr;

  APInt ResultVal = APInt(IntType->getBitWidth(), 0);
  if (DL.isLittleEndian()) {
    ResultVal = RawBytes[BytesLoaded - 1];
    for (unsigned i = 1; i != BytesLoaded; ++i) {
      ResultVal <<= 8;
      ResultVal |= RawBytes[BytesLoaded - 1 - i];
    }
  } else {
    ResultVal = RawBytes[0];
    for (unsigned i = 1; i != BytesLoaded; ++i) {
      ResultVal <<= 8;
      ResultVal |= RawBytes[i];
    }
  }

  return ConstantInt::get(IntType->getContext(), ResultVal);
}

} // anonymous namespace

// If GV is a constant with an initializer, read its representation starting
// at Offset and return it as a constant array of unsigned char. Otherwise
// return null.
Constant *llvm::ReadByteArrayFromGlobal(const GlobalVariable *GV,
                                        uint64_t Offset) {
  if (!GV->isConstant() || !GV->hasDefinitiveInitializer())
    return nullptr;

  const DataLayout &DL = GV->getParent()->getDataLayout();
  Constant *Init = const_cast<Constant *>(GV->getInitializer());
  TypeSize InitSize = DL.getTypeAllocSize(Init->getType());
  if (InitSize < Offset)
    return nullptr;

  uint64_t NBytes = InitSize - Offset;
  if (NBytes > UINT16_MAX)
    // Bail for large initializers in excess of 64K to avoid allocating
    // too much memory.
    // Offset is assumed to be less than or equal to InitSize (this
    // is enforced in ReadDataFromGlobal).
    return nullptr;

  SmallVector<unsigned char, 256> RawBytes(static_cast<size_t>(NBytes));
  unsigned char *CurPtr = RawBytes.data();

  if (!ReadDataFromGlobal(Init, Offset, CurPtr, NBytes, DL))
    return nullptr;

  return ConstantDataArray::get(GV->getContext(), RawBytes);
}

/// If this Offset points exactly to the start of an aggregate element, return
/// that element, otherwise return nullptr.
Constant *getConstantAtOffset(Constant *Base, APInt Offset,
                              const DataLayout &DL) {
  if (Offset.isZero())
    return Base;

  if (!isa<ConstantAggregate>(Base) && !isa<ConstantDataSequential>(Base))
    return nullptr;

  Type *ElemTy = Base->getType();
  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
  if (!Offset.isZero() || !Indices[0].isZero())
    return nullptr;

  Constant *C = Base;
  for (const APInt &Index : drop_begin(Indices)) {
    if (Index.isNegative() || Index.getActiveBits() >= 32)
      return nullptr;

    C = C->getAggregateElement(Index.getZExtValue());
    if (!C)
      return nullptr;
  }

  return C;
}

Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
                                          const APInt &Offset,
                                          const DataLayout &DL) {
  if (Constant *AtOffset = getConstantAtOffset(C, Offset, DL))
    if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL))
      return Result;

  // Explicitly check for out-of-bounds access, so we return poison even if the
  // constant is a uniform value.
  TypeSize Size = DL.getTypeAllocSize(C->getType());
  if (!Size.isScalable() && Offset.sge(Size.getFixedValue()))
    return PoisonValue::get(Ty);

  // Try an offset-independent fold of a uniform value.
  if (Constant *Result = ConstantFoldLoadFromUniformValue(C, Ty))
    return Result;

  // Try hard to fold loads from bitcasted strange and non-type-safe things.
  if (Offset.getSignificantBits() <= 64)
    if (Constant *Result =
            FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL))
      return Result;

  return nullptr;
}

Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
                                          const DataLayout &DL) {
  return ConstantFoldLoadFromConst(C, Ty, APInt(64, 0), DL);
}

Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
                                             APInt Offset,
                                             const DataLayout &DL) {
  // We can only fold loads from constant globals with a definitive initializer.
  // Check this upfront, to skip expensive offset calculations.
  auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    return nullptr;

  C = cast<Constant>(C->stripAndAccumulateConstantOffsets(
      DL, Offset, /* AllowNonInbounds */ true));

  if (C == GV)
    if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty,
                                                     Offset, DL))
      return Result;

  // If this load comes from anywhere in a uniform constant global, the value
  // is always the same, regardless of the loaded offset.
  return ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty);
}

Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
                                             const DataLayout &DL) {
  APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 0);
  return ConstantFoldLoadFromConstPtr(C, Ty, Offset, DL);
}

Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty) {
  if (isa<PoisonValue>(C))
    return PoisonValue::get(Ty);
  if (isa<UndefValue>(C))
    return UndefValue::get(Ty);
  if (C->isNullValue() && !Ty->isX86_MMXTy() && !Ty->isX86_AMXTy())
    return Constant::getNullValue(Ty);
  if (C->isAllOnesValue() &&
      (Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy()))
    return Constant::getAllOnesValue(Ty);
  return nullptr;
}

namespace {

/// One of Op0/Op1 is a constant expression.
/// Attempt to symbolically evaluate the result of a binary operator merging
/// these together. If target data info is available, it is provided as DL,
/// otherwise DL is null.
Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
                                    const DataLayout &DL) {
  // SROA

  // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
  // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
  // bits.

  if (Opc == Instruction::And) {
    KnownBits Known0 = computeKnownBits(Op0, DL);
    KnownBits Known1 = computeKnownBits(Op1, DL);
    if ((Known1.One | Known0.Zero).isAllOnes()) {
      // All the bits of Op0 that the 'and' could be masking are already zero.
      return Op0;
    }
    if ((Known0.One | Known1.Zero).isAllOnes()) {
      // All the bits of Op1 that the 'and' could be masking are already zero.
      return Op1;
    }

    Known0 &= Known1;
    if (Known0.isConstant())
      return ConstantInt::get(Op0->getType(), Known0.getConstant());
  }

  // If the constant expr is something like &A[123] - &A[4].f, fold this into a
  // constant.
  // This happens frequently when iterating over a global array.
  if (Opc == Instruction::Sub) {
    GlobalValue *GV1, *GV2;
    APInt Offs1, Offs2;

    if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
      if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
        unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());

        // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
        // PtrToInt may change the bitwidth so we have to convert to the right
        // size first.
        return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -
                                                    Offs2.zextOrTrunc(OpSize));
      }
  }

  return nullptr;
}

/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly cast by the getelementptr.
Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
                         Type *ResultTy, bool InBounds,
                         std::optional<unsigned> InRangeIndex,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
  Type *IntIdxTy = DL.getIndexType(ResultTy);
  Type *IntIdxScalarTy = IntIdxTy->getScalarType();

  bool Any = false;
  SmallVector<Constant*, 32> NewIdxs;
  for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
    if ((i == 1 ||
         !isa<StructType>(GetElementPtrInst::getIndexedType(
             SrcElemTy, Ops.slice(1, i - 1)))) &&
        Ops[i]->getType()->getScalarType() != IntIdxScalarTy) {
      Any = true;
      Type *NewType = Ops[i]->getType()->isVectorTy()
                          ? IntIdxTy
                          : IntIdxScalarTy;
      NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
                                                                      true,
                                                                      NewType,
                                                                      true),
                                              Ops[i], NewType));
    } else
      NewIdxs.push_back(Ops[i]);
  }

  if (!Any)
    return nullptr;

  Constant *C = ConstantExpr::getGetElementPtr(
      SrcElemTy, Ops[0], NewIdxs, InBounds, InRangeIndex);
  return ConstantFoldConstant(C, DL, TLI);
}

/// Strip the pointer casts, but preserve the address space information.
// TODO: This probably doesn't make sense with opaque pointers.
static Constant *StripPtrCastKeepAS(Constant *Ptr) {
  assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
  auto *OldPtrTy = cast<PointerType>(Ptr->getType());
  Ptr = cast<Constant>(Ptr->stripPointerCasts());
  auto *NewPtrTy = cast<PointerType>(Ptr->getType());

  // Preserve the address space number of the pointer.
  if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace())
    Ptr = ConstantExpr::getPointerCast(Ptr, OldPtrTy);
  return Ptr;
}

/// If we can symbolically evaluate the GEP constant expression, do so.
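/// Returns the simplified GEP (or an equivalent inttoptr when the base is a
/// literal integer address), or null if no simplification applies.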
Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
                                  ArrayRef<Constant *> Ops,
                                  const DataLayout &DL,
                                  const TargetLibraryInfo *TLI) {
  const GEPOperator *InnermostGEP = GEP;
  bool InBounds = GEP->isInBounds();

  Type *SrcElemTy = GEP->getSourceElementType();
  Type *ResElemTy = GEP->getResultElementType();
  Type *ResTy = GEP->getType();
  if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))
    return nullptr;

  if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy,
                                   GEP->isInBounds(), GEP->getInRangeIndex(),
                                   DL, TLI))
    return C;

  Constant *Ptr = Ops[0];
  if (!Ptr->getType()->isPointerTy())
    return nullptr;

  Type *IntIdxTy = DL.getIndexType(Ptr->getType());

  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    if (!isa<ConstantInt>(Ops[i]))
      return nullptr;

  unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy);
  APInt Offset = APInt(
      BitWidth,
      DL.getIndexedOffsetInType(
          SrcElemTy, ArrayRef((Value *const *)Ops.data() + 1, Ops.size() - 1)));
  Ptr = StripPtrCastKeepAS(Ptr);

  // If this is a GEP of a GEP, fold it all into a single GEP.
  while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
    InnermostGEP = GEP;
    InBounds &= GEP->isInBounds();

    SmallVector<Value *, 4> NestedOps(llvm::drop_begin(GEP->operands()));

    // Do not try to incorporate the sub-GEP if some index is not a number.
    bool AllConstantInt = true;
    for (Value *NestedOp : NestedOps)
      if (!isa<ConstantInt>(NestedOp)) {
        AllConstantInt = false;
        break;
      }
    if (!AllConstantInt)
      break;

    Ptr = cast<Constant>(GEP->getOperand(0));
    SrcElemTy = GEP->getSourceElementType();
    Offset += APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps));
    Ptr = StripPtrCastKeepAS(Ptr);
  }

  // If the base value for this address is a literal integer value, fold the
  // getelementptr to the resulting integer value cast to the pointer type.
  APInt BasePtr(BitWidth, 0);
  if (auto *CE = dyn_cast<ConstantExpr>(Ptr)) {
    if (CE->getOpcode() == Instruction::IntToPtr) {
      if (auto *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
        BasePtr = Base->getValue().zextOrTrunc(BitWidth);
    }
  }

  auto *PTy = cast<PointerType>(Ptr->getType());
  if ((Ptr->isNullValue() || BasePtr != 0) &&
      !DL.isNonIntegralPointerType(PTy)) {
    Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);
    return ConstantExpr::getIntToPtr(C, ResTy);
  }

  // Otherwise form a regular getelementptr. Recompute the indices so that
  // we eliminate over-indexing of the notional static type array bounds.
  // This makes it easy to determine if the getelementptr is "inbounds".

  // For GEPs of GlobalValues, use the value type, otherwise use an i8 GEP.
  if (auto *GV = dyn_cast<GlobalValue>(Ptr))
    SrcElemTy = GV->getValueType();
  else
    SrcElemTy = Type::getInt8Ty(Ptr->getContext());

  if (!SrcElemTy->isSized())
    return nullptr;

  Type *ElemTy = SrcElemTy;
  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
  if (Offset != 0)
    return nullptr;

  // Try to add additional zero indices to reach the desired result element
  // type.
  // TODO: Should we avoid extra zero indices if ResElemTy can't be reached and
  // we'll have to insert a bitcast anyway?
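  // getTypeAtIndex(Ty, 0) steps into the first contained element of a struct,
  // array, or vector and returns null for scalar types, which is what
  // terminates this loop when no further zero index can be appended.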
  while (ElemTy != ResElemTy) {
    Type *NextTy = GetElementPtrInst::getTypeAtIndex(ElemTy, (uint64_t)0);
    if (!NextTy)
      break;

    Indices.push_back(APInt::getZero(isa<StructType>(ElemTy) ? 32 : BitWidth));
    ElemTy = NextTy;
  }

  SmallVector<Constant *, 32> NewIdxs;
  for (const APInt &Index : Indices)
    NewIdxs.push_back(ConstantInt::get(
        Type::getIntNTy(Ptr->getContext(), Index.getBitWidth()), Index));

  // Preserve the inrange index from the innermost GEP if possible. We must
  // have calculated the same indices up to and including the inrange index.
  std::optional<unsigned> InRangeIndex;
  if (std::optional<unsigned> LastIRIndex = InnermostGEP->getInRangeIndex())
    if (SrcElemTy == InnermostGEP->getSourceElementType() &&
        NewIdxs.size() > *LastIRIndex) {
      InRangeIndex = LastIRIndex;
      for (unsigned I = 0; I <= *LastIRIndex; ++I)
        if (NewIdxs[I] != InnermostGEP->getOperand(I + 1))
          return nullptr;
    }

  // Create a GEP.
  return ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs, InBounds,
                                        InRangeIndex);
}

/// Attempt to constant fold an instruction with the
/// specified opcode and operands. If successful, the constant result is
/// returned; if not, null is returned. Note that this function can fail when
/// attempting to fold instructions like loads and stores, which have no
/// constant expression form.
Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
                                       ArrayRef<Constant *> Ops,
                                       const DataLayout &DL,
                                       const TargetLibraryInfo *TLI) {
  Type *DestTy = InstOrCE->getType();

  if (Instruction::isUnaryOp(Opcode))
    return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);

  if (Instruction::isBinaryOp(Opcode)) {
    switch (Opcode) {
    default:
      break;
    case Instruction::FAdd:
    case Instruction::FSub:
    case Instruction::FMul:
    case Instruction::FDiv:
    case Instruction::FRem:
      // Handle floating point instructions separately to account for denormals
      // TODO: If a constant expression is being folded rather than an
      // instruction, denormals will not be flushed/treated as zero
      if (const auto *I = dyn_cast<Instruction>(InstOrCE)) {
        return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I);
      }
    }
    return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
  }

  if (Instruction::isCast(Opcode))
    return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);

  if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) {
    Type *SrcElemTy = GEP->getSourceElementType();
    if (!ConstantExpr::isSupportedGetElementPtr(SrcElemTy))
      return nullptr;

    if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
      return C;

    return ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], Ops.slice(1),
                                          GEP->isInBounds(),
                                          GEP->getInRangeIndex());
  }

  if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE)) {
    if (CE->isCompare())
      return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
                                             DL, TLI);
    return CE->getWithOperands(Ops);
  }

  switch (Opcode) {
  default: return nullptr;
  case Instruction::ICmp:
  case Instruction::FCmp: {
    auto *C = cast<CmpInst>(InstOrCE);
    return ConstantFoldCompareInstOperands(C->getPredicate(), Ops[0], Ops[1],
                                           DL, TLI, C);
  }
  case Instruction::Freeze:
    return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr;
  case Instruction::Call:
    if (auto *F = dyn_cast<Function>(Ops.back())) {
      const auto *Call = cast<CallBase>(InstOrCE);
      if (canConstantFoldCallTo(Call, F))
        return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI);
    }
    return nullptr;
  case Instruction::Select:
    return ConstantFoldSelectInstruction(Ops[0], Ops[1], Ops[2]);
  case Instruction::ExtractElement:
    return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
  case Instruction::ExtractValue:
    return ConstantFoldExtractValueInstruction(
        Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
  case Instruction::InsertElement:
    return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
  case Instruction::InsertValue:
    return ConstantFoldInsertValueInstruction(
        Ops[0], Ops[1], cast<InsertValueInst>(InstOrCE)->getIndices());
  case Instruction::ShuffleVector:
    return ConstantExpr::getShuffleVector(
        Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask());
  case Instruction::Load: {
    const auto *LI = dyn_cast<LoadInst>(InstOrCE);
    if (LI->isVolatile())
      return nullptr;
    return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);
  }
  }
}

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// Constant Folding public APIs
//===----------------------------------------------------------------------===//

namespace {

Constant *
ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
                         const TargetLibraryInfo *TLI,
                         SmallDenseMap<Constant *, Constant *> &FoldedOps) {
  if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))
    return const_cast<Constant *>(C);

  SmallVector<Constant *, 8> Ops;
  for (const Use &OldU : C->operands()) {
    Constant *OldC = cast<Constant>(&OldU);
    Constant *NewC = OldC;
    // Recursively fold the ConstantExpr's operands. If we have already folded
    // a ConstantExpr, we don't have to process it again.
    if (isa<ConstantVector>(OldC) || isa<ConstantExpr>(OldC)) {
      auto It = FoldedOps.find(OldC);
      if (It == FoldedOps.end()) {
        NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps);
        FoldedOps.insert({OldC, NewC});
      } else {
        NewC = It->second;
      }
    }
    Ops.push_back(NewC);
  }

  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
    if (Constant *Res =
            ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI))
      return Res;
    return const_cast<Constant *>(C);
  }

  assert(isa<ConstantVector>(C));
  return ConstantVector::get(Ops);
}

} // end anonymous namespace

Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
                                        const TargetLibraryInfo *TLI) {
  // Handle PHI nodes quickly here...
  if (auto *PN = dyn_cast<PHINode>(I)) {
    Constant *CommonValue = nullptr;

    SmallDenseMap<Constant *, Constant *> FoldedOps;
    for (Value *Incoming : PN->incoming_values()) {
      // If the incoming value is undef then skip it. Note that while we could
      // skip the value if it is equal to the phi node itself we choose not to
      // because that would break the rule that constant folding only applies if
      // all operands are constants.
      if (isa<UndefValue>(Incoming))
        continue;
      // If the incoming value is not a constant, then give up.
      auto *C = dyn_cast<Constant>(Incoming);
      if (!C)
        return nullptr;
      // Fold the PHI's operands.
      C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
      // If the incoming value is a different constant to
      // the one we saw previously, then give up.
      if (CommonValue && C != CommonValue)
        return nullptr;
      CommonValue = C;
    }

    // If we reach here, all incoming values are the same constant or undef.
    return CommonValue ? CommonValue : UndefValue::get(PN->getType());
  }

  // Scan the operand list, checking to see if they are all constants, if so,
  // hand off to ConstantFoldInstOperandsImpl.
  if (!all_of(I->operands(), [](Use &U) { return isa<Constant>(U); }))
    return nullptr;

  SmallDenseMap<Constant *, Constant *> FoldedOps;
  SmallVector<Constant *, 8> Ops;
  for (const Use &OpU : I->operands()) {
    auto *Op = cast<Constant>(&OpU);
    // Fold the Instruction's operands.
    Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps);
    Ops.push_back(Op);
  }

  return ConstantFoldInstOperands(I, Ops, DL, TLI);
}

Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
                                     const TargetLibraryInfo *TLI) {
  SmallDenseMap<Constant *, Constant *> FoldedOps;
  return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
}

Constant *llvm::ConstantFoldInstOperands(Instruction *I,
                                         ArrayRef<Constant *> Ops,
                                         const DataLayout &DL,
                                         const TargetLibraryInfo *TLI) {
  return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI);
}

Constant *llvm::ConstantFoldCompareInstOperands(
    unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL,
    const TargetLibraryInfo *TLI, const Instruction *I) {
  CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate;
  // fold: icmp (inttoptr x), null         -> icmp x, 0
  // fold: icmp null, (inttoptr x)         -> icmp 0, x
  // fold: icmp (ptrtoint x), 0            -> icmp x, null
  // fold: icmp 0, (ptrtoint x)            -> icmp null, x
  // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
  // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
  //
  // FIXME: The following comment is out of date and the DataLayout is here now.
  // ConstantExpr::getCompare cannot do this, because it doesn't have DL
  // around to know if bit truncation is happening.
  if (auto *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
    if (Ops1->isNullValue()) {
      if (CE0->getOpcode() == Instruction::IntToPtr) {
        Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
        // Convert the integer value to the right size to ensure we get the
        // proper extension or truncation.
        Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
                                                   IntPtrTy, false);
        Constant *Null = Constant::getNullValue(C->getType());
        return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
      }

      // Only do this transformation if the int is intptrty in size, otherwise
      // there is a truncation or extension that we aren't modeling.
      if (CE0->getOpcode() == Instruction::PtrToInt) {
        Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
        if (CE0->getType() == IntPtrTy) {
          Constant *C = CE0->getOperand(0);
          Constant *Null = Constant::getNullValue(C->getType());
          return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
        }
      }
    }

    if (auto *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
      if (CE0->getOpcode() == CE1->getOpcode()) {
        if (CE0->getOpcode() == Instruction::IntToPtr) {
          Type *IntPtrTy = DL.getIntPtrType(CE0->getType());

          // Convert the integer value to the right size to ensure we get the
          // proper extension or truncation.
          Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
                                                      IntPtrTy, false);
          Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
                                                      IntPtrTy, false);
          return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
        }

        // Only do this transformation if the int is intptrty in size, otherwise
        // there is a truncation or extension that we aren't modeling.
        if (CE0->getOpcode() == Instruction::PtrToInt) {
          Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
          if (CE0->getType() == IntPtrTy &&
              CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
            return ConstantFoldCompareInstOperands(
                Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
          }
        }
      }
    }

    // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0)
    // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
    if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
        CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
      Constant *LHS = ConstantFoldCompareInstOperands(
          Predicate, CE0->getOperand(0), Ops1, DL, TLI);
      Constant *RHS = ConstantFoldCompareInstOperands(
          Predicate, CE0->getOperand(1), Ops1, DL, TLI);
      unsigned OpC =
          Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
      return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL);
    }

    // Convert pointer comparison (base+offset1) pred (base+offset2) into
    // offset1 pred offset2, for the case where the offset is inbounds. This
    // only works for equality and unsigned comparison, as inbounds permits
    // crossing the sign boundary. However, the offset comparison itself is
    // signed.
    if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) {
      unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType());
      APInt Offset0(IndexWidth, 0);
      Value *Stripped0 =
          Ops0->stripAndAccumulateInBoundsConstantOffsets(DL, Offset0);
      APInt Offset1(IndexWidth, 0);
      Value *Stripped1 =
          Ops1->stripAndAccumulateInBoundsConstantOffsets(DL, Offset1);
      if (Stripped0 == Stripped1)
        return ConstantExpr::getCompare(
            ICmpInst::getSignedPredicate(Predicate),
            ConstantInt::get(CE0->getContext(), Offset0),
            ConstantInt::get(CE0->getContext(), Offset1));
    }
  } else if (isa<ConstantExpr>(Ops1)) {
    // If RHS is a constant expression, but the left side isn't, swap the
    // operands and try again.
    Predicate = ICmpInst::getSwappedPredicate(Predicate);
    return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
  }

  // Flush any denormal constant float input according to denormal handling
  // mode.
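  // FlushFPConstant returns null when the denormal mode is dynamic, in which
  // case the comparison result cannot be known at compile time.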
  Ops0 = FlushFPConstant(Ops0, I, /* IsOutput */ false);
  if (!Ops0)
    return nullptr;
  Ops1 = FlushFPConstant(Ops1, I, /* IsOutput */ false);
  if (!Ops1)
    return nullptr;

  return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
}

Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
                                           const DataLayout &DL) {
  assert(Instruction::isUnaryOp(Opcode));

  return ConstantFoldUnaryInstruction(Opcode, Op);
}

Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
                                             Constant *RHS,
                                             const DataLayout &DL) {
  assert(Instruction::isBinaryOp(Opcode));
  if (isa<ConstantExpr>(LHS) || isa<ConstantExpr>(RHS))
    if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL))
      return C;

  if (ConstantExpr::isDesirableBinOp(Opcode))
    return ConstantExpr::get(Opcode, LHS, RHS);
  return ConstantFoldBinaryInstruction(Opcode, LHS, RHS);
}

Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I,
                                bool IsOutput) {
  if (!I || !I->getParent() || !I->getFunction())
    return Operand;

  ConstantFP *CFP = dyn_cast<ConstantFP>(Operand);
  if (!CFP)
    return Operand;

  const APFloat &APF = CFP->getValueAPF();
  // TODO: Should this canonicalize nans?
  if (!APF.isDenormal())
    return Operand;

  Type *Ty = CFP->getType();
  DenormalMode DenormMode =
      I->getFunction()->getDenormalMode(Ty->getFltSemantics());
  DenormalMode::DenormalModeKind Mode =
      IsOutput ? DenormMode.Output : DenormMode.Input;
  switch (Mode) {
  default:
    llvm_unreachable("unknown denormal mode");
  case DenormalMode::Dynamic:
    return nullptr;
  case DenormalMode::IEEE:
    return Operand;
  case DenormalMode::PreserveSign:
    if (APF.isDenormal()) {
      return ConstantFP::get(
          Ty->getContext(),
          APFloat::getZero(Ty->getFltSemantics(), APF.isNegative()));
    }
    return Operand;
  case DenormalMode::PositiveZero:
    if (APF.isDenormal()) {
      return ConstantFP::get(Ty->getContext(),
                             APFloat::getZero(Ty->getFltSemantics(), false));
    }
    return Operand;
  }
  return Operand;
}

Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
                                           Constant *RHS, const DataLayout &DL,
                                           const Instruction *I) {
  if (Instruction::isBinaryOp(Opcode)) {
    // Flush denormal inputs if needed.
    Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false);
    if (!Op0)
      return nullptr;
    Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false);
    if (!Op1)
      return nullptr;

    // Calculate constant result.
    Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL);
    if (!C)
      return nullptr;

    // Flush denormal output if needed.
    return FlushFPConstant(C, I, /* IsOutput */ true);
  }
  // If instruction lacks a parent/function and the denormal mode cannot be
  // determined, use the default (IEEE).
  return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
}

Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
                                        Type *DestTy, const DataLayout &DL) {
  assert(Instruction::isCast(Opcode));
  switch (Opcode) {
  default:
    llvm_unreachable("Missing case");
  case Instruction::PtrToInt:
    if (auto *CE = dyn_cast<ConstantExpr>(C)) {
      Constant *FoldedValue = nullptr;
      // If the input is an inttoptr, eliminate the pair. This requires knowing
      // the width of a pointer, so it can't be done in ConstantExpr::getCast.
      if (CE->getOpcode() == Instruction::IntToPtr) {
        // zext/trunc the inttoptr to pointer size.
        FoldedValue = ConstantExpr::getIntegerCast(
            CE->getOperand(0), DL.getIntPtrType(CE->getType()),
            /*IsSigned=*/false);
      } else if (auto *GEP = dyn_cast<GEPOperator>(CE)) {
        // If we have a GEP, we can perform the following folds:
        // (ptrtoint (gep null, x)) -> x
        // (ptrtoint (gep (gep null, x), y) -> x + y, etc.
        unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
        APInt BaseOffset(BitWidth, 0);
        auto *Base = cast<Constant>(GEP->stripAndAccumulateConstantOffsets(
            DL, BaseOffset, /*AllowNonInbounds=*/true));
        if (Base->isNullValue()) {
          FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
        } else {
          // ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V
          if (GEP->getNumIndices() == 1 &&
              GEP->getSourceElementType()->isIntegerTy(8)) {
            auto *Ptr = cast<Constant>(GEP->getPointerOperand());
            auto *Sub = dyn_cast<ConstantExpr>(GEP->getOperand(1));
            Type *IntIdxTy = DL.getIndexType(Ptr->getType());
            if (Sub && Sub->getType() == IntIdxTy &&
                Sub->getOpcode() == Instruction::Sub &&
                Sub->getOperand(0)->isNullValue())
              FoldedValue = ConstantExpr::getSub(
                  ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1));
          }
        }
      }
      if (FoldedValue) {
        // Do a zext or trunc to get to the ptrtoint dest size.
        return ConstantExpr::getIntegerCast(FoldedValue, DestTy,
                                            /*IsSigned=*/false);
      }
    }
    return ConstantExpr::getCast(Opcode, C, DestTy);
  case Instruction::IntToPtr:
    // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
    // the int size is >= the ptr size and the address spaces are the same.
    // This requires knowing the width of a pointer, so it can't be done in
    // ConstantExpr::getCast.
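    // For example, inttoptr (ptrtoint ptr @G to i64) to ptr folds to a plain
    // bitcast of @G, provided the intermediate integer is at least as wide as
    // the pointer and the source and destination address spaces match.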
    if (auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (CE->getOpcode() == Instruction::PtrToInt) {
        Constant *SrcPtr = CE->getOperand(0);
        unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
        unsigned MidIntSize = CE->getType()->getScalarSizeInBits();

        if (MidIntSize >= SrcPtrSize) {
          unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
          if (SrcAS == DestTy->getPointerAddressSpace())
            return FoldBitCast(CE->getOperand(0), DestTy, DL);
        }
      }
    }

    return ConstantExpr::getCast(Opcode, C, DestTy);
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::AddrSpaceCast:
    return ConstantExpr::getCast(Opcode, C, DestTy);
  case Instruction::BitCast:
    return FoldBitCast(C, DestTy, DL);
  }
}

//===----------------------------------------------------------------------===//
//  Constant Folding for Calls
//

bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
  if (Call->isNoBuiltin())
    return false;
  if (Call->getFunctionType() != F->getFunctionType())
    return false;
  switch (F->getIntrinsicID()) {
  // Operations that do not operate on floating-point numbers and do not
  // depend on the FP environment can be folded even in strictfp functions.
  case Intrinsic::bswap:
  case Intrinsic::ctpop:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::fshl:
  case Intrinsic::fshr:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
  case Intrinsic::masked_load:
  case Intrinsic::get_active_lane_mask:
  case Intrinsic::abs:
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin:
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
  case Intrinsic::sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::bitreverse:
  case Intrinsic::is_constant:
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
  // Target intrinsics
  case Intrinsic::amdgcn_perm:
  case Intrinsic::arm_mve_vctp8:
  case Intrinsic::arm_mve_vctp16:
  case Intrinsic::arm_mve_vctp32:
  case Intrinsic::arm_mve_vctp64:
  case Intrinsic::aarch64_sve_convert_from_svbool:
  // WebAssembly float semantics are always known
  case Intrinsic::wasm_trunc_signed:
  case Intrinsic::wasm_trunc_unsigned:
    return true;

  // Floating point operations cannot be folded in strictfp functions in the
  // general case. They can be folded if the FP environment is known to the
  // compiler.
1563 case Intrinsic::minnum: 1564 case Intrinsic::maxnum: 1565 case Intrinsic::minimum: 1566 case Intrinsic::maximum: 1567 case Intrinsic::log: 1568 case Intrinsic::log2: 1569 case Intrinsic::log10: 1570 case Intrinsic::exp: 1571 case Intrinsic::exp2: 1572 case Intrinsic::sqrt: 1573 case Intrinsic::sin: 1574 case Intrinsic::cos: 1575 case Intrinsic::pow: 1576 case Intrinsic::powi: 1577 case Intrinsic::fma: 1578 case Intrinsic::fmuladd: 1579 case Intrinsic::frexp: 1580 case Intrinsic::fptoui_sat: 1581 case Intrinsic::fptosi_sat: 1582 case Intrinsic::convert_from_fp16: 1583 case Intrinsic::convert_to_fp16: 1584 case Intrinsic::amdgcn_cos: 1585 case Intrinsic::amdgcn_cubeid: 1586 case Intrinsic::amdgcn_cubema: 1587 case Intrinsic::amdgcn_cubesc: 1588 case Intrinsic::amdgcn_cubetc: 1589 case Intrinsic::amdgcn_fmul_legacy: 1590 case Intrinsic::amdgcn_fma_legacy: 1591 case Intrinsic::amdgcn_fract: 1592 case Intrinsic::amdgcn_ldexp: 1593 case Intrinsic::amdgcn_sin: 1594 // The intrinsics below depend on rounding mode in MXCSR. 1595 case Intrinsic::x86_sse_cvtss2si: 1596 case Intrinsic::x86_sse_cvtss2si64: 1597 case Intrinsic::x86_sse_cvttss2si: 1598 case Intrinsic::x86_sse_cvttss2si64: 1599 case Intrinsic::x86_sse2_cvtsd2si: 1600 case Intrinsic::x86_sse2_cvtsd2si64: 1601 case Intrinsic::x86_sse2_cvttsd2si: 1602 case Intrinsic::x86_sse2_cvttsd2si64: 1603 case Intrinsic::x86_avx512_vcvtss2si32: 1604 case Intrinsic::x86_avx512_vcvtss2si64: 1605 case Intrinsic::x86_avx512_cvttss2si: 1606 case Intrinsic::x86_avx512_cvttss2si64: 1607 case Intrinsic::x86_avx512_vcvtsd2si32: 1608 case Intrinsic::x86_avx512_vcvtsd2si64: 1609 case Intrinsic::x86_avx512_cvttsd2si: 1610 case Intrinsic::x86_avx512_cvttsd2si64: 1611 case Intrinsic::x86_avx512_vcvtss2usi32: 1612 case Intrinsic::x86_avx512_vcvtss2usi64: 1613 case Intrinsic::x86_avx512_cvttss2usi: 1614 case Intrinsic::x86_avx512_cvttss2usi64: 1615 case Intrinsic::x86_avx512_vcvtsd2usi32: 1616 case Intrinsic::x86_avx512_vcvtsd2usi64: 1617 case Intrinsic::x86_avx512_cvttsd2usi: 1618 case Intrinsic::x86_avx512_cvttsd2usi64: 1619 return !Call->isStrictFP(); 1620 1621 // Sign operations are actually bitwise operations, they do not raise 1622 // exceptions even for SNANs. 1623 case Intrinsic::fabs: 1624 case Intrinsic::copysign: 1625 case Intrinsic::is_fpclass: 1626 // Non-constrained variants of rounding operations means default FP 1627 // environment, they can be folded in any case. 1628 case Intrinsic::ceil: 1629 case Intrinsic::floor: 1630 case Intrinsic::round: 1631 case Intrinsic::roundeven: 1632 case Intrinsic::trunc: 1633 case Intrinsic::nearbyint: 1634 case Intrinsic::rint: 1635 case Intrinsic::canonicalize: 1636 // Constrained intrinsics can be folded if FP environment is known 1637 // to compiler. 
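// Illustrative example (not upstream text): a constrained call that pins the
// FP environment explicitly, e.g.
//   %s = call double @llvm.experimental.constrained.fadd.f64(
//            double 1.0, double 2.0,
//            metadata !"round.tonearest", metadata !"fpexcept.ignore")
// carries enough information for the folders later in this file to
// evaluate it.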
1638 case Intrinsic::experimental_constrained_fma: 1639 case Intrinsic::experimental_constrained_fmuladd: 1640 case Intrinsic::experimental_constrained_fadd: 1641 case Intrinsic::experimental_constrained_fsub: 1642 case Intrinsic::experimental_constrained_fmul: 1643 case Intrinsic::experimental_constrained_fdiv: 1644 case Intrinsic::experimental_constrained_frem: 1645 case Intrinsic::experimental_constrained_ceil: 1646 case Intrinsic::experimental_constrained_floor: 1647 case Intrinsic::experimental_constrained_round: 1648 case Intrinsic::experimental_constrained_roundeven: 1649 case Intrinsic::experimental_constrained_trunc: 1650 case Intrinsic::experimental_constrained_nearbyint: 1651 case Intrinsic::experimental_constrained_rint: 1652 case Intrinsic::experimental_constrained_fcmp: 1653 case Intrinsic::experimental_constrained_fcmps: 1654 return true; 1655 default: 1656 return false; 1657 case Intrinsic::not_intrinsic: break; 1658 } 1659 1660 if (!F->hasName() || Call->isStrictFP()) 1661 return false; 1662 1663 // In these cases, the check of the length is required. We don't want to 1664 // return true for a name like "cos\0blah" which strcmp would return equal to 1665 // "cos", but has length 8. 1666 StringRef Name = F->getName(); 1667 switch (Name[0]) { 1668 default: 1669 return false; 1670 case 'a': 1671 return Name == "acos" || Name == "acosf" || 1672 Name == "asin" || Name == "asinf" || 1673 Name == "atan" || Name == "atanf" || 1674 Name == "atan2" || Name == "atan2f"; 1675 case 'c': 1676 return Name == "ceil" || Name == "ceilf" || 1677 Name == "cos" || Name == "cosf" || 1678 Name == "cosh" || Name == "coshf"; 1679 case 'e': 1680 return Name == "exp" || Name == "expf" || 1681 Name == "exp2" || Name == "exp2f"; 1682 case 'f': 1683 return Name == "fabs" || Name == "fabsf" || 1684 Name == "floor" || Name == "floorf" || 1685 Name == "fmod" || Name == "fmodf"; 1686 case 'l': 1687 return Name == "log" || Name == "logf" || 1688 Name == "log2" || Name == "log2f" || 1689 Name == "log10" || Name == "log10f"; 1690 case 'n': 1691 return Name == "nearbyint" || Name == "nearbyintf"; 1692 case 'p': 1693 return Name == "pow" || Name == "powf"; 1694 case 'r': 1695 return Name == "remainder" || Name == "remainderf" || 1696 Name == "rint" || Name == "rintf" || 1697 Name == "round" || Name == "roundf"; 1698 case 's': 1699 return Name == "sin" || Name == "sinf" || 1700 Name == "sinh" || Name == "sinhf" || 1701 Name == "sqrt" || Name == "sqrtf"; 1702 case 't': 1703 return Name == "tan" || Name == "tanf" || 1704 Name == "tanh" || Name == "tanhf" || 1705 Name == "trunc" || Name == "truncf"; 1706 case '_': 1707 // Check for various function names that get used for the math functions 1708 // when the header files are preprocessed with the macro 1709 // __FINITE_MATH_ONLY__ enabled. 1710 // The '12' here is the length of the shortest name that can match. 1711 // We need to check the size before looking at Name[1] and Name[2] 1712 // so we may as well check a limit that will eliminate mismatches. 
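// For example, "__exp_finite" and "__pow_finite" are exactly 12 characters,
// the shortest of the names matched below.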
1713 if (Name.size() < 12 || Name[1] != '_') 1714 return false; 1715 switch (Name[2]) { 1716 default: 1717 return false; 1718 case 'a': 1719 return Name == "__acos_finite" || Name == "__acosf_finite" || 1720 Name == "__asin_finite" || Name == "__asinf_finite" || 1721 Name == "__atan2_finite" || Name == "__atan2f_finite"; 1722 case 'c': 1723 return Name == "__cosh_finite" || Name == "__coshf_finite"; 1724 case 'e': 1725 return Name == "__exp_finite" || Name == "__expf_finite" || 1726 Name == "__exp2_finite" || Name == "__exp2f_finite"; 1727 case 'l': 1728 return Name == "__log_finite" || Name == "__logf_finite" || 1729 Name == "__log10_finite" || Name == "__log10f_finite"; 1730 case 'p': 1731 return Name == "__pow_finite" || Name == "__powf_finite"; 1732 case 's': 1733 return Name == "__sinh_finite" || Name == "__sinhf_finite"; 1734 } 1735 } 1736 } 1737 1738 namespace { 1739 1740 Constant *GetConstantFoldFPValue(double V, Type *Ty) { 1741 if (Ty->isHalfTy() || Ty->isFloatTy()) { 1742 APFloat APF(V); 1743 bool unused; 1744 APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused); 1745 return ConstantFP::get(Ty->getContext(), APF); 1746 } 1747 if (Ty->isDoubleTy()) 1748 return ConstantFP::get(Ty->getContext(), APFloat(V)); 1749 llvm_unreachable("Can only constant fold half/float/double"); 1750 } 1751 1752 /// Clear the floating-point exception state. 1753 inline void llvm_fenv_clearexcept() { 1754 #if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT 1755 feclearexcept(FE_ALL_EXCEPT); 1756 #endif 1757 errno = 0; 1758 } 1759 1760 /// Test if a floating-point exception was raised. 1761 inline bool llvm_fenv_testexcept() { 1762 int errno_val = errno; 1763 if (errno_val == ERANGE || errno_val == EDOM) 1764 return true; 1765 #if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT 1766 if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT)) 1767 return true; 1768 #endif 1769 return false; 1770 } 1771 1772 Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, 1773 Type *Ty) { 1774 llvm_fenv_clearexcept(); 1775 double Result = NativeFP(V.convertToDouble()); 1776 if (llvm_fenv_testexcept()) { 1777 llvm_fenv_clearexcept(); 1778 return nullptr; 1779 } 1780 1781 return GetConstantFoldFPValue(Result, Ty); 1782 } 1783 1784 Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), 1785 const APFloat &V, const APFloat &W, Type *Ty) { 1786 llvm_fenv_clearexcept(); 1787 double Result = NativeFP(V.convertToDouble(), W.convertToDouble()); 1788 if (llvm_fenv_testexcept()) { 1789 llvm_fenv_clearexcept(); 1790 return nullptr; 1791 } 1792 1793 return GetConstantFoldFPValue(Result, Ty); 1794 } 1795 1796 Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) { 1797 FixedVectorType *VT = dyn_cast<FixedVectorType>(Op->getType()); 1798 if (!VT) 1799 return nullptr; 1800 1801 // This isn't strictly necessary, but handle the special/common case of zero: 1802 // all integer reductions of a zero input produce zero. 1803 if (isa<ConstantAggregateZero>(Op)) 1804 return ConstantInt::get(VT->getElementType(), 0); 1805 1806 // This is the same as the underlying binops - poison propagates. 1807 if (isa<PoisonValue>(Op) || Op->containsPoisonElement()) 1808 return PoisonValue::get(VT->getElementType()); 1809 1810 // TODO: Handle undef. 
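// Illustrative example (not upstream text):
//   vector_reduce_add(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
// is accumulated by the loop below and folds to i32 10.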
1811 if (!isa<ConstantVector>(Op) && !isa<ConstantDataVector>(Op)) 1812 return nullptr; 1813 1814 auto *EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(0U)); 1815 if (!EltC) 1816 return nullptr; 1817 1818 APInt Acc = EltC->getValue(); 1819 for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) { 1820 if (!(EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(I)))) 1821 return nullptr; 1822 const APInt &X = EltC->getValue(); 1823 switch (IID) { 1824 case Intrinsic::vector_reduce_add: 1825 Acc = Acc + X; 1826 break; 1827 case Intrinsic::vector_reduce_mul: 1828 Acc = Acc * X; 1829 break; 1830 case Intrinsic::vector_reduce_and: 1831 Acc = Acc & X; 1832 break; 1833 case Intrinsic::vector_reduce_or: 1834 Acc = Acc | X; 1835 break; 1836 case Intrinsic::vector_reduce_xor: 1837 Acc = Acc ^ X; 1838 break; 1839 case Intrinsic::vector_reduce_smin: 1840 Acc = APIntOps::smin(Acc, X); 1841 break; 1842 case Intrinsic::vector_reduce_smax: 1843 Acc = APIntOps::smax(Acc, X); 1844 break; 1845 case Intrinsic::vector_reduce_umin: 1846 Acc = APIntOps::umin(Acc, X); 1847 break; 1848 case Intrinsic::vector_reduce_umax: 1849 Acc = APIntOps::umax(Acc, X); 1850 break; 1851 } 1852 } 1853 1854 return ConstantInt::get(Op->getContext(), Acc); 1855 } 1856 1857 /// Attempt to fold an SSE floating point to integer conversion of a constant 1858 /// floating point. If roundTowardZero is false, the default IEEE rounding is 1859 /// used (toward nearest, ties to even). This matches the behavior of the 1860 /// non-truncating SSE instructions in the default rounding mode. The desired 1861 /// integer type Ty is used to select how many bits are available for the 1862 /// result. Returns null if the conversion cannot be performed, otherwise 1863 /// returns the Constant value resulting from the conversion. 1864 Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero, 1865 Type *Ty, bool IsSigned) { 1866 // All of these conversion intrinsics form an integer of at most 64bits. 1867 unsigned ResultWidth = Ty->getIntegerBitWidth(); 1868 assert(ResultWidth <= 64 && 1869 "Can only constant fold conversions to 64 and 32 bit ints"); 1870 1871 uint64_t UIntVal; 1872 bool isExact = false; 1873 APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero 1874 : APFloat::rmNearestTiesToEven; 1875 APFloat::opStatus status = 1876 Val.convertToInteger(MutableArrayRef(UIntVal), ResultWidth, 1877 IsSigned, mode, &isExact); 1878 if (status != APFloat::opOK && 1879 (!roundTowardZero || status != APFloat::opInexact)) 1880 return nullptr; 1881 return ConstantInt::get(Ty, UIntVal, IsSigned); 1882 } 1883 1884 double getValueAsDouble(ConstantFP *Op) { 1885 Type *Ty = Op->getType(); 1886 1887 if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) 1888 return Op->getValueAPF().convertToDouble(); 1889 1890 bool unused; 1891 APFloat APF = Op->getValueAPF(); 1892 APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused); 1893 return APF.convertToDouble(); 1894 } 1895 1896 static bool getConstIntOrUndef(Value *Op, const APInt *&C) { 1897 if (auto *CI = dyn_cast<ConstantInt>(Op)) { 1898 C = &CI->getValue(); 1899 return true; 1900 } 1901 if (isa<UndefValue>(Op)) { 1902 C = nullptr; 1903 return true; 1904 } 1905 return false; 1906 } 1907 1908 /// Checks if the given intrinsic call, which evaluates to constant, is allowed 1909 /// to be folded. 1910 /// 1911 /// \param CI Constrained intrinsic call. 1912 /// \param St Exception flags raised during constant evaluation. 
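/// \returns true if the call may be replaced with the evaluated constant,
/// false if the computation must be left for runtime.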
1913 static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
1914 APFloat::opStatus St) {
1915 std::optional<RoundingMode> ORM = CI->getRoundingMode();
1916 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
1917
1918 // If the operation does not change exception status flags, it is safe
1919 // to fold.
1920 if (St == APFloat::opStatus::opOK)
1921 return true;
1922
1923 // If evaluation raised an FP exception, the result can depend on rounding
1924 // mode. If the latter is unknown, folding is not possible.
1925 if (ORM && *ORM == RoundingMode::Dynamic)
1926 return false;
1927
1928 // If FP exceptions are ignored, fold the call, even if such an exception is
1929 // raised.
1930 if (EB && *EB != fp::ExceptionBehavior::ebStrict)
1931 return true;
1932
1933 // Leave the calculation for runtime so that exception flags are correctly set
1934 // in hardware.
1935 return false;
1936 }
1937
1938 /// Returns the rounding mode that should be used for constant evaluation.
1939 static RoundingMode
1940 getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
1941 std::optional<RoundingMode> ORM = CI->getRoundingMode();
1942 if (!ORM || *ORM == RoundingMode::Dynamic)
1943 // Even if the rounding mode is unknown, try evaluating the operation.
1944 // If it does not raise an inexact exception, rounding was not applied,
1945 // so the result is exact and does not depend on the rounding mode. Whether
1946 // other FP exceptions are raised does not depend on the rounding mode.
1947 return RoundingMode::NearestTiesToEven;
1948 return *ORM;
1949 }
1950
1951 /// Try to constant fold llvm.canonicalize for the given caller and value.
1952 static Constant *constantFoldCanonicalize(const Type *Ty, const CallBase *CI,
1953 const APFloat &Src) {
1954 // Zero, positive and negative, is always OK to fold.
1955 if (Src.isZero()) {
1956 // Get a fresh 0, since ppc_fp128 does have non-canonical zeros.
1957 return ConstantFP::get(
1958 CI->getContext(),
1959 APFloat::getZero(Src.getSemantics(), Src.isNegative()));
1960 }
1961
1962 if (!Ty->isIEEELikeFPTy())
1963 return nullptr;
1964
1965 // Zero is always canonical and the sign must be preserved.
1966 //
1967 // Denorms and nans may have special encodings, but it should be OK to fold a
1968 // totally average number.
1969 if (Src.isNormal() || Src.isInfinity())
1970 return ConstantFP::get(CI->getContext(), Src);
1971
1972 if (Src.isDenormal() && CI->getParent() && CI->getFunction()) {
1973 DenormalMode DenormMode =
1974 CI->getFunction()->getDenormalMode(Src.getSemantics());
1975
1976 if (DenormMode == DenormalMode::getIEEE())
1977 return ConstantFP::get(CI->getContext(), Src);
1978
1979 if (DenormMode.Input == DenormalMode::Dynamic)
1980 return nullptr;
1981
1982 // If we know whether either input or output is flushed, we can fold.
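// Illustrative behaviour of the remaining path (not upstream text): with a
// "preserve-sign" denormal mode, canonicalizing a negative denormal folds
// to -0.0 below; with "positive-zero" it folds to +0.0.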
1983 if ((DenormMode.Input == DenormalMode::Dynamic && 1984 DenormMode.Output == DenormalMode::IEEE) || 1985 (DenormMode.Input == DenormalMode::IEEE && 1986 DenormMode.Output == DenormalMode::Dynamic)) 1987 return nullptr; 1988 1989 bool IsPositive = 1990 (!Src.isNegative() || DenormMode.Input == DenormalMode::PositiveZero || 1991 (DenormMode.Output == DenormalMode::PositiveZero && 1992 DenormMode.Input == DenormalMode::IEEE)); 1993 1994 return ConstantFP::get(CI->getContext(), 1995 APFloat::getZero(Src.getSemantics(), !IsPositive)); 1996 } 1997 1998 return nullptr; 1999 } 2000 2001 static Constant *ConstantFoldScalarCall1(StringRef Name, 2002 Intrinsic::ID IntrinsicID, 2003 Type *Ty, 2004 ArrayRef<Constant *> Operands, 2005 const TargetLibraryInfo *TLI, 2006 const CallBase *Call) { 2007 assert(Operands.size() == 1 && "Wrong number of operands."); 2008 2009 if (IntrinsicID == Intrinsic::is_constant) { 2010 // We know we have a "Constant" argument. But we want to only 2011 // return true for manifest constants, not those that depend on 2012 // constants with unknowable values, e.g. GlobalValue or BlockAddress. 2013 if (Operands[0]->isManifestConstant()) 2014 return ConstantInt::getTrue(Ty->getContext()); 2015 return nullptr; 2016 } 2017 2018 if (isa<PoisonValue>(Operands[0])) { 2019 // TODO: All of these operations should probably propagate poison. 2020 if (IntrinsicID == Intrinsic::canonicalize) 2021 return PoisonValue::get(Ty); 2022 } 2023 2024 if (isa<UndefValue>(Operands[0])) { 2025 // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN. 2026 // ctpop() is between 0 and bitwidth, pick 0 for undef. 2027 // fptoui.sat and fptosi.sat can always fold to zero (for a zero input). 2028 if (IntrinsicID == Intrinsic::cos || 2029 IntrinsicID == Intrinsic::ctpop || 2030 IntrinsicID == Intrinsic::fptoui_sat || 2031 IntrinsicID == Intrinsic::fptosi_sat || 2032 IntrinsicID == Intrinsic::canonicalize) 2033 return Constant::getNullValue(Ty); 2034 if (IntrinsicID == Intrinsic::bswap || 2035 IntrinsicID == Intrinsic::bitreverse || 2036 IntrinsicID == Intrinsic::launder_invariant_group || 2037 IntrinsicID == Intrinsic::strip_invariant_group) 2038 return Operands[0]; 2039 } 2040 2041 if (isa<ConstantPointerNull>(Operands[0])) { 2042 // launder(null) == null == strip(null) iff in addrspace 0 2043 if (IntrinsicID == Intrinsic::launder_invariant_group || 2044 IntrinsicID == Intrinsic::strip_invariant_group) { 2045 // If instruction is not yet put in a basic block (e.g. when cloning 2046 // a function during inlining), Call's caller may not be available. 2047 // So check Call's BB first before querying Call->getCaller. 2048 const Function *Caller = 2049 Call->getParent() ? 
Call->getCaller() : nullptr; 2050 if (Caller && 2051 !NullPointerIsDefined( 2052 Caller, Operands[0]->getType()->getPointerAddressSpace())) { 2053 return Operands[0]; 2054 } 2055 return nullptr; 2056 } 2057 } 2058 2059 if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) { 2060 if (IntrinsicID == Intrinsic::convert_to_fp16) { 2061 APFloat Val(Op->getValueAPF()); 2062 2063 bool lost = false; 2064 Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost); 2065 2066 return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt()); 2067 } 2068 2069 APFloat U = Op->getValueAPF(); 2070 2071 if (IntrinsicID == Intrinsic::wasm_trunc_signed || 2072 IntrinsicID == Intrinsic::wasm_trunc_unsigned) { 2073 bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed; 2074 2075 if (U.isNaN()) 2076 return nullptr; 2077 2078 unsigned Width = Ty->getIntegerBitWidth(); 2079 APSInt Int(Width, !Signed); 2080 bool IsExact = false; 2081 APFloat::opStatus Status = 2082 U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact); 2083 2084 if (Status == APFloat::opOK || Status == APFloat::opInexact) 2085 return ConstantInt::get(Ty, Int); 2086 2087 return nullptr; 2088 } 2089 2090 if (IntrinsicID == Intrinsic::fptoui_sat || 2091 IntrinsicID == Intrinsic::fptosi_sat) { 2092 // convertToInteger() already has the desired saturation semantics. 2093 APSInt Int(Ty->getIntegerBitWidth(), 2094 IntrinsicID == Intrinsic::fptoui_sat); 2095 bool IsExact; 2096 U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact); 2097 return ConstantInt::get(Ty, Int); 2098 } 2099 2100 if (IntrinsicID == Intrinsic::canonicalize) 2101 return constantFoldCanonicalize(Ty, Call, U); 2102 2103 if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) 2104 return nullptr; 2105 2106 // Use internal versions of these intrinsics. 2107 2108 if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) { 2109 U.roundToIntegral(APFloat::rmNearestTiesToEven); 2110 return ConstantFP::get(Ty->getContext(), U); 2111 } 2112 2113 if (IntrinsicID == Intrinsic::round) { 2114 U.roundToIntegral(APFloat::rmNearestTiesToAway); 2115 return ConstantFP::get(Ty->getContext(), U); 2116 } 2117 2118 if (IntrinsicID == Intrinsic::roundeven) { 2119 U.roundToIntegral(APFloat::rmNearestTiesToEven); 2120 return ConstantFP::get(Ty->getContext(), U); 2121 } 2122 2123 if (IntrinsicID == Intrinsic::ceil) { 2124 U.roundToIntegral(APFloat::rmTowardPositive); 2125 return ConstantFP::get(Ty->getContext(), U); 2126 } 2127 2128 if (IntrinsicID == Intrinsic::floor) { 2129 U.roundToIntegral(APFloat::rmTowardNegative); 2130 return ConstantFP::get(Ty->getContext(), U); 2131 } 2132 2133 if (IntrinsicID == Intrinsic::trunc) { 2134 U.roundToIntegral(APFloat::rmTowardZero); 2135 return ConstantFP::get(Ty->getContext(), U); 2136 } 2137 2138 if (IntrinsicID == Intrinsic::fabs) { 2139 U.clearSign(); 2140 return ConstantFP::get(Ty->getContext(), U); 2141 } 2142 2143 if (IntrinsicID == Intrinsic::amdgcn_fract) { 2144 // The v_fract instruction behaves like the OpenCL spec, which defines 2145 // fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is 2146 // there to prevent fract(-small) from returning 1.0. It returns the 2147 // largest positive floating-point number less than 1.0." 
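// Illustrative (not upstream text): for a tiny negative input, U - floor(U)
// rounds to 1.0, and the minimum() below clamps the result to the largest
// value below 1.0 (0x1.fffffep-1 for f32) instead.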
2148 APFloat FloorU(U); 2149 FloorU.roundToIntegral(APFloat::rmTowardNegative); 2150 APFloat FractU(U - FloorU); 2151 APFloat AlmostOne(U.getSemantics(), 1); 2152 AlmostOne.next(/*nextDown*/ true); 2153 return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne)); 2154 } 2155 2156 // Rounding operations (floor, trunc, ceil, round and nearbyint) do not 2157 // raise FP exceptions, unless the argument is signaling NaN. 2158 2159 std::optional<APFloat::roundingMode> RM; 2160 switch (IntrinsicID) { 2161 default: 2162 break; 2163 case Intrinsic::experimental_constrained_nearbyint: 2164 case Intrinsic::experimental_constrained_rint: { 2165 auto CI = cast<ConstrainedFPIntrinsic>(Call); 2166 RM = CI->getRoundingMode(); 2167 if (!RM || *RM == RoundingMode::Dynamic) 2168 return nullptr; 2169 break; 2170 } 2171 case Intrinsic::experimental_constrained_round: 2172 RM = APFloat::rmNearestTiesToAway; 2173 break; 2174 case Intrinsic::experimental_constrained_ceil: 2175 RM = APFloat::rmTowardPositive; 2176 break; 2177 case Intrinsic::experimental_constrained_floor: 2178 RM = APFloat::rmTowardNegative; 2179 break; 2180 case Intrinsic::experimental_constrained_trunc: 2181 RM = APFloat::rmTowardZero; 2182 break; 2183 } 2184 if (RM) { 2185 auto CI = cast<ConstrainedFPIntrinsic>(Call); 2186 if (U.isFinite()) { 2187 APFloat::opStatus St = U.roundToIntegral(*RM); 2188 if (IntrinsicID == Intrinsic::experimental_constrained_rint && 2189 St == APFloat::opInexact) { 2190 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior(); 2191 if (EB && *EB == fp::ebStrict) 2192 return nullptr; 2193 } 2194 } else if (U.isSignaling()) { 2195 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior(); 2196 if (EB && *EB != fp::ebIgnore) 2197 return nullptr; 2198 U = APFloat::getQNaN(U.getSemantics()); 2199 } 2200 return ConstantFP::get(Ty->getContext(), U); 2201 } 2202 2203 /// We only fold functions with finite arguments. Folding NaN and inf is 2204 /// likely to be aborted with an exception anyway, and some host libms 2205 /// have known errors raising exceptions. 2206 if (!U.isFinite()) 2207 return nullptr; 2208 2209 /// Currently APFloat versions of these functions do not exist, so we use 2210 /// the host native double versions. Float versions are not called 2211 /// directly but for all these it is true (float)(f((double)arg)) == 2212 /// f(arg). Long double not supported yet. 2213 const APFloat &APF = Op->getValueAPF(); 2214 2215 switch (IntrinsicID) { 2216 default: break; 2217 case Intrinsic::log: 2218 return ConstantFoldFP(log, APF, Ty); 2219 case Intrinsic::log2: 2220 // TODO: What about hosts that lack a C99 library? 2221 return ConstantFoldFP(log2, APF, Ty); 2222 case Intrinsic::log10: 2223 // TODO: What about hosts that lack a C99 library? 2224 return ConstantFoldFP(log10, APF, Ty); 2225 case Intrinsic::exp: 2226 return ConstantFoldFP(exp, APF, Ty); 2227 case Intrinsic::exp2: 2228 // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. 2229 return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty); 2230 case Intrinsic::sin: 2231 return ConstantFoldFP(sin, APF, Ty); 2232 case Intrinsic::cos: 2233 return ConstantFoldFP(cos, APF, Ty); 2234 case Intrinsic::sqrt: 2235 return ConstantFoldFP(sqrt, APF, Ty); 2236 case Intrinsic::amdgcn_cos: 2237 case Intrinsic::amdgcn_sin: { 2238 double V = getValueAsDouble(Op); 2239 if (V < -256.0 || V > 256.0) 2240 // The gfx8 and gfx9 architectures handle arguments outside the range 2241 // [-256, 256] differently. 
This should be a rare case so bail out 2242 // rather than trying to handle the difference. 2243 return nullptr; 2244 bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos; 2245 double V4 = V * 4.0; 2246 if (V4 == floor(V4)) { 2247 // Force exact results for quarter-integer inputs. 2248 const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 }; 2249 V = SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3]; 2250 } else { 2251 if (IsCos) 2252 V = cos(V * 2.0 * numbers::pi); 2253 else 2254 V = sin(V * 2.0 * numbers::pi); 2255 } 2256 return GetConstantFoldFPValue(V, Ty); 2257 } 2258 } 2259 2260 if (!TLI) 2261 return nullptr; 2262 2263 LibFunc Func = NotLibFunc; 2264 if (!TLI->getLibFunc(Name, Func)) 2265 return nullptr; 2266 2267 switch (Func) { 2268 default: 2269 break; 2270 case LibFunc_acos: 2271 case LibFunc_acosf: 2272 case LibFunc_acos_finite: 2273 case LibFunc_acosf_finite: 2274 if (TLI->has(Func)) 2275 return ConstantFoldFP(acos, APF, Ty); 2276 break; 2277 case LibFunc_asin: 2278 case LibFunc_asinf: 2279 case LibFunc_asin_finite: 2280 case LibFunc_asinf_finite: 2281 if (TLI->has(Func)) 2282 return ConstantFoldFP(asin, APF, Ty); 2283 break; 2284 case LibFunc_atan: 2285 case LibFunc_atanf: 2286 if (TLI->has(Func)) 2287 return ConstantFoldFP(atan, APF, Ty); 2288 break; 2289 case LibFunc_ceil: 2290 case LibFunc_ceilf: 2291 if (TLI->has(Func)) { 2292 U.roundToIntegral(APFloat::rmTowardPositive); 2293 return ConstantFP::get(Ty->getContext(), U); 2294 } 2295 break; 2296 case LibFunc_cos: 2297 case LibFunc_cosf: 2298 if (TLI->has(Func)) 2299 return ConstantFoldFP(cos, APF, Ty); 2300 break; 2301 case LibFunc_cosh: 2302 case LibFunc_coshf: 2303 case LibFunc_cosh_finite: 2304 case LibFunc_coshf_finite: 2305 if (TLI->has(Func)) 2306 return ConstantFoldFP(cosh, APF, Ty); 2307 break; 2308 case LibFunc_exp: 2309 case LibFunc_expf: 2310 case LibFunc_exp_finite: 2311 case LibFunc_expf_finite: 2312 if (TLI->has(Func)) 2313 return ConstantFoldFP(exp, APF, Ty); 2314 break; 2315 case LibFunc_exp2: 2316 case LibFunc_exp2f: 2317 case LibFunc_exp2_finite: 2318 case LibFunc_exp2f_finite: 2319 if (TLI->has(Func)) 2320 // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. 2321 return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty); 2322 break; 2323 case LibFunc_fabs: 2324 case LibFunc_fabsf: 2325 if (TLI->has(Func)) { 2326 U.clearSign(); 2327 return ConstantFP::get(Ty->getContext(), U); 2328 } 2329 break; 2330 case LibFunc_floor: 2331 case LibFunc_floorf: 2332 if (TLI->has(Func)) { 2333 U.roundToIntegral(APFloat::rmTowardNegative); 2334 return ConstantFP::get(Ty->getContext(), U); 2335 } 2336 break; 2337 case LibFunc_log: 2338 case LibFunc_logf: 2339 case LibFunc_log_finite: 2340 case LibFunc_logf_finite: 2341 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func)) 2342 return ConstantFoldFP(log, APF, Ty); 2343 break; 2344 case LibFunc_log2: 2345 case LibFunc_log2f: 2346 case LibFunc_log2_finite: 2347 case LibFunc_log2f_finite: 2348 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func)) 2349 // TODO: What about hosts that lack a C99 library? 2350 return ConstantFoldFP(log2, APF, Ty); 2351 break; 2352 case LibFunc_log10: 2353 case LibFunc_log10f: 2354 case LibFunc_log10_finite: 2355 case LibFunc_log10f_finite: 2356 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func)) 2357 // TODO: What about hosts that lack a C99 library? 
2358 return ConstantFoldFP(log10, APF, Ty); 2359 break; 2360 case LibFunc_nearbyint: 2361 case LibFunc_nearbyintf: 2362 case LibFunc_rint: 2363 case LibFunc_rintf: 2364 if (TLI->has(Func)) { 2365 U.roundToIntegral(APFloat::rmNearestTiesToEven); 2366 return ConstantFP::get(Ty->getContext(), U); 2367 } 2368 break; 2369 case LibFunc_round: 2370 case LibFunc_roundf: 2371 if (TLI->has(Func)) { 2372 U.roundToIntegral(APFloat::rmNearestTiesToAway); 2373 return ConstantFP::get(Ty->getContext(), U); 2374 } 2375 break; 2376 case LibFunc_sin: 2377 case LibFunc_sinf: 2378 if (TLI->has(Func)) 2379 return ConstantFoldFP(sin, APF, Ty); 2380 break; 2381 case LibFunc_sinh: 2382 case LibFunc_sinhf: 2383 case LibFunc_sinh_finite: 2384 case LibFunc_sinhf_finite: 2385 if (TLI->has(Func)) 2386 return ConstantFoldFP(sinh, APF, Ty); 2387 break; 2388 case LibFunc_sqrt: 2389 case LibFunc_sqrtf: 2390 if (!APF.isNegative() && TLI->has(Func)) 2391 return ConstantFoldFP(sqrt, APF, Ty); 2392 break; 2393 case LibFunc_tan: 2394 case LibFunc_tanf: 2395 if (TLI->has(Func)) 2396 return ConstantFoldFP(tan, APF, Ty); 2397 break; 2398 case LibFunc_tanh: 2399 case LibFunc_tanhf: 2400 if (TLI->has(Func)) 2401 return ConstantFoldFP(tanh, APF, Ty); 2402 break; 2403 case LibFunc_trunc: 2404 case LibFunc_truncf: 2405 if (TLI->has(Func)) { 2406 U.roundToIntegral(APFloat::rmTowardZero); 2407 return ConstantFP::get(Ty->getContext(), U); 2408 } 2409 break; 2410 } 2411 return nullptr; 2412 } 2413 2414 if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) { 2415 switch (IntrinsicID) { 2416 case Intrinsic::bswap: 2417 return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap()); 2418 case Intrinsic::ctpop: 2419 return ConstantInt::get(Ty, Op->getValue().popcount()); 2420 case Intrinsic::bitreverse: 2421 return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits()); 2422 case Intrinsic::convert_from_fp16: { 2423 APFloat Val(APFloat::IEEEhalf(), Op->getValue()); 2424 2425 bool lost = false; 2426 APFloat::opStatus status = Val.convert( 2427 Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost); 2428 2429 // Conversion is always precise. 2430 (void)status; 2431 assert(status != APFloat::opInexact && !lost && 2432 "Precision lost during fp16 constfolding"); 2433 2434 return ConstantFP::get(Ty->getContext(), Val); 2435 } 2436 default: 2437 return nullptr; 2438 } 2439 } 2440 2441 switch (IntrinsicID) { 2442 default: break; 2443 case Intrinsic::vector_reduce_add: 2444 case Intrinsic::vector_reduce_mul: 2445 case Intrinsic::vector_reduce_and: 2446 case Intrinsic::vector_reduce_or: 2447 case Intrinsic::vector_reduce_xor: 2448 case Intrinsic::vector_reduce_smin: 2449 case Intrinsic::vector_reduce_smax: 2450 case Intrinsic::vector_reduce_umin: 2451 case Intrinsic::vector_reduce_umax: 2452 if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0])) 2453 return C; 2454 break; 2455 } 2456 2457 // Support ConstantVector in case we have an Undef in the top. 
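// Illustrative example (not upstream text):
//   x86.sse.cvtss2si(<4 x float> <float 2.5, float 0.0, float 0.0, float 0.0>)
// converts element 0 below with round-to-nearest-even and folds to i32 2.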
2458 if (isa<ConstantVector>(Operands[0]) || 2459 isa<ConstantDataVector>(Operands[0])) { 2460 auto *Op = cast<Constant>(Operands[0]); 2461 switch (IntrinsicID) { 2462 default: break; 2463 case Intrinsic::x86_sse_cvtss2si: 2464 case Intrinsic::x86_sse_cvtss2si64: 2465 case Intrinsic::x86_sse2_cvtsd2si: 2466 case Intrinsic::x86_sse2_cvtsd2si64: 2467 if (ConstantFP *FPOp = 2468 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) 2469 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), 2470 /*roundTowardZero=*/false, Ty, 2471 /*IsSigned*/true); 2472 break; 2473 case Intrinsic::x86_sse_cvttss2si: 2474 case Intrinsic::x86_sse_cvttss2si64: 2475 case Intrinsic::x86_sse2_cvttsd2si: 2476 case Intrinsic::x86_sse2_cvttsd2si64: 2477 if (ConstantFP *FPOp = 2478 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) 2479 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), 2480 /*roundTowardZero=*/true, Ty, 2481 /*IsSigned*/true); 2482 break; 2483 } 2484 } 2485 2486 return nullptr; 2487 } 2488 2489 static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2, 2490 const ConstrainedFPIntrinsic *Call) { 2491 APFloat::opStatus St = APFloat::opOK; 2492 auto *FCmp = cast<ConstrainedFPCmpIntrinsic>(Call); 2493 FCmpInst::Predicate Cond = FCmp->getPredicate(); 2494 if (FCmp->isSignaling()) { 2495 if (Op1.isNaN() || Op2.isNaN()) 2496 St = APFloat::opInvalidOp; 2497 } else { 2498 if (Op1.isSignaling() || Op2.isSignaling()) 2499 St = APFloat::opInvalidOp; 2500 } 2501 bool Result = FCmpInst::compare(Op1, Op2, Cond); 2502 if (mayFoldConstrained(const_cast<ConstrainedFPCmpIntrinsic *>(FCmp), St)) 2503 return ConstantInt::get(Call->getType()->getScalarType(), Result); 2504 return nullptr; 2505 } 2506 2507 static Constant *ConstantFoldScalarCall2(StringRef Name, 2508 Intrinsic::ID IntrinsicID, 2509 Type *Ty, 2510 ArrayRef<Constant *> Operands, 2511 const TargetLibraryInfo *TLI, 2512 const CallBase *Call) { 2513 assert(Operands.size() == 2 && "Wrong number of operands."); 2514 2515 if (Ty->isFloatingPointTy()) { 2516 // TODO: We should have undef handling for all of the FP intrinsics that 2517 // are attempted to be folded in this function. 2518 bool IsOp0Undef = isa<UndefValue>(Operands[0]); 2519 bool IsOp1Undef = isa<UndefValue>(Operands[1]); 2520 switch (IntrinsicID) { 2521 case Intrinsic::maxnum: 2522 case Intrinsic::minnum: 2523 case Intrinsic::maximum: 2524 case Intrinsic::minimum: 2525 // If one argument is undef, return the other argument. 
2526 if (IsOp0Undef) 2527 return Operands[1]; 2528 if (IsOp1Undef) 2529 return Operands[0]; 2530 break; 2531 } 2532 } 2533 2534 if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) { 2535 const APFloat &Op1V = Op1->getValueAPF(); 2536 2537 if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) { 2538 if (Op2->getType() != Op1->getType()) 2539 return nullptr; 2540 const APFloat &Op2V = Op2->getValueAPF(); 2541 2542 if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) { 2543 RoundingMode RM = getEvaluationRoundingMode(ConstrIntr); 2544 APFloat Res = Op1V; 2545 APFloat::opStatus St; 2546 switch (IntrinsicID) { 2547 default: 2548 return nullptr; 2549 case Intrinsic::experimental_constrained_fadd: 2550 St = Res.add(Op2V, RM); 2551 break; 2552 case Intrinsic::experimental_constrained_fsub: 2553 St = Res.subtract(Op2V, RM); 2554 break; 2555 case Intrinsic::experimental_constrained_fmul: 2556 St = Res.multiply(Op2V, RM); 2557 break; 2558 case Intrinsic::experimental_constrained_fdiv: 2559 St = Res.divide(Op2V, RM); 2560 break; 2561 case Intrinsic::experimental_constrained_frem: 2562 St = Res.mod(Op2V); 2563 break; 2564 case Intrinsic::experimental_constrained_fcmp: 2565 case Intrinsic::experimental_constrained_fcmps: 2566 return evaluateCompare(Op1V, Op2V, ConstrIntr); 2567 } 2568 if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), 2569 St)) 2570 return ConstantFP::get(Ty->getContext(), Res); 2571 return nullptr; 2572 } 2573 2574 switch (IntrinsicID) { 2575 default: 2576 break; 2577 case Intrinsic::copysign: 2578 return ConstantFP::get(Ty->getContext(), APFloat::copySign(Op1V, Op2V)); 2579 case Intrinsic::minnum: 2580 return ConstantFP::get(Ty->getContext(), minnum(Op1V, Op2V)); 2581 case Intrinsic::maxnum: 2582 return ConstantFP::get(Ty->getContext(), maxnum(Op1V, Op2V)); 2583 case Intrinsic::minimum: 2584 return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V)); 2585 case Intrinsic::maximum: 2586 return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V)); 2587 } 2588 2589 if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) 2590 return nullptr; 2591 2592 switch (IntrinsicID) { 2593 default: 2594 break; 2595 case Intrinsic::pow: 2596 return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); 2597 case Intrinsic::amdgcn_fmul_legacy: 2598 // The legacy behaviour is that multiplying +/- 0.0 by anything, even 2599 // NaN or infinity, gives +0.0. 
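// e.g. (illustrative, not upstream text) amdgcn.fmul.legacy(+0.0, NaN)
// folds to +0.0 below, where a plain fmul would have produced NaN.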
2600 if (Op1V.isZero() || Op2V.isZero()) 2601 return ConstantFP::getZero(Ty); 2602 return ConstantFP::get(Ty->getContext(), Op1V * Op2V); 2603 } 2604 2605 if (!TLI) 2606 return nullptr; 2607 2608 LibFunc Func = NotLibFunc; 2609 if (!TLI->getLibFunc(Name, Func)) 2610 return nullptr; 2611 2612 switch (Func) { 2613 default: 2614 break; 2615 case LibFunc_pow: 2616 case LibFunc_powf: 2617 case LibFunc_pow_finite: 2618 case LibFunc_powf_finite: 2619 if (TLI->has(Func)) 2620 return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); 2621 break; 2622 case LibFunc_fmod: 2623 case LibFunc_fmodf: 2624 if (TLI->has(Func)) { 2625 APFloat V = Op1->getValueAPF(); 2626 if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF())) 2627 return ConstantFP::get(Ty->getContext(), V); 2628 } 2629 break; 2630 case LibFunc_remainder: 2631 case LibFunc_remainderf: 2632 if (TLI->has(Func)) { 2633 APFloat V = Op1->getValueAPF(); 2634 if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF())) 2635 return ConstantFP::get(Ty->getContext(), V); 2636 } 2637 break; 2638 case LibFunc_atan2: 2639 case LibFunc_atan2f: 2640 // atan2(+/-0.0, +/-0.0) is known to raise an exception on some libm 2641 // (Solaris), so we do not assume a known result for that. 2642 if (Op1V.isZero() && Op2V.isZero()) 2643 return nullptr; 2644 [[fallthrough]]; 2645 case LibFunc_atan2_finite: 2646 case LibFunc_atan2f_finite: 2647 if (TLI->has(Func)) 2648 return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); 2649 break; 2650 } 2651 } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) { 2652 switch (IntrinsicID) { 2653 case Intrinsic::is_fpclass: { 2654 FPClassTest Mask = static_cast<FPClassTest>(Op2C->getZExtValue()); 2655 bool Result = 2656 ((Mask & fcSNan) && Op1V.isNaN() && Op1V.isSignaling()) || 2657 ((Mask & fcQNan) && Op1V.isNaN() && !Op1V.isSignaling()) || 2658 ((Mask & fcNegInf) && Op1V.isNegInfinity()) || 2659 ((Mask & fcNegNormal) && Op1V.isNormal() && Op1V.isNegative()) || 2660 ((Mask & fcNegSubnormal) && Op1V.isDenormal() && Op1V.isNegative()) || 2661 ((Mask & fcNegZero) && Op1V.isZero() && Op1V.isNegative()) || 2662 ((Mask & fcPosZero) && Op1V.isZero() && !Op1V.isNegative()) || 2663 ((Mask & fcPosSubnormal) && Op1V.isDenormal() && !Op1V.isNegative()) || 2664 ((Mask & fcPosNormal) && Op1V.isNormal() && !Op1V.isNegative()) || 2665 ((Mask & fcPosInf) && Op1V.isPosInfinity()); 2666 return ConstantInt::get(Ty, Result); 2667 } 2668 default: 2669 break; 2670 } 2671 2672 if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) 2673 return nullptr; 2674 if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) 2675 return ConstantFP::get( 2676 Ty->getContext(), 2677 APFloat((float)std::pow((float)Op1V.convertToDouble(), 2678 (int)Op2C->getZExtValue()))); 2679 if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy()) 2680 return ConstantFP::get( 2681 Ty->getContext(), 2682 APFloat((float)std::pow((float)Op1V.convertToDouble(), 2683 (int)Op2C->getZExtValue()))); 2684 if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy()) 2685 return ConstantFP::get( 2686 Ty->getContext(), 2687 APFloat((double)std::pow(Op1V.convertToDouble(), 2688 (int)Op2C->getZExtValue()))); 2689 2690 if (IntrinsicID == Intrinsic::amdgcn_ldexp) { 2691 // FIXME: Should flush denorms depending on FP mode, but that's ignored 2692 // everywhere else. 
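// Illustrative (not upstream text): amdgcn.ldexp(double 1.5, i32 3) folds
// via scalbn below to 1.5 * 2^3 = 12.0.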
2693 2694 // scalbn is equivalent to ldexp with float radix 2 2695 APFloat Result = scalbn(Op1->getValueAPF(), Op2C->getSExtValue(), 2696 APFloat::rmNearestTiesToEven); 2697 return ConstantFP::get(Ty->getContext(), Result); 2698 } 2699 } 2700 return nullptr; 2701 } 2702 2703 if (Operands[0]->getType()->isIntegerTy() && 2704 Operands[1]->getType()->isIntegerTy()) { 2705 const APInt *C0, *C1; 2706 if (!getConstIntOrUndef(Operands[0], C0) || 2707 !getConstIntOrUndef(Operands[1], C1)) 2708 return nullptr; 2709 2710 switch (IntrinsicID) { 2711 default: break; 2712 case Intrinsic::smax: 2713 case Intrinsic::smin: 2714 case Intrinsic::umax: 2715 case Intrinsic::umin: 2716 // This is the same as for binary ops - poison propagates. 2717 // TODO: Poison handling should be consolidated. 2718 if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1])) 2719 return PoisonValue::get(Ty); 2720 2721 if (!C0 && !C1) 2722 return UndefValue::get(Ty); 2723 if (!C0 || !C1) 2724 return MinMaxIntrinsic::getSaturationPoint(IntrinsicID, Ty); 2725 return ConstantInt::get( 2726 Ty, ICmpInst::compare(*C0, *C1, 2727 MinMaxIntrinsic::getPredicate(IntrinsicID)) 2728 ? *C0 2729 : *C1); 2730 2731 case Intrinsic::usub_with_overflow: 2732 case Intrinsic::ssub_with_overflow: 2733 // X - undef -> { 0, false } 2734 // undef - X -> { 0, false } 2735 if (!C0 || !C1) 2736 return Constant::getNullValue(Ty); 2737 [[fallthrough]]; 2738 case Intrinsic::uadd_with_overflow: 2739 case Intrinsic::sadd_with_overflow: 2740 // X + undef -> { -1, false } 2741 // undef + x -> { -1, false } 2742 if (!C0 || !C1) { 2743 return ConstantStruct::get( 2744 cast<StructType>(Ty), 2745 {Constant::getAllOnesValue(Ty->getStructElementType(0)), 2746 Constant::getNullValue(Ty->getStructElementType(1))}); 2747 } 2748 [[fallthrough]]; 2749 case Intrinsic::smul_with_overflow: 2750 case Intrinsic::umul_with_overflow: { 2751 // undef * X -> { 0, false } 2752 // X * undef -> { 0, false } 2753 if (!C0 || !C1) 2754 return Constant::getNullValue(Ty); 2755 2756 APInt Res; 2757 bool Overflow; 2758 switch (IntrinsicID) { 2759 default: llvm_unreachable("Invalid case"); 2760 case Intrinsic::sadd_with_overflow: 2761 Res = C0->sadd_ov(*C1, Overflow); 2762 break; 2763 case Intrinsic::uadd_with_overflow: 2764 Res = C0->uadd_ov(*C1, Overflow); 2765 break; 2766 case Intrinsic::ssub_with_overflow: 2767 Res = C0->ssub_ov(*C1, Overflow); 2768 break; 2769 case Intrinsic::usub_with_overflow: 2770 Res = C0->usub_ov(*C1, Overflow); 2771 break; 2772 case Intrinsic::smul_with_overflow: 2773 Res = C0->smul_ov(*C1, Overflow); 2774 break; 2775 case Intrinsic::umul_with_overflow: 2776 Res = C0->umul_ov(*C1, Overflow); 2777 break; 2778 } 2779 Constant *Ops[] = { 2780 ConstantInt::get(Ty->getContext(), Res), 2781 ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow) 2782 }; 2783 return ConstantStruct::get(cast<StructType>(Ty), Ops); 2784 } 2785 case Intrinsic::uadd_sat: 2786 case Intrinsic::sadd_sat: 2787 // This is the same as for binary ops - poison propagates. 2788 // TODO: Poison handling should be consolidated. 
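// Illustrative example (not upstream text): uadd.sat(i8 200, i8 100)
// saturates below to i8 255, since 300 does not fit in 8 bits.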
2789 if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1])) 2790 return PoisonValue::get(Ty); 2791 2792 if (!C0 && !C1) 2793 return UndefValue::get(Ty); 2794 if (!C0 || !C1) 2795 return Constant::getAllOnesValue(Ty); 2796 if (IntrinsicID == Intrinsic::uadd_sat) 2797 return ConstantInt::get(Ty, C0->uadd_sat(*C1)); 2798 else 2799 return ConstantInt::get(Ty, C0->sadd_sat(*C1)); 2800 case Intrinsic::usub_sat: 2801 case Intrinsic::ssub_sat: 2802 // This is the same as for binary ops - poison propagates. 2803 // TODO: Poison handling should be consolidated. 2804 if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1])) 2805 return PoisonValue::get(Ty); 2806 2807 if (!C0 && !C1) 2808 return UndefValue::get(Ty); 2809 if (!C0 || !C1) 2810 return Constant::getNullValue(Ty); 2811 if (IntrinsicID == Intrinsic::usub_sat) 2812 return ConstantInt::get(Ty, C0->usub_sat(*C1)); 2813 else 2814 return ConstantInt::get(Ty, C0->ssub_sat(*C1)); 2815 case Intrinsic::cttz: 2816 case Intrinsic::ctlz: 2817 assert(C1 && "Must be constant int"); 2818 2819 // cttz(0, 1) and ctlz(0, 1) are poison. 2820 if (C1->isOne() && (!C0 || C0->isZero())) 2821 return PoisonValue::get(Ty); 2822 if (!C0) 2823 return Constant::getNullValue(Ty); 2824 if (IntrinsicID == Intrinsic::cttz) 2825 return ConstantInt::get(Ty, C0->countr_zero()); 2826 else 2827 return ConstantInt::get(Ty, C0->countl_zero()); 2828 2829 case Intrinsic::abs: 2830 assert(C1 && "Must be constant int"); 2831 assert((C1->isOne() || C1->isZero()) && "Must be 0 or 1"); 2832 2833 // Undef or minimum val operand with poison min --> undef 2834 if (C1->isOne() && (!C0 || C0->isMinSignedValue())) 2835 return UndefValue::get(Ty); 2836 2837 // Undef operand with no poison min --> 0 (sign bit must be clear) 2838 if (!C0) 2839 return Constant::getNullValue(Ty); 2840 2841 return ConstantInt::get(Ty, C0->abs()); 2842 } 2843 2844 return nullptr; 2845 } 2846 2847 // Support ConstantVector in case we have an Undef in the top. 2848 if ((isa<ConstantVector>(Operands[0]) || 2849 isa<ConstantDataVector>(Operands[0])) && 2850 // Check for default rounding mode. 2851 // FIXME: Support other rounding modes? 
2852 isa<ConstantInt>(Operands[1]) && 2853 cast<ConstantInt>(Operands[1])->getValue() == 4) { 2854 auto *Op = cast<Constant>(Operands[0]); 2855 switch (IntrinsicID) { 2856 default: break; 2857 case Intrinsic::x86_avx512_vcvtss2si32: 2858 case Intrinsic::x86_avx512_vcvtss2si64: 2859 case Intrinsic::x86_avx512_vcvtsd2si32: 2860 case Intrinsic::x86_avx512_vcvtsd2si64: 2861 if (ConstantFP *FPOp = 2862 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) 2863 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), 2864 /*roundTowardZero=*/false, Ty, 2865 /*IsSigned*/true); 2866 break; 2867 case Intrinsic::x86_avx512_vcvtss2usi32: 2868 case Intrinsic::x86_avx512_vcvtss2usi64: 2869 case Intrinsic::x86_avx512_vcvtsd2usi32: 2870 case Intrinsic::x86_avx512_vcvtsd2usi64: 2871 if (ConstantFP *FPOp = 2872 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) 2873 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), 2874 /*roundTowardZero=*/false, Ty, 2875 /*IsSigned*/false); 2876 break; 2877 case Intrinsic::x86_avx512_cvttss2si: 2878 case Intrinsic::x86_avx512_cvttss2si64: 2879 case Intrinsic::x86_avx512_cvttsd2si: 2880 case Intrinsic::x86_avx512_cvttsd2si64: 2881 if (ConstantFP *FPOp = 2882 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) 2883 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), 2884 /*roundTowardZero=*/true, Ty, 2885 /*IsSigned*/true); 2886 break; 2887 case Intrinsic::x86_avx512_cvttss2usi: 2888 case Intrinsic::x86_avx512_cvttss2usi64: 2889 case Intrinsic::x86_avx512_cvttsd2usi: 2890 case Intrinsic::x86_avx512_cvttsd2usi64: 2891 if (ConstantFP *FPOp = 2892 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) 2893 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), 2894 /*roundTowardZero=*/true, Ty, 2895 /*IsSigned*/false); 2896 break; 2897 } 2898 } 2899 return nullptr; 2900 } 2901 2902 static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID, 2903 const APFloat &S0, 2904 const APFloat &S1, 2905 const APFloat &S2) { 2906 unsigned ID; 2907 const fltSemantics &Sem = S0.getSemantics(); 2908 APFloat MA(Sem), SC(Sem), TC(Sem); 2909 if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) { 2910 if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) { 2911 // S2 < 0 2912 ID = 5; 2913 SC = -S0; 2914 } else { 2915 ID = 4; 2916 SC = S0; 2917 } 2918 MA = S2; 2919 TC = -S1; 2920 } else if (abs(S1) >= abs(S0)) { 2921 if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) { 2922 // S1 < 0 2923 ID = 3; 2924 TC = -S2; 2925 } else { 2926 ID = 2; 2927 TC = S2; 2928 } 2929 MA = S1; 2930 SC = S0; 2931 } else { 2932 if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) { 2933 // S0 < 0 2934 ID = 1; 2935 SC = S2; 2936 } else { 2937 ID = 0; 2938 SC = -S2; 2939 } 2940 MA = S0; 2941 TC = -S1; 2942 } 2943 switch (IntrinsicID) { 2944 default: 2945 llvm_unreachable("unhandled amdgcn cube intrinsic"); 2946 case Intrinsic::amdgcn_cubeid: 2947 return APFloat(Sem, ID); 2948 case Intrinsic::amdgcn_cubema: 2949 return MA + MA; 2950 case Intrinsic::amdgcn_cubesc: 2951 return SC; 2952 case Intrinsic::amdgcn_cubetc: 2953 return TC; 2954 } 2955 } 2956 2957 static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands, 2958 Type *Ty) { 2959 const APInt *C0, *C1, *C2; 2960 if (!getConstIntOrUndef(Operands[0], C0) || 2961 !getConstIntOrUndef(Operands[1], C1) || 2962 !getConstIntOrUndef(Operands[2], C2)) 2963 return nullptr; 2964 2965 if (!C2) 2966 return UndefValue::get(Ty); 2967 2968 APInt Val(32, 0); 2969 unsigned NumUndefBytes = 0; 2970 for (unsigned I = 0; I < 32; 
I += 8) { 2971 unsigned Sel = C2->extractBitsAsZExtValue(8, I); 2972 unsigned B = 0; 2973 2974 if (Sel >= 13) 2975 B = 0xff; 2976 else if (Sel == 12) 2977 B = 0x00; 2978 else { 2979 const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1; 2980 if (!Src) 2981 ++NumUndefBytes; 2982 else if (Sel < 8) 2983 B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8); 2984 else 2985 B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 31 : 15) * 0xff; 2986 } 2987 2988 Val.insertBits(B, I, 8); 2989 } 2990 2991 if (NumUndefBytes == 4) 2992 return UndefValue::get(Ty); 2993 2994 return ConstantInt::get(Ty, Val); 2995 } 2996 2997 static Constant *ConstantFoldScalarCall3(StringRef Name, 2998 Intrinsic::ID IntrinsicID, 2999 Type *Ty, 3000 ArrayRef<Constant *> Operands, 3001 const TargetLibraryInfo *TLI, 3002 const CallBase *Call) { 3003 assert(Operands.size() == 3 && "Wrong number of operands."); 3004 3005 if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) { 3006 if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) { 3007 if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) { 3008 const APFloat &C1 = Op1->getValueAPF(); 3009 const APFloat &C2 = Op2->getValueAPF(); 3010 const APFloat &C3 = Op3->getValueAPF(); 3011 3012 if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) { 3013 RoundingMode RM = getEvaluationRoundingMode(ConstrIntr); 3014 APFloat Res = C1; 3015 APFloat::opStatus St; 3016 switch (IntrinsicID) { 3017 default: 3018 return nullptr; 3019 case Intrinsic::experimental_constrained_fma: 3020 case Intrinsic::experimental_constrained_fmuladd: 3021 St = Res.fusedMultiplyAdd(C2, C3, RM); 3022 break; 3023 } 3024 if (mayFoldConstrained( 3025 const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St)) 3026 return ConstantFP::get(Ty->getContext(), Res); 3027 return nullptr; 3028 } 3029 3030 switch (IntrinsicID) { 3031 default: break; 3032 case Intrinsic::amdgcn_fma_legacy: { 3033 // The legacy behaviour is that multiplying +/- 0.0 by anything, even 3034 // NaN or infinity, gives +0.0. 3035 if (C1.isZero() || C2.isZero()) { 3036 // It's tempting to just return C3 here, but that would give the 3037 // wrong result if C3 was -0.0. 3038 return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3); 3039 } 3040 [[fallthrough]]; 3041 } 3042 case Intrinsic::fma: 3043 case Intrinsic::fmuladd: { 3044 APFloat V = C1; 3045 V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven); 3046 return ConstantFP::get(Ty->getContext(), V); 3047 } 3048 case Intrinsic::amdgcn_cubeid: 3049 case Intrinsic::amdgcn_cubema: 3050 case Intrinsic::amdgcn_cubesc: 3051 case Intrinsic::amdgcn_cubetc: { 3052 APFloat V = ConstantFoldAMDGCNCubeIntrinsic(IntrinsicID, C1, C2, C3); 3053 return ConstantFP::get(Ty->getContext(), V); 3054 } 3055 } 3056 } 3057 } 3058 } 3059 3060 if (IntrinsicID == Intrinsic::smul_fix || 3061 IntrinsicID == Intrinsic::smul_fix_sat) { 3062 // poison * C -> poison 3063 // C * poison -> poison 3064 if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1])) 3065 return PoisonValue::get(Ty); 3066 3067 const APInt *C0, *C1; 3068 if (!getConstIntOrUndef(Operands[0], C0) || 3069 !getConstIntOrUndef(Operands[1], C1)) 3070 return nullptr; 3071 3072 // undef * C -> 0 3073 // C * undef -> 0 3074 if (!C0 || !C1) 3075 return Constant::getNullValue(Ty); 3076 3077 // This code performs rounding towards negative infinity in case the result 3078 // cannot be represented exactly for the given scale. 
Targets that do care 3079 // about rounding should use a target hook for specifying how rounding 3080 // should be done, and provide their own folding to be consistent with 3081 // rounding. This is the same approach as used by 3082 // DAGTypeLegalizer::ExpandIntRes_MULFIX. 3083 unsigned Scale = cast<ConstantInt>(Operands[2])->getZExtValue(); 3084 unsigned Width = C0->getBitWidth(); 3085 assert(Scale < Width && "Illegal scale."); 3086 unsigned ExtendedWidth = Width * 2; 3087 APInt Product = 3088 (C0->sext(ExtendedWidth) * C1->sext(ExtendedWidth)).ashr(Scale); 3089 if (IntrinsicID == Intrinsic::smul_fix_sat) { 3090 APInt Max = APInt::getSignedMaxValue(Width).sext(ExtendedWidth); 3091 APInt Min = APInt::getSignedMinValue(Width).sext(ExtendedWidth); 3092 Product = APIntOps::smin(Product, Max); 3093 Product = APIntOps::smax(Product, Min); 3094 } 3095 return ConstantInt::get(Ty->getContext(), Product.sextOrTrunc(Width)); 3096 } 3097 3098 if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) { 3099 const APInt *C0, *C1, *C2; 3100 if (!getConstIntOrUndef(Operands[0], C0) || 3101 !getConstIntOrUndef(Operands[1], C1) || 3102 !getConstIntOrUndef(Operands[2], C2)) 3103 return nullptr; 3104 3105 bool IsRight = IntrinsicID == Intrinsic::fshr; 3106 if (!C2) 3107 return Operands[IsRight ? 1 : 0]; 3108 if (!C0 && !C1) 3109 return UndefValue::get(Ty); 3110 3111 // The shift amount is interpreted as modulo the bitwidth. If the shift 3112 // amount is effectively 0, avoid UB due to oversized inverse shift below. 3113 unsigned BitWidth = C2->getBitWidth(); 3114 unsigned ShAmt = C2->urem(BitWidth); 3115 if (!ShAmt) 3116 return Operands[IsRight ? 1 : 0]; 3117 3118 // (C0 << ShlAmt) | (C1 >> LshrAmt) 3119 unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt; 3120 unsigned ShlAmt = !IsRight ? 
ShAmt : BitWidth - ShAmt; 3121 if (!C0) 3122 return ConstantInt::get(Ty, C1->lshr(LshrAmt)); 3123 if (!C1) 3124 return ConstantInt::get(Ty, C0->shl(ShlAmt)); 3125 return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt)); 3126 } 3127 3128 if (IntrinsicID == Intrinsic::amdgcn_perm) 3129 return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty); 3130 3131 return nullptr; 3132 } 3133 3134 static Constant *ConstantFoldScalarCall(StringRef Name, 3135 Intrinsic::ID IntrinsicID, 3136 Type *Ty, 3137 ArrayRef<Constant *> Operands, 3138 const TargetLibraryInfo *TLI, 3139 const CallBase *Call) { 3140 if (Operands.size() == 1) 3141 return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call); 3142 3143 if (Operands.size() == 2) 3144 return ConstantFoldScalarCall2(Name, IntrinsicID, Ty, Operands, TLI, Call); 3145 3146 if (Operands.size() == 3) 3147 return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call); 3148 3149 return nullptr; 3150 } 3151 3152 static Constant *ConstantFoldFixedVectorCall( 3153 StringRef Name, Intrinsic::ID IntrinsicID, FixedVectorType *FVTy, 3154 ArrayRef<Constant *> Operands, const DataLayout &DL, 3155 const TargetLibraryInfo *TLI, const CallBase *Call) { 3156 SmallVector<Constant *, 4> Result(FVTy->getNumElements()); 3157 SmallVector<Constant *, 4> Lane(Operands.size()); 3158 Type *Ty = FVTy->getElementType(); 3159 3160 switch (IntrinsicID) { 3161 case Intrinsic::masked_load: { 3162 auto *SrcPtr = Operands[0]; 3163 auto *Mask = Operands[2]; 3164 auto *Passthru = Operands[3]; 3165 3166 Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL); 3167 3168 SmallVector<Constant *, 32> NewElements; 3169 for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) { 3170 auto *MaskElt = Mask->getAggregateElement(I); 3171 if (!MaskElt) 3172 break; 3173 auto *PassthruElt = Passthru->getAggregateElement(I); 3174 auto *VecElt = VecData ? 
VecData->getAggregateElement(I) : nullptr; 3175 if (isa<UndefValue>(MaskElt)) { 3176 if (PassthruElt) 3177 NewElements.push_back(PassthruElt); 3178 else if (VecElt) 3179 NewElements.push_back(VecElt); 3180 else 3181 return nullptr; 3182 } 3183 if (MaskElt->isNullValue()) { 3184 if (!PassthruElt) 3185 return nullptr; 3186 NewElements.push_back(PassthruElt); 3187 } else if (MaskElt->isOneValue()) { 3188 if (!VecElt) 3189 return nullptr; 3190 NewElements.push_back(VecElt); 3191 } else { 3192 return nullptr; 3193 } 3194 } 3195 if (NewElements.size() != FVTy->getNumElements()) 3196 return nullptr; 3197 return ConstantVector::get(NewElements); 3198 } 3199 case Intrinsic::arm_mve_vctp8: 3200 case Intrinsic::arm_mve_vctp16: 3201 case Intrinsic::arm_mve_vctp32: 3202 case Intrinsic::arm_mve_vctp64: { 3203 if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) { 3204 unsigned Lanes = FVTy->getNumElements(); 3205 uint64_t Limit = Op->getZExtValue(); 3206 3207 SmallVector<Constant *, 16> NCs; 3208 for (unsigned i = 0; i < Lanes; i++) { 3209 if (i < Limit) 3210 NCs.push_back(ConstantInt::getTrue(Ty)); 3211 else 3212 NCs.push_back(ConstantInt::getFalse(Ty)); 3213 } 3214 return ConstantVector::get(NCs); 3215 } 3216 return nullptr; 3217 } 3218 case Intrinsic::get_active_lane_mask: { 3219 auto *Op0 = dyn_cast<ConstantInt>(Operands[0]); 3220 auto *Op1 = dyn_cast<ConstantInt>(Operands[1]); 3221 if (Op0 && Op1) { 3222 unsigned Lanes = FVTy->getNumElements(); 3223 uint64_t Base = Op0->getZExtValue(); 3224 uint64_t Limit = Op1->getZExtValue(); 3225 3226 SmallVector<Constant *, 16> NCs; 3227 for (unsigned i = 0; i < Lanes; i++) { 3228 if (Base + i < Limit) 3229 NCs.push_back(ConstantInt::getTrue(Ty)); 3230 else 3231 NCs.push_back(ConstantInt::getFalse(Ty)); 3232 } 3233 return ConstantVector::get(NCs); 3234 } 3235 return nullptr; 3236 } 3237 default: 3238 break; 3239 } 3240 3241 for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) { 3242 // Gather a column of constants. 3243 for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) { 3244 // Some intrinsics use a scalar type for certain arguments. 3245 if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J)) { 3246 Lane[J] = Operands[J]; 3247 continue; 3248 } 3249 3250 Constant *Agg = Operands[J]->getAggregateElement(I); 3251 if (!Agg) 3252 return nullptr; 3253 3254 Lane[J] = Agg; 3255 } 3256 3257 // Use the regular scalar folding to simplify this column. 
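// Illustrative example (not upstream text): ctpop on
//   <2 x i8> <i8 3, i8 255>
// is folded one lane at a time here, yielding <2 x i8> <i8 2, i8 8>.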
    // Use the regular scalar folding to simplify this column.
    Constant *Folded =
        ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call);
    if (!Folded)
      return nullptr;
    Result[I] = Folded;
  }

  return ConstantVector::get(Result);
}

static Constant *ConstantFoldScalableVectorCall(
    StringRef Name, Intrinsic::ID IntrinsicID, ScalableVectorType *SVTy,
    ArrayRef<Constant *> Operands, const DataLayout &DL,
    const TargetLibraryInfo *TLI, const CallBase *Call) {
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_convert_from_svbool: {
    auto *Src = dyn_cast<Constant>(Operands[0]);
    if (!Src || !Src->isNullValue())
      break;

    return ConstantInt::getFalse(SVTy);
  }
  default:
    break;
  }
  return nullptr;
}

static std::pair<Constant *, Constant *>
ConstantFoldScalarFrexpCall(Constant *Op, Type *IntTy) {
  if (isa<PoisonValue>(Op))
    return {Op, PoisonValue::get(IntTy)};

  auto *ConstFP = dyn_cast<ConstantFP>(Op);
  if (!ConstFP)
    return {};

  const APFloat &U = ConstFP->getValueAPF();
  int FrexpExp;
  APFloat FrexpMant = frexp(U, FrexpExp, APFloat::rmNearestTiesToEven);
  Constant *Result0 = ConstantFP::get(ConstFP->getType(), FrexpMant);

  // The exponent is an "unspecified value" for inf/nan. We use zero to avoid
  // using undef.
  Constant *Result1 = FrexpMant.isFinite() ? ConstantInt::get(IntTy, FrexpExp)
                                           : ConstantInt::getNullValue(IntTy);
  return {Result0, Result1};
}

/// Handle intrinsics that return tuples, which may be tuples of vectors.
static Constant *
ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
                       StructType *StTy, ArrayRef<Constant *> Operands,
                       const DataLayout &DL, const TargetLibraryInfo *TLI,
                       const CallBase *Call) {

  switch (IntrinsicID) {
  case Intrinsic::frexp: {
    Type *Ty0 = StTy->getContainedType(0);
    Type *Ty1 = StTy->getContainedType(1)->getScalarType();

    if (auto *FVTy0 = dyn_cast<FixedVectorType>(Ty0)) {
      SmallVector<Constant *, 4> Results0(FVTy0->getNumElements());
      SmallVector<Constant *, 4> Results1(FVTy0->getNumElements());

      for (unsigned I = 0, E = FVTy0->getNumElements(); I != E; ++I) {
        Constant *Lane = Operands[0]->getAggregateElement(I);
        std::tie(Results0[I], Results1[I]) =
            ConstantFoldScalarFrexpCall(Lane, Ty1);
        if (!Results0[I])
          return nullptr;
      }

      return ConstantStruct::get(StTy, ConstantVector::get(Results0),
                                 ConstantVector::get(Results1));
    }

    auto [Result0, Result1] = ConstantFoldScalarFrexpCall(Operands[0], Ty1);
    if (!Result0)
      return nullptr;
    return ConstantStruct::get(StTy, Result0, Result1);
  }
  default:
    // TODO: Constant folding of vector intrinsics that fall through here does
    // not work (e.g. overflow intrinsics)
    return ConstantFoldScalarCall(Name, IntrinsicID, StTy, Operands, TLI, Call);
  }

  return nullptr;
}

} // end anonymous namespace

Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
                                 ArrayRef<Constant *> Operands,
                                 const TargetLibraryInfo *TLI) {
  if (Call->isNoBuiltin())
    return nullptr;
  if (!F->hasName())
    return nullptr;

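  // Illustrative note (added commentary, not part of the original code): this
  // entry point is reached once every argument of the call is a Constant; a
  // call such as llvm.umax.i32(i32 3, i32 5) folds to i32 5, and the helpers
  // below are selected purely by the callee's return type.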
  // If this is not an intrinsic and not recognized as a library call, bail
  // out.
  Intrinsic::ID IID = F->getIntrinsicID();
  if (IID == Intrinsic::not_intrinsic) {
    if (!TLI)
      return nullptr;
    LibFunc LibF;
    if (!TLI->getLibFunc(*F, LibF))
      return nullptr;
  }

  StringRef Name = F->getName();
  Type *Ty = F->getReturnType();
  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
    return ConstantFoldFixedVectorCall(
        Name, IID, FVTy, Operands, F->getParent()->getDataLayout(), TLI, Call);

  if (auto *SVTy = dyn_cast<ScalableVectorType>(Ty))
    return ConstantFoldScalableVectorCall(
        Name, IID, SVTy, Operands, F->getParent()->getDataLayout(), TLI, Call);

  if (auto *StTy = dyn_cast<StructType>(Ty))
    return ConstantFoldStructCall(Name, IID, StTy, Operands,
                                  F->getParent()->getDataLayout(), TLI, Call);

  // TODO: If this is a library function, we already discovered that above,
  // so we should pass the LibFunc, not the name (and it might be better
  // still to separate intrinsic handling from libcalls).
  return ConstantFoldScalarCall(Name, IID, Ty, Operands, TLI, Call);
}

bool llvm::isMathLibCallNoop(const CallBase *Call,
                             const TargetLibraryInfo *TLI) {
  // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
  // (and to some extent ConstantFoldScalarCall).
  if (Call->isNoBuiltin() || Call->isStrictFP())
    return false;
  Function *F = Call->getCalledFunction();
  if (!F)
    return false;

  LibFunc Func;
  if (!TLI || !TLI->getLibFunc(*F, Func))
    return false;

  if (Call->arg_size() == 1) {
    if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
      const APFloat &Op = OpC->getValueAPF();
      switch (Func) {
      case LibFunc_logl:
      case LibFunc_log:
      case LibFunc_logf:
      case LibFunc_log2l:
      case LibFunc_log2:
      case LibFunc_log2f:
      case LibFunc_log10l:
      case LibFunc_log10:
      case LibFunc_log10f:
        return Op.isNaN() || (!Op.isZero() && !Op.isNegative());

      case LibFunc_expl:
      case LibFunc_exp:
      case LibFunc_expf:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-745.0) || Op > APFloat(709.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f));
        break;

      case LibFunc_exp2l:
      case LibFunc_exp2:
      case LibFunc_exp2f:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f));
        break;

      case LibFunc_sinl:
      case LibFunc_sin:
      case LibFunc_sinf:
      case LibFunc_cosl:
      case LibFunc_cos:
      case LibFunc_cosf:
        return !Op.isInfinity();

      case LibFunc_tanl:
      case LibFunc_tan:
      case LibFunc_tanf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = OpC->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy())
          return ConstantFoldFP(tan, OpC->getValueAPF(), Ty) != nullptr;
        break;
      }

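      // Illustrative note (added commentary, not part of the original code):
      // the remaining cases check that the constant argument stays inside the
      // domain where the libm call cannot set errno, so e.g. asin(0.5) is
      // treated as a no-op while asin(2.0) is not.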
      case LibFunc_atan:
      case LibFunc_atanf:
      case LibFunc_atanl:
        // Per POSIX, this MAY fail if Op is denormal. We choose not to fail.
        return true;

      case LibFunc_asinl:
      case LibFunc_asin:
      case LibFunc_asinf:
      case LibFunc_acosl:
      case LibFunc_acos:
      case LibFunc_acosf:
        return !(Op < APFloat(Op.getSemantics(), "-1") ||
                 Op > APFloat(Op.getSemantics(), "1"));

      case LibFunc_sinh:
      case LibFunc_cosh:
      case LibFunc_sinhf:
      case LibFunc_coshf:
      case LibFunc_sinhl:
      case LibFunc_coshl:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-710.0) || Op > APFloat(710.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f));
        break;

      case LibFunc_sqrtl:
      case LibFunc_sqrt:
      case LibFunc_sqrtf:
        return Op.isNaN() || Op.isZero() || !Op.isNegative();

      // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
      // maybe others?
      default:
        break;
      }
    }
  }

  if (Call->arg_size() == 2) {
    ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
    ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
    if (Op0C && Op1C) {
      const APFloat &Op0 = Op0C->getValueAPF();
      const APFloat &Op1 = Op1C->getValueAPF();

      switch (Func) {
      case LibFunc_powl:
      case LibFunc_pow:
      case LibFunc_powf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = Op0C->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
          if (Ty == Op1C->getType())
            return ConstantFoldBinaryFP(pow, Op0, Op1, Ty) != nullptr;
        }
        break;
      }

      case LibFunc_fmodl:
      case LibFunc_fmod:
      case LibFunc_fmodf:
      case LibFunc_remainderl:
      case LibFunc_remainder:
      case LibFunc_remainderf:
        return Op0.isNaN() || Op1.isNaN() ||
               (!Op0.isInfinity() && !Op1.isZero());

      case LibFunc_atan2:
      case LibFunc_atan2f:
      case LibFunc_atan2l:
        // Although IEEE-754 says atan2(+/-0.0, +/-0.0) are well-defined, and
        // GLIBC and MSVC do not appear to raise an error on those, we
        // cannot rely on that behavior. POSIX and C11 say that a domain error
        // may occur, so allow for that possibility.
        return !Op0.isZero() || !Op1.isZero();

      default:
        break;
      }
    }
  }

  return false;
}

void TargetFolder::anchor() {}