//===- AMDGPULibCalls.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file does AMD library function optimizations.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
#include "GCNSubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "amdgpu-simplifylib"

using namespace llvm;

static cl::opt<bool> EnablePreLink("amdgpu-prelink",
  cl::desc("Enable pre-link mode optimizations"),
  cl::init(false),
  cl::Hidden);

static cl::list<std::string> UseNative("amdgpu-use-native",
  cl::desc("Comma separated list of functions to replace with native, or all"),
  cl::CommaSeparated, cl::ValueOptional,
  cl::Hidden);

#define MATH_PI      numbers::pi
#define MATH_E       numbers::e
#define MATH_SQRT2   numbers::sqrt2
#define MATH_SQRT1_2 numbers::inv_sqrt2

namespace llvm {

class AMDGPULibCalls {
private:

  typedef llvm::AMDGPULibFunc FuncInfo;

  const TargetMachine *TM;

  // -fuse-native.
  bool AllNative = false;

  bool useNativeFunc(const StringRef F) const;

  // Return a pointer (pointer expr) to the function if a function definition
  // with "FuncName" exists. It may create a new function prototype in
  // pre-link mode.
  FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);

  bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);

  bool TDOFold(CallInst *CI, const FuncInfo &FInfo);

  /* Specialized optimizations */

  // recip (half or native)
  bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

  // divide (half or native)
  bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

  // pow/powr/pown
  bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

  // rootn
  bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

  // fma/mad
  bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

  // -fuse-native for sincos
  bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);

  // Evaluate calls if the calls' arguments are constants.
  bool evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0,
                              double &Res1, Constant *copr0, Constant *copr1,
                              Constant *copr2);
  bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);

  // sqrt
  bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

  // sin/cos
  bool fold_sincos(CallInst *CI, IRBuilder<> &B, AliasAnalysis *AA);

  // __read_pipe/__write_pipe
  bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
                            const FuncInfo &FInfo);

  // llvm.amdgcn.wavefrontsize
  bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);

  // Get insertion point at entry.
  BasicBlock::iterator getEntryIns(CallInst *UI);
  // Insert an Alloca instruction.
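  // (Used by fold_sincos to create the sincos output slot at function entry.)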
  AllocaInst *insertAlloca(CallInst *UI, IRBuilder<> &B, const char *prefix);
  // Get a scalar native builtin single argument FP function
  FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);

protected:
  CallInst *CI;

  bool isUnsafeMath(const CallInst *CI) const;

  void replaceCall(Value *With) {
    CI->replaceAllUsesWith(With);
    CI->eraseFromParent();
  }

public:
  AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}

  bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);

  void initNativeFuncs();

  // Replace a normal math function call with the native version.
  bool useNative(CallInst *CI);
};

} // end llvm namespace

namespace {

class AMDGPUSimplifyLibCalls : public FunctionPass {

  AMDGPULibCalls Simplifier;

public:
  static char ID; // Pass identification

  AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
      : FunctionPass(ID), Simplifier(TM) {
    initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
  }

  bool runOnFunction(Function &F) override;
};

class AMDGPUUseNativeCalls : public FunctionPass {

  AMDGPULibCalls Simplifier;

public:
  static char ID; // Pass identification

  AMDGPUUseNativeCalls() : FunctionPass(ID) {
    initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
    Simplifier.initNativeFuncs();
  }

  bool runOnFunction(Function &F) override;
};

} // end anonymous namespace.

char AMDGPUSimplifyLibCalls::ID = 0;
char AMDGPUUseNativeCalls::ID = 0;

INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
                      "Simplify well-known AMD library calls", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
                    "Simplify well-known AMD library calls", false, false)

INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
                "Replace builtin math calls with native versions.",
                false, false)

template <typename IRB>
static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
                              const Twine &Name = "") {
  CallInst *R = B.CreateCall(Callee, Arg, Name);
  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
    R->setCallingConv(F->getCallingConv());
  return R;
}

template <typename IRB>
static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
                               Value *Arg2, const Twine &Name = "") {
  CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
    R->setCallingConv(F->getCallingConv());
  return R;
}

// Data structures for table-driven optimizations.
// FuncTbl works for both f32 and f64 functions with 1 input argument.

struct TableEntry {
  double result;
  double input;
};

/* a list of {result, input} */
static const TableEntry tbl_acos[] = {
  {MATH_PI / 2.0, 0.0},
  {MATH_PI / 2.0, -0.0},
  {0.0, 1.0},
  {MATH_PI, -1.0}
};
static const TableEntry tbl_acosh[] = {
  {0.0, 1.0}
};
static const TableEntry tbl_acospi[] = {
  {0.5, 0.0},
  {0.5, -0.0},
  {0.0, 1.0},
  {1.0, -1.0}
};
static const TableEntry tbl_asin[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {MATH_PI / 2.0, 1.0},
  {-MATH_PI / 2.0, -1.0}
};
static const TableEntry tbl_asinh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_asinpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {0.5, 1.0},
  {-0.5, -1.0}
};
static const TableEntry tbl_atan[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {MATH_PI / 4.0, 1.0},
  {-MATH_PI / 4.0, -1.0}
};
static const TableEntry tbl_atanh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_atanpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {0.25, 1.0},
  {-0.25, -1.0}
};
static const TableEntry tbl_cbrt[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {1.0, 1.0},
  {-1.0, -1.0},
};
static const TableEntry tbl_cos[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_cosh[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_cospi[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_erfc[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_erf[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_exp[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {MATH_E, 1.0}
};
static const TableEntry tbl_exp2[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {2.0, 1.0}
};
static const TableEntry tbl_exp10[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {10.0, 1.0}
};
static const TableEntry tbl_expm1[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_log[] = {
  {0.0, 1.0},
  {1.0, MATH_E}
};
static const TableEntry tbl_log2[] = {
  {0.0, 1.0},
  {1.0, 2.0}
};
static const TableEntry tbl_log10[] = {
  {0.0, 1.0},
  {1.0, 10.0}
};
static const TableEntry tbl_rsqrt[] = {
  {1.0, 1.0},
  {MATH_SQRT1_2, 2.0}
};
static const TableEntry tbl_sin[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sinh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sinpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sqrt[] = {
  {0.0, 0.0},
  {1.0, 1.0},
  {MATH_SQRT2, 2.0}
};
static const TableEntry tbl_tan[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_tanh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_tanpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_tgamma[] = {
  {1.0, 1.0},
  {1.0, 2.0},
  {2.0, 3.0},
  {6.0, 4.0}
};

static bool HasNative(AMDGPULibFunc::EFuncId id) {
  switch(id) {
  case AMDGPULibFunc::EI_DIVIDE:
  case AMDGPULibFunc::EI_COS:
  case AMDGPULibFunc::EI_EXP:
  case AMDGPULibFunc::EI_EXP2:
  case AMDGPULibFunc::EI_EXP10:
  case AMDGPULibFunc::EI_LOG:
  case AMDGPULibFunc::EI_LOG2:
  case AMDGPULibFunc::EI_LOG10:
  case AMDGPULibFunc::EI_POWR:
  case AMDGPULibFunc::EI_RECIP:
  case AMDGPULibFunc::EI_RSQRT:
  case AMDGPULibFunc::EI_SIN:
  case AMDGPULibFunc::EI_SINCOS:
  case AMDGPULibFunc::EI_SQRT:
  case AMDGPULibFunc::EI_TAN:
    return true;
  default:;
  }
  return false;
}

struct TableRef {
  size_t size;
  const TableEntry *table; // variable size: from 0 to (size - 1)

  TableRef() : size(0), table(nullptr) {}

  template <size_t N>
  TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
};

static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
  switch(id) {
  case AMDGPULibFunc::EI_ACOS:   return TableRef(tbl_acos);
  case AMDGPULibFunc::EI_ACOSH:  return TableRef(tbl_acosh);
  case AMDGPULibFunc::EI_ACOSPI: return TableRef(tbl_acospi);
  case AMDGPULibFunc::EI_ASIN:   return TableRef(tbl_asin);
  case AMDGPULibFunc::EI_ASINH:  return TableRef(tbl_asinh);
  case AMDGPULibFunc::EI_ASINPI: return TableRef(tbl_asinpi);
  case AMDGPULibFunc::EI_ATAN:   return TableRef(tbl_atan);
  case AMDGPULibFunc::EI_ATANH:  return TableRef(tbl_atanh);
  case AMDGPULibFunc::EI_ATANPI: return TableRef(tbl_atanpi);
  case AMDGPULibFunc::EI_CBRT:   return TableRef(tbl_cbrt);
  case AMDGPULibFunc::EI_NCOS:
  case AMDGPULibFunc::EI_COS:    return TableRef(tbl_cos);
  case AMDGPULibFunc::EI_COSH:   return TableRef(tbl_cosh);
  case AMDGPULibFunc::EI_COSPI:  return TableRef(tbl_cospi);
  case AMDGPULibFunc::EI_ERFC:   return TableRef(tbl_erfc);
  case AMDGPULibFunc::EI_ERF:    return TableRef(tbl_erf);
  case AMDGPULibFunc::EI_EXP:    return TableRef(tbl_exp);
  case AMDGPULibFunc::EI_NEXP2:
  case AMDGPULibFunc::EI_EXP2:   return TableRef(tbl_exp2);
  case AMDGPULibFunc::EI_EXP10:  return TableRef(tbl_exp10);
  case AMDGPULibFunc::EI_EXPM1:  return TableRef(tbl_expm1);
  case AMDGPULibFunc::EI_LOG:    return TableRef(tbl_log);
  case AMDGPULibFunc::EI_NLOG2:
  case AMDGPULibFunc::EI_LOG2:   return TableRef(tbl_log2);
  case AMDGPULibFunc::EI_LOG10:  return TableRef(tbl_log10);
  case AMDGPULibFunc::EI_NRSQRT:
  case AMDGPULibFunc::EI_RSQRT:  return TableRef(tbl_rsqrt);
  case AMDGPULibFunc::EI_NSIN:
  case AMDGPULibFunc::EI_SIN:    return TableRef(tbl_sin);
  case AMDGPULibFunc::EI_SINH:   return TableRef(tbl_sinh);
  case AMDGPULibFunc::EI_SINPI:  return TableRef(tbl_sinpi);
  case AMDGPULibFunc::EI_NSQRT:
  case AMDGPULibFunc::EI_SQRT:   return TableRef(tbl_sqrt);
  case AMDGPULibFunc::EI_TAN:    return TableRef(tbl_tan);
  case AMDGPULibFunc::EI_TANH:   return TableRef(tbl_tanh);
  case AMDGPULibFunc::EI_TANPI:  return TableRef(tbl_tanpi);
  case AMDGPULibFunc::EI_TGAMMA: return TableRef(tbl_tgamma);
  default:;
  }
  return TableRef();
}

static inline int getVecSize(const AMDGPULibFunc &FInfo) {
  return FInfo.getLeads()[0].VectorSize;
}

static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo) {
  return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
}

FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
  // If we are doing PreLinkOpt, the function is external. So it is safe to
  // use getOrInsertFunction() at this stage.

  return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
                       : AMDGPULibFunc::getFunction(M, fInfo);
}

bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
                                       FuncInfo &FInfo) {
  return AMDGPULibFunc::parse(FMangledName, FInfo);
}

bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
  if (auto Op = dyn_cast<FPMathOperator>(CI))
    if (Op->isFast())
      return true;
  const Function *F = CI->getParent()->getParent();
  Attribute Attr = F->getFnAttribute("unsafe-fp-math");
  return Attr.getValueAsBool();
}

bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
  return AllNative || llvm::is_contained(UseNative, F);
}

void AMDGPULibCalls::initNativeFuncs() {
  AllNative = useNativeFunc("all") ||
              (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
               UseNative.begin()->empty());
}

bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
  bool native_sin = useNativeFunc("sin");
  bool native_cos = useNativeFunc("cos");

  if (native_sin && native_cos) {
    Module *M = aCI->getModule();
    Value *opr0 = aCI->getArgOperand(0);

    AMDGPULibFunc nf;
    nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
    nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;

    nf.setPrefix(AMDGPULibFunc::NATIVE);
    nf.setId(AMDGPULibFunc::EI_SIN);
    FunctionCallee sinExpr = getFunction(M, nf);

    nf.setPrefix(AMDGPULibFunc::NATIVE);
    nf.setId(AMDGPULibFunc::EI_COS);
    FunctionCallee cosExpr = getFunction(M, nf);
    if (sinExpr && cosExpr) {
      Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
      Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
      new StoreInst(cosval, aCI->getArgOperand(1), aCI);

      DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
                                          << " with native version of sin/cos");

      replaceCall(sinval);
      return true;
    }
  }
  return false;
}

bool AMDGPULibCalls::useNative(CallInst *aCI) {
  CI = aCI;
  Function *Callee = aCI->getCalledFunction();

  FuncInfo FInfo;
  if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
      FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
      getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
      !(AllNative || useNativeFunc(FInfo.getName()))) {
    return false;
  }

  if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
    return sincosUseNative(aCI, FInfo);

  FInfo.setPrefix(AMDGPULibFunc::NATIVE);
  FunctionCallee F = getFunction(aCI->getModule(), FInfo);
  if (!F)
    return false;

  aCI->setCalledFunction(F);
  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
                                      << " with native version");
  return true;
}

// Clang emits a call to __read_pipe_2 or __read_pipe_4 for the OpenCL
// read_pipe builtin, with appended type size and alignment arguments, where
// 2 or 4 indicates the original number of arguments. The library has an
// optimized version of __read_pipe_2/__read_pipe_4 when the type size and
// alignment are the same power-of-2 value. This function transforms
// __read_pipe_2 to __read_pipe_2_N for such cases, where N is the size in
// bytes of the type (N = 1, 2, 4, 8, ..., 128). The same applies to
// __read_pipe_4, __write_pipe_2, and __write_pipe_4.
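// For example (illustrative IR, assuming a 4-byte packet with 4-byte
// alignment):
//   %r = call i32 @__read_pipe_2(%pipe, %ptr, i32 4, i32 4)
// becomes
//   %r = call i32 @__read_pipe_2_4(%pipe, %castptr)
// with the trailing size/alignment arguments dropped.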
bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
                                          const FuncInfo &FInfo) {
  auto *Callee = CI->getCalledFunction();
  if (!Callee->isDeclaration())
    return false;

  assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
  auto *M = Callee->getParent();
  auto &Ctx = M->getContext();
  std::string Name = std::string(Callee->getName());
  auto NumArg = CI->arg_size();
  if (NumArg != 4 && NumArg != 6)
    return false;
  auto *PacketSize = CI->getArgOperand(NumArg - 2);
  auto *PacketAlign = CI->getArgOperand(NumArg - 1);
  if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
    return false;
  unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
  Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue();
  if (Alignment != Size)
    return false;

  Type *PtrElemTy;
  if (Size <= 8)
    PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
  else
    PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
  unsigned PtrArgLoc = CI->arg_size() - 3;
  auto PtrArg = CI->getArgOperand(PtrArgLoc);
  unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
  auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);

  SmallVector<llvm::Type *, 6> ArgTys;
  for (unsigned I = 0; I != PtrArgLoc; ++I)
    ArgTys.push_back(CI->getArgOperand(I)->getType());
  ArgTys.push_back(PtrTy);

  Name = Name + "_" + std::to_string(Size);
  auto *FTy = FunctionType::get(Callee->getReturnType(),
                                ArrayRef<Type *>(ArgTys), false);
  AMDGPULibFunc NewLibFunc(Name, FTy);
  FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
  if (!F)
    return false;

  auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
  SmallVector<Value *, 6> Args;
  for (unsigned I = 0; I != PtrArgLoc; ++I)
    Args.push_back(CI->getArgOperand(I));
  Args.push_back(BCast);

  auto *NCI = B.CreateCall(F, Args);
  NCI->setAttributes(CI->getAttributes());
  CI->replaceAllUsesWith(NCI);
  CI->dropAllReferences();
  CI->eraseFromParent();

  return true;
}

// This function returns false if there is no change; it returns true
// otherwise.
bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
  this->CI = CI;
  Function *Callee = CI->getCalledFunction();

  // Ignore indirect calls.
  if (Callee == nullptr)
    return false;

  BasicBlock *BB = CI->getParent();
  LLVMContext &Context = CI->getParent()->getContext();
  IRBuilder<> B(Context);

  // Set the builder to the call instruction; new instructions are inserted
  // before the call.
  B.SetInsertPoint(BB, CI->getIterator());

  // Copy fast flags from the original call.
  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
    B.setFastMathFlags(FPOp->getFastMathFlags());

  switch (Callee->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::amdgcn_wavefrontsize:
    return !EnablePreLink && fold_wavefrontsize(CI, B);
  }

  FuncInfo FInfo;
  if (!parseFunctionName(Callee->getName(), FInfo))
    return false;

  // Further check the number of arguments to see if they match.
  if (CI->arg_size() != FInfo.getNumArgs())
    return false;

  if (TDOFold(CI, FInfo))
    return true;

  // Under unsafe-math, evaluate calls if possible.
  // According to Brian Sumner, we can do this for all f32 function calls
  // using the host's double function calls.
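  // For example, with fast-math flags set, a call such as sin(1.0f) can be
  // computed with the host's double-precision sin() and replaced by the
  // resulting constant.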
  if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
    return true;

  // Specialized optimizations for each function call.
  switch (FInfo.getId()) {
  case AMDGPULibFunc::EI_RECIP:
    // skip vector function
    assert((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
            FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
           "recip must be either a native or half function");
    return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);

  case AMDGPULibFunc::EI_DIVIDE:
    // skip vector function
    assert((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
            FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
           "divide must be either a native or half function");
    return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);

  case AMDGPULibFunc::EI_POW:
  case AMDGPULibFunc::EI_POWR:
  case AMDGPULibFunc::EI_POWN:
    return fold_pow(CI, B, FInfo);

  case AMDGPULibFunc::EI_ROOTN:
    // skip vector function
    return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);

  case AMDGPULibFunc::EI_FMA:
  case AMDGPULibFunc::EI_MAD:
  case AMDGPULibFunc::EI_NFMA:
    // skip vector function
    return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);

  case AMDGPULibFunc::EI_SQRT:
    return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
  case AMDGPULibFunc::EI_COS:
  case AMDGPULibFunc::EI_SIN:
    if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
         getArgType(FInfo) == AMDGPULibFunc::F64)
        && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
      return fold_sincos(CI, B, AA);

    break;
  case AMDGPULibFunc::EI_READ_PIPE_2:
  case AMDGPULibFunc::EI_READ_PIPE_4:
  case AMDGPULibFunc::EI_WRITE_PIPE_2:
  case AMDGPULibFunc::EI_WRITE_PIPE_4:
    return fold_read_write_pipe(CI, B, FInfo);

  default:
    break;
  }

  return false;
}

bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
  // Table-driven optimization.
  const TableRef tr = getOptTable(FInfo.getId());
  if (tr.size == 0)
    return false;

  int const sz = (int)tr.size;
  const TableEntry * const ftbl = tr.table;
  Value *opr0 = CI->getArgOperand(0);

  if (getVecSize(FInfo) > 1) {
    if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
      SmallVector<double, 0> DVal;
      for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
        ConstantFP *eltval = dyn_cast<ConstantFP>(
                               CV->getElementAsConstant((unsigned)eltNo));
        assert(eltval && "Non-FP arguments in math function!");
        bool found = false;
        for (int i = 0; i < sz; ++i) {
          if (eltval->isExactlyValue(ftbl[i].input)) {
            DVal.push_back(ftbl[i].result);
            found = true;
            break;
          }
        }
        if (!found) {
          // This vector constant is not handled yet.
          return false;
        }
      }
      LLVMContext &context = CI->getParent()->getParent()->getContext();
      Constant *nval;
      if (getArgType(FInfo) == AMDGPULibFunc::F32) {
        SmallVector<float, 0> FVal;
        for (unsigned i = 0; i < DVal.size(); ++i) {
          FVal.push_back((float)DVal[i]);
        }
        ArrayRef<float> tmp(FVal);
        nval = ConstantDataVector::get(context, tmp);
      } else { // F64
        ArrayRef<double> tmp(DVal);
        nval = ConstantDataVector::get(context, tmp);
      }
      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
      replaceCall(nval);
      return true;
    }
  } else {
    // Scalar version
    if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
      for (int i = 0; i < sz; ++i) {
        if (CF->isExactlyValue(ftbl[i].input)) {
          Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
          LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
          replaceCall(nval);
          return true;
        }
      }
    }
  }

  return false;
}

// [native_]half_recip(c) ==> 1.0/c
bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
                                const FuncInfo &FInfo) {
  Value *opr0 = CI->getArgOperand(0);
  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
    // Just create a normal div. Later, InstCombine will be able
    // to compute the divide into a constant (avoid checking for float
    // infinity or subnormal at this point).
    Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
                               opr0,
                               "recip2div");
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
    replaceCall(nval);
    return true;
  }
  return false;
}

// [native_]half_divide(x, c) ==> x/c
bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
                                 const FuncInfo &FInfo) {
  Value *opr0 = CI->getArgOperand(0);
  Value *opr1 = CI->getArgOperand(1);
  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);

  if ((CF0 && CF1) ||  // both are constants
      (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
      // CF1 is constant && f32 divide
  {
    Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
                                opr1, "__div2recip");
    Value *nval  = B.CreateFMul(opr0, nval1, "__div2mul");
    replaceCall(nval);
    return true;
  }
  return false;
}

namespace llvm {
static double log2(double V) {
#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
  return ::log2(V);
#else
  return log(V) / numbers::ln2;
#endif
}
}

bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
                              const FuncInfo &FInfo) {
  assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
          FInfo.getId() == AMDGPULibFunc::EI_POWR ||
          FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
         "fold_pow: encountered a wrong function call");

  Value *opr0, *opr1;
  ConstantFP *CF;
  ConstantInt *CINT;
  ConstantAggregateZero *CZero;
  Type *eltType;

  opr0 = CI->getArgOperand(0);
  opr1 = CI->getArgOperand(1);
  CZero = dyn_cast<ConstantAggregateZero>(opr1);
  if (getVecSize(FInfo) == 1) {
    eltType = opr0->getType();
    CF = dyn_cast<ConstantFP>(opr1);
    CINT = dyn_cast<ConstantInt>(opr1);
  } else {
    VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
    assert(VTy && "Operand of vector function should be of vectortype");
    eltType = VTy->getElementType();
    ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);

    // For now, only handle vector constants
    // whose elements have the same value.
    CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
    CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
  }

  // No unsafe math, no constant argument: do nothing.
  if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
    return false;

  // 0x1111111 means that we don't do anything for this call.
  int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);

  if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
    //  pow/powr/pown(x, 0) == 1
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
    Constant *cnval = ConstantFP::get(eltType, 1.0);
    if (getVecSize(FInfo) > 1) {
      cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
    }
    replaceCall(cnval);
    return true;
  }
  if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
    // pow/powr/pown(x, 1.0) = x
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
    replaceCall(opr0);
    return true;
  }
  if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
    // pow/powr/pown(x, 2.0) = x*x
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
                      << "\n");
    Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
    replaceCall(nval);
    return true;
  }
  if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
    // pow/powr/pown(x, -1.0) = 1.0/x
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
    Constant *cnval = ConstantFP::get(eltType, 1.0);
    if (getVecSize(FInfo) > 1) {
      cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
    }
    Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
    replaceCall(nval);
    return true;
  }

  Module *M = CI->getModule();
  if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
    // pow[r](x, [-]0.5) = sqrt(x)
    bool issqrt = CF->isExactlyValue(0.5);
    if (FunctionCallee FPExpr =
            getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
                                                : AMDGPULibFunc::EI_RSQRT,
                                         FInfo))) {
      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
                        << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
      Value *nval = CreateCallEx(B, FPExpr, opr0,
                                 issqrt ? "__pow2sqrt" : "__pow2rsqrt");
      replaceCall(nval);
      return true;
    }
  }

  if (!isUnsafeMath(CI))
    return false;

  // Unsafe math optimizations:

  // Remember that ci_opr1 is set if opr1 is integral.
  if (CF) {
    double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
                    ? (double)CF->getValueAPF().convertToFloat()
                    : CF->getValueAPF().convertToDouble();
    int ival = (int)dval;
    if ((double)ival == dval) {
      ci_opr1 = ival;
    } else
      ci_opr1 = 0x11111111;
  }

  // pow/powr/pown(x, c) = [1/](x*x*..x); where
  //   trunc(c) == c && the number of x == c && |c| <= 12
  unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
  if (abs_opr1 <= 12) {
    Constant *cnval;
    Value *nval;
    if (abs_opr1 == 0) {
      cnval = ConstantFP::get(eltType, 1.0);
      if (getVecSize(FInfo) > 1) {
        cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
      }
      nval = cnval;
    } else {
      // Expand the power by squaring: valx2 holds x^(2^k) and nval
      // accumulates the product over the set bits of the exponent.
      Value *valx2 = nullptr;
      nval = nullptr;
      while (abs_opr1 > 0) {
        valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
        if (abs_opr1 & 1) {
          nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
        }
        abs_opr1 >>= 1;
      }
    }

    if (ci_opr1 < 0) {
      cnval = ConstantFP::get(eltType, 1.0);
      if (getVecSize(FInfo) > 1) {
        cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
      }
      nval = B.CreateFDiv(cnval, nval, "__1powprod");
    }
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
                      << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
                      << ")\n");
    replaceCall(nval);
    return true;
  }

  // powr ---> exp2(y * log2(x))
  // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
  FunctionCallee ExpExpr =
      getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
  if (!ExpExpr)
    return false;

  bool needlog = false;
  bool needabs = false;
  bool needcopysign = false;
  Constant *cnval = nullptr;
  if (getVecSize(FInfo) == 1) {
    CF = dyn_cast<ConstantFP>(opr0);

    if (CF) {
      double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
                   ? (double)CF->getValueAPF().convertToFloat()
                   : CF->getValueAPF().convertToDouble();

      V = log2(std::abs(V));
      cnval = ConstantFP::get(eltType, V);
      needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
                     CF->isNegative();
    } else {
      needlog = true;
      needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
                               (!CF || CF->isNegative());
    }
  } else {
    ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);

    if (!CDV) {
      needlog = true;
      needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
    } else {
      assert((int)CDV->getNumElements() == getVecSize(FInfo) &&
             "Wrong vector size detected");

      SmallVector<double, 0> DVal;
      for (int i = 0; i < getVecSize(FInfo); ++i) {
        double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
                     ? (double)CDV->getElementAsFloat(i)
                     : CDV->getElementAsDouble(i);
        if (V < 0.0) needcopysign = true;
        V = log2(std::abs(V));
        DVal.push_back(V);
      }
      if (getArgType(FInfo) == AMDGPULibFunc::F32) {
        SmallVector<float, 0> FVal;
        for (unsigned i = 0; i < DVal.size(); ++i) {
          FVal.push_back((float)DVal[i]);
        }
        ArrayRef<float> tmp(FVal);
        cnval = ConstantDataVector::get(M->getContext(), tmp);
      } else {
        ArrayRef<double> tmp(DVal);
        cnval = ConstantDataVector::get(M->getContext(), tmp);
      }
    }
  }

  if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
    // We cannot handle corner cases for a general pow() function, give up
    // unless y is a constant integral value. Then proceed as if it were pown.
    if (getVecSize(FInfo) == 1) {
      if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
        double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
                     ? (double)CF->getValueAPF().convertToFloat()
                     : CF->getValueAPF().convertToDouble();
        if (y != (double)(int64_t)y)
          return false;
      } else
        return false;
    } else {
      if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
        for (int i = 0; i < getVecSize(FInfo); ++i) {
          double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
                       ? (double)CDV->getElementAsFloat(i)
                       : CDV->getElementAsDouble(i);
          if (y != (double)(int64_t)y)
            return false;
        }
      } else
        return false;
    }
  }

  Value *nval;
  if (needabs) {
    FunctionCallee AbsExpr =
        getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
    if (!AbsExpr)
      return false;
    nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
  } else {
    nval = cnval ? cnval : opr0;
  }
  if (needlog) {
    FunctionCallee LogExpr =
        getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
    if (!LogExpr)
      return false;
    nval = CreateCallEx(B, LogExpr, nval, "__log2");
  }

  if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
    // convert int(32) to fp(f32 or f64)
    opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
  }
  nval = B.CreateFMul(opr1, nval, "__ylogx");
  nval = CreateCallEx(B, ExpExpr, nval, "__exp2");

  if (needcopysign) {
    Value *opr_n;
    Type *rTy = opr0->getType();
    Type *nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
    Type *nTy = nTyS;
    if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
      nTy = FixedVectorType::get(nTyS, vTy);
    unsigned size = nTy->getScalarSizeInBits();
    opr_n = CI->getArgOperand(1);
    if (opr_n->getType()->isIntegerTy())
      opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
    else
      opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");

    Value *sign = B.CreateShl(opr_n, size - 1, "__yeven");
    sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
    nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
    nval = B.CreateBitCast(nval, opr0->getType());
  }

  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
                    << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
  replaceCall(nval);

  return true;
}

bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
                                const FuncInfo &FInfo) {
  Value *opr0 = CI->getArgOperand(0);
  Value *opr1 = CI->getArgOperand(1);

  ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
  if (!CINT) {
    return false;
  }
  int ci_opr1 = (int)CINT->getSExtValue();
  if (ci_opr1 == 1) {  // rootn(x, 1) = x
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
    replaceCall(opr0);
    return true;
  }
  if (ci_opr1 == 2) {  // rootn(x, 2) = sqrt(x)
    Module *M = CI->getModule();
    if (FunctionCallee FPExpr =
            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
      Value *nval = CreateCallEx(B, FPExpr, opr0, "__rootn2sqrt");
      replaceCall(nval);
      return true;
    }
  } else if (ci_opr1 == 3) {  // rootn(x, 3) = cbrt(x)
    Module *M = CI->getModule();
    if (FunctionCallee FPExpr =
            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
      Value *nval = CreateCallEx(B, FPExpr, opr0, "__rootn2cbrt");
      replaceCall(nval);
      return true;
    }
  } else if (ci_opr1 == -1) {  // rootn(x, -1) = 1.0/x
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
    Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
                               opr0,
                               "__rootn2div");
    replaceCall(nval);
    return true;
  } else if (ci_opr1 == -2) {  // rootn(x, -2) = rsqrt(x)
    Module *M = CI->getModule();
    if (FunctionCallee FPExpr =
            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
                        << ")\n");
      Value *nval = CreateCallEx(B, FPExpr, opr0, "__rootn2rsqrt");
      replaceCall(nval);
      return true;
    }
  }
  return false;
}

bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
                                  const FuncInfo &FInfo) {
  Value *opr0 = CI->getArgOperand(0);
  Value *opr1 = CI->getArgOperand(1);
  Value *opr2 = CI->getArgOperand(2);

  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
  if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
    // fma/mad(a, b, c) = c if a=0 || b=0
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
    replaceCall(opr2);
    return true;
  }
  if (CF0 && CF0->isExactlyValue(1.0f)) {
    // fma/mad(a, b, c) = b+c if a=1
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2
                      << "\n");
    Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
    replaceCall(nval);
    return true;
  }
  if (CF1 && CF1->isExactlyValue(1.0f)) {
    // fma/mad(a, b, c) = a+c if b=1
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2
                      << "\n");
    Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
    replaceCall(nval);
    return true;
  }
  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
    if (CF->isZero()) {
      // fma/mad(a, b, c) = a*b if c=0
      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "
                        << *opr1 << "\n");
      Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
      replaceCall(nval);
      return true;
    }
  }

  return false;
}

// Get a scalar native builtin single argument FP function
FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
                                                 const FuncInfo &FInfo) {
  if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
    return nullptr;
  FuncInfo nf = FInfo;
  nf.setPrefix(AMDGPULibFunc::NATIVE);
  return getFunction(M, nf);
}

// fold sqrt -> native_sqrt(x)
bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
                               const FuncInfo &FInfo) {
  if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
      (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
    if (FunctionCallee FPExpr = getNativeFunction(
            CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
      Value *opr0 = CI->getArgOperand(0);
      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
                        << "sqrt(" << *opr0 << ")\n");
      Value *nval = CreateCallEx(B, FPExpr, opr0, "__sqrt");
      replaceCall(nval);
      return true;
    }
  }
  return false;
}

// fold sin, cos -> sincos.
bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
                                 AliasAnalysis *AA) {
  AMDGPULibFunc fInfo;
  if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
    return false;

  assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
         fInfo.getId() == AMDGPULibFunc::EI_COS);
  bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;

  Value *CArgVal = CI->getArgOperand(0);
  BasicBlock * const CBB = CI->getParent();

  int const MaxScan = 30;
  bool Changed = false;

  { // fold in load value.
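    // If the argument comes from a load in this block, try to replace it with
    // an earlier available value, so that the matching sin/cos call on the
    // same value can be recognized below.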
    LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
    if (LI && LI->getParent() == CBB) {
      BasicBlock::iterator BBI = LI->getIterator();
      Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
      if (AvailableVal) {
        Changed = true;
        CArgVal->replaceAllUsesWith(AvailableVal);
        if (CArgVal->getNumUses() == 0)
          LI->eraseFromParent();
        CArgVal = CI->getArgOperand(0);
      }
    }
  }

  Module *M = CI->getModule();
  fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
  std::string const PairName = fInfo.mangle();

  CallInst *UI = nullptr;
  for (User *U : CArgVal->users()) {
    CallInst *XI = dyn_cast_or_null<CallInst>(U);
    if (!XI || XI == CI || XI->getParent() != CBB)
      continue;

    Function *UCallee = XI->getCalledFunction();
    if (!UCallee || !UCallee->getName().equals(PairName))
      continue;

    BasicBlock::iterator BBI = CI->getIterator();
    if (BBI == CI->getParent()->begin())
      break;
    --BBI;
    for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
      if (cast<Instruction>(BBI) == XI) {
        UI = XI;
        break;
      }
    }
    if (UI) break;
  }

  if (!UI)
    return Changed;

  // Merge the sin and cos.

  // For OpenCL 2.0, we have only a generic implementation of the sincos
  // function.
  AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
  nf.getLeads()[0].PtrKind =
      AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
  FunctionCallee Fsincos = getFunction(M, nf);
  if (!Fsincos)
    return Changed;

  BasicBlock::iterator ItOld = B.GetInsertPoint();
  AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
  B.SetInsertPoint(UI);

  Value *P = Alloc;
  Type *PTy = Fsincos.getFunctionType()->getParamType(1);
  // The AllocaInst allocates the memory in the private address space. This
  // needs to be cast to point to the address space of the cos pointer type.
  // In OpenCL 2.0 this is generic, while in 1.2 it is private.
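  // Illustrative IR (AMDGPU private memory is addrspace(5)):
  //   %p = addrspacecast float addrspace(5)* %__sincos_ to float*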
  if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
    P = B.CreateAddrSpaceCast(Alloc, PTy);
  CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);

  LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
                    << *Call << "\n");

  if (!isSin) { // CI->cos, UI->sin
    B.SetInsertPoint(&*ItOld);
    UI->replaceAllUsesWith(&*Call);
    Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
    CI->replaceAllUsesWith(Reload);
    UI->eraseFromParent();
    CI->eraseFromParent();
  } else { // CI->sin, UI->cos
    Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
    UI->replaceAllUsesWith(Reload);
    CI->replaceAllUsesWith(Call);
    UI->eraseFromParent();
    CI->eraseFromParent();
  }
  return true;
}

bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
  if (!TM)
    return false;

  StringRef CPU = TM->getTargetCPU();
  StringRef Features = TM->getTargetFeatureString();
  if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
      (Features.empty() || !Features.contains_insensitive("wavefrontsize")))
    return false;

  Function *F = CI->getParent()->getParent();
  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
  unsigned N = ST.getWavefrontSize();

  LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
                    << N << "\n");

  CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
  CI->eraseFromParent();
  return true;
}

// Get insertion point at entry.
BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst *UI) {
  Function *Func = UI->getParent()->getParent();
  BasicBlock *BB = &Func->getEntryBlock();
  assert(BB && "Entry block not found!");
  BasicBlock::iterator ItNew = BB->begin();
  return ItNew;
}

// Insert an AllocaInst at the beginning of the function entry block.
AllocaInst *AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
                                         const char *prefix) {
  BasicBlock::iterator ItNew = getEntryIns(UI);
  Function *UCallee = UI->getCalledFunction();
  Type *RetType = UCallee->getReturnType();
  B.SetInsertPoint(&*ItNew);
  AllocaInst *Alloc =
      B.CreateAlloca(RetType, nullptr, std::string(prefix) + UI->getName());
  Alloc->setAlignment(
      Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
  return Alloc;
}

bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
                                            double &Res0, double &Res1,
                                            Constant *copr0, Constant *copr1,
                                            Constant *copr2) {
  // By default, opr0/opr1/opr2 hold values of float/double type.
  // If they are not float/double, each function has to handle its
  // operands separately.
  double opr0 = 0.0, opr1 = 0.0, opr2 = 0.0;
  ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
  ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
  ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
  if (fpopr0) {
    opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
             ? fpopr0->getValueAPF().convertToDouble()
             : (double)fpopr0->getValueAPF().convertToFloat();
  }

  if (fpopr1) {
    opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
             ? fpopr1->getValueAPF().convertToDouble()
             : (double)fpopr1->getValueAPF().convertToFloat();
  }

  if (fpopr2) {
    opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
             ? fpopr2->getValueAPF().convertToDouble()
             : (double)fpopr2->getValueAPF().convertToFloat();
  }

  switch (FInfo.getId()) {
  default : return false;

  case AMDGPULibFunc::EI_ACOS:
    Res0 = acos(opr0);
    return true;

  case AMDGPULibFunc::EI_ACOSH:
    // acosh(x) == log(x + sqrt(x*x - 1))
    Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
    return true;

  case AMDGPULibFunc::EI_ACOSPI:
    Res0 = acos(opr0) / MATH_PI;
    return true;

  case AMDGPULibFunc::EI_ASIN:
    Res0 = asin(opr0);
    return true;

  case AMDGPULibFunc::EI_ASINH:
    // asinh(x) == log(x + sqrt(x*x + 1))
    Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
    return true;

  case AMDGPULibFunc::EI_ASINPI:
    Res0 = asin(opr0) / MATH_PI;
    return true;

  case AMDGPULibFunc::EI_ATAN:
    Res0 = atan(opr0);
    return true;

  case AMDGPULibFunc::EI_ATANH:
    // atanh(x) == (log(1+x) - log(1-x))/2
    Res0 = (log(opr0 + 1.0) - log(1.0 - opr0))/2.0;
    return true;

  case AMDGPULibFunc::EI_ATANPI:
    Res0 = atan(opr0) / MATH_PI;
    return true;

  case AMDGPULibFunc::EI_CBRT:
    Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
    return true;

  case AMDGPULibFunc::EI_COS:
    Res0 = cos(opr0);
    return true;

  case AMDGPULibFunc::EI_COSH:
    Res0 = cosh(opr0);
    return true;

  case AMDGPULibFunc::EI_COSPI:
    Res0 = cos(MATH_PI * opr0);
    return true;

  case AMDGPULibFunc::EI_EXP:
    Res0 = exp(opr0);
    return true;

  case AMDGPULibFunc::EI_EXP2:
    Res0 = pow(2.0, opr0);
    return true;

  case AMDGPULibFunc::EI_EXP10:
    Res0 = pow(10.0, opr0);
    return true;

  case AMDGPULibFunc::EI_EXPM1:
    Res0 = exp(opr0) - 1.0;
    return true;

  case AMDGPULibFunc::EI_LOG:
    Res0 = log(opr0);
    return true;

  case AMDGPULibFunc::EI_LOG2:
    Res0 = log(opr0) / log(2.0);
    return true;

  case AMDGPULibFunc::EI_LOG10:
    Res0 = log(opr0) / log(10.0);
    return true;

  case AMDGPULibFunc::EI_RSQRT:
    Res0 = 1.0 / sqrt(opr0);
    return true;

  case AMDGPULibFunc::EI_SIN:
    Res0 = sin(opr0);
    return true;

  case AMDGPULibFunc::EI_SINH:
    Res0 = sinh(opr0);
    return true;

  case AMDGPULibFunc::EI_SINPI:
    Res0 = sin(MATH_PI * opr0);
    return true;

  case AMDGPULibFunc::EI_SQRT:
    Res0 = sqrt(opr0);
    return true;

  case AMDGPULibFunc::EI_TAN:
    Res0 = tan(opr0);
    return true;

  case AMDGPULibFunc::EI_TANH:
    Res0 = tanh(opr0);
    return true;

  case AMDGPULibFunc::EI_TANPI:
    Res0 = tan(MATH_PI * opr0);
    return true;

  case AMDGPULibFunc::EI_RECIP:
    Res0 = 1.0 / opr0;
    return true;

  // two-arg functions
  case AMDGPULibFunc::EI_DIVIDE:
    Res0 = opr0 / opr1;
    return true;

  case AMDGPULibFunc::EI_POW:
  case AMDGPULibFunc::EI_POWR:
    Res0 = pow(opr0, opr1);
    return true;

  case AMDGPULibFunc::EI_POWN: {
    if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
      double val = (double)iopr1->getSExtValue();
      Res0 = pow(opr0, val);
      return true;
    }
    return false;
  }

  case AMDGPULibFunc::EI_ROOTN: {
    if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
      double val = (double)iopr1->getSExtValue();
      Res0 = pow(opr0, 1.0 / val);
      return true;
    }
    return false;
  }

  // with ptr arg
  case AMDGPULibFunc::EI_SINCOS:
    Res0 = sin(opr0);
    Res1 = cos(opr0);
    return true;

  // three-arg functions
  case AMDGPULibFunc::EI_FMA:
  case AMDGPULibFunc::EI_MAD:
    Res0 = opr0 * opr1 + opr2;
    return true;
  }

  return false;
}

bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
  int numArgs = (int)aCI->arg_size();
  if (numArgs > 3)
    return false;

  Constant *copr0 = nullptr;
  Constant *copr1 = nullptr;
  Constant *copr2 = nullptr;
  if (numArgs > 0) {
    if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
      return false;
  }

  if (numArgs > 1) {
    if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
      if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
        return false;
    }
  }

  if (numArgs > 2) {
    if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
      return false;
  }

  // At this point, all arguments to aCI are constants.

  // The max vector size is 16, and sincos will generate two results.
  double DVal0[16], DVal1[16];
  bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
  if (getVecSize(FInfo) == 1) {
    if (!evaluateScalarMathFunc(FInfo, DVal0[0],
                                DVal1[0], copr0, copr1, copr2)) {
      return false;
    }
  } else {
    ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
    ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
    ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
    for (int i = 0; i < getVecSize(FInfo); ++i) {
      Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
      Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
      Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
      if (!evaluateScalarMathFunc(FInfo, DVal0[i],
                                  DVal1[i], celt0, celt1, celt2)) {
        return false;
      }
    }
  }

  LLVMContext &context = CI->getParent()->getParent()->getContext();
  Constant *nval0, *nval1;
  if (getVecSize(FInfo) == 1) {
    nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
    if (hasTwoResults)
      nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
  } else {
    if (getArgType(FInfo) == AMDGPULibFunc::F32) {
      SmallVector<float, 0> FVal0, FVal1;
      for (int i = 0; i < getVecSize(FInfo); ++i)
        FVal0.push_back((float)DVal0[i]);
      ArrayRef<float> tmp0(FVal0);
      nval0 = ConstantDataVector::get(context, tmp0);
      if (hasTwoResults) {
        for (int i = 0; i < getVecSize(FInfo); ++i)
          FVal1.push_back((float)DVal1[i]);
        ArrayRef<float> tmp1(FVal1);
        nval1 = ConstantDataVector::get(context, tmp1);
      }
    } else {
      // Only the first getVecSize(FInfo) elements of DVal0/DVal1 are valid;
      // do not build the constant from the whole 16-element buffer.
      ArrayRef<double> tmp0(DVal0, getVecSize(FInfo));
      nval0 = ConstantDataVector::get(context, tmp0);
      if (hasTwoResults) {
        ArrayRef<double> tmp1(DVal1, getVecSize(FInfo));
        nval1 = ConstantDataVector::get(context, tmp1);
      }
    }
  }

  if (hasTwoResults) {
    // sincos
    assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
           "math function with ptr arg not supported yet");
    new StoreInst(nval1, aCI->getArgOperand(1), aCI);
  }

  replaceCall(nval0);
  return true;
}

// Public interface to the Simplify LibCalls pass.
FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) {
  return new AMDGPUSimplifyLibCalls(TM);
}

FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
  return new AMDGPUUseNativeCalls();
}

bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  bool Changed = false;
  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
             F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);

  for (auto &BB : F) {
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
      // Ignore non-calls.
      CallInst *CI = dyn_cast<CallInst>(I);
      ++I;
      // Ignore intrinsics that do not become real instructions.
      if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
        continue;

      // Ignore indirect calls.
      Function *Callee = CI->getCalledFunction();
      if (Callee == nullptr)
        continue;

      LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
                 dbgs().flush());
      if (Simplifier.fold(CI, AA))
        Changed = true;
    }
  }
  return Changed;
}

PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
                                                  FunctionAnalysisManager &AM) {
  AMDGPULibCalls Simplifier(&TM);
  Simplifier.initNativeFuncs();

  bool Changed = false;
  auto AA = &AM.getResult<AAManager>(F);

  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
             F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);

  for (auto &BB : F) {
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
      // Ignore non-calls.
      CallInst *CI = dyn_cast<CallInst>(I);
      ++I;
      // Ignore intrinsics that do not become real instructions.
      if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
        continue;

      // Ignore indirect calls.
      Function *Callee = CI->getCalledFunction();
      if (Callee == nullptr)
        continue;

      LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
                 dbgs().flush());
      if (Simplifier.fold(CI, AA))
        Changed = true;
    }
  }
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}

bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
  if (skipFunction(F) || UseNative.empty())
    return false;

  bool Changed = false;
  for (auto &BB : F) {
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
      // Ignore non-calls.
      CallInst *CI = dyn_cast<CallInst>(I);
      ++I;
      if (!CI)
        continue;

      // Ignore indirect calls.
      Function *Callee = CI->getCalledFunction();
      if (Callee == nullptr)
        continue;

      if (Simplifier.useNative(CI))
        Changed = true;
    }
  }
  return Changed;
}

PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
                                                FunctionAnalysisManager &AM) {
  if (UseNative.empty())
    return PreservedAnalyses::all();

  AMDGPULibCalls Simplifier;
  Simplifier.initNativeFuncs();

  bool Changed = false;
  for (auto &BB : F) {
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
      // Ignore non-calls.
      CallInst *CI = dyn_cast<CallInst>(I);
      ++I;
      if (!CI)
        continue;

      // Ignore indirect calls.
      Function *Callee = CI->getCalledFunction();
      if (Callee == nullptr)
        continue;

      if (Simplifier.useNative(CI))
        Changed = true;
    }
  }
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}