1 //===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass tries to partially inline the fast path of well-known library 10 // functions, such as using square-root instructions for cases where sqrt() 11 // does not need to set errno. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" 16 #include "llvm/Analysis/DomTreeUpdater.h" 17 #include "llvm/Analysis/TargetLibraryInfo.h" 18 #include "llvm/Analysis/TargetTransformInfo.h" 19 #include "llvm/IR/Dominators.h" 20 #include "llvm/IR/IRBuilder.h" 21 #include "llvm/InitializePasses.h" 22 #include "llvm/Support/DebugCounter.h" 23 #include "llvm/Transforms/Scalar.h" 24 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 25 #include <optional> 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "partially-inline-libcalls" 30 31 DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform", 32 "Controls transformations in partially-inline-libcalls"); 33 34 static bool optimizeSQRT(CallInst *Call, Function *CalledFunc, 35 BasicBlock &CurrBB, Function::iterator &BB, 36 const TargetTransformInfo *TTI, DomTreeUpdater *DTU) { 37 // There is no need to change the IR, since backend will emit sqrt 38 // instruction if the call has already been marked read-only. 39 if (Call->onlyReadsMemory()) 40 return false; 41 42 if (!DebugCounter::shouldExecute(PILCounter)) 43 return false; 44 45 // Do the following transformation: 46 // 47 // (before) 48 // dst = sqrt(src) 49 // 50 // (after) 51 // v0 = sqrt_noreadmem(src) # native sqrt instruction. 52 // [if (v0 is a NaN) || if (src < 0)] 53 // v1 = sqrt(src) # library call. 54 // dst = phi(v0, v1) 55 // 56 57 Type *Ty = Call->getType(); 58 IRBuilder<> Builder(Call->getNextNode()); 59 60 // Split CurrBB right after the call, create a 'then' block (that branches 61 // back to split-off tail of CurrBB) into which we'll insert a libcall. 62 Instruction *LibCallTerm = SplitBlockAndInsertIfThen( 63 Builder.getTrue(), Call->getNextNode(), /*Unreachable=*/false, 64 /*BranchWeights*/ nullptr, DTU); 65 66 auto *CurrBBTerm = cast<BranchInst>(CurrBB.getTerminator()); 67 // We want an 'else' block though, not a 'then' block. 68 cast<BranchInst>(CurrBBTerm)->swapSuccessors(); 69 70 // Create phi that will merge results of either sqrt and replace all uses. 71 BasicBlock *JoinBB = LibCallTerm->getSuccessor(0); 72 JoinBB->setName(CurrBB.getName() + ".split"); 73 Builder.SetInsertPoint(JoinBB, JoinBB->begin()); 74 PHINode *Phi = Builder.CreatePHI(Ty, 2); 75 Call->replaceAllUsesWith(Phi); 76 77 // Finally, insert the libcall into 'else' block. 78 BasicBlock *LibCallBB = LibCallTerm->getParent(); 79 LibCallBB->setName("call.sqrt"); 80 Builder.SetInsertPoint(LibCallTerm); 81 Instruction *LibCall = Call->clone(); 82 Builder.Insert(LibCall); 83 84 // Add memory(none) attribute, so that the backend can use a native sqrt 85 // instruction for this call. 86 Call->setDoesNotAccessMemory(); 87 88 // Insert a FP compare instruction and use it as the CurrBB branch condition. 89 Builder.SetInsertPoint(CurrBBTerm); 90 Value *FCmp = TTI->isFCmpOrdCheaperThanFCmpZero(Ty) 91 ? Builder.CreateFCmpORD(Call, Call) 92 : Builder.CreateFCmpOGE(Call->getOperand(0), 93 ConstantFP::get(Ty, 0.0)); 94 CurrBBTerm->setCondition(FCmp); 95 96 // Add phi operands. 97 Phi->addIncoming(Call, &CurrBB); 98 Phi->addIncoming(LibCall, LibCallBB); 99 100 BB = JoinBB->getIterator(); 101 return true; 102 } 103 104 static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI, 105 const TargetTransformInfo *TTI, 106 DominatorTree *DT) { 107 std::optional<DomTreeUpdater> DTU; 108 if (DT) 109 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy); 110 111 bool Changed = false; 112 113 Function::iterator CurrBB; 114 for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) { 115 CurrBB = BB++; 116 117 for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end(); 118 II != IE; ++II) { 119 CallInst *Call = dyn_cast<CallInst>(&*II); 120 Function *CalledFunc; 121 122 if (!Call || !(CalledFunc = Call->getCalledFunction())) 123 continue; 124 125 if (Call->isNoBuiltin() || Call->isStrictFP()) 126 continue; 127 128 if (Call->isMustTailCall()) 129 continue; 130 131 // Skip if function either has local linkage or is not a known library 132 // function. 133 LibFunc LF; 134 if (CalledFunc->hasLocalLinkage() || 135 !TLI->getLibFunc(*CalledFunc, LF) || !TLI->has(LF)) 136 continue; 137 138 switch (LF) { 139 case LibFunc_sqrtf: 140 case LibFunc_sqrt: 141 if (TTI->haveFastSqrt(Call->getType()) && 142 optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI, 143 DTU ? &*DTU : nullptr)) 144 break; 145 continue; 146 default: 147 continue; 148 } 149 150 Changed = true; 151 break; 152 } 153 } 154 155 return Changed; 156 } 157 158 PreservedAnalyses 159 PartiallyInlineLibCallsPass::run(Function &F, FunctionAnalysisManager &AM) { 160 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); 161 auto &TTI = AM.getResult<TargetIRAnalysis>(F); 162 auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); 163 if (!runPartiallyInlineLibCalls(F, &TLI, &TTI, DT)) 164 return PreservedAnalyses::all(); 165 PreservedAnalyses PA; 166 PA.preserve<DominatorTreeAnalysis>(); 167 return PA; 168 } 169 170 namespace { 171 class PartiallyInlineLibCallsLegacyPass : public FunctionPass { 172 public: 173 static char ID; 174 175 PartiallyInlineLibCallsLegacyPass() : FunctionPass(ID) { 176 initializePartiallyInlineLibCallsLegacyPassPass( 177 *PassRegistry::getPassRegistry()); 178 } 179 180 void getAnalysisUsage(AnalysisUsage &AU) const override { 181 AU.addRequired<TargetLibraryInfoWrapperPass>(); 182 AU.addRequired<TargetTransformInfoWrapperPass>(); 183 AU.addPreserved<DominatorTreeWrapperPass>(); 184 FunctionPass::getAnalysisUsage(AU); 185 } 186 187 bool runOnFunction(Function &F) override { 188 if (skipFunction(F)) 189 return false; 190 191 TargetLibraryInfo *TLI = 192 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 193 const TargetTransformInfo *TTI = 194 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 195 DominatorTree *DT = nullptr; 196 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) 197 DT = &DTWP->getDomTree(); 198 return runPartiallyInlineLibCalls(F, TLI, TTI, DT); 199 } 200 }; 201 } 202 203 char PartiallyInlineLibCallsLegacyPass::ID = 0; 204 INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass, 205 "partially-inline-libcalls", 206 "Partially inline calls to library functions", false, 207 false) 208 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 209 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 210 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 211 INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass, 212 "partially-inline-libcalls", 213 "Partially inline calls to library functions", false, false) 214 215 FunctionPass *llvm::createPartiallyInlineLibCallsPass() { 216 return new PartiallyInlineLibCallsLegacyPass(); 217 } 218