1 //===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass tries to partially inline the fast path of well-known library
10 // functions, such as using square-root instructions for cases where sqrt()
11 // does not need to set errno.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
16 #include "llvm/Analysis/DomTreeUpdater.h"
17 #include "llvm/Analysis/TargetLibraryInfo.h"
18 #include "llvm/Analysis/TargetTransformInfo.h"
19 #include "llvm/IR/Dominators.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/InitializePasses.h"
22 #include "llvm/Support/DebugCounter.h"
23 #include "llvm/Transforms/Scalar.h"
24 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
25 #include <optional>
26
27 using namespace llvm;
28
29 #define DEBUG_TYPE "partially-inline-libcalls"
30
31 DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform",
32 "Controls transformations in partially-inline-libcalls");
33
optimizeSQRT(CallInst * Call,Function * CalledFunc,BasicBlock & CurrBB,Function::iterator & BB,const TargetTransformInfo * TTI,DomTreeUpdater * DTU)34 static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
35 BasicBlock &CurrBB, Function::iterator &BB,
36 const TargetTransformInfo *TTI, DomTreeUpdater *DTU) {
37 // There is no need to change the IR, since backend will emit sqrt
38 // instruction if the call has already been marked read-only.
39 if (Call->onlyReadsMemory())
40 return false;
41
42 if (!DebugCounter::shouldExecute(PILCounter))
43 return false;
44
45 // Do the following transformation:
46 //
47 // (before)
48 // dst = sqrt(src)
49 //
50 // (after)
51 // v0 = sqrt_noreadmem(src) # native sqrt instruction.
52 // [if (v0 is a NaN) || if (src < 0)]
53 // v1 = sqrt(src) # library call.
54 // dst = phi(v0, v1)
55 //
56
57 Type *Ty = Call->getType();
58 IRBuilder<> Builder(Call->getNextNode());
59
60 // Split CurrBB right after the call, create a 'then' block (that branches
61 // back to split-off tail of CurrBB) into which we'll insert a libcall.
62 Instruction *LibCallTerm = SplitBlockAndInsertIfThen(
63 Builder.getTrue(), Call->getNextNode(), /*Unreachable=*/false,
64 /*BranchWeights*/ nullptr, DTU);
65
66 auto *CurrBBTerm = cast<BranchInst>(CurrBB.getTerminator());
67 // We want an 'else' block though, not a 'then' block.
68 cast<BranchInst>(CurrBBTerm)->swapSuccessors();
69
70 // Create phi that will merge results of either sqrt and replace all uses.
71 BasicBlock *JoinBB = LibCallTerm->getSuccessor(0);
72 JoinBB->setName(CurrBB.getName() + ".split");
73 Builder.SetInsertPoint(JoinBB, JoinBB->begin());
74 PHINode *Phi = Builder.CreatePHI(Ty, 2);
75 Call->replaceAllUsesWith(Phi);
76
77 // Finally, insert the libcall into 'else' block.
78 BasicBlock *LibCallBB = LibCallTerm->getParent();
79 LibCallBB->setName("call.sqrt");
80 Builder.SetInsertPoint(LibCallTerm);
81 Instruction *LibCall = Call->clone();
82 Builder.Insert(LibCall);
83
84 // Add memory(none) attribute, so that the backend can use a native sqrt
85 // instruction for this call.
86 Call->setDoesNotAccessMemory();
87
88 // Insert a FP compare instruction and use it as the CurrBB branch condition.
89 Builder.SetInsertPoint(CurrBBTerm);
90 Value *FCmp = TTI->isFCmpOrdCheaperThanFCmpZero(Ty)
91 ? Builder.CreateFCmpORD(Call, Call)
92 : Builder.CreateFCmpOGE(Call->getOperand(0),
93 ConstantFP::get(Ty, 0.0));
94 CurrBBTerm->setCondition(FCmp);
95
96 // Add phi operands.
97 Phi->addIncoming(Call, &CurrBB);
98 Phi->addIncoming(LibCall, LibCallBB);
99
100 BB = JoinBB->getIterator();
101 return true;
102 }
103
runPartiallyInlineLibCalls(Function & F,TargetLibraryInfo * TLI,const TargetTransformInfo * TTI,DominatorTree * DT)104 static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
105 const TargetTransformInfo *TTI,
106 DominatorTree *DT) {
107 std::optional<DomTreeUpdater> DTU;
108 if (DT)
109 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
110
111 bool Changed = false;
112
113 Function::iterator CurrBB;
114 for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
115 CurrBB = BB++;
116
117 for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
118 II != IE; ++II) {
119 CallInst *Call = dyn_cast<CallInst>(&*II);
120 Function *CalledFunc;
121
122 if (!Call || !(CalledFunc = Call->getCalledFunction()))
123 continue;
124
125 if (Call->isNoBuiltin() || Call->isStrictFP())
126 continue;
127
128 if (Call->isMustTailCall())
129 continue;
130
131 // Skip if function either has local linkage or is not a known library
132 // function.
133 LibFunc LF;
134 if (CalledFunc->hasLocalLinkage() ||
135 !TLI->getLibFunc(*CalledFunc, LF) || !TLI->has(LF))
136 continue;
137
138 switch (LF) {
139 case LibFunc_sqrtf:
140 case LibFunc_sqrt:
141 if (TTI->haveFastSqrt(Call->getType()) &&
142 optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI,
143 DTU ? &*DTU : nullptr))
144 break;
145 continue;
146 default:
147 continue;
148 }
149
150 Changed = true;
151 break;
152 }
153 }
154
155 return Changed;
156 }
157
158 PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)159 PartiallyInlineLibCallsPass::run(Function &F, FunctionAnalysisManager &AM) {
160 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
161 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
162 auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
163 if (!runPartiallyInlineLibCalls(F, &TLI, &TTI, DT))
164 return PreservedAnalyses::all();
165 PreservedAnalyses PA;
166 PA.preserve<DominatorTreeAnalysis>();
167 return PA;
168 }
169
170 namespace {
171 class PartiallyInlineLibCallsLegacyPass : public FunctionPass {
172 public:
173 static char ID;
174
PartiallyInlineLibCallsLegacyPass()175 PartiallyInlineLibCallsLegacyPass() : FunctionPass(ID) {
176 initializePartiallyInlineLibCallsLegacyPassPass(
177 *PassRegistry::getPassRegistry());
178 }
179
getAnalysisUsage(AnalysisUsage & AU) const180 void getAnalysisUsage(AnalysisUsage &AU) const override {
181 AU.addRequired<TargetLibraryInfoWrapperPass>();
182 AU.addRequired<TargetTransformInfoWrapperPass>();
183 AU.addPreserved<DominatorTreeWrapperPass>();
184 FunctionPass::getAnalysisUsage(AU);
185 }
186
runOnFunction(Function & F)187 bool runOnFunction(Function &F) override {
188 if (skipFunction(F))
189 return false;
190
191 TargetLibraryInfo *TLI =
192 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
193 const TargetTransformInfo *TTI =
194 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
195 DominatorTree *DT = nullptr;
196 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
197 DT = &DTWP->getDomTree();
198 return runPartiallyInlineLibCalls(F, TLI, TTI, DT);
199 }
200 };
201 }
202
203 char PartiallyInlineLibCallsLegacyPass::ID = 0;
204 INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass,
205 "partially-inline-libcalls",
206 "Partially inline calls to library functions", false,
207 false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)208 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
209 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
210 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
211 INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass,
212 "partially-inline-libcalls",
213 "Partially inline calls to library functions", false, false)
214
215 FunctionPass *llvm::createPartiallyInlineLibCallsPass() {
216 return new PartiallyInlineLibCallsLegacyPass();
217 }
218