1 //===- ModuleInliner.cpp - Code related to module inliner -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the mechanics required to implement inlining without 10 // missing any calls in the module level. It doesn't need any infromation about 11 // SCC or call graph, which is different from the SCC inliner. The decisions of 12 // which calls are profitable to inline are implemented elsewhere. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Transforms/IPO/ModuleInliner.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/ScopeExit.h" 19 #include "llvm/ADT/SetVector.h" 20 #include "llvm/ADT/SmallPtrSet.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/Analysis/AssumptionCache.h" 24 #include "llvm/Analysis/BlockFrequencyInfo.h" 25 #include "llvm/Analysis/GlobalsModRef.h" 26 #include "llvm/Analysis/InlineAdvisor.h" 27 #include "llvm/Analysis/InlineCost.h" 28 #include "llvm/Analysis/InlineOrder.h" 29 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 30 #include "llvm/Analysis/ProfileSummaryInfo.h" 31 #include "llvm/Analysis/TargetLibraryInfo.h" 32 #include "llvm/Analysis/TargetTransformInfo.h" 33 #include "llvm/IR/DebugLoc.h" 34 #include "llvm/IR/DiagnosticInfo.h" 35 #include "llvm/IR/Function.h" 36 #include "llvm/IR/InstIterator.h" 37 #include "llvm/IR/Instruction.h" 38 #include "llvm/IR/Instructions.h" 39 #include "llvm/IR/IntrinsicInst.h" 40 #include "llvm/IR/Metadata.h" 41 #include "llvm/IR/Module.h" 42 #include "llvm/IR/PassManager.h" 43 #include "llvm/IR/User.h" 44 #include "llvm/IR/Value.h" 45 #include "llvm/Support/CommandLine.h" 46 #include "llvm/Support/Debug.h" 47 #include "llvm/Support/raw_ostream.h" 48 #include "llvm/Transforms/Utils/CallPromotionUtils.h" 49 #include "llvm/Transforms/Utils/Cloning.h" 50 #include "llvm/Transforms/Utils/Local.h" 51 #include "llvm/Transforms/Utils/ModuleUtils.h" 52 #include <cassert> 53 #include <functional> 54 55 using namespace llvm; 56 57 #define DEBUG_TYPE "module-inline" 58 59 STATISTIC(NumInlined, "Number of functions inlined"); 60 STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); 61 62 static cl::opt<bool> InlineEnablePriorityOrder( 63 "module-inline-enable-priority-order", cl::Hidden, cl::init(true), 64 cl::desc("Enable the priority inline order for the module inliner")); 65 66 /// Return true if the specified inline history ID 67 /// indicates an inline history that includes the specified function. 68 static bool inlineHistoryIncludes( 69 Function *F, int InlineHistoryID, 70 const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) { 71 while (InlineHistoryID != -1) { 72 assert(unsigned(InlineHistoryID) < InlineHistory.size() && 73 "Invalid inline history ID"); 74 if (InlineHistory[InlineHistoryID].first == F) 75 return true; 76 InlineHistoryID = InlineHistory[InlineHistoryID].second; 77 } 78 return false; 79 } 80 81 InlineAdvisor &ModuleInlinerPass::getAdvisor(const ModuleAnalysisManager &MAM, 82 FunctionAnalysisManager &FAM, 83 Module &M) { 84 if (OwnedAdvisor) 85 return *OwnedAdvisor; 86 87 auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M); 88 if (!IAA) { 89 // It should still be possible to run the inliner as a stand-alone module 90 // pass, for test scenarios. In that case, we default to the 91 // DefaultInlineAdvisor, which doesn't need to keep state between module 92 // pass runs. It also uses just the default InlineParams. In this case, we 93 // need to use the provided FAM, which is valid for the duration of the 94 // inliner pass, and thus the lifetime of the owned advisor. The one we 95 // would get from the MAM can be invalidated as a result of the inliner's 96 // activity. 97 OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(M, FAM, Params); 98 99 return *OwnedAdvisor; 100 } 101 assert(IAA->getAdvisor() && 102 "Expected a present InlineAdvisorAnalysis also have an " 103 "InlineAdvisor initialized"); 104 return *IAA->getAdvisor(); 105 } 106 107 static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) { 108 LibFunc LF; 109 110 // Either this is a normal library function or a "vectorizable" 111 // function. Not using the VFDatabase here because this query 112 // is related only to libraries handled via the TLI. 113 return TLI.getLibFunc(F, LF) || 114 TLI.isKnownVectorFunctionInLibrary(F.getName()); 115 } 116 117 PreservedAnalyses ModuleInlinerPass::run(Module &M, 118 ModuleAnalysisManager &MAM) { 119 LLVM_DEBUG(dbgs() << "---- Module Inliner is Running ---- \n"); 120 121 auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M); 122 if (!IAA.tryCreate(Params, Mode, {})) { 123 M.getContext().emitError( 124 "Could not setup Inlining Advisor for the requested " 125 "mode and/or options"); 126 return PreservedAnalyses::all(); 127 } 128 129 bool Changed = false; 130 131 ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M); 132 133 FunctionAnalysisManager &FAM = 134 MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 135 136 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 137 return FAM.getResult<TargetLibraryAnalysis>(F); 138 }; 139 140 InlineAdvisor &Advisor = getAdvisor(MAM, FAM, M); 141 Advisor.onPassEntry(); 142 143 auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(); }); 144 145 // In the module inliner, a priority-based worklist is used for calls across 146 // the entire Module. With this module inliner, the inline order is not 147 // limited to bottom-up order. More globally scope inline order is enabled. 148 // Also, the inline deferral logic become unnecessary in this module inliner. 149 // It is possible to use other priority heuristics, e.g. profile-based 150 // heuristic. 151 // 152 // TODO: Here is a huge amount duplicate code between the module inliner and 153 // the SCC inliner, which need some refactoring. 154 std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls; 155 if (InlineEnablePriorityOrder) 156 Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>(); 157 else 158 Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>(); 159 assert(Calls != nullptr && "Expected an initialized InlineOrder"); 160 161 // Populate the initial list of calls in this module. 162 for (Function &F : M) { 163 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 164 // We want to generally process call sites top-down in order for 165 // simplifications stemming from replacing the call with the returned value 166 // after inlining to be visible to subsequent inlining decisions. 167 // FIXME: Using instructions sequence is a really bad way to do this. 168 // Instead we should do an actual RPO walk of the function body. 169 for (Instruction &I : instructions(F)) 170 if (auto *CB = dyn_cast<CallBase>(&I)) 171 if (Function *Callee = CB->getCalledFunction()) { 172 if (!Callee->isDeclaration()) 173 Calls->push({CB, -1}); 174 else if (!isa<IntrinsicInst>(I)) { 175 using namespace ore; 176 setInlineRemark(*CB, "unavailable definition"); 177 ORE.emit([&]() { 178 return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) 179 << NV("Callee", Callee) << " will not be inlined into " 180 << NV("Caller", CB->getCaller()) 181 << " because its definition is unavailable" 182 << setIsVerbose(); 183 }); 184 } 185 } 186 } 187 if (Calls->empty()) 188 return PreservedAnalyses::all(); 189 190 // When inlining a callee produces new call sites, we want to keep track of 191 // the fact that they were inlined from the callee. This allows us to avoid 192 // infinite inlining in some obscure cases. To represent this, we use an 193 // index into the InlineHistory vector. 194 SmallVector<std::pair<Function *, int>, 16> InlineHistory; 195 196 // Track a set vector of inlined callees so that we can augment the caller 197 // with all of their edges in the call graph before pruning out the ones that 198 // got simplified away. 199 SmallSetVector<Function *, 4> InlinedCallees; 200 201 // Track the dead functions to delete once finished with inlining calls. We 202 // defer deleting these to make it easier to handle the call graph updates. 203 SmallVector<Function *, 4> DeadFunctions; 204 205 // Loop forward over all of the calls. 206 while (!Calls->empty()) { 207 // We expect the calls to typically be batched with sequences of calls that 208 // have the same caller, so we first set up some shared infrastructure for 209 // this caller. We also do any pruning we can at this layer on the caller 210 // alone. 211 Function &F = *Calls->front().first->getCaller(); 212 213 LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n" 214 << " Function size: " << F.getInstructionCount() 215 << "\n"); 216 217 auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { 218 return FAM.getResult<AssumptionAnalysis>(F); 219 }; 220 221 // Now process as many calls as we have within this caller in the sequence. 222 // We bail out as soon as the caller has to change so we can 223 // prepare the context of that new caller. 224 bool DidInline = false; 225 while (!Calls->empty() && Calls->front().first->getCaller() == &F) { 226 auto P = Calls->pop(); 227 CallBase *CB = P.first; 228 const int InlineHistoryID = P.second; 229 Function &Callee = *CB->getCalledFunction(); 230 231 if (InlineHistoryID != -1 && 232 inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) { 233 setInlineRemark(*CB, "recursive"); 234 continue; 235 } 236 237 auto Advice = Advisor.getAdvice(*CB, /*OnlyMandatory*/ false); 238 // Check whether we want to inline this callsite. 239 if (!Advice->isInliningRecommended()) { 240 Advice->recordUnattemptedInlining(); 241 continue; 242 } 243 244 // Setup the data structure used to plumb customization into the 245 // `InlineFunction` routine. 246 InlineFunctionInfo IFI( 247 /*cg=*/nullptr, GetAssumptionCache, PSI, 248 &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())), 249 &FAM.getResult<BlockFrequencyAnalysis>(Callee)); 250 251 InlineResult IR = 252 InlineFunction(*CB, IFI, &FAM.getResult<AAManager>(*CB->getCaller())); 253 if (!IR.isSuccess()) { 254 Advice->recordUnsuccessfulInlining(IR); 255 continue; 256 } 257 258 DidInline = true; 259 InlinedCallees.insert(&Callee); 260 ++NumInlined; 261 262 LLVM_DEBUG(dbgs() << " Size after inlining: " 263 << F.getInstructionCount() << "\n"); 264 265 // Add any new callsites to defined functions to the worklist. 266 if (!IFI.InlinedCallSites.empty()) { 267 int NewHistoryID = InlineHistory.size(); 268 InlineHistory.push_back({&Callee, InlineHistoryID}); 269 270 for (CallBase *ICB : reverse(IFI.InlinedCallSites)) { 271 Function *NewCallee = ICB->getCalledFunction(); 272 if (!NewCallee) { 273 // Try to promote an indirect (virtual) call without waiting for 274 // the post-inline cleanup and the next DevirtSCCRepeatedPass 275 // iteration because the next iteration may not happen and we may 276 // miss inlining it. 277 if (tryPromoteCall(*ICB)) 278 NewCallee = ICB->getCalledFunction(); 279 } 280 if (NewCallee) 281 if (!NewCallee->isDeclaration()) 282 Calls->push({ICB, NewHistoryID}); 283 } 284 } 285 286 // Merge the attributes based on the inlining. 287 AttributeFuncs::mergeAttributesForInlining(F, Callee); 288 289 // For local functions, check whether this makes the callee trivially 290 // dead. In that case, we can drop the body of the function eagerly 291 // which may reduce the number of callers of other functions to one, 292 // changing inline cost thresholds. 293 bool CalleeWasDeleted = false; 294 if (Callee.hasLocalLinkage()) { 295 // To check this we also need to nuke any dead constant uses (perhaps 296 // made dead by this operation on other functions). 297 Callee.removeDeadConstantUsers(); 298 // if (Callee.use_empty() && !CG.isLibFunction(Callee)) { 299 if (Callee.use_empty() && !isKnownLibFunction(Callee, GetTLI(Callee))) { 300 Calls->erase_if([&](const std::pair<CallBase *, int> &Call) { 301 return Call.first->getCaller() == &Callee; 302 }); 303 // Clear the body and queue the function itself for deletion when we 304 // finish inlining. 305 // Note that after this point, it is an error to do anything other 306 // than use the callee's address or delete it. 307 Callee.dropAllReferences(); 308 assert(!is_contained(DeadFunctions, &Callee) && 309 "Cannot put cause a function to become dead twice!"); 310 DeadFunctions.push_back(&Callee); 311 CalleeWasDeleted = true; 312 } 313 } 314 if (CalleeWasDeleted) 315 Advice->recordInliningWithCalleeDeleted(); 316 else 317 Advice->recordInlining(); 318 } 319 320 if (!DidInline) 321 continue; 322 Changed = true; 323 324 InlinedCallees.clear(); 325 } 326 327 // Now that we've finished inlining all of the calls across this module, 328 // delete all of the trivially dead functions. 329 // 330 // Note that this walks a pointer set which has non-deterministic order but 331 // that is OK as all we do is delete things and add pointers to unordered 332 // sets. 333 for (Function *DeadF : DeadFunctions) { 334 // Clear out any cached analyses. 335 FAM.clear(*DeadF, DeadF->getName()); 336 337 // And delete the actual function from the module. 338 M.getFunctionList().erase(DeadF); 339 340 ++NumDeleted; 341 } 342 343 if (!Changed) 344 return PreservedAnalyses::all(); 345 346 return PreservedAnalyses::none(); 347 } 348