//===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interface between the inliner and a learned model.
// It delegates model evaluation to either the AOT compiled model (the
// 'release' mode) or a runtime-loaded model (the 'development' case).
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Config/config.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"

#include <limits>
#include <unordered_map>
#include <unordered_set>

using namespace llvm;

#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT

std::unique_ptr<InlineAdvisor>
llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
  auto AOTRunner =
      std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
          M.getContext(), FeatureNameMap, DecisionName);
  return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
}
#endif

#define DEBUG_TYPE "inline-ml"

static cl::opt<float> SizeIncreaseThreshold(
    "ml-advisor-size-increase-threshold", cl::Hidden,
    cl::desc("Maximum factor by which expected native size may increase before "
             "blocking any further inlining."),
    cl::init(2.0));

// clang-format off
const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{
// InlineCost features - these must come first
#define POPULATE_NAMES(INDEX_NAME, NAME) NAME,
  INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES

// Non-cost features
#define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME,
  INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};
// clang-format on

const char *const llvm::DecisionName = "inlining_decision";
const char *const llvm::DefaultDecisionName = "inlining_default";
const char *const llvm::RewardName = "delta_size";
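// Return \p I as a call site if it directly calls a function defined in this
// module; indirect calls and calls to declarations yield nullptr.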
CallBase *getInlinableCS(Instruction &I) {
  if (auto *CS = dyn_cast<CallBase>(&I))
    if (Function *Callee = CS->getCalledFunction()) {
      if (!Callee->isDeclaration()) {
        return CS;
      }
    }
  return nullptr;
}

MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
                                 std::unique_ptr<MLModelRunner> Runner)
    : InlineAdvisor(
          M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
      ModelRunner(std::move(Runner)),
      CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
      InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
  assert(ModelRunner);

  // Extract the 'call site height' feature - the position of a call site
  // relative to the farthest statically reachable SCC node. We don't mutate
  // this value while inlining happens. Empirically, this feature proved
  // critical in behavioral cloning - i.e. training a model to mimic the manual
  // heuristic's decisions - and, thus, equally important for training for
  // improvement.
  CallGraph CGraph(M);
  for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) {
    const std::vector<CallGraphNode *> &CGNodes = *I;
    unsigned Level = 0;
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (!F || F->isDeclaration())
        continue;
      for (auto &I : instructions(F)) {
        if (auto *CS = getInlinableCS(I)) {
          auto *Called = CS->getCalledFunction();
          auto Pos = FunctionLevels.find(&CG.get(*Called));
          // In bottom-up traversal, an inlinable callee is either in the same
          // SCC, or in an already-visited SCC. So not finding its level means
          // we haven't visited it yet, meaning it's in this SCC.
          if (Pos == FunctionLevels.end())
            continue;
          Level = std::max(Level, Pos->second + 1);
        }
      }
    }
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (F && !F->isDeclaration())
        FunctionLevels[&CG.get(*F)] = Level;
    }
  }
  for (auto KVP : FunctionLevels) {
    AllNodes.insert(KVP.first);
    EdgeCount += getLocalCalls(KVP.first->getFunction());
  }
  NodeCount = AllNodes.size();
}

unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
  return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
}

void MLInlineAdvisor::onPassEntry() {
  // Function passes executed between InlinerPass runs may have changed the
  // module-wide features.
  // The cgscc pass manager rules are such that:
  // - if a pass leads to merging SCCs, then the pipeline is restarted on the
  // merged SCC
  // - if a pass leads to splitting the SCC, then we continue with one of the
  // splits
  // This means that NodesInLastSCC is a (not necessarily strict) superset of
  // the nodes that subsequent passes would have processed.
  // - in addition, if new Nodes were created by a pass (e.g. CoroSplit),
  // they'd be adjacent to Nodes in the last SCC. So we just need to check the
  // boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't
  // care about the nature of the Edge (call or ref).
  NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
  while (!NodesInLastSCC.empty()) {
    const auto *N = NodesInLastSCC.front();
    NodesInLastSCC.pop_front();
    // The Function wrapped by N could have been deleted since we last saw it.
    if (N->isDead()) {
      assert(!N->getFunction().isDeclaration());
      continue;
    }
    ++NodeCount;
    EdgeCount += getLocalCalls(N->getFunction());
    for (const auto &E : *(*N)) {
      const auto *AdjNode = &E.getNode();
      assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
      auto I = AllNodes.insert(AdjNode);
      if (I.second)
        NodesInLastSCC.push_back(AdjNode);
    }
  }

  EdgeCount -= EdgesOfLastSeenNodes;
  EdgesOfLastSeenNodes = 0;
}

void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
  if (!LastSCC)
    return;
  // Keep track of the nodes and edges we last saw. Then, in onPassEntry, we
  // update the node count and edge count from the subset of these nodes that
  // survived.
  assert(NodesInLastSCC.empty());
  assert(NodeCount >= LastSCC->size());
  EdgesOfLastSeenNodes = 0;
  for (const auto &N : *LastSCC) {
    assert(!N.isDead());
    EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
    NodesInLastSCC.push_back(&N);
  }
  assert(EdgeCount >= EdgesOfLastSeenNodes);
}
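// Number of direct calls from \p F to functions defined in this module, as
// computed by FunctionPropertiesAnalysis; this is what the advisor counts as
// "edges".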
int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
  return FAM.getResult<FunctionPropertiesAnalysis>(F)
      .DirectCallsToDefinedFunctions;
}

// Update the internal state of the advisor, and force-invalidate the caller's
// feature analysis. Currently, we maintain minimal (and very simple) global
// state - the number of functions and the number of static calls. We also keep
// track of the total IR size in this module, to stop misbehaving policies at a
// certain bloat factor (SizeIncreaseThreshold).
void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
                                           bool CalleeWasDeleted) {
  assert(!ForceStop);
  Function *Caller = Advice.getCaller();
  Function *Callee = Advice.getCallee();

  // The caller features aren't valid anymore.
  {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<FunctionPropertiesAnalysis>();
    FAM.invalidate(*Caller, PA);
  }
  int64_t IRSizeAfter =
      getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
  CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
  if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
    ForceStop = true;

  // We can delta-update module-wide features. We know the inlining only changed
  // the caller, and maybe the callee (by deleting the latter).
  // Nodes are simple to update.
  // For edges, we 'forget' the edges that the caller and callee used to have
  // before inlining, and add back what they currently have together.
  int64_t NewCallerAndCalleeEdges =
      FAM.getResult<FunctionPropertiesAnalysis>(*Caller)
          .DirectCallsToDefinedFunctions;

  if (CalleeWasDeleted)
    --NodeCount;
  else
    NewCallerAndCalleeEdges +=
        FAM.getResult<FunctionPropertiesAnalysis>(*Callee)
            .DirectCallsToDefinedFunctions;
  EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
  assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}

int64_t MLInlineAdvisor::getModuleIRSize() const {
  int64_t Ret = 0;
  for (auto &F : M)
    if (!F.isDeclaration())
      Ret += getIRSize(F);
  return Ret;
}
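// Compute the features for this call site, copy them into the model runner's
// input tensors, and ask the model for a decision. Mandatory ("always"/"never")
// and recursive call sites, sites that cannot legally be inlined, and the
// ForceStop case all bypass the model.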
std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
  auto &Caller = *CB.getCaller();
  auto &Callee = *CB.getCalledFunction();

  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);

  auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same thing if this is a recursive case.
  if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
      &Caller == &Callee)
    return getMandatoryAdvice(CB, false);

  bool Mandatory =
      MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;

  // If we need to stop, we no longer want to track any state changes, so we
  // just return the base InlineAdvice, which acts as a no-op.
  if (ForceStop) {
    ORE.emit([&] {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
             << "Won't attempt inlining because module size grew too much.";
    });
    return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
  }

  int CostEstimate = 0;
  if (!Mandatory) {
    auto IsCallSiteInlinable =
        llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
    if (!IsCallSiteInlinable) {
      // We can't inline this for correctness reasons, so return the base
      // InlineAdvice, as we don't care about tracking any state changes (which
      // won't happen).
      return std::make_unique<InlineAdvice>(this, CB, ORE, false);
    }
    CostEstimate = *IsCallSiteInlinable;
  }

  const auto CostFeatures =
      llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
  if (!CostFeatures) {
    return std::make_unique<InlineAdvice>(this, CB, ORE, false);
  }

  if (Mandatory)
    return getMandatoryAdvice(CB, true);

  auto NrCtantParams = 0;
  for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
    NrCtantParams += (isa<Constant>(*I));
  }

  auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller);
  auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee);

  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
      CalleeBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) =
      getInitialFunctionLevel(Caller);
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) =
      CallerBefore.Uses;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CallerConditionallyExecutedBlocks) =
      CallerBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) =
      CallerBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CalleeConditionallyExecutedBlocks) =
      CalleeBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) =
      CalleeBefore.Uses;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate;

  // Add the cost features.
  for (size_t I = 0;
       I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
    *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
        static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
  }

  return getAdviceFromModel(CB, ORE);
}

std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
                                    OptimizationRemarkEmitter &ORE) {
  return std::make_unique<MLInlineAdvice>(
      this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
                                                                  bool Advice) {
  // Make sure we track inlinings in all cases - mandatory or not.
  if (Advice && !ForceStop)
    return getMandatoryAdviceImpl(CB);

  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same if we are forced to stop - we no longer track state.
  return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
}

std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
}
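// Stream the callee name, the current values of all model features, and the
// final recommendation into the remark, so each decision's full context shows
// up in the optimization record.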
void MLInlineAdvice::reportContextForRemark(
    DiagnosticInfoOptimizationBase &OR) {
  using namespace ore;
  OR << NV("Callee", Callee->getName());
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    OR << NV(FeatureNameMap[I],
             *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
  OR << NV("ShouldInline", isInliningRecommended());
}

void MLInlineAdvice::recordInliningImpl() {
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false);
}

void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
                         Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true);
}

void MLInlineAdvice::recordUnsuccessfulInliningImpl(
    const InlineResult &Result) {
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
                               DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}

void MLInlineAdvice::recordUnattemptedInliningImpl() {
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningNotAttempted", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}