//===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interface between the inliner and a learned model.
// It delegates model evaluation to either the AOT-compiled model ('release'
// mode) or a runtime-loaded model ('development' mode).
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
#include "llvm/Analysis/ReleaseModeModelRunner.h"
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT

std::unique_ptr<InlineAdvisor>
llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
  auto AOTRunner =
      std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
          M.getContext(), FeatureMap, DecisionName);
  return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
}
#endif

#define DEBUG_TYPE "inline-ml"

static cl::opt<float> SizeIncreaseThreshold(
    "ml-advisor-size-increase-threshold", cl::Hidden,
    cl::desc("Maximum factor by which expected native size may increase before "
             "blocking any further inlining."),
    cl::init(2.0));

static cl::opt<bool> KeepFPICache(
    "ml-advisor-keep-fpi-cache", cl::Hidden,
    cl::desc("For testing - keep the ML Inline advisor's FunctionPropertiesInfo "
             "cache"),
    cl::init(false));

// clang-format off
const std::array<TensorSpec, NumberOfFeatures> llvm::FeatureMap{
#define POPULATE_NAMES(_, NAME) TensorSpec::createSpec<int64_t>(NAME, {1}),
// InlineCost features - these must come first.
  INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES

// Non-cost features.
#define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec<int64_t>(NAME, {1}),
  INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};
// clang-format on
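// Illustrative note: each POPULATE_NAMES expansion above produces one entry of
// FeatureMap. Assuming the feature lists in InlineModelFeatureMaps.h name a
// feature "callee_basic_block_count", its entry expands to roughly
//   TensorSpec::createSpec<int64_t>("callee_basic_block_count", {1}),
// i.e. every feature is modeled as a single-element int64 tensor.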
const char *const llvm::DecisionName = "inlining_decision";
const char *const llvm::DefaultDecisionName = "inlining_default";
const char *const llvm::RewardName = "delta_size";

static CallBase *getInlinableCS(Instruction &I) {
  if (auto *CS = dyn_cast<CallBase>(&I))
    if (Function *Callee = CS->getCalledFunction())
      if (!Callee->isDeclaration())
        return CS;
  return nullptr;
}

MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
                                 std::unique_ptr<MLModelRunner> Runner)
    : InlineAdvisor(
          M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
      ModelRunner(std::move(Runner)),
      CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
      InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
  assert(ModelRunner);

  // Extract the 'call site height' feature - the position of a call site
  // relative to the farthest statically reachable SCC node. We don't mutate
  // this value while inlining happens. Empirically, this feature proved
  // critical in behavioral cloning - i.e. training a model to mimic the manual
  // heuristic's decisions - and, thus, equally important for training for
  // improvement.
  CallGraph CGraph(M);
  for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) {
    const std::vector<CallGraphNode *> &CGNodes = *I;
    unsigned Level = 0;
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (!F || F->isDeclaration())
        continue;
      for (auto &I : instructions(F)) {
        if (auto *CS = getInlinableCS(I)) {
          auto *Called = CS->getCalledFunction();
          auto Pos = FunctionLevels.find(&CG.get(*Called));
          // In a bottom-up traversal, an inlinable callee is either in the
          // same SCC, or in an already-visited SCC. So not finding its level
          // means we haven't visited it yet, meaning it's in this SCC.
          if (Pos == FunctionLevels.end())
            continue;
          Level = std::max(Level, Pos->second + 1);
        }
      }
    }
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (F && !F->isDeclaration())
        FunctionLevels[&CG.get(*F)] = Level;
    }
  }
  for (const auto &KVP : FunctionLevels) {
    AllNodes.insert(KVP.first);
    EdgeCount += getLocalCalls(KVP.first->getFunction());
  }
  NodeCount = AllNodes.size();
}

unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
  return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
}

void MLInlineAdvisor::onPassEntry(LazyCallGraph::SCC *LastSCC) {
  if (!LastSCC || ForceStop)
    return;
  FPICache.clear();
  // Function passes executed between InlinerPass runs may have changed the
  // module-wide features.
  // The cgscc pass manager rules are such that:
  // - if a pass leads to merging SCCs, then the pipeline is restarted on the
  //   merged SCC;
  // - if a pass leads to splitting the SCC, then we continue with one of the
  //   splits.
  // This means that NodesInLastSCC is a (not necessarily strict) superset of
  // the nodes that subsequent passes would have processed. In addition, if new
  // nodes were created by a pass (e.g. CoroSplit), they'd be adjacent to nodes
  // in the last SCC. So we just need to check the boundary of the nodes in
  // NodesInLastSCC for nodes we haven't seen. We don't care about the nature
  // of the edge (call or ref).
  NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
  while (!NodesInLastSCC.empty()) {
    const auto *N = *NodesInLastSCC.begin();
    NodesInLastSCC.erase(N);
    // The Function wrapped by N could have been deleted since we last saw it.
    if (N->isDead()) {
      assert(!N->getFunction().isDeclaration());
      continue;
    }
    ++NodeCount;
    EdgeCount += getLocalCalls(N->getFunction());
    for (const auto &E : *(*N)) {
      const auto *AdjNode = &E.getNode();
      assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
      auto I = AllNodes.insert(AdjNode);
      if (I.second)
        NodesInLastSCC.insert(AdjNode);
    }
  }

  EdgeCount -= EdgesOfLastSeenNodes;
  EdgesOfLastSeenNodes = 0;

  // (Re)use NodesInLastSCC to remember the nodes in the SCC right now, in case
  // the SCC is split before onPassExit and some nodes are split out.
  assert(NodesInLastSCC.empty());
  for (const auto &N : *LastSCC)
    NodesInLastSCC.insert(&N);
}
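// Illustrative walk-through of the bookkeeping in onPassEntry (hypothetical
// SCCs): if onPassExit last recorded SCC {A, B} and a pass then merged it with
// {C}, the pipeline restarts on {A, B, C}. Starting from NodesInLastSCC =
// {A, B}, the frontier expansion above reaches C through an edge of A or B, so
// the node and edge counts are refreshed without rescanning the whole module.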
void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
  // No need to keep this around - function passes will invalidate it.
  if (!KeepFPICache)
    FPICache.clear();
  if (!LastSCC || ForceStop)
    return;
  // Keep track of the nodes and edges we last saw. Then, in onPassEntry, we
  // update the node count and edge count from the subset of these nodes that
  // survived.
  EdgesOfLastSeenNodes = 0;

  // Check on the nodes that were in the SCC at onPassEntry.
  for (auto I = NodesInLastSCC.begin(); I != NodesInLastSCC.end();) {
    if ((*I)->isDead())
      NodesInLastSCC.erase(*I++);
    else
      EdgesOfLastSeenNodes += getLocalCalls((*I++)->getFunction());
  }

  // Check on the nodes that may have been added to the SCC since.
  for (const auto &N : *LastSCC) {
    assert(!N.isDead());
    auto I = NodesInLastSCC.insert(&N);
    if (I.second)
      EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
  }
  assert(NodeCount >= NodesInLastSCC.size());
  assert(EdgeCount >= EdgesOfLastSeenNodes);
}

int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
  return getCachedFPI(F).DirectCallsToDefinedFunctions;
}

// Update the internal state of the advisor, and force-invalidate feature
// analysis. Currently, we maintain minimal (and very simple) global state -
// the number of functions and the number of static calls. We also keep track
// of the total IR size in this module, to stop misbehaving policies at a
// certain bloat factor (SizeIncreaseThreshold).
void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
                                           bool CalleeWasDeleted) {
  assert(!ForceStop);
  Function *Caller = Advice.getCaller();
  Function *Callee = Advice.getCallee();
  // The caller features aren't valid anymore.
  {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<FunctionPropertiesAnalysis>();
    PA.abandon<DominatorTreeAnalysis>();
    PA.abandon<LoopAnalysis>();
    FAM.invalidate(*Caller, PA);
  }
  Advice.updateCachedCallerFPI(FAM);
  int64_t IRSizeAfter =
      getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
  CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
  if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
    ForceStop = true;

  // We can delta-update module-wide features. We know the inlining only
  // changed the caller, and maybe the callee (by deleting the latter). Nodes
  // are simple to update. For edges, we 'forget' the edges that the caller
  // and callee used to have before inlining, and add back what they currently
  // have together.
  int64_t NewCallerAndCalleeEdges =
      getCachedFPI(*Caller).DirectCallsToDefinedFunctions;

  if (CalleeWasDeleted)
    --NodeCount;
  else
    NewCallerAndCalleeEdges +=
        getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
  EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
  assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}
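// Worked example of the edge delta-update above (illustrative numbers only):
// if, before inlining, the caller had 3 local calls and the callee 2, then
// Advice.CallerAndCalleeEdges == 5. If inlining deletes the callee and leaves
// the caller with 4 local calls, NewCallerAndCalleeEdges == 4, so EdgeCount
// changes by 4 - 5 == -1 and NodeCount drops by one.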
int64_t MLInlineAdvisor::getModuleIRSize() const {
  int64_t Ret = 0;
  for (auto &F : M)
    if (!F.isDeclaration())
      Ret += getIRSize(F);
  return Ret;
}

FunctionPropertiesInfo &MLInlineAdvisor::getCachedFPI(Function &F) const {
  auto InsertPair =
      FPICache.insert(std::make_pair(&F, FunctionPropertiesInfo()));
  if (!InsertPair.second)
    return InsertPair.first->second;
  InsertPair.first->second = FAM.getResult<FunctionPropertiesAnalysis>(F);
  return InsertPair.first->second;
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
  if (auto Skip = getSkipAdviceIfUnreachableCallsite(CB))
    return Skip;

  auto &Caller = *CB.getCaller();
  auto &Callee = *CB.getCalledFunction();

  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);

  auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same thing if this is a recursive case.
  if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
      &Caller == &Callee)
    return getMandatoryAdvice(CB, false);

  bool Mandatory =
      MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;

  // If we need to stop, we don't want to track any further state changes, so
  // we just return the base InlineAdvice, which acts as a noop.
  if (ForceStop) {
    ORE.emit([&] {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
             << "Won't attempt inlining because module size grew too much.";
    });
    return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
  }

  int CostEstimate = 0;
  if (!Mandatory) {
    auto IsCallSiteInlinable =
        llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
    if (!IsCallSiteInlinable) {
      // We can't inline this for correctness reasons, so return the base
      // InlineAdvice, as we don't care about tracking any state changes (which
      // won't happen).
      return std::make_unique<InlineAdvice>(this, CB, ORE, false);
    }
    CostEstimate = *IsCallSiteInlinable;
  }

  const auto CostFeatures =
      llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
  if (!CostFeatures)
    return std::make_unique<InlineAdvice>(this, CB, ORE, false);

  if (Mandatory)
    return getMandatoryAdvice(CB, true);

  int64_t NrCtantParams = 0;
  for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I)
    NrCtantParams += (isa<Constant>(*I));

  auto &CallerBefore = getCachedFPI(Caller);
  auto &CalleeBefore = getCachedFPI(Callee);

  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
      CalleeBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) =
      getInitialFunctionLevel(Caller);
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) =
      CallerBefore.Uses;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CallerConditionallyExecutedBlocks) =
      CallerBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) =
      CallerBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::CalleeConditionallyExecutedBlocks) =
      CalleeBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) =
      CalleeBefore.Uses;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate;

  // Add the cost features.
  for (size_t I = 0;
       I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
    *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
        static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
  }

  return getAdviceFromModel(CB, ORE);
}
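// Note on the cost-feature loop above: the InlineCost features occupy the
// leading slots of FeatureMap (see the "must come first" comment at its
// definition), so inlineCostFeatureToMlFeature - declared in
// InlineModelFeatureMaps.h - is effectively an index cast: each
// InlineCostFeatureIndex maps to the FeatureIndex at the same position.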
std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
                                    OptimizationRemarkEmitter &ORE) {
  return std::make_unique<MLInlineAdvice>(
      this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
}

std::unique_ptr<InlineAdvice>
MLInlineAdvisor::getSkipAdviceIfUnreachableCallsite(CallBase &CB) {
  if (!FAM.getResult<DominatorTreeAnalysis>(*CB.getCaller())
           .isReachableFromEntry(CB.getParent()))
    return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), false);
  return nullptr;
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
                                                                  bool Advice) {
  // Make sure we track inlinings in all cases - mandatory or not.
  if (auto Skip = getSkipAdviceIfUnreachableCallsite(CB))
    return Skip;
  if (Advice && !ForceStop)
    return getMandatoryAdviceImpl(CB);

  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same if we are forced to stop - we no longer track state changes.
  return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
}

std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
}
void MLInlineAdvisor::print(raw_ostream &OS) const {
  OS << "[MLInlineAdvisor] Nodes: " << NodeCount << " Edges: " << EdgeCount
     << " EdgesOfLastSeenNodes: " << EdgesOfLastSeenNodes << "\n";
  OS << "[MLInlineAdvisor] FPI:\n";
  for (const auto &I : FPICache) {
    OS << I.getFirst()->getName() << ":\n";
    I.getSecond().print(OS);
    OS << "\n";
  }
  OS << "\n";
}

MLInlineAdvice::MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
                               OptimizationRemarkEmitter &ORE,
                               bool Recommendation)
    : InlineAdvice(Advisor, CB, ORE, Recommendation),
      CallerIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Caller)),
      CalleeIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Callee)),
      CallerAndCalleeEdges(Advisor->isForcedToStop()
                               ? 0
                               : (Advisor->getLocalCalls(*Caller) +
                                  Advisor->getLocalCalls(*Callee))),
      PreInlineCallerFPI(Advisor->getCachedFPI(*Caller)) {
  if (Recommendation)
    FPU.emplace(Advisor->getCachedFPI(*getCaller()), CB);
}

void MLInlineAdvice::reportContextForRemark(
    DiagnosticInfoOptimizationBase &OR) {
  using namespace ore;
  OR << NV("Callee", Callee->getName());
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    OR << NV(FeatureMap[I].name(),
             *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
  OR << NV("ShouldInline", isInliningRecommended());
}

void MLInlineAdvice::updateCachedCallerFPI(FunctionAnalysisManager &FAM) const {
  FPU->finish(FAM);
}

void MLInlineAdvice::recordInliningImpl() {
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted=*/false);
}

void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
                         Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted=*/true);
}

void MLInlineAdvice::recordUnsuccessfulInliningImpl(
    const InlineResult &Result) {
  getAdvisor()->getCachedFPI(*Caller) = PreInlineCallerFPI;
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
                               DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}

void MLInlineAdvice::recordUnattemptedInliningImpl() {
  assert(!FPU);
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningNotAttempted", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}