1 //===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the transformation that promotes indirect calls to 10 // conditional direct calls when the indirect-call value profile metadata is 11 // available. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" 19 #include "llvm/Analysis/IndirectCallVisitor.h" 20 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 21 #include "llvm/Analysis/ProfileSummaryInfo.h" 22 #include "llvm/IR/DiagnosticInfo.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/InstrTypes.h" 25 #include "llvm/IR/Instructions.h" 26 #include "llvm/IR/LLVMContext.h" 27 #include "llvm/IR/MDBuilder.h" 28 #include "llvm/IR/PassManager.h" 29 #include "llvm/IR/Value.h" 30 #include "llvm/ProfileData/InstrProf.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/Error.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include "llvm/Transforms/Instrumentation.h" 37 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 38 #include "llvm/Transforms/Utils/CallPromotionUtils.h" 39 #include <cassert> 40 #include <cstdint> 41 #include <memory> 42 #include <string> 43 #include <utility> 44 #include <vector> 45 46 using namespace llvm; 47 48 #define DEBUG_TYPE "pgo-icall-prom" 49 50 STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions."); 51 STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites."); 52 53 // Command line option to disable indirect-call promotion with the default as 54 // false. This is for debug purpose. 55 static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden, 56 cl::desc("Disable indirect call promotion")); 57 58 // Set the cutoff value for the promotion. If the value is other than 0, we 59 // stop the transformation once the total number of promotions equals the cutoff 60 // value. 61 // For debug use only. 62 static cl::opt<unsigned> 63 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, 64 cl::desc("Max number of promotions for this compilation")); 65 66 // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped. 67 // For debug use only. 68 static cl::opt<unsigned> 69 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, 70 cl::desc("Skip Callsite up to this number for this compilation")); 71 72 // Set if the pass is called in LTO optimization. The difference for LTO mode 73 // is the pass won't prefix the source module name to the internal linkage 74 // symbols. 75 static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, 76 cl::desc("Run indirect-call promotion in LTO " 77 "mode")); 78 79 // Set if the pass is called in SamplePGO mode. The difference for SamplePGO 80 // mode is it will add prof metadatato the created direct call. 81 static cl::opt<bool> 82 ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, 83 cl::desc("Run indirect-call promotion in SamplePGO mode")); 84 85 // If the option is set to true, only call instructions will be considered for 86 // transformation -- invoke instructions will be ignored. 87 static cl::opt<bool> 88 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, 89 cl::desc("Run indirect-call promotion for call instructions " 90 "only")); 91 92 // If the option is set to true, only invoke instructions will be considered for 93 // transformation -- call instructions will be ignored. 94 static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false), 95 cl::Hidden, 96 cl::desc("Run indirect-call promotion for " 97 "invoke instruction only")); 98 99 // Dump the function level IR if the transformation happened in this 100 // function. For debug use only. 101 static cl::opt<bool> 102 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, 103 cl::desc("Dump IR after transformation happens")); 104 105 namespace { 106 107 // Promote indirect calls to conditional direct calls, keeping track of 108 // thresholds. 109 class IndirectCallPromoter { 110 private: 111 Function &F; 112 113 // Symtab that maps indirect call profile values to function names and 114 // defines. 115 InstrProfSymtab *const Symtab; 116 117 const bool SamplePGO; 118 119 OptimizationRemarkEmitter &ORE; 120 121 // A struct that records the direct target and it's call count. 122 struct PromotionCandidate { 123 Function *const TargetFunction; 124 const uint64_t Count; 125 126 PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {} 127 }; 128 129 // Check if the indirect-call call site should be promoted. Return the number 130 // of promotions. Inst is the candidate indirect call, ValueDataRef 131 // contains the array of value profile data for profiled targets, 132 // TotalCount is the total profiled count of call executions, and 133 // NumCandidates is the number of candidate entries in ValueDataRef. 134 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite( 135 const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef, 136 uint64_t TotalCount, uint32_t NumCandidates); 137 138 // Promote a list of targets for one indirect-call callsite. Return 139 // the number of promotions. 140 uint32_t tryToPromote(CallBase &CB, 141 const std::vector<PromotionCandidate> &Candidates, 142 uint64_t &TotalCount); 143 144 public: 145 IndirectCallPromoter(Function &Func, InstrProfSymtab *Symtab, bool SamplePGO, 146 OptimizationRemarkEmitter &ORE) 147 : F(Func), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {} 148 IndirectCallPromoter(const IndirectCallPromoter &) = delete; 149 IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete; 150 151 bool processFunction(ProfileSummaryInfo *PSI); 152 }; 153 154 } // end anonymous namespace 155 156 // Indirect-call promotion heuristic. The direct targets are sorted based on 157 // the count. Stop at the first target that is not promoted. 158 std::vector<IndirectCallPromoter::PromotionCandidate> 159 IndirectCallPromoter::getPromotionCandidatesForCallSite( 160 const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef, 161 uint64_t TotalCount, uint32_t NumCandidates) { 162 std::vector<PromotionCandidate> Ret; 163 164 LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB 165 << " Num_targets: " << ValueDataRef.size() 166 << " Num_candidates: " << NumCandidates << "\n"); 167 NumOfPGOICallsites++; 168 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) { 169 LLVM_DEBUG(dbgs() << " Skip: User options.\n"); 170 return Ret; 171 } 172 173 for (uint32_t I = 0; I < NumCandidates; I++) { 174 uint64_t Count = ValueDataRef[I].Count; 175 assert(Count <= TotalCount); 176 (void)TotalCount; 177 uint64_t Target = ValueDataRef[I].Value; 178 LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count 179 << " Target_func: " << Target << "\n"); 180 181 if (ICPInvokeOnly && isa<CallInst>(CB)) { 182 LLVM_DEBUG(dbgs() << " Not promote: User options.\n"); 183 ORE.emit([&]() { 184 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB) 185 << " Not promote: User options"; 186 }); 187 break; 188 } 189 if (ICPCallOnly && isa<InvokeInst>(CB)) { 190 LLVM_DEBUG(dbgs() << " Not promote: User option.\n"); 191 ORE.emit([&]() { 192 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB) 193 << " Not promote: User options"; 194 }); 195 break; 196 } 197 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { 198 LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n"); 199 ORE.emit([&]() { 200 return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB) 201 << " Not promote: Cutoff reached"; 202 }); 203 break; 204 } 205 206 // Don't promote if the symbol is not defined in the module. This avoids 207 // creating a reference to a symbol that doesn't exist in the module 208 // This can happen when we compile with a sample profile collected from 209 // one binary but used for another, which may have profiled targets that 210 // aren't used in the new binary. We might have a declaration initially in 211 // the case where the symbol is globally dead in the binary and removed by 212 // ThinLTO. 213 Function *TargetFunction = Symtab->getFunction(Target); 214 if (TargetFunction == nullptr || TargetFunction->isDeclaration()) { 215 LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n"); 216 ORE.emit([&]() { 217 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB) 218 << "Cannot promote indirect call: target with md5sum " 219 << ore::NV("target md5sum", Target) << " not found"; 220 }); 221 break; 222 } 223 224 const char *Reason = nullptr; 225 if (!isLegalToPromote(CB, TargetFunction, &Reason)) { 226 using namespace ore; 227 228 ORE.emit([&]() { 229 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB) 230 << "Cannot promote indirect call to " 231 << NV("TargetFunction", TargetFunction) << " with count of " 232 << NV("Count", Count) << ": " << Reason; 233 }); 234 break; 235 } 236 237 Ret.push_back(PromotionCandidate(TargetFunction, Count)); 238 TotalCount -= Count; 239 } 240 return Ret; 241 } 242 243 CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee, 244 uint64_t Count, uint64_t TotalCount, 245 bool AttachProfToDirectCall, 246 OptimizationRemarkEmitter *ORE) { 247 248 uint64_t ElseCount = TotalCount - Count; 249 uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount); 250 uint64_t Scale = calculateCountScale(MaxCount); 251 MDBuilder MDB(CB.getContext()); 252 MDNode *BranchWeights = MDB.createBranchWeights( 253 scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale)); 254 255 CallBase &NewInst = 256 promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights); 257 258 if (AttachProfToDirectCall) { 259 MDBuilder MDB(NewInst.getContext()); 260 NewInst.setMetadata( 261 LLVMContext::MD_prof, 262 MDB.createBranchWeights({static_cast<uint32_t>(Count)})); 263 } 264 265 using namespace ore; 266 267 if (ORE) 268 ORE->emit([&]() { 269 return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB) 270 << "Promote indirect call to " << NV("DirectCallee", DirectCallee) 271 << " with count " << NV("Count", Count) << " out of " 272 << NV("TotalCount", TotalCount); 273 }); 274 return NewInst; 275 } 276 277 // Promote indirect-call to conditional direct-call for one callsite. 278 uint32_t IndirectCallPromoter::tryToPromote( 279 CallBase &CB, const std::vector<PromotionCandidate> &Candidates, 280 uint64_t &TotalCount) { 281 uint32_t NumPromoted = 0; 282 283 for (const auto &C : Candidates) { 284 uint64_t Count = C.Count; 285 pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO, 286 &ORE); 287 assert(TotalCount >= Count); 288 TotalCount -= Count; 289 NumOfPGOICallPromotion++; 290 NumPromoted++; 291 } 292 return NumPromoted; 293 } 294 295 // Traverse all the indirect-call callsite and get the value profile 296 // annotation to perform indirect-call promotion. 297 bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) { 298 bool Changed = false; 299 ICallPromotionAnalysis ICallAnalysis; 300 for (auto *CB : findIndirectCalls(F)) { 301 uint32_t NumVals, NumCandidates; 302 uint64_t TotalCount; 303 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction( 304 CB, NumVals, TotalCount, NumCandidates); 305 if (!NumCandidates || 306 (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount))) 307 continue; 308 auto PromotionCandidates = getPromotionCandidatesForCallSite( 309 *CB, ICallProfDataRef, TotalCount, NumCandidates); 310 uint32_t NumPromoted = tryToPromote(*CB, PromotionCandidates, TotalCount); 311 if (NumPromoted == 0) 312 continue; 313 314 Changed = true; 315 // Adjust the MD.prof metadata. First delete the old one. 316 CB->setMetadata(LLVMContext::MD_prof, nullptr); 317 // If all promoted, we don't need the MD.prof metadata. 318 if (TotalCount == 0 || NumPromoted == NumVals) 319 continue; 320 // Otherwise we need update with the un-promoted records back. 321 annotateValueSite(*F.getParent(), *CB, ICallProfDataRef.slice(NumPromoted), 322 TotalCount, IPVK_IndirectCallTarget, NumCandidates); 323 } 324 return Changed; 325 } 326 327 // A wrapper function that does the actual work. 328 static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, 329 bool SamplePGO, ModuleAnalysisManager &MAM) { 330 if (DisableICP) 331 return false; 332 InstrProfSymtab Symtab; 333 if (Error E = Symtab.create(M, InLTO)) { 334 std::string SymtabFailure = toString(std::move(E)); 335 M.getContext().emitError("Failed to create symtab: " + SymtabFailure); 336 return false; 337 } 338 bool Changed = false; 339 for (auto &F : M) { 340 if (F.isDeclaration() || F.hasOptNone()) 341 continue; 342 343 auto &FAM = 344 MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 345 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 346 347 IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO, ORE); 348 bool FuncChanged = CallPromoter.processFunction(PSI); 349 if (ICPDUMPAFTER && FuncChanged) { 350 LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs())); 351 LLVM_DEBUG(dbgs() << "\n"); 352 } 353 Changed |= FuncChanged; 354 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { 355 LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n"); 356 break; 357 } 358 } 359 return Changed; 360 } 361 362 PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, 363 ModuleAnalysisManager &MAM) { 364 ProfileSummaryInfo *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M); 365 366 if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode, 367 SamplePGO | ICPSamplePGOMode, MAM)) 368 return PreservedAnalyses::all(); 369 370 return PreservedAnalyses::none(); 371 } 372