1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "CFGMST.h" 52 #include "ValueProfileCollector.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/SmallVector.h" 57 #include "llvm/ADT/Statistic.h" 58 #include "llvm/ADT/StringRef.h" 59 #include "llvm/ADT/Triple.h" 60 #include "llvm/ADT/Twine.h" 61 #include "llvm/ADT/iterator.h" 62 #include "llvm/ADT/iterator_range.h" 63 #include "llvm/Analysis/BlockFrequencyInfo.h" 64 #include "llvm/Analysis/BranchProbabilityInfo.h" 65 #include "llvm/Analysis/CFG.h" 66 #include "llvm/Analysis/LoopInfo.h" 67 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 68 #include "llvm/Analysis/ProfileSummaryInfo.h" 69 #include "llvm/IR/Attributes.h" 70 #include "llvm/IR/BasicBlock.h" 71 #include "llvm/IR/CFG.h" 72 #include "llvm/IR/CallSite.h" 73 #include "llvm/IR/Comdat.h" 74 #include "llvm/IR/Constant.h" 75 #include "llvm/IR/Constants.h" 76 #include "llvm/IR/DiagnosticInfo.h" 77 #include "llvm/IR/Dominators.h" 78 #include "llvm/IR/Function.h" 79 #include "llvm/IR/GlobalAlias.h" 80 #include "llvm/IR/GlobalValue.h" 81 #include "llvm/IR/GlobalVariable.h" 82 #include "llvm/IR/IRBuilder.h" 83 #include "llvm/IR/InstVisitor.h" 84 #include "llvm/IR/InstrTypes.h" 85 #include "llvm/IR/Instruction.h" 86 #include "llvm/IR/Instructions.h" 87 #include "llvm/IR/IntrinsicInst.h" 88 #include "llvm/IR/Intrinsics.h" 89 #include "llvm/IR/LLVMContext.h" 90 #include "llvm/IR/MDBuilder.h" 91 #include "llvm/IR/Module.h" 92 #include "llvm/IR/PassManager.h" 93 #include "llvm/IR/ProfileSummary.h" 94 #include "llvm/IR/Type.h" 95 #include "llvm/IR/Value.h" 96 #include "llvm/InitializePasses.h" 97 #include "llvm/Pass.h" 98 #include "llvm/ProfileData/InstrProf.h" 99 #include "llvm/ProfileData/InstrProfReader.h" 100 #include "llvm/Support/BranchProbability.h" 101 #include "llvm/Support/CRC.h" 102 #include "llvm/Support/Casting.h" 103 #include "llvm/Support/CommandLine.h" 104 #include "llvm/Support/DOTGraphTraits.h" 105 #include "llvm/Support/Debug.h" 106 #include "llvm/Support/Error.h" 107 #include "llvm/Support/ErrorHandling.h" 108 #include "llvm/Support/GraphWriter.h" 109 #include "llvm/Support/raw_ostream.h" 110 #include "llvm/Transforms/Instrumentation.h" 111 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 112 #include "llvm/Transforms/Utils/MisExpect.h" 113 #include <algorithm> 114 #include <cassert> 115 #include <cstdint> 116 #include <memory> 117 #include <numeric> 118 #include <string> 119 #include <unordered_map> 120 #include <utility> 121 #include <vector> 122 123 using namespace llvm; 124 using ProfileCount = Function::ProfileCount; 125 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 126 127 #define DEBUG_TYPE "pgo-instrumentation" 128 129 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 130 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 131 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 132 STATISTIC(NumOfPGOEdge, "Number of edges."); 133 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 134 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 135 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 136 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 137 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 138 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 139 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 140 STATISTIC(NumOfCSPGOSelectInsts, 141 "Number of select instruction instrumented in CSPGO."); 142 STATISTIC(NumOfCSPGOMemIntrinsics, 143 "Number of mem intrinsics instrumented in CSPGO."); 144 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 145 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 146 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 147 STATISTIC(NumOfCSPGOFunc, 148 "Number of functions having valid profile counts in CSPGO."); 149 STATISTIC(NumOfCSPGOMismatch, 150 "Number of functions having mismatch profile in CSPGO."); 151 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 152 153 // Command line option to specify the file to read profile from. This is 154 // mainly used for testing. 155 static cl::opt<std::string> 156 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 157 cl::value_desc("filename"), 158 cl::desc("Specify the path of profile data file. This is" 159 "mainly for test purpose.")); 160 static cl::opt<std::string> PGOTestProfileRemappingFile( 161 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 162 cl::value_desc("filename"), 163 cl::desc("Specify the path of profile remapping file. This is mainly for " 164 "test purpose.")); 165 166 // Command line option to disable value profiling. The default is false: 167 // i.e. value profiling is enabled by default. This is for debug purpose. 168 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 169 cl::Hidden, 170 cl::desc("Disable Value Profiling")); 171 172 // Command line option to set the maximum number of VP annotations to write to 173 // the metadata for a single indirect call callsite. 174 static cl::opt<unsigned> MaxNumAnnotations( 175 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 176 cl::desc("Max number of annotations for a single indirect " 177 "call callsite")); 178 179 // Command line option to set the maximum number of value annotations 180 // to write to the metadata for a single memop intrinsic. 181 static cl::opt<unsigned> MaxNumMemOPAnnotations( 182 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 183 cl::desc("Max number of preicise value annotations for a single memop" 184 "intrinsic")); 185 186 // Command line option to control appending FunctionHash to the name of a COMDAT 187 // function. This is to avoid the hash mismatch caused by the preinliner. 188 static cl::opt<bool> DoComdatRenaming( 189 "do-comdat-renaming", cl::init(false), cl::Hidden, 190 cl::desc("Append function hash to the name of COMDAT function to avoid " 191 "function hash mismatch due to the preinliner")); 192 193 // Command line option to enable/disable the warning about missing profile 194 // information. 195 static cl::opt<bool> 196 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 197 cl::desc("Use this option to turn on/off " 198 "warnings about missing profile data for " 199 "functions.")); 200 201 // Command line option to enable/disable the warning about a hash mismatch in 202 // the profile data. 203 static cl::opt<bool> 204 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 205 cl::desc("Use this option to turn off/on " 206 "warnings about profile cfg mismatch.")); 207 208 // Command line option to enable/disable the warning about a hash mismatch in 209 // the profile data for Comdat functions, which often turns out to be false 210 // positive due to the pre-instrumentation inline. 211 static cl::opt<bool> 212 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 213 cl::Hidden, 214 cl::desc("The option is used to turn on/off " 215 "warnings about hash mismatch for comdat " 216 "functions.")); 217 218 // Command line option to enable/disable select instruction instrumentation. 219 static cl::opt<bool> 220 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 221 cl::desc("Use this option to turn on/off SELECT " 222 "instruction instrumentation. ")); 223 224 // Command line option to turn on CFG dot or text dump of raw profile counts 225 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 226 "pgo-view-raw-counts", cl::Hidden, 227 cl::desc("A boolean option to show CFG dag or text " 228 "with raw profile counts from " 229 "profile data. See also option " 230 "-pgo-view-counts. To limit graph " 231 "display to only one function, use " 232 "filtering option -view-bfi-func-name."), 233 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 234 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 235 clEnumValN(PGOVCT_Text, "text", "show in text."))); 236 237 // Command line option to enable/disable memop intrinsic call.size profiling. 238 static cl::opt<bool> 239 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 240 cl::desc("Use this option to turn on/off " 241 "memory intrinsic size profiling.")); 242 243 // Emit branch probability as optimization remarks. 244 static cl::opt<bool> 245 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 246 cl::desc("When this option is on, the annotated " 247 "branch probability will be emitted as " 248 "optimization remarks: -{Rpass|" 249 "pass-remarks}=pgo-instrumentation")); 250 251 // Command line option to turn on CFG dot dump after profile annotation. 252 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 253 extern cl::opt<PGOViewCountsType> PGOViewCounts; 254 255 // Command line option to specify the name of the function for CFG dump 256 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 257 extern cl::opt<std::string> ViewBlockFreqFuncName; 258 259 // Return a string describing the branch condition that can be 260 // used in static branch probability heuristics: 261 static std::string getBranchCondString(Instruction *TI) { 262 BranchInst *BI = dyn_cast<BranchInst>(TI); 263 if (!BI || !BI->isConditional()) 264 return std::string(); 265 266 Value *Cond = BI->getCondition(); 267 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 268 if (!CI) 269 return std::string(); 270 271 std::string result; 272 raw_string_ostream OS(result); 273 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 274 CI->getOperand(0)->getType()->print(OS, true); 275 276 Value *RHS = CI->getOperand(1); 277 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 278 if (CV) { 279 if (CV->isZero()) 280 OS << "_Zero"; 281 else if (CV->isOne()) 282 OS << "_One"; 283 else if (CV->isMinusOne()) 284 OS << "_MinusOne"; 285 else 286 OS << "_Const"; 287 } 288 OS.flush(); 289 return result; 290 } 291 292 static const char *ValueProfKindDescr[] = { 293 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 294 #include "llvm/ProfileData/InstrProfData.inc" 295 }; 296 297 namespace { 298 299 /// The select instruction visitor plays three roles specified 300 /// by the mode. In \c VM_counting mode, it simply counts the number of 301 /// select instructions. In \c VM_instrument mode, it inserts code to count 302 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 303 /// it reads the profile data and annotate the select instruction with metadata. 304 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 305 class PGOUseFunc; 306 307 /// Instruction Visitor class to visit select instructions. 308 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 309 Function &F; 310 unsigned NSIs = 0; // Number of select instructions instrumented. 311 VisitMode Mode = VM_counting; // Visiting mode. 312 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 313 unsigned TotalNumCtrs = 0; // Total number of counters 314 GlobalVariable *FuncNameVar = nullptr; 315 uint64_t FuncHash = 0; 316 PGOUseFunc *UseFunc = nullptr; 317 318 SelectInstVisitor(Function &Func) : F(Func) {} 319 320 void countSelects(Function &Func) { 321 NSIs = 0; 322 Mode = VM_counting; 323 visit(Func); 324 } 325 326 // Visit the IR stream and instrument all select instructions. \p 327 // Ind is a pointer to the counter index variable; \p TotalNC 328 // is the total number of counters; \p FNV is the pointer to the 329 // PGO function name var; \p FHash is the function hash. 330 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 331 GlobalVariable *FNV, uint64_t FHash) { 332 Mode = VM_instrument; 333 CurCtrIdx = Ind; 334 TotalNumCtrs = TotalNC; 335 FuncHash = FHash; 336 FuncNameVar = FNV; 337 visit(Func); 338 } 339 340 // Visit the IR stream and annotate all select instructions. 341 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 342 Mode = VM_annotate; 343 UseFunc = UF; 344 CurCtrIdx = Ind; 345 visit(Func); 346 } 347 348 void instrumentOneSelectInst(SelectInst &SI); 349 void annotateOneSelectInst(SelectInst &SI); 350 351 // Visit \p SI instruction and perform tasks according to visit mode. 352 void visitSelectInst(SelectInst &SI); 353 354 // Return the number of select instructions. This needs be called after 355 // countSelects(). 356 unsigned getNumOfSelectInsts() const { return NSIs; } 357 }; 358 359 360 class PGOInstrumentationGenLegacyPass : public ModulePass { 361 public: 362 static char ID; 363 364 PGOInstrumentationGenLegacyPass(bool IsCS = false) 365 : ModulePass(ID), IsCS(IsCS) { 366 initializePGOInstrumentationGenLegacyPassPass( 367 *PassRegistry::getPassRegistry()); 368 } 369 370 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 371 372 private: 373 // Is this is context-sensitive instrumentation. 374 bool IsCS; 375 bool runOnModule(Module &M) override; 376 377 void getAnalysisUsage(AnalysisUsage &AU) const override { 378 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 379 } 380 }; 381 382 class PGOInstrumentationUseLegacyPass : public ModulePass { 383 public: 384 static char ID; 385 386 // Provide the profile filename as the parameter. 387 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) 388 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { 389 if (!PGOTestProfileFile.empty()) 390 ProfileFileName = PGOTestProfileFile; 391 initializePGOInstrumentationUseLegacyPassPass( 392 *PassRegistry::getPassRegistry()); 393 } 394 395 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 396 397 private: 398 std::string ProfileFileName; 399 // Is this is context-sensitive instrumentation use. 400 bool IsCS; 401 402 bool runOnModule(Module &M) override; 403 404 void getAnalysisUsage(AnalysisUsage &AU) const override { 405 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 406 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 407 } 408 }; 409 410 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { 411 public: 412 static char ID; 413 StringRef getPassName() const override { 414 return "PGOInstrumentationGenCreateVarPass"; 415 } 416 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") 417 : ModulePass(ID), InstrProfileOutput(CSInstrName) { 418 initializePGOInstrumentationGenCreateVarLegacyPassPass( 419 *PassRegistry::getPassRegistry()); 420 } 421 422 private: 423 bool runOnModule(Module &M) override { 424 createProfileFileNameVar(M, InstrProfileOutput); 425 createIRLevelProfileFlagVar(M, true); 426 return false; 427 } 428 std::string InstrProfileOutput; 429 }; 430 431 } // end anonymous namespace 432 433 char PGOInstrumentationGenLegacyPass::ID = 0; 434 435 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 436 "PGO instrumentation.", false, false) 437 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 438 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 439 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 440 "PGO instrumentation.", false, false) 441 442 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { 443 return new PGOInstrumentationGenLegacyPass(IsCS); 444 } 445 446 char PGOInstrumentationUseLegacyPass::ID = 0; 447 448 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 449 "Read PGO instrumentation profile.", false, false) 450 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 451 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 452 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 453 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 454 "Read PGO instrumentation profile.", false, false) 455 456 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, 457 bool IsCS) { 458 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); 459 } 460 461 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; 462 463 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, 464 "pgo-instr-gen-create-var", 465 "Create PGO instrumentation version variable for CSPGO.", false, 466 false) 467 468 ModulePass * 469 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { 470 return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName); 471 } 472 473 namespace { 474 475 /// An MST based instrumentation for PGO 476 /// 477 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 478 /// in the function level. 479 struct PGOEdge { 480 // This class implements the CFG edges. Note the CFG can be a multi-graph. 481 // So there might be multiple edges with same SrcBB and DestBB. 482 const BasicBlock *SrcBB; 483 const BasicBlock *DestBB; 484 uint64_t Weight; 485 bool InMST = false; 486 bool Removed = false; 487 bool IsCritical = false; 488 489 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 490 : SrcBB(Src), DestBB(Dest), Weight(W) {} 491 492 // Return the information string of an edge. 493 const std::string infoString() const { 494 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 495 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 496 } 497 }; 498 499 // This class stores the auxiliary information for each BB. 500 struct BBInfo { 501 BBInfo *Group; 502 uint32_t Index; 503 uint32_t Rank = 0; 504 505 BBInfo(unsigned IX) : Group(this), Index(IX) {} 506 507 // Return the information string of this object. 508 const std::string infoString() const { 509 return (Twine("Index=") + Twine(Index)).str(); 510 } 511 512 // Empty function -- only applicable to UseBBInfo. 513 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 514 515 // Empty function -- only applicable to UseBBInfo. 516 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 517 }; 518 519 // This class implements the CFG edges. Note the CFG can be a multi-graph. 520 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 521 private: 522 Function &F; 523 524 // Is this is context-sensitive instrumentation. 525 bool IsCS; 526 527 // A map that stores the Comdat group in function F. 528 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 529 530 ValueProfileCollector VPC; 531 532 void computeCFGHash(); 533 void renameComdatFunction(); 534 535 public: 536 std::vector<std::vector<VPCandidateInfo>> ValueSites; 537 SelectInstVisitor SIVisitor; 538 std::string FuncName; 539 GlobalVariable *FuncNameVar; 540 541 // CFG hash value for this function. 542 uint64_t FunctionHash = 0; 543 544 // The Minimum Spanning Tree of function CFG. 545 CFGMST<Edge, BBInfo> MST; 546 547 // Collect all the BBs that will be instrumented, and store them in 548 // InstrumentBBs. 549 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 550 551 // Give an edge, find the BB that will be instrumented. 552 // Return nullptr if there is no BB to be instrumented. 553 BasicBlock *getInstrBB(Edge *E); 554 555 // Return the auxiliary BB information. 556 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 557 558 // Return the auxiliary BB information if available. 559 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 560 561 // Dump edges and BB information. 562 void dumpInfo(std::string Str = "") const { 563 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 564 Twine(FunctionHash) + "\t" + Str); 565 } 566 567 FuncPGOInstrumentation( 568 Function &Func, 569 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 570 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 571 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false) 572 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func), 573 ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) { 574 // This should be done before CFG hash computation. 575 SIVisitor.countSelects(Func); 576 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 577 if (!IsCS) { 578 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 579 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 580 NumOfPGOBB += MST.BBInfos.size(); 581 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 582 } else { 583 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 584 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 585 NumOfCSPGOBB += MST.BBInfos.size(); 586 } 587 588 FuncName = getPGOFuncName(F); 589 computeCFGHash(); 590 if (!ComdatMembers.empty()) 591 renameComdatFunction(); 592 LLVM_DEBUG(dumpInfo("after CFGMST")); 593 594 for (auto &E : MST.AllEdges) { 595 if (E->Removed) 596 continue; 597 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 598 if (!E->InMST) 599 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 600 } 601 602 if (CreateGlobalVar) 603 FuncNameVar = createPGOFuncNameVar(F, FuncName); 604 } 605 }; 606 607 } // end anonymous namespace 608 609 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 610 // value of each BB in the CFG. The higher 32 bits record the number of edges. 611 template <class Edge, class BBInfo> 612 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 613 std::vector<uint8_t> Indexes; 614 JamCRC JC; 615 for (auto &BB : F) { 616 const Instruction *TI = BB.getTerminator(); 617 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 618 BasicBlock *Succ = TI->getSuccessor(I); 619 auto BI = findBBInfo(Succ); 620 if (BI == nullptr) 621 continue; 622 uint32_t Index = BI->Index; 623 for (int J = 0; J < 4; J++) 624 Indexes.push_back((uint8_t)(Index >> (J * 8))); 625 } 626 } 627 JC.update(Indexes); 628 629 // Hash format for context sensitive profile. Reserve 4 bits for other 630 // information. 631 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 632 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 633 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 634 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 635 // Reserve bit 60-63 for other information purpose. 636 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 637 if (IsCS) 638 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 639 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 640 << " CRC = " << JC.getCRC() 641 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 642 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 643 << ValueSites[IPVK_IndirectCallTarget].size() 644 << ", Hash = " << FunctionHash << "\n";); 645 } 646 647 // Check if we can safely rename this Comdat function. 648 static bool canRenameComdat( 649 Function &F, 650 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 651 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 652 return false; 653 654 // FIXME: Current only handle those Comdat groups that only containing one 655 // function and function aliases. 656 // (1) For a Comdat group containing multiple functions, we need to have a 657 // unique postfix based on the hashes for each function. There is a 658 // non-trivial code refactoring to do this efficiently. 659 // (2) Variables can not be renamed, so we can not rename Comdat function in a 660 // group including global vars. 661 Comdat *C = F.getComdat(); 662 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 663 if (dyn_cast<GlobalAlias>(CM.second)) 664 continue; 665 Function *FM = dyn_cast<Function>(CM.second); 666 if (FM != &F) 667 return false; 668 } 669 return true; 670 } 671 672 // Append the CFGHash to the Comdat function name. 673 template <class Edge, class BBInfo> 674 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 675 if (!canRenameComdat(F, ComdatMembers)) 676 return; 677 std::string OrigName = F.getName().str(); 678 std::string NewFuncName = 679 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 680 F.setName(Twine(NewFuncName)); 681 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 682 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 683 Comdat *NewComdat; 684 Module *M = F.getParent(); 685 // For AvailableExternallyLinkage functions, change the linkage to 686 // LinkOnceODR and put them into comdat. This is because after renaming, there 687 // is no backup external copy available for the function. 688 if (!F.hasComdat()) { 689 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 690 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 691 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 692 F.setComdat(NewComdat); 693 return; 694 } 695 696 // This function belongs to a single function Comdat group. 697 Comdat *OrigComdat = F.getComdat(); 698 std::string NewComdatName = 699 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 700 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 701 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 702 703 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 704 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) { 705 // For aliases, change the name directly. 706 assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F); 707 std::string OrigGAName = GA->getName().str(); 708 GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash))); 709 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA); 710 continue; 711 } 712 // Must be a function. 713 Function *CF = dyn_cast<Function>(CM.second); 714 assert(CF); 715 CF->setComdat(NewComdat); 716 } 717 } 718 719 // Collect all the BBs that will be instruments and return them in 720 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 721 template <class Edge, class BBInfo> 722 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 723 std::vector<BasicBlock *> &InstrumentBBs) { 724 // Use a worklist as we will update the vector during the iteration. 725 std::vector<Edge *> EdgeList; 726 EdgeList.reserve(MST.AllEdges.size()); 727 for (auto &E : MST.AllEdges) 728 EdgeList.push_back(E.get()); 729 730 for (auto &E : EdgeList) { 731 BasicBlock *InstrBB = getInstrBB(E); 732 if (InstrBB) 733 InstrumentBBs.push_back(InstrBB); 734 } 735 736 // Set up InEdges/OutEdges for all BBs. 737 for (auto &E : MST.AllEdges) { 738 if (E->Removed) 739 continue; 740 const BasicBlock *SrcBB = E->SrcBB; 741 const BasicBlock *DestBB = E->DestBB; 742 BBInfo &SrcInfo = getBBInfo(SrcBB); 743 BBInfo &DestInfo = getBBInfo(DestBB); 744 SrcInfo.addOutEdge(E.get()); 745 DestInfo.addInEdge(E.get()); 746 } 747 } 748 749 // Given a CFG E to be instrumented, find which BB to place the instrumented 750 // code. The function will split the critical edge if necessary. 751 template <class Edge, class BBInfo> 752 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 753 if (E->InMST || E->Removed) 754 return nullptr; 755 756 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 757 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 758 // For a fake edge, instrument the real BB. 759 if (SrcBB == nullptr) 760 return DestBB; 761 if (DestBB == nullptr) 762 return SrcBB; 763 764 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 765 // There are basic blocks (such as catchswitch) cannot be instrumented. 766 // If the returned first insertion point is the end of BB, skip this BB. 767 if (BB->getFirstInsertionPt() == BB->end()) 768 return nullptr; 769 return BB; 770 }; 771 772 // Instrument the SrcBB if it has a single successor, 773 // otherwise, the DestBB if this is not a critical edge. 774 Instruction *TI = SrcBB->getTerminator(); 775 if (TI->getNumSuccessors() <= 1) 776 return canInstrument(SrcBB); 777 if (!E->IsCritical) 778 return canInstrument(DestBB); 779 780 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 781 BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); 782 if (!InstrBB) { 783 LLVM_DEBUG( 784 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 785 return nullptr; 786 } 787 // For a critical edge, we have to split. Instrument the newly 788 // created BB. 789 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 790 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 791 << " --> " << getBBInfo(DestBB).Index << "\n"); 792 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 793 MST.addEdge(SrcBB, InstrBB, 0); 794 // Second one: Add new edge of InstrBB->DestBB. 795 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 796 NewEdge1.InMST = true; 797 E->Removed = true; 798 799 return canInstrument(InstrBB); 800 } 801 802 // Visit all edge and instrument the edges not in MST, and do value profiling. 803 // Critical edges will be split. 804 static void instrumentOneFunc( 805 Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, 806 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 807 bool IsCS) { 808 // Split indirectbr critical edges here before computing the MST rather than 809 // later in getInstrBB() to avoid invalidating it. 810 SplitIndirectBrCriticalEdges(F, BPI, BFI); 811 812 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI, 813 BFI, IsCS); 814 std::vector<BasicBlock *> InstrumentBBs; 815 FuncInfo.getInstrumentBBs(InstrumentBBs); 816 unsigned NumCounters = 817 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 818 819 uint32_t I = 0; 820 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 821 for (auto *InstrBB : InstrumentBBs) { 822 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 823 assert(Builder.GetInsertPoint() != InstrBB->end() && 824 "Cannot get the Instrumentation point"); 825 Builder.CreateCall( 826 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 827 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 828 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 829 Builder.getInt32(I++)}); 830 } 831 832 // Now instrument select instructions: 833 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 834 FuncInfo.FunctionHash); 835 assert(I == NumCounters); 836 837 if (DisableValueProfiling) 838 return; 839 840 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 841 842 // For each VP Kind, walk the VP candidates and instrument each one. 843 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 844 unsigned SiteIndex = 0; 845 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 846 continue; 847 848 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 849 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 850 << " site: CallSite Index = " << SiteIndex << "\n"); 851 852 IRBuilder<> Builder(Cand.InsertPt); 853 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 854 "Cannot get the Instrumentation point"); 855 856 Value *ToProfile = nullptr; 857 if (Cand.V->getType()->isIntegerTy()) 858 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 859 else if (Cand.V->getType()->isPointerTy()) 860 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 861 assert(ToProfile && "value profiling Value is of unexpected type"); 862 863 Builder.CreateCall( 864 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 865 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 866 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 867 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}); 868 } 869 } // IPVK_First <= Kind <= IPVK_Last 870 } 871 872 namespace { 873 874 // This class represents a CFG edge in profile use compilation. 875 struct PGOUseEdge : public PGOEdge { 876 bool CountValid = false; 877 uint64_t CountValue = 0; 878 879 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 880 : PGOEdge(Src, Dest, W) {} 881 882 // Set edge count value 883 void setEdgeCount(uint64_t Value) { 884 CountValue = Value; 885 CountValid = true; 886 } 887 888 // Return the information string for this object. 889 const std::string infoString() const { 890 if (!CountValid) 891 return PGOEdge::infoString(); 892 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 893 .str(); 894 } 895 }; 896 897 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 898 899 // This class stores the auxiliary information for each BB. 900 struct UseBBInfo : public BBInfo { 901 uint64_t CountValue = 0; 902 bool CountValid; 903 int32_t UnknownCountInEdge = 0; 904 int32_t UnknownCountOutEdge = 0; 905 DirectEdges InEdges; 906 DirectEdges OutEdges; 907 908 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 909 910 UseBBInfo(unsigned IX, uint64_t C) 911 : BBInfo(IX), CountValue(C), CountValid(true) {} 912 913 // Set the profile count value for this BB. 914 void setBBInfoCount(uint64_t Value) { 915 CountValue = Value; 916 CountValid = true; 917 } 918 919 // Return the information string of this object. 920 const std::string infoString() const { 921 if (!CountValid) 922 return BBInfo::infoString(); 923 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 924 } 925 926 // Add an OutEdge and update the edge count. 927 void addOutEdge(PGOUseEdge *E) { 928 OutEdges.push_back(E); 929 UnknownCountOutEdge++; 930 } 931 932 // Add an InEdge and update the edge count. 933 void addInEdge(PGOUseEdge *E) { 934 InEdges.push_back(E); 935 UnknownCountInEdge++; 936 } 937 }; 938 939 } // end anonymous namespace 940 941 // Sum up the count values for all the edges. 942 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 943 uint64_t Total = 0; 944 for (auto &E : Edges) { 945 if (E->Removed) 946 continue; 947 Total += E->CountValue; 948 } 949 return Total; 950 } 951 952 namespace { 953 954 class PGOUseFunc { 955 public: 956 PGOUseFunc(Function &Func, Module *Modu, 957 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 958 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 959 ProfileSummaryInfo *PSI, bool IsCS) 960 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 961 FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS), 962 FreqAttr(FFA_Normal), IsCS(IsCS) {} 963 964 // Read counts for the instrumented BB from profile. 965 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros); 966 967 // Populate the counts for all BBs. 968 void populateCounters(); 969 970 // Set the branch weights based on the count values. 971 void setBranchWeights(); 972 973 // Annotate the value profile call sites for all value kind. 974 void annotateValueSites(); 975 976 // Annotate the value profile call sites for one value kind. 977 void annotateValueSites(uint32_t Kind); 978 979 // Annotate the irreducible loop header weights. 980 void annotateIrrLoopHeaderWeights(); 981 982 // The hotness of the function from the profile count. 983 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 984 985 // Return the function hotness from the profile. 986 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 987 988 // Return the function hash. 989 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 990 991 // Return the profile record for this function; 992 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 993 994 // Return the auxiliary BB information. 995 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 996 return FuncInfo.getBBInfo(BB); 997 } 998 999 // Return the auxiliary BB information if available. 1000 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1001 return FuncInfo.findBBInfo(BB); 1002 } 1003 1004 Function &getFunc() const { return F; } 1005 1006 void dumpInfo(std::string Str = "") const { 1007 FuncInfo.dumpInfo(Str); 1008 } 1009 1010 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1011 private: 1012 Function &F; 1013 Module *M; 1014 BlockFrequencyInfo *BFI; 1015 ProfileSummaryInfo *PSI; 1016 1017 // This member stores the shared information with class PGOGenFunc. 1018 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1019 1020 // The maximum count value in the profile. This is only used in PGO use 1021 // compilation. 1022 uint64_t ProgramMaxCount; 1023 1024 // Position of counter that remains to be read. 1025 uint32_t CountPosition = 0; 1026 1027 // Total size of the profile count for this function. 1028 uint32_t ProfileCountSize = 0; 1029 1030 // ProfileRecord for this function. 1031 InstrProfRecord ProfileRecord; 1032 1033 // Function hotness info derived from profile. 1034 FuncFreqAttr FreqAttr; 1035 1036 // Is to use the context sensitive profile. 1037 bool IsCS; 1038 1039 // Find the Instrumented BB and set the value. Return false on error. 1040 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1041 1042 // Set the edge counter value for the unknown edge -- there should be only 1043 // one unknown edge. 1044 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1045 1046 // Return FuncName string; 1047 const std::string getFuncName() const { return FuncInfo.FuncName; } 1048 1049 // Set the hot/cold inline hints based on the count values. 1050 // FIXME: This function should be removed once the functionality in 1051 // the inliner is implemented. 1052 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1053 if (PSI->isHotCount(EntryCount)) 1054 FreqAttr = FFA_Hot; 1055 else if (PSI->isColdCount(MaxCount)) 1056 FreqAttr = FFA_Cold; 1057 } 1058 }; 1059 1060 } // end anonymous namespace 1061 1062 // Visit all the edges and assign the count value for the instrumented 1063 // edges and the BB. Return false on error. 1064 bool PGOUseFunc::setInstrumentedCounts( 1065 const std::vector<uint64_t> &CountFromProfile) { 1066 1067 std::vector<BasicBlock *> InstrumentBBs; 1068 FuncInfo.getInstrumentBBs(InstrumentBBs); 1069 unsigned NumCounters = 1070 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1071 // The number of counters here should match the number of counters 1072 // in profile. Return if they mismatch. 1073 if (NumCounters != CountFromProfile.size()) { 1074 return false; 1075 } 1076 // Set the profile count to the Instrumented BBs. 1077 uint32_t I = 0; 1078 for (BasicBlock *InstrBB : InstrumentBBs) { 1079 uint64_t CountValue = CountFromProfile[I++]; 1080 UseBBInfo &Info = getBBInfo(InstrBB); 1081 Info.setBBInfoCount(CountValue); 1082 } 1083 ProfileCountSize = CountFromProfile.size(); 1084 CountPosition = I; 1085 1086 // Set the edge count and update the count of unknown edges for BBs. 1087 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1088 E->setEdgeCount(Value); 1089 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1090 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1091 }; 1092 1093 // Set the profile count the Instrumented edges. There are BBs that not in 1094 // MST but not instrumented. Need to set the edge count value so that we can 1095 // populate the profile counts later. 1096 for (auto &E : FuncInfo.MST.AllEdges) { 1097 if (E->Removed || E->InMST) 1098 continue; 1099 const BasicBlock *SrcBB = E->SrcBB; 1100 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1101 1102 // If only one out-edge, the edge profile count should be the same as BB 1103 // profile count. 1104 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1105 setEdgeCount(E.get(), SrcInfo.CountValue); 1106 else { 1107 const BasicBlock *DestBB = E->DestBB; 1108 UseBBInfo &DestInfo = getBBInfo(DestBB); 1109 // If only one in-edge, the edge profile count should be the same as BB 1110 // profile count. 1111 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1112 setEdgeCount(E.get(), DestInfo.CountValue); 1113 } 1114 if (E->CountValid) 1115 continue; 1116 // E's count should have been set from profile. If not, this meenas E skips 1117 // the instrumentation. We set the count to 0. 1118 setEdgeCount(E.get(), 0); 1119 } 1120 return true; 1121 } 1122 1123 // Set the count value for the unknown edge. There should be one and only one 1124 // unknown edge in Edges vector. 1125 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1126 for (auto &E : Edges) { 1127 if (E->CountValid) 1128 continue; 1129 E->setEdgeCount(Value); 1130 1131 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1132 getBBInfo(E->DestBB).UnknownCountInEdge--; 1133 return; 1134 } 1135 llvm_unreachable("Cannot find the unknown count edge"); 1136 } 1137 1138 // Read the profile from ProfileFileName and assign the value to the 1139 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1140 // Return true if the profile are successfully read, and false on errors. 1141 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) { 1142 auto &Ctx = M->getContext(); 1143 Expected<InstrProfRecord> Result = 1144 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1145 if (Error E = Result.takeError()) { 1146 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1147 auto Err = IPE.get(); 1148 bool SkipWarning = false; 1149 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1150 << FuncInfo.FuncName << ": "); 1151 if (Err == instrprof_error::unknown_function) { 1152 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1153 SkipWarning = !PGOWarnMissing; 1154 LLVM_DEBUG(dbgs() << "unknown function"); 1155 } else if (Err == instrprof_error::hash_mismatch || 1156 Err == instrprof_error::malformed) { 1157 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1158 SkipWarning = 1159 NoPGOWarnMismatch || 1160 (NoPGOWarnMismatchComdat && 1161 (F.hasComdat() || 1162 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1163 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1164 } 1165 1166 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1167 if (SkipWarning) 1168 return; 1169 1170 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1171 std::string(" Hash = ") + 1172 std::to_string(FuncInfo.FunctionHash); 1173 1174 Ctx.diagnose( 1175 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1176 }); 1177 return false; 1178 } 1179 ProfileRecord = std::move(Result.get()); 1180 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1181 1182 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1183 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1184 uint64_t ValueSum = 0; 1185 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1186 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1187 ValueSum += CountFromProfile[I]; 1188 } 1189 AllZeros = (ValueSum == 0); 1190 1191 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1192 1193 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1194 getBBInfo(nullptr).UnknownCountInEdge = 2; 1195 1196 if (!setInstrumentedCounts(CountFromProfile)) { 1197 LLVM_DEBUG( 1198 dbgs() << "Inconsistent number of counts, skipping this function"); 1199 Ctx.diagnose(DiagnosticInfoPGOProfile( 1200 M->getName().data(), 1201 Twine("Inconsistent number of counts in ") + F.getName().str() 1202 + Twine(": the profile may be stale or there is a function name collision."), 1203 DS_Warning)); 1204 return false; 1205 } 1206 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1207 return true; 1208 } 1209 1210 // Populate the counters from instrumented BBs to all BBs. 1211 // In the end of this operation, all BBs should have a valid count value. 1212 void PGOUseFunc::populateCounters() { 1213 bool Changes = true; 1214 unsigned NumPasses = 0; 1215 while (Changes) { 1216 NumPasses++; 1217 Changes = false; 1218 1219 // For efficient traversal, it's better to start from the end as most 1220 // of the instrumented edges are at the end. 1221 for (auto &BB : reverse(F)) { 1222 UseBBInfo *Count = findBBInfo(&BB); 1223 if (Count == nullptr) 1224 continue; 1225 if (!Count->CountValid) { 1226 if (Count->UnknownCountOutEdge == 0) { 1227 Count->CountValue = sumEdgeCount(Count->OutEdges); 1228 Count->CountValid = true; 1229 Changes = true; 1230 } else if (Count->UnknownCountInEdge == 0) { 1231 Count->CountValue = sumEdgeCount(Count->InEdges); 1232 Count->CountValid = true; 1233 Changes = true; 1234 } 1235 } 1236 if (Count->CountValid) { 1237 if (Count->UnknownCountOutEdge == 1) { 1238 uint64_t Total = 0; 1239 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1240 // If the one of the successor block can early terminate (no-return), 1241 // we can end up with situation where out edge sum count is larger as 1242 // the source BB's count is collected by a post-dominated block. 1243 if (Count->CountValue > OutSum) 1244 Total = Count->CountValue - OutSum; 1245 setEdgeCount(Count->OutEdges, Total); 1246 Changes = true; 1247 } 1248 if (Count->UnknownCountInEdge == 1) { 1249 uint64_t Total = 0; 1250 uint64_t InSum = sumEdgeCount(Count->InEdges); 1251 if (Count->CountValue > InSum) 1252 Total = Count->CountValue - InSum; 1253 setEdgeCount(Count->InEdges, Total); 1254 Changes = true; 1255 } 1256 } 1257 } 1258 } 1259 1260 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1261 #ifndef NDEBUG 1262 // Assert every BB has a valid counter. 1263 for (auto &BB : F) { 1264 auto BI = findBBInfo(&BB); 1265 if (BI == nullptr) 1266 continue; 1267 assert(BI->CountValid && "BB count is not valid"); 1268 } 1269 #endif 1270 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1271 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1272 uint64_t FuncMaxCount = FuncEntryCount; 1273 for (auto &BB : F) { 1274 auto BI = findBBInfo(&BB); 1275 if (BI == nullptr) 1276 continue; 1277 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1278 } 1279 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1280 1281 // Now annotate select instructions 1282 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1283 assert(CountPosition == ProfileCountSize); 1284 1285 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1286 } 1287 1288 // Assign the scaled count values to the BB with multiple out edges. 1289 void PGOUseFunc::setBranchWeights() { 1290 // Generate MD_prof metadata for every branch instruction. 1291 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1292 << " IsCS=" << IsCS << "\n"); 1293 for (auto &BB : F) { 1294 Instruction *TI = BB.getTerminator(); 1295 if (TI->getNumSuccessors() < 2) 1296 continue; 1297 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1298 isa<IndirectBrInst>(TI))) 1299 continue; 1300 1301 if (getBBInfo(&BB).CountValue == 0) 1302 continue; 1303 1304 // We have a non-zero Branch BB. 1305 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1306 unsigned Size = BBCountInfo.OutEdges.size(); 1307 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1308 uint64_t MaxCount = 0; 1309 for (unsigned s = 0; s < Size; s++) { 1310 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1311 const BasicBlock *SrcBB = E->SrcBB; 1312 const BasicBlock *DestBB = E->DestBB; 1313 if (DestBB == nullptr) 1314 continue; 1315 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1316 uint64_t EdgeCount = E->CountValue; 1317 if (EdgeCount > MaxCount) 1318 MaxCount = EdgeCount; 1319 EdgeCounts[SuccNum] = EdgeCount; 1320 } 1321 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1322 } 1323 } 1324 1325 static bool isIndirectBrTarget(BasicBlock *BB) { 1326 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 1327 if (isa<IndirectBrInst>((*PI)->getTerminator())) 1328 return true; 1329 } 1330 return false; 1331 } 1332 1333 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1334 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1335 // Find irr loop headers 1336 for (auto &BB : F) { 1337 // As a heuristic also annotate indrectbr targets as they have a high chance 1338 // to become an irreducible loop header after the indirectbr tail 1339 // duplication. 1340 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1341 Instruction *TI = BB.getTerminator(); 1342 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1343 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1344 } 1345 } 1346 } 1347 1348 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1349 Module *M = F.getParent(); 1350 IRBuilder<> Builder(&SI); 1351 Type *Int64Ty = Builder.getInt64Ty(); 1352 Type *I8PtrTy = Builder.getInt8PtrTy(); 1353 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1354 Builder.CreateCall( 1355 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1356 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1357 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1358 Builder.getInt32(*CurCtrIdx), Step}); 1359 ++(*CurCtrIdx); 1360 } 1361 1362 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1363 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1364 assert(*CurCtrIdx < CountFromProfile.size() && 1365 "Out of bound access of counters"); 1366 uint64_t SCounts[2]; 1367 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1368 ++(*CurCtrIdx); 1369 uint64_t TotalCount = 0; 1370 auto BI = UseFunc->findBBInfo(SI.getParent()); 1371 if (BI != nullptr) 1372 TotalCount = BI->CountValue; 1373 // False Count 1374 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1375 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1376 if (MaxCount) 1377 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1378 } 1379 1380 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1381 if (!PGOInstrSelect) 1382 return; 1383 // FIXME: do not handle this yet. 1384 if (SI.getCondition()->getType()->isVectorTy()) 1385 return; 1386 1387 switch (Mode) { 1388 case VM_counting: 1389 NSIs++; 1390 return; 1391 case VM_instrument: 1392 instrumentOneSelectInst(SI); 1393 return; 1394 case VM_annotate: 1395 annotateOneSelectInst(SI); 1396 return; 1397 } 1398 1399 llvm_unreachable("Unknown visiting mode"); 1400 } 1401 1402 // Traverse all valuesites and annotate the instructions for all value kind. 1403 void PGOUseFunc::annotateValueSites() { 1404 if (DisableValueProfiling) 1405 return; 1406 1407 // Create the PGOFuncName meta data. 1408 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1409 1410 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1411 annotateValueSites(Kind); 1412 } 1413 1414 // Annotate the instructions for a specific value kind. 1415 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1416 assert(Kind <= IPVK_Last); 1417 unsigned ValueSiteIndex = 0; 1418 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1419 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1420 if (NumValueSites != ValueSites.size()) { 1421 auto &Ctx = M->getContext(); 1422 Ctx.diagnose(DiagnosticInfoPGOProfile( 1423 M->getName().data(), 1424 Twine("Inconsistent number of value sites for ") + 1425 Twine(ValueProfKindDescr[Kind]) + 1426 Twine(" profiling in \"") + F.getName().str() + 1427 Twine("\", possibly due to the use of a stale profile."), 1428 DS_Warning)); 1429 return; 1430 } 1431 1432 for (VPCandidateInfo &I : ValueSites) { 1433 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1434 << "): Index = " << ValueSiteIndex << " out of " 1435 << NumValueSites << "\n"); 1436 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1437 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1438 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1439 : MaxNumAnnotations); 1440 ValueSiteIndex++; 1441 } 1442 } 1443 1444 // Collect the set of members for each Comdat in module M and store 1445 // in ComdatMembers. 1446 static void collectComdatMembers( 1447 Module &M, 1448 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1449 if (!DoComdatRenaming) 1450 return; 1451 for (Function &F : M) 1452 if (Comdat *C = F.getComdat()) 1453 ComdatMembers.insert(std::make_pair(C, &F)); 1454 for (GlobalVariable &GV : M.globals()) 1455 if (Comdat *C = GV.getComdat()) 1456 ComdatMembers.insert(std::make_pair(C, &GV)); 1457 for (GlobalAlias &GA : M.aliases()) 1458 if (Comdat *C = GA.getComdat()) 1459 ComdatMembers.insert(std::make_pair(C, &GA)); 1460 } 1461 1462 static bool InstrumentAllFunctions( 1463 Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1464 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1465 // For the context-sensitve instrumentation, we should have a separated pass 1466 // (before LTO/ThinLTO linking) to create these variables. 1467 if (!IsCS) 1468 createIRLevelProfileFlagVar(M, /* IsCS */ false); 1469 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1470 collectComdatMembers(M, ComdatMembers); 1471 1472 for (auto &F : M) { 1473 if (F.isDeclaration()) 1474 continue; 1475 auto *BPI = LookupBPI(F); 1476 auto *BFI = LookupBFI(F); 1477 instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS); 1478 } 1479 return true; 1480 } 1481 1482 PreservedAnalyses 1483 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1484 createProfileFileNameVar(M, CSInstrName); 1485 createIRLevelProfileFlagVar(M, /* IsCS */ true); 1486 return PreservedAnalyses::all(); 1487 } 1488 1489 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1490 if (skipModule(M)) 1491 return false; 1492 1493 auto LookupBPI = [this](Function &F) { 1494 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1495 }; 1496 auto LookupBFI = [this](Function &F) { 1497 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1498 }; 1499 return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS); 1500 } 1501 1502 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1503 ModuleAnalysisManager &AM) { 1504 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1505 auto LookupBPI = [&FAM](Function &F) { 1506 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1507 }; 1508 1509 auto LookupBFI = [&FAM](Function &F) { 1510 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1511 }; 1512 1513 if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS)) 1514 return PreservedAnalyses::all(); 1515 1516 return PreservedAnalyses::none(); 1517 } 1518 1519 static bool annotateAllFunctions( 1520 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1521 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1522 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1523 ProfileSummaryInfo *PSI, bool IsCS) { 1524 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1525 auto &Ctx = M.getContext(); 1526 // Read the counter array from file. 1527 auto ReaderOrErr = 1528 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1529 if (Error E = ReaderOrErr.takeError()) { 1530 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1531 Ctx.diagnose( 1532 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1533 }); 1534 return false; 1535 } 1536 1537 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1538 std::move(ReaderOrErr.get()); 1539 if (!PGOReader) { 1540 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1541 StringRef("Cannot get PGOReader"))); 1542 return false; 1543 } 1544 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1545 return false; 1546 1547 // TODO: might need to change the warning once the clang option is finalized. 1548 if (!PGOReader->isIRLevelProfile()) { 1549 Ctx.diagnose(DiagnosticInfoPGOProfile( 1550 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1551 return false; 1552 } 1553 1554 // Add the profile summary (read from the header of the indexed summary) here 1555 // so that we can use it below when reading counters (which checks if the 1556 // function should be marked with a cold or inlinehint attribute). 1557 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1558 IsCS ? ProfileSummary::PSK_CSInstr 1559 : ProfileSummary::PSK_Instr); 1560 1561 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1562 collectComdatMembers(M, ComdatMembers); 1563 std::vector<Function *> HotFunctions; 1564 std::vector<Function *> ColdFunctions; 1565 for (auto &F : M) { 1566 if (F.isDeclaration()) 1567 continue; 1568 auto *BPI = LookupBPI(F); 1569 auto *BFI = LookupBFI(F); 1570 // Split indirectbr critical edges here before computing the MST rather than 1571 // later in getInstrBB() to avoid invalidating it. 1572 SplitIndirectBrCriticalEdges(F, BPI, BFI); 1573 PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, PSI, IsCS); 1574 bool AllZeros = false; 1575 if (!Func.readCounters(PGOReader.get(), AllZeros)) 1576 continue; 1577 if (AllZeros) { 1578 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1579 if (Func.getProgramMaxCount() != 0) 1580 ColdFunctions.push_back(&F); 1581 continue; 1582 } 1583 Func.populateCounters(); 1584 Func.setBranchWeights(); 1585 Func.annotateValueSites(); 1586 Func.annotateIrrLoopHeaderWeights(); 1587 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1588 if (FreqAttr == PGOUseFunc::FFA_Cold) 1589 ColdFunctions.push_back(&F); 1590 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1591 HotFunctions.push_back(&F); 1592 if (PGOViewCounts != PGOVCT_None && 1593 (ViewBlockFreqFuncName.empty() || 1594 F.getName().equals(ViewBlockFreqFuncName))) { 1595 LoopInfo LI{DominatorTree(F)}; 1596 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1597 std::make_unique<BranchProbabilityInfo>(F, LI); 1598 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1599 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1600 if (PGOViewCounts == PGOVCT_Graph) 1601 NewBFI->view(); 1602 else if (PGOViewCounts == PGOVCT_Text) { 1603 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1604 NewBFI->print(dbgs()); 1605 } 1606 } 1607 if (PGOViewRawCounts != PGOVCT_None && 1608 (ViewBlockFreqFuncName.empty() || 1609 F.getName().equals(ViewBlockFreqFuncName))) { 1610 if (PGOViewRawCounts == PGOVCT_Graph) 1611 if (ViewBlockFreqFuncName.empty()) 1612 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1613 else 1614 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1615 else if (PGOViewRawCounts == PGOVCT_Text) { 1616 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1617 Func.dumpInfo(); 1618 } 1619 } 1620 } 1621 1622 // Set function hotness attribute from the profile. 1623 // We have to apply these attributes at the end because their presence 1624 // can affect the BranchProbabilityInfo of any callers, resulting in an 1625 // inconsistent MST between prof-gen and prof-use. 1626 for (auto &F : HotFunctions) { 1627 F->addFnAttr(Attribute::InlineHint); 1628 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1629 << "\n"); 1630 } 1631 for (auto &F : ColdFunctions) { 1632 F->addFnAttr(Attribute::Cold); 1633 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 1634 << "\n"); 1635 } 1636 return true; 1637 } 1638 1639 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 1640 std::string RemappingFilename, 1641 bool IsCS) 1642 : ProfileFileName(std::move(Filename)), 1643 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 1644 if (!PGOTestProfileFile.empty()) 1645 ProfileFileName = PGOTestProfileFile; 1646 if (!PGOTestProfileRemappingFile.empty()) 1647 ProfileRemappingFileName = PGOTestProfileRemappingFile; 1648 } 1649 1650 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1651 ModuleAnalysisManager &AM) { 1652 1653 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1654 auto LookupBPI = [&FAM](Function &F) { 1655 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1656 }; 1657 1658 auto LookupBFI = [&FAM](Function &F) { 1659 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1660 }; 1661 1662 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 1663 1664 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 1665 LookupBPI, LookupBFI, PSI, IsCS)) 1666 return PreservedAnalyses::all(); 1667 1668 return PreservedAnalyses::none(); 1669 } 1670 1671 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 1672 if (skipModule(M)) 1673 return false; 1674 1675 auto LookupBPI = [this](Function &F) { 1676 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1677 }; 1678 auto LookupBFI = [this](Function &F) { 1679 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1680 }; 1681 1682 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 1683 return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, PSI, 1684 IsCS); 1685 } 1686 1687 static std::string getSimpleNodeName(const BasicBlock *Node) { 1688 if (!Node->getName().empty()) 1689 return Node->getName(); 1690 1691 std::string SimpleNodeName; 1692 raw_string_ostream OS(SimpleNodeName); 1693 Node->printAsOperand(OS, false); 1694 return OS.str(); 1695 } 1696 1697 void llvm::setProfMetadata(Module *M, Instruction *TI, 1698 ArrayRef<uint64_t> EdgeCounts, 1699 uint64_t MaxCount) { 1700 MDBuilder MDB(M->getContext()); 1701 assert(MaxCount > 0 && "Bad max count"); 1702 uint64_t Scale = calculateCountScale(MaxCount); 1703 SmallVector<unsigned, 4> Weights; 1704 for (const auto &ECI : EdgeCounts) 1705 Weights.push_back(scaleBranchCount(ECI, Scale)); 1706 1707 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 1708 : Weights) { 1709 dbgs() << W << " "; 1710 } dbgs() << "\n";); 1711 1712 misexpect::verifyMisExpect(TI, Weights, TI->getContext()); 1713 1714 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1715 if (EmitBranchProbability) { 1716 std::string BrCondStr = getBranchCondString(TI); 1717 if (BrCondStr.empty()) 1718 return; 1719 1720 uint64_t WSum = 1721 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 1722 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 1723 uint64_t TotalCount = 1724 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 1725 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 1726 Scale = calculateCountScale(WSum); 1727 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 1728 scaleBranchCount(WSum, Scale)); 1729 std::string BranchProbStr; 1730 raw_string_ostream OS(BranchProbStr); 1731 OS << BP; 1732 OS << " (total count : " << TotalCount << ")"; 1733 OS.flush(); 1734 Function *F = TI->getParent()->getParent(); 1735 OptimizationRemarkEmitter ORE(F); 1736 ORE.emit([&]() { 1737 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 1738 << BrCondStr << " is true with probability : " << BranchProbStr; 1739 }); 1740 } 1741 } 1742 1743 namespace llvm { 1744 1745 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 1746 MDBuilder MDB(M->getContext()); 1747 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 1748 MDB.createIrrLoopHeaderWeight(Count)); 1749 } 1750 1751 template <> struct GraphTraits<PGOUseFunc *> { 1752 using NodeRef = const BasicBlock *; 1753 using ChildIteratorType = succ_const_iterator; 1754 using nodes_iterator = pointer_iterator<Function::const_iterator>; 1755 1756 static NodeRef getEntryNode(const PGOUseFunc *G) { 1757 return &G->getFunc().front(); 1758 } 1759 1760 static ChildIteratorType child_begin(const NodeRef N) { 1761 return succ_begin(N); 1762 } 1763 1764 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 1765 1766 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 1767 return nodes_iterator(G->getFunc().begin()); 1768 } 1769 1770 static nodes_iterator nodes_end(const PGOUseFunc *G) { 1771 return nodes_iterator(G->getFunc().end()); 1772 } 1773 }; 1774 1775 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 1776 explicit DOTGraphTraits(bool isSimple = false) 1777 : DefaultDOTGraphTraits(isSimple) {} 1778 1779 static std::string getGraphName(const PGOUseFunc *G) { 1780 return G->getFunc().getName(); 1781 } 1782 1783 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 1784 std::string Result; 1785 raw_string_ostream OS(Result); 1786 1787 OS << getSimpleNodeName(Node) << ":\\l"; 1788 UseBBInfo *BI = Graph->findBBInfo(Node); 1789 OS << "Count : "; 1790 if (BI && BI->CountValid) 1791 OS << BI->CountValue << "\\l"; 1792 else 1793 OS << "Unknown\\l"; 1794 1795 if (!PGOInstrSelect) 1796 return Result; 1797 1798 for (auto BI = Node->begin(); BI != Node->end(); ++BI) { 1799 auto *I = &*BI; 1800 if (!isa<SelectInst>(I)) 1801 continue; 1802 // Display scaled counts for SELECT instruction: 1803 OS << "SELECT : { T = "; 1804 uint64_t TC, FC; 1805 bool HasProf = I->extractProfMetadata(TC, FC); 1806 if (!HasProf) 1807 OS << "Unknown, F = Unknown }\\l"; 1808 else 1809 OS << TC << ", F = " << FC << " }\\l"; 1810 } 1811 return Result; 1812 } 1813 }; 1814 1815 } // end namespace llvm 1816