1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "CFGMST.h" 51 #include "ValueProfileCollector.h" 52 #include "llvm/ADT/APInt.h" 53 #include "llvm/ADT/ArrayRef.h" 54 #include "llvm/ADT/STLExtras.h" 55 #include "llvm/ADT/SmallVector.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/ADT/StringRef.h" 58 #include "llvm/ADT/Triple.h" 59 #include "llvm/ADT/Twine.h" 60 #include "llvm/ADT/iterator.h" 61 #include "llvm/ADT/iterator_range.h" 62 #include "llvm/Analysis/BlockFrequencyInfo.h" 63 #include "llvm/Analysis/BranchProbabilityInfo.h" 64 #include "llvm/Analysis/CFG.h" 65 #include "llvm/Analysis/LoopInfo.h" 66 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 67 #include "llvm/Analysis/ProfileSummaryInfo.h" 68 #include "llvm/IR/Attributes.h" 69 #include "llvm/IR/BasicBlock.h" 70 #include "llvm/IR/CFG.h" 71 #include "llvm/IR/CallSite.h" 72 #include "llvm/IR/Comdat.h" 73 #include "llvm/IR/Constant.h" 74 #include "llvm/IR/Constants.h" 75 #include "llvm/IR/DiagnosticInfo.h" 76 #include "llvm/IR/Dominators.h" 77 #include "llvm/IR/Function.h" 78 #include "llvm/IR/GlobalAlias.h" 79 #include "llvm/IR/GlobalValue.h" 80 #include "llvm/IR/GlobalVariable.h" 81 #include "llvm/IR/IRBuilder.h" 82 #include "llvm/IR/InstVisitor.h" 83 #include "llvm/IR/InstrTypes.h" 84 #include "llvm/IR/Instruction.h" 85 #include "llvm/IR/Instructions.h" 86 #include "llvm/IR/IntrinsicInst.h" 87 #include "llvm/IR/Intrinsics.h" 88 #include "llvm/IR/LLVMContext.h" 89 #include "llvm/IR/MDBuilder.h" 90 #include "llvm/IR/Module.h" 91 #include "llvm/IR/PassManager.h" 92 #include "llvm/IR/ProfileSummary.h" 93 #include "llvm/IR/Type.h" 94 #include "llvm/IR/Value.h" 95 #include "llvm/Pass.h" 96 #include "llvm/ProfileData/InstrProf.h" 97 #include "llvm/ProfileData/InstrProfReader.h" 98 #include "llvm/Support/BranchProbability.h" 99 #include "llvm/Support/CRC.h" 100 #include "llvm/Support/Casting.h" 101 #include "llvm/Support/CommandLine.h" 102 #include "llvm/Support/DOTGraphTraits.h" 103 #include "llvm/Support/Debug.h" 104 #include "llvm/Support/Error.h" 105 #include "llvm/Support/ErrorHandling.h" 106 #include "llvm/Support/GraphWriter.h" 107 #include "llvm/Support/raw_ostream.h" 108 #include "llvm/Transforms/Instrumentation.h" 109 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 110 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 111 #include "llvm/Transforms/Utils/MisExpect.h" 112 #include <algorithm> 113 #include <cassert> 114 #include <cstdint> 115 #include <memory> 116 #include <numeric> 117 #include <string> 118 #include <unordered_map> 119 #include <utility> 120 #include <vector> 121 122 using namespace llvm; 123 using ProfileCount = Function::ProfileCount; 124 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 125 126 #define DEBUG_TYPE "pgo-instrumentation" 127 128 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 129 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 130 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 131 STATISTIC(NumOfPGOEdge, "Number of edges."); 132 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 133 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 134 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 135 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 136 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 137 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 138 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 139 STATISTIC(NumOfCSPGOSelectInsts, 140 "Number of select instruction instrumented in CSPGO."); 141 STATISTIC(NumOfCSPGOMemIntrinsics, 142 "Number of mem intrinsics instrumented in CSPGO."); 143 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 144 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 145 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 146 STATISTIC(NumOfCSPGOFunc, 147 "Number of functions having valid profile counts in CSPGO."); 148 STATISTIC(NumOfCSPGOMismatch, 149 "Number of functions having mismatch profile in CSPGO."); 150 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 151 152 // Command line option to specify the file to read profile from. This is 153 // mainly used for testing. 154 static cl::opt<std::string> 155 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 156 cl::value_desc("filename"), 157 cl::desc("Specify the path of profile data file. This is" 158 "mainly for test purpose.")); 159 static cl::opt<std::string> PGOTestProfileRemappingFile( 160 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 161 cl::value_desc("filename"), 162 cl::desc("Specify the path of profile remapping file. This is mainly for " 163 "test purpose.")); 164 165 // Command line option to disable value profiling. The default is false: 166 // i.e. value profiling is enabled by default. This is for debug purpose. 167 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 168 cl::Hidden, 169 cl::desc("Disable Value Profiling")); 170 171 // Command line option to set the maximum number of VP annotations to write to 172 // the metadata for a single indirect call callsite. 173 static cl::opt<unsigned> MaxNumAnnotations( 174 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 175 cl::desc("Max number of annotations for a single indirect " 176 "call callsite")); 177 178 // Command line option to set the maximum number of value annotations 179 // to write to the metadata for a single memop intrinsic. 180 static cl::opt<unsigned> MaxNumMemOPAnnotations( 181 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 182 cl::desc("Max number of preicise value annotations for a single memop" 183 "intrinsic")); 184 185 // Command line option to control appending FunctionHash to the name of a COMDAT 186 // function. This is to avoid the hash mismatch caused by the preinliner. 187 static cl::opt<bool> DoComdatRenaming( 188 "do-comdat-renaming", cl::init(false), cl::Hidden, 189 cl::desc("Append function hash to the name of COMDAT function to avoid " 190 "function hash mismatch due to the preinliner")); 191 192 // Command line option to enable/disable the warning about missing profile 193 // information. 194 static cl::opt<bool> 195 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 196 cl::desc("Use this option to turn on/off " 197 "warnings about missing profile data for " 198 "functions.")); 199 200 // Command line option to enable/disable the warning about a hash mismatch in 201 // the profile data. 202 static cl::opt<bool> 203 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 204 cl::desc("Use this option to turn off/on " 205 "warnings about profile cfg mismatch.")); 206 207 // Command line option to enable/disable the warning about a hash mismatch in 208 // the profile data for Comdat functions, which often turns out to be false 209 // positive due to the pre-instrumentation inline. 210 static cl::opt<bool> 211 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 212 cl::Hidden, 213 cl::desc("The option is used to turn on/off " 214 "warnings about hash mismatch for comdat " 215 "functions.")); 216 217 // Command line option to enable/disable select instruction instrumentation. 218 static cl::opt<bool> 219 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 220 cl::desc("Use this option to turn on/off SELECT " 221 "instruction instrumentation. ")); 222 223 // Command line option to turn on CFG dot or text dump of raw profile counts 224 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 225 "pgo-view-raw-counts", cl::Hidden, 226 cl::desc("A boolean option to show CFG dag or text " 227 "with raw profile counts from " 228 "profile data. See also option " 229 "-pgo-view-counts. To limit graph " 230 "display to only one function, use " 231 "filtering option -view-bfi-func-name."), 232 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 233 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 234 clEnumValN(PGOVCT_Text, "text", "show in text."))); 235 236 // Command line option to enable/disable memop intrinsic call.size profiling. 237 static cl::opt<bool> 238 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 239 cl::desc("Use this option to turn on/off " 240 "memory intrinsic size profiling.")); 241 242 // Emit branch probability as optimization remarks. 243 static cl::opt<bool> 244 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 245 cl::desc("When this option is on, the annotated " 246 "branch probability will be emitted as " 247 "optimization remarks: -{Rpass|" 248 "pass-remarks}=pgo-instrumentation")); 249 250 // Command line option to turn on CFG dot dump after profile annotation. 251 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 252 extern cl::opt<PGOViewCountsType> PGOViewCounts; 253 254 // Command line option to specify the name of the function for CFG dump 255 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 256 extern cl::opt<std::string> ViewBlockFreqFuncName; 257 258 // Return a string describing the branch condition that can be 259 // used in static branch probability heuristics: 260 static std::string getBranchCondString(Instruction *TI) { 261 BranchInst *BI = dyn_cast<BranchInst>(TI); 262 if (!BI || !BI->isConditional()) 263 return std::string(); 264 265 Value *Cond = BI->getCondition(); 266 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 267 if (!CI) 268 return std::string(); 269 270 std::string result; 271 raw_string_ostream OS(result); 272 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 273 CI->getOperand(0)->getType()->print(OS, true); 274 275 Value *RHS = CI->getOperand(1); 276 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 277 if (CV) { 278 if (CV->isZero()) 279 OS << "_Zero"; 280 else if (CV->isOne()) 281 OS << "_One"; 282 else if (CV->isMinusOne()) 283 OS << "_MinusOne"; 284 else 285 OS << "_Const"; 286 } 287 OS.flush(); 288 return result; 289 } 290 291 static const char *ValueProfKindDescr[] = { 292 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 293 #include "llvm/ProfileData/InstrProfData.inc" 294 }; 295 296 namespace { 297 298 /// The select instruction visitor plays three roles specified 299 /// by the mode. In \c VM_counting mode, it simply counts the number of 300 /// select instructions. In \c VM_instrument mode, it inserts code to count 301 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 302 /// it reads the profile data and annotate the select instruction with metadata. 303 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 304 class PGOUseFunc; 305 306 /// Instruction Visitor class to visit select instructions. 307 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 308 Function &F; 309 unsigned NSIs = 0; // Number of select instructions instrumented. 310 VisitMode Mode = VM_counting; // Visiting mode. 311 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 312 unsigned TotalNumCtrs = 0; // Total number of counters 313 GlobalVariable *FuncNameVar = nullptr; 314 uint64_t FuncHash = 0; 315 PGOUseFunc *UseFunc = nullptr; 316 317 SelectInstVisitor(Function &Func) : F(Func) {} 318 319 void countSelects(Function &Func) { 320 NSIs = 0; 321 Mode = VM_counting; 322 visit(Func); 323 } 324 325 // Visit the IR stream and instrument all select instructions. \p 326 // Ind is a pointer to the counter index variable; \p TotalNC 327 // is the total number of counters; \p FNV is the pointer to the 328 // PGO function name var; \p FHash is the function hash. 329 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 330 GlobalVariable *FNV, uint64_t FHash) { 331 Mode = VM_instrument; 332 CurCtrIdx = Ind; 333 TotalNumCtrs = TotalNC; 334 FuncHash = FHash; 335 FuncNameVar = FNV; 336 visit(Func); 337 } 338 339 // Visit the IR stream and annotate all select instructions. 340 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 341 Mode = VM_annotate; 342 UseFunc = UF; 343 CurCtrIdx = Ind; 344 visit(Func); 345 } 346 347 void instrumentOneSelectInst(SelectInst &SI); 348 void annotateOneSelectInst(SelectInst &SI); 349 350 // Visit \p SI instruction and perform tasks according to visit mode. 351 void visitSelectInst(SelectInst &SI); 352 353 // Return the number of select instructions. This needs be called after 354 // countSelects(). 355 unsigned getNumOfSelectInsts() const { return NSIs; } 356 }; 357 358 359 class PGOInstrumentationGenLegacyPass : public ModulePass { 360 public: 361 static char ID; 362 363 PGOInstrumentationGenLegacyPass(bool IsCS = false) 364 : ModulePass(ID), IsCS(IsCS) { 365 initializePGOInstrumentationGenLegacyPassPass( 366 *PassRegistry::getPassRegistry()); 367 } 368 369 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 370 371 private: 372 // Is this is context-sensitive instrumentation. 373 bool IsCS; 374 bool runOnModule(Module &M) override; 375 376 void getAnalysisUsage(AnalysisUsage &AU) const override { 377 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 378 } 379 }; 380 381 class PGOInstrumentationUseLegacyPass : public ModulePass { 382 public: 383 static char ID; 384 385 // Provide the profile filename as the parameter. 386 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) 387 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { 388 if (!PGOTestProfileFile.empty()) 389 ProfileFileName = PGOTestProfileFile; 390 initializePGOInstrumentationUseLegacyPassPass( 391 *PassRegistry::getPassRegistry()); 392 } 393 394 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 395 396 private: 397 std::string ProfileFileName; 398 // Is this is context-sensitive instrumentation use. 399 bool IsCS; 400 401 bool runOnModule(Module &M) override; 402 403 void getAnalysisUsage(AnalysisUsage &AU) const override { 404 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 405 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 406 } 407 }; 408 409 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { 410 public: 411 static char ID; 412 StringRef getPassName() const override { 413 return "PGOInstrumentationGenCreateVarPass"; 414 } 415 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") 416 : ModulePass(ID), InstrProfileOutput(CSInstrName) { 417 initializePGOInstrumentationGenCreateVarLegacyPassPass( 418 *PassRegistry::getPassRegistry()); 419 } 420 421 private: 422 bool runOnModule(Module &M) override { 423 createProfileFileNameVar(M, InstrProfileOutput); 424 createIRLevelProfileFlagVar(M, true); 425 return false; 426 } 427 std::string InstrProfileOutput; 428 }; 429 430 } // end anonymous namespace 431 432 char PGOInstrumentationGenLegacyPass::ID = 0; 433 434 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 435 "PGO instrumentation.", false, false) 436 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 437 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 438 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 439 "PGO instrumentation.", false, false) 440 441 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { 442 return new PGOInstrumentationGenLegacyPass(IsCS); 443 } 444 445 char PGOInstrumentationUseLegacyPass::ID = 0; 446 447 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 448 "Read PGO instrumentation profile.", false, false) 449 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 450 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 451 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 452 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 453 "Read PGO instrumentation profile.", false, false) 454 455 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, 456 bool IsCS) { 457 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); 458 } 459 460 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; 461 462 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, 463 "pgo-instr-gen-create-var", 464 "Create PGO instrumentation version variable for CSPGO.", false, 465 false) 466 467 ModulePass * 468 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { 469 return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName); 470 } 471 472 namespace { 473 474 /// An MST based instrumentation for PGO 475 /// 476 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 477 /// in the function level. 478 struct PGOEdge { 479 // This class implements the CFG edges. Note the CFG can be a multi-graph. 480 // So there might be multiple edges with same SrcBB and DestBB. 481 const BasicBlock *SrcBB; 482 const BasicBlock *DestBB; 483 uint64_t Weight; 484 bool InMST = false; 485 bool Removed = false; 486 bool IsCritical = false; 487 488 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 489 : SrcBB(Src), DestBB(Dest), Weight(W) {} 490 491 // Return the information string of an edge. 492 const std::string infoString() const { 493 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 494 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 495 } 496 }; 497 498 // This class stores the auxiliary information for each BB. 499 struct BBInfo { 500 BBInfo *Group; 501 uint32_t Index; 502 uint32_t Rank = 0; 503 504 BBInfo(unsigned IX) : Group(this), Index(IX) {} 505 506 // Return the information string of this object. 507 const std::string infoString() const { 508 return (Twine("Index=") + Twine(Index)).str(); 509 } 510 511 // Empty function -- only applicable to UseBBInfo. 512 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 513 514 // Empty function -- only applicable to UseBBInfo. 515 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 516 }; 517 518 // This class implements the CFG edges. Note the CFG can be a multi-graph. 519 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 520 private: 521 Function &F; 522 523 // Is this is context-sensitive instrumentation. 524 bool IsCS; 525 526 // A map that stores the Comdat group in function F. 527 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 528 529 ValueProfileCollector VPC; 530 531 void computeCFGHash(); 532 void renameComdatFunction(); 533 534 public: 535 std::vector<std::vector<VPCandidateInfo>> ValueSites; 536 SelectInstVisitor SIVisitor; 537 std::string FuncName; 538 GlobalVariable *FuncNameVar; 539 540 // CFG hash value for this function. 541 uint64_t FunctionHash = 0; 542 543 // The Minimum Spanning Tree of function CFG. 544 CFGMST<Edge, BBInfo> MST; 545 546 // Collect all the BBs that will be instrumented, and store them in 547 // InstrumentBBs. 548 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 549 550 // Give an edge, find the BB that will be instrumented. 551 // Return nullptr if there is no BB to be instrumented. 552 BasicBlock *getInstrBB(Edge *E); 553 554 // Return the auxiliary BB information. 555 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 556 557 // Return the auxiliary BB information if available. 558 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 559 560 // Dump edges and BB information. 561 void dumpInfo(std::string Str = "") const { 562 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 563 Twine(FunctionHash) + "\t" + Str); 564 } 565 566 FuncPGOInstrumentation( 567 Function &Func, 568 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 569 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 570 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false) 571 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func), 572 ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) { 573 // This should be done before CFG hash computation. 574 SIVisitor.countSelects(Func); 575 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 576 if (!IsCS) { 577 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 578 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 579 NumOfPGOBB += MST.BBInfos.size(); 580 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 581 } else { 582 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 583 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 584 NumOfCSPGOBB += MST.BBInfos.size(); 585 } 586 587 FuncName = getPGOFuncName(F); 588 computeCFGHash(); 589 if (!ComdatMembers.empty()) 590 renameComdatFunction(); 591 LLVM_DEBUG(dumpInfo("after CFGMST")); 592 593 for (auto &E : MST.AllEdges) { 594 if (E->Removed) 595 continue; 596 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 597 if (!E->InMST) 598 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 599 } 600 601 if (CreateGlobalVar) 602 FuncNameVar = createPGOFuncNameVar(F, FuncName); 603 } 604 }; 605 606 } // end anonymous namespace 607 608 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 609 // value of each BB in the CFG. The higher 32 bits record the number of edges. 610 template <class Edge, class BBInfo> 611 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 612 std::vector<uint8_t> Indexes; 613 JamCRC JC; 614 for (auto &BB : F) { 615 const Instruction *TI = BB.getTerminator(); 616 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 617 BasicBlock *Succ = TI->getSuccessor(I); 618 auto BI = findBBInfo(Succ); 619 if (BI == nullptr) 620 continue; 621 uint32_t Index = BI->Index; 622 for (int J = 0; J < 4; J++) 623 Indexes.push_back((uint8_t)(Index >> (J * 8))); 624 } 625 } 626 JC.update(Indexes); 627 628 // Hash format for context sensitive profile. Reserve 4 bits for other 629 // information. 630 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 631 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 632 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 633 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 634 // Reserve bit 60-63 for other information purpose. 635 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 636 if (IsCS) 637 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 638 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 639 << " CRC = " << JC.getCRC() 640 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 641 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 642 << ValueSites[IPVK_IndirectCallTarget].size() 643 << ", Hash = " << FunctionHash << "\n";); 644 } 645 646 // Check if we can safely rename this Comdat function. 647 static bool canRenameComdat( 648 Function &F, 649 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 650 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 651 return false; 652 653 // FIXME: Current only handle those Comdat groups that only containing one 654 // function and function aliases. 655 // (1) For a Comdat group containing multiple functions, we need to have a 656 // unique postfix based on the hashes for each function. There is a 657 // non-trivial code refactoring to do this efficiently. 658 // (2) Variables can not be renamed, so we can not rename Comdat function in a 659 // group including global vars. 660 Comdat *C = F.getComdat(); 661 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 662 if (dyn_cast<GlobalAlias>(CM.second)) 663 continue; 664 Function *FM = dyn_cast<Function>(CM.second); 665 if (FM != &F) 666 return false; 667 } 668 return true; 669 } 670 671 // Append the CFGHash to the Comdat function name. 672 template <class Edge, class BBInfo> 673 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 674 if (!canRenameComdat(F, ComdatMembers)) 675 return; 676 std::string OrigName = F.getName().str(); 677 std::string NewFuncName = 678 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 679 F.setName(Twine(NewFuncName)); 680 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 681 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 682 Comdat *NewComdat; 683 Module *M = F.getParent(); 684 // For AvailableExternallyLinkage functions, change the linkage to 685 // LinkOnceODR and put them into comdat. This is because after renaming, there 686 // is no backup external copy available for the function. 687 if (!F.hasComdat()) { 688 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 689 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 690 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 691 F.setComdat(NewComdat); 692 return; 693 } 694 695 // This function belongs to a single function Comdat group. 696 Comdat *OrigComdat = F.getComdat(); 697 std::string NewComdatName = 698 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 699 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 700 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 701 702 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 703 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) { 704 // For aliases, change the name directly. 705 assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F); 706 std::string OrigGAName = GA->getName().str(); 707 GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash))); 708 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA); 709 continue; 710 } 711 // Must be a function. 712 Function *CF = dyn_cast<Function>(CM.second); 713 assert(CF); 714 CF->setComdat(NewComdat); 715 } 716 } 717 718 // Collect all the BBs that will be instruments and return them in 719 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 720 template <class Edge, class BBInfo> 721 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 722 std::vector<BasicBlock *> &InstrumentBBs) { 723 // Use a worklist as we will update the vector during the iteration. 724 std::vector<Edge *> EdgeList; 725 EdgeList.reserve(MST.AllEdges.size()); 726 for (auto &E : MST.AllEdges) 727 EdgeList.push_back(E.get()); 728 729 for (auto &E : EdgeList) { 730 BasicBlock *InstrBB = getInstrBB(E); 731 if (InstrBB) 732 InstrumentBBs.push_back(InstrBB); 733 } 734 735 // Set up InEdges/OutEdges for all BBs. 736 for (auto &E : MST.AllEdges) { 737 if (E->Removed) 738 continue; 739 const BasicBlock *SrcBB = E->SrcBB; 740 const BasicBlock *DestBB = E->DestBB; 741 BBInfo &SrcInfo = getBBInfo(SrcBB); 742 BBInfo &DestInfo = getBBInfo(DestBB); 743 SrcInfo.addOutEdge(E.get()); 744 DestInfo.addInEdge(E.get()); 745 } 746 } 747 748 // Given a CFG E to be instrumented, find which BB to place the instrumented 749 // code. The function will split the critical edge if necessary. 750 template <class Edge, class BBInfo> 751 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 752 if (E->InMST || E->Removed) 753 return nullptr; 754 755 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 756 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 757 // For a fake edge, instrument the real BB. 758 if (SrcBB == nullptr) 759 return DestBB; 760 if (DestBB == nullptr) 761 return SrcBB; 762 763 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 764 // There are basic blocks (such as catchswitch) cannot be instrumented. 765 // If the returned first insertion point is the end of BB, skip this BB. 766 if (BB->getFirstInsertionPt() == BB->end()) 767 return nullptr; 768 return BB; 769 }; 770 771 // Instrument the SrcBB if it has a single successor, 772 // otherwise, the DestBB if this is not a critical edge. 773 Instruction *TI = SrcBB->getTerminator(); 774 if (TI->getNumSuccessors() <= 1) 775 return canInstrument(SrcBB); 776 if (!E->IsCritical) 777 return canInstrument(DestBB); 778 779 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 780 BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); 781 if (!InstrBB) { 782 LLVM_DEBUG( 783 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 784 return nullptr; 785 } 786 // For a critical edge, we have to split. Instrument the newly 787 // created BB. 788 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 789 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 790 << " --> " << getBBInfo(DestBB).Index << "\n"); 791 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 792 MST.addEdge(SrcBB, InstrBB, 0); 793 // Second one: Add new edge of InstrBB->DestBB. 794 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 795 NewEdge1.InMST = true; 796 E->Removed = true; 797 798 return canInstrument(InstrBB); 799 } 800 801 // Visit all edge and instrument the edges not in MST, and do value profiling. 802 // Critical edges will be split. 803 static void instrumentOneFunc( 804 Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, 805 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 806 bool IsCS) { 807 // Split indirectbr critical edges here before computing the MST rather than 808 // later in getInstrBB() to avoid invalidating it. 809 SplitIndirectBrCriticalEdges(F, BPI, BFI); 810 811 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI, 812 BFI, IsCS); 813 std::vector<BasicBlock *> InstrumentBBs; 814 FuncInfo.getInstrumentBBs(InstrumentBBs); 815 unsigned NumCounters = 816 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 817 818 uint32_t I = 0; 819 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 820 for (auto *InstrBB : InstrumentBBs) { 821 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 822 assert(Builder.GetInsertPoint() != InstrBB->end() && 823 "Cannot get the Instrumentation point"); 824 Builder.CreateCall( 825 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 826 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 827 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 828 Builder.getInt32(I++)}); 829 } 830 831 // Now instrument select instructions: 832 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 833 FuncInfo.FunctionHash); 834 assert(I == NumCounters); 835 836 if (DisableValueProfiling) 837 return; 838 839 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 840 841 // For each VP Kind, walk the VP candidates and instrument each one. 842 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 843 unsigned SiteIndex = 0; 844 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 845 continue; 846 847 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 848 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 849 << " site: CallSite Index = " << SiteIndex << "\n"); 850 851 IRBuilder<> Builder(Cand.InsertPt); 852 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 853 "Cannot get the Instrumentation point"); 854 855 Value *ToProfile = nullptr; 856 if (Cand.V->getType()->isIntegerTy()) 857 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 858 else if (Cand.V->getType()->isPointerTy()) 859 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 860 assert(ToProfile && "value profiling Value is of unexpected type"); 861 862 Builder.CreateCall( 863 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 864 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 865 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 866 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}); 867 } 868 } // IPVK_First <= Kind <= IPVK_Last 869 } 870 871 namespace { 872 873 // This class represents a CFG edge in profile use compilation. 874 struct PGOUseEdge : public PGOEdge { 875 bool CountValid = false; 876 uint64_t CountValue = 0; 877 878 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 879 : PGOEdge(Src, Dest, W) {} 880 881 // Set edge count value 882 void setEdgeCount(uint64_t Value) { 883 CountValue = Value; 884 CountValid = true; 885 } 886 887 // Return the information string for this object. 888 const std::string infoString() const { 889 if (!CountValid) 890 return PGOEdge::infoString(); 891 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 892 .str(); 893 } 894 }; 895 896 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 897 898 // This class stores the auxiliary information for each BB. 899 struct UseBBInfo : public BBInfo { 900 uint64_t CountValue = 0; 901 bool CountValid; 902 int32_t UnknownCountInEdge = 0; 903 int32_t UnknownCountOutEdge = 0; 904 DirectEdges InEdges; 905 DirectEdges OutEdges; 906 907 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 908 909 UseBBInfo(unsigned IX, uint64_t C) 910 : BBInfo(IX), CountValue(C), CountValid(true) {} 911 912 // Set the profile count value for this BB. 913 void setBBInfoCount(uint64_t Value) { 914 CountValue = Value; 915 CountValid = true; 916 } 917 918 // Return the information string of this object. 919 const std::string infoString() const { 920 if (!CountValid) 921 return BBInfo::infoString(); 922 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 923 } 924 925 // Add an OutEdge and update the edge count. 926 void addOutEdge(PGOUseEdge *E) { 927 OutEdges.push_back(E); 928 UnknownCountOutEdge++; 929 } 930 931 // Add an InEdge and update the edge count. 932 void addInEdge(PGOUseEdge *E) { 933 InEdges.push_back(E); 934 UnknownCountInEdge++; 935 } 936 }; 937 938 } // end anonymous namespace 939 940 // Sum up the count values for all the edges. 941 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 942 uint64_t Total = 0; 943 for (auto &E : Edges) { 944 if (E->Removed) 945 continue; 946 Total += E->CountValue; 947 } 948 return Total; 949 } 950 951 namespace { 952 953 class PGOUseFunc { 954 public: 955 PGOUseFunc(Function &Func, Module *Modu, 956 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 957 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 958 ProfileSummaryInfo *PSI, bool IsCS) 959 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 960 FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS), 961 FreqAttr(FFA_Normal), IsCS(IsCS) {} 962 963 // Read counts for the instrumented BB from profile. 964 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros); 965 966 // Populate the counts for all BBs. 967 void populateCounters(); 968 969 // Set the branch weights based on the count values. 970 void setBranchWeights(); 971 972 // Annotate the value profile call sites for all value kind. 973 void annotateValueSites(); 974 975 // Annotate the value profile call sites for one value kind. 976 void annotateValueSites(uint32_t Kind); 977 978 // Annotate the irreducible loop header weights. 979 void annotateIrrLoopHeaderWeights(); 980 981 // The hotness of the function from the profile count. 982 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 983 984 // Return the function hotness from the profile. 985 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 986 987 // Return the function hash. 988 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 989 990 // Return the profile record for this function; 991 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 992 993 // Return the auxiliary BB information. 994 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 995 return FuncInfo.getBBInfo(BB); 996 } 997 998 // Return the auxiliary BB information if available. 999 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1000 return FuncInfo.findBBInfo(BB); 1001 } 1002 1003 Function &getFunc() const { return F; } 1004 1005 void dumpInfo(std::string Str = "") const { 1006 FuncInfo.dumpInfo(Str); 1007 } 1008 1009 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1010 private: 1011 Function &F; 1012 Module *M; 1013 BlockFrequencyInfo *BFI; 1014 ProfileSummaryInfo *PSI; 1015 1016 // This member stores the shared information with class PGOGenFunc. 1017 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1018 1019 // The maximum count value in the profile. This is only used in PGO use 1020 // compilation. 1021 uint64_t ProgramMaxCount; 1022 1023 // Position of counter that remains to be read. 1024 uint32_t CountPosition = 0; 1025 1026 // Total size of the profile count for this function. 1027 uint32_t ProfileCountSize = 0; 1028 1029 // ProfileRecord for this function. 1030 InstrProfRecord ProfileRecord; 1031 1032 // Function hotness info derived from profile. 1033 FuncFreqAttr FreqAttr; 1034 1035 // Is to use the context sensitive profile. 1036 bool IsCS; 1037 1038 // Find the Instrumented BB and set the value. Return false on error. 1039 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1040 1041 // Set the edge counter value for the unknown edge -- there should be only 1042 // one unknown edge. 1043 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1044 1045 // Return FuncName string; 1046 const std::string getFuncName() const { return FuncInfo.FuncName; } 1047 1048 // Set the hot/cold inline hints based on the count values. 1049 // FIXME: This function should be removed once the functionality in 1050 // the inliner is implemented. 1051 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1052 if (PSI->isHotCount(EntryCount)) 1053 FreqAttr = FFA_Hot; 1054 else if (PSI->isColdCount(MaxCount)) 1055 FreqAttr = FFA_Cold; 1056 } 1057 }; 1058 1059 } // end anonymous namespace 1060 1061 // Visit all the edges and assign the count value for the instrumented 1062 // edges and the BB. Return false on error. 1063 bool PGOUseFunc::setInstrumentedCounts( 1064 const std::vector<uint64_t> &CountFromProfile) { 1065 1066 std::vector<BasicBlock *> InstrumentBBs; 1067 FuncInfo.getInstrumentBBs(InstrumentBBs); 1068 unsigned NumCounters = 1069 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1070 // The number of counters here should match the number of counters 1071 // in profile. Return if they mismatch. 1072 if (NumCounters != CountFromProfile.size()) { 1073 return false; 1074 } 1075 // Set the profile count to the Instrumented BBs. 1076 uint32_t I = 0; 1077 for (BasicBlock *InstrBB : InstrumentBBs) { 1078 uint64_t CountValue = CountFromProfile[I++]; 1079 UseBBInfo &Info = getBBInfo(InstrBB); 1080 Info.setBBInfoCount(CountValue); 1081 } 1082 ProfileCountSize = CountFromProfile.size(); 1083 CountPosition = I; 1084 1085 // Set the edge count and update the count of unknown edges for BBs. 1086 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1087 E->setEdgeCount(Value); 1088 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1089 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1090 }; 1091 1092 // Set the profile count the Instrumented edges. There are BBs that not in 1093 // MST but not instrumented. Need to set the edge count value so that we can 1094 // populate the profile counts later. 1095 for (auto &E : FuncInfo.MST.AllEdges) { 1096 if (E->Removed || E->InMST) 1097 continue; 1098 const BasicBlock *SrcBB = E->SrcBB; 1099 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1100 1101 // If only one out-edge, the edge profile count should be the same as BB 1102 // profile count. 1103 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1104 setEdgeCount(E.get(), SrcInfo.CountValue); 1105 else { 1106 const BasicBlock *DestBB = E->DestBB; 1107 UseBBInfo &DestInfo = getBBInfo(DestBB); 1108 // If only one in-edge, the edge profile count should be the same as BB 1109 // profile count. 1110 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1111 setEdgeCount(E.get(), DestInfo.CountValue); 1112 } 1113 if (E->CountValid) 1114 continue; 1115 // E's count should have been set from profile. If not, this meenas E skips 1116 // the instrumentation. We set the count to 0. 1117 setEdgeCount(E.get(), 0); 1118 } 1119 return true; 1120 } 1121 1122 // Set the count value for the unknown edge. There should be one and only one 1123 // unknown edge in Edges vector. 1124 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1125 for (auto &E : Edges) { 1126 if (E->CountValid) 1127 continue; 1128 E->setEdgeCount(Value); 1129 1130 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1131 getBBInfo(E->DestBB).UnknownCountInEdge--; 1132 return; 1133 } 1134 llvm_unreachable("Cannot find the unknown count edge"); 1135 } 1136 1137 // Read the profile from ProfileFileName and assign the value to the 1138 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1139 // Return true if the profile are successfully read, and false on errors. 1140 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) { 1141 auto &Ctx = M->getContext(); 1142 Expected<InstrProfRecord> Result = 1143 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1144 if (Error E = Result.takeError()) { 1145 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1146 auto Err = IPE.get(); 1147 bool SkipWarning = false; 1148 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1149 << FuncInfo.FuncName << ": "); 1150 if (Err == instrprof_error::unknown_function) { 1151 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1152 SkipWarning = !PGOWarnMissing; 1153 LLVM_DEBUG(dbgs() << "unknown function"); 1154 } else if (Err == instrprof_error::hash_mismatch || 1155 Err == instrprof_error::malformed) { 1156 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1157 SkipWarning = 1158 NoPGOWarnMismatch || 1159 (NoPGOWarnMismatchComdat && 1160 (F.hasComdat() || 1161 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1162 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1163 } 1164 1165 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1166 if (SkipWarning) 1167 return; 1168 1169 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1170 std::string(" Hash = ") + 1171 std::to_string(FuncInfo.FunctionHash); 1172 1173 Ctx.diagnose( 1174 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1175 }); 1176 return false; 1177 } 1178 ProfileRecord = std::move(Result.get()); 1179 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1180 1181 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1182 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1183 uint64_t ValueSum = 0; 1184 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1185 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1186 ValueSum += CountFromProfile[I]; 1187 } 1188 AllZeros = (ValueSum == 0); 1189 1190 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1191 1192 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1193 getBBInfo(nullptr).UnknownCountInEdge = 2; 1194 1195 if (!setInstrumentedCounts(CountFromProfile)) { 1196 LLVM_DEBUG( 1197 dbgs() << "Inconsistent number of counts, skipping this function"); 1198 Ctx.diagnose(DiagnosticInfoPGOProfile( 1199 M->getName().data(), 1200 Twine("Inconsistent number of counts in ") + F.getName().str() 1201 + Twine(": the profile may be stale or there is a function name collision."), 1202 DS_Warning)); 1203 return false; 1204 } 1205 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1206 return true; 1207 } 1208 1209 // Populate the counters from instrumented BBs to all BBs. 1210 // In the end of this operation, all BBs should have a valid count value. 1211 void PGOUseFunc::populateCounters() { 1212 bool Changes = true; 1213 unsigned NumPasses = 0; 1214 while (Changes) { 1215 NumPasses++; 1216 Changes = false; 1217 1218 // For efficient traversal, it's better to start from the end as most 1219 // of the instrumented edges are at the end. 1220 for (auto &BB : reverse(F)) { 1221 UseBBInfo *Count = findBBInfo(&BB); 1222 if (Count == nullptr) 1223 continue; 1224 if (!Count->CountValid) { 1225 if (Count->UnknownCountOutEdge == 0) { 1226 Count->CountValue = sumEdgeCount(Count->OutEdges); 1227 Count->CountValid = true; 1228 Changes = true; 1229 } else if (Count->UnknownCountInEdge == 0) { 1230 Count->CountValue = sumEdgeCount(Count->InEdges); 1231 Count->CountValid = true; 1232 Changes = true; 1233 } 1234 } 1235 if (Count->CountValid) { 1236 if (Count->UnknownCountOutEdge == 1) { 1237 uint64_t Total = 0; 1238 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1239 // If the one of the successor block can early terminate (no-return), 1240 // we can end up with situation where out edge sum count is larger as 1241 // the source BB's count is collected by a post-dominated block. 1242 if (Count->CountValue > OutSum) 1243 Total = Count->CountValue - OutSum; 1244 setEdgeCount(Count->OutEdges, Total); 1245 Changes = true; 1246 } 1247 if (Count->UnknownCountInEdge == 1) { 1248 uint64_t Total = 0; 1249 uint64_t InSum = sumEdgeCount(Count->InEdges); 1250 if (Count->CountValue > InSum) 1251 Total = Count->CountValue - InSum; 1252 setEdgeCount(Count->InEdges, Total); 1253 Changes = true; 1254 } 1255 } 1256 } 1257 } 1258 1259 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1260 #ifndef NDEBUG 1261 // Assert every BB has a valid counter. 1262 for (auto &BB : F) { 1263 auto BI = findBBInfo(&BB); 1264 if (BI == nullptr) 1265 continue; 1266 assert(BI->CountValid && "BB count is not valid"); 1267 } 1268 #endif 1269 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1270 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1271 uint64_t FuncMaxCount = FuncEntryCount; 1272 for (auto &BB : F) { 1273 auto BI = findBBInfo(&BB); 1274 if (BI == nullptr) 1275 continue; 1276 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1277 } 1278 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1279 1280 // Now annotate select instructions 1281 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1282 assert(CountPosition == ProfileCountSize); 1283 1284 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1285 } 1286 1287 // Assign the scaled count values to the BB with multiple out edges. 1288 void PGOUseFunc::setBranchWeights() { 1289 // Generate MD_prof metadata for every branch instruction. 1290 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1291 << " IsCS=" << IsCS << "\n"); 1292 for (auto &BB : F) { 1293 Instruction *TI = BB.getTerminator(); 1294 if (TI->getNumSuccessors() < 2) 1295 continue; 1296 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1297 isa<IndirectBrInst>(TI))) 1298 continue; 1299 1300 if (getBBInfo(&BB).CountValue == 0) 1301 continue; 1302 1303 // We have a non-zero Branch BB. 1304 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1305 unsigned Size = BBCountInfo.OutEdges.size(); 1306 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1307 uint64_t MaxCount = 0; 1308 for (unsigned s = 0; s < Size; s++) { 1309 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1310 const BasicBlock *SrcBB = E->SrcBB; 1311 const BasicBlock *DestBB = E->DestBB; 1312 if (DestBB == nullptr) 1313 continue; 1314 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1315 uint64_t EdgeCount = E->CountValue; 1316 if (EdgeCount > MaxCount) 1317 MaxCount = EdgeCount; 1318 EdgeCounts[SuccNum] = EdgeCount; 1319 } 1320 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1321 } 1322 } 1323 1324 static bool isIndirectBrTarget(BasicBlock *BB) { 1325 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 1326 if (isa<IndirectBrInst>((*PI)->getTerminator())) 1327 return true; 1328 } 1329 return false; 1330 } 1331 1332 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1333 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1334 // Find irr loop headers 1335 for (auto &BB : F) { 1336 // As a heuristic also annotate indrectbr targets as they have a high chance 1337 // to become an irreducible loop header after the indirectbr tail 1338 // duplication. 1339 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1340 Instruction *TI = BB.getTerminator(); 1341 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1342 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1343 } 1344 } 1345 } 1346 1347 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1348 Module *M = F.getParent(); 1349 IRBuilder<> Builder(&SI); 1350 Type *Int64Ty = Builder.getInt64Ty(); 1351 Type *I8PtrTy = Builder.getInt8PtrTy(); 1352 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1353 Builder.CreateCall( 1354 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1355 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1356 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1357 Builder.getInt32(*CurCtrIdx), Step}); 1358 ++(*CurCtrIdx); 1359 } 1360 1361 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1362 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1363 assert(*CurCtrIdx < CountFromProfile.size() && 1364 "Out of bound access of counters"); 1365 uint64_t SCounts[2]; 1366 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1367 ++(*CurCtrIdx); 1368 uint64_t TotalCount = 0; 1369 auto BI = UseFunc->findBBInfo(SI.getParent()); 1370 if (BI != nullptr) 1371 TotalCount = BI->CountValue; 1372 // False Count 1373 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1374 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1375 if (MaxCount) 1376 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1377 } 1378 1379 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1380 if (!PGOInstrSelect) 1381 return; 1382 // FIXME: do not handle this yet. 1383 if (SI.getCondition()->getType()->isVectorTy()) 1384 return; 1385 1386 switch (Mode) { 1387 case VM_counting: 1388 NSIs++; 1389 return; 1390 case VM_instrument: 1391 instrumentOneSelectInst(SI); 1392 return; 1393 case VM_annotate: 1394 annotateOneSelectInst(SI); 1395 return; 1396 } 1397 1398 llvm_unreachable("Unknown visiting mode"); 1399 } 1400 1401 // Traverse all valuesites and annotate the instructions for all value kind. 1402 void PGOUseFunc::annotateValueSites() { 1403 if (DisableValueProfiling) 1404 return; 1405 1406 // Create the PGOFuncName meta data. 1407 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1408 1409 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1410 annotateValueSites(Kind); 1411 } 1412 1413 // Annotate the instructions for a specific value kind. 1414 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1415 assert(Kind <= IPVK_Last); 1416 unsigned ValueSiteIndex = 0; 1417 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1418 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1419 if (NumValueSites != ValueSites.size()) { 1420 auto &Ctx = M->getContext(); 1421 Ctx.diagnose(DiagnosticInfoPGOProfile( 1422 M->getName().data(), 1423 Twine("Inconsistent number of value sites for ") + 1424 Twine(ValueProfKindDescr[Kind]) + 1425 Twine(" profiling in \"") + F.getName().str() + 1426 Twine("\", possibly due to the use of a stale profile."), 1427 DS_Warning)); 1428 return; 1429 } 1430 1431 for (VPCandidateInfo &I : ValueSites) { 1432 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1433 << "): Index = " << ValueSiteIndex << " out of " 1434 << NumValueSites << "\n"); 1435 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1436 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1437 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1438 : MaxNumAnnotations); 1439 ValueSiteIndex++; 1440 } 1441 } 1442 1443 // Collect the set of members for each Comdat in module M and store 1444 // in ComdatMembers. 1445 static void collectComdatMembers( 1446 Module &M, 1447 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1448 if (!DoComdatRenaming) 1449 return; 1450 for (Function &F : M) 1451 if (Comdat *C = F.getComdat()) 1452 ComdatMembers.insert(std::make_pair(C, &F)); 1453 for (GlobalVariable &GV : M.globals()) 1454 if (Comdat *C = GV.getComdat()) 1455 ComdatMembers.insert(std::make_pair(C, &GV)); 1456 for (GlobalAlias &GA : M.aliases()) 1457 if (Comdat *C = GA.getComdat()) 1458 ComdatMembers.insert(std::make_pair(C, &GA)); 1459 } 1460 1461 static bool InstrumentAllFunctions( 1462 Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1463 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1464 // For the context-sensitve instrumentation, we should have a separated pass 1465 // (before LTO/ThinLTO linking) to create these variables. 1466 if (!IsCS) 1467 createIRLevelProfileFlagVar(M, /* IsCS */ false); 1468 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1469 collectComdatMembers(M, ComdatMembers); 1470 1471 for (auto &F : M) { 1472 if (F.isDeclaration()) 1473 continue; 1474 auto *BPI = LookupBPI(F); 1475 auto *BFI = LookupBFI(F); 1476 instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS); 1477 } 1478 return true; 1479 } 1480 1481 PreservedAnalyses 1482 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1483 createProfileFileNameVar(M, CSInstrName); 1484 createIRLevelProfileFlagVar(M, /* IsCS */ true); 1485 return PreservedAnalyses::all(); 1486 } 1487 1488 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1489 if (skipModule(M)) 1490 return false; 1491 1492 auto LookupBPI = [this](Function &F) { 1493 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1494 }; 1495 auto LookupBFI = [this](Function &F) { 1496 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1497 }; 1498 return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS); 1499 } 1500 1501 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1502 ModuleAnalysisManager &AM) { 1503 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1504 auto LookupBPI = [&FAM](Function &F) { 1505 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1506 }; 1507 1508 auto LookupBFI = [&FAM](Function &F) { 1509 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1510 }; 1511 1512 if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS)) 1513 return PreservedAnalyses::all(); 1514 1515 return PreservedAnalyses::none(); 1516 } 1517 1518 static bool annotateAllFunctions( 1519 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1520 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1521 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1522 ProfileSummaryInfo *PSI, bool IsCS) { 1523 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1524 auto &Ctx = M.getContext(); 1525 // Read the counter array from file. 1526 auto ReaderOrErr = 1527 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1528 if (Error E = ReaderOrErr.takeError()) { 1529 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1530 Ctx.diagnose( 1531 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1532 }); 1533 return false; 1534 } 1535 1536 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1537 std::move(ReaderOrErr.get()); 1538 if (!PGOReader) { 1539 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1540 StringRef("Cannot get PGOReader"))); 1541 return false; 1542 } 1543 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1544 return false; 1545 1546 // TODO: might need to change the warning once the clang option is finalized. 1547 if (!PGOReader->isIRLevelProfile()) { 1548 Ctx.diagnose(DiagnosticInfoPGOProfile( 1549 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1550 return false; 1551 } 1552 1553 // Add the profile summary (read from the header of the indexed summary) here 1554 // so that we can use it below when reading counters (which checks if the 1555 // function should be marked with a cold or inlinehint attribute). 1556 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1557 IsCS ? ProfileSummary::PSK_CSInstr 1558 : ProfileSummary::PSK_Instr); 1559 1560 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1561 collectComdatMembers(M, ComdatMembers); 1562 std::vector<Function *> HotFunctions; 1563 std::vector<Function *> ColdFunctions; 1564 for (auto &F : M) { 1565 if (F.isDeclaration()) 1566 continue; 1567 auto *BPI = LookupBPI(F); 1568 auto *BFI = LookupBFI(F); 1569 // Split indirectbr critical edges here before computing the MST rather than 1570 // later in getInstrBB() to avoid invalidating it. 1571 SplitIndirectBrCriticalEdges(F, BPI, BFI); 1572 PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, PSI, IsCS); 1573 bool AllZeros = false; 1574 if (!Func.readCounters(PGOReader.get(), AllZeros)) 1575 continue; 1576 if (AllZeros) { 1577 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1578 if (Func.getProgramMaxCount() != 0) 1579 ColdFunctions.push_back(&F); 1580 continue; 1581 } 1582 Func.populateCounters(); 1583 Func.setBranchWeights(); 1584 Func.annotateValueSites(); 1585 Func.annotateIrrLoopHeaderWeights(); 1586 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1587 if (FreqAttr == PGOUseFunc::FFA_Cold) 1588 ColdFunctions.push_back(&F); 1589 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1590 HotFunctions.push_back(&F); 1591 if (PGOViewCounts != PGOVCT_None && 1592 (ViewBlockFreqFuncName.empty() || 1593 F.getName().equals(ViewBlockFreqFuncName))) { 1594 LoopInfo LI{DominatorTree(F)}; 1595 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1596 std::make_unique<BranchProbabilityInfo>(F, LI); 1597 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1598 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1599 if (PGOViewCounts == PGOVCT_Graph) 1600 NewBFI->view(); 1601 else if (PGOViewCounts == PGOVCT_Text) { 1602 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1603 NewBFI->print(dbgs()); 1604 } 1605 } 1606 if (PGOViewRawCounts != PGOVCT_None && 1607 (ViewBlockFreqFuncName.empty() || 1608 F.getName().equals(ViewBlockFreqFuncName))) { 1609 if (PGOViewRawCounts == PGOVCT_Graph) 1610 if (ViewBlockFreqFuncName.empty()) 1611 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1612 else 1613 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1614 else if (PGOViewRawCounts == PGOVCT_Text) { 1615 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1616 Func.dumpInfo(); 1617 } 1618 } 1619 } 1620 1621 // Set function hotness attribute from the profile. 1622 // We have to apply these attributes at the end because their presence 1623 // can affect the BranchProbabilityInfo of any callers, resulting in an 1624 // inconsistent MST between prof-gen and prof-use. 1625 for (auto &F : HotFunctions) { 1626 F->addFnAttr(Attribute::InlineHint); 1627 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1628 << "\n"); 1629 } 1630 for (auto &F : ColdFunctions) { 1631 F->addFnAttr(Attribute::Cold); 1632 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 1633 << "\n"); 1634 } 1635 return true; 1636 } 1637 1638 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 1639 std::string RemappingFilename, 1640 bool IsCS) 1641 : ProfileFileName(std::move(Filename)), 1642 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 1643 if (!PGOTestProfileFile.empty()) 1644 ProfileFileName = PGOTestProfileFile; 1645 if (!PGOTestProfileRemappingFile.empty()) 1646 ProfileRemappingFileName = PGOTestProfileRemappingFile; 1647 } 1648 1649 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1650 ModuleAnalysisManager &AM) { 1651 1652 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1653 auto LookupBPI = [&FAM](Function &F) { 1654 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1655 }; 1656 1657 auto LookupBFI = [&FAM](Function &F) { 1658 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1659 }; 1660 1661 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 1662 1663 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 1664 LookupBPI, LookupBFI, PSI, IsCS)) 1665 return PreservedAnalyses::all(); 1666 1667 return PreservedAnalyses::none(); 1668 } 1669 1670 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 1671 if (skipModule(M)) 1672 return false; 1673 1674 auto LookupBPI = [this](Function &F) { 1675 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1676 }; 1677 auto LookupBFI = [this](Function &F) { 1678 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1679 }; 1680 1681 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 1682 return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, PSI, 1683 IsCS); 1684 } 1685 1686 static std::string getSimpleNodeName(const BasicBlock *Node) { 1687 if (!Node->getName().empty()) 1688 return Node->getName(); 1689 1690 std::string SimpleNodeName; 1691 raw_string_ostream OS(SimpleNodeName); 1692 Node->printAsOperand(OS, false); 1693 return OS.str(); 1694 } 1695 1696 void llvm::setProfMetadata(Module *M, Instruction *TI, 1697 ArrayRef<uint64_t> EdgeCounts, 1698 uint64_t MaxCount) { 1699 MDBuilder MDB(M->getContext()); 1700 assert(MaxCount > 0 && "Bad max count"); 1701 uint64_t Scale = calculateCountScale(MaxCount); 1702 SmallVector<unsigned, 4> Weights; 1703 for (const auto &ECI : EdgeCounts) 1704 Weights.push_back(scaleBranchCount(ECI, Scale)); 1705 1706 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 1707 : Weights) { 1708 dbgs() << W << " "; 1709 } dbgs() << "\n";); 1710 1711 misexpect::verifyMisExpect(TI, Weights, TI->getContext()); 1712 1713 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1714 if (EmitBranchProbability) { 1715 std::string BrCondStr = getBranchCondString(TI); 1716 if (BrCondStr.empty()) 1717 return; 1718 1719 uint64_t WSum = 1720 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 1721 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 1722 uint64_t TotalCount = 1723 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 1724 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 1725 Scale = calculateCountScale(WSum); 1726 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 1727 scaleBranchCount(WSum, Scale)); 1728 std::string BranchProbStr; 1729 raw_string_ostream OS(BranchProbStr); 1730 OS << BP; 1731 OS << " (total count : " << TotalCount << ")"; 1732 OS.flush(); 1733 Function *F = TI->getParent()->getParent(); 1734 OptimizationRemarkEmitter ORE(F); 1735 ORE.emit([&]() { 1736 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 1737 << BrCondStr << " is true with probability : " << BranchProbStr; 1738 }); 1739 } 1740 } 1741 1742 namespace llvm { 1743 1744 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 1745 MDBuilder MDB(M->getContext()); 1746 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 1747 MDB.createIrrLoopHeaderWeight(Count)); 1748 } 1749 1750 template <> struct GraphTraits<PGOUseFunc *> { 1751 using NodeRef = const BasicBlock *; 1752 using ChildIteratorType = succ_const_iterator; 1753 using nodes_iterator = pointer_iterator<Function::const_iterator>; 1754 1755 static NodeRef getEntryNode(const PGOUseFunc *G) { 1756 return &G->getFunc().front(); 1757 } 1758 1759 static ChildIteratorType child_begin(const NodeRef N) { 1760 return succ_begin(N); 1761 } 1762 1763 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 1764 1765 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 1766 return nodes_iterator(G->getFunc().begin()); 1767 } 1768 1769 static nodes_iterator nodes_end(const PGOUseFunc *G) { 1770 return nodes_iterator(G->getFunc().end()); 1771 } 1772 }; 1773 1774 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 1775 explicit DOTGraphTraits(bool isSimple = false) 1776 : DefaultDOTGraphTraits(isSimple) {} 1777 1778 static std::string getGraphName(const PGOUseFunc *G) { 1779 return G->getFunc().getName(); 1780 } 1781 1782 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 1783 std::string Result; 1784 raw_string_ostream OS(Result); 1785 1786 OS << getSimpleNodeName(Node) << ":\\l"; 1787 UseBBInfo *BI = Graph->findBBInfo(Node); 1788 OS << "Count : "; 1789 if (BI && BI->CountValid) 1790 OS << BI->CountValue << "\\l"; 1791 else 1792 OS << "Unknown\\l"; 1793 1794 if (!PGOInstrSelect) 1795 return Result; 1796 1797 for (auto BI = Node->begin(); BI != Node->end(); ++BI) { 1798 auto *I = &*BI; 1799 if (!isa<SelectInst>(I)) 1800 continue; 1801 // Display scaled counts for SELECT instruction: 1802 OS << "SELECT : { T = "; 1803 uint64_t TC, FC; 1804 bool HasProf = I->extractProfMetadata(TC, FC); 1805 if (!HasProf) 1806 OS << "Unknown, F = Unknown }\\l"; 1807 else 1808 OS << TC << ", F = " << FC << " }\\l"; 1809 } 1810 return Result; 1811 } 1812 }; 1813 1814 } // end namespace llvm 1815