1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "CFGMST.h" 52 #include "ValueProfileCollector.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/MapVector.h" 56 #include "llvm/ADT/STLExtras.h" 57 #include "llvm/ADT/SmallVector.h" 58 #include "llvm/ADT/Statistic.h" 59 #include "llvm/ADT/StringRef.h" 60 #include "llvm/ADT/Triple.h" 61 #include "llvm/ADT/Twine.h" 62 #include "llvm/ADT/iterator.h" 63 #include "llvm/ADT/iterator_range.h" 64 #include "llvm/Analysis/BlockFrequencyInfo.h" 65 #include "llvm/Analysis/BranchProbabilityInfo.h" 66 #include "llvm/Analysis/CFG.h" 67 #include "llvm/Analysis/EHPersonalities.h" 68 #include "llvm/Analysis/LoopInfo.h" 69 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 70 #include "llvm/Analysis/ProfileSummaryInfo.h" 71 #include "llvm/IR/Attributes.h" 72 #include "llvm/IR/BasicBlock.h" 73 #include "llvm/IR/CFG.h" 74 #include "llvm/IR/Comdat.h" 75 #include "llvm/IR/Constant.h" 76 #include "llvm/IR/Constants.h" 77 #include "llvm/IR/DiagnosticInfo.h" 78 #include "llvm/IR/Dominators.h" 79 #include "llvm/IR/Function.h" 80 #include "llvm/IR/GlobalAlias.h" 81 #include "llvm/IR/GlobalValue.h" 82 #include "llvm/IR/GlobalVariable.h" 83 #include "llvm/IR/IRBuilder.h" 84 #include "llvm/IR/InstVisitor.h" 85 #include "llvm/IR/InstrTypes.h" 86 #include "llvm/IR/Instruction.h" 87 #include "llvm/IR/Instructions.h" 88 #include "llvm/IR/IntrinsicInst.h" 89 #include "llvm/IR/Intrinsics.h" 90 #include "llvm/IR/LLVMContext.h" 91 #include "llvm/IR/MDBuilder.h" 92 #include "llvm/IR/Module.h" 93 #include "llvm/IR/PassManager.h" 94 #include "llvm/IR/ProfileSummary.h" 95 #include "llvm/IR/Type.h" 96 #include "llvm/IR/Value.h" 97 #include "llvm/InitializePasses.h" 98 #include "llvm/Pass.h" 99 #include "llvm/ProfileData/InstrProf.h" 100 #include "llvm/ProfileData/InstrProfReader.h" 101 #include "llvm/Support/BranchProbability.h" 102 #include "llvm/Support/CRC.h" 103 #include "llvm/Support/Casting.h" 104 #include "llvm/Support/CommandLine.h" 105 #include "llvm/Support/DOTGraphTraits.h" 106 #include "llvm/Support/Debug.h" 107 #include "llvm/Support/Error.h" 108 #include "llvm/Support/ErrorHandling.h" 109 #include "llvm/Support/GraphWriter.h" 110 #include "llvm/Support/raw_ostream.h" 111 #include "llvm/Transforms/Instrumentation.h" 112 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 113 #include "llvm/Transforms/Utils/MisExpect.h" 114 #include <algorithm> 115 #include <cassert> 116 #include <cstdint> 117 #include <memory> 118 #include <numeric> 119 #include <string> 120 #include <unordered_map> 121 #include <utility> 122 #include <vector> 123 124 using namespace llvm; 125 using ProfileCount = Function::ProfileCount; 126 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 127 128 #define DEBUG_TYPE "pgo-instrumentation" 129 130 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 131 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 132 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 133 STATISTIC(NumOfPGOEdge, "Number of edges."); 134 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 135 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 136 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 137 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 138 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 139 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 140 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 141 STATISTIC(NumOfCSPGOSelectInsts, 142 "Number of select instruction instrumented in CSPGO."); 143 STATISTIC(NumOfCSPGOMemIntrinsics, 144 "Number of mem intrinsics instrumented in CSPGO."); 145 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 146 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 147 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 148 STATISTIC(NumOfCSPGOFunc, 149 "Number of functions having valid profile counts in CSPGO."); 150 STATISTIC(NumOfCSPGOMismatch, 151 "Number of functions having mismatch profile in CSPGO."); 152 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 153 154 // Command line option to specify the file to read profile from. This is 155 // mainly used for testing. 156 static cl::opt<std::string> 157 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 158 cl::value_desc("filename"), 159 cl::desc("Specify the path of profile data file. This is" 160 "mainly for test purpose.")); 161 static cl::opt<std::string> PGOTestProfileRemappingFile( 162 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 163 cl::value_desc("filename"), 164 cl::desc("Specify the path of profile remapping file. This is mainly for " 165 "test purpose.")); 166 167 // Command line option to disable value profiling. The default is false: 168 // i.e. value profiling is enabled by default. This is for debug purpose. 169 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 170 cl::Hidden, 171 cl::desc("Disable Value Profiling")); 172 173 // Command line option to set the maximum number of VP annotations to write to 174 // the metadata for a single indirect call callsite. 175 static cl::opt<unsigned> MaxNumAnnotations( 176 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 177 cl::desc("Max number of annotations for a single indirect " 178 "call callsite")); 179 180 // Command line option to set the maximum number of value annotations 181 // to write to the metadata for a single memop intrinsic. 182 static cl::opt<unsigned> MaxNumMemOPAnnotations( 183 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 184 cl::desc("Max number of preicise value annotations for a single memop" 185 "intrinsic")); 186 187 // Command line option to control appending FunctionHash to the name of a COMDAT 188 // function. This is to avoid the hash mismatch caused by the preinliner. 189 static cl::opt<bool> DoComdatRenaming( 190 "do-comdat-renaming", cl::init(false), cl::Hidden, 191 cl::desc("Append function hash to the name of COMDAT function to avoid " 192 "function hash mismatch due to the preinliner")); 193 194 // Command line option to enable/disable the warning about missing profile 195 // information. 196 static cl::opt<bool> 197 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 198 cl::desc("Use this option to turn on/off " 199 "warnings about missing profile data for " 200 "functions.")); 201 202 // Command line option to enable/disable the warning about a hash mismatch in 203 // the profile data. 204 static cl::opt<bool> 205 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 206 cl::desc("Use this option to turn off/on " 207 "warnings about profile cfg mismatch.")); 208 209 // Command line option to enable/disable the warning about a hash mismatch in 210 // the profile data for Comdat functions, which often turns out to be false 211 // positive due to the pre-instrumentation inline. 212 static cl::opt<bool> 213 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 214 cl::Hidden, 215 cl::desc("The option is used to turn on/off " 216 "warnings about hash mismatch for comdat " 217 "functions.")); 218 219 // Command line option to enable/disable select instruction instrumentation. 220 static cl::opt<bool> 221 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 222 cl::desc("Use this option to turn on/off SELECT " 223 "instruction instrumentation. ")); 224 225 // Command line option to turn on CFG dot or text dump of raw profile counts 226 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 227 "pgo-view-raw-counts", cl::Hidden, 228 cl::desc("A boolean option to show CFG dag or text " 229 "with raw profile counts from " 230 "profile data. See also option " 231 "-pgo-view-counts. To limit graph " 232 "display to only one function, use " 233 "filtering option -view-bfi-func-name."), 234 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 235 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 236 clEnumValN(PGOVCT_Text, "text", "show in text."))); 237 238 // Command line option to enable/disable memop intrinsic call.size profiling. 239 static cl::opt<bool> 240 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 241 cl::desc("Use this option to turn on/off " 242 "memory intrinsic size profiling.")); 243 244 // Emit branch probability as optimization remarks. 245 static cl::opt<bool> 246 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 247 cl::desc("When this option is on, the annotated " 248 "branch probability will be emitted as " 249 "optimization remarks: -{Rpass|" 250 "pass-remarks}=pgo-instrumentation")); 251 252 // Command line option to turn on CFG dot dump after profile annotation. 253 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 254 extern cl::opt<PGOViewCountsType> PGOViewCounts; 255 256 // Command line option to specify the name of the function for CFG dump 257 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 258 extern cl::opt<std::string> ViewBlockFreqFuncName; 259 260 // Return a string describing the branch condition that can be 261 // used in static branch probability heuristics: 262 static std::string getBranchCondString(Instruction *TI) { 263 BranchInst *BI = dyn_cast<BranchInst>(TI); 264 if (!BI || !BI->isConditional()) 265 return std::string(); 266 267 Value *Cond = BI->getCondition(); 268 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 269 if (!CI) 270 return std::string(); 271 272 std::string result; 273 raw_string_ostream OS(result); 274 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 275 CI->getOperand(0)->getType()->print(OS, true); 276 277 Value *RHS = CI->getOperand(1); 278 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 279 if (CV) { 280 if (CV->isZero()) 281 OS << "_Zero"; 282 else if (CV->isOne()) 283 OS << "_One"; 284 else if (CV->isMinusOne()) 285 OS << "_MinusOne"; 286 else 287 OS << "_Const"; 288 } 289 OS.flush(); 290 return result; 291 } 292 293 static const char *ValueProfKindDescr[] = { 294 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 295 #include "llvm/ProfileData/InstrProfData.inc" 296 }; 297 298 namespace { 299 300 /// The select instruction visitor plays three roles specified 301 /// by the mode. In \c VM_counting mode, it simply counts the number of 302 /// select instructions. In \c VM_instrument mode, it inserts code to count 303 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 304 /// it reads the profile data and annotate the select instruction with metadata. 305 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 306 class PGOUseFunc; 307 308 /// Instruction Visitor class to visit select instructions. 309 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 310 Function &F; 311 unsigned NSIs = 0; // Number of select instructions instrumented. 312 VisitMode Mode = VM_counting; // Visiting mode. 313 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 314 unsigned TotalNumCtrs = 0; // Total number of counters 315 GlobalVariable *FuncNameVar = nullptr; 316 uint64_t FuncHash = 0; 317 PGOUseFunc *UseFunc = nullptr; 318 319 SelectInstVisitor(Function &Func) : F(Func) {} 320 321 void countSelects(Function &Func) { 322 NSIs = 0; 323 Mode = VM_counting; 324 visit(Func); 325 } 326 327 // Visit the IR stream and instrument all select instructions. \p 328 // Ind is a pointer to the counter index variable; \p TotalNC 329 // is the total number of counters; \p FNV is the pointer to the 330 // PGO function name var; \p FHash is the function hash. 331 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 332 GlobalVariable *FNV, uint64_t FHash) { 333 Mode = VM_instrument; 334 CurCtrIdx = Ind; 335 TotalNumCtrs = TotalNC; 336 FuncHash = FHash; 337 FuncNameVar = FNV; 338 visit(Func); 339 } 340 341 // Visit the IR stream and annotate all select instructions. 342 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 343 Mode = VM_annotate; 344 UseFunc = UF; 345 CurCtrIdx = Ind; 346 visit(Func); 347 } 348 349 void instrumentOneSelectInst(SelectInst &SI); 350 void annotateOneSelectInst(SelectInst &SI); 351 352 // Visit \p SI instruction and perform tasks according to visit mode. 353 void visitSelectInst(SelectInst &SI); 354 355 // Return the number of select instructions. This needs be called after 356 // countSelects(). 357 unsigned getNumOfSelectInsts() const { return NSIs; } 358 }; 359 360 361 class PGOInstrumentationGenLegacyPass : public ModulePass { 362 public: 363 static char ID; 364 365 PGOInstrumentationGenLegacyPass(bool IsCS = false) 366 : ModulePass(ID), IsCS(IsCS) { 367 initializePGOInstrumentationGenLegacyPassPass( 368 *PassRegistry::getPassRegistry()); 369 } 370 371 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 372 373 private: 374 // Is this is context-sensitive instrumentation. 375 bool IsCS; 376 bool runOnModule(Module &M) override; 377 378 void getAnalysisUsage(AnalysisUsage &AU) const override { 379 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 380 AU.addRequired<TargetLibraryInfoWrapperPass>(); 381 } 382 }; 383 384 class PGOInstrumentationUseLegacyPass : public ModulePass { 385 public: 386 static char ID; 387 388 // Provide the profile filename as the parameter. 389 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) 390 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { 391 if (!PGOTestProfileFile.empty()) 392 ProfileFileName = PGOTestProfileFile; 393 initializePGOInstrumentationUseLegacyPassPass( 394 *PassRegistry::getPassRegistry()); 395 } 396 397 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 398 399 private: 400 std::string ProfileFileName; 401 // Is this is context-sensitive instrumentation use. 402 bool IsCS; 403 404 bool runOnModule(Module &M) override; 405 406 void getAnalysisUsage(AnalysisUsage &AU) const override { 407 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 408 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 409 AU.addRequired<TargetLibraryInfoWrapperPass>(); 410 } 411 }; 412 413 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { 414 public: 415 static char ID; 416 StringRef getPassName() const override { 417 return "PGOInstrumentationGenCreateVarPass"; 418 } 419 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") 420 : ModulePass(ID), InstrProfileOutput(CSInstrName) { 421 initializePGOInstrumentationGenCreateVarLegacyPassPass( 422 *PassRegistry::getPassRegistry()); 423 } 424 425 private: 426 bool runOnModule(Module &M) override { 427 createProfileFileNameVar(M, InstrProfileOutput); 428 createIRLevelProfileFlagVar(M, true); 429 return false; 430 } 431 std::string InstrProfileOutput; 432 }; 433 434 } // end anonymous namespace 435 436 char PGOInstrumentationGenLegacyPass::ID = 0; 437 438 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 439 "PGO instrumentation.", false, false) 440 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 441 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 442 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 443 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 444 "PGO instrumentation.", false, false) 445 446 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { 447 return new PGOInstrumentationGenLegacyPass(IsCS); 448 } 449 450 char PGOInstrumentationUseLegacyPass::ID = 0; 451 452 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 453 "Read PGO instrumentation profile.", false, false) 454 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 455 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 456 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 457 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 458 "Read PGO instrumentation profile.", false, false) 459 460 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, 461 bool IsCS) { 462 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); 463 } 464 465 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; 466 467 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, 468 "pgo-instr-gen-create-var", 469 "Create PGO instrumentation version variable for CSPGO.", false, 470 false) 471 472 ModulePass * 473 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { 474 return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName)); 475 } 476 477 namespace { 478 479 /// An MST based instrumentation for PGO 480 /// 481 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 482 /// in the function level. 483 struct PGOEdge { 484 // This class implements the CFG edges. Note the CFG can be a multi-graph. 485 // So there might be multiple edges with same SrcBB and DestBB. 486 const BasicBlock *SrcBB; 487 const BasicBlock *DestBB; 488 uint64_t Weight; 489 bool InMST = false; 490 bool Removed = false; 491 bool IsCritical = false; 492 493 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 494 : SrcBB(Src), DestBB(Dest), Weight(W) {} 495 496 // Return the information string of an edge. 497 const std::string infoString() const { 498 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 499 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 500 } 501 }; 502 503 // This class stores the auxiliary information for each BB. 504 struct BBInfo { 505 BBInfo *Group; 506 uint32_t Index; 507 uint32_t Rank = 0; 508 509 BBInfo(unsigned IX) : Group(this), Index(IX) {} 510 511 // Return the information string of this object. 512 const std::string infoString() const { 513 return (Twine("Index=") + Twine(Index)).str(); 514 } 515 516 // Empty function -- only applicable to UseBBInfo. 517 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 518 519 // Empty function -- only applicable to UseBBInfo. 520 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 521 }; 522 523 // This class implements the CFG edges. Note the CFG can be a multi-graph. 524 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 525 private: 526 Function &F; 527 528 // Is this is context-sensitive instrumentation. 529 bool IsCS; 530 531 // A map that stores the Comdat group in function F. 532 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 533 534 ValueProfileCollector VPC; 535 536 void computeCFGHash(); 537 void renameComdatFunction(); 538 539 public: 540 std::vector<std::vector<VPCandidateInfo>> ValueSites; 541 SelectInstVisitor SIVisitor; 542 std::string FuncName; 543 GlobalVariable *FuncNameVar; 544 545 // CFG hash value for this function. 546 uint64_t FunctionHash = 0; 547 548 // The Minimum Spanning Tree of function CFG. 549 CFGMST<Edge, BBInfo> MST; 550 551 // Collect all the BBs that will be instrumented, and store them in 552 // InstrumentBBs. 553 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 554 555 // Give an edge, find the BB that will be instrumented. 556 // Return nullptr if there is no BB to be instrumented. 557 BasicBlock *getInstrBB(Edge *E); 558 559 // Return the auxiliary BB information. 560 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 561 562 // Return the auxiliary BB information if available. 563 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 564 565 // Dump edges and BB information. 566 void dumpInfo(std::string Str = "") const { 567 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 568 Twine(FunctionHash) + "\t" + Str); 569 } 570 571 FuncPGOInstrumentation( 572 Function &Func, TargetLibraryInfo &TLI, 573 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 574 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 575 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false) 576 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 577 ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) { 578 // This should be done before CFG hash computation. 579 SIVisitor.countSelects(Func); 580 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 581 if (!IsCS) { 582 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 583 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 584 NumOfPGOBB += MST.BBInfos.size(); 585 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 586 } else { 587 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 588 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 589 NumOfCSPGOBB += MST.BBInfos.size(); 590 } 591 592 FuncName = getPGOFuncName(F); 593 computeCFGHash(); 594 if (!ComdatMembers.empty()) 595 renameComdatFunction(); 596 LLVM_DEBUG(dumpInfo("after CFGMST")); 597 598 for (auto &E : MST.AllEdges) { 599 if (E->Removed) 600 continue; 601 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 602 if (!E->InMST) 603 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 604 } 605 606 if (CreateGlobalVar) 607 FuncNameVar = createPGOFuncNameVar(F, FuncName); 608 } 609 }; 610 611 } // end anonymous namespace 612 613 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 614 // value of each BB in the CFG. The higher 32 bits record the number of edges. 615 template <class Edge, class BBInfo> 616 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 617 std::vector<uint8_t> Indexes; 618 JamCRC JC; 619 for (auto &BB : F) { 620 const Instruction *TI = BB.getTerminator(); 621 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 622 BasicBlock *Succ = TI->getSuccessor(I); 623 auto BI = findBBInfo(Succ); 624 if (BI == nullptr) 625 continue; 626 uint32_t Index = BI->Index; 627 for (int J = 0; J < 4; J++) 628 Indexes.push_back((uint8_t)(Index >> (J * 8))); 629 } 630 } 631 JC.update(Indexes); 632 633 // Hash format for context sensitive profile. Reserve 4 bits for other 634 // information. 635 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 636 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 637 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 638 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 639 // Reserve bit 60-63 for other information purpose. 640 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 641 if (IsCS) 642 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 643 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 644 << " CRC = " << JC.getCRC() 645 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 646 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 647 << ValueSites[IPVK_IndirectCallTarget].size() 648 << ", Hash = " << FunctionHash << "\n";); 649 } 650 651 // Check if we can safely rename this Comdat function. 652 static bool canRenameComdat( 653 Function &F, 654 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 655 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 656 return false; 657 658 // FIXME: Current only handle those Comdat groups that only containing one 659 // function and function aliases. 660 // (1) For a Comdat group containing multiple functions, we need to have a 661 // unique postfix based on the hashes for each function. There is a 662 // non-trivial code refactoring to do this efficiently. 663 // (2) Variables can not be renamed, so we can not rename Comdat function in a 664 // group including global vars. 665 Comdat *C = F.getComdat(); 666 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 667 if (dyn_cast<GlobalAlias>(CM.second)) 668 continue; 669 Function *FM = dyn_cast<Function>(CM.second); 670 if (FM != &F) 671 return false; 672 } 673 return true; 674 } 675 676 // Append the CFGHash to the Comdat function name. 677 template <class Edge, class BBInfo> 678 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 679 if (!canRenameComdat(F, ComdatMembers)) 680 return; 681 std::string OrigName = F.getName().str(); 682 std::string NewFuncName = 683 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 684 F.setName(Twine(NewFuncName)); 685 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 686 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 687 Comdat *NewComdat; 688 Module *M = F.getParent(); 689 // For AvailableExternallyLinkage functions, change the linkage to 690 // LinkOnceODR and put them into comdat. This is because after renaming, there 691 // is no backup external copy available for the function. 692 if (!F.hasComdat()) { 693 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 694 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 695 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 696 F.setComdat(NewComdat); 697 return; 698 } 699 700 // This function belongs to a single function Comdat group. 701 Comdat *OrigComdat = F.getComdat(); 702 std::string NewComdatName = 703 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 704 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 705 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 706 707 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 708 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) { 709 // For aliases, change the name directly. 710 assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F); 711 std::string OrigGAName = GA->getName().str(); 712 GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash))); 713 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA); 714 continue; 715 } 716 // Must be a function. 717 Function *CF = dyn_cast<Function>(CM.second); 718 assert(CF); 719 CF->setComdat(NewComdat); 720 } 721 } 722 723 // Collect all the BBs that will be instruments and return them in 724 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 725 template <class Edge, class BBInfo> 726 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 727 std::vector<BasicBlock *> &InstrumentBBs) { 728 // Use a worklist as we will update the vector during the iteration. 729 std::vector<Edge *> EdgeList; 730 EdgeList.reserve(MST.AllEdges.size()); 731 for (auto &E : MST.AllEdges) 732 EdgeList.push_back(E.get()); 733 734 for (auto &E : EdgeList) { 735 BasicBlock *InstrBB = getInstrBB(E); 736 if (InstrBB) 737 InstrumentBBs.push_back(InstrBB); 738 } 739 740 // Set up InEdges/OutEdges for all BBs. 741 for (auto &E : MST.AllEdges) { 742 if (E->Removed) 743 continue; 744 const BasicBlock *SrcBB = E->SrcBB; 745 const BasicBlock *DestBB = E->DestBB; 746 BBInfo &SrcInfo = getBBInfo(SrcBB); 747 BBInfo &DestInfo = getBBInfo(DestBB); 748 SrcInfo.addOutEdge(E.get()); 749 DestInfo.addInEdge(E.get()); 750 } 751 } 752 753 // Given a CFG E to be instrumented, find which BB to place the instrumented 754 // code. The function will split the critical edge if necessary. 755 template <class Edge, class BBInfo> 756 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 757 if (E->InMST || E->Removed) 758 return nullptr; 759 760 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 761 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 762 // For a fake edge, instrument the real BB. 763 if (SrcBB == nullptr) 764 return DestBB; 765 if (DestBB == nullptr) 766 return SrcBB; 767 768 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 769 // There are basic blocks (such as catchswitch) cannot be instrumented. 770 // If the returned first insertion point is the end of BB, skip this BB. 771 if (BB->getFirstInsertionPt() == BB->end()) 772 return nullptr; 773 return BB; 774 }; 775 776 // Instrument the SrcBB if it has a single successor, 777 // otherwise, the DestBB if this is not a critical edge. 778 Instruction *TI = SrcBB->getTerminator(); 779 if (TI->getNumSuccessors() <= 1) 780 return canInstrument(SrcBB); 781 if (!E->IsCritical) 782 return canInstrument(DestBB); 783 784 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 785 BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); 786 if (!InstrBB) { 787 LLVM_DEBUG( 788 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 789 return nullptr; 790 } 791 // For a critical edge, we have to split. Instrument the newly 792 // created BB. 793 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 794 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 795 << " --> " << getBBInfo(DestBB).Index << "\n"); 796 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 797 MST.addEdge(SrcBB, InstrBB, 0); 798 // Second one: Add new edge of InstrBB->DestBB. 799 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 800 NewEdge1.InMST = true; 801 E->Removed = true; 802 803 return canInstrument(InstrBB); 804 } 805 806 // When generating value profiling calls on Windows routines that make use of 807 // handler funclets for exception processing an operand bundle needs to attached 808 // to the called function. This routine will set \p OpBundles to contain the 809 // funclet information, if any is needed, that should be placed on the generated 810 // value profiling call for the value profile candidate call. 811 static void 812 populateEHOperandBundle(VPCandidateInfo &Cand, 813 DenseMap<BasicBlock *, ColorVector> &BlockColors, 814 SmallVectorImpl<OperandBundleDef> &OpBundles) { 815 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 816 if (OrigCall && !isa<IntrinsicInst>(OrigCall)) { 817 // The instrumentation call should belong to the same funclet as a 818 // non-intrinsic call, so just copy the operand bundle, if any exists. 819 Optional<OperandBundleUse> ParentFunclet = 820 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 821 if (ParentFunclet) 822 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 823 } else { 824 // Intrinsics or other instructions do not get funclet information from the 825 // front-end. Need to use the BlockColors that was computed by the routine 826 // colorEHFunclets to determine whether a funclet is needed. 827 if (!BlockColors.empty()) { 828 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 829 assert(CV.size() == 1 && "non-unique color for block!"); 830 Instruction *EHPad = CV.front()->getFirstNonPHI(); 831 if (EHPad->isEHPad()) 832 OpBundles.emplace_back("funclet", EHPad); 833 } 834 } 835 } 836 837 // Visit all edge and instrument the edges not in MST, and do value profiling. 838 // Critical edges will be split. 839 static void instrumentOneFunc( 840 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 841 BlockFrequencyInfo *BFI, 842 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 843 bool IsCS) { 844 // Split indirectbr critical edges here before computing the MST rather than 845 // later in getInstrBB() to avoid invalidating it. 846 SplitIndirectBrCriticalEdges(F, BPI, BFI); 847 848 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, TLI, ComdatMembers, true, 849 BPI, BFI, IsCS); 850 std::vector<BasicBlock *> InstrumentBBs; 851 FuncInfo.getInstrumentBBs(InstrumentBBs); 852 unsigned NumCounters = 853 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 854 855 uint32_t I = 0; 856 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 857 for (auto *InstrBB : InstrumentBBs) { 858 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 859 assert(Builder.GetInsertPoint() != InstrBB->end() && 860 "Cannot get the Instrumentation point"); 861 Builder.CreateCall( 862 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 863 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 864 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 865 Builder.getInt32(I++)}); 866 } 867 868 // Now instrument select instructions: 869 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 870 FuncInfo.FunctionHash); 871 assert(I == NumCounters); 872 873 if (DisableValueProfiling) 874 return; 875 876 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 877 878 // Intrinsic function calls do not have funclet operand bundles needed for 879 // Windows exception handling attached to them. However, if value profiling is 880 // inserted for one of these calls, then a funclet value will need to be set 881 // on the instrumentation call based on the funclet coloring. 882 DenseMap<BasicBlock *, ColorVector> BlockColors; 883 if (F.hasPersonalityFn() && 884 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 885 BlockColors = colorEHFunclets(F); 886 887 // For each VP Kind, walk the VP candidates and instrument each one. 888 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 889 unsigned SiteIndex = 0; 890 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 891 continue; 892 893 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 894 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 895 << " site: CallSite Index = " << SiteIndex << "\n"); 896 897 IRBuilder<> Builder(Cand.InsertPt); 898 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 899 "Cannot get the Instrumentation point"); 900 901 Value *ToProfile = nullptr; 902 if (Cand.V->getType()->isIntegerTy()) 903 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 904 else if (Cand.V->getType()->isPointerTy()) 905 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 906 assert(ToProfile && "value profiling Value is of unexpected type"); 907 908 SmallVector<OperandBundleDef, 1> OpBundles; 909 populateEHOperandBundle(Cand, BlockColors, OpBundles); 910 Builder.CreateCall( 911 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 912 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 913 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 914 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 915 OpBundles); 916 } 917 } // IPVK_First <= Kind <= IPVK_Last 918 } 919 920 namespace { 921 922 // This class represents a CFG edge in profile use compilation. 923 struct PGOUseEdge : public PGOEdge { 924 bool CountValid = false; 925 uint64_t CountValue = 0; 926 927 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 928 : PGOEdge(Src, Dest, W) {} 929 930 // Set edge count value 931 void setEdgeCount(uint64_t Value) { 932 CountValue = Value; 933 CountValid = true; 934 } 935 936 // Return the information string for this object. 937 const std::string infoString() const { 938 if (!CountValid) 939 return PGOEdge::infoString(); 940 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 941 .str(); 942 } 943 }; 944 945 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 946 947 // This class stores the auxiliary information for each BB. 948 struct UseBBInfo : public BBInfo { 949 uint64_t CountValue = 0; 950 bool CountValid; 951 int32_t UnknownCountInEdge = 0; 952 int32_t UnknownCountOutEdge = 0; 953 DirectEdges InEdges; 954 DirectEdges OutEdges; 955 956 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 957 958 UseBBInfo(unsigned IX, uint64_t C) 959 : BBInfo(IX), CountValue(C), CountValid(true) {} 960 961 // Set the profile count value for this BB. 962 void setBBInfoCount(uint64_t Value) { 963 CountValue = Value; 964 CountValid = true; 965 } 966 967 // Return the information string of this object. 968 const std::string infoString() const { 969 if (!CountValid) 970 return BBInfo::infoString(); 971 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 972 } 973 974 // Add an OutEdge and update the edge count. 975 void addOutEdge(PGOUseEdge *E) { 976 OutEdges.push_back(E); 977 UnknownCountOutEdge++; 978 } 979 980 // Add an InEdge and update the edge count. 981 void addInEdge(PGOUseEdge *E) { 982 InEdges.push_back(E); 983 UnknownCountInEdge++; 984 } 985 }; 986 987 } // end anonymous namespace 988 989 // Sum up the count values for all the edges. 990 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 991 uint64_t Total = 0; 992 for (auto &E : Edges) { 993 if (E->Removed) 994 continue; 995 Total += E->CountValue; 996 } 997 return Total; 998 } 999 1000 namespace { 1001 1002 class PGOUseFunc { 1003 public: 1004 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1005 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1006 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1007 ProfileSummaryInfo *PSI, bool IsCS) 1008 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1009 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS), 1010 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1011 1012 // Read counts for the instrumented BB from profile. 1013 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros); 1014 1015 // Populate the counts for all BBs. 1016 void populateCounters(); 1017 1018 // Set the branch weights based on the count values. 1019 void setBranchWeights(); 1020 1021 // Annotate the value profile call sites for all value kind. 1022 void annotateValueSites(); 1023 1024 // Annotate the value profile call sites for one value kind. 1025 void annotateValueSites(uint32_t Kind); 1026 1027 // Annotate the irreducible loop header weights. 1028 void annotateIrrLoopHeaderWeights(); 1029 1030 // The hotness of the function from the profile count. 1031 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1032 1033 // Return the function hotness from the profile. 1034 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1035 1036 // Return the function hash. 1037 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1038 1039 // Return the profile record for this function; 1040 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1041 1042 // Return the auxiliary BB information. 1043 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1044 return FuncInfo.getBBInfo(BB); 1045 } 1046 1047 // Return the auxiliary BB information if available. 1048 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1049 return FuncInfo.findBBInfo(BB); 1050 } 1051 1052 Function &getFunc() const { return F; } 1053 1054 void dumpInfo(std::string Str = "") const { 1055 FuncInfo.dumpInfo(Str); 1056 } 1057 1058 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1059 private: 1060 Function &F; 1061 Module *M; 1062 BlockFrequencyInfo *BFI; 1063 ProfileSummaryInfo *PSI; 1064 1065 // This member stores the shared information with class PGOGenFunc. 1066 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1067 1068 // The maximum count value in the profile. This is only used in PGO use 1069 // compilation. 1070 uint64_t ProgramMaxCount; 1071 1072 // Position of counter that remains to be read. 1073 uint32_t CountPosition = 0; 1074 1075 // Total size of the profile count for this function. 1076 uint32_t ProfileCountSize = 0; 1077 1078 // ProfileRecord for this function. 1079 InstrProfRecord ProfileRecord; 1080 1081 // Function hotness info derived from profile. 1082 FuncFreqAttr FreqAttr; 1083 1084 // Is to use the context sensitive profile. 1085 bool IsCS; 1086 1087 // Find the Instrumented BB and set the value. Return false on error. 1088 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1089 1090 // Set the edge counter value for the unknown edge -- there should be only 1091 // one unknown edge. 1092 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1093 1094 // Return FuncName string; 1095 const std::string getFuncName() const { return FuncInfo.FuncName; } 1096 1097 // Set the hot/cold inline hints based on the count values. 1098 // FIXME: This function should be removed once the functionality in 1099 // the inliner is implemented. 1100 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1101 if (PSI->isHotCount(EntryCount)) 1102 FreqAttr = FFA_Hot; 1103 else if (PSI->isColdCount(MaxCount)) 1104 FreqAttr = FFA_Cold; 1105 } 1106 }; 1107 1108 } // end anonymous namespace 1109 1110 // Visit all the edges and assign the count value for the instrumented 1111 // edges and the BB. Return false on error. 1112 bool PGOUseFunc::setInstrumentedCounts( 1113 const std::vector<uint64_t> &CountFromProfile) { 1114 1115 std::vector<BasicBlock *> InstrumentBBs; 1116 FuncInfo.getInstrumentBBs(InstrumentBBs); 1117 unsigned NumCounters = 1118 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1119 // The number of counters here should match the number of counters 1120 // in profile. Return if they mismatch. 1121 if (NumCounters != CountFromProfile.size()) { 1122 return false; 1123 } 1124 // Set the profile count to the Instrumented BBs. 1125 uint32_t I = 0; 1126 for (BasicBlock *InstrBB : InstrumentBBs) { 1127 uint64_t CountValue = CountFromProfile[I++]; 1128 UseBBInfo &Info = getBBInfo(InstrBB); 1129 Info.setBBInfoCount(CountValue); 1130 } 1131 ProfileCountSize = CountFromProfile.size(); 1132 CountPosition = I; 1133 1134 // Set the edge count and update the count of unknown edges for BBs. 1135 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1136 E->setEdgeCount(Value); 1137 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1138 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1139 }; 1140 1141 // Set the profile count the Instrumented edges. There are BBs that not in 1142 // MST but not instrumented. Need to set the edge count value so that we can 1143 // populate the profile counts later. 1144 for (auto &E : FuncInfo.MST.AllEdges) { 1145 if (E->Removed || E->InMST) 1146 continue; 1147 const BasicBlock *SrcBB = E->SrcBB; 1148 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1149 1150 // If only one out-edge, the edge profile count should be the same as BB 1151 // profile count. 1152 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1153 setEdgeCount(E.get(), SrcInfo.CountValue); 1154 else { 1155 const BasicBlock *DestBB = E->DestBB; 1156 UseBBInfo &DestInfo = getBBInfo(DestBB); 1157 // If only one in-edge, the edge profile count should be the same as BB 1158 // profile count. 1159 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1160 setEdgeCount(E.get(), DestInfo.CountValue); 1161 } 1162 if (E->CountValid) 1163 continue; 1164 // E's count should have been set from profile. If not, this meenas E skips 1165 // the instrumentation. We set the count to 0. 1166 setEdgeCount(E.get(), 0); 1167 } 1168 return true; 1169 } 1170 1171 // Set the count value for the unknown edge. There should be one and only one 1172 // unknown edge in Edges vector. 1173 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1174 for (auto &E : Edges) { 1175 if (E->CountValid) 1176 continue; 1177 E->setEdgeCount(Value); 1178 1179 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1180 getBBInfo(E->DestBB).UnknownCountInEdge--; 1181 return; 1182 } 1183 llvm_unreachable("Cannot find the unknown count edge"); 1184 } 1185 1186 // Read the profile from ProfileFileName and assign the value to the 1187 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1188 // Return true if the profile are successfully read, and false on errors. 1189 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) { 1190 auto &Ctx = M->getContext(); 1191 Expected<InstrProfRecord> Result = 1192 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1193 if (Error E = Result.takeError()) { 1194 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1195 auto Err = IPE.get(); 1196 bool SkipWarning = false; 1197 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1198 << FuncInfo.FuncName << ": "); 1199 if (Err == instrprof_error::unknown_function) { 1200 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1201 SkipWarning = !PGOWarnMissing; 1202 LLVM_DEBUG(dbgs() << "unknown function"); 1203 } else if (Err == instrprof_error::hash_mismatch || 1204 Err == instrprof_error::malformed) { 1205 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1206 SkipWarning = 1207 NoPGOWarnMismatch || 1208 (NoPGOWarnMismatchComdat && 1209 (F.hasComdat() || 1210 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1211 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1212 } 1213 1214 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1215 if (SkipWarning) 1216 return; 1217 1218 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1219 std::string(" Hash = ") + 1220 std::to_string(FuncInfo.FunctionHash); 1221 1222 Ctx.diagnose( 1223 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1224 }); 1225 return false; 1226 } 1227 ProfileRecord = std::move(Result.get()); 1228 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1229 1230 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1231 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1232 uint64_t ValueSum = 0; 1233 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1234 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1235 ValueSum += CountFromProfile[I]; 1236 } 1237 AllZeros = (ValueSum == 0); 1238 1239 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1240 1241 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1242 getBBInfo(nullptr).UnknownCountInEdge = 2; 1243 1244 if (!setInstrumentedCounts(CountFromProfile)) { 1245 LLVM_DEBUG( 1246 dbgs() << "Inconsistent number of counts, skipping this function"); 1247 Ctx.diagnose(DiagnosticInfoPGOProfile( 1248 M->getName().data(), 1249 Twine("Inconsistent number of counts in ") + F.getName().str() 1250 + Twine(": the profile may be stale or there is a function name collision."), 1251 DS_Warning)); 1252 return false; 1253 } 1254 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1255 return true; 1256 } 1257 1258 // Populate the counters from instrumented BBs to all BBs. 1259 // In the end of this operation, all BBs should have a valid count value. 1260 void PGOUseFunc::populateCounters() { 1261 bool Changes = true; 1262 unsigned NumPasses = 0; 1263 while (Changes) { 1264 NumPasses++; 1265 Changes = false; 1266 1267 // For efficient traversal, it's better to start from the end as most 1268 // of the instrumented edges are at the end. 1269 for (auto &BB : reverse(F)) { 1270 UseBBInfo *Count = findBBInfo(&BB); 1271 if (Count == nullptr) 1272 continue; 1273 if (!Count->CountValid) { 1274 if (Count->UnknownCountOutEdge == 0) { 1275 Count->CountValue = sumEdgeCount(Count->OutEdges); 1276 Count->CountValid = true; 1277 Changes = true; 1278 } else if (Count->UnknownCountInEdge == 0) { 1279 Count->CountValue = sumEdgeCount(Count->InEdges); 1280 Count->CountValid = true; 1281 Changes = true; 1282 } 1283 } 1284 if (Count->CountValid) { 1285 if (Count->UnknownCountOutEdge == 1) { 1286 uint64_t Total = 0; 1287 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1288 // If the one of the successor block can early terminate (no-return), 1289 // we can end up with situation where out edge sum count is larger as 1290 // the source BB's count is collected by a post-dominated block. 1291 if (Count->CountValue > OutSum) 1292 Total = Count->CountValue - OutSum; 1293 setEdgeCount(Count->OutEdges, Total); 1294 Changes = true; 1295 } 1296 if (Count->UnknownCountInEdge == 1) { 1297 uint64_t Total = 0; 1298 uint64_t InSum = sumEdgeCount(Count->InEdges); 1299 if (Count->CountValue > InSum) 1300 Total = Count->CountValue - InSum; 1301 setEdgeCount(Count->InEdges, Total); 1302 Changes = true; 1303 } 1304 } 1305 } 1306 } 1307 1308 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1309 #ifndef NDEBUG 1310 // Assert every BB has a valid counter. 1311 for (auto &BB : F) { 1312 auto BI = findBBInfo(&BB); 1313 if (BI == nullptr) 1314 continue; 1315 assert(BI->CountValid && "BB count is not valid"); 1316 } 1317 #endif 1318 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1319 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1320 uint64_t FuncMaxCount = FuncEntryCount; 1321 for (auto &BB : F) { 1322 auto BI = findBBInfo(&BB); 1323 if (BI == nullptr) 1324 continue; 1325 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1326 } 1327 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1328 1329 // Now annotate select instructions 1330 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1331 assert(CountPosition == ProfileCountSize); 1332 1333 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1334 } 1335 1336 // Assign the scaled count values to the BB with multiple out edges. 1337 void PGOUseFunc::setBranchWeights() { 1338 // Generate MD_prof metadata for every branch instruction. 1339 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1340 << " IsCS=" << IsCS << "\n"); 1341 for (auto &BB : F) { 1342 Instruction *TI = BB.getTerminator(); 1343 if (TI->getNumSuccessors() < 2) 1344 continue; 1345 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1346 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI))) 1347 continue; 1348 1349 if (getBBInfo(&BB).CountValue == 0) 1350 continue; 1351 1352 // We have a non-zero Branch BB. 1353 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1354 unsigned Size = BBCountInfo.OutEdges.size(); 1355 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1356 uint64_t MaxCount = 0; 1357 for (unsigned s = 0; s < Size; s++) { 1358 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1359 const BasicBlock *SrcBB = E->SrcBB; 1360 const BasicBlock *DestBB = E->DestBB; 1361 if (DestBB == nullptr) 1362 continue; 1363 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1364 uint64_t EdgeCount = E->CountValue; 1365 if (EdgeCount > MaxCount) 1366 MaxCount = EdgeCount; 1367 EdgeCounts[SuccNum] = EdgeCount; 1368 } 1369 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1370 } 1371 } 1372 1373 static bool isIndirectBrTarget(BasicBlock *BB) { 1374 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 1375 if (isa<IndirectBrInst>((*PI)->getTerminator())) 1376 return true; 1377 } 1378 return false; 1379 } 1380 1381 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1382 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1383 // Find irr loop headers 1384 for (auto &BB : F) { 1385 // As a heuristic also annotate indrectbr targets as they have a high chance 1386 // to become an irreducible loop header after the indirectbr tail 1387 // duplication. 1388 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1389 Instruction *TI = BB.getTerminator(); 1390 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1391 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1392 } 1393 } 1394 } 1395 1396 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1397 Module *M = F.getParent(); 1398 IRBuilder<> Builder(&SI); 1399 Type *Int64Ty = Builder.getInt64Ty(); 1400 Type *I8PtrTy = Builder.getInt8PtrTy(); 1401 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1402 Builder.CreateCall( 1403 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1404 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1405 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1406 Builder.getInt32(*CurCtrIdx), Step}); 1407 ++(*CurCtrIdx); 1408 } 1409 1410 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1411 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1412 assert(*CurCtrIdx < CountFromProfile.size() && 1413 "Out of bound access of counters"); 1414 uint64_t SCounts[2]; 1415 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1416 ++(*CurCtrIdx); 1417 uint64_t TotalCount = 0; 1418 auto BI = UseFunc->findBBInfo(SI.getParent()); 1419 if (BI != nullptr) 1420 TotalCount = BI->CountValue; 1421 // False Count 1422 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1423 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1424 if (MaxCount) 1425 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1426 } 1427 1428 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1429 if (!PGOInstrSelect) 1430 return; 1431 // FIXME: do not handle this yet. 1432 if (SI.getCondition()->getType()->isVectorTy()) 1433 return; 1434 1435 switch (Mode) { 1436 case VM_counting: 1437 NSIs++; 1438 return; 1439 case VM_instrument: 1440 instrumentOneSelectInst(SI); 1441 return; 1442 case VM_annotate: 1443 annotateOneSelectInst(SI); 1444 return; 1445 } 1446 1447 llvm_unreachable("Unknown visiting mode"); 1448 } 1449 1450 // Traverse all valuesites and annotate the instructions for all value kind. 1451 void PGOUseFunc::annotateValueSites() { 1452 if (DisableValueProfiling) 1453 return; 1454 1455 // Create the PGOFuncName meta data. 1456 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1457 1458 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1459 annotateValueSites(Kind); 1460 } 1461 1462 // Annotate the instructions for a specific value kind. 1463 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1464 assert(Kind <= IPVK_Last); 1465 unsigned ValueSiteIndex = 0; 1466 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1467 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1468 if (NumValueSites != ValueSites.size()) { 1469 auto &Ctx = M->getContext(); 1470 Ctx.diagnose(DiagnosticInfoPGOProfile( 1471 M->getName().data(), 1472 Twine("Inconsistent number of value sites for ") + 1473 Twine(ValueProfKindDescr[Kind]) + 1474 Twine(" profiling in \"") + F.getName().str() + 1475 Twine("\", possibly due to the use of a stale profile."), 1476 DS_Warning)); 1477 return; 1478 } 1479 1480 for (VPCandidateInfo &I : ValueSites) { 1481 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1482 << "): Index = " << ValueSiteIndex << " out of " 1483 << NumValueSites << "\n"); 1484 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1485 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1486 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1487 : MaxNumAnnotations); 1488 ValueSiteIndex++; 1489 } 1490 } 1491 1492 // Collect the set of members for each Comdat in module M and store 1493 // in ComdatMembers. 1494 static void collectComdatMembers( 1495 Module &M, 1496 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1497 if (!DoComdatRenaming) 1498 return; 1499 for (Function &F : M) 1500 if (Comdat *C = F.getComdat()) 1501 ComdatMembers.insert(std::make_pair(C, &F)); 1502 for (GlobalVariable &GV : M.globals()) 1503 if (Comdat *C = GV.getComdat()) 1504 ComdatMembers.insert(std::make_pair(C, &GV)); 1505 for (GlobalAlias &GA : M.aliases()) 1506 if (Comdat *C = GA.getComdat()) 1507 ComdatMembers.insert(std::make_pair(C, &GA)); 1508 } 1509 1510 static bool InstrumentAllFunctions( 1511 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1512 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1513 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1514 // For the context-sensitve instrumentation, we should have a separated pass 1515 // (before LTO/ThinLTO linking) to create these variables. 1516 if (!IsCS) 1517 createIRLevelProfileFlagVar(M, /* IsCS */ false); 1518 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1519 collectComdatMembers(M, ComdatMembers); 1520 1521 for (auto &F : M) { 1522 if (F.isDeclaration()) 1523 continue; 1524 auto &TLI = LookupTLI(F); 1525 auto *BPI = LookupBPI(F); 1526 auto *BFI = LookupBFI(F); 1527 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1528 } 1529 return true; 1530 } 1531 1532 PreservedAnalyses 1533 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1534 createProfileFileNameVar(M, CSInstrName); 1535 createIRLevelProfileFlagVar(M, /* IsCS */ true); 1536 return PreservedAnalyses::all(); 1537 } 1538 1539 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1540 if (skipModule(M)) 1541 return false; 1542 1543 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 1544 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 1545 }; 1546 auto LookupBPI = [this](Function &F) { 1547 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1548 }; 1549 auto LookupBFI = [this](Function &F) { 1550 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1551 }; 1552 return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS); 1553 } 1554 1555 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1556 ModuleAnalysisManager &AM) { 1557 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1558 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1559 return FAM.getResult<TargetLibraryAnalysis>(F); 1560 }; 1561 auto LookupBPI = [&FAM](Function &F) { 1562 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1563 }; 1564 auto LookupBFI = [&FAM](Function &F) { 1565 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1566 }; 1567 1568 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1569 return PreservedAnalyses::all(); 1570 1571 return PreservedAnalyses::none(); 1572 } 1573 1574 static bool annotateAllFunctions( 1575 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1576 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1577 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1578 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1579 ProfileSummaryInfo *PSI, bool IsCS) { 1580 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1581 auto &Ctx = M.getContext(); 1582 // Read the counter array from file. 1583 auto ReaderOrErr = 1584 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1585 if (Error E = ReaderOrErr.takeError()) { 1586 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1587 Ctx.diagnose( 1588 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1589 }); 1590 return false; 1591 } 1592 1593 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1594 std::move(ReaderOrErr.get()); 1595 if (!PGOReader) { 1596 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1597 StringRef("Cannot get PGOReader"))); 1598 return false; 1599 } 1600 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1601 return false; 1602 1603 // TODO: might need to change the warning once the clang option is finalized. 1604 if (!PGOReader->isIRLevelProfile()) { 1605 Ctx.diagnose(DiagnosticInfoPGOProfile( 1606 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1607 return false; 1608 } 1609 1610 // Add the profile summary (read from the header of the indexed summary) here 1611 // so that we can use it below when reading counters (which checks if the 1612 // function should be marked with a cold or inlinehint attribute). 1613 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1614 IsCS ? ProfileSummary::PSK_CSInstr 1615 : ProfileSummary::PSK_Instr); 1616 PSI->refresh(); 1617 1618 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1619 collectComdatMembers(M, ComdatMembers); 1620 std::vector<Function *> HotFunctions; 1621 std::vector<Function *> ColdFunctions; 1622 for (auto &F : M) { 1623 if (F.isDeclaration()) 1624 continue; 1625 auto &TLI = LookupTLI(F); 1626 auto *BPI = LookupBPI(F); 1627 auto *BFI = LookupBFI(F); 1628 // Split indirectbr critical edges here before computing the MST rather than 1629 // later in getInstrBB() to avoid invalidating it. 1630 SplitIndirectBrCriticalEdges(F, BPI, BFI); 1631 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS); 1632 bool AllZeros = false; 1633 if (!Func.readCounters(PGOReader.get(), AllZeros)) 1634 continue; 1635 if (AllZeros) { 1636 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1637 if (Func.getProgramMaxCount() != 0) 1638 ColdFunctions.push_back(&F); 1639 continue; 1640 } 1641 Func.populateCounters(); 1642 Func.setBranchWeights(); 1643 Func.annotateValueSites(); 1644 Func.annotateIrrLoopHeaderWeights(); 1645 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1646 if (FreqAttr == PGOUseFunc::FFA_Cold) 1647 ColdFunctions.push_back(&F); 1648 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1649 HotFunctions.push_back(&F); 1650 if (PGOViewCounts != PGOVCT_None && 1651 (ViewBlockFreqFuncName.empty() || 1652 F.getName().equals(ViewBlockFreqFuncName))) { 1653 LoopInfo LI{DominatorTree(F)}; 1654 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1655 std::make_unique<BranchProbabilityInfo>(F, LI); 1656 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1657 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1658 if (PGOViewCounts == PGOVCT_Graph) 1659 NewBFI->view(); 1660 else if (PGOViewCounts == PGOVCT_Text) { 1661 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1662 NewBFI->print(dbgs()); 1663 } 1664 } 1665 if (PGOViewRawCounts != PGOVCT_None && 1666 (ViewBlockFreqFuncName.empty() || 1667 F.getName().equals(ViewBlockFreqFuncName))) { 1668 if (PGOViewRawCounts == PGOVCT_Graph) 1669 if (ViewBlockFreqFuncName.empty()) 1670 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1671 else 1672 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1673 else if (PGOViewRawCounts == PGOVCT_Text) { 1674 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1675 Func.dumpInfo(); 1676 } 1677 } 1678 } 1679 1680 // Set function hotness attribute from the profile. 1681 // We have to apply these attributes at the end because their presence 1682 // can affect the BranchProbabilityInfo of any callers, resulting in an 1683 // inconsistent MST between prof-gen and prof-use. 1684 for (auto &F : HotFunctions) { 1685 F->addFnAttr(Attribute::InlineHint); 1686 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1687 << "\n"); 1688 } 1689 for (auto &F : ColdFunctions) { 1690 F->addFnAttr(Attribute::Cold); 1691 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 1692 << "\n"); 1693 } 1694 return true; 1695 } 1696 1697 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 1698 std::string RemappingFilename, 1699 bool IsCS) 1700 : ProfileFileName(std::move(Filename)), 1701 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 1702 if (!PGOTestProfileFile.empty()) 1703 ProfileFileName = PGOTestProfileFile; 1704 if (!PGOTestProfileRemappingFile.empty()) 1705 ProfileRemappingFileName = PGOTestProfileRemappingFile; 1706 } 1707 1708 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1709 ModuleAnalysisManager &AM) { 1710 1711 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1712 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1713 return FAM.getResult<TargetLibraryAnalysis>(F); 1714 }; 1715 auto LookupBPI = [&FAM](Function &F) { 1716 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1717 }; 1718 auto LookupBFI = [&FAM](Function &F) { 1719 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1720 }; 1721 1722 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 1723 1724 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 1725 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 1726 return PreservedAnalyses::all(); 1727 1728 return PreservedAnalyses::none(); 1729 } 1730 1731 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 1732 if (skipModule(M)) 1733 return false; 1734 1735 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 1736 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 1737 }; 1738 auto LookupBPI = [this](Function &F) { 1739 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1740 }; 1741 auto LookupBFI = [this](Function &F) { 1742 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1743 }; 1744 1745 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 1746 return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI, 1747 LookupBFI, PSI, IsCS); 1748 } 1749 1750 static std::string getSimpleNodeName(const BasicBlock *Node) { 1751 if (!Node->getName().empty()) 1752 return std::string(Node->getName()); 1753 1754 std::string SimpleNodeName; 1755 raw_string_ostream OS(SimpleNodeName); 1756 Node->printAsOperand(OS, false); 1757 return OS.str(); 1758 } 1759 1760 void llvm::setProfMetadata(Module *M, Instruction *TI, 1761 ArrayRef<uint64_t> EdgeCounts, 1762 uint64_t MaxCount) { 1763 MDBuilder MDB(M->getContext()); 1764 assert(MaxCount > 0 && "Bad max count"); 1765 uint64_t Scale = calculateCountScale(MaxCount); 1766 SmallVector<unsigned, 4> Weights; 1767 for (const auto &ECI : EdgeCounts) 1768 Weights.push_back(scaleBranchCount(ECI, Scale)); 1769 1770 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 1771 : Weights) { 1772 dbgs() << W << " "; 1773 } dbgs() << "\n";); 1774 1775 misexpect::verifyMisExpect(TI, Weights, TI->getContext()); 1776 1777 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1778 if (EmitBranchProbability) { 1779 std::string BrCondStr = getBranchCondString(TI); 1780 if (BrCondStr.empty()) 1781 return; 1782 1783 uint64_t WSum = 1784 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 1785 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 1786 uint64_t TotalCount = 1787 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 1788 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 1789 Scale = calculateCountScale(WSum); 1790 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 1791 scaleBranchCount(WSum, Scale)); 1792 std::string BranchProbStr; 1793 raw_string_ostream OS(BranchProbStr); 1794 OS << BP; 1795 OS << " (total count : " << TotalCount << ")"; 1796 OS.flush(); 1797 Function *F = TI->getParent()->getParent(); 1798 OptimizationRemarkEmitter ORE(F); 1799 ORE.emit([&]() { 1800 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 1801 << BrCondStr << " is true with probability : " << BranchProbStr; 1802 }); 1803 } 1804 } 1805 1806 namespace llvm { 1807 1808 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 1809 MDBuilder MDB(M->getContext()); 1810 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 1811 MDB.createIrrLoopHeaderWeight(Count)); 1812 } 1813 1814 template <> struct GraphTraits<PGOUseFunc *> { 1815 using NodeRef = const BasicBlock *; 1816 using ChildIteratorType = const_succ_iterator; 1817 using nodes_iterator = pointer_iterator<Function::const_iterator>; 1818 1819 static NodeRef getEntryNode(const PGOUseFunc *G) { 1820 return &G->getFunc().front(); 1821 } 1822 1823 static ChildIteratorType child_begin(const NodeRef N) { 1824 return succ_begin(N); 1825 } 1826 1827 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 1828 1829 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 1830 return nodes_iterator(G->getFunc().begin()); 1831 } 1832 1833 static nodes_iterator nodes_end(const PGOUseFunc *G) { 1834 return nodes_iterator(G->getFunc().end()); 1835 } 1836 }; 1837 1838 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 1839 explicit DOTGraphTraits(bool isSimple = false) 1840 : DefaultDOTGraphTraits(isSimple) {} 1841 1842 static std::string getGraphName(const PGOUseFunc *G) { 1843 return std::string(G->getFunc().getName()); 1844 } 1845 1846 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 1847 std::string Result; 1848 raw_string_ostream OS(Result); 1849 1850 OS << getSimpleNodeName(Node) << ":\\l"; 1851 UseBBInfo *BI = Graph->findBBInfo(Node); 1852 OS << "Count : "; 1853 if (BI && BI->CountValid) 1854 OS << BI->CountValue << "\\l"; 1855 else 1856 OS << "Unknown\\l"; 1857 1858 if (!PGOInstrSelect) 1859 return Result; 1860 1861 for (auto BI = Node->begin(); BI != Node->end(); ++BI) { 1862 auto *I = &*BI; 1863 if (!isa<SelectInst>(I)) 1864 continue; 1865 // Display scaled counts for SELECT instruction: 1866 OS << "SELECT : { T = "; 1867 uint64_t TC, FC; 1868 bool HasProf = I->extractProfMetadata(TC, FC); 1869 if (!HasProf) 1870 OS << "Unknown, F = Unknown }\\l"; 1871 else 1872 OS << TC << ", F = " << FC << " }\\l"; 1873 } 1874 return Result; 1875 } 1876 }; 1877 1878 } // end namespace llvm 1879