1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "CFGMST.h" 52 #include "ValueProfileCollector.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/MapVector.h" 56 #include "llvm/ADT/STLExtras.h" 57 #include "llvm/ADT/SmallVector.h" 58 #include "llvm/ADT/Statistic.h" 59 #include "llvm/ADT/StringRef.h" 60 #include "llvm/ADT/Triple.h" 61 #include "llvm/ADT/Twine.h" 62 #include "llvm/ADT/iterator.h" 63 #include "llvm/ADT/iterator_range.h" 64 #include "llvm/Analysis/BlockFrequencyInfo.h" 65 #include "llvm/Analysis/BranchProbabilityInfo.h" 66 #include "llvm/Analysis/CFG.h" 67 #include "llvm/Analysis/EHPersonalities.h" 68 #include "llvm/Analysis/LoopInfo.h" 69 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 70 #include "llvm/Analysis/ProfileSummaryInfo.h" 71 #include "llvm/IR/Attributes.h" 72 #include "llvm/IR/BasicBlock.h" 73 #include "llvm/IR/CFG.h" 74 #include "llvm/IR/Comdat.h" 75 #include "llvm/IR/Constant.h" 76 #include "llvm/IR/Constants.h" 77 #include "llvm/IR/DiagnosticInfo.h" 78 #include "llvm/IR/Dominators.h" 79 #include "llvm/IR/Function.h" 80 #include "llvm/IR/GlobalAlias.h" 81 #include "llvm/IR/GlobalValue.h" 82 #include "llvm/IR/GlobalVariable.h" 83 #include "llvm/IR/IRBuilder.h" 84 #include "llvm/IR/InstVisitor.h" 85 #include "llvm/IR/InstrTypes.h" 86 #include "llvm/IR/Instruction.h" 87 #include "llvm/IR/Instructions.h" 88 #include "llvm/IR/IntrinsicInst.h" 89 #include "llvm/IR/Intrinsics.h" 90 #include "llvm/IR/LLVMContext.h" 91 #include "llvm/IR/MDBuilder.h" 92 #include "llvm/IR/Module.h" 93 #include "llvm/IR/PassManager.h" 94 #include "llvm/IR/ProfileSummary.h" 95 #include "llvm/IR/Type.h" 96 #include "llvm/IR/Value.h" 97 #include "llvm/InitializePasses.h" 98 #include "llvm/Pass.h" 99 #include "llvm/ProfileData/InstrProf.h" 100 #include "llvm/ProfileData/InstrProfReader.h" 101 #include "llvm/Support/BranchProbability.h" 102 #include "llvm/Support/CRC.h" 103 #include "llvm/Support/Casting.h" 104 #include "llvm/Support/CommandLine.h" 105 #include "llvm/Support/DOTGraphTraits.h" 106 #include "llvm/Support/Debug.h" 107 #include "llvm/Support/Error.h" 108 #include "llvm/Support/ErrorHandling.h" 109 #include "llvm/Support/GraphWriter.h" 110 #include "llvm/Support/raw_ostream.h" 111 #include "llvm/Transforms/Instrumentation.h" 112 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 113 #include <algorithm> 114 #include <cassert> 115 #include <cstdint> 116 #include <memory> 117 #include <numeric> 118 #include <string> 119 #include <unordered_map> 120 #include <utility> 121 #include <vector> 122 123 using namespace llvm; 124 using ProfileCount = Function::ProfileCount; 125 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 126 127 #define DEBUG_TYPE "pgo-instrumentation" 128 129 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 130 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 131 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 132 STATISTIC(NumOfPGOEdge, "Number of edges."); 133 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 134 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 135 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 136 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 137 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 138 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 139 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 140 STATISTIC(NumOfCSPGOSelectInsts, 141 "Number of select instruction instrumented in CSPGO."); 142 STATISTIC(NumOfCSPGOMemIntrinsics, 143 "Number of mem intrinsics instrumented in CSPGO."); 144 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 145 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 146 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 147 STATISTIC(NumOfCSPGOFunc, 148 "Number of functions having valid profile counts in CSPGO."); 149 STATISTIC(NumOfCSPGOMismatch, 150 "Number of functions having mismatch profile in CSPGO."); 151 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 152 153 // Command line option to specify the file to read profile from. This is 154 // mainly used for testing. 155 static cl::opt<std::string> 156 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 157 cl::value_desc("filename"), 158 cl::desc("Specify the path of profile data file. This is" 159 "mainly for test purpose.")); 160 static cl::opt<std::string> PGOTestProfileRemappingFile( 161 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 162 cl::value_desc("filename"), 163 cl::desc("Specify the path of profile remapping file. This is mainly for " 164 "test purpose.")); 165 166 // Command line option to disable value profiling. The default is false: 167 // i.e. value profiling is enabled by default. This is for debug purpose. 168 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 169 cl::Hidden, 170 cl::desc("Disable Value Profiling")); 171 172 // Command line option to set the maximum number of VP annotations to write to 173 // the metadata for a single indirect call callsite. 174 static cl::opt<unsigned> MaxNumAnnotations( 175 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 176 cl::desc("Max number of annotations for a single indirect " 177 "call callsite")); 178 179 // Command line option to set the maximum number of value annotations 180 // to write to the metadata for a single memop intrinsic. 181 static cl::opt<unsigned> MaxNumMemOPAnnotations( 182 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 183 cl::desc("Max number of preicise value annotations for a single memop" 184 "intrinsic")); 185 186 // Command line option to control appending FunctionHash to the name of a COMDAT 187 // function. This is to avoid the hash mismatch caused by the preinliner. 188 static cl::opt<bool> DoComdatRenaming( 189 "do-comdat-renaming", cl::init(false), cl::Hidden, 190 cl::desc("Append function hash to the name of COMDAT function to avoid " 191 "function hash mismatch due to the preinliner")); 192 193 // Command line option to enable/disable the warning about missing profile 194 // information. 195 static cl::opt<bool> 196 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 197 cl::desc("Use this option to turn on/off " 198 "warnings about missing profile data for " 199 "functions.")); 200 201 // Command line option to enable/disable the warning about a hash mismatch in 202 // the profile data. 203 static cl::opt<bool> 204 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 205 cl::desc("Use this option to turn off/on " 206 "warnings about profile cfg mismatch.")); 207 208 // Command line option to enable/disable the warning about a hash mismatch in 209 // the profile data for Comdat functions, which often turns out to be false 210 // positive due to the pre-instrumentation inline. 211 static cl::opt<bool> 212 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 213 cl::Hidden, 214 cl::desc("The option is used to turn on/off " 215 "warnings about hash mismatch for comdat " 216 "functions.")); 217 218 // Command line option to enable/disable select instruction instrumentation. 219 static cl::opt<bool> 220 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 221 cl::desc("Use this option to turn on/off SELECT " 222 "instruction instrumentation. ")); 223 224 // Command line option to turn on CFG dot or text dump of raw profile counts 225 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 226 "pgo-view-raw-counts", cl::Hidden, 227 cl::desc("A boolean option to show CFG dag or text " 228 "with raw profile counts from " 229 "profile data. See also option " 230 "-pgo-view-counts. To limit graph " 231 "display to only one function, use " 232 "filtering option -view-bfi-func-name."), 233 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 234 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 235 clEnumValN(PGOVCT_Text, "text", "show in text."))); 236 237 // Command line option to enable/disable memop intrinsic call.size profiling. 238 static cl::opt<bool> 239 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 240 cl::desc("Use this option to turn on/off " 241 "memory intrinsic size profiling.")); 242 243 // Emit branch probability as optimization remarks. 244 static cl::opt<bool> 245 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 246 cl::desc("When this option is on, the annotated " 247 "branch probability will be emitted as " 248 "optimization remarks: -{Rpass|" 249 "pass-remarks}=pgo-instrumentation")); 250 251 static cl::opt<bool> PGOInstrumentEntry( 252 "pgo-instrument-entry", cl::init(false), cl::Hidden, 253 cl::desc("Force to instrument function entry basicblock.")); 254 255 static cl::opt<bool> 256 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 257 cl::desc("Fix function entry count in profile use.")); 258 259 static cl::opt<bool> PGOVerifyHotBFI( 260 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 261 cl::desc("Print out the non-match BFI count if a hot raw profile count " 262 "becomes non-hot, or a cold raw profile count becomes hot. " 263 "The print is enabled under -Rpass-analysis=pgo, or " 264 "internal option -pass-remakrs-analysis=pgo.")); 265 266 static cl::opt<bool> PGOVerifyBFI( 267 "pgo-verify-bfi", cl::init(false), cl::Hidden, 268 cl::desc("Print out mismatched BFI counts after setting profile metadata " 269 "The print is enabled under -Rpass-analysis=pgo, or " 270 "internal option -pass-remakrs-analysis=pgo.")); 271 272 static cl::opt<unsigned> PGOVerifyBFIRatio( 273 "pgo-verify-bfi-ratio", cl::init(5), cl::Hidden, 274 cl::desc("Set the threshold for pgo-verify-big -- only print out " 275 "mismatched BFI if the difference percentage is greater than " 276 "this value (in percentage).")); 277 278 static cl::opt<unsigned> PGOVerifyBFICutoff( 279 "pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden, 280 cl::desc("Set the threshold for pgo-verify-bfi -- skip the counts whose " 281 "profile count value is below.")); 282 283 // Command line option to turn on CFG dot dump after profile annotation. 284 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 285 extern cl::opt<PGOViewCountsType> PGOViewCounts; 286 287 // Command line option to specify the name of the function for CFG dump 288 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 289 extern cl::opt<std::string> ViewBlockFreqFuncName; 290 291 static cl::opt<bool> 292 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, 293 cl::desc("Use the old CFG function hashing")); 294 295 // Return a string describing the branch condition that can be 296 // used in static branch probability heuristics: 297 static std::string getBranchCondString(Instruction *TI) { 298 BranchInst *BI = dyn_cast<BranchInst>(TI); 299 if (!BI || !BI->isConditional()) 300 return std::string(); 301 302 Value *Cond = BI->getCondition(); 303 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 304 if (!CI) 305 return std::string(); 306 307 std::string result; 308 raw_string_ostream OS(result); 309 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 310 CI->getOperand(0)->getType()->print(OS, true); 311 312 Value *RHS = CI->getOperand(1); 313 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 314 if (CV) { 315 if (CV->isZero()) 316 OS << "_Zero"; 317 else if (CV->isOne()) 318 OS << "_One"; 319 else if (CV->isMinusOne()) 320 OS << "_MinusOne"; 321 else 322 OS << "_Const"; 323 } 324 OS.flush(); 325 return result; 326 } 327 328 static const char *ValueProfKindDescr[] = { 329 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 330 #include "llvm/ProfileData/InstrProfData.inc" 331 }; 332 333 namespace { 334 335 /// The select instruction visitor plays three roles specified 336 /// by the mode. In \c VM_counting mode, it simply counts the number of 337 /// select instructions. In \c VM_instrument mode, it inserts code to count 338 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 339 /// it reads the profile data and annotate the select instruction with metadata. 340 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 341 class PGOUseFunc; 342 343 /// Instruction Visitor class to visit select instructions. 344 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 345 Function &F; 346 unsigned NSIs = 0; // Number of select instructions instrumented. 347 VisitMode Mode = VM_counting; // Visiting mode. 348 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 349 unsigned TotalNumCtrs = 0; // Total number of counters 350 GlobalVariable *FuncNameVar = nullptr; 351 uint64_t FuncHash = 0; 352 PGOUseFunc *UseFunc = nullptr; 353 354 SelectInstVisitor(Function &Func) : F(Func) {} 355 356 void countSelects(Function &Func) { 357 NSIs = 0; 358 Mode = VM_counting; 359 visit(Func); 360 } 361 362 // Visit the IR stream and instrument all select instructions. \p 363 // Ind is a pointer to the counter index variable; \p TotalNC 364 // is the total number of counters; \p FNV is the pointer to the 365 // PGO function name var; \p FHash is the function hash. 366 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 367 GlobalVariable *FNV, uint64_t FHash) { 368 Mode = VM_instrument; 369 CurCtrIdx = Ind; 370 TotalNumCtrs = TotalNC; 371 FuncHash = FHash; 372 FuncNameVar = FNV; 373 visit(Func); 374 } 375 376 // Visit the IR stream and annotate all select instructions. 377 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 378 Mode = VM_annotate; 379 UseFunc = UF; 380 CurCtrIdx = Ind; 381 visit(Func); 382 } 383 384 void instrumentOneSelectInst(SelectInst &SI); 385 void annotateOneSelectInst(SelectInst &SI); 386 387 // Visit \p SI instruction and perform tasks according to visit mode. 388 void visitSelectInst(SelectInst &SI); 389 390 // Return the number of select instructions. This needs be called after 391 // countSelects(). 392 unsigned getNumOfSelectInsts() const { return NSIs; } 393 }; 394 395 396 class PGOInstrumentationGenLegacyPass : public ModulePass { 397 public: 398 static char ID; 399 400 PGOInstrumentationGenLegacyPass(bool IsCS = false) 401 : ModulePass(ID), IsCS(IsCS) { 402 initializePGOInstrumentationGenLegacyPassPass( 403 *PassRegistry::getPassRegistry()); 404 } 405 406 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 407 408 private: 409 // Is this is context-sensitive instrumentation. 410 bool IsCS; 411 bool runOnModule(Module &M) override; 412 413 void getAnalysisUsage(AnalysisUsage &AU) const override { 414 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 415 AU.addRequired<TargetLibraryInfoWrapperPass>(); 416 } 417 }; 418 419 class PGOInstrumentationUseLegacyPass : public ModulePass { 420 public: 421 static char ID; 422 423 // Provide the profile filename as the parameter. 424 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) 425 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { 426 if (!PGOTestProfileFile.empty()) 427 ProfileFileName = PGOTestProfileFile; 428 initializePGOInstrumentationUseLegacyPassPass( 429 *PassRegistry::getPassRegistry()); 430 } 431 432 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 433 434 private: 435 std::string ProfileFileName; 436 // Is this is context-sensitive instrumentation use. 437 bool IsCS; 438 439 bool runOnModule(Module &M) override; 440 441 void getAnalysisUsage(AnalysisUsage &AU) const override { 442 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 443 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 444 AU.addRequired<TargetLibraryInfoWrapperPass>(); 445 } 446 }; 447 448 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { 449 public: 450 static char ID; 451 StringRef getPassName() const override { 452 return "PGOInstrumentationGenCreateVarPass"; 453 } 454 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") 455 : ModulePass(ID), InstrProfileOutput(CSInstrName) { 456 initializePGOInstrumentationGenCreateVarLegacyPassPass( 457 *PassRegistry::getPassRegistry()); 458 } 459 460 private: 461 bool runOnModule(Module &M) override { 462 createProfileFileNameVar(M, InstrProfileOutput); 463 createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry); 464 return false; 465 } 466 std::string InstrProfileOutput; 467 }; 468 469 } // end anonymous namespace 470 471 char PGOInstrumentationGenLegacyPass::ID = 0; 472 473 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 474 "PGO instrumentation.", false, false) 475 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 476 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 477 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 478 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 479 "PGO instrumentation.", false, false) 480 481 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { 482 return new PGOInstrumentationGenLegacyPass(IsCS); 483 } 484 485 char PGOInstrumentationUseLegacyPass::ID = 0; 486 487 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 488 "Read PGO instrumentation profile.", false, false) 489 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 490 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 491 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 492 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 493 "Read PGO instrumentation profile.", false, false) 494 495 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, 496 bool IsCS) { 497 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); 498 } 499 500 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; 501 502 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, 503 "pgo-instr-gen-create-var", 504 "Create PGO instrumentation version variable for CSPGO.", false, 505 false) 506 507 ModulePass * 508 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { 509 return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName)); 510 } 511 512 namespace { 513 514 /// An MST based instrumentation for PGO 515 /// 516 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 517 /// in the function level. 518 struct PGOEdge { 519 // This class implements the CFG edges. Note the CFG can be a multi-graph. 520 // So there might be multiple edges with same SrcBB and DestBB. 521 const BasicBlock *SrcBB; 522 const BasicBlock *DestBB; 523 uint64_t Weight; 524 bool InMST = false; 525 bool Removed = false; 526 bool IsCritical = false; 527 528 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 529 : SrcBB(Src), DestBB(Dest), Weight(W) {} 530 531 // Return the information string of an edge. 532 const std::string infoString() const { 533 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 534 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 535 } 536 }; 537 538 // This class stores the auxiliary information for each BB. 539 struct BBInfo { 540 BBInfo *Group; 541 uint32_t Index; 542 uint32_t Rank = 0; 543 544 BBInfo(unsigned IX) : Group(this), Index(IX) {} 545 546 // Return the information string of this object. 547 const std::string infoString() const { 548 return (Twine("Index=") + Twine(Index)).str(); 549 } 550 551 // Empty function -- only applicable to UseBBInfo. 552 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 553 554 // Empty function -- only applicable to UseBBInfo. 555 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 556 }; 557 558 // This class implements the CFG edges. Note the CFG can be a multi-graph. 559 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 560 private: 561 Function &F; 562 563 // Is this is context-sensitive instrumentation. 564 bool IsCS; 565 566 // A map that stores the Comdat group in function F. 567 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 568 569 ValueProfileCollector VPC; 570 571 void computeCFGHash(); 572 void renameComdatFunction(); 573 574 public: 575 std::vector<std::vector<VPCandidateInfo>> ValueSites; 576 SelectInstVisitor SIVisitor; 577 std::string FuncName; 578 GlobalVariable *FuncNameVar; 579 580 // CFG hash value for this function. 581 uint64_t FunctionHash = 0; 582 583 // The Minimum Spanning Tree of function CFG. 584 CFGMST<Edge, BBInfo> MST; 585 586 // Collect all the BBs that will be instrumented, and store them in 587 // InstrumentBBs. 588 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 589 590 // Give an edge, find the BB that will be instrumented. 591 // Return nullptr if there is no BB to be instrumented. 592 BasicBlock *getInstrBB(Edge *E); 593 594 // Return the auxiliary BB information. 595 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 596 597 // Return the auxiliary BB information if available. 598 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 599 600 // Dump edges and BB information. 601 void dumpInfo(std::string Str = "") const { 602 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 603 Twine(FunctionHash) + "\t" + Str); 604 } 605 606 FuncPGOInstrumentation( 607 Function &Func, TargetLibraryInfo &TLI, 608 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 609 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 610 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 611 bool InstrumentFuncEntry = true) 612 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 613 ValueSites(IPVK_Last + 1), SIVisitor(Func), 614 MST(F, InstrumentFuncEntry, BPI, BFI) { 615 // This should be done before CFG hash computation. 616 SIVisitor.countSelects(Func); 617 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 618 if (!IsCS) { 619 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 620 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 621 NumOfPGOBB += MST.BBInfos.size(); 622 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 623 } else { 624 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 625 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 626 NumOfCSPGOBB += MST.BBInfos.size(); 627 } 628 629 FuncName = getPGOFuncName(F); 630 computeCFGHash(); 631 if (!ComdatMembers.empty()) 632 renameComdatFunction(); 633 LLVM_DEBUG(dumpInfo("after CFGMST")); 634 635 for (auto &E : MST.AllEdges) { 636 if (E->Removed) 637 continue; 638 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 639 if (!E->InMST) 640 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 641 } 642 643 if (CreateGlobalVar) 644 FuncNameVar = createPGOFuncNameVar(F, FuncName); 645 } 646 }; 647 648 } // end anonymous namespace 649 650 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 651 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 652 // of selects, indirect calls, mem ops and edges. 653 template <class Edge, class BBInfo> 654 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 655 std::vector<uint8_t> Indexes; 656 JamCRC JC; 657 for (auto &BB : F) { 658 const Instruction *TI = BB.getTerminator(); 659 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 660 BasicBlock *Succ = TI->getSuccessor(I); 661 auto BI = findBBInfo(Succ); 662 if (BI == nullptr) 663 continue; 664 uint32_t Index = BI->Index; 665 for (int J = 0; J < 4; J++) 666 Indexes.push_back((uint8_t)(Index >> (J * 8))); 667 } 668 } 669 JC.update(Indexes); 670 671 JamCRC JCH; 672 if (PGOOldCFGHashing) { 673 // Hash format for context sensitive profile. Reserve 4 bits for other 674 // information. 675 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 676 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 677 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 678 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 679 } else { 680 // The higher 32 bits. 681 auto updateJCH = [&JCH](uint64_t Num) { 682 uint8_t Data[8]; 683 support::endian::write64le(Data, Num); 684 JCH.update(Data); 685 }; 686 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 687 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 688 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 689 updateJCH((uint64_t)MST.AllEdges.size()); 690 691 // Hash format for context sensitive profile. Reserve 4 bits for other 692 // information. 693 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 694 } 695 696 // Reserve bit 60-63 for other information purpose. 697 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 698 if (IsCS) 699 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 700 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 701 << " CRC = " << JC.getCRC() 702 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 703 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 704 << ValueSites[IPVK_IndirectCallTarget].size()); 705 if (!PGOOldCFGHashing) { 706 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 707 << ", High32 CRC = " << JCH.getCRC()); 708 } 709 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); 710 } 711 712 // Check if we can safely rename this Comdat function. 713 static bool canRenameComdat( 714 Function &F, 715 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 716 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 717 return false; 718 719 // FIXME: Current only handle those Comdat groups that only containing one 720 // function. 721 // (1) For a Comdat group containing multiple functions, we need to have a 722 // unique postfix based on the hashes for each function. There is a 723 // non-trivial code refactoring to do this efficiently. 724 // (2) Variables can not be renamed, so we can not rename Comdat function in a 725 // group including global vars. 726 Comdat *C = F.getComdat(); 727 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 728 assert(!isa<GlobalAlias>(CM.second)); 729 Function *FM = dyn_cast<Function>(CM.second); 730 if (FM != &F) 731 return false; 732 } 733 return true; 734 } 735 736 // Append the CFGHash to the Comdat function name. 737 template <class Edge, class BBInfo> 738 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 739 if (!canRenameComdat(F, ComdatMembers)) 740 return; 741 std::string OrigName = F.getName().str(); 742 std::string NewFuncName = 743 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 744 F.setName(Twine(NewFuncName)); 745 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 746 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 747 Comdat *NewComdat; 748 Module *M = F.getParent(); 749 // For AvailableExternallyLinkage functions, change the linkage to 750 // LinkOnceODR and put them into comdat. This is because after renaming, there 751 // is no backup external copy available for the function. 752 if (!F.hasComdat()) { 753 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 754 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 755 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 756 F.setComdat(NewComdat); 757 return; 758 } 759 760 // This function belongs to a single function Comdat group. 761 Comdat *OrigComdat = F.getComdat(); 762 std::string NewComdatName = 763 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 764 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 765 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 766 767 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 768 // Must be a function. 769 cast<Function>(CM.second)->setComdat(NewComdat); 770 } 771 } 772 773 // Collect all the BBs that will be instruments and return them in 774 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 775 template <class Edge, class BBInfo> 776 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 777 std::vector<BasicBlock *> &InstrumentBBs) { 778 // Use a worklist as we will update the vector during the iteration. 779 std::vector<Edge *> EdgeList; 780 EdgeList.reserve(MST.AllEdges.size()); 781 for (auto &E : MST.AllEdges) 782 EdgeList.push_back(E.get()); 783 784 for (auto &E : EdgeList) { 785 BasicBlock *InstrBB = getInstrBB(E); 786 if (InstrBB) 787 InstrumentBBs.push_back(InstrBB); 788 } 789 790 // Set up InEdges/OutEdges for all BBs. 791 for (auto &E : MST.AllEdges) { 792 if (E->Removed) 793 continue; 794 const BasicBlock *SrcBB = E->SrcBB; 795 const BasicBlock *DestBB = E->DestBB; 796 BBInfo &SrcInfo = getBBInfo(SrcBB); 797 BBInfo &DestInfo = getBBInfo(DestBB); 798 SrcInfo.addOutEdge(E.get()); 799 DestInfo.addInEdge(E.get()); 800 } 801 } 802 803 // Given a CFG E to be instrumented, find which BB to place the instrumented 804 // code. The function will split the critical edge if necessary. 805 template <class Edge, class BBInfo> 806 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 807 if (E->InMST || E->Removed) 808 return nullptr; 809 810 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 811 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 812 // For a fake edge, instrument the real BB. 813 if (SrcBB == nullptr) 814 return DestBB; 815 if (DestBB == nullptr) 816 return SrcBB; 817 818 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 819 // There are basic blocks (such as catchswitch) cannot be instrumented. 820 // If the returned first insertion point is the end of BB, skip this BB. 821 if (BB->getFirstInsertionPt() == BB->end()) 822 return nullptr; 823 return BB; 824 }; 825 826 // Instrument the SrcBB if it has a single successor, 827 // otherwise, the DestBB if this is not a critical edge. 828 Instruction *TI = SrcBB->getTerminator(); 829 if (TI->getNumSuccessors() <= 1) 830 return canInstrument(SrcBB); 831 if (!E->IsCritical) 832 return canInstrument(DestBB); 833 834 // Some IndirectBr critical edges cannot be split by the previous 835 // SplitIndirectBrCriticalEdges call. Bail out. 836 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 837 BasicBlock *InstrBB = 838 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 839 if (!InstrBB) { 840 LLVM_DEBUG( 841 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 842 return nullptr; 843 } 844 // For a critical edge, we have to split. Instrument the newly 845 // created BB. 846 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 847 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 848 << " --> " << getBBInfo(DestBB).Index << "\n"); 849 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 850 MST.addEdge(SrcBB, InstrBB, 0); 851 // Second one: Add new edge of InstrBB->DestBB. 852 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 853 NewEdge1.InMST = true; 854 E->Removed = true; 855 856 return canInstrument(InstrBB); 857 } 858 859 // When generating value profiling calls on Windows routines that make use of 860 // handler funclets for exception processing an operand bundle needs to attached 861 // to the called function. This routine will set \p OpBundles to contain the 862 // funclet information, if any is needed, that should be placed on the generated 863 // value profiling call for the value profile candidate call. 864 static void 865 populateEHOperandBundle(VPCandidateInfo &Cand, 866 DenseMap<BasicBlock *, ColorVector> &BlockColors, 867 SmallVectorImpl<OperandBundleDef> &OpBundles) { 868 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 869 if (OrigCall && !isa<IntrinsicInst>(OrigCall)) { 870 // The instrumentation call should belong to the same funclet as a 871 // non-intrinsic call, so just copy the operand bundle, if any exists. 872 Optional<OperandBundleUse> ParentFunclet = 873 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 874 if (ParentFunclet) 875 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 876 } else { 877 // Intrinsics or other instructions do not get funclet information from the 878 // front-end. Need to use the BlockColors that was computed by the routine 879 // colorEHFunclets to determine whether a funclet is needed. 880 if (!BlockColors.empty()) { 881 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 882 assert(CV.size() == 1 && "non-unique color for block!"); 883 Instruction *EHPad = CV.front()->getFirstNonPHI(); 884 if (EHPad->isEHPad()) 885 OpBundles.emplace_back("funclet", EHPad); 886 } 887 } 888 } 889 890 // Visit all edge and instrument the edges not in MST, and do value profiling. 891 // Critical edges will be split. 892 static void instrumentOneFunc( 893 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 894 BlockFrequencyInfo *BFI, 895 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 896 bool IsCS) { 897 // Split indirectbr critical edges here before computing the MST rather than 898 // later in getInstrBB() to avoid invalidating it. 899 SplitIndirectBrCriticalEdges(F, BPI, BFI); 900 901 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo( 902 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); 903 std::vector<BasicBlock *> InstrumentBBs; 904 FuncInfo.getInstrumentBBs(InstrumentBBs); 905 unsigned NumCounters = 906 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 907 908 uint32_t I = 0; 909 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 910 for (auto *InstrBB : InstrumentBBs) { 911 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 912 assert(Builder.GetInsertPoint() != InstrBB->end() && 913 "Cannot get the Instrumentation point"); 914 Builder.CreateCall( 915 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 916 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 917 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 918 Builder.getInt32(I++)}); 919 } 920 921 // Now instrument select instructions: 922 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 923 FuncInfo.FunctionHash); 924 assert(I == NumCounters); 925 926 if (DisableValueProfiling) 927 return; 928 929 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 930 931 // Intrinsic function calls do not have funclet operand bundles needed for 932 // Windows exception handling attached to them. However, if value profiling is 933 // inserted for one of these calls, then a funclet value will need to be set 934 // on the instrumentation call based on the funclet coloring. 935 DenseMap<BasicBlock *, ColorVector> BlockColors; 936 if (F.hasPersonalityFn() && 937 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 938 BlockColors = colorEHFunclets(F); 939 940 // For each VP Kind, walk the VP candidates and instrument each one. 941 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 942 unsigned SiteIndex = 0; 943 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 944 continue; 945 946 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 947 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 948 << " site: CallSite Index = " << SiteIndex << "\n"); 949 950 IRBuilder<> Builder(Cand.InsertPt); 951 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 952 "Cannot get the Instrumentation point"); 953 954 Value *ToProfile = nullptr; 955 if (Cand.V->getType()->isIntegerTy()) 956 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 957 else if (Cand.V->getType()->isPointerTy()) 958 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 959 assert(ToProfile && "value profiling Value is of unexpected type"); 960 961 SmallVector<OperandBundleDef, 1> OpBundles; 962 populateEHOperandBundle(Cand, BlockColors, OpBundles); 963 Builder.CreateCall( 964 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 965 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 966 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 967 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 968 OpBundles); 969 } 970 } // IPVK_First <= Kind <= IPVK_Last 971 } 972 973 namespace { 974 975 // This class represents a CFG edge in profile use compilation. 976 struct PGOUseEdge : public PGOEdge { 977 bool CountValid = false; 978 uint64_t CountValue = 0; 979 980 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 981 : PGOEdge(Src, Dest, W) {} 982 983 // Set edge count value 984 void setEdgeCount(uint64_t Value) { 985 CountValue = Value; 986 CountValid = true; 987 } 988 989 // Return the information string for this object. 990 const std::string infoString() const { 991 if (!CountValid) 992 return PGOEdge::infoString(); 993 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 994 .str(); 995 } 996 }; 997 998 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 999 1000 // This class stores the auxiliary information for each BB. 1001 struct UseBBInfo : public BBInfo { 1002 uint64_t CountValue = 0; 1003 bool CountValid; 1004 int32_t UnknownCountInEdge = 0; 1005 int32_t UnknownCountOutEdge = 0; 1006 DirectEdges InEdges; 1007 DirectEdges OutEdges; 1008 1009 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 1010 1011 UseBBInfo(unsigned IX, uint64_t C) 1012 : BBInfo(IX), CountValue(C), CountValid(true) {} 1013 1014 // Set the profile count value for this BB. 1015 void setBBInfoCount(uint64_t Value) { 1016 CountValue = Value; 1017 CountValid = true; 1018 } 1019 1020 // Return the information string of this object. 1021 const std::string infoString() const { 1022 if (!CountValid) 1023 return BBInfo::infoString(); 1024 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 1025 } 1026 1027 // Add an OutEdge and update the edge count. 1028 void addOutEdge(PGOUseEdge *E) { 1029 OutEdges.push_back(E); 1030 UnknownCountOutEdge++; 1031 } 1032 1033 // Add an InEdge and update the edge count. 1034 void addInEdge(PGOUseEdge *E) { 1035 InEdges.push_back(E); 1036 UnknownCountInEdge++; 1037 } 1038 }; 1039 1040 } // end anonymous namespace 1041 1042 // Sum up the count values for all the edges. 1043 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 1044 uint64_t Total = 0; 1045 for (auto &E : Edges) { 1046 if (E->Removed) 1047 continue; 1048 Total += E->CountValue; 1049 } 1050 return Total; 1051 } 1052 1053 namespace { 1054 1055 class PGOUseFunc { 1056 public: 1057 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1058 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1059 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1060 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry) 1061 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1062 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1063 InstrumentFuncEntry), 1064 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1065 1066 // Read counts for the instrumented BB from profile. 1067 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1068 bool &AllMinusOnes); 1069 1070 // Populate the counts for all BBs. 1071 void populateCounters(); 1072 1073 // Set the branch weights based on the count values. 1074 void setBranchWeights(); 1075 1076 // Annotate the value profile call sites for all value kind. 1077 void annotateValueSites(); 1078 1079 // Annotate the value profile call sites for one value kind. 1080 void annotateValueSites(uint32_t Kind); 1081 1082 // Annotate the irreducible loop header weights. 1083 void annotateIrrLoopHeaderWeights(); 1084 1085 // The hotness of the function from the profile count. 1086 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1087 1088 // Return the function hotness from the profile. 1089 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1090 1091 // Return the function hash. 1092 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1093 1094 // Return the profile record for this function; 1095 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1096 1097 // Return the auxiliary BB information. 1098 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1099 return FuncInfo.getBBInfo(BB); 1100 } 1101 1102 // Return the auxiliary BB information if available. 1103 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1104 return FuncInfo.findBBInfo(BB); 1105 } 1106 1107 Function &getFunc() const { return F; } 1108 1109 void dumpInfo(std::string Str = "") const { 1110 FuncInfo.dumpInfo(Str); 1111 } 1112 1113 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1114 private: 1115 Function &F; 1116 Module *M; 1117 BlockFrequencyInfo *BFI; 1118 ProfileSummaryInfo *PSI; 1119 1120 // This member stores the shared information with class PGOGenFunc. 1121 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1122 1123 // The maximum count value in the profile. This is only used in PGO use 1124 // compilation. 1125 uint64_t ProgramMaxCount; 1126 1127 // Position of counter that remains to be read. 1128 uint32_t CountPosition = 0; 1129 1130 // Total size of the profile count for this function. 1131 uint32_t ProfileCountSize = 0; 1132 1133 // ProfileRecord for this function. 1134 InstrProfRecord ProfileRecord; 1135 1136 // Function hotness info derived from profile. 1137 FuncFreqAttr FreqAttr; 1138 1139 // Is to use the context sensitive profile. 1140 bool IsCS; 1141 1142 // Find the Instrumented BB and set the value. Return false on error. 1143 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1144 1145 // Set the edge counter value for the unknown edge -- there should be only 1146 // one unknown edge. 1147 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1148 1149 // Return FuncName string; 1150 const std::string getFuncName() const { return FuncInfo.FuncName; } 1151 1152 // Set the hot/cold inline hints based on the count values. 1153 // FIXME: This function should be removed once the functionality in 1154 // the inliner is implemented. 1155 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1156 if (PSI->isHotCount(EntryCount)) 1157 FreqAttr = FFA_Hot; 1158 else if (PSI->isColdCount(MaxCount)) 1159 FreqAttr = FFA_Cold; 1160 } 1161 }; 1162 1163 } // end anonymous namespace 1164 1165 // Visit all the edges and assign the count value for the instrumented 1166 // edges and the BB. Return false on error. 1167 bool PGOUseFunc::setInstrumentedCounts( 1168 const std::vector<uint64_t> &CountFromProfile) { 1169 1170 std::vector<BasicBlock *> InstrumentBBs; 1171 FuncInfo.getInstrumentBBs(InstrumentBBs); 1172 unsigned NumCounters = 1173 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1174 // The number of counters here should match the number of counters 1175 // in profile. Return if they mismatch. 1176 if (NumCounters != CountFromProfile.size()) { 1177 return false; 1178 } 1179 auto *FuncEntry = &*F.begin(); 1180 1181 // Set the profile count to the Instrumented BBs. 1182 uint32_t I = 0; 1183 for (BasicBlock *InstrBB : InstrumentBBs) { 1184 uint64_t CountValue = CountFromProfile[I++]; 1185 UseBBInfo &Info = getBBInfo(InstrBB); 1186 // If we reach here, we know that we have some nonzero count 1187 // values in this function. The entry count should not be 0. 1188 // Fix it if necessary. 1189 if (InstrBB == FuncEntry && CountValue == 0) 1190 CountValue = 1; 1191 Info.setBBInfoCount(CountValue); 1192 } 1193 ProfileCountSize = CountFromProfile.size(); 1194 CountPosition = I; 1195 1196 // Set the edge count and update the count of unknown edges for BBs. 1197 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1198 E->setEdgeCount(Value); 1199 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1200 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1201 }; 1202 1203 // Set the profile count the Instrumented edges. There are BBs that not in 1204 // MST but not instrumented. Need to set the edge count value so that we can 1205 // populate the profile counts later. 1206 for (auto &E : FuncInfo.MST.AllEdges) { 1207 if (E->Removed || E->InMST) 1208 continue; 1209 const BasicBlock *SrcBB = E->SrcBB; 1210 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1211 1212 // If only one out-edge, the edge profile count should be the same as BB 1213 // profile count. 1214 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1215 setEdgeCount(E.get(), SrcInfo.CountValue); 1216 else { 1217 const BasicBlock *DestBB = E->DestBB; 1218 UseBBInfo &DestInfo = getBBInfo(DestBB); 1219 // If only one in-edge, the edge profile count should be the same as BB 1220 // profile count. 1221 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1222 setEdgeCount(E.get(), DestInfo.CountValue); 1223 } 1224 if (E->CountValid) 1225 continue; 1226 // E's count should have been set from profile. If not, this meenas E skips 1227 // the instrumentation. We set the count to 0. 1228 setEdgeCount(E.get(), 0); 1229 } 1230 return true; 1231 } 1232 1233 // Set the count value for the unknown edge. There should be one and only one 1234 // unknown edge in Edges vector. 1235 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1236 for (auto &E : Edges) { 1237 if (E->CountValid) 1238 continue; 1239 E->setEdgeCount(Value); 1240 1241 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1242 getBBInfo(E->DestBB).UnknownCountInEdge--; 1243 return; 1244 } 1245 llvm_unreachable("Cannot find the unknown count edge"); 1246 } 1247 1248 // Read the profile from ProfileFileName and assign the value to the 1249 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1250 // Return true if the profile are successfully read, and false on errors. 1251 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1252 bool &AllMinusOnes) { 1253 auto &Ctx = M->getContext(); 1254 Expected<InstrProfRecord> Result = 1255 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1256 if (Error E = Result.takeError()) { 1257 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1258 auto Err = IPE.get(); 1259 bool SkipWarning = false; 1260 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1261 << FuncInfo.FuncName << ": "); 1262 if (Err == instrprof_error::unknown_function) { 1263 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1264 SkipWarning = !PGOWarnMissing; 1265 LLVM_DEBUG(dbgs() << "unknown function"); 1266 } else if (Err == instrprof_error::hash_mismatch || 1267 Err == instrprof_error::malformed) { 1268 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1269 SkipWarning = 1270 NoPGOWarnMismatch || 1271 (NoPGOWarnMismatchComdat && 1272 (F.hasComdat() || 1273 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1274 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1275 } 1276 1277 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1278 if (SkipWarning) 1279 return; 1280 1281 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1282 std::string(" Hash = ") + 1283 std::to_string(FuncInfo.FunctionHash); 1284 1285 Ctx.diagnose( 1286 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1287 }); 1288 return false; 1289 } 1290 ProfileRecord = std::move(Result.get()); 1291 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1292 1293 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1294 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1295 AllMinusOnes = (CountFromProfile.size() > 0); 1296 uint64_t ValueSum = 0; 1297 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1298 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1299 ValueSum += CountFromProfile[I]; 1300 if (CountFromProfile[I] != (uint64_t)-1) 1301 AllMinusOnes = false; 1302 } 1303 AllZeros = (ValueSum == 0); 1304 1305 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1306 1307 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1308 getBBInfo(nullptr).UnknownCountInEdge = 2; 1309 1310 if (!setInstrumentedCounts(CountFromProfile)) { 1311 LLVM_DEBUG( 1312 dbgs() << "Inconsistent number of counts, skipping this function"); 1313 Ctx.diagnose(DiagnosticInfoPGOProfile( 1314 M->getName().data(), 1315 Twine("Inconsistent number of counts in ") + F.getName().str() 1316 + Twine(": the profile may be stale or there is a function name collision."), 1317 DS_Warning)); 1318 return false; 1319 } 1320 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1321 return true; 1322 } 1323 1324 // Populate the counters from instrumented BBs to all BBs. 1325 // In the end of this operation, all BBs should have a valid count value. 1326 void PGOUseFunc::populateCounters() { 1327 bool Changes = true; 1328 unsigned NumPasses = 0; 1329 while (Changes) { 1330 NumPasses++; 1331 Changes = false; 1332 1333 // For efficient traversal, it's better to start from the end as most 1334 // of the instrumented edges are at the end. 1335 for (auto &BB : reverse(F)) { 1336 UseBBInfo *Count = findBBInfo(&BB); 1337 if (Count == nullptr) 1338 continue; 1339 if (!Count->CountValid) { 1340 if (Count->UnknownCountOutEdge == 0) { 1341 Count->CountValue = sumEdgeCount(Count->OutEdges); 1342 Count->CountValid = true; 1343 Changes = true; 1344 } else if (Count->UnknownCountInEdge == 0) { 1345 Count->CountValue = sumEdgeCount(Count->InEdges); 1346 Count->CountValid = true; 1347 Changes = true; 1348 } 1349 } 1350 if (Count->CountValid) { 1351 if (Count->UnknownCountOutEdge == 1) { 1352 uint64_t Total = 0; 1353 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1354 // If the one of the successor block can early terminate (no-return), 1355 // we can end up with situation where out edge sum count is larger as 1356 // the source BB's count is collected by a post-dominated block. 1357 if (Count->CountValue > OutSum) 1358 Total = Count->CountValue - OutSum; 1359 setEdgeCount(Count->OutEdges, Total); 1360 Changes = true; 1361 } 1362 if (Count->UnknownCountInEdge == 1) { 1363 uint64_t Total = 0; 1364 uint64_t InSum = sumEdgeCount(Count->InEdges); 1365 if (Count->CountValue > InSum) 1366 Total = Count->CountValue - InSum; 1367 setEdgeCount(Count->InEdges, Total); 1368 Changes = true; 1369 } 1370 } 1371 } 1372 } 1373 1374 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1375 #ifndef NDEBUG 1376 // Assert every BB has a valid counter. 1377 for (auto &BB : F) { 1378 auto BI = findBBInfo(&BB); 1379 if (BI == nullptr) 1380 continue; 1381 assert(BI->CountValid && "BB count is not valid"); 1382 } 1383 #endif 1384 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1385 uint64_t FuncMaxCount = FuncEntryCount; 1386 for (auto &BB : F) { 1387 auto BI = findBBInfo(&BB); 1388 if (BI == nullptr) 1389 continue; 1390 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1391 } 1392 1393 // Fix the obviously inconsistent entry count. 1394 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1395 FuncEntryCount = 1; 1396 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1397 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1398 1399 // Now annotate select instructions 1400 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1401 assert(CountPosition == ProfileCountSize); 1402 1403 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1404 } 1405 1406 // Assign the scaled count values to the BB with multiple out edges. 1407 void PGOUseFunc::setBranchWeights() { 1408 // Generate MD_prof metadata for every branch instruction. 1409 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1410 << " IsCS=" << IsCS << "\n"); 1411 for (auto &BB : F) { 1412 Instruction *TI = BB.getTerminator(); 1413 if (TI->getNumSuccessors() < 2) 1414 continue; 1415 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1416 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI))) 1417 continue; 1418 1419 if (getBBInfo(&BB).CountValue == 0) 1420 continue; 1421 1422 // We have a non-zero Branch BB. 1423 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1424 unsigned Size = BBCountInfo.OutEdges.size(); 1425 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1426 uint64_t MaxCount = 0; 1427 for (unsigned s = 0; s < Size; s++) { 1428 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1429 const BasicBlock *SrcBB = E->SrcBB; 1430 const BasicBlock *DestBB = E->DestBB; 1431 if (DestBB == nullptr) 1432 continue; 1433 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1434 uint64_t EdgeCount = E->CountValue; 1435 if (EdgeCount > MaxCount) 1436 MaxCount = EdgeCount; 1437 EdgeCounts[SuccNum] = EdgeCount; 1438 } 1439 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1440 } 1441 } 1442 1443 static bool isIndirectBrTarget(BasicBlock *BB) { 1444 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 1445 if (isa<IndirectBrInst>((*PI)->getTerminator())) 1446 return true; 1447 } 1448 return false; 1449 } 1450 1451 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1452 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1453 // Find irr loop headers 1454 for (auto &BB : F) { 1455 // As a heuristic also annotate indrectbr targets as they have a high chance 1456 // to become an irreducible loop header after the indirectbr tail 1457 // duplication. 1458 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1459 Instruction *TI = BB.getTerminator(); 1460 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1461 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1462 } 1463 } 1464 } 1465 1466 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1467 Module *M = F.getParent(); 1468 IRBuilder<> Builder(&SI); 1469 Type *Int64Ty = Builder.getInt64Ty(); 1470 Type *I8PtrTy = Builder.getInt8PtrTy(); 1471 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1472 Builder.CreateCall( 1473 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1474 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1475 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1476 Builder.getInt32(*CurCtrIdx), Step}); 1477 ++(*CurCtrIdx); 1478 } 1479 1480 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1481 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1482 assert(*CurCtrIdx < CountFromProfile.size() && 1483 "Out of bound access of counters"); 1484 uint64_t SCounts[2]; 1485 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1486 ++(*CurCtrIdx); 1487 uint64_t TotalCount = 0; 1488 auto BI = UseFunc->findBBInfo(SI.getParent()); 1489 if (BI != nullptr) 1490 TotalCount = BI->CountValue; 1491 // False Count 1492 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1493 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1494 if (MaxCount) 1495 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1496 } 1497 1498 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1499 if (!PGOInstrSelect) 1500 return; 1501 // FIXME: do not handle this yet. 1502 if (SI.getCondition()->getType()->isVectorTy()) 1503 return; 1504 1505 switch (Mode) { 1506 case VM_counting: 1507 NSIs++; 1508 return; 1509 case VM_instrument: 1510 instrumentOneSelectInst(SI); 1511 return; 1512 case VM_annotate: 1513 annotateOneSelectInst(SI); 1514 return; 1515 } 1516 1517 llvm_unreachable("Unknown visiting mode"); 1518 } 1519 1520 // Traverse all valuesites and annotate the instructions for all value kind. 1521 void PGOUseFunc::annotateValueSites() { 1522 if (DisableValueProfiling) 1523 return; 1524 1525 // Create the PGOFuncName meta data. 1526 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1527 1528 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1529 annotateValueSites(Kind); 1530 } 1531 1532 // Annotate the instructions for a specific value kind. 1533 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1534 assert(Kind <= IPVK_Last); 1535 unsigned ValueSiteIndex = 0; 1536 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1537 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1538 if (NumValueSites != ValueSites.size()) { 1539 auto &Ctx = M->getContext(); 1540 Ctx.diagnose(DiagnosticInfoPGOProfile( 1541 M->getName().data(), 1542 Twine("Inconsistent number of value sites for ") + 1543 Twine(ValueProfKindDescr[Kind]) + 1544 Twine(" profiling in \"") + F.getName().str() + 1545 Twine("\", possibly due to the use of a stale profile."), 1546 DS_Warning)); 1547 return; 1548 } 1549 1550 for (VPCandidateInfo &I : ValueSites) { 1551 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1552 << "): Index = " << ValueSiteIndex << " out of " 1553 << NumValueSites << "\n"); 1554 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1555 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1556 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1557 : MaxNumAnnotations); 1558 ValueSiteIndex++; 1559 } 1560 } 1561 1562 // Collect the set of members for each Comdat in module M and store 1563 // in ComdatMembers. 1564 static void collectComdatMembers( 1565 Module &M, 1566 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1567 if (!DoComdatRenaming) 1568 return; 1569 for (Function &F : M) 1570 if (Comdat *C = F.getComdat()) 1571 ComdatMembers.insert(std::make_pair(C, &F)); 1572 for (GlobalVariable &GV : M.globals()) 1573 if (Comdat *C = GV.getComdat()) 1574 ComdatMembers.insert(std::make_pair(C, &GV)); 1575 for (GlobalAlias &GA : M.aliases()) 1576 if (Comdat *C = GA.getComdat()) 1577 ComdatMembers.insert(std::make_pair(C, &GA)); 1578 } 1579 1580 static bool InstrumentAllFunctions( 1581 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1582 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1583 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1584 // For the context-sensitve instrumentation, we should have a separated pass 1585 // (before LTO/ThinLTO linking) to create these variables. 1586 if (!IsCS) 1587 createIRLevelProfileFlagVar(M, /* IsCS */ false, PGOInstrumentEntry); 1588 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1589 collectComdatMembers(M, ComdatMembers); 1590 1591 for (auto &F : M) { 1592 if (F.isDeclaration()) 1593 continue; 1594 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1595 continue; 1596 auto &TLI = LookupTLI(F); 1597 auto *BPI = LookupBPI(F); 1598 auto *BFI = LookupBFI(F); 1599 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1600 } 1601 return true; 1602 } 1603 1604 PreservedAnalyses 1605 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1606 createProfileFileNameVar(M, CSInstrName); 1607 createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry); 1608 return PreservedAnalyses::all(); 1609 } 1610 1611 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1612 if (skipModule(M)) 1613 return false; 1614 1615 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 1616 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 1617 }; 1618 auto LookupBPI = [this](Function &F) { 1619 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1620 }; 1621 auto LookupBFI = [this](Function &F) { 1622 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1623 }; 1624 return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS); 1625 } 1626 1627 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1628 ModuleAnalysisManager &AM) { 1629 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1630 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1631 return FAM.getResult<TargetLibraryAnalysis>(F); 1632 }; 1633 auto LookupBPI = [&FAM](Function &F) { 1634 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1635 }; 1636 auto LookupBFI = [&FAM](Function &F) { 1637 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1638 }; 1639 1640 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1641 return PreservedAnalyses::all(); 1642 1643 return PreservedAnalyses::none(); 1644 } 1645 1646 // Using the ratio b/w sums of profile count values and BFI count values to 1647 // adjust the func entry count. 1648 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1649 BranchProbabilityInfo &NBPI) { 1650 Function &F = Func.getFunc(); 1651 BlockFrequencyInfo NBFI(F, NBPI, LI); 1652 #ifndef NDEBUG 1653 auto BFIEntryCount = F.getEntryCount(); 1654 assert(BFIEntryCount.hasValue() && (BFIEntryCount.getCount() > 0) && 1655 "Invalid BFI Entrycount"); 1656 #endif 1657 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1658 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1659 for (auto &BBI : F) { 1660 uint64_t CountValue = 0; 1661 uint64_t BFICountValue = 0; 1662 if (!Func.findBBInfo(&BBI)) 1663 continue; 1664 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1665 CountValue = Func.getBBInfo(&BBI).CountValue; 1666 BFICountValue = BFICount.getValue(); 1667 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1668 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1669 } 1670 if (SumCount.isZero()) 1671 return; 1672 1673 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1674 "Incorrect sum of BFI counts"); 1675 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1676 return; 1677 double Scale = (SumCount / SumBFICount).convertToDouble(); 1678 if (Scale < 1.001 && Scale > 0.999) 1679 return; 1680 1681 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; 1682 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1683 if (NewEntryCount == 0) 1684 NewEntryCount = 1; 1685 if (NewEntryCount != FuncEntryCount) { 1686 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1687 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1688 << ", entry_count " << FuncEntryCount << " --> " 1689 << NewEntryCount << "\n"); 1690 } 1691 } 1692 1693 // Compare the profile count values with BFI count values, and print out 1694 // the non-matching ones. 1695 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 1696 BranchProbabilityInfo &NBPI, 1697 uint64_t HotCountThreshold, 1698 uint64_t ColdCountThreshold) { 1699 Function &F = Func.getFunc(); 1700 BlockFrequencyInfo NBFI(F, NBPI, LI); 1701 // bool PrintFunc = false; 1702 bool HotBBOnly = PGOVerifyHotBFI; 1703 std::string Msg; 1704 OptimizationRemarkEmitter ORE(&F); 1705 1706 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 1707 for (auto &BBI : F) { 1708 uint64_t CountValue = 0; 1709 uint64_t BFICountValue = 0; 1710 1711 if (Func.getBBInfo(&BBI).CountValid) 1712 CountValue = Func.getBBInfo(&BBI).CountValue; 1713 1714 BBNum++; 1715 if (CountValue) 1716 NonZeroBBNum++; 1717 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1718 if (BFICount) 1719 BFICountValue = BFICount.getValue(); 1720 1721 if (HotBBOnly) { 1722 bool rawIsHot = CountValue >= HotCountThreshold; 1723 bool BFIIsHot = BFICountValue >= HotCountThreshold; 1724 bool rawIsCold = CountValue <= ColdCountThreshold; 1725 bool ShowCount = false; 1726 if (rawIsHot && !BFIIsHot) { 1727 Msg = "raw-Hot to BFI-nonHot"; 1728 ShowCount = true; 1729 } else if (rawIsCold && BFIIsHot) { 1730 Msg = "raw-Cold to BFI-Hot"; 1731 ShowCount = true; 1732 } 1733 if (!ShowCount) 1734 continue; 1735 } else { 1736 if ((CountValue < PGOVerifyBFICutoff) && 1737 (BFICountValue < PGOVerifyBFICutoff)) 1738 continue; 1739 uint64_t Diff = (BFICountValue >= CountValue) 1740 ? BFICountValue - CountValue 1741 : CountValue - BFICountValue; 1742 if (Diff < CountValue / 100 * PGOVerifyBFIRatio) 1743 continue; 1744 } 1745 BBMisMatchNum++; 1746 1747 ORE.emit([&]() { 1748 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 1749 F.getSubprogram(), &BBI); 1750 Remark << "BB " << ore::NV("Block", BBI.getName()) 1751 << " Count=" << ore::NV("Count", CountValue) 1752 << " BFI_Count=" << ore::NV("Count", BFICountValue); 1753 if (!Msg.empty()) 1754 Remark << " (" << Msg << ")"; 1755 return Remark; 1756 }); 1757 } 1758 if (BBMisMatchNum) 1759 ORE.emit([&]() { 1760 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 1761 F.getSubprogram(), &F.getEntryBlock()) 1762 << "In Func " << ore::NV("Function", F.getName()) 1763 << ": Num_of_BB=" << ore::NV("Count", BBNum) 1764 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 1765 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 1766 }); 1767 } 1768 1769 static bool annotateAllFunctions( 1770 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1771 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1772 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1773 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1774 ProfileSummaryInfo *PSI, bool IsCS) { 1775 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1776 auto &Ctx = M.getContext(); 1777 // Read the counter array from file. 1778 auto ReaderOrErr = 1779 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1780 if (Error E = ReaderOrErr.takeError()) { 1781 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1782 Ctx.diagnose( 1783 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1784 }); 1785 return false; 1786 } 1787 1788 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1789 std::move(ReaderOrErr.get()); 1790 if (!PGOReader) { 1791 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1792 StringRef("Cannot get PGOReader"))); 1793 return false; 1794 } 1795 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1796 return false; 1797 1798 // TODO: might need to change the warning once the clang option is finalized. 1799 if (!PGOReader->isIRLevelProfile()) { 1800 Ctx.diagnose(DiagnosticInfoPGOProfile( 1801 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1802 return false; 1803 } 1804 1805 // Add the profile summary (read from the header of the indexed summary) here 1806 // so that we can use it below when reading counters (which checks if the 1807 // function should be marked with a cold or inlinehint attribute). 1808 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1809 IsCS ? ProfileSummary::PSK_CSInstr 1810 : ProfileSummary::PSK_Instr); 1811 PSI->refresh(); 1812 1813 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1814 collectComdatMembers(M, ComdatMembers); 1815 std::vector<Function *> HotFunctions; 1816 std::vector<Function *> ColdFunctions; 1817 1818 // If the profile marked as always instrument the entry BB, do the 1819 // same. Note this can be overwritten by the internal option in CFGMST.h 1820 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 1821 if (PGOInstrumentEntry.getNumOccurrences() > 0) 1822 InstrumentFuncEntry = PGOInstrumentEntry; 1823 for (auto &F : M) { 1824 if (F.isDeclaration()) 1825 continue; 1826 auto &TLI = LookupTLI(F); 1827 auto *BPI = LookupBPI(F); 1828 auto *BFI = LookupBFI(F); 1829 // Split indirectbr critical edges here before computing the MST rather than 1830 // later in getInstrBB() to avoid invalidating it. 1831 SplitIndirectBrCriticalEdges(F, BPI, BFI); 1832 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 1833 InstrumentFuncEntry); 1834 // When AllMinusOnes is true, it means the profile for the function 1835 // is unrepresentative and this function is actually hot. Set the 1836 // entry count of the function to be multiple times of hot threshold 1837 // and drop all its internal counters. 1838 bool AllMinusOnes = false; 1839 bool AllZeros = false; 1840 if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes)) 1841 continue; 1842 if (AllZeros) { 1843 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1844 if (Func.getProgramMaxCount() != 0) 1845 ColdFunctions.push_back(&F); 1846 continue; 1847 } 1848 const unsigned MultiplyFactor = 3; 1849 if (AllMinusOnes) { 1850 uint64_t HotThreshold = PSI->getHotCountThreshold(); 1851 if (HotThreshold) 1852 F.setEntryCount( 1853 ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real)); 1854 HotFunctions.push_back(&F); 1855 continue; 1856 } 1857 Func.populateCounters(); 1858 Func.setBranchWeights(); 1859 Func.annotateValueSites(); 1860 Func.annotateIrrLoopHeaderWeights(); 1861 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1862 if (FreqAttr == PGOUseFunc::FFA_Cold) 1863 ColdFunctions.push_back(&F); 1864 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1865 HotFunctions.push_back(&F); 1866 if (PGOViewCounts != PGOVCT_None && 1867 (ViewBlockFreqFuncName.empty() || 1868 F.getName().equals(ViewBlockFreqFuncName))) { 1869 LoopInfo LI{DominatorTree(F)}; 1870 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1871 std::make_unique<BranchProbabilityInfo>(F, LI); 1872 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1873 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1874 if (PGOViewCounts == PGOVCT_Graph) 1875 NewBFI->view(); 1876 else if (PGOViewCounts == PGOVCT_Text) { 1877 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1878 NewBFI->print(dbgs()); 1879 } 1880 } 1881 if (PGOViewRawCounts != PGOVCT_None && 1882 (ViewBlockFreqFuncName.empty() || 1883 F.getName().equals(ViewBlockFreqFuncName))) { 1884 if (PGOViewRawCounts == PGOVCT_Graph) 1885 if (ViewBlockFreqFuncName.empty()) 1886 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1887 else 1888 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1889 else if (PGOViewRawCounts == PGOVCT_Text) { 1890 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1891 Func.dumpInfo(); 1892 } 1893 } 1894 1895 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 1896 LoopInfo LI{DominatorTree(F)}; 1897 BranchProbabilityInfo NBPI(F, LI); 1898 1899 // Fix func entry count. 1900 if (PGOFixEntryCount) 1901 fixFuncEntryCount(Func, LI, NBPI); 1902 1903 // Verify BlockFrequency information. 1904 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 1905 if (PGOVerifyHotBFI) { 1906 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 1907 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 1908 } 1909 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 1910 } 1911 } 1912 1913 // Set function hotness attribute from the profile. 1914 // We have to apply these attributes at the end because their presence 1915 // can affect the BranchProbabilityInfo of any callers, resulting in an 1916 // inconsistent MST between prof-gen and prof-use. 1917 for (auto &F : HotFunctions) { 1918 F->addFnAttr(Attribute::InlineHint); 1919 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 1920 << "\n"); 1921 } 1922 for (auto &F : ColdFunctions) { 1923 // Only set when there is no Attribute::Hot set by the user. For Hot 1924 // attribute, user's annotation has the precedence over the profile. 1925 if (F->hasFnAttribute(Attribute::Hot)) { 1926 auto &Ctx = M.getContext(); 1927 std::string Msg = std::string("Function ") + F->getName().str() + 1928 std::string(" is annotated as a hot function but" 1929 " the profile is cold"); 1930 Ctx.diagnose( 1931 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 1932 continue; 1933 } 1934 F->addFnAttr(Attribute::Cold); 1935 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 1936 << "\n"); 1937 } 1938 return true; 1939 } 1940 1941 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 1942 std::string RemappingFilename, 1943 bool IsCS) 1944 : ProfileFileName(std::move(Filename)), 1945 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 1946 if (!PGOTestProfileFile.empty()) 1947 ProfileFileName = PGOTestProfileFile; 1948 if (!PGOTestProfileRemappingFile.empty()) 1949 ProfileRemappingFileName = PGOTestProfileRemappingFile; 1950 } 1951 1952 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 1953 ModuleAnalysisManager &AM) { 1954 1955 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1956 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1957 return FAM.getResult<TargetLibraryAnalysis>(F); 1958 }; 1959 auto LookupBPI = [&FAM](Function &F) { 1960 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1961 }; 1962 auto LookupBFI = [&FAM](Function &F) { 1963 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1964 }; 1965 1966 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 1967 1968 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 1969 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 1970 return PreservedAnalyses::all(); 1971 1972 return PreservedAnalyses::none(); 1973 } 1974 1975 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 1976 if (skipModule(M)) 1977 return false; 1978 1979 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 1980 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 1981 }; 1982 auto LookupBPI = [this](Function &F) { 1983 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1984 }; 1985 auto LookupBFI = [this](Function &F) { 1986 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1987 }; 1988 1989 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 1990 return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI, 1991 LookupBFI, PSI, IsCS); 1992 } 1993 1994 static std::string getSimpleNodeName(const BasicBlock *Node) { 1995 if (!Node->getName().empty()) 1996 return std::string(Node->getName()); 1997 1998 std::string SimpleNodeName; 1999 raw_string_ostream OS(SimpleNodeName); 2000 Node->printAsOperand(OS, false); 2001 return OS.str(); 2002 } 2003 2004 void llvm::setProfMetadata(Module *M, Instruction *TI, 2005 ArrayRef<uint64_t> EdgeCounts, 2006 uint64_t MaxCount) { 2007 MDBuilder MDB(M->getContext()); 2008 assert(MaxCount > 0 && "Bad max count"); 2009 uint64_t Scale = calculateCountScale(MaxCount); 2010 SmallVector<unsigned, 4> Weights; 2011 for (const auto &ECI : EdgeCounts) 2012 Weights.push_back(scaleBranchCount(ECI, Scale)); 2013 2014 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 2015 : Weights) { 2016 dbgs() << W << " "; 2017 } dbgs() << "\n";); 2018 2019 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 2020 if (EmitBranchProbability) { 2021 std::string BrCondStr = getBranchCondString(TI); 2022 if (BrCondStr.empty()) 2023 return; 2024 2025 uint64_t WSum = 2026 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 2027 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 2028 uint64_t TotalCount = 2029 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 2030 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 2031 Scale = calculateCountScale(WSum); 2032 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 2033 scaleBranchCount(WSum, Scale)); 2034 std::string BranchProbStr; 2035 raw_string_ostream OS(BranchProbStr); 2036 OS << BP; 2037 OS << " (total count : " << TotalCount << ")"; 2038 OS.flush(); 2039 Function *F = TI->getParent()->getParent(); 2040 OptimizationRemarkEmitter ORE(F); 2041 ORE.emit([&]() { 2042 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 2043 << BrCondStr << " is true with probability : " << BranchProbStr; 2044 }); 2045 } 2046 } 2047 2048 namespace llvm { 2049 2050 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2051 MDBuilder MDB(M->getContext()); 2052 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2053 MDB.createIrrLoopHeaderWeight(Count)); 2054 } 2055 2056 template <> struct GraphTraits<PGOUseFunc *> { 2057 using NodeRef = const BasicBlock *; 2058 using ChildIteratorType = const_succ_iterator; 2059 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2060 2061 static NodeRef getEntryNode(const PGOUseFunc *G) { 2062 return &G->getFunc().front(); 2063 } 2064 2065 static ChildIteratorType child_begin(const NodeRef N) { 2066 return succ_begin(N); 2067 } 2068 2069 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2070 2071 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2072 return nodes_iterator(G->getFunc().begin()); 2073 } 2074 2075 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2076 return nodes_iterator(G->getFunc().end()); 2077 } 2078 }; 2079 2080 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2081 explicit DOTGraphTraits(bool isSimple = false) 2082 : DefaultDOTGraphTraits(isSimple) {} 2083 2084 static std::string getGraphName(const PGOUseFunc *G) { 2085 return std::string(G->getFunc().getName()); 2086 } 2087 2088 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2089 std::string Result; 2090 raw_string_ostream OS(Result); 2091 2092 OS << getSimpleNodeName(Node) << ":\\l"; 2093 UseBBInfo *BI = Graph->findBBInfo(Node); 2094 OS << "Count : "; 2095 if (BI && BI->CountValid) 2096 OS << BI->CountValue << "\\l"; 2097 else 2098 OS << "Unknown\\l"; 2099 2100 if (!PGOInstrSelect) 2101 return Result; 2102 2103 for (auto BI = Node->begin(); BI != Node->end(); ++BI) { 2104 auto *I = &*BI; 2105 if (!isa<SelectInst>(I)) 2106 continue; 2107 // Display scaled counts for SELECT instruction: 2108 OS << "SELECT : { T = "; 2109 uint64_t TC, FC; 2110 bool HasProf = I->extractProfMetadata(TC, FC); 2111 if (!HasProf) 2112 OS << "Unknown, F = Unknown }\\l"; 2113 else 2114 OS << TC << ", F = " << FC << " }\\l"; 2115 } 2116 return Result; 2117 } 2118 }; 2119 2120 } // end namespace llvm 2121