//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements PGO instrumentation using a minimum spanning tree based
// on the following paper:
//   [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
//   for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
//   Issue 3, pp 313-322
// The algorithm is based on the fact that for each node (except for the entry
// and exit), the sum of incoming edge counts equals the sum of outgoing edge
// counts. The count of an edge on the spanning tree can be derived from those
// edges not on the spanning tree. Knuth proves this method instruments the
// minimum number of edges.
//
// The minimal spanning tree here is actually a maximum weight tree -- on-tree
// edges have higher frequencies (more likely to execute). The idea is to
// instrument those less frequently executed edges to reduce the runtime
// overhead of instrumented binaries.
//
// This file contains two passes:
// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
// count profile, and generates the instrumentation for indirect call
// profiling.
// (2) Pass PGOInstrumentationUse which reads the edge count profile and
// annotates the branch weights. It also reads the indirect call value
// profiling records and annotates the indirect call instructions.
//
// To get precise counter information, these two passes need to be invoked at
// the same compilation point (so they see the same IR). For pass
// PGOInstrumentationGen, the real work is done in instrumentOneFunc().
For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "CFGMST.h" 52 #include "ValueProfileCollector.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/MapVector.h" 56 #include "llvm/ADT/STLExtras.h" 57 #include "llvm/ADT/SmallVector.h" 58 #include "llvm/ADT/Statistic.h" 59 #include "llvm/ADT/StringRef.h" 60 #include "llvm/ADT/Triple.h" 61 #include "llvm/ADT/Twine.h" 62 #include "llvm/ADT/iterator.h" 63 #include "llvm/ADT/iterator_range.h" 64 #include "llvm/Analysis/BlockFrequencyInfo.h" 65 #include "llvm/Analysis/BranchProbabilityInfo.h" 66 #include "llvm/Analysis/CFG.h" 67 #include "llvm/Analysis/EHPersonalities.h" 68 #include "llvm/Analysis/LoopInfo.h" 69 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 70 #include "llvm/Analysis/ProfileSummaryInfo.h" 71 #include "llvm/IR/Attributes.h" 72 #include "llvm/IR/BasicBlock.h" 73 #include "llvm/IR/CFG.h" 74 #include "llvm/IR/Comdat.h" 75 #include "llvm/IR/Constant.h" 76 #include "llvm/IR/Constants.h" 77 #include "llvm/IR/DiagnosticInfo.h" 78 #include "llvm/IR/Dominators.h" 79 #include "llvm/IR/Function.h" 80 #include "llvm/IR/GlobalAlias.h" 81 
#include "llvm/IR/GlobalValue.h" 82 #include "llvm/IR/GlobalVariable.h" 83 #include "llvm/IR/IRBuilder.h" 84 #include "llvm/IR/InstVisitor.h" 85 #include "llvm/IR/InstrTypes.h" 86 #include "llvm/IR/Instruction.h" 87 #include "llvm/IR/Instructions.h" 88 #include "llvm/IR/IntrinsicInst.h" 89 #include "llvm/IR/Intrinsics.h" 90 #include "llvm/IR/LLVMContext.h" 91 #include "llvm/IR/MDBuilder.h" 92 #include "llvm/IR/Module.h" 93 #include "llvm/IR/PassManager.h" 94 #include "llvm/IR/ProfileSummary.h" 95 #include "llvm/IR/Type.h" 96 #include "llvm/IR/Value.h" 97 #include "llvm/InitializePasses.h" 98 #include "llvm/Pass.h" 99 #include "llvm/ProfileData/InstrProf.h" 100 #include "llvm/ProfileData/InstrProfReader.h" 101 #include "llvm/Support/BranchProbability.h" 102 #include "llvm/Support/CRC.h" 103 #include "llvm/Support/Casting.h" 104 #include "llvm/Support/CommandLine.h" 105 #include "llvm/Support/DOTGraphTraits.h" 106 #include "llvm/Support/Debug.h" 107 #include "llvm/Support/Error.h" 108 #include "llvm/Support/ErrorHandling.h" 109 #include "llvm/Support/GraphWriter.h" 110 #include "llvm/Support/raw_ostream.h" 111 #include "llvm/Transforms/Instrumentation.h" 112 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 113 #include "llvm/Transforms/Utils/ModuleUtils.h" 114 #include <algorithm> 115 #include <cassert> 116 #include <cstdint> 117 #include <memory> 118 #include <numeric> 119 #include <string> 120 #include <unordered_map> 121 #include <utility> 122 #include <vector> 123 124 using namespace llvm; 125 using ProfileCount = Function::ProfileCount; 126 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 127 128 #define DEBUG_TYPE "pgo-instrumentation" 129 130 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 131 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 132 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 133 STATISTIC(NumOfPGOEdge, "Number of edges."); 134 STATISTIC(NumOfPGOBB, "Number 
of basic-blocks."); 135 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 136 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 137 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 138 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 139 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 140 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 141 STATISTIC(NumOfCSPGOSelectInsts, 142 "Number of select instruction instrumented in CSPGO."); 143 STATISTIC(NumOfCSPGOMemIntrinsics, 144 "Number of mem intrinsics instrumented in CSPGO."); 145 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 146 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 147 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 148 STATISTIC(NumOfCSPGOFunc, 149 "Number of functions having valid profile counts in CSPGO."); 150 STATISTIC(NumOfCSPGOMismatch, 151 "Number of functions having mismatch profile in CSPGO."); 152 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 153 154 // Command line option to specify the file to read profile from. This is 155 // mainly used for testing. 156 static cl::opt<std::string> 157 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 158 cl::value_desc("filename"), 159 cl::desc("Specify the path of profile data file. This is" 160 "mainly for test purpose.")); 161 static cl::opt<std::string> PGOTestProfileRemappingFile( 162 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 163 cl::value_desc("filename"), 164 cl::desc("Specify the path of profile remapping file. This is mainly for " 165 "test purpose.")); 166 167 // Command line option to disable value profiling. The default is false: 168 // i.e. value profiling is enabled by default. This is for debug purpose. 
169 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 170 cl::Hidden, 171 cl::desc("Disable Value Profiling")); 172 173 // Command line option to set the maximum number of VP annotations to write to 174 // the metadata for a single indirect call callsite. 175 static cl::opt<unsigned> MaxNumAnnotations( 176 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 177 cl::desc("Max number of annotations for a single indirect " 178 "call callsite")); 179 180 // Command line option to set the maximum number of value annotations 181 // to write to the metadata for a single memop intrinsic. 182 static cl::opt<unsigned> MaxNumMemOPAnnotations( 183 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 184 cl::desc("Max number of preicise value annotations for a single memop" 185 "intrinsic")); 186 187 // Command line option to control appending FunctionHash to the name of a COMDAT 188 // function. This is to avoid the hash mismatch caused by the preinliner. 189 static cl::opt<bool> DoComdatRenaming( 190 "do-comdat-renaming", cl::init(false), cl::Hidden, 191 cl::desc("Append function hash to the name of COMDAT function to avoid " 192 "function hash mismatch due to the preinliner")); 193 194 // Command line option to enable/disable the warning about missing profile 195 // information. 196 static cl::opt<bool> 197 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 198 cl::desc("Use this option to turn on/off " 199 "warnings about missing profile data for " 200 "functions.")); 201 202 namespace llvm { 203 // Command line option to enable/disable the warning about a hash mismatch in 204 // the profile data. 
205 cl::opt<bool> 206 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 207 cl::desc("Use this option to turn off/on " 208 "warnings about profile cfg mismatch.")); 209 } // namespace llvm 210 211 // Command line option to enable/disable the warning about a hash mismatch in 212 // the profile data for Comdat functions, which often turns out to be false 213 // positive due to the pre-instrumentation inline. 214 static cl::opt<bool> 215 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 216 cl::Hidden, 217 cl::desc("The option is used to turn on/off " 218 "warnings about hash mismatch for comdat " 219 "functions.")); 220 221 // Command line option to enable/disable select instruction instrumentation. 222 static cl::opt<bool> 223 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 224 cl::desc("Use this option to turn on/off SELECT " 225 "instruction instrumentation. ")); 226 227 // Command line option to turn on CFG dot or text dump of raw profile counts 228 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 229 "pgo-view-raw-counts", cl::Hidden, 230 cl::desc("A boolean option to show CFG dag or text " 231 "with raw profile counts from " 232 "profile data. See also option " 233 "-pgo-view-counts. To limit graph " 234 "display to only one function, use " 235 "filtering option -view-bfi-func-name."), 236 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 237 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 238 clEnumValN(PGOVCT_Text, "text", "show in text."))); 239 240 // Command line option to enable/disable memop intrinsic call.size profiling. 241 static cl::opt<bool> 242 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 243 cl::desc("Use this option to turn on/off " 244 "memory intrinsic size profiling.")); 245 246 // Emit branch probability as optimization remarks. 
247 static cl::opt<bool> 248 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 249 cl::desc("When this option is on, the annotated " 250 "branch probability will be emitted as " 251 "optimization remarks: -{Rpass|" 252 "pass-remarks}=pgo-instrumentation")); 253 254 static cl::opt<bool> PGOInstrumentEntry( 255 "pgo-instrument-entry", cl::init(false), cl::Hidden, 256 cl::desc("Force to instrument function entry basicblock.")); 257 258 static cl::opt<bool> PGOFunctionEntryCoverage( 259 "pgo-function-entry-coverage", cl::init(false), cl::Hidden, cl::ZeroOrMore, 260 cl::desc( 261 "Use this option to enable function entry coverage instrumentation.")); 262 263 static cl::opt<bool> 264 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 265 cl::desc("Fix function entry count in profile use.")); 266 267 static cl::opt<bool> PGOVerifyHotBFI( 268 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 269 cl::desc("Print out the non-match BFI count if a hot raw profile count " 270 "becomes non-hot, or a cold raw profile count becomes hot. 
" 271 "The print is enabled under -Rpass-analysis=pgo, or " 272 "internal option -pass-remakrs-analysis=pgo.")); 273 274 static cl::opt<bool> PGOVerifyBFI( 275 "pgo-verify-bfi", cl::init(false), cl::Hidden, 276 cl::desc("Print out mismatched BFI counts after setting profile metadata " 277 "The print is enabled under -Rpass-analysis=pgo, or " 278 "internal option -pass-remakrs-analysis=pgo.")); 279 280 static cl::opt<unsigned> PGOVerifyBFIRatio( 281 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, 282 cl::desc("Set the threshold for pgo-verify-bfi: only print out " 283 "mismatched BFI if the difference percentage is greater than " 284 "this value (in percentage).")); 285 286 static cl::opt<unsigned> PGOVerifyBFICutoff( 287 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, 288 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " 289 "profile count value is below.")); 290 291 namespace llvm { 292 // Command line option to turn on CFG dot dump after profile annotation. 293 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 294 extern cl::opt<PGOViewCountsType> PGOViewCounts; 295 296 // Command line option to specify the name of the function for CFG dump 297 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 298 extern cl::opt<std::string> ViewBlockFreqFuncName; 299 300 extern cl::opt<bool> DebugInfoCorrelate; 301 } // namespace llvm 302 303 static cl::opt<bool> 304 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, 305 cl::desc("Use the old CFG function hashing")); 306 307 // Return a string describing the branch condition that can be 308 // used in static branch probability heuristics: 309 static std::string getBranchCondString(Instruction *TI) { 310 BranchInst *BI = dyn_cast<BranchInst>(TI); 311 if (!BI || !BI->isConditional()) 312 return std::string(); 313 314 Value *Cond = BI->getCondition(); 315 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 316 if (!CI) 317 return std::string(); 318 319 
std::string result; 320 raw_string_ostream OS(result); 321 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 322 CI->getOperand(0)->getType()->print(OS, true); 323 324 Value *RHS = CI->getOperand(1); 325 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 326 if (CV) { 327 if (CV->isZero()) 328 OS << "_Zero"; 329 else if (CV->isOne()) 330 OS << "_One"; 331 else if (CV->isMinusOne()) 332 OS << "_MinusOne"; 333 else 334 OS << "_Const"; 335 } 336 OS.flush(); 337 return result; 338 } 339 340 static const char *ValueProfKindDescr[] = { 341 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 342 #include "llvm/ProfileData/InstrProfData.inc" 343 }; 344 345 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 346 // aware this is an ir_level profile so it can set the version flag. 347 static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { 348 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 349 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 350 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 351 if (IsCS) 352 ProfileVersion |= VARIANT_MASK_CSIR_PROF; 353 if (PGOInstrumentEntry) 354 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; 355 if (DebugInfoCorrelate) 356 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; 357 if (PGOFunctionEntryCoverage) 358 ProfileVersion |= 359 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; 360 auto IRLevelVersionVariable = new GlobalVariable( 361 M, IntTy64, true, GlobalValue::WeakAnyLinkage, 362 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); 363 IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); 364 Triple TT(M.getTargetTriple()); 365 if (TT.supportsCOMDAT()) { 366 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); 367 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); 368 } 369 return IRLevelVersionVariable; 370 } 371 372 namespace { 373 374 /// The select 
instruction visitor plays three roles specified 375 /// by the mode. In \c VM_counting mode, it simply counts the number of 376 /// select instructions. In \c VM_instrument mode, it inserts code to count 377 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 378 /// it reads the profile data and annotate the select instruction with metadata. 379 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 380 class PGOUseFunc; 381 382 /// Instruction Visitor class to visit select instructions. 383 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 384 Function &F; 385 unsigned NSIs = 0; // Number of select instructions instrumented. 386 VisitMode Mode = VM_counting; // Visiting mode. 387 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 388 unsigned TotalNumCtrs = 0; // Total number of counters 389 GlobalVariable *FuncNameVar = nullptr; 390 uint64_t FuncHash = 0; 391 PGOUseFunc *UseFunc = nullptr; 392 393 SelectInstVisitor(Function &Func) : F(Func) {} 394 395 void countSelects(Function &Func) { 396 NSIs = 0; 397 Mode = VM_counting; 398 visit(Func); 399 } 400 401 // Visit the IR stream and instrument all select instructions. \p 402 // Ind is a pointer to the counter index variable; \p TotalNC 403 // is the total number of counters; \p FNV is the pointer to the 404 // PGO function name var; \p FHash is the function hash. 405 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 406 GlobalVariable *FNV, uint64_t FHash) { 407 Mode = VM_instrument; 408 CurCtrIdx = Ind; 409 TotalNumCtrs = TotalNC; 410 FuncHash = FHash; 411 FuncNameVar = FNV; 412 visit(Func); 413 } 414 415 // Visit the IR stream and annotate all select instructions. 
416 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 417 Mode = VM_annotate; 418 UseFunc = UF; 419 CurCtrIdx = Ind; 420 visit(Func); 421 } 422 423 void instrumentOneSelectInst(SelectInst &SI); 424 void annotateOneSelectInst(SelectInst &SI); 425 426 // Visit \p SI instruction and perform tasks according to visit mode. 427 void visitSelectInst(SelectInst &SI); 428 429 // Return the number of select instructions. This needs be called after 430 // countSelects(). 431 unsigned getNumOfSelectInsts() const { return NSIs; } 432 }; 433 434 435 class PGOInstrumentationGenLegacyPass : public ModulePass { 436 public: 437 static char ID; 438 439 PGOInstrumentationGenLegacyPass(bool IsCS = false) 440 : ModulePass(ID), IsCS(IsCS) { 441 initializePGOInstrumentationGenLegacyPassPass( 442 *PassRegistry::getPassRegistry()); 443 } 444 445 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 446 447 private: 448 // Is this is context-sensitive instrumentation. 449 bool IsCS; 450 bool runOnModule(Module &M) override; 451 452 void getAnalysisUsage(AnalysisUsage &AU) const override { 453 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 454 AU.addRequired<TargetLibraryInfoWrapperPass>(); 455 } 456 }; 457 458 class PGOInstrumentationUseLegacyPass : public ModulePass { 459 public: 460 static char ID; 461 462 // Provide the profile filename as the parameter. 463 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) 464 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { 465 if (!PGOTestProfileFile.empty()) 466 ProfileFileName = PGOTestProfileFile; 467 initializePGOInstrumentationUseLegacyPassPass( 468 *PassRegistry::getPassRegistry()); 469 } 470 471 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 472 473 private: 474 std::string ProfileFileName; 475 // Is this is context-sensitive instrumentation use. 
476 bool IsCS; 477 478 bool runOnModule(Module &M) override; 479 480 void getAnalysisUsage(AnalysisUsage &AU) const override { 481 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 482 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 483 AU.addRequired<TargetLibraryInfoWrapperPass>(); 484 } 485 }; 486 487 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { 488 public: 489 static char ID; 490 StringRef getPassName() const override { 491 return "PGOInstrumentationGenCreateVarPass"; 492 } 493 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") 494 : ModulePass(ID), InstrProfileOutput(CSInstrName) { 495 initializePGOInstrumentationGenCreateVarLegacyPassPass( 496 *PassRegistry::getPassRegistry()); 497 } 498 499 private: 500 bool runOnModule(Module &M) override { 501 createProfileFileNameVar(M, InstrProfileOutput); 502 // The variable in a comdat may be discarded by LTO. Ensure the 503 // declaration will be retained. 504 appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); 505 return false; 506 } 507 std::string InstrProfileOutput; 508 }; 509 510 } // end anonymous namespace 511 512 char PGOInstrumentationGenLegacyPass::ID = 0; 513 514 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 515 "PGO instrumentation.", false, false) 516 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 517 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 518 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 519 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 520 "PGO instrumentation.", false, false) 521 522 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { 523 return new PGOInstrumentationGenLegacyPass(IsCS); 524 } 525 526 char PGOInstrumentationUseLegacyPass::ID = 0; 527 528 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 529 "Read PGO instrumentation profile.", false, false) 530 
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 531 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 532 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 533 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 534 "Read PGO instrumentation profile.", false, false) 535 536 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, 537 bool IsCS) { 538 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); 539 } 540 541 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; 542 543 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, 544 "pgo-instr-gen-create-var", 545 "Create PGO instrumentation version variable for CSPGO.", false, 546 false) 547 548 ModulePass * 549 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { 550 return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName)); 551 } 552 553 namespace { 554 555 /// An MST based instrumentation for PGO 556 /// 557 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 558 /// in the function level. 559 struct PGOEdge { 560 // This class implements the CFG edges. Note the CFG can be a multi-graph. 561 // So there might be multiple edges with same SrcBB and DestBB. 562 const BasicBlock *SrcBB; 563 const BasicBlock *DestBB; 564 uint64_t Weight; 565 bool InMST = false; 566 bool Removed = false; 567 bool IsCritical = false; 568 569 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 570 : SrcBB(Src), DestBB(Dest), Weight(W) {} 571 572 // Return the information string of an edge. 573 std::string infoString() const { 574 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 575 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 576 } 577 }; 578 579 // This class stores the auxiliary information for each BB. 
580 struct BBInfo { 581 BBInfo *Group; 582 uint32_t Index; 583 uint32_t Rank = 0; 584 585 BBInfo(unsigned IX) : Group(this), Index(IX) {} 586 587 // Return the information string of this object. 588 std::string infoString() const { 589 return (Twine("Index=") + Twine(Index)).str(); 590 } 591 592 // Empty function -- only applicable to UseBBInfo. 593 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 594 595 // Empty function -- only applicable to UseBBInfo. 596 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 597 }; 598 599 // This class implements the CFG edges. Note the CFG can be a multi-graph. 600 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 601 private: 602 Function &F; 603 604 // Is this is context-sensitive instrumentation. 605 bool IsCS; 606 607 // A map that stores the Comdat group in function F. 608 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 609 610 ValueProfileCollector VPC; 611 612 void computeCFGHash(); 613 void renameComdatFunction(); 614 615 public: 616 std::vector<std::vector<VPCandidateInfo>> ValueSites; 617 SelectInstVisitor SIVisitor; 618 std::string FuncName; 619 GlobalVariable *FuncNameVar; 620 621 // CFG hash value for this function. 622 uint64_t FunctionHash = 0; 623 624 // The Minimum Spanning Tree of function CFG. 625 CFGMST<Edge, BBInfo> MST; 626 627 // Collect all the BBs that will be instrumented, and store them in 628 // InstrumentBBs. 629 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 630 631 // Give an edge, find the BB that will be instrumented. 632 // Return nullptr if there is no BB to be instrumented. 633 BasicBlock *getInstrBB(Edge *E); 634 635 // Return the auxiliary BB information. 636 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 637 638 // Return the auxiliary BB information if available. 639 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 640 641 // Dump edges and BB information. 
642 void dumpInfo(std::string Str = "") const { 643 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 644 Twine(FunctionHash) + "\t" + Str); 645 } 646 647 FuncPGOInstrumentation( 648 Function &Func, TargetLibraryInfo &TLI, 649 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 650 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 651 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 652 bool InstrumentFuncEntry = true) 653 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 654 ValueSites(IPVK_Last + 1), SIVisitor(Func), 655 MST(F, InstrumentFuncEntry, BPI, BFI) { 656 // This should be done before CFG hash computation. 657 SIVisitor.countSelects(Func); 658 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 659 if (!IsCS) { 660 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 661 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 662 NumOfPGOBB += MST.BBInfos.size(); 663 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 664 } else { 665 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 666 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 667 NumOfCSPGOBB += MST.BBInfos.size(); 668 } 669 670 FuncName = getPGOFuncName(F); 671 computeCFGHash(); 672 if (!ComdatMembers.empty()) 673 renameComdatFunction(); 674 LLVM_DEBUG(dumpInfo("after CFGMST")); 675 676 for (auto &E : MST.AllEdges) { 677 if (E->Removed) 678 continue; 679 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 680 if (!E->InMST) 681 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 682 } 683 684 if (CreateGlobalVar) 685 FuncNameVar = createPGOFuncNameVar(F, FuncName); 686 } 687 }; 688 689 } // end anonymous namespace 690 691 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 692 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 693 // of selects, indirect calls, mem ops and edges. 
template <class Edge, class BBInfo>
void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
  // Serialize the BBInfo index of every CFG successor (least significant
  // byte first) and feed the byte stream to the first CRC.
  std::vector<uint8_t> Indexes;
  JamCRC JC;
  for (auto &BB : F) {
    const Instruction *TI = BB.getTerminator();
    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
      BasicBlock *Succ = TI->getSuccessor(I);
      auto BI = findBBInfo(Succ);
      // Successors without BB info do not contribute to the hash.
      if (BI == nullptr)
        continue;
      uint32_t Index = BI->Index;
      for (int J = 0; J < 4; J++)
        Indexes.push_back((uint8_t)(Index >> (J * 8)));
    }
  }
  JC.update(Indexes);

  JamCRC JCH;
  if (PGOOldCFGHashing) {
    // Hash format for context sensitive profile. Reserve 4 bits for other
    // information.
    // Old layout: select count, indirect call site count and edge count are
    // packed at bit offsets 56, 48 and 32 above the index CRC. The memop
    // count is not part of the old hash.
    FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
                   (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
                   //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
                   (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
  } else {
    // The higher 32 bits.
    // Each count is hashed as an 8-byte little-endian value.
    auto updateJCH = [&JCH](uint64_t Num) {
      uint8_t Data[8];
      support::endian::write64le(Data, Num);
      JCH.update(Data);
    };
    updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
    updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
    updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
    updateJCH((uint64_t)MST.AllEdges.size());

    // Hash format for context sensitive profile. Reserve 4 bits for other
    // information.
    // NOTE(review): the second CRC is shifted by 28 (not 32) and added, so,
    // assuming getCRC() yields a 32-bit value, the two CRCs overlap in bits
    // 28-31. Do not change this: it defines the on-disk hash.
    FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
  }

  // Reserve bit 60-63 for other information purpose.
  FunctionHash &= 0x0FFFFFFFFFFFFFFF;
  if (IsCS)
    NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
  LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
                    << " CRC = " << JC.getCRC()
                    << ", Selects = " << SIVisitor.getNumOfSelectInsts()
                    << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
                    << ValueSites[IPVK_IndirectCallTarget].size());
  if (!PGOOldCFGHashing) {
    LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
                      << ", High32 CRC = " << JCH.getCRC());
  }
  LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";);
}

// Check if we can safely rename this Comdat function.
// Returns false when -do-comdat-renaming is off, when canRenameComdatFunc()
// rejects \p F, or when the Comdat group of \p F has any member other than
// \p F itself.
static bool canRenameComdat(
    Function &F,
    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
  if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
    return false;

  // FIXME: Current only handle those Comdat groups that only containing one
  // function.
  // (1) For a Comdat group containing multiple functions, we need to have a
  // unique postfix based on the hashes for each function. There is a
  // non-trivial code refactoring to do this efficiently.
  // (2) Variables can not be renamed, so we can not rename Comdat function in a
  // group including global vars.
  Comdat *C = F.getComdat();
  for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
    assert(!isa<GlobalAlias>(CM.second));
    // A non-function member yields nullptr here, which also differs from &F.
    Function *FM = dyn_cast<Function>(CM.second);
    if (FM != &F)
      return false;
  }
  return true;
}

// Append the CFGHash to the Comdat function name.
// The function is renamed to "<name>.<FunctionHash>", a weak alias with the
// original name is created for it, and FuncName gets the same suffix.
template <class Edge, class BBInfo>
void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
  if (!canRenameComdat(F, ComdatMembers))
    return;
  // Capture the original name before F is renamed; the alias needs it.
  std::string OrigName = F.getName().str();
  std::string NewFuncName =
      Twine(F.getName() + "." + Twine(FunctionHash)).str();
  F.setName(Twine(NewFuncName));
  GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
  FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
  Comdat *NewComdat;
  Module *M = F.getParent();
  // For AvailableExternallyLinkage functions, change the linkage to
  // LinkOnceODR and put them into comdat. This is because after renaming, there
  // is no backup external copy available for the function.
  if (!F.hasComdat()) {
    assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
    NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
    F.setLinkage(GlobalValue::LinkOnceODRLinkage);
    F.setComdat(NewComdat);
    return;
  }

  // This function belongs to a single function Comdat group.
  // Move it to a new group named "<old comdat name>.<FunctionHash>" with the
  // same selection kind.
  Comdat *OrigComdat = F.getComdat();
  std::string NewComdatName =
      Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
  NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
  NewComdat->setSelectionKind(OrigComdat->getSelectionKind());

  for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
    // Must be a function.
    cast<Function>(CM.second)->setComdat(NewComdat);
  }
}

// Collect all the BBs that will be instrumented and return them in
// InstrumentBBs, and set up InEdges/OutEdges for UseBBInfo.
template <class Edge, class BBInfo>
void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
    std::vector<BasicBlock *> &InstrumentBBs) {
  // Use a worklist as we will update the vector during the iteration.
  // (getInstrBB() may add edges to MST.AllEdges when it splits an edge.)
  std::vector<Edge *> EdgeList;
  EdgeList.reserve(MST.AllEdges.size());
  for (auto &E : MST.AllEdges)
    EdgeList.push_back(E.get());

  for (auto &E : EdgeList) {
    BasicBlock *InstrBB = getInstrBB(E);
    if (InstrBB)
      InstrumentBBs.push_back(InstrBB);
  }

  // Set up InEdges/OutEdges for all BBs.
  // This walks MST.AllEdges again, so edges added above are included and
  // edges marked Removed are skipped.
  for (auto &E : MST.AllEdges) {
    if (E->Removed)
      continue;
    const BasicBlock *SrcBB = E->SrcBB;
    const BasicBlock *DestBB = E->DestBB;
    BBInfo &SrcInfo = getBBInfo(SrcBB);
    BBInfo &DestInfo = getBBInfo(DestBB);
    SrcInfo.addOutEdge(E.get());
    DestInfo.addInEdge(E.get());
  }
}

// Given a CFG edge E to be instrumented, find which BB to place the
// instrumented code. The function will split the critical edge if necessary.
// Returns nullptr when the edge is in the MST or removed, when the chosen BB
// has no insertion point, or when a critical edge cannot be split.
template <class Edge, class BBInfo>
BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
  if (E->InMST || E->Removed)
    return nullptr;

  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
  // For a fake edge, instrument the real BB.
  if (SrcBB == nullptr)
    return DestBB;
  if (DestBB == nullptr)
    return SrcBB;

  auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
    // There are basic blocks (such as catchswitch) cannot be instrumented.
    // If the returned first insertion point is the end of BB, skip this BB.
    if (BB->getFirstInsertionPt() == BB->end())
      return nullptr;
    return BB;
  };

  // Instrument the SrcBB if it has a single successor,
  // otherwise, the DestBB if this is not a critical edge.
  Instruction *TI = SrcBB->getTerminator();
  if (TI->getNumSuccessors() <= 1)
    return canInstrument(SrcBB);
  if (!E->IsCritical)
    return canInstrument(DestBB);

  // Some IndirectBr critical edges cannot be split by the previous
  // SplitIndirectBrCriticalEdges call. Bail out.
  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
  BasicBlock *InstrBB =
      isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
  if (!InstrBB) {
    LLVM_DEBUG(
        dbgs() << "Fail to split critical edge: not instrument this edge.\n");
    return nullptr;
  }
  // For a critical edge, we have to split. Instrument the newly
  // created BB.
  IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
  LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
                    << " --> " << getBBInfo(DestBB).Index << "\n");
  // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
  MST.addEdge(SrcBB, InstrBB, 0);
  // Second one: Add new edge of InstrBB->DestBB.
  // It is marked as in the MST, so only SrcBB->InstrBB stays outside the MST.
  Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
  NewEdge1.InMST = true;
  // The original SrcBB->DestBB edge is replaced by the two edges above.
  E->Removed = true;

  return canInstrument(InstrBB);
}

// When generating value profiling calls on Windows routines that make use of
// handler funclets for exception processing an operand bundle needs to attached
// to the called function. This routine will set \p OpBundles to contain the
// funclet information, if any is needed, that should be placed on the generated
// value profiling call for the value profile candidate call.
static void
populateEHOperandBundle(VPCandidateInfo &Cand,
                        DenseMap<BasicBlock *, ColorVector> &BlockColors,
                        SmallVectorImpl<OperandBundleDef> &OpBundles) {
  auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
  if (!OrigCall)
    return;

  if (!isa<IntrinsicInst>(OrigCall)) {
    // The instrumentation call should belong to the same funclet as a
    // non-intrinsic call, so just copy the operand bundle, if any exists.
    Optional<OperandBundleUse> ParentFunclet =
        OrigCall->getOperandBundle(LLVMContext::OB_funclet);
    if (ParentFunclet)
      OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
  } else {
    // Intrinsics or other instructions do not get funclet information from the
    // front-end. Need to use the BlockColors that was computed by the routine
    // colorEHFunclets to determine whether a funclet is needed.
924 if (!BlockColors.empty()) { 925 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 926 assert(CV.size() == 1 && "non-unique color for block!"); 927 Instruction *EHPad = CV.front()->getFirstNonPHI(); 928 if (EHPad->isEHPad()) 929 OpBundles.emplace_back("funclet", EHPad); 930 } 931 } 932 } 933 934 // Visit all edge and instrument the edges not in MST, and do value profiling. 935 // Critical edges will be split. 936 static void instrumentOneFunc( 937 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 938 BlockFrequencyInfo *BFI, 939 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 940 bool IsCS) { 941 // Split indirectbr critical edges here before computing the MST rather than 942 // later in getInstrBB() to avoid invalidating it. 943 SplitIndirectBrCriticalEdges(F, BPI, BFI); 944 945 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo( 946 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); 947 948 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 949 auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy); 950 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), 951 FuncInfo.FunctionHash); 952 if (PGOFunctionEntryCoverage) { 953 assert(!IsCS && 954 "entry coverge does not support context-sensitive instrumentation"); 955 auto &EntryBB = F.getEntryBlock(); 956 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 957 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, 958 // i32 <index>) 959 Builder.CreateCall( 960 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover), 961 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); 962 return; 963 } 964 965 std::vector<BasicBlock *> InstrumentBBs; 966 FuncInfo.getInstrumentBBs(InstrumentBBs); 967 unsigned NumCounters = 968 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 969 970 uint32_t I = 0; 971 for (auto *InstrBB : InstrumentBBs) { 972 IRBuilder<> Builder(InstrBB, 
InstrBB->getFirstInsertionPt()); 973 assert(Builder.GetInsertPoint() != InstrBB->end() && 974 "Cannot get the Instrumentation point"); 975 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, 976 // i32 <index>) 977 Builder.CreateCall( 978 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 979 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)}); 980 } 981 982 // Now instrument select instructions: 983 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 984 FuncInfo.FunctionHash); 985 assert(I == NumCounters); 986 987 if (DisableValueProfiling) 988 return; 989 990 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 991 992 // Intrinsic function calls do not have funclet operand bundles needed for 993 // Windows exception handling attached to them. However, if value profiling is 994 // inserted for one of these calls, then a funclet value will need to be set 995 // on the instrumentation call based on the funclet coloring. 996 DenseMap<BasicBlock *, ColorVector> BlockColors; 997 if (F.hasPersonalityFn() && 998 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 999 BlockColors = colorEHFunclets(F); 1000 1001 // For each VP Kind, walk the VP candidates and instrument each one. 
1002 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 1003 unsigned SiteIndex = 0; 1004 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 1005 continue; 1006 1007 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 1008 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 1009 << " site: CallSite Index = " << SiteIndex << "\n"); 1010 1011 IRBuilder<> Builder(Cand.InsertPt); 1012 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 1013 "Cannot get the Instrumentation point"); 1014 1015 Value *ToProfile = nullptr; 1016 if (Cand.V->getType()->isIntegerTy()) 1017 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 1018 else if (Cand.V->getType()->isPointerTy()) 1019 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 1020 assert(ToProfile && "value profiling Value is of unexpected type"); 1021 1022 SmallVector<OperandBundleDef, 1> OpBundles; 1023 populateEHOperandBundle(Cand, BlockColors, OpBundles); 1024 Builder.CreateCall( 1025 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 1026 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 1027 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 1028 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 1029 OpBundles); 1030 } 1031 } // IPVK_First <= Kind <= IPVK_Last 1032 } 1033 1034 namespace { 1035 1036 // This class represents a CFG edge in profile use compilation. 1037 struct PGOUseEdge : public PGOEdge { 1038 bool CountValid = false; 1039 uint64_t CountValue = 0; 1040 1041 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 1042 : PGOEdge(Src, Dest, W) {} 1043 1044 // Set edge count value 1045 void setEdgeCount(uint64_t Value) { 1046 CountValue = Value; 1047 CountValid = true; 1048 } 1049 1050 // Return the information string for this object. 
1051 std::string infoString() const { 1052 if (!CountValid) 1053 return PGOEdge::infoString(); 1054 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 1055 .str(); 1056 } 1057 }; 1058 1059 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 1060 1061 // This class stores the auxiliary information for each BB. 1062 struct UseBBInfo : public BBInfo { 1063 uint64_t CountValue = 0; 1064 bool CountValid; 1065 int32_t UnknownCountInEdge = 0; 1066 int32_t UnknownCountOutEdge = 0; 1067 DirectEdges InEdges; 1068 DirectEdges OutEdges; 1069 1070 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 1071 1072 UseBBInfo(unsigned IX, uint64_t C) 1073 : BBInfo(IX), CountValue(C), CountValid(true) {} 1074 1075 // Set the profile count value for this BB. 1076 void setBBInfoCount(uint64_t Value) { 1077 CountValue = Value; 1078 CountValid = true; 1079 } 1080 1081 // Return the information string of this object. 1082 std::string infoString() const { 1083 if (!CountValid) 1084 return BBInfo::infoString(); 1085 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 1086 } 1087 1088 // Add an OutEdge and update the edge count. 1089 void addOutEdge(PGOUseEdge *E) { 1090 OutEdges.push_back(E); 1091 UnknownCountOutEdge++; 1092 } 1093 1094 // Add an InEdge and update the edge count. 1095 void addInEdge(PGOUseEdge *E) { 1096 InEdges.push_back(E); 1097 UnknownCountInEdge++; 1098 } 1099 }; 1100 1101 } // end anonymous namespace 1102 1103 // Sum up the count values for all the edges. 
1104 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 1105 uint64_t Total = 0; 1106 for (auto &E : Edges) { 1107 if (E->Removed) 1108 continue; 1109 Total += E->CountValue; 1110 } 1111 return Total; 1112 } 1113 1114 namespace { 1115 1116 class PGOUseFunc { 1117 public: 1118 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1119 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1120 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1121 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry) 1122 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1123 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1124 InstrumentFuncEntry), 1125 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1126 1127 // Read counts for the instrumented BB from profile. 1128 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1129 bool &AllMinusOnes); 1130 1131 // Populate the counts for all BBs. 1132 void populateCounters(); 1133 1134 // Set the branch weights based on the count values. 1135 void setBranchWeights(); 1136 1137 // Annotate the value profile call sites for all value kind. 1138 void annotateValueSites(); 1139 1140 // Annotate the value profile call sites for one value kind. 1141 void annotateValueSites(uint32_t Kind); 1142 1143 // Annotate the irreducible loop header weights. 1144 void annotateIrrLoopHeaderWeights(); 1145 1146 // The hotness of the function from the profile count. 1147 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1148 1149 // Return the function hotness from the profile. 1150 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1151 1152 // Return the function hash. 1153 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1154 1155 // Return the profile record for this function; 1156 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1157 1158 // Return the auxiliary BB information. 
1159 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1160 return FuncInfo.getBBInfo(BB); 1161 } 1162 1163 // Return the auxiliary BB information if available. 1164 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1165 return FuncInfo.findBBInfo(BB); 1166 } 1167 1168 Function &getFunc() const { return F; } 1169 1170 void dumpInfo(std::string Str = "") const { 1171 FuncInfo.dumpInfo(Str); 1172 } 1173 1174 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1175 private: 1176 Function &F; 1177 Module *M; 1178 BlockFrequencyInfo *BFI; 1179 ProfileSummaryInfo *PSI; 1180 1181 // This member stores the shared information with class PGOGenFunc. 1182 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1183 1184 // The maximum count value in the profile. This is only used in PGO use 1185 // compilation. 1186 uint64_t ProgramMaxCount; 1187 1188 // Position of counter that remains to be read. 1189 uint32_t CountPosition = 0; 1190 1191 // Total size of the profile count for this function. 1192 uint32_t ProfileCountSize = 0; 1193 1194 // ProfileRecord for this function. 1195 InstrProfRecord ProfileRecord; 1196 1197 // Function hotness info derived from profile. 1198 FuncFreqAttr FreqAttr; 1199 1200 // Is to use the context sensitive profile. 1201 bool IsCS; 1202 1203 // Find the Instrumented BB and set the value. Return false on error. 1204 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1205 1206 // Set the edge counter value for the unknown edge -- there should be only 1207 // one unknown edge. 1208 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1209 1210 // Return FuncName string; 1211 std::string getFuncName() const { return FuncInfo.FuncName; } 1212 1213 // Set the hot/cold inline hints based on the count values. 1214 // FIXME: This function should be removed once the functionality in 1215 // the inliner is implemented. 
1216 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1217 if (PSI->isHotCount(EntryCount)) 1218 FreqAttr = FFA_Hot; 1219 else if (PSI->isColdCount(MaxCount)) 1220 FreqAttr = FFA_Cold; 1221 } 1222 }; 1223 1224 } // end anonymous namespace 1225 1226 // Visit all the edges and assign the count value for the instrumented 1227 // edges and the BB. Return false on error. 1228 bool PGOUseFunc::setInstrumentedCounts( 1229 const std::vector<uint64_t> &CountFromProfile) { 1230 1231 std::vector<BasicBlock *> InstrumentBBs; 1232 FuncInfo.getInstrumentBBs(InstrumentBBs); 1233 unsigned NumCounters = 1234 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1235 // The number of counters here should match the number of counters 1236 // in profile. Return if they mismatch. 1237 if (NumCounters != CountFromProfile.size()) { 1238 return false; 1239 } 1240 auto *FuncEntry = &*F.begin(); 1241 1242 // Set the profile count to the Instrumented BBs. 1243 uint32_t I = 0; 1244 for (BasicBlock *InstrBB : InstrumentBBs) { 1245 uint64_t CountValue = CountFromProfile[I++]; 1246 UseBBInfo &Info = getBBInfo(InstrBB); 1247 // If we reach here, we know that we have some nonzero count 1248 // values in this function. The entry count should not be 0. 1249 // Fix it if necessary. 1250 if (InstrBB == FuncEntry && CountValue == 0) 1251 CountValue = 1; 1252 Info.setBBInfoCount(CountValue); 1253 } 1254 ProfileCountSize = CountFromProfile.size(); 1255 CountPosition = I; 1256 1257 // Set the edge count and update the count of unknown edges for BBs. 1258 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1259 E->setEdgeCount(Value); 1260 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1261 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1262 }; 1263 1264 // Set the profile count the Instrumented edges. There are BBs that not in 1265 // MST but not instrumented. Need to set the edge count value so that we can 1266 // populate the profile counts later. 
1267 for (auto &E : FuncInfo.MST.AllEdges) { 1268 if (E->Removed || E->InMST) 1269 continue; 1270 const BasicBlock *SrcBB = E->SrcBB; 1271 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1272 1273 // If only one out-edge, the edge profile count should be the same as BB 1274 // profile count. 1275 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1276 setEdgeCount(E.get(), SrcInfo.CountValue); 1277 else { 1278 const BasicBlock *DestBB = E->DestBB; 1279 UseBBInfo &DestInfo = getBBInfo(DestBB); 1280 // If only one in-edge, the edge profile count should be the same as BB 1281 // profile count. 1282 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1283 setEdgeCount(E.get(), DestInfo.CountValue); 1284 } 1285 if (E->CountValid) 1286 continue; 1287 // E's count should have been set from profile. If not, this meenas E skips 1288 // the instrumentation. We set the count to 0. 1289 setEdgeCount(E.get(), 0); 1290 } 1291 return true; 1292 } 1293 1294 // Set the count value for the unknown edge. There should be one and only one 1295 // unknown edge in Edges vector. 1296 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1297 for (auto &E : Edges) { 1298 if (E->CountValid) 1299 continue; 1300 E->setEdgeCount(Value); 1301 1302 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1303 getBBInfo(E->DestBB).UnknownCountInEdge--; 1304 return; 1305 } 1306 llvm_unreachable("Cannot find the unknown count edge"); 1307 } 1308 1309 // Emit function metadata indicating PGO profile mismatch. 1310 static void annotateFunctionWithHashMismatch(Function &F, 1311 LLVMContext &ctx) { 1312 const char MetadataName[] = "instr_prof_hash_mismatch"; 1313 SmallVector<Metadata *, 2> Names; 1314 // If this metadata already exists, ignore. 
1315 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1316 if (Existing) { 1317 MDTuple *Tuple = cast<MDTuple>(Existing); 1318 for (auto &N : Tuple->operands()) { 1319 if (cast<MDString>(N.get())->getString() == MetadataName) 1320 return; 1321 Names.push_back(N.get()); 1322 } 1323 } 1324 1325 MDBuilder MDB(ctx); 1326 Names.push_back(MDB.createString(MetadataName)); 1327 MDNode *MD = MDTuple::get(ctx, Names); 1328 F.setMetadata(LLVMContext::MD_annotation, MD); 1329 } 1330 1331 // Read the profile from ProfileFileName and assign the value to the 1332 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1333 // Return true if the profile are successfully read, and false on errors. 1334 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1335 bool &AllMinusOnes) { 1336 auto &Ctx = M->getContext(); 1337 Expected<InstrProfRecord> Result = 1338 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1339 if (Error E = Result.takeError()) { 1340 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1341 auto Err = IPE.get(); 1342 bool SkipWarning = false; 1343 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1344 << FuncInfo.FuncName << ": "); 1345 if (Err == instrprof_error::unknown_function) { 1346 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1347 SkipWarning = !PGOWarnMissing; 1348 LLVM_DEBUG(dbgs() << "unknown function"); 1349 } else if (Err == instrprof_error::hash_mismatch || 1350 Err == instrprof_error::malformed) { 1351 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1352 SkipWarning = 1353 NoPGOWarnMismatch || 1354 (NoPGOWarnMismatchComdat && 1355 (F.hasComdat() || 1356 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1357 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1358 // Emit function metadata indicating PGO profile mismatch. 
1359 annotateFunctionWithHashMismatch(F, M->getContext()); 1360 } 1361 1362 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1363 if (SkipWarning) 1364 return; 1365 1366 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1367 std::string(" Hash = ") + 1368 std::to_string(FuncInfo.FunctionHash); 1369 1370 Ctx.diagnose( 1371 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1372 }); 1373 return false; 1374 } 1375 ProfileRecord = std::move(Result.get()); 1376 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1377 1378 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1379 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1380 AllMinusOnes = (CountFromProfile.size() > 0); 1381 uint64_t ValueSum = 0; 1382 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1383 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1384 ValueSum += CountFromProfile[I]; 1385 if (CountFromProfile[I] != (uint64_t)-1) 1386 AllMinusOnes = false; 1387 } 1388 AllZeros = (ValueSum == 0); 1389 1390 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1391 1392 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1393 getBBInfo(nullptr).UnknownCountInEdge = 2; 1394 1395 if (!setInstrumentedCounts(CountFromProfile)) { 1396 LLVM_DEBUG( 1397 dbgs() << "Inconsistent number of counts, skipping this function"); 1398 Ctx.diagnose(DiagnosticInfoPGOProfile( 1399 M->getName().data(), 1400 Twine("Inconsistent number of counts in ") + F.getName().str() 1401 + Twine(": the profile may be stale or there is a function name collision."), 1402 DS_Warning)); 1403 return false; 1404 } 1405 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1406 return true; 1407 } 1408 1409 // Populate the counters from instrumented BBs to all BBs. 1410 // In the end of this operation, all BBs should have a valid count value. 
1411 void PGOUseFunc::populateCounters() { 1412 bool Changes = true; 1413 unsigned NumPasses = 0; 1414 while (Changes) { 1415 NumPasses++; 1416 Changes = false; 1417 1418 // For efficient traversal, it's better to start from the end as most 1419 // of the instrumented edges are at the end. 1420 for (auto &BB : reverse(F)) { 1421 UseBBInfo *Count = findBBInfo(&BB); 1422 if (Count == nullptr) 1423 continue; 1424 if (!Count->CountValid) { 1425 if (Count->UnknownCountOutEdge == 0) { 1426 Count->CountValue = sumEdgeCount(Count->OutEdges); 1427 Count->CountValid = true; 1428 Changes = true; 1429 } else if (Count->UnknownCountInEdge == 0) { 1430 Count->CountValue = sumEdgeCount(Count->InEdges); 1431 Count->CountValid = true; 1432 Changes = true; 1433 } 1434 } 1435 if (Count->CountValid) { 1436 if (Count->UnknownCountOutEdge == 1) { 1437 uint64_t Total = 0; 1438 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1439 // If the one of the successor block can early terminate (no-return), 1440 // we can end up with situation where out edge sum count is larger as 1441 // the source BB's count is collected by a post-dominated block. 1442 if (Count->CountValue > OutSum) 1443 Total = Count->CountValue - OutSum; 1444 setEdgeCount(Count->OutEdges, Total); 1445 Changes = true; 1446 } 1447 if (Count->UnknownCountInEdge == 1) { 1448 uint64_t Total = 0; 1449 uint64_t InSum = sumEdgeCount(Count->InEdges); 1450 if (Count->CountValue > InSum) 1451 Total = Count->CountValue - InSum; 1452 setEdgeCount(Count->InEdges, Total); 1453 Changes = true; 1454 } 1455 } 1456 } 1457 } 1458 1459 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1460 #ifndef NDEBUG 1461 // Assert every BB has a valid counter. 
1462 for (auto &BB : F) { 1463 auto BI = findBBInfo(&BB); 1464 if (BI == nullptr) 1465 continue; 1466 assert(BI->CountValid && "BB count is not valid"); 1467 } 1468 #endif 1469 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1470 uint64_t FuncMaxCount = FuncEntryCount; 1471 for (auto &BB : F) { 1472 auto BI = findBBInfo(&BB); 1473 if (BI == nullptr) 1474 continue; 1475 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1476 } 1477 1478 // Fix the obviously inconsistent entry count. 1479 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1480 FuncEntryCount = 1; 1481 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1482 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1483 1484 // Now annotate select instructions 1485 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1486 assert(CountPosition == ProfileCountSize); 1487 1488 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1489 } 1490 1491 // Assign the scaled count values to the BB with multiple out edges. 1492 void PGOUseFunc::setBranchWeights() { 1493 // Generate MD_prof metadata for every branch instruction. 1494 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1495 << " IsCS=" << IsCS << "\n"); 1496 for (auto &BB : F) { 1497 Instruction *TI = BB.getTerminator(); 1498 if (TI->getNumSuccessors() < 2) 1499 continue; 1500 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1501 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI))) 1502 continue; 1503 1504 if (getBBInfo(&BB).CountValue == 0) 1505 continue; 1506 1507 // We have a non-zero Branch BB. 
1508 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1509 unsigned Size = BBCountInfo.OutEdges.size(); 1510 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1511 uint64_t MaxCount = 0; 1512 for (unsigned s = 0; s < Size; s++) { 1513 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1514 const BasicBlock *SrcBB = E->SrcBB; 1515 const BasicBlock *DestBB = E->DestBB; 1516 if (DestBB == nullptr) 1517 continue; 1518 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1519 uint64_t EdgeCount = E->CountValue; 1520 if (EdgeCount > MaxCount) 1521 MaxCount = EdgeCount; 1522 EdgeCounts[SuccNum] = EdgeCount; 1523 } 1524 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1525 } 1526 } 1527 1528 static bool isIndirectBrTarget(BasicBlock *BB) { 1529 for (BasicBlock *Pred : predecessors(BB)) { 1530 if (isa<IndirectBrInst>(Pred->getTerminator())) 1531 return true; 1532 } 1533 return false; 1534 } 1535 1536 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1537 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1538 // Find irr loop headers 1539 for (auto &BB : F) { 1540 // As a heuristic also annotate indrectbr targets as they have a high chance 1541 // to become an irreducible loop header after the indirectbr tail 1542 // duplication. 
1543 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1544 Instruction *TI = BB.getTerminator(); 1545 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1546 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1547 } 1548 } 1549 } 1550 1551 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1552 if (PGOFunctionEntryCoverage) 1553 return; 1554 Module *M = F.getParent(); 1555 IRBuilder<> Builder(&SI); 1556 Type *Int64Ty = Builder.getInt64Ty(); 1557 Type *I8PtrTy = Builder.getInt8PtrTy(); 1558 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1559 Builder.CreateCall( 1560 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1561 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1562 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1563 Builder.getInt32(*CurCtrIdx), Step}); 1564 ++(*CurCtrIdx); 1565 } 1566 1567 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1568 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1569 assert(*CurCtrIdx < CountFromProfile.size() && 1570 "Out of bound access of counters"); 1571 uint64_t SCounts[2]; 1572 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1573 ++(*CurCtrIdx); 1574 uint64_t TotalCount = 0; 1575 auto BI = UseFunc->findBBInfo(SI.getParent()); 1576 if (BI != nullptr) 1577 TotalCount = BI->CountValue; 1578 // False Count 1579 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1580 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1581 if (MaxCount) 1582 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1583 } 1584 1585 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1586 if (!PGOInstrSelect) 1587 return; 1588 // FIXME: do not handle this yet. 
1589 if (SI.getCondition()->getType()->isVectorTy()) 1590 return; 1591 1592 switch (Mode) { 1593 case VM_counting: 1594 NSIs++; 1595 return; 1596 case VM_instrument: 1597 instrumentOneSelectInst(SI); 1598 return; 1599 case VM_annotate: 1600 annotateOneSelectInst(SI); 1601 return; 1602 } 1603 1604 llvm_unreachable("Unknown visiting mode"); 1605 } 1606 1607 // Traverse all valuesites and annotate the instructions for all value kind. 1608 void PGOUseFunc::annotateValueSites() { 1609 if (DisableValueProfiling) 1610 return; 1611 1612 // Create the PGOFuncName meta data. 1613 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1614 1615 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1616 annotateValueSites(Kind); 1617 } 1618 1619 // Annotate the instructions for a specific value kind. 1620 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1621 assert(Kind <= IPVK_Last); 1622 unsigned ValueSiteIndex = 0; 1623 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1624 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1625 if (NumValueSites != ValueSites.size()) { 1626 auto &Ctx = M->getContext(); 1627 Ctx.diagnose(DiagnosticInfoPGOProfile( 1628 M->getName().data(), 1629 Twine("Inconsistent number of value sites for ") + 1630 Twine(ValueProfKindDescr[Kind]) + 1631 Twine(" profiling in \"") + F.getName().str() + 1632 Twine("\", possibly due to the use of a stale profile."), 1633 DS_Warning)); 1634 return; 1635 } 1636 1637 for (VPCandidateInfo &I : ValueSites) { 1638 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1639 << "): Index = " << ValueSiteIndex << " out of " 1640 << NumValueSites << "\n"); 1641 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1642 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1643 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1644 : MaxNumAnnotations); 1645 ValueSiteIndex++; 1646 } 1647 } 1648 1649 // Collect the set of members for each Comdat in module M and store 1650 // in ComdatMembers. 
1651 static void collectComdatMembers( 1652 Module &M, 1653 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1654 if (!DoComdatRenaming) 1655 return; 1656 for (Function &F : M) 1657 if (Comdat *C = F.getComdat()) 1658 ComdatMembers.insert(std::make_pair(C, &F)); 1659 for (GlobalVariable &GV : M.globals()) 1660 if (Comdat *C = GV.getComdat()) 1661 ComdatMembers.insert(std::make_pair(C, &GV)); 1662 for (GlobalAlias &GA : M.aliases()) 1663 if (Comdat *C = GA.getComdat()) 1664 ComdatMembers.insert(std::make_pair(C, &GA)); 1665 } 1666 1667 static bool InstrumentAllFunctions( 1668 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1669 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1670 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1671 // For the context-sensitve instrumentation, we should have a separated pass 1672 // (before LTO/ThinLTO linking) to create these variables. 1673 if (!IsCS) 1674 createIRLevelProfileFlagVar(M, /*IsCS=*/false); 1675 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1676 collectComdatMembers(M, ComdatMembers); 1677 1678 for (auto &F : M) { 1679 if (F.isDeclaration()) 1680 continue; 1681 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1682 continue; 1683 auto &TLI = LookupTLI(F); 1684 auto *BPI = LookupBPI(F); 1685 auto *BFI = LookupBFI(F); 1686 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1687 } 1688 return true; 1689 } 1690 1691 PreservedAnalyses 1692 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1693 createProfileFileNameVar(M, CSInstrName); 1694 // The variable in a comdat may be discarded by LTO. Ensure the declaration 1695 // will be retained. 
1696 appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); 1697 return PreservedAnalyses::all(); 1698 } 1699 1700 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1701 if (skipModule(M)) 1702 return false; 1703 1704 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 1705 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 1706 }; 1707 auto LookupBPI = [this](Function &F) { 1708 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1709 }; 1710 auto LookupBFI = [this](Function &F) { 1711 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1712 }; 1713 return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS); 1714 } 1715 1716 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1717 ModuleAnalysisManager &AM) { 1718 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1719 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1720 return FAM.getResult<TargetLibraryAnalysis>(F); 1721 }; 1722 auto LookupBPI = [&FAM](Function &F) { 1723 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1724 }; 1725 auto LookupBFI = [&FAM](Function &F) { 1726 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1727 }; 1728 1729 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1730 return PreservedAnalyses::all(); 1731 1732 return PreservedAnalyses::none(); 1733 } 1734 1735 // Using the ratio b/w sums of profile count values and BFI count values to 1736 // adjust the func entry count. 
1737 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1738 BranchProbabilityInfo &NBPI) { 1739 Function &F = Func.getFunc(); 1740 BlockFrequencyInfo NBFI(F, NBPI, LI); 1741 #ifndef NDEBUG 1742 auto BFIEntryCount = F.getEntryCount(); 1743 assert(BFIEntryCount.hasValue() && (BFIEntryCount->getCount() > 0) && 1744 "Invalid BFI Entrycount"); 1745 #endif 1746 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1747 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1748 for (auto &BBI : F) { 1749 uint64_t CountValue = 0; 1750 uint64_t BFICountValue = 0; 1751 if (!Func.findBBInfo(&BBI)) 1752 continue; 1753 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1754 CountValue = Func.getBBInfo(&BBI).CountValue; 1755 BFICountValue = BFICount.getValue(); 1756 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1757 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1758 } 1759 if (SumCount.isZero()) 1760 return; 1761 1762 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1763 "Incorrect sum of BFI counts"); 1764 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1765 return; 1766 double Scale = (SumCount / SumBFICount).convertToDouble(); 1767 if (Scale < 1.001 && Scale > 0.999) 1768 return; 1769 1770 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; 1771 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1772 if (NewEntryCount == 0) 1773 NewEntryCount = 1; 1774 if (NewEntryCount != FuncEntryCount) { 1775 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1776 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1777 << ", entry_count " << FuncEntryCount << " --> " 1778 << NewEntryCount << "\n"); 1779 } 1780 } 1781 1782 // Compare the profile count values with BFI count values, and print out 1783 // the non-matching ones. 
1784 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 1785 BranchProbabilityInfo &NBPI, 1786 uint64_t HotCountThreshold, 1787 uint64_t ColdCountThreshold) { 1788 Function &F = Func.getFunc(); 1789 BlockFrequencyInfo NBFI(F, NBPI, LI); 1790 // bool PrintFunc = false; 1791 bool HotBBOnly = PGOVerifyHotBFI; 1792 std::string Msg; 1793 OptimizationRemarkEmitter ORE(&F); 1794 1795 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 1796 for (auto &BBI : F) { 1797 uint64_t CountValue = 0; 1798 uint64_t BFICountValue = 0; 1799 1800 if (Func.getBBInfo(&BBI).CountValid) 1801 CountValue = Func.getBBInfo(&BBI).CountValue; 1802 1803 BBNum++; 1804 if (CountValue) 1805 NonZeroBBNum++; 1806 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1807 if (BFICount) 1808 BFICountValue = BFICount.getValue(); 1809 1810 if (HotBBOnly) { 1811 bool rawIsHot = CountValue >= HotCountThreshold; 1812 bool BFIIsHot = BFICountValue >= HotCountThreshold; 1813 bool rawIsCold = CountValue <= ColdCountThreshold; 1814 bool ShowCount = false; 1815 if (rawIsHot && !BFIIsHot) { 1816 Msg = "raw-Hot to BFI-nonHot"; 1817 ShowCount = true; 1818 } else if (rawIsCold && BFIIsHot) { 1819 Msg = "raw-Cold to BFI-Hot"; 1820 ShowCount = true; 1821 } 1822 if (!ShowCount) 1823 continue; 1824 } else { 1825 if ((CountValue < PGOVerifyBFICutoff) && 1826 (BFICountValue < PGOVerifyBFICutoff)) 1827 continue; 1828 uint64_t Diff = (BFICountValue >= CountValue) 1829 ? 
BFICountValue - CountValue 1830 : CountValue - BFICountValue; 1831 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) 1832 continue; 1833 } 1834 BBMisMatchNum++; 1835 1836 ORE.emit([&]() { 1837 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 1838 F.getSubprogram(), &BBI); 1839 Remark << "BB " << ore::NV("Block", BBI.getName()) 1840 << " Count=" << ore::NV("Count", CountValue) 1841 << " BFI_Count=" << ore::NV("Count", BFICountValue); 1842 if (!Msg.empty()) 1843 Remark << " (" << Msg << ")"; 1844 return Remark; 1845 }); 1846 } 1847 if (BBMisMatchNum) 1848 ORE.emit([&]() { 1849 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 1850 F.getSubprogram(), &F.getEntryBlock()) 1851 << "In Func " << ore::NV("Function", F.getName()) 1852 << ": Num_of_BB=" << ore::NV("Count", BBNum) 1853 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 1854 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 1855 }); 1856 } 1857 1858 static bool annotateAllFunctions( 1859 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1860 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1861 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1862 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1863 ProfileSummaryInfo *PSI, bool IsCS) { 1864 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1865 auto &Ctx = M.getContext(); 1866 // Read the counter array from file. 
1867 auto ReaderOrErr = 1868 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 1869 if (Error E = ReaderOrErr.takeError()) { 1870 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1871 Ctx.diagnose( 1872 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1873 }); 1874 return false; 1875 } 1876 1877 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1878 std::move(ReaderOrErr.get()); 1879 if (!PGOReader) { 1880 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1881 StringRef("Cannot get PGOReader"))); 1882 return false; 1883 } 1884 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1885 return false; 1886 1887 // TODO: might need to change the warning once the clang option is finalized. 1888 if (!PGOReader->isIRLevelProfile()) { 1889 Ctx.diagnose(DiagnosticInfoPGOProfile( 1890 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1891 return false; 1892 } 1893 if (PGOReader->hasSingleByteCoverage()) { 1894 Ctx.diagnose(DiagnosticInfoPGOProfile( 1895 ProfileFileName.data(), 1896 "Cannot use coverage profiles for optimization")); 1897 return false; 1898 } 1899 if (PGOReader->functionEntryOnly()) { 1900 Ctx.diagnose(DiagnosticInfoPGOProfile( 1901 ProfileFileName.data(), 1902 "Function entry profiles are not yet supported for optimization")); 1903 return false; 1904 } 1905 1906 // Add the profile summary (read from the header of the indexed summary) here 1907 // so that we can use it below when reading counters (which checks if the 1908 // function should be marked with a cold or inlinehint attribute). 1909 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 1910 IsCS ? 
ProfileSummary::PSK_CSInstr 1911 : ProfileSummary::PSK_Instr); 1912 PSI->refresh(); 1913 1914 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1915 collectComdatMembers(M, ComdatMembers); 1916 std::vector<Function *> HotFunctions; 1917 std::vector<Function *> ColdFunctions; 1918 1919 // If the profile marked as always instrument the entry BB, do the 1920 // same. Note this can be overwritten by the internal option in CFGMST.h 1921 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 1922 if (PGOInstrumentEntry.getNumOccurrences() > 0) 1923 InstrumentFuncEntry = PGOInstrumentEntry; 1924 for (auto &F : M) { 1925 if (F.isDeclaration()) 1926 continue; 1927 auto &TLI = LookupTLI(F); 1928 auto *BPI = LookupBPI(F); 1929 auto *BFI = LookupBFI(F); 1930 // Split indirectbr critical edges here before computing the MST rather than 1931 // later in getInstrBB() to avoid invalidating it. 1932 SplitIndirectBrCriticalEdges(F, BPI, BFI); 1933 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 1934 InstrumentFuncEntry); 1935 // When AllMinusOnes is true, it means the profile for the function 1936 // is unrepresentative and this function is actually hot. Set the 1937 // entry count of the function to be multiple times of hot threshold 1938 // and drop all its internal counters. 
1939 bool AllMinusOnes = false; 1940 bool AllZeros = false; 1941 if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes)) 1942 continue; 1943 if (AllZeros) { 1944 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 1945 if (Func.getProgramMaxCount() != 0) 1946 ColdFunctions.push_back(&F); 1947 continue; 1948 } 1949 const unsigned MultiplyFactor = 3; 1950 if (AllMinusOnes) { 1951 uint64_t HotThreshold = PSI->getHotCountThreshold(); 1952 if (HotThreshold) 1953 F.setEntryCount( 1954 ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real)); 1955 HotFunctions.push_back(&F); 1956 continue; 1957 } 1958 Func.populateCounters(); 1959 Func.setBranchWeights(); 1960 Func.annotateValueSites(); 1961 Func.annotateIrrLoopHeaderWeights(); 1962 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 1963 if (FreqAttr == PGOUseFunc::FFA_Cold) 1964 ColdFunctions.push_back(&F); 1965 else if (FreqAttr == PGOUseFunc::FFA_Hot) 1966 HotFunctions.push_back(&F); 1967 if (PGOViewCounts != PGOVCT_None && 1968 (ViewBlockFreqFuncName.empty() || 1969 F.getName().equals(ViewBlockFreqFuncName))) { 1970 LoopInfo LI{DominatorTree(F)}; 1971 std::unique_ptr<BranchProbabilityInfo> NewBPI = 1972 std::make_unique<BranchProbabilityInfo>(F, LI); 1973 std::unique_ptr<BlockFrequencyInfo> NewBFI = 1974 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 1975 if (PGOViewCounts == PGOVCT_Graph) 1976 NewBFI->view(); 1977 else if (PGOViewCounts == PGOVCT_Text) { 1978 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 1979 NewBFI->print(dbgs()); 1980 } 1981 } 1982 if (PGOViewRawCounts != PGOVCT_None && 1983 (ViewBlockFreqFuncName.empty() || 1984 F.getName().equals(ViewBlockFreqFuncName))) { 1985 if (PGOViewRawCounts == PGOVCT_Graph) 1986 if (ViewBlockFreqFuncName.empty()) 1987 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1988 else 1989 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 1990 else if (PGOViewRawCounts == PGOVCT_Text) { 
1991 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 1992 Func.dumpInfo(); 1993 } 1994 } 1995 1996 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 1997 LoopInfo LI{DominatorTree(F)}; 1998 BranchProbabilityInfo NBPI(F, LI); 1999 2000 // Fix func entry count. 2001 if (PGOFixEntryCount) 2002 fixFuncEntryCount(Func, LI, NBPI); 2003 2004 // Verify BlockFrequency information. 2005 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 2006 if (PGOVerifyHotBFI) { 2007 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 2008 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 2009 } 2010 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 2011 } 2012 } 2013 2014 // Set function hotness attribute from the profile. 2015 // We have to apply these attributes at the end because their presence 2016 // can affect the BranchProbabilityInfo of any callers, resulting in an 2017 // inconsistent MST between prof-gen and prof-use. 2018 for (auto &F : HotFunctions) { 2019 F->addFnAttr(Attribute::InlineHint); 2020 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 2021 << "\n"); 2022 } 2023 for (auto &F : ColdFunctions) { 2024 // Only set when there is no Attribute::Hot set by the user. For Hot 2025 // attribute, user's annotation has the precedence over the profile. 
2026 if (F->hasFnAttribute(Attribute::Hot)) { 2027 auto &Ctx = M.getContext(); 2028 std::string Msg = std::string("Function ") + F->getName().str() + 2029 std::string(" is annotated as a hot function but" 2030 " the profile is cold"); 2031 Ctx.diagnose( 2032 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 2033 continue; 2034 } 2035 F->addFnAttr(Attribute::Cold); 2036 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 2037 << "\n"); 2038 } 2039 return true; 2040 } 2041 2042 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 2043 std::string RemappingFilename, 2044 bool IsCS) 2045 : ProfileFileName(std::move(Filename)), 2046 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 2047 if (!PGOTestProfileFile.empty()) 2048 ProfileFileName = PGOTestProfileFile; 2049 if (!PGOTestProfileRemappingFile.empty()) 2050 ProfileRemappingFileName = PGOTestProfileRemappingFile; 2051 } 2052 2053 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 2054 ModuleAnalysisManager &AM) { 2055 2056 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 2057 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 2058 return FAM.getResult<TargetLibraryAnalysis>(F); 2059 }; 2060 auto LookupBPI = [&FAM](Function &F) { 2061 return &FAM.getResult<BranchProbabilityAnalysis>(F); 2062 }; 2063 auto LookupBFI = [&FAM](Function &F) { 2064 return &FAM.getResult<BlockFrequencyAnalysis>(F); 2065 }; 2066 2067 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 2068 2069 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 2070 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 2071 return PreservedAnalyses::all(); 2072 2073 return PreservedAnalyses::none(); 2074 } 2075 2076 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 2077 if (skipModule(M)) 2078 return false; 2079 2080 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 2081 return 
this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 2082 }; 2083 auto LookupBPI = [this](Function &F) { 2084 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 2085 }; 2086 auto LookupBFI = [this](Function &F) { 2087 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 2088 }; 2089 2090 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 2091 return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI, 2092 LookupBFI, PSI, IsCS); 2093 } 2094 2095 static std::string getSimpleNodeName(const BasicBlock *Node) { 2096 if (!Node->getName().empty()) 2097 return std::string(Node->getName()); 2098 2099 std::string SimpleNodeName; 2100 raw_string_ostream OS(SimpleNodeName); 2101 Node->printAsOperand(OS, false); 2102 return OS.str(); 2103 } 2104 2105 void llvm::setProfMetadata(Module *M, Instruction *TI, 2106 ArrayRef<uint64_t> EdgeCounts, 2107 uint64_t MaxCount) { 2108 MDBuilder MDB(M->getContext()); 2109 assert(MaxCount > 0 && "Bad max count"); 2110 uint64_t Scale = calculateCountScale(MaxCount); 2111 SmallVector<unsigned, 4> Weights; 2112 for (const auto &ECI : EdgeCounts) 2113 Weights.push_back(scaleBranchCount(ECI, Scale)); 2114 2115 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 2116 : Weights) { 2117 dbgs() << W << " "; 2118 } dbgs() << "\n";); 2119 2120 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 2121 if (EmitBranchProbability) { 2122 std::string BrCondStr = getBranchCondString(TI); 2123 if (BrCondStr.empty()) 2124 return; 2125 2126 uint64_t WSum = 2127 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 2128 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 2129 uint64_t TotalCount = 2130 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 2131 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 2132 Scale = calculateCountScale(WSum); 2133 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 2134 scaleBranchCount(WSum, 
Scale)); 2135 std::string BranchProbStr; 2136 raw_string_ostream OS(BranchProbStr); 2137 OS << BP; 2138 OS << " (total count : " << TotalCount << ")"; 2139 OS.flush(); 2140 Function *F = TI->getParent()->getParent(); 2141 OptimizationRemarkEmitter ORE(F); 2142 ORE.emit([&]() { 2143 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 2144 << BrCondStr << " is true with probability : " << BranchProbStr; 2145 }); 2146 } 2147 } 2148 2149 namespace llvm { 2150 2151 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2152 MDBuilder MDB(M->getContext()); 2153 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2154 MDB.createIrrLoopHeaderWeight(Count)); 2155 } 2156 2157 template <> struct GraphTraits<PGOUseFunc *> { 2158 using NodeRef = const BasicBlock *; 2159 using ChildIteratorType = const_succ_iterator; 2160 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2161 2162 static NodeRef getEntryNode(const PGOUseFunc *G) { 2163 return &G->getFunc().front(); 2164 } 2165 2166 static ChildIteratorType child_begin(const NodeRef N) { 2167 return succ_begin(N); 2168 } 2169 2170 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2171 2172 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2173 return nodes_iterator(G->getFunc().begin()); 2174 } 2175 2176 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2177 return nodes_iterator(G->getFunc().end()); 2178 } 2179 }; 2180 2181 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2182 explicit DOTGraphTraits(bool isSimple = false) 2183 : DefaultDOTGraphTraits(isSimple) {} 2184 2185 static std::string getGraphName(const PGOUseFunc *G) { 2186 return std::string(G->getFunc().getName()); 2187 } 2188 2189 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2190 std::string Result; 2191 raw_string_ostream OS(Result); 2192 2193 OS << getSimpleNodeName(Node) << ":\\l"; 2194 UseBBInfo *BI = 
Graph->findBBInfo(Node); 2195 OS << "Count : "; 2196 if (BI && BI->CountValid) 2197 OS << BI->CountValue << "\\l"; 2198 else 2199 OS << "Unknown\\l"; 2200 2201 if (!PGOInstrSelect) 2202 return Result; 2203 2204 for (const Instruction &I : *Node) { 2205 if (!isa<SelectInst>(&I)) 2206 continue; 2207 // Display scaled counts for SELECT instruction: 2208 OS << "SELECT : { T = "; 2209 uint64_t TC, FC; 2210 bool HasProf = I.extractProfMetadata(TC, FC); 2211 if (!HasProf) 2212 OS << "Unknown, F = Unknown }\\l"; 2213 else 2214 OS << TC << ", F = " << FC << " }\\l"; 2215 } 2216 return Result; 2217 } 2218 }; 2219 2220 } // end namespace llvm 2221