1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). 
For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "CFGMST.h" 52 #include "ValueProfileCollector.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/MapVector.h" 56 #include "llvm/ADT/STLExtras.h" 57 #include "llvm/ADT/SmallVector.h" 58 #include "llvm/ADT/Statistic.h" 59 #include "llvm/ADT/StringRef.h" 60 #include "llvm/ADT/Triple.h" 61 #include "llvm/ADT/Twine.h" 62 #include "llvm/ADT/iterator.h" 63 #include "llvm/ADT/iterator_range.h" 64 #include "llvm/Analysis/BlockFrequencyInfo.h" 65 #include "llvm/Analysis/BranchProbabilityInfo.h" 66 #include "llvm/Analysis/CFG.h" 67 #include "llvm/Analysis/EHPersonalities.h" 68 #include "llvm/Analysis/LoopInfo.h" 69 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 70 #include "llvm/Analysis/ProfileSummaryInfo.h" 71 #include "llvm/IR/Attributes.h" 72 #include "llvm/IR/BasicBlock.h" 73 #include "llvm/IR/CFG.h" 74 #include "llvm/IR/Comdat.h" 75 #include "llvm/IR/Constant.h" 76 #include "llvm/IR/Constants.h" 77 #include "llvm/IR/DiagnosticInfo.h" 78 #include "llvm/IR/Dominators.h" 79 #include "llvm/IR/Function.h" 80 #include "llvm/IR/GlobalAlias.h" 81 
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <numeric>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using namespace llvm;
using ProfileCount = Function::ProfileCount;
using VPCandidateInfo = ValueProfileCollector::CandidateInfo;

#define DEBUG_TYPE "pgo-instrumentation"

// Statistics for the regular (non context-sensitive) PGO passes.
STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
STATISTIC(NumOfPGOEdge, "Number of edges.");
STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");

// The same statistics, counted separately for context-sensitive PGO (CSPGO).
STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
STATISTIC(NumOfCSPGOSelectInsts,
          "Number of select instruction instrumented in CSPGO.");
STATISTIC(NumOfCSPGOMemIntrinsics,
          "Number of mem intrinsics instrumented in CSPGO.");
STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
STATISTIC(NumOfCSPGOFunc,
          "Number of functions having valid profile counts in CSPGO.");
STATISTIC(NumOfCSPGOMismatch,
          "Number of functions having mismatch profile in CSPGO.");
STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");

// Command line option to specify the file to read profile from. This is
// mainly used for testing.
static cl::opt<std::string>
    PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
                       cl::value_desc("filename"),
                       cl::desc("Specify the path of profile data file. This is "
                                "mainly for test purpose."));

// Command line option to specify the profile remapping file. Like the option
// above, this is mainly used for testing.
static cl::opt<std::string> PGOTestProfileRemappingFile(
    "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
    cl::value_desc("filename"),
    cl::desc("Specify the path of profile remapping file. This is mainly for "
             "test purpose."));

// Command line option to disable value profiling. The default is false:
// i.e. value profiling is enabled by default. This is for debug purpose.
168 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 169 cl::Hidden, 170 cl::desc("Disable Value Profiling")); 171 172 // Command line option to set the maximum number of VP annotations to write to 173 // the metadata for a single indirect call callsite. 174 static cl::opt<unsigned> MaxNumAnnotations( 175 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 176 cl::desc("Max number of annotations for a single indirect " 177 "call callsite")); 178 179 // Command line option to set the maximum number of value annotations 180 // to write to the metadata for a single memop intrinsic. 181 static cl::opt<unsigned> MaxNumMemOPAnnotations( 182 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 183 cl::desc("Max number of preicise value annotations for a single memop" 184 "intrinsic")); 185 186 // Command line option to control appending FunctionHash to the name of a COMDAT 187 // function. This is to avoid the hash mismatch caused by the preinliner. 188 static cl::opt<bool> DoComdatRenaming( 189 "do-comdat-renaming", cl::init(false), cl::Hidden, 190 cl::desc("Append function hash to the name of COMDAT function to avoid " 191 "function hash mismatch due to the preinliner")); 192 193 // Command line option to enable/disable the warning about missing profile 194 // information. 195 static cl::opt<bool> 196 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 197 cl::desc("Use this option to turn on/off " 198 "warnings about missing profile data for " 199 "functions.")); 200 201 // Command line option to enable/disable the warning about a hash mismatch in 202 // the profile data. 
203 static cl::opt<bool> 204 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 205 cl::desc("Use this option to turn off/on " 206 "warnings about profile cfg mismatch.")); 207 208 // Command line option to enable/disable the warning about a hash mismatch in 209 // the profile data for Comdat functions, which often turns out to be false 210 // positive due to the pre-instrumentation inline. 211 static cl::opt<bool> 212 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 213 cl::Hidden, 214 cl::desc("The option is used to turn on/off " 215 "warnings about hash mismatch for comdat " 216 "functions.")); 217 218 // Command line option to enable/disable select instruction instrumentation. 219 static cl::opt<bool> 220 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 221 cl::desc("Use this option to turn on/off SELECT " 222 "instruction instrumentation. ")); 223 224 // Command line option to turn on CFG dot or text dump of raw profile counts 225 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 226 "pgo-view-raw-counts", cl::Hidden, 227 cl::desc("A boolean option to show CFG dag or text " 228 "with raw profile counts from " 229 "profile data. See also option " 230 "-pgo-view-counts. To limit graph " 231 "display to only one function, use " 232 "filtering option -view-bfi-func-name."), 233 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 234 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 235 clEnumValN(PGOVCT_Text, "text", "show in text."))); 236 237 // Command line option to enable/disable memop intrinsic call.size profiling. 238 static cl::opt<bool> 239 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 240 cl::desc("Use this option to turn on/off " 241 "memory intrinsic size profiling.")); 242 243 // Emit branch probability as optimization remarks. 
244 static cl::opt<bool> 245 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 246 cl::desc("When this option is on, the annotated " 247 "branch probability will be emitted as " 248 "optimization remarks: -{Rpass|" 249 "pass-remarks}=pgo-instrumentation")); 250 251 static cl::opt<bool> PGOInstrumentEntry( 252 "pgo-instrument-entry", cl::init(false), cl::Hidden, 253 cl::desc("Force to instrument function entry basicblock.")); 254 255 static cl::opt<bool> 256 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 257 cl::desc("Fix function entry count in profile use.")); 258 259 static cl::opt<bool> PGOVerifyHotBFI( 260 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 261 cl::desc("Print out the non-match BFI count if a hot raw profile count " 262 "becomes non-hot, or a cold raw profile count becomes hot. " 263 "The print is enabled under -Rpass-analysis=pgo, or " 264 "internal option -pass-remakrs-analysis=pgo.")); 265 266 static cl::opt<bool> PGOVerifyBFI( 267 "pgo-verify-bfi", cl::init(false), cl::Hidden, 268 cl::desc("Print out mismatched BFI counts after setting profile metadata " 269 "The print is enabled under -Rpass-analysis=pgo, or " 270 "internal option -pass-remakrs-analysis=pgo.")); 271 272 static cl::opt<unsigned> PGOVerifyBFIRatio( 273 "pgo-verify-bfi-ratio", cl::init(5), cl::Hidden, 274 cl::desc("Set the threshold for pgo-verify-big -- only print out " 275 "mismatched BFI if the difference percentage is greater than " 276 "this value (in percentage).")); 277 278 static cl::opt<unsigned> PGOVerifyBFICutoff( 279 "pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden, 280 cl::desc("Set the threshold for pgo-verify-bfi -- skip the counts whose " 281 "profile count value is below.")); 282 283 namespace llvm { 284 // Command line option to turn on CFG dot dump after profile annotation. 
285 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 286 extern cl::opt<PGOViewCountsType> PGOViewCounts; 287 288 // Command line option to specify the name of the function for CFG dump 289 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 290 extern cl::opt<std::string> ViewBlockFreqFuncName; 291 } // namespace llvm 292 293 static cl::opt<bool> 294 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, 295 cl::desc("Use the old CFG function hashing")); 296 297 // Return a string describing the branch condition that can be 298 // used in static branch probability heuristics: 299 static std::string getBranchCondString(Instruction *TI) { 300 BranchInst *BI = dyn_cast<BranchInst>(TI); 301 if (!BI || !BI->isConditional()) 302 return std::string(); 303 304 Value *Cond = BI->getCondition(); 305 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 306 if (!CI) 307 return std::string(); 308 309 std::string result; 310 raw_string_ostream OS(result); 311 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 312 CI->getOperand(0)->getType()->print(OS, true); 313 314 Value *RHS = CI->getOperand(1); 315 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 316 if (CV) { 317 if (CV->isZero()) 318 OS << "_Zero"; 319 else if (CV->isOne()) 320 OS << "_One"; 321 else if (CV->isMinusOne()) 322 OS << "_MinusOne"; 323 else 324 OS << "_Const"; 325 } 326 OS.flush(); 327 return result; 328 } 329 330 static const char *ValueProfKindDescr[] = { 331 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 332 #include "llvm/ProfileData/InstrProfData.inc" 333 }; 334 335 namespace { 336 337 /// The select instruction visitor plays three roles specified 338 /// by the mode. In \c VM_counting mode, it simply counts the number of 339 /// select instructions. In \c VM_instrument mode, it inserts code to count 340 /// the number times TrueValue of select is taken. 
In \c VM_annotate mode, 341 /// it reads the profile data and annotate the select instruction with metadata. 342 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 343 class PGOUseFunc; 344 345 /// Instruction Visitor class to visit select instructions. 346 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 347 Function &F; 348 unsigned NSIs = 0; // Number of select instructions instrumented. 349 VisitMode Mode = VM_counting; // Visiting mode. 350 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 351 unsigned TotalNumCtrs = 0; // Total number of counters 352 GlobalVariable *FuncNameVar = nullptr; 353 uint64_t FuncHash = 0; 354 PGOUseFunc *UseFunc = nullptr; 355 356 SelectInstVisitor(Function &Func) : F(Func) {} 357 358 void countSelects(Function &Func) { 359 NSIs = 0; 360 Mode = VM_counting; 361 visit(Func); 362 } 363 364 // Visit the IR stream and instrument all select instructions. \p 365 // Ind is a pointer to the counter index variable; \p TotalNC 366 // is the total number of counters; \p FNV is the pointer to the 367 // PGO function name var; \p FHash is the function hash. 368 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 369 GlobalVariable *FNV, uint64_t FHash) { 370 Mode = VM_instrument; 371 CurCtrIdx = Ind; 372 TotalNumCtrs = TotalNC; 373 FuncHash = FHash; 374 FuncNameVar = FNV; 375 visit(Func); 376 } 377 378 // Visit the IR stream and annotate all select instructions. 379 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 380 Mode = VM_annotate; 381 UseFunc = UF; 382 CurCtrIdx = Ind; 383 visit(Func); 384 } 385 386 void instrumentOneSelectInst(SelectInst &SI); 387 void annotateOneSelectInst(SelectInst &SI); 388 389 // Visit \p SI instruction and perform tasks according to visit mode. 390 void visitSelectInst(SelectInst &SI); 391 392 // Return the number of select instructions. This needs be called after 393 // countSelects(). 
394 unsigned getNumOfSelectInsts() const { return NSIs; } 395 }; 396 397 398 class PGOInstrumentationGenLegacyPass : public ModulePass { 399 public: 400 static char ID; 401 402 PGOInstrumentationGenLegacyPass(bool IsCS = false) 403 : ModulePass(ID), IsCS(IsCS) { 404 initializePGOInstrumentationGenLegacyPassPass( 405 *PassRegistry::getPassRegistry()); 406 } 407 408 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 409 410 private: 411 // Is this is context-sensitive instrumentation. 412 bool IsCS; 413 bool runOnModule(Module &M) override; 414 415 void getAnalysisUsage(AnalysisUsage &AU) const override { 416 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 417 AU.addRequired<TargetLibraryInfoWrapperPass>(); 418 } 419 }; 420 421 class PGOInstrumentationUseLegacyPass : public ModulePass { 422 public: 423 static char ID; 424 425 // Provide the profile filename as the parameter. 426 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) 427 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { 428 if (!PGOTestProfileFile.empty()) 429 ProfileFileName = PGOTestProfileFile; 430 initializePGOInstrumentationUseLegacyPassPass( 431 *PassRegistry::getPassRegistry()); 432 } 433 434 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 435 436 private: 437 std::string ProfileFileName; 438 // Is this is context-sensitive instrumentation use. 
439 bool IsCS; 440 441 bool runOnModule(Module &M) override; 442 443 void getAnalysisUsage(AnalysisUsage &AU) const override { 444 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 445 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 446 AU.addRequired<TargetLibraryInfoWrapperPass>(); 447 } 448 }; 449 450 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { 451 public: 452 static char ID; 453 StringRef getPassName() const override { 454 return "PGOInstrumentationGenCreateVarPass"; 455 } 456 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") 457 : ModulePass(ID), InstrProfileOutput(CSInstrName) { 458 initializePGOInstrumentationGenCreateVarLegacyPassPass( 459 *PassRegistry::getPassRegistry()); 460 } 461 462 private: 463 bool runOnModule(Module &M) override { 464 createProfileFileNameVar(M, InstrProfileOutput); 465 createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry); 466 return false; 467 } 468 std::string InstrProfileOutput; 469 }; 470 471 } // end anonymous namespace 472 473 char PGOInstrumentationGenLegacyPass::ID = 0; 474 475 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 476 "PGO instrumentation.", false, false) 477 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 478 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 479 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 480 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 481 "PGO instrumentation.", false, false) 482 483 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { 484 return new PGOInstrumentationGenLegacyPass(IsCS); 485 } 486 487 char PGOInstrumentationUseLegacyPass::ID = 0; 488 489 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 490 "Read PGO instrumentation profile.", false, false) 491 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 492 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 493 
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 494 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 495 "Read PGO instrumentation profile.", false, false) 496 497 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, 498 bool IsCS) { 499 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); 500 } 501 502 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; 503 504 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, 505 "pgo-instr-gen-create-var", 506 "Create PGO instrumentation version variable for CSPGO.", false, 507 false) 508 509 ModulePass * 510 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { 511 return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName)); 512 } 513 514 namespace { 515 516 /// An MST based instrumentation for PGO 517 /// 518 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 519 /// in the function level. 520 struct PGOEdge { 521 // This class implements the CFG edges. Note the CFG can be a multi-graph. 522 // So there might be multiple edges with same SrcBB and DestBB. 523 const BasicBlock *SrcBB; 524 const BasicBlock *DestBB; 525 uint64_t Weight; 526 bool InMST = false; 527 bool Removed = false; 528 bool IsCritical = false; 529 530 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 531 : SrcBB(Src), DestBB(Dest), Weight(W) {} 532 533 // Return the information string of an edge. 534 std::string infoString() const { 535 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 536 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 537 } 538 }; 539 540 // This class stores the auxiliary information for each BB. 541 struct BBInfo { 542 BBInfo *Group; 543 uint32_t Index; 544 uint32_t Rank = 0; 545 546 BBInfo(unsigned IX) : Group(this), Index(IX) {} 547 548 // Return the information string of this object. 
549 std::string infoString() const { 550 return (Twine("Index=") + Twine(Index)).str(); 551 } 552 553 // Empty function -- only applicable to UseBBInfo. 554 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 555 556 // Empty function -- only applicable to UseBBInfo. 557 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 558 }; 559 560 // This class implements the CFG edges. Note the CFG can be a multi-graph. 561 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 562 private: 563 Function &F; 564 565 // Is this is context-sensitive instrumentation. 566 bool IsCS; 567 568 // A map that stores the Comdat group in function F. 569 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 570 571 ValueProfileCollector VPC; 572 573 void computeCFGHash(); 574 void renameComdatFunction(); 575 576 public: 577 std::vector<std::vector<VPCandidateInfo>> ValueSites; 578 SelectInstVisitor SIVisitor; 579 std::string FuncName; 580 GlobalVariable *FuncNameVar; 581 582 // CFG hash value for this function. 583 uint64_t FunctionHash = 0; 584 585 // The Minimum Spanning Tree of function CFG. 586 CFGMST<Edge, BBInfo> MST; 587 588 // Collect all the BBs that will be instrumented, and store them in 589 // InstrumentBBs. 590 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 591 592 // Give an edge, find the BB that will be instrumented. 593 // Return nullptr if there is no BB to be instrumented. 594 BasicBlock *getInstrBB(Edge *E); 595 596 // Return the auxiliary BB information. 597 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 598 599 // Return the auxiliary BB information if available. 600 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 601 602 // Dump edges and BB information. 
603 void dumpInfo(std::string Str = "") const { 604 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 605 Twine(FunctionHash) + "\t" + Str); 606 } 607 608 FuncPGOInstrumentation( 609 Function &Func, TargetLibraryInfo &TLI, 610 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 611 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 612 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 613 bool InstrumentFuncEntry = true) 614 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 615 ValueSites(IPVK_Last + 1), SIVisitor(Func), 616 MST(F, InstrumentFuncEntry, BPI, BFI) { 617 // This should be done before CFG hash computation. 618 SIVisitor.countSelects(Func); 619 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 620 if (!IsCS) { 621 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 622 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 623 NumOfPGOBB += MST.BBInfos.size(); 624 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 625 } else { 626 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 627 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 628 NumOfCSPGOBB += MST.BBInfos.size(); 629 } 630 631 FuncName = getPGOFuncName(F); 632 computeCFGHash(); 633 if (!ComdatMembers.empty()) 634 renameComdatFunction(); 635 LLVM_DEBUG(dumpInfo("after CFGMST")); 636 637 for (auto &E : MST.AllEdges) { 638 if (E->Removed) 639 continue; 640 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 641 if (!E->InMST) 642 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 643 } 644 645 if (CreateGlobalVar) 646 FuncNameVar = createPGOFuncNameVar(F, FuncName); 647 } 648 }; 649 650 } // end anonymous namespace 651 652 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 653 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 654 // of selects, indirect calls, mem ops and edges. 
655 template <class Edge, class BBInfo> 656 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 657 std::vector<uint8_t> Indexes; 658 JamCRC JC; 659 for (auto &BB : F) { 660 const Instruction *TI = BB.getTerminator(); 661 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 662 BasicBlock *Succ = TI->getSuccessor(I); 663 auto BI = findBBInfo(Succ); 664 if (BI == nullptr) 665 continue; 666 uint32_t Index = BI->Index; 667 for (int J = 0; J < 4; J++) 668 Indexes.push_back((uint8_t)(Index >> (J * 8))); 669 } 670 } 671 JC.update(Indexes); 672 673 JamCRC JCH; 674 if (PGOOldCFGHashing) { 675 // Hash format for context sensitive profile. Reserve 4 bits for other 676 // information. 677 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 678 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 679 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 680 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 681 } else { 682 // The higher 32 bits. 683 auto updateJCH = [&JCH](uint64_t Num) { 684 uint8_t Data[8]; 685 support::endian::write64le(Data, Num); 686 JCH.update(Data); 687 }; 688 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 689 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 690 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 691 updateJCH((uint64_t)MST.AllEdges.size()); 692 693 // Hash format for context sensitive profile. Reserve 4 bits for other 694 // information. 695 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 696 } 697 698 // Reserve bit 60-63 for other information purpose. 
699 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 700 if (IsCS) 701 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 702 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 703 << " CRC = " << JC.getCRC() 704 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 705 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 706 << ValueSites[IPVK_IndirectCallTarget].size()); 707 if (!PGOOldCFGHashing) { 708 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 709 << ", High32 CRC = " << JCH.getCRC()); 710 } 711 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); 712 } 713 714 // Check if we can safely rename this Comdat function. 715 static bool canRenameComdat( 716 Function &F, 717 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 718 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 719 return false; 720 721 // FIXME: Current only handle those Comdat groups that only containing one 722 // function. 723 // (1) For a Comdat group containing multiple functions, we need to have a 724 // unique postfix based on the hashes for each function. There is a 725 // non-trivial code refactoring to do this efficiently. 726 // (2) Variables can not be renamed, so we can not rename Comdat function in a 727 // group including global vars. 728 Comdat *C = F.getComdat(); 729 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 730 assert(!isa<GlobalAlias>(CM.second)); 731 Function *FM = dyn_cast<Function>(CM.second); 732 if (FM != &F) 733 return false; 734 } 735 return true; 736 } 737 738 // Append the CFGHash to the Comdat function name. 739 template <class Edge, class BBInfo> 740 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 741 if (!canRenameComdat(F, ComdatMembers)) 742 return; 743 std::string OrigName = F.getName().str(); 744 std::string NewFuncName = 745 Twine(F.getName() + "." 
+ Twine(FunctionHash)).str(); 746 F.setName(Twine(NewFuncName)); 747 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 748 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 749 Comdat *NewComdat; 750 Module *M = F.getParent(); 751 // For AvailableExternallyLinkage functions, change the linkage to 752 // LinkOnceODR and put them into comdat. This is because after renaming, there 753 // is no backup external copy available for the function. 754 if (!F.hasComdat()) { 755 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 756 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 757 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 758 F.setComdat(NewComdat); 759 return; 760 } 761 762 // This function belongs to a single function Comdat group. 763 Comdat *OrigComdat = F.getComdat(); 764 std::string NewComdatName = 765 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 766 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 767 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 768 769 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 770 // Must be a function. 771 cast<Function>(CM.second)->setComdat(NewComdat); 772 } 773 } 774 775 // Collect all the BBs that will be instruments and return them in 776 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 777 template <class Edge, class BBInfo> 778 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 779 std::vector<BasicBlock *> &InstrumentBBs) { 780 // Use a worklist as we will update the vector during the iteration. 781 std::vector<Edge *> EdgeList; 782 EdgeList.reserve(MST.AllEdges.size()); 783 for (auto &E : MST.AllEdges) 784 EdgeList.push_back(E.get()); 785 786 for (auto &E : EdgeList) { 787 BasicBlock *InstrBB = getInstrBB(E); 788 if (InstrBB) 789 InstrumentBBs.push_back(InstrBB); 790 } 791 792 // Set up InEdges/OutEdges for all BBs. 
793 for (auto &E : MST.AllEdges) { 794 if (E->Removed) 795 continue; 796 const BasicBlock *SrcBB = E->SrcBB; 797 const BasicBlock *DestBB = E->DestBB; 798 BBInfo &SrcInfo = getBBInfo(SrcBB); 799 BBInfo &DestInfo = getBBInfo(DestBB); 800 SrcInfo.addOutEdge(E.get()); 801 DestInfo.addInEdge(E.get()); 802 } 803 } 804 805 // Given a CFG E to be instrumented, find which BB to place the instrumented 806 // code. The function will split the critical edge if necessary. 807 template <class Edge, class BBInfo> 808 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 809 if (E->InMST || E->Removed) 810 return nullptr; 811 812 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 813 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 814 // For a fake edge, instrument the real BB. 815 if (SrcBB == nullptr) 816 return DestBB; 817 if (DestBB == nullptr) 818 return SrcBB; 819 820 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 821 // There are basic blocks (such as catchswitch) cannot be instrumented. 822 // If the returned first insertion point is the end of BB, skip this BB. 823 if (BB->getFirstInsertionPt() == BB->end()) 824 return nullptr; 825 return BB; 826 }; 827 828 // Instrument the SrcBB if it has a single successor, 829 // otherwise, the DestBB if this is not a critical edge. 830 Instruction *TI = SrcBB->getTerminator(); 831 if (TI->getNumSuccessors() <= 1) 832 return canInstrument(SrcBB); 833 if (!E->IsCritical) 834 return canInstrument(DestBB); 835 836 // Some IndirectBr critical edges cannot be split by the previous 837 // SplitIndirectBrCriticalEdges call. Bail out. 838 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 839 BasicBlock *InstrBB = 840 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 841 if (!InstrBB) { 842 LLVM_DEBUG( 843 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 844 return nullptr; 845 } 846 // For a critical edge, we have to split. 
Instrument the newly 847 // created BB. 848 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 849 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 850 << " --> " << getBBInfo(DestBB).Index << "\n"); 851 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 852 MST.addEdge(SrcBB, InstrBB, 0); 853 // Second one: Add new edge of InstrBB->DestBB. 854 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 855 NewEdge1.InMST = true; 856 E->Removed = true; 857 858 return canInstrument(InstrBB); 859 } 860 861 // When generating value profiling calls on Windows routines that make use of 862 // handler funclets for exception processing an operand bundle needs to attached 863 // to the called function. This routine will set \p OpBundles to contain the 864 // funclet information, if any is needed, that should be placed on the generated 865 // value profiling call for the value profile candidate call. 866 static void 867 populateEHOperandBundle(VPCandidateInfo &Cand, 868 DenseMap<BasicBlock *, ColorVector> &BlockColors, 869 SmallVectorImpl<OperandBundleDef> &OpBundles) { 870 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 871 if (OrigCall && !isa<IntrinsicInst>(OrigCall)) { 872 // The instrumentation call should belong to the same funclet as a 873 // non-intrinsic call, so just copy the operand bundle, if any exists. 874 Optional<OperandBundleUse> ParentFunclet = 875 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 876 if (ParentFunclet) 877 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 878 } else { 879 // Intrinsics or other instructions do not get funclet information from the 880 // front-end. Need to use the BlockColors that was computed by the routine 881 // colorEHFunclets to determine whether a funclet is needed. 
882 if (!BlockColors.empty()) { 883 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 884 assert(CV.size() == 1 && "non-unique color for block!"); 885 Instruction *EHPad = CV.front()->getFirstNonPHI(); 886 if (EHPad->isEHPad()) 887 OpBundles.emplace_back("funclet", EHPad); 888 } 889 } 890 } 891 892 // Visit all edge and instrument the edges not in MST, and do value profiling. 893 // Critical edges will be split. 894 static void instrumentOneFunc( 895 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 896 BlockFrequencyInfo *BFI, 897 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 898 bool IsCS) { 899 // Split indirectbr critical edges here before computing the MST rather than 900 // later in getInstrBB() to avoid invalidating it. 901 SplitIndirectBrCriticalEdges(F, BPI, BFI); 902 903 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo( 904 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); 905 std::vector<BasicBlock *> InstrumentBBs; 906 FuncInfo.getInstrumentBBs(InstrumentBBs); 907 unsigned NumCounters = 908 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 909 910 uint32_t I = 0; 911 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 912 for (auto *InstrBB : InstrumentBBs) { 913 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 914 assert(Builder.GetInsertPoint() != InstrBB->end() && 915 "Cannot get the Instrumentation point"); 916 Builder.CreateCall( 917 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 918 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 919 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 920 Builder.getInt32(I++)}); 921 } 922 923 // Now instrument select instructions: 924 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 925 FuncInfo.FunctionHash); 926 assert(I == NumCounters); 927 928 if (DisableValueProfiling) 929 return; 930 931 NumOfPGOICall += 
FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 932 933 // Intrinsic function calls do not have funclet operand bundles needed for 934 // Windows exception handling attached to them. However, if value profiling is 935 // inserted for one of these calls, then a funclet value will need to be set 936 // on the instrumentation call based on the funclet coloring. 937 DenseMap<BasicBlock *, ColorVector> BlockColors; 938 if (F.hasPersonalityFn() && 939 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 940 BlockColors = colorEHFunclets(F); 941 942 // For each VP Kind, walk the VP candidates and instrument each one. 943 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 944 unsigned SiteIndex = 0; 945 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 946 continue; 947 948 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 949 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 950 << " site: CallSite Index = " << SiteIndex << "\n"); 951 952 IRBuilder<> Builder(Cand.InsertPt); 953 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 954 "Cannot get the Instrumentation point"); 955 956 Value *ToProfile = nullptr; 957 if (Cand.V->getType()->isIntegerTy()) 958 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 959 else if (Cand.V->getType()->isPointerTy()) 960 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 961 assert(ToProfile && "value profiling Value is of unexpected type"); 962 963 SmallVector<OperandBundleDef, 1> OpBundles; 964 populateEHOperandBundle(Cand, BlockColors, OpBundles); 965 Builder.CreateCall( 966 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 967 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 968 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 969 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 970 OpBundles); 971 } 972 } // IPVK_First <= Kind <= IPVK_Last 973 } 974 975 namespace { 976 977 // This class 
represents a CFG edge in profile use compilation. 978 struct PGOUseEdge : public PGOEdge { 979 bool CountValid = false; 980 uint64_t CountValue = 0; 981 982 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 983 : PGOEdge(Src, Dest, W) {} 984 985 // Set edge count value 986 void setEdgeCount(uint64_t Value) { 987 CountValue = Value; 988 CountValid = true; 989 } 990 991 // Return the information string for this object. 992 std::string infoString() const { 993 if (!CountValid) 994 return PGOEdge::infoString(); 995 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 996 .str(); 997 } 998 }; 999 1000 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 1001 1002 // This class stores the auxiliary information for each BB. 1003 struct UseBBInfo : public BBInfo { 1004 uint64_t CountValue = 0; 1005 bool CountValid; 1006 int32_t UnknownCountInEdge = 0; 1007 int32_t UnknownCountOutEdge = 0; 1008 DirectEdges InEdges; 1009 DirectEdges OutEdges; 1010 1011 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 1012 1013 UseBBInfo(unsigned IX, uint64_t C) 1014 : BBInfo(IX), CountValue(C), CountValid(true) {} 1015 1016 // Set the profile count value for this BB. 1017 void setBBInfoCount(uint64_t Value) { 1018 CountValue = Value; 1019 CountValid = true; 1020 } 1021 1022 // Return the information string of this object. 1023 std::string infoString() const { 1024 if (!CountValid) 1025 return BBInfo::infoString(); 1026 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 1027 } 1028 1029 // Add an OutEdge and update the edge count. 1030 void addOutEdge(PGOUseEdge *E) { 1031 OutEdges.push_back(E); 1032 UnknownCountOutEdge++; 1033 } 1034 1035 // Add an InEdge and update the edge count. 1036 void addInEdge(PGOUseEdge *E) { 1037 InEdges.push_back(E); 1038 UnknownCountInEdge++; 1039 } 1040 }; 1041 1042 } // end anonymous namespace 1043 1044 // Sum up the count values for all the edges. 
1045 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 1046 uint64_t Total = 0; 1047 for (auto &E : Edges) { 1048 if (E->Removed) 1049 continue; 1050 Total += E->CountValue; 1051 } 1052 return Total; 1053 } 1054 1055 namespace { 1056 1057 class PGOUseFunc { 1058 public: 1059 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1060 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1061 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1062 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry) 1063 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1064 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1065 InstrumentFuncEntry), 1066 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1067 1068 // Read counts for the instrumented BB from profile. 1069 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1070 bool &AllMinusOnes); 1071 1072 // Populate the counts for all BBs. 1073 void populateCounters(); 1074 1075 // Set the branch weights based on the count values. 1076 void setBranchWeights(); 1077 1078 // Annotate the value profile call sites for all value kind. 1079 void annotateValueSites(); 1080 1081 // Annotate the value profile call sites for one value kind. 1082 void annotateValueSites(uint32_t Kind); 1083 1084 // Annotate the irreducible loop header weights. 1085 void annotateIrrLoopHeaderWeights(); 1086 1087 // The hotness of the function from the profile count. 1088 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1089 1090 // Return the function hotness from the profile. 1091 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1092 1093 // Return the function hash. 1094 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1095 1096 // Return the profile record for this function; 1097 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1098 1099 // Return the auxiliary BB information. 
1100 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1101 return FuncInfo.getBBInfo(BB); 1102 } 1103 1104 // Return the auxiliary BB information if available. 1105 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1106 return FuncInfo.findBBInfo(BB); 1107 } 1108 1109 Function &getFunc() const { return F; } 1110 1111 void dumpInfo(std::string Str = "") const { 1112 FuncInfo.dumpInfo(Str); 1113 } 1114 1115 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1116 private: 1117 Function &F; 1118 Module *M; 1119 BlockFrequencyInfo *BFI; 1120 ProfileSummaryInfo *PSI; 1121 1122 // This member stores the shared information with class PGOGenFunc. 1123 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1124 1125 // The maximum count value in the profile. This is only used in PGO use 1126 // compilation. 1127 uint64_t ProgramMaxCount; 1128 1129 // Position of counter that remains to be read. 1130 uint32_t CountPosition = 0; 1131 1132 // Total size of the profile count for this function. 1133 uint32_t ProfileCountSize = 0; 1134 1135 // ProfileRecord for this function. 1136 InstrProfRecord ProfileRecord; 1137 1138 // Function hotness info derived from profile. 1139 FuncFreqAttr FreqAttr; 1140 1141 // Is to use the context sensitive profile. 1142 bool IsCS; 1143 1144 // Find the Instrumented BB and set the value. Return false on error. 1145 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1146 1147 // Set the edge counter value for the unknown edge -- there should be only 1148 // one unknown edge. 1149 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1150 1151 // Return FuncName string; 1152 std::string getFuncName() const { return FuncInfo.FuncName; } 1153 1154 // Set the hot/cold inline hints based on the count values. 1155 // FIXME: This function should be removed once the functionality in 1156 // the inliner is implemented. 
1157 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1158 if (PSI->isHotCount(EntryCount)) 1159 FreqAttr = FFA_Hot; 1160 else if (PSI->isColdCount(MaxCount)) 1161 FreqAttr = FFA_Cold; 1162 } 1163 }; 1164 1165 } // end anonymous namespace 1166 1167 // Visit all the edges and assign the count value for the instrumented 1168 // edges and the BB. Return false on error. 1169 bool PGOUseFunc::setInstrumentedCounts( 1170 const std::vector<uint64_t> &CountFromProfile) { 1171 1172 std::vector<BasicBlock *> InstrumentBBs; 1173 FuncInfo.getInstrumentBBs(InstrumentBBs); 1174 unsigned NumCounters = 1175 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1176 // The number of counters here should match the number of counters 1177 // in profile. Return if they mismatch. 1178 if (NumCounters != CountFromProfile.size()) { 1179 return false; 1180 } 1181 auto *FuncEntry = &*F.begin(); 1182 1183 // Set the profile count to the Instrumented BBs. 1184 uint32_t I = 0; 1185 for (BasicBlock *InstrBB : InstrumentBBs) { 1186 uint64_t CountValue = CountFromProfile[I++]; 1187 UseBBInfo &Info = getBBInfo(InstrBB); 1188 // If we reach here, we know that we have some nonzero count 1189 // values in this function. The entry count should not be 0. 1190 // Fix it if necessary. 1191 if (InstrBB == FuncEntry && CountValue == 0) 1192 CountValue = 1; 1193 Info.setBBInfoCount(CountValue); 1194 } 1195 ProfileCountSize = CountFromProfile.size(); 1196 CountPosition = I; 1197 1198 // Set the edge count and update the count of unknown edges for BBs. 1199 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1200 E->setEdgeCount(Value); 1201 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1202 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1203 }; 1204 1205 // Set the profile count the Instrumented edges. There are BBs that not in 1206 // MST but not instrumented. Need to set the edge count value so that we can 1207 // populate the profile counts later. 
1208 for (auto &E : FuncInfo.MST.AllEdges) { 1209 if (E->Removed || E->InMST) 1210 continue; 1211 const BasicBlock *SrcBB = E->SrcBB; 1212 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1213 1214 // If only one out-edge, the edge profile count should be the same as BB 1215 // profile count. 1216 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1217 setEdgeCount(E.get(), SrcInfo.CountValue); 1218 else { 1219 const BasicBlock *DestBB = E->DestBB; 1220 UseBBInfo &DestInfo = getBBInfo(DestBB); 1221 // If only one in-edge, the edge profile count should be the same as BB 1222 // profile count. 1223 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1224 setEdgeCount(E.get(), DestInfo.CountValue); 1225 } 1226 if (E->CountValid) 1227 continue; 1228 // E's count should have been set from profile. If not, this meenas E skips 1229 // the instrumentation. We set the count to 0. 1230 setEdgeCount(E.get(), 0); 1231 } 1232 return true; 1233 } 1234 1235 // Set the count value for the unknown edge. There should be one and only one 1236 // unknown edge in Edges vector. 1237 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1238 for (auto &E : Edges) { 1239 if (E->CountValid) 1240 continue; 1241 E->setEdgeCount(Value); 1242 1243 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1244 getBBInfo(E->DestBB).UnknownCountInEdge--; 1245 return; 1246 } 1247 llvm_unreachable("Cannot find the unknown count edge"); 1248 } 1249 1250 // Emit function metadata indicating PGO profile mismatch. 1251 static void annotateFunctionWithHashMismatch(Function &F, 1252 LLVMContext &ctx) { 1253 const char MetadataName[] = "instr_prof_hash_mismatch"; 1254 SmallVector<Metadata *, 2> Names; 1255 // If this metadata already exists, ignore. 
1256 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1257 if (Existing) { 1258 MDTuple *Tuple = cast<MDTuple>(Existing); 1259 for (auto &N : Tuple->operands()) { 1260 if (cast<MDString>(N.get())->getString() == MetadataName) 1261 return; 1262 Names.push_back(N.get()); 1263 } 1264 } 1265 1266 MDBuilder MDB(ctx); 1267 Names.push_back(MDB.createString(MetadataName)); 1268 MDNode *MD = MDTuple::get(ctx, Names); 1269 F.setMetadata(LLVMContext::MD_annotation, MD); 1270 } 1271 1272 // Read the profile from ProfileFileName and assign the value to the 1273 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1274 // Return true if the profile are successfully read, and false on errors. 1275 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1276 bool &AllMinusOnes) { 1277 auto &Ctx = M->getContext(); 1278 Expected<InstrProfRecord> Result = 1279 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 1280 if (Error E = Result.takeError()) { 1281 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1282 auto Err = IPE.get(); 1283 bool SkipWarning = false; 1284 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1285 << FuncInfo.FuncName << ": "); 1286 if (Err == instrprof_error::unknown_function) { 1287 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1288 SkipWarning = !PGOWarnMissing; 1289 LLVM_DEBUG(dbgs() << "unknown function"); 1290 } else if (Err == instrprof_error::hash_mismatch || 1291 Err == instrprof_error::malformed) { 1292 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1293 SkipWarning = 1294 NoPGOWarnMismatch || 1295 (NoPGOWarnMismatchComdat && 1296 (F.hasComdat() || 1297 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1298 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1299 // Emit function metadata indicating PGO profile mismatch. 
1300 annotateFunctionWithHashMismatch(F, M->getContext()); 1301 } 1302 1303 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1304 if (SkipWarning) 1305 return; 1306 1307 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 1308 std::string(" Hash = ") + 1309 std::to_string(FuncInfo.FunctionHash); 1310 1311 Ctx.diagnose( 1312 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1313 }); 1314 return false; 1315 } 1316 ProfileRecord = std::move(Result.get()); 1317 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1318 1319 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1320 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1321 AllMinusOnes = (CountFromProfile.size() > 0); 1322 uint64_t ValueSum = 0; 1323 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1324 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1325 ValueSum += CountFromProfile[I]; 1326 if (CountFromProfile[I] != (uint64_t)-1) 1327 AllMinusOnes = false; 1328 } 1329 AllZeros = (ValueSum == 0); 1330 1331 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1332 1333 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1334 getBBInfo(nullptr).UnknownCountInEdge = 2; 1335 1336 if (!setInstrumentedCounts(CountFromProfile)) { 1337 LLVM_DEBUG( 1338 dbgs() << "Inconsistent number of counts, skipping this function"); 1339 Ctx.diagnose(DiagnosticInfoPGOProfile( 1340 M->getName().data(), 1341 Twine("Inconsistent number of counts in ") + F.getName().str() 1342 + Twine(": the profile may be stale or there is a function name collision."), 1343 DS_Warning)); 1344 return false; 1345 } 1346 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1347 return true; 1348 } 1349 1350 // Populate the counters from instrumented BBs to all BBs. 1351 // In the end of this operation, all BBs should have a valid count value. 
1352 void PGOUseFunc::populateCounters() { 1353 bool Changes = true; 1354 unsigned NumPasses = 0; 1355 while (Changes) { 1356 NumPasses++; 1357 Changes = false; 1358 1359 // For efficient traversal, it's better to start from the end as most 1360 // of the instrumented edges are at the end. 1361 for (auto &BB : reverse(F)) { 1362 UseBBInfo *Count = findBBInfo(&BB); 1363 if (Count == nullptr) 1364 continue; 1365 if (!Count->CountValid) { 1366 if (Count->UnknownCountOutEdge == 0) { 1367 Count->CountValue = sumEdgeCount(Count->OutEdges); 1368 Count->CountValid = true; 1369 Changes = true; 1370 } else if (Count->UnknownCountInEdge == 0) { 1371 Count->CountValue = sumEdgeCount(Count->InEdges); 1372 Count->CountValid = true; 1373 Changes = true; 1374 } 1375 } 1376 if (Count->CountValid) { 1377 if (Count->UnknownCountOutEdge == 1) { 1378 uint64_t Total = 0; 1379 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1380 // If the one of the successor block can early terminate (no-return), 1381 // we can end up with situation where out edge sum count is larger as 1382 // the source BB's count is collected by a post-dominated block. 1383 if (Count->CountValue > OutSum) 1384 Total = Count->CountValue - OutSum; 1385 setEdgeCount(Count->OutEdges, Total); 1386 Changes = true; 1387 } 1388 if (Count->UnknownCountInEdge == 1) { 1389 uint64_t Total = 0; 1390 uint64_t InSum = sumEdgeCount(Count->InEdges); 1391 if (Count->CountValue > InSum) 1392 Total = Count->CountValue - InSum; 1393 setEdgeCount(Count->InEdges, Total); 1394 Changes = true; 1395 } 1396 } 1397 } 1398 } 1399 1400 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1401 #ifndef NDEBUG 1402 // Assert every BB has a valid counter. 
1403 for (auto &BB : F) { 1404 auto BI = findBBInfo(&BB); 1405 if (BI == nullptr) 1406 continue; 1407 assert(BI->CountValid && "BB count is not valid"); 1408 } 1409 #endif 1410 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1411 uint64_t FuncMaxCount = FuncEntryCount; 1412 for (auto &BB : F) { 1413 auto BI = findBBInfo(&BB); 1414 if (BI == nullptr) 1415 continue; 1416 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1417 } 1418 1419 // Fix the obviously inconsistent entry count. 1420 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1421 FuncEntryCount = 1; 1422 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1423 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1424 1425 // Now annotate select instructions 1426 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1427 assert(CountPosition == ProfileCountSize); 1428 1429 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1430 } 1431 1432 // Assign the scaled count values to the BB with multiple out edges. 1433 void PGOUseFunc::setBranchWeights() { 1434 // Generate MD_prof metadata for every branch instruction. 1435 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1436 << " IsCS=" << IsCS << "\n"); 1437 for (auto &BB : F) { 1438 Instruction *TI = BB.getTerminator(); 1439 if (TI->getNumSuccessors() < 2) 1440 continue; 1441 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1442 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI))) 1443 continue; 1444 1445 if (getBBInfo(&BB).CountValue == 0) 1446 continue; 1447 1448 // We have a non-zero Branch BB. 
1449 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1450 unsigned Size = BBCountInfo.OutEdges.size(); 1451 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1452 uint64_t MaxCount = 0; 1453 for (unsigned s = 0; s < Size; s++) { 1454 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1455 const BasicBlock *SrcBB = E->SrcBB; 1456 const BasicBlock *DestBB = E->DestBB; 1457 if (DestBB == nullptr) 1458 continue; 1459 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1460 uint64_t EdgeCount = E->CountValue; 1461 if (EdgeCount > MaxCount) 1462 MaxCount = EdgeCount; 1463 EdgeCounts[SuccNum] = EdgeCount; 1464 } 1465 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1466 } 1467 } 1468 1469 static bool isIndirectBrTarget(BasicBlock *BB) { 1470 for (BasicBlock *Pred : predecessors(BB)) { 1471 if (isa<IndirectBrInst>(Pred->getTerminator())) 1472 return true; 1473 } 1474 return false; 1475 } 1476 1477 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1478 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1479 // Find irr loop headers 1480 for (auto &BB : F) { 1481 // As a heuristic also annotate indrectbr targets as they have a high chance 1482 // to become an irreducible loop header after the indirectbr tail 1483 // duplication. 
1484 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1485 Instruction *TI = BB.getTerminator(); 1486 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1487 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1488 } 1489 } 1490 } 1491 1492 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1493 Module *M = F.getParent(); 1494 IRBuilder<> Builder(&SI); 1495 Type *Int64Ty = Builder.getInt64Ty(); 1496 Type *I8PtrTy = Builder.getInt8PtrTy(); 1497 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1498 Builder.CreateCall( 1499 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1500 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1501 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1502 Builder.getInt32(*CurCtrIdx), Step}); 1503 ++(*CurCtrIdx); 1504 } 1505 1506 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1507 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1508 assert(*CurCtrIdx < CountFromProfile.size() && 1509 "Out of bound access of counters"); 1510 uint64_t SCounts[2]; 1511 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1512 ++(*CurCtrIdx); 1513 uint64_t TotalCount = 0; 1514 auto BI = UseFunc->findBBInfo(SI.getParent()); 1515 if (BI != nullptr) 1516 TotalCount = BI->CountValue; 1517 // False Count 1518 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1519 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1520 if (MaxCount) 1521 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1522 } 1523 1524 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1525 if (!PGOInstrSelect) 1526 return; 1527 // FIXME: do not handle this yet. 
1528 if (SI.getCondition()->getType()->isVectorTy()) 1529 return; 1530 1531 switch (Mode) { 1532 case VM_counting: 1533 NSIs++; 1534 return; 1535 case VM_instrument: 1536 instrumentOneSelectInst(SI); 1537 return; 1538 case VM_annotate: 1539 annotateOneSelectInst(SI); 1540 return; 1541 } 1542 1543 llvm_unreachable("Unknown visiting mode"); 1544 } 1545 1546 // Traverse all valuesites and annotate the instructions for all value kind. 1547 void PGOUseFunc::annotateValueSites() { 1548 if (DisableValueProfiling) 1549 return; 1550 1551 // Create the PGOFuncName meta data. 1552 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1553 1554 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1555 annotateValueSites(Kind); 1556 } 1557 1558 // Annotate the instructions for a specific value kind. 1559 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1560 assert(Kind <= IPVK_Last); 1561 unsigned ValueSiteIndex = 0; 1562 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1563 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1564 if (NumValueSites != ValueSites.size()) { 1565 auto &Ctx = M->getContext(); 1566 Ctx.diagnose(DiagnosticInfoPGOProfile( 1567 M->getName().data(), 1568 Twine("Inconsistent number of value sites for ") + 1569 Twine(ValueProfKindDescr[Kind]) + 1570 Twine(" profiling in \"") + F.getName().str() + 1571 Twine("\", possibly due to the use of a stale profile."), 1572 DS_Warning)); 1573 return; 1574 } 1575 1576 for (VPCandidateInfo &I : ValueSites) { 1577 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1578 << "): Index = " << ValueSiteIndex << " out of " 1579 << NumValueSites << "\n"); 1580 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1581 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1582 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1583 : MaxNumAnnotations); 1584 ValueSiteIndex++; 1585 } 1586 } 1587 1588 // Collect the set of members for each Comdat in module M and store 1589 // in ComdatMembers. 
1590 static void collectComdatMembers( 1591 Module &M, 1592 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1593 if (!DoComdatRenaming) 1594 return; 1595 for (Function &F : M) 1596 if (Comdat *C = F.getComdat()) 1597 ComdatMembers.insert(std::make_pair(C, &F)); 1598 for (GlobalVariable &GV : M.globals()) 1599 if (Comdat *C = GV.getComdat()) 1600 ComdatMembers.insert(std::make_pair(C, &GV)); 1601 for (GlobalAlias &GA : M.aliases()) 1602 if (Comdat *C = GA.getComdat()) 1603 ComdatMembers.insert(std::make_pair(C, &GA)); 1604 } 1605 1606 static bool InstrumentAllFunctions( 1607 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1608 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1609 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1610 // For the context-sensitve instrumentation, we should have a separated pass 1611 // (before LTO/ThinLTO linking) to create these variables. 1612 if (!IsCS) 1613 createIRLevelProfileFlagVar(M, /* IsCS */ false, PGOInstrumentEntry); 1614 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1615 collectComdatMembers(M, ComdatMembers); 1616 1617 for (auto &F : M) { 1618 if (F.isDeclaration()) 1619 continue; 1620 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1621 continue; 1622 auto &TLI = LookupTLI(F); 1623 auto *BPI = LookupBPI(F); 1624 auto *BFI = LookupBFI(F); 1625 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1626 } 1627 return true; 1628 } 1629 1630 PreservedAnalyses 1631 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1632 createProfileFileNameVar(M, CSInstrName); 1633 createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry); 1634 return PreservedAnalyses::all(); 1635 } 1636 1637 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 1638 if (skipModule(M)) 1639 return false; 1640 1641 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 1642 return 
this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 1643 }; 1644 auto LookupBPI = [this](Function &F) { 1645 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 1646 }; 1647 auto LookupBFI = [this](Function &F) { 1648 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 1649 }; 1650 return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS); 1651 } 1652 1653 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1654 ModuleAnalysisManager &AM) { 1655 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1656 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1657 return FAM.getResult<TargetLibraryAnalysis>(F); 1658 }; 1659 auto LookupBPI = [&FAM](Function &F) { 1660 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1661 }; 1662 auto LookupBFI = [&FAM](Function &F) { 1663 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1664 }; 1665 1666 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1667 return PreservedAnalyses::all(); 1668 1669 return PreservedAnalyses::none(); 1670 } 1671 1672 // Using the ratio b/w sums of profile count values and BFI count values to 1673 // adjust the func entry count. 
1674 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1675 BranchProbabilityInfo &NBPI) { 1676 Function &F = Func.getFunc(); 1677 BlockFrequencyInfo NBFI(F, NBPI, LI); 1678 #ifndef NDEBUG 1679 auto BFIEntryCount = F.getEntryCount(); 1680 assert(BFIEntryCount.hasValue() && (BFIEntryCount.getCount() > 0) && 1681 "Invalid BFI Entrycount"); 1682 #endif 1683 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1684 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1685 for (auto &BBI : F) { 1686 uint64_t CountValue = 0; 1687 uint64_t BFICountValue = 0; 1688 if (!Func.findBBInfo(&BBI)) 1689 continue; 1690 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1691 CountValue = Func.getBBInfo(&BBI).CountValue; 1692 BFICountValue = BFICount.getValue(); 1693 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1694 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1695 } 1696 if (SumCount.isZero()) 1697 return; 1698 1699 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1700 "Incorrect sum of BFI counts"); 1701 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1702 return; 1703 double Scale = (SumCount / SumBFICount).convertToDouble(); 1704 if (Scale < 1.001 && Scale > 0.999) 1705 return; 1706 1707 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; 1708 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1709 if (NewEntryCount == 0) 1710 NewEntryCount = 1; 1711 if (NewEntryCount != FuncEntryCount) { 1712 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1713 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1714 << ", entry_count " << FuncEntryCount << " --> " 1715 << NewEntryCount << "\n"); 1716 } 1717 } 1718 1719 // Compare the profile count values with BFI count values, and print out 1720 // the non-matching ones. 
//
// A fresh BlockFrequencyInfo is built from \p NBPI and \p LI. When
// PGOVerifyHotBFI is set, a block is reported only if its profile count is
// >= \p HotCountThreshold while its BFI count is not, or its profile count is
// <= \p ColdCountThreshold while its BFI count is >= \p HotCountThreshold.
// Otherwise a block is reported when at least one of the two counts reaches
// PGOVerifyBFICutoff and their absolute difference is not below
// CountValue / 100 * PGOVerifyBFIRatio (integer arithmetic). One analysis
// remark is emitted per reported block, followed by a per-function summary
// remark if any block was reported.
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
                          BranchProbabilityInfo &NBPI,
                          uint64_t HotCountThreshold,
                          uint64_t ColdCountThreshold) {
  Function &F = Func.getFunc();
  BlockFrequencyInfo NBFI(F, NBPI, LI);
  // bool PrintFunc = false;
  bool HotBBOnly = PGOVerifyHotBFI;
  // Only assigned in the HotBBOnly branch below; stays empty otherwise.
  std::string Msg;
  OptimizationRemarkEmitter ORE(&F);

  unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
  for (auto &BBI : F) {
    uint64_t CountValue = 0;
    uint64_t BFICountValue = 0;

    // A block whose count is not marked valid is treated as having count 0.
    if (Func.getBBInfo(&BBI).CountValid)
      CountValue = Func.getBBInfo(&BBI).CountValue;

    BBNum++;
    if (CountValue)
      NonZeroBBNum++;
    // A missing BFI count is likewise treated as 0.
    auto BFICount = NBFI.getBlockProfileCount(&BBI);
    if (BFICount)
      BFICountValue = BFICount.getValue();

    if (HotBBOnly) {
      bool rawIsHot = CountValue >= HotCountThreshold;
      bool BFIIsHot = BFICountValue >= HotCountThreshold;
      bool rawIsCold = CountValue <= ColdCountThreshold;
      bool ShowCount = false;
      if (rawIsHot && !BFIIsHot) {
        Msg = "raw-Hot to BFI-nonHot";
        ShowCount = true;
      } else if (rawIsCold && BFIIsHot) {
        Msg = "raw-Cold to BFI-Hot";
        ShowCount = true;
      }
      if (!ShowCount)
        continue;
    } else {
      // Skip blocks where both counts are below the cutoff.
      if ((CountValue < PGOVerifyBFICutoff) &&
          (BFICountValue < PGOVerifyBFICutoff))
        continue;
      uint64_t Diff = (BFICountValue >= CountValue)
                          ? BFICountValue - CountValue
                          : CountValue - BFICountValue;
      // Note the integer division: for CountValue < 100 the tolerance is 0,
      // so no difference is considered small enough to skip.
      if (Diff < CountValue / 100 * PGOVerifyBFIRatio)
        continue;
    }
    BBMisMatchNum++;

    ORE.emit([&]() {
      OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify",
                                        F.getSubprogram(), &BBI);
      Remark << "BB " << ore::NV("Block", BBI.getName())
             << " Count=" << ore::NV("Count", CountValue)
             << " BFI_Count=" << ore::NV("Count", BFICountValue);
      if (!Msg.empty())
        Remark << " (" << Msg << ")";
      return Remark;
    });
  }
  // Per-function summary, emitted only when at least one block mismatched.
  if (BBMisMatchNum)
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
                                        F.getSubprogram(), &F.getEntryBlock())
             << "In Func " << ore::NV("Function", F.getName())
             << ": Num_of_BB=" << ore::NV("Count", BBNum)
             << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
             << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
    });
}

// Read the indexed profile \p ProfileFileName (together with
// \p ProfileRemappingFileName) and use it to annotate every function
// definition in \p M.
//
// Returns false before visiting any function when the reader cannot be
// created, when \p IsCS is set but the profile has no CSIR-level profile, or
// when the profile is not an IR-level profile. Returns true otherwise.
//
// \p LookupTLI, \p LookupBPI and \p LookupBFI supply the per-function
// analyses; \p PSI is refreshed after the module's profile summary is set.
static bool annotateAllFunctions(
    Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
    function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
    function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
    function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
    ProfileSummaryInfo *PSI, bool IsCS) {
  LLVM_DEBUG(dbgs() << "Read in profile counters: ");
  auto &Ctx = M.getContext();
  // Read the counter array from file.
  auto ReaderOrErr =
      IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
  // Any error from creating the reader is reported as a diagnostic.
  if (Error E = ReaderOrErr.takeError()) {
    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
      Ctx.diagnose(
          DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
    });
    return false;
  }

  std::unique_ptr<IndexedInstrProfReader> PGOReader =
      std::move(ReaderOrErr.get());
  if (!PGOReader) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
                                          StringRef("Cannot get PGOReader")));
    return false;
  }
  // This case returns without emitting a diagnostic.
  if (!PGOReader->hasCSIRLevelProfile() && IsCS)
    return false;

  // TODO: might need to change the warning once the clang option is finalized.
  if (!PGOReader->isIRLevelProfile()) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(
        ProfileFileName.data(), "Not an IR level instrumentation profile"));
    return false;
  }

  // Add the profile summary (read from the header of the indexed summary) here
  // so that we can use it below when reading counters (which checks if the
  // function should be marked with a cold or inlinehint attribute).
  M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
                      IsCS ? ProfileSummary::PSK_CSInstr
                           : ProfileSummary::PSK_Instr);
  PSI->refresh();

  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
  collectComdatMembers(M, ComdatMembers);
  // Functions are collected here and only given attributes after the loop;
  // see the comment ahead of the attribute loops below.
  std::vector<Function *> HotFunctions;
  std::vector<Function *> ColdFunctions;

  // If the profile marked as always instrument the entry BB, do the
  // same. Note this can be overwritten by the internal option in CFGMST.h
  bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
  // An explicitly given PGOInstrumentEntry option takes precedence over the
  // setting recorded in the profile.
  if (PGOInstrumentEntry.getNumOccurrences() > 0)
    InstrumentFuncEntry = PGOInstrumentEntry;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    auto &TLI = LookupTLI(F);
    auto *BPI = LookupBPI(F);
    auto *BFI = LookupBFI(F);
    // Split indirectbr critical edges here before computing the MST rather than
    // later in getInstrBB() to avoid invalidating it.
    SplitIndirectBrCriticalEdges(F, BPI, BFI);
    PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
                    InstrumentFuncEntry);
    // When AllMinusOnes is true, it means the profile for the function
    // is unrepresentative and this function is actually hot. Set the
    // entry count of the function to be multiple times of hot threshold
    // and drop all its internal counters.
    bool AllMinusOnes = false;
    bool AllZeros = false;
    // Functions for which readCounters() returns false are left unannotated.
    if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes))
      continue;
    if (AllZeros) {
      // Entry count becomes 0; the function is queued as cold only when the
      // program-wide max count is non-zero.
      F.setEntryCount(ProfileCount(0, Function::PCT_Real));
      if (Func.getProgramMaxCount() != 0)
        ColdFunctions.push_back(&F);
      continue;
    }
    const unsigned MultiplyFactor = 3;
    if (AllMinusOnes) {
      uint64_t HotThreshold = PSI->getHotCountThreshold();
      // With a zero threshold the entry count is left as is, but the function
      // is still queued as hot.
      if (HotThreshold)
        F.setEntryCount(
            ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real));
      HotFunctions.push_back(&F);
      continue;
    }
    Func.populateCounters();
    Func.setBranchWeights();
    Func.annotateValueSites();
    Func.annotateIrrLoopHeaderWeights();
    PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
    if (FreqAttr == PGOUseFunc::FFA_Cold)
      ColdFunctions.push_back(&F);
    else if (FreqAttr == PGOUseFunc::FFA_Hot)
      HotFunctions.push_back(&F);
    // Optional view of the block frequencies, recomputed from scratch for this
    // function; restricted to ViewBlockFreqFuncName when that is non-empty.
    if (PGOViewCounts != PGOVCT_None &&
        (ViewBlockFreqFuncName.empty() ||
         F.getName().equals(ViewBlockFreqFuncName))) {
      LoopInfo LI{DominatorTree(F)};
      std::unique_ptr<BranchProbabilityInfo> NewBPI =
          std::make_unique<BranchProbabilityInfo>(F, LI);
      std::unique_ptr<BlockFrequencyInfo> NewBFI =
          std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
      if (PGOViewCounts == PGOVCT_Graph)
        NewBFI->view();
      else if (PGOViewCounts == PGOVCT_Text) {
        dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
        NewBFI->print(dbgs());
      }
    }
    // Optional view of the raw profile counts held in Func.
    if (PGOViewRawCounts != PGOVCT_None &&
        (ViewBlockFreqFuncName.empty() ||
         F.getName().equals(ViewBlockFreqFuncName))) {
      // The inner if/else below forms one statement, so the trailing
      // `else if` pairs with the outer PGOVCT_Graph test. In graph mode the
      // graph is written when no function filter is set and viewed otherwise.
      if (PGOViewRawCounts == PGOVCT_Graph)
        if (ViewBlockFreqFuncName.empty())
          WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
        else
          ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
      else if (PGOViewRawCounts == PGOVCT_Text) {
        dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
        Func.dumpInfo();
      }
    }

    if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) {
      LoopInfo LI{DominatorTree(F)};
      BranchProbabilityInfo NBPI(F, LI);

      // Fix func entry count.
      if (PGOFixEntryCount)
        fixFuncEntryCount(Func, LI, NBPI);

      // Verify BlockFrequency information.
      // NOTE(review): verifyFuncBFI() is also reached when only
      // PGOFixEntryCount is set -- confirm this is intended.
      uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
      if (PGOVerifyHotBFI) {
        HotCountThreshold = PSI->getOrCompHotCountThreshold();
        ColdCountThreshold = PSI->getOrCompColdCountThreshold();
      }
      verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
    }
  }

  // Set function hotness attribute from the profile.
  // We have to apply these attributes at the end because their presence
  // can affect the BranchProbabilityInfo of any callers, resulting in an
  // inconsistent MST between prof-gen and prof-use.
  for (auto &F : HotFunctions) {
    F->addFnAttr(Attribute::InlineHint);
    LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
                      << "\n");
  }
  for (auto &F : ColdFunctions) {
    // Only set when there is no Attribute::Hot set by the user. For Hot
    // attribute, user's annotation has the precedence over the profile.
    if (F->hasFnAttribute(Attribute::Hot)) {
      auto &Ctx = M.getContext();
      std::string Msg = std::string("Function ") + F->getName().str() +
                        std::string(" is annotated as a hot function but"
                                    " the profile is cold");
      Ctx.diagnose(
          DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
      continue;
    }
    F->addFnAttr(Attribute::Cold);
    LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
                      << "\n");
  }
  return true;
}

// Stores the profile file name, the remapping file name and the
// context-sensitive flag. A non-empty PGOTestProfileFile or
// PGOTestProfileRemappingFile option replaces the corresponding constructor
// argument.
PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
                                             std::string RemappingFilename,
                                             bool IsCS)
    : ProfileFileName(std::move(Filename)),
      ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
  if (!PGOTestProfileFile.empty())
    ProfileFileName = PGOTestProfileFile;
  if (!PGOTestProfileRemappingFile.empty())
    ProfileRemappingFileName = PGOTestProfileRemappingFile;
}

// New pass manager entry point for the profile-use pass. Builds per-function
// analysis lookups from \p AM, fetches the module's ProfileSummaryInfo and
// calls annotateAllFunctions(). Reports all analyses as preserved when that
// call returns false, and none as preserved otherwise.
PreservedAnalyses PGOInstrumentationUse::run(Module &M,
                                             ModuleAnalysisManager &AM) {

  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
    return FAM.getResult<TargetLibraryAnalysis>(F);
  };
  auto LookupBPI = [&FAM](Function &F) {
    return &FAM.getResult<BranchProbabilityAnalysis>(F);
  };
  auto LookupBFI = [&FAM](Function &F) {
    return &FAM.getResult<BlockFrequencyAnalysis>(F);
  };

  auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);

  if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
                            LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}

// Legacy pass manager entry point for the profile-use pass. Returns false
// immediately when skipModule() says so; otherwise returns the result of
// annotateAllFunctions(). Unlike the new pass manager path, an empty
// remapping file name is always passed here.
bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
  if (skipModule(M))
    return false;

  auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
    return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  };
  auto LookupBPI = [this](Function &F) {
    return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
  };
  auto LookupBFI = [this](Function &F) {
    return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
  };

  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI,
                              LookupBFI, PSI, IsCS);
}

// Returns the name of \p Node when it has one; otherwise returns the text
// produced by printing the block as an operand.
static std::string getSimpleNodeName(const BasicBlock *Node) {
  if (!Node->getName().empty())
    return std::string(Node->getName());

  std::string SimpleNodeName;
  raw_string_ostream OS(SimpleNodeName);
  Node->printAsOperand(OS, false);
  return OS.str();
}

// Attaches MD_prof branch-weight metadata to \p TI. Each entry of
// \p EdgeCounts is scaled with a factor derived from \p MaxCount (which must
// be non-zero) to produce the weights. When EmitBranchProbability is set and
// getBranchCondString() yields a non-empty string for \p TI, an optimization
// remark is also emitted, giving the probability of the first weight relative
// to the weight sum together with the unscaled total count.
void llvm::setProfMetadata(Module *M, Instruction *TI,
                           ArrayRef<uint64_t> EdgeCounts,
                           uint64_t MaxCount) {
  MDBuilder MDB(M->getContext());
  assert(MaxCount > 0 && "Bad max count");
  uint64_t Scale = calculateCountScale(MaxCount);
  SmallVector<unsigned, 4> Weights;
  for (const auto &ECI : EdgeCounts)
    Weights.push_back(scaleBranchCount(ECI, Scale));

  LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
                                           : Weights) {
    dbgs() << W << " ";
  } dbgs() << "\n";);

  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
  if (EmitBranchProbability) {
    std::string BrCondStr = getBranchCondString(TI);
    if (BrCondStr.empty())
      return;

    uint64_t WSum =
        std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
                        [](uint64_t w1, uint64_t w2) { return w1 + w2; });
    uint64_t TotalCount =
        std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
                        [](uint64_t c1, uint64_t c2) { return c1 + c2; });
    // Scale is recomputed from the weight sum before numerator and
    // denominator are passed to BranchProbability.
    // NOTE(review): Weights[0] assumes at least one edge count -- confirm
    // callers never reach this point with an empty EdgeCounts.
    Scale = calculateCountScale(WSum);
    BranchProbability BP(scaleBranchCount(Weights[0], Scale),
                         scaleBranchCount(WSum, Scale));
    std::string BranchProbStr;
    raw_string_ostream OS(BranchProbStr);
    OS << BP;
    OS << " (total count : " << TotalCount << ")";
    OS.flush();
    Function *F = TI->getParent()->getParent();
    OptimizationRemarkEmitter ORE(F);
    ORE.emit([&]() {
      return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
             << BrCondStr << " is true with probability : " << BranchProbStr;
    });
  }
}

namespace llvm {

// Attaches MD_irr_loop metadata, built from \p Count, to \p TI.
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {
  MDBuilder MDB(M->getContext());
  TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
                  MDB.createIrrLoopHeaderWeight(Count));
}

// Graph view of a PGOUseFunc: nodes are the basic blocks of the underlying
// function, a node's children are its CFG successors, and the entry node is
// the function's first block.
template <> struct GraphTraits<PGOUseFunc *> {
  using NodeRef = const BasicBlock *;
  using ChildIteratorType = const_succ_iterator;
  using nodes_iterator = pointer_iterator<Function::const_iterator>;

  static NodeRef getEntryNode(const PGOUseFunc *G) {
    return &G->getFunc().front();
  }

  static ChildIteratorType child_begin(const NodeRef N) {
    return succ_begin(N);
  }

  static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }

  static nodes_iterator nodes_begin(const PGOUseFunc *G) {
    return nodes_iterator(G->getFunc().begin());
  }

  static nodes_iterator nodes_end(const PGOUseFunc *G) {
    return nodes_iterator(G->getFunc().end());
  }
};

// DOT rendering of a PGOUseFunc graph: the graph is named after the function,
// and each node is labelled with its block name and profile count.
template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
  explicit DOTGraphTraits(bool isSimple = false)
      : DefaultDOTGraphTraits(isSimple) {}

  static std::string getGraphName(const PGOUseFunc *G) {
    return std::string(G->getFunc().getName());
  }

  // Builds the label for \p Node: the block's simple name, then its count
  // ("Unknown" when the block has no BBInfo or its count is not valid). When
  // PGOInstrSelect is set, one extra line is appended per select instruction
  // with the true/false values read from its profile metadata.
  std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
    std::string Result;
    raw_string_ostream OS(Result);

    OS << getSimpleNodeName(Node) << ":\\l";
    UseBBInfo *BI = Graph->findBBInfo(Node);
    OS << "Count : ";
    if (BI && BI->CountValid)
      OS << BI->CountValue << "\\l";
    else
      OS << "Unknown\\l";

    if (!PGOInstrSelect)
      return Result;

    for (const Instruction &I : *Node) {
      if (!isa<SelectInst>(&I))
        continue;
      // Display scaled counts for SELECT instruction:
      OS << "SELECT : { T = ";
      uint64_t TC, FC;
      bool HasProf = I.extractProfMetadata(TC, FC);
      if (!HasProf)
        OS << "Unknown, F = Unknown }\\l";
      else
        OS << TC << ", F = " << FC << " }\\l";
    }
    return Result;
  }
};

} // end namespace llvm