1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "ValueProfileCollector.h" 52 #include "llvm/ADT/APInt.h" 53 #include "llvm/ADT/ArrayRef.h" 54 #include "llvm/ADT/STLExtras.h" 55 #include "llvm/ADT/SmallVector.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/ADT/StringRef.h" 58 #include "llvm/ADT/Twine.h" 59 #include "llvm/ADT/iterator.h" 60 #include "llvm/ADT/iterator_range.h" 61 #include "llvm/Analysis/BlockFrequencyInfo.h" 62 #include "llvm/Analysis/BranchProbabilityInfo.h" 63 #include "llvm/Analysis/CFG.h" 64 #include "llvm/Analysis/LoopInfo.h" 65 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 66 #include "llvm/Analysis/ProfileSummaryInfo.h" 67 #include "llvm/Analysis/TargetLibraryInfo.h" 68 #include "llvm/IR/Attributes.h" 69 #include "llvm/IR/BasicBlock.h" 70 #include "llvm/IR/CFG.h" 71 #include "llvm/IR/Comdat.h" 72 #include "llvm/IR/Constant.h" 73 #include "llvm/IR/Constants.h" 74 #include "llvm/IR/DiagnosticInfo.h" 75 #include "llvm/IR/Dominators.h" 76 #include "llvm/IR/EHPersonalities.h" 77 #include "llvm/IR/Function.h" 78 #include "llvm/IR/GlobalAlias.h" 79 #include "llvm/IR/GlobalValue.h" 80 #include "llvm/IR/GlobalVariable.h" 81 #include "llvm/IR/IRBuilder.h" 82 #include "llvm/IR/InstVisitor.h" 83 #include "llvm/IR/InstrTypes.h" 84 #include "llvm/IR/Instruction.h" 85 #include "llvm/IR/Instructions.h" 86 #include "llvm/IR/IntrinsicInst.h" 87 #include "llvm/IR/Intrinsics.h" 88 #include "llvm/IR/LLVMContext.h" 89 #include "llvm/IR/MDBuilder.h" 90 #include "llvm/IR/Module.h" 91 #include "llvm/IR/PassManager.h" 92 #include "llvm/IR/ProfDataUtils.h" 93 #include "llvm/IR/ProfileSummary.h" 94 #include "llvm/IR/Type.h" 95 #include "llvm/IR/Value.h" 96 #include "llvm/ProfileData/InstrProf.h" 97 #include "llvm/ProfileData/InstrProfReader.h" 98 #include "llvm/Support/BranchProbability.h" 99 #include "llvm/Support/CRC.h" 100 #include "llvm/Support/Casting.h" 101 #include "llvm/Support/CommandLine.h" 102 #include "llvm/Support/DOTGraphTraits.h" 103 #include "llvm/Support/Debug.h" 104 #include "llvm/Support/Error.h" 105 #include "llvm/Support/ErrorHandling.h" 106 #include "llvm/Support/GraphWriter.h" 107 #include "llvm/Support/VirtualFileSystem.h" 108 #include "llvm/Support/raw_ostream.h" 109 #include "llvm/TargetParser/Triple.h" 110 #include "llvm/Transforms/Instrumentation.h" 111 #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" 112 #include "llvm/Transforms/Instrumentation/CFGMST.h" 113 #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" 114 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 115 #include "llvm/Transforms/Utils/MisExpect.h" 116 #include "llvm/Transforms/Utils/ModuleUtils.h" 117 #include <algorithm> 118 #include <cassert> 119 #include <cstdint> 120 #include <memory> 121 #include <numeric> 122 #include <optional> 123 #include <stack> 124 #include <string> 125 #include <unordered_map> 126 #include <utility> 127 #include <vector> 128 129 using namespace llvm; 130 using ProfileCount = Function::ProfileCount; 131 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 132 133 #define DEBUG_TYPE "pgo-instrumentation" 134 135 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 136 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 137 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 138 STATISTIC(NumOfPGOEdge, "Number of edges."); 139 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 140 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 141 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 142 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 143 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 144 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 145 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 146 STATISTIC(NumOfCSPGOSelectInsts, 147 "Number of select instruction instrumented in CSPGO."); 148 STATISTIC(NumOfCSPGOMemIntrinsics, 149 "Number of mem intrinsics instrumented in CSPGO."); 150 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 151 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 152 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 153 STATISTIC(NumOfCSPGOFunc, 154 "Number of functions having valid profile counts in CSPGO."); 155 STATISTIC(NumOfCSPGOMismatch, 156 "Number of functions having mismatch profile in CSPGO."); 157 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 158 STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed"); 159 160 // Command line option to specify the file to read profile from. This is 161 // mainly used for testing. 162 static cl::opt<std::string> 163 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 164 cl::value_desc("filename"), 165 cl::desc("Specify the path of profile data file. This is" 166 "mainly for test purpose.")); 167 static cl::opt<std::string> PGOTestProfileRemappingFile( 168 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 169 cl::value_desc("filename"), 170 cl::desc("Specify the path of profile remapping file. This is mainly for " 171 "test purpose.")); 172 173 // Command line option to disable value profiling. The default is false: 174 // i.e. value profiling is enabled by default. This is for debug purpose. 175 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 176 cl::Hidden, 177 cl::desc("Disable Value Profiling")); 178 179 // Command line option to set the maximum number of VP annotations to write to 180 // the metadata for a single indirect call callsite. 181 static cl::opt<unsigned> MaxNumAnnotations( 182 "icp-max-annotations", cl::init(3), cl::Hidden, 183 cl::desc("Max number of annotations for a single indirect " 184 "call callsite")); 185 186 // Command line option to set the maximum number of value annotations 187 // to write to the metadata for a single memop intrinsic. 188 static cl::opt<unsigned> MaxNumMemOPAnnotations( 189 "memop-max-annotations", cl::init(4), cl::Hidden, 190 cl::desc("Max number of preicise value annotations for a single memop" 191 "intrinsic")); 192 193 // Command line option to control appending FunctionHash to the name of a COMDAT 194 // function. This is to avoid the hash mismatch caused by the preinliner. 195 static cl::opt<bool> DoComdatRenaming( 196 "do-comdat-renaming", cl::init(false), cl::Hidden, 197 cl::desc("Append function hash to the name of COMDAT function to avoid " 198 "function hash mismatch due to the preinliner")); 199 200 namespace llvm { 201 // Command line option to enable/disable the warning about missing profile 202 // information. 203 cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false), 204 cl::Hidden, 205 cl::desc("Use this option to turn on/off " 206 "warnings about missing profile data for " 207 "functions.")); 208 209 // Command line option to enable/disable the warning about a hash mismatch in 210 // the profile data. 211 cl::opt<bool> 212 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 213 cl::desc("Use this option to turn off/on " 214 "warnings about profile cfg mismatch.")); 215 216 // Command line option to enable/disable the warning about a hash mismatch in 217 // the profile data for Comdat functions, which often turns out to be false 218 // positive due to the pre-instrumentation inline. 219 cl::opt<bool> NoPGOWarnMismatchComdatWeak( 220 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden, 221 cl::desc("The option is used to turn on/off " 222 "warnings about hash mismatch for comdat " 223 "or weak functions.")); 224 } // namespace llvm 225 226 // Command line option to enable/disable select instruction instrumentation. 227 static cl::opt<bool> 228 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 229 cl::desc("Use this option to turn on/off SELECT " 230 "instruction instrumentation. ")); 231 232 // Command line option to turn on CFG dot or text dump of raw profile counts 233 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 234 "pgo-view-raw-counts", cl::Hidden, 235 cl::desc("A boolean option to show CFG dag or text " 236 "with raw profile counts from " 237 "profile data. See also option " 238 "-pgo-view-counts. To limit graph " 239 "display to only one function, use " 240 "filtering option -view-bfi-func-name."), 241 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 242 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 243 clEnumValN(PGOVCT_Text, "text", "show in text."))); 244 245 // Command line option to enable/disable memop intrinsic call.size profiling. 246 static cl::opt<bool> 247 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 248 cl::desc("Use this option to turn on/off " 249 "memory intrinsic size profiling.")); 250 251 // Emit branch probability as optimization remarks. 252 static cl::opt<bool> 253 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 254 cl::desc("When this option is on, the annotated " 255 "branch probability will be emitted as " 256 "optimization remarks: -{Rpass|" 257 "pass-remarks}=pgo-instrumentation")); 258 259 static cl::opt<bool> PGOInstrumentEntry( 260 "pgo-instrument-entry", cl::init(false), cl::Hidden, 261 cl::desc("Force to instrument function entry basicblock.")); 262 263 static cl::opt<bool> PGOFunctionEntryCoverage( 264 "pgo-function-entry-coverage", cl::Hidden, 265 cl::desc( 266 "Use this option to enable function entry coverage instrumentation.")); 267 268 static cl::opt<bool> PGOBlockCoverage( 269 "pgo-block-coverage", 270 cl::desc("Use this option to enable basic block coverage instrumentation")); 271 272 static cl::opt<bool> 273 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", 274 cl::desc("Create a dot file of CFGs with block " 275 "coverage inference information")); 276 277 static cl::opt<bool> PGOTemporalInstrumentation( 278 "pgo-temporal-instrumentation", 279 cl::desc("Use this option to enable temporal instrumentation")); 280 281 static cl::opt<bool> 282 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 283 cl::desc("Fix function entry count in profile use.")); 284 285 static cl::opt<bool> PGOVerifyHotBFI( 286 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 287 cl::desc("Print out the non-match BFI count if a hot raw profile count " 288 "becomes non-hot, or a cold raw profile count becomes hot. " 289 "The print is enabled under -Rpass-analysis=pgo, or " 290 "internal option -pass-remakrs-analysis=pgo.")); 291 292 static cl::opt<bool> PGOVerifyBFI( 293 "pgo-verify-bfi", cl::init(false), cl::Hidden, 294 cl::desc("Print out mismatched BFI counts after setting profile metadata " 295 "The print is enabled under -Rpass-analysis=pgo, or " 296 "internal option -pass-remakrs-analysis=pgo.")); 297 298 static cl::opt<unsigned> PGOVerifyBFIRatio( 299 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, 300 cl::desc("Set the threshold for pgo-verify-bfi: only print out " 301 "mismatched BFI if the difference percentage is greater than " 302 "this value (in percentage).")); 303 304 static cl::opt<unsigned> PGOVerifyBFICutoff( 305 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, 306 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " 307 "profile count value is below.")); 308 309 static cl::opt<std::string> PGOTraceFuncHash( 310 "pgo-trace-func-hash", cl::init("-"), cl::Hidden, 311 cl::value_desc("function name"), 312 cl::desc("Trace the hash of the function with this name.")); 313 314 static cl::opt<unsigned> PGOFunctionSizeThreshold( 315 "pgo-function-size-threshold", cl::Hidden, 316 cl::desc("Do not instrument functions smaller than this threshold.")); 317 318 static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold( 319 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, 320 cl::desc("Do not instrument functions with the number of critical edges " 321 " greater than this threshold.")); 322 323 extern cl::opt<unsigned> MaxNumVTableAnnotations; 324 325 namespace llvm { 326 // Command line option to turn on CFG dot dump after profile annotation. 327 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 328 extern cl::opt<PGOViewCountsType> PGOViewCounts; 329 330 // Command line option to specify the name of the function for CFG dump 331 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 332 extern cl::opt<std::string> ViewBlockFreqFuncName; 333 334 // Command line option to enable vtable value profiling. Defined in 335 // ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= 336 extern cl::opt<bool> EnableVTableValueProfiling; 337 extern cl::opt<bool> EnableVTableProfileUse; 338 extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate; 339 } // namespace llvm 340 341 bool shouldInstrumentEntryBB() { 342 return PGOInstrumentEntry || 343 PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); 344 } 345 346 // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx 347 // profiling implicitly captures indirect call cases, but not other values. 348 // Supporting other values is relatively straight-forward - just another counter 349 // range within the context. 350 bool isValueProfilingDisabled() { 351 return DisableValueProfiling || 352 PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); 353 } 354 355 // Return a string describing the branch condition that can be 356 // used in static branch probability heuristics: 357 static std::string getBranchCondString(Instruction *TI) { 358 BranchInst *BI = dyn_cast<BranchInst>(TI); 359 if (!BI || !BI->isConditional()) 360 return std::string(); 361 362 Value *Cond = BI->getCondition(); 363 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 364 if (!CI) 365 return std::string(); 366 367 std::string result; 368 raw_string_ostream OS(result); 369 OS << CI->getPredicate() << "_"; 370 CI->getOperand(0)->getType()->print(OS, true); 371 372 Value *RHS = CI->getOperand(1); 373 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 374 if (CV) { 375 if (CV->isZero()) 376 OS << "_Zero"; 377 else if (CV->isOne()) 378 OS << "_One"; 379 else if (CV->isMinusOne()) 380 OS << "_MinusOne"; 381 else 382 OS << "_Const"; 383 } 384 OS.flush(); 385 return result; 386 } 387 388 static const char *ValueProfKindDescr[] = { 389 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 390 #include "llvm/ProfileData/InstrProfData.inc" 391 }; 392 393 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 394 // aware this is an ir_level profile so it can set the version flag. 395 static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { 396 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 397 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 398 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 399 if (IsCS) 400 ProfileVersion |= VARIANT_MASK_CSIR_PROF; 401 if (shouldInstrumentEntryBB()) 402 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; 403 if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) 404 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; 405 if (PGOFunctionEntryCoverage) 406 ProfileVersion |= 407 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; 408 if (PGOBlockCoverage) 409 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE; 410 if (PGOTemporalInstrumentation) 411 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF; 412 auto IRLevelVersionVariable = new GlobalVariable( 413 M, IntTy64, true, GlobalValue::WeakAnyLinkage, 414 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); 415 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility); 416 Triple TT(M.getTargetTriple()); 417 if (TT.supportsCOMDAT()) { 418 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); 419 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); 420 } 421 return IRLevelVersionVariable; 422 } 423 424 namespace { 425 426 /// The select instruction visitor plays three roles specified 427 /// by the mode. In \c VM_counting mode, it simply counts the number of 428 /// select instructions. In \c VM_instrument mode, it inserts code to count 429 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 430 /// it reads the profile data and annotate the select instruction with metadata. 431 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 432 class PGOUseFunc; 433 434 /// Instruction Visitor class to visit select instructions. 435 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 436 Function &F; 437 unsigned NSIs = 0; // Number of select instructions instrumented. 438 VisitMode Mode = VM_counting; // Visiting mode. 439 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 440 unsigned TotalNumCtrs = 0; // Total number of counters 441 GlobalVariable *FuncNameVar = nullptr; 442 uint64_t FuncHash = 0; 443 PGOUseFunc *UseFunc = nullptr; 444 bool HasSingleByteCoverage; 445 446 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage) 447 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {} 448 449 void countSelects() { 450 NSIs = 0; 451 Mode = VM_counting; 452 visit(F); 453 } 454 455 // Visit the IR stream and instrument all select instructions. \p 456 // Ind is a pointer to the counter index variable; \p TotalNC 457 // is the total number of counters; \p FNV is the pointer to the 458 // PGO function name var; \p FHash is the function hash. 459 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV, 460 uint64_t FHash) { 461 Mode = VM_instrument; 462 CurCtrIdx = Ind; 463 TotalNumCtrs = TotalNC; 464 FuncHash = FHash; 465 FuncNameVar = FNV; 466 visit(F); 467 } 468 469 // Visit the IR stream and annotate all select instructions. 470 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) { 471 Mode = VM_annotate; 472 UseFunc = UF; 473 CurCtrIdx = Ind; 474 visit(F); 475 } 476 477 void instrumentOneSelectInst(SelectInst &SI); 478 void annotateOneSelectInst(SelectInst &SI); 479 480 // Visit \p SI instruction and perform tasks according to visit mode. 481 void visitSelectInst(SelectInst &SI); 482 483 // Return the number of select instructions. This needs be called after 484 // countSelects(). 485 unsigned getNumOfSelectInsts() const { return NSIs; } 486 }; 487 488 /// This class implements the CFG edges for the Minimum Spanning Tree (MST) 489 /// based instrumentation. 490 /// Note that the CFG can be a multi-graph. So there might be multiple edges 491 /// with the same SrcBB and DestBB. 492 struct PGOEdge { 493 BasicBlock *SrcBB; 494 BasicBlock *DestBB; 495 uint64_t Weight; 496 bool InMST = false; 497 bool Removed = false; 498 bool IsCritical = false; 499 500 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1) 501 : SrcBB(Src), DestBB(Dest), Weight(W) {} 502 503 /// Return the information string of an edge. 504 std::string infoString() const { 505 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 506 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)) 507 .str(); 508 } 509 }; 510 511 /// This class stores the auxiliary information for each BB in the MST. 512 struct PGOBBInfo { 513 PGOBBInfo *Group; 514 uint32_t Index; 515 uint32_t Rank = 0; 516 517 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {} 518 519 /// Return the information string of this object. 520 std::string infoString() const { 521 return (Twine("Index=") + Twine(Index)).str(); 522 } 523 }; 524 525 // This class implements the CFG edges. Note the CFG can be a multi-graph. 526 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 527 private: 528 Function &F; 529 530 // Is this is context-sensitive instrumentation. 531 bool IsCS; 532 533 // A map that stores the Comdat group in function F. 534 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 535 536 ValueProfileCollector VPC; 537 538 void computeCFGHash(); 539 void renameComdatFunction(); 540 541 public: 542 const TargetLibraryInfo &TLI; 543 std::vector<std::vector<VPCandidateInfo>> ValueSites; 544 SelectInstVisitor SIVisitor; 545 std::string FuncName; 546 std::string DeprecatedFuncName; 547 GlobalVariable *FuncNameVar; 548 549 // CFG hash value for this function. 550 uint64_t FunctionHash = 0; 551 552 // The Minimum Spanning Tree of function CFG. 553 CFGMST<Edge, BBInfo> MST; 554 555 const std::optional<BlockCoverageInference> BCI; 556 557 static std::optional<BlockCoverageInference> 558 constructBCI(Function &Func, bool HasSingleByteCoverage, 559 bool InstrumentFuncEntry) { 560 if (HasSingleByteCoverage) 561 return BlockCoverageInference(Func, InstrumentFuncEntry); 562 return {}; 563 } 564 565 // Collect all the BBs that will be instrumented, and store them in 566 // InstrumentBBs. 567 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 568 569 // Give an edge, find the BB that will be instrumented. 570 // Return nullptr if there is no BB to be instrumented. 571 BasicBlock *getInstrBB(Edge *E); 572 573 // Return the auxiliary BB information. 574 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 575 576 // Return the auxiliary BB information if available. 577 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 578 579 // Dump edges and BB information. 580 void dumpInfo(StringRef Str = "") const { 581 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + 582 " Hash: " + Twine(FunctionHash) + "\t" + Str); 583 } 584 585 FuncPGOInstrumentation( 586 Function &Func, TargetLibraryInfo &TLI, 587 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 588 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 589 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 590 bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false) 591 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 592 TLI(TLI), ValueSites(IPVK_Last + 1), 593 SIVisitor(Func, HasSingleByteCoverage), 594 MST(F, InstrumentFuncEntry, BPI, BFI), 595 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) { 596 if (BCI && PGOViewBlockCoverageGraph) 597 BCI->viewBlockCoverageGraph(); 598 // This should be done before CFG hash computation. 599 SIVisitor.countSelects(); 600 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 601 if (!IsCS) { 602 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 603 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 604 NumOfPGOBB += MST.bbInfoSize(); 605 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 606 if (EnableVTableValueProfiling) 607 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget); 608 } else { 609 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 610 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 611 NumOfCSPGOBB += MST.bbInfoSize(); 612 } 613 614 FuncName = getIRPGOFuncName(F); 615 DeprecatedFuncName = getPGOFuncName(F); 616 computeCFGHash(); 617 if (!ComdatMembers.empty()) 618 renameComdatFunction(); 619 LLVM_DEBUG(dumpInfo("after CFGMST")); 620 621 for (const auto &E : MST.allEdges()) { 622 if (E->Removed) 623 continue; 624 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 625 if (!E->InMST) 626 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 627 } 628 629 if (CreateGlobalVar) 630 FuncNameVar = createPGOFuncNameVar(F, FuncName); 631 } 632 }; 633 634 } // end anonymous namespace 635 636 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 637 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 638 // of selects, indirect calls, mem ops and edges. 639 template <class Edge, class BBInfo> 640 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 641 std::vector<uint8_t> Indexes; 642 JamCRC JC; 643 for (auto &BB : F) { 644 for (BasicBlock *Succ : successors(&BB)) { 645 auto BI = findBBInfo(Succ); 646 if (BI == nullptr) 647 continue; 648 uint32_t Index = BI->Index; 649 for (int J = 0; J < 4; J++) 650 Indexes.push_back((uint8_t)(Index >> (J * 8))); 651 } 652 } 653 JC.update(Indexes); 654 655 JamCRC JCH; 656 // The higher 32 bits. 657 auto updateJCH = [&JCH](uint64_t Num) { 658 uint8_t Data[8]; 659 support::endian::write64le(Data, Num); 660 JCH.update(Data); 661 }; 662 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 663 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 664 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 665 if (BCI) { 666 updateJCH(BCI->getInstrumentedBlocksHash()); 667 } else { 668 updateJCH((uint64_t)MST.numEdges()); 669 } 670 671 // Hash format for context sensitive profile. Reserve 4 bits for other 672 // information. 673 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 674 675 // Reserve bit 60-63 for other information purpose. 676 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 677 if (IsCS) 678 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 679 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 680 << " CRC = " << JC.getCRC() 681 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 682 << ", Edges = " << MST.numEdges() << ", ICSites = " 683 << ValueSites[IPVK_IndirectCallTarget].size() 684 << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 685 << ", High32 CRC = " << JCH.getCRC() 686 << ", Hash = " << FunctionHash << "\n";); 687 688 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash)) 689 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash 690 << " in building " << F.getParent()->getSourceFileName() << "\n"; 691 } 692 693 // Check if we can safely rename this Comdat function. 694 static bool canRenameComdat( 695 Function &F, 696 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 697 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 698 return false; 699 700 // FIXME: Current only handle those Comdat groups that only containing one 701 // function. 702 // (1) For a Comdat group containing multiple functions, we need to have a 703 // unique postfix based on the hashes for each function. There is a 704 // non-trivial code refactoring to do this efficiently. 705 // (2) Variables can not be renamed, so we can not rename Comdat function in a 706 // group including global vars. 707 Comdat *C = F.getComdat(); 708 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 709 assert(!isa<GlobalAlias>(CM.second)); 710 Function *FM = dyn_cast<Function>(CM.second); 711 if (FM != &F) 712 return false; 713 } 714 return true; 715 } 716 717 // Append the CFGHash to the Comdat function name. 718 template <class Edge, class BBInfo> 719 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 720 if (!canRenameComdat(F, ComdatMembers)) 721 return; 722 std::string OrigName = F.getName().str(); 723 std::string NewFuncName = 724 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 725 F.setName(Twine(NewFuncName)); 726 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 727 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 728 Comdat *NewComdat; 729 Module *M = F.getParent(); 730 // For AvailableExternallyLinkage functions, change the linkage to 731 // LinkOnceODR and put them into comdat. This is because after renaming, there 732 // is no backup external copy available for the function. 733 if (!F.hasComdat()) { 734 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 735 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 736 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 737 F.setComdat(NewComdat); 738 return; 739 } 740 741 // This function belongs to a single function Comdat group. 742 Comdat *OrigComdat = F.getComdat(); 743 std::string NewComdatName = 744 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 745 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 746 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 747 748 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 749 // Must be a function. 750 cast<Function>(CM.second)->setComdat(NewComdat); 751 } 752 } 753 754 /// Collect all the BBs that will be instruments and add them to 755 /// `InstrumentBBs`. 756 template <class Edge, class BBInfo> 757 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 758 std::vector<BasicBlock *> &InstrumentBBs) { 759 if (BCI) { 760 for (auto &BB : F) 761 if (BCI->shouldInstrumentBlock(BB)) 762 InstrumentBBs.push_back(&BB); 763 return; 764 } 765 766 // Use a worklist as we will update the vector during the iteration. 767 std::vector<Edge *> EdgeList; 768 EdgeList.reserve(MST.numEdges()); 769 for (const auto &E : MST.allEdges()) 770 EdgeList.push_back(E.get()); 771 772 for (auto &E : EdgeList) { 773 BasicBlock *InstrBB = getInstrBB(E); 774 if (InstrBB) 775 InstrumentBBs.push_back(InstrBB); 776 } 777 } 778 779 // Given a CFG E to be instrumented, find which BB to place the instrumented 780 // code. The function will split the critical edge if necessary. 781 template <class Edge, class BBInfo> 782 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 783 if (E->InMST || E->Removed) 784 return nullptr; 785 786 BasicBlock *SrcBB = E->SrcBB; 787 BasicBlock *DestBB = E->DestBB; 788 // For a fake edge, instrument the real BB. 789 if (SrcBB == nullptr) 790 return DestBB; 791 if (DestBB == nullptr) 792 return SrcBB; 793 794 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 795 // There are basic blocks (such as catchswitch) cannot be instrumented. 796 // If the returned first insertion point is the end of BB, skip this BB. 797 if (BB->getFirstInsertionPt() == BB->end()) 798 return nullptr; 799 return BB; 800 }; 801 802 // Instrument the SrcBB if it has a single successor, 803 // otherwise, the DestBB if this is not a critical edge. 804 Instruction *TI = SrcBB->getTerminator(); 805 if (TI->getNumSuccessors() <= 1) 806 return canInstrument(SrcBB); 807 if (!E->IsCritical) 808 return canInstrument(DestBB); 809 810 // Some IndirectBr critical edges cannot be split by the previous 811 // SplitIndirectBrCriticalEdges call. Bail out. 812 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 813 BasicBlock *InstrBB = 814 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 815 if (!InstrBB) { 816 LLVM_DEBUG( 817 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 818 return nullptr; 819 } 820 // For a critical edge, we have to split. Instrument the newly 821 // created BB. 822 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 823 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 824 << " --> " << getBBInfo(DestBB).Index << "\n"); 825 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 826 MST.addEdge(SrcBB, InstrBB, 0); 827 // Second one: Add new edge of InstrBB->DestBB. 828 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 829 NewEdge1.InMST = true; 830 E->Removed = true; 831 832 return canInstrument(InstrBB); 833 } 834 835 // When generating value profiling calls on Windows routines that make use of 836 // handler funclets for exception processing an operand bundle needs to attached 837 // to the called function. This routine will set \p OpBundles to contain the 838 // funclet information, if any is needed, that should be placed on the generated 839 // value profiling call for the value profile candidate call. 840 static void 841 populateEHOperandBundle(VPCandidateInfo &Cand, 842 DenseMap<BasicBlock *, ColorVector> &BlockColors, 843 SmallVectorImpl<OperandBundleDef> &OpBundles) { 844 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 845 if (!OrigCall) 846 return; 847 848 if (!isa<IntrinsicInst>(OrigCall)) { 849 // The instrumentation call should belong to the same funclet as a 850 // non-intrinsic call, so just copy the operand bundle, if any exists. 851 std::optional<OperandBundleUse> ParentFunclet = 852 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 853 if (ParentFunclet) 854 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 855 } else { 856 // Intrinsics or other instructions do not get funclet information from the 857 // front-end. Need to use the BlockColors that was computed by the routine 858 // colorEHFunclets to determine whether a funclet is needed. 859 if (!BlockColors.empty()) { 860 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 861 assert(CV.size() == 1 && "non-unique color for block!"); 862 Instruction *EHPad = CV.front()->getFirstNonPHI(); 863 if (EHPad->isEHPad()) 864 OpBundles.emplace_back("funclet", EHPad); 865 } 866 } 867 } 868 869 // Visit all edge and instrument the edges not in MST, and do value profiling. 870 // Critical edges will be split. 871 static void instrumentOneFunc( 872 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 873 BlockFrequencyInfo *BFI, 874 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 875 bool IsCS) { 876 if (!PGOBlockCoverage) { 877 // Split indirectbr critical edges here before computing the MST rather than 878 // later in getInstrBB() to avoid invalidating it. 879 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 880 } 881 882 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo( 883 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(), 884 PGOBlockCoverage); 885 886 auto Name = FuncInfo.FuncNameVar; 887 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), 888 FuncInfo.FunctionHash); 889 if (PGOFunctionEntryCoverage) { 890 auto &EntryBB = F.getEntryBlock(); 891 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 892 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, 893 // i32 <index>) 894 Builder.CreateCall( 895 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover), 896 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); 897 return; 898 } 899 900 std::vector<BasicBlock *> InstrumentBBs; 901 FuncInfo.getInstrumentBBs(InstrumentBBs); 902 unsigned NumCounters = 903 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 904 905 if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) { 906 auto *CSIntrinsic = 907 Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite); 908 // We want to count the instrumentable callsites, then instrument them. This 909 // is because the llvm.instrprof.callsite intrinsic has an argument (like 910 // the other instrprof intrinsics) capturing the total number of 911 // instrumented objects (counters, or callsites, in this case). In this 912 // case, we want that value so we can readily pass it to the compiler-rt 913 // APIs that may have to allocate memory based on the nr of callsites. 914 // The traversal logic is the same for both counting and instrumentation, 915 // just needs to be done in succession. 916 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) { 917 for (auto &BB : F) 918 for (auto &Instr : BB) 919 if (auto *CS = dyn_cast<CallBase>(&Instr)) { 920 if ((CS->getCalledFunction() && 921 CS->getCalledFunction()->isIntrinsic()) || 922 dyn_cast<InlineAsm>(CS->getCalledOperand())) 923 continue; 924 Visitor(CS); 925 } 926 }; 927 // First, count callsites. 928 uint32_t TotalNrCallsites = 0; 929 Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; }); 930 931 // Now instrument. 932 uint32_t CallsiteIndex = 0; 933 Visit([&](auto *CB) { 934 IRBuilder<> Builder(CB); 935 Builder.CreateCall(CSIntrinsic, 936 {Name, CFGHash, Builder.getInt32(TotalNrCallsites), 937 Builder.getInt32(CallsiteIndex++), 938 CB->getCalledOperand()}); 939 }); 940 } 941 942 uint32_t I = 0; 943 if (PGOTemporalInstrumentation) { 944 NumCounters += PGOBlockCoverage ? 8 : 1; 945 auto &EntryBB = F.getEntryBlock(); 946 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 947 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>, 948 // i32 <index>) 949 Builder.CreateCall( 950 Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp), 951 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)}); 952 I += PGOBlockCoverage ? 8 : 1; 953 } 954 955 for (auto *InstrBB : InstrumentBBs) { 956 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 957 assert(Builder.GetInsertPoint() != InstrBB->end() && 958 "Cannot get the Instrumentation point"); 959 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, 960 // i32 <index>) 961 Builder.CreateCall( 962 Intrinsic::getDeclaration(M, PGOBlockCoverage 963 ? Intrinsic::instrprof_cover 964 : Intrinsic::instrprof_increment), 965 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)}); 966 } 967 968 // Now instrument select instructions: 969 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar, 970 FuncInfo.FunctionHash); 971 assert(I == NumCounters); 972 973 if (isValueProfilingDisabled()) 974 return; 975 976 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 977 978 // Intrinsic function calls do not have funclet operand bundles needed for 979 // Windows exception handling attached to them. However, if value profiling is 980 // inserted for one of these calls, then a funclet value will need to be set 981 // on the instrumentation call based on the funclet coloring. 982 DenseMap<BasicBlock *, ColorVector> BlockColors; 983 if (F.hasPersonalityFn() && 984 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 985 BlockColors = colorEHFunclets(F); 986 987 // For each VP Kind, walk the VP candidates and instrument each one. 988 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 989 unsigned SiteIndex = 0; 990 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 991 continue; 992 993 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 994 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 995 << " site: CallSite Index = " << SiteIndex << "\n"); 996 997 IRBuilder<> Builder(Cand.InsertPt); 998 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 999 "Cannot get the Instrumentation point"); 1000 1001 Value *ToProfile = nullptr; 1002 if (Cand.V->getType()->isIntegerTy()) 1003 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 1004 else if (Cand.V->getType()->isPointerTy()) 1005 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 1006 assert(ToProfile && "value profiling Value is of unexpected type"); 1007 1008 SmallVector<OperandBundleDef, 1> OpBundles; 1009 populateEHOperandBundle(Cand, BlockColors, OpBundles); 1010 Builder.CreateCall( 1011 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 1012 {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash), 1013 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 1014 OpBundles); 1015 } 1016 } // IPVK_First <= Kind <= IPVK_Last 1017 } 1018 1019 namespace { 1020 1021 // This class represents a CFG edge in profile use compilation. 1022 struct PGOUseEdge : public PGOEdge { 1023 using PGOEdge::PGOEdge; 1024 1025 std::optional<uint64_t> Count; 1026 1027 // Set edge count value 1028 void setEdgeCount(uint64_t Value) { Count = Value; } 1029 1030 // Return the information string for this object. 1031 std::string infoString() const { 1032 if (!Count) 1033 return PGOEdge::infoString(); 1034 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str(); 1035 } 1036 }; 1037 1038 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 1039 1040 // This class stores the auxiliary information for each BB. 1041 struct PGOUseBBInfo : public PGOBBInfo { 1042 std::optional<uint64_t> Count; 1043 int32_t UnknownCountInEdge = 0; 1044 int32_t UnknownCountOutEdge = 0; 1045 DirectEdges InEdges; 1046 DirectEdges OutEdges; 1047 1048 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {} 1049 1050 // Set the profile count value for this BB. 1051 void setBBInfoCount(uint64_t Value) { Count = Value; } 1052 1053 // Return the information string of this object. 1054 std::string infoString() const { 1055 if (!Count) 1056 return PGOBBInfo::infoString(); 1057 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str(); 1058 } 1059 1060 // Add an OutEdge and update the edge count. 1061 void addOutEdge(PGOUseEdge *E) { 1062 OutEdges.push_back(E); 1063 UnknownCountOutEdge++; 1064 } 1065 1066 // Add an InEdge and update the edge count. 1067 void addInEdge(PGOUseEdge *E) { 1068 InEdges.push_back(E); 1069 UnknownCountInEdge++; 1070 } 1071 }; 1072 1073 } // end anonymous namespace 1074 1075 // Sum up the count values for all the edges. 1076 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 1077 uint64_t Total = 0; 1078 for (const auto &E : Edges) { 1079 if (E->Removed) 1080 continue; 1081 if (E->Count) 1082 Total += *E->Count; 1083 } 1084 return Total; 1085 } 1086 1087 namespace { 1088 1089 class PGOUseFunc { 1090 public: 1091 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1092 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1093 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1094 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry, 1095 bool HasSingleByteCoverage) 1096 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1097 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1098 InstrumentFuncEntry, HasSingleByteCoverage), 1099 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {} 1100 1101 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum); 1102 1103 // Read counts for the instrumented BB from profile. 1104 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1105 InstrProfRecord::CountPseudoKind &PseudoKind); 1106 1107 // Populate the counts for all BBs. 1108 void populateCounters(); 1109 1110 // Set block coverage based on profile coverage values. 1111 void populateCoverage(IndexedInstrProfReader *PGOReader); 1112 1113 // Set the branch weights based on the count values. 1114 void setBranchWeights(); 1115 1116 // Annotate the value profile call sites for all value kind. 1117 void annotateValueSites(); 1118 1119 // Annotate the value profile call sites for one value kind. 1120 void annotateValueSites(uint32_t Kind); 1121 1122 // Annotate the irreducible loop header weights. 1123 void annotateIrrLoopHeaderWeights(); 1124 1125 // The hotness of the function from the profile count. 1126 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1127 1128 // Return the function hotness from the profile. 1129 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1130 1131 // Return the function hash. 1132 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1133 1134 // Return the profile record for this function; 1135 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1136 1137 // Return the auxiliary BB information. 1138 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const { 1139 return FuncInfo.getBBInfo(BB); 1140 } 1141 1142 // Return the auxiliary BB information if available. 1143 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const { 1144 return FuncInfo.findBBInfo(BB); 1145 } 1146 1147 Function &getFunc() const { return F; } 1148 1149 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); } 1150 1151 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1152 1153 private: 1154 Function &F; 1155 Module *M; 1156 BlockFrequencyInfo *BFI; 1157 ProfileSummaryInfo *PSI; 1158 1159 // This member stores the shared information with class PGOGenFunc. 1160 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo; 1161 1162 // The maximum count value in the profile. This is only used in PGO use 1163 // compilation. 1164 uint64_t ProgramMaxCount; 1165 1166 // Position of counter that remains to be read. 1167 uint32_t CountPosition = 0; 1168 1169 // Total size of the profile count for this function. 1170 uint32_t ProfileCountSize = 0; 1171 1172 // ProfileRecord for this function. 1173 InstrProfRecord ProfileRecord; 1174 1175 // Function hotness info derived from profile. 1176 FuncFreqAttr FreqAttr; 1177 1178 // Is to use the context sensitive profile. 1179 bool IsCS; 1180 1181 ValueProfileCollector VPC; 1182 1183 // Find the Instrumented BB and set the value. Return false on error. 1184 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1185 1186 // Set the edge counter value for the unknown edge -- there should be only 1187 // one unknown edge. 1188 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1189 1190 // Set the hot/cold inline hints based on the count values. 1191 // FIXME: This function should be removed once the functionality in 1192 // the inliner is implemented. 1193 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1194 if (PSI->isHotCount(EntryCount)) 1195 FreqAttr = FFA_Hot; 1196 else if (PSI->isColdCount(MaxCount)) 1197 FreqAttr = FFA_Cold; 1198 } 1199 }; 1200 1201 } // end anonymous namespace 1202 1203 /// Set up InEdges/OutEdges for all BBs in the MST. 1204 static void setupBBInfoEdges( 1205 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) { 1206 // This is not required when there is block coverage inference. 1207 if (FuncInfo.BCI) 1208 return; 1209 for (const auto &E : FuncInfo.MST.allEdges()) { 1210 if (E->Removed) 1211 continue; 1212 const BasicBlock *SrcBB = E->SrcBB; 1213 const BasicBlock *DestBB = E->DestBB; 1214 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB); 1215 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB); 1216 SrcInfo.addOutEdge(E.get()); 1217 DestInfo.addInEdge(E.get()); 1218 } 1219 } 1220 1221 // Visit all the edges and assign the count value for the instrumented 1222 // edges and the BB. Return false on error. 1223 bool PGOUseFunc::setInstrumentedCounts( 1224 const std::vector<uint64_t> &CountFromProfile) { 1225 1226 std::vector<BasicBlock *> InstrumentBBs; 1227 FuncInfo.getInstrumentBBs(InstrumentBBs); 1228 1229 setupBBInfoEdges(FuncInfo); 1230 1231 unsigned NumCounters = 1232 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1233 // The number of counters here should match the number of counters 1234 // in profile. Return if they mismatch. 1235 if (NumCounters != CountFromProfile.size()) { 1236 return false; 1237 } 1238 auto *FuncEntry = &*F.begin(); 1239 1240 // Set the profile count to the Instrumented BBs. 1241 uint32_t I = 0; 1242 for (BasicBlock *InstrBB : InstrumentBBs) { 1243 uint64_t CountValue = CountFromProfile[I++]; 1244 PGOUseBBInfo &Info = getBBInfo(InstrBB); 1245 // If we reach here, we know that we have some nonzero count 1246 // values in this function. The entry count should not be 0. 1247 // Fix it if necessary. 1248 if (InstrBB == FuncEntry && CountValue == 0) 1249 CountValue = 1; 1250 Info.setBBInfoCount(CountValue); 1251 } 1252 ProfileCountSize = CountFromProfile.size(); 1253 CountPosition = I; 1254 1255 // Set the edge count and update the count of unknown edges for BBs. 1256 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1257 E->setEdgeCount(Value); 1258 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1259 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1260 }; 1261 1262 // Set the profile count the Instrumented edges. There are BBs that not in 1263 // MST but not instrumented. Need to set the edge count value so that we can 1264 // populate the profile counts later. 1265 for (const auto &E : FuncInfo.MST.allEdges()) { 1266 if (E->Removed || E->InMST) 1267 continue; 1268 const BasicBlock *SrcBB = E->SrcBB; 1269 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB); 1270 1271 // If only one out-edge, the edge profile count should be the same as BB 1272 // profile count. 1273 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1) 1274 setEdgeCount(E.get(), *SrcInfo.Count); 1275 else { 1276 const BasicBlock *DestBB = E->DestBB; 1277 PGOUseBBInfo &DestInfo = getBBInfo(DestBB); 1278 // If only one in-edge, the edge profile count should be the same as BB 1279 // profile count. 1280 if (DestInfo.Count && DestInfo.InEdges.size() == 1) 1281 setEdgeCount(E.get(), *DestInfo.Count); 1282 } 1283 if (E->Count) 1284 continue; 1285 // E's count should have been set from profile. If not, this meenas E skips 1286 // the instrumentation. We set the count to 0. 1287 setEdgeCount(E.get(), 0); 1288 } 1289 return true; 1290 } 1291 1292 // Set the count value for the unknown edge. There should be one and only one 1293 // unknown edge in Edges vector. 1294 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1295 for (auto &E : Edges) { 1296 if (E->Count) 1297 continue; 1298 E->setEdgeCount(Value); 1299 1300 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1301 getBBInfo(E->DestBB).UnknownCountInEdge--; 1302 return; 1303 } 1304 llvm_unreachable("Cannot find the unknown count edge"); 1305 } 1306 1307 // Emit function metadata indicating PGO profile mismatch. 1308 static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx) { 1309 const char MetadataName[] = "instr_prof_hash_mismatch"; 1310 SmallVector<Metadata *, 2> Names; 1311 // If this metadata already exists, ignore. 1312 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1313 if (Existing) { 1314 MDTuple *Tuple = cast<MDTuple>(Existing); 1315 for (const auto &N : Tuple->operands()) { 1316 if (N.equalsStr(MetadataName)) 1317 return; 1318 Names.push_back(N.get()); 1319 } 1320 } 1321 1322 MDBuilder MDB(ctx); 1323 Names.push_back(MDB.createString(MetadataName)); 1324 MDNode *MD = MDTuple::get(ctx, Names); 1325 F.setMetadata(LLVMContext::MD_annotation, MD); 1326 } 1327 1328 void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) { 1329 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) { 1330 auto &Ctx = M->getContext(); 1331 auto Err = IPE.get(); 1332 bool SkipWarning = false; 1333 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1334 << FuncInfo.FuncName << ": "); 1335 if (Err == instrprof_error::unknown_function) { 1336 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1337 SkipWarning = !PGOWarnMissing; 1338 LLVM_DEBUG(dbgs() << "unknown function"); 1339 } else if (Err == instrprof_error::hash_mismatch || 1340 Err == instrprof_error::malformed) { 1341 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1342 SkipWarning = 1343 NoPGOWarnMismatch || 1344 (NoPGOWarnMismatchComdatWeak && 1345 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage || 1346 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1347 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash 1348 << " skip=" << SkipWarning << ")"); 1349 // Emit function metadata indicating PGO profile mismatch. 1350 annotateFunctionWithHashMismatch(F, M->getContext()); 1351 } 1352 1353 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1354 if (SkipWarning) 1355 return; 1356 1357 std::string Msg = 1358 IPE.message() + std::string(" ") + F.getName().str() + 1359 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) + 1360 std::string(" up to ") + std::to_string(MismatchedFuncSum) + 1361 std::string(" count discarded"); 1362 1363 Ctx.diagnose( 1364 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1365 }); 1366 } 1367 1368 // Read the profile from ProfileFileName and assign the value to the 1369 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1370 // Return true if the profile are successfully read, and false on errors. 1371 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1372 InstrProfRecord::CountPseudoKind &PseudoKind) { 1373 auto &Ctx = M->getContext(); 1374 uint64_t MismatchedFuncSum = 0; 1375 Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( 1376 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName, 1377 &MismatchedFuncSum); 1378 if (Error E = Result.takeError()) { 1379 handleInstrProfError(std::move(E), MismatchedFuncSum); 1380 return false; 1381 } 1382 ProfileRecord = std::move(Result.get()); 1383 PseudoKind = ProfileRecord.getCountPseudoKind(); 1384 if (PseudoKind != InstrProfRecord::NotPseudo) { 1385 return true; 1386 } 1387 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1388 1389 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1390 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1391 1392 uint64_t ValueSum = 0; 1393 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1394 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1395 ValueSum += CountFromProfile[I]; 1396 } 1397 AllZeros = (ValueSum == 0); 1398 1399 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1400 1401 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1402 getBBInfo(nullptr).UnknownCountInEdge = 2; 1403 1404 if (!setInstrumentedCounts(CountFromProfile)) { 1405 LLVM_DEBUG( 1406 dbgs() << "Inconsistent number of counts, skipping this function"); 1407 Ctx.diagnose(DiagnosticInfoPGOProfile( 1408 M->getName().data(), 1409 Twine("Inconsistent number of counts in ") + F.getName().str() + 1410 Twine(": the profile may be stale or there is a function name " 1411 "collision."), 1412 DS_Warning)); 1413 return false; 1414 } 1415 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1416 return true; 1417 } 1418 1419 void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) { 1420 uint64_t MismatchedFuncSum = 0; 1421 Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( 1422 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName, 1423 &MismatchedFuncSum); 1424 if (auto Err = Result.takeError()) { 1425 handleInstrProfError(std::move(Err), MismatchedFuncSum); 1426 return; 1427 } 1428 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1429 1430 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts; 1431 DenseMap<const BasicBlock *, bool> Coverage; 1432 unsigned Index = 0; 1433 for (auto &BB : F) 1434 if (FuncInfo.BCI->shouldInstrumentBlock(BB)) 1435 Coverage[&BB] = (CountsFromProfile[Index++] != 0); 1436 assert(Index == CountsFromProfile.size()); 1437 1438 // For each B in InverseDependencies[A], if A is covered then B is covered. 1439 DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>> 1440 InverseDependencies; 1441 for (auto &BB : F) { 1442 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) { 1443 // If Dep is covered then BB is covered. 1444 InverseDependencies[Dep].insert(&BB); 1445 } 1446 } 1447 1448 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm. 1449 std::stack<const BasicBlock *> CoveredBlocksToProcess; 1450 for (auto &[BB, IsCovered] : Coverage) 1451 if (IsCovered) 1452 CoveredBlocksToProcess.push(BB); 1453 1454 while (!CoveredBlocksToProcess.empty()) { 1455 auto *CoveredBlock = CoveredBlocksToProcess.top(); 1456 assert(Coverage[CoveredBlock]); 1457 CoveredBlocksToProcess.pop(); 1458 for (auto *BB : InverseDependencies[CoveredBlock]) { 1459 // If CoveredBlock is covered then BB is covered. 1460 if (Coverage[BB]) 1461 continue; 1462 Coverage[BB] = true; 1463 CoveredBlocksToProcess.push(BB); 1464 } 1465 } 1466 1467 // Annotate block coverage. 1468 MDBuilder MDB(F.getContext()); 1469 // We set the entry count to 10000 if the entry block is covered so that BFI 1470 // can propagate a fraction of this count to the other covered blocks. 1471 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0); 1472 for (auto &BB : F) { 1473 // For a block A and its successor B, we set the edge weight as follows: 1474 // If A is covered and B is covered, set weight=1. 1475 // If A is covered and B is uncovered, set weight=0. 1476 // If A is uncovered, set weight=1. 1477 // This setup will allow BFI to give nonzero profile counts to only covered 1478 // blocks. 1479 SmallVector<uint32_t, 4> Weights; 1480 for (auto *Succ : successors(&BB)) 1481 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0); 1482 if (Weights.size() >= 2) 1483 llvm::setBranchWeights(*BB.getTerminator(), Weights, 1484 /*IsExpected=*/false); 1485 } 1486 1487 unsigned NumCorruptCoverage = 0; 1488 DominatorTree DT(F); 1489 LoopInfo LI(DT); 1490 BranchProbabilityInfo BPI(F, LI); 1491 BlockFrequencyInfo BFI(F, BPI, LI); 1492 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> { 1493 if (auto C = BFI.getBlockProfileCount(&BB)) 1494 return C == 0; 1495 return {}; 1496 }; 1497 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n"); 1498 for (auto &BB : F) { 1499 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ") 1500 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName() 1501 << "\n"); 1502 // In some cases it is possible to find a covered block that has no covered 1503 // successors, e.g., when a block calls a function that may call exit(). In 1504 // those cases, BFI could find its successor to be covered while BCI could 1505 // find its successor to be dead. 1506 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) { 1507 LLVM_DEBUG( 1508 dbgs() << "Found inconsistent block covearge for " << BB.getName() 1509 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI=" 1510 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n"); 1511 ++NumCorruptCoverage; 1512 } 1513 if (Coverage[&BB]) 1514 ++NumCoveredBlocks; 1515 } 1516 if (PGOVerifyBFI && NumCorruptCoverage) { 1517 auto &Ctx = M->getContext(); 1518 Ctx.diagnose(DiagnosticInfoPGOProfile( 1519 M->getName().data(), 1520 Twine("Found inconsistent block coverage for function ") + F.getName() + 1521 " in " + Twine(NumCorruptCoverage) + " blocks.", 1522 DS_Warning)); 1523 } 1524 if (PGOViewBlockCoverageGraph) 1525 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage); 1526 } 1527 1528 // Populate the counters from instrumented BBs to all BBs. 1529 // In the end of this operation, all BBs should have a valid count value. 1530 void PGOUseFunc::populateCounters() { 1531 bool Changes = true; 1532 unsigned NumPasses = 0; 1533 while (Changes) { 1534 NumPasses++; 1535 Changes = false; 1536 1537 // For efficient traversal, it's better to start from the end as most 1538 // of the instrumented edges are at the end. 1539 for (auto &BB : reverse(F)) { 1540 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB); 1541 if (UseBBInfo == nullptr) 1542 continue; 1543 if (!UseBBInfo->Count) { 1544 if (UseBBInfo->UnknownCountOutEdge == 0) { 1545 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges); 1546 Changes = true; 1547 } else if (UseBBInfo->UnknownCountInEdge == 0) { 1548 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges); 1549 Changes = true; 1550 } 1551 } 1552 if (UseBBInfo->Count) { 1553 if (UseBBInfo->UnknownCountOutEdge == 1) { 1554 uint64_t Total = 0; 1555 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges); 1556 // If the one of the successor block can early terminate (no-return), 1557 // we can end up with situation where out edge sum count is larger as 1558 // the source BB's count is collected by a post-dominated block. 1559 if (*UseBBInfo->Count > OutSum) 1560 Total = *UseBBInfo->Count - OutSum; 1561 setEdgeCount(UseBBInfo->OutEdges, Total); 1562 Changes = true; 1563 } 1564 if (UseBBInfo->UnknownCountInEdge == 1) { 1565 uint64_t Total = 0; 1566 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges); 1567 if (*UseBBInfo->Count > InSum) 1568 Total = *UseBBInfo->Count - InSum; 1569 setEdgeCount(UseBBInfo->InEdges, Total); 1570 Changes = true; 1571 } 1572 } 1573 } 1574 } 1575 1576 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1577 (void)NumPasses; 1578 #ifndef NDEBUG 1579 // Assert every BB has a valid counter. 1580 for (auto &BB : F) { 1581 auto BI = findBBInfo(&BB); 1582 if (BI == nullptr) 1583 continue; 1584 assert(BI->Count && "BB count is not valid"); 1585 } 1586 #endif 1587 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count; 1588 uint64_t FuncMaxCount = FuncEntryCount; 1589 for (auto &BB : F) { 1590 auto BI = findBBInfo(&BB); 1591 if (BI == nullptr) 1592 continue; 1593 FuncMaxCount = std::max(FuncMaxCount, *BI->Count); 1594 } 1595 1596 // Fix the obviously inconsistent entry count. 1597 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1598 FuncEntryCount = 1; 1599 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1600 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1601 1602 // Now annotate select instructions 1603 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition); 1604 assert(CountPosition == ProfileCountSize); 1605 1606 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1607 } 1608 1609 // Assign the scaled count values to the BB with multiple out edges. 1610 void PGOUseFunc::setBranchWeights() { 1611 // Generate MD_prof metadata for every branch instruction. 1612 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1613 << " IsCS=" << IsCS << "\n"); 1614 for (auto &BB : F) { 1615 Instruction *TI = BB.getTerminator(); 1616 if (TI->getNumSuccessors() < 2) 1617 continue; 1618 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1619 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) || 1620 isa<CallBrInst>(TI))) 1621 continue; 1622 1623 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB); 1624 if (!*BBCountInfo.Count) 1625 continue; 1626 1627 // We have a non-zero Branch BB. 1628 1629 // SuccessorCount can be greater than OutEdgesCount, because 1630 // removed edges don't appear in OutEdges. 1631 unsigned OutEdgesCount = BBCountInfo.OutEdges.size(); 1632 unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors(); 1633 assert(OutEdgesCount <= SuccessorCount); 1634 1635 SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0); 1636 uint64_t MaxCount = 0; 1637 for (unsigned It = 0; It < OutEdgesCount; It++) { 1638 const PGOUseEdge *E = BBCountInfo.OutEdges[It]; 1639 const BasicBlock *SrcBB = E->SrcBB; 1640 const BasicBlock *DestBB = E->DestBB; 1641 if (DestBB == nullptr) 1642 continue; 1643 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1644 uint64_t EdgeCount = *E->Count; 1645 if (EdgeCount > MaxCount) 1646 MaxCount = EdgeCount; 1647 EdgeCounts[SuccNum] = EdgeCount; 1648 } 1649 1650 if (MaxCount) 1651 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1652 else { 1653 // A zero MaxCount can come about when we have a BB with a positive 1654 // count, and whose successor blocks all have 0 count. This can happen 1655 // when there is no exit block and the code exits via a noreturn function. 1656 auto &Ctx = M->getContext(); 1657 Ctx.diagnose(DiagnosticInfoPGOProfile( 1658 M->getName().data(), 1659 Twine("Profile in ") + F.getName().str() + 1660 Twine(" partially ignored") + 1661 Twine(", possibly due to the lack of a return path."), 1662 DS_Warning)); 1663 } 1664 } 1665 } 1666 1667 static bool isIndirectBrTarget(BasicBlock *BB) { 1668 for (BasicBlock *Pred : predecessors(BB)) { 1669 if (isa<IndirectBrInst>(Pred->getTerminator())) 1670 return true; 1671 } 1672 return false; 1673 } 1674 1675 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1676 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1677 // Find irr loop headers 1678 for (auto &BB : F) { 1679 // As a heuristic also annotate indrectbr targets as they have a high chance 1680 // to become an irreducible loop header after the indirectbr tail 1681 // duplication. 1682 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1683 Instruction *TI = BB.getTerminator(); 1684 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB); 1685 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count); 1686 } 1687 } 1688 } 1689 1690 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1691 Module *M = F.getParent(); 1692 IRBuilder<> Builder(&SI); 1693 Type *Int64Ty = Builder.getInt64Ty(); 1694 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1695 Builder.CreateCall( 1696 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1697 {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1698 Builder.getInt32(*CurCtrIdx), Step}); 1699 ++(*CurCtrIdx); 1700 } 1701 1702 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1703 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1704 assert(*CurCtrIdx < CountFromProfile.size() && 1705 "Out of bound access of counters"); 1706 uint64_t SCounts[2]; 1707 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1708 ++(*CurCtrIdx); 1709 uint64_t TotalCount = 0; 1710 auto BI = UseFunc->findBBInfo(SI.getParent()); 1711 if (BI != nullptr) 1712 TotalCount = *BI->Count; 1713 // False Count 1714 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1715 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1716 if (MaxCount) 1717 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1718 } 1719 1720 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1721 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage) 1722 return; 1723 // FIXME: do not handle this yet. 1724 if (SI.getCondition()->getType()->isVectorTy()) 1725 return; 1726 1727 switch (Mode) { 1728 case VM_counting: 1729 NSIs++; 1730 return; 1731 case VM_instrument: 1732 instrumentOneSelectInst(SI); 1733 return; 1734 case VM_annotate: 1735 annotateOneSelectInst(SI); 1736 return; 1737 } 1738 1739 llvm_unreachable("Unknown visiting mode"); 1740 } 1741 1742 static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind) { 1743 if (ValueProfKind == IPVK_MemOPSize) 1744 return MaxNumMemOPAnnotations; 1745 if (ValueProfKind == llvm::IPVK_VTableTarget) 1746 return MaxNumVTableAnnotations; 1747 return MaxNumAnnotations; 1748 } 1749 1750 // Traverse all valuesites and annotate the instructions for all value kind. 1751 void PGOUseFunc::annotateValueSites() { 1752 if (isValueProfilingDisabled()) 1753 return; 1754 1755 // Create the PGOFuncName meta data. 1756 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1757 1758 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1759 annotateValueSites(Kind); 1760 } 1761 1762 // Annotate the instructions for a specific value kind. 1763 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1764 assert(Kind <= IPVK_Last); 1765 unsigned ValueSiteIndex = 0; 1766 1767 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1768 1769 // Since there isn't a reliable or fast way for profile reader to tell if a 1770 // profile is generated with `-enable-vtable-value-profiling` on, we run the 1771 // value profile collector over the function IR to find the instrumented sites 1772 // iff function profile records shows the number of instrumented vtable sites 1773 // is not zero. Function cfg already takes the number of instrumented 1774 // indirect call sites into account so it doesn't hash the number of 1775 // instrumented vtables; as a side effect it makes it easier to enable 1776 // profiling and profile use in two steps if needed. 1777 // TODO: Remove this if/when -enable-vtable-value-profiling is on by default. 1778 if (NumValueSites > 0 && Kind == IPVK_VTableTarget && 1779 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() && 1780 MaxNumVTableAnnotations != 0) 1781 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget); 1782 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1783 if (NumValueSites != ValueSites.size()) { 1784 auto &Ctx = M->getContext(); 1785 Ctx.diagnose(DiagnosticInfoPGOProfile( 1786 M->getName().data(), 1787 Twine("Inconsistent number of value sites for ") + 1788 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") + 1789 F.getName().str() + 1790 Twine("\", possibly due to the use of a stale profile."), 1791 DS_Warning)); 1792 return; 1793 } 1794 1795 for (VPCandidateInfo &I : ValueSites) { 1796 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1797 << "): Index = " << ValueSiteIndex << " out of " 1798 << NumValueSites << "\n"); 1799 annotateValueSite( 1800 *M, *I.AnnotatedInst, ProfileRecord, 1801 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1802 getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind))); 1803 ValueSiteIndex++; 1804 } 1805 } 1806 1807 // Collect the set of members for each Comdat in module M and store 1808 // in ComdatMembers. 1809 static void collectComdatMembers( 1810 Module &M, 1811 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1812 if (!DoComdatRenaming) 1813 return; 1814 for (Function &F : M) 1815 if (Comdat *C = F.getComdat()) 1816 ComdatMembers.insert(std::make_pair(C, &F)); 1817 for (GlobalVariable &GV : M.globals()) 1818 if (Comdat *C = GV.getComdat()) 1819 ComdatMembers.insert(std::make_pair(C, &GV)); 1820 for (GlobalAlias &GA : M.aliases()) 1821 if (Comdat *C = GA.getComdat()) 1822 ComdatMembers.insert(std::make_pair(C, &GA)); 1823 } 1824 1825 // Return true if we should not find instrumentation data for this function 1826 static bool skipPGOUse(const Function &F) { 1827 if (F.isDeclaration()) 1828 return true; 1829 // If there are too many critical edges, PGO might cause 1830 // compiler time problem. Skip PGO if the number of 1831 // critical edges execeed the threshold. 1832 unsigned NumCriticalEdges = 0; 1833 for (auto &BB : F) { 1834 const Instruction *TI = BB.getTerminator(); 1835 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 1836 if (isCriticalEdge(TI, I)) 1837 NumCriticalEdges++; 1838 } 1839 } 1840 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) { 1841 LLVM_DEBUG(dbgs() << "In func " << F.getName() 1842 << ", NumCriticalEdges=" << NumCriticalEdges 1843 << " exceed the threshold. Skip PGO.\n"); 1844 return true; 1845 } 1846 return false; 1847 } 1848 1849 // Return true if we should not instrument this function 1850 static bool skipPGOGen(const Function &F) { 1851 if (skipPGOUse(F)) 1852 return true; 1853 if (F.hasFnAttribute(llvm::Attribute::Naked)) 1854 return true; 1855 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1856 return true; 1857 if (F.hasFnAttribute(llvm::Attribute::SkipProfile)) 1858 return true; 1859 if (F.getInstructionCount() < PGOFunctionSizeThreshold) 1860 return true; 1861 return false; 1862 } 1863 1864 static bool InstrumentAllFunctions( 1865 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1866 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1867 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1868 // For the context-sensitve instrumentation, we should have a separated pass 1869 // (before LTO/ThinLTO linking) to create these variables. 1870 if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) 1871 createIRLevelProfileFlagVar(M, /*IsCS=*/false); 1872 1873 Triple TT(M.getTargetTriple()); 1874 LLVMContext &Ctx = M.getContext(); 1875 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling) 1876 Ctx.diagnose(DiagnosticInfoPGOProfile( 1877 M.getName().data(), 1878 Twine("VTable value profiling is presently not " 1879 "supported for non-ELF object formats"), 1880 DS_Warning)); 1881 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1882 collectComdatMembers(M, ComdatMembers); 1883 1884 for (auto &F : M) { 1885 if (skipPGOGen(F)) 1886 continue; 1887 auto &TLI = LookupTLI(F); 1888 auto *BPI = LookupBPI(F); 1889 auto *BFI = LookupBFI(F); 1890 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1891 } 1892 return true; 1893 } 1894 1895 PreservedAnalyses 1896 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &MAM) { 1897 createProfileFileNameVar(M, CSInstrName); 1898 // The variable in a comdat may be discarded by LTO. Ensure the declaration 1899 // will be retained. 1900 appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); 1901 if (ProfileSampling) 1902 createProfileSamplingVar(M); 1903 PreservedAnalyses PA; 1904 PA.preserve<FunctionAnalysisManagerModuleProxy>(); 1905 PA.preserveSet<AllAnalysesOn<Function>>(); 1906 return PA; 1907 } 1908 1909 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1910 ModuleAnalysisManager &MAM) { 1911 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1912 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1913 return FAM.getResult<TargetLibraryAnalysis>(F); 1914 }; 1915 auto LookupBPI = [&FAM](Function &F) { 1916 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1917 }; 1918 auto LookupBFI = [&FAM](Function &F) { 1919 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1920 }; 1921 1922 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1923 return PreservedAnalyses::all(); 1924 1925 return PreservedAnalyses::none(); 1926 } 1927 1928 // Using the ratio b/w sums of profile count values and BFI count values to 1929 // adjust the func entry count. 1930 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1931 BranchProbabilityInfo &NBPI) { 1932 Function &F = Func.getFunc(); 1933 BlockFrequencyInfo NBFI(F, NBPI, LI); 1934 #ifndef NDEBUG 1935 auto BFIEntryCount = F.getEntryCount(); 1936 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) && 1937 "Invalid BFI Entrycount"); 1938 #endif 1939 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1940 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1941 for (auto &BBI : F) { 1942 uint64_t CountValue = 0; 1943 uint64_t BFICountValue = 0; 1944 if (!Func.findBBInfo(&BBI)) 1945 continue; 1946 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1947 CountValue = *Func.getBBInfo(&BBI).Count; 1948 BFICountValue = *BFICount; 1949 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1950 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1951 } 1952 if (SumCount.isZero()) 1953 return; 1954 1955 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1956 "Incorrect sum of BFI counts"); 1957 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1958 return; 1959 double Scale = (SumCount / SumBFICount).convertToDouble(); 1960 if (Scale < 1.001 && Scale > 0.999) 1961 return; 1962 1963 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count; 1964 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1965 if (NewEntryCount == 0) 1966 NewEntryCount = 1; 1967 if (NewEntryCount != FuncEntryCount) { 1968 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1969 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1970 << ", entry_count " << FuncEntryCount << " --> " 1971 << NewEntryCount << "\n"); 1972 } 1973 } 1974 1975 // Compare the profile count values with BFI count values, and print out 1976 // the non-matching ones. 1977 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 1978 BranchProbabilityInfo &NBPI, 1979 uint64_t HotCountThreshold, 1980 uint64_t ColdCountThreshold) { 1981 Function &F = Func.getFunc(); 1982 BlockFrequencyInfo NBFI(F, NBPI, LI); 1983 // bool PrintFunc = false; 1984 bool HotBBOnly = PGOVerifyHotBFI; 1985 StringRef Msg; 1986 OptimizationRemarkEmitter ORE(&F); 1987 1988 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 1989 for (auto &BBI : F) { 1990 uint64_t CountValue = 0; 1991 uint64_t BFICountValue = 0; 1992 1993 CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue); 1994 1995 BBNum++; 1996 if (CountValue) 1997 NonZeroBBNum++; 1998 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1999 if (BFICount) 2000 BFICountValue = *BFICount; 2001 2002 if (HotBBOnly) { 2003 bool rawIsHot = CountValue >= HotCountThreshold; 2004 bool BFIIsHot = BFICountValue >= HotCountThreshold; 2005 bool rawIsCold = CountValue <= ColdCountThreshold; 2006 bool ShowCount = false; 2007 if (rawIsHot && !BFIIsHot) { 2008 Msg = "raw-Hot to BFI-nonHot"; 2009 ShowCount = true; 2010 } else if (rawIsCold && BFIIsHot) { 2011 Msg = "raw-Cold to BFI-Hot"; 2012 ShowCount = true; 2013 } 2014 if (!ShowCount) 2015 continue; 2016 } else { 2017 if ((CountValue < PGOVerifyBFICutoff) && 2018 (BFICountValue < PGOVerifyBFICutoff)) 2019 continue; 2020 uint64_t Diff = (BFICountValue >= CountValue) 2021 ? BFICountValue - CountValue 2022 : CountValue - BFICountValue; 2023 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) 2024 continue; 2025 } 2026 BBMisMatchNum++; 2027 2028 ORE.emit([&]() { 2029 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 2030 F.getSubprogram(), &BBI); 2031 Remark << "BB " << ore::NV("Block", BBI.getName()) 2032 << " Count=" << ore::NV("Count", CountValue) 2033 << " BFI_Count=" << ore::NV("Count", BFICountValue); 2034 if (!Msg.empty()) 2035 Remark << " (" << Msg << ")"; 2036 return Remark; 2037 }); 2038 } 2039 if (BBMisMatchNum) 2040 ORE.emit([&]() { 2041 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 2042 F.getSubprogram(), &F.getEntryBlock()) 2043 << "In Func " << ore::NV("Function", F.getName()) 2044 << ": Num_of_BB=" << ore::NV("Count", BBNum) 2045 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 2046 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 2047 }); 2048 } 2049 2050 static bool annotateAllFunctions( 2051 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 2052 vfs::FileSystem &FS, 2053 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 2054 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 2055 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 2056 ProfileSummaryInfo *PSI, bool IsCS) { 2057 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 2058 auto &Ctx = M.getContext(); 2059 // Read the counter array from file. 2060 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS, 2061 ProfileRemappingFileName); 2062 if (Error E = ReaderOrErr.takeError()) { 2063 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 2064 Ctx.diagnose( 2065 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 2066 }); 2067 return false; 2068 } 2069 2070 std::unique_ptr<IndexedInstrProfReader> PGOReader = 2071 std::move(ReaderOrErr.get()); 2072 if (!PGOReader) { 2073 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 2074 StringRef("Cannot get PGOReader"))); 2075 return false; 2076 } 2077 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 2078 return false; 2079 2080 // TODO: might need to change the warning once the clang option is finalized. 2081 if (!PGOReader->isIRLevelProfile()) { 2082 Ctx.diagnose(DiagnosticInfoPGOProfile( 2083 ProfileFileName.data(), "Not an IR level instrumentation profile")); 2084 return false; 2085 } 2086 if (PGOReader->functionEntryOnly()) { 2087 Ctx.diagnose(DiagnosticInfoPGOProfile( 2088 ProfileFileName.data(), 2089 "Function entry profiles are not yet supported for optimization")); 2090 return false; 2091 } 2092 2093 if (EnableVTableProfileUse) { 2094 for (GlobalVariable &G : M.globals()) { 2095 if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type)) 2096 continue; 2097 2098 // Create the PGOFuncName meta data. 2099 createPGONameMetadata(G, getPGOName(G, false /* InLTO*/)); 2100 } 2101 } 2102 2103 // Add the profile summary (read from the header of the indexed summary) here 2104 // so that we can use it below when reading counters (which checks if the 2105 // function should be marked with a cold or inlinehint attribute). 2106 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 2107 IsCS ? ProfileSummary::PSK_CSInstr 2108 : ProfileSummary::PSK_Instr); 2109 PSI->refresh(); 2110 2111 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 2112 collectComdatMembers(M, ComdatMembers); 2113 std::vector<Function *> HotFunctions; 2114 std::vector<Function *> ColdFunctions; 2115 2116 // If the profile marked as always instrument the entry BB, do the 2117 // same. Note this can be overwritten by the internal option in CFGMST.h 2118 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 2119 if (PGOInstrumentEntry.getNumOccurrences() > 0) 2120 InstrumentFuncEntry = PGOInstrumentEntry; 2121 InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); 2122 2123 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage(); 2124 for (auto &F : M) { 2125 if (skipPGOUse(F)) 2126 continue; 2127 auto &TLI = LookupTLI(F); 2128 auto *BPI = LookupBPI(F); 2129 auto *BFI = LookupBFI(F); 2130 if (!HasSingleByteCoverage) { 2131 // Split indirectbr critical edges here before computing the MST rather 2132 // than later in getInstrBB() to avoid invalidating it. 2133 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, 2134 BFI); 2135 } 2136 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 2137 InstrumentFuncEntry, HasSingleByteCoverage); 2138 if (HasSingleByteCoverage) { 2139 Func.populateCoverage(PGOReader.get()); 2140 continue; 2141 } 2142 // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo, 2143 // it means the profile for the function is unrepresentative and this 2144 // function is actually hot / warm. We will reset the function hot / cold 2145 // attribute and drop all the profile counters. 2146 InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo; 2147 bool AllZeros = false; 2148 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind)) 2149 continue; 2150 if (AllZeros) { 2151 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 2152 if (Func.getProgramMaxCount() != 0) 2153 ColdFunctions.push_back(&F); 2154 continue; 2155 } 2156 if (PseudoKind != InstrProfRecord::NotPseudo) { 2157 // Clear function attribute cold. 2158 if (F.hasFnAttribute(Attribute::Cold)) 2159 F.removeFnAttr(Attribute::Cold); 2160 // Set function attribute as hot. 2161 if (PseudoKind == InstrProfRecord::PseudoHot) 2162 F.addFnAttr(Attribute::Hot); 2163 continue; 2164 } 2165 Func.populateCounters(); 2166 Func.setBranchWeights(); 2167 Func.annotateValueSites(); 2168 Func.annotateIrrLoopHeaderWeights(); 2169 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 2170 if (FreqAttr == PGOUseFunc::FFA_Cold) 2171 ColdFunctions.push_back(&F); 2172 else if (FreqAttr == PGOUseFunc::FFA_Hot) 2173 HotFunctions.push_back(&F); 2174 if (PGOViewCounts != PGOVCT_None && 2175 (ViewBlockFreqFuncName.empty() || 2176 F.getName() == ViewBlockFreqFuncName)) { 2177 LoopInfo LI{DominatorTree(F)}; 2178 std::unique_ptr<BranchProbabilityInfo> NewBPI = 2179 std::make_unique<BranchProbabilityInfo>(F, LI); 2180 std::unique_ptr<BlockFrequencyInfo> NewBFI = 2181 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 2182 if (PGOViewCounts == PGOVCT_Graph) 2183 NewBFI->view(); 2184 else if (PGOViewCounts == PGOVCT_Text) { 2185 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 2186 NewBFI->print(dbgs()); 2187 } 2188 } 2189 if (PGOViewRawCounts != PGOVCT_None && 2190 (ViewBlockFreqFuncName.empty() || 2191 F.getName() == ViewBlockFreqFuncName)) { 2192 if (PGOViewRawCounts == PGOVCT_Graph) 2193 if (ViewBlockFreqFuncName.empty()) 2194 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 2195 else 2196 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 2197 else if (PGOViewRawCounts == PGOVCT_Text) { 2198 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 2199 Func.dumpInfo(); 2200 } 2201 } 2202 2203 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 2204 LoopInfo LI{DominatorTree(F)}; 2205 BranchProbabilityInfo NBPI(F, LI); 2206 2207 // Fix func entry count. 2208 if (PGOFixEntryCount) 2209 fixFuncEntryCount(Func, LI, NBPI); 2210 2211 // Verify BlockFrequency information. 2212 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 2213 if (PGOVerifyHotBFI) { 2214 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 2215 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 2216 } 2217 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 2218 } 2219 } 2220 2221 // Set function hotness attribute from the profile. 2222 // We have to apply these attributes at the end because their presence 2223 // can affect the BranchProbabilityInfo of any callers, resulting in an 2224 // inconsistent MST between prof-gen and prof-use. 2225 for (auto &F : HotFunctions) { 2226 F->addFnAttr(Attribute::InlineHint); 2227 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 2228 << "\n"); 2229 } 2230 for (auto &F : ColdFunctions) { 2231 // Only set when there is no Attribute::Hot set by the user. For Hot 2232 // attribute, user's annotation has the precedence over the profile. 2233 if (F->hasFnAttribute(Attribute::Hot)) { 2234 auto &Ctx = M.getContext(); 2235 std::string Msg = std::string("Function ") + F->getName().str() + 2236 std::string(" is annotated as a hot function but" 2237 " the profile is cold"); 2238 Ctx.diagnose( 2239 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 2240 continue; 2241 } 2242 F->addFnAttr(Attribute::Cold); 2243 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 2244 << "\n"); 2245 } 2246 return true; 2247 } 2248 2249 PGOInstrumentationUse::PGOInstrumentationUse( 2250 std::string Filename, std::string RemappingFilename, bool IsCS, 2251 IntrusiveRefCntPtr<vfs::FileSystem> VFS) 2252 : ProfileFileName(std::move(Filename)), 2253 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS), 2254 FS(std::move(VFS)) { 2255 if (!PGOTestProfileFile.empty()) 2256 ProfileFileName = PGOTestProfileFile; 2257 if (!PGOTestProfileRemappingFile.empty()) 2258 ProfileRemappingFileName = PGOTestProfileRemappingFile; 2259 if (!FS) 2260 FS = vfs::getRealFileSystem(); 2261 } 2262 2263 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 2264 ModuleAnalysisManager &MAM) { 2265 2266 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 2267 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 2268 return FAM.getResult<TargetLibraryAnalysis>(F); 2269 }; 2270 auto LookupBPI = [&FAM](Function &F) { 2271 return &FAM.getResult<BranchProbabilityAnalysis>(F); 2272 }; 2273 auto LookupBFI = [&FAM](Function &F) { 2274 return &FAM.getResult<BlockFrequencyAnalysis>(F); 2275 }; 2276 2277 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M); 2278 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS, 2279 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 2280 return PreservedAnalyses::all(); 2281 2282 return PreservedAnalyses::none(); 2283 } 2284 2285 static std::string getSimpleNodeName(const BasicBlock *Node) { 2286 if (!Node->getName().empty()) 2287 return Node->getName().str(); 2288 2289 std::string SimpleNodeName; 2290 raw_string_ostream OS(SimpleNodeName); 2291 Node->printAsOperand(OS, false); 2292 return SimpleNodeName; 2293 } 2294 2295 void llvm::setProfMetadata(Module *M, Instruction *TI, 2296 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) { 2297 assert(MaxCount > 0 && "Bad max count"); 2298 uint64_t Scale = calculateCountScale(MaxCount); 2299 SmallVector<unsigned, 4> Weights; 2300 for (const auto &ECI : EdgeCounts) 2301 Weights.push_back(scaleBranchCount(ECI, Scale)); 2302 2303 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 2304 : Weights) { 2305 dbgs() << W << " "; 2306 } dbgs() << "\n";); 2307 2308 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); 2309 2310 setBranchWeights(*TI, Weights, /*IsExpected=*/false); 2311 if (EmitBranchProbability) { 2312 std::string BrCondStr = getBranchCondString(TI); 2313 if (BrCondStr.empty()) 2314 return; 2315 2316 uint64_t WSum = 2317 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 2318 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 2319 uint64_t TotalCount = 2320 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 2321 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 2322 Scale = calculateCountScale(WSum); 2323 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 2324 scaleBranchCount(WSum, Scale)); 2325 std::string BranchProbStr; 2326 raw_string_ostream OS(BranchProbStr); 2327 OS << BP; 2328 OS << " (total count : " << TotalCount << ")"; 2329 OS.flush(); 2330 Function *F = TI->getParent()->getParent(); 2331 OptimizationRemarkEmitter ORE(F); 2332 ORE.emit([&]() { 2333 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 2334 << BrCondStr << " is true with probability : " << BranchProbStr; 2335 }); 2336 } 2337 } 2338 2339 namespace llvm { 2340 2341 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2342 MDBuilder MDB(M->getContext()); 2343 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2344 MDB.createIrrLoopHeaderWeight(Count)); 2345 } 2346 2347 template <> struct GraphTraits<PGOUseFunc *> { 2348 using NodeRef = const BasicBlock *; 2349 using ChildIteratorType = const_succ_iterator; 2350 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2351 2352 static NodeRef getEntryNode(const PGOUseFunc *G) { 2353 return &G->getFunc().front(); 2354 } 2355 2356 static ChildIteratorType child_begin(const NodeRef N) { 2357 return succ_begin(N); 2358 } 2359 2360 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2361 2362 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2363 return nodes_iterator(G->getFunc().begin()); 2364 } 2365 2366 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2367 return nodes_iterator(G->getFunc().end()); 2368 } 2369 }; 2370 2371 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2372 explicit DOTGraphTraits(bool isSimple = false) 2373 : DefaultDOTGraphTraits(isSimple) {} 2374 2375 static std::string getGraphName(const PGOUseFunc *G) { 2376 return std::string(G->getFunc().getName()); 2377 } 2378 2379 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2380 std::string Result; 2381 raw_string_ostream OS(Result); 2382 2383 OS << getSimpleNodeName(Node) << ":\\l"; 2384 PGOUseBBInfo *BI = Graph->findBBInfo(Node); 2385 OS << "Count : "; 2386 if (BI && BI->Count) 2387 OS << *BI->Count << "\\l"; 2388 else 2389 OS << "Unknown\\l"; 2390 2391 if (!PGOInstrSelect) 2392 return Result; 2393 2394 for (const Instruction &I : *Node) { 2395 if (!isa<SelectInst>(&I)) 2396 continue; 2397 // Display scaled counts for SELECT instruction: 2398 OS << "SELECT : { T = "; 2399 uint64_t TC, FC; 2400 bool HasProf = extractBranchWeights(I, TC, FC); 2401 if (!HasProf) 2402 OS << "Unknown, F = Unknown }\\l"; 2403 else 2404 OS << TC << ", F = " << FC << " }\\l"; 2405 } 2406 return Result; 2407 } 2408 }; 2409 2410 } // end namespace llvm 2411