1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "ValueProfileCollector.h" 52 #include "llvm/ADT/APInt.h" 53 #include "llvm/ADT/ArrayRef.h" 54 #include "llvm/ADT/STLExtras.h" 55 #include "llvm/ADT/SmallVector.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/ADT/StringRef.h" 58 #include "llvm/ADT/Twine.h" 59 #include "llvm/ADT/iterator.h" 60 #include "llvm/ADT/iterator_range.h" 61 #include "llvm/Analysis/BlockFrequencyInfo.h" 62 #include "llvm/Analysis/BranchProbabilityInfo.h" 63 #include "llvm/Analysis/CFG.h" 64 #include "llvm/Analysis/LoopInfo.h" 65 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 66 #include "llvm/Analysis/ProfileSummaryInfo.h" 67 #include "llvm/Analysis/TargetLibraryInfo.h" 68 #include "llvm/IR/Attributes.h" 69 #include "llvm/IR/BasicBlock.h" 70 #include "llvm/IR/CFG.h" 71 #include "llvm/IR/Comdat.h" 72 #include "llvm/IR/Constant.h" 73 #include "llvm/IR/Constants.h" 74 #include "llvm/IR/DiagnosticInfo.h" 75 #include "llvm/IR/Dominators.h" 76 #include "llvm/IR/EHPersonalities.h" 77 #include "llvm/IR/Function.h" 78 #include "llvm/IR/GlobalAlias.h" 79 #include "llvm/IR/GlobalValue.h" 80 #include "llvm/IR/GlobalVariable.h" 81 #include "llvm/IR/IRBuilder.h" 82 #include "llvm/IR/InstVisitor.h" 83 #include "llvm/IR/InstrTypes.h" 84 #include "llvm/IR/Instruction.h" 85 #include "llvm/IR/Instructions.h" 86 #include "llvm/IR/IntrinsicInst.h" 87 #include "llvm/IR/Intrinsics.h" 88 #include "llvm/IR/LLVMContext.h" 89 #include "llvm/IR/MDBuilder.h" 90 #include "llvm/IR/Module.h" 91 #include "llvm/IR/PassManager.h" 92 #include "llvm/IR/ProfDataUtils.h" 93 #include "llvm/IR/ProfileSummary.h" 94 #include "llvm/IR/Type.h" 95 #include "llvm/IR/Value.h" 96 #include "llvm/ProfileData/InstrProf.h" 97 #include "llvm/ProfileData/InstrProfReader.h" 98 #include "llvm/Support/BranchProbability.h" 99 #include "llvm/Support/CRC.h" 100 #include "llvm/Support/Casting.h" 101 #include "llvm/Support/CommandLine.h" 102 #include "llvm/Support/DOTGraphTraits.h" 103 #include "llvm/Support/Debug.h" 104 #include "llvm/Support/Error.h" 105 #include "llvm/Support/ErrorHandling.h" 106 #include "llvm/Support/GraphWriter.h" 107 #include "llvm/Support/VirtualFileSystem.h" 108 #include "llvm/Support/raw_ostream.h" 109 #include "llvm/TargetParser/Triple.h" 110 #include "llvm/Transforms/Instrumentation.h" 111 #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" 112 #include "llvm/Transforms/Instrumentation/CFGMST.h" 113 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 114 #include "llvm/Transforms/Utils/MisExpect.h" 115 #include "llvm/Transforms/Utils/ModuleUtils.h" 116 #include <algorithm> 117 #include <cassert> 118 #include <cstdint> 119 #include <memory> 120 #include <numeric> 121 #include <optional> 122 #include <string> 123 #include <unordered_map> 124 #include <utility> 125 #include <vector> 126 127 using namespace llvm; 128 using ProfileCount = Function::ProfileCount; 129 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 130 131 #define DEBUG_TYPE "pgo-instrumentation" 132 133 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 134 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 135 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 136 STATISTIC(NumOfPGOEdge, "Number of edges."); 137 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 138 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 139 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 140 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 141 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 142 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 143 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 144 STATISTIC(NumOfCSPGOSelectInsts, 145 "Number of select instruction instrumented in CSPGO."); 146 STATISTIC(NumOfCSPGOMemIntrinsics, 147 "Number of mem intrinsics instrumented in CSPGO."); 148 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 149 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 150 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 151 STATISTIC(NumOfCSPGOFunc, 152 "Number of functions having valid profile counts in CSPGO."); 153 STATISTIC(NumOfCSPGOMismatch, 154 "Number of functions having mismatch profile in CSPGO."); 155 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 156 STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed"); 157 158 // Command line option to specify the file to read profile from. This is 159 // mainly used for testing. 160 static cl::opt<std::string> 161 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 162 cl::value_desc("filename"), 163 cl::desc("Specify the path of profile data file. This is" 164 "mainly for test purpose.")); 165 static cl::opt<std::string> PGOTestProfileRemappingFile( 166 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 167 cl::value_desc("filename"), 168 cl::desc("Specify the path of profile remapping file. This is mainly for " 169 "test purpose.")); 170 171 // Command line option to disable value profiling. The default is false: 172 // i.e. value profiling is enabled by default. This is for debug purpose. 173 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 174 cl::Hidden, 175 cl::desc("Disable Value Profiling")); 176 177 // Command line option to set the maximum number of VP annotations to write to 178 // the metadata for a single indirect call callsite. 179 static cl::opt<unsigned> MaxNumAnnotations( 180 "icp-max-annotations", cl::init(3), cl::Hidden, 181 cl::desc("Max number of annotations for a single indirect " 182 "call callsite")); 183 184 // Command line option to set the maximum number of value annotations 185 // to write to the metadata for a single memop intrinsic. 186 static cl::opt<unsigned> MaxNumMemOPAnnotations( 187 "memop-max-annotations", cl::init(4), cl::Hidden, 188 cl::desc("Max number of preicise value annotations for a single memop" 189 "intrinsic")); 190 191 // Command line option to control appending FunctionHash to the name of a COMDAT 192 // function. This is to avoid the hash mismatch caused by the preinliner. 193 static cl::opt<bool> DoComdatRenaming( 194 "do-comdat-renaming", cl::init(false), cl::Hidden, 195 cl::desc("Append function hash to the name of COMDAT function to avoid " 196 "function hash mismatch due to the preinliner")); 197 198 namespace llvm { 199 // Command line option to enable/disable the warning about missing profile 200 // information. 201 cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false), 202 cl::Hidden, 203 cl::desc("Use this option to turn on/off " 204 "warnings about missing profile data for " 205 "functions.")); 206 207 // Command line option to enable/disable the warning about a hash mismatch in 208 // the profile data. 209 cl::opt<bool> 210 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 211 cl::desc("Use this option to turn off/on " 212 "warnings about profile cfg mismatch.")); 213 214 // Command line option to enable/disable the warning about a hash mismatch in 215 // the profile data for Comdat functions, which often turns out to be false 216 // positive due to the pre-instrumentation inline. 217 cl::opt<bool> NoPGOWarnMismatchComdatWeak( 218 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden, 219 cl::desc("The option is used to turn on/off " 220 "warnings about hash mismatch for comdat " 221 "or weak functions.")); 222 } // namespace llvm 223 224 // Command line option to enable/disable select instruction instrumentation. 225 static cl::opt<bool> 226 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 227 cl::desc("Use this option to turn on/off SELECT " 228 "instruction instrumentation. ")); 229 230 // Command line option to turn on CFG dot or text dump of raw profile counts 231 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 232 "pgo-view-raw-counts", cl::Hidden, 233 cl::desc("A boolean option to show CFG dag or text " 234 "with raw profile counts from " 235 "profile data. See also option " 236 "-pgo-view-counts. To limit graph " 237 "display to only one function, use " 238 "filtering option -view-bfi-func-name."), 239 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 240 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 241 clEnumValN(PGOVCT_Text, "text", "show in text."))); 242 243 // Command line option to enable/disable memop intrinsic call.size profiling. 244 static cl::opt<bool> 245 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 246 cl::desc("Use this option to turn on/off " 247 "memory intrinsic size profiling.")); 248 249 // Emit branch probability as optimization remarks. 250 static cl::opt<bool> 251 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 252 cl::desc("When this option is on, the annotated " 253 "branch probability will be emitted as " 254 "optimization remarks: -{Rpass|" 255 "pass-remarks}=pgo-instrumentation")); 256 257 static cl::opt<bool> PGOInstrumentEntry( 258 "pgo-instrument-entry", cl::init(false), cl::Hidden, 259 cl::desc("Force to instrument function entry basicblock.")); 260 261 static cl::opt<bool> PGOFunctionEntryCoverage( 262 "pgo-function-entry-coverage", cl::Hidden, 263 cl::desc( 264 "Use this option to enable function entry coverage instrumentation.")); 265 266 static cl::opt<bool> PGOBlockCoverage( 267 "pgo-block-coverage", 268 cl::desc("Use this option to enable basic block coverage instrumentation")); 269 270 static cl::opt<bool> 271 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", 272 cl::desc("Create a dot file of CFGs with block " 273 "coverage inference information")); 274 275 static cl::opt<bool> PGOTemporalInstrumentation( 276 "pgo-temporal-instrumentation", 277 cl::desc("Use this option to enable temporal instrumentation")); 278 279 static cl::opt<bool> 280 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 281 cl::desc("Fix function entry count in profile use.")); 282 283 static cl::opt<bool> PGOVerifyHotBFI( 284 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 285 cl::desc("Print out the non-match BFI count if a hot raw profile count " 286 "becomes non-hot, or a cold raw profile count becomes hot. " 287 "The print is enabled under -Rpass-analysis=pgo, or " 288 "internal option -pass-remakrs-analysis=pgo.")); 289 290 static cl::opt<bool> PGOVerifyBFI( 291 "pgo-verify-bfi", cl::init(false), cl::Hidden, 292 cl::desc("Print out mismatched BFI counts after setting profile metadata " 293 "The print is enabled under -Rpass-analysis=pgo, or " 294 "internal option -pass-remakrs-analysis=pgo.")); 295 296 static cl::opt<unsigned> PGOVerifyBFIRatio( 297 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, 298 cl::desc("Set the threshold for pgo-verify-bfi: only print out " 299 "mismatched BFI if the difference percentage is greater than " 300 "this value (in percentage).")); 301 302 static cl::opt<unsigned> PGOVerifyBFICutoff( 303 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, 304 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " 305 "profile count value is below.")); 306 307 static cl::opt<std::string> PGOTraceFuncHash( 308 "pgo-trace-func-hash", cl::init("-"), cl::Hidden, 309 cl::value_desc("function name"), 310 cl::desc("Trace the hash of the function with this name.")); 311 312 static cl::opt<unsigned> PGOFunctionSizeThreshold( 313 "pgo-function-size-threshold", cl::Hidden, 314 cl::desc("Do not instrument functions smaller than this threshold.")); 315 316 static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold( 317 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, 318 cl::desc("Do not instrument functions with the number of critical edges " 319 " greater than this threshold.")); 320 321 namespace llvm { 322 // Command line option to turn on CFG dot dump after profile annotation. 323 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 324 extern cl::opt<PGOViewCountsType> PGOViewCounts; 325 326 // Command line option to specify the name of the function for CFG dump 327 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 328 extern cl::opt<std::string> ViewBlockFreqFuncName; 329 330 extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate; 331 } // namespace llvm 332 333 // Return a string describing the branch condition that can be 334 // used in static branch probability heuristics: 335 static std::string getBranchCondString(Instruction *TI) { 336 BranchInst *BI = dyn_cast<BranchInst>(TI); 337 if (!BI || !BI->isConditional()) 338 return std::string(); 339 340 Value *Cond = BI->getCondition(); 341 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 342 if (!CI) 343 return std::string(); 344 345 std::string result; 346 raw_string_ostream OS(result); 347 OS << CI->getPredicate() << "_"; 348 CI->getOperand(0)->getType()->print(OS, true); 349 350 Value *RHS = CI->getOperand(1); 351 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 352 if (CV) { 353 if (CV->isZero()) 354 OS << "_Zero"; 355 else if (CV->isOne()) 356 OS << "_One"; 357 else if (CV->isMinusOne()) 358 OS << "_MinusOne"; 359 else 360 OS << "_Const"; 361 } 362 OS.flush(); 363 return result; 364 } 365 366 static const char *ValueProfKindDescr[] = { 367 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 368 #include "llvm/ProfileData/InstrProfData.inc" 369 }; 370 371 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 372 // aware this is an ir_level profile so it can set the version flag. 373 static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { 374 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 375 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 376 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 377 if (IsCS) 378 ProfileVersion |= VARIANT_MASK_CSIR_PROF; 379 if (PGOInstrumentEntry) 380 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; 381 if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) 382 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; 383 if (PGOFunctionEntryCoverage) 384 ProfileVersion |= 385 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; 386 if (PGOBlockCoverage) 387 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE; 388 if (PGOTemporalInstrumentation) 389 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF; 390 auto IRLevelVersionVariable = new GlobalVariable( 391 M, IntTy64, true, GlobalValue::WeakAnyLinkage, 392 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); 393 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility); 394 Triple TT(M.getTargetTriple()); 395 if (TT.supportsCOMDAT()) { 396 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); 397 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); 398 } 399 return IRLevelVersionVariable; 400 } 401 402 namespace { 403 404 /// The select instruction visitor plays three roles specified 405 /// by the mode. In \c VM_counting mode, it simply counts the number of 406 /// select instructions. In \c VM_instrument mode, it inserts code to count 407 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 408 /// it reads the profile data and annotate the select instruction with metadata. 409 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 410 class PGOUseFunc; 411 412 /// Instruction Visitor class to visit select instructions. 413 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 414 Function &F; 415 unsigned NSIs = 0; // Number of select instructions instrumented. 416 VisitMode Mode = VM_counting; // Visiting mode. 417 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 418 unsigned TotalNumCtrs = 0; // Total number of counters 419 GlobalVariable *FuncNameVar = nullptr; 420 uint64_t FuncHash = 0; 421 PGOUseFunc *UseFunc = nullptr; 422 bool HasSingleByteCoverage; 423 424 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage) 425 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {} 426 427 void countSelects() { 428 NSIs = 0; 429 Mode = VM_counting; 430 visit(F); 431 } 432 433 // Visit the IR stream and instrument all select instructions. \p 434 // Ind is a pointer to the counter index variable; \p TotalNC 435 // is the total number of counters; \p FNV is the pointer to the 436 // PGO function name var; \p FHash is the function hash. 437 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV, 438 uint64_t FHash) { 439 Mode = VM_instrument; 440 CurCtrIdx = Ind; 441 TotalNumCtrs = TotalNC; 442 FuncHash = FHash; 443 FuncNameVar = FNV; 444 visit(F); 445 } 446 447 // Visit the IR stream and annotate all select instructions. 448 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) { 449 Mode = VM_annotate; 450 UseFunc = UF; 451 CurCtrIdx = Ind; 452 visit(F); 453 } 454 455 void instrumentOneSelectInst(SelectInst &SI); 456 void annotateOneSelectInst(SelectInst &SI); 457 458 // Visit \p SI instruction and perform tasks according to visit mode. 459 void visitSelectInst(SelectInst &SI); 460 461 // Return the number of select instructions. This needs be called after 462 // countSelects(). 463 unsigned getNumOfSelectInsts() const { return NSIs; } 464 }; 465 466 /// This class implements the CFG edges for the Minimum Spanning Tree (MST) 467 /// based instrumentation. 468 /// Note that the CFG can be a multi-graph. So there might be multiple edges 469 /// with the same SrcBB and DestBB. 470 struct PGOEdge { 471 BasicBlock *SrcBB; 472 BasicBlock *DestBB; 473 uint64_t Weight; 474 bool InMST = false; 475 bool Removed = false; 476 bool IsCritical = false; 477 478 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1) 479 : SrcBB(Src), DestBB(Dest), Weight(W) {} 480 481 /// Return the information string of an edge. 482 std::string infoString() const { 483 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 484 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)) 485 .str(); 486 } 487 }; 488 489 /// This class stores the auxiliary information for each BB in the MST. 490 struct PGOBBInfo { 491 PGOBBInfo *Group; 492 uint32_t Index; 493 uint32_t Rank = 0; 494 495 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {} 496 497 /// Return the information string of this object. 498 std::string infoString() const { 499 return (Twine("Index=") + Twine(Index)).str(); 500 } 501 }; 502 503 // This class implements the CFG edges. Note the CFG can be a multi-graph. 504 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 505 private: 506 Function &F; 507 508 // Is this is context-sensitive instrumentation. 509 bool IsCS; 510 511 // A map that stores the Comdat group in function F. 512 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 513 514 ValueProfileCollector VPC; 515 516 void computeCFGHash(); 517 void renameComdatFunction(); 518 519 public: 520 const TargetLibraryInfo &TLI; 521 std::vector<std::vector<VPCandidateInfo>> ValueSites; 522 SelectInstVisitor SIVisitor; 523 std::string FuncName; 524 std::string DeprecatedFuncName; 525 GlobalVariable *FuncNameVar; 526 527 // CFG hash value for this function. 528 uint64_t FunctionHash = 0; 529 530 // The Minimum Spanning Tree of function CFG. 531 CFGMST<Edge, BBInfo> MST; 532 533 const std::optional<BlockCoverageInference> BCI; 534 535 static std::optional<BlockCoverageInference> 536 constructBCI(Function &Func, bool HasSingleByteCoverage, 537 bool InstrumentFuncEntry) { 538 if (HasSingleByteCoverage) 539 return BlockCoverageInference(Func, InstrumentFuncEntry); 540 return {}; 541 } 542 543 // Collect all the BBs that will be instrumented, and store them in 544 // InstrumentBBs. 545 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 546 547 // Give an edge, find the BB that will be instrumented. 548 // Return nullptr if there is no BB to be instrumented. 549 BasicBlock *getInstrBB(Edge *E); 550 551 // Return the auxiliary BB information. 552 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 553 554 // Return the auxiliary BB information if available. 555 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 556 557 // Dump edges and BB information. 558 void dumpInfo(StringRef Str = "") const { 559 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + 560 " Hash: " + Twine(FunctionHash) + "\t" + Str); 561 } 562 563 FuncPGOInstrumentation( 564 Function &Func, TargetLibraryInfo &TLI, 565 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 566 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 567 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 568 bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false) 569 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 570 TLI(TLI), ValueSites(IPVK_Last + 1), 571 SIVisitor(Func, HasSingleByteCoverage), 572 MST(F, InstrumentFuncEntry, BPI, BFI), 573 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) { 574 if (BCI && PGOViewBlockCoverageGraph) 575 BCI->viewBlockCoverageGraph(); 576 // This should be done before CFG hash computation. 577 SIVisitor.countSelects(); 578 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 579 if (!IsCS) { 580 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 581 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 582 NumOfPGOBB += MST.bbInfoSize(); 583 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 584 } else { 585 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 586 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 587 NumOfCSPGOBB += MST.bbInfoSize(); 588 } 589 590 FuncName = getIRPGOFuncName(F); 591 DeprecatedFuncName = getPGOFuncName(F); 592 computeCFGHash(); 593 if (!ComdatMembers.empty()) 594 renameComdatFunction(); 595 LLVM_DEBUG(dumpInfo("after CFGMST")); 596 597 for (const auto &E : MST.allEdges()) { 598 if (E->Removed) 599 continue; 600 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 601 if (!E->InMST) 602 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 603 } 604 605 if (CreateGlobalVar) 606 FuncNameVar = createPGOFuncNameVar(F, FuncName); 607 } 608 }; 609 610 } // end anonymous namespace 611 612 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 613 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 614 // of selects, indirect calls, mem ops and edges. 615 template <class Edge, class BBInfo> 616 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 617 std::vector<uint8_t> Indexes; 618 JamCRC JC; 619 for (auto &BB : F) { 620 for (BasicBlock *Succ : successors(&BB)) { 621 auto BI = findBBInfo(Succ); 622 if (BI == nullptr) 623 continue; 624 uint32_t Index = BI->Index; 625 for (int J = 0; J < 4; J++) 626 Indexes.push_back((uint8_t)(Index >> (J * 8))); 627 } 628 } 629 JC.update(Indexes); 630 631 JamCRC JCH; 632 // The higher 32 bits. 633 auto updateJCH = [&JCH](uint64_t Num) { 634 uint8_t Data[8]; 635 support::endian::write64le(Data, Num); 636 JCH.update(Data); 637 }; 638 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 639 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 640 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 641 if (BCI) { 642 updateJCH(BCI->getInstrumentedBlocksHash()); 643 } else { 644 updateJCH((uint64_t)MST.numEdges()); 645 } 646 647 // Hash format for context sensitive profile. Reserve 4 bits for other 648 // information. 649 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 650 651 // Reserve bit 60-63 for other information purpose. 652 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 653 if (IsCS) 654 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 655 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 656 << " CRC = " << JC.getCRC() 657 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 658 << ", Edges = " << MST.numEdges() << ", ICSites = " 659 << ValueSites[IPVK_IndirectCallTarget].size() 660 << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 661 << ", High32 CRC = " << JCH.getCRC() 662 << ", Hash = " << FunctionHash << "\n";); 663 664 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash)) 665 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash 666 << " in building " << F.getParent()->getSourceFileName() << "\n"; 667 } 668 669 // Check if we can safely rename this Comdat function. 670 static bool canRenameComdat( 671 Function &F, 672 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 673 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 674 return false; 675 676 // FIXME: Current only handle those Comdat groups that only containing one 677 // function. 678 // (1) For a Comdat group containing multiple functions, we need to have a 679 // unique postfix based on the hashes for each function. There is a 680 // non-trivial code refactoring to do this efficiently. 681 // (2) Variables can not be renamed, so we can not rename Comdat function in a 682 // group including global vars. 683 Comdat *C = F.getComdat(); 684 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 685 assert(!isa<GlobalAlias>(CM.second)); 686 Function *FM = dyn_cast<Function>(CM.second); 687 if (FM != &F) 688 return false; 689 } 690 return true; 691 } 692 693 // Append the CFGHash to the Comdat function name. 694 template <class Edge, class BBInfo> 695 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 696 if (!canRenameComdat(F, ComdatMembers)) 697 return; 698 std::string OrigName = F.getName().str(); 699 std::string NewFuncName = 700 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 701 F.setName(Twine(NewFuncName)); 702 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 703 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 704 Comdat *NewComdat; 705 Module *M = F.getParent(); 706 // For AvailableExternallyLinkage functions, change the linkage to 707 // LinkOnceODR and put them into comdat. This is because after renaming, there 708 // is no backup external copy available for the function. 709 if (!F.hasComdat()) { 710 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 711 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 712 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 713 F.setComdat(NewComdat); 714 return; 715 } 716 717 // This function belongs to a single function Comdat group. 718 Comdat *OrigComdat = F.getComdat(); 719 std::string NewComdatName = 720 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 721 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 722 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 723 724 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 725 // Must be a function. 726 cast<Function>(CM.second)->setComdat(NewComdat); 727 } 728 } 729 730 /// Collect all the BBs that will be instruments and add them to 731 /// `InstrumentBBs`. 732 template <class Edge, class BBInfo> 733 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 734 std::vector<BasicBlock *> &InstrumentBBs) { 735 if (BCI) { 736 for (auto &BB : F) 737 if (BCI->shouldInstrumentBlock(BB)) 738 InstrumentBBs.push_back(&BB); 739 return; 740 } 741 742 // Use a worklist as we will update the vector during the iteration. 743 std::vector<Edge *> EdgeList; 744 EdgeList.reserve(MST.numEdges()); 745 for (const auto &E : MST.allEdges()) 746 EdgeList.push_back(E.get()); 747 748 for (auto &E : EdgeList) { 749 BasicBlock *InstrBB = getInstrBB(E); 750 if (InstrBB) 751 InstrumentBBs.push_back(InstrBB); 752 } 753 } 754 755 // Given a CFG E to be instrumented, find which BB to place the instrumented 756 // code. The function will split the critical edge if necessary. 757 template <class Edge, class BBInfo> 758 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 759 if (E->InMST || E->Removed) 760 return nullptr; 761 762 BasicBlock *SrcBB = E->SrcBB; 763 BasicBlock *DestBB = E->DestBB; 764 // For a fake edge, instrument the real BB. 765 if (SrcBB == nullptr) 766 return DestBB; 767 if (DestBB == nullptr) 768 return SrcBB; 769 770 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 771 // There are basic blocks (such as catchswitch) cannot be instrumented. 772 // If the returned first insertion point is the end of BB, skip this BB. 773 if (BB->getFirstInsertionPt() == BB->end()) 774 return nullptr; 775 return BB; 776 }; 777 778 // Instrument the SrcBB if it has a single successor, 779 // otherwise, the DestBB if this is not a critical edge. 780 Instruction *TI = SrcBB->getTerminator(); 781 if (TI->getNumSuccessors() <= 1) 782 return canInstrument(SrcBB); 783 if (!E->IsCritical) 784 return canInstrument(DestBB); 785 786 // Some IndirectBr critical edges cannot be split by the previous 787 // SplitIndirectBrCriticalEdges call. Bail out. 788 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 789 BasicBlock *InstrBB = 790 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 791 if (!InstrBB) { 792 LLVM_DEBUG( 793 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 794 return nullptr; 795 } 796 // For a critical edge, we have to split. Instrument the newly 797 // created BB. 798 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 799 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 800 << " --> " << getBBInfo(DestBB).Index << "\n"); 801 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 802 MST.addEdge(SrcBB, InstrBB, 0); 803 // Second one: Add new edge of InstrBB->DestBB. 804 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 805 NewEdge1.InMST = true; 806 E->Removed = true; 807 808 return canInstrument(InstrBB); 809 } 810 811 // When generating value profiling calls on Windows routines that make use of 812 // handler funclets for exception processing an operand bundle needs to attached 813 // to the called function. This routine will set \p OpBundles to contain the 814 // funclet information, if any is needed, that should be placed on the generated 815 // value profiling call for the value profile candidate call. 816 static void 817 populateEHOperandBundle(VPCandidateInfo &Cand, 818 DenseMap<BasicBlock *, ColorVector> &BlockColors, 819 SmallVectorImpl<OperandBundleDef> &OpBundles) { 820 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 821 if (!OrigCall) 822 return; 823 824 if (!isa<IntrinsicInst>(OrigCall)) { 825 // The instrumentation call should belong to the same funclet as a 826 // non-intrinsic call, so just copy the operand bundle, if any exists. 827 std::optional<OperandBundleUse> ParentFunclet = 828 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 829 if (ParentFunclet) 830 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 831 } else { 832 // Intrinsics or other instructions do not get funclet information from the 833 // front-end. Need to use the BlockColors that was computed by the routine 834 // colorEHFunclets to determine whether a funclet is needed. 835 if (!BlockColors.empty()) { 836 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 837 assert(CV.size() == 1 && "non-unique color for block!"); 838 Instruction *EHPad = CV.front()->getFirstNonPHI(); 839 if (EHPad->isEHPad()) 840 OpBundles.emplace_back("funclet", EHPad); 841 } 842 } 843 } 844 845 // Visit all edge and instrument the edges not in MST, and do value profiling. 846 // Critical edges will be split. 847 static void instrumentOneFunc( 848 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 849 BlockFrequencyInfo *BFI, 850 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 851 bool IsCS) { 852 if (!PGOBlockCoverage) { 853 // Split indirectbr critical edges here before computing the MST rather than 854 // later in getInstrBB() to avoid invalidating it. 855 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 856 } 857 858 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo( 859 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry, 860 PGOBlockCoverage); 861 862 auto Name = FuncInfo.FuncNameVar; 863 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), 864 FuncInfo.FunctionHash); 865 if (PGOFunctionEntryCoverage) { 866 auto &EntryBB = F.getEntryBlock(); 867 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 868 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, 869 // i32 <index>) 870 Builder.CreateCall( 871 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover), 872 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); 873 return; 874 } 875 876 std::vector<BasicBlock *> InstrumentBBs; 877 FuncInfo.getInstrumentBBs(InstrumentBBs); 878 unsigned NumCounters = 879 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 880 881 uint32_t I = 0; 882 if (PGOTemporalInstrumentation) { 883 NumCounters += PGOBlockCoverage ? 8 : 1; 884 auto &EntryBB = F.getEntryBlock(); 885 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 886 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>, 887 // i32 <index>) 888 Builder.CreateCall( 889 Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp), 890 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)}); 891 I += PGOBlockCoverage ? 8 : 1; 892 } 893 894 for (auto *InstrBB : InstrumentBBs) { 895 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 896 assert(Builder.GetInsertPoint() != InstrBB->end() && 897 "Cannot get the Instrumentation point"); 898 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, 899 // i32 <index>) 900 Builder.CreateCall( 901 Intrinsic::getDeclaration(M, PGOBlockCoverage 902 ? Intrinsic::instrprof_cover 903 : Intrinsic::instrprof_increment), 904 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)}); 905 } 906 907 // Now instrument select instructions: 908 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar, 909 FuncInfo.FunctionHash); 910 assert(I == NumCounters); 911 912 if (DisableValueProfiling) 913 return; 914 915 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 916 917 // Intrinsic function calls do not have funclet operand bundles needed for 918 // Windows exception handling attached to them. However, if value profiling is 919 // inserted for one of these calls, then a funclet value will need to be set 920 // on the instrumentation call based on the funclet coloring. 921 DenseMap<BasicBlock *, ColorVector> BlockColors; 922 if (F.hasPersonalityFn() && 923 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 924 BlockColors = colorEHFunclets(F); 925 926 // For each VP Kind, walk the VP candidates and instrument each one. 927 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 928 unsigned SiteIndex = 0; 929 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 930 continue; 931 932 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 933 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 934 << " site: CallSite Index = " << SiteIndex << "\n"); 935 936 IRBuilder<> Builder(Cand.InsertPt); 937 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 938 "Cannot get the Instrumentation point"); 939 940 Value *ToProfile = nullptr; 941 if (Cand.V->getType()->isIntegerTy()) 942 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 943 else if (Cand.V->getType()->isPointerTy()) 944 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 945 assert(ToProfile && "value profiling Value is of unexpected type"); 946 947 SmallVector<OperandBundleDef, 1> OpBundles; 948 populateEHOperandBundle(Cand, BlockColors, OpBundles); 949 Builder.CreateCall( 950 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 951 {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash), 952 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 953 OpBundles); 954 } 955 } // IPVK_First <= Kind <= IPVK_Last 956 } 957 958 namespace { 959 960 // This class represents a CFG edge in profile use compilation. 961 struct PGOUseEdge : public PGOEdge { 962 using PGOEdge::PGOEdge; 963 964 bool CountValid = false; 965 uint64_t CountValue = 0; 966 967 // Set edge count value 968 void setEdgeCount(uint64_t Value) { 969 CountValue = Value; 970 CountValid = true; 971 } 972 973 // Return the information string for this object. 974 std::string infoString() const { 975 if (!CountValid) 976 return PGOEdge::infoString(); 977 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 978 .str(); 979 } 980 }; 981 982 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 983 984 // This class stores the auxiliary information for each BB. 985 struct PGOUseBBInfo : public PGOBBInfo { 986 uint64_t CountValue = 0; 987 bool CountValid; 988 int32_t UnknownCountInEdge = 0; 989 int32_t UnknownCountOutEdge = 0; 990 DirectEdges InEdges; 991 DirectEdges OutEdges; 992 993 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX), CountValid(false) {} 994 995 // Set the profile count value for this BB. 996 void setBBInfoCount(uint64_t Value) { 997 CountValue = Value; 998 CountValid = true; 999 } 1000 1001 // Return the information string of this object. 1002 std::string infoString() const { 1003 if (!CountValid) 1004 return PGOBBInfo::infoString(); 1005 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(CountValue)) 1006 .str(); 1007 } 1008 1009 // Add an OutEdge and update the edge count. 1010 void addOutEdge(PGOUseEdge *E) { 1011 OutEdges.push_back(E); 1012 UnknownCountOutEdge++; 1013 } 1014 1015 // Add an InEdge and update the edge count. 1016 void addInEdge(PGOUseEdge *E) { 1017 InEdges.push_back(E); 1018 UnknownCountInEdge++; 1019 } 1020 }; 1021 1022 } // end anonymous namespace 1023 1024 // Sum up the count values for all the edges. 1025 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 1026 uint64_t Total = 0; 1027 for (const auto &E : Edges) { 1028 if (E->Removed) 1029 continue; 1030 Total += E->CountValue; 1031 } 1032 return Total; 1033 } 1034 1035 namespace { 1036 1037 class PGOUseFunc { 1038 public: 1039 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1040 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1041 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1042 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry, 1043 bool HasSingleByteCoverage) 1044 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1045 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1046 InstrumentFuncEntry, HasSingleByteCoverage), 1047 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1048 1049 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum); 1050 1051 // Read counts for the instrumented BB from profile. 1052 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1053 InstrProfRecord::CountPseudoKind &PseudoKind); 1054 1055 // Populate the counts for all BBs. 1056 void populateCounters(); 1057 1058 // Set block coverage based on profile coverage values. 1059 void populateCoverage(IndexedInstrProfReader *PGOReader); 1060 1061 // Set the branch weights based on the count values. 1062 void setBranchWeights(); 1063 1064 // Annotate the value profile call sites for all value kind. 1065 void annotateValueSites(); 1066 1067 // Annotate the value profile call sites for one value kind. 1068 void annotateValueSites(uint32_t Kind); 1069 1070 // Annotate the irreducible loop header weights. 1071 void annotateIrrLoopHeaderWeights(); 1072 1073 // The hotness of the function from the profile count. 1074 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1075 1076 // Return the function hotness from the profile. 1077 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1078 1079 // Return the function hash. 1080 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1081 1082 // Return the profile record for this function; 1083 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1084 1085 // Return the auxiliary BB information. 1086 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const { 1087 return FuncInfo.getBBInfo(BB); 1088 } 1089 1090 // Return the auxiliary BB information if available. 1091 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const { 1092 return FuncInfo.findBBInfo(BB); 1093 } 1094 1095 Function &getFunc() const { return F; } 1096 1097 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); } 1098 1099 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1100 1101 private: 1102 Function &F; 1103 Module *M; 1104 BlockFrequencyInfo *BFI; 1105 ProfileSummaryInfo *PSI; 1106 1107 // This member stores the shared information with class PGOGenFunc. 1108 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo; 1109 1110 // The maximum count value in the profile. This is only used in PGO use 1111 // compilation. 1112 uint64_t ProgramMaxCount; 1113 1114 // Position of counter that remains to be read. 1115 uint32_t CountPosition = 0; 1116 1117 // Total size of the profile count for this function. 1118 uint32_t ProfileCountSize = 0; 1119 1120 // ProfileRecord for this function. 1121 InstrProfRecord ProfileRecord; 1122 1123 // Function hotness info derived from profile. 1124 FuncFreqAttr FreqAttr; 1125 1126 // Is to use the context sensitive profile. 1127 bool IsCS; 1128 1129 // Find the Instrumented BB and set the value. Return false on error. 1130 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1131 1132 // Set the edge counter value for the unknown edge -- there should be only 1133 // one unknown edge. 1134 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1135 1136 // Set the hot/cold inline hints based on the count values. 1137 // FIXME: This function should be removed once the functionality in 1138 // the inliner is implemented. 1139 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1140 if (PSI->isHotCount(EntryCount)) 1141 FreqAttr = FFA_Hot; 1142 else if (PSI->isColdCount(MaxCount)) 1143 FreqAttr = FFA_Cold; 1144 } 1145 }; 1146 1147 } // end anonymous namespace 1148 1149 /// Set up InEdges/OutEdges for all BBs in the MST. 1150 static void setupBBInfoEdges( 1151 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) { 1152 // This is not required when there is block coverage inference. 1153 if (FuncInfo.BCI) 1154 return; 1155 for (const auto &E : FuncInfo.MST.allEdges()) { 1156 if (E->Removed) 1157 continue; 1158 const BasicBlock *SrcBB = E->SrcBB; 1159 const BasicBlock *DestBB = E->DestBB; 1160 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB); 1161 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB); 1162 SrcInfo.addOutEdge(E.get()); 1163 DestInfo.addInEdge(E.get()); 1164 } 1165 } 1166 1167 // Visit all the edges and assign the count value for the instrumented 1168 // edges and the BB. Return false on error. 1169 bool PGOUseFunc::setInstrumentedCounts( 1170 const std::vector<uint64_t> &CountFromProfile) { 1171 1172 std::vector<BasicBlock *> InstrumentBBs; 1173 FuncInfo.getInstrumentBBs(InstrumentBBs); 1174 1175 setupBBInfoEdges(FuncInfo); 1176 1177 unsigned NumCounters = 1178 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1179 // The number of counters here should match the number of counters 1180 // in profile. Return if they mismatch. 1181 if (NumCounters != CountFromProfile.size()) { 1182 return false; 1183 } 1184 auto *FuncEntry = &*F.begin(); 1185 1186 // Set the profile count to the Instrumented BBs. 1187 uint32_t I = 0; 1188 for (BasicBlock *InstrBB : InstrumentBBs) { 1189 uint64_t CountValue = CountFromProfile[I++]; 1190 PGOUseBBInfo &Info = getBBInfo(InstrBB); 1191 // If we reach here, we know that we have some nonzero count 1192 // values in this function. The entry count should not be 0. 1193 // Fix it if necessary. 1194 if (InstrBB == FuncEntry && CountValue == 0) 1195 CountValue = 1; 1196 Info.setBBInfoCount(CountValue); 1197 } 1198 ProfileCountSize = CountFromProfile.size(); 1199 CountPosition = I; 1200 1201 // Set the edge count and update the count of unknown edges for BBs. 1202 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1203 E->setEdgeCount(Value); 1204 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1205 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1206 }; 1207 1208 // Set the profile count the Instrumented edges. There are BBs that not in 1209 // MST but not instrumented. Need to set the edge count value so that we can 1210 // populate the profile counts later. 1211 for (const auto &E : FuncInfo.MST.allEdges()) { 1212 if (E->Removed || E->InMST) 1213 continue; 1214 const BasicBlock *SrcBB = E->SrcBB; 1215 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB); 1216 1217 // If only one out-edge, the edge profile count should be the same as BB 1218 // profile count. 1219 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1220 setEdgeCount(E.get(), SrcInfo.CountValue); 1221 else { 1222 const BasicBlock *DestBB = E->DestBB; 1223 PGOUseBBInfo &DestInfo = getBBInfo(DestBB); 1224 // If only one in-edge, the edge profile count should be the same as BB 1225 // profile count. 1226 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1227 setEdgeCount(E.get(), DestInfo.CountValue); 1228 } 1229 if (E->CountValid) 1230 continue; 1231 // E's count should have been set from profile. If not, this meenas E skips 1232 // the instrumentation. We set the count to 0. 1233 setEdgeCount(E.get(), 0); 1234 } 1235 return true; 1236 } 1237 1238 // Set the count value for the unknown edge. There should be one and only one 1239 // unknown edge in Edges vector. 1240 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1241 for (auto &E : Edges) { 1242 if (E->CountValid) 1243 continue; 1244 E->setEdgeCount(Value); 1245 1246 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1247 getBBInfo(E->DestBB).UnknownCountInEdge--; 1248 return; 1249 } 1250 llvm_unreachable("Cannot find the unknown count edge"); 1251 } 1252 1253 // Emit function metadata indicating PGO profile mismatch. 1254 static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx) { 1255 const char MetadataName[] = "instr_prof_hash_mismatch"; 1256 SmallVector<Metadata *, 2> Names; 1257 // If this metadata already exists, ignore. 1258 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1259 if (Existing) { 1260 MDTuple *Tuple = cast<MDTuple>(Existing); 1261 for (const auto &N : Tuple->operands()) { 1262 if (N.equalsStr(MetadataName)) 1263 return; 1264 Names.push_back(N.get()); 1265 } 1266 } 1267 1268 MDBuilder MDB(ctx); 1269 Names.push_back(MDB.createString(MetadataName)); 1270 MDNode *MD = MDTuple::get(ctx, Names); 1271 F.setMetadata(LLVMContext::MD_annotation, MD); 1272 } 1273 1274 void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) { 1275 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) { 1276 auto &Ctx = M->getContext(); 1277 auto Err = IPE.get(); 1278 bool SkipWarning = false; 1279 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1280 << FuncInfo.FuncName << ": "); 1281 if (Err == instrprof_error::unknown_function) { 1282 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1283 SkipWarning = !PGOWarnMissing; 1284 LLVM_DEBUG(dbgs() << "unknown function"); 1285 } else if (Err == instrprof_error::hash_mismatch || 1286 Err == instrprof_error::malformed) { 1287 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1288 SkipWarning = 1289 NoPGOWarnMismatch || 1290 (NoPGOWarnMismatchComdatWeak && 1291 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage || 1292 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1293 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash 1294 << " skip=" << SkipWarning << ")"); 1295 // Emit function metadata indicating PGO profile mismatch. 1296 annotateFunctionWithHashMismatch(F, M->getContext()); 1297 } 1298 1299 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1300 if (SkipWarning) 1301 return; 1302 1303 std::string Msg = 1304 IPE.message() + std::string(" ") + F.getName().str() + 1305 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) + 1306 std::string(" up to ") + std::to_string(MismatchedFuncSum) + 1307 std::string(" count discarded"); 1308 1309 Ctx.diagnose( 1310 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1311 }); 1312 } 1313 1314 // Read the profile from ProfileFileName and assign the value to the 1315 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1316 // Return true if the profile are successfully read, and false on errors. 1317 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1318 InstrProfRecord::CountPseudoKind &PseudoKind) { 1319 auto &Ctx = M->getContext(); 1320 uint64_t MismatchedFuncSum = 0; 1321 Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( 1322 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName, 1323 &MismatchedFuncSum); 1324 if (Error E = Result.takeError()) { 1325 handleInstrProfError(std::move(E), MismatchedFuncSum); 1326 return false; 1327 } 1328 ProfileRecord = std::move(Result.get()); 1329 PseudoKind = ProfileRecord.getCountPseudoKind(); 1330 if (PseudoKind != InstrProfRecord::NotPseudo) { 1331 return true; 1332 } 1333 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1334 1335 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1336 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1337 1338 uint64_t ValueSum = 0; 1339 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1340 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1341 ValueSum += CountFromProfile[I]; 1342 } 1343 AllZeros = (ValueSum == 0); 1344 1345 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1346 1347 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1348 getBBInfo(nullptr).UnknownCountInEdge = 2; 1349 1350 if (!setInstrumentedCounts(CountFromProfile)) { 1351 LLVM_DEBUG( 1352 dbgs() << "Inconsistent number of counts, skipping this function"); 1353 Ctx.diagnose(DiagnosticInfoPGOProfile( 1354 M->getName().data(), 1355 Twine("Inconsistent number of counts in ") + F.getName().str() + 1356 Twine(": the profile may be stale or there is a function name " 1357 "collision."), 1358 DS_Warning)); 1359 return false; 1360 } 1361 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1362 return true; 1363 } 1364 1365 void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) { 1366 uint64_t MismatchedFuncSum = 0; 1367 Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( 1368 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName, 1369 &MismatchedFuncSum); 1370 if (auto Err = Result.takeError()) { 1371 handleInstrProfError(std::move(Err), MismatchedFuncSum); 1372 return; 1373 } 1374 1375 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts; 1376 DenseMap<const BasicBlock *, bool> Coverage; 1377 unsigned Index = 0; 1378 for (auto &BB : F) 1379 if (FuncInfo.BCI->shouldInstrumentBlock(BB)) 1380 Coverage[&BB] = (CountsFromProfile[Index++] != 0); 1381 assert(Index == CountsFromProfile.size()); 1382 1383 // For each B in InverseDependencies[A], if A is covered then B is covered. 1384 DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>> 1385 InverseDependencies; 1386 for (auto &BB : F) { 1387 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) { 1388 // If Dep is covered then BB is covered. 1389 InverseDependencies[Dep].insert(&BB); 1390 } 1391 } 1392 1393 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm. 1394 std::stack<const BasicBlock *> CoveredBlocksToProcess; 1395 for (auto &[BB, IsCovered] : Coverage) 1396 if (IsCovered) 1397 CoveredBlocksToProcess.push(BB); 1398 1399 while (!CoveredBlocksToProcess.empty()) { 1400 auto *CoveredBlock = CoveredBlocksToProcess.top(); 1401 assert(Coverage[CoveredBlock]); 1402 CoveredBlocksToProcess.pop(); 1403 for (auto *BB : InverseDependencies[CoveredBlock]) { 1404 // If CoveredBlock is covered then BB is covered. 1405 if (Coverage[BB]) 1406 continue; 1407 Coverage[BB] = true; 1408 CoveredBlocksToProcess.push(BB); 1409 } 1410 } 1411 1412 // Annotate block coverage. 1413 MDBuilder MDB(F.getContext()); 1414 // We set the entry count to 10000 if the entry block is covered so that BFI 1415 // can propagate a fraction of this count to the other covered blocks. 1416 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0); 1417 for (auto &BB : F) { 1418 // For a block A and its successor B, we set the edge weight as follows: 1419 // If A is covered and B is covered, set weight=1. 1420 // If A is covered and B is uncovered, set weight=0. 1421 // If A is uncovered, set weight=1. 1422 // This setup will allow BFI to give nonzero profile counts to only covered 1423 // blocks. 1424 SmallVector<uint32_t, 4> Weights; 1425 for (auto *Succ : successors(&BB)) 1426 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0); 1427 if (Weights.size() >= 2) 1428 llvm::setBranchWeights(*BB.getTerminator(), Weights); 1429 } 1430 1431 unsigned NumCorruptCoverage = 0; 1432 DominatorTree DT(F); 1433 LoopInfo LI(DT); 1434 BranchProbabilityInfo BPI(F, LI); 1435 BlockFrequencyInfo BFI(F, BPI, LI); 1436 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> { 1437 if (auto C = BFI.getBlockProfileCount(&BB)) 1438 return C == 0; 1439 return {}; 1440 }; 1441 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n"); 1442 for (auto &BB : F) { 1443 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ") 1444 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName() 1445 << "\n"); 1446 // In some cases it is possible to find a covered block that has no covered 1447 // successors, e.g., when a block calls a function that may call exit(). In 1448 // those cases, BFI could find its successor to be covered while BCI could 1449 // find its successor to be dead. 1450 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) { 1451 LLVM_DEBUG( 1452 dbgs() << "Found inconsistent block covearge for " << BB.getName() 1453 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI=" 1454 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n"); 1455 ++NumCorruptCoverage; 1456 } 1457 if (Coverage[&BB]) 1458 ++NumCoveredBlocks; 1459 } 1460 if (PGOVerifyBFI && NumCorruptCoverage) { 1461 auto &Ctx = M->getContext(); 1462 Ctx.diagnose(DiagnosticInfoPGOProfile( 1463 M->getName().data(), 1464 Twine("Found inconsistent block coverage for function ") + F.getName() + 1465 " in " + Twine(NumCorruptCoverage) + " blocks.", 1466 DS_Warning)); 1467 } 1468 if (PGOViewBlockCoverageGraph) 1469 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage); 1470 } 1471 1472 // Populate the counters from instrumented BBs to all BBs. 1473 // In the end of this operation, all BBs should have a valid count value. 1474 void PGOUseFunc::populateCounters() { 1475 bool Changes = true; 1476 unsigned NumPasses = 0; 1477 while (Changes) { 1478 NumPasses++; 1479 Changes = false; 1480 1481 // For efficient traversal, it's better to start from the end as most 1482 // of the instrumented edges are at the end. 1483 for (auto &BB : reverse(F)) { 1484 PGOUseBBInfo *Count = findBBInfo(&BB); 1485 if (Count == nullptr) 1486 continue; 1487 if (!Count->CountValid) { 1488 if (Count->UnknownCountOutEdge == 0) { 1489 Count->CountValue = sumEdgeCount(Count->OutEdges); 1490 Count->CountValid = true; 1491 Changes = true; 1492 } else if (Count->UnknownCountInEdge == 0) { 1493 Count->CountValue = sumEdgeCount(Count->InEdges); 1494 Count->CountValid = true; 1495 Changes = true; 1496 } 1497 } 1498 if (Count->CountValid) { 1499 if (Count->UnknownCountOutEdge == 1) { 1500 uint64_t Total = 0; 1501 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1502 // If the one of the successor block can early terminate (no-return), 1503 // we can end up with situation where out edge sum count is larger as 1504 // the source BB's count is collected by a post-dominated block. 1505 if (Count->CountValue > OutSum) 1506 Total = Count->CountValue - OutSum; 1507 setEdgeCount(Count->OutEdges, Total); 1508 Changes = true; 1509 } 1510 if (Count->UnknownCountInEdge == 1) { 1511 uint64_t Total = 0; 1512 uint64_t InSum = sumEdgeCount(Count->InEdges); 1513 if (Count->CountValue > InSum) 1514 Total = Count->CountValue - InSum; 1515 setEdgeCount(Count->InEdges, Total); 1516 Changes = true; 1517 } 1518 } 1519 } 1520 } 1521 1522 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1523 (void)NumPasses; 1524 #ifndef NDEBUG 1525 // Assert every BB has a valid counter. 1526 for (auto &BB : F) { 1527 auto BI = findBBInfo(&BB); 1528 if (BI == nullptr) 1529 continue; 1530 assert(BI->CountValid && "BB count is not valid"); 1531 } 1532 #endif 1533 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1534 uint64_t FuncMaxCount = FuncEntryCount; 1535 for (auto &BB : F) { 1536 auto BI = findBBInfo(&BB); 1537 if (BI == nullptr) 1538 continue; 1539 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1540 } 1541 1542 // Fix the obviously inconsistent entry count. 1543 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1544 FuncEntryCount = 1; 1545 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1546 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1547 1548 // Now annotate select instructions 1549 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition); 1550 assert(CountPosition == ProfileCountSize); 1551 1552 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1553 } 1554 1555 // Assign the scaled count values to the BB with multiple out edges. 1556 void PGOUseFunc::setBranchWeights() { 1557 // Generate MD_prof metadata for every branch instruction. 1558 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1559 << " IsCS=" << IsCS << "\n"); 1560 for (auto &BB : F) { 1561 Instruction *TI = BB.getTerminator(); 1562 if (TI->getNumSuccessors() < 2) 1563 continue; 1564 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1565 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) || 1566 isa<CallBrInst>(TI))) 1567 continue; 1568 1569 if (getBBInfo(&BB).CountValue == 0) 1570 continue; 1571 1572 // We have a non-zero Branch BB. 1573 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB); 1574 unsigned Size = BBCountInfo.OutEdges.size(); 1575 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1576 uint64_t MaxCount = 0; 1577 for (unsigned s = 0; s < Size; s++) { 1578 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1579 const BasicBlock *SrcBB = E->SrcBB; 1580 const BasicBlock *DestBB = E->DestBB; 1581 if (DestBB == nullptr) 1582 continue; 1583 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1584 uint64_t EdgeCount = E->CountValue; 1585 if (EdgeCount > MaxCount) 1586 MaxCount = EdgeCount; 1587 EdgeCounts[SuccNum] = EdgeCount; 1588 } 1589 1590 if (MaxCount) 1591 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1592 else { 1593 // A zero MaxCount can come about when we have a BB with a positive 1594 // count, and whose successor blocks all have 0 count. This can happen 1595 // when there is no exit block and the code exits via a noreturn function. 1596 auto &Ctx = M->getContext(); 1597 Ctx.diagnose(DiagnosticInfoPGOProfile( 1598 M->getName().data(), 1599 Twine("Profile in ") + F.getName().str() + 1600 Twine(" partially ignored") + 1601 Twine(", possibly due to the lack of a return path."), 1602 DS_Warning)); 1603 } 1604 } 1605 } 1606 1607 static bool isIndirectBrTarget(BasicBlock *BB) { 1608 for (BasicBlock *Pred : predecessors(BB)) { 1609 if (isa<IndirectBrInst>(Pred->getTerminator())) 1610 return true; 1611 } 1612 return false; 1613 } 1614 1615 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1616 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1617 // Find irr loop headers 1618 for (auto &BB : F) { 1619 // As a heuristic also annotate indrectbr targets as they have a high chance 1620 // to become an irreducible loop header after the indirectbr tail 1621 // duplication. 1622 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1623 Instruction *TI = BB.getTerminator(); 1624 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB); 1625 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1626 } 1627 } 1628 } 1629 1630 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1631 Module *M = F.getParent(); 1632 IRBuilder<> Builder(&SI); 1633 Type *Int64Ty = Builder.getInt64Ty(); 1634 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1635 Builder.CreateCall( 1636 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1637 {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1638 Builder.getInt32(*CurCtrIdx), Step}); 1639 ++(*CurCtrIdx); 1640 } 1641 1642 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1643 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1644 assert(*CurCtrIdx < CountFromProfile.size() && 1645 "Out of bound access of counters"); 1646 uint64_t SCounts[2]; 1647 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1648 ++(*CurCtrIdx); 1649 uint64_t TotalCount = 0; 1650 auto BI = UseFunc->findBBInfo(SI.getParent()); 1651 if (BI != nullptr) 1652 TotalCount = BI->CountValue; 1653 // False Count 1654 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1655 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1656 if (MaxCount) 1657 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1658 } 1659 1660 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1661 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage) 1662 return; 1663 // FIXME: do not handle this yet. 1664 if (SI.getCondition()->getType()->isVectorTy()) 1665 return; 1666 1667 switch (Mode) { 1668 case VM_counting: 1669 NSIs++; 1670 return; 1671 case VM_instrument: 1672 instrumentOneSelectInst(SI); 1673 return; 1674 case VM_annotate: 1675 annotateOneSelectInst(SI); 1676 return; 1677 } 1678 1679 llvm_unreachable("Unknown visiting mode"); 1680 } 1681 1682 // Traverse all valuesites and annotate the instructions for all value kind. 1683 void PGOUseFunc::annotateValueSites() { 1684 if (DisableValueProfiling) 1685 return; 1686 1687 // Create the PGOFuncName meta data. 1688 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1689 1690 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1691 annotateValueSites(Kind); 1692 } 1693 1694 // Annotate the instructions for a specific value kind. 1695 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1696 assert(Kind <= IPVK_Last); 1697 unsigned ValueSiteIndex = 0; 1698 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1699 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1700 if (NumValueSites != ValueSites.size()) { 1701 auto &Ctx = M->getContext(); 1702 Ctx.diagnose(DiagnosticInfoPGOProfile( 1703 M->getName().data(), 1704 Twine("Inconsistent number of value sites for ") + 1705 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") + 1706 F.getName().str() + 1707 Twine("\", possibly due to the use of a stale profile."), 1708 DS_Warning)); 1709 return; 1710 } 1711 1712 for (VPCandidateInfo &I : ValueSites) { 1713 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1714 << "): Index = " << ValueSiteIndex << " out of " 1715 << NumValueSites << "\n"); 1716 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1717 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1718 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1719 : MaxNumAnnotations); 1720 ValueSiteIndex++; 1721 } 1722 } 1723 1724 // Collect the set of members for each Comdat in module M and store 1725 // in ComdatMembers. 1726 static void collectComdatMembers( 1727 Module &M, 1728 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1729 if (!DoComdatRenaming) 1730 return; 1731 for (Function &F : M) 1732 if (Comdat *C = F.getComdat()) 1733 ComdatMembers.insert(std::make_pair(C, &F)); 1734 for (GlobalVariable &GV : M.globals()) 1735 if (Comdat *C = GV.getComdat()) 1736 ComdatMembers.insert(std::make_pair(C, &GV)); 1737 for (GlobalAlias &GA : M.aliases()) 1738 if (Comdat *C = GA.getComdat()) 1739 ComdatMembers.insert(std::make_pair(C, &GA)); 1740 } 1741 1742 // Return true if we should not find instrumentation data for this function 1743 static bool skipPGOUse(const Function &F) { 1744 if (F.isDeclaration()) 1745 return true; 1746 // If there are too many critical edges, PGO might cause 1747 // compiler time problem. Skip PGO if the number of 1748 // critical edges execeed the threshold. 1749 unsigned NumCriticalEdges = 0; 1750 for (auto &BB : F) { 1751 const Instruction *TI = BB.getTerminator(); 1752 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 1753 if (isCriticalEdge(TI, I)) 1754 NumCriticalEdges++; 1755 } 1756 } 1757 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) { 1758 LLVM_DEBUG(dbgs() << "In func " << F.getName() 1759 << ", NumCriticalEdges=" << NumCriticalEdges 1760 << " exceed the threshold. Skip PGO.\n"); 1761 return true; 1762 } 1763 return false; 1764 } 1765 1766 // Return true if we should not instrument this function 1767 static bool skipPGOGen(const Function &F) { 1768 if (skipPGOUse(F)) 1769 return true; 1770 if (F.hasFnAttribute(llvm::Attribute::Naked)) 1771 return true; 1772 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1773 return true; 1774 if (F.hasFnAttribute(llvm::Attribute::SkipProfile)) 1775 return true; 1776 if (F.getInstructionCount() < PGOFunctionSizeThreshold) 1777 return true; 1778 return false; 1779 } 1780 1781 static bool InstrumentAllFunctions( 1782 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1783 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1784 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1785 // For the context-sensitve instrumentation, we should have a separated pass 1786 // (before LTO/ThinLTO linking) to create these variables. 1787 if (!IsCS) 1788 createIRLevelProfileFlagVar(M, /*IsCS=*/false); 1789 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1790 collectComdatMembers(M, ComdatMembers); 1791 1792 for (auto &F : M) { 1793 if (skipPGOGen(F)) 1794 continue; 1795 auto &TLI = LookupTLI(F); 1796 auto *BPI = LookupBPI(F); 1797 auto *BFI = LookupBFI(F); 1798 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1799 } 1800 return true; 1801 } 1802 1803 PreservedAnalyses 1804 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &MAM) { 1805 createProfileFileNameVar(M, CSInstrName); 1806 // The variable in a comdat may be discarded by LTO. Ensure the declaration 1807 // will be retained. 1808 appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); 1809 PreservedAnalyses PA; 1810 PA.preserve<FunctionAnalysisManagerModuleProxy>(); 1811 PA.preserveSet<AllAnalysesOn<Function>>(); 1812 return PA; 1813 } 1814 1815 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1816 ModuleAnalysisManager &MAM) { 1817 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1818 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1819 return FAM.getResult<TargetLibraryAnalysis>(F); 1820 }; 1821 auto LookupBPI = [&FAM](Function &F) { 1822 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1823 }; 1824 auto LookupBFI = [&FAM](Function &F) { 1825 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1826 }; 1827 1828 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1829 return PreservedAnalyses::all(); 1830 1831 return PreservedAnalyses::none(); 1832 } 1833 1834 // Using the ratio b/w sums of profile count values and BFI count values to 1835 // adjust the func entry count. 1836 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1837 BranchProbabilityInfo &NBPI) { 1838 Function &F = Func.getFunc(); 1839 BlockFrequencyInfo NBFI(F, NBPI, LI); 1840 #ifndef NDEBUG 1841 auto BFIEntryCount = F.getEntryCount(); 1842 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) && 1843 "Invalid BFI Entrycount"); 1844 #endif 1845 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1846 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1847 for (auto &BBI : F) { 1848 uint64_t CountValue = 0; 1849 uint64_t BFICountValue = 0; 1850 if (!Func.findBBInfo(&BBI)) 1851 continue; 1852 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1853 CountValue = Func.getBBInfo(&BBI).CountValue; 1854 BFICountValue = *BFICount; 1855 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1856 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1857 } 1858 if (SumCount.isZero()) 1859 return; 1860 1861 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1862 "Incorrect sum of BFI counts"); 1863 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1864 return; 1865 double Scale = (SumCount / SumBFICount).convertToDouble(); 1866 if (Scale < 1.001 && Scale > 0.999) 1867 return; 1868 1869 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; 1870 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1871 if (NewEntryCount == 0) 1872 NewEntryCount = 1; 1873 if (NewEntryCount != FuncEntryCount) { 1874 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1875 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1876 << ", entry_count " << FuncEntryCount << " --> " 1877 << NewEntryCount << "\n"); 1878 } 1879 } 1880 1881 // Compare the profile count values with BFI count values, and print out 1882 // the non-matching ones. 1883 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 1884 BranchProbabilityInfo &NBPI, 1885 uint64_t HotCountThreshold, 1886 uint64_t ColdCountThreshold) { 1887 Function &F = Func.getFunc(); 1888 BlockFrequencyInfo NBFI(F, NBPI, LI); 1889 // bool PrintFunc = false; 1890 bool HotBBOnly = PGOVerifyHotBFI; 1891 StringRef Msg; 1892 OptimizationRemarkEmitter ORE(&F); 1893 1894 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 1895 for (auto &BBI : F) { 1896 uint64_t CountValue = 0; 1897 uint64_t BFICountValue = 0; 1898 1899 if (Func.getBBInfo(&BBI).CountValid) 1900 CountValue = Func.getBBInfo(&BBI).CountValue; 1901 1902 BBNum++; 1903 if (CountValue) 1904 NonZeroBBNum++; 1905 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1906 if (BFICount) 1907 BFICountValue = *BFICount; 1908 1909 if (HotBBOnly) { 1910 bool rawIsHot = CountValue >= HotCountThreshold; 1911 bool BFIIsHot = BFICountValue >= HotCountThreshold; 1912 bool rawIsCold = CountValue <= ColdCountThreshold; 1913 bool ShowCount = false; 1914 if (rawIsHot && !BFIIsHot) { 1915 Msg = "raw-Hot to BFI-nonHot"; 1916 ShowCount = true; 1917 } else if (rawIsCold && BFIIsHot) { 1918 Msg = "raw-Cold to BFI-Hot"; 1919 ShowCount = true; 1920 } 1921 if (!ShowCount) 1922 continue; 1923 } else { 1924 if ((CountValue < PGOVerifyBFICutoff) && 1925 (BFICountValue < PGOVerifyBFICutoff)) 1926 continue; 1927 uint64_t Diff = (BFICountValue >= CountValue) 1928 ? BFICountValue - CountValue 1929 : CountValue - BFICountValue; 1930 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) 1931 continue; 1932 } 1933 BBMisMatchNum++; 1934 1935 ORE.emit([&]() { 1936 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 1937 F.getSubprogram(), &BBI); 1938 Remark << "BB " << ore::NV("Block", BBI.getName()) 1939 << " Count=" << ore::NV("Count", CountValue) 1940 << " BFI_Count=" << ore::NV("Count", BFICountValue); 1941 if (!Msg.empty()) 1942 Remark << " (" << Msg << ")"; 1943 return Remark; 1944 }); 1945 } 1946 if (BBMisMatchNum) 1947 ORE.emit([&]() { 1948 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 1949 F.getSubprogram(), &F.getEntryBlock()) 1950 << "In Func " << ore::NV("Function", F.getName()) 1951 << ": Num_of_BB=" << ore::NV("Count", BBNum) 1952 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 1953 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 1954 }); 1955 } 1956 1957 static bool annotateAllFunctions( 1958 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1959 vfs::FileSystem &FS, 1960 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1961 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1962 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1963 ProfileSummaryInfo *PSI, bool IsCS) { 1964 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1965 auto &Ctx = M.getContext(); 1966 // Read the counter array from file. 1967 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS, 1968 ProfileRemappingFileName); 1969 if (Error E = ReaderOrErr.takeError()) { 1970 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1971 Ctx.diagnose( 1972 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1973 }); 1974 return false; 1975 } 1976 1977 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1978 std::move(ReaderOrErr.get()); 1979 if (!PGOReader) { 1980 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1981 StringRef("Cannot get PGOReader"))); 1982 return false; 1983 } 1984 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1985 return false; 1986 1987 // TODO: might need to change the warning once the clang option is finalized. 1988 if (!PGOReader->isIRLevelProfile()) { 1989 Ctx.diagnose(DiagnosticInfoPGOProfile( 1990 ProfileFileName.data(), "Not an IR level instrumentation profile")); 1991 return false; 1992 } 1993 if (PGOReader->functionEntryOnly()) { 1994 Ctx.diagnose(DiagnosticInfoPGOProfile( 1995 ProfileFileName.data(), 1996 "Function entry profiles are not yet supported for optimization")); 1997 return false; 1998 } 1999 2000 // Add the profile summary (read from the header of the indexed summary) here 2001 // so that we can use it below when reading counters (which checks if the 2002 // function should be marked with a cold or inlinehint attribute). 2003 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 2004 IsCS ? ProfileSummary::PSK_CSInstr 2005 : ProfileSummary::PSK_Instr); 2006 PSI->refresh(); 2007 2008 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 2009 collectComdatMembers(M, ComdatMembers); 2010 std::vector<Function *> HotFunctions; 2011 std::vector<Function *> ColdFunctions; 2012 2013 // If the profile marked as always instrument the entry BB, do the 2014 // same. Note this can be overwritten by the internal option in CFGMST.h 2015 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 2016 if (PGOInstrumentEntry.getNumOccurrences() > 0) 2017 InstrumentFuncEntry = PGOInstrumentEntry; 2018 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage(); 2019 for (auto &F : M) { 2020 if (skipPGOUse(F)) 2021 continue; 2022 auto &TLI = LookupTLI(F); 2023 auto *BPI = LookupBPI(F); 2024 auto *BFI = LookupBFI(F); 2025 if (!HasSingleByteCoverage) { 2026 // Split indirectbr critical edges here before computing the MST rather 2027 // than later in getInstrBB() to avoid invalidating it. 2028 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, 2029 BFI); 2030 } 2031 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 2032 InstrumentFuncEntry, HasSingleByteCoverage); 2033 if (HasSingleByteCoverage) { 2034 Func.populateCoverage(PGOReader.get()); 2035 continue; 2036 } 2037 // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo, 2038 // it means the profile for the function is unrepresentative and this 2039 // function is actually hot / warm. We will reset the function hot / cold 2040 // attribute and drop all the profile counters. 2041 InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo; 2042 bool AllZeros = false; 2043 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind)) 2044 continue; 2045 if (AllZeros) { 2046 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 2047 if (Func.getProgramMaxCount() != 0) 2048 ColdFunctions.push_back(&F); 2049 continue; 2050 } 2051 if (PseudoKind != InstrProfRecord::NotPseudo) { 2052 // Clear function attribute cold. 2053 if (F.hasFnAttribute(Attribute::Cold)) 2054 F.removeFnAttr(Attribute::Cold); 2055 // Set function attribute as hot. 2056 if (PseudoKind == InstrProfRecord::PseudoHot) 2057 F.addFnAttr(Attribute::Hot); 2058 continue; 2059 } 2060 Func.populateCounters(); 2061 Func.setBranchWeights(); 2062 Func.annotateValueSites(); 2063 Func.annotateIrrLoopHeaderWeights(); 2064 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 2065 if (FreqAttr == PGOUseFunc::FFA_Cold) 2066 ColdFunctions.push_back(&F); 2067 else if (FreqAttr == PGOUseFunc::FFA_Hot) 2068 HotFunctions.push_back(&F); 2069 if (PGOViewCounts != PGOVCT_None && 2070 (ViewBlockFreqFuncName.empty() || 2071 F.getName().equals(ViewBlockFreqFuncName))) { 2072 LoopInfo LI{DominatorTree(F)}; 2073 std::unique_ptr<BranchProbabilityInfo> NewBPI = 2074 std::make_unique<BranchProbabilityInfo>(F, LI); 2075 std::unique_ptr<BlockFrequencyInfo> NewBFI = 2076 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 2077 if (PGOViewCounts == PGOVCT_Graph) 2078 NewBFI->view(); 2079 else if (PGOViewCounts == PGOVCT_Text) { 2080 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 2081 NewBFI->print(dbgs()); 2082 } 2083 } 2084 if (PGOViewRawCounts != PGOVCT_None && 2085 (ViewBlockFreqFuncName.empty() || 2086 F.getName().equals(ViewBlockFreqFuncName))) { 2087 if (PGOViewRawCounts == PGOVCT_Graph) 2088 if (ViewBlockFreqFuncName.empty()) 2089 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 2090 else 2091 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 2092 else if (PGOViewRawCounts == PGOVCT_Text) { 2093 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 2094 Func.dumpInfo(); 2095 } 2096 } 2097 2098 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 2099 LoopInfo LI{DominatorTree(F)}; 2100 BranchProbabilityInfo NBPI(F, LI); 2101 2102 // Fix func entry count. 2103 if (PGOFixEntryCount) 2104 fixFuncEntryCount(Func, LI, NBPI); 2105 2106 // Verify BlockFrequency information. 2107 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 2108 if (PGOVerifyHotBFI) { 2109 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 2110 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 2111 } 2112 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 2113 } 2114 } 2115 2116 // Set function hotness attribute from the profile. 2117 // We have to apply these attributes at the end because their presence 2118 // can affect the BranchProbabilityInfo of any callers, resulting in an 2119 // inconsistent MST between prof-gen and prof-use. 2120 for (auto &F : HotFunctions) { 2121 F->addFnAttr(Attribute::InlineHint); 2122 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 2123 << "\n"); 2124 } 2125 for (auto &F : ColdFunctions) { 2126 // Only set when there is no Attribute::Hot set by the user. For Hot 2127 // attribute, user's annotation has the precedence over the profile. 2128 if (F->hasFnAttribute(Attribute::Hot)) { 2129 auto &Ctx = M.getContext(); 2130 std::string Msg = std::string("Function ") + F->getName().str() + 2131 std::string(" is annotated as a hot function but" 2132 " the profile is cold"); 2133 Ctx.diagnose( 2134 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 2135 continue; 2136 } 2137 F->addFnAttr(Attribute::Cold); 2138 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 2139 << "\n"); 2140 } 2141 return true; 2142 } 2143 2144 PGOInstrumentationUse::PGOInstrumentationUse( 2145 std::string Filename, std::string RemappingFilename, bool IsCS, 2146 IntrusiveRefCntPtr<vfs::FileSystem> VFS) 2147 : ProfileFileName(std::move(Filename)), 2148 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS), 2149 FS(std::move(VFS)) { 2150 if (!PGOTestProfileFile.empty()) 2151 ProfileFileName = PGOTestProfileFile; 2152 if (!PGOTestProfileRemappingFile.empty()) 2153 ProfileRemappingFileName = PGOTestProfileRemappingFile; 2154 if (!FS) 2155 FS = vfs::getRealFileSystem(); 2156 } 2157 2158 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 2159 ModuleAnalysisManager &MAM) { 2160 2161 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 2162 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 2163 return FAM.getResult<TargetLibraryAnalysis>(F); 2164 }; 2165 auto LookupBPI = [&FAM](Function &F) { 2166 return &FAM.getResult<BranchProbabilityAnalysis>(F); 2167 }; 2168 auto LookupBFI = [&FAM](Function &F) { 2169 return &FAM.getResult<BlockFrequencyAnalysis>(F); 2170 }; 2171 2172 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M); 2173 2174 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS, 2175 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 2176 return PreservedAnalyses::all(); 2177 2178 return PreservedAnalyses::none(); 2179 } 2180 2181 static std::string getSimpleNodeName(const BasicBlock *Node) { 2182 if (!Node->getName().empty()) 2183 return Node->getName().str(); 2184 2185 std::string SimpleNodeName; 2186 raw_string_ostream OS(SimpleNodeName); 2187 Node->printAsOperand(OS, false); 2188 return OS.str(); 2189 } 2190 2191 void llvm::setProfMetadata(Module *M, Instruction *TI, 2192 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) { 2193 assert(MaxCount > 0 && "Bad max count"); 2194 uint64_t Scale = calculateCountScale(MaxCount); 2195 SmallVector<unsigned, 4> Weights; 2196 for (const auto &ECI : EdgeCounts) 2197 Weights.push_back(scaleBranchCount(ECI, Scale)); 2198 2199 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 2200 : Weights) { 2201 dbgs() << W << " "; 2202 } dbgs() << "\n";); 2203 2204 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); 2205 2206 setBranchWeights(*TI, Weights); 2207 if (EmitBranchProbability) { 2208 std::string BrCondStr = getBranchCondString(TI); 2209 if (BrCondStr.empty()) 2210 return; 2211 2212 uint64_t WSum = 2213 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 2214 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 2215 uint64_t TotalCount = 2216 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 2217 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 2218 Scale = calculateCountScale(WSum); 2219 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 2220 scaleBranchCount(WSum, Scale)); 2221 std::string BranchProbStr; 2222 raw_string_ostream OS(BranchProbStr); 2223 OS << BP; 2224 OS << " (total count : " << TotalCount << ")"; 2225 OS.flush(); 2226 Function *F = TI->getParent()->getParent(); 2227 OptimizationRemarkEmitter ORE(F); 2228 ORE.emit([&]() { 2229 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 2230 << BrCondStr << " is true with probability : " << BranchProbStr; 2231 }); 2232 } 2233 } 2234 2235 namespace llvm { 2236 2237 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2238 MDBuilder MDB(M->getContext()); 2239 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2240 MDB.createIrrLoopHeaderWeight(Count)); 2241 } 2242 2243 template <> struct GraphTraits<PGOUseFunc *> { 2244 using NodeRef = const BasicBlock *; 2245 using ChildIteratorType = const_succ_iterator; 2246 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2247 2248 static NodeRef getEntryNode(const PGOUseFunc *G) { 2249 return &G->getFunc().front(); 2250 } 2251 2252 static ChildIteratorType child_begin(const NodeRef N) { 2253 return succ_begin(N); 2254 } 2255 2256 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2257 2258 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2259 return nodes_iterator(G->getFunc().begin()); 2260 } 2261 2262 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2263 return nodes_iterator(G->getFunc().end()); 2264 } 2265 }; 2266 2267 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2268 explicit DOTGraphTraits(bool isSimple = false) 2269 : DefaultDOTGraphTraits(isSimple) {} 2270 2271 static std::string getGraphName(const PGOUseFunc *G) { 2272 return std::string(G->getFunc().getName()); 2273 } 2274 2275 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2276 std::string Result; 2277 raw_string_ostream OS(Result); 2278 2279 OS << getSimpleNodeName(Node) << ":\\l"; 2280 PGOUseBBInfo *BI = Graph->findBBInfo(Node); 2281 OS << "Count : "; 2282 if (BI && BI->CountValid) 2283 OS << BI->CountValue << "\\l"; 2284 else 2285 OS << "Unknown\\l"; 2286 2287 if (!PGOInstrSelect) 2288 return Result; 2289 2290 for (const Instruction &I : *Node) { 2291 if (!isa<SelectInst>(&I)) 2292 continue; 2293 // Display scaled counts for SELECT instruction: 2294 OS << "SELECT : { T = "; 2295 uint64_t TC, FC; 2296 bool HasProf = extractBranchWeights(I, TC, FC); 2297 if (!HasProf) 2298 OS << "Unknown, F = Unknown }\\l"; 2299 else 2300 OS << TC << ", F = " << FC << " }\\l"; 2301 } 2302 return Result; 2303 } 2304 }; 2305 2306 } // end namespace llvm 2307