1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "ValueProfileCollector.h" 52 #include "llvm/ADT/APInt.h" 53 #include "llvm/ADT/ArrayRef.h" 54 #include "llvm/ADT/STLExtras.h" 55 #include "llvm/ADT/SmallVector.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/ADT/StringRef.h" 58 #include "llvm/ADT/Twine.h" 59 #include "llvm/ADT/iterator.h" 60 #include "llvm/ADT/iterator_range.h" 61 #include "llvm/Analysis/BlockFrequencyInfo.h" 62 #include "llvm/Analysis/BranchProbabilityInfo.h" 63 #include "llvm/Analysis/CFG.h" 64 #include "llvm/Analysis/LoopInfo.h" 65 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 66 #include "llvm/Analysis/ProfileSummaryInfo.h" 67 #include "llvm/Analysis/TargetLibraryInfo.h" 68 #include "llvm/IR/Attributes.h" 69 #include "llvm/IR/BasicBlock.h" 70 #include "llvm/IR/CFG.h" 71 #include "llvm/IR/Comdat.h" 72 #include "llvm/IR/Constant.h" 73 #include "llvm/IR/Constants.h" 74 #include "llvm/IR/DiagnosticInfo.h" 75 #include "llvm/IR/Dominators.h" 76 #include "llvm/IR/EHPersonalities.h" 77 #include "llvm/IR/Function.h" 78 #include "llvm/IR/GlobalAlias.h" 79 #include "llvm/IR/GlobalValue.h" 80 #include "llvm/IR/GlobalVariable.h" 81 #include "llvm/IR/IRBuilder.h" 82 #include "llvm/IR/InstVisitor.h" 83 #include "llvm/IR/InstrTypes.h" 84 #include "llvm/IR/Instruction.h" 85 #include "llvm/IR/Instructions.h" 86 #include "llvm/IR/IntrinsicInst.h" 87 #include "llvm/IR/Intrinsics.h" 88 #include "llvm/IR/LLVMContext.h" 89 #include "llvm/IR/MDBuilder.h" 90 #include "llvm/IR/Module.h" 91 #include "llvm/IR/PassManager.h" 92 #include "llvm/IR/ProfDataUtils.h" 93 #include "llvm/IR/ProfileSummary.h" 94 #include "llvm/IR/Type.h" 95 #include "llvm/IR/Value.h" 96 #include "llvm/ProfileData/InstrProf.h" 97 #include "llvm/ProfileData/InstrProfReader.h" 98 #include "llvm/Support/BranchProbability.h" 99 #include "llvm/Support/CRC.h" 100 #include "llvm/Support/Casting.h" 101 #include "llvm/Support/CommandLine.h" 102 #include "llvm/Support/DOTGraphTraits.h" 103 #include "llvm/Support/Debug.h" 104 #include "llvm/Support/Error.h" 105 #include "llvm/Support/ErrorHandling.h" 106 #include "llvm/Support/GraphWriter.h" 107 #include "llvm/Support/VirtualFileSystem.h" 108 #include "llvm/Support/raw_ostream.h" 109 #include "llvm/TargetParser/Triple.h" 110 #include "llvm/Transforms/Instrumentation.h" 111 #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" 112 #include "llvm/Transforms/Instrumentation/CFGMST.h" 113 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 114 #include "llvm/Transforms/Utils/MisExpect.h" 115 #include "llvm/Transforms/Utils/ModuleUtils.h" 116 #include <algorithm> 117 #include <cassert> 118 #include <cstdint> 119 #include <memory> 120 #include <numeric> 121 #include <optional> 122 #include <string> 123 #include <unordered_map> 124 #include <utility> 125 #include <vector> 126 127 using namespace llvm; 128 using ProfileCount = Function::ProfileCount; 129 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 130 131 #define DEBUG_TYPE "pgo-instrumentation" 132 133 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 134 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 135 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 136 STATISTIC(NumOfPGOEdge, "Number of edges."); 137 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 138 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 139 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 140 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 141 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 142 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 143 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 144 STATISTIC(NumOfCSPGOSelectInsts, 145 "Number of select instruction instrumented in CSPGO."); 146 STATISTIC(NumOfCSPGOMemIntrinsics, 147 "Number of mem intrinsics instrumented in CSPGO."); 148 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 149 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 150 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 151 STATISTIC(NumOfCSPGOFunc, 152 "Number of functions having valid profile counts in CSPGO."); 153 STATISTIC(NumOfCSPGOMismatch, 154 "Number of functions having mismatch profile in CSPGO."); 155 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 156 STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed"); 157 158 // Command line option to specify the file to read profile from. This is 159 // mainly used for testing. 160 static cl::opt<std::string> 161 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 162 cl::value_desc("filename"), 163 cl::desc("Specify the path of profile data file. This is" 164 "mainly for test purpose.")); 165 static cl::opt<std::string> PGOTestProfileRemappingFile( 166 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 167 cl::value_desc("filename"), 168 cl::desc("Specify the path of profile remapping file. This is mainly for " 169 "test purpose.")); 170 171 // Command line option to disable value profiling. The default is false: 172 // i.e. value profiling is enabled by default. This is for debug purpose. 173 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 174 cl::Hidden, 175 cl::desc("Disable Value Profiling")); 176 177 // Command line option to set the maximum number of VP annotations to write to 178 // the metadata for a single indirect call callsite. 179 static cl::opt<unsigned> MaxNumAnnotations( 180 "icp-max-annotations", cl::init(3), cl::Hidden, 181 cl::desc("Max number of annotations for a single indirect " 182 "call callsite")); 183 184 // Command line option to set the maximum number of value annotations 185 // to write to the metadata for a single memop intrinsic. 186 static cl::opt<unsigned> MaxNumMemOPAnnotations( 187 "memop-max-annotations", cl::init(4), cl::Hidden, 188 cl::desc("Max number of preicise value annotations for a single memop" 189 "intrinsic")); 190 191 // Command line option to control appending FunctionHash to the name of a COMDAT 192 // function. This is to avoid the hash mismatch caused by the preinliner. 193 static cl::opt<bool> DoComdatRenaming( 194 "do-comdat-renaming", cl::init(false), cl::Hidden, 195 cl::desc("Append function hash to the name of COMDAT function to avoid " 196 "function hash mismatch due to the preinliner")); 197 198 namespace llvm { 199 // Command line option to enable/disable the warning about missing profile 200 // information. 201 cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false), 202 cl::Hidden, 203 cl::desc("Use this option to turn on/off " 204 "warnings about missing profile data for " 205 "functions.")); 206 207 // Command line option to enable/disable the warning about a hash mismatch in 208 // the profile data. 209 cl::opt<bool> 210 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 211 cl::desc("Use this option to turn off/on " 212 "warnings about profile cfg mismatch.")); 213 214 // Command line option to enable/disable the warning about a hash mismatch in 215 // the profile data for Comdat functions, which often turns out to be false 216 // positive due to the pre-instrumentation inline. 217 cl::opt<bool> NoPGOWarnMismatchComdatWeak( 218 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden, 219 cl::desc("The option is used to turn on/off " 220 "warnings about hash mismatch for comdat " 221 "or weak functions.")); 222 } // namespace llvm 223 224 // Command line option to enable/disable select instruction instrumentation. 225 static cl::opt<bool> 226 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 227 cl::desc("Use this option to turn on/off SELECT " 228 "instruction instrumentation. ")); 229 230 // Command line option to turn on CFG dot or text dump of raw profile counts 231 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 232 "pgo-view-raw-counts", cl::Hidden, 233 cl::desc("A boolean option to show CFG dag or text " 234 "with raw profile counts from " 235 "profile data. See also option " 236 "-pgo-view-counts. To limit graph " 237 "display to only one function, use " 238 "filtering option -view-bfi-func-name."), 239 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 240 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 241 clEnumValN(PGOVCT_Text, "text", "show in text."))); 242 243 // Command line option to enable/disable memop intrinsic call.size profiling. 244 static cl::opt<bool> 245 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 246 cl::desc("Use this option to turn on/off " 247 "memory intrinsic size profiling.")); 248 249 // Emit branch probability as optimization remarks. 250 static cl::opt<bool> 251 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 252 cl::desc("When this option is on, the annotated " 253 "branch probability will be emitted as " 254 "optimization remarks: -{Rpass|" 255 "pass-remarks}=pgo-instrumentation")); 256 257 static cl::opt<bool> PGOInstrumentEntry( 258 "pgo-instrument-entry", cl::init(false), cl::Hidden, 259 cl::desc("Force to instrument function entry basicblock.")); 260 261 static cl::opt<bool> PGOFunctionEntryCoverage( 262 "pgo-function-entry-coverage", cl::Hidden, 263 cl::desc( 264 "Use this option to enable function entry coverage instrumentation.")); 265 266 static cl::opt<bool> PGOBlockCoverage( 267 "pgo-block-coverage", 268 cl::desc("Use this option to enable basic block coverage instrumentation")); 269 270 static cl::opt<bool> 271 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", 272 cl::desc("Create a dot file of CFGs with block " 273 "coverage inference information")); 274 275 static cl::opt<bool> PGOTemporalInstrumentation( 276 "pgo-temporal-instrumentation", 277 cl::desc("Use this option to enable temporal instrumentation")); 278 279 static cl::opt<bool> 280 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 281 cl::desc("Fix function entry count in profile use.")); 282 283 static cl::opt<bool> PGOVerifyHotBFI( 284 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 285 cl::desc("Print out the non-match BFI count if a hot raw profile count " 286 "becomes non-hot, or a cold raw profile count becomes hot. " 287 "The print is enabled under -Rpass-analysis=pgo, or " 288 "internal option -pass-remakrs-analysis=pgo.")); 289 290 static cl::opt<bool> PGOVerifyBFI( 291 "pgo-verify-bfi", cl::init(false), cl::Hidden, 292 cl::desc("Print out mismatched BFI counts after setting profile metadata " 293 "The print is enabled under -Rpass-analysis=pgo, or " 294 "internal option -pass-remakrs-analysis=pgo.")); 295 296 static cl::opt<unsigned> PGOVerifyBFIRatio( 297 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, 298 cl::desc("Set the threshold for pgo-verify-bfi: only print out " 299 "mismatched BFI if the difference percentage is greater than " 300 "this value (in percentage).")); 301 302 static cl::opt<unsigned> PGOVerifyBFICutoff( 303 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, 304 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " 305 "profile count value is below.")); 306 307 static cl::opt<std::string> PGOTraceFuncHash( 308 "pgo-trace-func-hash", cl::init("-"), cl::Hidden, 309 cl::value_desc("function name"), 310 cl::desc("Trace the hash of the function with this name.")); 311 312 static cl::opt<unsigned> PGOFunctionSizeThreshold( 313 "pgo-function-size-threshold", cl::Hidden, 314 cl::desc("Do not instrument functions smaller than this threshold.")); 315 316 static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold( 317 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, 318 cl::desc("Do not instrument functions with the number of critical edges " 319 " greater than this threshold.")); 320 321 namespace llvm { 322 // Command line option to turn on CFG dot dump after profile annotation. 323 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 324 extern cl::opt<PGOViewCountsType> PGOViewCounts; 325 326 // Command line option to specify the name of the function for CFG dump 327 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 328 extern cl::opt<std::string> ViewBlockFreqFuncName; 329 330 extern cl::opt<bool> DebugInfoCorrelate; 331 } // namespace llvm 332 333 static cl::opt<bool> 334 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, 335 cl::desc("Use the old CFG function hashing")); 336 337 // Return a string describing the branch condition that can be 338 // used in static branch probability heuristics: 339 static std::string getBranchCondString(Instruction *TI) { 340 BranchInst *BI = dyn_cast<BranchInst>(TI); 341 if (!BI || !BI->isConditional()) 342 return std::string(); 343 344 Value *Cond = BI->getCondition(); 345 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 346 if (!CI) 347 return std::string(); 348 349 std::string result; 350 raw_string_ostream OS(result); 351 OS << CI->getPredicate() << "_"; 352 CI->getOperand(0)->getType()->print(OS, true); 353 354 Value *RHS = CI->getOperand(1); 355 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 356 if (CV) { 357 if (CV->isZero()) 358 OS << "_Zero"; 359 else if (CV->isOne()) 360 OS << "_One"; 361 else if (CV->isMinusOne()) 362 OS << "_MinusOne"; 363 else 364 OS << "_Const"; 365 } 366 OS.flush(); 367 return result; 368 } 369 370 static const char *ValueProfKindDescr[] = { 371 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 372 #include "llvm/ProfileData/InstrProfData.inc" 373 }; 374 375 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 376 // aware this is an ir_level profile so it can set the version flag. 377 static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { 378 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 379 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 380 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 381 if (IsCS) 382 ProfileVersion |= VARIANT_MASK_CSIR_PROF; 383 if (PGOInstrumentEntry) 384 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; 385 if (DebugInfoCorrelate) 386 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; 387 if (PGOFunctionEntryCoverage) 388 ProfileVersion |= 389 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; 390 if (PGOBlockCoverage) 391 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE; 392 if (PGOTemporalInstrumentation) 393 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF; 394 auto IRLevelVersionVariable = new GlobalVariable( 395 M, IntTy64, true, GlobalValue::WeakAnyLinkage, 396 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); 397 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility); 398 Triple TT(M.getTargetTriple()); 399 if (TT.supportsCOMDAT()) { 400 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); 401 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); 402 } 403 return IRLevelVersionVariable; 404 } 405 406 namespace { 407 408 /// The select instruction visitor plays three roles specified 409 /// by the mode. In \c VM_counting mode, it simply counts the number of 410 /// select instructions. In \c VM_instrument mode, it inserts code to count 411 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 412 /// it reads the profile data and annotate the select instruction with metadata. 413 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 414 class PGOUseFunc; 415 416 /// Instruction Visitor class to visit select instructions. 417 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 418 Function &F; 419 unsigned NSIs = 0; // Number of select instructions instrumented. 420 VisitMode Mode = VM_counting; // Visiting mode. 421 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 422 unsigned TotalNumCtrs = 0; // Total number of counters 423 GlobalVariable *FuncNameVar = nullptr; 424 uint64_t FuncHash = 0; 425 PGOUseFunc *UseFunc = nullptr; 426 bool HasSingleByteCoverage; 427 428 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage) 429 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {} 430 431 void countSelects() { 432 NSIs = 0; 433 Mode = VM_counting; 434 visit(F); 435 } 436 437 // Visit the IR stream and instrument all select instructions. \p 438 // Ind is a pointer to the counter index variable; \p TotalNC 439 // is the total number of counters; \p FNV is the pointer to the 440 // PGO function name var; \p FHash is the function hash. 441 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV, 442 uint64_t FHash) { 443 Mode = VM_instrument; 444 CurCtrIdx = Ind; 445 TotalNumCtrs = TotalNC; 446 FuncHash = FHash; 447 FuncNameVar = FNV; 448 visit(F); 449 } 450 451 // Visit the IR stream and annotate all select instructions. 452 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) { 453 Mode = VM_annotate; 454 UseFunc = UF; 455 CurCtrIdx = Ind; 456 visit(F); 457 } 458 459 void instrumentOneSelectInst(SelectInst &SI); 460 void annotateOneSelectInst(SelectInst &SI); 461 462 // Visit \p SI instruction and perform tasks according to visit mode. 463 void visitSelectInst(SelectInst &SI); 464 465 // Return the number of select instructions. This needs be called after 466 // countSelects(). 467 unsigned getNumOfSelectInsts() const { return NSIs; } 468 }; 469 470 /// This class implements the CFG edges for the Minimum Spanning Tree (MST) 471 /// based instrumentation. 472 /// Note that the CFG can be a multi-graph. So there might be multiple edges 473 /// with the same SrcBB and DestBB. 474 struct PGOEdge { 475 BasicBlock *SrcBB; 476 BasicBlock *DestBB; 477 uint64_t Weight; 478 bool InMST = false; 479 bool Removed = false; 480 bool IsCritical = false; 481 482 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1) 483 : SrcBB(Src), DestBB(Dest), Weight(W) {} 484 485 /// Return the information string of an edge. 486 std::string infoString() const { 487 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 488 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)) 489 .str(); 490 } 491 }; 492 493 /// This class stores the auxiliary information for each BB in the MST. 494 struct PGOBBInfo { 495 PGOBBInfo *Group; 496 uint32_t Index; 497 uint32_t Rank = 0; 498 499 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {} 500 501 /// Return the information string of this object. 502 std::string infoString() const { 503 return (Twine("Index=") + Twine(Index)).str(); 504 } 505 }; 506 507 // This class implements the CFG edges. Note the CFG can be a multi-graph. 508 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 509 private: 510 Function &F; 511 512 // Is this is context-sensitive instrumentation. 513 bool IsCS; 514 515 // A map that stores the Comdat group in function F. 516 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 517 518 ValueProfileCollector VPC; 519 520 void computeCFGHash(); 521 void renameComdatFunction(); 522 523 public: 524 const TargetLibraryInfo &TLI; 525 std::vector<std::vector<VPCandidateInfo>> ValueSites; 526 SelectInstVisitor SIVisitor; 527 std::string FuncName; 528 GlobalVariable *FuncNameVar; 529 530 // CFG hash value for this function. 531 uint64_t FunctionHash = 0; 532 533 // The Minimum Spanning Tree of function CFG. 534 CFGMST<Edge, BBInfo> MST; 535 536 const std::optional<BlockCoverageInference> BCI; 537 538 static std::optional<BlockCoverageInference> 539 constructBCI(Function &Func, bool HasSingleByteCoverage, 540 bool InstrumentFuncEntry) { 541 if (HasSingleByteCoverage) 542 return BlockCoverageInference(Func, InstrumentFuncEntry); 543 return {}; 544 } 545 546 // Collect all the BBs that will be instrumented, and store them in 547 // InstrumentBBs. 548 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 549 550 // Give an edge, find the BB that will be instrumented. 551 // Return nullptr if there is no BB to be instrumented. 552 BasicBlock *getInstrBB(Edge *E); 553 554 // Return the auxiliary BB information. 555 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 556 557 // Return the auxiliary BB information if available. 558 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 559 560 // Dump edges and BB information. 561 void dumpInfo(StringRef Str = "") const { 562 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + 563 " Hash: " + Twine(FunctionHash) + "\t" + Str); 564 } 565 566 FuncPGOInstrumentation( 567 Function &Func, TargetLibraryInfo &TLI, 568 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 569 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 570 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 571 bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false) 572 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 573 TLI(TLI), ValueSites(IPVK_Last + 1), 574 SIVisitor(Func, HasSingleByteCoverage), 575 MST(F, InstrumentFuncEntry, BPI, BFI), 576 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) { 577 if (BCI && PGOViewBlockCoverageGraph) 578 BCI->viewBlockCoverageGraph(); 579 // This should be done before CFG hash computation. 580 SIVisitor.countSelects(); 581 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 582 if (!IsCS) { 583 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 584 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 585 NumOfPGOBB += MST.BBInfos.size(); 586 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 587 } else { 588 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 589 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 590 NumOfCSPGOBB += MST.BBInfos.size(); 591 } 592 593 FuncName = getPGOFuncName(F); 594 computeCFGHash(); 595 if (!ComdatMembers.empty()) 596 renameComdatFunction(); 597 LLVM_DEBUG(dumpInfo("after CFGMST")); 598 599 for (auto &E : MST.AllEdges) { 600 if (E->Removed) 601 continue; 602 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 603 if (!E->InMST) 604 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 605 } 606 607 if (CreateGlobalVar) 608 FuncNameVar = createPGOFuncNameVar(F, FuncName); 609 } 610 }; 611 612 } // end anonymous namespace 613 614 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 615 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 616 // of selects, indirect calls, mem ops and edges. 617 template <class Edge, class BBInfo> 618 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 619 std::vector<uint8_t> Indexes; 620 JamCRC JC; 621 for (auto &BB : F) { 622 const Instruction *TI = BB.getTerminator(); 623 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 624 BasicBlock *Succ = TI->getSuccessor(I); 625 auto BI = findBBInfo(Succ); 626 if (BI == nullptr) 627 continue; 628 uint32_t Index = BI->Index; 629 for (int J = 0; J < 4; J++) 630 Indexes.push_back((uint8_t)(Index >> (J * 8))); 631 } 632 } 633 JC.update(Indexes); 634 635 JamCRC JCH; 636 if (PGOOldCFGHashing) { 637 // Hash format for context sensitive profile. Reserve 4 bits for other 638 // information. 639 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 640 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 641 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 642 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 643 } else { 644 // The higher 32 bits. 645 auto updateJCH = [&JCH](uint64_t Num) { 646 uint8_t Data[8]; 647 support::endian::write64le(Data, Num); 648 JCH.update(Data); 649 }; 650 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 651 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 652 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 653 if (BCI) { 654 updateJCH(BCI->getInstrumentedBlocksHash()); 655 } else { 656 updateJCH((uint64_t)MST.AllEdges.size()); 657 } 658 659 // Hash format for context sensitive profile. Reserve 4 bits for other 660 // information. 661 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 662 } 663 664 // Reserve bit 60-63 for other information purpose. 665 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 666 if (IsCS) 667 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 668 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 669 << " CRC = " << JC.getCRC() 670 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 671 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 672 << ValueSites[IPVK_IndirectCallTarget].size()); 673 if (!PGOOldCFGHashing) { 674 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 675 << ", High32 CRC = " << JCH.getCRC()); 676 } 677 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); 678 679 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash)) 680 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash 681 << " in building " << F.getParent()->getSourceFileName() << "\n"; 682 } 683 684 // Check if we can safely rename this Comdat function. 685 static bool canRenameComdat( 686 Function &F, 687 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 688 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 689 return false; 690 691 // FIXME: Current only handle those Comdat groups that only containing one 692 // function. 693 // (1) For a Comdat group containing multiple functions, we need to have a 694 // unique postfix based on the hashes for each function. There is a 695 // non-trivial code refactoring to do this efficiently. 696 // (2) Variables can not be renamed, so we can not rename Comdat function in a 697 // group including global vars. 698 Comdat *C = F.getComdat(); 699 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 700 assert(!isa<GlobalAlias>(CM.second)); 701 Function *FM = dyn_cast<Function>(CM.second); 702 if (FM != &F) 703 return false; 704 } 705 return true; 706 } 707 708 // Append the CFGHash to the Comdat function name. 709 template <class Edge, class BBInfo> 710 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 711 if (!canRenameComdat(F, ComdatMembers)) 712 return; 713 std::string OrigName = F.getName().str(); 714 std::string NewFuncName = 715 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 716 F.setName(Twine(NewFuncName)); 717 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 718 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 719 Comdat *NewComdat; 720 Module *M = F.getParent(); 721 // For AvailableExternallyLinkage functions, change the linkage to 722 // LinkOnceODR and put them into comdat. This is because after renaming, there 723 // is no backup external copy available for the function. 724 if (!F.hasComdat()) { 725 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 726 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 727 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 728 F.setComdat(NewComdat); 729 return; 730 } 731 732 // This function belongs to a single function Comdat group. 733 Comdat *OrigComdat = F.getComdat(); 734 std::string NewComdatName = 735 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 736 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 737 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 738 739 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 740 // Must be a function. 741 cast<Function>(CM.second)->setComdat(NewComdat); 742 } 743 } 744 745 /// Collect all the BBs that will be instruments and add them to 746 /// `InstrumentBBs`. 747 template <class Edge, class BBInfo> 748 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 749 std::vector<BasicBlock *> &InstrumentBBs) { 750 if (BCI) { 751 for (auto &BB : F) 752 if (BCI->shouldInstrumentBlock(BB)) 753 InstrumentBBs.push_back(&BB); 754 return; 755 } 756 757 // Use a worklist as we will update the vector during the iteration. 758 std::vector<Edge *> EdgeList; 759 EdgeList.reserve(MST.AllEdges.size()); 760 for (auto &E : MST.AllEdges) 761 EdgeList.push_back(E.get()); 762 763 for (auto &E : EdgeList) { 764 BasicBlock *InstrBB = getInstrBB(E); 765 if (InstrBB) 766 InstrumentBBs.push_back(InstrBB); 767 } 768 } 769 770 // Given a CFG E to be instrumented, find which BB to place the instrumented 771 // code. The function will split the critical edge if necessary. 772 template <class Edge, class BBInfo> 773 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 774 if (E->InMST || E->Removed) 775 return nullptr; 776 777 BasicBlock *SrcBB = E->SrcBB; 778 BasicBlock *DestBB = E->DestBB; 779 // For a fake edge, instrument the real BB. 780 if (SrcBB == nullptr) 781 return DestBB; 782 if (DestBB == nullptr) 783 return SrcBB; 784 785 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 786 // There are basic blocks (such as catchswitch) cannot be instrumented. 787 // If the returned first insertion point is the end of BB, skip this BB. 788 if (BB->getFirstInsertionPt() == BB->end()) 789 return nullptr; 790 return BB; 791 }; 792 793 // Instrument the SrcBB if it has a single successor, 794 // otherwise, the DestBB if this is not a critical edge. 795 Instruction *TI = SrcBB->getTerminator(); 796 if (TI->getNumSuccessors() <= 1) 797 return canInstrument(SrcBB); 798 if (!E->IsCritical) 799 return canInstrument(DestBB); 800 801 // Some IndirectBr critical edges cannot be split by the previous 802 // SplitIndirectBrCriticalEdges call. Bail out. 803 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 804 BasicBlock *InstrBB = 805 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 806 if (!InstrBB) { 807 LLVM_DEBUG( 808 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 809 return nullptr; 810 } 811 // For a critical edge, we have to split. Instrument the newly 812 // created BB. 813 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 814 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 815 << " --> " << getBBInfo(DestBB).Index << "\n"); 816 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 817 MST.addEdge(SrcBB, InstrBB, 0); 818 // Second one: Add new edge of InstrBB->DestBB. 819 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 820 NewEdge1.InMST = true; 821 E->Removed = true; 822 823 return canInstrument(InstrBB); 824 } 825 826 // When generating value profiling calls on Windows routines that make use of 827 // handler funclets for exception processing an operand bundle needs to attached 828 // to the called function. This routine will set \p OpBundles to contain the 829 // funclet information, if any is needed, that should be placed on the generated 830 // value profiling call for the value profile candidate call. 831 static void 832 populateEHOperandBundle(VPCandidateInfo &Cand, 833 DenseMap<BasicBlock *, ColorVector> &BlockColors, 834 SmallVectorImpl<OperandBundleDef> &OpBundles) { 835 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 836 if (!OrigCall) 837 return; 838 839 if (!isa<IntrinsicInst>(OrigCall)) { 840 // The instrumentation call should belong to the same funclet as a 841 // non-intrinsic call, so just copy the operand bundle, if any exists. 842 std::optional<OperandBundleUse> ParentFunclet = 843 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 844 if (ParentFunclet) 845 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 846 } else { 847 // Intrinsics or other instructions do not get funclet information from the 848 // front-end. Need to use the BlockColors that was computed by the routine 849 // colorEHFunclets to determine whether a funclet is needed. 850 if (!BlockColors.empty()) { 851 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 852 assert(CV.size() == 1 && "non-unique color for block!"); 853 Instruction *EHPad = CV.front()->getFirstNonPHI(); 854 if (EHPad->isEHPad()) 855 OpBundles.emplace_back("funclet", EHPad); 856 } 857 } 858 } 859 860 // Visit all edge and instrument the edges not in MST, and do value profiling. 861 // Critical edges will be split. 862 static void instrumentOneFunc( 863 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 864 BlockFrequencyInfo *BFI, 865 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 866 bool IsCS) { 867 if (!PGOBlockCoverage) { 868 // Split indirectbr critical edges here before computing the MST rather than 869 // later in getInstrBB() to avoid invalidating it. 870 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 871 } 872 873 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo( 874 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry, 875 PGOBlockCoverage); 876 877 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 878 auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy); 879 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), 880 FuncInfo.FunctionHash); 881 if (PGOFunctionEntryCoverage) { 882 auto &EntryBB = F.getEntryBlock(); 883 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 884 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, 885 // i32 <index>) 886 Builder.CreateCall( 887 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover), 888 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); 889 return; 890 } 891 892 std::vector<BasicBlock *> InstrumentBBs; 893 FuncInfo.getInstrumentBBs(InstrumentBBs); 894 unsigned NumCounters = 895 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 896 897 uint32_t I = 0; 898 if (PGOTemporalInstrumentation) { 899 NumCounters += PGOBlockCoverage ? 8 : 1; 900 auto &EntryBB = F.getEntryBlock(); 901 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 902 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>, 903 // i32 <index>) 904 Builder.CreateCall( 905 Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp), 906 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)}); 907 I += PGOBlockCoverage ? 8 : 1; 908 } 909 910 for (auto *InstrBB : InstrumentBBs) { 911 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 912 assert(Builder.GetInsertPoint() != InstrBB->end() && 913 "Cannot get the Instrumentation point"); 914 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, 915 // i32 <index>) 916 Builder.CreateCall( 917 Intrinsic::getDeclaration(M, PGOBlockCoverage 918 ? Intrinsic::instrprof_cover 919 : Intrinsic::instrprof_increment), 920 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)}); 921 } 922 923 // Now instrument select instructions: 924 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar, 925 FuncInfo.FunctionHash); 926 assert(I == NumCounters); 927 928 if (DisableValueProfiling) 929 return; 930 931 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 932 933 // Intrinsic function calls do not have funclet operand bundles needed for 934 // Windows exception handling attached to them. However, if value profiling is 935 // inserted for one of these calls, then a funclet value will need to be set 936 // on the instrumentation call based on the funclet coloring. 937 DenseMap<BasicBlock *, ColorVector> BlockColors; 938 if (F.hasPersonalityFn() && 939 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 940 BlockColors = colorEHFunclets(F); 941 942 // For each VP Kind, walk the VP candidates and instrument each one. 943 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 944 unsigned SiteIndex = 0; 945 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 946 continue; 947 948 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 949 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 950 << " site: CallSite Index = " << SiteIndex << "\n"); 951 952 IRBuilder<> Builder(Cand.InsertPt); 953 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 954 "Cannot get the Instrumentation point"); 955 956 Value *ToProfile = nullptr; 957 if (Cand.V->getType()->isIntegerTy()) 958 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 959 else if (Cand.V->getType()->isPointerTy()) 960 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 961 assert(ToProfile && "value profiling Value is of unexpected type"); 962 963 SmallVector<OperandBundleDef, 1> OpBundles; 964 populateEHOperandBundle(Cand, BlockColors, OpBundles); 965 Builder.CreateCall( 966 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 967 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 968 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 969 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 970 OpBundles); 971 } 972 } // IPVK_First <= Kind <= IPVK_Last 973 } 974 975 namespace { 976 977 // This class represents a CFG edge in profile use compilation. 978 struct PGOUseEdge : public PGOEdge { 979 using PGOEdge::PGOEdge; 980 981 bool CountValid = false; 982 uint64_t CountValue = 0; 983 984 // Set edge count value 985 void setEdgeCount(uint64_t Value) { 986 CountValue = Value; 987 CountValid = true; 988 } 989 990 // Return the information string for this object. 991 std::string infoString() const { 992 if (!CountValid) 993 return PGOEdge::infoString(); 994 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 995 .str(); 996 } 997 }; 998 999 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 1000 1001 // This class stores the auxiliary information for each BB. 1002 struct PGOUseBBInfo : public PGOBBInfo { 1003 uint64_t CountValue = 0; 1004 bool CountValid; 1005 int32_t UnknownCountInEdge = 0; 1006 int32_t UnknownCountOutEdge = 0; 1007 DirectEdges InEdges; 1008 DirectEdges OutEdges; 1009 1010 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX), CountValid(false) {} 1011 1012 // Set the profile count value for this BB. 1013 void setBBInfoCount(uint64_t Value) { 1014 CountValue = Value; 1015 CountValid = true; 1016 } 1017 1018 // Return the information string of this object. 1019 std::string infoString() const { 1020 if (!CountValid) 1021 return PGOBBInfo::infoString(); 1022 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(CountValue)) 1023 .str(); 1024 } 1025 1026 // Add an OutEdge and update the edge count. 1027 void addOutEdge(PGOUseEdge *E) { 1028 OutEdges.push_back(E); 1029 UnknownCountOutEdge++; 1030 } 1031 1032 // Add an InEdge and update the edge count. 1033 void addInEdge(PGOUseEdge *E) { 1034 InEdges.push_back(E); 1035 UnknownCountInEdge++; 1036 } 1037 }; 1038 1039 } // end anonymous namespace 1040 1041 // Sum up the count values for all the edges. 1042 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 1043 uint64_t Total = 0; 1044 for (const auto &E : Edges) { 1045 if (E->Removed) 1046 continue; 1047 Total += E->CountValue; 1048 } 1049 return Total; 1050 } 1051 1052 namespace { 1053 1054 class PGOUseFunc { 1055 public: 1056 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1057 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1058 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1059 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry, 1060 bool HasSingleByteCoverage) 1061 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1062 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1063 InstrumentFuncEntry, HasSingleByteCoverage), 1064 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1065 1066 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum); 1067 1068 // Read counts for the instrumented BB from profile. 1069 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1070 InstrProfRecord::CountPseudoKind &PseudoKind); 1071 1072 // Populate the counts for all BBs. 1073 void populateCounters(); 1074 1075 // Set block coverage based on profile coverage values. 1076 void populateCoverage(IndexedInstrProfReader *PGOReader); 1077 1078 // Set the branch weights based on the count values. 1079 void setBranchWeights(); 1080 1081 // Annotate the value profile call sites for all value kind. 1082 void annotateValueSites(); 1083 1084 // Annotate the value profile call sites for one value kind. 1085 void annotateValueSites(uint32_t Kind); 1086 1087 // Annotate the irreducible loop header weights. 1088 void annotateIrrLoopHeaderWeights(); 1089 1090 // The hotness of the function from the profile count. 1091 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1092 1093 // Return the function hotness from the profile. 1094 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1095 1096 // Return the function hash. 1097 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1098 1099 // Return the profile record for this function; 1100 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1101 1102 // Return the auxiliary BB information. 1103 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const { 1104 return FuncInfo.getBBInfo(BB); 1105 } 1106 1107 // Return the auxiliary BB information if available. 1108 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const { 1109 return FuncInfo.findBBInfo(BB); 1110 } 1111 1112 Function &getFunc() const { return F; } 1113 1114 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); } 1115 1116 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1117 1118 private: 1119 Function &F; 1120 Module *M; 1121 BlockFrequencyInfo *BFI; 1122 ProfileSummaryInfo *PSI; 1123 1124 // This member stores the shared information with class PGOGenFunc. 1125 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo; 1126 1127 // The maximum count value in the profile. This is only used in PGO use 1128 // compilation. 1129 uint64_t ProgramMaxCount; 1130 1131 // Position of counter that remains to be read. 1132 uint32_t CountPosition = 0; 1133 1134 // Total size of the profile count for this function. 1135 uint32_t ProfileCountSize = 0; 1136 1137 // ProfileRecord for this function. 1138 InstrProfRecord ProfileRecord; 1139 1140 // Function hotness info derived from profile. 1141 FuncFreqAttr FreqAttr; 1142 1143 // Is to use the context sensitive profile. 1144 bool IsCS; 1145 1146 // Find the Instrumented BB and set the value. Return false on error. 1147 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1148 1149 // Set the edge counter value for the unknown edge -- there should be only 1150 // one unknown edge. 1151 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1152 1153 // Set the hot/cold inline hints based on the count values. 1154 // FIXME: This function should be removed once the functionality in 1155 // the inliner is implemented. 1156 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1157 if (PSI->isHotCount(EntryCount)) 1158 FreqAttr = FFA_Hot; 1159 else if (PSI->isColdCount(MaxCount)) 1160 FreqAttr = FFA_Cold; 1161 } 1162 }; 1163 1164 } // end anonymous namespace 1165 1166 /// Set up InEdges/OutEdges for all BBs in the MST. 1167 static void 1168 setupBBInfoEdges(FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) { 1169 // This is not required when there is block coverage inference. 1170 if (FuncInfo.BCI) 1171 return; 1172 for (auto &E : FuncInfo.MST.AllEdges) { 1173 if (E->Removed) 1174 continue; 1175 const BasicBlock *SrcBB = E->SrcBB; 1176 const BasicBlock *DestBB = E->DestBB; 1177 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB); 1178 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB); 1179 SrcInfo.addOutEdge(E.get()); 1180 DestInfo.addInEdge(E.get()); 1181 } 1182 } 1183 1184 // Visit all the edges and assign the count value for the instrumented 1185 // edges and the BB. Return false on error. 1186 bool PGOUseFunc::setInstrumentedCounts( 1187 const std::vector<uint64_t> &CountFromProfile) { 1188 1189 std::vector<BasicBlock *> InstrumentBBs; 1190 FuncInfo.getInstrumentBBs(InstrumentBBs); 1191 1192 setupBBInfoEdges(FuncInfo); 1193 1194 unsigned NumCounters = 1195 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1196 // The number of counters here should match the number of counters 1197 // in profile. Return if they mismatch. 1198 if (NumCounters != CountFromProfile.size()) { 1199 return false; 1200 } 1201 auto *FuncEntry = &*F.begin(); 1202 1203 // Set the profile count to the Instrumented BBs. 1204 uint32_t I = 0; 1205 for (BasicBlock *InstrBB : InstrumentBBs) { 1206 uint64_t CountValue = CountFromProfile[I++]; 1207 PGOUseBBInfo &Info = getBBInfo(InstrBB); 1208 // If we reach here, we know that we have some nonzero count 1209 // values in this function. The entry count should not be 0. 1210 // Fix it if necessary. 1211 if (InstrBB == FuncEntry && CountValue == 0) 1212 CountValue = 1; 1213 Info.setBBInfoCount(CountValue); 1214 } 1215 ProfileCountSize = CountFromProfile.size(); 1216 CountPosition = I; 1217 1218 // Set the edge count and update the count of unknown edges for BBs. 1219 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1220 E->setEdgeCount(Value); 1221 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1222 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1223 }; 1224 1225 // Set the profile count the Instrumented edges. There are BBs that not in 1226 // MST but not instrumented. Need to set the edge count value so that we can 1227 // populate the profile counts later. 1228 for (auto &E : FuncInfo.MST.AllEdges) { 1229 if (E->Removed || E->InMST) 1230 continue; 1231 const BasicBlock *SrcBB = E->SrcBB; 1232 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB); 1233 1234 // If only one out-edge, the edge profile count should be the same as BB 1235 // profile count. 1236 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1237 setEdgeCount(E.get(), SrcInfo.CountValue); 1238 else { 1239 const BasicBlock *DestBB = E->DestBB; 1240 PGOUseBBInfo &DestInfo = getBBInfo(DestBB); 1241 // If only one in-edge, the edge profile count should be the same as BB 1242 // profile count. 1243 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1244 setEdgeCount(E.get(), DestInfo.CountValue); 1245 } 1246 if (E->CountValid) 1247 continue; 1248 // E's count should have been set from profile. If not, this meenas E skips 1249 // the instrumentation. We set the count to 0. 1250 setEdgeCount(E.get(), 0); 1251 } 1252 return true; 1253 } 1254 1255 // Set the count value for the unknown edge. There should be one and only one 1256 // unknown edge in Edges vector. 1257 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1258 for (auto &E : Edges) { 1259 if (E->CountValid) 1260 continue; 1261 E->setEdgeCount(Value); 1262 1263 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1264 getBBInfo(E->DestBB).UnknownCountInEdge--; 1265 return; 1266 } 1267 llvm_unreachable("Cannot find the unknown count edge"); 1268 } 1269 1270 // Emit function metadata indicating PGO profile mismatch. 1271 static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx) { 1272 const char MetadataName[] = "instr_prof_hash_mismatch"; 1273 SmallVector<Metadata *, 2> Names; 1274 // If this metadata already exists, ignore. 1275 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1276 if (Existing) { 1277 MDTuple *Tuple = cast<MDTuple>(Existing); 1278 for (const auto &N : Tuple->operands()) { 1279 if (N.equalsStr(MetadataName)) 1280 return; 1281 Names.push_back(N.get()); 1282 } 1283 } 1284 1285 MDBuilder MDB(ctx); 1286 Names.push_back(MDB.createString(MetadataName)); 1287 MDNode *MD = MDTuple::get(ctx, Names); 1288 F.setMetadata(LLVMContext::MD_annotation, MD); 1289 } 1290 1291 void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) { 1292 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) { 1293 auto &Ctx = M->getContext(); 1294 auto Err = IPE.get(); 1295 bool SkipWarning = false; 1296 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1297 << FuncInfo.FuncName << ": "); 1298 if (Err == instrprof_error::unknown_function) { 1299 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1300 SkipWarning = !PGOWarnMissing; 1301 LLVM_DEBUG(dbgs() << "unknown function"); 1302 } else if (Err == instrprof_error::hash_mismatch || 1303 Err == instrprof_error::malformed) { 1304 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1305 SkipWarning = 1306 NoPGOWarnMismatch || 1307 (NoPGOWarnMismatchComdatWeak && 1308 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage || 1309 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1310 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash 1311 << " skip=" << SkipWarning << ")"); 1312 // Emit function metadata indicating PGO profile mismatch. 1313 annotateFunctionWithHashMismatch(F, M->getContext()); 1314 } 1315 1316 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1317 if (SkipWarning) 1318 return; 1319 1320 std::string Msg = 1321 IPE.message() + std::string(" ") + F.getName().str() + 1322 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) + 1323 std::string(" up to ") + std::to_string(MismatchedFuncSum) + 1324 std::string(" count discarded"); 1325 1326 Ctx.diagnose( 1327 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1328 }); 1329 } 1330 1331 // Read the profile from ProfileFileName and assign the value to the 1332 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1333 // Return true if the profile are successfully read, and false on errors. 1334 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1335 InstrProfRecord::CountPseudoKind &PseudoKind) { 1336 auto &Ctx = M->getContext(); 1337 uint64_t MismatchedFuncSum = 0; 1338 Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( 1339 FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum); 1340 if (Error E = Result.takeError()) { 1341 handleInstrProfError(std::move(E), MismatchedFuncSum); 1342 return false; 1343 } 1344 ProfileRecord = std::move(Result.get()); 1345 PseudoKind = ProfileRecord.getCountPseudoKind(); 1346 if (PseudoKind != InstrProfRecord::NotPseudo) { 1347 return true; 1348 } 1349 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1350 1351 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1352 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1353 1354 uint64_t ValueSum = 0; 1355 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1356 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1357 ValueSum += CountFromProfile[I]; 1358 } 1359 AllZeros = (ValueSum == 0); 1360 1361 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1362 1363 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1364 getBBInfo(nullptr).UnknownCountInEdge = 2; 1365 1366 if (!setInstrumentedCounts(CountFromProfile)) { 1367 LLVM_DEBUG( 1368 dbgs() << "Inconsistent number of counts, skipping this function"); 1369 Ctx.diagnose(DiagnosticInfoPGOProfile( 1370 M->getName().data(), 1371 Twine("Inconsistent number of counts in ") + F.getName().str() + 1372 Twine(": the profile may be stale or there is a function name " 1373 "collision."), 1374 DS_Warning)); 1375 return false; 1376 } 1377 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1378 return true; 1379 } 1380 1381 void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) { 1382 uint64_t MismatchedFuncSum = 0; 1383 Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( 1384 FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum); 1385 if (auto Err = Result.takeError()) { 1386 handleInstrProfError(std::move(Err), MismatchedFuncSum); 1387 return; 1388 } 1389 1390 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts; 1391 DenseMap<const BasicBlock *, bool> Coverage; 1392 unsigned Index = 0; 1393 for (auto &BB : F) 1394 if (FuncInfo.BCI->shouldInstrumentBlock(BB)) 1395 Coverage[&BB] = (CountsFromProfile[Index++] != 0); 1396 assert(Index == CountsFromProfile.size()); 1397 1398 // For each B in InverseDependencies[A], if A is covered then B is covered. 1399 DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>> 1400 InverseDependencies; 1401 for (auto &BB : F) { 1402 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) { 1403 // If Dep is covered then BB is covered. 1404 InverseDependencies[Dep].insert(&BB); 1405 } 1406 } 1407 1408 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm. 1409 std::stack<const BasicBlock *> CoveredBlocksToProcess; 1410 for (auto &[BB, IsCovered] : Coverage) 1411 if (IsCovered) 1412 CoveredBlocksToProcess.push(BB); 1413 1414 while (!CoveredBlocksToProcess.empty()) { 1415 auto *CoveredBlock = CoveredBlocksToProcess.top(); 1416 assert(Coverage[CoveredBlock]); 1417 CoveredBlocksToProcess.pop(); 1418 for (auto *BB : InverseDependencies[CoveredBlock]) { 1419 // If CoveredBlock is covered then BB is covered. 1420 if (Coverage[BB]) 1421 continue; 1422 Coverage[BB] = true; 1423 CoveredBlocksToProcess.push(BB); 1424 } 1425 } 1426 1427 // Annotate block coverage. 1428 MDBuilder MDB(F.getContext()); 1429 // We set the entry count to 10000 if the entry block is covered so that BFI 1430 // can propagate a fraction of this count to the other covered blocks. 1431 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0); 1432 for (auto &BB : F) { 1433 // For a block A and its successor B, we set the edge weight as follows: 1434 // If A is covered and B is covered, set weight=1. 1435 // If A is covered and B is uncovered, set weight=0. 1436 // If A is uncovered, set weight=1. 1437 // This setup will allow BFI to give nonzero profile counts to only covered 1438 // blocks. 1439 SmallVector<unsigned, 4> Weights; 1440 for (auto *Succ : successors(&BB)) 1441 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0); 1442 if (Weights.size() >= 2) 1443 BB.getTerminator()->setMetadata(LLVMContext::MD_prof, 1444 MDB.createBranchWeights(Weights)); 1445 } 1446 1447 unsigned NumCorruptCoverage = 0; 1448 DominatorTree DT(F); 1449 LoopInfo LI(DT); 1450 BranchProbabilityInfo BPI(F, LI); 1451 BlockFrequencyInfo BFI(F, BPI, LI); 1452 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> { 1453 if (auto C = BFI.getBlockProfileCount(&BB)) 1454 return C == 0; 1455 return {}; 1456 }; 1457 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n"); 1458 for (auto &BB : F) { 1459 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ") 1460 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName() 1461 << "\n"); 1462 // In some cases it is possible to find a covered block that has no covered 1463 // successors, e.g., when a block calls a function that may call exit(). In 1464 // those cases, BFI could find its successor to be covered while BCI could 1465 // find its successor to be dead. 1466 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) { 1467 LLVM_DEBUG( 1468 dbgs() << "Found inconsistent block covearge for " << BB.getName() 1469 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI=" 1470 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n"); 1471 ++NumCorruptCoverage; 1472 } 1473 if (Coverage[&BB]) 1474 ++NumCoveredBlocks; 1475 } 1476 if (PGOVerifyBFI && NumCorruptCoverage) { 1477 auto &Ctx = M->getContext(); 1478 Ctx.diagnose(DiagnosticInfoPGOProfile( 1479 M->getName().data(), 1480 Twine("Found inconsistent block coverage for function ") + F.getName() + 1481 " in " + Twine(NumCorruptCoverage) + " blocks.", 1482 DS_Warning)); 1483 } 1484 if (PGOViewBlockCoverageGraph) 1485 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage); 1486 } 1487 1488 // Populate the counters from instrumented BBs to all BBs. 1489 // In the end of this operation, all BBs should have a valid count value. 1490 void PGOUseFunc::populateCounters() { 1491 bool Changes = true; 1492 unsigned NumPasses = 0; 1493 while (Changes) { 1494 NumPasses++; 1495 Changes = false; 1496 1497 // For efficient traversal, it's better to start from the end as most 1498 // of the instrumented edges are at the end. 1499 for (auto &BB : reverse(F)) { 1500 PGOUseBBInfo *Count = findBBInfo(&BB); 1501 if (Count == nullptr) 1502 continue; 1503 if (!Count->CountValid) { 1504 if (Count->UnknownCountOutEdge == 0) { 1505 Count->CountValue = sumEdgeCount(Count->OutEdges); 1506 Count->CountValid = true; 1507 Changes = true; 1508 } else if (Count->UnknownCountInEdge == 0) { 1509 Count->CountValue = sumEdgeCount(Count->InEdges); 1510 Count->CountValid = true; 1511 Changes = true; 1512 } 1513 } 1514 if (Count->CountValid) { 1515 if (Count->UnknownCountOutEdge == 1) { 1516 uint64_t Total = 0; 1517 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1518 // If the one of the successor block can early terminate (no-return), 1519 // we can end up with situation where out edge sum count is larger as 1520 // the source BB's count is collected by a post-dominated block. 1521 if (Count->CountValue > OutSum) 1522 Total = Count->CountValue - OutSum; 1523 setEdgeCount(Count->OutEdges, Total); 1524 Changes = true; 1525 } 1526 if (Count->UnknownCountInEdge == 1) { 1527 uint64_t Total = 0; 1528 uint64_t InSum = sumEdgeCount(Count->InEdges); 1529 if (Count->CountValue > InSum) 1530 Total = Count->CountValue - InSum; 1531 setEdgeCount(Count->InEdges, Total); 1532 Changes = true; 1533 } 1534 } 1535 } 1536 } 1537 1538 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1539 (void)NumPasses; 1540 #ifndef NDEBUG 1541 // Assert every BB has a valid counter. 1542 for (auto &BB : F) { 1543 auto BI = findBBInfo(&BB); 1544 if (BI == nullptr) 1545 continue; 1546 assert(BI->CountValid && "BB count is not valid"); 1547 } 1548 #endif 1549 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1550 uint64_t FuncMaxCount = FuncEntryCount; 1551 for (auto &BB : F) { 1552 auto BI = findBBInfo(&BB); 1553 if (BI == nullptr) 1554 continue; 1555 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1556 } 1557 1558 // Fix the obviously inconsistent entry count. 1559 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1560 FuncEntryCount = 1; 1561 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1562 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1563 1564 // Now annotate select instructions 1565 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition); 1566 assert(CountPosition == ProfileCountSize); 1567 1568 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1569 } 1570 1571 // Assign the scaled count values to the BB with multiple out edges. 1572 void PGOUseFunc::setBranchWeights() { 1573 // Generate MD_prof metadata for every branch instruction. 1574 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1575 << " IsCS=" << IsCS << "\n"); 1576 for (auto &BB : F) { 1577 Instruction *TI = BB.getTerminator(); 1578 if (TI->getNumSuccessors() < 2) 1579 continue; 1580 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1581 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) || 1582 isa<CallBrInst>(TI))) 1583 continue; 1584 1585 if (getBBInfo(&BB).CountValue == 0) 1586 continue; 1587 1588 // We have a non-zero Branch BB. 1589 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB); 1590 unsigned Size = BBCountInfo.OutEdges.size(); 1591 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1592 uint64_t MaxCount = 0; 1593 for (unsigned s = 0; s < Size; s++) { 1594 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1595 const BasicBlock *SrcBB = E->SrcBB; 1596 const BasicBlock *DestBB = E->DestBB; 1597 if (DestBB == nullptr) 1598 continue; 1599 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1600 uint64_t EdgeCount = E->CountValue; 1601 if (EdgeCount > MaxCount) 1602 MaxCount = EdgeCount; 1603 EdgeCounts[SuccNum] = EdgeCount; 1604 } 1605 1606 if (MaxCount) 1607 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1608 else { 1609 // A zero MaxCount can come about when we have a BB with a positive 1610 // count, and whose successor blocks all have 0 count. This can happen 1611 // when there is no exit block and the code exits via a noreturn function. 1612 auto &Ctx = M->getContext(); 1613 Ctx.diagnose(DiagnosticInfoPGOProfile( 1614 M->getName().data(), 1615 Twine("Profile in ") + F.getName().str() + 1616 Twine(" partially ignored") + 1617 Twine(", possibly due to the lack of a return path."), 1618 DS_Warning)); 1619 } 1620 } 1621 } 1622 1623 static bool isIndirectBrTarget(BasicBlock *BB) { 1624 for (BasicBlock *Pred : predecessors(BB)) { 1625 if (isa<IndirectBrInst>(Pred->getTerminator())) 1626 return true; 1627 } 1628 return false; 1629 } 1630 1631 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1632 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1633 // Find irr loop headers 1634 for (auto &BB : F) { 1635 // As a heuristic also annotate indrectbr targets as they have a high chance 1636 // to become an irreducible loop header after the indirectbr tail 1637 // duplication. 1638 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1639 Instruction *TI = BB.getTerminator(); 1640 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB); 1641 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1642 } 1643 } 1644 } 1645 1646 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1647 Module *M = F.getParent(); 1648 IRBuilder<> Builder(&SI); 1649 Type *Int64Ty = Builder.getInt64Ty(); 1650 Type *I8PtrTy = Builder.getInt8PtrTy(); 1651 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1652 Builder.CreateCall( 1653 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1654 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1655 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1656 Builder.getInt32(*CurCtrIdx), Step}); 1657 ++(*CurCtrIdx); 1658 } 1659 1660 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1661 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1662 assert(*CurCtrIdx < CountFromProfile.size() && 1663 "Out of bound access of counters"); 1664 uint64_t SCounts[2]; 1665 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1666 ++(*CurCtrIdx); 1667 uint64_t TotalCount = 0; 1668 auto BI = UseFunc->findBBInfo(SI.getParent()); 1669 if (BI != nullptr) 1670 TotalCount = BI->CountValue; 1671 // False Count 1672 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1673 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1674 if (MaxCount) 1675 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1676 } 1677 1678 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1679 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage) 1680 return; 1681 // FIXME: do not handle this yet. 1682 if (SI.getCondition()->getType()->isVectorTy()) 1683 return; 1684 1685 switch (Mode) { 1686 case VM_counting: 1687 NSIs++; 1688 return; 1689 case VM_instrument: 1690 instrumentOneSelectInst(SI); 1691 return; 1692 case VM_annotate: 1693 annotateOneSelectInst(SI); 1694 return; 1695 } 1696 1697 llvm_unreachable("Unknown visiting mode"); 1698 } 1699 1700 // Traverse all valuesites and annotate the instructions for all value kind. 1701 void PGOUseFunc::annotateValueSites() { 1702 if (DisableValueProfiling) 1703 return; 1704 1705 // Create the PGOFuncName meta data. 1706 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1707 1708 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1709 annotateValueSites(Kind); 1710 } 1711 1712 // Annotate the instructions for a specific value kind. 1713 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1714 assert(Kind <= IPVK_Last); 1715 unsigned ValueSiteIndex = 0; 1716 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1717 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1718 if (NumValueSites != ValueSites.size()) { 1719 auto &Ctx = M->getContext(); 1720 Ctx.diagnose(DiagnosticInfoPGOProfile( 1721 M->getName().data(), 1722 Twine("Inconsistent number of value sites for ") + 1723 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") + 1724 F.getName().str() + 1725 Twine("\", possibly due to the use of a stale profile."), 1726 DS_Warning)); 1727 return; 1728 } 1729 1730 for (VPCandidateInfo &I : ValueSites) { 1731 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1732 << "): Index = " << ValueSiteIndex << " out of " 1733 << NumValueSites << "\n"); 1734 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1735 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1736 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1737 : MaxNumAnnotations); 1738 ValueSiteIndex++; 1739 } 1740 } 1741 1742 // Collect the set of members for each Comdat in module M and store 1743 // in ComdatMembers. 1744 static void collectComdatMembers( 1745 Module &M, 1746 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1747 if (!DoComdatRenaming) 1748 return; 1749 for (Function &F : M) 1750 if (Comdat *C = F.getComdat()) 1751 ComdatMembers.insert(std::make_pair(C, &F)); 1752 for (GlobalVariable &GV : M.globals()) 1753 if (Comdat *C = GV.getComdat()) 1754 ComdatMembers.insert(std::make_pair(C, &GV)); 1755 for (GlobalAlias &GA : M.aliases()) 1756 if (Comdat *C = GA.getComdat()) 1757 ComdatMembers.insert(std::make_pair(C, &GA)); 1758 } 1759 1760 // Don't perform PGO instrumeatnion / profile-use. 1761 static bool skipPGO(const Function &F) { 1762 if (F.isDeclaration()) 1763 return true; 1764 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1765 return true; 1766 if (F.hasFnAttribute(llvm::Attribute::SkipProfile)) 1767 return true; 1768 if (F.getInstructionCount() < PGOFunctionSizeThreshold) 1769 return true; 1770 1771 // If there are too many critical edges, PGO might cause 1772 // compiler time problem. Skip PGO if the number of 1773 // critical edges execeed the threshold. 1774 unsigned NumCriticalEdges = 0; 1775 for (auto &BB : F) { 1776 const Instruction *TI = BB.getTerminator(); 1777 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 1778 if (isCriticalEdge(TI, I)) 1779 NumCriticalEdges++; 1780 } 1781 } 1782 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) { 1783 LLVM_DEBUG(dbgs() << "In func " << F.getName() 1784 << ", NumCriticalEdges=" << NumCriticalEdges 1785 << " exceed the threshold. Skip PGO.\n"); 1786 return true; 1787 } 1788 1789 return false; 1790 } 1791 1792 static bool InstrumentAllFunctions( 1793 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1794 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1795 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1796 // For the context-sensitve instrumentation, we should have a separated pass 1797 // (before LTO/ThinLTO linking) to create these variables. 1798 if (!IsCS) 1799 createIRLevelProfileFlagVar(M, /*IsCS=*/false); 1800 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1801 collectComdatMembers(M, ComdatMembers); 1802 1803 for (auto &F : M) { 1804 if (skipPGO(F)) 1805 continue; 1806 auto &TLI = LookupTLI(F); 1807 auto *BPI = LookupBPI(F); 1808 auto *BFI = LookupBFI(F); 1809 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1810 } 1811 return true; 1812 } 1813 1814 PreservedAnalyses 1815 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &MAM) { 1816 createProfileFileNameVar(M, CSInstrName); 1817 // The variable in a comdat may be discarded by LTO. Ensure the declaration 1818 // will be retained. 1819 appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); 1820 PreservedAnalyses PA; 1821 PA.preserve<FunctionAnalysisManagerModuleProxy>(); 1822 PA.preserveSet<AllAnalysesOn<Function>>(); 1823 return PA; 1824 } 1825 1826 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1827 ModuleAnalysisManager &MAM) { 1828 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1829 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1830 return FAM.getResult<TargetLibraryAnalysis>(F); 1831 }; 1832 auto LookupBPI = [&FAM](Function &F) { 1833 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1834 }; 1835 auto LookupBFI = [&FAM](Function &F) { 1836 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1837 }; 1838 1839 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1840 return PreservedAnalyses::all(); 1841 1842 return PreservedAnalyses::none(); 1843 } 1844 1845 // Using the ratio b/w sums of profile count values and BFI count values to 1846 // adjust the func entry count. 1847 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1848 BranchProbabilityInfo &NBPI) { 1849 Function &F = Func.getFunc(); 1850 BlockFrequencyInfo NBFI(F, NBPI, LI); 1851 #ifndef NDEBUG 1852 auto BFIEntryCount = F.getEntryCount(); 1853 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) && 1854 "Invalid BFI Entrycount"); 1855 #endif 1856 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1857 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1858 for (auto &BBI : F) { 1859 uint64_t CountValue = 0; 1860 uint64_t BFICountValue = 0; 1861 if (!Func.findBBInfo(&BBI)) 1862 continue; 1863 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1864 CountValue = Func.getBBInfo(&BBI).CountValue; 1865 BFICountValue = *BFICount; 1866 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1867 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1868 } 1869 if (SumCount.isZero()) 1870 return; 1871 1872 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1873 "Incorrect sum of BFI counts"); 1874 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1875 return; 1876 double Scale = (SumCount / SumBFICount).convertToDouble(); 1877 if (Scale < 1.001 && Scale > 0.999) 1878 return; 1879 1880 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; 1881 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1882 if (NewEntryCount == 0) 1883 NewEntryCount = 1; 1884 if (NewEntryCount != FuncEntryCount) { 1885 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1886 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1887 << ", entry_count " << FuncEntryCount << " --> " 1888 << NewEntryCount << "\n"); 1889 } 1890 } 1891 1892 // Compare the profile count values with BFI count values, and print out 1893 // the non-matching ones. 1894 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 1895 BranchProbabilityInfo &NBPI, 1896 uint64_t HotCountThreshold, 1897 uint64_t ColdCountThreshold) { 1898 Function &F = Func.getFunc(); 1899 BlockFrequencyInfo NBFI(F, NBPI, LI); 1900 // bool PrintFunc = false; 1901 bool HotBBOnly = PGOVerifyHotBFI; 1902 StringRef Msg; 1903 OptimizationRemarkEmitter ORE(&F); 1904 1905 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 1906 for (auto &BBI : F) { 1907 uint64_t CountValue = 0; 1908 uint64_t BFICountValue = 0; 1909 1910 if (Func.getBBInfo(&BBI).CountValid) 1911 CountValue = Func.getBBInfo(&BBI).CountValue; 1912 1913 BBNum++; 1914 if (CountValue) 1915 NonZeroBBNum++; 1916 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1917 if (BFICount) 1918 BFICountValue = *BFICount; 1919 1920 if (HotBBOnly) { 1921 bool rawIsHot = CountValue >= HotCountThreshold; 1922 bool BFIIsHot = BFICountValue >= HotCountThreshold; 1923 bool rawIsCold = CountValue <= ColdCountThreshold; 1924 bool ShowCount = false; 1925 if (rawIsHot && !BFIIsHot) { 1926 Msg = "raw-Hot to BFI-nonHot"; 1927 ShowCount = true; 1928 } else if (rawIsCold && BFIIsHot) { 1929 Msg = "raw-Cold to BFI-Hot"; 1930 ShowCount = true; 1931 } 1932 if (!ShowCount) 1933 continue; 1934 } else { 1935 if ((CountValue < PGOVerifyBFICutoff) && 1936 (BFICountValue < PGOVerifyBFICutoff)) 1937 continue; 1938 uint64_t Diff = (BFICountValue >= CountValue) 1939 ? BFICountValue - CountValue 1940 : CountValue - BFICountValue; 1941 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) 1942 continue; 1943 } 1944 BBMisMatchNum++; 1945 1946 ORE.emit([&]() { 1947 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 1948 F.getSubprogram(), &BBI); 1949 Remark << "BB " << ore::NV("Block", BBI.getName()) 1950 << " Count=" << ore::NV("Count", CountValue) 1951 << " BFI_Count=" << ore::NV("Count", BFICountValue); 1952 if (!Msg.empty()) 1953 Remark << " (" << Msg << ")"; 1954 return Remark; 1955 }); 1956 } 1957 if (BBMisMatchNum) 1958 ORE.emit([&]() { 1959 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 1960 F.getSubprogram(), &F.getEntryBlock()) 1961 << "In Func " << ore::NV("Function", F.getName()) 1962 << ": Num_of_BB=" << ore::NV("Count", BBNum) 1963 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 1964 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 1965 }); 1966 } 1967 1968 static bool annotateAllFunctions( 1969 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 1970 vfs::FileSystem &FS, 1971 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1972 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1973 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 1974 ProfileSummaryInfo *PSI, bool IsCS) { 1975 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 1976 auto &Ctx = M.getContext(); 1977 // Read the counter array from file. 1978 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS, 1979 ProfileRemappingFileName); 1980 if (Error E = ReaderOrErr.takeError()) { 1981 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1982 Ctx.diagnose( 1983 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 1984 }); 1985 return false; 1986 } 1987 1988 std::unique_ptr<IndexedInstrProfReader> PGOReader = 1989 std::move(ReaderOrErr.get()); 1990 if (!PGOReader) { 1991 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 1992 StringRef("Cannot get PGOReader"))); 1993 return false; 1994 } 1995 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 1996 return false; 1997 1998 // TODO: might need to change the warning once the clang option is finalized. 1999 if (!PGOReader->isIRLevelProfile()) { 2000 Ctx.diagnose(DiagnosticInfoPGOProfile( 2001 ProfileFileName.data(), "Not an IR level instrumentation profile")); 2002 return false; 2003 } 2004 if (PGOReader->functionEntryOnly()) { 2005 Ctx.diagnose(DiagnosticInfoPGOProfile( 2006 ProfileFileName.data(), 2007 "Function entry profiles are not yet supported for optimization")); 2008 return false; 2009 } 2010 2011 // Add the profile summary (read from the header of the indexed summary) here 2012 // so that we can use it below when reading counters (which checks if the 2013 // function should be marked with a cold or inlinehint attribute). 2014 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 2015 IsCS ? ProfileSummary::PSK_CSInstr 2016 : ProfileSummary::PSK_Instr); 2017 PSI->refresh(); 2018 2019 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 2020 collectComdatMembers(M, ComdatMembers); 2021 std::vector<Function *> HotFunctions; 2022 std::vector<Function *> ColdFunctions; 2023 2024 // If the profile marked as always instrument the entry BB, do the 2025 // same. Note this can be overwritten by the internal option in CFGMST.h 2026 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 2027 if (PGOInstrumentEntry.getNumOccurrences() > 0) 2028 InstrumentFuncEntry = PGOInstrumentEntry; 2029 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage(); 2030 for (auto &F : M) { 2031 if (skipPGO(F)) 2032 continue; 2033 auto &TLI = LookupTLI(F); 2034 auto *BPI = LookupBPI(F); 2035 auto *BFI = LookupBFI(F); 2036 if (!HasSingleByteCoverage) { 2037 // Split indirectbr critical edges here before computing the MST rather 2038 // than later in getInstrBB() to avoid invalidating it. 2039 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, 2040 BFI); 2041 } 2042 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 2043 InstrumentFuncEntry, HasSingleByteCoverage); 2044 if (HasSingleByteCoverage) { 2045 Func.populateCoverage(PGOReader.get()); 2046 continue; 2047 } 2048 // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo, 2049 // it means the profile for the function is unrepresentative and this 2050 // function is actually hot / warm. We will reset the function hot / cold 2051 // attribute and drop all the profile counters. 2052 InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo; 2053 bool AllZeros = false; 2054 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind)) 2055 continue; 2056 if (AllZeros) { 2057 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 2058 if (Func.getProgramMaxCount() != 0) 2059 ColdFunctions.push_back(&F); 2060 continue; 2061 } 2062 if (PseudoKind != InstrProfRecord::NotPseudo) { 2063 // Clear function attribute cold. 2064 if (F.hasFnAttribute(Attribute::Cold)) 2065 F.removeFnAttr(Attribute::Cold); 2066 // Set function attribute as hot. 2067 if (PseudoKind == InstrProfRecord::PseudoHot) 2068 F.addFnAttr(Attribute::Hot); 2069 continue; 2070 } 2071 Func.populateCounters(); 2072 Func.setBranchWeights(); 2073 Func.annotateValueSites(); 2074 Func.annotateIrrLoopHeaderWeights(); 2075 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 2076 if (FreqAttr == PGOUseFunc::FFA_Cold) 2077 ColdFunctions.push_back(&F); 2078 else if (FreqAttr == PGOUseFunc::FFA_Hot) 2079 HotFunctions.push_back(&F); 2080 if (PGOViewCounts != PGOVCT_None && 2081 (ViewBlockFreqFuncName.empty() || 2082 F.getName().equals(ViewBlockFreqFuncName))) { 2083 LoopInfo LI{DominatorTree(F)}; 2084 std::unique_ptr<BranchProbabilityInfo> NewBPI = 2085 std::make_unique<BranchProbabilityInfo>(F, LI); 2086 std::unique_ptr<BlockFrequencyInfo> NewBFI = 2087 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 2088 if (PGOViewCounts == PGOVCT_Graph) 2089 NewBFI->view(); 2090 else if (PGOViewCounts == PGOVCT_Text) { 2091 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 2092 NewBFI->print(dbgs()); 2093 } 2094 } 2095 if (PGOViewRawCounts != PGOVCT_None && 2096 (ViewBlockFreqFuncName.empty() || 2097 F.getName().equals(ViewBlockFreqFuncName))) { 2098 if (PGOViewRawCounts == PGOVCT_Graph) 2099 if (ViewBlockFreqFuncName.empty()) 2100 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 2101 else 2102 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 2103 else if (PGOViewRawCounts == PGOVCT_Text) { 2104 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 2105 Func.dumpInfo(); 2106 } 2107 } 2108 2109 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 2110 LoopInfo LI{DominatorTree(F)}; 2111 BranchProbabilityInfo NBPI(F, LI); 2112 2113 // Fix func entry count. 2114 if (PGOFixEntryCount) 2115 fixFuncEntryCount(Func, LI, NBPI); 2116 2117 // Verify BlockFrequency information. 2118 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 2119 if (PGOVerifyHotBFI) { 2120 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 2121 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 2122 } 2123 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 2124 } 2125 } 2126 2127 // Set function hotness attribute from the profile. 2128 // We have to apply these attributes at the end because their presence 2129 // can affect the BranchProbabilityInfo of any callers, resulting in an 2130 // inconsistent MST between prof-gen and prof-use. 2131 for (auto &F : HotFunctions) { 2132 F->addFnAttr(Attribute::InlineHint); 2133 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 2134 << "\n"); 2135 } 2136 for (auto &F : ColdFunctions) { 2137 // Only set when there is no Attribute::Hot set by the user. For Hot 2138 // attribute, user's annotation has the precedence over the profile. 2139 if (F->hasFnAttribute(Attribute::Hot)) { 2140 auto &Ctx = M.getContext(); 2141 std::string Msg = std::string("Function ") + F->getName().str() + 2142 std::string(" is annotated as a hot function but" 2143 " the profile is cold"); 2144 Ctx.diagnose( 2145 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 2146 continue; 2147 } 2148 F->addFnAttr(Attribute::Cold); 2149 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 2150 << "\n"); 2151 } 2152 return true; 2153 } 2154 2155 PGOInstrumentationUse::PGOInstrumentationUse( 2156 std::string Filename, std::string RemappingFilename, bool IsCS, 2157 IntrusiveRefCntPtr<vfs::FileSystem> VFS) 2158 : ProfileFileName(std::move(Filename)), 2159 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS), 2160 FS(std::move(VFS)) { 2161 if (!PGOTestProfileFile.empty()) 2162 ProfileFileName = PGOTestProfileFile; 2163 if (!PGOTestProfileRemappingFile.empty()) 2164 ProfileRemappingFileName = PGOTestProfileRemappingFile; 2165 if (!FS) 2166 FS = vfs::getRealFileSystem(); 2167 } 2168 2169 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 2170 ModuleAnalysisManager &MAM) { 2171 2172 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 2173 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 2174 return FAM.getResult<TargetLibraryAnalysis>(F); 2175 }; 2176 auto LookupBPI = [&FAM](Function &F) { 2177 return &FAM.getResult<BranchProbabilityAnalysis>(F); 2178 }; 2179 auto LookupBFI = [&FAM](Function &F) { 2180 return &FAM.getResult<BlockFrequencyAnalysis>(F); 2181 }; 2182 2183 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M); 2184 2185 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS, 2186 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 2187 return PreservedAnalyses::all(); 2188 2189 return PreservedAnalyses::none(); 2190 } 2191 2192 static std::string getSimpleNodeName(const BasicBlock *Node) { 2193 if (!Node->getName().empty()) 2194 return Node->getName().str(); 2195 2196 std::string SimpleNodeName; 2197 raw_string_ostream OS(SimpleNodeName); 2198 Node->printAsOperand(OS, false); 2199 return OS.str(); 2200 } 2201 2202 void llvm::setProfMetadata(Module *M, Instruction *TI, 2203 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) { 2204 MDBuilder MDB(M->getContext()); 2205 assert(MaxCount > 0 && "Bad max count"); 2206 uint64_t Scale = calculateCountScale(MaxCount); 2207 SmallVector<unsigned, 4> Weights; 2208 for (const auto &ECI : EdgeCounts) 2209 Weights.push_back(scaleBranchCount(ECI, Scale)); 2210 2211 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 2212 : Weights) { 2213 dbgs() << W << " "; 2214 } dbgs() << "\n";); 2215 2216 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); 2217 2218 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 2219 if (EmitBranchProbability) { 2220 std::string BrCondStr = getBranchCondString(TI); 2221 if (BrCondStr.empty()) 2222 return; 2223 2224 uint64_t WSum = 2225 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 2226 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 2227 uint64_t TotalCount = 2228 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 2229 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 2230 Scale = calculateCountScale(WSum); 2231 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 2232 scaleBranchCount(WSum, Scale)); 2233 std::string BranchProbStr; 2234 raw_string_ostream OS(BranchProbStr); 2235 OS << BP; 2236 OS << " (total count : " << TotalCount << ")"; 2237 OS.flush(); 2238 Function *F = TI->getParent()->getParent(); 2239 OptimizationRemarkEmitter ORE(F); 2240 ORE.emit([&]() { 2241 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 2242 << BrCondStr << " is true with probability : " << BranchProbStr; 2243 }); 2244 } 2245 } 2246 2247 namespace llvm { 2248 2249 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2250 MDBuilder MDB(M->getContext()); 2251 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2252 MDB.createIrrLoopHeaderWeight(Count)); 2253 } 2254 2255 template <> struct GraphTraits<PGOUseFunc *> { 2256 using NodeRef = const BasicBlock *; 2257 using ChildIteratorType = const_succ_iterator; 2258 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2259 2260 static NodeRef getEntryNode(const PGOUseFunc *G) { 2261 return &G->getFunc().front(); 2262 } 2263 2264 static ChildIteratorType child_begin(const NodeRef N) { 2265 return succ_begin(N); 2266 } 2267 2268 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2269 2270 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2271 return nodes_iterator(G->getFunc().begin()); 2272 } 2273 2274 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2275 return nodes_iterator(G->getFunc().end()); 2276 } 2277 }; 2278 2279 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2280 explicit DOTGraphTraits(bool isSimple = false) 2281 : DefaultDOTGraphTraits(isSimple) {} 2282 2283 static std::string getGraphName(const PGOUseFunc *G) { 2284 return std::string(G->getFunc().getName()); 2285 } 2286 2287 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2288 std::string Result; 2289 raw_string_ostream OS(Result); 2290 2291 OS << getSimpleNodeName(Node) << ":\\l"; 2292 PGOUseBBInfo *BI = Graph->findBBInfo(Node); 2293 OS << "Count : "; 2294 if (BI && BI->CountValid) 2295 OS << BI->CountValue << "\\l"; 2296 else 2297 OS << "Unknown\\l"; 2298 2299 if (!PGOInstrSelect) 2300 return Result; 2301 2302 for (const Instruction &I : *Node) { 2303 if (!isa<SelectInst>(&I)) 2304 continue; 2305 // Display scaled counts for SELECT instruction: 2306 OS << "SELECT : { T = "; 2307 uint64_t TC, FC; 2308 bool HasProf = extractBranchWeights(I, TC, FC); 2309 if (!HasProf) 2310 OS << "Unknown, F = Unknown }\\l"; 2311 else 2312 OS << TC << ", F = " << FC << " }\\l"; 2313 } 2314 return Result; 2315 } 2316 }; 2317 2318 } // end namespace llvm 2319