1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements PGO instrumentation using a minimum spanning tree based 10 // on the following paper: 11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 13 // Issue 3, pp 313-322 14 // The idea of the algorithm based on the fact that for each node (except for 15 // the entry and exit), the sum of incoming edge counts equals the sum of 16 // outgoing edge counts. The count of edge on spanning tree can be derived from 17 // those edges not on the spanning tree. Knuth proves this method instruments 18 // the minimum number of edges. 19 // 20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree 21 // edges have higher frequencies (more likely to execute). The idea is to 22 // instrument those less frequently executed edges to reduce the runtime 23 // overhead of instrumented binaries. 24 // 25 // This file contains two passes: 26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 27 // count profile, and generates the instrumentation for indirect call 28 // profiling. 29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and 30 // annotates the branch weights. It also reads the indirect call value 31 // profiling records and annotate the indirect call instructions. 32 // 33 // To get the precise counter information, These two passes need to invoke at 34 // the same compilation point (so they see the same IR). For pass 35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 37 // the profile is opened in module level and passed to each PGOUseFunc instance. 38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 39 // in class FuncPGOInstrumentation. 40 // 41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class 42 // BBInfo contains auxiliary information for each BB. These two classes are used 43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure 45 // used in populating profile counters. 46 // The MST implementation is in Class CFGMST (CFGMST.h). 47 // 48 //===----------------------------------------------------------------------===// 49 50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 51 #include "CFGMST.h" 52 #include "ValueProfileCollector.h" 53 #include "llvm/ADT/APInt.h" 54 #include "llvm/ADT/ArrayRef.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/SmallVector.h" 57 #include "llvm/ADT/Statistic.h" 58 #include "llvm/ADT/StringRef.h" 59 #include "llvm/ADT/Triple.h" 60 #include "llvm/ADT/Twine.h" 61 #include "llvm/ADT/iterator.h" 62 #include "llvm/ADT/iterator_range.h" 63 #include "llvm/Analysis/BlockFrequencyInfo.h" 64 #include "llvm/Analysis/BranchProbabilityInfo.h" 65 #include "llvm/Analysis/CFG.h" 66 #include "llvm/Analysis/EHPersonalities.h" 67 #include "llvm/Analysis/LoopInfo.h" 68 #include "llvm/Analysis/MemoryBuiltins.h" 69 #include "llvm/Analysis/MemoryProfileInfo.h" 70 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 71 #include "llvm/Analysis/ProfileSummaryInfo.h" 72 #include "llvm/Analysis/TargetLibraryInfo.h" 73 #include "llvm/IR/Attributes.h" 74 #include "llvm/IR/BasicBlock.h" 75 #include "llvm/IR/CFG.h" 76 #include "llvm/IR/Comdat.h" 77 #include "llvm/IR/Constant.h" 78 #include "llvm/IR/Constants.h" 79 #include "llvm/IR/DiagnosticInfo.h" 80 #include "llvm/IR/Dominators.h" 81 #include "llvm/IR/Function.h" 82 #include "llvm/IR/GlobalAlias.h" 83 #include "llvm/IR/GlobalValue.h" 84 #include "llvm/IR/GlobalVariable.h" 85 #include "llvm/IR/IRBuilder.h" 86 #include "llvm/IR/InstVisitor.h" 87 #include "llvm/IR/InstrTypes.h" 88 #include "llvm/IR/Instruction.h" 89 #include "llvm/IR/Instructions.h" 90 #include "llvm/IR/IntrinsicInst.h" 91 #include "llvm/IR/Intrinsics.h" 92 #include "llvm/IR/LLVMContext.h" 93 #include "llvm/IR/MDBuilder.h" 94 #include "llvm/IR/Module.h" 95 #include "llvm/IR/PassManager.h" 96 #include "llvm/IR/ProfDataUtils.h" 97 #include "llvm/IR/ProfileSummary.h" 98 #include "llvm/IR/Type.h" 99 #include "llvm/IR/Value.h" 100 #include "llvm/ProfileData/InstrProf.h" 101 #include "llvm/ProfileData/InstrProfReader.h" 102 #include "llvm/Support/BLAKE3.h" 103 #include "llvm/Support/BranchProbability.h" 104 #include "llvm/Support/CRC.h" 105 #include "llvm/Support/Casting.h" 106 #include "llvm/Support/CommandLine.h" 107 #include "llvm/Support/DOTGraphTraits.h" 108 #include "llvm/Support/Debug.h" 109 #include "llvm/Support/Error.h" 110 #include "llvm/Support/ErrorHandling.h" 111 #include "llvm/Support/GraphWriter.h" 112 #include "llvm/Support/HashBuilder.h" 113 #include "llvm/Support/raw_ostream.h" 114 #include "llvm/Transforms/Instrumentation.h" 115 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 116 #include "llvm/Transforms/Utils/MisExpect.h" 117 #include "llvm/Transforms/Utils/ModuleUtils.h" 118 #include <algorithm> 119 #include <cassert> 120 #include <cstdint> 121 #include <map> 122 #include <memory> 123 #include <numeric> 124 #include <optional> 125 #include <set> 126 #include <string> 127 #include <unordered_map> 128 #include <utility> 129 #include <vector> 130 131 using namespace llvm; 132 using namespace llvm::memprof; 133 using ProfileCount = Function::ProfileCount; 134 using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 135 136 #define DEBUG_TYPE "pgo-instrumentation" 137 138 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 139 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 140 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 141 STATISTIC(NumOfPGOEdge, "Number of edges."); 142 STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 143 STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 144 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 145 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 146 STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 147 STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); 148 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 149 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 150 STATISTIC(NumOfCSPGOSelectInsts, 151 "Number of select instruction instrumented in CSPGO."); 152 STATISTIC(NumOfCSPGOMemIntrinsics, 153 "Number of mem intrinsics instrumented in CSPGO."); 154 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 155 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 156 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 157 STATISTIC(NumOfCSPGOFunc, 158 "Number of functions having valid profile counts in CSPGO."); 159 STATISTIC(NumOfCSPGOMismatch, 160 "Number of functions having mismatch profile in CSPGO."); 161 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 162 163 // Command line option to specify the file to read profile from. This is 164 // mainly used for testing. 165 static cl::opt<std::string> 166 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 167 cl::value_desc("filename"), 168 cl::desc("Specify the path of profile data file. This is" 169 "mainly for test purpose.")); 170 static cl::opt<std::string> PGOTestProfileRemappingFile( 171 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 172 cl::value_desc("filename"), 173 cl::desc("Specify the path of profile remapping file. This is mainly for " 174 "test purpose.")); 175 176 // Command line option to disable value profiling. The default is false: 177 // i.e. value profiling is enabled by default. This is for debug purpose. 178 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 179 cl::Hidden, 180 cl::desc("Disable Value Profiling")); 181 182 // Command line option to set the maximum number of VP annotations to write to 183 // the metadata for a single indirect call callsite. 184 static cl::opt<unsigned> MaxNumAnnotations( 185 "icp-max-annotations", cl::init(3), cl::Hidden, 186 cl::desc("Max number of annotations for a single indirect " 187 "call callsite")); 188 189 // Command line option to set the maximum number of value annotations 190 // to write to the metadata for a single memop intrinsic. 191 static cl::opt<unsigned> MaxNumMemOPAnnotations( 192 "memop-max-annotations", cl::init(4), cl::Hidden, 193 cl::desc("Max number of preicise value annotations for a single memop" 194 "intrinsic")); 195 196 // Command line option to control appending FunctionHash to the name of a COMDAT 197 // function. This is to avoid the hash mismatch caused by the preinliner. 198 static cl::opt<bool> DoComdatRenaming( 199 "do-comdat-renaming", cl::init(false), cl::Hidden, 200 cl::desc("Append function hash to the name of COMDAT function to avoid " 201 "function hash mismatch due to the preinliner")); 202 203 // Command line option to enable/disable the warning about missing profile 204 // information. 205 static cl::opt<bool> 206 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 207 cl::desc("Use this option to turn on/off " 208 "warnings about missing profile data for " 209 "functions.")); 210 211 namespace llvm { 212 // Command line option to enable/disable the warning about a hash mismatch in 213 // the profile data. 214 cl::opt<bool> 215 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 216 cl::desc("Use this option to turn off/on " 217 "warnings about profile cfg mismatch.")); 218 } // namespace llvm 219 220 // Command line option to enable/disable the warning about a hash mismatch in 221 // the profile data for Comdat functions, which often turns out to be false 222 // positive due to the pre-instrumentation inline. 223 static cl::opt<bool> NoPGOWarnMismatchComdatWeak( 224 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden, 225 cl::desc("The option is used to turn on/off " 226 "warnings about hash mismatch for comdat " 227 "or weak functions.")); 228 229 // Command line option to enable/disable select instruction instrumentation. 230 static cl::opt<bool> 231 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 232 cl::desc("Use this option to turn on/off SELECT " 233 "instruction instrumentation. ")); 234 235 // Command line option to turn on CFG dot or text dump of raw profile counts 236 static cl::opt<PGOViewCountsType> PGOViewRawCounts( 237 "pgo-view-raw-counts", cl::Hidden, 238 cl::desc("A boolean option to show CFG dag or text " 239 "with raw profile counts from " 240 "profile data. See also option " 241 "-pgo-view-counts. To limit graph " 242 "display to only one function, use " 243 "filtering option -view-bfi-func-name."), 244 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 245 clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 246 clEnumValN(PGOVCT_Text, "text", "show in text."))); 247 248 // Command line option to enable/disable memop intrinsic call.size profiling. 249 static cl::opt<bool> 250 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 251 cl::desc("Use this option to turn on/off " 252 "memory intrinsic size profiling.")); 253 254 // Emit branch probability as optimization remarks. 255 static cl::opt<bool> 256 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 257 cl::desc("When this option is on, the annotated " 258 "branch probability will be emitted as " 259 "optimization remarks: -{Rpass|" 260 "pass-remarks}=pgo-instrumentation")); 261 262 static cl::opt<bool> PGOInstrumentEntry( 263 "pgo-instrument-entry", cl::init(false), cl::Hidden, 264 cl::desc("Force to instrument function entry basicblock.")); 265 266 static cl::opt<bool> PGOFunctionEntryCoverage( 267 "pgo-function-entry-coverage", cl::Hidden, 268 cl::desc( 269 "Use this option to enable function entry coverage instrumentation.")); 270 271 static cl::opt<bool> 272 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, 273 cl::desc("Fix function entry count in profile use.")); 274 275 static cl::opt<bool> PGOVerifyHotBFI( 276 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, 277 cl::desc("Print out the non-match BFI count if a hot raw profile count " 278 "becomes non-hot, or a cold raw profile count becomes hot. " 279 "The print is enabled under -Rpass-analysis=pgo, or " 280 "internal option -pass-remakrs-analysis=pgo.")); 281 282 static cl::opt<bool> PGOVerifyBFI( 283 "pgo-verify-bfi", cl::init(false), cl::Hidden, 284 cl::desc("Print out mismatched BFI counts after setting profile metadata " 285 "The print is enabled under -Rpass-analysis=pgo, or " 286 "internal option -pass-remakrs-analysis=pgo.")); 287 288 static cl::opt<unsigned> PGOVerifyBFIRatio( 289 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, 290 cl::desc("Set the threshold for pgo-verify-bfi: only print out " 291 "mismatched BFI if the difference percentage is greater than " 292 "this value (in percentage).")); 293 294 static cl::opt<unsigned> PGOVerifyBFICutoff( 295 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, 296 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " 297 "profile count value is below.")); 298 299 static cl::opt<std::string> PGOTraceFuncHash( 300 "pgo-trace-func-hash", cl::init("-"), cl::Hidden, 301 cl::value_desc("function name"), 302 cl::desc("Trace the hash of the function with this name.")); 303 304 static cl::opt<unsigned> PGOFunctionSizeThreshold( 305 "pgo-function-size-threshold", cl::Hidden, 306 cl::desc("Do not instrument functions smaller than this threshold.")); 307 308 static cl::opt<bool> MatchMemProf( 309 "pgo-match-memprof", cl::init(true), cl::Hidden, 310 cl::desc("Perform matching and annotation of memprof profiles.")); 311 312 static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold( 313 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, 314 cl::desc("Do not instrument functions with the number of critical edges " 315 " greater than this threshold.")); 316 317 namespace llvm { 318 // Command line option to turn on CFG dot dump after profile annotation. 319 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts 320 extern cl::opt<PGOViewCountsType> PGOViewCounts; 321 322 // Command line option to specify the name of the function for CFG dump 323 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= 324 extern cl::opt<std::string> ViewBlockFreqFuncName; 325 326 extern cl::opt<bool> DebugInfoCorrelate; 327 } // namespace llvm 328 329 static cl::opt<bool> 330 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, 331 cl::desc("Use the old CFG function hashing")); 332 333 // Return a string describing the branch condition that can be 334 // used in static branch probability heuristics: 335 static std::string getBranchCondString(Instruction *TI) { 336 BranchInst *BI = dyn_cast<BranchInst>(TI); 337 if (!BI || !BI->isConditional()) 338 return std::string(); 339 340 Value *Cond = BI->getCondition(); 341 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 342 if (!CI) 343 return std::string(); 344 345 std::string result; 346 raw_string_ostream OS(result); 347 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 348 CI->getOperand(0)->getType()->print(OS, true); 349 350 Value *RHS = CI->getOperand(1); 351 ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 352 if (CV) { 353 if (CV->isZero()) 354 OS << "_Zero"; 355 else if (CV->isOne()) 356 OS << "_One"; 357 else if (CV->isMinusOne()) 358 OS << "_MinusOne"; 359 else 360 OS << "_Const"; 361 } 362 OS.flush(); 363 return result; 364 } 365 366 static const char *ValueProfKindDescr[] = { 367 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 368 #include "llvm/ProfileData/InstrProfData.inc" 369 }; 370 371 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime 372 // aware this is an ir_level profile so it can set the version flag. 373 static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { 374 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 375 Type *IntTy64 = Type::getInt64Ty(M.getContext()); 376 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); 377 if (IsCS) 378 ProfileVersion |= VARIANT_MASK_CSIR_PROF; 379 if (PGOInstrumentEntry) 380 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; 381 if (DebugInfoCorrelate) 382 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; 383 if (PGOFunctionEntryCoverage) 384 ProfileVersion |= 385 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; 386 auto IRLevelVersionVariable = new GlobalVariable( 387 M, IntTy64, true, GlobalValue::WeakAnyLinkage, 388 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); 389 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility); 390 Triple TT(M.getTargetTriple()); 391 if (TT.supportsCOMDAT()) { 392 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); 393 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); 394 } 395 return IRLevelVersionVariable; 396 } 397 398 namespace { 399 400 /// The select instruction visitor plays three roles specified 401 /// by the mode. In \c VM_counting mode, it simply counts the number of 402 /// select instructions. In \c VM_instrument mode, it inserts code to count 403 /// the number times TrueValue of select is taken. In \c VM_annotate mode, 404 /// it reads the profile data and annotate the select instruction with metadata. 405 enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 406 class PGOUseFunc; 407 408 /// Instruction Visitor class to visit select instructions. 409 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 410 Function &F; 411 unsigned NSIs = 0; // Number of select instructions instrumented. 412 VisitMode Mode = VM_counting; // Visiting mode. 413 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 414 unsigned TotalNumCtrs = 0; // Total number of counters 415 GlobalVariable *FuncNameVar = nullptr; 416 uint64_t FuncHash = 0; 417 PGOUseFunc *UseFunc = nullptr; 418 419 SelectInstVisitor(Function &Func) : F(Func) {} 420 421 void countSelects(Function &Func) { 422 NSIs = 0; 423 Mode = VM_counting; 424 visit(Func); 425 } 426 427 // Visit the IR stream and instrument all select instructions. \p 428 // Ind is a pointer to the counter index variable; \p TotalNC 429 // is the total number of counters; \p FNV is the pointer to the 430 // PGO function name var; \p FHash is the function hash. 431 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 432 GlobalVariable *FNV, uint64_t FHash) { 433 Mode = VM_instrument; 434 CurCtrIdx = Ind; 435 TotalNumCtrs = TotalNC; 436 FuncHash = FHash; 437 FuncNameVar = FNV; 438 visit(Func); 439 } 440 441 // Visit the IR stream and annotate all select instructions. 442 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 443 Mode = VM_annotate; 444 UseFunc = UF; 445 CurCtrIdx = Ind; 446 visit(Func); 447 } 448 449 void instrumentOneSelectInst(SelectInst &SI); 450 void annotateOneSelectInst(SelectInst &SI); 451 452 // Visit \p SI instruction and perform tasks according to visit mode. 453 void visitSelectInst(SelectInst &SI); 454 455 // Return the number of select instructions. This needs be called after 456 // countSelects(). 457 unsigned getNumOfSelectInsts() const { return NSIs; } 458 }; 459 460 } // end anonymous namespace 461 462 namespace { 463 464 /// An MST based instrumentation for PGO 465 /// 466 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 467 /// in the function level. 468 struct PGOEdge { 469 // This class implements the CFG edges. Note the CFG can be a multi-graph. 470 // So there might be multiple edges with same SrcBB and DestBB. 471 const BasicBlock *SrcBB; 472 const BasicBlock *DestBB; 473 uint64_t Weight; 474 bool InMST = false; 475 bool Removed = false; 476 bool IsCritical = false; 477 478 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 479 : SrcBB(Src), DestBB(Dest), Weight(W) {} 480 481 // Return the information string of an edge. 482 std::string infoString() const { 483 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 484 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); 485 } 486 }; 487 488 // This class stores the auxiliary information for each BB. 489 struct BBInfo { 490 BBInfo *Group; 491 uint32_t Index; 492 uint32_t Rank = 0; 493 494 BBInfo(unsigned IX) : Group(this), Index(IX) {} 495 496 // Return the information string of this object. 497 std::string infoString() const { 498 return (Twine("Index=") + Twine(Index)).str(); 499 } 500 501 // Empty function -- only applicable to UseBBInfo. 502 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 503 504 // Empty function -- only applicable to UseBBInfo. 505 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 506 }; 507 508 // This class implements the CFG edges. Note the CFG can be a multi-graph. 509 template <class Edge, class BBInfo> class FuncPGOInstrumentation { 510 private: 511 Function &F; 512 513 // Is this is context-sensitive instrumentation. 514 bool IsCS; 515 516 // A map that stores the Comdat group in function F. 517 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 518 519 ValueProfileCollector VPC; 520 521 void computeCFGHash(); 522 void renameComdatFunction(); 523 524 public: 525 const TargetLibraryInfo &TLI; 526 std::vector<std::vector<VPCandidateInfo>> ValueSites; 527 SelectInstVisitor SIVisitor; 528 std::string FuncName; 529 GlobalVariable *FuncNameVar; 530 531 // CFG hash value for this function. 532 uint64_t FunctionHash = 0; 533 534 // The Minimum Spanning Tree of function CFG. 535 CFGMST<Edge, BBInfo> MST; 536 537 // Collect all the BBs that will be instrumented, and store them in 538 // InstrumentBBs. 539 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 540 541 // Give an edge, find the BB that will be instrumented. 542 // Return nullptr if there is no BB to be instrumented. 543 BasicBlock *getInstrBB(Edge *E); 544 545 // Return the auxiliary BB information. 546 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 547 548 // Return the auxiliary BB information if available. 549 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 550 551 // Dump edges and BB information. 552 void dumpInfo(std::string Str = "") const { 553 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 554 Twine(FunctionHash) + "\t" + Str); 555 } 556 557 FuncPGOInstrumentation( 558 Function &Func, TargetLibraryInfo &TLI, 559 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 560 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 561 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, 562 bool InstrumentFuncEntry = true) 563 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 564 TLI(TLI), ValueSites(IPVK_Last + 1), SIVisitor(Func), 565 MST(F, InstrumentFuncEntry, BPI, BFI) { 566 // This should be done before CFG hash computation. 567 SIVisitor.countSelects(Func); 568 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 569 if (!IsCS) { 570 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 571 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 572 NumOfPGOBB += MST.BBInfos.size(); 573 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 574 } else { 575 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 576 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 577 NumOfCSPGOBB += MST.BBInfos.size(); 578 } 579 580 FuncName = getPGOFuncName(F); 581 computeCFGHash(); 582 if (!ComdatMembers.empty()) 583 renameComdatFunction(); 584 LLVM_DEBUG(dumpInfo("after CFGMST")); 585 586 for (auto &E : MST.AllEdges) { 587 if (E->Removed) 588 continue; 589 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 590 if (!E->InMST) 591 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 592 } 593 594 if (CreateGlobalVar) 595 FuncNameVar = createPGOFuncNameVar(F, FuncName); 596 } 597 }; 598 599 } // end anonymous namespace 600 601 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 602 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers 603 // of selects, indirect calls, mem ops and edges. 604 template <class Edge, class BBInfo> 605 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 606 std::vector<uint8_t> Indexes; 607 JamCRC JC; 608 for (auto &BB : F) { 609 const Instruction *TI = BB.getTerminator(); 610 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 611 BasicBlock *Succ = TI->getSuccessor(I); 612 auto BI = findBBInfo(Succ); 613 if (BI == nullptr) 614 continue; 615 uint32_t Index = BI->Index; 616 for (int J = 0; J < 4; J++) 617 Indexes.push_back((uint8_t)(Index >> (J * 8))); 618 } 619 } 620 JC.update(Indexes); 621 622 JamCRC JCH; 623 if (PGOOldCFGHashing) { 624 // Hash format for context sensitive profile. Reserve 4 bits for other 625 // information. 626 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | 627 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | 628 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | 629 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); 630 } else { 631 // The higher 32 bits. 632 auto updateJCH = [&JCH](uint64_t Num) { 633 uint8_t Data[8]; 634 support::endian::write64le(Data, Num); 635 JCH.update(Data); 636 }; 637 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); 638 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); 639 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); 640 updateJCH((uint64_t)MST.AllEdges.size()); 641 642 // Hash format for context sensitive profile. Reserve 4 bits for other 643 // information. 644 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); 645 } 646 647 // Reserve bit 60-63 for other information purpose. 648 FunctionHash &= 0x0FFFFFFFFFFFFFFF; 649 if (IsCS) 650 NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 651 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 652 << " CRC = " << JC.getCRC() 653 << ", Selects = " << SIVisitor.getNumOfSelectInsts() 654 << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 655 << ValueSites[IPVK_IndirectCallTarget].size()); 656 if (!PGOOldCFGHashing) { 657 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() 658 << ", High32 CRC = " << JCH.getCRC()); 659 } 660 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); 661 662 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash)) 663 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash 664 << " in building " << F.getParent()->getSourceFileName() << "\n"; 665 } 666 667 // Check if we can safely rename this Comdat function. 668 static bool canRenameComdat( 669 Function &F, 670 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 671 if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 672 return false; 673 674 // FIXME: Current only handle those Comdat groups that only containing one 675 // function. 676 // (1) For a Comdat group containing multiple functions, we need to have a 677 // unique postfix based on the hashes for each function. There is a 678 // non-trivial code refactoring to do this efficiently. 679 // (2) Variables can not be renamed, so we can not rename Comdat function in a 680 // group including global vars. 681 Comdat *C = F.getComdat(); 682 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 683 assert(!isa<GlobalAlias>(CM.second)); 684 Function *FM = dyn_cast<Function>(CM.second); 685 if (FM != &F) 686 return false; 687 } 688 return true; 689 } 690 691 // Append the CFGHash to the Comdat function name. 692 template <class Edge, class BBInfo> 693 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 694 if (!canRenameComdat(F, ComdatMembers)) 695 return; 696 std::string OrigName = F.getName().str(); 697 std::string NewFuncName = 698 Twine(F.getName() + "." + Twine(FunctionHash)).str(); 699 F.setName(Twine(NewFuncName)); 700 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 701 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 702 Comdat *NewComdat; 703 Module *M = F.getParent(); 704 // For AvailableExternallyLinkage functions, change the linkage to 705 // LinkOnceODR and put them into comdat. This is because after renaming, there 706 // is no backup external copy available for the function. 707 if (!F.hasComdat()) { 708 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 709 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 710 F.setLinkage(GlobalValue::LinkOnceODRLinkage); 711 F.setComdat(NewComdat); 712 return; 713 } 714 715 // This function belongs to a single function Comdat group. 716 Comdat *OrigComdat = F.getComdat(); 717 std::string NewComdatName = 718 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 719 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 720 NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 721 722 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 723 // Must be a function. 724 cast<Function>(CM.second)->setComdat(NewComdat); 725 } 726 } 727 728 // Collect all the BBs that will be instruments and return them in 729 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 730 template <class Edge, class BBInfo> 731 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 732 std::vector<BasicBlock *> &InstrumentBBs) { 733 // Use a worklist as we will update the vector during the iteration. 734 std::vector<Edge *> EdgeList; 735 EdgeList.reserve(MST.AllEdges.size()); 736 for (auto &E : MST.AllEdges) 737 EdgeList.push_back(E.get()); 738 739 for (auto &E : EdgeList) { 740 BasicBlock *InstrBB = getInstrBB(E); 741 if (InstrBB) 742 InstrumentBBs.push_back(InstrBB); 743 } 744 745 // Set up InEdges/OutEdges for all BBs. 746 for (auto &E : MST.AllEdges) { 747 if (E->Removed) 748 continue; 749 const BasicBlock *SrcBB = E->SrcBB; 750 const BasicBlock *DestBB = E->DestBB; 751 BBInfo &SrcInfo = getBBInfo(SrcBB); 752 BBInfo &DestInfo = getBBInfo(DestBB); 753 SrcInfo.addOutEdge(E.get()); 754 DestInfo.addInEdge(E.get()); 755 } 756 } 757 758 // Given a CFG E to be instrumented, find which BB to place the instrumented 759 // code. The function will split the critical edge if necessary. 760 template <class Edge, class BBInfo> 761 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 762 if (E->InMST || E->Removed) 763 return nullptr; 764 765 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 766 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 767 // For a fake edge, instrument the real BB. 768 if (SrcBB == nullptr) 769 return DestBB; 770 if (DestBB == nullptr) 771 return SrcBB; 772 773 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 774 // There are basic blocks (such as catchswitch) cannot be instrumented. 775 // If the returned first insertion point is the end of BB, skip this BB. 776 if (BB->getFirstInsertionPt() == BB->end()) 777 return nullptr; 778 return BB; 779 }; 780 781 // Instrument the SrcBB if it has a single successor, 782 // otherwise, the DestBB if this is not a critical edge. 783 Instruction *TI = SrcBB->getTerminator(); 784 if (TI->getNumSuccessors() <= 1) 785 return canInstrument(SrcBB); 786 if (!E->IsCritical) 787 return canInstrument(DestBB); 788 789 // Some IndirectBr critical edges cannot be split by the previous 790 // SplitIndirectBrCriticalEdges call. Bail out. 791 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 792 BasicBlock *InstrBB = 793 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); 794 if (!InstrBB) { 795 LLVM_DEBUG( 796 dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 797 return nullptr; 798 } 799 // For a critical edge, we have to split. Instrument the newly 800 // created BB. 801 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 802 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 803 << " --> " << getBBInfo(DestBB).Index << "\n"); 804 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 805 MST.addEdge(SrcBB, InstrBB, 0); 806 // Second one: Add new edge of InstrBB->DestBB. 807 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 808 NewEdge1.InMST = true; 809 E->Removed = true; 810 811 return canInstrument(InstrBB); 812 } 813 814 // When generating value profiling calls on Windows routines that make use of 815 // handler funclets for exception processing an operand bundle needs to attached 816 // to the called function. This routine will set \p OpBundles to contain the 817 // funclet information, if any is needed, that should be placed on the generated 818 // value profiling call for the value profile candidate call. 819 static void 820 populateEHOperandBundle(VPCandidateInfo &Cand, 821 DenseMap<BasicBlock *, ColorVector> &BlockColors, 822 SmallVectorImpl<OperandBundleDef> &OpBundles) { 823 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 824 if (!OrigCall) 825 return; 826 827 if (!isa<IntrinsicInst>(OrigCall)) { 828 // The instrumentation call should belong to the same funclet as a 829 // non-intrinsic call, so just copy the operand bundle, if any exists. 830 std::optional<OperandBundleUse> ParentFunclet = 831 OrigCall->getOperandBundle(LLVMContext::OB_funclet); 832 if (ParentFunclet) 833 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 834 } else { 835 // Intrinsics or other instructions do not get funclet information from the 836 // front-end. Need to use the BlockColors that was computed by the routine 837 // colorEHFunclets to determine whether a funclet is needed. 838 if (!BlockColors.empty()) { 839 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 840 assert(CV.size() == 1 && "non-unique color for block!"); 841 Instruction *EHPad = CV.front()->getFirstNonPHI(); 842 if (EHPad->isEHPad()) 843 OpBundles.emplace_back("funclet", EHPad); 844 } 845 } 846 } 847 848 // Visit all edge and instrument the edges not in MST, and do value profiling. 849 // Critical edges will be split. 850 static void instrumentOneFunc( 851 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 852 BlockFrequencyInfo *BFI, 853 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 854 bool IsCS) { 855 // Split indirectbr critical edges here before computing the MST rather than 856 // later in getInstrBB() to avoid invalidating it. 857 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 858 859 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo( 860 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); 861 862 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 863 auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy); 864 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), 865 FuncInfo.FunctionHash); 866 if (PGOFunctionEntryCoverage) { 867 auto &EntryBB = F.getEntryBlock(); 868 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); 869 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, 870 // i32 <index>) 871 Builder.CreateCall( 872 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover), 873 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)}); 874 return; 875 } 876 877 std::vector<BasicBlock *> InstrumentBBs; 878 FuncInfo.getInstrumentBBs(InstrumentBBs); 879 unsigned NumCounters = 880 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 881 882 uint32_t I = 0; 883 for (auto *InstrBB : InstrumentBBs) { 884 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 885 assert(Builder.GetInsertPoint() != InstrBB->end() && 886 "Cannot get the Instrumentation point"); 887 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, 888 // i32 <index>) 889 Builder.CreateCall( 890 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 891 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)}); 892 } 893 894 // Now instrument select instructions: 895 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 896 FuncInfo.FunctionHash); 897 assert(I == NumCounters); 898 899 if (DisableValueProfiling) 900 return; 901 902 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 903 904 // Intrinsic function calls do not have funclet operand bundles needed for 905 // Windows exception handling attached to them. However, if value profiling is 906 // inserted for one of these calls, then a funclet value will need to be set 907 // on the instrumentation call based on the funclet coloring. 908 DenseMap<BasicBlock *, ColorVector> BlockColors; 909 if (F.hasPersonalityFn() && 910 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 911 BlockColors = colorEHFunclets(F); 912 913 // For each VP Kind, walk the VP candidates and instrument each one. 914 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 915 unsigned SiteIndex = 0; 916 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 917 continue; 918 919 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 920 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 921 << " site: CallSite Index = " << SiteIndex << "\n"); 922 923 IRBuilder<> Builder(Cand.InsertPt); 924 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 925 "Cannot get the Instrumentation point"); 926 927 Value *ToProfile = nullptr; 928 if (Cand.V->getType()->isIntegerTy()) 929 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 930 else if (Cand.V->getType()->isPointerTy()) 931 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 932 assert(ToProfile && "value profiling Value is of unexpected type"); 933 934 SmallVector<OperandBundleDef, 1> OpBundles; 935 populateEHOperandBundle(Cand, BlockColors, OpBundles); 936 Builder.CreateCall( 937 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 938 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 939 Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 940 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 941 OpBundles); 942 } 943 } // IPVK_First <= Kind <= IPVK_Last 944 } 945 946 namespace { 947 948 // This class represents a CFG edge in profile use compilation. 949 struct PGOUseEdge : public PGOEdge { 950 bool CountValid = false; 951 uint64_t CountValue = 0; 952 953 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 954 : PGOEdge(Src, Dest, W) {} 955 956 // Set edge count value 957 void setEdgeCount(uint64_t Value) { 958 CountValue = Value; 959 CountValid = true; 960 } 961 962 // Return the information string for this object. 963 std::string infoString() const { 964 if (!CountValid) 965 return PGOEdge::infoString(); 966 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)) 967 .str(); 968 } 969 }; 970 971 using DirectEdges = SmallVector<PGOUseEdge *, 2>; 972 973 // This class stores the auxiliary information for each BB. 974 struct UseBBInfo : public BBInfo { 975 uint64_t CountValue = 0; 976 bool CountValid; 977 int32_t UnknownCountInEdge = 0; 978 int32_t UnknownCountOutEdge = 0; 979 DirectEdges InEdges; 980 DirectEdges OutEdges; 981 982 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 983 984 UseBBInfo(unsigned IX, uint64_t C) 985 : BBInfo(IX), CountValue(C), CountValid(true) {} 986 987 // Set the profile count value for this BB. 988 void setBBInfoCount(uint64_t Value) { 989 CountValue = Value; 990 CountValid = true; 991 } 992 993 // Return the information string of this object. 994 std::string infoString() const { 995 if (!CountValid) 996 return BBInfo::infoString(); 997 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); 998 } 999 1000 // Add an OutEdge and update the edge count. 1001 void addOutEdge(PGOUseEdge *E) { 1002 OutEdges.push_back(E); 1003 UnknownCountOutEdge++; 1004 } 1005 1006 // Add an InEdge and update the edge count. 1007 void addInEdge(PGOUseEdge *E) { 1008 InEdges.push_back(E); 1009 UnknownCountInEdge++; 1010 } 1011 }; 1012 1013 } // end anonymous namespace 1014 1015 // Sum up the count values for all the edges. 1016 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 1017 uint64_t Total = 0; 1018 for (const auto &E : Edges) { 1019 if (E->Removed) 1020 continue; 1021 Total += E->CountValue; 1022 } 1023 return Total; 1024 } 1025 1026 namespace { 1027 1028 class PGOUseFunc { 1029 public: 1030 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 1031 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 1032 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 1033 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry) 1034 : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 1035 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, 1036 InstrumentFuncEntry), 1037 FreqAttr(FFA_Normal), IsCS(IsCS) {} 1038 1039 // Read counts for the instrumented BB from profile. 1040 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1041 InstrProfRecord::CountPseudoKind &PseudoKind); 1042 1043 // Read memprof data for the instrumented function from profile. 1044 bool readMemprof(IndexedInstrProfReader *PGOReader); 1045 1046 // Populate the counts for all BBs. 1047 void populateCounters(); 1048 1049 // Set the branch weights based on the count values. 1050 void setBranchWeights(); 1051 1052 // Annotate the value profile call sites for all value kind. 1053 void annotateValueSites(); 1054 1055 // Annotate the value profile call sites for one value kind. 1056 void annotateValueSites(uint32_t Kind); 1057 1058 // Annotate the irreducible loop header weights. 1059 void annotateIrrLoopHeaderWeights(); 1060 1061 // The hotness of the function from the profile count. 1062 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 1063 1064 // Return the function hotness from the profile. 1065 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 1066 1067 // Return the function hash. 1068 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 1069 1070 // Return the profile record for this function; 1071 InstrProfRecord &getProfileRecord() { return ProfileRecord; } 1072 1073 // Return the auxiliary BB information. 1074 UseBBInfo &getBBInfo(const BasicBlock *BB) const { 1075 return FuncInfo.getBBInfo(BB); 1076 } 1077 1078 // Return the auxiliary BB information if available. 1079 UseBBInfo *findBBInfo(const BasicBlock *BB) const { 1080 return FuncInfo.findBBInfo(BB); 1081 } 1082 1083 Function &getFunc() const { return F; } 1084 1085 void dumpInfo(std::string Str = "") const { 1086 FuncInfo.dumpInfo(Str); 1087 } 1088 1089 uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 1090 private: 1091 Function &F; 1092 Module *M; 1093 BlockFrequencyInfo *BFI; 1094 ProfileSummaryInfo *PSI; 1095 1096 // This member stores the shared information with class PGOGenFunc. 1097 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 1098 1099 // The maximum count value in the profile. This is only used in PGO use 1100 // compilation. 1101 uint64_t ProgramMaxCount; 1102 1103 // Position of counter that remains to be read. 1104 uint32_t CountPosition = 0; 1105 1106 // Total size of the profile count for this function. 1107 uint32_t ProfileCountSize = 0; 1108 1109 // ProfileRecord for this function. 1110 InstrProfRecord ProfileRecord; 1111 1112 // Function hotness info derived from profile. 1113 FuncFreqAttr FreqAttr; 1114 1115 // Is to use the context sensitive profile. 1116 bool IsCS; 1117 1118 // Find the Instrumented BB and set the value. Return false on error. 1119 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 1120 1121 // Set the edge counter value for the unknown edge -- there should be only 1122 // one unknown edge. 1123 void setEdgeCount(DirectEdges &Edges, uint64_t Value); 1124 1125 // Return FuncName string; 1126 std::string getFuncName() const { return FuncInfo.FuncName; } 1127 1128 // Set the hot/cold inline hints based on the count values. 1129 // FIXME: This function should be removed once the functionality in 1130 // the inliner is implemented. 1131 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 1132 if (PSI->isHotCount(EntryCount)) 1133 FreqAttr = FFA_Hot; 1134 else if (PSI->isColdCount(MaxCount)) 1135 FreqAttr = FFA_Cold; 1136 } 1137 }; 1138 1139 } // end anonymous namespace 1140 1141 // Visit all the edges and assign the count value for the instrumented 1142 // edges and the BB. Return false on error. 1143 bool PGOUseFunc::setInstrumentedCounts( 1144 const std::vector<uint64_t> &CountFromProfile) { 1145 1146 std::vector<BasicBlock *> InstrumentBBs; 1147 FuncInfo.getInstrumentBBs(InstrumentBBs); 1148 unsigned NumCounters = 1149 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 1150 // The number of counters here should match the number of counters 1151 // in profile. Return if they mismatch. 1152 if (NumCounters != CountFromProfile.size()) { 1153 return false; 1154 } 1155 auto *FuncEntry = &*F.begin(); 1156 1157 // Set the profile count to the Instrumented BBs. 1158 uint32_t I = 0; 1159 for (BasicBlock *InstrBB : InstrumentBBs) { 1160 uint64_t CountValue = CountFromProfile[I++]; 1161 UseBBInfo &Info = getBBInfo(InstrBB); 1162 // If we reach here, we know that we have some nonzero count 1163 // values in this function. The entry count should not be 0. 1164 // Fix it if necessary. 1165 if (InstrBB == FuncEntry && CountValue == 0) 1166 CountValue = 1; 1167 Info.setBBInfoCount(CountValue); 1168 } 1169 ProfileCountSize = CountFromProfile.size(); 1170 CountPosition = I; 1171 1172 // Set the edge count and update the count of unknown edges for BBs. 1173 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 1174 E->setEdgeCount(Value); 1175 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1176 this->getBBInfo(E->DestBB).UnknownCountInEdge--; 1177 }; 1178 1179 // Set the profile count the Instrumented edges. There are BBs that not in 1180 // MST but not instrumented. Need to set the edge count value so that we can 1181 // populate the profile counts later. 1182 for (auto &E : FuncInfo.MST.AllEdges) { 1183 if (E->Removed || E->InMST) 1184 continue; 1185 const BasicBlock *SrcBB = E->SrcBB; 1186 UseBBInfo &SrcInfo = getBBInfo(SrcBB); 1187 1188 // If only one out-edge, the edge profile count should be the same as BB 1189 // profile count. 1190 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 1191 setEdgeCount(E.get(), SrcInfo.CountValue); 1192 else { 1193 const BasicBlock *DestBB = E->DestBB; 1194 UseBBInfo &DestInfo = getBBInfo(DestBB); 1195 // If only one in-edge, the edge profile count should be the same as BB 1196 // profile count. 1197 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 1198 setEdgeCount(E.get(), DestInfo.CountValue); 1199 } 1200 if (E->CountValid) 1201 continue; 1202 // E's count should have been set from profile. If not, this meenas E skips 1203 // the instrumentation. We set the count to 0. 1204 setEdgeCount(E.get(), 0); 1205 } 1206 return true; 1207 } 1208 1209 // Set the count value for the unknown edge. There should be one and only one 1210 // unknown edge in Edges vector. 1211 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 1212 for (auto &E : Edges) { 1213 if (E->CountValid) 1214 continue; 1215 E->setEdgeCount(Value); 1216 1217 getBBInfo(E->SrcBB).UnknownCountOutEdge--; 1218 getBBInfo(E->DestBB).UnknownCountInEdge--; 1219 return; 1220 } 1221 llvm_unreachable("Cannot find the unknown count edge"); 1222 } 1223 1224 // Emit function metadata indicating PGO profile mismatch. 1225 static void annotateFunctionWithHashMismatch(Function &F, 1226 LLVMContext &ctx) { 1227 const char MetadataName[] = "instr_prof_hash_mismatch"; 1228 SmallVector<Metadata *, 2> Names; 1229 // If this metadata already exists, ignore. 1230 auto *Existing = F.getMetadata(LLVMContext::MD_annotation); 1231 if (Existing) { 1232 MDTuple *Tuple = cast<MDTuple>(Existing); 1233 for (const auto &N : Tuple->operands()) { 1234 if (cast<MDString>(N.get())->getString() == MetadataName) 1235 return; 1236 Names.push_back(N.get()); 1237 } 1238 } 1239 1240 MDBuilder MDB(ctx); 1241 Names.push_back(MDB.createString(MetadataName)); 1242 MDNode *MD = MDTuple::get(ctx, Names); 1243 F.setMetadata(LLVMContext::MD_annotation, MD); 1244 } 1245 1246 static void addCallsiteMetadata(Instruction &I, 1247 std::vector<uint64_t> &InlinedCallStack, 1248 LLVMContext &Ctx) { 1249 I.setMetadata(LLVMContext::MD_callsite, 1250 buildCallstackMetadata(InlinedCallStack, Ctx)); 1251 } 1252 1253 static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, 1254 uint32_t Column) { 1255 llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little> 1256 HashBuilder; 1257 HashBuilder.add(Function, LineOffset, Column); 1258 llvm::BLAKE3Result<8> Hash = HashBuilder.final(); 1259 uint64_t Id; 1260 std::memcpy(&Id, Hash.data(), sizeof(Hash)); 1261 return Id; 1262 } 1263 1264 static uint64_t computeStackId(const memprof::Frame &Frame) { 1265 return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column); 1266 } 1267 1268 static void addCallStack(CallStackTrie &AllocTrie, 1269 const AllocationInfo *AllocInfo) { 1270 SmallVector<uint64_t> StackIds; 1271 for (auto StackFrame : AllocInfo->CallStack) 1272 StackIds.push_back(computeStackId(StackFrame)); 1273 auto AllocType = getAllocType(AllocInfo->Info.getMaxAccessCount(), 1274 AllocInfo->Info.getMinSize(), 1275 AllocInfo->Info.getMinLifetime()); 1276 AllocTrie.addCallStack(AllocType, StackIds); 1277 } 1278 1279 // Helper to compare the InlinedCallStack computed from an instruction's debug 1280 // info to a list of Frames from profile data (either the allocation data or a 1281 // callsite). For callsites, the StartIndex to use in the Frame array may be 1282 // non-zero. 1283 static bool 1284 stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack, 1285 ArrayRef<uint64_t> InlinedCallStack, 1286 unsigned StartIndex = 0) { 1287 auto StackFrame = ProfileCallStack.begin() + StartIndex; 1288 auto InlCallStackIter = InlinedCallStack.begin(); 1289 for (; StackFrame != ProfileCallStack.end() && 1290 InlCallStackIter != InlinedCallStack.end(); 1291 ++StackFrame, ++InlCallStackIter) { 1292 uint64_t StackId = computeStackId(*StackFrame); 1293 if (StackId != *InlCallStackIter) 1294 return false; 1295 } 1296 // Return true if we found and matched all stack ids from the call 1297 // instruction. 1298 return InlCallStackIter == InlinedCallStack.end(); 1299 } 1300 1301 bool PGOUseFunc::readMemprof(IndexedInstrProfReader *PGOReader) { 1302 if (!MatchMemProf) 1303 return true; 1304 1305 auto &Ctx = M->getContext(); 1306 1307 auto FuncGUID = Function::getGUID(FuncInfo.FuncName); 1308 Expected<memprof::MemProfRecord> MemProfResult = 1309 PGOReader->getMemProfRecord(FuncGUID); 1310 if (Error E = MemProfResult.takeError()) { 1311 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1312 auto Err = IPE.get(); 1313 bool SkipWarning = false; 1314 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1315 << FuncInfo.FuncName << ": "); 1316 if (Err == instrprof_error::unknown_function) { 1317 NumOfMemProfMissing++; 1318 SkipWarning = !PGOWarnMissing; 1319 LLVM_DEBUG(dbgs() << "unknown function"); 1320 } else if (Err == instrprof_error::hash_mismatch) { 1321 SkipWarning = 1322 NoPGOWarnMismatch || 1323 (NoPGOWarnMismatchComdatWeak && 1324 (F.hasComdat() || 1325 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1326 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 1327 } 1328 1329 if (SkipWarning) 1330 return; 1331 1332 std::string Msg = 1333 (IPE.message() + Twine(" ") + F.getName().str() + Twine(" Hash = ") + 1334 std::to_string(FuncInfo.FunctionHash)) 1335 .str(); 1336 1337 Ctx.diagnose( 1338 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1339 }); 1340 return false; 1341 } 1342 1343 // Build maps of the location hash to all profile data with that leaf location 1344 // (allocation info and the callsites). 1345 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo; 1346 // For the callsites we need to record the index of the associated frame in 1347 // the frame array (see comments below where the map entries are added). 1348 std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>> 1349 LocHashToCallSites; 1350 const auto MemProfRec = std::move(MemProfResult.get()); 1351 for (auto &AI : MemProfRec.AllocSites) { 1352 // Associate the allocation info with the leaf frame. The later matching 1353 // code will match any inlined call sequences in the IR with a longer prefix 1354 // of call stack frames. 1355 uint64_t StackId = computeStackId(AI.CallStack[0]); 1356 LocHashToAllocInfo[StackId].insert(&AI); 1357 } 1358 for (auto &CS : MemProfRec.CallSites) { 1359 // Need to record all frames from leaf up to and including this function, 1360 // as any of these may or may not have been inlined at this point. 1361 unsigned Idx = 0; 1362 for (auto &StackFrame : CS) { 1363 uint64_t StackId = computeStackId(StackFrame); 1364 LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++)); 1365 // Once we find this function, we can stop recording. 1366 if (StackFrame.Function == FuncGUID) 1367 break; 1368 } 1369 assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID); 1370 } 1371 1372 auto GetOffset = [](const DILocation *DIL) { 1373 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & 1374 0xffff; 1375 }; 1376 1377 // Now walk the instructions, looking up the associated profile data using 1378 // dbug locations. 1379 for (auto &BB : F) { 1380 for (auto &I : BB) { 1381 if (I.isDebugOrPseudoInst()) 1382 continue; 1383 // We are only interested in calls (allocation or interior call stack 1384 // context calls). 1385 auto *CI = dyn_cast<CallBase>(&I); 1386 if (!CI) 1387 continue; 1388 auto *CalledFunction = CI->getCalledFunction(); 1389 if (CalledFunction && CalledFunction->isIntrinsic()) 1390 continue; 1391 // List of call stack ids computed from the location hashes on debug 1392 // locations (leaf to inlined at root). 1393 std::vector<uint64_t> InlinedCallStack; 1394 // Was the leaf location found in one of the profile maps? 1395 bool LeafFound = false; 1396 // If leaf was found in a map, iterators pointing to its location in both 1397 // of the maps. It might exist in neither, one, or both (the latter case 1398 // can happen because we don't currently have discriminators to 1399 // distinguish the case when a single line/col maps to both an allocation 1400 // and another callsite). 1401 std::map<uint64_t, std::set<const AllocationInfo *>>::iterator 1402 AllocInfoIter; 1403 std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, 1404 unsigned>>>::iterator CallSitesIter; 1405 for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; 1406 DIL = DIL->getInlinedAt()) { 1407 // Use C++ linkage name if possible. Need to compile with 1408 // -fdebug-info-for-profiling to get linkage name. 1409 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); 1410 if (Name.empty()) 1411 Name = DIL->getScope()->getSubprogram()->getName(); 1412 auto CalleeGUID = Function::getGUID(Name); 1413 auto StackId = 1414 computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn()); 1415 // LeafFound will only be false on the first iteration, since we either 1416 // set it true or break out of the loop below. 1417 if (!LeafFound) { 1418 AllocInfoIter = LocHashToAllocInfo.find(StackId); 1419 CallSitesIter = LocHashToCallSites.find(StackId); 1420 // Check if the leaf is in one of the maps. If not, no need to look 1421 // further at this call. 1422 if (AllocInfoIter == LocHashToAllocInfo.end() && 1423 CallSitesIter == LocHashToCallSites.end()) 1424 break; 1425 LeafFound = true; 1426 } 1427 InlinedCallStack.push_back(StackId); 1428 } 1429 // If leaf not in either of the maps, skip inst. 1430 if (!LeafFound) 1431 continue; 1432 1433 // First add !memprof metadata from allocation info, if we found the 1434 // instruction's leaf location in that map, and if the rest of the 1435 // instruction's locations match the prefix Frame locations on an 1436 // allocation context with the same leaf. 1437 if (AllocInfoIter != LocHashToAllocInfo.end()) { 1438 // Only consider allocations via new, to reduce unnecessary metadata, 1439 // since those are the only allocations that will be targeted initially. 1440 if (!isNewLikeFn(CI, &FuncInfo.TLI)) 1441 continue; 1442 // We may match this instruction's location list to multiple MIB 1443 // contexts. Add them to a Trie specialized for trimming the contexts to 1444 // the minimal needed to disambiguate contexts with unique behavior. 1445 CallStackTrie AllocTrie; 1446 for (auto *AllocInfo : AllocInfoIter->second) { 1447 // Check the full inlined call stack against this one. 1448 // If we found and thus matched all frames on the call, include 1449 // this MIB. 1450 if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, 1451 InlinedCallStack)) 1452 addCallStack(AllocTrie, AllocInfo); 1453 } 1454 // We might not have matched any to the full inlined call stack. 1455 // But if we did, create and attach metadata, or a function attribute if 1456 // all contexts have identical profiled behavior. 1457 if (!AllocTrie.empty()) { 1458 // MemprofMDAttached will be false if a function attribute was 1459 // attached. 1460 bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); 1461 assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof)); 1462 if (MemprofMDAttached) { 1463 // Add callsite metadata for the instruction's location list so that 1464 // it simpler later on to identify which part of the MIB contexts 1465 // are from this particular instruction (including during inlining, 1466 // when the callsite metdata will be updated appropriately). 1467 // FIXME: can this be changed to strip out the matching stack 1468 // context ids from the MIB contexts and not add any callsite 1469 // metadata here to save space? 1470 addCallsiteMetadata(I, InlinedCallStack, Ctx); 1471 } 1472 } 1473 continue; 1474 } 1475 1476 // Otherwise, add callsite metadata. If we reach here then we found the 1477 // instruction's leaf location in the callsites map and not the allocation 1478 // map. 1479 assert(CallSitesIter != LocHashToCallSites.end()); 1480 for (auto CallStackIdx : CallSitesIter->second) { 1481 // If we found and thus matched all frames on the call, create and 1482 // attach call stack metadata. 1483 if (stackFrameIncludesInlinedCallStack( 1484 *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) { 1485 addCallsiteMetadata(I, InlinedCallStack, Ctx); 1486 // Only need to find one with a matching call stack and add a single 1487 // callsite metadata. 1488 break; 1489 } 1490 } 1491 } 1492 } 1493 1494 return true; 1495 } 1496 1497 // Read the profile from ProfileFileName and assign the value to the 1498 // instrumented BB and the edges. This function also updates ProgramMaxCount. 1499 // Return true if the profile are successfully read, and false on errors. 1500 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, 1501 InstrProfRecord::CountPseudoKind &PseudoKind) { 1502 auto &Ctx = M->getContext(); 1503 uint64_t MismatchedFuncSum = 0; 1504 Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( 1505 FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum); 1506 if (Error E = Result.takeError()) { 1507 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 1508 auto Err = IPE.get(); 1509 bool SkipWarning = false; 1510 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 1511 << FuncInfo.FuncName << ": "); 1512 if (Err == instrprof_error::unknown_function) { 1513 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 1514 SkipWarning = !PGOWarnMissing; 1515 LLVM_DEBUG(dbgs() << "unknown function"); 1516 } else if (Err == instrprof_error::hash_mismatch || 1517 Err == instrprof_error::malformed) { 1518 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 1519 SkipWarning = 1520 NoPGOWarnMismatch || 1521 (NoPGOWarnMismatchComdatWeak && 1522 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage || 1523 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 1524 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash 1525 << " skip=" << SkipWarning << ")"); 1526 // Emit function metadata indicating PGO profile mismatch. 1527 annotateFunctionWithHashMismatch(F, M->getContext()); 1528 } 1529 1530 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 1531 if (SkipWarning) 1532 return; 1533 1534 std::string Msg = 1535 IPE.message() + std::string(" ") + F.getName().str() + 1536 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) + 1537 std::string(" up to ") + std::to_string(MismatchedFuncSum) + 1538 std::string(" count discarded"); 1539 1540 Ctx.diagnose( 1541 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 1542 }); 1543 return false; 1544 } 1545 ProfileRecord = std::move(Result.get()); 1546 PseudoKind = ProfileRecord.getCountPseudoKind(); 1547 if (PseudoKind != InstrProfRecord::NotPseudo) { 1548 return true; 1549 } 1550 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 1551 1552 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 1553 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 1554 1555 uint64_t ValueSum = 0; 1556 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 1557 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); 1558 ValueSum += CountFromProfile[I]; 1559 } 1560 AllZeros = (ValueSum == 0); 1561 1562 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); 1563 1564 getBBInfo(nullptr).UnknownCountOutEdge = 2; 1565 getBBInfo(nullptr).UnknownCountInEdge = 2; 1566 1567 if (!setInstrumentedCounts(CountFromProfile)) { 1568 LLVM_DEBUG( 1569 dbgs() << "Inconsistent number of counts, skipping this function"); 1570 Ctx.diagnose(DiagnosticInfoPGOProfile( 1571 M->getName().data(), 1572 Twine("Inconsistent number of counts in ") + F.getName().str() 1573 + Twine(": the profile may be stale or there is a function name collision."), 1574 DS_Warning)); 1575 return false; 1576 } 1577 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 1578 return true; 1579 } 1580 1581 // Populate the counters from instrumented BBs to all BBs. 1582 // In the end of this operation, all BBs should have a valid count value. 1583 void PGOUseFunc::populateCounters() { 1584 bool Changes = true; 1585 unsigned NumPasses = 0; 1586 while (Changes) { 1587 NumPasses++; 1588 Changes = false; 1589 1590 // For efficient traversal, it's better to start from the end as most 1591 // of the instrumented edges are at the end. 1592 for (auto &BB : reverse(F)) { 1593 UseBBInfo *Count = findBBInfo(&BB); 1594 if (Count == nullptr) 1595 continue; 1596 if (!Count->CountValid) { 1597 if (Count->UnknownCountOutEdge == 0) { 1598 Count->CountValue = sumEdgeCount(Count->OutEdges); 1599 Count->CountValid = true; 1600 Changes = true; 1601 } else if (Count->UnknownCountInEdge == 0) { 1602 Count->CountValue = sumEdgeCount(Count->InEdges); 1603 Count->CountValid = true; 1604 Changes = true; 1605 } 1606 } 1607 if (Count->CountValid) { 1608 if (Count->UnknownCountOutEdge == 1) { 1609 uint64_t Total = 0; 1610 uint64_t OutSum = sumEdgeCount(Count->OutEdges); 1611 // If the one of the successor block can early terminate (no-return), 1612 // we can end up with situation where out edge sum count is larger as 1613 // the source BB's count is collected by a post-dominated block. 1614 if (Count->CountValue > OutSum) 1615 Total = Count->CountValue - OutSum; 1616 setEdgeCount(Count->OutEdges, Total); 1617 Changes = true; 1618 } 1619 if (Count->UnknownCountInEdge == 1) { 1620 uint64_t Total = 0; 1621 uint64_t InSum = sumEdgeCount(Count->InEdges); 1622 if (Count->CountValue > InSum) 1623 Total = Count->CountValue - InSum; 1624 setEdgeCount(Count->InEdges, Total); 1625 Changes = true; 1626 } 1627 } 1628 } 1629 } 1630 1631 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 1632 (void) NumPasses; 1633 #ifndef NDEBUG 1634 // Assert every BB has a valid counter. 1635 for (auto &BB : F) { 1636 auto BI = findBBInfo(&BB); 1637 if (BI == nullptr) 1638 continue; 1639 assert(BI->CountValid && "BB count is not valid"); 1640 } 1641 #endif 1642 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 1643 uint64_t FuncMaxCount = FuncEntryCount; 1644 for (auto &BB : F) { 1645 auto BI = findBBInfo(&BB); 1646 if (BI == nullptr) 1647 continue; 1648 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 1649 } 1650 1651 // Fix the obviously inconsistent entry count. 1652 if (FuncMaxCount > 0 && FuncEntryCount == 0) 1653 FuncEntryCount = 1; 1654 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); 1655 markFunctionAttributes(FuncEntryCount, FuncMaxCount); 1656 1657 // Now annotate select instructions 1658 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 1659 assert(CountPosition == ProfileCountSize); 1660 1661 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 1662 } 1663 1664 // Assign the scaled count values to the BB with multiple out edges. 1665 void PGOUseFunc::setBranchWeights() { 1666 // Generate MD_prof metadata for every branch instruction. 1667 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 1668 << " IsCS=" << IsCS << "\n"); 1669 for (auto &BB : F) { 1670 Instruction *TI = BB.getTerminator(); 1671 if (TI->getNumSuccessors() < 2) 1672 continue; 1673 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 1674 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) || 1675 isa<CallBrInst>(TI))) 1676 continue; 1677 1678 if (getBBInfo(&BB).CountValue == 0) 1679 continue; 1680 1681 // We have a non-zero Branch BB. 1682 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1683 unsigned Size = BBCountInfo.OutEdges.size(); 1684 SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 1685 uint64_t MaxCount = 0; 1686 for (unsigned s = 0; s < Size; s++) { 1687 const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 1688 const BasicBlock *SrcBB = E->SrcBB; 1689 const BasicBlock *DestBB = E->DestBB; 1690 if (DestBB == nullptr) 1691 continue; 1692 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 1693 uint64_t EdgeCount = E->CountValue; 1694 if (EdgeCount > MaxCount) 1695 MaxCount = EdgeCount; 1696 EdgeCounts[SuccNum] = EdgeCount; 1697 } 1698 1699 if (MaxCount) 1700 setProfMetadata(M, TI, EdgeCounts, MaxCount); 1701 else { 1702 // A zero MaxCount can come about when we have a BB with a positive 1703 // count, and whose successor blocks all have 0 count. This can happen 1704 // when there is no exit block and the code exits via a noreturn function. 1705 auto &Ctx = M->getContext(); 1706 Ctx.diagnose(DiagnosticInfoPGOProfile( 1707 M->getName().data(), 1708 Twine("Profile in ") + F.getName().str() + 1709 Twine(" partially ignored") + 1710 Twine(", possibly due to the lack of a return path."), 1711 DS_Warning)); 1712 } 1713 } 1714 } 1715 1716 static bool isIndirectBrTarget(BasicBlock *BB) { 1717 for (BasicBlock *Pred : predecessors(BB)) { 1718 if (isa<IndirectBrInst>(Pred->getTerminator())) 1719 return true; 1720 } 1721 return false; 1722 } 1723 1724 void PGOUseFunc::annotateIrrLoopHeaderWeights() { 1725 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 1726 // Find irr loop headers 1727 for (auto &BB : F) { 1728 // As a heuristic also annotate indrectbr targets as they have a high chance 1729 // to become an irreducible loop header after the indirectbr tail 1730 // duplication. 1731 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 1732 Instruction *TI = BB.getTerminator(); 1733 const UseBBInfo &BBCountInfo = getBBInfo(&BB); 1734 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 1735 } 1736 } 1737 } 1738 1739 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 1740 if (PGOFunctionEntryCoverage) 1741 return; 1742 Module *M = F.getParent(); 1743 IRBuilder<> Builder(&SI); 1744 Type *Int64Ty = Builder.getInt64Ty(); 1745 Type *I8PtrTy = Builder.getInt8PtrTy(); 1746 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 1747 Builder.CreateCall( 1748 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 1749 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1750 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 1751 Builder.getInt32(*CurCtrIdx), Step}); 1752 ++(*CurCtrIdx); 1753 } 1754 1755 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 1756 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 1757 assert(*CurCtrIdx < CountFromProfile.size() && 1758 "Out of bound access of counters"); 1759 uint64_t SCounts[2]; 1760 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 1761 ++(*CurCtrIdx); 1762 uint64_t TotalCount = 0; 1763 auto BI = UseFunc->findBBInfo(SI.getParent()); 1764 if (BI != nullptr) 1765 TotalCount = BI->CountValue; 1766 // False Count 1767 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 1768 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 1769 if (MaxCount) 1770 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 1771 } 1772 1773 void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 1774 if (!PGOInstrSelect) 1775 return; 1776 // FIXME: do not handle this yet. 1777 if (SI.getCondition()->getType()->isVectorTy()) 1778 return; 1779 1780 switch (Mode) { 1781 case VM_counting: 1782 NSIs++; 1783 return; 1784 case VM_instrument: 1785 instrumentOneSelectInst(SI); 1786 return; 1787 case VM_annotate: 1788 annotateOneSelectInst(SI); 1789 return; 1790 } 1791 1792 llvm_unreachable("Unknown visiting mode"); 1793 } 1794 1795 // Traverse all valuesites and annotate the instructions for all value kind. 1796 void PGOUseFunc::annotateValueSites() { 1797 if (DisableValueProfiling) 1798 return; 1799 1800 // Create the PGOFuncName meta data. 1801 createPGOFuncNameMetadata(F, FuncInfo.FuncName); 1802 1803 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1804 annotateValueSites(Kind); 1805 } 1806 1807 // Annotate the instructions for a specific value kind. 1808 void PGOUseFunc::annotateValueSites(uint32_t Kind) { 1809 assert(Kind <= IPVK_Last); 1810 unsigned ValueSiteIndex = 0; 1811 auto &ValueSites = FuncInfo.ValueSites[Kind]; 1812 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 1813 if (NumValueSites != ValueSites.size()) { 1814 auto &Ctx = M->getContext(); 1815 Ctx.diagnose(DiagnosticInfoPGOProfile( 1816 M->getName().data(), 1817 Twine("Inconsistent number of value sites for ") + 1818 Twine(ValueProfKindDescr[Kind]) + 1819 Twine(" profiling in \"") + F.getName().str() + 1820 Twine("\", possibly due to the use of a stale profile."), 1821 DS_Warning)); 1822 return; 1823 } 1824 1825 for (VPCandidateInfo &I : ValueSites) { 1826 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 1827 << "): Index = " << ValueSiteIndex << " out of " 1828 << NumValueSites << "\n"); 1829 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 1830 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 1831 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 1832 : MaxNumAnnotations); 1833 ValueSiteIndex++; 1834 } 1835 } 1836 1837 // Collect the set of members for each Comdat in module M and store 1838 // in ComdatMembers. 1839 static void collectComdatMembers( 1840 Module &M, 1841 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 1842 if (!DoComdatRenaming) 1843 return; 1844 for (Function &F : M) 1845 if (Comdat *C = F.getComdat()) 1846 ComdatMembers.insert(std::make_pair(C, &F)); 1847 for (GlobalVariable &GV : M.globals()) 1848 if (Comdat *C = GV.getComdat()) 1849 ComdatMembers.insert(std::make_pair(C, &GV)); 1850 for (GlobalAlias &GA : M.aliases()) 1851 if (Comdat *C = GA.getComdat()) 1852 ComdatMembers.insert(std::make_pair(C, &GA)); 1853 } 1854 1855 // Don't perform PGO instrumeatnion / profile-use. 1856 static bool skipPGO(const Function &F) { 1857 if (F.isDeclaration()) 1858 return true; 1859 if (F.hasFnAttribute(llvm::Attribute::NoProfile)) 1860 return true; 1861 if (F.hasFnAttribute(llvm::Attribute::SkipProfile)) 1862 return true; 1863 if (F.getInstructionCount() < PGOFunctionSizeThreshold) 1864 return true; 1865 1866 // If there are too many critical edges, PGO might cause 1867 // compiler time problem. Skip PGO if the number of 1868 // critical edges execeed the threshold. 1869 unsigned NumCriticalEdges = 0; 1870 for (auto &BB : F) { 1871 const Instruction *TI = BB.getTerminator(); 1872 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 1873 if (isCriticalEdge(TI, I)) 1874 NumCriticalEdges++; 1875 } 1876 } 1877 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) { 1878 LLVM_DEBUG(dbgs() << "In func " << F.getName() 1879 << ", NumCriticalEdges=" << NumCriticalEdges 1880 << " exceed the threshold. Skip PGO.\n"); 1881 return true; 1882 } 1883 1884 return false; 1885 } 1886 1887 static bool InstrumentAllFunctions( 1888 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 1889 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 1890 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 1891 // For the context-sensitve instrumentation, we should have a separated pass 1892 // (before LTO/ThinLTO linking) to create these variables. 1893 if (!IsCS) 1894 createIRLevelProfileFlagVar(M, /*IsCS=*/false); 1895 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 1896 collectComdatMembers(M, ComdatMembers); 1897 1898 for (auto &F : M) { 1899 if (skipPGO(F)) 1900 continue; 1901 auto &TLI = LookupTLI(F); 1902 auto *BPI = LookupBPI(F); 1903 auto *BFI = LookupBFI(F); 1904 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 1905 } 1906 return true; 1907 } 1908 1909 PreservedAnalyses 1910 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 1911 createProfileFileNameVar(M, CSInstrName); 1912 // The variable in a comdat may be discarded by LTO. Ensure the declaration 1913 // will be retained. 1914 appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); 1915 return PreservedAnalyses::all(); 1916 } 1917 1918 PreservedAnalyses PGOInstrumentationGen::run(Module &M, 1919 ModuleAnalysisManager &AM) { 1920 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1921 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 1922 return FAM.getResult<TargetLibraryAnalysis>(F); 1923 }; 1924 auto LookupBPI = [&FAM](Function &F) { 1925 return &FAM.getResult<BranchProbabilityAnalysis>(F); 1926 }; 1927 auto LookupBFI = [&FAM](Function &F) { 1928 return &FAM.getResult<BlockFrequencyAnalysis>(F); 1929 }; 1930 1931 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 1932 return PreservedAnalyses::all(); 1933 1934 return PreservedAnalyses::none(); 1935 } 1936 1937 // Using the ratio b/w sums of profile count values and BFI count values to 1938 // adjust the func entry count. 1939 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, 1940 BranchProbabilityInfo &NBPI) { 1941 Function &F = Func.getFunc(); 1942 BlockFrequencyInfo NBFI(F, NBPI, LI); 1943 #ifndef NDEBUG 1944 auto BFIEntryCount = F.getEntryCount(); 1945 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) && 1946 "Invalid BFI Entrycount"); 1947 #endif 1948 auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); 1949 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); 1950 for (auto &BBI : F) { 1951 uint64_t CountValue = 0; 1952 uint64_t BFICountValue = 0; 1953 if (!Func.findBBInfo(&BBI)) 1954 continue; 1955 auto BFICount = NBFI.getBlockProfileCount(&BBI); 1956 CountValue = Func.getBBInfo(&BBI).CountValue; 1957 BFICountValue = *BFICount; 1958 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); 1959 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); 1960 } 1961 if (SumCount.isZero()) 1962 return; 1963 1964 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && 1965 "Incorrect sum of BFI counts"); 1966 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) 1967 return; 1968 double Scale = (SumCount / SumBFICount).convertToDouble(); 1969 if (Scale < 1.001 && Scale > 0.999) 1970 return; 1971 1972 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; 1973 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; 1974 if (NewEntryCount == 0) 1975 NewEntryCount = 1; 1976 if (NewEntryCount != FuncEntryCount) { 1977 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); 1978 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() 1979 << ", entry_count " << FuncEntryCount << " --> " 1980 << NewEntryCount << "\n"); 1981 } 1982 } 1983 1984 // Compare the profile count values with BFI count values, and print out 1985 // the non-matching ones. 1986 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, 1987 BranchProbabilityInfo &NBPI, 1988 uint64_t HotCountThreshold, 1989 uint64_t ColdCountThreshold) { 1990 Function &F = Func.getFunc(); 1991 BlockFrequencyInfo NBFI(F, NBPI, LI); 1992 // bool PrintFunc = false; 1993 bool HotBBOnly = PGOVerifyHotBFI; 1994 std::string Msg; 1995 OptimizationRemarkEmitter ORE(&F); 1996 1997 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; 1998 for (auto &BBI : F) { 1999 uint64_t CountValue = 0; 2000 uint64_t BFICountValue = 0; 2001 2002 if (Func.getBBInfo(&BBI).CountValid) 2003 CountValue = Func.getBBInfo(&BBI).CountValue; 2004 2005 BBNum++; 2006 if (CountValue) 2007 NonZeroBBNum++; 2008 auto BFICount = NBFI.getBlockProfileCount(&BBI); 2009 if (BFICount) 2010 BFICountValue = *BFICount; 2011 2012 if (HotBBOnly) { 2013 bool rawIsHot = CountValue >= HotCountThreshold; 2014 bool BFIIsHot = BFICountValue >= HotCountThreshold; 2015 bool rawIsCold = CountValue <= ColdCountThreshold; 2016 bool ShowCount = false; 2017 if (rawIsHot && !BFIIsHot) { 2018 Msg = "raw-Hot to BFI-nonHot"; 2019 ShowCount = true; 2020 } else if (rawIsCold && BFIIsHot) { 2021 Msg = "raw-Cold to BFI-Hot"; 2022 ShowCount = true; 2023 } 2024 if (!ShowCount) 2025 continue; 2026 } else { 2027 if ((CountValue < PGOVerifyBFICutoff) && 2028 (BFICountValue < PGOVerifyBFICutoff)) 2029 continue; 2030 uint64_t Diff = (BFICountValue >= CountValue) 2031 ? BFICountValue - CountValue 2032 : CountValue - BFICountValue; 2033 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) 2034 continue; 2035 } 2036 BBMisMatchNum++; 2037 2038 ORE.emit([&]() { 2039 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify", 2040 F.getSubprogram(), &BBI); 2041 Remark << "BB " << ore::NV("Block", BBI.getName()) 2042 << " Count=" << ore::NV("Count", CountValue) 2043 << " BFI_Count=" << ore::NV("Count", BFICountValue); 2044 if (!Msg.empty()) 2045 Remark << " (" << Msg << ")"; 2046 return Remark; 2047 }); 2048 } 2049 if (BBMisMatchNum) 2050 ORE.emit([&]() { 2051 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify", 2052 F.getSubprogram(), &F.getEntryBlock()) 2053 << "In Func " << ore::NV("Function", F.getName()) 2054 << ": Num_of_BB=" << ore::NV("Count", BBNum) 2055 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum) 2056 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum); 2057 }); 2058 } 2059 2060 static bool annotateAllFunctions( 2061 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 2062 function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 2063 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 2064 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 2065 ProfileSummaryInfo *PSI, bool IsCS) { 2066 LLVM_DEBUG(dbgs() << "Read in profile counters: "); 2067 auto &Ctx = M.getContext(); 2068 // Read the counter array from file. 2069 auto ReaderOrErr = 2070 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 2071 if (Error E = ReaderOrErr.takeError()) { 2072 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 2073 Ctx.diagnose( 2074 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 2075 }); 2076 return false; 2077 } 2078 2079 std::unique_ptr<IndexedInstrProfReader> PGOReader = 2080 std::move(ReaderOrErr.get()); 2081 if (!PGOReader) { 2082 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 2083 StringRef("Cannot get PGOReader"))); 2084 return false; 2085 } 2086 if (!PGOReader->hasCSIRLevelProfile() && IsCS) 2087 return false; 2088 2089 // TODO: might need to change the warning once the clang option is finalized. 2090 if (!PGOReader->isIRLevelProfile() && !PGOReader->hasMemoryProfile()) { 2091 Ctx.diagnose(DiagnosticInfoPGOProfile( 2092 ProfileFileName.data(), "Not an IR level instrumentation profile")); 2093 return false; 2094 } 2095 if (PGOReader->hasSingleByteCoverage()) { 2096 Ctx.diagnose(DiagnosticInfoPGOProfile( 2097 ProfileFileName.data(), 2098 "Cannot use coverage profiles for optimization")); 2099 return false; 2100 } 2101 if (PGOReader->functionEntryOnly()) { 2102 Ctx.diagnose(DiagnosticInfoPGOProfile( 2103 ProfileFileName.data(), 2104 "Function entry profiles are not yet supported for optimization")); 2105 return false; 2106 } 2107 2108 // Add the profile summary (read from the header of the indexed summary) here 2109 // so that we can use it below when reading counters (which checks if the 2110 // function should be marked with a cold or inlinehint attribute). 2111 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 2112 IsCS ? ProfileSummary::PSK_CSInstr 2113 : ProfileSummary::PSK_Instr); 2114 PSI->refresh(); 2115 2116 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 2117 collectComdatMembers(M, ComdatMembers); 2118 std::vector<Function *> HotFunctions; 2119 std::vector<Function *> ColdFunctions; 2120 2121 // If the profile marked as always instrument the entry BB, do the 2122 // same. Note this can be overwritten by the internal option in CFGMST.h 2123 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); 2124 if (PGOInstrumentEntry.getNumOccurrences() > 0) 2125 InstrumentFuncEntry = PGOInstrumentEntry; 2126 for (auto &F : M) { 2127 if (skipPGO(F)) 2128 continue; 2129 auto &TLI = LookupTLI(F); 2130 auto *BPI = LookupBPI(F); 2131 auto *BFI = LookupBFI(F); 2132 // Split indirectbr critical edges here before computing the MST rather than 2133 // later in getInstrBB() to avoid invalidating it. 2134 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); 2135 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, 2136 InstrumentFuncEntry); 2137 // Read and match memprof first since we do this via debug info and can 2138 // match even if there is an IR mismatch detected for regular PGO below. 2139 if (PGOReader->hasMemoryProfile()) 2140 Func.readMemprof(PGOReader.get()); 2141 2142 if (!PGOReader->isIRLevelProfile()) 2143 continue; 2144 2145 // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo, 2146 // it means the profile for the function is unrepresentative and this 2147 // function is actually hot / warm. We will reset the function hot / cold 2148 // attribute and drop all the profile counters. 2149 InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo; 2150 bool AllZeros = false; 2151 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind)) 2152 continue; 2153 if (AllZeros) { 2154 F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 2155 if (Func.getProgramMaxCount() != 0) 2156 ColdFunctions.push_back(&F); 2157 continue; 2158 } 2159 if (PseudoKind != InstrProfRecord::NotPseudo) { 2160 // Clear function attribute cold. 2161 if (F.hasFnAttribute(Attribute::Cold)) 2162 F.removeFnAttr(Attribute::Cold); 2163 // Set function attribute as hot. 2164 if (PseudoKind == InstrProfRecord::PseudoHot) 2165 F.addFnAttr(Attribute::Hot); 2166 continue; 2167 } 2168 Func.populateCounters(); 2169 Func.setBranchWeights(); 2170 Func.annotateValueSites(); 2171 Func.annotateIrrLoopHeaderWeights(); 2172 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 2173 if (FreqAttr == PGOUseFunc::FFA_Cold) 2174 ColdFunctions.push_back(&F); 2175 else if (FreqAttr == PGOUseFunc::FFA_Hot) 2176 HotFunctions.push_back(&F); 2177 if (PGOViewCounts != PGOVCT_None && 2178 (ViewBlockFreqFuncName.empty() || 2179 F.getName().equals(ViewBlockFreqFuncName))) { 2180 LoopInfo LI{DominatorTree(F)}; 2181 std::unique_ptr<BranchProbabilityInfo> NewBPI = 2182 std::make_unique<BranchProbabilityInfo>(F, LI); 2183 std::unique_ptr<BlockFrequencyInfo> NewBFI = 2184 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 2185 if (PGOViewCounts == PGOVCT_Graph) 2186 NewBFI->view(); 2187 else if (PGOViewCounts == PGOVCT_Text) { 2188 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 2189 NewBFI->print(dbgs()); 2190 } 2191 } 2192 if (PGOViewRawCounts != PGOVCT_None && 2193 (ViewBlockFreqFuncName.empty() || 2194 F.getName().equals(ViewBlockFreqFuncName))) { 2195 if (PGOViewRawCounts == PGOVCT_Graph) 2196 if (ViewBlockFreqFuncName.empty()) 2197 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 2198 else 2199 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 2200 else if (PGOViewRawCounts == PGOVCT_Text) { 2201 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 2202 Func.dumpInfo(); 2203 } 2204 } 2205 2206 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { 2207 LoopInfo LI{DominatorTree(F)}; 2208 BranchProbabilityInfo NBPI(F, LI); 2209 2210 // Fix func entry count. 2211 if (PGOFixEntryCount) 2212 fixFuncEntryCount(Func, LI, NBPI); 2213 2214 // Verify BlockFrequency information. 2215 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; 2216 if (PGOVerifyHotBFI) { 2217 HotCountThreshold = PSI->getOrCompHotCountThreshold(); 2218 ColdCountThreshold = PSI->getOrCompColdCountThreshold(); 2219 } 2220 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); 2221 } 2222 } 2223 2224 // Set function hotness attribute from the profile. 2225 // We have to apply these attributes at the end because their presence 2226 // can affect the BranchProbabilityInfo of any callers, resulting in an 2227 // inconsistent MST between prof-gen and prof-use. 2228 for (auto &F : HotFunctions) { 2229 F->addFnAttr(Attribute::InlineHint); 2230 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 2231 << "\n"); 2232 } 2233 for (auto &F : ColdFunctions) { 2234 // Only set when there is no Attribute::Hot set by the user. For Hot 2235 // attribute, user's annotation has the precedence over the profile. 2236 if (F->hasFnAttribute(Attribute::Hot)) { 2237 auto &Ctx = M.getContext(); 2238 std::string Msg = std::string("Function ") + F->getName().str() + 2239 std::string(" is annotated as a hot function but" 2240 " the profile is cold"); 2241 Ctx.diagnose( 2242 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 2243 continue; 2244 } 2245 F->addFnAttr(Attribute::Cold); 2246 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 2247 << "\n"); 2248 } 2249 return true; 2250 } 2251 2252 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 2253 std::string RemappingFilename, 2254 bool IsCS) 2255 : ProfileFileName(std::move(Filename)), 2256 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 2257 if (!PGOTestProfileFile.empty()) 2258 ProfileFileName = PGOTestProfileFile; 2259 if (!PGOTestProfileRemappingFile.empty()) 2260 ProfileRemappingFileName = PGOTestProfileRemappingFile; 2261 } 2262 2263 PreservedAnalyses PGOInstrumentationUse::run(Module &M, 2264 ModuleAnalysisManager &AM) { 2265 2266 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 2267 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 2268 return FAM.getResult<TargetLibraryAnalysis>(F); 2269 }; 2270 auto LookupBPI = [&FAM](Function &F) { 2271 return &FAM.getResult<BranchProbabilityAnalysis>(F); 2272 }; 2273 auto LookupBFI = [&FAM](Function &F) { 2274 return &FAM.getResult<BlockFrequencyAnalysis>(F); 2275 }; 2276 2277 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 2278 2279 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 2280 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 2281 return PreservedAnalyses::all(); 2282 2283 return PreservedAnalyses::none(); 2284 } 2285 2286 static std::string getSimpleNodeName(const BasicBlock *Node) { 2287 if (!Node->getName().empty()) 2288 return std::string(Node->getName()); 2289 2290 std::string SimpleNodeName; 2291 raw_string_ostream OS(SimpleNodeName); 2292 Node->printAsOperand(OS, false); 2293 return OS.str(); 2294 } 2295 2296 void llvm::setProfMetadata(Module *M, Instruction *TI, 2297 ArrayRef<uint64_t> EdgeCounts, 2298 uint64_t MaxCount) { 2299 MDBuilder MDB(M->getContext()); 2300 assert(MaxCount > 0 && "Bad max count"); 2301 uint64_t Scale = calculateCountScale(MaxCount); 2302 SmallVector<unsigned, 4> Weights; 2303 for (const auto &ECI : EdgeCounts) 2304 Weights.push_back(scaleBranchCount(ECI, Scale)); 2305 2306 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 2307 : Weights) { 2308 dbgs() << W << " "; 2309 } dbgs() << "\n";); 2310 2311 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); 2312 2313 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 2314 if (EmitBranchProbability) { 2315 std::string BrCondStr = getBranchCondString(TI); 2316 if (BrCondStr.empty()) 2317 return; 2318 2319 uint64_t WSum = 2320 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 2321 [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 2322 uint64_t TotalCount = 2323 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 2324 [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 2325 Scale = calculateCountScale(WSum); 2326 BranchProbability BP(scaleBranchCount(Weights[0], Scale), 2327 scaleBranchCount(WSum, Scale)); 2328 std::string BranchProbStr; 2329 raw_string_ostream OS(BranchProbStr); 2330 OS << BP; 2331 OS << " (total count : " << TotalCount << ")"; 2332 OS.flush(); 2333 Function *F = TI->getParent()->getParent(); 2334 OptimizationRemarkEmitter ORE(F); 2335 ORE.emit([&]() { 2336 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 2337 << BrCondStr << " is true with probability : " << BranchProbStr; 2338 }); 2339 } 2340 } 2341 2342 namespace llvm { 2343 2344 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 2345 MDBuilder MDB(M->getContext()); 2346 TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 2347 MDB.createIrrLoopHeaderWeight(Count)); 2348 } 2349 2350 template <> struct GraphTraits<PGOUseFunc *> { 2351 using NodeRef = const BasicBlock *; 2352 using ChildIteratorType = const_succ_iterator; 2353 using nodes_iterator = pointer_iterator<Function::const_iterator>; 2354 2355 static NodeRef getEntryNode(const PGOUseFunc *G) { 2356 return &G->getFunc().front(); 2357 } 2358 2359 static ChildIteratorType child_begin(const NodeRef N) { 2360 return succ_begin(N); 2361 } 2362 2363 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 2364 2365 static nodes_iterator nodes_begin(const PGOUseFunc *G) { 2366 return nodes_iterator(G->getFunc().begin()); 2367 } 2368 2369 static nodes_iterator nodes_end(const PGOUseFunc *G) { 2370 return nodes_iterator(G->getFunc().end()); 2371 } 2372 }; 2373 2374 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 2375 explicit DOTGraphTraits(bool isSimple = false) 2376 : DefaultDOTGraphTraits(isSimple) {} 2377 2378 static std::string getGraphName(const PGOUseFunc *G) { 2379 return std::string(G->getFunc().getName()); 2380 } 2381 2382 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 2383 std::string Result; 2384 raw_string_ostream OS(Result); 2385 2386 OS << getSimpleNodeName(Node) << ":\\l"; 2387 UseBBInfo *BI = Graph->findBBInfo(Node); 2388 OS << "Count : "; 2389 if (BI && BI->CountValid) 2390 OS << BI->CountValue << "\\l"; 2391 else 2392 OS << "Unknown\\l"; 2393 2394 if (!PGOInstrSelect) 2395 return Result; 2396 2397 for (const Instruction &I : *Node) { 2398 if (!isa<SelectInst>(&I)) 2399 continue; 2400 // Display scaled counts for SELECT instruction: 2401 OS << "SELECT : { T = "; 2402 uint64_t TC, FC; 2403 bool HasProf = extractBranchWeights(I, TC, FC); 2404 if (!HasProf) 2405 OS << "Unknown, F = Unknown }\\l"; 2406 else 2407 OS << TC << ", F = " << FC << " }\\l"; 2408 } 2409 return Result; 2410 } 2411 }; 2412 2413 } // end namespace llvm 2414