1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements PGO instrumentation using a minimum spanning tree based
10 // on the following paper:
11 //   [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12 //   for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13 //   Issue 3, pp 313-322
14 // The idea of the algorithm based on the fact that for each node (except for
15 // the entry and exit), the sum of incoming edge counts equals the sum of
16 // outgoing edge counts. The count of edge on spanning tree can be derived from
17 // those edges not on the spanning tree. Knuth proves this method instruments
18 // the minimum number of edges.
19 //
20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree
21 // edges have higher frequencies (more likely to execute). The idea is to
22 // instrument those less frequently executed edges to reduce the runtime
23 // overhead of instrumented binaries.
24 //
25 // This file contains two passes:
26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27 // count profile, and generates the instrumentation for indirect call
28 // profiling.
29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and
30 // annotates the branch weights. It also reads the indirect call value
31 // profiling records and annotate the indirect call instructions.
32 //
33 // To get the precise counter information, These two passes need to invoke at
34 // the same compilation point (so they see the same IR). For pass
35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and
37 // the profile is opened in module level and passed to each PGOUseFunc instance.
38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39 // in class FuncPGOInstrumentation.
40 //
41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class
42 // BBInfo contains auxiliary information for each BB. These two classes are used
43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure
45 // used in populating profile counters.
46 // The MST implementation is in Class CFGMST (CFGMST.h).
47 //
48 //===----------------------------------------------------------------------===//
49 
50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
51 #include "ValueProfileCollector.h"
52 #include "llvm/ADT/APInt.h"
53 #include "llvm/ADT/ArrayRef.h"
54 #include "llvm/ADT/STLExtras.h"
55 #include "llvm/ADT/SmallVector.h"
56 #include "llvm/ADT/Statistic.h"
57 #include "llvm/ADT/StringRef.h"
58 #include "llvm/ADT/Twine.h"
59 #include "llvm/ADT/iterator.h"
60 #include "llvm/ADT/iterator_range.h"
61 #include "llvm/Analysis/BlockFrequencyInfo.h"
62 #include "llvm/Analysis/BranchProbabilityInfo.h"
63 #include "llvm/Analysis/CFG.h"
64 #include "llvm/Analysis/LoopInfo.h"
65 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
66 #include "llvm/Analysis/ProfileSummaryInfo.h"
67 #include "llvm/Analysis/TargetLibraryInfo.h"
68 #include "llvm/IR/Attributes.h"
69 #include "llvm/IR/BasicBlock.h"
70 #include "llvm/IR/CFG.h"
71 #include "llvm/IR/Comdat.h"
72 #include "llvm/IR/Constant.h"
73 #include "llvm/IR/Constants.h"
74 #include "llvm/IR/DiagnosticInfo.h"
75 #include "llvm/IR/Dominators.h"
76 #include "llvm/IR/EHPersonalities.h"
77 #include "llvm/IR/Function.h"
78 #include "llvm/IR/GlobalAlias.h"
79 #include "llvm/IR/GlobalValue.h"
80 #include "llvm/IR/GlobalVariable.h"
81 #include "llvm/IR/IRBuilder.h"
82 #include "llvm/IR/InstVisitor.h"
83 #include "llvm/IR/InstrTypes.h"
84 #include "llvm/IR/Instruction.h"
85 #include "llvm/IR/Instructions.h"
86 #include "llvm/IR/IntrinsicInst.h"
87 #include "llvm/IR/Intrinsics.h"
88 #include "llvm/IR/LLVMContext.h"
89 #include "llvm/IR/MDBuilder.h"
90 #include "llvm/IR/Module.h"
91 #include "llvm/IR/PassManager.h"
92 #include "llvm/IR/ProfDataUtils.h"
93 #include "llvm/IR/ProfileSummary.h"
94 #include "llvm/IR/Type.h"
95 #include "llvm/IR/Value.h"
96 #include "llvm/ProfileData/InstrProf.h"
97 #include "llvm/ProfileData/InstrProfReader.h"
98 #include "llvm/Support/BranchProbability.h"
99 #include "llvm/Support/CRC.h"
100 #include "llvm/Support/Casting.h"
101 #include "llvm/Support/CommandLine.h"
102 #include "llvm/Support/DOTGraphTraits.h"
103 #include "llvm/Support/Debug.h"
104 #include "llvm/Support/Error.h"
105 #include "llvm/Support/ErrorHandling.h"
106 #include "llvm/Support/GraphWriter.h"
107 #include "llvm/Support/VirtualFileSystem.h"
108 #include "llvm/Support/raw_ostream.h"
109 #include "llvm/TargetParser/Triple.h"
110 #include "llvm/Transforms/Instrumentation.h"
111 #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
112 #include "llvm/Transforms/Instrumentation/CFGMST.h"
113 #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
114 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
115 #include "llvm/Transforms/Utils/MisExpect.h"
116 #include "llvm/Transforms/Utils/ModuleUtils.h"
117 #include <algorithm>
118 #include <cassert>
119 #include <cstdint>
120 #include <memory>
121 #include <numeric>
122 #include <optional>
123 #include <stack>
124 #include <string>
125 #include <unordered_map>
126 #include <utility>
127 #include <vector>
128 
129 using namespace llvm;
130 using ProfileCount = Function::ProfileCount;
131 using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
132 
133 #define DEBUG_TYPE "pgo-instrumentation"
134 
135 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
136 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
137 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
138 STATISTIC(NumOfPGOEdge, "Number of edges.");
139 STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
140 STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
141 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
142 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
143 STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
144 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
145 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
146 STATISTIC(NumOfCSPGOSelectInsts,
147           "Number of select instruction instrumented in CSPGO.");
148 STATISTIC(NumOfCSPGOMemIntrinsics,
149           "Number of mem intrinsics instrumented in CSPGO.");
150 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
151 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
152 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
153 STATISTIC(NumOfCSPGOFunc,
154           "Number of functions having valid profile counts in CSPGO.");
155 STATISTIC(NumOfCSPGOMismatch,
156           "Number of functions having mismatch profile in CSPGO.");
157 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
158 STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
159 
160 // Command line option to specify the file to read profile from. This is
161 // mainly used for testing.
162 static cl::opt<std::string>
163     PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
164                        cl::value_desc("filename"),
165                        cl::desc("Specify the path of profile data file. This is"
166                                 "mainly for test purpose."));
167 static cl::opt<std::string> PGOTestProfileRemappingFile(
168     "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
169     cl::value_desc("filename"),
170     cl::desc("Specify the path of profile remapping file. This is mainly for "
171              "test purpose."));
172 
173 // Command line option to disable value profiling. The default is false:
174 // i.e. value profiling is enabled by default. This is for debug purpose.
175 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
176                                            cl::Hidden,
177                                            cl::desc("Disable Value Profiling"));
178 
179 // Command line option to set the maximum number of VP annotations to write to
180 // the metadata for a single indirect call callsite.
181 static cl::opt<unsigned> MaxNumAnnotations(
182     "icp-max-annotations", cl::init(3), cl::Hidden,
183     cl::desc("Max number of annotations for a single indirect "
184              "call callsite"));
185 
186 // Command line option to set the maximum number of value annotations
187 // to write to the metadata for a single memop intrinsic.
188 static cl::opt<unsigned> MaxNumMemOPAnnotations(
189     "memop-max-annotations", cl::init(4), cl::Hidden,
190     cl::desc("Max number of preicise value annotations for a single memop"
191              "intrinsic"));
192 
193 // Command line option to control appending FunctionHash to the name of a COMDAT
194 // function. This is to avoid the hash mismatch caused by the preinliner.
195 static cl::opt<bool> DoComdatRenaming(
196     "do-comdat-renaming", cl::init(false), cl::Hidden,
197     cl::desc("Append function hash to the name of COMDAT function to avoid "
198              "function hash mismatch due to the preinliner"));
199 
200 namespace llvm {
201 // Command line option to enable/disable the warning about missing profile
202 // information.
203 cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
204                              cl::Hidden,
205                              cl::desc("Use this option to turn on/off "
206                                       "warnings about missing profile data for "
207                                       "functions."));
208 
209 // Command line option to enable/disable the warning about a hash mismatch in
210 // the profile data.
211 cl::opt<bool>
212     NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
213                       cl::desc("Use this option to turn off/on "
214                                "warnings about profile cfg mismatch."));
215 
216 // Command line option to enable/disable the warning about a hash mismatch in
217 // the profile data for Comdat functions, which often turns out to be false
218 // positive due to the pre-instrumentation inline.
219 cl::opt<bool> NoPGOWarnMismatchComdatWeak(
220     "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
221     cl::desc("The option is used to turn on/off "
222              "warnings about hash mismatch for comdat "
223              "or weak functions."));
224 } // namespace llvm
225 
226 // Command line option to enable/disable select instruction instrumentation.
227 static cl::opt<bool>
228     PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
229                    cl::desc("Use this option to turn on/off SELECT "
230                             "instruction instrumentation. "));
231 
232 // Command line option to turn on CFG dot or text dump of raw profile counts
233 static cl::opt<PGOViewCountsType> PGOViewRawCounts(
234     "pgo-view-raw-counts", cl::Hidden,
235     cl::desc("A boolean option to show CFG dag or text "
236              "with raw profile counts from "
237              "profile data. See also option "
238              "-pgo-view-counts. To limit graph "
239              "display to only one function, use "
240              "filtering option -view-bfi-func-name."),
241     cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
242                clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
243                clEnumValN(PGOVCT_Text, "text", "show in text.")));
244 
245 // Command line option to enable/disable memop intrinsic call.size profiling.
246 static cl::opt<bool>
247     PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
248                   cl::desc("Use this option to turn on/off "
249                            "memory intrinsic size profiling."));
250 
251 // Emit branch probability as optimization remarks.
252 static cl::opt<bool>
253     EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
254                           cl::desc("When this option is on, the annotated "
255                                    "branch probability will be emitted as "
256                                    "optimization remarks: -{Rpass|"
257                                    "pass-remarks}=pgo-instrumentation"));
258 
259 static cl::opt<bool> PGOInstrumentEntry(
260     "pgo-instrument-entry", cl::init(false), cl::Hidden,
261     cl::desc("Force to instrument function entry basicblock."));
262 
263 static cl::opt<bool> PGOFunctionEntryCoverage(
264     "pgo-function-entry-coverage", cl::Hidden,
265     cl::desc(
266         "Use this option to enable function entry coverage instrumentation."));
267 
268 static cl::opt<bool> PGOBlockCoverage(
269     "pgo-block-coverage",
270     cl::desc("Use this option to enable basic block coverage instrumentation"));
271 
272 static cl::opt<bool>
273     PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
274                               cl::desc("Create a dot file of CFGs with block "
275                                        "coverage inference information"));
276 
277 static cl::opt<bool> PGOTemporalInstrumentation(
278     "pgo-temporal-instrumentation",
279     cl::desc("Use this option to enable temporal instrumentation"));
280 
281 static cl::opt<bool>
282     PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
283                      cl::desc("Fix function entry count in profile use."));
284 
285 static cl::opt<bool> PGOVerifyHotBFI(
286     "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
287     cl::desc("Print out the non-match BFI count if a hot raw profile count "
288              "becomes non-hot, or a cold raw profile count becomes hot. "
289              "The print is enabled under -Rpass-analysis=pgo, or "
290              "internal option -pass-remakrs-analysis=pgo."));
291 
292 static cl::opt<bool> PGOVerifyBFI(
293     "pgo-verify-bfi", cl::init(false), cl::Hidden,
294     cl::desc("Print out mismatched BFI counts after setting profile metadata "
295              "The print is enabled under -Rpass-analysis=pgo, or "
296              "internal option -pass-remakrs-analysis=pgo."));
297 
298 static cl::opt<unsigned> PGOVerifyBFIRatio(
299     "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
300     cl::desc("Set the threshold for pgo-verify-bfi:  only print out "
301              "mismatched BFI if the difference percentage is greater than "
302              "this value (in percentage)."));
303 
304 static cl::opt<unsigned> PGOVerifyBFICutoff(
305     "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
306     cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
307              "profile count value is below."));
308 
309 static cl::opt<std::string> PGOTraceFuncHash(
310     "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
311     cl::value_desc("function name"),
312     cl::desc("Trace the hash of the function with this name."));
313 
314 static cl::opt<unsigned> PGOFunctionSizeThreshold(
315     "pgo-function-size-threshold", cl::Hidden,
316     cl::desc("Do not instrument functions smaller than this threshold."));
317 
318 static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
319     "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
320     cl::desc("Do not instrument functions with the number of critical edges "
321              " greater than this threshold."));
322 
323 extern cl::opt<unsigned> MaxNumVTableAnnotations;
324 
325 namespace llvm {
326 // Command line option to turn on CFG dot dump after profile annotation.
327 // Defined in Analysis/BlockFrequencyInfo.cpp:  -pgo-view-counts
328 extern cl::opt<PGOViewCountsType> PGOViewCounts;
329 
330 // Command line option to specify the name of the function for CFG dump
331 // Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name=
332 extern cl::opt<std::string> ViewBlockFreqFuncName;
333 
334 // Command line option to enable vtable value profiling. Defined in
335 // ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
336 extern cl::opt<bool> EnableVTableValueProfiling;
337 extern cl::opt<bool> EnableVTableProfileUse;
338 extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
339 } // namespace llvm
340 
shouldInstrumentEntryBB()341 bool shouldInstrumentEntryBB() {
342   return PGOInstrumentEntry ||
343          PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
344 }
345 
346 // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
347 // profiling implicitly captures indirect call cases, but not other values.
348 // Supporting other values is relatively straight-forward - just another counter
349 // range within the context.
isValueProfilingDisabled()350 bool isValueProfilingDisabled() {
351   return DisableValueProfiling ||
352          PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
353 }
354 
355 // Return a string describing the branch condition that can be
356 // used in static branch probability heuristics:
getBranchCondString(Instruction * TI)357 static std::string getBranchCondString(Instruction *TI) {
358   BranchInst *BI = dyn_cast<BranchInst>(TI);
359   if (!BI || !BI->isConditional())
360     return std::string();
361 
362   Value *Cond = BI->getCondition();
363   ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
364   if (!CI)
365     return std::string();
366 
367   std::string result;
368   raw_string_ostream OS(result);
369   OS << CI->getPredicate() << "_";
370   CI->getOperand(0)->getType()->print(OS, true);
371 
372   Value *RHS = CI->getOperand(1);
373   ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
374   if (CV) {
375     if (CV->isZero())
376       OS << "_Zero";
377     else if (CV->isOne())
378       OS << "_One";
379     else if (CV->isMinusOne())
380       OS << "_MinusOne";
381     else
382       OS << "_Const";
383   }
384   OS.flush();
385   return result;
386 }
387 
388 static const char *ValueProfKindDescr[] = {
389 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
390 #include "llvm/ProfileData/InstrProfData.inc"
391 };
392 
393 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
394 // aware this is an ir_level profile so it can set the version flag.
createIRLevelProfileFlagVar(Module & M,bool IsCS)395 static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
396   const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
397   Type *IntTy64 = Type::getInt64Ty(M.getContext());
398   uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
399   if (IsCS)
400     ProfileVersion |= VARIANT_MASK_CSIR_PROF;
401   if (shouldInstrumentEntryBB())
402     ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
403   if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
404     ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
405   if (PGOFunctionEntryCoverage)
406     ProfileVersion |=
407         VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
408   if (PGOBlockCoverage)
409     ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
410   if (PGOTemporalInstrumentation)
411     ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
412   auto IRLevelVersionVariable = new GlobalVariable(
413       M, IntTy64, true, GlobalValue::WeakAnyLinkage,
414       Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
415   IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
416   Triple TT(M.getTargetTriple());
417   if (TT.supportsCOMDAT()) {
418     IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
419     IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
420   }
421   return IRLevelVersionVariable;
422 }
423 
424 namespace {
425 
426 /// The select instruction visitor plays three roles specified
427 /// by the mode. In \c VM_counting mode, it simply counts the number of
428 /// select instructions. In \c VM_instrument mode, it inserts code to count
429 /// the number times TrueValue of select is taken. In \c VM_annotate mode,
430 /// it reads the profile data and annotate the select instruction with metadata.
431 enum VisitMode { VM_counting, VM_instrument, VM_annotate };
432 class PGOUseFunc;
433 
434 /// Instruction Visitor class to visit select instructions.
435 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
436   Function &F;
437   unsigned NSIs = 0;             // Number of select instructions instrumented.
438   VisitMode Mode = VM_counting;  // Visiting mode.
439   unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
440   unsigned TotalNumCtrs = 0;     // Total number of counters
441   GlobalVariable *FuncNameVar = nullptr;
442   uint64_t FuncHash = 0;
443   PGOUseFunc *UseFunc = nullptr;
444   bool HasSingleByteCoverage;
445 
SelectInstVisitor__anon0925556b0111::SelectInstVisitor446   SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
447       : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
448 
countSelects__anon0925556b0111::SelectInstVisitor449   void countSelects() {
450     NSIs = 0;
451     Mode = VM_counting;
452     visit(F);
453   }
454 
455   // Visit the IR stream and instrument all select instructions. \p
456   // Ind is a pointer to the counter index variable; \p TotalNC
457   // is the total number of counters; \p FNV is the pointer to the
458   // PGO function name var; \p FHash is the function hash.
instrumentSelects__anon0925556b0111::SelectInstVisitor459   void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,
460                          uint64_t FHash) {
461     Mode = VM_instrument;
462     CurCtrIdx = Ind;
463     TotalNumCtrs = TotalNC;
464     FuncHash = FHash;
465     FuncNameVar = FNV;
466     visit(F);
467   }
468 
469   // Visit the IR stream and annotate all select instructions.
annotateSelects__anon0925556b0111::SelectInstVisitor470   void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
471     Mode = VM_annotate;
472     UseFunc = UF;
473     CurCtrIdx = Ind;
474     visit(F);
475   }
476 
477   void instrumentOneSelectInst(SelectInst &SI);
478   void annotateOneSelectInst(SelectInst &SI);
479 
480   // Visit \p SI instruction and perform tasks according to visit mode.
481   void visitSelectInst(SelectInst &SI);
482 
483   // Return the number of select instructions. This needs be called after
484   // countSelects().
getNumOfSelectInsts__anon0925556b0111::SelectInstVisitor485   unsigned getNumOfSelectInsts() const { return NSIs; }
486 };
487 
488 /// This class implements the CFG edges for the Minimum Spanning Tree (MST)
489 /// based instrumentation.
490 /// Note that the CFG can be a multi-graph. So there might be multiple edges
491 /// with the same SrcBB and DestBB.
492 struct PGOEdge {
493   BasicBlock *SrcBB;
494   BasicBlock *DestBB;
495   uint64_t Weight;
496   bool InMST = false;
497   bool Removed = false;
498   bool IsCritical = false;
499 
PGOEdge__anon0925556b0111::PGOEdge500   PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
501       : SrcBB(Src), DestBB(Dest), Weight(W) {}
502 
503   /// Return the information string of an edge.
infoString__anon0925556b0111::PGOEdge504   std::string infoString() const {
505     return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
506             (IsCritical ? "c" : " ") + "  W=" + Twine(Weight))
507         .str();
508   }
509 };
510 
511 /// This class stores the auxiliary information for each BB in the MST.
512 struct PGOBBInfo {
513   PGOBBInfo *Group;
514   uint32_t Index;
515   uint32_t Rank = 0;
516 
PGOBBInfo__anon0925556b0111::PGOBBInfo517   PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
518 
519   /// Return the information string of this object.
infoString__anon0925556b0111::PGOBBInfo520   std::string infoString() const {
521     return (Twine("Index=") + Twine(Index)).str();
522   }
523 };
524 
525 // This class implements the CFG edges. Note the CFG can be a multi-graph.
526 template <class Edge, class BBInfo> class FuncPGOInstrumentation {
527 private:
528   Function &F;
529 
530   // Is this is context-sensitive instrumentation.
531   bool IsCS;
532 
533   // A map that stores the Comdat group in function F.
534   std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
535 
536   ValueProfileCollector VPC;
537 
538   void computeCFGHash();
539   void renameComdatFunction();
540 
541 public:
542   const TargetLibraryInfo &TLI;
543   std::vector<std::vector<VPCandidateInfo>> ValueSites;
544   SelectInstVisitor SIVisitor;
545   std::string FuncName;
546   std::string DeprecatedFuncName;
547   GlobalVariable *FuncNameVar;
548 
549   // CFG hash value for this function.
550   uint64_t FunctionHash = 0;
551 
552   // The Minimum Spanning Tree of function CFG.
553   CFGMST<Edge, BBInfo> MST;
554 
555   const std::optional<BlockCoverageInference> BCI;
556 
557   static std::optional<BlockCoverageInference>
constructBCI(Function & Func,bool HasSingleByteCoverage,bool InstrumentFuncEntry)558   constructBCI(Function &Func, bool HasSingleByteCoverage,
559                bool InstrumentFuncEntry) {
560     if (HasSingleByteCoverage)
561       return BlockCoverageInference(Func, InstrumentFuncEntry);
562     return {};
563   }
564 
565   // Collect all the BBs that will be instrumented, and store them in
566   // InstrumentBBs.
567   void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
568 
569   // Give an edge, find the BB that will be instrumented.
570   // Return nullptr if there is no BB to be instrumented.
571   BasicBlock *getInstrBB(Edge *E);
572 
573   // Return the auxiliary BB information.
getBBInfo(const BasicBlock * BB) const574   BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
575 
576   // Return the auxiliary BB information if available.
findBBInfo(const BasicBlock * BB) const577   BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
578 
579   // Dump edges and BB information.
dumpInfo(StringRef Str="") const580   void dumpInfo(StringRef Str = "") const {
581     MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
582                               " Hash: " + Twine(FunctionHash) + "\t" + Str);
583   }
584 
FuncPGOInstrumentation(Function & Func,TargetLibraryInfo & TLI,std::unordered_multimap<Comdat *,GlobalValue * > & ComdatMembers,bool CreateGlobalVar=false,BranchProbabilityInfo * BPI=nullptr,BlockFrequencyInfo * BFI=nullptr,bool IsCS=false,bool InstrumentFuncEntry=true,bool HasSingleByteCoverage=false)585   FuncPGOInstrumentation(
586       Function &Func, TargetLibraryInfo &TLI,
587       std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
588       bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
589       BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
590       bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
591       : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
592         TLI(TLI), ValueSites(IPVK_Last + 1),
593         SIVisitor(Func, HasSingleByteCoverage),
594         MST(F, InstrumentFuncEntry, BPI, BFI),
595         BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
596     if (BCI && PGOViewBlockCoverageGraph)
597       BCI->viewBlockCoverageGraph();
598     // This should be done before CFG hash computation.
599     SIVisitor.countSelects();
600     ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
601     if (!IsCS) {
602       NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
603       NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
604       NumOfPGOBB += MST.bbInfoSize();
605       ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
606       if (EnableVTableValueProfiling)
607         ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
608     } else {
609       NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
610       NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
611       NumOfCSPGOBB += MST.bbInfoSize();
612     }
613 
614     FuncName = getIRPGOFuncName(F);
615     DeprecatedFuncName = getPGOFuncName(F);
616     computeCFGHash();
617     if (!ComdatMembers.empty())
618       renameComdatFunction();
619     LLVM_DEBUG(dumpInfo("after CFGMST"));
620 
621     for (const auto &E : MST.allEdges()) {
622       if (E->Removed)
623         continue;
624       IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
625       if (!E->InMST)
626         IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
627     }
628 
629     if (CreateGlobalVar)
630       FuncNameVar = createPGOFuncNameVar(F, FuncName);
631   }
632 };
633 
634 } // end anonymous namespace
635 
636 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
637 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
638 // of selects, indirect calls, mem ops and edges.
639 template <class Edge, class BBInfo>
computeCFGHash()640 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
641   std::vector<uint8_t> Indexes;
642   JamCRC JC;
643   for (auto &BB : F) {
644     for (BasicBlock *Succ : successors(&BB)) {
645       auto BI = findBBInfo(Succ);
646       if (BI == nullptr)
647         continue;
648       uint32_t Index = BI->Index;
649       for (int J = 0; J < 4; J++)
650         Indexes.push_back((uint8_t)(Index >> (J * 8)));
651     }
652   }
653   JC.update(Indexes);
654 
655   JamCRC JCH;
656   // The higher 32 bits.
657   auto updateJCH = [&JCH](uint64_t Num) {
658     uint8_t Data[8];
659     support::endian::write64le(Data, Num);
660     JCH.update(Data);
661   };
662   updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
663   updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
664   updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
665   if (BCI) {
666     updateJCH(BCI->getInstrumentedBlocksHash());
667   } else {
668     updateJCH((uint64_t)MST.numEdges());
669   }
670 
671   // Hash format for context sensitive profile. Reserve 4 bits for other
672   // information.
673   FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
674 
675   // Reserve bit 60-63 for other information purpose.
676   FunctionHash &= 0x0FFFFFFFFFFFFFFF;
677   if (IsCS)
678     NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
679   LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
680                     << " CRC = " << JC.getCRC()
681                     << ", Selects = " << SIVisitor.getNumOfSelectInsts()
682                     << ", Edges = " << MST.numEdges() << ", ICSites = "
683                     << ValueSites[IPVK_IndirectCallTarget].size()
684                     << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
685                     << ", High32 CRC = " << JCH.getCRC()
686                     << ", Hash = " << FunctionHash << "\n";);
687 
688   if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
689     dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
690            << " in building " << F.getParent()->getSourceFileName() << "\n";
691 }
692 
693 // Check if we can safely rename this Comdat function.
canRenameComdat(Function & F,std::unordered_multimap<Comdat *,GlobalValue * > & ComdatMembers)694 static bool canRenameComdat(
695     Function &F,
696     std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
697   if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
698     return false;
699 
700   // FIXME: Current only handle those Comdat groups that only containing one
701   // function.
702   // (1) For a Comdat group containing multiple functions, we need to have a
703   // unique postfix based on the hashes for each function. There is a
704   // non-trivial code refactoring to do this efficiently.
705   // (2) Variables can not be renamed, so we can not rename Comdat function in a
706   // group including global vars.
707   Comdat *C = F.getComdat();
708   for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
709     assert(!isa<GlobalAlias>(CM.second));
710     Function *FM = dyn_cast<Function>(CM.second);
711     if (FM != &F)
712       return false;
713   }
714   return true;
715 }
716 
717 // Append the CFGHash to the Comdat function name.
718 template <class Edge, class BBInfo>
renameComdatFunction()719 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
720   if (!canRenameComdat(F, ComdatMembers))
721     return;
722   std::string OrigName = F.getName().str();
723   std::string NewFuncName =
724       Twine(F.getName() + "." + Twine(FunctionHash)).str();
725   F.setName(Twine(NewFuncName));
726   GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
727   FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
728   Comdat *NewComdat;
729   Module *M = F.getParent();
730   // For AvailableExternallyLinkage functions, change the linkage to
731   // LinkOnceODR and put them into comdat. This is because after renaming, there
732   // is no backup external copy available for the function.
733   if (!F.hasComdat()) {
734     assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
735     NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
736     F.setLinkage(GlobalValue::LinkOnceODRLinkage);
737     F.setComdat(NewComdat);
738     return;
739   }
740 
741   // This function belongs to a single function Comdat group.
742   Comdat *OrigComdat = F.getComdat();
743   std::string NewComdatName =
744       Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
745   NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
746   NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
747 
748   for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
749     // Must be a function.
750     cast<Function>(CM.second)->setComdat(NewComdat);
751   }
752 }
753 
754 /// Collect all the BBs that will be instruments and add them to
755 /// `InstrumentBBs`.
756 template <class Edge, class BBInfo>
getInstrumentBBs(std::vector<BasicBlock * > & InstrumentBBs)757 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
758     std::vector<BasicBlock *> &InstrumentBBs) {
759   if (BCI) {
760     for (auto &BB : F)
761       if (BCI->shouldInstrumentBlock(BB))
762         InstrumentBBs.push_back(&BB);
763     return;
764   }
765 
766   // Use a worklist as we will update the vector during the iteration.
767   std::vector<Edge *> EdgeList;
768   EdgeList.reserve(MST.numEdges());
769   for (const auto &E : MST.allEdges())
770     EdgeList.push_back(E.get());
771 
772   for (auto &E : EdgeList) {
773     BasicBlock *InstrBB = getInstrBB(E);
774     if (InstrBB)
775       InstrumentBBs.push_back(InstrBB);
776   }
777 }
778 
779 // Given a CFG E to be instrumented, find which BB to place the instrumented
780 // code. The function will split the critical edge if necessary.
781 template <class Edge, class BBInfo>
getInstrBB(Edge * E)782 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
783   if (E->InMST || E->Removed)
784     return nullptr;
785 
786   BasicBlock *SrcBB = E->SrcBB;
787   BasicBlock *DestBB = E->DestBB;
788   // For a fake edge, instrument the real BB.
789   if (SrcBB == nullptr)
790     return DestBB;
791   if (DestBB == nullptr)
792     return SrcBB;
793 
794   auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
795     // There are basic blocks (such as catchswitch) cannot be instrumented.
796     // If the returned first insertion point is the end of BB, skip this BB.
797     if (BB->getFirstInsertionPt() == BB->end())
798       return nullptr;
799     return BB;
800   };
801 
802   // Instrument the SrcBB if it has a single successor,
803   // otherwise, the DestBB if this is not a critical edge.
804   Instruction *TI = SrcBB->getTerminator();
805   if (TI->getNumSuccessors() <= 1)
806     return canInstrument(SrcBB);
807   if (!E->IsCritical)
808     return canInstrument(DestBB);
809 
810   // Some IndirectBr critical edges cannot be split by the previous
811   // SplitIndirectBrCriticalEdges call. Bail out.
812   unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
813   BasicBlock *InstrBB =
814       isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
815   if (!InstrBB) {
816     LLVM_DEBUG(
817         dbgs() << "Fail to split critical edge: not instrument this edge.\n");
818     return nullptr;
819   }
820   // For a critical edge, we have to split. Instrument the newly
821   // created BB.
822   IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
823   LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
824                     << " --> " << getBBInfo(DestBB).Index << "\n");
825   // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
826   MST.addEdge(SrcBB, InstrBB, 0);
827   // Second one: Add new edge of InstrBB->DestBB.
828   Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
829   NewEdge1.InMST = true;
830   E->Removed = true;
831 
832   return canInstrument(InstrBB);
833 }
834 
835 // When generating value profiling calls on Windows routines that make use of
836 // handler funclets for exception processing an operand bundle needs to attached
837 // to the called function. This routine will set \p OpBundles to contain the
838 // funclet information, if any is needed, that should be placed on the generated
839 // value profiling call for the value profile candidate call.
840 static void
populateEHOperandBundle(VPCandidateInfo & Cand,DenseMap<BasicBlock *,ColorVector> & BlockColors,SmallVectorImpl<OperandBundleDef> & OpBundles)841 populateEHOperandBundle(VPCandidateInfo &Cand,
842                         DenseMap<BasicBlock *, ColorVector> &BlockColors,
843                         SmallVectorImpl<OperandBundleDef> &OpBundles) {
844   auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
845   if (!OrigCall)
846     return;
847 
848   if (!isa<IntrinsicInst>(OrigCall)) {
849     // The instrumentation call should belong to the same funclet as a
850     // non-intrinsic call, so just copy the operand bundle, if any exists.
851     std::optional<OperandBundleUse> ParentFunclet =
852         OrigCall->getOperandBundle(LLVMContext::OB_funclet);
853     if (ParentFunclet)
854       OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
855   } else {
856     // Intrinsics or other instructions do not get funclet information from the
857     // front-end. Need to use the BlockColors that was computed by the routine
858     // colorEHFunclets to determine whether a funclet is needed.
859     if (!BlockColors.empty()) {
860       const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
861       assert(CV.size() == 1 && "non-unique color for block!");
862       Instruction *EHPad = CV.front()->getFirstNonPHI();
863       if (EHPad->isEHPad())
864         OpBundles.emplace_back("funclet", EHPad);
865     }
866   }
867 }
868 
869 // Visit all edge and instrument the edges not in MST, and do value profiling.
870 // Critical edges will be split.
instrumentOneFunc(Function & F,Module * M,TargetLibraryInfo & TLI,BranchProbabilityInfo * BPI,BlockFrequencyInfo * BFI,std::unordered_multimap<Comdat *,GlobalValue * > & ComdatMembers,bool IsCS)871 static void instrumentOneFunc(
872     Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI,
873     BlockFrequencyInfo *BFI,
874     std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
875     bool IsCS) {
876   if (!PGOBlockCoverage) {
877     // Split indirectbr critical edges here before computing the MST rather than
878     // later in getInstrBB() to avoid invalidating it.
879     SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
880   }
881 
882   FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
883       F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(),
884       PGOBlockCoverage);
885 
886   auto Name = FuncInfo.FuncNameVar;
887   auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
888                                   FuncInfo.FunctionHash);
889   if (PGOFunctionEntryCoverage) {
890     auto &EntryBB = F.getEntryBlock();
891     IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
892     // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
893     //                      i32 <index>)
894     Builder.CreateCall(
895         Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
896         {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
897     return;
898   }
899 
900   std::vector<BasicBlock *> InstrumentBBs;
901   FuncInfo.getInstrumentBBs(InstrumentBBs);
902   unsigned NumCounters =
903       InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
904 
905   if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) {
906     auto *CSIntrinsic =
907         Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
908     // We want to count the instrumentable callsites, then instrument them. This
909     // is because the llvm.instrprof.callsite intrinsic has an argument (like
910     // the other instrprof intrinsics) capturing the total number of
911     // instrumented objects (counters, or callsites, in this case). In this
912     // case, we want that value so we can readily pass it to the compiler-rt
913     // APIs that may have to allocate memory based on the nr of callsites.
914     // The traversal logic is the same for both counting and instrumentation,
915     // just needs to be done in succession.
916     auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
917       for (auto &BB : F)
918         for (auto &Instr : BB)
919           if (auto *CS = dyn_cast<CallBase>(&Instr)) {
920             if ((CS->getCalledFunction() &&
921                  CS->getCalledFunction()->isIntrinsic()) ||
922                 dyn_cast<InlineAsm>(CS->getCalledOperand()))
923               continue;
924             Visitor(CS);
925           }
926     };
927     // First, count callsites.
928     uint32_t TotalNrCallsites = 0;
929     Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });
930 
931     // Now instrument.
932     uint32_t CallsiteIndex = 0;
933     Visit([&](auto *CB) {
934       IRBuilder<> Builder(CB);
935       Builder.CreateCall(CSIntrinsic,
936                          {Name, CFGHash, Builder.getInt32(TotalNrCallsites),
937                           Builder.getInt32(CallsiteIndex++),
938                           CB->getCalledOperand()});
939     });
940   }
941 
942   uint32_t I = 0;
943   if (PGOTemporalInstrumentation) {
944     NumCounters += PGOBlockCoverage ? 8 : 1;
945     auto &EntryBB = F.getEntryBlock();
946     IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
947     // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
948     //                          i32 <index>)
949     Builder.CreateCall(
950         Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
951         {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
952     I += PGOBlockCoverage ? 8 : 1;
953   }
954 
955   for (auto *InstrBB : InstrumentBBs) {
956     IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
957     assert(Builder.GetInsertPoint() != InstrBB->end() &&
958            "Cannot get the Instrumentation point");
959     // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
960     //                          i32 <index>)
961     Builder.CreateCall(
962         Intrinsic::getDeclaration(M, PGOBlockCoverage
963                                          ? Intrinsic::instrprof_cover
964                                          : Intrinsic::instrprof_increment),
965         {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
966   }
967 
968   // Now instrument select instructions:
969   FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,
970                                        FuncInfo.FunctionHash);
971   assert(I == NumCounters);
972 
973   if (isValueProfilingDisabled())
974     return;
975 
976   NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
977 
978   // Intrinsic function calls do not have funclet operand bundles needed for
979   // Windows exception handling attached to them. However, if value profiling is
980   // inserted for one of these calls, then a funclet value will need to be set
981   // on the instrumentation call based on the funclet coloring.
982   DenseMap<BasicBlock *, ColorVector> BlockColors;
983   if (F.hasPersonalityFn() &&
984       isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
985     BlockColors = colorEHFunclets(F);
986 
987   // For each VP Kind, walk the VP candidates and instrument each one.
988   for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
989     unsigned SiteIndex = 0;
990     if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
991       continue;
992 
993     for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
994       LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
995                         << " site: CallSite Index = " << SiteIndex << "\n");
996 
997       IRBuilder<> Builder(Cand.InsertPt);
998       assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
999              "Cannot get the Instrumentation point");
1000 
1001       Value *ToProfile = nullptr;
1002       if (Cand.V->getType()->isIntegerTy())
1003         ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1004       else if (Cand.V->getType()->isPointerTy())
1005         ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1006       assert(ToProfile && "value profiling Value is of unexpected type");
1007 
1008       SmallVector<OperandBundleDef, 1> OpBundles;
1009       populateEHOperandBundle(Cand, BlockColors, OpBundles);
1010       Builder.CreateCall(
1011           Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
1012           {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
1013            ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1014           OpBundles);
1015     }
1016   } // IPVK_First <= Kind <= IPVK_Last
1017 }
1018 
1019 namespace {
1020 
1021 // This class represents a CFG edge in profile use compilation.
1022 struct PGOUseEdge : public PGOEdge {
1023   using PGOEdge::PGOEdge;
1024 
1025   std::optional<uint64_t> Count;
1026 
1027   // Set edge count value
setEdgeCount__anon0925556b0711::PGOUseEdge1028   void setEdgeCount(uint64_t Value) { Count = Value; }
1029 
1030   // Return the information string for this object.
infoString__anon0925556b0711::PGOUseEdge1031   std::string infoString() const {
1032     if (!Count)
1033       return PGOEdge::infoString();
1034     return (Twine(PGOEdge::infoString()) + "  Count=" + Twine(*Count)).str();
1035   }
1036 };
1037 
1038 using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1039 
1040 // This class stores the auxiliary information for each BB.
1041 struct PGOUseBBInfo : public PGOBBInfo {
1042   std::optional<uint64_t> Count;
1043   int32_t UnknownCountInEdge = 0;
1044   int32_t UnknownCountOutEdge = 0;
1045   DirectEdges InEdges;
1046   DirectEdges OutEdges;
1047 
PGOUseBBInfo__anon0925556b0711::PGOUseBBInfo1048   PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1049 
1050   // Set the profile count value for this BB.
setBBInfoCount__anon0925556b0711::PGOUseBBInfo1051   void setBBInfoCount(uint64_t Value) { Count = Value; }
1052 
1053   // Return the information string of this object.
infoString__anon0925556b0711::PGOUseBBInfo1054   std::string infoString() const {
1055     if (!Count)
1056       return PGOBBInfo::infoString();
1057     return (Twine(PGOBBInfo::infoString()) + "  Count=" + Twine(*Count)).str();
1058   }
1059 
1060   // Add an OutEdge and update the edge count.
addOutEdge__anon0925556b0711::PGOUseBBInfo1061   void addOutEdge(PGOUseEdge *E) {
1062     OutEdges.push_back(E);
1063     UnknownCountOutEdge++;
1064   }
1065 
1066   // Add an InEdge and update the edge count.
addInEdge__anon0925556b0711::PGOUseBBInfo1067   void addInEdge(PGOUseEdge *E) {
1068     InEdges.push_back(E);
1069     UnknownCountInEdge++;
1070   }
1071 };
1072 
1073 } // end anonymous namespace
1074 
1075 // Sum up the count values for all the edges.
sumEdgeCount(const ArrayRef<PGOUseEdge * > Edges)1076 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
1077   uint64_t Total = 0;
1078   for (const auto &E : Edges) {
1079     if (E->Removed)
1080       continue;
1081     if (E->Count)
1082       Total += *E->Count;
1083   }
1084   return Total;
1085 }
1086 
1087 namespace {
1088 
1089 class PGOUseFunc {
1090 public:
PGOUseFunc(Function & Func,Module * Modu,TargetLibraryInfo & TLI,std::unordered_multimap<Comdat *,GlobalValue * > & ComdatMembers,BranchProbabilityInfo * BPI,BlockFrequencyInfo * BFIin,ProfileSummaryInfo * PSI,bool IsCS,bool InstrumentFuncEntry,bool HasSingleByteCoverage)1091   PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1092              std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1093              BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
1094              ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
1095              bool HasSingleByteCoverage)
1096       : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1097         FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1098                  InstrumentFuncEntry, HasSingleByteCoverage),
1099         FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
1100 
1101   void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1102 
1103   // Read counts for the instrumented BB from profile.
1104   bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1105                     InstrProfRecord::CountPseudoKind &PseudoKind);
1106 
1107   // Populate the counts for all BBs.
1108   void populateCounters();
1109 
1110   // Set block coverage based on profile coverage values.
1111   void populateCoverage(IndexedInstrProfReader *PGOReader);
1112 
1113   // Set the branch weights based on the count values.
1114   void setBranchWeights();
1115 
1116   // Annotate the value profile call sites for all value kind.
1117   void annotateValueSites();
1118 
1119   // Annotate the value profile call sites for one value kind.
1120   void annotateValueSites(uint32_t Kind);
1121 
1122   // Annotate the irreducible loop header weights.
1123   void annotateIrrLoopHeaderWeights();
1124 
1125   // The hotness of the function from the profile count.
1126   enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1127 
1128   // Return the function hotness from the profile.
getFuncFreqAttr() const1129   FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1130 
1131   // Return the function hash.
getFuncHash() const1132   uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1133 
1134   // Return the profile record for this function;
getProfileRecord()1135   InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1136 
1137   // Return the auxiliary BB information.
getBBInfo(const BasicBlock * BB) const1138   PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1139     return FuncInfo.getBBInfo(BB);
1140   }
1141 
1142   // Return the auxiliary BB information if available.
findBBInfo(const BasicBlock * BB) const1143   PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1144     return FuncInfo.findBBInfo(BB);
1145   }
1146 
getFunc() const1147   Function &getFunc() const { return F; }
1148 
dumpInfo(StringRef Str="") const1149   void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1150 
getProgramMaxCount() const1151   uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1152 
1153 private:
1154   Function &F;
1155   Module *M;
1156   BlockFrequencyInfo *BFI;
1157   ProfileSummaryInfo *PSI;
1158 
1159   // This member stores the shared information with class PGOGenFunc.
1160   FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1161 
1162   // The maximum count value in the profile. This is only used in PGO use
1163   // compilation.
1164   uint64_t ProgramMaxCount;
1165 
1166   // Position of counter that remains to be read.
1167   uint32_t CountPosition = 0;
1168 
1169   // Total size of the profile count for this function.
1170   uint32_t ProfileCountSize = 0;
1171 
1172   // ProfileRecord for this function.
1173   InstrProfRecord ProfileRecord;
1174 
1175   // Function hotness info derived from profile.
1176   FuncFreqAttr FreqAttr;
1177 
1178   // Is to use the context sensitive profile.
1179   bool IsCS;
1180 
1181   ValueProfileCollector VPC;
1182 
1183   // Find the Instrumented BB and set the value. Return false on error.
1184   bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1185 
1186   // Set the edge counter value for the unknown edge -- there should be only
1187   // one unknown edge.
1188   void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1189 
1190   // Set the hot/cold inline hints based on the count values.
1191   // FIXME: This function should be removed once the functionality in
1192   // the inliner is implemented.
markFunctionAttributes(uint64_t EntryCount,uint64_t MaxCount)1193   void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1194     if (PSI->isHotCount(EntryCount))
1195       FreqAttr = FFA_Hot;
1196     else if (PSI->isColdCount(MaxCount))
1197       FreqAttr = FFA_Cold;
1198   }
1199 };
1200 
1201 } // end anonymous namespace
1202 
1203 /// Set up InEdges/OutEdges for all BBs in the MST.
setupBBInfoEdges(const FuncPGOInstrumentation<PGOUseEdge,PGOUseBBInfo> & FuncInfo)1204 static void setupBBInfoEdges(
1205     const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1206   // This is not required when there is block coverage inference.
1207   if (FuncInfo.BCI)
1208     return;
1209   for (const auto &E : FuncInfo.MST.allEdges()) {
1210     if (E->Removed)
1211       continue;
1212     const BasicBlock *SrcBB = E->SrcBB;
1213     const BasicBlock *DestBB = E->DestBB;
1214     PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1215     PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1216     SrcInfo.addOutEdge(E.get());
1217     DestInfo.addInEdge(E.get());
1218   }
1219 }
1220 
1221 // Visit all the edges and assign the count value for the instrumented
1222 // edges and the BB. Return false on error.
setInstrumentedCounts(const std::vector<uint64_t> & CountFromProfile)1223 bool PGOUseFunc::setInstrumentedCounts(
1224     const std::vector<uint64_t> &CountFromProfile) {
1225 
1226   std::vector<BasicBlock *> InstrumentBBs;
1227   FuncInfo.getInstrumentBBs(InstrumentBBs);
1228 
1229   setupBBInfoEdges(FuncInfo);
1230 
1231   unsigned NumCounters =
1232       InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1233   // The number of counters here should match the number of counters
1234   // in profile. Return if they mismatch.
1235   if (NumCounters != CountFromProfile.size()) {
1236     return false;
1237   }
1238   auto *FuncEntry = &*F.begin();
1239 
1240   // Set the profile count to the Instrumented BBs.
1241   uint32_t I = 0;
1242   for (BasicBlock *InstrBB : InstrumentBBs) {
1243     uint64_t CountValue = CountFromProfile[I++];
1244     PGOUseBBInfo &Info = getBBInfo(InstrBB);
1245     // If we reach here, we know that we have some nonzero count
1246     // values in this function. The entry count should not be 0.
1247     // Fix it if necessary.
1248     if (InstrBB == FuncEntry && CountValue == 0)
1249       CountValue = 1;
1250     Info.setBBInfoCount(CountValue);
1251   }
1252   ProfileCountSize = CountFromProfile.size();
1253   CountPosition = I;
1254 
1255   // Set the edge count and update the count of unknown edges for BBs.
1256   auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1257     E->setEdgeCount(Value);
1258     this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1259     this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1260   };
1261 
1262   // Set the profile count the Instrumented edges. There are BBs that not in
1263   // MST but not instrumented. Need to set the edge count value so that we can
1264   // populate the profile counts later.
1265   for (const auto &E : FuncInfo.MST.allEdges()) {
1266     if (E->Removed || E->InMST)
1267       continue;
1268     const BasicBlock *SrcBB = E->SrcBB;
1269     PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1270 
1271     // If only one out-edge, the edge profile count should be the same as BB
1272     // profile count.
1273     if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1274       setEdgeCount(E.get(), *SrcInfo.Count);
1275     else {
1276       const BasicBlock *DestBB = E->DestBB;
1277       PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1278       // If only one in-edge, the edge profile count should be the same as BB
1279       // profile count.
1280       if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1281         setEdgeCount(E.get(), *DestInfo.Count);
1282     }
1283     if (E->Count)
1284       continue;
1285     // E's count should have been set from profile. If not, this meenas E skips
1286     // the instrumentation. We set the count to 0.
1287     setEdgeCount(E.get(), 0);
1288   }
1289   return true;
1290 }
1291 
1292 // Set the count value for the unknown edge. There should be one and only one
1293 // unknown edge in Edges vector.
setEdgeCount(DirectEdges & Edges,uint64_t Value)1294 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1295   for (auto &E : Edges) {
1296     if (E->Count)
1297       continue;
1298     E->setEdgeCount(Value);
1299 
1300     getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1301     getBBInfo(E->DestBB).UnknownCountInEdge--;
1302     return;
1303   }
1304   llvm_unreachable("Cannot find the unknown count edge");
1305 }
1306 
1307 // Emit function metadata indicating PGO profile mismatch.
annotateFunctionWithHashMismatch(Function & F,LLVMContext & ctx)1308 static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx) {
1309   const char MetadataName[] = "instr_prof_hash_mismatch";
1310   SmallVector<Metadata *, 2> Names;
1311   // If this metadata already exists, ignore.
1312   auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1313   if (Existing) {
1314     MDTuple *Tuple = cast<MDTuple>(Existing);
1315     for (const auto &N : Tuple->operands()) {
1316       if (N.equalsStr(MetadataName))
1317         return;
1318       Names.push_back(N.get());
1319     }
1320   }
1321 
1322   MDBuilder MDB(ctx);
1323   Names.push_back(MDB.createString(MetadataName));
1324   MDNode *MD = MDTuple::get(ctx, Names);
1325   F.setMetadata(LLVMContext::MD_annotation, MD);
1326 }
1327 
handleInstrProfError(Error Err,uint64_t MismatchedFuncSum)1328 void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1329   handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1330     auto &Ctx = M->getContext();
1331     auto Err = IPE.get();
1332     bool SkipWarning = false;
1333     LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1334                       << FuncInfo.FuncName << ": ");
1335     if (Err == instrprof_error::unknown_function) {
1336       IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1337       SkipWarning = !PGOWarnMissing;
1338       LLVM_DEBUG(dbgs() << "unknown function");
1339     } else if (Err == instrprof_error::hash_mismatch ||
1340                Err == instrprof_error::malformed) {
1341       IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1342       SkipWarning =
1343           NoPGOWarnMismatch ||
1344           (NoPGOWarnMismatchComdatWeak &&
1345            (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1346             F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
1347       LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1348                         << " skip=" << SkipWarning << ")");
1349       // Emit function metadata indicating PGO profile mismatch.
1350       annotateFunctionWithHashMismatch(F, M->getContext());
1351     }
1352 
1353     LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1354     if (SkipWarning)
1355       return;
1356 
1357     std::string Msg =
1358         IPE.message() + std::string(" ") + F.getName().str() +
1359         std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1360         std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1361         std::string(" count discarded");
1362 
1363     Ctx.diagnose(
1364         DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1365   });
1366 }
1367 
1368 // Read the profile from ProfileFileName and assign the value to the
1369 // instrumented BB and the edges. This function also updates ProgramMaxCount.
1370 // Return true if the profile are successfully read, and false on errors.
readCounters(IndexedInstrProfReader * PGOReader,bool & AllZeros,InstrProfRecord::CountPseudoKind & PseudoKind)1371 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1372                               InstrProfRecord::CountPseudoKind &PseudoKind) {
1373   auto &Ctx = M->getContext();
1374   uint64_t MismatchedFuncSum = 0;
1375   Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
1376       FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1377       &MismatchedFuncSum);
1378   if (Error E = Result.takeError()) {
1379     handleInstrProfError(std::move(E), MismatchedFuncSum);
1380     return false;
1381   }
1382   ProfileRecord = std::move(Result.get());
1383   PseudoKind = ProfileRecord.getCountPseudoKind();
1384   if (PseudoKind != InstrProfRecord::NotPseudo) {
1385     return true;
1386   }
1387   std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1388 
1389   IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1390   LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1391 
1392   uint64_t ValueSum = 0;
1393   for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1394     LLVM_DEBUG(dbgs() << "  " << I << ": " << CountFromProfile[I] << "\n");
1395     ValueSum += CountFromProfile[I];
1396   }
1397   AllZeros = (ValueSum == 0);
1398 
1399   LLVM_DEBUG(dbgs() << "SUM =  " << ValueSum << "\n");
1400 
1401   getBBInfo(nullptr).UnknownCountOutEdge = 2;
1402   getBBInfo(nullptr).UnknownCountInEdge = 2;
1403 
1404   if (!setInstrumentedCounts(CountFromProfile)) {
1405     LLVM_DEBUG(
1406         dbgs() << "Inconsistent number of counts, skipping this function");
1407     Ctx.diagnose(DiagnosticInfoPGOProfile(
1408         M->getName().data(),
1409         Twine("Inconsistent number of counts in ") + F.getName().str() +
1410             Twine(": the profile may be stale or there is a function name "
1411                   "collision."),
1412         DS_Warning));
1413     return false;
1414   }
1415   ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1416   return true;
1417 }
1418 
populateCoverage(IndexedInstrProfReader * PGOReader)1419 void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
1420   uint64_t MismatchedFuncSum = 0;
1421   Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
1422       FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1423       &MismatchedFuncSum);
1424   if (auto Err = Result.takeError()) {
1425     handleInstrProfError(std::move(Err), MismatchedFuncSum);
1426     return;
1427   }
1428   IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1429 
1430   std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
1431   DenseMap<const BasicBlock *, bool> Coverage;
1432   unsigned Index = 0;
1433   for (auto &BB : F)
1434     if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1435       Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1436   assert(Index == CountsFromProfile.size());
1437 
1438   // For each B in InverseDependencies[A], if A is covered then B is covered.
1439   DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>>
1440       InverseDependencies;
1441   for (auto &BB : F) {
1442     for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1443       // If Dep is covered then BB is covered.
1444       InverseDependencies[Dep].insert(&BB);
1445     }
1446   }
1447 
1448   // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1449   std::stack<const BasicBlock *> CoveredBlocksToProcess;
1450   for (auto &[BB, IsCovered] : Coverage)
1451     if (IsCovered)
1452       CoveredBlocksToProcess.push(BB);
1453 
1454   while (!CoveredBlocksToProcess.empty()) {
1455     auto *CoveredBlock = CoveredBlocksToProcess.top();
1456     assert(Coverage[CoveredBlock]);
1457     CoveredBlocksToProcess.pop();
1458     for (auto *BB : InverseDependencies[CoveredBlock]) {
1459       // If CoveredBlock is covered then BB is covered.
1460       if (Coverage[BB])
1461         continue;
1462       Coverage[BB] = true;
1463       CoveredBlocksToProcess.push(BB);
1464     }
1465   }
1466 
1467   // Annotate block coverage.
1468   MDBuilder MDB(F.getContext());
1469   // We set the entry count to 10000 if the entry block is covered so that BFI
1470   // can propagate a fraction of this count to the other covered blocks.
1471   F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1472   for (auto &BB : F) {
1473     // For a block A and its successor B, we set the edge weight as follows:
1474     // If A is covered and B is covered, set weight=1.
1475     // If A is covered and B is uncovered, set weight=0.
1476     // If A is uncovered, set weight=1.
1477     // This setup will allow BFI to give nonzero profile counts to only covered
1478     // blocks.
1479     SmallVector<uint32_t, 4> Weights;
1480     for (auto *Succ : successors(&BB))
1481       Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
1482     if (Weights.size() >= 2)
1483       llvm::setBranchWeights(*BB.getTerminator(), Weights,
1484                              /*IsExpected=*/false);
1485   }
1486 
1487   unsigned NumCorruptCoverage = 0;
1488   DominatorTree DT(F);
1489   LoopInfo LI(DT);
1490   BranchProbabilityInfo BPI(F, LI);
1491   BlockFrequencyInfo BFI(F, BPI, LI);
1492   auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1493     if (auto C = BFI.getBlockProfileCount(&BB))
1494       return C == 0;
1495     return {};
1496   };
1497   LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1498   for (auto &BB : F) {
1499     LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : "  ")
1500                       << (Coverage[&BB] ? "X " : "  ") << " " << BB.getName()
1501                       << "\n");
1502     // In some cases it is possible to find a covered block that has no covered
1503     // successors, e.g., when a block calls a function that may call exit(). In
1504     // those cases, BFI could find its successor to be covered while BCI could
1505     // find its successor to be dead.
1506     if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
1507       LLVM_DEBUG(
1508           dbgs() << "Found inconsistent block covearge for " << BB.getName()
1509                  << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
1510                  << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1511       ++NumCorruptCoverage;
1512     }
1513     if (Coverage[&BB])
1514       ++NumCoveredBlocks;
1515   }
1516   if (PGOVerifyBFI && NumCorruptCoverage) {
1517     auto &Ctx = M->getContext();
1518     Ctx.diagnose(DiagnosticInfoPGOProfile(
1519         M->getName().data(),
1520         Twine("Found inconsistent block coverage for function ") + F.getName() +
1521             " in " + Twine(NumCorruptCoverage) + " blocks.",
1522         DS_Warning));
1523   }
1524   if (PGOViewBlockCoverageGraph)
1525     FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1526 }
1527 
1528 // Populate the counters from instrumented BBs to all BBs.
1529 // In the end of this operation, all BBs should have a valid count value.
populateCounters()1530 void PGOUseFunc::populateCounters() {
1531   bool Changes = true;
1532   unsigned NumPasses = 0;
1533   while (Changes) {
1534     NumPasses++;
1535     Changes = false;
1536 
1537     // For efficient traversal, it's better to start from the end as most
1538     // of the instrumented edges are at the end.
1539     for (auto &BB : reverse(F)) {
1540       PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1541       if (UseBBInfo == nullptr)
1542         continue;
1543       if (!UseBBInfo->Count) {
1544         if (UseBBInfo->UnknownCountOutEdge == 0) {
1545           UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1546           Changes = true;
1547         } else if (UseBBInfo->UnknownCountInEdge == 0) {
1548           UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1549           Changes = true;
1550         }
1551       }
1552       if (UseBBInfo->Count) {
1553         if (UseBBInfo->UnknownCountOutEdge == 1) {
1554           uint64_t Total = 0;
1555           uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1556           // If the one of the successor block can early terminate (no-return),
1557           // we can end up with situation where out edge sum count is larger as
1558           // the source BB's count is collected by a post-dominated block.
1559           if (*UseBBInfo->Count > OutSum)
1560             Total = *UseBBInfo->Count - OutSum;
1561           setEdgeCount(UseBBInfo->OutEdges, Total);
1562           Changes = true;
1563         }
1564         if (UseBBInfo->UnknownCountInEdge == 1) {
1565           uint64_t Total = 0;
1566           uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1567           if (*UseBBInfo->Count > InSum)
1568             Total = *UseBBInfo->Count - InSum;
1569           setEdgeCount(UseBBInfo->InEdges, Total);
1570           Changes = true;
1571         }
1572       }
1573     }
1574   }
1575 
1576   LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1577   (void)NumPasses;
1578 #ifndef NDEBUG
1579   // Assert every BB has a valid counter.
1580   for (auto &BB : F) {
1581     auto BI = findBBInfo(&BB);
1582     if (BI == nullptr)
1583       continue;
1584     assert(BI->Count && "BB count is not valid");
1585   }
1586 #endif
1587   uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1588   uint64_t FuncMaxCount = FuncEntryCount;
1589   for (auto &BB : F) {
1590     auto BI = findBBInfo(&BB);
1591     if (BI == nullptr)
1592       continue;
1593     FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1594   }
1595 
1596   // Fix the obviously inconsistent entry count.
1597   if (FuncMaxCount > 0 && FuncEntryCount == 0)
1598     FuncEntryCount = 1;
1599   F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
1600   markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1601 
1602   // Now annotate select instructions
1603   FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
1604   assert(CountPosition == ProfileCountSize);
1605 
1606   LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1607 }
1608 
1609 // Assign the scaled count values to the BB with multiple out edges.
setBranchWeights()1610 void PGOUseFunc::setBranchWeights() {
1611   // Generate MD_prof metadata for every branch instruction.
1612   LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1613                     << " IsCS=" << IsCS << "\n");
1614   for (auto &BB : F) {
1615     Instruction *TI = BB.getTerminator();
1616     if (TI->getNumSuccessors() < 2)
1617       continue;
1618     if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1619           isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1620           isa<CallBrInst>(TI)))
1621       continue;
1622 
1623     const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1624     if (!*BBCountInfo.Count)
1625       continue;
1626 
1627     // We have a non-zero Branch BB.
1628 
1629     // SuccessorCount can be greater than OutEdgesCount, because
1630     // removed edges don't appear in OutEdges.
1631     unsigned OutEdgesCount = BBCountInfo.OutEdges.size();
1632     unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors();
1633     assert(OutEdgesCount <= SuccessorCount);
1634 
1635     SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0);
1636     uint64_t MaxCount = 0;
1637     for (unsigned It = 0; It < OutEdgesCount; It++) {
1638       const PGOUseEdge *E = BBCountInfo.OutEdges[It];
1639       const BasicBlock *SrcBB = E->SrcBB;
1640       const BasicBlock *DestBB = E->DestBB;
1641       if (DestBB == nullptr)
1642         continue;
1643       unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1644       uint64_t EdgeCount = *E->Count;
1645       if (EdgeCount > MaxCount)
1646         MaxCount = EdgeCount;
1647       EdgeCounts[SuccNum] = EdgeCount;
1648     }
1649 
1650     if (MaxCount)
1651       setProfMetadata(M, TI, EdgeCounts, MaxCount);
1652     else {
1653       // A zero MaxCount can come about when we have a BB with a positive
1654       // count, and whose successor blocks all have 0 count. This can happen
1655       // when there is no exit block and the code exits via a noreturn function.
1656       auto &Ctx = M->getContext();
1657       Ctx.diagnose(DiagnosticInfoPGOProfile(
1658           M->getName().data(),
1659           Twine("Profile in ") + F.getName().str() +
1660               Twine(" partially ignored") +
1661               Twine(", possibly due to the lack of a return path."),
1662           DS_Warning));
1663     }
1664   }
1665 }
1666 
isIndirectBrTarget(BasicBlock * BB)1667 static bool isIndirectBrTarget(BasicBlock *BB) {
1668   for (BasicBlock *Pred : predecessors(BB)) {
1669     if (isa<IndirectBrInst>(Pred->getTerminator()))
1670       return true;
1671   }
1672   return false;
1673 }
1674 
annotateIrrLoopHeaderWeights()1675 void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1676   LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1677   // Find irr loop headers
1678   for (auto &BB : F) {
1679     // As a heuristic also annotate indrectbr targets as they have a high chance
1680     // to become an irreducible loop header after the indirectbr tail
1681     // duplication.
1682     if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1683       Instruction *TI = BB.getTerminator();
1684       const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1685       setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1686     }
1687   }
1688 }
1689 
instrumentOneSelectInst(SelectInst & SI)1690 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1691   Module *M = F.getParent();
1692   IRBuilder<> Builder(&SI);
1693   Type *Int64Ty = Builder.getInt64Ty();
1694   auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1695   Builder.CreateCall(
1696       Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1697       {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1698        Builder.getInt32(*CurCtrIdx), Step});
1699   ++(*CurCtrIdx);
1700 }
1701 
annotateOneSelectInst(SelectInst & SI)1702 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1703   std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1704   assert(*CurCtrIdx < CountFromProfile.size() &&
1705          "Out of bound access of counters");
1706   uint64_t SCounts[2];
1707   SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1708   ++(*CurCtrIdx);
1709   uint64_t TotalCount = 0;
1710   auto BI = UseFunc->findBBInfo(SI.getParent());
1711   if (BI != nullptr)
1712     TotalCount = *BI->Count;
1713   // False Count
1714   SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1715   uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1716   if (MaxCount)
1717     setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1718 }
1719 
visitSelectInst(SelectInst & SI)1720 void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1721   if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1722     return;
1723   // FIXME: do not handle this yet.
1724   if (SI.getCondition()->getType()->isVectorTy())
1725     return;
1726 
1727   switch (Mode) {
1728   case VM_counting:
1729     NSIs++;
1730     return;
1731   case VM_instrument:
1732     instrumentOneSelectInst(SI);
1733     return;
1734   case VM_annotate:
1735     annotateOneSelectInst(SI);
1736     return;
1737   }
1738 
1739   llvm_unreachable("Unknown visiting mode");
1740 }
1741 
getMaxNumAnnotations(InstrProfValueKind ValueProfKind)1742 static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind) {
1743   if (ValueProfKind == IPVK_MemOPSize)
1744     return MaxNumMemOPAnnotations;
1745   if (ValueProfKind == llvm::IPVK_VTableTarget)
1746     return MaxNumVTableAnnotations;
1747   return MaxNumAnnotations;
1748 }
1749 
1750 // Traverse all valuesites and annotate the instructions for all value kind.
annotateValueSites()1751 void PGOUseFunc::annotateValueSites() {
1752   if (isValueProfilingDisabled())
1753     return;
1754 
1755   // Create the PGOFuncName meta data.
1756   createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1757 
1758   for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1759     annotateValueSites(Kind);
1760 }
1761 
1762 // Annotate the instructions for a specific value kind.
annotateValueSites(uint32_t Kind)1763 void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1764   assert(Kind <= IPVK_Last);
1765   unsigned ValueSiteIndex = 0;
1766 
1767   unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1768 
1769   // Since there isn't a reliable or fast way for profile reader to tell if a
1770   // profile is generated with `-enable-vtable-value-profiling` on, we run the
1771   // value profile collector over the function IR to find the instrumented sites
1772   // iff function profile records shows the number of instrumented vtable sites
1773   // is not zero. Function cfg already takes the number of instrumented
1774   // indirect call sites into account so it doesn't hash the number of
1775   // instrumented vtables; as a side effect it makes it easier to enable
1776   // profiling and profile use in two steps if needed.
1777   // TODO: Remove this if/when -enable-vtable-value-profiling is on by default.
1778   if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&
1779       NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
1780       MaxNumVTableAnnotations != 0)
1781     FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
1782   auto &ValueSites = FuncInfo.ValueSites[Kind];
1783   if (NumValueSites != ValueSites.size()) {
1784     auto &Ctx = M->getContext();
1785     Ctx.diagnose(DiagnosticInfoPGOProfile(
1786         M->getName().data(),
1787         Twine("Inconsistent number of value sites for ") +
1788             Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1789             F.getName().str() +
1790             Twine("\", possibly due to the use of a stale profile."),
1791         DS_Warning));
1792     return;
1793   }
1794 
1795   for (VPCandidateInfo &I : ValueSites) {
1796     LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1797                       << "): Index = " << ValueSiteIndex << " out of "
1798                       << NumValueSites << "\n");
1799     annotateValueSite(
1800         *M, *I.AnnotatedInst, ProfileRecord,
1801         static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1802         getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
1803     ValueSiteIndex++;
1804   }
1805 }
1806 
1807 // Collect the set of members for each Comdat in module M and store
1808 // in ComdatMembers.
collectComdatMembers(Module & M,std::unordered_multimap<Comdat *,GlobalValue * > & ComdatMembers)1809 static void collectComdatMembers(
1810     Module &M,
1811     std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1812   if (!DoComdatRenaming)
1813     return;
1814   for (Function &F : M)
1815     if (Comdat *C = F.getComdat())
1816       ComdatMembers.insert(std::make_pair(C, &F));
1817   for (GlobalVariable &GV : M.globals())
1818     if (Comdat *C = GV.getComdat())
1819       ComdatMembers.insert(std::make_pair(C, &GV));
1820   for (GlobalAlias &GA : M.aliases())
1821     if (Comdat *C = GA.getComdat())
1822       ComdatMembers.insert(std::make_pair(C, &GA));
1823 }
1824 
1825 // Return true if we should not find instrumentation data for this function
skipPGOUse(const Function & F)1826 static bool skipPGOUse(const Function &F) {
1827   if (F.isDeclaration())
1828     return true;
1829   // If there are too many critical edges, PGO might cause
1830   // compiler time problem. Skip PGO if the number of
1831   // critical edges execeed the threshold.
1832   unsigned NumCriticalEdges = 0;
1833   for (auto &BB : F) {
1834     const Instruction *TI = BB.getTerminator();
1835     for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1836       if (isCriticalEdge(TI, I))
1837         NumCriticalEdges++;
1838     }
1839   }
1840   if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1841     LLVM_DEBUG(dbgs() << "In func " << F.getName()
1842                       << ", NumCriticalEdges=" << NumCriticalEdges
1843                       << " exceed the threshold. Skip PGO.\n");
1844     return true;
1845   }
1846   return false;
1847 }
1848 
1849 // Return true if we should not instrument this function
skipPGOGen(const Function & F)1850 static bool skipPGOGen(const Function &F) {
1851   if (skipPGOUse(F))
1852     return true;
1853   if (F.hasFnAttribute(llvm::Attribute::Naked))
1854     return true;
1855   if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1856     return true;
1857   if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1858     return true;
1859   if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1860     return true;
1861   return false;
1862 }
1863 
InstrumentAllFunctions(Module & M,function_ref<TargetLibraryInfo & (Function &)> LookupTLI,function_ref<BranchProbabilityInfo * (Function &)> LookupBPI,function_ref<BlockFrequencyInfo * (Function &)> LookupBFI,bool IsCS)1864 static bool InstrumentAllFunctions(
1865     Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1866     function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
1867     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1868   // For the context-sensitve instrumentation, we should have a separated pass
1869   // (before LTO/ThinLTO linking) to create these variables.
1870   if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled())
1871     createIRLevelProfileFlagVar(M, /*IsCS=*/false);
1872 
1873   Triple TT(M.getTargetTriple());
1874   LLVMContext &Ctx = M.getContext();
1875   if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1876     Ctx.diagnose(DiagnosticInfoPGOProfile(
1877         M.getName().data(),
1878         Twine("VTable value profiling is presently not "
1879               "supported for non-ELF object formats"),
1880         DS_Warning));
1881   std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1882   collectComdatMembers(M, ComdatMembers);
1883 
1884   for (auto &F : M) {
1885     if (skipPGOGen(F))
1886       continue;
1887     auto &TLI = LookupTLI(F);
1888     auto *BPI = LookupBPI(F);
1889     auto *BFI = LookupBFI(F);
1890     instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1891   }
1892   return true;
1893 }
1894 
1895 PreservedAnalyses
run(Module & M,ModuleAnalysisManager & MAM)1896 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &MAM) {
1897   createProfileFileNameVar(M, CSInstrName);
1898   // The variable in a comdat may be discarded by LTO. Ensure the declaration
1899   // will be retained.
1900   appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true));
1901   if (ProfileSampling)
1902     createProfileSamplingVar(M);
1903   PreservedAnalyses PA;
1904   PA.preserve<FunctionAnalysisManagerModuleProxy>();
1905   PA.preserveSet<AllAnalysesOn<Function>>();
1906   return PA;
1907 }
1908 
run(Module & M,ModuleAnalysisManager & MAM)1909 PreservedAnalyses PGOInstrumentationGen::run(Module &M,
1910                                              ModuleAnalysisManager &MAM) {
1911   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1912   auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1913     return FAM.getResult<TargetLibraryAnalysis>(F);
1914   };
1915   auto LookupBPI = [&FAM](Function &F) {
1916     return &FAM.getResult<BranchProbabilityAnalysis>(F);
1917   };
1918   auto LookupBFI = [&FAM](Function &F) {
1919     return &FAM.getResult<BlockFrequencyAnalysis>(F);
1920   };
1921 
1922   if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1923     return PreservedAnalyses::all();
1924 
1925   return PreservedAnalyses::none();
1926 }
1927 
1928 // Using the ratio b/w sums of profile count values and BFI count values to
1929 // adjust the func entry count.
fixFuncEntryCount(PGOUseFunc & Func,LoopInfo & LI,BranchProbabilityInfo & NBPI)1930 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1931                               BranchProbabilityInfo &NBPI) {
1932   Function &F = Func.getFunc();
1933   BlockFrequencyInfo NBFI(F, NBPI, LI);
1934 #ifndef NDEBUG
1935   auto BFIEntryCount = F.getEntryCount();
1936   assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1937          "Invalid BFI Entrycount");
1938 #endif
1939   auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1940   auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1941   for (auto &BBI : F) {
1942     uint64_t CountValue = 0;
1943     uint64_t BFICountValue = 0;
1944     if (!Func.findBBInfo(&BBI))
1945       continue;
1946     auto BFICount = NBFI.getBlockProfileCount(&BBI);
1947     CountValue = *Func.getBBInfo(&BBI).Count;
1948     BFICountValue = *BFICount;
1949     SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1950     SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1951   }
1952   if (SumCount.isZero())
1953     return;
1954 
1955   assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1956          "Incorrect sum of BFI counts");
1957   if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1958     return;
1959   double Scale = (SumCount / SumBFICount).convertToDouble();
1960   if (Scale < 1.001 && Scale > 0.999)
1961     return;
1962 
1963   uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
1964   uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1965   if (NewEntryCount == 0)
1966     NewEntryCount = 1;
1967   if (NewEntryCount != FuncEntryCount) {
1968     F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1969     LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1970                       << ", entry_count " << FuncEntryCount << " --> "
1971                       << NewEntryCount << "\n");
1972   }
1973 }
1974 
1975 // Compare the profile count values with BFI count values, and print out
1976 // the non-matching ones.
verifyFuncBFI(PGOUseFunc & Func,LoopInfo & LI,BranchProbabilityInfo & NBPI,uint64_t HotCountThreshold,uint64_t ColdCountThreshold)1977 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1978                           BranchProbabilityInfo &NBPI,
1979                           uint64_t HotCountThreshold,
1980                           uint64_t ColdCountThreshold) {
1981   Function &F = Func.getFunc();
1982   BlockFrequencyInfo NBFI(F, NBPI, LI);
1983   //  bool PrintFunc = false;
1984   bool HotBBOnly = PGOVerifyHotBFI;
1985   StringRef Msg;
1986   OptimizationRemarkEmitter ORE(&F);
1987 
1988   unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1989   for (auto &BBI : F) {
1990     uint64_t CountValue = 0;
1991     uint64_t BFICountValue = 0;
1992 
1993     CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue);
1994 
1995     BBNum++;
1996     if (CountValue)
1997       NonZeroBBNum++;
1998     auto BFICount = NBFI.getBlockProfileCount(&BBI);
1999     if (BFICount)
2000       BFICountValue = *BFICount;
2001 
2002     if (HotBBOnly) {
2003       bool rawIsHot = CountValue >= HotCountThreshold;
2004       bool BFIIsHot = BFICountValue >= HotCountThreshold;
2005       bool rawIsCold = CountValue <= ColdCountThreshold;
2006       bool ShowCount = false;
2007       if (rawIsHot && !BFIIsHot) {
2008         Msg = "raw-Hot to BFI-nonHot";
2009         ShowCount = true;
2010       } else if (rawIsCold && BFIIsHot) {
2011         Msg = "raw-Cold to BFI-Hot";
2012         ShowCount = true;
2013       }
2014       if (!ShowCount)
2015         continue;
2016     } else {
2017       if ((CountValue < PGOVerifyBFICutoff) &&
2018           (BFICountValue < PGOVerifyBFICutoff))
2019         continue;
2020       uint64_t Diff = (BFICountValue >= CountValue)
2021                           ? BFICountValue - CountValue
2022                           : CountValue - BFICountValue;
2023       if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2024         continue;
2025     }
2026     BBMisMatchNum++;
2027 
2028     ORE.emit([&]() {
2029       OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify",
2030                                         F.getSubprogram(), &BBI);
2031       Remark << "BB " << ore::NV("Block", BBI.getName())
2032              << " Count=" << ore::NV("Count", CountValue)
2033              << " BFI_Count=" << ore::NV("Count", BFICountValue);
2034       if (!Msg.empty())
2035         Remark << " (" << Msg << ")";
2036       return Remark;
2037     });
2038   }
2039   if (BBMisMatchNum)
2040     ORE.emit([&]() {
2041       return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2042                                         F.getSubprogram(), &F.getEntryBlock())
2043              << "In Func " << ore::NV("Function", F.getName())
2044              << ": Num_of_BB=" << ore::NV("Count", BBNum)
2045              << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2046              << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2047     });
2048 }
2049 
annotateAllFunctions(Module & M,StringRef ProfileFileName,StringRef ProfileRemappingFileName,vfs::FileSystem & FS,function_ref<TargetLibraryInfo & (Function &)> LookupTLI,function_ref<BranchProbabilityInfo * (Function &)> LookupBPI,function_ref<BlockFrequencyInfo * (Function &)> LookupBFI,ProfileSummaryInfo * PSI,bool IsCS)2050 static bool annotateAllFunctions(
2051     Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2052     vfs::FileSystem &FS,
2053     function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2054     function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
2055     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
2056     ProfileSummaryInfo *PSI, bool IsCS) {
2057   LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2058   auto &Ctx = M.getContext();
2059   // Read the counter array from file.
2060   auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2061                                                     ProfileRemappingFileName);
2062   if (Error E = ReaderOrErr.takeError()) {
2063     handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2064       Ctx.diagnose(
2065           DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2066     });
2067     return false;
2068   }
2069 
2070   std::unique_ptr<IndexedInstrProfReader> PGOReader =
2071       std::move(ReaderOrErr.get());
2072   if (!PGOReader) {
2073     Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2074                                           StringRef("Cannot get PGOReader")));
2075     return false;
2076   }
2077   if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2078     return false;
2079 
2080   // TODO: might need to change the warning once the clang option is finalized.
2081   if (!PGOReader->isIRLevelProfile()) {
2082     Ctx.diagnose(DiagnosticInfoPGOProfile(
2083         ProfileFileName.data(), "Not an IR level instrumentation profile"));
2084     return false;
2085   }
2086   if (PGOReader->functionEntryOnly()) {
2087     Ctx.diagnose(DiagnosticInfoPGOProfile(
2088         ProfileFileName.data(),
2089         "Function entry profiles are not yet supported for optimization"));
2090     return false;
2091   }
2092 
2093   if (EnableVTableProfileUse) {
2094     for (GlobalVariable &G : M.globals()) {
2095       if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
2096         continue;
2097 
2098       // Create the PGOFuncName meta data.
2099       createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
2100     }
2101   }
2102 
2103   // Add the profile summary (read from the header of the indexed summary) here
2104   // so that we can use it below when reading counters (which checks if the
2105   // function should be marked with a cold or inlinehint attribute).
2106   M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2107                       IsCS ? ProfileSummary::PSK_CSInstr
2108                            : ProfileSummary::PSK_Instr);
2109   PSI->refresh();
2110 
2111   std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2112   collectComdatMembers(M, ComdatMembers);
2113   std::vector<Function *> HotFunctions;
2114   std::vector<Function *> ColdFunctions;
2115 
2116   // If the profile marked as always instrument the entry BB, do the
2117   // same. Note this can be overwritten by the internal option in CFGMST.h
2118   bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2119   if (PGOInstrumentEntry.getNumOccurrences() > 0)
2120     InstrumentFuncEntry = PGOInstrumentEntry;
2121   InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
2122 
2123   bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2124   for (auto &F : M) {
2125     if (skipPGOUse(F))
2126       continue;
2127     auto &TLI = LookupTLI(F);
2128     auto *BPI = LookupBPI(F);
2129     auto *BFI = LookupBFI(F);
2130     if (!HasSingleByteCoverage) {
2131       // Split indirectbr critical edges here before computing the MST rather
2132       // than later in getInstrBB() to avoid invalidating it.
2133       SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2134                                    BFI);
2135     }
2136     PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2137                     InstrumentFuncEntry, HasSingleByteCoverage);
2138     if (HasSingleByteCoverage) {
2139       Func.populateCoverage(PGOReader.get());
2140       continue;
2141     }
2142     // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
2143     // it means the profile for the function is unrepresentative and this
2144     // function is actually hot / warm. We will reset the function hot / cold
2145     // attribute and drop all the profile counters.
2146     InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo;
2147     bool AllZeros = false;
2148     if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2149       continue;
2150     if (AllZeros) {
2151       F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2152       if (Func.getProgramMaxCount() != 0)
2153         ColdFunctions.push_back(&F);
2154       continue;
2155     }
2156     if (PseudoKind != InstrProfRecord::NotPseudo) {
2157       // Clear function attribute cold.
2158       if (F.hasFnAttribute(Attribute::Cold))
2159         F.removeFnAttr(Attribute::Cold);
2160       // Set function attribute as hot.
2161       if (PseudoKind == InstrProfRecord::PseudoHot)
2162         F.addFnAttr(Attribute::Hot);
2163       continue;
2164     }
2165     Func.populateCounters();
2166     Func.setBranchWeights();
2167     Func.annotateValueSites();
2168     Func.annotateIrrLoopHeaderWeights();
2169     PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2170     if (FreqAttr == PGOUseFunc::FFA_Cold)
2171       ColdFunctions.push_back(&F);
2172     else if (FreqAttr == PGOUseFunc::FFA_Hot)
2173       HotFunctions.push_back(&F);
2174     if (PGOViewCounts != PGOVCT_None &&
2175         (ViewBlockFreqFuncName.empty() ||
2176          F.getName() == ViewBlockFreqFuncName)) {
2177       LoopInfo LI{DominatorTree(F)};
2178       std::unique_ptr<BranchProbabilityInfo> NewBPI =
2179           std::make_unique<BranchProbabilityInfo>(F, LI);
2180       std::unique_ptr<BlockFrequencyInfo> NewBFI =
2181           std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2182       if (PGOViewCounts == PGOVCT_Graph)
2183         NewBFI->view();
2184       else if (PGOViewCounts == PGOVCT_Text) {
2185         dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2186         NewBFI->print(dbgs());
2187       }
2188     }
2189     if (PGOViewRawCounts != PGOVCT_None &&
2190         (ViewBlockFreqFuncName.empty() ||
2191          F.getName() == ViewBlockFreqFuncName)) {
2192       if (PGOViewRawCounts == PGOVCT_Graph)
2193         if (ViewBlockFreqFuncName.empty())
2194           WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2195         else
2196           ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2197       else if (PGOViewRawCounts == PGOVCT_Text) {
2198         dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2199         Func.dumpInfo();
2200       }
2201     }
2202 
2203     if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) {
2204       LoopInfo LI{DominatorTree(F)};
2205       BranchProbabilityInfo NBPI(F, LI);
2206 
2207       // Fix func entry count.
2208       if (PGOFixEntryCount)
2209         fixFuncEntryCount(Func, LI, NBPI);
2210 
2211       // Verify BlockFrequency information.
2212       uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2213       if (PGOVerifyHotBFI) {
2214         HotCountThreshold = PSI->getOrCompHotCountThreshold();
2215         ColdCountThreshold = PSI->getOrCompColdCountThreshold();
2216       }
2217       verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2218     }
2219   }
2220 
2221   // Set function hotness attribute from the profile.
2222   // We have to apply these attributes at the end because their presence
2223   // can affect the BranchProbabilityInfo of any callers, resulting in an
2224   // inconsistent MST between prof-gen and prof-use.
2225   for (auto &F : HotFunctions) {
2226     F->addFnAttr(Attribute::InlineHint);
2227     LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2228                       << "\n");
2229   }
2230   for (auto &F : ColdFunctions) {
2231     // Only set when there is no Attribute::Hot set by the user. For Hot
2232     // attribute, user's annotation has the precedence over the profile.
2233     if (F->hasFnAttribute(Attribute::Hot)) {
2234       auto &Ctx = M.getContext();
2235       std::string Msg = std::string("Function ") + F->getName().str() +
2236                         std::string(" is annotated as a hot function but"
2237                                     " the profile is cold");
2238       Ctx.diagnose(
2239           DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2240       continue;
2241     }
2242     F->addFnAttr(Attribute::Cold);
2243     LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2244                       << "\n");
2245   }
2246   return true;
2247 }
2248 
PGOInstrumentationUse(std::string Filename,std::string RemappingFilename,bool IsCS,IntrusiveRefCntPtr<vfs::FileSystem> VFS)2249 PGOInstrumentationUse::PGOInstrumentationUse(
2250     std::string Filename, std::string RemappingFilename, bool IsCS,
2251     IntrusiveRefCntPtr<vfs::FileSystem> VFS)
2252     : ProfileFileName(std::move(Filename)),
2253       ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2254       FS(std::move(VFS)) {
2255   if (!PGOTestProfileFile.empty())
2256     ProfileFileName = PGOTestProfileFile;
2257   if (!PGOTestProfileRemappingFile.empty())
2258     ProfileRemappingFileName = PGOTestProfileRemappingFile;
2259   if (!FS)
2260     FS = vfs::getRealFileSystem();
2261 }
2262 
run(Module & M,ModuleAnalysisManager & MAM)2263 PreservedAnalyses PGOInstrumentationUse::run(Module &M,
2264                                              ModuleAnalysisManager &MAM) {
2265 
2266   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2267   auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2268     return FAM.getResult<TargetLibraryAnalysis>(F);
2269   };
2270   auto LookupBPI = [&FAM](Function &F) {
2271     return &FAM.getResult<BranchProbabilityAnalysis>(F);
2272   };
2273   auto LookupBFI = [&FAM](Function &F) {
2274     return &FAM.getResult<BlockFrequencyAnalysis>(F);
2275   };
2276 
2277   auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2278   if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2279                             LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2280     return PreservedAnalyses::all();
2281 
2282   return PreservedAnalyses::none();
2283 }
2284 
getSimpleNodeName(const BasicBlock * Node)2285 static std::string getSimpleNodeName(const BasicBlock *Node) {
2286   if (!Node->getName().empty())
2287     return Node->getName().str();
2288 
2289   std::string SimpleNodeName;
2290   raw_string_ostream OS(SimpleNodeName);
2291   Node->printAsOperand(OS, false);
2292   return SimpleNodeName;
2293 }
2294 
setProfMetadata(Module * M,Instruction * TI,ArrayRef<uint64_t> EdgeCounts,uint64_t MaxCount)2295 void llvm::setProfMetadata(Module *M, Instruction *TI,
2296                            ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
2297   assert(MaxCount > 0 && "Bad max count");
2298   uint64_t Scale = calculateCountScale(MaxCount);
2299   SmallVector<unsigned, 4> Weights;
2300   for (const auto &ECI : EdgeCounts)
2301     Weights.push_back(scaleBranchCount(ECI, Scale));
2302 
2303   LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2304                                            : Weights) {
2305     dbgs() << W << " ";
2306   } dbgs() << "\n";);
2307 
2308   misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2309 
2310   setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2311   if (EmitBranchProbability) {
2312     std::string BrCondStr = getBranchCondString(TI);
2313     if (BrCondStr.empty())
2314       return;
2315 
2316     uint64_t WSum =
2317         std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2318                         [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2319     uint64_t TotalCount =
2320         std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2321                         [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2322     Scale = calculateCountScale(WSum);
2323     BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2324                          scaleBranchCount(WSum, Scale));
2325     std::string BranchProbStr;
2326     raw_string_ostream OS(BranchProbStr);
2327     OS << BP;
2328     OS << " (total count : " << TotalCount << ")";
2329     OS.flush();
2330     Function *F = TI->getParent()->getParent();
2331     OptimizationRemarkEmitter ORE(F);
2332     ORE.emit([&]() {
2333       return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2334              << BrCondStr << " is true with probability : " << BranchProbStr;
2335     });
2336   }
2337 }
2338 
2339 namespace llvm {
2340 
setIrrLoopHeaderMetadata(Module * M,Instruction * TI,uint64_t Count)2341 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {
2342   MDBuilder MDB(M->getContext());
2343   TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2344                   MDB.createIrrLoopHeaderWeight(Count));
2345 }
2346 
2347 template <> struct GraphTraits<PGOUseFunc *> {
2348   using NodeRef = const BasicBlock *;
2349   using ChildIteratorType = const_succ_iterator;
2350   using nodes_iterator = pointer_iterator<Function::const_iterator>;
2351 
getEntryNodellvm::GraphTraits2352   static NodeRef getEntryNode(const PGOUseFunc *G) {
2353     return &G->getFunc().front();
2354   }
2355 
child_beginllvm::GraphTraits2356   static ChildIteratorType child_begin(const NodeRef N) {
2357     return succ_begin(N);
2358   }
2359 
child_endllvm::GraphTraits2360   static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2361 
nodes_beginllvm::GraphTraits2362   static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2363     return nodes_iterator(G->getFunc().begin());
2364   }
2365 
nodes_endllvm::GraphTraits2366   static nodes_iterator nodes_end(const PGOUseFunc *G) {
2367     return nodes_iterator(G->getFunc().end());
2368   }
2369 };
2370 
2371 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
DOTGraphTraitsllvm::DOTGraphTraits2372   explicit DOTGraphTraits(bool isSimple = false)
2373       : DefaultDOTGraphTraits(isSimple) {}
2374 
getGraphNamellvm::DOTGraphTraits2375   static std::string getGraphName(const PGOUseFunc *G) {
2376     return std::string(G->getFunc().getName());
2377   }
2378 
getNodeLabelllvm::DOTGraphTraits2379   std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2380     std::string Result;
2381     raw_string_ostream OS(Result);
2382 
2383     OS << getSimpleNodeName(Node) << ":\\l";
2384     PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2385     OS << "Count : ";
2386     if (BI && BI->Count)
2387       OS << *BI->Count << "\\l";
2388     else
2389       OS << "Unknown\\l";
2390 
2391     if (!PGOInstrSelect)
2392       return Result;
2393 
2394     for (const Instruction &I : *Node) {
2395       if (!isa<SelectInst>(&I))
2396         continue;
2397       // Display scaled counts for SELECT instruction:
2398       OS << "SELECT : { T = ";
2399       uint64_t TC, FC;
2400       bool HasProf = extractBranchWeights(I, TC, FC);
2401       if (!HasProf)
2402         OS << "Unknown, F = Unknown }\\l";
2403       else
2404         OS << TC << ", F = " << FC << " }\\l";
2405     }
2406     return Result;
2407   }
2408 };
2409 
2410 } // end namespace llvm
2411