xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp (revision 85868e8a1daeaae7a0e48effb2ea2310ae3b02c6)
1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements PGO instrumentation using a minimum spanning tree based
10 // on the following paper:
11 //   [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12 //   for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13 //   Issue 3, pp 313-322
14 // The idea of the algorithm based on the fact that for each node (except for
15 // the entry and exit), the sum of incoming edge counts equals the sum of
16 // outgoing edge counts. The count of edge on spanning tree can be derived from
17 // those edges not on the spanning tree. Knuth proves this method instruments
18 // the minimum number of edges.
19 //
20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree
21 // edges have higher frequencies (more likely to execute). The idea is to
22 // instrument those less frequently executed edges to reduce the runtime
23 // overhead of instrumented binaries.
24 //
25 // This file contains two passes:
26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27 // count profile, and generates the instrumentation for indirect call
28 // profiling.
29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and
30 // annotates the branch weights. It also reads the indirect call value
31 // profiling records and annotate the indirect call instructions.
32 //
33 // To get the precise counter information, These two passes need to invoke at
34 // the same compilation point (so they see the same IR). For pass
35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36 // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and
37 // the profile is opened in module level and passed to each PGOUseFunc instance.
38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39 // in class FuncPGOInstrumentation.
40 //
41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class
42 // BBInfo contains auxiliary information for each BB. These two classes are used
43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
44 // class of PGOEdge and BBInfo, respectively. They contains extra data structure
45 // used in populating profile counters.
46 // The MST implementation is in Class CFGMST (CFGMST.h).
47 //
48 //===----------------------------------------------------------------------===//
49 
50 #include "CFGMST.h"
51 #include "ValueProfileCollector.h"
52 #include "llvm/ADT/APInt.h"
53 #include "llvm/ADT/ArrayRef.h"
54 #include "llvm/ADT/STLExtras.h"
55 #include "llvm/ADT/SmallVector.h"
56 #include "llvm/ADT/Statistic.h"
57 #include "llvm/ADT/StringRef.h"
58 #include "llvm/ADT/Triple.h"
59 #include "llvm/ADT/Twine.h"
60 #include "llvm/ADT/iterator.h"
61 #include "llvm/ADT/iterator_range.h"
62 #include "llvm/Analysis/BlockFrequencyInfo.h"
63 #include "llvm/Analysis/BranchProbabilityInfo.h"
64 #include "llvm/Analysis/CFG.h"
65 #include "llvm/Analysis/LoopInfo.h"
66 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
67 #include "llvm/Analysis/ProfileSummaryInfo.h"
68 #include "llvm/IR/Attributes.h"
69 #include "llvm/IR/BasicBlock.h"
70 #include "llvm/IR/CFG.h"
71 #include "llvm/IR/CallSite.h"
72 #include "llvm/IR/Comdat.h"
73 #include "llvm/IR/Constant.h"
74 #include "llvm/IR/Constants.h"
75 #include "llvm/IR/DiagnosticInfo.h"
76 #include "llvm/IR/Dominators.h"
77 #include "llvm/IR/Function.h"
78 #include "llvm/IR/GlobalAlias.h"
79 #include "llvm/IR/GlobalValue.h"
80 #include "llvm/IR/GlobalVariable.h"
81 #include "llvm/IR/IRBuilder.h"
82 #include "llvm/IR/InstVisitor.h"
83 #include "llvm/IR/InstrTypes.h"
84 #include "llvm/IR/Instruction.h"
85 #include "llvm/IR/Instructions.h"
86 #include "llvm/IR/IntrinsicInst.h"
87 #include "llvm/IR/Intrinsics.h"
88 #include "llvm/IR/LLVMContext.h"
89 #include "llvm/IR/MDBuilder.h"
90 #include "llvm/IR/Module.h"
91 #include "llvm/IR/PassManager.h"
92 #include "llvm/IR/ProfileSummary.h"
93 #include "llvm/IR/Type.h"
94 #include "llvm/IR/Value.h"
95 #include "llvm/Pass.h"
96 #include "llvm/ProfileData/InstrProf.h"
97 #include "llvm/ProfileData/InstrProfReader.h"
98 #include "llvm/Support/BranchProbability.h"
99 #include "llvm/Support/CRC.h"
100 #include "llvm/Support/Casting.h"
101 #include "llvm/Support/CommandLine.h"
102 #include "llvm/Support/DOTGraphTraits.h"
103 #include "llvm/Support/Debug.h"
104 #include "llvm/Support/Error.h"
105 #include "llvm/Support/ErrorHandling.h"
106 #include "llvm/Support/GraphWriter.h"
107 #include "llvm/Support/raw_ostream.h"
108 #include "llvm/Transforms/Instrumentation.h"
109 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
110 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
111 #include "llvm/Transforms/Utils/MisExpect.h"
112 #include <algorithm>
113 #include <cassert>
114 #include <cstdint>
115 #include <memory>
116 #include <numeric>
117 #include <string>
118 #include <unordered_map>
119 #include <utility>
120 #include <vector>
121 
122 using namespace llvm;
123 using ProfileCount = Function::ProfileCount;
124 using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
125 
126 #define DEBUG_TYPE "pgo-instrumentation"
127 
128 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
129 STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
130 STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
131 STATISTIC(NumOfPGOEdge, "Number of edges.");
132 STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
133 STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
134 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
135 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
136 STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
137 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
138 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
139 STATISTIC(NumOfCSPGOSelectInsts,
140           "Number of select instruction instrumented in CSPGO.");
141 STATISTIC(NumOfCSPGOMemIntrinsics,
142           "Number of mem intrinsics instrumented in CSPGO.");
143 STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
144 STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
145 STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
146 STATISTIC(NumOfCSPGOFunc,
147           "Number of functions having valid profile counts in CSPGO.");
148 STATISTIC(NumOfCSPGOMismatch,
149           "Number of functions having mismatch profile in CSPGO.");
150 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
151 
152 // Command line option to specify the file to read profile from. This is
153 // mainly used for testing.
154 static cl::opt<std::string>
155     PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
156                        cl::value_desc("filename"),
157                        cl::desc("Specify the path of profile data file. This is"
158                                 "mainly for test purpose."));
159 static cl::opt<std::string> PGOTestProfileRemappingFile(
160     "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
161     cl::value_desc("filename"),
162     cl::desc("Specify the path of profile remapping file. This is mainly for "
163              "test purpose."));
164 
165 // Command line option to disable value profiling. The default is false:
166 // i.e. value profiling is enabled by default. This is for debug purpose.
167 static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
168                                            cl::Hidden,
169                                            cl::desc("Disable Value Profiling"));
170 
171 // Command line option to set the maximum number of VP annotations to write to
172 // the metadata for a single indirect call callsite.
173 static cl::opt<unsigned> MaxNumAnnotations(
174     "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore,
175     cl::desc("Max number of annotations for a single indirect "
176              "call callsite"));
177 
178 // Command line option to set the maximum number of value annotations
179 // to write to the metadata for a single memop intrinsic.
180 static cl::opt<unsigned> MaxNumMemOPAnnotations(
181     "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore,
182     cl::desc("Max number of preicise value annotations for a single memop"
183              "intrinsic"));
184 
185 // Command line option to control appending FunctionHash to the name of a COMDAT
186 // function. This is to avoid the hash mismatch caused by the preinliner.
187 static cl::opt<bool> DoComdatRenaming(
188     "do-comdat-renaming", cl::init(false), cl::Hidden,
189     cl::desc("Append function hash to the name of COMDAT function to avoid "
190              "function hash mismatch due to the preinliner"));
191 
192 // Command line option to enable/disable the warning about missing profile
193 // information.
194 static cl::opt<bool>
195     PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
196                    cl::desc("Use this option to turn on/off "
197                             "warnings about missing profile data for "
198                             "functions."));
199 
200 // Command line option to enable/disable the warning about a hash mismatch in
201 // the profile data.
202 static cl::opt<bool>
203     NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
204                       cl::desc("Use this option to turn off/on "
205                                "warnings about profile cfg mismatch."));
206 
207 // Command line option to enable/disable the warning about a hash mismatch in
208 // the profile data for Comdat functions, which often turns out to be false
209 // positive due to the pre-instrumentation inline.
210 static cl::opt<bool>
211     NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true),
212                             cl::Hidden,
213                             cl::desc("The option is used to turn on/off "
214                                      "warnings about hash mismatch for comdat "
215                                      "functions."));
216 
217 // Command line option to enable/disable select instruction instrumentation.
218 static cl::opt<bool>
219     PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
220                    cl::desc("Use this option to turn on/off SELECT "
221                             "instruction instrumentation. "));
222 
223 // Command line option to turn on CFG dot or text dump of raw profile counts
224 static cl::opt<PGOViewCountsType> PGOViewRawCounts(
225     "pgo-view-raw-counts", cl::Hidden,
226     cl::desc("A boolean option to show CFG dag or text "
227              "with raw profile counts from "
228              "profile data. See also option "
229              "-pgo-view-counts. To limit graph "
230              "display to only one function, use "
231              "filtering option -view-bfi-func-name."),
232     cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
233                clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
234                clEnumValN(PGOVCT_Text, "text", "show in text.")));
235 
236 // Command line option to enable/disable memop intrinsic call.size profiling.
237 static cl::opt<bool>
238     PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
239                   cl::desc("Use this option to turn on/off "
240                            "memory intrinsic size profiling."));
241 
242 // Emit branch probability as optimization remarks.
243 static cl::opt<bool>
244     EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
245                           cl::desc("When this option is on, the annotated "
246                                    "branch probability will be emitted as "
247                                    "optimization remarks: -{Rpass|"
248                                    "pass-remarks}=pgo-instrumentation"));
249 
250 // Command line option to turn on CFG dot dump after profile annotation.
251 // Defined in Analysis/BlockFrequencyInfo.cpp:  -pgo-view-counts
252 extern cl::opt<PGOViewCountsType> PGOViewCounts;
253 
254 // Command line option to specify the name of the function for CFG dump
255 // Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name=
256 extern cl::opt<std::string> ViewBlockFreqFuncName;
257 
258 // Return a string describing the branch condition that can be
259 // used in static branch probability heuristics:
260 static std::string getBranchCondString(Instruction *TI) {
261   BranchInst *BI = dyn_cast<BranchInst>(TI);
262   if (!BI || !BI->isConditional())
263     return std::string();
264 
265   Value *Cond = BI->getCondition();
266   ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
267   if (!CI)
268     return std::string();
269 
270   std::string result;
271   raw_string_ostream OS(result);
272   OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
273   CI->getOperand(0)->getType()->print(OS, true);
274 
275   Value *RHS = CI->getOperand(1);
276   ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
277   if (CV) {
278     if (CV->isZero())
279       OS << "_Zero";
280     else if (CV->isOne())
281       OS << "_One";
282     else if (CV->isMinusOne())
283       OS << "_MinusOne";
284     else
285       OS << "_Const";
286   }
287   OS.flush();
288   return result;
289 }
290 
291 static const char *ValueProfKindDescr[] = {
292 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
293 #include "llvm/ProfileData/InstrProfData.inc"
294 };
295 
296 namespace {
297 
298 /// The select instruction visitor plays three roles specified
299 /// by the mode. In \c VM_counting mode, it simply counts the number of
300 /// select instructions. In \c VM_instrument mode, it inserts code to count
301 /// the number times TrueValue of select is taken. In \c VM_annotate mode,
302 /// it reads the profile data and annotate the select instruction with metadata.
303 enum VisitMode { VM_counting, VM_instrument, VM_annotate };
304 class PGOUseFunc;
305 
306 /// Instruction Visitor class to visit select instructions.
307 struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
308   Function &F;
309   unsigned NSIs = 0;             // Number of select instructions instrumented.
310   VisitMode Mode = VM_counting;  // Visiting mode.
311   unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
312   unsigned TotalNumCtrs = 0;     // Total number of counters
313   GlobalVariable *FuncNameVar = nullptr;
314   uint64_t FuncHash = 0;
315   PGOUseFunc *UseFunc = nullptr;
316 
317   SelectInstVisitor(Function &Func) : F(Func) {}
318 
319   void countSelects(Function &Func) {
320     NSIs = 0;
321     Mode = VM_counting;
322     visit(Func);
323   }
324 
325   // Visit the IR stream and instrument all select instructions. \p
326   // Ind is a pointer to the counter index variable; \p TotalNC
327   // is the total number of counters; \p FNV is the pointer to the
328   // PGO function name var; \p FHash is the function hash.
329   void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
330                          GlobalVariable *FNV, uint64_t FHash) {
331     Mode = VM_instrument;
332     CurCtrIdx = Ind;
333     TotalNumCtrs = TotalNC;
334     FuncHash = FHash;
335     FuncNameVar = FNV;
336     visit(Func);
337   }
338 
339   // Visit the IR stream and annotate all select instructions.
340   void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
341     Mode = VM_annotate;
342     UseFunc = UF;
343     CurCtrIdx = Ind;
344     visit(Func);
345   }
346 
347   void instrumentOneSelectInst(SelectInst &SI);
348   void annotateOneSelectInst(SelectInst &SI);
349 
350   // Visit \p SI instruction and perform tasks according to visit mode.
351   void visitSelectInst(SelectInst &SI);
352 
353   // Return the number of select instructions. This needs be called after
354   // countSelects().
355   unsigned getNumOfSelectInsts() const { return NSIs; }
356 };
357 
358 
359 class PGOInstrumentationGenLegacyPass : public ModulePass {
360 public:
361   static char ID;
362 
363   PGOInstrumentationGenLegacyPass(bool IsCS = false)
364       : ModulePass(ID), IsCS(IsCS) {
365     initializePGOInstrumentationGenLegacyPassPass(
366         *PassRegistry::getPassRegistry());
367   }
368 
369   StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
370 
371 private:
372   // Is this is context-sensitive instrumentation.
373   bool IsCS;
374   bool runOnModule(Module &M) override;
375 
376   void getAnalysisUsage(AnalysisUsage &AU) const override {
377     AU.addRequired<BlockFrequencyInfoWrapperPass>();
378   }
379 };
380 
381 class PGOInstrumentationUseLegacyPass : public ModulePass {
382 public:
383   static char ID;
384 
385   // Provide the profile filename as the parameter.
386   PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false)
387       : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) {
388     if (!PGOTestProfileFile.empty())
389       ProfileFileName = PGOTestProfileFile;
390     initializePGOInstrumentationUseLegacyPassPass(
391         *PassRegistry::getPassRegistry());
392   }
393 
394   StringRef getPassName() const override { return "PGOInstrumentationUsePass"; }
395 
396 private:
397   std::string ProfileFileName;
398   // Is this is context-sensitive instrumentation use.
399   bool IsCS;
400 
401   bool runOnModule(Module &M) override;
402 
403   void getAnalysisUsage(AnalysisUsage &AU) const override {
404     AU.addRequired<ProfileSummaryInfoWrapperPass>();
405     AU.addRequired<BlockFrequencyInfoWrapperPass>();
406   }
407 };
408 
409 class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass {
410 public:
411   static char ID;
412   StringRef getPassName() const override {
413     return "PGOInstrumentationGenCreateVarPass";
414   }
415   PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "")
416       : ModulePass(ID), InstrProfileOutput(CSInstrName) {
417     initializePGOInstrumentationGenCreateVarLegacyPassPass(
418         *PassRegistry::getPassRegistry());
419   }
420 
421 private:
422   bool runOnModule(Module &M) override {
423     createProfileFileNameVar(M, InstrProfileOutput);
424     createIRLevelProfileFlagVar(M, true);
425     return false;
426   }
427   std::string InstrProfileOutput;
428 };
429 
430 } // end anonymous namespace
431 
432 char PGOInstrumentationGenLegacyPass::ID = 0;
433 
434 INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
435                       "PGO instrumentation.", false, false)
436 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
437 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
438 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
439                     "PGO instrumentation.", false, false)
440 
441 ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) {
442   return new PGOInstrumentationGenLegacyPass(IsCS);
443 }
444 
445 char PGOInstrumentationUseLegacyPass::ID = 0;
446 
447 INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
448                       "Read PGO instrumentation profile.", false, false)
449 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
450 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
451 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
452 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
453                     "Read PGO instrumentation profile.", false, false)
454 
455 ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename,
456                                                         bool IsCS) {
457   return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS);
458 }
459 
460 char PGOInstrumentationGenCreateVarLegacyPass::ID = 0;
461 
462 INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass,
463                 "pgo-instr-gen-create-var",
464                 "Create PGO instrumentation version variable for CSPGO.", false,
465                 false)
466 
467 ModulePass *
468 llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) {
469   return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName);
470 }
471 
472 namespace {
473 
474 /// An MST based instrumentation for PGO
475 ///
476 /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
477 /// in the function level.
478 struct PGOEdge {
479   // This class implements the CFG edges. Note the CFG can be a multi-graph.
480   // So there might be multiple edges with same SrcBB and DestBB.
481   const BasicBlock *SrcBB;
482   const BasicBlock *DestBB;
483   uint64_t Weight;
484   bool InMST = false;
485   bool Removed = false;
486   bool IsCritical = false;
487 
488   PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
489       : SrcBB(Src), DestBB(Dest), Weight(W) {}
490 
491   // Return the information string of an edge.
492   const std::string infoString() const {
493     return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
494             (IsCritical ? "c" : " ") + "  W=" + Twine(Weight)).str();
495   }
496 };
497 
498 // This class stores the auxiliary information for each BB.
499 struct BBInfo {
500   BBInfo *Group;
501   uint32_t Index;
502   uint32_t Rank = 0;
503 
504   BBInfo(unsigned IX) : Group(this), Index(IX) {}
505 
506   // Return the information string of this object.
507   const std::string infoString() const {
508     return (Twine("Index=") + Twine(Index)).str();
509   }
510 
511   // Empty function -- only applicable to UseBBInfo.
512   void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
513 
514   // Empty function -- only applicable to UseBBInfo.
515   void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
516 };
517 
518 // This class implements the CFG edges. Note the CFG can be a multi-graph.
519 template <class Edge, class BBInfo> class FuncPGOInstrumentation {
520 private:
521   Function &F;
522 
523   // Is this is context-sensitive instrumentation.
524   bool IsCS;
525 
526   // A map that stores the Comdat group in function F.
527   std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
528 
529   ValueProfileCollector VPC;
530 
531   void computeCFGHash();
532   void renameComdatFunction();
533 
534 public:
535   std::vector<std::vector<VPCandidateInfo>> ValueSites;
536   SelectInstVisitor SIVisitor;
537   std::string FuncName;
538   GlobalVariable *FuncNameVar;
539 
540   // CFG hash value for this function.
541   uint64_t FunctionHash = 0;
542 
543   // The Minimum Spanning Tree of function CFG.
544   CFGMST<Edge, BBInfo> MST;
545 
546   // Collect all the BBs that will be instrumented, and store them in
547   // InstrumentBBs.
548   void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
549 
550   // Give an edge, find the BB that will be instrumented.
551   // Return nullptr if there is no BB to be instrumented.
552   BasicBlock *getInstrBB(Edge *E);
553 
554   // Return the auxiliary BB information.
555   BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
556 
557   // Return the auxiliary BB information if available.
558   BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
559 
560   // Dump edges and BB information.
561   void dumpInfo(std::string Str = "") const {
562     MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
563                               Twine(FunctionHash) + "\t" + Str);
564   }
565 
566   FuncPGOInstrumentation(
567       Function &Func,
568       std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
569       bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
570       BlockFrequencyInfo *BFI = nullptr, bool IsCS = false)
571       : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func),
572         ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) {
573     // This should be done before CFG hash computation.
574     SIVisitor.countSelects(Func);
575     ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
576     if (!IsCS) {
577       NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
578       NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
579       NumOfPGOBB += MST.BBInfos.size();
580       ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
581     } else {
582       NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
583       NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
584       NumOfCSPGOBB += MST.BBInfos.size();
585     }
586 
587     FuncName = getPGOFuncName(F);
588     computeCFGHash();
589     if (!ComdatMembers.empty())
590       renameComdatFunction();
591     LLVM_DEBUG(dumpInfo("after CFGMST"));
592 
593     for (auto &E : MST.AllEdges) {
594       if (E->Removed)
595         continue;
596       IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
597       if (!E->InMST)
598         IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
599     }
600 
601     if (CreateGlobalVar)
602       FuncNameVar = createPGOFuncNameVar(F, FuncName);
603   }
604 };
605 
606 } // end anonymous namespace
607 
608 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
609 // value of each BB in the CFG. The higher 32 bits record the number of edges.
610 template <class Edge, class BBInfo>
611 void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
612   std::vector<uint8_t> Indexes;
613   JamCRC JC;
614   for (auto &BB : F) {
615     const Instruction *TI = BB.getTerminator();
616     for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
617       BasicBlock *Succ = TI->getSuccessor(I);
618       auto BI = findBBInfo(Succ);
619       if (BI == nullptr)
620         continue;
621       uint32_t Index = BI->Index;
622       for (int J = 0; J < 4; J++)
623         Indexes.push_back((uint8_t)(Index >> (J * 8)));
624     }
625   }
626   JC.update(Indexes);
627 
628   // Hash format for context sensitive profile. Reserve 4 bits for other
629   // information.
630   FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
631                  (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
632                  //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
633                  (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
634   // Reserve bit 60-63 for other information purpose.
635   FunctionHash &= 0x0FFFFFFFFFFFFFFF;
636   if (IsCS)
637     NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
638   LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
639                     << " CRC = " << JC.getCRC()
640                     << ", Selects = " << SIVisitor.getNumOfSelectInsts()
641                     << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
642                     << ValueSites[IPVK_IndirectCallTarget].size()
643                     << ", Hash = " << FunctionHash << "\n";);
644 }
645 
646 // Check if we can safely rename this Comdat function.
647 static bool canRenameComdat(
648     Function &F,
649     std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
650   if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
651     return false;
652 
653   // FIXME: Current only handle those Comdat groups that only containing one
654   // function and function aliases.
655   // (1) For a Comdat group containing multiple functions, we need to have a
656   // unique postfix based on the hashes for each function. There is a
657   // non-trivial code refactoring to do this efficiently.
658   // (2) Variables can not be renamed, so we can not rename Comdat function in a
659   // group including global vars.
660   Comdat *C = F.getComdat();
661   for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
662     if (dyn_cast<GlobalAlias>(CM.second))
663       continue;
664     Function *FM = dyn_cast<Function>(CM.second);
665     if (FM != &F)
666       return false;
667   }
668   return true;
669 }
670 
671 // Append the CFGHash to the Comdat function name.
672 template <class Edge, class BBInfo>
673 void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
674   if (!canRenameComdat(F, ComdatMembers))
675     return;
676   std::string OrigName = F.getName().str();
677   std::string NewFuncName =
678       Twine(F.getName() + "." + Twine(FunctionHash)).str();
679   F.setName(Twine(NewFuncName));
680   GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
681   FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
682   Comdat *NewComdat;
683   Module *M = F.getParent();
684   // For AvailableExternallyLinkage functions, change the linkage to
685   // LinkOnceODR and put them into comdat. This is because after renaming, there
686   // is no backup external copy available for the function.
687   if (!F.hasComdat()) {
688     assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
689     NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
690     F.setLinkage(GlobalValue::LinkOnceODRLinkage);
691     F.setComdat(NewComdat);
692     return;
693   }
694 
695   // This function belongs to a single function Comdat group.
696   Comdat *OrigComdat = F.getComdat();
697   std::string NewComdatName =
698       Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
699   NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
700   NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
701 
702   for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
703     if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) {
704       // For aliases, change the name directly.
705       assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F);
706       std::string OrigGAName = GA->getName().str();
707       GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash)));
708       GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA);
709       continue;
710     }
711     // Must be a function.
712     Function *CF = dyn_cast<Function>(CM.second);
713     assert(CF);
714     CF->setComdat(NewComdat);
715   }
716 }
717 
718 // Collect all the BBs that will be instruments and return them in
719 // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo.
720 template <class Edge, class BBInfo>
721 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
722     std::vector<BasicBlock *> &InstrumentBBs) {
723   // Use a worklist as we will update the vector during the iteration.
724   std::vector<Edge *> EdgeList;
725   EdgeList.reserve(MST.AllEdges.size());
726   for (auto &E : MST.AllEdges)
727     EdgeList.push_back(E.get());
728 
729   for (auto &E : EdgeList) {
730     BasicBlock *InstrBB = getInstrBB(E);
731     if (InstrBB)
732       InstrumentBBs.push_back(InstrBB);
733   }
734 
735   // Set up InEdges/OutEdges for all BBs.
736   for (auto &E : MST.AllEdges) {
737     if (E->Removed)
738       continue;
739     const BasicBlock *SrcBB = E->SrcBB;
740     const BasicBlock *DestBB = E->DestBB;
741     BBInfo &SrcInfo = getBBInfo(SrcBB);
742     BBInfo &DestInfo = getBBInfo(DestBB);
743     SrcInfo.addOutEdge(E.get());
744     DestInfo.addInEdge(E.get());
745   }
746 }
747 
748 // Given a CFG E to be instrumented, find which BB to place the instrumented
749 // code. The function will split the critical edge if necessary.
750 template <class Edge, class BBInfo>
751 BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
752   if (E->InMST || E->Removed)
753     return nullptr;
754 
755   BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
756   BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
757   // For a fake edge, instrument the real BB.
758   if (SrcBB == nullptr)
759     return DestBB;
760   if (DestBB == nullptr)
761     return SrcBB;
762 
763   auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
764     // There are basic blocks (such as catchswitch) cannot be instrumented.
765     // If the returned first insertion point is the end of BB, skip this BB.
766     if (BB->getFirstInsertionPt() == BB->end())
767       return nullptr;
768     return BB;
769   };
770 
771   // Instrument the SrcBB if it has a single successor,
772   // otherwise, the DestBB if this is not a critical edge.
773   Instruction *TI = SrcBB->getTerminator();
774   if (TI->getNumSuccessors() <= 1)
775     return canInstrument(SrcBB);
776   if (!E->IsCritical)
777     return canInstrument(DestBB);
778 
779   unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
780   BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
781   if (!InstrBB) {
782     LLVM_DEBUG(
783         dbgs() << "Fail to split critical edge: not instrument this edge.\n");
784     return nullptr;
785   }
786   // For a critical edge, we have to split. Instrument the newly
787   // created BB.
788   IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
789   LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
790                     << " --> " << getBBInfo(DestBB).Index << "\n");
791   // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
792   MST.addEdge(SrcBB, InstrBB, 0);
793   // Second one: Add new edge of InstrBB->DestBB.
794   Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
795   NewEdge1.InMST = true;
796   E->Removed = true;
797 
798   return canInstrument(InstrBB);
799 }
800 
801 // Visit all edge and instrument the edges not in MST, and do value profiling.
802 // Critical edges will be split.
803 static void instrumentOneFunc(
804     Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
805     std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
806     bool IsCS) {
807   // Split indirectbr critical edges here before computing the MST rather than
808   // later in getInstrBB() to avoid invalidating it.
809   SplitIndirectBrCriticalEdges(F, BPI, BFI);
810 
811   FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
812                                                    BFI, IsCS);
813   std::vector<BasicBlock *> InstrumentBBs;
814   FuncInfo.getInstrumentBBs(InstrumentBBs);
815   unsigned NumCounters =
816       InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
817 
818   uint32_t I = 0;
819   Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
820   for (auto *InstrBB : InstrumentBBs) {
821     IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
822     assert(Builder.GetInsertPoint() != InstrBB->end() &&
823            "Cannot get the Instrumentation point");
824     Builder.CreateCall(
825         Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
826         {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
827          Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
828          Builder.getInt32(I++)});
829   }
830 
831   // Now instrument select instructions:
832   FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
833                                        FuncInfo.FunctionHash);
834   assert(I == NumCounters);
835 
836   if (DisableValueProfiling)
837     return;
838 
839   NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
840 
841   // For each VP Kind, walk the VP candidates and instrument each one.
842   for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
843     unsigned SiteIndex = 0;
844     if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
845       continue;
846 
847     for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
848       LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
849                         << " site: CallSite Index = " << SiteIndex << "\n");
850 
851       IRBuilder<> Builder(Cand.InsertPt);
852       assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
853              "Cannot get the Instrumentation point");
854 
855       Value *ToProfile = nullptr;
856       if (Cand.V->getType()->isIntegerTy())
857         ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
858       else if (Cand.V->getType()->isPointerTy())
859         ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
860       assert(ToProfile && "value profiling Value is of unexpected type");
861 
862       Builder.CreateCall(
863           Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
864           {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
865            Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
866            Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)});
867     }
868   } // IPVK_First <= Kind <= IPVK_Last
869 }
870 
871 namespace {
872 
873 // This class represents a CFG edge in profile use compilation.
874 struct PGOUseEdge : public PGOEdge {
875   bool CountValid = false;
876   uint64_t CountValue = 0;
877 
878   PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
879       : PGOEdge(Src, Dest, W) {}
880 
881   // Set edge count value
882   void setEdgeCount(uint64_t Value) {
883     CountValue = Value;
884     CountValid = true;
885   }
886 
887   // Return the information string for this object.
888   const std::string infoString() const {
889     if (!CountValid)
890       return PGOEdge::infoString();
891     return (Twine(PGOEdge::infoString()) + "  Count=" + Twine(CountValue))
892         .str();
893   }
894 };
895 
896 using DirectEdges = SmallVector<PGOUseEdge *, 2>;
897 
898 // This class stores the auxiliary information for each BB.
899 struct UseBBInfo : public BBInfo {
900   uint64_t CountValue = 0;
901   bool CountValid;
902   int32_t UnknownCountInEdge = 0;
903   int32_t UnknownCountOutEdge = 0;
904   DirectEdges InEdges;
905   DirectEdges OutEdges;
906 
907   UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {}
908 
909   UseBBInfo(unsigned IX, uint64_t C)
910       : BBInfo(IX), CountValue(C), CountValid(true) {}
911 
912   // Set the profile count value for this BB.
913   void setBBInfoCount(uint64_t Value) {
914     CountValue = Value;
915     CountValid = true;
916   }
917 
918   // Return the information string of this object.
919   const std::string infoString() const {
920     if (!CountValid)
921       return BBInfo::infoString();
922     return (Twine(BBInfo::infoString()) + "  Count=" + Twine(CountValue)).str();
923   }
924 
925   // Add an OutEdge and update the edge count.
926   void addOutEdge(PGOUseEdge *E) {
927     OutEdges.push_back(E);
928     UnknownCountOutEdge++;
929   }
930 
931   // Add an InEdge and update the edge count.
932   void addInEdge(PGOUseEdge *E) {
933     InEdges.push_back(E);
934     UnknownCountInEdge++;
935   }
936 };
937 
938 } // end anonymous namespace
939 
940 // Sum up the count values for all the edges.
941 static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
942   uint64_t Total = 0;
943   for (auto &E : Edges) {
944     if (E->Removed)
945       continue;
946     Total += E->CountValue;
947   }
948   return Total;
949 }
950 
951 namespace {
952 
953 class PGOUseFunc {
954 public:
955   PGOUseFunc(Function &Func, Module *Modu,
956              std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
957              BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
958              ProfileSummaryInfo *PSI, bool IsCS)
959       : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
960         FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS),
961         FreqAttr(FFA_Normal), IsCS(IsCS) {}
962 
963   // Read counts for the instrumented BB from profile.
964   bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
965 
966   // Populate the counts for all BBs.
967   void populateCounters();
968 
969   // Set the branch weights based on the count values.
970   void setBranchWeights();
971 
972   // Annotate the value profile call sites for all value kind.
973   void annotateValueSites();
974 
975   // Annotate the value profile call sites for one value kind.
976   void annotateValueSites(uint32_t Kind);
977 
978   // Annotate the irreducible loop header weights.
979   void annotateIrrLoopHeaderWeights();
980 
981   // The hotness of the function from the profile count.
982   enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
983 
984   // Return the function hotness from the profile.
985   FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
986 
987   // Return the function hash.
988   uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
989 
990   // Return the profile record for this function;
991   InstrProfRecord &getProfileRecord() { return ProfileRecord; }
992 
993   // Return the auxiliary BB information.
994   UseBBInfo &getBBInfo(const BasicBlock *BB) const {
995     return FuncInfo.getBBInfo(BB);
996   }
997 
998   // Return the auxiliary BB information if available.
999   UseBBInfo *findBBInfo(const BasicBlock *BB) const {
1000     return FuncInfo.findBBInfo(BB);
1001   }
1002 
1003   Function &getFunc() const { return F; }
1004 
1005   void dumpInfo(std::string Str = "") const {
1006     FuncInfo.dumpInfo(Str);
1007   }
1008 
1009   uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1010 private:
1011   Function &F;
1012   Module *M;
1013   BlockFrequencyInfo *BFI;
1014   ProfileSummaryInfo *PSI;
1015 
1016   // This member stores the shared information with class PGOGenFunc.
1017   FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
1018 
1019   // The maximum count value in the profile. This is only used in PGO use
1020   // compilation.
1021   uint64_t ProgramMaxCount;
1022 
1023   // Position of counter that remains to be read.
1024   uint32_t CountPosition = 0;
1025 
1026   // Total size of the profile count for this function.
1027   uint32_t ProfileCountSize = 0;
1028 
1029   // ProfileRecord for this function.
1030   InstrProfRecord ProfileRecord;
1031 
1032   // Function hotness info derived from profile.
1033   FuncFreqAttr FreqAttr;
1034 
1035   // Is to use the context sensitive profile.
1036   bool IsCS;
1037 
1038   // Find the Instrumented BB and set the value. Return false on error.
1039   bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1040 
1041   // Set the edge counter value for the unknown edge -- there should be only
1042   // one unknown edge.
1043   void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1044 
1045   // Return FuncName string;
1046   const std::string getFuncName() const { return FuncInfo.FuncName; }
1047 
1048   // Set the hot/cold inline hints based on the count values.
1049   // FIXME: This function should be removed once the functionality in
1050   // the inliner is implemented.
1051   void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1052     if (PSI->isHotCount(EntryCount))
1053       FreqAttr = FFA_Hot;
1054     else if (PSI->isColdCount(MaxCount))
1055       FreqAttr = FFA_Cold;
1056   }
1057 };
1058 
1059 } // end anonymous namespace
1060 
1061 // Visit all the edges and assign the count value for the instrumented
1062 // edges and the BB. Return false on error.
1063 bool PGOUseFunc::setInstrumentedCounts(
1064     const std::vector<uint64_t> &CountFromProfile) {
1065 
1066   std::vector<BasicBlock *> InstrumentBBs;
1067   FuncInfo.getInstrumentBBs(InstrumentBBs);
1068   unsigned NumCounters =
1069       InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1070   // The number of counters here should match the number of counters
1071   // in profile. Return if they mismatch.
1072   if (NumCounters != CountFromProfile.size()) {
1073     return false;
1074   }
1075   // Set the profile count to the Instrumented BBs.
1076   uint32_t I = 0;
1077   for (BasicBlock *InstrBB : InstrumentBBs) {
1078     uint64_t CountValue = CountFromProfile[I++];
1079     UseBBInfo &Info = getBBInfo(InstrBB);
1080     Info.setBBInfoCount(CountValue);
1081   }
1082   ProfileCountSize = CountFromProfile.size();
1083   CountPosition = I;
1084 
1085   // Set the edge count and update the count of unknown edges for BBs.
1086   auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1087     E->setEdgeCount(Value);
1088     this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1089     this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1090   };
1091 
1092   // Set the profile count the Instrumented edges. There are BBs that not in
1093   // MST but not instrumented. Need to set the edge count value so that we can
1094   // populate the profile counts later.
1095   for (auto &E : FuncInfo.MST.AllEdges) {
1096     if (E->Removed || E->InMST)
1097       continue;
1098     const BasicBlock *SrcBB = E->SrcBB;
1099     UseBBInfo &SrcInfo = getBBInfo(SrcBB);
1100 
1101     // If only one out-edge, the edge profile count should be the same as BB
1102     // profile count.
1103     if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
1104       setEdgeCount(E.get(), SrcInfo.CountValue);
1105     else {
1106       const BasicBlock *DestBB = E->DestBB;
1107       UseBBInfo &DestInfo = getBBInfo(DestBB);
1108       // If only one in-edge, the edge profile count should be the same as BB
1109       // profile count.
1110       if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
1111         setEdgeCount(E.get(), DestInfo.CountValue);
1112     }
1113     if (E->CountValid)
1114       continue;
1115     // E's count should have been set from profile. If not, this meenas E skips
1116     // the instrumentation. We set the count to 0.
1117     setEdgeCount(E.get(), 0);
1118   }
1119   return true;
1120 }
1121 
1122 // Set the count value for the unknown edge. There should be one and only one
1123 // unknown edge in Edges vector.
1124 void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1125   for (auto &E : Edges) {
1126     if (E->CountValid)
1127       continue;
1128     E->setEdgeCount(Value);
1129 
1130     getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1131     getBBInfo(E->DestBB).UnknownCountInEdge--;
1132     return;
1133   }
1134   llvm_unreachable("Cannot find the unknown count edge");
1135 }
1136 
1137 // Read the profile from ProfileFileName and assign the value to the
1138 // instrumented BB and the edges. This function also updates ProgramMaxCount.
1139 // Return true if the profile are successfully read, and false on errors.
1140 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) {
1141   auto &Ctx = M->getContext();
1142   Expected<InstrProfRecord> Result =
1143       PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
1144   if (Error E = Result.takeError()) {
1145     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
1146       auto Err = IPE.get();
1147       bool SkipWarning = false;
1148       LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1149                         << FuncInfo.FuncName << ": ");
1150       if (Err == instrprof_error::unknown_function) {
1151         IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1152         SkipWarning = !PGOWarnMissing;
1153         LLVM_DEBUG(dbgs() << "unknown function");
1154       } else if (Err == instrprof_error::hash_mismatch ||
1155                  Err == instrprof_error::malformed) {
1156         IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1157         SkipWarning =
1158             NoPGOWarnMismatch ||
1159             (NoPGOWarnMismatchComdat &&
1160              (F.hasComdat() ||
1161               F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
1162         LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
1163       }
1164 
1165       LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1166       if (SkipWarning)
1167         return;
1168 
1169       std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
1170                         std::string(" Hash = ") +
1171                         std::to_string(FuncInfo.FunctionHash);
1172 
1173       Ctx.diagnose(
1174           DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1175     });
1176     return false;
1177   }
1178   ProfileRecord = std::move(Result.get());
1179   std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1180 
1181   IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1182   LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1183   uint64_t ValueSum = 0;
1184   for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1185     LLVM_DEBUG(dbgs() << "  " << I << ": " << CountFromProfile[I] << "\n");
1186     ValueSum += CountFromProfile[I];
1187   }
1188   AllZeros = (ValueSum == 0);
1189 
1190   LLVM_DEBUG(dbgs() << "SUM =  " << ValueSum << "\n");
1191 
1192   getBBInfo(nullptr).UnknownCountOutEdge = 2;
1193   getBBInfo(nullptr).UnknownCountInEdge = 2;
1194 
1195   if (!setInstrumentedCounts(CountFromProfile)) {
1196     LLVM_DEBUG(
1197         dbgs() << "Inconsistent number of counts, skipping this function");
1198     Ctx.diagnose(DiagnosticInfoPGOProfile(
1199         M->getName().data(),
1200         Twine("Inconsistent number of counts in ") + F.getName().str()
1201         + Twine(": the profile may be stale or there is a function name collision."),
1202         DS_Warning));
1203     return false;
1204   }
1205   ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1206   return true;
1207 }
1208 
1209 // Populate the counters from instrumented BBs to all BBs.
1210 // In the end of this operation, all BBs should have a valid count value.
1211 void PGOUseFunc::populateCounters() {
1212   bool Changes = true;
1213   unsigned NumPasses = 0;
1214   while (Changes) {
1215     NumPasses++;
1216     Changes = false;
1217 
1218     // For efficient traversal, it's better to start from the end as most
1219     // of the instrumented edges are at the end.
1220     for (auto &BB : reverse(F)) {
1221       UseBBInfo *Count = findBBInfo(&BB);
1222       if (Count == nullptr)
1223         continue;
1224       if (!Count->CountValid) {
1225         if (Count->UnknownCountOutEdge == 0) {
1226           Count->CountValue = sumEdgeCount(Count->OutEdges);
1227           Count->CountValid = true;
1228           Changes = true;
1229         } else if (Count->UnknownCountInEdge == 0) {
1230           Count->CountValue = sumEdgeCount(Count->InEdges);
1231           Count->CountValid = true;
1232           Changes = true;
1233         }
1234       }
1235       if (Count->CountValid) {
1236         if (Count->UnknownCountOutEdge == 1) {
1237           uint64_t Total = 0;
1238           uint64_t OutSum = sumEdgeCount(Count->OutEdges);
1239           // If the one of the successor block can early terminate (no-return),
1240           // we can end up with situation where out edge sum count is larger as
1241           // the source BB's count is collected by a post-dominated block.
1242           if (Count->CountValue > OutSum)
1243             Total = Count->CountValue - OutSum;
1244           setEdgeCount(Count->OutEdges, Total);
1245           Changes = true;
1246         }
1247         if (Count->UnknownCountInEdge == 1) {
1248           uint64_t Total = 0;
1249           uint64_t InSum = sumEdgeCount(Count->InEdges);
1250           if (Count->CountValue > InSum)
1251             Total = Count->CountValue - InSum;
1252           setEdgeCount(Count->InEdges, Total);
1253           Changes = true;
1254         }
1255       }
1256     }
1257   }
1258 
1259   LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1260 #ifndef NDEBUG
1261   // Assert every BB has a valid counter.
1262   for (auto &BB : F) {
1263     auto BI = findBBInfo(&BB);
1264     if (BI == nullptr)
1265       continue;
1266     assert(BI->CountValid && "BB count is not valid");
1267   }
1268 #endif
1269   uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
1270   F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
1271   uint64_t FuncMaxCount = FuncEntryCount;
1272   for (auto &BB : F) {
1273     auto BI = findBBInfo(&BB);
1274     if (BI == nullptr)
1275       continue;
1276     FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
1277   }
1278   markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1279 
1280   // Now annotate select instructions
1281   FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
1282   assert(CountPosition == ProfileCountSize);
1283 
1284   LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1285 }
1286 
1287 // Assign the scaled count values to the BB with multiple out edges.
1288 void PGOUseFunc::setBranchWeights() {
1289   // Generate MD_prof metadata for every branch instruction.
1290   LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1291                     << " IsCS=" << IsCS << "\n");
1292   for (auto &BB : F) {
1293     Instruction *TI = BB.getTerminator();
1294     if (TI->getNumSuccessors() < 2)
1295       continue;
1296     if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1297           isa<IndirectBrInst>(TI)))
1298       continue;
1299 
1300     if (getBBInfo(&BB).CountValue == 0)
1301       continue;
1302 
1303     // We have a non-zero Branch BB.
1304     const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1305     unsigned Size = BBCountInfo.OutEdges.size();
1306     SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1307     uint64_t MaxCount = 0;
1308     for (unsigned s = 0; s < Size; s++) {
1309       const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1310       const BasicBlock *SrcBB = E->SrcBB;
1311       const BasicBlock *DestBB = E->DestBB;
1312       if (DestBB == nullptr)
1313         continue;
1314       unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1315       uint64_t EdgeCount = E->CountValue;
1316       if (EdgeCount > MaxCount)
1317         MaxCount = EdgeCount;
1318       EdgeCounts[SuccNum] = EdgeCount;
1319     }
1320     setProfMetadata(M, TI, EdgeCounts, MaxCount);
1321   }
1322 }
1323 
1324 static bool isIndirectBrTarget(BasicBlock *BB) {
1325   for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
1326     if (isa<IndirectBrInst>((*PI)->getTerminator()))
1327       return true;
1328   }
1329   return false;
1330 }
1331 
1332 void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1333   LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1334   // Find irr loop headers
1335   for (auto &BB : F) {
1336     // As a heuristic also annotate indrectbr targets as they have a high chance
1337     // to become an irreducible loop header after the indirectbr tail
1338     // duplication.
1339     if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1340       Instruction *TI = BB.getTerminator();
1341       const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1342       setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
1343     }
1344   }
1345 }
1346 
1347 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1348   Module *M = F.getParent();
1349   IRBuilder<> Builder(&SI);
1350   Type *Int64Ty = Builder.getInt64Ty();
1351   Type *I8PtrTy = Builder.getInt8PtrTy();
1352   auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1353   Builder.CreateCall(
1354       Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1355       {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
1356        Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1357        Builder.getInt32(*CurCtrIdx), Step});
1358   ++(*CurCtrIdx);
1359 }
1360 
1361 void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1362   std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1363   assert(*CurCtrIdx < CountFromProfile.size() &&
1364          "Out of bound access of counters");
1365   uint64_t SCounts[2];
1366   SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1367   ++(*CurCtrIdx);
1368   uint64_t TotalCount = 0;
1369   auto BI = UseFunc->findBBInfo(SI.getParent());
1370   if (BI != nullptr)
1371     TotalCount = BI->CountValue;
1372   // False Count
1373   SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1374   uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1375   if (MaxCount)
1376     setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1377 }
1378 
1379 void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1380   if (!PGOInstrSelect)
1381     return;
1382   // FIXME: do not handle this yet.
1383   if (SI.getCondition()->getType()->isVectorTy())
1384     return;
1385 
1386   switch (Mode) {
1387   case VM_counting:
1388     NSIs++;
1389     return;
1390   case VM_instrument:
1391     instrumentOneSelectInst(SI);
1392     return;
1393   case VM_annotate:
1394     annotateOneSelectInst(SI);
1395     return;
1396   }
1397 
1398   llvm_unreachable("Unknown visiting mode");
1399 }
1400 
1401 // Traverse all valuesites and annotate the instructions for all value kind.
1402 void PGOUseFunc::annotateValueSites() {
1403   if (DisableValueProfiling)
1404     return;
1405 
1406   // Create the PGOFuncName meta data.
1407   createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1408 
1409   for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1410     annotateValueSites(Kind);
1411 }
1412 
1413 // Annotate the instructions for a specific value kind.
1414 void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1415   assert(Kind <= IPVK_Last);
1416   unsigned ValueSiteIndex = 0;
1417   auto &ValueSites = FuncInfo.ValueSites[Kind];
1418   unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1419   if (NumValueSites != ValueSites.size()) {
1420     auto &Ctx = M->getContext();
1421     Ctx.diagnose(DiagnosticInfoPGOProfile(
1422         M->getName().data(),
1423         Twine("Inconsistent number of value sites for ") +
1424             Twine(ValueProfKindDescr[Kind]) +
1425             Twine(" profiling in \"") + F.getName().str() +
1426             Twine("\", possibly due to the use of a stale profile."),
1427         DS_Warning));
1428     return;
1429   }
1430 
1431   for (VPCandidateInfo &I : ValueSites) {
1432     LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1433                       << "): Index = " << ValueSiteIndex << " out of "
1434                       << NumValueSites << "\n");
1435     annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
1436                       static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1437                       Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
1438                                              : MaxNumAnnotations);
1439     ValueSiteIndex++;
1440   }
1441 }
1442 
1443 // Collect the set of members for each Comdat in module M and store
1444 // in ComdatMembers.
1445 static void collectComdatMembers(
1446     Module &M,
1447     std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1448   if (!DoComdatRenaming)
1449     return;
1450   for (Function &F : M)
1451     if (Comdat *C = F.getComdat())
1452       ComdatMembers.insert(std::make_pair(C, &F));
1453   for (GlobalVariable &GV : M.globals())
1454     if (Comdat *C = GV.getComdat())
1455       ComdatMembers.insert(std::make_pair(C, &GV));
1456   for (GlobalAlias &GA : M.aliases())
1457     if (Comdat *C = GA.getComdat())
1458       ComdatMembers.insert(std::make_pair(C, &GA));
1459 }
1460 
1461 static bool InstrumentAllFunctions(
1462     Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
1463     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1464   // For the context-sensitve instrumentation, we should have a separated pass
1465   // (before LTO/ThinLTO linking) to create these variables.
1466   if (!IsCS)
1467     createIRLevelProfileFlagVar(M, /* IsCS */ false);
1468   std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1469   collectComdatMembers(M, ComdatMembers);
1470 
1471   for (auto &F : M) {
1472     if (F.isDeclaration())
1473       continue;
1474     auto *BPI = LookupBPI(F);
1475     auto *BFI = LookupBFI(F);
1476     instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS);
1477   }
1478   return true;
1479 }
1480 
1481 PreservedAnalyses
1482 PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
1483   createProfileFileNameVar(M, CSInstrName);
1484   createIRLevelProfileFlagVar(M, /* IsCS */ true);
1485   return PreservedAnalyses::all();
1486 }
1487 
1488 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
1489   if (skipModule(M))
1490     return false;
1491 
1492   auto LookupBPI = [this](Function &F) {
1493     return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
1494   };
1495   auto LookupBFI = [this](Function &F) {
1496     return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
1497   };
1498   return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS);
1499 }
1500 
1501 PreservedAnalyses PGOInstrumentationGen::run(Module &M,
1502                                              ModuleAnalysisManager &AM) {
1503   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1504   auto LookupBPI = [&FAM](Function &F) {
1505     return &FAM.getResult<BranchProbabilityAnalysis>(F);
1506   };
1507 
1508   auto LookupBFI = [&FAM](Function &F) {
1509     return &FAM.getResult<BlockFrequencyAnalysis>(F);
1510   };
1511 
1512   if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS))
1513     return PreservedAnalyses::all();
1514 
1515   return PreservedAnalyses::none();
1516 }
1517 
1518 static bool annotateAllFunctions(
1519     Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
1520     function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
1521     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
1522     ProfileSummaryInfo *PSI, bool IsCS) {
1523   LLVM_DEBUG(dbgs() << "Read in profile counters: ");
1524   auto &Ctx = M.getContext();
1525   // Read the counter array from file.
1526   auto ReaderOrErr =
1527       IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
1528   if (Error E = ReaderOrErr.takeError()) {
1529     handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
1530       Ctx.diagnose(
1531           DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
1532     });
1533     return false;
1534   }
1535 
1536   std::unique_ptr<IndexedInstrProfReader> PGOReader =
1537       std::move(ReaderOrErr.get());
1538   if (!PGOReader) {
1539     Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
1540                                           StringRef("Cannot get PGOReader")));
1541     return false;
1542   }
1543   if (!PGOReader->hasCSIRLevelProfile() && IsCS)
1544     return false;
1545 
1546   // TODO: might need to change the warning once the clang option is finalized.
1547   if (!PGOReader->isIRLevelProfile()) {
1548     Ctx.diagnose(DiagnosticInfoPGOProfile(
1549         ProfileFileName.data(), "Not an IR level instrumentation profile"));
1550     return false;
1551   }
1552 
1553   // Add the profile summary (read from the header of the indexed summary) here
1554   // so that we can use it below when reading counters (which checks if the
1555   // function should be marked with a cold or inlinehint attribute).
1556   M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
1557                       IsCS ? ProfileSummary::PSK_CSInstr
1558                            : ProfileSummary::PSK_Instr);
1559 
1560   std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1561   collectComdatMembers(M, ComdatMembers);
1562   std::vector<Function *> HotFunctions;
1563   std::vector<Function *> ColdFunctions;
1564   for (auto &F : M) {
1565     if (F.isDeclaration())
1566       continue;
1567     auto *BPI = LookupBPI(F);
1568     auto *BFI = LookupBFI(F);
1569     // Split indirectbr critical edges here before computing the MST rather than
1570     // later in getInstrBB() to avoid invalidating it.
1571     SplitIndirectBrCriticalEdges(F, BPI, BFI);
1572     PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, PSI, IsCS);
1573     bool AllZeros = false;
1574     if (!Func.readCounters(PGOReader.get(), AllZeros))
1575       continue;
1576     if (AllZeros) {
1577       F.setEntryCount(ProfileCount(0, Function::PCT_Real));
1578       if (Func.getProgramMaxCount() != 0)
1579         ColdFunctions.push_back(&F);
1580       continue;
1581     }
1582     Func.populateCounters();
1583     Func.setBranchWeights();
1584     Func.annotateValueSites();
1585     Func.annotateIrrLoopHeaderWeights();
1586     PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
1587     if (FreqAttr == PGOUseFunc::FFA_Cold)
1588       ColdFunctions.push_back(&F);
1589     else if (FreqAttr == PGOUseFunc::FFA_Hot)
1590       HotFunctions.push_back(&F);
1591     if (PGOViewCounts != PGOVCT_None &&
1592         (ViewBlockFreqFuncName.empty() ||
1593          F.getName().equals(ViewBlockFreqFuncName))) {
1594       LoopInfo LI{DominatorTree(F)};
1595       std::unique_ptr<BranchProbabilityInfo> NewBPI =
1596           std::make_unique<BranchProbabilityInfo>(F, LI);
1597       std::unique_ptr<BlockFrequencyInfo> NewBFI =
1598           std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
1599       if (PGOViewCounts == PGOVCT_Graph)
1600         NewBFI->view();
1601       else if (PGOViewCounts == PGOVCT_Text) {
1602         dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
1603         NewBFI->print(dbgs());
1604       }
1605     }
1606     if (PGOViewRawCounts != PGOVCT_None &&
1607         (ViewBlockFreqFuncName.empty() ||
1608          F.getName().equals(ViewBlockFreqFuncName))) {
1609       if (PGOViewRawCounts == PGOVCT_Graph)
1610         if (ViewBlockFreqFuncName.empty())
1611           WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1612         else
1613           ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1614       else if (PGOViewRawCounts == PGOVCT_Text) {
1615         dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
1616         Func.dumpInfo();
1617       }
1618     }
1619   }
1620 
1621   // Set function hotness attribute from the profile.
1622   // We have to apply these attributes at the end because their presence
1623   // can affect the BranchProbabilityInfo of any callers, resulting in an
1624   // inconsistent MST between prof-gen and prof-use.
1625   for (auto &F : HotFunctions) {
1626     F->addFnAttr(Attribute::InlineHint);
1627     LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
1628                       << "\n");
1629   }
1630   for (auto &F : ColdFunctions) {
1631     F->addFnAttr(Attribute::Cold);
1632     LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
1633                       << "\n");
1634   }
1635   return true;
1636 }
1637 
1638 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
1639                                              std::string RemappingFilename,
1640                                              bool IsCS)
1641     : ProfileFileName(std::move(Filename)),
1642       ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
1643   if (!PGOTestProfileFile.empty())
1644     ProfileFileName = PGOTestProfileFile;
1645   if (!PGOTestProfileRemappingFile.empty())
1646     ProfileRemappingFileName = PGOTestProfileRemappingFile;
1647 }
1648 
1649 PreservedAnalyses PGOInstrumentationUse::run(Module &M,
1650                                              ModuleAnalysisManager &AM) {
1651 
1652   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1653   auto LookupBPI = [&FAM](Function &F) {
1654     return &FAM.getResult<BranchProbabilityAnalysis>(F);
1655   };
1656 
1657   auto LookupBFI = [&FAM](Function &F) {
1658     return &FAM.getResult<BlockFrequencyAnalysis>(F);
1659   };
1660 
1661   auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
1662 
1663   if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
1664                             LookupBPI, LookupBFI, PSI, IsCS))
1665     return PreservedAnalyses::all();
1666 
1667   return PreservedAnalyses::none();
1668 }
1669 
1670 bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
1671   if (skipModule(M))
1672     return false;
1673 
1674   auto LookupBPI = [this](Function &F) {
1675     return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
1676   };
1677   auto LookupBFI = [this](Function &F) {
1678     return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
1679   };
1680 
1681   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1682   return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, PSI,
1683                               IsCS);
1684 }
1685 
1686 static std::string getSimpleNodeName(const BasicBlock *Node) {
1687   if (!Node->getName().empty())
1688     return Node->getName();
1689 
1690   std::string SimpleNodeName;
1691   raw_string_ostream OS(SimpleNodeName);
1692   Node->printAsOperand(OS, false);
1693   return OS.str();
1694 }
1695 
1696 void llvm::setProfMetadata(Module *M, Instruction *TI,
1697                            ArrayRef<uint64_t> EdgeCounts,
1698                            uint64_t MaxCount) {
1699   MDBuilder MDB(M->getContext());
1700   assert(MaxCount > 0 && "Bad max count");
1701   uint64_t Scale = calculateCountScale(MaxCount);
1702   SmallVector<unsigned, 4> Weights;
1703   for (const auto &ECI : EdgeCounts)
1704     Weights.push_back(scaleBranchCount(ECI, Scale));
1705 
1706   LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
1707                                            : Weights) {
1708     dbgs() << W << " ";
1709   } dbgs() << "\n";);
1710 
1711   misexpect::verifyMisExpect(TI, Weights, TI->getContext());
1712 
1713   TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1714   if (EmitBranchProbability) {
1715     std::string BrCondStr = getBranchCondString(TI);
1716     if (BrCondStr.empty())
1717       return;
1718 
1719     uint64_t WSum =
1720         std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
1721                         [](uint64_t w1, uint64_t w2) { return w1 + w2; });
1722     uint64_t TotalCount =
1723         std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
1724                         [](uint64_t c1, uint64_t c2) { return c1 + c2; });
1725     Scale = calculateCountScale(WSum);
1726     BranchProbability BP(scaleBranchCount(Weights[0], Scale),
1727                          scaleBranchCount(WSum, Scale));
1728     std::string BranchProbStr;
1729     raw_string_ostream OS(BranchProbStr);
1730     OS << BP;
1731     OS << " (total count : " << TotalCount << ")";
1732     OS.flush();
1733     Function *F = TI->getParent()->getParent();
1734     OptimizationRemarkEmitter ORE(F);
1735     ORE.emit([&]() {
1736       return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
1737              << BrCondStr << " is true with probability : " << BranchProbStr;
1738     });
1739   }
1740 }
1741 
1742 namespace llvm {
1743 
1744 void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {
1745   MDBuilder MDB(M->getContext());
1746   TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
1747                   MDB.createIrrLoopHeaderWeight(Count));
1748 }
1749 
1750 template <> struct GraphTraits<PGOUseFunc *> {
1751   using NodeRef = const BasicBlock *;
1752   using ChildIteratorType = succ_const_iterator;
1753   using nodes_iterator = pointer_iterator<Function::const_iterator>;
1754 
1755   static NodeRef getEntryNode(const PGOUseFunc *G) {
1756     return &G->getFunc().front();
1757   }
1758 
1759   static ChildIteratorType child_begin(const NodeRef N) {
1760     return succ_begin(N);
1761   }
1762 
1763   static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
1764 
1765   static nodes_iterator nodes_begin(const PGOUseFunc *G) {
1766     return nodes_iterator(G->getFunc().begin());
1767   }
1768 
1769   static nodes_iterator nodes_end(const PGOUseFunc *G) {
1770     return nodes_iterator(G->getFunc().end());
1771   }
1772 };
1773 
1774 template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
1775   explicit DOTGraphTraits(bool isSimple = false)
1776       : DefaultDOTGraphTraits(isSimple) {}
1777 
1778   static std::string getGraphName(const PGOUseFunc *G) {
1779     return G->getFunc().getName();
1780   }
1781 
1782   std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
1783     std::string Result;
1784     raw_string_ostream OS(Result);
1785 
1786     OS << getSimpleNodeName(Node) << ":\\l";
1787     UseBBInfo *BI = Graph->findBBInfo(Node);
1788     OS << "Count : ";
1789     if (BI && BI->CountValid)
1790       OS << BI->CountValue << "\\l";
1791     else
1792       OS << "Unknown\\l";
1793 
1794     if (!PGOInstrSelect)
1795       return Result;
1796 
1797     for (auto BI = Node->begin(); BI != Node->end(); ++BI) {
1798       auto *I = &*BI;
1799       if (!isa<SelectInst>(I))
1800         continue;
1801       // Display scaled counts for SELECT instruction:
1802       OS << "SELECT : { T = ";
1803       uint64_t TC, FC;
1804       bool HasProf = I->extractProfMetadata(TC, FC);
1805       if (!HasProf)
1806         OS << "Unknown, F = Unknown }\\l";
1807       else
1808         OS << TC << ", F = " << FC << " }\\l";
1809     }
1810     return Result;
1811   }
1812 };
1813 
1814 } // end namespace llvm
1815