1*700637cbSDimitry Andric //===- CtxProfAnalysis.cpp - contextual profile analysis ------------------===//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric //
9*700637cbSDimitry Andric // Implementation of the contextual profile analysis, which maintains contextual
10*700637cbSDimitry Andric // profiling info through IPO passes.
11*700637cbSDimitry Andric //
12*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
13*700637cbSDimitry Andric
14*700637cbSDimitry Andric #include "llvm/Analysis/CtxProfAnalysis.h"
15*700637cbSDimitry Andric #include "llvm/ADT/APInt.h"
16*700637cbSDimitry Andric #include "llvm/ADT/STLExtras.h"
17*700637cbSDimitry Andric #include "llvm/Analysis/CFG.h"
18*700637cbSDimitry Andric #include "llvm/IR/Analysis.h"
19*700637cbSDimitry Andric #include "llvm/IR/Dominators.h"
20*700637cbSDimitry Andric #include "llvm/IR/IntrinsicInst.h"
21*700637cbSDimitry Andric #include "llvm/IR/Module.h"
22*700637cbSDimitry Andric #include "llvm/IR/PassManager.h"
23*700637cbSDimitry Andric #include "llvm/ProfileData/PGOCtxProfReader.h"
24*700637cbSDimitry Andric #include "llvm/Support/CommandLine.h"
25*700637cbSDimitry Andric #include "llvm/Support/MemoryBuffer.h"
26*700637cbSDimitry Andric #include "llvm/Support/Path.h"
27*700637cbSDimitry Andric #include <deque>
28*700637cbSDimitry Andric #include <memory>
29*700637cbSDimitry Andric
30*700637cbSDimitry Andric #define DEBUG_TYPE "ctx_prof"
31*700637cbSDimitry Andric
32*700637cbSDimitry Andric using namespace llvm;
33*700637cbSDimitry Andric cl::opt<std::string>
34*700637cbSDimitry Andric UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden,
35*700637cbSDimitry Andric cl::desc("Use the specified contextual profile file"));
36*700637cbSDimitry Andric
37*700637cbSDimitry Andric static cl::opt<CtxProfAnalysisPrinterPass::PrintMode> PrintLevel(
38*700637cbSDimitry Andric "ctx-profile-printer-level",
39*700637cbSDimitry Andric cl::init(CtxProfAnalysisPrinterPass::PrintMode::YAML), cl::Hidden,
40*700637cbSDimitry Andric cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything,
41*700637cbSDimitry Andric "everything", "print everything - most verbose"),
42*700637cbSDimitry Andric clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML, "yaml",
43*700637cbSDimitry Andric "just the yaml representation of the profile")),
44*700637cbSDimitry Andric cl::desc("Verbosity level of the contextual profile printer pass."));
45*700637cbSDimitry Andric
46*700637cbSDimitry Andric static cl::opt<bool> ForceIsInSpecializedModule(
47*700637cbSDimitry Andric "ctx-profile-force-is-specialized", cl::init(false),
48*700637cbSDimitry Andric cl::desc("Treat the given module as-if it were containing the "
49*700637cbSDimitry Andric "post-thinlink module containing the root"));
50*700637cbSDimitry Andric
51*700637cbSDimitry Andric const char *AssignGUIDPass::GUIDMetadataName = "guid";
52*700637cbSDimitry Andric
53*700637cbSDimitry Andric namespace llvm {
54*700637cbSDimitry Andric class ProfileAnnotatorImpl final {
55*700637cbSDimitry Andric friend class ProfileAnnotator;
56*700637cbSDimitry Andric class BBInfo;
57*700637cbSDimitry Andric struct EdgeInfo {
58*700637cbSDimitry Andric BBInfo *const Src;
59*700637cbSDimitry Andric BBInfo *const Dest;
60*700637cbSDimitry Andric std::optional<uint64_t> Count;
61*700637cbSDimitry Andric
EdgeInfollvm::ProfileAnnotatorImpl::EdgeInfo62*700637cbSDimitry Andric explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {}
63*700637cbSDimitry Andric };
64*700637cbSDimitry Andric
65*700637cbSDimitry Andric class BBInfo {
66*700637cbSDimitry Andric std::optional<uint64_t> Count;
67*700637cbSDimitry Andric // OutEdges is dimensioned to match the number of terminator operands.
68*700637cbSDimitry Andric // Entries in the vector match the index in the terminator operand list. In
69*700637cbSDimitry Andric // some cases - see `shouldExcludeEdge` and its implementation - an entry
70*700637cbSDimitry Andric // will be nullptr.
71*700637cbSDimitry Andric // InEdges doesn't have the above constraint.
72*700637cbSDimitry Andric SmallVector<EdgeInfo *> OutEdges;
73*700637cbSDimitry Andric SmallVector<EdgeInfo *> InEdges;
74*700637cbSDimitry Andric size_t UnknownCountOutEdges = 0;
75*700637cbSDimitry Andric size_t UnknownCountInEdges = 0;
76*700637cbSDimitry Andric
77*700637cbSDimitry Andric // Pass AssumeAllKnown when we try to propagate counts from edges to BBs -
78*700637cbSDimitry Andric // because all the edge counters must be known.
79*700637cbSDimitry Andric // Return std::nullopt if there were no edges to sum. The user can decide
80*700637cbSDimitry Andric // how to interpret that.
getEdgeSum(const SmallVector<EdgeInfo * > & Edges,bool AssumeAllKnown) const81*700637cbSDimitry Andric std::optional<uint64_t> getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
82*700637cbSDimitry Andric bool AssumeAllKnown) const {
83*700637cbSDimitry Andric std::optional<uint64_t> Sum;
84*700637cbSDimitry Andric for (const auto *E : Edges) {
85*700637cbSDimitry Andric // `Edges` may be `OutEdges`, case in which `E` could be nullptr.
86*700637cbSDimitry Andric if (E) {
87*700637cbSDimitry Andric if (!Sum.has_value())
88*700637cbSDimitry Andric Sum = 0;
89*700637cbSDimitry Andric *Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U));
90*700637cbSDimitry Andric }
91*700637cbSDimitry Andric }
92*700637cbSDimitry Andric return Sum;
93*700637cbSDimitry Andric }
94*700637cbSDimitry Andric
computeCountFrom(const SmallVector<EdgeInfo * > & Edges)95*700637cbSDimitry Andric bool computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
96*700637cbSDimitry Andric assert(!Count.has_value());
97*700637cbSDimitry Andric Count = getEdgeSum(Edges, true);
98*700637cbSDimitry Andric return Count.has_value();
99*700637cbSDimitry Andric }
100*700637cbSDimitry Andric
setSingleUnknownEdgeCount(SmallVector<EdgeInfo * > & Edges)101*700637cbSDimitry Andric void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {
102*700637cbSDimitry Andric uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U);
103*700637cbSDimitry Andric uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U;
104*700637cbSDimitry Andric EdgeInfo *E = nullptr;
105*700637cbSDimitry Andric for (auto *I : Edges)
106*700637cbSDimitry Andric if (I && !I->Count.has_value()) {
107*700637cbSDimitry Andric E = I;
108*700637cbSDimitry Andric #ifdef NDEBUG
109*700637cbSDimitry Andric break;
110*700637cbSDimitry Andric #else
111*700637cbSDimitry Andric assert((!E || E == I) &&
112*700637cbSDimitry Andric "Expected exactly one edge to have an unknown count, "
113*700637cbSDimitry Andric "found a second one");
114*700637cbSDimitry Andric continue;
115*700637cbSDimitry Andric #endif
116*700637cbSDimitry Andric }
117*700637cbSDimitry Andric assert(E && "Expected exactly one edge to have an unknown count");
118*700637cbSDimitry Andric assert(!E->Count.has_value());
119*700637cbSDimitry Andric E->Count = EdgeVal;
120*700637cbSDimitry Andric assert(E->Src->UnknownCountOutEdges > 0);
121*700637cbSDimitry Andric assert(E->Dest->UnknownCountInEdges > 0);
122*700637cbSDimitry Andric --E->Src->UnknownCountOutEdges;
123*700637cbSDimitry Andric --E->Dest->UnknownCountInEdges;
124*700637cbSDimitry Andric }
125*700637cbSDimitry Andric
126*700637cbSDimitry Andric public:
BBInfo(size_t NumInEdges,size_t NumOutEdges,std::optional<uint64_t> Count)127*700637cbSDimitry Andric BBInfo(size_t NumInEdges, size_t NumOutEdges, std::optional<uint64_t> Count)
128*700637cbSDimitry Andric : Count(Count) {
129*700637cbSDimitry Andric // For in edges, we just want to pre-allocate enough space, since we know
130*700637cbSDimitry Andric // it at this stage. For out edges, we will insert edges at the indices
131*700637cbSDimitry Andric // corresponding to positions in this BB's terminator instruction, so we
132*700637cbSDimitry Andric // construct a default (nullptr values)-initialized vector. A nullptr edge
133*700637cbSDimitry Andric // corresponds to those that are excluded (see shouldExcludeEdge).
134*700637cbSDimitry Andric InEdges.reserve(NumInEdges);
135*700637cbSDimitry Andric OutEdges.resize(NumOutEdges);
136*700637cbSDimitry Andric }
137*700637cbSDimitry Andric
tryTakeCountFromKnownOutEdges(const BasicBlock & BB)138*700637cbSDimitry Andric bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) {
139*700637cbSDimitry Andric if (!UnknownCountOutEdges) {
140*700637cbSDimitry Andric return computeCountFrom(OutEdges);
141*700637cbSDimitry Andric }
142*700637cbSDimitry Andric return false;
143*700637cbSDimitry Andric }
144*700637cbSDimitry Andric
tryTakeCountFromKnownInEdges(const BasicBlock & BB)145*700637cbSDimitry Andric bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) {
146*700637cbSDimitry Andric if (!UnknownCountInEdges) {
147*700637cbSDimitry Andric return computeCountFrom(InEdges);
148*700637cbSDimitry Andric }
149*700637cbSDimitry Andric return false;
150*700637cbSDimitry Andric }
151*700637cbSDimitry Andric
addInEdge(EdgeInfo & Info)152*700637cbSDimitry Andric void addInEdge(EdgeInfo &Info) {
153*700637cbSDimitry Andric InEdges.push_back(&Info);
154*700637cbSDimitry Andric ++UnknownCountInEdges;
155*700637cbSDimitry Andric }
156*700637cbSDimitry Andric
157*700637cbSDimitry Andric // For the out edges, we care about the position we place them in, which is
158*700637cbSDimitry Andric // the position in terminator instruction's list (at construction). Later,
159*700637cbSDimitry Andric // we build branch_weights metadata with edge frequency values matching
160*700637cbSDimitry Andric // these positions.
addOutEdge(size_t Index,EdgeInfo & Info)161*700637cbSDimitry Andric void addOutEdge(size_t Index, EdgeInfo &Info) {
162*700637cbSDimitry Andric OutEdges[Index] = &Info;
163*700637cbSDimitry Andric ++UnknownCountOutEdges;
164*700637cbSDimitry Andric }
165*700637cbSDimitry Andric
hasCount() const166*700637cbSDimitry Andric bool hasCount() const { return Count.has_value(); }
167*700637cbSDimitry Andric
getCount() const168*700637cbSDimitry Andric uint64_t getCount() const { return *Count; }
169*700637cbSDimitry Andric
trySetSingleUnknownInEdgeCount()170*700637cbSDimitry Andric bool trySetSingleUnknownInEdgeCount() {
171*700637cbSDimitry Andric if (UnknownCountInEdges == 1) {
172*700637cbSDimitry Andric setSingleUnknownEdgeCount(InEdges);
173*700637cbSDimitry Andric return true;
174*700637cbSDimitry Andric }
175*700637cbSDimitry Andric return false;
176*700637cbSDimitry Andric }
177*700637cbSDimitry Andric
trySetSingleUnknownOutEdgeCount()178*700637cbSDimitry Andric bool trySetSingleUnknownOutEdgeCount() {
179*700637cbSDimitry Andric if (UnknownCountOutEdges == 1) {
180*700637cbSDimitry Andric setSingleUnknownEdgeCount(OutEdges);
181*700637cbSDimitry Andric return true;
182*700637cbSDimitry Andric }
183*700637cbSDimitry Andric return false;
184*700637cbSDimitry Andric }
getNumOutEdges() const185*700637cbSDimitry Andric size_t getNumOutEdges() const { return OutEdges.size(); }
186*700637cbSDimitry Andric
getEdgeCount(size_t Index) const187*700637cbSDimitry Andric uint64_t getEdgeCount(size_t Index) const {
188*700637cbSDimitry Andric if (auto *E = OutEdges[Index])
189*700637cbSDimitry Andric return *E->Count;
190*700637cbSDimitry Andric return 0U;
191*700637cbSDimitry Andric }
192*700637cbSDimitry Andric };
193*700637cbSDimitry Andric
194*700637cbSDimitry Andric const Function &F;
195*700637cbSDimitry Andric ArrayRef<uint64_t> Counters;
196*700637cbSDimitry Andric // To be accessed through getBBInfo() after construction.
197*700637cbSDimitry Andric std::map<const BasicBlock *, BBInfo> BBInfos;
198*700637cbSDimitry Andric std::vector<EdgeInfo> EdgeInfos;
199*700637cbSDimitry Andric
200*700637cbSDimitry Andric // The only criteria for exclusion is faux suspend -> exit edges in presplit
201*700637cbSDimitry Andric // coroutines. The API serves for readability, currently.
shouldExcludeEdge(const BasicBlock & Src,const BasicBlock & Dest) const202*700637cbSDimitry Andric bool shouldExcludeEdge(const BasicBlock &Src, const BasicBlock &Dest) const {
203*700637cbSDimitry Andric return llvm::isPresplitCoroSuspendExitEdge(Src, Dest);
204*700637cbSDimitry Andric }
205*700637cbSDimitry Andric
getBBInfo(const BasicBlock & BB)206*700637cbSDimitry Andric BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; }
207*700637cbSDimitry Andric
getBBInfo(const BasicBlock & BB) const208*700637cbSDimitry Andric const BBInfo &getBBInfo(const BasicBlock &BB) const {
209*700637cbSDimitry Andric return BBInfos.find(&BB)->second;
210*700637cbSDimitry Andric }
211*700637cbSDimitry Andric
212*700637cbSDimitry Andric // validation function after we propagate the counters: all BBs and edges'
213*700637cbSDimitry Andric // counters must have a value.
allCountersAreAssigned() const214*700637cbSDimitry Andric bool allCountersAreAssigned() const {
215*700637cbSDimitry Andric for (const auto &BBInfo : BBInfos)
216*700637cbSDimitry Andric if (!BBInfo.second.hasCount())
217*700637cbSDimitry Andric return false;
218*700637cbSDimitry Andric for (const auto &EdgeInfo : EdgeInfos)
219*700637cbSDimitry Andric if (!EdgeInfo.Count.has_value())
220*700637cbSDimitry Andric return false;
221*700637cbSDimitry Andric return true;
222*700637cbSDimitry Andric }
223*700637cbSDimitry Andric
224*700637cbSDimitry Andric /// Check that all paths from the entry basic block that use edges with
225*700637cbSDimitry Andric /// non-zero counts arrive at a basic block with no successors (i.e. "exit")
allTakenPathsExit() const226*700637cbSDimitry Andric bool allTakenPathsExit() const {
227*700637cbSDimitry Andric std::deque<const BasicBlock *> Worklist;
228*700637cbSDimitry Andric DenseSet<const BasicBlock *> Visited;
229*700637cbSDimitry Andric Worklist.push_back(&F.getEntryBlock());
230*700637cbSDimitry Andric bool HitExit = false;
231*700637cbSDimitry Andric while (!Worklist.empty()) {
232*700637cbSDimitry Andric const auto *BB = Worklist.front();
233*700637cbSDimitry Andric Worklist.pop_front();
234*700637cbSDimitry Andric if (!Visited.insert(BB).second)
235*700637cbSDimitry Andric continue;
236*700637cbSDimitry Andric if (succ_size(BB) == 0) {
237*700637cbSDimitry Andric if (isa<UnreachableInst>(BB->getTerminator()))
238*700637cbSDimitry Andric return false;
239*700637cbSDimitry Andric HitExit = true;
240*700637cbSDimitry Andric continue;
241*700637cbSDimitry Andric }
242*700637cbSDimitry Andric if (succ_size(BB) == 1) {
243*700637cbSDimitry Andric Worklist.push_back(BB->getUniqueSuccessor());
244*700637cbSDimitry Andric continue;
245*700637cbSDimitry Andric }
246*700637cbSDimitry Andric const auto &BBInfo = getBBInfo(*BB);
247*700637cbSDimitry Andric bool HasAWayOut = false;
248*700637cbSDimitry Andric for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) {
249*700637cbSDimitry Andric const auto *Succ = BB->getTerminator()->getSuccessor(I);
250*700637cbSDimitry Andric if (!shouldExcludeEdge(*BB, *Succ)) {
251*700637cbSDimitry Andric if (BBInfo.getEdgeCount(I) > 0) {
252*700637cbSDimitry Andric HasAWayOut = true;
253*700637cbSDimitry Andric Worklist.push_back(Succ);
254*700637cbSDimitry Andric }
255*700637cbSDimitry Andric }
256*700637cbSDimitry Andric }
257*700637cbSDimitry Andric if (!HasAWayOut)
258*700637cbSDimitry Andric return false;
259*700637cbSDimitry Andric }
260*700637cbSDimitry Andric return HitExit;
261*700637cbSDimitry Andric }
262*700637cbSDimitry Andric
allNonColdSelectsHaveProfile() const263*700637cbSDimitry Andric bool allNonColdSelectsHaveProfile() const {
264*700637cbSDimitry Andric for (const auto &BB : F) {
265*700637cbSDimitry Andric if (getBBInfo(BB).getCount() > 0) {
266*700637cbSDimitry Andric for (const auto &I : BB) {
267*700637cbSDimitry Andric if (const auto *SI = dyn_cast<SelectInst>(&I)) {
268*700637cbSDimitry Andric if (const auto *Inst = CtxProfAnalysis::getSelectInstrumentation(
269*700637cbSDimitry Andric *const_cast<SelectInst *>(SI))) {
270*700637cbSDimitry Andric auto Index = Inst->getIndex()->getZExtValue();
271*700637cbSDimitry Andric assert(Index < Counters.size());
272*700637cbSDimitry Andric if (Counters[Index] == 0)
273*700637cbSDimitry Andric return false;
274*700637cbSDimitry Andric }
275*700637cbSDimitry Andric }
276*700637cbSDimitry Andric }
277*700637cbSDimitry Andric }
278*700637cbSDimitry Andric }
279*700637cbSDimitry Andric return true;
280*700637cbSDimitry Andric }
281*700637cbSDimitry Andric
282*700637cbSDimitry Andric // This is an adaptation of PGOUseFunc::populateCounters.
283*700637cbSDimitry Andric // FIXME(mtrofin): look into factoring the code to share one implementation.
propagateCounterValues()284*700637cbSDimitry Andric void propagateCounterValues() {
285*700637cbSDimitry Andric bool KeepGoing = true;
286*700637cbSDimitry Andric while (KeepGoing) {
287*700637cbSDimitry Andric KeepGoing = false;
288*700637cbSDimitry Andric for (const auto &BB : F) {
289*700637cbSDimitry Andric auto &Info = getBBInfo(BB);
290*700637cbSDimitry Andric if (!Info.hasCount())
291*700637cbSDimitry Andric KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) ||
292*700637cbSDimitry Andric Info.tryTakeCountFromKnownInEdges(BB);
293*700637cbSDimitry Andric if (Info.hasCount()) {
294*700637cbSDimitry Andric KeepGoing |= Info.trySetSingleUnknownOutEdgeCount();
295*700637cbSDimitry Andric KeepGoing |= Info.trySetSingleUnknownInEdgeCount();
296*700637cbSDimitry Andric }
297*700637cbSDimitry Andric }
298*700637cbSDimitry Andric }
299*700637cbSDimitry Andric assert(allCountersAreAssigned() &&
300*700637cbSDimitry Andric "[ctx-prof] Expected all counters have been assigned.");
301*700637cbSDimitry Andric assert(allTakenPathsExit() &&
302*700637cbSDimitry Andric "[ctx-prof] Encountered a BB with more than one successor, where "
303*700637cbSDimitry Andric "all outgoing edges have a 0 count. This occurs in non-exiting "
304*700637cbSDimitry Andric "functions (message pumps, usually) which are not supported in the "
305*700637cbSDimitry Andric "contextual profiling case");
306*700637cbSDimitry Andric assert(allNonColdSelectsHaveProfile() &&
307*700637cbSDimitry Andric "[ctx-prof] All non-cold select instructions were expected to have "
308*700637cbSDimitry Andric "a profile.");
309*700637cbSDimitry Andric }
310*700637cbSDimitry Andric
311*700637cbSDimitry Andric public:
ProfileAnnotatorImpl(const Function & F,ArrayRef<uint64_t> Counters)312*700637cbSDimitry Andric ProfileAnnotatorImpl(const Function &F, ArrayRef<uint64_t> Counters)
313*700637cbSDimitry Andric : F(F), Counters(Counters) {
314*700637cbSDimitry Andric assert(!F.isDeclaration());
315*700637cbSDimitry Andric assert(!Counters.empty());
316*700637cbSDimitry Andric size_t NrEdges = 0;
317*700637cbSDimitry Andric for (const auto &BB : F) {
318*700637cbSDimitry Andric std::optional<uint64_t> Count;
319*700637cbSDimitry Andric if (auto *Ins = CtxProfAnalysis::getBBInstrumentation(
320*700637cbSDimitry Andric const_cast<BasicBlock &>(BB))) {
321*700637cbSDimitry Andric auto Index = Ins->getIndex()->getZExtValue();
322*700637cbSDimitry Andric assert(Index < Counters.size() &&
323*700637cbSDimitry Andric "The index must be inside the counters vector by construction - "
324*700637cbSDimitry Andric "tripping this assertion indicates a bug in how the contextual "
325*700637cbSDimitry Andric "profile is managed by IPO transforms");
326*700637cbSDimitry Andric (void)Index;
327*700637cbSDimitry Andric Count = Counters[Ins->getIndex()->getZExtValue()];
328*700637cbSDimitry Andric } else if (isa<UnreachableInst>(BB.getTerminator())) {
329*700637cbSDimitry Andric // The program presumably didn't crash.
330*700637cbSDimitry Andric Count = 0;
331*700637cbSDimitry Andric }
332*700637cbSDimitry Andric auto [It, Ins] =
333*700637cbSDimitry Andric BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}});
334*700637cbSDimitry Andric (void)Ins;
335*700637cbSDimitry Andric assert(Ins && "We iterate through the function's BBs, no reason to "
336*700637cbSDimitry Andric "insert one more than once");
337*700637cbSDimitry Andric NrEdges += llvm::count_if(successors(&BB), [&](const auto *Succ) {
338*700637cbSDimitry Andric return !shouldExcludeEdge(BB, *Succ);
339*700637cbSDimitry Andric });
340*700637cbSDimitry Andric }
341*700637cbSDimitry Andric // Pre-allocate the vector, we want references to its contents to be stable.
342*700637cbSDimitry Andric EdgeInfos.reserve(NrEdges);
343*700637cbSDimitry Andric for (const auto &BB : F) {
344*700637cbSDimitry Andric auto &Info = getBBInfo(BB);
345*700637cbSDimitry Andric for (auto I = 0U; I < BB.getTerminator()->getNumSuccessors(); ++I) {
346*700637cbSDimitry Andric const auto *Succ = BB.getTerminator()->getSuccessor(I);
347*700637cbSDimitry Andric if (!shouldExcludeEdge(BB, *Succ)) {
348*700637cbSDimitry Andric auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ));
349*700637cbSDimitry Andric Info.addOutEdge(I, EI);
350*700637cbSDimitry Andric getBBInfo(*Succ).addInEdge(EI);
351*700637cbSDimitry Andric }
352*700637cbSDimitry Andric }
353*700637cbSDimitry Andric }
354*700637cbSDimitry Andric assert(EdgeInfos.capacity() == NrEdges &&
355*700637cbSDimitry Andric "The capacity of EdgeInfos should have stayed unchanged it was "
356*700637cbSDimitry Andric "populated, because we need pointers to its contents to be stable");
357*700637cbSDimitry Andric propagateCounterValues();
358*700637cbSDimitry Andric }
359*700637cbSDimitry Andric
getBBCount(const BasicBlock & BB)360*700637cbSDimitry Andric uint64_t getBBCount(const BasicBlock &BB) { return getBBInfo(BB).getCount(); }
361*700637cbSDimitry Andric };
362*700637cbSDimitry Andric
363*700637cbSDimitry Andric } // namespace llvm
364*700637cbSDimitry Andric
ProfileAnnotator(const Function & F,ArrayRef<uint64_t> RawCounters)365*700637cbSDimitry Andric ProfileAnnotator::ProfileAnnotator(const Function &F,
366*700637cbSDimitry Andric ArrayRef<uint64_t> RawCounters)
367*700637cbSDimitry Andric : PImpl(std::make_unique<ProfileAnnotatorImpl>(F, RawCounters)) {}
368*700637cbSDimitry Andric
369*700637cbSDimitry Andric ProfileAnnotator::~ProfileAnnotator() = default;
370*700637cbSDimitry Andric
getBBCount(const BasicBlock & BB) const371*700637cbSDimitry Andric uint64_t ProfileAnnotator::getBBCount(const BasicBlock &BB) const {
372*700637cbSDimitry Andric return PImpl->getBBCount(BB);
373*700637cbSDimitry Andric }
374*700637cbSDimitry Andric
getSelectInstrProfile(SelectInst & SI,uint64_t & TrueCount,uint64_t & FalseCount) const375*700637cbSDimitry Andric bool ProfileAnnotator::getSelectInstrProfile(SelectInst &SI,
376*700637cbSDimitry Andric uint64_t &TrueCount,
377*700637cbSDimitry Andric uint64_t &FalseCount) const {
378*700637cbSDimitry Andric const auto &BBInfo = PImpl->getBBInfo(*SI.getParent());
379*700637cbSDimitry Andric TrueCount = FalseCount = 0;
380*700637cbSDimitry Andric if (BBInfo.getCount() == 0)
381*700637cbSDimitry Andric return false;
382*700637cbSDimitry Andric
383*700637cbSDimitry Andric auto *Step = CtxProfAnalysis::getSelectInstrumentation(SI);
384*700637cbSDimitry Andric if (!Step)
385*700637cbSDimitry Andric return false;
386*700637cbSDimitry Andric auto Index = Step->getIndex()->getZExtValue();
387*700637cbSDimitry Andric assert(Index < PImpl->Counters.size() &&
388*700637cbSDimitry Andric "The index of the step instruction must be inside the "
389*700637cbSDimitry Andric "counters vector by "
390*700637cbSDimitry Andric "construction - tripping this assertion indicates a bug in "
391*700637cbSDimitry Andric "how the contextual profile is managed by IPO transforms");
392*700637cbSDimitry Andric auto TotalCount = BBInfo.getCount();
393*700637cbSDimitry Andric TrueCount = PImpl->Counters[Index];
394*700637cbSDimitry Andric FalseCount = (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
395*700637cbSDimitry Andric return true;
396*700637cbSDimitry Andric }
397*700637cbSDimitry Andric
getOutgoingBranchWeights(BasicBlock & BB,SmallVectorImpl<uint64_t> & Profile,uint64_t & MaxCount) const398*700637cbSDimitry Andric bool ProfileAnnotator::getOutgoingBranchWeights(
399*700637cbSDimitry Andric BasicBlock &BB, SmallVectorImpl<uint64_t> &Profile,
400*700637cbSDimitry Andric uint64_t &MaxCount) const {
401*700637cbSDimitry Andric Profile.clear();
402*700637cbSDimitry Andric
403*700637cbSDimitry Andric if (succ_size(&BB) < 2)
404*700637cbSDimitry Andric return false;
405*700637cbSDimitry Andric
406*700637cbSDimitry Andric auto *Term = BB.getTerminator();
407*700637cbSDimitry Andric Profile.resize(Term->getNumSuccessors());
408*700637cbSDimitry Andric
409*700637cbSDimitry Andric const auto &BBInfo = PImpl->getBBInfo(BB);
410*700637cbSDimitry Andric MaxCount = 0;
411*700637cbSDimitry Andric for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size;
412*700637cbSDimitry Andric ++SuccIdx) {
413*700637cbSDimitry Andric uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
414*700637cbSDimitry Andric if (EdgeCount > MaxCount)
415*700637cbSDimitry Andric MaxCount = EdgeCount;
416*700637cbSDimitry Andric Profile[SuccIdx] = EdgeCount;
417*700637cbSDimitry Andric }
418*700637cbSDimitry Andric return MaxCount > 0;
419*700637cbSDimitry Andric }
420*700637cbSDimitry Andric
run(Module & M,ModuleAnalysisManager & MAM)421*700637cbSDimitry Andric PreservedAnalyses AssignGUIDPass::run(Module &M, ModuleAnalysisManager &MAM) {
422*700637cbSDimitry Andric for (auto &F : M.functions()) {
423*700637cbSDimitry Andric if (F.isDeclaration())
424*700637cbSDimitry Andric continue;
425*700637cbSDimitry Andric if (F.getMetadata(GUIDMetadataName))
426*700637cbSDimitry Andric continue;
427*700637cbSDimitry Andric const GlobalValue::GUID GUID = F.getGUID();
428*700637cbSDimitry Andric F.setMetadata(GUIDMetadataName,
429*700637cbSDimitry Andric MDNode::get(M.getContext(),
430*700637cbSDimitry Andric {ConstantAsMetadata::get(ConstantInt::get(
431*700637cbSDimitry Andric Type::getInt64Ty(M.getContext()), GUID))}));
432*700637cbSDimitry Andric }
433*700637cbSDimitry Andric return PreservedAnalyses::none();
434*700637cbSDimitry Andric }
435*700637cbSDimitry Andric
getGUID(const Function & F)436*700637cbSDimitry Andric GlobalValue::GUID AssignGUIDPass::getGUID(const Function &F) {
437*700637cbSDimitry Andric if (F.isDeclaration()) {
438*700637cbSDimitry Andric assert(GlobalValue::isExternalLinkage(F.getLinkage()));
439*700637cbSDimitry Andric return F.getGUID();
440*700637cbSDimitry Andric }
441*700637cbSDimitry Andric auto *MD = F.getMetadata(GUIDMetadataName);
442*700637cbSDimitry Andric assert(MD && "guid not found for defined function");
443*700637cbSDimitry Andric return cast<ConstantInt>(cast<ConstantAsMetadata>(MD->getOperand(0))
444*700637cbSDimitry Andric ->getValue()
445*700637cbSDimitry Andric ->stripPointerCasts())
446*700637cbSDimitry Andric ->getZExtValue();
447*700637cbSDimitry Andric }
448*700637cbSDimitry Andric AnalysisKey CtxProfAnalysis::Key;
449*700637cbSDimitry Andric
CtxProfAnalysis(std::optional<StringRef> Profile)450*700637cbSDimitry Andric CtxProfAnalysis::CtxProfAnalysis(std::optional<StringRef> Profile)
451*700637cbSDimitry Andric : Profile([&]() -> std::optional<StringRef> {
452*700637cbSDimitry Andric if (Profile)
453*700637cbSDimitry Andric return *Profile;
454*700637cbSDimitry Andric if (UseCtxProfile.getNumOccurrences())
455*700637cbSDimitry Andric return UseCtxProfile;
456*700637cbSDimitry Andric return std::nullopt;
457*700637cbSDimitry Andric }()) {}
458*700637cbSDimitry Andric
run(Module & M,ModuleAnalysisManager & MAM)459*700637cbSDimitry Andric PGOContextualProfile CtxProfAnalysis::run(Module &M,
460*700637cbSDimitry Andric ModuleAnalysisManager &MAM) {
461*700637cbSDimitry Andric if (!Profile)
462*700637cbSDimitry Andric return {};
463*700637cbSDimitry Andric ErrorOr<std::unique_ptr<MemoryBuffer>> MB = MemoryBuffer::getFile(*Profile);
464*700637cbSDimitry Andric if (auto EC = MB.getError()) {
465*700637cbSDimitry Andric M.getContext().emitError("could not open contextual profile file: " +
466*700637cbSDimitry Andric EC.message());
467*700637cbSDimitry Andric return {};
468*700637cbSDimitry Andric }
469*700637cbSDimitry Andric PGOCtxProfileReader Reader(MB.get()->getBuffer());
470*700637cbSDimitry Andric auto MaybeProfiles = Reader.loadProfiles();
471*700637cbSDimitry Andric if (!MaybeProfiles) {
472*700637cbSDimitry Andric M.getContext().emitError("contextual profile file is invalid: " +
473*700637cbSDimitry Andric toString(MaybeProfiles.takeError()));
474*700637cbSDimitry Andric return {};
475*700637cbSDimitry Andric }
476*700637cbSDimitry Andric
477*700637cbSDimitry Andric // FIXME: We should drive this from ThinLTO, but for the time being, use the
478*700637cbSDimitry Andric // module name as indicator.
479*700637cbSDimitry Andric // We want to *only* keep the contextual profiles in modules that capture
480*700637cbSDimitry Andric // context trees. That allows us to compute specific PSIs, for example.
481*700637cbSDimitry Andric auto DetermineRootsInModule = [&M]() -> const DenseSet<GlobalValue::GUID> {
482*700637cbSDimitry Andric DenseSet<GlobalValue::GUID> ProfileRootsInModule;
483*700637cbSDimitry Andric auto ModName = M.getName();
484*700637cbSDimitry Andric auto Filename = sys::path::filename(ModName);
485*700637cbSDimitry Andric // Drop the file extension.
486*700637cbSDimitry Andric Filename = Filename.substr(0, Filename.find_last_of('.'));
487*700637cbSDimitry Andric // See if it parses
488*700637cbSDimitry Andric APInt Guid;
489*700637cbSDimitry Andric // getAsInteger returns true if there are more chars to read other than the
490*700637cbSDimitry Andric // integer. So the "false" test is what we want.
491*700637cbSDimitry Andric if (!Filename.getAsInteger(0, Guid))
492*700637cbSDimitry Andric ProfileRootsInModule.insert(Guid.getZExtValue());
493*700637cbSDimitry Andric return ProfileRootsInModule;
494*700637cbSDimitry Andric };
495*700637cbSDimitry Andric const auto ProfileRootsInModule = DetermineRootsInModule();
496*700637cbSDimitry Andric PGOContextualProfile Result;
497*700637cbSDimitry Andric
498*700637cbSDimitry Andric // the logic from here on allows for modules that contain - by design - more
499*700637cbSDimitry Andric // than one root. We currently don't support that, because the determination
500*700637cbSDimitry Andric // happens based on the module name matching the root guid, but the logic can
501*700637cbSDimitry Andric // avoid assuming that.
502*700637cbSDimitry Andric if (!ProfileRootsInModule.empty()) {
503*700637cbSDimitry Andric Result.IsInSpecializedModule = true;
504*700637cbSDimitry Andric // Trim first the roots that aren't in this module.
505*700637cbSDimitry Andric for (auto &[RootGuid, _] :
506*700637cbSDimitry Andric llvm::make_early_inc_range(MaybeProfiles->Contexts))
507*700637cbSDimitry Andric if (!ProfileRootsInModule.contains(RootGuid))
508*700637cbSDimitry Andric MaybeProfiles->Contexts.erase(RootGuid);
509*700637cbSDimitry Andric // we can also drop the flat profiles
510*700637cbSDimitry Andric MaybeProfiles->FlatProfiles.clear();
511*700637cbSDimitry Andric }
512*700637cbSDimitry Andric
513*700637cbSDimitry Andric for (const auto &F : M) {
514*700637cbSDimitry Andric if (F.isDeclaration())
515*700637cbSDimitry Andric continue;
516*700637cbSDimitry Andric auto GUID = AssignGUIDPass::getGUID(F);
517*700637cbSDimitry Andric assert(GUID && "guid not found for defined function");
518*700637cbSDimitry Andric const auto &Entry = F.begin();
519*700637cbSDimitry Andric uint32_t MaxCounters = 0; // we expect at least a counter.
520*700637cbSDimitry Andric for (const auto &I : *Entry)
521*700637cbSDimitry Andric if (auto *C = dyn_cast<InstrProfIncrementInst>(&I)) {
522*700637cbSDimitry Andric MaxCounters =
523*700637cbSDimitry Andric static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
524*700637cbSDimitry Andric break;
525*700637cbSDimitry Andric }
526*700637cbSDimitry Andric if (!MaxCounters)
527*700637cbSDimitry Andric continue;
528*700637cbSDimitry Andric uint32_t MaxCallsites = 0;
529*700637cbSDimitry Andric for (const auto &BB : F)
530*700637cbSDimitry Andric for (const auto &I : BB)
531*700637cbSDimitry Andric if (auto *C = dyn_cast<InstrProfCallsite>(&I)) {
532*700637cbSDimitry Andric MaxCallsites =
533*700637cbSDimitry Andric static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
534*700637cbSDimitry Andric break;
535*700637cbSDimitry Andric }
536*700637cbSDimitry Andric auto [It, Ins] = Result.FuncInfo.insert(
537*700637cbSDimitry Andric {GUID, PGOContextualProfile::FunctionInfo(F.getName())});
538*700637cbSDimitry Andric (void)Ins;
539*700637cbSDimitry Andric assert(Ins);
540*700637cbSDimitry Andric It->second.NextCallsiteIndex = MaxCallsites;
541*700637cbSDimitry Andric It->second.NextCounterIndex = MaxCounters;
542*700637cbSDimitry Andric }
543*700637cbSDimitry Andric // If we made it this far, the Result is valid - which we mark by setting
544*700637cbSDimitry Andric // .Profiles.
545*700637cbSDimitry Andric Result.Profiles = std::move(*MaybeProfiles);
546*700637cbSDimitry Andric Result.initIndex();
547*700637cbSDimitry Andric return Result;
548*700637cbSDimitry Andric }
549*700637cbSDimitry Andric
550*700637cbSDimitry Andric GlobalValue::GUID
getDefinedFunctionGUID(const Function & F) const551*700637cbSDimitry Andric PGOContextualProfile::getDefinedFunctionGUID(const Function &F) const {
552*700637cbSDimitry Andric if (auto It = FuncInfo.find(AssignGUIDPass::getGUID(F)); It != FuncInfo.end())
553*700637cbSDimitry Andric return It->first;
554*700637cbSDimitry Andric return 0;
555*700637cbSDimitry Andric }
556*700637cbSDimitry Andric
CtxProfAnalysisPrinterPass(raw_ostream & OS)557*700637cbSDimitry Andric CtxProfAnalysisPrinterPass::CtxProfAnalysisPrinterPass(raw_ostream &OS)
558*700637cbSDimitry Andric : OS(OS), Mode(PrintLevel) {}
559*700637cbSDimitry Andric
run(Module & M,ModuleAnalysisManager & MAM)560*700637cbSDimitry Andric PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M,
561*700637cbSDimitry Andric ModuleAnalysisManager &MAM) {
562*700637cbSDimitry Andric CtxProfAnalysis::Result &C = MAM.getResult<CtxProfAnalysis>(M);
563*700637cbSDimitry Andric if (C.contexts().empty()) {
564*700637cbSDimitry Andric OS << "No contextual profile was provided.\n";
565*700637cbSDimitry Andric return PreservedAnalyses::all();
566*700637cbSDimitry Andric }
567*700637cbSDimitry Andric
568*700637cbSDimitry Andric if (Mode == PrintMode::Everything) {
569*700637cbSDimitry Andric OS << "Function Info:\n";
570*700637cbSDimitry Andric for (const auto &[Guid, FuncInfo] : C.FuncInfo)
571*700637cbSDimitry Andric OS << Guid << " : " << FuncInfo.Name
572*700637cbSDimitry Andric << ". MaxCounterID: " << FuncInfo.NextCounterIndex
573*700637cbSDimitry Andric << ". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex << "\n";
574*700637cbSDimitry Andric }
575*700637cbSDimitry Andric
576*700637cbSDimitry Andric if (Mode == PrintMode::Everything)
577*700637cbSDimitry Andric OS << "\nCurrent Profile:\n";
578*700637cbSDimitry Andric convertCtxProfToYaml(OS, C.profiles());
579*700637cbSDimitry Andric OS << "\n";
580*700637cbSDimitry Andric if (Mode == PrintMode::YAML)
581*700637cbSDimitry Andric return PreservedAnalyses::all();
582*700637cbSDimitry Andric
583*700637cbSDimitry Andric OS << "\nFlat Profile:\n";
584*700637cbSDimitry Andric auto Flat = C.flatten();
585*700637cbSDimitry Andric for (const auto &[Guid, Counters] : Flat) {
586*700637cbSDimitry Andric OS << Guid << " : ";
587*700637cbSDimitry Andric for (auto V : Counters)
588*700637cbSDimitry Andric OS << V << " ";
589*700637cbSDimitry Andric OS << "\n";
590*700637cbSDimitry Andric }
591*700637cbSDimitry Andric return PreservedAnalyses::all();
592*700637cbSDimitry Andric }
593*700637cbSDimitry Andric
getCallsiteInstrumentation(CallBase & CB)594*700637cbSDimitry Andric InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) {
595*700637cbSDimitry Andric if (!InstrProfCallsite::canInstrumentCallsite(CB))
596*700637cbSDimitry Andric return nullptr;
597*700637cbSDimitry Andric for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) {
598*700637cbSDimitry Andric if (auto *IPC = dyn_cast<InstrProfCallsite>(Prev))
599*700637cbSDimitry Andric return IPC;
600*700637cbSDimitry Andric assert(!isa<CallBase>(Prev) &&
601*700637cbSDimitry Andric "didn't expect to find another call, that's not the callsite "
602*700637cbSDimitry Andric "instrumentation, before an instrumentable callsite");
603*700637cbSDimitry Andric }
604*700637cbSDimitry Andric return nullptr;
605*700637cbSDimitry Andric }
606*700637cbSDimitry Andric
getBBInstrumentation(BasicBlock & BB)607*700637cbSDimitry Andric InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
608*700637cbSDimitry Andric for (auto &I : BB)
609*700637cbSDimitry Andric if (auto *Incr = dyn_cast<InstrProfIncrementInst>(&I))
610*700637cbSDimitry Andric if (!isa<InstrProfIncrementInstStep>(&I))
611*700637cbSDimitry Andric return Incr;
612*700637cbSDimitry Andric return nullptr;
613*700637cbSDimitry Andric }
614*700637cbSDimitry Andric
615*700637cbSDimitry Andric InstrProfIncrementInstStep *
getSelectInstrumentation(SelectInst & SI)616*700637cbSDimitry Andric CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
617*700637cbSDimitry Andric Instruction *Prev = &SI;
618*700637cbSDimitry Andric while ((Prev = Prev->getPrevNode()))
619*700637cbSDimitry Andric if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))
620*700637cbSDimitry Andric return Step;
621*700637cbSDimitry Andric return nullptr;
622*700637cbSDimitry Andric }
623*700637cbSDimitry Andric
624*700637cbSDimitry Andric template <class ProfTy>
preorderVisitOneRoot(ProfTy & Profile,function_ref<void (ProfTy &)> Visitor)625*700637cbSDimitry Andric static void preorderVisitOneRoot(ProfTy &Profile,
626*700637cbSDimitry Andric function_ref<void(ProfTy &)> Visitor) {
627*700637cbSDimitry Andric std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
628*700637cbSDimitry Andric Visitor(Ctx);
629*700637cbSDimitry Andric for (auto &[_, SubCtxSet] : Ctx.callsites())
630*700637cbSDimitry Andric for (auto &[__, Subctx] : SubCtxSet)
631*700637cbSDimitry Andric Traverser(Subctx);
632*700637cbSDimitry Andric };
633*700637cbSDimitry Andric Traverser(Profile);
634*700637cbSDimitry Andric }
635*700637cbSDimitry Andric
636*700637cbSDimitry Andric template <class ProfilesTy, class ProfTy>
preorderVisit(ProfilesTy & Profiles,function_ref<void (ProfTy &)> Visitor)637*700637cbSDimitry Andric static void preorderVisit(ProfilesTy &Profiles,
638*700637cbSDimitry Andric function_ref<void(ProfTy &)> Visitor) {
639*700637cbSDimitry Andric for (auto &[_, P] : Profiles)
640*700637cbSDimitry Andric preorderVisitOneRoot<ProfTy>(P, Visitor);
641*700637cbSDimitry Andric }
642*700637cbSDimitry Andric
initIndex()643*700637cbSDimitry Andric void PGOContextualProfile::initIndex() {
644*700637cbSDimitry Andric // Initialize the head of the index list for each function. We don't need it
645*700637cbSDimitry Andric // after this point.
646*700637cbSDimitry Andric DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;
647*700637cbSDimitry Andric for (auto &[Guid, FI] : FuncInfo)
648*700637cbSDimitry Andric InsertionPoints[Guid] = &FI.Index;
649*700637cbSDimitry Andric preorderVisit<PGOCtxProfContext::CallTargetMapTy, PGOCtxProfContext>(
650*700637cbSDimitry Andric Profiles.Contexts, [&](PGOCtxProfContext &Ctx) {
651*700637cbSDimitry Andric auto InsertIt = InsertionPoints.find(Ctx.guid());
652*700637cbSDimitry Andric if (InsertIt == InsertionPoints.end())
653*700637cbSDimitry Andric return;
654*700637cbSDimitry Andric // Insert at the end of the list. Since we traverse in preorder, it
655*700637cbSDimitry Andric // means that when we iterate the list from the beginning, we'd
656*700637cbSDimitry Andric // encounter the contexts in the order we would have, should we have
657*700637cbSDimitry Andric // performed a full preorder traversal.
658*700637cbSDimitry Andric InsertIt->second->Next = &Ctx;
659*700637cbSDimitry Andric Ctx.Previous = InsertIt->second;
660*700637cbSDimitry Andric InsertIt->second = &Ctx;
661*700637cbSDimitry Andric });
662*700637cbSDimitry Andric }
663*700637cbSDimitry Andric
isInSpecializedModule() const664*700637cbSDimitry Andric bool PGOContextualProfile::isInSpecializedModule() const {
665*700637cbSDimitry Andric return ForceIsInSpecializedModule.getNumOccurrences() > 0
666*700637cbSDimitry Andric ? ForceIsInSpecializedModule
667*700637cbSDimitry Andric : IsInSpecializedModule;
668*700637cbSDimitry Andric }
669*700637cbSDimitry Andric
update(Visitor V,const Function & F)670*700637cbSDimitry Andric void PGOContextualProfile::update(Visitor V, const Function &F) {
671*700637cbSDimitry Andric assert(isFunctionKnown(F));
672*700637cbSDimitry Andric GlobalValue::GUID G = getDefinedFunctionGUID(F);
673*700637cbSDimitry Andric for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
674*700637cbSDimitry Andric Node = Node->Next)
675*700637cbSDimitry Andric V(*reinterpret_cast<PGOCtxProfContext *>(Node));
676*700637cbSDimitry Andric }
677*700637cbSDimitry Andric
visit(ConstVisitor V,const Function * F) const678*700637cbSDimitry Andric void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {
679*700637cbSDimitry Andric if (!F)
680*700637cbSDimitry Andric return preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
681*700637cbSDimitry Andric const PGOCtxProfContext>(Profiles.Contexts, V);
682*700637cbSDimitry Andric assert(isFunctionKnown(*F));
683*700637cbSDimitry Andric GlobalValue::GUID G = getDefinedFunctionGUID(*F);
684*700637cbSDimitry Andric for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
685*700637cbSDimitry Andric Node = Node->Next)
686*700637cbSDimitry Andric V(*reinterpret_cast<const PGOCtxProfContext *>(Node));
687*700637cbSDimitry Andric }
688*700637cbSDimitry Andric
flatten() const689*700637cbSDimitry Andric const CtxProfFlatProfile PGOContextualProfile::flatten() const {
690*700637cbSDimitry Andric CtxProfFlatProfile Flat;
691*700637cbSDimitry Andric auto Accummulate = [](SmallVectorImpl<uint64_t> &Into,
692*700637cbSDimitry Andric const SmallVectorImpl<uint64_t> &From,
693*700637cbSDimitry Andric uint64_t SamplingRate) {
694*700637cbSDimitry Andric if (Into.empty())
695*700637cbSDimitry Andric Into.resize(From.size());
696*700637cbSDimitry Andric assert(Into.size() == From.size() &&
697*700637cbSDimitry Andric "All contexts corresponding to a function should have the exact "
698*700637cbSDimitry Andric "same number of counters.");
699*700637cbSDimitry Andric for (size_t I = 0, E = Into.size(); I < E; ++I)
700*700637cbSDimitry Andric Into[I] += From[I] * SamplingRate;
701*700637cbSDimitry Andric };
702*700637cbSDimitry Andric
703*700637cbSDimitry Andric for (const auto &[_, CtxRoot] : Profiles.Contexts) {
704*700637cbSDimitry Andric const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount();
705*700637cbSDimitry Andric preorderVisitOneRoot<const PGOCtxProfContext>(
706*700637cbSDimitry Andric CtxRoot, [&](const PGOCtxProfContext &Ctx) {
707*700637cbSDimitry Andric Accummulate(Flat[Ctx.guid()], Ctx.counters(), SamplingFactor);
708*700637cbSDimitry Andric });
709*700637cbSDimitry Andric
710*700637cbSDimitry Andric for (const auto &[G, Unh] : CtxRoot.getUnhandled())
711*700637cbSDimitry Andric Accummulate(Flat[G], Unh, SamplingFactor);
712*700637cbSDimitry Andric }
713*700637cbSDimitry Andric // We don't sample "Flat" currently, so sampling rate is 1.
714*700637cbSDimitry Andric for (const auto &[G, FC] : Profiles.FlatProfiles)
715*700637cbSDimitry Andric Accummulate(Flat[G], FC, /*SamplingRate=*/1);
716*700637cbSDimitry Andric return Flat;
717*700637cbSDimitry Andric }
718*700637cbSDimitry Andric
719*700637cbSDimitry Andric const CtxProfFlatIndirectCallProfile
flattenVirtCalls() const720*700637cbSDimitry Andric PGOContextualProfile::flattenVirtCalls() const {
721*700637cbSDimitry Andric CtxProfFlatIndirectCallProfile Ret;
722*700637cbSDimitry Andric for (const auto &[_, CtxRoot] : Profiles.Contexts) {
723*700637cbSDimitry Andric const uint64_t TotalRootEntryCount = CtxRoot.getTotalRootEntryCount();
724*700637cbSDimitry Andric preorderVisitOneRoot<const PGOCtxProfContext>(
725*700637cbSDimitry Andric CtxRoot, [&](const PGOCtxProfContext &Ctx) {
726*700637cbSDimitry Andric auto &Targets = Ret[Ctx.guid()];
727*700637cbSDimitry Andric for (const auto &[ID, SubctxSet] : Ctx.callsites())
728*700637cbSDimitry Andric for (const auto &Subctx : SubctxSet)
729*700637cbSDimitry Andric Targets[ID][Subctx.first] +=
730*700637cbSDimitry Andric Subctx.second.getEntrycount() * TotalRootEntryCount;
731*700637cbSDimitry Andric });
732*700637cbSDimitry Andric }
733*700637cbSDimitry Andric return Ret;
734*700637cbSDimitry Andric }
735*700637cbSDimitry Andric
collectIndirectCallPromotionList(CallBase & IC,Result & Profile,SetVector<std::pair<CallBase *,Function * >> & Candidates)736*700637cbSDimitry Andric void CtxProfAnalysis::collectIndirectCallPromotionList(
737*700637cbSDimitry Andric CallBase &IC, Result &Profile,
738*700637cbSDimitry Andric SetVector<std::pair<CallBase *, Function *>> &Candidates) {
739*700637cbSDimitry Andric const auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(IC);
740*700637cbSDimitry Andric if (!Instr)
741*700637cbSDimitry Andric return;
742*700637cbSDimitry Andric Module &M = *IC.getParent()->getModule();
743*700637cbSDimitry Andric const uint32_t CallID = Instr->getIndex()->getZExtValue();
744*700637cbSDimitry Andric Profile.visit(
745*700637cbSDimitry Andric [&](const PGOCtxProfContext &Ctx) {
746*700637cbSDimitry Andric const auto &Targets = Ctx.callsites().find(CallID);
747*700637cbSDimitry Andric if (Targets == Ctx.callsites().end())
748*700637cbSDimitry Andric return;
749*700637cbSDimitry Andric for (const auto &[Guid, _] : Targets->second)
750*700637cbSDimitry Andric if (auto Name = Profile.getFunctionName(Guid); !Name.empty())
751*700637cbSDimitry Andric if (auto *Target = M.getFunction(Name))
752*700637cbSDimitry Andric if (Target->hasFnAttribute(Attribute::AlwaysInline))
753*700637cbSDimitry Andric Candidates.insert({&IC, Target});
754*700637cbSDimitry Andric },
755*700637cbSDimitry Andric IC.getCaller());
756*700637cbSDimitry Andric }
757