xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
//===- PGOCtxProfFlattening.cpp - Contextual Instr. Flattening ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Flattens the contextual profile and lowers it to MD_prof.
// This should happen after all IPO (which is assumed to have maintained the
// contextual profile) happened. Flattening consists of summing the values at
// the same index of the counters belonging to all the contexts of a function.
// The lowering consists of materializing the counter values to function
// entrypoint counts and branch probabilities.
//
// This pass also removes contextual instrumentation, which has been kept around
// to facilitate its functionality.
//
//===----------------------------------------------------------------------===//
20*700637cbSDimitry Andric 
21*700637cbSDimitry Andric #include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
22*700637cbSDimitry Andric #include "llvm/ADT/STLExtras.h"
23*700637cbSDimitry Andric #include "llvm/ADT/ScopeExit.h"
24*700637cbSDimitry Andric #include "llvm/Analysis/CFG.h"
25*700637cbSDimitry Andric #include "llvm/Analysis/CtxProfAnalysis.h"
26*700637cbSDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
27*700637cbSDimitry Andric #include "llvm/IR/Analysis.h"
28*700637cbSDimitry Andric #include "llvm/IR/CFG.h"
29*700637cbSDimitry Andric #include "llvm/IR/Dominators.h"
30*700637cbSDimitry Andric #include "llvm/IR/Instructions.h"
31*700637cbSDimitry Andric #include "llvm/IR/IntrinsicInst.h"
32*700637cbSDimitry Andric #include "llvm/IR/Module.h"
33*700637cbSDimitry Andric #include "llvm/IR/PassManager.h"
34*700637cbSDimitry Andric #include "llvm/IR/ProfileSummary.h"
35*700637cbSDimitry Andric #include "llvm/ProfileData/ProfileCommon.h"
36*700637cbSDimitry Andric #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
37*700637cbSDimitry Andric #include "llvm/Transforms/Scalar/DCE.h"
38*700637cbSDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
39*700637cbSDimitry Andric 
40*700637cbSDimitry Andric using namespace llvm;
41*700637cbSDimitry Andric 
42*700637cbSDimitry Andric #define DEBUG_TYPE "ctx_prof_flatten"
43*700637cbSDimitry Andric 
44*700637cbSDimitry Andric namespace {
45*700637cbSDimitry Andric 
46*700637cbSDimitry Andric /// Assign branch weights and function entry count. Also update the PSI
47*700637cbSDimitry Andric /// builder.
assignProfileData(Function & F,ArrayRef<uint64_t> RawCounters)48*700637cbSDimitry Andric void assignProfileData(Function &F, ArrayRef<uint64_t> RawCounters) {
49*700637cbSDimitry Andric   assert(!RawCounters.empty());
50*700637cbSDimitry Andric   ProfileAnnotator PA(F, RawCounters);
51*700637cbSDimitry Andric 
52*700637cbSDimitry Andric   F.setEntryCount(RawCounters[0]);
53*700637cbSDimitry Andric   SmallVector<uint64_t, 2> ProfileHolder;
54*700637cbSDimitry Andric 
55*700637cbSDimitry Andric   for (auto &BB : F) {
56*700637cbSDimitry Andric     for (auto &I : BB)
57*700637cbSDimitry Andric       if (auto *SI = dyn_cast<SelectInst>(&I)) {
58*700637cbSDimitry Andric         uint64_t TrueCount, FalseCount = 0;
59*700637cbSDimitry Andric         if (!PA.getSelectInstrProfile(*SI, TrueCount, FalseCount))
60*700637cbSDimitry Andric           continue;
61*700637cbSDimitry Andric         setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount},
62*700637cbSDimitry Andric                         std::max(TrueCount, FalseCount));
63*700637cbSDimitry Andric       }
64*700637cbSDimitry Andric     if (succ_size(&BB) < 2)
65*700637cbSDimitry Andric       continue;
66*700637cbSDimitry Andric     uint64_t MaxCount = 0;
67*700637cbSDimitry Andric     if (!PA.getOutgoingBranchWeights(BB, ProfileHolder, MaxCount))
68*700637cbSDimitry Andric       continue;
69*700637cbSDimitry Andric     assert(MaxCount > 0);
70*700637cbSDimitry Andric     setProfMetadata(F.getParent(), BB.getTerminator(), ProfileHolder, MaxCount);
71*700637cbSDimitry Andric   }
72*700637cbSDimitry Andric }
73*700637cbSDimitry Andric 
areAllBBsReachable(const Function & F,FunctionAnalysisManager & FAM)74*700637cbSDimitry Andric [[maybe_unused]] bool areAllBBsReachable(const Function &F,
75*700637cbSDimitry Andric                                          FunctionAnalysisManager &FAM) {
76*700637cbSDimitry Andric   auto &DT = FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(F));
77*700637cbSDimitry Andric   return llvm::all_of(
78*700637cbSDimitry Andric       F, [&](const BasicBlock &BB) { return DT.isReachableFromEntry(&BB); });
79*700637cbSDimitry Andric }
80*700637cbSDimitry Andric 
clearColdFunctionProfile(Function & F)81*700637cbSDimitry Andric void clearColdFunctionProfile(Function &F) {
82*700637cbSDimitry Andric   for (auto &BB : F)
83*700637cbSDimitry Andric     BB.getTerminator()->setMetadata(LLVMContext::MD_prof, nullptr);
84*700637cbSDimitry Andric   F.setEntryCount(0U);
85*700637cbSDimitry Andric }
86*700637cbSDimitry Andric 
removeInstrumentation(Function & F)87*700637cbSDimitry Andric void removeInstrumentation(Function &F) {
88*700637cbSDimitry Andric   for (auto &BB : F)
89*700637cbSDimitry Andric     for (auto &I : llvm::make_early_inc_range(BB))
90*700637cbSDimitry Andric       if (isa<InstrProfCntrInstBase>(I))
91*700637cbSDimitry Andric         I.eraseFromParent();
92*700637cbSDimitry Andric }
93*700637cbSDimitry Andric 
annotateIndirectCall(Module & M,CallBase & CB,const DenseMap<uint32_t,FlatIndirectTargets> & FlatProf,const InstrProfCallsite & Ins)94*700637cbSDimitry Andric void annotateIndirectCall(
95*700637cbSDimitry Andric     Module &M, CallBase &CB,
96*700637cbSDimitry Andric     const DenseMap<uint32_t, FlatIndirectTargets> &FlatProf,
97*700637cbSDimitry Andric     const InstrProfCallsite &Ins) {
98*700637cbSDimitry Andric   auto Idx = Ins.getIndex()->getZExtValue();
99*700637cbSDimitry Andric   auto FIt = FlatProf.find(Idx);
100*700637cbSDimitry Andric   if (FIt == FlatProf.end())
101*700637cbSDimitry Andric     return;
102*700637cbSDimitry Andric   const auto &Targets = FIt->second;
103*700637cbSDimitry Andric   SmallVector<InstrProfValueData, 2> Data;
104*700637cbSDimitry Andric   uint64_t Sum = 0;
105*700637cbSDimitry Andric   for (auto &[Guid, Count] : Targets) {
106*700637cbSDimitry Andric     Data.push_back({/*.Value=*/Guid, /*.Count=*/Count});
107*700637cbSDimitry Andric     Sum += Count;
108*700637cbSDimitry Andric   }
109*700637cbSDimitry Andric 
110*700637cbSDimitry Andric   llvm::sort(Data,
111*700637cbSDimitry Andric              [](const InstrProfValueData &A, const InstrProfValueData &B) {
112*700637cbSDimitry Andric                return A.Count > B.Count;
113*700637cbSDimitry Andric              });
114*700637cbSDimitry Andric   llvm::annotateValueSite(M, CB, Data, Sum,
115*700637cbSDimitry Andric                           InstrProfValueKind::IPVK_IndirectCallTarget,
116*700637cbSDimitry Andric                           Data.size());
117*700637cbSDimitry Andric   LLVM_DEBUG(dbgs() << "[ctxprof] flat indirect call prof: " << CB
118*700637cbSDimitry Andric                     << CB.getMetadata(LLVMContext::MD_prof) << "\n");
119*700637cbSDimitry Andric }
120*700637cbSDimitry Andric 
121*700637cbSDimitry Andric // We normally return a "Changed" bool, but the calling pass' run assumes
122*700637cbSDimitry Andric // something will change - some profile will be added - so this won't add much
123*700637cbSDimitry Andric // by returning false when applicable.
annotateIndirectCalls(Module & M,const CtxProfAnalysis::Result & CtxProf)124*700637cbSDimitry Andric void annotateIndirectCalls(Module &M, const CtxProfAnalysis::Result &CtxProf) {
125*700637cbSDimitry Andric   const auto FlatIndCalls = CtxProf.flattenVirtCalls();
126*700637cbSDimitry Andric   for (auto &F : M) {
127*700637cbSDimitry Andric     if (F.isDeclaration())
128*700637cbSDimitry Andric       continue;
129*700637cbSDimitry Andric     auto FlatProfIter = FlatIndCalls.find(AssignGUIDPass::getGUID(F));
130*700637cbSDimitry Andric     if (FlatProfIter == FlatIndCalls.end())
131*700637cbSDimitry Andric       continue;
132*700637cbSDimitry Andric     const auto &FlatProf = FlatProfIter->second;
133*700637cbSDimitry Andric     for (auto &BB : F) {
134*700637cbSDimitry Andric       for (auto &I : BB) {
135*700637cbSDimitry Andric         auto *CB = dyn_cast<CallBase>(&I);
136*700637cbSDimitry Andric         if (!CB || !CB->isIndirectCall())
137*700637cbSDimitry Andric           continue;
138*700637cbSDimitry Andric         if (auto *Ins = CtxProfAnalysis::getCallsiteInstrumentation(*CB))
139*700637cbSDimitry Andric           annotateIndirectCall(M, *CB, FlatProf, *Ins);
140*700637cbSDimitry Andric       }
141*700637cbSDimitry Andric     }
142*700637cbSDimitry Andric   }
143*700637cbSDimitry Andric }
144*700637cbSDimitry Andric 
145*700637cbSDimitry Andric } // namespace
146*700637cbSDimitry Andric 
run(Module & M,ModuleAnalysisManager & MAM)147*700637cbSDimitry Andric PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
148*700637cbSDimitry Andric                                                 ModuleAnalysisManager &MAM) {
149*700637cbSDimitry Andric   // Ensure in all cases the instrumentation is removed: if this module had no
150*700637cbSDimitry Andric   // roots, the contextual profile would evaluate to false, but there would
151*700637cbSDimitry Andric   // still be instrumentation.
152*700637cbSDimitry Andric   // Note: in such cases we leave as-is any other profile info (if present -
153*700637cbSDimitry Andric   // e.g. synthetic weights, etc) because it wouldn't interfere with the
154*700637cbSDimitry Andric   // contextual - based one (which would be in other modules)
155*700637cbSDimitry Andric   auto OnExit = llvm::make_scope_exit([&]() {
156*700637cbSDimitry Andric     if (IsPreThinlink)
157*700637cbSDimitry Andric       return;
158*700637cbSDimitry Andric     for (auto &F : M)
159*700637cbSDimitry Andric       removeInstrumentation(F);
160*700637cbSDimitry Andric   });
161*700637cbSDimitry Andric   auto &CtxProf = MAM.getResult<CtxProfAnalysis>(M);
162*700637cbSDimitry Andric   // post-thinlink, we only reprocess for the module(s) containing the
163*700637cbSDimitry Andric   // contextual tree. For everything else, OnExit will just clean the
164*700637cbSDimitry Andric   // instrumentation.
165*700637cbSDimitry Andric   if (!IsPreThinlink && !CtxProf.isInSpecializedModule())
166*700637cbSDimitry Andric     return PreservedAnalyses::none();
167*700637cbSDimitry Andric 
168*700637cbSDimitry Andric   if (IsPreThinlink)
169*700637cbSDimitry Andric     annotateIndirectCalls(M, CtxProf);
170*700637cbSDimitry Andric   const auto FlattenedProfile = CtxProf.flatten();
171*700637cbSDimitry Andric 
172*700637cbSDimitry Andric   for (auto &F : M) {
173*700637cbSDimitry Andric     if (F.isDeclaration())
174*700637cbSDimitry Andric       continue;
175*700637cbSDimitry Andric 
176*700637cbSDimitry Andric     assert(areAllBBsReachable(
177*700637cbSDimitry Andric                F, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M)
178*700637cbSDimitry Andric                       .getManager()) &&
179*700637cbSDimitry Andric            "Function has unreacheable basic blocks. The expectation was that "
180*700637cbSDimitry Andric            "DCE was run before.");
181*700637cbSDimitry Andric 
182*700637cbSDimitry Andric     auto It = FlattenedProfile.find(AssignGUIDPass::getGUID(F));
183*700637cbSDimitry Andric     // If this function didn't appear in the contextual profile, it's cold.
184*700637cbSDimitry Andric     if (It == FlattenedProfile.end())
185*700637cbSDimitry Andric       clearColdFunctionProfile(F);
186*700637cbSDimitry Andric     else
187*700637cbSDimitry Andric       assignProfileData(F, It->second);
188*700637cbSDimitry Andric   }
189*700637cbSDimitry Andric   InstrProfSummaryBuilder PB(ProfileSummaryBuilder::DefaultCutoffs);
190*700637cbSDimitry Andric   // use here the flat profiles just so the importer doesn't complain about
191*700637cbSDimitry Andric   // how different the PSIs are between the module with the roots and the
192*700637cbSDimitry Andric   // various modules it imports.
193*700637cbSDimitry Andric   for (auto &C : FlattenedProfile) {
194*700637cbSDimitry Andric     PB.addEntryCount(C.second[0]);
195*700637cbSDimitry Andric     for (auto V : llvm::drop_begin(C.second))
196*700637cbSDimitry Andric       PB.addInternalCount(V);
197*700637cbSDimitry Andric   }
198*700637cbSDimitry Andric 
199*700637cbSDimitry Andric   M.setProfileSummary(PB.getSummary()->getMD(M.getContext()),
200*700637cbSDimitry Andric                       ProfileSummary::Kind::PSK_Instr);
201*700637cbSDimitry Andric   PreservedAnalyses PA;
202*700637cbSDimitry Andric   PA.abandon<ProfileSummaryAnalysis>();
203*700637cbSDimitry Andric   MAM.invalidate(M, PA);
204*700637cbSDimitry Andric   auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
205*700637cbSDimitry Andric   PSI.refresh(PB.getSummary());
206*700637cbSDimitry Andric   return PreservedAnalyses::none();
207*700637cbSDimitry Andric }
208