xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===- PGOCtxProfLowering.cpp - Contextual PGO Instr. Lowering ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
9*0fca6ea1SDimitry Andric 
10*0fca6ea1SDimitry Andric #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
11*0fca6ea1SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
12*0fca6ea1SDimitry Andric #include "llvm/IR/Analysis.h"
13*0fca6ea1SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
14*0fca6ea1SDimitry Andric #include "llvm/IR/IRBuilder.h"
15*0fca6ea1SDimitry Andric #include "llvm/IR/Instructions.h"
16*0fca6ea1SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
17*0fca6ea1SDimitry Andric #include "llvm/IR/Module.h"
18*0fca6ea1SDimitry Andric #include "llvm/IR/PassManager.h"
19*0fca6ea1SDimitry Andric #include "llvm/Support/CommandLine.h"
20*0fca6ea1SDimitry Andric #include <utility>
21*0fca6ea1SDimitry Andric 
22*0fca6ea1SDimitry Andric using namespace llvm;
23*0fca6ea1SDimitry Andric 
24*0fca6ea1SDimitry Andric #define DEBUG_TYPE "ctx-instr-lower"
25*0fca6ea1SDimitry Andric 
26*0fca6ea1SDimitry Andric static cl::list<std::string> ContextRoots(
27*0fca6ea1SDimitry Andric     "profile-context-root", cl::Hidden,
28*0fca6ea1SDimitry Andric     cl::desc(
29*0fca6ea1SDimitry Andric         "A function name, assumed to be global, which will be treated as the "
30*0fca6ea1SDimitry Andric         "root of an interesting graph, which will be profiled independently "
31*0fca6ea1SDimitry Andric         "from other similar graphs."));
32*0fca6ea1SDimitry Andric 
isContextualIRPGOEnabled()33*0fca6ea1SDimitry Andric bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() {
34*0fca6ea1SDimitry Andric   return !ContextRoots.empty();
35*0fca6ea1SDimitry Andric }
36*0fca6ea1SDimitry Andric 
// The names of the symbols we expect compiler-rt to provide. A namespace is
// used purely for readability at the use sites.
//
// The names are spelled as `constexpr const char *` rather than `static auto`:
// `auto` deduces a *mutable* `const char *`, which would let any code in this
// translation unit silently repoint a symbol name. Making them constants also
// documents the type explicitly.
namespace CompilerRtAPINames {
// Functions called by the lowered code.
static constexpr const char *StartCtx = "__llvm_ctx_profile_start_context";
static constexpr const char *ReleaseCtx = "__llvm_ctx_profile_release_context";
static constexpr const char *GetCtx = "__llvm_ctx_profile_get_context";
// Thread-local variables written by the lowered code.
static constexpr const char *ExpectedCalleeTLS =
    "__llvm_ctx_profile_expected_callee";
static constexpr const char *CallsiteTLS = "__llvm_ctx_profile_callsite";
} // namespace CompilerRtAPINames
46*0fca6ea1SDimitry Andric 
47*0fca6ea1SDimitry Andric namespace {
48*0fca6ea1SDimitry Andric // The lowering logic and state.
49*0fca6ea1SDimitry Andric class CtxInstrumentationLowerer final {
50*0fca6ea1SDimitry Andric   Module &M;
51*0fca6ea1SDimitry Andric   ModuleAnalysisManager &MAM;
52*0fca6ea1SDimitry Andric   Type *ContextNodeTy = nullptr;
53*0fca6ea1SDimitry Andric   Type *ContextRootTy = nullptr;
54*0fca6ea1SDimitry Andric 
55*0fca6ea1SDimitry Andric   DenseMap<const Function *, Constant *> ContextRootMap;
56*0fca6ea1SDimitry Andric   Function *StartCtx = nullptr;
57*0fca6ea1SDimitry Andric   Function *GetCtx = nullptr;
58*0fca6ea1SDimitry Andric   Function *ReleaseCtx = nullptr;
59*0fca6ea1SDimitry Andric   GlobalVariable *ExpectedCalleeTLS = nullptr;
60*0fca6ea1SDimitry Andric   GlobalVariable *CallsiteInfoTLS = nullptr;
61*0fca6ea1SDimitry Andric 
62*0fca6ea1SDimitry Andric public:
63*0fca6ea1SDimitry Andric   CtxInstrumentationLowerer(Module &M, ModuleAnalysisManager &MAM);
64*0fca6ea1SDimitry Andric   // return true if lowering happened (i.e. a change was made)
65*0fca6ea1SDimitry Andric   bool lowerFunction(Function &F);
66*0fca6ea1SDimitry Andric };
67*0fca6ea1SDimitry Andric 
68*0fca6ea1SDimitry Andric // llvm.instrprof.increment[.step] captures the total number of counters as one
69*0fca6ea1SDimitry Andric // of its parameters, and llvm.instrprof.callsite captures the total number of
70*0fca6ea1SDimitry Andric // callsites. Those values are the same for instances of those intrinsics in
71*0fca6ea1SDimitry Andric // this function. Find the first instance of each and return them.
getNrCountersAndCallsites(const Function & F)72*0fca6ea1SDimitry Andric std::pair<uint32_t, uint32_t> getNrCountersAndCallsites(const Function &F) {
73*0fca6ea1SDimitry Andric   uint32_t NrCounters = 0;
74*0fca6ea1SDimitry Andric   uint32_t NrCallsites = 0;
75*0fca6ea1SDimitry Andric   for (const auto &BB : F) {
76*0fca6ea1SDimitry Andric     for (const auto &I : BB) {
77*0fca6ea1SDimitry Andric       if (const auto *Incr = dyn_cast<InstrProfIncrementInst>(&I)) {
78*0fca6ea1SDimitry Andric         uint32_t V =
79*0fca6ea1SDimitry Andric             static_cast<uint32_t>(Incr->getNumCounters()->getZExtValue());
80*0fca6ea1SDimitry Andric         assert((!NrCounters || V == NrCounters) &&
81*0fca6ea1SDimitry Andric                "expected all llvm.instrprof.increment[.step] intrinsics to "
82*0fca6ea1SDimitry Andric                "have the same total nr of counters parameter");
83*0fca6ea1SDimitry Andric         NrCounters = V;
84*0fca6ea1SDimitry Andric       } else if (const auto *CSIntr = dyn_cast<InstrProfCallsite>(&I)) {
85*0fca6ea1SDimitry Andric         uint32_t V =
86*0fca6ea1SDimitry Andric             static_cast<uint32_t>(CSIntr->getNumCounters()->getZExtValue());
87*0fca6ea1SDimitry Andric         assert((!NrCallsites || V == NrCallsites) &&
88*0fca6ea1SDimitry Andric                "expected all llvm.instrprof.callsite intrinsics to have the "
89*0fca6ea1SDimitry Andric                "same total nr of callsites parameter");
90*0fca6ea1SDimitry Andric         NrCallsites = V;
91*0fca6ea1SDimitry Andric       }
92*0fca6ea1SDimitry Andric #if NDEBUG
93*0fca6ea1SDimitry Andric       if (NrCounters && NrCallsites)
94*0fca6ea1SDimitry Andric         return std::make_pair(NrCounters, NrCallsites);
95*0fca6ea1SDimitry Andric #endif
96*0fca6ea1SDimitry Andric     }
97*0fca6ea1SDimitry Andric   }
98*0fca6ea1SDimitry Andric   return {NrCounters, NrCallsites};
99*0fca6ea1SDimitry Andric }
100*0fca6ea1SDimitry Andric } // namespace
101*0fca6ea1SDimitry Andric 
102*0fca6ea1SDimitry Andric // set up tie-in with compiler-rt.
103*0fca6ea1SDimitry Andric // NOTE!!!
104*0fca6ea1SDimitry Andric // These have to match compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
CtxInstrumentationLowerer(Module & M,ModuleAnalysisManager & MAM)105*0fca6ea1SDimitry Andric CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
106*0fca6ea1SDimitry Andric                                                      ModuleAnalysisManager &MAM)
107*0fca6ea1SDimitry Andric     : M(M), MAM(MAM) {
108*0fca6ea1SDimitry Andric   auto *PointerTy = PointerType::get(M.getContext(), 0);
109*0fca6ea1SDimitry Andric   auto *SanitizerMutexType = Type::getInt8Ty(M.getContext());
110*0fca6ea1SDimitry Andric   auto *I32Ty = Type::getInt32Ty(M.getContext());
111*0fca6ea1SDimitry Andric   auto *I64Ty = Type::getInt64Ty(M.getContext());
112*0fca6ea1SDimitry Andric 
113*0fca6ea1SDimitry Andric   // The ContextRoot type
114*0fca6ea1SDimitry Andric   ContextRootTy =
115*0fca6ea1SDimitry Andric       StructType::get(M.getContext(), {
116*0fca6ea1SDimitry Andric                                           PointerTy,          /*FirstNode*/
117*0fca6ea1SDimitry Andric                                           PointerTy,          /*FirstMemBlock*/
118*0fca6ea1SDimitry Andric                                           PointerTy,          /*CurrentMem*/
119*0fca6ea1SDimitry Andric                                           SanitizerMutexType, /*Taken*/
120*0fca6ea1SDimitry Andric                                       });
121*0fca6ea1SDimitry Andric   // The Context header.
122*0fca6ea1SDimitry Andric   ContextNodeTy = StructType::get(M.getContext(), {
123*0fca6ea1SDimitry Andric                                                       I64Ty,     /*Guid*/
124*0fca6ea1SDimitry Andric                                                       PointerTy, /*Next*/
125*0fca6ea1SDimitry Andric                                                       I32Ty,     /*NrCounters*/
126*0fca6ea1SDimitry Andric                                                       I32Ty,     /*NrCallsites*/
127*0fca6ea1SDimitry Andric                                                   });
128*0fca6ea1SDimitry Andric 
129*0fca6ea1SDimitry Andric   // Define a global for each entrypoint. We'll reuse the entrypoint's name as
130*0fca6ea1SDimitry Andric   // prefix. We assume the entrypoint names to be unique.
131*0fca6ea1SDimitry Andric   for (const auto &Fname : ContextRoots) {
132*0fca6ea1SDimitry Andric     if (const auto *F = M.getFunction(Fname)) {
133*0fca6ea1SDimitry Andric       if (F->isDeclaration())
134*0fca6ea1SDimitry Andric         continue;
135*0fca6ea1SDimitry Andric       auto *G = M.getOrInsertGlobal(Fname + "_ctx_root", ContextRootTy);
136*0fca6ea1SDimitry Andric       cast<GlobalVariable>(G)->setInitializer(
137*0fca6ea1SDimitry Andric           Constant::getNullValue(ContextRootTy));
138*0fca6ea1SDimitry Andric       ContextRootMap.insert(std::make_pair(F, G));
139*0fca6ea1SDimitry Andric       for (const auto &BB : *F)
140*0fca6ea1SDimitry Andric         for (const auto &I : BB)
141*0fca6ea1SDimitry Andric           if (const auto *CB = dyn_cast<CallBase>(&I))
142*0fca6ea1SDimitry Andric             if (CB->isMustTailCall()) {
143*0fca6ea1SDimitry Andric               M.getContext().emitError(
144*0fca6ea1SDimitry Andric                   "The function " + Fname +
145*0fca6ea1SDimitry Andric                   " was indicated as a context root, but it features musttail "
146*0fca6ea1SDimitry Andric                   "calls, which is not supported.");
147*0fca6ea1SDimitry Andric             }
148*0fca6ea1SDimitry Andric     }
149*0fca6ea1SDimitry Andric   }
150*0fca6ea1SDimitry Andric 
151*0fca6ea1SDimitry Andric   // Declare the functions we will call.
152*0fca6ea1SDimitry Andric   StartCtx = cast<Function>(
153*0fca6ea1SDimitry Andric       M.getOrInsertFunction(
154*0fca6ea1SDimitry Andric            CompilerRtAPINames::StartCtx,
155*0fca6ea1SDimitry Andric            FunctionType::get(ContextNodeTy->getPointerTo(),
156*0fca6ea1SDimitry Andric                              {ContextRootTy->getPointerTo(), /*ContextRoot*/
157*0fca6ea1SDimitry Andric                               I64Ty, /*Guid*/ I32Ty,
158*0fca6ea1SDimitry Andric                               /*NrCounters*/ I32Ty /*NrCallsites*/},
159*0fca6ea1SDimitry Andric                              false))
160*0fca6ea1SDimitry Andric           .getCallee());
161*0fca6ea1SDimitry Andric   GetCtx = cast<Function>(
162*0fca6ea1SDimitry Andric       M.getOrInsertFunction(CompilerRtAPINames::GetCtx,
163*0fca6ea1SDimitry Andric                             FunctionType::get(ContextNodeTy->getPointerTo(),
164*0fca6ea1SDimitry Andric                                               {PointerTy, /*Callee*/
165*0fca6ea1SDimitry Andric                                                I64Ty,     /*Guid*/
166*0fca6ea1SDimitry Andric                                                I32Ty,     /*NrCounters*/
167*0fca6ea1SDimitry Andric                                                I32Ty},    /*NrCallsites*/
168*0fca6ea1SDimitry Andric                                               false))
169*0fca6ea1SDimitry Andric           .getCallee());
170*0fca6ea1SDimitry Andric   ReleaseCtx = cast<Function>(
171*0fca6ea1SDimitry Andric       M.getOrInsertFunction(
172*0fca6ea1SDimitry Andric            CompilerRtAPINames::ReleaseCtx,
173*0fca6ea1SDimitry Andric            FunctionType::get(Type::getVoidTy(M.getContext()),
174*0fca6ea1SDimitry Andric                              {
175*0fca6ea1SDimitry Andric                                  ContextRootTy->getPointerTo(), /*ContextRoot*/
176*0fca6ea1SDimitry Andric                              },
177*0fca6ea1SDimitry Andric                              false))
178*0fca6ea1SDimitry Andric           .getCallee());
179*0fca6ea1SDimitry Andric 
180*0fca6ea1SDimitry Andric   // Declare the TLSes we will need to use.
181*0fca6ea1SDimitry Andric   CallsiteInfoTLS =
182*0fca6ea1SDimitry Andric       new GlobalVariable(M, PointerTy, false, GlobalValue::ExternalLinkage,
183*0fca6ea1SDimitry Andric                          nullptr, CompilerRtAPINames::CallsiteTLS);
184*0fca6ea1SDimitry Andric   CallsiteInfoTLS->setThreadLocal(true);
185*0fca6ea1SDimitry Andric   CallsiteInfoTLS->setVisibility(llvm::GlobalValue::HiddenVisibility);
186*0fca6ea1SDimitry Andric   ExpectedCalleeTLS =
187*0fca6ea1SDimitry Andric       new GlobalVariable(M, PointerTy, false, GlobalValue::ExternalLinkage,
188*0fca6ea1SDimitry Andric                          nullptr, CompilerRtAPINames::ExpectedCalleeTLS);
189*0fca6ea1SDimitry Andric   ExpectedCalleeTLS->setThreadLocal(true);
190*0fca6ea1SDimitry Andric   ExpectedCalleeTLS->setVisibility(llvm::GlobalValue::HiddenVisibility);
191*0fca6ea1SDimitry Andric }
192*0fca6ea1SDimitry Andric 
run(Module & M,ModuleAnalysisManager & MAM)193*0fca6ea1SDimitry Andric PreservedAnalyses PGOCtxProfLoweringPass::run(Module &M,
194*0fca6ea1SDimitry Andric                                               ModuleAnalysisManager &MAM) {
195*0fca6ea1SDimitry Andric   CtxInstrumentationLowerer Lowerer(M, MAM);
196*0fca6ea1SDimitry Andric   bool Changed = false;
197*0fca6ea1SDimitry Andric   for (auto &F : M)
198*0fca6ea1SDimitry Andric     Changed |= Lowerer.lowerFunction(F);
199*0fca6ea1SDimitry Andric   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
200*0fca6ea1SDimitry Andric }
201*0fca6ea1SDimitry Andric 
lowerFunction(Function & F)202*0fca6ea1SDimitry Andric bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
203*0fca6ea1SDimitry Andric   if (F.isDeclaration())
204*0fca6ea1SDimitry Andric     return false;
205*0fca6ea1SDimitry Andric   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
206*0fca6ea1SDimitry Andric   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
207*0fca6ea1SDimitry Andric 
208*0fca6ea1SDimitry Andric   Value *Guid = nullptr;
209*0fca6ea1SDimitry Andric   auto [NrCounters, NrCallsites] = getNrCountersAndCallsites(F);
210*0fca6ea1SDimitry Andric 
211*0fca6ea1SDimitry Andric   Value *Context = nullptr;
212*0fca6ea1SDimitry Andric   Value *RealContext = nullptr;
213*0fca6ea1SDimitry Andric 
214*0fca6ea1SDimitry Andric   StructType *ThisContextType = nullptr;
215*0fca6ea1SDimitry Andric   Value *TheRootContext = nullptr;
216*0fca6ea1SDimitry Andric   Value *ExpectedCalleeTLSAddr = nullptr;
217*0fca6ea1SDimitry Andric   Value *CallsiteInfoTLSAddr = nullptr;
218*0fca6ea1SDimitry Andric 
219*0fca6ea1SDimitry Andric   auto &Head = F.getEntryBlock();
220*0fca6ea1SDimitry Andric   for (auto &I : Head) {
221*0fca6ea1SDimitry Andric     // Find the increment intrinsic in the entry basic block.
222*0fca6ea1SDimitry Andric     if (auto *Mark = dyn_cast<InstrProfIncrementInst>(&I)) {
223*0fca6ea1SDimitry Andric       assert(Mark->getIndex()->isZero());
224*0fca6ea1SDimitry Andric 
225*0fca6ea1SDimitry Andric       IRBuilder<> Builder(Mark);
226*0fca6ea1SDimitry Andric       // FIXME(mtrofin): use InstrProfSymtab::getCanonicalName
227*0fca6ea1SDimitry Andric       Guid = Builder.getInt64(F.getGUID());
228*0fca6ea1SDimitry Andric       // The type of the context of this function is now knowable since we have
229*0fca6ea1SDimitry Andric       // NrCallsites and NrCounters. We delcare it here because it's more
230*0fca6ea1SDimitry Andric       // convenient - we have the Builder.
231*0fca6ea1SDimitry Andric       ThisContextType = StructType::get(
232*0fca6ea1SDimitry Andric           F.getContext(),
233*0fca6ea1SDimitry Andric           {ContextNodeTy, ArrayType::get(Builder.getInt64Ty(), NrCounters),
234*0fca6ea1SDimitry Andric            ArrayType::get(Builder.getPtrTy(), NrCallsites)});
235*0fca6ea1SDimitry Andric       // Figure out which way we obtain the context object for this function -
236*0fca6ea1SDimitry Andric       // if it's an entrypoint, then we call StartCtx, otherwise GetCtx. In the
237*0fca6ea1SDimitry Andric       // former case, we also set TheRootContext since we need to release it
238*0fca6ea1SDimitry Andric       // at the end (plus it can be used to know if we have an entrypoint or a
239*0fca6ea1SDimitry Andric       // regular function)
240*0fca6ea1SDimitry Andric       auto Iter = ContextRootMap.find(&F);
241*0fca6ea1SDimitry Andric       if (Iter != ContextRootMap.end()) {
242*0fca6ea1SDimitry Andric         TheRootContext = Iter->second;
243*0fca6ea1SDimitry Andric         Context = Builder.CreateCall(StartCtx, {TheRootContext, Guid,
244*0fca6ea1SDimitry Andric                                                 Builder.getInt32(NrCounters),
245*0fca6ea1SDimitry Andric                                                 Builder.getInt32(NrCallsites)});
246*0fca6ea1SDimitry Andric         ORE.emit(
247*0fca6ea1SDimitry Andric             [&] { return OptimizationRemark(DEBUG_TYPE, "Entrypoint", &F); });
248*0fca6ea1SDimitry Andric       } else {
249*0fca6ea1SDimitry Andric         Context =
250*0fca6ea1SDimitry Andric             Builder.CreateCall(GetCtx, {&F, Guid, Builder.getInt32(NrCounters),
251*0fca6ea1SDimitry Andric                                         Builder.getInt32(NrCallsites)});
252*0fca6ea1SDimitry Andric         ORE.emit([&] {
253*0fca6ea1SDimitry Andric           return OptimizationRemark(DEBUG_TYPE, "RegularFunction", &F);
254*0fca6ea1SDimitry Andric         });
255*0fca6ea1SDimitry Andric       }
256*0fca6ea1SDimitry Andric       // The context could be scratch.
257*0fca6ea1SDimitry Andric       auto *CtxAsInt = Builder.CreatePtrToInt(Context, Builder.getInt64Ty());
258*0fca6ea1SDimitry Andric       if (NrCallsites > 0) {
259*0fca6ea1SDimitry Andric         // Figure out which index of the TLS 2-element buffers to use.
260*0fca6ea1SDimitry Andric         // Scratch context => we use index == 1. Real contexts => index == 0.
261*0fca6ea1SDimitry Andric         auto *Index = Builder.CreateAnd(CtxAsInt, Builder.getInt64(1));
262*0fca6ea1SDimitry Andric         // The GEPs corresponding to that index, in the respective TLS.
263*0fca6ea1SDimitry Andric         ExpectedCalleeTLSAddr = Builder.CreateGEP(
264*0fca6ea1SDimitry Andric             Builder.getInt8Ty()->getPointerTo(),
265*0fca6ea1SDimitry Andric             Builder.CreateThreadLocalAddress(ExpectedCalleeTLS), {Index});
266*0fca6ea1SDimitry Andric         CallsiteInfoTLSAddr = Builder.CreateGEP(
267*0fca6ea1SDimitry Andric             Builder.getInt32Ty(),
268*0fca6ea1SDimitry Andric             Builder.CreateThreadLocalAddress(CallsiteInfoTLS), {Index});
269*0fca6ea1SDimitry Andric       }
270*0fca6ea1SDimitry Andric       // Because the context pointer may have LSB set (to indicate scratch),
271*0fca6ea1SDimitry Andric       // clear it for the value we use as base address for the counter vector.
272*0fca6ea1SDimitry Andric       // This way, if later we want to have "real" (not clobbered) buffers
273*0fca6ea1SDimitry Andric       // acting as scratch, the lowering (at least this part of it that deals
274*0fca6ea1SDimitry Andric       // with counters) stays the same.
275*0fca6ea1SDimitry Andric       RealContext = Builder.CreateIntToPtr(
276*0fca6ea1SDimitry Andric           Builder.CreateAnd(CtxAsInt, Builder.getInt64(-2)),
277*0fca6ea1SDimitry Andric           ThisContextType->getPointerTo());
278*0fca6ea1SDimitry Andric       I.eraseFromParent();
279*0fca6ea1SDimitry Andric       break;
280*0fca6ea1SDimitry Andric     }
281*0fca6ea1SDimitry Andric   }
282*0fca6ea1SDimitry Andric   if (!Context) {
283*0fca6ea1SDimitry Andric     ORE.emit([&] {
284*0fca6ea1SDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "Skip", &F)
285*0fca6ea1SDimitry Andric              << "Function doesn't have instrumentation, skipping";
286*0fca6ea1SDimitry Andric     });
287*0fca6ea1SDimitry Andric     return false;
288*0fca6ea1SDimitry Andric   }
289*0fca6ea1SDimitry Andric 
290*0fca6ea1SDimitry Andric   bool ContextWasReleased = false;
291*0fca6ea1SDimitry Andric   for (auto &BB : F) {
292*0fca6ea1SDimitry Andric     for (auto &I : llvm::make_early_inc_range(BB)) {
293*0fca6ea1SDimitry Andric       if (auto *Instr = dyn_cast<InstrProfCntrInstBase>(&I)) {
294*0fca6ea1SDimitry Andric         IRBuilder<> Builder(Instr);
295*0fca6ea1SDimitry Andric         switch (Instr->getIntrinsicID()) {
296*0fca6ea1SDimitry Andric         case llvm::Intrinsic::instrprof_increment:
297*0fca6ea1SDimitry Andric         case llvm::Intrinsic::instrprof_increment_step: {
298*0fca6ea1SDimitry Andric           // Increments (or increment-steps) are just a typical load - increment
299*0fca6ea1SDimitry Andric           // - store in the RealContext.
300*0fca6ea1SDimitry Andric           auto *AsStep = cast<InstrProfIncrementInst>(Instr);
301*0fca6ea1SDimitry Andric           auto *GEP = Builder.CreateGEP(
302*0fca6ea1SDimitry Andric               ThisContextType, RealContext,
303*0fca6ea1SDimitry Andric               {Builder.getInt32(0), Builder.getInt32(1), AsStep->getIndex()});
304*0fca6ea1SDimitry Andric           Builder.CreateStore(
305*0fca6ea1SDimitry Andric               Builder.CreateAdd(Builder.CreateLoad(Builder.getInt64Ty(), GEP),
306*0fca6ea1SDimitry Andric                                 AsStep->getStep()),
307*0fca6ea1SDimitry Andric               GEP);
308*0fca6ea1SDimitry Andric         } break;
309*0fca6ea1SDimitry Andric         case llvm::Intrinsic::instrprof_callsite:
310*0fca6ea1SDimitry Andric           // callsite lowering: write the called value in the expected callee
311*0fca6ea1SDimitry Andric           // TLS we treat the TLS as volatile because of signal handlers and to
312*0fca6ea1SDimitry Andric           // avoid these being moved away from the callsite they decorate.
313*0fca6ea1SDimitry Andric           auto *CSIntrinsic = dyn_cast<InstrProfCallsite>(Instr);
314*0fca6ea1SDimitry Andric           Builder.CreateStore(CSIntrinsic->getCallee(), ExpectedCalleeTLSAddr,
315*0fca6ea1SDimitry Andric                               true);
316*0fca6ea1SDimitry Andric           // write the GEP of the slot in the sub-contexts portion of the
317*0fca6ea1SDimitry Andric           // context in TLS. Now, here, we use the actual Context value - as
318*0fca6ea1SDimitry Andric           // returned from compiler-rt - which may have the LSB set if the
319*0fca6ea1SDimitry Andric           // Context was scratch. Since the header of the context object and
320*0fca6ea1SDimitry Andric           // then the values are all 8-aligned (or, really, insofar as we care,
321*0fca6ea1SDimitry Andric           // they are even) - if the context is scratch (meaning, an odd value),
322*0fca6ea1SDimitry Andric           // so will the GEP. This is important because this is then visible to
323*0fca6ea1SDimitry Andric           // compiler-rt which will produce scratch contexts for callers that
324*0fca6ea1SDimitry Andric           // have a scratch context.
325*0fca6ea1SDimitry Andric           Builder.CreateStore(
326*0fca6ea1SDimitry Andric               Builder.CreateGEP(ThisContextType, Context,
327*0fca6ea1SDimitry Andric                                 {Builder.getInt32(0), Builder.getInt32(2),
328*0fca6ea1SDimitry Andric                                  CSIntrinsic->getIndex()}),
329*0fca6ea1SDimitry Andric               CallsiteInfoTLSAddr, true);
330*0fca6ea1SDimitry Andric           break;
331*0fca6ea1SDimitry Andric         }
332*0fca6ea1SDimitry Andric         I.eraseFromParent();
333*0fca6ea1SDimitry Andric       } else if (TheRootContext && isa<ReturnInst>(I)) {
334*0fca6ea1SDimitry Andric         // Remember to release the context if we are an entrypoint.
335*0fca6ea1SDimitry Andric         IRBuilder<> Builder(&I);
336*0fca6ea1SDimitry Andric         Builder.CreateCall(ReleaseCtx, {TheRootContext});
337*0fca6ea1SDimitry Andric         ContextWasReleased = true;
338*0fca6ea1SDimitry Andric       }
339*0fca6ea1SDimitry Andric     }
340*0fca6ea1SDimitry Andric   }
341*0fca6ea1SDimitry Andric   // FIXME: This would happen if the entrypoint tailcalls. A way to fix would be
342*0fca6ea1SDimitry Andric   // to disallow this, (so this then stays as an error), another is to detect
343*0fca6ea1SDimitry Andric   // that and then do a wrapper or disallow the tail call. This only affects
344*0fca6ea1SDimitry Andric   // instrumentation, when we want to detect the call graph.
345*0fca6ea1SDimitry Andric   if (TheRootContext && !ContextWasReleased)
346*0fca6ea1SDimitry Andric     F.getContext().emitError(
347*0fca6ea1SDimitry Andric         "[ctx_prof] An entrypoint was instrumented but it has no `ret` "
348*0fca6ea1SDimitry Andric         "instructions above which to release the context: " +
349*0fca6ea1SDimitry Andric         F.getName());
350*0fca6ea1SDimitry Andric   return true;
351*0fca6ea1SDimitry Andric }
352