xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp (revision d56accc7c3dcc897489b6a07834763a03b9f3d68)
1 //===- SampleProfileProbe.cpp - Pseudo probe Instrumentation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileProber transformation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/IPO/SampleProfileProbe.h"
14 #include "llvm/ADT/Statistic.h"
15 #include "llvm/Analysis/BlockFrequencyInfo.h"
16 #include "llvm/Analysis/TargetLibraryInfo.h"
17 #include "llvm/IR/BasicBlock.h"
18 #include "llvm/IR/CFG.h"
19 #include "llvm/IR/Constant.h"
20 #include "llvm/IR/Constants.h"
21 #include "llvm/IR/DebugInfoMetadata.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/GlobalVariable.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/MDBuilder.h"
28 #include "llvm/ProfileData/SampleProf.h"
29 #include "llvm/Support/CRC.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Transforms/Instrumentation.h"
32 #include "llvm/Transforms/Utils/ModuleUtils.h"
33 #include <unordered_set>
34 #include <vector>
35 
36 using namespace llvm;
37 #define DEBUG_TYPE "sample-profile-probe"
38 
39 STATISTIC(ArtificialDbgLine,
40           "Number of probes that have an artificial debug line");
41 
42 static cl::opt<bool>
43     VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden,
44                       cl::desc("Do pseudo probe verification"));
45 
46 static cl::list<std::string> VerifyPseudoProbeFuncList(
47     "verify-pseudo-probe-funcs", cl::Hidden,
48     cl::desc("The option to specify the name of the functions to verify."));
49 
50 static cl::opt<bool>
51     UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
52                       cl::desc("Update pseudo probe distribution factor"));
53 
54 static uint64_t getCallStackHash(const DILocation *DIL) {
55   uint64_t Hash = 0;
56   const DILocation *InlinedAt = DIL ? DIL->getInlinedAt() : nullptr;
57   while (InlinedAt) {
58     Hash ^= MD5Hash(std::to_string(InlinedAt->getLine()));
59     Hash ^= MD5Hash(std::to_string(InlinedAt->getColumn()));
60     const DISubprogram *SP = InlinedAt->getScope()->getSubprogram();
61     // Use linkage name for C++ if possible.
62     auto Name = SP->getLinkageName();
63     if (Name.empty())
64       Name = SP->getName();
65     Hash ^= MD5Hash(Name);
66     InlinedAt = InlinedAt->getInlinedAt();
67   }
68   return Hash;
69 }
70 
71 static uint64_t computeCallStackHash(const Instruction &Inst) {
72   return getCallStackHash(Inst.getDebugLoc());
73 }
74 
75 bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
76   // Skip function declaration.
77   if (F->isDeclaration())
78     return false;
79   // Skip function that will not be emitted into object file. The prevailing
80   // defintion will be verified instead.
81   if (F->hasAvailableExternallyLinkage())
82     return false;
83   // Do a name matching.
84   static std::unordered_set<std::string> VerifyFuncNames(
85       VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end());
86   return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str());
87 }
88 
89 void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) {
90   if (VerifyPseudoProbe) {
91     PIC.registerAfterPassCallback(
92         [this](StringRef P, Any IR, const PreservedAnalyses &) {
93           this->runAfterPass(P, IR);
94         });
95   }
96 }
97 
98 // Callback to run after each transformation for the new pass manager.
99 void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
100   std::string Banner =
101       "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n";
102   dbgs() << Banner;
103   if (any_isa<const Module *>(IR))
104     runAfterPass(any_cast<const Module *>(IR));
105   else if (any_isa<const Function *>(IR))
106     runAfterPass(any_cast<const Function *>(IR));
107   else if (any_isa<const LazyCallGraph::SCC *>(IR))
108     runAfterPass(any_cast<const LazyCallGraph::SCC *>(IR));
109   else if (any_isa<const Loop *>(IR))
110     runAfterPass(any_cast<const Loop *>(IR));
111   else
112     llvm_unreachable("Unknown IR unit");
113 }
114 
115 void PseudoProbeVerifier::runAfterPass(const Module *M) {
116   for (const Function &F : *M)
117     runAfterPass(&F);
118 }
119 
120 void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) {
121   for (const LazyCallGraph::Node &N : *C)
122     runAfterPass(&N.getFunction());
123 }
124 
125 void PseudoProbeVerifier::runAfterPass(const Function *F) {
126   if (!shouldVerifyFunction(F))
127     return;
128   ProbeFactorMap ProbeFactors;
129   for (const auto &BB : *F)
130     collectProbeFactors(&BB, ProbeFactors);
131   verifyProbeFactors(F, ProbeFactors);
132 }
133 
134 void PseudoProbeVerifier::runAfterPass(const Loop *L) {
135   const Function *F = L->getHeader()->getParent();
136   runAfterPass(F);
137 }
138 
139 void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
140                                               ProbeFactorMap &ProbeFactors) {
141   for (const auto &I : *Block) {
142     if (Optional<PseudoProbe> Probe = extractProbe(I)) {
143       uint64_t Hash = computeCallStackHash(I);
144       ProbeFactors[{Probe->Id, Hash}] += Probe->Factor;
145     }
146   }
147 }
148 
149 void PseudoProbeVerifier::verifyProbeFactors(
150     const Function *F, const ProbeFactorMap &ProbeFactors) {
151   bool BannerPrinted = false;
152   auto &PrevProbeFactors = FunctionProbeFactors[F->getName()];
153   for (const auto &I : ProbeFactors) {
154     float CurProbeFactor = I.second;
155     if (PrevProbeFactors.count(I.first)) {
156       float PrevProbeFactor = PrevProbeFactors[I.first];
157       if (std::abs(CurProbeFactor - PrevProbeFactor) >
158           DistributionFactorVariance) {
159         if (!BannerPrinted) {
160           dbgs() << "Function " << F->getName() << ":\n";
161           BannerPrinted = true;
162         }
163         dbgs() << "Probe " << I.first.first << "\tprevious factor "
164                << format("%0.2f", PrevProbeFactor) << "\tcurrent factor "
165                << format("%0.2f", CurProbeFactor) << "\n";
166       }
167     }
168 
169     // Update
170     PrevProbeFactors[I.first] = I.second;
171   }
172 }
173 
174 PseudoProbeManager::PseudoProbeManager(const Module &M) {
175   if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
176     for (const auto *Operand : FuncInfo->operands()) {
177       const auto *MD = cast<MDNode>(Operand);
178       auto GUID =
179           mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
180       auto Hash =
181           mdconst::dyn_extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
182       GUIDToProbeDescMap.try_emplace(GUID, PseudoProbeDescriptor(GUID, Hash));
183     }
184   }
185 }
186 
187 const PseudoProbeDescriptor *
188 PseudoProbeManager::getDesc(const Function &F) const {
189   auto I = GUIDToProbeDescMap.find(
190       Function::getGUID(FunctionSamples::getCanonicalFnName(F)));
191   return I == GUIDToProbeDescMap.end() ? nullptr : &I->second;
192 }
193 
194 bool PseudoProbeManager::moduleIsProbed(const Module &M) const {
195   return M.getNamedMetadata(PseudoProbeDescMetadataName);
196 }
197 
198 bool PseudoProbeManager::profileIsValid(const Function &F,
199                                         const FunctionSamples &Samples) const {
200   const auto *Desc = getDesc(F);
201   if (!Desc) {
202     LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function " << F.getName()
203                       << "\n");
204     return false;
205   } else {
206     if (Desc->getFunctionHash() != Samples.getFunctionHash()) {
207       LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F.getName()
208                         << "\n");
209       return false;
210     }
211   }
212   return true;
213 }
214 
215 SampleProfileProber::SampleProfileProber(Function &Func,
216                                          const std::string &CurModuleUniqueId)
217     : F(&Func), CurModuleUniqueId(CurModuleUniqueId) {
218   BlockProbeIds.clear();
219   CallProbeIds.clear();
220   LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
221   computeProbeIdForBlocks();
222   computeProbeIdForCallsites();
223   computeCFGHash();
224 }
225 
226 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
227 // value of each BB in the CFG. The higher 32 bits record the number of edges
228 // preceded by the number of indirect calls.
229 // This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
230 void SampleProfileProber::computeCFGHash() {
231   std::vector<uint8_t> Indexes;
232   JamCRC JC;
233   for (auto &BB : *F) {
234     auto *TI = BB.getTerminator();
235     for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
236       auto *Succ = TI->getSuccessor(I);
237       auto Index = getBlockId(Succ);
238       for (int J = 0; J < 4; J++)
239         Indexes.push_back((uint8_t)(Index >> (J * 8)));
240     }
241   }
242 
243   JC.update(Indexes);
244 
245   FunctionHash = (uint64_t)CallProbeIds.size() << 48 |
246                  (uint64_t)Indexes.size() << 32 | JC.getCRC();
247   // Reserve bit 60-63 for other information purpose.
248   FunctionHash &= 0x0FFFFFFFFFFFFFFF;
249   assert(FunctionHash && "Function checksum should not be zero");
250   LLVM_DEBUG(dbgs() << "\nFunction Hash Computation for " << F->getName()
251                     << ":\n"
252                     << " CRC = " << JC.getCRC() << ", Edges = "
253                     << Indexes.size() << ", ICSites = " << CallProbeIds.size()
254                     << ", Hash = " << FunctionHash << "\n");
255 }
256 
257 void SampleProfileProber::computeProbeIdForBlocks() {
258   for (auto &BB : *F) {
259     BlockProbeIds[&BB] = ++LastProbeId;
260   }
261 }
262 
263 void SampleProfileProber::computeProbeIdForCallsites() {
264   for (auto &BB : *F) {
265     for (auto &I : BB) {
266       if (!isa<CallBase>(I))
267         continue;
268       if (isa<IntrinsicInst>(&I))
269         continue;
270       CallProbeIds[&I] = ++LastProbeId;
271     }
272   }
273 }
274 
275 uint32_t SampleProfileProber::getBlockId(const BasicBlock *BB) const {
276   auto I = BlockProbeIds.find(const_cast<BasicBlock *>(BB));
277   return I == BlockProbeIds.end() ? 0 : I->second;
278 }
279 
280 uint32_t SampleProfileProber::getCallsiteId(const Instruction *Call) const {
281   auto Iter = CallProbeIds.find(const_cast<Instruction *>(Call));
282   return Iter == CallProbeIds.end() ? 0 : Iter->second;
283 }
284 
285 void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
286   Module *M = F.getParent();
287   MDBuilder MDB(F.getContext());
288   // Compute a GUID without considering the function's linkage type. This is
289   // fine since function name is the only key in the profile database.
290   uint64_t Guid = Function::getGUID(F.getName());
291 
292   // Assign an artificial debug line to a probe that doesn't come with a real
293   // line. A probe not having a debug line will get an incomplete inline
294   // context. This will cause samples collected on the probe to be counted
295   // into the base profile instead of a context profile. The line number
296   // itself is not important though.
297   auto AssignDebugLoc = [&](Instruction *I) {
298     assert((isa<PseudoProbeInst>(I) || isa<CallBase>(I)) &&
299            "Expecting pseudo probe or call instructions");
300     if (!I->getDebugLoc()) {
301       if (auto *SP = F.getSubprogram()) {
302         auto DIL = DILocation::get(SP->getContext(), 0, 0, SP);
303         I->setDebugLoc(DIL);
304         ArtificialDbgLine++;
305         LLVM_DEBUG({
306           dbgs() << "\nIn Function " << F.getName()
307                  << " Probe gets an artificial debug line\n";
308           I->dump();
309         });
310       }
311     }
312   };
313 
314   // Probe basic blocks.
315   for (auto &I : BlockProbeIds) {
316     BasicBlock *BB = I.first;
317     uint32_t Index = I.second;
318     // Insert a probe before an instruction with a valid debug line number which
319     // will be assigned to the probe. The line number will be used later to
320     // model the inline context when the probe is inlined into other functions.
321     // Debug instructions, phi nodes and lifetime markers do not have an valid
322     // line number. Real instructions generated by optimizations may not come
323     // with a line number either.
324     auto HasValidDbgLine = [](Instruction *J) {
325       return !isa<PHINode>(J) && !isa<DbgInfoIntrinsic>(J) &&
326              !J->isLifetimeStartOrEnd() && J->getDebugLoc();
327     };
328 
329     Instruction *J = &*BB->getFirstInsertionPt();
330     while (J != BB->getTerminator() && !HasValidDbgLine(J)) {
331       J = J->getNextNode();
332     }
333 
334     IRBuilder<> Builder(J);
335     assert(Builder.GetInsertPoint() != BB->end() &&
336            "Cannot get the probing point");
337     Function *ProbeFn =
338         llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe);
339     Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index),
340                      Builder.getInt32(0),
341                      Builder.getInt64(PseudoProbeFullDistributionFactor)};
342     auto *Probe = Builder.CreateCall(ProbeFn, Args);
343     AssignDebugLoc(Probe);
344   }
345 
346   // Probe both direct calls and indirect calls. Direct calls are probed so that
347   // their probe ID can be used as an call site identifier to represent a
348   // calling context.
349   for (auto &I : CallProbeIds) {
350     auto *Call = I.first;
351     uint32_t Index = I.second;
352     uint32_t Type = cast<CallBase>(Call)->getCalledFunction()
353                         ? (uint32_t)PseudoProbeType::DirectCall
354                         : (uint32_t)PseudoProbeType::IndirectCall;
355     AssignDebugLoc(Call);
356     // Levarge the 32-bit discriminator field of debug data to store the ID and
357     // type of a callsite probe. This gets rid of the dependency on plumbing a
358     // customized metadata through the codegen pipeline.
359     uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
360         Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
361     if (auto DIL = Call->getDebugLoc()) {
362       DIL = DIL->cloneWithDiscriminator(V);
363       Call->setDebugLoc(DIL);
364     }
365   }
366 
367   // Create module-level metadata that contains function info necessary to
368   // synthesize probe-based sample counts,  which are
369   // - FunctionGUID
370   // - FunctionHash.
371   // - FunctionName
372   auto Hash = getFunctionHash();
373   auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, &F);
374   auto *NMD = M->getNamedMetadata(PseudoProbeDescMetadataName);
375   assert(NMD && "llvm.pseudo_probe_desc should be pre-created");
376   NMD->addOperand(MD);
377 
378   // Preserve a comdat group to hold all probes materialized later. This
379   // allows that when the function is considered dead and removed, the
380   // materialized probes are disposed too.
381   // Imported functions are defined in another module. They do not need
382   // the following handling since same care will be taken for them in their
383   // original module. The pseudo probes inserted into an imported functions
384   // above will naturally not be emitted since the imported function is free
385   // from object emission. However they will be emitted together with the
386   // inliner functions that the imported function is inlined into. We are not
387   // creating a comdat group for an import function since it's useless anyway.
388   if (!F.isDeclarationForLinker()) {
389     if (TM) {
390       auto Triple = TM->getTargetTriple();
391       if (Triple.supportsCOMDAT() && TM->getFunctionSections())
392         getOrCreateFunctionComdat(F, Triple);
393     }
394   }
395 }
396 
397 PreservedAnalyses SampleProfileProbePass::run(Module &M,
398                                               ModuleAnalysisManager &AM) {
399   auto ModuleId = getUniqueModuleId(&M);
400   // Create the pseudo probe desc metadata beforehand.
401   // Note that modules with only data but no functions will require this to
402   // be set up so that they will be known as probed later.
403   M.getOrInsertNamedMetadata(PseudoProbeDescMetadataName);
404 
405   for (auto &F : M) {
406     if (F.isDeclaration())
407       continue;
408     SampleProfileProber ProbeManager(F, ModuleId);
409     ProbeManager.instrumentOneFunc(F, TM);
410   }
411 
412   return PreservedAnalyses::none();
413 }
414 
415 void PseudoProbeUpdatePass::runOnFunction(Function &F,
416                                           FunctionAnalysisManager &FAM) {
417   BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
418   auto BBProfileCount = [&BFI](BasicBlock *BB) {
419     return BFI.getBlockProfileCount(BB).getValueOr(0);
420   };
421 
422   // Collect the sum of execution weight for each probe.
423   ProbeFactorMap ProbeFactors;
424   for (auto &Block : F) {
425     for (auto &I : Block) {
426       if (Optional<PseudoProbe> Probe = extractProbe(I)) {
427         uint64_t Hash = computeCallStackHash(I);
428         ProbeFactors[{Probe->Id, Hash}] += BBProfileCount(&Block);
429       }
430     }
431   }
432 
433   // Fix up over-counted probes.
434   for (auto &Block : F) {
435     for (auto &I : Block) {
436       if (Optional<PseudoProbe> Probe = extractProbe(I)) {
437         uint64_t Hash = computeCallStackHash(I);
438         float Sum = ProbeFactors[{Probe->Id, Hash}];
439         if (Sum != 0)
440           setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
441       }
442     }
443   }
444 }
445 
446 PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
447                                              ModuleAnalysisManager &AM) {
448   if (UpdatePseudoProbe) {
449     for (auto &F : M) {
450       if (F.isDeclaration())
451         continue;
452       FunctionAnalysisManager &FAM =
453           AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
454       runOnFunction(F, FAM);
455     }
456   }
457   return PreservedAnalyses::none();
458 }
459