xref: /freebsd/contrib/llvm-project/llvm/lib/ProfileData/SampleProf.cpp (revision d35b039af944f68fe99bb3ad2f0c6d5ec7917096)
1  //=-- SampleProf.cpp - Sample profiling format support --------------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file contains common definitions used in the reading and writing of
10  // sample profile data.
11  //
12  //===----------------------------------------------------------------------===//
13  
14  #include "llvm/ProfileData/SampleProf.h"
15  #include "llvm/Config/llvm-config.h"
16  #include "llvm/IR/DebugInfoMetadata.h"
17  #include "llvm/IR/PseudoProbe.h"
18  #include "llvm/ProfileData/SampleProfReader.h"
19  #include "llvm/Support/CommandLine.h"
20  #include "llvm/Support/Compiler.h"
21  #include "llvm/Support/Debug.h"
22  #include "llvm/Support/ErrorHandling.h"
23  #include "llvm/Support/raw_ostream.h"
24  #include <string>
25  #include <system_error>
26  
27  using namespace llvm;
28  using namespace sampleprof;
29  
30  static cl::opt<uint64_t> ProfileSymbolListCutOff(
31      "profile-symbol-list-cutoff", cl::Hidden, cl::init(-1),
32      cl::desc("Cutoff value about how many symbols in profile symbol list "
33               "will be used. This is very useful for performance debugging"));
34  
35  static cl::opt<bool> GenerateMergedBaseProfiles(
36      "generate-merged-base-profiles",
37      cl::desc("When generating nested context-sensitive profiles, always "
38               "generate extra base profile for function with all its context "
39               "profiles merged into it."));
40  
41  namespace llvm {
42  namespace sampleprof {
43  bool FunctionSamples::ProfileIsProbeBased = false;
44  bool FunctionSamples::ProfileIsCS = false;
45  bool FunctionSamples::ProfileIsPreInlined = false;
46  bool FunctionSamples::UseMD5 = false;
47  bool FunctionSamples::HasUniqSuffix = true;
48  bool FunctionSamples::ProfileIsFS = false;
49  } // namespace sampleprof
50  } // namespace llvm
51  
52  namespace {
53  
54  // FIXME: This class is only here to support the transition to llvm::Error. It
55  // will be removed once this transition is complete. Clients should prefer to
56  // deal with the Error value directly, rather than converting to error_code.
57  class SampleProfErrorCategoryType : public std::error_category {
58    const char *name() const noexcept override { return "llvm.sampleprof"; }
59  
60    std::string message(int IE) const override {
61      sampleprof_error E = static_cast<sampleprof_error>(IE);
62      switch (E) {
63      case sampleprof_error::success:
64        return "Success";
65      case sampleprof_error::bad_magic:
66        return "Invalid sample profile data (bad magic)";
67      case sampleprof_error::unsupported_version:
68        return "Unsupported sample profile format version";
69      case sampleprof_error::too_large:
70        return "Too much profile data";
71      case sampleprof_error::truncated:
72        return "Truncated profile data";
73      case sampleprof_error::malformed:
74        return "Malformed sample profile data";
75      case sampleprof_error::unrecognized_format:
76        return "Unrecognized sample profile encoding format";
77      case sampleprof_error::unsupported_writing_format:
78        return "Profile encoding format unsupported for writing operations";
79      case sampleprof_error::truncated_name_table:
80        return "Truncated function name table";
81      case sampleprof_error::not_implemented:
82        return "Unimplemented feature";
83      case sampleprof_error::counter_overflow:
84        return "Counter overflow";
85      case sampleprof_error::ostream_seek_unsupported:
86        return "Ostream does not support seek";
87      case sampleprof_error::uncompress_failed:
88        return "Uncompress failure";
89      case sampleprof_error::zlib_unavailable:
90        return "Zlib is unavailable";
91      case sampleprof_error::hash_mismatch:
92        return "Function hash mismatch";
93      }
94      llvm_unreachable("A value of sampleprof_error has no message.");
95    }
96  };
97  
98  } // end anonymous namespace
99  
100  const std::error_category &llvm::sampleprof_category() {
101    static SampleProfErrorCategoryType ErrorCategory;
102    return ErrorCategory;
103  }
104  
105  void LineLocation::print(raw_ostream &OS) const {
106    OS << LineOffset;
107    if (Discriminator > 0)
108      OS << "." << Discriminator;
109  }
110  
111  raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
112                                            const LineLocation &Loc) {
113    Loc.print(OS);
114    return OS;
115  }
116  
117  /// Merge the samples in \p Other into this record.
118  /// Optionally scale sample counts by \p Weight.
119  sampleprof_error SampleRecord::merge(const SampleRecord &Other,
120                                       uint64_t Weight) {
121    sampleprof_error Result;
122    Result = addSamples(Other.getSamples(), Weight);
123    for (const auto &I : Other.getCallTargets()) {
124      mergeSampleProfErrors(Result, addCalledTarget(I.first, I.second, Weight));
125    }
126    return Result;
127  }
128  
129  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
130  LLVM_DUMP_METHOD void LineLocation::dump() const { print(dbgs()); }
131  #endif
132  
133  /// Print the sample record to the stream \p OS indented by \p Indent.
134  void SampleRecord::print(raw_ostream &OS, unsigned Indent) const {
135    OS << NumSamples;
136    if (hasCalls()) {
137      OS << ", calls:";
138      for (const auto &I : getSortedCallTargets())
139        OS << " " << I.first << ":" << I.second;
140    }
141    OS << "\n";
142  }
143  
144  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
145  LLVM_DUMP_METHOD void SampleRecord::dump() const { print(dbgs(), 0); }
146  #endif
147  
148  raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
149                                            const SampleRecord &Sample) {
150    Sample.print(OS, 0);
151    return OS;
152  }
153  
154  /// Print the samples collected for a function on stream \p OS.
155  void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
156    if (getFunctionHash())
157      OS << "CFG checksum " << getFunctionHash() << "\n";
158  
159    OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size()
160       << " sampled lines\n";
161  
162    OS.indent(Indent);
163    if (!BodySamples.empty()) {
164      OS << "Samples collected in the function's body {\n";
165      SampleSorter<LineLocation, SampleRecord> SortedBodySamples(BodySamples);
166      for (const auto &SI : SortedBodySamples.get()) {
167        OS.indent(Indent + 2);
168        OS << SI->first << ": " << SI->second;
169      }
170      OS.indent(Indent);
171      OS << "}\n";
172    } else {
173      OS << "No samples collected in the function's body\n";
174    }
175  
176    OS.indent(Indent);
177    if (!CallsiteSamples.empty()) {
178      OS << "Samples collected in inlined callsites {\n";
179      SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
180          CallsiteSamples);
181      for (const auto &CS : SortedCallsiteSamples.get()) {
182        for (const auto &FS : CS->second) {
183          OS.indent(Indent + 2);
184          OS << CS->first << ": inlined callee: " << FS.second.getFunction()
185             << ": ";
186          FS.second.print(OS, Indent + 4);
187        }
188      }
189      OS.indent(Indent);
190      OS << "}\n";
191    } else {
192      OS << "No inlined callsites in this function\n";
193    }
194  }
195  
196  raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
197                                            const FunctionSamples &FS) {
198    FS.print(OS);
199    return OS;
200  }
201  
202  void sampleprof::sortFuncProfiles(
203      const SampleProfileMap &ProfileMap,
204      std::vector<NameFunctionSamples> &SortedProfiles) {
205    for (const auto &I : ProfileMap) {
206      SortedProfiles.push_back(std::make_pair(I.first, &I.second));
207    }
208    llvm::stable_sort(SortedProfiles, [](const NameFunctionSamples &A,
209                                         const NameFunctionSamples &B) {
210      if (A.second->getTotalSamples() == B.second->getTotalSamples())
211        return A.second->getContext() < B.second->getContext();
212      return A.second->getTotalSamples() > B.second->getTotalSamples();
213    });
214  }
215  
216  unsigned FunctionSamples::getOffset(const DILocation *DIL) {
217    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
218        0xffff;
219  }
220  
221  LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL,
222                                                      bool ProfileIsFS) {
223    if (FunctionSamples::ProfileIsProbeBased) {
224      // In a pseudo-probe based profile, a callsite is simply represented by the
225      // ID of the probe associated with the call instruction. The probe ID is
226      // encoded in the Discriminator field of the call instruction's debug
227      // metadata.
228      return LineLocation(PseudoProbeDwarfDiscriminator::extractProbeIndex(
229                              DIL->getDiscriminator()),
230                          0);
231    } else {
232      unsigned Discriminator =
233          ProfileIsFS ? DIL->getDiscriminator() : DIL->getBaseDiscriminator();
234      return LineLocation(FunctionSamples::getOffset(DIL), Discriminator);
235    }
236  }
237  
238  const FunctionSamples *FunctionSamples::findFunctionSamples(
239      const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper,
240      const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
241          *FuncNameToProfNameMap) const {
242    assert(DIL);
243    SmallVector<std::pair<LineLocation, StringRef>, 10> S;
244  
245    const DILocation *PrevDIL = DIL;
246    for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
247      // Use C++ linkage name if possible.
248      StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName();
249      if (Name.empty())
250        Name = PrevDIL->getScope()->getSubprogram()->getName();
251      S.emplace_back(FunctionSamples::getCallSiteIdentifier(
252                         DIL, FunctionSamples::ProfileIsFS),
253                     Name);
254      PrevDIL = DIL;
255    }
256  
257    if (S.size() == 0)
258      return this;
259    const FunctionSamples *FS = this;
260    for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
261      FS = FS->findFunctionSamplesAt(S[i].first, S[i].second, Remapper,
262                                     FuncNameToProfNameMap);
263    }
264    return FS;
265  }
266  
267  void FunctionSamples::findAllNames(DenseSet<FunctionId> &NameSet) const {
268    NameSet.insert(getFunction());
269    for (const auto &BS : BodySamples)
270      for (const auto &TS : BS.second.getCallTargets())
271        NameSet.insert(TS.first);
272  
273    for (const auto &CS : CallsiteSamples) {
274      for (const auto &NameFS : CS.second) {
275        NameSet.insert(NameFS.first);
276        NameFS.second.findAllNames(NameSet);
277      }
278    }
279  }
280  
281  const FunctionSamples *FunctionSamples::findFunctionSamplesAt(
282      const LineLocation &Loc, StringRef CalleeName,
283      SampleProfileReaderItaniumRemapper *Remapper,
284      const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
285          *FuncNameToProfNameMap) const {
286    CalleeName = getCanonicalFnName(CalleeName);
287  
288    auto I = CallsiteSamples.find(mapIRLocToProfileLoc(Loc));
289    if (I == CallsiteSamples.end())
290      return nullptr;
291    auto FS = I->second.find(getRepInFormat(CalleeName));
292    if (FS != I->second.end())
293      return &FS->second;
294  
295    if (FuncNameToProfNameMap && !FuncNameToProfNameMap->empty()) {
296      auto R = FuncNameToProfNameMap->find(FunctionId(CalleeName));
297      if (R != FuncNameToProfNameMap->end()) {
298        CalleeName = R->second.stringRef();
299        auto FS = I->second.find(getRepInFormat(CalleeName));
300        if (FS != I->second.end())
301          return &FS->second;
302      }
303    }
304  
305    if (Remapper) {
306      if (auto NameInProfile = Remapper->lookUpNameInProfile(CalleeName)) {
307        auto FS = I->second.find(getRepInFormat(*NameInProfile));
308        if (FS != I->second.end())
309          return &FS->second;
310      }
311    }
312    // If we cannot find exact match of the callee name, return the FS with
313    // the max total count. Only do this when CalleeName is not provided,
314    // i.e., only for indirect calls.
315    if (!CalleeName.empty())
316      return nullptr;
317    uint64_t MaxTotalSamples = 0;
318    const FunctionSamples *R = nullptr;
319    for (const auto &NameFS : I->second)
320      if (NameFS.second.getTotalSamples() >= MaxTotalSamples) {
321        MaxTotalSamples = NameFS.second.getTotalSamples();
322        R = &NameFS.second;
323      }
324    return R;
325  }
326  
327  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
328  LLVM_DUMP_METHOD void FunctionSamples::dump() const { print(dbgs(), 0); }
329  #endif
330  
331  std::error_code ProfileSymbolList::read(const uint8_t *Data,
332                                          uint64_t ListSize) {
333    const char *ListStart = reinterpret_cast<const char *>(Data);
334    uint64_t Size = 0;
335    uint64_t StrNum = 0;
336    while (Size < ListSize && StrNum < ProfileSymbolListCutOff) {
337      StringRef Str(ListStart + Size);
338      add(Str);
339      Size += Str.size() + 1;
340      StrNum++;
341    }
342    if (Size != ListSize && StrNum != ProfileSymbolListCutOff)
343      return sampleprof_error::malformed;
344    return sampleprof_error::success;
345  }
346  
347  void SampleContextTrimmer::trimAndMergeColdContextProfiles(
348      uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext,
349      uint32_t ColdContextFrameLength, bool TrimBaseProfileOnly) {
350    if (!TrimColdContext && !MergeColdContext)
351      return;
352  
353    // Nothing to merge if sample threshold is zero
354    if (ColdCountThreshold == 0)
355      return;
356  
357    // Trimming base profiles only is mainly to honor the preinliner decsion. When
358    // MergeColdContext is true preinliner decsion is not honored anyway so turn
359    // off TrimBaseProfileOnly.
360    if (MergeColdContext)
361      TrimBaseProfileOnly = false;
362  
363    // Filter the cold profiles from ProfileMap and move them into a tmp
364    // container
365    std::vector<std::pair<hash_code, const FunctionSamples *>> ColdProfiles;
366    for (const auto &I : ProfileMap) {
367      const SampleContext &Context = I.second.getContext();
368      const FunctionSamples &FunctionProfile = I.second;
369      if (FunctionProfile.getTotalSamples() < ColdCountThreshold &&
370          (!TrimBaseProfileOnly || Context.isBaseContext()))
371        ColdProfiles.emplace_back(I.first, &I.second);
372    }
373  
374    // Remove the cold profile from ProfileMap and merge them into
375    // MergedProfileMap by the last K frames of context
376    SampleProfileMap MergedProfileMap;
377    for (const auto &I : ColdProfiles) {
378      if (MergeColdContext) {
379        auto MergedContext = I.second->getContext().getContextFrames();
380        if (ColdContextFrameLength < MergedContext.size())
381          MergedContext = MergedContext.take_back(ColdContextFrameLength);
382        // Need to set MergedProfile's context here otherwise it will be lost.
383        FunctionSamples &MergedProfile = MergedProfileMap.create(MergedContext);
384        MergedProfile.merge(*I.second);
385      }
386      ProfileMap.erase(I.first);
387    }
388  
389    // Move the merged profiles into ProfileMap;
390    for (const auto &I : MergedProfileMap) {
391      // Filter the cold merged profile
392      if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold &&
393          ProfileMap.find(I.second.getContext()) == ProfileMap.end())
394        continue;
395      // Merge the profile if the original profile exists, otherwise just insert
396      // as a new profile. If inserted as a new profile from MergedProfileMap, it
397      // already has the right context.
398      auto Ret = ProfileMap.emplace(I.second.getContext(), FunctionSamples());
399      FunctionSamples &OrigProfile = Ret.first->second;
400      OrigProfile.merge(I.second);
401    }
402  }
403  
404  std::error_code ProfileSymbolList::write(raw_ostream &OS) {
405    // Sort the symbols before output. If doing compression.
406    // It will make the compression much more effective.
407    std::vector<StringRef> SortedList(Syms.begin(), Syms.end());
408    llvm::sort(SortedList);
409  
410    std::string OutputString;
411    for (auto &Sym : SortedList) {
412      OutputString.append(Sym.str());
413      OutputString.append(1, '\0');
414    }
415  
416    OS << OutputString;
417    return sampleprof_error::success;
418  }
419  
420  void ProfileSymbolList::dump(raw_ostream &OS) const {
421    OS << "======== Dump profile symbol list ========\n";
422    std::vector<StringRef> SortedList(Syms.begin(), Syms.end());
423    llvm::sort(SortedList);
424  
425    for (auto &Sym : SortedList)
426      OS << Sym << "\n";
427  }
428  
429  ProfileConverter::FrameNode *
430  ProfileConverter::FrameNode::getOrCreateChildFrame(const LineLocation &CallSite,
431                                                     FunctionId CalleeName) {
432    uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite);
433    auto It = AllChildFrames.find(Hash);
434    if (It != AllChildFrames.end()) {
435      assert(It->second.FuncName == CalleeName &&
436             "Hash collision for child context node");
437      return &It->second;
438    }
439  
440    AllChildFrames[Hash] = FrameNode(CalleeName, nullptr, CallSite);
441    return &AllChildFrames[Hash];
442  }
443  
444  ProfileConverter::ProfileConverter(SampleProfileMap &Profiles)
445      : ProfileMap(Profiles) {
446    for (auto &FuncSample : Profiles) {
447      FunctionSamples *FSamples = &FuncSample.second;
448      auto *NewNode = getOrCreateContextPath(FSamples->getContext());
449      assert(!NewNode->FuncSamples && "New node cannot have sample profile");
450      NewNode->FuncSamples = FSamples;
451    }
452  }
453  
454  ProfileConverter::FrameNode *
455  ProfileConverter::getOrCreateContextPath(const SampleContext &Context) {
456    auto Node = &RootFrame;
457    LineLocation CallSiteLoc(0, 0);
458    for (auto &Callsite : Context.getContextFrames()) {
459      Node = Node->getOrCreateChildFrame(CallSiteLoc, Callsite.Func);
460      CallSiteLoc = Callsite.Location;
461    }
462    return Node;
463  }
464  
465  void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
466    // Process each child profile. Add each child profile to callsite profile map
467    // of the current node `Node` if `Node` comes with a profile. Otherwise
468    // promote the child profile to a standalone profile.
469    auto *NodeProfile = Node.FuncSamples;
470    for (auto &It : Node.AllChildFrames) {
471      auto &ChildNode = It.second;
472      convertCSProfiles(ChildNode);
473      auto *ChildProfile = ChildNode.FuncSamples;
474      if (!ChildProfile)
475        continue;
476      SampleContext OrigChildContext = ChildProfile->getContext();
477      uint64_t OrigChildContextHash = OrigChildContext.getHashCode();
478      // Reset the child context to be contextless.
479      ChildProfile->getContext().setFunction(OrigChildContext.getFunction());
480      if (NodeProfile) {
481        // Add child profile to the callsite profile map.
482        auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
483        SamplesMap.emplace(OrigChildContext.getFunction(), *ChildProfile);
484        NodeProfile->addTotalSamples(ChildProfile->getTotalSamples());
485        // Remove the corresponding body sample for the callsite and update the
486        // total weight.
487        auto Count = NodeProfile->removeCalledTargetAndBodySample(
488            ChildNode.CallSiteLoc.LineOffset, ChildNode.CallSiteLoc.Discriminator,
489            OrigChildContext.getFunction());
490        NodeProfile->removeTotalSamples(Count);
491      }
492  
493      uint64_t NewChildProfileHash = 0;
494      // Separate child profile to be a standalone profile, if the current parent
495      // profile doesn't exist. This is a duplicating operation when the child
496      // profile is already incorporated into the parent which is still useful and
497      // thus done optionally. It is seen that duplicating context profiles into
498      // base profiles improves the code quality for thinlto build by allowing a
499      // profile in the prelink phase for to-be-fully-inlined functions.
500      if (!NodeProfile) {
501        ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
502        NewChildProfileHash = ChildProfile->getContext().getHashCode();
503      } else if (GenerateMergedBaseProfiles) {
504        ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
505        NewChildProfileHash = ChildProfile->getContext().getHashCode();
506        auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
507        SamplesMap[ChildProfile->getFunction()].getContext().setAttribute(
508            ContextDuplicatedIntoBase);
509      }
510  
511      // Remove the original child profile. Check if MD5 of new child profile
512      // collides with old profile, in this case the [] operator already
513      // overwritten it without the need of erase.
514      if (NewChildProfileHash != OrigChildContextHash)
515        ProfileMap.erase(OrigChildContextHash);
516    }
517  }
518  
519  void ProfileConverter::convertCSProfiles() { convertCSProfiles(RootFrame); }
520