xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1  //===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file implements the SampleProfileMatcher used for stale
10  // profile matching.
11  //
12  //===----------------------------------------------------------------------===//
13  
14  #include "llvm/Transforms/IPO/SampleProfileMatcher.h"
15  #include "llvm/IR/IntrinsicInst.h"
16  #include "llvm/IR/MDBuilder.h"
17  #include "llvm/Support/CommandLine.h"
18  
19  using namespace llvm;
20  using namespace sampleprof;
21  
22  #define DEBUG_TYPE "sample-profile-matcher"
23  
24  static cl::opt<unsigned> FuncProfileSimilarityThreshold(
25      "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
26      cl::desc("Consider a profile matches a function if the similarity of their "
27               "callee sequences is above the specified percentile."));
28  
29  static cl::opt<unsigned> MinFuncCountForCGMatching(
30      "min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
31      cl::desc("The minimum number of basic blocks required for a function to "
32               "run stale profile call graph matching."));
33  
34  static cl::opt<unsigned> MinCallCountForCGMatching(
35      "min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
36      cl::desc("The minimum number of call anchors required for a function to "
37               "run stale profile call graph matching."));
38  
39  extern cl::opt<bool> SalvageStaleProfile;
40  extern cl::opt<bool> SalvageUnusedProfile;
41  extern cl::opt<bool> PersistProfileStaleness;
42  extern cl::opt<bool> ReportProfileStaleness;
43  
44  static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
45      "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
46      cl::desc("The maximum number of callsites in a function, above which stale "
47               "profile matching will be skipped."));
48  
findIRAnchors(const Function & F,AnchorMap & IRAnchors) const49  void SampleProfileMatcher::findIRAnchors(const Function &F,
50                                           AnchorMap &IRAnchors) const {
51    // For inlined code, recover the original callsite and callee by finding the
52    // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
53    // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
54    auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
55      assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
56      const DILocation *PrevDIL = nullptr;
57      do {
58        PrevDIL = DIL;
59        DIL = DIL->getInlinedAt();
60      } while (DIL->getInlinedAt());
61  
62      LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
63          DIL, FunctionSamples::ProfileIsFS);
64      StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
65      return std::make_pair(Callsite, FunctionId(CalleeName));
66    };
67  
68    auto GetCanonicalCalleeName = [](const CallBase *CB) {
69      StringRef CalleeName = UnknownIndirectCallee;
70      if (Function *Callee = CB->getCalledFunction())
71        CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
72      return CalleeName;
73    };
74  
75    // Extract profile matching anchors in the IR.
76    for (auto &BB : F) {
77      for (auto &I : BB) {
78        DILocation *DIL = I.getDebugLoc();
79        if (!DIL)
80          continue;
81  
82        if (FunctionSamples::ProfileIsProbeBased) {
83          if (auto Probe = extractProbe(I)) {
84            // Flatten inlined IR for the matching.
85            if (DIL->getInlinedAt()) {
86              IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
87            } else {
88              // Use empty StringRef for basic block probe.
89              StringRef CalleeName;
90              if (const auto *CB = dyn_cast<CallBase>(&I)) {
91                // Skip the probe inst whose callee name is "llvm.pseudoprobe".
92                if (!isa<IntrinsicInst>(&I))
93                  CalleeName = GetCanonicalCalleeName(CB);
94              }
95              LineLocation Loc = LineLocation(Probe->Id, 0);
96              IRAnchors.emplace(Loc, FunctionId(CalleeName));
97            }
98          }
99        } else {
100          // TODO: For line-number based profile(AutoFDO), currently only support
101          // find callsite anchors. In future, we need to parse all the non-call
102          // instructions to extract the line locations for profile matching.
103          if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
104            continue;
105  
106          if (DIL->getInlinedAt()) {
107            IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
108          } else {
109            LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
110                DIL, FunctionSamples::ProfileIsFS);
111            StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
112            IRAnchors.emplace(Callsite, FunctionId(CalleeName));
113          }
114        }
115      }
116    }
117  }
118  
findProfileAnchors(const FunctionSamples & FS,AnchorMap & ProfileAnchors) const119  void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
120                                                AnchorMap &ProfileAnchors) const {
121    auto isInvalidLineOffset = [](uint32_t LineOffset) {
122      return LineOffset & 0x8000;
123    };
124  
125    auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
126                           AnchorMap &ProfileAnchors) {
127      auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
128      if (!Ret.second) {
129        // For multiple callees, which indicates it's an indirect call, we use a
130        // dummy name(UnknownIndirectCallee) as the indrect callee name.
131        Ret.first->second = FunctionId(UnknownIndirectCallee);
132      }
133    };
134  
135    for (const auto &I : FS.getBodySamples()) {
136      const LineLocation &Loc = I.first;
137      if (isInvalidLineOffset(Loc.LineOffset))
138        continue;
139      for (const auto &C : I.second.getCallTargets())
140        InsertAnchor(Loc, C.first, ProfileAnchors);
141    }
142  
143    for (const auto &I : FS.getCallsiteSamples()) {
144      const LineLocation &Loc = I.first;
145      if (isInvalidLineOffset(Loc.LineOffset))
146        continue;
147      for (const auto &C : I.second)
148        InsertAnchor(Loc, C.first, ProfileAnchors);
149    }
150  }
151  
functionHasProfile(const FunctionId & IRFuncName,Function * & FuncWithoutProfile)152  bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
153                                                Function *&FuncWithoutProfile) {
154    FuncWithoutProfile = nullptr;
155    auto R = FunctionsWithoutProfile.find(IRFuncName);
156    if (R != FunctionsWithoutProfile.end())
157      FuncWithoutProfile = R->second;
158    return !FuncWithoutProfile;
159  }
160  
isProfileUnused(const FunctionId & ProfileFuncName)161  bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
162    return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
163  }
164  
functionMatchesProfile(const FunctionId & IRFuncName,const FunctionId & ProfileFuncName,bool FindMatchedProfileOnly)165  bool SampleProfileMatcher::functionMatchesProfile(
166      const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
167      bool FindMatchedProfileOnly) {
168    if (IRFuncName == ProfileFuncName)
169      return true;
170    if (!SalvageUnusedProfile)
171      return false;
172  
173    // If IR function doesn't have profile and the profile is unused, try
174    // matching them.
175    Function *IRFunc = nullptr;
176    if (functionHasProfile(IRFuncName, IRFunc) ||
177        !isProfileUnused(ProfileFuncName))
178      return false;
179  
180    assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
181           "IR function should be different from profile function to match");
182    return functionMatchesProfile(*IRFunc, ProfileFuncName,
183                                  FindMatchedProfileOnly);
184  }
185  
186  LocToLocMap
longestCommonSequence(const AnchorList & AnchorList1,const AnchorList & AnchorList2,bool MatchUnusedFunction)187  SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
188                                              const AnchorList &AnchorList2,
189                                              bool MatchUnusedFunction) {
190    int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
191            MaxDepth = Size1 + Size2;
192    auto Index = [&](int32_t I) { return I + MaxDepth; };
193  
194    LocToLocMap EqualLocations;
195    if (MaxDepth == 0)
196      return EqualLocations;
197  
198    // Backtrack the SES result.
199    auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
200                         const AnchorList &AnchorList1,
201                         const AnchorList &AnchorList2,
202                         LocToLocMap &EqualLocations) {
203      int32_t X = Size1, Y = Size2;
204      for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
205        const auto &P = Trace[Depth];
206        int32_t K = X - Y;
207        int32_t PrevK = K;
208        if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
209          PrevK = K + 1;
210        else
211          PrevK = K - 1;
212  
213        int32_t PrevX = P[Index(PrevK)];
214        int32_t PrevY = PrevX - PrevK;
215        while (X > PrevX && Y > PrevY) {
216          X--;
217          Y--;
218          EqualLocations.insert({AnchorList1[X].first, AnchorList2[Y].first});
219        }
220  
221        if (Depth == 0)
222          break;
223  
224        if (Y == PrevY)
225          X--;
226        else if (X == PrevX)
227          Y--;
228        X = PrevX;
229        Y = PrevY;
230      }
231    };
232  
233    // The greedy LCS/SES algorithm.
234  
235    // An array contains the endpoints of the furthest reaching D-paths.
236    std::vector<int32_t> V(2 * MaxDepth + 1, -1);
237    V[Index(1)] = 0;
238    // Trace is used to backtrack the SES result.
239    std::vector<std::vector<int32_t>> Trace;
240    for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {
241      Trace.push_back(V);
242      for (int32_t K = -Depth; K <= Depth; K += 2) {
243        int32_t X = 0, Y = 0;
244        if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))
245          X = V[Index(K + 1)];
246        else
247          X = V[Index(K - 1)] + 1;
248        Y = X - K;
249        while (X < Size1 && Y < Size2 &&
250               functionMatchesProfile(
251                   AnchorList1[X].second, AnchorList2[Y].second,
252                   !MatchUnusedFunction /* Find matched function only */))
253          X++, Y++;
254  
255        V[Index(K)] = X;
256  
257        if (X >= Size1 && Y >= Size2) {
258          // Length of an SES is D.
259          Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations);
260          return EqualLocations;
261        }
262      }
263    }
264    // Length of an SES is greater than MaxDepth.
265    return EqualLocations;
266  }
267  
matchNonCallsiteLocs(const LocToLocMap & MatchedAnchors,const AnchorMap & IRAnchors,LocToLocMap & IRToProfileLocationMap)268  void SampleProfileMatcher::matchNonCallsiteLocs(
269      const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
270      LocToLocMap &IRToProfileLocationMap) {
271    auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
272      // Skip the unchanged location mapping to save memory.
273      if (From != To)
274        IRToProfileLocationMap.insert({From, To});
275    };
276  
277    // Use function's beginning location as the initial anchor.
278    int32_t LocationDelta = 0;
279    SmallVector<LineLocation> LastMatchedNonAnchors;
280    for (const auto &IR : IRAnchors) {
281      const auto &Loc = IR.first;
282      bool IsMatchedAnchor = false;
283      // Match the anchor location in lexical order.
284      auto R = MatchedAnchors.find(Loc);
285      if (R != MatchedAnchors.end()) {
286        const auto &Candidate = R->second;
287        InsertMatching(Loc, Candidate);
288        LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
289                          << " is matched from " << Loc << " to " << Candidate
290                          << "\n");
291        LocationDelta = Candidate.LineOffset - Loc.LineOffset;
292  
293        // Match backwards for non-anchor locations.
294        // The locations in LastMatchedNonAnchors have been matched forwards
295        // based on the previous anchor, spilt it evenly and overwrite the
296        // second half based on the current anchor.
297        for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
298             I < LastMatchedNonAnchors.size(); I++) {
299          const auto &L = LastMatchedNonAnchors[I];
300          uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
301          LineLocation Candidate(CandidateLineOffset, L.Discriminator);
302          InsertMatching(L, Candidate);
303          LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
304                            << " to " << Candidate << "\n");
305        }
306  
307        IsMatchedAnchor = true;
308        LastMatchedNonAnchors.clear();
309      }
310  
311      // Match forwards for non-anchor locations.
312      if (!IsMatchedAnchor) {
313        uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
314        LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
315        InsertMatching(Loc, Candidate);
316        LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
317                          << Candidate << "\n");
318        LastMatchedNonAnchors.emplace_back(Loc);
319      }
320    }
321  }
322  
323  // Filter the non-call locations from IRAnchors and ProfileAnchors and write
324  // them into a list for random access later.
getFilteredAnchorList(const AnchorMap & IRAnchors,const AnchorMap & ProfileAnchors,AnchorList & FilteredIRAnchorsList,AnchorList & FilteredProfileAnchorList)325  void SampleProfileMatcher::getFilteredAnchorList(
326      const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
327      AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
328    for (const auto &I : IRAnchors) {
329      if (I.second.stringRef().empty())
330        continue;
331      FilteredIRAnchorsList.emplace_back(I);
332    }
333  
334    for (const auto &I : ProfileAnchors)
335      FilteredProfileAnchorList.emplace_back(I);
336  }
337  
338  // Call target name anchor based profile fuzzy matching.
339  // Input:
340  // For IR locations, the anchor is the callee name of direct callsite; For
341  // profile locations, it's the call target name for BodySamples or inlinee's
342  // profile name for CallsiteSamples.
343  // Matching heuristic:
344  // First match all the anchors using the diff algorithm, then split the
345  // non-anchor locations between the two anchors evenly, first half are matched
346  // based on the start anchor, second half are matched based on the end anchor.
347  // For example, given:
348  // IR locations:      [1, 2(foo), 3, 5, 6(bar), 7]
349  // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
350  // The matching gives:
351  //   [1,    2(foo), 3,  5,  6(bar), 7]
352  //    |     |       |   |     |     |
353  //   [1, 2, 3(foo), 4,  7,  8(bar), 9]
354  // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
runStaleProfileMatching(const Function & F,const AnchorMap & IRAnchors,const AnchorMap & ProfileAnchors,LocToLocMap & IRToProfileLocationMap,bool RunCFGMatching,bool RunCGMatching)355  void SampleProfileMatcher::runStaleProfileMatching(
356      const Function &F, const AnchorMap &IRAnchors,
357      const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
358      bool RunCFGMatching, bool RunCGMatching) {
359    if (!RunCFGMatching && !RunCGMatching)
360      return;
361    LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
362                      << "\n");
363    assert(IRToProfileLocationMap.empty() &&
364           "Run stale profile matching only once per function");
365  
366    AnchorList FilteredProfileAnchorList;
367    AnchorList FilteredIRAnchorsList;
368    getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
369                          FilteredProfileAnchorList);
370  
371    if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
372      return;
373  
374    if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
375        FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
376      LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
377                        << " because the number of callsites in the IR is "
378                        << FilteredIRAnchorsList.size()
379                        << " and in the profile is "
380                        << FilteredProfileAnchorList.size() << "\n");
381      return;
382    }
383  
384    // Match the callsite anchors by finding the longest common subsequence
385    // between IR and profile.
386    // Define a match between two anchors as follows:
387    // 1) The function names of anchors are the same.
388    // 2) The similarity between the anchor functions is above a threshold if
389    // RunCGMatching is set.
390    // For 2), we only consider the anchor functions from IR and profile don't
391    // appear on either side to reduce the matching scope. Note that we need to
392    // use IR anchor as base(A side) to align with the order of
393    // IRToProfileLocationMap.
394    LocToLocMap MatchedAnchors =
395        longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
396                              RunCGMatching /* Match unused functions */);
397  
398    // CFG level matching:
399    // Apply the callsite matchings to infer matching for the basic
400    // block(non-callsite) locations and write the result to
401    // IRToProfileLocationMap.
402    if (RunCFGMatching)
403      matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
404  }
405  
runOnFunction(Function & F)406  void SampleProfileMatcher::runOnFunction(Function &F) {
407    // We need to use flattened function samples for matching.
408    // Unlike IR, which includes all callsites from the source code, the callsites
409    // in profile only show up when they are hit by samples, i,e. the profile
410    // callsites in one context may differ from those in another context. To get
411    // the maximum number of callsites, we merge the function profiles from all
412    // contexts, aka, the flattened profile to find profile anchors.
413    const auto *FSFlattened = getFlattenedSamplesFor(F);
414    if (SalvageUnusedProfile && !FSFlattened) {
415      // Apply the matching in place to find the new function's matched profile.
416      // TODO: For extended profile format, if a function profile is unused and
417      // it's top-level, even if the profile is matched, it's not found in the
418      // profile. This is because sample reader only read the used profile at the
419      // beginning, we need to support loading the profile on-demand in future.
420      auto R = FuncToProfileNameMap.find(&F);
421      if (R != FuncToProfileNameMap.end())
422        FSFlattened = getFlattenedSamplesFor(R->second);
423    }
424    if (!FSFlattened)
425      return;
426  
427    // Anchors for IR. It's a map from IR location to callee name, callee name is
428    // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
429    // for unknown indrect callee name.
430    AnchorMap IRAnchors;
431    findIRAnchors(F, IRAnchors);
432    // Anchors for profile. It's a map from callsite location to a set of callee
433    // name.
434    AnchorMap ProfileAnchors;
435    findProfileAnchors(*FSFlattened, ProfileAnchors);
436  
437    // Compute the callsite match states for profile staleness report.
438    if (ReportProfileStaleness || PersistProfileStaleness)
439      recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
440  
441    if (!SalvageStaleProfile)
442      return;
443    // For probe-based profiles, run matching only when profile checksum is
444    // mismatched.
445    bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
446                            !ProbeManager->profileIsValid(F, *FSFlattened);
447    bool RunCFGMatching =
448        !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
449    bool RunCGMatching = SalvageUnusedProfile;
450    // For imported functions, the checksum metadata(pseudo_probe_desc) are
451    // dropped, so we leverage function attribute(profile-checksum-mismatch) to
452    // transfer the info: add the attribute during pre-link phase and check it
453    // during post-link phase(see "profileIsValid").
454    if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
455      F.addFnAttr("profile-checksum-mismatch");
456  
457    // The matching result will be saved to IRToProfileLocationMap, create a
458    // new map for each function.
459    auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
460    runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
461                            RunCFGMatching, RunCGMatching);
462    // Find and update callsite match states after matching.
463    if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
464      recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
465                                &IRToProfileLocationMap);
466  }
467  
recordCallsiteMatchStates(const Function & F,const AnchorMap & IRAnchors,const AnchorMap & ProfileAnchors,const LocToLocMap * IRToProfileLocationMap)468  void SampleProfileMatcher::recordCallsiteMatchStates(
469      const Function &F, const AnchorMap &IRAnchors,
470      const AnchorMap &ProfileAnchors,
471      const LocToLocMap *IRToProfileLocationMap) {
472    bool IsPostMatch = IRToProfileLocationMap != nullptr;
473    auto &CallsiteMatchStates =
474        FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
475  
476    auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
477      // IRToProfileLocationMap is null in pre-match phrase.
478      if (!IRToProfileLocationMap)
479        return IRLoc;
480      const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
481      if (ProfileLoc != IRToProfileLocationMap->end())
482        return ProfileLoc->second;
483      else
484        return IRLoc;
485    };
486  
487    for (const auto &I : IRAnchors) {
488      // After fuzzy profile matching, use the matching result to remap the
489      // current IR callsite.
490      const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
491      const auto &IRCalleeId = I.second;
492      const auto &It = ProfileAnchors.find(ProfileLoc);
493      if (It == ProfileAnchors.end())
494        continue;
495      const auto &ProfCalleeId = It->second;
496      if (IRCalleeId == ProfCalleeId) {
497        auto It = CallsiteMatchStates.find(ProfileLoc);
498        if (It == CallsiteMatchStates.end())
499          CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
500        else if (IsPostMatch) {
501          if (It->second == MatchState::InitialMatch)
502            It->second = MatchState::UnchangedMatch;
503          else if (It->second == MatchState::InitialMismatch)
504            It->second = MatchState::RecoveredMismatch;
505        }
506      }
507    }
508  
509    // Check if there are any callsites in the profile that does not match to any
510    // IR callsites.
511    for (const auto &I : ProfileAnchors) {
512      const auto &Loc = I.first;
513      assert(!I.second.stringRef().empty() && "Callees should not be empty");
514      auto It = CallsiteMatchStates.find(Loc);
515      if (It == CallsiteMatchStates.end())
516        CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
517      else if (IsPostMatch) {
518        // Update the state if it's not matched(UnchangedMatch or
519        // RecoveredMismatch).
520        if (It->second == MatchState::InitialMismatch)
521          It->second = MatchState::UnchangedMismatch;
522        else if (It->second == MatchState::InitialMatch)
523          It->second = MatchState::RemovedMatch;
524      }
525    }
526  }
527  
countMismatchedFuncSamples(const FunctionSamples & FS,bool IsTopLevel)528  void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
529                                                        bool IsTopLevel) {
530    const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
531    // Skip the function that is external or renamed.
532    if (!FuncDesc)
533      return;
534  
535    if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
536      if (IsTopLevel)
537        NumStaleProfileFunc++;
538      // Given currently all probe ids are after block probe ids, once the
539      // checksum is mismatched, it's likely all the callites are mismatched and
540      // dropped. We conservatively count all the samples as mismatched and stop
541      // counting the inlinees' profiles.
542      MismatchedFunctionSamples += FS.getTotalSamples();
543      return;
544    }
545  
546    // Even the current-level function checksum is matched, it's possible that the
547    // nested inlinees' checksums are mismatched that affect the inlinee's sample
548    // loading, we need to go deeper to check the inlinees' function samples.
549    // Similarly, count all the samples as mismatched if the inlinee's checksum is
550    // mismatched using this recursive function.
551    for (const auto &I : FS.getCallsiteSamples())
552      for (const auto &CS : I.second)
553        countMismatchedFuncSamples(CS.second, false);
554  }
555  
countMismatchedCallsiteSamples(const FunctionSamples & FS)556  void SampleProfileMatcher::countMismatchedCallsiteSamples(
557      const FunctionSamples &FS) {
558    auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
559    // Skip it if no mismatched callsite or this is an external function.
560    if (It == FuncCallsiteMatchStates.end() || It->second.empty())
561      return;
562    const auto &CallsiteMatchStates = It->second;
563  
564    auto findMatchState = [&](const LineLocation &Loc) {
565      auto It = CallsiteMatchStates.find(Loc);
566      if (It == CallsiteMatchStates.end())
567        return MatchState::Unknown;
568      return It->second;
569    };
570  
571    auto AttributeMismatchedSamples = [&](const enum MatchState &State,
572                                          uint64_t Samples) {
573      if (isMismatchState(State))
574        MismatchedCallsiteSamples += Samples;
575      else if (State == MatchState::RecoveredMismatch)
576        RecoveredCallsiteSamples += Samples;
577    };
578  
579    // The non-inlined callsites are saved in the body samples of function
580    // profile, go through it to count the non-inlined callsite samples.
581    for (const auto &I : FS.getBodySamples())
582      AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
583  
584    // Count the inlined callsite samples.
585    for (const auto &I : FS.getCallsiteSamples()) {
586      auto State = findMatchState(I.first);
587      uint64_t CallsiteSamples = 0;
588      for (const auto &CS : I.second)
589        CallsiteSamples += CS.second.getTotalSamples();
590      AttributeMismatchedSamples(State, CallsiteSamples);
591  
592      if (isMismatchState(State))
593        continue;
594  
595      // When the current level of inlined call site matches the profiled call
596      // site, we need to go deeper along the inline tree to count mismatches from
597      // lower level inlinees.
598      for (const auto &CS : I.second)
599        countMismatchedCallsiteSamples(CS.second);
600    }
601  }
602  
countMismatchCallsites(const FunctionSamples & FS)603  void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
604    auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
605    // Skip it if no mismatched callsite or this is an external function.
606    if (It == FuncCallsiteMatchStates.end() || It->second.empty())
607      return;
608    const auto &MatchStates = It->second;
609    [[maybe_unused]] bool OnInitialState =
610        isInitialState(MatchStates.begin()->second);
611    for (const auto &I : MatchStates) {
612      TotalProfiledCallsites++;
613      assert(
614          (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
615          "Profile matching state is inconsistent");
616  
617      if (isMismatchState(I.second))
618        NumMismatchedCallsites++;
619      else if (I.second == MatchState::RecoveredMismatch)
620        NumRecoveredCallsites++;
621    }
622  }
623  
countCallGraphRecoveredSamples(const FunctionSamples & FS,std::unordered_set<FunctionId> & CallGraphRecoveredProfiles)624  void SampleProfileMatcher::countCallGraphRecoveredSamples(
625      const FunctionSamples &FS,
626      std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
627    if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
628      NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
629      return;
630    }
631  
632    for (const auto &CM : FS.getCallsiteSamples()) {
633      for (const auto &CS : CM.second) {
634        countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
635      }
636    }
637  }
638  
computeAndReportProfileStaleness()639  void SampleProfileMatcher::computeAndReportProfileStaleness() {
640    if (!ReportProfileStaleness && !PersistProfileStaleness)
641      return;
642  
643    std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
644    if (SalvageUnusedProfile) {
645      for (const auto &I : FuncToProfileNameMap) {
646        CallGraphRecoveredProfiles.insert(I.second);
647        if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
648          continue;
649        NumCallGraphRecoveredProfiledFunc++;
650      }
651    }
652  
653    // Count profile mismatches for profile staleness report.
654    for (const auto &F : M) {
655      if (skipProfileForFunction(F))
656        continue;
657      // As the stats will be merged by linker, skip reporting the metrics for
658      // imported functions to avoid repeated counting.
659      if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
660        continue;
661      const auto *FS = Reader.getSamplesFor(F);
662      if (!FS)
663        continue;
664      TotalProfiledFunc++;
665      TotalFunctionSamples += FS->getTotalSamples();
666  
667      if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
668        countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);
669  
670      // Checksum mismatch is only used in pseudo-probe mode.
671      if (FunctionSamples::ProfileIsProbeBased)
672        countMismatchedFuncSamples(*FS, true);
673  
674      // Count mismatches and samples for calliste.
675      countMismatchCallsites(*FS);
676      countMismatchedCallsiteSamples(*FS);
677    }
678  
679    if (ReportProfileStaleness) {
680      if (FunctionSamples::ProfileIsProbeBased) {
681        errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
682               << ") of functions' profile are invalid and ("
683               << MismatchedFunctionSamples << "/" << TotalFunctionSamples
684               << ") of samples are discarded due to function hash mismatch.\n";
685      }
686      if (SalvageUnusedProfile) {
687        errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
688               << TotalProfiledFunc << ") of functions' profile are matched and ("
689               << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
690               << ") of samples are reused by call graph matching.\n";
691      }
692  
693      errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
694             << TotalProfiledCallsites
695             << ") of callsites' profile are invalid and ("
696             << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
697             << TotalFunctionSamples
698             << ") of samples are discarded due to callsite location mismatch.\n";
699      errs() << "(" << NumRecoveredCallsites << "/"
700             << (NumRecoveredCallsites + NumMismatchedCallsites)
701             << ") of callsites and (" << RecoveredCallsiteSamples << "/"
702             << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
703             << ") of samples are recovered by stale profile matching.\n";
704    }
705  
706    if (PersistProfileStaleness) {
707      LLVMContext &Ctx = M.getContext();
708      MDBuilder MDB(Ctx);
709  
710      SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
711      if (FunctionSamples::ProfileIsProbeBased) {
712        ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
713        ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
714        ProfStatsVec.emplace_back("MismatchedFunctionSamples",
715                                  MismatchedFunctionSamples);
716        ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
717      }
718  
719      if (SalvageUnusedProfile) {
720        ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
721                                  NumCallGraphRecoveredProfiledFunc);
722        ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
723                                  NumCallGraphRecoveredFuncSamples);
724      }
725  
726      ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
727      ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
728      ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
729      ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
730                                MismatchedCallsiteSamples);
731      ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
732                                RecoveredCallsiteSamples);
733  
734      auto *MD = MDB.createLLVMStats(ProfStatsVec);
735      auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
736      NMD->addOperand(MD);
737    }
738  }
739  
findFunctionsWithoutProfile()740  void SampleProfileMatcher::findFunctionsWithoutProfile() {
741    // TODO: Support MD5 profile.
742    if (FunctionSamples::UseMD5)
743      return;
744    StringSet<> NamesInProfile;
745    if (auto NameTable = Reader.getNameTable()) {
746      for (auto Name : *NameTable)
747        NamesInProfile.insert(Name.stringRef());
748    }
749  
750    for (auto &F : M) {
751      // Skip declarations, as even if the function can be matched, we have
752      // nothing to do with it.
753      if (F.isDeclaration())
754        continue;
755  
756      StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
757      const auto *FS = getFlattenedSamplesFor(F);
758      if (FS)
759        continue;
760  
761      // For extended binary, functions fully inlined may not be loaded in the
762      // top-level profile, so check the NameTable which has the all symbol names
763      // in profile.
764      if (NamesInProfile.count(CanonFName))
765        continue;
766  
767      // For extended binary, non-profiled function symbols are in the profile
768      // symbol list table.
769      if (PSL && PSL->contains(CanonFName))
770        continue;
771  
772      LLVM_DEBUG(dbgs() << "Function " << CanonFName
773                        << " is not in profile or profile symbol list.\n");
774      FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
775    }
776  }
777  
functionMatchesProfileHelper(const Function & IRFunc,const FunctionId & ProfFunc)778  bool SampleProfileMatcher::functionMatchesProfileHelper(
779      const Function &IRFunc, const FunctionId &ProfFunc) {
780    // The value is in the range [0, 1]. The bigger the value is, the more similar
781    // two sequences are.
782    float Similarity = 0.0;
783  
784    const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
785    if (!FSFlattened)
786      return false;
787    // The check for similarity or checksum may not be reliable if the function is
788    // tiny, we use the number of basic block as a proxy for the function
789    // complexity and skip the matching if it's too small.
790    if (IRFunc.size() < MinFuncCountForCGMatching ||
791        FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching)
792      return false;
793  
794    // For probe-based function, we first trust the checksum info. If the checksum
795    // doesn't match, we continue checking for similarity.
796    if (FunctionSamples::ProfileIsProbeBased) {
797      const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
798      if (FuncDesc &&
799          !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
800        LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
801                          << "(IR) and " << ProfFunc << "(Profile) match.\n");
802  
803        return true;
804      }
805    }
806  
807    AnchorMap IRAnchors;
808    findIRAnchors(IRFunc, IRAnchors);
809    AnchorMap ProfileAnchors;
810    findProfileAnchors(*FSFlattened, ProfileAnchors);
811  
812    AnchorList FilteredIRAnchorsList;
813    AnchorList FilteredProfileAnchorList;
814    getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
815                          FilteredProfileAnchorList);
816  
817    // Similarly skip the matching if the num of anchors is not enough.
818    if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
819        FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
820      return false;
821  
822    // Use the diff algorithm to find the LCS between IR and profile.
823  
824    // Don't recursively match the callee function to avoid infinite matching,
825    // callee functions will be handled later since it's processed in top-down
826    // order .
827    LocToLocMap MatchedAnchors =
828        longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
829                              false /* Match unused functions */);
830  
831    Similarity =
832        static_cast<float>(MatchedAnchors.size()) * 2 /
833        (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
834  
835    LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
836                      << "(IR) and " << ProfFunc << "(profile) is "
837                      << format("%.2f", Similarity) << "\n");
838    assert((Similarity >= 0 && Similarity <= 1.0) &&
839           "Similarity value should be in [0, 1]");
840    return Similarity * 100 > FuncProfileSimilarityThreshold;
841  }
842  
843  // If FindMatchedProfileOnly is set to true, only use the processed function
844  // results. This is used for skipping the repeated recursive matching.
functionMatchesProfile(Function & IRFunc,const FunctionId & ProfFunc,bool FindMatchedProfileOnly)845  bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
846                                                    const FunctionId &ProfFunc,
847                                                    bool FindMatchedProfileOnly) {
848    auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
849    if (R != FuncProfileMatchCache.end())
850      return R->second;
851  
852    if (FindMatchedProfileOnly)
853      return false;
854  
855    bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
856    FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
857    if (Matched) {
858      FuncToProfileNameMap[&IRFunc] = ProfFunc;
859      LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
860                        << " matches profile:" << ProfFunc << "\n");
861    }
862  
863    return Matched;
864  }
865  
runOnModule()866  void SampleProfileMatcher::runOnModule() {
867    ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
868                                     FunctionSamples::ProfileIsCS);
869    if (SalvageUnusedProfile)
870      findFunctionsWithoutProfile();
871  
872    // Process the matching in top-down order so that the caller matching result
873    // can be used to the callee matching.
874    std::vector<Function *> TopDownFunctionList;
875    TopDownFunctionList.reserve(M.size());
876    buildTopDownFuncOrder(CG, TopDownFunctionList);
877    for (auto *F : TopDownFunctionList) {
878      if (skipProfileForFunction(*F))
879        continue;
880      runOnFunction(*F);
881    }
882  
883    // Update the data in SampleLoader.
884    if (SalvageUnusedProfile)
885      for (auto &I : FuncToProfileNameMap) {
886        assert(I.first && "New function is null");
887        FunctionId FuncName(I.first->getName());
888        FuncNameToProfNameMap->emplace(FuncName, I.second);
889        // We need to remove the old entry to avoid duplicating the function
890        // processing.
891        SymbolMap->erase(FuncName);
892        SymbolMap->emplace(I.second, I.first);
893      }
894  
895    if (SalvageStaleProfile)
896      distributeIRToProfileLocationMap();
897  
898    computeAndReportProfileStaleness();
899  }
900  
distributeIRToProfileLocationMap(FunctionSamples & FS)901  void SampleProfileMatcher::distributeIRToProfileLocationMap(
902      FunctionSamples &FS) {
903    const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
904    if (ProfileMappings != FuncMappings.end()) {
905      FS.setIRToProfileLocationMap(&(ProfileMappings->second));
906    }
907  
908    for (auto &Callees :
909         const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
910      for (auto &FS : Callees.second) {
911        distributeIRToProfileLocationMap(FS.second);
912      }
913    }
914  }
915  
916  // Use a central place to distribute the matching results. Outlined and inlined
917  // profile with the function name will be set to the same pointer.
distributeIRToProfileLocationMap()918  void SampleProfileMatcher::distributeIRToProfileLocationMap() {
919    for (auto &I : Reader.getProfiles()) {
920      distributeIRToProfileLocationMap(I.second);
921    }
922  }
923