xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileMatcher used for stale
10 // profile matching.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/IPO/SampleProfileMatcher.h"
15 #include "llvm/Demangle/Demangle.h"
16 #include "llvm/IR/IntrinsicInst.h"
17 #include "llvm/IR/MDBuilder.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Transforms/Utils/LongestCommonSequence.h"
20 
21 using namespace llvm;
22 using namespace sampleprof;
23 
24 #define DEBUG_TYPE "sample-profile-matcher"
25 
26 static cl::opt<unsigned> FuncProfileSimilarityThreshold(
27     "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
28     cl::desc("Consider a profile matches a function if the similarity of their "
29              "callee sequences is above the specified percentile."));
30 
31 static cl::opt<unsigned> MinFuncCountForCGMatching(
32     "min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
33     cl::desc("The minimum number of basic blocks required for a function to "
34              "run stale profile call graph matching."));
35 
36 static cl::opt<unsigned> MinCallCountForCGMatching(
37     "min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
38     cl::desc("The minimum number of call anchors required for a function to "
39              "run stale profile call graph matching."));
40 
41 static cl::opt<bool> LoadFuncProfileforCGMatching(
42     "load-func-profile-for-cg-matching", cl::Hidden, cl::init(true),
43     cl::desc(
44         "Load top-level profiles that the sample reader initially skipped for "
45         "the call-graph matching (only meaningful for extended binary "
46         "format)"));
47 
48 extern cl::opt<bool> SalvageStaleProfile;
49 extern cl::opt<bool> SalvageUnusedProfile;
50 extern cl::opt<bool> PersistProfileStaleness;
51 extern cl::opt<bool> ReportProfileStaleness;
52 
53 static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
54     "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
55     cl::desc("The maximum number of callsites in a function, above which stale "
56              "profile matching will be skipped."));
57 
58 void SampleProfileMatcher::findIRAnchors(const Function &F,
59                                          AnchorMap &IRAnchors) const {
60   // For inlined code, recover the original callsite and callee by finding the
61   // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
62   // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
63   auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
64     assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
65     const DILocation *PrevDIL = nullptr;
66     do {
67       PrevDIL = DIL;
68       DIL = DIL->getInlinedAt();
69     } while (DIL->getInlinedAt());
70 
71     LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
72         DIL, FunctionSamples::ProfileIsFS);
73     StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
74     return std::make_pair(Callsite, FunctionId(CalleeName));
75   };
76 
77   auto GetCanonicalCalleeName = [](const CallBase *CB) {
78     StringRef CalleeName = UnknownIndirectCallee;
79     if (Function *Callee = CB->getCalledFunction())
80       CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
81     return CalleeName;
82   };
83 
84   // Extract profile matching anchors in the IR.
85   for (auto &BB : F) {
86     for (auto &I : BB) {
87       DILocation *DIL = I.getDebugLoc();
88       if (!DIL)
89         continue;
90 
91       if (FunctionSamples::ProfileIsProbeBased) {
92         if (auto Probe = extractProbe(I)) {
93           // Flatten inlined IR for the matching.
94           if (DIL->getInlinedAt()) {
95             IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
96           } else {
97             // Use empty StringRef for basic block probe.
98             StringRef CalleeName;
99             if (const auto *CB = dyn_cast<CallBase>(&I)) {
100               // Skip the probe inst whose callee name is "llvm.pseudoprobe".
101               if (!isa<IntrinsicInst>(&I))
102                 CalleeName = GetCanonicalCalleeName(CB);
103             }
104             LineLocation Loc = LineLocation(Probe->Id, 0);
105             IRAnchors.emplace(Loc, FunctionId(CalleeName));
106           }
107         }
108       } else {
109         // TODO: For line-number based profile(AutoFDO), currently only support
110         // find callsite anchors. In future, we need to parse all the non-call
111         // instructions to extract the line locations for profile matching.
112         if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
113           continue;
114 
115         if (DIL->getInlinedAt()) {
116           IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
117         } else {
118           LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
119               DIL, FunctionSamples::ProfileIsFS);
120           StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
121           IRAnchors.emplace(Callsite, FunctionId(CalleeName));
122         }
123       }
124     }
125   }
126 }
127 
128 void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
129                                               AnchorMap &ProfileAnchors) const {
130   auto isInvalidLineOffset = [](uint32_t LineOffset) {
131     return LineOffset & 0x8000;
132   };
133 
134   auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
135                          AnchorMap &ProfileAnchors) {
136     auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
137     if (!Ret.second) {
138       // For multiple callees, which indicates it's an indirect call, we use a
139       // dummy name(UnknownIndirectCallee) as the indrect callee name.
140       Ret.first->second = FunctionId(UnknownIndirectCallee);
141     }
142   };
143 
144   for (const auto &I : FS.getBodySamples()) {
145     const LineLocation &Loc = I.first;
146     if (isInvalidLineOffset(Loc.LineOffset))
147       continue;
148     for (const auto &C : I.second.getCallTargets())
149       InsertAnchor(Loc, C.first, ProfileAnchors);
150   }
151 
152   for (const auto &I : FS.getCallsiteSamples()) {
153     const LineLocation &Loc = I.first;
154     if (isInvalidLineOffset(Loc.LineOffset))
155       continue;
156     for (const auto &C : I.second)
157       InsertAnchor(Loc, C.first, ProfileAnchors);
158   }
159 }
160 
161 bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
162                                               Function *&FuncWithoutProfile) {
163   FuncWithoutProfile = nullptr;
164   auto R = FunctionsWithoutProfile.find(IRFuncName);
165   if (R != FunctionsWithoutProfile.end())
166     FuncWithoutProfile = R->second;
167   return !FuncWithoutProfile;
168 }
169 
170 bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
171   return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
172 }
173 
174 bool SampleProfileMatcher::functionMatchesProfile(
175     const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
176     bool FindMatchedProfileOnly) {
177   if (IRFuncName == ProfileFuncName)
178     return true;
179   if (!SalvageUnusedProfile)
180     return false;
181 
182   // If IR function doesn't have profile and the profile is unused, try
183   // matching them.
184   Function *IRFunc = nullptr;
185   if (functionHasProfile(IRFuncName, IRFunc) ||
186       !isProfileUnused(ProfileFuncName))
187     return false;
188 
189   assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
190          "IR function should be different from profile function to match");
191   return functionMatchesProfile(*IRFunc, ProfileFuncName,
192                                 FindMatchedProfileOnly);
193 }
194 
195 LocToLocMap
196 SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
197                                             const AnchorList &AnchorList2,
198                                             bool MatchUnusedFunction) {
199   LocToLocMap MatchedAnchors;
200   llvm::longestCommonSequence<LineLocation, FunctionId>(
201       AnchorList1, AnchorList2,
202       [&](const FunctionId &A, const FunctionId &B) {
203         return functionMatchesProfile(
204             A, B,
205             !MatchUnusedFunction // Find matched function only
206         );
207       },
208       [&](LineLocation A, LineLocation B) {
209         MatchedAnchors.try_emplace(A, B);
210       });
211   return MatchedAnchors;
212 }
213 
214 void SampleProfileMatcher::matchNonCallsiteLocs(
215     const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
216     LocToLocMap &IRToProfileLocationMap) {
217   auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
218     // Skip the unchanged location mapping to save memory.
219     if (From != To)
220       IRToProfileLocationMap.insert({From, To});
221   };
222 
223   // Use function's beginning location as the initial anchor.
224   int32_t LocationDelta = 0;
225   SmallVector<LineLocation> LastMatchedNonAnchors;
226   for (const auto &IR : IRAnchors) {
227     const auto &Loc = IR.first;
228     bool IsMatchedAnchor = false;
229     // Match the anchor location in lexical order.
230     auto R = MatchedAnchors.find(Loc);
231     if (R != MatchedAnchors.end()) {
232       const auto &Candidate = R->second;
233       InsertMatching(Loc, Candidate);
234       LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
235                         << " is matched from " << Loc << " to " << Candidate
236                         << "\n");
237       LocationDelta = Candidate.LineOffset - Loc.LineOffset;
238 
239       // Match backwards for non-anchor locations.
240       // The locations in LastMatchedNonAnchors have been matched forwards
241       // based on the previous anchor, spilt it evenly and overwrite the
242       // second half based on the current anchor.
243       for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
244            I < LastMatchedNonAnchors.size(); I++) {
245         const auto &L = LastMatchedNonAnchors[I];
246         uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
247         LineLocation Candidate(CandidateLineOffset, L.Discriminator);
248         InsertMatching(L, Candidate);
249         LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
250                           << " to " << Candidate << "\n");
251       }
252 
253       IsMatchedAnchor = true;
254       LastMatchedNonAnchors.clear();
255     }
256 
257     // Match forwards for non-anchor locations.
258     if (!IsMatchedAnchor) {
259       uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
260       LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
261       InsertMatching(Loc, Candidate);
262       LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
263                         << Candidate << "\n");
264       LastMatchedNonAnchors.emplace_back(Loc);
265     }
266   }
267 }
268 
269 // Filter the non-call locations from IRAnchors and ProfileAnchors and write
270 // them into a list for random access later.
271 void SampleProfileMatcher::getFilteredAnchorList(
272     const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
273     AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
274   for (const auto &I : IRAnchors) {
275     if (I.second.stringRef().empty())
276       continue;
277     FilteredIRAnchorsList.emplace_back(I);
278   }
279 
280   for (const auto &I : ProfileAnchors)
281     FilteredProfileAnchorList.emplace_back(I);
282 }
283 
284 // Call target name anchor based profile fuzzy matching.
285 // Input:
286 // For IR locations, the anchor is the callee name of direct callsite; For
287 // profile locations, it's the call target name for BodySamples or inlinee's
288 // profile name for CallsiteSamples.
289 // Matching heuristic:
290 // First match all the anchors using the diff algorithm, then split the
291 // non-anchor locations between the two anchors evenly, first half are matched
292 // based on the start anchor, second half are matched based on the end anchor.
293 // For example, given:
294 // IR locations:      [1, 2(foo), 3, 5, 6(bar), 7]
295 // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
296 // The matching gives:
297 //   [1,    2(foo), 3,  5,  6(bar), 7]
298 //    |     |       |   |     |     |
299 //   [1, 2, 3(foo), 4,  7,  8(bar), 9]
300 // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
301 void SampleProfileMatcher::runStaleProfileMatching(
302     const Function &F, const AnchorMap &IRAnchors,
303     const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
304     bool RunCFGMatching, bool RunCGMatching) {
305   if (!RunCFGMatching && !RunCGMatching)
306     return;
307   LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
308                     << "\n");
309   assert(IRToProfileLocationMap.empty() &&
310          "Run stale profile matching only once per function");
311 
312   AnchorList FilteredProfileAnchorList;
313   AnchorList FilteredIRAnchorsList;
314   getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
315                         FilteredProfileAnchorList);
316 
317   if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
318     return;
319 
320   if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
321       FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
322     LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
323                       << " because the number of callsites in the IR is "
324                       << FilteredIRAnchorsList.size()
325                       << " and in the profile is "
326                       << FilteredProfileAnchorList.size() << "\n");
327     return;
328   }
329 
330   // Match the callsite anchors by finding the longest common subsequence
331   // between IR and profile.
332   // Define a match between two anchors as follows:
333   // 1) The function names of anchors are the same.
334   // 2) The similarity between the anchor functions is above a threshold if
335   // RunCGMatching is set.
336   // For 2), we only consider the anchor functions from IR and profile don't
337   // appear on either side to reduce the matching scope. Note that we need to
338   // use IR anchor as base(A side) to align with the order of
339   // IRToProfileLocationMap.
340   LocToLocMap MatchedAnchors =
341       longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
342                             RunCGMatching /* Match unused functions */);
343 
344   // CFG level matching:
345   // Apply the callsite matchings to infer matching for the basic
346   // block(non-callsite) locations and write the result to
347   // IRToProfileLocationMap.
348   if (RunCFGMatching)
349     matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
350 }
351 
352 void SampleProfileMatcher::runOnFunction(Function &F) {
353   // We need to use flattened function samples for matching.
354   // Unlike IR, which includes all callsites from the source code, the callsites
355   // in profile only show up when they are hit by samples, i,e. the profile
356   // callsites in one context may differ from those in another context. To get
357   // the maximum number of callsites, we merge the function profiles from all
358   // contexts, aka, the flattened profile to find profile anchors.
359   const auto *FSForMatching = getFlattenedSamplesFor(F);
360   if (SalvageUnusedProfile && !FSForMatching) {
361     // Apply the matching in place to find the new function's matched profile.
362     auto R = FuncToProfileNameMap.find(&F);
363     if (R != FuncToProfileNameMap.end()) {
364       FSForMatching = getFlattenedSamplesFor(R->second);
365       // Try to find the salvaged top-level profiles that are explicitly loaded
366       // for the matching, see "functionMatchesProfileHelper" for the details.
367       if (!FSForMatching && LoadFuncProfileforCGMatching)
368         FSForMatching = Reader.getSamplesFor(R->second.stringRef());
369     }
370   }
371   if (!FSForMatching)
372     return;
373 
374   // Anchors for IR. It's a map from IR location to callee name, callee name is
375   // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
376   // for unknown indrect callee name.
377   AnchorMap IRAnchors;
378   findIRAnchors(F, IRAnchors);
379   // Anchors for profile. It's a map from callsite location to a set of callee
380   // name.
381   AnchorMap ProfileAnchors;
382   findProfileAnchors(*FSForMatching, ProfileAnchors);
383 
384   // Compute the callsite match states for profile staleness report.
385   if (ReportProfileStaleness || PersistProfileStaleness)
386     recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
387 
388   if (!SalvageStaleProfile)
389     return;
390   // For probe-based profiles, run matching only when profile checksum is
391   // mismatched.
392   bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
393                           !ProbeManager->profileIsValid(F, *FSForMatching);
394   bool RunCFGMatching =
395       !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
396   bool RunCGMatching = SalvageUnusedProfile;
397   // For imported functions, the checksum metadata(pseudo_probe_desc) are
398   // dropped, so we leverage function attribute(profile-checksum-mismatch) to
399   // transfer the info: add the attribute during pre-link phase and check it
400   // during post-link phase(see "profileIsValid").
401   if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
402     F.addFnAttr("profile-checksum-mismatch");
403 
404   // The matching result will be saved to IRToProfileLocationMap, create a
405   // new map for each function.
406   auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
407   runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
408                           RunCFGMatching, RunCGMatching);
409   // Find and update callsite match states after matching.
410   if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
411     recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
412                               &IRToProfileLocationMap);
413 }
414 
415 void SampleProfileMatcher::recordCallsiteMatchStates(
416     const Function &F, const AnchorMap &IRAnchors,
417     const AnchorMap &ProfileAnchors,
418     const LocToLocMap *IRToProfileLocationMap) {
419   bool IsPostMatch = IRToProfileLocationMap != nullptr;
420   auto &CallsiteMatchStates =
421       FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
422 
423   auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
424     // IRToProfileLocationMap is null in pre-match phrase.
425     if (!IRToProfileLocationMap)
426       return IRLoc;
427     const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
428     if (ProfileLoc != IRToProfileLocationMap->end())
429       return ProfileLoc->second;
430     else
431       return IRLoc;
432   };
433 
434   for (const auto &I : IRAnchors) {
435     // After fuzzy profile matching, use the matching result to remap the
436     // current IR callsite.
437     const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
438     const auto &IRCalleeId = I.second;
439     const auto &It = ProfileAnchors.find(ProfileLoc);
440     if (It == ProfileAnchors.end())
441       continue;
442     const auto &ProfCalleeId = It->second;
443     if (IRCalleeId == ProfCalleeId) {
444       auto It = CallsiteMatchStates.find(ProfileLoc);
445       if (It == CallsiteMatchStates.end())
446         CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
447       else if (IsPostMatch) {
448         if (It->second == MatchState::InitialMatch)
449           It->second = MatchState::UnchangedMatch;
450         else if (It->second == MatchState::InitialMismatch)
451           It->second = MatchState::RecoveredMismatch;
452       }
453     }
454   }
455 
456   // Check if there are any callsites in the profile that does not match to any
457   // IR callsites.
458   for (const auto &I : ProfileAnchors) {
459     const auto &Loc = I.first;
460     assert(!I.second.stringRef().empty() && "Callees should not be empty");
461     auto It = CallsiteMatchStates.find(Loc);
462     if (It == CallsiteMatchStates.end())
463       CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
464     else if (IsPostMatch) {
465       // Update the state if it's not matched(UnchangedMatch or
466       // RecoveredMismatch).
467       if (It->second == MatchState::InitialMismatch)
468         It->second = MatchState::UnchangedMismatch;
469       else if (It->second == MatchState::InitialMatch)
470         It->second = MatchState::RemovedMatch;
471     }
472   }
473 }
474 
475 void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
476                                                       bool IsTopLevel) {
477   const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
478   // Skip the function that is external or renamed.
479   if (!FuncDesc)
480     return;
481 
482   if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
483     if (IsTopLevel)
484       NumStaleProfileFunc++;
485     // Given currently all probe ids are after block probe ids, once the
486     // checksum is mismatched, it's likely all the callites are mismatched and
487     // dropped. We conservatively count all the samples as mismatched and stop
488     // counting the inlinees' profiles.
489     MismatchedFunctionSamples += FS.getTotalSamples();
490     return;
491   }
492 
493   // Even the current-level function checksum is matched, it's possible that the
494   // nested inlinees' checksums are mismatched that affect the inlinee's sample
495   // loading, we need to go deeper to check the inlinees' function samples.
496   // Similarly, count all the samples as mismatched if the inlinee's checksum is
497   // mismatched using this recursive function.
498   for (const auto &I : FS.getCallsiteSamples())
499     for (const auto &CS : I.second)
500       countMismatchedFuncSamples(CS.second, false);
501 }
502 
503 void SampleProfileMatcher::countMismatchedCallsiteSamples(
504     const FunctionSamples &FS) {
505   auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
506   // Skip it if no mismatched callsite or this is an external function.
507   if (It == FuncCallsiteMatchStates.end() || It->second.empty())
508     return;
509   const auto &CallsiteMatchStates = It->second;
510 
511   auto findMatchState = [&](const LineLocation &Loc) {
512     auto It = CallsiteMatchStates.find(Loc);
513     if (It == CallsiteMatchStates.end())
514       return MatchState::Unknown;
515     return It->second;
516   };
517 
518   auto AttributeMismatchedSamples = [&](const enum MatchState &State,
519                                         uint64_t Samples) {
520     if (isMismatchState(State))
521       MismatchedCallsiteSamples += Samples;
522     else if (State == MatchState::RecoveredMismatch)
523       RecoveredCallsiteSamples += Samples;
524   };
525 
526   // The non-inlined callsites are saved in the body samples of function
527   // profile, go through it to count the non-inlined callsite samples.
528   for (const auto &I : FS.getBodySamples())
529     AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
530 
531   // Count the inlined callsite samples.
532   for (const auto &I : FS.getCallsiteSamples()) {
533     auto State = findMatchState(I.first);
534     uint64_t CallsiteSamples = 0;
535     for (const auto &CS : I.second)
536       CallsiteSamples += CS.second.getTotalSamples();
537     AttributeMismatchedSamples(State, CallsiteSamples);
538 
539     if (isMismatchState(State))
540       continue;
541 
542     // When the current level of inlined call site matches the profiled call
543     // site, we need to go deeper along the inline tree to count mismatches from
544     // lower level inlinees.
545     for (const auto &CS : I.second)
546       countMismatchedCallsiteSamples(CS.second);
547   }
548 }
549 
550 void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
551   auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
552   // Skip it if no mismatched callsite or this is an external function.
553   if (It == FuncCallsiteMatchStates.end() || It->second.empty())
554     return;
555   const auto &MatchStates = It->second;
556   [[maybe_unused]] bool OnInitialState =
557       isInitialState(MatchStates.begin()->second);
558   for (const auto &I : MatchStates) {
559     TotalProfiledCallsites++;
560     assert(
561         (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
562         "Profile matching state is inconsistent");
563 
564     if (isMismatchState(I.second))
565       NumMismatchedCallsites++;
566     else if (I.second == MatchState::RecoveredMismatch)
567       NumRecoveredCallsites++;
568   }
569 }
570 
571 void SampleProfileMatcher::countCallGraphRecoveredSamples(
572     const FunctionSamples &FS,
573     std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
574   if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
575     NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
576     return;
577   }
578 
579   for (const auto &CM : FS.getCallsiteSamples()) {
580     for (const auto &CS : CM.second) {
581       countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
582     }
583   }
584 }
585 
586 void SampleProfileMatcher::computeAndReportProfileStaleness() {
587   if (!ReportProfileStaleness && !PersistProfileStaleness)
588     return;
589 
590   std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
591   if (SalvageUnusedProfile) {
592     for (const auto &I : FuncToProfileNameMap) {
593       CallGraphRecoveredProfiles.insert(I.second);
594       if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
595         continue;
596       NumCallGraphRecoveredProfiledFunc++;
597     }
598   }
599 
600   // Count profile mismatches for profile staleness report.
601   for (const auto &F : M) {
602     if (skipProfileForFunction(F))
603       continue;
604     // As the stats will be merged by linker, skip reporting the metrics for
605     // imported functions to avoid repeated counting.
606     if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
607       continue;
608     const auto *FS = Reader.getSamplesFor(F);
609     if (!FS)
610       continue;
611     TotalProfiledFunc++;
612     TotalFunctionSamples += FS->getTotalSamples();
613 
614     if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
615       countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);
616 
617     // Checksum mismatch is only used in pseudo-probe mode.
618     if (FunctionSamples::ProfileIsProbeBased)
619       countMismatchedFuncSamples(*FS, true);
620 
621     // Count mismatches and samples for calliste.
622     countMismatchCallsites(*FS);
623     countMismatchedCallsiteSamples(*FS);
624   }
625 
626   if (ReportProfileStaleness) {
627     if (FunctionSamples::ProfileIsProbeBased) {
628       errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
629              << ") of functions' profile are invalid and ("
630              << MismatchedFunctionSamples << "/" << TotalFunctionSamples
631              << ") of samples are discarded due to function hash mismatch.\n";
632     }
633     if (SalvageUnusedProfile) {
634       errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
635              << TotalProfiledFunc << ") of functions' profile are matched and ("
636              << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
637              << ") of samples are reused by call graph matching.\n";
638     }
639 
640     errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
641            << TotalProfiledCallsites
642            << ") of callsites' profile are invalid and ("
643            << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
644            << TotalFunctionSamples
645            << ") of samples are discarded due to callsite location mismatch.\n";
646     errs() << "(" << NumRecoveredCallsites << "/"
647            << (NumRecoveredCallsites + NumMismatchedCallsites)
648            << ") of callsites and (" << RecoveredCallsiteSamples << "/"
649            << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
650            << ") of samples are recovered by stale profile matching.\n";
651   }
652 
653   if (PersistProfileStaleness) {
654     LLVMContext &Ctx = M.getContext();
655     MDBuilder MDB(Ctx);
656 
657     SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
658     if (FunctionSamples::ProfileIsProbeBased) {
659       ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
660       ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
661       ProfStatsVec.emplace_back("MismatchedFunctionSamples",
662                                 MismatchedFunctionSamples);
663       ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
664     }
665 
666     if (SalvageUnusedProfile) {
667       ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
668                                 NumCallGraphRecoveredProfiledFunc);
669       ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
670                                 NumCallGraphRecoveredFuncSamples);
671     }
672 
673     ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
674     ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
675     ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
676     ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
677                               MismatchedCallsiteSamples);
678     ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
679                               RecoveredCallsiteSamples);
680 
681     auto *MD = MDB.createLLVMStats(ProfStatsVec);
682     auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
683     NMD->addOperand(MD);
684   }
685 }
686 
687 void SampleProfileMatcher::findFunctionsWithoutProfile() {
688   // TODO: Support MD5 profile.
689   if (FunctionSamples::UseMD5)
690     return;
691   StringSet<> NamesInProfile;
692   if (auto NameTable = Reader.getNameTable()) {
693     for (auto Name : *NameTable)
694       NamesInProfile.insert(Name.stringRef());
695   }
696 
697   for (auto &F : M) {
698     // Skip declarations, as even if the function can be matched, we have
699     // nothing to do with it.
700     if (F.isDeclaration())
701       continue;
702 
703     StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
704     const auto *FS = getFlattenedSamplesFor(F);
705     if (FS)
706       continue;
707 
708     // For extended binary, functions fully inlined may not be loaded in the
709     // top-level profile, so check the NameTable which has the all symbol names
710     // in profile.
711     if (NamesInProfile.count(CanonFName))
712       continue;
713 
714     // For extended binary, non-profiled function symbols are in the profile
715     // symbol list table.
716     if (PSL && PSL->contains(CanonFName))
717       continue;
718 
719     LLVM_DEBUG(dbgs() << "Function " << CanonFName
720                       << " is not in profile or profile symbol list.\n");
721     FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
722   }
723 }
724 
725 bool SampleProfileMatcher::functionMatchesProfileHelper(
726     const Function &IRFunc, const FunctionId &ProfFunc) {
727   // The value is in the range [0, 1]. The bigger the value is, the more similar
728   // two sequences are.
729   float Similarity = 0.0;
730 
731   // Match the functions if they have the same base name(after demangling) and
732   // skip the similarity check.
733   ItaniumPartialDemangler Demangler;
734   // Helper lambda to demangle and get the base name. If the demangling failed,
735   // return an empty string.
736   auto GetBaseName = [&](StringRef FName) {
737     auto FunctionName = FName.str();
738     if (Demangler.partialDemangle(FunctionName.c_str()))
739       return std::string();
740     size_t BaseNameSize = 0;
741     // The demangler API follows the __cxa_demangle one, and thus needs a
742     // pointer that originates from malloc (or nullptr) and the caller is
743     // responsible for free()-ing the buffer.
744     char *BaseNamePtr = Demangler.getFunctionBaseName(nullptr, &BaseNameSize);
745     std::string Result = (BaseNamePtr && BaseNameSize)
746                              ? std::string(BaseNamePtr, BaseNameSize)
747                              : std::string();
748     free(BaseNamePtr);
749     return Result;
750   };
751   auto IRBaseName = GetBaseName(IRFunc.getName());
752   auto ProfBaseName = GetBaseName(ProfFunc.stringRef());
753   if (!IRBaseName.empty() && IRBaseName == ProfBaseName) {
754     LLVM_DEBUG(dbgs() << "The functions " << IRFunc.getName() << "(IR) and "
755                       << ProfFunc << "(Profile) share the same base name: "
756                       << IRBaseName << ".\n");
757     return true;
758   }
759 
760   const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc);
761   // With extbinary profile format, initial profile loading only reads profile
762   // based on current function names in the module.
763   // However, if a function is renamed, sample loader skips to load its original
764   // profile(which has a different name), we will miss this case. To address
765   // this, we load the top-level profile candidate explicitly for the matching.
766   if (!FSForMatching && LoadFuncProfileforCGMatching) {
767     DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
768     if (std::error_code EC = Reader.read(TopLevelFunc))
769       return false;
770     FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef());
771     LLVM_DEBUG({
772       if (FSForMatching)
773         dbgs() << "Read top-level function " << ProfFunc
774                << " for call-graph matching\n";
775     });
776   }
777   if (!FSForMatching)
778     return false;
779   // The check for similarity or checksum may not be reliable if the function is
780   // tiny, we use the number of basic block as a proxy for the function
781   // complexity and skip the matching if it's too small.
782   if (IRFunc.size() < MinFuncCountForCGMatching ||
783       FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching)
784     return false;
785 
786   // For probe-based function, we first trust the checksum info. If the checksum
787   // doesn't match, we continue checking for similarity.
788   if (FunctionSamples::ProfileIsProbeBased) {
789     const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
790     if (FuncDesc &&
791         !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) {
792       LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
793                         << "(IR) and " << ProfFunc << "(Profile) match.\n");
794 
795       return true;
796     }
797   }
798 
799   AnchorMap IRAnchors;
800   findIRAnchors(IRFunc, IRAnchors);
801   AnchorMap ProfileAnchors;
802   findProfileAnchors(*FSForMatching, ProfileAnchors);
803 
804   AnchorList FilteredIRAnchorsList;
805   AnchorList FilteredProfileAnchorList;
806   getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
807                         FilteredProfileAnchorList);
808 
809   // Similarly skip the matching if the num of anchors is not enough.
810   if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
811       FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
812     return false;
813 
814   // Use the diff algorithm to find the LCS between IR and profile.
815 
816   // Don't recursively match the callee function to avoid infinite matching,
817   // callee functions will be handled later since it's processed in top-down
818   // order .
819   LocToLocMap MatchedAnchors =
820       longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
821                             false /* Match unused functions */);
822 
823   Similarity = static_cast<float>(MatchedAnchors.size()) /
824                FilteredProfileAnchorList.size();
825 
826   LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
827                     << "(IR) and " << ProfFunc << "(profile) is "
828                     << format("%.2f", Similarity) << "\n");
829   assert((Similarity >= 0 && Similarity <= 1.0) &&
830          "Similarity value should be in [0, 1]");
831   return Similarity * 100 > FuncProfileSimilarityThreshold;
832 }
833 
834 // If FindMatchedProfileOnly is set to true, only use the processed function
835 // results. This is used for skipping the repeated recursive matching.
836 bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
837                                                   const FunctionId &ProfFunc,
838                                                   bool FindMatchedProfileOnly) {
839   auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
840   if (R != FuncProfileMatchCache.end())
841     return R->second;
842 
843   if (FindMatchedProfileOnly)
844     return false;
845 
846   bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
847   FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
848   if (Matched) {
849     FuncToProfileNameMap[&IRFunc] = ProfFunc;
850     LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
851                       << " matches profile:" << ProfFunc << "\n");
852   }
853 
854   return Matched;
855 }
856 
857 void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
858   DenseSet<StringRef> ProfileSalvagedFuncs;
859   // Update FuncNameToProfNameMap and SymbolMap.
860   for (auto &I : FuncToProfileNameMap) {
861     assert(I.first && "New function is null");
862     FunctionId FuncName(I.first->getName());
863     ProfileSalvagedFuncs.insert(I.second.stringRef());
864     FuncNameToProfNameMap->emplace(FuncName, I.second);
865 
866     // We need to remove the old entry to avoid duplicating the function
867     // processing.
868     SymbolMap->erase(FuncName);
869     SymbolMap->emplace(I.second, I.first);
870   }
871 
872   // With extbinary profile format, initial profile loading only reads profile
873   // based on current function names in the module, so we need to load top-level
874   // profiles for functions with different profile name explicitly after
875   // function-profile name map is established with stale profile matching.
876   Reader.read(ProfileSalvagedFuncs);
877   Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap);
878 }
879 
880 void SampleProfileMatcher::runOnModule() {
881   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
882                                    FunctionSamples::ProfileIsCS);
883   if (SalvageUnusedProfile)
884     findFunctionsWithoutProfile();
885 
886   // Process the matching in top-down order so that the caller matching result
887   // can be used to the callee matching.
888   std::vector<Function *> TopDownFunctionList;
889   TopDownFunctionList.reserve(M.size());
890   buildTopDownFuncOrder(CG, TopDownFunctionList);
891   for (auto *F : TopDownFunctionList) {
892     if (skipProfileForFunction(*F))
893       continue;
894     runOnFunction(*F);
895   }
896 
897   if (SalvageUnusedProfile)
898     UpdateWithSalvagedProfiles();
899 
900   if (SalvageStaleProfile)
901     distributeIRToProfileLocationMap();
902 
903   computeAndReportProfileStaleness();
904 }
905 
906 void SampleProfileMatcher::distributeIRToProfileLocationMap(
907     FunctionSamples &FS) {
908   const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
909   if (ProfileMappings != FuncMappings.end()) {
910     FS.setIRToProfileLocationMap(&(ProfileMappings->second));
911   }
912 
913   for (auto &Callees :
914        const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
915     for (auto &FS : Callees.second) {
916       distributeIRToProfileLocationMap(FS.second);
917     }
918   }
919 }
920 
921 // Use a central place to distribute the matching results. Outlined and inlined
922 // profile with the function name will be set to the same pointer.
923 void SampleProfileMatcher::distributeIRToProfileLocationMap() {
924   for (auto &I : Reader.getProfiles()) {
925     distributeIRToProfileLocationMap(I.second);
926   }
927 }
928