xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileMatcher used for stale
10 // profile matching.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/IPO/SampleProfileMatcher.h"
15 #include "llvm/IR/IntrinsicInst.h"
16 #include "llvm/IR/MDBuilder.h"
17 #include "llvm/Support/CommandLine.h"
18 
19 using namespace llvm;
20 using namespace sampleprof;
21 
22 #define DEBUG_TYPE "sample-profile-matcher"
23 
24 static cl::opt<unsigned> FuncProfileSimilarityThreshold(
25     "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
26     cl::desc("Consider a profile matches a function if the similarity of their "
27              "callee sequences is above the specified percentile."));
28 
29 static cl::opt<unsigned> MinFuncCountForCGMatching(
30     "min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
31     cl::desc("The minimum number of basic blocks required for a function to "
32              "run stale profile call graph matching."));
33 
34 static cl::opt<unsigned> MinCallCountForCGMatching(
35     "min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
36     cl::desc("The minimum number of call anchors required for a function to "
37              "run stale profile call graph matching."));
38 
39 extern cl::opt<bool> SalvageStaleProfile;
40 extern cl::opt<bool> SalvageUnusedProfile;
41 extern cl::opt<bool> PersistProfileStaleness;
42 extern cl::opt<bool> ReportProfileStaleness;
43 
44 static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
45     "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
46     cl::desc("The maximum number of callsites in a function, above which stale "
47              "profile matching will be skipped."));
48 
49 void SampleProfileMatcher::findIRAnchors(const Function &F,
50                                          AnchorMap &IRAnchors) const {
51   // For inlined code, recover the original callsite and callee by finding the
52   // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
53   // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
54   auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
55     assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
56     const DILocation *PrevDIL = nullptr;
57     do {
58       PrevDIL = DIL;
59       DIL = DIL->getInlinedAt();
60     } while (DIL->getInlinedAt());
61 
62     LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
63         DIL, FunctionSamples::ProfileIsFS);
64     StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
65     return std::make_pair(Callsite, FunctionId(CalleeName));
66   };
67 
68   auto GetCanonicalCalleeName = [](const CallBase *CB) {
69     StringRef CalleeName = UnknownIndirectCallee;
70     if (Function *Callee = CB->getCalledFunction())
71       CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
72     return CalleeName;
73   };
74 
75   // Extract profile matching anchors in the IR.
76   for (auto &BB : F) {
77     for (auto &I : BB) {
78       DILocation *DIL = I.getDebugLoc();
79       if (!DIL)
80         continue;
81 
82       if (FunctionSamples::ProfileIsProbeBased) {
83         if (auto Probe = extractProbe(I)) {
84           // Flatten inlined IR for the matching.
85           if (DIL->getInlinedAt()) {
86             IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
87           } else {
88             // Use empty StringRef for basic block probe.
89             StringRef CalleeName;
90             if (const auto *CB = dyn_cast<CallBase>(&I)) {
91               // Skip the probe inst whose callee name is "llvm.pseudoprobe".
92               if (!isa<IntrinsicInst>(&I))
93                 CalleeName = GetCanonicalCalleeName(CB);
94             }
95             LineLocation Loc = LineLocation(Probe->Id, 0);
96             IRAnchors.emplace(Loc, FunctionId(CalleeName));
97           }
98         }
99       } else {
100         // TODO: For line-number based profile(AutoFDO), currently only support
101         // find callsite anchors. In future, we need to parse all the non-call
102         // instructions to extract the line locations for profile matching.
103         if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
104           continue;
105 
106         if (DIL->getInlinedAt()) {
107           IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
108         } else {
109           LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
110               DIL, FunctionSamples::ProfileIsFS);
111           StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
112           IRAnchors.emplace(Callsite, FunctionId(CalleeName));
113         }
114       }
115     }
116   }
117 }
118 
119 void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
120                                               AnchorMap &ProfileAnchors) const {
121   auto isInvalidLineOffset = [](uint32_t LineOffset) {
122     return LineOffset & 0x8000;
123   };
124 
125   auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
126                          AnchorMap &ProfileAnchors) {
127     auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
128     if (!Ret.second) {
129       // For multiple callees, which indicates it's an indirect call, we use a
130       // dummy name(UnknownIndirectCallee) as the indrect callee name.
131       Ret.first->second = FunctionId(UnknownIndirectCallee);
132     }
133   };
134 
135   for (const auto &I : FS.getBodySamples()) {
136     const LineLocation &Loc = I.first;
137     if (isInvalidLineOffset(Loc.LineOffset))
138       continue;
139     for (const auto &C : I.second.getCallTargets())
140       InsertAnchor(Loc, C.first, ProfileAnchors);
141   }
142 
143   for (const auto &I : FS.getCallsiteSamples()) {
144     const LineLocation &Loc = I.first;
145     if (isInvalidLineOffset(Loc.LineOffset))
146       continue;
147     for (const auto &C : I.second)
148       InsertAnchor(Loc, C.first, ProfileAnchors);
149   }
150 }
151 
152 bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
153                                               Function *&FuncWithoutProfile) {
154   FuncWithoutProfile = nullptr;
155   auto R = FunctionsWithoutProfile.find(IRFuncName);
156   if (R != FunctionsWithoutProfile.end())
157     FuncWithoutProfile = R->second;
158   return !FuncWithoutProfile;
159 }
160 
161 bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
162   return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
163 }
164 
165 bool SampleProfileMatcher::functionMatchesProfile(
166     const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
167     bool FindMatchedProfileOnly) {
168   if (IRFuncName == ProfileFuncName)
169     return true;
170   if (!SalvageUnusedProfile)
171     return false;
172 
173   // If IR function doesn't have profile and the profile is unused, try
174   // matching them.
175   Function *IRFunc = nullptr;
176   if (functionHasProfile(IRFuncName, IRFunc) ||
177       !isProfileUnused(ProfileFuncName))
178     return false;
179 
180   assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
181          "IR function should be different from profile function to match");
182   return functionMatchesProfile(*IRFunc, ProfileFuncName,
183                                 FindMatchedProfileOnly);
184 }
185 
186 LocToLocMap
187 SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
188                                             const AnchorList &AnchorList2,
189                                             bool MatchUnusedFunction) {
190   int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
191           MaxDepth = Size1 + Size2;
192   auto Index = [&](int32_t I) { return I + MaxDepth; };
193 
194   LocToLocMap EqualLocations;
195   if (MaxDepth == 0)
196     return EqualLocations;
197 
198   // Backtrack the SES result.
199   auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
200                        const AnchorList &AnchorList1,
201                        const AnchorList &AnchorList2,
202                        LocToLocMap &EqualLocations) {
203     int32_t X = Size1, Y = Size2;
204     for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
205       const auto &P = Trace[Depth];
206       int32_t K = X - Y;
207       int32_t PrevK = K;
208       if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
209         PrevK = K + 1;
210       else
211         PrevK = K - 1;
212 
213       int32_t PrevX = P[Index(PrevK)];
214       int32_t PrevY = PrevX - PrevK;
215       while (X > PrevX && Y > PrevY) {
216         X--;
217         Y--;
218         EqualLocations.insert({AnchorList1[X].first, AnchorList2[Y].first});
219       }
220 
221       if (Depth == 0)
222         break;
223 
224       if (Y == PrevY)
225         X--;
226       else if (X == PrevX)
227         Y--;
228       X = PrevX;
229       Y = PrevY;
230     }
231   };
232 
233   // The greedy LCS/SES algorithm.
234 
235   // An array contains the endpoints of the furthest reaching D-paths.
236   std::vector<int32_t> V(2 * MaxDepth + 1, -1);
237   V[Index(1)] = 0;
238   // Trace is used to backtrack the SES result.
239   std::vector<std::vector<int32_t>> Trace;
240   for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {
241     Trace.push_back(V);
242     for (int32_t K = -Depth; K <= Depth; K += 2) {
243       int32_t X = 0, Y = 0;
244       if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))
245         X = V[Index(K + 1)];
246       else
247         X = V[Index(K - 1)] + 1;
248       Y = X - K;
249       while (X < Size1 && Y < Size2 &&
250              functionMatchesProfile(
251                  AnchorList1[X].second, AnchorList2[Y].second,
252                  !MatchUnusedFunction /* Find matched function only */))
253         X++, Y++;
254 
255       V[Index(K)] = X;
256 
257       if (X >= Size1 && Y >= Size2) {
258         // Length of an SES is D.
259         Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations);
260         return EqualLocations;
261       }
262     }
263   }
264   // Length of an SES is greater than MaxDepth.
265   return EqualLocations;
266 }
267 
268 void SampleProfileMatcher::matchNonCallsiteLocs(
269     const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
270     LocToLocMap &IRToProfileLocationMap) {
271   auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
272     // Skip the unchanged location mapping to save memory.
273     if (From != To)
274       IRToProfileLocationMap.insert({From, To});
275   };
276 
277   // Use function's beginning location as the initial anchor.
278   int32_t LocationDelta = 0;
279   SmallVector<LineLocation> LastMatchedNonAnchors;
280   for (const auto &IR : IRAnchors) {
281     const auto &Loc = IR.first;
282     bool IsMatchedAnchor = false;
283     // Match the anchor location in lexical order.
284     auto R = MatchedAnchors.find(Loc);
285     if (R != MatchedAnchors.end()) {
286       const auto &Candidate = R->second;
287       InsertMatching(Loc, Candidate);
288       LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
289                         << " is matched from " << Loc << " to " << Candidate
290                         << "\n");
291       LocationDelta = Candidate.LineOffset - Loc.LineOffset;
292 
293       // Match backwards for non-anchor locations.
294       // The locations in LastMatchedNonAnchors have been matched forwards
295       // based on the previous anchor, spilt it evenly and overwrite the
296       // second half based on the current anchor.
297       for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
298            I < LastMatchedNonAnchors.size(); I++) {
299         const auto &L = LastMatchedNonAnchors[I];
300         uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
301         LineLocation Candidate(CandidateLineOffset, L.Discriminator);
302         InsertMatching(L, Candidate);
303         LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
304                           << " to " << Candidate << "\n");
305       }
306 
307       IsMatchedAnchor = true;
308       LastMatchedNonAnchors.clear();
309     }
310 
311     // Match forwards for non-anchor locations.
312     if (!IsMatchedAnchor) {
313       uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
314       LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
315       InsertMatching(Loc, Candidate);
316       LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
317                         << Candidate << "\n");
318       LastMatchedNonAnchors.emplace_back(Loc);
319     }
320   }
321 }
322 
323 // Filter the non-call locations from IRAnchors and ProfileAnchors and write
324 // them into a list for random access later.
325 void SampleProfileMatcher::getFilteredAnchorList(
326     const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
327     AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
328   for (const auto &I : IRAnchors) {
329     if (I.second.stringRef().empty())
330       continue;
331     FilteredIRAnchorsList.emplace_back(I);
332   }
333 
334   for (const auto &I : ProfileAnchors)
335     FilteredProfileAnchorList.emplace_back(I);
336 }
337 
338 // Call target name anchor based profile fuzzy matching.
339 // Input:
340 // For IR locations, the anchor is the callee name of direct callsite; For
341 // profile locations, it's the call target name for BodySamples or inlinee's
342 // profile name for CallsiteSamples.
343 // Matching heuristic:
344 // First match all the anchors using the diff algorithm, then split the
345 // non-anchor locations between the two anchors evenly, first half are matched
346 // based on the start anchor, second half are matched based on the end anchor.
347 // For example, given:
348 // IR locations:      [1, 2(foo), 3, 5, 6(bar), 7]
349 // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
350 // The matching gives:
351 //   [1,    2(foo), 3,  5,  6(bar), 7]
352 //    |     |       |   |     |     |
353 //   [1, 2, 3(foo), 4,  7,  8(bar), 9]
354 // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
355 void SampleProfileMatcher::runStaleProfileMatching(
356     const Function &F, const AnchorMap &IRAnchors,
357     const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
358     bool RunCFGMatching, bool RunCGMatching) {
359   if (!RunCFGMatching && !RunCGMatching)
360     return;
361   LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
362                     << "\n");
363   assert(IRToProfileLocationMap.empty() &&
364          "Run stale profile matching only once per function");
365 
366   AnchorList FilteredProfileAnchorList;
367   AnchorList FilteredIRAnchorsList;
368   getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
369                         FilteredProfileAnchorList);
370 
371   if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
372     return;
373 
374   if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
375       FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
376     LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
377                       << " because the number of callsites in the IR is "
378                       << FilteredIRAnchorsList.size()
379                       << " and in the profile is "
380                       << FilteredProfileAnchorList.size() << "\n");
381     return;
382   }
383 
384   // Match the callsite anchors by finding the longest common subsequence
385   // between IR and profile.
386   // Define a match between two anchors as follows:
387   // 1) The function names of anchors are the same.
388   // 2) The similarity between the anchor functions is above a threshold if
389   // RunCGMatching is set.
390   // For 2), we only consider the anchor functions from IR and profile don't
391   // appear on either side to reduce the matching scope. Note that we need to
392   // use IR anchor as base(A side) to align with the order of
393   // IRToProfileLocationMap.
394   LocToLocMap MatchedAnchors =
395       longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
396                             RunCGMatching /* Match unused functions */);
397 
398   // CFG level matching:
399   // Apply the callsite matchings to infer matching for the basic
400   // block(non-callsite) locations and write the result to
401   // IRToProfileLocationMap.
402   if (RunCFGMatching)
403     matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
404 }
405 
406 void SampleProfileMatcher::runOnFunction(Function &F) {
407   // We need to use flattened function samples for matching.
408   // Unlike IR, which includes all callsites from the source code, the callsites
409   // in profile only show up when they are hit by samples, i,e. the profile
410   // callsites in one context may differ from those in another context. To get
411   // the maximum number of callsites, we merge the function profiles from all
412   // contexts, aka, the flattened profile to find profile anchors.
413   const auto *FSFlattened = getFlattenedSamplesFor(F);
414   if (SalvageUnusedProfile && !FSFlattened) {
415     // Apply the matching in place to find the new function's matched profile.
416     // TODO: For extended profile format, if a function profile is unused and
417     // it's top-level, even if the profile is matched, it's not found in the
418     // profile. This is because sample reader only read the used profile at the
419     // beginning, we need to support loading the profile on-demand in future.
420     auto R = FuncToProfileNameMap.find(&F);
421     if (R != FuncToProfileNameMap.end())
422       FSFlattened = getFlattenedSamplesFor(R->second);
423   }
424   if (!FSFlattened)
425     return;
426 
427   // Anchors for IR. It's a map from IR location to callee name, callee name is
428   // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
429   // for unknown indrect callee name.
430   AnchorMap IRAnchors;
431   findIRAnchors(F, IRAnchors);
432   // Anchors for profile. It's a map from callsite location to a set of callee
433   // name.
434   AnchorMap ProfileAnchors;
435   findProfileAnchors(*FSFlattened, ProfileAnchors);
436 
437   // Compute the callsite match states for profile staleness report.
438   if (ReportProfileStaleness || PersistProfileStaleness)
439     recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
440 
441   if (!SalvageStaleProfile)
442     return;
443   // For probe-based profiles, run matching only when profile checksum is
444   // mismatched.
445   bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
446                           !ProbeManager->profileIsValid(F, *FSFlattened);
447   bool RunCFGMatching =
448       !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
449   bool RunCGMatching = SalvageUnusedProfile;
450   // For imported functions, the checksum metadata(pseudo_probe_desc) are
451   // dropped, so we leverage function attribute(profile-checksum-mismatch) to
452   // transfer the info: add the attribute during pre-link phase and check it
453   // during post-link phase(see "profileIsValid").
454   if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
455     F.addFnAttr("profile-checksum-mismatch");
456 
457   // The matching result will be saved to IRToProfileLocationMap, create a
458   // new map for each function.
459   auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
460   runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
461                           RunCFGMatching, RunCGMatching);
462   // Find and update callsite match states after matching.
463   if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
464     recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
465                               &IRToProfileLocationMap);
466 }
467 
468 void SampleProfileMatcher::recordCallsiteMatchStates(
469     const Function &F, const AnchorMap &IRAnchors,
470     const AnchorMap &ProfileAnchors,
471     const LocToLocMap *IRToProfileLocationMap) {
472   bool IsPostMatch = IRToProfileLocationMap != nullptr;
473   auto &CallsiteMatchStates =
474       FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
475 
476   auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
477     // IRToProfileLocationMap is null in pre-match phrase.
478     if (!IRToProfileLocationMap)
479       return IRLoc;
480     const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
481     if (ProfileLoc != IRToProfileLocationMap->end())
482       return ProfileLoc->second;
483     else
484       return IRLoc;
485   };
486 
487   for (const auto &I : IRAnchors) {
488     // After fuzzy profile matching, use the matching result to remap the
489     // current IR callsite.
490     const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
491     const auto &IRCalleeId = I.second;
492     const auto &It = ProfileAnchors.find(ProfileLoc);
493     if (It == ProfileAnchors.end())
494       continue;
495     const auto &ProfCalleeId = It->second;
496     if (IRCalleeId == ProfCalleeId) {
497       auto It = CallsiteMatchStates.find(ProfileLoc);
498       if (It == CallsiteMatchStates.end())
499         CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
500       else if (IsPostMatch) {
501         if (It->second == MatchState::InitialMatch)
502           It->second = MatchState::UnchangedMatch;
503         else if (It->second == MatchState::InitialMismatch)
504           It->second = MatchState::RecoveredMismatch;
505       }
506     }
507   }
508 
509   // Check if there are any callsites in the profile that does not match to any
510   // IR callsites.
511   for (const auto &I : ProfileAnchors) {
512     const auto &Loc = I.first;
513     assert(!I.second.stringRef().empty() && "Callees should not be empty");
514     auto It = CallsiteMatchStates.find(Loc);
515     if (It == CallsiteMatchStates.end())
516       CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
517     else if (IsPostMatch) {
518       // Update the state if it's not matched(UnchangedMatch or
519       // RecoveredMismatch).
520       if (It->second == MatchState::InitialMismatch)
521         It->second = MatchState::UnchangedMismatch;
522       else if (It->second == MatchState::InitialMatch)
523         It->second = MatchState::RemovedMatch;
524     }
525   }
526 }
527 
528 void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
529                                                       bool IsTopLevel) {
530   const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
531   // Skip the function that is external or renamed.
532   if (!FuncDesc)
533     return;
534 
535   if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
536     if (IsTopLevel)
537       NumStaleProfileFunc++;
538     // Given currently all probe ids are after block probe ids, once the
539     // checksum is mismatched, it's likely all the callites are mismatched and
540     // dropped. We conservatively count all the samples as mismatched and stop
541     // counting the inlinees' profiles.
542     MismatchedFunctionSamples += FS.getTotalSamples();
543     return;
544   }
545 
546   // Even the current-level function checksum is matched, it's possible that the
547   // nested inlinees' checksums are mismatched that affect the inlinee's sample
548   // loading, we need to go deeper to check the inlinees' function samples.
549   // Similarly, count all the samples as mismatched if the inlinee's checksum is
550   // mismatched using this recursive function.
551   for (const auto &I : FS.getCallsiteSamples())
552     for (const auto &CS : I.second)
553       countMismatchedFuncSamples(CS.second, false);
554 }
555 
556 void SampleProfileMatcher::countMismatchedCallsiteSamples(
557     const FunctionSamples &FS) {
558   auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
559   // Skip it if no mismatched callsite or this is an external function.
560   if (It == FuncCallsiteMatchStates.end() || It->second.empty())
561     return;
562   const auto &CallsiteMatchStates = It->second;
563 
564   auto findMatchState = [&](const LineLocation &Loc) {
565     auto It = CallsiteMatchStates.find(Loc);
566     if (It == CallsiteMatchStates.end())
567       return MatchState::Unknown;
568     return It->second;
569   };
570 
571   auto AttributeMismatchedSamples = [&](const enum MatchState &State,
572                                         uint64_t Samples) {
573     if (isMismatchState(State))
574       MismatchedCallsiteSamples += Samples;
575     else if (State == MatchState::RecoveredMismatch)
576       RecoveredCallsiteSamples += Samples;
577   };
578 
579   // The non-inlined callsites are saved in the body samples of function
580   // profile, go through it to count the non-inlined callsite samples.
581   for (const auto &I : FS.getBodySamples())
582     AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
583 
584   // Count the inlined callsite samples.
585   for (const auto &I : FS.getCallsiteSamples()) {
586     auto State = findMatchState(I.first);
587     uint64_t CallsiteSamples = 0;
588     for (const auto &CS : I.second)
589       CallsiteSamples += CS.second.getTotalSamples();
590     AttributeMismatchedSamples(State, CallsiteSamples);
591 
592     if (isMismatchState(State))
593       continue;
594 
595     // When the current level of inlined call site matches the profiled call
596     // site, we need to go deeper along the inline tree to count mismatches from
597     // lower level inlinees.
598     for (const auto &CS : I.second)
599       countMismatchedCallsiteSamples(CS.second);
600   }
601 }
602 
603 void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
604   auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
605   // Skip it if no mismatched callsite or this is an external function.
606   if (It == FuncCallsiteMatchStates.end() || It->second.empty())
607     return;
608   const auto &MatchStates = It->second;
609   [[maybe_unused]] bool OnInitialState =
610       isInitialState(MatchStates.begin()->second);
611   for (const auto &I : MatchStates) {
612     TotalProfiledCallsites++;
613     assert(
614         (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
615         "Profile matching state is inconsistent");
616 
617     if (isMismatchState(I.second))
618       NumMismatchedCallsites++;
619     else if (I.second == MatchState::RecoveredMismatch)
620       NumRecoveredCallsites++;
621   }
622 }
623 
624 void SampleProfileMatcher::countCallGraphRecoveredSamples(
625     const FunctionSamples &FS,
626     std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
627   if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
628     NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
629     return;
630   }
631 
632   for (const auto &CM : FS.getCallsiteSamples()) {
633     for (const auto &CS : CM.second) {
634       countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
635     }
636   }
637 }
638 
639 void SampleProfileMatcher::computeAndReportProfileStaleness() {
640   if (!ReportProfileStaleness && !PersistProfileStaleness)
641     return;
642 
643   std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
644   if (SalvageUnusedProfile) {
645     for (const auto &I : FuncToProfileNameMap) {
646       CallGraphRecoveredProfiles.insert(I.second);
647       if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
648         continue;
649       NumCallGraphRecoveredProfiledFunc++;
650     }
651   }
652 
653   // Count profile mismatches for profile staleness report.
654   for (const auto &F : M) {
655     if (skipProfileForFunction(F))
656       continue;
657     // As the stats will be merged by linker, skip reporting the metrics for
658     // imported functions to avoid repeated counting.
659     if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
660       continue;
661     const auto *FS = Reader.getSamplesFor(F);
662     if (!FS)
663       continue;
664     TotalProfiledFunc++;
665     TotalFunctionSamples += FS->getTotalSamples();
666 
667     if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
668       countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);
669 
670     // Checksum mismatch is only used in pseudo-probe mode.
671     if (FunctionSamples::ProfileIsProbeBased)
672       countMismatchedFuncSamples(*FS, true);
673 
674     // Count mismatches and samples for calliste.
675     countMismatchCallsites(*FS);
676     countMismatchedCallsiteSamples(*FS);
677   }
678 
679   if (ReportProfileStaleness) {
680     if (FunctionSamples::ProfileIsProbeBased) {
681       errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
682              << ") of functions' profile are invalid and ("
683              << MismatchedFunctionSamples << "/" << TotalFunctionSamples
684              << ") of samples are discarded due to function hash mismatch.\n";
685     }
686     if (SalvageUnusedProfile) {
687       errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
688              << TotalProfiledFunc << ") of functions' profile are matched and ("
689              << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
690              << ") of samples are reused by call graph matching.\n";
691     }
692 
693     errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
694            << TotalProfiledCallsites
695            << ") of callsites' profile are invalid and ("
696            << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
697            << TotalFunctionSamples
698            << ") of samples are discarded due to callsite location mismatch.\n";
699     errs() << "(" << NumRecoveredCallsites << "/"
700            << (NumRecoveredCallsites + NumMismatchedCallsites)
701            << ") of callsites and (" << RecoveredCallsiteSamples << "/"
702            << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
703            << ") of samples are recovered by stale profile matching.\n";
704   }
705 
706   if (PersistProfileStaleness) {
707     LLVMContext &Ctx = M.getContext();
708     MDBuilder MDB(Ctx);
709 
710     SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
711     if (FunctionSamples::ProfileIsProbeBased) {
712       ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
713       ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
714       ProfStatsVec.emplace_back("MismatchedFunctionSamples",
715                                 MismatchedFunctionSamples);
716       ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
717     }
718 
719     if (SalvageUnusedProfile) {
720       ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
721                                 NumCallGraphRecoveredProfiledFunc);
722       ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
723                                 NumCallGraphRecoveredFuncSamples);
724     }
725 
726     ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
727     ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
728     ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
729     ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
730                               MismatchedCallsiteSamples);
731     ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
732                               RecoveredCallsiteSamples);
733 
734     auto *MD = MDB.createLLVMStats(ProfStatsVec);
735     auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
736     NMD->addOperand(MD);
737   }
738 }
739 
740 void SampleProfileMatcher::findFunctionsWithoutProfile() {
741   // TODO: Support MD5 profile.
742   if (FunctionSamples::UseMD5)
743     return;
744   StringSet<> NamesInProfile;
745   if (auto NameTable = Reader.getNameTable()) {
746     for (auto Name : *NameTable)
747       NamesInProfile.insert(Name.stringRef());
748   }
749 
750   for (auto &F : M) {
751     // Skip declarations, as even if the function can be matched, we have
752     // nothing to do with it.
753     if (F.isDeclaration())
754       continue;
755 
756     StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
757     const auto *FS = getFlattenedSamplesFor(F);
758     if (FS)
759       continue;
760 
761     // For extended binary, functions fully inlined may not be loaded in the
762     // top-level profile, so check the NameTable which has the all symbol names
763     // in profile.
764     if (NamesInProfile.count(CanonFName))
765       continue;
766 
767     // For extended binary, non-profiled function symbols are in the profile
768     // symbol list table.
769     if (PSL && PSL->contains(CanonFName))
770       continue;
771 
772     LLVM_DEBUG(dbgs() << "Function " << CanonFName
773                       << " is not in profile or profile symbol list.\n");
774     FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
775   }
776 }
777 
778 bool SampleProfileMatcher::functionMatchesProfileHelper(
779     const Function &IRFunc, const FunctionId &ProfFunc) {
780   // The value is in the range [0, 1]. The bigger the value is, the more similar
781   // two sequences are.
782   float Similarity = 0.0;
783 
784   const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
785   if (!FSFlattened)
786     return false;
787   // The check for similarity or checksum may not be reliable if the function is
788   // tiny, we use the number of basic block as a proxy for the function
789   // complexity and skip the matching if it's too small.
790   if (IRFunc.size() < MinFuncCountForCGMatching ||
791       FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching)
792     return false;
793 
794   // For probe-based function, we first trust the checksum info. If the checksum
795   // doesn't match, we continue checking for similarity.
796   if (FunctionSamples::ProfileIsProbeBased) {
797     const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
798     if (FuncDesc &&
799         !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
800       LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
801                         << "(IR) and " << ProfFunc << "(Profile) match.\n");
802 
803       return true;
804     }
805   }
806 
807   AnchorMap IRAnchors;
808   findIRAnchors(IRFunc, IRAnchors);
809   AnchorMap ProfileAnchors;
810   findProfileAnchors(*FSFlattened, ProfileAnchors);
811 
812   AnchorList FilteredIRAnchorsList;
813   AnchorList FilteredProfileAnchorList;
814   getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
815                         FilteredProfileAnchorList);
816 
817   // Similarly skip the matching if the num of anchors is not enough.
818   if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
819       FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
820     return false;
821 
822   // Use the diff algorithm to find the LCS between IR and profile.
823 
824   // Don't recursively match the callee function to avoid infinite matching,
825   // callee functions will be handled later since it's processed in top-down
826   // order .
827   LocToLocMap MatchedAnchors =
828       longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
829                             false /* Match unused functions */);
830 
831   Similarity =
832       static_cast<float>(MatchedAnchors.size()) * 2 /
833       (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
834 
835   LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
836                     << "(IR) and " << ProfFunc << "(profile) is "
837                     << format("%.2f", Similarity) << "\n");
838   assert((Similarity >= 0 && Similarity <= 1.0) &&
839          "Similarity value should be in [0, 1]");
840   return Similarity * 100 > FuncProfileSimilarityThreshold;
841 }
842 
843 // If FindMatchedProfileOnly is set to true, only use the processed function
844 // results. This is used for skipping the repeated recursive matching.
845 bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
846                                                   const FunctionId &ProfFunc,
847                                                   bool FindMatchedProfileOnly) {
848   auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
849   if (R != FuncProfileMatchCache.end())
850     return R->second;
851 
852   if (FindMatchedProfileOnly)
853     return false;
854 
855   bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
856   FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
857   if (Matched) {
858     FuncToProfileNameMap[&IRFunc] = ProfFunc;
859     LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
860                       << " matches profile:" << ProfFunc << "\n");
861   }
862 
863   return Matched;
864 }
865 
866 void SampleProfileMatcher::runOnModule() {
867   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
868                                    FunctionSamples::ProfileIsCS);
869   if (SalvageUnusedProfile)
870     findFunctionsWithoutProfile();
871 
872   // Process the matching in top-down order so that the caller matching result
873   // can be used to the callee matching.
874   std::vector<Function *> TopDownFunctionList;
875   TopDownFunctionList.reserve(M.size());
876   buildTopDownFuncOrder(CG, TopDownFunctionList);
877   for (auto *F : TopDownFunctionList) {
878     if (skipProfileForFunction(*F))
879       continue;
880     runOnFunction(*F);
881   }
882 
883   // Update the data in SampleLoader.
884   if (SalvageUnusedProfile)
885     for (auto &I : FuncToProfileNameMap) {
886       assert(I.first && "New function is null");
887       FunctionId FuncName(I.first->getName());
888       FuncNameToProfNameMap->emplace(FuncName, I.second);
889       // We need to remove the old entry to avoid duplicating the function
890       // processing.
891       SymbolMap->erase(FuncName);
892       SymbolMap->emplace(I.second, I.first);
893     }
894 
895   if (SalvageStaleProfile)
896     distributeIRToProfileLocationMap();
897 
898   computeAndReportProfileStaleness();
899 }
900 
901 void SampleProfileMatcher::distributeIRToProfileLocationMap(
902     FunctionSamples &FS) {
903   const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
904   if (ProfileMappings != FuncMappings.end()) {
905     FS.setIRToProfileLocationMap(&(ProfileMappings->second));
906   }
907 
908   for (auto &Callees :
909        const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
910     for (auto &FS : Callees.second) {
911       distributeIRToProfileLocationMap(FS.second);
912     }
913   }
914 }
915 
916 // Use a central place to distribute the matching results. Outlined and inlined
917 // profile with the function name will be set to the same pointer.
918 void SampleProfileMatcher::distributeIRToProfileLocationMap() {
919   for (auto &I : Reader.getProfiles()) {
920     distributeIRToProfileLocationMap(I.second);
921   }
922 }
923