1 //===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SampleProfileMatcher used for stale 10 // profile matching. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/IPO/SampleProfileMatcher.h" 15 #include "llvm/IR/IntrinsicInst.h" 16 #include "llvm/IR/MDBuilder.h" 17 #include "llvm/Support/CommandLine.h" 18 19 using namespace llvm; 20 using namespace sampleprof; 21 22 #define DEBUG_TYPE "sample-profile-matcher" 23 24 static cl::opt<unsigned> FuncProfileSimilarityThreshold( 25 "func-profile-similarity-threshold", cl::Hidden, cl::init(80), 26 cl::desc("Consider a profile matches a function if the similarity of their " 27 "callee sequences is above the specified percentile.")); 28 29 static cl::opt<unsigned> MinFuncCountForCGMatching( 30 "min-func-count-for-cg-matching", cl::Hidden, cl::init(5), 31 cl::desc("The minimum number of basic blocks required for a function to " 32 "run stale profile call graph matching.")); 33 34 static cl::opt<unsigned> MinCallCountForCGMatching( 35 "min-call-count-for-cg-matching", cl::Hidden, cl::init(3), 36 cl::desc("The minimum number of call anchors required for a function to " 37 "run stale profile call graph matching.")); 38 39 extern cl::opt<bool> SalvageStaleProfile; 40 extern cl::opt<bool> SalvageUnusedProfile; 41 extern cl::opt<bool> PersistProfileStaleness; 42 extern cl::opt<bool> ReportProfileStaleness; 43 44 static cl::opt<unsigned> SalvageStaleProfileMaxCallsites( 45 "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX), 46 cl::desc("The maximum number of callsites in a function, above which stale " 47 "profile matching will be skipped.")); 48 49 void SampleProfileMatcher::findIRAnchors(const Function &F, 50 AnchorMap &IRAnchors) const { 51 // For inlined code, recover the original callsite and callee by finding the 52 // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the 53 // top-level frame is "main:1", the callsite is "1" and the callee is "foo". 54 auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) { 55 assert((DIL && DIL->getInlinedAt()) && "No inlined callsite"); 56 const DILocation *PrevDIL = nullptr; 57 do { 58 PrevDIL = DIL; 59 DIL = DIL->getInlinedAt(); 60 } while (DIL->getInlinedAt()); 61 62 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( 63 DIL, FunctionSamples::ProfileIsFS); 64 StringRef CalleeName = PrevDIL->getSubprogramLinkageName(); 65 return std::make_pair(Callsite, FunctionId(CalleeName)); 66 }; 67 68 auto GetCanonicalCalleeName = [](const CallBase *CB) { 69 StringRef CalleeName = UnknownIndirectCallee; 70 if (Function *Callee = CB->getCalledFunction()) 71 CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName()); 72 return CalleeName; 73 }; 74 75 // Extract profile matching anchors in the IR. 76 for (auto &BB : F) { 77 for (auto &I : BB) { 78 DILocation *DIL = I.getDebugLoc(); 79 if (!DIL) 80 continue; 81 82 if (FunctionSamples::ProfileIsProbeBased) { 83 if (auto Probe = extractProbe(I)) { 84 // Flatten inlined IR for the matching. 85 if (DIL->getInlinedAt()) { 86 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL)); 87 } else { 88 // Use empty StringRef for basic block probe. 89 StringRef CalleeName; 90 if (const auto *CB = dyn_cast<CallBase>(&I)) { 91 // Skip the probe inst whose callee name is "llvm.pseudoprobe". 92 if (!isa<IntrinsicInst>(&I)) 93 CalleeName = GetCanonicalCalleeName(CB); 94 } 95 LineLocation Loc = LineLocation(Probe->Id, 0); 96 IRAnchors.emplace(Loc, FunctionId(CalleeName)); 97 } 98 } 99 } else { 100 // TODO: For line-number based profile(AutoFDO), currently only support 101 // find callsite anchors. In future, we need to parse all the non-call 102 // instructions to extract the line locations for profile matching. 103 if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I)) 104 continue; 105 106 if (DIL->getInlinedAt()) { 107 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL)); 108 } else { 109 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( 110 DIL, FunctionSamples::ProfileIsFS); 111 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I)); 112 IRAnchors.emplace(Callsite, FunctionId(CalleeName)); 113 } 114 } 115 } 116 } 117 } 118 119 void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS, 120 AnchorMap &ProfileAnchors) const { 121 auto isInvalidLineOffset = [](uint32_t LineOffset) { 122 return LineOffset & 0x8000; 123 }; 124 125 auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName, 126 AnchorMap &ProfileAnchors) { 127 auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName); 128 if (!Ret.second) { 129 // For multiple callees, which indicates it's an indirect call, we use a 130 // dummy name(UnknownIndirectCallee) as the indrect callee name. 131 Ret.first->second = FunctionId(UnknownIndirectCallee); 132 } 133 }; 134 135 for (const auto &I : FS.getBodySamples()) { 136 const LineLocation &Loc = I.first; 137 if (isInvalidLineOffset(Loc.LineOffset)) 138 continue; 139 for (const auto &C : I.second.getCallTargets()) 140 InsertAnchor(Loc, C.first, ProfileAnchors); 141 } 142 143 for (const auto &I : FS.getCallsiteSamples()) { 144 const LineLocation &Loc = I.first; 145 if (isInvalidLineOffset(Loc.LineOffset)) 146 continue; 147 for (const auto &C : I.second) 148 InsertAnchor(Loc, C.first, ProfileAnchors); 149 } 150 } 151 152 bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName, 153 Function *&FuncWithoutProfile) { 154 FuncWithoutProfile = nullptr; 155 auto R = FunctionsWithoutProfile.find(IRFuncName); 156 if (R != FunctionsWithoutProfile.end()) 157 FuncWithoutProfile = R->second; 158 return !FuncWithoutProfile; 159 } 160 161 bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) { 162 return SymbolMap->find(ProfileFuncName) == SymbolMap->end(); 163 } 164 165 bool SampleProfileMatcher::functionMatchesProfile( 166 const FunctionId &IRFuncName, const FunctionId &ProfileFuncName, 167 bool FindMatchedProfileOnly) { 168 if (IRFuncName == ProfileFuncName) 169 return true; 170 if (!SalvageUnusedProfile) 171 return false; 172 173 // If IR function doesn't have profile and the profile is unused, try 174 // matching them. 175 Function *IRFunc = nullptr; 176 if (functionHasProfile(IRFuncName, IRFunc) || 177 !isProfileUnused(ProfileFuncName)) 178 return false; 179 180 assert(FunctionId(IRFunc->getName()) != ProfileFuncName && 181 "IR function should be different from profile function to match"); 182 return functionMatchesProfile(*IRFunc, ProfileFuncName, 183 FindMatchedProfileOnly); 184 } 185 186 LocToLocMap 187 SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1, 188 const AnchorList &AnchorList2, 189 bool MatchUnusedFunction) { 190 int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(), 191 MaxDepth = Size1 + Size2; 192 auto Index = [&](int32_t I) { return I + MaxDepth; }; 193 194 LocToLocMap EqualLocations; 195 if (MaxDepth == 0) 196 return EqualLocations; 197 198 // Backtrack the SES result. 199 auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace, 200 const AnchorList &AnchorList1, 201 const AnchorList &AnchorList2, 202 LocToLocMap &EqualLocations) { 203 int32_t X = Size1, Y = Size2; 204 for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) { 205 const auto &P = Trace[Depth]; 206 int32_t K = X - Y; 207 int32_t PrevK = K; 208 if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)])) 209 PrevK = K + 1; 210 else 211 PrevK = K - 1; 212 213 int32_t PrevX = P[Index(PrevK)]; 214 int32_t PrevY = PrevX - PrevK; 215 while (X > PrevX && Y > PrevY) { 216 X--; 217 Y--; 218 EqualLocations.insert({AnchorList1[X].first, AnchorList2[Y].first}); 219 } 220 221 if (Depth == 0) 222 break; 223 224 if (Y == PrevY) 225 X--; 226 else if (X == PrevX) 227 Y--; 228 X = PrevX; 229 Y = PrevY; 230 } 231 }; 232 233 // The greedy LCS/SES algorithm. 234 235 // An array contains the endpoints of the furthest reaching D-paths. 236 std::vector<int32_t> V(2 * MaxDepth + 1, -1); 237 V[Index(1)] = 0; 238 // Trace is used to backtrack the SES result. 239 std::vector<std::vector<int32_t>> Trace; 240 for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) { 241 Trace.push_back(V); 242 for (int32_t K = -Depth; K <= Depth; K += 2) { 243 int32_t X = 0, Y = 0; 244 if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)])) 245 X = V[Index(K + 1)]; 246 else 247 X = V[Index(K - 1)] + 1; 248 Y = X - K; 249 while (X < Size1 && Y < Size2 && 250 functionMatchesProfile( 251 AnchorList1[X].second, AnchorList2[Y].second, 252 !MatchUnusedFunction /* Find matched function only */)) 253 X++, Y++; 254 255 V[Index(K)] = X; 256 257 if (X >= Size1 && Y >= Size2) { 258 // Length of an SES is D. 259 Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations); 260 return EqualLocations; 261 } 262 } 263 } 264 // Length of an SES is greater than MaxDepth. 265 return EqualLocations; 266 } 267 268 void SampleProfileMatcher::matchNonCallsiteLocs( 269 const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors, 270 LocToLocMap &IRToProfileLocationMap) { 271 auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) { 272 // Skip the unchanged location mapping to save memory. 273 if (From != To) 274 IRToProfileLocationMap.insert({From, To}); 275 }; 276 277 // Use function's beginning location as the initial anchor. 278 int32_t LocationDelta = 0; 279 SmallVector<LineLocation> LastMatchedNonAnchors; 280 for (const auto &IR : IRAnchors) { 281 const auto &Loc = IR.first; 282 bool IsMatchedAnchor = false; 283 // Match the anchor location in lexical order. 284 auto R = MatchedAnchors.find(Loc); 285 if (R != MatchedAnchors.end()) { 286 const auto &Candidate = R->second; 287 InsertMatching(Loc, Candidate); 288 LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef() 289 << " is matched from " << Loc << " to " << Candidate 290 << "\n"); 291 LocationDelta = Candidate.LineOffset - Loc.LineOffset; 292 293 // Match backwards for non-anchor locations. 294 // The locations in LastMatchedNonAnchors have been matched forwards 295 // based on the previous anchor, spilt it evenly and overwrite the 296 // second half based on the current anchor. 297 for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2; 298 I < LastMatchedNonAnchors.size(); I++) { 299 const auto &L = LastMatchedNonAnchors[I]; 300 uint32_t CandidateLineOffset = L.LineOffset + LocationDelta; 301 LineLocation Candidate(CandidateLineOffset, L.Discriminator); 302 InsertMatching(L, Candidate); 303 LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L 304 << " to " << Candidate << "\n"); 305 } 306 307 IsMatchedAnchor = true; 308 LastMatchedNonAnchors.clear(); 309 } 310 311 // Match forwards for non-anchor locations. 312 if (!IsMatchedAnchor) { 313 uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta; 314 LineLocation Candidate(CandidateLineOffset, Loc.Discriminator); 315 InsertMatching(Loc, Candidate); 316 LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to " 317 << Candidate << "\n"); 318 LastMatchedNonAnchors.emplace_back(Loc); 319 } 320 } 321 } 322 323 // Filter the non-call locations from IRAnchors and ProfileAnchors and write 324 // them into a list for random access later. 325 void SampleProfileMatcher::getFilteredAnchorList( 326 const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors, 327 AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) { 328 for (const auto &I : IRAnchors) { 329 if (I.second.stringRef().empty()) 330 continue; 331 FilteredIRAnchorsList.emplace_back(I); 332 } 333 334 for (const auto &I : ProfileAnchors) 335 FilteredProfileAnchorList.emplace_back(I); 336 } 337 338 // Call target name anchor based profile fuzzy matching. 339 // Input: 340 // For IR locations, the anchor is the callee name of direct callsite; For 341 // profile locations, it's the call target name for BodySamples or inlinee's 342 // profile name for CallsiteSamples. 343 // Matching heuristic: 344 // First match all the anchors using the diff algorithm, then split the 345 // non-anchor locations between the two anchors evenly, first half are matched 346 // based on the start anchor, second half are matched based on the end anchor. 347 // For example, given: 348 // IR locations: [1, 2(foo), 3, 5, 6(bar), 7] 349 // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9] 350 // The matching gives: 351 // [1, 2(foo), 3, 5, 6(bar), 7] 352 // | | | | | | 353 // [1, 2, 3(foo), 4, 7, 8(bar), 9] 354 // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9]. 355 void SampleProfileMatcher::runStaleProfileMatching( 356 const Function &F, const AnchorMap &IRAnchors, 357 const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap, 358 bool RunCFGMatching, bool RunCGMatching) { 359 if (!RunCFGMatching && !RunCGMatching) 360 return; 361 LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() 362 << "\n"); 363 assert(IRToProfileLocationMap.empty() && 364 "Run stale profile matching only once per function"); 365 366 AnchorList FilteredProfileAnchorList; 367 AnchorList FilteredIRAnchorsList; 368 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, 369 FilteredProfileAnchorList); 370 371 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty()) 372 return; 373 374 if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites || 375 FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) { 376 LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName() 377 << " because the number of callsites in the IR is " 378 << FilteredIRAnchorsList.size() 379 << " and in the profile is " 380 << FilteredProfileAnchorList.size() << "\n"); 381 return; 382 } 383 384 // Match the callsite anchors by finding the longest common subsequence 385 // between IR and profile. 386 // Define a match between two anchors as follows: 387 // 1) The function names of anchors are the same. 388 // 2) The similarity between the anchor functions is above a threshold if 389 // RunCGMatching is set. 390 // For 2), we only consider the anchor functions from IR and profile don't 391 // appear on either side to reduce the matching scope. Note that we need to 392 // use IR anchor as base(A side) to align with the order of 393 // IRToProfileLocationMap. 394 LocToLocMap MatchedAnchors = 395 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, 396 RunCGMatching /* Match unused functions */); 397 398 // CFG level matching: 399 // Apply the callsite matchings to infer matching for the basic 400 // block(non-callsite) locations and write the result to 401 // IRToProfileLocationMap. 402 if (RunCFGMatching) 403 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap); 404 } 405 406 void SampleProfileMatcher::runOnFunction(Function &F) { 407 // We need to use flattened function samples for matching. 408 // Unlike IR, which includes all callsites from the source code, the callsites 409 // in profile only show up when they are hit by samples, i,e. the profile 410 // callsites in one context may differ from those in another context. To get 411 // the maximum number of callsites, we merge the function profiles from all 412 // contexts, aka, the flattened profile to find profile anchors. 413 const auto *FSFlattened = getFlattenedSamplesFor(F); 414 if (SalvageUnusedProfile && !FSFlattened) { 415 // Apply the matching in place to find the new function's matched profile. 416 // TODO: For extended profile format, if a function profile is unused and 417 // it's top-level, even if the profile is matched, it's not found in the 418 // profile. This is because sample reader only read the used profile at the 419 // beginning, we need to support loading the profile on-demand in future. 420 auto R = FuncToProfileNameMap.find(&F); 421 if (R != FuncToProfileNameMap.end()) 422 FSFlattened = getFlattenedSamplesFor(R->second); 423 } 424 if (!FSFlattened) 425 return; 426 427 // Anchors for IR. It's a map from IR location to callee name, callee name is 428 // empty for non-call instruction and use a dummy name(UnknownIndirectCallee) 429 // for unknown indrect callee name. 430 AnchorMap IRAnchors; 431 findIRAnchors(F, IRAnchors); 432 // Anchors for profile. It's a map from callsite location to a set of callee 433 // name. 434 AnchorMap ProfileAnchors; 435 findProfileAnchors(*FSFlattened, ProfileAnchors); 436 437 // Compute the callsite match states for profile staleness report. 438 if (ReportProfileStaleness || PersistProfileStaleness) 439 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr); 440 441 if (!SalvageStaleProfile) 442 return; 443 // For probe-based profiles, run matching only when profile checksum is 444 // mismatched. 445 bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased && 446 !ProbeManager->profileIsValid(F, *FSFlattened); 447 bool RunCFGMatching = 448 !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch; 449 bool RunCGMatching = SalvageUnusedProfile; 450 // For imported functions, the checksum metadata(pseudo_probe_desc) are 451 // dropped, so we leverage function attribute(profile-checksum-mismatch) to 452 // transfer the info: add the attribute during pre-link phase and check it 453 // during post-link phase(see "profileIsValid"). 454 if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) 455 F.addFnAttr("profile-checksum-mismatch"); 456 457 // The matching result will be saved to IRToProfileLocationMap, create a 458 // new map for each function. 459 auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); 460 runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap, 461 RunCFGMatching, RunCGMatching); 462 // Find and update callsite match states after matching. 463 if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness)) 464 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, 465 &IRToProfileLocationMap); 466 } 467 468 void SampleProfileMatcher::recordCallsiteMatchStates( 469 const Function &F, const AnchorMap &IRAnchors, 470 const AnchorMap &ProfileAnchors, 471 const LocToLocMap *IRToProfileLocationMap) { 472 bool IsPostMatch = IRToProfileLocationMap != nullptr; 473 auto &CallsiteMatchStates = 474 FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())]; 475 476 auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) { 477 // IRToProfileLocationMap is null in pre-match phrase. 478 if (!IRToProfileLocationMap) 479 return IRLoc; 480 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc); 481 if (ProfileLoc != IRToProfileLocationMap->end()) 482 return ProfileLoc->second; 483 else 484 return IRLoc; 485 }; 486 487 for (const auto &I : IRAnchors) { 488 // After fuzzy profile matching, use the matching result to remap the 489 // current IR callsite. 490 const auto &ProfileLoc = MapIRLocToProfileLoc(I.first); 491 const auto &IRCalleeId = I.second; 492 const auto &It = ProfileAnchors.find(ProfileLoc); 493 if (It == ProfileAnchors.end()) 494 continue; 495 const auto &ProfCalleeId = It->second; 496 if (IRCalleeId == ProfCalleeId) { 497 auto It = CallsiteMatchStates.find(ProfileLoc); 498 if (It == CallsiteMatchStates.end()) 499 CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch); 500 else if (IsPostMatch) { 501 if (It->second == MatchState::InitialMatch) 502 It->second = MatchState::UnchangedMatch; 503 else if (It->second == MatchState::InitialMismatch) 504 It->second = MatchState::RecoveredMismatch; 505 } 506 } 507 } 508 509 // Check if there are any callsites in the profile that does not match to any 510 // IR callsites. 511 for (const auto &I : ProfileAnchors) { 512 const auto &Loc = I.first; 513 assert(!I.second.stringRef().empty() && "Callees should not be empty"); 514 auto It = CallsiteMatchStates.find(Loc); 515 if (It == CallsiteMatchStates.end()) 516 CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch); 517 else if (IsPostMatch) { 518 // Update the state if it's not matched(UnchangedMatch or 519 // RecoveredMismatch). 520 if (It->second == MatchState::InitialMismatch) 521 It->second = MatchState::UnchangedMismatch; 522 else if (It->second == MatchState::InitialMatch) 523 It->second = MatchState::RemovedMatch; 524 } 525 } 526 } 527 528 void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS, 529 bool IsTopLevel) { 530 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID()); 531 // Skip the function that is external or renamed. 532 if (!FuncDesc) 533 return; 534 535 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) { 536 if (IsTopLevel) 537 NumStaleProfileFunc++; 538 // Given currently all probe ids are after block probe ids, once the 539 // checksum is mismatched, it's likely all the callites are mismatched and 540 // dropped. We conservatively count all the samples as mismatched and stop 541 // counting the inlinees' profiles. 542 MismatchedFunctionSamples += FS.getTotalSamples(); 543 return; 544 } 545 546 // Even the current-level function checksum is matched, it's possible that the 547 // nested inlinees' checksums are mismatched that affect the inlinee's sample 548 // loading, we need to go deeper to check the inlinees' function samples. 549 // Similarly, count all the samples as mismatched if the inlinee's checksum is 550 // mismatched using this recursive function. 551 for (const auto &I : FS.getCallsiteSamples()) 552 for (const auto &CS : I.second) 553 countMismatchedFuncSamples(CS.second, false); 554 } 555 556 void SampleProfileMatcher::countMismatchedCallsiteSamples( 557 const FunctionSamples &FS) { 558 auto It = FuncCallsiteMatchStates.find(FS.getFuncName()); 559 // Skip it if no mismatched callsite or this is an external function. 560 if (It == FuncCallsiteMatchStates.end() || It->second.empty()) 561 return; 562 const auto &CallsiteMatchStates = It->second; 563 564 auto findMatchState = [&](const LineLocation &Loc) { 565 auto It = CallsiteMatchStates.find(Loc); 566 if (It == CallsiteMatchStates.end()) 567 return MatchState::Unknown; 568 return It->second; 569 }; 570 571 auto AttributeMismatchedSamples = [&](const enum MatchState &State, 572 uint64_t Samples) { 573 if (isMismatchState(State)) 574 MismatchedCallsiteSamples += Samples; 575 else if (State == MatchState::RecoveredMismatch) 576 RecoveredCallsiteSamples += Samples; 577 }; 578 579 // The non-inlined callsites are saved in the body samples of function 580 // profile, go through it to count the non-inlined callsite samples. 581 for (const auto &I : FS.getBodySamples()) 582 AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples()); 583 584 // Count the inlined callsite samples. 585 for (const auto &I : FS.getCallsiteSamples()) { 586 auto State = findMatchState(I.first); 587 uint64_t CallsiteSamples = 0; 588 for (const auto &CS : I.second) 589 CallsiteSamples += CS.second.getTotalSamples(); 590 AttributeMismatchedSamples(State, CallsiteSamples); 591 592 if (isMismatchState(State)) 593 continue; 594 595 // When the current level of inlined call site matches the profiled call 596 // site, we need to go deeper along the inline tree to count mismatches from 597 // lower level inlinees. 598 for (const auto &CS : I.second) 599 countMismatchedCallsiteSamples(CS.second); 600 } 601 } 602 603 void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) { 604 auto It = FuncCallsiteMatchStates.find(FS.getFuncName()); 605 // Skip it if no mismatched callsite or this is an external function. 606 if (It == FuncCallsiteMatchStates.end() || It->second.empty()) 607 return; 608 const auto &MatchStates = It->second; 609 [[maybe_unused]] bool OnInitialState = 610 isInitialState(MatchStates.begin()->second); 611 for (const auto &I : MatchStates) { 612 TotalProfiledCallsites++; 613 assert( 614 (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) && 615 "Profile matching state is inconsistent"); 616 617 if (isMismatchState(I.second)) 618 NumMismatchedCallsites++; 619 else if (I.second == MatchState::RecoveredMismatch) 620 NumRecoveredCallsites++; 621 } 622 } 623 624 void SampleProfileMatcher::countCallGraphRecoveredSamples( 625 const FunctionSamples &FS, 626 std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) { 627 if (CallGraphRecoveredProfiles.count(FS.getFunction())) { 628 NumCallGraphRecoveredFuncSamples += FS.getTotalSamples(); 629 return; 630 } 631 632 for (const auto &CM : FS.getCallsiteSamples()) { 633 for (const auto &CS : CM.second) { 634 countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles); 635 } 636 } 637 } 638 639 void SampleProfileMatcher::computeAndReportProfileStaleness() { 640 if (!ReportProfileStaleness && !PersistProfileStaleness) 641 return; 642 643 std::unordered_set<FunctionId> CallGraphRecoveredProfiles; 644 if (SalvageUnusedProfile) { 645 for (const auto &I : FuncToProfileNameMap) { 646 CallGraphRecoveredProfiles.insert(I.second); 647 if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage())) 648 continue; 649 NumCallGraphRecoveredProfiledFunc++; 650 } 651 } 652 653 // Count profile mismatches for profile staleness report. 654 for (const auto &F : M) { 655 if (skipProfileForFunction(F)) 656 continue; 657 // As the stats will be merged by linker, skip reporting the metrics for 658 // imported functions to avoid repeated counting. 659 if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage())) 660 continue; 661 const auto *FS = Reader.getSamplesFor(F); 662 if (!FS) 663 continue; 664 TotalProfiledFunc++; 665 TotalFunctionSamples += FS->getTotalSamples(); 666 667 if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty()) 668 countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles); 669 670 // Checksum mismatch is only used in pseudo-probe mode. 671 if (FunctionSamples::ProfileIsProbeBased) 672 countMismatchedFuncSamples(*FS, true); 673 674 // Count mismatches and samples for calliste. 675 countMismatchCallsites(*FS); 676 countMismatchedCallsiteSamples(*FS); 677 } 678 679 if (ReportProfileStaleness) { 680 if (FunctionSamples::ProfileIsProbeBased) { 681 errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc 682 << ") of functions' profile are invalid and (" 683 << MismatchedFunctionSamples << "/" << TotalFunctionSamples 684 << ") of samples are discarded due to function hash mismatch.\n"; 685 } 686 if (SalvageUnusedProfile) { 687 errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/" 688 << TotalProfiledFunc << ") of functions' profile are matched and (" 689 << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples 690 << ") of samples are reused by call graph matching.\n"; 691 } 692 693 errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/" 694 << TotalProfiledCallsites 695 << ") of callsites' profile are invalid and (" 696 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/" 697 << TotalFunctionSamples 698 << ") of samples are discarded due to callsite location mismatch.\n"; 699 errs() << "(" << NumRecoveredCallsites << "/" 700 << (NumRecoveredCallsites + NumMismatchedCallsites) 701 << ") of callsites and (" << RecoveredCallsiteSamples << "/" 702 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples) 703 << ") of samples are recovered by stale profile matching.\n"; 704 } 705 706 if (PersistProfileStaleness) { 707 LLVMContext &Ctx = M.getContext(); 708 MDBuilder MDB(Ctx); 709 710 SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec; 711 if (FunctionSamples::ProfileIsProbeBased) { 712 ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc); 713 ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc); 714 ProfStatsVec.emplace_back("MismatchedFunctionSamples", 715 MismatchedFunctionSamples); 716 ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples); 717 } 718 719 if (SalvageUnusedProfile) { 720 ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc", 721 NumCallGraphRecoveredProfiledFunc); 722 ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples", 723 NumCallGraphRecoveredFuncSamples); 724 } 725 726 ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites); 727 ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites); 728 ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites); 729 ProfStatsVec.emplace_back("MismatchedCallsiteSamples", 730 MismatchedCallsiteSamples); 731 ProfStatsVec.emplace_back("RecoveredCallsiteSamples", 732 RecoveredCallsiteSamples); 733 734 auto *MD = MDB.createLLVMStats(ProfStatsVec); 735 auto *NMD = M.getOrInsertNamedMetadata("llvm.stats"); 736 NMD->addOperand(MD); 737 } 738 } 739 740 void SampleProfileMatcher::findFunctionsWithoutProfile() { 741 // TODO: Support MD5 profile. 742 if (FunctionSamples::UseMD5) 743 return; 744 StringSet<> NamesInProfile; 745 if (auto NameTable = Reader.getNameTable()) { 746 for (auto Name : *NameTable) 747 NamesInProfile.insert(Name.stringRef()); 748 } 749 750 for (auto &F : M) { 751 // Skip declarations, as even if the function can be matched, we have 752 // nothing to do with it. 753 if (F.isDeclaration()) 754 continue; 755 756 StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName()); 757 const auto *FS = getFlattenedSamplesFor(F); 758 if (FS) 759 continue; 760 761 // For extended binary, functions fully inlined may not be loaded in the 762 // top-level profile, so check the NameTable which has the all symbol names 763 // in profile. 764 if (NamesInProfile.count(CanonFName)) 765 continue; 766 767 // For extended binary, non-profiled function symbols are in the profile 768 // symbol list table. 769 if (PSL && PSL->contains(CanonFName)) 770 continue; 771 772 LLVM_DEBUG(dbgs() << "Function " << CanonFName 773 << " is not in profile or profile symbol list.\n"); 774 FunctionsWithoutProfile[FunctionId(CanonFName)] = &F; 775 } 776 } 777 778 bool SampleProfileMatcher::functionMatchesProfileHelper( 779 const Function &IRFunc, const FunctionId &ProfFunc) { 780 // The value is in the range [0, 1]. The bigger the value is, the more similar 781 // two sequences are. 782 float Similarity = 0.0; 783 784 const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc); 785 if (!FSFlattened) 786 return false; 787 // The check for similarity or checksum may not be reliable if the function is 788 // tiny, we use the number of basic block as a proxy for the function 789 // complexity and skip the matching if it's too small. 790 if (IRFunc.size() < MinFuncCountForCGMatching || 791 FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching) 792 return false; 793 794 // For probe-based function, we first trust the checksum info. If the checksum 795 // doesn't match, we continue checking for similarity. 796 if (FunctionSamples::ProfileIsProbeBased) { 797 const auto *FuncDesc = ProbeManager->getDesc(IRFunc); 798 if (FuncDesc && 799 !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) { 800 LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName() 801 << "(IR) and " << ProfFunc << "(Profile) match.\n"); 802 803 return true; 804 } 805 } 806 807 AnchorMap IRAnchors; 808 findIRAnchors(IRFunc, IRAnchors); 809 AnchorMap ProfileAnchors; 810 findProfileAnchors(*FSFlattened, ProfileAnchors); 811 812 AnchorList FilteredIRAnchorsList; 813 AnchorList FilteredProfileAnchorList; 814 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, 815 FilteredProfileAnchorList); 816 817 // Similarly skip the matching if the num of anchors is not enough. 818 if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching || 819 FilteredProfileAnchorList.size() < MinCallCountForCGMatching) 820 return false; 821 822 // Use the diff algorithm to find the LCS between IR and profile. 823 824 // Don't recursively match the callee function to avoid infinite matching, 825 // callee functions will be handled later since it's processed in top-down 826 // order . 827 LocToLocMap MatchedAnchors = 828 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, 829 false /* Match unused functions */); 830 831 Similarity = 832 static_cast<float>(MatchedAnchors.size()) * 2 / 833 (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size()); 834 835 LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName() 836 << "(IR) and " << ProfFunc << "(profile) is " 837 << format("%.2f", Similarity) << "\n"); 838 assert((Similarity >= 0 && Similarity <= 1.0) && 839 "Similarity value should be in [0, 1]"); 840 return Similarity * 100 > FuncProfileSimilarityThreshold; 841 } 842 843 // If FindMatchedProfileOnly is set to true, only use the processed function 844 // results. This is used for skipping the repeated recursive matching. 845 bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc, 846 const FunctionId &ProfFunc, 847 bool FindMatchedProfileOnly) { 848 auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc}); 849 if (R != FuncProfileMatchCache.end()) 850 return R->second; 851 852 if (FindMatchedProfileOnly) 853 return false; 854 855 bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc); 856 FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched; 857 if (Matched) { 858 FuncToProfileNameMap[&IRFunc] = ProfFunc; 859 LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName() 860 << " matches profile:" << ProfFunc << "\n"); 861 } 862 863 return Matched; 864 } 865 866 void SampleProfileMatcher::runOnModule() { 867 ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, 868 FunctionSamples::ProfileIsCS); 869 if (SalvageUnusedProfile) 870 findFunctionsWithoutProfile(); 871 872 // Process the matching in top-down order so that the caller matching result 873 // can be used to the callee matching. 874 std::vector<Function *> TopDownFunctionList; 875 TopDownFunctionList.reserve(M.size()); 876 buildTopDownFuncOrder(CG, TopDownFunctionList); 877 for (auto *F : TopDownFunctionList) { 878 if (skipProfileForFunction(*F)) 879 continue; 880 runOnFunction(*F); 881 } 882 883 // Update the data in SampleLoader. 884 if (SalvageUnusedProfile) 885 for (auto &I : FuncToProfileNameMap) { 886 assert(I.first && "New function is null"); 887 FunctionId FuncName(I.first->getName()); 888 FuncNameToProfNameMap->emplace(FuncName, I.second); 889 // We need to remove the old entry to avoid duplicating the function 890 // processing. 891 SymbolMap->erase(FuncName); 892 SymbolMap->emplace(I.second, I.first); 893 } 894 895 if (SalvageStaleProfile) 896 distributeIRToProfileLocationMap(); 897 898 computeAndReportProfileStaleness(); 899 } 900 901 void SampleProfileMatcher::distributeIRToProfileLocationMap( 902 FunctionSamples &FS) { 903 const auto ProfileMappings = FuncMappings.find(FS.getFuncName()); 904 if (ProfileMappings != FuncMappings.end()) { 905 FS.setIRToProfileLocationMap(&(ProfileMappings->second)); 906 } 907 908 for (auto &Callees : 909 const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) { 910 for (auto &FS : Callees.second) { 911 distributeIRToProfileLocationMap(FS.second); 912 } 913 } 914 } 915 916 // Use a central place to distribute the matching results. Outlined and inlined 917 // profile with the function name will be set to the same pointer. 918 void SampleProfileMatcher::distributeIRToProfileLocationMap() { 919 for (auto &I : Reader.getProfiles()) { 920 distributeIRToProfileLocationMap(I.second); 921 } 922 } 923