1 //===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SampleProfileMatcher used for stale 10 // profile matching. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/IPO/SampleProfileMatcher.h" 15 #include "llvm/Demangle/Demangle.h" 16 #include "llvm/IR/IntrinsicInst.h" 17 #include "llvm/IR/MDBuilder.h" 18 #include "llvm/Support/CommandLine.h" 19 #include "llvm/Transforms/Utils/LongestCommonSequence.h" 20 21 using namespace llvm; 22 using namespace sampleprof; 23 24 #define DEBUG_TYPE "sample-profile-matcher" 25 26 static cl::opt<unsigned> FuncProfileSimilarityThreshold( 27 "func-profile-similarity-threshold", cl::Hidden, cl::init(80), 28 cl::desc("Consider a profile matches a function if the similarity of their " 29 "callee sequences is above the specified percentile.")); 30 31 static cl::opt<unsigned> MinFuncCountForCGMatching( 32 "min-func-count-for-cg-matching", cl::Hidden, cl::init(5), 33 cl::desc("The minimum number of basic blocks required for a function to " 34 "run stale profile call graph matching.")); 35 36 static cl::opt<unsigned> MinCallCountForCGMatching( 37 "min-call-count-for-cg-matching", cl::Hidden, cl::init(3), 38 cl::desc("The minimum number of call anchors required for a function to " 39 "run stale profile call graph matching.")); 40 41 static cl::opt<bool> LoadFuncProfileforCGMatching( 42 "load-func-profile-for-cg-matching", cl::Hidden, cl::init(true), 43 cl::desc( 44 "Load top-level profiles that the sample reader initially skipped for " 45 "the call-graph matching (only meaningful for extended binary " 46 "format)")); 47 48 extern cl::opt<bool> SalvageStaleProfile; 49 extern cl::opt<bool> SalvageUnusedProfile; 50 extern cl::opt<bool> PersistProfileStaleness; 51 extern cl::opt<bool> ReportProfileStaleness; 52 53 static cl::opt<unsigned> SalvageStaleProfileMaxCallsites( 54 "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX), 55 cl::desc("The maximum number of callsites in a function, above which stale " 56 "profile matching will be skipped.")); 57 58 void SampleProfileMatcher::findIRAnchors(const Function &F, 59 AnchorMap &IRAnchors) const { 60 // For inlined code, recover the original callsite and callee by finding the 61 // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the 62 // top-level frame is "main:1", the callsite is "1" and the callee is "foo". 63 auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) { 64 assert((DIL && DIL->getInlinedAt()) && "No inlined callsite"); 65 const DILocation *PrevDIL = nullptr; 66 do { 67 PrevDIL = DIL; 68 DIL = DIL->getInlinedAt(); 69 } while (DIL->getInlinedAt()); 70 71 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( 72 DIL, FunctionSamples::ProfileIsFS); 73 StringRef CalleeName = PrevDIL->getSubprogramLinkageName(); 74 return std::make_pair(Callsite, FunctionId(CalleeName)); 75 }; 76 77 auto GetCanonicalCalleeName = [](const CallBase *CB) { 78 StringRef CalleeName = UnknownIndirectCallee; 79 if (Function *Callee = CB->getCalledFunction()) 80 CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName()); 81 return CalleeName; 82 }; 83 84 // Extract profile matching anchors in the IR. 85 for (auto &BB : F) { 86 for (auto &I : BB) { 87 DILocation *DIL = I.getDebugLoc(); 88 if (!DIL) 89 continue; 90 91 if (FunctionSamples::ProfileIsProbeBased) { 92 if (auto Probe = extractProbe(I)) { 93 // Flatten inlined IR for the matching. 94 if (DIL->getInlinedAt()) { 95 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL)); 96 } else { 97 // Use empty StringRef for basic block probe. 98 StringRef CalleeName; 99 if (const auto *CB = dyn_cast<CallBase>(&I)) { 100 // Skip the probe inst whose callee name is "llvm.pseudoprobe". 101 if (!isa<IntrinsicInst>(&I)) 102 CalleeName = GetCanonicalCalleeName(CB); 103 } 104 LineLocation Loc = LineLocation(Probe->Id, 0); 105 IRAnchors.emplace(Loc, FunctionId(CalleeName)); 106 } 107 } 108 } else { 109 // TODO: For line-number based profile(AutoFDO), currently only support 110 // find callsite anchors. In future, we need to parse all the non-call 111 // instructions to extract the line locations for profile matching. 112 if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I)) 113 continue; 114 115 if (DIL->getInlinedAt()) { 116 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL)); 117 } else { 118 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( 119 DIL, FunctionSamples::ProfileIsFS); 120 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I)); 121 IRAnchors.emplace(Callsite, FunctionId(CalleeName)); 122 } 123 } 124 } 125 } 126 } 127 128 void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS, 129 AnchorMap &ProfileAnchors) const { 130 auto isInvalidLineOffset = [](uint32_t LineOffset) { 131 return LineOffset & 0x8000; 132 }; 133 134 auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName, 135 AnchorMap &ProfileAnchors) { 136 auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName); 137 if (!Ret.second) { 138 // For multiple callees, which indicates it's an indirect call, we use a 139 // dummy name(UnknownIndirectCallee) as the indrect callee name. 140 Ret.first->second = FunctionId(UnknownIndirectCallee); 141 } 142 }; 143 144 for (const auto &I : FS.getBodySamples()) { 145 const LineLocation &Loc = I.first; 146 if (isInvalidLineOffset(Loc.LineOffset)) 147 continue; 148 for (const auto &C : I.second.getCallTargets()) 149 InsertAnchor(Loc, C.first, ProfileAnchors); 150 } 151 152 for (const auto &I : FS.getCallsiteSamples()) { 153 const LineLocation &Loc = I.first; 154 if (isInvalidLineOffset(Loc.LineOffset)) 155 continue; 156 for (const auto &C : I.second) 157 InsertAnchor(Loc, C.first, ProfileAnchors); 158 } 159 } 160 161 bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName, 162 Function *&FuncWithoutProfile) { 163 FuncWithoutProfile = nullptr; 164 auto R = FunctionsWithoutProfile.find(IRFuncName); 165 if (R != FunctionsWithoutProfile.end()) 166 FuncWithoutProfile = R->second; 167 return !FuncWithoutProfile; 168 } 169 170 bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) { 171 return SymbolMap->find(ProfileFuncName) == SymbolMap->end(); 172 } 173 174 bool SampleProfileMatcher::functionMatchesProfile( 175 const FunctionId &IRFuncName, const FunctionId &ProfileFuncName, 176 bool FindMatchedProfileOnly) { 177 if (IRFuncName == ProfileFuncName) 178 return true; 179 if (!SalvageUnusedProfile) 180 return false; 181 182 // If IR function doesn't have profile and the profile is unused, try 183 // matching them. 184 Function *IRFunc = nullptr; 185 if (functionHasProfile(IRFuncName, IRFunc) || 186 !isProfileUnused(ProfileFuncName)) 187 return false; 188 189 assert(FunctionId(IRFunc->getName()) != ProfileFuncName && 190 "IR function should be different from profile function to match"); 191 return functionMatchesProfile(*IRFunc, ProfileFuncName, 192 FindMatchedProfileOnly); 193 } 194 195 LocToLocMap 196 SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1, 197 const AnchorList &AnchorList2, 198 bool MatchUnusedFunction) { 199 LocToLocMap MatchedAnchors; 200 llvm::longestCommonSequence<LineLocation, FunctionId>( 201 AnchorList1, AnchorList2, 202 [&](const FunctionId &A, const FunctionId &B) { 203 return functionMatchesProfile( 204 A, B, 205 !MatchUnusedFunction // Find matched function only 206 ); 207 }, 208 [&](LineLocation A, LineLocation B) { 209 MatchedAnchors.try_emplace(A, B); 210 }); 211 return MatchedAnchors; 212 } 213 214 void SampleProfileMatcher::matchNonCallsiteLocs( 215 const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors, 216 LocToLocMap &IRToProfileLocationMap) { 217 auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) { 218 // Skip the unchanged location mapping to save memory. 219 if (From != To) 220 IRToProfileLocationMap.insert({From, To}); 221 }; 222 223 // Use function's beginning location as the initial anchor. 224 int32_t LocationDelta = 0; 225 SmallVector<LineLocation> LastMatchedNonAnchors; 226 for (const auto &IR : IRAnchors) { 227 const auto &Loc = IR.first; 228 bool IsMatchedAnchor = false; 229 // Match the anchor location in lexical order. 230 auto R = MatchedAnchors.find(Loc); 231 if (R != MatchedAnchors.end()) { 232 const auto &Candidate = R->second; 233 InsertMatching(Loc, Candidate); 234 LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef() 235 << " is matched from " << Loc << " to " << Candidate 236 << "\n"); 237 LocationDelta = Candidate.LineOffset - Loc.LineOffset; 238 239 // Match backwards for non-anchor locations. 240 // The locations in LastMatchedNonAnchors have been matched forwards 241 // based on the previous anchor, spilt it evenly and overwrite the 242 // second half based on the current anchor. 243 for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2; 244 I < LastMatchedNonAnchors.size(); I++) { 245 const auto &L = LastMatchedNonAnchors[I]; 246 uint32_t CandidateLineOffset = L.LineOffset + LocationDelta; 247 LineLocation Candidate(CandidateLineOffset, L.Discriminator); 248 InsertMatching(L, Candidate); 249 LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L 250 << " to " << Candidate << "\n"); 251 } 252 253 IsMatchedAnchor = true; 254 LastMatchedNonAnchors.clear(); 255 } 256 257 // Match forwards for non-anchor locations. 258 if (!IsMatchedAnchor) { 259 uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta; 260 LineLocation Candidate(CandidateLineOffset, Loc.Discriminator); 261 InsertMatching(Loc, Candidate); 262 LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to " 263 << Candidate << "\n"); 264 LastMatchedNonAnchors.emplace_back(Loc); 265 } 266 } 267 } 268 269 // Filter the non-call locations from IRAnchors and ProfileAnchors and write 270 // them into a list for random access later. 271 void SampleProfileMatcher::getFilteredAnchorList( 272 const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors, 273 AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) { 274 for (const auto &I : IRAnchors) { 275 if (I.second.stringRef().empty()) 276 continue; 277 FilteredIRAnchorsList.emplace_back(I); 278 } 279 280 for (const auto &I : ProfileAnchors) 281 FilteredProfileAnchorList.emplace_back(I); 282 } 283 284 // Call target name anchor based profile fuzzy matching. 285 // Input: 286 // For IR locations, the anchor is the callee name of direct callsite; For 287 // profile locations, it's the call target name for BodySamples or inlinee's 288 // profile name for CallsiteSamples. 289 // Matching heuristic: 290 // First match all the anchors using the diff algorithm, then split the 291 // non-anchor locations between the two anchors evenly, first half are matched 292 // based on the start anchor, second half are matched based on the end anchor. 293 // For example, given: 294 // IR locations: [1, 2(foo), 3, 5, 6(bar), 7] 295 // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9] 296 // The matching gives: 297 // [1, 2(foo), 3, 5, 6(bar), 7] 298 // | | | | | | 299 // [1, 2, 3(foo), 4, 7, 8(bar), 9] 300 // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9]. 301 void SampleProfileMatcher::runStaleProfileMatching( 302 const Function &F, const AnchorMap &IRAnchors, 303 const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap, 304 bool RunCFGMatching, bool RunCGMatching) { 305 if (!RunCFGMatching && !RunCGMatching) 306 return; 307 LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() 308 << "\n"); 309 assert(IRToProfileLocationMap.empty() && 310 "Run stale profile matching only once per function"); 311 312 AnchorList FilteredProfileAnchorList; 313 AnchorList FilteredIRAnchorsList; 314 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, 315 FilteredProfileAnchorList); 316 317 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty()) 318 return; 319 320 if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites || 321 FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) { 322 LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName() 323 << " because the number of callsites in the IR is " 324 << FilteredIRAnchorsList.size() 325 << " and in the profile is " 326 << FilteredProfileAnchorList.size() << "\n"); 327 return; 328 } 329 330 // Match the callsite anchors by finding the longest common subsequence 331 // between IR and profile. 332 // Define a match between two anchors as follows: 333 // 1) The function names of anchors are the same. 334 // 2) The similarity between the anchor functions is above a threshold if 335 // RunCGMatching is set. 336 // For 2), we only consider the anchor functions from IR and profile don't 337 // appear on either side to reduce the matching scope. Note that we need to 338 // use IR anchor as base(A side) to align with the order of 339 // IRToProfileLocationMap. 340 LocToLocMap MatchedAnchors = 341 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, 342 RunCGMatching /* Match unused functions */); 343 344 // CFG level matching: 345 // Apply the callsite matchings to infer matching for the basic 346 // block(non-callsite) locations and write the result to 347 // IRToProfileLocationMap. 348 if (RunCFGMatching) 349 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap); 350 } 351 352 void SampleProfileMatcher::runOnFunction(Function &F) { 353 // We need to use flattened function samples for matching. 354 // Unlike IR, which includes all callsites from the source code, the callsites 355 // in profile only show up when they are hit by samples, i,e. the profile 356 // callsites in one context may differ from those in another context. To get 357 // the maximum number of callsites, we merge the function profiles from all 358 // contexts, aka, the flattened profile to find profile anchors. 359 const auto *FSForMatching = getFlattenedSamplesFor(F); 360 if (SalvageUnusedProfile && !FSForMatching) { 361 // Apply the matching in place to find the new function's matched profile. 362 auto R = FuncToProfileNameMap.find(&F); 363 if (R != FuncToProfileNameMap.end()) { 364 FSForMatching = getFlattenedSamplesFor(R->second); 365 // Try to find the salvaged top-level profiles that are explicitly loaded 366 // for the matching, see "functionMatchesProfileHelper" for the details. 367 if (!FSForMatching && LoadFuncProfileforCGMatching) 368 FSForMatching = Reader.getSamplesFor(R->second.stringRef()); 369 } 370 } 371 if (!FSForMatching) 372 return; 373 374 // Anchors for IR. It's a map from IR location to callee name, callee name is 375 // empty for non-call instruction and use a dummy name(UnknownIndirectCallee) 376 // for unknown indrect callee name. 377 AnchorMap IRAnchors; 378 findIRAnchors(F, IRAnchors); 379 // Anchors for profile. It's a map from callsite location to a set of callee 380 // name. 381 AnchorMap ProfileAnchors; 382 findProfileAnchors(*FSForMatching, ProfileAnchors); 383 384 // Compute the callsite match states for profile staleness report. 385 if (ReportProfileStaleness || PersistProfileStaleness) 386 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr); 387 388 if (!SalvageStaleProfile) 389 return; 390 // For probe-based profiles, run matching only when profile checksum is 391 // mismatched. 392 bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased && 393 !ProbeManager->profileIsValid(F, *FSForMatching); 394 bool RunCFGMatching = 395 !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch; 396 bool RunCGMatching = SalvageUnusedProfile; 397 // For imported functions, the checksum metadata(pseudo_probe_desc) are 398 // dropped, so we leverage function attribute(profile-checksum-mismatch) to 399 // transfer the info: add the attribute during pre-link phase and check it 400 // during post-link phase(see "profileIsValid"). 401 if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) 402 F.addFnAttr("profile-checksum-mismatch"); 403 404 // The matching result will be saved to IRToProfileLocationMap, create a 405 // new map for each function. 406 auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); 407 runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap, 408 RunCFGMatching, RunCGMatching); 409 // Find and update callsite match states after matching. 410 if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness)) 411 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, 412 &IRToProfileLocationMap); 413 } 414 415 void SampleProfileMatcher::recordCallsiteMatchStates( 416 const Function &F, const AnchorMap &IRAnchors, 417 const AnchorMap &ProfileAnchors, 418 const LocToLocMap *IRToProfileLocationMap) { 419 bool IsPostMatch = IRToProfileLocationMap != nullptr; 420 auto &CallsiteMatchStates = 421 FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())]; 422 423 auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) { 424 // IRToProfileLocationMap is null in pre-match phrase. 425 if (!IRToProfileLocationMap) 426 return IRLoc; 427 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc); 428 if (ProfileLoc != IRToProfileLocationMap->end()) 429 return ProfileLoc->second; 430 else 431 return IRLoc; 432 }; 433 434 for (const auto &I : IRAnchors) { 435 // After fuzzy profile matching, use the matching result to remap the 436 // current IR callsite. 437 const auto &ProfileLoc = MapIRLocToProfileLoc(I.first); 438 const auto &IRCalleeId = I.second; 439 const auto &It = ProfileAnchors.find(ProfileLoc); 440 if (It == ProfileAnchors.end()) 441 continue; 442 const auto &ProfCalleeId = It->second; 443 if (IRCalleeId == ProfCalleeId) { 444 auto It = CallsiteMatchStates.find(ProfileLoc); 445 if (It == CallsiteMatchStates.end()) 446 CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch); 447 else if (IsPostMatch) { 448 if (It->second == MatchState::InitialMatch) 449 It->second = MatchState::UnchangedMatch; 450 else if (It->second == MatchState::InitialMismatch) 451 It->second = MatchState::RecoveredMismatch; 452 } 453 } 454 } 455 456 // Check if there are any callsites in the profile that does not match to any 457 // IR callsites. 458 for (const auto &I : ProfileAnchors) { 459 const auto &Loc = I.first; 460 assert(!I.second.stringRef().empty() && "Callees should not be empty"); 461 auto It = CallsiteMatchStates.find(Loc); 462 if (It == CallsiteMatchStates.end()) 463 CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch); 464 else if (IsPostMatch) { 465 // Update the state if it's not matched(UnchangedMatch or 466 // RecoveredMismatch). 467 if (It->second == MatchState::InitialMismatch) 468 It->second = MatchState::UnchangedMismatch; 469 else if (It->second == MatchState::InitialMatch) 470 It->second = MatchState::RemovedMatch; 471 } 472 } 473 } 474 475 void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS, 476 bool IsTopLevel) { 477 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID()); 478 // Skip the function that is external or renamed. 479 if (!FuncDesc) 480 return; 481 482 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) { 483 if (IsTopLevel) 484 NumStaleProfileFunc++; 485 // Given currently all probe ids are after block probe ids, once the 486 // checksum is mismatched, it's likely all the callites are mismatched and 487 // dropped. We conservatively count all the samples as mismatched and stop 488 // counting the inlinees' profiles. 489 MismatchedFunctionSamples += FS.getTotalSamples(); 490 return; 491 } 492 493 // Even the current-level function checksum is matched, it's possible that the 494 // nested inlinees' checksums are mismatched that affect the inlinee's sample 495 // loading, we need to go deeper to check the inlinees' function samples. 496 // Similarly, count all the samples as mismatched if the inlinee's checksum is 497 // mismatched using this recursive function. 498 for (const auto &I : FS.getCallsiteSamples()) 499 for (const auto &CS : I.second) 500 countMismatchedFuncSamples(CS.second, false); 501 } 502 503 void SampleProfileMatcher::countMismatchedCallsiteSamples( 504 const FunctionSamples &FS) { 505 auto It = FuncCallsiteMatchStates.find(FS.getFuncName()); 506 // Skip it if no mismatched callsite or this is an external function. 507 if (It == FuncCallsiteMatchStates.end() || It->second.empty()) 508 return; 509 const auto &CallsiteMatchStates = It->second; 510 511 auto findMatchState = [&](const LineLocation &Loc) { 512 auto It = CallsiteMatchStates.find(Loc); 513 if (It == CallsiteMatchStates.end()) 514 return MatchState::Unknown; 515 return It->second; 516 }; 517 518 auto AttributeMismatchedSamples = [&](const enum MatchState &State, 519 uint64_t Samples) { 520 if (isMismatchState(State)) 521 MismatchedCallsiteSamples += Samples; 522 else if (State == MatchState::RecoveredMismatch) 523 RecoveredCallsiteSamples += Samples; 524 }; 525 526 // The non-inlined callsites are saved in the body samples of function 527 // profile, go through it to count the non-inlined callsite samples. 528 for (const auto &I : FS.getBodySamples()) 529 AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples()); 530 531 // Count the inlined callsite samples. 532 for (const auto &I : FS.getCallsiteSamples()) { 533 auto State = findMatchState(I.first); 534 uint64_t CallsiteSamples = 0; 535 for (const auto &CS : I.second) 536 CallsiteSamples += CS.second.getTotalSamples(); 537 AttributeMismatchedSamples(State, CallsiteSamples); 538 539 if (isMismatchState(State)) 540 continue; 541 542 // When the current level of inlined call site matches the profiled call 543 // site, we need to go deeper along the inline tree to count mismatches from 544 // lower level inlinees. 545 for (const auto &CS : I.second) 546 countMismatchedCallsiteSamples(CS.second); 547 } 548 } 549 550 void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) { 551 auto It = FuncCallsiteMatchStates.find(FS.getFuncName()); 552 // Skip it if no mismatched callsite or this is an external function. 553 if (It == FuncCallsiteMatchStates.end() || It->second.empty()) 554 return; 555 const auto &MatchStates = It->second; 556 [[maybe_unused]] bool OnInitialState = 557 isInitialState(MatchStates.begin()->second); 558 for (const auto &I : MatchStates) { 559 TotalProfiledCallsites++; 560 assert( 561 (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) && 562 "Profile matching state is inconsistent"); 563 564 if (isMismatchState(I.second)) 565 NumMismatchedCallsites++; 566 else if (I.second == MatchState::RecoveredMismatch) 567 NumRecoveredCallsites++; 568 } 569 } 570 571 void SampleProfileMatcher::countCallGraphRecoveredSamples( 572 const FunctionSamples &FS, 573 std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) { 574 if (CallGraphRecoveredProfiles.count(FS.getFunction())) { 575 NumCallGraphRecoveredFuncSamples += FS.getTotalSamples(); 576 return; 577 } 578 579 for (const auto &CM : FS.getCallsiteSamples()) { 580 for (const auto &CS : CM.second) { 581 countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles); 582 } 583 } 584 } 585 586 void SampleProfileMatcher::computeAndReportProfileStaleness() { 587 if (!ReportProfileStaleness && !PersistProfileStaleness) 588 return; 589 590 std::unordered_set<FunctionId> CallGraphRecoveredProfiles; 591 if (SalvageUnusedProfile) { 592 for (const auto &I : FuncToProfileNameMap) { 593 CallGraphRecoveredProfiles.insert(I.second); 594 if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage())) 595 continue; 596 NumCallGraphRecoveredProfiledFunc++; 597 } 598 } 599 600 // Count profile mismatches for profile staleness report. 601 for (const auto &F : M) { 602 if (skipProfileForFunction(F)) 603 continue; 604 // As the stats will be merged by linker, skip reporting the metrics for 605 // imported functions to avoid repeated counting. 606 if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage())) 607 continue; 608 const auto *FS = Reader.getSamplesFor(F); 609 if (!FS) 610 continue; 611 TotalProfiledFunc++; 612 TotalFunctionSamples += FS->getTotalSamples(); 613 614 if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty()) 615 countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles); 616 617 // Checksum mismatch is only used in pseudo-probe mode. 618 if (FunctionSamples::ProfileIsProbeBased) 619 countMismatchedFuncSamples(*FS, true); 620 621 // Count mismatches and samples for calliste. 622 countMismatchCallsites(*FS); 623 countMismatchedCallsiteSamples(*FS); 624 } 625 626 if (ReportProfileStaleness) { 627 if (FunctionSamples::ProfileIsProbeBased) { 628 errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc 629 << ") of functions' profile are invalid and (" 630 << MismatchedFunctionSamples << "/" << TotalFunctionSamples 631 << ") of samples are discarded due to function hash mismatch.\n"; 632 } 633 if (SalvageUnusedProfile) { 634 errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/" 635 << TotalProfiledFunc << ") of functions' profile are matched and (" 636 << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples 637 << ") of samples are reused by call graph matching.\n"; 638 } 639 640 errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/" 641 << TotalProfiledCallsites 642 << ") of callsites' profile are invalid and (" 643 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/" 644 << TotalFunctionSamples 645 << ") of samples are discarded due to callsite location mismatch.\n"; 646 errs() << "(" << NumRecoveredCallsites << "/" 647 << (NumRecoveredCallsites + NumMismatchedCallsites) 648 << ") of callsites and (" << RecoveredCallsiteSamples << "/" 649 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples) 650 << ") of samples are recovered by stale profile matching.\n"; 651 } 652 653 if (PersistProfileStaleness) { 654 LLVMContext &Ctx = M.getContext(); 655 MDBuilder MDB(Ctx); 656 657 SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec; 658 if (FunctionSamples::ProfileIsProbeBased) { 659 ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc); 660 ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc); 661 ProfStatsVec.emplace_back("MismatchedFunctionSamples", 662 MismatchedFunctionSamples); 663 ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples); 664 } 665 666 if (SalvageUnusedProfile) { 667 ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc", 668 NumCallGraphRecoveredProfiledFunc); 669 ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples", 670 NumCallGraphRecoveredFuncSamples); 671 } 672 673 ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites); 674 ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites); 675 ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites); 676 ProfStatsVec.emplace_back("MismatchedCallsiteSamples", 677 MismatchedCallsiteSamples); 678 ProfStatsVec.emplace_back("RecoveredCallsiteSamples", 679 RecoveredCallsiteSamples); 680 681 auto *MD = MDB.createLLVMStats(ProfStatsVec); 682 auto *NMD = M.getOrInsertNamedMetadata("llvm.stats"); 683 NMD->addOperand(MD); 684 } 685 } 686 687 void SampleProfileMatcher::findFunctionsWithoutProfile() { 688 // TODO: Support MD5 profile. 689 if (FunctionSamples::UseMD5) 690 return; 691 StringSet<> NamesInProfile; 692 if (auto NameTable = Reader.getNameTable()) { 693 for (auto Name : *NameTable) 694 NamesInProfile.insert(Name.stringRef()); 695 } 696 697 for (auto &F : M) { 698 // Skip declarations, as even if the function can be matched, we have 699 // nothing to do with it. 700 if (F.isDeclaration()) 701 continue; 702 703 StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName()); 704 const auto *FS = getFlattenedSamplesFor(F); 705 if (FS) 706 continue; 707 708 // For extended binary, functions fully inlined may not be loaded in the 709 // top-level profile, so check the NameTable which has the all symbol names 710 // in profile. 711 if (NamesInProfile.count(CanonFName)) 712 continue; 713 714 // For extended binary, non-profiled function symbols are in the profile 715 // symbol list table. 716 if (PSL && PSL->contains(CanonFName)) 717 continue; 718 719 LLVM_DEBUG(dbgs() << "Function " << CanonFName 720 << " is not in profile or profile symbol list.\n"); 721 FunctionsWithoutProfile[FunctionId(CanonFName)] = &F; 722 } 723 } 724 725 bool SampleProfileMatcher::functionMatchesProfileHelper( 726 const Function &IRFunc, const FunctionId &ProfFunc) { 727 // The value is in the range [0, 1]. The bigger the value is, the more similar 728 // two sequences are. 729 float Similarity = 0.0; 730 731 // Match the functions if they have the same base name(after demangling) and 732 // skip the similarity check. 733 ItaniumPartialDemangler Demangler; 734 // Helper lambda to demangle and get the base name. If the demangling failed, 735 // return an empty string. 736 auto GetBaseName = [&](StringRef FName) { 737 auto FunctionName = FName.str(); 738 if (Demangler.partialDemangle(FunctionName.c_str())) 739 return std::string(); 740 size_t BaseNameSize = 0; 741 // The demangler API follows the __cxa_demangle one, and thus needs a 742 // pointer that originates from malloc (or nullptr) and the caller is 743 // responsible for free()-ing the buffer. 744 char *BaseNamePtr = Demangler.getFunctionBaseName(nullptr, &BaseNameSize); 745 std::string Result = (BaseNamePtr && BaseNameSize) 746 ? std::string(BaseNamePtr, BaseNameSize) 747 : std::string(); 748 free(BaseNamePtr); 749 return Result; 750 }; 751 auto IRBaseName = GetBaseName(IRFunc.getName()); 752 auto ProfBaseName = GetBaseName(ProfFunc.stringRef()); 753 if (!IRBaseName.empty() && IRBaseName == ProfBaseName) { 754 LLVM_DEBUG(dbgs() << "The functions " << IRFunc.getName() << "(IR) and " 755 << ProfFunc << "(Profile) share the same base name: " 756 << IRBaseName << ".\n"); 757 return true; 758 } 759 760 const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc); 761 // With extbinary profile format, initial profile loading only reads profile 762 // based on current function names in the module. 763 // However, if a function is renamed, sample loader skips to load its original 764 // profile(which has a different name), we will miss this case. To address 765 // this, we load the top-level profile candidate explicitly for the matching. 766 if (!FSForMatching && LoadFuncProfileforCGMatching) { 767 DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()}); 768 if (std::error_code EC = Reader.read(TopLevelFunc)) 769 return false; 770 FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef()); 771 LLVM_DEBUG({ 772 if (FSForMatching) 773 dbgs() << "Read top-level function " << ProfFunc 774 << " for call-graph matching\n"; 775 }); 776 } 777 if (!FSForMatching) 778 return false; 779 // The check for similarity or checksum may not be reliable if the function is 780 // tiny, we use the number of basic block as a proxy for the function 781 // complexity and skip the matching if it's too small. 782 if (IRFunc.size() < MinFuncCountForCGMatching || 783 FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching) 784 return false; 785 786 // For probe-based function, we first trust the checksum info. If the checksum 787 // doesn't match, we continue checking for similarity. 788 if (FunctionSamples::ProfileIsProbeBased) { 789 const auto *FuncDesc = ProbeManager->getDesc(IRFunc); 790 if (FuncDesc && 791 !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) { 792 LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName() 793 << "(IR) and " << ProfFunc << "(Profile) match.\n"); 794 795 return true; 796 } 797 } 798 799 AnchorMap IRAnchors; 800 findIRAnchors(IRFunc, IRAnchors); 801 AnchorMap ProfileAnchors; 802 findProfileAnchors(*FSForMatching, ProfileAnchors); 803 804 AnchorList FilteredIRAnchorsList; 805 AnchorList FilteredProfileAnchorList; 806 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, 807 FilteredProfileAnchorList); 808 809 // Similarly skip the matching if the num of anchors is not enough. 810 if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching || 811 FilteredProfileAnchorList.size() < MinCallCountForCGMatching) 812 return false; 813 814 // Use the diff algorithm to find the LCS between IR and profile. 815 816 // Don't recursively match the callee function to avoid infinite matching, 817 // callee functions will be handled later since it's processed in top-down 818 // order . 819 LocToLocMap MatchedAnchors = 820 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, 821 false /* Match unused functions */); 822 823 Similarity = static_cast<float>(MatchedAnchors.size()) / 824 FilteredProfileAnchorList.size(); 825 826 LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName() 827 << "(IR) and " << ProfFunc << "(profile) is " 828 << format("%.2f", Similarity) << "\n"); 829 assert((Similarity >= 0 && Similarity <= 1.0) && 830 "Similarity value should be in [0, 1]"); 831 return Similarity * 100 > FuncProfileSimilarityThreshold; 832 } 833 834 // If FindMatchedProfileOnly is set to true, only use the processed function 835 // results. This is used for skipping the repeated recursive matching. 836 bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc, 837 const FunctionId &ProfFunc, 838 bool FindMatchedProfileOnly) { 839 auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc}); 840 if (R != FuncProfileMatchCache.end()) 841 return R->second; 842 843 if (FindMatchedProfileOnly) 844 return false; 845 846 bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc); 847 FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched; 848 if (Matched) { 849 FuncToProfileNameMap[&IRFunc] = ProfFunc; 850 LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName() 851 << " matches profile:" << ProfFunc << "\n"); 852 } 853 854 return Matched; 855 } 856 857 void SampleProfileMatcher::UpdateWithSalvagedProfiles() { 858 DenseSet<StringRef> ProfileSalvagedFuncs; 859 // Update FuncNameToProfNameMap and SymbolMap. 860 for (auto &I : FuncToProfileNameMap) { 861 assert(I.first && "New function is null"); 862 FunctionId FuncName(I.first->getName()); 863 ProfileSalvagedFuncs.insert(I.second.stringRef()); 864 FuncNameToProfNameMap->emplace(FuncName, I.second); 865 866 // We need to remove the old entry to avoid duplicating the function 867 // processing. 868 SymbolMap->erase(FuncName); 869 SymbolMap->emplace(I.second, I.first); 870 } 871 872 // With extbinary profile format, initial profile loading only reads profile 873 // based on current function names in the module, so we need to load top-level 874 // profiles for functions with different profile name explicitly after 875 // function-profile name map is established with stale profile matching. 876 Reader.read(ProfileSalvagedFuncs); 877 Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap); 878 } 879 880 void SampleProfileMatcher::runOnModule() { 881 ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, 882 FunctionSamples::ProfileIsCS); 883 if (SalvageUnusedProfile) 884 findFunctionsWithoutProfile(); 885 886 // Process the matching in top-down order so that the caller matching result 887 // can be used to the callee matching. 888 std::vector<Function *> TopDownFunctionList; 889 TopDownFunctionList.reserve(M.size()); 890 buildTopDownFuncOrder(CG, TopDownFunctionList); 891 for (auto *F : TopDownFunctionList) { 892 if (skipProfileForFunction(*F)) 893 continue; 894 runOnFunction(*F); 895 } 896 897 if (SalvageUnusedProfile) 898 UpdateWithSalvagedProfiles(); 899 900 if (SalvageStaleProfile) 901 distributeIRToProfileLocationMap(); 902 903 computeAndReportProfileStaleness(); 904 } 905 906 void SampleProfileMatcher::distributeIRToProfileLocationMap( 907 FunctionSamples &FS) { 908 const auto ProfileMappings = FuncMappings.find(FS.getFuncName()); 909 if (ProfileMappings != FuncMappings.end()) { 910 FS.setIRToProfileLocationMap(&(ProfileMappings->second)); 911 } 912 913 for (auto &Callees : 914 const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) { 915 for (auto &FS : Callees.second) { 916 distributeIRToProfileLocationMap(FS.second); 917 } 918 } 919 } 920 921 // Use a central place to distribute the matching results. Outlined and inlined 922 // profile with the function name will be set to the same pointer. 923 void SampleProfileMatcher::distributeIRToProfileLocationMap() { 924 for (auto &I : Reader.getProfiles()) { 925 distributeIRToProfileLocationMap(I.second); 926 } 927 } 928