1 //===- llvm-profdata.cpp - LLVM profile data tool -------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // llvm-profdata merges .profdata files. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/ADT/SmallSet.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/ADT/StringRef.h" 16 #include "llvm/IR/LLVMContext.h" 17 #include "llvm/Object/Binary.h" 18 #include "llvm/ProfileData/InstrProfCorrelator.h" 19 #include "llvm/ProfileData/InstrProfReader.h" 20 #include "llvm/ProfileData/InstrProfWriter.h" 21 #include "llvm/ProfileData/MemProf.h" 22 #include "llvm/ProfileData/ProfileCommon.h" 23 #include "llvm/ProfileData/RawMemProfReader.h" 24 #include "llvm/ProfileData/SampleProfReader.h" 25 #include "llvm/ProfileData/SampleProfWriter.h" 26 #include "llvm/Support/BalancedPartitioning.h" 27 #include "llvm/Support/CommandLine.h" 28 #include "llvm/Support/Discriminator.h" 29 #include "llvm/Support/Errc.h" 30 #include "llvm/Support/FileSystem.h" 31 #include "llvm/Support/Format.h" 32 #include "llvm/Support/FormattedStream.h" 33 #include "llvm/Support/InitLLVM.h" 34 #include "llvm/Support/LLVMDriver.h" 35 #include "llvm/Support/MD5.h" 36 #include "llvm/Support/MemoryBuffer.h" 37 #include "llvm/Support/Path.h" 38 #include "llvm/Support/ThreadPool.h" 39 #include "llvm/Support/Threading.h" 40 #include "llvm/Support/VirtualFileSystem.h" 41 #include "llvm/Support/WithColor.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include <algorithm> 44 #include <cmath> 45 #include <optional> 46 #include <queue> 47 48 using namespace llvm; 49 50 // We use this string to indicate that there are 51 // multiple static functions map to the same name. 52 const std::string DuplicateNameStr = "----"; 53 54 enum ProfileFormat { 55 PF_None = 0, 56 PF_Text, 57 PF_Compact_Binary, // Deprecated 58 PF_Ext_Binary, 59 PF_GCC, 60 PF_Binary 61 }; 62 63 enum class ShowFormat { Text, Json, Yaml }; 64 65 static void warn(Twine Message, std::string Whence = "", 66 std::string Hint = "") { 67 WithColor::warning(); 68 if (!Whence.empty()) 69 errs() << Whence << ": "; 70 errs() << Message << "\n"; 71 if (!Hint.empty()) 72 WithColor::note() << Hint << "\n"; 73 } 74 75 static void warn(Error E, StringRef Whence = "") { 76 if (E.isA<InstrProfError>()) { 77 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 78 warn(IPE.message(), std::string(Whence), std::string("")); 79 }); 80 } 81 } 82 83 static void exitWithError(Twine Message, std::string Whence = "", 84 std::string Hint = "") { 85 WithColor::error(); 86 if (!Whence.empty()) 87 errs() << Whence << ": "; 88 errs() << Message << "\n"; 89 if (!Hint.empty()) 90 WithColor::note() << Hint << "\n"; 91 ::exit(1); 92 } 93 94 static void exitWithError(Error E, StringRef Whence = "") { 95 if (E.isA<InstrProfError>()) { 96 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 97 instrprof_error instrError = IPE.get(); 98 StringRef Hint = ""; 99 if (instrError == instrprof_error::unrecognized_format) { 100 // Hint in case user missed specifying the profile type. 101 Hint = "Perhaps you forgot to use the --sample or --memory option?"; 102 } 103 exitWithError(IPE.message(), std::string(Whence), std::string(Hint)); 104 }); 105 return; 106 } 107 108 exitWithError(toString(std::move(E)), std::string(Whence)); 109 } 110 111 static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") { 112 exitWithError(EC.message(), std::string(Whence)); 113 } 114 115 namespace { 116 enum ProfileKinds { instr, sample, memory }; 117 enum FailureMode { failIfAnyAreInvalid, failIfAllAreInvalid }; 118 } 119 120 static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC, 121 StringRef Whence = "") { 122 if (FailMode == failIfAnyAreInvalid) 123 exitWithErrorCode(EC, Whence); 124 else 125 warn(EC.message(), std::string(Whence)); 126 } 127 128 static void handleMergeWriterError(Error E, StringRef WhenceFile = "", 129 StringRef WhenceFunction = "", 130 bool ShowHint = true) { 131 if (!WhenceFile.empty()) 132 errs() << WhenceFile << ": "; 133 if (!WhenceFunction.empty()) 134 errs() << WhenceFunction << ": "; 135 136 auto IPE = instrprof_error::success; 137 E = handleErrors(std::move(E), 138 [&IPE](std::unique_ptr<InstrProfError> E) -> Error { 139 IPE = E->get(); 140 return Error(std::move(E)); 141 }); 142 errs() << toString(std::move(E)) << "\n"; 143 144 if (ShowHint) { 145 StringRef Hint = ""; 146 if (IPE != instrprof_error::success) { 147 switch (IPE) { 148 case instrprof_error::hash_mismatch: 149 case instrprof_error::count_mismatch: 150 case instrprof_error::value_site_count_mismatch: 151 Hint = "Make sure that all profile data to be merged is generated " 152 "from the same binary."; 153 break; 154 default: 155 break; 156 } 157 } 158 159 if (!Hint.empty()) 160 errs() << Hint << "\n"; 161 } 162 } 163 164 namespace { 165 /// A remapper from original symbol names to new symbol names based on a file 166 /// containing a list of mappings from old name to new name. 167 class SymbolRemapper { 168 std::unique_ptr<MemoryBuffer> File; 169 DenseMap<StringRef, StringRef> RemappingTable; 170 171 public: 172 /// Build a SymbolRemapper from a file containing a list of old/new symbols. 173 static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) { 174 auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile); 175 if (!BufOrError) 176 exitWithErrorCode(BufOrError.getError(), InputFile); 177 178 auto Remapper = std::make_unique<SymbolRemapper>(); 179 Remapper->File = std::move(BufOrError.get()); 180 181 for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#'); 182 !LineIt.is_at_eof(); ++LineIt) { 183 std::pair<StringRef, StringRef> Parts = LineIt->split(' '); 184 if (Parts.first.empty() || Parts.second.empty() || 185 Parts.second.count(' ')) { 186 exitWithError("unexpected line in remapping file", 187 (InputFile + ":" + Twine(LineIt.line_number())).str(), 188 "expected 'old_symbol new_symbol'"); 189 } 190 Remapper->RemappingTable.insert(Parts); 191 } 192 return Remapper; 193 } 194 195 /// Attempt to map the given old symbol into a new symbol. 196 /// 197 /// \return The new symbol, or \p Name if no such symbol was found. 198 StringRef operator()(StringRef Name) { 199 StringRef New = RemappingTable.lookup(Name); 200 return New.empty() ? Name : New; 201 } 202 }; 203 } 204 205 struct WeightedFile { 206 std::string Filename; 207 uint64_t Weight; 208 }; 209 typedef SmallVector<WeightedFile, 5> WeightedFileVector; 210 211 /// Keep track of merged data and reported errors. 212 struct WriterContext { 213 std::mutex Lock; 214 InstrProfWriter Writer; 215 std::vector<std::pair<Error, std::string>> Errors; 216 std::mutex &ErrLock; 217 SmallSet<instrprof_error, 4> &WriterErrorCodes; 218 219 WriterContext(bool IsSparse, std::mutex &ErrLock, 220 SmallSet<instrprof_error, 4> &WriterErrorCodes, 221 uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0) 222 : Writer(IsSparse, ReservoirSize, MaxTraceLength), ErrLock(ErrLock), 223 WriterErrorCodes(WriterErrorCodes) {} 224 }; 225 226 /// Computer the overlap b/w profile BaseFilename and TestFileName, 227 /// and store the program level result to Overlap. 228 static void overlapInput(const std::string &BaseFilename, 229 const std::string &TestFilename, WriterContext *WC, 230 OverlapStats &Overlap, 231 const OverlapFuncFilters &FuncFilter, 232 raw_fd_ostream &OS, bool IsCS) { 233 auto FS = vfs::getRealFileSystem(); 234 auto ReaderOrErr = InstrProfReader::create(TestFilename, *FS); 235 if (Error E = ReaderOrErr.takeError()) { 236 // Skip the empty profiles by returning sliently. 237 auto [ErrorCode, Msg] = InstrProfError::take(std::move(E)); 238 if (ErrorCode != instrprof_error::empty_raw_profile) 239 WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg), 240 TestFilename); 241 return; 242 } 243 244 auto Reader = std::move(ReaderOrErr.get()); 245 for (auto &I : *Reader) { 246 OverlapStats FuncOverlap(OverlapStats::FunctionLevel); 247 FuncOverlap.setFuncInfo(I.Name, I.Hash); 248 249 WC->Writer.overlapRecord(std::move(I), Overlap, FuncOverlap, FuncFilter); 250 FuncOverlap.dump(OS); 251 } 252 } 253 254 /// Load an input into a writer context. 255 static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, 256 const InstrProfCorrelator *Correlator, 257 const StringRef ProfiledBinary, WriterContext *WC) { 258 std::unique_lock<std::mutex> CtxGuard{WC->Lock}; 259 260 // Copy the filename, because llvm::ThreadPool copied the input "const 261 // WeightedFile &" by value, making a reference to the filename within it 262 // invalid outside of this packaged task. 263 std::string Filename = Input.Filename; 264 265 using ::llvm::memprof::RawMemProfReader; 266 if (RawMemProfReader::hasFormat(Input.Filename)) { 267 auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary); 268 if (!ReaderOrErr) { 269 exitWithError(ReaderOrErr.takeError(), Input.Filename); 270 } 271 std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get()); 272 // Check if the profile types can be merged, e.g. clang frontend profiles 273 // should not be merged with memprof profiles. 274 if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) { 275 consumeError(std::move(E)); 276 WC->Errors.emplace_back( 277 make_error<StringError>( 278 "Cannot merge MemProf profile with Clang generated profile.", 279 std::error_code()), 280 Filename); 281 return; 282 } 283 284 auto MemProfError = [&](Error E) { 285 auto [ErrorCode, Msg] = InstrProfError::take(std::move(E)); 286 WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg), 287 Filename); 288 }; 289 290 // Add the frame mappings into the writer context. 291 const auto &IdToFrame = Reader->getFrameMapping(); 292 for (const auto &I : IdToFrame) { 293 bool Succeeded = WC->Writer.addMemProfFrame( 294 /*Id=*/I.first, /*Frame=*/I.getSecond(), MemProfError); 295 // If we weren't able to add the frame mappings then it doesn't make sense 296 // to try to add the records from this profile. 297 if (!Succeeded) 298 return; 299 } 300 const auto &FunctionProfileData = Reader->getProfileData(); 301 // Add the memprof records into the writer context. 302 for (const auto &I : FunctionProfileData) { 303 WC->Writer.addMemProfRecord(/*Id=*/I.first, /*Record=*/I.second); 304 } 305 return; 306 } 307 308 auto FS = vfs::getRealFileSystem(); 309 auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator); 310 if (Error E = ReaderOrErr.takeError()) { 311 // Skip the empty profiles by returning silently. 312 auto [ErrCode, Msg] = InstrProfError::take(std::move(E)); 313 if (ErrCode != instrprof_error::empty_raw_profile) 314 WC->Errors.emplace_back(make_error<InstrProfError>(ErrCode, Msg), 315 Filename); 316 return; 317 } 318 319 auto Reader = std::move(ReaderOrErr.get()); 320 if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) { 321 consumeError(std::move(E)); 322 WC->Errors.emplace_back( 323 make_error<StringError>( 324 "Merge IR generated profile with Clang generated profile.", 325 std::error_code()), 326 Filename); 327 return; 328 } 329 330 for (auto &I : *Reader) { 331 if (Remapper) 332 I.Name = (*Remapper)(I.Name); 333 const StringRef FuncName = I.Name; 334 bool Reported = false; 335 WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) { 336 if (Reported) { 337 consumeError(std::move(E)); 338 return; 339 } 340 Reported = true; 341 // Only show hint the first time an error occurs. 342 auto [ErrCode, Msg] = InstrProfError::take(std::move(E)); 343 std::unique_lock<std::mutex> ErrGuard{WC->ErrLock}; 344 bool firstTime = WC->WriterErrorCodes.insert(ErrCode).second; 345 handleMergeWriterError(make_error<InstrProfError>(ErrCode, Msg), 346 Input.Filename, FuncName, firstTime); 347 }); 348 } 349 350 if (Reader->hasTemporalProfile()) { 351 auto &Traces = Reader->getTemporalProfTraces(Input.Weight); 352 if (!Traces.empty()) 353 WC->Writer.addTemporalProfileTraces( 354 Traces, Reader->getTemporalProfTraceStreamSize()); 355 } 356 if (Reader->hasError()) { 357 if (Error E = Reader->getError()) 358 WC->Errors.emplace_back(std::move(E), Filename); 359 } 360 361 std::vector<llvm::object::BuildID> BinaryIds; 362 if (Error E = Reader->readBinaryIds(BinaryIds)) 363 WC->Errors.emplace_back(std::move(E), Filename); 364 WC->Writer.addBinaryIds(BinaryIds); 365 } 366 367 /// Merge the \p Src writer context into \p Dst. 368 static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) { 369 for (auto &ErrorPair : Src->Errors) 370 Dst->Errors.push_back(std::move(ErrorPair)); 371 Src->Errors.clear(); 372 373 if (Error E = Dst->Writer.mergeProfileKind(Src->Writer.getProfileKind())) 374 exitWithError(std::move(E)); 375 376 Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) { 377 auto [ErrorCode, Msg] = InstrProfError::take(std::move(E)); 378 std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock}; 379 bool firstTime = Dst->WriterErrorCodes.insert(ErrorCode).second; 380 if (firstTime) 381 warn(toString(make_error<InstrProfError>(ErrorCode, Msg))); 382 }); 383 } 384 385 static void writeInstrProfile(StringRef OutputFilename, 386 ProfileFormat OutputFormat, 387 InstrProfWriter &Writer) { 388 std::error_code EC; 389 raw_fd_ostream Output(OutputFilename.data(), EC, 390 OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF 391 : sys::fs::OF_None); 392 if (EC) 393 exitWithErrorCode(EC, OutputFilename); 394 395 if (OutputFormat == PF_Text) { 396 if (Error E = Writer.writeText(Output)) 397 warn(std::move(E)); 398 } else { 399 if (Output.is_displayed()) 400 exitWithError("cannot write a non-text format profile to the terminal"); 401 if (Error E = Writer.write(Output)) 402 warn(std::move(E)); 403 } 404 } 405 406 static void 407 mergeInstrProfile(const WeightedFileVector &Inputs, StringRef DebugInfoFilename, 408 SymbolRemapper *Remapper, StringRef OutputFilename, 409 ProfileFormat OutputFormat, uint64_t TraceReservoirSize, 410 uint64_t MaxTraceLength, bool OutputSparse, 411 unsigned NumThreads, FailureMode FailMode, 412 const StringRef ProfiledBinary) { 413 if (OutputFormat == PF_Compact_Binary) 414 exitWithError("Compact Binary is deprecated"); 415 if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary && 416 OutputFormat != PF_Text) 417 exitWithError("unknown format is specified"); 418 419 std::unique_ptr<InstrProfCorrelator> Correlator; 420 if (!DebugInfoFilename.empty()) { 421 if (auto Err = 422 InstrProfCorrelator::get(DebugInfoFilename).moveInto(Correlator)) 423 exitWithError(std::move(Err), DebugInfoFilename); 424 if (auto Err = Correlator->correlateProfileData()) 425 exitWithError(std::move(Err), DebugInfoFilename); 426 } 427 428 std::mutex ErrorLock; 429 SmallSet<instrprof_error, 4> WriterErrorCodes; 430 431 // If NumThreads is not specified, auto-detect a good default. 432 if (NumThreads == 0) 433 NumThreads = std::min(hardware_concurrency().compute_thread_count(), 434 unsigned((Inputs.size() + 1) / 2)); 435 436 // Initialize the writer contexts. 437 SmallVector<std::unique_ptr<WriterContext>, 4> Contexts; 438 for (unsigned I = 0; I < NumThreads; ++I) 439 Contexts.emplace_back(std::make_unique<WriterContext>( 440 OutputSparse, ErrorLock, WriterErrorCodes, TraceReservoirSize, 441 MaxTraceLength)); 442 443 if (NumThreads == 1) { 444 for (const auto &Input : Inputs) 445 loadInput(Input, Remapper, Correlator.get(), ProfiledBinary, 446 Contexts[0].get()); 447 } else { 448 ThreadPool Pool(hardware_concurrency(NumThreads)); 449 450 // Load the inputs in parallel (N/NumThreads serial steps). 451 unsigned Ctx = 0; 452 for (const auto &Input : Inputs) { 453 Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary, 454 Contexts[Ctx].get()); 455 Ctx = (Ctx + 1) % NumThreads; 456 } 457 Pool.wait(); 458 459 // Merge the writer contexts together (~ lg(NumThreads) serial steps). 460 unsigned Mid = Contexts.size() / 2; 461 unsigned End = Contexts.size(); 462 assert(Mid > 0 && "Expected more than one context"); 463 do { 464 for (unsigned I = 0; I < Mid; ++I) 465 Pool.async(mergeWriterContexts, Contexts[I].get(), 466 Contexts[I + Mid].get()); 467 Pool.wait(); 468 if (End & 1) { 469 Pool.async(mergeWriterContexts, Contexts[0].get(), 470 Contexts[End - 1].get()); 471 Pool.wait(); 472 } 473 End = Mid; 474 Mid /= 2; 475 } while (Mid > 0); 476 } 477 478 // Handle deferred errors encountered during merging. If the number of errors 479 // is equal to the number of inputs the merge failed. 480 unsigned NumErrors = 0; 481 for (std::unique_ptr<WriterContext> &WC : Contexts) { 482 for (auto &ErrorPair : WC->Errors) { 483 ++NumErrors; 484 warn(toString(std::move(ErrorPair.first)), ErrorPair.second); 485 } 486 } 487 if (NumErrors == Inputs.size() || 488 (NumErrors > 0 && FailMode == failIfAnyAreInvalid)) 489 exitWithError("no profile can be merged"); 490 491 writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer); 492 } 493 494 /// The profile entry for a function in instrumentation profile. 495 struct InstrProfileEntry { 496 uint64_t MaxCount = 0; 497 uint64_t NumEdgeCounters = 0; 498 float ZeroCounterRatio = 0.0; 499 InstrProfRecord *ProfRecord; 500 InstrProfileEntry(InstrProfRecord *Record); 501 InstrProfileEntry() = default; 502 }; 503 504 InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) { 505 ProfRecord = Record; 506 uint64_t CntNum = Record->Counts.size(); 507 uint64_t ZeroCntNum = 0; 508 for (size_t I = 0; I < CntNum; ++I) { 509 MaxCount = std::max(MaxCount, Record->Counts[I]); 510 ZeroCntNum += !Record->Counts[I]; 511 } 512 ZeroCounterRatio = (float)ZeroCntNum / CntNum; 513 NumEdgeCounters = CntNum; 514 } 515 516 /// Either set all the counters in the instr profile entry \p IFE to 517 /// -1 / -2 /in order to drop the profile or scale up the 518 /// counters in \p IFP to be above hot / cold threshold. We use 519 /// the ratio of zero counters in the profile of a function to 520 /// decide the profile is helpful or harmful for performance, 521 /// and to choose whether to scale up or drop it. 522 static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot, 523 uint64_t HotInstrThreshold, 524 uint64_t ColdInstrThreshold, 525 float ZeroCounterThreshold) { 526 InstrProfRecord *ProfRecord = IFE.ProfRecord; 527 if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) { 528 // If all or most of the counters of the function are zero, the 529 // profile is unaccountable and should be dropped. Reset all the 530 // counters to be -1 / -2 and PGO profile-use will drop the profile. 531 // All counters being -1 also implies that the function is hot so 532 // PGO profile-use will also set the entry count metadata to be 533 // above hot threshold. 534 // All counters being -2 implies that the function is warm so 535 // PGO profile-use will also set the entry count metadata to be 536 // above cold threshold. 537 auto Kind = 538 (SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm); 539 ProfRecord->setPseudoCount(Kind); 540 return; 541 } 542 543 // Scale up the MaxCount to be multiple times above hot / cold threshold. 544 const unsigned MultiplyFactor = 3; 545 uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold); 546 uint64_t Numerator = Threshold * MultiplyFactor; 547 548 // Make sure Threshold for warm counters is below the HotInstrThreshold. 549 if (!SetToHot && Threshold >= HotInstrThreshold) { 550 Threshold = (HotInstrThreshold + ColdInstrThreshold) / 2; 551 } 552 553 uint64_t Denominator = IFE.MaxCount; 554 if (Numerator <= Denominator) 555 return; 556 ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) { 557 warn(toString(make_error<InstrProfError>(E))); 558 }); 559 } 560 561 const uint64_t ColdPercentileIdx = 15; 562 const uint64_t HotPercentileIdx = 11; 563 564 using sampleprof::FSDiscriminatorPass; 565 566 // Internal options to set FSDiscriminatorPass. Used in merge and show 567 // commands. 568 static cl::opt<FSDiscriminatorPass> FSDiscriminatorPassOption( 569 "fs-discriminator-pass", cl::init(PassLast), cl::Hidden, 570 cl::desc("Zero out the discriminator bits for the FS discrimiantor " 571 "pass beyond this value. The enum values are defined in " 572 "Support/Discriminator.h"), 573 cl::values(clEnumVal(Base, "Use base discriminators only"), 574 clEnumVal(Pass1, "Use base and pass 1 discriminators"), 575 clEnumVal(Pass2, "Use base and pass 1-2 discriminators"), 576 clEnumVal(Pass3, "Use base and pass 1-3 discriminators"), 577 clEnumVal(PassLast, "Use all discriminator bits (default)"))); 578 579 static unsigned getDiscriminatorMask() { 580 return getN1Bits(getFSPassBitEnd(FSDiscriminatorPassOption.getValue())); 581 } 582 583 /// Adjust the instr profile in \p WC based on the sample profile in 584 /// \p Reader. 585 static void 586 adjustInstrProfile(std::unique_ptr<WriterContext> &WC, 587 std::unique_ptr<sampleprof::SampleProfileReader> &Reader, 588 unsigned SupplMinSizeThreshold, float ZeroCounterThreshold, 589 unsigned InstrProfColdThreshold) { 590 // Function to its entry in instr profile. 591 StringMap<InstrProfileEntry> InstrProfileMap; 592 StringMap<StringRef> StaticFuncMap; 593 InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs); 594 595 auto checkSampleProfileHasFUnique = [&Reader]() { 596 for (const auto &PD : Reader->getProfiles()) { 597 auto &FContext = PD.first; 598 if (FContext.toString().find(FunctionSamples::UniqSuffix) != 599 std::string::npos) { 600 return true; 601 } 602 } 603 return false; 604 }; 605 606 bool SampleProfileHasFUnique = checkSampleProfileHasFUnique(); 607 608 auto buildStaticFuncMap = [&StaticFuncMap, 609 SampleProfileHasFUnique](const StringRef Name) { 610 std::string Prefixes[] = {".cpp:", "cc:", ".c:", ".hpp:", ".h:"}; 611 size_t PrefixPos = StringRef::npos; 612 for (auto &Prefix : Prefixes) { 613 PrefixPos = Name.find_insensitive(Prefix); 614 if (PrefixPos == StringRef::npos) 615 continue; 616 PrefixPos += Prefix.size(); 617 break; 618 } 619 620 if (PrefixPos == StringRef::npos) { 621 return; 622 } 623 624 StringRef NewName = Name.drop_front(PrefixPos); 625 StringRef FName = Name.substr(0, PrefixPos - 1); 626 if (NewName.size() == 0) { 627 return; 628 } 629 630 // This name should have a static linkage. 631 size_t PostfixPos = NewName.find(FunctionSamples::UniqSuffix); 632 bool ProfileHasFUnique = (PostfixPos != StringRef::npos); 633 634 // If sample profile and instrumented profile do not agree on symbol 635 // uniqification. 636 if (SampleProfileHasFUnique != ProfileHasFUnique) { 637 // If instrumented profile uses -funique-internal-linakge-symbols, 638 // we need to trim the name. 639 if (ProfileHasFUnique) { 640 NewName = NewName.substr(0, PostfixPos); 641 } else { 642 // If sample profile uses -funique-internal-linakge-symbols, 643 // we build the map. 644 std::string NStr = 645 NewName.str() + getUniqueInternalLinkagePostfix(FName); 646 NewName = StringRef(NStr); 647 StaticFuncMap[NewName] = Name; 648 return; 649 } 650 } 651 652 if (!StaticFuncMap.contains(NewName)) { 653 StaticFuncMap[NewName] = Name; 654 } else { 655 StaticFuncMap[NewName] = DuplicateNameStr; 656 } 657 }; 658 659 // We need to flatten the SampleFDO profile as the InstrFDO 660 // profile does not have inlined callsite profiles. 661 // One caveat is the pre-inlined function -- their samples 662 // should be collapsed into the caller function. 663 // Here we do a DFS traversal to get the flatten profile 664 // info: the sum of entrycount and the max of maxcount. 665 // Here is the algorithm: 666 // recursive (FS, root_name) { 667 // name = FS->getName(); 668 // get samples for FS; 669 // if (InstrProf.find(name) { 670 // root_name = name; 671 // } else { 672 // if (name is in static_func map) { 673 // root_name = static_name; 674 // } 675 // } 676 // update the Map entry for root_name; 677 // for (subfs: FS) { 678 // recursive(subfs, root_name); 679 // } 680 // } 681 // 682 // Here is an example. 683 // 684 // SampleProfile: 685 // foo:12345:1000 686 // 1: 1000 687 // 2.1: 1000 688 // 15: 5000 689 // 4: bar:1000 690 // 1: 1000 691 // 2: goo:3000 692 // 1: 3000 693 // 8: bar:40000 694 // 1: 10000 695 // 2: goo:30000 696 // 1: 30000 697 // 698 // InstrProfile has two entries: 699 // foo 700 // bar.cc:bar 701 // 702 // After BuildMaxSampleMap, we should have the following in FlattenSampleMap: 703 // {"foo", {1000, 5000}} 704 // {"bar.cc:bar", {11000, 30000}} 705 // 706 // foo's has an entry count of 1000, and max body count of 5000. 707 // bar.cc:bar has an entry count of 11000 (sum two callsites of 1000 and 708 // 10000), and max count of 30000 (from the callsite in line 8). 709 // 710 // Note that goo's count will remain in bar.cc:bar() as it does not have an 711 // entry in InstrProfile. 712 DenseMap<StringRef, std::pair<uint64_t, uint64_t>> FlattenSampleMap; 713 auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap, 714 &InstrProfileMap](const FunctionSamples &FS, 715 const StringRef &RootName) { 716 auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS, 717 const StringRef &RootName, 718 auto &BuildImpl) -> void { 719 const StringRef &Name = FS.getName(); 720 const StringRef *NewRootName = &RootName; 721 uint64_t EntrySample = FS.getHeadSamplesEstimate(); 722 uint64_t MaxBodySample = FS.getMaxCountInside(/* SkipCallSite*/ true); 723 724 auto It = InstrProfileMap.find(Name); 725 if (It != InstrProfileMap.end()) { 726 NewRootName = &Name; 727 } else { 728 auto NewName = StaticFuncMap.find(Name); 729 if (NewName != StaticFuncMap.end()) { 730 It = InstrProfileMap.find(NewName->second.str()); 731 if (NewName->second != DuplicateNameStr) { 732 NewRootName = &NewName->second; 733 } 734 } else { 735 // Here the EntrySample is of an inlined function, so we should not 736 // update the EntrySample in the map. 737 EntrySample = 0; 738 } 739 } 740 EntrySample += FlattenSampleMap[*NewRootName].first; 741 MaxBodySample = 742 std::max(FlattenSampleMap[*NewRootName].second, MaxBodySample); 743 FlattenSampleMap[*NewRootName] = 744 std::make_pair(EntrySample, MaxBodySample); 745 746 for (const auto &C : FS.getCallsiteSamples()) 747 for (const auto &F : C.second) 748 BuildImpl(F.second, *NewRootName, BuildImpl); 749 }; 750 BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl); 751 }; 752 753 for (auto &PD : WC->Writer.getProfileData()) { 754 // Populate IPBuilder. 755 for (const auto &PDV : PD.getValue()) { 756 InstrProfRecord Record = PDV.second; 757 IPBuilder.addRecord(Record); 758 } 759 760 // If a function has multiple entries in instr profile, skip it. 761 if (PD.getValue().size() != 1) 762 continue; 763 764 // Initialize InstrProfileMap. 765 InstrProfRecord *R = &PD.getValue().begin()->second; 766 StringRef FullName = PD.getKey(); 767 InstrProfileMap[FullName] = InstrProfileEntry(R); 768 buildStaticFuncMap(FullName); 769 } 770 771 for (auto &PD : Reader->getProfiles()) { 772 sampleprof::FunctionSamples &FS = PD.second; 773 BuildMaxSampleMap(FS, FS.getName()); 774 } 775 776 ProfileSummary InstrPS = *IPBuilder.getSummary(); 777 ProfileSummary SamplePS = Reader->getSummary(); 778 779 // Compute cold thresholds for instr profile and sample profile. 780 uint64_t HotSampleThreshold = 781 ProfileSummaryBuilder::getEntryForPercentile( 782 SamplePS.getDetailedSummary(), 783 ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx]) 784 .MinCount; 785 uint64_t ColdSampleThreshold = 786 ProfileSummaryBuilder::getEntryForPercentile( 787 SamplePS.getDetailedSummary(), 788 ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx]) 789 .MinCount; 790 uint64_t HotInstrThreshold = 791 ProfileSummaryBuilder::getEntryForPercentile( 792 InstrPS.getDetailedSummary(), 793 ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx]) 794 .MinCount; 795 uint64_t ColdInstrThreshold = 796 InstrProfColdThreshold 797 ? InstrProfColdThreshold 798 : ProfileSummaryBuilder::getEntryForPercentile( 799 InstrPS.getDetailedSummary(), 800 ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx]) 801 .MinCount; 802 803 // Find hot/warm functions in sample profile which is cold in instr profile 804 // and adjust the profiles of those functions in the instr profile. 805 for (const auto &E : FlattenSampleMap) { 806 uint64_t SampleMaxCount = std::max(E.second.first, E.second.second); 807 if (SampleMaxCount < ColdSampleThreshold) 808 continue; 809 const StringRef &Name = E.first; 810 auto It = InstrProfileMap.find(Name); 811 if (It == InstrProfileMap.end()) { 812 auto NewName = StaticFuncMap.find(Name); 813 if (NewName != StaticFuncMap.end()) { 814 It = InstrProfileMap.find(NewName->second.str()); 815 if (NewName->second == DuplicateNameStr) { 816 WithColor::warning() 817 << "Static function " << Name 818 << " has multiple promoted names, cannot adjust profile.\n"; 819 } 820 } 821 } 822 if (It == InstrProfileMap.end() || 823 It->second.MaxCount > ColdInstrThreshold || 824 It->second.NumEdgeCounters < SupplMinSizeThreshold) 825 continue; 826 bool SetToHot = SampleMaxCount >= HotSampleThreshold; 827 updateInstrProfileEntry(It->second, SetToHot, HotInstrThreshold, 828 ColdInstrThreshold, ZeroCounterThreshold); 829 } 830 } 831 832 /// The main function to supplement instr profile with sample profile. 833 /// \Inputs contains the instr profile. \p SampleFilename specifies the 834 /// sample profile. \p OutputFilename specifies the output profile name. 835 /// \p OutputFormat specifies the output profile format. \p OutputSparse 836 /// specifies whether to generate sparse profile. \p SupplMinSizeThreshold 837 /// specifies the minimal size for the functions whose profile will be 838 /// adjusted. \p ZeroCounterThreshold is the threshold to check whether 839 /// a function contains too many zero counters and whether its profile 840 /// should be dropped. \p InstrProfColdThreshold is the user specified 841 /// cold threshold which will override the cold threshold got from the 842 /// instr profile summary. 843 static void supplementInstrProfile( 844 const WeightedFileVector &Inputs, StringRef SampleFilename, 845 StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse, 846 unsigned SupplMinSizeThreshold, float ZeroCounterThreshold, 847 unsigned InstrProfColdThreshold) { 848 if (OutputFilename.compare("-") == 0) 849 exitWithError("cannot write indexed profdata format to stdout"); 850 if (Inputs.size() != 1) 851 exitWithError("expect one input to be an instr profile"); 852 if (Inputs[0].Weight != 1) 853 exitWithError("expect instr profile doesn't have weight"); 854 855 StringRef InstrFilename = Inputs[0].Filename; 856 857 // Read sample profile. 858 LLVMContext Context; 859 auto FS = vfs::getRealFileSystem(); 860 auto ReaderOrErr = sampleprof::SampleProfileReader::create( 861 SampleFilename.str(), Context, *FS, FSDiscriminatorPassOption); 862 if (std::error_code EC = ReaderOrErr.getError()) 863 exitWithErrorCode(EC, SampleFilename); 864 auto Reader = std::move(ReaderOrErr.get()); 865 if (std::error_code EC = Reader->read()) 866 exitWithErrorCode(EC, SampleFilename); 867 868 // Read instr profile. 869 std::mutex ErrorLock; 870 SmallSet<instrprof_error, 4> WriterErrorCodes; 871 auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock, 872 WriterErrorCodes); 873 loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get()); 874 if (WC->Errors.size() > 0) 875 exitWithError(std::move(WC->Errors[0].first), InstrFilename); 876 877 adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold, 878 InstrProfColdThreshold); 879 writeInstrProfile(OutputFilename, OutputFormat, WC->Writer); 880 } 881 882 /// Make a copy of the given function samples with all symbol names remapped 883 /// by the provided symbol remapper. 884 static sampleprof::FunctionSamples 885 remapSamples(const sampleprof::FunctionSamples &Samples, 886 SymbolRemapper &Remapper, sampleprof_error &Error) { 887 sampleprof::FunctionSamples Result; 888 Result.setName(Remapper(Samples.getName())); 889 Result.addTotalSamples(Samples.getTotalSamples()); 890 Result.addHeadSamples(Samples.getHeadSamples()); 891 for (const auto &BodySample : Samples.getBodySamples()) { 892 uint32_t MaskedDiscriminator = 893 BodySample.first.Discriminator & getDiscriminatorMask(); 894 Result.addBodySamples(BodySample.first.LineOffset, MaskedDiscriminator, 895 BodySample.second.getSamples()); 896 for (const auto &Target : BodySample.second.getCallTargets()) { 897 Result.addCalledTargetSamples(BodySample.first.LineOffset, 898 MaskedDiscriminator, 899 Remapper(Target.first()), Target.second); 900 } 901 } 902 for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) { 903 sampleprof::FunctionSamplesMap &Target = 904 Result.functionSamplesAt(CallsiteSamples.first); 905 for (const auto &Callsite : CallsiteSamples.second) { 906 sampleprof::FunctionSamples Remapped = 907 remapSamples(Callsite.second, Remapper, Error); 908 MergeResult(Error, 909 Target[std::string(Remapped.getName())].merge(Remapped)); 910 } 911 } 912 return Result; 913 } 914 915 static sampleprof::SampleProfileFormat FormatMap[] = { 916 sampleprof::SPF_None, 917 sampleprof::SPF_Text, 918 sampleprof::SPF_None, 919 sampleprof::SPF_Ext_Binary, 920 sampleprof::SPF_GCC, 921 sampleprof::SPF_Binary}; 922 923 static std::unique_ptr<MemoryBuffer> 924 getInputFileBuf(const StringRef &InputFile) { 925 if (InputFile == "") 926 return {}; 927 928 auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile); 929 if (!BufOrError) 930 exitWithErrorCode(BufOrError.getError(), InputFile); 931 932 return std::move(*BufOrError); 933 } 934 935 static void populateProfileSymbolList(MemoryBuffer *Buffer, 936 sampleprof::ProfileSymbolList &PSL) { 937 if (!Buffer) 938 return; 939 940 SmallVector<StringRef, 32> SymbolVec; 941 StringRef Data = Buffer->getBuffer(); 942 Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); 943 944 for (StringRef SymbolStr : SymbolVec) 945 PSL.add(SymbolStr.trim()); 946 } 947 948 static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer, 949 ProfileFormat OutputFormat, 950 MemoryBuffer *Buffer, 951 sampleprof::ProfileSymbolList &WriterList, 952 bool CompressAllSections, bool UseMD5, 953 bool GenPartialProfile) { 954 populateProfileSymbolList(Buffer, WriterList); 955 if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary) 956 warn("Profile Symbol list is not empty but the output format is not " 957 "ExtBinary format. The list will be lost in the output. "); 958 959 Writer.setProfileSymbolList(&WriterList); 960 961 if (CompressAllSections) { 962 if (OutputFormat != PF_Ext_Binary) 963 warn("-compress-all-section is ignored. Specify -extbinary to enable it"); 964 else 965 Writer.setToCompressAllSections(); 966 } 967 if (UseMD5) { 968 if (OutputFormat != PF_Ext_Binary) 969 warn("-use-md5 is ignored. Specify -extbinary to enable it"); 970 else 971 Writer.setUseMD5(); 972 } 973 if (GenPartialProfile) { 974 if (OutputFormat != PF_Ext_Binary) 975 warn("-gen-partial-profile is ignored. Specify -extbinary to enable it"); 976 else 977 Writer.setPartialProfile(); 978 } 979 } 980 981 static void 982 mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, 983 StringRef OutputFilename, ProfileFormat OutputFormat, 984 StringRef ProfileSymbolListFile, bool CompressAllSections, 985 bool UseMD5, bool GenPartialProfile, 986 SampleProfileLayout ProfileLayout, 987 bool SampleMergeColdContext, bool SampleTrimColdContext, 988 bool SampleColdContextFrameDepth, FailureMode FailMode, 989 bool DropProfileSymbolList, size_t OutputSizeLimit) { 990 using namespace sampleprof; 991 SampleProfileMap ProfileMap; 992 SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers; 993 LLVMContext Context; 994 sampleprof::ProfileSymbolList WriterList; 995 std::optional<bool> ProfileIsProbeBased; 996 std::optional<bool> ProfileIsCS; 997 for (const auto &Input : Inputs) { 998 auto FS = vfs::getRealFileSystem(); 999 auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, *FS, 1000 FSDiscriminatorPassOption); 1001 if (std::error_code EC = ReaderOrErr.getError()) { 1002 warnOrExitGivenError(FailMode, EC, Input.Filename); 1003 continue; 1004 } 1005 1006 // We need to keep the readers around until after all the files are 1007 // read so that we do not lose the function names stored in each 1008 // reader's memory. The function names are needed to write out the 1009 // merged profile map. 1010 Readers.push_back(std::move(ReaderOrErr.get())); 1011 const auto Reader = Readers.back().get(); 1012 if (std::error_code EC = Reader->read()) { 1013 warnOrExitGivenError(FailMode, EC, Input.Filename); 1014 Readers.pop_back(); 1015 continue; 1016 } 1017 1018 SampleProfileMap &Profiles = Reader->getProfiles(); 1019 if (ProfileIsProbeBased && 1020 ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased) 1021 exitWithError( 1022 "cannot merge probe-based profile with non-probe-based profile"); 1023 ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased; 1024 if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS) 1025 exitWithError("cannot merge CS profile with non-CS profile"); 1026 ProfileIsCS = FunctionSamples::ProfileIsCS; 1027 for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end(); 1028 I != E; ++I) { 1029 sampleprof_error Result = sampleprof_error::success; 1030 FunctionSamples Remapped = 1031 Remapper ? remapSamples(I->second, *Remapper, Result) 1032 : FunctionSamples(); 1033 FunctionSamples &Samples = Remapper ? Remapped : I->second; 1034 SampleContext FContext = Samples.getContext(); 1035 MergeResult(Result, ProfileMap[FContext].merge(Samples, Input.Weight)); 1036 if (Result != sampleprof_error::success) { 1037 std::error_code EC = make_error_code(Result); 1038 handleMergeWriterError(errorCodeToError(EC), Input.Filename, 1039 FContext.toString()); 1040 } 1041 } 1042 1043 if (!DropProfileSymbolList) { 1044 std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList = 1045 Reader->getProfileSymbolList(); 1046 if (ReaderList) 1047 WriterList.merge(*ReaderList); 1048 } 1049 } 1050 1051 if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) { 1052 // Use threshold calculated from profile summary unless specified. 1053 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 1054 auto Summary = Builder.computeSummaryForProfiles(ProfileMap); 1055 uint64_t SampleProfColdThreshold = 1056 ProfileSummaryBuilder::getColdCountThreshold( 1057 (Summary->getDetailedSummary())); 1058 1059 // Trim and merge cold context profile using cold threshold above; 1060 SampleContextTrimmer(ProfileMap) 1061 .trimAndMergeColdContextProfiles( 1062 SampleProfColdThreshold, SampleTrimColdContext, 1063 SampleMergeColdContext, SampleColdContextFrameDepth, false); 1064 } 1065 1066 if (ProfileLayout == llvm::sampleprof::SPL_Flat) { 1067 ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS); 1068 ProfileIsCS = FunctionSamples::ProfileIsCS = false; 1069 } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) { 1070 ProfileConverter CSConverter(ProfileMap); 1071 CSConverter.convertCSProfiles(); 1072 ProfileIsCS = FunctionSamples::ProfileIsCS = false; 1073 } 1074 1075 auto WriterOrErr = 1076 SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]); 1077 if (std::error_code EC = WriterOrErr.getError()) 1078 exitWithErrorCode(EC, OutputFilename); 1079 1080 auto Writer = std::move(WriterOrErr.get()); 1081 // WriterList will have StringRef refering to string in Buffer. 1082 // Make sure Buffer lives as long as WriterList. 1083 auto Buffer = getInputFileBuf(ProfileSymbolListFile); 1084 handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList, 1085 CompressAllSections, UseMD5, GenPartialProfile); 1086 1087 // If OutputSizeLimit is 0 (default), it is the same as write(). 1088 if (std::error_code EC = 1089 Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit)) 1090 exitWithErrorCode(std::move(EC)); 1091 } 1092 1093 static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) { 1094 StringRef WeightStr, FileName; 1095 std::tie(WeightStr, FileName) = WeightedFilename.split(','); 1096 1097 uint64_t Weight; 1098 if (WeightStr.getAsInteger(10, Weight) || Weight < 1) 1099 exitWithError("input weight must be a positive integer"); 1100 1101 return {std::string(FileName), Weight}; 1102 } 1103 1104 static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) { 1105 StringRef Filename = WF.Filename; 1106 uint64_t Weight = WF.Weight; 1107 1108 // If it's STDIN just pass it on. 1109 if (Filename == "-") { 1110 WNI.push_back({std::string(Filename), Weight}); 1111 return; 1112 } 1113 1114 llvm::sys::fs::file_status Status; 1115 llvm::sys::fs::status(Filename, Status); 1116 if (!llvm::sys::fs::exists(Status)) 1117 exitWithErrorCode(make_error_code(errc::no_such_file_or_directory), 1118 Filename); 1119 // If it's a source file, collect it. 1120 if (llvm::sys::fs::is_regular_file(Status)) { 1121 WNI.push_back({std::string(Filename), Weight}); 1122 return; 1123 } 1124 1125 if (llvm::sys::fs::is_directory(Status)) { 1126 std::error_code EC; 1127 for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E; 1128 F != E && !EC; F.increment(EC)) { 1129 if (llvm::sys::fs::is_regular_file(F->path())) { 1130 addWeightedInput(WNI, {F->path(), Weight}); 1131 } 1132 } 1133 if (EC) 1134 exitWithErrorCode(EC, Filename); 1135 } 1136 } 1137 1138 static void parseInputFilenamesFile(MemoryBuffer *Buffer, 1139 WeightedFileVector &WFV) { 1140 if (!Buffer) 1141 return; 1142 1143 SmallVector<StringRef, 8> Entries; 1144 StringRef Data = Buffer->getBuffer(); 1145 Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); 1146 for (const StringRef &FileWeightEntry : Entries) { 1147 StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r"); 1148 // Skip comments. 1149 if (SanitizedEntry.startswith("#")) 1150 continue; 1151 // If there's no comma, it's an unweighted profile. 1152 else if (!SanitizedEntry.contains(',')) 1153 addWeightedInput(WFV, {std::string(SanitizedEntry), 1}); 1154 else 1155 addWeightedInput(WFV, parseWeightedFile(SanitizedEntry)); 1156 } 1157 } 1158 1159 static int merge_main(int argc, const char *argv[]) { 1160 cl::list<std::string> InputFilenames(cl::Positional, 1161 cl::desc("<filename...>")); 1162 cl::list<std::string> WeightedInputFilenames("weighted-input", 1163 cl::desc("<weight>,<filename>")); 1164 cl::opt<std::string> InputFilenamesFile( 1165 "input-files", cl::init(""), 1166 cl::desc("Path to file containing newline-separated " 1167 "[<weight>,]<filename> entries")); 1168 cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"), 1169 cl::aliasopt(InputFilenamesFile)); 1170 cl::opt<bool> DumpInputFileList( 1171 "dump-input-file-list", cl::init(false), cl::Hidden, 1172 cl::desc("Dump the list of input files and their weights, then exit")); 1173 cl::opt<std::string> RemappingFile("remapping-file", cl::value_desc("file"), 1174 cl::desc("Symbol remapping file")); 1175 cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"), 1176 cl::aliasopt(RemappingFile)); 1177 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 1178 cl::init("-"), cl::desc("Output file")); 1179 cl::alias OutputFilenameA("o", cl::desc("Alias for --output"), 1180 cl::aliasopt(OutputFilename)); 1181 cl::opt<ProfileKinds> ProfileKind( 1182 cl::desc("Profile kind:"), cl::init(instr), 1183 cl::values(clEnumVal(instr, "Instrumentation profile (default)"), 1184 clEnumVal(sample, "Sample profile"))); 1185 cl::opt<ProfileFormat> OutputFormat( 1186 cl::desc("Format of output profile"), cl::init(PF_Ext_Binary), 1187 cl::values( 1188 clEnumValN(PF_Binary, "binary", "Binary encoding"), 1189 clEnumValN(PF_Ext_Binary, "extbinary", "Extensible binary encoding " 1190 "(default)"), 1191 clEnumValN(PF_Text, "text", "Text encoding"), 1192 clEnumValN(PF_GCC, "gcc", 1193 "GCC encoding (only meaningful for -sample)"))); 1194 cl::opt<FailureMode> FailureMode( 1195 "failure-mode", cl::init(failIfAnyAreInvalid), cl::desc("Failure mode:"), 1196 cl::values(clEnumValN(failIfAnyAreInvalid, "any", 1197 "Fail if any profile is invalid."), 1198 clEnumValN(failIfAllAreInvalid, "all", 1199 "Fail only if all profiles are invalid."))); 1200 cl::opt<bool> OutputSparse("sparse", cl::init(false), 1201 cl::desc("Generate a sparse profile (only meaningful for -instr)")); 1202 cl::opt<unsigned> NumThreads( 1203 "num-threads", cl::init(0), 1204 cl::desc("Number of merge threads to use (default: autodetect)")); 1205 cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"), 1206 cl::aliasopt(NumThreads)); 1207 cl::opt<std::string> ProfileSymbolListFile( 1208 "prof-sym-list", cl::init(""), 1209 cl::desc("Path to file containing the list of function symbols " 1210 "used to populate profile symbol list")); 1211 cl::opt<bool> CompressAllSections( 1212 "compress-all-sections", cl::init(false), cl::Hidden, 1213 cl::desc("Compress all sections when writing the profile (only " 1214 "meaningful for -extbinary)")); 1215 cl::opt<bool> UseMD5( 1216 "use-md5", cl::init(false), cl::Hidden, 1217 cl::desc("Choose to use MD5 to represent string in name table (only " 1218 "meaningful for -extbinary)")); 1219 cl::opt<bool> SampleMergeColdContext( 1220 "sample-merge-cold-context", cl::init(false), cl::Hidden, 1221 cl::desc( 1222 "Merge context sample profiles whose count is below cold threshold")); 1223 cl::opt<bool> SampleTrimColdContext( 1224 "sample-trim-cold-context", cl::init(false), cl::Hidden, 1225 cl::desc( 1226 "Trim context sample profiles whose count is below cold threshold")); 1227 cl::opt<uint32_t> SampleColdContextFrameDepth( 1228 "sample-frame-depth-for-cold-context", cl::init(1), 1229 cl::desc("Keep the last K frames while merging cold profile. 1 means the " 1230 "context-less base profile")); 1231 cl::opt<size_t> OutputSizeLimit( 1232 "output-size-limit", cl::init(0), cl::Hidden, 1233 cl::desc("Trim cold functions until profile size is below specified " 1234 "limit in bytes. This uses a heursitic and functions may be " 1235 "excessively trimmed")); 1236 cl::opt<bool> GenPartialProfile( 1237 "gen-partial-profile", cl::init(false), cl::Hidden, 1238 cl::desc("Generate a partial profile (only meaningful for -extbinary)")); 1239 cl::opt<std::string> SupplInstrWithSample( 1240 "supplement-instr-with-sample", cl::init(""), cl::Hidden, 1241 cl::desc("Supplement an instr profile with sample profile, to correct " 1242 "the profile unrepresentativeness issue. The sample " 1243 "profile is the input of the flag. Output will be in instr " 1244 "format (The flag only works with -instr)")); 1245 cl::opt<float> ZeroCounterThreshold( 1246 "zero-counter-threshold", cl::init(0.7), cl::Hidden, 1247 cl::desc("For the function which is cold in instr profile but hot in " 1248 "sample profile, if the ratio of the number of zero counters " 1249 "divided by the total number of counters is above the " 1250 "threshold, the profile of the function will be regarded as " 1251 "being harmful for performance and will be dropped.")); 1252 cl::opt<unsigned> SupplMinSizeThreshold( 1253 "suppl-min-size-threshold", cl::init(10), cl::Hidden, 1254 cl::desc("If the size of a function is smaller than the threshold, " 1255 "assume it can be inlined by PGO early inliner and it won't " 1256 "be adjusted based on sample profile.")); 1257 cl::opt<unsigned> InstrProfColdThreshold( 1258 "instr-prof-cold-threshold", cl::init(0), cl::Hidden, 1259 cl::desc("User specified cold threshold for instr profile which will " 1260 "override the cold threshold got from profile summary. ")); 1261 cl::opt<SampleProfileLayout> ProfileLayout( 1262 "convert-sample-profile-layout", 1263 cl::desc("Convert the generated profile to a profile with a new layout"), 1264 cl::init(SPL_None), 1265 cl::values( 1266 clEnumValN(SPL_Nest, "nest", 1267 "Nested profile, the input should be CS flat profile"), 1268 clEnumValN(SPL_Flat, "flat", 1269 "Profile with nested inlinee flatten out"))); 1270 cl::opt<std::string> DebugInfoFilename( 1271 "debug-info", cl::init(""), 1272 cl::desc("Use the provided debug info to correlate the raw profile.")); 1273 cl::opt<std::string> ProfiledBinary( 1274 "profiled-binary", cl::init(""), 1275 cl::desc("Path to binary from which the profile was collected.")); 1276 cl::opt<bool> DropProfileSymbolList( 1277 "drop-profile-symbol-list", cl::init(false), cl::Hidden, 1278 cl::desc("Drop the profile symbol list when merging AutoFDO profiles " 1279 "(only meaningful for -sample)")); 1280 // WARNING: This reservoir size value is propagated to any input indexed 1281 // profiles for simplicity. Changing this value between invocations could 1282 // result in sample bias. 1283 cl::opt<uint64_t> TemporalProfTraceReservoirSize( 1284 "temporal-profile-trace-reservoir-size", cl::init(100), 1285 cl::desc("The maximum number of stored temporal profile traces (default: " 1286 "100)")); 1287 cl::opt<uint64_t> TemporalProfMaxTraceLength( 1288 "temporal-profile-max-trace-length", cl::init(10000), 1289 cl::desc("The maximum length of a single temporal profile trace " 1290 "(default: 10000)")); 1291 1292 cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); 1293 1294 WeightedFileVector WeightedInputs; 1295 for (StringRef Filename : InputFilenames) 1296 addWeightedInput(WeightedInputs, {std::string(Filename), 1}); 1297 for (StringRef WeightedFilename : WeightedInputFilenames) 1298 addWeightedInput(WeightedInputs, parseWeightedFile(WeightedFilename)); 1299 1300 // Make sure that the file buffer stays alive for the duration of the 1301 // weighted input vector's lifetime. 1302 auto Buffer = getInputFileBuf(InputFilenamesFile); 1303 parseInputFilenamesFile(Buffer.get(), WeightedInputs); 1304 1305 if (WeightedInputs.empty()) 1306 exitWithError("no input files specified. See " + 1307 sys::path::filename(argv[0]) + " -help"); 1308 1309 if (DumpInputFileList) { 1310 for (auto &WF : WeightedInputs) 1311 outs() << WF.Weight << "," << WF.Filename << "\n"; 1312 return 0; 1313 } 1314 1315 std::unique_ptr<SymbolRemapper> Remapper; 1316 if (!RemappingFile.empty()) 1317 Remapper = SymbolRemapper::create(RemappingFile); 1318 1319 if (!SupplInstrWithSample.empty()) { 1320 if (ProfileKind != instr) 1321 exitWithError( 1322 "-supplement-instr-with-sample can only work with -instr. "); 1323 1324 supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputFilename, 1325 OutputFormat, OutputSparse, SupplMinSizeThreshold, 1326 ZeroCounterThreshold, InstrProfColdThreshold); 1327 return 0; 1328 } 1329 1330 if (ProfileKind == instr) 1331 mergeInstrProfile(WeightedInputs, DebugInfoFilename, Remapper.get(), 1332 OutputFilename, OutputFormat, 1333 TemporalProfTraceReservoirSize, 1334 TemporalProfMaxTraceLength, OutputSparse, NumThreads, 1335 FailureMode, ProfiledBinary); 1336 else 1337 mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, 1338 OutputFormat, ProfileSymbolListFile, CompressAllSections, 1339 UseMD5, GenPartialProfile, ProfileLayout, 1340 SampleMergeColdContext, SampleTrimColdContext, 1341 SampleColdContextFrameDepth, FailureMode, 1342 DropProfileSymbolList, OutputSizeLimit); 1343 return 0; 1344 } 1345 1346 /// Computer the overlap b/w profile BaseFilename and profile TestFilename. 1347 static void overlapInstrProfile(const std::string &BaseFilename, 1348 const std::string &TestFilename, 1349 const OverlapFuncFilters &FuncFilter, 1350 raw_fd_ostream &OS, bool IsCS) { 1351 std::mutex ErrorLock; 1352 SmallSet<instrprof_error, 4> WriterErrorCodes; 1353 WriterContext Context(false, ErrorLock, WriterErrorCodes); 1354 WeightedFile WeightedInput{BaseFilename, 1}; 1355 OverlapStats Overlap; 1356 Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS); 1357 if (E) 1358 exitWithError(std::move(E), "error in getting profile count sums"); 1359 if (Overlap.Base.CountSum < 1.0f) { 1360 OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n"; 1361 exit(0); 1362 } 1363 if (Overlap.Test.CountSum < 1.0f) { 1364 OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n"; 1365 exit(0); 1366 } 1367 loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context); 1368 overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS, 1369 IsCS); 1370 Overlap.dump(OS); 1371 } 1372 1373 namespace { 1374 struct SampleOverlapStats { 1375 SampleContext BaseName; 1376 SampleContext TestName; 1377 // Number of overlap units 1378 uint64_t OverlapCount; 1379 // Total samples of overlap units 1380 uint64_t OverlapSample; 1381 // Number of and total samples of units that only present in base or test 1382 // profile 1383 uint64_t BaseUniqueCount; 1384 uint64_t BaseUniqueSample; 1385 uint64_t TestUniqueCount; 1386 uint64_t TestUniqueSample; 1387 // Number of units and total samples in base or test profile 1388 uint64_t BaseCount; 1389 uint64_t BaseSample; 1390 uint64_t TestCount; 1391 uint64_t TestSample; 1392 // Number of and total samples of units that present in at least one profile 1393 uint64_t UnionCount; 1394 uint64_t UnionSample; 1395 // Weighted similarity 1396 double Similarity; 1397 // For SampleOverlapStats instances representing functions, weights of the 1398 // function in base and test profiles 1399 double BaseWeight; 1400 double TestWeight; 1401 1402 SampleOverlapStats() 1403 : OverlapCount(0), OverlapSample(0), BaseUniqueCount(0), 1404 BaseUniqueSample(0), TestUniqueCount(0), TestUniqueSample(0), 1405 BaseCount(0), BaseSample(0), TestCount(0), TestSample(0), UnionCount(0), 1406 UnionSample(0), Similarity(0.0), BaseWeight(0.0), TestWeight(0.0) {} 1407 }; 1408 } // end anonymous namespace 1409 1410 namespace { 1411 struct FuncSampleStats { 1412 uint64_t SampleSum; 1413 uint64_t MaxSample; 1414 uint64_t HotBlockCount; 1415 FuncSampleStats() : SampleSum(0), MaxSample(0), HotBlockCount(0) {} 1416 FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample, 1417 uint64_t HotBlockCount) 1418 : SampleSum(SampleSum), MaxSample(MaxSample), 1419 HotBlockCount(HotBlockCount) {} 1420 }; 1421 } // end anonymous namespace 1422 1423 namespace { 1424 enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None }; 1425 1426 // Class for updating merging steps for two sorted maps. The class should be 1427 // instantiated with a map iterator type. 1428 template <class T> class MatchStep { 1429 public: 1430 MatchStep() = delete; 1431 1432 MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd) 1433 : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter), 1434 SecondEnd(SecondEnd), Status(MS_None) {} 1435 1436 bool areBothFinished() const { 1437 return (FirstIter == FirstEnd && SecondIter == SecondEnd); 1438 } 1439 1440 bool isFirstFinished() const { return FirstIter == FirstEnd; } 1441 1442 bool isSecondFinished() const { return SecondIter == SecondEnd; } 1443 1444 /// Advance one step based on the previous match status unless the previous 1445 /// status is MS_None. Then update Status based on the comparison between two 1446 /// container iterators at the current step. If the previous status is 1447 /// MS_None, it means two iterators are at the beginning and no comparison has 1448 /// been made, so we simply update Status without advancing the iterators. 1449 void updateOneStep(); 1450 1451 T getFirstIter() const { return FirstIter; } 1452 1453 T getSecondIter() const { return SecondIter; } 1454 1455 MatchStatus getMatchStatus() const { return Status; } 1456 1457 private: 1458 // Current iterator and end iterator of the first container. 1459 T FirstIter; 1460 T FirstEnd; 1461 // Current iterator and end iterator of the second container. 1462 T SecondIter; 1463 T SecondEnd; 1464 // Match status of the current step. 1465 MatchStatus Status; 1466 }; 1467 } // end anonymous namespace 1468 1469 template <class T> void MatchStep<T>::updateOneStep() { 1470 switch (Status) { 1471 case MS_Match: 1472 ++FirstIter; 1473 ++SecondIter; 1474 break; 1475 case MS_FirstUnique: 1476 ++FirstIter; 1477 break; 1478 case MS_SecondUnique: 1479 ++SecondIter; 1480 break; 1481 case MS_None: 1482 break; 1483 } 1484 1485 // Update Status according to iterators at the current step. 1486 if (areBothFinished()) 1487 return; 1488 if (FirstIter != FirstEnd && 1489 (SecondIter == SecondEnd || FirstIter->first < SecondIter->first)) 1490 Status = MS_FirstUnique; 1491 else if (SecondIter != SecondEnd && 1492 (FirstIter == FirstEnd || SecondIter->first < FirstIter->first)) 1493 Status = MS_SecondUnique; 1494 else 1495 Status = MS_Match; 1496 } 1497 1498 // Return the sum of line/block samples, the max line/block sample, and the 1499 // number of line/block samples above the given threshold in a function 1500 // including its inlinees. 1501 static void getFuncSampleStats(const sampleprof::FunctionSamples &Func, 1502 FuncSampleStats &FuncStats, 1503 uint64_t HotThreshold) { 1504 for (const auto &L : Func.getBodySamples()) { 1505 uint64_t Sample = L.second.getSamples(); 1506 FuncStats.SampleSum += Sample; 1507 FuncStats.MaxSample = std::max(FuncStats.MaxSample, Sample); 1508 if (Sample >= HotThreshold) 1509 ++FuncStats.HotBlockCount; 1510 } 1511 1512 for (const auto &C : Func.getCallsiteSamples()) { 1513 for (const auto &F : C.second) 1514 getFuncSampleStats(F.second, FuncStats, HotThreshold); 1515 } 1516 } 1517 1518 /// Predicate that determines if a function is hot with a given threshold. We 1519 /// keep it separate from its callsites for possible extension in the future. 1520 static bool isFunctionHot(const FuncSampleStats &FuncStats, 1521 uint64_t HotThreshold) { 1522 // We intentionally compare the maximum sample count in a function with the 1523 // HotThreshold to get an approximate determination on hot functions. 1524 return (FuncStats.MaxSample >= HotThreshold); 1525 } 1526 1527 namespace { 1528 class SampleOverlapAggregator { 1529 public: 1530 SampleOverlapAggregator(const std::string &BaseFilename, 1531 const std::string &TestFilename, 1532 double LowSimilarityThreshold, double Epsilon, 1533 const OverlapFuncFilters &FuncFilter) 1534 : BaseFilename(BaseFilename), TestFilename(TestFilename), 1535 LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon), 1536 FuncFilter(FuncFilter) {} 1537 1538 /// Detect 0-sample input profile and report to output stream. This interface 1539 /// should be called after loadProfiles(). 1540 bool detectZeroSampleProfile(raw_fd_ostream &OS) const; 1541 1542 /// Write out function-level similarity statistics for functions specified by 1543 /// options --function, --value-cutoff, and --similarity-cutoff. 1544 void dumpFuncSimilarity(raw_fd_ostream &OS) const; 1545 1546 /// Write out program-level similarity and overlap statistics. 1547 void dumpProgramSummary(raw_fd_ostream &OS) const; 1548 1549 /// Write out hot-function and hot-block statistics for base_profile, 1550 /// test_profile, and their overlap. For both cases, the overlap HO is 1551 /// calculated as follows: 1552 /// Given the number of functions (or blocks) that are hot in both profiles 1553 /// HCommon and the number of functions (or blocks) that are hot in at 1554 /// least one profile HUnion, HO = HCommon / HUnion. 1555 void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const; 1556 1557 /// This function tries matching functions in base and test profiles. For each 1558 /// pair of matched functions, it aggregates the function-level 1559 /// similarity into a profile-level similarity. It also dump function-level 1560 /// similarity information of functions specified by --function, 1561 /// --value-cutoff, and --similarity-cutoff options. The program-level 1562 /// similarity PS is computed as follows: 1563 /// Given function-level similarity FS(A) for all function A, the 1564 /// weight of function A in base profile WB(A), and the weight of function 1565 /// A in test profile WT(A), compute PS(base_profile, test_profile) = 1566 /// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0 1567 /// meaning no-overlap. 1568 void computeSampleProfileOverlap(raw_fd_ostream &OS); 1569 1570 /// Initialize ProfOverlap with the sum of samples in base and test 1571 /// profiles. This function also computes and keeps the sum of samples and 1572 /// max sample counts of each function in BaseStats and TestStats for later 1573 /// use to avoid re-computations. 1574 void initializeSampleProfileOverlap(); 1575 1576 /// Load profiles specified by BaseFilename and TestFilename. 1577 std::error_code loadProfiles(); 1578 1579 using FuncSampleStatsMap = 1580 std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>; 1581 1582 private: 1583 SampleOverlapStats ProfOverlap; 1584 SampleOverlapStats HotFuncOverlap; 1585 SampleOverlapStats HotBlockOverlap; 1586 std::string BaseFilename; 1587 std::string TestFilename; 1588 std::unique_ptr<sampleprof::SampleProfileReader> BaseReader; 1589 std::unique_ptr<sampleprof::SampleProfileReader> TestReader; 1590 // BaseStats and TestStats hold FuncSampleStats for each function, with 1591 // function name as the key. 1592 FuncSampleStatsMap BaseStats; 1593 FuncSampleStatsMap TestStats; 1594 // Low similarity threshold in floating point number 1595 double LowSimilarityThreshold; 1596 // Block samples above BaseHotThreshold or TestHotThreshold are considered hot 1597 // for tracking hot blocks. 1598 uint64_t BaseHotThreshold; 1599 uint64_t TestHotThreshold; 1600 // A small threshold used to round the results of floating point accumulations 1601 // to resolve imprecision. 1602 const double Epsilon; 1603 std::multimap<double, SampleOverlapStats, std::greater<double>> 1604 FuncSimilarityDump; 1605 // FuncFilter carries specifications in options --value-cutoff and 1606 // --function. 1607 OverlapFuncFilters FuncFilter; 1608 // Column offsets for printing the function-level details table. 1609 static const unsigned int TestWeightCol = 15; 1610 static const unsigned int SimilarityCol = 30; 1611 static const unsigned int OverlapCol = 43; 1612 static const unsigned int BaseUniqueCol = 53; 1613 static const unsigned int TestUniqueCol = 67; 1614 static const unsigned int BaseSampleCol = 81; 1615 static const unsigned int TestSampleCol = 96; 1616 static const unsigned int FuncNameCol = 111; 1617 1618 /// Return a similarity of two line/block sample counters in the same 1619 /// function in base and test profiles. The line/block-similarity BS(i) is 1620 /// computed as follows: 1621 /// For an offsets i, given the sample count at i in base profile BB(i), 1622 /// the sample count at i in test profile BT(i), the sum of sample counts 1623 /// in this function in base profile SB, and the sum of sample counts in 1624 /// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB - 1625 /// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap. 1626 double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample, 1627 const SampleOverlapStats &FuncOverlap) const; 1628 1629 void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample, 1630 uint64_t HotBlockCount); 1631 1632 void getHotFunctions(const FuncSampleStatsMap &ProfStats, 1633 FuncSampleStatsMap &HotFunc, 1634 uint64_t HotThreshold) const; 1635 1636 void computeHotFuncOverlap(); 1637 1638 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and 1639 /// Difference for two sample units in a matched function according to the 1640 /// given match status. 1641 void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample, 1642 uint64_t HotBlockCount, 1643 SampleOverlapStats &FuncOverlap, 1644 double &Difference, MatchStatus Status); 1645 1646 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and 1647 /// Difference for unmatched callees that only present in one profile in a 1648 /// matched caller function. 1649 void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func, 1650 SampleOverlapStats &FuncOverlap, 1651 double &Difference, MatchStatus Status); 1652 1653 /// This function updates sample overlap statistics of an overlap function in 1654 /// base and test profile. It also calculates a function-internal similarity 1655 /// FIS as follows: 1656 /// For offsets i that have samples in at least one profile in this 1657 /// function A, given BS(i) returned by computeBlockSimilarity(), compute 1658 /// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with 1659 /// 0.0 meaning no overlap. 1660 double computeSampleFunctionInternalOverlap( 1661 const sampleprof::FunctionSamples &BaseFunc, 1662 const sampleprof::FunctionSamples &TestFunc, 1663 SampleOverlapStats &FuncOverlap); 1664 1665 /// Function-level similarity (FS) is a weighted value over function internal 1666 /// similarity (FIS). This function computes a function's FS from its FIS by 1667 /// applying the weight. 1668 double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample, 1669 uint64_t TestFuncSample) const; 1670 1671 /// The function-level similarity FS(A) for a function A is computed as 1672 /// follows: 1673 /// Compute a function-internal similarity FIS(A) by 1674 /// computeSampleFunctionInternalOverlap(). Then, with the weight of 1675 /// function A in base profile WB(A), and the weight of function A in test 1676 /// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A))) 1677 /// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap. 1678 double 1679 computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc, 1680 const sampleprof::FunctionSamples *TestFunc, 1681 SampleOverlapStats *FuncOverlap, 1682 uint64_t BaseFuncSample, 1683 uint64_t TestFuncSample); 1684 1685 /// Profile-level similarity (PS) is a weighted aggregate over function-level 1686 /// similarities (FS). This method weights the FS value by the function 1687 /// weights in the base and test profiles for the aggregation. 1688 double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample, 1689 uint64_t TestFuncSample) const; 1690 }; 1691 } // end anonymous namespace 1692 1693 bool SampleOverlapAggregator::detectZeroSampleProfile( 1694 raw_fd_ostream &OS) const { 1695 bool HaveZeroSample = false; 1696 if (ProfOverlap.BaseSample == 0) { 1697 OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n"; 1698 HaveZeroSample = true; 1699 } 1700 if (ProfOverlap.TestSample == 0) { 1701 OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n"; 1702 HaveZeroSample = true; 1703 } 1704 return HaveZeroSample; 1705 } 1706 1707 double SampleOverlapAggregator::computeBlockSimilarity( 1708 uint64_t BaseSample, uint64_t TestSample, 1709 const SampleOverlapStats &FuncOverlap) const { 1710 double BaseFrac = 0.0; 1711 double TestFrac = 0.0; 1712 if (FuncOverlap.BaseSample > 0) 1713 BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample; 1714 if (FuncOverlap.TestSample > 0) 1715 TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample; 1716 return 1.0 - std::fabs(BaseFrac - TestFrac); 1717 } 1718 1719 void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample, 1720 uint64_t TestSample, 1721 uint64_t HotBlockCount) { 1722 bool IsBaseHot = (BaseSample >= BaseHotThreshold); 1723 bool IsTestHot = (TestSample >= TestHotThreshold); 1724 if (!IsBaseHot && !IsTestHot) 1725 return; 1726 1727 HotBlockOverlap.UnionCount += HotBlockCount; 1728 if (IsBaseHot) 1729 HotBlockOverlap.BaseCount += HotBlockCount; 1730 if (IsTestHot) 1731 HotBlockOverlap.TestCount += HotBlockCount; 1732 if (IsBaseHot && IsTestHot) 1733 HotBlockOverlap.OverlapCount += HotBlockCount; 1734 } 1735 1736 void SampleOverlapAggregator::getHotFunctions( 1737 const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc, 1738 uint64_t HotThreshold) const { 1739 for (const auto &F : ProfStats) { 1740 if (isFunctionHot(F.second, HotThreshold)) 1741 HotFunc.emplace(F.first, F.second); 1742 } 1743 } 1744 1745 void SampleOverlapAggregator::computeHotFuncOverlap() { 1746 FuncSampleStatsMap BaseHotFunc; 1747 getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold); 1748 HotFuncOverlap.BaseCount = BaseHotFunc.size(); 1749 1750 FuncSampleStatsMap TestHotFunc; 1751 getHotFunctions(TestStats, TestHotFunc, TestHotThreshold); 1752 HotFuncOverlap.TestCount = TestHotFunc.size(); 1753 HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount; 1754 1755 for (const auto &F : BaseHotFunc) { 1756 if (TestHotFunc.count(F.first)) 1757 ++HotFuncOverlap.OverlapCount; 1758 else 1759 ++HotFuncOverlap.UnionCount; 1760 } 1761 } 1762 1763 void SampleOverlapAggregator::updateOverlapStatsForFunction( 1764 uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount, 1765 SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) { 1766 assert(Status != MS_None && 1767 "Match status should be updated before updating overlap statistics"); 1768 if (Status == MS_FirstUnique) { 1769 TestSample = 0; 1770 FuncOverlap.BaseUniqueSample += BaseSample; 1771 } else if (Status == MS_SecondUnique) { 1772 BaseSample = 0; 1773 FuncOverlap.TestUniqueSample += TestSample; 1774 } else { 1775 ++FuncOverlap.OverlapCount; 1776 } 1777 1778 FuncOverlap.UnionSample += std::max(BaseSample, TestSample); 1779 FuncOverlap.OverlapSample += std::min(BaseSample, TestSample); 1780 Difference += 1781 1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap); 1782 updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount); 1783 } 1784 1785 void SampleOverlapAggregator::updateForUnmatchedCallee( 1786 const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap, 1787 double &Difference, MatchStatus Status) { 1788 assert((Status == MS_FirstUnique || Status == MS_SecondUnique) && 1789 "Status must be either of the two unmatched cases"); 1790 FuncSampleStats FuncStats; 1791 if (Status == MS_FirstUnique) { 1792 getFuncSampleStats(Func, FuncStats, BaseHotThreshold); 1793 updateOverlapStatsForFunction(FuncStats.SampleSum, 0, 1794 FuncStats.HotBlockCount, FuncOverlap, 1795 Difference, Status); 1796 } else { 1797 getFuncSampleStats(Func, FuncStats, TestHotThreshold); 1798 updateOverlapStatsForFunction(0, FuncStats.SampleSum, 1799 FuncStats.HotBlockCount, FuncOverlap, 1800 Difference, Status); 1801 } 1802 } 1803 1804 double SampleOverlapAggregator::computeSampleFunctionInternalOverlap( 1805 const sampleprof::FunctionSamples &BaseFunc, 1806 const sampleprof::FunctionSamples &TestFunc, 1807 SampleOverlapStats &FuncOverlap) { 1808 1809 using namespace sampleprof; 1810 1811 double Difference = 0; 1812 1813 // Accumulate Difference for regular line/block samples in the function. 1814 // We match them through sort-merge join algorithm because 1815 // FunctionSamples::getBodySamples() returns a map of sample counters ordered 1816 // by their offsets. 1817 MatchStep<BodySampleMap::const_iterator> BlockIterStep( 1818 BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(), 1819 TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend()); 1820 BlockIterStep.updateOneStep(); 1821 while (!BlockIterStep.areBothFinished()) { 1822 uint64_t BaseSample = 1823 BlockIterStep.isFirstFinished() 1824 ? 0 1825 : BlockIterStep.getFirstIter()->second.getSamples(); 1826 uint64_t TestSample = 1827 BlockIterStep.isSecondFinished() 1828 ? 0 1829 : BlockIterStep.getSecondIter()->second.getSamples(); 1830 updateOverlapStatsForFunction(BaseSample, TestSample, 1, FuncOverlap, 1831 Difference, BlockIterStep.getMatchStatus()); 1832 1833 BlockIterStep.updateOneStep(); 1834 } 1835 1836 // Accumulate Difference for callsite lines in the function. We match 1837 // them through sort-merge algorithm because 1838 // FunctionSamples::getCallsiteSamples() returns a map of callsite records 1839 // ordered by their offsets. 1840 MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep( 1841 BaseFunc.getCallsiteSamples().cbegin(), 1842 BaseFunc.getCallsiteSamples().cend(), 1843 TestFunc.getCallsiteSamples().cbegin(), 1844 TestFunc.getCallsiteSamples().cend()); 1845 CallsiteIterStep.updateOneStep(); 1846 while (!CallsiteIterStep.areBothFinished()) { 1847 MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus(); 1848 assert(CallsiteStepStatus != MS_None && 1849 "Match status should be updated before entering loop body"); 1850 1851 if (CallsiteStepStatus != MS_Match) { 1852 auto Callsite = (CallsiteStepStatus == MS_FirstUnique) 1853 ? CallsiteIterStep.getFirstIter() 1854 : CallsiteIterStep.getSecondIter(); 1855 for (const auto &F : Callsite->second) 1856 updateForUnmatchedCallee(F.second, FuncOverlap, Difference, 1857 CallsiteStepStatus); 1858 } else { 1859 // There may be multiple inlinees at the same offset, so we need to try 1860 // matching all of them. This match is implemented through sort-merge 1861 // algorithm because callsite records at the same offset are ordered by 1862 // function names. 1863 MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep( 1864 CallsiteIterStep.getFirstIter()->second.cbegin(), 1865 CallsiteIterStep.getFirstIter()->second.cend(), 1866 CallsiteIterStep.getSecondIter()->second.cbegin(), 1867 CallsiteIterStep.getSecondIter()->second.cend()); 1868 CalleeIterStep.updateOneStep(); 1869 while (!CalleeIterStep.areBothFinished()) { 1870 MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus(); 1871 if (CalleeStepStatus != MS_Match) { 1872 auto Callee = (CalleeStepStatus == MS_FirstUnique) 1873 ? CalleeIterStep.getFirstIter() 1874 : CalleeIterStep.getSecondIter(); 1875 updateForUnmatchedCallee(Callee->second, FuncOverlap, Difference, 1876 CalleeStepStatus); 1877 } else { 1878 // An inlined function can contain other inlinees inside, so compute 1879 // the Difference recursively. 1880 Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap( 1881 CalleeIterStep.getFirstIter()->second, 1882 CalleeIterStep.getSecondIter()->second, 1883 FuncOverlap); 1884 } 1885 CalleeIterStep.updateOneStep(); 1886 } 1887 } 1888 CallsiteIterStep.updateOneStep(); 1889 } 1890 1891 // Difference reflects the total differences of line/block samples in this 1892 // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to 1893 // reflect the similarity between function profiles in [0.0f to 1.0f]. 1894 return (2.0 - Difference) / 2; 1895 } 1896 1897 double SampleOverlapAggregator::weightForFuncSimilarity( 1898 double FuncInternalSimilarity, uint64_t BaseFuncSample, 1899 uint64_t TestFuncSample) const { 1900 // Compute the weight as the distance between the function weights in two 1901 // profiles. 1902 double BaseFrac = 0.0; 1903 double TestFrac = 0.0; 1904 assert(ProfOverlap.BaseSample > 0 && 1905 "Total samples in base profile should be greater than 0"); 1906 BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample; 1907 assert(ProfOverlap.TestSample > 0 && 1908 "Total samples in test profile should be greater than 0"); 1909 TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample; 1910 double WeightDistance = std::fabs(BaseFrac - TestFrac); 1911 1912 // Take WeightDistance into the similarity. 1913 return FuncInternalSimilarity * (1 - WeightDistance); 1914 } 1915 1916 double 1917 SampleOverlapAggregator::weightByImportance(double FuncSimilarity, 1918 uint64_t BaseFuncSample, 1919 uint64_t TestFuncSample) const { 1920 1921 double BaseFrac = 0.0; 1922 double TestFrac = 0.0; 1923 assert(ProfOverlap.BaseSample > 0 && 1924 "Total samples in base profile should be greater than 0"); 1925 BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0; 1926 assert(ProfOverlap.TestSample > 0 && 1927 "Total samples in test profile should be greater than 0"); 1928 TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0; 1929 return FuncSimilarity * (BaseFrac + TestFrac); 1930 } 1931 1932 double SampleOverlapAggregator::computeSampleFunctionOverlap( 1933 const sampleprof::FunctionSamples *BaseFunc, 1934 const sampleprof::FunctionSamples *TestFunc, 1935 SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample, 1936 uint64_t TestFuncSample) { 1937 // Default function internal similarity before weighted, meaning two functions 1938 // has no overlap. 1939 const double DefaultFuncInternalSimilarity = 0; 1940 double FuncSimilarity; 1941 double FuncInternalSimilarity; 1942 1943 // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap. 1944 // In this case, we use DefaultFuncInternalSimilarity as the function internal 1945 // similarity. 1946 if (!BaseFunc || !TestFunc) { 1947 FuncInternalSimilarity = DefaultFuncInternalSimilarity; 1948 } else { 1949 assert(FuncOverlap != nullptr && 1950 "FuncOverlap should be provided in this case"); 1951 FuncInternalSimilarity = computeSampleFunctionInternalOverlap( 1952 *BaseFunc, *TestFunc, *FuncOverlap); 1953 // Now, FuncInternalSimilarity may be a little less than 0 due to 1954 // imprecision of floating point accumulations. Make it zero if the 1955 // difference is below Epsilon. 1956 FuncInternalSimilarity = (std::fabs(FuncInternalSimilarity - 0) < Epsilon) 1957 ? 0 1958 : FuncInternalSimilarity; 1959 } 1960 FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity, 1961 BaseFuncSample, TestFuncSample); 1962 return FuncSimilarity; 1963 } 1964 1965 void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) { 1966 using namespace sampleprof; 1967 1968 std::unordered_map<SampleContext, const FunctionSamples *, 1969 SampleContext::Hash> 1970 BaseFuncProf; 1971 const auto &BaseProfiles = BaseReader->getProfiles(); 1972 for (const auto &BaseFunc : BaseProfiles) { 1973 BaseFuncProf.emplace(BaseFunc.second.getContext(), &(BaseFunc.second)); 1974 } 1975 ProfOverlap.UnionCount = BaseFuncProf.size(); 1976 1977 const auto &TestProfiles = TestReader->getProfiles(); 1978 for (const auto &TestFunc : TestProfiles) { 1979 SampleOverlapStats FuncOverlap; 1980 FuncOverlap.TestName = TestFunc.second.getContext(); 1981 assert(TestStats.count(FuncOverlap.TestName) && 1982 "TestStats should have records for all functions in test profile " 1983 "except inlinees"); 1984 FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum; 1985 1986 bool Matched = false; 1987 const auto Match = BaseFuncProf.find(FuncOverlap.TestName); 1988 if (Match == BaseFuncProf.end()) { 1989 const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName]; 1990 ++ProfOverlap.TestUniqueCount; 1991 ProfOverlap.TestUniqueSample += FuncStats.SampleSum; 1992 FuncOverlap.TestUniqueSample = FuncStats.SampleSum; 1993 1994 updateHotBlockOverlap(0, FuncStats.SampleSum, FuncStats.HotBlockCount); 1995 1996 double FuncSimilarity = computeSampleFunctionOverlap( 1997 nullptr, nullptr, nullptr, 0, FuncStats.SampleSum); 1998 ProfOverlap.Similarity += 1999 weightByImportance(FuncSimilarity, 0, FuncStats.SampleSum); 2000 2001 ++ProfOverlap.UnionCount; 2002 ProfOverlap.UnionSample += FuncStats.SampleSum; 2003 } else { 2004 ++ProfOverlap.OverlapCount; 2005 2006 // Two functions match with each other. Compute function-level overlap and 2007 // aggregate them into profile-level overlap. 2008 FuncOverlap.BaseName = Match->second->getContext(); 2009 assert(BaseStats.count(FuncOverlap.BaseName) && 2010 "BaseStats should have records for all functions in base profile " 2011 "except inlinees"); 2012 FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum; 2013 2014 FuncOverlap.Similarity = computeSampleFunctionOverlap( 2015 Match->second, &TestFunc.second, &FuncOverlap, FuncOverlap.BaseSample, 2016 FuncOverlap.TestSample); 2017 ProfOverlap.Similarity += 2018 weightByImportance(FuncOverlap.Similarity, FuncOverlap.BaseSample, 2019 FuncOverlap.TestSample); 2020 ProfOverlap.OverlapSample += FuncOverlap.OverlapSample; 2021 ProfOverlap.UnionSample += FuncOverlap.UnionSample; 2022 2023 // Accumulate the percentage of base unique and test unique samples into 2024 // ProfOverlap. 2025 ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample; 2026 ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample; 2027 2028 // Remove matched base functions for later reporting functions not found 2029 // in test profile. 2030 BaseFuncProf.erase(Match); 2031 Matched = true; 2032 } 2033 2034 // Print function-level similarity information if specified by options. 2035 assert(TestStats.count(FuncOverlap.TestName) && 2036 "TestStats should have records for all functions in test profile " 2037 "except inlinees"); 2038 if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff || 2039 (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) || 2040 (Matched && !FuncFilter.NameFilter.empty() && 2041 FuncOverlap.BaseName.toString().find(FuncFilter.NameFilter) != 2042 std::string::npos)) { 2043 assert(ProfOverlap.BaseSample > 0 && 2044 "Total samples in base profile should be greater than 0"); 2045 FuncOverlap.BaseWeight = 2046 static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample; 2047 assert(ProfOverlap.TestSample > 0 && 2048 "Total samples in test profile should be greater than 0"); 2049 FuncOverlap.TestWeight = 2050 static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample; 2051 FuncSimilarityDump.emplace(FuncOverlap.BaseWeight, FuncOverlap); 2052 } 2053 } 2054 2055 // Traverse through functions in base profile but not in test profile. 2056 for (const auto &F : BaseFuncProf) { 2057 assert(BaseStats.count(F.second->getContext()) && 2058 "BaseStats should have records for all functions in base profile " 2059 "except inlinees"); 2060 const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()]; 2061 ++ProfOverlap.BaseUniqueCount; 2062 ProfOverlap.BaseUniqueSample += FuncStats.SampleSum; 2063 2064 updateHotBlockOverlap(FuncStats.SampleSum, 0, FuncStats.HotBlockCount); 2065 2066 double FuncSimilarity = computeSampleFunctionOverlap( 2067 nullptr, nullptr, nullptr, FuncStats.SampleSum, 0); 2068 ProfOverlap.Similarity += 2069 weightByImportance(FuncSimilarity, FuncStats.SampleSum, 0); 2070 2071 ProfOverlap.UnionSample += FuncStats.SampleSum; 2072 } 2073 2074 // Now, ProfSimilarity may be a little greater than 1 due to imprecision 2075 // of floating point accumulations. Make it 1.0 if the difference is below 2076 // Epsilon. 2077 ProfOverlap.Similarity = (std::fabs(ProfOverlap.Similarity - 1) < Epsilon) 2078 ? 1 2079 : ProfOverlap.Similarity; 2080 2081 computeHotFuncOverlap(); 2082 } 2083 2084 void SampleOverlapAggregator::initializeSampleProfileOverlap() { 2085 const auto &BaseProf = BaseReader->getProfiles(); 2086 for (const auto &I : BaseProf) { 2087 ++ProfOverlap.BaseCount; 2088 FuncSampleStats FuncStats; 2089 getFuncSampleStats(I.second, FuncStats, BaseHotThreshold); 2090 ProfOverlap.BaseSample += FuncStats.SampleSum; 2091 BaseStats.emplace(I.second.getContext(), FuncStats); 2092 } 2093 2094 const auto &TestProf = TestReader->getProfiles(); 2095 for (const auto &I : TestProf) { 2096 ++ProfOverlap.TestCount; 2097 FuncSampleStats FuncStats; 2098 getFuncSampleStats(I.second, FuncStats, TestHotThreshold); 2099 ProfOverlap.TestSample += FuncStats.SampleSum; 2100 TestStats.emplace(I.second.getContext(), FuncStats); 2101 } 2102 2103 ProfOverlap.BaseName = StringRef(BaseFilename); 2104 ProfOverlap.TestName = StringRef(TestFilename); 2105 } 2106 2107 void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const { 2108 using namespace sampleprof; 2109 2110 if (FuncSimilarityDump.empty()) 2111 return; 2112 2113 formatted_raw_ostream FOS(OS); 2114 FOS << "Function-level details:\n"; 2115 FOS << "Base weight"; 2116 FOS.PadToColumn(TestWeightCol); 2117 FOS << "Test weight"; 2118 FOS.PadToColumn(SimilarityCol); 2119 FOS << "Similarity"; 2120 FOS.PadToColumn(OverlapCol); 2121 FOS << "Overlap"; 2122 FOS.PadToColumn(BaseUniqueCol); 2123 FOS << "Base unique"; 2124 FOS.PadToColumn(TestUniqueCol); 2125 FOS << "Test unique"; 2126 FOS.PadToColumn(BaseSampleCol); 2127 FOS << "Base samples"; 2128 FOS.PadToColumn(TestSampleCol); 2129 FOS << "Test samples"; 2130 FOS.PadToColumn(FuncNameCol); 2131 FOS << "Function name\n"; 2132 for (const auto &F : FuncSimilarityDump) { 2133 double OverlapPercent = 2134 F.second.UnionSample > 0 2135 ? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample 2136 : 0; 2137 double BaseUniquePercent = 2138 F.second.BaseSample > 0 2139 ? static_cast<double>(F.second.BaseUniqueSample) / 2140 F.second.BaseSample 2141 : 0; 2142 double TestUniquePercent = 2143 F.second.TestSample > 0 2144 ? static_cast<double>(F.second.TestUniqueSample) / 2145 F.second.TestSample 2146 : 0; 2147 2148 FOS << format("%.2f%%", F.second.BaseWeight * 100); 2149 FOS.PadToColumn(TestWeightCol); 2150 FOS << format("%.2f%%", F.second.TestWeight * 100); 2151 FOS.PadToColumn(SimilarityCol); 2152 FOS << format("%.2f%%", F.second.Similarity * 100); 2153 FOS.PadToColumn(OverlapCol); 2154 FOS << format("%.2f%%", OverlapPercent * 100); 2155 FOS.PadToColumn(BaseUniqueCol); 2156 FOS << format("%.2f%%", BaseUniquePercent * 100); 2157 FOS.PadToColumn(TestUniqueCol); 2158 FOS << format("%.2f%%", TestUniquePercent * 100); 2159 FOS.PadToColumn(BaseSampleCol); 2160 FOS << F.second.BaseSample; 2161 FOS.PadToColumn(TestSampleCol); 2162 FOS << F.second.TestSample; 2163 FOS.PadToColumn(FuncNameCol); 2164 FOS << F.second.TestName.toString() << "\n"; 2165 } 2166 } 2167 2168 void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const { 2169 OS << "Profile overlap infomation for base_profile: " 2170 << ProfOverlap.BaseName.toString() 2171 << " and test_profile: " << ProfOverlap.TestName.toString() 2172 << "\nProgram level:\n"; 2173 2174 OS << " Whole program profile similarity: " 2175 << format("%.3f%%", ProfOverlap.Similarity * 100) << "\n"; 2176 2177 assert(ProfOverlap.UnionSample > 0 && 2178 "Total samples in two profile should be greater than 0"); 2179 double OverlapPercent = 2180 static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample; 2181 assert(ProfOverlap.BaseSample > 0 && 2182 "Total samples in base profile should be greater than 0"); 2183 double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) / 2184 ProfOverlap.BaseSample; 2185 assert(ProfOverlap.TestSample > 0 && 2186 "Total samples in test profile should be greater than 0"); 2187 double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) / 2188 ProfOverlap.TestSample; 2189 2190 OS << " Whole program sample overlap: " 2191 << format("%.3f%%", OverlapPercent * 100) << "\n"; 2192 OS << " percentage of samples unique in base profile: " 2193 << format("%.3f%%", BaseUniquePercent * 100) << "\n"; 2194 OS << " percentage of samples unique in test profile: " 2195 << format("%.3f%%", TestUniquePercent * 100) << "\n"; 2196 OS << " total samples in base profile: " << ProfOverlap.BaseSample << "\n" 2197 << " total samples in test profile: " << ProfOverlap.TestSample << "\n"; 2198 2199 assert(ProfOverlap.UnionCount > 0 && 2200 "There should be at least one function in two input profiles"); 2201 double FuncOverlapPercent = 2202 static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount; 2203 OS << " Function overlap: " << format("%.3f%%", FuncOverlapPercent * 100) 2204 << "\n"; 2205 OS << " overlap functions: " << ProfOverlap.OverlapCount << "\n"; 2206 OS << " functions unique in base profile: " << ProfOverlap.BaseUniqueCount 2207 << "\n"; 2208 OS << " functions unique in test profile: " << ProfOverlap.TestUniqueCount 2209 << "\n"; 2210 } 2211 2212 void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap( 2213 raw_fd_ostream &OS) const { 2214 assert(HotFuncOverlap.UnionCount > 0 && 2215 "There should be at least one hot function in two input profiles"); 2216 OS << " Hot-function overlap: " 2217 << format("%.3f%%", static_cast<double>(HotFuncOverlap.OverlapCount) / 2218 HotFuncOverlap.UnionCount * 100) 2219 << "\n"; 2220 OS << " overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n"; 2221 OS << " hot functions unique in base profile: " 2222 << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n"; 2223 OS << " hot functions unique in test profile: " 2224 << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n"; 2225 2226 assert(HotBlockOverlap.UnionCount > 0 && 2227 "There should be at least one hot block in two input profiles"); 2228 OS << " Hot-block overlap: " 2229 << format("%.3f%%", static_cast<double>(HotBlockOverlap.OverlapCount) / 2230 HotBlockOverlap.UnionCount * 100) 2231 << "\n"; 2232 OS << " overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n"; 2233 OS << " hot blocks unique in base profile: " 2234 << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n"; 2235 OS << " hot blocks unique in test profile: " 2236 << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n"; 2237 } 2238 2239 std::error_code SampleOverlapAggregator::loadProfiles() { 2240 using namespace sampleprof; 2241 2242 LLVMContext Context; 2243 auto FS = vfs::getRealFileSystem(); 2244 auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, *FS, 2245 FSDiscriminatorPassOption); 2246 if (std::error_code EC = BaseReaderOrErr.getError()) 2247 exitWithErrorCode(EC, BaseFilename); 2248 2249 auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, *FS, 2250 FSDiscriminatorPassOption); 2251 if (std::error_code EC = TestReaderOrErr.getError()) 2252 exitWithErrorCode(EC, TestFilename); 2253 2254 BaseReader = std::move(BaseReaderOrErr.get()); 2255 TestReader = std::move(TestReaderOrErr.get()); 2256 2257 if (std::error_code EC = BaseReader->read()) 2258 exitWithErrorCode(EC, BaseFilename); 2259 if (std::error_code EC = TestReader->read()) 2260 exitWithErrorCode(EC, TestFilename); 2261 if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased()) 2262 exitWithError( 2263 "cannot compare probe-based profile with non-probe-based profile"); 2264 if (BaseReader->profileIsCS() != TestReader->profileIsCS()) 2265 exitWithError("cannot compare CS profile with non-CS profile"); 2266 2267 // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in 2268 // profile summary. 2269 ProfileSummary &BasePS = BaseReader->getSummary(); 2270 ProfileSummary &TestPS = TestReader->getSummary(); 2271 BaseHotThreshold = 2272 ProfileSummaryBuilder::getHotCountThreshold(BasePS.getDetailedSummary()); 2273 TestHotThreshold = 2274 ProfileSummaryBuilder::getHotCountThreshold(TestPS.getDetailedSummary()); 2275 2276 return std::error_code(); 2277 } 2278 2279 void overlapSampleProfile(const std::string &BaseFilename, 2280 const std::string &TestFilename, 2281 const OverlapFuncFilters &FuncFilter, 2282 uint64_t SimilarityCutoff, raw_fd_ostream &OS) { 2283 using namespace sampleprof; 2284 2285 // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics 2286 // report 2--3 places after decimal point in percentage numbers. 2287 SampleOverlapAggregator OverlapAggr( 2288 BaseFilename, TestFilename, 2289 static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter); 2290 if (std::error_code EC = OverlapAggr.loadProfiles()) 2291 exitWithErrorCode(EC); 2292 2293 OverlapAggr.initializeSampleProfileOverlap(); 2294 if (OverlapAggr.detectZeroSampleProfile(OS)) 2295 return; 2296 2297 OverlapAggr.computeSampleProfileOverlap(OS); 2298 2299 OverlapAggr.dumpProgramSummary(OS); 2300 OverlapAggr.dumpHotFuncAndBlockOverlap(OS); 2301 OverlapAggr.dumpFuncSimilarity(OS); 2302 } 2303 2304 static int overlap_main(int argc, const char *argv[]) { 2305 cl::opt<std::string> BaseFilename(cl::Positional, cl::Required, 2306 cl::desc("<base profile file>")); 2307 cl::opt<std::string> TestFilename(cl::Positional, cl::Required, 2308 cl::desc("<test profile file>")); 2309 cl::opt<std::string> Output("output", cl::value_desc("output"), cl::init("-"), 2310 cl::desc("Output file")); 2311 cl::alias OutputA("o", cl::desc("Alias for --output"), cl::aliasopt(Output)); 2312 cl::opt<bool> IsCS( 2313 "cs", cl::init(false), 2314 cl::desc("For context sensitive PGO counts. Does not work with CSSPGO.")); 2315 cl::opt<unsigned long long> ValueCutoff( 2316 "value-cutoff", cl::init(-1), 2317 cl::desc( 2318 "Function level overlap information for every function (with calling " 2319 "context for csspgo) in test " 2320 "profile with max count value greater then the parameter value")); 2321 cl::opt<std::string> FuncNameFilter( 2322 "function", 2323 cl::desc("Function level overlap information for matching functions. For " 2324 "CSSPGO this takes a a function name with calling context")); 2325 cl::opt<unsigned long long> SimilarityCutoff( 2326 "similarity-cutoff", cl::init(0), 2327 cl::desc("For sample profiles, list function names (with calling context " 2328 "for csspgo) for overlapped functions " 2329 "with similarities below the cutoff (percentage times 10000).")); 2330 cl::opt<ProfileKinds> ProfileKind( 2331 cl::desc("Profile kind:"), cl::init(instr), 2332 cl::values(clEnumVal(instr, "Instrumentation profile (default)"), 2333 clEnumVal(sample, "Sample profile"))); 2334 cl::ParseCommandLineOptions(argc, argv, "LLVM profile data overlap tool\n"); 2335 2336 std::error_code EC; 2337 raw_fd_ostream OS(Output.data(), EC, sys::fs::OF_TextWithCRLF); 2338 if (EC) 2339 exitWithErrorCode(EC, Output); 2340 2341 if (ProfileKind == instr) 2342 overlapInstrProfile(BaseFilename, TestFilename, 2343 OverlapFuncFilters{ValueCutoff, FuncNameFilter}, OS, 2344 IsCS); 2345 else 2346 overlapSampleProfile(BaseFilename, TestFilename, 2347 OverlapFuncFilters{ValueCutoff, FuncNameFilter}, 2348 SimilarityCutoff, OS); 2349 2350 return 0; 2351 } 2352 2353 namespace { 2354 struct ValueSitesStats { 2355 ValueSitesStats() 2356 : TotalNumValueSites(0), TotalNumValueSitesWithValueProfile(0), 2357 TotalNumValues(0) {} 2358 uint64_t TotalNumValueSites; 2359 uint64_t TotalNumValueSitesWithValueProfile; 2360 uint64_t TotalNumValues; 2361 std::vector<unsigned> ValueSitesHistogram; 2362 }; 2363 } // namespace 2364 2365 static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK, 2366 ValueSitesStats &Stats, raw_fd_ostream &OS, 2367 InstrProfSymtab *Symtab) { 2368 uint32_t NS = Func.getNumValueSites(VK); 2369 Stats.TotalNumValueSites += NS; 2370 for (size_t I = 0; I < NS; ++I) { 2371 uint32_t NV = Func.getNumValueDataForSite(VK, I); 2372 std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, I); 2373 Stats.TotalNumValues += NV; 2374 if (NV) { 2375 Stats.TotalNumValueSitesWithValueProfile++; 2376 if (NV > Stats.ValueSitesHistogram.size()) 2377 Stats.ValueSitesHistogram.resize(NV, 0); 2378 Stats.ValueSitesHistogram[NV - 1]++; 2379 } 2380 2381 uint64_t SiteSum = 0; 2382 for (uint32_t V = 0; V < NV; V++) 2383 SiteSum += VD[V].Count; 2384 if (SiteSum == 0) 2385 SiteSum = 1; 2386 2387 for (uint32_t V = 0; V < NV; V++) { 2388 OS << "\t[ " << format("%2u", I) << ", "; 2389 if (Symtab == nullptr) 2390 OS << format("%4" PRIu64, VD[V].Value); 2391 else 2392 OS << Symtab->getFuncName(VD[V].Value); 2393 OS << ", " << format("%10" PRId64, VD[V].Count) << " ] (" 2394 << format("%.2f%%", (VD[V].Count * 100.0 / SiteSum)) << ")\n"; 2395 } 2396 } 2397 } 2398 2399 static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK, 2400 ValueSitesStats &Stats) { 2401 OS << " Total number of sites: " << Stats.TotalNumValueSites << "\n"; 2402 OS << " Total number of sites with values: " 2403 << Stats.TotalNumValueSitesWithValueProfile << "\n"; 2404 OS << " Total number of profiled values: " << Stats.TotalNumValues << "\n"; 2405 2406 OS << " Value sites histogram:\n\tNumTargets, SiteCount\n"; 2407 for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) { 2408 if (Stats.ValueSitesHistogram[I] > 0) 2409 OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n"; 2410 } 2411 } 2412 2413 static int showInstrProfile( 2414 const std::string &Filename, bool ShowCounts, uint32_t TopN, 2415 bool ShowIndirectCallTargets, bool ShowMemOPSizes, bool ShowDetailedSummary, 2416 std::vector<uint32_t> DetailedSummaryCutoffs, bool ShowAllFunctions, 2417 bool ShowCS, uint64_t ValueCutoff, bool OnlyListBelow, 2418 const std::string &ShowFunction, bool TextFormat, bool ShowBinaryIds, 2419 bool ShowCovered, bool ShowProfileVersion, bool ShowTemporalProfTraces, 2420 ShowFormat SFormat, raw_fd_ostream &OS) { 2421 if (SFormat == ShowFormat::Json) 2422 exitWithError("JSON output is not supported for instr profiles"); 2423 if (SFormat == ShowFormat::Yaml) 2424 exitWithError("YAML output is not supported for instr profiles"); 2425 auto FS = vfs::getRealFileSystem(); 2426 auto ReaderOrErr = InstrProfReader::create(Filename, *FS); 2427 std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs); 2428 if (ShowDetailedSummary && Cutoffs.empty()) { 2429 Cutoffs = ProfileSummaryBuilder::DefaultCutoffs; 2430 } 2431 InstrProfSummaryBuilder Builder(std::move(Cutoffs)); 2432 if (Error E = ReaderOrErr.takeError()) 2433 exitWithError(std::move(E), Filename); 2434 2435 auto Reader = std::move(ReaderOrErr.get()); 2436 bool IsIRInstr = Reader->isIRLevelProfile(); 2437 size_t ShownFunctions = 0; 2438 size_t BelowCutoffFunctions = 0; 2439 int NumVPKind = IPVK_Last - IPVK_First + 1; 2440 std::vector<ValueSitesStats> VPStats(NumVPKind); 2441 2442 auto MinCmp = [](const std::pair<std::string, uint64_t> &v1, 2443 const std::pair<std::string, uint64_t> &v2) { 2444 return v1.second > v2.second; 2445 }; 2446 2447 std::priority_queue<std::pair<std::string, uint64_t>, 2448 std::vector<std::pair<std::string, uint64_t>>, 2449 decltype(MinCmp)> 2450 HottestFuncs(MinCmp); 2451 2452 if (!TextFormat && OnlyListBelow) { 2453 OS << "The list of functions with the maximum counter less than " 2454 << ValueCutoff << ":\n"; 2455 } 2456 2457 // Add marker so that IR-level instrumentation round-trips properly. 2458 if (TextFormat && IsIRInstr) 2459 OS << ":ir\n"; 2460 2461 for (const auto &Func : *Reader) { 2462 if (Reader->isIRLevelProfile()) { 2463 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); 2464 if (FuncIsCS != ShowCS) 2465 continue; 2466 } 2467 bool Show = ShowAllFunctions || 2468 (!ShowFunction.empty() && Func.Name.contains(ShowFunction)); 2469 2470 bool doTextFormatDump = (Show && TextFormat); 2471 2472 if (doTextFormatDump) { 2473 InstrProfSymtab &Symtab = Reader->getSymtab(); 2474 InstrProfWriter::writeRecordInText(Func.Name, Func.Hash, Func, Symtab, 2475 OS); 2476 continue; 2477 } 2478 2479 assert(Func.Counts.size() > 0 && "function missing entry counter"); 2480 Builder.addRecord(Func); 2481 2482 if (ShowCovered) { 2483 if (llvm::any_of(Func.Counts, [](uint64_t C) { return C; })) 2484 OS << Func.Name << "\n"; 2485 continue; 2486 } 2487 2488 uint64_t FuncMax = 0; 2489 uint64_t FuncSum = 0; 2490 2491 auto PseudoKind = Func.getCountPseudoKind(); 2492 if (PseudoKind != InstrProfRecord::NotPseudo) { 2493 if (Show) { 2494 if (!ShownFunctions) 2495 OS << "Counters:\n"; 2496 ++ShownFunctions; 2497 OS << " " << Func.Name << ":\n" 2498 << " Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n" 2499 << " Counters: " << Func.Counts.size(); 2500 if (PseudoKind == InstrProfRecord::PseudoHot) 2501 OS << " <PseudoHot>\n"; 2502 else if (PseudoKind == InstrProfRecord::PseudoWarm) 2503 OS << " <PseudoWarm>\n"; 2504 else 2505 llvm_unreachable("Unknown PseudoKind"); 2506 } 2507 continue; 2508 } 2509 2510 for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) { 2511 FuncMax = std::max(FuncMax, Func.Counts[I]); 2512 FuncSum += Func.Counts[I]; 2513 } 2514 2515 if (FuncMax < ValueCutoff) { 2516 ++BelowCutoffFunctions; 2517 if (OnlyListBelow) { 2518 OS << " " << Func.Name << ": (Max = " << FuncMax 2519 << " Sum = " << FuncSum << ")\n"; 2520 } 2521 continue; 2522 } else if (OnlyListBelow) 2523 continue; 2524 2525 if (TopN) { 2526 if (HottestFuncs.size() == TopN) { 2527 if (HottestFuncs.top().second < FuncMax) { 2528 HottestFuncs.pop(); 2529 HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax)); 2530 } 2531 } else 2532 HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax)); 2533 } 2534 2535 if (Show) { 2536 if (!ShownFunctions) 2537 OS << "Counters:\n"; 2538 2539 ++ShownFunctions; 2540 2541 OS << " " << Func.Name << ":\n" 2542 << " Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n" 2543 << " Counters: " << Func.Counts.size() << "\n"; 2544 if (!IsIRInstr) 2545 OS << " Function count: " << Func.Counts[0] << "\n"; 2546 2547 if (ShowIndirectCallTargets) 2548 OS << " Indirect Call Site Count: " 2549 << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n"; 2550 2551 uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize); 2552 if (ShowMemOPSizes && NumMemOPCalls > 0) 2553 OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls 2554 << "\n"; 2555 2556 if (ShowCounts) { 2557 OS << " Block counts: ["; 2558 size_t Start = (IsIRInstr ? 0 : 1); 2559 for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) { 2560 OS << (I == Start ? "" : ", ") << Func.Counts[I]; 2561 } 2562 OS << "]\n"; 2563 } 2564 2565 if (ShowIndirectCallTargets) { 2566 OS << " Indirect Target Results:\n"; 2567 traverseAllValueSites(Func, IPVK_IndirectCallTarget, 2568 VPStats[IPVK_IndirectCallTarget], OS, 2569 &(Reader->getSymtab())); 2570 } 2571 2572 if (ShowMemOPSizes && NumMemOPCalls > 0) { 2573 OS << " Memory Intrinsic Size Results:\n"; 2574 traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS, 2575 nullptr); 2576 } 2577 } 2578 } 2579 if (Reader->hasError()) 2580 exitWithError(Reader->getError(), Filename); 2581 2582 if (TextFormat || ShowCovered) 2583 return 0; 2584 std::unique_ptr<ProfileSummary> PS(Builder.getSummary()); 2585 bool IsIR = Reader->isIRLevelProfile(); 2586 OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end"); 2587 if (IsIR) 2588 OS << " entry_first = " << Reader->instrEntryBBEnabled(); 2589 OS << "\n"; 2590 if (ShowAllFunctions || !ShowFunction.empty()) 2591 OS << "Functions shown: " << ShownFunctions << "\n"; 2592 OS << "Total functions: " << PS->getNumFunctions() << "\n"; 2593 if (ValueCutoff > 0) { 2594 OS << "Number of functions with maximum count (< " << ValueCutoff 2595 << "): " << BelowCutoffFunctions << "\n"; 2596 OS << "Number of functions with maximum count (>= " << ValueCutoff 2597 << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n"; 2598 } 2599 OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n"; 2600 OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n"; 2601 2602 if (TopN) { 2603 std::vector<std::pair<std::string, uint64_t>> SortedHottestFuncs; 2604 while (!HottestFuncs.empty()) { 2605 SortedHottestFuncs.emplace_back(HottestFuncs.top()); 2606 HottestFuncs.pop(); 2607 } 2608 OS << "Top " << TopN 2609 << " functions with the largest internal block counts: \n"; 2610 for (auto &hotfunc : llvm::reverse(SortedHottestFuncs)) 2611 OS << " " << hotfunc.first << ", max count = " << hotfunc.second << "\n"; 2612 } 2613 2614 if (ShownFunctions && ShowIndirectCallTargets) { 2615 OS << "Statistics for indirect call sites profile:\n"; 2616 showValueSitesStats(OS, IPVK_IndirectCallTarget, 2617 VPStats[IPVK_IndirectCallTarget]); 2618 } 2619 2620 if (ShownFunctions && ShowMemOPSizes) { 2621 OS << "Statistics for memory intrinsic calls sizes profile:\n"; 2622 showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]); 2623 } 2624 2625 if (ShowDetailedSummary) { 2626 OS << "Total number of blocks: " << PS->getNumCounts() << "\n"; 2627 OS << "Total count: " << PS->getTotalCount() << "\n"; 2628 PS->printDetailedSummary(OS); 2629 } 2630 2631 if (ShowBinaryIds) 2632 if (Error E = Reader->printBinaryIds(OS)) 2633 exitWithError(std::move(E), Filename); 2634 2635 if (ShowProfileVersion) 2636 OS << "Profile version: " << Reader->getVersion() << "\n"; 2637 2638 if (ShowTemporalProfTraces) { 2639 auto &Traces = Reader->getTemporalProfTraces(); 2640 OS << "Temporal Profile Traces (samples=" << Traces.size() 2641 << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n"; 2642 for (unsigned i = 0; i < Traces.size(); i++) { 2643 OS << " Temporal Profile Trace " << i << " (weight=" << Traces[i].Weight 2644 << " count=" << Traces[i].FunctionNameRefs.size() << "):\n"; 2645 for (auto &NameRef : Traces[i].FunctionNameRefs) 2646 OS << " " << Reader->getSymtab().getFuncName(NameRef) << "\n"; 2647 } 2648 } 2649 2650 return 0; 2651 } 2652 2653 static void showSectionInfo(sampleprof::SampleProfileReader *Reader, 2654 raw_fd_ostream &OS) { 2655 if (!Reader->dumpSectionInfo(OS)) { 2656 WithColor::warning() << "-show-sec-info-only is only supported for " 2657 << "sample profile in extbinary format and is " 2658 << "ignored for other formats.\n"; 2659 return; 2660 } 2661 } 2662 2663 namespace { 2664 struct HotFuncInfo { 2665 std::string FuncName; 2666 uint64_t TotalCount; 2667 double TotalCountPercent; 2668 uint64_t MaxCount; 2669 uint64_t EntryCount; 2670 2671 HotFuncInfo() 2672 : TotalCount(0), TotalCountPercent(0.0f), MaxCount(0), EntryCount(0) {} 2673 2674 HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES) 2675 : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP), 2676 MaxCount(MS), EntryCount(ES) {} 2677 }; 2678 } // namespace 2679 2680 // Print out detailed information about hot functions in PrintValues vector. 2681 // Users specify titles and offset of every columns through ColumnTitle and 2682 // ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same 2683 // and at least 4. Besides, users can optionally give a HotFuncMetric string to 2684 // print out or let it be an empty string. 2685 static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle, 2686 const std::vector<int> &ColumnOffset, 2687 const std::vector<HotFuncInfo> &PrintValues, 2688 uint64_t HotFuncCount, uint64_t TotalFuncCount, 2689 uint64_t HotProfCount, uint64_t TotalProfCount, 2690 const std::string &HotFuncMetric, 2691 uint32_t TopNFunctions, raw_fd_ostream &OS) { 2692 assert(ColumnOffset.size() == ColumnTitle.size() && 2693 "ColumnOffset and ColumnTitle should have the same size"); 2694 assert(ColumnTitle.size() >= 4 && 2695 "ColumnTitle should have at least 4 elements"); 2696 assert(TotalFuncCount > 0 && 2697 "There should be at least one function in the profile"); 2698 double TotalProfPercent = 0; 2699 if (TotalProfCount > 0) 2700 TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100; 2701 2702 formatted_raw_ostream FOS(OS); 2703 FOS << HotFuncCount << " out of " << TotalFuncCount 2704 << " functions with profile (" 2705 << format("%.2f%%", 2706 (static_cast<double>(HotFuncCount) / TotalFuncCount * 100)) 2707 << ") are considered hot functions"; 2708 if (!HotFuncMetric.empty()) 2709 FOS << " (" << HotFuncMetric << ")"; 2710 FOS << ".\n"; 2711 FOS << HotProfCount << " out of " << TotalProfCount << " profile counts (" 2712 << format("%.2f%%", TotalProfPercent) << ") are from hot functions.\n"; 2713 2714 for (size_t I = 0; I < ColumnTitle.size(); ++I) { 2715 FOS.PadToColumn(ColumnOffset[I]); 2716 FOS << ColumnTitle[I]; 2717 } 2718 FOS << "\n"; 2719 2720 uint32_t Count = 0; 2721 for (const auto &R : PrintValues) { 2722 if (TopNFunctions && (Count++ == TopNFunctions)) 2723 break; 2724 FOS.PadToColumn(ColumnOffset[0]); 2725 FOS << R.TotalCount << " (" << format("%.2f%%", R.TotalCountPercent) << ")"; 2726 FOS.PadToColumn(ColumnOffset[1]); 2727 FOS << R.MaxCount; 2728 FOS.PadToColumn(ColumnOffset[2]); 2729 FOS << R.EntryCount; 2730 FOS.PadToColumn(ColumnOffset[3]); 2731 FOS << R.FuncName << "\n"; 2732 } 2733 } 2734 2735 static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles, 2736 ProfileSummary &PS, uint32_t TopN, 2737 raw_fd_ostream &OS) { 2738 using namespace sampleprof; 2739 2740 const uint32_t HotFuncCutoff = 990000; 2741 auto &SummaryVector = PS.getDetailedSummary(); 2742 uint64_t MinCountThreshold = 0; 2743 for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) { 2744 if (SummaryEntry.Cutoff == HotFuncCutoff) { 2745 MinCountThreshold = SummaryEntry.MinCount; 2746 break; 2747 } 2748 } 2749 2750 // Traverse all functions in the profile and keep only hot functions. 2751 // The following loop also calculates the sum of total samples of all 2752 // functions. 2753 std::multimap<uint64_t, std::pair<const FunctionSamples *, const uint64_t>, 2754 std::greater<uint64_t>> 2755 HotFunc; 2756 uint64_t ProfileTotalSample = 0; 2757 uint64_t HotFuncSample = 0; 2758 uint64_t HotFuncCount = 0; 2759 2760 for (const auto &I : Profiles) { 2761 FuncSampleStats FuncStats; 2762 const FunctionSamples &FuncProf = I.second; 2763 ProfileTotalSample += FuncProf.getTotalSamples(); 2764 getFuncSampleStats(FuncProf, FuncStats, MinCountThreshold); 2765 2766 if (isFunctionHot(FuncStats, MinCountThreshold)) { 2767 HotFunc.emplace(FuncProf.getTotalSamples(), 2768 std::make_pair(&(I.second), FuncStats.MaxSample)); 2769 HotFuncSample += FuncProf.getTotalSamples(); 2770 ++HotFuncCount; 2771 } 2772 } 2773 2774 std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample", 2775 "Entry sample", "Function name"}; 2776 std::vector<int> ColumnOffset{0, 24, 42, 58}; 2777 std::string Metric = 2778 std::string("max sample >= ") + std::to_string(MinCountThreshold); 2779 std::vector<HotFuncInfo> PrintValues; 2780 for (const auto &FuncPair : HotFunc) { 2781 const FunctionSamples &Func = *FuncPair.second.first; 2782 double TotalSamplePercent = 2783 (ProfileTotalSample > 0) 2784 ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample 2785 : 0; 2786 PrintValues.emplace_back( 2787 HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(), 2788 TotalSamplePercent, FuncPair.second.second, 2789 Func.getHeadSamplesEstimate())); 2790 } 2791 dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount, 2792 Profiles.size(), HotFuncSample, ProfileTotalSample, 2793 Metric, TopN, OS); 2794 2795 return 0; 2796 } 2797 2798 static int showSampleProfile(const std::string &Filename, bool ShowCounts, 2799 uint32_t TopN, bool ShowAllFunctions, 2800 bool ShowDetailedSummary, 2801 const std::string &ShowFunction, 2802 bool ShowProfileSymbolList, 2803 bool ShowSectionInfoOnly, bool ShowHotFuncList, 2804 ShowFormat SFormat, raw_fd_ostream &OS) { 2805 if (SFormat == ShowFormat::Yaml) 2806 exitWithError("YAML output is not supported for sample profiles"); 2807 using namespace sampleprof; 2808 LLVMContext Context; 2809 auto FS = vfs::getRealFileSystem(); 2810 auto ReaderOrErr = SampleProfileReader::create(Filename, Context, *FS, 2811 FSDiscriminatorPassOption); 2812 if (std::error_code EC = ReaderOrErr.getError()) 2813 exitWithErrorCode(EC, Filename); 2814 2815 auto Reader = std::move(ReaderOrErr.get()); 2816 if (ShowSectionInfoOnly) { 2817 showSectionInfo(Reader.get(), OS); 2818 return 0; 2819 } 2820 2821 if (std::error_code EC = Reader->read()) 2822 exitWithErrorCode(EC, Filename); 2823 2824 if (ShowAllFunctions || ShowFunction.empty()) { 2825 if (SFormat == ShowFormat::Json) 2826 Reader->dumpJson(OS); 2827 else 2828 Reader->dump(OS); 2829 } else { 2830 if (SFormat == ShowFormat::Json) 2831 exitWithError( 2832 "the JSON format is supported only when all functions are to " 2833 "be printed"); 2834 2835 // TODO: parse context string to support filtering by contexts. 2836 Reader->dumpFunctionProfile(StringRef(ShowFunction), OS); 2837 } 2838 2839 if (ShowProfileSymbolList) { 2840 std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList = 2841 Reader->getProfileSymbolList(); 2842 ReaderList->dump(OS); 2843 } 2844 2845 if (ShowDetailedSummary) { 2846 auto &PS = Reader->getSummary(); 2847 PS.printSummary(OS); 2848 PS.printDetailedSummary(OS); 2849 } 2850 2851 if (ShowHotFuncList || TopN) 2852 showHotFunctionList(Reader->getProfiles(), Reader->getSummary(), TopN, OS); 2853 2854 return 0; 2855 } 2856 2857 static int showMemProfProfile(const std::string &Filename, 2858 const std::string &ProfiledBinary, 2859 ShowFormat SFormat, raw_fd_ostream &OS) { 2860 if (SFormat == ShowFormat::Json) 2861 exitWithError("JSON output is not supported for MemProf"); 2862 auto ReaderOr = llvm::memprof::RawMemProfReader::create( 2863 Filename, ProfiledBinary, /*KeepNames=*/true); 2864 if (Error E = ReaderOr.takeError()) 2865 // Since the error can be related to the profile or the binary we do not 2866 // pass whence. Instead additional context is provided where necessary in 2867 // the error message. 2868 exitWithError(std::move(E), /*Whence*/ ""); 2869 2870 std::unique_ptr<llvm::memprof::RawMemProfReader> Reader( 2871 ReaderOr.get().release()); 2872 2873 Reader->printYAML(OS); 2874 return 0; 2875 } 2876 2877 static int showDebugInfoCorrelation(const std::string &Filename, 2878 bool ShowDetailedSummary, 2879 bool ShowProfileSymbolList, 2880 ShowFormat SFormat, raw_fd_ostream &OS) { 2881 if (SFormat == ShowFormat::Json) 2882 exitWithError("JSON output is not supported for debug info correlation"); 2883 std::unique_ptr<InstrProfCorrelator> Correlator; 2884 if (auto Err = InstrProfCorrelator::get(Filename).moveInto(Correlator)) 2885 exitWithError(std::move(Err), Filename); 2886 if (SFormat == ShowFormat::Yaml) { 2887 if (auto Err = Correlator->dumpYaml(OS)) 2888 exitWithError(std::move(Err), Filename); 2889 return 0; 2890 } 2891 2892 if (auto Err = Correlator->correlateProfileData()) 2893 exitWithError(std::move(Err), Filename); 2894 2895 InstrProfSymtab Symtab; 2896 if (auto Err = Symtab.create( 2897 StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize()))) 2898 exitWithError(std::move(Err), Filename); 2899 2900 if (ShowProfileSymbolList) 2901 Symtab.dumpNames(OS); 2902 // TODO: Read "Profile Data Type" from debug info to compute and show how many 2903 // counters the section holds. 2904 if (ShowDetailedSummary) 2905 OS << "Counters section size: 0x" 2906 << Twine::utohexstr(Correlator->getCountersSectionSize()) << " bytes\n"; 2907 OS << "Found " << Correlator->getDataSize() << " functions\n"; 2908 2909 return 0; 2910 } 2911 2912 static int show_main(int argc, const char *argv[]) { 2913 cl::opt<std::string> Filename(cl::Positional, cl::desc("<profdata-file>")); 2914 2915 cl::opt<bool> ShowCounts("counts", cl::init(false), 2916 cl::desc("Show counter values for shown functions")); 2917 cl::opt<ShowFormat> SFormat( 2918 "show-format", cl::init(ShowFormat::Text), 2919 cl::desc("Emit output in the selected format if supported"), 2920 cl::values(clEnumValN(ShowFormat::Text, "text", 2921 "emit normal text output (default)"), 2922 clEnumValN(ShowFormat::Json, "json", "emit JSON"), 2923 clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML"))); 2924 // TODO: Consider replacing this with `--show-format=text-encoding`. 2925 cl::opt<bool> TextFormat( 2926 "text", cl::init(false), 2927 cl::desc("Show instr profile data in text dump format")); 2928 cl::opt<bool> JsonFormat( 2929 "json", cl::desc("Show sample profile data in the JSON format " 2930 "(deprecated, please use --show-format=json)")); 2931 cl::opt<bool> ShowIndirectCallTargets( 2932 "ic-targets", cl::init(false), 2933 cl::desc("Show indirect call site target values for shown functions")); 2934 cl::opt<bool> ShowMemOPSizes( 2935 "memop-sizes", cl::init(false), 2936 cl::desc("Show the profiled sizes of the memory intrinsic calls " 2937 "for shown functions")); 2938 cl::opt<bool> ShowDetailedSummary("detailed-summary", cl::init(false), 2939 cl::desc("Show detailed profile summary")); 2940 cl::list<uint32_t> DetailedSummaryCutoffs( 2941 cl::CommaSeparated, "detailed-summary-cutoffs", 2942 cl::desc( 2943 "Cutoff percentages (times 10000) for generating detailed summary"), 2944 cl::value_desc("800000,901000,999999")); 2945 cl::opt<bool> ShowHotFuncList( 2946 "hot-func-list", cl::init(false), 2947 cl::desc("Show profile summary of a list of hot functions")); 2948 cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false), 2949 cl::desc("Details for every function")); 2950 cl::opt<bool> ShowCS("showcs", cl::init(false), 2951 cl::desc("Show context sensitive counts")); 2952 cl::opt<std::string> ShowFunction("function", 2953 cl::desc("Details for matching functions")); 2954 2955 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 2956 cl::init("-"), cl::desc("Output file")); 2957 cl::alias OutputFilenameA("o", cl::desc("Alias for --output"), 2958 cl::aliasopt(OutputFilename)); 2959 cl::opt<ProfileKinds> ProfileKind( 2960 cl::desc("Profile kind:"), cl::init(instr), 2961 cl::values(clEnumVal(instr, "Instrumentation profile (default)"), 2962 clEnumVal(sample, "Sample profile"), 2963 clEnumVal(memory, "MemProf memory access profile"))); 2964 cl::opt<uint32_t> TopNFunctions( 2965 "topn", cl::init(0), 2966 cl::desc("Show the list of functions with the largest internal counts")); 2967 cl::opt<uint32_t> ValueCutoff( 2968 "value-cutoff", cl::init(0), 2969 cl::desc("Set the count value cutoff. Functions with the maximum count " 2970 "less than this value will not be printed out. (Default is 0)")); 2971 cl::opt<bool> OnlyListBelow( 2972 "list-below-cutoff", cl::init(false), 2973 cl::desc("Only output names of functions whose max count values are " 2974 "below the cutoff value")); 2975 cl::opt<bool> ShowProfileSymbolList( 2976 "show-prof-sym-list", cl::init(false), 2977 cl::desc("Show profile symbol list if it exists in the profile. ")); 2978 cl::opt<bool> ShowSectionInfoOnly( 2979 "show-sec-info-only", cl::init(false), 2980 cl::desc("Show the information of each section in the sample profile. " 2981 "The flag is only usable when the sample profile is in " 2982 "extbinary format")); 2983 cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(false), 2984 cl::desc("Show binary ids in the profile. ")); 2985 cl::opt<bool> ShowTemporalProfTraces( 2986 "temporal-profile-traces", 2987 cl::desc("Show temporal profile traces in the profile.")); 2988 cl::opt<std::string> DebugInfoFilename( 2989 "debug-info", cl::init(""), 2990 cl::desc("Read and extract profile metadata from debug info and show " 2991 "the functions it found.")); 2992 cl::opt<bool> ShowCovered( 2993 "covered", cl::init(false), 2994 cl::desc("Show only the functions that have been executed.")); 2995 cl::opt<std::string> ProfiledBinary( 2996 "profiled-binary", cl::init(""), 2997 cl::desc("Path to binary from which the profile was collected.")); 2998 cl::opt<bool> ShowProfileVersion("profile-version", cl::init(false), 2999 cl::desc("Show profile version. ")); 3000 cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n"); 3001 3002 if (Filename.empty() && DebugInfoFilename.empty()) 3003 exitWithError( 3004 "the positional argument '<profdata-file>' is required unless '--" + 3005 DebugInfoFilename.ArgStr + "' is provided"); 3006 3007 if (Filename == OutputFilename) { 3008 errs() << sys::path::filename(argv[0]) 3009 << ": Input file name cannot be the same as the output file name!\n"; 3010 return 1; 3011 } 3012 if (JsonFormat) 3013 SFormat = ShowFormat::Json; 3014 3015 std::error_code EC; 3016 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF); 3017 if (EC) 3018 exitWithErrorCode(EC, OutputFilename); 3019 3020 if (ShowAllFunctions && !ShowFunction.empty()) 3021 WithColor::warning() << "-function argument ignored: showing all functions\n"; 3022 3023 if (!DebugInfoFilename.empty()) 3024 return showDebugInfoCorrelation(DebugInfoFilename, ShowDetailedSummary, 3025 ShowProfileSymbolList, SFormat, OS); 3026 3027 if (ProfileKind == instr) 3028 return showInstrProfile( 3029 Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets, 3030 ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs, 3031 ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction, 3032 TextFormat, ShowBinaryIds, ShowCovered, ShowProfileVersion, 3033 ShowTemporalProfTraces, SFormat, OS); 3034 if (ProfileKind == sample) 3035 return showSampleProfile(Filename, ShowCounts, TopNFunctions, 3036 ShowAllFunctions, ShowDetailedSummary, 3037 ShowFunction, ShowProfileSymbolList, 3038 ShowSectionInfoOnly, ShowHotFuncList, SFormat, OS); 3039 return showMemProfProfile(Filename, ProfiledBinary, SFormat, OS); 3040 } 3041 3042 static int order_main(int argc, const char *argv[]) { 3043 cl::opt<std::string> Filename(cl::Positional, cl::desc("<profdata-file>")); 3044 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 3045 cl::init("-"), cl::desc("Output file")); 3046 cl::alias OutputFilenameA("o", cl::desc("Alias for --output"), 3047 cl::aliasopt(OutputFilename)); 3048 cl::ParseCommandLineOptions(argc, argv, "LLVM profile data order\n"); 3049 3050 std::error_code EC; 3051 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF); 3052 if (EC) 3053 exitWithErrorCode(EC, OutputFilename); 3054 auto FS = vfs::getRealFileSystem(); 3055 auto ReaderOrErr = InstrProfReader::create(Filename, *FS); 3056 if (Error E = ReaderOrErr.takeError()) 3057 exitWithError(std::move(E), Filename); 3058 3059 auto Reader = std::move(ReaderOrErr.get()); 3060 for (auto &I : *Reader) { 3061 // Read all entries 3062 (void)I; 3063 } 3064 auto &Traces = Reader->getTemporalProfTraces(); 3065 auto Nodes = TemporalProfTraceTy::createBPFunctionNodes(Traces); 3066 BalancedPartitioningConfig Config; 3067 BalancedPartitioning BP(Config); 3068 BP.run(Nodes); 3069 3070 WithColor::note() << "# Ordered " << Nodes.size() << " functions\n"; 3071 for (auto &N : Nodes) { 3072 auto FuncName = Reader->getSymtab().getFuncName(N.Id); 3073 if (FuncName.contains(':')) { 3074 // GlobalValue::getGlobalIdentifier() prefixes the filename if the symbol 3075 // is local. This logic will break if there is a colon in the filename, 3076 // but we cannot use rsplit() because ObjC symbols can have colons. 3077 auto [Filename, ParsedFuncName] = FuncName.split(':'); 3078 // Emit a comment describing where this symbol came from 3079 OS << "# " << Filename << "\n"; 3080 FuncName = ParsedFuncName; 3081 } 3082 OS << FuncName << "\n"; 3083 } 3084 return 0; 3085 } 3086 3087 typedef int (*llvm_profdata_subcommand)(int, const char *[]); 3088 3089 static std::tuple<StringRef, llvm_profdata_subcommand> 3090 llvm_profdata_subcommands[] = { 3091 {"merge", merge_main}, 3092 {"show", show_main}, 3093 {"order", order_main}, 3094 {"overlap", overlap_main}, 3095 }; 3096 3097 int llvm_profdata_main(int argc, char **argvNonConst, 3098 const llvm::ToolContext &) { 3099 const char **argv = const_cast<const char **>(argvNonConst); 3100 InitLLVM X(argc, argv); 3101 3102 StringRef ProgName(sys::path::filename(argv[0])); 3103 if (argc > 1) { 3104 3105 llvm_profdata_subcommand func = nullptr; 3106 for (auto [subcmd_name, subcmd_action] : llvm_profdata_subcommands) 3107 if (subcmd_name == argv[1]) 3108 func = subcmd_action; 3109 3110 if (func) { 3111 std::string Invocation(ProgName.str() + " " + argv[1]); 3112 argv[1] = Invocation.c_str(); 3113 return func(argc - 1, argv + 1); 3114 } 3115 3116 if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "-help") == 0 || 3117 strcmp(argv[1], "--help") == 0) { 3118 3119 errs() << "OVERVIEW: LLVM profile data tools\n\n" 3120 << "USAGE: " << ProgName << " <command> [args...]\n" 3121 << "USAGE: " << ProgName << " <command> -help\n\n" 3122 << "See each individual command --help for more details.\n" 3123 << "Available commands: " 3124 << join(map_range(llvm_profdata_subcommands, 3125 [](auto const &KV) { return std::get<0>(KV); }), 3126 ", ") 3127 << "\n"; 3128 return 0; 3129 } 3130 3131 if (strcmp(argv[1], "--version") == 0) { 3132 outs() << ProgName << '\n'; 3133 cl::PrintVersionMessage(); 3134 return 0; 3135 } 3136 } 3137 3138 if (argc < 2) 3139 errs() << ProgName << ": No command specified!\n"; 3140 else 3141 errs() << ProgName << ": Unknown command!\n"; 3142 3143 errs() << "USAGE: " << ProgName << " <" 3144 << join(map_range(llvm_profdata_subcommands, 3145 [](auto const &KV) { return std::get<0>(KV); }), 3146 "|") 3147 << "> [args...]\n"; 3148 return 1; 3149 } 3150