//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <cstdint>
#include <memory>
#include <type_traits>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/BuildID.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/MemProfReader.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"

#define DEBUG_TYPE "memprof"

namespace llvm {
namespace memprof {
namespace {
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_pod<T>::value, "Not a pod type.");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}

Error checkBuffer(const MemoryBuffer &Buffer) {
  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Buffer.getBufferSize() < sizeof(Header)) {
    return make_error<InstrProfError>(instrprof_error::truncated);
  }

  // The size of the buffer can be > header total size since we allow repeated
  // serialization of memprof profiles to the same file.
  uint64_t TotalSize = 0;
  const char *Next = Buffer.getBufferStart();
  while (Next < Buffer.getBufferEnd()) {
    const auto *H = reinterpret_cast<const Header *>(Next);

    // Check if the version in the header is among the supported versions.
    bool IsSupported = false;
    for (auto SupportedVersion : MEMPROF_RAW_SUPPORTED_VERSIONS) {
      if (H->Version == SupportedVersion)
        IsSupported = true;
    }
    if (!IsSupported) {
      return make_error<InstrProfError>(instrprof_error::unsupported_version);
    }

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
  }

  if (Buffer.getBufferSize() != TotalSize) {
    return make_error<InstrProfError>(instrprof_error::malformed);
  }
  return Error::success();
}

llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  llvm::SmallVector<SegmentEntry> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
        Ptr + I * sizeof(SegmentEntry)));
  }
  return Items;
}

llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV3(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

    // We cheat a bit here and remove the const from cast to set the
    // Histogram Pointer to newly allocated buffer. We also cheat, since V3 and
    // V4 do not have the same fields. V3 is missing AccessHistogramSize and
    // AccessHistogram. This means we read "dirty" data in here, but it should
    // not segfault, since there will be callstack data placed after this in
    // the binary format.
    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
    // Overwrite dirty data.
    MIB.AccessHistogramSize = 0;
    MIB.AccessHistogram = 0;

    Items.push_back({Id, MIB});
    // Only increment by the size of MIB in V3.
    Ptr += MEMPROF_V3_MIB_SIZE;
  }
  return Items;
}

llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV4(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
    // We cheat a bit here and remove the const from cast to set the
    // Histogram Pointer to newly allocated buffer.
    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);

    // Only increment by size of MIB since readNext implicitly increments.
    Ptr += sizeof(MemInfoBlock);

    if (MIB.AccessHistogramSize > 0) {
      MIB.AccessHistogram =
          (uintptr_t)malloc(MIB.AccessHistogramSize * sizeof(uint64_t));
    }

    for (uint64_t J = 0; J < MIB.AccessHistogramSize; J++) {
      ((uint64_t *)MIB.AccessHistogram)[J] =
          endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
    }
    Items.push_back({Id, MIB});
  }
  return Items;
}

CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  CallStackMap Items;

  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t StackId =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    const uint64_t NumPCs =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

    SmallVector<uint64_t> CallStack;
    CallStack.reserve(NumPCs);
    for (uint64_t J = 0; J < NumPCs; J++) {
      CallStack.push_back(
          endian::readNext<uint64_t, llvm::endianness::little>(Ptr));
    }

    Items[StackId] = CallStack;
  }
  return Items;
}

// Merges the contents of stack information in \p From to \p To. Returns true
// if any stack ids observed previously map to a different set of program
// counter addresses.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &[Id, Stack] : From) {
    auto I = To.find(Id);
    if (I == To.end()) {
      To[Id] = Stack;
    } else {
      // Check that the PCs are the same (in order).
      if (Stack != I->second)
        return true;
    }
  }
  return false;
}

Error report(Error E, const StringRef Context) {
  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                    std::move(E));
}

bool isRuntimePath(const StringRef Path) {
  const StringRef Filename = llvm::sys::path::filename(Path);
  // This list should be updated in case new files with additional interceptors
  // are added to the memprof runtime.
  return Filename == "memprof_malloc_linux.cpp" ||
         Filename == "memprof_interceptors.cpp" ||
         Filename == "memprof_new_delete.cpp";
}

std::string getBuildIdString(const SegmentEntry &Entry) {
  // If the build id is unset print a helpful string instead of all zeros.
  if (Entry.BuildIdSize == 0)
    return "<None>";

  std::string Str;
  raw_string_ostream OS(Str);
  for (size_t I = 0; I < Entry.BuildIdSize; I++) {
    OS << format_hex_no_prefix(Entry.BuildId[I], 2);
  }
  return OS.str();
}
} // namespace

MemProfReader::MemProfReader(
    llvm::DenseMap<FrameId, Frame> FrameIdMap,
    llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
    : IdToFrame(std::move(FrameIdMap)),
      FunctionProfileData(std::move(ProfData)) {
  // Populate CSId in each IndexedAllocationInfo and IndexedMemProfRecord
  // while storing CallStack in CSIdToCallStack.
  for (auto &KV : FunctionProfileData) {
    IndexedMemProfRecord &Record = KV.second;
    for (auto &AS : Record.AllocSites) {
      CallStackId CSId = hashCallStack(AS.CallStack);
      AS.CSId = CSId;
      CSIdToCallStack.insert({CSId, AS.CallStack});
    }
    for (auto &CS : Record.CallSites) {
      CallStackId CSId = hashCallStack(CS);
      Record.CallSiteIds.push_back(CSId);
      CSIdToCallStack.insert({CSId, CS});
    }
  }
}

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                         bool KeepName) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return create(std::move(Buffer), ProfiledBinary, KeepName);
}

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
                         const StringRef ProfiledBinary, bool KeepName) {
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), Buffer->getBufferIdentifier());

  if (ProfiledBinary.empty()) {
    // Peek the build ids to print a helpful error message.
    const std::vector<std::string> BuildIds = peekBuildIds(Buffer.get());
    std::string ErrorMessage(
        R"(Path to profiled binary is empty, expected binary with one of the following build ids:
)");
    for (const auto &Id : BuildIds) {
      ErrorMessage += "\n BuildId: ";
      ErrorMessage += Id;
    }
    return report(
        make_error<StringError>(ErrorMessage, inconvertibleErrorCode()),
        /*Context=*/"");
  }

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  // Use new here since constructor is private.
  std::unique_ptr<RawMemProfReader> Reader(
      new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
  if (Error E = Reader->initialize(std::move(Buffer))) {
    return std::move(E);
  }
  return std::move(Reader);
}

// We need to make sure that all leftover MIB histograms that have not been
// freed by merge are freed here.
RawMemProfReader::~RawMemProfReader() {
  for (auto &[_, MIB] : CallstackProfileData) {
    if (MemprofRawVersion >= 4ULL && MIB.AccessHistogramSize > 0) {
      free((void *)MIB.AccessHistogram);
    }
  }
}

bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() < sizeof(uint64_t))
    return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8b alignment.
  const uint64_t Magic = alignedRead(Buffer.getBufferStart());
  return Magic == MEMPROF_RAW_MAGIC_64;
}

void RawMemProfReader::printYAML(raw_ostream &OS) {
  uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
  for (const auto &KV : FunctionProfileData) {
    const size_t NumAllocSites = KV.second.AllocSites.size();
    if (NumAllocSites > 0) {
      NumAllocFunctions++;
      NumMibInfo += NumAllocSites;
    }
  }

  OS << "MemprofProfile:\n";
  OS << "  Summary:\n";
  OS << "    Version: " << MemprofRawVersion << "\n";
  OS << "    NumSegments: " << SegmentInfo.size() << "\n";
  OS << "    NumMibInfo: " << NumMibInfo << "\n";
  OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
  OS << "    NumStackOffsets: " << StackMap.size() << "\n";
  // Print out the segment information.
  OS << "  Segments:\n";
  for (const auto &Entry : SegmentInfo) {
    OS << "  -\n";
    OS << "    BuildId: " << getBuildIdString(Entry) << "\n";
    OS << "    Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
    OS << "    End: 0x" << llvm::utohexstr(Entry.End) << "\n";
    OS << "    Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
  }
  // Print out the merged contents of the profiles.
  OS << "  Records:\n";
  for (const auto &[GUID, Record] : *this) {
    OS << "  -\n";
    OS << "    FunctionGUID: " << GUID << "\n";
    Record.print(OS);
  }
}

Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  // Check whether the profiled binary was built with position independent
  // code (PIC). Perform sanity checks for assumptions we rely on to simplify
  // symbolization.
  auto *Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
  const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile();
  auto PHdrsOr = ElfFile.program_headers();
  if (!PHdrsOr)
    return report(
        make_error<StringError>(Twine("Could not read program headers: "),
                                inconvertibleErrorCode()),
        FileName);

  int NumExecutableSegments = 0;
  for (const auto &Phdr : *PHdrsOr) {
    if (Phdr.p_type == ELF::PT_LOAD) {
      if (Phdr.p_flags & ELF::PF_X) {
        // We assume only one text segment in the main binary for simplicity
        // and to reduce the overhead of checking multiple ranges during
        // symbolization.
        if (++NumExecutableSegments > 1) {
          return report(
              make_error<StringError>(
                  "Expect only one executable load segment in the binary",
                  inconvertibleErrorCode()),
              FileName);
        }
        // The segment will always be loaded at a page boundary, so expect it
        // to be aligned already. Assume a 4K pagesize for the machine from
        // which the profile has been collected. This should be fine for now;
        // if we want to support other pagesizes, it can be recorded in the
        // raw profile during collection.
        PreferredTextSegmentAddress = Phdr.p_vaddr;
        assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) &&
               "Expect p_vaddr to always be page aligned");
        assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization.");
      }
    }
  }

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  // Process the raw profile.
  if (Error E = readRawProfile(std::move(DataBuffer)))
    return E;

  if (Error E = setupForSymbolization())
    return E;

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DIContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), /*UntagAddresses=*/false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  auto Symbolizer = std::move(SOFOr.get());

  // The symbolizer ownership is moved into symbolizeAndFilterStackFrames so
  // that it is freed automatically at the end, when it is no longer used. This
  // reduces peak memory since it won't be live while also mapping the raw
  // profile into records afterwards.
  if (Error E = symbolizeAndFilterStackFrames(std::move(Symbolizer)))
    return E;

  return mapRawProfileToRecords();
}

Error RawMemProfReader::setupForSymbolization() {
  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  object::BuildIDRef BinaryId = object::getBuildID(Object);
  if (BinaryId.empty())
    return make_error<StringError>(Twine("No build id found in binary ") +
                                       Binary.getBinary()->getFileName(),
                                   inconvertibleErrorCode());

  int NumMatched = 0;
  for (const auto &Entry : SegmentInfo) {
    llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize);
    if (BinaryId == SegmentId) {
      // We assume only one text segment in the main binary for simplicity and
      // to reduce the overhead of checking multiple ranges during
      // symbolization.
      if (++NumMatched > 1) {
        return make_error<StringError>(
            "We expect only one executable segment in the profiled binary",
            inconvertibleErrorCode());
      }
      ProfiledTextSegmentStart = Entry.Start;
      ProfiledTextSegmentEnd = Entry.End;
    }
  }
  assert(NumMatched != 0 &&
         "No matching executable segments in segment info.");
  assert((PreferredTextSegmentAddress == 0 ||
          (PreferredTextSegmentAddress == ProfiledTextSegmentStart)) &&
         "Expect text segment address to be 0 or equal to profiled text "
         "segment start.");
  return Error::success();
}

Error RawMemProfReader::mapRawProfileToRecords() {
  // Hold a mapping from function to each callsite location we encounter within
  // it that is part of some dynamic allocation context. The location is stored
  // as a pointer to a symbolized list of inline frames.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;

  // Convert the raw profile callstack data into memprof records. While doing
  // so, keep track of related contexts so that we can fill these in later.
  for (const auto &[StackId, MIB] : CallstackProfileData) {
    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));

    // Construct the symbolized callstack.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());

    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];

      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");

      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself that is recorded
      // as an alloc site.
      for (size_t J = 0; J < Frames.size(); J++) {
        if (I == 0 && J == 0)
          continue;
        // We attach the entire bottom-up frame here for the callsite even
        // though we only need the frames up to and including the frame for
        // Frames[J].Function. This will enable better deduplication for
        // compression in the future.
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        PerFunctionCallSites[Guid].insert(&Frames);
      }

      // Add all the frames to the current allocation callstack.
      Callstack.append(Frames.begin(), Frames.end());
    }

    CallStackId CSId = hashCallStack(Callstack);
    CSIdToCallStack.insert({CSId, Callstack});

    // We attach the memprof record to each function bottom-up including the
    // first non-inline frame.
    for (size_t I = 0; /*Break out using the condition below*/; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      auto Result =
          FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
      IndexedMemProfRecord &Record = Result.first->second;
      Record.AllocSites.emplace_back(Callstack, CSId, MIB);

      if (!F.IsInlineFrame)
        break;
    }
  }

  // Fill in the related callsites per function.
  for (const auto &[Id, Locs] : PerFunctionCallSites) {
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
    IndexedMemProfRecord &Record = Result.first->second;
    for (LocationPtr Loc : Locs) {
      CallStackId CSId = hashCallStack(*Loc);
      CSIdToCallStack.insert({CSId, *Loc});
      Record.CallSites.push_back(*Loc);
      Record.CallSiteIds.push_back(CSId);
    }
  }

  verifyFunctionProfileData(FunctionProfileData);

  return Error::success();
}

Error RawMemProfReader::symbolizeAndFilterStackFrames(
    std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer) {
  // The specifier to use when symbolization is requested.
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);

  // For entries where all PCs in the callstack are discarded, we erase the
  // entry from the stack map.
  llvm::SmallVector<uint64_t> EntriesToErase;
  // We keep track of all prior discarded entries so that we can avoid invoking
  // the symbolizer for such entries.
  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
  for (auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Check if we have already symbolized and cached the result or if we
      // don't want to attempt symbolization since we know this address is
      // bad. In this case the address is also removed from the current
      // callstack.
      if (SymbolizedFrame.count(VAddr) > 0 ||
          AllVAddrsToDiscard.contains(VAddr))
        continue;

      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
      if (!DIOr)
        return DIOr.takeError();
      DIInliningInfo DI = DIOr.get();

      // Drop frames which we can't symbolize or which belong to the runtime.
      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
          isRuntimePath(DI.getFrame(0).FileName)) {
        AllVAddrsToDiscard.insert(VAddr);
        continue;
      }

      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
           I++) {
        const auto &DIFrame = DI.getFrame(I);
        const uint64_t Guid =
            IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                      // Only the last entry is not an inlined location.
                      I != NumFrames - 1);
        // Here we retain a mapping from the GUID to canonical symbol name
        // instead of adding it to the frame object directly to reduce memory
        // overhead. This is because there can be many unique frames,
        // particularly for callsite frames.
        if (KeepSymbolName) {
          StringRef CanonicalName =
              sampleprof::FunctionSamples::getCanonicalFnName(
                  DIFrame.FunctionName);
          GuidToSymbolName.insert({Guid, CanonicalName.str()});
        }

        const FrameId Hash = F.hash();
        IdToFrame.insert({Hash, F});
        SymbolizedFrame[VAddr].push_back(Hash);
      }
    }

    auto &CallStack = Entry.getSecond();
    llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
      return AllVAddrsToDiscard.contains(A);
    });
    if (CallStack.empty())
      EntriesToErase.push_back(Entry.getFirst());
  }

  // Drop the entries where the callstack is empty.
  for (const uint64_t Id : EntriesToErase) {
    StackMap.erase(Id);
    if (CallstackProfileData[Id].AccessHistogramSize > 0)
      free((void *)CallstackProfileData[Id].AccessHistogram);
    CallstackProfileData.erase(Id);
  }

  if (StackMap.empty())
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "no entries in callstack map after symbolization");

  return Error::success();
}

std::vector<std::string>
RawMemProfReader::peekBuildIds(MemoryBuffer *DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();
  // Use a SetVector since a profile file may contain multiple raw profile
  // dumps, each with segment information. We want them unique and in the
  // order they were stored in the profile; the profiled binary should be the
  // first entry. The runtime uses dl_iterate_phdr and the "... first object
  // visited by callback is the main program."
  // https://man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html
  llvm::SetVector<std::string, std::vector<std::string>,
                  llvm::SmallSet<std::string, 10>>
      BuildIds;
  while (Next < DataBuffer->getBufferEnd()) {
    const auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);

    for (const auto &Entry : Entries)
      BuildIds.insert(getBuildIdString(Entry));

    Next += Header->TotalSize;
  }
  return BuildIds.takeVector();
}

// FIXME: Add a schema for serializing similar to IndexedMemprofReader. This
// will make it easier to deserialize different raw memprof versions.
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
RawMemProfReader::readMemInfoBlocks(const char *Ptr) {
  if (MemprofRawVersion == 3ULL)
    return readMemInfoBlocksV3(Ptr);
  if (MemprofRawVersion == 4ULL)
    return readMemInfoBlocksV4(Ptr);
  llvm_unreachable(
      "Panic: Unsupported version number when reading MemInfoBlocks");
}

Error RawMemProfReader::readRawProfile(
    std::unique_ptr<MemoryBuffer> DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    const auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Set the reader version to the raw memprof version of this profile.
    // Whether the version is supported has already been checked before
    // creating the reader.
    MemprofRawVersion = Header->Version;

    // Read in the segment information and check whether it is the same across
    // all profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumps.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    for (const auto &[Id, MIB] : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (CallstackProfileData.count(Id)) {
        if (MemprofRawVersion >= 4ULL &&
            (CallstackProfileData[Id].AccessHistogramSize > 0 ||
             MIB.AccessHistogramSize > 0)) {
          uintptr_t ShorterHistogram;
          if (CallstackProfileData[Id].AccessHistogramSize >
              MIB.AccessHistogramSize)
            ShorterHistogram = MIB.AccessHistogram;
          else
            ShorterHistogram = CallstackProfileData[Id].AccessHistogram;
          CallstackProfileData[Id].Merge(MIB);
          free((void *)ShorterHistogram);
        } else {
          CallstackProfileData[Id].Merge(MIB);
        }
      } else {
        CallstackProfileData[Id] = MIB;
      }
    }

    // Read in the callstack for each id. For multiple raw profiles in the
    // same file, we expect that the callstack is the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    Next += Header->TotalSize;
  }

  return Error::success();
}

object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  if (VirtualAddress > ProfiledTextSegmentStart &&
      VirtualAddress <= ProfiledTextSegmentEnd) {
    // For PIE binaries, the preferred address is zero and we adjust the
    // virtual address by the start of the profiled segment, assuming that the
    // offset of the segment in the binary is zero. For non-PIE binaries the
    // preferred and profiled segment addresses should be equal and this is a
    // no-op.
    const uint64_t AdjustedAddress =
        VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;
    return object::SectionedAddress{AdjustedAddress};
  }
  // Addresses which do not originate from the profiled text segment in the
  // binary are not adjusted. These will fail symbolization and be filtered out
  // during processing.
  return object::SectionedAddress{VirtualAddress};
}

Error RawMemProfReader::readNextRecord(
    GuidMemProfRecordPair &GuidRecord,
    std::function<const Frame(const FrameId)> Callback) {
  // Create a new callback for the RawMemProfRecord iterator so that we can
  // provide the symbol name if the reader was initialized with
  // KeepSymbolName = true. This is useful for debugging and testing.
  auto IdToFrameCallback = [this](const FrameId Id) {
    Frame F = this->idToFrame(Id);
    if (!this->KeepSymbolName)
      return F;
    auto Iter = this->GuidToSymbolName.find(F.Function);
    assert(Iter != this->GuidToSymbolName.end());
    F.SymbolName = std::make_unique<std::string>(Iter->getSecond());
    return F;
  };
  return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
}
} // namespace memprof
} // namespace llvm