//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <cstdint>
#include <memory>
#include <type_traits>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/BuildID.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/MemProfReader.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"

#define DEBUG_TYPE "memprof"
namespace llvm {
namespace memprof {
namespace {
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_pod<T>::value, "Not a pod type.");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}

Error checkBuffer(const MemoryBuffer &Buffer) {
  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Buffer.getBufferSize() < sizeof(Header)) {
    return make_error<InstrProfError>(instrprof_error::truncated);
  }

  // The size of the buffer can be > header total size since we allow repeated
  // serialization of memprof profiles to the same file.
  uint64_t TotalSize = 0;
  const char *Next = Buffer.getBufferStart();
  while (Next < Buffer.getBufferEnd()) {
    const auto *H = reinterpret_cast<const Header *>(Next);

    // Check if the version in the header is among the supported versions.
    bool IsSupported = false;
    for (auto SupportedVersion : MEMPROF_RAW_SUPPORTED_VERSIONS) {
      if (H->Version == SupportedVersion)
        IsSupported = true;
    }
    if (!IsSupported) {
      return make_error<InstrProfError>(instrprof_error::unsupported_version);
    }

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
  }

  if (Buffer.getBufferSize() != TotalSize) {
    return make_error<InstrProfError>(instrprof_error::malformed);
  }
  return Error::success();
}

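// As parsed here, the segment section begins with a little-endian u64 count
// followed by that many raw SegmentEntry structs copied out verbatim.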
llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  llvm::SmallVector<SegmentEntry> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
        Ptr + I * sizeof(SegmentEntry)));
  }
  return Items;
}

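// As parsed here, the V3 MIB section is a little-endian u64 count followed by
// records of (u64 Id, MemInfoBlock), where each block occupies
// MEMPROF_V3_MIB_SIZE bytes and lacks the access histogram fields.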
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV3(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

    // We cheat a bit here since V3 and V4 do not have the same fields. V3 is
    // missing AccessHistogramSize and AccessHistogram, so this read pulls in
    // "dirty" data from the following bytes, but it should not segfault since
    // callstack data is placed after this in the binary format.
    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
    // Overwrite the dirty data.
    MIB.AccessHistogramSize = 0;
    MIB.AccessHistogram = 0;

    Items.push_back({Id, MIB});
    // Only increment by the size of a V3 MIB.
    Ptr += MEMPROF_V3_MIB_SIZE;
  }
  return Items;
}

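// As parsed here, the V4 MIB section is a little-endian u64 count followed by
// records of (u64 Id, MemInfoBlock, AccessHistogramSize u64 entries). The
// histogram entries are copied into a freshly malloc'd buffer that is released
// later: when blocks are merged in readRawProfile, when an entry is dropped
// after symbolization, or in the RawMemProfReader destructor.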
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV4(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
    // Copy the MemInfoBlock by value so that its AccessHistogram pointer can
    // be redirected to a newly allocated buffer below.
    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);

    // Only increment by the size of the MIB; readNext implicitly advances Ptr.
    Ptr += sizeof(MemInfoBlock);

    if (MIB.AccessHistogramSize > 0) {
      MIB.AccessHistogram =
          (uintptr_t)malloc(MIB.AccessHistogramSize * sizeof(uint64_t));
    }

    for (uint64_t J = 0; J < MIB.AccessHistogramSize; J++) {
      ((uint64_t *)MIB.AccessHistogram)[J] =
          endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
    }
    Items.push_back({Id, MIB});
  }
  return Items;
}

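// As parsed here, the callstack section is a little-endian u64 count followed
// by records of (u64 StackId, u64 NumPCs, NumPCs u64 program counters).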
CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  CallStackMap Items;

  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t StackId =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    const uint64_t NumPCs =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

    SmallVector<uint64_t> CallStack;
    CallStack.reserve(NumPCs);
    for (uint64_t J = 0; J < NumPCs; J++) {
      CallStack.push_back(
          endian::readNext<uint64_t, llvm::endianness::little>(Ptr));
    }

    Items[StackId] = CallStack;
  }
  return Items;
}

// Merges the contents of stack information in \p From into \p To. Returns true
// if any stack ids observed previously map to a different set of program
// counter addresses.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &[Id, Stack] : From) {
    auto I = To.find(Id);
    if (I == To.end()) {
      To[Id] = Stack;
    } else {
      // Check that the PCs are the same (in order).
      if (Stack != I->second)
        return true;
    }
  }
  return false;
}

Error report(Error E, const StringRef Context) {
  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                    std::move(E));
}

bool isRuntimePath(const StringRef Path) {
  const StringRef Filename = llvm::sys::path::filename(Path);
  // This list should be updated in case new files with additional interceptors
  // are added to the memprof runtime.
  return Filename == "memprof_malloc_linux.cpp" ||
         Filename == "memprof_interceptors.cpp" ||
         Filename == "memprof_new_delete.cpp";
}

std::string getBuildIdString(const SegmentEntry &Entry) {
  // If the build id is unset print a helpful string instead of all zeros.
  if (Entry.BuildIdSize == 0)
    return "<None>";

  std::string Str;
  raw_string_ostream OS(Str);
  for (size_t I = 0; I < Entry.BuildIdSize; I++) {
    OS << format_hex_no_prefix(Entry.BuildId[I], 2);
  }
  return OS.str();
}
} // namespace

MemProfReader::MemProfReader(
    llvm::DenseMap<FrameId, Frame> FrameIdMap,
    llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
    : IdToFrame(std::move(FrameIdMap)),
      FunctionProfileData(std::move(ProfData)) {
  // Populate CSId in each IndexedAllocationInfo and IndexedMemProfRecord
  // while storing CallStack in CSIdToCallStack.
  for (auto &KV : FunctionProfileData) {
    IndexedMemProfRecord &Record = KV.second;
    for (auto &AS : Record.AllocSites) {
      CallStackId CSId = hashCallStack(AS.CallStack);
      AS.CSId = CSId;
      CSIdToCallStack.insert({CSId, AS.CallStack});
    }
    for (auto &CS : Record.CallSites) {
      CallStackId CSId = hashCallStack(CS);
      Record.CallSiteIds.push_back(CSId);
      CSIdToCallStack.insert({CSId, CS});
    }
  }
}

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                         bool KeepName) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return create(std::move(Buffer), ProfiledBinary, KeepName);
}

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
                         const StringRef ProfiledBinary, bool KeepName) {
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), Buffer->getBufferIdentifier());

  if (ProfiledBinary.empty()) {
    // Peek the build ids to print a helpful error message.
    const std::vector<std::string> BuildIds = peekBuildIds(Buffer.get());
    std::string ErrorMessage(
        R"(Path to profiled binary is empty, expected binary with one of the following build ids:
)");
    for (const auto &Id : BuildIds) {
      ErrorMessage += "\n BuildId: ";
      ErrorMessage += Id;
    }
    return report(
        make_error<StringError>(ErrorMessage, inconvertibleErrorCode()),
        /*Context=*/"");
  }

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  // Use new here since the constructor is private.
  std::unique_ptr<RawMemProfReader> Reader(
      new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
  if (Error E = Reader->initialize(std::move(Buffer))) {
    return std::move(E);
  }
  return std::move(Reader);
}

// We need to make sure that all leftover MIB histograms that have not been
// freed by merge are freed here.
RawMemProfReader::~RawMemProfReader() {
  for (auto &[_, MIB] : CallstackProfileData) {
    if (MemprofRawVersion >= 4ULL && MIB.AccessHistogramSize > 0) {
      free((void *)MIB.AccessHistogram);
    }
  }
}

bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() < sizeof(uint64_t))
    return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8-byte alignment.
  const uint64_t Magic = alignedRead(Buffer.getBufferStart());
  return Magic == MEMPROF_RAW_MAGIC_64;
}

void RawMemProfReader::printYAML(raw_ostream &OS) {
  uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
  for (const auto &KV : FunctionProfileData) {
    const size_t NumAllocSites = KV.second.AllocSites.size();
    if (NumAllocSites > 0) {
      NumAllocFunctions++;
      NumMibInfo += NumAllocSites;
    }
  }

  OS << "MemprofProfile:\n";
  OS << "  Summary:\n";
  OS << "    Version: " << MemprofRawVersion << "\n";
  OS << "    NumSegments: " << SegmentInfo.size() << "\n";
  OS << "    NumMibInfo: " << NumMibInfo << "\n";
  OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
  OS << "    NumStackOffsets: " << StackMap.size() << "\n";
  // Print out the segment information.
  OS << "  Segments:\n";
  for (const auto &Entry : SegmentInfo) {
    OS << "  -\n";
    OS << "    BuildId: " << getBuildIdString(Entry) << "\n";
    OS << "    Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
    OS << "    End: 0x" << llvm::utohexstr(Entry.End) << "\n";
    OS << "    Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
  }
  // Print out the merged contents of the profiles.
  OS << "  Records:\n";
  for (const auto &[GUID, Record] : *this) {
    OS << "  -\n";
    OS << "    FunctionGUID: " << GUID << "\n";
    Record.print(OS);
  }
}

Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  // Check whether the profiled binary was built with position independent code
  // (PIC). Perform sanity checks for assumptions we rely on to simplify
  // symbolization.
  auto *Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
  const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile();
  auto PHdrsOr = ElfFile.program_headers();
  if (!PHdrsOr)
    return report(
        make_error<StringError>(Twine("Could not read program headers: "),
                                inconvertibleErrorCode()),
        FileName);

  int NumExecutableSegments = 0;
  for (const auto &Phdr : *PHdrsOr) {
    if (Phdr.p_type == ELF::PT_LOAD) {
      if (Phdr.p_flags & ELF::PF_X) {
        // We assume only one text segment in the main binary for simplicity
        // and to reduce the overhead of checking multiple ranges during
        // symbolization.
        if (++NumExecutableSegments > 1) {
          return report(
              make_error<StringError>(
                  "Expect only one executable load segment in the binary",
                  inconvertibleErrorCode()),
              FileName);
        }
        // The segment will always be loaded at a page boundary, so expect it
        // to be aligned already. Assume a 4K page size for the machine from
        // which the profile was collected. This should be fine for now; if we
        // want to support other page sizes, the page size can be recorded in
        // the raw profile during collection.
        PreferredTextSegmentAddress = Phdr.p_vaddr;
        assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) &&
               "Expect p_vaddr to always be page aligned");
        assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization.");
      }
    }
  }

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  // Process the raw profile.
  if (Error E = readRawProfile(std::move(DataBuffer)))
    return E;

  if (Error E = setupForSymbolization())
    return E;

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DIContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), /*UntagAddresses=*/false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  auto Symbolizer = std::move(SOFOr.get());

  // Ownership of the symbolizer is moved into symbolizeAndFilterStackFrames so
  // that it is freed automatically at the end, when it is no longer used. This
  // reduces peak memory since it won't be live while the raw profile is also
  // being mapped into records afterwards.
  if (Error E = symbolizeAndFilterStackFrames(std::move(Symbolizer)))
    return E;

  return mapRawProfileToRecords();
}

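// Match the binary's build id against the segment entries recorded in the raw
// profile to locate the address range of the profiled text segment.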
Error RawMemProfReader::setupForSymbolization() {
  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  object::BuildIDRef BinaryId = object::getBuildID(Object);
  if (BinaryId.empty())
    return make_error<StringError>(Twine("No build id found in binary ") +
                                       Binary.getBinary()->getFileName(),
                                   inconvertibleErrorCode());

  int NumMatched = 0;
  for (const auto &Entry : SegmentInfo) {
    llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize);
    if (BinaryId == SegmentId) {
      // We assume only one text segment in the main binary for simplicity and
      // to reduce the overhead of checking multiple ranges during
      // symbolization.
      if (++NumMatched > 1) {
        return make_error<StringError>(
            "We expect only one executable segment in the profiled binary",
            inconvertibleErrorCode());
      }
      ProfiledTextSegmentStart = Entry.Start;
      ProfiledTextSegmentEnd = Entry.End;
    }
  }
  assert(NumMatched != 0 && "No matching executable segments in segment info.");
  assert((PreferredTextSegmentAddress == 0 ||
          (PreferredTextSegmentAddress == ProfiledTextSegmentStart)) &&
         "Expect text segment address to be 0 or equal to profiled text "
         "segment start.");
  return Error::success();
}

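// Convert the merged, symbolized callstack profile data into per-function
// IndexedMemProfRecords keyed by function GUID.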
Error RawMemProfReader::mapRawProfileToRecords() {
  // Hold a mapping from function to each callsite location we encounter within
  // it that is part of some dynamic allocation context. The location is stored
  // as a pointer to a symbolized list of inline frames.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;

  // Convert the raw profile callstack data into memprof records. While doing
  // so, keep track of related contexts so that we can fill these in later.
  for (const auto &[StackId, MIB] : CallstackProfileData) {
    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));

    // Construct the symbolized callstack.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());

    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];

      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");

      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself that is recorded
      // as an alloc site.
      for (size_t J = 0; J < Frames.size(); J++) {
        if (I == 0 && J == 0)
          continue;
        // We attach the entire bottom-up frame here for the callsite even
        // though we only need the frames up to and including the frame for
        // Frames[J].Function. This will enable better deduplication for
        // compression in the future.
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        PerFunctionCallSites[Guid].insert(&Frames);
      }

      // Add all the frames to the current allocation callstack.
      Callstack.append(Frames.begin(), Frames.end());
    }

    CallStackId CSId = hashCallStack(Callstack);
    CSIdToCallStack.insert({CSId, Callstack});

    // We attach the memprof record to each function bottom-up including the
    // first non-inline frame.
    for (size_t I = 0; /*Break out using the condition below*/; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      auto Result =
          FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
      IndexedMemProfRecord &Record = Result.first->second;
      Record.AllocSites.emplace_back(Callstack, CSId, MIB);

      if (!F.IsInlineFrame)
        break;
    }
  }

  // Fill in the related callsites per function.
  for (const auto &[Id, Locs] : PerFunctionCallSites) {
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
    IndexedMemProfRecord &Record = Result.first->second;
    for (LocationPtr Loc : Locs) {
      CallStackId CSId = hashCallStack(*Loc);
      CSIdToCallStack.insert({CSId, *Loc});
      Record.CallSites.push_back(*Loc);
      Record.CallSiteIds.push_back(CSId);
    }
  }

  verifyFunctionProfileData(FunctionProfileData);

  return Error::success();
}

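// Symbolize every unique virtual address in the stack map, caching the
// resulting frames, and drop addresses that cannot be symbolized or that
// belong to the memprof runtime itself.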
Error RawMemProfReader::symbolizeAndFilterStackFrames(
    std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer) {
  // The specifier to use when symbolization is requested.
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);

  // For entries where all PCs in the callstack are discarded, we erase the
  // entry from the stack map.
  llvm::SmallVector<uint64_t> EntriesToErase;
  // We keep track of all prior discarded entries so that we can avoid invoking
  // the symbolizer for such entries.
  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
  for (auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Check if we have already symbolized and cached the result or if we
      // don't want to attempt symbolization since we know this address is bad.
      // In this case the address is also removed from the current callstack.
      if (SymbolizedFrame.count(VAddr) > 0 ||
          AllVAddrsToDiscard.contains(VAddr))
        continue;

      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
      if (!DIOr)
        return DIOr.takeError();
      DIInliningInfo DI = DIOr.get();

      // Drop frames which we can't symbolize or which belong to the runtime.
      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
          isRuntimePath(DI.getFrame(0).FileName)) {
        AllVAddrsToDiscard.insert(VAddr);
        continue;
      }

      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
           I++) {
        const auto &DIFrame = DI.getFrame(I);
        const uint64_t Guid =
            IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                      // Only the last entry is not an inlined location.
                      I != NumFrames - 1);
        // Here we retain a mapping from the GUID to the canonical symbol name
        // instead of adding it to the frame object directly to reduce memory
        // overhead. This is because there can be many unique frames,
        // particularly for callsite frames.
        if (KeepSymbolName) {
          StringRef CanonicalName =
              sampleprof::FunctionSamples::getCanonicalFnName(
                  DIFrame.FunctionName);
          GuidToSymbolName.insert({Guid, CanonicalName.str()});
        }

        const FrameId Hash = F.hash();
        IdToFrame.insert({Hash, F});
        SymbolizedFrame[VAddr].push_back(Hash);
      }
    }

    auto &CallStack = Entry.getSecond();
    llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
      return AllVAddrsToDiscard.contains(A);
    });
    if (CallStack.empty())
      EntriesToErase.push_back(Entry.getFirst());
  }

  // Drop the entries where the callstack is empty.
  for (const uint64_t Id : EntriesToErase) {
    StackMap.erase(Id);
    if (CallstackProfileData[Id].AccessHistogramSize > 0)
      free((void *)CallstackProfileData[Id].AccessHistogram);
    CallstackProfileData.erase(Id);
  }

  if (StackMap.empty())
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "no entries in callstack map after symbolization");

  return Error::success();
}

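// Scan each raw profile header in the buffer and collect the unique build ids
// from the recorded segment entries, preserving first-seen order.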
std::vector<std::string>
RawMemProfReader::peekBuildIds(MemoryBuffer *DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();
  // Use a SetVector since a profile file may contain multiple raw profile
  // dumps, each with segment information. We want them unique and in the order
  // they were stored in the profile; the profiled binary should be the first
  // entry. The runtime uses dl_iterate_phdr and the "... first object visited
  // by callback is the main program."
  // https://man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html
  llvm::SetVector<std::string, std::vector<std::string>,
                  llvm::SmallSet<std::string, 10>>
      BuildIds;
  while (Next < DataBuffer->getBufferEnd()) {
    const auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);

    for (const auto &Entry : Entries)
      BuildIds.insert(getBuildIdString(Entry));

    Next += Header->TotalSize;
  }
  return BuildIds.takeVector();
}

// FIXME: Add a schema for serializing similar to IndexedMemprofReader. This
// will make it easier to deserialize different raw memprof versions.
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
RawMemProfReader::readMemInfoBlocks(const char *Ptr) {
  if (MemprofRawVersion == 3ULL)
    return readMemInfoBlocksV3(Ptr);
  if (MemprofRawVersion == 4ULL)
    return readMemInfoBlocksV4(Ptr);
  llvm_unreachable(
      "Panic: Unsupported version number when reading MemInfoBlocks");
}

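// A raw memprof profile is one or more dumps laid out back to back. Each dump
// starts with a Header whose SegmentOffset, MIBOffset and StackOffset fields
// locate the corresponding sections relative to the start of the dump, and
// whose TotalSize gives the distance to the next dump.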
Error RawMemProfReader::readRawProfile(
    std::unique_ptr<MemoryBuffer> DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    const auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Set the reader version to the raw memprof version of the profile.
    // Whether the version is supported has already been checked before the
    // reader is created.
    MemprofRawVersion = Header->Version;

    // Read in the segment information and check whether it is the same across
    // all profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumps.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    for (const auto &[Id, MIB] : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (CallstackProfileData.count(Id)) {
        if (MemprofRawVersion >= 4ULL &&
            (CallstackProfileData[Id].AccessHistogramSize > 0 ||
             MIB.AccessHistogramSize > 0)) {
          uintptr_t ShorterHistogram;
          if (CallstackProfileData[Id].AccessHistogramSize >
              MIB.AccessHistogramSize)
            ShorterHistogram = MIB.AccessHistogram;
          else
            ShorterHistogram = CallstackProfileData[Id].AccessHistogram;
          CallstackProfileData[Id].Merge(MIB);
          free((void *)ShorterHistogram);
        } else {
          CallstackProfileData[Id].Merge(MIB);
        }
      } else {
        CallstackProfileData[Id] = MIB;
      }
    }

    // Read in the callstack for each id. For multiple raw profiles in the same
    // file, we expect that the callstack is the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    Next += Header->TotalSize;
  }

  return Error::success();
}

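// Translate a virtual address recorded in the profile into the module offset
// expected by the symbolizer.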
object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  if (VirtualAddress > ProfiledTextSegmentStart &&
      VirtualAddress <= ProfiledTextSegmentEnd) {
    // For PIE binaries, the preferred address is zero and we adjust the
    // virtual address by the start of the profiled segment, assuming that the
    // offset of the segment in the binary is zero. For non-PIE binaries the
    // preferred and profiled segment addresses should be equal and this is a
    // no-op.
    const uint64_t AdjustedAddress =
        VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;
    return object::SectionedAddress{AdjustedAddress};
  }
  // Addresses which do not originate from the profiled text segment in the
  // binary are not adjusted. These will fail symbolization and be filtered out
  // during processing.
  return object::SectionedAddress{VirtualAddress};
}

Error RawMemProfReader::readNextRecord(
    GuidMemProfRecordPair &GuidRecord,
    std::function<const Frame(const FrameId)> Callback) {
  // Create a new callback for the RawMemProfRecord iterator so that we can
  // provide the symbol name if the reader was initialized with KeepSymbolName
  // = true. This is useful for debugging and testing.
  auto IdToFrameCallback = [this](const FrameId Id) {
    Frame F = this->idToFrame(Id);
    if (!this->KeepSymbolName)
      return F;
    auto Iter = this->GuidToSymbolName.find(F.Function);
    assert(Iter != this->GuidToSymbolName.end());
    F.SymbolName = std::make_unique<std::string>(Iter->getSecond());
    return F;
  };
  return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
}
} // namespace memprof
} // namespace llvm