xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ProfileData/MemProf.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 #ifndef LLVM_PROFILEDATA_MEMPROF_H_
2 #define LLVM_PROFILEDATA_MEMPROF_H_
3 
4 #include "llvm/ADT/MapVector.h"
5 #include "llvm/ADT/STLForwardCompat.h"
6 #include "llvm/ADT/STLFunctionalExtras.h"
7 #include "llvm/ADT/SmallVector.h"
8 #include "llvm/IR/GlobalValue.h"
9 #include "llvm/ProfileData/MemProfData.inc"
10 #include "llvm/Support/Endian.h"
11 #include "llvm/Support/EndianStream.h"
12 #include "llvm/Support/raw_ostream.h"
13 
14 #include <bitset>
15 #include <cstdint>
16 #include <optional>
17 
18 namespace llvm {
19 namespace memprof {
20 
21 struct MemProfRecord;
22 
23 // The versions of the indexed MemProf format
24 enum IndexedVersion : uint64_t {
25   // Version 0: This version didn't have a version field.
26   Version0 = 0,
27   // Version 1: Added a version field to the header.
28   Version1 = 1,
29   // Version 2: Added a call stack table.
30   Version2 = 2,
31   // Version 3: Added a radix tree for call stacks.  Switched to linear IDs for
32   // frames and call stacks.
33   Version3 = 3,
34 };
35 
36 constexpr uint64_t MinimumSupportedVersion = Version0;
37 constexpr uint64_t MaximumSupportedVersion = Version3;
38 
39 // Verify that the minimum and maximum satisfy the obvious constraint.
40 static_assert(MinimumSupportedVersion <= MaximumSupportedVersion);
41 
42 enum class Meta : uint64_t {
43   Start = 0,
44 #define MIBEntryDef(NameTag, Name, Type) NameTag,
45 #include "llvm/ProfileData/MIBEntryDef.inc"
46 #undef MIBEntryDef
47   Size
48 };
49 
50 using MemProfSchema = llvm::SmallVector<Meta, static_cast<int>(Meta::Size)>;
51 
52 // Returns the full schema currently in use.
53 MemProfSchema getFullSchema();
54 
55 // Returns the schema consisting of the fields used for hot cold memory hinting.
56 MemProfSchema getHotColdSchema();
57 
58 // Holds the actual MemInfoBlock data with all fields. Contents may be read or
59 // written partially by providing an appropriate schema to the serialize and
60 // deserialize methods.
61 struct PortableMemInfoBlock {
62   PortableMemInfoBlock() = default;
PortableMemInfoBlockPortableMemInfoBlock63   explicit PortableMemInfoBlock(const MemInfoBlock &Block,
64                                 const MemProfSchema &IncomingSchema) {
65     for (const Meta Id : IncomingSchema)
66       Schema.set(llvm::to_underlying(Id));
67 #define MIBEntryDef(NameTag, Name, Type) Name = Block.Name;
68 #include "llvm/ProfileData/MIBEntryDef.inc"
69 #undef MIBEntryDef
70   }
71 
PortableMemInfoBlockPortableMemInfoBlock72   PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr) {
73     deserialize(Schema, Ptr);
74   }
75 
76   // Read the contents of \p Ptr based on the \p Schema to populate the
77   // MemInfoBlock member.
deserializePortableMemInfoBlock78   void deserialize(const MemProfSchema &IncomingSchema,
79                    const unsigned char *Ptr) {
80     using namespace support;
81 
82     Schema.reset();
83     for (const Meta Id : IncomingSchema) {
84       switch (Id) {
85 #define MIBEntryDef(NameTag, Name, Type)                                       \
86   case Meta::Name: {                                                           \
87     Name = endian::readNext<Type, llvm::endianness::little>(Ptr);              \
88   } break;
89 #include "llvm/ProfileData/MIBEntryDef.inc"
90 #undef MIBEntryDef
91       default:
92         llvm_unreachable("Unknown meta type id, is the profile collected from "
93                          "a newer version of the runtime?");
94       }
95 
96       Schema.set(llvm::to_underlying(Id));
97     }
98   }
99 
100   // Write the contents of the MemInfoBlock based on the \p Schema provided to
101   // the raw_ostream \p OS.
serializePortableMemInfoBlock102   void serialize(const MemProfSchema &Schema, raw_ostream &OS) const {
103     using namespace support;
104 
105     endian::Writer LE(OS, llvm::endianness::little);
106     for (const Meta Id : Schema) {
107       switch (Id) {
108 #define MIBEntryDef(NameTag, Name, Type)                                       \
109   case Meta::Name: {                                                           \
110     LE.write<Type>(Name);                                                      \
111   } break;
112 #include "llvm/ProfileData/MIBEntryDef.inc"
113 #undef MIBEntryDef
114       default:
115         llvm_unreachable("Unknown meta type id, invalid input?");
116       }
117     }
118   }
119 
120   // Print out the contents of the MemInfoBlock in YAML format.
printYAMLPortableMemInfoBlock121   void printYAML(raw_ostream &OS) const {
122     OS << "      MemInfoBlock:\n";
123 #define MIBEntryDef(NameTag, Name, Type)                                       \
124   OS << "        " << #Name << ": " << Name << "\n";
125 #include "llvm/ProfileData/MIBEntryDef.inc"
126 #undef MIBEntryDef
127     if (AccessHistogramSize > 0) {
128       OS << "        " << "AccessHistogramValues" << ":";
129       for (uint32_t I = 0; I < AccessHistogramSize; ++I) {
130         OS << " " << ((uint64_t *)AccessHistogram)[I];
131       }
132       OS << "\n";
133     }
134   }
135 
136   // Return the schema, only for unit tests.
getSchemaPortableMemInfoBlock137   std::bitset<llvm::to_underlying(Meta::Size)> getSchema() const {
138     return Schema;
139   }
140 
141   // Define getters for each type which can be called by analyses.
142 #define MIBEntryDef(NameTag, Name, Type)                                       \
143   Type get##Name() const {                                                     \
144     assert(Schema[llvm::to_underlying(Meta::Name)]);                           \
145     return Name;                                                               \
146   }
147 #include "llvm/ProfileData/MIBEntryDef.inc"
148 #undef MIBEntryDef
149 
clearPortableMemInfoBlock150   void clear() { *this = PortableMemInfoBlock(); }
151 
152   bool operator==(const PortableMemInfoBlock &Other) const {
153     if (Other.Schema != Schema)
154       return false;
155 
156 #define MIBEntryDef(NameTag, Name, Type)                                       \
157   if (Schema[llvm::to_underlying(Meta::Name)] &&                               \
158       Other.get##Name() != get##Name())                                        \
159     return false;
160 #include "llvm/ProfileData/MIBEntryDef.inc"
161 #undef MIBEntryDef
162     return true;
163   }
164 
165   bool operator!=(const PortableMemInfoBlock &Other) const {
166     return !operator==(Other);
167   }
168 
serializedSizePortableMemInfoBlock169   static size_t serializedSize(const MemProfSchema &Schema) {
170     size_t Result = 0;
171 
172     for (const Meta Id : Schema) {
173       switch (Id) {
174 #define MIBEntryDef(NameTag, Name, Type)                                       \
175   case Meta::Name: {                                                           \
176     Result += sizeof(Type);                                                    \
177   } break;
178 #include "llvm/ProfileData/MIBEntryDef.inc"
179 #undef MIBEntryDef
180       default:
181         llvm_unreachable("Unknown meta type id, invalid input?");
182       }
183     }
184 
185     return Result;
186   }
187 
188 private:
189   // The set of available fields, indexed by Meta::Name.
190   std::bitset<llvm::to_underlying(Meta::Size)> Schema;
191 
192 #define MIBEntryDef(NameTag, Name, Type) Type Name = Type();
193 #include "llvm/ProfileData/MIBEntryDef.inc"
194 #undef MIBEntryDef
195 };
196 
197 // A type representing the id generated by hashing the contents of the Frame.
198 using FrameId = uint64_t;
199 // A type representing the id to index into the frame array.
200 using LinearFrameId = uint32_t;
201 // Describes a call frame for a dynamic allocation context. The contents of
202 // the frame are populated by symbolizing the stack depot call frame from the
203 // compiler runtime.
204 struct Frame {
205   // A uuid (uint64_t) identifying the function. It is obtained by
206   // llvm::md5(FunctionName) which returns the lower 64 bits.
207   GlobalValue::GUID Function;
208   // The symbol name for the function. Only populated in the Frame by the reader
209   // if requested during initialization. This field should not be serialized.
210   std::unique_ptr<std::string> SymbolName;
211   // The source line offset of the call from the beginning of parent function.
212   uint32_t LineOffset;
213   // The source column number of the call to help distinguish multiple calls
214   // on the same line.
215   uint32_t Column;
216   // Whether the current frame is inlined.
217   bool IsInlineFrame;
218 
FrameFrame219   Frame(const Frame &Other) {
220     Function = Other.Function;
221     SymbolName = Other.SymbolName
222                      ? std::make_unique<std::string>(*Other.SymbolName)
223                      : nullptr;
224     LineOffset = Other.LineOffset;
225     Column = Other.Column;
226     IsInlineFrame = Other.IsInlineFrame;
227   }
228 
FrameFrame229   Frame(GlobalValue::GUID Hash, uint32_t Off, uint32_t Col, bool Inline)
230       : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {}
231 
232   bool operator==(const Frame &Other) const {
233     // Ignore the SymbolName field to avoid a string compare. Comparing the
234     // function hash serves the same purpose.
235     return Other.Function == Function && Other.LineOffset == LineOffset &&
236            Other.Column == Column && Other.IsInlineFrame == IsInlineFrame;
237   }
238 
239   Frame &operator=(const Frame &Other) {
240     Function = Other.Function;
241     SymbolName = Other.SymbolName
242                      ? std::make_unique<std::string>(*Other.SymbolName)
243                      : nullptr;
244     LineOffset = Other.LineOffset;
245     Column = Other.Column;
246     IsInlineFrame = Other.IsInlineFrame;
247     return *this;
248   }
249 
250   bool operator!=(const Frame &Other) const { return !operator==(Other); }
251 
hasSymbolNameFrame252   bool hasSymbolName() const { return !!SymbolName; }
253 
getSymbolNameFrame254   StringRef getSymbolName() const {
255     assert(hasSymbolName());
256     return *SymbolName;
257   }
258 
getSymbolNameOrFrame259   std::string getSymbolNameOr(StringRef Alt) const {
260     return std::string(hasSymbolName() ? getSymbolName() : Alt);
261   }
262 
263   // Write the contents of the frame to the ostream \p OS.
serializeFrame264   void serialize(raw_ostream &OS) const {
265     using namespace support;
266 
267     endian::Writer LE(OS, llvm::endianness::little);
268 
269     // If the type of the GlobalValue::GUID changes, then we need to update
270     // the reader and the writer.
271     static_assert(std::is_same<GlobalValue::GUID, uint64_t>::value,
272                   "Expect GUID to be uint64_t.");
273     LE.write<uint64_t>(Function);
274 
275     LE.write<uint32_t>(LineOffset);
276     LE.write<uint32_t>(Column);
277     LE.write<bool>(IsInlineFrame);
278   }
279 
280   // Read a frame from char data which has been serialized as little endian.
deserializeFrame281   static Frame deserialize(const unsigned char *Ptr) {
282     using namespace support;
283 
284     const uint64_t F =
285         endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
286     const uint32_t L =
287         endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
288     const uint32_t C =
289         endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
290     const bool I = endian::readNext<bool, llvm::endianness::little>(Ptr);
291     return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C,
292                  /*IsInlineFrame=*/I);
293   }
294 
295   // Returns the size of the frame information.
serializedSizeFrame296   static constexpr size_t serializedSize() {
297     return sizeof(Frame::Function) + sizeof(Frame::LineOffset) +
298            sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame);
299   }
300 
301   // Print the frame information in YAML format.
printYAMLFrame302   void printYAML(raw_ostream &OS) const {
303     OS << "      -\n"
304        << "        Function: " << Function << "\n"
305        << "        SymbolName: " << getSymbolNameOr("<None>") << "\n"
306        << "        LineOffset: " << LineOffset << "\n"
307        << "        Column: " << Column << "\n"
308        << "        Inline: " << IsInlineFrame << "\n";
309   }
310 
311   // Return a hash value based on the contents of the frame. Here we don't use
312   // hashing from llvm ADT since we are going to persist the hash id, the hash
313   // combine algorithm in ADT uses a new randomized seed each time.
hashFrame314   inline FrameId hash() const {
315     auto HashCombine = [](auto Value, size_t Seed) {
316       std::hash<decltype(Value)> Hasher;
317       // The constant used below is the 64 bit representation of the fractional
318       // part of the golden ratio. Used here for the randomness in their bit
319       // pattern.
320       return Hasher(Value) + 0x9e3779b97f4a7c15 + (Seed << 6) + (Seed >> 2);
321     };
322 
323     size_t Result = 0;
324     Result ^= HashCombine(Function, Result);
325     Result ^= HashCombine(LineOffset, Result);
326     Result ^= HashCombine(Column, Result);
327     Result ^= HashCombine(IsInlineFrame, Result);
328     return static_cast<FrameId>(Result);
329   }
330 };
331 
332 // A type representing the index into the table of call stacks.
333 using CallStackId = uint64_t;
334 
335 // A type representing the index into the call stack array.
336 using LinearCallStackId = uint32_t;
337 
338 // Holds allocation information in a space efficient format where frames are
339 // represented using unique identifiers.
340 struct IndexedAllocationInfo {
341   // The dynamic calling context for the allocation in bottom-up (leaf-to-root)
342   // order. Frame contents are stored out-of-line.
343   // TODO: Remove once we fully transition to CSId.
344   llvm::SmallVector<FrameId> CallStack;
345   // Conceptually the same as above.  We are going to keep both CallStack and
346   // CallStackId while we are transitioning from CallStack to CallStackId.
347   CallStackId CSId = 0;
348   // The statistics obtained from the runtime for the allocation.
349   PortableMemInfoBlock Info;
350 
351   IndexedAllocationInfo() = default;
352   IndexedAllocationInfo(ArrayRef<FrameId> CS, CallStackId CSId,
353                         const MemInfoBlock &MB,
354                         const MemProfSchema &Schema = getFullSchema())
355       : CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB, Schema) {}
356 
357   // Returns the size in bytes when this allocation info struct is serialized.
358   size_t serializedSize(const MemProfSchema &Schema,
359                         IndexedVersion Version) const;
360 
361   bool operator==(const IndexedAllocationInfo &Other) const {
362     if (Other.Info != Info)
363       return false;
364 
365     if (Other.CSId != CSId)
366       return false;
367     return true;
368   }
369 
370   bool operator!=(const IndexedAllocationInfo &Other) const {
371     return !operator==(Other);
372   }
373 };
374 
375 // Holds allocation information with frame contents inline. The type should
376 // be used for temporary in-memory instances.
377 struct AllocationInfo {
378   // Same as IndexedAllocationInfo::CallStack with the frame contents inline.
379   std::vector<Frame> CallStack;
380   // Same as IndexedAllocationInfo::Info;
381   PortableMemInfoBlock Info;
382 
383   AllocationInfo() = default;
AllocationInfoAllocationInfo384   AllocationInfo(
385       const IndexedAllocationInfo &IndexedAI,
386       llvm::function_ref<const Frame(const FrameId)> IdToFrameCallback) {
387     for (const FrameId &Id : IndexedAI.CallStack) {
388       CallStack.push_back(IdToFrameCallback(Id));
389     }
390     Info = IndexedAI.Info;
391   }
392 
printYAMLAllocationInfo393   void printYAML(raw_ostream &OS) const {
394     OS << "    -\n";
395     OS << "      Callstack:\n";
396     // TODO: Print out the frame on one line with to make it easier for deep
397     // callstacks once we have a test to check valid YAML is generated.
398     for (const Frame &F : CallStack) {
399       F.printYAML(OS);
400     }
401     Info.printYAML(OS);
402   }
403 };
404 
405 // Holds the memprof profile information for a function. The internal
406 // representation stores frame ids for efficiency. This representation should
407 // be used in the profile conversion and manipulation tools.
408 struct IndexedMemProfRecord {
409   // Memory allocation sites in this function for which we have memory
410   // profiling data.
411   llvm::SmallVector<IndexedAllocationInfo> AllocSites;
412   // Holds call sites in this function which are part of some memory
413   // allocation context. We store this as a list of locations, each with its
414   // list of inline locations in bottom-up order i.e. from leaf to root. The
415   // inline location list may include additional entries, users should pick
416   // the last entry in the list with the same function GUID.
417   llvm::SmallVector<llvm::SmallVector<FrameId>> CallSites;
418   // Conceptually the same as above.  We are going to keep both CallSites and
419   // CallSiteIds while we are transitioning from CallSites to CallSiteIds.
420   llvm::SmallVector<CallStackId> CallSiteIds;
421 
clearIndexedMemProfRecord422   void clear() {
423     AllocSites.clear();
424     CallSites.clear();
425   }
426 
mergeIndexedMemProfRecord427   void merge(const IndexedMemProfRecord &Other) {
428     // TODO: Filter out duplicates which may occur if multiple memprof
429     // profiles are merged together using llvm-profdata.
430     AllocSites.append(Other.AllocSites);
431     CallSites.append(Other.CallSites);
432   }
433 
434   size_t serializedSize(const MemProfSchema &Schema,
435                         IndexedVersion Version) const;
436 
437   bool operator==(const IndexedMemProfRecord &Other) const {
438     if (Other.AllocSites != AllocSites)
439       return false;
440 
441     if (Other.CallSiteIds != CallSiteIds)
442       return false;
443     return true;
444   }
445 
446   // Serializes the memprof records in \p Records to the ostream \p OS based
447   // on the schema provided in \p Schema.
448   void serialize(const MemProfSchema &Schema, raw_ostream &OS,
449                  IndexedVersion Version,
450                  llvm::DenseMap<CallStackId, LinearCallStackId>
451                      *MemProfCallStackIndexes = nullptr) const;
452 
453   // Deserializes memprof records from the Buffer.
454   static IndexedMemProfRecord deserialize(const MemProfSchema &Schema,
455                                           const unsigned char *Buffer,
456                                           IndexedVersion Version);
457 
458   // Convert IndexedMemProfRecord to MemProfRecord.  Callback is used to
459   // translate CallStackId to call stacks with frames inline.
460   MemProfRecord toMemProfRecord(
461       llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const;
462 
463   // Returns the GUID for the function name after canonicalization. For
464   // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
465   // mapped to functions using this GUID.
466   static GlobalValue::GUID getGUID(const StringRef FunctionName);
467 };
468 
469 // Holds the memprof profile information for a function. The internal
470 // representation stores frame contents inline. This representation should
471 // be used for small amount of temporary, in memory instances.
472 struct MemProfRecord {
473   // Same as IndexedMemProfRecord::AllocSites with frame contents inline.
474   llvm::SmallVector<AllocationInfo> AllocSites;
475   // Same as IndexedMemProfRecord::CallSites with frame contents inline.
476   llvm::SmallVector<std::vector<Frame>> CallSites;
477 
478   MemProfRecord() = default;
MemProfRecordMemProfRecord479   MemProfRecord(
480       const IndexedMemProfRecord &Record,
481       llvm::function_ref<const Frame(const FrameId Id)> IdToFrameCallback) {
482     for (const IndexedAllocationInfo &IndexedAI : Record.AllocSites) {
483       AllocSites.emplace_back(IndexedAI, IdToFrameCallback);
484     }
485     for (const ArrayRef<FrameId> Site : Record.CallSites) {
486       std::vector<Frame> Frames;
487       for (const FrameId Id : Site) {
488         Frames.push_back(IdToFrameCallback(Id));
489       }
490       CallSites.push_back(Frames);
491     }
492   }
493 
494   // Prints out the contents of the memprof record in YAML.
printMemProfRecord495   void print(llvm::raw_ostream &OS) const {
496     if (!AllocSites.empty()) {
497       OS << "    AllocSites:\n";
498       for (const AllocationInfo &N : AllocSites)
499         N.printYAML(OS);
500     }
501 
502     if (!CallSites.empty()) {
503       OS << "    CallSites:\n";
504       for (const std::vector<Frame> &Frames : CallSites) {
505         for (const Frame &F : Frames) {
506           OS << "    -\n";
507           F.printYAML(OS);
508         }
509       }
510     }
511   }
512 };
513 
514 // Reads a memprof schema from a buffer. All entries in the buffer are
515 // interpreted as uint64_t. The first entry in the buffer denotes the number of
516 // ids in the schema. Subsequent entries are integers which map to memprof::Meta
517 // enum class entries. After successfully reading the schema, the pointer is one
518 // byte past the schema contents.
519 Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer);
520 
521 // Trait for reading IndexedMemProfRecord data from the on-disk hash table.
522 class RecordLookupTrait {
523 public:
524   using data_type = const IndexedMemProfRecord &;
525   using internal_key_type = uint64_t;
526   using external_key_type = uint64_t;
527   using hash_value_type = uint64_t;
528   using offset_type = uint64_t;
529 
530   RecordLookupTrait() = delete;
RecordLookupTrait(IndexedVersion V,const MemProfSchema & S)531   RecordLookupTrait(IndexedVersion V, const MemProfSchema &S)
532       : Version(V), Schema(S) {}
533 
EqualKey(uint64_t A,uint64_t B)534   static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
GetInternalKey(uint64_t K)535   static uint64_t GetInternalKey(uint64_t K) { return K; }
GetExternalKey(uint64_t K)536   static uint64_t GetExternalKey(uint64_t K) { return K; }
537 
ComputeHash(uint64_t K)538   hash_value_type ComputeHash(uint64_t K) { return K; }
539 
540   static std::pair<offset_type, offset_type>
ReadKeyDataLength(const unsigned char * & D)541   ReadKeyDataLength(const unsigned char *&D) {
542     using namespace support;
543 
544     offset_type KeyLen =
545         endian::readNext<offset_type, llvm::endianness::little>(D);
546     offset_type DataLen =
547         endian::readNext<offset_type, llvm::endianness::little>(D);
548     return std::make_pair(KeyLen, DataLen);
549   }
550 
ReadKey(const unsigned char * D,offset_type)551   uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
552     using namespace support;
553     return endian::readNext<external_key_type, llvm::endianness::little>(D);
554   }
555 
ReadData(uint64_t K,const unsigned char * D,offset_type)556   data_type ReadData(uint64_t K, const unsigned char *D,
557                      offset_type /*Unused*/) {
558     Record = IndexedMemProfRecord::deserialize(Schema, D, Version);
559     return Record;
560   }
561 
562 private:
563   // Holds the MemProf version.
564   IndexedVersion Version;
565   // Holds the memprof schema used to deserialize records.
566   MemProfSchema Schema;
567   // Holds the records from one function deserialized from the indexed format.
568   IndexedMemProfRecord Record;
569 };
570 
571 // Trait for writing IndexedMemProfRecord data to the on-disk hash table.
572 class RecordWriterTrait {
573 public:
574   using key_type = uint64_t;
575   using key_type_ref = uint64_t;
576 
577   using data_type = IndexedMemProfRecord;
578   using data_type_ref = IndexedMemProfRecord &;
579 
580   using hash_value_type = uint64_t;
581   using offset_type = uint64_t;
582 
583 private:
584   // Pointer to the memprof schema to use for the generator.
585   const MemProfSchema *Schema;
586   // The MemProf version to use for the serialization.
587   IndexedVersion Version;
588 
589   // Mappings from CallStackId to the indexes into the call stack array.
590   llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes;
591 
592 public:
593   // We do not support the default constructor, which does not set Version.
594   RecordWriterTrait() = delete;
RecordWriterTrait(const MemProfSchema * Schema,IndexedVersion V,llvm::DenseMap<CallStackId,LinearCallStackId> * MemProfCallStackIndexes)595   RecordWriterTrait(
596       const MemProfSchema *Schema, IndexedVersion V,
597       llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
598       : Schema(Schema), Version(V),
599         MemProfCallStackIndexes(MemProfCallStackIndexes) {}
600 
ComputeHash(key_type_ref K)601   static hash_value_type ComputeHash(key_type_ref K) { return K; }
602 
603   std::pair<offset_type, offset_type>
EmitKeyDataLength(raw_ostream & Out,key_type_ref K,data_type_ref V)604   EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
605     using namespace support;
606 
607     endian::Writer LE(Out, llvm::endianness::little);
608     offset_type N = sizeof(K);
609     LE.write<offset_type>(N);
610     offset_type M = V.serializedSize(*Schema, Version);
611     LE.write<offset_type>(M);
612     return std::make_pair(N, M);
613   }
614 
EmitKey(raw_ostream & Out,key_type_ref K,offset_type)615   void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
616     using namespace support;
617     endian::Writer LE(Out, llvm::endianness::little);
618     LE.write<uint64_t>(K);
619   }
620 
EmitData(raw_ostream & Out,key_type_ref,data_type_ref V,offset_type)621   void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
622                 offset_type /*Unused*/) {
623     assert(Schema != nullptr && "MemProf schema is not initialized!");
624     V.serialize(*Schema, Out, Version, MemProfCallStackIndexes);
625     // Clear the IndexedMemProfRecord which results in clearing/freeing its
626     // vectors of allocs and callsites. This is owned by the associated on-disk
627     // hash table, but unused after this point. See also the comment added to
628     // the client which constructs the on-disk hash table for this trait.
629     V.clear();
630   }
631 };
632 
633 // Trait for writing frame mappings to the on-disk hash table.
634 class FrameWriterTrait {
635 public:
636   using key_type = FrameId;
637   using key_type_ref = FrameId;
638 
639   using data_type = Frame;
640   using data_type_ref = Frame &;
641 
642   using hash_value_type = FrameId;
643   using offset_type = uint64_t;
644 
ComputeHash(key_type_ref K)645   static hash_value_type ComputeHash(key_type_ref K) { return K; }
646 
647   static std::pair<offset_type, offset_type>
EmitKeyDataLength(raw_ostream & Out,key_type_ref K,data_type_ref V)648   EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
649     using namespace support;
650     endian::Writer LE(Out, llvm::endianness::little);
651     offset_type N = sizeof(K);
652     LE.write<offset_type>(N);
653     offset_type M = V.serializedSize();
654     LE.write<offset_type>(M);
655     return std::make_pair(N, M);
656   }
657 
EmitKey(raw_ostream & Out,key_type_ref K,offset_type)658   void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
659     using namespace support;
660     endian::Writer LE(Out, llvm::endianness::little);
661     LE.write<key_type>(K);
662   }
663 
EmitData(raw_ostream & Out,key_type_ref,data_type_ref V,offset_type)664   void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
665                 offset_type /*Unused*/) {
666     V.serialize(Out);
667   }
668 };
669 
670 // Trait for reading frame mappings from the on-disk hash table.
671 class FrameLookupTrait {
672 public:
673   using data_type = const Frame;
674   using internal_key_type = FrameId;
675   using external_key_type = FrameId;
676   using hash_value_type = FrameId;
677   using offset_type = uint64_t;
678 
EqualKey(internal_key_type A,internal_key_type B)679   static bool EqualKey(internal_key_type A, internal_key_type B) {
680     return A == B;
681   }
GetInternalKey(internal_key_type K)682   static uint64_t GetInternalKey(internal_key_type K) { return K; }
GetExternalKey(external_key_type K)683   static uint64_t GetExternalKey(external_key_type K) { return K; }
684 
ComputeHash(internal_key_type K)685   hash_value_type ComputeHash(internal_key_type K) { return K; }
686 
687   static std::pair<offset_type, offset_type>
ReadKeyDataLength(const unsigned char * & D)688   ReadKeyDataLength(const unsigned char *&D) {
689     using namespace support;
690 
691     offset_type KeyLen =
692         endian::readNext<offset_type, llvm::endianness::little>(D);
693     offset_type DataLen =
694         endian::readNext<offset_type, llvm::endianness::little>(D);
695     return std::make_pair(KeyLen, DataLen);
696   }
697 
ReadKey(const unsigned char * D,offset_type)698   uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
699     using namespace support;
700     return endian::readNext<external_key_type, llvm::endianness::little>(D);
701   }
702 
ReadData(uint64_t K,const unsigned char * D,offset_type)703   data_type ReadData(uint64_t K, const unsigned char *D,
704                      offset_type /*Unused*/) {
705     return Frame::deserialize(D);
706   }
707 };
708 
709 // Trait for writing call stacks to the on-disk hash table.
710 class CallStackWriterTrait {
711 public:
712   using key_type = CallStackId;
713   using key_type_ref = CallStackId;
714 
715   using data_type = llvm::SmallVector<FrameId>;
716   using data_type_ref = llvm::SmallVector<FrameId> &;
717 
718   using hash_value_type = CallStackId;
719   using offset_type = uint64_t;
720 
ComputeHash(key_type_ref K)721   static hash_value_type ComputeHash(key_type_ref K) { return K; }
722 
723   static std::pair<offset_type, offset_type>
EmitKeyDataLength(raw_ostream & Out,key_type_ref K,data_type_ref V)724   EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
725     using namespace support;
726     endian::Writer LE(Out, llvm::endianness::little);
727     // We do not explicitly emit the key length because it is a constant.
728     offset_type N = sizeof(K);
729     offset_type M = sizeof(FrameId) * V.size();
730     LE.write<offset_type>(M);
731     return std::make_pair(N, M);
732   }
733 
EmitKey(raw_ostream & Out,key_type_ref K,offset_type)734   void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
735     using namespace support;
736     endian::Writer LE(Out, llvm::endianness::little);
737     LE.write<key_type>(K);
738   }
739 
EmitData(raw_ostream & Out,key_type_ref,data_type_ref V,offset_type)740   void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
741                 offset_type /*Unused*/) {
742     using namespace support;
743     endian::Writer LE(Out, llvm::endianness::little);
744     // Emit the frames.  We do not explicitly emit the length of the vector
745     // because it can be inferred from the data length.
746     for (FrameId F : V)
747       LE.write<FrameId>(F);
748   }
749 };
750 
751 // Trait for reading call stack mappings from the on-disk hash table.
752 class CallStackLookupTrait {
753 public:
754   using data_type = const llvm::SmallVector<FrameId>;
755   using internal_key_type = CallStackId;
756   using external_key_type = CallStackId;
757   using hash_value_type = CallStackId;
758   using offset_type = uint64_t;
759 
EqualKey(internal_key_type A,internal_key_type B)760   static bool EqualKey(internal_key_type A, internal_key_type B) {
761     return A == B;
762   }
GetInternalKey(internal_key_type K)763   static uint64_t GetInternalKey(internal_key_type K) { return K; }
GetExternalKey(external_key_type K)764   static uint64_t GetExternalKey(external_key_type K) { return K; }
765 
ComputeHash(internal_key_type K)766   hash_value_type ComputeHash(internal_key_type K) { return K; }
767 
768   static std::pair<offset_type, offset_type>
ReadKeyDataLength(const unsigned char * & D)769   ReadKeyDataLength(const unsigned char *&D) {
770     using namespace support;
771 
772     // We do not explicitly read the key length because it is a constant.
773     offset_type KeyLen = sizeof(external_key_type);
774     offset_type DataLen =
775         endian::readNext<offset_type, llvm::endianness::little>(D);
776     return std::make_pair(KeyLen, DataLen);
777   }
778 
ReadKey(const unsigned char * D,offset_type)779   uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
780     using namespace support;
781     return endian::readNext<external_key_type, llvm::endianness::little>(D);
782   }
783 
ReadData(uint64_t K,const unsigned char * D,offset_type Length)784   data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) {
785     using namespace support;
786     llvm::SmallVector<FrameId> CS;
787     // Derive the number of frames from the data length.
788     uint64_t NumFrames = Length / sizeof(FrameId);
789     assert(Length % sizeof(FrameId) == 0);
790     CS.reserve(NumFrames);
791     for (size_t I = 0; I != NumFrames; ++I) {
792       FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D);
793       CS.push_back(F);
794     }
795     return CS;
796   }
797 };
798 
799 // Compute a CallStackId for a given call stack.
800 CallStackId hashCallStack(ArrayRef<FrameId> CS);
801 
802 namespace detail {
803 // "Dereference" the iterator from DenseMap or OnDiskChainedHashTable.  We have
804 // to do so in one of two different ways depending on the type of the hash
805 // table.
806 template <typename value_type, typename IterTy>
DerefIterator(IterTy Iter)807 value_type DerefIterator(IterTy Iter) {
808   using deref_type = llvm::remove_cvref_t<decltype(*Iter)>;
809   if constexpr (std::is_same_v<deref_type, value_type>)
810     return *Iter;
811   else
812     return Iter->second;
813 }
814 } // namespace detail
815 
816 // A function object that returns a frame for a given FrameId.
817 template <typename MapTy> struct FrameIdConverter {
818   std::optional<FrameId> LastUnmappedId;
819   MapTy &Map;
820 
821   FrameIdConverter() = delete;
FrameIdConverterFrameIdConverter822   FrameIdConverter(MapTy &Map) : Map(Map) {}
823 
824   // Delete the copy constructor and copy assignment operator to avoid a
825   // situation where a copy of FrameIdConverter gets an error in LastUnmappedId
826   // while the original instance doesn't.
827   FrameIdConverter(const FrameIdConverter &) = delete;
828   FrameIdConverter &operator=(const FrameIdConverter &) = delete;
829 
operatorFrameIdConverter830   Frame operator()(FrameId Id) {
831     auto Iter = Map.find(Id);
832     if (Iter == Map.end()) {
833       LastUnmappedId = Id;
834       return Frame(0, 0, 0, false);
835     }
836     return detail::DerefIterator<Frame>(Iter);
837   }
838 };
839 
840 // A function object that returns a call stack for a given CallStackId.
841 template <typename MapTy> struct CallStackIdConverter {
842   std::optional<CallStackId> LastUnmappedId;
843   MapTy &Map;
844   llvm::function_ref<Frame(FrameId)> FrameIdToFrame;
845 
846   CallStackIdConverter() = delete;
CallStackIdConverterCallStackIdConverter847   CallStackIdConverter(MapTy &Map,
848                        llvm::function_ref<Frame(FrameId)> FrameIdToFrame)
849       : Map(Map), FrameIdToFrame(FrameIdToFrame) {}
850 
851   // Delete the copy constructor and copy assignment operator to avoid a
852   // situation where a copy of CallStackIdConverter gets an error in
853   // LastUnmappedId while the original instance doesn't.
854   CallStackIdConverter(const CallStackIdConverter &) = delete;
855   CallStackIdConverter &operator=(const CallStackIdConverter &) = delete;
856 
operatorCallStackIdConverter857   std::vector<Frame> operator()(CallStackId CSId) {
858     std::vector<Frame> Frames;
859     auto CSIter = Map.find(CSId);
860     if (CSIter == Map.end()) {
861       LastUnmappedId = CSId;
862     } else {
863       llvm::SmallVector<FrameId> CS =
864           detail::DerefIterator<llvm::SmallVector<FrameId>>(CSIter);
865       Frames.reserve(CS.size());
866       for (FrameId Id : CS)
867         Frames.push_back(FrameIdToFrame(Id));
868     }
869     return Frames;
870   }
871 };
872 
873 // A function object that returns a Frame stored at a given index into the Frame
874 // array in the profile.
875 struct LinearFrameIdConverter {
876   const unsigned char *FrameBase;
877 
878   LinearFrameIdConverter() = delete;
LinearFrameIdConverterLinearFrameIdConverter879   LinearFrameIdConverter(const unsigned char *FrameBase)
880       : FrameBase(FrameBase) {}
881 
operatorLinearFrameIdConverter882   Frame operator()(LinearFrameId LinearId) {
883     uint64_t Offset = static_cast<uint64_t>(LinearId) * Frame::serializedSize();
884     return Frame::deserialize(FrameBase + Offset);
885   }
886 };
887 
888 // A function object that returns a call stack stored at a given index into the
889 // call stack array in the profile.
890 struct LinearCallStackIdConverter {
891   const unsigned char *CallStackBase;
892   std::function<Frame(LinearFrameId)> FrameIdToFrame;
893 
894   LinearCallStackIdConverter() = delete;
LinearCallStackIdConverterLinearCallStackIdConverter895   LinearCallStackIdConverter(const unsigned char *CallStackBase,
896                              std::function<Frame(LinearFrameId)> FrameIdToFrame)
897       : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {}
898 
operatorLinearCallStackIdConverter899   std::vector<Frame> operator()(LinearCallStackId LinearCSId) {
900     std::vector<Frame> Frames;
901 
902     const unsigned char *Ptr =
903         CallStackBase +
904         static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId);
905     uint32_t NumFrames =
906         support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
907     Frames.reserve(NumFrames);
908     for (; NumFrames; --NumFrames) {
909       LinearFrameId Elem =
910           support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
911       // Follow a pointer to the parent, if any.  See comments below on
912       // CallStackRadixTreeBuilder for the description of the radix tree format.
913       if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) {
914         Ptr += (-Elem) * sizeof(LinearFrameId);
915         Elem =
916             support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
917       }
918       // We shouldn't encounter another pointer.
919       assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0);
920       Frames.push_back(FrameIdToFrame(Elem));
921       Ptr += sizeof(LinearFrameId);
922     }
923 
924     return Frames;
925   }
926 };
927 
928 struct IndexedMemProfData {
929   // A map to hold memprof data per function. The lower 64 bits obtained from
930   // the md5 hash of the function name is used to index into the map.
931   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> Records;
932 
933   // A map to hold frame id to frame mappings. The mappings are used to
934   // convert IndexedMemProfRecord to MemProfRecords with frame information
935   // inline.
936   llvm::MapVector<FrameId, Frame> Frames;
937 
938   // A map to hold call stack id to call stacks.
939   llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> CallStacks;
940 };
941 
942 struct FrameStat {
943   // The number of occurrences of a given FrameId.
944   uint64_t Count = 0;
945   // The sum of indexes where a given FrameId shows up.
946   uint64_t PositionSum = 0;
947 };
948 
949 // Compute a histogram of Frames in call stacks.
950 llvm::DenseMap<FrameId, FrameStat>
951 computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
952                           &MemProfCallStackData);
953 
954 // Construct a radix tree of call stacks.
955 //
956 // A set of call stacks might look like:
957 //
958 // CallStackId 1:  f1 -> f2 -> f3
959 // CallStackId 2:  f1 -> f2 -> f4 -> f5
960 // CallStackId 3:  f1 -> f2 -> f4 -> f6
961 // CallStackId 4:  f7 -> f8 -> f9
962 //
963 // where each fn refers to a stack frame.
964 //
965 // Since we expect a lot of common prefixes, we can compress the call stacks
966 // into a radix tree like:
967 //
968 // CallStackId 1:  f1 -> f2 -> f3
969 //                       |
970 // CallStackId 2:        +---> f4 -> f5
971 //                             |
972 // CallStackId 3:              +---> f6
973 //
974 // CallStackId 4:  f7 -> f8 -> f9
975 //
976 // Now, we are interested in retrieving call stacks for a given CallStackId, so
977 // we just need a pointer from a given call stack to its parent.  For example,
978 // CallStackId 2 would point to CallStackId 1 as a parent.
979 //
980 // We serialize the radix tree above into a single array along with the length
981 // of each call stack and pointers to the parent call stacks.
982 //
983 // Index:              0  1  2  3  4  5  6  7  8  9 10 11 12 13 14
984 // Array:             L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1
985 //                     ^           ^        ^           ^
986 //                     |           |        |           |
987 // CallStackId 4:  0 --+           |        |           |
988 // CallStackId 3:  4 --------------+        |           |
989 // CallStackId 2:  7 -----------------------+           |
990 // CallStackId 1: 11 -----------------------------------+
991 //
992 // - LN indicates the length of a call stack, encoded as ordinary integer N.
993 //
994 // - JN indicates a pointer to the parent, encoded as -N.
995 //
996 // The radix tree allows us to reconstruct call stacks in the leaf-to-root
997 // order as we scan the array from left ro right while following pointers to
998 // parents along the way.
999 //
1000 // For example, if we are decoding CallStackId 2, we start a forward traversal
1001 // at Index 7, noting the call stack length of 4 and obtaining f5 and f4.  When
1002 // we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3,
1003 // picking up f2 and f1.  We are done after collecting 4 frames as indicated at
1004 // the beginning of the traversal.
1005 //
1006 // On-disk IndexedMemProfRecord will refer to call stacks by their indexes into
1007 // the radix tree array, so we do not explicitly encode mappings like:
1008 // "CallStackId 1 -> 11".
1009 class CallStackRadixTreeBuilder {
1010   // The radix tree array.
1011   std::vector<LinearFrameId> RadixArray;
1012 
1013   // Mapping from CallStackIds to indexes into RadixArray.
1014   llvm::DenseMap<CallStackId, LinearCallStackId> CallStackPos;
1015 
1016   // In build, we partition a given call stack into two parts -- the prefix
1017   // that's common with the previously encoded call stack and the frames beyond
1018   // the common prefix -- the unique portion.  Then we want to find out where
1019   // the common prefix is stored in RadixArray so that we can link the unique
1020   // portion to the common prefix.  Indexes, declared below, helps with our
1021   // needs.  Intuitively, Indexes tells us where each of the previously encoded
1022   // call stack is stored in RadixArray.  More formally, Indexes satisfies:
1023   //
1024   //   RadixArray[Indexes[I]] == Prev[I]
1025   //
1026   // for every I, where Prev is the the call stack in the root-to-leaf order
1027   // previously encoded by build.  (Note that Prev, as passed to
1028   // encodeCallStack, is in the leaf-to-root order.)
1029   //
1030   // For example, if the call stack being encoded shares 5 frames at the root of
1031   // the call stack with the previously encoded call stack,
1032   // RadixArray[Indexes[0]] is the root frame of the common prefix.
1033   // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix.
1034   std::vector<LinearCallStackId> Indexes;
1035 
1036   using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameId>>;
1037 
1038   // Encode a call stack into RadixArray.  Return the starting index within
1039   // RadixArray.
1040   LinearCallStackId encodeCallStack(
1041       const llvm::SmallVector<FrameId> *CallStack,
1042       const llvm::SmallVector<FrameId> *Prev,
1043       const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes);
1044 
1045 public:
1046   CallStackRadixTreeBuilder() = default;
1047 
1048   // Build a radix tree array.
1049   void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
1050                  &&MemProfCallStackData,
1051              const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
1052              llvm::DenseMap<FrameId, FrameStat> &FrameHistogram);
1053 
getRadixArray()1054   const std::vector<LinearFrameId> &getRadixArray() const { return RadixArray; }
1055 
takeCallStackPos()1056   llvm::DenseMap<CallStackId, LinearCallStackId> takeCallStackPos() {
1057     return std::move(CallStackPos);
1058   }
1059 };
1060 
1061 // Verify that each CallStackId is computed with hashCallStack.  This function
1062 // is intended to help transition from CallStack to CSId in
1063 // IndexedAllocationInfo.
1064 void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record);
1065 
1066 // Verify that each CallStackId is computed with hashCallStack.  This function
1067 // is intended to help transition from CallStack to CSId in
1068 // IndexedAllocationInfo.
1069 void verifyFunctionProfileData(
1070     const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
1071         &FunctionProfileData);
1072 } // namespace memprof
1073 } // namespace llvm
1074 
1075 #endif // LLVM_PROFILEDATA_MEMPROF_H_
1076