xref: /freebsd/contrib/llvm-project/llvm/lib/ProfileData/IndexedMemProfData.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- IndexedMemProfData.h - MemProf format support ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MemProf data is serialized in writeMemProf provided in this file.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ProfileData/DataAccessProf.h"
14 #include "llvm/ProfileData/InstrProf.h"
15 #include "llvm/ProfileData/InstrProfReader.h"
16 #include "llvm/ProfileData/MemProf.h"
17 #include "llvm/ProfileData/MemProfRadixTree.h"
18 #include "llvm/ProfileData/MemProfSummary.h"
19 #include "llvm/Support/FormatVariadic.h"
20 #include "llvm/Support/OnDiskHashTable.h"
21 
22 namespace llvm {
23 
24 // Serialize Schema.
25 static void writeMemProfSchema(ProfOStream &OS,
26                                const memprof::MemProfSchema &Schema) {
27   OS.write(static_cast<uint64_t>(Schema.size()));
28   for (const auto Id : Schema)
29     OS.write(static_cast<uint64_t>(Id));
30 }
31 
32 // Serialize MemProfRecordData.  Return RecordTableOffset.
33 static uint64_t writeMemProfRecords(
34     ProfOStream &OS,
35     llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
36         &MemProfRecordData,
37     memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
38     llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
39         *MemProfCallStackIndexes = nullptr) {
40   memprof::RecordWriterTrait RecordWriter(Schema, Version,
41                                           MemProfCallStackIndexes);
42   OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
43       RecordTableGenerator;
44   for (auto &[GUID, Record] : MemProfRecordData) {
45     // Insert the key (func hash) and value (memprof record).
46     RecordTableGenerator.insert(GUID, Record, RecordWriter);
47   }
48   // Release the memory of this MapVector as it is no longer needed.
49   MemProfRecordData.clear();
50 
51   // The call to Emit invokes RecordWriterTrait::EmitData which destructs
52   // the memprof record copies owned by the RecordTableGenerator. This works
53   // because the RecordTableGenerator is not used after this point.
54   return RecordTableGenerator.Emit(OS.OS, RecordWriter);
55 }
56 
57 // Serialize MemProfFrameData.  Return FrameTableOffset.
58 static uint64_t writeMemProfFrames(
59     ProfOStream &OS,
60     llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
61   OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
62       FrameTableGenerator;
63   for (auto &[FrameId, Frame] : MemProfFrameData) {
64     // Insert the key (frame id) and value (frame contents).
65     FrameTableGenerator.insert(FrameId, Frame);
66   }
67   // Release the memory of this MapVector as it is no longer needed.
68   MemProfFrameData.clear();
69 
70   return FrameTableGenerator.Emit(OS.OS);
71 }
72 
73 // Serialize MemProfFrameData.  Return the mapping from FrameIds to their
74 // indexes within the frame array.
75 static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
76 writeMemProfFrameArray(
77     ProfOStream &OS,
78     llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
79     llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
80   // Mappings from FrameIds to array indexes.
81   llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;
82 
83   // Compute the order in which we serialize Frames.  The order does not matter
84   // in terms of correctness, but we still compute it for deserialization
85   // performance.  Specifically, if we serialize frequently used Frames one
86   // after another, we have better cache utilization.  For two Frames that
87   // appear equally frequently, we break a tie by serializing the one that tends
88   // to appear earlier in call stacks.  We implement the tie-breaking mechanism
89   // by computing the sum of indexes within call stacks for each Frame.  If we
90   // still have a tie, then we just resort to compare two FrameIds, which is
91   // just for stability of output.
92   std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
93   FrameIdOrder.reserve(MemProfFrameData.size());
94   for (const auto &[Id, Frame] : MemProfFrameData)
95     FrameIdOrder.emplace_back(Id, &Frame);
96   assert(MemProfFrameData.size() == FrameIdOrder.size());
97   llvm::sort(FrameIdOrder,
98              [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
99                  const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
100                const auto &SL = FrameHistogram[L.first];
101                const auto &SR = FrameHistogram[R.first];
102                // Popular FrameIds should come first.
103                if (SL.Count != SR.Count)
104                  return SL.Count > SR.Count;
105                // If they are equally popular, then the one that tends to appear
106                // earlier in call stacks should come first.
107                if (SL.PositionSum != SR.PositionSum)
108                  return SL.PositionSum < SR.PositionSum;
109                // Compare their FrameIds for sort stability.
110                return L.first < R.first;
111              });
112 
113   // Serialize all frames while creating mappings from linear IDs to FrameIds.
114   uint64_t Index = 0;
115   MemProfFrameIndexes.reserve(FrameIdOrder.size());
116   for (const auto &[Id, F] : FrameIdOrder) {
117     F->serialize(OS.OS);
118     MemProfFrameIndexes.insert({Id, Index});
119     ++Index;
120   }
121   assert(MemProfFrameData.size() == Index);
122   assert(MemProfFrameData.size() == MemProfFrameIndexes.size());
123 
124   // Release the memory of this MapVector as it is no longer needed.
125   MemProfFrameData.clear();
126 
127   return MemProfFrameIndexes;
128 }
129 
130 static uint64_t writeMemProfCallStacks(
131     ProfOStream &OS,
132     llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
133         &MemProfCallStackData) {
134   OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
135       CallStackTableGenerator;
136   for (auto &[CSId, CallStack] : MemProfCallStackData)
137     CallStackTableGenerator.insert(CSId, CallStack);
138   // Release the memory of this vector as it is no longer needed.
139   MemProfCallStackData.clear();
140 
141   return CallStackTableGenerator.Emit(OS.OS);
142 }
143 
144 static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
145 writeMemProfCallStackArray(
146     ProfOStream &OS,
147     llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
148         &MemProfCallStackData,
149     llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
150         &MemProfFrameIndexes,
151     llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram,
152     unsigned &NumElements) {
153   llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
154       MemProfCallStackIndexes;
155 
156   memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
157   Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
158                 FrameHistogram);
159   for (auto I : Builder.getRadixArray())
160     OS.write32(I);
161   NumElements = Builder.getRadixArray().size();
162   MemProfCallStackIndexes = Builder.takeCallStackPos();
163 
164   // Release the memory of this vector as it is no longer needed.
165   MemProfCallStackData.clear();
166 
167   return MemProfCallStackIndexes;
168 }
169 
170 // Write out MemProf Version2 as follows:
171 // uint64_t Version
172 // uint64_t RecordTableOffset = RecordTableGenerator.Emit
173 // uint64_t FramePayloadOffset = Offset for the frame payload
174 // uint64_t FrameTableOffset = FrameTableGenerator.Emit
175 // uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2)
176 // uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
177 // uint64_t Num schema entries
178 // uint64_t Schema entry 0
179 // uint64_t Schema entry 1
180 // ....
181 // uint64_t Schema entry N - 1
182 // OnDiskChainedHashTable MemProfRecordData
183 // OnDiskChainedHashTable MemProfFrameData
184 // OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
185 static Error writeMemProfV2(ProfOStream &OS,
186                             memprof::IndexedMemProfData &MemProfData,
187                             bool MemProfFullSchema) {
188   OS.write(memprof::Version2);
189   uint64_t HeaderUpdatePos = OS.tell();
190   OS.write(0ULL); // Reserve space for the memprof record table offset.
191   OS.write(0ULL); // Reserve space for the memprof frame payload offset.
192   OS.write(0ULL); // Reserve space for the memprof frame table offset.
193   OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
194   OS.write(0ULL); // Reserve space for the memprof call stack table offset.
195 
196   auto Schema = memprof::getHotColdSchema();
197   if (MemProfFullSchema)
198     Schema = memprof::getFullSchema();
199   writeMemProfSchema(OS, Schema);
200 
201   uint64_t RecordTableOffset =
202       writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);
203 
204   uint64_t FramePayloadOffset = OS.tell();
205   uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);
206 
207   uint64_t CallStackPayloadOffset = OS.tell();
208   uint64_t CallStackTableOffset =
209       writeMemProfCallStacks(OS, MemProfData.CallStacks);
210 
211   uint64_t Header[] = {
212       RecordTableOffset,      FramePayloadOffset,   FrameTableOffset,
213       CallStackPayloadOffset, CallStackTableOffset,
214   };
215   OS.patch({{HeaderUpdatePos, Header}});
216 
217   return Error::success();
218 }
219 
220 static Error writeMemProfRadixTreeBased(
221     ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
222     memprof::IndexedVersion Version, bool MemProfFullSchema,
223     std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData =
224         nullptr,
225     std::unique_ptr<memprof::MemProfSummary> MemProfSum = nullptr) {
226   assert((Version == memprof::Version3 || Version == memprof::Version4) &&
227          "Unsupported version for radix tree format");
228 
229   OS.write(Version); // Write the specific version (V3 or V4)
230   uint64_t HeaderUpdatePos = OS.tell();
231   OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
232   OS.write(0ULL); // Reserve space for the memprof record payload offset.
233   OS.write(0ULL); // Reserve space for the memprof record table offset.
234   if (Version >= memprof::Version4) {
235     OS.write(0ULL); // Reserve space for the data access profile offset.
236 
237     MemProfSum->write(OS);
238   }
239 
240   auto Schema = memprof::getHotColdSchema();
241   if (MemProfFullSchema)
242     Schema = memprof::getFullSchema();
243   writeMemProfSchema(OS, Schema);
244 
245   llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
246       memprof::computeFrameHistogram(MemProfData.CallStacks);
247   assert(MemProfData.Frames.size() == FrameHistogram.size());
248 
249   llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
250       writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);
251 
252   uint64_t CallStackPayloadOffset = OS.tell();
253   // The number of elements in the call stack array.
254   unsigned NumElements = 0;
255   llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
256       MemProfCallStackIndexes =
257           writeMemProfCallStackArray(OS, MemProfData.CallStacks,
258                                      MemProfFrameIndexes, FrameHistogram,
259                                      NumElements);
260 
261   uint64_t RecordPayloadOffset = OS.tell();
262   uint64_t RecordTableOffset = writeMemProfRecords(
263       OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes);
264 
265   uint64_t DataAccessProfOffset = 0;
266   if (DataAccessProfileData != nullptr) {
267     assert(Version >= memprof::Version4 &&
268            "Data access profiles are added starting from v4");
269     DataAccessProfOffset = OS.tell();
270     if (Error E = DataAccessProfileData->serialize(OS))
271       return E;
272   }
273 
274   // Verify that the computation for the number of elements in the call stack
275   // array works.
276   assert(CallStackPayloadOffset +
277              NumElements * sizeof(memprof::LinearFrameId) ==
278          RecordPayloadOffset);
279 
280   SmallVector<uint64_t, 4> Header = {
281       CallStackPayloadOffset,
282       RecordPayloadOffset,
283       RecordTableOffset,
284   };
285   if (Version >= memprof::Version4)
286     Header.push_back(DataAccessProfOffset);
287 
288   OS.patch({{HeaderUpdatePos, Header}});
289 
290   return Error::success();
291 }
292 
293 // Write out MemProf Version3
294 static Error writeMemProfV3(ProfOStream &OS,
295                             memprof::IndexedMemProfData &MemProfData,
296                             bool MemProfFullSchema) {
297   return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3,
298                                     MemProfFullSchema);
299 }
300 
301 // Write out MemProf Version4
302 static Error writeMemProfV4(
303     ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
304     bool MemProfFullSchema,
305     std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData,
306     std::unique_ptr<memprof::MemProfSummary> MemProfSum) {
307   return writeMemProfRadixTreeBased(
308       OS, MemProfData, memprof::Version4, MemProfFullSchema,
309       std::move(DataAccessProfileData), std::move(MemProfSum));
310 }
311 
312 // Write out the MemProf data in a requested version.
313 Error writeMemProf(
314     ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
315     memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
316     std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData,
317     std::unique_ptr<memprof::MemProfSummary> MemProfSum) {
318   switch (MemProfVersionRequested) {
319   case memprof::Version2:
320     return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
321   case memprof::Version3:
322     return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
323   case memprof::Version4:
324     return writeMemProfV4(OS, MemProfData, MemProfFullSchema,
325                           std::move(DataAccessProfileData),
326                           std::move(MemProfSum));
327   }
328 
329   return make_error<InstrProfError>(
330       instrprof_error::unsupported_version,
331       formatv("MemProf version {} not supported; "
332               "requires version between {} and {}, inclusive",
333               MemProfVersionRequested, memprof::MinimumSupportedVersion,
334               memprof::MaximumSupportedVersion));
335 }
336 
337 Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,
338                                           const unsigned char *Ptr) {
339   // The value returned from RecordTableGenerator.Emit.
340   const uint64_t RecordTableOffset =
341       support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
342   // The offset in the stream right before invoking
343   // FrameTableGenerator.Emit.
344   const uint64_t FramePayloadOffset =
345       support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
346   // The value returned from FrameTableGenerator.Emit.
347   const uint64_t FrameTableOffset =
348       support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
349 
350   // The offset in the stream right before invoking
351   // CallStackTableGenerator.Emit.
352   uint64_t CallStackPayloadOffset = 0;
353   // The value returned from CallStackTableGenerator.Emit.
354   uint64_t CallStackTableOffset = 0;
355   if (Version >= memprof::Version2) {
356     CallStackPayloadOffset =
357         support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
358     CallStackTableOffset =
359         support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
360   }
361 
362   // Read the schema.
363   auto SchemaOr = memprof::readMemProfSchema(Ptr);
364   if (!SchemaOr)
365     return SchemaOr.takeError();
366   Schema = SchemaOr.get();
367 
368   // Now initialize the table reader with a pointer into data buffer.
369   MemProfRecordTable.reset(MemProfRecordHashTable::Create(
370       /*Buckets=*/Start + RecordTableOffset,
371       /*Payload=*/Ptr,
372       /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
373 
374   // Initialize the frame table reader with the payload and bucket offsets.
375   MemProfFrameTable.reset(MemProfFrameHashTable::Create(
376       /*Buckets=*/Start + FrameTableOffset,
377       /*Payload=*/Start + FramePayloadOffset,
378       /*Base=*/Start));
379 
380   if (Version >= memprof::Version2)
381     MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
382         /*Buckets=*/Start + CallStackTableOffset,
383         /*Payload=*/Start + CallStackPayloadOffset,
384         /*Base=*/Start));
385 
386   return Error::success();
387 }
388 
389 Error IndexedMemProfReader::deserializeRadixTreeBased(
390     const unsigned char *Start, const unsigned char *Ptr,
391     memprof::IndexedVersion Version) {
392   assert((Version == memprof::Version3 || Version == memprof::Version4) &&
393          "Unsupported version for radix tree format");
394   // The offset in the stream right before invoking
395   // CallStackTableGenerator.Emit.
396   const uint64_t CallStackPayloadOffset =
397       support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
398   // The offset in the stream right before invoking RecordTableGenerator.Emit.
399   const uint64_t RecordPayloadOffset =
400       support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
401   // The value returned from RecordTableGenerator.Emit.
402   const uint64_t RecordTableOffset =
403       support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
404 
405   uint64_t DataAccessProfOffset = 0;
406   if (Version >= memprof::Version4) {
407     DataAccessProfOffset =
408         support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
409     MemProfSum = memprof::MemProfSummary::deserialize(Ptr);
410   }
411 
412   // Read the schema.
413   auto SchemaOr = memprof::readMemProfSchema(Ptr);
414   if (!SchemaOr)
415     return SchemaOr.takeError();
416   Schema = SchemaOr.get();
417 
418   FrameBase = Ptr;
419   CallStackBase = Start + CallStackPayloadOffset;
420 
421   // Compute the number of elements in the radix tree array.  Since we use this
422   // to reserve enough bits in a BitVector, it's totally OK if we overestimate
423   // this number a little bit because of padding just before the next section.
424   RadixTreeSize = (RecordPayloadOffset - CallStackPayloadOffset) /
425                   sizeof(memprof::LinearFrameId);
426 
427   // Now initialize the table reader with a pointer into data buffer.
428   MemProfRecordTable.reset(MemProfRecordHashTable::Create(
429       /*Buckets=*/Start + RecordTableOffset,
430       /*Payload=*/Start + RecordPayloadOffset,
431       /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
432 
433   assert((!DataAccessProfOffset || DataAccessProfOffset > RecordTableOffset) &&
434          "Data access profile is either empty or after the record table");
435   if (DataAccessProfOffset > RecordTableOffset) {
436     DataAccessProfileData = std::make_unique<memprof::DataAccessProfData>();
437     const unsigned char *DAPPtr = Start + DataAccessProfOffset;
438     if (Error E = DataAccessProfileData->deserialize(DAPPtr))
439       return E;
440   }
441 
442   return Error::success();
443 }
444 
445 Error IndexedMemProfReader::deserialize(const unsigned char *Start,
446                                         uint64_t MemProfOffset) {
447   const unsigned char *Ptr = Start + MemProfOffset;
448 
449   // Read the MemProf version number.
450   const uint64_t FirstWord =
451       support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
452 
453   // Check if the version is supported
454   if (FirstWord >= memprof::MinimumSupportedVersion &&
455       FirstWord <= memprof::MaximumSupportedVersion) {
456     // Everything is good. We can proceed to deserialize the rest.
457     Version = static_cast<memprof::IndexedVersion>(FirstWord);
458   } else {
459     return make_error<InstrProfError>(
460         instrprof_error::unsupported_version,
461         formatv("MemProf version {} not supported; "
462                 "requires version between {} and {}, inclusive",
463                 FirstWord, memprof::MinimumSupportedVersion,
464                 memprof::MaximumSupportedVersion));
465   }
466 
467   switch (Version) {
468   case memprof::Version2:
469     if (Error E = deserializeV2(Start, Ptr))
470       return E;
471     break;
472   case memprof::Version3:
473   case memprof::Version4:
474     // V3 and V4 share the same high-level structure (radix tree, linear IDs).
475     if (Error E = deserializeRadixTreeBased(Start, Ptr, Version))
476       return E;
477     break;
478   }
479 
480   return Error::success();
481 }
482 } // namespace llvm
483