//===- InstrProfWriter.cpp - Instrumented profiling writer ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing profiling data for clang's
// instrumentation-based PGO and coverage.
//
//===----------------------------------------------------------------------===//

#include "llvm/ProfileData/InstrProfWriter.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/OnDiskHashTable.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

// A struct to define how the data stream should be patched. For indexed
// profiling, only the uint64_t data type is needed.
struct PatchItem {
  uint64_t Pos;         // Where to patch.
  ArrayRef<uint64_t> D; // An array of source data.
};

namespace llvm {

// A wrapper class to abstract the writer stream with support for byte
// back-patching.
class ProfOStream {
public:
  ProfOStream(raw_fd_ostream &FD)
      : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {}
  ProfOStream(raw_string_ostream &STR)
      : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {}

  [[nodiscard]] uint64_t tell() const { return OS.tell(); }
  void write(uint64_t V) { LE.write<uint64_t>(V); }
  void write32(uint32_t V) { LE.write<uint32_t>(V); }
  void writeByte(uint8_t V) { LE.write<uint8_t>(V); }

  // \c patch can only be called when all data is written and flushed.
  // For raw_string_ostream, the patch is done on the target string
  // directly and it won't be reflected in the stream's internal buffer.
  void patch(ArrayRef<PatchItem> P) {
    using namespace support;

    if (IsFDOStream) {
      raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
      const uint64_t LastPos = FDOStream.tell();
      for (const auto &K : P) {
        FDOStream.seek(K.Pos);
        for (uint64_t Elem : K.D)
          write(Elem);
      }
      // Reset the stream to the last position after patching so that users
      // don't accidentally overwrite data. This makes it consistent with
      // the string stream below which replaces the data directly.
      FDOStream.seek(LastPos);
    } else {
      raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
      std::string &Data = SOStream.str(); // with flush
      for (const auto &K : P) {
        for (int I = 0, E = K.D.size(); I != E; I++) {
          uint64_t Bytes =
              endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
          Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t),
                       (const char *)&Bytes, sizeof(uint64_t));
        }
      }
    }
  }

  // If \c OS is an instance of \c raw_fd_ostream, this field will be
  // true. Otherwise, \c OS will be a raw_string_ostream.
  bool IsFDOStream;
  raw_ostream &OS;
  support::endian::Writer LE;
};
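
// A minimal sketch of the reserve-then-patch pattern this class supports
// (the names below are illustrative; the writers later in this file follow
// the same shape):
//
//   ProfOStream OS(FD);
//   uint64_t HeaderPos = OS.tell();
//   OS.write(0);                    // Placeholder for an offset.
//   ...                             // Emit the payload.
//   uint64_t Offsets[] = {PayloadOffset};
//   OS.patch({{HeaderPos, Offsets}});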

class InstrProfRecordWriterTrait {
public:
  using key_type = StringRef;
  using key_type_ref = StringRef;

  using data_type = const InstrProfWriter::ProfilingData *const;
  using data_type_ref = const InstrProfWriter::ProfilingData *const;

  using hash_value_type = uint64_t;
  using offset_type = uint64_t;

  llvm::endianness ValueProfDataEndianness = llvm::endianness::little;
  InstrProfSummaryBuilder *SummaryBuilder;
  InstrProfSummaryBuilder *CSSummaryBuilder;

  InstrProfRecordWriterTrait() = default;

  static hash_value_type ComputeHash(key_type_ref K) {
    return IndexedInstrProf::ComputeHash(K);
  }

  static std::pair<offset_type, offset_type>
  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
    using namespace support;

    endian::Writer LE(Out, llvm::endianness::little);

    offset_type N = K.size();
    LE.write<offset_type>(N);

    offset_type M = 0;
    for (const auto &ProfileData : *V) {
      const InstrProfRecord &ProfRecord = ProfileData.second;
      M += sizeof(uint64_t); // The function hash
      M += sizeof(uint64_t); // The size of the Counts vector
      M += ProfRecord.Counts.size() * sizeof(uint64_t);
      M += sizeof(uint64_t); // The size of the Bitmap vector
      M += ProfRecord.BitmapBytes.size() * sizeof(uint64_t);

      // Value data
      M += ValueProfData::getSize(ProfileData.second);
    }
    LE.write<offset_type>(M);

    return std::make_pair(N, M);
  }

  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N) {
    Out.write(K.data(), N);
  }

  void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type) {
    using namespace support;

    endian::Writer LE(Out, llvm::endianness::little);
    for (const auto &ProfileData : *V) {
      const InstrProfRecord &ProfRecord = ProfileData.second;
      if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first))
        CSSummaryBuilder->addRecord(ProfRecord);
      else
        SummaryBuilder->addRecord(ProfRecord);

      LE.write<uint64_t>(ProfileData.first); // Function hash
      LE.write<uint64_t>(ProfRecord.Counts.size());
      for (uint64_t I : ProfRecord.Counts)
        LE.write<uint64_t>(I);

      LE.write<uint64_t>(ProfRecord.BitmapBytes.size());
      for (uint64_t I : ProfRecord.BitmapBytes)
        LE.write<uint64_t>(I);

      // Write value data
      std::unique_ptr<ValueProfData> VDataPtr =
          ValueProfData::serializeFrom(ProfileData.second);
      uint32_t S = VDataPtr->getSize();
      VDataPtr->swapBytesFromHost(ValueProfDataEndianness);
      Out.write((const char *)VDataPtr.get(), S);
    }
  }
};

} // end namespace llvm

InstrProfWriter::InstrProfWriter(
    bool Sparse, uint64_t TemporalProfTraceReservoirSize,
    uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion,
    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema)
    : Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength),
      TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize),
      InfoObj(new InstrProfRecordWriterTrait()),
      WritePrevVersion(WritePrevVersion),
      MemProfVersionRequested(MemProfVersionRequested),
      MemProfFullSchema(MemProfFullSchema) {}

InstrProfWriter::~InstrProfWriter() { delete InfoObj; }

// Internal interface for testing purposes only.
void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) {
  InfoObj->ValueProfDataEndianness = Endianness;
}

void InstrProfWriter::setOutputSparse(bool Sparse) {
  this->Sparse = Sparse;
}

void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
                                function_ref<void(Error)> Warn) {
  auto Name = I.Name;
  auto Hash = I.Hash;
  addRecord(Name, Hash, std::move(I), Weight, Warn);
}

void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other,
                                    OverlapStats &Overlap,
                                    OverlapStats &FuncLevelOverlap,
                                    const OverlapFuncFilters &FuncFilter) {
  auto Name = Other.Name;
  auto Hash = Other.Hash;
  Other.accumulateCounts(FuncLevelOverlap.Test);
  if (!FunctionData.contains(Name)) {
    Overlap.addOneUnique(FuncLevelOverlap.Test);
    return;
  }
  if (FuncLevelOverlap.Test.CountSum < 1.0f) {
    Overlap.Overlap.NumEntries += 1;
    return;
  }
  auto &ProfileDataMap = FunctionData[Name];
  bool NewFunc;
  ProfilingData::iterator Where;
  std::tie(Where, NewFunc) =
      ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));
  if (NewFunc) {
    Overlap.addOneMismatch(FuncLevelOverlap.Test);
    return;
  }
  InstrProfRecord &Dest = Where->second;

  uint64_t ValueCutoff = FuncFilter.ValueCutoff;
  if (!FuncFilter.NameFilter.empty() && Name.contains(FuncFilter.NameFilter))
    ValueCutoff = 0;

  Dest.overlap(Other, Overlap, FuncLevelOverlap, ValueCutoff);
}

void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
                                InstrProfRecord &&I, uint64_t Weight,
                                function_ref<void(Error)> Warn) {
  auto &ProfileDataMap = FunctionData[Name];

  bool NewFunc;
  ProfilingData::iterator Where;
  std::tie(Where, NewFunc) =
      ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));
  InstrProfRecord &Dest = Where->second;

  auto MapWarn = [&](instrprof_error E) {
    Warn(make_error<InstrProfError>(E));
  };

  if (NewFunc) {
    // We've never seen a function with this name and hash, add it.
    Dest = std::move(I);
    if (Weight > 1)
      Dest.scale(Weight, 1, MapWarn);
  } else {
    // We're updating a function we've seen before.
    Dest.merge(I, Weight, MapWarn);
  }

  Dest.sortValueData();
}

void InstrProfWriter::addMemProfRecord(
    const Function::GUID Id, const memprof::IndexedMemProfRecord &Record) {
  auto [Iter, Inserted] = MemProfData.Records.insert({Id, Record});
  // If we inserted a new record then we are done.
  if (Inserted) {
    return;
  }
  memprof::IndexedMemProfRecord &Existing = Iter->second;
  Existing.merge(Record);
}

bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id,
                                      const memprof::Frame &Frame,
                                      function_ref<void(Error)> Warn) {
  auto [Iter, Inserted] = MemProfData.Frames.insert({Id, Frame});
  // If a mapping already exists for the current frame id and it does not
  // match the new mapping provided, then warn and bail out. We don't support
  // merging memprof data whose Frame -> Id mapping is inconsistent across
  // profiles.
  if (!Inserted && Iter->second != Frame) {
    Warn(make_error<InstrProfError>(instrprof_error::malformed,
                                    "frame to id mapping mismatch"));
    return false;
  }
  return true;
}

bool InstrProfWriter::addMemProfCallStack(
    const memprof::CallStackId CSId,
    const llvm::SmallVector<memprof::FrameId> &CallStack,
    function_ref<void(Error)> Warn) {
  auto [Iter, Inserted] = MemProfData.CallStacks.insert({CSId, CallStack});
  // If a mapping already exists for the current call stack id and it does
  // not match the new mapping provided, then warn and bail out. We don't
  // support merging memprof data whose CallStack -> Id mapping is
  // inconsistent across profiles.
  if (!Inserted && Iter->second != CallStack) {
    Warn(make_error<InstrProfError>(instrprof_error::malformed,
                                    "call stack to id mapping mismatch"));
    return false;
  }
  return true;
}

void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
  llvm::append_range(BinaryIds, BIs);
}

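// Reservoir sampling: once the reservoir is full, the I-th trace in the
// stream replaces a uniformly random slot with probability
// TemporalProfTraceReservoirSize / I, so every trace seen so far is retained
// with equal probability.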
void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
  assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
  assert(!Trace.FunctionNameRefs.empty());
  if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) {
    // Simply append the trace if we have not yet hit our reservoir size limit.
    TemporalProfTraces.push_back(std::move(Trace));
  } else {
    // Otherwise, replace a random trace in the stream.
    std::uniform_int_distribution<uint64_t> Distribution(
        0, TemporalProfTraceStreamSize);
    uint64_t RandomIndex = Distribution(RNG);
    if (RandomIndex < TemporalProfTraces.size())
      TemporalProfTraces[RandomIndex] = std::move(Trace);
  }
  ++TemporalProfTraceStreamSize;
}

void InstrProfWriter::addTemporalProfileTraces(
    SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {
  for (auto &Trace : SrcTraces)
    if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)
      Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);
  llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); });
  // Assume that the source has the same reservoir size as the destination to
  // avoid needing to record it in the indexed profile format.
  bool IsDestSampled =
      (TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize);
  bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize);
  if (!IsDestSampled && IsSrcSampled) {
    // If one of the streams is sampled, ensure that it belongs to Dest.
    std::swap(TemporalProfTraces, SrcTraces);
    std::swap(TemporalProfTraceStreamSize, SrcStreamSize);
    std::swap(IsDestSampled, IsSrcSampled);
  }
  if (!IsSrcSampled) {
    // If the source stream is not sampled, we add each source trace normally.
    for (auto &Trace : SrcTraces)
      addTemporalProfileTrace(std::move(Trace));
    return;
  }
  // Otherwise, we find the traces that would have been removed if we added
  // the whole source stream.
  SmallSetVector<uint64_t, 8> IndicesToReplace;
  for (uint64_t I = 0; I < SrcStreamSize; I++) {
    std::uniform_int_distribution<uint64_t> Distribution(
        0, TemporalProfTraceStreamSize);
    uint64_t RandomIndex = Distribution(RNG);
    if (RandomIndex < TemporalProfTraces.size())
      IndicesToReplace.insert(RandomIndex);
    ++TemporalProfTraceStreamSize;
  }
  // Then we insert a random sample of the source traces.
  llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG);
  for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces))
    TemporalProfTraces[Index] = std::move(Trace);
}

void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
                                             function_ref<void(Error)> Warn) {
  for (auto &I : IPW.FunctionData)
    for (auto &Func : I.getValue())
      addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn);

  BinaryIds.reserve(BinaryIds.size() + IPW.BinaryIds.size());
  for (auto &I : IPW.BinaryIds)
    addBinaryIds(I);

  addTemporalProfileTraces(IPW.TemporalProfTraces,
                           IPW.TemporalProfTraceStreamSize);

  MemProfData.Frames.reserve(IPW.MemProfData.Frames.size());
  for (auto &[FrameId, Frame] : IPW.MemProfData.Frames) {
    // If we weren't able to add the frame mappings then it doesn't make sense
    // to try to merge the records from this profile.
    if (!addMemProfFrame(FrameId, Frame, Warn))
      return;
  }

  MemProfData.CallStacks.reserve(IPW.MemProfData.CallStacks.size());
  for (auto &[CSId, CallStack] : IPW.MemProfData.CallStacks) {
    if (!addMemProfCallStack(CSId, CallStack, Warn))
      return;
  }

  MemProfData.Records.reserve(IPW.MemProfData.Records.size());
  for (auto &[GUID, Record] : IPW.MemProfData.Records) {
    addMemProfRecord(GUID, Record);
  }
}

bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
  if (!Sparse)
    return true;
  for (const auto &Func : PD) {
    const InstrProfRecord &IPR = Func.second;
    if (llvm::any_of(IPR.Counts, [](uint64_t Count) { return Count > 0; }))
      return true;
    if (llvm::any_of(IPR.BitmapBytes, [](uint8_t Byte) { return Byte > 0; }))
      return true;
  }
  return false;
}

static void setSummary(IndexedInstrProf::Summary *TheSummary,
                       ProfileSummary &PS) {
  using namespace IndexedInstrProf;

  const std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary();
  TheSummary->NumSummaryFields = Summary::NumKinds;
  TheSummary->NumCutoffEntries = Res.size();
  TheSummary->set(Summary::MaxFunctionCount, PS.getMaxFunctionCount());
  TheSummary->set(Summary::MaxBlockCount, PS.getMaxCount());
  TheSummary->set(Summary::MaxInternalBlockCount, PS.getMaxInternalCount());
  TheSummary->set(Summary::TotalBlockCount, PS.getTotalCount());
  TheSummary->set(Summary::TotalNumBlocks, PS.getNumCounts());
  TheSummary->set(Summary::TotalNumFunctions, PS.getNumFunctions());
  for (unsigned I = 0; I < Res.size(); I++)
    TheSummary->setEntry(I, Res[I]);
}

// Serialize Schema.
static void writeMemProfSchema(ProfOStream &OS,
                               const memprof::MemProfSchema &Schema) {
  OS.write(static_cast<uint64_t>(Schema.size()));
  for (const auto Id : Schema)
    OS.write(static_cast<uint64_t>(Id));
}

// Serialize MemProfRecordData.  Return RecordTableOffset.
static uint64_t writeMemProfRecords(
    ProfOStream &OS,
    llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
        &MemProfRecordData,
    memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
    llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
        *MemProfCallStackIndexes = nullptr) {
  memprof::RecordWriterTrait RecordWriter(Schema, Version,
                                          MemProfCallStackIndexes);
  OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
      RecordTableGenerator;
  for (auto &[GUID, Record] : MemProfRecordData) {
    // Insert the key (func hash) and value (memprof record).
    RecordTableGenerator.insert(GUID, Record, RecordWriter);
  }
  // Release the memory of this MapVector as it is no longer needed.
  MemProfRecordData.clear();

  // The call to Emit invokes RecordWriterTrait::EmitData which destructs
  // the memprof record copies owned by the RecordTableGenerator. This works
  // because the RecordTableGenerator is not used after this point.
  return RecordTableGenerator.Emit(OS.OS, RecordWriter);
}

// Serialize MemProfFrameData.  Return FrameTableOffset.
static uint64_t writeMemProfFrames(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
  OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
      FrameTableGenerator;
  for (auto &[FrameId, Frame] : MemProfFrameData) {
    // Insert the key (frame id) and value (frame contents).
    FrameTableGenerator.insert(FrameId, Frame);
  }
  // Release the memory of this MapVector as it is no longer needed.
  MemProfFrameData.clear();

  return FrameTableGenerator.Emit(OS.OS);
}

// Serialize MemProfFrameData.  Return the mapping from FrameIds to their
// indexes within the frame array.
static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
writeMemProfFrameArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
  // Mappings from FrameIds to array indexes.
  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;

  // Compute the order in which we serialize Frames.  The order does not matter
  // in terms of correctness, but we still compute it for deserialization
  // performance.  Specifically, if we serialize frequently used Frames one
  // after another, we have better cache utilization.  For two Frames that
  // appear equally frequently, we break a tie by serializing the one that
  // tends to appear earlier in call stacks.  We implement the tie-breaking
  // mechanism by computing the sum of indexes within call stacks for each
  // Frame.  If we still have a tie, then we just resort to comparing the two
  // FrameIds, which is just for the stability of the output.
  std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
  FrameIdOrder.reserve(MemProfFrameData.size());
  for (const auto &[Id, Frame] : MemProfFrameData)
    FrameIdOrder.emplace_back(Id, &Frame);
  assert(MemProfFrameData.size() == FrameIdOrder.size());
  llvm::sort(FrameIdOrder,
             [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
                 const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
               const auto &SL = FrameHistogram[L.first];
               const auto &SR = FrameHistogram[R.first];
               // Popular FrameIds should come first.
               if (SL.Count != SR.Count)
                 return SL.Count > SR.Count;
               // If they are equally popular, then the one that tends to appear
               // earlier in call stacks should come first.
               if (SL.PositionSum != SR.PositionSum)
                 return SL.PositionSum < SR.PositionSum;
               // Compare their FrameIds for sort stability.
               return L.first < R.first;
             });

  // Serialize all frames while creating mappings from FrameIds to linear
  // array indexes.
  uint64_t Index = 0;
  MemProfFrameIndexes.reserve(FrameIdOrder.size());
  for (const auto &[Id, F] : FrameIdOrder) {
    F->serialize(OS.OS);
    MemProfFrameIndexes.insert({Id, Index});
    ++Index;
  }
  assert(MemProfFrameData.size() == Index);
  assert(MemProfFrameData.size() == MemProfFrameIndexes.size());

  // Release the memory of this MapVector as it is no longer needed.
  MemProfFrameData.clear();

  return MemProfFrameIndexes;
}

static uint64_t writeMemProfCallStacks(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData) {
  OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
      CallStackTableGenerator;
  for (auto &[CSId, CallStack] : MemProfCallStackData)
    CallStackTableGenerator.insert(CSId, CallStack);
  // Release the memory of this vector as it is no longer needed.
  MemProfCallStackData.clear();

  return CallStackTableGenerator.Emit(OS.OS);
}

static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
writeMemProfCallStackArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData,
    llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
        &MemProfFrameIndexes,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes;

  memprof::CallStackRadixTreeBuilder Builder;
  Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
                FrameHistogram);
  for (auto I : Builder.getRadixArray())
    OS.write32(I);
  MemProfCallStackIndexes = Builder.takeCallStackPos();

  // Release the memory of this vector as it is no longer needed.
  MemProfCallStackData.clear();

  return MemProfCallStackIndexes;
}

// Write out MemProf Version0 as follows:
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfRecordData
// OnDiskChainedHashTable MemProfFrameData
static Error writeMemProfV0(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData) {
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.

  auto Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version0);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset};
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out MemProf Version1 as follows:
// uint64_t Version (NEW in V1)
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfRecordData
// OnDiskChainedHashTable MemProfFrameData
static Error writeMemProfV1(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData) {
  OS.write(memprof::Version1);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.

  auto Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version1);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset};
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out MemProf Version2 as follows:
// uint64_t Version
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW in V2)
// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfRecordData
// OnDiskChainedHashTable MemProfFrameData
// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
static Error writeMemProfV2(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData,
                            bool MemProfFullSchema) {
  OS.write(memprof::Version2);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof call stack table offset.

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t CallStackPayloadOffset = OS.tell();
  uint64_t CallStackTableOffset =
      writeMemProfCallStacks(OS, MemProfData.CallStacks);

  uint64_t Header[] = {
      RecordTableOffset,      FramePayloadOffset,   FrameTableOffset,
      CallStackPayloadOffset, CallStackTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out MemProf Version3 as follows:
// uint64_t Version
// uint64_t CallStackPayloadOffset = Offset for the call stack payload
// uint64_t RecordPayloadOffset = Offset for the record payload
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// Frames serialized one after another
// Call stacks encoded as a radix tree
// OnDiskChainedHashTable MemProfRecordData
static Error writeMemProfV3(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData,
                            bool MemProfFullSchema) {
  OS.write(memprof::Version3);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof record payload offset.
  OS.write(0ULL); // Reserve space for the memprof record table offset.

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
      memprof::computeFrameHistogram(MemProfData.CallStacks);
  assert(MemProfData.Frames.size() == FrameHistogram.size());

  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
      writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);

  uint64_t CallStackPayloadOffset = OS.tell();
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes = writeMemProfCallStackArray(
          OS, MemProfData.CallStacks, MemProfFrameIndexes, FrameHistogram);

  uint64_t RecordPayloadOffset = OS.tell();
  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
                          &MemProfCallStackIndexes);

  uint64_t Header[] = {
      CallStackPayloadOffset,
      RecordPayloadOffset,
      RecordTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out the MemProf data in a requested version.
static Error writeMemProf(ProfOStream &OS,
                          memprof::IndexedMemProfData &MemProfData,
                          memprof::IndexedVersion MemProfVersionRequested,
                          bool MemProfFullSchema) {
  switch (MemProfVersionRequested) {
  case memprof::Version0:
    return writeMemProfV0(OS, MemProfData);
  case memprof::Version1:
    return writeMemProfV1(OS, MemProfData);
  case memprof::Version2:
    return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
  case memprof::Version3:
    return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
  }

  return make_error<InstrProfError>(
      instrprof_error::unsupported_version,
      formatv("MemProf version {} not supported; "
              "requires version between {} and {}, inclusive",
              MemProfVersionRequested, memprof::MinimumSupportedVersion,
              memprof::MaximumSupportedVersion));
}

uint64_t InstrProfWriter::writeHeader(const IndexedInstrProf::Header &Header,
                                      const bool WritePrevVersion,
                                      ProfOStream &OS) {
  // Only write out the first four fields.
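  // These are Magic, Version, Unused, and HashType in the current layout.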
  for (int I = 0; I < 4; I++)
    OS.write(reinterpret_cast<const uint64_t *>(&Header)[I]);

  // Remember the offset of the remaining fields to allow back patching later.
  auto BackPatchStartOffset = OS.tell();

  // Reserve the space for back patching later.
  OS.write(0); // HashOffset
  OS.write(0); // MemProfOffset
  OS.write(0); // BinaryIdOffset
  OS.write(0); // TemporalProfTracesOffset
  if (!WritePrevVersion)
    OS.write(0); // VTableNamesOffset

  return BackPatchStartOffset;
}

Error InstrProfWriter::writeVTableNames(ProfOStream &OS) {
  std::vector<std::string> VTableNameStrs;
  for (StringRef VTableName : VTableNames.keys())
    VTableNameStrs.push_back(VTableName.str());

  std::string CompressedVTableNames;
  if (!VTableNameStrs.empty())
    if (Error E = collectGlobalObjectNameStrings(
            VTableNameStrs, compression::zlib::isAvailable(),
            CompressedVTableNames))
      return E;

  const uint64_t CompressedStringLen = CompressedVTableNames.length();

  // Record the length of the compressed string.
  OS.write(CompressedStringLen);

  // Write the chars in the compressed string.
  for (auto &c : CompressedVTableNames)
    OS.writeByte(static_cast<uint8_t>(c));

  // Pad up to a multiple of 8.
  // InstrProfReader reads the payload according to 'CompressedStringLen'.
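  // For example, a 13-byte compressed string is padded to 16 bytes, so three
  // zero bytes follow it.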
  const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);

  for (uint64_t K = CompressedStringLen; K < PaddedLength; K++)
    OS.writeByte(0);

  return Error::success();
}

Error InstrProfWriter::writeImpl(ProfOStream &OS) {
  using namespace IndexedInstrProf;
  using namespace support;

  OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator;

  InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs);
  InfoObj->SummaryBuilder = &ISB;
  InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs);
  InfoObj->CSSummaryBuilder = &CSISB;

  // Populate the hash table generator.
  SmallVector<std::pair<StringRef, const ProfilingData *>> OrderedData;
  for (const auto &I : FunctionData)
    if (shouldEncodeData(I.getValue()))
      OrderedData.emplace_back((I.getKey()), &I.getValue());
  llvm::sort(OrderedData, less_first());
  for (const auto &I : OrderedData)
    Generator.insert(I.first, I.second);

  // Write the header.
  IndexedInstrProf::Header Header;
  Header.Version = WritePrevVersion
                       ? IndexedInstrProf::ProfVersion::Version11
                       : IndexedInstrProf::ProfVersion::CurrentVersion;
  // The WritePrevVersion handling will either need to be removed or updated
  // if the version is advanced beyond 12.
  static_assert(IndexedInstrProf::ProfVersion::CurrentVersion ==
                IndexedInstrProf::ProfVersion::Version12);
  if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
    Header.Version |= VARIANT_MASK_IR_PROF;
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
    Header.Version |= VARIANT_MASK_CSIR_PROF;
  if (static_cast<bool>(ProfileKind &
                        InstrProfKind::FunctionEntryInstrumentation))
    Header.Version |= VARIANT_MASK_INSTR_ENTRY;
  if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
    Header.Version |= VARIANT_MASK_BYTE_COVERAGE;
  if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly))
    Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY;
  if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf))
    Header.Version |= VARIANT_MASK_MEMPROF;
  if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
    Header.Version |= VARIANT_MASK_TEMPORAL_PROF;

  const uint64_t BackPatchStartOffset =
      writeHeader(Header, WritePrevVersion, OS);

  // Reserve space to write profile summary data.
  uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
  uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
  // Remember the summary offset.
  uint64_t SummaryOffset = OS.tell();
  for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
    OS.write(0);
  uint64_t CSSummaryOffset = 0;
  uint64_t CSSummarySize = 0;
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {
    CSSummaryOffset = OS.tell();
    CSSummarySize = SummarySize / sizeof(uint64_t);
    for (unsigned I = 0; I < CSSummarySize; I++)
      OS.write(0);
  }

  // Write the hash table.
  uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj);

  // Write the MemProf profile data if we have it.
  uint64_t MemProfSectionStart = 0;
  if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
    MemProfSectionStart = OS.tell();
    if (auto E = writeMemProf(OS, MemProfData, MemProfVersionRequested,
                              MemProfFullSchema))
      return E;
  }

  // BinaryIdSection has two parts:
  // 1. uint64_t BinaryIdsSectionSize
  // 2. list of binary ids that consist of:
  //    a. uint64_t BinaryIdLength
  //    b. uint8_t  BinaryIdData
  //    c. uint8_t  Padding (if necessary)
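  // For example, a 20-byte build ID is stored as 8 bytes of length followed
  // by 24 bytes of data (20 bytes of ID plus 4 bytes of zero padding).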
  uint64_t BinaryIdSectionStart = OS.tell();
  // Calculate size of binary section.
  uint64_t BinaryIdsSectionSize = 0;

  // Remove duplicate binary ids.
  llvm::sort(BinaryIds);
  BinaryIds.erase(llvm::unique(BinaryIds), BinaryIds.end());

  for (const auto &BI : BinaryIds) {
    // Increment by binary id length data type size.
    BinaryIdsSectionSize += sizeof(uint64_t);
    // Increment by binary id data length, aligned to 8 bytes.
    BinaryIdsSectionSize += alignToPowerOf2(BI.size(), sizeof(uint64_t));
  }
  // Write binary ids section size.
  OS.write(BinaryIdsSectionSize);

  for (const auto &BI : BinaryIds) {
    uint64_t BILen = BI.size();
    // Write binary id length.
    OS.write(BILen);
    // Write binary id data.
    for (unsigned K = 0; K < BILen; K++)
      OS.writeByte(BI[K]);
    // Write padding if necessary.
    uint64_t PaddingSize = alignToPowerOf2(BILen, sizeof(uint64_t)) - BILen;
    for (unsigned K = 0; K < PaddingSize; K++)
      OS.writeByte(0);
  }

  uint64_t VTableNamesSectionStart = OS.tell();

  if (!WritePrevVersion)
    if (Error E = writeVTableNames(OS))
      return E;

  uint64_t TemporalProfTracesSectionStart = 0;
  if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {
    TemporalProfTracesSectionStart = OS.tell();
    OS.write(TemporalProfTraces.size());
    OS.write(TemporalProfTraceStreamSize);
    for (auto &Trace : TemporalProfTraces) {
      OS.write(Trace.Weight);
      OS.write(Trace.FunctionNameRefs.size());
      for (auto &NameRef : Trace.FunctionNameRefs)
        OS.write(NameRef);
    }
  }

  // Allocate space for data to be serialized out.
  std::unique_ptr<IndexedInstrProf::Summary> TheSummary =
      IndexedInstrProf::allocSummary(SummarySize);
  // Compute the Summary and copy the data to the data
  // structure to be serialized out (to disk or buffer).
  std::unique_ptr<ProfileSummary> PS = ISB.getSummary();
  setSummary(TheSummary.get(), *PS);
  InfoObj->SummaryBuilder = nullptr;

  // For Context Sensitive summary.
  std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr;
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {
    TheCSSummary = IndexedInstrProf::allocSummary(SummarySize);
    std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary();
    setSummary(TheCSSummary.get(), *CSPS);
  }
  InfoObj->CSSummaryBuilder = nullptr;

  SmallVector<uint64_t, 8> HeaderOffsets = {HashTableStart, MemProfSectionStart,
                                            BinaryIdSectionStart,
                                            TemporalProfTracesSectionStart};
  if (!WritePrevVersion)
    HeaderOffsets.push_back(VTableNamesSectionStart);

  PatchItem PatchItems[] = {
      // Patch the Header fields
      {BackPatchStartOffset, HeaderOffsets},
      // Patch the summary data.
      {SummaryOffset,
       ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheSummary.get()),
                          SummarySize / sizeof(uint64_t))},
      {CSSummaryOffset,
       ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheCSSummary.get()),
                          CSSummarySize)}};

  OS.patch(PatchItems);

  for (const auto &I : FunctionData)
    for (const auto &F : I.getValue())
      if (Error E = validateRecord(F.second))
        return E;

  return Error::success();
}

Error InstrProfWriter::write(raw_fd_ostream &OS) {
  // Write the hash table.
  ProfOStream POS(OS);
  return writeImpl(POS);
}

Error InstrProfWriter::write(raw_string_ostream &OS) {
  ProfOStream POS(OS);
  return writeImpl(POS);
}

std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {
  std::string Data;
  raw_string_ostream OS(Data);
  // Write the hash table.
  if (Error E = write(OS))
    return nullptr;
  // Return this in an aligned memory buffer.
  return MemoryBuffer::getMemBufferCopy(Data);
}
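
// A minimal usage sketch (the record contents are illustrative):
//
//   InstrProfWriter Writer;
//   Writer.addRecord({"foo", /*Hash=*/0x1234, /*Counts=*/{1, 2}}, /*Weight=*/1,
//                    [](Error E) { consumeError(std::move(E)); });
//   std::unique_ptr<MemoryBuffer> Profile = Writer.writeBuffer();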

static const char *ValueProfKindStr[] = {
#define VALUE_PROF_KIND(Enumerator, Value, Descr) #Enumerator,
#include "llvm/ProfileData/InstrProfData.inc"
};

Error InstrProfWriter::validateRecord(const InstrProfRecord &Func) {
  for (uint32_t VK = 0; VK <= IPVK_Last; VK++) {
    if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
      continue;
    uint32_t NS = Func.getNumValueSites(VK);
    for (uint32_t S = 0; S < NS; S++) {
      DenseSet<uint64_t> SeenValues;
      for (const auto &V : Func.getValueArrayForSite(VK, S))
        if (!SeenValues.insert(V.Value).second)
          return make_error<InstrProfError>(instrprof_error::invalid_prof);
    }
  }

  return Error::success();
}

void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
                                        const InstrProfRecord &Func,
                                        InstrProfSymtab &Symtab,
                                        raw_fd_ostream &OS) {
  OS << Name << "\n";
  OS << "# Func Hash:\n" << Hash << "\n";
  OS << "# Num Counters:\n" << Func.Counts.size() << "\n";
  OS << "# Counter Values:\n";
  for (uint64_t Count : Func.Counts)
    OS << Count << "\n";

  if (Func.BitmapBytes.size() > 0) {
    OS << "# Num Bitmap Bytes:\n$" << Func.BitmapBytes.size() << "\n";
    OS << "# Bitmap Byte Values:\n";
    for (uint8_t Byte : Func.BitmapBytes) {
      OS << "0x";
      OS.write_hex(Byte);
      OS << "\n";
    }
    OS << "\n";
  }

  uint32_t NumValueKinds = Func.getNumValueKinds();
  if (!NumValueKinds) {
    OS << "\n";
    return;
  }

  OS << "# Num Value Kinds:\n" << Func.getNumValueKinds() << "\n";
  for (uint32_t VK = 0; VK < IPVK_Last + 1; VK++) {
    uint32_t NS = Func.getNumValueSites(VK);
    if (!NS)
      continue;
    OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n";
    OS << "# NumValueSites:\n" << NS << "\n";
    for (uint32_t S = 0; S < NS; S++) {
      auto VD = Func.getValueArrayForSite(VK, S);
      OS << VD.size() << "\n";
      for (const auto &V : VD) {
        if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
          OS << Symtab.getFuncOrVarNameIfDefined(V.Value) << ":" << V.Count
             << "\n";
        else
          OS << V.Value << ":" << V.Count << "\n";
      }
    }
  }

  OS << "\n";
}
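
// For reference, a record emitted by writeRecordInText for a function "foo"
// with hash 1234 and counter values 10 and 20 looks like this (a sketch;
// bitmap and value profile sections are omitted):
//
//   foo
//   # Func Hash:
//   1234
//   # Num Counters:
//   2
//   # Counter Values:
//   10
//   20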

Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
  // Check CS first since it implies an IR level profile.
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
    OS << "# CSIR level Instrumentation Flag\n:csir\n";
  else if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
    OS << "# IR level Instrumentation Flag\n:ir\n";

  if (static_cast<bool>(ProfileKind &
                        InstrProfKind::FunctionEntryInstrumentation))
    OS << "# Always instrument the function entry block\n:entry_first\n";
  if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
    OS << "# Instrument block coverage\n:single_byte_coverage\n";
  InstrProfSymtab Symtab;

  using FuncPair = detail::DenseMapPair<uint64_t, InstrProfRecord>;
  using RecordType = std::pair<StringRef, FuncPair>;
  SmallVector<RecordType, 4> OrderedFuncData;

  for (const auto &I : FunctionData) {
    if (shouldEncodeData(I.getValue())) {
      if (Error E = Symtab.addFuncName(I.getKey()))
        return E;
      for (const auto &Func : I.getValue())
        OrderedFuncData.push_back(std::make_pair(I.getKey(), Func));
    }
  }

  for (const auto &VTableName : VTableNames)
    if (Error E = Symtab.addVTableName(VTableName.getKey()))
      return E;

  if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
    writeTextTemporalProfTraceData(OS, Symtab);

  llvm::sort(OrderedFuncData, [](const RecordType &A, const RecordType &B) {
    return std::tie(A.first, A.second.first) <
           std::tie(B.first, B.second.first);
  });

  for (const auto &record : OrderedFuncData) {
    const StringRef &Name = record.first;
    const FuncPair &Func = record.second;
    writeRecordInText(Name, Func.first, Func.second, Symtab, OS);
  }

  for (const auto &record : OrderedFuncData) {
    const FuncPair &Func = record.second;
    if (Error E = validateRecord(Func.second))
      return E;
  }

  return Error::success();
}

void InstrProfWriter::writeTextTemporalProfTraceData(raw_fd_ostream &OS,
                                                     InstrProfSymtab &Symtab) {
  OS << ":temporal_prof_traces\n";
  OS << "# Num Temporal Profile Traces:\n" << TemporalProfTraces.size() << "\n";
  OS << "# Temporal Profile Trace Stream Size:\n"
     << TemporalProfTraceStreamSize << "\n";
  for (auto &Trace : TemporalProfTraces) {
    OS << "# Weight:\n" << Trace.Weight << "\n";
    for (auto &NameRef : Trace.FunctionNameRefs)
      OS << Symtab.getFuncOrVarName(NameRef) << ",";
    OS << "\n";
  }
  OS << "\n";
}