//===- InstrProfWriter.cpp - Instrumented profiling writer ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing profiling data for clang's
// instrumentation based PGO and coverage.
//
//===----------------------------------------------------------------------===//

#include "llvm/ProfileData/InstrProfWriter.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/OnDiskHashTable.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

// A struct to define how the data stream should be patched. For indexed
// profiling, only the uint64_t data type is needed.
struct PatchItem {
  uint64_t Pos;         // Where to patch.
  ArrayRef<uint64_t> D; // An array of source data.
};

namespace llvm {

// A wrapper class to abstract the writer stream with support for byte
// back patching.
class ProfOStream {
public:
  ProfOStream(raw_fd_ostream &FD)
      : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {}
  ProfOStream(raw_string_ostream &STR)
      : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {}

  [[nodiscard]] uint64_t tell() const { return OS.tell(); }
  void write(uint64_t V) { LE.write<uint64_t>(V); }
  void write32(uint32_t V) { LE.write<uint32_t>(V); }
  void writeByte(uint8_t V) { LE.write<uint8_t>(V); }

  // \c patch can only be called when all data is written and flushed.
  // For raw_string_ostream, the patch is done on the target string
  // directly and it won't be reflected in the stream's internal buffer.
  void patch(ArrayRef<PatchItem> P) {
    using namespace support;

    if (IsFDOStream) {
      raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
      const uint64_t LastPos = FDOStream.tell();
      for (const auto &K : P) {
        FDOStream.seek(K.Pos);
        for (uint64_t Elem : K.D)
          write(Elem);
      }
      // Reset the stream to the last position after patching so that users
      // don't accidentally overwrite data. This makes it consistent with
      // the string stream below which replaces the data directly.
      FDOStream.seek(LastPos);
    } else {
      raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
      std::string &Data = SOStream.str(); // with flush
      for (const auto &K : P) {
        for (int I = 0, E = K.D.size(); I != E; I++) {
          uint64_t Bytes =
              endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
          Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t),
                       (const char *)&Bytes, sizeof(uint64_t));
        }
      }
    }
  }

  // If \c OS is an instance of \c raw_fd_ostream, this field will be
  // true. Otherwise, \c OS will be a raw_string_ostream.
  bool IsFDOStream;
  raw_ostream &OS;
  support::endian::Writer LE;
};

class InstrProfRecordWriterTrait {
public:
  using key_type = StringRef;
  using key_type_ref = StringRef;

  using data_type = const InstrProfWriter::ProfilingData *const;
  using data_type_ref = const InstrProfWriter::ProfilingData *const;

  using hash_value_type = uint64_t;
  using offset_type = uint64_t;

  llvm::endianness ValueProfDataEndianness = llvm::endianness::little;
  InstrProfSummaryBuilder *SummaryBuilder;
  InstrProfSummaryBuilder *CSSummaryBuilder;

  InstrProfRecordWriterTrait() = default;

  static hash_value_type ComputeHash(key_type_ref K) {
    return IndexedInstrProf::ComputeHash(K);
  }

  static std::pair<offset_type, offset_type>
  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
    using namespace support;

    endian::Writer LE(Out, llvm::endianness::little);

    offset_type N = K.size();
    LE.write<offset_type>(N);

    offset_type M = 0;
    for (const auto &ProfileData : *V) {
      const InstrProfRecord &ProfRecord = ProfileData.second;
      M += sizeof(uint64_t); // The function hash
      M += sizeof(uint64_t); // The size of the Counts vector
      M += ProfRecord.Counts.size() * sizeof(uint64_t);
      M += sizeof(uint64_t); // The size of the Bitmap vector
      M += ProfRecord.BitmapBytes.size() * sizeof(uint64_t);

      // Value data
      M += ValueProfData::getSize(ProfileData.second);
    }
    LE.write<offset_type>(M);

    return std::make_pair(N, M);
  }

  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N) {
    Out.write(K.data(), N);
  }

  void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type) {
    using namespace support;

    endian::Writer LE(Out, llvm::endianness::little);
    for (const auto &ProfileData : *V) {
      const InstrProfRecord &ProfRecord = ProfileData.second;
      if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first))
        CSSummaryBuilder->addRecord(ProfRecord);
      else
        SummaryBuilder->addRecord(ProfRecord);

      LE.write<uint64_t>(ProfileData.first); // Function hash
      LE.write<uint64_t>(ProfRecord.Counts.size());
      for (uint64_t I : ProfRecord.Counts)
        LE.write<uint64_t>(I);

      LE.write<uint64_t>(ProfRecord.BitmapBytes.size());
      for (uint64_t I : ProfRecord.BitmapBytes)
        LE.write<uint64_t>(I);

      // Write value data
      std::unique_ptr<ValueProfData> VDataPtr =
          ValueProfData::serializeFrom(ProfileData.second);
      uint32_t S = VDataPtr->getSize();
      VDataPtr->swapBytesFromHost(ValueProfDataEndianness);
      Out.write((const char *)VDataPtr.get(), S);
    }
  }
};

} // end namespace llvm

InstrProfWriter::InstrProfWriter(
    bool Sparse, uint64_t TemporalProfTraceReservoirSize,
    uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion,
    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema)
    : Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength),
      TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize),
      InfoObj(new InstrProfRecordWriterTrait()),
      WritePrevVersion(WritePrevVersion),
      MemProfVersionRequested(MemProfVersionRequested),
      MemProfFullSchema(MemProfFullSchema) {}

InstrProfWriter::~InstrProfWriter() { delete InfoObj; }

// Internal interface for testing purposes only.
void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) {
  InfoObj->ValueProfDataEndianness = Endianness;
}

void InstrProfWriter::setOutputSparse(bool Sparse) {
  this->Sparse = Sparse;
}

void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
                                function_ref<void(Error)> Warn) {
  auto Name = I.Name;
  auto Hash = I.Hash;
  addRecord(Name, Hash, std::move(I), Weight, Warn);
}

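// Accumulate Other's counts into FuncLevelOverlap.Test and compute its
// overlap with the record of the same name and hash already held by this
// writer, updating the overlap statistics accordingly.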
void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other,
                                    OverlapStats &Overlap,
                                    OverlapStats &FuncLevelOverlap,
                                    const OverlapFuncFilters &FuncFilter) {
  auto Name = Other.Name;
  auto Hash = Other.Hash;
  Other.accumulateCounts(FuncLevelOverlap.Test);
  if (!FunctionData.contains(Name)) {
    Overlap.addOneUnique(FuncLevelOverlap.Test);
    return;
  }
  if (FuncLevelOverlap.Test.CountSum < 1.0f) {
    Overlap.Overlap.NumEntries += 1;
    return;
  }
  auto &ProfileDataMap = FunctionData[Name];
  bool NewFunc;
  ProfilingData::iterator Where;
  std::tie(Where, NewFunc) =
      ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));
  if (NewFunc) {
    Overlap.addOneMismatch(FuncLevelOverlap.Test);
    return;
  }
  InstrProfRecord &Dest = Where->second;

  uint64_t ValueCutoff = FuncFilter.ValueCutoff;
  if (!FuncFilter.NameFilter.empty() && Name.contains(FuncFilter.NameFilter))
    ValueCutoff = 0;

  Dest.overlap(Other, Overlap, FuncLevelOverlap, ValueCutoff);
}

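// Add (or merge) a single record for the given function name and structural
// hash, scaling its counters by Weight and reporting any merge or scaling
// errors through Warn.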
void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
                                InstrProfRecord &&I, uint64_t Weight,
                                function_ref<void(Error)> Warn) {
  auto &ProfileDataMap = FunctionData[Name];

  bool NewFunc;
  ProfilingData::iterator Where;
  std::tie(Where, NewFunc) =
      ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));
  InstrProfRecord &Dest = Where->second;

  auto MapWarn = [&](instrprof_error E) {
    Warn(make_error<InstrProfError>(E));
  };

  if (NewFunc) {
    // We've never seen a function with this name and hash, add it.
    Dest = std::move(I);
    if (Weight > 1)
      Dest.scale(Weight, 1, MapWarn);
  } else {
    // We're updating a function we've seen before.
    Dest.merge(I, Weight, MapWarn);
  }

  Dest.sortValueData();
}

void InstrProfWriter::addMemProfRecord(
    const Function::GUID Id, const memprof::IndexedMemProfRecord &Record) {
  auto [Iter, Inserted] = MemProfData.Records.insert({Id, Record});
  // If we inserted a new record then we are done.
  if (Inserted) {
    return;
  }
  memprof::IndexedMemProfRecord &Existing = Iter->second;
  Existing.merge(Record);
}

bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id,
                                      const memprof::Frame &Frame,
                                      function_ref<void(Error)> Warn) {
  auto [Iter, Inserted] = MemProfData.Frames.insert({Id, Frame});
  // If a mapping already exists for the current frame id and it does not
  // match the new mapping provided, then warn and bail out. We don't support
  // merging memprof data whose Frame -> Id mapping is inconsistent across
  // profiles.
  if (!Inserted && Iter->second != Frame) {
    Warn(make_error<InstrProfError>(instrprof_error::malformed,
                                    "frame to id mapping mismatch"));
    return false;
  }
  return true;
}

bool InstrProfWriter::addMemProfCallStack(
    const memprof::CallStackId CSId,
    const llvm::SmallVector<memprof::FrameId> &CallStack,
    function_ref<void(Error)> Warn) {
  auto [Iter, Inserted] = MemProfData.CallStacks.insert({CSId, CallStack});
  // If a mapping already exists for the current call stack id and it does not
  // match the new mapping provided, then warn and bail out. We don't support
  // merging memprof data whose CallStack -> Id mapping is inconsistent across
  // profiles.
  if (!Inserted && Iter->second != CallStack) {
    Warn(make_error<InstrProfError>(instrprof_error::malformed,
                                    "call stack to id mapping mismatch"));
    return false;
  }
  return true;
}

void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
  llvm::append_range(BinaryIds, BIs);
}

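// Add a single temporal profile trace using reservoir sampling: append the
// trace while the stream is smaller than the reservoir size; otherwise
// overwrite a uniformly random slot so every trace in the stream has a
// roughly equal chance of being kept.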
void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
  assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
  assert(!Trace.FunctionNameRefs.empty());
  if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) {
    // Simply append the trace if we have not yet hit our reservoir size limit.
    TemporalProfTraces.push_back(std::move(Trace));
  } else {
    // Otherwise, replace a random trace in the stream.
    std::uniform_int_distribution<uint64_t> Distribution(
        0, TemporalProfTraceStreamSize);
    uint64_t RandomIndex = Distribution(RNG);
    if (RandomIndex < TemporalProfTraces.size())
      TemporalProfTraces[RandomIndex] = std::move(Trace);
  }
  ++TemporalProfTraceStreamSize;
}

void InstrProfWriter::addTemporalProfileTraces(
    SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {
  for (auto &Trace : SrcTraces)
    if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)
      Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);
  llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); });
  // Assume that the source has the same reservoir size as the destination to
  // avoid needing to record it in the indexed profile format.
  bool IsDestSampled =
      (TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize);
  bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize);
  if (!IsDestSampled && IsSrcSampled) {
    // If only one of the streams is sampled, ensure that it is Dest.
    std::swap(TemporalProfTraces, SrcTraces);
    std::swap(TemporalProfTraceStreamSize, SrcStreamSize);
    std::swap(IsDestSampled, IsSrcSampled);
  }
  if (!IsSrcSampled) {
    // If the source stream is not sampled, we add each source trace normally.
    for (auto &Trace : SrcTraces)
      addTemporalProfileTrace(std::move(Trace));
    return;
  }
  // Otherwise, we find the traces that would have been removed if we added
  // the whole source stream.
  SmallSetVector<uint64_t, 8> IndicesToReplace;
  for (uint64_t I = 0; I < SrcStreamSize; I++) {
    std::uniform_int_distribution<uint64_t> Distribution(
        0, TemporalProfTraceStreamSize);
    uint64_t RandomIndex = Distribution(RNG);
    if (RandomIndex < TemporalProfTraces.size())
      IndicesToReplace.insert(RandomIndex);
    ++TemporalProfTraceStreamSize;
  }
  // Then we insert a random sample of the source traces.
  llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG);
  for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces))
    TemporalProfTraces[Index] = std::move(Trace);
}

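// Merge all data held by another writer into this one: function records,
// binary IDs, temporal profile traces, and MemProf frames, call stacks, and
// records.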
void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
                                             function_ref<void(Error)> Warn) {
  for (auto &I : IPW.FunctionData)
    for (auto &Func : I.getValue())
      addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn);

  BinaryIds.reserve(BinaryIds.size() + IPW.BinaryIds.size());
  for (auto &I : IPW.BinaryIds)
    addBinaryIds(I);

  addTemporalProfileTraces(IPW.TemporalProfTraces,
                           IPW.TemporalProfTraceStreamSize);

  MemProfData.Frames.reserve(IPW.MemProfData.Frames.size());
  for (auto &[FrameId, Frame] : IPW.MemProfData.Frames) {
    // If we weren't able to add the frame mappings then it doesn't make sense
    // to try to merge the records from this profile.
    if (!addMemProfFrame(FrameId, Frame, Warn))
      return;
  }

  MemProfData.CallStacks.reserve(IPW.MemProfData.CallStacks.size());
  for (auto &[CSId, CallStack] : IPW.MemProfData.CallStacks) {
    if (!addMemProfCallStack(CSId, CallStack, Warn))
      return;
  }

  MemProfData.Records.reserve(IPW.MemProfData.Records.size());
  for (auto &[GUID, Record] : IPW.MemProfData.Records) {
    addMemProfRecord(GUID, Record);
  }
}

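// Return true if the profiling data for a function should be written out.
// In sparse mode, functions whose counters and bitmap bytes are all zero are
// skipped.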
bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
  if (!Sparse)
    return true;
  for (const auto &Func : PD) {
    const InstrProfRecord &IPR = Func.second;
    if (llvm::any_of(IPR.Counts, [](uint64_t Count) { return Count > 0; }))
      return true;
    if (llvm::any_of(IPR.BitmapBytes, [](uint8_t Byte) { return Byte > 0; }))
      return true;
  }
  return false;
}

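// Copy the computed ProfileSummary into the on-disk summary layout, including
// the detailed cutoff entries.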
static void setSummary(IndexedInstrProf::Summary *TheSummary,
                       ProfileSummary &PS) {
  using namespace IndexedInstrProf;

  const std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary();
  TheSummary->NumSummaryFields = Summary::NumKinds;
  TheSummary->NumCutoffEntries = Res.size();
  TheSummary->set(Summary::MaxFunctionCount, PS.getMaxFunctionCount());
  TheSummary->set(Summary::MaxBlockCount, PS.getMaxCount());
  TheSummary->set(Summary::MaxInternalBlockCount, PS.getMaxInternalCount());
  TheSummary->set(Summary::TotalBlockCount, PS.getTotalCount());
  TheSummary->set(Summary::TotalNumBlocks, PS.getNumCounts());
  TheSummary->set(Summary::TotalNumFunctions, PS.getNumFunctions());
  for (unsigned I = 0; I < Res.size(); I++)
    TheSummary->setEntry(I, Res[I]);
}

// Serialize Schema.
static void writeMemProfSchema(ProfOStream &OS,
                               const memprof::MemProfSchema &Schema) {
  OS.write(static_cast<uint64_t>(Schema.size()));
  for (const auto Id : Schema)
    OS.write(static_cast<uint64_t>(Id));
}

// Serialize MemProfRecordData. Return RecordTableOffset.
static uint64_t writeMemProfRecords(
    ProfOStream &OS,
    llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
        &MemProfRecordData,
    memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
    llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
        *MemProfCallStackIndexes = nullptr) {
  memprof::RecordWriterTrait RecordWriter(Schema, Version,
                                          MemProfCallStackIndexes);
  OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
      RecordTableGenerator;
  for (auto &[GUID, Record] : MemProfRecordData) {
    // Insert the key (func hash) and value (memprof record).
    RecordTableGenerator.insert(GUID, Record, RecordWriter);
  }
  // Release the memory of this MapVector as it is no longer needed.
  MemProfRecordData.clear();

  // The call to Emit invokes RecordWriterTrait::EmitData which destructs
  // the memprof record copies owned by the RecordTableGenerator. This works
  // because the RecordTableGenerator is not used after this point.
  return RecordTableGenerator.Emit(OS.OS, RecordWriter);
}

// Serialize MemProfFrameData. Return FrameTableOffset.
static uint64_t writeMemProfFrames(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
  OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
      FrameTableGenerator;
  for (auto &[FrameId, Frame] : MemProfFrameData) {
    // Insert the key (frame id) and value (frame contents).
    FrameTableGenerator.insert(FrameId, Frame);
  }
  // Release the memory of this MapVector as it is no longer needed.
  MemProfFrameData.clear();

  return FrameTableGenerator.Emit(OS.OS);
}

// Serialize MemProfFrameData. Return the mapping from FrameIds to their
// indexes within the frame array.
static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
writeMemProfFrameArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
  // Mappings from FrameIds to array indexes.
  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;

  // Compute the order in which we serialize Frames. The order does not matter
  // in terms of correctness, but we still compute it for deserialization
  // performance. Specifically, if we serialize frequently used Frames one
  // after another, we have better cache utilization. For two Frames that
  // appear equally frequently, we break a tie by serializing the one that
  // tends to appear earlier in call stacks. We implement the tie-breaking
  // mechanism by computing the sum of indexes within call stacks for each
  // Frame. If we still have a tie, then we just resort to comparing two
  // FrameIds, purely for stability of output.
  std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
  FrameIdOrder.reserve(MemProfFrameData.size());
  for (const auto &[Id, Frame] : MemProfFrameData)
    FrameIdOrder.emplace_back(Id, &Frame);
  assert(MemProfFrameData.size() == FrameIdOrder.size());
  llvm::sort(FrameIdOrder,
             [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
                 const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
               const auto &SL = FrameHistogram[L.first];
               const auto &SR = FrameHistogram[R.first];
               // Popular FrameIds should come first.
               if (SL.Count != SR.Count)
                 return SL.Count > SR.Count;
               // If they are equally popular, then the one that tends to
               // appear earlier in call stacks should come first.
               if (SL.PositionSum != SR.PositionSum)
                 return SL.PositionSum < SR.PositionSum;
               // Compare their FrameIds for sort stability.
               return L.first < R.first;
             });

  // Serialize all frames while creating mappings from FrameIds to their
  // linear indexes in the serialized frame array.
  uint64_t Index = 0;
  MemProfFrameIndexes.reserve(FrameIdOrder.size());
  for (const auto &[Id, F] : FrameIdOrder) {
    F->serialize(OS.OS);
    MemProfFrameIndexes.insert({Id, Index});
    ++Index;
  }
  assert(MemProfFrameData.size() == Index);
  assert(MemProfFrameData.size() == MemProfFrameIndexes.size());

  // Release the memory of this MapVector as it is no longer needed.
  MemProfFrameData.clear();

  return MemProfFrameIndexes;
}

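// Serialize MemProfCallStackData as an on-disk hash table. Return
// CallStackTableOffset.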
static uint64_t writeMemProfCallStacks(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData) {
  OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
      CallStackTableGenerator;
  for (auto &[CSId, CallStack] : MemProfCallStackData)
    CallStackTableGenerator.insert(CSId, CallStack);
  // Release the memory of this vector as it is no longer needed.
  MemProfCallStackData.clear();

  return CallStackTableGenerator.Emit(OS.OS);
}

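// Serialize MemProfCallStackData as a radix tree of call stacks. Return the
// mapping from CallStackIds to their positions within the radix array.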
static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
writeMemProfCallStackArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData,
    llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
        &MemProfFrameIndexes,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes;

  memprof::CallStackRadixTreeBuilder Builder;
  Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
                FrameHistogram);
  for (auto I : Builder.getRadixArray())
    OS.write32(I);
  MemProfCallStackIndexes = Builder.takeCallStackPos();

  // Release the memory of this vector as it is no longer needed.
  MemProfCallStackData.clear();

  return MemProfCallStackIndexes;
}

// Write out MemProf Version0 as follows:
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfRecordData
// OnDiskChainedHashTable MemProfFrameData
static Error writeMemProfV0(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData) {
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.

  auto Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version0);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset};
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out MemProf Version1 as follows:
// uint64_t Version (NEW in V1)
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfRecordData
// OnDiskChainedHashTable MemProfFrameData
static Error writeMemProfV1(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData) {
  OS.write(memprof::Version1);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.

  auto Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version1);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset};
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out MemProf Version2 as follows:
// uint64_t Version
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2)
// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfRecordData
// OnDiskChainedHashTable MemProfFrameData
// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
static Error writeMemProfV2(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData,
                            bool MemProfFullSchema) {
  OS.write(memprof::Version2);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof call stack table offset.

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t CallStackPayloadOffset = OS.tell();
  uint64_t CallStackTableOffset =
      writeMemProfCallStacks(OS, MemProfData.CallStacks);

  uint64_t Header[] = {
      RecordTableOffset,      FramePayloadOffset,   FrameTableOffset,
      CallStackPayloadOffset, CallStackTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out MemProf Version3 as follows:
// uint64_t Version
// uint64_t CallStackPayloadOffset = Offset for the call stack payload
// uint64_t RecordPayloadOffset = Offset for the record payload
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// Frames serialized one after another
// Call stacks encoded as a radix tree
// OnDiskChainedHashTable MemProfRecordData
static Error writeMemProfV3(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData,
                            bool MemProfFullSchema) {
  OS.write(memprof::Version3);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof record payload offset.
  OS.write(0ULL); // Reserve space for the memprof record table offset.

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
      memprof::computeFrameHistogram(MemProfData.CallStacks);
  assert(MemProfData.Frames.size() == FrameHistogram.size());

  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
      writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);

  uint64_t CallStackPayloadOffset = OS.tell();
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes = writeMemProfCallStackArray(
          OS, MemProfData.CallStacks, MemProfFrameIndexes, FrameHistogram);

  uint64_t RecordPayloadOffset = OS.tell();
  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
                          &MemProfCallStackIndexes);

  uint64_t Header[] = {
      CallStackPayloadOffset,
      RecordPayloadOffset,
      RecordTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out the MemProf data in a requested version.
static Error writeMemProf(ProfOStream &OS,
                          memprof::IndexedMemProfData &MemProfData,
                          memprof::IndexedVersion MemProfVersionRequested,
                          bool MemProfFullSchema) {
  switch (MemProfVersionRequested) {
  case memprof::Version0:
    return writeMemProfV0(OS, MemProfData);
  case memprof::Version1:
    return writeMemProfV1(OS, MemProfData);
  case memprof::Version2:
    return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
  case memprof::Version3:
    return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
  }

  return make_error<InstrProfError>(
      instrprof_error::unsupported_version,
      formatv("MemProf version {} not supported; "
              "requires version between {} and {}, inclusive",
              MemProfVersionRequested, memprof::MinimumSupportedVersion,
              memprof::MaximumSupportedVersion));
}

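// Write the first four fields of the indexed profile header, reserve space
// for the section offsets that are back patched later, and return the file
// offset where back patching should start.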
uint64_t InstrProfWriter::writeHeader(const IndexedInstrProf::Header &Header,
                                      const bool WritePrevVersion,
                                      ProfOStream &OS) {
  // Only write out the first four fields.
  for (int I = 0; I < 4; I++)
    OS.write(reinterpret_cast<const uint64_t *>(&Header)[I]);

  // Remember the offset of the remaining fields to allow back patching later.
  auto BackPatchStartOffset = OS.tell();

  // Reserve the space for back patching later.
  OS.write(0); // HashOffset
  OS.write(0); // MemProfOffset
  OS.write(0); // BinaryIdOffset
  OS.write(0); // TemporalProfTracesOffset
  if (!WritePrevVersion)
    OS.write(0); // VTableNamesOffset

  return BackPatchStartOffset;
}

Error InstrProfWriter::writeVTableNames(ProfOStream &OS) {
  std::vector<std::string> VTableNameStrs;
  for (StringRef VTableName : VTableNames.keys())
    VTableNameStrs.push_back(VTableName.str());

  std::string CompressedVTableNames;
  if (!VTableNameStrs.empty())
    if (Error E = collectGlobalObjectNameStrings(
            VTableNameStrs, compression::zlib::isAvailable(),
            CompressedVTableNames))
      return E;

  const uint64_t CompressedStringLen = CompressedVTableNames.length();

  // Record the length of compressed string.
  OS.write(CompressedStringLen);

  // Write the chars in compressed strings.
  for (auto &c : CompressedVTableNames)
    OS.writeByte(static_cast<uint8_t>(c));

  // Pad up to a multiple of 8.
  // InstrProfReader could read bytes according to 'CompressedStringLen'.
  const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);

  for (uint64_t K = CompressedStringLen; K < PaddedLength; K++)
    OS.writeByte(0);

  return Error::success();
}

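// Write the indexed profile: header, summary placeholders, the function hash
// table, and the optional MemProf, binary ID, vtable name, and temporal
// profile sections, then back patch the header offsets and summaries.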
Error InstrProfWriter::writeImpl(ProfOStream &OS) {
  using namespace IndexedInstrProf;
  using namespace support;

  OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator;

  InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs);
  InfoObj->SummaryBuilder = &ISB;
  InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs);
  InfoObj->CSSummaryBuilder = &CSISB;

  // Populate the hash table generator.
  SmallVector<std::pair<StringRef, const ProfilingData *>> OrderedData;
  for (const auto &I : FunctionData)
    if (shouldEncodeData(I.getValue()))
      OrderedData.emplace_back((I.getKey()), &I.getValue());
  llvm::sort(OrderedData, less_first());
  for (const auto &I : OrderedData)
    Generator.insert(I.first, I.second);

  // Write the header.
  IndexedInstrProf::Header Header;
  Header.Version = WritePrevVersion
                       ? IndexedInstrProf::ProfVersion::Version11
                       : IndexedInstrProf::ProfVersion::CurrentVersion;
  // The WritePrevVersion handling will either need to be removed or updated
  // if the version is advanced beyond 12.
  static_assert(IndexedInstrProf::ProfVersion::CurrentVersion ==
                IndexedInstrProf::ProfVersion::Version12);
  if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
    Header.Version |= VARIANT_MASK_IR_PROF;
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
    Header.Version |= VARIANT_MASK_CSIR_PROF;
  if (static_cast<bool>(ProfileKind &
                        InstrProfKind::FunctionEntryInstrumentation))
    Header.Version |= VARIANT_MASK_INSTR_ENTRY;
  if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
    Header.Version |= VARIANT_MASK_BYTE_COVERAGE;
  if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly))
    Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY;
  if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf))
    Header.Version |= VARIANT_MASK_MEMPROF;
  if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
    Header.Version |= VARIANT_MASK_TEMPORAL_PROF;

  const uint64_t BackPatchStartOffset =
      writeHeader(Header, WritePrevVersion, OS);

  // Reserve space to write profile summary data.
  uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
  uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
  // Remember the summary offset.
  uint64_t SummaryOffset = OS.tell();
  for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
    OS.write(0);
  uint64_t CSSummaryOffset = 0;
  uint64_t CSSummarySize = 0;
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {
    CSSummaryOffset = OS.tell();
    CSSummarySize = SummarySize / sizeof(uint64_t);
    for (unsigned I = 0; I < CSSummarySize; I++)
      OS.write(0);
  }

  // Write the hash table.
  uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj);

  // Write the MemProf profile data if we have it.
  uint64_t MemProfSectionStart = 0;
  if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
    MemProfSectionStart = OS.tell();
    if (auto E = writeMemProf(OS, MemProfData, MemProfVersionRequested,
                              MemProfFullSchema))
      return E;
  }

  // BinaryIdSection has two parts:
  // 1. uint64_t BinaryIdsSectionSize
  // 2. list of binary ids that consist of:
  //    a. uint64_t BinaryIdLength
  //    b. uint8_t  BinaryIdData
  //    c. uint8_t  Padding (if necessary)
  uint64_t BinaryIdSectionStart = OS.tell();
  // Calculate the size of the binary ids section.
  uint64_t BinaryIdsSectionSize = 0;

  // Remove duplicate binary ids.
  llvm::sort(BinaryIds);
  BinaryIds.erase(llvm::unique(BinaryIds), BinaryIds.end());

  for (const auto &BI : BinaryIds) {
    // Increment by binary id length data type size.
    BinaryIdsSectionSize += sizeof(uint64_t);
    // Increment by binary id data length, aligned to 8 bytes.
    BinaryIdsSectionSize += alignToPowerOf2(BI.size(), sizeof(uint64_t));
  }
  // Write binary ids section size.
  OS.write(BinaryIdsSectionSize);

  for (const auto &BI : BinaryIds) {
    uint64_t BILen = BI.size();
    // Write binary id length.
    OS.write(BILen);
    // Write binary id data.
    for (unsigned K = 0; K < BILen; K++)
      OS.writeByte(BI[K]);
    // Write padding if necessary.
    uint64_t PaddingSize = alignToPowerOf2(BILen, sizeof(uint64_t)) - BILen;
    for (unsigned K = 0; K < PaddingSize; K++)
      OS.writeByte(0);
  }

  uint64_t VTableNamesSectionStart = OS.tell();

  if (!WritePrevVersion)
    if (Error E = writeVTableNames(OS))
      return E;

  uint64_t TemporalProfTracesSectionStart = 0;
  if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {
    TemporalProfTracesSectionStart = OS.tell();
    OS.write(TemporalProfTraces.size());
    OS.write(TemporalProfTraceStreamSize);
    for (auto &Trace : TemporalProfTraces) {
      OS.write(Trace.Weight);
      OS.write(Trace.FunctionNameRefs.size());
      for (auto &NameRef : Trace.FunctionNameRefs)
        OS.write(NameRef);
    }
  }

  // Allocate space for data to be serialized out.
  std::unique_ptr<IndexedInstrProf::Summary> TheSummary =
      IndexedInstrProf::allocSummary(SummarySize);
  // Compute the Summary and copy the data to the data
  // structure to be serialized out (to disk or buffer).
  std::unique_ptr<ProfileSummary> PS = ISB.getSummary();
  setSummary(TheSummary.get(), *PS);
  InfoObj->SummaryBuilder = nullptr;

  // For Context Sensitive summary.
  std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr;
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {
    TheCSSummary = IndexedInstrProf::allocSummary(SummarySize);
    std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary();
    setSummary(TheCSSummary.get(), *CSPS);
  }
  InfoObj->CSSummaryBuilder = nullptr;

  SmallVector<uint64_t, 8> HeaderOffsets = {HashTableStart, MemProfSectionStart,
                                            BinaryIdSectionStart,
                                            TemporalProfTracesSectionStart};
  if (!WritePrevVersion)
    HeaderOffsets.push_back(VTableNamesSectionStart);

  PatchItem PatchItems[] = {
      // Patch the Header fields
      {BackPatchStartOffset, HeaderOffsets},
      // Patch the summary data.
      {SummaryOffset,
       ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheSummary.get()),
                          SummarySize / sizeof(uint64_t))},
      {CSSummaryOffset,
       ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheCSSummary.get()),
                          CSSummarySize)}};

  OS.patch(PatchItems);

  for (const auto &I : FunctionData)
    for (const auto &F : I.getValue())
      if (Error E = validateRecord(F.second))
        return E;

  return Error::success();
}

Error InstrProfWriter::write(raw_fd_ostream &OS) {
  // Write the hash table.
  ProfOStream POS(OS);
  return writeImpl(POS);
}

Error InstrProfWriter::write(raw_string_ostream &OS) {
  ProfOStream POS(OS);
  return writeImpl(POS);
}

std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {
  std::string Data;
  raw_string_ostream OS(Data);
  // Write the hash table.
  if (Error E = write(OS))
    return nullptr;
  // Return this in an aligned memory buffer.
  return MemoryBuffer::getMemBufferCopy(Data);
}

static const char *ValueProfKindStr[] = {
#define VALUE_PROF_KIND(Enumerator, Value, Descr) #Enumerator,
#include "llvm/ProfileData/InstrProfData.inc"
};

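// Check each value profile site of the record for duplicate target values;
// sites for the indirect call target and vtable target kinds are skipped.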
Error InstrProfWriter::validateRecord(const InstrProfRecord &Func) {
  for (uint32_t VK = 0; VK <= IPVK_Last; VK++) {
    if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
      continue;
    uint32_t NS = Func.getNumValueSites(VK);
    for (uint32_t S = 0; S < NS; S++) {
      DenseSet<uint64_t> SeenValues;
      for (const auto &V : Func.getValueArrayForSite(VK, S))
        if (!SeenValues.insert(V.Value).second)
          return make_error<InstrProfError>(instrprof_error::invalid_prof);
    }
  }

  return Error::success();
}

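// Write a single function's profile record in the textual profile format:
// name, hash, counters, optional bitmap bytes, and any value profile data.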
void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
                                        const InstrProfRecord &Func,
                                        InstrProfSymtab &Symtab,
                                        raw_fd_ostream &OS) {
  OS << Name << "\n";
  OS << "# Func Hash:\n" << Hash << "\n";
  OS << "# Num Counters:\n" << Func.Counts.size() << "\n";
  OS << "# Counter Values:\n";
  for (uint64_t Count : Func.Counts)
    OS << Count << "\n";

  if (Func.BitmapBytes.size() > 0) {
    OS << "# Num Bitmap Bytes:\n$" << Func.BitmapBytes.size() << "\n";
    OS << "# Bitmap Byte Values:\n";
    for (uint8_t Byte : Func.BitmapBytes) {
      OS << "0x";
      OS.write_hex(Byte);
      OS << "\n";
    }
    OS << "\n";
  }

  uint32_t NumValueKinds = Func.getNumValueKinds();
  if (!NumValueKinds) {
    OS << "\n";
    return;
  }

  OS << "# Num Value Kinds:\n" << Func.getNumValueKinds() << "\n";
  for (uint32_t VK = 0; VK < IPVK_Last + 1; VK++) {
    uint32_t NS = Func.getNumValueSites(VK);
    if (!NS)
      continue;
    OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n";
    OS << "# NumValueSites:\n" << NS << "\n";
    for (uint32_t S = 0; S < NS; S++) {
      auto VD = Func.getValueArrayForSite(VK, S);
      OS << VD.size() << "\n";
      for (const auto &V : VD) {
        if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
          OS << Symtab.getFuncOrVarNameIfDefined(V.Value) << ":" << V.Count
             << "\n";
        else
          OS << V.Value << ":" << V.Count << "\n";
      }
    }
  }

  OS << "\n";
}

Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
  // Check CS first since it implies an IR level profile.
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
    OS << "# CSIR level Instrumentation Flag\n:csir\n";
  else if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
    OS << "# IR level Instrumentation Flag\n:ir\n";

  if (static_cast<bool>(ProfileKind &
                        InstrProfKind::FunctionEntryInstrumentation))
    OS << "# Always instrument the function entry block\n:entry_first\n";
  if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
    OS << "# Instrument block coverage\n:single_byte_coverage\n";
  InstrProfSymtab Symtab;

  using FuncPair = detail::DenseMapPair<uint64_t, InstrProfRecord>;
  using RecordType = std::pair<StringRef, FuncPair>;
  SmallVector<RecordType, 4> OrderedFuncData;

  for (const auto &I : FunctionData) {
    if (shouldEncodeData(I.getValue())) {
      if (Error E = Symtab.addFuncName(I.getKey()))
        return E;
      for (const auto &Func : I.getValue())
        OrderedFuncData.push_back(std::make_pair(I.getKey(), Func));
    }
  }

  for (const auto &VTableName : VTableNames)
    if (Error E = Symtab.addVTableName(VTableName.getKey()))
      return E;

  if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
    writeTextTemporalProfTraceData(OS, Symtab);

  llvm::sort(OrderedFuncData, [](const RecordType &A, const RecordType &B) {
    return std::tie(A.first, A.second.first) <
           std::tie(B.first, B.second.first);
  });

  for (const auto &record : OrderedFuncData) {
    const StringRef &Name = record.first;
    const FuncPair &Func = record.second;
    writeRecordInText(Name, Func.first, Func.second, Symtab, OS);
  }

  for (const auto &record : OrderedFuncData) {
    const FuncPair &Func = record.second;
    if (Error E = validateRecord(Func.second))
      return E;
  }

  return Error::success();
}

void InstrProfWriter::writeTextTemporalProfTraceData(raw_fd_ostream &OS,
                                                     InstrProfSymtab &Symtab) {
  OS << ":temporal_prof_traces\n";
  OS << "# Num Temporal Profile Traces:\n" << TemporalProfTraces.size() << "\n";
  OS << "# Temporal Profile Trace Stream Size:\n"
     << TemporalProfTraceStreamSize << "\n";
  for (auto &Trace : TemporalProfTraces) {
    OS << "# Weight:\n" << Trace.Weight << "\n";
    for (auto &NameRef : Trace.FunctionNameRefs)
      OS << Symtab.getFuncOrVarName(NameRef) << ",";
    OS << "\n";
  }
  OS << "\n";
}