xref: /freebsd/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp (revision 5ca8e32633c4ffbbcd6762e5888b6a4ba0708c6c)
1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading profiling data for clang's
10 // instrumentation based PGO and coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ProfileData/InstrProfReader.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/IR/ProfileSummary.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/ProfileData/MemProf.h"
22 #include "llvm/ProfileData/ProfileCommon.h"
23 #include "llvm/ProfileData/SymbolRemappingReader.h"
24 #include "llvm/Support/Endian.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/ErrorOr.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/SwapByteOrder.h"
29 #include "llvm/Support/VirtualFileSystem.h"
30 #include <algorithm>
31 #include <cstddef>
32 #include <cstdint>
33 #include <limits>
34 #include <memory>
35 #include <system_error>
36 #include <utility>
37 #include <vector>
38 
39 using namespace llvm;
40 
41 // Extracts the variant information from the top 8 bits in the version and
42 // returns an enum specifying the variants present.
43 static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
44   InstrProfKind ProfileKind = InstrProfKind::Unknown;
45   if (Version & VARIANT_MASK_IR_PROF) {
46     ProfileKind |= InstrProfKind::IRInstrumentation;
47   }
48   if (Version & VARIANT_MASK_CSIR_PROF) {
49     ProfileKind |= InstrProfKind::ContextSensitive;
50   }
51   if (Version & VARIANT_MASK_INSTR_ENTRY) {
52     ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
53   }
54   if (Version & VARIANT_MASK_BYTE_COVERAGE) {
55     ProfileKind |= InstrProfKind::SingleByteCoverage;
56   }
57   if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) {
58     ProfileKind |= InstrProfKind::FunctionEntryOnly;
59   }
60   if (Version & VARIANT_MASK_MEMPROF) {
61     ProfileKind |= InstrProfKind::MemProf;
62   }
63   if (Version & VARIANT_MASK_TEMPORAL_PROF) {
64     ProfileKind |= InstrProfKind::TemporalProfile;
65   }
66   return ProfileKind;
67 }
68 
69 static Expected<std::unique_ptr<MemoryBuffer>>
70 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
71   auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
72                                            : FS.getBufferForFile(Filename);
73   if (std::error_code EC = BufferOrErr.getError())
74     return errorCodeToError(EC);
75   return std::move(BufferOrErr.get());
76 }
77 
78 static Error initializeReader(InstrProfReader &Reader) {
79   return Reader.readHeader();
80 }
81 
82 /// Read a list of binary ids from a profile that consist of
83 /// a. uint64_t binary id length
84 /// b. uint8_t  binary id data
85 /// c. uint8_t  padding (if necessary)
86 /// This function is shared between raw and indexed profiles.
87 /// Raw profiles are in host-endian format, and indexed profiles are in
88 /// little-endian format. So, this function takes an argument indicating the
89 /// associated endian format to read the binary ids correctly.
90 static Error
91 readBinaryIdsInternal(const MemoryBuffer &DataBuffer,
92                       const uint64_t BinaryIdsSize,
93                       const uint8_t *BinaryIdsStart,
94                       std::vector<llvm::object::BuildID> &BinaryIds,
95                       const llvm::support::endianness Endian) {
96   using namespace support;
97 
98   if (BinaryIdsSize == 0)
99     return Error::success();
100 
101   const uint8_t *BI = BinaryIdsStart;
102   const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize;
103   const uint8_t *End =
104       reinterpret_cast<const uint8_t *>(DataBuffer.getBufferEnd());
105 
106   while (BI < BIEnd) {
107     size_t Remaining = BIEnd - BI;
108     // There should be enough left to read the binary id length.
109     if (Remaining < sizeof(uint64_t))
110       return make_error<InstrProfError>(
111           instrprof_error::malformed,
112           "not enough data to read binary id length");
113 
114     uint64_t BILen = 0;
115     if (Endian == little)
116       BILen = endian::readNext<uint64_t, little, unaligned>(BI);
117     else
118       BILen = endian::readNext<uint64_t, big, unaligned>(BI);
119 
120     if (BILen == 0)
121       return make_error<InstrProfError>(instrprof_error::malformed,
122                                         "binary id length is 0");
123 
124     Remaining = BIEnd - BI;
125     // There should be enough left to read the binary id data.
126     if (Remaining < alignToPowerOf2(BILen, sizeof(uint64_t)))
127       return make_error<InstrProfError>(
128           instrprof_error::malformed, "not enough data to read binary id data");
129 
130     // Add binary id to the binary ids list.
131     BinaryIds.push_back(object::BuildID(BI, BI + BILen));
132 
133     // Increment by binary id data length, which aligned to the size of uint64.
134     BI += alignToPowerOf2(BILen, sizeof(uint64_t));
135     if (BI > End)
136       return make_error<InstrProfError>(
137           instrprof_error::malformed,
138           "binary id section is greater than buffer size");
139   }
140 
141   return Error::success();
142 }
143 
144 static Error printBinaryIdsInternal(raw_ostream &OS,
145                                     const MemoryBuffer &DataBuffer,
146                                     uint64_t BinaryIdsSize,
147                                     const uint8_t *BinaryIdsStart,
148                                     llvm::support::endianness Endian) {
149   if (BinaryIdsSize == 0)
150     return Error::success();
151 
152   std::vector<llvm::object::BuildID> BinaryIds;
153   if (Error E = readBinaryIdsInternal(DataBuffer, BinaryIdsSize, BinaryIdsStart,
154                                       BinaryIds, Endian))
155     return E;
156 
157   OS << "Binary IDs: \n";
158   for (auto BI : BinaryIds) {
159     for (uint64_t I = 0; I < BI.size(); I++)
160       OS << format("%02x", BI[I]);
161     OS << "\n";
162   }
163 
164   return Error::success();
165 }
166 
167 Expected<std::unique_ptr<InstrProfReader>>
168 InstrProfReader::create(const Twine &Path, vfs::FileSystem &FS,
169                         const InstrProfCorrelator *Correlator) {
170   // Set up the buffer to read.
171   auto BufferOrError = setupMemoryBuffer(Path, FS);
172   if (Error E = BufferOrError.takeError())
173     return std::move(E);
174   return InstrProfReader::create(std::move(BufferOrError.get()), Correlator);
175 }
176 
177 Expected<std::unique_ptr<InstrProfReader>>
178 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
179                         const InstrProfCorrelator *Correlator) {
180   if (Buffer->getBufferSize() == 0)
181     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
182 
183   std::unique_ptr<InstrProfReader> Result;
184   // Create the reader.
185   if (IndexedInstrProfReader::hasFormat(*Buffer))
186     Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
187   else if (RawInstrProfReader64::hasFormat(*Buffer))
188     Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator));
189   else if (RawInstrProfReader32::hasFormat(*Buffer))
190     Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator));
191   else if (TextInstrProfReader::hasFormat(*Buffer))
192     Result.reset(new TextInstrProfReader(std::move(Buffer)));
193   else
194     return make_error<InstrProfError>(instrprof_error::unrecognized_format);
195 
196   // Initialize the reader and return the result.
197   if (Error E = initializeReader(*Result))
198     return std::move(E);
199 
200   return std::move(Result);
201 }
202 
203 Expected<std::unique_ptr<IndexedInstrProfReader>>
204 IndexedInstrProfReader::create(const Twine &Path, vfs::FileSystem &FS,
205                                const Twine &RemappingPath) {
206   // Set up the buffer to read.
207   auto BufferOrError = setupMemoryBuffer(Path, FS);
208   if (Error E = BufferOrError.takeError())
209     return std::move(E);
210 
211   // Set up the remapping buffer if requested.
212   std::unique_ptr<MemoryBuffer> RemappingBuffer;
213   std::string RemappingPathStr = RemappingPath.str();
214   if (!RemappingPathStr.empty()) {
215     auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr, FS);
216     if (Error E = RemappingBufferOrError.takeError())
217       return std::move(E);
218     RemappingBuffer = std::move(RemappingBufferOrError.get());
219   }
220 
221   return IndexedInstrProfReader::create(std::move(BufferOrError.get()),
222                                         std::move(RemappingBuffer));
223 }
224 
225 Expected<std::unique_ptr<IndexedInstrProfReader>>
226 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
227                                std::unique_ptr<MemoryBuffer> RemappingBuffer) {
228   // Create the reader.
229   if (!IndexedInstrProfReader::hasFormat(*Buffer))
230     return make_error<InstrProfError>(instrprof_error::bad_magic);
231   auto Result = std::make_unique<IndexedInstrProfReader>(
232       std::move(Buffer), std::move(RemappingBuffer));
233 
234   // Initialize the reader and return the result.
235   if (Error E = initializeReader(*Result))
236     return std::move(E);
237 
238   return std::move(Result);
239 }
240 
241 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
242   // Verify that this really looks like plain ASCII text by checking a
243   // 'reasonable' number of characters (up to profile magic size).
244   size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t));
245   StringRef buffer = Buffer.getBufferStart();
246   return count == 0 ||
247          std::all_of(buffer.begin(), buffer.begin() + count,
248                      [](char c) { return isPrint(c) || isSpace(c); });
249 }
250 
251 // Read the profile variant flag from the header: ":FE" means this is a FE
252 // generated profile. ":IR" means this is an IR level profile. Other strings
253 // with a leading ':' will be reported an error format.
254 Error TextInstrProfReader::readHeader() {
255   Symtab.reset(new InstrProfSymtab());
256 
257   while (Line->startswith(":")) {
258     StringRef Str = Line->substr(1);
259     if (Str.equals_insensitive("ir"))
260       ProfileKind |= InstrProfKind::IRInstrumentation;
261     else if (Str.equals_insensitive("fe"))
262       ProfileKind |= InstrProfKind::FrontendInstrumentation;
263     else if (Str.equals_insensitive("csir")) {
264       ProfileKind |= InstrProfKind::IRInstrumentation;
265       ProfileKind |= InstrProfKind::ContextSensitive;
266     } else if (Str.equals_insensitive("entry_first"))
267       ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
268     else if (Str.equals_insensitive("not_entry_first"))
269       ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation;
270     else if (Str.equals_insensitive("temporal_prof_traces")) {
271       ProfileKind |= InstrProfKind::TemporalProfile;
272       if (auto Err = readTemporalProfTraceData())
273         return error(std::move(Err));
274     } else
275       return error(instrprof_error::bad_header);
276     ++Line;
277   }
278   return success();
279 }
280 
281 /// Temporal profile trace data is stored in the header immediately after
282 /// ":temporal_prof_traces". The first integer is the number of traces, the
283 /// second integer is the stream size, then the following lines are the actual
284 /// traces which consist of a weight and a comma separated list of function
285 /// names.
286 Error TextInstrProfReader::readTemporalProfTraceData() {
287   if ((++Line).is_at_end())
288     return error(instrprof_error::eof);
289 
290   uint32_t NumTraces;
291   if (Line->getAsInteger(0, NumTraces))
292     return error(instrprof_error::malformed);
293 
294   if ((++Line).is_at_end())
295     return error(instrprof_error::eof);
296 
297   if (Line->getAsInteger(0, TemporalProfTraceStreamSize))
298     return error(instrprof_error::malformed);
299 
300   for (uint32_t i = 0; i < NumTraces; i++) {
301     if ((++Line).is_at_end())
302       return error(instrprof_error::eof);
303 
304     TemporalProfTraceTy Trace;
305     if (Line->getAsInteger(0, Trace.Weight))
306       return error(instrprof_error::malformed);
307 
308     if ((++Line).is_at_end())
309       return error(instrprof_error::eof);
310 
311     SmallVector<StringRef> FuncNames;
312     Line->split(FuncNames, ",", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
313     for (auto &FuncName : FuncNames)
314       Trace.FunctionNameRefs.push_back(
315           IndexedInstrProf::ComputeHash(FuncName.trim()));
316     TemporalProfTraces.push_back(std::move(Trace));
317   }
318   return success();
319 }
320 
321 Error
322 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
323 
324 #define CHECK_LINE_END(Line)                                                   \
325   if (Line.is_at_end())                                                        \
326     return error(instrprof_error::truncated);
327 #define READ_NUM(Str, Dst)                                                     \
328   if ((Str).getAsInteger(10, (Dst)))                                           \
329     return error(instrprof_error::malformed);
330 #define VP_READ_ADVANCE(Val)                                                   \
331   CHECK_LINE_END(Line);                                                        \
332   uint32_t Val;                                                                \
333   READ_NUM((*Line), (Val));                                                    \
334   Line++;
335 
336   if (Line.is_at_end())
337     return success();
338 
339   uint32_t NumValueKinds;
340   if (Line->getAsInteger(10, NumValueKinds)) {
341     // No value profile data
342     return success();
343   }
344   if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1)
345     return error(instrprof_error::malformed,
346                  "number of value kinds is invalid");
347   Line++;
348 
349   for (uint32_t VK = 0; VK < NumValueKinds; VK++) {
350     VP_READ_ADVANCE(ValueKind);
351     if (ValueKind > IPVK_Last)
352       return error(instrprof_error::malformed, "value kind is invalid");
353     ;
354     VP_READ_ADVANCE(NumValueSites);
355     if (!NumValueSites)
356       continue;
357 
358     Record.reserveSites(VK, NumValueSites);
359     for (uint32_t S = 0; S < NumValueSites; S++) {
360       VP_READ_ADVANCE(NumValueData);
361 
362       std::vector<InstrProfValueData> CurrentValues;
363       for (uint32_t V = 0; V < NumValueData; V++) {
364         CHECK_LINE_END(Line);
365         std::pair<StringRef, StringRef> VD = Line->rsplit(':');
366         uint64_t TakenCount, Value;
367         if (ValueKind == IPVK_IndirectCallTarget) {
368           if (InstrProfSymtab::isExternalSymbol(VD.first)) {
369             Value = 0;
370           } else {
371             if (Error E = Symtab->addFuncName(VD.first))
372               return E;
373             Value = IndexedInstrProf::ComputeHash(VD.first);
374           }
375         } else {
376           READ_NUM(VD.first, Value);
377         }
378         READ_NUM(VD.second, TakenCount);
379         CurrentValues.push_back({Value, TakenCount});
380         Line++;
381       }
382       Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData,
383                           nullptr);
384     }
385   }
386   return success();
387 
388 #undef CHECK_LINE_END
389 #undef READ_NUM
390 #undef VP_READ_ADVANCE
391 }
392 
393 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
394   // Skip empty lines and comments.
395   while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
396     ++Line;
397   // If we hit EOF while looking for a name, we're done.
398   if (Line.is_at_end()) {
399     return error(instrprof_error::eof);
400   }
401 
402   // Read the function name.
403   Record.Name = *Line++;
404   if (Error E = Symtab->addFuncName(Record.Name))
405     return error(std::move(E));
406 
407   // Read the function hash.
408   if (Line.is_at_end())
409     return error(instrprof_error::truncated);
410   if ((Line++)->getAsInteger(0, Record.Hash))
411     return error(instrprof_error::malformed,
412                  "function hash is not a valid integer");
413 
414   // Read the number of counters.
415   uint64_t NumCounters;
416   if (Line.is_at_end())
417     return error(instrprof_error::truncated);
418   if ((Line++)->getAsInteger(10, NumCounters))
419     return error(instrprof_error::malformed,
420                  "number of counters is not a valid integer");
421   if (NumCounters == 0)
422     return error(instrprof_error::malformed, "number of counters is zero");
423 
424   // Read each counter and fill our internal storage with the values.
425   Record.Clear();
426   Record.Counts.reserve(NumCounters);
427   for (uint64_t I = 0; I < NumCounters; ++I) {
428     if (Line.is_at_end())
429       return error(instrprof_error::truncated);
430     uint64_t Count;
431     if ((Line++)->getAsInteger(10, Count))
432       return error(instrprof_error::malformed, "count is invalid");
433     Record.Counts.push_back(Count);
434   }
435 
436   // Check if value profile data exists and read it if so.
437   if (Error E = readValueProfileData(Record))
438     return error(std::move(E));
439 
440   return success();
441 }
442 
443 template <class IntPtrT>
444 InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const {
445   return getProfileKindFromVersion(Version);
446 }
447 
448 template <class IntPtrT>
449 SmallVector<TemporalProfTraceTy> &
450 RawInstrProfReader<IntPtrT>::getTemporalProfTraces(
451     std::optional<uint64_t> Weight) {
452   if (TemporalProfTimestamps.empty()) {
453     assert(TemporalProfTraces.empty());
454     return TemporalProfTraces;
455   }
456   // Sort functions by their timestamps to build the trace.
457   std::sort(TemporalProfTimestamps.begin(), TemporalProfTimestamps.end());
458   TemporalProfTraceTy Trace;
459   if (Weight)
460     Trace.Weight = *Weight;
461   for (auto &[TimestampValue, NameRef] : TemporalProfTimestamps)
462     Trace.FunctionNameRefs.push_back(NameRef);
463   TemporalProfTraces = {std::move(Trace)};
464   return TemporalProfTraces;
465 }
466 
467 template <class IntPtrT>
468 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
469   if (DataBuffer.getBufferSize() < sizeof(uint64_t))
470     return false;
471   uint64_t Magic =
472     *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
473   return RawInstrProf::getMagic<IntPtrT>() == Magic ||
474          sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic;
475 }
476 
477 template <class IntPtrT>
478 Error RawInstrProfReader<IntPtrT>::readHeader() {
479   if (!hasFormat(*DataBuffer))
480     return error(instrprof_error::bad_magic);
481   if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))
482     return error(instrprof_error::bad_header);
483   auto *Header = reinterpret_cast<const RawInstrProf::Header *>(
484       DataBuffer->getBufferStart());
485   ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();
486   return readHeader(*Header);
487 }
488 
489 template <class IntPtrT>
490 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
491   const char *End = DataBuffer->getBufferEnd();
492   // Skip zero padding between profiles.
493   while (CurrentPos != End && *CurrentPos == 0)
494     ++CurrentPos;
495   // If there's nothing left, we're done.
496   if (CurrentPos == End)
497     return make_error<InstrProfError>(instrprof_error::eof);
498   // If there isn't enough space for another header, this is probably just
499   // garbage at the end of the file.
500   if (CurrentPos + sizeof(RawInstrProf::Header) > End)
501     return make_error<InstrProfError>(instrprof_error::malformed,
502                                       "not enough space for another header");
503   // The writer ensures each profile is padded to start at an aligned address.
504   if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t))
505     return make_error<InstrProfError>(instrprof_error::malformed,
506                                       "insufficient padding");
507   // The magic should have the same byte order as in the previous header.
508   uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
509   if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
510     return make_error<InstrProfError>(instrprof_error::bad_magic);
511 
512   // There's another profile to read, so we need to process the header.
513   auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);
514   return readHeader(*Header);
515 }
516 
517 template <class IntPtrT>
518 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
519   if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart)))
520     return error(std::move(E));
521   for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
522     const IntPtrT FPtr = swap(I->FunctionPointer);
523     if (!FPtr)
524       continue;
525     Symtab.mapAddress(FPtr, I->NameRef);
526   }
527   return success();
528 }
529 
530 template <class IntPtrT>
531 Error RawInstrProfReader<IntPtrT>::readHeader(
532     const RawInstrProf::Header &Header) {
533   Version = swap(Header.Version);
534   if (GET_VERSION(Version) != RawInstrProf::Version)
535     return error(instrprof_error::raw_profile_version_mismatch,
536                  ("Profile uses raw profile format version = " +
537                   Twine(GET_VERSION(Version)) +
538                   "; expected version = " + Twine(RawInstrProf::Version) +
539                   "\nPLEASE update this tool to version in the raw profile, or "
540                   "regenerate raw profile with expected version.")
541                      .str());
542   if (useDebugInfoCorrelate() && !Correlator)
543     return error(instrprof_error::missing_debug_info_for_correlation);
544   if (!useDebugInfoCorrelate() && Correlator)
545     return error(instrprof_error::unexpected_debug_info_for_correlation);
546 
547   BinaryIdsSize = swap(Header.BinaryIdsSize);
548   if (BinaryIdsSize % sizeof(uint64_t))
549     return error(instrprof_error::bad_header);
550 
551   CountersDelta = swap(Header.CountersDelta);
552   NamesDelta = swap(Header.NamesDelta);
553   auto NumData = swap(Header.DataSize);
554   auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters);
555   auto CountersSize = swap(Header.CountersSize) * getCounterTypeSize();
556   auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
557   auto NamesSize = swap(Header.NamesSize);
558   ValueKindLast = swap(Header.ValueKindLast);
559 
560   auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>);
561   auto PaddingSize = getNumPaddingBytes(NamesSize);
562 
563   // Profile data starts after profile header and binary ids if exist.
564   ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize;
565   ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters;
566   ptrdiff_t NamesOffset =
567       CountersOffset + CountersSize + PaddingBytesAfterCounters;
568   ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
569 
570   auto *Start = reinterpret_cast<const char *>(&Header);
571   if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
572     return error(instrprof_error::bad_header);
573 
574   if (Correlator) {
575     // These sizes in the raw file are zero because we constructed them in the
576     // Correlator.
577     assert(DataSize == 0 && NamesSize == 0);
578     assert(CountersDelta == 0 && NamesDelta == 0);
579     Data = Correlator->getDataPointer();
580     DataEnd = Data + Correlator->getDataSize();
581     NamesStart = Correlator->getNamesPointer();
582     NamesEnd = NamesStart + Correlator->getNamesSize();
583   } else {
584     Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
585         Start + DataOffset);
586     DataEnd = Data + NumData;
587     NamesStart = Start + NamesOffset;
588     NamesEnd = NamesStart + NamesSize;
589   }
590 
591   // Binary ids start just after the header.
592   BinaryIdsStart =
593       reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header);
594   CountersStart = Start + CountersOffset;
595   CountersEnd = CountersStart + CountersSize;
596   ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
597 
598   const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd();
599   if (BinaryIdsStart + BinaryIdsSize > BufferEnd)
600     return error(instrprof_error::bad_header);
601 
602   std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
603   if (Error E = createSymtab(*NewSymtab))
604     return E;
605 
606   Symtab = std::move(NewSymtab);
607   return success();
608 }
609 
610 template <class IntPtrT>
611 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) {
612   Record.Name = getName(Data->NameRef);
613   return success();
614 }
615 
616 template <class IntPtrT>
617 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) {
618   Record.Hash = swap(Data->FuncHash);
619   return success();
620 }
621 
622 template <class IntPtrT>
623 Error RawInstrProfReader<IntPtrT>::readRawCounts(
624     InstrProfRecord &Record) {
625   uint32_t NumCounters = swap(Data->NumCounters);
626   if (NumCounters == 0)
627     return error(instrprof_error::malformed, "number of counters is zero");
628 
629   ptrdiff_t CounterBaseOffset = swap(Data->CounterPtr) - CountersDelta;
630   if (CounterBaseOffset < 0)
631     return error(
632         instrprof_error::malformed,
633         ("counter offset " + Twine(CounterBaseOffset) + " is negative").str());
634 
635   if (CounterBaseOffset >= CountersEnd - CountersStart)
636     return error(instrprof_error::malformed,
637                  ("counter offset " + Twine(CounterBaseOffset) +
638                   " is greater than the maximum counter offset " +
639                   Twine(CountersEnd - CountersStart - 1))
640                      .str());
641 
642   uint64_t MaxNumCounters =
643       (CountersEnd - (CountersStart + CounterBaseOffset)) /
644       getCounterTypeSize();
645   if (NumCounters > MaxNumCounters)
646     return error(instrprof_error::malformed,
647                  ("number of counters " + Twine(NumCounters) +
648                   " is greater than the maximum number of counters " +
649                   Twine(MaxNumCounters))
650                      .str());
651 
652   Record.Counts.clear();
653   Record.Counts.reserve(NumCounters);
654   for (uint32_t I = 0; I < NumCounters; I++) {
655     const char *Ptr =
656         CountersStart + CounterBaseOffset + I * getCounterTypeSize();
657     if (I == 0 && hasTemporalProfile()) {
658       uint64_t TimestampValue = swap(*reinterpret_cast<const uint64_t *>(Ptr));
659       if (TimestampValue != 0 &&
660           TimestampValue != std::numeric_limits<uint64_t>::max()) {
661         TemporalProfTimestamps.emplace_back(TimestampValue,
662                                             swap(Data->NameRef));
663         TemporalProfTraceStreamSize = 1;
664       }
665       if (hasSingleByteCoverage()) {
666         // In coverage mode, getCounterTypeSize() returns 1 byte but our
667         // timestamp field has size uint64_t. Increment I so that the next
668         // iteration of this for loop points to the byte after the timestamp
669         // field, i.e., I += 8.
670         I += 7;
671       }
672       continue;
673     }
674     if (hasSingleByteCoverage()) {
675       // A value of zero signifies the block is covered.
676       Record.Counts.push_back(*Ptr == 0 ? 1 : 0);
677     } else {
678       const auto *CounterValue = reinterpret_cast<const uint64_t *>(Ptr);
679       Record.Counts.push_back(swap(*CounterValue));
680     }
681   }
682 
683   return success();
684 }
685 
686 template <class IntPtrT>
687 Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
688     InstrProfRecord &Record) {
689   Record.clearValueData();
690   CurValueDataSize = 0;
691   // Need to match the logic in value profile dumper code in compiler-rt:
692   uint32_t NumValueKinds = 0;
693   for (uint32_t I = 0; I < IPVK_Last + 1; I++)
694     NumValueKinds += (Data->NumValueSites[I] != 0);
695 
696   if (!NumValueKinds)
697     return success();
698 
699   Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
700       ValueProfData::getValueProfData(
701           ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(),
702           getDataEndianness());
703 
704   if (Error E = VDataPtrOrErr.takeError())
705     return E;
706 
707   // Note that besides deserialization, this also performs the conversion for
708   // indirect call targets.  The function pointers from the raw profile are
709   // remapped into function name hashes.
710   VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get());
711   CurValueDataSize = VDataPtrOrErr.get()->getSize();
712   return success();
713 }
714 
715 template <class IntPtrT>
716 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) {
717   // Keep reading profiles that consist of only headers and no profile data and
718   // counters.
719   while (atEnd())
720     // At this point, ValueDataStart field points to the next header.
721     if (Error E = readNextHeader(getNextHeaderPos()))
722       return error(std::move(E));
723 
724   // Read name and set it in Record.
725   if (Error E = readName(Record))
726     return error(std::move(E));
727 
728   // Read FuncHash and set it in Record.
729   if (Error E = readFuncHash(Record))
730     return error(std::move(E));
731 
732   // Read raw counts and set Record.
733   if (Error E = readRawCounts(Record))
734     return error(std::move(E));
735 
736   // Read value data and set Record.
737   if (Error E = readValueProfilingData(Record))
738     return error(std::move(E));
739 
740   // Iterate.
741   advanceData();
742   return success();
743 }
744 
745 template <class IntPtrT>
746 Error RawInstrProfReader<IntPtrT>::readBinaryIds(
747     std::vector<llvm::object::BuildID> &BinaryIds) {
748   return readBinaryIdsInternal(*DataBuffer, BinaryIdsSize, BinaryIdsStart,
749                                BinaryIds, getDataEndianness());
750 }
751 
752 template <class IntPtrT>
753 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
754   return printBinaryIdsInternal(OS, *DataBuffer, BinaryIdsSize, BinaryIdsStart,
755                                 getDataEndianness());
756 }
757 
758 namespace llvm {
759 
760 template class RawInstrProfReader<uint32_t>;
761 template class RawInstrProfReader<uint64_t>;
762 
763 } // end namespace llvm
764 
765 InstrProfLookupTrait::hash_value_type
766 InstrProfLookupTrait::ComputeHash(StringRef K) {
767   return IndexedInstrProf::ComputeHash(HashType, K);
768 }
769 
770 using data_type = InstrProfLookupTrait::data_type;
771 using offset_type = InstrProfLookupTrait::offset_type;
772 
773 bool InstrProfLookupTrait::readValueProfilingData(
774     const unsigned char *&D, const unsigned char *const End) {
775   Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
776       ValueProfData::getValueProfData(D, End, ValueProfDataEndianness);
777 
778   if (VDataPtrOrErr.takeError())
779     return false;
780 
781   VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr);
782   D += VDataPtrOrErr.get()->TotalSize;
783 
784   return true;
785 }
786 
787 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
788                                          offset_type N) {
789   using namespace support;
790 
791   // Check if the data is corrupt. If so, don't try to read it.
792   if (N % sizeof(uint64_t))
793     return data_type();
794 
795   DataBuffer.clear();
796   std::vector<uint64_t> CounterBuffer;
797 
798   const unsigned char *End = D + N;
799   while (D < End) {
800     // Read hash.
801     if (D + sizeof(uint64_t) >= End)
802       return data_type();
803     uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
804 
805     // Initialize number of counters for GET_VERSION(FormatVersion) == 1.
806     uint64_t CountsSize = N / sizeof(uint64_t) - 1;
807     // If format version is different then read the number of counters.
808     if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) {
809       if (D + sizeof(uint64_t) > End)
810         return data_type();
811       CountsSize = endian::readNext<uint64_t, little, unaligned>(D);
812     }
813     // Read counter values.
814     if (D + CountsSize * sizeof(uint64_t) > End)
815       return data_type();
816 
817     CounterBuffer.clear();
818     CounterBuffer.reserve(CountsSize);
819     for (uint64_t J = 0; J < CountsSize; ++J)
820       CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
821 
822     DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer));
823 
824     // Read value profiling data.
825     if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 &&
826         !readValueProfilingData(D, End)) {
827       DataBuffer.clear();
828       return data_type();
829     }
830   }
831   return DataBuffer;
832 }
833 
834 template <typename HashTableImpl>
835 Error InstrProfReaderIndex<HashTableImpl>::getRecords(
836     StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) {
837   auto Iter = HashTable->find(FuncName);
838   if (Iter == HashTable->end())
839     return make_error<InstrProfError>(instrprof_error::unknown_function);
840 
841   Data = (*Iter);
842   if (Data.empty())
843     return make_error<InstrProfError>(instrprof_error::malformed,
844                                       "profile data is empty");
845 
846   return Error::success();
847 }
848 
849 template <typename HashTableImpl>
850 Error InstrProfReaderIndex<HashTableImpl>::getRecords(
851     ArrayRef<NamedInstrProfRecord> &Data) {
852   if (atEnd())
853     return make_error<InstrProfError>(instrprof_error::eof);
854 
855   Data = *RecordIterator;
856 
857   if (Data.empty())
858     return make_error<InstrProfError>(instrprof_error::malformed,
859                                       "profile data is empty");
860 
861   return Error::success();
862 }
863 
864 template <typename HashTableImpl>
865 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
866     const unsigned char *Buckets, const unsigned char *const Payload,
867     const unsigned char *const Base, IndexedInstrProf::HashT HashType,
868     uint64_t Version) {
869   FormatVersion = Version;
870   HashTable.reset(HashTableImpl::Create(
871       Buckets, Payload, Base,
872       typename HashTableImpl::InfoType(HashType, Version)));
873   RecordIterator = HashTable->data_begin();
874 }
875 
876 template <typename HashTableImpl>
877 InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const {
878   return getProfileKindFromVersion(FormatVersion);
879 }
880 
881 namespace {
882 /// A remapper that does not apply any remappings.
883 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
884   InstrProfReaderIndexBase &Underlying;
885 
886 public:
887   InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying)
888       : Underlying(Underlying) {}
889 
890   Error getRecords(StringRef FuncName,
891                    ArrayRef<NamedInstrProfRecord> &Data) override {
892     return Underlying.getRecords(FuncName, Data);
893   }
894 };
895 } // namespace
896 
897 /// A remapper that applies remappings based on a symbol remapping file.
898 template <typename HashTableImpl>
899 class llvm::InstrProfReaderItaniumRemapper
900     : public InstrProfReaderRemapper {
901 public:
902   InstrProfReaderItaniumRemapper(
903       std::unique_ptr<MemoryBuffer> RemapBuffer,
904       InstrProfReaderIndex<HashTableImpl> &Underlying)
905       : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) {
906   }
907 
908   /// Extract the original function name from a PGO function name.
909   static StringRef extractName(StringRef Name) {
910     // We can have multiple :-separated pieces; there can be pieces both
911     // before and after the mangled name. Find the first part that starts
912     // with '_Z'; we'll assume that's the mangled name we want.
913     std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
914     while (true) {
915       Parts = Parts.second.split(':');
916       if (Parts.first.startswith("_Z"))
917         return Parts.first;
918       if (Parts.second.empty())
919         return Name;
920     }
921   }
922 
923   /// Given a mangled name extracted from a PGO function name, and a new
924   /// form for that mangled name, reconstitute the name.
925   static void reconstituteName(StringRef OrigName, StringRef ExtractedName,
926                                StringRef Replacement,
927                                SmallVectorImpl<char> &Out) {
928     Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size());
929     Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin());
930     Out.insert(Out.end(), Replacement.begin(), Replacement.end());
931     Out.insert(Out.end(), ExtractedName.end(), OrigName.end());
932   }
933 
934   Error populateRemappings() override {
935     if (Error E = Remappings.read(*RemapBuffer))
936       return E;
937     for (StringRef Name : Underlying.HashTable->keys()) {
938       StringRef RealName = extractName(Name);
939       if (auto Key = Remappings.insert(RealName)) {
940         // FIXME: We could theoretically map the same equivalence class to
941         // multiple names in the profile data. If that happens, we should
942         // return NamedInstrProfRecords from all of them.
943         MappedNames.insert({Key, RealName});
944       }
945     }
946     return Error::success();
947   }
948 
949   Error getRecords(StringRef FuncName,
950                    ArrayRef<NamedInstrProfRecord> &Data) override {
951     StringRef RealName = extractName(FuncName);
952     if (auto Key = Remappings.lookup(RealName)) {
953       StringRef Remapped = MappedNames.lookup(Key);
954       if (!Remapped.empty()) {
955         if (RealName.begin() == FuncName.begin() &&
956             RealName.end() == FuncName.end())
957           FuncName = Remapped;
958         else {
959           // Try rebuilding the name from the given remapping.
960           SmallString<256> Reconstituted;
961           reconstituteName(FuncName, RealName, Remapped, Reconstituted);
962           Error E = Underlying.getRecords(Reconstituted, Data);
963           if (!E)
964             return E;
965 
966           // If we failed because the name doesn't exist, fall back to asking
967           // about the original name.
968           if (Error Unhandled = handleErrors(
969                   std::move(E), [](std::unique_ptr<InstrProfError> Err) {
970                     return Err->get() == instrprof_error::unknown_function
971                                ? Error::success()
972                                : Error(std::move(Err));
973                   }))
974             return Unhandled;
975         }
976       }
977     }
978     return Underlying.getRecords(FuncName, Data);
979   }
980 
981 private:
982   /// The memory buffer containing the remapping configuration. Remappings
983   /// holds pointers into this buffer.
984   std::unique_ptr<MemoryBuffer> RemapBuffer;
985 
986   /// The mangling remapper.
987   SymbolRemappingReader Remappings;
988 
989   /// Mapping from mangled name keys to the name used for the key in the
990   /// profile data.
991   /// FIXME: Can we store a location within the on-disk hash table instead of
992   /// redoing lookup?
993   DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;
994 
995   /// The real profile data reader.
996   InstrProfReaderIndex<HashTableImpl> &Underlying;
997 };
998 
999 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
1000   using namespace support;
1001 
1002   if (DataBuffer.getBufferSize() < 8)
1003     return false;
1004   uint64_t Magic =
1005       endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
1006   // Verify that it's magical.
1007   return Magic == IndexedInstrProf::Magic;
1008 }
1009 
1010 const unsigned char *
1011 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
1012                                     const unsigned char *Cur, bool UseCS) {
1013   using namespace IndexedInstrProf;
1014   using namespace support;
1015 
1016   if (Version >= IndexedInstrProf::Version4) {
1017     const IndexedInstrProf::Summary *SummaryInLE =
1018         reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
1019     uint64_t NFields =
1020         endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields);
1021     uint64_t NEntries =
1022         endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries);
1023     uint32_t SummarySize =
1024         IndexedInstrProf::Summary::getSize(NFields, NEntries);
1025     std::unique_ptr<IndexedInstrProf::Summary> SummaryData =
1026         IndexedInstrProf::allocSummary(SummarySize);
1027 
1028     const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE);
1029     uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get());
1030     for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
1031       Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]);
1032 
1033     SummaryEntryVector DetailedSummary;
1034     for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) {
1035       const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I);
1036       DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount,
1037                                    Ent.NumBlocks);
1038     }
1039     std::unique_ptr<llvm::ProfileSummary> &Summary =
1040         UseCS ? this->CS_Summary : this->Summary;
1041 
1042     // initialize InstrProfSummary using the SummaryData from disk.
1043     Summary = std::make_unique<ProfileSummary>(
1044         UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr,
1045         DetailedSummary, SummaryData->get(Summary::TotalBlockCount),
1046         SummaryData->get(Summary::MaxBlockCount),
1047         SummaryData->get(Summary::MaxInternalBlockCount),
1048         SummaryData->get(Summary::MaxFunctionCount),
1049         SummaryData->get(Summary::TotalNumBlocks),
1050         SummaryData->get(Summary::TotalNumFunctions));
1051     return Cur + SummarySize;
1052   } else {
1053     // The older versions do not support a profile summary. This just computes
1054     // an empty summary, which will not result in accurate hot/cold detection.
1055     // We would need to call addRecord for all NamedInstrProfRecords to get the
1056     // correct summary. However, this version is old (prior to early 2016) and
1057     // has not been supporting an accurate summary for several years.
1058     InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1059     Summary = Builder.getSummary();
1060     return Cur;
1061   }
1062 }
1063 
1064 Error IndexedInstrProfReader::readHeader() {
1065   using namespace support;
1066 
1067   const unsigned char *Start =
1068       (const unsigned char *)DataBuffer->getBufferStart();
1069   const unsigned char *Cur = Start;
1070   if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
1071     return error(instrprof_error::truncated);
1072 
1073   auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Start);
1074   if (!HeaderOr)
1075     return HeaderOr.takeError();
1076 
1077   const IndexedInstrProf::Header *Header = &HeaderOr.get();
1078   Cur += Header->size();
1079 
1080   Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur,
1081                     /* UseCS */ false);
1082   if (Header->formatVersion() & VARIANT_MASK_CSIR_PROF)
1083     Cur =
1084         readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur,
1085                     /* UseCS */ true);
1086   // Read the hash type and start offset.
1087   IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
1088       endian::byte_swap<uint64_t, little>(Header->HashType));
1089   if (HashType > IndexedInstrProf::HashT::Last)
1090     return error(instrprof_error::unsupported_hash_type);
1091 
1092   uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
1093 
1094   // The hash table with profile counts comes next.
1095   auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
1096       Start + HashOffset, Cur, Start, HashType, Header->formatVersion());
1097 
1098   // The MemProfOffset field in the header is only valid when the format
1099   // version is higher than 8 (when it was introduced).
1100   if (GET_VERSION(Header->formatVersion()) >= 8 &&
1101       Header->formatVersion() & VARIANT_MASK_MEMPROF) {
1102     uint64_t MemProfOffset =
1103         endian::byte_swap<uint64_t, little>(Header->MemProfOffset);
1104 
1105     const unsigned char *Ptr = Start + MemProfOffset;
1106     // The value returned from RecordTableGenerator.Emit.
1107     const uint64_t RecordTableOffset =
1108         support::endian::readNext<uint64_t, little, unaligned>(Ptr);
1109     // The offset in the stream right before invoking
1110     // FrameTableGenerator.Emit.
1111     const uint64_t FramePayloadOffset =
1112         support::endian::readNext<uint64_t, little, unaligned>(Ptr);
1113     // The value returned from FrameTableGenerator.Emit.
1114     const uint64_t FrameTableOffset =
1115         support::endian::readNext<uint64_t, little, unaligned>(Ptr);
1116 
1117     // Read the schema.
1118     auto SchemaOr = memprof::readMemProfSchema(Ptr);
1119     if (!SchemaOr)
1120       return SchemaOr.takeError();
1121     Schema = SchemaOr.get();
1122 
1123     // Now initialize the table reader with a pointer into data buffer.
1124     MemProfRecordTable.reset(MemProfRecordHashTable::Create(
1125         /*Buckets=*/Start + RecordTableOffset,
1126         /*Payload=*/Ptr,
1127         /*Base=*/Start, memprof::RecordLookupTrait(Schema)));
1128 
1129     // Initialize the frame table reader with the payload and bucket offsets.
1130     MemProfFrameTable.reset(MemProfFrameHashTable::Create(
1131         /*Buckets=*/Start + FrameTableOffset,
1132         /*Payload=*/Start + FramePayloadOffset,
1133         /*Base=*/Start, memprof::FrameLookupTrait()));
1134   }
1135 
1136   // BinaryIdOffset field in the header is only valid when the format version
1137   // is higher than 9 (when it was introduced).
1138   if (GET_VERSION(Header->formatVersion()) >= 9) {
1139     uint64_t BinaryIdOffset =
1140         endian::byte_swap<uint64_t, little>(Header->BinaryIdOffset);
1141     const unsigned char *Ptr = Start + BinaryIdOffset;
1142     // Read binary ids size.
1143     BinaryIdsSize = support::endian::readNext<uint64_t, little, unaligned>(Ptr);
1144     if (BinaryIdsSize % sizeof(uint64_t))
1145       return error(instrprof_error::bad_header);
1146     // Set the binary ids start.
1147     BinaryIdsStart = Ptr;
1148     if (BinaryIdsStart > (const unsigned char *)DataBuffer->getBufferEnd())
1149       return make_error<InstrProfError>(instrprof_error::malformed,
1150                                         "corrupted binary ids");
1151   }
1152 
1153   if (GET_VERSION(Header->formatVersion()) >= 10 &&
1154       Header->formatVersion() & VARIANT_MASK_TEMPORAL_PROF) {
1155     uint64_t TemporalProfTracesOffset =
1156         endian::byte_swap<uint64_t, little>(Header->TemporalProfTracesOffset);
1157     const unsigned char *Ptr = Start + TemporalProfTracesOffset;
1158     const auto *PtrEnd = (const unsigned char *)DataBuffer->getBufferEnd();
1159     // Expect at least two 64 bit fields: NumTraces, and TraceStreamSize
1160     if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
1161       return error(instrprof_error::truncated);
1162     const uint64_t NumTraces =
1163         support::endian::readNext<uint64_t, little, unaligned>(Ptr);
1164     TemporalProfTraceStreamSize =
1165         support::endian::readNext<uint64_t, little, unaligned>(Ptr);
1166     for (unsigned i = 0; i < NumTraces; i++) {
1167       // Expect at least two 64 bit fields: Weight and NumFunctions
1168       if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
1169         return error(instrprof_error::truncated);
1170       TemporalProfTraceTy Trace;
1171       Trace.Weight =
1172           support::endian::readNext<uint64_t, little, unaligned>(Ptr);
1173       const uint64_t NumFunctions =
1174           support::endian::readNext<uint64_t, little, unaligned>(Ptr);
1175       // Expect at least NumFunctions 64 bit fields
1176       if (Ptr + NumFunctions * sizeof(uint64_t) > PtrEnd)
1177         return error(instrprof_error::truncated);
1178       for (unsigned j = 0; j < NumFunctions; j++) {
1179         const uint64_t NameRef =
1180             support::endian::readNext<uint64_t, little, unaligned>(Ptr);
1181         Trace.FunctionNameRefs.push_back(NameRef);
1182       }
1183       TemporalProfTraces.push_back(std::move(Trace));
1184     }
1185   }
1186 
1187   // Load the remapping table now if requested.
1188   if (RemappingBuffer) {
1189     Remapper =
1190         std::make_unique<InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
1191             std::move(RemappingBuffer), *IndexPtr);
1192     if (Error E = Remapper->populateRemappings())
1193       return E;
1194   } else {
1195     Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr);
1196   }
1197   Index = std::move(IndexPtr);
1198 
1199   return success();
1200 }
1201 
1202 InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
1203   if (Symtab)
1204     return *Symtab;
1205 
1206   std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
1207   if (Error E = Index->populateSymtab(*NewSymtab)) {
1208     auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
1209     consumeError(error(ErrCode, Msg));
1210   }
1211 
1212   Symtab = std::move(NewSymtab);
1213   return *Symtab;
1214 }
1215 
1216 Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord(
1217     StringRef FuncName, uint64_t FuncHash, uint64_t *MismatchedFuncSum) {
1218   ArrayRef<NamedInstrProfRecord> Data;
1219   uint64_t FuncSum = 0;
1220   Error Err = Remapper->getRecords(FuncName, Data);
1221   if (Err)
1222     return std::move(Err);
1223   // Found it. Look for counters with the right hash.
1224 
1225   // A flag to indicate if the records are from the same type
1226   // of profile (i.e cs vs nocs).
1227   bool CSBitMatch = false;
1228   auto getFuncSum = [](const std::vector<uint64_t> &Counts) {
1229     uint64_t ValueSum = 0;
1230     for (uint64_t CountValue : Counts) {
1231       if (CountValue == (uint64_t)-1)
1232         continue;
1233       // Handle overflow -- if that happens, return max.
1234       if (std::numeric_limits<uint64_t>::max() - CountValue <= ValueSum)
1235         return std::numeric_limits<uint64_t>::max();
1236       ValueSum += CountValue;
1237     }
1238     return ValueSum;
1239   };
1240 
1241   for (const NamedInstrProfRecord &I : Data) {
1242     // Check for a match and fill the vector if there is one.
1243     if (I.Hash == FuncHash)
1244       return std::move(I);
1245     if (NamedInstrProfRecord::hasCSFlagInHash(I.Hash) ==
1246         NamedInstrProfRecord::hasCSFlagInHash(FuncHash)) {
1247       CSBitMatch = true;
1248       if (MismatchedFuncSum == nullptr)
1249         continue;
1250       FuncSum = std::max(FuncSum, getFuncSum(I.Counts));
1251     }
1252   }
1253   if (CSBitMatch) {
1254     if (MismatchedFuncSum != nullptr)
1255       *MismatchedFuncSum = FuncSum;
1256     return error(instrprof_error::hash_mismatch);
1257   }
1258   return error(instrprof_error::unknown_function);
1259 }
1260 
1261 Expected<memprof::MemProfRecord>
1262 IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
1263   // TODO: Add memprof specific errors.
1264   if (MemProfRecordTable == nullptr)
1265     return make_error<InstrProfError>(instrprof_error::invalid_prof,
1266                                       "no memprof data available in profile");
1267   auto Iter = MemProfRecordTable->find(FuncNameHash);
1268   if (Iter == MemProfRecordTable->end())
1269     return make_error<InstrProfError>(
1270         instrprof_error::unknown_function,
1271         "memprof record not found for function hash " + Twine(FuncNameHash));
1272 
1273   // Setup a callback to convert from frame ids to frame using the on-disk
1274   // FrameData hash table.
1275   memprof::FrameId LastUnmappedFrameId = 0;
1276   bool HasFrameMappingError = false;
1277   auto IdToFrameCallback = [&](const memprof::FrameId Id) {
1278     auto FrIter = MemProfFrameTable->find(Id);
1279     if (FrIter == MemProfFrameTable->end()) {
1280       LastUnmappedFrameId = Id;
1281       HasFrameMappingError = true;
1282       return memprof::Frame(0, 0, 0, false);
1283     }
1284     return *FrIter;
1285   };
1286 
1287   memprof::MemProfRecord Record(*Iter, IdToFrameCallback);
1288 
1289   // Check that all frame ids were successfully converted to frames.
1290   if (HasFrameMappingError) {
1291     return make_error<InstrProfError>(instrprof_error::hash_mismatch,
1292                                       "memprof frame not found for frame id " +
1293                                           Twine(LastUnmappedFrameId));
1294   }
1295   return Record;
1296 }
1297 
1298 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
1299                                                 uint64_t FuncHash,
1300                                                 std::vector<uint64_t> &Counts) {
1301   Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash);
1302   if (Error E = Record.takeError())
1303     return error(std::move(E));
1304 
1305   Counts = Record.get().Counts;
1306   return success();
1307 }
1308 
1309 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
1310   ArrayRef<NamedInstrProfRecord> Data;
1311 
1312   Error E = Index->getRecords(Data);
1313   if (E)
1314     return error(std::move(E));
1315 
1316   Record = Data[RecordIndex++];
1317   if (RecordIndex >= Data.size()) {
1318     Index->advanceToNextKey();
1319     RecordIndex = 0;
1320   }
1321   return success();
1322 }
1323 
1324 Error IndexedInstrProfReader::readBinaryIds(
1325     std::vector<llvm::object::BuildID> &BinaryIds) {
1326   return readBinaryIdsInternal(*DataBuffer, BinaryIdsSize, BinaryIdsStart,
1327                                BinaryIds, llvm::support::little);
1328 }
1329 
1330 Error IndexedInstrProfReader::printBinaryIds(raw_ostream &OS) {
1331   return printBinaryIdsInternal(OS, *DataBuffer, BinaryIdsSize, BinaryIdsStart,
1332                                 llvm::support::little);
1333 }
1334 
1335 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) {
1336   uint64_t NumFuncs = 0;
1337   for (const auto &Func : *this) {
1338     if (isIRLevelProfile()) {
1339       bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
1340       if (FuncIsCS != IsCS)
1341         continue;
1342     }
1343     Func.accumulateCounts(Sum);
1344     ++NumFuncs;
1345   }
1346   Sum.NumEntries = NumFuncs;
1347 }
1348