xref: /freebsd/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp (revision a90b9d0159070121c221b966469c3e36d912bf82)
1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading profiling data for clang's
10 // instrumentation based PGO and coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ProfileData/InstrProfReader.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/IR/ProfileSummary.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/ProfileData/MemProf.h"
22 #include "llvm/ProfileData/ProfileCommon.h"
23 #include "llvm/ProfileData/SymbolRemappingReader.h"
24 #include "llvm/Support/Endian.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/ErrorOr.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/SwapByteOrder.h"
29 #include "llvm/Support/VirtualFileSystem.h"
30 #include <algorithm>
31 #include <cstddef>
32 #include <cstdint>
33 #include <limits>
34 #include <memory>
35 #include <system_error>
36 #include <utility>
37 #include <vector>
38 
39 using namespace llvm;
40 
41 // Extracts the variant information from the top 32 bits in the version and
42 // returns an enum specifying the variants present.
43 static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
44   InstrProfKind ProfileKind = InstrProfKind::Unknown;
45   if (Version & VARIANT_MASK_IR_PROF) {
46     ProfileKind |= InstrProfKind::IRInstrumentation;
47   }
48   if (Version & VARIANT_MASK_CSIR_PROF) {
49     ProfileKind |= InstrProfKind::ContextSensitive;
50   }
51   if (Version & VARIANT_MASK_INSTR_ENTRY) {
52     ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
53   }
54   if (Version & VARIANT_MASK_BYTE_COVERAGE) {
55     ProfileKind |= InstrProfKind::SingleByteCoverage;
56   }
57   if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) {
58     ProfileKind |= InstrProfKind::FunctionEntryOnly;
59   }
60   if (Version & VARIANT_MASK_MEMPROF) {
61     ProfileKind |= InstrProfKind::MemProf;
62   }
63   if (Version & VARIANT_MASK_TEMPORAL_PROF) {
64     ProfileKind |= InstrProfKind::TemporalProfile;
65   }
66   return ProfileKind;
67 }
68 
69 static Expected<std::unique_ptr<MemoryBuffer>>
70 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
71   auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
72                                            : FS.getBufferForFile(Filename);
73   if (std::error_code EC = BufferOrErr.getError())
74     return errorCodeToError(EC);
75   return std::move(BufferOrErr.get());
76 }
77 
78 static Error initializeReader(InstrProfReader &Reader) {
79   return Reader.readHeader();
80 }
81 
82 /// Read a list of binary ids from a profile that consist of
83 /// a. uint64_t binary id length
84 /// b. uint8_t  binary id data
85 /// c. uint8_t  padding (if necessary)
86 /// This function is shared between raw and indexed profiles.
87 /// Raw profiles are in host-endian format, and indexed profiles are in
88 /// little-endian format. So, this function takes an argument indicating the
89 /// associated endian format to read the binary ids correctly.
90 static Error
91 readBinaryIdsInternal(const MemoryBuffer &DataBuffer,
92                       const uint64_t BinaryIdsSize,
93                       const uint8_t *BinaryIdsStart,
94                       std::vector<llvm::object::BuildID> &BinaryIds,
95                       const llvm::endianness Endian) {
96   using namespace support;
97 
98   if (BinaryIdsSize == 0)
99     return Error::success();
100 
101   const uint8_t *BI = BinaryIdsStart;
102   const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize;
103   const uint8_t *End =
104       reinterpret_cast<const uint8_t *>(DataBuffer.getBufferEnd());
105 
106   while (BI < BIEnd) {
107     size_t Remaining = BIEnd - BI;
108     // There should be enough left to read the binary id length.
109     if (Remaining < sizeof(uint64_t))
110       return make_error<InstrProfError>(
111           instrprof_error::malformed,
112           "not enough data to read binary id length");
113 
114     uint64_t BILen = 0;
115     if (Endian == llvm::endianness::little)
116       BILen =
117           endian::readNext<uint64_t, llvm::endianness::little, unaligned>(BI);
118     else
119       BILen = endian::readNext<uint64_t, llvm::endianness::big, unaligned>(BI);
120 
121     if (BILen == 0)
122       return make_error<InstrProfError>(instrprof_error::malformed,
123                                         "binary id length is 0");
124 
125     Remaining = BIEnd - BI;
126     // There should be enough left to read the binary id data.
127     if (Remaining < alignToPowerOf2(BILen, sizeof(uint64_t)))
128       return make_error<InstrProfError>(
129           instrprof_error::malformed, "not enough data to read binary id data");
130 
131     // Add binary id to the binary ids list.
132     BinaryIds.push_back(object::BuildID(BI, BI + BILen));
133 
134     // Increment by binary id data length, which aligned to the size of uint64.
135     BI += alignToPowerOf2(BILen, sizeof(uint64_t));
136     if (BI > End)
137       return make_error<InstrProfError>(
138           instrprof_error::malformed,
139           "binary id section is greater than buffer size");
140   }
141 
142   return Error::success();
143 }
144 
145 static void
146 printBinaryIdsInternal(raw_ostream &OS,
147                        std::vector<llvm::object::BuildID> &BinaryIds) {
148   OS << "Binary IDs: \n";
149   for (auto BI : BinaryIds) {
150     for (uint64_t I = 0; I < BI.size(); I++)
151       OS << format("%02x", BI[I]);
152     OS << "\n";
153   }
154 }
155 
156 Expected<std::unique_ptr<InstrProfReader>>
157 InstrProfReader::create(const Twine &Path, vfs::FileSystem &FS,
158                         const InstrProfCorrelator *Correlator,
159                         std::function<void(Error)> Warn) {
160   // Set up the buffer to read.
161   auto BufferOrError = setupMemoryBuffer(Path, FS);
162   if (Error E = BufferOrError.takeError())
163     return std::move(E);
164   return InstrProfReader::create(std::move(BufferOrError.get()), Correlator,
165                                  Warn);
166 }
167 
168 Expected<std::unique_ptr<InstrProfReader>>
169 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
170                         const InstrProfCorrelator *Correlator,
171                         std::function<void(Error)> Warn) {
172   if (Buffer->getBufferSize() == 0)
173     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
174 
175   std::unique_ptr<InstrProfReader> Result;
176   // Create the reader.
177   if (IndexedInstrProfReader::hasFormat(*Buffer))
178     Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
179   else if (RawInstrProfReader64::hasFormat(*Buffer))
180     Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator, Warn));
181   else if (RawInstrProfReader32::hasFormat(*Buffer))
182     Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator, Warn));
183   else if (TextInstrProfReader::hasFormat(*Buffer))
184     Result.reset(new TextInstrProfReader(std::move(Buffer)));
185   else
186     return make_error<InstrProfError>(instrprof_error::unrecognized_format);
187 
188   // Initialize the reader and return the result.
189   if (Error E = initializeReader(*Result))
190     return std::move(E);
191 
192   return std::move(Result);
193 }
194 
195 Expected<std::unique_ptr<IndexedInstrProfReader>>
196 IndexedInstrProfReader::create(const Twine &Path, vfs::FileSystem &FS,
197                                const Twine &RemappingPath) {
198   // Set up the buffer to read.
199   auto BufferOrError = setupMemoryBuffer(Path, FS);
200   if (Error E = BufferOrError.takeError())
201     return std::move(E);
202 
203   // Set up the remapping buffer if requested.
204   std::unique_ptr<MemoryBuffer> RemappingBuffer;
205   std::string RemappingPathStr = RemappingPath.str();
206   if (!RemappingPathStr.empty()) {
207     auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr, FS);
208     if (Error E = RemappingBufferOrError.takeError())
209       return std::move(E);
210     RemappingBuffer = std::move(RemappingBufferOrError.get());
211   }
212 
213   return IndexedInstrProfReader::create(std::move(BufferOrError.get()),
214                                         std::move(RemappingBuffer));
215 }
216 
217 Expected<std::unique_ptr<IndexedInstrProfReader>>
218 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
219                                std::unique_ptr<MemoryBuffer> RemappingBuffer) {
220   // Create the reader.
221   if (!IndexedInstrProfReader::hasFormat(*Buffer))
222     return make_error<InstrProfError>(instrprof_error::bad_magic);
223   auto Result = std::make_unique<IndexedInstrProfReader>(
224       std::move(Buffer), std::move(RemappingBuffer));
225 
226   // Initialize the reader and return the result.
227   if (Error E = initializeReader(*Result))
228     return std::move(E);
229 
230   return std::move(Result);
231 }
232 
233 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
234   // Verify that this really looks like plain ASCII text by checking a
235   // 'reasonable' number of characters (up to profile magic size).
236   size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t));
237   StringRef buffer = Buffer.getBufferStart();
238   return count == 0 ||
239          std::all_of(buffer.begin(), buffer.begin() + count,
240                      [](char c) { return isPrint(c) || isSpace(c); });
241 }
242 
243 // Read the profile variant flag from the header: ":FE" means this is a FE
244 // generated profile. ":IR" means this is an IR level profile. Other strings
245 // with a leading ':' will be reported an error format.
246 Error TextInstrProfReader::readHeader() {
247   Symtab.reset(new InstrProfSymtab());
248 
249   while (Line->starts_with(":")) {
250     StringRef Str = Line->substr(1);
251     if (Str.equals_insensitive("ir"))
252       ProfileKind |= InstrProfKind::IRInstrumentation;
253     else if (Str.equals_insensitive("fe"))
254       ProfileKind |= InstrProfKind::FrontendInstrumentation;
255     else if (Str.equals_insensitive("csir")) {
256       ProfileKind |= InstrProfKind::IRInstrumentation;
257       ProfileKind |= InstrProfKind::ContextSensitive;
258     } else if (Str.equals_insensitive("entry_first"))
259       ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
260     else if (Str.equals_insensitive("not_entry_first"))
261       ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation;
262     else if (Str.equals_insensitive("single_byte_coverage"))
263       ProfileKind |= InstrProfKind::SingleByteCoverage;
264     else if (Str.equals_insensitive("temporal_prof_traces")) {
265       ProfileKind |= InstrProfKind::TemporalProfile;
266       if (auto Err = readTemporalProfTraceData())
267         return error(std::move(Err));
268     } else
269       return error(instrprof_error::bad_header);
270     ++Line;
271   }
272   return success();
273 }
274 
275 /// Temporal profile trace data is stored in the header immediately after
276 /// ":temporal_prof_traces". The first integer is the number of traces, the
277 /// second integer is the stream size, then the following lines are the actual
278 /// traces which consist of a weight and a comma separated list of function
279 /// names.
280 Error TextInstrProfReader::readTemporalProfTraceData() {
281   if ((++Line).is_at_end())
282     return error(instrprof_error::eof);
283 
284   uint32_t NumTraces;
285   if (Line->getAsInteger(0, NumTraces))
286     return error(instrprof_error::malformed);
287 
288   if ((++Line).is_at_end())
289     return error(instrprof_error::eof);
290 
291   if (Line->getAsInteger(0, TemporalProfTraceStreamSize))
292     return error(instrprof_error::malformed);
293 
294   for (uint32_t i = 0; i < NumTraces; i++) {
295     if ((++Line).is_at_end())
296       return error(instrprof_error::eof);
297 
298     TemporalProfTraceTy Trace;
299     if (Line->getAsInteger(0, Trace.Weight))
300       return error(instrprof_error::malformed);
301 
302     if ((++Line).is_at_end())
303       return error(instrprof_error::eof);
304 
305     SmallVector<StringRef> FuncNames;
306     Line->split(FuncNames, ",", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
307     for (auto &FuncName : FuncNames)
308       Trace.FunctionNameRefs.push_back(
309           IndexedInstrProf::ComputeHash(FuncName.trim()));
310     TemporalProfTraces.push_back(std::move(Trace));
311   }
312   return success();
313 }
314 
315 Error
316 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
317 
318 #define CHECK_LINE_END(Line)                                                   \
319   if (Line.is_at_end())                                                        \
320     return error(instrprof_error::truncated);
321 #define READ_NUM(Str, Dst)                                                     \
322   if ((Str).getAsInteger(10, (Dst)))                                           \
323     return error(instrprof_error::malformed);
324 #define VP_READ_ADVANCE(Val)                                                   \
325   CHECK_LINE_END(Line);                                                        \
326   uint32_t Val;                                                                \
327   READ_NUM((*Line), (Val));                                                    \
328   Line++;
329 
330   if (Line.is_at_end())
331     return success();
332 
333   uint32_t NumValueKinds;
334   if (Line->getAsInteger(10, NumValueKinds)) {
335     // No value profile data
336     return success();
337   }
338   if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1)
339     return error(instrprof_error::malformed,
340                  "number of value kinds is invalid");
341   Line++;
342 
343   for (uint32_t VK = 0; VK < NumValueKinds; VK++) {
344     VP_READ_ADVANCE(ValueKind);
345     if (ValueKind > IPVK_Last)
346       return error(instrprof_error::malformed, "value kind is invalid");
347     ;
348     VP_READ_ADVANCE(NumValueSites);
349     if (!NumValueSites)
350       continue;
351 
352     Record.reserveSites(VK, NumValueSites);
353     for (uint32_t S = 0; S < NumValueSites; S++) {
354       VP_READ_ADVANCE(NumValueData);
355 
356       std::vector<InstrProfValueData> CurrentValues;
357       for (uint32_t V = 0; V < NumValueData; V++) {
358         CHECK_LINE_END(Line);
359         std::pair<StringRef, StringRef> VD = Line->rsplit(':');
360         uint64_t TakenCount, Value;
361         if (ValueKind == IPVK_IndirectCallTarget) {
362           if (InstrProfSymtab::isExternalSymbol(VD.first)) {
363             Value = 0;
364           } else {
365             if (Error E = Symtab->addFuncName(VD.first))
366               return E;
367             Value = IndexedInstrProf::ComputeHash(VD.first);
368           }
369         } else {
370           READ_NUM(VD.first, Value);
371         }
372         READ_NUM(VD.second, TakenCount);
373         CurrentValues.push_back({Value, TakenCount});
374         Line++;
375       }
376       Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData,
377                           nullptr);
378     }
379   }
380   return success();
381 
382 #undef CHECK_LINE_END
383 #undef READ_NUM
384 #undef VP_READ_ADVANCE
385 }
386 
387 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
388   // Skip empty lines and comments.
389   while (!Line.is_at_end() && (Line->empty() || Line->starts_with("#")))
390     ++Line;
391   // If we hit EOF while looking for a name, we're done.
392   if (Line.is_at_end()) {
393     return error(instrprof_error::eof);
394   }
395 
396   // Read the function name.
397   Record.Name = *Line++;
398   if (Error E = Symtab->addFuncName(Record.Name))
399     return error(std::move(E));
400 
401   // Read the function hash.
402   if (Line.is_at_end())
403     return error(instrprof_error::truncated);
404   if ((Line++)->getAsInteger(0, Record.Hash))
405     return error(instrprof_error::malformed,
406                  "function hash is not a valid integer");
407 
408   // Read the number of counters.
409   uint64_t NumCounters;
410   if (Line.is_at_end())
411     return error(instrprof_error::truncated);
412   if ((Line++)->getAsInteger(10, NumCounters))
413     return error(instrprof_error::malformed,
414                  "number of counters is not a valid integer");
415   if (NumCounters == 0)
416     return error(instrprof_error::malformed, "number of counters is zero");
417 
418   // Read each counter and fill our internal storage with the values.
419   Record.Clear();
420   Record.Counts.reserve(NumCounters);
421   for (uint64_t I = 0; I < NumCounters; ++I) {
422     if (Line.is_at_end())
423       return error(instrprof_error::truncated);
424     uint64_t Count;
425     if ((Line++)->getAsInteger(10, Count))
426       return error(instrprof_error::malformed, "count is invalid");
427     Record.Counts.push_back(Count);
428   }
429 
430   // Bitmap byte information is indicated with special character.
431   if (Line->starts_with("$")) {
432     Record.BitmapBytes.clear();
433     // Read the number of bitmap bytes.
434     uint64_t NumBitmapBytes;
435     if ((Line++)->drop_front(1).trim().getAsInteger(0, NumBitmapBytes))
436       return error(instrprof_error::malformed,
437                    "number of bitmap bytes is not a valid integer");
438     if (NumBitmapBytes != 0) {
439       // Read each bitmap and fill our internal storage with the values.
440       Record.BitmapBytes.reserve(NumBitmapBytes);
441       for (uint8_t I = 0; I < NumBitmapBytes; ++I) {
442         if (Line.is_at_end())
443           return error(instrprof_error::truncated);
444         uint8_t BitmapByte;
445         if ((Line++)->getAsInteger(0, BitmapByte))
446           return error(instrprof_error::malformed,
447                        "bitmap byte is not a valid integer");
448         Record.BitmapBytes.push_back(BitmapByte);
449       }
450     }
451   }
452 
453   // Check if value profile data exists and read it if so.
454   if (Error E = readValueProfileData(Record))
455     return error(std::move(E));
456 
457   return success();
458 }
459 
460 template <class IntPtrT>
461 InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const {
462   return getProfileKindFromVersion(Version);
463 }
464 
465 template <class IntPtrT>
466 SmallVector<TemporalProfTraceTy> &
467 RawInstrProfReader<IntPtrT>::getTemporalProfTraces(
468     std::optional<uint64_t> Weight) {
469   if (TemporalProfTimestamps.empty()) {
470     assert(TemporalProfTraces.empty());
471     return TemporalProfTraces;
472   }
473   // Sort functions by their timestamps to build the trace.
474   std::sort(TemporalProfTimestamps.begin(), TemporalProfTimestamps.end());
475   TemporalProfTraceTy Trace;
476   if (Weight)
477     Trace.Weight = *Weight;
478   for (auto &[TimestampValue, NameRef] : TemporalProfTimestamps)
479     Trace.FunctionNameRefs.push_back(NameRef);
480   TemporalProfTraces = {std::move(Trace)};
481   return TemporalProfTraces;
482 }
483 
484 template <class IntPtrT>
485 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
486   if (DataBuffer.getBufferSize() < sizeof(uint64_t))
487     return false;
488   uint64_t Magic =
489     *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
490   return RawInstrProf::getMagic<IntPtrT>() == Magic ||
491          llvm::byteswap(RawInstrProf::getMagic<IntPtrT>()) == Magic;
492 }
493 
494 template <class IntPtrT>
495 Error RawInstrProfReader<IntPtrT>::readHeader() {
496   if (!hasFormat(*DataBuffer))
497     return error(instrprof_error::bad_magic);
498   if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))
499     return error(instrprof_error::bad_header);
500   auto *Header = reinterpret_cast<const RawInstrProf::Header *>(
501       DataBuffer->getBufferStart());
502   ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();
503   return readHeader(*Header);
504 }
505 
506 template <class IntPtrT>
507 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
508   const char *End = DataBuffer->getBufferEnd();
509   // Skip zero padding between profiles.
510   while (CurrentPos != End && *CurrentPos == 0)
511     ++CurrentPos;
512   // If there's nothing left, we're done.
513   if (CurrentPos == End)
514     return make_error<InstrProfError>(instrprof_error::eof);
515   // If there isn't enough space for another header, this is probably just
516   // garbage at the end of the file.
517   if (CurrentPos + sizeof(RawInstrProf::Header) > End)
518     return make_error<InstrProfError>(instrprof_error::malformed,
519                                       "not enough space for another header");
520   // The writer ensures each profile is padded to start at an aligned address.
521   if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t))
522     return make_error<InstrProfError>(instrprof_error::malformed,
523                                       "insufficient padding");
524   // The magic should have the same byte order as in the previous header.
525   uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
526   if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
527     return make_error<InstrProfError>(instrprof_error::bad_magic);
528 
529   // There's another profile to read, so we need to process the header.
530   auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);
531   return readHeader(*Header);
532 }
533 
534 template <class IntPtrT>
535 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
536   if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart)))
537     return error(std::move(E));
538   for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
539     const IntPtrT FPtr = swap(I->FunctionPointer);
540     if (!FPtr)
541       continue;
542     Symtab.mapAddress(FPtr, swap(I->NameRef));
543   }
544   return success();
545 }
546 
// Parses one raw profile header that lives inside DataBuffer and sets up the
// reader state for the profile it describes: the version, the binary ids, the
// section deltas, the begin/end pointers of the data, counters, bitmap, names
// and value data sections, and a freshly built symbol table.
//
// Returns raw_profile_version_mismatch when the format version differs from
// the one this reader supports, bad_header when the binary id section or the
// computed section layout does not fit in the buffer, and
// unexpected_correlation_info when a correlator is in use but the profile
// still carries data/name information.
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::readHeader(
    const RawInstrProf::Header &Header) {
  Version = swap(Header.Version);
  if (GET_VERSION(Version) != RawInstrProf::Version)
    return error(instrprof_error::raw_profile_version_mismatch,
                 ("Profile uses raw profile format version = " +
                  Twine(GET_VERSION(Version)) +
                  "; expected version = " + Twine(RawInstrProf::Version) +
                  "\nPLEASE update this tool to version in the raw profile, or "
                  "regenerate raw profile with expected version.")
                     .str());

  uint64_t BinaryIdSize = swap(Header.BinaryIdsSize);
  // Binary id start just after the header if exists.
  const uint8_t *BinaryIdStart =
      reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header);
  const uint8_t *BinaryIdEnd = BinaryIdStart + BinaryIdSize;
  const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd();
  // The section size must be a multiple of 8 bytes and end inside the buffer.
  if (BinaryIdSize % sizeof(uint64_t) || BinaryIdEnd > BufferEnd)
    return error(instrprof_error::bad_header);
  if (BinaryIdSize != 0) {
    if (Error Err =
            readBinaryIdsInternal(*DataBuffer, BinaryIdSize, BinaryIdStart,
                                  BinaryIds, getDataEndianness()))
      return Err;
  }

  // Pull the remaining header fields, byte-swapping where necessary.
  CountersDelta = swap(Header.CountersDelta);
  BitmapDelta = swap(Header.BitmapDelta);
  NamesDelta = swap(Header.NamesDelta);
  auto NumData = swap(Header.NumData);
  auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters);
  auto CountersSize = swap(Header.NumCounters) * getCounterTypeSize();
  auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
  auto NumBitmapBytes = swap(Header.NumBitmapBytes);
  auto PaddingBytesAfterBitmapBytes = swap(Header.PaddingBytesAfterBitmapBytes);
  auto NamesSize = swap(Header.NamesSize);
  ValueKindLast = swap(Header.ValueKindLast);

  auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>);
  auto PaddingSize = getNumPaddingBytes(NamesSize);

  // Section offsets are accumulated in file order, all relative to the start
  // of this header: data, counters, bitmap, names, value data.
  // NOTE(review): the sums are built from header-supplied sizes without an
  // overflow check; presumably a corrupt header could wrap them around and
  // get past the bounds check below -- confirm.
  // Profile data starts after profile header and binary ids if exist.
  ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdSize;
  ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters;
  ptrdiff_t BitmapOffset =
      CountersOffset + CountersSize + PaddingBytesAfterCounters;
  ptrdiff_t NamesOffset =
      BitmapOffset + NumBitmapBytes + PaddingBytesAfterBitmapBytes;
  ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;

  // Everything up to the start of the value data must lie inside the buffer.
  auto *Start = reinterpret_cast<const char *>(&Header);
  if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
    return error(instrprof_error::bad_header);

  if (Correlator) {
    // These sizes in the raw file are zero because we constructed them in the
    // Correlator.
    if (!(DataSize == 0 && NamesSize == 0 && CountersDelta == 0 &&
          NamesDelta == 0))
      return error(instrprof_error::unexpected_correlation_info);
    // Data records and names come from the correlator, not from the buffer.
    Data = Correlator->getDataPointer();
    DataEnd = Data + Correlator->getDataSize();
    NamesStart = Correlator->getNamesPointer();
    NamesEnd = NamesStart + Correlator->getNamesSize();
  } else {
    // Data records and names are read directly from the profile buffer.
    Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
        Start + DataOffset);
    DataEnd = Data + NumData;
    NamesStart = Start + NamesOffset;
    NamesEnd = NamesStart + NamesSize;
  }

  // Counters, bitmap and value data always live in the profile buffer.
  CountersStart = Start + CountersOffset;
  CountersEnd = CountersStart + CountersSize;
  BitmapStart = Start + BitmapOffset;
  BitmapEnd = BitmapStart + NumBitmapBytes;
  ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);

  // Build the new symbol table on the side and install it only on success.
  std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
  if (Error E = createSymtab(*NewSymtab))
    return E;

  Symtab = std::move(NewSymtab);
  return success();
}
634 
635 template <class IntPtrT>
636 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) {
637   Record.Name = getName(Data->NameRef);
638   return success();
639 }
640 
641 template <class IntPtrT>
642 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) {
643   Record.Hash = swap(Data->FuncHash);
644   return success();
645 }
646 
// Reads the counters of the current data record into Record.Counts.
//
// The record's counter pointer is turned into an offset into the counters
// section by subtracting CountersDelta. The offset and the counter count are
// validated against the section bounds; any violation (as well as a counter
// count of zero) is reported as a malformed error. When the profile carries a
// temporal profile, the first counter slot holds a timestamp that is recorded
// separately and not added to Record.Counts. In single byte coverage mode
// each counter is one byte and is translated to 1 (covered) or 0.
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::readRawCounts(
    InstrProfRecord &Record) {
  uint32_t NumCounters = swap(Data->NumCounters);
  if (NumCounters == 0)
    return error(instrprof_error::malformed, "number of counters is zero");

  // Offset of this record's first counter within the counters section.
  ptrdiff_t CounterBaseOffset = swap(Data->CounterPtr) - CountersDelta;
  if (CounterBaseOffset < 0)
    return error(
        instrprof_error::malformed,
        ("counter offset " + Twine(CounterBaseOffset) + " is negative").str());

  if (CounterBaseOffset >= CountersEnd - CountersStart)
    return error(instrprof_error::malformed,
                 ("counter offset " + Twine(CounterBaseOffset) +
                  " is greater than the maximum counter offset " +
                  Twine(CountersEnd - CountersStart - 1))
                     .str());

  // Number of counters that fit between the base offset and the section end.
  uint64_t MaxNumCounters =
      (CountersEnd - (CountersStart + CounterBaseOffset)) /
      getCounterTypeSize();
  if (NumCounters > MaxNumCounters)
    return error(instrprof_error::malformed,
                 ("number of counters " + Twine(NumCounters) +
                  " is greater than the maximum number of counters " +
                  Twine(MaxNumCounters))
                     .str());

  Record.Counts.clear();
  Record.Counts.reserve(NumCounters);
  for (uint32_t I = 0; I < NumCounters; I++) {
    const char *Ptr =
        CountersStart + CounterBaseOffset + I * getCounterTypeSize();
    if (I == 0 && hasTemporalProfile()) {
      // The first slot is the function's timestamp, always read as 64 bits.
      uint64_t TimestampValue = swap(*reinterpret_cast<const uint64_t *>(Ptr));
      // Zero and all-ones timestamps are not recorded.
      if (TimestampValue != 0 &&
          TimestampValue != std::numeric_limits<uint64_t>::max()) {
        TemporalProfTimestamps.emplace_back(TimestampValue,
                                            swap(Data->NameRef));
        TemporalProfTraceStreamSize = 1;
      }
      if (hasSingleByteCoverage()) {
        // In coverage mode, getCounterTypeSize() returns 1 byte but our
        // timestamp field has size uint64_t. Increment I so that the next
        // iteration of this for loop points to the byte after the timestamp
        // field, i.e., I += 8.
        I += 7;
      }
      // The timestamp slot itself is never stored as a counter.
      continue;
    }
    if (hasSingleByteCoverage()) {
      // A value of zero signifies the block is covered.
      Record.Counts.push_back(*Ptr == 0 ? 1 : 0);
    } else {
      uint64_t CounterValue = swap(*reinterpret_cast<const uint64_t *>(Ptr));
      // Oversized values are only reported through the warning callback (if
      // one is set); the value is still stored.
      if (CounterValue > MaxCounterValue && Warn)
        Warn(make_error<InstrProfError>(
            instrprof_error::counter_value_too_large, Twine(CounterValue)));

      Record.Counts.push_back(CounterValue);
    }
  }

  return success();
}
714 
715 template <class IntPtrT>
716 Error RawInstrProfReader<IntPtrT>::readRawBitmapBytes(InstrProfRecord &Record) {
717   uint32_t NumBitmapBytes = swap(Data->NumBitmapBytes);
718 
719   Record.BitmapBytes.clear();
720   Record.BitmapBytes.reserve(NumBitmapBytes);
721 
722   // It's possible MCDC is either not enabled or only used for some functions
723   // and not others. So if we record 0 bytes, just move on.
724   if (NumBitmapBytes == 0)
725     return success();
726 
727   // BitmapDelta decreases as we advance to the next data record.
728   ptrdiff_t BitmapOffset = swap(Data->BitmapPtr) - BitmapDelta;
729   if (BitmapOffset < 0)
730     return error(
731         instrprof_error::malformed,
732         ("bitmap offset " + Twine(BitmapOffset) + " is negative").str());
733 
734   if (BitmapOffset >= BitmapEnd - BitmapStart)
735     return error(instrprof_error::malformed,
736                  ("bitmap offset " + Twine(BitmapOffset) +
737                   " is greater than the maximum bitmap offset " +
738                   Twine(BitmapEnd - BitmapStart - 1))
739                      .str());
740 
741   uint64_t MaxNumBitmapBytes =
742       (BitmapEnd - (BitmapStart + BitmapOffset)) / sizeof(uint8_t);
743   if (NumBitmapBytes > MaxNumBitmapBytes)
744     return error(instrprof_error::malformed,
745                  ("number of bitmap bytes " + Twine(NumBitmapBytes) +
746                   " is greater than the maximum number of bitmap bytes " +
747                   Twine(MaxNumBitmapBytes))
748                      .str());
749 
750   for (uint32_t I = 0; I < NumBitmapBytes; I++) {
751     const char *Ptr = BitmapStart + BitmapOffset + I;
752     Record.BitmapBytes.push_back(swap(*Ptr));
753   }
754 
755   return success();
756 }
757 
758 template <class IntPtrT>
759 Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
760     InstrProfRecord &Record) {
761   Record.clearValueData();
762   CurValueDataSize = 0;
763   // Need to match the logic in value profile dumper code in compiler-rt:
764   uint32_t NumValueKinds = 0;
765   for (uint32_t I = 0; I < IPVK_Last + 1; I++)
766     NumValueKinds += (Data->NumValueSites[I] != 0);
767 
768   if (!NumValueKinds)
769     return success();
770 
771   Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
772       ValueProfData::getValueProfData(
773           ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(),
774           getDataEndianness());
775 
776   if (Error E = VDataPtrOrErr.takeError())
777     return E;
778 
779   // Note that besides deserialization, this also performs the conversion for
780   // indirect call targets.  The function pointers from the raw profile are
781   // remapped into function name hashes.
782   VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get());
783   CurValueDataSize = VDataPtrOrErr.get()->getSize();
784   return success();
785 }
786 
787 template <class IntPtrT>
788 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) {
789   // Keep reading profiles that consist of only headers and no profile data and
790   // counters.
791   while (atEnd())
792     // At this point, ValueDataStart field points to the next header.
793     if (Error E = readNextHeader(getNextHeaderPos()))
794       return error(std::move(E));
795 
796   // Read name and set it in Record.
797   if (Error E = readName(Record))
798     return error(std::move(E));
799 
800   // Read FuncHash and set it in Record.
801   if (Error E = readFuncHash(Record))
802     return error(std::move(E));
803 
804   // Read raw counts and set Record.
805   if (Error E = readRawCounts(Record))
806     return error(std::move(E));
807 
808   // Read raw bitmap bytes and set Record.
809   if (Error E = readRawBitmapBytes(Record))
810     return error(std::move(E));
811 
812   // Read value data and set Record.
813   if (Error E = readValueProfilingData(Record))
814     return error(std::move(E));
815 
816   // Iterate.
817   advanceData();
818   return success();
819 }
820 
821 template <class IntPtrT>
822 Error RawInstrProfReader<IntPtrT>::readBinaryIds(
823     std::vector<llvm::object::BuildID> &BinaryIds) {
824   BinaryIds.insert(BinaryIds.begin(), this->BinaryIds.begin(),
825                    this->BinaryIds.end());
826   return Error::success();
827 }
828 
829 template <class IntPtrT>
830 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
831   if (!BinaryIds.empty())
832     printBinaryIdsInternal(OS, BinaryIds);
833   return Error::success();
834 }
835 
836 namespace llvm {
837 
// Explicit instantiations of RawInstrProfReader for 32-bit and 64-bit
// IntPtrT, so the member templates defined above are emitted in this
// translation unit.
838 template class RawInstrProfReader<uint32_t>;
839 template class RawInstrProfReader<uint64_t>;
840 
841 } // end namespace llvm
842 
843 InstrProfLookupTrait::hash_value_type
844 InstrProfLookupTrait::ComputeHash(StringRef K) {
845   return IndexedInstrProf::ComputeHash(HashType, K);
846 }
847 
// File-level shorthands for the lookup trait's payload and offset types; they
// appear in the signature of InstrProfLookupTrait::ReadData below.
848 using data_type = InstrProfLookupTrait::data_type;
849 using offset_type = InstrProfLookupTrait::offset_type;
850 
851 bool InstrProfLookupTrait::readValueProfilingData(
852     const unsigned char *&D, const unsigned char *const End) {
853   Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
854       ValueProfData::getValueProfData(D, End, ValueProfDataEndianness);
855 
856   if (VDataPtrOrErr.takeError())
857     return false;
858 
859   VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr);
860   D += VDataPtrOrErr.get()->TotalSize;
861 
862   return true;
863 }
864 
865 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
866                                          offset_type N) {
867   using namespace support;
868 
869   // Check if the data is corrupt. If so, don't try to read it.
870   if (N % sizeof(uint64_t))
871     return data_type();
872 
873   DataBuffer.clear();
874   std::vector<uint64_t> CounterBuffer;
875   std::vector<uint8_t> BitmapByteBuffer;
876 
877   const unsigned char *End = D + N;
878   while (D < End) {
879     // Read hash.
880     if (D + sizeof(uint64_t) >= End)
881       return data_type();
882     uint64_t Hash =
883         endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D);
884 
885     // Initialize number of counters for GET_VERSION(FormatVersion) == 1.
886     uint64_t CountsSize = N / sizeof(uint64_t) - 1;
887     // If format version is different then read the number of counters.
888     if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) {
889       if (D + sizeof(uint64_t) > End)
890         return data_type();
891       CountsSize =
892           endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D);
893     }
894     // Read counter values.
895     if (D + CountsSize * sizeof(uint64_t) > End)
896       return data_type();
897 
898     CounterBuffer.clear();
899     CounterBuffer.reserve(CountsSize);
900     for (uint64_t J = 0; J < CountsSize; ++J)
901       CounterBuffer.push_back(
902           endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D));
903 
904     // Read bitmap bytes for GET_VERSION(FormatVersion) > 10.
905     if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version10) {
906       uint64_t BitmapBytes = 0;
907       if (D + sizeof(uint64_t) > End)
908         return data_type();
909       BitmapBytes =
910           endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D);
911       // Read bitmap byte values.
912       if (D + BitmapBytes * sizeof(uint8_t) > End)
913         return data_type();
914       BitmapByteBuffer.clear();
915       BitmapByteBuffer.reserve(BitmapBytes);
916       for (uint64_t J = 0; J < BitmapBytes; ++J)
917         BitmapByteBuffer.push_back(static_cast<uint8_t>(
918             endian::readNext<uint64_t, llvm::endianness::little, unaligned>(
919                 D)));
920     }
921 
922     DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer),
923                             std::move(BitmapByteBuffer));
924 
925     // Read value profiling data.
926     if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 &&
927         !readValueProfilingData(D, End)) {
928       DataBuffer.clear();
929       return data_type();
930     }
931   }
932   return DataBuffer;
933 }
934 
935 template <typename HashTableImpl>
936 Error InstrProfReaderIndex<HashTableImpl>::getRecords(
937     StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) {
938   auto Iter = HashTable->find(FuncName);
939   if (Iter == HashTable->end())
940     return make_error<InstrProfError>(instrprof_error::unknown_function);
941 
942   Data = (*Iter);
943   if (Data.empty())
944     return make_error<InstrProfError>(instrprof_error::malformed,
945                                       "profile data is empty");
946 
947   return Error::success();
948 }
949 
950 template <typename HashTableImpl>
951 Error InstrProfReaderIndex<HashTableImpl>::getRecords(
952     ArrayRef<NamedInstrProfRecord> &Data) {
953   if (atEnd())
954     return make_error<InstrProfError>(instrprof_error::eof);
955 
956   Data = *RecordIterator;
957 
958   if (Data.empty())
959     return make_error<InstrProfError>(instrprof_error::malformed,
960                                       "profile data is empty");
961 
962   return Error::success();
963 }
964 
965 template <typename HashTableImpl>
966 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
967     const unsigned char *Buckets, const unsigned char *const Payload,
968     const unsigned char *const Base, IndexedInstrProf::HashT HashType,
969     uint64_t Version) {
970   FormatVersion = Version;
971   HashTable.reset(HashTableImpl::Create(
972       Buckets, Payload, Base,
973       typename HashTableImpl::InfoType(HashType, Version)));
974   RecordIterator = HashTable->data_begin();
975 }
976 
977 template <typename HashTableImpl>
978 InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const {
979   return getProfileKindFromVersion(FormatVersion);
980 }
981 
982 namespace {
983 /// A remapper that does not apply any remappings.
984 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
985   InstrProfReaderIndexBase &Underlying;
986 
987 public:
988   InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying)
989       : Underlying(Underlying) {}
990 
991   Error getRecords(StringRef FuncName,
992                    ArrayRef<NamedInstrProfRecord> &Data) override {
993     return Underlying.getRecords(FuncName, Data);
994   }
995 };
996 } // namespace
997 
998 /// A remapper that applies remappings based on a symbol remapping file.
///
/// populateRemappings() reads the remapping file and registers the mangled
/// name extracted from every key of the underlying hash table. getRecords()
/// then looks the requested name up through the remappings first and falls
/// back to the name as given.
999 template <typename HashTableImpl>
1000 class llvm::InstrProfReaderItaniumRemapper
1001     : public InstrProfReaderRemapper {
1002 public:
  /// Takes ownership of \p RemapBuffer and keeps a reference to
  /// \p Underlying; nothing is parsed until populateRemappings() is called.
1003   InstrProfReaderItaniumRemapper(
1004       std::unique_ptr<MemoryBuffer> RemapBuffer,
1005       InstrProfReaderIndex<HashTableImpl> &Underlying)
1006       : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) {
1007   }
1008 
1009   /// Extract the original function name from a PGO function name.
  ///
  /// \returns the first delimiter-separated piece of \p Name that starts with
  /// "_Z", or \p Name itself when no piece does. The result is a view into
  /// \p Name.
1010   static StringRef extractName(StringRef Name) {
1011     // We can have multiple pieces separated by kGlobalIdentifierDelimiter (
1012     // semicolon now and colon in older profiles); there can be pieces both
1013     // before and after the mangled name. Find the first part that starts with
1014     // '_Z'; we'll assume that's the mangled name we want.
1015     std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
1016     while (true) {
      // Peel off the next piece; Parts.second is what remains to be scanned.
1017       Parts = Parts.second.split(kGlobalIdentifierDelimiter);
1018       if (Parts.first.starts_with("_Z"))
1019         return Parts.first;
1020       if (Parts.second.empty())
1021         return Name;
1022     }
1023   }
1024 
1025   /// Given a mangled name extracted from a PGO function name, and a new
1026   /// form for that mangled name, reconstitute the name.
  ///
  /// The result appended to \p Out is the part of \p OrigName before
  /// \p ExtractedName, then \p Replacement, then the part of \p OrigName
  /// after \p ExtractedName. \p ExtractedName must therefore be a view into
  /// \p OrigName, since its begin()/end() are used as positions within it.
1027   static void reconstituteName(StringRef OrigName, StringRef ExtractedName,
1028                                StringRef Replacement,
1029                                SmallVectorImpl<char> &Out) {
1030     Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size());
1031     Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin());
1032     Out.insert(Out.end(), Replacement.begin(), Replacement.end());
1033     Out.insert(Out.end(), ExtractedName.end(), OrigName.end());
1034   }
1035 
  /// Reads the remapping file from RemapBuffer, then inserts the extracted
  /// name of every key in the underlying hash table into the remapper and
  /// records which profile name each resulting key stands for.
  ///
  /// \returns the error from reading the remapping file, otherwise success.
1036   Error populateRemappings() override {
1037     if (Error E = Remappings.read(*RemapBuffer))
1038       return E;
1039     for (StringRef Name : Underlying.HashTable->keys()) {
1040       StringRef RealName = extractName(Name);
      // Names for which insert() yields no key are simply not recorded.
1041       if (auto Key = Remappings.insert(RealName)) {
1042         // FIXME: We could theoretically map the same equivalence class to
1043         // multiple names in the profile data. If that happens, we should
1044         // return NamedInstrProfRecords from all of them.
1045         MappedNames.insert({Key, RealName});
1046       }
1047     }
1048     return Error::success();
1049   }
1050 
  /// Looks up the records for \p FuncName, preferring the remapped spelling
  /// of its mangled name when the remappings provide one.
  ///
  /// If the extracted name is all of \p FuncName, the remapped name simply
  /// replaces it. Otherwise the full name is rebuilt around the remapped
  /// name and looked up; only an unknown_function failure of that lookup
  /// falls through to a lookup of the original \p FuncName, any other error
  /// is returned as is.
1051   Error getRecords(StringRef FuncName,
1052                    ArrayRef<NamedInstrProfRecord> &Data) override {
1053     StringRef RealName = extractName(FuncName);
1054     if (auto Key = Remappings.lookup(RealName)) {
1055       StringRef Remapped = MappedNames.lookup(Key);
1056       if (!Remapped.empty()) {
        // RealName is a view into FuncName, so comparing the end points
        // tells whether the extracted name covers the whole of FuncName.
1057         if (RealName.begin() == FuncName.begin() &&
1058             RealName.end() == FuncName.end())
1059           FuncName = Remapped;
1060         else {
1061           // Try rebuilding the name from the given remapping.
1062           SmallString<256> Reconstituted;
1063           reconstituteName(FuncName, RealName, Remapped, Reconstituted);
1064           Error E = Underlying.getRecords(Reconstituted, Data);
          // Success: E is a success value here, so returning it reports
          // that Data has been filled in.
1065           if (!E)
1066             return E;
1067 
1068           // If we failed because the name doesn't exist, fall back to asking
1069           // about the original name.
1070           if (Error Unhandled = handleErrors(
1071                   std::move(E), [](std::unique_ptr<InstrProfError> Err) {
1072                     return Err->get() == instrprof_error::unknown_function
1073                                ? Error::success()
1074                                : Error(std::move(Err));
1075                   }))
1076             return Unhandled;
1077         }
1078       }
1079     }
1080     return Underlying.getRecords(FuncName, Data);
1081   }
1082 
1083 private:
1084   /// The memory buffer containing the remapping configuration. Remappings
1085   /// holds pointers into this buffer.
1086   std::unique_ptr<MemoryBuffer> RemapBuffer;
1087 
1088   /// The mangling remapper.
1089   SymbolRemappingReader Remappings;
1090 
1091   /// Mapping from mangled name keys to the name used for the key in the
1092   /// profile data.
1093   /// FIXME: Can we store a location within the on-disk hash table instead of
1094   /// redoing lookup?
1095   DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;
1096 
1097   /// The real profile data reader.
1098   InstrProfReaderIndex<HashTableImpl> &Underlying;
1099 };
1100 
1101 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
1102   using namespace support;
1103 
1104   if (DataBuffer.getBufferSize() < 8)
1105     return false;
1106   uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
1107       DataBuffer.getBufferStart());
1108   // Verify that it's magical.
1109   return Magic == IndexedInstrProf::Magic;
1110 }
1111 
1112 const unsigned char *
1113 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
1114                                     const unsigned char *Cur, bool UseCS) {
1115   using namespace IndexedInstrProf;
1116   using namespace support;
1117 
1118   if (Version >= IndexedInstrProf::Version4) {
1119     const IndexedInstrProf::Summary *SummaryInLE =
1120         reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
1121     uint64_t NFields = endian::byte_swap<uint64_t, llvm::endianness::little>(
1122         SummaryInLE->NumSummaryFields);
1123     uint64_t NEntries = endian::byte_swap<uint64_t, llvm::endianness::little>(
1124         SummaryInLE->NumCutoffEntries);
1125     uint32_t SummarySize =
1126         IndexedInstrProf::Summary::getSize(NFields, NEntries);
1127     std::unique_ptr<IndexedInstrProf::Summary> SummaryData =
1128         IndexedInstrProf::allocSummary(SummarySize);
1129 
1130     const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE);
1131     uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get());
1132     for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
1133       Dst[I] = endian::byte_swap<uint64_t, llvm::endianness::little>(Src[I]);
1134 
1135     SummaryEntryVector DetailedSummary;
1136     for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) {
1137       const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I);
1138       DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount,
1139                                    Ent.NumBlocks);
1140     }
1141     std::unique_ptr<llvm::ProfileSummary> &Summary =
1142         UseCS ? this->CS_Summary : this->Summary;
1143 
1144     // initialize InstrProfSummary using the SummaryData from disk.
1145     Summary = std::make_unique<ProfileSummary>(
1146         UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr,
1147         DetailedSummary, SummaryData->get(Summary::TotalBlockCount),
1148         SummaryData->get(Summary::MaxBlockCount),
1149         SummaryData->get(Summary::MaxInternalBlockCount),
1150         SummaryData->get(Summary::MaxFunctionCount),
1151         SummaryData->get(Summary::TotalNumBlocks),
1152         SummaryData->get(Summary::TotalNumFunctions));
1153     return Cur + SummarySize;
1154   } else {
1155     // The older versions do not support a profile summary. This just computes
1156     // an empty summary, which will not result in accurate hot/cold detection.
1157     // We would need to call addRecord for all NamedInstrProfRecords to get the
1158     // correct summary. However, this version is old (prior to early 2016) and
1159     // has not been supporting an accurate summary for several years.
1160     InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1161     Summary = Builder.getSummary();
1162     return Cur;
1163   }
1164 }
1165 
/// Parses the indexed profile header at the start of DataBuffer and sets up
/// the reader state that depends on it: the profile summary (plus the
/// context-sensitive summary when the CSIR variant bit is set), the on-disk
/// hash table index and, depending on format version and variant bits, the
/// memprof tables, the binary id range and the temporal profile traces.
/// Finally the remapper is created and the index is published.
///
/// \returns success(), or an error when the buffer holds fewer than 24
/// bytes, the header cannot be read, the hash type is unsupported, the
/// memprof schema cannot be read, the binary ids are inconsistent, the
/// temporal trace data is truncated, or the remappings cannot be populated.
1166 Error IndexedInstrProfReader::readHeader() {
1167   using namespace support;
1168 
1169   const unsigned char *Start =
1170       (const unsigned char *)DataBuffer->getBufferStart();
1171   const unsigned char *Cur = Start;
  // NOTE(review): 24 is presumably the size of the smallest header prefix
  // that must be present -- confirm against IndexedInstrProf::Header.
1172   if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
1173     return error(instrprof_error::truncated);
1174 
1175   auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Start);
1176   if (!HeaderOr)
1177     return HeaderOr.takeError();
1178 
1179   const IndexedInstrProf::Header *Header = &HeaderOr.get();
  // Skip the header itself; the summary is read from the bytes that follow.
1180   Cur += Header->size();
1181 
1182   Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur,
1183                     /* UseCS */ false);
  // A second, context-sensitive summary follows when the CSIR bit is set.
1184   if (Header->formatVersion() & VARIANT_MASK_CSIR_PROF)
1185     Cur =
1186         readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur,
1187                     /* UseCS */ true);
1188   // Read the hash type and start offset.
1189   IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
1190       endian::byte_swap<uint64_t, llvm::endianness::little>(Header->HashType));
1191   if (HashType > IndexedInstrProf::HashT::Last)
1192     return error(instrprof_error::unsupported_hash_type);
1193 
1194   uint64_t HashOffset =
1195       endian::byte_swap<uint64_t, llvm::endianness::little>(Header->HashOffset);
1196 
1197   // The hash table with profile counts comes next.
  // NOTE(review): HashOffset is not range-checked against the buffer here.
1198   auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
1199       Start + HashOffset, Cur, Start, HashType, Header->formatVersion());
1200 
1201   // The MemProfOffset field in the header is only valid when the format
1202   // version is at least 8 (when it was introduced).
1203   if (GET_VERSION(Header->formatVersion()) >= 8 &&
1204       Header->formatVersion() & VARIANT_MASK_MEMPROF) {
1205     uint64_t MemProfOffset =
1206         endian::byte_swap<uint64_t, llvm::endianness::little>(
1207             Header->MemProfOffset);
1208 
  // NOTE(review): MemProfOffset and the three offsets read through it are
  // used below without being checked against the buffer bounds.
1209     const unsigned char *Ptr = Start + MemProfOffset;
1210     // The value returned from RecordTableGenerator.Emit.
1211     const uint64_t RecordTableOffset =
1212         support::endian::readNext<uint64_t, llvm::endianness::little,
1213                                   unaligned>(Ptr);
1214     // The offset in the stream right before invoking
1215     // FrameTableGenerator.Emit.
1216     const uint64_t FramePayloadOffset =
1217         support::endian::readNext<uint64_t, llvm::endianness::little,
1218                                   unaligned>(Ptr);
1219     // The value returned from FrameTableGenerator.Emit.
1220     const uint64_t FrameTableOffset =
1221         support::endian::readNext<uint64_t, llvm::endianness::little,
1222                                   unaligned>(Ptr);
1223 
1224     // Read the schema.
1225     auto SchemaOr = memprof::readMemProfSchema(Ptr);
1226     if (!SchemaOr)
1227       return SchemaOr.takeError();
1228     Schema = SchemaOr.get();
1229 
1230     // Now initialize the table reader with a pointer into data buffer.
    // Ptr has been advanced past the schema, so the record payload is taken
    // to start right after it.
1231     MemProfRecordTable.reset(MemProfRecordHashTable::Create(
1232         /*Buckets=*/Start + RecordTableOffset,
1233         /*Payload=*/Ptr,
1234         /*Base=*/Start, memprof::RecordLookupTrait(Schema)));
1235 
1236     // Initialize the frame table reader with the payload and bucket offsets.
1237     MemProfFrameTable.reset(MemProfFrameHashTable::Create(
1238         /*Buckets=*/Start + FrameTableOffset,
1239         /*Payload=*/Start + FramePayloadOffset,
1240         /*Base=*/Start, memprof::FrameLookupTrait()));
1241   }
1242 
1243   // BinaryIdOffset field in the header is only valid when the format version
1244   // is at least 9 (when it was introduced).
1245   if (GET_VERSION(Header->formatVersion()) >= 9) {
1246     uint64_t BinaryIdOffset =
1247         endian::byte_swap<uint64_t, llvm::endianness::little>(
1248             Header->BinaryIdOffset);
1249     const unsigned char *Ptr = Start + BinaryIdOffset;
1250     // Read binary ids size.
    // NOTE(review): the size is read through Ptr before Ptr is compared
    // against the end of the buffer below.
1251     BinaryIdsSize =
1252         support::endian::readNext<uint64_t, llvm::endianness::little,
1253                                   unaligned>(Ptr);
1254     if (BinaryIdsSize % sizeof(uint64_t))
1255       return error(instrprof_error::bad_header);
1256     // Set the binary ids start.
1257     BinaryIdsStart = Ptr;
1258     if (BinaryIdsStart > (const unsigned char *)DataBuffer->getBufferEnd())
1259       return make_error<InstrProfError>(instrprof_error::malformed,
1260                                         "corrupted binary ids");
1261   }
1262 
  // Temporal profile traces are read when the format version is at least 10
  // and the temporal-profile variant bit is set.
1263   if (GET_VERSION(Header->formatVersion()) >= 10 &&
1264       Header->formatVersion() & VARIANT_MASK_TEMPORAL_PROF) {
1265     uint64_t TemporalProfTracesOffset =
1266         endian::byte_swap<uint64_t, llvm::endianness::little>(
1267             Header->TemporalProfTracesOffset);
1268     const unsigned char *Ptr = Start + TemporalProfTracesOffset;
1269     const auto *PtrEnd = (const unsigned char *)DataBuffer->getBufferEnd();
1270     // Expect at least two 64 bit fields: NumTraces, and TraceStreamSize
1271     if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
1272       return error(instrprof_error::truncated);
1273     const uint64_t NumTraces =
1274         support::endian::readNext<uint64_t, llvm::endianness::little,
1275                                   unaligned>(Ptr);
1276     TemporalProfTraceStreamSize =
1277         support::endian::readNext<uint64_t, llvm::endianness::little,
1278                                   unaligned>(Ptr);
    // Each trace is a weight, a function count and that many name refs.
1279     for (unsigned i = 0; i < NumTraces; i++) {
1280       // Expect at least two 64 bit fields: Weight and NumFunctions
1281       if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
1282         return error(instrprof_error::truncated);
1283       TemporalProfTraceTy Trace;
1284       Trace.Weight =
1285           support::endian::readNext<uint64_t, llvm::endianness::little,
1286                                     unaligned>(Ptr);
1287       const uint64_t NumFunctions =
1288           support::endian::readNext<uint64_t, llvm::endianness::little,
1289                                     unaligned>(Ptr);
1290       // Expect at least NumFunctions 64 bit fields
1291       if (Ptr + NumFunctions * sizeof(uint64_t) > PtrEnd)
1292         return error(instrprof_error::truncated);
1293       for (unsigned j = 0; j < NumFunctions; j++) {
1294         const uint64_t NameRef =
1295             support::endian::readNext<uint64_t, llvm::endianness::little,
1296                                       unaligned>(Ptr);
1297         Trace.FunctionNameRefs.push_back(NameRef);
1298       }
1299       TemporalProfTraces.push_back(std::move(Trace));
1300     }
1301   }
1302 
1303   // Load the remapping table now if requested.
1304   if (RemappingBuffer) {
1305     Remapper =
1306         std::make_unique<InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
1307             std::move(RemappingBuffer), *IndexPtr);
1308     if (Error E = Remapper->populateRemappings())
1309       return E;
1310   } else {
1311     Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr);
1312   }
  // Publish the index last: on every early return above, Index is left
  // untouched.
1313   Index = std::move(IndexPtr);
1314 
1315   return success();
1316 }
1317 
1318 InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
1319   if (Symtab)
1320     return *Symtab;
1321 
1322   std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
1323   if (Error E = Index->populateSymtab(*NewSymtab)) {
1324     auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
1325     consumeError(error(ErrCode, Msg));
1326   }
1327 
1328   Symtab = std::move(NewSymtab);
1329   return *Symtab;
1330 }
1331 
1332 Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord(
1333     StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName,
1334     uint64_t *MismatchedFuncSum) {
1335   ArrayRef<NamedInstrProfRecord> Data;
1336   uint64_t FuncSum = 0;
1337   auto Err = Remapper->getRecords(FuncName, Data);
1338   if (Err) {
1339     // If we don't find FuncName, try DeprecatedFuncName to handle profiles
1340     // built by older compilers.
1341     auto Err2 =
1342         handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error {
1343           if (IE.get() != instrprof_error::unknown_function)
1344             return make_error<InstrProfError>(IE);
1345           if (auto Err = Remapper->getRecords(DeprecatedFuncName, Data))
1346             return Err;
1347           return Error::success();
1348         });
1349     if (Err2)
1350       return std::move(Err2);
1351   }
1352   // Found it. Look for counters with the right hash.
1353 
1354   // A flag to indicate if the records are from the same type
1355   // of profile (i.e cs vs nocs).
1356   bool CSBitMatch = false;
1357   auto getFuncSum = [](const std::vector<uint64_t> &Counts) {
1358     uint64_t ValueSum = 0;
1359     for (uint64_t CountValue : Counts) {
1360       if (CountValue == (uint64_t)-1)
1361         continue;
1362       // Handle overflow -- if that happens, return max.
1363       if (std::numeric_limits<uint64_t>::max() - CountValue <= ValueSum)
1364         return std::numeric_limits<uint64_t>::max();
1365       ValueSum += CountValue;
1366     }
1367     return ValueSum;
1368   };
1369 
1370   for (const NamedInstrProfRecord &I : Data) {
1371     // Check for a match and fill the vector if there is one.
1372     if (I.Hash == FuncHash)
1373       return std::move(I);
1374     if (NamedInstrProfRecord::hasCSFlagInHash(I.Hash) ==
1375         NamedInstrProfRecord::hasCSFlagInHash(FuncHash)) {
1376       CSBitMatch = true;
1377       if (MismatchedFuncSum == nullptr)
1378         continue;
1379       FuncSum = std::max(FuncSum, getFuncSum(I.Counts));
1380     }
1381   }
1382   if (CSBitMatch) {
1383     if (MismatchedFuncSum != nullptr)
1384       *MismatchedFuncSum = FuncSum;
1385     return error(instrprof_error::hash_mismatch);
1386   }
1387   return error(instrprof_error::unknown_function);
1388 }
1389 
1390 Expected<memprof::MemProfRecord>
1391 IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
1392   // TODO: Add memprof specific errors.
1393   if (MemProfRecordTable == nullptr)
1394     return make_error<InstrProfError>(instrprof_error::invalid_prof,
1395                                       "no memprof data available in profile");
1396   auto Iter = MemProfRecordTable->find(FuncNameHash);
1397   if (Iter == MemProfRecordTable->end())
1398     return make_error<InstrProfError>(
1399         instrprof_error::unknown_function,
1400         "memprof record not found for function hash " + Twine(FuncNameHash));
1401 
1402   // Setup a callback to convert from frame ids to frame using the on-disk
1403   // FrameData hash table.
1404   memprof::FrameId LastUnmappedFrameId = 0;
1405   bool HasFrameMappingError = false;
1406   auto IdToFrameCallback = [&](const memprof::FrameId Id) {
1407     auto FrIter = MemProfFrameTable->find(Id);
1408     if (FrIter == MemProfFrameTable->end()) {
1409       LastUnmappedFrameId = Id;
1410       HasFrameMappingError = true;
1411       return memprof::Frame(0, 0, 0, false);
1412     }
1413     return *FrIter;
1414   };
1415 
1416   memprof::MemProfRecord Record(*Iter, IdToFrameCallback);
1417 
1418   // Check that all frame ids were successfully converted to frames.
1419   if (HasFrameMappingError) {
1420     return make_error<InstrProfError>(instrprof_error::hash_mismatch,
1421                                       "memprof frame not found for frame id " +
1422                                           Twine(LastUnmappedFrameId));
1423   }
1424   return Record;
1425 }
1426 
1427 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
1428                                                 uint64_t FuncHash,
1429                                                 std::vector<uint64_t> &Counts) {
1430   Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash);
1431   if (Error E = Record.takeError())
1432     return error(std::move(E));
1433 
1434   Counts = Record.get().Counts;
1435   return success();
1436 }
1437 
1438 Error IndexedInstrProfReader::getFunctionBitmapBytes(
1439     StringRef FuncName, uint64_t FuncHash, std::vector<uint8_t> &BitmapBytes) {
1440   Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash);
1441   if (Error E = Record.takeError())
1442     return error(std::move(E));
1443 
1444   BitmapBytes = Record.get().BitmapBytes;
1445   return success();
1446 }
1447 
1448 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
1449   ArrayRef<NamedInstrProfRecord> Data;
1450 
1451   Error E = Index->getRecords(Data);
1452   if (E)
1453     return error(std::move(E));
1454 
1455   Record = Data[RecordIndex++];
1456   if (RecordIndex >= Data.size()) {
1457     Index->advanceToNextKey();
1458     RecordIndex = 0;
1459   }
1460   return success();
1461 }
1462 
1463 Error IndexedInstrProfReader::readBinaryIds(
1464     std::vector<llvm::object::BuildID> &BinaryIds) {
1465   return readBinaryIdsInternal(*DataBuffer, BinaryIdsSize, BinaryIdsStart,
1466                                BinaryIds, llvm::endianness::little);
1467 }
1468 
1469 Error IndexedInstrProfReader::printBinaryIds(raw_ostream &OS) {
1470   std::vector<llvm::object::BuildID> BinaryIds;
1471   if (Error E = readBinaryIds(BinaryIds))
1472     return E;
1473   printBinaryIdsInternal(OS, BinaryIds);
1474   return Error::success();
1475 }
1476 
1477 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) {
1478   uint64_t NumFuncs = 0;
1479   for (const auto &Func : *this) {
1480     if (isIRLevelProfile()) {
1481       bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
1482       if (FuncIsCS != IsCS)
1483         continue;
1484     }
1485     Func.accumulateCounts(Sum);
1486     ++NumFuncs;
1487   }
1488   Sum.NumEntries = NumFuncs;
1489 }
1490