xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfReader.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
//===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading profiling data for instrumentation
// based PGO and coverage.
//
//===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/IR/ProfileSummary.h"
20 #include "llvm/Object/BuildID.h"
21 #include "llvm/ProfileData/DataAccessProf.h"
22 #include "llvm/ProfileData/InstrProf.h"
23 #include "llvm/ProfileData/InstrProfCorrelator.h"
24 #include "llvm/ProfileData/MemProf.h"
25 #include "llvm/ProfileData/MemProfSummary.h"
26 #include "llvm/ProfileData/MemProfYAML.h"
27 #include "llvm/Support/Compiler.h"
28 #include "llvm/Support/Endian.h"
29 #include "llvm/Support/Error.h"
30 #include "llvm/Support/LineIterator.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Support/MemoryBuffer.h"
33 #include "llvm/Support/OnDiskHashTable.h"
34 #include "llvm/Support/SwapByteOrder.h"
35 #include <algorithm>
36 #include <cassert>
37 #include <cstddef>
38 #include <cstdint>
39 #include <iterator>
40 #include <memory>
41 #include <utility>
42 #include <vector>
43 
44 namespace llvm {
45 
// Forward declarations. InstrProfReader is named as a default template
// argument before its definition, and vfs::FileSystem is only taken by
// reference in this header.
class InstrProfReader;

namespace vfs {
class FileSystem;
} // namespace vfs
51 
52 /// A file format agnostic iterator over profiling data.
53 template <class record_type = NamedInstrProfRecord,
54           class reader_type = InstrProfReader>
55 class InstrProfIterator {
56 public:
57   using iterator_category = std::input_iterator_tag;
58   using value_type = record_type;
59   using difference_type = std::ptrdiff_t;
60   using pointer = value_type *;
61   using reference = value_type &;
62 
63 private:
64   reader_type *Reader = nullptr;
65   value_type Record;
66 
increment()67   void increment() {
68     if (Error E = Reader->readNextRecord(Record)) {
69       // Handle errors in the reader.
70       InstrProfError::take(std::move(E));
71       *this = InstrProfIterator();
72     }
73   }
74 
75 public:
76   InstrProfIterator() = default;
InstrProfIterator(reader_type * Reader)77   InstrProfIterator(reader_type *Reader) : Reader(Reader) { increment(); }
78 
79   InstrProfIterator &operator++() {
80     increment();
81     return *this;
82   }
83   bool operator==(const InstrProfIterator &RHS) const {
84     return Reader == RHS.Reader;
85   }
86   bool operator!=(const InstrProfIterator &RHS) const {
87     return Reader != RHS.Reader;
88   }
89   value_type &operator*() { return Record; }
90   value_type *operator->() { return &Record; }
91 };
92 
93 /// Base class and interface for reading profiling data of any known instrprof
94 /// format. Provides an iterator over NamedInstrProfRecords.
95 class InstrProfReader {
96   instrprof_error LastError = instrprof_error::success;
97   std::string LastErrorMsg;
98 
99 public:
100   InstrProfReader() = default;
101   virtual ~InstrProfReader() = default;
102 
103   /// Read the header.  Required before reading first record.
104   virtual Error readHeader() = 0;
105 
106   /// Read a single record.
107   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
108 
109   /// Read a list of binary ids.
readBinaryIds(std::vector<llvm::object::BuildID> & BinaryIds)110   virtual Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) {
111     return success();
112   }
113 
114   /// Print binary ids.
printBinaryIds(raw_ostream & OS)115   virtual Error printBinaryIds(raw_ostream &OS) { return success(); };
116 
117   /// Iterator over profile data.
begin()118   InstrProfIterator<> begin() { return InstrProfIterator<>(this); }
end()119   InstrProfIterator<> end() { return InstrProfIterator<>(); }
120 
121   /// Return the profile version.
122   virtual uint64_t getVersion() const = 0;
123 
124   virtual bool isIRLevelProfile() const = 0;
125 
126   virtual bool hasCSIRLevelProfile() const = 0;
127 
128   virtual bool instrEntryBBEnabled() const = 0;
129 
130   /// Return true if the profile instruments all loop entries.
131   virtual bool instrLoopEntriesEnabled() const = 0;
132 
133   /// Return true if the profile has single byte counters representing coverage.
134   virtual bool hasSingleByteCoverage() const = 0;
135 
136   /// Return true if the profile only instruments function entries.
137   virtual bool functionEntryOnly() const = 0;
138 
139   /// Return true if profile includes a memory profile.
140   virtual bool hasMemoryProfile() const = 0;
141 
142   /// Return true if this has a temporal profile.
143   virtual bool hasTemporalProfile() const = 0;
144 
145   /// Returns a BitsetEnum describing the attributes of the profile. To check
146   /// individual attributes prefer using the helpers above.
147   virtual InstrProfKind getProfileKind() const = 0;
148 
149   /// Return the PGO symtab. There are three different readers:
150   /// Raw, Text, and Indexed profile readers. The first two types
151   /// of readers are used only by llvm-profdata tool, while the indexed
152   /// profile reader is also used by llvm-cov tool and the compiler (
153   /// backend or frontend). Since creating PGO symtab can create
154   /// significant runtime and memory overhead (as it touches data
155   /// for the whole program), InstrProfSymtab for the indexed profile
156   /// reader should be created on demand and it is recommended to be
157   /// only used for dumping purpose with llvm-proftool, not with the
158   /// compiler.
159   virtual InstrProfSymtab &getSymtab() = 0;
160 
161   /// Compute the sum of counts and return in Sum.
162   LLVM_ABI void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
163 
164 protected:
165   std::unique_ptr<InstrProfSymtab> Symtab;
166   /// A list of temporal profile traces.
167   SmallVector<TemporalProfTraceTy> TemporalProfTraces;
168   /// The total number of temporal profile traces seen.
169   uint64_t TemporalProfTraceStreamSize = 0;
170 
171   /// Set the current error and return same.
172   Error error(instrprof_error Err, const std::string &ErrMsg = "") {
173     LastError = Err;
174     LastErrorMsg = ErrMsg;
175     if (Err == instrprof_error::success)
176       return Error::success();
177     return make_error<InstrProfError>(Err, ErrMsg);
178   }
179 
error(Error && E)180   Error error(Error &&E) {
181     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
182       LastError = IPE.get();
183       LastErrorMsg = IPE.getMessage();
184     });
185     return make_error<InstrProfError>(LastError, LastErrorMsg);
186   }
187 
188   /// Clear the current error and return a successful one.
success()189   Error success() { return error(instrprof_error::success); }
190 
191 public:
192   /// Return true if the reader has finished reading the profile data.
isEOF()193   bool isEOF() { return LastError == instrprof_error::eof; }
194 
195   /// Return true if the reader encountered an error reading profiling data.
hasError()196   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
197 
198   /// Get the current error.
getError()199   Error getError() {
200     if (hasError())
201       return make_error<InstrProfError>(LastError, LastErrorMsg);
202     return Error::success();
203   }
204 
205   /// Factory method to create an appropriately typed reader for the given
206   /// instrprof file.
207   LLVM_ABI static Expected<std::unique_ptr<InstrProfReader>> create(
208       const Twine &Path, vfs::FileSystem &FS,
209       const InstrProfCorrelator *Correlator = nullptr,
210       const object::BuildIDFetcher *BIDFetcher = nullptr,
211       const InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind =
212           InstrProfCorrelator::ProfCorrelatorKind::NONE,
213       std::function<void(Error)> Warn = nullptr);
214 
215   LLVM_ABI static Expected<std::unique_ptr<InstrProfReader>> create(
216       std::unique_ptr<MemoryBuffer> Buffer,
217       const InstrProfCorrelator *Correlator = nullptr,
218       const object::BuildIDFetcher *BIDFetcher = nullptr,
219       const InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind =
220           InstrProfCorrelator::ProfCorrelatorKind::NONE,
221       std::function<void(Error)> Warn = nullptr);
222 
223   /// \param Weight for raw profiles use this as the temporal profile trace
224   ///               weight
225   /// \returns a list of temporal profile traces.
226   virtual SmallVector<TemporalProfTraceTy> &
227   getTemporalProfTraces(std::optional<uint64_t> Weight = {}) {
228     // For non-raw profiles we ignore the input weight and instead use the
229     // weights already in the traces.
230     return TemporalProfTraces;
231   }
232   /// \returns the total number of temporal profile traces seen.
getTemporalProfTraceStreamSize()233   uint64_t getTemporalProfTraceStreamSize() {
234     return TemporalProfTraceStreamSize;
235   }
236 };
237 
238 /// Reader for the simple text based instrprof format.
239 ///
240 /// This format is a simple text format that's suitable for test data. Records
241 /// are separated by one or more blank lines, and record fields are separated by
242 /// new lines.
243 ///
244 /// Each record consists of a function name, a function hash, a number of
245 /// counters, and then each counter value, in that order.
246 class LLVM_ABI TextInstrProfReader : public InstrProfReader {
247 private:
248   /// The profile data file contents.
249   std::unique_ptr<MemoryBuffer> DataBuffer;
250   /// Iterator over the profile data.
251   line_iterator Line;
252   /// The attributes of the current profile.
253   InstrProfKind ProfileKind = InstrProfKind::Unknown;
254 
255   Error readValueProfileData(InstrProfRecord &Record);
256 
257   Error readTemporalProfTraceData();
258 
259 public:
TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)260   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
261       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
262   TextInstrProfReader(const TextInstrProfReader &) = delete;
263   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
264 
265   /// Return true if the given buffer is in text instrprof format.
266   static bool hasFormat(const MemoryBuffer &Buffer);
267 
268   // Text format does not have version, so return 0.
getVersion()269   uint64_t getVersion() const override { return 0; }
270 
isIRLevelProfile()271   bool isIRLevelProfile() const override {
272     return static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation);
273   }
274 
hasCSIRLevelProfile()275   bool hasCSIRLevelProfile() const override {
276     return static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive);
277   }
278 
instrEntryBBEnabled()279   bool instrEntryBBEnabled() const override {
280     return static_cast<bool>(ProfileKind &
281                              InstrProfKind::FunctionEntryInstrumentation);
282   }
283 
instrLoopEntriesEnabled()284   bool instrLoopEntriesEnabled() const override {
285     return static_cast<bool>(ProfileKind &
286                              InstrProfKind::LoopEntriesInstrumentation);
287   }
288 
hasSingleByteCoverage()289   bool hasSingleByteCoverage() const override {
290     return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage);
291   }
292 
functionEntryOnly()293   bool functionEntryOnly() const override {
294     return static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly);
295   }
296 
hasMemoryProfile()297   bool hasMemoryProfile() const override {
298     // TODO: Add support for text format memory profiles.
299     return false;
300   }
301 
hasTemporalProfile()302   bool hasTemporalProfile() const override {
303     return static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile);
304   }
305 
getProfileKind()306   InstrProfKind getProfileKind() const override { return ProfileKind; }
307 
308   /// Read the header.
309   Error readHeader() override;
310 
311   /// Read a single record.
312   Error readNextRecord(NamedInstrProfRecord &Record) override;
313 
getSymtab()314   InstrProfSymtab &getSymtab() override {
315     assert(Symtab);
316     return *Symtab;
317   }
318 };
319 
320 /// Reader for the raw instrprof binary format from runtime.
321 ///
322 /// This format is a raw memory dump of the instrumentation-based profiling data
323 /// from the runtime.  It has no index.
324 ///
325 /// Templated on the unsigned type whose size matches pointers on the platform
326 /// that wrote the profile.
327 template <class IntPtrT>
328 class RawInstrProfReader : public InstrProfReader {
329 private:
330   /// The profile data file contents.
331   std::unique_ptr<MemoryBuffer> DataBuffer;
332   /// If available, this hold the ProfileData array used to correlate raw
333   /// instrumentation data to their functions.
334   const InstrProfCorrelatorImpl<IntPtrT> *Correlator;
335   /// Fetches debuginfo by build id to correlate profiles.
336   const object::BuildIDFetcher *BIDFetcher;
337   /// Correlates profiles with build id fetcher by fetching debuginfo with build
338   /// ID.
339   std::unique_ptr<InstrProfCorrelator> BIDFetcherCorrelator;
340   /// Indicates if should use debuginfo or binary to correlate with build id
341   /// fetcher.
342   InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind;
343   /// A list of timestamps paired with a function name reference.
344   std::vector<std::pair<uint64_t, uint64_t>> TemporalProfTimestamps;
345   bool ShouldSwapBytes;
346   // The value of the version field of the raw profile data header. The lower 32
347   // bits specifies the format version and the most significant 32 bits specify
348   // the variant types of the profile.
349   uint64_t Version;
350   uint64_t CountersDelta;
351   uint64_t BitmapDelta;
352   uint64_t NamesDelta;
353   const RawInstrProf::ProfileData<IntPtrT> *Data;
354   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
355   const RawInstrProf::VTableProfileData<IntPtrT> *VTableBegin = nullptr;
356   const RawInstrProf::VTableProfileData<IntPtrT> *VTableEnd = nullptr;
357   const char *CountersStart;
358   const char *CountersEnd;
359   const char *BitmapStart;
360   const char *BitmapEnd;
361   const char *NamesStart;
362   const char *NamesEnd;
363   const char *VNamesStart = nullptr;
364   const char *VNamesEnd = nullptr;
365   // After value profile is all read, this pointer points to
366   // the header of next profile data (if exists)
367   const uint8_t *ValueDataStart;
368   uint32_t ValueKindLast;
369   uint32_t CurValueDataSize;
370   std::vector<llvm::object::BuildID> BinaryIds;
371 
372   std::function<void(Error)> Warn;
373 
374   /// Maxium counter value 2^56.
375   static const uint64_t MaxCounterValue = (1ULL << 56);
376 
377 public:
RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer,const InstrProfCorrelator * Correlator,const object::BuildIDFetcher * BIDFetcher,const InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind,std::function<void (Error)> Warn)378   RawInstrProfReader(
379       std::unique_ptr<MemoryBuffer> DataBuffer,
380       const InstrProfCorrelator *Correlator,
381       const object::BuildIDFetcher *BIDFetcher,
382       const InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind,
383       std::function<void(Error)> Warn)
384       : DataBuffer(std::move(DataBuffer)),
385         Correlator(dyn_cast_or_null<const InstrProfCorrelatorImpl<IntPtrT>>(
386             Correlator)),
387         BIDFetcher(BIDFetcher),
388         BIDFetcherCorrelatorKind(BIDFetcherCorrelatorKind), Warn(Warn) {}
389 
390   RawInstrProfReader(const RawInstrProfReader &) = delete;
391   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
392 
393   static bool hasFormat(const MemoryBuffer &DataBuffer);
394   Error readHeader() override;
395   Error readNextRecord(NamedInstrProfRecord &Record) override;
396   Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
397   Error printBinaryIds(raw_ostream &OS) override;
398 
getVersion()399   uint64_t getVersion() const override { return Version; }
400 
isIRLevelProfile()401   bool isIRLevelProfile() const override {
402     return (Version & VARIANT_MASK_IR_PROF) != 0;
403   }
404 
hasCSIRLevelProfile()405   bool hasCSIRLevelProfile() const override {
406     return (Version & VARIANT_MASK_CSIR_PROF) != 0;
407   }
408 
instrEntryBBEnabled()409   bool instrEntryBBEnabled() const override {
410     return (Version & VARIANT_MASK_INSTR_ENTRY) != 0;
411   }
412 
instrLoopEntriesEnabled()413   bool instrLoopEntriesEnabled() const override {
414     return (Version & VARIANT_MASK_INSTR_LOOP_ENTRIES) != 0;
415   }
416 
hasSingleByteCoverage()417   bool hasSingleByteCoverage() const override {
418     return (Version & VARIANT_MASK_BYTE_COVERAGE) != 0;
419   }
420 
functionEntryOnly()421   bool functionEntryOnly() const override {
422     return (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
423   }
424 
hasMemoryProfile()425   bool hasMemoryProfile() const override {
426     // Memory profiles have a separate raw format, so this should never be set.
427     assert(!(Version & VARIANT_MASK_MEMPROF));
428     return false;
429   }
430 
hasTemporalProfile()431   bool hasTemporalProfile() const override {
432     return (Version & VARIANT_MASK_TEMPORAL_PROF) != 0;
433   }
434 
435   /// Returns a BitsetEnum describing the attributes of the raw instr profile.
436   InstrProfKind getProfileKind() const override;
437 
getSymtab()438   InstrProfSymtab &getSymtab() override {
439     assert(Symtab.get());
440     return *Symtab.get();
441   }
442 
443   SmallVector<TemporalProfTraceTy> &
444   getTemporalProfTraces(std::optional<uint64_t> Weight = {}) override;
445 
446 private:
447   Error createSymtab(InstrProfSymtab &Symtab);
448   Error readNextHeader(const char *CurrentPos);
449   Error readHeader(const RawInstrProf::Header &Header);
450 
swap(IntT Int)451   template <class IntT> IntT swap(IntT Int) const {
452     return ShouldSwapBytes ? llvm::byteswap(Int) : Int;
453   }
454 
getDataEndianness()455   llvm::endianness getDataEndianness() const {
456     if (!ShouldSwapBytes)
457       return llvm::endianness::native;
458     if (llvm::endianness::native == llvm::endianness::little)
459       return llvm::endianness::big;
460     else
461       return llvm::endianness::little;
462   }
463 
getNumPaddingBytes(uint64_t SizeInBytes)464   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
465     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
466   }
467 
468   Error readName(NamedInstrProfRecord &Record);
469   Error readFuncHash(NamedInstrProfRecord &Record);
470   Error readRawCounts(InstrProfRecord &Record);
471   Error readRawBitmapBytes(InstrProfRecord &Record);
472   Error readValueProfilingData(InstrProfRecord &Record);
atEnd()473   bool atEnd() const { return Data == DataEnd; }
474 
advanceData()475   void advanceData() {
476     // `CountersDelta` is a constant zero when using debug info correlation.
477     if (!Correlator && !BIDFetcherCorrelator) {
478       // The initial CountersDelta is the in-memory address difference between
479       // the data and counts sections:
480       // start(__llvm_prf_cnts) - start(__llvm_prf_data)
481       // As we advance to the next record, we maintain the correct CountersDelta
482       // with respect to the next record.
483       CountersDelta -= sizeof(*Data);
484       BitmapDelta -= sizeof(*Data);
485     }
486     Data++;
487     ValueDataStart += CurValueDataSize;
488   }
489 
getNextHeaderPos()490   const char *getNextHeaderPos() const {
491       assert(atEnd());
492       return (const char *)ValueDataStart;
493   }
494 
getName(uint64_t NameRef)495   StringRef getName(uint64_t NameRef) const {
496     return Symtab->getFuncOrVarName(swap(NameRef));
497   }
498 
getCounterTypeSize()499   int getCounterTypeSize() const {
500     return hasSingleByteCoverage() ? sizeof(uint8_t) : sizeof(uint64_t);
501   }
502 };
503 
504 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
505 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
506 
namespace IndexedInstrProf {

// Opaque declaration of the hash type enumeration; the enumerators are not
// needed in this header.
enum class HashT : uint32_t;

} // end namespace IndexedInstrProf
512 
513 /// Trait for lookups into the on-disk hash table for the binary instrprof
514 /// format.
515 class InstrProfLookupTrait {
516   std::vector<NamedInstrProfRecord> DataBuffer;
517   IndexedInstrProf::HashT HashType;
518   unsigned FormatVersion;
519   // Endianness of the input value profile data.
520   // It should be LE by default, but can be changed
521   // for testing purpose.
522   llvm::endianness ValueProfDataEndianness = llvm::endianness::little;
523 
524 public:
InstrProfLookupTrait(IndexedInstrProf::HashT HashType,unsigned FormatVersion)525   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
526       : HashType(HashType), FormatVersion(FormatVersion) {}
527 
528   using data_type = ArrayRef<NamedInstrProfRecord>;
529 
530   using internal_key_type = StringRef;
531   using external_key_type = StringRef;
532   using hash_value_type = uint64_t;
533   using offset_type = uint64_t;
534 
EqualKey(StringRef A,StringRef B)535   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
GetInternalKey(StringRef K)536   static StringRef GetInternalKey(StringRef K) { return K; }
GetExternalKey(StringRef K)537   static StringRef GetExternalKey(StringRef K) { return K; }
538 
539   LLVM_ABI hash_value_type ComputeHash(StringRef K);
540 
541   static std::pair<offset_type, offset_type>
ReadKeyDataLength(const unsigned char * & D)542   ReadKeyDataLength(const unsigned char *&D) {
543     using namespace support;
544 
545     offset_type KeyLen =
546         endian::readNext<offset_type, llvm::endianness::little>(D);
547     offset_type DataLen =
548         endian::readNext<offset_type, llvm::endianness::little>(D);
549     return std::make_pair(KeyLen, DataLen);
550   }
551 
ReadKey(const unsigned char * D,offset_type N)552   StringRef ReadKey(const unsigned char *D, offset_type N) {
553     return StringRef((const char *)D, N);
554   }
555 
556   LLVM_ABI bool readValueProfilingData(const unsigned char *&D,
557                                        const unsigned char *const End);
558   LLVM_ABI data_type ReadData(StringRef K, const unsigned char *D,
559                               offset_type N);
560 
561   // Used for testing purpose only.
setValueProfDataEndianness(llvm::endianness Endianness)562   void setValueProfDataEndianness(llvm::endianness Endianness) {
563     ValueProfDataEndianness = Endianness;
564   }
565 };
566 
567 struct InstrProfReaderIndexBase {
568   virtual ~InstrProfReaderIndexBase() = default;
569 
570   // Read all the profile records with the same key pointed to the current
571   // iterator.
572   virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
573 
574   // Read all the profile records with the key equal to FuncName
575   virtual Error getRecords(StringRef FuncName,
576                                      ArrayRef<NamedInstrProfRecord> &Data) = 0;
577   virtual void advanceToNextKey() = 0;
578   virtual bool atEnd() const = 0;
579   virtual void setValueProfDataEndianness(llvm::endianness Endianness) = 0;
580   virtual uint64_t getVersion() const = 0;
581   virtual bool isIRLevelProfile() const = 0;
582   virtual bool hasCSIRLevelProfile() const = 0;
583   virtual bool instrEntryBBEnabled() const = 0;
584   virtual bool instrLoopEntriesEnabled() const = 0;
585   virtual bool hasSingleByteCoverage() const = 0;
586   virtual bool functionEntryOnly() const = 0;
587   virtual bool hasMemoryProfile() const = 0;
588   virtual bool hasTemporalProfile() const = 0;
589   virtual InstrProfKind getProfileKind() const = 0;
590   virtual Error populateSymtab(InstrProfSymtab &) = 0;
591 };
592 
593 using OnDiskHashTableImplV3 =
594     OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
595 
596 using MemProfRecordHashTable =
597     OnDiskIterableChainedHashTable<memprof::RecordLookupTrait>;
598 using MemProfFrameHashTable =
599     OnDiskIterableChainedHashTable<memprof::FrameLookupTrait>;
600 using MemProfCallStackHashTable =
601     OnDiskIterableChainedHashTable<memprof::CallStackLookupTrait>;
602 
// Forward declaration; named as a friend of InstrProfReaderIndex.
template <typename HashTableImpl>
class InstrProfReaderItaniumRemapper;
605 
606 template <typename HashTableImpl>
607 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
608 private:
609   std::unique_ptr<HashTableImpl> HashTable;
610   typename HashTableImpl::data_iterator RecordIterator;
611   uint64_t FormatVersion;
612 
613   friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
614 
615 public:
616   InstrProfReaderIndex(const unsigned char *Buckets,
617                        const unsigned char *const Payload,
618                        const unsigned char *const Base,
619                        IndexedInstrProf::HashT HashType, uint64_t Version);
620   ~InstrProfReaderIndex() override = default;
621 
622   Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
623   Error getRecords(StringRef FuncName,
624                    ArrayRef<NamedInstrProfRecord> &Data) override;
advanceToNextKey()625   void advanceToNextKey() override { RecordIterator++; }
626 
atEnd()627   bool atEnd() const override {
628     return RecordIterator == HashTable->data_end();
629   }
630 
setValueProfDataEndianness(llvm::endianness Endianness)631   void setValueProfDataEndianness(llvm::endianness Endianness) override {
632     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
633   }
634 
getVersion()635   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
636 
isIRLevelProfile()637   bool isIRLevelProfile() const override {
638     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
639   }
640 
hasCSIRLevelProfile()641   bool hasCSIRLevelProfile() const override {
642     return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
643   }
644 
instrEntryBBEnabled()645   bool instrEntryBBEnabled() const override {
646     return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
647   }
648 
instrLoopEntriesEnabled()649   bool instrLoopEntriesEnabled() const override {
650     return (FormatVersion & VARIANT_MASK_INSTR_LOOP_ENTRIES) != 0;
651   }
652 
hasSingleByteCoverage()653   bool hasSingleByteCoverage() const override {
654     return (FormatVersion & VARIANT_MASK_BYTE_COVERAGE) != 0;
655   }
656 
functionEntryOnly()657   bool functionEntryOnly() const override {
658     return (FormatVersion & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
659   }
660 
hasMemoryProfile()661   bool hasMemoryProfile() const override {
662     return (FormatVersion & VARIANT_MASK_MEMPROF) != 0;
663   }
664 
hasTemporalProfile()665   bool hasTemporalProfile() const override {
666     return (FormatVersion & VARIANT_MASK_TEMPORAL_PROF) != 0;
667   }
668 
669   InstrProfKind getProfileKind() const override;
670 
populateSymtab(InstrProfSymtab & Symtab)671   Error populateSymtab(InstrProfSymtab &Symtab) override {
672     // FIXME: the create method calls 'finalizeSymtab' and sorts a bunch of
673     // arrays/maps. Since there are other data sources other than 'HashTable' to
674     // populate a symtab, it might make sense to have something like this
675     // 1. Let each data source populate Symtab and init the arrays/maps without
676     // calling 'finalizeSymtab'
677     // 2. Call 'finalizeSymtab' once to get all arrays/maps sorted if needed.
678     return Symtab.create(HashTable->keys());
679   }
680 };
681 
682 /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
683 class InstrProfReaderRemapper {
684 public:
685   virtual ~InstrProfReaderRemapper() = default;
populateRemappings()686   virtual Error populateRemappings() { return Error::success(); }
687   virtual Error getRecords(StringRef FuncName,
688                            ArrayRef<NamedInstrProfRecord> &Data) = 0;
689 };
690 
691 class IndexedMemProfReader {
692 private:
693   /// The MemProf version.
694   memprof::IndexedVersion Version =
695       static_cast<memprof::IndexedVersion>(memprof::MinimumSupportedVersion);
696   /// MemProf summary (if available, version >= 4).
697   std::unique_ptr<memprof::MemProfSummary> MemProfSum;
698   /// MemProf profile schema (if available).
699   memprof::MemProfSchema Schema;
700   /// MemProf record profile data on-disk indexed via llvm::md5(FunctionName).
701   std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
702   /// MemProf frame profile data on-disk indexed via frame id.
703   std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
704   /// MemProf call stack data on-disk indexed via call stack id.
705   std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
706   /// The starting address of the frame array.
707   const unsigned char *FrameBase = nullptr;
708   /// The starting address of the call stack array.
709   const unsigned char *CallStackBase = nullptr;
710   // The number of elements in the radix tree array.
711   unsigned RadixTreeSize = 0;
712   /// The data access profiles, deserialized from binary data.
713   std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
714 
715   Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
716   Error deserializeRadixTreeBased(const unsigned char *Start,
717                                   const unsigned char *Ptr,
718                                   memprof::IndexedVersion Version);
719 
720 public:
721   IndexedMemProfReader() = default;
722 
723   LLVM_ABI Error deserialize(const unsigned char *Start,
724                              uint64_t MemProfOffset);
725 
726   LLVM_ABI Expected<memprof::MemProfRecord>
727   getMemProfRecord(const uint64_t FuncNameHash) const;
728 
729   LLVM_ABI DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
730   getMemProfCallerCalleePairs() const;
731 
732   // Return the entire MemProf profile.
733   LLVM_ABI memprof::AllMemProfData getAllMemProfData() const;
734 
getSummary()735   memprof::MemProfSummary *getSummary() const { return MemProfSum.get(); }
736 };
737 
738 /// Reader for the indexed binary instrprof format.
739 class LLVM_ABI IndexedInstrProfReader : public InstrProfReader {
740 private:
741   /// The profile data file contents.
742   std::unique_ptr<MemoryBuffer> DataBuffer;
743   /// The profile remapping file contents.
744   std::unique_ptr<MemoryBuffer> RemappingBuffer;
745   /// The index into the profile data.
746   std::unique_ptr<InstrProfReaderIndexBase> Index;
747   /// The profile remapping file contents.
748   std::unique_ptr<InstrProfReaderRemapper> Remapper;
749   /// Profile summary data.
750   std::unique_ptr<ProfileSummary> Summary;
751   /// Context sensitive profile summary data.
752   std::unique_ptr<ProfileSummary> CS_Summary;
753   IndexedMemProfReader MemProfReader;
754   /// The compressed vtable names, to be used for symtab construction.
755   /// A compiler that reads indexed profiles could construct symtab from module
756   /// IR so it doesn't need the decompressed names.
757   StringRef VTableName;
758   /// A memory buffer holding binary ids.
759   ArrayRef<uint8_t> BinaryIdsBuffer;
760 
761   // Index to the current record in the record array.
762   unsigned RecordIndex = 0;
763 
764   // Read the profile summary. Return a pointer pointing to one byte past the
765   // end of the summary data if it exists or the input \c Cur.
766   // \c UseCS indicates whether to use the context-sensitive profile summary.
767   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
768                                    const unsigned char *Cur, bool UseCS);
769 
770 public:
771   IndexedInstrProfReader(
772       std::unique_ptr<MemoryBuffer> DataBuffer,
773       std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
DataBuffer(std::move (DataBuffer))774       : DataBuffer(std::move(DataBuffer)),
775         RemappingBuffer(std::move(RemappingBuffer)) {}
776   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
777   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
778 
779   /// Return the profile version.
getVersion()780   uint64_t getVersion() const override { return Index->getVersion(); }
isIRLevelProfile()781   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
hasCSIRLevelProfile()782   bool hasCSIRLevelProfile() const override {
783     return Index->hasCSIRLevelProfile();
784   }
785 
instrEntryBBEnabled()786   bool instrEntryBBEnabled() const override {
787     return Index->instrEntryBBEnabled();
788   }
789 
instrLoopEntriesEnabled()790   bool instrLoopEntriesEnabled() const override {
791     return Index->instrLoopEntriesEnabled();
792   }
793 
hasSingleByteCoverage()794   bool hasSingleByteCoverage() const override {
795     return Index->hasSingleByteCoverage();
796   }
797 
functionEntryOnly()798   bool functionEntryOnly() const override { return Index->functionEntryOnly(); }
799 
hasMemoryProfile()800   bool hasMemoryProfile() const override { return Index->hasMemoryProfile(); }
801 
hasTemporalProfile()802   bool hasTemporalProfile() const override {
803     return Index->hasTemporalProfile();
804   }
805 
806   /// Returns a BitsetEnum describing the attributes of the indexed instr
807   /// profile.
getProfileKind()808   InstrProfKind getProfileKind() const override {
809     return Index->getProfileKind();
810   }
811 
812   /// Return true if the given buffer is in an indexed instrprof format.
813   static bool hasFormat(const MemoryBuffer &DataBuffer);
814 
815   /// Read the file header.
816   Error readHeader() override;
817   /// Read a single record.
818   Error readNextRecord(NamedInstrProfRecord &Record) override;
819 
820   /// Return the NamedInstrProfRecord associated with FuncName and FuncHash.
821   /// When return a hash_mismatch error and MismatchedFuncSum is not nullptr,
822   /// the sum of all counters in the mismatched function will be set to
823   /// MismatchedFuncSum. If there are multiple instances of mismatched
824   /// functions, MismatchedFuncSum returns the maximum. If \c FuncName is not
825   /// found, try to lookup \c DeprecatedFuncName to handle profiles built by
826   /// older compilers.
827   Expected<NamedInstrProfRecord>
828   getInstrProfRecord(StringRef FuncName, uint64_t FuncHash,
829                      StringRef DeprecatedFuncName = "",
830                      uint64_t *MismatchedFuncSum = nullptr);
831 
832   /// Return the memprof record for the function identified by
833   /// llvm::md5(Name).
getMemProfRecord(uint64_t FuncNameHash)834   Expected<memprof::MemProfRecord> getMemProfRecord(uint64_t FuncNameHash) {
835     return MemProfReader.getMemProfRecord(FuncNameHash);
836   }
837 
838   DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
getMemProfCallerCalleePairs()839   getMemProfCallerCalleePairs() {
840     return MemProfReader.getMemProfCallerCalleePairs();
841   }
842 
getAllMemProfData()843   memprof::AllMemProfData getAllMemProfData() const {
844     return MemProfReader.getAllMemProfData();
845   }
846 
847   /// Fill Counts with the profile data for the given function name.
848   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
849                           std::vector<uint64_t> &Counts);
850 
851   /// Fill Bitmap with the profile data for the given function name.
852   Error getFunctionBitmap(StringRef FuncName, uint64_t FuncHash,
853                           BitVector &Bitmap);
854 
855   /// Return the maximum of all known function counts.
856   /// \c UseCS indicates whether to use the context-sensitive count.
getMaximumFunctionCount(bool UseCS)857   uint64_t getMaximumFunctionCount(bool UseCS) {
858     if (UseCS) {
859       assert(CS_Summary && "No context sensitive profile summary");
860       return CS_Summary->getMaxFunctionCount();
861     } else {
862       assert(Summary && "No profile summary");
863       return Summary->getMaxFunctionCount();
864     }
865   }
866 
867   /// Factory method to create an indexed reader.
868   static Expected<std::unique_ptr<IndexedInstrProfReader>>
869   create(const Twine &Path, vfs::FileSystem &FS,
870          const Twine &RemappingPath = "");
871 
872   static Expected<std::unique_ptr<IndexedInstrProfReader>>
873   create(std::unique_ptr<MemoryBuffer> Buffer,
874          std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
875 
876   // Used for testing purpose only.
setValueProfDataEndianness(llvm::endianness Endianness)877   void setValueProfDataEndianness(llvm::endianness Endianness) {
878     Index->setValueProfDataEndianness(Endianness);
879   }
880 
881   // See description in the base class. This interface is designed
882   // to be used by llvm-profdata (for dumping). Avoid using this when
883   // the client is the compiler.
884   InstrProfSymtab &getSymtab() override;
885 
886   /// Return the profile summary.
887   /// \c UseCS indicates whether to use the context-sensitive summary.
getSummary(bool UseCS)888   ProfileSummary &getSummary(bool UseCS) {
889     if (UseCS) {
890       assert(CS_Summary && "No context sensitive summary");
891       return *CS_Summary;
892     } else {
893       assert(Summary && "No profile summary");
894       return *Summary;
895     }
896   }
897 
898   /// Return the MemProf summary. Will be null if unavailable (version < 4).
getMemProfSummary()899   memprof::MemProfSummary *getMemProfSummary() const {
900     return MemProfReader.getSummary();
901   }
902 
903   Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
904   Error printBinaryIds(raw_ostream &OS) override;
905 };
906 
907 } // end namespace llvm
908 
909 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
910