xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfReader.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading profiling data for instrumentation
10 // based PGO and coverage.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/IR/ProfileSummary.h"
20 #include "llvm/Object/BuildID.h"
21 #include "llvm/ProfileData/InstrProf.h"
22 #include "llvm/ProfileData/InstrProfCorrelator.h"
23 #include "llvm/ProfileData/MemProf.h"
24 #include "llvm/Support/Endian.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/LineIterator.h"
27 #include "llvm/Support/MathExtras.h"
28 #include "llvm/Support/MemoryBuffer.h"
29 #include "llvm/Support/OnDiskHashTable.h"
30 #include "llvm/Support/SwapByteOrder.h"
31 #include <algorithm>
32 #include <cassert>
33 #include <cstddef>
34 #include <cstdint>
35 #include <iterator>
36 #include <memory>
37 #include <utility>
38 #include <vector>
39 
40 namespace llvm {
41 
42 class InstrProfReader;
43 
44 namespace vfs {
45 class FileSystem;
46 } // namespace vfs
47 
48 /// A file format agnostic iterator over profiling data.
49 template <class record_type = NamedInstrProfRecord,
50           class reader_type = InstrProfReader>
51 class InstrProfIterator {
52 public:
53   using iterator_category = std::input_iterator_tag;
54   using value_type = record_type;
55   using difference_type = std::ptrdiff_t;
56   using pointer = value_type *;
57   using reference = value_type &;
58 
59 private:
60   reader_type *Reader = nullptr;
61   value_type Record;
62 
increment()63   void increment() {
64     if (Error E = Reader->readNextRecord(Record)) {
65       // Handle errors in the reader.
66       InstrProfError::take(std::move(E));
67       *this = InstrProfIterator();
68     }
69   }
70 
71 public:
72   InstrProfIterator() = default;
InstrProfIterator(reader_type * Reader)73   InstrProfIterator(reader_type *Reader) : Reader(Reader) { increment(); }
74 
75   InstrProfIterator &operator++() {
76     increment();
77     return *this;
78   }
79   bool operator==(const InstrProfIterator &RHS) const {
80     return Reader == RHS.Reader;
81   }
82   bool operator!=(const InstrProfIterator &RHS) const {
83     return Reader != RHS.Reader;
84   }
85   value_type &operator*() { return Record; }
86   value_type *operator->() { return &Record; }
87 };
88 
89 /// Base class and interface for reading profiling data of any known instrprof
90 /// format. Provides an iterator over NamedInstrProfRecords.
91 class InstrProfReader {
92   instrprof_error LastError = instrprof_error::success;
93   std::string LastErrorMsg;
94 
95 public:
96   InstrProfReader() = default;
97   virtual ~InstrProfReader() = default;
98 
99   /// Read the header.  Required before reading first record.
100   virtual Error readHeader() = 0;
101 
102   /// Read a single record.
103   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
104 
105   /// Read a list of binary ids.
readBinaryIds(std::vector<llvm::object::BuildID> & BinaryIds)106   virtual Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) {
107     return success();
108   }
109 
110   /// Print binary ids.
printBinaryIds(raw_ostream & OS)111   virtual Error printBinaryIds(raw_ostream &OS) { return success(); };
112 
113   /// Iterator over profile data.
begin()114   InstrProfIterator<> begin() { return InstrProfIterator<>(this); }
end()115   InstrProfIterator<> end() { return InstrProfIterator<>(); }
116 
117   /// Return the profile version.
118   virtual uint64_t getVersion() const = 0;
119 
120   virtual bool isIRLevelProfile() const = 0;
121 
122   virtual bool hasCSIRLevelProfile() const = 0;
123 
124   virtual bool instrEntryBBEnabled() const = 0;
125 
126   /// Return true if the profile has single byte counters representing coverage.
127   virtual bool hasSingleByteCoverage() const = 0;
128 
129   /// Return true if the profile only instruments function entries.
130   virtual bool functionEntryOnly() const = 0;
131 
132   /// Return true if profile includes a memory profile.
133   virtual bool hasMemoryProfile() const = 0;
134 
135   /// Return true if this has a temporal profile.
136   virtual bool hasTemporalProfile() const = 0;
137 
138   /// Returns a BitsetEnum describing the attributes of the profile. To check
139   /// individual attributes prefer using the helpers above.
140   virtual InstrProfKind getProfileKind() const = 0;
141 
142   /// Return the PGO symtab. There are three different readers:
143   /// Raw, Text, and Indexed profile readers. The first two types
144   /// of readers are used only by llvm-profdata tool, while the indexed
145   /// profile reader is also used by llvm-cov tool and the compiler (
146   /// backend or frontend). Since creating PGO symtab can create
147   /// significant runtime and memory overhead (as it touches data
148   /// for the whole program), InstrProfSymtab for the indexed profile
149   /// reader should be created on demand and it is recommended to be
150   /// only used for dumping purpose with llvm-proftool, not with the
151   /// compiler.
152   virtual InstrProfSymtab &getSymtab() = 0;
153 
154   /// Compute the sum of counts and return in Sum.
155   void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
156 
157 protected:
158   std::unique_ptr<InstrProfSymtab> Symtab;
159   /// A list of temporal profile traces.
160   SmallVector<TemporalProfTraceTy> TemporalProfTraces;
161   /// The total number of temporal profile traces seen.
162   uint64_t TemporalProfTraceStreamSize = 0;
163 
164   /// Set the current error and return same.
165   Error error(instrprof_error Err, const std::string &ErrMsg = "") {
166     LastError = Err;
167     LastErrorMsg = ErrMsg;
168     if (Err == instrprof_error::success)
169       return Error::success();
170     return make_error<InstrProfError>(Err, ErrMsg);
171   }
172 
error(Error && E)173   Error error(Error &&E) {
174     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
175       LastError = IPE.get();
176       LastErrorMsg = IPE.getMessage();
177     });
178     return make_error<InstrProfError>(LastError, LastErrorMsg);
179   }
180 
181   /// Clear the current error and return a successful one.
success()182   Error success() { return error(instrprof_error::success); }
183 
184 public:
185   /// Return true if the reader has finished reading the profile data.
isEOF()186   bool isEOF() { return LastError == instrprof_error::eof; }
187 
188   /// Return true if the reader encountered an error reading profiling data.
hasError()189   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
190 
191   /// Get the current error.
getError()192   Error getError() {
193     if (hasError())
194       return make_error<InstrProfError>(LastError, LastErrorMsg);
195     return Error::success();
196   }
197 
198   /// Factory method to create an appropriately typed reader for the given
199   /// instrprof file.
200   static Expected<std::unique_ptr<InstrProfReader>>
201   create(const Twine &Path, vfs::FileSystem &FS,
202          const InstrProfCorrelator *Correlator = nullptr,
203          std::function<void(Error)> Warn = nullptr);
204 
205   static Expected<std::unique_ptr<InstrProfReader>>
206   create(std::unique_ptr<MemoryBuffer> Buffer,
207          const InstrProfCorrelator *Correlator = nullptr,
208          std::function<void(Error)> Warn = nullptr);
209 
210   /// \param Weight for raw profiles use this as the temporal profile trace
211   ///               weight
212   /// \returns a list of temporal profile traces.
213   virtual SmallVector<TemporalProfTraceTy> &
214   getTemporalProfTraces(std::optional<uint64_t> Weight = {}) {
215     // For non-raw profiles we ignore the input weight and instead use the
216     // weights already in the traces.
217     return TemporalProfTraces;
218   }
219   /// \returns the total number of temporal profile traces seen.
getTemporalProfTraceStreamSize()220   uint64_t getTemporalProfTraceStreamSize() {
221     return TemporalProfTraceStreamSize;
222   }
223 };
224 
225 /// Reader for the simple text based instrprof format.
226 ///
227 /// This format is a simple text format that's suitable for test data. Records
228 /// are separated by one or more blank lines, and record fields are separated by
229 /// new lines.
230 ///
231 /// Each record consists of a function name, a function hash, a number of
232 /// counters, and then each counter value, in that order.
233 class TextInstrProfReader : public InstrProfReader {
234 private:
235   /// The profile data file contents.
236   std::unique_ptr<MemoryBuffer> DataBuffer;
237   /// Iterator over the profile data.
238   line_iterator Line;
239   /// The attributes of the current profile.
240   InstrProfKind ProfileKind = InstrProfKind::Unknown;
241 
242   Error readValueProfileData(InstrProfRecord &Record);
243 
244   Error readTemporalProfTraceData();
245 
246 public:
TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)247   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
248       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
249   TextInstrProfReader(const TextInstrProfReader &) = delete;
250   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
251 
252   /// Return true if the given buffer is in text instrprof format.
253   static bool hasFormat(const MemoryBuffer &Buffer);
254 
255   // Text format does not have version, so return 0.
getVersion()256   uint64_t getVersion() const override { return 0; }
257 
isIRLevelProfile()258   bool isIRLevelProfile() const override {
259     return static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation);
260   }
261 
hasCSIRLevelProfile()262   bool hasCSIRLevelProfile() const override {
263     return static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive);
264   }
265 
instrEntryBBEnabled()266   bool instrEntryBBEnabled() const override {
267     return static_cast<bool>(ProfileKind &
268                              InstrProfKind::FunctionEntryInstrumentation);
269   }
270 
hasSingleByteCoverage()271   bool hasSingleByteCoverage() const override {
272     return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage);
273   }
274 
functionEntryOnly()275   bool functionEntryOnly() const override {
276     return static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly);
277   }
278 
hasMemoryProfile()279   bool hasMemoryProfile() const override {
280     // TODO: Add support for text format memory profiles.
281     return false;
282   }
283 
hasTemporalProfile()284   bool hasTemporalProfile() const override {
285     return static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile);
286   }
287 
getProfileKind()288   InstrProfKind getProfileKind() const override { return ProfileKind; }
289 
290   /// Read the header.
291   Error readHeader() override;
292 
293   /// Read a single record.
294   Error readNextRecord(NamedInstrProfRecord &Record) override;
295 
getSymtab()296   InstrProfSymtab &getSymtab() override {
297     assert(Symtab);
298     return *Symtab;
299   }
300 };
301 
302 /// Reader for the raw instrprof binary format from runtime.
303 ///
304 /// This format is a raw memory dump of the instrumentation-based profiling data
305 /// from the runtime.  It has no index.
306 ///
307 /// Templated on the unsigned type whose size matches pointers on the platform
308 /// that wrote the profile.
309 template <class IntPtrT>
310 class RawInstrProfReader : public InstrProfReader {
311 private:
312   /// The profile data file contents.
313   std::unique_ptr<MemoryBuffer> DataBuffer;
314   /// If available, this hold the ProfileData array used to correlate raw
315   /// instrumentation data to their functions.
316   const InstrProfCorrelatorImpl<IntPtrT> *Correlator;
317   /// A list of timestamps paired with a function name reference.
318   std::vector<std::pair<uint64_t, uint64_t>> TemporalProfTimestamps;
319   bool ShouldSwapBytes;
320   // The value of the version field of the raw profile data header. The lower 32
321   // bits specifies the format version and the most significant 32 bits specify
322   // the variant types of the profile.
323   uint64_t Version;
324   uint64_t CountersDelta;
325   uint64_t BitmapDelta;
326   uint64_t NamesDelta;
327   const RawInstrProf::ProfileData<IntPtrT> *Data;
328   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
329   const RawInstrProf::VTableProfileData<IntPtrT> *VTableBegin = nullptr;
330   const RawInstrProf::VTableProfileData<IntPtrT> *VTableEnd = nullptr;
331   const char *CountersStart;
332   const char *CountersEnd;
333   const char *BitmapStart;
334   const char *BitmapEnd;
335   const char *NamesStart;
336   const char *NamesEnd;
337   const char *VNamesStart = nullptr;
338   const char *VNamesEnd = nullptr;
339   // After value profile is all read, this pointer points to
340   // the header of next profile data (if exists)
341   const uint8_t *ValueDataStart;
342   uint32_t ValueKindLast;
343   uint32_t CurValueDataSize;
344   std::vector<llvm::object::BuildID> BinaryIds;
345 
346   std::function<void(Error)> Warn;
347 
348   /// Maxium counter value 2^56.
349   static const uint64_t MaxCounterValue = (1ULL << 56);
350 
351 public:
RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer,const InstrProfCorrelator * Correlator,std::function<void (Error)> Warn)352   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer,
353                      const InstrProfCorrelator *Correlator,
354                      std::function<void(Error)> Warn)
355       : DataBuffer(std::move(DataBuffer)),
356         Correlator(dyn_cast_or_null<const InstrProfCorrelatorImpl<IntPtrT>>(
357             Correlator)),
358         Warn(Warn) {}
359   RawInstrProfReader(const RawInstrProfReader &) = delete;
360   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
361 
362   static bool hasFormat(const MemoryBuffer &DataBuffer);
363   Error readHeader() override;
364   Error readNextRecord(NamedInstrProfRecord &Record) override;
365   Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
366   Error printBinaryIds(raw_ostream &OS) override;
367 
getVersion()368   uint64_t getVersion() const override { return Version; }
369 
isIRLevelProfile()370   bool isIRLevelProfile() const override {
371     return (Version & VARIANT_MASK_IR_PROF) != 0;
372   }
373 
hasCSIRLevelProfile()374   bool hasCSIRLevelProfile() const override {
375     return (Version & VARIANT_MASK_CSIR_PROF) != 0;
376   }
377 
instrEntryBBEnabled()378   bool instrEntryBBEnabled() const override {
379     return (Version & VARIANT_MASK_INSTR_ENTRY) != 0;
380   }
381 
hasSingleByteCoverage()382   bool hasSingleByteCoverage() const override {
383     return (Version & VARIANT_MASK_BYTE_COVERAGE) != 0;
384   }
385 
functionEntryOnly()386   bool functionEntryOnly() const override {
387     return (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
388   }
389 
hasMemoryProfile()390   bool hasMemoryProfile() const override {
391     // Memory profiles have a separate raw format, so this should never be set.
392     assert(!(Version & VARIANT_MASK_MEMPROF));
393     return false;
394   }
395 
hasTemporalProfile()396   bool hasTemporalProfile() const override {
397     return (Version & VARIANT_MASK_TEMPORAL_PROF) != 0;
398   }
399 
400   /// Returns a BitsetEnum describing the attributes of the raw instr profile.
401   InstrProfKind getProfileKind() const override;
402 
getSymtab()403   InstrProfSymtab &getSymtab() override {
404     assert(Symtab.get());
405     return *Symtab.get();
406   }
407 
408   SmallVector<TemporalProfTraceTy> &
409   getTemporalProfTraces(std::optional<uint64_t> Weight = {}) override;
410 
411 private:
412   Error createSymtab(InstrProfSymtab &Symtab);
413   Error readNextHeader(const char *CurrentPos);
414   Error readHeader(const RawInstrProf::Header &Header);
415 
swap(IntT Int)416   template <class IntT> IntT swap(IntT Int) const {
417     return ShouldSwapBytes ? llvm::byteswap(Int) : Int;
418   }
419 
getDataEndianness()420   llvm::endianness getDataEndianness() const {
421     if (!ShouldSwapBytes)
422       return llvm::endianness::native;
423     if (llvm::endianness::native == llvm::endianness::little)
424       return llvm::endianness::big;
425     else
426       return llvm::endianness::little;
427   }
428 
getNumPaddingBytes(uint64_t SizeInBytes)429   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
430     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
431   }
432 
433   Error readName(NamedInstrProfRecord &Record);
434   Error readFuncHash(NamedInstrProfRecord &Record);
435   Error readRawCounts(InstrProfRecord &Record);
436   Error readRawBitmapBytes(InstrProfRecord &Record);
437   Error readValueProfilingData(InstrProfRecord &Record);
atEnd()438   bool atEnd() const { return Data == DataEnd; }
439 
advanceData()440   void advanceData() {
441     // `CountersDelta` is a constant zero when using debug info correlation.
442     if (!Correlator) {
443       // The initial CountersDelta is the in-memory address difference between
444       // the data and counts sections:
445       // start(__llvm_prf_cnts) - start(__llvm_prf_data)
446       // As we advance to the next record, we maintain the correct CountersDelta
447       // with respect to the next record.
448       CountersDelta -= sizeof(*Data);
449       BitmapDelta -= sizeof(*Data);
450     }
451     Data++;
452     ValueDataStart += CurValueDataSize;
453   }
454 
getNextHeaderPos()455   const char *getNextHeaderPos() const {
456       assert(atEnd());
457       return (const char *)ValueDataStart;
458   }
459 
getName(uint64_t NameRef)460   StringRef getName(uint64_t NameRef) const {
461     return Symtab->getFuncOrVarName(swap(NameRef));
462   }
463 
getCounterTypeSize()464   int getCounterTypeSize() const {
465     return hasSingleByteCoverage() ? sizeof(uint8_t) : sizeof(uint64_t);
466   }
467 };
468 
469 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
470 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
471 
472 namespace IndexedInstrProf {
473 
474 enum class HashT : uint32_t;
475 
476 } // end namespace IndexedInstrProf
477 
478 /// Trait for lookups into the on-disk hash table for the binary instrprof
479 /// format.
480 class InstrProfLookupTrait {
481   std::vector<NamedInstrProfRecord> DataBuffer;
482   IndexedInstrProf::HashT HashType;
483   unsigned FormatVersion;
484   // Endianness of the input value profile data.
485   // It should be LE by default, but can be changed
486   // for testing purpose.
487   llvm::endianness ValueProfDataEndianness = llvm::endianness::little;
488 
489 public:
InstrProfLookupTrait(IndexedInstrProf::HashT HashType,unsigned FormatVersion)490   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
491       : HashType(HashType), FormatVersion(FormatVersion) {}
492 
493   using data_type = ArrayRef<NamedInstrProfRecord>;
494 
495   using internal_key_type = StringRef;
496   using external_key_type = StringRef;
497   using hash_value_type = uint64_t;
498   using offset_type = uint64_t;
499 
EqualKey(StringRef A,StringRef B)500   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
GetInternalKey(StringRef K)501   static StringRef GetInternalKey(StringRef K) { return K; }
GetExternalKey(StringRef K)502   static StringRef GetExternalKey(StringRef K) { return K; }
503 
504   hash_value_type ComputeHash(StringRef K);
505 
506   static std::pair<offset_type, offset_type>
ReadKeyDataLength(const unsigned char * & D)507   ReadKeyDataLength(const unsigned char *&D) {
508     using namespace support;
509 
510     offset_type KeyLen =
511         endian::readNext<offset_type, llvm::endianness::little>(D);
512     offset_type DataLen =
513         endian::readNext<offset_type, llvm::endianness::little>(D);
514     return std::make_pair(KeyLen, DataLen);
515   }
516 
ReadKey(const unsigned char * D,offset_type N)517   StringRef ReadKey(const unsigned char *D, offset_type N) {
518     return StringRef((const char *)D, N);
519   }
520 
521   bool readValueProfilingData(const unsigned char *&D,
522                               const unsigned char *const End);
523   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
524 
525   // Used for testing purpose only.
setValueProfDataEndianness(llvm::endianness Endianness)526   void setValueProfDataEndianness(llvm::endianness Endianness) {
527     ValueProfDataEndianness = Endianness;
528   }
529 };
530 
531 struct InstrProfReaderIndexBase {
532   virtual ~InstrProfReaderIndexBase() = default;
533 
534   // Read all the profile records with the same key pointed to the current
535   // iterator.
536   virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
537 
538   // Read all the profile records with the key equal to FuncName
539   virtual Error getRecords(StringRef FuncName,
540                                      ArrayRef<NamedInstrProfRecord> &Data) = 0;
541   virtual void advanceToNextKey() = 0;
542   virtual bool atEnd() const = 0;
543   virtual void setValueProfDataEndianness(llvm::endianness Endianness) = 0;
544   virtual uint64_t getVersion() const = 0;
545   virtual bool isIRLevelProfile() const = 0;
546   virtual bool hasCSIRLevelProfile() const = 0;
547   virtual bool instrEntryBBEnabled() const = 0;
548   virtual bool hasSingleByteCoverage() const = 0;
549   virtual bool functionEntryOnly() const = 0;
550   virtual bool hasMemoryProfile() const = 0;
551   virtual bool hasTemporalProfile() const = 0;
552   virtual InstrProfKind getProfileKind() const = 0;
553   virtual Error populateSymtab(InstrProfSymtab &) = 0;
554 };
555 
556 using OnDiskHashTableImplV3 =
557     OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
558 
559 using MemProfRecordHashTable =
560     OnDiskIterableChainedHashTable<memprof::RecordLookupTrait>;
561 using MemProfFrameHashTable =
562     OnDiskIterableChainedHashTable<memprof::FrameLookupTrait>;
563 using MemProfCallStackHashTable =
564     OnDiskIterableChainedHashTable<memprof::CallStackLookupTrait>;
565 
566 template <typename HashTableImpl>
567 class InstrProfReaderItaniumRemapper;
568 
569 template <typename HashTableImpl>
570 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
571 private:
572   std::unique_ptr<HashTableImpl> HashTable;
573   typename HashTableImpl::data_iterator RecordIterator;
574   uint64_t FormatVersion;
575 
576   friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
577 
578 public:
579   InstrProfReaderIndex(const unsigned char *Buckets,
580                        const unsigned char *const Payload,
581                        const unsigned char *const Base,
582                        IndexedInstrProf::HashT HashType, uint64_t Version);
583   ~InstrProfReaderIndex() override = default;
584 
585   Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
586   Error getRecords(StringRef FuncName,
587                    ArrayRef<NamedInstrProfRecord> &Data) override;
advanceToNextKey()588   void advanceToNextKey() override { RecordIterator++; }
589 
atEnd()590   bool atEnd() const override {
591     return RecordIterator == HashTable->data_end();
592   }
593 
setValueProfDataEndianness(llvm::endianness Endianness)594   void setValueProfDataEndianness(llvm::endianness Endianness) override {
595     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
596   }
597 
getVersion()598   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
599 
isIRLevelProfile()600   bool isIRLevelProfile() const override {
601     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
602   }
603 
hasCSIRLevelProfile()604   bool hasCSIRLevelProfile() const override {
605     return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
606   }
607 
instrEntryBBEnabled()608   bool instrEntryBBEnabled() const override {
609     return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
610   }
611 
hasSingleByteCoverage()612   bool hasSingleByteCoverage() const override {
613     return (FormatVersion & VARIANT_MASK_BYTE_COVERAGE) != 0;
614   }
615 
functionEntryOnly()616   bool functionEntryOnly() const override {
617     return (FormatVersion & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
618   }
619 
hasMemoryProfile()620   bool hasMemoryProfile() const override {
621     return (FormatVersion & VARIANT_MASK_MEMPROF) != 0;
622   }
623 
hasTemporalProfile()624   bool hasTemporalProfile() const override {
625     return (FormatVersion & VARIANT_MASK_TEMPORAL_PROF) != 0;
626   }
627 
628   InstrProfKind getProfileKind() const override;
629 
populateSymtab(InstrProfSymtab & Symtab)630   Error populateSymtab(InstrProfSymtab &Symtab) override {
631     // FIXME: the create method calls 'finalizeSymtab' and sorts a bunch of
632     // arrays/maps. Since there are other data sources other than 'HashTable' to
633     // populate a symtab, it might make sense to have something like this
634     // 1. Let each data source populate Symtab and init the arrays/maps without
635     // calling 'finalizeSymtab'
636     // 2. Call 'finalizeSymtab' once to get all arrays/maps sorted if needed.
637     return Symtab.create(HashTable->keys());
638   }
639 };
640 
641 /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
642 class InstrProfReaderRemapper {
643 public:
644   virtual ~InstrProfReaderRemapper() = default;
populateRemappings()645   virtual Error populateRemappings() { return Error::success(); }
646   virtual Error getRecords(StringRef FuncName,
647                            ArrayRef<NamedInstrProfRecord> &Data) = 0;
648 };
649 
650 class IndexedMemProfReader {
651 private:
652   /// The MemProf version.
653   memprof::IndexedVersion Version = memprof::Version0;
654   /// MemProf profile schema (if available).
655   memprof::MemProfSchema Schema;
656   /// MemProf record profile data on-disk indexed via llvm::md5(FunctionName).
657   std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
658   /// MemProf frame profile data on-disk indexed via frame id.
659   std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
660   /// MemProf call stack data on-disk indexed via call stack id.
661   std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
662   /// The starting address of the frame array.
663   const unsigned char *FrameBase = nullptr;
664   /// The starting address of the call stack array.
665   const unsigned char *CallStackBase = nullptr;
666 
667   Error deserializeV012(const unsigned char *Start, const unsigned char *Ptr,
668                         uint64_t FirstWord);
669   Error deserializeV3(const unsigned char *Start, const unsigned char *Ptr);
670 
671 public:
672   IndexedMemProfReader() = default;
673 
674   Error deserialize(const unsigned char *Start, uint64_t MemProfOffset);
675 
676   Expected<memprof::MemProfRecord>
677   getMemProfRecord(const uint64_t FuncNameHash) const;
678 };
679 
680 /// Reader for the indexed binary instrprof format.
681 class IndexedInstrProfReader : public InstrProfReader {
682 private:
683   /// The profile data file contents.
684   std::unique_ptr<MemoryBuffer> DataBuffer;
685   /// The profile remapping file contents.
686   std::unique_ptr<MemoryBuffer> RemappingBuffer;
687   /// The index into the profile data.
688   std::unique_ptr<InstrProfReaderIndexBase> Index;
689   /// The profile remapping file contents.
690   std::unique_ptr<InstrProfReaderRemapper> Remapper;
691   /// Profile summary data.
692   std::unique_ptr<ProfileSummary> Summary;
693   /// Context sensitive profile summary data.
694   std::unique_ptr<ProfileSummary> CS_Summary;
695   IndexedMemProfReader MemProfReader;
696   /// The compressed vtable names, to be used for symtab construction.
697   /// A compiler that reads indexed profiles could construct symtab from module
698   /// IR so it doesn't need the decompressed names.
699   StringRef VTableName;
700   /// A memory buffer holding binary ids.
701   ArrayRef<uint8_t> BinaryIdsBuffer;
702 
703   // Index to the current record in the record array.
704   unsigned RecordIndex = 0;
705 
706   // Read the profile summary. Return a pointer pointing to one byte past the
707   // end of the summary data if it exists or the input \c Cur.
708   // \c UseCS indicates whether to use the context-sensitive profile summary.
709   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
710                                    const unsigned char *Cur, bool UseCS);
711 
712 public:
713   IndexedInstrProfReader(
714       std::unique_ptr<MemoryBuffer> DataBuffer,
715       std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
DataBuffer(std::move (DataBuffer))716       : DataBuffer(std::move(DataBuffer)),
717         RemappingBuffer(std::move(RemappingBuffer)) {}
718   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
719   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
720 
721   /// Return the profile version.
getVersion()722   uint64_t getVersion() const override { return Index->getVersion(); }
isIRLevelProfile()723   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
hasCSIRLevelProfile()724   bool hasCSIRLevelProfile() const override {
725     return Index->hasCSIRLevelProfile();
726   }
727 
instrEntryBBEnabled()728   bool instrEntryBBEnabled() const override {
729     return Index->instrEntryBBEnabled();
730   }
731 
hasSingleByteCoverage()732   bool hasSingleByteCoverage() const override {
733     return Index->hasSingleByteCoverage();
734   }
735 
functionEntryOnly()736   bool functionEntryOnly() const override { return Index->functionEntryOnly(); }
737 
hasMemoryProfile()738   bool hasMemoryProfile() const override { return Index->hasMemoryProfile(); }
739 
hasTemporalProfile()740   bool hasTemporalProfile() const override {
741     return Index->hasTemporalProfile();
742   }
743 
744   /// Returns a BitsetEnum describing the attributes of the indexed instr
745   /// profile.
getProfileKind()746   InstrProfKind getProfileKind() const override {
747     return Index->getProfileKind();
748   }
749 
750   /// Return true if the given buffer is in an indexed instrprof format.
751   static bool hasFormat(const MemoryBuffer &DataBuffer);
752 
753   /// Read the file header.
754   Error readHeader() override;
755   /// Read a single record.
756   Error readNextRecord(NamedInstrProfRecord &Record) override;
757 
758   /// Return the NamedInstrProfRecord associated with FuncName and FuncHash.
759   /// When return a hash_mismatch error and MismatchedFuncSum is not nullptr,
760   /// the sum of all counters in the mismatched function will be set to
761   /// MismatchedFuncSum. If there are multiple instances of mismatched
762   /// functions, MismatchedFuncSum returns the maximum. If \c FuncName is not
763   /// found, try to lookup \c DeprecatedFuncName to handle profiles built by
764   /// older compilers.
765   Expected<InstrProfRecord>
766   getInstrProfRecord(StringRef FuncName, uint64_t FuncHash,
767                      StringRef DeprecatedFuncName = "",
768                      uint64_t *MismatchedFuncSum = nullptr);
769 
770   /// Return the memprof record for the function identified by
771   /// llvm::md5(Name).
getMemProfRecord(uint64_t FuncNameHash)772   Expected<memprof::MemProfRecord> getMemProfRecord(uint64_t FuncNameHash) {
773     return MemProfReader.getMemProfRecord(FuncNameHash);
774   }
775 
776   /// Fill Counts with the profile data for the given function name.
777   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
778                           std::vector<uint64_t> &Counts);
779 
780   /// Fill Bitmap with the profile data for the given function name.
781   Error getFunctionBitmap(StringRef FuncName, uint64_t FuncHash,
782                           BitVector &Bitmap);
783 
784   /// Return the maximum of all known function counts.
785   /// \c UseCS indicates whether to use the context-sensitive count.
getMaximumFunctionCount(bool UseCS)786   uint64_t getMaximumFunctionCount(bool UseCS) {
787     if (UseCS) {
788       assert(CS_Summary && "No context sensitive profile summary");
789       return CS_Summary->getMaxFunctionCount();
790     } else {
791       assert(Summary && "No profile summary");
792       return Summary->getMaxFunctionCount();
793     }
794   }
795 
796   /// Factory method to create an indexed reader.
797   static Expected<std::unique_ptr<IndexedInstrProfReader>>
798   create(const Twine &Path, vfs::FileSystem &FS,
799          const Twine &RemappingPath = "");
800 
801   static Expected<std::unique_ptr<IndexedInstrProfReader>>
802   create(std::unique_ptr<MemoryBuffer> Buffer,
803          std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
804 
805   // Used for testing purpose only.
setValueProfDataEndianness(llvm::endianness Endianness)806   void setValueProfDataEndianness(llvm::endianness Endianness) {
807     Index->setValueProfDataEndianness(Endianness);
808   }
809 
810   // See description in the base class. This interface is designed
811   // to be used by llvm-profdata (for dumping). Avoid using this when
812   // the client is the compiler.
813   InstrProfSymtab &getSymtab() override;
814 
815   /// Return the profile summary.
816   /// \c UseCS indicates whether to use the context-sensitive summary.
getSummary(bool UseCS)817   ProfileSummary &getSummary(bool UseCS) {
818     if (UseCS) {
819       assert(CS_Summary && "No context sensitive summary");
820       return *CS_Summary;
821     } else {
822       assert(Summary && "No profile summary");
823       return *Summary;
824     }
825   }
826 
827   Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
828   Error printBinaryIds(raw_ostream &OS) override;
829 };
830 
831 } // end namespace llvm
832 
833 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
834