xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ProfileData/MemProfReader.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading MemProf profiling data.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_PROFILEDATA_MEMPROFREADER_H_
14 #define LLVM_PROFILEDATA_MEMPROFREADER_H_
15 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
20 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
21 #include "llvm/IR/GlobalValue.h"
22 #include "llvm/Object/Binary.h"
23 #include "llvm/Object/ObjectFile.h"
24 #include "llvm/ProfileData/IndexedMemProfData.h"
25 #include "llvm/ProfileData/InstrProfReader.h"
26 #include "llvm/ProfileData/MemProfData.inc"
27 #include "llvm/ProfileData/MemProfRadixTree.h"
28 #include "llvm/Support/Compiler.h"
29 #include "llvm/Support/Error.h"
30 #include "llvm/Support/MemoryBuffer.h"
31 
32 #include <functional>
33 
34 namespace llvm {
35 namespace memprof {
36 // A class for memprof profile data populated directly from external
37 // sources.
38 class MemProfReader {
39 public:
40   // The MemProfReader only holds memory profile information.
getProfileKind()41   InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; }
42 
43   using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
44   using Iterator = InstrProfIterator<GuidMemProfRecordPair, MemProfReader>;
end()45   Iterator end() { return Iterator(); }
begin()46   Iterator begin() {
47     Iter = MemProfData.Records.begin();
48     return Iterator(this);
49   }
50 
51   // Take the complete profile data.  Once this function is invoked,
52   // MemProfReader no longer owns the MemProf profile.
takeMemProfData()53   IndexedMemProfData takeMemProfData() { return std::move(MemProfData); }
54 
55   virtual Error
56   readNextRecord(GuidMemProfRecordPair &GuidRecord,
57                  std::function<const Frame(const FrameId)> Callback = nullptr) {
58     if (MemProfData.Records.empty())
59       return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
60 
61     if (Iter == MemProfData.Records.end())
62       return make_error<InstrProfError>(instrprof_error::eof);
63 
64     if (Callback == nullptr)
65       Callback = [&](FrameId Id) { return idToFrame(Id); };
66 
67     CallStackIdConverter<decltype(MemProfData.CallStacks)> CSIdConv(
68         MemProfData.CallStacks, Callback);
69 
70     const IndexedMemProfRecord &IndexedRecord = Iter->second;
71     GuidRecord = {
72         Iter->first,
73         IndexedRecord.toMemProfRecord(CSIdConv),
74     };
75     if (CSIdConv.LastUnmappedId)
76       return make_error<InstrProfError>(instrprof_error::hash_mismatch);
77     Iter++;
78     return Error::success();
79   }
80 
81   // Allow default construction for derived classes which can populate the
82   // contents after construction.
83   MemProfReader() = default;
84   virtual ~MemProfReader() = default;
85 
86   // Initialize the MemProfReader with the given MemProf profile.
MemProfReader(IndexedMemProfData && MemProfData)87   MemProfReader(IndexedMemProfData &&MemProfData)
88       : MemProfData(std::move(MemProfData)) {}
89 
90 protected:
91   // A helper method to extract the frame from the IdToFrame map.
idToFrame(const FrameId Id)92   const Frame &idToFrame(const FrameId Id) const {
93     auto It = MemProfData.Frames.find(Id);
94     assert(It != MemProfData.Frames.end() && "Id not found in map.");
95     return It->second;
96   }
97   // A complete pacakge of the MemProf profile.
98   IndexedMemProfData MemProfData;
99   // An iterator to the internal function profile data structure.
100   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter;
101 };
102 
103 // Map from id (recorded from sanitizer stack depot) to virtual addresses for
104 // each program counter address in the callstack.
105 using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>;
106 
107 // Specializes the MemProfReader class to populate the contents from raw binary
108 // memprof profiles from instrumentation based profiling.
109 class LLVM_ABI RawMemProfReader final : public MemProfReader {
110 public:
111   RawMemProfReader(const RawMemProfReader &) = delete;
112   RawMemProfReader &operator=(const RawMemProfReader &) = delete;
113   virtual ~RawMemProfReader() override;
114 
115   // Prints the contents of the profile in YAML format.
116   void printYAML(raw_ostream &OS);
117 
118   // Return true if the \p DataBuffer starts with magic bytes indicating it is
119   // a raw binary memprof profile.
120   static bool hasFormat(const MemoryBuffer &DataBuffer);
121   // Return true if the file at \p Path starts with magic bytes indicating it is
122   // a raw binary memprof profile.
123   static bool hasFormat(const StringRef Path);
124 
125   // Create a RawMemProfReader after sanity checking the contents of the file at
126   // \p Path or the \p Buffer. The binary from which the profile has been
127   // collected is specified via a path in \p ProfiledBinary.
128   static Expected<std::unique_ptr<RawMemProfReader>>
129   create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false);
130   static Expected<std::unique_ptr<RawMemProfReader>>
131   create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary,
132          bool KeepName = false);
133 
134   // Returns a list of build ids recorded in the segment information.
135   static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer);
136 
137   Error
138   readNextRecord(GuidMemProfRecordPair &GuidRecord,
139                  std::function<const Frame(const FrameId)> Callback) override;
140 
141   // Constructor for unittests only.
142   RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym,
143                    llvm::SmallVectorImpl<SegmentEntry> &Seg,
144                    llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
145                    CallStackMap &SM, bool KeepName = false)
146       : SegmentInfo(Seg.begin(), Seg.end()), CallstackProfileData(Prof),
147         StackMap(SM), KeepSymbolName(KeepName) {
148     // We don't call initialize here since there is no raw profile to read. The
149     // test should pass in the raw profile as structured data.
150 
151     // If there is an error here then the mock symbolizer has not been
152     // initialized properly.
153     if (Error E = symbolizeAndFilterStackFrames(std::move(Sym)))
154       report_fatal_error(std::move(E));
155     if (Error E = mapRawProfileToRecords())
156       report_fatal_error(std::move(E));
157   }
158 
159 private:
RawMemProfReader(object::OwningBinary<object::Binary> && Bin,bool KeepName)160   RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName)
161       : Binary(std::move(Bin)), KeepSymbolName(KeepName) {}
162   // Initializes the RawMemProfReader with the contents in `DataBuffer`.
163   Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer);
164   // Read and parse the contents of the `DataBuffer` as a binary format profile.
165   Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer);
166   // Initialize the segment mapping information for symbolization.
167   Error setupForSymbolization();
168   // Symbolize and cache all the virtual addresses we encounter in the
169   // callstacks from the raw profile. Also prune callstack frames which we can't
170   // symbolize or those that belong to the runtime. For profile entries where
171   // the entire callstack is pruned, we drop the entry from the profile.
172   Error symbolizeAndFilterStackFrames(
173       std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer);
174   // Construct memprof records for each function and store it in the
175   // `FunctionProfileData` map. A function may have allocation profile data or
176   // callsite data or both.
177   Error mapRawProfileToRecords();
178 
179   object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
180 
181   llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
182   readMemInfoBlocks(const char *Ptr);
183 
184   // The profiled binary.
185   object::OwningBinary<object::Binary> Binary;
186   // Version of raw memprof binary currently being read. Defaults to most up
187   // to date version.
188   uint64_t MemprofRawVersion = MEMPROF_RAW_VERSION;
189   // The preferred load address of the executable segment.
190   uint64_t PreferredTextSegmentAddress = 0;
191   // The base address of the text segment in the process during profiling.
192   uint64_t ProfiledTextSegmentStart = 0;
193   // The limit address of the text segment in the process during profiling.
194   uint64_t ProfiledTextSegmentEnd = 0;
195 
196   // The memory mapped segment information for all executable segments in the
197   // profiled binary (filtered from the raw profile using the build id).
198   llvm::SmallVector<SegmentEntry, 2> SegmentInfo;
199 
200   // A map from callstack id (same as key in CallStackMap below) to the heap
201   // information recorded for that allocation context.
202   llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData;
203   CallStackMap StackMap;
204 
205   // Cached symbolization from PC to Frame.
206   llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame;
207 
208   // Whether to keep the symbol name for each frame after hashing.
209   bool KeepSymbolName = false;
210   // A mapping of the hash to symbol name, only used if KeepSymbolName is true.
211   llvm::DenseMap<uint64_t, std::string> GuidToSymbolName;
212 };
213 
214 class YAMLMemProfReader final : public MemProfReader {
215 public:
216   YAMLMemProfReader() = default;
217 
218   // Return true if the \p DataBuffer starts with "---" indicating it is a YAML
219   // file.
220   LLVM_ABI static bool hasFormat(const MemoryBuffer &DataBuffer);
221   // Wrapper around hasFormat above, reading the file instead of the memory
222   // buffer.
223   LLVM_ABI static bool hasFormat(const StringRef Path);
224 
225   // Create a YAMLMemProfReader after sanity checking the contents of the file
226   // at \p Path or the \p Buffer.
227   LLVM_ABI static Expected<std::unique_ptr<YAMLMemProfReader>>
228   create(const Twine &Path);
229   LLVM_ABI static Expected<std::unique_ptr<YAMLMemProfReader>>
230   create(std::unique_ptr<MemoryBuffer> Buffer);
231 
232   LLVM_ABI void parse(StringRef YAMLData);
233 
takeDataAccessProfData()234   std::unique_ptr<memprof::DataAccessProfData> takeDataAccessProfData() {
235     return std::move(DataAccessProfileData);
236   }
237 
238 private:
239   // Called by `parse` to set data access profiles after parsing them from Yaml
240   // files.
241   void
setDataAccessProfileData(std::unique_ptr<memprof::DataAccessProfData> Data)242   setDataAccessProfileData(std::unique_ptr<memprof::DataAccessProfData> Data) {
243     DataAccessProfileData = std::move(Data);
244   }
245 
246   std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
247 };
248 } // namespace memprof
249 } // namespace llvm
250 
251 #endif // LLVM_PROFILEDATA_MEMPROFREADER_H_
252