xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ProfileData/MemProfReader.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading MemProf profiling data.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_PROFILEDATA_MEMPROFREADER_H_
14 #define LLVM_PROFILEDATA_MEMPROFREADER_H_
15 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
20 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
21 #include "llvm/IR/GlobalValue.h"
22 #include "llvm/Object/Binary.h"
23 #include "llvm/Object/ObjectFile.h"
24 #include "llvm/ProfileData/InstrProfReader.h"
25 #include "llvm/ProfileData/MemProf.h"
26 #include "llvm/ProfileData/MemProfData.inc"
27 #include "llvm/Support/Error.h"
28 #include "llvm/Support/MemoryBuffer.h"
29 
30 #include <functional>
31 
32 namespace llvm {
33 namespace memprof {
34 // A class for memprof profile data populated directly from external
35 // sources.
36 class MemProfReader {
37 public:
38   // The MemProfReader only holds memory profile information.
getProfileKind()39   InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; }
40 
41   using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
42   using Iterator = InstrProfIterator<GuidMemProfRecordPair, MemProfReader>;
end()43   Iterator end() { return Iterator(); }
begin()44   Iterator begin() {
45     Iter = FunctionProfileData.begin();
46     return Iterator(this);
47   }
48 
49   // Return a const reference to the internal Id to Frame mappings.
getFrameMapping()50   const llvm::DenseMap<FrameId, Frame> &getFrameMapping() const {
51     return IdToFrame;
52   }
53 
54   // Return a const reference to the internal Id to call stacks.
55   const llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> &
getCallStacks()56   getCallStacks() const {
57     return CSIdToCallStack;
58   }
59 
60   // Return a const reference to the internal function profile data.
61   const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> &
getProfileData()62   getProfileData() const {
63     return FunctionProfileData;
64   }
65 
66   virtual Error
67   readNextRecord(GuidMemProfRecordPair &GuidRecord,
68                  std::function<const Frame(const FrameId)> Callback = nullptr) {
69     if (FunctionProfileData.empty())
70       return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
71 
72     if (Iter == FunctionProfileData.end())
73       return make_error<InstrProfError>(instrprof_error::eof);
74 
75     if (Callback == nullptr)
76       Callback =
77           std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1);
78 
79     CallStackIdConverter<decltype(CSIdToCallStack)> CSIdConv(CSIdToCallStack,
80                                                              Callback);
81 
82     const IndexedMemProfRecord &IndexedRecord = Iter->second;
83     GuidRecord = {
84         Iter->first,
85         IndexedRecord.toMemProfRecord(CSIdConv),
86     };
87     if (CSIdConv.LastUnmappedId)
88       return make_error<InstrProfError>(instrprof_error::hash_mismatch);
89     Iter++;
90     return Error::success();
91   }
92 
93   // Allow default construction for derived classes which can populate the
94   // contents after construction.
95   MemProfReader() = default;
96   virtual ~MemProfReader() = default;
97 
98   // Initialize the MemProfReader with the frame mappings and profile contents.
99   MemProfReader(
100       llvm::DenseMap<FrameId, Frame> FrameIdMap,
101       llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData);
102 
103   // Initialize the MemProfReader with the frame mappings, call stack mappings,
104   // and profile contents.
MemProfReader(llvm::DenseMap<FrameId,Frame> FrameIdMap,llvm::DenseMap<CallStackId,llvm::SmallVector<FrameId>> CSIdMap,llvm::MapVector<GlobalValue::GUID,IndexedMemProfRecord> ProfData)105   MemProfReader(
106       llvm::DenseMap<FrameId, Frame> FrameIdMap,
107       llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CSIdMap,
108       llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
109       : IdToFrame(std::move(FrameIdMap)), CSIdToCallStack(std::move(CSIdMap)),
110         FunctionProfileData(std::move(ProfData)) {}
111 
112 protected:
113   // A helper method to extract the frame from the IdToFrame map.
idToFrame(const FrameId Id)114   const Frame &idToFrame(const FrameId Id) const {
115     auto It = IdToFrame.find(Id);
116     assert(It != IdToFrame.end() && "Id not found in map.");
117     return It->getSecond();
118   }
119   // A mapping from FrameId (a hash of the contents) to the frame.
120   llvm::DenseMap<FrameId, Frame> IdToFrame;
121   // A mapping from CallStackId to the call stack.
122   llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CSIdToCallStack;
123   // A mapping from function GUID, hash of the canonical function symbol to the
124   // memprof profile data for that function, i.e allocation and callsite info.
125   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData;
126   // An iterator to the internal function profile data structure.
127   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter;
128 };
129 
130 // Maps a call stack id (recorded from the sanitizer stack depot) to the virtual
131 // address of each program counter in that call stack.
132 using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>;
133 
134 // Specializes the MemProfReader class to populate the contents from raw binary
135 // memprof profiles from instrumentation based profiling.
136 class RawMemProfReader final : public MemProfReader {
137 public:
138   RawMemProfReader(const RawMemProfReader &) = delete;
139   RawMemProfReader &operator=(const RawMemProfReader &) = delete;
140   virtual ~RawMemProfReader() override;
141 
142   // Prints the contents of the profile in YAML format.
143   void printYAML(raw_ostream &OS);
144 
145   // Return true if the \p DataBuffer starts with magic bytes indicating it is
146   // a raw binary memprof profile.
147   static bool hasFormat(const MemoryBuffer &DataBuffer);
148   // Return true if the file at \p Path starts with magic bytes indicating it is
149   // a raw binary memprof profile.
150   static bool hasFormat(const StringRef Path);
151 
152   // Create a RawMemProfReader after sanity checking the contents of the file at
153   // \p Path or the \p Buffer. The binary from which the profile has been
154   // collected is specified via a path in \p ProfiledBinary.
155   static Expected<std::unique_ptr<RawMemProfReader>>
156   create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false);
157   static Expected<std::unique_ptr<RawMemProfReader>>
158   create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary,
159          bool KeepName = false);
160 
161   // Returns a list of build ids recorded in the segment information.
162   static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer);
163 
164   Error
165   readNextRecord(GuidMemProfRecordPair &GuidRecord,
166                  std::function<const Frame(const FrameId)> Callback) override;
167 
168   // Constructor for unittests only.
169   RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym,
170                    llvm::SmallVectorImpl<SegmentEntry> &Seg,
171                    llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
172                    CallStackMap &SM, bool KeepName = false)
173       : SegmentInfo(Seg.begin(), Seg.end()), CallstackProfileData(Prof),
174         StackMap(SM), KeepSymbolName(KeepName) {
175     // We don't call initialize here since there is no raw profile to read. The
176     // test should pass in the raw profile as structured data.
177 
178     // If there is an error here then the mock symbolizer has not been
179     // initialized properly.
180     if (Error E = symbolizeAndFilterStackFrames(std::move(Sym)))
181       report_fatal_error(std::move(E));
182     if (Error E = mapRawProfileToRecords())
183       report_fatal_error(std::move(E));
184   }
185 
186 private:
RawMemProfReader(object::OwningBinary<object::Binary> && Bin,bool KeepName)187   RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName)
188       : Binary(std::move(Bin)), KeepSymbolName(KeepName) {}
189   // Initializes the RawMemProfReader with the contents in `DataBuffer`.
190   Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer);
191   // Read and parse the contents of the `DataBuffer` as a binary format profile.
192   Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer);
193   // Initialize the segment mapping information for symbolization.
194   Error setupForSymbolization();
195   // Symbolize and cache all the virtual addresses we encounter in the
196   // callstacks from the raw profile. Also prune callstack frames which we can't
197   // symbolize or those that belong to the runtime. For profile entries where
198   // the entire callstack is pruned, we drop the entry from the profile.
199   Error symbolizeAndFilterStackFrames(
200       std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer);
201   // Construct memprof records for each function and store it in the
202   // `FunctionProfileData` map. A function may have allocation profile data or
203   // callsite data or both.
204   Error mapRawProfileToRecords();
205 
206   object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
207 
208   llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
209   readMemInfoBlocks(const char *Ptr);
210 
211   // The profiled binary.
212   object::OwningBinary<object::Binary> Binary;
213   // Version of raw memprof binary currently being read. Defaults to most up
214   // to date version.
215   uint64_t MemprofRawVersion = MEMPROF_RAW_VERSION;
216   // The preferred load address of the executable segment.
217   uint64_t PreferredTextSegmentAddress = 0;
218   // The base address of the text segment in the process during profiling.
219   uint64_t ProfiledTextSegmentStart = 0;
220   // The limit address of the text segment in the process during profiling.
221   uint64_t ProfiledTextSegmentEnd = 0;
222 
223   // The memory mapped segment information for all executable segments in the
224   // profiled binary (filtered from the raw profile using the build id).
225   llvm::SmallVector<SegmentEntry, 2> SegmentInfo;
226 
227   // A map from callstack id (same as key in CallStackMap below) to the heap
228   // information recorded for that allocation context.
229   llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData;
230   CallStackMap StackMap;
231 
232   // Cached symbolization from PC to Frame.
233   llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame;
234 
235   // Whether to keep the symbol name for each frame after hashing.
236   bool KeepSymbolName = false;
237   // A mapping of the hash to symbol name, only used if KeepSymbolName is true.
238   llvm::DenseMap<uint64_t, std::string> GuidToSymbolName;
239 };
240 } // namespace memprof
241 } // namespace llvm
242 
243 #endif // LLVM_PROFILEDATA_MEMPROFREADER_H_
244