1 //===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading MemProf profiling data. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_PROFILEDATA_MEMPROFREADER_H_ 14 #define LLVM_PROFILEDATA_MEMPROFREADER_H_ 15 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/MapVector.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 20 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 21 #include "llvm/IR/GlobalValue.h" 22 #include "llvm/Object/Binary.h" 23 #include "llvm/Object/ObjectFile.h" 24 #include "llvm/ProfileData/IndexedMemProfData.h" 25 #include "llvm/ProfileData/InstrProfReader.h" 26 #include "llvm/ProfileData/MemProfData.inc" 27 #include "llvm/ProfileData/MemProfRadixTree.h" 28 #include "llvm/Support/Compiler.h" 29 #include "llvm/Support/Error.h" 30 #include "llvm/Support/MemoryBuffer.h" 31 32 #include <functional> 33 34 namespace llvm { 35 namespace memprof { 36 // A class for memprof profile data populated directly from external 37 // sources. 38 class MemProfReader { 39 public: 40 // The MemProfReader only holds memory profile information. getProfileKind()41 InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; } 42 43 using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>; 44 using Iterator = InstrProfIterator<GuidMemProfRecordPair, MemProfReader>; end()45 Iterator end() { return Iterator(); } begin()46 Iterator begin() { 47 Iter = MemProfData.Records.begin(); 48 return Iterator(this); 49 } 50 51 // Take the complete profile data. Once this function is invoked, 52 // MemProfReader no longer owns the MemProf profile. takeMemProfData()53 IndexedMemProfData takeMemProfData() { return std::move(MemProfData); } 54 55 virtual Error 56 readNextRecord(GuidMemProfRecordPair &GuidRecord, 57 std::function<const Frame(const FrameId)> Callback = nullptr) { 58 if (MemProfData.Records.empty()) 59 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 60 61 if (Iter == MemProfData.Records.end()) 62 return make_error<InstrProfError>(instrprof_error::eof); 63 64 if (Callback == nullptr) 65 Callback = [&](FrameId Id) { return idToFrame(Id); }; 66 67 CallStackIdConverter<decltype(MemProfData.CallStacks)> CSIdConv( 68 MemProfData.CallStacks, Callback); 69 70 const IndexedMemProfRecord &IndexedRecord = Iter->second; 71 GuidRecord = { 72 Iter->first, 73 IndexedRecord.toMemProfRecord(CSIdConv), 74 }; 75 if (CSIdConv.LastUnmappedId) 76 return make_error<InstrProfError>(instrprof_error::hash_mismatch); 77 Iter++; 78 return Error::success(); 79 } 80 81 // Allow default construction for derived classes which can populate the 82 // contents after construction. 83 MemProfReader() = default; 84 virtual ~MemProfReader() = default; 85 86 // Initialize the MemProfReader with the given MemProf profile. MemProfReader(IndexedMemProfData && MemProfData)87 MemProfReader(IndexedMemProfData &&MemProfData) 88 : MemProfData(std::move(MemProfData)) {} 89 90 protected: 91 // A helper method to extract the frame from the IdToFrame map. idToFrame(const FrameId Id)92 const Frame &idToFrame(const FrameId Id) const { 93 auto It = MemProfData.Frames.find(Id); 94 assert(It != MemProfData.Frames.end() && "Id not found in map."); 95 return It->second; 96 } 97 // A complete pacakge of the MemProf profile. 98 IndexedMemProfData MemProfData; 99 // An iterator to the internal function profile data structure. 100 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter; 101 }; 102 103 // Map from id (recorded from sanitizer stack depot) to virtual addresses for 104 // each program counter address in the callstack. 105 using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>; 106 107 // Specializes the MemProfReader class to populate the contents from raw binary 108 // memprof profiles from instrumentation based profiling. 109 class LLVM_ABI RawMemProfReader final : public MemProfReader { 110 public: 111 RawMemProfReader(const RawMemProfReader &) = delete; 112 RawMemProfReader &operator=(const RawMemProfReader &) = delete; 113 virtual ~RawMemProfReader() override; 114 115 // Prints the contents of the profile in YAML format. 116 void printYAML(raw_ostream &OS); 117 118 // Return true if the \p DataBuffer starts with magic bytes indicating it is 119 // a raw binary memprof profile. 120 static bool hasFormat(const MemoryBuffer &DataBuffer); 121 // Return true if the file at \p Path starts with magic bytes indicating it is 122 // a raw binary memprof profile. 123 static bool hasFormat(const StringRef Path); 124 125 // Create a RawMemProfReader after sanity checking the contents of the file at 126 // \p Path or the \p Buffer. The binary from which the profile has been 127 // collected is specified via a path in \p ProfiledBinary. 128 static Expected<std::unique_ptr<RawMemProfReader>> 129 create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false); 130 static Expected<std::unique_ptr<RawMemProfReader>> 131 create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary, 132 bool KeepName = false); 133 134 // Returns a list of build ids recorded in the segment information. 135 static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer); 136 137 Error 138 readNextRecord(GuidMemProfRecordPair &GuidRecord, 139 std::function<const Frame(const FrameId)> Callback) override; 140 141 // Constructor for unittests only. 142 RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym, 143 llvm::SmallVectorImpl<SegmentEntry> &Seg, 144 llvm::MapVector<uint64_t, MemInfoBlock> &Prof, 145 CallStackMap &SM, bool KeepName = false) 146 : SegmentInfo(Seg.begin(), Seg.end()), CallstackProfileData(Prof), 147 StackMap(SM), KeepSymbolName(KeepName) { 148 // We don't call initialize here since there is no raw profile to read. The 149 // test should pass in the raw profile as structured data. 150 151 // If there is an error here then the mock symbolizer has not been 152 // initialized properly. 153 if (Error E = symbolizeAndFilterStackFrames(std::move(Sym))) 154 report_fatal_error(std::move(E)); 155 if (Error E = mapRawProfileToRecords()) 156 report_fatal_error(std::move(E)); 157 } 158 159 private: RawMemProfReader(object::OwningBinary<object::Binary> && Bin,bool KeepName)160 RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName) 161 : Binary(std::move(Bin)), KeepSymbolName(KeepName) {} 162 // Initializes the RawMemProfReader with the contents in `DataBuffer`. 163 Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer); 164 // Read and parse the contents of the `DataBuffer` as a binary format profile. 165 Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer); 166 // Initialize the segment mapping information for symbolization. 167 Error setupForSymbolization(); 168 // Symbolize and cache all the virtual addresses we encounter in the 169 // callstacks from the raw profile. Also prune callstack frames which we can't 170 // symbolize or those that belong to the runtime. For profile entries where 171 // the entire callstack is pruned, we drop the entry from the profile. 172 Error symbolizeAndFilterStackFrames( 173 std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer); 174 // Construct memprof records for each function and store it in the 175 // `FunctionProfileData` map. A function may have allocation profile data or 176 // callsite data or both. 177 Error mapRawProfileToRecords(); 178 179 object::SectionedAddress getModuleOffset(uint64_t VirtualAddress); 180 181 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> 182 readMemInfoBlocks(const char *Ptr); 183 184 // The profiled binary. 185 object::OwningBinary<object::Binary> Binary; 186 // Version of raw memprof binary currently being read. Defaults to most up 187 // to date version. 188 uint64_t MemprofRawVersion = MEMPROF_RAW_VERSION; 189 // The preferred load address of the executable segment. 190 uint64_t PreferredTextSegmentAddress = 0; 191 // The base address of the text segment in the process during profiling. 192 uint64_t ProfiledTextSegmentStart = 0; 193 // The limit address of the text segment in the process during profiling. 194 uint64_t ProfiledTextSegmentEnd = 0; 195 196 // The memory mapped segment information for all executable segments in the 197 // profiled binary (filtered from the raw profile using the build id). 198 llvm::SmallVector<SegmentEntry, 2> SegmentInfo; 199 200 // A map from callstack id (same as key in CallStackMap below) to the heap 201 // information recorded for that allocation context. 202 llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData; 203 CallStackMap StackMap; 204 205 // Cached symbolization from PC to Frame. 206 llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame; 207 208 // Whether to keep the symbol name for each frame after hashing. 209 bool KeepSymbolName = false; 210 // A mapping of the hash to symbol name, only used if KeepSymbolName is true. 211 llvm::DenseMap<uint64_t, std::string> GuidToSymbolName; 212 }; 213 214 class YAMLMemProfReader final : public MemProfReader { 215 public: 216 YAMLMemProfReader() = default; 217 218 // Return true if the \p DataBuffer starts with "---" indicating it is a YAML 219 // file. 220 LLVM_ABI static bool hasFormat(const MemoryBuffer &DataBuffer); 221 // Wrapper around hasFormat above, reading the file instead of the memory 222 // buffer. 223 LLVM_ABI static bool hasFormat(const StringRef Path); 224 225 // Create a YAMLMemProfReader after sanity checking the contents of the file 226 // at \p Path or the \p Buffer. 227 LLVM_ABI static Expected<std::unique_ptr<YAMLMemProfReader>> 228 create(const Twine &Path); 229 LLVM_ABI static Expected<std::unique_ptr<YAMLMemProfReader>> 230 create(std::unique_ptr<MemoryBuffer> Buffer); 231 232 LLVM_ABI void parse(StringRef YAMLData); 233 takeDataAccessProfData()234 std::unique_ptr<memprof::DataAccessProfData> takeDataAccessProfData() { 235 return std::move(DataAccessProfileData); 236 } 237 238 private: 239 // Called by `parse` to set data access profiles after parsing them from Yaml 240 // files. 241 void setDataAccessProfileData(std::unique_ptr<memprof::DataAccessProfData> Data)242 setDataAccessProfileData(std::unique_ptr<memprof::DataAccessProfData> Data) { 243 DataAccessProfileData = std::move(Data); 244 } 245 246 std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData; 247 }; 248 } // namespace memprof 249 } // namespace llvm 250 251 #endif // LLVM_PROFILEDATA_MEMPROFREADER_H_ 252