1 //===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading MemProf profiling data. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_PROFILEDATA_MEMPROFREADER_H_ 14 #define LLVM_PROFILEDATA_MEMPROFREADER_H_ 15 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/MapVector.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 20 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 21 #include "llvm/IR/GlobalValue.h" 22 #include "llvm/Object/Binary.h" 23 #include "llvm/Object/ObjectFile.h" 24 #include "llvm/ProfileData/InstrProfReader.h" 25 #include "llvm/ProfileData/MemProf.h" 26 #include "llvm/ProfileData/MemProfData.inc" 27 #include "llvm/Support/Error.h" 28 #include "llvm/Support/MemoryBuffer.h" 29 30 #include <functional> 31 32 namespace llvm { 33 namespace memprof { 34 // A class for memprof profile data populated directly from external 35 // sources. 36 class MemProfReader { 37 public: 38 // The MemProfReader only holds memory profile information. getProfileKind()39 InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; } 40 41 using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>; 42 using Iterator = InstrProfIterator<GuidMemProfRecordPair, MemProfReader>; end()43 Iterator end() { return Iterator(); } begin()44 Iterator begin() { 45 Iter = FunctionProfileData.begin(); 46 return Iterator(this); 47 } 48 49 // Return a const reference to the internal Id to Frame mappings. getFrameMapping()50 const llvm::DenseMap<FrameId, Frame> &getFrameMapping() const { 51 return IdToFrame; 52 } 53 54 // Return a const reference to the internal Id to call stacks. 55 const llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> & getCallStacks()56 getCallStacks() const { 57 return CSIdToCallStack; 58 } 59 60 // Return a const reference to the internal function profile data. 61 const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> & getProfileData()62 getProfileData() const { 63 return FunctionProfileData; 64 } 65 66 virtual Error 67 readNextRecord(GuidMemProfRecordPair &GuidRecord, 68 std::function<const Frame(const FrameId)> Callback = nullptr) { 69 if (FunctionProfileData.empty()) 70 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 71 72 if (Iter == FunctionProfileData.end()) 73 return make_error<InstrProfError>(instrprof_error::eof); 74 75 if (Callback == nullptr) 76 Callback = 77 std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1); 78 79 CallStackIdConverter<decltype(CSIdToCallStack)> CSIdConv(CSIdToCallStack, 80 Callback); 81 82 const IndexedMemProfRecord &IndexedRecord = Iter->second; 83 GuidRecord = { 84 Iter->first, 85 IndexedRecord.toMemProfRecord(CSIdConv), 86 }; 87 if (CSIdConv.LastUnmappedId) 88 return make_error<InstrProfError>(instrprof_error::hash_mismatch); 89 Iter++; 90 return Error::success(); 91 } 92 93 // Allow default construction for derived classes which can populate the 94 // contents after construction. 95 MemProfReader() = default; 96 virtual ~MemProfReader() = default; 97 98 // Initialize the MemProfReader with the frame mappings and profile contents. 99 MemProfReader( 100 llvm::DenseMap<FrameId, Frame> FrameIdMap, 101 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData); 102 103 // Initialize the MemProfReader with the frame mappings, call stack mappings, 104 // and profile contents. MemProfReader(llvm::DenseMap<FrameId,Frame> FrameIdMap,llvm::DenseMap<CallStackId,llvm::SmallVector<FrameId>> CSIdMap,llvm::MapVector<GlobalValue::GUID,IndexedMemProfRecord> ProfData)105 MemProfReader( 106 llvm::DenseMap<FrameId, Frame> FrameIdMap, 107 llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CSIdMap, 108 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData) 109 : IdToFrame(std::move(FrameIdMap)), CSIdToCallStack(std::move(CSIdMap)), 110 FunctionProfileData(std::move(ProfData)) {} 111 112 protected: 113 // A helper method to extract the frame from the IdToFrame map. idToFrame(const FrameId Id)114 const Frame &idToFrame(const FrameId Id) const { 115 auto It = IdToFrame.find(Id); 116 assert(It != IdToFrame.end() && "Id not found in map."); 117 return It->getSecond(); 118 } 119 // A mapping from FrameId (a hash of the contents) to the frame. 120 llvm::DenseMap<FrameId, Frame> IdToFrame; 121 // A mapping from CallStackId to the call stack. 122 llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CSIdToCallStack; 123 // A mapping from function GUID, hash of the canonical function symbol to the 124 // memprof profile data for that function, i.e allocation and callsite info. 125 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData; 126 // An iterator to the internal function profile data structure. 127 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter; 128 }; 129 130 // Map from id (recorded from sanitizer stack depot) to virtual addresses for 131 // each program counter address in the callstack. 132 using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>; 133 134 // Specializes the MemProfReader class to populate the contents from raw binary 135 // memprof profiles from instrumentation based profiling. 136 class RawMemProfReader final : public MemProfReader { 137 public: 138 RawMemProfReader(const RawMemProfReader &) = delete; 139 RawMemProfReader &operator=(const RawMemProfReader &) = delete; 140 virtual ~RawMemProfReader() override; 141 142 // Prints the contents of the profile in YAML format. 143 void printYAML(raw_ostream &OS); 144 145 // Return true if the \p DataBuffer starts with magic bytes indicating it is 146 // a raw binary memprof profile. 147 static bool hasFormat(const MemoryBuffer &DataBuffer); 148 // Return true if the file at \p Path starts with magic bytes indicating it is 149 // a raw binary memprof profile. 150 static bool hasFormat(const StringRef Path); 151 152 // Create a RawMemProfReader after sanity checking the contents of the file at 153 // \p Path or the \p Buffer. The binary from which the profile has been 154 // collected is specified via a path in \p ProfiledBinary. 155 static Expected<std::unique_ptr<RawMemProfReader>> 156 create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false); 157 static Expected<std::unique_ptr<RawMemProfReader>> 158 create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary, 159 bool KeepName = false); 160 161 // Returns a list of build ids recorded in the segment information. 162 static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer); 163 164 Error 165 readNextRecord(GuidMemProfRecordPair &GuidRecord, 166 std::function<const Frame(const FrameId)> Callback) override; 167 168 // Constructor for unittests only. 169 RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym, 170 llvm::SmallVectorImpl<SegmentEntry> &Seg, 171 llvm::MapVector<uint64_t, MemInfoBlock> &Prof, 172 CallStackMap &SM, bool KeepName = false) 173 : SegmentInfo(Seg.begin(), Seg.end()), CallstackProfileData(Prof), 174 StackMap(SM), KeepSymbolName(KeepName) { 175 // We don't call initialize here since there is no raw profile to read. The 176 // test should pass in the raw profile as structured data. 177 178 // If there is an error here then the mock symbolizer has not been 179 // initialized properly. 180 if (Error E = symbolizeAndFilterStackFrames(std::move(Sym))) 181 report_fatal_error(std::move(E)); 182 if (Error E = mapRawProfileToRecords()) 183 report_fatal_error(std::move(E)); 184 } 185 186 private: RawMemProfReader(object::OwningBinary<object::Binary> && Bin,bool KeepName)187 RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName) 188 : Binary(std::move(Bin)), KeepSymbolName(KeepName) {} 189 // Initializes the RawMemProfReader with the contents in `DataBuffer`. 190 Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer); 191 // Read and parse the contents of the `DataBuffer` as a binary format profile. 192 Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer); 193 // Initialize the segment mapping information for symbolization. 194 Error setupForSymbolization(); 195 // Symbolize and cache all the virtual addresses we encounter in the 196 // callstacks from the raw profile. Also prune callstack frames which we can't 197 // symbolize or those that belong to the runtime. For profile entries where 198 // the entire callstack is pruned, we drop the entry from the profile. 199 Error symbolizeAndFilterStackFrames( 200 std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer); 201 // Construct memprof records for each function and store it in the 202 // `FunctionProfileData` map. A function may have allocation profile data or 203 // callsite data or both. 204 Error mapRawProfileToRecords(); 205 206 object::SectionedAddress getModuleOffset(uint64_t VirtualAddress); 207 208 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> 209 readMemInfoBlocks(const char *Ptr); 210 211 // The profiled binary. 212 object::OwningBinary<object::Binary> Binary; 213 // Version of raw memprof binary currently being read. Defaults to most up 214 // to date version. 215 uint64_t MemprofRawVersion = MEMPROF_RAW_VERSION; 216 // The preferred load address of the executable segment. 217 uint64_t PreferredTextSegmentAddress = 0; 218 // The base address of the text segment in the process during profiling. 219 uint64_t ProfiledTextSegmentStart = 0; 220 // The limit address of the text segment in the process during profiling. 221 uint64_t ProfiledTextSegmentEnd = 0; 222 223 // The memory mapped segment information for all executable segments in the 224 // profiled binary (filtered from the raw profile using the build id). 225 llvm::SmallVector<SegmentEntry, 2> SegmentInfo; 226 227 // A map from callstack id (same as key in CallStackMap below) to the heap 228 // information recorded for that allocation context. 229 llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData; 230 CallStackMap StackMap; 231 232 // Cached symbolization from PC to Frame. 233 llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame; 234 235 // Whether to keep the symbol name for each frame after hashing. 236 bool KeepSymbolName = false; 237 // A mapping of the hash to symbol name, only used if KeepSymbolName is true. 238 llvm::DenseMap<uint64_t, std::string> GuidToSymbolName; 239 }; 240 } // namespace memprof 241 } // namespace llvm 242 243 #endif // LLVM_PROFILEDATA_MEMPROFREADER_H_ 244