//===- GsymReader.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/GSYM/FileEntry.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/LineEntry.h"
#include "llvm/DebugInfo/GSYM/StringTable.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorOr.h"
#include <algorithm>
#include <inttypes.h>
#include <iterator>
#include <memory>
#include <optional>
#include <stdint.h>
#include <vector>

namespace llvm {
class MemoryBuffer;
class raw_ostream;

namespace gsym {

/// GsymReader is used to read GSYM data from a file or buffer.
///
/// This class is optimized for very quick lookups when the endianness matches
/// the host system. The Header, address table, address info offsets, and file
/// table is designed to be mmap'ed as read only into memory and used without
/// any parsing needed. If the endianness doesn't match, we swap these objects
/// and tables into GsymReader::SwappedData and then point our header and
/// ArrayRefs to this swapped internal data.
///
/// GsymReader objects must use one of the static functions to create an
/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
44 45 class GsymReader { 46 GsymReader(std::unique_ptr<MemoryBuffer> Buffer); 47 llvm::Error parse(); 48 49 std::unique_ptr<MemoryBuffer> MemBuffer; 50 StringRef GsymBytes; 51 llvm::endianness Endian; 52 const Header *Hdr = nullptr; 53 ArrayRef<uint8_t> AddrOffsets; 54 ArrayRef<uint32_t> AddrInfoOffsets; 55 ArrayRef<FileEntry> Files; 56 StringTable StrTab; 57 /// When the GSYM file's endianness doesn't match the host system then 58 /// we must decode all data structures that need to be swapped into 59 /// local storage and set point the ArrayRef objects above to these swapped 60 /// copies. 61 struct SwappedData { 62 Header Hdr; 63 std::vector<uint8_t> AddrOffsets; 64 std::vector<uint32_t> AddrInfoOffsets; 65 std::vector<FileEntry> Files; 66 }; 67 std::unique_ptr<SwappedData> Swap; 68 69 public: 70 LLVM_ABI GsymReader(GsymReader &&RHS); 71 LLVM_ABI ~GsymReader(); 72 73 /// Construct a GsymReader from a file on disk. 74 /// 75 /// \param Path The file path the GSYM file to read. 76 /// \returns An expected GsymReader that contains the object or an error 77 /// object that indicates reason for failing to read the GSYM. 78 LLVM_ABI static llvm::Expected<GsymReader> openFile(StringRef Path); 79 80 /// Construct a GsymReader from a buffer. 81 /// 82 /// \param Bytes A set of bytes that will be copied and owned by the 83 /// returned object on success. 84 /// \returns An expected GsymReader that contains the object or an error 85 /// object that indicates reason for failing to read the GSYM. 86 LLVM_ABI static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes); 87 88 /// Access the GSYM header. 89 /// \returns A native endian version of the GSYM header. 90 LLVM_ABI const Header &getHeader() const; 91 92 /// Get the full function info for an address. 93 /// 94 /// This should be called when a client will store a copy of the complete 95 /// FunctionInfo for a given address. For one off lookups, use the lookup() 96 /// function below. 
97 /// 98 /// Symbolication server processes might want to parse the entire function 99 /// info for a given address and cache it if the process stays around to 100 /// service many symbolication addresses, like for parsing profiling 101 /// information. 102 /// 103 /// \param Addr A virtual address from the orignal object file to lookup. 104 /// 105 /// \returns An expected FunctionInfo that contains the function info object 106 /// or an error object that indicates reason for failing to lookup the 107 /// address. 108 LLVM_ABI llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const; 109 110 /// Get the full function info given an address index. 111 /// 112 /// \param AddrIdx A address index for an address in the address table. 113 /// 114 /// \returns An expected FunctionInfo that contains the function info object 115 /// or an error object that indicates reason for failing get the function 116 /// info object. 117 LLVM_ABI llvm::Expected<FunctionInfo> 118 getFunctionInfoAtIndex(uint64_t AddrIdx) const; 119 120 /// Lookup an address in the a GSYM. 121 /// 122 /// Lookup just the information needed for a specific address \a Addr. This 123 /// function is faster that calling getFunctionInfo() as it will only return 124 /// information that pertains to \a Addr and allows the parsing to skip any 125 /// extra information encoded for other addresses. For example the line table 126 /// parsing can stop when a matching LineEntry has been fouhnd, and the 127 /// InlineInfo can stop parsing early once a match has been found and also 128 /// skip information that doesn't match. This avoids memory allocations and 129 /// is much faster for lookups. 130 /// 131 /// \param Addr A virtual address from the orignal object file to lookup. 132 /// 133 /// \param MergedFuncsData A pointer to an optional DataExtractor that, if 134 /// non-null, will be set to the raw data of the MergedFunctionInfo, if 135 /// present. 
136 /// 137 /// \returns An expected LookupResult that contains only the information 138 /// needed for the current address, or an error object that indicates reason 139 /// for failing to lookup the address. 140 LLVM_ABI llvm::Expected<LookupResult> 141 lookup(uint64_t Addr, 142 std::optional<DataExtractor> *MergedFuncsData = nullptr) const; 143 144 /// Lookup all merged functions for a given address. 145 /// 146 /// This function performs a lookup for the specified address and then 147 /// retrieves additional LookupResults from any merged functions associated 148 /// with the primary LookupResult. 149 /// 150 /// \param Addr The address to lookup. 151 /// 152 /// \returns A vector of LookupResult objects, where the first element is the 153 /// primary result, followed by results for any merged functions 154 LLVM_ABI llvm::Expected<std::vector<LookupResult>> 155 lookupAll(uint64_t Addr) const; 156 157 /// Get a string from the string table. 158 /// 159 /// \param Offset The string table offset for the string to retrieve. 160 /// \returns The string from the strin table. getString(uint32_t Offset)161 StringRef getString(uint32_t Offset) const { return StrTab[Offset]; } 162 163 /// Get the a file entry for the suppplied file index. 164 /// 165 /// Used to convert any file indexes in the FunctionInfo data back into 166 /// files. This function can be used for iteration, but is more commonly used 167 /// for random access when doing lookups. 168 /// 169 /// \param Index An index into the file table. 170 /// \returns An optional FileInfo that will be valid if the file index is 171 /// valid, or std::nullopt if the file index is out of bounds, getFile(uint32_t Index)172 std::optional<FileEntry> getFile(uint32_t Index) const { 173 if (Index < Files.size()) 174 return Files[Index]; 175 return std::nullopt; 176 } 177 178 /// Dump the entire Gsym data contained in this object. 179 /// 180 /// \param OS The output stream to dump to. 
181 LLVM_ABI void dump(raw_ostream &OS); 182 183 /// Dump a FunctionInfo object. 184 /// 185 /// This function will convert any string table indexes and file indexes 186 /// into human readable format. 187 /// 188 /// \param OS The output stream to dump to. 189 /// 190 /// \param FI The object to dump. 191 /// 192 /// \param Indent The indentation as number of spaces. Used when dumping as an 193 /// item within MergedFunctionsInfo. 194 LLVM_ABI void dump(raw_ostream &OS, const FunctionInfo &FI, 195 uint32_t Indent = 0); 196 197 /// Dump a MergedFunctionsInfo object. 198 /// 199 /// This function will dump a MergedFunctionsInfo object - basically by 200 /// dumping the contained FunctionInfo objects with indentation. 201 /// 202 /// \param OS The output stream to dump to. 203 /// 204 /// \param MFI The object to dump. 205 LLVM_ABI void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI); 206 207 /// Dump a CallSiteInfo object. 208 /// 209 /// This function will output the details of a CallSiteInfo object in a 210 /// human-readable format. 211 /// 212 /// \param OS The output stream to dump to. 213 /// 214 /// \param CSI The CallSiteInfo object to dump. 215 LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfo &CSI); 216 217 /// Dump a CallSiteInfoCollection object. 218 /// 219 /// This function will iterate over a collection of CallSiteInfo objects and 220 /// dump each one. 221 /// 222 /// \param OS The output stream to dump to. 223 /// 224 /// \param CSIC The CallSiteInfoCollection object to dump. 225 /// 226 /// \param Indent The indentation as number of spaces. Used when dumping as an 227 /// item from within MergedFunctionsInfo. 228 LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC, 229 uint32_t Indent = 0); 230 231 /// Dump a LineTable object. 232 /// 233 /// This function will convert any string table indexes and file indexes 234 /// into human readable format. 235 /// 236 /// 237 /// \param OS The output stream to dump to. 
238 /// 239 /// \param LT The object to dump. 240 /// 241 /// \param Indent The indentation as number of spaces. Used when dumping as an 242 /// item from within MergedFunctionsInfo. 243 LLVM_ABI void dump(raw_ostream &OS, const LineTable <, uint32_t Indent = 0); 244 245 /// Dump a InlineInfo object. 246 /// 247 /// This function will convert any string table indexes and file indexes 248 /// into human readable format. 249 /// 250 /// \param OS The output stream to dump to. 251 /// 252 /// \param II The object to dump. 253 /// 254 /// \param Indent The indentation as number of spaces. Used for recurive 255 /// dumping. 256 LLVM_ABI void dump(raw_ostream &OS, const InlineInfo &II, 257 uint32_t Indent = 0); 258 259 /// Dump a FileEntry object. 260 /// 261 /// This function will convert any string table indexes into human readable 262 /// format. 263 /// 264 /// \param OS The output stream to dump to. 265 /// 266 /// \param FE The object to dump. 267 LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE); 268 269 /// Get the number of addresses in this Gsym file. getNumAddresses()270 uint32_t getNumAddresses() const { 271 return Hdr->NumAddresses; 272 } 273 274 /// Gets an address from the address table. 275 /// 276 /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress. 277 /// 278 /// \param Index A index into the address table. 279 /// \returns A resolved virtual address for adddress in the address table 280 /// or std::nullopt if Index is out of bounds. 281 LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const; 282 283 protected: 284 285 /// Get an appropriate address info offsets array. 286 /// 287 /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 288 /// byte offsets from the The gsym::Header::BaseAddress. The table is stored 289 /// internally as a array of bytes that are in the correct endianness. When 290 /// we access this table we must get an array that matches those sizes. 
This 291 /// templatized helper function is used when accessing address offsets in the 292 /// AddrOffsets member variable. 293 /// 294 /// \returns An ArrayRef of an appropriate address offset size. 295 template <class T> ArrayRef<T> getAddrOffsets()296 getAddrOffsets() const { 297 return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()), 298 AddrOffsets.size()/sizeof(T)); 299 } 300 301 /// Get an appropriate address from the address table. 302 /// 303 /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 304 /// byte address offsets from the The gsym::Header::BaseAddress. The table is 305 /// stored internally as a array of bytes that are in the correct endianness. 306 /// In order to extract an address from the address table we must access the 307 /// address offset using the correct size and then add it to the BaseAddress 308 /// in the header. 309 /// 310 /// \param Index An index into the AddrOffsets array. 311 /// \returns An virtual address that matches the original object file for the 312 /// address as the specified index, or std::nullopt if Index is out of bounds. 313 template <class T> addressForIndex(size_t Index)314 std::optional<uint64_t> addressForIndex(size_t Index) const { 315 ArrayRef<T> AIO = getAddrOffsets<T>(); 316 if (Index < AIO.size()) 317 return AIO[Index] + Hdr->BaseAddress; 318 return std::nullopt; 319 } 320 /// Lookup an address offset in the AddrOffsets table. 321 /// 322 /// Given an address offset, look it up using a binary search of the 323 /// AddrOffsets table. 324 /// 325 /// \param AddrOffset An address offset, that has already been computed by 326 /// subtracting the gsym::Header::BaseAddress. 327 /// \returns The matching address offset index. This index will be used to 328 /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. 
329 template <class T> 330 std::optional<uint64_t> getAddressOffsetIndex(const uint64_t AddrOffset)331 getAddressOffsetIndex(const uint64_t AddrOffset) const { 332 ArrayRef<T> AIO = getAddrOffsets<T>(); 333 const auto Begin = AIO.begin(); 334 const auto End = AIO.end(); 335 auto Iter = std::lower_bound(Begin, End, AddrOffset); 336 // Watch for addresses that fall between the gsym::Header::BaseAddress and 337 // the first address offset. 338 if (Iter == Begin && AddrOffset < *Begin) 339 return std::nullopt; 340 if (Iter == End || AddrOffset < *Iter) 341 --Iter; 342 343 // GSYM files have sorted function infos with the most information (line 344 // table and/or inline info) first in the array of function infos, so 345 // always backup as much as possible as long as the address offset is the 346 // same as the previous entry. 347 while (Iter != Begin) { 348 auto Prev = Iter - 1; 349 if (*Prev == *Iter) 350 Iter = Prev; 351 else 352 break; 353 } 354 355 return std::distance(Begin, Iter); 356 } 357 358 /// Create a GSYM from a memory buffer. 359 /// 360 /// Called by both openFile() and copyBuffer(), this function does all of the 361 /// work of parsing the GSYM file and returning an error. 362 /// 363 /// \param MemBuffer A memory buffer that will transfer ownership into the 364 /// GsymReader. 365 /// \returns An expected GsymReader that contains the object or an error 366 /// object that indicates reason for failing to read the GSYM. 367 LLVM_ABI static llvm::Expected<llvm::gsym::GsymReader> 368 create(std::unique_ptr<MemoryBuffer> &MemBuffer); 369 370 /// Given an address, find the address index. 371 /// 372 /// Binary search the address table and find the matching address index. 373 /// 374 /// \param Addr A virtual address that matches the original object file 375 /// to lookup. 376 /// \returns An index into the address table. This index can be used to 377 /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. 
378 /// Returns an error if the address isn't in the GSYM with details of why. 379 LLVM_ABI Expected<uint64_t> getAddressIndex(const uint64_t Addr) const; 380 381 /// Given an address index, get the offset for the FunctionInfo. 382 /// 383 /// Looking up an address is done by finding the corresponding address 384 /// index for the address. This index is then used to get the offset of the 385 /// FunctionInfo data that we will decode using this function. 386 /// 387 /// \param Index An index into the address table. 388 /// \returns An optional GSYM data offset for the offset of the FunctionInfo 389 /// that needs to be decoded. 390 LLVM_ABI std::optional<uint64_t> getAddressInfoOffset(size_t Index) const; 391 392 /// Given an address, find the correct function info data and function 393 /// address. 394 /// 395 /// Binary search the address table and find the matching address info 396 /// and make sure that the function info contains the address. GSYM allows 397 /// functions to overlap, and the most debug info is contained in the first 398 /// entries due to the sorting when GSYM files are created. We can have 399 /// multiple function info that start at the same address only if their 400 /// address range doesn't match. So find the first entry that matches \a Addr 401 /// and iterate forward until we find one that contains the address. 402 /// 403 /// \param[in] Addr A virtual address that matches the original object file 404 /// to lookup. 405 /// 406 /// \param[out] FuncStartAddr A virtual address that is the base address of 407 /// the function that is used for decoding the FunctionInfo. 408 /// 409 /// \returns An valid data extractor on success, or an error if we fail to 410 /// find the address in a function info or corrrectly decode the data 411 LLVM_ABI llvm::Expected<llvm::DataExtractor> 412 getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const; 413 414 /// Get the function data and address given an address index. 
415 /// 416 /// \param AddrIdx A address index from the address table. 417 /// 418 /// \returns An expected FunctionInfo that contains the function info object 419 /// or an error object that indicates reason for failing to lookup the 420 /// address. 421 LLVM_ABI llvm::Expected<llvm::DataExtractor> 422 getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const; 423 }; 424 425 } // namespace gsym 426 } // namespace llvm 427 428 #endif // LLVM_DEBUGINFO_GSYM_GSYMREADER_H 429