xref: /freebsd/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- GsymReader.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
10 #define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
11 
12 #include "llvm/ADT/ArrayRef.h"
13 #include "llvm/DebugInfo/GSYM/FileEntry.h"
14 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
15 #include "llvm/DebugInfo/GSYM/Header.h"
16 #include "llvm/DebugInfo/GSYM/LineEntry.h"
17 #include "llvm/DebugInfo/GSYM/StringTable.h"
18 #include "llvm/Support/Compiler.h"
19 #include "llvm/Support/DataExtractor.h"
20 #include "llvm/Support/Endian.h"
21 #include "llvm/Support/ErrorOr.h"
22 #include <inttypes.h>
23 #include <memory>
24 #include <stdint.h>
25 #include <vector>
26 
27 namespace llvm {
28 class MemoryBuffer;
29 class raw_ostream;
30 
31 namespace gsym {
32 
33 /// GsymReader is used to read GSYM data from a file or buffer.
34 ///
35 /// This class is optimized for very quick lookups when the endianness matches
36 /// the host system. The Header, address table, address info offsets, and file
37 /// table are designed to be mmap'ed as read only into memory and used without
38 /// any parsing needed. If the endianness doesn't match, we swap these objects
39 /// and tables into GsymReader::SwappedData and then point our header and
40 /// ArrayRefs to this swapped internal data.
41 ///
42 /// GsymReader objects must use one of the static functions to create an
43 /// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
44 
45 class GsymReader {
46   GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
47   llvm::Error parse();
48 
49   std::unique_ptr<MemoryBuffer> MemBuffer;
50   StringRef GsymBytes;
51   llvm::endianness Endian;
52   const Header *Hdr = nullptr;
53   ArrayRef<uint8_t> AddrOffsets;
54   ArrayRef<uint32_t> AddrInfoOffsets;
55   ArrayRef<FileEntry> Files;
56   StringTable StrTab;
  /// When the GSYM file's endianness doesn't match the host system then
  /// we must decode all data structures that need to be swapped into
  /// local storage and point the ArrayRef objects above to these swapped
  /// copies.
  struct SwappedData {
    /// Swapped copy of the GSYM header.
    Header Hdr;
    /// Swapped copy of the address offsets table, stored as raw bytes.
    std::vector<uint8_t> AddrOffsets;
    /// Swapped copy of the address info offsets table.
    std::vector<uint32_t> AddrInfoOffsets;
    /// Swapped copy of the file table.
    std::vector<FileEntry> Files;
  };
67   std::unique_ptr<SwappedData> Swap;
68 
69 public:
70   LLVM_ABI GsymReader(GsymReader &&RHS);
71   LLVM_ABI ~GsymReader();
72 
73   /// Construct a GsymReader from a file on disk.
74   ///
75   /// \param Path The file path of the GSYM file to read.
76   /// \returns An expected GsymReader that contains the object or an error
77   /// object that indicates reason for failing to read the GSYM.
78   LLVM_ABI static llvm::Expected<GsymReader> openFile(StringRef Path);
79 
80   /// Construct a GsymReader from a buffer.
81   ///
82   /// \param Bytes A set of bytes that will be copied and owned by the
83   /// returned object on success.
84   /// \returns An expected GsymReader that contains the object or an error
85   /// object that indicates reason for failing to read the GSYM.
86   LLVM_ABI static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);
87 
88   /// Access the GSYM header.
89   /// \returns A native endian version of the GSYM header.
90   LLVM_ABI const Header &getHeader() const;
91 
92   /// Get the full function info for an address.
93   ///
94   /// This should be called when a client will store a copy of the complete
95   /// FunctionInfo for a given address. For one off lookups, use the lookup()
96   /// function below.
97   ///
98   /// Symbolication server processes might want to parse the entire function
99   /// info for a given address and cache it if the process stays around to
100   /// service many symbolication addresses, like for parsing profiling
101   /// information.
102   ///
103   /// \param Addr A virtual address from the original object file to lookup.
104   ///
105   /// \returns An expected FunctionInfo that contains the function info object
106   /// or an error object that indicates reason for failing to lookup the
107   /// address.
108   LLVM_ABI llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
109 
110   /// Get the full function info given an address index.
111   ///
112   /// \param AddrIdx An address index for an address in the address table.
113   ///
114   /// \returns An expected FunctionInfo that contains the function info object
115   /// or an error object that indicates reason for failing to get the function
116   /// info object.
117   LLVM_ABI llvm::Expected<FunctionInfo>
118   getFunctionInfoAtIndex(uint64_t AddrIdx) const;
119 
120   /// Lookup an address in a GSYM.
121   ///
122   /// Lookup just the information needed for a specific address \a Addr. This
123   /// function is faster than calling getFunctionInfo() as it will only return
124   /// information that pertains to \a Addr and allows the parsing to skip any
125   /// extra information encoded for other addresses. For example the line table
126   /// parsing can stop when a matching LineEntry has been found, and the
127   /// InlineInfo can stop parsing early once a match has been found and also
128   /// skip information that doesn't match. This avoids memory allocations and
129   /// is much faster for lookups.
130   ///
131   /// \param Addr A virtual address from the original object file to lookup.
132   ///
133   /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
134   /// non-null, will be set to the raw data of the MergedFunctionInfo, if
135   /// present.
136   ///
137   /// \returns An expected LookupResult that contains only the information
138   /// needed for the current address, or an error object that indicates reason
139   /// for failing to lookup the address.
140   LLVM_ABI llvm::Expected<LookupResult>
141   lookup(uint64_t Addr,
142          std::optional<DataExtractor> *MergedFuncsData = nullptr) const;
143 
144   /// Lookup all merged functions for a given address.
145   ///
146   /// This function performs a lookup for the specified address and then
147   /// retrieves additional LookupResults from any merged functions associated
148   /// with the primary LookupResult.
149   ///
150   /// \param Addr The address to lookup.
151   ///
152   /// \returns A vector of LookupResult objects, where the first element is the
153   /// primary result, followed by results for any merged functions
154   LLVM_ABI llvm::Expected<std::vector<LookupResult>>
155   lookupAll(uint64_t Addr) const;
156 
157   /// Get a string from the string table.
158   ///
159   /// \param Offset The string table offset for the string to retrieve.
160   /// \returns The string from the strin table.
getString(uint32_t Offset)161   StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
162 
163   /// Get the a file entry for the suppplied file index.
164   ///
165   /// Used to convert any file indexes in the FunctionInfo data back into
166   /// files. This function can be used for iteration, but is more commonly used
167   /// for random access when doing lookups.
168   ///
169   /// \param Index An index into the file table.
170   /// \returns An optional FileInfo that will be valid if the file index is
171   /// valid, or std::nullopt if the file index is out of bounds,
getFile(uint32_t Index)172   std::optional<FileEntry> getFile(uint32_t Index) const {
173     if (Index < Files.size())
174       return Files[Index];
175     return std::nullopt;
176   }
177 
178   /// Dump the entire Gsym data contained in this object.
179   ///
180   /// \param  OS The output stream to dump to.
181   LLVM_ABI void dump(raw_ostream &OS);
182 
183   /// Dump a FunctionInfo object.
184   ///
185   /// This function will convert any string table indexes and file indexes
186   /// into human readable format.
187   ///
188   /// \param  OS The output stream to dump to.
189   ///
190   /// \param FI The object to dump.
191   ///
192   /// \param Indent The indentation as number of spaces. Used when dumping as an
193   /// item within MergedFunctionsInfo.
194   LLVM_ABI void dump(raw_ostream &OS, const FunctionInfo &FI,
195                      uint32_t Indent = 0);
196 
197   /// Dump a MergedFunctionsInfo object.
198   ///
199   /// This function will dump a MergedFunctionsInfo object - basically by
200   /// dumping the contained FunctionInfo objects with indentation.
201   ///
202   /// \param  OS The output stream to dump to.
203   ///
204   /// \param MFI The object to dump.
205   LLVM_ABI void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI);
206 
207   /// Dump a CallSiteInfo object.
208   ///
209   /// This function will output the details of a CallSiteInfo object in a
210   /// human-readable format.
211   ///
212   /// \param OS The output stream to dump to.
213   ///
214   /// \param CSI The CallSiteInfo object to dump.
215   LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfo &CSI);
216 
217   /// Dump a CallSiteInfoCollection object.
218   ///
219   /// This function will iterate over a collection of CallSiteInfo objects and
220   /// dump each one.
221   ///
222   /// \param OS The output stream to dump to.
223   ///
224   /// \param CSIC The CallSiteInfoCollection object to dump.
225   ///
226   /// \param Indent The indentation as number of spaces. Used when dumping as an
227   /// item from within MergedFunctionsInfo.
228   LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
229                      uint32_t Indent = 0);
230 
231   /// Dump a LineTable object.
232   ///
233   /// This function will convert any string table indexes and file indexes
234   /// into human readable format.
235   ///
236   ///
237   /// \param  OS The output stream to dump to.
238   ///
239   /// \param LT The object to dump.
240   ///
241   /// \param Indent The indentation as number of spaces. Used when dumping as an
242   /// item from within MergedFunctionsInfo.
243   LLVM_ABI void dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent = 0);
244 
245   /// Dump a InlineInfo object.
246   ///
247   /// This function will convert any string table indexes and file indexes
248   /// into human readable format.
249   ///
250   /// \param  OS The output stream to dump to.
251   ///
252   /// \param II The object to dump.
253   ///
254   /// \param Indent The indentation as number of spaces. Used for recursive
255   /// dumping.
256   LLVM_ABI void dump(raw_ostream &OS, const InlineInfo &II,
257                      uint32_t Indent = 0);
258 
259   /// Dump a FileEntry object.
260   ///
261   /// This function will convert any string table indexes into human readable
262   /// format.
263   ///
264   /// \param  OS The output stream to dump to.
265   ///
266   /// \param FE The object to dump.
267   LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE);
268 
269   /// Get the number of addresses in this Gsym file.
getNumAddresses()270   uint32_t getNumAddresses() const {
271     return Hdr->NumAddresses;
272   }
273 
274   /// Gets an address from the address table.
275   ///
276   /// Addresses are stored as offsets from the gsym::Header::BaseAddress.
277   ///
278   /// \param Index An index into the address table.
279   /// \returns A resolved virtual address for the address in the address table
280   /// or std::nullopt if Index is out of bounds.
281   LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const;
282 
283 protected:
284 
285   /// Get an appropriate address info offsets array.
286   ///
287   /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
288   /// byte offsets from the The gsym::Header::BaseAddress. The table is stored
289   /// internally as a array of bytes that are in the correct endianness. When
290   /// we access this table we must get an array that matches those sizes. This
291   /// templatized helper function is used when accessing address offsets in the
292   /// AddrOffsets member variable.
293   ///
294   /// \returns An ArrayRef of an appropriate address offset size.
295   template <class T> ArrayRef<T>
getAddrOffsets()296   getAddrOffsets() const {
297     return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
298                        AddrOffsets.size()/sizeof(T));
299   }
300 
301   /// Get an appropriate address from the address table.
302   ///
303   /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
304   /// byte address offsets from the The gsym::Header::BaseAddress. The table is
305   /// stored internally as a array of bytes that are in the correct endianness.
306   /// In order to extract an address from the address table we must access the
307   /// address offset using the correct size and then add it to the BaseAddress
308   /// in the header.
309   ///
310   /// \param Index An index into the AddrOffsets array.
311   /// \returns An virtual address that matches the original object file for the
312   /// address as the specified index, or std::nullopt if Index is out of bounds.
313   template <class T>
addressForIndex(size_t Index)314   std::optional<uint64_t> addressForIndex(size_t Index) const {
315     ArrayRef<T> AIO = getAddrOffsets<T>();
316     if (Index < AIO.size())
317       return AIO[Index] + Hdr->BaseAddress;
318     return std::nullopt;
319   }
320   /// Lookup an address offset in the AddrOffsets table.
321   ///
322   /// Given an address offset, look it up using a binary search of the
323   /// AddrOffsets table.
324   ///
325   /// \param AddrOffset An address offset, that has already been computed by
326   /// subtracting the gsym::Header::BaseAddress.
327   /// \returns The matching address offset index. This index will be used to
328   /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
329   template <class T>
330   std::optional<uint64_t>
getAddressOffsetIndex(const uint64_t AddrOffset)331   getAddressOffsetIndex(const uint64_t AddrOffset) const {
332     ArrayRef<T> AIO = getAddrOffsets<T>();
333     const auto Begin = AIO.begin();
334     const auto End = AIO.end();
335     auto Iter = std::lower_bound(Begin, End, AddrOffset);
336     // Watch for addresses that fall between the gsym::Header::BaseAddress and
337     // the first address offset.
338     if (Iter == Begin && AddrOffset < *Begin)
339       return std::nullopt;
340     if (Iter == End || AddrOffset < *Iter)
341       --Iter;
342 
343     // GSYM files have sorted function infos with the most information (line
344     // table and/or inline info) first in the array of function infos, so
345     // always backup as much as possible as long as the address offset is the
346     // same as the previous entry.
347     while (Iter != Begin) {
348       auto Prev = Iter - 1;
349       if (*Prev == *Iter)
350         Iter = Prev;
351       else
352         break;
353     }
354 
355     return std::distance(Begin, Iter);
356   }
357 
358   /// Create a GSYM from a memory buffer.
359   ///
360   /// Called by both openFile() and copyBuffer(), this function does all of the
361   /// work of parsing the GSYM file and returning an error.
362   ///
363   /// \param MemBuffer A memory buffer that will transfer ownership into the
364   /// GsymReader.
365   /// \returns An expected GsymReader that contains the object or an error
366   /// object that indicates reason for failing to read the GSYM.
367   LLVM_ABI static llvm::Expected<llvm::gsym::GsymReader>
368   create(std::unique_ptr<MemoryBuffer> &MemBuffer);
369 
370   /// Given an address, find the address index.
371   ///
372   /// Binary search the address table and find the matching address index.
373   ///
374   /// \param Addr A virtual address that matches the original object file
375   /// to lookup.
376   /// \returns An index into the address table. This index can be used to
377   /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
378   /// Returns an error if the address isn't in the GSYM with details of why.
379   LLVM_ABI Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
380 
381   /// Given an address index, get the offset for the FunctionInfo.
382   ///
383   /// Looking up an address is done by finding the corresponding address
384   /// index for the address. This index is then used to get the offset of the
385   /// FunctionInfo data that we will decode using this function.
386   ///
387   /// \param Index An index into the address table.
388   /// \returns An optional GSYM data offset for the offset of the FunctionInfo
389   /// that needs to be decoded.
390   LLVM_ABI std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
391 
392   /// Given an address, find the correct function info data and function
393   /// address.
394   ///
395   /// Binary search the address table and find the matching address info
396   /// and make sure that the function info contains the address. GSYM allows
397   /// functions to overlap, and the most debug info is contained in the first
398   /// entries due to the sorting when GSYM files are created. We can have
399   /// multiple function info that start at the same address only if their
400   /// address range doesn't match. So find the first entry that matches \a Addr
401   /// and iterate forward until we find one that contains the address.
402   ///
403   /// \param[in] Addr A virtual address that matches the original object file
404   /// to lookup.
405   ///
406   /// \param[out] FuncStartAddr A virtual address that is the base address of
407   /// the function that is used for decoding the FunctionInfo.
408   ///
409   /// \returns A valid data extractor on success, or an error if we fail to
410   /// find the address in a function info or correctly decode the data.
411   LLVM_ABI llvm::Expected<llvm::DataExtractor>
412   getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const;
413 
414   /// Get the function data and address given an address index.
415   ///
416   /// \param AddrIdx An address index from the address table.
417   ///
418   /// \returns An expected DataExtractor for the function info data, or an
419   /// error object that indicates reason for failing to lookup the
420   /// address.
421   LLVM_ABI llvm::Expected<llvm::DataExtractor>
422   getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const;
423 };
424 
425 } // namespace gsym
426 } // namespace llvm
427 
428 #endif // LLVM_DEBUGINFO_GSYM_GSYMREADER_H
429