xref: /freebsd/contrib/llvm-project/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- FunctionInfo.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
10 #define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
11 
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/DebugInfo/GSYM/CallSiteInfo.h"
14 #include "llvm/DebugInfo/GSYM/ExtractRanges.h"
15 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
16 #include "llvm/DebugInfo/GSYM/LineTable.h"
17 #include "llvm/DebugInfo/GSYM/LookupResult.h"
18 #include "llvm/DebugInfo/GSYM/MergedFunctionsInfo.h"
19 #include "llvm/DebugInfo/GSYM/StringTable.h"
20 #include "llvm/Support/Compiler.h"
21 #include <cstdint>
22 
23 namespace llvm {
24 class raw_ostream;
25 
26 namespace gsym {
27 
28 class GsymReader;
29 /// Function information in GSYM files encodes information for one contiguous
30 /// address range. If a function has discontiguous address ranges, they will
31 /// need to be encoded using multiple FunctionInfo objects.
32 ///
33 /// ENCODING
34 ///
35 /// The function information gets the function start address as an argument
36 /// to the FunctionInfo::decode(...) function. This information is calculated
37 /// from the GSYM header and an address offset from the GSYM address offsets
38 /// table. The encoded FunctionInfo information must be aligned to a 4 byte
39 /// boundary.
40 ///
41 /// The encoded data for a FunctionInfo starts with fixed data that all
42 /// function info objects have:
43 ///
44 /// ENCODING  NAME        DESCRIPTION
45 /// ========= =========== ====================================================
46 /// uint32_t  Size        The size in bytes of this function.
47 /// uint32_t  Name        The string table offset of the function name.
48 ///
49 /// The optional data in a FunctionInfo object follows this fixed information
50 /// and consists of a stream of tuples that consist of:
51 ///
52 /// ENCODING  NAME        DESCRIPTION
53 /// ========= =========== ====================================================
54 /// uint32_t  InfoType    An "InfoType" enumeration that describes the type
55 ///                       of optional data that is encoded.
56 /// uint32_t  InfoLength  The size in bytes of the encoded data that
57 ///                       immediately follows this length if this value is
58 ///                       greater than zero.
59 /// uint8_t[] InfoData    Encoded bytes that represent the data for the
60 ///                       "InfoType". These bytes are only present if
61 ///                       "InfoLength" is greater than zero.
62 ///
63 /// The "InfoType" is an enumeration:
64 ///
65 ///   enum InfoType {
66 ///     EndOfList = 0u,
67 ///     LineTableInfo = 1u,
68 ///     InlineInfo = 2u,
69 ///     MergedFunctionsInfo = 3u,
70 ///     CallSiteInfo = 4u
71 ///   };
72 ///
/// This stream of tuples is terminated by an "InfoType" whose value is
74 /// InfoType::EndOfList and a zero for "InfoLength". This signifies the end of
75 /// the optional information list. This format allows us to add new optional
76 /// information data to a FunctionInfo object over time and allows older
77 /// clients to still parse the format and skip over any data that they don't
78 /// understand or want to parse.
79 ///
80 /// So the function information encoding essentially looks like:
81 ///
82 /// struct {
83 ///   uint32_t Size;
84 ///   uint32_t Name;
85 ///   struct {
86 ///     uint32_t InfoType;
87 ///     uint32_t InfoLength;
88 ///     uint8_t InfoData[InfoLength];
89 ///   }[N];
90 /// }
91 ///
92 /// Where "N" is the number of tuples.
93 struct FunctionInfo {
94   AddressRange Range;
95   uint32_t Name; ///< String table offset in the string table.
96   std::optional<LineTable> OptLineTable;
97   std::optional<InlineInfo> Inline;
98   std::optional<MergedFunctionsInfo> MergedFunctions;
99   std::optional<CallSiteInfoCollection> CallSites;
100   /// If we encode a FunctionInfo during segmenting so we know its size, we can
101   /// cache that encoding here so we don't need to re-encode it when saving the
102   /// GSYM file.
103   SmallString<32> EncodingCache;
104 
105   FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
106       : Range(Addr, Addr + Size), Name(N) {}
107 
108   /// Query if a FunctionInfo has rich debug info.
109   ///
110   /// \returns A bool that indicates if this object has something else than
111   /// range and name. When converting information from a symbol table and from
112   /// debug info, we might end up with multiple FunctionInfo objects for the
113   /// same range and we need to be able to tell which one is the better object
114   /// to use.
hasRichInfoFunctionInfo115   bool hasRichInfo() const { return OptLineTable || Inline || CallSites; }
116 
117   /// Query if a FunctionInfo object is valid.
118   ///
119   /// Address and size can be zero and there can be no line entries for a
120   /// symbol so the only indication this entry is valid is if the name is
121   /// not zero. This can happen when extracting information from symbol
122   /// tables that do not encode symbol sizes. In that case only the
123   /// address and name will be filled in.
124   ///
125   /// \returns A boolean indicating if this FunctionInfo is valid.
isValidFunctionInfo126   bool isValid() const {
127     return Name != 0;
128   }
129 
130   /// Decode an object from a binary data stream.
131   ///
132   /// \param Data The binary stream to read the data from. This object must
133   /// have the data for the object starting at offset zero. The data
134   /// can contain more data than needed.
135   ///
136   /// \param BaseAddr The FunctionInfo's start address and will be used as the
137   /// base address when decoding any contained information like the line table
138   /// and the inline info.
139   ///
140   /// \returns An FunctionInfo or an error describing the issue that was
141   /// encountered during decoding.
142   LLVM_ABI static llvm::Expected<FunctionInfo> decode(DataExtractor &Data,
143                                                       uint64_t BaseAddr);
144 
145   /// Encode this object into FileWriter stream.
146   ///
147   /// \param O The binary stream to write the data to at the current file
148   /// position.
149   ///
150   /// \param NoPadding Directly write the FunctionInfo data, without any padding
151   /// By default, FunctionInfo will be 4-byte aligned by padding with
152   /// 0's at the start. This is OK since the function will return the offset of
153   /// actual data in the stream. However when writing FunctionInfo's as a
154   /// stream, the padding will break the decoding of the data - since the offset
155   /// where the FunctionInfo starts is not kept in this scenario.
156   ///
157   /// \returns An error object that indicates failure or the offset of the
158   /// function info that was successfully written into the stream.
159   LLVM_ABI llvm::Expected<uint64_t> encode(FileWriter &O,
160                                            bool NoPadding = false) const;
161 
162   /// Encode this function info into the internal byte cache and return the size
163   /// in bytes.
164   ///
165   /// When segmenting GSYM files we need to know how big each FunctionInfo will
166   /// encode into so we can generate segments of the right size. We don't want
167   /// to have to encode a FunctionInfo twice, so we can cache the encoded bytes
168   /// and re-use then when calling FunctionInfo::encode(...).
169   ///
170   /// \returns The size in bytes of the FunctionInfo if it were to be encoded
171   /// into a byte stream.
172   LLVM_ABI uint64_t cacheEncoding();
173 
174   /// Lookup an address within a FunctionInfo object's data stream.
175   ///
176   /// Instead of decoding an entire FunctionInfo object when doing lookups,
177   /// we can decode only the information we need from the FunctionInfo's data
178   /// for the specific address. The lookup result information is returned as
179   /// a LookupResult.
180   ///
181   /// \param Data The binary stream to read the data from. This object must
182   /// have the data for the object starting at offset zero. The data
183   /// can contain more data than needed.
184   ///
185   /// \param GR The GSYM reader that contains the string and file table that
186   /// will be used to fill in information in the returned result.
187   ///
188   /// \param FuncAddr The function start address decoded from the GsymReader.
189   ///
190   /// \param Addr The address to lookup.
191   ///
192   /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
193   /// non-null, will be set to the raw data of the MergedFunctionInfo, if
194   /// present.
195   ///
196   /// \returns An LookupResult or an error describing the issue that was
197   /// encountered during decoding. An error should only be returned if the
198   /// address is not contained in the FunctionInfo or if the data is corrupted.
199   LLVM_ABI static llvm::Expected<LookupResult>
200   lookup(DataExtractor &Data, const GsymReader &GR, uint64_t FuncAddr,
201          uint64_t Addr,
202          std::optional<DataExtractor> *MergedFuncsData = nullptr);
203 
startAddressFunctionInfo204   uint64_t startAddress() const { return Range.start(); }
endAddressFunctionInfo205   uint64_t endAddress() const { return Range.end(); }
sizeFunctionInfo206   uint64_t size() const { return Range.size(); }
207 
clearFunctionInfo208   void clear() {
209     Range = {0, 0};
210     Name = 0;
211     OptLineTable = std::nullopt;
212     Inline = std::nullopt;
213   }
214 };
215 
216 inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) {
217   return LHS.Range == RHS.Range && LHS.Name == RHS.Name &&
218          LHS.OptLineTable == RHS.OptLineTable && LHS.Inline == RHS.Inline;
219 }
220 inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) {
221   return !(LHS == RHS);
222 }
223 /// This sorting will order things consistently by address range first, but
224 /// then followed by increasing levels of debug info like inline information
225 /// and line tables. We might end up with a FunctionInfo from debug info that
226 /// will have the same range as one from the symbol table, but we want to
227 /// quickly be able to sort and use the best version when creating the final
228 /// GSYM file. This function compares the inline information as we have seen
229 /// cases where LTO can generate a wide array of differing inline information,
230 /// mostly due to messing up the address ranges for inlined functions, so the
231 /// inline information with the most entries will appeear last. If the inline
232 /// information match, either by both function infos not having any or both
233 /// being exactly the same, we will then compare line tables. Comparing line
234 /// tables allows the entry with the most line entries to appear last. This
235 /// ensures we are able to save the FunctionInfo with the most debug info into
236 /// the GSYM file.
237 inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) {
238   // First sort by address range
239   return std::tie(LHS.Range, LHS.Inline, LHS.OptLineTable) <
240          std::tie(RHS.Range, RHS.Inline, RHS.OptLineTable);
241 }
242 
243 LLVM_ABI raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R);
244 
245 } // namespace gsym
246 } // namespace llvm
247 
248 #endif // LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
249