xref: /freebsd/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1 //===- DWARFDebugLine.h -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H
10 #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H
11 
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/BinaryFormat/Dwarf.h"
14 #include "llvm/DebugInfo/DIContext.h"
15 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
16 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
17 #include "llvm/Support/MD5.h"
18 #include "llvm/Support/Path.h"
19 #include <cstdint>
20 #include <map>
21 #include <string>
22 #include <vector>
23 
24 namespace llvm {
25 
26 class raw_ostream;
27 
28 class DWARFDebugLine {
29 public:
30   struct FileNameEntry {
31     FileNameEntry() = default;
32 
33     DWARFFormValue Name;
34     uint64_t DirIdx = 0;
35     uint64_t ModTime = 0;
36     uint64_t Length = 0;
37     MD5::MD5Result Checksum;
38     DWARFFormValue Source;
39   };
40 
41   /// Tracks which optional content types are present in a DWARF file name
42   /// entry format.
43   struct ContentTypeTracker {
44     ContentTypeTracker() = default;
45 
46     /// Whether filename entries provide a modification timestamp.
47     bool HasModTime = false;
48     /// Whether filename entries provide a file size.
49     bool HasLength = false;
50     /// For v5, whether filename entries provide an MD5 checksum.
51     bool HasMD5 = false;
52     /// For v5, whether filename entries provide source text.
53     bool HasSource = false;
54 
55     /// Update tracked content types with \p ContentType.
56     void trackContentType(dwarf::LineNumberEntryFormat ContentType);
57   };
58 
59   struct Prologue {
60     Prologue();
61 
62     /// The size in bytes of the statement information for this compilation unit
63     /// (not including the total_length field itself).
64     uint64_t TotalLength;
65     /// Version, address size (starting in v5), and DWARF32/64 format; these
66     /// parameters affect interpretation of forms (used in the directory and
67     /// file tables starting with v5).
68     dwarf::FormParams FormParams;
69     /// The number of bytes following the prologue_length field to the beginning
70     /// of the first byte of the statement program itself.
71     uint64_t PrologueLength;
72     /// In v5, size in bytes of a segment selector.
73     uint8_t SegSelectorSize;
74     /// The size in bytes of the smallest target machine instruction. Statement
75     /// program opcodes that alter the address register first multiply their
76     /// operands by this value.
77     uint8_t MinInstLength;
78     /// The maximum number of individual operations that may be encoded in an
79     /// instruction.
80     uint8_t MaxOpsPerInst;
81     /// The initial value of theis_stmtregister.
82     uint8_t DefaultIsStmt;
83     /// This parameter affects the meaning of the special opcodes. See below.
84     int8_t LineBase;
85     /// This parameter affects the meaning of the special opcodes. See below.
86     uint8_t LineRange;
87     /// The number assigned to the first special opcode.
88     uint8_t OpcodeBase;
89     /// This tracks which optional file format content types are present.
90     ContentTypeTracker ContentTypes;
91     std::vector<uint8_t> StandardOpcodeLengths;
92     std::vector<DWARFFormValue> IncludeDirectories;
93     std::vector<FileNameEntry> FileNames;
94 
95     const dwarf::FormParams getFormParams() const { return FormParams; }
96     uint16_t getVersion() const { return FormParams.Version; }
97     uint8_t getAddressSize() const { return FormParams.AddrSize; }
98     bool isDWARF64() const { return FormParams.Format == dwarf::DWARF64; }
99 
100     uint32_t sizeofTotalLength() const { return isDWARF64() ? 12 : 4; }
101 
102     uint32_t sizeofPrologueLength() const { return isDWARF64() ? 8 : 4; }
103 
104     bool totalLengthIsValid() const;
105 
106     /// Length of the prologue in bytes.
107     uint64_t getLength() const;
108 
109     /// Get DWARF-version aware access to the file name entry at the provided
110     /// index.
111     const llvm::DWARFDebugLine::FileNameEntry &
112     getFileNameEntry(uint64_t Index) const;
113 
114     bool hasFileAtIndex(uint64_t FileIndex) const;
115 
116     std::optional<uint64_t> getLastValidFileIndex() const;
117 
118     bool
119     getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
120                        DILineInfoSpecifier::FileLineInfoKind Kind,
121                        std::string &Result,
122                        sys::path::Style Style = sys::path::Style::native) const;
123 
124     void clear();
125     void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
126     Error parse(DWARFDataExtractor Data, uint64_t *OffsetPtr,
127                 function_ref<void(Error)> RecoverableErrorHandler,
128                 const DWARFContext &Ctx, const DWARFUnit *U = nullptr);
129   };
130 
131   /// Standard .debug_line state machine structure.
132   struct Row {
133     explicit Row(bool DefaultIsStmt = false);
134 
135     /// Called after a row is appended to the matrix.
136     void postAppend();
137     void reset(bool DefaultIsStmt);
138     void dump(raw_ostream &OS) const;
139 
140     static void dumpTableHeader(raw_ostream &OS, unsigned Indent);
141 
142     static bool orderByAddress(const Row &LHS, const Row &RHS) {
143       return std::tie(LHS.Address.SectionIndex, LHS.Address.Address) <
144              std::tie(RHS.Address.SectionIndex, RHS.Address.Address);
145     }
146 
147     /// The program-counter value corresponding to a machine instruction
148     /// generated by the compiler and section index pointing to the section
149     /// containg this PC. If relocation information is present then section
150     /// index is the index of the section which contains above address.
151     /// Otherwise this is object::SectionedAddress::Undef value.
152     object::SectionedAddress Address;
153     /// An unsigned integer indicating a source line number. Lines are numbered
154     /// beginning at 1. The compiler may emit the value 0 in cases where an
155     /// instruction cannot be attributed to any source line.
156     uint32_t Line;
157     /// An unsigned integer indicating a column number within a source line.
158     /// Columns are numbered beginning at 1. The value 0 is reserved to indicate
159     /// that a statement begins at the 'left edge' of the line.
160     uint16_t Column;
161     /// An unsigned integer indicating the identity of the source file
162     /// corresponding to a machine instruction.
163     uint16_t File;
164     /// An unsigned integer representing the DWARF path discriminator value
165     /// for this location.
166     uint32_t Discriminator;
167     /// An unsigned integer whose value encodes the applicable instruction set
168     /// architecture for the current instruction.
169     uint8_t Isa;
170     /// A boolean indicating that the current instruction is the beginning of a
171     /// statement.
172     uint8_t IsStmt : 1,
173         /// A boolean indicating that the current instruction is the
174         /// beginning of a basic block.
175         BasicBlock : 1,
176         /// A boolean indicating that the current address is that of the
177         /// first byte after the end of a sequence of target machine
178         /// instructions.
179         EndSequence : 1,
180         /// A boolean indicating that the current address is one (of possibly
181         /// many) where execution should be suspended for an entry breakpoint
182         /// of a function.
183         PrologueEnd : 1,
184         /// A boolean indicating that the current address is one (of possibly
185         /// many) where execution should be suspended for an exit breakpoint
186         /// of a function.
187         EpilogueBegin : 1;
188   };
189 
190   /// Represents a series of contiguous machine instructions. Line table for
191   /// each compilation unit may consist of multiple sequences, which are not
192   /// guaranteed to be in the order of ascending instruction address.
193   struct Sequence {
194     Sequence();
195 
196     /// Sequence describes instructions at address range [LowPC, HighPC)
197     /// and is described by line table rows [FirstRowIndex, LastRowIndex).
198     uint64_t LowPC;
199     uint64_t HighPC;
200     /// If relocation information is present then this is the index of the
201     /// section which contains above addresses. Otherwise this is
202     /// object::SectionedAddress::Undef value.
203     uint64_t SectionIndex;
204     unsigned FirstRowIndex;
205     unsigned LastRowIndex;
206     bool Empty;
207 
208     void reset();
209 
210     static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) {
211       return std::tie(LHS.SectionIndex, LHS.HighPC) <
212              std::tie(RHS.SectionIndex, RHS.HighPC);
213     }
214 
215     bool isValid() const {
216       return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex);
217     }
218 
219     bool containsPC(object::SectionedAddress PC) const {
220       return SectionIndex == PC.SectionIndex &&
221              (LowPC <= PC.Address && PC.Address < HighPC);
222     }
223   };
224 
225   struct LineTable {
226     LineTable();
227 
228     /// Represents an invalid row
229     const uint32_t UnknownRowIndex = UINT32_MAX;
230 
231     void appendRow(const DWARFDebugLine::Row &R) { Rows.push_back(R); }
232 
233     void appendSequence(const DWARFDebugLine::Sequence &S) {
234       Sequences.push_back(S);
235     }
236 
237     /// Returns the index of the row with file/line info for a given address,
238     /// or UnknownRowIndex if there is no such row.
239     uint32_t lookupAddress(object::SectionedAddress Address) const;
240 
241     bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
242                             std::vector<uint32_t> &Result) const;
243 
244     bool hasFileAtIndex(uint64_t FileIndex) const {
245       return Prologue.hasFileAtIndex(FileIndex);
246     }
247 
248     std::optional<uint64_t> getLastValidFileIndex() const {
249       return Prologue.getLastValidFileIndex();
250     }
251 
252     /// Extracts filename by its index in filename table in prologue.
253     /// In Dwarf 4, the files are 1-indexed and the current compilation file
254     /// name is not represented in the list. In DWARF v5, the files are
255     /// 0-indexed and the primary source file has the index 0.
256     /// Returns true on success.
257     bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
258                             DILineInfoSpecifier::FileLineInfoKind Kind,
259                             std::string &Result) const {
260       return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result);
261     }
262 
263     /// Fills the Result argument with the file and line information
264     /// corresponding to Address. Returns true on success.
265     bool getFileLineInfoForAddress(object::SectionedAddress Address,
266                                    const char *CompDir,
267                                    DILineInfoSpecifier::FileLineInfoKind Kind,
268                                    DILineInfo &Result) const;
269 
270     /// Extracts directory name by its Entry in include directories table
271     /// in prologue. Returns true on success.
272     bool getDirectoryForEntry(const FileNameEntry &Entry,
273                               std::string &Directory) const;
274 
275     void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
276     void clear();
277 
278     /// Parse prologue and all rows.
279     Error parse(DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
280                 const DWARFContext &Ctx, const DWARFUnit *U,
281                 function_ref<void(Error)> RecoverableErrorHandler,
282                 raw_ostream *OS = nullptr, bool Verbose = false);
283 
284     using RowVector = std::vector<Row>;
285     using RowIter = RowVector::const_iterator;
286     using SequenceVector = std::vector<Sequence>;
287     using SequenceIter = SequenceVector::const_iterator;
288 
289     struct Prologue Prologue;
290     RowVector Rows;
291     SequenceVector Sequences;
292 
293   private:
294     uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq,
295                           object::SectionedAddress Address) const;
296     std::optional<StringRef>
297     getSourceByIndex(uint64_t FileIndex,
298                      DILineInfoSpecifier::FileLineInfoKind Kind) const;
299 
300     uint32_t lookupAddressImpl(object::SectionedAddress Address) const;
301 
302     bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
303                                 std::vector<uint32_t> &Result) const;
304   };
305 
306   const LineTable *getLineTable(uint64_t Offset) const;
307   Expected<const LineTable *>
308   getOrParseLineTable(DWARFDataExtractor &DebugLineData, uint64_t Offset,
309                       const DWARFContext &Ctx, const DWARFUnit *U,
310                       function_ref<void(Error)> RecoverableErrorHandler);
311   void clearLineTable(uint64_t Offset);
312 
313   /// Helper to allow for parsing of an entire .debug_line section in sequence.
314   class SectionParser {
315   public:
316     using LineToUnitMap = std::map<uint64_t, DWARFUnit *>;
317 
318     SectionParser(DWARFDataExtractor &Data, const DWARFContext &C,
319                   DWARFUnitVector::iterator_range Units);
320 
321     /// Get the next line table from the section. Report any issues via the
322     /// handlers.
323     ///
324     /// \param RecoverableErrorHandler - any issues that don't prevent further
325     /// parsing of the table will be reported through this handler.
326     /// \param UnrecoverableErrorHandler - any issues that prevent further
327     /// parsing of the table will be reported through this handler.
328     /// \param OS - if not null, the parser will print information about the
329     /// table as it parses it.
330     /// \param Verbose - if true, the parser will print verbose information when
331     /// printing to the output.
332     LineTable parseNext(function_ref<void(Error)> RecoverableErrorHandler,
333                         function_ref<void(Error)> UnrecoverableErrorHandler,
334                         raw_ostream *OS = nullptr, bool Verbose = false);
335 
336     /// Skip the current line table and go to the following line table (if
337     /// present) immediately.
338     ///
339     /// \param RecoverableErrorHandler - report any recoverable prologue
340     /// parsing issues via this handler.
341     /// \param UnrecoverableErrorHandler - report any unrecoverable prologue
342     /// parsing issues via this handler.
343     void skip(function_ref<void(Error)> RecoverableErrorHandler,
344               function_ref<void(Error)> UnrecoverableErrorHandler);
345 
346     /// Indicates if the parser has parsed as much as possible.
347     ///
348     /// \note Certain problems with the line table structure might mean that
349     /// parsing stops before the end of the section is reached.
350     bool done() const { return Done; }
351 
352     /// Get the offset the parser has reached.
353     uint64_t getOffset() const { return Offset; }
354 
355   private:
356     DWARFUnit *prepareToParse(uint64_t Offset);
357     void moveToNextTable(uint64_t OldOffset, const Prologue &P);
358 
359     LineToUnitMap LineToUnit;
360 
361     DWARFDataExtractor &DebugLineData;
362     const DWARFContext &Context;
363     uint64_t Offset = 0;
364     bool Done = false;
365   };
366 
367 private:
368   struct ParsingState {
369     ParsingState(struct LineTable *LT, uint64_t TableOffset,
370                  function_ref<void(Error)> ErrorHandler);
371 
372     void resetRowAndSequence();
373     void appendRowToMatrix();
374 
375     /// Advance the address by the \p OperationAdvance value. \returns the
376     /// amount advanced by.
377     uint64_t advanceAddr(uint64_t OperationAdvance, uint8_t Opcode,
378                          uint64_t OpcodeOffset);
379 
380     struct AddrAndAdjustedOpcode {
381       uint64_t AddrDelta;
382       uint8_t AdjustedOpcode;
383     };
384 
385     /// Advance the address as required by the specified \p Opcode.
386     /// \returns the amount advanced by and the calculated adjusted opcode.
387     AddrAndAdjustedOpcode advanceAddrForOpcode(uint8_t Opcode,
388                                                uint64_t OpcodeOffset);
389 
390     struct AddrAndLineDelta {
391       uint64_t Address;
392       int32_t Line;
393     };
394 
395     /// Advance the line and address as required by the specified special \p
396     /// Opcode. \returns the address and line delta.
397     AddrAndLineDelta handleSpecialOpcode(uint8_t Opcode, uint64_t OpcodeOffset);
398 
399     /// Line table we're currently parsing.
400     struct LineTable *LineTable;
401     struct Row Row;
402     struct Sequence Sequence;
403 
404   private:
405     uint64_t LineTableOffset;
406 
407     bool ReportAdvanceAddrProblem = true;
408     bool ReportBadLineRange = true;
409     function_ref<void(Error)> ErrorHandler;
410   };
411 
412   using LineTableMapTy = std::map<uint64_t, LineTable>;
413   using LineTableIter = LineTableMapTy::iterator;
414   using LineTableConstIter = LineTableMapTy::const_iterator;
415 
416   LineTableMapTy LineTableMap;
417 };
418 
419 } // end namespace llvm
420 
421 #endif // LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H
422