xref: /freebsd/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- DWARFDebugLine.h -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H
10 #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H
11 
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/BinaryFormat/Dwarf.h"
14 #include "llvm/DebugInfo/DIContext.h"
15 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
16 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
17 #include "llvm/Support/Compiler.h"
18 #include "llvm/Support/MD5.h"
19 #include "llvm/Support/Path.h"
20 #include <cstdint>
21 #include <map>
22 #include <string>
23 #include <vector>
24 
25 namespace llvm {
26 
27 class raw_ostream;
28 
29 class DWARFDebugLine {
30 public:
31   struct FileNameEntry {
32     FileNameEntry() = default;
33 
34     DWARFFormValue Name;
35     uint64_t DirIdx = 0;
36     uint64_t ModTime = 0;
37     uint64_t Length = 0;
38     MD5::MD5Result Checksum;
39     DWARFFormValue Source;
40   };
41 
42   /// Tracks which optional content types are present in a DWARF file name
43   /// entry format.
44   struct ContentTypeTracker {
45     ContentTypeTracker() = default;
46 
47     /// Whether filename entries provide a modification timestamp.
48     bool HasModTime = false;
49     /// Whether filename entries provide a file size.
50     bool HasLength = false;
51     /// For v5, whether filename entries provide an MD5 checksum.
52     bool HasMD5 = false;
53     /// For v5, whether filename entries provide source text.
54     bool HasSource = false;
55 
56     /// Update tracked content types with \p ContentType.
57     LLVM_ABI void trackContentType(dwarf::LineNumberEntryFormat ContentType);
58   };
59 
60   struct Prologue {
61     LLVM_ABI Prologue();
62 
63     /// The size in bytes of the statement information for this compilation unit
64     /// (not including the total_length field itself).
65     uint64_t TotalLength;
66     /// Version, address size (starting in v5), and DWARF32/64 format; these
67     /// parameters affect interpretation of forms (used in the directory and
68     /// file tables starting with v5).
69     dwarf::FormParams FormParams;
70     /// The number of bytes following the prologue_length field to the beginning
71     /// of the first byte of the statement program itself.
72     uint64_t PrologueLength;
73     /// In v5, size in bytes of a segment selector.
74     uint8_t SegSelectorSize;
75     /// The size in bytes of the smallest target machine instruction. Statement
76     /// program opcodes that alter the address register first multiply their
77     /// operands by this value.
78     uint8_t MinInstLength;
79     /// The maximum number of individual operations that may be encoded in an
80     /// instruction.
81     uint8_t MaxOpsPerInst;
82     /// The initial value of theis_stmtregister.
83     uint8_t DefaultIsStmt;
84     /// This parameter affects the meaning of the special opcodes. See below.
85     int8_t LineBase;
86     /// This parameter affects the meaning of the special opcodes. See below.
87     uint8_t LineRange;
88     /// The number assigned to the first special opcode.
89     uint8_t OpcodeBase;
90     /// This tracks which optional file format content types are present.
91     ContentTypeTracker ContentTypes;
92     std::vector<uint8_t> StandardOpcodeLengths;
93     std::vector<DWARFFormValue> IncludeDirectories;
94     std::vector<FileNameEntry> FileNames;
95 
getFormParamsPrologue96     const dwarf::FormParams getFormParams() const { return FormParams; }
getVersionPrologue97     uint16_t getVersion() const { return FormParams.Version; }
getAddressSizePrologue98     uint8_t getAddressSize() const { return FormParams.AddrSize; }
isDWARF64Prologue99     bool isDWARF64() const { return FormParams.Format == dwarf::DWARF64; }
100 
sizeofTotalLengthPrologue101     uint32_t sizeofTotalLength() const { return isDWARF64() ? 12 : 4; }
102 
sizeofPrologueLengthPrologue103     uint32_t sizeofPrologueLength() const { return isDWARF64() ? 8 : 4; }
104 
105     LLVM_ABI bool totalLengthIsValid() const;
106 
107     /// Length of the prologue in bytes.
108     LLVM_ABI uint64_t getLength() const;
109 
110     /// Get DWARF-version aware access to the file name entry at the provided
111     /// index.
112     LLVM_ABI const llvm::DWARFDebugLine::FileNameEntry &
113     getFileNameEntry(uint64_t Index) const;
114 
115     LLVM_ABI bool hasFileAtIndex(uint64_t FileIndex) const;
116 
117     LLVM_ABI std::optional<uint64_t> getLastValidFileIndex() const;
118 
119     LLVM_ABI bool
120     getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
121                        DILineInfoSpecifier::FileLineInfoKind Kind,
122                        std::string &Result,
123                        sys::path::Style Style = sys::path::Style::native) const;
124 
125     LLVM_ABI void clear();
126     LLVM_ABI void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
127     LLVM_ABI Error parse(DWARFDataExtractor Data, uint64_t *OffsetPtr,
128                          function_ref<void(Error)> RecoverableErrorHandler,
129                          const DWARFContext &Ctx, const DWARFUnit *U = nullptr);
130   };
131 
132   /// Standard .debug_line state machine structure.
133   struct Row {
134     LLVM_ABI explicit Row(bool DefaultIsStmt = false);
135 
136     /// Called after a row is appended to the matrix.
137     LLVM_ABI void postAppend();
138     LLVM_ABI void reset(bool DefaultIsStmt);
139     LLVM_ABI void dump(raw_ostream &OS) const;
140 
141     LLVM_ABI static void dumpTableHeader(raw_ostream &OS, unsigned Indent);
142 
orderByAddressRow143     static bool orderByAddress(const Row &LHS, const Row &RHS) {
144       return std::tie(LHS.Address.SectionIndex, LHS.Address.Address) <
145              std::tie(RHS.Address.SectionIndex, RHS.Address.Address);
146     }
147 
148     /// The program-counter value corresponding to a machine instruction
149     /// generated by the compiler and section index pointing to the section
150     /// containg this PC. If relocation information is present then section
151     /// index is the index of the section which contains above address.
152     /// Otherwise this is object::SectionedAddress::Undef value.
153     object::SectionedAddress Address;
154     /// An unsigned integer indicating a source line number. Lines are numbered
155     /// beginning at 1. The compiler may emit the value 0 in cases where an
156     /// instruction cannot be attributed to any source line.
157     uint32_t Line;
158     /// An unsigned integer indicating a column number within a source line.
159     /// Columns are numbered beginning at 1. The value 0 is reserved to indicate
160     /// that a statement begins at the 'left edge' of the line.
161     uint16_t Column;
162     /// An unsigned integer indicating the identity of the source file
163     /// corresponding to a machine instruction.
164     uint16_t File;
165     /// An unsigned integer representing the DWARF path discriminator value
166     /// for this location.
167     uint32_t Discriminator;
168     /// An unsigned integer whose value encodes the applicable instruction set
169     /// architecture for the current instruction.
170     uint8_t Isa;
171     /// An unsigned integer representing the index of an operation within a
172     /// VLIW instruction. The index of the first operation is 0.
173     /// For non-VLIW architectures, this register will always be 0.
174     uint8_t OpIndex;
175     /// A boolean indicating that the current instruction is the beginning of a
176     /// statement.
177     uint8_t IsStmt : 1,
178         /// A boolean indicating that the current instruction is the
179         /// beginning of a basic block.
180         BasicBlock : 1,
181         /// A boolean indicating that the current address is that of the
182         /// first byte after the end of a sequence of target machine
183         /// instructions.
184         EndSequence : 1,
185         /// A boolean indicating that the current address is one (of possibly
186         /// many) where execution should be suspended for an entry breakpoint
187         /// of a function.
188         PrologueEnd : 1,
189         /// A boolean indicating that the current address is one (of possibly
190         /// many) where execution should be suspended for an exit breakpoint
191         /// of a function.
192         EpilogueBegin : 1;
193   };
194 
195   /// Represents a series of contiguous machine instructions. Line table for
196   /// each compilation unit may consist of multiple sequences, which are not
197   /// guaranteed to be in the order of ascending instruction address.
198   struct Sequence {
199     LLVM_ABI Sequence();
200 
201     /// Sequence describes instructions at address range [LowPC, HighPC)
202     /// and is described by line table rows [FirstRowIndex, LastRowIndex).
203     uint64_t LowPC;
204     uint64_t HighPC;
205     /// If relocation information is present then this is the index of the
206     /// section which contains above addresses. Otherwise this is
207     /// object::SectionedAddress::Undef value.
208     uint64_t SectionIndex;
209     unsigned FirstRowIndex;
210     unsigned LastRowIndex;
211     bool Empty;
212 
213     /// The offset into the line table where this sequence begins
214     uint64_t StmtSeqOffset = UINT64_MAX;
215 
216     LLVM_ABI void reset();
217 
orderByHighPCSequence218     static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) {
219       return std::tie(LHS.SectionIndex, LHS.HighPC) <
220              std::tie(RHS.SectionIndex, RHS.HighPC);
221     }
222 
isValidSequence223     bool isValid() const {
224       return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex);
225     }
226 
containsPCSequence227     bool containsPC(object::SectionedAddress PC) const {
228       return SectionIndex == PC.SectionIndex &&
229              (LowPC <= PC.Address && PC.Address < HighPC);
230     }
231   };
232 
233   struct LineTable {
234     LLVM_ABI LineTable();
235 
236     /// Represents an invalid row
237     const uint32_t UnknownRowIndex = UINT32_MAX;
238 
appendRowLineTable239     void appendRow(const DWARFDebugLine::Row &R) { Rows.push_back(R); }
240 
appendSequenceLineTable241     void appendSequence(const DWARFDebugLine::Sequence &S) {
242       Sequences.push_back(S);
243     }
244 
245     /// Returns the index of the row with file/line info for a given address,
246     /// or UnknownRowIndex if there is no such row.
247     LLVM_ABI uint32_t lookupAddress(object::SectionedAddress Address,
248                                     bool *IsApproximateLine = nullptr) const;
249 
250     /// Fills the Result argument with the indices of the rows that correspond
251     /// to the address range specified by \p Address and \p Size.
252     ///
253     /// \param Address - The starting address of the range.
254     /// \param Size - The size of the address range.
255     /// \param Result - The vector to fill with row indices.
256     /// \param StmtSequenceOffset - if provided, only rows from the sequence
257     /// starting at the matching offset will be added to the result.
258     ///
259     /// Returns true if any rows were found.
260     LLVM_ABI bool lookupAddressRange(
261         object::SectionedAddress Address, uint64_t Size,
262         std::vector<uint32_t> &Result,
263         std::optional<uint64_t> StmtSequenceOffset = std::nullopt) const;
264 
hasFileAtIndexLineTable265     bool hasFileAtIndex(uint64_t FileIndex) const {
266       return Prologue.hasFileAtIndex(FileIndex);
267     }
268 
getLastValidFileIndexLineTable269     std::optional<uint64_t> getLastValidFileIndex() const {
270       return Prologue.getLastValidFileIndex();
271     }
272 
273     /// Extracts filename by its index in filename table in prologue.
274     /// In Dwarf 4, the files are 1-indexed and the current compilation file
275     /// name is not represented in the list. In DWARF v5, the files are
276     /// 0-indexed and the primary source file has the index 0.
277     /// Returns true on success.
getFileNameByIndexLineTable278     bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
279                             DILineInfoSpecifier::FileLineInfoKind Kind,
280                             std::string &Result) const {
281       return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result);
282     }
283 
284     /// Fills the Result argument with the file and line information
285     /// corresponding to Address. Returns true on success.
286     LLVM_ABI bool getFileLineInfoForAddress(
287         object::SectionedAddress Address, bool Approximate, const char *CompDir,
288         DILineInfoSpecifier::FileLineInfoKind Kind, DILineInfo &Result) const;
289 
290     /// Extracts directory name by its Entry in include directories table
291     /// in prologue. Returns true on success.
292     LLVM_ABI bool getDirectoryForEntry(const FileNameEntry &Entry,
293                                        std::string &Directory) const;
294 
295     LLVM_ABI void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
296     LLVM_ABI void clear();
297 
298     /// Parse prologue and all rows.
299     LLVM_ABI Error parse(DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
300                          const DWARFContext &Ctx, const DWARFUnit *U,
301                          function_ref<void(Error)> RecoverableErrorHandler,
302                          raw_ostream *OS = nullptr, bool Verbose = false);
303 
304     using RowVector = std::vector<Row>;
305     using RowIter = RowVector::const_iterator;
306     using SequenceVector = std::vector<Sequence>;
307     using SequenceIter = SequenceVector::const_iterator;
308 
309     struct Prologue Prologue;
310     RowVector Rows;
311     SequenceVector Sequences;
312 
313   private:
314     uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq,
315                           object::SectionedAddress Address) const;
316     std::optional<StringRef>
317     getSourceByIndex(uint64_t FileIndex,
318                      DILineInfoSpecifier::FileLineInfoKind Kind) const;
319 
320     uint32_t lookupAddressImpl(object::SectionedAddress Address,
321                                bool *IsApproximateLine = nullptr) const;
322 
323     /// Fills the Result argument with the indices of the rows that correspond
324     /// to the address range specified by \p Address and \p Size.
325     ///
326     /// \param Address - The starting address of the range.
327     /// \param Size - The size of the address range.
328     /// \param Result - The vector to fill with row indices.
329     /// \param StmtSequenceOffset - if provided, only rows from the sequence
330     /// starting at the matching offset will be added to the result.
331     ///
332     /// Returns true if any rows were found.
333     bool
334     lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
335                            std::vector<uint32_t> &Result,
336                            std::optional<uint64_t> StmtSequenceOffset) const;
337   };
338 
/// Returns the line table for \p Offset as a pointer to const.
/// NOTE(review): the definition is not in this header; presumably this
/// consults LineTableMap and yields nullptr when no table is stored for the
/// offset - confirm.
LLVM_ABI const LineTable *getLineTable(uint64_t Offset) const;
/// Returns the line table for \p Offset, or an Error.
/// NOTE(review): going by the name, an already stored table is presumably
/// reused and a missing one is parsed from \p DebugLineData - confirm.
/// \p RecoverableErrorHandler receives errors of type Error through its
/// callback signature.
LLVM_ABI Expected<const LineTable *>
getOrParseLineTable(DWARFDataExtractor &DebugLineData, uint64_t Offset,
                    const DWARFContext &Ctx, const DWARFUnit *U,
                    function_ref<void(Error)> RecoverableErrorHandler);
/// NOTE(review): presumably discards the table stored for \p Offset -
/// confirm against the definition.
LLVM_ABI void clearLineTable(uint64_t Offset);
345 
346   /// Helper to allow for parsing of an entire .debug_line section in sequence.
347   class SectionParser {
348   public:
349     using LineToUnitMap = std::map<uint64_t, DWARFUnit *>;
350 
351     LLVM_ABI SectionParser(DWARFDataExtractor &Data, const DWARFContext &C,
352                            DWARFUnitVector::iterator_range Units);
353 
354     /// Get the next line table from the section. Report any issues via the
355     /// handlers.
356     ///
357     /// \param RecoverableErrorHandler - any issues that don't prevent further
358     /// parsing of the table will be reported through this handler.
359     /// \param UnrecoverableErrorHandler - any issues that prevent further
360     /// parsing of the table will be reported through this handler.
361     /// \param OS - if not null, the parser will print information about the
362     /// table as it parses it.
363     /// \param Verbose - if true, the parser will print verbose information when
364     /// printing to the output.
365     LLVM_ABI LineTable
366     parseNext(function_ref<void(Error)> RecoverableErrorHandler,
367               function_ref<void(Error)> UnrecoverableErrorHandler,
368               raw_ostream *OS = nullptr, bool Verbose = false);
369 
370     /// Skip the current line table and go to the following line table (if
371     /// present) immediately.
372     ///
373     /// \param RecoverableErrorHandler - report any recoverable prologue
374     /// parsing issues via this handler.
375     /// \param UnrecoverableErrorHandler - report any unrecoverable prologue
376     /// parsing issues via this handler.
377     LLVM_ABI void skip(function_ref<void(Error)> RecoverableErrorHandler,
378                        function_ref<void(Error)> UnrecoverableErrorHandler);
379 
380     /// Indicates if the parser has parsed as much as possible.
381     ///
382     /// \note Certain problems with the line table structure might mean that
383     /// parsing stops before the end of the section is reached.
done()384     bool done() const { return Done; }
385 
386     /// Get the offset the parser has reached.
getOffset()387     uint64_t getOffset() const { return Offset; }
388 
389   private:
390     DWARFUnit *prepareToParse(uint64_t Offset);
391     void moveToNextTable(uint64_t OldOffset, const Prologue &P);
392     bool hasValidVersion(uint64_t Offset);
393 
394     LineToUnitMap LineToUnit;
395 
396     DWARFDataExtractor &DebugLineData;
397     const DWARFContext &Context;
398     uint64_t Offset = 0;
399     bool Done = false;
400   };
401 
402 private:
403   struct ParsingState {
404     LLVM_ABI ParsingState(struct LineTable *LT, uint64_t TableOffset,
405                           function_ref<void(Error)> ErrorHandler);
406 
407     LLVM_ABI void resetRowAndSequence(uint64_t Offset);
408     LLVM_ABI void appendRowToMatrix();
409 
410     struct AddrOpIndexDelta {
411       uint64_t AddrOffset;
412       int16_t OpIndexDelta;
413     };
414 
415     /// Advance the address and op-index by the \p OperationAdvance value.
416     /// \returns the amount advanced by.
417     LLVM_ABI AddrOpIndexDelta advanceAddrOpIndex(uint64_t OperationAdvance,
418                                                  uint8_t Opcode,
419                                                  uint64_t OpcodeOffset);
420 
421     struct OpcodeAdvanceResults {
422       uint64_t AddrDelta;
423       int16_t OpIndexDelta;
424       uint8_t AdjustedOpcode;
425     };
426 
427     /// Advance the address and op-index as required by the specified \p Opcode.
428     /// \returns the amount advanced by and the calculated adjusted opcode.
429     LLVM_ABI OpcodeAdvanceResults advanceForOpcode(uint8_t Opcode,
430                                                    uint64_t OpcodeOffset);
431 
432     struct SpecialOpcodeDelta {
433       uint64_t Address;
434       int32_t Line;
435       int16_t OpIndex;
436     };
437 
438     /// Advance the line, address and op-index as required by the specified
439     /// special \p Opcode. \returns the address, op-index and line delta.
440     LLVM_ABI SpecialOpcodeDelta handleSpecialOpcode(uint8_t Opcode,
441                                                     uint64_t OpcodeOffset);
442 
443     /// Line table we're currently parsing.
444     struct LineTable *LineTable;
445     struct Row Row;
446     struct Sequence Sequence;
447 
448   private:
449     uint64_t LineTableOffset;
450 
451     bool ReportAdvanceAddrProblem = true;
452     bool ReportBadLineRange = true;
453     function_ref<void(Error)> ErrorHandler;
454   };
455 
/// Ordered map from a uint64_t key to a LineTable, with iterator aliases.
/// NOTE(review): the key is presumably the offset of the table in the
/// .debug_line section, matching the Offset parameters above - confirm.
using LineTableMapTy = std::map<uint64_t, LineTable>;
using LineTableIter = LineTableMapTy::iterator;
using LineTableConstIter = LineTableMapTy::const_iterator;

/// The line tables held by this object.
LineTableMapTy LineTableMap;
461 };
462 
463 } // end namespace llvm
464 
465 #endif // LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H
466