1 //===- DWARFDebugLine.h -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H 10 #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H 11 12 #include "llvm/ADT/StringRef.h" 13 #include "llvm/BinaryFormat/Dwarf.h" 14 #include "llvm/DebugInfo/DIContext.h" 15 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 16 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 17 #include "llvm/Support/Compiler.h" 18 #include "llvm/Support/MD5.h" 19 #include "llvm/Support/Path.h" 20 #include <cstdint> 21 #include <map> 22 #include <string> 23 #include <vector> 24 25 namespace llvm { 26 27 class raw_ostream; 28 29 class DWARFDebugLine { 30 public: 31 struct FileNameEntry { 32 FileNameEntry() = default; 33 34 DWARFFormValue Name; 35 uint64_t DirIdx = 0; 36 uint64_t ModTime = 0; 37 uint64_t Length = 0; 38 MD5::MD5Result Checksum; 39 DWARFFormValue Source; 40 }; 41 42 /// Tracks which optional content types are present in a DWARF file name 43 /// entry format. 44 struct ContentTypeTracker { 45 ContentTypeTracker() = default; 46 47 /// Whether filename entries provide a modification timestamp. 48 bool HasModTime = false; 49 /// Whether filename entries provide a file size. 50 bool HasLength = false; 51 /// For v5, whether filename entries provide an MD5 checksum. 52 bool HasMD5 = false; 53 /// For v5, whether filename entries provide source text. 54 bool HasSource = false; 55 56 /// Update tracked content types with \p ContentType. 57 LLVM_ABI void trackContentType(dwarf::LineNumberEntryFormat ContentType); 58 }; 59 60 struct Prologue { 61 LLVM_ABI Prologue(); 62 63 /// The size in bytes of the statement information for this compilation unit 64 /// (not including the total_length field itself). 65 uint64_t TotalLength; 66 /// Version, address size (starting in v5), and DWARF32/64 format; these 67 /// parameters affect interpretation of forms (used in the directory and 68 /// file tables starting with v5). 69 dwarf::FormParams FormParams; 70 /// The number of bytes following the prologue_length field to the beginning 71 /// of the first byte of the statement program itself. 72 uint64_t PrologueLength; 73 /// In v5, size in bytes of a segment selector. 74 uint8_t SegSelectorSize; 75 /// The size in bytes of the smallest target machine instruction. Statement 76 /// program opcodes that alter the address register first multiply their 77 /// operands by this value. 78 uint8_t MinInstLength; 79 /// The maximum number of individual operations that may be encoded in an 80 /// instruction. 81 uint8_t MaxOpsPerInst; 82 /// The initial value of theis_stmtregister. 83 uint8_t DefaultIsStmt; 84 /// This parameter affects the meaning of the special opcodes. See below. 85 int8_t LineBase; 86 /// This parameter affects the meaning of the special opcodes. See below. 87 uint8_t LineRange; 88 /// The number assigned to the first special opcode. 89 uint8_t OpcodeBase; 90 /// This tracks which optional file format content types are present. 91 ContentTypeTracker ContentTypes; 92 std::vector<uint8_t> StandardOpcodeLengths; 93 std::vector<DWARFFormValue> IncludeDirectories; 94 std::vector<FileNameEntry> FileNames; 95 getFormParamsPrologue96 const dwarf::FormParams getFormParams() const { return FormParams; } getVersionPrologue97 uint16_t getVersion() const { return FormParams.Version; } getAddressSizePrologue98 uint8_t getAddressSize() const { return FormParams.AddrSize; } isDWARF64Prologue99 bool isDWARF64() const { return FormParams.Format == dwarf::DWARF64; } 100 sizeofTotalLengthPrologue101 uint32_t sizeofTotalLength() const { return isDWARF64() ? 12 : 4; } 102 sizeofPrologueLengthPrologue103 uint32_t sizeofPrologueLength() const { return isDWARF64() ? 8 : 4; } 104 105 LLVM_ABI bool totalLengthIsValid() const; 106 107 /// Length of the prologue in bytes. 108 LLVM_ABI uint64_t getLength() const; 109 110 /// Get DWARF-version aware access to the file name entry at the provided 111 /// index. 112 LLVM_ABI const llvm::DWARFDebugLine::FileNameEntry & 113 getFileNameEntry(uint64_t Index) const; 114 115 LLVM_ABI bool hasFileAtIndex(uint64_t FileIndex) const; 116 117 LLVM_ABI std::optional<uint64_t> getLastValidFileIndex() const; 118 119 LLVM_ABI bool 120 getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, 121 DILineInfoSpecifier::FileLineInfoKind Kind, 122 std::string &Result, 123 sys::path::Style Style = sys::path::Style::native) const; 124 125 LLVM_ABI void clear(); 126 LLVM_ABI void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; 127 LLVM_ABI Error parse(DWARFDataExtractor Data, uint64_t *OffsetPtr, 128 function_ref<void(Error)> RecoverableErrorHandler, 129 const DWARFContext &Ctx, const DWARFUnit *U = nullptr); 130 }; 131 132 /// Standard .debug_line state machine structure. 133 struct Row { 134 LLVM_ABI explicit Row(bool DefaultIsStmt = false); 135 136 /// Called after a row is appended to the matrix. 137 LLVM_ABI void postAppend(); 138 LLVM_ABI void reset(bool DefaultIsStmt); 139 LLVM_ABI void dump(raw_ostream &OS) const; 140 141 LLVM_ABI static void dumpTableHeader(raw_ostream &OS, unsigned Indent); 142 orderByAddressRow143 static bool orderByAddress(const Row &LHS, const Row &RHS) { 144 return std::tie(LHS.Address.SectionIndex, LHS.Address.Address) < 145 std::tie(RHS.Address.SectionIndex, RHS.Address.Address); 146 } 147 148 /// The program-counter value corresponding to a machine instruction 149 /// generated by the compiler and section index pointing to the section 150 /// containg this PC. If relocation information is present then section 151 /// index is the index of the section which contains above address. 152 /// Otherwise this is object::SectionedAddress::Undef value. 153 object::SectionedAddress Address; 154 /// An unsigned integer indicating a source line number. Lines are numbered 155 /// beginning at 1. The compiler may emit the value 0 in cases where an 156 /// instruction cannot be attributed to any source line. 157 uint32_t Line; 158 /// An unsigned integer indicating a column number within a source line. 159 /// Columns are numbered beginning at 1. The value 0 is reserved to indicate 160 /// that a statement begins at the 'left edge' of the line. 161 uint16_t Column; 162 /// An unsigned integer indicating the identity of the source file 163 /// corresponding to a machine instruction. 164 uint16_t File; 165 /// An unsigned integer representing the DWARF path discriminator value 166 /// for this location. 167 uint32_t Discriminator; 168 /// An unsigned integer whose value encodes the applicable instruction set 169 /// architecture for the current instruction. 170 uint8_t Isa; 171 /// An unsigned integer representing the index of an operation within a 172 /// VLIW instruction. The index of the first operation is 0. 173 /// For non-VLIW architectures, this register will always be 0. 174 uint8_t OpIndex; 175 /// A boolean indicating that the current instruction is the beginning of a 176 /// statement. 177 uint8_t IsStmt : 1, 178 /// A boolean indicating that the current instruction is the 179 /// beginning of a basic block. 180 BasicBlock : 1, 181 /// A boolean indicating that the current address is that of the 182 /// first byte after the end of a sequence of target machine 183 /// instructions. 184 EndSequence : 1, 185 /// A boolean indicating that the current address is one (of possibly 186 /// many) where execution should be suspended for an entry breakpoint 187 /// of a function. 188 PrologueEnd : 1, 189 /// A boolean indicating that the current address is one (of possibly 190 /// many) where execution should be suspended for an exit breakpoint 191 /// of a function. 192 EpilogueBegin : 1; 193 }; 194 195 /// Represents a series of contiguous machine instructions. Line table for 196 /// each compilation unit may consist of multiple sequences, which are not 197 /// guaranteed to be in the order of ascending instruction address. 198 struct Sequence { 199 LLVM_ABI Sequence(); 200 201 /// Sequence describes instructions at address range [LowPC, HighPC) 202 /// and is described by line table rows [FirstRowIndex, LastRowIndex). 203 uint64_t LowPC; 204 uint64_t HighPC; 205 /// If relocation information is present then this is the index of the 206 /// section which contains above addresses. Otherwise this is 207 /// object::SectionedAddress::Undef value. 208 uint64_t SectionIndex; 209 unsigned FirstRowIndex; 210 unsigned LastRowIndex; 211 bool Empty; 212 213 /// The offset into the line table where this sequence begins 214 uint64_t StmtSeqOffset = UINT64_MAX; 215 216 LLVM_ABI void reset(); 217 orderByHighPCSequence218 static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) { 219 return std::tie(LHS.SectionIndex, LHS.HighPC) < 220 std::tie(RHS.SectionIndex, RHS.HighPC); 221 } 222 isValidSequence223 bool isValid() const { 224 return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex); 225 } 226 containsPCSequence227 bool containsPC(object::SectionedAddress PC) const { 228 return SectionIndex == PC.SectionIndex && 229 (LowPC <= PC.Address && PC.Address < HighPC); 230 } 231 }; 232 233 struct LineTable { 234 LLVM_ABI LineTable(); 235 236 /// Represents an invalid row 237 const uint32_t UnknownRowIndex = UINT32_MAX; 238 appendRowLineTable239 void appendRow(const DWARFDebugLine::Row &R) { Rows.push_back(R); } 240 appendSequenceLineTable241 void appendSequence(const DWARFDebugLine::Sequence &S) { 242 Sequences.push_back(S); 243 } 244 245 /// Returns the index of the row with file/line info for a given address, 246 /// or UnknownRowIndex if there is no such row. 247 LLVM_ABI uint32_t lookupAddress(object::SectionedAddress Address, 248 bool *IsApproximateLine = nullptr) const; 249 250 /// Fills the Result argument with the indices of the rows that correspond 251 /// to the address range specified by \p Address and \p Size. 252 /// 253 /// \param Address - The starting address of the range. 254 /// \param Size - The size of the address range. 255 /// \param Result - The vector to fill with row indices. 256 /// \param StmtSequenceOffset - if provided, only rows from the sequence 257 /// starting at the matching offset will be added to the result. 258 /// 259 /// Returns true if any rows were found. 260 LLVM_ABI bool lookupAddressRange( 261 object::SectionedAddress Address, uint64_t Size, 262 std::vector<uint32_t> &Result, 263 std::optional<uint64_t> StmtSequenceOffset = std::nullopt) const; 264 hasFileAtIndexLineTable265 bool hasFileAtIndex(uint64_t FileIndex) const { 266 return Prologue.hasFileAtIndex(FileIndex); 267 } 268 getLastValidFileIndexLineTable269 std::optional<uint64_t> getLastValidFileIndex() const { 270 return Prologue.getLastValidFileIndex(); 271 } 272 273 /// Extracts filename by its index in filename table in prologue. 274 /// In Dwarf 4, the files are 1-indexed and the current compilation file 275 /// name is not represented in the list. In DWARF v5, the files are 276 /// 0-indexed and the primary source file has the index 0. 277 /// Returns true on success. getFileNameByIndexLineTable278 bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, 279 DILineInfoSpecifier::FileLineInfoKind Kind, 280 std::string &Result) const { 281 return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result); 282 } 283 284 /// Fills the Result argument with the file and line information 285 /// corresponding to Address. Returns true on success. 286 LLVM_ABI bool getFileLineInfoForAddress( 287 object::SectionedAddress Address, bool Approximate, const char *CompDir, 288 DILineInfoSpecifier::FileLineInfoKind Kind, DILineInfo &Result) const; 289 290 /// Extracts directory name by its Entry in include directories table 291 /// in prologue. Returns true on success. 292 LLVM_ABI bool getDirectoryForEntry(const FileNameEntry &Entry, 293 std::string &Directory) const; 294 295 LLVM_ABI void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; 296 LLVM_ABI void clear(); 297 298 /// Parse prologue and all rows. 299 LLVM_ABI Error parse(DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, 300 const DWARFContext &Ctx, const DWARFUnit *U, 301 function_ref<void(Error)> RecoverableErrorHandler, 302 raw_ostream *OS = nullptr, bool Verbose = false); 303 304 using RowVector = std::vector<Row>; 305 using RowIter = RowVector::const_iterator; 306 using SequenceVector = std::vector<Sequence>; 307 using SequenceIter = SequenceVector::const_iterator; 308 309 struct Prologue Prologue; 310 RowVector Rows; 311 SequenceVector Sequences; 312 313 private: 314 uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq, 315 object::SectionedAddress Address) const; 316 std::optional<StringRef> 317 getSourceByIndex(uint64_t FileIndex, 318 DILineInfoSpecifier::FileLineInfoKind Kind) const; 319 320 uint32_t lookupAddressImpl(object::SectionedAddress Address, 321 bool *IsApproximateLine = nullptr) const; 322 323 /// Fills the Result argument with the indices of the rows that correspond 324 /// to the address range specified by \p Address and \p Size. 325 /// 326 /// \param Address - The starting address of the range. 327 /// \param Size - The size of the address range. 328 /// \param Result - The vector to fill with row indices. 329 /// \param StmtSequenceOffset - if provided, only rows from the sequence 330 /// starting at the matching offset will be added to the result. 331 /// 332 /// Returns true if any rows were found. 333 bool 334 lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size, 335 std::vector<uint32_t> &Result, 336 std::optional<uint64_t> StmtSequenceOffset) const; 337 }; 338 339 LLVM_ABI const LineTable *getLineTable(uint64_t Offset) const; 340 LLVM_ABI Expected<const LineTable *> 341 getOrParseLineTable(DWARFDataExtractor &DebugLineData, uint64_t Offset, 342 const DWARFContext &Ctx, const DWARFUnit *U, 343 function_ref<void(Error)> RecoverableErrorHandler); 344 LLVM_ABI void clearLineTable(uint64_t Offset); 345 346 /// Helper to allow for parsing of an entire .debug_line section in sequence. 347 class SectionParser { 348 public: 349 using LineToUnitMap = std::map<uint64_t, DWARFUnit *>; 350 351 LLVM_ABI SectionParser(DWARFDataExtractor &Data, const DWARFContext &C, 352 DWARFUnitVector::iterator_range Units); 353 354 /// Get the next line table from the section. Report any issues via the 355 /// handlers. 356 /// 357 /// \param RecoverableErrorHandler - any issues that don't prevent further 358 /// parsing of the table will be reported through this handler. 359 /// \param UnrecoverableErrorHandler - any issues that prevent further 360 /// parsing of the table will be reported through this handler. 361 /// \param OS - if not null, the parser will print information about the 362 /// table as it parses it. 363 /// \param Verbose - if true, the parser will print verbose information when 364 /// printing to the output. 365 LLVM_ABI LineTable 366 parseNext(function_ref<void(Error)> RecoverableErrorHandler, 367 function_ref<void(Error)> UnrecoverableErrorHandler, 368 raw_ostream *OS = nullptr, bool Verbose = false); 369 370 /// Skip the current line table and go to the following line table (if 371 /// present) immediately. 372 /// 373 /// \param RecoverableErrorHandler - report any recoverable prologue 374 /// parsing issues via this handler. 375 /// \param UnrecoverableErrorHandler - report any unrecoverable prologue 376 /// parsing issues via this handler. 377 LLVM_ABI void skip(function_ref<void(Error)> RecoverableErrorHandler, 378 function_ref<void(Error)> UnrecoverableErrorHandler); 379 380 /// Indicates if the parser has parsed as much as possible. 381 /// 382 /// \note Certain problems with the line table structure might mean that 383 /// parsing stops before the end of the section is reached. done()384 bool done() const { return Done; } 385 386 /// Get the offset the parser has reached. getOffset()387 uint64_t getOffset() const { return Offset; } 388 389 private: 390 DWARFUnit *prepareToParse(uint64_t Offset); 391 void moveToNextTable(uint64_t OldOffset, const Prologue &P); 392 bool hasValidVersion(uint64_t Offset); 393 394 LineToUnitMap LineToUnit; 395 396 DWARFDataExtractor &DebugLineData; 397 const DWARFContext &Context; 398 uint64_t Offset = 0; 399 bool Done = false; 400 }; 401 402 private: 403 struct ParsingState { 404 LLVM_ABI ParsingState(struct LineTable *LT, uint64_t TableOffset, 405 function_ref<void(Error)> ErrorHandler); 406 407 LLVM_ABI void resetRowAndSequence(uint64_t Offset); 408 LLVM_ABI void appendRowToMatrix(); 409 410 struct AddrOpIndexDelta { 411 uint64_t AddrOffset; 412 int16_t OpIndexDelta; 413 }; 414 415 /// Advance the address and op-index by the \p OperationAdvance value. 416 /// \returns the amount advanced by. 417 LLVM_ABI AddrOpIndexDelta advanceAddrOpIndex(uint64_t OperationAdvance, 418 uint8_t Opcode, 419 uint64_t OpcodeOffset); 420 421 struct OpcodeAdvanceResults { 422 uint64_t AddrDelta; 423 int16_t OpIndexDelta; 424 uint8_t AdjustedOpcode; 425 }; 426 427 /// Advance the address and op-index as required by the specified \p Opcode. 428 /// \returns the amount advanced by and the calculated adjusted opcode. 429 LLVM_ABI OpcodeAdvanceResults advanceForOpcode(uint8_t Opcode, 430 uint64_t OpcodeOffset); 431 432 struct SpecialOpcodeDelta { 433 uint64_t Address; 434 int32_t Line; 435 int16_t OpIndex; 436 }; 437 438 /// Advance the line, address and op-index as required by the specified 439 /// special \p Opcode. \returns the address, op-index and line delta. 440 LLVM_ABI SpecialOpcodeDelta handleSpecialOpcode(uint8_t Opcode, 441 uint64_t OpcodeOffset); 442 443 /// Line table we're currently parsing. 444 struct LineTable *LineTable; 445 struct Row Row; 446 struct Sequence Sequence; 447 448 private: 449 uint64_t LineTableOffset; 450 451 bool ReportAdvanceAddrProblem = true; 452 bool ReportBadLineRange = true; 453 function_ref<void(Error)> ErrorHandler; 454 }; 455 456 using LineTableMapTy = std::map<uint64_t, LineTable>; 457 using LineTableIter = LineTableMapTy::iterator; 458 using LineTableConstIter = LineTableMapTy::const_iterator; 459 460 LineTableMapTy LineTableMap; 461 }; 462 463 } // end namespace llvm 464 465 #endif // LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H 466