1 //===- DWARFDebugLine.h -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H 10 #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H 11 12 #include "llvm/ADT/StringRef.h" 13 #include "llvm/BinaryFormat/Dwarf.h" 14 #include "llvm/DebugInfo/DIContext.h" 15 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 16 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 17 #include "llvm/Support/MD5.h" 18 #include "llvm/Support/Path.h" 19 #include <cstdint> 20 #include <map> 21 #include <string> 22 #include <vector> 23 24 namespace llvm { 25 26 class raw_ostream; 27 28 class DWARFDebugLine { 29 public: 30 struct FileNameEntry { 31 FileNameEntry() = default; 32 33 DWARFFormValue Name; 34 uint64_t DirIdx = 0; 35 uint64_t ModTime = 0; 36 uint64_t Length = 0; 37 MD5::MD5Result Checksum; 38 DWARFFormValue Source; 39 }; 40 41 /// Tracks which optional content types are present in a DWARF file name 42 /// entry format. 43 struct ContentTypeTracker { 44 ContentTypeTracker() = default; 45 46 /// Whether filename entries provide a modification timestamp. 47 bool HasModTime = false; 48 /// Whether filename entries provide a file size. 49 bool HasLength = false; 50 /// For v5, whether filename entries provide an MD5 checksum. 51 bool HasMD5 = false; 52 /// For v5, whether filename entries provide source text. 53 bool HasSource = false; 54 55 /// Update tracked content types with \p ContentType. 56 void trackContentType(dwarf::LineNumberEntryFormat ContentType); 57 }; 58 59 struct Prologue { 60 Prologue(); 61 62 /// The size in bytes of the statement information for this compilation unit 63 /// (not including the total_length field itself). 64 uint64_t TotalLength; 65 /// Version, address size (starting in v5), and DWARF32/64 format; these 66 /// parameters affect interpretation of forms (used in the directory and 67 /// file tables starting with v5). 68 dwarf::FormParams FormParams; 69 /// The number of bytes following the prologue_length field to the beginning 70 /// of the first byte of the statement program itself. 71 uint64_t PrologueLength; 72 /// In v5, size in bytes of a segment selector. 73 uint8_t SegSelectorSize; 74 /// The size in bytes of the smallest target machine instruction. Statement 75 /// program opcodes that alter the address register first multiply their 76 /// operands by this value. 77 uint8_t MinInstLength; 78 /// The maximum number of individual operations that may be encoded in an 79 /// instruction. 80 uint8_t MaxOpsPerInst; 81 /// The initial value of theis_stmtregister. 82 uint8_t DefaultIsStmt; 83 /// This parameter affects the meaning of the special opcodes. See below. 84 int8_t LineBase; 85 /// This parameter affects the meaning of the special opcodes. See below. 86 uint8_t LineRange; 87 /// The number assigned to the first special opcode. 88 uint8_t OpcodeBase; 89 /// This tracks which optional file format content types are present. 90 ContentTypeTracker ContentTypes; 91 std::vector<uint8_t> StandardOpcodeLengths; 92 std::vector<DWARFFormValue> IncludeDirectories; 93 std::vector<FileNameEntry> FileNames; 94 95 const dwarf::FormParams getFormParams() const { return FormParams; } 96 uint16_t getVersion() const { return FormParams.Version; } 97 uint8_t getAddressSize() const { return FormParams.AddrSize; } 98 bool isDWARF64() const { return FormParams.Format == dwarf::DWARF64; } 99 100 uint32_t sizeofTotalLength() const { return isDWARF64() ? 12 : 4; } 101 102 uint32_t sizeofPrologueLength() const { return isDWARF64() ? 8 : 4; } 103 104 bool totalLengthIsValid() const; 105 106 /// Length of the prologue in bytes. 107 uint64_t getLength() const; 108 109 /// Get DWARF-version aware access to the file name entry at the provided 110 /// index. 111 const llvm::DWARFDebugLine::FileNameEntry & 112 getFileNameEntry(uint64_t Index) const; 113 114 bool hasFileAtIndex(uint64_t FileIndex) const; 115 116 std::optional<uint64_t> getLastValidFileIndex() const; 117 118 bool 119 getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, 120 DILineInfoSpecifier::FileLineInfoKind Kind, 121 std::string &Result, 122 sys::path::Style Style = sys::path::Style::native) const; 123 124 void clear(); 125 void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; 126 Error parse(DWARFDataExtractor Data, uint64_t *OffsetPtr, 127 function_ref<void(Error)> RecoverableErrorHandler, 128 const DWARFContext &Ctx, const DWARFUnit *U = nullptr); 129 }; 130 131 /// Standard .debug_line state machine structure. 132 struct Row { 133 explicit Row(bool DefaultIsStmt = false); 134 135 /// Called after a row is appended to the matrix. 136 void postAppend(); 137 void reset(bool DefaultIsStmt); 138 void dump(raw_ostream &OS) const; 139 140 static void dumpTableHeader(raw_ostream &OS, unsigned Indent); 141 142 static bool orderByAddress(const Row &LHS, const Row &RHS) { 143 return std::tie(LHS.Address.SectionIndex, LHS.Address.Address) < 144 std::tie(RHS.Address.SectionIndex, RHS.Address.Address); 145 } 146 147 /// The program-counter value corresponding to a machine instruction 148 /// generated by the compiler and section index pointing to the section 149 /// containg this PC. If relocation information is present then section 150 /// index is the index of the section which contains above address. 151 /// Otherwise this is object::SectionedAddress::Undef value. 152 object::SectionedAddress Address; 153 /// An unsigned integer indicating a source line number. Lines are numbered 154 /// beginning at 1. The compiler may emit the value 0 in cases where an 155 /// instruction cannot be attributed to any source line. 156 uint32_t Line; 157 /// An unsigned integer indicating a column number within a source line. 158 /// Columns are numbered beginning at 1. The value 0 is reserved to indicate 159 /// that a statement begins at the 'left edge' of the line. 160 uint16_t Column; 161 /// An unsigned integer indicating the identity of the source file 162 /// corresponding to a machine instruction. 163 uint16_t File; 164 /// An unsigned integer representing the DWARF path discriminator value 165 /// for this location. 166 uint32_t Discriminator; 167 /// An unsigned integer whose value encodes the applicable instruction set 168 /// architecture for the current instruction. 169 uint8_t Isa; 170 /// A boolean indicating that the current instruction is the beginning of a 171 /// statement. 172 uint8_t IsStmt : 1, 173 /// A boolean indicating that the current instruction is the 174 /// beginning of a basic block. 175 BasicBlock : 1, 176 /// A boolean indicating that the current address is that of the 177 /// first byte after the end of a sequence of target machine 178 /// instructions. 179 EndSequence : 1, 180 /// A boolean indicating that the current address is one (of possibly 181 /// many) where execution should be suspended for an entry breakpoint 182 /// of a function. 183 PrologueEnd : 1, 184 /// A boolean indicating that the current address is one (of possibly 185 /// many) where execution should be suspended for an exit breakpoint 186 /// of a function. 187 EpilogueBegin : 1; 188 }; 189 190 /// Represents a series of contiguous machine instructions. Line table for 191 /// each compilation unit may consist of multiple sequences, which are not 192 /// guaranteed to be in the order of ascending instruction address. 193 struct Sequence { 194 Sequence(); 195 196 /// Sequence describes instructions at address range [LowPC, HighPC) 197 /// and is described by line table rows [FirstRowIndex, LastRowIndex). 198 uint64_t LowPC; 199 uint64_t HighPC; 200 /// If relocation information is present then this is the index of the 201 /// section which contains above addresses. Otherwise this is 202 /// object::SectionedAddress::Undef value. 203 uint64_t SectionIndex; 204 unsigned FirstRowIndex; 205 unsigned LastRowIndex; 206 bool Empty; 207 208 void reset(); 209 210 static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) { 211 return std::tie(LHS.SectionIndex, LHS.HighPC) < 212 std::tie(RHS.SectionIndex, RHS.HighPC); 213 } 214 215 bool isValid() const { 216 return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex); 217 } 218 219 bool containsPC(object::SectionedAddress PC) const { 220 return SectionIndex == PC.SectionIndex && 221 (LowPC <= PC.Address && PC.Address < HighPC); 222 } 223 }; 224 225 struct LineTable { 226 LineTable(); 227 228 /// Represents an invalid row 229 const uint32_t UnknownRowIndex = UINT32_MAX; 230 231 void appendRow(const DWARFDebugLine::Row &R) { Rows.push_back(R); } 232 233 void appendSequence(const DWARFDebugLine::Sequence &S) { 234 Sequences.push_back(S); 235 } 236 237 /// Returns the index of the row with file/line info for a given address, 238 /// or UnknownRowIndex if there is no such row. 239 uint32_t lookupAddress(object::SectionedAddress Address) const; 240 241 bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size, 242 std::vector<uint32_t> &Result) const; 243 244 bool hasFileAtIndex(uint64_t FileIndex) const { 245 return Prologue.hasFileAtIndex(FileIndex); 246 } 247 248 std::optional<uint64_t> getLastValidFileIndex() const { 249 return Prologue.getLastValidFileIndex(); 250 } 251 252 /// Extracts filename by its index in filename table in prologue. 253 /// In Dwarf 4, the files are 1-indexed and the current compilation file 254 /// name is not represented in the list. In DWARF v5, the files are 255 /// 0-indexed and the primary source file has the index 0. 256 /// Returns true on success. 257 bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, 258 DILineInfoSpecifier::FileLineInfoKind Kind, 259 std::string &Result) const { 260 return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result); 261 } 262 263 /// Fills the Result argument with the file and line information 264 /// corresponding to Address. Returns true on success. 265 bool getFileLineInfoForAddress(object::SectionedAddress Address, 266 const char *CompDir, 267 DILineInfoSpecifier::FileLineInfoKind Kind, 268 DILineInfo &Result) const; 269 270 /// Extracts directory name by its Entry in include directories table 271 /// in prologue. Returns true on success. 272 bool getDirectoryForEntry(const FileNameEntry &Entry, 273 std::string &Directory) const; 274 275 void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; 276 void clear(); 277 278 /// Parse prologue and all rows. 279 Error parse(DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, 280 const DWARFContext &Ctx, const DWARFUnit *U, 281 function_ref<void(Error)> RecoverableErrorHandler, 282 raw_ostream *OS = nullptr, bool Verbose = false); 283 284 using RowVector = std::vector<Row>; 285 using RowIter = RowVector::const_iterator; 286 using SequenceVector = std::vector<Sequence>; 287 using SequenceIter = SequenceVector::const_iterator; 288 289 struct Prologue Prologue; 290 RowVector Rows; 291 SequenceVector Sequences; 292 293 private: 294 uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq, 295 object::SectionedAddress Address) const; 296 std::optional<StringRef> 297 getSourceByIndex(uint64_t FileIndex, 298 DILineInfoSpecifier::FileLineInfoKind Kind) const; 299 300 uint32_t lookupAddressImpl(object::SectionedAddress Address) const; 301 302 bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size, 303 std::vector<uint32_t> &Result) const; 304 }; 305 306 const LineTable *getLineTable(uint64_t Offset) const; 307 Expected<const LineTable *> 308 getOrParseLineTable(DWARFDataExtractor &DebugLineData, uint64_t Offset, 309 const DWARFContext &Ctx, const DWARFUnit *U, 310 function_ref<void(Error)> RecoverableErrorHandler); 311 void clearLineTable(uint64_t Offset); 312 313 /// Helper to allow for parsing of an entire .debug_line section in sequence. 314 class SectionParser { 315 public: 316 using LineToUnitMap = std::map<uint64_t, DWARFUnit *>; 317 318 SectionParser(DWARFDataExtractor &Data, const DWARFContext &C, 319 DWARFUnitVector::iterator_range Units); 320 321 /// Get the next line table from the section. Report any issues via the 322 /// handlers. 323 /// 324 /// \param RecoverableErrorHandler - any issues that don't prevent further 325 /// parsing of the table will be reported through this handler. 326 /// \param UnrecoverableErrorHandler - any issues that prevent further 327 /// parsing of the table will be reported through this handler. 328 /// \param OS - if not null, the parser will print information about the 329 /// table as it parses it. 330 /// \param Verbose - if true, the parser will print verbose information when 331 /// printing to the output. 332 LineTable parseNext(function_ref<void(Error)> RecoverableErrorHandler, 333 function_ref<void(Error)> UnrecoverableErrorHandler, 334 raw_ostream *OS = nullptr, bool Verbose = false); 335 336 /// Skip the current line table and go to the following line table (if 337 /// present) immediately. 338 /// 339 /// \param RecoverableErrorHandler - report any recoverable prologue 340 /// parsing issues via this handler. 341 /// \param UnrecoverableErrorHandler - report any unrecoverable prologue 342 /// parsing issues via this handler. 343 void skip(function_ref<void(Error)> RecoverableErrorHandler, 344 function_ref<void(Error)> UnrecoverableErrorHandler); 345 346 /// Indicates if the parser has parsed as much as possible. 347 /// 348 /// \note Certain problems with the line table structure might mean that 349 /// parsing stops before the end of the section is reached. 350 bool done() const { return Done; } 351 352 /// Get the offset the parser has reached. 353 uint64_t getOffset() const { return Offset; } 354 355 private: 356 DWARFUnit *prepareToParse(uint64_t Offset); 357 void moveToNextTable(uint64_t OldOffset, const Prologue &P); 358 359 LineToUnitMap LineToUnit; 360 361 DWARFDataExtractor &DebugLineData; 362 const DWARFContext &Context; 363 uint64_t Offset = 0; 364 bool Done = false; 365 }; 366 367 private: 368 struct ParsingState { 369 ParsingState(struct LineTable *LT, uint64_t TableOffset, 370 function_ref<void(Error)> ErrorHandler); 371 372 void resetRowAndSequence(); 373 void appendRowToMatrix(); 374 375 /// Advance the address by the \p OperationAdvance value. \returns the 376 /// amount advanced by. 377 uint64_t advanceAddr(uint64_t OperationAdvance, uint8_t Opcode, 378 uint64_t OpcodeOffset); 379 380 struct AddrAndAdjustedOpcode { 381 uint64_t AddrDelta; 382 uint8_t AdjustedOpcode; 383 }; 384 385 /// Advance the address as required by the specified \p Opcode. 386 /// \returns the amount advanced by and the calculated adjusted opcode. 387 AddrAndAdjustedOpcode advanceAddrForOpcode(uint8_t Opcode, 388 uint64_t OpcodeOffset); 389 390 struct AddrAndLineDelta { 391 uint64_t Address; 392 int32_t Line; 393 }; 394 395 /// Advance the line and address as required by the specified special \p 396 /// Opcode. \returns the address and line delta. 397 AddrAndLineDelta handleSpecialOpcode(uint8_t Opcode, uint64_t OpcodeOffset); 398 399 /// Line table we're currently parsing. 400 struct LineTable *LineTable; 401 struct Row Row; 402 struct Sequence Sequence; 403 404 private: 405 uint64_t LineTableOffset; 406 407 bool ReportAdvanceAddrProblem = true; 408 bool ReportBadLineRange = true; 409 function_ref<void(Error)> ErrorHandler; 410 }; 411 412 using LineTableMapTy = std::map<uint64_t, LineTable>; 413 using LineTableIter = LineTableMapTy::iterator; 414 using LineTableConstIter = LineTableMapTy::const_iterator; 415 416 LineTableMapTy LineTableMap; 417 }; 418 419 } // end namespace llvm 420 421 #endif // LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H 422