1 //===- DWARFUnit.h ----------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H 10 #define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H 11 12 #include "llvm/ADT/Optional.h" 13 #include "llvm/ADT/STLExtras.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/ADT/StringRef.h" 16 #include "llvm/ADT/iterator_range.h" 17 #include "llvm/BinaryFormat/Dwarf.h" 18 #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" 19 #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" 20 #include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h" 21 #include "llvm/DebugInfo/DWARF/DWARFDie.h" 22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 23 #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" 24 #include "llvm/DebugInfo/DWARF/DWARFSection.h" 25 #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" 26 #include "llvm/Support/DataExtractor.h" 27 #include <algorithm> 28 #include <cassert> 29 #include <cstddef> 30 #include <cstdint> 31 #include <map> 32 #include <memory> 33 #include <utility> 34 #include <vector> 35 36 namespace llvm { 37 38 class DWARFAbbreviationDeclarationSet; 39 class DWARFContext; 40 class DWARFDebugAbbrev; 41 class DWARFUnit; 42 43 /// Base class describing the header of any kind of "unit." Some information 44 /// is specific to certain unit types. We separate this class out so we can 45 /// parse the header before deciding what specific kind of unit to construct. 46 class DWARFUnitHeader { 47 // Offset within section. 48 uint32_t Offset = 0; 49 // Version, address size, and DWARF format. 50 dwarf::FormParams FormParams; 51 uint64_t Length = 0; 52 uint64_t AbbrOffset = 0; 53 54 // For DWO units only. 55 const DWARFUnitIndex::Entry *IndexEntry = nullptr; 56 57 // For type units only. 58 uint64_t TypeHash = 0; 59 uint32_t TypeOffset = 0; 60 61 // For v5 split or skeleton compile units only. 62 Optional<uint64_t> DWOId; 63 64 // Unit type as parsed, or derived from the section kind. 65 uint8_t UnitType = 0; 66 67 // Size as parsed. uint8_t for compactness. 68 uint8_t Size = 0; 69 70 public: 71 /// Parse a unit header from \p debug_info starting at \p offset_ptr. 72 bool extract(DWARFContext &Context, const DWARFDataExtractor &debug_info, 73 uint32_t *offset_ptr, DWARFSectionKind Kind = DW_SECT_INFO, 74 const DWARFUnitIndex *Index = nullptr, 75 const DWARFUnitIndex::Entry *Entry = nullptr); 76 uint32_t getOffset() const { return Offset; } 77 const dwarf::FormParams &getFormParams() const { return FormParams; } 78 uint16_t getVersion() const { return FormParams.Version; } 79 dwarf::DwarfFormat getFormat() const { return FormParams.Format; } 80 uint8_t getAddressByteSize() const { return FormParams.AddrSize; } 81 uint8_t getRefAddrByteSize() const { return FormParams.getRefAddrByteSize(); } 82 uint8_t getDwarfOffsetByteSize() const { 83 return FormParams.getDwarfOffsetByteSize(); 84 } 85 uint64_t getLength() const { return Length; } 86 uint64_t getAbbrOffset() const { return AbbrOffset; } 87 Optional<uint64_t> getDWOId() const { return DWOId; } 88 void setDWOId(uint64_t Id) { 89 assert((!DWOId || *DWOId == Id) && "setting DWOId to a different value"); 90 DWOId = Id; 91 } 92 const DWARFUnitIndex::Entry *getIndexEntry() const { return IndexEntry; } 93 uint64_t getTypeHash() const { return TypeHash; } 94 uint32_t getTypeOffset() const { return TypeOffset; } 95 uint8_t getUnitType() const { return UnitType; } 96 bool isTypeUnit() const { 97 return UnitType == dwarf::DW_UT_type || UnitType == dwarf::DW_UT_split_type; 98 } 99 uint8_t getSize() const { return Size; } 100 uint32_t getNextUnitOffset() const { 101 return Offset + Length + 102 (FormParams.Format == llvm::dwarf::DwarfFormat::DWARF64 ? 4 : 0) + 103 FormParams.getDwarfOffsetByteSize(); 104 } 105 }; 106 107 const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context, 108 DWARFSectionKind Kind); 109 110 /// Describe a collection of units. Intended to hold all units either from 111 /// .debug_info and .debug_types, or from .debug_info.dwo and .debug_types.dwo. 112 class DWARFUnitVector final : public SmallVector<std::unique_ptr<DWARFUnit>, 1> { 113 std::function<std::unique_ptr<DWARFUnit>(uint32_t, DWARFSectionKind, 114 const DWARFSection *, 115 const DWARFUnitIndex::Entry *)> 116 Parser; 117 int NumInfoUnits = -1; 118 119 public: 120 using UnitVector = SmallVectorImpl<std::unique_ptr<DWARFUnit>>; 121 using iterator = typename UnitVector::iterator; 122 using iterator_range = llvm::iterator_range<typename UnitVector::iterator>; 123 124 DWARFUnit *getUnitForOffset(uint32_t Offset) const; 125 DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E); 126 127 /// Read units from a .debug_info or .debug_types section. Calls made 128 /// before finishedInfoUnits() are assumed to be for .debug_info sections, 129 /// calls after finishedInfoUnits() are for .debug_types sections. Caller 130 /// must not mix calls to addUnitsForSection and addUnitsForDWOSection. 131 void addUnitsForSection(DWARFContext &C, const DWARFSection &Section, 132 DWARFSectionKind SectionKind); 133 /// Read units from a .debug_info.dwo or .debug_types.dwo section. Calls 134 /// made before finishedInfoUnits() are assumed to be for .debug_info.dwo 135 /// sections, calls after finishedInfoUnits() are for .debug_types.dwo 136 /// sections. Caller must not mix calls to addUnitsForSection and 137 /// addUnitsForDWOSection. 138 void addUnitsForDWOSection(DWARFContext &C, const DWARFSection &DWOSection, 139 DWARFSectionKind SectionKind, bool Lazy = false); 140 141 /// Add an existing DWARFUnit to this UnitVector. This is used by the DWARF 142 /// verifier to process unit separately. 143 DWARFUnit *addUnit(std::unique_ptr<DWARFUnit> Unit); 144 145 /// Returns number of all units held by this instance. 146 unsigned getNumUnits() const { return size(); } 147 /// Returns number of units from all .debug_info[.dwo] sections. 148 unsigned getNumInfoUnits() const { 149 return NumInfoUnits == -1 ? size() : NumInfoUnits; 150 } 151 /// Returns number of units from all .debug_types[.dwo] sections. 152 unsigned getNumTypesUnits() const { return size() - NumInfoUnits; } 153 /// Indicate that parsing .debug_info[.dwo] is done, and remaining units 154 /// will be from .debug_types[.dwo]. 155 void finishedInfoUnits() { NumInfoUnits = size(); } 156 157 private: 158 void addUnitsImpl(DWARFContext &Context, const DWARFObject &Obj, 159 const DWARFSection &Section, const DWARFDebugAbbrev *DA, 160 const DWARFSection *RS, const DWARFSection *LocSection, 161 StringRef SS, const DWARFSection &SOS, 162 const DWARFSection *AOS, const DWARFSection &LS, bool LE, 163 bool IsDWO, bool Lazy, DWARFSectionKind SectionKind); 164 }; 165 166 /// Represents base address of the CU. 167 /// Represents a unit's contribution to the string offsets table. 168 struct StrOffsetsContributionDescriptor { 169 uint64_t Base = 0; 170 /// The contribution size not including the header. 171 uint64_t Size = 0; 172 /// Format and version. 173 dwarf::FormParams FormParams = {0, 0, dwarf::DwarfFormat::DWARF32}; 174 175 StrOffsetsContributionDescriptor(uint64_t Base, uint64_t Size, 176 uint8_t Version, dwarf::DwarfFormat Format) 177 : Base(Base), Size(Size), FormParams({Version, 0, Format}) {} 178 StrOffsetsContributionDescriptor() = default; 179 180 uint8_t getVersion() const { return FormParams.Version; } 181 dwarf::DwarfFormat getFormat() const { return FormParams.Format; } 182 uint8_t getDwarfOffsetByteSize() const { 183 return FormParams.getDwarfOffsetByteSize(); 184 } 185 /// Determine whether a contribution to the string offsets table is 186 /// consistent with the relevant section size and that its length is 187 /// a multiple of the size of one of its entries. 188 Expected<StrOffsetsContributionDescriptor> 189 validateContributionSize(DWARFDataExtractor &DA); 190 }; 191 192 class DWARFUnit { 193 DWARFContext &Context; 194 /// Section containing this DWARFUnit. 195 const DWARFSection &InfoSection; 196 197 DWARFUnitHeader Header; 198 const DWARFDebugAbbrev *Abbrev; 199 const DWARFSection *RangeSection; 200 uint32_t RangeSectionBase; 201 /// We either keep track of the location list section or its data, depending 202 /// on whether we are handling a split DWARF section or not. 203 union { 204 const DWARFSection *LocSection; 205 StringRef LocSectionData; 206 }; 207 const DWARFSection &LineSection; 208 StringRef StringSection; 209 const DWARFSection &StringOffsetSection; 210 const DWARFSection *AddrOffsetSection; 211 uint32_t AddrOffsetSectionBase = 0; 212 bool isLittleEndian; 213 bool IsDWO; 214 const DWARFUnitVector &UnitVector; 215 216 /// Start, length, and DWARF format of the unit's contribution to the string 217 /// offsets table (DWARF v5). 218 Optional<StrOffsetsContributionDescriptor> StringOffsetsTableContribution; 219 220 /// A table of range lists (DWARF v5 and later). 221 Optional<DWARFDebugRnglistTable> RngListTable; 222 223 mutable const DWARFAbbreviationDeclarationSet *Abbrevs; 224 llvm::Optional<object::SectionedAddress> BaseAddr; 225 /// The compile unit debug information entry items. 226 std::vector<DWARFDebugInfoEntry> DieArray; 227 228 /// Map from range's start address to end address and corresponding DIE. 229 /// IntervalMap does not support range removal, as a result, we use the 230 /// std::map::upper_bound for address range lookup. 231 std::map<uint64_t, std::pair<uint64_t, DWARFDie>> AddrDieMap; 232 233 using die_iterator_range = 234 iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>; 235 236 std::shared_ptr<DWARFUnit> DWO; 237 238 uint32_t getDIEIndex(const DWARFDebugInfoEntry *Die) { 239 auto First = DieArray.data(); 240 assert(Die >= First && Die < First + DieArray.size()); 241 return Die - First; 242 } 243 244 protected: 245 const DWARFUnitHeader &getHeader() const { return Header; } 246 247 /// Size in bytes of the parsed unit header. 248 uint32_t getHeaderSize() const { return Header.getSize(); } 249 250 /// Find the unit's contribution to the string offsets table and determine its 251 /// length and form. The given offset is expected to be derived from the unit 252 /// DIE's DW_AT_str_offsets_base attribute. 253 Expected<Optional<StrOffsetsContributionDescriptor>> 254 determineStringOffsetsTableContribution(DWARFDataExtractor &DA); 255 256 /// Find the unit's contribution to the string offsets table and determine its 257 /// length and form. The given offset is expected to be 0 in a dwo file or, 258 /// in a dwp file, the start of the unit's contribution to the string offsets 259 /// table section (as determined by the index table). 260 Expected<Optional<StrOffsetsContributionDescriptor>> 261 determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA); 262 263 public: 264 DWARFUnit(DWARFContext &Context, const DWARFSection &Section, 265 const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA, 266 const DWARFSection *RS, const DWARFSection *LocSection, 267 StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS, 268 const DWARFSection &LS, bool LE, bool IsDWO, 269 const DWARFUnitVector &UnitVector); 270 271 virtual ~DWARFUnit(); 272 273 bool isDWOUnit() const { return IsDWO; } 274 DWARFContext& getContext() const { return Context; } 275 const DWARFSection &getInfoSection() const { return InfoSection; } 276 const DWARFSection *getLocSection() const { return LocSection; } 277 StringRef getLocSectionData() const { return LocSectionData; } 278 uint32_t getOffset() const { return Header.getOffset(); } 279 const dwarf::FormParams &getFormParams() const { 280 return Header.getFormParams(); 281 } 282 uint16_t getVersion() const { return Header.getVersion(); } 283 uint8_t getAddressByteSize() const { return Header.getAddressByteSize(); } 284 uint8_t getRefAddrByteSize() const { return Header.getRefAddrByteSize(); } 285 uint8_t getDwarfOffsetByteSize() const { 286 return Header.getDwarfOffsetByteSize(); 287 } 288 uint32_t getLength() const { return Header.getLength(); } 289 uint8_t getUnitType() const { return Header.getUnitType(); } 290 bool isTypeUnit() const { return Header.isTypeUnit(); } 291 uint32_t getNextUnitOffset() const { return Header.getNextUnitOffset(); } 292 const DWARFSection &getLineSection() const { return LineSection; } 293 StringRef getStringSection() const { return StringSection; } 294 const DWARFSection &getStringOffsetSection() const { 295 return StringOffsetSection; 296 } 297 298 void setAddrOffsetSection(const DWARFSection *AOS, uint32_t Base) { 299 AddrOffsetSection = AOS; 300 AddrOffsetSectionBase = Base; 301 } 302 303 /// Recursively update address to Die map. 304 void updateAddressDieMap(DWARFDie Die); 305 306 void setRangesSection(const DWARFSection *RS, uint32_t Base) { 307 RangeSection = RS; 308 RangeSectionBase = Base; 309 } 310 311 Optional<object::SectionedAddress> 312 getAddrOffsetSectionItem(uint32_t Index) const; 313 Optional<uint64_t> getStringOffsetSectionItem(uint32_t Index) const; 314 315 DWARFDataExtractor getDebugInfoExtractor() const; 316 317 DataExtractor getStringExtractor() const { 318 return DataExtractor(StringSection, false, 0); 319 } 320 321 /// Extract the range list referenced by this compile unit from the 322 /// .debug_ranges section. If the extraction is unsuccessful, an error 323 /// is returned. Successful extraction requires that the compile unit 324 /// has already been extracted. 325 Error extractRangeList(uint32_t RangeListOffset, 326 DWARFDebugRangeList &RangeList) const; 327 void clear(); 328 329 const Optional<StrOffsetsContributionDescriptor> & 330 getStringOffsetsTableContribution() const { 331 return StringOffsetsTableContribution; 332 } 333 334 uint8_t getDwarfStringOffsetsByteSize() const { 335 assert(StringOffsetsTableContribution); 336 return StringOffsetsTableContribution->getDwarfOffsetByteSize(); 337 } 338 339 uint64_t getStringOffsetsBase() const { 340 assert(StringOffsetsTableContribution); 341 return StringOffsetsTableContribution->Base; 342 } 343 344 const DWARFAbbreviationDeclarationSet *getAbbreviations() const; 345 346 static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) { 347 switch (UnitType) { 348 case dwarf::DW_UT_compile: 349 return Tag == dwarf::DW_TAG_compile_unit; 350 case dwarf::DW_UT_type: 351 return Tag == dwarf::DW_TAG_type_unit; 352 case dwarf::DW_UT_partial: 353 return Tag == dwarf::DW_TAG_partial_unit; 354 case dwarf::DW_UT_skeleton: 355 return Tag == dwarf::DW_TAG_skeleton_unit; 356 case dwarf::DW_UT_split_compile: 357 case dwarf::DW_UT_split_type: 358 return dwarf::isUnitType(Tag); 359 } 360 return false; 361 } 362 363 /// Return the number of bytes for the header of a unit of 364 /// UnitType type. 365 /// 366 /// This function must be called with a valid unit type which in 367 /// DWARF5 is defined as one of the following six types. 368 static uint32_t getDWARF5HeaderSize(uint8_t UnitType) { 369 switch (UnitType) { 370 case dwarf::DW_UT_compile: 371 case dwarf::DW_UT_partial: 372 return 12; 373 case dwarf::DW_UT_skeleton: 374 case dwarf::DW_UT_split_compile: 375 return 20; 376 case dwarf::DW_UT_type: 377 case dwarf::DW_UT_split_type: 378 return 24; 379 } 380 llvm_unreachable("Invalid UnitType."); 381 } 382 383 llvm::Optional<object::SectionedAddress> getBaseAddress(); 384 385 DWARFDie getUnitDIE(bool ExtractUnitDIEOnly = true) { 386 extractDIEsIfNeeded(ExtractUnitDIEOnly); 387 if (DieArray.empty()) 388 return DWARFDie(); 389 return DWARFDie(this, &DieArray[0]); 390 } 391 392 DWARFDie getNonSkeletonUnitDIE(bool ExtractUnitDIEOnly = true) { 393 parseDWO(); 394 if (DWO) 395 return DWO->getUnitDIE(ExtractUnitDIEOnly); 396 return getUnitDIE(ExtractUnitDIEOnly); 397 } 398 399 const char *getCompilationDir(); 400 Optional<uint64_t> getDWOId() { 401 extractDIEsIfNeeded(/*CUDieOnly*/ true); 402 return getHeader().getDWOId(); 403 } 404 void setDWOId(uint64_t NewID) { Header.setDWOId(NewID); } 405 406 /// Return a vector of address ranges resulting from a (possibly encoded) 407 /// range list starting at a given offset in the appropriate ranges section. 408 Expected<DWARFAddressRangesVector> findRnglistFromOffset(uint32_t Offset); 409 410 /// Return a vector of address ranges retrieved from an encoded range 411 /// list whose offset is found via a table lookup given an index (DWARF v5 412 /// and later). 413 Expected<DWARFAddressRangesVector> findRnglistFromIndex(uint32_t Index); 414 415 /// Return a rangelist's offset based on an index. The index designates 416 /// an entry in the rangelist table's offset array and is supplied by 417 /// DW_FORM_rnglistx. 418 Optional<uint32_t> getRnglistOffset(uint32_t Index) { 419 if (RngListTable) 420 return RngListTable->getOffsetEntry(Index); 421 return None; 422 } 423 424 Expected<DWARFAddressRangesVector> collectAddressRanges(); 425 426 /// Returns subprogram DIE with address range encompassing the provided 427 /// address. The pointer is alive as long as parsed compile unit DIEs are not 428 /// cleared. 429 DWARFDie getSubroutineForAddress(uint64_t Address); 430 431 /// getInlinedChainForAddress - fetches inlined chain for a given address. 432 /// Returns empty chain if there is no subprogram containing address. The 433 /// chain is valid as long as parsed compile unit DIEs are not cleared. 434 void getInlinedChainForAddress(uint64_t Address, 435 SmallVectorImpl<DWARFDie> &InlinedChain); 436 437 /// Return the DWARFUnitVector containing this unit. 438 const DWARFUnitVector &getUnitVector() const { return UnitVector; } 439 440 /// Returns the number of DIEs in the unit. Parses the unit 441 /// if necessary. 442 unsigned getNumDIEs() { 443 extractDIEsIfNeeded(false); 444 return DieArray.size(); 445 } 446 447 /// Return the index of a DIE inside the unit's DIE vector. 448 /// 449 /// It is illegal to call this method with a DIE that hasn't be 450 /// created by this unit. In other word, it's illegal to call this 451 /// method on a DIE that isn't accessible by following 452 /// children/sibling links starting from this unit's getUnitDIE(). 453 uint32_t getDIEIndex(const DWARFDie &D) { 454 return getDIEIndex(D.getDebugInfoEntry()); 455 } 456 457 /// Return the DIE object at the given index. 458 DWARFDie getDIEAtIndex(unsigned Index) { 459 assert(Index < DieArray.size()); 460 return DWARFDie(this, &DieArray[Index]); 461 } 462 463 DWARFDie getParent(const DWARFDebugInfoEntry *Die); 464 DWARFDie getSibling(const DWARFDebugInfoEntry *Die); 465 DWARFDie getPreviousSibling(const DWARFDebugInfoEntry *Die); 466 DWARFDie getFirstChild(const DWARFDebugInfoEntry *Die); 467 DWARFDie getLastChild(const DWARFDebugInfoEntry *Die); 468 469 /// Return the DIE object for a given offset inside the 470 /// unit's DIE vector. 471 /// 472 /// The unit needs to have its DIEs extracted for this method to work. 473 DWARFDie getDIEForOffset(uint32_t Offset) { 474 extractDIEsIfNeeded(false); 475 assert(!DieArray.empty()); 476 auto It = 477 llvm::partition_point(DieArray, [=](const DWARFDebugInfoEntry &DIE) { 478 return DIE.getOffset() < Offset; 479 }); 480 if (It != DieArray.end() && It->getOffset() == Offset) 481 return DWARFDie(this, &*It); 482 return DWARFDie(); 483 } 484 485 uint32_t getLineTableOffset() const { 486 if (auto IndexEntry = Header.getIndexEntry()) 487 if (const auto *Contrib = IndexEntry->getOffset(DW_SECT_LINE)) 488 return Contrib->Offset; 489 return 0; 490 } 491 492 die_iterator_range dies() { 493 extractDIEsIfNeeded(false); 494 return die_iterator_range(DieArray.begin(), DieArray.end()); 495 } 496 497 virtual void dump(raw_ostream &OS, DIDumpOptions DumpOpts) = 0; 498 private: 499 /// Size in bytes of the .debug_info data associated with this compile unit. 500 size_t getDebugInfoSize() const { 501 return Header.getLength() + 4 - getHeaderSize(); 502 } 503 504 /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it 505 /// hasn't already been done. Returns the number of DIEs parsed at this call. 506 size_t extractDIEsIfNeeded(bool CUDieOnly); 507 508 /// extractDIEsToVector - Appends all parsed DIEs to a vector. 509 void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs, 510 std::vector<DWARFDebugInfoEntry> &DIEs) const; 511 512 /// clearDIEs - Clear parsed DIEs to keep memory usage low. 513 void clearDIEs(bool KeepCUDie); 514 515 /// parseDWO - Parses .dwo file for current compile unit. Returns true if 516 /// it was actually constructed. 517 bool parseDWO(); 518 }; 519 520 } // end namespace llvm 521 522 #endif // LLVM_DEBUGINFO_DWARF_DWARFUNIT_H 523