1 //===- DWARFUnit.h ----------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H 10 #define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H 11 12 #include "llvm/ADT/Optional.h" 13 #include "llvm/ADT/STLExtras.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/ADT/StringRef.h" 16 #include "llvm/ADT/iterator_range.h" 17 #include "llvm/BinaryFormat/Dwarf.h" 18 #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" 19 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" 20 #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" 21 #include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h" 22 #include "llvm/DebugInfo/DWARF/DWARFDie.h" 23 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 24 #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" 25 #include "llvm/DebugInfo/DWARF/DWARFSection.h" 26 #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" 27 #include "llvm/Support/DataExtractor.h" 28 #include <algorithm> 29 #include <cassert> 30 #include <cstddef> 31 #include <cstdint> 32 #include <map> 33 #include <memory> 34 #include <utility> 35 #include <vector> 36 37 namespace llvm { 38 39 class DWARFAbbreviationDeclarationSet; 40 class DWARFContext; 41 class DWARFDebugAbbrev; 42 class DWARFUnit; 43 44 /// Base class describing the header of any kind of "unit." Some information 45 /// is specific to certain unit types. We separate this class out so we can 46 /// parse the header before deciding what specific kind of unit to construct. 47 class DWARFUnitHeader { 48 // Offset within section. 49 uint64_t Offset = 0; 50 // Version, address size, and DWARF format. 51 dwarf::FormParams FormParams; 52 uint64_t Length = 0; 53 uint64_t AbbrOffset = 0; 54 55 // For DWO units only. 56 const DWARFUnitIndex::Entry *IndexEntry = nullptr; 57 58 // For type units only. 59 uint64_t TypeHash = 0; 60 uint64_t TypeOffset = 0; 61 62 // For v5 split or skeleton compile units only. 63 Optional<uint64_t> DWOId; 64 65 // Unit type as parsed, or derived from the section kind. 66 uint8_t UnitType = 0; 67 68 // Size as parsed. uint8_t for compactness. 69 uint8_t Size = 0; 70 71 public: 72 /// Parse a unit header from \p debug_info starting at \p offset_ptr. 73 /// Note that \p SectionKind is used as a hint to guess the unit type 74 /// for DWARF formats prior to DWARFv5. In DWARFv5 the unit type is 75 /// explicitly defined in the header and the hint is ignored. 76 bool extract(DWARFContext &Context, const DWARFDataExtractor &debug_info, 77 uint64_t *offset_ptr, DWARFSectionKind SectionKind); 78 // For units in DWARF Package File, remember the index entry and update 79 // the abbreviation offset read by extract(). 80 bool applyIndexEntry(const DWARFUnitIndex::Entry *Entry); 81 uint64_t getOffset() const { return Offset; } 82 const dwarf::FormParams &getFormParams() const { return FormParams; } 83 uint16_t getVersion() const { return FormParams.Version; } 84 dwarf::DwarfFormat getFormat() const { return FormParams.Format; } 85 uint8_t getAddressByteSize() const { return FormParams.AddrSize; } 86 uint8_t getRefAddrByteSize() const { return FormParams.getRefAddrByteSize(); } 87 uint8_t getDwarfOffsetByteSize() const { 88 return FormParams.getDwarfOffsetByteSize(); 89 } 90 uint64_t getLength() const { return Length; } 91 uint64_t getAbbrOffset() const { return AbbrOffset; } 92 Optional<uint64_t> getDWOId() const { return DWOId; } 93 void setDWOId(uint64_t Id) { 94 assert((!DWOId || *DWOId == Id) && "setting DWOId to a different value"); 95 DWOId = Id; 96 } 97 const DWARFUnitIndex::Entry *getIndexEntry() const { return IndexEntry; } 98 uint64_t getTypeHash() const { return TypeHash; } 99 uint64_t getTypeOffset() const { return TypeOffset; } 100 uint8_t getUnitType() const { return UnitType; } 101 bool isTypeUnit() const { 102 return UnitType == dwarf::DW_UT_type || UnitType == dwarf::DW_UT_split_type; 103 } 104 uint8_t getSize() const { return Size; } 105 uint8_t getUnitLengthFieldByteSize() const { 106 return dwarf::getUnitLengthFieldByteSize(FormParams.Format); 107 } 108 uint64_t getNextUnitOffset() const { 109 return Offset + Length + getUnitLengthFieldByteSize(); 110 } 111 }; 112 113 const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context, 114 DWARFSectionKind Kind); 115 116 bool isCompileUnit(const std::unique_ptr<DWARFUnit> &U); 117 118 /// Describe a collection of units. Intended to hold all units either from 119 /// .debug_info and .debug_types, or from .debug_info.dwo and .debug_types.dwo. 120 class DWARFUnitVector final : public SmallVector<std::unique_ptr<DWARFUnit>, 1> { 121 std::function<std::unique_ptr<DWARFUnit>(uint64_t, DWARFSectionKind, 122 const DWARFSection *, 123 const DWARFUnitIndex::Entry *)> 124 Parser; 125 int NumInfoUnits = -1; 126 127 public: 128 using UnitVector = SmallVectorImpl<std::unique_ptr<DWARFUnit>>; 129 using iterator = typename UnitVector::iterator; 130 using iterator_range = llvm::iterator_range<typename UnitVector::iterator>; 131 132 using compile_unit_range = 133 decltype(make_filter_range(std::declval<iterator_range>(), isCompileUnit)); 134 135 DWARFUnit *getUnitForOffset(uint64_t Offset) const; 136 DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E); 137 138 /// Read units from a .debug_info or .debug_types section. Calls made 139 /// before finishedInfoUnits() are assumed to be for .debug_info sections, 140 /// calls after finishedInfoUnits() are for .debug_types sections. Caller 141 /// must not mix calls to addUnitsForSection and addUnitsForDWOSection. 142 void addUnitsForSection(DWARFContext &C, const DWARFSection &Section, 143 DWARFSectionKind SectionKind); 144 /// Read units from a .debug_info.dwo or .debug_types.dwo section. Calls 145 /// made before finishedInfoUnits() are assumed to be for .debug_info.dwo 146 /// sections, calls after finishedInfoUnits() are for .debug_types.dwo 147 /// sections. Caller must not mix calls to addUnitsForSection and 148 /// addUnitsForDWOSection. 149 void addUnitsForDWOSection(DWARFContext &C, const DWARFSection &DWOSection, 150 DWARFSectionKind SectionKind, bool Lazy = false); 151 152 /// Add an existing DWARFUnit to this UnitVector. This is used by the DWARF 153 /// verifier to process unit separately. 154 DWARFUnit *addUnit(std::unique_ptr<DWARFUnit> Unit); 155 156 /// Returns number of all units held by this instance. 157 unsigned getNumUnits() const { return size(); } 158 /// Returns number of units from all .debug_info[.dwo] sections. 159 unsigned getNumInfoUnits() const { 160 return NumInfoUnits == -1 ? size() : NumInfoUnits; 161 } 162 /// Returns number of units from all .debug_types[.dwo] sections. 163 unsigned getNumTypesUnits() const { return size() - NumInfoUnits; } 164 /// Indicate that parsing .debug_info[.dwo] is done, and remaining units 165 /// will be from .debug_types[.dwo]. 166 void finishedInfoUnits() { NumInfoUnits = size(); } 167 168 private: 169 void addUnitsImpl(DWARFContext &Context, const DWARFObject &Obj, 170 const DWARFSection &Section, const DWARFDebugAbbrev *DA, 171 const DWARFSection *RS, const DWARFSection *LocSection, 172 StringRef SS, const DWARFSection &SOS, 173 const DWARFSection *AOS, const DWARFSection &LS, bool LE, 174 bool IsDWO, bool Lazy, DWARFSectionKind SectionKind); 175 }; 176 177 /// Represents base address of the CU. 178 /// Represents a unit's contribution to the string offsets table. 179 struct StrOffsetsContributionDescriptor { 180 uint64_t Base = 0; 181 /// The contribution size not including the header. 182 uint64_t Size = 0; 183 /// Format and version. 184 dwarf::FormParams FormParams = {0, 0, dwarf::DwarfFormat::DWARF32}; 185 186 StrOffsetsContributionDescriptor(uint64_t Base, uint64_t Size, 187 uint8_t Version, dwarf::DwarfFormat Format) 188 : Base(Base), Size(Size), FormParams({Version, 0, Format}) {} 189 StrOffsetsContributionDescriptor() = default; 190 191 uint8_t getVersion() const { return FormParams.Version; } 192 dwarf::DwarfFormat getFormat() const { return FormParams.Format; } 193 uint8_t getDwarfOffsetByteSize() const { 194 return FormParams.getDwarfOffsetByteSize(); 195 } 196 /// Determine whether a contribution to the string offsets table is 197 /// consistent with the relevant section size and that its length is 198 /// a multiple of the size of one of its entries. 199 Expected<StrOffsetsContributionDescriptor> 200 validateContributionSize(DWARFDataExtractor &DA); 201 }; 202 203 class DWARFUnit { 204 DWARFContext &Context; 205 /// Section containing this DWARFUnit. 206 const DWARFSection &InfoSection; 207 208 DWARFUnitHeader Header; 209 const DWARFDebugAbbrev *Abbrev; 210 const DWARFSection *RangeSection; 211 uint64_t RangeSectionBase; 212 uint64_t LocSectionBase; 213 214 /// Location table of this unit. 215 std::unique_ptr<DWARFLocationTable> LocTable; 216 217 const DWARFSection &LineSection; 218 StringRef StringSection; 219 const DWARFSection &StringOffsetSection; 220 const DWARFSection *AddrOffsetSection; 221 DWARFUnit *SU; 222 Optional<uint64_t> AddrOffsetSectionBase; 223 bool isLittleEndian; 224 bool IsDWO; 225 const DWARFUnitVector &UnitVector; 226 227 /// Start, length, and DWARF format of the unit's contribution to the string 228 /// offsets table (DWARF v5). 229 Optional<StrOffsetsContributionDescriptor> StringOffsetsTableContribution; 230 231 mutable const DWARFAbbreviationDeclarationSet *Abbrevs; 232 llvm::Optional<object::SectionedAddress> BaseAddr; 233 /// The compile unit debug information entry items. 234 std::vector<DWARFDebugInfoEntry> DieArray; 235 236 /// Map from range's start address to end address and corresponding DIE. 237 /// IntervalMap does not support range removal, as a result, we use the 238 /// std::map::upper_bound for address range lookup. 239 std::map<uint64_t, std::pair<uint64_t, DWARFDie>> AddrDieMap; 240 241 using die_iterator_range = 242 iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>; 243 244 std::shared_ptr<DWARFUnit> DWO; 245 246 uint32_t getDIEIndex(const DWARFDebugInfoEntry *Die) { 247 auto First = DieArray.data(); 248 assert(Die >= First && Die < First + DieArray.size()); 249 return Die - First; 250 } 251 252 protected: 253 const DWARFUnitHeader &getHeader() const { return Header; } 254 255 /// Find the unit's contribution to the string offsets table and determine its 256 /// length and form. The given offset is expected to be derived from the unit 257 /// DIE's DW_AT_str_offsets_base attribute. 258 Expected<Optional<StrOffsetsContributionDescriptor>> 259 determineStringOffsetsTableContribution(DWARFDataExtractor &DA); 260 261 /// Find the unit's contribution to the string offsets table and determine its 262 /// length and form. The given offset is expected to be 0 in a dwo file or, 263 /// in a dwp file, the start of the unit's contribution to the string offsets 264 /// table section (as determined by the index table). 265 Expected<Optional<StrOffsetsContributionDescriptor>> 266 determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA); 267 268 public: 269 DWARFUnit(DWARFContext &Context, const DWARFSection &Section, 270 const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA, 271 const DWARFSection *RS, const DWARFSection *LocSection, 272 StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS, 273 const DWARFSection &LS, bool LE, bool IsDWO, 274 const DWARFUnitVector &UnitVector); 275 276 virtual ~DWARFUnit(); 277 278 bool isDWOUnit() const { return IsDWO; } 279 DWARFContext& getContext() const { return Context; } 280 const DWARFSection &getInfoSection() const { return InfoSection; } 281 uint64_t getOffset() const { return Header.getOffset(); } 282 const dwarf::FormParams &getFormParams() const { 283 return Header.getFormParams(); 284 } 285 uint16_t getVersion() const { return Header.getVersion(); } 286 uint8_t getAddressByteSize() const { return Header.getAddressByteSize(); } 287 uint8_t getRefAddrByteSize() const { return Header.getRefAddrByteSize(); } 288 uint8_t getDwarfOffsetByteSize() const { 289 return Header.getDwarfOffsetByteSize(); 290 } 291 /// Size in bytes of the parsed unit header. 292 uint32_t getHeaderSize() const { return Header.getSize(); } 293 uint64_t getLength() const { return Header.getLength(); } 294 dwarf::DwarfFormat getFormat() const { return Header.getFormat(); } 295 uint8_t getUnitType() const { return Header.getUnitType(); } 296 bool isTypeUnit() const { return Header.isTypeUnit(); } 297 uint64_t getAbbrOffset() const { return Header.getAbbrOffset(); } 298 uint64_t getNextUnitOffset() const { return Header.getNextUnitOffset(); } 299 const DWARFSection &getLineSection() const { return LineSection; } 300 StringRef getStringSection() const { return StringSection; } 301 const DWARFSection &getStringOffsetSection() const { 302 return StringOffsetSection; 303 } 304 305 void setSkeletonUnit(DWARFUnit *SU) { this->SU = SU; } 306 // Returns itself if not using Split DWARF, or if the unit is a skeleton unit 307 // - otherwise returns the split full unit's corresponding skeleton, if 308 // available. 309 DWARFUnit *getLinkedUnit() { return IsDWO ? SU : this; } 310 311 void setAddrOffsetSection(const DWARFSection *AOS, uint64_t Base) { 312 AddrOffsetSection = AOS; 313 AddrOffsetSectionBase = Base; 314 } 315 316 Optional<uint64_t> getAddrOffsetSectionBase() const { 317 return AddrOffsetSectionBase; 318 } 319 320 /// Recursively update address to Die map. 321 void updateAddressDieMap(DWARFDie Die); 322 323 void setRangesSection(const DWARFSection *RS, uint64_t Base) { 324 RangeSection = RS; 325 RangeSectionBase = Base; 326 } 327 328 uint64_t getLocSectionBase() const { 329 return LocSectionBase; 330 } 331 332 Optional<object::SectionedAddress> 333 getAddrOffsetSectionItem(uint32_t Index) const; 334 Optional<uint64_t> getStringOffsetSectionItem(uint32_t Index) const; 335 336 DWARFDataExtractor getDebugInfoExtractor() const; 337 338 DataExtractor getStringExtractor() const { 339 return DataExtractor(StringSection, false, 0); 340 } 341 342 const DWARFLocationTable &getLocationTable() { return *LocTable; } 343 344 /// Extract the range list referenced by this compile unit from the 345 /// .debug_ranges section. If the extraction is unsuccessful, an error 346 /// is returned. Successful extraction requires that the compile unit 347 /// has already been extracted. 348 Error extractRangeList(uint64_t RangeListOffset, 349 DWARFDebugRangeList &RangeList) const; 350 void clear(); 351 352 const Optional<StrOffsetsContributionDescriptor> & 353 getStringOffsetsTableContribution() const { 354 return StringOffsetsTableContribution; 355 } 356 357 uint8_t getDwarfStringOffsetsByteSize() const { 358 assert(StringOffsetsTableContribution); 359 return StringOffsetsTableContribution->getDwarfOffsetByteSize(); 360 } 361 362 uint64_t getStringOffsetsBase() const { 363 assert(StringOffsetsTableContribution); 364 return StringOffsetsTableContribution->Base; 365 } 366 367 uint64_t getAbbreviationsOffset() const { return Header.getAbbrOffset(); } 368 369 const DWARFAbbreviationDeclarationSet *getAbbreviations() const; 370 371 static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) { 372 switch (UnitType) { 373 case dwarf::DW_UT_compile: 374 return Tag == dwarf::DW_TAG_compile_unit; 375 case dwarf::DW_UT_type: 376 return Tag == dwarf::DW_TAG_type_unit; 377 case dwarf::DW_UT_partial: 378 return Tag == dwarf::DW_TAG_partial_unit; 379 case dwarf::DW_UT_skeleton: 380 return Tag == dwarf::DW_TAG_skeleton_unit; 381 case dwarf::DW_UT_split_compile: 382 case dwarf::DW_UT_split_type: 383 return dwarf::isUnitType(Tag); 384 } 385 return false; 386 } 387 388 llvm::Optional<object::SectionedAddress> getBaseAddress(); 389 390 DWARFDie getUnitDIE(bool ExtractUnitDIEOnly = true) { 391 extractDIEsIfNeeded(ExtractUnitDIEOnly); 392 if (DieArray.empty()) 393 return DWARFDie(); 394 return DWARFDie(this, &DieArray[0]); 395 } 396 397 DWARFDie getNonSkeletonUnitDIE(bool ExtractUnitDIEOnly = true) { 398 parseDWO(); 399 if (DWO) 400 return DWO->getUnitDIE(ExtractUnitDIEOnly); 401 return getUnitDIE(ExtractUnitDIEOnly); 402 } 403 404 const char *getCompilationDir(); 405 Optional<uint64_t> getDWOId() { 406 extractDIEsIfNeeded(/*CUDieOnly*/ true); 407 return getHeader().getDWOId(); 408 } 409 void setDWOId(uint64_t NewID) { Header.setDWOId(NewID); } 410 411 /// Return a vector of address ranges resulting from a (possibly encoded) 412 /// range list starting at a given offset in the appropriate ranges section. 413 Expected<DWARFAddressRangesVector> findRnglistFromOffset(uint64_t Offset); 414 415 /// Return a vector of address ranges retrieved from an encoded range 416 /// list whose offset is found via a table lookup given an index (DWARF v5 417 /// and later). 418 Expected<DWARFAddressRangesVector> findRnglistFromIndex(uint32_t Index); 419 420 /// Return a rangelist's offset based on an index. The index designates 421 /// an entry in the rangelist table's offset array and is supplied by 422 /// DW_FORM_rnglistx. 423 Optional<uint64_t> getRnglistOffset(uint32_t Index); 424 425 Optional<uint64_t> getLoclistOffset(uint32_t Index); 426 427 Expected<DWARFAddressRangesVector> collectAddressRanges(); 428 429 Expected<DWARFLocationExpressionsVector> 430 findLoclistFromOffset(uint64_t Offset); 431 432 /// Returns subprogram DIE with address range encompassing the provided 433 /// address. The pointer is alive as long as parsed compile unit DIEs are not 434 /// cleared. 435 DWARFDie getSubroutineForAddress(uint64_t Address); 436 437 /// getInlinedChainForAddress - fetches inlined chain for a given address. 438 /// Returns empty chain if there is no subprogram containing address. The 439 /// chain is valid as long as parsed compile unit DIEs are not cleared. 440 void getInlinedChainForAddress(uint64_t Address, 441 SmallVectorImpl<DWARFDie> &InlinedChain); 442 443 /// Return the DWARFUnitVector containing this unit. 444 const DWARFUnitVector &getUnitVector() const { return UnitVector; } 445 446 /// Returns the number of DIEs in the unit. Parses the unit 447 /// if necessary. 448 unsigned getNumDIEs() { 449 extractDIEsIfNeeded(false); 450 return DieArray.size(); 451 } 452 453 /// Return the index of a DIE inside the unit's DIE vector. 454 /// 455 /// It is illegal to call this method with a DIE that hasn't be 456 /// created by this unit. In other word, it's illegal to call this 457 /// method on a DIE that isn't accessible by following 458 /// children/sibling links starting from this unit's getUnitDIE(). 459 uint32_t getDIEIndex(const DWARFDie &D) { 460 return getDIEIndex(D.getDebugInfoEntry()); 461 } 462 463 /// Return the DIE object at the given index. 464 DWARFDie getDIEAtIndex(unsigned Index) { 465 assert(Index < DieArray.size()); 466 return DWARFDie(this, &DieArray[Index]); 467 } 468 469 DWARFDie getParent(const DWARFDebugInfoEntry *Die); 470 DWARFDie getSibling(const DWARFDebugInfoEntry *Die); 471 DWARFDie getPreviousSibling(const DWARFDebugInfoEntry *Die); 472 DWARFDie getFirstChild(const DWARFDebugInfoEntry *Die); 473 DWARFDie getLastChild(const DWARFDebugInfoEntry *Die); 474 475 /// Return the DIE object for a given offset inside the 476 /// unit's DIE vector. 477 /// 478 /// The unit needs to have its DIEs extracted for this method to work. 479 DWARFDie getDIEForOffset(uint64_t Offset) { 480 extractDIEsIfNeeded(false); 481 auto It = 482 llvm::partition_point(DieArray, [=](const DWARFDebugInfoEntry &DIE) { 483 return DIE.getOffset() < Offset; 484 }); 485 if (It != DieArray.end() && It->getOffset() == Offset) 486 return DWARFDie(this, &*It); 487 return DWARFDie(); 488 } 489 490 uint32_t getLineTableOffset() const { 491 if (auto IndexEntry = Header.getIndexEntry()) 492 if (const auto *Contrib = IndexEntry->getContribution(DW_SECT_LINE)) 493 return Contrib->Offset; 494 return 0; 495 } 496 497 die_iterator_range dies() { 498 extractDIEsIfNeeded(false); 499 return die_iterator_range(DieArray.begin(), DieArray.end()); 500 } 501 502 virtual void dump(raw_ostream &OS, DIDumpOptions DumpOpts) = 0; 503 504 Error tryExtractDIEsIfNeeded(bool CUDieOnly); 505 506 private: 507 /// Size in bytes of the .debug_info data associated with this compile unit. 508 size_t getDebugInfoSize() const { 509 return Header.getLength() + Header.getUnitLengthFieldByteSize() - 510 getHeaderSize(); 511 } 512 513 /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it 514 /// hasn't already been done 515 void extractDIEsIfNeeded(bool CUDieOnly); 516 517 /// extractDIEsToVector - Appends all parsed DIEs to a vector. 518 void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs, 519 std::vector<DWARFDebugInfoEntry> &DIEs) const; 520 521 /// clearDIEs - Clear parsed DIEs to keep memory usage low. 522 void clearDIEs(bool KeepCUDie); 523 524 /// parseDWO - Parses .dwo file for current compile unit. Returns true if 525 /// it was actually constructed. 526 bool parseDWO(); 527 }; 528 529 inline bool isCompileUnit(const std::unique_ptr<DWARFUnit> &U) { 530 return !U->isTypeUnit(); 531 } 532 533 } // end namespace llvm 534 535 #endif // LLVM_DEBUGINFO_DWARF_DWARFUNIT_H 536