xref: /freebsd/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1 //===- DWARFUnit.h ----------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H
10 #define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H
11 
12 #include "llvm/ADT/Optional.h"
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/iterator_range.h"
17 #include "llvm/BinaryFormat/Dwarf.h"
18 #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
19 #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"
21 #include "llvm/DebugInfo/DWARF/DWARFDie.h"
22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
23 #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
24 #include "llvm/DebugInfo/DWARF/DWARFSection.h"
25 #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
26 #include "llvm/Support/DataExtractor.h"
27 #include <algorithm>
28 #include <cassert>
29 #include <cstddef>
30 #include <cstdint>
31 #include <map>
32 #include <memory>
33 #include <utility>
34 #include <vector>
35 
36 namespace llvm {
37 
38 class DWARFAbbreviationDeclarationSet;
39 class DWARFContext;
40 class DWARFDebugAbbrev;
41 class DWARFUnit;
42 
43 /// Base class describing the header of any kind of "unit."  Some information
44 /// is specific to certain unit types.  We separate this class out so we can
45 /// parse the header before deciding what specific kind of unit to construct.
46 class DWARFUnitHeader {
47   // Offset within section.
48   uint32_t Offset = 0;
49   // Version, address size, and DWARF format.
50   dwarf::FormParams FormParams;
51   uint64_t Length = 0;
52   uint64_t AbbrOffset = 0;
53 
54   // For DWO units only.
55   const DWARFUnitIndex::Entry *IndexEntry = nullptr;
56 
57   // For type units only.
58   uint64_t TypeHash = 0;
59   uint32_t TypeOffset = 0;
60 
61   // For v5 split or skeleton compile units only.
62   Optional<uint64_t> DWOId;
63 
64   // Unit type as parsed, or derived from the section kind.
65   uint8_t UnitType = 0;
66 
67   // Size as parsed. uint8_t for compactness.
68   uint8_t Size = 0;
69 
70 public:
71   /// Parse a unit header from \p debug_info starting at \p offset_ptr.
72   bool extract(DWARFContext &Context, const DWARFDataExtractor &debug_info,
73                uint32_t *offset_ptr, DWARFSectionKind Kind = DW_SECT_INFO,
74                const DWARFUnitIndex *Index = nullptr,
75                const DWARFUnitIndex::Entry *Entry = nullptr);
76   uint32_t getOffset() const { return Offset; }
77   const dwarf::FormParams &getFormParams() const { return FormParams; }
78   uint16_t getVersion() const { return FormParams.Version; }
79   dwarf::DwarfFormat getFormat() const { return FormParams.Format; }
80   uint8_t getAddressByteSize() const { return FormParams.AddrSize; }
81   uint8_t getRefAddrByteSize() const { return FormParams.getRefAddrByteSize(); }
82   uint8_t getDwarfOffsetByteSize() const {
83     return FormParams.getDwarfOffsetByteSize();
84   }
85   uint64_t getLength() const { return Length; }
86   uint64_t getAbbrOffset() const { return AbbrOffset; }
87   Optional<uint64_t> getDWOId() const { return DWOId; }
88   void setDWOId(uint64_t Id) {
89     assert((!DWOId || *DWOId == Id) && "setting DWOId to a different value");
90     DWOId = Id;
91   }
92   const DWARFUnitIndex::Entry *getIndexEntry() const { return IndexEntry; }
93   uint64_t getTypeHash() const { return TypeHash; }
94   uint32_t getTypeOffset() const { return TypeOffset; }
95   uint8_t getUnitType() const { return UnitType; }
96   bool isTypeUnit() const {
97     return UnitType == dwarf::DW_UT_type || UnitType == dwarf::DW_UT_split_type;
98   }
99   uint8_t getSize() const { return Size; }
100   uint32_t getNextUnitOffset() const {
101     return Offset + Length +
102            (FormParams.Format == llvm::dwarf::DwarfFormat::DWARF64 ? 4 : 0) +
103            FormParams.getDwarfOffsetByteSize();
104   }
105 };
106 
107 const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
108                                         DWARFSectionKind Kind);
109 
110 /// Describe a collection of units. Intended to hold all units either from
111 /// .debug_info and .debug_types, or from .debug_info.dwo and .debug_types.dwo.
112 class DWARFUnitVector final : public SmallVector<std::unique_ptr<DWARFUnit>, 1> {
113   std::function<std::unique_ptr<DWARFUnit>(uint32_t, DWARFSectionKind,
114                                            const DWARFSection *,
115                                            const DWARFUnitIndex::Entry *)>
116       Parser;
117   int NumInfoUnits = -1;
118 
119 public:
120   using UnitVector = SmallVectorImpl<std::unique_ptr<DWARFUnit>>;
121   using iterator = typename UnitVector::iterator;
122   using iterator_range = llvm::iterator_range<typename UnitVector::iterator>;
123 
124   DWARFUnit *getUnitForOffset(uint32_t Offset) const;
125   DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E);
126 
127   /// Read units from a .debug_info or .debug_types section.  Calls made
128   /// before finishedInfoUnits() are assumed to be for .debug_info sections,
129   /// calls after finishedInfoUnits() are for .debug_types sections.  Caller
130   /// must not mix calls to addUnitsForSection and addUnitsForDWOSection.
131   void addUnitsForSection(DWARFContext &C, const DWARFSection &Section,
132                           DWARFSectionKind SectionKind);
133   /// Read units from a .debug_info.dwo or .debug_types.dwo section.  Calls
134   /// made before finishedInfoUnits() are assumed to be for .debug_info.dwo
135   /// sections, calls after finishedInfoUnits() are for .debug_types.dwo
136   /// sections.  Caller must not mix calls to addUnitsForSection and
137   /// addUnitsForDWOSection.
138   void addUnitsForDWOSection(DWARFContext &C, const DWARFSection &DWOSection,
139                              DWARFSectionKind SectionKind, bool Lazy = false);
140 
141   /// Add an existing DWARFUnit to this UnitVector. This is used by the DWARF
142   /// verifier to process unit separately.
143   DWARFUnit *addUnit(std::unique_ptr<DWARFUnit> Unit);
144 
145   /// Returns number of all units held by this instance.
146   unsigned getNumUnits() const { return size(); }
147   /// Returns number of units from all .debug_info[.dwo] sections.
148   unsigned getNumInfoUnits() const {
149     return NumInfoUnits == -1 ? size() : NumInfoUnits;
150   }
151   /// Returns number of units from all .debug_types[.dwo] sections.
152   unsigned getNumTypesUnits() const { return size() - NumInfoUnits; }
153   /// Indicate that parsing .debug_info[.dwo] is done, and remaining units
154   /// will be from .debug_types[.dwo].
155   void finishedInfoUnits() { NumInfoUnits = size(); }
156 
157 private:
158   void addUnitsImpl(DWARFContext &Context, const DWARFObject &Obj,
159                     const DWARFSection &Section, const DWARFDebugAbbrev *DA,
160                     const DWARFSection *RS, const DWARFSection *LocSection,
161                     StringRef SS, const DWARFSection &SOS,
162                     const DWARFSection *AOS, const DWARFSection &LS, bool LE,
163                     bool IsDWO, bool Lazy, DWARFSectionKind SectionKind);
164 };
165 
166 /// Represents base address of the CU.
167 /// Represents a unit's contribution to the string offsets table.
168 struct StrOffsetsContributionDescriptor {
169   uint64_t Base = 0;
170   /// The contribution size not including the header.
171   uint64_t Size = 0;
172   /// Format and version.
173   dwarf::FormParams FormParams = {0, 0, dwarf::DwarfFormat::DWARF32};
174 
175   StrOffsetsContributionDescriptor(uint64_t Base, uint64_t Size,
176                                    uint8_t Version, dwarf::DwarfFormat Format)
177       : Base(Base), Size(Size), FormParams({Version, 0, Format}) {}
178   StrOffsetsContributionDescriptor() = default;
179 
180   uint8_t getVersion() const { return FormParams.Version; }
181   dwarf::DwarfFormat getFormat() const { return FormParams.Format; }
182   uint8_t getDwarfOffsetByteSize() const {
183     return FormParams.getDwarfOffsetByteSize();
184   }
185   /// Determine whether a contribution to the string offsets table is
186   /// consistent with the relevant section size and that its length is
187   /// a multiple of the size of one of its entries.
188   Expected<StrOffsetsContributionDescriptor>
189   validateContributionSize(DWARFDataExtractor &DA);
190 };
191 
192 class DWARFUnit {
193   DWARFContext &Context;
194   /// Section containing this DWARFUnit.
195   const DWARFSection &InfoSection;
196 
197   DWARFUnitHeader Header;
198   const DWARFDebugAbbrev *Abbrev;
199   const DWARFSection *RangeSection;
200   uint32_t RangeSectionBase;
201   /// We either keep track of the location list section or its data, depending
202   /// on whether we are handling a split DWARF section or not.
203   union {
204     const DWARFSection *LocSection;
205     StringRef LocSectionData;
206   };
207   const DWARFSection &LineSection;
208   StringRef StringSection;
209   const DWARFSection &StringOffsetSection;
210   const DWARFSection *AddrOffsetSection;
211   uint32_t AddrOffsetSectionBase = 0;
212   bool isLittleEndian;
213   bool IsDWO;
214   const DWARFUnitVector &UnitVector;
215 
216   /// Start, length, and DWARF format of the unit's contribution to the string
217   /// offsets table (DWARF v5).
218   Optional<StrOffsetsContributionDescriptor> StringOffsetsTableContribution;
219 
220   /// A table of range lists (DWARF v5 and later).
221   Optional<DWARFDebugRnglistTable> RngListTable;
222 
223   mutable const DWARFAbbreviationDeclarationSet *Abbrevs;
224   llvm::Optional<object::SectionedAddress> BaseAddr;
225   /// The compile unit debug information entry items.
226   std::vector<DWARFDebugInfoEntry> DieArray;
227 
228   /// Map from range's start address to end address and corresponding DIE.
229   /// IntervalMap does not support range removal, as a result, we use the
230   /// std::map::upper_bound for address range lookup.
231   std::map<uint64_t, std::pair<uint64_t, DWARFDie>> AddrDieMap;
232 
233   using die_iterator_range =
234       iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>;
235 
236   std::shared_ptr<DWARFUnit> DWO;
237 
238   uint32_t getDIEIndex(const DWARFDebugInfoEntry *Die) {
239     auto First = DieArray.data();
240     assert(Die >= First && Die < First + DieArray.size());
241     return Die - First;
242   }
243 
244 protected:
245   const DWARFUnitHeader &getHeader() const { return Header; }
246 
247   /// Size in bytes of the parsed unit header.
248   uint32_t getHeaderSize() const { return Header.getSize(); }
249 
250   /// Find the unit's contribution to the string offsets table and determine its
251   /// length and form. The given offset is expected to be derived from the unit
252   /// DIE's DW_AT_str_offsets_base attribute.
253   Expected<Optional<StrOffsetsContributionDescriptor>>
254   determineStringOffsetsTableContribution(DWARFDataExtractor &DA);
255 
256   /// Find the unit's contribution to the string offsets table and determine its
257   /// length and form. The given offset is expected to be 0 in a dwo file or,
258   /// in a dwp file, the start of the unit's contribution to the string offsets
259   /// table section (as determined by the index table).
260   Expected<Optional<StrOffsetsContributionDescriptor>>
261   determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA);
262 
263 public:
264   DWARFUnit(DWARFContext &Context, const DWARFSection &Section,
265             const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA,
266             const DWARFSection *RS, const DWARFSection *LocSection,
267             StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS,
268             const DWARFSection &LS, bool LE, bool IsDWO,
269             const DWARFUnitVector &UnitVector);
270 
271   virtual ~DWARFUnit();
272 
273   bool isDWOUnit() const { return IsDWO; }
274   DWARFContext& getContext() const { return Context; }
275   const DWARFSection &getInfoSection() const { return InfoSection; }
276   const DWARFSection *getLocSection() const { return LocSection; }
277   StringRef getLocSectionData() const { return LocSectionData; }
278   uint32_t getOffset() const { return Header.getOffset(); }
279   const dwarf::FormParams &getFormParams() const {
280     return Header.getFormParams();
281   }
282   uint16_t getVersion() const { return Header.getVersion(); }
283   uint8_t getAddressByteSize() const { return Header.getAddressByteSize(); }
284   uint8_t getRefAddrByteSize() const { return Header.getRefAddrByteSize(); }
285   uint8_t getDwarfOffsetByteSize() const {
286     return Header.getDwarfOffsetByteSize();
287   }
288   uint32_t getLength() const { return Header.getLength(); }
289   uint8_t getUnitType() const { return Header.getUnitType(); }
290   bool isTypeUnit() const { return Header.isTypeUnit(); }
291   uint32_t getNextUnitOffset() const { return Header.getNextUnitOffset(); }
292   const DWARFSection &getLineSection() const { return LineSection; }
293   StringRef getStringSection() const { return StringSection; }
294   const DWARFSection &getStringOffsetSection() const {
295     return StringOffsetSection;
296   }
297 
298   void setAddrOffsetSection(const DWARFSection *AOS, uint32_t Base) {
299     AddrOffsetSection = AOS;
300     AddrOffsetSectionBase = Base;
301   }
302 
303   /// Recursively update address to Die map.
304   void updateAddressDieMap(DWARFDie Die);
305 
306   void setRangesSection(const DWARFSection *RS, uint32_t Base) {
307     RangeSection = RS;
308     RangeSectionBase = Base;
309   }
310 
311   Optional<object::SectionedAddress>
312   getAddrOffsetSectionItem(uint32_t Index) const;
313   Optional<uint64_t> getStringOffsetSectionItem(uint32_t Index) const;
314 
315   DWARFDataExtractor getDebugInfoExtractor() const;
316 
317   DataExtractor getStringExtractor() const {
318     return DataExtractor(StringSection, false, 0);
319   }
320 
321   /// Extract the range list referenced by this compile unit from the
322   /// .debug_ranges section. If the extraction is unsuccessful, an error
323   /// is returned. Successful extraction requires that the compile unit
324   /// has already been extracted.
325   Error extractRangeList(uint32_t RangeListOffset,
326                          DWARFDebugRangeList &RangeList) const;
327   void clear();
328 
329   const Optional<StrOffsetsContributionDescriptor> &
330   getStringOffsetsTableContribution() const {
331     return StringOffsetsTableContribution;
332   }
333 
334   uint8_t getDwarfStringOffsetsByteSize() const {
335     assert(StringOffsetsTableContribution);
336     return StringOffsetsTableContribution->getDwarfOffsetByteSize();
337   }
338 
339   uint64_t getStringOffsetsBase() const {
340     assert(StringOffsetsTableContribution);
341     return StringOffsetsTableContribution->Base;
342   }
343 
344   const DWARFAbbreviationDeclarationSet *getAbbreviations() const;
345 
346   static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) {
347     switch (UnitType) {
348     case dwarf::DW_UT_compile:
349       return Tag == dwarf::DW_TAG_compile_unit;
350     case dwarf::DW_UT_type:
351       return Tag == dwarf::DW_TAG_type_unit;
352     case dwarf::DW_UT_partial:
353       return Tag == dwarf::DW_TAG_partial_unit;
354     case dwarf::DW_UT_skeleton:
355       return Tag == dwarf::DW_TAG_skeleton_unit;
356     case dwarf::DW_UT_split_compile:
357     case dwarf::DW_UT_split_type:
358       return dwarf::isUnitType(Tag);
359     }
360     return false;
361   }
362 
363   /// Return the number of bytes for the header of a unit of
364   /// UnitType type.
365   ///
366   /// This function must be called with a valid unit type which in
367   /// DWARF5 is defined as one of the following six types.
368   static uint32_t getDWARF5HeaderSize(uint8_t UnitType) {
369     switch (UnitType) {
370     case dwarf::DW_UT_compile:
371     case dwarf::DW_UT_partial:
372       return 12;
373     case dwarf::DW_UT_skeleton:
374     case dwarf::DW_UT_split_compile:
375       return 20;
376     case dwarf::DW_UT_type:
377     case dwarf::DW_UT_split_type:
378       return 24;
379     }
380     llvm_unreachable("Invalid UnitType.");
381   }
382 
383   llvm::Optional<object::SectionedAddress> getBaseAddress();
384 
385   DWARFDie getUnitDIE(bool ExtractUnitDIEOnly = true) {
386     extractDIEsIfNeeded(ExtractUnitDIEOnly);
387     if (DieArray.empty())
388       return DWARFDie();
389     return DWARFDie(this, &DieArray[0]);
390   }
391 
392   DWARFDie getNonSkeletonUnitDIE(bool ExtractUnitDIEOnly = true) {
393     parseDWO();
394     if (DWO)
395       return DWO->getUnitDIE(ExtractUnitDIEOnly);
396     return getUnitDIE(ExtractUnitDIEOnly);
397   }
398 
399   const char *getCompilationDir();
400   Optional<uint64_t> getDWOId() {
401     extractDIEsIfNeeded(/*CUDieOnly*/ true);
402     return getHeader().getDWOId();
403   }
404   void setDWOId(uint64_t NewID) { Header.setDWOId(NewID); }
405 
406   /// Return a vector of address ranges resulting from a (possibly encoded)
407   /// range list starting at a given offset in the appropriate ranges section.
408   Expected<DWARFAddressRangesVector> findRnglistFromOffset(uint32_t Offset);
409 
410   /// Return a vector of address ranges retrieved from an encoded range
411   /// list whose offset is found via a table lookup given an index (DWARF v5
412   /// and later).
413   Expected<DWARFAddressRangesVector> findRnglistFromIndex(uint32_t Index);
414 
415   /// Return a rangelist's offset based on an index. The index designates
416   /// an entry in the rangelist table's offset array and is supplied by
417   /// DW_FORM_rnglistx.
418   Optional<uint32_t> getRnglistOffset(uint32_t Index) {
419     if (RngListTable)
420       return RngListTable->getOffsetEntry(Index);
421     return None;
422   }
423 
424   Expected<DWARFAddressRangesVector> collectAddressRanges();
425 
426   /// Returns subprogram DIE with address range encompassing the provided
427   /// address. The pointer is alive as long as parsed compile unit DIEs are not
428   /// cleared.
429   DWARFDie getSubroutineForAddress(uint64_t Address);
430 
431   /// getInlinedChainForAddress - fetches inlined chain for a given address.
432   /// Returns empty chain if there is no subprogram containing address. The
433   /// chain is valid as long as parsed compile unit DIEs are not cleared.
434   void getInlinedChainForAddress(uint64_t Address,
435                                  SmallVectorImpl<DWARFDie> &InlinedChain);
436 
437   /// Return the DWARFUnitVector containing this unit.
438   const DWARFUnitVector &getUnitVector() const { return UnitVector; }
439 
440   /// Returns the number of DIEs in the unit. Parses the unit
441   /// if necessary.
442   unsigned getNumDIEs() {
443     extractDIEsIfNeeded(false);
444     return DieArray.size();
445   }
446 
447   /// Return the index of a DIE inside the unit's DIE vector.
448   ///
449   /// It is illegal to call this method with a DIE that hasn't be
450   /// created by this unit. In other word, it's illegal to call this
451   /// method on a DIE that isn't accessible by following
452   /// children/sibling links starting from this unit's getUnitDIE().
453   uint32_t getDIEIndex(const DWARFDie &D) {
454     return getDIEIndex(D.getDebugInfoEntry());
455   }
456 
457   /// Return the DIE object at the given index.
458   DWARFDie getDIEAtIndex(unsigned Index) {
459     assert(Index < DieArray.size());
460     return DWARFDie(this, &DieArray[Index]);
461   }
462 
463   DWARFDie getParent(const DWARFDebugInfoEntry *Die);
464   DWARFDie getSibling(const DWARFDebugInfoEntry *Die);
465   DWARFDie getPreviousSibling(const DWARFDebugInfoEntry *Die);
466   DWARFDie getFirstChild(const DWARFDebugInfoEntry *Die);
467   DWARFDie getLastChild(const DWARFDebugInfoEntry *Die);
468 
469   /// Return the DIE object for a given offset inside the
470   /// unit's DIE vector.
471   ///
472   /// The unit needs to have its DIEs extracted for this method to work.
473   DWARFDie getDIEForOffset(uint32_t Offset) {
474     extractDIEsIfNeeded(false);
475     assert(!DieArray.empty());
476     auto It =
477         llvm::partition_point(DieArray, [=](const DWARFDebugInfoEntry &DIE) {
478           return DIE.getOffset() < Offset;
479         });
480     if (It != DieArray.end() && It->getOffset() == Offset)
481       return DWARFDie(this, &*It);
482     return DWARFDie();
483   }
484 
485   uint32_t getLineTableOffset() const {
486     if (auto IndexEntry = Header.getIndexEntry())
487       if (const auto *Contrib = IndexEntry->getOffset(DW_SECT_LINE))
488         return Contrib->Offset;
489     return 0;
490   }
491 
492   die_iterator_range dies() {
493     extractDIEsIfNeeded(false);
494     return die_iterator_range(DieArray.begin(), DieArray.end());
495   }
496 
497   virtual void dump(raw_ostream &OS, DIDumpOptions DumpOpts) = 0;
498 private:
499   /// Size in bytes of the .debug_info data associated with this compile unit.
500   size_t getDebugInfoSize() const {
501     return Header.getLength() + 4 - getHeaderSize();
502   }
503 
504   /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it
505   /// hasn't already been done. Returns the number of DIEs parsed at this call.
506   size_t extractDIEsIfNeeded(bool CUDieOnly);
507 
508   /// extractDIEsToVector - Appends all parsed DIEs to a vector.
509   void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs,
510                            std::vector<DWARFDebugInfoEntry> &DIEs) const;
511 
512   /// clearDIEs - Clear parsed DIEs to keep memory usage low.
513   void clearDIEs(bool KeepCUDie);
514 
515   /// parseDWO - Parses .dwo file for current compile unit. Returns true if
516   /// it was actually constructed.
517   bool parseDWO();
518 };
519 
520 } // end namespace llvm
521 
522 #endif // LLVM_DEBUGINFO_DWARF_DWARFUNIT_H
523