xref: /freebsd/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h (revision 6132212808e8dccedc9e5d85fea4390c2f38059a)
1 //===- DWARFUnit.h ----------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H
10 #define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H
11 
12 #include "llvm/ADT/Optional.h"
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/iterator_range.h"
17 #include "llvm/BinaryFormat/Dwarf.h"
18 #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
19 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
21 #include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"
22 #include "llvm/DebugInfo/DWARF/DWARFDie.h"
23 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
24 #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
25 #include "llvm/DebugInfo/DWARF/DWARFSection.h"
26 #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
27 #include "llvm/Support/DataExtractor.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 #include <cstdint>
32 #include <map>
33 #include <memory>
34 #include <utility>
35 #include <vector>
36 
37 namespace llvm {
38 
39 class DWARFAbbreviationDeclarationSet;
40 class DWARFContext;
41 class DWARFDebugAbbrev;
42 class DWARFUnit;
43 
44 /// Base class describing the header of any kind of "unit."  Some information
45 /// is specific to certain unit types.  We separate this class out so we can
46 /// parse the header before deciding what specific kind of unit to construct.
47 class DWARFUnitHeader {
48   // Offset within section.
49   uint64_t Offset = 0;
50   // Version, address size, and DWARF format.
51   dwarf::FormParams FormParams;
52   uint64_t Length = 0;
53   uint64_t AbbrOffset = 0;
54 
55   // For DWO units only.
56   const DWARFUnitIndex::Entry *IndexEntry = nullptr;
57 
58   // For type units only.
59   uint64_t TypeHash = 0;
60   uint64_t TypeOffset = 0;
61 
62   // For v5 split or skeleton compile units only.
63   Optional<uint64_t> DWOId;
64 
65   // Unit type as parsed, or derived from the section kind.
66   uint8_t UnitType = 0;
67 
68   // Size as parsed. uint8_t for compactness.
69   uint8_t Size = 0;
70 
71 public:
72   /// Parse a unit header from \p debug_info starting at \p offset_ptr.
73   /// Note that \p SectionKind is used as a hint to guess the unit type
74   /// for DWARF formats prior to DWARFv5. In DWARFv5 the unit type is
75   /// explicitly defined in the header and the hint is ignored.
76   bool extract(DWARFContext &Context, const DWARFDataExtractor &debug_info,
77                uint64_t *offset_ptr, DWARFSectionKind SectionKind);
78   // For units in DWARF Package File, remember the index entry and update
79   // the abbreviation offset read by extract().
80   bool applyIndexEntry(const DWARFUnitIndex::Entry *Entry);
81   uint64_t getOffset() const { return Offset; }
82   const dwarf::FormParams &getFormParams() const { return FormParams; }
83   uint16_t getVersion() const { return FormParams.Version; }
84   dwarf::DwarfFormat getFormat() const { return FormParams.Format; }
85   uint8_t getAddressByteSize() const { return FormParams.AddrSize; }
86   uint8_t getRefAddrByteSize() const { return FormParams.getRefAddrByteSize(); }
87   uint8_t getDwarfOffsetByteSize() const {
88     return FormParams.getDwarfOffsetByteSize();
89   }
90   uint64_t getLength() const { return Length; }
91   uint64_t getAbbrOffset() const { return AbbrOffset; }
92   Optional<uint64_t> getDWOId() const { return DWOId; }
93   void setDWOId(uint64_t Id) {
94     assert((!DWOId || *DWOId == Id) && "setting DWOId to a different value");
95     DWOId = Id;
96   }
97   const DWARFUnitIndex::Entry *getIndexEntry() const { return IndexEntry; }
98   uint64_t getTypeHash() const { return TypeHash; }
99   uint64_t getTypeOffset() const { return TypeOffset; }
100   uint8_t getUnitType() const { return UnitType; }
101   bool isTypeUnit() const {
102     return UnitType == dwarf::DW_UT_type || UnitType == dwarf::DW_UT_split_type;
103   }
104   uint8_t getSize() const { return Size; }
105   uint8_t getUnitLengthFieldByteSize() const {
106     return dwarf::getUnitLengthFieldByteSize(FormParams.Format);
107   }
108   uint64_t getNextUnitOffset() const {
109     return Offset + Length + getUnitLengthFieldByteSize();
110   }
111 };
112 
113 const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
114                                         DWARFSectionKind Kind);
115 
116 /// Describe a collection of units. Intended to hold all units either from
117 /// .debug_info and .debug_types, or from .debug_info.dwo and .debug_types.dwo.
118 class DWARFUnitVector final : public SmallVector<std::unique_ptr<DWARFUnit>, 1> {
119   std::function<std::unique_ptr<DWARFUnit>(uint64_t, DWARFSectionKind,
120                                            const DWARFSection *,
121                                            const DWARFUnitIndex::Entry *)>
122       Parser;
123   int NumInfoUnits = -1;
124 
125 public:
126   using UnitVector = SmallVectorImpl<std::unique_ptr<DWARFUnit>>;
127   using iterator = typename UnitVector::iterator;
128   using iterator_range = llvm::iterator_range<typename UnitVector::iterator>;
129 
130   DWARFUnit *getUnitForOffset(uint64_t Offset) const;
131   DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E);
132 
133   /// Read units from a .debug_info or .debug_types section.  Calls made
134   /// before finishedInfoUnits() are assumed to be for .debug_info sections,
135   /// calls after finishedInfoUnits() are for .debug_types sections.  Caller
136   /// must not mix calls to addUnitsForSection and addUnitsForDWOSection.
137   void addUnitsForSection(DWARFContext &C, const DWARFSection &Section,
138                           DWARFSectionKind SectionKind);
139   /// Read units from a .debug_info.dwo or .debug_types.dwo section.  Calls
140   /// made before finishedInfoUnits() are assumed to be for .debug_info.dwo
141   /// sections, calls after finishedInfoUnits() are for .debug_types.dwo
142   /// sections.  Caller must not mix calls to addUnitsForSection and
143   /// addUnitsForDWOSection.
144   void addUnitsForDWOSection(DWARFContext &C, const DWARFSection &DWOSection,
145                              DWARFSectionKind SectionKind, bool Lazy = false);
146 
147   /// Add an existing DWARFUnit to this UnitVector. This is used by the DWARF
148   /// verifier to process unit separately.
149   DWARFUnit *addUnit(std::unique_ptr<DWARFUnit> Unit);
150 
151   /// Returns number of all units held by this instance.
152   unsigned getNumUnits() const { return size(); }
153   /// Returns number of units from all .debug_info[.dwo] sections.
154   unsigned getNumInfoUnits() const {
155     return NumInfoUnits == -1 ? size() : NumInfoUnits;
156   }
157   /// Returns number of units from all .debug_types[.dwo] sections.
158   unsigned getNumTypesUnits() const { return size() - NumInfoUnits; }
159   /// Indicate that parsing .debug_info[.dwo] is done, and remaining units
160   /// will be from .debug_types[.dwo].
161   void finishedInfoUnits() { NumInfoUnits = size(); }
162 
163 private:
164   void addUnitsImpl(DWARFContext &Context, const DWARFObject &Obj,
165                     const DWARFSection &Section, const DWARFDebugAbbrev *DA,
166                     const DWARFSection *RS, const DWARFSection *LocSection,
167                     StringRef SS, const DWARFSection &SOS,
168                     const DWARFSection *AOS, const DWARFSection &LS, bool LE,
169                     bool IsDWO, bool Lazy, DWARFSectionKind SectionKind);
170 };
171 
/// Represents a unit's contribution to the string offsets table.
174 struct StrOffsetsContributionDescriptor {
175   uint64_t Base = 0;
176   /// The contribution size not including the header.
177   uint64_t Size = 0;
178   /// Format and version.
179   dwarf::FormParams FormParams = {0, 0, dwarf::DwarfFormat::DWARF32};
180 
181   StrOffsetsContributionDescriptor(uint64_t Base, uint64_t Size,
182                                    uint8_t Version, dwarf::DwarfFormat Format)
183       : Base(Base), Size(Size), FormParams({Version, 0, Format}) {}
184   StrOffsetsContributionDescriptor() = default;
185 
186   uint8_t getVersion() const { return FormParams.Version; }
187   dwarf::DwarfFormat getFormat() const { return FormParams.Format; }
188   uint8_t getDwarfOffsetByteSize() const {
189     return FormParams.getDwarfOffsetByteSize();
190   }
191   /// Determine whether a contribution to the string offsets table is
192   /// consistent with the relevant section size and that its length is
193   /// a multiple of the size of one of its entries.
194   Expected<StrOffsetsContributionDescriptor>
195   validateContributionSize(DWARFDataExtractor &DA);
196 };
197 
198 class DWARFUnit {
199   DWARFContext &Context;
200   /// Section containing this DWARFUnit.
201   const DWARFSection &InfoSection;
202 
203   DWARFUnitHeader Header;
204   const DWARFDebugAbbrev *Abbrev;
205   const DWARFSection *RangeSection;
206   uint64_t RangeSectionBase;
207   const DWARFSection *LocSection;
208   uint64_t LocSectionBase;
209 
210   /// Location table of this unit.
211   std::unique_ptr<DWARFLocationTable> LocTable;
212 
213   const DWARFSection &LineSection;
214   StringRef StringSection;
215   const DWARFSection &StringOffsetSection;
216   const DWARFSection *AddrOffsetSection;
217   Optional<uint64_t> AddrOffsetSectionBase;
218   bool isLittleEndian;
219   bool IsDWO;
220   const DWARFUnitVector &UnitVector;
221 
222   /// Start, length, and DWARF format of the unit's contribution to the string
223   /// offsets table (DWARF v5).
224   Optional<StrOffsetsContributionDescriptor> StringOffsetsTableContribution;
225 
226   /// A table of range lists (DWARF v5 and later).
227   Optional<DWARFDebugRnglistTable> RngListTable;
228   Optional<DWARFListTableHeader> LoclistTableHeader;
229 
230   mutable const DWARFAbbreviationDeclarationSet *Abbrevs;
231   llvm::Optional<object::SectionedAddress> BaseAddr;
232   /// The compile unit debug information entry items.
233   std::vector<DWARFDebugInfoEntry> DieArray;
234 
235   /// Map from range's start address to end address and corresponding DIE.
236   /// IntervalMap does not support range removal, as a result, we use the
237   /// std::map::upper_bound for address range lookup.
238   std::map<uint64_t, std::pair<uint64_t, DWARFDie>> AddrDieMap;
239 
240   using die_iterator_range =
241       iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>;
242 
243   std::shared_ptr<DWARFUnit> DWO;
244 
245   uint32_t getDIEIndex(const DWARFDebugInfoEntry *Die) {
246     auto First = DieArray.data();
247     assert(Die >= First && Die < First + DieArray.size());
248     return Die - First;
249   }
250 
251 protected:
252   const DWARFUnitHeader &getHeader() const { return Header; }
253 
254   /// Size in bytes of the parsed unit header.
255   uint32_t getHeaderSize() const { return Header.getSize(); }
256 
257   /// Find the unit's contribution to the string offsets table and determine its
258   /// length and form. The given offset is expected to be derived from the unit
259   /// DIE's DW_AT_str_offsets_base attribute.
260   Expected<Optional<StrOffsetsContributionDescriptor>>
261   determineStringOffsetsTableContribution(DWARFDataExtractor &DA);
262 
263   /// Find the unit's contribution to the string offsets table and determine its
264   /// length and form. The given offset is expected to be 0 in a dwo file or,
265   /// in a dwp file, the start of the unit's contribution to the string offsets
266   /// table section (as determined by the index table).
267   Expected<Optional<StrOffsetsContributionDescriptor>>
268   determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA);
269 
270 public:
271   DWARFUnit(DWARFContext &Context, const DWARFSection &Section,
272             const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA,
273             const DWARFSection *RS, const DWARFSection *LocSection,
274             StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS,
275             const DWARFSection &LS, bool LE, bool IsDWO,
276             const DWARFUnitVector &UnitVector);
277 
278   virtual ~DWARFUnit();
279 
280   bool isDWOUnit() const { return IsDWO; }
281   DWARFContext& getContext() const { return Context; }
282   const DWARFSection &getInfoSection() const { return InfoSection; }
283   uint64_t getOffset() const { return Header.getOffset(); }
284   const dwarf::FormParams &getFormParams() const {
285     return Header.getFormParams();
286   }
287   uint16_t getVersion() const { return Header.getVersion(); }
288   uint8_t getAddressByteSize() const { return Header.getAddressByteSize(); }
289   uint8_t getRefAddrByteSize() const { return Header.getRefAddrByteSize(); }
290   uint8_t getDwarfOffsetByteSize() const {
291     return Header.getDwarfOffsetByteSize();
292   }
293   uint64_t getLength() const { return Header.getLength(); }
294   dwarf::DwarfFormat getFormat() const { return Header.getFormat(); }
295   uint8_t getUnitType() const { return Header.getUnitType(); }
296   bool isTypeUnit() const { return Header.isTypeUnit(); }
297   uint64_t getNextUnitOffset() const { return Header.getNextUnitOffset(); }
298   const DWARFSection &getLineSection() const { return LineSection; }
299   StringRef getStringSection() const { return StringSection; }
300   const DWARFSection &getStringOffsetSection() const {
301     return StringOffsetSection;
302   }
303 
304   void setAddrOffsetSection(const DWARFSection *AOS, uint64_t Base) {
305     AddrOffsetSection = AOS;
306     AddrOffsetSectionBase = Base;
307   }
308 
309   /// Recursively update address to Die map.
310   void updateAddressDieMap(DWARFDie Die);
311 
312   void setRangesSection(const DWARFSection *RS, uint64_t Base) {
313     RangeSection = RS;
314     RangeSectionBase = Base;
315   }
316   void setLocSection(const DWARFSection *LS, uint64_t Base) {
317     LocSection = LS;
318     LocSectionBase = Base;
319   }
320 
321   uint64_t getLocSectionBase() const {
322     return LocSectionBase;
323   }
324 
325   Optional<object::SectionedAddress>
326   getAddrOffsetSectionItem(uint32_t Index) const;
327   Optional<uint64_t> getStringOffsetSectionItem(uint32_t Index) const;
328 
329   DWARFDataExtractor getDebugInfoExtractor() const;
330 
331   DataExtractor getStringExtractor() const {
332     return DataExtractor(StringSection, false, 0);
333   }
334 
335   const DWARFLocationTable &getLocationTable() { return *LocTable; }
336 
337   /// Extract the range list referenced by this compile unit from the
338   /// .debug_ranges section. If the extraction is unsuccessful, an error
339   /// is returned. Successful extraction requires that the compile unit
340   /// has already been extracted.
341   Error extractRangeList(uint64_t RangeListOffset,
342                          DWARFDebugRangeList &RangeList) const;
343   void clear();
344 
345   const Optional<StrOffsetsContributionDescriptor> &
346   getStringOffsetsTableContribution() const {
347     return StringOffsetsTableContribution;
348   }
349 
350   uint8_t getDwarfStringOffsetsByteSize() const {
351     assert(StringOffsetsTableContribution);
352     return StringOffsetsTableContribution->getDwarfOffsetByteSize();
353   }
354 
355   uint64_t getStringOffsetsBase() const {
356     assert(StringOffsetsTableContribution);
357     return StringOffsetsTableContribution->Base;
358   }
359 
360   const DWARFAbbreviationDeclarationSet *getAbbreviations() const;
361 
362   static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) {
363     switch (UnitType) {
364     case dwarf::DW_UT_compile:
365       return Tag == dwarf::DW_TAG_compile_unit;
366     case dwarf::DW_UT_type:
367       return Tag == dwarf::DW_TAG_type_unit;
368     case dwarf::DW_UT_partial:
369       return Tag == dwarf::DW_TAG_partial_unit;
370     case dwarf::DW_UT_skeleton:
371       return Tag == dwarf::DW_TAG_skeleton_unit;
372     case dwarf::DW_UT_split_compile:
373     case dwarf::DW_UT_split_type:
374       return dwarf::isUnitType(Tag);
375     }
376     return false;
377   }
378 
379   llvm::Optional<object::SectionedAddress> getBaseAddress();
380 
381   DWARFDie getUnitDIE(bool ExtractUnitDIEOnly = true) {
382     extractDIEsIfNeeded(ExtractUnitDIEOnly);
383     if (DieArray.empty())
384       return DWARFDie();
385     return DWARFDie(this, &DieArray[0]);
386   }
387 
388   DWARFDie getNonSkeletonUnitDIE(bool ExtractUnitDIEOnly = true) {
389     parseDWO();
390     if (DWO)
391       return DWO->getUnitDIE(ExtractUnitDIEOnly);
392     return getUnitDIE(ExtractUnitDIEOnly);
393   }
394 
395   const char *getCompilationDir();
396   Optional<uint64_t> getDWOId() {
397     extractDIEsIfNeeded(/*CUDieOnly*/ true);
398     return getHeader().getDWOId();
399   }
400   void setDWOId(uint64_t NewID) { Header.setDWOId(NewID); }
401 
402   /// Return a vector of address ranges resulting from a (possibly encoded)
403   /// range list starting at a given offset in the appropriate ranges section.
404   Expected<DWARFAddressRangesVector> findRnglistFromOffset(uint64_t Offset);
405 
406   /// Return a vector of address ranges retrieved from an encoded range
407   /// list whose offset is found via a table lookup given an index (DWARF v5
408   /// and later).
409   Expected<DWARFAddressRangesVector> findRnglistFromIndex(uint32_t Index);
410 
411   /// Return a rangelist's offset based on an index. The index designates
412   /// an entry in the rangelist table's offset array and is supplied by
413   /// DW_FORM_rnglistx.
414   Optional<uint64_t> getRnglistOffset(uint32_t Index) {
415     if (!RngListTable)
416       return None;
417     if (Optional<uint64_t> Off = RngListTable->getOffsetEntry(Index))
418       return *Off + RangeSectionBase;
419     return None;
420   }
421 
422   Optional<uint64_t> getLoclistOffset(uint32_t Index) {
423     if (!LoclistTableHeader)
424       return None;
425     if (Optional<uint64_t> Off = LoclistTableHeader->getOffsetEntry(Index))
426       return *Off + getLocSectionBase();
427     return None;
428   }
429   Expected<DWARFAddressRangesVector> collectAddressRanges();
430 
431   Expected<DWARFLocationExpressionsVector>
432   findLoclistFromOffset(uint64_t Offset);
433 
434   /// Returns subprogram DIE with address range encompassing the provided
435   /// address. The pointer is alive as long as parsed compile unit DIEs are not
436   /// cleared.
437   DWARFDie getSubroutineForAddress(uint64_t Address);
438 
439   /// getInlinedChainForAddress - fetches inlined chain for a given address.
440   /// Returns empty chain if there is no subprogram containing address. The
441   /// chain is valid as long as parsed compile unit DIEs are not cleared.
442   void getInlinedChainForAddress(uint64_t Address,
443                                  SmallVectorImpl<DWARFDie> &InlinedChain);
444 
445   /// Return the DWARFUnitVector containing this unit.
446   const DWARFUnitVector &getUnitVector() const { return UnitVector; }
447 
448   /// Returns the number of DIEs in the unit. Parses the unit
449   /// if necessary.
450   unsigned getNumDIEs() {
451     extractDIEsIfNeeded(false);
452     return DieArray.size();
453   }
454 
455   /// Return the index of a DIE inside the unit's DIE vector.
456   ///
457   /// It is illegal to call this method with a DIE that hasn't be
458   /// created by this unit. In other word, it's illegal to call this
459   /// method on a DIE that isn't accessible by following
460   /// children/sibling links starting from this unit's getUnitDIE().
461   uint32_t getDIEIndex(const DWARFDie &D) {
462     return getDIEIndex(D.getDebugInfoEntry());
463   }
464 
465   /// Return the DIE object at the given index.
466   DWARFDie getDIEAtIndex(unsigned Index) {
467     assert(Index < DieArray.size());
468     return DWARFDie(this, &DieArray[Index]);
469   }
470 
471   DWARFDie getParent(const DWARFDebugInfoEntry *Die);
472   DWARFDie getSibling(const DWARFDebugInfoEntry *Die);
473   DWARFDie getPreviousSibling(const DWARFDebugInfoEntry *Die);
474   DWARFDie getFirstChild(const DWARFDebugInfoEntry *Die);
475   DWARFDie getLastChild(const DWARFDebugInfoEntry *Die);
476 
477   /// Return the DIE object for a given offset inside the
478   /// unit's DIE vector.
479   ///
480   /// The unit needs to have its DIEs extracted for this method to work.
481   DWARFDie getDIEForOffset(uint64_t Offset) {
482     extractDIEsIfNeeded(false);
483     assert(!DieArray.empty());
484     auto It =
485         llvm::partition_point(DieArray, [=](const DWARFDebugInfoEntry &DIE) {
486           return DIE.getOffset() < Offset;
487         });
488     if (It != DieArray.end() && It->getOffset() == Offset)
489       return DWARFDie(this, &*It);
490     return DWARFDie();
491   }
492 
493   uint32_t getLineTableOffset() const {
494     if (auto IndexEntry = Header.getIndexEntry())
495       if (const auto *Contrib = IndexEntry->getContribution(DW_SECT_LINE))
496         return Contrib->Offset;
497     return 0;
498   }
499 
500   die_iterator_range dies() {
501     extractDIEsIfNeeded(false);
502     return die_iterator_range(DieArray.begin(), DieArray.end());
503   }
504 
505   virtual void dump(raw_ostream &OS, DIDumpOptions DumpOpts) = 0;
506 
507   Error tryExtractDIEsIfNeeded(bool CUDieOnly);
508 
509 private:
510   /// Size in bytes of the .debug_info data associated with this compile unit.
511   size_t getDebugInfoSize() const {
512     return Header.getLength() + Header.getUnitLengthFieldByteSize() -
513            getHeaderSize();
514   }
515 
516   /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it
517   /// hasn't already been done
518   void extractDIEsIfNeeded(bool CUDieOnly);
519 
520   /// extractDIEsToVector - Appends all parsed DIEs to a vector.
521   void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs,
522                            std::vector<DWARFDebugInfoEntry> &DIEs) const;
523 
524   /// clearDIEs - Clear parsed DIEs to keep memory usage low.
525   void clearDIEs(bool KeepCUDie);
526 
527   /// parseDWO - Parses .dwo file for current compile unit. Returns true if
528   /// it was actually constructed.
529   bool parseDWO();
530 };
531 
532 } // end namespace llvm
533 
534 #endif // LLVM_DEBUGINFO_DWARF_DWARFUNIT_H
535