xref: /freebsd/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h (revision 5e3190f700637fcfc1a52daeaa4a031fdd2557c7)
1 //===- DWARFAcceleratorTable.h ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_DWARF_DWARFACCELERATORTABLE_H
10 #define LLVM_DEBUGINFO_DWARF_DWARFACCELERATORTABLE_H
11 
12 #include "llvm/ADT/DenseSet.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/BinaryFormat/Dwarf.h"
15 #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
16 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
17 #include <cstdint>
18 #include <utility>
19 
20 namespace llvm {
21 
22 class raw_ostream;
23 class ScopedPrinter;
24 
25 /// The accelerator tables are designed to allow efficient random access
26 /// (using a symbol name as a key) into debug info by providing an index of the
27 /// debug info DIEs. This class implements the common functionality of Apple and
28 /// DWARF 5 accelerator tables.
29 /// TODO: Generalize the rest of the AppleAcceleratorTable interface and move it
30 /// to this class.
31 class DWARFAcceleratorTable {
32 protected:
33   DWARFDataExtractor AccelSection;
34   DataExtractor StringSection;
35 
36 public:
37   /// An abstract class representing a single entry in the accelerator tables.
38   class Entry {
39   protected:
40     SmallVector<DWARFFormValue, 3> Values;
41 
42     Entry() = default;
43 
44     // Make these protected so only (final) subclasses can be copied around.
45     Entry(const Entry &) = default;
46     Entry(Entry &&) = default;
47     Entry &operator=(const Entry &) = default;
48     Entry &operator=(Entry &&) = default;
49     ~Entry() = default;
50 
51 
52   public:
53     /// Returns the Offset of the Compilation Unit associated with this
54     /// Accelerator Entry or std::nullopt if the Compilation Unit offset is not
55     /// recorded in this Accelerator Entry.
56     virtual std::optional<uint64_t> getCUOffset() const = 0;
57 
58     /// Returns the Tag of the Debug Info Entry associated with this
59     /// Accelerator Entry or std::nullopt if the Tag is not recorded in this
60     /// Accelerator Entry.
61     virtual std::optional<dwarf::Tag> getTag() const = 0;
62 
63     /// Returns the raw values of fields in the Accelerator Entry. In general,
64     /// these can only be interpreted with the help of the metadata in the
65     /// owning Accelerator Table.
66     ArrayRef<DWARFFormValue> getValues() const { return Values; }
67   };
68 
69   DWARFAcceleratorTable(const DWARFDataExtractor &AccelSection,
70                         DataExtractor StringSection)
71       : AccelSection(AccelSection), StringSection(StringSection) {}
72   virtual ~DWARFAcceleratorTable();
73 
74   virtual Error extract() = 0;
75   virtual void dump(raw_ostream &OS) const = 0;
76 
77   DWARFAcceleratorTable(const DWARFAcceleratorTable &) = delete;
78   void operator=(const DWARFAcceleratorTable &) = delete;
79 };
80 
81 /// This implements the Apple accelerator table format, a precursor of the
82 /// DWARF 5 accelerator table format.
83 class AppleAcceleratorTable : public DWARFAcceleratorTable {
84   struct Header {
85     uint32_t Magic;
86     uint16_t Version;
87     uint16_t HashFunction;
88     uint32_t BucketCount;
89     uint32_t HashCount;
90     uint32_t HeaderDataLength;
91 
92     void dump(ScopedPrinter &W) const;
93   };
94 
95   struct HeaderData {
96     using AtomType = uint16_t;
97     using Form = dwarf::Form;
98 
99     uint64_t DIEOffsetBase;
100     SmallVector<std::pair<AtomType, Form>, 3> Atoms;
101 
102     std::optional<uint64_t>
103     extractOffset(std::optional<DWARFFormValue> Value) const;
104   };
105 
106   struct Header Hdr;
107   struct HeaderData HdrData;
108   bool IsValid = false;
109 
110   /// Returns true if we should continue scanning for entries or false if we've
111   /// reached the last (sentinel) entry of encountered a parsing error.
112   bool dumpName(ScopedPrinter &W, SmallVectorImpl<DWARFFormValue> &AtomForms,
113                 uint64_t *DataOffset) const;
114 
115 public:
116   /// Apple-specific implementation of an Accelerator Entry.
117   class Entry final : public DWARFAcceleratorTable::Entry {
118     const HeaderData *HdrData = nullptr;
119 
120     Entry(const HeaderData &Data);
121     Entry() = default;
122 
123     void extract(const AppleAcceleratorTable &AccelTable, uint64_t *Offset);
124 
125   public:
126     std::optional<uint64_t> getCUOffset() const override;
127 
128     /// Returns the Section Offset of the Debug Info Entry associated with this
129     /// Accelerator Entry or std::nullopt if the DIE offset is not recorded in
130     /// this Accelerator Entry. The returned offset is relative to the start of
131     /// the Section containing the DIE.
132     std::optional<uint64_t> getDIESectionOffset() const;
133 
134     std::optional<dwarf::Tag> getTag() const override;
135 
136     /// Returns the value of the Atom in this Accelerator Entry, if the Entry
137     /// contains such Atom.
138     std::optional<DWARFFormValue> lookup(HeaderData::AtomType Atom) const;
139 
140     friend class AppleAcceleratorTable;
141     friend class ValueIterator;
142   };
143 
144   class ValueIterator {
145     const AppleAcceleratorTable *AccelTable = nullptr;
146     Entry Current;           ///< The current entry.
147     uint64_t DataOffset = 0; ///< Offset into the section.
148     unsigned Data = 0; ///< Current data entry.
149     unsigned NumData = 0; ///< Number of data entries.
150 
151     /// Advance the iterator.
152     void Next();
153 
154   public:
155     using iterator_category = std::input_iterator_tag;
156     using value_type = Entry;
157     using difference_type = std::ptrdiff_t;
158     using pointer = value_type *;
159     using reference = value_type &;
160 
161     /// Construct a new iterator for the entries at \p DataOffset.
162     ValueIterator(const AppleAcceleratorTable &AccelTable, uint64_t DataOffset);
163     /// End marker.
164     ValueIterator() = default;
165 
166     const Entry &operator*() const { return Current; }
167     ValueIterator &operator++() { Next(); return *this; }
168     ValueIterator operator++(int) {
169       ValueIterator I = *this;
170       Next();
171       return I;
172     }
173     friend bool operator==(const ValueIterator &A, const ValueIterator &B) {
174       return A.NumData == B.NumData && A.DataOffset == B.DataOffset;
175     }
176     friend bool operator!=(const ValueIterator &A, const ValueIterator &B) {
177       return !(A == B);
178     }
179   };
180 
181   AppleAcceleratorTable(const DWARFDataExtractor &AccelSection,
182                         DataExtractor StringSection)
183       : DWARFAcceleratorTable(AccelSection, StringSection) {}
184 
185   Error extract() override;
186   uint32_t getNumBuckets();
187   uint32_t getNumHashes();
188   uint32_t getSizeHdr();
189   uint32_t getHeaderDataLength();
190 
191   /// Return the Atom description, which can be used to interpret the raw values
192   /// of the Accelerator Entries in this table.
193   ArrayRef<std::pair<HeaderData::AtomType, HeaderData::Form>> getAtomsDesc();
194   bool validateForms();
195 
196   /// Return information related to the DWARF DIE we're looking for when
197   /// performing a lookup by name.
198   ///
199   /// \param HashDataOffset an offset into the hash data table
200   /// \returns <DieOffset, DieTag>
201   /// DieOffset is the offset into the .debug_info section for the DIE
202   /// related to the input hash data offset.
203   /// DieTag is the tag of the DIE
204   std::pair<uint64_t, dwarf::Tag> readAtoms(uint64_t *HashDataOffset);
205   void dump(raw_ostream &OS) const override;
206 
207   /// Look up all entries in the accelerator table matching \c Key.
208   iterator_range<ValueIterator> equal_range(StringRef Key) const;
209 };
210 
211 /// .debug_names section consists of one or more units. Each unit starts with a
212 /// header, which is followed by a list of compilation units, local and foreign
213 /// type units.
214 ///
215 /// These may be followed by an (optional) hash lookup table, which consists of
216 /// an array of buckets and hashes similar to the apple tables above. The only
217 /// difference is that the hashes array is 1-based, and consequently an empty
218 /// bucket is denoted by 0 and not UINT32_MAX.
219 ///
220 /// Next is the name table, which consists of an array of names and array of
221 /// entry offsets. This is different from the apple tables, which store names
222 /// next to the actual entries.
223 ///
224 /// The structure of the entries is described by an abbreviations table, which
225 /// comes after the name table. Unlike the apple tables, which have a uniform
226 /// entry structure described in the header, each .debug_names entry may have
227 /// different index attributes (DW_IDX_???) attached to it.
228 ///
229 /// The last segment consists of a list of entries, which is a 0-terminated list
230 /// referenced by the name table and interpreted with the help of the
231 /// abbreviation table.
232 class DWARFDebugNames : public DWARFAcceleratorTable {
233 public:
234   class NameIndex;
235   class NameIterator;
236   class ValueIterator;
237 
238   /// DWARF v5 Name Index header.
239   struct Header {
240     uint64_t UnitLength;
241     dwarf::DwarfFormat Format;
242     uint16_t Version;
243     uint32_t CompUnitCount;
244     uint32_t LocalTypeUnitCount;
245     uint32_t ForeignTypeUnitCount;
246     uint32_t BucketCount;
247     uint32_t NameCount;
248     uint32_t AbbrevTableSize;
249     uint32_t AugmentationStringSize;
250     SmallString<8> AugmentationString;
251 
252     Error extract(const DWARFDataExtractor &AS, uint64_t *Offset);
253     void dump(ScopedPrinter &W) const;
254   };
255 
256   /// Index attribute and its encoding.
257   struct AttributeEncoding {
258     dwarf::Index Index;
259     dwarf::Form Form;
260 
261     constexpr AttributeEncoding(dwarf::Index Index, dwarf::Form Form)
262         : Index(Index), Form(Form) {}
263 
264     friend bool operator==(const AttributeEncoding &LHS,
265                            const AttributeEncoding &RHS) {
266       return LHS.Index == RHS.Index && LHS.Form == RHS.Form;
267     }
268   };
269 
270   /// Abbreviation describing the encoding of Name Index entries.
271   struct Abbrev {
272     uint32_t Code;  ///< Abbreviation code
273     dwarf::Tag Tag; ///< Dwarf Tag of the described entity.
274     std::vector<AttributeEncoding> Attributes; ///< List of index attributes.
275 
276     Abbrev(uint32_t Code, dwarf::Tag Tag,
277            std::vector<AttributeEncoding> Attributes)
278         : Code(Code), Tag(Tag), Attributes(std::move(Attributes)) {}
279 
280     void dump(ScopedPrinter &W) const;
281   };
282 
283   /// DWARF v5-specific implementation of an Accelerator Entry.
284   class Entry final : public DWARFAcceleratorTable::Entry {
285     const NameIndex *NameIdx;
286     const Abbrev *Abbr;
287 
288     Entry(const NameIndex &NameIdx, const Abbrev &Abbr);
289 
290   public:
291     std::optional<uint64_t> getCUOffset() const override;
292     std::optional<dwarf::Tag> getTag() const override { return tag(); }
293 
294     /// Returns the Index into the Compilation Unit list of the owning Name
295     /// Index or std::nullopt if this Accelerator Entry does not have an
296     /// associated Compilation Unit. It is up to the user to verify that the
297     /// returned Index is valid in the owning NameIndex (or use getCUOffset(),
298     /// which will handle that check itself). Note that entries in NameIndexes
299     /// which index just a single Compilation Unit are implicitly associated
300     /// with that unit, so this function will return 0 even without an explicit
301     /// DW_IDX_compile_unit attribute.
302     std::optional<uint64_t> getCUIndex() const;
303 
304     /// .debug_names-specific getter, which always succeeds (DWARF v5 index
305     /// entries always have a tag).
306     dwarf::Tag tag() const { return Abbr->Tag; }
307 
308     /// Returns the Offset of the DIE within the containing CU or TU.
309     std::optional<uint64_t> getDIEUnitOffset() const;
310 
311     /// Return the Abbreviation that can be used to interpret the raw values of
312     /// this Accelerator Entry.
313     const Abbrev &getAbbrev() const { return *Abbr; }
314 
315     /// Returns the value of the Index Attribute in this Accelerator Entry, if
316     /// the Entry contains such Attribute.
317     std::optional<DWARFFormValue> lookup(dwarf::Index Index) const;
318 
319     void dump(ScopedPrinter &W) const;
320 
321     friend class NameIndex;
322     friend class ValueIterator;
323   };
324 
325   /// Error returned by NameIndex::getEntry to report it has reached the end of
326   /// the entry list.
327   class SentinelError : public ErrorInfo<SentinelError> {
328   public:
329     static char ID;
330 
331     void log(raw_ostream &OS) const override { OS << "Sentinel"; }
332     std::error_code convertToErrorCode() const override;
333   };
334 
335 private:
336   /// DenseMapInfo for struct Abbrev.
337   struct AbbrevMapInfo {
338     static Abbrev getEmptyKey();
339     static Abbrev getTombstoneKey();
340     static unsigned getHashValue(uint32_t Code) {
341       return DenseMapInfo<uint32_t>::getHashValue(Code);
342     }
343     static unsigned getHashValue(const Abbrev &Abbr) {
344       return getHashValue(Abbr.Code);
345     }
346     static bool isEqual(uint32_t LHS, const Abbrev &RHS) {
347       return LHS == RHS.Code;
348     }
349     static bool isEqual(const Abbrev &LHS, const Abbrev &RHS) {
350       return LHS.Code == RHS.Code;
351     }
352   };
353 
354 public:
355   /// A single entry in the Name Table (DWARF v5 sect. 6.1.1.4.6) of the Name
356   /// Index.
357   class NameTableEntry {
358     DataExtractor StrData;
359 
360     uint32_t Index;
361     uint64_t StringOffset;
362     uint64_t EntryOffset;
363 
364   public:
365     NameTableEntry(const DataExtractor &StrData, uint32_t Index,
366                    uint64_t StringOffset, uint64_t EntryOffset)
367         : StrData(StrData), Index(Index), StringOffset(StringOffset),
368           EntryOffset(EntryOffset) {}
369 
370     /// Return the index of this name in the parent Name Index.
371     uint32_t getIndex() const { return Index; }
372 
373     /// Returns the offset of the name of the described entities.
374     uint64_t getStringOffset() const { return StringOffset; }
375 
376     /// Return the string referenced by this name table entry or nullptr if the
377     /// string offset is not valid.
378     const char *getString() const {
379       uint64_t Off = StringOffset;
380       return StrData.getCStr(&Off);
381     }
382 
383     /// Returns the offset of the first Entry in the list.
384     uint64_t getEntryOffset() const { return EntryOffset; }
385   };
386 
387   /// Represents a single accelerator table within the DWARF v5 .debug_names
388   /// section.
389   class NameIndex {
390     DenseSet<Abbrev, AbbrevMapInfo> Abbrevs;
391     struct Header Hdr;
392     const DWARFDebugNames &Section;
393 
394     // Base of the whole unit and of various important tables, as offsets from
395     // the start of the section.
396     uint64_t Base;
397     uint64_t CUsBase;
398     uint64_t BucketsBase;
399     uint64_t HashesBase;
400     uint64_t StringOffsetsBase;
401     uint64_t EntryOffsetsBase;
402     uint64_t EntriesBase;
403 
404     void dumpCUs(ScopedPrinter &W) const;
405     void dumpLocalTUs(ScopedPrinter &W) const;
406     void dumpForeignTUs(ScopedPrinter &W) const;
407     void dumpAbbreviations(ScopedPrinter &W) const;
408     bool dumpEntry(ScopedPrinter &W, uint64_t *Offset) const;
409     void dumpName(ScopedPrinter &W, const NameTableEntry &NTE,
410                   std::optional<uint32_t> Hash) const;
411     void dumpBucket(ScopedPrinter &W, uint32_t Bucket) const;
412 
413     Expected<AttributeEncoding> extractAttributeEncoding(uint64_t *Offset);
414 
415     Expected<std::vector<AttributeEncoding>>
416     extractAttributeEncodings(uint64_t *Offset);
417 
418     Expected<Abbrev> extractAbbrev(uint64_t *Offset);
419 
420   public:
421     NameIndex(const DWARFDebugNames &Section, uint64_t Base)
422         : Section(Section), Base(Base) {}
423 
424     /// Reads offset of compilation unit CU. CU is 0-based.
425     uint64_t getCUOffset(uint32_t CU) const;
426     uint32_t getCUCount() const { return Hdr.CompUnitCount; }
427 
428     /// Reads offset of local type unit TU, TU is 0-based.
429     uint64_t getLocalTUOffset(uint32_t TU) const;
430     uint32_t getLocalTUCount() const { return Hdr.LocalTypeUnitCount; }
431 
432     /// Reads signature of foreign type unit TU. TU is 0-based.
433     uint64_t getForeignTUSignature(uint32_t TU) const;
434     uint32_t getForeignTUCount() const { return Hdr.ForeignTypeUnitCount; }
435 
436     /// Reads an entry in the Bucket Array for the given Bucket. The returned
437     /// value is a (1-based) index into the Names, StringOffsets and
438     /// EntryOffsets arrays. The input Bucket index is 0-based.
439     uint32_t getBucketArrayEntry(uint32_t Bucket) const;
440     uint32_t getBucketCount() const { return Hdr.BucketCount; }
441 
442     /// Reads an entry in the Hash Array for the given Index. The input Index
443     /// is 1-based.
444     uint32_t getHashArrayEntry(uint32_t Index) const;
445 
446     /// Reads an entry in the Name Table for the given Index. The Name Table
447     /// consists of two arrays -- String Offsets and Entry Offsets. The returned
448     /// offsets are relative to the starts of respective sections. Input Index
449     /// is 1-based.
450     NameTableEntry getNameTableEntry(uint32_t Index) const;
451 
452     uint32_t getNameCount() const { return Hdr.NameCount; }
453 
454     const DenseSet<Abbrev, AbbrevMapInfo> &getAbbrevs() const {
455       return Abbrevs;
456     }
457 
458     Expected<Entry> getEntry(uint64_t *Offset) const;
459 
460     /// Look up all entries in this Name Index matching \c Key.
461     iterator_range<ValueIterator> equal_range(StringRef Key) const;
462 
463     NameIterator begin() const { return NameIterator(this, 1); }
464     NameIterator end() const { return NameIterator(this, getNameCount() + 1); }
465 
466     Error extract();
467     uint64_t getUnitOffset() const { return Base; }
468     uint64_t getNextUnitOffset() const {
469       return Base + dwarf::getUnitLengthFieldByteSize(Hdr.Format) +
470              Hdr.UnitLength;
471     }
472     void dump(ScopedPrinter &W) const;
473 
474     friend class DWARFDebugNames;
475   };
476 
477   class ValueIterator {
478   public:
479     using iterator_category = std::input_iterator_tag;
480     using value_type = Entry;
481     using difference_type = std::ptrdiff_t;
482     using pointer = value_type *;
483     using reference = value_type &;
484 
485   private:
486     /// The Name Index we are currently iterating through. The implementation
487     /// relies on the fact that this can also be used as an iterator into the
488     /// "NameIndices" vector in the Accelerator section.
489     const NameIndex *CurrentIndex = nullptr;
490 
491     /// Whether this is a local iterator (searches in CurrentIndex only) or not
492     /// (searches all name indices).
493     bool IsLocal;
494 
495     std::optional<Entry> CurrentEntry;
496     uint64_t DataOffset = 0; ///< Offset into the section.
497     std::string Key;         ///< The Key we are searching for.
498     std::optional<uint32_t> Hash; ///< Hash of Key, if it has been computed.
499 
500     bool getEntryAtCurrentOffset();
501     std::optional<uint64_t> findEntryOffsetInCurrentIndex();
502     bool findInCurrentIndex();
503     void searchFromStartOfCurrentIndex();
504     void next();
505 
506     /// Set the iterator to the "end" state.
507     void setEnd() { *this = ValueIterator(); }
508 
509   public:
510     /// Create a "begin" iterator for looping over all entries in the
511     /// accelerator table matching Key. The iterator will run through all Name
512     /// Indexes in the section in sequence.
513     ValueIterator(const DWARFDebugNames &AccelTable, StringRef Key);
514 
515     /// Create a "begin" iterator for looping over all entries in a specific
516     /// Name Index. Other indices in the section will not be visited.
517     ValueIterator(const NameIndex &NI, StringRef Key);
518 
519     /// End marker.
520     ValueIterator() = default;
521 
522     const Entry &operator*() const { return *CurrentEntry; }
523     ValueIterator &operator++() {
524       next();
525       return *this;
526     }
527     ValueIterator operator++(int) {
528       ValueIterator I = *this;
529       next();
530       return I;
531     }
532 
533     friend bool operator==(const ValueIterator &A, const ValueIterator &B) {
534       return A.CurrentIndex == B.CurrentIndex && A.DataOffset == B.DataOffset;
535     }
536     friend bool operator!=(const ValueIterator &A, const ValueIterator &B) {
537       return !(A == B);
538     }
539   };
540 
541   class NameIterator {
542 
543     /// The Name Index we are iterating through.
544     const NameIndex *CurrentIndex;
545 
546     /// The current name in the Name Index.
547     uint32_t CurrentName;
548 
549     void next() {
550       assert(CurrentName <= CurrentIndex->getNameCount());
551       ++CurrentName;
552     }
553 
554   public:
555     using iterator_category = std::input_iterator_tag;
556     using value_type = NameTableEntry;
557     using difference_type = uint32_t;
558     using pointer = NameTableEntry *;
559     using reference = NameTableEntry; // We return entries by value.
560 
561     /// Creates an iterator whose initial position is name CurrentName in
562     /// CurrentIndex.
563     NameIterator(const NameIndex *CurrentIndex, uint32_t CurrentName)
564         : CurrentIndex(CurrentIndex), CurrentName(CurrentName) {}
565 
566     NameTableEntry operator*() const {
567       return CurrentIndex->getNameTableEntry(CurrentName);
568     }
569     NameIterator &operator++() {
570       next();
571       return *this;
572     }
573     NameIterator operator++(int) {
574       NameIterator I = *this;
575       next();
576       return I;
577     }
578 
579     friend bool operator==(const NameIterator &A, const NameIterator &B) {
580       return A.CurrentIndex == B.CurrentIndex && A.CurrentName == B.CurrentName;
581     }
582     friend bool operator!=(const NameIterator &A, const NameIterator &B) {
583       return !(A == B);
584     }
585   };
586 
587 private:
588   SmallVector<NameIndex, 0> NameIndices;
589   DenseMap<uint64_t, const NameIndex *> CUToNameIndex;
590 
591 public:
592   DWARFDebugNames(const DWARFDataExtractor &AccelSection,
593                   DataExtractor StringSection)
594       : DWARFAcceleratorTable(AccelSection, StringSection) {}
595 
596   Error extract() override;
597   void dump(raw_ostream &OS) const override;
598 
599   /// Look up all entries in the accelerator table matching \c Key.
600   iterator_range<ValueIterator> equal_range(StringRef Key) const;
601 
602   using const_iterator = SmallVector<NameIndex, 0>::const_iterator;
603   const_iterator begin() const { return NameIndices.begin(); }
604   const_iterator end() const { return NameIndices.end(); }
605 
606   /// Return the Name Index covering the compile unit at CUOffset, or nullptr if
607   /// there is no Name Index covering that unit.
608   const NameIndex *getCUNameIndex(uint64_t CUOffset);
609 };
610 
611 } // end namespace llvm
612 
613 #endif // LLVM_DEBUGINFO_DWARF_DWARFACCELERATORTABLE_H
614