xref: /freebsd/contrib/llvm-project/llvm/include/llvm/CodeGen/AccelTable.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //==- include/llvm/CodeGen/AccelTable.h - Accelerator Tables -----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file contains support for writing accelerator tables.
10 ///
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_CODEGEN_ACCELTABLE_H
14 #define LLVM_CODEGEN_ACCELTABLE_H
15 
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/STLFunctionalExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/BinaryFormat/Dwarf.h"
21 #include "llvm/CodeGen/DIE.h"
22 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
23 #include "llvm/Support/Allocator.h"
24 #include "llvm/Support/DJB.h"
25 #include "llvm/Support/Debug.h"
26 #include <cstdint>
27 #include <variant>
28 #include <vector>
29 
30 /// \file
31 /// The DWARF and Apple accelerator tables are an indirect hash table optimized
32 /// for null lookup rather than access to known data. The Apple accelerator
33 /// tables are a precursor of the newer DWARF v5 accelerator tables. Both
34 /// formats share common design ideas.
35 ///
36 /// The Apple accelerator table are output into an on-disk format that looks
37 /// like this:
38 ///
39 /// .------------------.
40 /// |  HEADER          |
41 /// |------------------|
42 /// |  BUCKETS         |
43 /// |------------------|
44 /// |  HASHES          |
45 /// |------------------|
46 /// |  OFFSETS         |
47 /// |------------------|
48 /// |  DATA            |
49 /// `------------------'
50 ///
51 /// The header contains a magic number, version, type of hash function,
52 /// the number of buckets, total number of hashes, and room for a special struct
53 /// of data and the length of that struct.
54 ///
55 /// The buckets contain an index (e.g. 6) into the hashes array. The hashes
56 /// section contains all of the 32-bit hash values in contiguous memory, and the
57 /// offsets contain the offset into the data area for the particular hash.
58 ///
59 /// For a lookup example, we could hash a function name and take it modulo the
60 /// number of buckets giving us our bucket. From there we take the bucket value
61 /// as an index into the hashes table and look at each successive hash as long
62 /// as the hash value is still the same modulo result (bucket value) as earlier.
63 /// If we have a match we look at that same entry in the offsets table and grab
64 /// the offset in the data for our final match.
65 ///
66 /// The DWARF v5 accelerator table consists of zero or more name indices that
67 /// are output into an on-disk format that looks like this:
68 ///
69 /// .------------------.
70 /// |  HEADER          |
71 /// |------------------|
72 /// |  CU LIST         |
73 /// |------------------|
74 /// |  LOCAL TU LIST   |
75 /// |------------------|
76 /// |  FOREIGN TU LIST |
77 /// |------------------|
78 /// |  HASH TABLE      |
79 /// |------------------|
80 /// |  NAME TABLE      |
81 /// |------------------|
82 /// |  ABBREV TABLE    |
83 /// |------------------|
84 /// |  ENTRY POOL      |
85 /// `------------------'
86 ///
87 /// For the full documentation please refer to the DWARF 5 standard.
88 ///
89 ///
90 /// This file defines the class template AccelTable, which is represents an
91 /// abstract view of an Accelerator table, without any notion of an on-disk
92 /// layout. This class is parameterized by an entry type, which should derive
93 /// from AccelTableData. This is the type of individual entries in the table,
94 /// and it should store the data necessary to emit them. AppleAccelTableData is
95 /// the base class for Apple Accelerator Table entries, which have a uniform
96 /// structure based on a sequence of Atoms. There are different sub-classes
97 /// derived from AppleAccelTable, which differ in the set of Atoms and how they
98 /// obtain their values.
99 ///
100 /// An Apple Accelerator Table can be serialized by calling emitAppleAccelTable
101 /// function.
102 
103 namespace llvm {
104 
105 class AsmPrinter;
106 class DwarfDebug;
107 class DwarfTypeUnit;
108 class MCSymbol;
109 class raw_ostream;
110 
111 /// Interface which the different types of accelerator table data have to
112 /// conform. It serves as a base class for different values of the template
113 /// argument of the AccelTable class template.
114 class AccelTableData {
115 public:
116   virtual ~AccelTableData() = default;
117 
118   bool operator<(const AccelTableData &Other) const {
119     return order() < Other.order();
120   }
121 
122     // Subclasses should implement:
123     // static uint32_t hash(StringRef Name);
124 
125 #ifndef NDEBUG
126   virtual void print(raw_ostream &OS) const = 0;
127 #endif
128 protected:
129   virtual uint64_t order() const = 0;
130 };
131 
132 /// A base class holding non-template-dependant functionality of the AccelTable
133 /// class. Clients should not use this class directly but rather instantiate
134 /// AccelTable with a type derived from AccelTableData.
135 class AccelTableBase {
136 public:
137   using HashFn = uint32_t(StringRef);
138 
139   /// Represents a group of entries with identical name (and hence, hash value).
140   struct HashData {
141     DwarfStringPoolEntryRef Name;
142     uint32_t HashValue;
143     std::vector<AccelTableData *> Values;
144     MCSymbol *Sym;
145 
146     /// Get all AccelTableData cast as a `T`.
getValuesHashData147     template <typename T = AccelTableData *> auto getValues() const {
148       static_assert(std::is_pointer<T>());
149       static_assert(
150           std::is_base_of<AccelTableData, std::remove_pointer_t<T>>());
151       return map_range(
152           Values, [](AccelTableData *Data) { return static_cast<T>(Data); });
153     }
154 
155 #ifndef NDEBUG
156     void print(raw_ostream &OS) const;
dumpHashData157     void dump() const { print(dbgs()); }
158 #endif
159   };
160   using HashList = std::vector<HashData *>;
161   using BucketList = std::vector<HashList>;
162 
163 protected:
164   /// Allocator for HashData and Values.
165   BumpPtrAllocator Allocator;
166 
167   using StringEntries = MapVector<StringRef, HashData>;
168   StringEntries Entries;
169 
170   HashFn *Hash;
171   uint32_t BucketCount = 0;
172   uint32_t UniqueHashCount = 0;
173 
174   HashList Hashes;
175   BucketList Buckets;
176 
177   void computeBucketCount();
178 
AccelTableBase(HashFn * Hash)179   AccelTableBase(HashFn *Hash) : Hash(Hash) {}
180 
181 public:
182   void finalize(AsmPrinter *Asm, StringRef Prefix);
getBuckets()183   ArrayRef<HashList> getBuckets() const { return Buckets; }
getBucketCount()184   uint32_t getBucketCount() const { return BucketCount; }
getUniqueHashCount()185   uint32_t getUniqueHashCount() const { return UniqueHashCount; }
getUniqueNameCount()186   uint32_t getUniqueNameCount() const { return Entries.size(); }
187 
188 #ifndef NDEBUG
189   void print(raw_ostream &OS) const;
dump()190   void dump() const { print(dbgs()); }
191 #endif
192 
193   AccelTableBase(const AccelTableBase &) = delete;
194   void operator=(const AccelTableBase &) = delete;
195 };
196 
197 /// This class holds an abstract representation of an Accelerator Table,
198 /// consisting of a sequence of buckets, each bucket containint a sequence of
199 /// HashData entries. The class is parameterized by the type of entries it
200 /// holds. The type template parameter also defines the hash function to use for
201 /// hashing names.
202 template <typename DataT> class AccelTable : public AccelTableBase {
203 public:
AccelTable()204   AccelTable() : AccelTableBase(DataT::hash) {}
205 
206   template <typename... Types>
207   void addName(DwarfStringPoolEntryRef Name, Types &&... Args);
clear()208   void clear() { Entries.clear(); }
209   void addEntries(AccelTable<DataT> &Table);
getEntries()210   const StringEntries getEntries() const { return Entries; }
211 };
212 
213 template <typename AccelTableDataT>
214 template <typename... Types>
addName(DwarfStringPoolEntryRef Name,Types &&...Args)215 void AccelTable<AccelTableDataT>::addName(DwarfStringPoolEntryRef Name,
216                                           Types &&... Args) {
217   assert(Buckets.empty() && "Already finalized!");
218   // If the string is in the list already then add this die to the list
219   // otherwise add a new one.
220   auto &It = Entries[Name.getString()];
221   if (It.Values.empty()) {
222     It.Name = Name;
223     It.HashValue = Hash(Name.getString());
224   }
225   It.Values.push_back(new (Allocator)
226                           AccelTableDataT(std::forward<Types>(Args)...));
227 }
228 
229 /// A base class for different implementations of Data classes for Apple
230 /// Accelerator Tables. The columns in the table are defined by the static Atoms
231 /// variable defined on the subclasses.
232 class AppleAccelTableData : public AccelTableData {
233 public:
234   /// An Atom defines the form of the data in an Apple accelerator table.
235   /// Conceptually it is a column in the accelerator consisting of a type and a
236   /// specification of the form of its data.
237   struct Atom {
238     /// Atom Type.
239     const uint16_t Type;
240     /// DWARF Form.
241     const uint16_t Form;
242 
AtomAtom243     constexpr Atom(uint16_t Type, uint16_t Form) : Type(Type), Form(Form) {}
244 
245 #ifndef NDEBUG
246     void print(raw_ostream &OS) const;
dumpAtom247     void dump() const { print(dbgs()); }
248 #endif
249   };
250   // Subclasses should define:
251   // static constexpr Atom Atoms[];
252 
253   virtual void emit(AsmPrinter *Asm) const = 0;
254 
hash(StringRef Buffer)255   static uint32_t hash(StringRef Buffer) { return djbHash(Buffer); }
256 };
257 
258 /// Helper class to identify an entry in DWARF5AccelTable based on their DIE
259 /// offset and UnitID.
260 struct OffsetAndUnitID {
261   uint64_t Offset = 0;
262   uint32_t UnitID = 0;
263   bool IsTU = false;
264   OffsetAndUnitID() = delete;
OffsetAndUnitIDOffsetAndUnitID265   OffsetAndUnitID(uint64_t Offset, uint32_t UnitID, bool IsTU)
266       : Offset(Offset), UnitID(UnitID), IsTU(IsTU) {}
offsetOffsetAndUnitID267   uint64_t offset() const { return Offset; };
unitIDOffsetAndUnitID268   uint32_t unitID() const { return UnitID; };
isTUOffsetAndUnitID269   bool isTU() const { return IsTU; }
270 };
271 
272 template <> struct DenseMapInfo<OffsetAndUnitID> {
273   static inline OffsetAndUnitID getEmptyKey() {
274     return OffsetAndUnitID(-1, -1, false);
275   }
276   static inline OffsetAndUnitID getTombstoneKey() {
277     return OffsetAndUnitID(-2, -2, false);
278   }
279   static unsigned getHashValue(const OffsetAndUnitID &Val) {
280     return (unsigned)llvm::hash_combine(Val.offset(), Val.unitID(), Val.IsTU);
281   }
282   static bool isEqual(const OffsetAndUnitID &LHS, const OffsetAndUnitID &RHS) {
283     return LHS.offset() == RHS.offset() && LHS.unitID() == RHS.unitID() &&
284            LHS.IsTU == RHS.isTU();
285   }
286 };
287 
288 /// The Data class implementation for DWARF v5 accelerator table. Unlike the
289 /// Apple Data classes, this class is just a DIE wrapper, and does not know to
290 /// serialize itself. The complete serialization logic is in the
291 /// emitDWARF5AccelTable function.
292 class DWARF5AccelTableData : public AccelTableData {
293 public:
294   static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); }
295 
296   DWARF5AccelTableData(const DIE &Die, const uint32_t UnitID, const bool IsTU);
297   DWARF5AccelTableData(const uint64_t DieOffset,
298                        const std::optional<uint64_t> DefiningParentOffset,
299                        const unsigned DieTag, const unsigned UnitID,
300                        const bool IsTU)
301       : OffsetVal(DieOffset), ParentOffset(DefiningParentOffset),
302         DieTag(DieTag), AbbrevNumber(0), IsTU(IsTU), UnitID(UnitID) {}
303 
304 #ifndef NDEBUG
305   void print(raw_ostream &OS) const override;
306 #endif
307 
308   uint64_t getDieOffset() const {
309     assert(isNormalized() && "Accessing DIE Offset before normalizing.");
310     return std::get<uint64_t>(OffsetVal);
311   }
312 
313   OffsetAndUnitID getDieOffsetAndUnitID() const {
314     return {getDieOffset(), getUnitID(), isTU()};
315   }
316 
317   unsigned getDieTag() const { return DieTag; }
318   unsigned getUnitID() const { return UnitID; }
319   bool isTU() const { return IsTU; }
320   void normalizeDIEToOffset() {
321     assert(!isNormalized() && "Accessing offset after normalizing.");
322     const DIE *Entry = std::get<const DIE *>(OffsetVal);
323     ParentOffset = getDefiningParentDieOffset(*Entry);
324     OffsetVal = Entry->getOffset();
325   }
326   bool isNormalized() const {
327     return std::holds_alternative<uint64_t>(OffsetVal);
328   }
329 
330   std::optional<uint64_t> getParentDieOffset() const {
331     if (auto OffsetAndId = getParentDieOffsetAndUnitID())
332       return OffsetAndId->offset();
333     return {};
334   }
335 
336   std::optional<OffsetAndUnitID> getParentDieOffsetAndUnitID() const {
337     assert(isNormalized() && "Accessing DIE Offset before normalizing.");
338     if (!ParentOffset)
339       return std::nullopt;
340     return OffsetAndUnitID(*ParentOffset, getUnitID(), isTU());
341   }
342 
343   /// Sets AbbrevIndex for an Entry.
344   void setAbbrevNumber(uint16_t AbbrevNum) { AbbrevNumber = AbbrevNum; }
345 
346   /// Returns AbbrevIndex for an Entry.
347   uint16_t getAbbrevNumber() const { return AbbrevNumber; }
348 
349   /// If `Die` has a non-null parent and the parent is not a declaration,
350   /// return its offset.
351   static std::optional<uint64_t> getDefiningParentDieOffset(const DIE &Die);
352 
353 protected:
354   std::variant<const DIE *, uint64_t> OffsetVal;
355   std::optional<uint64_t> ParentOffset;
356   uint32_t DieTag : 16;
357   uint32_t AbbrevNumber : 15;
358   uint32_t IsTU : 1;
359   uint32_t UnitID;
360   uint64_t order() const override { return getDieOffset(); }
361 };
362 
363 class DebugNamesAbbrev : public FoldingSetNode {
364 public:
365   uint32_t DieTag;
366   uint32_t Number;
367   struct AttributeEncoding {
368     dwarf::Index Index;
369     dwarf::Form Form;
370   };
371   DebugNamesAbbrev(uint32_t DieTag) : DieTag(DieTag), Number(0) {}
372   /// Add attribute encoding to an abbreviation.
373   void addAttribute(const DebugNamesAbbrev::AttributeEncoding &Attr) {
374     AttrVect.push_back(Attr);
375   }
376   /// Set abbreviation tag index.
377   void setNumber(uint32_t AbbrevNumber) { Number = AbbrevNumber; }
378   /// Get abbreviation tag index.
379   uint32_t getNumber() const { return Number; }
380   /// Get DIE Tag.
381   uint32_t getDieTag() const { return DieTag; }
382   /// Used to gather unique data for the abbreviation folding set.
383   void Profile(FoldingSetNodeID &ID) const;
384   /// Returns attributes for an abbreviation.
385   const SmallVector<AttributeEncoding, 1> &getAttributes() const {
386     return AttrVect;
387   }
388 
389 private:
390   SmallVector<AttributeEncoding, 1> AttrVect;
391 };
392 
393 struct TypeUnitMetaInfo {
394   // Symbol for start of the TU section or signature if this is SplitDwarf.
395   std::variant<MCSymbol *, uint64_t> LabelOrSignature;
396   // Unique ID of Type Unit.
397   unsigned UniqueID;
398 };
399 using TUVectorTy = SmallVector<TypeUnitMetaInfo, 1>;
400 class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> {
401   // Symbols to start of all the TU sections that were generated.
402   TUVectorTy TUSymbolsOrHashes;
403 
404 public:
405   struct UnitIndexAndEncoding {
406     unsigned Index;
407     DebugNamesAbbrev::AttributeEncoding Encoding;
408   };
409   /// Returns type units that were constructed.
410   const TUVectorTy &getTypeUnitsSymbols() { return TUSymbolsOrHashes; }
411   /// Add a type unit start symbol.
412   void addTypeUnitSymbol(DwarfTypeUnit &U);
413   /// Add a type unit Signature.
414   void addTypeUnitSignature(DwarfTypeUnit &U);
415   /// Convert DIE entries to explicit offset.
416   /// Needs to be called after DIE offsets are computed.
417   void convertDieToOffset() {
418     for (auto &Entry : Entries) {
419       for (auto *Data : Entry.second.getValues<DWARF5AccelTableData *>()) {
420         // For TU we normalize as each Unit is emitted.
421         // So when this is invoked after CU construction we will be in mixed
422         // state.
423         if (!Data->isNormalized())
424           Data->normalizeDIEToOffset();
425       }
426     }
427   }
428 
429   void addTypeEntries(DWARF5AccelTable &Table) {
430     for (auto &Entry : Table.getEntries()) {
431       for (auto *Data : Entry.second.getValues<DWARF5AccelTableData *>()) {
432         addName(Entry.second.Name, Data->getDieOffset(),
433                 Data->getParentDieOffset(), Data->getDieTag(),
434                 Data->getUnitID(), Data->isTU());
435       }
436     }
437   }
438 };
439 
440 void emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents,
441                              StringRef Prefix, const MCSymbol *SecBegin,
442                              ArrayRef<AppleAccelTableData::Atom> Atoms);
443 
444 /// Emit an Apple Accelerator Table consisting of entries in the specified
445 /// AccelTable. The DataT template parameter should be derived from
446 /// AppleAccelTableData.
447 template <typename DataT>
448 void emitAppleAccelTable(AsmPrinter *Asm, AccelTable<DataT> &Contents,
449                          StringRef Prefix, const MCSymbol *SecBegin) {
450   static_assert(std::is_convertible<DataT *, AppleAccelTableData *>::value);
451   emitAppleAccelTableImpl(Asm, Contents, Prefix, SecBegin, DataT::Atoms);
452 }
453 
454 void emitDWARF5AccelTable(AsmPrinter *Asm, DWARF5AccelTable &Contents,
455                           const DwarfDebug &DD,
456                           ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs);
457 
458 /// Emit a DWARFv5 Accelerator Table consisting of entries in the specified
459 /// AccelTable. The \p CUs contains either symbols keeping offsets to the
460 /// start of compilation unit, either offsets to the start of compilation
461 /// unit themselves.
462 void emitDWARF5AccelTable(
463     AsmPrinter *Asm, DWARF5AccelTable &Contents,
464     ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
465     llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
466         const DWARF5AccelTableData &)>
467         getIndexForEntry);
468 
469 /// Accelerator table data implementation for simple Apple accelerator tables
470 /// with just a DIE reference.
471 class AppleAccelTableOffsetData : public AppleAccelTableData {
472 public:
473   AppleAccelTableOffsetData(const DIE &D) : Die(D) {}
474 
475   void emit(AsmPrinter *Asm) const override;
476 
477   static constexpr Atom Atoms[] = {
478       Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
479 
480 #ifndef NDEBUG
481   void print(raw_ostream &OS) const override;
482 #endif
483 protected:
484   uint64_t order() const override { return Die.getOffset(); }
485 
486   const DIE &Die;
487 };
488 
489 /// Accelerator table data implementation for Apple type accelerator tables.
490 class AppleAccelTableTypeData : public AppleAccelTableOffsetData {
491 public:
492   AppleAccelTableTypeData(const DIE &D) : AppleAccelTableOffsetData(D) {}
493 
494   void emit(AsmPrinter *Asm) const override;
495 
496   static constexpr Atom Atoms[] = {
497       Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
498       Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
499       Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
500 
501 #ifndef NDEBUG
502   void print(raw_ostream &OS) const override;
503 #endif
504 };
505 
506 /// Accelerator table data implementation for simple Apple accelerator tables
507 /// with a DIE offset but no actual DIE pointer.
508 class AppleAccelTableStaticOffsetData : public AppleAccelTableData {
509 public:
510   AppleAccelTableStaticOffsetData(uint32_t Offset) : Offset(Offset) {}
511 
512   void emit(AsmPrinter *Asm) const override;
513 
514   static constexpr Atom Atoms[] = {
515       Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
516 
517 #ifndef NDEBUG
518   void print(raw_ostream &OS) const override;
519 #endif
520 protected:
521   uint64_t order() const override { return Offset; }
522 
523   uint32_t Offset;
524 };
525 
526 /// Accelerator table data implementation for type accelerator tables with
527 /// a DIE offset but no actual DIE pointer.
528 class AppleAccelTableStaticTypeData : public AppleAccelTableStaticOffsetData {
529 public:
530   AppleAccelTableStaticTypeData(uint32_t Offset, uint16_t Tag,
531                                 bool ObjCClassIsImplementation,
532                                 uint32_t QualifiedNameHash)
533       : AppleAccelTableStaticOffsetData(Offset),
534         QualifiedNameHash(QualifiedNameHash), Tag(Tag),
535         ObjCClassIsImplementation(ObjCClassIsImplementation) {}
536 
537   void emit(AsmPrinter *Asm) const override;
538 
539   static constexpr Atom Atoms[] = {
540       Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
541       Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
542       Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)};
543 
544 #ifndef NDEBUG
545   void print(raw_ostream &OS) const override;
546 #endif
547 protected:
548   uint64_t order() const override { return Offset; }
549 
550   uint32_t QualifiedNameHash;
551   uint16_t Tag;
552   bool ObjCClassIsImplementation;
553 };
554 
555 } // end namespace llvm
556 
557 #endif // LLVM_CODEGEN_ACCELTABLE_H
558