xref: /freebsd/contrib/llvm-project/llvm/include/llvm/CodeGen/AccelTable.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //==- include/llvm/CodeGen/AccelTable.h - Accelerator Tables -----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file contains support for writing accelerator tables.
10 ///
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_CODEGEN_ACCELTABLE_H
14 #define LLVM_CODEGEN_ACCELTABLE_H
15 
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/STLFunctionalExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/BinaryFormat/Dwarf.h"
21 #include "llvm/CodeGen/DIE.h"
22 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
23 #include "llvm/Support/Allocator.h"
24 #include "llvm/Support/Compiler.h"
25 #include "llvm/Support/DJB.h"
26 #include "llvm/Support/Debug.h"
27 #include <cstdint>
28 #include <variant>
29 #include <vector>
30 
31 /// \file
32 /// The DWARF and Apple accelerator tables are an indirect hash table optimized
33 /// for null lookup rather than access to known data. The Apple accelerator
34 /// tables are a precursor of the newer DWARF v5 accelerator tables. Both
35 /// formats share common design ideas.
36 ///
37 /// The Apple accelerator table are output into an on-disk format that looks
38 /// like this:
39 ///
40 /// .------------------.
41 /// |  HEADER          |
42 /// |------------------|
43 /// |  BUCKETS         |
44 /// |------------------|
45 /// |  HASHES          |
46 /// |------------------|
47 /// |  OFFSETS         |
48 /// |------------------|
49 /// |  DATA            |
50 /// `------------------'
51 ///
52 /// The header contains a magic number, version, type of hash function,
53 /// the number of buckets, total number of hashes, and room for a special struct
54 /// of data and the length of that struct.
55 ///
56 /// The buckets contain an index (e.g. 6) into the hashes array. The hashes
57 /// section contains all of the 32-bit hash values in contiguous memory, and the
58 /// offsets contain the offset into the data area for the particular hash.
59 ///
60 /// For a lookup example, we could hash a function name and take it modulo the
61 /// number of buckets giving us our bucket. From there we take the bucket value
62 /// as an index into the hashes table and look at each successive hash as long
63 /// as the hash value is still the same modulo result (bucket value) as earlier.
64 /// If we have a match we look at that same entry in the offsets table and grab
65 /// the offset in the data for our final match.
66 ///
67 /// The DWARF v5 accelerator table consists of zero or more name indices that
68 /// are output into an on-disk format that looks like this:
69 ///
70 /// .------------------.
71 /// |  HEADER          |
72 /// |------------------|
73 /// |  CU LIST         |
74 /// |------------------|
75 /// |  LOCAL TU LIST   |
76 /// |------------------|
77 /// |  FOREIGN TU LIST |
78 /// |------------------|
79 /// |  HASH TABLE      |
80 /// |------------------|
81 /// |  NAME TABLE      |
82 /// |------------------|
83 /// |  ABBREV TABLE    |
84 /// |------------------|
85 /// |  ENTRY POOL      |
86 /// `------------------'
87 ///
88 /// For the full documentation please refer to the DWARF 5 standard.
89 ///
90 ///
91 /// This file defines the class template AccelTable, which is represents an
92 /// abstract view of an Accelerator table, without any notion of an on-disk
93 /// layout. This class is parameterized by an entry type, which should derive
94 /// from AccelTableData. This is the type of individual entries in the table,
95 /// and it should store the data necessary to emit them. AppleAccelTableData is
96 /// the base class for Apple Accelerator Table entries, which have a uniform
97 /// structure based on a sequence of Atoms. There are different sub-classes
98 /// derived from AppleAccelTable, which differ in the set of Atoms and how they
99 /// obtain their values.
100 ///
101 /// An Apple Accelerator Table can be serialized by calling emitAppleAccelTable
102 /// function.
103 
104 namespace llvm {
105 
106 class AsmPrinter;
107 class DwarfDebug;
108 class DwarfTypeUnit;
109 class MCSymbol;
110 class raw_ostream;
111 
112 /// Interface which the different types of accelerator table data have to
113 /// conform. It serves as a base class for different values of the template
114 /// argument of the AccelTable class template.
115 class AccelTableData {
116 public:
117   virtual ~AccelTableData() = default;
118 
119   bool operator<(const AccelTableData &Other) const {
120     return order() < Other.order();
121   }
122 
123     // Subclasses should implement:
124     // static uint32_t hash(StringRef Name);
125 
126 #ifndef NDEBUG
127   virtual void print(raw_ostream &OS) const = 0;
128 #endif
129 protected:
130   virtual uint64_t order() const = 0;
131 };
132 
133 /// A base class holding non-template-dependant functionality of the AccelTable
134 /// class. Clients should not use this class directly but rather instantiate
135 /// AccelTable with a type derived from AccelTableData.
136 class AccelTableBase {
137 public:
138   using HashFn = uint32_t(StringRef);
139 
140   /// Represents a group of entries with identical name (and hence, hash value).
141   struct HashData {
142     DwarfStringPoolEntryRef Name;
143     uint32_t HashValue;
144     std::vector<AccelTableData *> Values;
145     MCSymbol *Sym;
146 
147     /// Get all AccelTableData cast as a `T`.
getValuesHashData148     template <typename T = AccelTableData *> auto getValues() const {
149       static_assert(std::is_pointer<T>());
150       static_assert(
151           std::is_base_of<AccelTableData, std::remove_pointer_t<T>>());
152       return map_range(
153           Values, [](AccelTableData *Data) { return static_cast<T>(Data); });
154     }
155 
156 #ifndef NDEBUG
157     void print(raw_ostream &OS) const;
dumpHashData158     void dump() const { print(dbgs()); }
159 #endif
160   };
161   using HashList = std::vector<HashData *>;
162   using BucketList = std::vector<HashList>;
163 
164 protected:
165   /// Allocator for HashData and Values.
166   BumpPtrAllocator Allocator;
167 
168   using StringEntries = MapVector<StringRef, HashData>;
169   StringEntries Entries;
170 
171   HashFn *Hash;
172   uint32_t BucketCount = 0;
173   uint32_t UniqueHashCount = 0;
174 
175   HashList Hashes;
176   BucketList Buckets;
177 
178   LLVM_ABI void computeBucketCount();
179 
AccelTableBase(HashFn * Hash)180   AccelTableBase(HashFn *Hash) : Hash(Hash) {}
181 
182 public:
183   LLVM_ABI void finalize(AsmPrinter *Asm, StringRef Prefix);
getBuckets()184   ArrayRef<HashList> getBuckets() const { return Buckets; }
getBucketCount()185   uint32_t getBucketCount() const { return BucketCount; }
getUniqueHashCount()186   uint32_t getUniqueHashCount() const { return UniqueHashCount; }
getUniqueNameCount()187   uint32_t getUniqueNameCount() const { return Entries.size(); }
188 
189 #ifndef NDEBUG
190   void print(raw_ostream &OS) const;
dump()191   void dump() const { print(dbgs()); }
192 #endif
193 
194   AccelTableBase(const AccelTableBase &) = delete;
195   void operator=(const AccelTableBase &) = delete;
196 };
197 
198 /// This class holds an abstract representation of an Accelerator Table,
199 /// consisting of a sequence of buckets, each bucket containint a sequence of
200 /// HashData entries. The class is parameterized by the type of entries it
201 /// holds. The type template parameter also defines the hash function to use for
202 /// hashing names.
203 template <typename DataT> class AccelTable : public AccelTableBase {
204 public:
AccelTable()205   AccelTable() : AccelTableBase(DataT::hash) {}
206 
207   template <typename... Types>
208   void addName(DwarfStringPoolEntryRef Name, Types &&... Args);
clear()209   void clear() { Entries.clear(); }
210   void addEntries(AccelTable<DataT> &Table);
getEntries()211   const StringEntries getEntries() const { return Entries; }
212 };
213 
214 template <typename AccelTableDataT>
215 template <typename... Types>
addName(DwarfStringPoolEntryRef Name,Types &&...Args)216 void AccelTable<AccelTableDataT>::addName(DwarfStringPoolEntryRef Name,
217                                           Types &&... Args) {
218   assert(Buckets.empty() && "Already finalized!");
219   // If the string is in the list already then add this die to the list
220   // otherwise add a new one.
221   auto &It = Entries[Name.getString()];
222   if (It.Values.empty()) {
223     It.Name = Name;
224     It.HashValue = Hash(Name.getString());
225   }
226   It.Values.push_back(new (Allocator)
227                           AccelTableDataT(std::forward<Types>(Args)...));
228 }
229 
230 /// A base class for different implementations of Data classes for Apple
231 /// Accelerator Tables. The columns in the table are defined by the static Atoms
232 /// variable defined on the subclasses.
233 class AppleAccelTableData : public AccelTableData {
234 public:
235   /// An Atom defines the form of the data in an Apple accelerator table.
236   /// Conceptually it is a column in the accelerator consisting of a type and a
237   /// specification of the form of its data.
238   struct Atom {
239     /// Atom Type.
240     const uint16_t Type;
241     /// DWARF Form.
242     const uint16_t Form;
243 
AtomAtom244     constexpr Atom(uint16_t Type, uint16_t Form) : Type(Type), Form(Form) {}
245 
246 #ifndef NDEBUG
247     void print(raw_ostream &OS) const;
dumpAtom248     void dump() const { print(dbgs()); }
249 #endif
250   };
251   // Subclasses should define:
252   // static constexpr Atom Atoms[];
253 
254   virtual void emit(AsmPrinter *Asm) const = 0;
255 
hash(StringRef Buffer)256   static uint32_t hash(StringRef Buffer) { return djbHash(Buffer); }
257 };
258 
259 /// Helper class to identify an entry in DWARF5AccelTable based on their DIE
260 /// offset and UnitID.
261 struct OffsetAndUnitID {
262   uint64_t Offset = 0;
263   uint32_t UnitID = 0;
264   bool IsTU = false;
265   OffsetAndUnitID() = delete;
OffsetAndUnitIDOffsetAndUnitID266   OffsetAndUnitID(uint64_t Offset, uint32_t UnitID, bool IsTU)
267       : Offset(Offset), UnitID(UnitID), IsTU(IsTU) {}
offsetOffsetAndUnitID268   uint64_t offset() const { return Offset; };
unitIDOffsetAndUnitID269   uint32_t unitID() const { return UnitID; };
isTUOffsetAndUnitID270   bool isTU() const { return IsTU; }
271 };
272 
273 template <> struct DenseMapInfo<OffsetAndUnitID> {
274   static inline OffsetAndUnitID getEmptyKey() {
275     return OffsetAndUnitID(-1, -1, false);
276   }
277   static inline OffsetAndUnitID getTombstoneKey() {
278     return OffsetAndUnitID(-2, -2, false);
279   }
280   static unsigned getHashValue(const OffsetAndUnitID &Val) {
281     return (unsigned)llvm::hash_combine(Val.offset(), Val.unitID(), Val.IsTU);
282   }
283   static bool isEqual(const OffsetAndUnitID &LHS, const OffsetAndUnitID &RHS) {
284     return LHS.offset() == RHS.offset() && LHS.unitID() == RHS.unitID() &&
285            LHS.IsTU == RHS.isTU();
286   }
287 };
288 
289 /// The Data class implementation for DWARF v5 accelerator table. Unlike the
290 /// Apple Data classes, this class is just a DIE wrapper, and does not know to
291 /// serialize itself. The complete serialization logic is in the
292 /// emitDWARF5AccelTable function.
293 class DWARF5AccelTableData : public AccelTableData {
294 public:
295   static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); }
296 
297   LLVM_ABI DWARF5AccelTableData(const DIE &Die, const uint32_t UnitID,
298                                 const bool IsTU);
299   DWARF5AccelTableData(const uint64_t DieOffset,
300                        const std::optional<uint64_t> DefiningParentOffset,
301                        const unsigned DieTag, const unsigned UnitID,
302                        const bool IsTU)
303       : OffsetVal(DieOffset), ParentOffset(DefiningParentOffset),
304         DieTag(DieTag), AbbrevNumber(0), IsTU(IsTU), UnitID(UnitID) {}
305 
306 #ifndef NDEBUG
307   void print(raw_ostream &OS) const override;
308 #endif
309 
310   uint64_t getDieOffset() const {
311     assert(isNormalized() && "Accessing DIE Offset before normalizing.");
312     return std::get<uint64_t>(OffsetVal);
313   }
314 
315   OffsetAndUnitID getDieOffsetAndUnitID() const {
316     return {getDieOffset(), getUnitID(), isTU()};
317   }
318 
319   unsigned getDieTag() const { return DieTag; }
320   unsigned getUnitID() const { return UnitID; }
321   bool isTU() const { return IsTU; }
322   void normalizeDIEToOffset() {
323     assert(!isNormalized() && "Accessing offset after normalizing.");
324     const DIE *Entry = std::get<const DIE *>(OffsetVal);
325     ParentOffset = getDefiningParentDieOffset(*Entry);
326     OffsetVal = Entry->getOffset();
327   }
328   bool isNormalized() const {
329     return std::holds_alternative<uint64_t>(OffsetVal);
330   }
331 
332   std::optional<uint64_t> getParentDieOffset() const {
333     if (auto OffsetAndId = getParentDieOffsetAndUnitID())
334       return OffsetAndId->offset();
335     return {};
336   }
337 
338   std::optional<OffsetAndUnitID> getParentDieOffsetAndUnitID() const {
339     assert(isNormalized() && "Accessing DIE Offset before normalizing.");
340     if (!ParentOffset)
341       return std::nullopt;
342     return OffsetAndUnitID(*ParentOffset, getUnitID(), isTU());
343   }
344 
345   /// Sets AbbrevIndex for an Entry.
346   void setAbbrevNumber(uint16_t AbbrevNum) { AbbrevNumber = AbbrevNum; }
347 
348   /// Returns AbbrevIndex for an Entry.
349   uint16_t getAbbrevNumber() const { return AbbrevNumber; }
350 
351   /// If `Die` has a non-null parent and the parent is not a declaration,
352   /// return its offset.
353   LLVM_ABI static std::optional<uint64_t>
354   getDefiningParentDieOffset(const DIE &Die);
355 
356 protected:
357   std::variant<const DIE *, uint64_t> OffsetVal;
358   std::optional<uint64_t> ParentOffset;
359   uint32_t DieTag : 16;
360   uint32_t AbbrevNumber : 15;
361   uint32_t IsTU : 1;
362   uint32_t UnitID;
363   uint64_t order() const override { return getDieOffset(); }
364 };
365 
366 class DebugNamesAbbrev : public FoldingSetNode {
367 public:
368   uint32_t DieTag;
369   uint32_t Number;
370   struct AttributeEncoding {
371     dwarf::Index Index;
372     dwarf::Form Form;
373   };
374   DebugNamesAbbrev(uint32_t DieTag) : DieTag(DieTag), Number(0) {}
375   /// Add attribute encoding to an abbreviation.
376   void addAttribute(const DebugNamesAbbrev::AttributeEncoding &Attr) {
377     AttrVect.push_back(Attr);
378   }
379   /// Set abbreviation tag index.
380   void setNumber(uint32_t AbbrevNumber) { Number = AbbrevNumber; }
381   /// Get abbreviation tag index.
382   uint32_t getNumber() const { return Number; }
383   /// Get DIE Tag.
384   uint32_t getDieTag() const { return DieTag; }
385   /// Used to gather unique data for the abbreviation folding set.
386   LLVM_ABI void Profile(FoldingSetNodeID &ID) const;
387   /// Returns attributes for an abbreviation.
388   const SmallVector<AttributeEncoding, 1> &getAttributes() const {
389     return AttrVect;
390   }
391 
392 private:
393   SmallVector<AttributeEncoding, 1> AttrVect;
394 };
395 
396 struct TypeUnitMetaInfo {
397   // Symbol for start of the TU section or signature if this is SplitDwarf.
398   std::variant<MCSymbol *, uint64_t> LabelOrSignature;
399   // Unique ID of Type Unit.
400   unsigned UniqueID;
401 };
402 using TUVectorTy = SmallVector<TypeUnitMetaInfo, 1>;
403 class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> {
404   // Symbols to start of all the TU sections that were generated.
405   TUVectorTy TUSymbolsOrHashes;
406 
407 public:
408   struct UnitIndexAndEncoding {
409     unsigned Index;
410     DebugNamesAbbrev::AttributeEncoding Encoding;
411   };
412   /// Returns type units that were constructed.
413   const TUVectorTy &getTypeUnitsSymbols() { return TUSymbolsOrHashes; }
414   /// Add a type unit start symbol.
415   LLVM_ABI void addTypeUnitSymbol(DwarfTypeUnit &U);
416   /// Add a type unit Signature.
417   LLVM_ABI void addTypeUnitSignature(DwarfTypeUnit &U);
418   /// Convert DIE entries to explicit offset.
419   /// Needs to be called after DIE offsets are computed.
420   void convertDieToOffset() {
421     for (auto &Entry : Entries) {
422       for (auto *Data : Entry.second.getValues<DWARF5AccelTableData *>()) {
423         // For TU we normalize as each Unit is emitted.
424         // So when this is invoked after CU construction we will be in mixed
425         // state.
426         if (!Data->isNormalized())
427           Data->normalizeDIEToOffset();
428       }
429     }
430   }
431 
432   void addTypeEntries(DWARF5AccelTable &Table) {
433     for (auto &Entry : Table.getEntries()) {
434       for (auto *Data : Entry.second.getValues<DWARF5AccelTableData *>()) {
435         addName(Entry.second.Name, Data->getDieOffset(),
436                 Data->getParentDieOffset(), Data->getDieTag(),
437                 Data->getUnitID(), Data->isTU());
438       }
439     }
440   }
441 };
442 
443 LLVM_ABI void
444 emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents,
445                         StringRef Prefix, const MCSymbol *SecBegin,
446                         ArrayRef<AppleAccelTableData::Atom> Atoms);
447 
448 /// Emit an Apple Accelerator Table consisting of entries in the specified
449 /// AccelTable. The DataT template parameter should be derived from
450 /// AppleAccelTableData.
451 template <typename DataT>
452 void emitAppleAccelTable(AsmPrinter *Asm, AccelTable<DataT> &Contents,
453                          StringRef Prefix, const MCSymbol *SecBegin) {
454   static_assert(std::is_convertible<DataT *, AppleAccelTableData *>::value);
455   emitAppleAccelTableImpl(Asm, Contents, Prefix, SecBegin, DataT::Atoms);
456 }
457 
458 LLVM_ABI void
459 emitDWARF5AccelTable(AsmPrinter *Asm, DWARF5AccelTable &Contents,
460                      const DwarfDebug &DD,
461                      ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs);
462 
463 /// Emit a DWARFv5 Accelerator Table consisting of entries in the specified
464 /// AccelTable. The \p CUs contains either symbols keeping offsets to the
465 /// start of compilation unit, either offsets to the start of compilation
466 /// unit themselves.
467 LLVM_ABI void emitDWARF5AccelTable(
468     AsmPrinter *Asm, DWARF5AccelTable &Contents,
469     ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
470     llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
471         const DWARF5AccelTableData &)>
472         getIndexForEntry);
473 
474 /// Accelerator table data implementation for simple Apple accelerator tables
475 /// with just a DIE reference.
476 class LLVM_ABI AppleAccelTableOffsetData : public AppleAccelTableData {
477 public:
478   AppleAccelTableOffsetData(const DIE &D) : Die(D) {}
479 
480   void emit(AsmPrinter *Asm) const override;
481 
482   static constexpr Atom Atoms[] = {
483       Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
484 
485 #ifndef NDEBUG
486   void print(raw_ostream &OS) const override;
487 #endif
488 protected:
489   uint64_t order() const override { return Die.getOffset(); }
490 
491   const DIE &Die;
492 };
493 
494 /// Accelerator table data implementation for Apple type accelerator tables.
495 class LLVM_ABI AppleAccelTableTypeData : public AppleAccelTableOffsetData {
496 public:
497   AppleAccelTableTypeData(const DIE &D) : AppleAccelTableOffsetData(D) {}
498 
499   void emit(AsmPrinter *Asm) const override;
500 
501   static constexpr Atom Atoms[] = {
502       Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
503       Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
504       Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
505 
506 #ifndef NDEBUG
507   void print(raw_ostream &OS) const override;
508 #endif
509 };
510 
511 /// Accelerator table data implementation for simple Apple accelerator tables
512 /// with a DIE offset but no actual DIE pointer.
513 class LLVM_ABI AppleAccelTableStaticOffsetData : public AppleAccelTableData {
514 public:
515   AppleAccelTableStaticOffsetData(uint32_t Offset) : Offset(Offset) {}
516 
517   void emit(AsmPrinter *Asm) const override;
518 
519   static constexpr Atom Atoms[] = {
520       Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
521 
522 #ifndef NDEBUG
523   void print(raw_ostream &OS) const override;
524 #endif
525 protected:
526   uint64_t order() const override { return Offset; }
527 
528   uint32_t Offset;
529 };
530 
531 /// Accelerator table data implementation for type accelerator tables with
532 /// a DIE offset but no actual DIE pointer.
533 class LLVM_ABI AppleAccelTableStaticTypeData
534     : public AppleAccelTableStaticOffsetData {
535 public:
536   AppleAccelTableStaticTypeData(uint32_t Offset, uint16_t Tag,
537                                 bool ObjCClassIsImplementation,
538                                 uint32_t QualifiedNameHash)
539       : AppleAccelTableStaticOffsetData(Offset),
540         QualifiedNameHash(QualifiedNameHash), Tag(Tag),
541         ObjCClassIsImplementation(ObjCClassIsImplementation) {}
542 
543   void emit(AsmPrinter *Asm) const override;
544 
545   static constexpr Atom Atoms[] = {
546       Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
547       Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
548       Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)};
549 
550 #ifndef NDEBUG
551   void print(raw_ostream &OS) const override;
552 #endif
553 protected:
554   uint64_t order() const override { return Offset; }
555 
556   uint32_t QualifiedNameHash;
557   uint16_t Tag;
558   bool ObjCClassIsImplementation;
559 };
560 
561 } // end namespace llvm
562 
563 #endif // LLVM_CODEGEN_ACCELTABLE_H
564