xref: /freebsd/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp (revision a3266ba2697a383d2ede56803320d941866c7e76)
1 //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
10 
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/DebugInfo/PDB/Native/Hash.h"
13 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
14 #include "llvm/Support/BinaryStreamWriter.h"
15 #include "llvm/Support/Endian.h"
16 
17 #include <map>
18 
19 using namespace llvm;
20 using namespace llvm::msf;
21 using namespace llvm::support;
22 using namespace llvm::support::endian;
23 using namespace llvm::pdb;
24 
25 StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
26     : Table(&Table) {}
27 
28 uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
29   // The reference implementation doesn't include code for /src/headerblock
30   // handling, but it can only read natvis entries lld's PDB files if
31   // this hash function truncates the hash to 16 bit.
32   // PDB/include/misc.h in the reference implementation has a hashSz() function
33   // that returns an unsigned short, that seems what's being used for
34   // /src/headerblock.
35   return static_cast<uint16_t>(Table->getIdForString(S));
36 }
37 
38 StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
39   return Table->getStringForId(Offset);
40 }
41 
42 uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) {
43   return Table->insert(S);
44 }
45 
46 uint32_t PDBStringTableBuilder::insert(StringRef S) {
47   return Strings.insert(S);
48 }
49 
50 uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const {
51   return Strings.getIdForString(S);
52 }
53 
54 StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
55   return Strings.getStringForId(Id);
56 }
57 
58 static uint32_t computeBucketCount(uint32_t NumStrings) {
59   // This is a precomputed list of Buckets given the specified number of
60   // strings.  Matching the reference algorithm exactly is not strictly
61   // necessary for correctness, but it helps when comparing LLD's PDBs with
62   // Microsoft's PDBs so as to eliminate superfluous differences.
63   // The reference implementation does (in nmt.h, NMT::grow()):
64   //   unsigned StringCount = 0;
65   //   unsigned BucketCount = 1;
66   //   fn insert() {
67   //     ++StringCount;
68   //     if (BucketCount * 3 / 4 < StringCount)
69   //       BucketCount = BucketCount * 3 / 2 + 1;
70   //   }
71   // This list contains all StringCount, BucketCount pairs where BucketCount was
72   // just incremented.  It ends before the first BucketCount entry where
73   // BucketCount * 3 would overflow a 32-bit unsigned int.
74   static std::map<uint32_t, uint32_t> StringsToBuckets = {
75       {0, 1},
76       {1, 2},
77       {2, 4},
78       {4, 7},
79       {6, 11},
80       {9, 17},
81       {13, 26},
82       {20, 40},
83       {31, 61},
84       {46, 92},
85       {70, 139},
86       {105, 209},
87       {157, 314},
88       {236, 472},
89       {355, 709},
90       {532, 1064},
91       {799, 1597},
92       {1198, 2396},
93       {1798, 3595},
94       {2697, 5393},
95       {4045, 8090},
96       {6068, 12136},
97       {9103, 18205},
98       {13654, 27308},
99       {20482, 40963},
100       {30723, 61445},
101       {46084, 92168},
102       {69127, 138253},
103       {103690, 207380},
104       {155536, 311071},
105       {233304, 466607},
106       {349956, 699911},
107       {524934, 1049867},
108       {787401, 1574801},
109       {1181101, 2362202},
110       {1771652, 3543304},
111       {2657479, 5314957},
112       {3986218, 7972436},
113       {5979328, 11958655},
114       {8968992, 17937983},
115       {13453488, 26906975},
116       {20180232, 40360463},
117       {30270348, 60540695},
118       {45405522, 90811043},
119       {68108283, 136216565},
120       {102162424, 204324848},
121       {153243637, 306487273},
122       {229865455, 459730910},
123       {344798183, 689596366},
124       {517197275, 1034394550},
125       {775795913, 1551591826},
126       {1163693870, 2327387740}};
127   auto Entry = StringsToBuckets.lower_bound(NumStrings);
128   assert(Entry != StringsToBuckets.end());
129   return Entry->second;
130 }
131 
132 uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
133   uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
134   Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
135 
136   return Size;
137 }
138 
139 uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
140   uint32_t Size = 0;
141   Size += sizeof(PDBStringTableHeader);
142   Size += Strings.calculateSerializedSize();
143   Size += calculateHashTableSize();
144   Size += sizeof(uint32_t); // The /names stream ends with the string count.
145   return Size;
146 }
147 
148 void PDBStringTableBuilder::setStrings(
149     const codeview::DebugStringTableSubsection &Strings) {
150   this->Strings = Strings;
151 }
152 
153 Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
154   // Write a header
155   PDBStringTableHeader H;
156   H.Signature = PDBStringTableSignature;
157   H.HashVersion = 1;
158   H.ByteSize = Strings.calculateSerializedSize();
159   if (auto EC = Writer.writeObject(H))
160     return EC;
161   assert(Writer.bytesRemaining() == 0);
162   return Error::success();
163 }
164 
165 Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
166   if (auto EC = Strings.commit(Writer))
167     return EC;
168 
169   assert(Writer.bytesRemaining() == 0);
170   return Error::success();
171 }
172 
173 Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
174   // Write a hash table.
175   uint32_t BucketCount = computeBucketCount(Strings.size());
176   if (auto EC = Writer.writeInteger(BucketCount))
177     return EC;
178   std::vector<ulittle32_t> Buckets(BucketCount);
179 
180   for (auto &Pair : Strings) {
181     StringRef S = Pair.getKey();
182     uint32_t Offset = Pair.getValue();
183     uint32_t Hash = hashStringV1(S);
184 
185     for (uint32_t I = 0; I != BucketCount; ++I) {
186       uint32_t Slot = (Hash + I) % BucketCount;
187       if (Buckets[Slot] != 0)
188         continue;
189       Buckets[Slot] = Offset;
190       break;
191     }
192   }
193 
194   if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
195     return EC;
196 
197   assert(Writer.bytesRemaining() == 0);
198   return Error::success();
199 }
200 
201 Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
202   if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
203     return EC;
204   assert(Writer.bytesRemaining() == 0);
205   return Error::success();
206 }
207 
208 Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
209   BinaryStreamWriter SectionWriter;
210 
211   std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
212   if (auto EC = writeHeader(SectionWriter))
213     return EC;
214 
215   std::tie(SectionWriter, Writer) =
216       Writer.split(Strings.calculateSerializedSize());
217   if (auto EC = writeStrings(SectionWriter))
218     return EC;
219 
220   std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
221   if (auto EC = writeHashTable(SectionWriter))
222     return EC;
223 
224   std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
225   if (auto EC = writeEpilogue(SectionWriter))
226     return EC;
227 
228   return Error::success();
229 }
230