xref: /freebsd/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
10 
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/DebugInfo/PDB/Native/Hash.h"
13 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
14 #include "llvm/Support/BinaryStreamWriter.h"
15 #include "llvm/Support/Endian.h"
16 #include "llvm/Support/TimeProfiler.h"
17 
18 #include <map>
19 
20 using namespace llvm;
21 using namespace llvm::msf;
22 using namespace llvm::support;
23 using namespace llvm::support::endian;
24 using namespace llvm::pdb;
25 
26 StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
27     : Table(&Table) {}
28 
29 uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
30   // The reference implementation doesn't include code for /src/headerblock
31   // handling, but it can only read natvis entries lld's PDB files if
32   // this hash function truncates the hash to 16 bit.
33   // PDB/include/misc.h in the reference implementation has a hashSz() function
34   // that returns an unsigned short, that seems what's being used for
35   // /src/headerblock.
36   return static_cast<uint16_t>(Table->getIdForString(S));
37 }
38 
39 StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
40   return Table->getStringForId(Offset);
41 }
42 
43 uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) {
44   return Table->insert(S);
45 }
46 
47 uint32_t PDBStringTableBuilder::insert(StringRef S) {
48   return Strings.insert(S);
49 }
50 
51 uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const {
52   return Strings.getIdForString(S);
53 }
54 
55 StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
56   return Strings.getStringForId(Id);
57 }
58 
59 static uint32_t computeBucketCount(uint32_t NumStrings) {
60   // This is a precomputed list of Buckets given the specified number of
61   // strings.  Matching the reference algorithm exactly is not strictly
62   // necessary for correctness, but it helps when comparing LLD's PDBs with
63   // Microsoft's PDBs so as to eliminate superfluous differences.
64   // The reference implementation does (in nmt.h, NMT::grow()):
65   //   unsigned StringCount = 0;
66   //   unsigned BucketCount = 1;
67   //   fn insert() {
68   //     ++StringCount;
69   //     if (BucketCount * 3 / 4 < StringCount)
70   //       BucketCount = BucketCount * 3 / 2 + 1;
71   //   }
72   // This list contains all StringCount, BucketCount pairs where BucketCount was
73   // just incremented.  It ends before the first BucketCount entry where
74   // BucketCount * 3 would overflow a 32-bit unsigned int.
75   static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = {
76       {0, 1},
77       {1, 2},
78       {2, 4},
79       {4, 7},
80       {6, 11},
81       {9, 17},
82       {13, 26},
83       {20, 40},
84       {31, 61},
85       {46, 92},
86       {70, 139},
87       {105, 209},
88       {157, 314},
89       {236, 472},
90       {355, 709},
91       {532, 1064},
92       {799, 1597},
93       {1198, 2396},
94       {1798, 3595},
95       {2697, 5393},
96       {4045, 8090},
97       {6068, 12136},
98       {9103, 18205},
99       {13654, 27308},
100       {20482, 40963},
101       {30723, 61445},
102       {46084, 92168},
103       {69127, 138253},
104       {103690, 207380},
105       {155536, 311071},
106       {233304, 466607},
107       {349956, 699911},
108       {524934, 1049867},
109       {787401, 1574801},
110       {1181101, 2362202},
111       {1771652, 3543304},
112       {2657479, 5314957},
113       {3986218, 7972436},
114       {5979328, 11958655},
115       {8968992, 17937983},
116       {13453488, 26906975},
117       {20180232, 40360463},
118       {30270348, 60540695},
119       {45405522, 90811043},
120       {68108283, 136216565},
121       {102162424, 204324848},
122       {153243637, 306487273},
123       {229865455, 459730910},
124       {344798183, 689596366},
125       {517197275, 1034394550},
126       {775795913, 1551591826},
127       {1163693870, 2327387740}};
128   const auto *Entry = llvm::lower_bound(
129       StringsToBuckets, std::make_pair(NumStrings, 0U), llvm::less_first());
130   assert(Entry != std::end(StringsToBuckets));
131   return Entry->second;
132 }
133 
134 uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
135   uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
136   Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
137 
138   return Size;
139 }
140 
141 uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
142   uint32_t Size = 0;
143   Size += sizeof(PDBStringTableHeader);
144   Size += Strings.calculateSerializedSize();
145   Size += calculateHashTableSize();
146   Size += sizeof(uint32_t); // The /names stream ends with the string count.
147   return Size;
148 }
149 
150 void PDBStringTableBuilder::setStrings(
151     const codeview::DebugStringTableSubsection &Strings) {
152   this->Strings = Strings;
153 }
154 
155 Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
156   // Write a header
157   PDBStringTableHeader H;
158   H.Signature = PDBStringTableSignature;
159   H.HashVersion = 1;
160   H.ByteSize = Strings.calculateSerializedSize();
161   if (auto EC = Writer.writeObject(H))
162     return EC;
163   assert(Writer.bytesRemaining() == 0);
164   return Error::success();
165 }
166 
167 Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
168   if (auto EC = Strings.commit(Writer))
169     return EC;
170 
171   assert(Writer.bytesRemaining() == 0);
172   return Error::success();
173 }
174 
175 Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
176   // Write a hash table.
177   uint32_t BucketCount = computeBucketCount(Strings.size());
178   if (auto EC = Writer.writeInteger(BucketCount))
179     return EC;
180   std::vector<ulittle32_t> Buckets(BucketCount);
181 
182   for (const auto &Pair : Strings) {
183     StringRef S = Pair.getKey();
184     uint32_t Offset = Pair.getValue();
185     uint32_t Hash = hashStringV1(S);
186 
187     for (uint32_t I = 0; I != BucketCount; ++I) {
188       uint32_t Slot = (Hash + I) % BucketCount;
189       if (Buckets[Slot] != 0)
190         continue;
191       Buckets[Slot] = Offset;
192       break;
193     }
194   }
195 
196   if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
197     return EC;
198 
199   assert(Writer.bytesRemaining() == 0);
200   return Error::success();
201 }
202 
203 Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
204   if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
205     return EC;
206   assert(Writer.bytesRemaining() == 0);
207   return Error::success();
208 }
209 
210 Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
211   llvm::TimeTraceScope timeScope("Commit strings table");
212   BinaryStreamWriter SectionWriter;
213 
214   std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
215   if (auto EC = writeHeader(SectionWriter))
216     return EC;
217 
218   std::tie(SectionWriter, Writer) =
219       Writer.split(Strings.calculateSerializedSize());
220   if (auto EC = writeStrings(SectionWriter))
221     return EC;
222 
223   std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
224   if (auto EC = writeHashTable(SectionWriter))
225     return EC;
226 
227   std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
228   if (auto EC = writeEpilogue(SectionWriter))
229     return EC;
230 
231   return Error::success();
232 }
233