1 //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" 10 11 #include "llvm/ADT/ArrayRef.h" 12 #include "llvm/DebugInfo/PDB/Native/Hash.h" 13 #include "llvm/DebugInfo/PDB/Native/RawTypes.h" 14 #include "llvm/Support/BinaryStreamWriter.h" 15 #include "llvm/Support/Endian.h" 16 17 #include <map> 18 19 using namespace llvm; 20 using namespace llvm::msf; 21 using namespace llvm::support; 22 using namespace llvm::support::endian; 23 using namespace llvm::pdb; 24 25 StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table) 26 : Table(&Table) {} 27 28 uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const { 29 // The reference implementation doesn't include code for /src/headerblock 30 // handling, but it can only read natvis entries lld's PDB files if 31 // this hash function truncates the hash to 16 bit. 32 // PDB/include/misc.h in the reference implementation has a hashSz() function 33 // that returns an unsigned short, that seems what's being used for 34 // /src/headerblock. 35 return static_cast<uint16_t>(Table->getIdForString(S)); 36 } 37 38 StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const { 39 return Table->getStringForId(Offset); 40 } 41 42 uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) { 43 return Table->insert(S); 44 } 45 46 uint32_t PDBStringTableBuilder::insert(StringRef S) { 47 return Strings.insert(S); 48 } 49 50 uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const { 51 return Strings.getIdForString(S); 52 } 53 54 StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const { 55 return Strings.getStringForId(Id); 56 } 57 58 static uint32_t computeBucketCount(uint32_t NumStrings) { 59 // This is a precomputed list of Buckets given the specified number of 60 // strings. Matching the reference algorithm exactly is not strictly 61 // necessary for correctness, but it helps when comparing LLD's PDBs with 62 // Microsoft's PDBs so as to eliminate superfluous differences. 63 // The reference implementation does (in nmt.h, NMT::grow()): 64 // unsigned StringCount = 0; 65 // unsigned BucketCount = 1; 66 // fn insert() { 67 // ++StringCount; 68 // if (BucketCount * 3 / 4 < StringCount) 69 // BucketCount = BucketCount * 3 / 2 + 1; 70 // } 71 // This list contains all StringCount, BucketCount pairs where BucketCount was 72 // just incremented. It ends before the first BucketCount entry where 73 // BucketCount * 3 would overflow a 32-bit unsigned int. 74 static std::map<uint32_t, uint32_t> StringsToBuckets = { 75 {0, 1}, 76 {1, 2}, 77 {2, 4}, 78 {4, 7}, 79 {6, 11}, 80 {9, 17}, 81 {13, 26}, 82 {20, 40}, 83 {31, 61}, 84 {46, 92}, 85 {70, 139}, 86 {105, 209}, 87 {157, 314}, 88 {236, 472}, 89 {355, 709}, 90 {532, 1064}, 91 {799, 1597}, 92 {1198, 2396}, 93 {1798, 3595}, 94 {2697, 5393}, 95 {4045, 8090}, 96 {6068, 12136}, 97 {9103, 18205}, 98 {13654, 27308}, 99 {20482, 40963}, 100 {30723, 61445}, 101 {46084, 92168}, 102 {69127, 138253}, 103 {103690, 207380}, 104 {155536, 311071}, 105 {233304, 466607}, 106 {349956, 699911}, 107 {524934, 1049867}, 108 {787401, 1574801}, 109 {1181101, 2362202}, 110 {1771652, 3543304}, 111 {2657479, 5314957}, 112 {3986218, 7972436}, 113 {5979328, 11958655}, 114 {8968992, 17937983}, 115 {13453488, 26906975}, 116 {20180232, 40360463}, 117 {30270348, 60540695}, 118 {45405522, 90811043}, 119 {68108283, 136216565}, 120 {102162424, 204324848}, 121 {153243637, 306487273}, 122 {229865455, 459730910}, 123 {344798183, 689596366}, 124 {517197275, 1034394550}, 125 {775795913, 1551591826}, 126 {1163693870, 2327387740}}; 127 auto Entry = StringsToBuckets.lower_bound(NumStrings); 128 assert(Entry != StringsToBuckets.end()); 129 return Entry->second; 130 } 131 132 uint32_t PDBStringTableBuilder::calculateHashTableSize() const { 133 uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field. 134 Size += sizeof(uint32_t) * computeBucketCount(Strings.size()); 135 136 return Size; 137 } 138 139 uint32_t PDBStringTableBuilder::calculateSerializedSize() const { 140 uint32_t Size = 0; 141 Size += sizeof(PDBStringTableHeader); 142 Size += Strings.calculateSerializedSize(); 143 Size += calculateHashTableSize(); 144 Size += sizeof(uint32_t); // The /names stream ends with the string count. 145 return Size; 146 } 147 148 void PDBStringTableBuilder::setStrings( 149 const codeview::DebugStringTableSubsection &Strings) { 150 this->Strings = Strings; 151 } 152 153 Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const { 154 // Write a header 155 PDBStringTableHeader H; 156 H.Signature = PDBStringTableSignature; 157 H.HashVersion = 1; 158 H.ByteSize = Strings.calculateSerializedSize(); 159 if (auto EC = Writer.writeObject(H)) 160 return EC; 161 assert(Writer.bytesRemaining() == 0); 162 return Error::success(); 163 } 164 165 Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const { 166 if (auto EC = Strings.commit(Writer)) 167 return EC; 168 169 assert(Writer.bytesRemaining() == 0); 170 return Error::success(); 171 } 172 173 Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const { 174 // Write a hash table. 175 uint32_t BucketCount = computeBucketCount(Strings.size()); 176 if (auto EC = Writer.writeInteger(BucketCount)) 177 return EC; 178 std::vector<ulittle32_t> Buckets(BucketCount); 179 180 for (auto &Pair : Strings) { 181 StringRef S = Pair.getKey(); 182 uint32_t Offset = Pair.getValue(); 183 uint32_t Hash = hashStringV1(S); 184 185 for (uint32_t I = 0; I != BucketCount; ++I) { 186 uint32_t Slot = (Hash + I) % BucketCount; 187 if (Buckets[Slot] != 0) 188 continue; 189 Buckets[Slot] = Offset; 190 break; 191 } 192 } 193 194 if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets))) 195 return EC; 196 197 assert(Writer.bytesRemaining() == 0); 198 return Error::success(); 199 } 200 201 Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const { 202 if (auto EC = Writer.writeInteger<uint32_t>(Strings.size())) 203 return EC; 204 assert(Writer.bytesRemaining() == 0); 205 return Error::success(); 206 } 207 208 Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { 209 BinaryStreamWriter SectionWriter; 210 211 std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader)); 212 if (auto EC = writeHeader(SectionWriter)) 213 return EC; 214 215 std::tie(SectionWriter, Writer) = 216 Writer.split(Strings.calculateSerializedSize()); 217 if (auto EC = writeStrings(SectionWriter)) 218 return EC; 219 220 std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize()); 221 if (auto EC = writeHashTable(SectionWriter)) 222 return EC; 223 224 std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t)); 225 if (auto EC = writeEpilogue(SectionWriter)) 226 return EC; 227 228 return Error::success(); 229 } 230