1 //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" 10 11 #include "llvm/ADT/ArrayRef.h" 12 #include "llvm/DebugInfo/PDB/Native/Hash.h" 13 #include "llvm/DebugInfo/PDB/Native/RawTypes.h" 14 #include "llvm/Support/BinaryStreamWriter.h" 15 #include "llvm/Support/Endian.h" 16 #include "llvm/Support/TimeProfiler.h" 17 18 #include <map> 19 20 using namespace llvm; 21 using namespace llvm::msf; 22 using namespace llvm::support; 23 using namespace llvm::support::endian; 24 using namespace llvm::pdb; 25 26 StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table) 27 : Table(&Table) {} 28 29 uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const { 30 // The reference implementation doesn't include code for /src/headerblock 31 // handling, but it can only read natvis entries lld's PDB files if 32 // this hash function truncates the hash to 16 bit. 33 // PDB/include/misc.h in the reference implementation has a hashSz() function 34 // that returns an unsigned short, that seems what's being used for 35 // /src/headerblock. 36 return static_cast<uint16_t>(Table->getIdForString(S)); 37 } 38 39 StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const { 40 return Table->getStringForId(Offset); 41 } 42 43 uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) { 44 return Table->insert(S); 45 } 46 47 uint32_t PDBStringTableBuilder::insert(StringRef S) { 48 return Strings.insert(S); 49 } 50 51 uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const { 52 return Strings.getIdForString(S); 53 } 54 55 StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const { 56 return Strings.getStringForId(Id); 57 } 58 59 static uint32_t computeBucketCount(uint32_t NumStrings) { 60 // This is a precomputed list of Buckets given the specified number of 61 // strings. Matching the reference algorithm exactly is not strictly 62 // necessary for correctness, but it helps when comparing LLD's PDBs with 63 // Microsoft's PDBs so as to eliminate superfluous differences. 64 // The reference implementation does (in nmt.h, NMT::grow()): 65 // unsigned StringCount = 0; 66 // unsigned BucketCount = 1; 67 // fn insert() { 68 // ++StringCount; 69 // if (BucketCount * 3 / 4 < StringCount) 70 // BucketCount = BucketCount * 3 / 2 + 1; 71 // } 72 // This list contains all StringCount, BucketCount pairs where BucketCount was 73 // just incremented. It ends before the first BucketCount entry where 74 // BucketCount * 3 would overflow a 32-bit unsigned int. 75 static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = { 76 {0, 1}, 77 {1, 2}, 78 {2, 4}, 79 {4, 7}, 80 {6, 11}, 81 {9, 17}, 82 {13, 26}, 83 {20, 40}, 84 {31, 61}, 85 {46, 92}, 86 {70, 139}, 87 {105, 209}, 88 {157, 314}, 89 {236, 472}, 90 {355, 709}, 91 {532, 1064}, 92 {799, 1597}, 93 {1198, 2396}, 94 {1798, 3595}, 95 {2697, 5393}, 96 {4045, 8090}, 97 {6068, 12136}, 98 {9103, 18205}, 99 {13654, 27308}, 100 {20482, 40963}, 101 {30723, 61445}, 102 {46084, 92168}, 103 {69127, 138253}, 104 {103690, 207380}, 105 {155536, 311071}, 106 {233304, 466607}, 107 {349956, 699911}, 108 {524934, 1049867}, 109 {787401, 1574801}, 110 {1181101, 2362202}, 111 {1771652, 3543304}, 112 {2657479, 5314957}, 113 {3986218, 7972436}, 114 {5979328, 11958655}, 115 {8968992, 17937983}, 116 {13453488, 26906975}, 117 {20180232, 40360463}, 118 {30270348, 60540695}, 119 {45405522, 90811043}, 120 {68108283, 136216565}, 121 {102162424, 204324848}, 122 {153243637, 306487273}, 123 {229865455, 459730910}, 124 {344798183, 689596366}, 125 {517197275, 1034394550}, 126 {775795913, 1551591826}, 127 {1163693870, 2327387740}}; 128 const auto *Entry = llvm::lower_bound( 129 StringsToBuckets, std::make_pair(NumStrings, 0U), llvm::less_first()); 130 assert(Entry != std::end(StringsToBuckets)); 131 return Entry->second; 132 } 133 134 uint32_t PDBStringTableBuilder::calculateHashTableSize() const { 135 uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field. 136 Size += sizeof(uint32_t) * computeBucketCount(Strings.size()); 137 138 return Size; 139 } 140 141 uint32_t PDBStringTableBuilder::calculateSerializedSize() const { 142 uint32_t Size = 0; 143 Size += sizeof(PDBStringTableHeader); 144 Size += Strings.calculateSerializedSize(); 145 Size += calculateHashTableSize(); 146 Size += sizeof(uint32_t); // The /names stream ends with the string count. 147 return Size; 148 } 149 150 void PDBStringTableBuilder::setStrings( 151 const codeview::DebugStringTableSubsection &Strings) { 152 this->Strings = Strings; 153 } 154 155 Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const { 156 // Write a header 157 PDBStringTableHeader H; 158 H.Signature = PDBStringTableSignature; 159 H.HashVersion = 1; 160 H.ByteSize = Strings.calculateSerializedSize(); 161 if (auto EC = Writer.writeObject(H)) 162 return EC; 163 assert(Writer.bytesRemaining() == 0); 164 return Error::success(); 165 } 166 167 Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const { 168 if (auto EC = Strings.commit(Writer)) 169 return EC; 170 171 assert(Writer.bytesRemaining() == 0); 172 return Error::success(); 173 } 174 175 Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const { 176 // Write a hash table. 177 uint32_t BucketCount = computeBucketCount(Strings.size()); 178 if (auto EC = Writer.writeInteger(BucketCount)) 179 return EC; 180 std::vector<ulittle32_t> Buckets(BucketCount); 181 182 for (const auto &Pair : Strings) { 183 StringRef S = Pair.getKey(); 184 uint32_t Offset = Pair.getValue(); 185 uint32_t Hash = hashStringV1(S); 186 187 for (uint32_t I = 0; I != BucketCount; ++I) { 188 uint32_t Slot = (Hash + I) % BucketCount; 189 if (Buckets[Slot] != 0) 190 continue; 191 Buckets[Slot] = Offset; 192 break; 193 } 194 } 195 196 if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets))) 197 return EC; 198 199 assert(Writer.bytesRemaining() == 0); 200 return Error::success(); 201 } 202 203 Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const { 204 if (auto EC = Writer.writeInteger<uint32_t>(Strings.size())) 205 return EC; 206 assert(Writer.bytesRemaining() == 0); 207 return Error::success(); 208 } 209 210 Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { 211 llvm::TimeTraceScope timeScope("Commit strings table"); 212 BinaryStreamWriter SectionWriter; 213 214 std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader)); 215 if (auto EC = writeHeader(SectionWriter)) 216 return EC; 217 218 std::tie(SectionWriter, Writer) = 219 Writer.split(Strings.calculateSerializedSize()); 220 if (auto EC = writeStrings(SectionWriter)) 221 return EC; 222 223 std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize()); 224 if (auto EC = writeHashTable(SectionWriter)) 225 return EC; 226 227 std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t)); 228 if (auto EC = writeEpilogue(SectionWriter)) 229 return EC; 230 231 return Error::success(); 232 } 233