10b57cec5SDimitry Andric //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" 100b57cec5SDimitry Andric 110b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h" 120b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/Hash.h" 130b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/RawTypes.h" 140b57cec5SDimitry Andric #include "llvm/Support/BinaryStreamWriter.h" 150b57cec5SDimitry Andric #include "llvm/Support/Endian.h" 16*5f757f3fSDimitry Andric #include "llvm/Support/TimeProfiler.h" 170b57cec5SDimitry Andric 180b57cec5SDimitry Andric #include <map> 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric using namespace llvm; 210b57cec5SDimitry Andric using namespace llvm::msf; 220b57cec5SDimitry Andric using namespace llvm::support; 230b57cec5SDimitry Andric using namespace llvm::support::endian; 240b57cec5SDimitry Andric using namespace llvm::pdb; 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table) 270b57cec5SDimitry Andric : Table(&Table) {} 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const { 300b57cec5SDimitry Andric // The reference implementation doesn't include code for /src/headerblock 310b57cec5SDimitry Andric // handling, but it can only read natvis entries lld's PDB files if 320b57cec5SDimitry Andric // this hash function truncates the hash to 16 bit. 330b57cec5SDimitry Andric // PDB/include/misc.h in the reference implementation has a hashSz() function 340b57cec5SDimitry Andric // that returns an unsigned short, that seems what's being used for 350b57cec5SDimitry Andric // /src/headerblock. 360b57cec5SDimitry Andric return static_cast<uint16_t>(Table->getIdForString(S)); 370b57cec5SDimitry Andric } 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const { 400b57cec5SDimitry Andric return Table->getStringForId(Offset); 410b57cec5SDimitry Andric } 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) { 440b57cec5SDimitry Andric return Table->insert(S); 450b57cec5SDimitry Andric } 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric uint32_t PDBStringTableBuilder::insert(StringRef S) { 480b57cec5SDimitry Andric return Strings.insert(S); 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const { 520b57cec5SDimitry Andric return Strings.getIdForString(S); 530b57cec5SDimitry Andric } 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const { 560b57cec5SDimitry Andric return Strings.getStringForId(Id); 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric static uint32_t computeBucketCount(uint32_t NumStrings) { 600b57cec5SDimitry Andric // This is a precomputed list of Buckets given the specified number of 610b57cec5SDimitry Andric // strings. Matching the reference algorithm exactly is not strictly 620b57cec5SDimitry Andric // necessary for correctness, but it helps when comparing LLD's PDBs with 630b57cec5SDimitry Andric // Microsoft's PDBs so as to eliminate superfluous differences. 640b57cec5SDimitry Andric // The reference implementation does (in nmt.h, NMT::grow()): 650b57cec5SDimitry Andric // unsigned StringCount = 0; 660b57cec5SDimitry Andric // unsigned BucketCount = 1; 670b57cec5SDimitry Andric // fn insert() { 680b57cec5SDimitry Andric // ++StringCount; 690b57cec5SDimitry Andric // if (BucketCount * 3 / 4 < StringCount) 700b57cec5SDimitry Andric // BucketCount = BucketCount * 3 / 2 + 1; 710b57cec5SDimitry Andric // } 720b57cec5SDimitry Andric // This list contains all StringCount, BucketCount pairs where BucketCount was 730b57cec5SDimitry Andric // just incremented. It ends before the first BucketCount entry where 740b57cec5SDimitry Andric // BucketCount * 3 would overflow a 32-bit unsigned int. 7581ad6265SDimitry Andric static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = { 760b57cec5SDimitry Andric {0, 1}, 770b57cec5SDimitry Andric {1, 2}, 780b57cec5SDimitry Andric {2, 4}, 790b57cec5SDimitry Andric {4, 7}, 800b57cec5SDimitry Andric {6, 11}, 810b57cec5SDimitry Andric {9, 17}, 820b57cec5SDimitry Andric {13, 26}, 830b57cec5SDimitry Andric {20, 40}, 840b57cec5SDimitry Andric {31, 61}, 850b57cec5SDimitry Andric {46, 92}, 860b57cec5SDimitry Andric {70, 139}, 870b57cec5SDimitry Andric {105, 209}, 880b57cec5SDimitry Andric {157, 314}, 890b57cec5SDimitry Andric {236, 472}, 900b57cec5SDimitry Andric {355, 709}, 910b57cec5SDimitry Andric {532, 1064}, 920b57cec5SDimitry Andric {799, 1597}, 930b57cec5SDimitry Andric {1198, 2396}, 940b57cec5SDimitry Andric {1798, 3595}, 950b57cec5SDimitry Andric {2697, 5393}, 960b57cec5SDimitry Andric {4045, 8090}, 970b57cec5SDimitry Andric {6068, 12136}, 980b57cec5SDimitry Andric {9103, 18205}, 990b57cec5SDimitry Andric {13654, 27308}, 1000b57cec5SDimitry Andric {20482, 40963}, 1010b57cec5SDimitry Andric {30723, 61445}, 1020b57cec5SDimitry Andric {46084, 92168}, 1030b57cec5SDimitry Andric {69127, 138253}, 1040b57cec5SDimitry Andric {103690, 207380}, 1050b57cec5SDimitry Andric {155536, 311071}, 1060b57cec5SDimitry Andric {233304, 466607}, 1070b57cec5SDimitry Andric {349956, 699911}, 1080b57cec5SDimitry Andric {524934, 1049867}, 1090b57cec5SDimitry Andric {787401, 1574801}, 1100b57cec5SDimitry Andric {1181101, 2362202}, 1110b57cec5SDimitry Andric {1771652, 3543304}, 1120b57cec5SDimitry Andric {2657479, 5314957}, 1130b57cec5SDimitry Andric {3986218, 7972436}, 1140b57cec5SDimitry Andric {5979328, 11958655}, 1150b57cec5SDimitry Andric {8968992, 17937983}, 1160b57cec5SDimitry Andric {13453488, 26906975}, 1170b57cec5SDimitry Andric {20180232, 40360463}, 1180b57cec5SDimitry Andric {30270348, 60540695}, 1190b57cec5SDimitry Andric {45405522, 90811043}, 1200b57cec5SDimitry Andric {68108283, 136216565}, 1210b57cec5SDimitry Andric {102162424, 204324848}, 1220b57cec5SDimitry Andric {153243637, 306487273}, 1230b57cec5SDimitry Andric {229865455, 459730910}, 1240b57cec5SDimitry Andric {344798183, 689596366}, 1250b57cec5SDimitry Andric {517197275, 1034394550}, 1260b57cec5SDimitry Andric {775795913, 1551591826}, 1270b57cec5SDimitry Andric {1163693870, 2327387740}}; 12881ad6265SDimitry Andric const auto *Entry = llvm::lower_bound( 12981ad6265SDimitry Andric StringsToBuckets, std::make_pair(NumStrings, 0U), llvm::less_first()); 13081ad6265SDimitry Andric assert(Entry != std::end(StringsToBuckets)); 1310b57cec5SDimitry Andric return Entry->second; 1320b57cec5SDimitry Andric } 1330b57cec5SDimitry Andric 1340b57cec5SDimitry Andric uint32_t PDBStringTableBuilder::calculateHashTableSize() const { 1350b57cec5SDimitry Andric uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field. 1360b57cec5SDimitry Andric Size += sizeof(uint32_t) * computeBucketCount(Strings.size()); 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric return Size; 1390b57cec5SDimitry Andric } 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric uint32_t PDBStringTableBuilder::calculateSerializedSize() const { 1420b57cec5SDimitry Andric uint32_t Size = 0; 1430b57cec5SDimitry Andric Size += sizeof(PDBStringTableHeader); 1440b57cec5SDimitry Andric Size += Strings.calculateSerializedSize(); 1450b57cec5SDimitry Andric Size += calculateHashTableSize(); 1460b57cec5SDimitry Andric Size += sizeof(uint32_t); // The /names stream ends with the string count. 1470b57cec5SDimitry Andric return Size; 1480b57cec5SDimitry Andric } 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric void PDBStringTableBuilder::setStrings( 1510b57cec5SDimitry Andric const codeview::DebugStringTableSubsection &Strings) { 1520b57cec5SDimitry Andric this->Strings = Strings; 1530b57cec5SDimitry Andric } 1540b57cec5SDimitry Andric 1550b57cec5SDimitry Andric Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const { 1560b57cec5SDimitry Andric // Write a header 1570b57cec5SDimitry Andric PDBStringTableHeader H; 1580b57cec5SDimitry Andric H.Signature = PDBStringTableSignature; 1590b57cec5SDimitry Andric H.HashVersion = 1; 1600b57cec5SDimitry Andric H.ByteSize = Strings.calculateSerializedSize(); 1610b57cec5SDimitry Andric if (auto EC = Writer.writeObject(H)) 1620b57cec5SDimitry Andric return EC; 1630b57cec5SDimitry Andric assert(Writer.bytesRemaining() == 0); 1640b57cec5SDimitry Andric return Error::success(); 1650b57cec5SDimitry Andric } 1660b57cec5SDimitry Andric 1670b57cec5SDimitry Andric Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const { 1680b57cec5SDimitry Andric if (auto EC = Strings.commit(Writer)) 1690b57cec5SDimitry Andric return EC; 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric assert(Writer.bytesRemaining() == 0); 1720b57cec5SDimitry Andric return Error::success(); 1730b57cec5SDimitry Andric } 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const { 1760b57cec5SDimitry Andric // Write a hash table. 1770b57cec5SDimitry Andric uint32_t BucketCount = computeBucketCount(Strings.size()); 1780b57cec5SDimitry Andric if (auto EC = Writer.writeInteger(BucketCount)) 1790b57cec5SDimitry Andric return EC; 1800b57cec5SDimitry Andric std::vector<ulittle32_t> Buckets(BucketCount); 1810b57cec5SDimitry Andric 182bdd1243dSDimitry Andric for (const auto &Pair : Strings) { 1830b57cec5SDimitry Andric StringRef S = Pair.getKey(); 1840b57cec5SDimitry Andric uint32_t Offset = Pair.getValue(); 1850b57cec5SDimitry Andric uint32_t Hash = hashStringV1(S); 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric for (uint32_t I = 0; I != BucketCount; ++I) { 1880b57cec5SDimitry Andric uint32_t Slot = (Hash + I) % BucketCount; 1890b57cec5SDimitry Andric if (Buckets[Slot] != 0) 1900b57cec5SDimitry Andric continue; 1910b57cec5SDimitry Andric Buckets[Slot] = Offset; 1920b57cec5SDimitry Andric break; 1930b57cec5SDimitry Andric } 1940b57cec5SDimitry Andric } 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets))) 1970b57cec5SDimitry Andric return EC; 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric assert(Writer.bytesRemaining() == 0); 2000b57cec5SDimitry Andric return Error::success(); 2010b57cec5SDimitry Andric } 2020b57cec5SDimitry Andric 2030b57cec5SDimitry Andric Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const { 2040b57cec5SDimitry Andric if (auto EC = Writer.writeInteger<uint32_t>(Strings.size())) 2050b57cec5SDimitry Andric return EC; 2060b57cec5SDimitry Andric assert(Writer.bytesRemaining() == 0); 2070b57cec5SDimitry Andric return Error::success(); 2080b57cec5SDimitry Andric } 2090b57cec5SDimitry Andric 2100b57cec5SDimitry Andric Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { 211*5f757f3fSDimitry Andric llvm::TimeTraceScope timeScope("Commit strings table"); 2120b57cec5SDimitry Andric BinaryStreamWriter SectionWriter; 2130b57cec5SDimitry Andric 2140b57cec5SDimitry Andric std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader)); 2150b57cec5SDimitry Andric if (auto EC = writeHeader(SectionWriter)) 2160b57cec5SDimitry Andric return EC; 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric std::tie(SectionWriter, Writer) = 2190b57cec5SDimitry Andric Writer.split(Strings.calculateSerializedSize()); 2200b57cec5SDimitry Andric if (auto EC = writeStrings(SectionWriter)) 2210b57cec5SDimitry Andric return EC; 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize()); 2240b57cec5SDimitry Andric if (auto EC = writeHashTable(SectionWriter)) 2250b57cec5SDimitry Andric return EC; 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t)); 2280b57cec5SDimitry Andric if (auto EC = writeEpilogue(SectionWriter)) 2290b57cec5SDimitry Andric return EC; 2300b57cec5SDimitry Andric 2310b57cec5SDimitry Andric return Error::success(); 2320b57cec5SDimitry Andric } 233