1 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h" 10 11 #include "llvm/ADT/DenseSet.h" 12 #include "llvm/DebugInfo/CodeView/RecordName.h" 13 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" 14 #include "llvm/DebugInfo/CodeView/SymbolRecord.h" 15 #include "llvm/DebugInfo/CodeView/SymbolSerializer.h" 16 #include "llvm/DebugInfo/MSF/MSFBuilder.h" 17 #include "llvm/DebugInfo/MSF/MSFCommon.h" 18 #include "llvm/DebugInfo/MSF/MappedBlockStream.h" 19 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" 20 #include "llvm/DebugInfo/PDB/Native/Hash.h" 21 #include "llvm/Support/BinaryItemStream.h" 22 #include "llvm/Support/BinaryStreamWriter.h" 23 #include "llvm/Support/xxhash.h" 24 #include <algorithm> 25 #include <vector> 26 27 using namespace llvm; 28 using namespace llvm::msf; 29 using namespace llvm::pdb; 30 using namespace llvm::codeview; 31 32 struct llvm::pdb::GSIHashStreamBuilder { 33 struct SymbolDenseMapInfo { 34 static inline CVSymbol getEmptyKey() { 35 static CVSymbol Empty; 36 return Empty; 37 } 38 static inline CVSymbol getTombstoneKey() { 39 static CVSymbol Tombstone( 40 DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey()); 41 return Tombstone; 42 } 43 static unsigned getHashValue(const CVSymbol &Val) { 44 return xxHash64(Val.RecordData); 45 } 46 static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) { 47 return LHS.RecordData == RHS.RecordData; 48 } 49 }; 50 51 std::vector<CVSymbol> Records; 52 uint32_t StreamIndex; 53 llvm::DenseSet<CVSymbol, SymbolDenseMapInfo> SymbolHashes; 54 std::vector<PSHashRecord> HashRecords; 55 std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap; 56 std::vector<support::ulittle32_t> HashBuckets; 57 58 uint32_t calculateSerializedLength() const; 59 uint32_t calculateRecordByteSize() const; 60 Error commit(BinaryStreamWriter &Writer); 61 void finalizeBuckets(uint32_t RecordZeroOffset); 62 63 template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) { 64 T Copy(Symbol); 65 addSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(), 66 CodeViewContainer::Pdb)); 67 } 68 void addSymbol(const CVSymbol &Symbol) { 69 if (Symbol.kind() == S_UDT || Symbol.kind() == S_CONSTANT) { 70 auto Iter = SymbolHashes.insert(Symbol); 71 if (!Iter.second) 72 return; 73 } 74 75 Records.push_back(Symbol); 76 } 77 }; 78 79 uint32_t GSIHashStreamBuilder::calculateSerializedLength() const { 80 uint32_t Size = sizeof(GSIHashHeader); 81 Size += HashRecords.size() * sizeof(PSHashRecord); 82 Size += HashBitmap.size() * sizeof(uint32_t); 83 Size += HashBuckets.size() * sizeof(uint32_t); 84 return Size; 85 } 86 87 uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const { 88 uint32_t Size = 0; 89 for (const auto &Sym : Records) 90 Size += Sym.length(); 91 return Size; 92 } 93 94 Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) { 95 GSIHashHeader Header; 96 Header.VerSignature = GSIHashHeader::HdrSignature; 97 Header.VerHdr = GSIHashHeader::HdrVersion; 98 Header.HrSize = HashRecords.size() * sizeof(PSHashRecord); 99 Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4; 100 101 if (auto EC = Writer.writeObject(Header)) 102 return EC; 103 104 if (auto EC = Writer.writeArray(makeArrayRef(HashRecords))) 105 return EC; 106 if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap))) 107 return EC; 108 if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets))) 109 return EC; 110 return Error::success(); 111 } 112 113 static bool isAsciiString(StringRef S) { 114 return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; }); 115 } 116 117 // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp 118 static bool gsiRecordLess(StringRef S1, StringRef S2) { 119 size_t LS = S1.size(); 120 size_t RS = S2.size(); 121 // Shorter strings always compare less than longer strings. 122 if (LS != RS) 123 return LS < RS; 124 125 // If either string contains non ascii characters, memcmp them. 126 if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2))) 127 return memcmp(S1.data(), S2.data(), LS) < 0; 128 129 // Both strings are ascii, perform a case-insenstive comparison. 130 return S1.compare_lower(S2.data()) < 0; 131 } 132 133 void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) { 134 std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1> 135 TmpBuckets; 136 uint32_t SymOffset = RecordZeroOffset; 137 for (const CVSymbol &Sym : Records) { 138 PSHashRecord HR; 139 // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs. 140 HR.Off = SymOffset + 1; 141 HR.CRef = 1; // Always use a refcount of 1. 142 143 // Hash the name to figure out which bucket this goes into. 144 StringRef Name = getSymbolName(Sym); 145 size_t BucketIdx = hashStringV1(Name) % IPHR_HASH; 146 TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR)); 147 SymOffset += Sym.length(); 148 } 149 150 // Compute the three tables: the hash records in bucket and chain order, the 151 // bucket presence bitmap, and the bucket chain start offsets. 152 HashRecords.reserve(Records.size()); 153 for (ulittle32_t &Word : HashBitmap) 154 Word = 0; 155 for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) { 156 auto &Bucket = TmpBuckets[BucketIdx]; 157 if (Bucket.empty()) 158 continue; 159 HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32); 160 161 // Calculate what the offset of the first hash record in the chain would 162 // be if it were inflated to contain 32-bit pointers. On a 32-bit system, 163 // each record would be 12 bytes. See HROffsetCalc in gsi.h. 164 const int SizeOfHROffsetCalc = 12; 165 ulittle32_t ChainStartOff = 166 ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc); 167 HashBuckets.push_back(ChainStartOff); 168 169 // Sort each bucket by memcmp of the symbol's name. It's important that 170 // we use the same sorting algorithm as is used by the reference 171 // implementation to ensure that the search for a record within a bucket 172 // can properly early-out when it detects the record won't be found. The 173 // algorithm used here corredsponds to the function 174 // caseInsensitiveComparePchPchCchCch in the reference implementation. 175 llvm::sort(Bucket, [](const std::pair<StringRef, PSHashRecord> &Left, 176 const std::pair<StringRef, PSHashRecord> &Right) { 177 return gsiRecordLess(Left.first, Right.first); 178 }); 179 180 for (const auto &Entry : Bucket) 181 HashRecords.push_back(Entry.second); 182 } 183 } 184 185 GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf) 186 : Msf(Msf), PSH(std::make_unique<GSIHashStreamBuilder>()), 187 GSH(std::make_unique<GSIHashStreamBuilder>()) {} 188 189 GSIStreamBuilder::~GSIStreamBuilder() {} 190 191 uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const { 192 uint32_t Size = 0; 193 Size += sizeof(PublicsStreamHeader); 194 Size += PSH->calculateSerializedLength(); 195 Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap 196 // FIXME: Add thunk map and section offsets for incremental linking. 197 198 return Size; 199 } 200 201 uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const { 202 return GSH->calculateSerializedLength(); 203 } 204 205 Error GSIStreamBuilder::finalizeMsfLayout() { 206 // First we write public symbol records, then we write global symbol records. 207 uint32_t PSHZero = 0; 208 uint32_t GSHZero = PSH->calculateRecordByteSize(); 209 210 PSH->finalizeBuckets(PSHZero); 211 GSH->finalizeBuckets(GSHZero); 212 213 Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize()); 214 if (!Idx) 215 return Idx.takeError(); 216 GSH->StreamIndex = *Idx; 217 Idx = Msf.addStream(calculatePublicsHashStreamSize()); 218 if (!Idx) 219 return Idx.takeError(); 220 PSH->StreamIndex = *Idx; 221 222 uint32_t RecordBytes = 223 GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize(); 224 225 Idx = Msf.addStream(RecordBytes); 226 if (!Idx) 227 return Idx.takeError(); 228 RecordStreamIdx = *Idx; 229 return Error::success(); 230 } 231 232 static bool comparePubSymByAddrAndName( 233 const std::pair<const CVSymbol *, const PublicSym32 *> &LS, 234 const std::pair<const CVSymbol *, const PublicSym32 *> &RS) { 235 if (LS.second->Segment != RS.second->Segment) 236 return LS.second->Segment < RS.second->Segment; 237 if (LS.second->Offset != RS.second->Offset) 238 return LS.second->Offset < RS.second->Offset; 239 240 return LS.second->Name < RS.second->Name; 241 } 242 243 /// Compute the address map. The address map is an array of symbol offsets 244 /// sorted so that it can be binary searched by address. 245 static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) { 246 // Make a vector of pointers to the symbols so we can sort it by address. 247 // Also gather the symbol offsets while we're at it. 248 249 std::vector<PublicSym32> DeserializedPublics; 250 std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr; 251 std::vector<uint32_t> SymOffsets; 252 DeserializedPublics.reserve(Records.size()); 253 PublicsByAddr.reserve(Records.size()); 254 SymOffsets.reserve(Records.size()); 255 256 uint32_t SymOffset = 0; 257 for (const CVSymbol &Sym : Records) { 258 assert(Sym.kind() == SymbolKind::S_PUB32); 259 DeserializedPublics.push_back( 260 cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym))); 261 PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back()); 262 SymOffsets.push_back(SymOffset); 263 SymOffset += Sym.length(); 264 } 265 llvm::stable_sort(PublicsByAddr, comparePubSymByAddrAndName); 266 267 // Fill in the symbol offsets in the appropriate order. 268 std::vector<ulittle32_t> AddrMap; 269 AddrMap.reserve(Records.size()); 270 for (auto &Sym : PublicsByAddr) { 271 ptrdiff_t Idx = std::distance(Records.data(), Sym.first); 272 assert(Idx >= 0 && size_t(Idx) < Records.size()); 273 AddrMap.push_back(ulittle32_t(SymOffsets[Idx])); 274 } 275 return AddrMap; 276 } 277 278 uint32_t GSIStreamBuilder::getPublicsStreamIndex() const { 279 return PSH->StreamIndex; 280 } 281 282 uint32_t GSIStreamBuilder::getGlobalsStreamIndex() const { 283 return GSH->StreamIndex; 284 } 285 286 void GSIStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) { 287 PSH->addSymbol(Pub, Msf); 288 } 289 290 void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) { 291 GSH->addSymbol(Sym, Msf); 292 } 293 294 void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) { 295 GSH->addSymbol(Sym, Msf); 296 } 297 298 void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) { 299 GSH->addSymbol(Sym, Msf); 300 } 301 302 void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Sym) { 303 GSH->addSymbol(Sym); 304 } 305 306 static Error writeRecords(BinaryStreamWriter &Writer, 307 ArrayRef<CVSymbol> Records) { 308 BinaryItemStream<CVSymbol> ItemStream(support::endianness::little); 309 ItemStream.setItems(Records); 310 BinaryStreamRef RecordsRef(ItemStream); 311 return Writer.writeStreamRef(RecordsRef); 312 } 313 314 Error GSIStreamBuilder::commitSymbolRecordStream( 315 WritableBinaryStreamRef Stream) { 316 BinaryStreamWriter Writer(Stream); 317 318 // Write public symbol records first, followed by global symbol records. This 319 // must match the order that we assume in finalizeMsfLayout when computing 320 // PSHZero and GSHZero. 321 if (auto EC = writeRecords(Writer, PSH->Records)) 322 return EC; 323 if (auto EC = writeRecords(Writer, GSH->Records)) 324 return EC; 325 326 return Error::success(); 327 } 328 329 Error GSIStreamBuilder::commitPublicsHashStream( 330 WritableBinaryStreamRef Stream) { 331 BinaryStreamWriter Writer(Stream); 332 PublicsStreamHeader Header; 333 334 // FIXME: Fill these in. They are for incremental linking. 335 Header.SymHash = PSH->calculateSerializedLength(); 336 Header.AddrMap = PSH->Records.size() * 4; 337 Header.NumThunks = 0; 338 Header.SizeOfThunk = 0; 339 Header.ISectThunkTable = 0; 340 memset(Header.Padding, 0, sizeof(Header.Padding)); 341 Header.OffThunkTable = 0; 342 Header.NumSections = 0; 343 if (auto EC = Writer.writeObject(Header)) 344 return EC; 345 346 if (auto EC = PSH->commit(Writer)) 347 return EC; 348 349 std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records); 350 if (auto EC = Writer.writeArray(makeArrayRef(AddrMap))) 351 return EC; 352 353 return Error::success(); 354 } 355 356 Error GSIStreamBuilder::commitGlobalsHashStream( 357 WritableBinaryStreamRef Stream) { 358 BinaryStreamWriter Writer(Stream); 359 return GSH->commit(Writer); 360 } 361 362 Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout, 363 WritableBinaryStreamRef Buffer) { 364 auto GS = WritableMappedBlockStream::createIndexedStream( 365 Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator()); 366 auto PS = WritableMappedBlockStream::createIndexedStream( 367 Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator()); 368 auto PRS = WritableMappedBlockStream::createIndexedStream( 369 Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator()); 370 371 if (auto EC = commitSymbolRecordStream(*PRS)) 372 return EC; 373 if (auto EC = commitGlobalsHashStream(*GS)) 374 return EC; 375 if (auto EC = commitPublicsHashStream(*PS)) 376 return EC; 377 return Error::success(); 378 } 379