10b57cec5SDimitry Andric //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric // 95ffd83dbSDimitry Andric // The data structures defined in this file are based on the reference 105ffd83dbSDimitry Andric // implementation which is available at 115ffd83dbSDimitry Andric // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.cpp 125ffd83dbSDimitry Andric // 135ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h" 160b57cec5SDimitry Andric #include "llvm/DebugInfo/CodeView/RecordName.h" 1781ad6265SDimitry Andric #include "llvm/DebugInfo/CodeView/RecordSerialization.h" 180b57cec5SDimitry Andric #include "llvm/DebugInfo/CodeView/SymbolRecord.h" 190b57cec5SDimitry Andric #include "llvm/DebugInfo/CodeView/SymbolSerializer.h" 200b57cec5SDimitry Andric #include "llvm/DebugInfo/MSF/MSFBuilder.h" 210b57cec5SDimitry Andric #include "llvm/DebugInfo/MSF/MSFCommon.h" 220b57cec5SDimitry Andric #include "llvm/DebugInfo/MSF/MappedBlockStream.h" 230b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" 240b57cec5SDimitry Andric #include "llvm/DebugInfo/PDB/Native/Hash.h" 2581ad6265SDimitry Andric #include "llvm/DebugInfo/PDB/Native/RawTypes.h" 260b57cec5SDimitry Andric #include "llvm/Support/BinaryItemStream.h" 270b57cec5SDimitry Andric #include "llvm/Support/BinaryStreamWriter.h" 285ffd83dbSDimitry Andric #include "llvm/Support/Parallel.h" 29*5f757f3fSDimitry Andric #include "llvm/Support/TimeProfiler.h" 300b57cec5SDimitry Andric #include "llvm/Support/xxhash.h" 310b57cec5SDimitry Andric #include <algorithm> 320b57cec5SDimitry Andric #include <vector> 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric using namespace llvm; 350b57cec5SDimitry Andric using namespace llvm::msf; 360b57cec5SDimitry Andric using namespace llvm::pdb; 370b57cec5SDimitry Andric using namespace llvm::codeview; 380b57cec5SDimitry Andric 395ffd83dbSDimitry Andric // Helper class for building the public and global PDB hash table buckets. 400b57cec5SDimitry Andric struct llvm::pdb::GSIHashStreamBuilder { 415ffd83dbSDimitry Andric // Sum of the size of all public or global records. 425ffd83dbSDimitry Andric uint32_t RecordByteSize = 0; 435ffd83dbSDimitry Andric 445ffd83dbSDimitry Andric std::vector<PSHashRecord> HashRecords; 455ffd83dbSDimitry Andric 465ffd83dbSDimitry Andric // The hash bitmap has `ceil((IPHR_HASH + 1) / 32)` words in it. The 475ffd83dbSDimitry Andric // reference implementation builds a hash table with IPHR_HASH buckets in it. 485ffd83dbSDimitry Andric // The last bucket is used to link together free hash table cells in a linked 495ffd83dbSDimitry Andric // list, but it is always empty in the compressed, on-disk format. However, 505ffd83dbSDimitry Andric // the bitmap must have a bit for it. 515ffd83dbSDimitry Andric std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap; 525ffd83dbSDimitry Andric 535ffd83dbSDimitry Andric std::vector<support::ulittle32_t> HashBuckets; 545ffd83dbSDimitry Andric 555ffd83dbSDimitry Andric uint32_t calculateSerializedLength() const; 565ffd83dbSDimitry Andric Error commit(BinaryStreamWriter &Writer); 575ffd83dbSDimitry Andric 585ffd83dbSDimitry Andric void finalizePublicBuckets(); 595ffd83dbSDimitry Andric void finalizeGlobalBuckets(uint32_t RecordZeroOffset); 605ffd83dbSDimitry Andric 615ffd83dbSDimitry Andric // Assign public and global symbol records into hash table buckets. 625ffd83dbSDimitry Andric // Modifies the list of records to store the bucket index, but does not 635ffd83dbSDimitry Andric // change the order. 645ffd83dbSDimitry Andric void finalizeBuckets(uint32_t RecordZeroOffset, 655ffd83dbSDimitry Andric MutableArrayRef<BulkPublic> Globals); 665ffd83dbSDimitry Andric }; 675ffd83dbSDimitry Andric 685ffd83dbSDimitry Andric // DenseMapInfo implementation for deduplicating symbol records. 695ffd83dbSDimitry Andric struct llvm::pdb::SymbolDenseMapInfo { 700b57cec5SDimitry Andric static inline CVSymbol getEmptyKey() { 710b57cec5SDimitry Andric static CVSymbol Empty; 720b57cec5SDimitry Andric return Empty; 730b57cec5SDimitry Andric } 740b57cec5SDimitry Andric static inline CVSymbol getTombstoneKey() { 750b57cec5SDimitry Andric static CVSymbol Tombstone( 760b57cec5SDimitry Andric DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey()); 770b57cec5SDimitry Andric return Tombstone; 780b57cec5SDimitry Andric } 790b57cec5SDimitry Andric static unsigned getHashValue(const CVSymbol &Val) { 8006c3fb27SDimitry Andric return xxh3_64bits(Val.RecordData); 810b57cec5SDimitry Andric } 820b57cec5SDimitry Andric static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) { 830b57cec5SDimitry Andric return LHS.RecordData == RHS.RecordData; 840b57cec5SDimitry Andric } 850b57cec5SDimitry Andric }; 860b57cec5SDimitry Andric 875ffd83dbSDimitry Andric namespace { 885ffd83dbSDimitry Andric LLVM_PACKED_START 895ffd83dbSDimitry Andric struct PublicSym32Layout { 905ffd83dbSDimitry Andric RecordPrefix Prefix; 915ffd83dbSDimitry Andric PublicSym32Header Pub; 925ffd83dbSDimitry Andric // char Name[]; 930b57cec5SDimitry Andric }; 945ffd83dbSDimitry Andric LLVM_PACKED_END 955ffd83dbSDimitry Andric } // namespace 965ffd83dbSDimitry Andric 975ffd83dbSDimitry Andric // Calculate how much memory this public needs when serialized. 985ffd83dbSDimitry Andric static uint32_t sizeOfPublic(const BulkPublic &Pub) { 995ffd83dbSDimitry Andric uint32_t NameLen = Pub.NameLen; 1005ffd83dbSDimitry Andric NameLen = std::min(NameLen, 1015ffd83dbSDimitry Andric uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1)); 1025ffd83dbSDimitry Andric return alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4); 1035ffd83dbSDimitry Andric } 1045ffd83dbSDimitry Andric 1055ffd83dbSDimitry Andric static CVSymbol serializePublic(uint8_t *Mem, const BulkPublic &Pub) { 1065ffd83dbSDimitry Andric // Assume the caller has allocated sizeOfPublic bytes. 1075ffd83dbSDimitry Andric uint32_t NameLen = std::min( 1085ffd83dbSDimitry Andric Pub.NameLen, uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1)); 1095ffd83dbSDimitry Andric size_t Size = alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4); 1105ffd83dbSDimitry Andric assert(Size == sizeOfPublic(Pub)); 1115ffd83dbSDimitry Andric auto *FixedMem = reinterpret_cast<PublicSym32Layout *>(Mem); 1125ffd83dbSDimitry Andric FixedMem->Prefix.RecordKind = static_cast<uint16_t>(codeview::S_PUB32); 1135ffd83dbSDimitry Andric FixedMem->Prefix.RecordLen = static_cast<uint16_t>(Size - 2); 1145ffd83dbSDimitry Andric FixedMem->Pub.Flags = Pub.Flags; 1155ffd83dbSDimitry Andric FixedMem->Pub.Offset = Pub.Offset; 1165ffd83dbSDimitry Andric FixedMem->Pub.Segment = Pub.Segment; 1175ffd83dbSDimitry Andric char *NameMem = reinterpret_cast<char *>(FixedMem + 1); 1185ffd83dbSDimitry Andric memcpy(NameMem, Pub.Name, NameLen); 1195ffd83dbSDimitry Andric // Zero the null terminator and remaining bytes. 1205ffd83dbSDimitry Andric memset(&NameMem[NameLen], 0, Size - sizeof(PublicSym32Layout) - NameLen); 121bdd1243dSDimitry Andric return CVSymbol(ArrayRef(reinterpret_cast<uint8_t *>(Mem), Size)); 1225ffd83dbSDimitry Andric } 1230b57cec5SDimitry Andric 1240b57cec5SDimitry Andric uint32_t GSIHashStreamBuilder::calculateSerializedLength() const { 1250b57cec5SDimitry Andric uint32_t Size = sizeof(GSIHashHeader); 1260b57cec5SDimitry Andric Size += HashRecords.size() * sizeof(PSHashRecord); 1270b57cec5SDimitry Andric Size += HashBitmap.size() * sizeof(uint32_t); 1280b57cec5SDimitry Andric Size += HashBuckets.size() * sizeof(uint32_t); 1290b57cec5SDimitry Andric return Size; 1300b57cec5SDimitry Andric } 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) { 1330b57cec5SDimitry Andric GSIHashHeader Header; 1340b57cec5SDimitry Andric Header.VerSignature = GSIHashHeader::HdrSignature; 1350b57cec5SDimitry Andric Header.VerHdr = GSIHashHeader::HdrVersion; 1360b57cec5SDimitry Andric Header.HrSize = HashRecords.size() * sizeof(PSHashRecord); 1370b57cec5SDimitry Andric Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4; 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric if (auto EC = Writer.writeObject(Header)) 1400b57cec5SDimitry Andric return EC; 1410b57cec5SDimitry Andric 142bdd1243dSDimitry Andric if (auto EC = Writer.writeArray(ArrayRef(HashRecords))) 1430b57cec5SDimitry Andric return EC; 144bdd1243dSDimitry Andric if (auto EC = Writer.writeArray(ArrayRef(HashBitmap))) 1450b57cec5SDimitry Andric return EC; 146bdd1243dSDimitry Andric if (auto EC = Writer.writeArray(ArrayRef(HashBuckets))) 1470b57cec5SDimitry Andric return EC; 1480b57cec5SDimitry Andric return Error::success(); 1490b57cec5SDimitry Andric } 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric static bool isAsciiString(StringRef S) { 1520b57cec5SDimitry Andric return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; }); 1530b57cec5SDimitry Andric } 1540b57cec5SDimitry Andric 1550b57cec5SDimitry Andric // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp 1565ffd83dbSDimitry Andric static int gsiRecordCmp(StringRef S1, StringRef S2) { 1570b57cec5SDimitry Andric size_t LS = S1.size(); 1580b57cec5SDimitry Andric size_t RS = S2.size(); 1590b57cec5SDimitry Andric // Shorter strings always compare less than longer strings. 1600b57cec5SDimitry Andric if (LS != RS) 161fe6060f1SDimitry Andric return (LS > RS) - (LS < RS); 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric // If either string contains non ascii characters, memcmp them. 1640b57cec5SDimitry Andric if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2))) 1655ffd83dbSDimitry Andric return memcmp(S1.data(), S2.data(), LS); 1660b57cec5SDimitry Andric 167e8d8bef9SDimitry Andric // Both strings are ascii, perform a case-insensitive comparison. 168fe6060f1SDimitry Andric return S1.compare_insensitive(S2.data()); 1690b57cec5SDimitry Andric } 1700b57cec5SDimitry Andric 1715ffd83dbSDimitry Andric void GSIStreamBuilder::finalizePublicBuckets() { 1725ffd83dbSDimitry Andric PSH->finalizeBuckets(0, Publics); 1735ffd83dbSDimitry Andric } 1745ffd83dbSDimitry Andric 1755ffd83dbSDimitry Andric void GSIStreamBuilder::finalizeGlobalBuckets(uint32_t RecordZeroOffset) { 1765ffd83dbSDimitry Andric // Build up a list of globals to be bucketed. Use the BulkPublic data 1775ffd83dbSDimitry Andric // structure for this purpose, even though these are global records, not 1785ffd83dbSDimitry Andric // public records. Most of the same fields are required: 1795ffd83dbSDimitry Andric // - Name 1805ffd83dbSDimitry Andric // - NameLen 1815ffd83dbSDimitry Andric // - SymOffset 1825ffd83dbSDimitry Andric // - BucketIdx 1835ffd83dbSDimitry Andric // The dead fields are Offset, Segment, and Flags. 1845ffd83dbSDimitry Andric std::vector<BulkPublic> Records; 1855ffd83dbSDimitry Andric Records.resize(Globals.size()); 1860b57cec5SDimitry Andric uint32_t SymOffset = RecordZeroOffset; 1875ffd83dbSDimitry Andric for (size_t I = 0, E = Globals.size(); I < E; ++I) { 1885ffd83dbSDimitry Andric StringRef Name = getSymbolName(Globals[I]); 1895ffd83dbSDimitry Andric Records[I].Name = Name.data(); 1905ffd83dbSDimitry Andric Records[I].NameLen = Name.size(); 1915ffd83dbSDimitry Andric Records[I].SymOffset = SymOffset; 1925ffd83dbSDimitry Andric SymOffset += Globals[I].length(); 1930b57cec5SDimitry Andric } 1940b57cec5SDimitry Andric 1955ffd83dbSDimitry Andric GSH->finalizeBuckets(RecordZeroOffset, Records); 1965ffd83dbSDimitry Andric } 1975ffd83dbSDimitry Andric 1985ffd83dbSDimitry Andric void GSIHashStreamBuilder::finalizeBuckets( 1995ffd83dbSDimitry Andric uint32_t RecordZeroOffset, MutableArrayRef<BulkPublic> Records) { 2005ffd83dbSDimitry Andric // Hash every name in parallel. 20181ad6265SDimitry Andric parallelFor(0, Records.size(), [&](size_t I) { 2025ffd83dbSDimitry Andric Records[I].setBucketIdx(hashStringV1(Records[I].Name) % IPHR_HASH); 2035ffd83dbSDimitry Andric }); 2045ffd83dbSDimitry Andric 2055ffd83dbSDimitry Andric // Count up the size of each bucket. Then, use an exclusive prefix sum to 2065ffd83dbSDimitry Andric // calculate the bucket start offsets. This is C++17 std::exclusive_scan, but 2075ffd83dbSDimitry Andric // we can't use it yet. 2085ffd83dbSDimitry Andric uint32_t BucketStarts[IPHR_HASH] = {0}; 2095ffd83dbSDimitry Andric for (const BulkPublic &P : Records) 2105ffd83dbSDimitry Andric ++BucketStarts[P.BucketIdx]; 2115ffd83dbSDimitry Andric uint32_t Sum = 0; 2125ffd83dbSDimitry Andric for (uint32_t &B : BucketStarts) { 2135ffd83dbSDimitry Andric uint32_t Size = B; 2145ffd83dbSDimitry Andric B = Sum; 2155ffd83dbSDimitry Andric Sum += Size; 2165ffd83dbSDimitry Andric } 2175ffd83dbSDimitry Andric 2185ffd83dbSDimitry Andric // Place globals into the hash table in bucket order. When placing a global, 2195ffd83dbSDimitry Andric // update the bucket start. Every hash table slot should be filled. Always use 2205ffd83dbSDimitry Andric // a refcount of one for now. 2215ffd83dbSDimitry Andric HashRecords.resize(Records.size()); 2225ffd83dbSDimitry Andric uint32_t BucketCursors[IPHR_HASH]; 2235ffd83dbSDimitry Andric memcpy(BucketCursors, BucketStarts, sizeof(BucketCursors)); 2245ffd83dbSDimitry Andric for (int I = 0, E = Records.size(); I < E; ++I) { 2255ffd83dbSDimitry Andric uint32_t HashIdx = BucketCursors[Records[I].BucketIdx]++; 2265ffd83dbSDimitry Andric HashRecords[HashIdx].Off = I; 2275ffd83dbSDimitry Andric HashRecords[HashIdx].CRef = 1; 2285ffd83dbSDimitry Andric } 2295ffd83dbSDimitry Andric 2305ffd83dbSDimitry Andric // Within the buckets, sort each bucket by memcmp of the symbol's name. It's 2315ffd83dbSDimitry Andric // important that we use the same sorting algorithm as is used by the 2325ffd83dbSDimitry Andric // reference implementation to ensure that the search for a record within a 2335ffd83dbSDimitry Andric // bucket can properly early-out when it detects the record won't be found. 2345ffd83dbSDimitry Andric // The algorithm used here corresponds to the function 2355ffd83dbSDimitry Andric // caseInsensitiveComparePchPchCchCch in the reference implementation. 23681ad6265SDimitry Andric parallelFor(0, IPHR_HASH, [&](size_t I) { 2375ffd83dbSDimitry Andric auto B = HashRecords.begin() + BucketStarts[I]; 2385ffd83dbSDimitry Andric auto E = HashRecords.begin() + BucketCursors[I]; 2395ffd83dbSDimitry Andric if (B == E) 2405ffd83dbSDimitry Andric return; 2415ffd83dbSDimitry Andric auto BucketCmp = [Records](const PSHashRecord &LHash, 2425ffd83dbSDimitry Andric const PSHashRecord &RHash) { 2435ffd83dbSDimitry Andric const BulkPublic &L = Records[uint32_t(LHash.Off)]; 2445ffd83dbSDimitry Andric const BulkPublic &R = Records[uint32_t(RHash.Off)]; 2455ffd83dbSDimitry Andric assert(L.BucketIdx == R.BucketIdx); 2465ffd83dbSDimitry Andric int Cmp = gsiRecordCmp(L.getName(), R.getName()); 2475ffd83dbSDimitry Andric if (Cmp != 0) 2485ffd83dbSDimitry Andric return Cmp < 0; 2495ffd83dbSDimitry Andric // This comparison is necessary to make the sorting stable in the presence 2505ffd83dbSDimitry Andric // of two static globals with the same name. The easiest way to observe 2515ffd83dbSDimitry Andric // this is with S_LDATA32 records. 2525ffd83dbSDimitry Andric return L.SymOffset < R.SymOffset; 2535ffd83dbSDimitry Andric }; 2545ffd83dbSDimitry Andric llvm::sort(B, E, BucketCmp); 2555ffd83dbSDimitry Andric 2565ffd83dbSDimitry Andric // After we are done sorting, replace the global indices with the stream 2575ffd83dbSDimitry Andric // offsets of each global. Add one when writing symbol offsets to disk. 2585ffd83dbSDimitry Andric // See GSI1::fixSymRecs. 2595ffd83dbSDimitry Andric for (PSHashRecord &HRec : make_range(B, E)) 2605ffd83dbSDimitry Andric HRec.Off = Records[uint32_t(HRec.Off)].SymOffset + 1; 2615ffd83dbSDimitry Andric }); 2625ffd83dbSDimitry Andric 2635ffd83dbSDimitry Andric // For each non-empty bucket, push the bucket start offset into HashBuckets 2645ffd83dbSDimitry Andric // and set a bit in the hash bitmap. 2655ffd83dbSDimitry Andric for (uint32_t I = 0; I < HashBitmap.size(); ++I) { 2665ffd83dbSDimitry Andric uint32_t Word = 0; 2675ffd83dbSDimitry Andric for (uint32_t J = 0; J < 32; ++J) { 2685ffd83dbSDimitry Andric // Skip empty buckets. 2695ffd83dbSDimitry Andric uint32_t BucketIdx = I * 32 + J; 2705ffd83dbSDimitry Andric if (BucketIdx >= IPHR_HASH || 2715ffd83dbSDimitry Andric BucketStarts[BucketIdx] == BucketCursors[BucketIdx]) 2720b57cec5SDimitry Andric continue; 2735ffd83dbSDimitry Andric Word |= (1U << J); 2740b57cec5SDimitry Andric 2750b57cec5SDimitry Andric // Calculate what the offset of the first hash record in the chain would 2760b57cec5SDimitry Andric // be if it were inflated to contain 32-bit pointers. On a 32-bit system, 2770b57cec5SDimitry Andric // each record would be 12 bytes. See HROffsetCalc in gsi.h. 2780b57cec5SDimitry Andric const int SizeOfHROffsetCalc = 12; 2790b57cec5SDimitry Andric ulittle32_t ChainStartOff = 2805ffd83dbSDimitry Andric ulittle32_t(BucketStarts[BucketIdx] * SizeOfHROffsetCalc); 2810b57cec5SDimitry Andric HashBuckets.push_back(ChainStartOff); 2825ffd83dbSDimitry Andric } 2835ffd83dbSDimitry Andric HashBitmap[I] = Word; 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric } 2860b57cec5SDimitry Andric 2870b57cec5SDimitry Andric GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf) 2888bcb0991SDimitry Andric : Msf(Msf), PSH(std::make_unique<GSIHashStreamBuilder>()), 2898bcb0991SDimitry Andric GSH(std::make_unique<GSIHashStreamBuilder>()) {} 2900b57cec5SDimitry Andric 29181ad6265SDimitry Andric GSIStreamBuilder::~GSIStreamBuilder() = default; 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const { 2940b57cec5SDimitry Andric uint32_t Size = 0; 2950b57cec5SDimitry Andric Size += sizeof(PublicsStreamHeader); 2960b57cec5SDimitry Andric Size += PSH->calculateSerializedLength(); 2975ffd83dbSDimitry Andric Size += Publics.size() * sizeof(uint32_t); // AddrMap 2980b57cec5SDimitry Andric // FIXME: Add thunk map and section offsets for incremental linking. 2990b57cec5SDimitry Andric 3000b57cec5SDimitry Andric return Size; 3010b57cec5SDimitry Andric } 3020b57cec5SDimitry Andric 3030b57cec5SDimitry Andric uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const { 3040b57cec5SDimitry Andric return GSH->calculateSerializedLength(); 3050b57cec5SDimitry Andric } 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric Error GSIStreamBuilder::finalizeMsfLayout() { 3080b57cec5SDimitry Andric // First we write public symbol records, then we write global symbol records. 3095ffd83dbSDimitry Andric finalizePublicBuckets(); 3105ffd83dbSDimitry Andric finalizeGlobalBuckets(PSH->RecordByteSize); 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize()); 3130b57cec5SDimitry Andric if (!Idx) 3140b57cec5SDimitry Andric return Idx.takeError(); 3155ffd83dbSDimitry Andric GlobalsStreamIndex = *Idx; 3165ffd83dbSDimitry Andric 3170b57cec5SDimitry Andric Idx = Msf.addStream(calculatePublicsHashStreamSize()); 3180b57cec5SDimitry Andric if (!Idx) 3190b57cec5SDimitry Andric return Idx.takeError(); 3205ffd83dbSDimitry Andric PublicsStreamIndex = *Idx; 3210b57cec5SDimitry Andric 3225ffd83dbSDimitry Andric uint32_t RecordBytes = PSH->RecordByteSize + GSH->RecordByteSize; 3230b57cec5SDimitry Andric 3240b57cec5SDimitry Andric Idx = Msf.addStream(RecordBytes); 3250b57cec5SDimitry Andric if (!Idx) 3260b57cec5SDimitry Andric return Idx.takeError(); 3275ffd83dbSDimitry Andric RecordStreamIndex = *Idx; 3280b57cec5SDimitry Andric return Error::success(); 3290b57cec5SDimitry Andric } 3300b57cec5SDimitry Andric 3315ffd83dbSDimitry Andric void GSIStreamBuilder::addPublicSymbols(std::vector<BulkPublic> &&PublicsIn) { 3325ffd83dbSDimitry Andric assert(Publics.empty() && PSH->RecordByteSize == 0 && 3335ffd83dbSDimitry Andric "publics can only be added once"); 3345ffd83dbSDimitry Andric Publics = std::move(PublicsIn); 3350b57cec5SDimitry Andric 3365ffd83dbSDimitry Andric // Sort the symbols by name. PDBs contain lots of symbols, so use parallelism. 3375ffd83dbSDimitry Andric parallelSort(Publics, [](const BulkPublic &L, const BulkPublic &R) { 3385ffd83dbSDimitry Andric return L.getName() < R.getName(); 3395ffd83dbSDimitry Andric }); 3400b57cec5SDimitry Andric 3415ffd83dbSDimitry Andric // Assign offsets and calculate the length of the public symbol records. 3420b57cec5SDimitry Andric uint32_t SymOffset = 0; 3435ffd83dbSDimitry Andric for (BulkPublic &Pub : Publics) { 3445ffd83dbSDimitry Andric Pub.SymOffset = SymOffset; 3455ffd83dbSDimitry Andric SymOffset += sizeOfPublic(Pub); 3460b57cec5SDimitry Andric } 3470b57cec5SDimitry Andric 3485ffd83dbSDimitry Andric // Remember the length of the public stream records. 3495ffd83dbSDimitry Andric PSH->RecordByteSize = SymOffset; 3500b57cec5SDimitry Andric } 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) { 3535ffd83dbSDimitry Andric serializeAndAddGlobal(Sym); 3540b57cec5SDimitry Andric } 3550b57cec5SDimitry Andric 3560b57cec5SDimitry Andric void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) { 3575ffd83dbSDimitry Andric serializeAndAddGlobal(Sym); 3580b57cec5SDimitry Andric } 3590b57cec5SDimitry Andric 3600b57cec5SDimitry Andric void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) { 3615ffd83dbSDimitry Andric serializeAndAddGlobal(Sym); 3620b57cec5SDimitry Andric } 3630b57cec5SDimitry Andric 3645ffd83dbSDimitry Andric template <typename T> 3655ffd83dbSDimitry Andric void GSIStreamBuilder::serializeAndAddGlobal(const T &Symbol) { 3665ffd83dbSDimitry Andric T Copy(Symbol); 3675ffd83dbSDimitry Andric addGlobalSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(), 3685ffd83dbSDimitry Andric CodeViewContainer::Pdb)); 3695ffd83dbSDimitry Andric } 3705ffd83dbSDimitry Andric 3715ffd83dbSDimitry Andric void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Symbol) { 3725ffd83dbSDimitry Andric // Ignore duplicate typedefs and constants. 3735ffd83dbSDimitry Andric if (Symbol.kind() == S_UDT || Symbol.kind() == S_CONSTANT) { 3745ffd83dbSDimitry Andric auto Iter = GlobalsSeen.insert(Symbol); 3755ffd83dbSDimitry Andric if (!Iter.second) 3765ffd83dbSDimitry Andric return; 3775ffd83dbSDimitry Andric } 3785ffd83dbSDimitry Andric GSH->RecordByteSize += Symbol.length(); 3795ffd83dbSDimitry Andric Globals.push_back(Symbol); 3805ffd83dbSDimitry Andric } 3815ffd83dbSDimitry Andric 3825ffd83dbSDimitry Andric // Serialize each public and write it. 3835ffd83dbSDimitry Andric static Error writePublics(BinaryStreamWriter &Writer, 3845ffd83dbSDimitry Andric ArrayRef<BulkPublic> Publics) { 3855ffd83dbSDimitry Andric std::vector<uint8_t> Storage; 3865ffd83dbSDimitry Andric for (const BulkPublic &Pub : Publics) { 3875ffd83dbSDimitry Andric Storage.resize(sizeOfPublic(Pub)); 3885ffd83dbSDimitry Andric serializePublic(Storage.data(), Pub); 3895ffd83dbSDimitry Andric if (Error E = Writer.writeBytes(Storage)) 3905ffd83dbSDimitry Andric return E; 3915ffd83dbSDimitry Andric } 3925ffd83dbSDimitry Andric return Error::success(); 3930b57cec5SDimitry Andric } 3940b57cec5SDimitry Andric 3950b57cec5SDimitry Andric static Error writeRecords(BinaryStreamWriter &Writer, 3960b57cec5SDimitry Andric ArrayRef<CVSymbol> Records) { 397*5f757f3fSDimitry Andric BinaryItemStream<CVSymbol> ItemStream(llvm::endianness::little); 3980b57cec5SDimitry Andric ItemStream.setItems(Records); 3990b57cec5SDimitry Andric BinaryStreamRef RecordsRef(ItemStream); 4000b57cec5SDimitry Andric return Writer.writeStreamRef(RecordsRef); 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric 4030b57cec5SDimitry Andric Error GSIStreamBuilder::commitSymbolRecordStream( 4040b57cec5SDimitry Andric WritableBinaryStreamRef Stream) { 4050b57cec5SDimitry Andric BinaryStreamWriter Writer(Stream); 4060b57cec5SDimitry Andric 4070b57cec5SDimitry Andric // Write public symbol records first, followed by global symbol records. This 4080b57cec5SDimitry Andric // must match the order that we assume in finalizeMsfLayout when computing 4090b57cec5SDimitry Andric // PSHZero and GSHZero. 4105ffd83dbSDimitry Andric if (auto EC = writePublics(Writer, Publics)) 4110b57cec5SDimitry Andric return EC; 4125ffd83dbSDimitry Andric if (auto EC = writeRecords(Writer, Globals)) 4130b57cec5SDimitry Andric return EC; 4140b57cec5SDimitry Andric 4150b57cec5SDimitry Andric return Error::success(); 4160b57cec5SDimitry Andric } 4170b57cec5SDimitry Andric 4185ffd83dbSDimitry Andric static std::vector<support::ulittle32_t> 4195ffd83dbSDimitry Andric computeAddrMap(ArrayRef<BulkPublic> Publics) { 4205ffd83dbSDimitry Andric // Build a parallel vector of indices into the Publics vector, and sort it by 4215ffd83dbSDimitry Andric // address. 4225ffd83dbSDimitry Andric std::vector<ulittle32_t> PubAddrMap; 4235ffd83dbSDimitry Andric PubAddrMap.reserve(Publics.size()); 4245ffd83dbSDimitry Andric for (int I = 0, E = Publics.size(); I < E; ++I) 4255ffd83dbSDimitry Andric PubAddrMap.push_back(ulittle32_t(I)); 4265ffd83dbSDimitry Andric 4275ffd83dbSDimitry Andric auto AddrCmp = [Publics](const ulittle32_t &LIdx, const ulittle32_t &RIdx) { 4285ffd83dbSDimitry Andric const BulkPublic &L = Publics[LIdx]; 4295ffd83dbSDimitry Andric const BulkPublic &R = Publics[RIdx]; 4305ffd83dbSDimitry Andric if (L.Segment != R.Segment) 4315ffd83dbSDimitry Andric return L.Segment < R.Segment; 4325ffd83dbSDimitry Andric if (L.Offset != R.Offset) 4335ffd83dbSDimitry Andric return L.Offset < R.Offset; 4345ffd83dbSDimitry Andric // parallelSort is unstable, so we have to do name comparison to ensure 4355ffd83dbSDimitry Andric // that two names for the same location come out in a deterministic order. 4365ffd83dbSDimitry Andric return L.getName() < R.getName(); 4375ffd83dbSDimitry Andric }; 4385ffd83dbSDimitry Andric parallelSort(PubAddrMap, AddrCmp); 4395ffd83dbSDimitry Andric 4405ffd83dbSDimitry Andric // Rewrite the public symbol indices into symbol offsets. 4415ffd83dbSDimitry Andric for (ulittle32_t &Entry : PubAddrMap) 4425ffd83dbSDimitry Andric Entry = Publics[Entry].SymOffset; 4435ffd83dbSDimitry Andric return PubAddrMap; 4445ffd83dbSDimitry Andric } 4455ffd83dbSDimitry Andric 4460b57cec5SDimitry Andric Error GSIStreamBuilder::commitPublicsHashStream( 4470b57cec5SDimitry Andric WritableBinaryStreamRef Stream) { 4480b57cec5SDimitry Andric BinaryStreamWriter Writer(Stream); 4490b57cec5SDimitry Andric PublicsStreamHeader Header; 4500b57cec5SDimitry Andric 4510b57cec5SDimitry Andric // FIXME: Fill these in. They are for incremental linking. 4520b57cec5SDimitry Andric Header.SymHash = PSH->calculateSerializedLength(); 4535ffd83dbSDimitry Andric Header.AddrMap = Publics.size() * 4; 4540b57cec5SDimitry Andric Header.NumThunks = 0; 4550b57cec5SDimitry Andric Header.SizeOfThunk = 0; 4560b57cec5SDimitry Andric Header.ISectThunkTable = 0; 4570b57cec5SDimitry Andric memset(Header.Padding, 0, sizeof(Header.Padding)); 4580b57cec5SDimitry Andric Header.OffThunkTable = 0; 4590b57cec5SDimitry Andric Header.NumSections = 0; 4600b57cec5SDimitry Andric if (auto EC = Writer.writeObject(Header)) 4610b57cec5SDimitry Andric return EC; 4620b57cec5SDimitry Andric 4630b57cec5SDimitry Andric if (auto EC = PSH->commit(Writer)) 4640b57cec5SDimitry Andric return EC; 4650b57cec5SDimitry Andric 4665ffd83dbSDimitry Andric std::vector<support::ulittle32_t> PubAddrMap = computeAddrMap(Publics); 4675ffd83dbSDimitry Andric assert(PubAddrMap.size() == Publics.size()); 468bdd1243dSDimitry Andric if (auto EC = Writer.writeArray(ArrayRef(PubAddrMap))) 4690b57cec5SDimitry Andric return EC; 4700b57cec5SDimitry Andric 4710b57cec5SDimitry Andric return Error::success(); 4720b57cec5SDimitry Andric } 4730b57cec5SDimitry Andric 4740b57cec5SDimitry Andric Error GSIStreamBuilder::commitGlobalsHashStream( 4750b57cec5SDimitry Andric WritableBinaryStreamRef Stream) { 4760b57cec5SDimitry Andric BinaryStreamWriter Writer(Stream); 4770b57cec5SDimitry Andric return GSH->commit(Writer); 4780b57cec5SDimitry Andric } 4790b57cec5SDimitry Andric 4800b57cec5SDimitry Andric Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout, 4810b57cec5SDimitry Andric WritableBinaryStreamRef Buffer) { 482*5f757f3fSDimitry Andric llvm::TimeTraceScope timeScope("Commit GSI stream"); 4830b57cec5SDimitry Andric auto GS = WritableMappedBlockStream::createIndexedStream( 4840b57cec5SDimitry Andric Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator()); 4850b57cec5SDimitry Andric auto PS = WritableMappedBlockStream::createIndexedStream( 4860b57cec5SDimitry Andric Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator()); 4870b57cec5SDimitry Andric auto PRS = WritableMappedBlockStream::createIndexedStream( 4885ffd83dbSDimitry Andric Layout, Buffer, getRecordStreamIndex(), Msf.getAllocator()); 4890b57cec5SDimitry Andric 4900b57cec5SDimitry Andric if (auto EC = commitSymbolRecordStream(*PRS)) 4910b57cec5SDimitry Andric return EC; 4920b57cec5SDimitry Andric if (auto EC = commitGlobalsHashStream(*GS)) 4930b57cec5SDimitry Andric return EC; 4940b57cec5SDimitry Andric if (auto EC = commitPublicsHashStream(*PS)) 4950b57cec5SDimitry Andric return EC; 4960b57cec5SDimitry Andric return Error::success(); 4970b57cec5SDimitry Andric } 498