xref: /freebsd/contrib/llvm-project/lld/COFF/DebugTypes.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- DebugTypes.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "DebugTypes.h"
10 #include "COFFLinkerContext.h"
11 #include "Chunks.h"
12 #include "InputFiles.h"
13 #include "PDB.h"
14 #include "TypeMerger.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
19 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
20 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
21 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
22 #include "llvm/DebugInfo/PDB/GenericError.h"
23 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
24 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
25 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
26 #include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
27 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
28 #include "llvm/Support/FormatVariadic.h"
29 #include "llvm/Support/Parallel.h"
30 #include "llvm/Support/Path.h"
31 #include "llvm/Support/TimeProfiler.h"
32 
33 using namespace llvm;
34 using namespace llvm::codeview;
35 using namespace lld;
36 using namespace lld::coff;
37 
38 namespace {
39 class TypeServerIpiSource;
40 
41 // The TypeServerSource class represents a PDB type server, a file referenced by
42 // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
43 // files, therefore there must be only once instance per OBJ lot. The file path
44 // is discovered from the dependent OBJ's debug type stream. The
45 // TypeServerSource object is then queued and loaded by the COFF Driver. The
46 // debug type stream for such PDB files will be merged first in the final PDB,
47 // before any dependent OBJ.
48 class TypeServerSource : public TpiSource {
49 public:
TypeServerSource(COFFLinkerContext & ctx,PDBInputFile * f)50   explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
51       : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
52     if (f->loadErrorStr)
53       return;
54     pdb::PDBFile &file = f->session->getPDBFile();
55     auto expectedInfo = file.getPDBInfoStream();
56     if (!expectedInfo)
57       return;
58     Guid = expectedInfo->getGuid();
59     auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
60     if (!it.second) {
61       // If we hit here we have collision on Guid's in two PDB files.
62       // This can happen if the PDB Guid is invalid or if we are really
63       // unlucky. This should fall back on stright file-system lookup.
64       it.first->second = nullptr;
65     }
66   }
67 
68   Error mergeDebugT(TypeMerger *m) override;
69 
70   void loadGHashes() override;
71   void remapTpiWithGHashes(GHashState *g) override;
72 
isDependency() const73   bool isDependency() const override { return true; }
74 
75   PDBInputFile *pdbInputFile = nullptr;
76 
77   // TpiSource for IPI stream.
78   TypeServerIpiSource *ipiSrc = nullptr;
79 
80   // The PDB signature GUID.
81   codeview::GUID Guid;
82 };
83 
84 // Companion to TypeServerSource. Stores the index map for the IPI stream in the
85 // PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
86 // invariant of one type index space per source.
87 class TypeServerIpiSource : public TpiSource {
88 public:
TypeServerIpiSource(COFFLinkerContext & ctx)89   explicit TypeServerIpiSource(COFFLinkerContext &ctx)
90       : TpiSource(ctx, PDBIpi, nullptr) {}
91 
92   friend class TypeServerSource;
93 
94   // All of the TpiSource methods are no-ops. The parent TypeServerSource
95   // handles both TPI and IPI.
mergeDebugT(TypeMerger * m)96   Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
loadGHashes()97   void loadGHashes() override {}
remapTpiWithGHashes(GHashState * g)98   void remapTpiWithGHashes(GHashState *g) override {}
isDependency() const99   bool isDependency() const override { return true; }
100 };
101 
102 // This class represents the debug type stream of an OBJ file that depends on a
103 // PDB type server (see TypeServerSource).
104 class UseTypeServerSource : public TpiSource {
105   Expected<TypeServerSource *> getTypeServerSource();
106 
107 public:
UseTypeServerSource(COFFLinkerContext & ctx,ObjFile * f,TypeServer2Record ts)108   UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
109       : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}
110 
111   Error mergeDebugT(TypeMerger *m) override;
112 
113   // No need to load ghashes from /Zi objects.
loadGHashes()114   void loadGHashes() override {}
115   void remapTpiWithGHashes(GHashState *g) override;
116 
117   // Information about the PDB type server dependency, that needs to be loaded
118   // in before merging this OBJ.
119   TypeServer2Record typeServerDependency;
120 };
121 
122 // This class represents the debug type stream of a Microsoft precompiled
123 // headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
124 // PDB, before any other OBJs that depend on this. Note that only MSVC generate
125 // such files, clang does not.
126 class PrecompSource : public TpiSource {
127 public:
PrecompSource(COFFLinkerContext & ctx,ObjFile * f)128   PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
129     // If the S_OBJNAME record contains the PCH signature, we'll register this
130     // source file right away.
131     registerMapping();
132   }
133 
134   Error mergeDebugT(TypeMerger *m) override;
135 
136   void loadGHashes() override;
137 
isDependency() const138   bool isDependency() const override { return true; }
139 
140 private:
141   void registerMapping();
142 
143   // Whether this precomp OBJ was recorded in the precompSourceMappings map.
144   // Only happens if the file->pchSignature is valid.
145   bool registered = false;
146 };
147 
148 // This class represents the debug type stream of an OBJ file that depends on a
149 // Microsoft precompiled headers OBJ (see PrecompSource).
150 class UsePrecompSource : public TpiSource {
151 public:
UsePrecompSource(COFFLinkerContext & ctx,ObjFile * f,PrecompRecord precomp)152   UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
153       : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}
154 
155   Error mergeDebugT(TypeMerger *m) override;
156 
157   void loadGHashes() override;
158   void remapTpiWithGHashes(GHashState *g) override;
159 
160 private:
161   Error mergeInPrecompHeaderObj();
162 
163   PrecompSource *findObjByName(StringRef fileNameOnly);
164   PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
165   Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);
166 
167 public:
168   // Information about the Precomp OBJ dependency, that needs to be loaded in
169   // before merging this OBJ.
170   PrecompRecord precompDependency;
171 };
172 } // namespace
173 
TpiSource(COFFLinkerContext & ctx,TpiKind k,ObjFile * f)174 TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
175     : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
176   ctx.addTpiSource(this);
177 }
178 
179 // Vtable key method.
~TpiSource()180 TpiSource::~TpiSource() {
181   // Silence any assertions about unchecked errors.
182   consumeError(std::move(typeMergingError));
183 }
184 
makeTpiSource(COFFLinkerContext & ctx,ObjFile * file)185 TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
186   return make<TpiSource>(ctx, TpiSource::Regular, file);
187 }
188 
makeTypeServerSource(COFFLinkerContext & ctx,PDBInputFile * pdbInputFile)189 TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
190                                            PDBInputFile *pdbInputFile) {
191   // Type server sources come in pairs: the TPI stream, and the IPI stream.
192   auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
193   if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
194     tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
195   return tpiSource;
196 }
197 
makeUseTypeServerSource(COFFLinkerContext & ctx,ObjFile * file,TypeServer2Record ts)198 TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
199                                               ObjFile *file,
200                                               TypeServer2Record ts) {
201   return make<UseTypeServerSource>(ctx, file, ts);
202 }
203 
makePrecompSource(COFFLinkerContext & ctx,ObjFile * file)204 TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
205   return make<PrecompSource>(ctx, file);
206 }
207 
makeUsePrecompSource(COFFLinkerContext & ctx,ObjFile * file,PrecompRecord precomp)208 TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
209                                            ObjFile *file,
210                                            PrecompRecord precomp) {
211   return make<UsePrecompSource>(ctx, file, precomp);
212 }
213 
remapTypeIndex(TypeIndex & ti,TiRefKind refKind) const214 bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
215   if (ti.isSimple())
216     return true;
217 
218   // This can be an item index or a type index. Choose the appropriate map.
219   ArrayRef<TypeIndex> tpiOrIpiMap =
220       (refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
221   if (ti.toArrayIndex() >= tpiOrIpiMap.size())
222     return false;
223   ti = tpiOrIpiMap[ti.toArrayIndex()];
224   return true;
225 }
226 
remapRecord(MutableArrayRef<uint8_t> rec,ArrayRef<TiReference> typeRefs)227 void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
228                             ArrayRef<TiReference> typeRefs) {
229   MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
230   for (const TiReference &ref : typeRefs) {
231     unsigned byteSize = ref.Count * sizeof(TypeIndex);
232     if (contents.size() < ref.Offset + byteSize)
233       Fatal(ctx) << "symbol record too short";
234 
235     MutableArrayRef<TypeIndex> indices(
236         reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
237     for (TypeIndex &ti : indices) {
238       if (!remapTypeIndex(ti, ref.Kind)) {
239         if (ctx.config.verbose) {
240           uint16_t kind =
241               reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
242           StringRef fname = file ? file->getName() : "<unknown PDB>";
243           Log(ctx) << "failed to remap type index in record of kind 0x"
244                    << utohexstr(kind) << " in " << fname << " with bad "
245                    << (ref.Kind == TiRefKind::IndexRef ? "item" : "type")
246                    << " index 0x" << utohexstr(ti.getIndex());
247         }
248         ti = TypeIndex(SimpleTypeKind::NotTranslated);
249         continue;
250       }
251     }
252   }
253 }
254 
remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec)255 void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
256   // TODO: Handle errors similar to symbols.
257   SmallVector<TiReference, 32> typeRefs;
258   discoverTypeIndices(CVType(rec), typeRefs);
259   remapRecord(rec, typeRefs);
260 }
261 
remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec)262 bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
263   // Discover type index references in the record. Skip it if we don't
264   // know where they are.
265   SmallVector<TiReference, 32> typeRefs;
266   if (!discoverTypeIndicesInSymbol(rec, typeRefs))
267     return false;
268   remapRecord(rec, typeRefs);
269   return true;
270 }
271 
272 // A COFF .debug$H section is currently a clang extension.  This function checks
273 // if a .debug$H section is in a format that we expect / understand, so that we
274 // can ignore any sections which are coincidentally also named .debug$H but do
275 // not contain a format we recognize.
canUseDebugH(ArrayRef<uint8_t> debugH)276 static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
277   if (debugH.size() < sizeof(object::debug_h_header))
278     return false;
279   auto *header =
280       reinterpret_cast<const object::debug_h_header *>(debugH.data());
281   debugH = debugH.drop_front(sizeof(object::debug_h_header));
282   return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
283          header->Version == 0 &&
284          header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) &&
285          (debugH.size() % 8 == 0);
286 }
287 
getDebugH(ObjFile * file)288 static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
289   SectionChunk *sec =
290       SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
291   if (!sec)
292     return std::nullopt;
293   ArrayRef<uint8_t> contents = sec->getContents();
294   if (!canUseDebugH(contents))
295     return std::nullopt;
296   return contents;
297 }
298 
299 static ArrayRef<GloballyHashedType>
getHashesFromDebugH(ArrayRef<uint8_t> debugH)300 getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
301   assert(canUseDebugH(debugH));
302   debugH = debugH.drop_front(sizeof(object::debug_h_header));
303   uint32_t count = debugH.size() / sizeof(GloballyHashedType);
304   return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
305 }
306 
307 // Merge .debug$T for a generic object file.
mergeDebugT(TypeMerger * m)308 Error TpiSource::mergeDebugT(TypeMerger *m) {
309   assert(!ctx.config.debugGHashes &&
310          "use remapTpiWithGHashes when ghash is enabled");
311 
312   CVTypeArray types;
313   BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
314   cantFail(reader.readArray(types, reader.getLength()));
315 
316   // When dealing with PCH.OBJ, some indices were already merged.
317   unsigned nbHeadIndices = indexMapStorage.size();
318 
319   std::optional<PCHMergerInfo> pchInfo;
320   if (auto err = mergeTypeAndIdRecords(m->idTable, m->typeTable,
321                                        indexMapStorage, types, pchInfo))
322     Fatal(ctx) << "codeview::mergeTypeAndIdRecords failed: "
323                << toString(std::move(err));
324   if (pchInfo) {
325     file->pchSignature = pchInfo->PCHSignature;
326     endPrecompIdx = pchInfo->EndPrecompIndex;
327   }
328 
329   // In an object, there is only one mapping for both types and items.
330   tpiMap = indexMapStorage;
331   ipiMap = indexMapStorage;
332 
333   if (ctx.config.showSummary) {
334     nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
335     nbTypeRecordsBytes = reader.getLength();
336     // Count how many times we saw each type record in our input. This
337     // calculation requires a second pass over the type records to classify each
338     // record as a type or index. This is slow, but this code executes when
339     // collecting statistics.
340     m->tpiCounts.resize(m->getTypeTable().size());
341     m->ipiCounts.resize(m->getIDTable().size());
342     uint32_t srcIdx = nbHeadIndices;
343     for (const CVType &ty : types) {
344       TypeIndex dstIdx = tpiMap[srcIdx++];
345       // Type merging may fail, so a complex source type may become the simple
346       // NotTranslated type, which cannot be used as an array index.
347       if (dstIdx.isSimple())
348         continue;
349       SmallVectorImpl<uint32_t> &counts =
350           isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
351       ++counts[dstIdx.toArrayIndex()];
352     }
353   }
354 
355   return Error::success();
356 }
357 
358 // Merge types from a type server PDB.
mergeDebugT(TypeMerger * m)359 Error TypeServerSource::mergeDebugT(TypeMerger *m) {
360   assert(!ctx.config.debugGHashes &&
361          "use remapTpiWithGHashes when ghash is enabled");
362 
363   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
364   Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
365   if (auto e = expectedTpi.takeError())
366     Fatal(ctx) << "Type server does not have TPI stream: "
367                << toString(std::move(e));
368   pdb::TpiStream *maybeIpi = nullptr;
369   if (pdbFile.hasPDBIpiStream()) {
370     Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
371     if (auto e = expectedIpi.takeError())
372       Fatal(ctx) << "Error getting type server IPI stream: "
373                  << toString(std::move(e));
374     maybeIpi = &*expectedIpi;
375   }
376 
377   // Merge TPI first, because the IPI stream will reference type indices.
378   if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
379                                   expectedTpi->typeArray()))
380     Fatal(ctx) << "codeview::mergeTypeRecords failed: "
381                << toString(std::move(err));
382   tpiMap = indexMapStorage;
383 
384   // Merge IPI.
385   if (maybeIpi) {
386     if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
387                                   maybeIpi->typeArray()))
388       Fatal(ctx) << "codeview::mergeIdRecords failed: "
389                  << toString(std::move(err));
390     ipiMap = ipiSrc->indexMapStorage;
391   }
392 
393   if (ctx.config.showSummary) {
394     nbTypeRecords = tpiMap.size() + ipiMap.size();
395     nbTypeRecordsBytes =
396         expectedTpi->typeArray().getUnderlyingStream().getLength() +
397         (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
398                   : 0);
399 
400     // Count how many times we saw each type record in our input. If a
401     // destination type index is present in the source to destination type index
402     // map, that means we saw it once in the input. Add it to our histogram.
403     m->tpiCounts.resize(m->getTypeTable().size());
404     m->ipiCounts.resize(m->getIDTable().size());
405     for (TypeIndex ti : tpiMap)
406       if (!ti.isSimple())
407         ++m->tpiCounts[ti.toArrayIndex()];
408     for (TypeIndex ti : ipiMap)
409       if (!ti.isSimple())
410         ++m->ipiCounts[ti.toArrayIndex()];
411   }
412 
413   return Error::success();
414 }
415 
getTypeServerSource()416 Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
417   const codeview::GUID &tsId = typeServerDependency.getGuid();
418   StringRef tsPath = typeServerDependency.getName();
419 
420   TypeServerSource *tsSrc = nullptr;
421   auto it = ctx.typeServerSourceMappings.find(tsId);
422   if (it != ctx.typeServerSourceMappings.end()) {
423     tsSrc = (TypeServerSource *)it->second;
424   }
425   if (tsSrc == nullptr) {
426     // The file failed to load, lookup by name
427     PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
428     if (!pdb)
429       return createFileError(tsPath, errorCodeToError(std::error_code(
430                                          ENOENT, std::generic_category())));
431     // If an error occurred during loading, throw it now
432     if (pdb->loadErrorStr)
433       return createFileError(
434           tsPath, make_error<StringError>(*pdb->loadErrorStr,
435                                           llvm::inconvertibleErrorCode()));
436 
437     tsSrc = (TypeServerSource *)pdb->debugTypesObj;
438 
439     // Just because a file with a matching name was found and it was an actual
440     // PDB file doesn't mean it matches.  For it to match the InfoStream's GUID
441     // must match the GUID specified in the TypeServer2 record.
442     if (tsSrc->Guid != tsId) {
443       return createFileError(tsPath,
444                              make_error<pdb::PDBError>(
445                                  pdb::pdb_error_code::signature_out_of_date));
446     }
447   }
448   return tsSrc;
449 }
450 
mergeDebugT(TypeMerger * m)451 Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
452   Expected<TypeServerSource *> tsSrc = getTypeServerSource();
453   if (!tsSrc)
454     return tsSrc.takeError();
455 
456   pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
457   auto expectedInfo = pdbSession.getPDBInfoStream();
458   if (!expectedInfo)
459     return expectedInfo.takeError();
460 
461   // Reuse the type index map of the type server.
462   tpiMap = (*tsSrc)->tpiMap;
463   ipiMap = (*tsSrc)->ipiMap;
464   return Error::success();
465 }
466 
equalsPath(StringRef path1,StringRef path2)467 static bool equalsPath(StringRef path1, StringRef path2) {
468 #if defined(_WIN32)
469   return path1.equals_insensitive(path2);
470 #else
471   return path1 == path2;
472 #endif
473 }
474 
475 // Find by name an OBJ provided on the command line
findObjByName(StringRef fileNameOnly)476 PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
477   for (auto kv : ctx.precompSourceMappings) {
478     StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
479                                                     sys::path::Style::windows);
480 
481     // Compare based solely on the file name (link.exe behavior)
482     if (equalsPath(currentFileName, fileNameOnly))
483       return (PrecompSource *)kv.second;
484   }
485   return nullptr;
486 }
487 
findPrecompSource(ObjFile * file,PrecompRecord & pr)488 PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
489                                                    PrecompRecord &pr) {
490   // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
491   // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
492   // the paths embedded in the OBJs are in the Windows format.
493   SmallString<128> prFileName =
494       sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
495 
496   auto it = ctx.precompSourceMappings.find(pr.getSignature());
497   if (it != ctx.precompSourceMappings.end()) {
498     return (PrecompSource *)it->second;
499   }
500   // Lookup by name
501   return findObjByName(prFileName);
502 }
503 
findPrecompMap(ObjFile * file,PrecompRecord & pr)504 Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
505                                                            PrecompRecord &pr) {
506   PrecompSource *precomp = findPrecompSource(file, pr);
507 
508   if (!precomp)
509     return createFileError(
510         pr.getPrecompFilePath(),
511         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
512 
513   // Don't rely on the PCH signature to validate the concordance between the PCH
514   // and the OBJ that uses it. However we do validate here that the
515   // LF_ENDPRECOMP record index lines up with the number of type records
516   // LF_PRECOMP is expecting.
517   if (precomp->endPrecompIdx != pr.getTypesCount())
518     return createFileError(
519         toString(file),
520         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
521 
522   return precomp;
523 }
524 
525 /// Merges a precompiled headers TPI map into the current TPI map. The
526 /// precompiled headers object will also be loaded and remapped in the
527 /// process.
mergeInPrecompHeaderObj()528 Error UsePrecompSource::mergeInPrecompHeaderObj() {
529   auto e = findPrecompMap(file, precompDependency);
530   if (!e)
531     return e.takeError();
532 
533   PrecompSource *precompSrc = *e;
534   if (precompSrc->tpiMap.empty())
535     return Error::success();
536 
537   assert(precompDependency.getStartTypeIndex() ==
538          TypeIndex::FirstNonSimpleIndex);
539   assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
540   // Use the previously remapped index map from the precompiled headers.
541   indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
542                          precompSrc->tpiMap.begin() +
543                              precompDependency.getTypesCount());
544 
545   return Error::success();
546 }
547 
mergeDebugT(TypeMerger * m)548 Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
549   // This object was compiled with /Yu, so process the corresponding
550   // precompiled headers object (/Yc) first. Some type indices in the current
551   // object are referencing data in the precompiled headers object, so we need
552   // both to be loaded.
553   if (Error e = mergeInPrecompHeaderObj())
554     return e;
555 
556   return TpiSource::mergeDebugT(m);
557 }
558 
mergeDebugT(TypeMerger * m)559 Error PrecompSource::mergeDebugT(TypeMerger *m) {
560   // In some cases, the S_OBJNAME record doesn't contain the PCH signature.
561   // The signature comes later with the LF_ENDPRECOMP record, so we first need
562   // to merge in all the .PCH.OBJ file type records, before registering below.
563   if (Error e = TpiSource::mergeDebugT(m))
564     return e;
565 
566   registerMapping();
567 
568   return Error::success();
569 }
570 
registerMapping()571 void PrecompSource::registerMapping() {
572   if (registered)
573     return;
574   if (file->pchSignature && *file->pchSignature) {
575     auto it = ctx.precompSourceMappings.emplace(*file->pchSignature, this);
576     if (!it.second)
577       Fatal(ctx)
578           << "a PCH object with the same signature has already been provided ("
579           << toString(it.first->second->file) << " and " << toString(file)
580           << ")";
581     registered = true;
582   }
583 }
584 
585 //===----------------------------------------------------------------------===//
586 // Parellel GHash type merging implementation.
587 //===----------------------------------------------------------------------===//
588 
loadGHashes()589 void TpiSource::loadGHashes() {
590   if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
591     ghashes = getHashesFromDebugH(*debugH);
592     ownedGHashes = false;
593   } else {
594     CVTypeArray types;
595     BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
596     cantFail(reader.readArray(types, reader.getLength()));
597     assignGHashesFromVector(GloballyHashedType::hashTypes(types));
598   }
599 
600   fillIsItemIndexFromDebugT();
601 }
602 
603 // Copies ghashes from a vector into an array. These are long lived, so it's
604 // worth the time to copy these into an appropriately sized vector to reduce
605 // memory usage.
assignGHashesFromVector(std::vector<GloballyHashedType> && hashVec)606 void TpiSource::assignGHashesFromVector(
607     std::vector<GloballyHashedType> &&hashVec) {
608   if (hashVec.empty())
609     return;
610   GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
611   memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
612   ghashes = ArrayRef(hashes, hashVec.size());
613   ownedGHashes = true;
614 }
615 
616 // Faster way to iterate type records. forEachTypeChecked is faster than
617 // iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
forEachTypeChecked(ArrayRef<uint8_t> types,function_ref<void (const CVType &)> fn)618 static void forEachTypeChecked(ArrayRef<uint8_t> types,
619                                function_ref<void(const CVType &)> fn) {
620   checkError(
621       forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
622         fn(ty);
623         return Error::success();
624       }));
625 }
626 
627 // Walk over file->debugTypes and fill in the isItemIndex bit vector.
628 // TODO: Store this information in .debug$H so that we don't have to recompute
629 // it. This is the main bottleneck slowing down parallel ghashing with one
630 // thread over single-threaded ghashing.
fillIsItemIndexFromDebugT()631 void TpiSource::fillIsItemIndexFromDebugT() {
632   uint32_t index = 0;
633   isItemIndex.resize(ghashes.size());
634   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
635     if (isIdRecord(ty.kind()))
636       isItemIndex.set(index);
637     ++index;
638   });
639 }
640 
mergeTypeRecord(TypeIndex curIndex,CVType ty)641 void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
642   // Decide if the merged type goes into TPI or IPI.
643   bool isItem = isIdRecord(ty.kind());
644   MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
645 
646   // Copy the type into our mutable buffer.
647   assert(ty.length() <= codeview::MaxRecordLength);
648   size_t offset = merged.recs.size();
649   size_t newSize = alignTo(ty.length(), 4);
650   merged.recs.resize(offset + newSize);
651   auto newRec = MutableArrayRef(&merged.recs[offset], newSize);
652   memcpy(newRec.data(), ty.data().data(), newSize);
653 
654   // Fix up the record prefix and padding bytes if it required resizing.
655   if (newSize != ty.length()) {
656     reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
657     for (size_t i = ty.length(); i < newSize; ++i)
658       newRec[i] = LF_PAD0 + (newSize - i);
659   }
660 
661   // Remap the type indices in the new record.
662   remapTypesInTypeRecord(newRec);
663   uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
664   merged.recSizes.push_back(static_cast<uint16_t>(newSize));
665   merged.recHashes.push_back(pdbHash);
666 
667   // Retain a mapping from PDB function id to PDB function type. This mapping is
668   // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
669   // symbols.
670   if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
671     bool success = ty.length() >= 12;
672     TypeIndex funcId = curIndex;
673     if (success)
674       success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
675     TypeIndex funcType =
676         *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
677     if (success) {
678       funcIdToType.push_back({funcId, funcType});
679     } else {
680       StringRef fname = file ? file->getName() : "<unknown PDB>";
681       Warn(ctx) << "corrupt LF_[M]FUNC_ID record 0x"
682                 << utohexstr(curIndex.getIndex()) << " in " << fname;
683     }
684   }
685 }
686 
mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,TypeIndex beginIndex)687 void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
688                                        TypeIndex beginIndex) {
689   // Re-sort the list of unique types by index.
690   if (kind == PDB)
691     assert(llvm::is_sorted(uniqueTypes));
692   else
693     llvm::sort(uniqueTypes);
694 
695   // Accumulate all the unique types into one buffer in mergedTypes.
696   uint32_t ghashIndex = 0;
697   auto nextUniqueIndex = uniqueTypes.begin();
698   assert(mergedTpi.recs.empty());
699   assert(mergedIpi.recs.empty());
700 
701   // Pre-compute the number of elements in advance to avoid std::vector resizes.
702   unsigned nbTpiRecs = 0;
703   unsigned nbIpiRecs = 0;
704   forEachTypeChecked(typeRecords, [&](const CVType &ty) {
705     if (nextUniqueIndex != uniqueTypes.end() &&
706         *nextUniqueIndex == ghashIndex) {
707       assert(ty.length() <= codeview::MaxRecordLength);
708       size_t newSize = alignTo(ty.length(), 4);
709       (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
710       ++nextUniqueIndex;
711     }
712     ++ghashIndex;
713   });
714   mergedTpi.recs.reserve(nbTpiRecs);
715   mergedIpi.recs.reserve(nbIpiRecs);
716 
717   // Do the actual type merge.
718   ghashIndex = 0;
719   nextUniqueIndex = uniqueTypes.begin();
720   forEachTypeChecked(typeRecords, [&](const CVType &ty) {
721     if (nextUniqueIndex != uniqueTypes.end() &&
722         *nextUniqueIndex == ghashIndex) {
723       mergeTypeRecord(beginIndex + ghashIndex, ty);
724       ++nextUniqueIndex;
725     }
726     ++ghashIndex;
727   });
728   assert(nextUniqueIndex == uniqueTypes.end() &&
729          "failed to merge all desired records");
730   assert(uniqueTypes.size() ==
731              mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
732          "missing desired record");
733 }
734 
remapTpiWithGHashes(GHashState * g)735 void TpiSource::remapTpiWithGHashes(GHashState *g) {
736   assert(ctx.config.debugGHashes && "ghashes must be enabled");
737   fillMapFromGHashes(g);
738   tpiMap = indexMapStorage;
739   ipiMap = indexMapStorage;
740   mergeUniqueTypeRecords(file->debugTypes);
741   // TODO: Free all unneeded ghash resources now that we have a full index map.
742 
743   if (ctx.config.showSummary) {
744     nbTypeRecords = ghashes.size();
745     nbTypeRecordsBytes = file->debugTypes.size();
746   }
747 }
748 
749 // PDBs do not actually store global hashes, so when merging a type server
750 // PDB we have to synthesize global hashes.  To do this, we first synthesize
751 // global hashes for the TPI stream, since it is independent, then we
752 // synthesize hashes for the IPI stream, using the hashes for the TPI stream
753 // as inputs.
loadGHashes()754 void TypeServerSource::loadGHashes() {
755   // Don't hash twice.
756   if (!ghashes.empty())
757     return;
758   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
759 
760   // Hash TPI stream.
761   Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
762   if (auto e = expectedTpi.takeError())
763     Fatal(ctx) << "Type server does not have TPI stream: "
764                << toString(std::move(e));
765   assignGHashesFromVector(
766       GloballyHashedType::hashTypes(expectedTpi->typeArray()));
767   isItemIndex.resize(ghashes.size());
768 
769   // Hash IPI stream, which depends on TPI ghashes.
770   if (!pdbFile.hasPDBIpiStream())
771     return;
772   Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
773   if (auto e = expectedIpi.takeError())
774     Fatal(ctx) << "error retrieving IPI stream: " << toString(std::move(e));
775   ipiSrc->assignGHashesFromVector(
776       GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));
777 
778   // The IPI stream isItemIndex bitvector should be all ones.
779   ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
780   ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
781 }
782 
783 // Flatten discontiguous PDB type arrays to bytes so that we can use
784 // forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
785 // type servers is faster than iterating all object files compiled with /Z7 with
786 // CVTypeArray, which has high overheads due to the virtual interface of
787 // BinaryStream::readBytes.
typeArrayToBytes(const CVTypeArray & types)788 static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
789   BinaryStreamRef stream = types.getUnderlyingStream();
790   ArrayRef<uint8_t> debugTypes;
791   checkError(stream.readBytes(0, stream.getLength(), debugTypes));
792   return debugTypes;
793 }
794 
795 // Merge types from a type server PDB.
remapTpiWithGHashes(GHashState * g)796 void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
797   assert(ctx.config.debugGHashes && "ghashes must be enabled");
798 
799   // IPI merging depends on TPI, so do TPI first, then do IPI.  No need to
800   // propagate errors, those should've been handled during ghash loading.
801   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
802   pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
803   fillMapFromGHashes(g);
804   tpiMap = indexMapStorage;
805   mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
806   if (pdbFile.hasPDBIpiStream()) {
807     pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
808     ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
809     ipiSrc->fillMapFromGHashes(g);
810     ipiMap = ipiSrc->indexMapStorage;
811     ipiSrc->tpiMap = tpiMap;
812     ipiSrc->ipiMap = ipiMap;
813     ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));
814 
815     if (ctx.config.showSummary) {
816       nbTypeRecords = ipiSrc->ghashes.size();
817       nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
818     }
819   }
820 
821   if (ctx.config.showSummary) {
822     nbTypeRecords += ghashes.size();
823     nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
824   }
825 }
826 
remapTpiWithGHashes(GHashState * g)827 void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
828   // No remapping to do with /Zi objects. Simply use the index map from the type
829   // server. Errors should have been reported earlier. Symbols from this object
830   // will be ignored.
831   Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
832   if (!maybeTsSrc) {
833     typeMergingError =
834         joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
835     return;
836   }
837   TypeServerSource *tsSrc = *maybeTsSrc;
838   tpiMap = tsSrc->tpiMap;
839   ipiMap = tsSrc->ipiMap;
840 }
841 
loadGHashes()842 void PrecompSource::loadGHashes() {
843   if (getDebugH(file)) {
844     Warn(ctx) << "ignoring .debug$H section; pch with ghash is not implemented";
845   }
846 
847   uint32_t ghashIdx = 0;
848   std::vector<GloballyHashedType> hashVec;
849   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
850     // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
851     // the PDB. There must be an entry in the list of ghashes so that the type
852     // indexes of the following records in the /Yc PCH object line up.
853     if (ty.kind() == LF_ENDPRECOMP) {
854       EndPrecompRecord endPrecomp;
855       cantFail(TypeDeserializer::deserializeAs<EndPrecompRecord>(
856           const_cast<CVType &>(ty), endPrecomp));
857       file->pchSignature = endPrecomp.getSignature();
858       registerMapping();
859       endPrecompIdx = ghashIdx;
860     }
861 
862     hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
863     isItemIndex.push_back(isIdRecord(ty.kind()));
864     ++ghashIdx;
865   });
866   assignGHashesFromVector(std::move(hashVec));
867 }
868 
loadGHashes()869 void UsePrecompSource::loadGHashes() {
870   auto e = findPrecompMap(file, precompDependency);
871   if (!e) {
872     Warn(ctx) << e.takeError();
873     return;
874   }
875 
876   PrecompSource *pchSrc = *e;
877 
878   // To compute ghashes of a /Yu object file, we need to build on the ghashes of
879   // the /Yc PCH object. After we are done hashing, discard the ghashes from the
880   // PCH source so we don't unnecessarily try to deduplicate them.
881   std::vector<GloballyHashedType> hashVec =
882       pchSrc->ghashes.take_front(precompDependency.getTypesCount());
883   forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
884     hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
885     isItemIndex.push_back(isIdRecord(ty.kind()));
886   });
887   hashVec.erase(hashVec.begin(),
888                 hashVec.begin() + precompDependency.getTypesCount());
889   assignGHashesFromVector(std::move(hashVec));
890 }
891 
remapTpiWithGHashes(GHashState * g)892 void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
893   fillMapFromGHashes(g);
894   // This object was compiled with /Yu, so process the corresponding
895   // precompiled headers object (/Yc) first. Some type indices in the current
896   // object are referencing data in the precompiled headers object, so we need
897   // both to be loaded.
898   if (Error e = mergeInPrecompHeaderObj()) {
899     typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
900     return;
901   }
902 
903   tpiMap = indexMapStorage;
904   ipiMap = indexMapStorage;
905   mergeUniqueTypeRecords(file->debugTypes,
906                          TypeIndex(precompDependency.getStartTypeIndex() +
907                                    precompDependency.getTypesCount()));
908   if (ctx.config.showSummary) {
909     nbTypeRecords = ghashes.size();
910     nbTypeRecordsBytes = file->debugTypes.size();
911   }
912 }
913 
914 namespace {
915 /// A concurrent hash table for global type hashing. It is based on this paper:
916 /// Concurrent Hash Tables: Fast and General(?)!
917 /// https://dl.acm.org/doi/10.1145/3309206
918 ///
919 /// This hash table is meant to be used in two phases:
920 /// 1. concurrent insertions
921 /// 2. concurrent reads
922 /// It does not support lookup, deletion, or rehashing. It uses linear probing.
923 ///
924 /// The paper describes storing a key-value pair in two machine words.
925 /// Generally, the values stored in this map are type indices, and we can use
926 /// those values to recover the ghash key from a side table. This allows us to
927 /// shrink the table entries further at the cost of some loads, and sidesteps
928 /// the need for a 128 bit atomic compare-and-swap operation.
929 ///
930 /// During insertion, a priority function is used to decide which insertion
931 /// should be preferred. This ensures that the output is deterministic. For
932 /// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
933 ///
934 class GHashCell;
935 struct GHashTable {
936   GHashCell *table = nullptr;
937   uint32_t tableSize = 0;
938 
939   GHashTable() = default;
940   ~GHashTable();
941 
942   /// Initialize the table with the given size. Because the table cannot be
943   /// resized, the initial size of the table must be large enough to contain all
944   /// inputs, or insertion may not be able to find an empty cell.
945   void init(uint32_t newTableSize);
946 
947   /// Insert the cell with the given ghash into the table. Return the insertion
948   /// position in the table. It is safe for the caller to store the insertion
949   /// position because the table cannot be resized.
950   uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
951                   GHashCell newCell);
952 };
953 
954 /// A ghash table cell for deduplicating types from TpiSources.
955 class GHashCell {
956   // Force "data" to be 64-bit aligned; otherwise, some versions of clang
957   // will generate calls to libatomic when using some versions of libstdc++
958   // on 32-bit targets.  (Also, in theory, there could be a target where
959   // new[] doesn't always return an 8-byte-aligned allocation.)
960   alignas(sizeof(uint64_t)) uint64_t data = 0;
961 
962 public:
963   GHashCell() = default;
964 
965   // Construct data most to least significant so that sorting works well:
966   // - isItem
967   // - tpiSrcIdx
968   // - ghashIdx
969   // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
970   // non-zero representation.
GHashCell(bool isItem,uint32_t tpiSrcIdx,uint32_t ghashIdx)971   GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
972       : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
973              ghashIdx) {
974     assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
975     assert(ghashIdx == getGHashIdx() && "round trip failure");
976   }
977 
GHashCell(uint64_t data)978   explicit GHashCell(uint64_t data) : data(data) {}
979 
980   // The empty cell is all zeros.
isEmpty() const981   bool isEmpty() const { return data == 0ULL; }
982 
983   /// Extract the tpiSrcIdx.
getTpiSrcIdx() const984   uint32_t getTpiSrcIdx() const {
985     return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
986   }
987 
988   /// Extract the index into the ghash array of the TpiSource.
getGHashIdx() const989   uint32_t getGHashIdx() const { return (uint32_t)data; }
990 
isItem() const991   bool isItem() const { return data & (1ULL << 63U); }
992 
993   /// Get the ghash key for this cell.
getGHash(const COFFLinkerContext & ctx) const994   GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
995     return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
996   }
997 
998   /// The priority function for the cell. The data is stored such that lower
999   /// tpiSrcIdx and ghashIdx values are preferred, which means that type record
1000   /// from earlier sources are more likely to prevail.
operator <(const GHashCell & l,const GHashCell & r)1001   friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
1002     return l.data < r.data;
1003   }
1004 };
1005 } // namespace
1006 
1007 namespace lld::coff {
1008 /// This type is just a wrapper around GHashTable with external linkage so it
1009 /// can be used from a header.
1010 struct GHashState {
1011   GHashTable table;
1012 };
1013 } // namespace lld::coff
1014 
~GHashTable()1015 GHashTable::~GHashTable() { delete[] table; }
1016 
init(uint32_t newTableSize)1017 void GHashTable::init(uint32_t newTableSize) {
1018   table = new GHashCell[newTableSize];
1019   memset(table, 0, newTableSize * sizeof(GHashCell));
1020   tableSize = newTableSize;
1021 }
1022 
insert(COFFLinkerContext & ctx,GloballyHashedType ghash,GHashCell newCell)1023 uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
1024                             GHashCell newCell) {
1025   assert(!newCell.isEmpty() && "cannot insert empty cell value");
1026 
1027   // FIXME: The low bytes of SHA1 have low entropy for short records, which
1028   // type records are. Swap the byte order for better entropy. A better ghash
1029   // won't need this.
1030   uint32_t startIdx =
1031       llvm::byteswap<uint64_t>(*reinterpret_cast<uint64_t *>(&ghash)) %
1032       tableSize;
1033 
1034   // Do a linear probe starting at startIdx.
1035   uint32_t idx = startIdx;
1036   while (true) {
1037     // Run a compare and swap loop. There are four cases:
1038     // - cell is empty: CAS into place and return
1039     // - cell has matching key, earlier priority: do nothing, return
1040     // - cell has matching key, later priority: CAS into place and return
1041     // - cell has non-matching key: hash collision, probe next cell
1042     auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
1043     GHashCell oldCell(cellPtr->load());
1044     while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
1045       // Check if there is an existing ghash entry with a higher priority
1046       // (earlier ordering). If so, this is a duplicate, we are done.
1047       if (!oldCell.isEmpty() && oldCell < newCell)
1048         return idx;
1049       // Either the cell is empty, or our value is higher priority. Try to
1050       // compare and swap. If it succeeds, we are done.
1051       if (cellPtr->compare_exchange_weak(oldCell, newCell))
1052         return idx;
1053       // If the CAS failed, check this cell again.
1054     }
1055 
1056     // Advance the probe. Wrap around to the beginning if we run off the end.
1057     ++idx;
1058     idx = idx == tableSize ? 0 : idx;
1059     if (idx == startIdx) {
1060       // If this becomes an issue, we could mark failure and rehash from the
1061       // beginning with a bigger table. There is no difference between rehashing
1062       // internally and starting over.
1063       report_fatal_error("ghash table is full");
1064     }
1065   }
1066   llvm_unreachable("left infloop");
1067 }
1068 
TypeMerger(COFFLinkerContext & c,llvm::BumpPtrAllocator & alloc)1069 TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
1070     : typeTable(alloc), idTable(alloc), ctx(c) {}
1071 
1072 TypeMerger::~TypeMerger() = default;
1073 
mergeTypesWithGHash()1074 void TypeMerger::mergeTypesWithGHash() {
1075   // Load ghashes. Do type servers and PCH objects first.
1076   {
1077     llvm::TimeTraceScope timeScope("Load GHASHes");
1078     ScopedTimer t1(ctx.loadGHashTimer);
1079     parallelForEach(dependencySources,
1080                     [&](TpiSource *source) { source->loadGHashes(); });
1081     parallelForEach(objectSources,
1082                     [&](TpiSource *source) { source->loadGHashes(); });
1083   }
1084 
1085   llvm::TimeTraceScope timeScope("Merge types (GHASH)");
1086   ScopedTimer t2(ctx.mergeGHashTimer);
1087   GHashState ghashState;
1088 
1089   // Estimate the size of hash table needed to deduplicate ghashes. This *must*
1090   // be larger than the number of unique types, or hash table insertion may not
1091   // be able to find a vacant slot. Summing the input types guarantees this, but
1092   // it is a gross overestimate. The table size could be reduced to save memory,
1093   // but it would require implementing rehashing, and this table is generally
1094   // small compared to total memory usage, at eight bytes per input type record,
1095   // and most input type records are larger than eight bytes.
1096   size_t tableSize = 0;
1097   for (TpiSource *source : ctx.tpiSourceList)
1098     tableSize += source->ghashes.size();
1099 
1100   // Cap the table size so that we can use 32-bit cell indices. Type indices are
1101   // also 32-bit, so this is an inherent PDB file format limit anyway.
1102   tableSize =
1103       std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
1104   ghashState.table.init(static_cast<uint32_t>(tableSize));
1105 
1106   // Insert ghashes in parallel. During concurrent insertion, we cannot observe
1107   // the contents of the hash table cell, but we can remember the insertion
1108   // position. Because the table does not rehash, the position will not change
1109   // under insertion. After insertion is done, the value of the cell can be read
1110   // to retrieve the final PDB type index.
1111   parallelFor(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
1112     TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1113     source->indexMapStorage.resize(source->ghashes.size());
1114     for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
1115       if (source->shouldOmitFromPdb(i)) {
1116         source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
1117         continue;
1118       }
1119       GloballyHashedType ghash = source->ghashes[i];
1120       bool isItem = source->isItemIndex.test(i);
1121       uint32_t cellIdx =
1122           ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));
1123 
1124       // Store the ghash cell index as a type index in indexMapStorage. Later
1125       // we will replace it with the PDB type index.
1126       source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
1127     }
1128   });
1129 
1130   // Collect all non-empty cells and sort them. This will implicitly assign
1131   // destination type indices, and partition the entries into type records and
1132   // item records. It arranges types in this order:
1133   // - type records
1134   //   - source 0, type 0...
1135   //   - source 1, type 1...
1136   // - item records
1137   //   - source 0, type 1...
1138   //   - source 1, type 0...
1139   std::vector<GHashCell> entries;
1140   for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) {
1141     if (!cell.isEmpty())
1142       entries.push_back(cell);
1143   }
1144   parallelSort(entries, std::less<GHashCell>());
1145   Log(ctx) << formatv(
1146       "ghash table load factor: {0:p} (size {1} / capacity {2})\n",
1147       tableSize ? double(entries.size()) / tableSize : 0, entries.size(),
1148       tableSize);
1149 
1150   // Find out how many type and item indices there are.
1151   auto mid = llvm::lower_bound(entries, GHashCell(true, 0, 0));
1152   assert((mid == entries.end() || mid->isItem()) &&
1153          (mid == entries.begin() || !std::prev(mid)->isItem()) &&
1154          "midpoint is not midpoint");
1155   uint32_t numTypes = std::distance(entries.begin(), mid);
1156   uint32_t numItems = std::distance(mid, entries.end());
1157   Log(ctx) << "Tpi record count: " << numTypes;
1158   Log(ctx) << "Ipi record count: " << numItems;
1159 
1160   // Make a list of the "unique" type records to merge for each tpi source. Type
1161   // merging will skip indices not on this list. Store the destination PDB type
1162   // index for these unique types in the tpiMap for each source. The entries for
1163   // non-unique types will be filled in prior to type merging.
1164   for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
1165     auto &cell = entries[i];
1166     uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
1167     TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
1168     source->uniqueTypes.push_back(cell.getGHashIdx());
1169 
1170     // Update the ghash table to store the destination PDB type index in the
1171     // table.
1172     uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
1173     uint32_t ghashCellIndex =
1174         source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
1175     ghashState.table.table[ghashCellIndex] =
1176         GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
1177   }
1178 
1179   // In parallel, remap all types.
1180   for (TpiSource *source : dependencySources)
1181     source->remapTpiWithGHashes(&ghashState);
1182   parallelForEach(objectSources, [&](TpiSource *source) {
1183     source->remapTpiWithGHashes(&ghashState);
1184   });
1185 
1186   // Build a global map of from function ID to function type.
1187   for (TpiSource *source : ctx.tpiSourceList) {
1188     funcIdToType.insert_range(source->funcIdToType);
1189     source->funcIdToType.clear();
1190   }
1191 
1192   clearGHashes();
1193 }
1194 
sortDependencies()1195 void TypeMerger::sortDependencies() {
1196   // Order dependencies first, but preserve the existing order.
1197   std::vector<TpiSource *> deps;
1198   std::vector<TpiSource *> objs;
1199   for (TpiSource *s : ctx.tpiSourceList)
1200     (s->isDependency() ? deps : objs).push_back(s);
1201   uint32_t numDeps = deps.size();
1202   uint32_t numObjs = objs.size();
1203   ctx.tpiSourceList = std::move(deps);
1204   ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
1205   for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
1206     ctx.tpiSourceList[i]->tpiSrcIdx = i;
1207   dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps);
1208   objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
1209 }
1210 
1211 /// Given the index into the ghash table for a particular type, return the type
1212 /// index for that type in the output PDB.
loadPdbTypeIndexFromCell(GHashState * g,uint32_t ghashCellIdx)1213 static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1214                                           uint32_t ghashCellIdx) {
1215   GHashCell cell = g->table.table[ghashCellIdx];
1216   return TypeIndex::fromArrayIndex(cell.getGHashIdx());
1217 }
1218 
1219 /// Free heap allocated ghashes.
clearGHashes()1220 void TypeMerger::clearGHashes() {
1221   for (TpiSource *src : ctx.tpiSourceList) {
1222     if (src->ownedGHashes)
1223       delete[] src->ghashes.data();
1224     src->ghashes = {};
1225     src->isItemIndex.clear();
1226     src->uniqueTypes.clear();
1227   }
1228 }
1229 
1230 // Fill in a TPI or IPI index map using ghashes. For each source type, use its
1231 // ghash to lookup its final type index in the PDB, and store that in the map.
fillMapFromGHashes(GHashState * g)1232 void TpiSource::fillMapFromGHashes(GHashState *g) {
1233   for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
1234     TypeIndex fakeCellIndex = indexMapStorage[i];
1235     if (fakeCellIndex.isSimple())
1236       indexMapStorage[i] = fakeCellIndex;
1237     else
1238       indexMapStorage[i] =
1239           loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
1240   }
1241 }
1242