xref: /freebsd/contrib/llvm-project/lld/COFF/InputFiles.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "COFFLinkerContext.h"
11 #include "Chunks.h"
12 #include "Config.h"
13 #include "DebugTypes.h"
14 #include "Driver.h"
15 #include "SymbolTable.h"
16 #include "Symbols.h"
17 #include "lld/Common/DWARF.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/BinaryFormat/COFF.h"
21 #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
22 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
23 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
24 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
25 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
26 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
27 #include "llvm/IR/Mangler.h"
28 #include "llvm/LTO/LTO.h"
29 #include "llvm/Object/Binary.h"
30 #include "llvm/Object/COFF.h"
31 #include "llvm/Object/COFFImportFile.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/Endian.h"
34 #include "llvm/Support/Error.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/Path.h"
37 #include "llvm/TargetParser/Triple.h"
38 #include <cstring>
39 #include <optional>
40 #include <utility>
41 
42 using namespace llvm;
43 using namespace llvm::COFF;
44 using namespace llvm::codeview;
45 using namespace llvm::object;
46 using namespace llvm::support::endian;
47 using namespace lld;
48 using namespace lld::coff;
49 
50 using llvm::Triple;
51 using llvm::support::ulittle32_t;
52 
53 // Returns the last element of a path, which is supposed to be a filename.
getBasename(StringRef path)54 static StringRef getBasename(StringRef path) {
55   return sys::path::filename(path, sys::path::Style::windows);
56 }
57 
58 // Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
toString(const coff::InputFile * file)59 std::string lld::toString(const coff::InputFile *file) {
60   if (!file)
61     return "<internal>";
62   if (file->parentName.empty())
63     return std::string(file->getName());
64 
65   return (getBasename(file->parentName) + "(" + getBasename(file->getName()) +
66           ")")
67       .str();
68 }
69 
// Writes the printable form of `f`, as produced by toString(), to the
// diagnostic stream `s` and returns the stream to allow chaining.
const COFFSyncStream &coff::operator<<(const COFFSyncStream &s,
                                       const InputFile *f) {
  const std::string printable = toString(f);
  return s << printable;
}
74 
75 /// Checks that Source is compatible with being a weak alias to Target.
76 /// If Source is Undefined and has no weak alias set, makes it a weak
77 /// alias to Target.
checkAndSetWeakAlias(SymbolTable & symtab,InputFile * f,Symbol * source,Symbol * target,bool isAntiDep)78 static void checkAndSetWeakAlias(SymbolTable &symtab, InputFile *f,
79                                  Symbol *source, Symbol *target,
80                                  bool isAntiDep) {
81   if (auto *u = dyn_cast<Undefined>(source)) {
82     if (u->weakAlias && u->weakAlias != target) {
83       // Ignore duplicated anti-dependency symbols.
84       if (isAntiDep)
85         return;
86       if (!u->isAntiDep) {
87         // Weak aliases as produced by GCC are named in the form
88         // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
89         // of another symbol emitted near the weak symbol.
90         // Just use the definition from the first object file that defined
91         // this weak symbol.
92         if (symtab.ctx.config.allowDuplicateWeak)
93           return;
94         symtab.reportDuplicate(source, f);
95       }
96     }
97     u->setWeakAlias(target, isAntiDep);
98   }
99 }
100 
ignoredSymbolName(StringRef name)101 static bool ignoredSymbolName(StringRef name) {
102   return name == "@feat.00" || name == "@comp.id";
103 }
104 
cloneSymbol(COFFSymbolRef sym)105 static coff_symbol_generic *cloneSymbol(COFFSymbolRef sym) {
106   if (sym.isBigObj()) {
107     auto *copy = make<coff_symbol32>(
108         *reinterpret_cast<const coff_symbol32 *>(sym.getRawPtr()));
109     return reinterpret_cast<coff_symbol_generic *>(copy);
110   } else {
111     auto *copy = make<coff_symbol16>(
112         *reinterpret_cast<const coff_symbol16 *>(sym.getRawPtr()));
113     return reinterpret_cast<coff_symbol_generic *>(copy);
114   }
115 }
116 
117 // Skip importing DllMain thunks from import libraries.
fixupDllMain(COFFLinkerContext & ctx,llvm::object::Archive * file,const Archive::Symbol & sym,bool & skipDllMain)118 static bool fixupDllMain(COFFLinkerContext &ctx, llvm::object::Archive *file,
119                          const Archive::Symbol &sym, bool &skipDllMain) {
120   const Archive::Child &c =
121       CHECK(sym.getMember(), file->getFileName() +
122                                  ": could not get the member for symbol " +
123                                  toCOFFString(ctx, sym));
124   MemoryBufferRef mb =
125       CHECK(c.getMemoryBufferRef(),
126             file->getFileName() +
127                 ": could not get the buffer for a child buffer of the archive");
128   if (identify_magic(mb.getBuffer()) == file_magic::coff_import_library) {
129     if (ctx.config.warnImportedDllMain) {
130       // We won't place DllMain symbols in the symbol table if they are
131       // coming from a import library. This message can be ignored with the flag
132       // '/ignore:importeddllmain'
133       Warn(ctx)
134           << file->getFileName()
135           << ": skipping imported DllMain symbol [importeddllmain]\nNOTE: this "
136              "might be a mistake when the DLL/library was produced.";
137     }
138     skipDllMain = true;
139     return true;
140   }
141   return false;
142 }
143 
ArchiveFile(COFFLinkerContext & ctx,MemoryBufferRef m)144 ArchiveFile::ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m)
145     : InputFile(ctx.symtab, ArchiveKind, m) {}
146 
parse()147 void ArchiveFile::parse() {
148   COFFLinkerContext &ctx = symtab.ctx;
149   SymbolTable *archiveSymtab = &symtab;
150 
151   // Parse a MemoryBufferRef as an archive file.
152   file = CHECK(Archive::create(mb), this);
153 
154   // Try to read symbols from ECSYMBOLS section on ARM64EC.
155   if (ctx.symtab.isEC()) {
156     iterator_range<Archive::symbol_iterator> symbols =
157         CHECK(file->ec_symbols(), this);
158     if (!symbols.empty()) {
159       for (const Archive::Symbol &sym : symbols)
160         ctx.symtab.addLazyArchive(this, sym);
161 
162       // Read both EC and native symbols on ARM64X.
163       archiveSymtab = &*ctx.hybridSymtab;
164     } else {
165       // If the ECSYMBOLS section is missing in the archive, the archive could
166       // be either a native-only ARM64 or x86_64 archive. Check the machine type
167       // of the object containing a symbol to determine which symbol table to
168       // use.
169       Archive::symbol_iterator sym = file->symbol_begin();
170       if (sym != file->symbol_end()) {
171         MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN;
172         Archive::Child child =
173             CHECK(sym->getMember(),
174                   file->getFileName() +
175                       ": could not get the buffer for a child of the archive");
176         MemoryBufferRef mb = CHECK(
177             child.getMemoryBufferRef(),
178             file->getFileName() +
179                 ": could not get the buffer for a child buffer of the archive");
180         switch (identify_magic(mb.getBuffer())) {
181         case file_magic::coff_object: {
182           std::unique_ptr<COFFObjectFile> obj =
183               CHECK(COFFObjectFile::create(mb),
184                     check(child.getName()) + ":" + ": not a valid COFF file");
185           machine = MachineTypes(obj->getMachine());
186           break;
187         }
188         case file_magic::coff_import_library:
189           machine = MachineTypes(COFFImportFile(mb).getMachine());
190           break;
191         case file_magic::bitcode: {
192           std::unique_ptr<lto::InputFile> obj =
193               check(lto::InputFile::create(mb));
194           machine = BitcodeFile::getMachineType(obj.get());
195           break;
196         }
197         default:
198           break;
199         }
200         archiveSymtab = &ctx.getSymtab(machine);
201       }
202     }
203   }
204 
205   bool skipDllMain = false;
206   StringRef mangledDllMain, impMangledDllMain;
207 
208   // The calls below will fail if we haven't set the machine type yet. Instead
209   // of failing, it is preferable to skip this "imported DllMain" check if we
210   // don't know the machine type at this point.
211   if (!file->isEmpty() && ctx.config.machine != IMAGE_FILE_MACHINE_UNKNOWN) {
212     mangledDllMain = archiveSymtab->mangle("DllMain");
213     impMangledDllMain = uniqueSaver().save("__imp_" + mangledDllMain);
214   }
215 
216   // Read the symbol table to construct Lazy objects.
217   for (const Archive::Symbol &sym : file->symbols()) {
218     // If an import library provides the DllMain symbol, skip importing it, as
219     // we should be using our own DllMain, not another DLL's DllMain.
220     if (!mangledDllMain.empty() && (sym.getName() == mangledDllMain ||
221                                     sym.getName() == impMangledDllMain)) {
222       if (skipDllMain || fixupDllMain(ctx, file.get(), sym, skipDllMain))
223         continue;
224     }
225     archiveSymtab->addLazyArchive(this, sym);
226   }
227 }
228 
229 // Returns a buffer pointing to a member file containing a given symbol.
addMember(const Archive::Symbol & sym)230 void ArchiveFile::addMember(const Archive::Symbol &sym) {
231   const Archive::Child &c =
232       CHECK(sym.getMember(), "could not get the member for symbol " +
233                                  toCOFFString(symtab.ctx, sym));
234 
235   // Return an empty buffer if we have already returned the same buffer.
236   // FIXME: Remove this once we resolve all defineds before all undefineds in
237   //        ObjFile::initializeSymbols().
238   if (!seen.insert(c.getChildOffset()).second)
239     return;
240 
241   symtab.ctx.driver.enqueueArchiveMember(c, sym, getName());
242 }
243 
244 std::vector<MemoryBufferRef>
getArchiveMembers(COFFLinkerContext & ctx,Archive * file)245 lld::coff::getArchiveMembers(COFFLinkerContext &ctx, Archive *file) {
246   std::vector<MemoryBufferRef> v;
247   Error err = Error::success();
248 
249   // Thin archives refer to .o files, so --reproduces needs the .o files too.
250   bool addToTar = file->isThin() && ctx.driver.tar;
251 
252   for (const Archive::Child &c : file->children(err)) {
253     MemoryBufferRef mbref =
254         CHECK(c.getMemoryBufferRef(),
255               file->getFileName() +
256                   ": could not get the buffer for a child of the archive");
257     if (addToTar) {
258       ctx.driver.tar->append(relativeToRoot(check(c.getFullName())),
259                              mbref.getBuffer());
260     }
261     v.push_back(mbref);
262   }
263   if (err)
264     Fatal(ctx) << file->getFileName()
265                << ": Archive::children failed: " << toString(std::move(err));
266   return v;
267 }
268 
ObjFile(SymbolTable & symtab,COFFObjectFile * coffObj,bool lazy)269 ObjFile::ObjFile(SymbolTable &symtab, COFFObjectFile *coffObj, bool lazy)
270     : InputFile(symtab, ObjectKind, coffObj->getMemoryBufferRef(), lazy),
271       coffObj(coffObj) {}
272 
create(COFFLinkerContext & ctx,MemoryBufferRef m,bool lazy)273 ObjFile *ObjFile::create(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy) {
274   // Parse a memory buffer as a COFF file.
275   Expected<std::unique_ptr<Binary>> bin = createBinary(m);
276   if (!bin)
277     Fatal(ctx) << "Could not parse " << m.getBufferIdentifier();
278 
279   auto *obj = dyn_cast<COFFObjectFile>(bin->get());
280   if (!obj)
281     Fatal(ctx) << m.getBufferIdentifier() << " is not a COFF file";
282 
283   bin->release();
284   return make<ObjFile>(ctx.getSymtab(MachineTypes(obj->getMachine())), obj,
285                        lazy);
286 }
287 
parseLazy()288 void ObjFile::parseLazy() {
289   // Native object file.
290   uint32_t numSymbols = coffObj->getNumberOfSymbols();
291   for (uint32_t i = 0; i < numSymbols; ++i) {
292     COFFSymbolRef coffSym = check(coffObj->getSymbol(i));
293     if (coffSym.isUndefined() || !coffSym.isExternal() ||
294         coffSym.isWeakExternal())
295       continue;
296     StringRef name = check(coffObj->getSymbolName(coffSym));
297     if (coffSym.isAbsolute() && ignoredSymbolName(name))
298       continue;
299     symtab.addLazyObject(this, name);
300     if (!lazy)
301       return;
302     i += coffSym.getNumberOfAuxSymbols();
303   }
304 }
305 
306 struct ECMapEntry {
307   ulittle32_t src;
308   ulittle32_t dst;
309   ulittle32_t type;
310 };
311 
initializeECThunks()312 void ObjFile::initializeECThunks() {
313   for (SectionChunk *chunk : hybmpChunks) {
314     if (chunk->getContents().size() % sizeof(ECMapEntry)) {
315       Err(symtab.ctx) << "Invalid .hybmp chunk size "
316                       << chunk->getContents().size();
317       continue;
318     }
319 
320     const uint8_t *end =
321         chunk->getContents().data() + chunk->getContents().size();
322     for (const uint8_t *iter = chunk->getContents().data(); iter != end;
323          iter += sizeof(ECMapEntry)) {
324       auto entry = reinterpret_cast<const ECMapEntry *>(iter);
325       switch (entry->type) {
326       case Arm64ECThunkType::Entry:
327         symtab.addEntryThunk(getSymbol(entry->src), getSymbol(entry->dst));
328         break;
329       case Arm64ECThunkType::Exit:
330         symtab.addExitThunk(getSymbol(entry->src), getSymbol(entry->dst));
331         break;
332       case Arm64ECThunkType::GuestExit:
333         break;
334       default:
335         Warn(symtab.ctx) << "Ignoring unknown EC thunk type " << entry->type;
336       }
337     }
338   }
339 }
340 
parse()341 void ObjFile::parse() {
342   // Read section and symbol tables.
343   initializeChunks();
344   initializeSymbols();
345   initializeFlags();
346   initializeDependencies();
347   initializeECThunks();
348 }
349 
getSection(uint32_t i)350 const coff_section *ObjFile::getSection(uint32_t i) {
351   auto sec = coffObj->getSection(i);
352   if (!sec)
353     Fatal(symtab.ctx) << "getSection failed: #" << i << ": " << sec.takeError();
354   return *sec;
355 }
356 
357 // We set SectionChunk pointers in the SparseChunks vector to this value
358 // temporarily to mark comdat sections as having an unknown resolution. As we
359 // walk the object file's symbol table, once we visit either a leader symbol or
360 // an associative section definition together with the parent comdat's leader,
361 // we set the pointer to either nullptr (to mark the section as discarded) or a
362 // valid SectionChunk for that section.
363 static SectionChunk *const pendingComdat = reinterpret_cast<SectionChunk *>(1);
364 
initializeChunks()365 void ObjFile::initializeChunks() {
366   uint32_t numSections = coffObj->getNumberOfSections();
367   sparseChunks.resize(numSections + 1);
368   for (uint32_t i = 1; i < numSections + 1; ++i) {
369     const coff_section *sec = getSection(i);
370     if (sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
371       sparseChunks[i] = pendingComdat;
372     else
373       sparseChunks[i] = readSection(i, nullptr, "");
374   }
375 }
376 
readSection(uint32_t sectionNumber,const coff_aux_section_definition * def,StringRef leaderName)377 SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
378                                    const coff_aux_section_definition *def,
379                                    StringRef leaderName) {
380   const coff_section *sec = getSection(sectionNumber);
381 
382   StringRef name;
383   if (Expected<StringRef> e = coffObj->getSectionName(sec))
384     name = *e;
385   else
386     Fatal(symtab.ctx) << "getSectionName failed: #" << sectionNumber << ": "
387                       << e.takeError();
388 
389   if (name == ".drectve") {
390     ArrayRef<uint8_t> data;
391     cantFail(coffObj->getSectionContents(sec, data));
392     directives = StringRef((const char *)data.data(), data.size());
393     return nullptr;
394   }
395 
396   if (name == ".llvm_addrsig") {
397     addrsigSec = sec;
398     return nullptr;
399   }
400 
401   if (name == ".llvm.call-graph-profile") {
402     callgraphSec = sec;
403     return nullptr;
404   }
405 
406   // Object files may have DWARF debug info or MS CodeView debug info
407   // (or both).
408   //
409   // DWARF sections don't need any special handling from the perspective
410   // of the linker; they are just a data section containing relocations.
411   // We can just link them to complete debug info.
412   //
413   // CodeView needs linker support. We need to interpret debug info,
414   // and then write it to a separate .pdb file.
415 
416   // Ignore DWARF debug info unless requested to be included.
417   if (!symtab.ctx.config.includeDwarfChunks && name.starts_with(".debug_"))
418     return nullptr;
419 
420   if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
421     return nullptr;
422   SectionChunk *c;
423   if (isArm64EC(getMachineType()))
424     c = make<SectionChunkEC>(this, sec);
425   else
426     c = make<SectionChunk>(this, sec);
427   if (def)
428     c->checksum = def->CheckSum;
429 
430   // CodeView sections are stored to a different vector because they are not
431   // linked in the regular manner.
432   if (c->isCodeView())
433     debugChunks.push_back(c);
434   else if (name == ".gfids$y")
435     guardFidChunks.push_back(c);
436   else if (name == ".giats$y")
437     guardIATChunks.push_back(c);
438   else if (name == ".gljmp$y")
439     guardLJmpChunks.push_back(c);
440   else if (name == ".gehcont$y")
441     guardEHContChunks.push_back(c);
442   else if (name == ".sxdata")
443     sxDataChunks.push_back(c);
444   else if (isArm64EC(getMachineType()) && name == ".hybmp$x")
445     hybmpChunks.push_back(c);
446   else if (symtab.ctx.config.tailMerge && sec->NumberOfRelocations == 0 &&
447            name == ".rdata" && leaderName.starts_with("??_C@"))
448     // COFF sections that look like string literal sections (i.e. no
449     // relocations, in .rdata, leader symbol name matches the MSVC name mangling
450     // for string literals) are subject to string tail merging.
451     MergeChunk::addSection(symtab.ctx, c);
452   else if (name == ".rsrc" || name.starts_with(".rsrc$"))
453     resourceChunks.push_back(c);
454   else if (!(sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_INFO))
455     chunks.push_back(c);
456 
457   return c;
458 }
459 
includeResourceChunks()460 void ObjFile::includeResourceChunks() {
461   chunks.insert(chunks.end(), resourceChunks.begin(), resourceChunks.end());
462 }
463 
readAssociativeDefinition(COFFSymbolRef sym,const coff_aux_section_definition * def)464 void ObjFile::readAssociativeDefinition(
465     COFFSymbolRef sym, const coff_aux_section_definition *def) {
466   readAssociativeDefinition(sym, def, def->getNumber(sym.isBigObj()));
467 }
468 
readAssociativeDefinition(COFFSymbolRef sym,const coff_aux_section_definition * def,uint32_t parentIndex)469 void ObjFile::readAssociativeDefinition(COFFSymbolRef sym,
470                                         const coff_aux_section_definition *def,
471                                         uint32_t parentIndex) {
472   SectionChunk *parent = sparseChunks[parentIndex];
473   int32_t sectionNumber = sym.getSectionNumber();
474 
475   auto diag = [&]() {
476     StringRef name = check(coffObj->getSymbolName(sym));
477 
478     StringRef parentName;
479     const coff_section *parentSec = getSection(parentIndex);
480     if (Expected<StringRef> e = coffObj->getSectionName(parentSec))
481       parentName = *e;
482     Err(symtab.ctx) << toString(this) << ": associative comdat " << name
483                     << " (sec " << sectionNumber
484                     << ") has invalid reference to section " << parentName
485                     << " (sec " << parentIndex << ")";
486   };
487 
488   if (parent == pendingComdat) {
489     // This can happen if an associative comdat refers to another associative
490     // comdat that appears after it (invalid per COFF spec) or to a section
491     // without any symbols.
492     diag();
493     return;
494   }
495 
496   // Check whether the parent is prevailing. If it is, so are we, and we read
497   // the section; otherwise mark it as discarded.
498   if (parent) {
499     SectionChunk *c = readSection(sectionNumber, def, "");
500     sparseChunks[sectionNumber] = c;
501     if (c) {
502       c->selection = IMAGE_COMDAT_SELECT_ASSOCIATIVE;
503       parent->addAssociative(c);
504     }
505   } else {
506     sparseChunks[sectionNumber] = nullptr;
507   }
508 }
509 
recordPrevailingSymbolForMingw(COFFSymbolRef sym,DenseMap<StringRef,uint32_t> & prevailingSectionMap)510 void ObjFile::recordPrevailingSymbolForMingw(
511     COFFSymbolRef sym, DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
512   // For comdat symbols in executable sections, where this is the copy
513   // of the section chunk we actually include instead of discarding it,
514   // add the symbol to a map to allow using it for implicitly
515   // associating .[px]data$<func> sections to it.
516   // Use the suffix from the .text$<func> instead of the leader symbol
517   // name, for cases where the names differ (i386 mangling/decorations,
518   // cases where the leader is a weak symbol named .weak.func.default*).
519   int32_t sectionNumber = sym.getSectionNumber();
520   SectionChunk *sc = sparseChunks[sectionNumber];
521   if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
522     StringRef name = sc->getSectionName().split('$').second;
523     prevailingSectionMap[name] = sectionNumber;
524   }
525 }
526 
maybeAssociateSEHForMingw(COFFSymbolRef sym,const coff_aux_section_definition * def,const DenseMap<StringRef,uint32_t> & prevailingSectionMap)527 void ObjFile::maybeAssociateSEHForMingw(
528     COFFSymbolRef sym, const coff_aux_section_definition *def,
529     const DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
530   StringRef name = check(coffObj->getSymbolName(sym));
531   if (name.consume_front(".pdata$") || name.consume_front(".xdata$") ||
532       name.consume_front(".eh_frame$")) {
533     // For MinGW, treat .[px]data$<func> and .eh_frame$<func> as implicitly
534     // associative to the symbol <func>.
535     auto parentSym = prevailingSectionMap.find(name);
536     if (parentSym != prevailingSectionMap.end())
537       readAssociativeDefinition(sym, def, parentSym->second);
538   }
539 }
540 
createRegular(COFFSymbolRef sym)541 Symbol *ObjFile::createRegular(COFFSymbolRef sym) {
542   SectionChunk *sc = sparseChunks[sym.getSectionNumber()];
543   if (sym.isExternal()) {
544     StringRef name = check(coffObj->getSymbolName(sym));
545     if (sc)
546       return symtab.addRegular(this, name, sym.getGeneric(), sc,
547                                sym.getValue());
548     // For MinGW symbols named .weak.* that point to a discarded section,
549     // don't create an Undefined symbol. If nothing ever refers to the symbol,
550     // everything should be fine. If something actually refers to the symbol
551     // (e.g. the undefined weak alias), linking will fail due to undefined
552     // references at the end.
553     if (symtab.ctx.config.mingw && name.starts_with(".weak."))
554       return nullptr;
555     return symtab.addUndefined(name, this, false);
556   }
557   if (sc) {
558     const coff_symbol_generic *symGen = sym.getGeneric();
559     if (sym.isSection()) {
560       auto *customSymGen = cloneSymbol(sym);
561       customSymGen->Value = 0;
562       symGen = customSymGen;
563     }
564     return make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
565                                 /*IsExternal*/ false, symGen, sc);
566   }
567   return nullptr;
568 }
569 
initializeSymbols()570 void ObjFile::initializeSymbols() {
571   uint32_t numSymbols = coffObj->getNumberOfSymbols();
572   symbols.resize(numSymbols);
573 
574   SmallVector<std::pair<Symbol *, const coff_aux_weak_external *>, 8>
575       weakAliases;
576   std::vector<uint32_t> pendingIndexes;
577   pendingIndexes.reserve(numSymbols);
578 
579   DenseMap<StringRef, uint32_t> prevailingSectionMap;
580   std::vector<const coff_aux_section_definition *> comdatDefs(
581       coffObj->getNumberOfSections() + 1);
582   COFFLinkerContext &ctx = symtab.ctx;
583 
584   for (uint32_t i = 0; i < numSymbols; ++i) {
585     COFFSymbolRef coffSym = check(coffObj->getSymbol(i));
586     bool prevailingComdat;
587     if (coffSym.isUndefined()) {
588       symbols[i] = createUndefined(coffSym, false);
589     } else if (coffSym.isWeakExternal()) {
590       auto aux = coffSym.getAux<coff_aux_weak_external>();
591       bool overrideLazy = true;
592 
593       // On ARM64EC, external function calls emit a pair of weak-dependency
594       // aliases: func to #func and #func to the func guess exit thunk
595       // (instead of a single undefined func symbol, which would be emitted on
596       // other targets). Allow such aliases to be overridden by lazy archive
597       // symbols, just as we would for undefined symbols.
598       if (isArm64EC(getMachineType()) &&
599           aux->Characteristics == IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY) {
600         COFFSymbolRef targetSym = check(coffObj->getSymbol(aux->TagIndex));
601         if (!targetSym.isAnyUndefined()) {
602           // If the target is defined, it may be either a guess exit thunk or
603           // the actual implementation. If it's the latter, consider the alias
604           // to be part of the implementation and override potential lazy
605           // archive symbols.
606           StringRef targetName = check(coffObj->getSymbolName(targetSym));
607           StringRef name = check(coffObj->getSymbolName(coffSym));
608           std::optional<std::string> mangledName =
609               getArm64ECMangledFunctionName(name);
610           overrideLazy = mangledName == targetName;
611         } else {
612           overrideLazy = false;
613         }
614       }
615       symbols[i] = createUndefined(coffSym, overrideLazy);
616       weakAliases.emplace_back(symbols[i], aux);
617     } else if (std::optional<Symbol *> optSym =
618                    createDefined(coffSym, comdatDefs, prevailingComdat)) {
619       symbols[i] = *optSym;
620       if (ctx.config.mingw && prevailingComdat)
621         recordPrevailingSymbolForMingw(coffSym, prevailingSectionMap);
622     } else {
623       // createDefined() returns std::nullopt if a symbol belongs to a section
624       // that was pending at the point when the symbol was read. This can happen
625       // in two cases:
626       // 1) section definition symbol for a comdat leader;
627       // 2) symbol belongs to a comdat section associated with another section.
628       // In both of these cases, we can expect the section to be resolved by
629       // the time we finish visiting the remaining symbols in the symbol
630       // table. So we postpone the handling of this symbol until that time.
631       pendingIndexes.push_back(i);
632     }
633     i += coffSym.getNumberOfAuxSymbols();
634   }
635 
636   for (uint32_t i : pendingIndexes) {
637     COFFSymbolRef sym = check(coffObj->getSymbol(i));
638     if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
639       if (def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
640         readAssociativeDefinition(sym, def);
641       else if (ctx.config.mingw)
642         maybeAssociateSEHForMingw(sym, def, prevailingSectionMap);
643     }
644     if (sparseChunks[sym.getSectionNumber()] == pendingComdat) {
645       StringRef name = check(coffObj->getSymbolName(sym));
646       Log(ctx) << "comdat section " << name
647                << " without leader and unassociated, discarding";
648       continue;
649     }
650     symbols[i] = createRegular(sym);
651   }
652 
653   for (auto &kv : weakAliases) {
654     Symbol *sym = kv.first;
655     const coff_aux_weak_external *aux = kv.second;
656     checkAndSetWeakAlias(symtab, this, sym, symbols[aux->TagIndex],
657                          aux->Characteristics ==
658                              IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY);
659   }
660 
661   // Free the memory used by sparseChunks now that symbol loading is finished.
662   decltype(sparseChunks)().swap(sparseChunks);
663 }
664 
createUndefined(COFFSymbolRef sym,bool overrideLazy)665 Symbol *ObjFile::createUndefined(COFFSymbolRef sym, bool overrideLazy) {
666   StringRef name = check(coffObj->getSymbolName(sym));
667   Symbol *s = symtab.addUndefined(name, this, overrideLazy);
668 
669   // Add an anti-dependency alias for undefined AMD64 symbols on the ARM64EC
670   // target.
671   if (symtab.isEC() && getMachineType() == AMD64) {
672     auto u = dyn_cast<Undefined>(s);
673     if (u && !u->weakAlias) {
674       if (std::optional<std::string> mangledName =
675               getArm64ECMangledFunctionName(name)) {
676         Symbol *m = symtab.addUndefined(saver().save(*mangledName), this,
677                                         /*overrideLazy=*/false);
678         u->setWeakAlias(m, /*antiDep=*/true);
679       }
680     }
681   }
682   return s;
683 }
684 
findSectionDef(COFFObjectFile * obj,int32_t section)685 static const coff_aux_section_definition *findSectionDef(COFFObjectFile *obj,
686                                                          int32_t section) {
687   uint32_t numSymbols = obj->getNumberOfSymbols();
688   for (uint32_t i = 0; i < numSymbols; ++i) {
689     COFFSymbolRef sym = check(obj->getSymbol(i));
690     if (sym.getSectionNumber() != section)
691       continue;
692     if (const coff_aux_section_definition *def = sym.getSectionDefinition())
693       return def;
694   }
695   return nullptr;
696 }
697 
handleComdatSelection(COFFSymbolRef sym,COMDATType & selection,bool & prevailing,DefinedRegular * leader,const llvm::object::coff_aux_section_definition * def)698 void ObjFile::handleComdatSelection(
699     COFFSymbolRef sym, COMDATType &selection, bool &prevailing,
700     DefinedRegular *leader,
701     const llvm::object::coff_aux_section_definition *def) {
702   if (prevailing)
703     return;
704   // There's already an existing comdat for this symbol: `Leader`.
705   // Use the comdats's selection field to determine if the new
706   // symbol in `Sym` should be discarded, produce a duplicate symbol
707   // error, etc.
708 
709   SectionChunk *leaderChunk = leader->getChunk();
710   COMDATType leaderSelection = leaderChunk->selection;
711   COFFLinkerContext &ctx = symtab.ctx;
712 
713   assert(leader->data && "Comdat leader without SectionChunk?");
714   if (isa<BitcodeFile>(leader->file)) {
715     // If the leader is only a LTO symbol, we don't know e.g. its final size
716     // yet, so we can't do the full strict comdat selection checking yet.
717     selection = leaderSelection = IMAGE_COMDAT_SELECT_ANY;
718   }
719 
720   if ((selection == IMAGE_COMDAT_SELECT_ANY &&
721        leaderSelection == IMAGE_COMDAT_SELECT_LARGEST) ||
722       (selection == IMAGE_COMDAT_SELECT_LARGEST &&
723        leaderSelection == IMAGE_COMDAT_SELECT_ANY)) {
724     // cl.exe picks "any" for vftables when building with /GR- and
725     // "largest" when building with /GR. To be able to link object files
726     // compiled with each flag, "any" and "largest" are merged as "largest".
727     leaderSelection = selection = IMAGE_COMDAT_SELECT_LARGEST;
728   }
729 
730   // GCCs __declspec(selectany) doesn't actually pick "any" but "same size as".
731   // Clang on the other hand picks "any". To be able to link two object files
732   // with a __declspec(selectany) declaration, one compiled with gcc and the
733   // other with clang, we merge them as proper "same size as"
734   if (ctx.config.mingw && ((selection == IMAGE_COMDAT_SELECT_ANY &&
735                             leaderSelection == IMAGE_COMDAT_SELECT_SAME_SIZE) ||
736                            (selection == IMAGE_COMDAT_SELECT_SAME_SIZE &&
737                             leaderSelection == IMAGE_COMDAT_SELECT_ANY))) {
738     leaderSelection = selection = IMAGE_COMDAT_SELECT_SAME_SIZE;
739   }
740 
741   // Other than that, comdat selections must match.  This is a bit more
742   // strict than link.exe which allows merging "any" and "largest" if "any"
743   // is the first symbol the linker sees, and it allows merging "largest"
744   // with everything (!) if "largest" is the first symbol the linker sees.
745   // Making this symmetric independent of which selection is seen first
746   // seems better though.
747   // (This behavior matches ModuleLinker::getComdatResult().)
748   if (selection != leaderSelection) {
749     Log(ctx) << "conflicting comdat type for " << symtab.printSymbol(leader)
750              << ": " << (int)leaderSelection << " in " << leader->getFile()
751              << " and " << (int)selection << " in " << this;
752     symtab.reportDuplicate(leader, this);
753     return;
754   }
755 
756   switch (selection) {
757   case IMAGE_COMDAT_SELECT_NODUPLICATES:
758     symtab.reportDuplicate(leader, this);
759     break;
760 
761   case IMAGE_COMDAT_SELECT_ANY:
762     // Nothing to do.
763     break;
764 
765   case IMAGE_COMDAT_SELECT_SAME_SIZE:
766     if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData) {
767       if (!ctx.config.mingw) {
768         symtab.reportDuplicate(leader, this);
769       } else {
770         const coff_aux_section_definition *leaderDef = nullptr;
771         if (leaderChunk->file)
772           leaderDef = findSectionDef(leaderChunk->file->getCOFFObj(),
773                                      leaderChunk->getSectionNumber());
774         if (!leaderDef || leaderDef->Length != def->Length)
775           symtab.reportDuplicate(leader, this);
776       }
777     }
778     break;
779 
780   case IMAGE_COMDAT_SELECT_EXACT_MATCH: {
781     SectionChunk newChunk(this, getSection(sym));
782     // link.exe only compares section contents here and doesn't complain
783     // if the two comdat sections have e.g. different alignment.
784     // Match that.
785     if (leaderChunk->getContents() != newChunk.getContents())
786       symtab.reportDuplicate(leader, this, &newChunk, sym.getValue());
787     break;
788   }
789 
790   case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
791     // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE.
792     // (This means lld-link doesn't produce duplicate symbol errors for
793     // associative comdats while link.exe does, but associate comdats
794     // are never extern in practice.)
795     llvm_unreachable("createDefined not called for associative comdats");
796 
797   case IMAGE_COMDAT_SELECT_LARGEST:
798     if (leaderChunk->getSize() < getSection(sym)->SizeOfRawData) {
799       // Replace the existing comdat symbol with the new one.
800       StringRef name = check(coffObj->getSymbolName(sym));
801       // FIXME: This is incorrect: With /opt:noref, the previous sections
802       // make it into the final executable as well. Correct handling would
803       // be to undo reading of the whole old section that's being replaced,
804       // or doing one pass that determines what the final largest comdat
805       // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading
806       // only the largest one.
807       replaceSymbol<DefinedRegular>(leader, this, name, /*IsCOMDAT*/ true,
808                                     /*IsExternal*/ true, sym.getGeneric(),
809                                     nullptr);
810       prevailing = true;
811     }
812     break;
813 
814   case IMAGE_COMDAT_SELECT_NEWEST:
815     llvm_unreachable("should have been rejected earlier");
816   }
817 }
818 
createDefined(COFFSymbolRef sym,std::vector<const coff_aux_section_definition * > & comdatDefs,bool & prevailing)819 std::optional<Symbol *> ObjFile::createDefined(
820     COFFSymbolRef sym,
821     std::vector<const coff_aux_section_definition *> &comdatDefs,
822     bool &prevailing) {
823   prevailing = false;
824   auto getName = [&]() { return check(coffObj->getSymbolName(sym)); };
825 
826   if (sym.isCommon()) {
827     auto *c = make<CommonChunk>(sym);
828     chunks.push_back(c);
829     return symtab.addCommon(this, getName(), sym.getValue(), sym.getGeneric(),
830                             c);
831   }
832 
833   COFFLinkerContext &ctx = symtab.ctx;
834   if (sym.isAbsolute()) {
835     StringRef name = getName();
836 
837     if (name == "@feat.00")
838       feat00Flags = sym.getValue();
839     // Skip special symbols.
840     if (ignoredSymbolName(name))
841       return nullptr;
842 
843     if (sym.isExternal())
844       return symtab.addAbsolute(name, sym);
845     return make<DefinedAbsolute>(ctx, name, sym);
846   }
847 
848   int32_t sectionNumber = sym.getSectionNumber();
849   if (sectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
850     return nullptr;
851 
852   if (sym.isEmptySectionDeclaration()) {
853     // As there is no coff_section in the object file for these, make a
854     // new virtual one, with everything zeroed out (i.e. an empty section),
855     // with only the name and characteristics set.
856     StringRef name = getName();
857     auto *hdr = make<coff_section>();
858     memset(hdr, 0, sizeof(*hdr));
859     strncpy(hdr->Name, name.data(),
860             std::min(name.size(), (size_t)COFF::NameSize));
861     // The Value field in a section symbol may contain the characteristics,
862     // or it may be zero, where we make something up (that matches what is
863     // used in .idata sections in the regular object files in import libraries).
864     if (sym.getValue())
865       hdr->Characteristics = sym.getValue() | IMAGE_SCN_ALIGN_4BYTES;
866     else
867       hdr->Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA |
868                              IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE |
869                              IMAGE_SCN_ALIGN_4BYTES;
870     auto *sc = make<SectionChunk>(this, hdr);
871     chunks.push_back(sc);
872 
873     auto *symGen = cloneSymbol(sym);
874     // Ignore the Value offset of these symbols, as it may be a bitmask.
875     symGen->Value = 0;
876     return make<DefinedRegular>(this, /*name=*/"", /*isCOMDAT=*/false,
877                                 /*isExternal=*/false, symGen, sc);
878   }
879 
880   if (llvm::COFF::isReservedSectionNumber(sectionNumber))
881     Fatal(ctx) << toString(this) << ": " << getName()
882                << " should not refer to special section "
883                << Twine(sectionNumber);
884 
885   if ((uint32_t)sectionNumber >= sparseChunks.size())
886     Fatal(ctx) << toString(this) << ": " << getName()
887                << " should not refer to non-existent section "
888                << Twine(sectionNumber);
889 
890   // Comdat handling.
891   // A comdat symbol consists of two symbol table entries.
892   // The first symbol entry has the name of the section (e.g. .text), fixed
893   // values for the other fields, and one auxiliary record.
894   // The second symbol entry has the name of the comdat symbol, called the
895   // "comdat leader".
896   // When this function is called for the first symbol entry of a comdat,
897   // it sets comdatDefs and returns std::nullopt, and when it's called for the
898   // second symbol entry it reads comdatDefs and then sets it back to nullptr.
899 
900   // Handle comdat leader.
901   if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) {
902     comdatDefs[sectionNumber] = nullptr;
903     DefinedRegular *leader;
904 
905     if (sym.isExternal()) {
906       std::tie(leader, prevailing) =
907           symtab.addComdat(this, getName(), sym.getGeneric());
908     } else {
909       leader = make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
910                                     /*IsExternal*/ false, sym.getGeneric());
911       prevailing = true;
912     }
913 
914     if (def->Selection < (int)IMAGE_COMDAT_SELECT_NODUPLICATES ||
915         // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe
916         // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either.
917         def->Selection > (int)IMAGE_COMDAT_SELECT_LARGEST) {
918       Fatal(ctx) << "unknown comdat type "
919                  << std::to_string((int)def->Selection) << " for " << getName()
920                  << " in " << toString(this);
921     }
922     COMDATType selection = (COMDATType)def->Selection;
923 
924     if (leader->isCOMDAT)
925       handleComdatSelection(sym, selection, prevailing, leader, def);
926 
927     if (prevailing) {
928       SectionChunk *c = readSection(sectionNumber, def, getName());
929       sparseChunks[sectionNumber] = c;
930       if (!c)
931         return nullptr;
932       c->sym = cast<DefinedRegular>(leader);
933       c->selection = selection;
934       cast<DefinedRegular>(leader)->data = &c->repl;
935     } else {
936       sparseChunks[sectionNumber] = nullptr;
937     }
938     return leader;
939   }
940 
941   // Prepare to handle the comdat leader symbol by setting the section's
942   // ComdatDefs pointer if we encounter a non-associative comdat.
943   if (sparseChunks[sectionNumber] == pendingComdat) {
944     if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
945       if (def->Selection != IMAGE_COMDAT_SELECT_ASSOCIATIVE)
946         comdatDefs[sectionNumber] = def;
947     }
948     return std::nullopt;
949   }
950 
951   return createRegular(sym);
952 }
953 
getMachineType() const954 MachineTypes ObjFile::getMachineType() const {
955   return static_cast<MachineTypes>(coffObj->getMachine());
956 }
957 
getDebugSection(StringRef secName)958 ArrayRef<uint8_t> ObjFile::getDebugSection(StringRef secName) {
959   if (SectionChunk *sec = SectionChunk::findByName(debugChunks, secName))
960     return sec->consumeDebugMagic();
961   return {};
962 }
963 
964 // OBJ files systematically store critical information in a .debug$S stream,
965 // even if the TU was compiled with no debug info. At least two records are
966 // always there. S_OBJNAME stores a 32-bit signature, which is loaded into the
967 // PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is
968 // currently used to initialize the hotPatchable member.
initializeFlags()969 void ObjFile::initializeFlags() {
970   ArrayRef<uint8_t> data = getDebugSection(".debug$S");
971   if (data.empty())
972     return;
973 
974   DebugSubsectionArray subsections;
975 
976   BinaryStreamReader reader(data, llvm::endianness::little);
977   ExitOnError exitOnErr;
978   exitOnErr(reader.readArray(subsections, data.size()));
979 
980   for (const DebugSubsectionRecord &ss : subsections) {
981     if (ss.kind() != DebugSubsectionKind::Symbols)
982       continue;
983 
984     unsigned offset = 0;
985 
986     // Only parse the first two records. We are only looking for S_OBJNAME
987     // and S_COMPILE3, and they usually appear at the beginning of the
988     // stream.
989     for (unsigned i = 0; i < 2; ++i) {
990       Expected<CVSymbol> sym = readSymbolFromStream(ss.getRecordData(), offset);
991       if (!sym) {
992         consumeError(sym.takeError());
993         return;
994       }
995       if (sym->kind() == SymbolKind::S_COMPILE3) {
996         auto cs =
997             cantFail(SymbolDeserializer::deserializeAs<Compile3Sym>(sym.get()));
998         hotPatchable =
999             (cs.Flags & CompileSym3Flags::HotPatch) != CompileSym3Flags::None;
1000       }
1001       if (sym->kind() == SymbolKind::S_OBJNAME) {
1002         auto objName = cantFail(SymbolDeserializer::deserializeAs<ObjNameSym>(
1003             sym.get()));
1004         if (objName.Signature)
1005           pchSignature = objName.Signature;
1006       }
1007       offset += sym->length();
1008     }
1009   }
1010 }
1011 
1012 // Depending on the compilation flags, OBJs can refer to external files,
1013 // necessary to merge this OBJ into the final PDB. We currently support two
1014 // types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu.
1015 // And PDB type servers, when compiling with /Zi. This function extracts these
1016 // dependencies and makes them available as a TpiSource interface (see
1017 // DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular
1018 // output even with /Yc and /Yu and with /Zi.
initializeDependencies()1019 void ObjFile::initializeDependencies() {
1020   COFFLinkerContext &ctx = symtab.ctx;
1021   if (!ctx.config.debug)
1022     return;
1023 
1024   bool isPCH = false;
1025 
1026   ArrayRef<uint8_t> data = getDebugSection(".debug$P");
1027   if (!data.empty())
1028     isPCH = true;
1029   else
1030     data = getDebugSection(".debug$T");
1031 
1032   // symbols but no types, make a plain, empty TpiSource anyway, because it
1033   // simplifies adding the symbols later.
1034   if (data.empty()) {
1035     if (!debugChunks.empty())
1036       debugTypesObj = makeTpiSource(ctx, this);
1037     return;
1038   }
1039 
1040   // Get the first type record. It will indicate if this object uses a type
1041   // server (/Zi) or a PCH file (/Yu).
1042   CVTypeArray types;
1043   BinaryStreamReader reader(data, llvm::endianness::little);
1044   cantFail(reader.readArray(types, reader.getLength()));
1045   CVTypeArray::Iterator firstType = types.begin();
1046   if (firstType == types.end())
1047     return;
1048 
1049   // Remember the .debug$T or .debug$P section.
1050   debugTypes = data;
1051 
1052   // This object file is a PCH file that others will depend on.
1053   if (isPCH) {
1054     debugTypesObj = makePrecompSource(ctx, this);
1055     return;
1056   }
1057 
1058   // This object file was compiled with /Zi. Enqueue the PDB dependency.
1059   if (firstType->kind() == LF_TYPESERVER2) {
1060     TypeServer2Record ts = cantFail(
1061         TypeDeserializer::deserializeAs<TypeServer2Record>(firstType->data()));
1062     debugTypesObj = makeUseTypeServerSource(ctx, this, ts);
1063     enqueuePdbFile(ts.getName(), this);
1064     return;
1065   }
1066 
1067   // This object was compiled with /Yu. It uses types from another object file
1068   // with a matching signature.
1069   if (firstType->kind() == LF_PRECOMP) {
1070     PrecompRecord precomp = cantFail(
1071         TypeDeserializer::deserializeAs<PrecompRecord>(firstType->data()));
1072     // We're better off trusting the LF_PRECOMP signature. In some cases the
1073     // S_OBJNAME record doesn't contain a valid PCH signature.
1074     if (precomp.Signature)
1075       pchSignature = precomp.Signature;
1076     debugTypesObj = makeUsePrecompSource(ctx, this, precomp);
1077     // Drop the LF_PRECOMP record from the input stream.
1078     debugTypes = debugTypes.drop_front(firstType->RecordData.size());
1079     return;
1080   }
1081 
1082   // This is a plain old object file.
1083   debugTypesObj = makeTpiSource(ctx, this);
1084 }
1085 
1086 // The casing of the PDB path stamped in the OBJ can differ from the actual path
1087 // on disk. With this, we ensure to always use lowercase as a key for the
1088 // pdbInputFileInstances map, at least on Windows.
normalizePdbPath(StringRef path)1089 static std::string normalizePdbPath(StringRef path) {
1090 #if defined(_WIN32)
1091   return path.lower();
1092 #else // LINUX
1093   return std::string(path);
1094 #endif
1095 }
1096 
1097 // If existing, return the actual PDB path on disk.
1098 static std::optional<std::string>
findPdbPath(StringRef pdbPath,ObjFile * dependentFile,StringRef outputPath)1099 findPdbPath(StringRef pdbPath, ObjFile *dependentFile, StringRef outputPath) {
1100   // Ensure the file exists before anything else. In some cases, if the path
1101   // points to a removable device, Driver::enqueuePath() would fail with an
1102   // error (EAGAIN, "resource unavailable try again") which we want to skip
1103   // silently.
1104   if (llvm::sys::fs::exists(pdbPath))
1105     return normalizePdbPath(pdbPath);
1106 
1107   StringRef objPath = !dependentFile->parentName.empty()
1108                           ? dependentFile->parentName
1109                           : dependentFile->getName();
1110 
1111   // Currently, type server PDBs are only created by MSVC cl, which only runs
1112   // on Windows, so we can assume type server paths are Windows style.
1113   StringRef pdbName = sys::path::filename(pdbPath, sys::path::Style::windows);
1114 
1115   // Check if the PDB is in the same folder as the OBJ.
1116   SmallString<128> path;
1117   sys::path::append(path, sys::path::parent_path(objPath), pdbName);
1118   if (llvm::sys::fs::exists(path))
1119     return normalizePdbPath(path);
1120 
1121   // Check if the PDB is in the output folder.
1122   path.clear();
1123   sys::path::append(path, sys::path::parent_path(outputPath), pdbName);
1124   if (llvm::sys::fs::exists(path))
1125     return normalizePdbPath(path);
1126 
1127   return std::nullopt;
1128 }
1129 
PDBInputFile(COFFLinkerContext & ctx,MemoryBufferRef m)1130 PDBInputFile::PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m)
1131     : InputFile(ctx.symtab, PDBKind, m) {}
1132 
1133 PDBInputFile::~PDBInputFile() = default;
1134 
findFromRecordPath(const COFFLinkerContext & ctx,StringRef path,ObjFile * fromFile)1135 PDBInputFile *PDBInputFile::findFromRecordPath(const COFFLinkerContext &ctx,
1136                                                StringRef path,
1137                                                ObjFile *fromFile) {
1138   auto p = findPdbPath(path.str(), fromFile, ctx.config.outputFile);
1139   if (!p)
1140     return nullptr;
1141   auto it = ctx.pdbInputFileInstances.find(*p);
1142   if (it != ctx.pdbInputFileInstances.end())
1143     return it->second;
1144   return nullptr;
1145 }
1146 
parse()1147 void PDBInputFile::parse() {
1148   symtab.ctx.pdbInputFileInstances[mb.getBufferIdentifier().str()] = this;
1149 
1150   std::unique_ptr<pdb::IPDBSession> thisSession;
1151   Error E = pdb::NativeSession::createFromPdb(
1152       MemoryBuffer::getMemBuffer(mb, false), thisSession);
1153   if (E) {
1154     loadErrorStr.emplace(toString(std::move(E)));
1155     return; // fail silently at this point - the error will be handled later,
1156             // when merging the debug type stream
1157   }
1158 
1159   session.reset(static_cast<pdb::NativeSession *>(thisSession.release()));
1160 
1161   pdb::PDBFile &pdbFile = session->getPDBFile();
1162   auto expectedInfo = pdbFile.getPDBInfoStream();
1163   // All PDB Files should have an Info stream.
1164   if (!expectedInfo) {
1165     loadErrorStr.emplace(toString(expectedInfo.takeError()));
1166     return;
1167   }
1168   debugTypesObj = makeTypeServerSource(symtab.ctx, this);
1169 }
1170 
1171 // Used only for DWARF debug info, which is not common (except in MinGW
1172 // environments). This returns an optional pair of file name and line
1173 // number for where the variable was defined.
1174 std::optional<std::pair<StringRef, uint32_t>>
getVariableLocation(StringRef var)1175 ObjFile::getVariableLocation(StringRef var) {
1176   if (!dwarf) {
1177     dwarf = make<DWARFCache>(DWARFContext::create(*getCOFFObj()));
1178     if (!dwarf)
1179       return std::nullopt;
1180   }
1181   if (symtab.machine == I386)
1182     var.consume_front("_");
1183   std::optional<std::pair<std::string, unsigned>> ret =
1184       dwarf->getVariableLoc(var);
1185   if (!ret)
1186     return std::nullopt;
1187   return std::make_pair(saver().save(ret->first), ret->second);
1188 }
1189 
1190 // Used only for DWARF debug info, which is not common (except in MinGW
1191 // environments).
getDILineInfo(uint32_t offset,uint32_t sectionIndex)1192 std::optional<DILineInfo> ObjFile::getDILineInfo(uint32_t offset,
1193                                                  uint32_t sectionIndex) {
1194   if (!dwarf) {
1195     dwarf = make<DWARFCache>(DWARFContext::create(*getCOFFObj()));
1196     if (!dwarf)
1197       return std::nullopt;
1198   }
1199 
1200   return dwarf->getDILineInfo(offset, sectionIndex);
1201 }
1202 
enqueuePdbFile(StringRef path,ObjFile * fromFile)1203 void ObjFile::enqueuePdbFile(StringRef path, ObjFile *fromFile) {
1204   auto p = findPdbPath(path.str(), fromFile, symtab.ctx.config.outputFile);
1205   if (!p)
1206     return;
1207   auto it = symtab.ctx.pdbInputFileInstances.emplace(*p, nullptr);
1208   if (!it.second)
1209     return; // already scheduled for load
1210   symtab.ctx.driver.enqueuePDB(*p);
1211 }
1212 
ImportFile(COFFLinkerContext & ctx,MemoryBufferRef m)1213 ImportFile::ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m)
1214     : InputFile(ctx.getSymtab(getMachineType(m)), ImportKind, m),
1215       live(!ctx.config.doGC) {}
1216 
getMachineType(MemoryBufferRef m)1217 MachineTypes ImportFile::getMachineType(MemoryBufferRef m) {
1218   uint16_t machine =
1219       reinterpret_cast<const coff_import_header *>(m.getBufferStart())->Machine;
1220   return MachineTypes(machine);
1221 }
1222 
isSameImport(const ImportFile * other) const1223 bool ImportFile::isSameImport(const ImportFile *other) const {
1224   if (!externalName.empty())
1225     return other->externalName == externalName;
1226   return hdr->OrdinalHint == other->hdr->OrdinalHint;
1227 }
1228 
makeImportThunk()1229 ImportThunkChunk *ImportFile::makeImportThunk() {
1230   switch (hdr->Machine) {
1231   case AMD64:
1232     return make<ImportThunkChunkX64>(symtab.ctx, impSym);
1233   case I386:
1234     return make<ImportThunkChunkX86>(symtab.ctx, impSym);
1235   case ARM64:
1236     return make<ImportThunkChunkARM64>(symtab.ctx, impSym, ARM64);
1237   case ARMNT:
1238     return make<ImportThunkChunkARM>(symtab.ctx, impSym);
1239   }
1240   llvm_unreachable("unknown machine type");
1241 }
1242 
parse()1243 void ImportFile::parse() {
1244   const auto *hdr =
1245       reinterpret_cast<const coff_import_header *>(mb.getBufferStart());
1246 
1247   // Check if the total size is valid.
1248   if (mb.getBufferSize() < sizeof(*hdr) ||
1249       mb.getBufferSize() != sizeof(*hdr) + hdr->SizeOfData)
1250     Fatal(symtab.ctx) << "broken import library";
1251 
1252   // Read names and create an __imp_ symbol.
1253   StringRef buf = mb.getBuffer().substr(sizeof(*hdr));
1254   auto split = buf.split('\0');
1255   buf = split.second;
1256   StringRef name;
1257   if (isArm64EC(hdr->Machine)) {
1258     if (std::optional<std::string> demangledName =
1259             getArm64ECDemangledFunctionName(split.first))
1260       name = saver().save(*demangledName);
1261   }
1262   if (name.empty())
1263     name = saver().save(split.first);
1264   StringRef impName = saver().save("__imp_" + name);
1265   dllName = buf.split('\0').first;
1266   StringRef extName;
1267   switch (hdr->getNameType()) {
1268   case IMPORT_ORDINAL:
1269     extName = "";
1270     break;
1271   case IMPORT_NAME:
1272     extName = name;
1273     break;
1274   case IMPORT_NAME_NOPREFIX:
1275     extName = ltrim1(name, "?@_");
1276     break;
1277   case IMPORT_NAME_UNDECORATE:
1278     extName = ltrim1(name, "?@_");
1279     extName = extName.substr(0, extName.find('@'));
1280     break;
1281   case IMPORT_NAME_EXPORTAS:
1282     extName = buf.substr(dllName.size() + 1).split('\0').first;
1283     break;
1284   }
1285 
1286   this->hdr = hdr;
1287   externalName = extName;
1288 
1289   bool isCode = hdr->getType() == llvm::COFF::IMPORT_CODE;
1290 
1291   if (!symtab.isEC()) {
1292     impSym = symtab.addImportData(impName, this, location);
1293   } else {
1294     // In addition to the regular IAT, ARM64EC also contains an auxiliary IAT,
1295     // which holds addresses that are guaranteed to be callable directly from
1296     // ARM64 code. Function symbol naming is swapped: __imp_ symbols refer to
1297     // the auxiliary IAT, while __imp_aux_ symbols refer to the regular IAT. For
1298     // data imports, the naming is reversed.
1299     StringRef auxImpName = saver().save("__imp_aux_" + name);
1300     if (isCode) {
1301       impSym = symtab.addImportData(auxImpName, this, location);
1302       impECSym = symtab.addImportData(impName, this, auxLocation);
1303     } else {
1304       impSym = symtab.addImportData(impName, this, location);
1305       impECSym = symtab.addImportData(auxImpName, this, auxLocation);
1306     }
1307     if (!impECSym)
1308       return;
1309 
1310     StringRef auxImpCopyName = saver().save("__auximpcopy_" + name);
1311     auxImpCopySym = symtab.addImportData(auxImpCopyName, this, auxCopyLocation);
1312     if (!auxImpCopySym)
1313       return;
1314   }
1315   // If this was a duplicate, we logged an error but may continue;
1316   // in this case, impSym is nullptr.
1317   if (!impSym)
1318     return;
1319 
1320   if (hdr->getType() == llvm::COFF::IMPORT_CONST)
1321     static_cast<void>(symtab.addImportData(name, this, location));
1322 
1323   // If type is function, we need to create a thunk which jump to an
1324   // address pointed by the __imp_ symbol. (This allows you to call
1325   // DLL functions just like regular non-DLL functions.)
1326   if (isCode) {
1327     if (!symtab.isEC()) {
1328       thunkSym = symtab.addImportThunk(name, impSym, makeImportThunk());
1329     } else {
1330       thunkSym = symtab.addImportThunk(
1331           name, impSym, make<ImportThunkChunkX64>(symtab.ctx, impSym));
1332 
1333       if (std::optional<std::string> mangledName =
1334               getArm64ECMangledFunctionName(name)) {
1335         StringRef auxThunkName = saver().save(*mangledName);
1336         auxThunkSym = symtab.addImportThunk(
1337             auxThunkName, impECSym,
1338             make<ImportThunkChunkARM64>(symtab.ctx, impECSym, ARM64EC));
1339       }
1340 
1341       StringRef impChkName = saver().save("__impchk_" + name);
1342       impchkThunk = make<ImportThunkChunkARM64EC>(this);
1343       impchkThunk->sym = symtab.addImportThunk(impChkName, impSym, impchkThunk);
1344       symtab.ctx.driver.pullArm64ECIcallHelper();
1345     }
1346   }
1347 }
1348 
BitcodeFile(SymbolTable & symtab,MemoryBufferRef mb,std::unique_ptr<lto::InputFile> & o,bool lazy)1349 BitcodeFile::BitcodeFile(SymbolTable &symtab, MemoryBufferRef mb,
1350                          std::unique_ptr<lto::InputFile> &o, bool lazy)
1351     : InputFile(symtab, BitcodeKind, mb, lazy) {
1352   obj.swap(o);
1353 }
1354 
create(COFFLinkerContext & ctx,MemoryBufferRef mb,StringRef archiveName,uint64_t offsetInArchive,bool lazy)1355 BitcodeFile *BitcodeFile::create(COFFLinkerContext &ctx, MemoryBufferRef mb,
1356                                  StringRef archiveName,
1357                                  uint64_t offsetInArchive, bool lazy) {
1358   std::string path = mb.getBufferIdentifier().str();
1359   if (ctx.config.thinLTOIndexOnly)
1360     path = replaceThinLTOSuffix(mb.getBufferIdentifier(),
1361                                 ctx.config.thinLTOObjectSuffixReplace.first,
1362                                 ctx.config.thinLTOObjectSuffixReplace.second);
1363 
1364   // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
1365   // name. If two archives define two members with the same name, this
1366   // causes a collision which result in only one of the objects being taken
1367   // into consideration at LTO time (which very likely causes undefined
1368   // symbols later in the link stage). So we append file offset to make
1369   // filename unique.
1370   MemoryBufferRef mbref(mb.getBuffer(),
1371                         saver().save(archiveName.empty()
1372                                          ? path
1373                                          : archiveName +
1374                                                sys::path::filename(path) +
1375                                                utostr(offsetInArchive)));
1376 
1377   std::unique_ptr<lto::InputFile> obj = check(lto::InputFile::create(mbref));
1378   return make<BitcodeFile>(ctx.getSymtab(getMachineType(obj.get())), mb, obj,
1379                            lazy);
1380 }
1381 
1382 BitcodeFile::~BitcodeFile() = default;
1383 
parse()1384 void BitcodeFile::parse() {
1385   llvm::StringSaver &saver = lld::saver();
1386 
1387   std::vector<std::pair<Symbol *, bool>> comdat(obj->getComdatTable().size());
1388   for (size_t i = 0; i != obj->getComdatTable().size(); ++i)
1389     // FIXME: Check nodeduplicate
1390     comdat[i] =
1391         symtab.addComdat(this, saver.save(obj->getComdatTable()[i].first));
1392   for (const lto::InputFile::Symbol &objSym : obj->symbols()) {
1393     StringRef symName = saver.save(objSym.getName());
1394     int comdatIndex = objSym.getComdatIndex();
1395     Symbol *sym;
1396     SectionChunk *fakeSC = nullptr;
1397     if (objSym.isExecutable())
1398       fakeSC = &symtab.ctx.ltoTextSectionChunk.chunk;
1399     else
1400       fakeSC = &symtab.ctx.ltoDataSectionChunk.chunk;
1401     if (objSym.isUndefined()) {
1402       sym = symtab.addUndefined(symName, this, false);
1403       if (objSym.isWeak())
1404         sym->deferUndefined = true;
1405       // If one LTO object file references (i.e. has an undefined reference to)
1406       // a symbol with an __imp_ prefix, the LTO compilation itself sees it
1407       // as unprefixed but with a dllimport attribute instead, and doesn't
1408       // understand the relation to a concrete IR symbol with the __imp_ prefix.
1409       //
1410       // For such cases, mark the symbol as used in a regular object (i.e. the
1411       // symbol must be retained) so that the linker can associate the
1412       // references in the end. If the symbol is defined in an import library
1413       // or in a regular object file, this has no effect, but if it is defined
1414       // in another LTO object file, this makes sure it is kept, to fulfill
1415       // the reference when linking the output of the LTO compilation.
1416       if (symName.starts_with("__imp_"))
1417         sym->isUsedInRegularObj = true;
1418     } else if (objSym.isCommon()) {
1419       sym = symtab.addCommon(this, symName, objSym.getCommonSize());
1420     } else if (objSym.isWeak() && objSym.isIndirect()) {
1421       // Weak external.
1422       sym = symtab.addUndefined(symName, this, true);
1423       std::string fallback = std::string(objSym.getCOFFWeakExternalFallback());
1424       Symbol *alias = symtab.addUndefined(saver.save(fallback));
1425       checkAndSetWeakAlias(symtab, this, sym, alias, false);
1426     } else if (comdatIndex != -1) {
1427       if (symName == obj->getComdatTable()[comdatIndex].first) {
1428         sym = comdat[comdatIndex].first;
1429         if (cast<DefinedRegular>(sym)->data == nullptr)
1430           cast<DefinedRegular>(sym)->data = &fakeSC->repl;
1431       } else if (comdat[comdatIndex].second) {
1432         sym = symtab.addRegular(this, symName, nullptr, fakeSC);
1433       } else {
1434         sym = symtab.addUndefined(symName, this, false);
1435       }
1436     } else {
1437       sym =
1438           symtab.addRegular(this, symName, nullptr, fakeSC, 0, objSym.isWeak());
1439     }
1440     symbols.push_back(sym);
1441     if (objSym.isUsed())
1442       symtab.ctx.config.gcroot.push_back(sym);
1443   }
1444   directives = saver.save(obj->getCOFFLinkerOpts());
1445 }
1446 
parseLazy()1447 void BitcodeFile::parseLazy() {
1448   for (const lto::InputFile::Symbol &sym : obj->symbols())
1449     if (!sym.isUndefined()) {
1450       symtab.addLazyObject(this, sym.getName());
1451       if (!lazy)
1452         return;
1453     }
1454 }
1455 
getMachineType(const llvm::lto::InputFile * obj)1456 MachineTypes BitcodeFile::getMachineType(const llvm::lto::InputFile *obj) {
1457   Triple t(obj->getTargetTriple());
1458   switch (t.getArch()) {
1459   case Triple::x86_64:
1460     return AMD64;
1461   case Triple::x86:
1462     return I386;
1463   case Triple::arm:
1464   case Triple::thumb:
1465     return ARMNT;
1466   case Triple::aarch64:
1467     return t.isWindowsArm64EC() ? ARM64EC : ARM64;
1468   default:
1469     return IMAGE_FILE_MACHINE_UNKNOWN;
1470   }
1471 }
1472 
replaceThinLTOSuffix(StringRef path,StringRef suffix,StringRef repl)1473 std::string lld::coff::replaceThinLTOSuffix(StringRef path, StringRef suffix,
1474                                             StringRef repl) {
1475   if (path.consume_back(suffix))
1476     return (path + repl).str();
1477   return std::string(path);
1478 }
1479 
isRVACode(COFFObjectFile * coffObj,uint64_t rva,InputFile * file)1480 static bool isRVACode(COFFObjectFile *coffObj, uint64_t rva, InputFile *file) {
1481   for (size_t i = 1, e = coffObj->getNumberOfSections(); i <= e; i++) {
1482     const coff_section *sec = CHECK(coffObj->getSection(i), file);
1483     if (rva >= sec->VirtualAddress &&
1484         rva <= sec->VirtualAddress + sec->VirtualSize) {
1485       return (sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE) != 0;
1486     }
1487   }
1488   return false;
1489 }
1490 
parse()1491 void DLLFile::parse() {
1492   // Parse a memory buffer as a PE-COFF executable.
1493   std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
1494 
1495   if (auto *obj = dyn_cast<COFFObjectFile>(bin.get())) {
1496     bin.release();
1497     coffObj.reset(obj);
1498   } else {
1499     Err(symtab.ctx) << toString(this) << " is not a COFF file";
1500     return;
1501   }
1502 
1503   if (!coffObj->getPE32Header() && !coffObj->getPE32PlusHeader()) {
1504     Err(symtab.ctx) << toString(this) << " is not a PE-COFF executable";
1505     return;
1506   }
1507 
1508   for (const auto &exp : coffObj->export_directories()) {
1509     StringRef dllName, symbolName;
1510     uint32_t exportRVA;
1511     checkError(exp.getDllName(dllName));
1512     checkError(exp.getSymbolName(symbolName));
1513     checkError(exp.getExportRVA(exportRVA));
1514 
1515     if (symbolName.empty())
1516       continue;
1517 
1518     bool code = isRVACode(coffObj.get(), exportRVA, this);
1519 
1520     Symbol *s = make<Symbol>();
1521     s->dllName = dllName;
1522     s->symbolName = symbolName;
1523     s->importType = code ? ImportType::IMPORT_CODE : ImportType::IMPORT_DATA;
1524     s->nameType = ImportNameType::IMPORT_NAME;
1525 
1526     if (coffObj->getMachine() == I386) {
1527       s->symbolName = symbolName = saver().save("_" + symbolName);
1528       s->nameType = ImportNameType::IMPORT_NAME_NOPREFIX;
1529     }
1530 
1531     StringRef impName = saver().save("__imp_" + symbolName);
1532     symtab.addLazyDLLSymbol(this, s, impName);
1533     if (code)
1534       symtab.addLazyDLLSymbol(this, s, symbolName);
1535     if (symtab.isEC()) {
1536       StringRef impAuxName = saver().save("__imp_aux_" + symbolName);
1537       symtab.addLazyDLLSymbol(this, s, impAuxName);
1538 
1539       if (code) {
1540         std::optional<std::string> mangledName =
1541             getArm64ECMangledFunctionName(symbolName);
1542         if (mangledName)
1543           symtab.addLazyDLLSymbol(this, s, *mangledName);
1544       }
1545     }
1546   }
1547 }
1548 
getMachineType() const1549 MachineTypes DLLFile::getMachineType() const {
1550   if (coffObj)
1551     return static_cast<MachineTypes>(coffObj->getMachine());
1552   return IMAGE_FILE_MACHINE_UNKNOWN;
1553 }
1554 
makeImport(DLLFile::Symbol * s)1555 void DLLFile::makeImport(DLLFile::Symbol *s) {
1556   if (!seen.insert(s->symbolName).second)
1557     return;
1558 
1559   size_t impSize = s->dllName.size() + s->symbolName.size() + 2; // +2 for NULs
1560   size_t size = sizeof(coff_import_header) + impSize;
1561   char *buf = bAlloc().Allocate<char>(size);
1562   memset(buf, 0, size);
1563   char *p = buf;
1564   auto *imp = reinterpret_cast<coff_import_header *>(p);
1565   p += sizeof(*imp);
1566   imp->Sig2 = 0xFFFF;
1567   imp->Machine = coffObj->getMachine();
1568   imp->SizeOfData = impSize;
1569   imp->OrdinalHint = 0; // Only linking by name
1570   imp->TypeInfo = (s->nameType << 2) | s->importType;
1571 
1572   // Write symbol name and DLL name.
1573   memcpy(p, s->symbolName.data(), s->symbolName.size());
1574   p += s->symbolName.size() + 1;
1575   memcpy(p, s->dllName.data(), s->dllName.size());
1576   MemoryBufferRef mbref = MemoryBufferRef(StringRef(buf, size), s->dllName);
1577   ImportFile *impFile = make<ImportFile>(symtab.ctx, mbref);
1578   symtab.ctx.driver.addFile(impFile);
1579 }
1580