xref: /freebsd/contrib/llvm-project/lld/COFF/SymbolTable.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolTable.h"
10 #include "COFFLinkerContext.h"
11 #include "Config.h"
12 #include "Driver.h"
13 #include "LTO.h"
14 #include "PDB.h"
15 #include "Symbols.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "lld/Common/Timer.h"
19 #include "llvm/DebugInfo/DIContext.h"
20 #include "llvm/IR/LLVMContext.h"
21 #include "llvm/IR/Mangler.h"
22 #include "llvm/LTO/LTO.h"
23 #include "llvm/Object/COFFModuleDefinition.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/GlobPattern.h"
26 #include "llvm/Support/Parallel.h"
27 #include "llvm/Support/TimeProfiler.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <utility>
30 
31 using namespace llvm;
32 using namespace llvm::COFF;
33 using namespace llvm::object;
34 using namespace llvm::support;
35 
36 namespace lld::coff {
37 
ltrim1(StringRef s,const char * chars)38 StringRef ltrim1(StringRef s, const char *chars) {
39   if (!s.empty() && strchr(chars, s[0]))
40     return s.substr(1);
41   return s;
42 }
43 
errorOrWarn(COFFLinkerContext & ctx)44 static COFFSyncStream errorOrWarn(COFFLinkerContext &ctx) {
45   return {ctx, ctx.config.forceUnresolved ? DiagLevel::Warn : DiagLevel::Err};
46 }
47 
48 // Causes the file associated with a lazy symbol to be linked in.
forceLazy(Symbol * s)49 static void forceLazy(Symbol *s) {
50   s->pendingArchiveLoad = true;
51   switch (s->kind()) {
52   case Symbol::Kind::LazyArchiveKind: {
53     auto *l = cast<LazyArchive>(s);
54     l->file->addMember(l->sym);
55     break;
56   }
57   case Symbol::Kind::LazyObjectKind: {
58     InputFile *file = cast<LazyObject>(s)->file;
59     // FIXME: Remove this once we resolve all defineds before all undefineds in
60     //        ObjFile::initializeSymbols().
61     if (!file->lazy)
62       return;
63     file->lazy = false;
64     file->symtab.ctx.driver.addFile(file);
65     break;
66   }
67   case Symbol::Kind::LazyDLLSymbolKind: {
68     auto *l = cast<LazyDLLSymbol>(s);
69     l->file->makeImport(l->sym);
70     break;
71   }
72   default:
73     llvm_unreachable(
74         "symbol passed to forceLazy is not a LazyArchive or LazyObject");
75   }
76 }
77 
78 // Returns the symbol in SC whose value is <= Addr that is closest to Addr.
79 // This is generally the global variable or function whose definition contains
80 // Addr.
getSymbol(SectionChunk * sc,uint32_t addr)81 static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
82   DefinedRegular *candidate = nullptr;
83 
84   for (Symbol *s : sc->file->getSymbols()) {
85     auto *d = dyn_cast_or_null<DefinedRegular>(s);
86     if (!d || !d->data || d->file != sc->file || d->getChunk() != sc ||
87         d->getValue() > addr ||
88         (candidate && d->getValue() < candidate->getValue()))
89       continue;
90 
91     candidate = d;
92   }
93 
94   return candidate;
95 }
96 
getSymbolLocations(BitcodeFile * file)97 static std::vector<std::string> getSymbolLocations(BitcodeFile *file) {
98   std::string res("\n>>> referenced by ");
99   StringRef source = file->obj->getSourceFileName();
100   if (!source.empty())
101     res += source.str() + "\n>>>               ";
102   res += toString(file);
103   return {res};
104 }
105 
106 static std::optional<std::pair<StringRef, uint32_t>>
getFileLineDwarf(const SectionChunk * c,uint32_t addr)107 getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
108   std::optional<DILineInfo> optionalLineInfo =
109       c->file->getDILineInfo(addr, c->getSectionNumber() - 1);
110   if (!optionalLineInfo)
111     return std::nullopt;
112   const DILineInfo &lineInfo = *optionalLineInfo;
113   if (lineInfo.FileName == DILineInfo::BadString)
114     return std::nullopt;
115   return std::make_pair(saver().save(lineInfo.FileName), lineInfo.Line);
116 }
117 
118 static std::optional<std::pair<StringRef, uint32_t>>
getFileLine(const SectionChunk * c,uint32_t addr)119 getFileLine(const SectionChunk *c, uint32_t addr) {
120   // MinGW can optionally use codeview, even if the default is dwarf.
121   std::optional<std::pair<StringRef, uint32_t>> fileLine =
122       getFileLineCodeView(c, addr);
123   // If codeview didn't yield any result, check dwarf in MinGW mode.
124   if (!fileLine && c->file->symtab.ctx.config.mingw)
125     fileLine = getFileLineDwarf(c, addr);
126   return fileLine;
127 }
128 
129 // Given a file and the index of a symbol in that file, returns a description
130 // of all references to that symbol from that file. If no debug information is
131 // available, returns just the name of the file, else one string per actual
132 // reference as described in the debug info.
133 // Returns up to maxStrings string descriptions, along with the total number of
134 // locations found.
135 static std::pair<std::vector<std::string>, size_t>
getSymbolLocations(ObjFile * file,uint32_t symIndex,size_t maxStrings)136 getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) {
137   struct Location {
138     Symbol *sym;
139     std::pair<StringRef, uint32_t> fileLine;
140   };
141   std::vector<Location> locations;
142   size_t numLocations = 0;
143 
144   for (Chunk *c : file->getChunks()) {
145     auto *sc = dyn_cast<SectionChunk>(c);
146     if (!sc)
147       continue;
148     for (const coff_relocation &r : sc->getRelocs()) {
149       if (r.SymbolTableIndex != symIndex)
150         continue;
151       numLocations++;
152       if (locations.size() >= maxStrings)
153         continue;
154 
155       std::optional<std::pair<StringRef, uint32_t>> fileLine =
156           getFileLine(sc, r.VirtualAddress);
157       Symbol *sym = getSymbol(sc, r.VirtualAddress);
158       if (fileLine)
159         locations.push_back({sym, *fileLine});
160       else if (sym)
161         locations.push_back({sym, {"", 0}});
162     }
163   }
164 
165   if (maxStrings == 0)
166     return std::make_pair(std::vector<std::string>(), numLocations);
167 
168   if (numLocations == 0)
169     return std::make_pair(
170         std::vector<std::string>{"\n>>> referenced by " + toString(file)}, 1);
171 
172   std::vector<std::string> symbolLocations(locations.size());
173   size_t i = 0;
174   for (Location loc : locations) {
175     llvm::raw_string_ostream os(symbolLocations[i++]);
176     os << "\n>>> referenced by ";
177     if (!loc.fileLine.first.empty())
178       os << loc.fileLine.first << ":" << loc.fileLine.second
179          << "\n>>>               ";
180     os << toString(file);
181     if (loc.sym)
182       os << ":(" << toString(file->symtab.ctx, *loc.sym) << ')';
183   }
184   return std::make_pair(symbolLocations, numLocations);
185 }
186 
getSymbolLocations(ObjFile * file,uint32_t symIndex)187 std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
188   return getSymbolLocations(file, symIndex, SIZE_MAX).first;
189 }
190 
191 static std::pair<std::vector<std::string>, size_t>
getSymbolLocations(InputFile * file,uint32_t symIndex,size_t maxStrings)192 getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) {
193   if (auto *o = dyn_cast<ObjFile>(file))
194     return getSymbolLocations(o, symIndex, maxStrings);
195   if (auto *b = dyn_cast<BitcodeFile>(file)) {
196     std::vector<std::string> symbolLocations = getSymbolLocations(b);
197     size_t numLocations = symbolLocations.size();
198     if (symbolLocations.size() > maxStrings)
199       symbolLocations.resize(maxStrings);
200     return std::make_pair(symbolLocations, numLocations);
201   }
202   llvm_unreachable("unsupported file type passed to getSymbolLocations");
203   return std::make_pair(std::vector<std::string>(), (size_t)0);
204 }
205 
206 // For an undefined symbol, stores all files referencing it and the index of
207 // the undefined symbol in each file.
208 struct UndefinedDiag {
209   Symbol *sym;
210   struct File {
211     InputFile *file;
212     uint32_t symIndex;
213   };
214   std::vector<File> files;
215 };
216 
reportUndefinedSymbol(const UndefinedDiag & undefDiag)217 void SymbolTable::reportUndefinedSymbol(const UndefinedDiag &undefDiag) {
218   auto diag = errorOrWarn(ctx);
219   diag << "undefined symbol: " << printSymbol(undefDiag.sym);
220 
221   const size_t maxUndefReferences = 3;
222   size_t numDisplayedRefs = 0, numRefs = 0;
223   for (const UndefinedDiag::File &ref : undefDiag.files) {
224     auto [symbolLocations, totalLocations] = getSymbolLocations(
225         ref.file, ref.symIndex, maxUndefReferences - numDisplayedRefs);
226 
227     numRefs += totalLocations;
228     numDisplayedRefs += symbolLocations.size();
229     for (const std::string &s : symbolLocations)
230       diag << s;
231   }
232   if (numDisplayedRefs < numRefs)
233     diag << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times";
234 
235   // Hints
236   StringRef name = undefDiag.sym->getName();
237   if (name.consume_front("__imp_")) {
238     Symbol *imp = find(name);
239     if (imp && imp->isLazy()) {
240       diag << "\nNOTE: a relevant symbol '" << imp->getName()
241            << "' is available in " << toString(imp->getFile())
242            << " but cannot be used because it is not an import library.";
243     }
244   }
245 }
246 
loadMinGWSymbols()247 void SymbolTable::loadMinGWSymbols() {
248   std::vector<Symbol *> undefs;
249   for (auto &i : symMap) {
250     Symbol *sym = i.second;
251     auto *undef = dyn_cast<Undefined>(sym);
252     if (!undef)
253       continue;
254     if (undef->getWeakAlias())
255       continue;
256     undefs.push_back(sym);
257   }
258 
259   for (auto sym : undefs) {
260     auto *undef = dyn_cast<Undefined>(sym);
261     if (!undef)
262       continue;
263     if (undef->getWeakAlias())
264       continue;
265     StringRef name = undef->getName();
266 
267     if (machine == I386 && ctx.config.stdcallFixup) {
268       // Check if we can resolve an undefined decorated symbol by finding
269       // the intended target as an undecorated symbol (only with a leading
270       // underscore).
271       StringRef origName = name;
272       StringRef baseName = name;
273       // Trim down stdcall/fastcall/vectorcall symbols to the base name.
274       baseName = ltrim1(baseName, "_@");
275       baseName = baseName.substr(0, baseName.find('@'));
276       // Add a leading underscore, as it would be in cdecl form.
277       std::string newName = ("_" + baseName).str();
278       Symbol *l;
279       if (newName != origName && (l = find(newName)) != nullptr) {
280         // If we found a symbol and it is lazy; load it.
281         if (l->isLazy() && !l->pendingArchiveLoad) {
282           Log(ctx) << "Loading lazy " << l->getName() << " from "
283                    << l->getFile()->getName() << " for stdcall fixup";
284           forceLazy(l);
285         }
286         // If it's lazy or already defined, hook it up as weak alias.
287         if (l->isLazy() || isa<Defined>(l)) {
288           if (ctx.config.warnStdcallFixup)
289             Warn(ctx) << "Resolving " << origName << " by linking to "
290                       << newName;
291           else
292             Log(ctx) << "Resolving " << origName << " by linking to "
293                      << newName;
294           undef->setWeakAlias(l);
295           continue;
296         }
297       }
298     }
299 
300     if (ctx.config.autoImport) {
301       if (name.starts_with("__imp_"))
302         continue;
303       // If we have an undefined symbol, but we have a lazy symbol we could
304       // load, load it.
305       Symbol *l = find(("__imp_" + name).str());
306       if (!l || l->pendingArchiveLoad || !l->isLazy())
307         continue;
308 
309       Log(ctx) << "Loading lazy " << l->getName() << " from "
310                << l->getFile()->getName() << " for automatic import";
311       forceLazy(l);
312     }
313   }
314 }
315 
impSymbol(StringRef name)316 Defined *SymbolTable::impSymbol(StringRef name) {
317   if (name.starts_with("__imp_"))
318     return nullptr;
319   return dyn_cast_or_null<Defined>(find(("__imp_" + name).str()));
320 }
321 
handleMinGWAutomaticImport(Symbol * sym,StringRef name)322 bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
323   Defined *imp = impSymbol(name);
324   if (!imp)
325     return false;
326 
327   // Replace the reference directly to a variable with a reference
328   // to the import address table instead. This obviously isn't right,
329   // but we mark the symbol as isRuntimePseudoReloc, and a later pass
330   // will add runtime pseudo relocations for every relocation against
331   // this Symbol. The runtime pseudo relocation framework expects the
332   // reference itself to point at the IAT entry.
333   size_t impSize = 0;
334   if (isa<DefinedImportData>(imp)) {
335     Log(ctx) << "Automatically importing " << name << " from "
336              << cast<DefinedImportData>(imp)->getDLLName();
337     impSize = sizeof(DefinedImportData);
338   } else if (isa<DefinedRegular>(imp)) {
339     Log(ctx) << "Automatically importing " << name << " from "
340              << toString(cast<DefinedRegular>(imp)->file);
341     impSize = sizeof(DefinedRegular);
342   } else {
343     Warn(ctx) << "unable to automatically import " << name << " from "
344               << imp->getName() << " from " << cast<DefinedRegular>(imp)->file
345               << "; unexpected symbol type";
346     return false;
347   }
348   sym->replaceKeepingName(imp, impSize);
349   sym->isRuntimePseudoReloc = true;
350 
351   // There may exist symbols named .refptr.<name> which only consist
352   // of a single pointer to <name>. If it turns out <name> is
353   // automatically imported, we don't need to keep the .refptr.<name>
354   // pointer at all, but redirect all accesses to it to the IAT entry
355   // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
356   DefinedRegular *refptr =
357       dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
358   if (refptr && refptr->getChunk()->getSize() == ctx.config.wordsize) {
359     SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
360     if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
361       Log(ctx) << "Replacing .refptr." << name << " with " << imp->getName();
362       refptr->getChunk()->live = false;
363       refptr->replaceKeepingName(imp, impSize);
364     }
365   }
366   return true;
367 }
368 
369 /// Helper function for reportUnresolvable and resolveRemainingUndefines.
370 /// This function emits an "undefined symbol" diagnostic for each symbol in
371 /// undefs. If localImports is not nullptr, it also emits a "locally
372 /// defined symbol imported" diagnostic for symbols in localImports.
373 /// objFiles and bitcodeFiles (if not nullptr) are used to report where
374 /// undefined symbols are referenced.
reportProblemSymbols(const SmallPtrSetImpl<Symbol * > & undefs,const DenseMap<Symbol *,Symbol * > * localImports,bool needBitcodeFiles)375 void SymbolTable::reportProblemSymbols(
376     const SmallPtrSetImpl<Symbol *> &undefs,
377     const DenseMap<Symbol *, Symbol *> *localImports, bool needBitcodeFiles) {
378   // Return early if there is nothing to report (which should be
379   // the common case).
380   if (undefs.empty() && (!localImports || localImports->empty()))
381     return;
382 
383   for (Symbol *b : ctx.config.gcroot) {
384     if (undefs.count(b))
385       errorOrWarn(ctx) << "<root>: undefined symbol: " << printSymbol(b);
386     if (localImports)
387       if (Symbol *imp = localImports->lookup(b))
388         Warn(ctx) << "<root>: locally defined symbol imported: "
389                   << printSymbol(imp) << " (defined in "
390                   << toString(imp->getFile()) << ") [LNK4217]";
391   }
392 
393   std::vector<UndefinedDiag> undefDiags;
394   DenseMap<Symbol *, int> firstDiag;
395 
396   auto processFile = [&](InputFile *file, ArrayRef<Symbol *> symbols) {
397     uint32_t symIndex = (uint32_t)-1;
398     for (Symbol *sym : symbols) {
399       ++symIndex;
400       if (!sym)
401         continue;
402       if (undefs.count(sym)) {
403         auto [it, inserted] = firstDiag.try_emplace(sym, undefDiags.size());
404         if (inserted)
405           undefDiags.push_back({sym, {{file, symIndex}}});
406         else
407           undefDiags[it->second].files.push_back({file, symIndex});
408       }
409       if (localImports)
410         if (Symbol *imp = localImports->lookup(sym))
411           Warn(ctx) << file
412                     << ": locally defined symbol imported: " << printSymbol(imp)
413                     << " (defined in " << imp->getFile() << ") [LNK4217]";
414     }
415   };
416 
417   for (ObjFile *file : ctx.objFileInstances)
418     processFile(file, file->getSymbols());
419 
420   if (needBitcodeFiles)
421     for (BitcodeFile *file : bitcodeFileInstances)
422       processFile(file, file->getSymbols());
423 
424   for (const UndefinedDiag &undefDiag : undefDiags)
425     reportUndefinedSymbol(undefDiag);
426 }
427 
reportUnresolvable()428 void SymbolTable::reportUnresolvable() {
429   SmallPtrSet<Symbol *, 8> undefs;
430   for (auto &i : symMap) {
431     Symbol *sym = i.second;
432     auto *undef = dyn_cast<Undefined>(sym);
433     if (!undef || sym->deferUndefined)
434       continue;
435     if (undef->getWeakAlias())
436       continue;
437     StringRef name = undef->getName();
438     if (name.starts_with("__imp_")) {
439       Symbol *imp = find(name.substr(strlen("__imp_")));
440       if (Defined *def = dyn_cast_or_null<Defined>(imp)) {
441         def->isUsedInRegularObj = true;
442         continue;
443       }
444     }
445     if (name.contains("_PchSym_"))
446       continue;
447     if (ctx.config.autoImport && impSymbol(name))
448       continue;
449     undefs.insert(sym);
450   }
451 
452   reportProblemSymbols(undefs, /*localImports=*/nullptr, true);
453 }
454 
resolveRemainingUndefines()455 void SymbolTable::resolveRemainingUndefines() {
456   llvm::TimeTraceScope timeScope("Resolve remaining undefined symbols");
457   SmallPtrSet<Symbol *, 8> undefs;
458   DenseMap<Symbol *, Symbol *> localImports;
459 
460   for (auto &i : symMap) {
461     Symbol *sym = i.second;
462     auto *undef = dyn_cast<Undefined>(sym);
463     if (!undef)
464       continue;
465     if (!sym->isUsedInRegularObj)
466       continue;
467 
468     StringRef name = undef->getName();
469 
470     // A weak alias may have been resolved, so check for that.
471     if (undef->resolveWeakAlias())
472       continue;
473 
474     // If we can resolve a symbol by removing __imp_ prefix, do that.
475     // This odd rule is for compatibility with MSVC linker.
476     if (name.starts_with("__imp_")) {
477       auto findLocalSym = [&](StringRef n) {
478         Symbol *sym = find(n);
479         if (auto undef = dyn_cast_or_null<Undefined>(sym)) {
480           // The unprefixed symbol might come later in symMap, so handle it now
481           // if needed.
482           if (!undef->resolveWeakAlias())
483             sym = nullptr;
484         }
485         return sym;
486       };
487 
488       StringRef impName = name.substr(strlen("__imp_"));
489       Symbol *imp = findLocalSym(impName);
490       if (!imp && isEC()) {
491         // Try to use the mangled symbol on ARM64EC.
492         std::optional<std::string> mangledName =
493             getArm64ECMangledFunctionName(impName);
494         if (mangledName)
495           imp = findLocalSym(*mangledName);
496         if (!imp && impName.consume_front("aux_")) {
497           // If it's a __imp_aux_ symbol, try skipping the aux_ prefix.
498           imp = findLocalSym(impName);
499           if (!imp && (mangledName = getArm64ECMangledFunctionName(impName)))
500             imp = findLocalSym(*mangledName);
501         }
502       }
503       if (imp && isa<Defined>(imp)) {
504         auto *d = cast<Defined>(imp);
505         replaceSymbol<DefinedLocalImport>(sym, ctx, name, d);
506         localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
507         localImports[sym] = d;
508         continue;
509       }
510     }
511 
512     // We don't want to report missing Microsoft precompiled headers symbols.
513     // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
514     if (name.contains("_PchSym_"))
515       continue;
516 
517     if (ctx.config.autoImport && handleMinGWAutomaticImport(sym, name))
518       continue;
519 
520     // Remaining undefined symbols are not fatal if /force is specified.
521     // They are replaced with dummy defined symbols.
522     if (ctx.config.forceUnresolved)
523       replaceSymbol<DefinedAbsolute>(sym, ctx, name, 0);
524     undefs.insert(sym);
525   }
526 
527   reportProblemSymbols(
528       undefs, ctx.config.warnLocallyDefinedImported ? &localImports : nullptr,
529       false);
530 }
531 
insert(StringRef name)532 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
533   bool inserted = false;
534   Symbol *&sym = symMap[CachedHashStringRef(name)];
535   if (!sym) {
536     sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
537     sym->isUsedInRegularObj = false;
538     sym->pendingArchiveLoad = false;
539     sym->canInline = true;
540     inserted = true;
541 
542     if (isEC() && name.starts_with("EXP+"))
543       expSymbols.push_back(sym);
544   }
545   return {sym, inserted};
546 }
547 
insert(StringRef name,InputFile * file)548 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
549   std::pair<Symbol *, bool> result = insert(name);
550   if (!file || !isa<BitcodeFile>(file))
551     result.first->isUsedInRegularObj = true;
552   return result;
553 }
554 
initializeLoadConfig()555 void SymbolTable::initializeLoadConfig() {
556   auto sym =
557       dyn_cast_or_null<DefinedRegular>(findUnderscore("_load_config_used"));
558   if (!sym) {
559     if (isEC()) {
560       Warn(ctx) << "EC version of '_load_config_used' is missing";
561       return;
562     }
563     if (ctx.config.machine == ARM64X) {
564       Warn(ctx) << "native version of '_load_config_used' is missing for "
565                    "ARM64X target";
566       return;
567     }
568     if (ctx.config.guardCF != GuardCFLevel::Off)
569       Warn(ctx)
570           << "Control Flow Guard is enabled but '_load_config_used' is missing";
571     if (ctx.config.dependentLoadFlags)
572       Warn(ctx) << "_load_config_used not found, /dependentloadflag will have "
573                    "no effect";
574     return;
575   }
576 
577   SectionChunk *sc = sym->getChunk();
578   if (!sc->hasData) {
579     Err(ctx) << "_load_config_used points to uninitialized data";
580     return;
581   }
582   uint64_t offsetInChunk = sym->getValue();
583   if (offsetInChunk + 4 > sc->getSize()) {
584     Err(ctx) << "_load_config_used section chunk is too small";
585     return;
586   }
587 
588   ArrayRef<uint8_t> secContents = sc->getContents();
589   loadConfigSize =
590       *reinterpret_cast<const ulittle32_t *>(&secContents[offsetInChunk]);
591   if (offsetInChunk + loadConfigSize > sc->getSize()) {
592     Err(ctx) << "_load_config_used specifies a size larger than its containing "
593                 "section chunk";
594     return;
595   }
596 
597   uint32_t expectedAlign = ctx.config.is64() ? 8 : 4;
598   if (sc->getAlignment() < expectedAlign)
599     Warn(ctx) << "'_load_config_used' is misaligned (expected alignment to be "
600               << expectedAlign << " bytes, got " << sc->getAlignment()
601               << " instead)";
602   else if (!isAligned(Align(expectedAlign), offsetInChunk))
603     Warn(ctx) << "'_load_config_used' is misaligned (section offset is 0x"
604               << Twine::utohexstr(sym->getValue()) << " not aligned to "
605               << expectedAlign << " bytes)";
606 
607   loadConfigSym = sym;
608 }
609 
addEntryThunk(Symbol * from,Symbol * to)610 void SymbolTable::addEntryThunk(Symbol *from, Symbol *to) {
611   entryThunks.push_back({from, to});
612 }
613 
addExitThunk(Symbol * from,Symbol * to)614 void SymbolTable::addExitThunk(Symbol *from, Symbol *to) {
615   exitThunks[from] = to;
616 }
617 
initializeECThunks()618 void SymbolTable::initializeECThunks() {
619   if (!isArm64EC(ctx.config.machine))
620     return;
621 
622   for (auto it : entryThunks) {
623     auto *to = dyn_cast<Defined>(it.second);
624     if (!to)
625       continue;
626     auto *from = dyn_cast<DefinedRegular>(it.first);
627     // We need to be able to add padding to the function and fill it with an
628     // offset to its entry thunks. To ensure that padding the function is
629     // feasible, functions are required to be COMDAT symbols with no offset.
630     if (!from || !from->getChunk()->isCOMDAT() ||
631         cast<DefinedRegular>(from)->getValue()) {
632       Err(ctx) << "non COMDAT symbol '" << from->getName() << "' in hybrid map";
633       continue;
634     }
635     from->getChunk()->setEntryThunk(to);
636   }
637 
638   for (ImportFile *file : ctx.importFileInstances) {
639     if (!file->impchkThunk)
640       continue;
641 
642     Symbol *sym = exitThunks.lookup(file->thunkSym);
643     if (!sym)
644       sym = exitThunks.lookup(file->impECSym);
645     file->impchkThunk->exitThunk = dyn_cast_or_null<Defined>(sym);
646   }
647 
648   // On ARM64EC, the __imp_ symbol references the auxiliary IAT, while the
649   // __imp_aux_ symbol references the regular IAT. However, x86_64 code expects
650   // both to reference the regular IAT, so adjust the symbol if necessary.
651   parallelForEach(ctx.objFileInstances, [&](ObjFile *file) {
652     if (file->getMachineType() != AMD64)
653       return;
654     for (auto &sym : file->getMutableSymbols()) {
655       auto impSym = dyn_cast_or_null<DefinedImportData>(sym);
656       if (impSym && impSym->file->impchkThunk && sym == impSym->file->impECSym)
657         sym = impSym->file->impSym;
658     }
659   });
660 }
661 
addUndefined(StringRef name,InputFile * f,bool overrideLazy)662 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
663                                   bool overrideLazy) {
664   auto [s, wasInserted] = insert(name, f);
665   if (wasInserted || (s->isLazy() && overrideLazy)) {
666     replaceSymbol<Undefined>(s, name);
667     return s;
668   }
669   if (s->isLazy())
670     forceLazy(s);
671   return s;
672 }
673 
addGCRoot(StringRef name,bool aliasEC)674 Symbol *SymbolTable::addGCRoot(StringRef name, bool aliasEC) {
675   Symbol *b = addUndefined(name);
676   if (!b->isGCRoot) {
677     b->isGCRoot = true;
678     ctx.config.gcroot.push_back(b);
679   }
680 
681   // On ARM64EC, a symbol may be defined in either its mangled or demangled form
682   // (or both). Define an anti-dependency symbol that binds both forms, similar
683   // to how compiler-generated code references external functions.
684   if (aliasEC && isEC()) {
685     if (std::optional<std::string> mangledName =
686             getArm64ECMangledFunctionName(name)) {
687       auto u = dyn_cast<Undefined>(b);
688       if (u && !u->weakAlias) {
689         Symbol *t = addUndefined(saver().save(*mangledName));
690         u->setWeakAlias(t, true);
691       }
692     } else if (std::optional<std::string> demangledName =
693                    getArm64ECDemangledFunctionName(name)) {
694       Symbol *us = addUndefined(saver().save(*demangledName));
695       auto u = dyn_cast<Undefined>(us);
696       if (u && !u->weakAlias)
697         u->setWeakAlias(b, true);
698     }
699   }
700   return b;
701 }
702 
703 // On ARM64EC, a function symbol may appear in both mangled and demangled forms:
704 // - ARM64EC archives contain only the mangled name, while the demangled symbol
705 //   is defined by the object file as an alias.
706 // - x86_64 archives contain only the demangled name (the mangled name is
707 //   usually defined by an object referencing the symbol as an alias to a guess
708 //   exit thunk).
709 // - ARM64EC import files contain both the mangled and demangled names for
710 //   thunks.
711 // If more than one archive defines the same function, this could lead
712 // to different libraries being used for the same function depending on how they
713 // are referenced. Avoid this by checking if the paired symbol is already
714 // defined before adding a symbol to the table.
715 template <typename T>
checkLazyECPair(SymbolTable * symtab,StringRef name,InputFile * f)716 bool checkLazyECPair(SymbolTable *symtab, StringRef name, InputFile *f) {
717   if (name.starts_with("__imp_"))
718     return true;
719   std::string pairName;
720   if (std::optional<std::string> mangledName =
721           getArm64ECMangledFunctionName(name))
722     pairName = std::move(*mangledName);
723   else if (std::optional<std::string> demangledName =
724                getArm64ECDemangledFunctionName(name))
725     pairName = std::move(*demangledName);
726   else
727     return true;
728 
729   Symbol *sym = symtab->find(pairName);
730   if (!sym)
731     return true;
732   if (sym->pendingArchiveLoad)
733     return false;
734   if (auto u = dyn_cast<Undefined>(sym))
735     return !u->weakAlias || u->isAntiDep;
736   // If the symbol is lazy, allow it only if it originates from the same
737   // archive.
738   auto lazy = dyn_cast<T>(sym);
739   return lazy && lazy->file == f;
740 }
741 
addLazyArchive(ArchiveFile * f,const Archive::Symbol & sym)742 void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
743   StringRef name = sym.getName();
744   if (isEC() && !checkLazyECPair<LazyArchive>(this, name, f))
745     return;
746   auto [s, wasInserted] = insert(name);
747   if (wasInserted) {
748     replaceSymbol<LazyArchive>(s, f, sym);
749     return;
750   }
751   auto *u = dyn_cast<Undefined>(s);
752   if (!u || (u->weakAlias && !u->isECAlias(machine)) || s->pendingArchiveLoad)
753     return;
754   s->pendingArchiveLoad = true;
755   f->addMember(sym);
756 }
757 
addLazyObject(InputFile * f,StringRef n)758 void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
759   assert(f->lazy);
760   if (isEC() && !checkLazyECPair<LazyObject>(this, n, f))
761     return;
762   auto [s, wasInserted] = insert(n, f);
763   if (wasInserted) {
764     replaceSymbol<LazyObject>(s, f, n);
765     return;
766   }
767   auto *u = dyn_cast<Undefined>(s);
768   if (!u || (u->weakAlias && !u->isECAlias(machine)) || s->pendingArchiveLoad)
769     return;
770   s->pendingArchiveLoad = true;
771   f->lazy = false;
772   ctx.driver.addFile(f);
773 }
774 
addLazyDLLSymbol(DLLFile * f,DLLFile::Symbol * sym,StringRef n)775 void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
776                                    StringRef n) {
777   auto [s, wasInserted] = insert(n);
778   if (wasInserted) {
779     replaceSymbol<LazyDLLSymbol>(s, f, sym, n);
780     return;
781   }
782   auto *u = dyn_cast<Undefined>(s);
783   if (!u || (u->weakAlias && !u->isECAlias(machine)) || s->pendingArchiveLoad)
784     return;
785   s->pendingArchiveLoad = true;
786   f->makeImport(sym);
787 }
788 
getSourceLocationBitcode(BitcodeFile * file)789 static std::string getSourceLocationBitcode(BitcodeFile *file) {
790   std::string res("\n>>> defined at ");
791   StringRef source = file->obj->getSourceFileName();
792   if (!source.empty())
793     res += source.str() + "\n>>>            ";
794   res += toString(file);
795   return res;
796 }
797 
getSourceLocationObj(ObjFile * file,SectionChunk * sc,uint32_t offset,StringRef name)798 static std::string getSourceLocationObj(ObjFile *file, SectionChunk *sc,
799                                         uint32_t offset, StringRef name) {
800   std::optional<std::pair<StringRef, uint32_t>> fileLine;
801   if (sc)
802     fileLine = getFileLine(sc, offset);
803   if (!fileLine)
804     fileLine = file->getVariableLocation(name);
805 
806   std::string res;
807   llvm::raw_string_ostream os(res);
808   os << "\n>>> defined at ";
809   if (fileLine)
810     os << fileLine->first << ":" << fileLine->second << "\n>>>            ";
811   os << toString(file);
812   return res;
813 }
814 
getSourceLocation(InputFile * file,SectionChunk * sc,uint32_t offset,StringRef name)815 static std::string getSourceLocation(InputFile *file, SectionChunk *sc,
816                                      uint32_t offset, StringRef name) {
817   if (!file)
818     return "";
819   if (auto *o = dyn_cast<ObjFile>(file))
820     return getSourceLocationObj(o, sc, offset, name);
821   if (auto *b = dyn_cast<BitcodeFile>(file))
822     return getSourceLocationBitcode(b);
823   return "\n>>> defined at " + toString(file);
824 }
825 
826 // Construct and print an error message in the form of:
827 //
828 //   lld-link: error: duplicate symbol: foo
829 //   >>> defined at bar.c:30
830 //   >>>            bar.o
831 //   >>> defined at baz.c:563
832 //   >>>            baz.o
reportDuplicate(Symbol * existing,InputFile * newFile,SectionChunk * newSc,uint32_t newSectionOffset)833 void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile,
834                                   SectionChunk *newSc,
835                                   uint32_t newSectionOffset) {
836   COFFSyncStream diag(ctx, ctx.config.forceMultiple ? DiagLevel::Warn
837                                                     : DiagLevel::Err);
838   diag << "duplicate symbol: " << printSymbol(existing);
839 
840   DefinedRegular *d = dyn_cast<DefinedRegular>(existing);
841   if (d && isa<ObjFile>(d->getFile())) {
842     diag << getSourceLocation(d->getFile(), d->getChunk(), d->getValue(),
843                               existing->getName());
844   } else {
845     diag << getSourceLocation(existing->getFile(), nullptr, 0, "");
846   }
847   diag << getSourceLocation(newFile, newSc, newSectionOffset,
848                             existing->getName());
849 }
850 
addAbsolute(StringRef n,COFFSymbolRef sym)851 Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
852   auto [s, wasInserted] = insert(n, nullptr);
853   s->isUsedInRegularObj = true;
854   if (wasInserted || isa<Undefined>(s) || s->isLazy())
855     replaceSymbol<DefinedAbsolute>(s, ctx, n, sym);
856   else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
857     if (da->getVA() != sym.getValue())
858       reportDuplicate(s, nullptr);
859   } else if (!isa<DefinedCOFF>(s))
860     reportDuplicate(s, nullptr);
861   return s;
862 }
863 
addAbsolute(StringRef n,uint64_t va)864 Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
865   auto [s, wasInserted] = insert(n, nullptr);
866   s->isUsedInRegularObj = true;
867   if (wasInserted || isa<Undefined>(s) || s->isLazy())
868     replaceSymbol<DefinedAbsolute>(s, ctx, n, va);
869   else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
870     if (da->getVA() != va)
871       reportDuplicate(s, nullptr);
872   } else if (!isa<DefinedCOFF>(s))
873     reportDuplicate(s, nullptr);
874   return s;
875 }
876 
addSynthetic(StringRef n,Chunk * c)877 Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
878   auto [s, wasInserted] = insert(n, nullptr);
879   s->isUsedInRegularObj = true;
880   if (wasInserted || isa<Undefined>(s) || s->isLazy())
881     replaceSymbol<DefinedSynthetic>(s, n, c);
882   else if (!isa<DefinedCOFF>(s))
883     reportDuplicate(s, nullptr);
884   return s;
885 }
886 
addRegular(InputFile * f,StringRef n,const coff_symbol_generic * sym,SectionChunk * c,uint32_t sectionOffset,bool isWeak)887 Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
888                                 const coff_symbol_generic *sym, SectionChunk *c,
889                                 uint32_t sectionOffset, bool isWeak) {
890   auto [s, wasInserted] = insert(n, f);
891   if (wasInserted || !isa<DefinedRegular>(s) || s->isWeak)
892     replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ false,
893                                   /*IsExternal*/ true, sym, c, isWeak);
894   else if (!isWeak)
895     reportDuplicate(s, f, c, sectionOffset);
896   return s;
897 }
898 
899 std::pair<DefinedRegular *, bool>
addComdat(InputFile * f,StringRef n,const coff_symbol_generic * sym)900 SymbolTable::addComdat(InputFile *f, StringRef n,
901                        const coff_symbol_generic *sym) {
902   auto [s, wasInserted] = insert(n, f);
903   if (wasInserted || !isa<DefinedRegular>(s)) {
904     replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
905                                   /*IsExternal*/ true, sym, nullptr);
906     return {cast<DefinedRegular>(s), true};
907   }
908   auto *existingSymbol = cast<DefinedRegular>(s);
909   if (!existingSymbol->isCOMDAT)
910     reportDuplicate(s, f);
911   return {existingSymbol, false};
912 }
913 
addCommon(InputFile * f,StringRef n,uint64_t size,const coff_symbol_generic * sym,CommonChunk * c)914 Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
915                                const coff_symbol_generic *sym, CommonChunk *c) {
916   auto [s, wasInserted] = insert(n, f);
917   if (wasInserted || !isa<DefinedCOFF>(s))
918     replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
919   else if (auto *dc = dyn_cast<DefinedCommon>(s))
920     if (size > dc->getSize())
921       replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
922   return s;
923 }
924 
addImportData(StringRef n,ImportFile * f,Chunk * & location)925 DefinedImportData *SymbolTable::addImportData(StringRef n, ImportFile *f,
926                                               Chunk *&location) {
927   auto [s, wasInserted] = insert(n, nullptr);
928   s->isUsedInRegularObj = true;
929   if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
930     replaceSymbol<DefinedImportData>(s, n, f, location);
931     return cast<DefinedImportData>(s);
932   }
933 
934   reportDuplicate(s, f);
935   return nullptr;
936 }
937 
addImportThunk(StringRef name,DefinedImportData * id,ImportThunkChunk * chunk)938 Defined *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
939                                      ImportThunkChunk *chunk) {
940   auto [s, wasInserted] = insert(name, nullptr);
941   s->isUsedInRegularObj = true;
942   if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
943     replaceSymbol<DefinedImportThunk>(s, ctx, name, id, chunk);
944     return cast<Defined>(s);
945   }
946 
947   reportDuplicate(s, id->file);
948   return nullptr;
949 }
950 
addLibcall(StringRef name)951 void SymbolTable::addLibcall(StringRef name) {
952   Symbol *sym = findUnderscore(name);
953   if (!sym)
954     return;
955 
956   if (auto *l = dyn_cast<LazyArchive>(sym)) {
957     MemoryBufferRef mb = l->getMemberBuffer();
958     if (isBitcode(mb))
959       addUndefined(sym->getName());
960   } else if (LazyObject *o = dyn_cast<LazyObject>(sym)) {
961     if (isBitcode(o->file->mb))
962       addUndefined(sym->getName());
963   }
964 }
965 
find(StringRef name) const966 Symbol *SymbolTable::find(StringRef name) const {
967   return symMap.lookup(CachedHashStringRef(name));
968 }
969 
findUnderscore(StringRef name) const970 Symbol *SymbolTable::findUnderscore(StringRef name) const {
971   if (machine == I386)
972     return find(("_" + name).str());
973   return find(name);
974 }
975 
976 // Return all symbols that start with Prefix, possibly ignoring the first
977 // character of Prefix or the first character symbol.
getSymsWithPrefix(StringRef prefix)978 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
979   std::vector<Symbol *> syms;
980   for (auto pair : symMap) {
981     StringRef name = pair.first.val();
982     if (name.starts_with(prefix) || name.starts_with(prefix.drop_front()) ||
983         name.drop_front().starts_with(prefix) ||
984         name.drop_front().starts_with(prefix.drop_front())) {
985       syms.push_back(pair.second);
986     }
987   }
988   return syms;
989 }
990 
findMangle(StringRef name)991 Symbol *SymbolTable::findMangle(StringRef name) {
992   if (Symbol *sym = find(name)) {
993     if (auto *u = dyn_cast<Undefined>(sym)) {
994       // We're specifically looking for weak aliases that ultimately resolve to
995       // defined symbols, hence the call to getWeakAlias() instead of just using
996       // the weakAlias member variable. This matches link.exe's behavior.
997       if (Symbol *weakAlias = u->getWeakAlias())
998         return weakAlias;
999     } else {
1000       return sym;
1001     }
1002   }
1003 
1004   // Efficient fuzzy string lookup is impossible with a hash table, so iterate
1005   // the symbol table once and collect all possibly matching symbols into this
1006   // vector. Then compare each possibly matching symbol with each possible
1007   // mangling.
1008   std::vector<Symbol *> syms = getSymsWithPrefix(name);
1009   auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
1010     std::string prefix = t.str();
1011     for (auto *s : syms)
1012       if (s->getName().starts_with(prefix))
1013         return s;
1014     return nullptr;
1015   };
1016 
1017   // For non-x86, just look for C++ functions.
1018   if (machine != I386)
1019     return findByPrefix("?" + name + "@@Y");
1020 
1021   if (!name.starts_with("_"))
1022     return nullptr;
1023   // Search for x86 stdcall function.
1024   if (Symbol *s = findByPrefix(name + "@"))
1025     return s;
1026   // Search for x86 fastcall function.
1027   if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
1028     return s;
1029   // Search for x86 vectorcall function.
1030   if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
1031     return s;
1032   // Search for x86 C++ non-member function.
1033   return findByPrefix("?" + name.substr(1) + "@@Y");
1034 }
1035 
findUnderscoreMangle(StringRef sym)1036 bool SymbolTable::findUnderscoreMangle(StringRef sym) {
1037   Symbol *s = findMangle(mangle(sym));
1038   return s && !isa<Undefined>(s);
1039 }
1040 
1041 // Symbol names are mangled by prepending "_" on x86.
mangle(StringRef sym)1042 StringRef SymbolTable::mangle(StringRef sym) {
1043   assert(machine != IMAGE_FILE_MACHINE_UNKNOWN);
1044   if (machine == I386)
1045     return saver().save("_" + sym);
1046   return sym;
1047 }
1048 
mangleMaybe(Symbol * s)1049 StringRef SymbolTable::mangleMaybe(Symbol *s) {
1050   // If the plain symbol name has already been resolved, do nothing.
1051   Undefined *unmangled = dyn_cast<Undefined>(s);
1052   if (!unmangled)
1053     return "";
1054 
1055   // Otherwise, see if a similar, mangled symbol exists in the symbol table.
1056   Symbol *mangled = findMangle(unmangled->getName());
1057   if (!mangled)
1058     return "";
1059 
1060   // If we find a similar mangled symbol, make this an alias to it and return
1061   // its name.
1062   Log(ctx) << unmangled->getName() << " aliased to " << mangled->getName();
1063   unmangled->setWeakAlias(addUndefined(mangled->getName()));
1064   return mangled->getName();
1065 }
1066 
1067 // Windows specific -- find default entry point name.
1068 //
1069 // There are four different entry point functions for Windows executables,
1070 // each of which corresponds to a user-defined "main" function. This function
1071 // infers an entry point from a user-defined "main" function.
findDefaultEntry()1072 StringRef SymbolTable::findDefaultEntry() {
1073   assert(ctx.config.subsystem != IMAGE_SUBSYSTEM_UNKNOWN &&
1074          "must handle /subsystem before calling this");
1075 
1076   if (ctx.config.mingw)
1077     return mangle(ctx.config.subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI
1078                       ? "WinMainCRTStartup"
1079                       : "mainCRTStartup");
1080 
1081   if (ctx.config.subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI) {
1082     if (findUnderscoreMangle("wWinMain")) {
1083       if (!findUnderscoreMangle("WinMain"))
1084         return mangle("wWinMainCRTStartup");
1085       Warn(ctx) << "found both wWinMain and WinMain; using latter";
1086     }
1087     return mangle("WinMainCRTStartup");
1088   }
1089   if (findUnderscoreMangle("wmain")) {
1090     if (!findUnderscoreMangle("main"))
1091       return mangle("wmainCRTStartup");
1092     Warn(ctx) << "found both wmain and main; using latter";
1093   }
1094   return mangle("mainCRTStartup");
1095 }
1096 
inferSubsystem()1097 WindowsSubsystem SymbolTable::inferSubsystem() {
1098   if (ctx.config.dll)
1099     return IMAGE_SUBSYSTEM_WINDOWS_GUI;
1100   if (ctx.config.mingw)
1101     return IMAGE_SUBSYSTEM_WINDOWS_CUI;
1102   // Note that link.exe infers the subsystem from the presence of these
1103   // functions even if /entry: or /nodefaultlib are passed which causes them
1104   // to not be called.
1105   bool haveMain = findUnderscoreMangle("main");
1106   bool haveWMain = findUnderscoreMangle("wmain");
1107   bool haveWinMain = findUnderscoreMangle("WinMain");
1108   bool haveWWinMain = findUnderscoreMangle("wWinMain");
1109   if (haveMain || haveWMain) {
1110     if (haveWinMain || haveWWinMain) {
1111       Warn(ctx) << "found " << (haveMain ? "main" : "wmain") << " and "
1112                 << (haveWinMain ? "WinMain" : "wWinMain")
1113                 << "; defaulting to /subsystem:console";
1114     }
1115     return IMAGE_SUBSYSTEM_WINDOWS_CUI;
1116   }
1117   if (haveWinMain || haveWWinMain)
1118     return IMAGE_SUBSYSTEM_WINDOWS_GUI;
1119   return IMAGE_SUBSYSTEM_UNKNOWN;
1120 }
1121 
addUndefinedGlob(StringRef arg)1122 void SymbolTable::addUndefinedGlob(StringRef arg) {
1123   Expected<GlobPattern> pat = GlobPattern::create(arg);
1124   if (!pat) {
1125     Err(ctx) << "/includeglob: " << toString(pat.takeError());
1126     return;
1127   }
1128 
1129   SmallVector<Symbol *, 0> syms;
1130   forEachSymbol([&syms, &pat](Symbol *sym) {
1131     if (pat->match(sym->getName())) {
1132       syms.push_back(sym);
1133     }
1134   });
1135 
1136   for (Symbol *sym : syms)
1137     addGCRoot(sym->getName());
1138 }
1139 
1140 // Convert stdcall/fastcall style symbols into unsuffixed symbols,
1141 // with or without a leading underscore. (MinGW specific.)
killAt(StringRef sym,bool prefix)1142 static StringRef killAt(StringRef sym, bool prefix) {
1143   if (sym.empty())
1144     return sym;
1145   // Strip any trailing stdcall suffix
1146   sym = sym.substr(0, sym.find('@', 1));
1147   if (!sym.starts_with("@")) {
1148     if (prefix && !sym.starts_with("_"))
1149       return saver().save("_" + sym);
1150     return sym;
1151   }
1152   // For fastcall, remove the leading @ and replace it with an
1153   // underscore, if prefixes are used.
1154   sym = sym.substr(1);
1155   if (prefix)
1156     sym = saver().save("_" + sym);
1157   return sym;
1158 }
1159 
exportSourceName(ExportSource s)1160 static StringRef exportSourceName(ExportSource s) {
1161   switch (s) {
1162   case ExportSource::Directives:
1163     return "source file (directives)";
1164   case ExportSource::Export:
1165     return "/export";
1166   case ExportSource::ModuleDefinition:
1167     return "/def";
1168   default:
1169     llvm_unreachable("unknown ExportSource");
1170   }
1171 }
1172 
1173 // Performs error checking on all /export arguments.
1174 // It also sets ordinals.
fixupExports()1175 void SymbolTable::fixupExports() {
1176   llvm::TimeTraceScope timeScope("Fixup exports");
1177   // Symbol ordinals must be unique.
1178   std::set<uint16_t> ords;
1179   for (Export &e : exports) {
1180     if (e.ordinal == 0)
1181       continue;
1182     if (!ords.insert(e.ordinal).second)
1183       Fatal(ctx) << "duplicate export ordinal: " << e.name;
1184   }
1185 
1186   for (Export &e : exports) {
1187     if (!e.exportAs.empty()) {
1188       e.exportName = e.exportAs;
1189       continue;
1190     }
1191 
1192     StringRef sym =
1193         !e.forwardTo.empty() || e.extName.empty() ? e.name : e.extName;
1194     if (machine == I386 && sym.starts_with("_")) {
1195       // In MSVC mode, a fully decorated stdcall function is exported
1196       // as-is with the leading underscore (with type IMPORT_NAME).
1197       // In MinGW mode, a decorated stdcall function gets the underscore
1198       // removed, just like normal cdecl functions.
1199       if (ctx.config.mingw || !sym.contains('@')) {
1200         e.exportName = sym.substr(1);
1201         continue;
1202       }
1203     }
1204     if (isEC() && !e.data && !e.constant) {
1205       if (std::optional<std::string> demangledName =
1206               getArm64ECDemangledFunctionName(sym)) {
1207         e.exportName = saver().save(*demangledName);
1208         continue;
1209       }
1210     }
1211     e.exportName = sym;
1212   }
1213 
1214   if (ctx.config.killAt && machine == I386) {
1215     for (Export &e : exports) {
1216       e.name = killAt(e.name, true);
1217       e.exportName = killAt(e.exportName, false);
1218       e.extName = killAt(e.extName, true);
1219       e.symbolName = killAt(e.symbolName, true);
1220     }
1221   }
1222 
1223   // Uniquefy by name.
1224   DenseMap<StringRef, std::pair<Export *, unsigned>> map(exports.size());
1225   std::vector<Export> v;
1226   for (Export &e : exports) {
1227     auto pair = map.insert(std::make_pair(e.exportName, std::make_pair(&e, 0)));
1228     bool inserted = pair.second;
1229     if (inserted) {
1230       pair.first->second.second = v.size();
1231       v.push_back(e);
1232       continue;
1233     }
1234     Export *existing = pair.first->second.first;
1235     if (e == *existing || e.name != existing->name)
1236       continue;
1237     // If the existing export comes from .OBJ directives, we are allowed to
1238     // overwrite it with /DEF: or /EXPORT without any warning, as MSVC link.exe
1239     // does.
1240     if (existing->source == ExportSource::Directives) {
1241       *existing = e;
1242       v[pair.first->second.second] = e;
1243       continue;
1244     }
1245     if (existing->source == e.source) {
1246       Warn(ctx) << "duplicate " << exportSourceName(existing->source)
1247                 << " option: " << e.name;
1248     } else {
1249       Warn(ctx) << "duplicate export: " << e.name << " first seen in "
1250                 << exportSourceName(existing->source) << ", now in "
1251                 << exportSourceName(e.source);
1252     }
1253   }
1254   exports = std::move(v);
1255 
1256   // Sort by name.
1257   llvm::sort(exports, [](const Export &a, const Export &b) {
1258     return a.exportName < b.exportName;
1259   });
1260 }
1261 
assignExportOrdinals()1262 void SymbolTable::assignExportOrdinals() {
1263   // Assign unique ordinals if default (= 0).
1264   uint32_t max = 0;
1265   for (Export &e : exports)
1266     max = std::max(max, (uint32_t)e.ordinal);
1267   for (Export &e : exports)
1268     if (e.ordinal == 0)
1269       e.ordinal = ++max;
1270   if (max > std::numeric_limits<uint16_t>::max())
1271     Fatal(ctx) << "too many exported symbols (got " << max << ", max "
1272                << Twine(std::numeric_limits<uint16_t>::max()) << ")";
1273 }
1274 
parseModuleDefs(StringRef path)1275 void SymbolTable::parseModuleDefs(StringRef path) {
1276   llvm::TimeTraceScope timeScope("Parse def file");
1277   std::unique_ptr<MemoryBuffer> mb =
1278       CHECK(MemoryBuffer::getFile(path, /*IsText=*/false,
1279                                   /*RequiresNullTerminator=*/false,
1280                                   /*IsVolatile=*/true),
1281             "could not open " + path);
1282   COFFModuleDefinition m = check(parseCOFFModuleDefinition(
1283       mb->getMemBufferRef(), machine, ctx.config.mingw));
1284 
1285   // Include in /reproduce: output if applicable.
1286   ctx.driver.takeBuffer(std::move(mb));
1287 
1288   if (ctx.config.outputFile.empty())
1289     ctx.config.outputFile = std::string(saver().save(m.OutputFile));
1290   ctx.config.importName = std::string(saver().save(m.ImportName));
1291   if (m.ImageBase)
1292     ctx.config.imageBase = m.ImageBase;
1293   if (m.StackReserve)
1294     ctx.config.stackReserve = m.StackReserve;
1295   if (m.StackCommit)
1296     ctx.config.stackCommit = m.StackCommit;
1297   if (m.HeapReserve)
1298     ctx.config.heapReserve = m.HeapReserve;
1299   if (m.HeapCommit)
1300     ctx.config.heapCommit = m.HeapCommit;
1301   if (m.MajorImageVersion)
1302     ctx.config.majorImageVersion = m.MajorImageVersion;
1303   if (m.MinorImageVersion)
1304     ctx.config.minorImageVersion = m.MinorImageVersion;
1305   if (m.MajorOSVersion)
1306     ctx.config.majorOSVersion = m.MajorOSVersion;
1307   if (m.MinorOSVersion)
1308     ctx.config.minorOSVersion = m.MinorOSVersion;
1309 
1310   for (COFFShortExport e1 : m.Exports) {
1311     Export e2;
1312     // Renamed exports are parsed and set as "ExtName = Name". If Name has
1313     // the form "OtherDll.Func", it shouldn't be a normal exported
1314     // function but a forward to another DLL instead. This is supported
1315     // by both MS and GNU linkers.
1316     if (!e1.ExtName.empty() && e1.ExtName != e1.Name &&
1317         StringRef(e1.Name).contains('.')) {
1318       e2.name = saver().save(e1.ExtName);
1319       e2.forwardTo = saver().save(e1.Name);
1320     } else {
1321       e2.name = saver().save(e1.Name);
1322       e2.extName = saver().save(e1.ExtName);
1323     }
1324     e2.exportAs = saver().save(e1.ExportAs);
1325     e2.importName = saver().save(e1.ImportName);
1326     e2.ordinal = e1.Ordinal;
1327     e2.noname = e1.Noname;
1328     e2.data = e1.Data;
1329     e2.isPrivate = e1.Private;
1330     e2.constant = e1.Constant;
1331     e2.source = ExportSource::ModuleDefinition;
1332     exports.push_back(e2);
1333   }
1334 }
1335 
1336 // Parse a string of the form of "<from>=<to>".
parseAlternateName(StringRef s)1337 void SymbolTable::parseAlternateName(StringRef s) {
1338   auto [from, to] = s.split('=');
1339   if (from.empty() || to.empty())
1340     Fatal(ctx) << "/alternatename: invalid argument: " << s;
1341   auto it = alternateNames.find(from);
1342   if (it != alternateNames.end() && it->second != to)
1343     Fatal(ctx) << "/alternatename: conflicts: " << s;
1344   alternateNames.insert(it, std::make_pair(from, to));
1345 }
1346 
resolveAlternateNames()1347 void SymbolTable::resolveAlternateNames() {
1348   // Add weak aliases. Weak aliases is a mechanism to give remaining
1349   // undefined symbols final chance to be resolved successfully.
1350   for (auto pair : alternateNames) {
1351     StringRef from = pair.first;
1352     StringRef to = pair.second;
1353     Symbol *sym = find(from);
1354     if (!sym)
1355       continue;
1356     if (auto *u = dyn_cast<Undefined>(sym)) {
1357       if (u->weakAlias) {
1358         // On ARM64EC, anti-dependency aliases are treated as undefined
1359         // symbols unless a demangled symbol aliases a defined one, which
1360         // is part of the implementation.
1361         if (!isEC() || !u->isAntiDep)
1362           continue;
1363         if (!isa<Undefined>(u->weakAlias) &&
1364             !isArm64ECMangledFunctionName(u->getName()))
1365           continue;
1366       }
1367 
1368       // Check if the destination symbol is defined. If not, skip it.
1369       // It may still be resolved later if more input files are added.
1370       // Also skip anti-dependency targets, as they can't be chained anyway.
1371       Symbol *toSym = find(to);
1372       if (!toSym)
1373         continue;
1374       auto toUndef = dyn_cast<Undefined>(toSym);
1375       if (toUndef && (!toUndef->weakAlias || toUndef->isAntiDep))
1376         continue;
1377       toSym->isUsedInRegularObj = true;
1378       if (toSym->isLazy())
1379         forceLazy(toSym);
1380       u->setWeakAlias(toSym);
1381     }
1382   }
1383 }
1384 
1385 // Parses /aligncomm option argument.
parseAligncomm(StringRef s)1386 void SymbolTable::parseAligncomm(StringRef s) {
1387   auto [name, align] = s.split(',');
1388   if (name.empty() || align.empty()) {
1389     Err(ctx) << "/aligncomm: invalid argument: " << s;
1390     return;
1391   }
1392   int v;
1393   if (align.getAsInteger(0, v)) {
1394     Err(ctx) << "/aligncomm: invalid argument: " << s;
1395     return;
1396   }
1397   alignComm[std::string(name)] = std::max(alignComm[std::string(name)], 1 << v);
1398 }
1399 
addUndefined(StringRef name)1400 Symbol *SymbolTable::addUndefined(StringRef name) {
1401   return addUndefined(name, nullptr, false);
1402 }
1403 
printSymbol(Symbol * sym) const1404 std::string SymbolTable::printSymbol(Symbol *sym) const {
1405   std::string name = maybeDemangleSymbol(ctx, sym->getName());
1406   if (ctx.hybridSymtab)
1407     return name + (isEC() ? " (EC symbol)" : " (native symbol)");
1408   return name;
1409 }
1410 
compileBitcodeFiles()1411 void SymbolTable::compileBitcodeFiles() {
1412   if (bitcodeFileInstances.empty())
1413     return;
1414 
1415   llvm::TimeTraceScope timeScope("Compile bitcode");
1416   ScopedTimer t(ctx.ltoTimer);
1417   lto.reset(new BitcodeCompiler(ctx));
1418   for (BitcodeFile *f : bitcodeFileInstances)
1419     lto->add(*f);
1420   for (InputFile *newObj : lto->compile()) {
1421     ObjFile *obj = cast<ObjFile>(newObj);
1422     obj->parse();
1423     ctx.objFileInstances.push_back(obj);
1424   }
1425 }
1426 
1427 } // namespace lld::coff
1428