xref: /freebsd/contrib/llvm-project/lld/COFF/SymbolTable.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolTable.h"
10 #include "COFFLinkerContext.h"
11 #include "Config.h"
12 #include "Driver.h"
13 #include "LTO.h"
14 #include "PDB.h"
15 #include "Symbols.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "lld/Common/Timer.h"
19 #include "llvm/DebugInfo/DIContext.h"
20 #include "llvm/IR/LLVMContext.h"
21 #include "llvm/LTO/LTO.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/TimeProfiler.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <utility>
26 
27 using namespace llvm;
28 
29 namespace lld::coff {
30 
31 StringRef ltrim1(StringRef s, const char *chars) {
32   if (!s.empty() && strchr(chars, s[0]))
33     return s.substr(1);
34   return s;
35 }
36 
37 static bool compatibleMachineType(COFFLinkerContext &ctx, MachineTypes mt) {
38   if (mt == IMAGE_FILE_MACHINE_UNKNOWN)
39     return true;
40   switch (ctx.config.machine) {
41   case ARM64:
42     return mt == ARM64 || mt == ARM64X;
43   case ARM64EC:
44     return COFF::isArm64EC(mt) || mt == AMD64;
45   case ARM64X:
46     return COFF::isAnyArm64(mt) || mt == AMD64;
47   default:
48     return ctx.config.machine == mt;
49   }
50 }
51 
52 void SymbolTable::addFile(InputFile *file) {
53   log("Reading " + toString(file));
54   if (file->lazy) {
55     if (auto *f = dyn_cast<BitcodeFile>(file))
56       f->parseLazy();
57     else
58       cast<ObjFile>(file)->parseLazy();
59   } else {
60     file->parse();
61     if (auto *f = dyn_cast<ObjFile>(file)) {
62       ctx.objFileInstances.push_back(f);
63     } else if (auto *f = dyn_cast<BitcodeFile>(file)) {
64       if (ltoCompilationDone) {
65         error("LTO object file " + toString(file) + " linked in after "
66               "doing LTO compilation.");
67       }
68       ctx.bitcodeFileInstances.push_back(f);
69     } else if (auto *f = dyn_cast<ImportFile>(file)) {
70       ctx.importFileInstances.push_back(f);
71     }
72   }
73 
74   MachineTypes mt = file->getMachineType();
75   if (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN) {
76     ctx.config.machine = mt;
77     ctx.driver.addWinSysRootLibSearchPaths();
78   } else if (!compatibleMachineType(ctx, mt)) {
79     error(toString(file) + ": machine type " + machineToStr(mt) +
80           " conflicts with " + machineToStr(ctx.config.machine));
81     return;
82   }
83 
84   ctx.driver.parseDirectives(file);
85 }
86 
87 static void errorOrWarn(const Twine &s, bool forceUnresolved) {
88   if (forceUnresolved)
89     warn(s);
90   else
91     error(s);
92 }
93 
94 // Causes the file associated with a lazy symbol to be linked in.
95 static void forceLazy(Symbol *s) {
96   s->pendingArchiveLoad = true;
97   switch (s->kind()) {
98   case Symbol::Kind::LazyArchiveKind: {
99     auto *l = cast<LazyArchive>(s);
100     l->file->addMember(l->sym);
101     break;
102   }
103   case Symbol::Kind::LazyObjectKind: {
104     InputFile *file = cast<LazyObject>(s)->file;
105     file->ctx.symtab.addFile(file);
106     break;
107   }
108   case Symbol::Kind::LazyDLLSymbolKind: {
109     auto *l = cast<LazyDLLSymbol>(s);
110     l->file->makeImport(l->sym);
111     break;
112   }
113   default:
114     llvm_unreachable(
115         "symbol passed to forceLazy is not a LazyArchive or LazyObject");
116   }
117 }
118 
119 // Returns the symbol in SC whose value is <= Addr that is closest to Addr.
120 // This is generally the global variable or function whose definition contains
121 // Addr.
122 static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
123   DefinedRegular *candidate = nullptr;
124 
125   for (Symbol *s : sc->file->getSymbols()) {
126     auto *d = dyn_cast_or_null<DefinedRegular>(s);
127     if (!d || !d->data || d->file != sc->file || d->getChunk() != sc ||
128         d->getValue() > addr ||
129         (candidate && d->getValue() < candidate->getValue()))
130       continue;
131 
132     candidate = d;
133   }
134 
135   return candidate;
136 }
137 
138 static std::vector<std::string> getSymbolLocations(BitcodeFile *file) {
139   std::string res("\n>>> referenced by ");
140   StringRef source = file->obj->getSourceFileName();
141   if (!source.empty())
142     res += source.str() + "\n>>>               ";
143   res += toString(file);
144   return {res};
145 }
146 
147 static std::optional<std::pair<StringRef, uint32_t>>
148 getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
149   std::optional<DILineInfo> optionalLineInfo =
150       c->file->getDILineInfo(addr, c->getSectionNumber() - 1);
151   if (!optionalLineInfo)
152     return std::nullopt;
153   const DILineInfo &lineInfo = *optionalLineInfo;
154   if (lineInfo.FileName == DILineInfo::BadString)
155     return std::nullopt;
156   return std::make_pair(saver().save(lineInfo.FileName), lineInfo.Line);
157 }
158 
159 static std::optional<std::pair<StringRef, uint32_t>>
160 getFileLine(const SectionChunk *c, uint32_t addr) {
161   // MinGW can optionally use codeview, even if the default is dwarf.
162   std::optional<std::pair<StringRef, uint32_t>> fileLine =
163       getFileLineCodeView(c, addr);
164   // If codeview didn't yield any result, check dwarf in MinGW mode.
165   if (!fileLine && c->file->ctx.config.mingw)
166     fileLine = getFileLineDwarf(c, addr);
167   return fileLine;
168 }
169 
170 // Given a file and the index of a symbol in that file, returns a description
171 // of all references to that symbol from that file. If no debug information is
172 // available, returns just the name of the file, else one string per actual
173 // reference as described in the debug info.
174 // Returns up to maxStrings string descriptions, along with the total number of
175 // locations found.
176 static std::pair<std::vector<std::string>, size_t>
177 getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) {
178   struct Location {
179     Symbol *sym;
180     std::pair<StringRef, uint32_t> fileLine;
181   };
182   std::vector<Location> locations;
183   size_t numLocations = 0;
184 
185   for (Chunk *c : file->getChunks()) {
186     auto *sc = dyn_cast<SectionChunk>(c);
187     if (!sc)
188       continue;
189     for (const coff_relocation &r : sc->getRelocs()) {
190       if (r.SymbolTableIndex != symIndex)
191         continue;
192       numLocations++;
193       if (locations.size() >= maxStrings)
194         continue;
195 
196       std::optional<std::pair<StringRef, uint32_t>> fileLine =
197           getFileLine(sc, r.VirtualAddress);
198       Symbol *sym = getSymbol(sc, r.VirtualAddress);
199       if (fileLine)
200         locations.push_back({sym, *fileLine});
201       else if (sym)
202         locations.push_back({sym, {"", 0}});
203     }
204   }
205 
206   if (maxStrings == 0)
207     return std::make_pair(std::vector<std::string>(), numLocations);
208 
209   if (numLocations == 0)
210     return std::make_pair(
211         std::vector<std::string>{"\n>>> referenced by " + toString(file)}, 1);
212 
213   std::vector<std::string> symbolLocations(locations.size());
214   size_t i = 0;
215   for (Location loc : locations) {
216     llvm::raw_string_ostream os(symbolLocations[i++]);
217     os << "\n>>> referenced by ";
218     if (!loc.fileLine.first.empty())
219       os << loc.fileLine.first << ":" << loc.fileLine.second
220          << "\n>>>               ";
221     os << toString(file);
222     if (loc.sym)
223       os << ":(" << toString(file->ctx, *loc.sym) << ')';
224   }
225   return std::make_pair(symbolLocations, numLocations);
226 }
227 
228 std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
229   return getSymbolLocations(file, symIndex, SIZE_MAX).first;
230 }
231 
232 static std::pair<std::vector<std::string>, size_t>
233 getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) {
234   if (auto *o = dyn_cast<ObjFile>(file))
235     return getSymbolLocations(o, symIndex, maxStrings);
236   if (auto *b = dyn_cast<BitcodeFile>(file)) {
237     std::vector<std::string> symbolLocations = getSymbolLocations(b);
238     size_t numLocations = symbolLocations.size();
239     if (symbolLocations.size() > maxStrings)
240       symbolLocations.resize(maxStrings);
241     return std::make_pair(symbolLocations, numLocations);
242   }
243   llvm_unreachable("unsupported file type passed to getSymbolLocations");
244   return std::make_pair(std::vector<std::string>(), (size_t)0);
245 }
246 
// For an undefined symbol, stores all files referencing it and the index of
// the undefined symbol in each file.
struct UndefinedDiag {
  // The undefined symbol being diagnosed.
  Symbol *sym;
  // One referencing file together with the position of `sym` in that file's
  // symbol list.
  struct File {
    InputFile *file;
    uint32_t symIndex;
  };
  // All referencing files, in the order they were recorded.
  std::vector<File> files;
};
257 
258 static void reportUndefinedSymbol(const COFFLinkerContext &ctx,
259                                   const UndefinedDiag &undefDiag) {
260   std::string out;
261   llvm::raw_string_ostream os(out);
262   os << "undefined symbol: " << toString(ctx, *undefDiag.sym);
263 
264   const size_t maxUndefReferences = 3;
265   size_t numDisplayedRefs = 0, numRefs = 0;
266   for (const UndefinedDiag::File &ref : undefDiag.files) {
267     auto [symbolLocations, totalLocations] = getSymbolLocations(
268         ref.file, ref.symIndex, maxUndefReferences - numDisplayedRefs);
269 
270     numRefs += totalLocations;
271     numDisplayedRefs += symbolLocations.size();
272     for (const std::string &s : symbolLocations) {
273       os << s;
274     }
275   }
276   if (numDisplayedRefs < numRefs)
277     os << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times";
278   errorOrWarn(os.str(), ctx.config.forceUnresolved);
279 }
280 
281 void SymbolTable::loadMinGWSymbols() {
282   for (auto &i : symMap) {
283     Symbol *sym = i.second;
284     auto *undef = dyn_cast<Undefined>(sym);
285     if (!undef)
286       continue;
287     if (undef->getWeakAlias())
288       continue;
289 
290     StringRef name = undef->getName();
291 
292     if (ctx.config.machine == I386 && ctx.config.stdcallFixup) {
293       // Check if we can resolve an undefined decorated symbol by finding
294       // the intended target as an undecorated symbol (only with a leading
295       // underscore).
296       StringRef origName = name;
297       StringRef baseName = name;
298       // Trim down stdcall/fastcall/vectorcall symbols to the base name.
299       baseName = ltrim1(baseName, "_@");
300       baseName = baseName.substr(0, baseName.find('@'));
301       // Add a leading underscore, as it would be in cdecl form.
302       std::string newName = ("_" + baseName).str();
303       Symbol *l;
304       if (newName != origName && (l = find(newName)) != nullptr) {
305         // If we found a symbol and it is lazy; load it.
306         if (l->isLazy() && !l->pendingArchiveLoad) {
307           log("Loading lazy " + l->getName() + " from " +
308               l->getFile()->getName() + " for stdcall fixup");
309           forceLazy(l);
310         }
311         // If it's lazy or already defined, hook it up as weak alias.
312         if (l->isLazy() || isa<Defined>(l)) {
313           if (ctx.config.warnStdcallFixup)
314             warn("Resolving " + origName + " by linking to " + newName);
315           else
316             log("Resolving " + origName + " by linking to " + newName);
317           undef->weakAlias = l;
318           continue;
319         }
320       }
321     }
322 
323     if (ctx.config.autoImport) {
324       if (name.starts_with("__imp_"))
325         continue;
326       // If we have an undefined symbol, but we have a lazy symbol we could
327       // load, load it.
328       Symbol *l = find(("__imp_" + name).str());
329       if (!l || l->pendingArchiveLoad || !l->isLazy())
330         continue;
331 
332       log("Loading lazy " + l->getName() + " from " + l->getFile()->getName() +
333           " for automatic import");
334       forceLazy(l);
335     }
336   }
337 }
338 
339 Defined *SymbolTable::impSymbol(StringRef name) {
340   if (name.starts_with("__imp_"))
341     return nullptr;
342   return dyn_cast_or_null<Defined>(find(("__imp_" + name).str()));
343 }
344 
345 bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
346   Defined *imp = impSymbol(name);
347   if (!imp)
348     return false;
349 
350   // Replace the reference directly to a variable with a reference
351   // to the import address table instead. This obviously isn't right,
352   // but we mark the symbol as isRuntimePseudoReloc, and a later pass
353   // will add runtime pseudo relocations for every relocation against
354   // this Symbol. The runtime pseudo relocation framework expects the
355   // reference itself to point at the IAT entry.
356   size_t impSize = 0;
357   if (isa<DefinedImportData>(imp)) {
358     log("Automatically importing " + name + " from " +
359         cast<DefinedImportData>(imp)->getDLLName());
360     impSize = sizeof(DefinedImportData);
361   } else if (isa<DefinedRegular>(imp)) {
362     log("Automatically importing " + name + " from " +
363         toString(cast<DefinedRegular>(imp)->file));
364     impSize = sizeof(DefinedRegular);
365   } else {
366     warn("unable to automatically import " + name + " from " + imp->getName() +
367          " from " + toString(cast<DefinedRegular>(imp)->file) +
368          "; unexpected symbol type");
369     return false;
370   }
371   sym->replaceKeepingName(imp, impSize);
372   sym->isRuntimePseudoReloc = true;
373 
374   // There may exist symbols named .refptr.<name> which only consist
375   // of a single pointer to <name>. If it turns out <name> is
376   // automatically imported, we don't need to keep the .refptr.<name>
377   // pointer at all, but redirect all accesses to it to the IAT entry
378   // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
379   DefinedRegular *refptr =
380       dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
381   if (refptr && refptr->getChunk()->getSize() == ctx.config.wordsize) {
382     SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
383     if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
384       log("Replacing .refptr." + name + " with " + imp->getName());
385       refptr->getChunk()->live = false;
386       refptr->replaceKeepingName(imp, impSize);
387     }
388   }
389   return true;
390 }
391 
392 /// Helper function for reportUnresolvable and resolveRemainingUndefines.
393 /// This function emits an "undefined symbol" diagnostic for each symbol in
394 /// undefs. If localImports is not nullptr, it also emits a "locally
395 /// defined symbol imported" diagnostic for symbols in localImports.
396 /// objFiles and bitcodeFiles (if not nullptr) are used to report where
397 /// undefined symbols are referenced.
398 static void reportProblemSymbols(
399     const COFFLinkerContext &ctx, const SmallPtrSetImpl<Symbol *> &undefs,
400     const DenseMap<Symbol *, Symbol *> *localImports, bool needBitcodeFiles) {
401   // Return early if there is nothing to report (which should be
402   // the common case).
403   if (undefs.empty() && (!localImports || localImports->empty()))
404     return;
405 
406   for (Symbol *b : ctx.config.gcroot) {
407     if (undefs.count(b))
408       errorOrWarn("<root>: undefined symbol: " + toString(ctx, *b),
409                   ctx.config.forceUnresolved);
410     if (localImports)
411       if (Symbol *imp = localImports->lookup(b))
412         warn("<root>: locally defined symbol imported: " + toString(ctx, *imp) +
413              " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
414   }
415 
416   std::vector<UndefinedDiag> undefDiags;
417   DenseMap<Symbol *, int> firstDiag;
418 
419   auto processFile = [&](InputFile *file, ArrayRef<Symbol *> symbols) {
420     uint32_t symIndex = (uint32_t)-1;
421     for (Symbol *sym : symbols) {
422       ++symIndex;
423       if (!sym)
424         continue;
425       if (undefs.count(sym)) {
426         auto it = firstDiag.find(sym);
427         if (it == firstDiag.end()) {
428           firstDiag[sym] = undefDiags.size();
429           undefDiags.push_back({sym, {{file, symIndex}}});
430         } else {
431           undefDiags[it->second].files.push_back({file, symIndex});
432         }
433       }
434       if (localImports)
435         if (Symbol *imp = localImports->lookup(sym))
436           warn(toString(file) +
437                ": locally defined symbol imported: " + toString(ctx, *imp) +
438                " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
439     }
440   };
441 
442   for (ObjFile *file : ctx.objFileInstances)
443     processFile(file, file->getSymbols());
444 
445   if (needBitcodeFiles)
446     for (BitcodeFile *file : ctx.bitcodeFileInstances)
447       processFile(file, file->getSymbols());
448 
449   for (const UndefinedDiag &undefDiag : undefDiags)
450     reportUndefinedSymbol(ctx, undefDiag);
451 }
452 
453 void SymbolTable::reportUnresolvable() {
454   SmallPtrSet<Symbol *, 8> undefs;
455   for (auto &i : symMap) {
456     Symbol *sym = i.second;
457     auto *undef = dyn_cast<Undefined>(sym);
458     if (!undef || sym->deferUndefined)
459       continue;
460     if (undef->getWeakAlias())
461       continue;
462     StringRef name = undef->getName();
463     if (name.starts_with("__imp_")) {
464       Symbol *imp = find(name.substr(strlen("__imp_")));
465       if (Defined *def = dyn_cast_or_null<Defined>(imp)) {
466         def->isUsedInRegularObj = true;
467         continue;
468       }
469     }
470     if (name.contains("_PchSym_"))
471       continue;
472     if (ctx.config.autoImport && impSymbol(name))
473       continue;
474     undefs.insert(sym);
475   }
476 
477   reportProblemSymbols(ctx, undefs,
478                        /* localImports */ nullptr, true);
479 }
480 
481 void SymbolTable::resolveRemainingUndefines() {
482   llvm::TimeTraceScope timeScope("Resolve remaining undefined symbols");
483   SmallPtrSet<Symbol *, 8> undefs;
484   DenseMap<Symbol *, Symbol *> localImports;
485 
486   for (auto &i : symMap) {
487     Symbol *sym = i.second;
488     auto *undef = dyn_cast<Undefined>(sym);
489     if (!undef)
490       continue;
491     if (!sym->isUsedInRegularObj)
492       continue;
493 
494     StringRef name = undef->getName();
495 
496     // A weak alias may have been resolved, so check for that.
497     if (Defined *d = undef->getWeakAlias()) {
498       // We want to replace Sym with D. However, we can't just blindly
499       // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
500       // internal symbol, and internal symbols are stored as "unparented"
501       // Symbols. For that reason we need to check which type of symbol we
502       // are dealing with and copy the correct number of bytes.
503       if (isa<DefinedRegular>(d))
504         memcpy(sym, d, sizeof(DefinedRegular));
505       else if (isa<DefinedAbsolute>(d))
506         memcpy(sym, d, sizeof(DefinedAbsolute));
507       else
508         memcpy(sym, d, sizeof(SymbolUnion));
509       continue;
510     }
511 
512     // If we can resolve a symbol by removing __imp_ prefix, do that.
513     // This odd rule is for compatibility with MSVC linker.
514     if (name.starts_with("__imp_")) {
515       Symbol *imp = find(name.substr(strlen("__imp_")));
516       if (imp && isa<Defined>(imp)) {
517         auto *d = cast<Defined>(imp);
518         replaceSymbol<DefinedLocalImport>(sym, ctx, name, d);
519         localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
520         localImports[sym] = d;
521         continue;
522       }
523     }
524 
525     // We don't want to report missing Microsoft precompiled headers symbols.
526     // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
527     if (name.contains("_PchSym_"))
528       continue;
529 
530     if (ctx.config.autoImport && handleMinGWAutomaticImport(sym, name))
531       continue;
532 
533     // Remaining undefined symbols are not fatal if /force is specified.
534     // They are replaced with dummy defined symbols.
535     if (ctx.config.forceUnresolved)
536       replaceSymbol<DefinedAbsolute>(sym, ctx, name, 0);
537     undefs.insert(sym);
538   }
539 
540   reportProblemSymbols(
541       ctx, undefs,
542       ctx.config.warnLocallyDefinedImported ? &localImports : nullptr, false);
543 }
544 
545 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
546   bool inserted = false;
547   Symbol *&sym = symMap[CachedHashStringRef(name)];
548   if (!sym) {
549     sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
550     sym->isUsedInRegularObj = false;
551     sym->pendingArchiveLoad = false;
552     sym->canInline = true;
553     inserted = true;
554   }
555   return {sym, inserted};
556 }
557 
558 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
559   std::pair<Symbol *, bool> result = insert(name);
560   if (!file || !isa<BitcodeFile>(file))
561     result.first->isUsedInRegularObj = true;
562   return result;
563 }
564 
// Records the pair (`from`, `to`) in entryThunks; the recorded pairs are
// consumed later by initializeEntryThunks().
void SymbolTable::addEntryThunk(Symbol *from, Symbol *to) {
  entryThunks.push_back({from, to});
}
568 
569 void SymbolTable::initializeEntryThunks() {
570   for (auto it : entryThunks) {
571     auto *to = dyn_cast<Defined>(it.second);
572     if (!to)
573       continue;
574     auto *from = dyn_cast<DefinedRegular>(it.first);
575     // We need to be able to add padding to the function and fill it with an
576     // offset to its entry thunks. To ensure that padding the function is
577     // feasible, functions are required to be COMDAT symbols with no offset.
578     if (!from || !from->getChunk()->isCOMDAT() ||
579         cast<DefinedRegular>(from)->getValue()) {
580       error("non COMDAT symbol '" + from->getName() + "' in hybrid map");
581       continue;
582     }
583     from->getChunk()->setEntryThunk(to);
584   }
585 }
586 
587 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
588                                   bool isWeakAlias) {
589   auto [s, wasInserted] = insert(name, f);
590   if (wasInserted || (s->isLazy() && isWeakAlias)) {
591     replaceSymbol<Undefined>(s, name);
592     return s;
593   }
594   if (s->isLazy())
595     forceLazy(s);
596   return s;
597 }
598 
599 void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
600   StringRef name = sym.getName();
601   auto [s, wasInserted] = insert(name);
602   if (wasInserted) {
603     replaceSymbol<LazyArchive>(s, f, sym);
604     return;
605   }
606   auto *u = dyn_cast<Undefined>(s);
607   if (!u || u->weakAlias || s->pendingArchiveLoad)
608     return;
609   s->pendingArchiveLoad = true;
610   f->addMember(sym);
611 }
612 
613 void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
614   assert(f->lazy);
615   auto [s, wasInserted] = insert(n, f);
616   if (wasInserted) {
617     replaceSymbol<LazyObject>(s, f, n);
618     return;
619   }
620   auto *u = dyn_cast<Undefined>(s);
621   if (!u || u->weakAlias || s->pendingArchiveLoad)
622     return;
623   s->pendingArchiveLoad = true;
624   f->lazy = false;
625   addFile(f);
626 }
627 
628 void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
629                                    StringRef n) {
630   auto [s, wasInserted] = insert(n);
631   if (wasInserted) {
632     replaceSymbol<LazyDLLSymbol>(s, f, sym, n);
633     return;
634   }
635   auto *u = dyn_cast<Undefined>(s);
636   if (!u || u->weakAlias || s->pendingArchiveLoad)
637     return;
638   s->pendingArchiveLoad = true;
639   f->makeImport(sym);
640 }
641 
642 static std::string getSourceLocationBitcode(BitcodeFile *file) {
643   std::string res("\n>>> defined at ");
644   StringRef source = file->obj->getSourceFileName();
645   if (!source.empty())
646     res += source.str() + "\n>>>            ";
647   res += toString(file);
648   return res;
649 }
650 
651 static std::string getSourceLocationObj(ObjFile *file, SectionChunk *sc,
652                                         uint32_t offset, StringRef name) {
653   std::optional<std::pair<StringRef, uint32_t>> fileLine;
654   if (sc)
655     fileLine = getFileLine(sc, offset);
656   if (!fileLine)
657     fileLine = file->getVariableLocation(name);
658 
659   std::string res;
660   llvm::raw_string_ostream os(res);
661   os << "\n>>> defined at ";
662   if (fileLine)
663     os << fileLine->first << ":" << fileLine->second << "\n>>>            ";
664   os << toString(file);
665   return os.str();
666 }
667 
668 static std::string getSourceLocation(InputFile *file, SectionChunk *sc,
669                                      uint32_t offset, StringRef name) {
670   if (!file)
671     return "";
672   if (auto *o = dyn_cast<ObjFile>(file))
673     return getSourceLocationObj(o, sc, offset, name);
674   if (auto *b = dyn_cast<BitcodeFile>(file))
675     return getSourceLocationBitcode(b);
676   return "\n>>> defined at " + toString(file);
677 }
678 
679 // Construct and print an error message in the form of:
680 //
681 //   lld-link: error: duplicate symbol: foo
682 //   >>> defined at bar.c:30
683 //   >>>            bar.o
684 //   >>> defined at baz.c:563
685 //   >>>            baz.o
686 void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile,
687                                   SectionChunk *newSc,
688                                   uint32_t newSectionOffset) {
689   std::string msg;
690   llvm::raw_string_ostream os(msg);
691   os << "duplicate symbol: " << toString(ctx, *existing);
692 
693   DefinedRegular *d = dyn_cast<DefinedRegular>(existing);
694   if (d && isa<ObjFile>(d->getFile())) {
695     os << getSourceLocation(d->getFile(), d->getChunk(), d->getValue(),
696                             existing->getName());
697   } else {
698     os << getSourceLocation(existing->getFile(), nullptr, 0, "");
699   }
700   os << getSourceLocation(newFile, newSc, newSectionOffset,
701                           existing->getName());
702 
703   if (ctx.config.forceMultiple)
704     warn(os.str());
705   else
706     error(os.str());
707 }
708 
709 Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
710   auto [s, wasInserted] = insert(n, nullptr);
711   s->isUsedInRegularObj = true;
712   if (wasInserted || isa<Undefined>(s) || s->isLazy())
713     replaceSymbol<DefinedAbsolute>(s, ctx, n, sym);
714   else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
715     if (da->getVA() != sym.getValue())
716       reportDuplicate(s, nullptr);
717   } else if (!isa<DefinedCOFF>(s))
718     reportDuplicate(s, nullptr);
719   return s;
720 }
721 
722 Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
723   auto [s, wasInserted] = insert(n, nullptr);
724   s->isUsedInRegularObj = true;
725   if (wasInserted || isa<Undefined>(s) || s->isLazy())
726     replaceSymbol<DefinedAbsolute>(s, ctx, n, va);
727   else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
728     if (da->getVA() != va)
729       reportDuplicate(s, nullptr);
730   } else if (!isa<DefinedCOFF>(s))
731     reportDuplicate(s, nullptr);
732   return s;
733 }
734 
735 Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
736   auto [s, wasInserted] = insert(n, nullptr);
737   s->isUsedInRegularObj = true;
738   if (wasInserted || isa<Undefined>(s) || s->isLazy())
739     replaceSymbol<DefinedSynthetic>(s, n, c);
740   else if (!isa<DefinedCOFF>(s))
741     reportDuplicate(s, nullptr);
742   return s;
743 }
744 
745 Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
746                                 const coff_symbol_generic *sym, SectionChunk *c,
747                                 uint32_t sectionOffset, bool isWeak) {
748   auto [s, wasInserted] = insert(n, f);
749   if (wasInserted || !isa<DefinedRegular>(s) || s->isWeak)
750     replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ false,
751                                   /*IsExternal*/ true, sym, c, isWeak);
752   else if (!isWeak)
753     reportDuplicate(s, f, c, sectionOffset);
754   return s;
755 }
756 
757 std::pair<DefinedRegular *, bool>
758 SymbolTable::addComdat(InputFile *f, StringRef n,
759                        const coff_symbol_generic *sym) {
760   auto [s, wasInserted] = insert(n, f);
761   if (wasInserted || !isa<DefinedRegular>(s)) {
762     replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
763                                   /*IsExternal*/ true, sym, nullptr);
764     return {cast<DefinedRegular>(s), true};
765   }
766   auto *existingSymbol = cast<DefinedRegular>(s);
767   if (!existingSymbol->isCOMDAT)
768     reportDuplicate(s, f);
769   return {existingSymbol, false};
770 }
771 
772 Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
773                                const coff_symbol_generic *sym, CommonChunk *c) {
774   auto [s, wasInserted] = insert(n, f);
775   if (wasInserted || !isa<DefinedCOFF>(s))
776     replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
777   else if (auto *dc = dyn_cast<DefinedCommon>(s))
778     if (size > dc->getSize())
779       replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
780   return s;
781 }
782 
783 Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) {
784   auto [s, wasInserted] = insert(n, nullptr);
785   s->isUsedInRegularObj = true;
786   if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
787     replaceSymbol<DefinedImportData>(s, n, f);
788     return s;
789   }
790 
791   reportDuplicate(s, f);
792   return nullptr;
793 }
794 
795 Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
796                                     uint16_t machine) {
797   auto [s, wasInserted] = insert(name, nullptr);
798   s->isUsedInRegularObj = true;
799   if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
800     replaceSymbol<DefinedImportThunk>(s, ctx, name, id, machine);
801     return s;
802   }
803 
804   reportDuplicate(s, id->file);
805   return nullptr;
806 }
807 
808 void SymbolTable::addLibcall(StringRef name) {
809   Symbol *sym = findUnderscore(name);
810   if (!sym)
811     return;
812 
813   if (auto *l = dyn_cast<LazyArchive>(sym)) {
814     MemoryBufferRef mb = l->getMemberBuffer();
815     if (isBitcode(mb))
816       addUndefined(sym->getName());
817   } else if (LazyObject *o = dyn_cast<LazyObject>(sym)) {
818     if (isBitcode(o->file->mb))
819       addUndefined(sym->getName());
820   }
821 }
822 
823 std::vector<Chunk *> SymbolTable::getChunks() const {
824   std::vector<Chunk *> res;
825   for (ObjFile *file : ctx.objFileInstances) {
826     ArrayRef<Chunk *> v = file->getChunks();
827     res.insert(res.end(), v.begin(), v.end());
828   }
829   return res;
830 }
831 
832 Symbol *SymbolTable::find(StringRef name) const {
833   return symMap.lookup(CachedHashStringRef(name));
834 }
835 
836 Symbol *SymbolTable::findUnderscore(StringRef name) const {
837   if (ctx.config.machine == I386)
838     return find(("_" + name).str());
839   return find(name);
840 }
841 
842 // Return all symbols that start with Prefix, possibly ignoring the first
843 // character of Prefix or the first character symbol.
844 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
845   std::vector<Symbol *> syms;
846   for (auto pair : symMap) {
847     StringRef name = pair.first.val();
848     if (name.starts_with(prefix) || name.starts_with(prefix.drop_front()) ||
849         name.drop_front().starts_with(prefix) ||
850         name.drop_front().starts_with(prefix.drop_front())) {
851       syms.push_back(pair.second);
852     }
853   }
854   return syms;
855 }
856 
857 Symbol *SymbolTable::findMangle(StringRef name) {
858   if (Symbol *sym = find(name)) {
859     if (auto *u = dyn_cast<Undefined>(sym)) {
860       // We're specifically looking for weak aliases that ultimately resolve to
861       // defined symbols, hence the call to getWeakAlias() instead of just using
862       // the weakAlias member variable. This matches link.exe's behavior.
863       if (Symbol *weakAlias = u->getWeakAlias())
864         return weakAlias;
865     } else {
866       return sym;
867     }
868   }
869 
870   // Efficient fuzzy string lookup is impossible with a hash table, so iterate
871   // the symbol table once and collect all possibly matching symbols into this
872   // vector. Then compare each possibly matching symbol with each possible
873   // mangling.
874   std::vector<Symbol *> syms = getSymsWithPrefix(name);
875   auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
876     std::string prefix = t.str();
877     for (auto *s : syms)
878       if (s->getName().starts_with(prefix))
879         return s;
880     return nullptr;
881   };
882 
883   // For non-x86, just look for C++ functions.
884   if (ctx.config.machine != I386)
885     return findByPrefix("?" + name + "@@Y");
886 
887   if (!name.starts_with("_"))
888     return nullptr;
889   // Search for x86 stdcall function.
890   if (Symbol *s = findByPrefix(name + "@"))
891     return s;
892   // Search for x86 fastcall function.
893   if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
894     return s;
895   // Search for x86 vectorcall function.
896   if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
897     return s;
898   // Search for x86 C++ non-member function.
899   return findByPrefix("?" + name.substr(1) + "@@Y");
900 }
901 
// Convenience overload: adds an undefined reference to `name` that is not
// associated with any input file and is not a weak alias.
Symbol *SymbolTable::addUndefined(StringRef name) {
  return addUndefined(name, nullptr, false);
}
905 
906 void SymbolTable::compileBitcodeFiles() {
907   ltoCompilationDone = true;
908   if (ctx.bitcodeFileInstances.empty())
909     return;
910 
911   llvm::TimeTraceScope timeScope("Compile bitcode");
912   ScopedTimer t(ctx.ltoTimer);
913   lto.reset(new BitcodeCompiler(ctx));
914   for (BitcodeFile *f : ctx.bitcodeFileInstances)
915     lto->add(*f);
916   for (InputFile *newObj : lto->compile()) {
917     ObjFile *obj = cast<ObjFile>(newObj);
918     obj->parse();
919     ctx.objFileInstances.push_back(obj);
920   }
921 }
922 
923 } // namespace lld::coff
924