xref: /freebsd/contrib/llvm-project/lld/COFF/SymbolTable.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolTable.h"
10 #include "COFFLinkerContext.h"
11 #include "Config.h"
12 #include "Driver.h"
13 #include "LTO.h"
14 #include "PDB.h"
15 #include "Symbols.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "lld/Common/Timer.h"
19 #include "llvm/DebugInfo/DIContext.h"
20 #include "llvm/IR/LLVMContext.h"
21 #include "llvm/LTO/LTO.h"
22 #include "llvm/Object/WindowsMachineFlag.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <utility>
26 
27 using namespace llvm;
28 
29 namespace lld::coff {
30 
31 StringRef ltrim1(StringRef s, const char *chars) {
32   if (!s.empty() && strchr(chars, s[0]))
33     return s.substr(1);
34   return s;
35 }
36 
37 static bool compatibleMachineType(COFFLinkerContext &ctx, MachineTypes mt) {
38   if (mt == IMAGE_FILE_MACHINE_UNKNOWN)
39     return true;
40   switch (ctx.config.machine) {
41   case ARM64:
42     return mt == ARM64 || mt == ARM64X;
43   case ARM64EC:
44     return COFF::isArm64EC(mt) || mt == AMD64;
45   case ARM64X:
46     return COFF::isAnyArm64(mt) || mt == AMD64;
47   default:
48     return ctx.config.machine == mt;
49   }
50 }
51 
52 void SymbolTable::addFile(InputFile *file) {
53   log("Reading " + toString(file));
54   if (file->lazy) {
55     if (auto *f = dyn_cast<BitcodeFile>(file))
56       f->parseLazy();
57     else
58       cast<ObjFile>(file)->parseLazy();
59   } else {
60     file->parse();
61     if (auto *f = dyn_cast<ObjFile>(file)) {
62       ctx.objFileInstances.push_back(f);
63     } else if (auto *f = dyn_cast<BitcodeFile>(file)) {
64       ctx.bitcodeFileInstances.push_back(f);
65     } else if (auto *f = dyn_cast<ImportFile>(file)) {
66       ctx.importFileInstances.push_back(f);
67     }
68   }
69 
70   MachineTypes mt = file->getMachineType();
71   if (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN) {
72     ctx.config.machine = mt;
73     ctx.driver.addWinSysRootLibSearchPaths();
74   } else if (!compatibleMachineType(ctx, mt)) {
75     error(toString(file) + ": machine type " + machineToStr(mt) +
76           " conflicts with " + machineToStr(ctx.config.machine));
77     return;
78   }
79 
80   ctx.driver.parseDirectives(file);
81 }
82 
83 static void errorOrWarn(const Twine &s, bool forceUnresolved) {
84   if (forceUnresolved)
85     warn(s);
86   else
87     error(s);
88 }
89 
90 // Causes the file associated with a lazy symbol to be linked in.
91 static void forceLazy(Symbol *s) {
92   s->pendingArchiveLoad = true;
93   switch (s->kind()) {
94   case Symbol::Kind::LazyArchiveKind: {
95     auto *l = cast<LazyArchive>(s);
96     l->file->addMember(l->sym);
97     break;
98   }
99   case Symbol::Kind::LazyObjectKind: {
100     InputFile *file = cast<LazyObject>(s)->file;
101     file->ctx.symtab.addFile(file);
102     break;
103   }
104   case Symbol::Kind::LazyDLLSymbolKind: {
105     auto *l = cast<LazyDLLSymbol>(s);
106     l->file->makeImport(l->sym);
107     break;
108   }
109   default:
110     llvm_unreachable(
111         "symbol passed to forceLazy is not a LazyArchive or LazyObject");
112   }
113 }
114 
115 // Returns the symbol in SC whose value is <= Addr that is closest to Addr.
116 // This is generally the global variable or function whose definition contains
117 // Addr.
118 static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
119   DefinedRegular *candidate = nullptr;
120 
121   for (Symbol *s : sc->file->getSymbols()) {
122     auto *d = dyn_cast_or_null<DefinedRegular>(s);
123     if (!d || !d->data || d->file != sc->file || d->getChunk() != sc ||
124         d->getValue() > addr ||
125         (candidate && d->getValue() < candidate->getValue()))
126       continue;
127 
128     candidate = d;
129   }
130 
131   return candidate;
132 }
133 
134 static std::vector<std::string> getSymbolLocations(BitcodeFile *file) {
135   std::string res("\n>>> referenced by ");
136   StringRef source = file->obj->getSourceFileName();
137   if (!source.empty())
138     res += source.str() + "\n>>>               ";
139   res += toString(file);
140   return {res};
141 }
142 
143 static std::optional<std::pair<StringRef, uint32_t>>
144 getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
145   std::optional<DILineInfo> optionalLineInfo =
146       c->file->getDILineInfo(addr, c->getSectionNumber() - 1);
147   if (!optionalLineInfo)
148     return std::nullopt;
149   const DILineInfo &lineInfo = *optionalLineInfo;
150   if (lineInfo.FileName == DILineInfo::BadString)
151     return std::nullopt;
152   return std::make_pair(saver().save(lineInfo.FileName), lineInfo.Line);
153 }
154 
155 static std::optional<std::pair<StringRef, uint32_t>>
156 getFileLine(const SectionChunk *c, uint32_t addr) {
157   // MinGW can optionally use codeview, even if the default is dwarf.
158   std::optional<std::pair<StringRef, uint32_t>> fileLine =
159       getFileLineCodeView(c, addr);
160   // If codeview didn't yield any result, check dwarf in MinGW mode.
161   if (!fileLine && c->file->ctx.config.mingw)
162     fileLine = getFileLineDwarf(c, addr);
163   return fileLine;
164 }
165 
166 // Given a file and the index of a symbol in that file, returns a description
167 // of all references to that symbol from that file. If no debug information is
168 // available, returns just the name of the file, else one string per actual
169 // reference as described in the debug info.
170 // Returns up to maxStrings string descriptions, along with the total number of
171 // locations found.
172 static std::pair<std::vector<std::string>, size_t>
173 getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) {
174   struct Location {
175     Symbol *sym;
176     std::pair<StringRef, uint32_t> fileLine;
177   };
178   std::vector<Location> locations;
179   size_t numLocations = 0;
180 
181   for (Chunk *c : file->getChunks()) {
182     auto *sc = dyn_cast<SectionChunk>(c);
183     if (!sc)
184       continue;
185     for (const coff_relocation &r : sc->getRelocs()) {
186       if (r.SymbolTableIndex != symIndex)
187         continue;
188       numLocations++;
189       if (locations.size() >= maxStrings)
190         continue;
191 
192       std::optional<std::pair<StringRef, uint32_t>> fileLine =
193           getFileLine(sc, r.VirtualAddress);
194       Symbol *sym = getSymbol(sc, r.VirtualAddress);
195       if (fileLine)
196         locations.push_back({sym, *fileLine});
197       else if (sym)
198         locations.push_back({sym, {"", 0}});
199     }
200   }
201 
202   if (maxStrings == 0)
203     return std::make_pair(std::vector<std::string>(), numLocations);
204 
205   if (numLocations == 0)
206     return std::make_pair(
207         std::vector<std::string>{"\n>>> referenced by " + toString(file)}, 1);
208 
209   std::vector<std::string> symbolLocations(locations.size());
210   size_t i = 0;
211   for (Location loc : locations) {
212     llvm::raw_string_ostream os(symbolLocations[i++]);
213     os << "\n>>> referenced by ";
214     if (!loc.fileLine.first.empty())
215       os << loc.fileLine.first << ":" << loc.fileLine.second
216          << "\n>>>               ";
217     os << toString(file);
218     if (loc.sym)
219       os << ":(" << toString(file->ctx, *loc.sym) << ')';
220   }
221   return std::make_pair(symbolLocations, numLocations);
222 }
223 
224 std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
225   return getSymbolLocations(file, symIndex, SIZE_MAX).first;
226 }
227 
228 static std::pair<std::vector<std::string>, size_t>
229 getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) {
230   if (auto *o = dyn_cast<ObjFile>(file))
231     return getSymbolLocations(o, symIndex, maxStrings);
232   if (auto *b = dyn_cast<BitcodeFile>(file)) {
233     std::vector<std::string> symbolLocations = getSymbolLocations(b);
234     size_t numLocations = symbolLocations.size();
235     if (symbolLocations.size() > maxStrings)
236       symbolLocations.resize(maxStrings);
237     return std::make_pair(symbolLocations, numLocations);
238   }
239   llvm_unreachable("unsupported file type passed to getSymbolLocations");
240   return std::make_pair(std::vector<std::string>(), (size_t)0);
241 }
242 
// For an undefined symbol, stores all files referencing it and the index of
// the undefined symbol in each file. One instance is built per undefined
// symbol while scanning the input files, and is later turned into a single
// "undefined symbol" diagnostic.
struct UndefinedDiag {
  // The undefined symbol being reported.
  Symbol *sym;
  // One referencing file, together with the position of the symbol in that
  // file's symbol list.
  struct File {
    InputFile *file;
    uint32_t symIndex;
  };
  // Every file found to reference sym, in the order they were discovered.
  std::vector<File> files;
};
253 
254 static void reportUndefinedSymbol(const COFFLinkerContext &ctx,
255                                   const UndefinedDiag &undefDiag) {
256   std::string out;
257   llvm::raw_string_ostream os(out);
258   os << "undefined symbol: " << toString(ctx, *undefDiag.sym);
259 
260   const size_t maxUndefReferences = 3;
261   size_t numDisplayedRefs = 0, numRefs = 0;
262   for (const UndefinedDiag::File &ref : undefDiag.files) {
263     auto [symbolLocations, totalLocations] = getSymbolLocations(
264         ref.file, ref.symIndex, maxUndefReferences - numDisplayedRefs);
265 
266     numRefs += totalLocations;
267     numDisplayedRefs += symbolLocations.size();
268     for (const std::string &s : symbolLocations) {
269       os << s;
270     }
271   }
272   if (numDisplayedRefs < numRefs)
273     os << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times";
274   errorOrWarn(os.str(), ctx.config.forceUnresolved);
275 }
276 
277 void SymbolTable::loadMinGWSymbols() {
278   for (auto &i : symMap) {
279     Symbol *sym = i.second;
280     auto *undef = dyn_cast<Undefined>(sym);
281     if (!undef)
282       continue;
283     if (undef->getWeakAlias())
284       continue;
285 
286     StringRef name = undef->getName();
287 
288     if (ctx.config.machine == I386 && ctx.config.stdcallFixup) {
289       // Check if we can resolve an undefined decorated symbol by finding
290       // the intended target as an undecorated symbol (only with a leading
291       // underscore).
292       StringRef origName = name;
293       StringRef baseName = name;
294       // Trim down stdcall/fastcall/vectorcall symbols to the base name.
295       baseName = ltrim1(baseName, "_@");
296       baseName = baseName.substr(0, baseName.find('@'));
297       // Add a leading underscore, as it would be in cdecl form.
298       std::string newName = ("_" + baseName).str();
299       Symbol *l;
300       if (newName != origName && (l = find(newName)) != nullptr) {
301         // If we found a symbol and it is lazy; load it.
302         if (l->isLazy() && !l->pendingArchiveLoad) {
303           log("Loading lazy " + l->getName() + " from " +
304               l->getFile()->getName() + " for stdcall fixup");
305           forceLazy(l);
306         }
307         // If it's lazy or already defined, hook it up as weak alias.
308         if (l->isLazy() || isa<Defined>(l)) {
309           if (ctx.config.warnStdcallFixup)
310             warn("Resolving " + origName + " by linking to " + newName);
311           else
312             log("Resolving " + origName + " by linking to " + newName);
313           undef->weakAlias = l;
314           continue;
315         }
316       }
317     }
318 
319     if (ctx.config.autoImport) {
320       if (name.starts_with("__imp_"))
321         continue;
322       // If we have an undefined symbol, but we have a lazy symbol we could
323       // load, load it.
324       Symbol *l = find(("__imp_" + name).str());
325       if (!l || l->pendingArchiveLoad || !l->isLazy())
326         continue;
327 
328       log("Loading lazy " + l->getName() + " from " + l->getFile()->getName() +
329           " for automatic import");
330       forceLazy(l);
331     }
332   }
333 }
334 
335 Defined *SymbolTable::impSymbol(StringRef name) {
336   if (name.starts_with("__imp_"))
337     return nullptr;
338   return dyn_cast_or_null<Defined>(find(("__imp_" + name).str()));
339 }
340 
341 bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
342   Defined *imp = impSymbol(name);
343   if (!imp)
344     return false;
345 
346   // Replace the reference directly to a variable with a reference
347   // to the import address table instead. This obviously isn't right,
348   // but we mark the symbol as isRuntimePseudoReloc, and a later pass
349   // will add runtime pseudo relocations for every relocation against
350   // this Symbol. The runtime pseudo relocation framework expects the
351   // reference itself to point at the IAT entry.
352   size_t impSize = 0;
353   if (isa<DefinedImportData>(imp)) {
354     log("Automatically importing " + name + " from " +
355         cast<DefinedImportData>(imp)->getDLLName());
356     impSize = sizeof(DefinedImportData);
357   } else if (isa<DefinedRegular>(imp)) {
358     log("Automatically importing " + name + " from " +
359         toString(cast<DefinedRegular>(imp)->file));
360     impSize = sizeof(DefinedRegular);
361   } else {
362     warn("unable to automatically import " + name + " from " + imp->getName() +
363          " from " + toString(cast<DefinedRegular>(imp)->file) +
364          "; unexpected symbol type");
365     return false;
366   }
367   sym->replaceKeepingName(imp, impSize);
368   sym->isRuntimePseudoReloc = true;
369 
370   // There may exist symbols named .refptr.<name> which only consist
371   // of a single pointer to <name>. If it turns out <name> is
372   // automatically imported, we don't need to keep the .refptr.<name>
373   // pointer at all, but redirect all accesses to it to the IAT entry
374   // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
375   DefinedRegular *refptr =
376       dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
377   if (refptr && refptr->getChunk()->getSize() == ctx.config.wordsize) {
378     SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
379     if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
380       log("Replacing .refptr." + name + " with " + imp->getName());
381       refptr->getChunk()->live = false;
382       refptr->replaceKeepingName(imp, impSize);
383     }
384   }
385   return true;
386 }
387 
388 /// Helper function for reportUnresolvable and resolveRemainingUndefines.
389 /// This function emits an "undefined symbol" diagnostic for each symbol in
390 /// undefs. If localImports is not nullptr, it also emits a "locally
391 /// defined symbol imported" diagnostic for symbols in localImports.
392 /// objFiles and bitcodeFiles (if not nullptr) are used to report where
393 /// undefined symbols are referenced.
394 static void reportProblemSymbols(
395     const COFFLinkerContext &ctx, const SmallPtrSetImpl<Symbol *> &undefs,
396     const DenseMap<Symbol *, Symbol *> *localImports, bool needBitcodeFiles) {
397   // Return early if there is nothing to report (which should be
398   // the common case).
399   if (undefs.empty() && (!localImports || localImports->empty()))
400     return;
401 
402   for (Symbol *b : ctx.config.gcroot) {
403     if (undefs.count(b))
404       errorOrWarn("<root>: undefined symbol: " + toString(ctx, *b),
405                   ctx.config.forceUnresolved);
406     if (localImports)
407       if (Symbol *imp = localImports->lookup(b))
408         warn("<root>: locally defined symbol imported: " + toString(ctx, *imp) +
409              " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
410   }
411 
412   std::vector<UndefinedDiag> undefDiags;
413   DenseMap<Symbol *, int> firstDiag;
414 
415   auto processFile = [&](InputFile *file, ArrayRef<Symbol *> symbols) {
416     uint32_t symIndex = (uint32_t)-1;
417     for (Symbol *sym : symbols) {
418       ++symIndex;
419       if (!sym)
420         continue;
421       if (undefs.count(sym)) {
422         auto it = firstDiag.find(sym);
423         if (it == firstDiag.end()) {
424           firstDiag[sym] = undefDiags.size();
425           undefDiags.push_back({sym, {{file, symIndex}}});
426         } else {
427           undefDiags[it->second].files.push_back({file, symIndex});
428         }
429       }
430       if (localImports)
431         if (Symbol *imp = localImports->lookup(sym))
432           warn(toString(file) +
433                ": locally defined symbol imported: " + toString(ctx, *imp) +
434                " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
435     }
436   };
437 
438   for (ObjFile *file : ctx.objFileInstances)
439     processFile(file, file->getSymbols());
440 
441   if (needBitcodeFiles)
442     for (BitcodeFile *file : ctx.bitcodeFileInstances)
443       processFile(file, file->getSymbols());
444 
445   for (const UndefinedDiag &undefDiag : undefDiags)
446     reportUndefinedSymbol(ctx, undefDiag);
447 }
448 
449 void SymbolTable::reportUnresolvable() {
450   SmallPtrSet<Symbol *, 8> undefs;
451   for (auto &i : symMap) {
452     Symbol *sym = i.second;
453     auto *undef = dyn_cast<Undefined>(sym);
454     if (!undef || sym->deferUndefined)
455       continue;
456     if (undef->getWeakAlias())
457       continue;
458     StringRef name = undef->getName();
459     if (name.starts_with("__imp_")) {
460       Symbol *imp = find(name.substr(strlen("__imp_")));
461       if (imp && isa<Defined>(imp))
462         continue;
463     }
464     if (name.contains("_PchSym_"))
465       continue;
466     if (ctx.config.autoImport && impSymbol(name))
467       continue;
468     undefs.insert(sym);
469   }
470 
471   reportProblemSymbols(ctx, undefs,
472                        /* localImports */ nullptr, true);
473 }
474 
475 void SymbolTable::resolveRemainingUndefines() {
476   SmallPtrSet<Symbol *, 8> undefs;
477   DenseMap<Symbol *, Symbol *> localImports;
478 
479   for (auto &i : symMap) {
480     Symbol *sym = i.second;
481     auto *undef = dyn_cast<Undefined>(sym);
482     if (!undef)
483       continue;
484     if (!sym->isUsedInRegularObj)
485       continue;
486 
487     StringRef name = undef->getName();
488 
489     // A weak alias may have been resolved, so check for that.
490     if (Defined *d = undef->getWeakAlias()) {
491       // We want to replace Sym with D. However, we can't just blindly
492       // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
493       // internal symbol, and internal symbols are stored as "unparented"
494       // Symbols. For that reason we need to check which type of symbol we
495       // are dealing with and copy the correct number of bytes.
496       if (isa<DefinedRegular>(d))
497         memcpy(sym, d, sizeof(DefinedRegular));
498       else if (isa<DefinedAbsolute>(d))
499         memcpy(sym, d, sizeof(DefinedAbsolute));
500       else
501         memcpy(sym, d, sizeof(SymbolUnion));
502       continue;
503     }
504 
505     // If we can resolve a symbol by removing __imp_ prefix, do that.
506     // This odd rule is for compatibility with MSVC linker.
507     if (name.starts_with("__imp_")) {
508       Symbol *imp = find(name.substr(strlen("__imp_")));
509       if (imp && isa<Defined>(imp)) {
510         auto *d = cast<Defined>(imp);
511         replaceSymbol<DefinedLocalImport>(sym, ctx, name, d);
512         localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
513         localImports[sym] = d;
514         continue;
515       }
516     }
517 
518     // We don't want to report missing Microsoft precompiled headers symbols.
519     // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
520     if (name.contains("_PchSym_"))
521       continue;
522 
523     if (ctx.config.autoImport && handleMinGWAutomaticImport(sym, name))
524       continue;
525 
526     // Remaining undefined symbols are not fatal if /force is specified.
527     // They are replaced with dummy defined symbols.
528     if (ctx.config.forceUnresolved)
529       replaceSymbol<DefinedAbsolute>(sym, ctx, name, 0);
530     undefs.insert(sym);
531   }
532 
533   reportProblemSymbols(
534       ctx, undefs,
535       ctx.config.warnLocallyDefinedImported ? &localImports : nullptr, false);
536 }
537 
538 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
539   bool inserted = false;
540   Symbol *&sym = symMap[CachedHashStringRef(name)];
541   if (!sym) {
542     sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
543     sym->isUsedInRegularObj = false;
544     sym->pendingArchiveLoad = false;
545     sym->canInline = true;
546     inserted = true;
547   }
548   return {sym, inserted};
549 }
550 
551 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
552   std::pair<Symbol *, bool> result = insert(name);
553   if (!file || !isa<BitcodeFile>(file))
554     result.first->isUsedInRegularObj = true;
555   return result;
556 }
557 
558 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
559                                   bool isWeakAlias) {
560   auto [s, wasInserted] = insert(name, f);
561   if (wasInserted || (s->isLazy() && isWeakAlias)) {
562     replaceSymbol<Undefined>(s, name);
563     return s;
564   }
565   if (s->isLazy())
566     forceLazy(s);
567   return s;
568 }
569 
570 void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
571   StringRef name = sym.getName();
572   auto [s, wasInserted] = insert(name);
573   if (wasInserted) {
574     replaceSymbol<LazyArchive>(s, f, sym);
575     return;
576   }
577   auto *u = dyn_cast<Undefined>(s);
578   if (!u || u->weakAlias || s->pendingArchiveLoad)
579     return;
580   s->pendingArchiveLoad = true;
581   f->addMember(sym);
582 }
583 
584 void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
585   assert(f->lazy);
586   auto [s, wasInserted] = insert(n, f);
587   if (wasInserted) {
588     replaceSymbol<LazyObject>(s, f, n);
589     return;
590   }
591   auto *u = dyn_cast<Undefined>(s);
592   if (!u || u->weakAlias || s->pendingArchiveLoad)
593     return;
594   s->pendingArchiveLoad = true;
595   f->lazy = false;
596   addFile(f);
597 }
598 
599 void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
600                                    StringRef n) {
601   auto [s, wasInserted] = insert(n);
602   if (wasInserted) {
603     replaceSymbol<LazyDLLSymbol>(s, f, sym, n);
604     return;
605   }
606   auto *u = dyn_cast<Undefined>(s);
607   if (!u || u->weakAlias || s->pendingArchiveLoad)
608     return;
609   s->pendingArchiveLoad = true;
610   f->makeImport(sym);
611 }
612 
613 static std::string getSourceLocationBitcode(BitcodeFile *file) {
614   std::string res("\n>>> defined at ");
615   StringRef source = file->obj->getSourceFileName();
616   if (!source.empty())
617     res += source.str() + "\n>>>            ";
618   res += toString(file);
619   return res;
620 }
621 
622 static std::string getSourceLocationObj(ObjFile *file, SectionChunk *sc,
623                                         uint32_t offset, StringRef name) {
624   std::optional<std::pair<StringRef, uint32_t>> fileLine;
625   if (sc)
626     fileLine = getFileLine(sc, offset);
627   if (!fileLine)
628     fileLine = file->getVariableLocation(name);
629 
630   std::string res;
631   llvm::raw_string_ostream os(res);
632   os << "\n>>> defined at ";
633   if (fileLine)
634     os << fileLine->first << ":" << fileLine->second << "\n>>>            ";
635   os << toString(file);
636   return os.str();
637 }
638 
639 static std::string getSourceLocation(InputFile *file, SectionChunk *sc,
640                                      uint32_t offset, StringRef name) {
641   if (!file)
642     return "";
643   if (auto *o = dyn_cast<ObjFile>(file))
644     return getSourceLocationObj(o, sc, offset, name);
645   if (auto *b = dyn_cast<BitcodeFile>(file))
646     return getSourceLocationBitcode(b);
647   return "\n>>> defined at " + toString(file);
648 }
649 
650 // Construct and print an error message in the form of:
651 //
652 //   lld-link: error: duplicate symbol: foo
653 //   >>> defined at bar.c:30
654 //   >>>            bar.o
655 //   >>> defined at baz.c:563
656 //   >>>            baz.o
657 void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile,
658                                   SectionChunk *newSc,
659                                   uint32_t newSectionOffset) {
660   std::string msg;
661   llvm::raw_string_ostream os(msg);
662   os << "duplicate symbol: " << toString(ctx, *existing);
663 
664   DefinedRegular *d = dyn_cast<DefinedRegular>(existing);
665   if (d && isa<ObjFile>(d->getFile())) {
666     os << getSourceLocation(d->getFile(), d->getChunk(), d->getValue(),
667                             existing->getName());
668   } else {
669     os << getSourceLocation(existing->getFile(), nullptr, 0, "");
670   }
671   os << getSourceLocation(newFile, newSc, newSectionOffset,
672                           existing->getName());
673 
674   if (ctx.config.forceMultiple)
675     warn(os.str());
676   else
677     error(os.str());
678 }
679 
680 Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
681   auto [s, wasInserted] = insert(n, nullptr);
682   s->isUsedInRegularObj = true;
683   if (wasInserted || isa<Undefined>(s) || s->isLazy())
684     replaceSymbol<DefinedAbsolute>(s, ctx, n, sym);
685   else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
686     if (da->getVA() != sym.getValue())
687       reportDuplicate(s, nullptr);
688   } else if (!isa<DefinedCOFF>(s))
689     reportDuplicate(s, nullptr);
690   return s;
691 }
692 
693 Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
694   auto [s, wasInserted] = insert(n, nullptr);
695   s->isUsedInRegularObj = true;
696   if (wasInserted || isa<Undefined>(s) || s->isLazy())
697     replaceSymbol<DefinedAbsolute>(s, ctx, n, va);
698   else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
699     if (da->getVA() != va)
700       reportDuplicate(s, nullptr);
701   } else if (!isa<DefinedCOFF>(s))
702     reportDuplicate(s, nullptr);
703   return s;
704 }
705 
706 Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
707   auto [s, wasInserted] = insert(n, nullptr);
708   s->isUsedInRegularObj = true;
709   if (wasInserted || isa<Undefined>(s) || s->isLazy())
710     replaceSymbol<DefinedSynthetic>(s, n, c);
711   else if (!isa<DefinedCOFF>(s))
712     reportDuplicate(s, nullptr);
713   return s;
714 }
715 
716 Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
717                                 const coff_symbol_generic *sym, SectionChunk *c,
718                                 uint32_t sectionOffset, bool isWeak) {
719   auto [s, wasInserted] = insert(n, f);
720   if (wasInserted || !isa<DefinedRegular>(s) || s->isWeak)
721     replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ false,
722                                   /*IsExternal*/ true, sym, c, isWeak);
723   else if (!isWeak)
724     reportDuplicate(s, f, c, sectionOffset);
725   return s;
726 }
727 
728 std::pair<DefinedRegular *, bool>
729 SymbolTable::addComdat(InputFile *f, StringRef n,
730                        const coff_symbol_generic *sym) {
731   auto [s, wasInserted] = insert(n, f);
732   if (wasInserted || !isa<DefinedRegular>(s)) {
733     replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
734                                   /*IsExternal*/ true, sym, nullptr);
735     return {cast<DefinedRegular>(s), true};
736   }
737   auto *existingSymbol = cast<DefinedRegular>(s);
738   if (!existingSymbol->isCOMDAT)
739     reportDuplicate(s, f);
740   return {existingSymbol, false};
741 }
742 
743 Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
744                                const coff_symbol_generic *sym, CommonChunk *c) {
745   auto [s, wasInserted] = insert(n, f);
746   if (wasInserted || !isa<DefinedCOFF>(s))
747     replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
748   else if (auto *dc = dyn_cast<DefinedCommon>(s))
749     if (size > dc->getSize())
750       replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
751   return s;
752 }
753 
754 Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) {
755   auto [s, wasInserted] = insert(n, nullptr);
756   s->isUsedInRegularObj = true;
757   if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
758     replaceSymbol<DefinedImportData>(s, n, f);
759     return s;
760   }
761 
762   reportDuplicate(s, f);
763   return nullptr;
764 }
765 
766 Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
767                                     uint16_t machine) {
768   auto [s, wasInserted] = insert(name, nullptr);
769   s->isUsedInRegularObj = true;
770   if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
771     replaceSymbol<DefinedImportThunk>(s, ctx, name, id, machine);
772     return s;
773   }
774 
775   reportDuplicate(s, id->file);
776   return nullptr;
777 }
778 
779 void SymbolTable::addLibcall(StringRef name) {
780   Symbol *sym = findUnderscore(name);
781   if (!sym)
782     return;
783 
784   if (auto *l = dyn_cast<LazyArchive>(sym)) {
785     MemoryBufferRef mb = l->getMemberBuffer();
786     if (isBitcode(mb))
787       addUndefined(sym->getName());
788   } else if (LazyObject *o = dyn_cast<LazyObject>(sym)) {
789     if (isBitcode(o->file->mb))
790       addUndefined(sym->getName());
791   }
792 }
793 
794 std::vector<Chunk *> SymbolTable::getChunks() const {
795   std::vector<Chunk *> res;
796   for (ObjFile *file : ctx.objFileInstances) {
797     ArrayRef<Chunk *> v = file->getChunks();
798     res.insert(res.end(), v.begin(), v.end());
799   }
800   return res;
801 }
802 
803 Symbol *SymbolTable::find(StringRef name) const {
804   return symMap.lookup(CachedHashStringRef(name));
805 }
806 
807 Symbol *SymbolTable::findUnderscore(StringRef name) const {
808   if (ctx.config.machine == I386)
809     return find(("_" + name).str());
810   return find(name);
811 }
812 
813 // Return all symbols that start with Prefix, possibly ignoring the first
814 // character of Prefix or the first character symbol.
815 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
816   std::vector<Symbol *> syms;
817   for (auto pair : symMap) {
818     StringRef name = pair.first.val();
819     if (name.starts_with(prefix) || name.starts_with(prefix.drop_front()) ||
820         name.drop_front().starts_with(prefix) ||
821         name.drop_front().starts_with(prefix.drop_front())) {
822       syms.push_back(pair.second);
823     }
824   }
825   return syms;
826 }
827 
828 Symbol *SymbolTable::findMangle(StringRef name) {
829   if (Symbol *sym = find(name)) {
830     if (auto *u = dyn_cast<Undefined>(sym)) {
831       // We're specifically looking for weak aliases that ultimately resolve to
832       // defined symbols, hence the call to getWeakAlias() instead of just using
833       // the weakAlias member variable. This matches link.exe's behavior.
834       if (Symbol *weakAlias = u->getWeakAlias())
835         return weakAlias;
836     } else {
837       return sym;
838     }
839   }
840 
841   // Efficient fuzzy string lookup is impossible with a hash table, so iterate
842   // the symbol table once and collect all possibly matching symbols into this
843   // vector. Then compare each possibly matching symbol with each possible
844   // mangling.
845   std::vector<Symbol *> syms = getSymsWithPrefix(name);
846   auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
847     std::string prefix = t.str();
848     for (auto *s : syms)
849       if (s->getName().starts_with(prefix))
850         return s;
851     return nullptr;
852   };
853 
854   // For non-x86, just look for C++ functions.
855   if (ctx.config.machine != I386)
856     return findByPrefix("?" + name + "@@Y");
857 
858   if (!name.starts_with("_"))
859     return nullptr;
860   // Search for x86 stdcall function.
861   if (Symbol *s = findByPrefix(name + "@"))
862     return s;
863   // Search for x86 fastcall function.
864   if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
865     return s;
866   // Search for x86 vectorcall function.
867   if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
868     return s;
869   // Search for x86 C++ non-member function.
870   return findByPrefix("?" + name.substr(1) + "@@Y");
871 }
872 
873 Symbol *SymbolTable::addUndefined(StringRef name) {
874   return addUndefined(name, nullptr, false);
875 }
876 
877 void SymbolTable::compileBitcodeFiles() {
878   if (ctx.bitcodeFileInstances.empty())
879     return;
880 
881   ScopedTimer t(ctx.ltoTimer);
882   lto.reset(new BitcodeCompiler(ctx));
883   for (BitcodeFile *f : ctx.bitcodeFileInstances)
884     lto->add(*f);
885   for (InputFile *newObj : lto->compile()) {
886     ObjFile *obj = cast<ObjFile>(newObj);
887     obj->parse();
888     ctx.objFileInstances.push_back(obj);
889   }
890 }
891 
892 } // namespace lld::coff
893