xref: /freebsd/contrib/llvm-project/lld/COFF/SymbolTable.cpp (revision b23dbabb7f3edb3f323a64f03e37be2c9a8b2a45)
1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolTable.h"
10 #include "COFFLinkerContext.h"
11 #include "Config.h"
12 #include "Driver.h"
13 #include "LTO.h"
14 #include "PDB.h"
15 #include "Symbols.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "lld/Common/Timer.h"
19 #include "llvm/DebugInfo/DIContext.h"
20 #include "llvm/IR/LLVMContext.h"
21 #include "llvm/LTO/LTO.h"
22 #include "llvm/Object/WindowsMachineFlag.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <utility>
26 
27 using namespace llvm;
28 
29 namespace lld::coff {
30 
31 StringRef ltrim1(StringRef s, const char *chars) {
32   if (!s.empty() && strchr(chars, s[0]))
33     return s.substr(1);
34   return s;
35 }
36 
37 void SymbolTable::addFile(InputFile *file) {
38   log("Reading " + toString(file));
39   if (file->lazy) {
40     if (auto *f = dyn_cast<BitcodeFile>(file))
41       f->parseLazy();
42     else
43       cast<ObjFile>(file)->parseLazy();
44   } else {
45     file->parse();
46     if (auto *f = dyn_cast<ObjFile>(file)) {
47       ctx.objFileInstances.push_back(f);
48     } else if (auto *f = dyn_cast<BitcodeFile>(file)) {
49       ctx.bitcodeFileInstances.push_back(f);
50     } else if (auto *f = dyn_cast<ImportFile>(file)) {
51       ctx.importFileInstances.push_back(f);
52     }
53   }
54 
55   MachineTypes mt = file->getMachineType();
56   if (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN) {
57     ctx.config.machine = mt;
58     ctx.driver.addWinSysRootLibSearchPaths();
59   } else if (mt != IMAGE_FILE_MACHINE_UNKNOWN && ctx.config.machine != mt) {
60     error(toString(file) + ": machine type " + machineToStr(mt) +
61           " conflicts with " + machineToStr(ctx.config.machine));
62     return;
63   }
64 
65   ctx.driver.parseDirectives(file);
66 }
67 
68 static void errorOrWarn(const Twine &s, bool forceUnresolved) {
69   if (forceUnresolved)
70     warn(s);
71   else
72     error(s);
73 }
74 
75 // Causes the file associated with a lazy symbol to be linked in.
76 static void forceLazy(Symbol *s) {
77   s->pendingArchiveLoad = true;
78   switch (s->kind()) {
79   case Symbol::Kind::LazyArchiveKind: {
80     auto *l = cast<LazyArchive>(s);
81     l->file->addMember(l->sym);
82     break;
83   }
84   case Symbol::Kind::LazyObjectKind: {
85     InputFile *file = cast<LazyObject>(s)->file;
86     file->ctx.symtab.addFile(file);
87     break;
88   }
89   case Symbol::Kind::LazyDLLSymbolKind: {
90     auto *l = cast<LazyDLLSymbol>(s);
91     l->file->makeImport(l->sym);
92     break;
93   }
94   default:
95     llvm_unreachable(
96         "symbol passed to forceLazy is not a LazyArchive or LazyObject");
97   }
98 }
99 
100 // Returns the symbol in SC whose value is <= Addr that is closest to Addr.
101 // This is generally the global variable or function whose definition contains
102 // Addr.
103 static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
104   DefinedRegular *candidate = nullptr;
105 
106   for (Symbol *s : sc->file->getSymbols()) {
107     auto *d = dyn_cast_or_null<DefinedRegular>(s);
108     if (!d || !d->data || d->file != sc->file || d->getChunk() != sc ||
109         d->getValue() > addr ||
110         (candidate && d->getValue() < candidate->getValue()))
111       continue;
112 
113     candidate = d;
114   }
115 
116   return candidate;
117 }
118 
119 static std::vector<std::string> getSymbolLocations(BitcodeFile *file) {
120   std::string res("\n>>> referenced by ");
121   StringRef source = file->obj->getSourceFileName();
122   if (!source.empty())
123     res += source.str() + "\n>>>               ";
124   res += toString(file);
125   return {res};
126 }
127 
128 static std::optional<std::pair<StringRef, uint32_t>>
129 getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
130   std::optional<DILineInfo> optionalLineInfo =
131       c->file->getDILineInfo(addr, c->getSectionNumber() - 1);
132   if (!optionalLineInfo)
133     return std::nullopt;
134   const DILineInfo &lineInfo = *optionalLineInfo;
135   if (lineInfo.FileName == DILineInfo::BadString)
136     return std::nullopt;
137   return std::make_pair(saver().save(lineInfo.FileName), lineInfo.Line);
138 }
139 
140 static std::optional<std::pair<StringRef, uint32_t>>
141 getFileLine(const SectionChunk *c, uint32_t addr) {
142   // MinGW can optionally use codeview, even if the default is dwarf.
143   std::optional<std::pair<StringRef, uint32_t>> fileLine =
144       getFileLineCodeView(c, addr);
145   // If codeview didn't yield any result, check dwarf in MinGW mode.
146   if (!fileLine && c->file->ctx.config.mingw)
147     fileLine = getFileLineDwarf(c, addr);
148   return fileLine;
149 }
150 
151 // Given a file and the index of a symbol in that file, returns a description
152 // of all references to that symbol from that file. If no debug information is
153 // available, returns just the name of the file, else one string per actual
154 // reference as described in the debug info.
155 // Returns up to maxStrings string descriptions, along with the total number of
156 // locations found.
157 static std::pair<std::vector<std::string>, size_t>
158 getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) {
159   struct Location {
160     Symbol *sym;
161     std::pair<StringRef, uint32_t> fileLine;
162   };
163   std::vector<Location> locations;
164   size_t numLocations = 0;
165 
166   for (Chunk *c : file->getChunks()) {
167     auto *sc = dyn_cast<SectionChunk>(c);
168     if (!sc)
169       continue;
170     for (const coff_relocation &r : sc->getRelocs()) {
171       if (r.SymbolTableIndex != symIndex)
172         continue;
173       numLocations++;
174       if (locations.size() >= maxStrings)
175         continue;
176 
177       std::optional<std::pair<StringRef, uint32_t>> fileLine =
178           getFileLine(sc, r.VirtualAddress);
179       Symbol *sym = getSymbol(sc, r.VirtualAddress);
180       if (fileLine)
181         locations.push_back({sym, *fileLine});
182       else if (sym)
183         locations.push_back({sym, {"", 0}});
184     }
185   }
186 
187   if (maxStrings == 0)
188     return std::make_pair(std::vector<std::string>(), numLocations);
189 
190   if (numLocations == 0)
191     return std::make_pair(
192         std::vector<std::string>{"\n>>> referenced by " + toString(file)}, 1);
193 
194   std::vector<std::string> symbolLocations(locations.size());
195   size_t i = 0;
196   for (Location loc : locations) {
197     llvm::raw_string_ostream os(symbolLocations[i++]);
198     os << "\n>>> referenced by ";
199     if (!loc.fileLine.first.empty())
200       os << loc.fileLine.first << ":" << loc.fileLine.second
201          << "\n>>>               ";
202     os << toString(file);
203     if (loc.sym)
204       os << ":(" << toString(file->ctx, *loc.sym) << ')';
205   }
206   return std::make_pair(symbolLocations, numLocations);
207 }
208 
209 std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
210   return getSymbolLocations(file, symIndex, SIZE_MAX).first;
211 }
212 
213 static std::pair<std::vector<std::string>, size_t>
214 getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) {
215   if (auto *o = dyn_cast<ObjFile>(file))
216     return getSymbolLocations(o, symIndex, maxStrings);
217   if (auto *b = dyn_cast<BitcodeFile>(file)) {
218     std::vector<std::string> symbolLocations = getSymbolLocations(b);
219     size_t numLocations = symbolLocations.size();
220     if (symbolLocations.size() > maxStrings)
221       symbolLocations.resize(maxStrings);
222     return std::make_pair(symbolLocations, numLocations);
223   }
224   llvm_unreachable("unsupported file type passed to getSymbolLocations");
225   return std::make_pair(std::vector<std::string>(), (size_t)0);
226 }
227 
// For an undefined symbol, stores all files referencing it and the index of
// the undefined symbol in each file.
struct UndefinedDiag {
  // The undefined symbol this diagnostic is about.
  Symbol *sym;
  // One referencing file, paired with the position of the symbol within that
  // file's symbol list.
  struct File {
    InputFile *file;
    uint32_t symIndex;
  };
  // All recorded references to sym, in the order they were found.
  std::vector<File> files;
};
238 
239 static void reportUndefinedSymbol(const COFFLinkerContext &ctx,
240                                   const UndefinedDiag &undefDiag) {
241   std::string out;
242   llvm::raw_string_ostream os(out);
243   os << "undefined symbol: " << toString(ctx, *undefDiag.sym);
244 
245   const size_t maxUndefReferences = 3;
246   size_t numDisplayedRefs = 0, numRefs = 0;
247   for (const UndefinedDiag::File &ref : undefDiag.files) {
248     auto [symbolLocations, totalLocations] = getSymbolLocations(
249         ref.file, ref.symIndex, maxUndefReferences - numDisplayedRefs);
250 
251     numRefs += totalLocations;
252     numDisplayedRefs += symbolLocations.size();
253     for (const std::string &s : symbolLocations) {
254       os << s;
255     }
256   }
257   if (numDisplayedRefs < numRefs)
258     os << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times";
259   errorOrWarn(os.str(), ctx.config.forceUnresolved);
260 }
261 
262 void SymbolTable::loadMinGWSymbols() {
263   for (auto &i : symMap) {
264     Symbol *sym = i.second;
265     auto *undef = dyn_cast<Undefined>(sym);
266     if (!undef)
267       continue;
268     if (undef->getWeakAlias())
269       continue;
270 
271     StringRef name = undef->getName();
272 
273     if (ctx.config.machine == I386 && ctx.config.stdcallFixup) {
274       // Check if we can resolve an undefined decorated symbol by finding
275       // the intended target as an undecorated symbol (only with a leading
276       // underscore).
277       StringRef origName = name;
278       StringRef baseName = name;
279       // Trim down stdcall/fastcall/vectorcall symbols to the base name.
280       baseName = ltrim1(baseName, "_@");
281       baseName = baseName.substr(0, baseName.find('@'));
282       // Add a leading underscore, as it would be in cdecl form.
283       std::string newName = ("_" + baseName).str();
284       Symbol *l;
285       if (newName != origName && (l = find(newName)) != nullptr) {
286         // If we found a symbol and it is lazy; load it.
287         if (l->isLazy() && !l->pendingArchiveLoad) {
288           log("Loading lazy " + l->getName() + " from " +
289               l->getFile()->getName() + " for stdcall fixup");
290           forceLazy(l);
291         }
292         // If it's lazy or already defined, hook it up as weak alias.
293         if (l->isLazy() || isa<Defined>(l)) {
294           if (ctx.config.warnStdcallFixup)
295             warn("Resolving " + origName + " by linking to " + newName);
296           else
297             log("Resolving " + origName + " by linking to " + newName);
298           undef->weakAlias = l;
299           continue;
300         }
301       }
302     }
303 
304     if (ctx.config.autoImport) {
305       if (name.startswith("__imp_"))
306         continue;
307       // If we have an undefined symbol, but we have a lazy symbol we could
308       // load, load it.
309       Symbol *l = find(("__imp_" + name).str());
310       if (!l || l->pendingArchiveLoad || !l->isLazy())
311         continue;
312 
313       log("Loading lazy " + l->getName() + " from " + l->getFile()->getName() +
314           " for automatic import");
315       forceLazy(l);
316     }
317   }
318 }
319 
320 Defined *SymbolTable::impSymbol(StringRef name) {
321   if (name.startswith("__imp_"))
322     return nullptr;
323   return dyn_cast_or_null<Defined>(find(("__imp_" + name).str()));
324 }
325 
326 bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
327   Defined *imp = impSymbol(name);
328   if (!imp)
329     return false;
330 
331   // Replace the reference directly to a variable with a reference
332   // to the import address table instead. This obviously isn't right,
333   // but we mark the symbol as isRuntimePseudoReloc, and a later pass
334   // will add runtime pseudo relocations for every relocation against
335   // this Symbol. The runtime pseudo relocation framework expects the
336   // reference itself to point at the IAT entry.
337   size_t impSize = 0;
338   if (isa<DefinedImportData>(imp)) {
339     log("Automatically importing " + name + " from " +
340         cast<DefinedImportData>(imp)->getDLLName());
341     impSize = sizeof(DefinedImportData);
342   } else if (isa<DefinedRegular>(imp)) {
343     log("Automatically importing " + name + " from " +
344         toString(cast<DefinedRegular>(imp)->file));
345     impSize = sizeof(DefinedRegular);
346   } else {
347     warn("unable to automatically import " + name + " from " + imp->getName() +
348          " from " + toString(cast<DefinedRegular>(imp)->file) +
349          "; unexpected symbol type");
350     return false;
351   }
352   sym->replaceKeepingName(imp, impSize);
353   sym->isRuntimePseudoReloc = true;
354 
355   // There may exist symbols named .refptr.<name> which only consist
356   // of a single pointer to <name>. If it turns out <name> is
357   // automatically imported, we don't need to keep the .refptr.<name>
358   // pointer at all, but redirect all accesses to it to the IAT entry
359   // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
360   DefinedRegular *refptr =
361       dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
362   if (refptr && refptr->getChunk()->getSize() == ctx.config.wordsize) {
363     SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
364     if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
365       log("Replacing .refptr." + name + " with " + imp->getName());
366       refptr->getChunk()->live = false;
367       refptr->replaceKeepingName(imp, impSize);
368     }
369   }
370   return true;
371 }
372 
373 /// Helper function for reportUnresolvable and resolveRemainingUndefines.
374 /// This function emits an "undefined symbol" diagnostic for each symbol in
375 /// undefs. If localImports is not nullptr, it also emits a "locally
376 /// defined symbol imported" diagnostic for symbols in localImports.
377 /// objFiles and bitcodeFiles (if not nullptr) are used to report where
378 /// undefined symbols are referenced.
379 static void reportProblemSymbols(
380     const COFFLinkerContext &ctx, const SmallPtrSetImpl<Symbol *> &undefs,
381     const DenseMap<Symbol *, Symbol *> *localImports, bool needBitcodeFiles) {
382   // Return early if there is nothing to report (which should be
383   // the common case).
384   if (undefs.empty() && (!localImports || localImports->empty()))
385     return;
386 
387   for (Symbol *b : ctx.config.gcroot) {
388     if (undefs.count(b))
389       errorOrWarn("<root>: undefined symbol: " + toString(ctx, *b),
390                   ctx.config.forceUnresolved);
391     if (localImports)
392       if (Symbol *imp = localImports->lookup(b))
393         warn("<root>: locally defined symbol imported: " + toString(ctx, *imp) +
394              " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
395   }
396 
397   std::vector<UndefinedDiag> undefDiags;
398   DenseMap<Symbol *, int> firstDiag;
399 
400   auto processFile = [&](InputFile *file, ArrayRef<Symbol *> symbols) {
401     uint32_t symIndex = (uint32_t)-1;
402     for (Symbol *sym : symbols) {
403       ++symIndex;
404       if (!sym)
405         continue;
406       if (undefs.count(sym)) {
407         auto it = firstDiag.find(sym);
408         if (it == firstDiag.end()) {
409           firstDiag[sym] = undefDiags.size();
410           undefDiags.push_back({sym, {{file, symIndex}}});
411         } else {
412           undefDiags[it->second].files.push_back({file, symIndex});
413         }
414       }
415       if (localImports)
416         if (Symbol *imp = localImports->lookup(sym))
417           warn(toString(file) +
418                ": locally defined symbol imported: " + toString(ctx, *imp) +
419                " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
420     }
421   };
422 
423   for (ObjFile *file : ctx.objFileInstances)
424     processFile(file, file->getSymbols());
425 
426   if (needBitcodeFiles)
427     for (BitcodeFile *file : ctx.bitcodeFileInstances)
428       processFile(file, file->getSymbols());
429 
430   for (const UndefinedDiag &undefDiag : undefDiags)
431     reportUndefinedSymbol(ctx, undefDiag);
432 }
433 
434 void SymbolTable::reportUnresolvable() {
435   SmallPtrSet<Symbol *, 8> undefs;
436   for (auto &i : symMap) {
437     Symbol *sym = i.second;
438     auto *undef = dyn_cast<Undefined>(sym);
439     if (!undef || sym->deferUndefined)
440       continue;
441     if (undef->getWeakAlias())
442       continue;
443     StringRef name = undef->getName();
444     if (name.startswith("__imp_")) {
445       Symbol *imp = find(name.substr(strlen("__imp_")));
446       if (imp && isa<Defined>(imp))
447         continue;
448     }
449     if (name.contains("_PchSym_"))
450       continue;
451     if (ctx.config.autoImport && impSymbol(name))
452       continue;
453     undefs.insert(sym);
454   }
455 
456   reportProblemSymbols(ctx, undefs,
457                        /* localImports */ nullptr, true);
458 }
459 
460 void SymbolTable::resolveRemainingUndefines() {
461   SmallPtrSet<Symbol *, 8> undefs;
462   DenseMap<Symbol *, Symbol *> localImports;
463 
464   for (auto &i : symMap) {
465     Symbol *sym = i.second;
466     auto *undef = dyn_cast<Undefined>(sym);
467     if (!undef)
468       continue;
469     if (!sym->isUsedInRegularObj)
470       continue;
471 
472     StringRef name = undef->getName();
473 
474     // A weak alias may have been resolved, so check for that.
475     if (Defined *d = undef->getWeakAlias()) {
476       // We want to replace Sym with D. However, we can't just blindly
477       // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
478       // internal symbol, and internal symbols are stored as "unparented"
479       // Symbols. For that reason we need to check which type of symbol we
480       // are dealing with and copy the correct number of bytes.
481       if (isa<DefinedRegular>(d))
482         memcpy(sym, d, sizeof(DefinedRegular));
483       else if (isa<DefinedAbsolute>(d))
484         memcpy(sym, d, sizeof(DefinedAbsolute));
485       else
486         memcpy(sym, d, sizeof(SymbolUnion));
487       continue;
488     }
489 
490     // If we can resolve a symbol by removing __imp_ prefix, do that.
491     // This odd rule is for compatibility with MSVC linker.
492     if (name.startswith("__imp_")) {
493       Symbol *imp = find(name.substr(strlen("__imp_")));
494       if (imp && isa<Defined>(imp)) {
495         auto *d = cast<Defined>(imp);
496         replaceSymbol<DefinedLocalImport>(sym, ctx, name, d);
497         localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
498         localImports[sym] = d;
499         continue;
500       }
501     }
502 
503     // We don't want to report missing Microsoft precompiled headers symbols.
504     // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
505     if (name.contains("_PchSym_"))
506       continue;
507 
508     if (ctx.config.autoImport && handleMinGWAutomaticImport(sym, name))
509       continue;
510 
511     // Remaining undefined symbols are not fatal if /force is specified.
512     // They are replaced with dummy defined symbols.
513     if (ctx.config.forceUnresolved)
514       replaceSymbol<DefinedAbsolute>(sym, ctx, name, 0);
515     undefs.insert(sym);
516   }
517 
518   reportProblemSymbols(
519       ctx, undefs,
520       ctx.config.warnLocallyDefinedImported ? &localImports : nullptr, false);
521 }
522 
523 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
524   bool inserted = false;
525   Symbol *&sym = symMap[CachedHashStringRef(name)];
526   if (!sym) {
527     sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
528     sym->isUsedInRegularObj = false;
529     sym->pendingArchiveLoad = false;
530     sym->canInline = true;
531     inserted = true;
532   }
533   return {sym, inserted};
534 }
535 
536 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
537   std::pair<Symbol *, bool> result = insert(name);
538   if (!file || !isa<BitcodeFile>(file))
539     result.first->isUsedInRegularObj = true;
540   return result;
541 }
542 
543 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
544                                   bool isWeakAlias) {
545   auto [s, wasInserted] = insert(name, f);
546   if (wasInserted || (s->isLazy() && isWeakAlias)) {
547     replaceSymbol<Undefined>(s, name);
548     return s;
549   }
550   if (s->isLazy())
551     forceLazy(s);
552   return s;
553 }
554 
555 void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
556   StringRef name = sym.getName();
557   auto [s, wasInserted] = insert(name);
558   if (wasInserted) {
559     replaceSymbol<LazyArchive>(s, f, sym);
560     return;
561   }
562   auto *u = dyn_cast<Undefined>(s);
563   if (!u || u->weakAlias || s->pendingArchiveLoad)
564     return;
565   s->pendingArchiveLoad = true;
566   f->addMember(sym);
567 }
568 
569 void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
570   assert(f->lazy);
571   auto [s, wasInserted] = insert(n, f);
572   if (wasInserted) {
573     replaceSymbol<LazyObject>(s, f, n);
574     return;
575   }
576   auto *u = dyn_cast<Undefined>(s);
577   if (!u || u->weakAlias || s->pendingArchiveLoad)
578     return;
579   s->pendingArchiveLoad = true;
580   f->lazy = false;
581   addFile(f);
582 }
583 
584 void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
585                                    StringRef n) {
586   auto [s, wasInserted] = insert(n);
587   if (wasInserted) {
588     replaceSymbol<LazyDLLSymbol>(s, f, sym, n);
589     return;
590   }
591   auto *u = dyn_cast<Undefined>(s);
592   if (!u || u->weakAlias || s->pendingArchiveLoad)
593     return;
594   s->pendingArchiveLoad = true;
595   f->makeImport(sym);
596 }
597 
598 static std::string getSourceLocationBitcode(BitcodeFile *file) {
599   std::string res("\n>>> defined at ");
600   StringRef source = file->obj->getSourceFileName();
601   if (!source.empty())
602     res += source.str() + "\n>>>            ";
603   res += toString(file);
604   return res;
605 }
606 
607 static std::string getSourceLocationObj(ObjFile *file, SectionChunk *sc,
608                                         uint32_t offset, StringRef name) {
609   std::optional<std::pair<StringRef, uint32_t>> fileLine;
610   if (sc)
611     fileLine = getFileLine(sc, offset);
612   if (!fileLine)
613     fileLine = file->getVariableLocation(name);
614 
615   std::string res;
616   llvm::raw_string_ostream os(res);
617   os << "\n>>> defined at ";
618   if (fileLine)
619     os << fileLine->first << ":" << fileLine->second << "\n>>>            ";
620   os << toString(file);
621   return os.str();
622 }
623 
624 static std::string getSourceLocation(InputFile *file, SectionChunk *sc,
625                                      uint32_t offset, StringRef name) {
626   if (!file)
627     return "";
628   if (auto *o = dyn_cast<ObjFile>(file))
629     return getSourceLocationObj(o, sc, offset, name);
630   if (auto *b = dyn_cast<BitcodeFile>(file))
631     return getSourceLocationBitcode(b);
632   return "\n>>> defined at " + toString(file);
633 }
634 
635 // Construct and print an error message in the form of:
636 //
637 //   lld-link: error: duplicate symbol: foo
638 //   >>> defined at bar.c:30
639 //   >>>            bar.o
640 //   >>> defined at baz.c:563
641 //   >>>            baz.o
642 void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile,
643                                   SectionChunk *newSc,
644                                   uint32_t newSectionOffset) {
645   std::string msg;
646   llvm::raw_string_ostream os(msg);
647   os << "duplicate symbol: " << toString(ctx, *existing);
648 
649   DefinedRegular *d = dyn_cast<DefinedRegular>(existing);
650   if (d && isa<ObjFile>(d->getFile())) {
651     os << getSourceLocation(d->getFile(), d->getChunk(), d->getValue(),
652                             existing->getName());
653   } else {
654     os << getSourceLocation(existing->getFile(), nullptr, 0, "");
655   }
656   os << getSourceLocation(newFile, newSc, newSectionOffset,
657                           existing->getName());
658 
659   if (ctx.config.forceMultiple)
660     warn(os.str());
661   else
662     error(os.str());
663 }
664 
665 Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
666   auto [s, wasInserted] = insert(n, nullptr);
667   s->isUsedInRegularObj = true;
668   if (wasInserted || isa<Undefined>(s) || s->isLazy())
669     replaceSymbol<DefinedAbsolute>(s, ctx, n, sym);
670   else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
671     if (da->getVA() != sym.getValue())
672       reportDuplicate(s, nullptr);
673   } else if (!isa<DefinedCOFF>(s))
674     reportDuplicate(s, nullptr);
675   return s;
676 }
677 
678 Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
679   auto [s, wasInserted] = insert(n, nullptr);
680   s->isUsedInRegularObj = true;
681   if (wasInserted || isa<Undefined>(s) || s->isLazy())
682     replaceSymbol<DefinedAbsolute>(s, ctx, n, va);
683   else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
684     if (da->getVA() != va)
685       reportDuplicate(s, nullptr);
686   } else if (!isa<DefinedCOFF>(s))
687     reportDuplicate(s, nullptr);
688   return s;
689 }
690 
691 Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
692   auto [s, wasInserted] = insert(n, nullptr);
693   s->isUsedInRegularObj = true;
694   if (wasInserted || isa<Undefined>(s) || s->isLazy())
695     replaceSymbol<DefinedSynthetic>(s, n, c);
696   else if (!isa<DefinedCOFF>(s))
697     reportDuplicate(s, nullptr);
698   return s;
699 }
700 
701 Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
702                                 const coff_symbol_generic *sym, SectionChunk *c,
703                                 uint32_t sectionOffset, bool isWeak) {
704   auto [s, wasInserted] = insert(n, f);
705   if (wasInserted || !isa<DefinedRegular>(s) || s->isWeak)
706     replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ false,
707                                   /*IsExternal*/ true, sym, c, isWeak);
708   else if (!isWeak)
709     reportDuplicate(s, f, c, sectionOffset);
710   return s;
711 }
712 
713 std::pair<DefinedRegular *, bool>
714 SymbolTable::addComdat(InputFile *f, StringRef n,
715                        const coff_symbol_generic *sym) {
716   auto [s, wasInserted] = insert(n, f);
717   if (wasInserted || !isa<DefinedRegular>(s)) {
718     replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
719                                   /*IsExternal*/ true, sym, nullptr);
720     return {cast<DefinedRegular>(s), true};
721   }
722   auto *existingSymbol = cast<DefinedRegular>(s);
723   if (!existingSymbol->isCOMDAT)
724     reportDuplicate(s, f);
725   return {existingSymbol, false};
726 }
727 
728 Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
729                                const coff_symbol_generic *sym, CommonChunk *c) {
730   auto [s, wasInserted] = insert(n, f);
731   if (wasInserted || !isa<DefinedCOFF>(s))
732     replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
733   else if (auto *dc = dyn_cast<DefinedCommon>(s))
734     if (size > dc->getSize())
735       replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
736   return s;
737 }
738 
739 Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) {
740   auto [s, wasInserted] = insert(n, nullptr);
741   s->isUsedInRegularObj = true;
742   if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
743     replaceSymbol<DefinedImportData>(s, n, f);
744     return s;
745   }
746 
747   reportDuplicate(s, f);
748   return nullptr;
749 }
750 
751 Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
752                                     uint16_t machine) {
753   auto [s, wasInserted] = insert(name, nullptr);
754   s->isUsedInRegularObj = true;
755   if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
756     replaceSymbol<DefinedImportThunk>(s, ctx, name, id, machine);
757     return s;
758   }
759 
760   reportDuplicate(s, id->file);
761   return nullptr;
762 }
763 
764 void SymbolTable::addLibcall(StringRef name) {
765   Symbol *sym = findUnderscore(name);
766   if (!sym)
767     return;
768 
769   if (auto *l = dyn_cast<LazyArchive>(sym)) {
770     MemoryBufferRef mb = l->getMemberBuffer();
771     if (isBitcode(mb))
772       addUndefined(sym->getName());
773   } else if (LazyObject *o = dyn_cast<LazyObject>(sym)) {
774     if (isBitcode(o->file->mb))
775       addUndefined(sym->getName());
776   }
777 }
778 
779 std::vector<Chunk *> SymbolTable::getChunks() const {
780   std::vector<Chunk *> res;
781   for (ObjFile *file : ctx.objFileInstances) {
782     ArrayRef<Chunk *> v = file->getChunks();
783     res.insert(res.end(), v.begin(), v.end());
784   }
785   return res;
786 }
787 
788 Symbol *SymbolTable::find(StringRef name) const {
789   return symMap.lookup(CachedHashStringRef(name));
790 }
791 
792 Symbol *SymbolTable::findUnderscore(StringRef name) const {
793   if (ctx.config.machine == I386)
794     return find(("_" + name).str());
795   return find(name);
796 }
797 
798 // Return all symbols that start with Prefix, possibly ignoring the first
799 // character of Prefix or the first character symbol.
800 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
801   std::vector<Symbol *> syms;
802   for (auto pair : symMap) {
803     StringRef name = pair.first.val();
804     if (name.startswith(prefix) || name.startswith(prefix.drop_front()) ||
805         name.drop_front().startswith(prefix) ||
806         name.drop_front().startswith(prefix.drop_front())) {
807       syms.push_back(pair.second);
808     }
809   }
810   return syms;
811 }
812 
813 Symbol *SymbolTable::findMangle(StringRef name) {
814   if (Symbol *sym = find(name)) {
815     if (auto *u = dyn_cast<Undefined>(sym)) {
816       // We're specifically looking for weak aliases that ultimately resolve to
817       // defined symbols, hence the call to getWeakAlias() instead of just using
818       // the weakAlias member variable. This matches link.exe's behavior.
819       if (Symbol *weakAlias = u->getWeakAlias())
820         return weakAlias;
821     } else {
822       return sym;
823     }
824   }
825 
826   // Efficient fuzzy string lookup is impossible with a hash table, so iterate
827   // the symbol table once and collect all possibly matching symbols into this
828   // vector. Then compare each possibly matching symbol with each possible
829   // mangling.
830   std::vector<Symbol *> syms = getSymsWithPrefix(name);
831   auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
832     std::string prefix = t.str();
833     for (auto *s : syms)
834       if (s->getName().startswith(prefix))
835         return s;
836     return nullptr;
837   };
838 
839   // For non-x86, just look for C++ functions.
840   if (ctx.config.machine != I386)
841     return findByPrefix("?" + name + "@@Y");
842 
843   if (!name.startswith("_"))
844     return nullptr;
845   // Search for x86 stdcall function.
846   if (Symbol *s = findByPrefix(name + "@"))
847     return s;
848   // Search for x86 fastcall function.
849   if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
850     return s;
851   // Search for x86 vectorcall function.
852   if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
853     return s;
854   // Search for x86 C++ non-member function.
855   return findByPrefix("?" + name.substr(1) + "@@Y");
856 }
857 
858 Symbol *SymbolTable::addUndefined(StringRef name) {
859   return addUndefined(name, nullptr, false);
860 }
861 
862 void SymbolTable::compileBitcodeFiles() {
863   if (ctx.bitcodeFileInstances.empty())
864     return;
865 
866   ScopedTimer t(ctx.ltoTimer);
867   lto.reset(new BitcodeCompiler(ctx));
868   for (BitcodeFile *f : ctx.bitcodeFileInstances)
869     lto->add(*f);
870   for (InputFile *newObj : lto->compile()) {
871     ObjFile *obj = cast<ObjFile>(newObj);
872     obj->parse();
873     ctx.objFileInstances.push_back(obj);
874   }
875 }
876 
877 } // namespace lld::coff
878