xref: /freebsd/contrib/llvm-project/lld/COFF/SymbolTable.cpp (revision e9e8876a4d6afc1ad5315faaa191b25121a813d7)
1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolTable.h"
10 #include "Config.h"
11 #include "Driver.h"
12 #include "LTO.h"
13 #include "PDB.h"
14 #include "Symbols.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "lld/Common/Timer.h"
18 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
19 #include "llvm/IR/LLVMContext.h"
20 #include "llvm/LTO/LTO.h"
21 #include "llvm/Object/WindowsMachineFlag.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include <utility>
25 
26 using namespace llvm;
27 
28 namespace lld {
29 namespace coff {
30 
31 StringRef ltrim1(StringRef s, const char *chars) {
32   if (!s.empty() && strchr(chars, s[0]))
33     return s.substr(1);
34   return s;
35 }
36 
37 static Timer ltoTimer("LTO", Timer::root());
38 
39 SymbolTable *symtab;
40 
41 void SymbolTable::addFile(InputFile *file) {
42   log("Reading " + toString(file));
43   file->parse();
44 
45   MachineTypes mt = file->getMachineType();
46   if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) {
47     config->machine = mt;
48   } else if (mt != IMAGE_FILE_MACHINE_UNKNOWN && config->machine != mt) {
49     error(toString(file) + ": machine type " + machineToStr(mt) +
50           " conflicts with " + machineToStr(config->machine));
51     return;
52   }
53 
54   if (auto *f = dyn_cast<ObjFile>(file)) {
55     ObjFile::instances.push_back(f);
56   } else if (auto *f = dyn_cast<BitcodeFile>(file)) {
57     BitcodeFile::instances.push_back(f);
58   } else if (auto *f = dyn_cast<ImportFile>(file)) {
59     ImportFile::instances.push_back(f);
60   }
61 
62   driver->parseDirectives(file);
63 }
64 
65 static void errorOrWarn(const Twine &s) {
66   if (config->forceUnresolved)
67     warn(s);
68   else
69     error(s);
70 }
71 
72 // Causes the file associated with a lazy symbol to be linked in.
73 static void forceLazy(Symbol *s) {
74   s->pendingArchiveLoad = true;
75   switch (s->kind()) {
76   case Symbol::Kind::LazyArchiveKind: {
77     auto *l = cast<LazyArchive>(s);
78     l->file->addMember(l->sym);
79     break;
80   }
81   case Symbol::Kind::LazyObjectKind:
82     cast<LazyObject>(s)->file->fetch();
83     break;
84   case Symbol::Kind::LazyDLLSymbolKind: {
85     auto *l = cast<LazyDLLSymbol>(s);
86     l->file->makeImport(l->sym);
87     break;
88   }
89   default:
90     llvm_unreachable(
91         "symbol passed to forceLazy is not a LazyArchive or LazyObject");
92   }
93 }
94 
95 // Returns the symbol in SC whose value is <= Addr that is closest to Addr.
96 // This is generally the global variable or function whose definition contains
97 // Addr.
98 static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
99   DefinedRegular *candidate = nullptr;
100 
101   for (Symbol *s : sc->file->getSymbols()) {
102     auto *d = dyn_cast_or_null<DefinedRegular>(s);
103     if (!d || !d->data || d->file != sc->file || d->getChunk() != sc ||
104         d->getValue() > addr ||
105         (candidate && d->getValue() < candidate->getValue()))
106       continue;
107 
108     candidate = d;
109   }
110 
111   return candidate;
112 }
113 
114 static std::vector<std::string> getSymbolLocations(BitcodeFile *file) {
115   std::string res("\n>>> referenced by ");
116   StringRef source = file->obj->getSourceFileName();
117   if (!source.empty())
118     res += source.str() + "\n>>>               ";
119   res += toString(file);
120   return {res};
121 }
122 
123 static Optional<std::pair<StringRef, uint32_t>>
124 getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
125   Optional<DILineInfo> optionalLineInfo =
126       c->file->getDILineInfo(addr, c->getSectionNumber() - 1);
127   if (!optionalLineInfo)
128     return None;
129   const DILineInfo &lineInfo = *optionalLineInfo;
130   if (lineInfo.FileName == DILineInfo::BadString)
131     return None;
132   return std::make_pair(saver.save(lineInfo.FileName), lineInfo.Line);
133 }
134 
135 static Optional<std::pair<StringRef, uint32_t>>
136 getFileLine(const SectionChunk *c, uint32_t addr) {
137   // MinGW can optionally use codeview, even if the default is dwarf.
138   Optional<std::pair<StringRef, uint32_t>> fileLine =
139       getFileLineCodeView(c, addr);
140   // If codeview didn't yield any result, check dwarf in MinGW mode.
141   if (!fileLine && config->mingw)
142     fileLine = getFileLineDwarf(c, addr);
143   return fileLine;
144 }
145 
146 // Given a file and the index of a symbol in that file, returns a description
147 // of all references to that symbol from that file. If no debug information is
148 // available, returns just the name of the file, else one string per actual
149 // reference as described in the debug info.
150 // Returns up to maxStrings string descriptions, along with the total number of
151 // locations found.
152 static std::pair<std::vector<std::string>, size_t>
153 getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) {
154   struct Location {
155     Symbol *sym;
156     std::pair<StringRef, uint32_t> fileLine;
157   };
158   std::vector<Location> locations;
159   size_t numLocations = 0;
160 
161   for (Chunk *c : file->getChunks()) {
162     auto *sc = dyn_cast<SectionChunk>(c);
163     if (!sc)
164       continue;
165     for (const coff_relocation &r : sc->getRelocs()) {
166       if (r.SymbolTableIndex != symIndex)
167         continue;
168       numLocations++;
169       if (locations.size() >= maxStrings)
170         continue;
171 
172       Optional<std::pair<StringRef, uint32_t>> fileLine =
173           getFileLine(sc, r.VirtualAddress);
174       Symbol *sym = getSymbol(sc, r.VirtualAddress);
175       if (fileLine)
176         locations.push_back({sym, *fileLine});
177       else if (sym)
178         locations.push_back({sym, {"", 0}});
179     }
180   }
181 
182   if (maxStrings == 0)
183     return std::make_pair(std::vector<std::string>(), numLocations);
184 
185   if (numLocations == 0)
186     return std::make_pair(
187         std::vector<std::string>{"\n>>> referenced by " + toString(file)}, 1);
188 
189   std::vector<std::string> symbolLocations(locations.size());
190   size_t i = 0;
191   for (Location loc : locations) {
192     llvm::raw_string_ostream os(symbolLocations[i++]);
193     os << "\n>>> referenced by ";
194     if (!loc.fileLine.first.empty())
195       os << loc.fileLine.first << ":" << loc.fileLine.second
196          << "\n>>>               ";
197     os << toString(file);
198     if (loc.sym)
199       os << ":(" << toString(*loc.sym) << ')';
200   }
201   return std::make_pair(symbolLocations, numLocations);
202 }
203 
204 std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
205   return getSymbolLocations(file, symIndex, SIZE_MAX).first;
206 }
207 
208 static std::pair<std::vector<std::string>, size_t>
209 getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) {
210   if (auto *o = dyn_cast<ObjFile>(file))
211     return getSymbolLocations(o, symIndex, maxStrings);
212   if (auto *b = dyn_cast<BitcodeFile>(file)) {
213     std::vector<std::string> symbolLocations = getSymbolLocations(b);
214     size_t numLocations = symbolLocations.size();
215     if (symbolLocations.size() > maxStrings)
216       symbolLocations.resize(maxStrings);
217     return std::make_pair(symbolLocations, numLocations);
218   }
219   llvm_unreachable("unsupported file type passed to getSymbolLocations");
220   return std::make_pair(std::vector<std::string>(), (size_t)0);
221 }
222 
223 // For an undefined symbol, stores all files referencing it and the index of
224 // the undefined symbol in each file.
225 struct UndefinedDiag {
226   Symbol *sym;
227   struct File {
228     InputFile *file;
229     uint32_t symIndex;
230   };
231   std::vector<File> files;
232 };
233 
234 static void reportUndefinedSymbol(const UndefinedDiag &undefDiag) {
235   std::string out;
236   llvm::raw_string_ostream os(out);
237   os << "undefined symbol: " << toString(*undefDiag.sym);
238 
239   const size_t maxUndefReferences = 3;
240   size_t numDisplayedRefs = 0, numRefs = 0;
241   for (const UndefinedDiag::File &ref : undefDiag.files) {
242     std::vector<std::string> symbolLocations;
243     size_t totalLocations = 0;
244     std::tie(symbolLocations, totalLocations) = getSymbolLocations(
245         ref.file, ref.symIndex, maxUndefReferences - numDisplayedRefs);
246 
247     numRefs += totalLocations;
248     numDisplayedRefs += symbolLocations.size();
249     for (const std::string &s : symbolLocations) {
250       os << s;
251     }
252   }
253   if (numDisplayedRefs < numRefs)
254     os << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times";
255   errorOrWarn(os.str());
256 }
257 
258 void SymbolTable::loadMinGWSymbols() {
259   for (auto &i : symMap) {
260     Symbol *sym = i.second;
261     auto *undef = dyn_cast<Undefined>(sym);
262     if (!undef)
263       continue;
264     if (undef->getWeakAlias())
265       continue;
266 
267     StringRef name = undef->getName();
268 
269     if (config->machine == I386 && config->stdcallFixup) {
270       // Check if we can resolve an undefined decorated symbol by finding
271       // the indended target as an undecorated symbol (only with a leading
272       // underscore).
273       StringRef origName = name;
274       StringRef baseName = name;
275       // Trim down stdcall/fastcall/vectorcall symbols to the base name.
276       baseName = ltrim1(baseName, "_@");
277       baseName = baseName.substr(0, baseName.find('@'));
278       // Add a leading underscore, as it would be in cdecl form.
279       std::string newName = ("_" + baseName).str();
280       Symbol *l;
281       if (newName != origName && (l = find(newName)) != nullptr) {
282         // If we found a symbol and it is lazy; load it.
283         if (l->isLazy() && !l->pendingArchiveLoad) {
284           log("Loading lazy " + l->getName() + " from " +
285               l->getFile()->getName() + " for stdcall fixup");
286           forceLazy(l);
287         }
288         // If it's lazy or already defined, hook it up as weak alias.
289         if (l->isLazy() || isa<Defined>(l)) {
290           if (config->warnStdcallFixup)
291             warn("Resolving " + origName + " by linking to " + newName);
292           else
293             log("Resolving " + origName + " by linking to " + newName);
294           undef->weakAlias = l;
295           continue;
296         }
297       }
298     }
299 
300     if (config->autoImport) {
301       if (name.startswith("__imp_"))
302         continue;
303       // If we have an undefined symbol, but we have a lazy symbol we could
304       // load, load it.
305       Symbol *l = find(("__imp_" + name).str());
306       if (!l || l->pendingArchiveLoad || !l->isLazy())
307         continue;
308 
309       log("Loading lazy " + l->getName() + " from " + l->getFile()->getName() +
310           " for automatic import");
311       forceLazy(l);
312     }
313   }
314 }
315 
316 Defined *SymbolTable::impSymbol(StringRef name) {
317   if (name.startswith("__imp_"))
318     return nullptr;
319   return dyn_cast_or_null<Defined>(find(("__imp_" + name).str()));
320 }
321 
322 bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
323   Defined *imp = impSymbol(name);
324   if (!imp)
325     return false;
326 
327   // Replace the reference directly to a variable with a reference
328   // to the import address table instead. This obviously isn't right,
329   // but we mark the symbol as isRuntimePseudoReloc, and a later pass
330   // will add runtime pseudo relocations for every relocation against
331   // this Symbol. The runtime pseudo relocation framework expects the
332   // reference itself to point at the IAT entry.
333   size_t impSize = 0;
334   if (isa<DefinedImportData>(imp)) {
335     log("Automatically importing " + name + " from " +
336         cast<DefinedImportData>(imp)->getDLLName());
337     impSize = sizeof(DefinedImportData);
338   } else if (isa<DefinedRegular>(imp)) {
339     log("Automatically importing " + name + " from " +
340         toString(cast<DefinedRegular>(imp)->file));
341     impSize = sizeof(DefinedRegular);
342   } else {
343     warn("unable to automatically import " + name + " from " + imp->getName() +
344          " from " + toString(cast<DefinedRegular>(imp)->file) +
345          "; unexpected symbol type");
346     return false;
347   }
348   sym->replaceKeepingName(imp, impSize);
349   sym->isRuntimePseudoReloc = true;
350 
351   // There may exist symbols named .refptr.<name> which only consist
352   // of a single pointer to <name>. If it turns out <name> is
353   // automatically imported, we don't need to keep the .refptr.<name>
354   // pointer at all, but redirect all accesses to it to the IAT entry
355   // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
356   DefinedRegular *refptr =
357       dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
358   if (refptr && refptr->getChunk()->getSize() == config->wordsize) {
359     SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
360     if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
361       log("Replacing .refptr." + name + " with " + imp->getName());
362       refptr->getChunk()->live = false;
363       refptr->replaceKeepingName(imp, impSize);
364     }
365   }
366   return true;
367 }
368 
369 /// Helper function for reportUnresolvable and resolveRemainingUndefines.
370 /// This function emits an "undefined symbol" diagnostic for each symbol in
371 /// undefs. If localImports is not nullptr, it also emits a "locally
372 /// defined symbol imported" diagnostic for symbols in localImports.
373 /// objFiles and bitcodeFiles (if not nullptr) are used to report where
374 /// undefined symbols are referenced.
375 static void
376 reportProblemSymbols(const SmallPtrSetImpl<Symbol *> &undefs,
377                      const DenseMap<Symbol *, Symbol *> *localImports,
378                      const std::vector<ObjFile *> objFiles,
379                      const std::vector<BitcodeFile *> *bitcodeFiles) {
380 
381   // Return early if there is nothing to report (which should be
382   // the common case).
383   if (undefs.empty() && (!localImports || localImports->empty()))
384     return;
385 
386   for (Symbol *b : config->gcroot) {
387     if (undefs.count(b))
388       errorOrWarn("<root>: undefined symbol: " + toString(*b));
389     if (localImports)
390       if (Symbol *imp = localImports->lookup(b))
391         warn("<root>: locally defined symbol imported: " + toString(*imp) +
392              " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
393   }
394 
395   std::vector<UndefinedDiag> undefDiags;
396   DenseMap<Symbol *, int> firstDiag;
397 
398   auto processFile = [&](InputFile *file, ArrayRef<Symbol *> symbols) {
399     uint32_t symIndex = (uint32_t)-1;
400     for (Symbol *sym : symbols) {
401       ++symIndex;
402       if (!sym)
403         continue;
404       if (undefs.count(sym)) {
405         auto it = firstDiag.find(sym);
406         if (it == firstDiag.end()) {
407           firstDiag[sym] = undefDiags.size();
408           undefDiags.push_back({sym, {{file, symIndex}}});
409         } else {
410           undefDiags[it->second].files.push_back({file, symIndex});
411         }
412       }
413       if (localImports)
414         if (Symbol *imp = localImports->lookup(sym))
415           warn(toString(file) +
416                ": locally defined symbol imported: " + toString(*imp) +
417                " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
418     }
419   };
420 
421   for (ObjFile *file : objFiles)
422     processFile(file, file->getSymbols());
423 
424   if (bitcodeFiles)
425     for (BitcodeFile *file : *bitcodeFiles)
426       processFile(file, file->getSymbols());
427 
428   for (const UndefinedDiag &undefDiag : undefDiags)
429     reportUndefinedSymbol(undefDiag);
430 }
431 
432 void SymbolTable::reportUnresolvable() {
433   SmallPtrSet<Symbol *, 8> undefs;
434   for (auto &i : symMap) {
435     Symbol *sym = i.second;
436     auto *undef = dyn_cast<Undefined>(sym);
437     if (!undef || sym->deferUndefined)
438       continue;
439     if (undef->getWeakAlias())
440       continue;
441     StringRef name = undef->getName();
442     if (name.startswith("__imp_")) {
443       Symbol *imp = find(name.substr(strlen("__imp_")));
444       if (imp && isa<Defined>(imp))
445         continue;
446     }
447     if (name.contains("_PchSym_"))
448       continue;
449     if (config->autoImport && impSymbol(name))
450       continue;
451     undefs.insert(sym);
452   }
453 
454   reportProblemSymbols(undefs,
455                        /* localImports */ nullptr, ObjFile::instances,
456                        &BitcodeFile::instances);
457 }
458 
459 void SymbolTable::resolveRemainingUndefines() {
460   SmallPtrSet<Symbol *, 8> undefs;
461   DenseMap<Symbol *, Symbol *> localImports;
462 
463   for (auto &i : symMap) {
464     Symbol *sym = i.second;
465     auto *undef = dyn_cast<Undefined>(sym);
466     if (!undef)
467       continue;
468     if (!sym->isUsedInRegularObj)
469       continue;
470 
471     StringRef name = undef->getName();
472 
473     // A weak alias may have been resolved, so check for that.
474     if (Defined *d = undef->getWeakAlias()) {
475       // We want to replace Sym with D. However, we can't just blindly
476       // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
477       // internal symbol, and internal symbols are stored as "unparented"
478       // Symbols. For that reason we need to check which type of symbol we
479       // are dealing with and copy the correct number of bytes.
480       if (isa<DefinedRegular>(d))
481         memcpy(sym, d, sizeof(DefinedRegular));
482       else if (isa<DefinedAbsolute>(d))
483         memcpy(sym, d, sizeof(DefinedAbsolute));
484       else
485         memcpy(sym, d, sizeof(SymbolUnion));
486       continue;
487     }
488 
489     // If we can resolve a symbol by removing __imp_ prefix, do that.
490     // This odd rule is for compatibility with MSVC linker.
491     if (name.startswith("__imp_")) {
492       Symbol *imp = find(name.substr(strlen("__imp_")));
493       if (imp && isa<Defined>(imp)) {
494         auto *d = cast<Defined>(imp);
495         replaceSymbol<DefinedLocalImport>(sym, name, d);
496         localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
497         localImports[sym] = d;
498         continue;
499       }
500     }
501 
502     // We don't want to report missing Microsoft precompiled headers symbols.
503     // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
504     if (name.contains("_PchSym_"))
505       continue;
506 
507     if (config->autoImport && handleMinGWAutomaticImport(sym, name))
508       continue;
509 
510     // Remaining undefined symbols are not fatal if /force is specified.
511     // They are replaced with dummy defined symbols.
512     if (config->forceUnresolved)
513       replaceSymbol<DefinedAbsolute>(sym, name, 0);
514     undefs.insert(sym);
515   }
516 
517   reportProblemSymbols(
518       undefs, config->warnLocallyDefinedImported ? &localImports : nullptr,
519       ObjFile::instances, /* bitcode files no longer needed */ nullptr);
520 }
521 
522 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
523   bool inserted = false;
524   Symbol *&sym = symMap[CachedHashStringRef(name)];
525   if (!sym) {
526     sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
527     sym->isUsedInRegularObj = false;
528     sym->pendingArchiveLoad = false;
529     sym->canInline = true;
530     inserted = true;
531   }
532   return {sym, inserted};
533 }
534 
535 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
536   std::pair<Symbol *, bool> result = insert(name);
537   if (!file || !isa<BitcodeFile>(file))
538     result.first->isUsedInRegularObj = true;
539   return result;
540 }
541 
542 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
543                                   bool isWeakAlias) {
544   Symbol *s;
545   bool wasInserted;
546   std::tie(s, wasInserted) = insert(name, f);
547   if (wasInserted || (s->isLazy() && isWeakAlias)) {
548     replaceSymbol<Undefined>(s, name);
549     return s;
550   }
551   if (s->isLazy())
552     forceLazy(s);
553   return s;
554 }
555 
556 void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
557   StringRef name = sym.getName();
558   Symbol *s;
559   bool wasInserted;
560   std::tie(s, wasInserted) = insert(name);
561   if (wasInserted) {
562     replaceSymbol<LazyArchive>(s, f, sym);
563     return;
564   }
565   auto *u = dyn_cast<Undefined>(s);
566   if (!u || u->weakAlias || s->pendingArchiveLoad)
567     return;
568   s->pendingArchiveLoad = true;
569   f->addMember(sym);
570 }
571 
572 void SymbolTable::addLazyObject(LazyObjFile *f, StringRef n) {
573   Symbol *s;
574   bool wasInserted;
575   std::tie(s, wasInserted) = insert(n, f);
576   if (wasInserted) {
577     replaceSymbol<LazyObject>(s, f, n);
578     return;
579   }
580   auto *u = dyn_cast<Undefined>(s);
581   if (!u || u->weakAlias || s->pendingArchiveLoad)
582     return;
583   s->pendingArchiveLoad = true;
584   f->fetch();
585 }
586 
587 void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
588                                    StringRef n) {
589   Symbol *s;
590   bool wasInserted;
591   std::tie(s, wasInserted) = insert(n);
592   if (wasInserted) {
593     replaceSymbol<LazyDLLSymbol>(s, f, sym, n);
594     return;
595   }
596   auto *u = dyn_cast<Undefined>(s);
597   if (!u || u->weakAlias || s->pendingArchiveLoad)
598     return;
599   s->pendingArchiveLoad = true;
600   f->makeImport(sym);
601 }
602 
603 static std::string getSourceLocationBitcode(BitcodeFile *file) {
604   std::string res("\n>>> defined at ");
605   StringRef source = file->obj->getSourceFileName();
606   if (!source.empty())
607     res += source.str() + "\n>>>            ";
608   res += toString(file);
609   return res;
610 }
611 
612 static std::string getSourceLocationObj(ObjFile *file, SectionChunk *sc,
613                                         uint32_t offset, StringRef name) {
614   Optional<std::pair<StringRef, uint32_t>> fileLine;
615   if (sc)
616     fileLine = getFileLine(sc, offset);
617   if (!fileLine)
618     fileLine = file->getVariableLocation(name);
619 
620   std::string res;
621   llvm::raw_string_ostream os(res);
622   os << "\n>>> defined at ";
623   if (fileLine)
624     os << fileLine->first << ":" << fileLine->second << "\n>>>            ";
625   os << toString(file);
626   return os.str();
627 }
628 
629 static std::string getSourceLocation(InputFile *file, SectionChunk *sc,
630                                      uint32_t offset, StringRef name) {
631   if (!file)
632     return "";
633   if (auto *o = dyn_cast<ObjFile>(file))
634     return getSourceLocationObj(o, sc, offset, name);
635   if (auto *b = dyn_cast<BitcodeFile>(file))
636     return getSourceLocationBitcode(b);
637   return "\n>>> defined at " + toString(file);
638 }
639 
640 // Construct and print an error message in the form of:
641 //
642 //   lld-link: error: duplicate symbol: foo
643 //   >>> defined at bar.c:30
644 //   >>>            bar.o
645 //   >>> defined at baz.c:563
646 //   >>>            baz.o
647 void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile,
648                                   SectionChunk *newSc,
649                                   uint32_t newSectionOffset) {
650   std::string msg;
651   llvm::raw_string_ostream os(msg);
652   os << "duplicate symbol: " << toString(*existing);
653 
654   DefinedRegular *d = dyn_cast<DefinedRegular>(existing);
655   if (d && isa<ObjFile>(d->getFile())) {
656     os << getSourceLocation(d->getFile(), d->getChunk(), d->getValue(),
657                             existing->getName());
658   } else {
659     os << getSourceLocation(existing->getFile(), nullptr, 0, "");
660   }
661   os << getSourceLocation(newFile, newSc, newSectionOffset,
662                           existing->getName());
663 
664   if (config->forceMultiple)
665     warn(os.str());
666   else
667     error(os.str());
668 }
669 
670 Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
671   Symbol *s;
672   bool wasInserted;
673   std::tie(s, wasInserted) = insert(n, nullptr);
674   s->isUsedInRegularObj = true;
675   if (wasInserted || isa<Undefined>(s) || s->isLazy())
676     replaceSymbol<DefinedAbsolute>(s, n, sym);
677   else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
678     if (da->getVA() != sym.getValue())
679       reportDuplicate(s, nullptr);
680   } else if (!isa<DefinedCOFF>(s))
681     reportDuplicate(s, nullptr);
682   return s;
683 }
684 
685 Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
686   Symbol *s;
687   bool wasInserted;
688   std::tie(s, wasInserted) = insert(n, nullptr);
689   s->isUsedInRegularObj = true;
690   if (wasInserted || isa<Undefined>(s) || s->isLazy())
691     replaceSymbol<DefinedAbsolute>(s, n, va);
692   else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
693     if (da->getVA() != va)
694       reportDuplicate(s, nullptr);
695   } else if (!isa<DefinedCOFF>(s))
696     reportDuplicate(s, nullptr);
697   return s;
698 }
699 
700 Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
701   Symbol *s;
702   bool wasInserted;
703   std::tie(s, wasInserted) = insert(n, nullptr);
704   s->isUsedInRegularObj = true;
705   if (wasInserted || isa<Undefined>(s) || s->isLazy())
706     replaceSymbol<DefinedSynthetic>(s, n, c);
707   else if (!isa<DefinedCOFF>(s))
708     reportDuplicate(s, nullptr);
709   return s;
710 }
711 
712 Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
713                                 const coff_symbol_generic *sym, SectionChunk *c,
714                                 uint32_t sectionOffset) {
715   Symbol *s;
716   bool wasInserted;
717   std::tie(s, wasInserted) = insert(n, f);
718   if (wasInserted || !isa<DefinedRegular>(s))
719     replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ false,
720                                   /*IsExternal*/ true, sym, c);
721   else
722     reportDuplicate(s, f, c, sectionOffset);
723   return s;
724 }
725 
726 std::pair<DefinedRegular *, bool>
727 SymbolTable::addComdat(InputFile *f, StringRef n,
728                        const coff_symbol_generic *sym) {
729   Symbol *s;
730   bool wasInserted;
731   std::tie(s, wasInserted) = insert(n, f);
732   if (wasInserted || !isa<DefinedRegular>(s)) {
733     replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
734                                   /*IsExternal*/ true, sym, nullptr);
735     return {cast<DefinedRegular>(s), true};
736   }
737   auto *existingSymbol = cast<DefinedRegular>(s);
738   if (!existingSymbol->isCOMDAT)
739     reportDuplicate(s, f);
740   return {existingSymbol, false};
741 }
742 
743 Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
744                                const coff_symbol_generic *sym, CommonChunk *c) {
745   Symbol *s;
746   bool wasInserted;
747   std::tie(s, wasInserted) = insert(n, f);
748   if (wasInserted || !isa<DefinedCOFF>(s))
749     replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
750   else if (auto *dc = dyn_cast<DefinedCommon>(s))
751     if (size > dc->getSize())
752       replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
753   return s;
754 }
755 
756 Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) {
757   Symbol *s;
758   bool wasInserted;
759   std::tie(s, wasInserted) = insert(n, nullptr);
760   s->isUsedInRegularObj = true;
761   if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
762     replaceSymbol<DefinedImportData>(s, n, f);
763     return s;
764   }
765 
766   reportDuplicate(s, f);
767   return nullptr;
768 }
769 
770 Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
771                                     uint16_t machine) {
772   Symbol *s;
773   bool wasInserted;
774   std::tie(s, wasInserted) = insert(name, nullptr);
775   s->isUsedInRegularObj = true;
776   if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
777     replaceSymbol<DefinedImportThunk>(s, name, id, machine);
778     return s;
779   }
780 
781   reportDuplicate(s, id->file);
782   return nullptr;
783 }
784 
785 void SymbolTable::addLibcall(StringRef name) {
786   Symbol *sym = findUnderscore(name);
787   if (!sym)
788     return;
789 
790   if (auto *l = dyn_cast<LazyArchive>(sym)) {
791     MemoryBufferRef mb = l->getMemberBuffer();
792     if (isBitcode(mb))
793       addUndefined(sym->getName());
794   } else if (LazyObject *o = dyn_cast<LazyObject>(sym)) {
795     if (isBitcode(o->file->mb))
796       addUndefined(sym->getName());
797   }
798 }
799 
800 std::vector<Chunk *> SymbolTable::getChunks() {
801   std::vector<Chunk *> res;
802   for (ObjFile *file : ObjFile::instances) {
803     ArrayRef<Chunk *> v = file->getChunks();
804     res.insert(res.end(), v.begin(), v.end());
805   }
806   return res;
807 }
808 
809 Symbol *SymbolTable::find(StringRef name) {
810   return symMap.lookup(CachedHashStringRef(name));
811 }
812 
813 Symbol *SymbolTable::findUnderscore(StringRef name) {
814   if (config->machine == I386)
815     return find(("_" + name).str());
816   return find(name);
817 }
818 
819 // Return all symbols that start with Prefix, possibly ignoring the first
820 // character of Prefix or the first character symbol.
821 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
822   std::vector<Symbol *> syms;
823   for (auto pair : symMap) {
824     StringRef name = pair.first.val();
825     if (name.startswith(prefix) || name.startswith(prefix.drop_front()) ||
826         name.drop_front().startswith(prefix) ||
827         name.drop_front().startswith(prefix.drop_front())) {
828       syms.push_back(pair.second);
829     }
830   }
831   return syms;
832 }
833 
834 Symbol *SymbolTable::findMangle(StringRef name) {
835   if (Symbol *sym = find(name))
836     if (!isa<Undefined>(sym))
837       return sym;
838 
839   // Efficient fuzzy string lookup is impossible with a hash table, so iterate
840   // the symbol table once and collect all possibly matching symbols into this
841   // vector. Then compare each possibly matching symbol with each possible
842   // mangling.
843   std::vector<Symbol *> syms = getSymsWithPrefix(name);
844   auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
845     std::string prefix = t.str();
846     for (auto *s : syms)
847       if (s->getName().startswith(prefix))
848         return s;
849     return nullptr;
850   };
851 
852   // For non-x86, just look for C++ functions.
853   if (config->machine != I386)
854     return findByPrefix("?" + name + "@@Y");
855 
856   if (!name.startswith("_"))
857     return nullptr;
858   // Search for x86 stdcall function.
859   if (Symbol *s = findByPrefix(name + "@"))
860     return s;
861   // Search for x86 fastcall function.
862   if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
863     return s;
864   // Search for x86 vectorcall function.
865   if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
866     return s;
867   // Search for x86 C++ non-member function.
868   return findByPrefix("?" + name.substr(1) + "@@Y");
869 }
870 
871 Symbol *SymbolTable::addUndefined(StringRef name) {
872   return addUndefined(name, nullptr, false);
873 }
874 
875 void SymbolTable::addCombinedLTOObjects() {
876   if (BitcodeFile::instances.empty())
877     return;
878 
879   ScopedTimer t(ltoTimer);
880   lto.reset(new BitcodeCompiler);
881   for (BitcodeFile *f : BitcodeFile::instances)
882     lto->add(*f);
883   for (InputFile *newObj : lto->compile()) {
884     ObjFile *obj = cast<ObjFile>(newObj);
885     obj->parse();
886     ObjFile::instances.push_back(obj);
887   }
888 }
889 
890 } // namespace coff
891 } // namespace lld
892