xref: /freebsd/contrib/llvm-project/lld/COFF/SymbolTable.cpp (revision 4b50c451720d8b427757a6da1dd2bb4c52cd9e35)
1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolTable.h"
10 #include "Config.h"
11 #include "Driver.h"
12 #include "LTO.h"
13 #include "PDB.h"
14 #include "Symbols.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "lld/Common/Timer.h"
18 #include "llvm/IR/LLVMContext.h"
19 #include "llvm/Object/WindowsMachineFlag.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include <utility>
23 
24 using namespace llvm;
25 
26 namespace lld {
27 namespace coff {
28 
// Timer labelled "LTO", registered under the root timer. It is started via a
// ScopedTimer in SymbolTable::addCombinedLTOObjects().
static Timer ltoTimer("LTO", Timer::root());

// Global pointer to the linker's symbol table. It is only declared here; this
// file does not assign it.
SymbolTable *symtab;
32 
33 void SymbolTable::addFile(InputFile *file) {
34   log("Reading " + toString(file));
35   file->parse();
36 
37   MachineTypes mt = file->getMachineType();
38   if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) {
39     config->machine = mt;
40   } else if (mt != IMAGE_FILE_MACHINE_UNKNOWN && config->machine != mt) {
41     error(toString(file) + ": machine type " + machineToStr(mt) +
42           " conflicts with " + machineToStr(config->machine));
43     return;
44   }
45 
46   if (auto *f = dyn_cast<ObjFile>(file)) {
47     ObjFile::instances.push_back(f);
48   } else if (auto *f = dyn_cast<BitcodeFile>(file)) {
49     BitcodeFile::instances.push_back(f);
50   } else if (auto *f = dyn_cast<ImportFile>(file)) {
51     ImportFile::instances.push_back(f);
52   }
53 
54   driver->parseDirectives(file);
55 }
56 
57 static void errorOrWarn(const Twine &s) {
58   if (config->forceUnresolved)
59     warn(s);
60   else
61     error(s);
62 }
63 
64 // Returns the symbol in SC whose value is <= Addr that is closest to Addr.
65 // This is generally the global variable or function whose definition contains
66 // Addr.
67 static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
68   DefinedRegular *candidate = nullptr;
69 
70   for (Symbol *s : sc->file->getSymbols()) {
71     auto *d = dyn_cast_or_null<DefinedRegular>(s);
72     if (!d || !d->data || d->getChunk() != sc || d->getValue() > addr ||
73         (candidate && d->getValue() < candidate->getValue()))
74       continue;
75 
76     candidate = d;
77   }
78 
79   return candidate;
80 }
81 
82 // Given a file and the index of a symbol in that file, returns a description
83 // of all references to that symbol from that file. If no debug information is
84 // available, returns just the name of the file, else one string per actual
85 // reference as described in the debug info.
86 std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
87   struct Location {
88     Symbol *sym;
89     std::pair<StringRef, uint32_t> fileLine;
90   };
91   std::vector<Location> locations;
92 
93   for (Chunk *c : file->getChunks()) {
94     auto *sc = dyn_cast<SectionChunk>(c);
95     if (!sc)
96       continue;
97     for (const coff_relocation &r : sc->getRelocs()) {
98       if (r.SymbolTableIndex != symIndex)
99         continue;
100       std::pair<StringRef, uint32_t> fileLine =
101           getFileLine(sc, r.VirtualAddress);
102       Symbol *sym = getSymbol(sc, r.VirtualAddress);
103       if (!fileLine.first.empty() || sym)
104         locations.push_back({sym, fileLine});
105     }
106   }
107 
108   if (locations.empty())
109     return std::vector<std::string>({"\n>>> referenced by " + toString(file)});
110 
111   std::vector<std::string> symbolLocations(locations.size());
112   size_t i = 0;
113   for (Location loc : locations) {
114     llvm::raw_string_ostream os(symbolLocations[i++]);
115     os << "\n>>> referenced by ";
116     if (!loc.fileLine.first.empty())
117       os << loc.fileLine.first << ":" << loc.fileLine.second
118          << "\n>>>               ";
119     os << toString(file);
120     if (loc.sym)
121       os << ":(" << toString(*loc.sym) << ')';
122   }
123   return symbolLocations;
124 }
125 
// For an undefined symbol, stores all files referencing it and the index of
// the undefined symbol in each file.
struct UndefinedDiag {
  // The undefined symbol being diagnosed.
  Symbol *sym;
  // One reference site: an object file plus the position of the symbol in
  // that file's symbol list. Both values are handed to getSymbolLocations()
  // when the diagnostic is printed.
  struct File {
    ObjFile *oFile;
    uint64_t symIndex;
  };
  // Every object file found to reference `sym`.
  std::vector<File> files;
};
136 
137 static void reportUndefinedSymbol(const UndefinedDiag &undefDiag) {
138   std::string out;
139   llvm::raw_string_ostream os(out);
140   os << "undefined symbol: " << toString(*undefDiag.sym);
141 
142   const size_t maxUndefReferences = 10;
143   size_t i = 0, numRefs = 0;
144   for (const UndefinedDiag::File &ref : undefDiag.files) {
145     std::vector<std::string> symbolLocations =
146         getSymbolLocations(ref.oFile, ref.symIndex);
147     numRefs += symbolLocations.size();
148     for (const std::string &s : symbolLocations) {
149       if (i >= maxUndefReferences)
150         break;
151       os << s;
152       i++;
153     }
154   }
155   if (i < numRefs)
156     os << "\n>>> referenced " << numRefs - i << " more times";
157   errorOrWarn(os.str());
158 }
159 
160 void SymbolTable::loadMinGWAutomaticImports() {
161   for (auto &i : symMap) {
162     Symbol *sym = i.second;
163     auto *undef = dyn_cast<Undefined>(sym);
164     if (!undef)
165       continue;
166     if (!sym->isUsedInRegularObj)
167       continue;
168 
169     StringRef name = undef->getName();
170 
171     if (name.startswith("__imp_"))
172       continue;
173     // If we have an undefined symbol, but we have a Lazy representing a
174     // symbol we could load from file, make sure to load that.
175     Lazy *l = dyn_cast_or_null<Lazy>(find(("__imp_" + name).str()));
176     if (!l || l->pendingArchiveLoad)
177       continue;
178 
179     log("Loading lazy " + l->getName() + " from " + l->file->getName() +
180         " for automatic import");
181     l->pendingArchiveLoad = true;
182     l->file->addMember(l->sym);
183   }
184 }
185 
186 bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
187   if (name.startswith("__imp_"))
188     return false;
189   Defined *imp = dyn_cast_or_null<Defined>(find(("__imp_" + name).str()));
190   if (!imp)
191     return false;
192 
193   // Replace the reference directly to a variable with a reference
194   // to the import address table instead. This obviously isn't right,
195   // but we mark the symbol as isRuntimePseudoReloc, and a later pass
196   // will add runtime pseudo relocations for every relocation against
197   // this Symbol. The runtime pseudo relocation framework expects the
198   // reference itself to point at the IAT entry.
199   size_t impSize = 0;
200   if (isa<DefinedImportData>(imp)) {
201     log("Automatically importing " + name + " from " +
202         cast<DefinedImportData>(imp)->getDLLName());
203     impSize = sizeof(DefinedImportData);
204   } else if (isa<DefinedRegular>(imp)) {
205     log("Automatically importing " + name + " from " +
206         toString(cast<DefinedRegular>(imp)->file));
207     impSize = sizeof(DefinedRegular);
208   } else {
209     warn("unable to automatically import " + name + " from " + imp->getName() +
210          " from " + toString(cast<DefinedRegular>(imp)->file) +
211          "; unexpected symbol type");
212     return false;
213   }
214   sym->replaceKeepingName(imp, impSize);
215   sym->isRuntimePseudoReloc = true;
216 
217   // There may exist symbols named .refptr.<name> which only consist
218   // of a single pointer to <name>. If it turns out <name> is
219   // automatically imported, we don't need to keep the .refptr.<name>
220   // pointer at all, but redirect all accesses to it to the IAT entry
221   // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
222   DefinedRegular *refptr =
223       dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
224   if (refptr && refptr->getChunk()->getSize() == config->wordsize) {
225     SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
226     if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
227       log("Replacing .refptr." + name + " with " + imp->getName());
228       refptr->getChunk()->live = false;
229       refptr->replaceKeepingName(imp, impSize);
230     }
231   }
232   return true;
233 }
234 
235 void SymbolTable::reportRemainingUndefines() {
236   SmallPtrSet<Symbol *, 8> undefs;
237   DenseMap<Symbol *, Symbol *> localImports;
238 
239   for (auto &i : symMap) {
240     Symbol *sym = i.second;
241     auto *undef = dyn_cast<Undefined>(sym);
242     if (!undef)
243       continue;
244     if (!sym->isUsedInRegularObj)
245       continue;
246 
247     StringRef name = undef->getName();
248 
249     // A weak alias may have been resolved, so check for that.
250     if (Defined *d = undef->getWeakAlias()) {
251       // We want to replace Sym with D. However, we can't just blindly
252       // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
253       // internal symbol, and internal symbols are stored as "unparented"
254       // Symbols. For that reason we need to check which type of symbol we
255       // are dealing with and copy the correct number of bytes.
256       if (isa<DefinedRegular>(d))
257         memcpy(sym, d, sizeof(DefinedRegular));
258       else if (isa<DefinedAbsolute>(d))
259         memcpy(sym, d, sizeof(DefinedAbsolute));
260       else
261         memcpy(sym, d, sizeof(SymbolUnion));
262       continue;
263     }
264 
265     // If we can resolve a symbol by removing __imp_ prefix, do that.
266     // This odd rule is for compatibility with MSVC linker.
267     if (name.startswith("__imp_")) {
268       Symbol *imp = find(name.substr(strlen("__imp_")));
269       if (imp && isa<Defined>(imp)) {
270         auto *d = cast<Defined>(imp);
271         replaceSymbol<DefinedLocalImport>(sym, name, d);
272         localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
273         localImports[sym] = d;
274         continue;
275       }
276     }
277 
278     // We don't want to report missing Microsoft precompiled headers symbols.
279     // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
280     if (name.contains("_PchSym_"))
281       continue;
282 
283     if (config->mingw && handleMinGWAutomaticImport(sym, name))
284       continue;
285 
286     // Remaining undefined symbols are not fatal if /force is specified.
287     // They are replaced with dummy defined symbols.
288     if (config->forceUnresolved)
289       replaceSymbol<DefinedAbsolute>(sym, name, 0);
290     undefs.insert(sym);
291   }
292 
293   if (undefs.empty() && localImports.empty())
294     return;
295 
296   for (Symbol *b : config->gcroot) {
297     if (undefs.count(b))
298       errorOrWarn("<root>: undefined symbol: " + toString(*b));
299     if (config->warnLocallyDefinedImported)
300       if (Symbol *imp = localImports.lookup(b))
301         warn("<root>: locally defined symbol imported: " + toString(*imp) +
302              " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
303   }
304 
305   std::vector<UndefinedDiag> undefDiags;
306   DenseMap<Symbol *, int> firstDiag;
307 
308   for (ObjFile *file : ObjFile::instances) {
309     size_t symIndex = (size_t)-1;
310     for (Symbol *sym : file->getSymbols()) {
311       ++symIndex;
312       if (!sym)
313         continue;
314       if (undefs.count(sym)) {
315         auto it = firstDiag.find(sym);
316         if (it == firstDiag.end()) {
317           firstDiag[sym] = undefDiags.size();
318           undefDiags.push_back({sym, {{file, symIndex}}});
319         } else {
320           undefDiags[it->second].files.push_back({file, symIndex});
321         }
322       }
323       if (config->warnLocallyDefinedImported)
324         if (Symbol *imp = localImports.lookup(sym))
325           warn(toString(file) +
326                ": locally defined symbol imported: " + toString(*imp) +
327                " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
328     }
329   }
330 
331   for (const UndefinedDiag& undefDiag : undefDiags)
332     reportUndefinedSymbol(undefDiag);
333 }
334 
335 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
336   bool inserted = false;
337   Symbol *&sym = symMap[CachedHashStringRef(name)];
338   if (!sym) {
339     sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
340     sym->isUsedInRegularObj = false;
341     sym->pendingArchiveLoad = false;
342     inserted = true;
343   }
344   return {sym, inserted};
345 }
346 
347 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
348   std::pair<Symbol *, bool> result = insert(name);
349   if (!file || !isa<BitcodeFile>(file))
350     result.first->isUsedInRegularObj = true;
351   return result;
352 }
353 
354 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
355                                   bool isWeakAlias) {
356   Symbol *s;
357   bool wasInserted;
358   std::tie(s, wasInserted) = insert(name, f);
359   if (wasInserted || (isa<Lazy>(s) && isWeakAlias)) {
360     replaceSymbol<Undefined>(s, name);
361     return s;
362   }
363   if (auto *l = dyn_cast<Lazy>(s)) {
364     if (!s->pendingArchiveLoad) {
365       s->pendingArchiveLoad = true;
366       l->file->addMember(l->sym);
367     }
368   }
369   return s;
370 }
371 
372 void SymbolTable::addLazy(ArchiveFile *f, const Archive::Symbol &sym) {
373   StringRef name = sym.getName();
374   Symbol *s;
375   bool wasInserted;
376   std::tie(s, wasInserted) = insert(name);
377   if (wasInserted) {
378     replaceSymbol<Lazy>(s, f, sym);
379     return;
380   }
381   auto *u = dyn_cast<Undefined>(s);
382   if (!u || u->weakAlias || s->pendingArchiveLoad)
383     return;
384   s->pendingArchiveLoad = true;
385   f->addMember(sym);
386 }
387 
388 void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile) {
389   std::string msg = "duplicate symbol: " + toString(*existing) + " in " +
390                     toString(existing->getFile()) + " and in " +
391                     toString(newFile);
392 
393   if (config->forceMultiple)
394     warn(msg);
395   else
396     error(msg);
397 }
398 
399 Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
400   Symbol *s;
401   bool wasInserted;
402   std::tie(s, wasInserted) = insert(n, nullptr);
403   s->isUsedInRegularObj = true;
404   if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s))
405     replaceSymbol<DefinedAbsolute>(s, n, sym);
406   else if (!isa<DefinedCOFF>(s))
407     reportDuplicate(s, nullptr);
408   return s;
409 }
410 
411 Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
412   Symbol *s;
413   bool wasInserted;
414   std::tie(s, wasInserted) = insert(n, nullptr);
415   s->isUsedInRegularObj = true;
416   if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s))
417     replaceSymbol<DefinedAbsolute>(s, n, va);
418   else if (!isa<DefinedCOFF>(s))
419     reportDuplicate(s, nullptr);
420   return s;
421 }
422 
423 Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
424   Symbol *s;
425   bool wasInserted;
426   std::tie(s, wasInserted) = insert(n, nullptr);
427   s->isUsedInRegularObj = true;
428   if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s))
429     replaceSymbol<DefinedSynthetic>(s, n, c);
430   else if (!isa<DefinedCOFF>(s))
431     reportDuplicate(s, nullptr);
432   return s;
433 }
434 
435 Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
436                                 const coff_symbol_generic *sym,
437                                 SectionChunk *c) {
438   Symbol *s;
439   bool wasInserted;
440   std::tie(s, wasInserted) = insert(n, f);
441   if (wasInserted || !isa<DefinedRegular>(s))
442     replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ false,
443                                   /*IsExternal*/ true, sym, c);
444   else
445     reportDuplicate(s, f);
446   return s;
447 }
448 
449 std::pair<DefinedRegular *, bool>
450 SymbolTable::addComdat(InputFile *f, StringRef n,
451                        const coff_symbol_generic *sym) {
452   Symbol *s;
453   bool wasInserted;
454   std::tie(s, wasInserted) = insert(n, f);
455   if (wasInserted || !isa<DefinedRegular>(s)) {
456     replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
457                                   /*IsExternal*/ true, sym, nullptr);
458     return {cast<DefinedRegular>(s), true};
459   }
460   auto *existingSymbol = cast<DefinedRegular>(s);
461   if (!existingSymbol->isCOMDAT)
462     reportDuplicate(s, f);
463   return {existingSymbol, false};
464 }
465 
466 Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
467                                const coff_symbol_generic *sym, CommonChunk *c) {
468   Symbol *s;
469   bool wasInserted;
470   std::tie(s, wasInserted) = insert(n, f);
471   if (wasInserted || !isa<DefinedCOFF>(s))
472     replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
473   else if (auto *dc = dyn_cast<DefinedCommon>(s))
474     if (size > dc->getSize())
475       replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
476   return s;
477 }
478 
479 Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) {
480   Symbol *s;
481   bool wasInserted;
482   std::tie(s, wasInserted) = insert(n, nullptr);
483   s->isUsedInRegularObj = true;
484   if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s)) {
485     replaceSymbol<DefinedImportData>(s, n, f);
486     return s;
487   }
488 
489   reportDuplicate(s, f);
490   return nullptr;
491 }
492 
493 Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
494                                     uint16_t machine) {
495   Symbol *s;
496   bool wasInserted;
497   std::tie(s, wasInserted) = insert(name, nullptr);
498   s->isUsedInRegularObj = true;
499   if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s)) {
500     replaceSymbol<DefinedImportThunk>(s, name, id, machine);
501     return s;
502   }
503 
504   reportDuplicate(s, id->file);
505   return nullptr;
506 }
507 
508 void SymbolTable::addLibcall(StringRef name) {
509   Symbol *sym = findUnderscore(name);
510   if (!sym)
511     return;
512 
513   if (Lazy *l = dyn_cast<Lazy>(sym)) {
514     MemoryBufferRef mb = l->getMemberBuffer();
515     if (identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode)
516       addUndefined(sym->getName());
517   }
518 }
519 
520 std::vector<Chunk *> SymbolTable::getChunks() {
521   std::vector<Chunk *> res;
522   for (ObjFile *file : ObjFile::instances) {
523     ArrayRef<Chunk *> v = file->getChunks();
524     res.insert(res.end(), v.begin(), v.end());
525   }
526   return res;
527 }
528 
529 Symbol *SymbolTable::find(StringRef name) {
530   return symMap.lookup(CachedHashStringRef(name));
531 }
532 
533 Symbol *SymbolTable::findUnderscore(StringRef name) {
534   if (config->machine == I386)
535     return find(("_" + name).str());
536   return find(name);
537 }
538 
539 // Return all symbols that start with Prefix, possibly ignoring the first
540 // character of Prefix or the first character symbol.
541 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
542   std::vector<Symbol *> syms;
543   for (auto pair : symMap) {
544     StringRef name = pair.first.val();
545     if (name.startswith(prefix) || name.startswith(prefix.drop_front()) ||
546         name.drop_front().startswith(prefix) ||
547         name.drop_front().startswith(prefix.drop_front())) {
548       syms.push_back(pair.second);
549     }
550   }
551   return syms;
552 }
553 
554 Symbol *SymbolTable::findMangle(StringRef name) {
555   if (Symbol *sym = find(name))
556     if (!isa<Undefined>(sym))
557       return sym;
558 
559   // Efficient fuzzy string lookup is impossible with a hash table, so iterate
560   // the symbol table once and collect all possibly matching symbols into this
561   // vector. Then compare each possibly matching symbol with each possible
562   // mangling.
563   std::vector<Symbol *> syms = getSymsWithPrefix(name);
564   auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
565     std::string prefix = t.str();
566     for (auto *s : syms)
567       if (s->getName().startswith(prefix))
568         return s;
569     return nullptr;
570   };
571 
572   // For non-x86, just look for C++ functions.
573   if (config->machine != I386)
574     return findByPrefix("?" + name + "@@Y");
575 
576   if (!name.startswith("_"))
577     return nullptr;
578   // Search for x86 stdcall function.
579   if (Symbol *s = findByPrefix(name + "@"))
580     return s;
581   // Search for x86 fastcall function.
582   if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
583     return s;
584   // Search for x86 vectorcall function.
585   if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
586     return s;
587   // Search for x86 C++ non-member function.
588   return findByPrefix("?" + name.substr(1) + "@@Y");
589 }
590 
591 Symbol *SymbolTable::addUndefined(StringRef name) {
592   return addUndefined(name, nullptr, false);
593 }
594 
595 std::vector<StringRef> SymbolTable::compileBitcodeFiles() {
596   lto.reset(new BitcodeCompiler);
597   for (BitcodeFile *f : BitcodeFile::instances)
598     lto->add(*f);
599   return lto->compile();
600 }
601 
602 void SymbolTable::addCombinedLTOObjects() {
603   if (BitcodeFile::instances.empty())
604     return;
605 
606   ScopedTimer t(ltoTimer);
607   for (StringRef object : compileBitcodeFiles()) {
608     auto *obj = make<ObjFile>(MemoryBufferRef(object, "lto.tmp"));
609     obj->parse();
610     ObjFile::instances.push_back(obj);
611   }
612 }
613 
614 } // namespace coff
615 } // namespace lld
616