1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "SymbolTable.h"
10 #include "COFFLinkerContext.h"
11 #include "Config.h"
12 #include "Driver.h"
13 #include "LTO.h"
14 #include "PDB.h"
15 #include "Symbols.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "lld/Common/Timer.h"
19 #include "llvm/DebugInfo/DIContext.h"
20 #include "llvm/IR/LLVMContext.h"
21 #include "llvm/LTO/LTO.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/TimeProfiler.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <utility>
26
27 using namespace llvm;
28
29 namespace lld::coff {
30
ltrim1(StringRef s,const char * chars)31 StringRef ltrim1(StringRef s, const char *chars) {
32 if (!s.empty() && strchr(chars, s[0]))
33 return s.substr(1);
34 return s;
35 }
36
compatibleMachineType(COFFLinkerContext & ctx,MachineTypes mt)37 static bool compatibleMachineType(COFFLinkerContext &ctx, MachineTypes mt) {
38 if (mt == IMAGE_FILE_MACHINE_UNKNOWN)
39 return true;
40 switch (ctx.config.machine) {
41 case ARM64:
42 return mt == ARM64 || mt == ARM64X;
43 case ARM64EC:
44 return COFF::isArm64EC(mt) || mt == AMD64;
45 case ARM64X:
46 return COFF::isAnyArm64(mt) || mt == AMD64;
47 default:
48 return ctx.config.machine == mt;
49 }
50 }
51
addFile(InputFile * file)52 void SymbolTable::addFile(InputFile *file) {
53 log("Reading " + toString(file));
54 if (file->lazy) {
55 if (auto *f = dyn_cast<BitcodeFile>(file))
56 f->parseLazy();
57 else
58 cast<ObjFile>(file)->parseLazy();
59 } else {
60 file->parse();
61 if (auto *f = dyn_cast<ObjFile>(file)) {
62 ctx.objFileInstances.push_back(f);
63 } else if (auto *f = dyn_cast<BitcodeFile>(file)) {
64 if (ltoCompilationDone) {
65 error("LTO object file " + toString(file) + " linked in after "
66 "doing LTO compilation.");
67 }
68 ctx.bitcodeFileInstances.push_back(f);
69 } else if (auto *f = dyn_cast<ImportFile>(file)) {
70 ctx.importFileInstances.push_back(f);
71 }
72 }
73
74 MachineTypes mt = file->getMachineType();
75 if (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN) {
76 ctx.config.machine = mt;
77 ctx.driver.addWinSysRootLibSearchPaths();
78 } else if (!compatibleMachineType(ctx, mt)) {
79 error(toString(file) + ": machine type " + machineToStr(mt) +
80 " conflicts with " + machineToStr(ctx.config.machine));
81 return;
82 }
83
84 ctx.driver.parseDirectives(file);
85 }
86
errorOrWarn(const Twine & s,bool forceUnresolved)87 static void errorOrWarn(const Twine &s, bool forceUnresolved) {
88 if (forceUnresolved)
89 warn(s);
90 else
91 error(s);
92 }
93
94 // Causes the file associated with a lazy symbol to be linked in.
forceLazy(Symbol * s)95 static void forceLazy(Symbol *s) {
96 s->pendingArchiveLoad = true;
97 switch (s->kind()) {
98 case Symbol::Kind::LazyArchiveKind: {
99 auto *l = cast<LazyArchive>(s);
100 l->file->addMember(l->sym);
101 break;
102 }
103 case Symbol::Kind::LazyObjectKind: {
104 InputFile *file = cast<LazyObject>(s)->file;
105 file->ctx.symtab.addFile(file);
106 break;
107 }
108 case Symbol::Kind::LazyDLLSymbolKind: {
109 auto *l = cast<LazyDLLSymbol>(s);
110 l->file->makeImport(l->sym);
111 break;
112 }
113 default:
114 llvm_unreachable(
115 "symbol passed to forceLazy is not a LazyArchive or LazyObject");
116 }
117 }
118
119 // Returns the symbol in SC whose value is <= Addr that is closest to Addr.
120 // This is generally the global variable or function whose definition contains
121 // Addr.
getSymbol(SectionChunk * sc,uint32_t addr)122 static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
123 DefinedRegular *candidate = nullptr;
124
125 for (Symbol *s : sc->file->getSymbols()) {
126 auto *d = dyn_cast_or_null<DefinedRegular>(s);
127 if (!d || !d->data || d->file != sc->file || d->getChunk() != sc ||
128 d->getValue() > addr ||
129 (candidate && d->getValue() < candidate->getValue()))
130 continue;
131
132 candidate = d;
133 }
134
135 return candidate;
136 }
137
getSymbolLocations(BitcodeFile * file)138 static std::vector<std::string> getSymbolLocations(BitcodeFile *file) {
139 std::string res("\n>>> referenced by ");
140 StringRef source = file->obj->getSourceFileName();
141 if (!source.empty())
142 res += source.str() + "\n>>> ";
143 res += toString(file);
144 return {res};
145 }
146
147 static std::optional<std::pair<StringRef, uint32_t>>
getFileLineDwarf(const SectionChunk * c,uint32_t addr)148 getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
149 std::optional<DILineInfo> optionalLineInfo =
150 c->file->getDILineInfo(addr, c->getSectionNumber() - 1);
151 if (!optionalLineInfo)
152 return std::nullopt;
153 const DILineInfo &lineInfo = *optionalLineInfo;
154 if (lineInfo.FileName == DILineInfo::BadString)
155 return std::nullopt;
156 return std::make_pair(saver().save(lineInfo.FileName), lineInfo.Line);
157 }
158
159 static std::optional<std::pair<StringRef, uint32_t>>
getFileLine(const SectionChunk * c,uint32_t addr)160 getFileLine(const SectionChunk *c, uint32_t addr) {
161 // MinGW can optionally use codeview, even if the default is dwarf.
162 std::optional<std::pair<StringRef, uint32_t>> fileLine =
163 getFileLineCodeView(c, addr);
164 // If codeview didn't yield any result, check dwarf in MinGW mode.
165 if (!fileLine && c->file->ctx.config.mingw)
166 fileLine = getFileLineDwarf(c, addr);
167 return fileLine;
168 }
169
170 // Given a file and the index of a symbol in that file, returns a description
171 // of all references to that symbol from that file. If no debug information is
172 // available, returns just the name of the file, else one string per actual
173 // reference as described in the debug info.
174 // Returns up to maxStrings string descriptions, along with the total number of
175 // locations found.
176 static std::pair<std::vector<std::string>, size_t>
getSymbolLocations(ObjFile * file,uint32_t symIndex,size_t maxStrings)177 getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) {
178 struct Location {
179 Symbol *sym;
180 std::pair<StringRef, uint32_t> fileLine;
181 };
182 std::vector<Location> locations;
183 size_t numLocations = 0;
184
185 for (Chunk *c : file->getChunks()) {
186 auto *sc = dyn_cast<SectionChunk>(c);
187 if (!sc)
188 continue;
189 for (const coff_relocation &r : sc->getRelocs()) {
190 if (r.SymbolTableIndex != symIndex)
191 continue;
192 numLocations++;
193 if (locations.size() >= maxStrings)
194 continue;
195
196 std::optional<std::pair<StringRef, uint32_t>> fileLine =
197 getFileLine(sc, r.VirtualAddress);
198 Symbol *sym = getSymbol(sc, r.VirtualAddress);
199 if (fileLine)
200 locations.push_back({sym, *fileLine});
201 else if (sym)
202 locations.push_back({sym, {"", 0}});
203 }
204 }
205
206 if (maxStrings == 0)
207 return std::make_pair(std::vector<std::string>(), numLocations);
208
209 if (numLocations == 0)
210 return std::make_pair(
211 std::vector<std::string>{"\n>>> referenced by " + toString(file)}, 1);
212
213 std::vector<std::string> symbolLocations(locations.size());
214 size_t i = 0;
215 for (Location loc : locations) {
216 llvm::raw_string_ostream os(symbolLocations[i++]);
217 os << "\n>>> referenced by ";
218 if (!loc.fileLine.first.empty())
219 os << loc.fileLine.first << ":" << loc.fileLine.second
220 << "\n>>> ";
221 os << toString(file);
222 if (loc.sym)
223 os << ":(" << toString(file->ctx, *loc.sym) << ')';
224 }
225 return std::make_pair(symbolLocations, numLocations);
226 }
227
getSymbolLocations(ObjFile * file,uint32_t symIndex)228 std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
229 return getSymbolLocations(file, symIndex, SIZE_MAX).first;
230 }
231
232 static std::pair<std::vector<std::string>, size_t>
getSymbolLocations(InputFile * file,uint32_t symIndex,size_t maxStrings)233 getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) {
234 if (auto *o = dyn_cast<ObjFile>(file))
235 return getSymbolLocations(o, symIndex, maxStrings);
236 if (auto *b = dyn_cast<BitcodeFile>(file)) {
237 std::vector<std::string> symbolLocations = getSymbolLocations(b);
238 size_t numLocations = symbolLocations.size();
239 if (symbolLocations.size() > maxStrings)
240 symbolLocations.resize(maxStrings);
241 return std::make_pair(symbolLocations, numLocations);
242 }
243 llvm_unreachable("unsupported file type passed to getSymbolLocations");
244 return std::make_pair(std::vector<std::string>(), (size_t)0);
245 }
246
// For an undefined symbol, stores all files referencing it and the index of
// the undefined symbol in each file.
struct UndefinedDiag {
  // The undefined symbol this diagnostic is about.
  Symbol *sym;
  // One referencing file together with the symbol's index in that file.
  struct File {
    InputFile *file;
    uint32_t symIndex;
  };
  // All referencing files collected for sym.
  std::vector<File> files;
};
257
reportUndefinedSymbol(const COFFLinkerContext & ctx,const UndefinedDiag & undefDiag)258 static void reportUndefinedSymbol(const COFFLinkerContext &ctx,
259 const UndefinedDiag &undefDiag) {
260 std::string out;
261 llvm::raw_string_ostream os(out);
262 os << "undefined symbol: " << toString(ctx, *undefDiag.sym);
263
264 const size_t maxUndefReferences = 3;
265 size_t numDisplayedRefs = 0, numRefs = 0;
266 for (const UndefinedDiag::File &ref : undefDiag.files) {
267 auto [symbolLocations, totalLocations] = getSymbolLocations(
268 ref.file, ref.symIndex, maxUndefReferences - numDisplayedRefs);
269
270 numRefs += totalLocations;
271 numDisplayedRefs += symbolLocations.size();
272 for (const std::string &s : symbolLocations) {
273 os << s;
274 }
275 }
276 if (numDisplayedRefs < numRefs)
277 os << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times";
278 errorOrWarn(os.str(), ctx.config.forceUnresolved);
279 }
280
loadMinGWSymbols()281 void SymbolTable::loadMinGWSymbols() {
282 for (auto &i : symMap) {
283 Symbol *sym = i.second;
284 auto *undef = dyn_cast<Undefined>(sym);
285 if (!undef)
286 continue;
287 if (undef->getWeakAlias())
288 continue;
289
290 StringRef name = undef->getName();
291
292 if (ctx.config.machine == I386 && ctx.config.stdcallFixup) {
293 // Check if we can resolve an undefined decorated symbol by finding
294 // the intended target as an undecorated symbol (only with a leading
295 // underscore).
296 StringRef origName = name;
297 StringRef baseName = name;
298 // Trim down stdcall/fastcall/vectorcall symbols to the base name.
299 baseName = ltrim1(baseName, "_@");
300 baseName = baseName.substr(0, baseName.find('@'));
301 // Add a leading underscore, as it would be in cdecl form.
302 std::string newName = ("_" + baseName).str();
303 Symbol *l;
304 if (newName != origName && (l = find(newName)) != nullptr) {
305 // If we found a symbol and it is lazy; load it.
306 if (l->isLazy() && !l->pendingArchiveLoad) {
307 log("Loading lazy " + l->getName() + " from " +
308 l->getFile()->getName() + " for stdcall fixup");
309 forceLazy(l);
310 }
311 // If it's lazy or already defined, hook it up as weak alias.
312 if (l->isLazy() || isa<Defined>(l)) {
313 if (ctx.config.warnStdcallFixup)
314 warn("Resolving " + origName + " by linking to " + newName);
315 else
316 log("Resolving " + origName + " by linking to " + newName);
317 undef->weakAlias = l;
318 continue;
319 }
320 }
321 }
322
323 if (ctx.config.autoImport) {
324 if (name.starts_with("__imp_"))
325 continue;
326 // If we have an undefined symbol, but we have a lazy symbol we could
327 // load, load it.
328 Symbol *l = find(("__imp_" + name).str());
329 if (!l || l->pendingArchiveLoad || !l->isLazy())
330 continue;
331
332 log("Loading lazy " + l->getName() + " from " + l->getFile()->getName() +
333 " for automatic import");
334 forceLazy(l);
335 }
336 }
337 }
338
impSymbol(StringRef name)339 Defined *SymbolTable::impSymbol(StringRef name) {
340 if (name.starts_with("__imp_"))
341 return nullptr;
342 return dyn_cast_or_null<Defined>(find(("__imp_" + name).str()));
343 }
344
handleMinGWAutomaticImport(Symbol * sym,StringRef name)345 bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
346 Defined *imp = impSymbol(name);
347 if (!imp)
348 return false;
349
350 // Replace the reference directly to a variable with a reference
351 // to the import address table instead. This obviously isn't right,
352 // but we mark the symbol as isRuntimePseudoReloc, and a later pass
353 // will add runtime pseudo relocations for every relocation against
354 // this Symbol. The runtime pseudo relocation framework expects the
355 // reference itself to point at the IAT entry.
356 size_t impSize = 0;
357 if (isa<DefinedImportData>(imp)) {
358 log("Automatically importing " + name + " from " +
359 cast<DefinedImportData>(imp)->getDLLName());
360 impSize = sizeof(DefinedImportData);
361 } else if (isa<DefinedRegular>(imp)) {
362 log("Automatically importing " + name + " from " +
363 toString(cast<DefinedRegular>(imp)->file));
364 impSize = sizeof(DefinedRegular);
365 } else {
366 warn("unable to automatically import " + name + " from " + imp->getName() +
367 " from " + toString(cast<DefinedRegular>(imp)->file) +
368 "; unexpected symbol type");
369 return false;
370 }
371 sym->replaceKeepingName(imp, impSize);
372 sym->isRuntimePseudoReloc = true;
373
374 // There may exist symbols named .refptr.<name> which only consist
375 // of a single pointer to <name>. If it turns out <name> is
376 // automatically imported, we don't need to keep the .refptr.<name>
377 // pointer at all, but redirect all accesses to it to the IAT entry
378 // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
379 DefinedRegular *refptr =
380 dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
381 if (refptr && refptr->getChunk()->getSize() == ctx.config.wordsize) {
382 SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
383 if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
384 log("Replacing .refptr." + name + " with " + imp->getName());
385 refptr->getChunk()->live = false;
386 refptr->replaceKeepingName(imp, impSize);
387 }
388 }
389 return true;
390 }
391
392 /// Helper function for reportUnresolvable and resolveRemainingUndefines.
393 /// This function emits an "undefined symbol" diagnostic for each symbol in
394 /// undefs. If localImports is not nullptr, it also emits a "locally
395 /// defined symbol imported" diagnostic for symbols in localImports.
396 /// objFiles and bitcodeFiles (if not nullptr) are used to report where
397 /// undefined symbols are referenced.
reportProblemSymbols(const COFFLinkerContext & ctx,const SmallPtrSetImpl<Symbol * > & undefs,const DenseMap<Symbol *,Symbol * > * localImports,bool needBitcodeFiles)398 static void reportProblemSymbols(
399 const COFFLinkerContext &ctx, const SmallPtrSetImpl<Symbol *> &undefs,
400 const DenseMap<Symbol *, Symbol *> *localImports, bool needBitcodeFiles) {
401 // Return early if there is nothing to report (which should be
402 // the common case).
403 if (undefs.empty() && (!localImports || localImports->empty()))
404 return;
405
406 for (Symbol *b : ctx.config.gcroot) {
407 if (undefs.count(b))
408 errorOrWarn("<root>: undefined symbol: " + toString(ctx, *b),
409 ctx.config.forceUnresolved);
410 if (localImports)
411 if (Symbol *imp = localImports->lookup(b))
412 warn("<root>: locally defined symbol imported: " + toString(ctx, *imp) +
413 " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
414 }
415
416 std::vector<UndefinedDiag> undefDiags;
417 DenseMap<Symbol *, int> firstDiag;
418
419 auto processFile = [&](InputFile *file, ArrayRef<Symbol *> symbols) {
420 uint32_t symIndex = (uint32_t)-1;
421 for (Symbol *sym : symbols) {
422 ++symIndex;
423 if (!sym)
424 continue;
425 if (undefs.count(sym)) {
426 auto it = firstDiag.find(sym);
427 if (it == firstDiag.end()) {
428 firstDiag[sym] = undefDiags.size();
429 undefDiags.push_back({sym, {{file, symIndex}}});
430 } else {
431 undefDiags[it->second].files.push_back({file, symIndex});
432 }
433 }
434 if (localImports)
435 if (Symbol *imp = localImports->lookup(sym))
436 warn(toString(file) +
437 ": locally defined symbol imported: " + toString(ctx, *imp) +
438 " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
439 }
440 };
441
442 for (ObjFile *file : ctx.objFileInstances)
443 processFile(file, file->getSymbols());
444
445 if (needBitcodeFiles)
446 for (BitcodeFile *file : ctx.bitcodeFileInstances)
447 processFile(file, file->getSymbols());
448
449 for (const UndefinedDiag &undefDiag : undefDiags)
450 reportUndefinedSymbol(ctx, undefDiag);
451 }
452
reportUnresolvable()453 void SymbolTable::reportUnresolvable() {
454 SmallPtrSet<Symbol *, 8> undefs;
455 for (auto &i : symMap) {
456 Symbol *sym = i.second;
457 auto *undef = dyn_cast<Undefined>(sym);
458 if (!undef || sym->deferUndefined)
459 continue;
460 if (undef->getWeakAlias())
461 continue;
462 StringRef name = undef->getName();
463 if (name.starts_with("__imp_")) {
464 Symbol *imp = find(name.substr(strlen("__imp_")));
465 if (Defined *def = dyn_cast_or_null<Defined>(imp)) {
466 def->isUsedInRegularObj = true;
467 continue;
468 }
469 }
470 if (name.contains("_PchSym_"))
471 continue;
472 if (ctx.config.autoImport && impSymbol(name))
473 continue;
474 undefs.insert(sym);
475 }
476
477 reportProblemSymbols(ctx, undefs,
478 /* localImports */ nullptr, true);
479 }
480
resolveRemainingUndefines()481 void SymbolTable::resolveRemainingUndefines() {
482 llvm::TimeTraceScope timeScope("Resolve remaining undefined symbols");
483 SmallPtrSet<Symbol *, 8> undefs;
484 DenseMap<Symbol *, Symbol *> localImports;
485
486 for (auto &i : symMap) {
487 Symbol *sym = i.second;
488 auto *undef = dyn_cast<Undefined>(sym);
489 if (!undef)
490 continue;
491 if (!sym->isUsedInRegularObj)
492 continue;
493
494 StringRef name = undef->getName();
495
496 // A weak alias may have been resolved, so check for that.
497 if (Defined *d = undef->getWeakAlias()) {
498 // We want to replace Sym with D. However, we can't just blindly
499 // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
500 // internal symbol, and internal symbols are stored as "unparented"
501 // Symbols. For that reason we need to check which type of symbol we
502 // are dealing with and copy the correct number of bytes.
503 if (isa<DefinedRegular>(d))
504 memcpy(sym, d, sizeof(DefinedRegular));
505 else if (isa<DefinedAbsolute>(d))
506 memcpy(sym, d, sizeof(DefinedAbsolute));
507 else
508 memcpy(sym, d, sizeof(SymbolUnion));
509 continue;
510 }
511
512 // If we can resolve a symbol by removing __imp_ prefix, do that.
513 // This odd rule is for compatibility with MSVC linker.
514 if (name.starts_with("__imp_")) {
515 Symbol *imp = find(name.substr(strlen("__imp_")));
516 if (imp && isa<Defined>(imp)) {
517 auto *d = cast<Defined>(imp);
518 replaceSymbol<DefinedLocalImport>(sym, ctx, name, d);
519 localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
520 localImports[sym] = d;
521 continue;
522 }
523 }
524
525 // We don't want to report missing Microsoft precompiled headers symbols.
526 // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
527 if (name.contains("_PchSym_"))
528 continue;
529
530 if (ctx.config.autoImport && handleMinGWAutomaticImport(sym, name))
531 continue;
532
533 // Remaining undefined symbols are not fatal if /force is specified.
534 // They are replaced with dummy defined symbols.
535 if (ctx.config.forceUnresolved)
536 replaceSymbol<DefinedAbsolute>(sym, ctx, name, 0);
537 undefs.insert(sym);
538 }
539
540 reportProblemSymbols(
541 ctx, undefs,
542 ctx.config.warnLocallyDefinedImported ? &localImports : nullptr, false);
543 }
544
insert(StringRef name)545 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
546 bool inserted = false;
547 Symbol *&sym = symMap[CachedHashStringRef(name)];
548 if (!sym) {
549 sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
550 sym->isUsedInRegularObj = false;
551 sym->pendingArchiveLoad = false;
552 sym->canInline = true;
553 inserted = true;
554 }
555 return {sym, inserted};
556 }
557
insert(StringRef name,InputFile * file)558 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
559 std::pair<Symbol *, bool> result = insert(name);
560 if (!file || !isa<BitcodeFile>(file))
561 result.first->isUsedInRegularObj = true;
562 return result;
563 }
564
// Records the (from, to) symbol pair in entryThunks; the recorded pairs are
// consumed later by initializeEntryThunks().
void SymbolTable::addEntryThunk(Symbol *from, Symbol *to) {
  entryThunks.push_back({from, to});
}
568
initializeEntryThunks()569 void SymbolTable::initializeEntryThunks() {
570 for (auto it : entryThunks) {
571 auto *to = dyn_cast<Defined>(it.second);
572 if (!to)
573 continue;
574 auto *from = dyn_cast<DefinedRegular>(it.first);
575 // We need to be able to add padding to the function and fill it with an
576 // offset to its entry thunks. To ensure that padding the function is
577 // feasible, functions are required to be COMDAT symbols with no offset.
578 if (!from || !from->getChunk()->isCOMDAT() ||
579 cast<DefinedRegular>(from)->getValue()) {
580 error("non COMDAT symbol '" + from->getName() + "' in hybrid map");
581 continue;
582 }
583 from->getChunk()->setEntryThunk(to);
584 }
585 }
586
addUndefined(StringRef name,InputFile * f,bool isWeakAlias)587 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
588 bool isWeakAlias) {
589 auto [s, wasInserted] = insert(name, f);
590 if (wasInserted || (s->isLazy() && isWeakAlias)) {
591 replaceSymbol<Undefined>(s, name);
592 return s;
593 }
594 if (s->isLazy())
595 forceLazy(s);
596 return s;
597 }
598
addLazyArchive(ArchiveFile * f,const Archive::Symbol & sym)599 void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
600 StringRef name = sym.getName();
601 auto [s, wasInserted] = insert(name);
602 if (wasInserted) {
603 replaceSymbol<LazyArchive>(s, f, sym);
604 return;
605 }
606 auto *u = dyn_cast<Undefined>(s);
607 if (!u || u->weakAlias || s->pendingArchiveLoad)
608 return;
609 s->pendingArchiveLoad = true;
610 f->addMember(sym);
611 }
612
addLazyObject(InputFile * f,StringRef n)613 void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
614 assert(f->lazy);
615 auto [s, wasInserted] = insert(n, f);
616 if (wasInserted) {
617 replaceSymbol<LazyObject>(s, f, n);
618 return;
619 }
620 auto *u = dyn_cast<Undefined>(s);
621 if (!u || u->weakAlias || s->pendingArchiveLoad)
622 return;
623 s->pendingArchiveLoad = true;
624 f->lazy = false;
625 addFile(f);
626 }
627
addLazyDLLSymbol(DLLFile * f,DLLFile::Symbol * sym,StringRef n)628 void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
629 StringRef n) {
630 auto [s, wasInserted] = insert(n);
631 if (wasInserted) {
632 replaceSymbol<LazyDLLSymbol>(s, f, sym, n);
633 return;
634 }
635 auto *u = dyn_cast<Undefined>(s);
636 if (!u || u->weakAlias || s->pendingArchiveLoad)
637 return;
638 s->pendingArchiveLoad = true;
639 f->makeImport(sym);
640 }
641
getSourceLocationBitcode(BitcodeFile * file)642 static std::string getSourceLocationBitcode(BitcodeFile *file) {
643 std::string res("\n>>> defined at ");
644 StringRef source = file->obj->getSourceFileName();
645 if (!source.empty())
646 res += source.str() + "\n>>> ";
647 res += toString(file);
648 return res;
649 }
650
getSourceLocationObj(ObjFile * file,SectionChunk * sc,uint32_t offset,StringRef name)651 static std::string getSourceLocationObj(ObjFile *file, SectionChunk *sc,
652 uint32_t offset, StringRef name) {
653 std::optional<std::pair<StringRef, uint32_t>> fileLine;
654 if (sc)
655 fileLine = getFileLine(sc, offset);
656 if (!fileLine)
657 fileLine = file->getVariableLocation(name);
658
659 std::string res;
660 llvm::raw_string_ostream os(res);
661 os << "\n>>> defined at ";
662 if (fileLine)
663 os << fileLine->first << ":" << fileLine->second << "\n>>> ";
664 os << toString(file);
665 return os.str();
666 }
667
getSourceLocation(InputFile * file,SectionChunk * sc,uint32_t offset,StringRef name)668 static std::string getSourceLocation(InputFile *file, SectionChunk *sc,
669 uint32_t offset, StringRef name) {
670 if (!file)
671 return "";
672 if (auto *o = dyn_cast<ObjFile>(file))
673 return getSourceLocationObj(o, sc, offset, name);
674 if (auto *b = dyn_cast<BitcodeFile>(file))
675 return getSourceLocationBitcode(b);
676 return "\n>>> defined at " + toString(file);
677 }
678
679 // Construct and print an error message in the form of:
680 //
681 // lld-link: error: duplicate symbol: foo
682 // >>> defined at bar.c:30
683 // >>> bar.o
684 // >>> defined at baz.c:563
685 // >>> baz.o
reportDuplicate(Symbol * existing,InputFile * newFile,SectionChunk * newSc,uint32_t newSectionOffset)686 void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile,
687 SectionChunk *newSc,
688 uint32_t newSectionOffset) {
689 std::string msg;
690 llvm::raw_string_ostream os(msg);
691 os << "duplicate symbol: " << toString(ctx, *existing);
692
693 DefinedRegular *d = dyn_cast<DefinedRegular>(existing);
694 if (d && isa<ObjFile>(d->getFile())) {
695 os << getSourceLocation(d->getFile(), d->getChunk(), d->getValue(),
696 existing->getName());
697 } else {
698 os << getSourceLocation(existing->getFile(), nullptr, 0, "");
699 }
700 os << getSourceLocation(newFile, newSc, newSectionOffset,
701 existing->getName());
702
703 if (ctx.config.forceMultiple)
704 warn(os.str());
705 else
706 error(os.str());
707 }
708
addAbsolute(StringRef n,COFFSymbolRef sym)709 Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
710 auto [s, wasInserted] = insert(n, nullptr);
711 s->isUsedInRegularObj = true;
712 if (wasInserted || isa<Undefined>(s) || s->isLazy())
713 replaceSymbol<DefinedAbsolute>(s, ctx, n, sym);
714 else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
715 if (da->getVA() != sym.getValue())
716 reportDuplicate(s, nullptr);
717 } else if (!isa<DefinedCOFF>(s))
718 reportDuplicate(s, nullptr);
719 return s;
720 }
721
addAbsolute(StringRef n,uint64_t va)722 Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
723 auto [s, wasInserted] = insert(n, nullptr);
724 s->isUsedInRegularObj = true;
725 if (wasInserted || isa<Undefined>(s) || s->isLazy())
726 replaceSymbol<DefinedAbsolute>(s, ctx, n, va);
727 else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
728 if (da->getVA() != va)
729 reportDuplicate(s, nullptr);
730 } else if (!isa<DefinedCOFF>(s))
731 reportDuplicate(s, nullptr);
732 return s;
733 }
734
addSynthetic(StringRef n,Chunk * c)735 Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
736 auto [s, wasInserted] = insert(n, nullptr);
737 s->isUsedInRegularObj = true;
738 if (wasInserted || isa<Undefined>(s) || s->isLazy())
739 replaceSymbol<DefinedSynthetic>(s, n, c);
740 else if (!isa<DefinedCOFF>(s))
741 reportDuplicate(s, nullptr);
742 return s;
743 }
744
addRegular(InputFile * f,StringRef n,const coff_symbol_generic * sym,SectionChunk * c,uint32_t sectionOffset,bool isWeak)745 Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
746 const coff_symbol_generic *sym, SectionChunk *c,
747 uint32_t sectionOffset, bool isWeak) {
748 auto [s, wasInserted] = insert(n, f);
749 if (wasInserted || !isa<DefinedRegular>(s) || s->isWeak)
750 replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ false,
751 /*IsExternal*/ true, sym, c, isWeak);
752 else if (!isWeak)
753 reportDuplicate(s, f, c, sectionOffset);
754 return s;
755 }
756
757 std::pair<DefinedRegular *, bool>
addComdat(InputFile * f,StringRef n,const coff_symbol_generic * sym)758 SymbolTable::addComdat(InputFile *f, StringRef n,
759 const coff_symbol_generic *sym) {
760 auto [s, wasInserted] = insert(n, f);
761 if (wasInserted || !isa<DefinedRegular>(s)) {
762 replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
763 /*IsExternal*/ true, sym, nullptr);
764 return {cast<DefinedRegular>(s), true};
765 }
766 auto *existingSymbol = cast<DefinedRegular>(s);
767 if (!existingSymbol->isCOMDAT)
768 reportDuplicate(s, f);
769 return {existingSymbol, false};
770 }
771
addCommon(InputFile * f,StringRef n,uint64_t size,const coff_symbol_generic * sym,CommonChunk * c)772 Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
773 const coff_symbol_generic *sym, CommonChunk *c) {
774 auto [s, wasInserted] = insert(n, f);
775 if (wasInserted || !isa<DefinedCOFF>(s))
776 replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
777 else if (auto *dc = dyn_cast<DefinedCommon>(s))
778 if (size > dc->getSize())
779 replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
780 return s;
781 }
782
addImportData(StringRef n,ImportFile * f)783 Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) {
784 auto [s, wasInserted] = insert(n, nullptr);
785 s->isUsedInRegularObj = true;
786 if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
787 replaceSymbol<DefinedImportData>(s, n, f);
788 return s;
789 }
790
791 reportDuplicate(s, f);
792 return nullptr;
793 }
794
addImportThunk(StringRef name,DefinedImportData * id,uint16_t machine)795 Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
796 uint16_t machine) {
797 auto [s, wasInserted] = insert(name, nullptr);
798 s->isUsedInRegularObj = true;
799 if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
800 replaceSymbol<DefinedImportThunk>(s, ctx, name, id, machine);
801 return s;
802 }
803
804 reportDuplicate(s, id->file);
805 return nullptr;
806 }
807
addLibcall(StringRef name)808 void SymbolTable::addLibcall(StringRef name) {
809 Symbol *sym = findUnderscore(name);
810 if (!sym)
811 return;
812
813 if (auto *l = dyn_cast<LazyArchive>(sym)) {
814 MemoryBufferRef mb = l->getMemberBuffer();
815 if (isBitcode(mb))
816 addUndefined(sym->getName());
817 } else if (LazyObject *o = dyn_cast<LazyObject>(sym)) {
818 if (isBitcode(o->file->mb))
819 addUndefined(sym->getName());
820 }
821 }
822
getChunks() const823 std::vector<Chunk *> SymbolTable::getChunks() const {
824 std::vector<Chunk *> res;
825 for (ObjFile *file : ctx.objFileInstances) {
826 ArrayRef<Chunk *> v = file->getChunks();
827 res.insert(res.end(), v.begin(), v.end());
828 }
829 return res;
830 }
831
find(StringRef name) const832 Symbol *SymbolTable::find(StringRef name) const {
833 return symMap.lookup(CachedHashStringRef(name));
834 }
835
findUnderscore(StringRef name) const836 Symbol *SymbolTable::findUnderscore(StringRef name) const {
837 if (ctx.config.machine == I386)
838 return find(("_" + name).str());
839 return find(name);
840 }
841
842 // Return all symbols that start with Prefix, possibly ignoring the first
843 // character of Prefix or the first character symbol.
getSymsWithPrefix(StringRef prefix)844 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
845 std::vector<Symbol *> syms;
846 for (auto pair : symMap) {
847 StringRef name = pair.first.val();
848 if (name.starts_with(prefix) || name.starts_with(prefix.drop_front()) ||
849 name.drop_front().starts_with(prefix) ||
850 name.drop_front().starts_with(prefix.drop_front())) {
851 syms.push_back(pair.second);
852 }
853 }
854 return syms;
855 }
856
findMangle(StringRef name)857 Symbol *SymbolTable::findMangle(StringRef name) {
858 if (Symbol *sym = find(name)) {
859 if (auto *u = dyn_cast<Undefined>(sym)) {
860 // We're specifically looking for weak aliases that ultimately resolve to
861 // defined symbols, hence the call to getWeakAlias() instead of just using
862 // the weakAlias member variable. This matches link.exe's behavior.
863 if (Symbol *weakAlias = u->getWeakAlias())
864 return weakAlias;
865 } else {
866 return sym;
867 }
868 }
869
870 // Efficient fuzzy string lookup is impossible with a hash table, so iterate
871 // the symbol table once and collect all possibly matching symbols into this
872 // vector. Then compare each possibly matching symbol with each possible
873 // mangling.
874 std::vector<Symbol *> syms = getSymsWithPrefix(name);
875 auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
876 std::string prefix = t.str();
877 for (auto *s : syms)
878 if (s->getName().starts_with(prefix))
879 return s;
880 return nullptr;
881 };
882
883 // For non-x86, just look for C++ functions.
884 if (ctx.config.machine != I386)
885 return findByPrefix("?" + name + "@@Y");
886
887 if (!name.starts_with("_"))
888 return nullptr;
889 // Search for x86 stdcall function.
890 if (Symbol *s = findByPrefix(name + "@"))
891 return s;
892 // Search for x86 fastcall function.
893 if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
894 return s;
895 // Search for x86 vectorcall function.
896 if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
897 return s;
898 // Search for x86 C++ non-member function.
899 return findByPrefix("?" + name.substr(1) + "@@Y");
900 }
901
addUndefined(StringRef name)902 Symbol *SymbolTable::addUndefined(StringRef name) {
903 return addUndefined(name, nullptr, false);
904 }
905
compileBitcodeFiles()906 void SymbolTable::compileBitcodeFiles() {
907 ltoCompilationDone = true;
908 if (ctx.bitcodeFileInstances.empty())
909 return;
910
911 llvm::TimeTraceScope timeScope("Compile bitcode");
912 ScopedTimer t(ctx.ltoTimer);
913 lto.reset(new BitcodeCompiler(ctx));
914 for (BitcodeFile *f : ctx.bitcodeFileInstances)
915 lto->add(*f);
916 for (InputFile *newObj : lto->compile()) {
917 ObjFile *obj = cast<ObjFile>(newObj);
918 obj->parse();
919 ctx.objFileInstances.push_back(obj);
920 }
921 }
922
923 } // namespace lld::coff
924