1 //===- SymbolTable.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SymbolTable.h" 10 #include "Config.h" 11 #include "Driver.h" 12 #include "LTO.h" 13 #include "PDB.h" 14 #include "Symbols.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Memory.h" 17 #include "lld/Common/Timer.h" 18 #include "llvm/IR/LLVMContext.h" 19 #include "llvm/Object/WindowsMachineFlag.h" 20 #include "llvm/Support/Debug.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include <utility> 23 24 using namespace llvm; 25 26 namespace lld { 27 namespace coff { 28 29 static Timer ltoTimer("LTO", Timer::root()); 30 31 SymbolTable *symtab; 32 33 void SymbolTable::addFile(InputFile *file) { 34 log("Reading " + toString(file)); 35 file->parse(); 36 37 MachineTypes mt = file->getMachineType(); 38 if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) { 39 config->machine = mt; 40 } else if (mt != IMAGE_FILE_MACHINE_UNKNOWN && config->machine != mt) { 41 error(toString(file) + ": machine type " + machineToStr(mt) + 42 " conflicts with " + machineToStr(config->machine)); 43 return; 44 } 45 46 if (auto *f = dyn_cast<ObjFile>(file)) { 47 ObjFile::instances.push_back(f); 48 } else if (auto *f = dyn_cast<BitcodeFile>(file)) { 49 BitcodeFile::instances.push_back(f); 50 } else if (auto *f = dyn_cast<ImportFile>(file)) { 51 ImportFile::instances.push_back(f); 52 } 53 54 driver->parseDirectives(file); 55 } 56 57 static void errorOrWarn(const Twine &s) { 58 if (config->forceUnresolved) 59 warn(s); 60 else 61 error(s); 62 } 63 64 // Returns the symbol in SC whose value is <= Addr that is closest to Addr. 65 // This is generally the global variable or function whose definition contains 66 // Addr. 67 static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) { 68 DefinedRegular *candidate = nullptr; 69 70 for (Symbol *s : sc->file->getSymbols()) { 71 auto *d = dyn_cast_or_null<DefinedRegular>(s); 72 if (!d || !d->data || d->getChunk() != sc || d->getValue() > addr || 73 (candidate && d->getValue() < candidate->getValue())) 74 continue; 75 76 candidate = d; 77 } 78 79 return candidate; 80 } 81 82 // Given a file and the index of a symbol in that file, returns a description 83 // of all references to that symbol from that file. If no debug information is 84 // available, returns just the name of the file, else one string per actual 85 // reference as described in the debug info. 86 std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) { 87 struct Location { 88 Symbol *sym; 89 std::pair<StringRef, uint32_t> fileLine; 90 }; 91 std::vector<Location> locations; 92 93 for (Chunk *c : file->getChunks()) { 94 auto *sc = dyn_cast<SectionChunk>(c); 95 if (!sc) 96 continue; 97 for (const coff_relocation &r : sc->getRelocs()) { 98 if (r.SymbolTableIndex != symIndex) 99 continue; 100 std::pair<StringRef, uint32_t> fileLine = 101 getFileLine(sc, r.VirtualAddress); 102 Symbol *sym = getSymbol(sc, r.VirtualAddress); 103 if (!fileLine.first.empty() || sym) 104 locations.push_back({sym, fileLine}); 105 } 106 } 107 108 if (locations.empty()) 109 return std::vector<std::string>({"\n>>> referenced by " + toString(file)}); 110 111 std::vector<std::string> symbolLocations(locations.size()); 112 size_t i = 0; 113 for (Location loc : locations) { 114 llvm::raw_string_ostream os(symbolLocations[i++]); 115 os << "\n>>> referenced by "; 116 if (!loc.fileLine.first.empty()) 117 os << loc.fileLine.first << ":" << loc.fileLine.second 118 << "\n>>> "; 119 os << toString(file); 120 if (loc.sym) 121 os << ":(" << toString(*loc.sym) << ')'; 122 } 123 return symbolLocations; 124 } 125 126 // For an undefined symbol, stores all files referencing it and the index of 127 // the undefined symbol in each file. 128 struct UndefinedDiag { 129 Symbol *sym; 130 struct File { 131 ObjFile *oFile; 132 uint64_t symIndex; 133 }; 134 std::vector<File> files; 135 }; 136 137 static void reportUndefinedSymbol(const UndefinedDiag &undefDiag) { 138 std::string out; 139 llvm::raw_string_ostream os(out); 140 os << "undefined symbol: " << toString(*undefDiag.sym); 141 142 const size_t maxUndefReferences = 10; 143 size_t i = 0, numRefs = 0; 144 for (const UndefinedDiag::File &ref : undefDiag.files) { 145 std::vector<std::string> symbolLocations = 146 getSymbolLocations(ref.oFile, ref.symIndex); 147 numRefs += symbolLocations.size(); 148 for (const std::string &s : symbolLocations) { 149 if (i >= maxUndefReferences) 150 break; 151 os << s; 152 i++; 153 } 154 } 155 if (i < numRefs) 156 os << "\n>>> referenced " << numRefs - i << " more times"; 157 errorOrWarn(os.str()); 158 } 159 160 void SymbolTable::loadMinGWAutomaticImports() { 161 for (auto &i : symMap) { 162 Symbol *sym = i.second; 163 auto *undef = dyn_cast<Undefined>(sym); 164 if (!undef) 165 continue; 166 if (!sym->isUsedInRegularObj) 167 continue; 168 169 StringRef name = undef->getName(); 170 171 if (name.startswith("__imp_")) 172 continue; 173 // If we have an undefined symbol, but we have a Lazy representing a 174 // symbol we could load from file, make sure to load that. 175 Lazy *l = dyn_cast_or_null<Lazy>(find(("__imp_" + name).str())); 176 if (!l || l->pendingArchiveLoad) 177 continue; 178 179 log("Loading lazy " + l->getName() + " from " + l->file->getName() + 180 " for automatic import"); 181 l->pendingArchiveLoad = true; 182 l->file->addMember(l->sym); 183 } 184 } 185 186 bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) { 187 if (name.startswith("__imp_")) 188 return false; 189 Defined *imp = dyn_cast_or_null<Defined>(find(("__imp_" + name).str())); 190 if (!imp) 191 return false; 192 193 // Replace the reference directly to a variable with a reference 194 // to the import address table instead. This obviously isn't right, 195 // but we mark the symbol as isRuntimePseudoReloc, and a later pass 196 // will add runtime pseudo relocations for every relocation against 197 // this Symbol. The runtime pseudo relocation framework expects the 198 // reference itself to point at the IAT entry. 199 size_t impSize = 0; 200 if (isa<DefinedImportData>(imp)) { 201 log("Automatically importing " + name + " from " + 202 cast<DefinedImportData>(imp)->getDLLName()); 203 impSize = sizeof(DefinedImportData); 204 } else if (isa<DefinedRegular>(imp)) { 205 log("Automatically importing " + name + " from " + 206 toString(cast<DefinedRegular>(imp)->file)); 207 impSize = sizeof(DefinedRegular); 208 } else { 209 warn("unable to automatically import " + name + " from " + imp->getName() + 210 " from " + toString(cast<DefinedRegular>(imp)->file) + 211 "; unexpected symbol type"); 212 return false; 213 } 214 sym->replaceKeepingName(imp, impSize); 215 sym->isRuntimePseudoReloc = true; 216 217 // There may exist symbols named .refptr.<name> which only consist 218 // of a single pointer to <name>. If it turns out <name> is 219 // automatically imported, we don't need to keep the .refptr.<name> 220 // pointer at all, but redirect all accesses to it to the IAT entry 221 // for __imp_<name> instead, and drop the whole .refptr.<name> chunk. 222 DefinedRegular *refptr = 223 dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str())); 224 if (refptr && refptr->getChunk()->getSize() == config->wordsize) { 225 SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk()); 226 if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) { 227 log("Replacing .refptr." + name + " with " + imp->getName()); 228 refptr->getChunk()->live = false; 229 refptr->replaceKeepingName(imp, impSize); 230 } 231 } 232 return true; 233 } 234 235 void SymbolTable::reportRemainingUndefines() { 236 SmallPtrSet<Symbol *, 8> undefs; 237 DenseMap<Symbol *, Symbol *> localImports; 238 239 for (auto &i : symMap) { 240 Symbol *sym = i.second; 241 auto *undef = dyn_cast<Undefined>(sym); 242 if (!undef) 243 continue; 244 if (!sym->isUsedInRegularObj) 245 continue; 246 247 StringRef name = undef->getName(); 248 249 // A weak alias may have been resolved, so check for that. 250 if (Defined *d = undef->getWeakAlias()) { 251 // We want to replace Sym with D. However, we can't just blindly 252 // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an 253 // internal symbol, and internal symbols are stored as "unparented" 254 // Symbols. For that reason we need to check which type of symbol we 255 // are dealing with and copy the correct number of bytes. 256 if (isa<DefinedRegular>(d)) 257 memcpy(sym, d, sizeof(DefinedRegular)); 258 else if (isa<DefinedAbsolute>(d)) 259 memcpy(sym, d, sizeof(DefinedAbsolute)); 260 else 261 memcpy(sym, d, sizeof(SymbolUnion)); 262 continue; 263 } 264 265 // If we can resolve a symbol by removing __imp_ prefix, do that. 266 // This odd rule is for compatibility with MSVC linker. 267 if (name.startswith("__imp_")) { 268 Symbol *imp = find(name.substr(strlen("__imp_"))); 269 if (imp && isa<Defined>(imp)) { 270 auto *d = cast<Defined>(imp); 271 replaceSymbol<DefinedLocalImport>(sym, name, d); 272 localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk()); 273 localImports[sym] = d; 274 continue; 275 } 276 } 277 278 // We don't want to report missing Microsoft precompiled headers symbols. 279 // A proper message will be emitted instead in PDBLinker::aquirePrecompObj 280 if (name.contains("_PchSym_")) 281 continue; 282 283 if (config->mingw && handleMinGWAutomaticImport(sym, name)) 284 continue; 285 286 // Remaining undefined symbols are not fatal if /force is specified. 287 // They are replaced with dummy defined symbols. 288 if (config->forceUnresolved) 289 replaceSymbol<DefinedAbsolute>(sym, name, 0); 290 undefs.insert(sym); 291 } 292 293 if (undefs.empty() && localImports.empty()) 294 return; 295 296 for (Symbol *b : config->gcroot) { 297 if (undefs.count(b)) 298 errorOrWarn("<root>: undefined symbol: " + toString(*b)); 299 if (config->warnLocallyDefinedImported) 300 if (Symbol *imp = localImports.lookup(b)) 301 warn("<root>: locally defined symbol imported: " + toString(*imp) + 302 " (defined in " + toString(imp->getFile()) + ") [LNK4217]"); 303 } 304 305 std::vector<UndefinedDiag> undefDiags; 306 DenseMap<Symbol *, int> firstDiag; 307 308 for (ObjFile *file : ObjFile::instances) { 309 size_t symIndex = (size_t)-1; 310 for (Symbol *sym : file->getSymbols()) { 311 ++symIndex; 312 if (!sym) 313 continue; 314 if (undefs.count(sym)) { 315 auto it = firstDiag.find(sym); 316 if (it == firstDiag.end()) { 317 firstDiag[sym] = undefDiags.size(); 318 undefDiags.push_back({sym, {{file, symIndex}}}); 319 } else { 320 undefDiags[it->second].files.push_back({file, symIndex}); 321 } 322 } 323 if (config->warnLocallyDefinedImported) 324 if (Symbol *imp = localImports.lookup(sym)) 325 warn(toString(file) + 326 ": locally defined symbol imported: " + toString(*imp) + 327 " (defined in " + toString(imp->getFile()) + ") [LNK4217]"); 328 } 329 } 330 331 for (const UndefinedDiag& undefDiag : undefDiags) 332 reportUndefinedSymbol(undefDiag); 333 } 334 335 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) { 336 bool inserted = false; 337 Symbol *&sym = symMap[CachedHashStringRef(name)]; 338 if (!sym) { 339 sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 340 sym->isUsedInRegularObj = false; 341 sym->pendingArchiveLoad = false; 342 inserted = true; 343 } 344 return {sym, inserted}; 345 } 346 347 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) { 348 std::pair<Symbol *, bool> result = insert(name); 349 if (!file || !isa<BitcodeFile>(file)) 350 result.first->isUsedInRegularObj = true; 351 return result; 352 } 353 354 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f, 355 bool isWeakAlias) { 356 Symbol *s; 357 bool wasInserted; 358 std::tie(s, wasInserted) = insert(name, f); 359 if (wasInserted || (isa<Lazy>(s) && isWeakAlias)) { 360 replaceSymbol<Undefined>(s, name); 361 return s; 362 } 363 if (auto *l = dyn_cast<Lazy>(s)) { 364 if (!s->pendingArchiveLoad) { 365 s->pendingArchiveLoad = true; 366 l->file->addMember(l->sym); 367 } 368 } 369 return s; 370 } 371 372 void SymbolTable::addLazy(ArchiveFile *f, const Archive::Symbol &sym) { 373 StringRef name = sym.getName(); 374 Symbol *s; 375 bool wasInserted; 376 std::tie(s, wasInserted) = insert(name); 377 if (wasInserted) { 378 replaceSymbol<Lazy>(s, f, sym); 379 return; 380 } 381 auto *u = dyn_cast<Undefined>(s); 382 if (!u || u->weakAlias || s->pendingArchiveLoad) 383 return; 384 s->pendingArchiveLoad = true; 385 f->addMember(sym); 386 } 387 388 void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile) { 389 std::string msg = "duplicate symbol: " + toString(*existing) + " in " + 390 toString(existing->getFile()) + " and in " + 391 toString(newFile); 392 393 if (config->forceMultiple) 394 warn(msg); 395 else 396 error(msg); 397 } 398 399 Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) { 400 Symbol *s; 401 bool wasInserted; 402 std::tie(s, wasInserted) = insert(n, nullptr); 403 s->isUsedInRegularObj = true; 404 if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s)) 405 replaceSymbol<DefinedAbsolute>(s, n, sym); 406 else if (!isa<DefinedCOFF>(s)) 407 reportDuplicate(s, nullptr); 408 return s; 409 } 410 411 Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) { 412 Symbol *s; 413 bool wasInserted; 414 std::tie(s, wasInserted) = insert(n, nullptr); 415 s->isUsedInRegularObj = true; 416 if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s)) 417 replaceSymbol<DefinedAbsolute>(s, n, va); 418 else if (!isa<DefinedCOFF>(s)) 419 reportDuplicate(s, nullptr); 420 return s; 421 } 422 423 Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) { 424 Symbol *s; 425 bool wasInserted; 426 std::tie(s, wasInserted) = insert(n, nullptr); 427 s->isUsedInRegularObj = true; 428 if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s)) 429 replaceSymbol<DefinedSynthetic>(s, n, c); 430 else if (!isa<DefinedCOFF>(s)) 431 reportDuplicate(s, nullptr); 432 return s; 433 } 434 435 Symbol *SymbolTable::addRegular(InputFile *f, StringRef n, 436 const coff_symbol_generic *sym, 437 SectionChunk *c) { 438 Symbol *s; 439 bool wasInserted; 440 std::tie(s, wasInserted) = insert(n, f); 441 if (wasInserted || !isa<DefinedRegular>(s)) 442 replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ false, 443 /*IsExternal*/ true, sym, c); 444 else 445 reportDuplicate(s, f); 446 return s; 447 } 448 449 std::pair<DefinedRegular *, bool> 450 SymbolTable::addComdat(InputFile *f, StringRef n, 451 const coff_symbol_generic *sym) { 452 Symbol *s; 453 bool wasInserted; 454 std::tie(s, wasInserted) = insert(n, f); 455 if (wasInserted || !isa<DefinedRegular>(s)) { 456 replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true, 457 /*IsExternal*/ true, sym, nullptr); 458 return {cast<DefinedRegular>(s), true}; 459 } 460 auto *existingSymbol = cast<DefinedRegular>(s); 461 if (!existingSymbol->isCOMDAT) 462 reportDuplicate(s, f); 463 return {existingSymbol, false}; 464 } 465 466 Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size, 467 const coff_symbol_generic *sym, CommonChunk *c) { 468 Symbol *s; 469 bool wasInserted; 470 std::tie(s, wasInserted) = insert(n, f); 471 if (wasInserted || !isa<DefinedCOFF>(s)) 472 replaceSymbol<DefinedCommon>(s, f, n, size, sym, c); 473 else if (auto *dc = dyn_cast<DefinedCommon>(s)) 474 if (size > dc->getSize()) 475 replaceSymbol<DefinedCommon>(s, f, n, size, sym, c); 476 return s; 477 } 478 479 Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) { 480 Symbol *s; 481 bool wasInserted; 482 std::tie(s, wasInserted) = insert(n, nullptr); 483 s->isUsedInRegularObj = true; 484 if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s)) { 485 replaceSymbol<DefinedImportData>(s, n, f); 486 return s; 487 } 488 489 reportDuplicate(s, f); 490 return nullptr; 491 } 492 493 Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id, 494 uint16_t machine) { 495 Symbol *s; 496 bool wasInserted; 497 std::tie(s, wasInserted) = insert(name, nullptr); 498 s->isUsedInRegularObj = true; 499 if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s)) { 500 replaceSymbol<DefinedImportThunk>(s, name, id, machine); 501 return s; 502 } 503 504 reportDuplicate(s, id->file); 505 return nullptr; 506 } 507 508 void SymbolTable::addLibcall(StringRef name) { 509 Symbol *sym = findUnderscore(name); 510 if (!sym) 511 return; 512 513 if (Lazy *l = dyn_cast<Lazy>(sym)) { 514 MemoryBufferRef mb = l->getMemberBuffer(); 515 if (identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode) 516 addUndefined(sym->getName()); 517 } 518 } 519 520 std::vector<Chunk *> SymbolTable::getChunks() { 521 std::vector<Chunk *> res; 522 for (ObjFile *file : ObjFile::instances) { 523 ArrayRef<Chunk *> v = file->getChunks(); 524 res.insert(res.end(), v.begin(), v.end()); 525 } 526 return res; 527 } 528 529 Symbol *SymbolTable::find(StringRef name) { 530 return symMap.lookup(CachedHashStringRef(name)); 531 } 532 533 Symbol *SymbolTable::findUnderscore(StringRef name) { 534 if (config->machine == I386) 535 return find(("_" + name).str()); 536 return find(name); 537 } 538 539 // Return all symbols that start with Prefix, possibly ignoring the first 540 // character of Prefix or the first character symbol. 541 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) { 542 std::vector<Symbol *> syms; 543 for (auto pair : symMap) { 544 StringRef name = pair.first.val(); 545 if (name.startswith(prefix) || name.startswith(prefix.drop_front()) || 546 name.drop_front().startswith(prefix) || 547 name.drop_front().startswith(prefix.drop_front())) { 548 syms.push_back(pair.second); 549 } 550 } 551 return syms; 552 } 553 554 Symbol *SymbolTable::findMangle(StringRef name) { 555 if (Symbol *sym = find(name)) 556 if (!isa<Undefined>(sym)) 557 return sym; 558 559 // Efficient fuzzy string lookup is impossible with a hash table, so iterate 560 // the symbol table once and collect all possibly matching symbols into this 561 // vector. Then compare each possibly matching symbol with each possible 562 // mangling. 563 std::vector<Symbol *> syms = getSymsWithPrefix(name); 564 auto findByPrefix = [&syms](const Twine &t) -> Symbol * { 565 std::string prefix = t.str(); 566 for (auto *s : syms) 567 if (s->getName().startswith(prefix)) 568 return s; 569 return nullptr; 570 }; 571 572 // For non-x86, just look for C++ functions. 573 if (config->machine != I386) 574 return findByPrefix("?" + name + "@@Y"); 575 576 if (!name.startswith("_")) 577 return nullptr; 578 // Search for x86 stdcall function. 579 if (Symbol *s = findByPrefix(name + "@")) 580 return s; 581 // Search for x86 fastcall function. 582 if (Symbol *s = findByPrefix("@" + name.substr(1) + "@")) 583 return s; 584 // Search for x86 vectorcall function. 585 if (Symbol *s = findByPrefix(name.substr(1) + "@@")) 586 return s; 587 // Search for x86 C++ non-member function. 588 return findByPrefix("?" + name.substr(1) + "@@Y"); 589 } 590 591 Symbol *SymbolTable::addUndefined(StringRef name) { 592 return addUndefined(name, nullptr, false); 593 } 594 595 std::vector<StringRef> SymbolTable::compileBitcodeFiles() { 596 lto.reset(new BitcodeCompiler); 597 for (BitcodeFile *f : BitcodeFile::instances) 598 lto->add(*f); 599 return lto->compile(); 600 } 601 602 void SymbolTable::addCombinedLTOObjects() { 603 if (BitcodeFile::instances.empty()) 604 return; 605 606 ScopedTimer t(ltoTimer); 607 for (StringRef object : compileBitcodeFiles()) { 608 auto *obj = make<ObjFile>(MemoryBufferRef(object, "lto.tmp")); 609 obj->parse(); 610 ObjFile::instances.push_back(obj); 611 } 612 } 613 614 } // namespace coff 615 } // namespace lld 616