1 //===- SymbolTable.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SymbolTable.h" 10 #include "ConcatOutputSection.h" 11 #include "Config.h" 12 #include "InputFiles.h" 13 #include "InputSection.h" 14 #include "Symbols.h" 15 #include "SyntheticSections.h" 16 #include "lld/Common/ErrorHandler.h" 17 #include "lld/Common/Memory.h" 18 #include "llvm/Demangle/Demangle.h" 19 20 using namespace llvm; 21 using namespace lld; 22 using namespace lld::macho; 23 24 Symbol *SymbolTable::find(CachedHashStringRef cachedName) { 25 auto it = symMap.find(cachedName); 26 if (it == symMap.end()) 27 return nullptr; 28 return symVector[it->second]; 29 } 30 31 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, 32 const InputFile *file) { 33 auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()}); 34 35 Symbol *sym; 36 if (!p.second) { 37 // Name already present in the symbol table. 38 sym = symVector[p.first->second]; 39 } else { 40 // Name is a new symbol. 41 sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 42 symVector.push_back(sym); 43 } 44 45 sym->isUsedInRegularObj |= !file || isa<ObjFile>(file); 46 return {sym, p.second}; 47 } 48 49 namespace { 50 struct DuplicateSymbolDiag { 51 // Pair containing source location and source file 52 const std::pair<std::string, std::string> src1; 53 const std::pair<std::string, std::string> src2; 54 const Symbol *sym; 55 56 DuplicateSymbolDiag(const std::pair<std::string, std::string> src1, 57 const std::pair<std::string, std::string> src2, 58 const Symbol *sym) 59 : src1(src1), src2(src2), sym(sym) {} 60 }; 61 SmallVector<DuplicateSymbolDiag> dupSymDiags; 62 } // namespace 63 64 Defined *SymbolTable::addDefined(StringRef name, InputFile *file, 65 InputSection *isec, uint64_t value, 66 uint64_t size, bool isWeakDef, 67 bool isPrivateExtern, bool isThumb, 68 bool isReferencedDynamically, bool noDeadStrip, 69 bool isWeakDefCanBeHidden) { 70 bool overridesWeakDef = false; 71 auto [s, wasInserted] = insert(name, file); 72 73 assert(!file || !isa<BitcodeFile>(file) || !isec); 74 75 if (!wasInserted) { 76 if (auto *defined = dyn_cast<Defined>(s)) { 77 if (isWeakDef) { 78 // See further comment in createDefined() in InputFiles.cpp 79 if (defined->isWeakDef()) { 80 defined->privateExtern &= isPrivateExtern; 81 defined->weakDefCanBeHidden &= isWeakDefCanBeHidden; 82 defined->referencedDynamically |= isReferencedDynamically; 83 defined->noDeadStrip |= noDeadStrip; 84 } 85 // FIXME: Handle this for bitcode files. 86 if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec)) 87 concatIsec->wasCoalesced = true; 88 return defined; 89 } 90 91 if (defined->isWeakDef()) { 92 // FIXME: Handle this for bitcode files. 93 if (auto concatIsec = 94 dyn_cast_or_null<ConcatInputSection>(defined->isec)) { 95 concatIsec->wasCoalesced = true; 96 concatIsec->symbols.erase(llvm::find(concatIsec->symbols, defined)); 97 } 98 } else { 99 std::string srcLoc1 = defined->getSourceLocation(); 100 std::string srcLoc2 = isec ? isec->getSourceLocation(value) : ""; 101 std::string srcFile1 = toString(defined->getFile()); 102 std::string srcFile2 = toString(file); 103 104 dupSymDiags.push_back({make_pair(srcLoc1, srcFile1), 105 make_pair(srcLoc2, srcFile2), defined}); 106 } 107 108 } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 109 overridesWeakDef = !isWeakDef && dysym->isWeakDef(); 110 dysym->unreference(); 111 } else if (auto *undef = dyn_cast<Undefined>(s)) { 112 // Preserve the original bitcode file name (instead of using the object 113 // file name). 114 if (undef->wasBitcodeSymbol) 115 file = undef->getFile(); 116 } 117 // Defined symbols take priority over other types of symbols, so in case 118 // of a name conflict, we fall through to the replaceSymbol() call below. 119 } 120 121 // With -flat_namespace, all extern symbols in dylibs are interposable. 122 // FIXME: Add support for `-interposable` (PR53680). 123 bool interposable = config->namespaceKind == NamespaceKind::flat && 124 config->outputType != MachO::MH_EXECUTE && 125 !isPrivateExtern; 126 Defined *defined = replaceSymbol<Defined>( 127 s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true, 128 isPrivateExtern, /*includeInSymtab=*/true, isThumb, 129 isReferencedDynamically, noDeadStrip, overridesWeakDef, 130 isWeakDefCanBeHidden, interposable); 131 return defined; 132 } 133 134 Defined *SymbolTable::aliasDefined(Defined *src, StringRef target, 135 InputFile *newFile, bool makePrivateExtern) { 136 bool isPrivateExtern = makePrivateExtern || src->privateExtern; 137 return addDefined(target, newFile, src->isec, src->value, src->size, 138 src->isWeakDef(), isPrivateExtern, src->thumb, 139 src->referencedDynamically, src->noDeadStrip, 140 src->weakDefCanBeHidden); 141 } 142 143 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file, 144 bool isWeakRef) { 145 auto [s, wasInserted] = insert(name, file); 146 147 RefState refState = isWeakRef ? RefState::Weak : RefState::Strong; 148 149 if (wasInserted) 150 replaceSymbol<Undefined>(s, name, file, refState, 151 /*wasBitcodeSymbol=*/false); 152 else if (auto *lazy = dyn_cast<LazyArchive>(s)) 153 lazy->fetchArchiveMember(); 154 else if (isa<LazyObject>(s)) 155 extract(*s->getFile(), s->getName()); 156 else if (auto *dynsym = dyn_cast<DylibSymbol>(s)) 157 dynsym->reference(refState); 158 else if (auto *undefined = dyn_cast<Undefined>(s)) 159 undefined->refState = std::max(undefined->refState, refState); 160 return s; 161 } 162 163 Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size, 164 uint32_t align, bool isPrivateExtern) { 165 auto [s, wasInserted] = insert(name, file); 166 167 if (!wasInserted) { 168 if (auto *common = dyn_cast<CommonSymbol>(s)) { 169 if (size < common->size) 170 return s; 171 } else if (isa<Defined>(s)) { 172 return s; 173 } 174 // Common symbols take priority over all non-Defined symbols, so in case of 175 // a name conflict, we fall through to the replaceSymbol() call below. 176 } 177 178 replaceSymbol<CommonSymbol>(s, name, file, size, align, isPrivateExtern); 179 return s; 180 } 181 182 Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef, 183 bool isTlv) { 184 auto [s, wasInserted] = insert(name, file); 185 186 RefState refState = RefState::Unreferenced; 187 if (!wasInserted) { 188 if (auto *defined = dyn_cast<Defined>(s)) { 189 if (isWeakDef && !defined->isWeakDef()) 190 defined->overridesWeakDef = true; 191 } else if (auto *undefined = dyn_cast<Undefined>(s)) { 192 refState = undefined->refState; 193 } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 194 refState = dysym->getRefState(); 195 } 196 } 197 198 bool isDynamicLookup = file == nullptr; 199 if (wasInserted || isa<Undefined>(s) || 200 (isa<DylibSymbol>(s) && 201 ((!isWeakDef && s->isWeakDef()) || 202 (!isDynamicLookup && cast<DylibSymbol>(s)->isDynamicLookup())))) { 203 if (auto *dynsym = dyn_cast<DylibSymbol>(s)) 204 dynsym->unreference(); 205 replaceSymbol<DylibSymbol>(s, file, name, isWeakDef, refState, isTlv); 206 } 207 208 return s; 209 } 210 211 Symbol *SymbolTable::addDynamicLookup(StringRef name) { 212 return addDylib(name, /*file=*/nullptr, /*isWeakDef=*/false, /*isTlv=*/false); 213 } 214 215 Symbol *SymbolTable::addLazyArchive(StringRef name, ArchiveFile *file, 216 const object::Archive::Symbol &sym) { 217 auto [s, wasInserted] = insert(name, file); 218 219 if (wasInserted) { 220 replaceSymbol<LazyArchive>(s, file, sym); 221 } else if (isa<Undefined>(s)) { 222 file->fetch(sym); 223 } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 224 if (dysym->isWeakDef()) { 225 if (dysym->getRefState() != RefState::Unreferenced) 226 file->fetch(sym); 227 else 228 replaceSymbol<LazyArchive>(s, file, sym); 229 } 230 } 231 return s; 232 } 233 234 Symbol *SymbolTable::addLazyObject(StringRef name, InputFile &file) { 235 auto [s, wasInserted] = insert(name, &file); 236 237 if (wasInserted) { 238 replaceSymbol<LazyObject>(s, file, name); 239 } else if (isa<Undefined>(s)) { 240 extract(file, name); 241 } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 242 if (dysym->isWeakDef()) { 243 if (dysym->getRefState() != RefState::Unreferenced) 244 extract(file, name); 245 else 246 replaceSymbol<LazyObject>(s, file, name); 247 } 248 } 249 return s; 250 } 251 252 Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec, 253 uint64_t value, bool isPrivateExtern, 254 bool includeInSymtab, 255 bool referencedDynamically) { 256 assert(!isec || !isec->getFile()); // See makeSyntheticInputSection(). 257 Defined *s = 258 addDefined(name, /*file=*/nullptr, isec, value, /*size=*/0, 259 /*isWeakDef=*/false, isPrivateExtern, /*isThumb=*/false, 260 referencedDynamically, /*noDeadStrip=*/false, 261 /*isWeakDefCanBeHidden=*/false); 262 s->includeInSymtab = includeInSymtab; 263 return s; 264 } 265 266 enum class Boundary { 267 Start, 268 End, 269 }; 270 271 static Defined *createBoundarySymbol(const Undefined &sym) { 272 return symtab->addSynthetic( 273 sym.getName(), /*isec=*/nullptr, /*value=*/-1, /*isPrivateExtern=*/true, 274 /*includeInSymtab=*/false, /*referencedDynamically=*/false); 275 } 276 277 static void handleSectionBoundarySymbol(const Undefined &sym, StringRef segSect, 278 Boundary which) { 279 auto [segName, sectName] = segSect.split('$'); 280 281 // Attach the symbol to any InputSection that will end up in the right 282 // OutputSection -- it doesn't matter which one we pick. 283 // Don't bother looking through inputSections for a matching 284 // ConcatInputSection -- we need to create ConcatInputSection for 285 // non-existing sections anyways, and that codepath works even if we should 286 // already have a ConcatInputSection with the right name. 287 288 OutputSection *osec = nullptr; 289 // This looks for __TEXT,__cstring etc. 290 for (SyntheticSection *ssec : syntheticSections) 291 if (ssec->segname == segName && ssec->name == sectName) { 292 osec = ssec->isec->parent; 293 break; 294 } 295 296 if (!osec) { 297 ConcatInputSection *isec = makeSyntheticInputSection(segName, sectName); 298 299 // This runs after markLive() and is only called for Undefineds that are 300 // live. Marking the isec live ensures an OutputSection is created that the 301 // start/end symbol can refer to. 302 assert(sym.isLive()); 303 isec->live = true; 304 305 // This runs after gatherInputSections(), so need to explicitly set parent 306 // and add to inputSections. 307 osec = isec->parent = ConcatOutputSection::getOrCreateForInput(isec); 308 inputSections.push_back(isec); 309 } 310 311 if (which == Boundary::Start) 312 osec->sectionStartSymbols.push_back(createBoundarySymbol(sym)); 313 else 314 osec->sectionEndSymbols.push_back(createBoundarySymbol(sym)); 315 } 316 317 static void handleSegmentBoundarySymbol(const Undefined &sym, StringRef segName, 318 Boundary which) { 319 OutputSegment *seg = getOrCreateOutputSegment(segName); 320 if (which == Boundary::Start) 321 seg->segmentStartSymbols.push_back(createBoundarySymbol(sym)); 322 else 323 seg->segmentEndSymbols.push_back(createBoundarySymbol(sym)); 324 } 325 326 // Try to find a definition for an undefined symbol. 327 // Returns true if a definition was found and no diagnostics are needed. 328 static bool recoverFromUndefinedSymbol(const Undefined &sym) { 329 // Handle start/end symbols. 330 StringRef name = sym.getName(); 331 if (name.consume_front("section$start$")) { 332 handleSectionBoundarySymbol(sym, name, Boundary::Start); 333 return true; 334 } 335 if (name.consume_front("section$end$")) { 336 handleSectionBoundarySymbol(sym, name, Boundary::End); 337 return true; 338 } 339 if (name.consume_front("segment$start$")) { 340 handleSegmentBoundarySymbol(sym, name, Boundary::Start); 341 return true; 342 } 343 if (name.consume_front("segment$end$")) { 344 handleSegmentBoundarySymbol(sym, name, Boundary::End); 345 return true; 346 } 347 348 // Leave dtrace symbols, since we will handle them when we do the relocation 349 if (name.startswith("___dtrace_")) 350 return true; 351 352 // Handle -U. 353 if (config->explicitDynamicLookups.count(sym.getName())) { 354 symtab->addDynamicLookup(sym.getName()); 355 return true; 356 } 357 358 // Handle -undefined. 359 if (config->undefinedSymbolTreatment == 360 UndefinedSymbolTreatment::dynamic_lookup || 361 config->undefinedSymbolTreatment == UndefinedSymbolTreatment::suppress) { 362 symtab->addDynamicLookup(sym.getName()); 363 return true; 364 } 365 366 // We do not return true here, as we still need to print diagnostics. 367 if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::warning) 368 symtab->addDynamicLookup(sym.getName()); 369 370 return false; 371 } 372 373 namespace { 374 struct UndefinedDiag { 375 struct SectionAndOffset { 376 const InputSection *isec; 377 uint64_t offset; 378 }; 379 380 std::vector<SectionAndOffset> codeReferences; 381 std::vector<std::string> otherReferences; 382 }; 383 384 MapVector<const Undefined *, UndefinedDiag> undefs; 385 } 386 387 void macho::reportPendingDuplicateSymbols() { 388 for (const auto &duplicate : dupSymDiags) { 389 if (!config->deadStripDuplicates || duplicate.sym->isLive()) { 390 std::string message = 391 "duplicate symbol: " + toString(*duplicate.sym) + "\n>>> defined in "; 392 if (!duplicate.src1.first.empty()) 393 message += duplicate.src1.first + "\n>>> "; 394 message += duplicate.src1.second + "\n>>> defined in "; 395 if (!duplicate.src2.first.empty()) 396 message += duplicate.src2.first + "\n>>> "; 397 error(message + duplicate.src2.second); 398 } 399 } 400 } 401 402 // Check whether the definition name def is a mangled function name that matches 403 // the reference name ref. 404 static bool canSuggestExternCForCXX(StringRef ref, StringRef def) { 405 llvm::ItaniumPartialDemangler d; 406 std::string name = def.str(); 407 if (d.partialDemangle(name.c_str())) 408 return false; 409 char *buf = d.getFunctionName(nullptr, nullptr); 410 if (!buf) 411 return false; 412 bool ret = ref == buf; 413 free(buf); 414 return ret; 415 } 416 417 // Suggest an alternative spelling of an "undefined symbol" diagnostic. Returns 418 // the suggested symbol, which is either in the symbol table, or in the same 419 // file of sym. 420 static const Symbol *getAlternativeSpelling(const Undefined &sym, 421 std::string &pre_hint, 422 std::string &post_hint) { 423 DenseMap<StringRef, const Symbol *> map; 424 if (sym.getFile() && sym.getFile()->kind() == InputFile::ObjKind) { 425 // Build a map of local defined symbols. 426 for (const Symbol *s : sym.getFile()->symbols) 427 if (auto *defined = dyn_cast_or_null<Defined>(s)) 428 if (!defined->isExternal()) 429 map.try_emplace(s->getName(), s); 430 } 431 432 auto suggest = [&](StringRef newName) -> const Symbol * { 433 // If defined locally. 434 if (const Symbol *s = map.lookup(newName)) 435 return s; 436 437 // If in the symbol table and not undefined. 438 if (const Symbol *s = symtab->find(newName)) 439 if (dyn_cast<Undefined>(s) == nullptr) 440 return s; 441 442 return nullptr; 443 }; 444 445 // This loop enumerates all strings of Levenshtein distance 1 as typo 446 // correction candidates and suggests the one that exists as a non-undefined 447 // symbol. 448 StringRef name = sym.getName(); 449 for (size_t i = 0, e = name.size(); i != e + 1; ++i) { 450 // Insert a character before name[i]. 451 std::string newName = (name.substr(0, i) + "0" + name.substr(i)).str(); 452 for (char c = '0'; c <= 'z'; ++c) { 453 newName[i] = c; 454 if (const Symbol *s = suggest(newName)) 455 return s; 456 } 457 if (i == e) 458 break; 459 460 // Substitute name[i]. 461 newName = std::string(name); 462 for (char c = '0'; c <= 'z'; ++c) { 463 newName[i] = c; 464 if (const Symbol *s = suggest(newName)) 465 return s; 466 } 467 468 // Transpose name[i] and name[i+1]. This is of edit distance 2 but it is 469 // common. 470 if (i + 1 < e) { 471 newName[i] = name[i + 1]; 472 newName[i + 1] = name[i]; 473 if (const Symbol *s = suggest(newName)) 474 return s; 475 } 476 477 // Delete name[i]. 478 newName = (name.substr(0, i) + name.substr(i + 1)).str(); 479 if (const Symbol *s = suggest(newName)) 480 return s; 481 } 482 483 // Case mismatch, e.g. Foo vs FOO. 484 for (auto &it : map) 485 if (name.equals_insensitive(it.first)) 486 return it.second; 487 for (Symbol *sym : symtab->getSymbols()) 488 if (dyn_cast<Undefined>(sym) == nullptr && 489 name.equals_insensitive(sym->getName())) 490 return sym; 491 492 // The reference may be a mangled name while the definition is not. Suggest a 493 // missing extern "C". 494 if (name.startswith("__Z")) { 495 std::string buf = name.str(); 496 llvm::ItaniumPartialDemangler d; 497 if (!d.partialDemangle(buf.c_str())) 498 if (char *buf = d.getFunctionName(nullptr, nullptr)) { 499 const Symbol *s = suggest((Twine("_") + buf).str()); 500 free(buf); 501 if (s) { 502 pre_hint = ": extern \"C\" "; 503 return s; 504 } 505 } 506 } else { 507 StringRef name_without_underscore = name; 508 name_without_underscore.consume_front("_"); 509 const Symbol *s = nullptr; 510 for (auto &it : map) 511 if (canSuggestExternCForCXX(name_without_underscore, it.first)) { 512 s = it.second; 513 break; 514 } 515 if (!s) 516 for (Symbol *sym : symtab->getSymbols()) 517 if (canSuggestExternCForCXX(name_without_underscore, sym->getName())) { 518 s = sym; 519 break; 520 } 521 if (s) { 522 pre_hint = " to declare "; 523 post_hint = " as extern \"C\"?"; 524 return s; 525 } 526 } 527 528 return nullptr; 529 } 530 531 static void reportUndefinedSymbol(const Undefined &sym, 532 const UndefinedDiag &locations, 533 bool correctSpelling) { 534 std::string message = "undefined symbol"; 535 if (config->archMultiple) 536 message += (" for arch " + getArchitectureName(config->arch())).str(); 537 message += ": " + toString(sym); 538 539 const size_t maxUndefinedReferences = 3; 540 size_t i = 0; 541 for (const std::string &loc : locations.otherReferences) { 542 if (i >= maxUndefinedReferences) 543 break; 544 message += "\n>>> referenced by " + loc; 545 ++i; 546 } 547 548 for (const UndefinedDiag::SectionAndOffset &loc : locations.codeReferences) { 549 if (i >= maxUndefinedReferences) 550 break; 551 message += "\n>>> referenced by "; 552 std::string src = loc.isec->getSourceLocation(loc.offset); 553 if (!src.empty()) 554 message += src + "\n>>> "; 555 message += loc.isec->getLocation(loc.offset); 556 ++i; 557 } 558 559 size_t totalReferences = 560 locations.otherReferences.size() + locations.codeReferences.size(); 561 if (totalReferences > i) 562 message += 563 ("\n>>> referenced " + Twine(totalReferences - i) + " more times") 564 .str(); 565 566 if (correctSpelling) { 567 std::string pre_hint = ": ", post_hint; 568 if (const Symbol *corrected = 569 getAlternativeSpelling(sym, pre_hint, post_hint)) { 570 message += 571 "\n>>> did you mean" + pre_hint + toString(*corrected) + post_hint; 572 if (corrected->getFile()) 573 message += "\n>>> defined in: " + toString(corrected->getFile()); 574 } 575 } 576 577 if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::error) 578 error(message); 579 else if (config->undefinedSymbolTreatment == 580 UndefinedSymbolTreatment::warning) 581 warn(message); 582 else 583 assert(false && "diagnostics make sense for -undefined error|warning only"); 584 } 585 586 void macho::reportPendingUndefinedSymbols() { 587 // Enable spell corrector for the first 2 diagnostics. 588 for (const auto &[i, undef] : llvm::enumerate(undefs)) 589 reportUndefinedSymbol(*undef.first, undef.second, i < 2); 590 591 // This function is called multiple times during execution. Clear the printed 592 // diagnostics to avoid printing the same things again the next time. 593 undefs.clear(); 594 } 595 596 void macho::treatUndefinedSymbol(const Undefined &sym, StringRef source) { 597 if (recoverFromUndefinedSymbol(sym)) 598 return; 599 600 undefs[&sym].otherReferences.push_back(source.str()); 601 } 602 603 void macho::treatUndefinedSymbol(const Undefined &sym, const InputSection *isec, 604 uint64_t offset) { 605 if (recoverFromUndefinedSymbol(sym)) 606 return; 607 608 undefs[&sym].codeReferences.push_back({isec, offset}); 609 } 610 611 std::unique_ptr<SymbolTable> macho::symtab; 612