1 //===- SymbolTable.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SymbolTable.h" 10 #include "ConcatOutputSection.h" 11 #include "Config.h" 12 #include "InputFiles.h" 13 #include "InputSection.h" 14 #include "Symbols.h" 15 #include "SyntheticSections.h" 16 #include "lld/Common/ErrorHandler.h" 17 #include "lld/Common/Memory.h" 18 19 using namespace llvm; 20 using namespace lld; 21 using namespace lld::macho; 22 23 Symbol *SymbolTable::find(CachedHashStringRef cachedName) { 24 auto it = symMap.find(cachedName); 25 if (it == symMap.end()) 26 return nullptr; 27 return symVector[it->second]; 28 } 29 30 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, 31 const InputFile *file) { 32 auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()}); 33 34 Symbol *sym; 35 if (!p.second) { 36 // Name already present in the symbol table. 37 sym = symVector[p.first->second]; 38 } else { 39 // Name is a new symbol. 40 sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 41 symVector.push_back(sym); 42 } 43 44 sym->isUsedInRegularObj |= !file || isa<ObjFile>(file); 45 return {sym, p.second}; 46 } 47 48 Defined *SymbolTable::addDefined(StringRef name, InputFile *file, 49 InputSection *isec, uint64_t value, 50 uint64_t size, bool isWeakDef, 51 bool isPrivateExtern, bool isThumb, 52 bool isReferencedDynamically, bool noDeadStrip, 53 bool isWeakDefCanBeHidden) { 54 Symbol *s; 55 bool wasInserted; 56 bool overridesWeakDef = false; 57 std::tie(s, wasInserted) = insert(name, file); 58 59 assert(!isWeakDef || (isa<BitcodeFile>(file) && !isec) || 60 (isa<ObjFile>(file) && file == isec->getFile())); 61 62 if (!wasInserted) { 63 if (auto *defined = dyn_cast<Defined>(s)) { 64 if (isWeakDef) { 65 // See further comment in createDefined() in InputFiles.cpp 66 if (defined->isWeakDef()) { 67 defined->privateExtern &= isPrivateExtern; 68 defined->weakDefCanBeHidden &= isWeakDefCanBeHidden; 69 defined->referencedDynamically |= isReferencedDynamically; 70 defined->noDeadStrip |= noDeadStrip; 71 } 72 // FIXME: Handle this for bitcode files. 73 if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec)) 74 concatIsec->wasCoalesced = true; 75 return defined; 76 } 77 78 if (defined->isWeakDef()) { 79 // FIXME: Handle this for bitcode files. 80 if (auto concatIsec = 81 dyn_cast_or_null<ConcatInputSection>(defined->isec)) { 82 concatIsec->wasCoalesced = true; 83 concatIsec->symbols.erase(llvm::find(concatIsec->symbols, defined)); 84 } 85 } else { 86 std::string src1 = defined->getSourceLocation(); 87 std::string src2 = isec ? isec->getSourceLocation(value) : ""; 88 89 std::string message = 90 "duplicate symbol: " + toString(*defined) + "\n>>> defined in "; 91 if (!src1.empty()) 92 message += src1 + "\n>>> "; 93 message += toString(defined->getFile()) + "\n>>> defined in "; 94 if (!src2.empty()) 95 message += src2 + "\n>>> "; 96 error(message + toString(file)); 97 } 98 99 } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 100 overridesWeakDef = !isWeakDef && dysym->isWeakDef(); 101 dysym->unreference(); 102 } 103 // Defined symbols take priority over other types of symbols, so in case 104 // of a name conflict, we fall through to the replaceSymbol() call below. 105 } 106 107 // With -flat_namespace, all extern symbols in dylibs are interposable. 108 // FIXME: Add support for `-interposable` (PR53680). 109 bool interposable = config->namespaceKind == NamespaceKind::flat && 110 config->outputType != MachO::MH_EXECUTE && 111 !isPrivateExtern; 112 Defined *defined = replaceSymbol<Defined>( 113 s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true, 114 isPrivateExtern, /*includeInSymtab=*/true, isThumb, 115 isReferencedDynamically, noDeadStrip, overridesWeakDef, 116 isWeakDefCanBeHidden, interposable); 117 return defined; 118 } 119 120 Defined *SymbolTable::aliasDefined(Defined *src, StringRef target) { 121 return addDefined(target, src->getFile(), src->isec, src->value, src->size, 122 src->isWeakDef(), src->privateExtern, src->thumb, 123 src->referencedDynamically, src->noDeadStrip, 124 src->weakDefCanBeHidden); 125 } 126 127 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file, 128 bool isWeakRef) { 129 Symbol *s; 130 bool wasInserted; 131 std::tie(s, wasInserted) = insert(name, file); 132 133 RefState refState = isWeakRef ? RefState::Weak : RefState::Strong; 134 135 if (wasInserted) 136 replaceSymbol<Undefined>(s, name, file, refState); 137 else if (auto *lazy = dyn_cast<LazyArchive>(s)) 138 lazy->fetchArchiveMember(); 139 else if (isa<LazyObject>(s)) 140 extract(*s->getFile(), s->getName()); 141 else if (auto *dynsym = dyn_cast<DylibSymbol>(s)) 142 dynsym->reference(refState); 143 else if (auto *undefined = dyn_cast<Undefined>(s)) 144 undefined->refState = std::max(undefined->refState, refState); 145 return s; 146 } 147 148 Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size, 149 uint32_t align, bool isPrivateExtern) { 150 Symbol *s; 151 bool wasInserted; 152 std::tie(s, wasInserted) = insert(name, file); 153 154 if (!wasInserted) { 155 if (auto *common = dyn_cast<CommonSymbol>(s)) { 156 if (size < common->size) 157 return s; 158 } else if (isa<Defined>(s)) { 159 return s; 160 } 161 // Common symbols take priority over all non-Defined symbols, so in case of 162 // a name conflict, we fall through to the replaceSymbol() call below. 163 } 164 165 replaceSymbol<CommonSymbol>(s, name, file, size, align, isPrivateExtern); 166 return s; 167 } 168 169 Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef, 170 bool isTlv) { 171 Symbol *s; 172 bool wasInserted; 173 std::tie(s, wasInserted) = insert(name, file); 174 175 RefState refState = RefState::Unreferenced; 176 if (!wasInserted) { 177 if (auto *defined = dyn_cast<Defined>(s)) { 178 if (isWeakDef && !defined->isWeakDef()) 179 defined->overridesWeakDef = true; 180 } else if (auto *undefined = dyn_cast<Undefined>(s)) { 181 refState = undefined->refState; 182 } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 183 refState = dysym->getRefState(); 184 } 185 } 186 187 bool isDynamicLookup = file == nullptr; 188 if (wasInserted || isa<Undefined>(s) || 189 (isa<DylibSymbol>(s) && 190 ((!isWeakDef && s->isWeakDef()) || 191 (!isDynamicLookup && cast<DylibSymbol>(s)->isDynamicLookup())))) { 192 if (auto *dynsym = dyn_cast<DylibSymbol>(s)) 193 dynsym->unreference(); 194 replaceSymbol<DylibSymbol>(s, file, name, isWeakDef, refState, isTlv); 195 } 196 197 return s; 198 } 199 200 Symbol *SymbolTable::addDynamicLookup(StringRef name) { 201 return addDylib(name, /*file=*/nullptr, /*isWeakDef=*/false, /*isTlv=*/false); 202 } 203 204 Symbol *SymbolTable::addLazyArchive(StringRef name, ArchiveFile *file, 205 const object::Archive::Symbol &sym) { 206 Symbol *s; 207 bool wasInserted; 208 std::tie(s, wasInserted) = insert(name, file); 209 210 if (wasInserted) { 211 replaceSymbol<LazyArchive>(s, file, sym); 212 } else if (isa<Undefined>(s)) { 213 file->fetch(sym); 214 } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 215 if (dysym->isWeakDef()) { 216 if (dysym->getRefState() != RefState::Unreferenced) 217 file->fetch(sym); 218 else 219 replaceSymbol<LazyArchive>(s, file, sym); 220 } 221 } 222 return s; 223 } 224 225 Symbol *SymbolTable::addLazyObject(StringRef name, InputFile &file) { 226 Symbol *s; 227 bool wasInserted; 228 std::tie(s, wasInserted) = insert(name, &file); 229 230 if (wasInserted) { 231 replaceSymbol<LazyObject>(s, file, name); 232 } else if (isa<Undefined>(s)) { 233 extract(file, name); 234 } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 235 if (dysym->isWeakDef()) { 236 if (dysym->getRefState() != RefState::Unreferenced) 237 extract(file, name); 238 else 239 replaceSymbol<LazyObject>(s, file, name); 240 } 241 } 242 return s; 243 } 244 245 Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec, 246 uint64_t value, bool isPrivateExtern, 247 bool includeInSymtab, 248 bool referencedDynamically) { 249 assert(!isec || !isec->getFile()); // See makeSyntheticInputSection(). 250 Defined *s = 251 addDefined(name, /*file=*/nullptr, isec, value, /*size=*/0, 252 /*isWeakDef=*/false, isPrivateExtern, /*isThumb=*/false, 253 referencedDynamically, /*noDeadStrip=*/false, 254 /*isWeakDefCanBeHidden=*/false); 255 s->includeInSymtab = includeInSymtab; 256 return s; 257 } 258 259 enum class Boundary { 260 Start, 261 End, 262 }; 263 264 static Defined *createBoundarySymbol(const Undefined &sym) { 265 return symtab->addSynthetic( 266 sym.getName(), /*isec=*/nullptr, /*value=*/-1, /*isPrivateExtern=*/true, 267 /*includeInSymtab=*/false, /*referencedDynamically=*/false); 268 } 269 270 static void handleSectionBoundarySymbol(const Undefined &sym, StringRef segSect, 271 Boundary which) { 272 StringRef segName, sectName; 273 std::tie(segName, sectName) = segSect.split('$'); 274 275 // Attach the symbol to any InputSection that will end up in the right 276 // OutputSection -- it doesn't matter which one we pick. 277 // Don't bother looking through inputSections for a matching 278 // ConcatInputSection -- we need to create ConcatInputSection for 279 // non-existing sections anyways, and that codepath works even if we should 280 // already have a ConcatInputSection with the right name. 281 282 OutputSection *osec = nullptr; 283 // This looks for __TEXT,__cstring etc. 284 for (SyntheticSection *ssec : syntheticSections) 285 if (ssec->segname == segName && ssec->name == sectName) { 286 osec = ssec->isec->parent; 287 break; 288 } 289 290 if (!osec) { 291 ConcatInputSection *isec = makeSyntheticInputSection(segName, sectName); 292 293 // This runs after markLive() and is only called for Undefineds that are 294 // live. Marking the isec live ensures an OutputSection is created that the 295 // start/end symbol can refer to. 296 assert(sym.isLive()); 297 isec->live = true; 298 299 // This runs after gatherInputSections(), so need to explicitly set parent 300 // and add to inputSections. 301 osec = isec->parent = ConcatOutputSection::getOrCreateForInput(isec); 302 inputSections.push_back(isec); 303 } 304 305 if (which == Boundary::Start) 306 osec->sectionStartSymbols.push_back(createBoundarySymbol(sym)); 307 else 308 osec->sectionEndSymbols.push_back(createBoundarySymbol(sym)); 309 } 310 311 static void handleSegmentBoundarySymbol(const Undefined &sym, StringRef segName, 312 Boundary which) { 313 OutputSegment *seg = getOrCreateOutputSegment(segName); 314 if (which == Boundary::Start) 315 seg->segmentStartSymbols.push_back(createBoundarySymbol(sym)); 316 else 317 seg->segmentEndSymbols.push_back(createBoundarySymbol(sym)); 318 } 319 320 // Try to find a definition for an undefined symbol. 321 // Returns true if a definition was found and no diagnostics are needed. 322 static bool recoverFromUndefinedSymbol(const Undefined &sym) { 323 // Handle start/end symbols. 324 StringRef name = sym.getName(); 325 if (name.consume_front("section$start$")) { 326 handleSectionBoundarySymbol(sym, name, Boundary::Start); 327 return true; 328 } 329 if (name.consume_front("section$end$")) { 330 handleSectionBoundarySymbol(sym, name, Boundary::End); 331 return true; 332 } 333 if (name.consume_front("segment$start$")) { 334 handleSegmentBoundarySymbol(sym, name, Boundary::Start); 335 return true; 336 } 337 if (name.consume_front("segment$end$")) { 338 handleSegmentBoundarySymbol(sym, name, Boundary::End); 339 return true; 340 } 341 342 // Leave dtrace symbols, since we will handle them when we do the relocation 343 if (name.startswith("___dtrace_")) 344 return true; 345 346 // Handle -U. 347 if (config->explicitDynamicLookups.count(sym.getName())) { 348 symtab->addDynamicLookup(sym.getName()); 349 return true; 350 } 351 352 // Handle -undefined. 353 if (config->undefinedSymbolTreatment == 354 UndefinedSymbolTreatment::dynamic_lookup || 355 config->undefinedSymbolTreatment == UndefinedSymbolTreatment::suppress) { 356 symtab->addDynamicLookup(sym.getName()); 357 return true; 358 } 359 360 // We do not return true here, as we still need to print diagnostics. 361 if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::warning) 362 symtab->addDynamicLookup(sym.getName()); 363 364 return false; 365 } 366 367 namespace { 368 struct UndefinedDiag { 369 struct SectionAndOffset { 370 const InputSection *isec; 371 uint64_t offset; 372 }; 373 374 std::vector<SectionAndOffset> codeReferences; 375 std::vector<std::string> otherReferences; 376 }; 377 378 MapVector<const Undefined *, UndefinedDiag> undefs; 379 } 380 381 void macho::reportPendingUndefinedSymbols() { 382 for (const auto &undef : undefs) { 383 const UndefinedDiag &locations = undef.second; 384 385 std::string message = "undefined symbol"; 386 if (config->archMultiple) 387 message += (" for arch " + getArchitectureName(config->arch())).str(); 388 message += ": " + toString(*undef.first); 389 390 const size_t maxUndefinedReferences = 3; 391 size_t i = 0; 392 for (const std::string &loc : locations.otherReferences) { 393 if (i >= maxUndefinedReferences) 394 break; 395 message += "\n>>> referenced by " + loc; 396 ++i; 397 } 398 399 for (const UndefinedDiag::SectionAndOffset &loc : 400 locations.codeReferences) { 401 if (i >= maxUndefinedReferences) 402 break; 403 message += "\n>>> referenced by "; 404 std::string src = loc.isec->getSourceLocation(loc.offset); 405 if (!src.empty()) 406 message += src + "\n>>> "; 407 message += loc.isec->getLocation(loc.offset); 408 ++i; 409 } 410 411 size_t totalReferences = 412 locations.otherReferences.size() + locations.codeReferences.size(); 413 if (totalReferences > i) 414 message += 415 ("\n>>> referenced " + Twine(totalReferences - i) + " more times") 416 .str(); 417 418 if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::error) 419 error(message); 420 else if (config->undefinedSymbolTreatment == 421 UndefinedSymbolTreatment::warning) 422 warn(message); 423 else 424 assert(false && 425 "diagnostics make sense for -undefined error|warning only"); 426 } 427 428 // This function is called multiple times during execution. Clear the printed 429 // diagnostics to avoid printing the same things again the next time. 430 undefs.clear(); 431 } 432 433 void macho::treatUndefinedSymbol(const Undefined &sym, StringRef source) { 434 if (recoverFromUndefinedSymbol(sym)) 435 return; 436 437 undefs[&sym].otherReferences.push_back(source.str()); 438 } 439 440 void macho::treatUndefinedSymbol(const Undefined &sym, const InputSection *isec, 441 uint64_t offset) { 442 if (recoverFromUndefinedSymbol(sym)) 443 return; 444 445 undefs[&sym].codeReferences.push_back({isec, offset}); 446 } 447 448 std::unique_ptr<SymbolTable> macho::symtab; 449