1 //===- SyntheticSections.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SyntheticSections.h" 10 #include "Config.h" 11 #include "ExportTrie.h" 12 #include "InputFiles.h" 13 #include "MachOStructs.h" 14 #include "MergedOutputSection.h" 15 #include "OutputSegment.h" 16 #include "SymbolTable.h" 17 #include "Symbols.h" 18 #include "Writer.h" 19 20 #include "lld/Common/ErrorHandler.h" 21 #include "lld/Common/Memory.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/Support/EndianStream.h" 24 #include "llvm/Support/FileSystem.h" 25 #include "llvm/Support/LEB128.h" 26 #include "llvm/Support/Path.h" 27 28 using namespace llvm; 29 using namespace llvm::support; 30 using namespace llvm::support::endian; 31 using namespace lld; 32 using namespace lld::macho; 33 34 InStruct macho::in; 35 std::vector<SyntheticSection *> macho::syntheticSections; 36 37 SyntheticSection::SyntheticSection(const char *segname, const char *name) 38 : OutputSection(SyntheticKind, name), segname(segname) { 39 syntheticSections.push_back(this); 40 } 41 42 // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts 43 // from the beginning of the file (i.e. the header). 44 MachHeaderSection::MachHeaderSection() 45 : SyntheticSection(segment_names::text, section_names::header) {} 46 47 void MachHeaderSection::addLoadCommand(LoadCommand *lc) { 48 loadCommands.push_back(lc); 49 sizeOfCmds += lc->getSize(); 50 } 51 52 uint64_t MachHeaderSection::getSize() const { 53 return sizeof(MachO::mach_header_64) + sizeOfCmds + config->headerPad; 54 } 55 56 void MachHeaderSection::writeTo(uint8_t *buf) const { 57 auto *hdr = reinterpret_cast<MachO::mach_header_64 *>(buf); 58 hdr->magic = MachO::MH_MAGIC_64; 59 hdr->cputype = MachO::CPU_TYPE_X86_64; 60 hdr->cpusubtype = MachO::CPU_SUBTYPE_X86_64_ALL | MachO::CPU_SUBTYPE_LIB64; 61 hdr->filetype = config->outputType; 62 hdr->ncmds = loadCommands.size(); 63 hdr->sizeofcmds = sizeOfCmds; 64 hdr->flags = MachO::MH_NOUNDEFS | MachO::MH_DYLDLINK | MachO::MH_TWOLEVEL; 65 66 if (config->outputType == MachO::MH_DYLIB && !config->hasReexports) 67 hdr->flags |= MachO::MH_NO_REEXPORTED_DYLIBS; 68 69 if (config->outputType == MachO::MH_EXECUTE && config->isPic) 70 hdr->flags |= MachO::MH_PIE; 71 72 if (in.exports->hasWeakSymbol || in.weakBinding->hasNonWeakDefinition()) 73 hdr->flags |= MachO::MH_WEAK_DEFINES; 74 75 if (in.exports->hasWeakSymbol || in.weakBinding->hasEntry()) 76 hdr->flags |= MachO::MH_BINDS_TO_WEAK; 77 78 for (OutputSegment *seg : outputSegments) { 79 for (OutputSection *osec : seg->getSections()) { 80 if (isThreadLocalVariables(osec->flags)) { 81 hdr->flags |= MachO::MH_HAS_TLV_DESCRIPTORS; 82 break; 83 } 84 } 85 } 86 87 uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1); 88 for (LoadCommand *lc : loadCommands) { 89 lc->writeTo(p); 90 p += lc->getSize(); 91 } 92 } 93 94 PageZeroSection::PageZeroSection() 95 : SyntheticSection(segment_names::pageZero, section_names::pageZero) {} 96 97 uint64_t Location::getVA() const { 98 if (const auto *isec = section.dyn_cast<const InputSection *>()) 99 return isec->getVA() + offset; 100 return section.get<const OutputSection *>()->addr + offset; 101 } 102 103 RebaseSection::RebaseSection() 104 : LinkEditSection(segment_names::linkEdit, section_names::rebase) {} 105 106 namespace { 107 struct Rebase { 108 OutputSegment *segment = nullptr; 109 uint64_t offset = 0; 110 uint64_t consecutiveCount = 0; 111 }; 112 } // namespace 113 114 // Rebase opcodes allow us to describe a contiguous sequence of rebase location 115 // using a single DO_REBASE opcode. To take advantage of it, we delay emitting 116 // `DO_REBASE` until we have reached the end of a contiguous sequence. 117 static void encodeDoRebase(Rebase &rebase, raw_svector_ostream &os) { 118 using namespace llvm::MachO; 119 assert(rebase.consecutiveCount != 0); 120 if (rebase.consecutiveCount <= REBASE_IMMEDIATE_MASK) { 121 os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 122 rebase.consecutiveCount); 123 } else { 124 os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES); 125 encodeULEB128(rebase.consecutiveCount, os); 126 } 127 rebase.consecutiveCount = 0; 128 } 129 130 static void encodeRebase(const OutputSection *osec, uint64_t outSecOff, 131 Rebase &lastRebase, raw_svector_ostream &os) { 132 using namespace llvm::MachO; 133 OutputSegment *seg = osec->parent; 134 uint64_t offset = osec->getSegmentOffset() + outSecOff; 135 if (lastRebase.segment != seg || lastRebase.offset != offset) { 136 if (lastRebase.consecutiveCount != 0) 137 encodeDoRebase(lastRebase, os); 138 139 if (lastRebase.segment != seg) { 140 os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 141 seg->index); 142 encodeULEB128(offset, os); 143 lastRebase.segment = seg; 144 lastRebase.offset = offset; 145 } else { 146 assert(lastRebase.offset != offset); 147 os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB); 148 encodeULEB128(offset - lastRebase.offset, os); 149 lastRebase.offset = offset; 150 } 151 } 152 ++lastRebase.consecutiveCount; 153 // DO_REBASE causes dyld to both perform the binding and increment the offset 154 lastRebase.offset += WordSize; 155 } 156 157 void RebaseSection::finalizeContents() { 158 using namespace llvm::MachO; 159 if (locations.empty()) 160 return; 161 162 raw_svector_ostream os{contents}; 163 Rebase lastRebase; 164 165 os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER); 166 167 llvm::sort(locations, [](const Location &a, const Location &b) { 168 return a.getVA() < b.getVA(); 169 }); 170 for (const Location &loc : locations) { 171 if (const auto *isec = loc.section.dyn_cast<const InputSection *>()) { 172 encodeRebase(isec->parent, isec->outSecOff + loc.offset, lastRebase, os); 173 } else { 174 const auto *osec = loc.section.get<const OutputSection *>(); 175 encodeRebase(osec, loc.offset, lastRebase, os); 176 } 177 } 178 if (lastRebase.consecutiveCount != 0) 179 encodeDoRebase(lastRebase, os); 180 181 os << static_cast<uint8_t>(REBASE_OPCODE_DONE); 182 } 183 184 void RebaseSection::writeTo(uint8_t *buf) const { 185 memcpy(buf, contents.data(), contents.size()); 186 } 187 188 NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname, 189 const char *name) 190 : SyntheticSection(segname, name) { 191 align = 8; 192 flags = MachO::S_NON_LAZY_SYMBOL_POINTERS; 193 } 194 195 void NonLazyPointerSectionBase::addEntry(Symbol *sym) { 196 if (entries.insert(sym)) { 197 assert(!sym->isInGot()); 198 sym->gotIndex = entries.size() - 1; 199 200 addNonLazyBindingEntries(sym, this, sym->gotIndex * WordSize); 201 } 202 } 203 204 void NonLazyPointerSectionBase::writeTo(uint8_t *buf) const { 205 for (size_t i = 0, n = entries.size(); i < n; ++i) 206 if (auto *defined = dyn_cast<Defined>(entries[i])) 207 write64le(&buf[i * WordSize], defined->getVA()); 208 } 209 210 BindingSection::BindingSection() 211 : LinkEditSection(segment_names::linkEdit, section_names::binding) {} 212 213 namespace { 214 struct Binding { 215 OutputSegment *segment = nullptr; 216 uint64_t offset = 0; 217 int64_t addend = 0; 218 uint8_t ordinal = 0; 219 }; 220 } // namespace 221 222 // Encode a sequence of opcodes that tell dyld to write the address of symbol + 223 // addend at osec->addr + outSecOff. 224 // 225 // The bind opcode "interpreter" remembers the values of each binding field, so 226 // we only need to encode the differences between bindings. Hence the use of 227 // lastBinding. 228 static void encodeBinding(const Symbol *sym, const OutputSection *osec, 229 uint64_t outSecOff, int64_t addend, 230 bool isWeakBinding, Binding &lastBinding, 231 raw_svector_ostream &os) { 232 using namespace llvm::MachO; 233 OutputSegment *seg = osec->parent; 234 uint64_t offset = osec->getSegmentOffset() + outSecOff; 235 if (lastBinding.segment != seg) { 236 os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 237 seg->index); 238 encodeULEB128(offset, os); 239 lastBinding.segment = seg; 240 lastBinding.offset = offset; 241 } else if (lastBinding.offset != offset) { 242 os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB); 243 encodeULEB128(offset - lastBinding.offset, os); 244 lastBinding.offset = offset; 245 } 246 247 if (lastBinding.addend != addend) { 248 os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB); 249 encodeSLEB128(addend, os); 250 lastBinding.addend = addend; 251 } 252 253 uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; 254 if (!isWeakBinding && sym->isWeakRef()) 255 flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; 256 257 os << flags << sym->getName() << '\0' 258 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER) 259 << static_cast<uint8_t>(BIND_OPCODE_DO_BIND); 260 // DO_BIND causes dyld to both perform the binding and increment the offset 261 lastBinding.offset += WordSize; 262 } 263 264 // Non-weak bindings need to have their dylib ordinal encoded as well. 265 static void encodeDylibOrdinal(const DylibSymbol *dysym, Binding &lastBinding, 266 raw_svector_ostream &os) { 267 using namespace llvm::MachO; 268 if (lastBinding.ordinal != dysym->file->ordinal) { 269 if (dysym->file->ordinal <= BIND_IMMEDIATE_MASK) { 270 os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 271 dysym->file->ordinal); 272 } else { 273 os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); 274 encodeULEB128(dysym->file->ordinal, os); 275 } 276 lastBinding.ordinal = dysym->file->ordinal; 277 } 278 } 279 280 static void encodeWeakOverride(const Defined *defined, 281 raw_svector_ostream &os) { 282 using namespace llvm::MachO; 283 os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 284 BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) 285 << defined->getName() << '\0'; 286 } 287 288 // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld 289 // interprets to update a record with the following fields: 290 // * segment index (of the segment to write the symbol addresses to, typically 291 // the __DATA_CONST segment which contains the GOT) 292 // * offset within the segment, indicating the next location to write a binding 293 // * symbol type 294 // * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command) 295 // * symbol name 296 // * addend 297 // When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind 298 // a symbol in the GOT, and increments the segment offset to point to the next 299 // entry. It does *not* clear the record state after doing the bind, so 300 // subsequent opcodes only need to encode the differences between bindings. 301 void BindingSection::finalizeContents() { 302 raw_svector_ostream os{contents}; 303 Binding lastBinding; 304 305 // Since bindings are delta-encoded, sorting them allows for a more compact 306 // result. Note that sorting by address alone ensures that bindings for the 307 // same segment / section are located together. 308 llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) { 309 return a.target.getVA() < b.target.getVA(); 310 }); 311 for (const BindingEntry &b : bindings) { 312 encodeDylibOrdinal(b.dysym, lastBinding, os); 313 if (auto *isec = b.target.section.dyn_cast<const InputSection *>()) { 314 encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.target.offset, 315 b.addend, /*isWeakBinding=*/false, lastBinding, os); 316 } else { 317 auto *osec = b.target.section.get<const OutputSection *>(); 318 encodeBinding(b.dysym, osec, b.target.offset, b.addend, 319 /*isWeakBinding=*/false, lastBinding, os); 320 } 321 } 322 if (!bindings.empty()) 323 os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE); 324 } 325 326 void BindingSection::writeTo(uint8_t *buf) const { 327 memcpy(buf, contents.data(), contents.size()); 328 } 329 330 WeakBindingSection::WeakBindingSection() 331 : LinkEditSection(segment_names::linkEdit, section_names::weakBinding) {} 332 333 void WeakBindingSection::finalizeContents() { 334 raw_svector_ostream os{contents}; 335 Binding lastBinding; 336 337 for (const Defined *defined : definitions) 338 encodeWeakOverride(defined, os); 339 340 // Since bindings are delta-encoded, sorting them allows for a more compact 341 // result. 342 llvm::sort(bindings, 343 [](const WeakBindingEntry &a, const WeakBindingEntry &b) { 344 return a.target.getVA() < b.target.getVA(); 345 }); 346 for (const WeakBindingEntry &b : bindings) { 347 if (auto *isec = b.target.section.dyn_cast<const InputSection *>()) { 348 encodeBinding(b.symbol, isec->parent, isec->outSecOff + b.target.offset, 349 b.addend, /*isWeakBinding=*/true, lastBinding, os); 350 } else { 351 auto *osec = b.target.section.get<const OutputSection *>(); 352 encodeBinding(b.symbol, osec, b.target.offset, b.addend, 353 /*isWeakBinding=*/true, lastBinding, os); 354 } 355 } 356 if (!bindings.empty() || !definitions.empty()) 357 os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE); 358 } 359 360 void WeakBindingSection::writeTo(uint8_t *buf) const { 361 memcpy(buf, contents.data(), contents.size()); 362 } 363 364 bool macho::needsBinding(const Symbol *sym) { 365 if (isa<DylibSymbol>(sym)) 366 return true; 367 if (const auto *defined = dyn_cast<Defined>(sym)) 368 return defined->isExternalWeakDef(); 369 return false; 370 } 371 372 void macho::addNonLazyBindingEntries(const Symbol *sym, 373 SectionPointerUnion section, 374 uint64_t offset, int64_t addend) { 375 if (auto *dysym = dyn_cast<DylibSymbol>(sym)) { 376 in.binding->addEntry(dysym, section, offset, addend); 377 if (dysym->isWeakDef()) 378 in.weakBinding->addEntry(sym, section, offset, addend); 379 } else if (auto *defined = dyn_cast<Defined>(sym)) { 380 in.rebase->addEntry(section, offset); 381 if (defined->isExternalWeakDef()) 382 in.weakBinding->addEntry(sym, section, offset, addend); 383 } else if (isa<DSOHandle>(sym)) { 384 error("cannot bind to " + DSOHandle::name); 385 } else { 386 // Undefined symbols are filtered out in scanRelocations(); we should never 387 // get here 388 llvm_unreachable("cannot bind to an undefined symbol"); 389 } 390 } 391 392 StubsSection::StubsSection() 393 : SyntheticSection(segment_names::text, "__stubs") { 394 flags = MachO::S_SYMBOL_STUBS; 395 reserved2 = target->stubSize; 396 } 397 398 uint64_t StubsSection::getSize() const { 399 return entries.size() * target->stubSize; 400 } 401 402 void StubsSection::writeTo(uint8_t *buf) const { 403 size_t off = 0; 404 for (const Symbol *sym : entries) { 405 target->writeStub(buf + off, *sym); 406 off += target->stubSize; 407 } 408 } 409 410 bool StubsSection::addEntry(Symbol *sym) { 411 bool inserted = entries.insert(sym); 412 if (inserted) 413 sym->stubsIndex = entries.size() - 1; 414 return inserted; 415 } 416 417 StubHelperSection::StubHelperSection() 418 : SyntheticSection(segment_names::text, "__stub_helper") {} 419 420 uint64_t StubHelperSection::getSize() const { 421 return target->stubHelperHeaderSize + 422 in.lazyBinding->getEntries().size() * target->stubHelperEntrySize; 423 } 424 425 bool StubHelperSection::isNeeded() const { return in.lazyBinding->isNeeded(); } 426 427 void StubHelperSection::writeTo(uint8_t *buf) const { 428 target->writeStubHelperHeader(buf); 429 size_t off = target->stubHelperHeaderSize; 430 for (const DylibSymbol *sym : in.lazyBinding->getEntries()) { 431 target->writeStubHelperEntry(buf + off, *sym, addr + off); 432 off += target->stubHelperEntrySize; 433 } 434 } 435 436 void StubHelperSection::setup() { 437 stubBinder = dyn_cast_or_null<DylibSymbol>(symtab->find("dyld_stub_binder")); 438 if (stubBinder == nullptr) { 439 error("symbol dyld_stub_binder not found (normally in libSystem.dylib). " 440 "Needed to perform lazy binding."); 441 return; 442 } 443 stubBinder->refState = RefState::Strong; 444 in.got->addEntry(stubBinder); 445 446 inputSections.push_back(in.imageLoaderCache); 447 dyldPrivate = 448 make<Defined>("__dyld_private", in.imageLoaderCache, 0, 449 /*isWeakDef=*/false, 450 /*isExternal=*/false, /*isPrivateExtern=*/false); 451 } 452 453 ImageLoaderCacheSection::ImageLoaderCacheSection() { 454 segname = segment_names::data; 455 name = "__data"; 456 uint8_t *arr = bAlloc.Allocate<uint8_t>(WordSize); 457 memset(arr, 0, WordSize); 458 data = {arr, WordSize}; 459 } 460 461 LazyPointerSection::LazyPointerSection() 462 : SyntheticSection(segment_names::data, "__la_symbol_ptr") { 463 align = 8; 464 flags = MachO::S_LAZY_SYMBOL_POINTERS; 465 } 466 467 uint64_t LazyPointerSection::getSize() const { 468 return in.stubs->getEntries().size() * WordSize; 469 } 470 471 bool LazyPointerSection::isNeeded() const { 472 return !in.stubs->getEntries().empty(); 473 } 474 475 void LazyPointerSection::writeTo(uint8_t *buf) const { 476 size_t off = 0; 477 for (const Symbol *sym : in.stubs->getEntries()) { 478 if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) { 479 if (dysym->hasStubsHelper()) { 480 uint64_t stubHelperOffset = 481 target->stubHelperHeaderSize + 482 dysym->stubsHelperIndex * target->stubHelperEntrySize; 483 write64le(buf + off, in.stubHelper->addr + stubHelperOffset); 484 } 485 } else { 486 write64le(buf + off, sym->getVA()); 487 } 488 off += WordSize; 489 } 490 } 491 492 LazyBindingSection::LazyBindingSection() 493 : LinkEditSection(segment_names::linkEdit, section_names::lazyBinding) {} 494 495 void LazyBindingSection::finalizeContents() { 496 // TODO: Just precompute output size here instead of writing to a temporary 497 // buffer 498 for (DylibSymbol *sym : entries) 499 sym->lazyBindOffset = encode(*sym); 500 } 501 502 void LazyBindingSection::writeTo(uint8_t *buf) const { 503 memcpy(buf, contents.data(), contents.size()); 504 } 505 506 void LazyBindingSection::addEntry(DylibSymbol *dysym) { 507 if (entries.insert(dysym)) { 508 dysym->stubsHelperIndex = entries.size() - 1; 509 in.rebase->addEntry(in.lazyPointers, dysym->stubsIndex * WordSize); 510 } 511 } 512 513 // Unlike the non-lazy binding section, the bind opcodes in this section aren't 514 // interpreted all at once. Rather, dyld will start interpreting opcodes at a 515 // given offset, typically only binding a single symbol before it finds a 516 // BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case, 517 // we cannot encode just the differences between symbols; we have to emit the 518 // complete bind information for each symbol. 519 uint32_t LazyBindingSection::encode(const DylibSymbol &sym) { 520 uint32_t opstreamOffset = contents.size(); 521 OutputSegment *dataSeg = in.lazyPointers->parent; 522 os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 523 dataSeg->index); 524 uint64_t offset = in.lazyPointers->addr - dataSeg->firstSection()->addr + 525 sym.stubsIndex * WordSize; 526 encodeULEB128(offset, os); 527 if (sym.file->ordinal <= MachO::BIND_IMMEDIATE_MASK) { 528 os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 529 sym.file->ordinal); 530 } else { 531 os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); 532 encodeULEB128(sym.file->ordinal, os); 533 } 534 535 uint8_t flags = MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; 536 if (sym.isWeakRef()) 537 flags |= MachO::BIND_SYMBOL_FLAGS_WEAK_IMPORT; 538 539 os << flags << sym.getName() << '\0' 540 << static_cast<uint8_t>(MachO::BIND_OPCODE_DO_BIND) 541 << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE); 542 return opstreamOffset; 543 } 544 545 void macho::prepareBranchTarget(Symbol *sym) { 546 if (auto *dysym = dyn_cast<DylibSymbol>(sym)) { 547 if (in.stubs->addEntry(dysym)) { 548 if (sym->isWeakDef()) { 549 in.binding->addEntry(dysym, in.lazyPointers, 550 sym->stubsIndex * WordSize); 551 in.weakBinding->addEntry(sym, in.lazyPointers, 552 sym->stubsIndex * WordSize); 553 } else { 554 in.lazyBinding->addEntry(dysym); 555 } 556 } 557 } else if (auto *defined = dyn_cast<Defined>(sym)) { 558 if (defined->isExternalWeakDef()) { 559 if (in.stubs->addEntry(sym)) { 560 in.rebase->addEntry(in.lazyPointers, sym->stubsIndex * WordSize); 561 in.weakBinding->addEntry(sym, in.lazyPointers, 562 sym->stubsIndex * WordSize); 563 } 564 } 565 } 566 } 567 568 ExportSection::ExportSection() 569 : LinkEditSection(segment_names::linkEdit, section_names::export_) {} 570 571 void ExportSection::finalizeContents() { 572 trieBuilder.setImageBase(in.header->addr); 573 for (const Symbol *sym : symtab->getSymbols()) { 574 if (const auto *defined = dyn_cast<Defined>(sym)) { 575 if (defined->privateExtern) 576 continue; 577 trieBuilder.addSymbol(*defined); 578 hasWeakSymbol = hasWeakSymbol || sym->isWeakDef(); 579 } 580 } 581 size = trieBuilder.build(); 582 } 583 584 void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); } 585 586 SymtabSection::SymtabSection(StringTableSection &stringTableSection) 587 : LinkEditSection(segment_names::linkEdit, section_names::symbolTable), 588 stringTableSection(stringTableSection) {} 589 590 uint64_t SymtabSection::getRawSize() const { 591 return getNumSymbols() * sizeof(structs::nlist_64); 592 } 593 594 void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) { 595 StabsEntry stab(MachO::N_SO); 596 SmallString<261> dir(compileUnit->getCompilationDir()); 597 StringRef sep = sys::path::get_separator(); 598 // We don't use `path::append` here because we want an empty `dir` to result 599 // in an absolute path. `append` would give us a relative path for that case. 600 if (!dir.endswith(sep)) 601 dir += sep; 602 stab.strx = stringTableSection.addString( 603 saver.save(dir + compileUnit->getUnitDIE().getShortName())); 604 stabs.emplace_back(std::move(stab)); 605 } 606 607 void SymtabSection::emitEndSourceStab() { 608 StabsEntry stab(MachO::N_SO); 609 stab.sect = 1; 610 stabs.emplace_back(std::move(stab)); 611 } 612 613 void SymtabSection::emitObjectFileStab(ObjFile *file) { 614 StabsEntry stab(MachO::N_OSO); 615 stab.sect = target->cpuSubtype; 616 SmallString<261> path(!file->archiveName.empty() ? file->archiveName 617 : file->getName()); 618 std::error_code ec = sys::fs::make_absolute(path); 619 if (ec) 620 fatal("failed to get absolute path for " + path); 621 622 if (!file->archiveName.empty()) 623 path.append({"(", file->getName(), ")"}); 624 625 stab.strx = stringTableSection.addString(saver.save(path.str())); 626 stab.desc = 1; 627 stab.value = file->modTime; 628 stabs.emplace_back(std::move(stab)); 629 } 630 631 void SymtabSection::emitEndFunStab(Defined *defined) { 632 StabsEntry stab(MachO::N_FUN); 633 // FIXME this should be the size of the symbol. Using the section size in 634 // lieu is only correct if .subsections_via_symbols is set. 635 stab.value = defined->isec->getSize(); 636 stabs.emplace_back(std::move(stab)); 637 } 638 639 void SymtabSection::emitStabs() { 640 std::vector<Defined *> symbolsNeedingStabs; 641 for (const SymtabEntry &entry : 642 concat<SymtabEntry>(localSymbols, externalSymbols)) { 643 Symbol *sym = entry.sym; 644 if (auto *defined = dyn_cast<Defined>(sym)) { 645 if (defined->isAbsolute()) 646 continue; 647 InputSection *isec = defined->isec; 648 ObjFile *file = dyn_cast_or_null<ObjFile>(isec->file); 649 if (!file || !file->compileUnit) 650 continue; 651 symbolsNeedingStabs.push_back(defined); 652 } 653 } 654 655 llvm::stable_sort(symbolsNeedingStabs, [&](Defined *a, Defined *b) { 656 return a->isec->file->id < b->isec->file->id; 657 }); 658 659 // Emit STABS symbols so that dsymutil and/or the debugger can map address 660 // regions in the final binary to the source and object files from which they 661 // originated. 662 InputFile *lastFile = nullptr; 663 for (Defined *defined : symbolsNeedingStabs) { 664 InputSection *isec = defined->isec; 665 ObjFile *file = dyn_cast<ObjFile>(isec->file); 666 assert(file); 667 668 if (lastFile == nullptr || lastFile != file) { 669 if (lastFile != nullptr) 670 emitEndSourceStab(); 671 lastFile = file; 672 673 emitBeginSourceStab(file->compileUnit); 674 emitObjectFileStab(file); 675 } 676 677 StabsEntry symStab; 678 symStab.sect = defined->isec->parent->index; 679 symStab.strx = stringTableSection.addString(defined->getName()); 680 symStab.value = defined->getVA(); 681 682 if (isCodeSection(isec)) { 683 symStab.type = MachO::N_FUN; 684 stabs.emplace_back(std::move(symStab)); 685 emitEndFunStab(defined); 686 } else { 687 symStab.type = defined->isExternal() ? MachO::N_GSYM : MachO::N_STSYM; 688 stabs.emplace_back(std::move(symStab)); 689 } 690 } 691 692 if (!stabs.empty()) 693 emitEndSourceStab(); 694 } 695 696 void SymtabSection::finalizeContents() { 697 auto addSymbol = [&](std::vector<SymtabEntry> &symbols, Symbol *sym) { 698 uint32_t strx = stringTableSection.addString(sym->getName()); 699 symbols.push_back({sym, strx}); 700 }; 701 702 // Local symbols aren't in the SymbolTable, so we walk the list of object 703 // files to gather them. 704 for (InputFile *file : inputFiles) { 705 if (auto *objFile = dyn_cast<ObjFile>(file)) { 706 for (Symbol *sym : objFile->symbols) { 707 // TODO: when we implement -dead_strip, we should filter out symbols 708 // that belong to dead sections. 709 if (auto *defined = dyn_cast<Defined>(sym)) { 710 if (!defined->isExternal()) 711 addSymbol(localSymbols, sym); 712 } 713 } 714 } 715 } 716 717 // __dyld_private is a local symbol too. It's linker-created and doesn't 718 // exist in any object file. 719 if (Defined* dyldPrivate = in.stubHelper->dyldPrivate) 720 addSymbol(localSymbols, dyldPrivate); 721 722 for (Symbol *sym : symtab->getSymbols()) { 723 if (auto *defined = dyn_cast<Defined>(sym)) { 724 assert(defined->isExternal()); 725 (void)defined; 726 addSymbol(externalSymbols, sym); 727 } else if (auto *dysym = dyn_cast<DylibSymbol>(sym)) { 728 if (dysym->isReferenced()) 729 addSymbol(undefinedSymbols, sym); 730 } 731 } 732 733 emitStabs(); 734 uint32_t symtabIndex = stabs.size(); 735 for (const SymtabEntry &entry : 736 concat<SymtabEntry>(localSymbols, externalSymbols, undefinedSymbols)) { 737 entry.sym->symtabIndex = symtabIndex++; 738 } 739 } 740 741 uint32_t SymtabSection::getNumSymbols() const { 742 return stabs.size() + localSymbols.size() + externalSymbols.size() + 743 undefinedSymbols.size(); 744 } 745 746 void SymtabSection::writeTo(uint8_t *buf) const { 747 auto *nList = reinterpret_cast<structs::nlist_64 *>(buf); 748 // Emit the stabs entries before the "real" symbols. We cannot emit them 749 // after as that would render Symbol::symtabIndex inaccurate. 750 for (const StabsEntry &entry : stabs) { 751 nList->n_strx = entry.strx; 752 nList->n_type = entry.type; 753 nList->n_sect = entry.sect; 754 nList->n_desc = entry.desc; 755 nList->n_value = entry.value; 756 ++nList; 757 } 758 759 for (const SymtabEntry &entry : concat<const SymtabEntry>( 760 localSymbols, externalSymbols, undefinedSymbols)) { 761 nList->n_strx = entry.strx; 762 // TODO populate n_desc with more flags 763 if (auto *defined = dyn_cast<Defined>(entry.sym)) { 764 uint8_t scope = 0; 765 if (defined->privateExtern) { 766 // Private external -- dylib scoped symbol. 767 // Promote to non-external at link time. 768 assert(defined->isExternal() && "invalid input file"); 769 scope = MachO::N_PEXT; 770 } else if (defined->isExternal()) { 771 // Normal global symbol. 772 scope = MachO::N_EXT; 773 } else { 774 // TU-local symbol from localSymbols. 775 scope = 0; 776 } 777 778 if (defined->isAbsolute()) { 779 nList->n_type = scope | MachO::N_ABS; 780 nList->n_sect = MachO::NO_SECT; 781 nList->n_value = defined->value; 782 } else { 783 nList->n_type = scope | MachO::N_SECT; 784 nList->n_sect = defined->isec->parent->index; 785 // For the N_SECT symbol type, n_value is the address of the symbol 786 nList->n_value = defined->getVA(); 787 } 788 nList->n_desc |= defined->isExternalWeakDef() ? MachO::N_WEAK_DEF : 0; 789 } else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) { 790 uint16_t n_desc = nList->n_desc; 791 MachO::SET_LIBRARY_ORDINAL(n_desc, dysym->file->ordinal); 792 nList->n_type = MachO::N_EXT; 793 n_desc |= dysym->isWeakRef() ? MachO::N_WEAK_REF : 0; 794 nList->n_desc = n_desc; 795 } 796 ++nList; 797 } 798 } 799 800 IndirectSymtabSection::IndirectSymtabSection() 801 : LinkEditSection(segment_names::linkEdit, 802 section_names::indirectSymbolTable) {} 803 804 uint32_t IndirectSymtabSection::getNumSymbols() const { 805 return in.got->getEntries().size() + in.tlvPointers->getEntries().size() + 806 in.stubs->getEntries().size(); 807 } 808 809 bool IndirectSymtabSection::isNeeded() const { 810 return in.got->isNeeded() || in.tlvPointers->isNeeded() || 811 in.stubs->isNeeded(); 812 } 813 814 void IndirectSymtabSection::finalizeContents() { 815 uint32_t off = 0; 816 in.got->reserved1 = off; 817 off += in.got->getEntries().size(); 818 in.tlvPointers->reserved1 = off; 819 off += in.tlvPointers->getEntries().size(); 820 // There is a 1:1 correspondence between stubs and LazyPointerSection 821 // entries, so they can share the same sub-array in the table. 822 in.stubs->reserved1 = in.lazyPointers->reserved1 = off; 823 } 824 825 void IndirectSymtabSection::writeTo(uint8_t *buf) const { 826 uint32_t off = 0; 827 for (const Symbol *sym : in.got->getEntries()) { 828 write32le(buf + off * sizeof(uint32_t), sym->symtabIndex); 829 ++off; 830 } 831 for (const Symbol *sym : in.tlvPointers->getEntries()) { 832 write32le(buf + off * sizeof(uint32_t), sym->symtabIndex); 833 ++off; 834 } 835 for (const Symbol *sym : in.stubs->getEntries()) { 836 write32le(buf + off * sizeof(uint32_t), sym->symtabIndex); 837 ++off; 838 } 839 } 840 841 StringTableSection::StringTableSection() 842 : LinkEditSection(segment_names::linkEdit, section_names::stringTable) {} 843 844 uint32_t StringTableSection::addString(StringRef str) { 845 uint32_t strx = size; 846 strings.push_back(str); // TODO: consider deduplicating strings 847 size += str.size() + 1; // account for null terminator 848 return strx; 849 } 850 851 void StringTableSection::writeTo(uint8_t *buf) const { 852 uint32_t off = 0; 853 for (StringRef str : strings) { 854 memcpy(buf + off, str.data(), str.size()); 855 off += str.size() + 1; // account for null terminator 856 } 857 } 858