1 //===- SyntheticSections.h -------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_SYNTHETIC_SECTIONS_H 10 #define LLD_MACHO_SYNTHETIC_SECTIONS_H 11 12 #include "Config.h" 13 #include "ExportTrie.h" 14 #include "InputSection.h" 15 #include "OutputSection.h" 16 #include "OutputSegment.h" 17 #include "Target.h" 18 #include "Writer.h" 19 20 #include "llvm/ADT/DenseMap.h" 21 #include "llvm/ADT/Hashing.h" 22 #include "llvm/ADT/MapVector.h" 23 #include "llvm/ADT/SetVector.h" 24 #include "llvm/BinaryFormat/MachO.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 #include <unordered_map> 29 30 namespace llvm { 31 class DWARFUnit; 32 } // namespace llvm 33 34 namespace lld::macho { 35 36 class Defined; 37 class DylibSymbol; 38 class LoadCommand; 39 class ObjFile; 40 class UnwindInfoSection; 41 42 class SyntheticSection : public OutputSection { 43 public: 44 SyntheticSection(const char *segname, const char *name); 45 virtual ~SyntheticSection() = default; 46 classof(const OutputSection * sec)47 static bool classof(const OutputSection *sec) { 48 return sec->kind() == SyntheticKind; 49 } 50 51 StringRef segname; 52 // This fake InputSection makes it easier for us to write code that applies 53 // generically to both user inputs and synthetics. 54 InputSection *isec; 55 }; 56 57 // All sections in __LINKEDIT should inherit from this. 
58 class LinkEditSection : public SyntheticSection { 59 public: LinkEditSection(const char * segname,const char * name)60 LinkEditSection(const char *segname, const char *name) 61 : SyntheticSection(segname, name) { 62 align = target->wordSize; 63 } 64 65 // Implementations of this method can assume that the regular (non-__LINKEDIT) 66 // sections already have their addresses assigned. finalizeContents()67 virtual void finalizeContents() {} 68 69 // Sections in __LINKEDIT are special: their offsets are recorded in the 70 // load commands like LC_DYLD_INFO_ONLY and LC_SYMTAB, instead of in section 71 // headers. isHidden()72 bool isHidden() const final { return true; } 73 74 virtual uint64_t getRawSize() const = 0; 75 76 // codesign (or more specifically libstuff) checks that each section in 77 // __LINKEDIT ends where the next one starts -- no gaps are permitted. We 78 // therefore align every section's start and end points to WordSize. 79 // 80 // NOTE: This assumes that the extra bytes required for alignment can be 81 // zero-valued bytes. getSize()82 uint64_t getSize() const final { return llvm::alignTo(getRawSize(), align); } 83 }; 84 85 // The header of the Mach-O file, which must have a file offset of zero. 86 class MachHeaderSection final : public SyntheticSection { 87 public: 88 MachHeaderSection(); isHidden()89 bool isHidden() const override { return true; } 90 uint64_t getSize() const override; 91 void writeTo(uint8_t *buf) const override; 92 93 void addLoadCommand(LoadCommand *); 94 95 protected: 96 std::vector<LoadCommand *> loadCommands; 97 uint32_t sizeOfCmds = 0; 98 }; 99 100 // A hidden section that exists solely for the purpose of creating the 101 // __PAGEZERO segment, which is used to catch null pointer dereferences. 
102 class PageZeroSection final : public SyntheticSection { 103 public: 104 PageZeroSection(); isHidden()105 bool isHidden() const override { return true; } isNeeded()106 bool isNeeded() const override { return target->pageZeroSize != 0; } getSize()107 uint64_t getSize() const override { return target->pageZeroSize; } getFileSize()108 uint64_t getFileSize() const override { return 0; } writeTo(uint8_t * buf)109 void writeTo(uint8_t *buf) const override {} 110 }; 111 112 // This is the base class for the GOT and TLVPointer sections, which are nearly 113 // functionally identical -- they will both be populated by dyld with addresses 114 // to non-lazily-loaded dylib symbols. The main difference is that the 115 // TLVPointerSection stores references to thread-local variables. 116 class NonLazyPointerSectionBase : public SyntheticSection { 117 public: 118 NonLazyPointerSectionBase(const char *segname, const char *name); getEntries()119 const llvm::SetVector<const Symbol *> &getEntries() const { return entries; } isNeeded()120 bool isNeeded() const override { return !entries.empty(); } getSize()121 uint64_t getSize() const override { 122 return entries.size() * target->wordSize; 123 } 124 void writeTo(uint8_t *buf) const override; 125 void addEntry(Symbol *sym); getVA(uint32_t gotIndex)126 uint64_t getVA(uint32_t gotIndex) const { 127 return addr + gotIndex * target->wordSize; 128 } 129 130 private: 131 llvm::SetVector<const Symbol *> entries; 132 }; 133 134 class GotSection final : public NonLazyPointerSectionBase { 135 public: 136 GotSection(); 137 }; 138 139 class TlvPointerSection final : public NonLazyPointerSectionBase { 140 public: 141 TlvPointerSection(); 142 }; 143 144 struct Location { 145 const InputSection *isec; 146 uint64_t offset; 147 LocationLocation148 Location(const InputSection *isec, uint64_t offset) 149 : isec(isec), offset(offset) {} getVALocation150 uint64_t getVA() const { return isec->getVA(offset); } 151 }; 152 153 // Stores rebase opcodes, 
which tell dyld where absolute addresses have been 154 // encoded in the binary. If the binary is not loaded at its preferred address, 155 // dyld has to rebase these addresses by adding an offset to them. 156 class RebaseSection final : public LinkEditSection { 157 public: 158 RebaseSection(); 159 void finalizeContents() override; getRawSize()160 uint64_t getRawSize() const override { return contents.size(); } isNeeded()161 bool isNeeded() const override { return !locations.empty(); } 162 void writeTo(uint8_t *buf) const override; 163 addEntry(const InputSection * isec,uint64_t offset)164 void addEntry(const InputSection *isec, uint64_t offset) { 165 if (config->isPic) 166 locations.emplace_back(isec, offset); 167 } 168 169 private: 170 std::vector<Location> locations; 171 SmallVector<char, 128> contents; 172 }; 173 174 struct BindingEntry { 175 int64_t addend; 176 Location target; BindingEntryBindingEntry177 BindingEntry(int64_t addend, Location target) 178 : addend(addend), target(target) {} 179 }; 180 181 template <class Sym> 182 using BindingsMap = llvm::DenseMap<Sym, std::vector<BindingEntry>>; 183 184 // Stores bind opcodes for telling dyld which symbols to load non-lazily. 185 class BindingSection final : public LinkEditSection { 186 public: 187 BindingSection(); 188 void finalizeContents() override; getRawSize()189 uint64_t getRawSize() const override { return contents.size(); } isNeeded()190 bool isNeeded() const override { return !bindingsMap.empty(); } 191 void writeTo(uint8_t *buf) const override; 192 193 void addEntry(const Symbol *dysym, const InputSection *isec, uint64_t offset, 194 int64_t addend = 0) { 195 bindingsMap[dysym].emplace_back(addend, Location(isec, offset)); 196 } 197 198 private: 199 BindingsMap<const Symbol *> bindingsMap; 200 SmallVector<char, 128> contents; 201 }; 202 203 // Stores bind opcodes for telling dyld which weak symbols need coalescing. 
204 // There are two types of entries in this section: 205 // 206 // 1) Non-weak definitions: This is a symbol definition that weak symbols in 207 // other dylibs should coalesce to. 208 // 209 // 2) Weak bindings: These tell dyld that a given symbol reference should 210 // coalesce to a non-weak definition if one is found. Note that unlike the 211 // entries in the BindingSection, the bindings here only refer to these 212 // symbols by name, but do not specify which dylib to load them from. 213 class WeakBindingSection final : public LinkEditSection { 214 public: 215 WeakBindingSection(); 216 void finalizeContents() override; getRawSize()217 uint64_t getRawSize() const override { return contents.size(); } isNeeded()218 bool isNeeded() const override { 219 return !bindingsMap.empty() || !definitions.empty(); 220 } 221 222 void writeTo(uint8_t *buf) const override; 223 224 void addEntry(const Symbol *symbol, const InputSection *isec, uint64_t offset, 225 int64_t addend = 0) { 226 bindingsMap[symbol].emplace_back(addend, Location(isec, offset)); 227 } 228 hasEntry()229 bool hasEntry() const { return !bindingsMap.empty(); } 230 addNonWeakDefinition(const Defined * defined)231 void addNonWeakDefinition(const Defined *defined) { 232 definitions.emplace_back(defined); 233 } 234 hasNonWeakDefinition()235 bool hasNonWeakDefinition() const { return !definitions.empty(); } 236 237 private: 238 BindingsMap<const Symbol *> bindingsMap; 239 std::vector<const Defined *> definitions; 240 SmallVector<char, 128> contents; 241 }; 242 243 // The following sections implement lazy symbol binding -- very similar to the 244 // PLT mechanism in ELF. 245 // 246 // ELF's .plt section is broken up into two sections in Mach-O: StubsSection 247 // and StubHelperSection. Calls to functions in dylibs will end up calling into 248 // StubsSection, which contains indirect jumps to addresses stored in the 249 // LazyPointerSection (the counterpart to ELF's .plt.got). 
//
// We will first describe how non-weak symbols are handled.
//
// At program start, the LazyPointerSection contains addresses that point into
// one of the entry points in the middle of the StubHelperSection. The code in
// StubHelperSection will push on the stack an offset into the
// LazyBindingSection. The push is followed by a jump to the beginning of the
// StubHelperSection (similar to PLT0), which then calls into dyld_stub_binder.
// dyld_stub_binder is a non-lazily-bound symbol, so this call looks it up in
// the GOT.
//
// The stub binder will look up the bind opcodes in the LazyBindingSection at
// the given offset. The bind opcodes will tell the binder to update the
// address in the LazyPointerSection to point to the symbol, so that subsequent
// calls don't have to redo the symbol resolution. The binder will then jump to
// the resolved symbol.
//
// With weak symbols, the situation is slightly different. Since there is no
// "weak lazy" lookup, function calls to weak symbols are always non-lazily
// bound. We emit both regular non-lazy bindings as well as weak bindings, in
// order that the weak bindings may overwrite the non-lazy bindings if an
// appropriate symbol is found at runtime. However, the bound addresses will
// still be written (non-lazily) into the LazyPointerSection.
//
// Symbols are always bound eagerly when chained fixups are used. In that case,
// StubsSection contains indirect jumps to addresses stored in the GotSection.
// The GOT directly contains the fixup entries, which will be replaced by the
// address of the target symbols on load. LazyPointerSection and
// StubHelperSection are not used.
279 280 class StubsSection final : public SyntheticSection { 281 public: 282 StubsSection(); 283 uint64_t getSize() const override; isNeeded()284 bool isNeeded() const override { return !entries.empty(); } 285 void finalize() override; 286 void writeTo(uint8_t *buf) const override; getEntries()287 const llvm::SetVector<Symbol *> &getEntries() const { return entries; } 288 // Creates a stub for the symbol and the corresponding entry in the 289 // LazyPointerSection. 290 void addEntry(Symbol *); getVA(uint32_t stubsIndex)291 uint64_t getVA(uint32_t stubsIndex) const { 292 assert(isFinal || target->usesThunks()); 293 // ConcatOutputSection::finalize() can seek the address of a 294 // stub before its address is assigned. Before __stubs is 295 // finalized, return a contrived out-of-range address. 296 return isFinal ? addr + stubsIndex * target->stubSize 297 : TargetInfo::outOfRangeVA; 298 } 299 300 bool isFinal = false; // is address assigned? 301 302 private: 303 llvm::SetVector<Symbol *> entries; 304 }; 305 306 class StubHelperSection final : public SyntheticSection { 307 public: 308 StubHelperSection(); 309 uint64_t getSize() const override; 310 bool isNeeded() const override; 311 void writeTo(uint8_t *buf) const override; 312 313 void setUp(); 314 315 DylibSymbol *stubBinder = nullptr; 316 Defined *dyldPrivate = nullptr; 317 }; 318 319 class ObjCSelRefsHelper { 320 public: 321 static void initialize(); 322 static void cleanup(); 323 324 static ConcatInputSection *getSelRef(StringRef methname); 325 static ConcatInputSection *makeSelRef(StringRef methname); 326 327 private: 328 static llvm::DenseMap<llvm::CachedHashStringRef, ConcatInputSection *> 329 methnameToSelref; 330 }; 331 332 // Objective-C stubs are hoisted objc_msgSend calls per selector called in the 333 // program. Apple Clang produces undefined symbols to each stub, such as 334 // '_objc_msgSend$foo', which are then synthesized by the linker. 
The stubs 335 // load the particular selector 'foo' from __objc_selrefs, setting it to the 336 // first argument of the objc_msgSend call, and then jumps to objc_msgSend. The 337 // actual stub contents are mirrored from ld64. 338 class ObjCStubsSection final : public SyntheticSection { 339 public: 340 ObjCStubsSection(); 341 void addEntry(Symbol *sym); 342 uint64_t getSize() const override; isNeeded()343 bool isNeeded() const override { return !symbols.empty(); } finalize()344 void finalize() override { isec->isFinal = true; } 345 void writeTo(uint8_t *buf) const override; 346 void setUp(); 347 348 static constexpr llvm::StringLiteral symbolPrefix = "_objc_msgSend$"; 349 static bool isObjCStubSymbol(Symbol *sym); 350 static StringRef getMethname(Symbol *sym); 351 352 private: 353 std::vector<Defined *> symbols; 354 Symbol *objcMsgSend = nullptr; 355 }; 356 357 // Note that this section may also be targeted by non-lazy bindings. In 358 // particular, this happens when branch relocations target weak symbols. 359 class LazyPointerSection final : public SyntheticSection { 360 public: 361 LazyPointerSection(); 362 uint64_t getSize() const override; 363 bool isNeeded() const override; 364 void writeTo(uint8_t *buf) const override; getVA(uint32_t index)365 uint64_t getVA(uint32_t index) const { 366 return addr + (index << target->p2WordSize); 367 } 368 }; 369 370 class LazyBindingSection final : public LinkEditSection { 371 public: 372 LazyBindingSection(); 373 void finalizeContents() override; getRawSize()374 uint64_t getRawSize() const override { return contents.size(); } isNeeded()375 bool isNeeded() const override { return !entries.empty(); } 376 void writeTo(uint8_t *buf) const override; 377 // Note that every entry here will by referenced by a corresponding entry in 378 // the StubHelperSection. 
379 void addEntry(Symbol *dysym); getEntries()380 const llvm::SetVector<Symbol *> &getEntries() const { return entries; } 381 382 private: 383 uint32_t encode(const Symbol &); 384 385 llvm::SetVector<Symbol *> entries; 386 SmallVector<char, 128> contents; 387 llvm::raw_svector_ostream os{contents}; 388 }; 389 390 // Stores a trie that describes the set of exported symbols. 391 class ExportSection final : public LinkEditSection { 392 public: 393 ExportSection(); 394 void finalizeContents() override; getRawSize()395 uint64_t getRawSize() const override { return size; } isNeeded()396 bool isNeeded() const override { return size; } 397 void writeTo(uint8_t *buf) const override; 398 399 bool hasWeakSymbol = false; 400 401 private: 402 TrieBuilder trieBuilder; 403 size_t size = 0; 404 }; 405 406 // Stores 'data in code' entries that describe the locations of data regions 407 // inside code sections. This is used by llvm-objdump to distinguish jump tables 408 // and stop them from being disassembled as instructions. 409 class DataInCodeSection final : public LinkEditSection { 410 public: 411 DataInCodeSection(); 412 void finalizeContents() override; getRawSize()413 uint64_t getRawSize() const override { 414 return sizeof(llvm::MachO::data_in_code_entry) * entries.size(); 415 } 416 void writeTo(uint8_t *buf) const override; 417 418 private: 419 std::vector<llvm::MachO::data_in_code_entry> entries; 420 }; 421 422 // Stores ULEB128 delta encoded addresses of functions. 423 class FunctionStartsSection final : public LinkEditSection { 424 public: 425 FunctionStartsSection(); 426 void finalizeContents() override; getRawSize()427 uint64_t getRawSize() const override { return contents.size(); } 428 void writeTo(uint8_t *buf) const override; 429 430 private: 431 SmallVector<char, 128> contents; 432 }; 433 434 // Stores the strings referenced by the symbol table. 
435 class StringTableSection final : public LinkEditSection { 436 public: 437 StringTableSection(); 438 // Returns the start offset of the added string. 439 uint32_t addString(StringRef); getRawSize()440 uint64_t getRawSize() const override { return size; } 441 void writeTo(uint8_t *buf) const override; 442 443 static constexpr size_t emptyStringIndex = 1; 444 445 private: 446 // ld64 emits string tables which start with a space and a zero byte. We 447 // match its behavior here since some tools depend on it. 448 // Consequently, the empty string will be at index 1, not zero. 449 std::vector<StringRef> strings{" "}; 450 size_t size = 2; 451 }; 452 453 struct SymtabEntry { 454 Symbol *sym; 455 size_t strx; 456 }; 457 458 struct StabsEntry { 459 uint8_t type = 0; 460 uint32_t strx = StringTableSection::emptyStringIndex; 461 uint8_t sect = 0; 462 uint16_t desc = 0; 463 uint64_t value = 0; 464 465 StabsEntry() = default; StabsEntryStabsEntry466 explicit StabsEntry(uint8_t type) : type(type) {} 467 }; 468 469 // Symbols of the same type must be laid out contiguously: we choose to emit 470 // all local symbols first, then external symbols, and finally undefined 471 // symbols. For each symbol type, the LC_DYSYMTAB load command will record the 472 // range (start index and total number) of those symbols in the symbol table. 
473 class SymtabSection : public LinkEditSection { 474 public: 475 void finalizeContents() override; 476 uint32_t getNumSymbols() const; getNumLocalSymbols()477 uint32_t getNumLocalSymbols() const { 478 return stabs.size() + localSymbols.size(); 479 } getNumExternalSymbols()480 uint32_t getNumExternalSymbols() const { return externalSymbols.size(); } getNumUndefinedSymbols()481 uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); } 482 483 private: 484 void emitBeginSourceStab(StringRef); 485 void emitEndSourceStab(); 486 void emitObjectFileStab(ObjFile *); 487 void emitEndFunStab(Defined *); 488 void emitStabs(); 489 490 protected: 491 SymtabSection(StringTableSection &); 492 493 StringTableSection &stringTableSection; 494 // STABS symbols are always local symbols, but we represent them with special 495 // entries because they may use fields like n_sect and n_desc differently. 496 std::vector<StabsEntry> stabs; 497 std::vector<SymtabEntry> localSymbols; 498 std::vector<SymtabEntry> externalSymbols; 499 std::vector<SymtabEntry> undefinedSymbols; 500 }; 501 502 template <class LP> SymtabSection *makeSymtabSection(StringTableSection &); 503 504 // The indirect symbol table is a list of 32-bit integers that serve as indices 505 // into the (actual) symbol table. The indirect symbol table is a 506 // concatenation of several sub-arrays of indices, each sub-array belonging to 507 // a separate section. The starting offset of each sub-array is stored in the 508 // reserved1 header field of the respective section. 509 // 510 // These sub-arrays provide symbol information for sections that store 511 // contiguous sequences of symbol references. These references can be pointers 512 // (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g. 513 // function stubs). 
514 class IndirectSymtabSection final : public LinkEditSection { 515 public: 516 IndirectSymtabSection(); 517 void finalizeContents() override; 518 uint32_t getNumSymbols() const; getRawSize()519 uint64_t getRawSize() const override { 520 return getNumSymbols() * sizeof(uint32_t); 521 } 522 bool isNeeded() const override; 523 void writeTo(uint8_t *buf) const override; 524 }; 525 526 // The code signature comes at the very end of the linked output file. 527 class CodeSignatureSection final : public LinkEditSection { 528 public: 529 // NOTE: These values are duplicated in llvm-objcopy's MachO/Object.h file 530 // and any changes here, should be repeated there. 531 static constexpr uint8_t blockSizeShift = 12; 532 static constexpr size_t blockSize = (1 << blockSizeShift); // 4 KiB 533 static constexpr size_t hashSize = 256 / 8; 534 static constexpr size_t blobHeadersSize = llvm::alignTo<8>( 535 sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex)); 536 static constexpr uint32_t fixedHeadersSize = 537 blobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory); 538 539 uint32_t fileNamePad = 0; 540 uint32_t allHeadersSize = 0; 541 StringRef fileName; 542 543 CodeSignatureSection(); 544 uint64_t getRawSize() const override; isNeeded()545 bool isNeeded() const override { return true; } 546 void writeTo(uint8_t *buf) const override; 547 uint32_t getBlockCount() const; 548 void writeHashes(uint8_t *buf) const; 549 }; 550 551 class CStringSection : public SyntheticSection { 552 public: 553 CStringSection(const char *name); 554 void addInput(CStringInputSection *); getSize()555 uint64_t getSize() const override { return size; } 556 virtual void finalizeContents(); isNeeded()557 bool isNeeded() const override { return !inputs.empty(); } 558 void writeTo(uint8_t *buf) const override; 559 560 std::vector<CStringInputSection *> inputs; 561 562 private: 563 uint64_t size; 564 }; 565 566 class DeduplicatedCStringSection final : public CStringSection { 567 public: 
DeduplicatedCStringSection(const char * name)568 DeduplicatedCStringSection(const char *name) : CStringSection(name){}; getSize()569 uint64_t getSize() const override { return size; } 570 void finalizeContents() override; 571 void writeTo(uint8_t *buf) const override; 572 573 struct StringOffset { 574 uint8_t trailingZeros; 575 uint64_t outSecOff = UINT64_MAX; 576 StringOffsetStringOffset577 explicit StringOffset(uint8_t zeros) : trailingZeros(zeros) {} 578 }; 579 580 StringOffset getStringOffset(StringRef str) const; 581 582 private: 583 llvm::DenseMap<llvm::CachedHashStringRef, StringOffset> stringOffsetMap; 584 size_t size = 0; 585 }; 586 587 /* 588 * This section contains deduplicated literal values. The 16-byte values are 589 * laid out first, followed by the 8- and then the 4-byte ones. 590 */ 591 class WordLiteralSection final : public SyntheticSection { 592 public: 593 using UInt128 = std::pair<uint64_t, uint64_t>; 594 // I don't think the standard guarantees the size of a pair, so let's make 595 // sure it's exact -- that way we can construct it via `mmap`. 
596 static_assert(sizeof(UInt128) == 16); 597 598 WordLiteralSection(); 599 void addInput(WordLiteralInputSection *); 600 void finalizeContents(); 601 void writeTo(uint8_t *buf) const override; 602 getSize()603 uint64_t getSize() const override { 604 return literal16Map.size() * 16 + literal8Map.size() * 8 + 605 literal4Map.size() * 4; 606 } 607 isNeeded()608 bool isNeeded() const override { 609 return !literal16Map.empty() || !literal4Map.empty() || 610 !literal8Map.empty(); 611 } 612 getLiteral16Offset(uintptr_t buf)613 uint64_t getLiteral16Offset(uintptr_t buf) const { 614 return literal16Map.at(*reinterpret_cast<const UInt128 *>(buf)) * 16; 615 } 616 getLiteral8Offset(uintptr_t buf)617 uint64_t getLiteral8Offset(uintptr_t buf) const { 618 return literal16Map.size() * 16 + 619 literal8Map.at(*reinterpret_cast<const uint64_t *>(buf)) * 8; 620 } 621 getLiteral4Offset(uintptr_t buf)622 uint64_t getLiteral4Offset(uintptr_t buf) const { 623 return literal16Map.size() * 16 + literal8Map.size() * 8 + 624 literal4Map.at(*reinterpret_cast<const uint32_t *>(buf)) * 4; 625 } 626 627 private: 628 std::vector<WordLiteralInputSection *> inputs; 629 630 template <class T> struct Hasher { operatorHasher631 llvm::hash_code operator()(T v) const { return llvm::hash_value(v); } 632 }; 633 // We're using unordered_map instead of DenseMap here because we need to 634 // support all possible integer values -- there are no suitable tombstone 635 // values for DenseMap. 
636 std::unordered_map<UInt128, uint64_t, Hasher<UInt128>> literal16Map; 637 std::unordered_map<uint64_t, uint64_t> literal8Map; 638 std::unordered_map<uint32_t, uint64_t> literal4Map; 639 }; 640 641 class ObjCImageInfoSection final : public SyntheticSection { 642 public: 643 ObjCImageInfoSection(); isNeeded()644 bool isNeeded() const override { return !files.empty(); } getSize()645 uint64_t getSize() const override { return 8; } addFile(const InputFile * file)646 void addFile(const InputFile *file) { 647 assert(!file->objCImageInfo.empty()); 648 files.push_back(file); 649 } 650 void finalizeContents(); 651 void writeTo(uint8_t *buf) const override; 652 653 private: 654 struct ImageInfo { 655 uint8_t swiftVersion = 0; 656 bool hasCategoryClassProperties = false; 657 } info; 658 static ImageInfo parseImageInfo(const InputFile *); 659 std::vector<const InputFile *> files; // files with image info 660 }; 661 662 // This section stores 32-bit __TEXT segment offsets of initializer functions. 663 // 664 // The compiler stores pointers to initializers in __mod_init_func. These need 665 // to be fixed up at load time, which takes time and dirties memory. By 666 // synthesizing InitOffsetsSection from them, this data can live in the 667 // read-only __TEXT segment instead. This section is used by default when 668 // chained fixups are enabled. 669 // 670 // There is no similar counterpart to __mod_term_func, as that section is 671 // deprecated, and static destructors are instead handled by registering them 672 // via __cxa_atexit from an autogenerated initializer function (see D121736). 
673 class InitOffsetsSection final : public SyntheticSection { 674 public: 675 InitOffsetsSection(); isNeeded()676 bool isNeeded() const override { return !sections.empty(); } 677 uint64_t getSize() const override; 678 void writeTo(uint8_t *buf) const override; 679 void setUp(); 680 addInput(ConcatInputSection * isec)681 void addInput(ConcatInputSection *isec) { sections.push_back(isec); } inputs()682 const std::vector<ConcatInputSection *> &inputs() const { return sections; } 683 684 private: 685 std::vector<ConcatInputSection *> sections; 686 }; 687 688 // This SyntheticSection is for the __objc_methlist section, which contains 689 // relative method lists if the -objc_relative_method_lists option is enabled. 690 class ObjCMethListSection final : public SyntheticSection { 691 public: 692 ObjCMethListSection(); 693 694 static bool isMethodList(const ConcatInputSection *isec); addInput(ConcatInputSection * isec)695 void addInput(ConcatInputSection *isec) { inputs.push_back(isec); } getInputs()696 std::vector<ConcatInputSection *> getInputs() { return inputs; } 697 698 void setUp(); 699 void finalize() override; isNeeded()700 bool isNeeded() const override { return !inputs.empty(); } getSize()701 uint64_t getSize() const override { return sectionSize; } 702 void writeTo(uint8_t *bufStart) const override; 703 704 private: 705 void readMethodListHeader(const uint8_t *buf, uint32_t &structSizeAndFlags, 706 uint32_t &structCount) const; 707 void writeMethodListHeader(uint8_t *buf, uint32_t structSizeAndFlags, 708 uint32_t structCount) const; 709 uint32_t computeRelativeMethodListSize(uint32_t absoluteMethodListSize) const; 710 void writeRelativeOffsetForIsec(const ConcatInputSection *isec, uint8_t *buf, 711 uint32_t &inSecOff, uint32_t &outSecOff, 712 bool useSelRef) const; 713 uint32_t writeRelativeMethodList(const ConcatInputSection *isec, 714 uint8_t *buf) const; 715 716 static constexpr uint32_t methodListHeaderSize = 717 /*structSizeAndFlags*/ sizeof(uint32_t) + 
718 /*structCount*/ sizeof(uint32_t); 719 // Relative method lists are supported only for 3-pointer method lists 720 static constexpr uint32_t pointersPerStruct = 3; 721 // The runtime identifies relative method lists via this magic value 722 static constexpr uint32_t relMethodHeaderFlag = 0x80000000; 723 // In the method list header, the first 2 bytes are the size of struct 724 static constexpr uint32_t structSizeMask = 0x0000FFFF; 725 // In the method list header, the last 2 bytes are the flags for the struct 726 static constexpr uint32_t structFlagsMask = 0xFFFF0000; 727 // Relative method lists have 4 byte alignment as all data in the InputSection 728 // is 4 byte 729 static constexpr uint32_t relativeOffsetSize = sizeof(uint32_t); 730 731 // The output size of the __objc_methlist section, computed during finalize() 732 uint32_t sectionSize = 0; 733 std::vector<ConcatInputSection *> inputs; 734 }; 735 736 // Chained fixups are a replacement for classic dyld opcodes. In this format, 737 // most of the metadata necessary for binding symbols and rebasing addresses is 738 // stored directly in the memory location that will have the fixup applied. 739 // 740 // The fixups form singly linked lists; each one covering a single page in 741 // memory. The __LINKEDIT,__chainfixups section stores the page offset of the 742 // first fixup of each page; the rest can be found by walking the chain using 743 // the offset that is embedded in each entry. 744 // 745 // This setup allows pages to be relocated lazily at page-in time and without 746 // being dirtied. The kernel can discard and load them again as needed. This 747 // technique, called page-in linking, was introduced in macOS 13. 
748 // 749 // The benefits of this format are: 750 // - smaller __LINKEDIT segment, as most of the fixup information is stored in 751 // the data segment 752 // - faster startup, since not all relocations need to be done upfront 753 // - slightly lower memory usage, as fewer pages are dirtied 754 // 755 // Userspace x86_64 and arm64 binaries have two types of fixup entries: 756 // - Rebase entries contain an absolute address, to which the object's load 757 // address will be added to get the final value. This is used for loading 758 // the address of a symbol defined in the same binary. 759 // - Binding entries are mostly used for symbols imported from other dylibs, 760 // but for weakly bound and interposable symbols as well. They are looked up 761 // by a (symbol name, library) pair stored in __chainfixups. This import 762 // entry also encodes whether the import is weak (i.e. if the symbol is 763 // missing, it should be set to null instead of producing a load error). 764 // The fixup encodes an ordinal associated with the import, and an optional 765 // addend. 766 // 767 // The entries are tightly packed 64-bit bitfields. One of the bits specifies 768 // which kind of fixup to interpret them as. 769 // 770 // LLD generates the fixup data in 5 stages: 771 // 1. While scanning relocations, we make a note of each location that needs 772 // a fixup by calling addRebase() or addBinding(). During this, we assign 773 // a unique ordinal for each (symbol name, library, addend) import tuple. 774 // 2. After addresses have been assigned to all sections, and thus the memory 775 // layout of the linked image is final; finalizeContents() is called. Here, 776 // the page offsets of the chain start entries are calculated. 777 // 3. ChainedFixupsSection::writeTo() writes the page start offsets and the 778 // imports table to the output file. 779 // 4. 
Each section's fixup entries are encoded and written to disk in 780 // ConcatInputSection::writeTo(), but without writing the offsets that form 781 // the chain. 782 // 5. Finally, each page's (which might correspond to multiple sections) 783 // fixups are linked together in Writer::buildFixupChains(). 784 class ChainedFixupsSection final : public LinkEditSection { 785 public: 786 ChainedFixupsSection(); 787 void finalizeContents() override; getRawSize()788 uint64_t getRawSize() const override { return size; } 789 bool isNeeded() const override; 790 void writeTo(uint8_t *buf) const override; 791 addRebase(const InputSection * isec,uint64_t offset)792 void addRebase(const InputSection *isec, uint64_t offset) { 793 locations.emplace_back(isec, offset); 794 } 795 void addBinding(const Symbol *dysym, const InputSection *isec, 796 uint64_t offset, int64_t addend = 0); 797 setHasNonWeakDefinition()798 void setHasNonWeakDefinition() { hasNonWeakDef = true; } 799 800 // Returns an (ordinal, inline addend) tuple used by dyld_chained_ptr_64_bind. 801 std::pair<uint32_t, uint8_t> getBinding(const Symbol *sym, 802 int64_t addend) const; 803 getLocations()804 const std::vector<Location> &getLocations() const { return locations; } 805 hasWeakBinding()806 bool hasWeakBinding() const { return hasWeakBind; } hasNonWeakDefinition()807 bool hasNonWeakDefinition() const { return hasNonWeakDef; } 808 809 private: 810 // Location::offset initially stores the offset within an InputSection, but 811 // contains output segment offsets after finalizeContents(). 
812 std::vector<Location> locations; 813 // (target symbol, addend) => import ordinal 814 llvm::MapVector<std::pair<const Symbol *, int64_t>, uint32_t> bindings; 815 816 struct SegmentInfo { SegmentInfoSegmentInfo817 SegmentInfo(const OutputSegment *oseg) : oseg(oseg) {} 818 819 const OutputSegment *oseg; 820 // (page index, fixup starts offset) 821 llvm::SmallVector<std::pair<uint16_t, uint16_t>> pageStarts; 822 823 size_t getSize() const; 824 size_t writeTo(uint8_t *buf) const; 825 }; 826 llvm::SmallVector<SegmentInfo, 4> fixupSegments; 827 828 size_t symtabSize = 0; 829 size_t size = 0; 830 831 bool needsAddend = false; 832 bool needsLargeAddend = false; 833 bool hasWeakBind = false; 834 bool hasNonWeakDef = false; 835 llvm::MachO::ChainedImportFormat importFormat; 836 }; 837 838 void writeChainedRebase(uint8_t *buf, uint64_t targetVA); 839 void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend); 840 841 struct InStruct { 842 const uint8_t *bufferStart = nullptr; 843 MachHeaderSection *header = nullptr; 844 CStringSection *cStringSection = nullptr; 845 DeduplicatedCStringSection *objcMethnameSection = nullptr; 846 WordLiteralSection *wordLiteralSection = nullptr; 847 RebaseSection *rebase = nullptr; 848 BindingSection *binding = nullptr; 849 WeakBindingSection *weakBinding = nullptr; 850 LazyBindingSection *lazyBinding = nullptr; 851 ExportSection *exports = nullptr; 852 GotSection *got = nullptr; 853 TlvPointerSection *tlvPointers = nullptr; 854 LazyPointerSection *lazyPointers = nullptr; 855 StubsSection *stubs = nullptr; 856 StubHelperSection *stubHelper = nullptr; 857 ObjCStubsSection *objcStubs = nullptr; 858 UnwindInfoSection *unwindInfo = nullptr; 859 ObjCImageInfoSection *objCImageInfo = nullptr; 860 ConcatInputSection *imageLoaderCache = nullptr; 861 InitOffsetsSection *initOffsets = nullptr; 862 ObjCMethListSection *objcMethList = nullptr; 863 ChainedFixupsSection *chainedFixups = nullptr; 864 }; 865 866 extern InStruct in; 867 extern 
std::vector<SyntheticSection *> syntheticSections; 868 869 void createSyntheticSymbols(); 870 871 } // namespace lld::macho 872 873 #endif 874