1 //===- SyntheticSections.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SyntheticSections.h" 10 #include "Config.h" 11 #include "ExportTrie.h" 12 #include "InputFiles.h" 13 #include "MachOStructs.h" 14 #include "MergedOutputSection.h" 15 #include "OutputSegment.h" 16 #include "SymbolTable.h" 17 #include "Symbols.h" 18 #include "Writer.h" 19 20 #include "lld/Common/ErrorHandler.h" 21 #include "lld/Common/Memory.h" 22 #include "llvm/Support/EndianStream.h" 23 #include "llvm/Support/LEB128.h" 24 25 using namespace llvm; 26 using namespace llvm::support; 27 using namespace llvm::support::endian; 28 using namespace lld; 29 using namespace lld::macho; 30 31 InStruct macho::in; 32 std::vector<SyntheticSection *> macho::syntheticSections; 33 34 SyntheticSection::SyntheticSection(const char *segname, const char *name) 35 : OutputSection(SyntheticKind, name), segname(segname) { 36 syntheticSections.push_back(this); 37 } 38 39 // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts 40 // from the beginning of the file (i.e. the header). 41 MachHeaderSection::MachHeaderSection() 42 : SyntheticSection(segment_names::text, section_names::header) {} 43 44 void MachHeaderSection::addLoadCommand(LoadCommand *lc) { 45 loadCommands.push_back(lc); 46 sizeOfCmds += lc->getSize(); 47 } 48 49 uint64_t MachHeaderSection::getSize() const { 50 return sizeof(MachO::mach_header_64) + sizeOfCmds; 51 } 52 53 void MachHeaderSection::writeTo(uint8_t *buf) const { 54 auto *hdr = reinterpret_cast<MachO::mach_header_64 *>(buf); 55 hdr->magic = MachO::MH_MAGIC_64; 56 hdr->cputype = MachO::CPU_TYPE_X86_64; 57 hdr->cpusubtype = MachO::CPU_SUBTYPE_X86_64_ALL | MachO::CPU_SUBTYPE_LIB64; 58 hdr->filetype = config->outputType; 59 hdr->ncmds = loadCommands.size(); 60 hdr->sizeofcmds = sizeOfCmds; 61 hdr->flags = MachO::MH_NOUNDEFS | MachO::MH_DYLDLINK | MachO::MH_TWOLEVEL; 62 if (config->outputType == MachO::MH_DYLIB && !config->hasReexports) 63 hdr->flags |= MachO::MH_NO_REEXPORTED_DYLIBS; 64 65 uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1); 66 for (LoadCommand *lc : loadCommands) { 67 lc->writeTo(p); 68 p += lc->getSize(); 69 } 70 } 71 72 PageZeroSection::PageZeroSection() 73 : SyntheticSection(segment_names::pageZero, section_names::pageZero) {} 74 75 GotSection::GotSection() 76 : SyntheticSection(segment_names::dataConst, section_names::got) { 77 align = 8; 78 flags = MachO::S_NON_LAZY_SYMBOL_POINTERS; 79 80 // TODO: section_64::reserved1 should be an index into the indirect symbol 81 // table, which we do not currently emit 82 } 83 84 void GotSection::addEntry(Symbol &sym) { 85 if (entries.insert(&sym)) { 86 sym.gotIndex = entries.size() - 1; 87 } 88 } 89 90 void GotSection::writeTo(uint8_t *buf) const { 91 for (size_t i = 0, n = entries.size(); i < n; ++i) 92 if (auto *defined = dyn_cast<Defined>(entries[i])) 93 write64le(&buf[i * WordSize], defined->getVA()); 94 } 95 96 BindingSection::BindingSection() 97 : SyntheticSection(segment_names::linkEdit, section_names::binding) {} 98 99 bool BindingSection::isNeeded() const { 100 return bindings.size() != 0 || in.got->isNeeded(); 101 } 102 103 namespace { 104 struct Binding { 105 OutputSegment *segment = nullptr; 106 uint64_t offset = 0; 107 int64_t addend = 0; 108 uint8_t ordinal = 0; 109 }; 110 } // namespace 111 112 // Encode a sequence of opcodes that tell dyld to write the address of dysym + 113 // addend at osec->addr + outSecOff. 114 // 115 // The bind opcode "interpreter" remembers the values of each binding field, so 116 // we only need to encode the differences between bindings. Hence the use of 117 // lastBinding. 118 static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec, 119 uint64_t outSecOff, int64_t addend, 120 Binding &lastBinding, raw_svector_ostream &os) { 121 using namespace llvm::MachO; 122 OutputSegment *seg = osec->parent; 123 uint64_t offset = osec->getSegmentOffset() + outSecOff; 124 if (lastBinding.segment != seg) { 125 os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 126 seg->index); 127 encodeULEB128(offset, os); 128 lastBinding.segment = seg; 129 lastBinding.offset = offset; 130 } else if (lastBinding.offset != offset) { 131 assert(lastBinding.offset <= offset); 132 os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB); 133 encodeULEB128(offset - lastBinding.offset, os); 134 lastBinding.offset = offset; 135 } 136 137 if (lastBinding.ordinal != dysym.file->ordinal) { 138 if (dysym.file->ordinal <= BIND_IMMEDIATE_MASK) { 139 os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 140 dysym.file->ordinal); 141 } else { 142 error("TODO: Support larger dylib symbol ordinals"); 143 return; 144 } 145 lastBinding.ordinal = dysym.file->ordinal; 146 } 147 148 if (lastBinding.addend != addend) { 149 os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB); 150 encodeSLEB128(addend, os); 151 lastBinding.addend = addend; 152 } 153 154 os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) 155 << dysym.getName() << '\0' 156 << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER) 157 << static_cast<uint8_t>(BIND_OPCODE_DO_BIND); 158 // DO_BIND causes dyld to both perform the binding and increment the offset 159 lastBinding.offset += WordSize; 160 } 161 162 // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld 163 // interprets to update a record with the following fields: 164 // * segment index (of the segment to write the symbol addresses to, typically 165 // the __DATA_CONST segment which contains the GOT) 166 // * offset within the segment, indicating the next location to write a binding 167 // * symbol type 168 // * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command) 169 // * symbol name 170 // * addend 171 // When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind 172 // a symbol in the GOT, and increments the segment offset to point to the next 173 // entry. It does *not* clear the record state after doing the bind, so 174 // subsequent opcodes only need to encode the differences between bindings. 175 void BindingSection::finalizeContents() { 176 raw_svector_ostream os{contents}; 177 Binding lastBinding; 178 bool didEncode = false; 179 size_t gotIdx = 0; 180 for (const Symbol *sym : in.got->getEntries()) { 181 if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) { 182 didEncode = true; 183 encodeBinding(*dysym, in.got, gotIdx * WordSize, 0, lastBinding, os); 184 } 185 ++gotIdx; 186 } 187 188 // Sorting the relocations by segment and address allows us to encode them 189 // more compactly. 190 llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) { 191 OutputSegment *segA = a.isec->parent->parent; 192 OutputSegment *segB = b.isec->parent->parent; 193 if (segA != segB) 194 return segA->fileOff < segB->fileOff; 195 OutputSection *osecA = a.isec->parent; 196 OutputSection *osecB = b.isec->parent; 197 if (osecA != osecB) 198 return osecA->addr < osecB->addr; 199 if (a.isec != b.isec) 200 return a.isec->outSecOff < b.isec->outSecOff; 201 return a.offset < b.offset; 202 }); 203 for (const BindingEntry &b : bindings) { 204 didEncode = true; 205 encodeBinding(*b.dysym, b.isec->parent, b.isec->outSecOff + b.offset, 206 b.addend, lastBinding, os); 207 } 208 if (didEncode) 209 os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE); 210 } 211 212 void BindingSection::writeTo(uint8_t *buf) const { 213 memcpy(buf, contents.data(), contents.size()); 214 } 215 216 StubsSection::StubsSection() 217 : SyntheticSection(segment_names::text, "__stubs") {} 218 219 uint64_t StubsSection::getSize() const { 220 return entries.size() * target->stubSize; 221 } 222 223 void StubsSection::writeTo(uint8_t *buf) const { 224 size_t off = 0; 225 for (const DylibSymbol *sym : in.stubs->getEntries()) { 226 target->writeStub(buf + off, *sym); 227 off += target->stubSize; 228 } 229 } 230 231 void StubsSection::addEntry(DylibSymbol &sym) { 232 if (entries.insert(&sym)) 233 sym.stubsIndex = entries.size() - 1; 234 } 235 236 StubHelperSection::StubHelperSection() 237 : SyntheticSection(segment_names::text, "__stub_helper") {} 238 239 uint64_t StubHelperSection::getSize() const { 240 return target->stubHelperHeaderSize + 241 in.stubs->getEntries().size() * target->stubHelperEntrySize; 242 } 243 244 bool StubHelperSection::isNeeded() const { 245 return !in.stubs->getEntries().empty(); 246 } 247 248 void StubHelperSection::writeTo(uint8_t *buf) const { 249 target->writeStubHelperHeader(buf); 250 size_t off = target->stubHelperHeaderSize; 251 for (const DylibSymbol *sym : in.stubs->getEntries()) { 252 target->writeStubHelperEntry(buf + off, *sym, addr + off); 253 off += target->stubHelperEntrySize; 254 } 255 } 256 257 void StubHelperSection::setup() { 258 stubBinder = dyn_cast_or_null<DylibSymbol>(symtab->find("dyld_stub_binder")); 259 if (stubBinder == nullptr) { 260 error("symbol dyld_stub_binder not found (normally in libSystem.dylib). " 261 "Needed to perform lazy binding."); 262 return; 263 } 264 in.got->addEntry(*stubBinder); 265 266 inputSections.push_back(in.imageLoaderCache); 267 symtab->addDefined("__dyld_private", in.imageLoaderCache, 0); 268 } 269 270 ImageLoaderCacheSection::ImageLoaderCacheSection() { 271 segname = segment_names::data; 272 name = "__data"; 273 uint8_t *arr = bAlloc.Allocate<uint8_t>(WordSize); 274 memset(arr, 0, WordSize); 275 data = {arr, WordSize}; 276 } 277 278 LazyPointerSection::LazyPointerSection() 279 : SyntheticSection(segment_names::data, "__la_symbol_ptr") { 280 align = 8; 281 flags = MachO::S_LAZY_SYMBOL_POINTERS; 282 } 283 284 uint64_t LazyPointerSection::getSize() const { 285 return in.stubs->getEntries().size() * WordSize; 286 } 287 288 bool LazyPointerSection::isNeeded() const { 289 return !in.stubs->getEntries().empty(); 290 } 291 292 void LazyPointerSection::writeTo(uint8_t *buf) const { 293 size_t off = 0; 294 for (const DylibSymbol *sym : in.stubs->getEntries()) { 295 uint64_t stubHelperOffset = target->stubHelperHeaderSize + 296 sym->stubsIndex * target->stubHelperEntrySize; 297 write64le(buf + off, in.stubHelper->addr + stubHelperOffset); 298 off += WordSize; 299 } 300 } 301 302 LazyBindingSection::LazyBindingSection() 303 : SyntheticSection(segment_names::linkEdit, section_names::lazyBinding) {} 304 305 bool LazyBindingSection::isNeeded() const { return in.stubs->isNeeded(); } 306 307 void LazyBindingSection::finalizeContents() { 308 // TODO: Just precompute output size here instead of writing to a temporary 309 // buffer 310 for (DylibSymbol *sym : in.stubs->getEntries()) 311 sym->lazyBindOffset = encode(*sym); 312 } 313 314 void LazyBindingSection::writeTo(uint8_t *buf) const { 315 memcpy(buf, contents.data(), contents.size()); 316 } 317 318 // Unlike the non-lazy binding section, the bind opcodes in this section aren't 319 // interpreted all at once. Rather, dyld will start interpreting opcodes at a 320 // given offset, typically only binding a single symbol before it finds a 321 // BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case, 322 // we cannot encode just the differences between symbols; we have to emit the 323 // complete bind information for each symbol. 324 uint32_t LazyBindingSection::encode(const DylibSymbol &sym) { 325 uint32_t opstreamOffset = contents.size(); 326 OutputSegment *dataSeg = in.lazyPointers->parent; 327 os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 328 dataSeg->index); 329 uint64_t offset = in.lazyPointers->addr - dataSeg->firstSection()->addr + 330 sym.stubsIndex * WordSize; 331 encodeULEB128(offset, os); 332 if (sym.file->ordinal <= MachO::BIND_IMMEDIATE_MASK) 333 os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 334 sym.file->ordinal); 335 else 336 fatal("TODO: Support larger dylib symbol ordinals"); 337 338 os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) 339 << sym.getName() << '\0' 340 << static_cast<uint8_t>(MachO::BIND_OPCODE_DO_BIND) 341 << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE); 342 return opstreamOffset; 343 } 344 345 ExportSection::ExportSection() 346 : SyntheticSection(segment_names::linkEdit, section_names::export_) {} 347 348 void ExportSection::finalizeContents() { 349 // TODO: We should check symbol visibility. 350 for (const Symbol *sym : symtab->getSymbols()) 351 if (auto *defined = dyn_cast<Defined>(sym)) 352 trieBuilder.addSymbol(*defined); 353 size = trieBuilder.build(); 354 } 355 356 void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); } 357 358 SymtabSection::SymtabSection(StringTableSection &stringTableSection) 359 : SyntheticSection(segment_names::linkEdit, section_names::symbolTable), 360 stringTableSection(stringTableSection) { 361 // TODO: When we introduce the SyntheticSections superclass, we should make 362 // all synthetic sections aligned to WordSize by default. 363 align = WordSize; 364 } 365 366 uint64_t SymtabSection::getSize() const { 367 return symbols.size() * sizeof(structs::nlist_64); 368 } 369 370 void SymtabSection::finalizeContents() { 371 // TODO support other symbol types 372 for (Symbol *sym : symtab->getSymbols()) 373 if (isa<Defined>(sym)) 374 symbols.push_back({sym, stringTableSection.addString(sym->getName())}); 375 } 376 377 void SymtabSection::writeTo(uint8_t *buf) const { 378 auto *nList = reinterpret_cast<structs::nlist_64 *>(buf); 379 for (const SymtabEntry &entry : symbols) { 380 nList->n_strx = entry.strx; 381 // TODO support other symbol types 382 // TODO populate n_desc 383 if (auto *defined = dyn_cast<Defined>(entry.sym)) { 384 nList->n_type = MachO::N_EXT | MachO::N_SECT; 385 nList->n_sect = defined->isec->parent->index; 386 // For the N_SECT symbol type, n_value is the address of the symbol 387 nList->n_value = defined->value + defined->isec->getVA(); 388 } 389 ++nList; 390 } 391 } 392 393 StringTableSection::StringTableSection() 394 : SyntheticSection(segment_names::linkEdit, section_names::stringTable) {} 395 396 uint32_t StringTableSection::addString(StringRef str) { 397 uint32_t strx = size; 398 strings.push_back(str); 399 size += str.size() + 1; // account for null terminator 400 return strx; 401 } 402 403 void StringTableSection::writeTo(uint8_t *buf) const { 404 uint32_t off = 0; 405 for (StringRef str : strings) { 406 memcpy(buf + off, str.data(), str.size()); 407 off += str.size() + 1; // account for null terminator 408 } 409 } 410