xref: /freebsd/contrib/llvm-project/lld/MachO/SyntheticSections.cpp (revision d5e3895ea4fe4ef9db8823774e07b4368180a23e)
1 //===- SyntheticSections.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SyntheticSections.h"
10 #include "Config.h"
11 #include "ExportTrie.h"
12 #include "InputFiles.h"
13 #include "MachOStructs.h"
14 #include "MergedOutputSection.h"
15 #include "OutputSegment.h"
16 #include "SymbolTable.h"
17 #include "Symbols.h"
18 #include "Writer.h"
19 
20 #include "lld/Common/ErrorHandler.h"
21 #include "lld/Common/Memory.h"
22 #include "llvm/Support/EndianStream.h"
23 #include "llvm/Support/LEB128.h"
24 
25 using namespace llvm;
26 using namespace llvm::support;
27 using namespace llvm::support::endian;
28 using namespace lld;
29 using namespace lld::macho;
30 
31 InStruct macho::in;
32 std::vector<SyntheticSection *> macho::syntheticSections;
33 
34 SyntheticSection::SyntheticSection(const char *segname, const char *name)
35     : OutputSection(SyntheticKind, name), segname(segname) {
36   syntheticSections.push_back(this);
37 }
38 
39 // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
40 // from the beginning of the file (i.e. the header).
41 MachHeaderSection::MachHeaderSection()
42     : SyntheticSection(segment_names::text, section_names::header) {}
43 
44 void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
45   loadCommands.push_back(lc);
46   sizeOfCmds += lc->getSize();
47 }
48 
49 uint64_t MachHeaderSection::getSize() const {
50   return sizeof(MachO::mach_header_64) + sizeOfCmds;
51 }
52 
53 void MachHeaderSection::writeTo(uint8_t *buf) const {
54   auto *hdr = reinterpret_cast<MachO::mach_header_64 *>(buf);
55   hdr->magic = MachO::MH_MAGIC_64;
56   hdr->cputype = MachO::CPU_TYPE_X86_64;
57   hdr->cpusubtype = MachO::CPU_SUBTYPE_X86_64_ALL | MachO::CPU_SUBTYPE_LIB64;
58   hdr->filetype = config->outputType;
59   hdr->ncmds = loadCommands.size();
60   hdr->sizeofcmds = sizeOfCmds;
61   hdr->flags = MachO::MH_NOUNDEFS | MachO::MH_DYLDLINK | MachO::MH_TWOLEVEL;
62   if (config->outputType == MachO::MH_DYLIB && !config->hasReexports)
63     hdr->flags |= MachO::MH_NO_REEXPORTED_DYLIBS;
64 
65   uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1);
66   for (LoadCommand *lc : loadCommands) {
67     lc->writeTo(p);
68     p += lc->getSize();
69   }
70 }
71 
72 PageZeroSection::PageZeroSection()
73     : SyntheticSection(segment_names::pageZero, section_names::pageZero) {}
74 
75 GotSection::GotSection()
76     : SyntheticSection(segment_names::dataConst, section_names::got) {
77   align = 8;
78   flags = MachO::S_NON_LAZY_SYMBOL_POINTERS;
79 
80   // TODO: section_64::reserved1 should be an index into the indirect symbol
81   // table, which we do not currently emit
82 }
83 
84 void GotSection::addEntry(Symbol &sym) {
85   if (entries.insert(&sym)) {
86     sym.gotIndex = entries.size() - 1;
87   }
88 }
89 
90 void GotSection::writeTo(uint8_t *buf) const {
91   for (size_t i = 0, n = entries.size(); i < n; ++i)
92     if (auto *defined = dyn_cast<Defined>(entries[i]))
93       write64le(&buf[i * WordSize], defined->getVA());
94 }
95 
96 BindingSection::BindingSection()
97     : SyntheticSection(segment_names::linkEdit, section_names::binding) {}
98 
99 bool BindingSection::isNeeded() const {
100   return bindings.size() != 0 || in.got->isNeeded();
101 }
102 
103 namespace {
104 struct Binding {
105   OutputSegment *segment = nullptr;
106   uint64_t offset = 0;
107   int64_t addend = 0;
108   uint8_t ordinal = 0;
109 };
110 } // namespace
111 
112 // Encode a sequence of opcodes that tell dyld to write the address of dysym +
113 // addend at osec->addr + outSecOff.
114 //
115 // The bind opcode "interpreter" remembers the values of each binding field, so
116 // we only need to encode the differences between bindings. Hence the use of
117 // lastBinding.
118 static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
119                           uint64_t outSecOff, int64_t addend,
120                           Binding &lastBinding, raw_svector_ostream &os) {
121   using namespace llvm::MachO;
122   OutputSegment *seg = osec->parent;
123   uint64_t offset = osec->getSegmentOffset() + outSecOff;
124   if (lastBinding.segment != seg) {
125     os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
126                                seg->index);
127     encodeULEB128(offset, os);
128     lastBinding.segment = seg;
129     lastBinding.offset = offset;
130   } else if (lastBinding.offset != offset) {
131     assert(lastBinding.offset <= offset);
132     os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
133     encodeULEB128(offset - lastBinding.offset, os);
134     lastBinding.offset = offset;
135   }
136 
137   if (lastBinding.ordinal != dysym.file->ordinal) {
138     if (dysym.file->ordinal <= BIND_IMMEDIATE_MASK) {
139       os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
140                                  dysym.file->ordinal);
141     } else {
142       error("TODO: Support larger dylib symbol ordinals");
143       return;
144     }
145     lastBinding.ordinal = dysym.file->ordinal;
146   }
147 
148   if (lastBinding.addend != addend) {
149     os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB);
150     encodeSLEB128(addend, os);
151     lastBinding.addend = addend;
152   }
153 
154   os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
155      << dysym.getName() << '\0'
156      << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
157      << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
158   // DO_BIND causes dyld to both perform the binding and increment the offset
159   lastBinding.offset += WordSize;
160 }
161 
162 // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
163 // interprets to update a record with the following fields:
164 //  * segment index (of the segment to write the symbol addresses to, typically
165 //    the __DATA_CONST segment which contains the GOT)
166 //  * offset within the segment, indicating the next location to write a binding
167 //  * symbol type
168 //  * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)
169 //  * symbol name
170 //  * addend
171 // When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
172 // a symbol in the GOT, and increments the segment offset to point to the next
173 // entry. It does *not* clear the record state after doing the bind, so
174 // subsequent opcodes only need to encode the differences between bindings.
175 void BindingSection::finalizeContents() {
176   raw_svector_ostream os{contents};
177   Binding lastBinding;
178   bool didEncode = false;
179   size_t gotIdx = 0;
180   for (const Symbol *sym : in.got->getEntries()) {
181     if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
182       didEncode = true;
183       encodeBinding(*dysym, in.got, gotIdx * WordSize, 0, lastBinding, os);
184     }
185     ++gotIdx;
186   }
187 
188   // Sorting the relocations by segment and address allows us to encode them
189   // more compactly.
190   llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
191     OutputSegment *segA = a.isec->parent->parent;
192     OutputSegment *segB = b.isec->parent->parent;
193     if (segA != segB)
194       return segA->fileOff < segB->fileOff;
195     OutputSection *osecA = a.isec->parent;
196     OutputSection *osecB = b.isec->parent;
197     if (osecA != osecB)
198       return osecA->addr < osecB->addr;
199     if (a.isec != b.isec)
200       return a.isec->outSecOff < b.isec->outSecOff;
201     return a.offset < b.offset;
202   });
203   for (const BindingEntry &b : bindings) {
204     didEncode = true;
205     encodeBinding(*b.dysym, b.isec->parent, b.isec->outSecOff + b.offset,
206                   b.addend, lastBinding, os);
207   }
208   if (didEncode)
209     os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
210 }
211 
212 void BindingSection::writeTo(uint8_t *buf) const {
213   memcpy(buf, contents.data(), contents.size());
214 }
215 
216 StubsSection::StubsSection()
217     : SyntheticSection(segment_names::text, "__stubs") {}
218 
219 uint64_t StubsSection::getSize() const {
220   return entries.size() * target->stubSize;
221 }
222 
223 void StubsSection::writeTo(uint8_t *buf) const {
224   size_t off = 0;
225   for (const DylibSymbol *sym : in.stubs->getEntries()) {
226     target->writeStub(buf + off, *sym);
227     off += target->stubSize;
228   }
229 }
230 
231 void StubsSection::addEntry(DylibSymbol &sym) {
232   if (entries.insert(&sym))
233     sym.stubsIndex = entries.size() - 1;
234 }
235 
236 StubHelperSection::StubHelperSection()
237     : SyntheticSection(segment_names::text, "__stub_helper") {}
238 
239 uint64_t StubHelperSection::getSize() const {
240   return target->stubHelperHeaderSize +
241          in.stubs->getEntries().size() * target->stubHelperEntrySize;
242 }
243 
244 bool StubHelperSection::isNeeded() const {
245   return !in.stubs->getEntries().empty();
246 }
247 
248 void StubHelperSection::writeTo(uint8_t *buf) const {
249   target->writeStubHelperHeader(buf);
250   size_t off = target->stubHelperHeaderSize;
251   for (const DylibSymbol *sym : in.stubs->getEntries()) {
252     target->writeStubHelperEntry(buf + off, *sym, addr + off);
253     off += target->stubHelperEntrySize;
254   }
255 }
256 
257 void StubHelperSection::setup() {
258   stubBinder = dyn_cast_or_null<DylibSymbol>(symtab->find("dyld_stub_binder"));
259   if (stubBinder == nullptr) {
260     error("symbol dyld_stub_binder not found (normally in libSystem.dylib). "
261           "Needed to perform lazy binding.");
262     return;
263   }
264   in.got->addEntry(*stubBinder);
265 
266   inputSections.push_back(in.imageLoaderCache);
267   symtab->addDefined("__dyld_private", in.imageLoaderCache, 0);
268 }
269 
270 ImageLoaderCacheSection::ImageLoaderCacheSection() {
271   segname = segment_names::data;
272   name = "__data";
273   uint8_t *arr = bAlloc.Allocate<uint8_t>(WordSize);
274   memset(arr, 0, WordSize);
275   data = {arr, WordSize};
276 }
277 
278 LazyPointerSection::LazyPointerSection()
279     : SyntheticSection(segment_names::data, "__la_symbol_ptr") {
280   align = 8;
281   flags = MachO::S_LAZY_SYMBOL_POINTERS;
282 }
283 
284 uint64_t LazyPointerSection::getSize() const {
285   return in.stubs->getEntries().size() * WordSize;
286 }
287 
288 bool LazyPointerSection::isNeeded() const {
289   return !in.stubs->getEntries().empty();
290 }
291 
292 void LazyPointerSection::writeTo(uint8_t *buf) const {
293   size_t off = 0;
294   for (const DylibSymbol *sym : in.stubs->getEntries()) {
295     uint64_t stubHelperOffset = target->stubHelperHeaderSize +
296                                 sym->stubsIndex * target->stubHelperEntrySize;
297     write64le(buf + off, in.stubHelper->addr + stubHelperOffset);
298     off += WordSize;
299   }
300 }
301 
302 LazyBindingSection::LazyBindingSection()
303     : SyntheticSection(segment_names::linkEdit, section_names::lazyBinding) {}
304 
305 bool LazyBindingSection::isNeeded() const { return in.stubs->isNeeded(); }
306 
307 void LazyBindingSection::finalizeContents() {
308   // TODO: Just precompute output size here instead of writing to a temporary
309   // buffer
310   for (DylibSymbol *sym : in.stubs->getEntries())
311     sym->lazyBindOffset = encode(*sym);
312 }
313 
314 void LazyBindingSection::writeTo(uint8_t *buf) const {
315   memcpy(buf, contents.data(), contents.size());
316 }
317 
318 // Unlike the non-lazy binding section, the bind opcodes in this section aren't
319 // interpreted all at once. Rather, dyld will start interpreting opcodes at a
320 // given offset, typically only binding a single symbol before it finds a
321 // BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case,
322 // we cannot encode just the differences between symbols; we have to emit the
323 // complete bind information for each symbol.
324 uint32_t LazyBindingSection::encode(const DylibSymbol &sym) {
325   uint32_t opstreamOffset = contents.size();
326   OutputSegment *dataSeg = in.lazyPointers->parent;
327   os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
328                              dataSeg->index);
329   uint64_t offset = in.lazyPointers->addr - dataSeg->firstSection()->addr +
330                     sym.stubsIndex * WordSize;
331   encodeULEB128(offset, os);
332   if (sym.file->ordinal <= MachO::BIND_IMMEDIATE_MASK)
333     os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
334                                sym.file->ordinal);
335   else
336     fatal("TODO: Support larger dylib symbol ordinals");
337 
338   os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
339      << sym.getName() << '\0'
340      << static_cast<uint8_t>(MachO::BIND_OPCODE_DO_BIND)
341      << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
342   return opstreamOffset;
343 }
344 
345 ExportSection::ExportSection()
346     : SyntheticSection(segment_names::linkEdit, section_names::export_) {}
347 
348 void ExportSection::finalizeContents() {
349   // TODO: We should check symbol visibility.
350   for (const Symbol *sym : symtab->getSymbols())
351     if (auto *defined = dyn_cast<Defined>(sym))
352       trieBuilder.addSymbol(*defined);
353   size = trieBuilder.build();
354 }
355 
356 void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); }
357 
358 SymtabSection::SymtabSection(StringTableSection &stringTableSection)
359     : SyntheticSection(segment_names::linkEdit, section_names::symbolTable),
360       stringTableSection(stringTableSection) {
361   // TODO: When we introduce the SyntheticSections superclass, we should make
362   // all synthetic sections aligned to WordSize by default.
363   align = WordSize;
364 }
365 
366 uint64_t SymtabSection::getSize() const {
367   return symbols.size() * sizeof(structs::nlist_64);
368 }
369 
370 void SymtabSection::finalizeContents() {
371   // TODO support other symbol types
372   for (Symbol *sym : symtab->getSymbols())
373     if (isa<Defined>(sym))
374       symbols.push_back({sym, stringTableSection.addString(sym->getName())});
375 }
376 
377 void SymtabSection::writeTo(uint8_t *buf) const {
378   auto *nList = reinterpret_cast<structs::nlist_64 *>(buf);
379   for (const SymtabEntry &entry : symbols) {
380     nList->n_strx = entry.strx;
381     // TODO support other symbol types
382     // TODO populate n_desc
383     if (auto *defined = dyn_cast<Defined>(entry.sym)) {
384       nList->n_type = MachO::N_EXT | MachO::N_SECT;
385       nList->n_sect = defined->isec->parent->index;
386       // For the N_SECT symbol type, n_value is the address of the symbol
387       nList->n_value = defined->value + defined->isec->getVA();
388     }
389     ++nList;
390   }
391 }
392 
393 StringTableSection::StringTableSection()
394     : SyntheticSection(segment_names::linkEdit, section_names::stringTable) {}
395 
396 uint32_t StringTableSection::addString(StringRef str) {
397   uint32_t strx = size;
398   strings.push_back(str);
399   size += str.size() + 1; // account for null terminator
400   return strx;
401 }
402 
403 void StringTableSection::writeTo(uint8_t *buf) const {
404   uint32_t off = 0;
405   for (StringRef str : strings) {
406     memcpy(buf + off, str.data(), str.size());
407     off += str.size() + 1; // account for null terminator
408   }
409 }
410