xref: /freebsd/contrib/llvm-project/lld/MachO/SyntheticSections.cpp (revision 24ccef81405eb25efc65f16b6e9a787f3a51151a)
1 //===- SyntheticSections.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SyntheticSections.h"
10 #include "Config.h"
11 #include "ExportTrie.h"
12 #include "InputFiles.h"
13 #include "MachOStructs.h"
14 #include "MergedOutputSection.h"
15 #include "OutputSegment.h"
16 #include "SymbolTable.h"
17 #include "Symbols.h"
18 #include "Writer.h"
19 
20 #include "lld/Common/ErrorHandler.h"
21 #include "lld/Common/Memory.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/Support/EndianStream.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/LEB128.h"
26 #include "llvm/Support/Path.h"
27 
28 using namespace llvm;
29 using namespace llvm::support;
30 using namespace llvm::support::endian;
31 using namespace lld;
32 using namespace lld::macho;
33 
// Shared handles to the individual synthetic sections (e.g. in.got, in.stubs,
// in.binding) that the code in this file reads and updates.
InStruct macho::in;
// Every SyntheticSection appends itself to this list from its constructor.
std::vector<SyntheticSection *> macho::syntheticSections;
36 
37 SyntheticSection::SyntheticSection(const char *segname, const char *name)
38     : OutputSection(SyntheticKind, name), segname(segname) {
39   syntheticSections.push_back(this);
40 }
41 
// dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
// from the beginning of the file (i.e. the header), so the header section is
// registered under segment_names::text.
MachHeaderSection::MachHeaderSection()
    : SyntheticSection(segment_names::text, section_names::header) {}
46 
47 void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
48   loadCommands.push_back(lc);
49   sizeOfCmds += lc->getSize();
50 }
51 
52 uint64_t MachHeaderSection::getSize() const {
53   return sizeof(MachO::mach_header_64) + sizeOfCmds + config->headerPad;
54 }
55 
56 void MachHeaderSection::writeTo(uint8_t *buf) const {
57   auto *hdr = reinterpret_cast<MachO::mach_header_64 *>(buf);
58   hdr->magic = MachO::MH_MAGIC_64;
59   hdr->cputype = MachO::CPU_TYPE_X86_64;
60   hdr->cpusubtype = MachO::CPU_SUBTYPE_X86_64_ALL | MachO::CPU_SUBTYPE_LIB64;
61   hdr->filetype = config->outputType;
62   hdr->ncmds = loadCommands.size();
63   hdr->sizeofcmds = sizeOfCmds;
64   hdr->flags = MachO::MH_NOUNDEFS | MachO::MH_DYLDLINK | MachO::MH_TWOLEVEL;
65 
66   if (config->outputType == MachO::MH_DYLIB && !config->hasReexports)
67     hdr->flags |= MachO::MH_NO_REEXPORTED_DYLIBS;
68 
69   if (config->outputType == MachO::MH_EXECUTE && config->isPic)
70     hdr->flags |= MachO::MH_PIE;
71 
72   if (in.exports->hasWeakSymbol || in.weakBinding->hasNonWeakDefinition())
73     hdr->flags |= MachO::MH_WEAK_DEFINES;
74 
75   if (in.exports->hasWeakSymbol || in.weakBinding->hasEntry())
76     hdr->flags |= MachO::MH_BINDS_TO_WEAK;
77 
78   for (OutputSegment *seg : outputSegments) {
79     for (OutputSection *osec : seg->getSections()) {
80       if (isThreadLocalVariables(osec->flags)) {
81         hdr->flags |= MachO::MH_HAS_TLV_DESCRIPTORS;
82         break;
83       }
84     }
85   }
86 
87   uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1);
88   for (LoadCommand *lc : loadCommands) {
89     lc->writeTo(p);
90     p += lc->getSize();
91   }
92 }
93 
// Registers the synthetic section named section_names::pageZero under the
// segment named segment_names::pageZero. The constructor only forwards the
// two names to SyntheticSection.
PageZeroSection::PageZeroSection()
    : SyntheticSection(segment_names::pageZero, section_names::pageZero) {}
96 
97 uint64_t Location::getVA() const {
98   if (const auto *isec = section.dyn_cast<const InputSection *>())
99     return isec->getVA() + offset;
100   return section.get<const OutputSection *>()->addr + offset;
101 }
102 
// Registers the rebase section (section_names::rebase) under
// segment_names::linkEdit. Its contents are produced later by
// finalizeContents().
RebaseSection::RebaseSection()
    : LinkEditSection(segment_names::linkEdit, section_names::rebase) {}
105 
namespace {
// Running state threaded through encodeRebase() / encodeDoRebase() while the
// rebase opcode stream is being generated.
struct Rebase {
  // Segment selected by the last SET_SEGMENT_AND_OFFSET opcode that was
  // emitted; nullptr until the first one is written.
  OutputSegment *segment = nullptr;
  // Segment-relative offset that the next contiguous rebase would have.
  uint64_t offset = 0;
  // Number of contiguous rebases gathered so far that have not yet been
  // flushed to the stream as a DO_REBASE opcode.
  uint64_t consecutiveCount = 0;
};
} // namespace
113 
114 // Rebase opcodes allow us to describe a contiguous sequence of rebase location
115 // using a single DO_REBASE opcode. To take advantage of it, we delay emitting
116 // `DO_REBASE` until we have reached the end of a contiguous sequence.
117 static void encodeDoRebase(Rebase &rebase, raw_svector_ostream &os) {
118   using namespace llvm::MachO;
119   assert(rebase.consecutiveCount != 0);
120   if (rebase.consecutiveCount <= REBASE_IMMEDIATE_MASK) {
121     os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES |
122                                rebase.consecutiveCount);
123   } else {
124     os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES);
125     encodeULEB128(rebase.consecutiveCount, os);
126   }
127   rebase.consecutiveCount = 0;
128 }
129 
130 static void encodeRebase(const OutputSection *osec, uint64_t outSecOff,
131                          Rebase &lastRebase, raw_svector_ostream &os) {
132   using namespace llvm::MachO;
133   OutputSegment *seg = osec->parent;
134   uint64_t offset = osec->getSegmentOffset() + outSecOff;
135   if (lastRebase.segment != seg || lastRebase.offset != offset) {
136     if (lastRebase.consecutiveCount != 0)
137       encodeDoRebase(lastRebase, os);
138 
139     if (lastRebase.segment != seg) {
140       os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
141                                  seg->index);
142       encodeULEB128(offset, os);
143       lastRebase.segment = seg;
144       lastRebase.offset = offset;
145     } else {
146       assert(lastRebase.offset != offset);
147       os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB);
148       encodeULEB128(offset - lastRebase.offset, os);
149       lastRebase.offset = offset;
150     }
151   }
152   ++lastRebase.consecutiveCount;
153   // DO_REBASE causes dyld to both perform the binding and increment the offset
154   lastRebase.offset += WordSize;
155 }
156 
157 void RebaseSection::finalizeContents() {
158   using namespace llvm::MachO;
159   if (locations.empty())
160     return;
161 
162   raw_svector_ostream os{contents};
163   Rebase lastRebase;
164 
165   os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER);
166 
167   llvm::sort(locations, [](const Location &a, const Location &b) {
168     return a.getVA() < b.getVA();
169   });
170   for (const Location &loc : locations) {
171     if (const auto *isec = loc.section.dyn_cast<const InputSection *>()) {
172       encodeRebase(isec->parent, isec->outSecOff + loc.offset, lastRebase, os);
173     } else {
174       const auto *osec = loc.section.get<const OutputSection *>();
175       encodeRebase(osec, loc.offset, lastRebase, os);
176     }
177   }
178   if (lastRebase.consecutiveCount != 0)
179     encodeDoRebase(lastRebase, os);
180 
181   os << static_cast<uint8_t>(REBASE_OPCODE_DONE);
182 }
183 
184 void RebaseSection::writeTo(uint8_t *buf) const {
185   memcpy(buf, contents.data(), contents.size());
186 }
187 
188 NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname,
189                                                      const char *name)
190     : SyntheticSection(segname, name) {
191   align = 8;
192   flags = MachO::S_NON_LAZY_SYMBOL_POINTERS;
193 }
194 
195 void NonLazyPointerSectionBase::addEntry(Symbol *sym) {
196   if (entries.insert(sym)) {
197     assert(!sym->isInGot());
198     sym->gotIndex = entries.size() - 1;
199 
200     addNonLazyBindingEntries(sym, this, sym->gotIndex * WordSize);
201   }
202 }
203 
204 void NonLazyPointerSectionBase::writeTo(uint8_t *buf) const {
205   for (size_t i = 0, n = entries.size(); i < n; ++i)
206     if (auto *defined = dyn_cast<Defined>(entries[i]))
207       write64le(&buf[i * WordSize], defined->getVA());
208 }
209 
// Registers the binding section (section_names::binding) under
// segment_names::linkEdit. Its contents are produced later by
// finalizeContents().
BindingSection::BindingSection()
    : LinkEditSection(segment_names::linkEdit, section_names::binding) {}
212 
213 namespace {
214 struct Binding {
215   OutputSegment *segment = nullptr;
216   uint64_t offset = 0;
217   int64_t addend = 0;
218   uint8_t ordinal = 0;
219 };
220 } // namespace
221 
222 // Encode a sequence of opcodes that tell dyld to write the address of symbol +
223 // addend at osec->addr + outSecOff.
224 //
225 // The bind opcode "interpreter" remembers the values of each binding field, so
226 // we only need to encode the differences between bindings. Hence the use of
227 // lastBinding.
228 static void encodeBinding(const Symbol *sym, const OutputSection *osec,
229                           uint64_t outSecOff, int64_t addend,
230                           bool isWeakBinding, Binding &lastBinding,
231                           raw_svector_ostream &os) {
232   using namespace llvm::MachO;
233   OutputSegment *seg = osec->parent;
234   uint64_t offset = osec->getSegmentOffset() + outSecOff;
235   if (lastBinding.segment != seg) {
236     os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
237                                seg->index);
238     encodeULEB128(offset, os);
239     lastBinding.segment = seg;
240     lastBinding.offset = offset;
241   } else if (lastBinding.offset != offset) {
242     os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
243     encodeULEB128(offset - lastBinding.offset, os);
244     lastBinding.offset = offset;
245   }
246 
247   if (lastBinding.addend != addend) {
248     os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB);
249     encodeSLEB128(addend, os);
250     lastBinding.addend = addend;
251   }
252 
253   uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
254   if (!isWeakBinding && sym->isWeakRef())
255     flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
256 
257   os << flags << sym->getName() << '\0'
258      << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
259      << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
260   // DO_BIND causes dyld to both perform the binding and increment the offset
261   lastBinding.offset += WordSize;
262 }
263 
264 // Non-weak bindings need to have their dylib ordinal encoded as well.
265 static void encodeDylibOrdinal(const DylibSymbol *dysym, Binding &lastBinding,
266                                raw_svector_ostream &os) {
267   using namespace llvm::MachO;
268   if (lastBinding.ordinal != dysym->file->ordinal) {
269     if (dysym->file->ordinal <= BIND_IMMEDIATE_MASK) {
270       os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
271                                  dysym->file->ordinal);
272     } else {
273       os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
274       encodeULEB128(dysym->file->ordinal, os);
275     }
276     lastBinding.ordinal = dysym->file->ordinal;
277   }
278 }
279 
280 static void encodeWeakOverride(const Defined *defined,
281                                raw_svector_ostream &os) {
282   using namespace llvm::MachO;
283   os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM |
284                              BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION)
285      << defined->getName() << '\0';
286 }
287 
288 // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
289 // interprets to update a record with the following fields:
290 //  * segment index (of the segment to write the symbol addresses to, typically
291 //    the __DATA_CONST segment which contains the GOT)
292 //  * offset within the segment, indicating the next location to write a binding
293 //  * symbol type
294 //  * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)
295 //  * symbol name
296 //  * addend
297 // When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
298 // a symbol in the GOT, and increments the segment offset to point to the next
299 // entry. It does *not* clear the record state after doing the bind, so
300 // subsequent opcodes only need to encode the differences between bindings.
301 void BindingSection::finalizeContents() {
302   raw_svector_ostream os{contents};
303   Binding lastBinding;
304 
305   // Since bindings are delta-encoded, sorting them allows for a more compact
306   // result. Note that sorting by address alone ensures that bindings for the
307   // same segment / section are located together.
308   llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
309     return a.target.getVA() < b.target.getVA();
310   });
311   for (const BindingEntry &b : bindings) {
312     encodeDylibOrdinal(b.dysym, lastBinding, os);
313     if (auto *isec = b.target.section.dyn_cast<const InputSection *>()) {
314       encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.target.offset,
315                     b.addend, /*isWeakBinding=*/false, lastBinding, os);
316     } else {
317       auto *osec = b.target.section.get<const OutputSection *>();
318       encodeBinding(b.dysym, osec, b.target.offset, b.addend,
319                     /*isWeakBinding=*/false, lastBinding, os);
320     }
321   }
322   if (!bindings.empty())
323     os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
324 }
325 
326 void BindingSection::writeTo(uint8_t *buf) const {
327   memcpy(buf, contents.data(), contents.size());
328 }
329 
// Registers the weak binding section (section_names::weakBinding) under
// segment_names::linkEdit. Its contents are produced later by
// finalizeContents().
WeakBindingSection::WeakBindingSection()
    : LinkEditSection(segment_names::linkEdit, section_names::weakBinding) {}
332 
333 void WeakBindingSection::finalizeContents() {
334   raw_svector_ostream os{contents};
335   Binding lastBinding;
336 
337   for (const Defined *defined : definitions)
338     encodeWeakOverride(defined, os);
339 
340   // Since bindings are delta-encoded, sorting them allows for a more compact
341   // result.
342   llvm::sort(bindings,
343              [](const WeakBindingEntry &a, const WeakBindingEntry &b) {
344                return a.target.getVA() < b.target.getVA();
345              });
346   for (const WeakBindingEntry &b : bindings) {
347     if (auto *isec = b.target.section.dyn_cast<const InputSection *>()) {
348       encodeBinding(b.symbol, isec->parent, isec->outSecOff + b.target.offset,
349                     b.addend, /*isWeakBinding=*/true, lastBinding, os);
350     } else {
351       auto *osec = b.target.section.get<const OutputSection *>();
352       encodeBinding(b.symbol, osec, b.target.offset, b.addend,
353                     /*isWeakBinding=*/true, lastBinding, os);
354     }
355   }
356   if (!bindings.empty() || !definitions.empty())
357     os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
358 }
359 
360 void WeakBindingSection::writeTo(uint8_t *buf) const {
361   memcpy(buf, contents.data(), contents.size());
362 }
363 
364 bool macho::needsBinding(const Symbol *sym) {
365   if (isa<DylibSymbol>(sym))
366     return true;
367   if (const auto *defined = dyn_cast<Defined>(sym))
368     return defined->isExternalWeakDef();
369   return false;
370 }
371 
372 void macho::addNonLazyBindingEntries(const Symbol *sym,
373                                      SectionPointerUnion section,
374                                      uint64_t offset, int64_t addend) {
375   if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
376     in.binding->addEntry(dysym, section, offset, addend);
377     if (dysym->isWeakDef())
378       in.weakBinding->addEntry(sym, section, offset, addend);
379   } else if (auto *defined = dyn_cast<Defined>(sym)) {
380     in.rebase->addEntry(section, offset);
381     if (defined->isExternalWeakDef())
382       in.weakBinding->addEntry(sym, section, offset, addend);
383   } else if (isa<DSOHandle>(sym)) {
384     error("cannot bind to " + DSOHandle::name);
385   } else {
386     // Undefined symbols are filtered out in scanRelocations(); we should never
387     // get here
388     llvm_unreachable("cannot bind to an undefined symbol");
389   }
390 }
391 
392 StubsSection::StubsSection()
393     : SyntheticSection(segment_names::text, "__stubs") {
394   flags = MachO::S_SYMBOL_STUBS;
395   reserved2 = target->stubSize;
396 }
397 
398 uint64_t StubsSection::getSize() const {
399   return entries.size() * target->stubSize;
400 }
401 
402 void StubsSection::writeTo(uint8_t *buf) const {
403   size_t off = 0;
404   for (const Symbol *sym : entries) {
405     target->writeStub(buf + off, *sym);
406     off += target->stubSize;
407   }
408 }
409 
410 bool StubsSection::addEntry(Symbol *sym) {
411   bool inserted = entries.insert(sym);
412   if (inserted)
413     sym->stubsIndex = entries.size() - 1;
414   return inserted;
415 }
416 
// Creates the "__stub_helper" section in the text segment. The constructor
// only forwards the two names to SyntheticSection.
StubHelperSection::StubHelperSection()
    : SyntheticSection(segment_names::text, "__stub_helper") {}
419 
420 uint64_t StubHelperSection::getSize() const {
421   return target->stubHelperHeaderSize +
422          in.lazyBinding->getEntries().size() * target->stubHelperEntrySize;
423 }
424 
425 bool StubHelperSection::isNeeded() const { return in.lazyBinding->isNeeded(); }
426 
427 void StubHelperSection::writeTo(uint8_t *buf) const {
428   target->writeStubHelperHeader(buf);
429   size_t off = target->stubHelperHeaderSize;
430   for (const DylibSymbol *sym : in.lazyBinding->getEntries()) {
431     target->writeStubHelperEntry(buf + off, *sym, addr + off);
432     off += target->stubHelperEntrySize;
433   }
434 }
435 
436 void StubHelperSection::setup() {
437   stubBinder = dyn_cast_or_null<DylibSymbol>(symtab->find("dyld_stub_binder"));
438   if (stubBinder == nullptr) {
439     error("symbol dyld_stub_binder not found (normally in libSystem.dylib). "
440           "Needed to perform lazy binding.");
441     return;
442   }
443   stubBinder->refState = RefState::Strong;
444   in.got->addEntry(stubBinder);
445 
446   inputSections.push_back(in.imageLoaderCache);
447   dyldPrivate =
448       make<Defined>("__dyld_private", in.imageLoaderCache, 0,
449                     /*isWeakDef=*/false,
450                     /*isExternal=*/false, /*isPrivateExtern=*/false);
451 }
452 
453 ImageLoaderCacheSection::ImageLoaderCacheSection() {
454   segname = segment_names::data;
455   name = "__data";
456   uint8_t *arr = bAlloc.Allocate<uint8_t>(WordSize);
457   memset(arr, 0, WordSize);
458   data = {arr, WordSize};
459 }
460 
461 LazyPointerSection::LazyPointerSection()
462     : SyntheticSection(segment_names::data, "__la_symbol_ptr") {
463   align = 8;
464   flags = MachO::S_LAZY_SYMBOL_POINTERS;
465 }
466 
467 uint64_t LazyPointerSection::getSize() const {
468   return in.stubs->getEntries().size() * WordSize;
469 }
470 
471 bool LazyPointerSection::isNeeded() const {
472   return !in.stubs->getEntries().empty();
473 }
474 
475 void LazyPointerSection::writeTo(uint8_t *buf) const {
476   size_t off = 0;
477   for (const Symbol *sym : in.stubs->getEntries()) {
478     if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
479       if (dysym->hasStubsHelper()) {
480         uint64_t stubHelperOffset =
481             target->stubHelperHeaderSize +
482             dysym->stubsHelperIndex * target->stubHelperEntrySize;
483         write64le(buf + off, in.stubHelper->addr + stubHelperOffset);
484       }
485     } else {
486       write64le(buf + off, sym->getVA());
487     }
488     off += WordSize;
489   }
490 }
491 
// Registers the lazy binding section (section_names::lazyBinding) under
// segment_names::linkEdit. Its contents are produced later by
// finalizeContents().
LazyBindingSection::LazyBindingSection()
    : LinkEditSection(segment_names::linkEdit, section_names::lazyBinding) {}
494 
495 void LazyBindingSection::finalizeContents() {
496   // TODO: Just precompute output size here instead of writing to a temporary
497   // buffer
498   for (DylibSymbol *sym : entries)
499     sym->lazyBindOffset = encode(*sym);
500 }
501 
502 void LazyBindingSection::writeTo(uint8_t *buf) const {
503   memcpy(buf, contents.data(), contents.size());
504 }
505 
506 void LazyBindingSection::addEntry(DylibSymbol *dysym) {
507   if (entries.insert(dysym)) {
508     dysym->stubsHelperIndex = entries.size() - 1;
509     in.rebase->addEntry(in.lazyPointers, dysym->stubsIndex * WordSize);
510   }
511 }
512 
513 // Unlike the non-lazy binding section, the bind opcodes in this section aren't
514 // interpreted all at once. Rather, dyld will start interpreting opcodes at a
515 // given offset, typically only binding a single symbol before it finds a
516 // BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case,
517 // we cannot encode just the differences between symbols; we have to emit the
518 // complete bind information for each symbol.
519 uint32_t LazyBindingSection::encode(const DylibSymbol &sym) {
520   uint32_t opstreamOffset = contents.size();
521   OutputSegment *dataSeg = in.lazyPointers->parent;
522   os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
523                              dataSeg->index);
524   uint64_t offset = in.lazyPointers->addr - dataSeg->firstSection()->addr +
525                     sym.stubsIndex * WordSize;
526   encodeULEB128(offset, os);
527   if (sym.file->ordinal <= MachO::BIND_IMMEDIATE_MASK) {
528     os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
529                                sym.file->ordinal);
530   } else {
531     os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
532     encodeULEB128(sym.file->ordinal, os);
533   }
534 
535   uint8_t flags = MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
536   if (sym.isWeakRef())
537     flags |= MachO::BIND_SYMBOL_FLAGS_WEAK_IMPORT;
538 
539   os << flags << sym.getName() << '\0'
540      << static_cast<uint8_t>(MachO::BIND_OPCODE_DO_BIND)
541      << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
542   return opstreamOffset;
543 }
544 
545 void macho::prepareBranchTarget(Symbol *sym) {
546   if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
547     if (in.stubs->addEntry(dysym)) {
548       if (sym->isWeakDef()) {
549         in.binding->addEntry(dysym, in.lazyPointers,
550                              sym->stubsIndex * WordSize);
551         in.weakBinding->addEntry(sym, in.lazyPointers,
552                                  sym->stubsIndex * WordSize);
553       } else {
554         in.lazyBinding->addEntry(dysym);
555       }
556     }
557   } else if (auto *defined = dyn_cast<Defined>(sym)) {
558     if (defined->isExternalWeakDef()) {
559       if (in.stubs->addEntry(sym)) {
560         in.rebase->addEntry(in.lazyPointers, sym->stubsIndex * WordSize);
561         in.weakBinding->addEntry(sym, in.lazyPointers,
562                                  sym->stubsIndex * WordSize);
563       }
564     }
565   }
566 }
567 
// Registers the export section (section_names::export_) under
// segment_names::linkEdit. Its contents are produced later by
// finalizeContents().
ExportSection::ExportSection()
    : LinkEditSection(segment_names::linkEdit, section_names::export_) {}
570 
571 void ExportSection::finalizeContents() {
572   trieBuilder.setImageBase(in.header->addr);
573   for (const Symbol *sym : symtab->getSymbols()) {
574     if (const auto *defined = dyn_cast<Defined>(sym)) {
575       if (defined->privateExtern)
576         continue;
577       trieBuilder.addSymbol(*defined);
578       hasWeakSymbol = hasWeakSymbol || sym->isWeakDef();
579     }
580   }
581   size = trieBuilder.build();
582 }
583 
// Delegates to the trie builder, which writes the export data to `buf`.
void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); }
585 
// Registers the symbol table section (section_names::symbolTable) under
// segment_names::linkEdit and keeps a reference to the string table that
// symbol names are added to.
SymtabSection::SymtabSection(StringTableSection &stringTableSection)
    : LinkEditSection(segment_names::linkEdit, section_names::symbolTable),
      stringTableSection(stringTableSection) {}
589 
590 uint64_t SymtabSection::getRawSize() const {
591   return getNumSymbols() * sizeof(structs::nlist_64);
592 }
593 
594 void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) {
595   StabsEntry stab(MachO::N_SO);
596   SmallString<261> dir(compileUnit->getCompilationDir());
597   StringRef sep = sys::path::get_separator();
598   // We don't use `path::append` here because we want an empty `dir` to result
599   // in an absolute path. `append` would give us a relative path for that case.
600   if (!dir.endswith(sep))
601     dir += sep;
602   stab.strx = stringTableSection.addString(
603       saver.save(dir + compileUnit->getUnitDIE().getShortName()));
604   stabs.emplace_back(std::move(stab));
605 }
606 
607 void SymtabSection::emitEndSourceStab() {
608   StabsEntry stab(MachO::N_SO);
609   stab.sect = 1;
610   stabs.emplace_back(std::move(stab));
611 }
612 
613 void SymtabSection::emitObjectFileStab(ObjFile *file) {
614   StabsEntry stab(MachO::N_OSO);
615   stab.sect = target->cpuSubtype;
616   SmallString<261> path(!file->archiveName.empty() ? file->archiveName
617                                                    : file->getName());
618   std::error_code ec = sys::fs::make_absolute(path);
619   if (ec)
620     fatal("failed to get absolute path for " + path);
621 
622   if (!file->archiveName.empty())
623     path.append({"(", file->getName(), ")"});
624 
625   stab.strx = stringTableSection.addString(saver.save(path.str()));
626   stab.desc = 1;
627   stab.value = file->modTime;
628   stabs.emplace_back(std::move(stab));
629 }
630 
631 void SymtabSection::emitEndFunStab(Defined *defined) {
632   StabsEntry stab(MachO::N_FUN);
633   // FIXME this should be the size of the symbol. Using the section size in
634   // lieu is only correct if .subsections_via_symbols is set.
635   stab.value = defined->isec->getSize();
636   stabs.emplace_back(std::move(stab));
637 }
638 
639 void SymtabSection::emitStabs() {
640   std::vector<Defined *> symbolsNeedingStabs;
641   for (const SymtabEntry &entry :
642        concat<SymtabEntry>(localSymbols, externalSymbols)) {
643     Symbol *sym = entry.sym;
644     if (auto *defined = dyn_cast<Defined>(sym)) {
645       if (defined->isAbsolute())
646         continue;
647       InputSection *isec = defined->isec;
648       ObjFile *file = dyn_cast_or_null<ObjFile>(isec->file);
649       if (!file || !file->compileUnit)
650         continue;
651       symbolsNeedingStabs.push_back(defined);
652     }
653   }
654 
655   llvm::stable_sort(symbolsNeedingStabs, [&](Defined *a, Defined *b) {
656     return a->isec->file->id < b->isec->file->id;
657   });
658 
659   // Emit STABS symbols so that dsymutil and/or the debugger can map address
660   // regions in the final binary to the source and object files from which they
661   // originated.
662   InputFile *lastFile = nullptr;
663   for (Defined *defined : symbolsNeedingStabs) {
664     InputSection *isec = defined->isec;
665     ObjFile *file = dyn_cast<ObjFile>(isec->file);
666     assert(file);
667 
668     if (lastFile == nullptr || lastFile != file) {
669       if (lastFile != nullptr)
670         emitEndSourceStab();
671       lastFile = file;
672 
673       emitBeginSourceStab(file->compileUnit);
674       emitObjectFileStab(file);
675     }
676 
677     StabsEntry symStab;
678     symStab.sect = defined->isec->parent->index;
679     symStab.strx = stringTableSection.addString(defined->getName());
680     symStab.value = defined->getVA();
681 
682     if (isCodeSection(isec)) {
683       symStab.type = MachO::N_FUN;
684       stabs.emplace_back(std::move(symStab));
685       emitEndFunStab(defined);
686     } else {
687       symStab.type = defined->isExternal() ? MachO::N_GSYM : MachO::N_STSYM;
688       stabs.emplace_back(std::move(symStab));
689     }
690   }
691 
692   if (!stabs.empty())
693     emitEndSourceStab();
694 }
695 
696 void SymtabSection::finalizeContents() {
697   auto addSymbol = [&](std::vector<SymtabEntry> &symbols, Symbol *sym) {
698     uint32_t strx = stringTableSection.addString(sym->getName());
699     symbols.push_back({sym, strx});
700   };
701 
702   // Local symbols aren't in the SymbolTable, so we walk the list of object
703   // files to gather them.
704   for (InputFile *file : inputFiles) {
705     if (auto *objFile = dyn_cast<ObjFile>(file)) {
706       for (Symbol *sym : objFile->symbols) {
707         // TODO: when we implement -dead_strip, we should filter out symbols
708         // that belong to dead sections.
709         if (auto *defined = dyn_cast<Defined>(sym)) {
710           if (!defined->isExternal())
711             addSymbol(localSymbols, sym);
712         }
713       }
714     }
715   }
716 
717   // __dyld_private is a local symbol too. It's linker-created and doesn't
718   // exist in any object file.
719   if (Defined* dyldPrivate = in.stubHelper->dyldPrivate)
720     addSymbol(localSymbols, dyldPrivate);
721 
722   for (Symbol *sym : symtab->getSymbols()) {
723     if (auto *defined = dyn_cast<Defined>(sym)) {
724       assert(defined->isExternal());
725       (void)defined;
726       addSymbol(externalSymbols, sym);
727     } else if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
728       if (dysym->isReferenced())
729         addSymbol(undefinedSymbols, sym);
730     }
731   }
732 
733   emitStabs();
734   uint32_t symtabIndex = stabs.size();
735   for (const SymtabEntry &entry :
736        concat<SymtabEntry>(localSymbols, externalSymbols, undefinedSymbols)) {
737     entry.sym->symtabIndex = symtabIndex++;
738   }
739 }
740 
741 uint32_t SymtabSection::getNumSymbols() const {
742   return stabs.size() + localSymbols.size() + externalSymbols.size() +
743          undefinedSymbols.size();
744 }
745 
746 void SymtabSection::writeTo(uint8_t *buf) const {
747   auto *nList = reinterpret_cast<structs::nlist_64 *>(buf);
748   // Emit the stabs entries before the "real" symbols. We cannot emit them
749   // after as that would render Symbol::symtabIndex inaccurate.
750   for (const StabsEntry &entry : stabs) {
751     nList->n_strx = entry.strx;
752     nList->n_type = entry.type;
753     nList->n_sect = entry.sect;
754     nList->n_desc = entry.desc;
755     nList->n_value = entry.value;
756     ++nList;
757   }
758 
759   for (const SymtabEntry &entry : concat<const SymtabEntry>(
760            localSymbols, externalSymbols, undefinedSymbols)) {
761     nList->n_strx = entry.strx;
762     // TODO populate n_desc with more flags
763     if (auto *defined = dyn_cast<Defined>(entry.sym)) {
764       uint8_t scope = 0;
765       if (defined->privateExtern) {
766         // Private external -- dylib scoped symbol.
767         // Promote to non-external at link time.
768         assert(defined->isExternal() && "invalid input file");
769         scope = MachO::N_PEXT;
770       } else if (defined->isExternal()) {
771         // Normal global symbol.
772         scope = MachO::N_EXT;
773       } else {
774         // TU-local symbol from localSymbols.
775         scope = 0;
776       }
777 
778       if (defined->isAbsolute()) {
779         nList->n_type = scope | MachO::N_ABS;
780         nList->n_sect = MachO::NO_SECT;
781         nList->n_value = defined->value;
782       } else {
783         nList->n_type = scope | MachO::N_SECT;
784         nList->n_sect = defined->isec->parent->index;
785         // For the N_SECT symbol type, n_value is the address of the symbol
786         nList->n_value = defined->getVA();
787       }
788       nList->n_desc |= defined->isExternalWeakDef() ? MachO::N_WEAK_DEF : 0;
789     } else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) {
790       uint16_t n_desc = nList->n_desc;
791       MachO::SET_LIBRARY_ORDINAL(n_desc, dysym->file->ordinal);
792       nList->n_type = MachO::N_EXT;
793       n_desc |= dysym->isWeakRef() ? MachO::N_WEAK_REF : 0;
794       nList->n_desc = n_desc;
795     }
796     ++nList;
797   }
798 }
799 
// Registers the indirect symbol table section
// (section_names::indirectSymbolTable) under segment_names::linkEdit.
IndirectSymtabSection::IndirectSymtabSection()
    : LinkEditSection(segment_names::linkEdit,
                      section_names::indirectSymbolTable) {}
803 
804 uint32_t IndirectSymtabSection::getNumSymbols() const {
805   return in.got->getEntries().size() + in.tlvPointers->getEntries().size() +
806          in.stubs->getEntries().size();
807 }
808 
809 bool IndirectSymtabSection::isNeeded() const {
810   return in.got->isNeeded() || in.tlvPointers->isNeeded() ||
811          in.stubs->isNeeded();
812 }
813 
814 void IndirectSymtabSection::finalizeContents() {
815   uint32_t off = 0;
816   in.got->reserved1 = off;
817   off += in.got->getEntries().size();
818   in.tlvPointers->reserved1 = off;
819   off += in.tlvPointers->getEntries().size();
820   // There is a 1:1 correspondence between stubs and LazyPointerSection
821   // entries, so they can share the same sub-array in the table.
822   in.stubs->reserved1 = in.lazyPointers->reserved1 = off;
823 }
824 
825 void IndirectSymtabSection::writeTo(uint8_t *buf) const {
826   uint32_t off = 0;
827   for (const Symbol *sym : in.got->getEntries()) {
828     write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
829     ++off;
830   }
831   for (const Symbol *sym : in.tlvPointers->getEntries()) {
832     write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
833     ++off;
834   }
835   for (const Symbol *sym : in.stubs->getEntries()) {
836     write32le(buf + off * sizeof(uint32_t), sym->symtabIndex);
837     ++off;
838   }
839 }
840 
// Registers the string table section (section_names::stringTable) under
// segment_names::linkEdit. Strings are added afterwards through addString().
StringTableSection::StringTableSection()
    : LinkEditSection(segment_names::linkEdit, section_names::stringTable) {}
843 
844 uint32_t StringTableSection::addString(StringRef str) {
845   uint32_t strx = size;
846   strings.push_back(str); // TODO: consider deduplicating strings
847   size += str.size() + 1; // account for null terminator
848   return strx;
849 }
850 
851 void StringTableSection::writeTo(uint8_t *buf) const {
852   uint32_t off = 0;
853   for (StringRef str : strings) {
854     memcpy(buf + off, str.data(), str.size());
855     off += str.size() + 1; // account for null terminator
856   }
857 }
858