xref: /freebsd/contrib/llvm-project/lld/MachO/SyntheticSections.h (revision 18054d0220cfc8df9c9568c437bd6fbb59d53c3c)
1 //===- SyntheticSections.h -------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_MACHO_SYNTHETIC_SECTIONS_H
10 #define LLD_MACHO_SYNTHETIC_SECTIONS_H
11 
12 #include "Config.h"
13 #include "ExportTrie.h"
14 #include "InputSection.h"
15 #include "OutputSection.h"
16 #include "OutputSegment.h"
17 #include "Target.h"
18 #include "Writer.h"
19 
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/Hashing.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/MC/StringTableBuilder.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 #include <unordered_map>
28 
// Forward declaration only: this header refers to DWARFUnit solely through a
// pointer (SymtabSection::emitBeginSourceStab), so the full definition is not
// required here.
namespace llvm {
class DWARFUnit;
} // namespace llvm
32 
33 namespace lld {
34 namespace macho {
35 
// Forward declarations of types that this header only uses through pointers
// or references.
class Defined;
class DylibSymbol;
class LoadCommand;
class ObjFile;
class UnwindInfoSection;
41 
// Base class of the synthetic output sections declared in this header. An
// OutputSection is recognised as a SyntheticSection through its kind() (see
// classof()).
class SyntheticSection : public OutputSection {
public:
  // Defined out of line.
  SyntheticSection(const char *segname, const char *name);
  virtual ~SyntheticSection() = default;

  // Type-test hook: true exactly when `sec` reports SyntheticKind.
  static bool classof(const OutputSection *sec) {
    return sec->kind() == SyntheticKind;
  }

  // Segment name. NOTE(review): presumably initialised from the constructor's
  // `segname` argument -- confirm in the implementation file.
  StringRef segname;
  // This fake InputSection makes it easier for us to write code that applies
  // generically to both user inputs and synthetics.
  InputSection *isec;
};
56 
57 // All sections in __LINKEDIT should inherit from this.
58 class LinkEditSection : public SyntheticSection {
59 public:
60   LinkEditSection(const char *segname, const char *name)
61       : SyntheticSection(segname, name) {
62     align = target->wordSize;
63   }
64 
65   // Implementations of this method can assume that the regular (non-__LINKEDIT)
66   // sections already have their addresses assigned.
67   virtual void finalizeContents() {}
68 
69   // Sections in __LINKEDIT are special: their offsets are recorded in the
70   // load commands like LC_DYLD_INFO_ONLY and LC_SYMTAB, instead of in section
71   // headers.
72   bool isHidden() const override final { return true; }
73 
74   virtual uint64_t getRawSize() const = 0;
75 
76   // codesign (or more specifically libstuff) checks that each section in
77   // __LINKEDIT ends where the next one starts -- no gaps are permitted. We
78   // therefore align every section's start and end points to WordSize.
79   //
80   // NOTE: This assumes that the extra bytes required for alignment can be
81   // zero-valued bytes.
82   uint64_t getSize() const override final {
83     return llvm::alignTo(getRawSize(), align);
84   }
85 };
86 
// The header of the Mach-O file, which must have a file offset of zero.
class MachHeaderSection final : public SyntheticSection {
public:
  MachHeaderSection();
  // The header is always reported as hidden.
  bool isHidden() const override { return true; }
  uint64_t getSize() const override;
  void writeTo(uint8_t *buf) const override;

  // Defined out of line. NOTE(review): presumably records the command in
  // `loadCommands` and accounts for it in `sizeOfCmds` -- confirm.
  void addLoadCommand(LoadCommand *);

protected:
  // Load commands registered with this header (raw pointers).
  std::vector<LoadCommand *> loadCommands;
  // NOTE(review): presumably the total byte size of `loadCommands` -- confirm
  // against addLoadCommand().
  uint32_t sizeOfCmds = 0;
};
101 
// A hidden section that exists solely for the purpose of creating the
// __PAGEZERO segment, which is used to catch null pointer dereferences.
class PageZeroSection final : public SyntheticSection {
public:
  PageZeroSection();
  bool isHidden() const override { return true; }
  // The size is taken from the target's pageZeroSize.
  uint64_t getSize() const override { return target->pageZeroSize; }
  // The section reports a file size of zero, independent of getSize().
  uint64_t getFileSize() const override { return 0; }
  // Intentionally a no-op: nothing is written for this section.
  void writeTo(uint8_t *buf) const override {}
};
112 
113 // This is the base class for the GOT and TLVPointer sections, which are nearly
114 // functionally identical -- they will both be populated by dyld with addresses
115 // to non-lazily-loaded dylib symbols. The main difference is that the
116 // TLVPointerSection stores references to thread-local variables.
117 class NonLazyPointerSectionBase : public SyntheticSection {
118 public:
119   NonLazyPointerSectionBase(const char *segname, const char *name);
120   const llvm::SetVector<const Symbol *> &getEntries() const { return entries; }
121   bool isNeeded() const override { return !entries.empty(); }
122   uint64_t getSize() const override {
123     return entries.size() * target->wordSize;
124   }
125   void writeTo(uint8_t *buf) const override;
126   void addEntry(Symbol *sym);
127   uint64_t getVA(uint32_t gotIndex) const {
128     return addr + gotIndex * target->wordSize;
129   }
130 
131 private:
132   llvm::SetVector<const Symbol *> entries;
133 };
134 
// Concrete NonLazyPointerSectionBase. It adds no members of its own; only the
// constructor (defined out of line) distinguishes it from the base class.
class GotSection final : public NonLazyPointerSectionBase {
public:
  GotSection();
};
139 
// Concrete NonLazyPointerSectionBase for thread-local variable pointers. It
// adds no members of its own; only the constructor (defined out of line)
// distinguishes it from the base class.
class TlvPointerSection final : public NonLazyPointerSectionBase {
public:
  TlvPointerSection();
};
144 
// A position expressed as an offset into an InputSection.
struct Location {
  // Section that contains the position (raw pointer).
  const InputSection *isec;
  // Offset passed to isec->getVA() when resolving the address.
  uint64_t offset;

  Location(const InputSection *isec, uint64_t offset)
      : isec(isec), offset(offset) {}
  // Resolves the location by delegating to the input section.
  uint64_t getVA() const { return isec->getVA(offset); }
};
153 
154 // Stores rebase opcodes, which tell dyld where absolute addresses have been
155 // encoded in the binary. If the binary is not loaded at its preferred address,
156 // dyld has to rebase these addresses by adding an offset to them.
157 class RebaseSection final : public LinkEditSection {
158 public:
159   RebaseSection();
160   void finalizeContents() override;
161   uint64_t getRawSize() const override { return contents.size(); }
162   bool isNeeded() const override { return !locations.empty(); }
163   void writeTo(uint8_t *buf) const override;
164 
165   void addEntry(const InputSection *isec, uint64_t offset) {
166     if (config->isPic)
167       locations.push_back({isec, offset});
168   }
169 
170 private:
171   std::vector<Location> locations;
172   SmallVector<char, 128> contents;
173 };
174 
// One binding: a Location together with an addend.
struct BindingEntry {
  // Signed addend associated with the binding.
  int64_t addend;
  // Location the binding refers to. Note that inside this struct the name
  // `target` denotes this member.
  Location target;
  BindingEntry(int64_t addend, Location target)
      : addend(addend), target(std::move(target)) {}
};
181 
// Maps a symbol key of type `Sym` to all the BindingEntry records added for
// it.
template <class Sym>
using BindingsMap = llvm::DenseMap<Sym, std::vector<BindingEntry>>;
184 
185 // Stores bind opcodes for telling dyld which symbols to load non-lazily.
186 class BindingSection final : public LinkEditSection {
187 public:
188   BindingSection();
189   void finalizeContents() override;
190   uint64_t getRawSize() const override { return contents.size(); }
191   bool isNeeded() const override { return !bindingsMap.empty(); }
192   void writeTo(uint8_t *buf) const override;
193 
194   void addEntry(const DylibSymbol *dysym, const InputSection *isec,
195                 uint64_t offset, int64_t addend = 0) {
196     bindingsMap[dysym].emplace_back(addend, Location(isec, offset));
197   }
198 
199 private:
200   BindingsMap<const DylibSymbol *> bindingsMap;
201   SmallVector<char, 128> contents;
202 };
203 
204 // Stores bind opcodes for telling dyld which weak symbols need coalescing.
205 // There are two types of entries in this section:
206 //
207 //   1) Non-weak definitions: This is a symbol definition that weak symbols in
208 //   other dylibs should coalesce to.
209 //
210 //   2) Weak bindings: These tell dyld that a given symbol reference should
211 //   coalesce to a non-weak definition if one is found. Note that unlike the
212 //   entries in the BindingSection, the bindings here only refer to these
213 //   symbols by name, but do not specify which dylib to load them from.
214 class WeakBindingSection final : public LinkEditSection {
215 public:
216   WeakBindingSection();
217   void finalizeContents() override;
218   uint64_t getRawSize() const override { return contents.size(); }
219   bool isNeeded() const override {
220     return !bindingsMap.empty() || !definitions.empty();
221   }
222 
223   void writeTo(uint8_t *buf) const override;
224 
225   void addEntry(const Symbol *symbol, const InputSection *isec, uint64_t offset,
226                 int64_t addend = 0) {
227     bindingsMap[symbol].emplace_back(addend, Location(isec, offset));
228   }
229 
230   bool hasEntry() const { return !bindingsMap.empty(); }
231 
232   void addNonWeakDefinition(const Defined *defined) {
233     definitions.emplace_back(defined);
234   }
235 
236   bool hasNonWeakDefinition() const { return !definitions.empty(); }
237 
238 private:
239   BindingsMap<const Symbol *> bindingsMap;
240   std::vector<const Defined *> definitions;
241   SmallVector<char, 128> contents;
242 };
243 
244 // The following sections implement lazy symbol binding -- very similar to the
245 // PLT mechanism in ELF.
246 //
247 // ELF's .plt section is broken up into two sections in Mach-O: StubsSection
248 // and StubHelperSection. Calls to functions in dylibs will end up calling into
249 // StubsSection, which contains indirect jumps to addresses stored in the
250 // LazyPointerSection (the counterpart to ELF's .plt.got).
251 //
252 // We will first describe how non-weak symbols are handled.
253 //
254 // At program start, the LazyPointerSection contains addresses that point into
255 // one of the entry points in the middle of the StubHelperSection. The code in
256 // StubHelperSection will push on the stack an offset into the
257 // LazyBindingSection. The push is followed by a jump to the beginning of the
258 // StubHelperSection (similar to PLT0), which then calls into dyld_stub_binder.
259 // dyld_stub_binder is a non-lazily-bound symbol, so this call looks it up in
260 // the GOT.
261 //
262 // The stub binder will look up the bind opcodes in the LazyBindingSection at
263 // the given offset. The bind opcodes will tell the binder to update the
264 // address in the LazyPointerSection to point to the symbol, so that subsequent
265 // calls don't have to redo the symbol resolution. The binder will then jump to
266 // the resolved symbol.
267 //
268 // With weak symbols, the situation is slightly different. Since there is no
269 // "weak lazy" lookup, function calls to weak symbols are always non-lazily
270 // bound. We emit both regular non-lazy bindings as well as weak bindings, in
271 // order that the weak bindings may overwrite the non-lazy bindings if an
272 // appropriate symbol is found at runtime. However, the bound addresses will
273 // still be written (non-lazily) into the LazyPointerSection.
274 
275 class StubsSection final : public SyntheticSection {
276 public:
277   StubsSection();
278   uint64_t getSize() const override;
279   bool isNeeded() const override { return !entries.empty(); }
280   void finalize() override;
281   void writeTo(uint8_t *buf) const override;
282   const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
283   // Returns whether the symbol was added. Note that every stubs entry will
284   // have a corresponding entry in the LazyPointerSection.
285   bool addEntry(Symbol *);
286   uint64_t getVA(uint32_t stubsIndex) const {
287     assert(isFinal || target->usesThunks());
288     // ConcatOutputSection::finalize() can seek the address of a
289     // stub before its address is assigned. Before __stubs is
290     // finalized, return a contrived out-of-range address.
291     return isFinal ? addr + stubsIndex * target->stubSize
292                    : TargetInfo::outOfRangeVA;
293   }
294 
295   bool isFinal = false; // is address assigned?
296 
297 private:
298   llvm::SetVector<Symbol *> entries;
299 };
300 
// Section holding the stub helper code used for lazy binding. All member
// functions are defined out of line.
class StubHelperSection final : public SyntheticSection {
public:
  StubHelperSection();
  uint64_t getSize() const override;
  bool isNeeded() const override;
  void writeTo(uint8_t *buf) const override;

  // NOTE(review): presumably initialises `stubBinder` and `dyldPrivate` --
  // confirm in the implementation file.
  void setup();

  // Both pointers start out null.
  DylibSymbol *stubBinder = nullptr;
  Defined *dyldPrivate = nullptr;
};
313 
// Note that this section may also be targeted by non-lazy bindings. In
// particular, this happens when branch relocations target weak symbols.
//
// The class keeps no state of its own in this header; size, neededness and
// contents are all computed by out-of-line member functions.
class LazyPointerSection final : public SyntheticSection {
public:
  LazyPointerSection();
  uint64_t getSize() const override;
  bool isNeeded() const override;
  void writeTo(uint8_t *buf) const override;
};
323 
// Holds the bind opcodes that the stub binder consults during lazy binding.
class LazyBindingSection final : public LinkEditSection {
public:
  LazyBindingSection();
  void finalizeContents() override;
  uint64_t getRawSize() const override { return contents.size(); }
  // Needed as soon as at least one entry has been added.
  bool isNeeded() const override { return !entries.empty(); }
  void writeTo(uint8_t *buf) const override;
  // Note that every entry here will be referenced by a corresponding entry in
  // the StubHelperSection.
  void addEntry(DylibSymbol *dysym);
  // Read-only view of the symbols with an entry, in insertion order.
  const llvm::SetVector<DylibSymbol *> &getEntries() const { return entries; }

private:
  // NOTE(review): presumably emits the opcodes for one symbol and returns
  // their offset within `contents` -- confirm in the implementation file.
  uint32_t encode(const DylibSymbol &);

  llvm::SetVector<DylibSymbol *> entries;
  SmallVector<char, 128> contents;
  // Stream that appends to `contents`. It is declared after `contents` so the
  // buffer is constructed before the stream that wraps it.
  llvm::raw_svector_ostream os{contents};
};
343 
344 // Stores a trie that describes the set of exported symbols.
345 class ExportSection final : public LinkEditSection {
346 public:
347   ExportSection();
348   void finalizeContents() override;
349   uint64_t getRawSize() const override { return size; }
350   bool isNeeded() const override { return size; }
351   void writeTo(uint8_t *buf) const override;
352 
353   bool hasWeakSymbol = false;
354 
355 private:
356   TrieBuilder trieBuilder;
357   size_t size = 0;
358 };
359 
// Stores 'data in code' entries that describe the locations of
// data regions inside code sections.
class DataInCodeSection final : public LinkEditSection {
public:
  DataInCodeSection();
  void finalizeContents() override;
  // Raw size is the number of entries times the size of one
  // data_in_code_entry record.
  uint64_t getRawSize() const override {
    return sizeof(llvm::MachO::data_in_code_entry) * entries.size();
  }
  void writeTo(uint8_t *buf) const override;

private:
  std::vector<llvm::MachO::data_in_code_entry> entries;
};
374 
// Stores ULEB128 delta encoded addresses of functions.
class FunctionStartsSection final : public LinkEditSection {
public:
  FunctionStartsSection();
  void finalizeContents() override;
  // Raw size is the number of encoded bytes currently held in `contents`.
  uint64_t getRawSize() const override { return contents.size(); }
  void writeTo(uint8_t *buf) const override;

private:
  // Encoded bytes of the section.
  SmallVector<char, 128> contents;
};
386 
// Stores the strings referenced by the symbol table.
class StringTableSection final : public LinkEditSection {
public:
  StringTableSection();
  // Returns the start offset of the added string.
  uint32_t addString(StringRef);
  uint64_t getRawSize() const override { return size; }
  void writeTo(uint8_t *buf) const override;

  // Offset of the empty string within the table (see the note on `strings`
  // below for why this is 1 rather than 0).
  static constexpr size_t emptyStringIndex = 1;

private:
  // ld64 emits string tables which start with a space and a zero byte. We
  // match its behavior here since some tools depend on it.
  // Consequently, the empty string will be at index 1, not zero.
  std::vector<StringRef> strings{" "};
  // Starts at 2 to account for the initial space and zero byte described
  // above.
  size_t size = 2;
};
405 
// Pairs a symbol with a string index.
struct SymtabEntry {
  Symbol *sym;
  // NOTE(review): presumably the offset of the symbol's name in the string
  // table (cf. StringTableSection::addString) -- confirm.
  size_t strx;
};
410 
// One STABS record. Every field has a default, so a default-constructed entry
// is all zeros except for `strx`, which refers to the empty string.
struct StabsEntry {
  uint8_t type = 0;
  // Defaults to the string table's empty-string index.
  uint32_t strx = StringTableSection::emptyStringIndex;
  uint8_t sect = 0;
  uint16_t desc = 0;
  uint64_t value = 0;

  StabsEntry() = default;
  // Creates an entry of the given type, leaving the other fields at their
  // defaults.
  explicit StabsEntry(uint8_t type) : type(type) {}
};
421 
// Symbols of the same type must be laid out contiguously: we choose to emit
// all local symbols first, then external symbols, and finally undefined
// symbols. For each symbol type, the LC_DYSYMTAB load command will record the
// range (start index and total number) of those symbols in the symbol table.
//
// The constructor is protected; instances are obtained through
// makeSymtabSection().
class SymtabSection : public LinkEditSection {
public:
  void finalizeContents() override;
  uint32_t getNumSymbols() const;
  // STABS entries are counted together with the regular local symbols.
  uint32_t getNumLocalSymbols() const {
    return stabs.size() + localSymbols.size();
  }
  uint32_t getNumExternalSymbols() const { return externalSymbols.size(); }
  uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); }

private:
  // Helpers for producing STABS entries; all defined out of line.
  void emitBeginSourceStab(llvm::DWARFUnit *compileUnit);
  void emitEndSourceStab();
  void emitObjectFileStab(ObjFile *);
  void emitEndFunStab(Defined *);
  void emitStabs();

protected:
  SymtabSection(StringTableSection &);

  // String table associated with this symbol table (held by reference).
  StringTableSection &stringTableSection;
  // STABS symbols are always local symbols, but we represent them with special
  // entries because they may use fields like n_sect and n_desc differently.
  std::vector<StabsEntry> stabs;
  std::vector<SymtabEntry> localSymbols;
  std::vector<SymtabEntry> externalSymbols;
  std::vector<SymtabEntry> undefinedSymbols;
};
454 
// Factory for SymtabSection, whose constructor is protected. Only declared
// here; the definition lives elsewhere.
// NOTE(review): `LP` presumably selects the pointer-size layout of the output
// -- confirm against the definition.
template <class LP> SymtabSection *makeSymtabSection(StringTableSection &);
456 
// The indirect symbol table is a list of 32-bit integers that serve as indices
// into the (actual) symbol table. The indirect symbol table is a
// concatenation of several sub-arrays of indices, each sub-array belonging to
// a separate section. The starting offset of each sub-array is stored in the
// reserved1 header field of the respective section.
//
// These sub-arrays provide symbol information for sections that store
// contiguous sequences of symbol references. These references can be pointers
// (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g.
// function stubs).
class IndirectSymtabSection final : public LinkEditSection {
public:
  IndirectSymtabSection();
  void finalizeContents() override;
  uint32_t getNumSymbols() const;
  // Each symbol occupies one 32-bit index.
  uint64_t getRawSize() const override {
    return getNumSymbols() * sizeof(uint32_t);
  }
  bool isNeeded() const override;
  void writeTo(uint8_t *buf) const override;
};
478 
// The code signature comes at the very end of the linked output file.
class CodeSignatureSection final : public LinkEditSection {
public:
  // NOTE: These values are duplicated in llvm-objcopy's MachO/Object.h file
  // and any changes here, should be repeated there.
  static constexpr uint8_t blockSizeShift = 12;
  static constexpr size_t blockSize = (1 << blockSizeShift); // 4 KiB
  // 32 bytes. NOTE(review): presumably the size of one SHA-256 digest --
  // confirm against writeHashes().
  static constexpr size_t hashSize = 256 / 8;
  // Combined size of the CS_SuperBlob and CS_BlobIndex headers, rounded up to
  // a multiple of 8.
  static constexpr size_t blobHeadersSize = llvm::alignTo<8>(
      sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex));
  // The blob headers plus the CS_CodeDirectory structure.
  static constexpr uint32_t fixedHeadersSize =
      blobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory);

  uint32_t fileNamePad = 0;
  uint32_t allHeadersSize = 0;
  StringRef fileName;

  CodeSignatureSection();
  uint64_t getRawSize() const override;
  // This section unconditionally reports itself as needed.
  bool isNeeded() const override { return true; }
  void writeTo(uint8_t *buf) const override;
  uint32_t getBlockCount() const;
  void writeHashes(uint8_t *buf) const;
};
503 
504 class BitcodeBundleSection final : public SyntheticSection {
505 public:
506   BitcodeBundleSection();
507   uint64_t getSize() const override { return xarSize; }
508   void finalize() override;
509   void writeTo(uint8_t *buf) const override;
510 
511 private:
512   llvm::SmallString<261> xarPath;
513   uint64_t xarSize;
514 };
515 
516 class CStringSection : public SyntheticSection {
517 public:
518   CStringSection();
519   void addInput(CStringInputSection *);
520   uint64_t getSize() const override { return size; }
521   virtual void finalizeContents();
522   bool isNeeded() const override { return !inputs.empty(); }
523   void writeTo(uint8_t *buf) const override;
524 
525   std::vector<CStringInputSection *> inputs;
526 
527 private:
528   uint64_t size;
529 };
530 
// CStringSection variant whose size and contents are delegated to an
// llvm::StringTableBuilder instead of the base class's own bookkeeping.
class DeduplicatedCStringSection final : public CStringSection {
public:
  DeduplicatedCStringSection();
  // Size as reported by the string table builder.
  uint64_t getSize() const override { return builder.getSize(); }
  void finalizeContents() override;
  // The builder writes its contents straight into `buf`.
  void writeTo(uint8_t *buf) const override { builder.write(buf); }

private:
  llvm::StringTableBuilder builder;
};
541 
542 /*
543  * This section contains deduplicated literal values. The 16-byte values are
544  * laid out first, followed by the 8- and then the 4-byte ones.
545  */
546 class WordLiteralSection final : public SyntheticSection {
547 public:
548   using UInt128 = std::pair<uint64_t, uint64_t>;
549   // I don't think the standard guarantees the size of a pair, so let's make
550   // sure it's exact -- that way we can construct it via `mmap`.
551   static_assert(sizeof(UInt128) == 16, "");
552 
553   WordLiteralSection();
554   void addInput(WordLiteralInputSection *);
555   void finalizeContents();
556   void writeTo(uint8_t *buf) const override;
557 
558   uint64_t getSize() const override {
559     return literal16Map.size() * 16 + literal8Map.size() * 8 +
560            literal4Map.size() * 4;
561   }
562 
563   bool isNeeded() const override {
564     return !literal16Map.empty() || !literal4Map.empty() ||
565            !literal8Map.empty();
566   }
567 
568   uint64_t getLiteral16Offset(uintptr_t buf) const {
569     return literal16Map.at(*reinterpret_cast<const UInt128 *>(buf)) * 16;
570   }
571 
572   uint64_t getLiteral8Offset(uintptr_t buf) const {
573     return literal16Map.size() * 16 +
574            literal8Map.at(*reinterpret_cast<const uint64_t *>(buf)) * 8;
575   }
576 
577   uint64_t getLiteral4Offset(uintptr_t buf) const {
578     return literal16Map.size() * 16 + literal8Map.size() * 8 +
579            literal4Map.at(*reinterpret_cast<const uint32_t *>(buf)) * 4;
580   }
581 
582 private:
583   std::vector<WordLiteralInputSection *> inputs;
584 
585   template <class T> struct Hasher {
586     llvm::hash_code operator()(T v) const { return llvm::hash_value(v); }
587   };
588   // We're using unordered_map instead of DenseMap here because we need to
589   // support all possible integer values -- there are no suitable tombstone
590   // values for DenseMap.
591   std::unordered_map<UInt128, uint64_t, Hasher<UInt128>> literal16Map;
592   std::unordered_map<uint64_t, uint64_t> literal8Map;
593   std::unordered_map<uint32_t, uint64_t> literal4Map;
594 };
595 
// Bundle of pointers to individual sections. Every pointer is null until it
// is assigned; nothing in this header assigns them.
struct InStruct {
  MachHeaderSection *header = nullptr;
  CStringSection *cStringSection = nullptr;
  WordLiteralSection *wordLiteralSection = nullptr;
  RebaseSection *rebase = nullptr;
  BindingSection *binding = nullptr;
  WeakBindingSection *weakBinding = nullptr;
  LazyBindingSection *lazyBinding = nullptr;
  ExportSection *exports = nullptr;
  GotSection *got = nullptr;
  TlvPointerSection *tlvPointers = nullptr;
  LazyPointerSection *lazyPointers = nullptr;
  StubsSection *stubs = nullptr;
  StubHelperSection *stubHelper = nullptr;
  UnwindInfoSection *unwindInfo = nullptr;
  // Unlike the other members, this is an input section rather than one of the
  // section classes declared in this header.
  ConcatInputSection *imageLoaderCache = nullptr;
};
613 
// Global InStruct instance; defined in another translation unit.
extern InStruct in;
// Global list of synthetic sections; defined in another translation unit.
// NOTE(review): presumably filled as SyntheticSection objects are constructed
// -- confirm in the implementation file.
extern std::vector<SyntheticSection *> syntheticSections;

// Declared here, defined elsewhere.
void createSyntheticSymbols();
618 
619 } // namespace macho
620 } // namespace lld
621 
622 #endif
623