xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/MachOBuilder.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===------------ MachOBuilder.h -- Build MachO Objects ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Build MachO object files for interaction with the ObjC runtime and debugger.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H
14 #define LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H
15 
16 #include "llvm/BinaryFormat/MachO.h"
17 #include "llvm/Support/Endian.h"
18 #include "llvm/Support/MathExtras.h"
19 
20 #include <list>
21 #include <map>
22 #include <vector>
23 
24 namespace llvm {
25 namespace orc {
26 
27 template <typename MachOStruct>
writeMachOStruct(MutableArrayRef<char> Buf,size_t Offset,MachOStruct S,bool SwapStruct)28 size_t writeMachOStruct(MutableArrayRef<char> Buf, size_t Offset, MachOStruct S,
29                         bool SwapStruct) {
30   if (SwapStruct)
31     MachO::swapStruct(S);
32   assert(Offset + sizeof(MachOStruct) <= Buf.size() && "Buffer overflow");
33   memcpy(&Buf[Offset], reinterpret_cast<const char *>(&S), sizeof(MachOStruct));
34   return Offset + sizeof(MachOStruct);
35 }
36 
37 /// Base type for MachOBuilder load command wrappers.
38 struct MachOBuilderLoadCommandBase {
~MachOBuilderLoadCommandBaseMachOBuilderLoadCommandBase39   virtual ~MachOBuilderLoadCommandBase() {}
40   virtual size_t size() const = 0;
41   virtual size_t write(MutableArrayRef<char> Buf, size_t Offset,
42                        bool SwapStruct) = 0;
43 };
44 
45 /// MachOBuilder load command wrapper type.
46 template <MachO::LoadCommandType LCType> struct MachOBuilderLoadCommandImplBase;
47 
48 #define HANDLE_LOAD_COMMAND(Name, Value, LCStruct)                             \
49   template <>                                                                  \
50   struct MachOBuilderLoadCommandImplBase<MachO::Name>                          \
51       : public MachO::LCStruct, public MachOBuilderLoadCommandBase {           \
52     using CmdStruct = LCStruct;                                                \
53     MachOBuilderLoadCommandImplBase() {                                        \
54       memset(&rawStruct(), 0, sizeof(CmdStruct));                              \
55       cmd = Value;                                                             \
56       cmdsize = sizeof(CmdStruct);                                             \
57     }                                                                          \
58     template <typename... ArgTs>                                               \
59     MachOBuilderLoadCommandImplBase(ArgTs &&...Args)                           \
60         : CmdStruct{Value, sizeof(CmdStruct), std::forward<ArgTs>(Args)...} {} \
61     CmdStruct &rawStruct() { return static_cast<CmdStruct &>(*this); }         \
62     size_t size() const override { return cmdsize; }                           \
63     size_t write(MutableArrayRef<char> Buf, size_t Offset,                     \
64                  bool SwapStruct) override {                                   \
65       return writeMachOStruct(Buf, Offset, rawStruct(), SwapStruct);           \
66     }                                                                          \
67   };
68 
69 #include "llvm/BinaryFormat/MachO.def"
70 
71 #undef HANDLE_LOAD_COMMAND
72 
73 template <MachO::LoadCommandType LCType>
74 struct MachOBuilderLoadCommand
75     : public MachOBuilderLoadCommandImplBase<LCType> {
76 public:
77   MachOBuilderLoadCommand() = default;
78 
79   template <typename... ArgTs>
MachOBuilderLoadCommandMachOBuilderLoadCommand80   MachOBuilderLoadCommand(ArgTs &&...Args)
81       : MachOBuilderLoadCommandImplBase<LCType>(std::forward<ArgTs>(Args)...) {}
82 };
83 
84 template <>
85 struct MachOBuilderLoadCommand<MachO::LC_ID_DYLIB>
86     : public MachOBuilderLoadCommandImplBase<MachO::LC_ID_DYLIB> {
87 
88   MachOBuilderLoadCommand(std::string Name, uint32_t Timestamp,
89                           uint32_t CurrentVersion,
90                           uint32_t CompatibilityVersion)
91       : MachOBuilderLoadCommandImplBase(
92             MachO::dylib{24, Timestamp, CurrentVersion, CompatibilityVersion}),
93         Name(std::move(Name)) {
94     cmdsize += (this->Name.size() + 1 + 3) & ~0x3;
95   }
96 
97   size_t write(MutableArrayRef<char> Buf, size_t Offset,
98                bool SwapStruct) override {
99     Offset = writeMachOStruct(Buf, Offset, rawStruct(), SwapStruct);
100     strcpy(Buf.data() + Offset, Name.data());
101     return Offset + ((Name.size() + 1 + 3) & ~0x3);
102   }
103 
104   std::string Name;
105 };
106 
107 template <>
108 struct MachOBuilderLoadCommand<MachO::LC_LOAD_DYLIB>
109     : public MachOBuilderLoadCommandImplBase<MachO::LC_LOAD_DYLIB> {
110 
111   MachOBuilderLoadCommand(std::string Name, uint32_t Timestamp,
112                           uint32_t CurrentVersion,
113                           uint32_t CompatibilityVersion)
114       : MachOBuilderLoadCommandImplBase(
115             MachO::dylib{24, Timestamp, CurrentVersion, CompatibilityVersion}),
116         Name(std::move(Name)) {
117     cmdsize += (this->Name.size() + 1 + 3) & ~0x3;
118   }
119 
120   size_t write(MutableArrayRef<char> Buf, size_t Offset,
121                bool SwapStruct) override {
122     Offset = writeMachOStruct(Buf, Offset, rawStruct(), SwapStruct);
123     strcpy(Buf.data() + Offset, Name.data());
124     return Offset + ((Name.size() + 1 + 3) & ~0x3);
125   }
126 
127   std::string Name;
128 };
129 
130 template <>
131 struct MachOBuilderLoadCommand<MachO::LC_RPATH>
132     : public MachOBuilderLoadCommandImplBase<MachO::LC_RPATH> {
133   MachOBuilderLoadCommand(std::string Path)
134       : MachOBuilderLoadCommandImplBase(12u), Path(std::move(Path)) {
135     cmdsize += (this->Path.size() + 1 + 3) & ~0x3;
136   }
137 
138   size_t write(MutableArrayRef<char> Buf, size_t Offset,
139                bool SwapStruct) override {
140     Offset = writeMachOStruct(Buf, Offset, rawStruct(), SwapStruct);
141     strcpy(Buf.data() + Offset, Path.data());
142     return Offset + ((Path.size() + 1 + 3) & ~0x3);
143   }
144 
145   std::string Path;
146 };
147 
148 // Builds MachO objects.
149 template <typename MachOTraits> class MachOBuilder {
150 private:
151   struct SymbolContainer {
152     size_t SymbolIndexBase = 0;
153     std::vector<typename MachOTraits::NList> Symbols;
154   };
155 
156   struct StringTableEntry {
157     StringRef S;
158     size_t Offset;
159   };
160 
161   using StringTable = std::vector<StringTableEntry>;
162 
163   static bool swapStruct() {
164     return MachOTraits::Endianness != llvm::endianness::native;
165   }
166 
167 public:
168   using StringId = size_t;
169 
170   struct Section;
171 
172   // Points to either an nlist entry (as a (symbol-container, index) pair), or
173   // a section.
174   class RelocTarget {
175   public:
176     RelocTarget(const Section &S) : S(&S), Idx(~0U) {}
177     RelocTarget(SymbolContainer &SC, size_t Idx) : SC(&SC), Idx(Idx) {}
178 
179     bool isSymbol() { return Idx != ~0U; }
180 
181     uint32_t getSymbolNum() {
182       assert(isSymbol() && "Target is not a symbol");
183       return SC->SymbolIndexBase + Idx;
184     }
185 
186     uint32_t getSectionId() {
187       assert(!isSymbol() && "Target is not a section");
188       return S->SectionNumber;
189     }
190 
191     typename MachOTraits::NList &nlist() {
192       assert(isSymbol() && "Target is not a symbol");
193       return SC->Symbols[Idx];
194     }
195 
196   private:
197     union {
198       const Section *S;
199       SymbolContainer *SC;
200     };
201     size_t Idx;
202   };
203 
204   struct Reloc : public MachO::relocation_info {
205     RelocTarget Target;
206 
207     Reloc(int32_t Offset, RelocTarget Target, bool PCRel, unsigned Length,
208           unsigned Type)
209         : Target(Target) {
210       assert(Type < 16 && "Relocation type out of range");
211       r_address = Offset; // Will slide to account for sec addr during layout
212       r_symbolnum = 0;
213       r_pcrel = PCRel;
214       r_length = Length;
215       r_extern = Target.isSymbol();
216       r_type = Type;
217     }
218 
219     MachO::relocation_info &rawStruct() {
220       return static_cast<MachO::relocation_info &>(*this);
221     }
222   };
223 
224   struct SectionContent {
225     const char *Data = nullptr;
226     size_t Size = 0;
227   };
228 
229   struct Section : public MachOTraits::Section, public RelocTarget {
230     MachOBuilder &Builder;
231     SectionContent Content;
232     size_t SectionNumber = 0;
233     SymbolContainer SC;
234     std::vector<Reloc> Relocs;
235 
236     Section(MachOBuilder &Builder, StringRef SecName, StringRef SegName)
237         : RelocTarget(*this), Builder(Builder) {
238       memset(&rawStruct(), 0, sizeof(typename MachOTraits::Section));
239       assert(SecName.size() <= 16 && "SecName too long");
240       assert(SegName.size() <= 16 && "SegName too long");
241       memcpy(this->sectname, SecName.data(), SecName.size());
242       memcpy(this->segname, SegName.data(), SegName.size());
243     }
244 
245     RelocTarget addSymbol(int32_t Offset, StringRef Name, uint8_t Type,
246                           uint16_t Desc) {
247       StringId SI = Builder.addString(Name);
248       typename MachOTraits::NList Sym;
249       Sym.n_strx = SI;
250       Sym.n_type = Type | MachO::N_SECT;
251       Sym.n_sect = MachO::NO_SECT; // Will be filled in later.
252       Sym.n_desc = Desc;
253       Sym.n_value = Offset;
254       SC.Symbols.push_back(Sym);
255       return {SC, SC.Symbols.size() - 1};
256     }
257 
258     void addReloc(int32_t Offset, RelocTarget Target, bool PCRel,
259                   unsigned Length, unsigned Type) {
260       Relocs.push_back({Offset, Target, PCRel, Length, Type});
261     }
262 
263     auto &rawStruct() {
264       return static_cast<typename MachOTraits::Section &>(*this);
265     }
266   };
267 
268   struct Segment : public MachOBuilderLoadCommand<MachOTraits::SegmentCmd> {
269     MachOBuilder &Builder;
270     std::vector<std::unique_ptr<Section>> Sections;
271 
272     Segment(MachOBuilder &Builder, StringRef SegName)
273         : MachOBuilderLoadCommand<MachOTraits::SegmentCmd>(), Builder(Builder) {
274       assert(SegName.size() <= 16 && "SegName too long");
275       memcpy(this->segname, SegName.data(), SegName.size());
276       this->maxprot =
277           MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
278       this->initprot = this->maxprot;
279     }
280 
281     Section &addSection(StringRef SecName, StringRef SegName) {
282       Sections.push_back(std::make_unique<Section>(Builder, SecName, SegName));
283       return *Sections.back();
284     }
285 
286     size_t write(MutableArrayRef<char> Buf, size_t Offset,
287                  bool SwapStruct) override {
288       Offset = MachOBuilderLoadCommand<MachOTraits::SegmentCmd>::write(
289           Buf, Offset, SwapStruct);
290       for (auto &Sec : Sections)
291         Offset = writeMachOStruct(Buf, Offset, Sec->rawStruct(), SwapStruct);
292       return Offset;
293     }
294   };
295 
296   MachOBuilder(size_t PageSize) : PageSize(PageSize) {
297     memset((char *)&Header, 0, sizeof(Header));
298     Header.magic = MachOTraits::Magic;
299   }
300 
301   template <MachO::LoadCommandType LCType, typename... ArgTs>
302   MachOBuilderLoadCommand<LCType> &addLoadCommand(ArgTs &&...Args) {
303     static_assert(LCType != MachOTraits::SegmentCmd,
304                   "Use addSegment to add segment load command");
305     auto LC = std::make_unique<MachOBuilderLoadCommand<LCType>>(
306         std::forward<ArgTs>(Args)...);
307     auto &Tmp = *LC;
308     LoadCommands.push_back(std::move(LC));
309     return Tmp;
310   }
311 
312   StringId addString(StringRef Str) {
313     if (Strings.empty() && !Str.empty())
314       addString("");
315     return Strings.insert(std::make_pair(Str, Strings.size())).first->second;
316   }
317 
318   Segment &addSegment(StringRef SegName) {
319     Segments.push_back(Segment(*this, SegName));
320     return Segments.back();
321   }
322 
323   RelocTarget addSymbol(StringRef Name, uint8_t Type, uint8_t Sect,
324                         uint16_t Desc, typename MachOTraits::UIntPtr Value) {
325     StringId SI = addString(Name);
326     typename MachOTraits::NList Sym;
327     Sym.n_strx = SI;
328     Sym.n_type = Type;
329     Sym.n_sect = Sect;
330     Sym.n_desc = Desc;
331     Sym.n_value = Value;
332     SC.Symbols.push_back(Sym);
333     return {SC, SC.Symbols.size() - 1};
334   }
335 
336   // Call to perform layout on the MachO. Returns the total size of the
337   // resulting file.
338   // This method will automatically insert some load commands (e.g.
339   // LC_SYMTAB) and fill in load command fields.
340   size_t layout() {
341 
342     // Build symbol table and add LC_SYMTAB command.
343     makeStringTable();
344     MachOBuilderLoadCommand<MachOTraits::SymTabCmd> *SymTabLC = nullptr;
345     if (!StrTab.empty())
346       SymTabLC = &addLoadCommand<MachOTraits::SymTabCmd>();
347 
348     // Lay out header, segment load command, and other load commands.
349     size_t Offset = sizeof(Header);
350     for (auto &Seg : Segments) {
351       Seg.cmdsize +=
352           Seg.Sections.size() * sizeof(typename MachOTraits::Section);
353       Seg.nsects = Seg.Sections.size();
354       Offset += Seg.cmdsize;
355     }
356     for (auto &LC : LoadCommands)
357       Offset += LC->size();
358 
359     Header.sizeofcmds = Offset - sizeof(Header);
360 
361     // Lay out content, set segment / section addrs and offsets.
362     size_t SegVMAddr = 0;
363     for (auto &Seg : Segments) {
364       Seg.vmaddr = SegVMAddr;
365       Seg.fileoff = Offset;
366       for (auto &Sec : Seg.Sections) {
367         Offset = alignTo(Offset, 1ULL << Sec->align);
368         if (Sec->Content.Size)
369           Sec->offset = Offset;
370         Sec->size = Sec->Content.Size;
371         Sec->addr = SegVMAddr + Sec->offset - Seg.fileoff;
372         Offset += Sec->Content.Size;
373       }
374       size_t SegContentSize = Offset - Seg.fileoff;
375       Seg.filesize = SegContentSize;
376       Seg.vmsize = Header.filetype == MachO::MH_OBJECT
377                        ? SegContentSize
378                        : alignTo(SegContentSize, PageSize);
379       SegVMAddr += Seg.vmsize;
380     }
381 
382     // Set string table offsets for non-section symbols.
383     for (auto &Sym : SC.Symbols)
384       Sym.n_strx = StrTab[Sym.n_strx].Offset;
385 
386     // Number sections, set symbol section numbers and string table offsets,
387     // count relocations.
388     size_t NumSymbols = SC.Symbols.size();
389     size_t SectionNumber = 0;
390     for (auto &Seg : Segments) {
391       for (auto &Sec : Seg.Sections) {
392         ++SectionNumber;
393         Sec->SectionNumber = SectionNumber;
394         Sec->SC.SymbolIndexBase = NumSymbols;
395         NumSymbols += Sec->SC.Symbols.size();
396         for (auto &Sym : Sec->SC.Symbols) {
397           Sym.n_sect = SectionNumber;
398           Sym.n_strx = StrTab[Sym.n_strx].Offset;
399           Sym.n_value += Sec->addr;
400         }
401       }
402     }
403 
404     // Handle relocations
405     bool OffsetAlignedForRelocs = false;
406     for (auto &Seg : Segments) {
407       for (auto &Sec : Seg.Sections) {
408         if (!Sec->Relocs.empty()) {
409           if (!OffsetAlignedForRelocs) {
410             Offset = alignTo(Offset, sizeof(MachO::relocation_info));
411             OffsetAlignedForRelocs = true;
412           }
413           Sec->reloff = Offset;
414           Sec->nreloc = Sec->Relocs.size();
415           Offset += Sec->Relocs.size() * sizeof(MachO::relocation_info);
416           for (auto &R : Sec->Relocs)
417             R.r_symbolnum = R.Target.isSymbol() ? R.Target.getSymbolNum()
418                                                 : R.Target.getSectionId();
419         }
420       }
421     }
422 
423     // Calculate offset to start of nlist and update symtab command.
424     if (NumSymbols > 0) {
425       Offset = alignTo(Offset, sizeof(typename MachOTraits::NList));
426       SymTabLC->symoff = Offset;
427       SymTabLC->nsyms = NumSymbols;
428 
429       // Calculate string table bounds and update symtab command.
430       if (!StrTab.empty()) {
431         Offset += NumSymbols * sizeof(typename MachOTraits::NList);
432         size_t StringTableSize =
433             StrTab.back().Offset + StrTab.back().S.size() + 1;
434 
435         SymTabLC->stroff = Offset;
436         SymTabLC->strsize = StringTableSize;
437         Offset += StringTableSize;
438       }
439     }
440 
441     return Offset;
442   }
443 
444   void write(MutableArrayRef<char> Buffer) {
445     size_t Offset = 0;
446     Offset = writeHeader(Buffer, Offset);
447     Offset = writeSegments(Buffer, Offset);
448     Offset = writeLoadCommands(Buffer, Offset);
449     Offset = writeSectionContent(Buffer, Offset);
450     Offset = writeRelocations(Buffer, Offset);
451     Offset = writeSymbols(Buffer, Offset);
452     Offset = writeStrings(Buffer, Offset);
453   }
454 
455   typename MachOTraits::Header Header;
456 
457 private:
458   void makeStringTable() {
459     if (Strings.empty())
460       return;
461 
462     StrTab.resize(Strings.size());
463     for (auto &KV : Strings)
464       StrTab[KV.second] = {KV.first, 0};
465     size_t Offset = 0;
466     for (auto &Elem : StrTab) {
467       Elem.Offset = Offset;
468       Offset += Elem.S.size() + 1;
469     }
470   }
471 
472   size_t writeHeader(MutableArrayRef<char> Buf, size_t Offset) {
473     Header.ncmds = Segments.size() + LoadCommands.size();
474     return writeMachOStruct(Buf, Offset, Header, swapStruct());
475   }
476 
477   size_t writeSegments(MutableArrayRef<char> Buf, size_t Offset) {
478     for (auto &Seg : Segments)
479       Offset = Seg.write(Buf, Offset, swapStruct());
480     return Offset;
481   }
482 
483   size_t writeLoadCommands(MutableArrayRef<char> Buf, size_t Offset) {
484     for (auto &LC : LoadCommands)
485       Offset = LC->write(Buf, Offset, swapStruct());
486     return Offset;
487   }
488 
489   size_t writeSectionContent(MutableArrayRef<char> Buf, size_t Offset) {
490     for (auto &Seg : Segments) {
491       for (auto &Sec : Seg.Sections) {
492         if (!Sec->Content.Data) {
493           assert(Sec->Relocs.empty() &&
494                  "Cant' have relocs for zero-fill segment");
495           continue;
496         }
497         while (Offset != Sec->offset)
498           Buf[Offset++] = '\0';
499 
500         assert(Offset + Sec->Content.Size <= Buf.size() && "Buffer overflow");
501         memcpy(&Buf[Offset], Sec->Content.Data, Sec->Content.Size);
502         Offset += Sec->Content.Size;
503       }
504     }
505     return Offset;
506   }
507 
508   size_t writeRelocations(MutableArrayRef<char> Buf, size_t Offset) {
509     for (auto &Seg : Segments) {
510       for (auto &Sec : Seg.Sections) {
511         if (!Sec->Relocs.empty()) {
512           while (Offset % sizeof(MachO::relocation_info))
513             Buf[Offset++] = '\0';
514         }
515         for (auto &R : Sec->Relocs) {
516           assert(Offset + sizeof(MachO::relocation_info) <= Buf.size() &&
517                  "Buffer overflow");
518           memcpy(&Buf[Offset], reinterpret_cast<const char *>(&R.rawStruct()),
519                  sizeof(MachO::relocation_info));
520           Offset += sizeof(MachO::relocation_info);
521         }
522       }
523     }
524     return Offset;
525   }
526 
527   size_t writeSymbols(MutableArrayRef<char> Buf, size_t Offset) {
528 
529     // Count symbols.
530     size_t NumSymbols = SC.Symbols.size();
531     for (auto &Seg : Segments)
532       for (auto &Sec : Seg.Sections)
533         NumSymbols += Sec->SC.Symbols.size();
534 
535     // If none then return.
536     if (NumSymbols == 0)
537       return Offset;
538 
539     // Align to nlist entry size.
540     while (Offset % sizeof(typename MachOTraits::NList))
541       Buf[Offset++] = '\0';
542 
543     // Write non-section symbols.
544     for (auto &Sym : SC.Symbols)
545       Offset = writeMachOStruct(Buf, Offset, Sym, swapStruct());
546 
547     // Write section symbols.
548     for (auto &Seg : Segments) {
549       for (auto &Sec : Seg.Sections) {
550         for (auto &Sym : Sec->SC.Symbols) {
551           Offset = writeMachOStruct(Buf, Offset, Sym, swapStruct());
552         }
553       }
554     }
555     return Offset;
556   }
557 
558   size_t writeStrings(MutableArrayRef<char> Buf, size_t Offset) {
559     for (auto &Elem : StrTab) {
560       assert(Offset + Elem.S.size() + 1 <= Buf.size() && "Buffer overflow");
561       memcpy(&Buf[Offset], Elem.S.data(), Elem.S.size());
562       Offset += Elem.S.size();
563       Buf[Offset++] = '\0';
564     }
565     return Offset;
566   }
567 
568   size_t PageSize;
569   std::list<Segment> Segments;
570   std::vector<std::unique_ptr<MachOBuilderLoadCommandBase>> LoadCommands;
571   SymbolContainer SC;
572 
573   // Maps strings to their "id" (addition order).
574   std::map<StringRef, size_t> Strings;
575   StringTable StrTab;
576 };
577 
578 struct MachO64LE {
579   using UIntPtr = uint64_t;
580   using Header = MachO::mach_header_64;
581   using Section = MachO::section_64;
582   using NList = MachO::nlist_64;
583   using Relocation = MachO::relocation_info;
584 
585   static constexpr llvm::endianness Endianness = llvm::endianness::little;
586   static constexpr uint32_t Magic = MachO::MH_MAGIC_64;
587   static constexpr MachO::LoadCommandType SegmentCmd = MachO::LC_SEGMENT_64;
588   static constexpr MachO::LoadCommandType SymTabCmd = MachO::LC_SYMTAB;
589 };
590 
591 } // namespace orc
592 } // namespace llvm
593 
594 #endif // LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H
595