1 //===------------ MachOBuilder.h -- Build MachO Objects ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Build MachO object files for interaction with the ObjC runtime and debugger.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H
14 #define LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H
15
16 #include "llvm/BinaryFormat/MachO.h"
17 #include "llvm/Support/Endian.h"
18 #include "llvm/Support/MathExtras.h"
19
20 #include <list>
21 #include <map>
22 #include <vector>
23
24 namespace llvm {
25 namespace orc {
26
27 template <typename MachOStruct>
writeMachOStruct(MutableArrayRef<char> Buf,size_t Offset,MachOStruct S,bool SwapStruct)28 size_t writeMachOStruct(MutableArrayRef<char> Buf, size_t Offset, MachOStruct S,
29 bool SwapStruct) {
30 if (SwapStruct)
31 MachO::swapStruct(S);
32 assert(Offset + sizeof(MachOStruct) <= Buf.size() && "Buffer overflow");
33 memcpy(&Buf[Offset], reinterpret_cast<const char *>(&S), sizeof(MachOStruct));
34 return Offset + sizeof(MachOStruct);
35 }
36
37 /// Base type for MachOBuilder load command wrappers.
38 struct MachOBuilderLoadCommandBase {
~MachOBuilderLoadCommandBaseMachOBuilderLoadCommandBase39 virtual ~MachOBuilderLoadCommandBase() {}
40 virtual size_t size() const = 0;
41 virtual size_t write(MutableArrayRef<char> Buf, size_t Offset,
42 bool SwapStruct) = 0;
43 };
44
45 /// MachOBuilder load command wrapper type.
46 template <MachO::LoadCommandType LCType> struct MachOBuilderLoadCommandImplBase;
47
48 #define HANDLE_LOAD_COMMAND(Name, Value, LCStruct) \
49 template <> \
50 struct MachOBuilderLoadCommandImplBase<MachO::Name> \
51 : public MachO::LCStruct, public MachOBuilderLoadCommandBase { \
52 using CmdStruct = LCStruct; \
53 MachOBuilderLoadCommandImplBase() { \
54 memset(&rawStruct(), 0, sizeof(CmdStruct)); \
55 cmd = Value; \
56 cmdsize = sizeof(CmdStruct); \
57 } \
58 template <typename... ArgTs> \
59 MachOBuilderLoadCommandImplBase(ArgTs &&...Args) \
60 : CmdStruct{Value, sizeof(CmdStruct), std::forward<ArgTs>(Args)...} {} \
61 CmdStruct &rawStruct() { return static_cast<CmdStruct &>(*this); } \
62 size_t size() const override { return cmdsize; } \
63 size_t write(MutableArrayRef<char> Buf, size_t Offset, \
64 bool SwapStruct) override { \
65 return writeMachOStruct(Buf, Offset, rawStruct(), SwapStruct); \
66 } \
67 };
68
69 #include "llvm/BinaryFormat/MachO.def"
70
71 #undef HANDLE_LOAD_COMMAND
72
73 template <MachO::LoadCommandType LCType>
74 struct MachOBuilderLoadCommand
75 : public MachOBuilderLoadCommandImplBase<LCType> {
76 public:
77 MachOBuilderLoadCommand() = default;
78
79 template <typename... ArgTs>
MachOBuilderLoadCommandMachOBuilderLoadCommand80 MachOBuilderLoadCommand(ArgTs &&...Args)
81 : MachOBuilderLoadCommandImplBase<LCType>(std::forward<ArgTs>(Args)...) {}
82 };
83
84 template <>
85 struct MachOBuilderLoadCommand<MachO::LC_ID_DYLIB>
86 : public MachOBuilderLoadCommandImplBase<MachO::LC_ID_DYLIB> {
87
88 MachOBuilderLoadCommand(std::string Name, uint32_t Timestamp,
89 uint32_t CurrentVersion,
90 uint32_t CompatibilityVersion)
91 : MachOBuilderLoadCommandImplBase(
92 MachO::dylib{24, Timestamp, CurrentVersion, CompatibilityVersion}),
93 Name(std::move(Name)) {
94 cmdsize += (this->Name.size() + 1 + 3) & ~0x3;
95 }
96
97 size_t write(MutableArrayRef<char> Buf, size_t Offset,
98 bool SwapStruct) override {
99 Offset = writeMachOStruct(Buf, Offset, rawStruct(), SwapStruct);
100 strcpy(Buf.data() + Offset, Name.data());
101 return Offset + ((Name.size() + 1 + 3) & ~0x3);
102 }
103
104 std::string Name;
105 };
106
107 template <>
108 struct MachOBuilderLoadCommand<MachO::LC_LOAD_DYLIB>
109 : public MachOBuilderLoadCommandImplBase<MachO::LC_LOAD_DYLIB> {
110
111 MachOBuilderLoadCommand(std::string Name, uint32_t Timestamp,
112 uint32_t CurrentVersion,
113 uint32_t CompatibilityVersion)
114 : MachOBuilderLoadCommandImplBase(
115 MachO::dylib{24, Timestamp, CurrentVersion, CompatibilityVersion}),
116 Name(std::move(Name)) {
117 cmdsize += (this->Name.size() + 1 + 3) & ~0x3;
118 }
119
120 size_t write(MutableArrayRef<char> Buf, size_t Offset,
121 bool SwapStruct) override {
122 Offset = writeMachOStruct(Buf, Offset, rawStruct(), SwapStruct);
123 strcpy(Buf.data() + Offset, Name.data());
124 return Offset + ((Name.size() + 1 + 3) & ~0x3);
125 }
126
127 std::string Name;
128 };
129
130 template <>
131 struct MachOBuilderLoadCommand<MachO::LC_RPATH>
132 : public MachOBuilderLoadCommandImplBase<MachO::LC_RPATH> {
133 MachOBuilderLoadCommand(std::string Path)
134 : MachOBuilderLoadCommandImplBase(12u), Path(std::move(Path)) {
135 cmdsize += (this->Path.size() + 1 + 3) & ~0x3;
136 }
137
138 size_t write(MutableArrayRef<char> Buf, size_t Offset,
139 bool SwapStruct) override {
140 Offset = writeMachOStruct(Buf, Offset, rawStruct(), SwapStruct);
141 strcpy(Buf.data() + Offset, Path.data());
142 return Offset + ((Path.size() + 1 + 3) & ~0x3);
143 }
144
145 std::string Path;
146 };
147
148 // Builds MachO objects.
149 template <typename MachOTraits> class MachOBuilder {
150 private:
151 struct SymbolContainer {
152 size_t SymbolIndexBase = 0;
153 std::vector<typename MachOTraits::NList> Symbols;
154 };
155
156 struct StringTableEntry {
157 StringRef S;
158 size_t Offset;
159 };
160
161 using StringTable = std::vector<StringTableEntry>;
162
163 static bool swapStruct() {
164 return MachOTraits::Endianness != llvm::endianness::native;
165 }
166
167 public:
168 using StringId = size_t;
169
170 struct Section;
171
172 // Points to either an nlist entry (as a (symbol-container, index) pair), or
173 // a section.
174 class RelocTarget {
175 public:
176 RelocTarget(const Section &S) : S(&S), Idx(~0U) {}
177 RelocTarget(SymbolContainer &SC, size_t Idx) : SC(&SC), Idx(Idx) {}
178
179 bool isSymbol() { return Idx != ~0U; }
180
181 uint32_t getSymbolNum() {
182 assert(isSymbol() && "Target is not a symbol");
183 return SC->SymbolIndexBase + Idx;
184 }
185
186 uint32_t getSectionId() {
187 assert(!isSymbol() && "Target is not a section");
188 return S->SectionNumber;
189 }
190
191 typename MachOTraits::NList &nlist() {
192 assert(isSymbol() && "Target is not a symbol");
193 return SC->Symbols[Idx];
194 }
195
196 private:
197 union {
198 const Section *S;
199 SymbolContainer *SC;
200 };
201 size_t Idx;
202 };
203
204 struct Reloc : public MachO::relocation_info {
205 RelocTarget Target;
206
207 Reloc(int32_t Offset, RelocTarget Target, bool PCRel, unsigned Length,
208 unsigned Type)
209 : Target(Target) {
210 assert(Type < 16 && "Relocation type out of range");
211 r_address = Offset; // Will slide to account for sec addr during layout
212 r_symbolnum = 0;
213 r_pcrel = PCRel;
214 r_length = Length;
215 r_extern = Target.isSymbol();
216 r_type = Type;
217 }
218
219 MachO::relocation_info &rawStruct() {
220 return static_cast<MachO::relocation_info &>(*this);
221 }
222 };
223
// Pointer/size pair describing a section's bytes. Both members default to
// "no content" (null pointer, zero size).
struct SectionContent {
  const char *Data{nullptr};
  size_t Size{0};
};
228
229 struct Section : public MachOTraits::Section, public RelocTarget {
230 MachOBuilder &Builder;
231 SectionContent Content;
232 size_t SectionNumber = 0;
233 SymbolContainer SC;
234 std::vector<Reloc> Relocs;
235
236 Section(MachOBuilder &Builder, StringRef SecName, StringRef SegName)
237 : RelocTarget(*this), Builder(Builder) {
238 memset(&rawStruct(), 0, sizeof(typename MachOTraits::Section));
239 assert(SecName.size() <= 16 && "SecName too long");
240 assert(SegName.size() <= 16 && "SegName too long");
241 memcpy(this->sectname, SecName.data(), SecName.size());
242 memcpy(this->segname, SegName.data(), SegName.size());
243 }
244
245 RelocTarget addSymbol(int32_t Offset, StringRef Name, uint8_t Type,
246 uint16_t Desc) {
247 StringId SI = Builder.addString(Name);
248 typename MachOTraits::NList Sym;
249 Sym.n_strx = SI;
250 Sym.n_type = Type | MachO::N_SECT;
251 Sym.n_sect = MachO::NO_SECT; // Will be filled in later.
252 Sym.n_desc = Desc;
253 Sym.n_value = Offset;
254 SC.Symbols.push_back(Sym);
255 return {SC, SC.Symbols.size() - 1};
256 }
257
258 void addReloc(int32_t Offset, RelocTarget Target, bool PCRel,
259 unsigned Length, unsigned Type) {
260 Relocs.push_back({Offset, Target, PCRel, Length, Type});
261 }
262
263 auto &rawStruct() {
264 return static_cast<typename MachOTraits::Section &>(*this);
265 }
266 };
267
268 struct Segment : public MachOBuilderLoadCommand<MachOTraits::SegmentCmd> {
269 MachOBuilder &Builder;
270 std::vector<std::unique_ptr<Section>> Sections;
271
272 Segment(MachOBuilder &Builder, StringRef SegName)
273 : MachOBuilderLoadCommand<MachOTraits::SegmentCmd>(), Builder(Builder) {
274 assert(SegName.size() <= 16 && "SegName too long");
275 memcpy(this->segname, SegName.data(), SegName.size());
276 this->maxprot =
277 MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
278 this->initprot = this->maxprot;
279 }
280
281 Section &addSection(StringRef SecName, StringRef SegName) {
282 Sections.push_back(std::make_unique<Section>(Builder, SecName, SegName));
283 return *Sections.back();
284 }
285
286 size_t write(MutableArrayRef<char> Buf, size_t Offset,
287 bool SwapStruct) override {
288 Offset = MachOBuilderLoadCommand<MachOTraits::SegmentCmd>::write(
289 Buf, Offset, SwapStruct);
290 for (auto &Sec : Sections)
291 Offset = writeMachOStruct(Buf, Offset, Sec->rawStruct(), SwapStruct);
292 return Offset;
293 }
294 };
295
296 MachOBuilder(size_t PageSize) : PageSize(PageSize) {
297 memset((char *)&Header, 0, sizeof(Header));
298 Header.magic = MachOTraits::Magic;
299 }
300
301 template <MachO::LoadCommandType LCType, typename... ArgTs>
302 MachOBuilderLoadCommand<LCType> &addLoadCommand(ArgTs &&...Args) {
303 static_assert(LCType != MachOTraits::SegmentCmd,
304 "Use addSegment to add segment load command");
305 auto LC = std::make_unique<MachOBuilderLoadCommand<LCType>>(
306 std::forward<ArgTs>(Args)...);
307 auto &Tmp = *LC;
308 LoadCommands.push_back(std::move(LC));
309 return Tmp;
310 }
311
312 StringId addString(StringRef Str) {
313 if (Strings.empty() && !Str.empty())
314 addString("");
315 return Strings.insert(std::make_pair(Str, Strings.size())).first->second;
316 }
317
318 Segment &addSegment(StringRef SegName) {
319 Segments.push_back(Segment(*this, SegName));
320 return Segments.back();
321 }
322
323 RelocTarget addSymbol(StringRef Name, uint8_t Type, uint8_t Sect,
324 uint16_t Desc, typename MachOTraits::UIntPtr Value) {
325 StringId SI = addString(Name);
326 typename MachOTraits::NList Sym;
327 Sym.n_strx = SI;
328 Sym.n_type = Type;
329 Sym.n_sect = Sect;
330 Sym.n_desc = Desc;
331 Sym.n_value = Value;
332 SC.Symbols.push_back(Sym);
333 return {SC, SC.Symbols.size() - 1};
334 }
335
336 // Call to perform layout on the MachO. Returns the total size of the
337 // resulting file.
338 // This method will automatically insert some load commands (e.g.
339 // LC_SYMTAB) and fill in load command fields.
340 size_t layout() {
341
342 // Build symbol table and add LC_SYMTAB command.
343 makeStringTable();
344 MachOBuilderLoadCommand<MachOTraits::SymTabCmd> *SymTabLC = nullptr;
345 if (!StrTab.empty())
346 SymTabLC = &addLoadCommand<MachOTraits::SymTabCmd>();
347
348 // Lay out header, segment load command, and other load commands.
349 size_t Offset = sizeof(Header);
350 for (auto &Seg : Segments) {
351 Seg.cmdsize +=
352 Seg.Sections.size() * sizeof(typename MachOTraits::Section);
353 Seg.nsects = Seg.Sections.size();
354 Offset += Seg.cmdsize;
355 }
356 for (auto &LC : LoadCommands)
357 Offset += LC->size();
358
359 Header.sizeofcmds = Offset - sizeof(Header);
360
361 // Lay out content, set segment / section addrs and offsets.
362 size_t SegVMAddr = 0;
363 for (auto &Seg : Segments) {
364 Seg.vmaddr = SegVMAddr;
365 Seg.fileoff = Offset;
366 for (auto &Sec : Seg.Sections) {
367 Offset = alignTo(Offset, 1ULL << Sec->align);
368 if (Sec->Content.Size)
369 Sec->offset = Offset;
370 Sec->size = Sec->Content.Size;
371 Sec->addr = SegVMAddr + Sec->offset - Seg.fileoff;
372 Offset += Sec->Content.Size;
373 }
374 size_t SegContentSize = Offset - Seg.fileoff;
375 Seg.filesize = SegContentSize;
376 Seg.vmsize = Header.filetype == MachO::MH_OBJECT
377 ? SegContentSize
378 : alignTo(SegContentSize, PageSize);
379 SegVMAddr += Seg.vmsize;
380 }
381
382 // Set string table offsets for non-section symbols.
383 for (auto &Sym : SC.Symbols)
384 Sym.n_strx = StrTab[Sym.n_strx].Offset;
385
386 // Number sections, set symbol section numbers and string table offsets,
387 // count relocations.
388 size_t NumSymbols = SC.Symbols.size();
389 size_t SectionNumber = 0;
390 for (auto &Seg : Segments) {
391 for (auto &Sec : Seg.Sections) {
392 ++SectionNumber;
393 Sec->SectionNumber = SectionNumber;
394 Sec->SC.SymbolIndexBase = NumSymbols;
395 NumSymbols += Sec->SC.Symbols.size();
396 for (auto &Sym : Sec->SC.Symbols) {
397 Sym.n_sect = SectionNumber;
398 Sym.n_strx = StrTab[Sym.n_strx].Offset;
399 Sym.n_value += Sec->addr;
400 }
401 }
402 }
403
404 // Handle relocations
405 bool OffsetAlignedForRelocs = false;
406 for (auto &Seg : Segments) {
407 for (auto &Sec : Seg.Sections) {
408 if (!Sec->Relocs.empty()) {
409 if (!OffsetAlignedForRelocs) {
410 Offset = alignTo(Offset, sizeof(MachO::relocation_info));
411 OffsetAlignedForRelocs = true;
412 }
413 Sec->reloff = Offset;
414 Sec->nreloc = Sec->Relocs.size();
415 Offset += Sec->Relocs.size() * sizeof(MachO::relocation_info);
416 for (auto &R : Sec->Relocs)
417 R.r_symbolnum = R.Target.isSymbol() ? R.Target.getSymbolNum()
418 : R.Target.getSectionId();
419 }
420 }
421 }
422
423 // Calculate offset to start of nlist and update symtab command.
424 if (NumSymbols > 0) {
425 Offset = alignTo(Offset, sizeof(typename MachOTraits::NList));
426 SymTabLC->symoff = Offset;
427 SymTabLC->nsyms = NumSymbols;
428
429 // Calculate string table bounds and update symtab command.
430 if (!StrTab.empty()) {
431 Offset += NumSymbols * sizeof(typename MachOTraits::NList);
432 size_t StringTableSize =
433 StrTab.back().Offset + StrTab.back().S.size() + 1;
434
435 SymTabLC->stroff = Offset;
436 SymTabLC->strsize = StringTableSize;
437 Offset += StringTableSize;
438 }
439 }
440
441 return Offset;
442 }
443
444 void write(MutableArrayRef<char> Buffer) {
445 size_t Offset = 0;
446 Offset = writeHeader(Buffer, Offset);
447 Offset = writeSegments(Buffer, Offset);
448 Offset = writeLoadCommands(Buffer, Offset);
449 Offset = writeSectionContent(Buffer, Offset);
450 Offset = writeRelocations(Buffer, Offset);
451 Offset = writeSymbols(Buffer, Offset);
452 Offset = writeStrings(Buffer, Offset);
453 }
454
455 typename MachOTraits::Header Header;
456
457 private:
458 void makeStringTable() {
459 if (Strings.empty())
460 return;
461
462 StrTab.resize(Strings.size());
463 for (auto &KV : Strings)
464 StrTab[KV.second] = {KV.first, 0};
465 size_t Offset = 0;
466 for (auto &Elem : StrTab) {
467 Elem.Offset = Offset;
468 Offset += Elem.S.size() + 1;
469 }
470 }
471
472 size_t writeHeader(MutableArrayRef<char> Buf, size_t Offset) {
473 Header.ncmds = Segments.size() + LoadCommands.size();
474 return writeMachOStruct(Buf, Offset, Header, swapStruct());
475 }
476
477 size_t writeSegments(MutableArrayRef<char> Buf, size_t Offset) {
478 for (auto &Seg : Segments)
479 Offset = Seg.write(Buf, Offset, swapStruct());
480 return Offset;
481 }
482
483 size_t writeLoadCommands(MutableArrayRef<char> Buf, size_t Offset) {
484 for (auto &LC : LoadCommands)
485 Offset = LC->write(Buf, Offset, swapStruct());
486 return Offset;
487 }
488
489 size_t writeSectionContent(MutableArrayRef<char> Buf, size_t Offset) {
490 for (auto &Seg : Segments) {
491 for (auto &Sec : Seg.Sections) {
492 if (!Sec->Content.Data) {
493 assert(Sec->Relocs.empty() &&
494 "Cant' have relocs for zero-fill segment");
495 continue;
496 }
497 while (Offset != Sec->offset)
498 Buf[Offset++] = '\0';
499
500 assert(Offset + Sec->Content.Size <= Buf.size() && "Buffer overflow");
501 memcpy(&Buf[Offset], Sec->Content.Data, Sec->Content.Size);
502 Offset += Sec->Content.Size;
503 }
504 }
505 return Offset;
506 }
507
508 size_t writeRelocations(MutableArrayRef<char> Buf, size_t Offset) {
509 for (auto &Seg : Segments) {
510 for (auto &Sec : Seg.Sections) {
511 if (!Sec->Relocs.empty()) {
512 while (Offset % sizeof(MachO::relocation_info))
513 Buf[Offset++] = '\0';
514 }
515 for (auto &R : Sec->Relocs) {
516 assert(Offset + sizeof(MachO::relocation_info) <= Buf.size() &&
517 "Buffer overflow");
518 memcpy(&Buf[Offset], reinterpret_cast<const char *>(&R.rawStruct()),
519 sizeof(MachO::relocation_info));
520 Offset += sizeof(MachO::relocation_info);
521 }
522 }
523 }
524 return Offset;
525 }
526
527 size_t writeSymbols(MutableArrayRef<char> Buf, size_t Offset) {
528
529 // Count symbols.
530 size_t NumSymbols = SC.Symbols.size();
531 for (auto &Seg : Segments)
532 for (auto &Sec : Seg.Sections)
533 NumSymbols += Sec->SC.Symbols.size();
534
535 // If none then return.
536 if (NumSymbols == 0)
537 return Offset;
538
539 // Align to nlist entry size.
540 while (Offset % sizeof(typename MachOTraits::NList))
541 Buf[Offset++] = '\0';
542
543 // Write non-section symbols.
544 for (auto &Sym : SC.Symbols)
545 Offset = writeMachOStruct(Buf, Offset, Sym, swapStruct());
546
547 // Write section symbols.
548 for (auto &Seg : Segments) {
549 for (auto &Sec : Seg.Sections) {
550 for (auto &Sym : Sec->SC.Symbols) {
551 Offset = writeMachOStruct(Buf, Offset, Sym, swapStruct());
552 }
553 }
554 }
555 return Offset;
556 }
557
558 size_t writeStrings(MutableArrayRef<char> Buf, size_t Offset) {
559 for (auto &Elem : StrTab) {
560 assert(Offset + Elem.S.size() + 1 <= Buf.size() && "Buffer overflow");
561 memcpy(&Buf[Offset], Elem.S.data(), Elem.S.size());
562 Offset += Elem.S.size();
563 Buf[Offset++] = '\0';
564 }
565 return Offset;
566 }
567
568 size_t PageSize;
569 std::list<Segment> Segments;
570 std::vector<std::unique_ptr<MachOBuilderLoadCommandBase>> LoadCommands;
571 SymbolContainer SC;
572
573 // Maps strings to their "id" (addition order).
574 std::map<StringRef, size_t> Strings;
575 StringTable StrTab;
576 };
577
578 struct MachO64LE {
579 using UIntPtr = uint64_t;
580 using Header = MachO::mach_header_64;
581 using Section = MachO::section_64;
582 using NList = MachO::nlist_64;
583 using Relocation = MachO::relocation_info;
584
585 static constexpr llvm::endianness Endianness = llvm::endianness::little;
586 static constexpr uint32_t Magic = MachO::MH_MAGIC_64;
587 static constexpr MachO::LoadCommandType SegmentCmd = MachO::LC_SEGMENT_64;
588 static constexpr MachO::LoadCommandType SymTabCmd = MachO::LC_SYMTAB;
589 };
590
591 } // namespace orc
592 } // namespace llvm
593
594 #endif // LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H
595