xref: /freebsd/contrib/llvm-project/llvm/lib/MC/MachObjectWriter.cpp (revision 2f513db72b034fd5ef7f080b11be5c711c15186a)
1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/ADT/iterator_range.h"
12 #include "llvm/BinaryFormat/MachO.h"
13 #include "llvm/MC/MCAsmBackend.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCAssembler.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDirectives.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCFixupKindInfo.h"
20 #include "llvm/MC/MCFragment.h"
21 #include "llvm/MC/MCMachObjectWriter.h"
22 #include "llvm/MC/MCObjectWriter.h"
23 #include "llvm/MC/MCSection.h"
24 #include "llvm/MC/MCSectionMachO.h"
25 #include "llvm/MC/MCSymbol.h"
26 #include "llvm/MC/MCSymbolMachO.h"
27 #include "llvm/MC/MCValue.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include <algorithm>
34 #include <cassert>
35 #include <cstdint>
36 #include <string>
37 #include <utility>
38 #include <vector>
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "mc"
43 
44 void MachObjectWriter::reset() {
45   Relocations.clear();
46   IndirectSymBase.clear();
47   StringTable.clear();
48   LocalSymbolData.clear();
49   ExternalSymbolData.clear();
50   UndefinedSymbolData.clear();
51   MCObjectWriter::reset();
52 }
53 
54 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
55   // Undefined symbols are always extern.
56   if (S.isUndefined())
57     return true;
58 
59   // References to weak definitions require external relocation entries; the
60   // definition may not always be the one in the same object file.
61   if (cast<MCSymbolMachO>(S).isWeakDefinition())
62     return true;
63 
64   // Otherwise, we can use an internal relocation.
65   return false;
66 }
67 
68 bool MachObjectWriter::
69 MachSymbolData::operator<(const MachSymbolData &RHS) const {
70   return Symbol->getName() < RHS.Symbol->getName();
71 }
72 
73 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
74   const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
75     (MCFixupKind) Kind);
76 
77   return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
78 }
79 
80 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
81                                               const MCAsmLayout &Layout) const {
82   return getSectionAddress(Fragment->getParent()) +
83          Layout.getFragmentOffset(Fragment);
84 }
85 
86 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
87                                             const MCAsmLayout &Layout) const {
88   // If this is a variable, then recursively evaluate now.
89   if (S.isVariable()) {
90     if (const MCConstantExpr *C =
91           dyn_cast<const MCConstantExpr>(S.getVariableValue()))
92       return C->getValue();
93 
94     MCValue Target;
95     if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr))
96       report_fatal_error("unable to evaluate offset for variable '" +
97                          S.getName() + "'");
98 
99     // Verify that any used symbols are defined.
100     if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
101       report_fatal_error("unable to evaluate offset to undefined symbol '" +
102                          Target.getSymA()->getSymbol().getName() + "'");
103     if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
104       report_fatal_error("unable to evaluate offset to undefined symbol '" +
105                          Target.getSymB()->getSymbol().getName() + "'");
106 
107     uint64_t Address = Target.getConstant();
108     if (Target.getSymA())
109       Address += getSymbolAddress(Target.getSymA()->getSymbol(), Layout);
110     if (Target.getSymB())
111       Address += getSymbolAddress(Target.getSymB()->getSymbol(), Layout);
112     return Address;
113   }
114 
115   return getSectionAddress(S.getFragment()->getParent()) +
116          Layout.getSymbolOffset(S);
117 }
118 
119 uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec,
120                                           const MCAsmLayout &Layout) const {
121   uint64_t EndAddr = getSectionAddress(Sec) + Layout.getSectionAddressSize(Sec);
122   unsigned Next = Sec->getLayoutOrder() + 1;
123   if (Next >= Layout.getSectionOrder().size())
124     return 0;
125 
126   const MCSection &NextSec = *Layout.getSectionOrder()[Next];
127   if (NextSec.isVirtualSection())
128     return 0;
129   return OffsetToAlignment(EndAddr, NextSec.getAlignment());
130 }
131 
132 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type,
133                                    unsigned NumLoadCommands,
134                                    unsigned LoadCommandsSize,
135                                    bool SubsectionsViaSymbols) {
136   uint32_t Flags = 0;
137 
138   if (SubsectionsViaSymbols)
139     Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
140 
141   // struct mach_header (28 bytes) or
142   // struct mach_header_64 (32 bytes)
143 
144   uint64_t Start = W.OS.tell();
145   (void) Start;
146 
147   W.write<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
148 
149   W.write<uint32_t>(TargetObjectWriter->getCPUType());
150   W.write<uint32_t>(TargetObjectWriter->getCPUSubtype());
151 
152   W.write<uint32_t>(Type);
153   W.write<uint32_t>(NumLoadCommands);
154   W.write<uint32_t>(LoadCommandsSize);
155   W.write<uint32_t>(Flags);
156   if (is64Bit())
157     W.write<uint32_t>(0); // reserved
158 
159   assert(W.OS.tell() - Start == (is64Bit() ? sizeof(MachO::mach_header_64)
160                                            : sizeof(MachO::mach_header)));
161 }
162 
163 void MachObjectWriter::writeWithPadding(StringRef Str, uint64_t Size) {
164   assert(Size >= Str.size());
165   W.OS << Str;
166   W.OS.write_zeros(Size - Str.size());
167 }
168 
169 /// writeSegmentLoadCommand - Write a segment load command.
170 ///
171 /// \param NumSections The number of sections in this segment.
172 /// \param SectionDataSize The total size of the sections.
173 void MachObjectWriter::writeSegmentLoadCommand(
174     StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize,
175     uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt,
176     uint32_t InitProt) {
177   // struct segment_command (56 bytes) or
178   // struct segment_command_64 (72 bytes)
179 
180   uint64_t Start = W.OS.tell();
181   (void) Start;
182 
183   unsigned SegmentLoadCommandSize =
184     is64Bit() ? sizeof(MachO::segment_command_64):
185     sizeof(MachO::segment_command);
186   W.write<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
187   W.write<uint32_t>(SegmentLoadCommandSize +
188           NumSections * (is64Bit() ? sizeof(MachO::section_64) :
189                          sizeof(MachO::section)));
190 
191   writeWithPadding(Name, 16);
192   if (is64Bit()) {
193     W.write<uint64_t>(VMAddr);                 // vmaddr
194     W.write<uint64_t>(VMSize); // vmsize
195     W.write<uint64_t>(SectionDataStartOffset); // file offset
196     W.write<uint64_t>(SectionDataSize); // file size
197   } else {
198     W.write<uint32_t>(VMAddr);                 // vmaddr
199     W.write<uint32_t>(VMSize); // vmsize
200     W.write<uint32_t>(SectionDataStartOffset); // file offset
201     W.write<uint32_t>(SectionDataSize); // file size
202   }
203   // maxprot
204   W.write<uint32_t>(MaxProt);
205   // initprot
206   W.write<uint32_t>(InitProt);
207   W.write<uint32_t>(NumSections);
208   W.write<uint32_t>(0); // flags
209 
210   assert(W.OS.tell() - Start == SegmentLoadCommandSize);
211 }
212 
213 void MachObjectWriter::writeSection(const MCAsmLayout &Layout,
214                                     const MCSection &Sec, uint64_t VMAddr,
215                                     uint64_t FileOffset, unsigned Flags,
216                                     uint64_t RelocationsStart,
217                                     unsigned NumRelocations) {
218   uint64_t SectionSize = Layout.getSectionAddressSize(&Sec);
219   const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);
220 
221   // The offset is unused for virtual sections.
222   if (Section.isVirtualSection()) {
223     assert(Layout.getSectionFileSize(&Sec) == 0 && "Invalid file size!");
224     FileOffset = 0;
225   }
226 
227   // struct section (68 bytes) or
228   // struct section_64 (80 bytes)
229 
230   uint64_t Start = W.OS.tell();
231   (void) Start;
232 
233   writeWithPadding(Section.getSectionName(), 16);
234   writeWithPadding(Section.getSegmentName(), 16);
235   if (is64Bit()) {
236     W.write<uint64_t>(VMAddr);      // address
237     W.write<uint64_t>(SectionSize); // size
238   } else {
239     W.write<uint32_t>(VMAddr);      // address
240     W.write<uint32_t>(SectionSize); // size
241   }
242   W.write<uint32_t>(FileOffset);
243 
244   assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!");
245   W.write<uint32_t>(Log2_32(Section.getAlignment()));
246   W.write<uint32_t>(NumRelocations ? RelocationsStart : 0);
247   W.write<uint32_t>(NumRelocations);
248   W.write<uint32_t>(Flags);
249   W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1
250   W.write<uint32_t>(Section.getStubSize()); // reserved2
251   if (is64Bit())
252     W.write<uint32_t>(0); // reserved3
253 
254   assert(W.OS.tell() - Start ==
255          (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section)));
256 }
257 
258 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
259                                               uint32_t NumSymbols,
260                                               uint32_t StringTableOffset,
261                                               uint32_t StringTableSize) {
262   // struct symtab_command (24 bytes)
263 
264   uint64_t Start = W.OS.tell();
265   (void) Start;
266 
267   W.write<uint32_t>(MachO::LC_SYMTAB);
268   W.write<uint32_t>(sizeof(MachO::symtab_command));
269   W.write<uint32_t>(SymbolOffset);
270   W.write<uint32_t>(NumSymbols);
271   W.write<uint32_t>(StringTableOffset);
272   W.write<uint32_t>(StringTableSize);
273 
274   assert(W.OS.tell() - Start == sizeof(MachO::symtab_command));
275 }
276 
277 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
278                                                 uint32_t NumLocalSymbols,
279                                                 uint32_t FirstExternalSymbol,
280                                                 uint32_t NumExternalSymbols,
281                                                 uint32_t FirstUndefinedSymbol,
282                                                 uint32_t NumUndefinedSymbols,
283                                                 uint32_t IndirectSymbolOffset,
284                                                 uint32_t NumIndirectSymbols) {
285   // struct dysymtab_command (80 bytes)
286 
287   uint64_t Start = W.OS.tell();
288   (void) Start;
289 
290   W.write<uint32_t>(MachO::LC_DYSYMTAB);
291   W.write<uint32_t>(sizeof(MachO::dysymtab_command));
292   W.write<uint32_t>(FirstLocalSymbol);
293   W.write<uint32_t>(NumLocalSymbols);
294   W.write<uint32_t>(FirstExternalSymbol);
295   W.write<uint32_t>(NumExternalSymbols);
296   W.write<uint32_t>(FirstUndefinedSymbol);
297   W.write<uint32_t>(NumUndefinedSymbols);
298   W.write<uint32_t>(0); // tocoff
299   W.write<uint32_t>(0); // ntoc
300   W.write<uint32_t>(0); // modtaboff
301   W.write<uint32_t>(0); // nmodtab
302   W.write<uint32_t>(0); // extrefsymoff
303   W.write<uint32_t>(0); // nextrefsyms
304   W.write<uint32_t>(IndirectSymbolOffset);
305   W.write<uint32_t>(NumIndirectSymbols);
306   W.write<uint32_t>(0); // extreloff
307   W.write<uint32_t>(0); // nextrel
308   W.write<uint32_t>(0); // locreloff
309   W.write<uint32_t>(0); // nlocrel
310 
311   assert(W.OS.tell() - Start == sizeof(MachO::dysymtab_command));
312 }
313 
314 MachObjectWriter::MachSymbolData *
315 MachObjectWriter::findSymbolData(const MCSymbol &Sym) {
316   for (auto *SymbolData :
317        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
318     for (MachSymbolData &Entry : *SymbolData)
319       if (Entry.Symbol == &Sym)
320         return &Entry;
321 
322   return nullptr;
323 }
324 
325 const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const {
326   const MCSymbol *S = &Sym;
327   while (S->isVariable()) {
328     const MCExpr *Value = S->getVariableValue();
329     const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value);
330     if (!Ref)
331       return *S;
332     S = &Ref->getSymbol();
333   }
334   return *S;
335 }
336 
337 void MachObjectWriter::writeNlist(MachSymbolData &MSD,
338                                   const MCAsmLayout &Layout) {
339   const MCSymbol *Symbol = MSD.Symbol;
340   const MCSymbol &Data = *Symbol;
341   const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol);
342   uint8_t SectionIndex = MSD.SectionIndex;
343   uint8_t Type = 0;
344   uint64_t Address = 0;
345   bool IsAlias = Symbol != AliasedSymbol;
346 
347   const MCSymbol &OrigSymbol = *Symbol;
348   MachSymbolData *AliaseeInfo;
349   if (IsAlias) {
350     AliaseeInfo = findSymbolData(*AliasedSymbol);
351     if (AliaseeInfo)
352       SectionIndex = AliaseeInfo->SectionIndex;
353     Symbol = AliasedSymbol;
354     // FIXME: Should this update Data as well?
355   }
356 
357   // Set the N_TYPE bits. See <mach-o/nlist.h>.
358   //
359   // FIXME: Are the prebound or indirect fields possible here?
360   if (IsAlias && Symbol->isUndefined())
361     Type = MachO::N_INDR;
362   else if (Symbol->isUndefined())
363     Type = MachO::N_UNDF;
364   else if (Symbol->isAbsolute())
365     Type = MachO::N_ABS;
366   else
367     Type = MachO::N_SECT;
368 
369   // FIXME: Set STAB bits.
370 
371   if (Data.isPrivateExtern())
372     Type |= MachO::N_PEXT;
373 
374   // Set external bit.
375   if (Data.isExternal() || (!IsAlias && Symbol->isUndefined()))
376     Type |= MachO::N_EXT;
377 
378   // Compute the symbol address.
379   if (IsAlias && Symbol->isUndefined())
380     Address = AliaseeInfo->StringIndex;
381   else if (Symbol->isDefined())
382     Address = getSymbolAddress(OrigSymbol, Layout);
383   else if (Symbol->isCommon()) {
384     // Common symbols are encoded with the size in the address
385     // field, and their alignment in the flags.
386     Address = Symbol->getCommonSize();
387   }
388 
389   // struct nlist (12 bytes)
390 
391   W.write<uint32_t>(MSD.StringIndex);
392   W.OS << char(Type);
393   W.OS << char(SectionIndex);
394 
395   // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
396   // value.
397   bool EncodeAsAltEntry =
398     IsAlias && cast<MCSymbolMachO>(OrigSymbol).isAltEntry();
399   W.write<uint16_t>(cast<MCSymbolMachO>(Symbol)->getEncodedFlags(EncodeAsAltEntry));
400   if (is64Bit())
401     W.write<uint64_t>(Address);
402   else
403     W.write<uint32_t>(Address);
404 }
405 
406 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type,
407                                                 uint32_t DataOffset,
408                                                 uint32_t DataSize) {
409   uint64_t Start = W.OS.tell();
410   (void) Start;
411 
412   W.write<uint32_t>(Type);
413   W.write<uint32_t>(sizeof(MachO::linkedit_data_command));
414   W.write<uint32_t>(DataOffset);
415   W.write<uint32_t>(DataSize);
416 
417   assert(W.OS.tell() - Start == sizeof(MachO::linkedit_data_command));
418 }
419 
420 static unsigned ComputeLinkerOptionsLoadCommandSize(
421   const std::vector<std::string> &Options, bool is64Bit)
422 {
423   unsigned Size = sizeof(MachO::linker_option_command);
424   for (const std::string &Option : Options)
425     Size += Option.size() + 1;
426   return alignTo(Size, is64Bit ? 8 : 4);
427 }
428 
429 void MachObjectWriter::writeLinkerOptionsLoadCommand(
430   const std::vector<std::string> &Options)
431 {
432   unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
433   uint64_t Start = W.OS.tell();
434   (void) Start;
435 
436   W.write<uint32_t>(MachO::LC_LINKER_OPTION);
437   W.write<uint32_t>(Size);
438   W.write<uint32_t>(Options.size());
439   uint64_t BytesWritten = sizeof(MachO::linker_option_command);
440   for (const std::string &Option : Options) {
441     // Write each string, including the null byte.
442     W.OS << Option << '\0';
443     BytesWritten += Option.size() + 1;
444   }
445 
446   // Pad to a multiple of the pointer size.
447   W.OS.write_zeros(OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4));
448 
449   assert(W.OS.tell() - Start == Size);
450 }
451 
452 static bool isFixupTargetValid(const MCValue &Target) {
453   // Target is (LHS - RHS + cst).
454   // We don't support the form where LHS is null: -RHS + cst
455   if (!Target.getSymA() && Target.getSymB())
456     return false;
457   return true;
458 }
459 
460 void MachObjectWriter::recordRelocation(MCAssembler &Asm,
461                                         const MCAsmLayout &Layout,
462                                         const MCFragment *Fragment,
463                                         const MCFixup &Fixup, MCValue Target,
464                                         uint64_t &FixedValue) {
465   if (!isFixupTargetValid(Target)) {
466     Asm.getContext().reportError(Fixup.getLoc(),
467                                  "unsupported relocation expression");
468     return;
469   }
470 
471   TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup,
472                                        Target, FixedValue);
473 }
474 
475 void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
476   // This is the point where 'as' creates actual symbols for indirect symbols
477   // (in the following two passes). It would be easier for us to do this sooner
478   // when we see the attribute, but that makes getting the order in the symbol
479   // table much more complicated than it is worth.
480   //
481   // FIXME: Revisit this when the dust settles.
482 
483   // Report errors for use of .indirect_symbol not in a symbol pointer section
484   // or stub section.
485   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
486          ie = Asm.indirect_symbol_end(); it != ie; ++it) {
487     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
488 
489     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
490         Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
491         Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS &&
492         Section.getType() != MachO::S_SYMBOL_STUBS) {
493       MCSymbol &Symbol = *it->Symbol;
494       report_fatal_error("indirect symbol '" + Symbol.getName() +
495                          "' not in a symbol pointer or stub section");
496     }
497   }
498 
499   // Bind non-lazy symbol pointers first.
500   unsigned IndirectIndex = 0;
501   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
502          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
503     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
504 
505     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
506         Section.getType() !=  MachO::S_THREAD_LOCAL_VARIABLE_POINTERS)
507       continue;
508 
509     // Initialize the section indirect symbol base, if necessary.
510     IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
511 
512     Asm.registerSymbol(*it->Symbol);
513   }
514 
515   // Then lazy symbol pointers and symbol stubs.
516   IndirectIndex = 0;
517   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
518          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
519     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
520 
521     if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
522         Section.getType() != MachO::S_SYMBOL_STUBS)
523       continue;
524 
525     // Initialize the section indirect symbol base, if necessary.
526     IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
527 
528     // Set the symbol type to undefined lazy, but only on construction.
529     //
530     // FIXME: Do not hardcode.
531     bool Created;
532     Asm.registerSymbol(*it->Symbol, &Created);
533     if (Created)
534       cast<MCSymbolMachO>(it->Symbol)->setReferenceTypeUndefinedLazy(true);
535   }
536 }
537 
538 /// computeSymbolTable - Compute the symbol table data
539 void MachObjectWriter::computeSymbolTable(
540     MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
541     std::vector<MachSymbolData> &ExternalSymbolData,
542     std::vector<MachSymbolData> &UndefinedSymbolData) {
543   // Build section lookup table.
544   DenseMap<const MCSection*, uint8_t> SectionIndexMap;
545   unsigned Index = 1;
546   for (MCAssembler::iterator it = Asm.begin(),
547          ie = Asm.end(); it != ie; ++it, ++Index)
548     SectionIndexMap[&*it] = Index;
549   assert(Index <= 256 && "Too many sections!");
550 
551   // Build the string table.
552   for (const MCSymbol &Symbol : Asm.symbols()) {
553     if (!Asm.isSymbolLinkerVisible(Symbol))
554       continue;
555 
556     StringTable.add(Symbol.getName());
557   }
558   StringTable.finalize();
559 
560   // Build the symbol arrays but only for non-local symbols.
561   //
562   // The particular order that we collect and then sort the symbols is chosen to
563   // match 'as'. Even though it doesn't matter for correctness, this is
564   // important for letting us diff .o files.
565   for (const MCSymbol &Symbol : Asm.symbols()) {
566     // Ignore non-linker visible symbols.
567     if (!Asm.isSymbolLinkerVisible(Symbol))
568       continue;
569 
570     if (!Symbol.isExternal() && !Symbol.isUndefined())
571       continue;
572 
573     MachSymbolData MSD;
574     MSD.Symbol = &Symbol;
575     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
576 
577     if (Symbol.isUndefined()) {
578       MSD.SectionIndex = 0;
579       UndefinedSymbolData.push_back(MSD);
580     } else if (Symbol.isAbsolute()) {
581       MSD.SectionIndex = 0;
582       ExternalSymbolData.push_back(MSD);
583     } else {
584       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
585       assert(MSD.SectionIndex && "Invalid section index!");
586       ExternalSymbolData.push_back(MSD);
587     }
588   }
589 
590   // Now add the data for local symbols.
591   for (const MCSymbol &Symbol : Asm.symbols()) {
592     // Ignore non-linker visible symbols.
593     if (!Asm.isSymbolLinkerVisible(Symbol))
594       continue;
595 
596     if (Symbol.isExternal() || Symbol.isUndefined())
597       continue;
598 
599     MachSymbolData MSD;
600     MSD.Symbol = &Symbol;
601     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
602 
603     if (Symbol.isAbsolute()) {
604       MSD.SectionIndex = 0;
605       LocalSymbolData.push_back(MSD);
606     } else {
607       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
608       assert(MSD.SectionIndex && "Invalid section index!");
609       LocalSymbolData.push_back(MSD);
610     }
611   }
612 
613   // External and undefined symbols are required to be in lexicographic order.
614   llvm::sort(ExternalSymbolData);
615   llvm::sort(UndefinedSymbolData);
616 
617   // Set the symbol indices.
618   Index = 0;
619   for (auto *SymbolData :
620        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
621     for (MachSymbolData &Entry : *SymbolData)
622       Entry.Symbol->setIndex(Index++);
623 
624   for (const MCSection &Section : Asm) {
625     for (RelAndSymbol &Rel : Relocations[&Section]) {
626       if (!Rel.Sym)
627         continue;
628 
629       // Set the Index and the IsExtern bit.
630       unsigned Index = Rel.Sym->getIndex();
631       assert(isInt<24>(Index));
632       if (W.Endian == support::little)
633         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27);
634       else
635         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
636     }
637   }
638 }
639 
640 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
641                                                const MCAsmLayout &Layout) {
642   uint64_t StartAddress = 0;
643   for (const MCSection *Sec : Layout.getSectionOrder()) {
644     StartAddress = alignTo(StartAddress, Sec->getAlignment());
645     SectionAddress[Sec] = StartAddress;
646     StartAddress += Layout.getSectionAddressSize(Sec);
647 
648     // Explicitly pad the section to match the alignment requirements of the
649     // following one. This is for 'gas' compatibility, it shouldn't
650     /// strictly be necessary.
651     StartAddress += getPaddingSize(Sec, Layout);
652   }
653 }
654 
655 void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
656                                                 const MCAsmLayout &Layout) {
657   computeSectionAddresses(Asm, Layout);
658 
659   // Create symbol data for any indirect symbols.
660   bindIndirectSymbols(Asm);
661 }
662 
663 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
664     const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B,
665     bool InSet) const {
666   // FIXME: We don't handle things like
667   // foo = .
668   // creating atoms.
669   if (A.isVariable() || B.isVariable())
670     return false;
671   return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, A, B,
672                                                                 InSet);
673 }
674 
675 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
676     const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
677     bool InSet, bool IsPCRel) const {
678   if (InSet)
679     return true;
680 
681   // The effective address is
682   //     addr(atom(A)) + offset(A)
683   //   - addr(atom(B)) - offset(B)
684   // and the offsets are not relocatable, so the fixup is fully resolved when
685   //  addr(atom(A)) - addr(atom(B)) == 0.
686   const MCSymbol &SA = findAliasedSymbol(SymA);
687   const MCSection &SecA = SA.getSection();
688   const MCSection &SecB = *FB.getParent();
689 
690   if (IsPCRel) {
691     // The simple (Darwin, except on x86_64) way of dealing with this was to
692     // assume that any reference to a temporary symbol *must* be a temporary
693     // symbol in the same atom, unless the sections differ. Therefore, any PCrel
694     // relocation to a temporary symbol (in the same section) is fully
695     // resolved. This also works in conjunction with absolutized .set, which
696     // requires the compiler to use .set to absolutize the differences between
697     // symbols which the compiler knows to be assembly time constants, so we
698     // don't need to worry about considering symbol differences fully resolved.
699     //
700     // If the file isn't using sub-sections-via-symbols, we can make the
701     // same assumptions about any symbol that we normally make about
702     // assembler locals.
703 
704     bool hasReliableSymbolDifference = isX86_64();
705     if (!hasReliableSymbolDifference) {
706       if (!SA.isInSection() || &SecA != &SecB ||
707           (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() &&
708            Asm.getSubsectionsViaSymbols()))
709         return false;
710       return true;
711     }
712     // For Darwin x86_64, there is one special case when the reference IsPCRel.
713     // If the fragment with the reference does not have a base symbol but meets
714     // the simple way of dealing with this, in that it is a temporary symbol in
715     // the same atom then it is assumed to be fully resolved.  This is needed so
716     // a relocation entry is not created and so the static linker does not
717     // mess up the reference later.
718     else if(!FB.getAtom() &&
719             SA.isTemporary() && SA.isInSection() && &SecA == &SecB){
720       return true;
721     }
722   }
723 
724   // If they are not in the same section, we can't compute the diff.
725   if (&SecA != &SecB)
726     return false;
727 
728   const MCFragment *FA = SA.getFragment();
729 
730   // Bail if the symbol has no fragment.
731   if (!FA)
732     return false;
733 
734   // If the atoms are the same, they are guaranteed to have the same address.
735   if (FA->getAtom() == FB.getAtom())
736     return true;
737 
738   // Otherwise, we can't prove this is fully resolved.
739   return false;
740 }
741 
742 static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) {
743   switch (Type) {
744   case MCVM_OSXVersionMin:     return MachO::LC_VERSION_MIN_MACOSX;
745   case MCVM_IOSVersionMin:     return MachO::LC_VERSION_MIN_IPHONEOS;
746   case MCVM_TvOSVersionMin:    return MachO::LC_VERSION_MIN_TVOS;
747   case MCVM_WatchOSVersionMin: return MachO::LC_VERSION_MIN_WATCHOS;
748   }
749   llvm_unreachable("Invalid mc version min type");
750 }
751 
752 uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
753                                        const MCAsmLayout &Layout) {
754   uint64_t StartOffset = W.OS.tell();
755 
756   // Compute symbol table information and bind symbol indices.
757   computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
758                      UndefinedSymbolData);
759 
760   unsigned NumSections = Asm.size();
761   const MCAssembler::VersionInfoType &VersionInfo =
762     Layout.getAssembler().getVersionInfo();
763 
764   // The section data starts after the header, the segment load command (and
765   // section headers) and the symbol table.
766   unsigned NumLoadCommands = 1;
767   uint64_t LoadCommandsSize = is64Bit() ?
768     sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64):
769     sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
770 
771   // Add the deployment target version info load command size, if used.
772   if (VersionInfo.Major != 0) {
773     ++NumLoadCommands;
774     if (VersionInfo.EmitBuildVersion)
775       LoadCommandsSize += sizeof(MachO::build_version_command);
776     else
777       LoadCommandsSize += sizeof(MachO::version_min_command);
778   }
779 
780   // Add the data-in-code load command size, if used.
781   unsigned NumDataRegions = Asm.getDataRegions().size();
782   if (NumDataRegions) {
783     ++NumLoadCommands;
784     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
785   }
786 
787   // Add the loh load command size, if used.
788   uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout);
789   uint64_t LOHSize = alignTo(LOHRawSize, is64Bit() ? 8 : 4);
790   if (LOHSize) {
791     ++NumLoadCommands;
792     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
793   }
794 
795   // Add the symbol table load command sizes, if used.
796   unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
797     UndefinedSymbolData.size();
798   if (NumSymbols) {
799     NumLoadCommands += 2;
800     LoadCommandsSize += (sizeof(MachO::symtab_command) +
801                          sizeof(MachO::dysymtab_command));
802   }
803 
804   // Add the linker option load commands sizes.
805   for (const auto &Option : Asm.getLinkerOptions()) {
806     ++NumLoadCommands;
807     LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit());
808   }
809 
810   // Compute the total size of the section data, as well as its file size and vm
811   // size.
812   uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) :
813                                sizeof(MachO::mach_header)) + LoadCommandsSize;
814   uint64_t SectionDataSize = 0;
815   uint64_t SectionDataFileSize = 0;
816   uint64_t VMSize = 0;
817   for (const MCSection &Sec : Asm) {
818     uint64_t Address = getSectionAddress(&Sec);
819     uint64_t Size = Layout.getSectionAddressSize(&Sec);
820     uint64_t FileSize = Layout.getSectionFileSize(&Sec);
821     FileSize += getPaddingSize(&Sec, Layout);
822 
823     VMSize = std::max(VMSize, Address + Size);
824 
825     if (Sec.isVirtualSection())
826       continue;
827 
828     SectionDataSize = std::max(SectionDataSize, Address + Size);
829     SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
830   }
831 
832   // The section data is padded to 4 bytes.
833   //
834   // FIXME: Is this machine dependent?
835   unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
836   SectionDataFileSize += SectionDataPadding;
837 
838   // Write the prolog, starting with the header and load command...
839   writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize,
840               Asm.getSubsectionsViaSymbols());
841   uint32_t Prot =
842       MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
843   writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart,
844                           SectionDataSize, Prot, Prot);
845 
846   // ... and then the section headers.
847   uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
848   for (const MCSection &Section : Asm) {
849     const auto &Sec = cast<MCSectionMachO>(Section);
850     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
851     unsigned NumRelocs = Relocs.size();
852     uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
853     unsigned Flags = Sec.getTypeAndAttributes();
854     if (Sec.hasInstructions())
855       Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS;
856     writeSection(Layout, Sec, getSectionAddress(&Sec), SectionStart, Flags,
857                  RelocTableEnd, NumRelocs);
858     RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
859   }
860 
861   // Write out the deployment target information, if it's available.
862   if (VersionInfo.Major != 0) {
863     auto EncodeVersion = [](VersionTuple V) -> uint32_t {
864       assert(!V.empty() && "empty version");
865       unsigned Update = V.getSubminor() ? *V.getSubminor() : 0;
866       unsigned Minor = V.getMinor() ? *V.getMinor() : 0;
867       assert(Update < 256 && "unencodable update target version");
868       assert(Minor < 256 && "unencodable minor target version");
869       assert(V.getMajor() < 65536 && "unencodable major target version");
870       return Update | (Minor << 8) | (V.getMajor() << 16);
871     };
872     uint32_t EncodedVersion = EncodeVersion(
873         VersionTuple(VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update));
874     uint32_t SDKVersion = !VersionInfo.SDKVersion.empty()
875                               ? EncodeVersion(VersionInfo.SDKVersion)
876                               : 0;
877     if (VersionInfo.EmitBuildVersion) {
878       // FIXME: Currently empty tools. Add clang version in the future.
879       W.write<uint32_t>(MachO::LC_BUILD_VERSION);
880       W.write<uint32_t>(sizeof(MachO::build_version_command));
881       W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform);
882       W.write<uint32_t>(EncodedVersion);
883       W.write<uint32_t>(SDKVersion);
884       W.write<uint32_t>(0);         // Empty tools list.
885     } else {
886       MachO::LoadCommandType LCType
887         = getLCFromMCVM(VersionInfo.TypeOrPlatform.Type);
888       W.write<uint32_t>(LCType);
889       W.write<uint32_t>(sizeof(MachO::version_min_command));
890       W.write<uint32_t>(EncodedVersion);
891       W.write<uint32_t>(SDKVersion);
892     }
893   }
894 
895   // Write the data-in-code load command, if used.
896   uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8;
897   if (NumDataRegions) {
898     uint64_t DataRegionsOffset = RelocTableEnd;
899     uint64_t DataRegionsSize = NumDataRegions * 8;
900     writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
901                              DataRegionsSize);
902   }
903 
904   // Write the loh load command, if used.
905   uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize;
906   if (LOHSize)
907     writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT,
908                              DataInCodeTableEnd, LOHSize);
909 
910   // Write the symbol table load command, if used.
911   if (NumSymbols) {
912     unsigned FirstLocalSymbol = 0;
913     unsigned NumLocalSymbols = LocalSymbolData.size();
914     unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
915     unsigned NumExternalSymbols = ExternalSymbolData.size();
916     unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
917     unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
918     unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
919     unsigned NumSymTabSymbols =
920       NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
921     uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
922     uint64_t IndirectSymbolOffset = 0;
923 
924     // If used, the indirect symbols are written after the section data.
925     if (NumIndirectSymbols)
926       IndirectSymbolOffset = LOHTableEnd;
927 
928     // The symbol table is written after the indirect symbol data.
929     uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize;
930 
931     // The string table is written after symbol table.
932     uint64_t StringTableOffset =
933       SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
934                                               sizeof(MachO::nlist_64) :
935                                               sizeof(MachO::nlist));
936     writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
937                            StringTableOffset, StringTable.getSize());
938 
939     writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
940                              FirstExternalSymbol, NumExternalSymbols,
941                              FirstUndefinedSymbol, NumUndefinedSymbols,
942                              IndirectSymbolOffset, NumIndirectSymbols);
943   }
944 
945   // Write the linker options load commands.
946   for (const auto &Option : Asm.getLinkerOptions())
947     writeLinkerOptionsLoadCommand(Option);
948 
949   // Write the actual section data.
950   for (const MCSection &Sec : Asm) {
951     Asm.writeSectionData(W.OS, &Sec, Layout);
952 
953     uint64_t Pad = getPaddingSize(&Sec, Layout);
954     W.OS.write_zeros(Pad);
955   }
956 
957   // Write the extra padding.
958   W.OS.write_zeros(SectionDataPadding);
959 
960   // Write the relocation entries.
961   for (const MCSection &Sec : Asm) {
962     // Write the section relocation entries, in reverse order to match 'as'
963     // (approximately, the exact algorithm is more complicated than this).
964     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
965     for (const RelAndSymbol &Rel : make_range(Relocs.rbegin(), Relocs.rend())) {
966       W.write<uint32_t>(Rel.MRE.r_word0);
967       W.write<uint32_t>(Rel.MRE.r_word1);
968     }
969   }
970 
971   // Write out the data-in-code region payload, if there is one.
972   for (MCAssembler::const_data_region_iterator
973          it = Asm.data_region_begin(), ie = Asm.data_region_end();
974          it != ie; ++it) {
975     const DataRegionData *Data = &(*it);
976     uint64_t Start = getSymbolAddress(*Data->Start, Layout);
977     uint64_t End;
978     if (Data->End)
979       End = getSymbolAddress(*Data->End, Layout);
980     else
981       report_fatal_error("Data region not terminated");
982 
983     LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
984                       << "  start: " << Start << "(" << Data->Start->getName()
985                       << ")"
986                       << "  end: " << End << "(" << Data->End->getName() << ")"
987                       << "  size: " << End - Start << "\n");
988     W.write<uint32_t>(Start);
989     W.write<uint16_t>(End - Start);
990     W.write<uint16_t>(Data->Kind);
991   }
992 
993   // Write out the loh commands, if there is one.
994   if (LOHSize) {
995 #ifndef NDEBUG
996     unsigned Start = W.OS.tell();
997 #endif
998     Asm.getLOHContainer().emit(*this, Layout);
999     // Pad to a multiple of the pointer size.
1000     W.OS.write_zeros(OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4));
1001     assert(W.OS.tell() - Start == LOHSize);
1002   }
1003 
1004   // Write the symbol table data, if used.
1005   if (NumSymbols) {
1006     // Write the indirect symbol entries.
1007     for (MCAssembler::const_indirect_symbol_iterator
1008            it = Asm.indirect_symbol_begin(),
1009            ie = Asm.indirect_symbol_end(); it != ie; ++it) {
1010       // Indirect symbols in the non-lazy symbol pointer section have some
1011       // special handling.
1012       const MCSectionMachO &Section =
1013           static_cast<const MCSectionMachO &>(*it->Section);
1014       if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) {
1015         // If this symbol is defined and internal, mark it as such.
1016         if (it->Symbol->isDefined() && !it->Symbol->isExternal()) {
1017           uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL;
1018           if (it->Symbol->isAbsolute())
1019             Flags |= MachO::INDIRECT_SYMBOL_ABS;
1020           W.write<uint32_t>(Flags);
1021           continue;
1022         }
1023       }
1024 
1025       W.write<uint32_t>(it->Symbol->getIndex());
1026     }
1027 
1028     // FIXME: Check that offsets match computed ones.
1029 
1030     // Write the symbol table entries.
1031     for (auto *SymbolData :
1032          {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
1033       for (MachSymbolData &Entry : *SymbolData)
1034         writeNlist(Entry, Layout);
1035 
1036     // Write the string table.
1037     StringTable.write(W.OS);
1038   }
1039 
1040   return W.OS.tell() - StartOffset;
1041 }
1042 
1043 std::unique_ptr<MCObjectWriter>
1044 llvm::createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
1045                              raw_pwrite_stream &OS, bool IsLittleEndian) {
1046   return llvm::make_unique<MachObjectWriter>(std::move(MOTW), OS,
1047                                              IsLittleEndian);
1048 }
1049