xref: /freebsd/contrib/llvm-project/llvm/lib/MC/MachObjectWriter.cpp (revision d5b0e70f7e04d971691517ce1304d86a1e367e2e)
1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/ADT/iterator_range.h"
12 #include "llvm/BinaryFormat/MachO.h"
13 #include "llvm/MC/MCAsmBackend.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCAssembler.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDirectives.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCFixupKindInfo.h"
20 #include "llvm/MC/MCFragment.h"
21 #include "llvm/MC/MCMachObjectWriter.h"
22 #include "llvm/MC/MCObjectWriter.h"
23 #include "llvm/MC/MCSection.h"
24 #include "llvm/MC/MCSectionMachO.h"
25 #include "llvm/MC/MCSymbol.h"
26 #include "llvm/MC/MCSymbolMachO.h"
27 #include "llvm/MC/MCValue.h"
28 #include "llvm/Support/Alignment.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/MathExtras.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include <algorithm>
35 #include <cassert>
36 #include <cstdint>
37 #include <string>
38 #include <utility>
39 #include <vector>
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "mc"
44 
45 void MachObjectWriter::reset() {
46   Relocations.clear();
47   IndirectSymBase.clear();
48   StringTable.clear();
49   LocalSymbolData.clear();
50   ExternalSymbolData.clear();
51   UndefinedSymbolData.clear();
52   MCObjectWriter::reset();
53 }
54 
55 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
56   // Undefined symbols are always extern.
57   if (S.isUndefined())
58     return true;
59 
60   // References to weak definitions require external relocation entries; the
61   // definition may not always be the one in the same object file.
62   if (cast<MCSymbolMachO>(S).isWeakDefinition())
63     return true;
64 
65   // Otherwise, we can use an internal relocation.
66   return false;
67 }
68 
69 bool MachObjectWriter::
70 MachSymbolData::operator<(const MachSymbolData &RHS) const {
71   return Symbol->getName() < RHS.Symbol->getName();
72 }
73 
74 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
75   const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
76     (MCFixupKind) Kind);
77 
78   return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
79 }
80 
81 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
82                                               const MCAsmLayout &Layout) const {
83   return getSectionAddress(Fragment->getParent()) +
84          Layout.getFragmentOffset(Fragment);
85 }
86 
87 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
88                                             const MCAsmLayout &Layout) const {
89   // If this is a variable, then recursively evaluate now.
90   if (S.isVariable()) {
91     if (const MCConstantExpr *C =
92           dyn_cast<const MCConstantExpr>(S.getVariableValue()))
93       return C->getValue();
94 
95     MCValue Target;
96     if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr))
97       report_fatal_error("unable to evaluate offset for variable '" +
98                          S.getName() + "'");
99 
100     // Verify that any used symbols are defined.
101     if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
102       report_fatal_error("unable to evaluate offset to undefined symbol '" +
103                          Target.getSymA()->getSymbol().getName() + "'");
104     if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
105       report_fatal_error("unable to evaluate offset to undefined symbol '" +
106                          Target.getSymB()->getSymbol().getName() + "'");
107 
108     uint64_t Address = Target.getConstant();
109     if (Target.getSymA())
110       Address += getSymbolAddress(Target.getSymA()->getSymbol(), Layout);
111     if (Target.getSymB())
112       Address += getSymbolAddress(Target.getSymB()->getSymbol(), Layout);
113     return Address;
114   }
115 
116   return getSectionAddress(S.getFragment()->getParent()) +
117          Layout.getSymbolOffset(S);
118 }
119 
120 uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec,
121                                           const MCAsmLayout &Layout) const {
122   uint64_t EndAddr = getSectionAddress(Sec) + Layout.getSectionAddressSize(Sec);
123   unsigned Next = Sec->getLayoutOrder() + 1;
124   if (Next >= Layout.getSectionOrder().size())
125     return 0;
126 
127   const MCSection &NextSec = *Layout.getSectionOrder()[Next];
128   if (NextSec.isVirtualSection())
129     return 0;
130   return offsetToAlignment(EndAddr, Align(NextSec.getAlignment()));
131 }
132 
133 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type,
134                                    unsigned NumLoadCommands,
135                                    unsigned LoadCommandsSize,
136                                    bool SubsectionsViaSymbols) {
137   uint32_t Flags = 0;
138 
139   if (SubsectionsViaSymbols)
140     Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
141 
142   // struct mach_header (28 bytes) or
143   // struct mach_header_64 (32 bytes)
144 
145   uint64_t Start = W.OS.tell();
146   (void) Start;
147 
148   W.write<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
149 
150   W.write<uint32_t>(TargetObjectWriter->getCPUType());
151   W.write<uint32_t>(TargetObjectWriter->getCPUSubtype());
152 
153   W.write<uint32_t>(Type);
154   W.write<uint32_t>(NumLoadCommands);
155   W.write<uint32_t>(LoadCommandsSize);
156   W.write<uint32_t>(Flags);
157   if (is64Bit())
158     W.write<uint32_t>(0); // reserved
159 
160   assert(W.OS.tell() - Start == (is64Bit() ? sizeof(MachO::mach_header_64)
161                                            : sizeof(MachO::mach_header)));
162 }
163 
164 void MachObjectWriter::writeWithPadding(StringRef Str, uint64_t Size) {
165   assert(Size >= Str.size());
166   W.OS << Str;
167   W.OS.write_zeros(Size - Str.size());
168 }
169 
170 /// writeSegmentLoadCommand - Write a segment load command.
171 ///
172 /// \param NumSections The number of sections in this segment.
173 /// \param SectionDataSize The total size of the sections.
174 void MachObjectWriter::writeSegmentLoadCommand(
175     StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize,
176     uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt,
177     uint32_t InitProt) {
178   // struct segment_command (56 bytes) or
179   // struct segment_command_64 (72 bytes)
180 
181   uint64_t Start = W.OS.tell();
182   (void) Start;
183 
184   unsigned SegmentLoadCommandSize =
185     is64Bit() ? sizeof(MachO::segment_command_64):
186     sizeof(MachO::segment_command);
187   W.write<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
188   W.write<uint32_t>(SegmentLoadCommandSize +
189           NumSections * (is64Bit() ? sizeof(MachO::section_64) :
190                          sizeof(MachO::section)));
191 
192   writeWithPadding(Name, 16);
193   if (is64Bit()) {
194     W.write<uint64_t>(VMAddr);                 // vmaddr
195     W.write<uint64_t>(VMSize); // vmsize
196     W.write<uint64_t>(SectionDataStartOffset); // file offset
197     W.write<uint64_t>(SectionDataSize); // file size
198   } else {
199     W.write<uint32_t>(VMAddr);                 // vmaddr
200     W.write<uint32_t>(VMSize); // vmsize
201     W.write<uint32_t>(SectionDataStartOffset); // file offset
202     W.write<uint32_t>(SectionDataSize); // file size
203   }
204   // maxprot
205   W.write<uint32_t>(MaxProt);
206   // initprot
207   W.write<uint32_t>(InitProt);
208   W.write<uint32_t>(NumSections);
209   W.write<uint32_t>(0); // flags
210 
211   assert(W.OS.tell() - Start == SegmentLoadCommandSize);
212 }
213 
214 void MachObjectWriter::writeSection(const MCAsmLayout &Layout,
215                                     const MCSection &Sec, uint64_t VMAddr,
216                                     uint64_t FileOffset, unsigned Flags,
217                                     uint64_t RelocationsStart,
218                                     unsigned NumRelocations) {
219   uint64_t SectionSize = Layout.getSectionAddressSize(&Sec);
220   const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);
221 
222   // The offset is unused for virtual sections.
223   if (Section.isVirtualSection()) {
224     assert(Layout.getSectionFileSize(&Sec) == 0 && "Invalid file size!");
225     FileOffset = 0;
226   }
227 
228   // struct section (68 bytes) or
229   // struct section_64 (80 bytes)
230 
231   uint64_t Start = W.OS.tell();
232   (void) Start;
233 
234   writeWithPadding(Section.getName(), 16);
235   writeWithPadding(Section.getSegmentName(), 16);
236   if (is64Bit()) {
237     W.write<uint64_t>(VMAddr);      // address
238     W.write<uint64_t>(SectionSize); // size
239   } else {
240     W.write<uint32_t>(VMAddr);      // address
241     W.write<uint32_t>(SectionSize); // size
242   }
243   W.write<uint32_t>(FileOffset);
244 
245   assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!");
246   W.write<uint32_t>(Log2_32(Section.getAlignment()));
247   W.write<uint32_t>(NumRelocations ? RelocationsStart : 0);
248   W.write<uint32_t>(NumRelocations);
249   W.write<uint32_t>(Flags);
250   W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1
251   W.write<uint32_t>(Section.getStubSize()); // reserved2
252   if (is64Bit())
253     W.write<uint32_t>(0); // reserved3
254 
255   assert(W.OS.tell() - Start ==
256          (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section)));
257 }
258 
259 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
260                                               uint32_t NumSymbols,
261                                               uint32_t StringTableOffset,
262                                               uint32_t StringTableSize) {
263   // struct symtab_command (24 bytes)
264 
265   uint64_t Start = W.OS.tell();
266   (void) Start;
267 
268   W.write<uint32_t>(MachO::LC_SYMTAB);
269   W.write<uint32_t>(sizeof(MachO::symtab_command));
270   W.write<uint32_t>(SymbolOffset);
271   W.write<uint32_t>(NumSymbols);
272   W.write<uint32_t>(StringTableOffset);
273   W.write<uint32_t>(StringTableSize);
274 
275   assert(W.OS.tell() - Start == sizeof(MachO::symtab_command));
276 }
277 
278 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
279                                                 uint32_t NumLocalSymbols,
280                                                 uint32_t FirstExternalSymbol,
281                                                 uint32_t NumExternalSymbols,
282                                                 uint32_t FirstUndefinedSymbol,
283                                                 uint32_t NumUndefinedSymbols,
284                                                 uint32_t IndirectSymbolOffset,
285                                                 uint32_t NumIndirectSymbols) {
286   // struct dysymtab_command (80 bytes)
287 
288   uint64_t Start = W.OS.tell();
289   (void) Start;
290 
291   W.write<uint32_t>(MachO::LC_DYSYMTAB);
292   W.write<uint32_t>(sizeof(MachO::dysymtab_command));
293   W.write<uint32_t>(FirstLocalSymbol);
294   W.write<uint32_t>(NumLocalSymbols);
295   W.write<uint32_t>(FirstExternalSymbol);
296   W.write<uint32_t>(NumExternalSymbols);
297   W.write<uint32_t>(FirstUndefinedSymbol);
298   W.write<uint32_t>(NumUndefinedSymbols);
299   W.write<uint32_t>(0); // tocoff
300   W.write<uint32_t>(0); // ntoc
301   W.write<uint32_t>(0); // modtaboff
302   W.write<uint32_t>(0); // nmodtab
303   W.write<uint32_t>(0); // extrefsymoff
304   W.write<uint32_t>(0); // nextrefsyms
305   W.write<uint32_t>(IndirectSymbolOffset);
306   W.write<uint32_t>(NumIndirectSymbols);
307   W.write<uint32_t>(0); // extreloff
308   W.write<uint32_t>(0); // nextrel
309   W.write<uint32_t>(0); // locreloff
310   W.write<uint32_t>(0); // nlocrel
311 
312   assert(W.OS.tell() - Start == sizeof(MachO::dysymtab_command));
313 }
314 
315 MachObjectWriter::MachSymbolData *
316 MachObjectWriter::findSymbolData(const MCSymbol &Sym) {
317   for (auto *SymbolData :
318        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
319     for (MachSymbolData &Entry : *SymbolData)
320       if (Entry.Symbol == &Sym)
321         return &Entry;
322 
323   return nullptr;
324 }
325 
326 const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const {
327   const MCSymbol *S = &Sym;
328   while (S->isVariable()) {
329     const MCExpr *Value = S->getVariableValue();
330     const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value);
331     if (!Ref)
332       return *S;
333     S = &Ref->getSymbol();
334   }
335   return *S;
336 }
337 
338 void MachObjectWriter::writeNlist(MachSymbolData &MSD,
339                                   const MCAsmLayout &Layout) {
340   const MCSymbol *Symbol = MSD.Symbol;
341   const MCSymbol &Data = *Symbol;
342   const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol);
343   uint8_t SectionIndex = MSD.SectionIndex;
344   uint8_t Type = 0;
345   uint64_t Address = 0;
346   bool IsAlias = Symbol != AliasedSymbol;
347 
348   const MCSymbol &OrigSymbol = *Symbol;
349   MachSymbolData *AliaseeInfo;
350   if (IsAlias) {
351     AliaseeInfo = findSymbolData(*AliasedSymbol);
352     if (AliaseeInfo)
353       SectionIndex = AliaseeInfo->SectionIndex;
354     Symbol = AliasedSymbol;
355     // FIXME: Should this update Data as well?
356   }
357 
358   // Set the N_TYPE bits. See <mach-o/nlist.h>.
359   //
360   // FIXME: Are the prebound or indirect fields possible here?
361   if (IsAlias && Symbol->isUndefined())
362     Type = MachO::N_INDR;
363   else if (Symbol->isUndefined())
364     Type = MachO::N_UNDF;
365   else if (Symbol->isAbsolute())
366     Type = MachO::N_ABS;
367   else
368     Type = MachO::N_SECT;
369 
370   // FIXME: Set STAB bits.
371 
372   if (Data.isPrivateExtern())
373     Type |= MachO::N_PEXT;
374 
375   // Set external bit.
376   if (Data.isExternal() || (!IsAlias && Symbol->isUndefined()))
377     Type |= MachO::N_EXT;
378 
379   // Compute the symbol address.
380   if (IsAlias && Symbol->isUndefined())
381     Address = AliaseeInfo->StringIndex;
382   else if (Symbol->isDefined())
383     Address = getSymbolAddress(OrigSymbol, Layout);
384   else if (Symbol->isCommon()) {
385     // Common symbols are encoded with the size in the address
386     // field, and their alignment in the flags.
387     Address = Symbol->getCommonSize();
388   }
389 
390   // struct nlist (12 bytes)
391 
392   W.write<uint32_t>(MSD.StringIndex);
393   W.OS << char(Type);
394   W.OS << char(SectionIndex);
395 
396   // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
397   // value.
398   bool EncodeAsAltEntry =
399     IsAlias && cast<MCSymbolMachO>(OrigSymbol).isAltEntry();
400   W.write<uint16_t>(cast<MCSymbolMachO>(Symbol)->getEncodedFlags(EncodeAsAltEntry));
401   if (is64Bit())
402     W.write<uint64_t>(Address);
403   else
404     W.write<uint32_t>(Address);
405 }
406 
407 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type,
408                                                 uint32_t DataOffset,
409                                                 uint32_t DataSize) {
410   uint64_t Start = W.OS.tell();
411   (void) Start;
412 
413   W.write<uint32_t>(Type);
414   W.write<uint32_t>(sizeof(MachO::linkedit_data_command));
415   W.write<uint32_t>(DataOffset);
416   W.write<uint32_t>(DataSize);
417 
418   assert(W.OS.tell() - Start == sizeof(MachO::linkedit_data_command));
419 }
420 
421 static unsigned ComputeLinkerOptionsLoadCommandSize(
422   const std::vector<std::string> &Options, bool is64Bit)
423 {
424   unsigned Size = sizeof(MachO::linker_option_command);
425   for (const std::string &Option : Options)
426     Size += Option.size() + 1;
427   return alignTo(Size, is64Bit ? 8 : 4);
428 }
429 
430 void MachObjectWriter::writeLinkerOptionsLoadCommand(
431   const std::vector<std::string> &Options)
432 {
433   unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
434   uint64_t Start = W.OS.tell();
435   (void) Start;
436 
437   W.write<uint32_t>(MachO::LC_LINKER_OPTION);
438   W.write<uint32_t>(Size);
439   W.write<uint32_t>(Options.size());
440   uint64_t BytesWritten = sizeof(MachO::linker_option_command);
441   for (const std::string &Option : Options) {
442     // Write each string, including the null byte.
443     W.OS << Option << '\0';
444     BytesWritten += Option.size() + 1;
445   }
446 
447   // Pad to a multiple of the pointer size.
448   W.OS.write_zeros(
449       offsetToAlignment(BytesWritten, is64Bit() ? Align(8) : Align(4)));
450 
451   assert(W.OS.tell() - Start == Size);
452 }
453 
454 static bool isFixupTargetValid(const MCValue &Target) {
455   // Target is (LHS - RHS + cst).
456   // We don't support the form where LHS is null: -RHS + cst
457   if (!Target.getSymA() && Target.getSymB())
458     return false;
459   return true;
460 }
461 
462 void MachObjectWriter::recordRelocation(MCAssembler &Asm,
463                                         const MCAsmLayout &Layout,
464                                         const MCFragment *Fragment,
465                                         const MCFixup &Fixup, MCValue Target,
466                                         uint64_t &FixedValue) {
467   if (!isFixupTargetValid(Target)) {
468     Asm.getContext().reportError(Fixup.getLoc(),
469                                  "unsupported relocation expression");
470     return;
471   }
472 
473   TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup,
474                                        Target, FixedValue);
475 }
476 
477 void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
478   // This is the point where 'as' creates actual symbols for indirect symbols
479   // (in the following two passes). It would be easier for us to do this sooner
480   // when we see the attribute, but that makes getting the order in the symbol
481   // table much more complicated than it is worth.
482   //
483   // FIXME: Revisit this when the dust settles.
484 
485   // Report errors for use of .indirect_symbol not in a symbol pointer section
486   // or stub section.
487   for (IndirectSymbolData &ISD : llvm::make_range(Asm.indirect_symbol_begin(),
488                                                   Asm.indirect_symbol_end())) {
489     const MCSectionMachO &Section = cast<MCSectionMachO>(*ISD.Section);
490 
491     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
492         Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
493         Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS &&
494         Section.getType() != MachO::S_SYMBOL_STUBS) {
495       MCSymbol &Symbol = *ISD.Symbol;
496       report_fatal_error("indirect symbol '" + Symbol.getName() +
497                          "' not in a symbol pointer or stub section");
498     }
499   }
500 
501   // Bind non-lazy symbol pointers first.
502   unsigned IndirectIndex = 0;
503   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
504          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
505     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
506 
507     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
508         Section.getType() !=  MachO::S_THREAD_LOCAL_VARIABLE_POINTERS)
509       continue;
510 
511     // Initialize the section indirect symbol base, if necessary.
512     IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
513 
514     Asm.registerSymbol(*it->Symbol);
515   }
516 
517   // Then lazy symbol pointers and symbol stubs.
518   IndirectIndex = 0;
519   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
520          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
521     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
522 
523     if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
524         Section.getType() != MachO::S_SYMBOL_STUBS)
525       continue;
526 
527     // Initialize the section indirect symbol base, if necessary.
528     IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
529 
530     // Set the symbol type to undefined lazy, but only on construction.
531     //
532     // FIXME: Do not hardcode.
533     bool Created;
534     Asm.registerSymbol(*it->Symbol, &Created);
535     if (Created)
536       cast<MCSymbolMachO>(it->Symbol)->setReferenceTypeUndefinedLazy(true);
537   }
538 }
539 
540 /// computeSymbolTable - Compute the symbol table data
541 void MachObjectWriter::computeSymbolTable(
542     MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
543     std::vector<MachSymbolData> &ExternalSymbolData,
544     std::vector<MachSymbolData> &UndefinedSymbolData) {
545   // Build section lookup table.
546   DenseMap<const MCSection*, uint8_t> SectionIndexMap;
547   unsigned Index = 1;
548   for (MCAssembler::iterator it = Asm.begin(),
549          ie = Asm.end(); it != ie; ++it, ++Index)
550     SectionIndexMap[&*it] = Index;
551   assert(Index <= 256 && "Too many sections!");
552 
553   // Build the string table.
554   for (const MCSymbol &Symbol : Asm.symbols()) {
555     if (!Asm.isSymbolLinkerVisible(Symbol))
556       continue;
557 
558     StringTable.add(Symbol.getName());
559   }
560   StringTable.finalize();
561 
562   // Build the symbol arrays but only for non-local symbols.
563   //
564   // The particular order that we collect and then sort the symbols is chosen to
565   // match 'as'. Even though it doesn't matter for correctness, this is
566   // important for letting us diff .o files.
567   for (const MCSymbol &Symbol : Asm.symbols()) {
568     // Ignore non-linker visible symbols.
569     if (!Asm.isSymbolLinkerVisible(Symbol))
570       continue;
571 
572     if (!Symbol.isExternal() && !Symbol.isUndefined())
573       continue;
574 
575     MachSymbolData MSD;
576     MSD.Symbol = &Symbol;
577     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
578 
579     if (Symbol.isUndefined()) {
580       MSD.SectionIndex = 0;
581       UndefinedSymbolData.push_back(MSD);
582     } else if (Symbol.isAbsolute()) {
583       MSD.SectionIndex = 0;
584       ExternalSymbolData.push_back(MSD);
585     } else {
586       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
587       assert(MSD.SectionIndex && "Invalid section index!");
588       ExternalSymbolData.push_back(MSD);
589     }
590   }
591 
592   // Now add the data for local symbols.
593   for (const MCSymbol &Symbol : Asm.symbols()) {
594     // Ignore non-linker visible symbols.
595     if (!Asm.isSymbolLinkerVisible(Symbol))
596       continue;
597 
598     if (Symbol.isExternal() || Symbol.isUndefined())
599       continue;
600 
601     MachSymbolData MSD;
602     MSD.Symbol = &Symbol;
603     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
604 
605     if (Symbol.isAbsolute()) {
606       MSD.SectionIndex = 0;
607       LocalSymbolData.push_back(MSD);
608     } else {
609       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
610       assert(MSD.SectionIndex && "Invalid section index!");
611       LocalSymbolData.push_back(MSD);
612     }
613   }
614 
615   // External and undefined symbols are required to be in lexicographic order.
616   llvm::sort(ExternalSymbolData);
617   llvm::sort(UndefinedSymbolData);
618 
619   // Set the symbol indices.
620   Index = 0;
621   for (auto *SymbolData :
622        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
623     for (MachSymbolData &Entry : *SymbolData)
624       Entry.Symbol->setIndex(Index++);
625 
626   for (const MCSection &Section : Asm) {
627     for (RelAndSymbol &Rel : Relocations[&Section]) {
628       if (!Rel.Sym)
629         continue;
630 
631       // Set the Index and the IsExtern bit.
632       unsigned Index = Rel.Sym->getIndex();
633       assert(isInt<24>(Index));
634       if (W.Endian == support::little)
635         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27);
636       else
637         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
638     }
639   }
640 }
641 
642 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
643                                                const MCAsmLayout &Layout) {
644   uint64_t StartAddress = 0;
645   for (const MCSection *Sec : Layout.getSectionOrder()) {
646     StartAddress = alignTo(StartAddress, Sec->getAlignment());
647     SectionAddress[Sec] = StartAddress;
648     StartAddress += Layout.getSectionAddressSize(Sec);
649 
650     // Explicitly pad the section to match the alignment requirements of the
651     // following one. This is for 'gas' compatibility, it shouldn't
652     /// strictly be necessary.
653     StartAddress += getPaddingSize(Sec, Layout);
654   }
655 }
656 
657 void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
658                                                 const MCAsmLayout &Layout) {
659   computeSectionAddresses(Asm, Layout);
660 
661   // Create symbol data for any indirect symbols.
662   bindIndirectSymbols(Asm);
663 }
664 
665 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
666     const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B,
667     bool InSet) const {
668   // FIXME: We don't handle things like
669   // foo = .
670   // creating atoms.
671   if (A.isVariable() || B.isVariable())
672     return false;
673   return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, A, B,
674                                                                 InSet);
675 }
676 
677 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
678     const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
679     bool InSet, bool IsPCRel) const {
680   if (InSet)
681     return true;
682 
683   // The effective address is
684   //     addr(atom(A)) + offset(A)
685   //   - addr(atom(B)) - offset(B)
686   // and the offsets are not relocatable, so the fixup is fully resolved when
687   //  addr(atom(A)) - addr(atom(B)) == 0.
688   const MCSymbol &SA = findAliasedSymbol(SymA);
689   const MCSection &SecA = SA.getSection();
690   const MCSection &SecB = *FB.getParent();
691 
692   if (IsPCRel) {
693     // The simple (Darwin, except on x86_64) way of dealing with this was to
694     // assume that any reference to a temporary symbol *must* be a temporary
695     // symbol in the same atom, unless the sections differ. Therefore, any PCrel
696     // relocation to a temporary symbol (in the same section) is fully
697     // resolved. This also works in conjunction with absolutized .set, which
698     // requires the compiler to use .set to absolutize the differences between
699     // symbols which the compiler knows to be assembly time constants, so we
700     // don't need to worry about considering symbol differences fully resolved.
701     //
702     // If the file isn't using sub-sections-via-symbols, we can make the
703     // same assumptions about any symbol that we normally make about
704     // assembler locals.
705 
706     bool hasReliableSymbolDifference = isX86_64();
707     if (!hasReliableSymbolDifference) {
708       if (!SA.isInSection() || &SecA != &SecB ||
709           (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() &&
710            Asm.getSubsectionsViaSymbols()))
711         return false;
712       return true;
713     }
714     // For Darwin x86_64, there is one special case when the reference IsPCRel.
715     // If the fragment with the reference does not have a base symbol but meets
716     // the simple way of dealing with this, in that it is a temporary symbol in
717     // the same atom then it is assumed to be fully resolved.  This is needed so
718     // a relocation entry is not created and so the static linker does not
719     // mess up the reference later.
720     else if(!FB.getAtom() &&
721             SA.isTemporary() && SA.isInSection() && &SecA == &SecB){
722       return true;
723     }
724   }
725 
726   // If they are not in the same section, we can't compute the diff.
727   if (&SecA != &SecB)
728     return false;
729 
730   const MCFragment *FA = SA.getFragment();
731 
732   // Bail if the symbol has no fragment.
733   if (!FA)
734     return false;
735 
736   // If the atoms are the same, they are guaranteed to have the same address.
737   if (FA->getAtom() == FB.getAtom())
738     return true;
739 
740   // Otherwise, we can't prove this is fully resolved.
741   return false;
742 }
743 
744 static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) {
745   switch (Type) {
746   case MCVM_OSXVersionMin:     return MachO::LC_VERSION_MIN_MACOSX;
747   case MCVM_IOSVersionMin:     return MachO::LC_VERSION_MIN_IPHONEOS;
748   case MCVM_TvOSVersionMin:    return MachO::LC_VERSION_MIN_TVOS;
749   case MCVM_WatchOSVersionMin: return MachO::LC_VERSION_MIN_WATCHOS;
750   }
751   llvm_unreachable("Invalid mc version min type");
752 }
753 
754 uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
755                                        const MCAsmLayout &Layout) {
756   uint64_t StartOffset = W.OS.tell();
757 
758   // Compute symbol table information and bind symbol indices.
759   computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
760                      UndefinedSymbolData);
761 
762   if (!Asm.CGProfile.empty()) {
763     MCSection *CGProfileSection = Asm.getContext().getMachOSection(
764         "__LLVM", "__cg_profile", 0, SectionKind::getMetadata());
765     MCDataFragment *Frag = dyn_cast_or_null<MCDataFragment>(
766         &*CGProfileSection->getFragmentList().begin());
767     assert(Frag && "call graph profile section not reserved");
768     Frag->getContents().clear();
769     raw_svector_ostream OS(Frag->getContents());
770     for (const MCAssembler::CGProfileEntry &CGPE : Asm.CGProfile) {
771       uint32_t FromIndex = CGPE.From->getSymbol().getIndex();
772       uint32_t ToIndex = CGPE.To->getSymbol().getIndex();
773       support::endian::write(OS, FromIndex, W.Endian);
774       support::endian::write(OS, ToIndex, W.Endian);
775       support::endian::write(OS, CGPE.Count, W.Endian);
776     }
777   }
778 
779   unsigned NumSections = Asm.size();
780   const MCAssembler::VersionInfoType &VersionInfo =
781     Layout.getAssembler().getVersionInfo();
782 
783   // The section data starts after the header, the segment load command (and
784   // section headers) and the symbol table.
785   unsigned NumLoadCommands = 1;
786   uint64_t LoadCommandsSize = is64Bit() ?
787     sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64):
788     sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
789 
790   // Add the deployment target version info load command size, if used.
791   if (VersionInfo.Major != 0) {
792     ++NumLoadCommands;
793     if (VersionInfo.EmitBuildVersion)
794       LoadCommandsSize += sizeof(MachO::build_version_command);
795     else
796       LoadCommandsSize += sizeof(MachO::version_min_command);
797   }
798 
799   const MCAssembler::VersionInfoType &TargetVariantVersionInfo =
800       Layout.getAssembler().getDarwinTargetVariantVersionInfo();
801 
802   // Add the target variant version info load command size, if used.
803   if (TargetVariantVersionInfo.Major != 0) {
804     ++NumLoadCommands;
805     assert(TargetVariantVersionInfo.EmitBuildVersion &&
806            "target variant should use build version");
807     LoadCommandsSize += sizeof(MachO::build_version_command);
808   }
809 
810   // Add the data-in-code load command size, if used.
811   unsigned NumDataRegions = Asm.getDataRegions().size();
812   if (NumDataRegions) {
813     ++NumLoadCommands;
814     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
815   }
816 
817   // Add the loh load command size, if used.
818   uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout);
819   uint64_t LOHSize = alignTo(LOHRawSize, is64Bit() ? 8 : 4);
820   if (LOHSize) {
821     ++NumLoadCommands;
822     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
823   }
824 
825   // Add the symbol table load command sizes, if used.
826   unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
827     UndefinedSymbolData.size();
828   if (NumSymbols) {
829     NumLoadCommands += 2;
830     LoadCommandsSize += (sizeof(MachO::symtab_command) +
831                          sizeof(MachO::dysymtab_command));
832   }
833 
834   // Add the linker option load commands sizes.
835   for (const auto &Option : Asm.getLinkerOptions()) {
836     ++NumLoadCommands;
837     LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit());
838   }
839 
840   // Compute the total size of the section data, as well as its file size and vm
841   // size.
842   uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) :
843                                sizeof(MachO::mach_header)) + LoadCommandsSize;
844   uint64_t SectionDataSize = 0;
845   uint64_t SectionDataFileSize = 0;
846   uint64_t VMSize = 0;
847   for (const MCSection &Sec : Asm) {
848     uint64_t Address = getSectionAddress(&Sec);
849     uint64_t Size = Layout.getSectionAddressSize(&Sec);
850     uint64_t FileSize = Layout.getSectionFileSize(&Sec);
851     FileSize += getPaddingSize(&Sec, Layout);
852 
853     VMSize = std::max(VMSize, Address + Size);
854 
855     if (Sec.isVirtualSection())
856       continue;
857 
858     SectionDataSize = std::max(SectionDataSize, Address + Size);
859     SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
860   }
861 
862   // The section data is padded to pointer size bytes.
863   //
864   // FIXME: Is this machine dependent?
865   unsigned SectionDataPadding =
866       offsetToAlignment(SectionDataFileSize, is64Bit() ? Align(8) : Align(4));
867   SectionDataFileSize += SectionDataPadding;
868 
869   // Write the prolog, starting with the header and load command...
870   writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize,
871               Asm.getSubsectionsViaSymbols());
872   uint32_t Prot =
873       MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
874   writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart,
875                           SectionDataSize, Prot, Prot);
876 
877   // ... and then the section headers.
878   uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
879   for (const MCSection &Section : Asm) {
880     const auto &Sec = cast<MCSectionMachO>(Section);
881     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
882     unsigned NumRelocs = Relocs.size();
883     uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
884     unsigned Flags = Sec.getTypeAndAttributes();
885     if (Sec.hasInstructions())
886       Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS;
887     writeSection(Layout, Sec, getSectionAddress(&Sec), SectionStart, Flags,
888                  RelocTableEnd, NumRelocs);
889     RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
890   }
891 
892   // Write out the deployment target information, if it's available.
893   auto EmitDeploymentTargetVersion =
894       [&](const MCAssembler::VersionInfoType &VersionInfo) {
895         auto EncodeVersion = [](VersionTuple V) -> uint32_t {
896           assert(!V.empty() && "empty version");
897           unsigned Update = V.getSubminor().getValueOr(0);
898           unsigned Minor = V.getMinor().getValueOr(0);
899           assert(Update < 256 && "unencodable update target version");
900           assert(Minor < 256 && "unencodable minor target version");
901           assert(V.getMajor() < 65536 && "unencodable major target version");
902           return Update | (Minor << 8) | (V.getMajor() << 16);
903         };
904         uint32_t EncodedVersion = EncodeVersion(VersionTuple(
905             VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update));
906         uint32_t SDKVersion = !VersionInfo.SDKVersion.empty()
907                                   ? EncodeVersion(VersionInfo.SDKVersion)
908                                   : 0;
909         if (VersionInfo.EmitBuildVersion) {
910           // FIXME: Currently empty tools. Add clang version in the future.
911           W.write<uint32_t>(MachO::LC_BUILD_VERSION);
912           W.write<uint32_t>(sizeof(MachO::build_version_command));
913           W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform);
914           W.write<uint32_t>(EncodedVersion);
915           W.write<uint32_t>(SDKVersion);
916           W.write<uint32_t>(0); // Empty tools list.
917         } else {
918           MachO::LoadCommandType LCType =
919               getLCFromMCVM(VersionInfo.TypeOrPlatform.Type);
920           W.write<uint32_t>(LCType);
921           W.write<uint32_t>(sizeof(MachO::version_min_command));
922           W.write<uint32_t>(EncodedVersion);
923           W.write<uint32_t>(SDKVersion);
924         }
925       };
926   if (VersionInfo.Major != 0)
927     EmitDeploymentTargetVersion(VersionInfo);
928   if (TargetVariantVersionInfo.Major != 0)
929     EmitDeploymentTargetVersion(TargetVariantVersionInfo);
930 
931   // Write the data-in-code load command, if used.
932   uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8;
933   if (NumDataRegions) {
934     uint64_t DataRegionsOffset = RelocTableEnd;
935     uint64_t DataRegionsSize = NumDataRegions * 8;
936     writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
937                              DataRegionsSize);
938   }
939 
940   // Write the loh load command, if used.
941   uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize;
942   if (LOHSize)
943     writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT,
944                              DataInCodeTableEnd, LOHSize);
945 
946   // Write the symbol table load command, if used.
947   if (NumSymbols) {
948     unsigned FirstLocalSymbol = 0;
949     unsigned NumLocalSymbols = LocalSymbolData.size();
950     unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
951     unsigned NumExternalSymbols = ExternalSymbolData.size();
952     unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
953     unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
954     unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
955     unsigned NumSymTabSymbols =
956       NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
957     uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
958     uint64_t IndirectSymbolOffset = 0;
959 
960     // If used, the indirect symbols are written after the section data.
961     if (NumIndirectSymbols)
962       IndirectSymbolOffset = LOHTableEnd;
963 
964     // The symbol table is written after the indirect symbol data.
965     uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize;
966 
967     // The string table is written after symbol table.
968     uint64_t StringTableOffset =
969       SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
970                                               sizeof(MachO::nlist_64) :
971                                               sizeof(MachO::nlist));
972     writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
973                            StringTableOffset, StringTable.getSize());
974 
975     writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
976                              FirstExternalSymbol, NumExternalSymbols,
977                              FirstUndefinedSymbol, NumUndefinedSymbols,
978                              IndirectSymbolOffset, NumIndirectSymbols);
979   }
980 
981   // Write the linker options load commands.
982   for (const auto &Option : Asm.getLinkerOptions())
983     writeLinkerOptionsLoadCommand(Option);
984 
985   // Write the actual section data.
986   for (const MCSection &Sec : Asm) {
987     Asm.writeSectionData(W.OS, &Sec, Layout);
988 
989     uint64_t Pad = getPaddingSize(&Sec, Layout);
990     W.OS.write_zeros(Pad);
991   }
992 
993   // Write the extra padding.
994   W.OS.write_zeros(SectionDataPadding);
995 
996   // Write the relocation entries.
997   for (const MCSection &Sec : Asm) {
998     // Write the section relocation entries, in reverse order to match 'as'
999     // (approximately, the exact algorithm is more complicated than this).
1000     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
1001     for (const RelAndSymbol &Rel : llvm::reverse(Relocs)) {
1002       W.write<uint32_t>(Rel.MRE.r_word0);
1003       W.write<uint32_t>(Rel.MRE.r_word1);
1004     }
1005   }
1006 
1007   // Write out the data-in-code region payload, if there is one.
1008   for (MCAssembler::const_data_region_iterator
1009          it = Asm.data_region_begin(), ie = Asm.data_region_end();
1010          it != ie; ++it) {
1011     const DataRegionData *Data = &(*it);
1012     uint64_t Start = getSymbolAddress(*Data->Start, Layout);
1013     uint64_t End;
1014     if (Data->End)
1015       End = getSymbolAddress(*Data->End, Layout);
1016     else
1017       report_fatal_error("Data region not terminated");
1018 
1019     LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
1020                       << "  start: " << Start << "(" << Data->Start->getName()
1021                       << ")"
1022                       << "  end: " << End << "(" << Data->End->getName() << ")"
1023                       << "  size: " << End - Start << "\n");
1024     W.write<uint32_t>(Start);
1025     W.write<uint16_t>(End - Start);
1026     W.write<uint16_t>(Data->Kind);
1027   }
1028 
1029   // Write out the loh commands, if there is one.
1030   if (LOHSize) {
1031 #ifndef NDEBUG
1032     unsigned Start = W.OS.tell();
1033 #endif
1034     Asm.getLOHContainer().emit(*this, Layout);
1035     // Pad to a multiple of the pointer size.
1036     W.OS.write_zeros(
1037         offsetToAlignment(LOHRawSize, is64Bit() ? Align(8) : Align(4)));
1038     assert(W.OS.tell() - Start == LOHSize);
1039   }
1040 
1041   // Write the symbol table data, if used.
1042   if (NumSymbols) {
1043     // Write the indirect symbol entries.
1044     for (MCAssembler::const_indirect_symbol_iterator
1045            it = Asm.indirect_symbol_begin(),
1046            ie = Asm.indirect_symbol_end(); it != ie; ++it) {
1047       // Indirect symbols in the non-lazy symbol pointer section have some
1048       // special handling.
1049       const MCSectionMachO &Section =
1050           static_cast<const MCSectionMachO &>(*it->Section);
1051       if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) {
1052         // If this symbol is defined and internal, mark it as such.
1053         if (it->Symbol->isDefined() && !it->Symbol->isExternal()) {
1054           uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL;
1055           if (it->Symbol->isAbsolute())
1056             Flags |= MachO::INDIRECT_SYMBOL_ABS;
1057           W.write<uint32_t>(Flags);
1058           continue;
1059         }
1060       }
1061 
1062       W.write<uint32_t>(it->Symbol->getIndex());
1063     }
1064 
1065     // FIXME: Check that offsets match computed ones.
1066 
1067     // Write the symbol table entries.
1068     for (auto *SymbolData :
1069          {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
1070       for (MachSymbolData &Entry : *SymbolData)
1071         writeNlist(Entry, Layout);
1072 
1073     // Write the string table.
1074     StringTable.write(W.OS);
1075   }
1076 
1077   return W.OS.tell() - StartOffset;
1078 }
1079 
1080 std::unique_ptr<MCObjectWriter>
1081 llvm::createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
1082                              raw_pwrite_stream &OS, bool IsLittleEndian) {
1083   return std::make_unique<MachObjectWriter>(std::move(MOTW), OS,
1084                                              IsLittleEndian);
1085 }
1086