xref: /freebsd/contrib/llvm-project/llvm/lib/MC/MachObjectWriter.cpp (revision 4fbb9c43aa44d9145151bb5f77d302ba01fb7551)
1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/ADT/iterator_range.h"
12 #include "llvm/BinaryFormat/MachO.h"
13 #include "llvm/MC/MCAsmBackend.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCAssembler.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDirectives.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCFixupKindInfo.h"
20 #include "llvm/MC/MCFragment.h"
21 #include "llvm/MC/MCMachObjectWriter.h"
22 #include "llvm/MC/MCObjectFileInfo.h"
23 #include "llvm/MC/MCObjectWriter.h"
24 #include "llvm/MC/MCSection.h"
25 #include "llvm/MC/MCSectionMachO.h"
26 #include "llvm/MC/MCSymbol.h"
27 #include "llvm/MC/MCSymbolMachO.h"
28 #include "llvm/MC/MCValue.h"
29 #include "llvm/Support/Alignment.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/LEB128.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <algorithm>
37 #include <cassert>
38 #include <cstdint>
39 #include <string>
40 #include <utility>
41 #include <vector>
42 
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "mc"
46 
47 void MachObjectWriter::reset() {
48   Relocations.clear();
49   IndirectSymBase.clear();
50   StringTable.clear();
51   LocalSymbolData.clear();
52   ExternalSymbolData.clear();
53   UndefinedSymbolData.clear();
54   MCObjectWriter::reset();
55 }
56 
57 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
58   // Undefined symbols are always extern.
59   if (S.isUndefined())
60     return true;
61 
62   // References to weak definitions require external relocation entries; the
63   // definition may not always be the one in the same object file.
64   if (cast<MCSymbolMachO>(S).isWeakDefinition())
65     return true;
66 
67   // Otherwise, we can use an internal relocation.
68   return false;
69 }
70 
71 bool MachObjectWriter::
72 MachSymbolData::operator<(const MachSymbolData &RHS) const {
73   return Symbol->getName() < RHS.Symbol->getName();
74 }
75 
76 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
77   const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
78     (MCFixupKind) Kind);
79 
80   return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
81 }
82 
83 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
84                                               const MCAsmLayout &Layout) const {
85   return getSectionAddress(Fragment->getParent()) +
86          Layout.getFragmentOffset(Fragment);
87 }
88 
89 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
90                                             const MCAsmLayout &Layout) const {
91   // If this is a variable, then recursively evaluate now.
92   if (S.isVariable()) {
93     if (const MCConstantExpr *C =
94           dyn_cast<const MCConstantExpr>(S.getVariableValue()))
95       return C->getValue();
96 
97     MCValue Target;
98     if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr))
99       report_fatal_error("unable to evaluate offset for variable '" +
100                          S.getName() + "'");
101 
102     // Verify that any used symbols are defined.
103     if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
104       report_fatal_error("unable to evaluate offset to undefined symbol '" +
105                          Target.getSymA()->getSymbol().getName() + "'");
106     if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
107       report_fatal_error("unable to evaluate offset to undefined symbol '" +
108                          Target.getSymB()->getSymbol().getName() + "'");
109 
110     uint64_t Address = Target.getConstant();
111     if (Target.getSymA())
112       Address += getSymbolAddress(Target.getSymA()->getSymbol(), Layout);
113     if (Target.getSymB())
114       Address += getSymbolAddress(Target.getSymB()->getSymbol(), Layout);
115     return Address;
116   }
117 
118   return getSectionAddress(S.getFragment()->getParent()) +
119          Layout.getSymbolOffset(S);
120 }
121 
122 uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec,
123                                           const MCAsmLayout &Layout) const {
124   uint64_t EndAddr = getSectionAddress(Sec) + Layout.getSectionAddressSize(Sec);
125   unsigned Next = Sec->getLayoutOrder() + 1;
126   if (Next >= Layout.getSectionOrder().size())
127     return 0;
128 
129   const MCSection &NextSec = *Layout.getSectionOrder()[Next];
130   if (NextSec.isVirtualSection())
131     return 0;
132   return offsetToAlignment(EndAddr, NextSec.getAlign());
133 }
134 
135 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type,
136                                    unsigned NumLoadCommands,
137                                    unsigned LoadCommandsSize,
138                                    bool SubsectionsViaSymbols) {
139   uint32_t Flags = 0;
140 
141   if (SubsectionsViaSymbols)
142     Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
143 
144   // struct mach_header (28 bytes) or
145   // struct mach_header_64 (32 bytes)
146 
147   uint64_t Start = W.OS.tell();
148   (void) Start;
149 
150   W.write<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
151 
152   W.write<uint32_t>(TargetObjectWriter->getCPUType());
153   W.write<uint32_t>(TargetObjectWriter->getCPUSubtype());
154 
155   W.write<uint32_t>(Type);
156   W.write<uint32_t>(NumLoadCommands);
157   W.write<uint32_t>(LoadCommandsSize);
158   W.write<uint32_t>(Flags);
159   if (is64Bit())
160     W.write<uint32_t>(0); // reserved
161 
162   assert(W.OS.tell() - Start == (is64Bit() ? sizeof(MachO::mach_header_64)
163                                            : sizeof(MachO::mach_header)));
164 }
165 
166 void MachObjectWriter::writeWithPadding(StringRef Str, uint64_t Size) {
167   assert(Size >= Str.size());
168   W.OS << Str;
169   W.OS.write_zeros(Size - Str.size());
170 }
171 
172 /// writeSegmentLoadCommand - Write a segment load command.
173 ///
174 /// \param NumSections The number of sections in this segment.
175 /// \param SectionDataSize The total size of the sections.
176 void MachObjectWriter::writeSegmentLoadCommand(
177     StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize,
178     uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt,
179     uint32_t InitProt) {
180   // struct segment_command (56 bytes) or
181   // struct segment_command_64 (72 bytes)
182 
183   uint64_t Start = W.OS.tell();
184   (void) Start;
185 
186   unsigned SegmentLoadCommandSize =
187     is64Bit() ? sizeof(MachO::segment_command_64):
188     sizeof(MachO::segment_command);
189   W.write<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
190   W.write<uint32_t>(SegmentLoadCommandSize +
191           NumSections * (is64Bit() ? sizeof(MachO::section_64) :
192                          sizeof(MachO::section)));
193 
194   writeWithPadding(Name, 16);
195   if (is64Bit()) {
196     W.write<uint64_t>(VMAddr);                 // vmaddr
197     W.write<uint64_t>(VMSize); // vmsize
198     W.write<uint64_t>(SectionDataStartOffset); // file offset
199     W.write<uint64_t>(SectionDataSize); // file size
200   } else {
201     W.write<uint32_t>(VMAddr);                 // vmaddr
202     W.write<uint32_t>(VMSize); // vmsize
203     W.write<uint32_t>(SectionDataStartOffset); // file offset
204     W.write<uint32_t>(SectionDataSize); // file size
205   }
206   // maxprot
207   W.write<uint32_t>(MaxProt);
208   // initprot
209   W.write<uint32_t>(InitProt);
210   W.write<uint32_t>(NumSections);
211   W.write<uint32_t>(0); // flags
212 
213   assert(W.OS.tell() - Start == SegmentLoadCommandSize);
214 }
215 
216 void MachObjectWriter::writeSection(const MCAsmLayout &Layout,
217                                     const MCSection &Sec, uint64_t VMAddr,
218                                     uint64_t FileOffset, unsigned Flags,
219                                     uint64_t RelocationsStart,
220                                     unsigned NumRelocations) {
221   uint64_t SectionSize = Layout.getSectionAddressSize(&Sec);
222   const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);
223 
224   // The offset is unused for virtual sections.
225   if (Section.isVirtualSection()) {
226     assert(Layout.getSectionFileSize(&Sec) == 0 && "Invalid file size!");
227     FileOffset = 0;
228   }
229 
230   // struct section (68 bytes) or
231   // struct section_64 (80 bytes)
232 
233   uint64_t Start = W.OS.tell();
234   (void) Start;
235 
236   writeWithPadding(Section.getName(), 16);
237   writeWithPadding(Section.getSegmentName(), 16);
238   if (is64Bit()) {
239     W.write<uint64_t>(VMAddr);      // address
240     W.write<uint64_t>(SectionSize); // size
241   } else {
242     W.write<uint32_t>(VMAddr);      // address
243     W.write<uint32_t>(SectionSize); // size
244   }
245   W.write<uint32_t>(FileOffset);
246 
247   W.write<uint32_t>(Log2(Section.getAlign()));
248   W.write<uint32_t>(NumRelocations ? RelocationsStart : 0);
249   W.write<uint32_t>(NumRelocations);
250   W.write<uint32_t>(Flags);
251   W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1
252   W.write<uint32_t>(Section.getStubSize()); // reserved2
253   if (is64Bit())
254     W.write<uint32_t>(0); // reserved3
255 
256   assert(W.OS.tell() - Start ==
257          (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section)));
258 }
259 
260 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
261                                               uint32_t NumSymbols,
262                                               uint32_t StringTableOffset,
263                                               uint32_t StringTableSize) {
264   // struct symtab_command (24 bytes)
265 
266   uint64_t Start = W.OS.tell();
267   (void) Start;
268 
269   W.write<uint32_t>(MachO::LC_SYMTAB);
270   W.write<uint32_t>(sizeof(MachO::symtab_command));
271   W.write<uint32_t>(SymbolOffset);
272   W.write<uint32_t>(NumSymbols);
273   W.write<uint32_t>(StringTableOffset);
274   W.write<uint32_t>(StringTableSize);
275 
276   assert(W.OS.tell() - Start == sizeof(MachO::symtab_command));
277 }
278 
279 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
280                                                 uint32_t NumLocalSymbols,
281                                                 uint32_t FirstExternalSymbol,
282                                                 uint32_t NumExternalSymbols,
283                                                 uint32_t FirstUndefinedSymbol,
284                                                 uint32_t NumUndefinedSymbols,
285                                                 uint32_t IndirectSymbolOffset,
286                                                 uint32_t NumIndirectSymbols) {
287   // struct dysymtab_command (80 bytes)
288 
289   uint64_t Start = W.OS.tell();
290   (void) Start;
291 
292   W.write<uint32_t>(MachO::LC_DYSYMTAB);
293   W.write<uint32_t>(sizeof(MachO::dysymtab_command));
294   W.write<uint32_t>(FirstLocalSymbol);
295   W.write<uint32_t>(NumLocalSymbols);
296   W.write<uint32_t>(FirstExternalSymbol);
297   W.write<uint32_t>(NumExternalSymbols);
298   W.write<uint32_t>(FirstUndefinedSymbol);
299   W.write<uint32_t>(NumUndefinedSymbols);
300   W.write<uint32_t>(0); // tocoff
301   W.write<uint32_t>(0); // ntoc
302   W.write<uint32_t>(0); // modtaboff
303   W.write<uint32_t>(0); // nmodtab
304   W.write<uint32_t>(0); // extrefsymoff
305   W.write<uint32_t>(0); // nextrefsyms
306   W.write<uint32_t>(IndirectSymbolOffset);
307   W.write<uint32_t>(NumIndirectSymbols);
308   W.write<uint32_t>(0); // extreloff
309   W.write<uint32_t>(0); // nextrel
310   W.write<uint32_t>(0); // locreloff
311   W.write<uint32_t>(0); // nlocrel
312 
313   assert(W.OS.tell() - Start == sizeof(MachO::dysymtab_command));
314 }
315 
316 MachObjectWriter::MachSymbolData *
317 MachObjectWriter::findSymbolData(const MCSymbol &Sym) {
318   for (auto *SymbolData :
319        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
320     for (MachSymbolData &Entry : *SymbolData)
321       if (Entry.Symbol == &Sym)
322         return &Entry;
323 
324   return nullptr;
325 }
326 
327 const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const {
328   const MCSymbol *S = &Sym;
329   while (S->isVariable()) {
330     const MCExpr *Value = S->getVariableValue();
331     const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value);
332     if (!Ref)
333       return *S;
334     S = &Ref->getSymbol();
335   }
336   return *S;
337 }
338 
339 void MachObjectWriter::writeNlist(MachSymbolData &MSD,
340                                   const MCAsmLayout &Layout) {
341   const MCSymbol *Symbol = MSD.Symbol;
342   const MCSymbol &Data = *Symbol;
343   const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol);
344   uint8_t SectionIndex = MSD.SectionIndex;
345   uint8_t Type = 0;
346   uint64_t Address = 0;
347   bool IsAlias = Symbol != AliasedSymbol;
348 
349   const MCSymbol &OrigSymbol = *Symbol;
350   MachSymbolData *AliaseeInfo;
351   if (IsAlias) {
352     AliaseeInfo = findSymbolData(*AliasedSymbol);
353     if (AliaseeInfo)
354       SectionIndex = AliaseeInfo->SectionIndex;
355     Symbol = AliasedSymbol;
356     // FIXME: Should this update Data as well?
357   }
358 
359   // Set the N_TYPE bits. See <mach-o/nlist.h>.
360   //
361   // FIXME: Are the prebound or indirect fields possible here?
362   if (IsAlias && Symbol->isUndefined())
363     Type = MachO::N_INDR;
364   else if (Symbol->isUndefined())
365     Type = MachO::N_UNDF;
366   else if (Symbol->isAbsolute())
367     Type = MachO::N_ABS;
368   else
369     Type = MachO::N_SECT;
370 
371   // FIXME: Set STAB bits.
372 
373   if (Data.isPrivateExtern())
374     Type |= MachO::N_PEXT;
375 
376   // Set external bit.
377   if (Data.isExternal() || (!IsAlias && Symbol->isUndefined()))
378     Type |= MachO::N_EXT;
379 
380   // Compute the symbol address.
381   if (IsAlias && Symbol->isUndefined())
382     Address = AliaseeInfo->StringIndex;
383   else if (Symbol->isDefined())
384     Address = getSymbolAddress(OrigSymbol, Layout);
385   else if (Symbol->isCommon()) {
386     // Common symbols are encoded with the size in the address
387     // field, and their alignment in the flags.
388     Address = Symbol->getCommonSize();
389   }
390 
391   // struct nlist (12 bytes)
392 
393   W.write<uint32_t>(MSD.StringIndex);
394   W.OS << char(Type);
395   W.OS << char(SectionIndex);
396 
397   // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
398   // value.
399   bool EncodeAsAltEntry =
400     IsAlias && cast<MCSymbolMachO>(OrigSymbol).isAltEntry();
401   W.write<uint16_t>(cast<MCSymbolMachO>(Symbol)->getEncodedFlags(EncodeAsAltEntry));
402   if (is64Bit())
403     W.write<uint64_t>(Address);
404   else
405     W.write<uint32_t>(Address);
406 }
407 
408 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type,
409                                                 uint32_t DataOffset,
410                                                 uint32_t DataSize) {
411   uint64_t Start = W.OS.tell();
412   (void) Start;
413 
414   W.write<uint32_t>(Type);
415   W.write<uint32_t>(sizeof(MachO::linkedit_data_command));
416   W.write<uint32_t>(DataOffset);
417   W.write<uint32_t>(DataSize);
418 
419   assert(W.OS.tell() - Start == sizeof(MachO::linkedit_data_command));
420 }
421 
422 static unsigned ComputeLinkerOptionsLoadCommandSize(
423   const std::vector<std::string> &Options, bool is64Bit)
424 {
425   unsigned Size = sizeof(MachO::linker_option_command);
426   for (const std::string &Option : Options)
427     Size += Option.size() + 1;
428   return alignTo(Size, is64Bit ? 8 : 4);
429 }
430 
431 void MachObjectWriter::writeLinkerOptionsLoadCommand(
432   const std::vector<std::string> &Options)
433 {
434   unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
435   uint64_t Start = W.OS.tell();
436   (void) Start;
437 
438   W.write<uint32_t>(MachO::LC_LINKER_OPTION);
439   W.write<uint32_t>(Size);
440   W.write<uint32_t>(Options.size());
441   uint64_t BytesWritten = sizeof(MachO::linker_option_command);
442   for (const std::string &Option : Options) {
443     // Write each string, including the null byte.
444     W.OS << Option << '\0';
445     BytesWritten += Option.size() + 1;
446   }
447 
448   // Pad to a multiple of the pointer size.
449   W.OS.write_zeros(
450       offsetToAlignment(BytesWritten, is64Bit() ? Align(8) : Align(4)));
451 
452   assert(W.OS.tell() - Start == Size);
453 }
454 
455 static bool isFixupTargetValid(const MCValue &Target) {
456   // Target is (LHS - RHS + cst).
457   // We don't support the form where LHS is null: -RHS + cst
458   if (!Target.getSymA() && Target.getSymB())
459     return false;
460   return true;
461 }
462 
463 void MachObjectWriter::recordRelocation(MCAssembler &Asm,
464                                         const MCAsmLayout &Layout,
465                                         const MCFragment *Fragment,
466                                         const MCFixup &Fixup, MCValue Target,
467                                         uint64_t &FixedValue) {
468   if (!isFixupTargetValid(Target)) {
469     Asm.getContext().reportError(Fixup.getLoc(),
470                                  "unsupported relocation expression");
471     return;
472   }
473 
474   TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup,
475                                        Target, FixedValue);
476 }
477 
478 void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
479   // This is the point where 'as' creates actual symbols for indirect symbols
480   // (in the following two passes). It would be easier for us to do this sooner
481   // when we see the attribute, but that makes getting the order in the symbol
482   // table much more complicated than it is worth.
483   //
484   // FIXME: Revisit this when the dust settles.
485 
486   // Report errors for use of .indirect_symbol not in a symbol pointer section
487   // or stub section.
488   for (IndirectSymbolData &ISD : llvm::make_range(Asm.indirect_symbol_begin(),
489                                                   Asm.indirect_symbol_end())) {
490     const MCSectionMachO &Section = cast<MCSectionMachO>(*ISD.Section);
491 
492     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
493         Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
494         Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS &&
495         Section.getType() != MachO::S_SYMBOL_STUBS) {
496       MCSymbol &Symbol = *ISD.Symbol;
497       report_fatal_error("indirect symbol '" + Symbol.getName() +
498                          "' not in a symbol pointer or stub section");
499     }
500   }
501 
502   // Bind non-lazy symbol pointers first.
503   unsigned IndirectIndex = 0;
504   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
505          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
506     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
507 
508     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
509         Section.getType() !=  MachO::S_THREAD_LOCAL_VARIABLE_POINTERS)
510       continue;
511 
512     // Initialize the section indirect symbol base, if necessary.
513     IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
514 
515     Asm.registerSymbol(*it->Symbol);
516   }
517 
518   // Then lazy symbol pointers and symbol stubs.
519   IndirectIndex = 0;
520   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
521          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
522     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
523 
524     if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
525         Section.getType() != MachO::S_SYMBOL_STUBS)
526       continue;
527 
528     // Initialize the section indirect symbol base, if necessary.
529     IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
530 
531     // Set the symbol type to undefined lazy, but only on construction.
532     //
533     // FIXME: Do not hardcode.
534     bool Created;
535     Asm.registerSymbol(*it->Symbol, &Created);
536     if (Created)
537       cast<MCSymbolMachO>(it->Symbol)->setReferenceTypeUndefinedLazy(true);
538   }
539 }
540 
541 /// computeSymbolTable - Compute the symbol table data
542 void MachObjectWriter::computeSymbolTable(
543     MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
544     std::vector<MachSymbolData> &ExternalSymbolData,
545     std::vector<MachSymbolData> &UndefinedSymbolData) {
546   // Build section lookup table.
547   DenseMap<const MCSection*, uint8_t> SectionIndexMap;
548   unsigned Index = 1;
549   for (MCAssembler::iterator it = Asm.begin(),
550          ie = Asm.end(); it != ie; ++it, ++Index)
551     SectionIndexMap[&*it] = Index;
552   assert(Index <= 256 && "Too many sections!");
553 
554   // Build the string table.
555   for (const MCSymbol &Symbol : Asm.symbols()) {
556     if (!Asm.isSymbolLinkerVisible(Symbol))
557       continue;
558 
559     StringTable.add(Symbol.getName());
560   }
561   StringTable.finalize();
562 
563   // Build the symbol arrays but only for non-local symbols.
564   //
565   // The particular order that we collect and then sort the symbols is chosen to
566   // match 'as'. Even though it doesn't matter for correctness, this is
567   // important for letting us diff .o files.
568   for (const MCSymbol &Symbol : Asm.symbols()) {
569     // Ignore non-linker visible symbols.
570     if (!Asm.isSymbolLinkerVisible(Symbol))
571       continue;
572 
573     if (!Symbol.isExternal() && !Symbol.isUndefined())
574       continue;
575 
576     MachSymbolData MSD;
577     MSD.Symbol = &Symbol;
578     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
579 
580     if (Symbol.isUndefined()) {
581       MSD.SectionIndex = 0;
582       UndefinedSymbolData.push_back(MSD);
583     } else if (Symbol.isAbsolute()) {
584       MSD.SectionIndex = 0;
585       ExternalSymbolData.push_back(MSD);
586     } else {
587       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
588       assert(MSD.SectionIndex && "Invalid section index!");
589       ExternalSymbolData.push_back(MSD);
590     }
591   }
592 
593   // Now add the data for local symbols.
594   for (const MCSymbol &Symbol : Asm.symbols()) {
595     // Ignore non-linker visible symbols.
596     if (!Asm.isSymbolLinkerVisible(Symbol))
597       continue;
598 
599     if (Symbol.isExternal() || Symbol.isUndefined())
600       continue;
601 
602     MachSymbolData MSD;
603     MSD.Symbol = &Symbol;
604     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
605 
606     if (Symbol.isAbsolute()) {
607       MSD.SectionIndex = 0;
608       LocalSymbolData.push_back(MSD);
609     } else {
610       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
611       assert(MSD.SectionIndex && "Invalid section index!");
612       LocalSymbolData.push_back(MSD);
613     }
614   }
615 
616   // External and undefined symbols are required to be in lexicographic order.
617   llvm::sort(ExternalSymbolData);
618   llvm::sort(UndefinedSymbolData);
619 
620   // Set the symbol indices.
621   Index = 0;
622   for (auto *SymbolData :
623        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
624     for (MachSymbolData &Entry : *SymbolData)
625       Entry.Symbol->setIndex(Index++);
626 
627   for (const MCSection &Section : Asm) {
628     for (RelAndSymbol &Rel : Relocations[&Section]) {
629       if (!Rel.Sym)
630         continue;
631 
632       // Set the Index and the IsExtern bit.
633       unsigned Index = Rel.Sym->getIndex();
634       assert(isInt<24>(Index));
635       if (W.Endian == support::little)
636         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27);
637       else
638         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
639     }
640   }
641 }
642 
643 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
644                                                const MCAsmLayout &Layout) {
645   uint64_t StartAddress = 0;
646   for (const MCSection *Sec : Layout.getSectionOrder()) {
647     StartAddress = alignTo(StartAddress, Sec->getAlign());
648     SectionAddress[Sec] = StartAddress;
649     StartAddress += Layout.getSectionAddressSize(Sec);
650 
651     // Explicitly pad the section to match the alignment requirements of the
652     // following one. This is for 'gas' compatibility, it shouldn't
653     /// strictly be necessary.
654     StartAddress += getPaddingSize(Sec, Layout);
655   }
656 }
657 
658 void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
659                                                 const MCAsmLayout &Layout) {
660   computeSectionAddresses(Asm, Layout);
661 
662   // Create symbol data for any indirect symbols.
663   bindIndirectSymbols(Asm);
664 }
665 
666 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
667     const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B,
668     bool InSet) const {
669   // FIXME: We don't handle things like
670   // foo = .
671   // creating atoms.
672   if (A.isVariable() || B.isVariable())
673     return false;
674   return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, A, B,
675                                                                 InSet);
676 }
677 
678 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
679     const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
680     bool InSet, bool IsPCRel) const {
681   if (InSet)
682     return true;
683 
684   // The effective address is
685   //     addr(atom(A)) + offset(A)
686   //   - addr(atom(B)) - offset(B)
687   // and the offsets are not relocatable, so the fixup is fully resolved when
688   //  addr(atom(A)) - addr(atom(B)) == 0.
689   const MCSymbol &SA = findAliasedSymbol(SymA);
690   const MCSection &SecA = SA.getSection();
691   const MCSection &SecB = *FB.getParent();
692 
693   if (IsPCRel) {
694     // The simple (Darwin, except on x86_64) way of dealing with this was to
695     // assume that any reference to a temporary symbol *must* be a temporary
696     // symbol in the same atom, unless the sections differ. Therefore, any PCrel
697     // relocation to a temporary symbol (in the same section) is fully
698     // resolved. This also works in conjunction with absolutized .set, which
699     // requires the compiler to use .set to absolutize the differences between
700     // symbols which the compiler knows to be assembly time constants, so we
701     // don't need to worry about considering symbol differences fully resolved.
702     //
703     // If the file isn't using sub-sections-via-symbols, we can make the
704     // same assumptions about any symbol that we normally make about
705     // assembler locals.
706 
707     bool hasReliableSymbolDifference = isX86_64();
708     if (!hasReliableSymbolDifference) {
709       if (!SA.isInSection() || &SecA != &SecB ||
710           (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() &&
711            Asm.getSubsectionsViaSymbols()))
712         return false;
713       return true;
714     }
715     // For Darwin x86_64, there is one special case when the reference IsPCRel.
716     // If the fragment with the reference does not have a base symbol but meets
717     // the simple way of dealing with this, in that it is a temporary symbol in
718     // the same atom then it is assumed to be fully resolved.  This is needed so
719     // a relocation entry is not created and so the static linker does not
720     // mess up the reference later.
721     else if(!FB.getAtom() &&
722             SA.isTemporary() && SA.isInSection() && &SecA == &SecB){
723       return true;
724     }
725   }
726 
727   // If they are not in the same section, we can't compute the diff.
728   if (&SecA != &SecB)
729     return false;
730 
731   const MCFragment *FA = SA.getFragment();
732 
733   // Bail if the symbol has no fragment.
734   if (!FA)
735     return false;
736 
737   // If the atoms are the same, they are guaranteed to have the same address.
738   if (FA->getAtom() == FB.getAtom())
739     return true;
740 
741   // Otherwise, we can't prove this is fully resolved.
742   return false;
743 }
744 
745 static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) {
746   switch (Type) {
747   case MCVM_OSXVersionMin:     return MachO::LC_VERSION_MIN_MACOSX;
748   case MCVM_IOSVersionMin:     return MachO::LC_VERSION_MIN_IPHONEOS;
749   case MCVM_TvOSVersionMin:    return MachO::LC_VERSION_MIN_TVOS;
750   case MCVM_WatchOSVersionMin: return MachO::LC_VERSION_MIN_WATCHOS;
751   }
752   llvm_unreachable("Invalid mc version min type");
753 }
754 
755 void MachObjectWriter::populateAddrSigSection(MCAssembler &Asm) {
756   MCSection *AddrSigSection =
757       Asm.getContext().getObjectFileInfo()->getAddrSigSection();
758   unsigned Log2Size = is64Bit() ? 3 : 2;
759   for (const MCSymbol *S : getAddrsigSyms()) {
760     if (!S->isRegistered())
761       continue;
762     MachO::any_relocation_info MRE;
763     MRE.r_word0 = 0;
764     MRE.r_word1 = (Log2Size << 25) | (MachO::GENERIC_RELOC_VANILLA << 28);
765     addRelocation(S, AddrSigSection, MRE);
766   }
767 }
768 
769 uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
770                                        const MCAsmLayout &Layout) {
771   uint64_t StartOffset = W.OS.tell();
772 
773   populateAddrSigSection(Asm);
774 
775   // Compute symbol table information and bind symbol indices.
776   computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
777                      UndefinedSymbolData);
778 
779   if (!Asm.CGProfile.empty()) {
780     MCSection *CGProfileSection = Asm.getContext().getMachOSection(
781         "__LLVM", "__cg_profile", 0, SectionKind::getMetadata());
782     MCDataFragment *Frag = dyn_cast_or_null<MCDataFragment>(
783         &*CGProfileSection->getFragmentList().begin());
784     assert(Frag && "call graph profile section not reserved");
785     Frag->getContents().clear();
786     raw_svector_ostream OS(Frag->getContents());
787     for (const MCAssembler::CGProfileEntry &CGPE : Asm.CGProfile) {
788       uint32_t FromIndex = CGPE.From->getSymbol().getIndex();
789       uint32_t ToIndex = CGPE.To->getSymbol().getIndex();
790       support::endian::write(OS, FromIndex, W.Endian);
791       support::endian::write(OS, ToIndex, W.Endian);
792       support::endian::write(OS, CGPE.Count, W.Endian);
793     }
794   }
795 
796   unsigned NumSections = Asm.size();
797   const MCAssembler::VersionInfoType &VersionInfo =
798     Layout.getAssembler().getVersionInfo();
799 
800   // The section data starts after the header, the segment load command (and
801   // section headers) and the symbol table.
802   unsigned NumLoadCommands = 1;
803   uint64_t LoadCommandsSize = is64Bit() ?
804     sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64):
805     sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
806 
807   // Add the deployment target version info load command size, if used.
808   if (VersionInfo.Major != 0) {
809     ++NumLoadCommands;
810     if (VersionInfo.EmitBuildVersion)
811       LoadCommandsSize += sizeof(MachO::build_version_command);
812     else
813       LoadCommandsSize += sizeof(MachO::version_min_command);
814   }
815 
816   const MCAssembler::VersionInfoType &TargetVariantVersionInfo =
817       Layout.getAssembler().getDarwinTargetVariantVersionInfo();
818 
819   // Add the target variant version info load command size, if used.
820   if (TargetVariantVersionInfo.Major != 0) {
821     ++NumLoadCommands;
822     assert(TargetVariantVersionInfo.EmitBuildVersion &&
823            "target variant should use build version");
824     LoadCommandsSize += sizeof(MachO::build_version_command);
825   }
826 
827   // Add the data-in-code load command size, if used.
828   unsigned NumDataRegions = Asm.getDataRegions().size();
829   if (NumDataRegions) {
830     ++NumLoadCommands;
831     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
832   }
833 
834   // Add the loh load command size, if used.
835   uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout);
836   uint64_t LOHSize = alignTo(LOHRawSize, is64Bit() ? 8 : 4);
837   if (LOHSize) {
838     ++NumLoadCommands;
839     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
840   }
841 
842   // Add the symbol table load command sizes, if used.
843   unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
844     UndefinedSymbolData.size();
845   if (NumSymbols) {
846     NumLoadCommands += 2;
847     LoadCommandsSize += (sizeof(MachO::symtab_command) +
848                          sizeof(MachO::dysymtab_command));
849   }
850 
851   // Add the linker option load commands sizes.
852   for (const auto &Option : Asm.getLinkerOptions()) {
853     ++NumLoadCommands;
854     LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit());
855   }
856 
857   // Compute the total size of the section data, as well as its file size and vm
858   // size.
859   uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) :
860                                sizeof(MachO::mach_header)) + LoadCommandsSize;
861   uint64_t SectionDataSize = 0;
862   uint64_t SectionDataFileSize = 0;
863   uint64_t VMSize = 0;
864   for (const MCSection &Sec : Asm) {
865     uint64_t Address = getSectionAddress(&Sec);
866     uint64_t Size = Layout.getSectionAddressSize(&Sec);
867     uint64_t FileSize = Layout.getSectionFileSize(&Sec);
868     FileSize += getPaddingSize(&Sec, Layout);
869 
870     VMSize = std::max(VMSize, Address + Size);
871 
872     if (Sec.isVirtualSection())
873       continue;
874 
875     SectionDataSize = std::max(SectionDataSize, Address + Size);
876     SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
877   }
878 
879   // The section data is padded to pointer size bytes.
880   //
881   // FIXME: Is this machine dependent?
882   unsigned SectionDataPadding =
883       offsetToAlignment(SectionDataFileSize, is64Bit() ? Align(8) : Align(4));
884   SectionDataFileSize += SectionDataPadding;
885 
886   // Write the prolog, starting with the header and load command...
887   writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize,
888               Asm.getSubsectionsViaSymbols());
889   uint32_t Prot =
890       MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
891   writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart,
892                           SectionDataSize, Prot, Prot);
893 
894   // ... and then the section headers.
895   uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
896   for (const MCSection &Section : Asm) {
897     const auto &Sec = cast<MCSectionMachO>(Section);
898     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
899     unsigned NumRelocs = Relocs.size();
900     uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
901     unsigned Flags = Sec.getTypeAndAttributes();
902     if (Sec.hasInstructions())
903       Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS;
904     writeSection(Layout, Sec, getSectionAddress(&Sec), SectionStart, Flags,
905                  RelocTableEnd, NumRelocs);
906     RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
907   }
908 
909   // Write out the deployment target information, if it's available.
910   auto EmitDeploymentTargetVersion =
911       [&](const MCAssembler::VersionInfoType &VersionInfo) {
912         auto EncodeVersion = [](VersionTuple V) -> uint32_t {
913           assert(!V.empty() && "empty version");
914           unsigned Update = V.getSubminor().value_or(0);
915           unsigned Minor = V.getMinor().value_or(0);
916           assert(Update < 256 && "unencodable update target version");
917           assert(Minor < 256 && "unencodable minor target version");
918           assert(V.getMajor() < 65536 && "unencodable major target version");
919           return Update | (Minor << 8) | (V.getMajor() << 16);
920         };
921         uint32_t EncodedVersion = EncodeVersion(VersionTuple(
922             VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update));
923         uint32_t SDKVersion = !VersionInfo.SDKVersion.empty()
924                                   ? EncodeVersion(VersionInfo.SDKVersion)
925                                   : 0;
926         if (VersionInfo.EmitBuildVersion) {
927           // FIXME: Currently empty tools. Add clang version in the future.
928           W.write<uint32_t>(MachO::LC_BUILD_VERSION);
929           W.write<uint32_t>(sizeof(MachO::build_version_command));
930           W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform);
931           W.write<uint32_t>(EncodedVersion);
932           W.write<uint32_t>(SDKVersion);
933           W.write<uint32_t>(0); // Empty tools list.
934         } else {
935           MachO::LoadCommandType LCType =
936               getLCFromMCVM(VersionInfo.TypeOrPlatform.Type);
937           W.write<uint32_t>(LCType);
938           W.write<uint32_t>(sizeof(MachO::version_min_command));
939           W.write<uint32_t>(EncodedVersion);
940           W.write<uint32_t>(SDKVersion);
941         }
942       };
943   if (VersionInfo.Major != 0)
944     EmitDeploymentTargetVersion(VersionInfo);
945   if (TargetVariantVersionInfo.Major != 0)
946     EmitDeploymentTargetVersion(TargetVariantVersionInfo);
947 
948   // Write the data-in-code load command, if used.
949   uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8;
950   if (NumDataRegions) {
951     uint64_t DataRegionsOffset = RelocTableEnd;
952     uint64_t DataRegionsSize = NumDataRegions * 8;
953     writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
954                              DataRegionsSize);
955   }
956 
957   // Write the loh load command, if used.
958   uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize;
959   if (LOHSize)
960     writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT,
961                              DataInCodeTableEnd, LOHSize);
962 
963   // Write the symbol table load command, if used.
964   if (NumSymbols) {
965     unsigned FirstLocalSymbol = 0;
966     unsigned NumLocalSymbols = LocalSymbolData.size();
967     unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
968     unsigned NumExternalSymbols = ExternalSymbolData.size();
969     unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
970     unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
971     unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
972     unsigned NumSymTabSymbols =
973       NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
974     uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
975     uint64_t IndirectSymbolOffset = 0;
976 
977     // If used, the indirect symbols are written after the section data.
978     if (NumIndirectSymbols)
979       IndirectSymbolOffset = LOHTableEnd;
980 
981     // The symbol table is written after the indirect symbol data.
982     uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize;
983 
984     // The string table is written after symbol table.
985     uint64_t StringTableOffset =
986       SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
987                                               sizeof(MachO::nlist_64) :
988                                               sizeof(MachO::nlist));
989     writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
990                            StringTableOffset, StringTable.getSize());
991 
992     writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
993                              FirstExternalSymbol, NumExternalSymbols,
994                              FirstUndefinedSymbol, NumUndefinedSymbols,
995                              IndirectSymbolOffset, NumIndirectSymbols);
996   }
997 
998   // Write the linker options load commands.
999   for (const auto &Option : Asm.getLinkerOptions())
1000     writeLinkerOptionsLoadCommand(Option);
1001 
1002   // Write the actual section data.
1003   for (const MCSection &Sec : Asm) {
1004     Asm.writeSectionData(W.OS, &Sec, Layout);
1005 
1006     uint64_t Pad = getPaddingSize(&Sec, Layout);
1007     W.OS.write_zeros(Pad);
1008   }
1009 
1010   // Write the extra padding.
1011   W.OS.write_zeros(SectionDataPadding);
1012 
1013   // Write the relocation entries.
1014   for (const MCSection &Sec : Asm) {
1015     // Write the section relocation entries, in reverse order to match 'as'
1016     // (approximately, the exact algorithm is more complicated than this).
1017     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
1018     for (const RelAndSymbol &Rel : llvm::reverse(Relocs)) {
1019       W.write<uint32_t>(Rel.MRE.r_word0);
1020       W.write<uint32_t>(Rel.MRE.r_word1);
1021     }
1022   }
1023 
1024   // Write out the data-in-code region payload, if there is one.
1025   for (MCAssembler::const_data_region_iterator
1026          it = Asm.data_region_begin(), ie = Asm.data_region_end();
1027          it != ie; ++it) {
1028     const DataRegionData *Data = &(*it);
1029     uint64_t Start = getSymbolAddress(*Data->Start, Layout);
1030     uint64_t End;
1031     if (Data->End)
1032       End = getSymbolAddress(*Data->End, Layout);
1033     else
1034       report_fatal_error("Data region not terminated");
1035 
1036     LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
1037                       << "  start: " << Start << "(" << Data->Start->getName()
1038                       << ")"
1039                       << "  end: " << End << "(" << Data->End->getName() << ")"
1040                       << "  size: " << End - Start << "\n");
1041     W.write<uint32_t>(Start);
1042     W.write<uint16_t>(End - Start);
1043     W.write<uint16_t>(Data->Kind);
1044   }
1045 
1046   // Write out the loh commands, if there is one.
1047   if (LOHSize) {
1048 #ifndef NDEBUG
1049     unsigned Start = W.OS.tell();
1050 #endif
1051     Asm.getLOHContainer().emit(*this, Layout);
1052     // Pad to a multiple of the pointer size.
1053     W.OS.write_zeros(
1054         offsetToAlignment(LOHRawSize, is64Bit() ? Align(8) : Align(4)));
1055     assert(W.OS.tell() - Start == LOHSize);
1056   }
1057 
1058   // Write the symbol table data, if used.
1059   if (NumSymbols) {
1060     // Write the indirect symbol entries.
1061     for (MCAssembler::const_indirect_symbol_iterator
1062            it = Asm.indirect_symbol_begin(),
1063            ie = Asm.indirect_symbol_end(); it != ie; ++it) {
1064       // Indirect symbols in the non-lazy symbol pointer section have some
1065       // special handling.
1066       const MCSectionMachO &Section =
1067           static_cast<const MCSectionMachO &>(*it->Section);
1068       if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) {
1069         // If this symbol is defined and internal, mark it as such.
1070         if (it->Symbol->isDefined() && !it->Symbol->isExternal()) {
1071           uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL;
1072           if (it->Symbol->isAbsolute())
1073             Flags |= MachO::INDIRECT_SYMBOL_ABS;
1074           W.write<uint32_t>(Flags);
1075           continue;
1076         }
1077       }
1078 
1079       W.write<uint32_t>(it->Symbol->getIndex());
1080     }
1081 
1082     // FIXME: Check that offsets match computed ones.
1083 
1084     // Write the symbol table entries.
1085     for (auto *SymbolData :
1086          {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
1087       for (MachSymbolData &Entry : *SymbolData)
1088         writeNlist(Entry, Layout);
1089 
1090     // Write the string table.
1091     StringTable.write(W.OS);
1092   }
1093 
1094   return W.OS.tell() - StartOffset;
1095 }
1096 
1097 std::unique_ptr<MCObjectWriter>
1098 llvm::createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
1099                              raw_pwrite_stream &OS, bool IsLittleEndian) {
1100   return std::make_unique<MachObjectWriter>(std::move(MOTW), OS,
1101                                              IsLittleEndian);
1102 }
1103