xref: /freebsd/contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- llvm/MC/MCMachObjectWriter.h - Mach Object Writer --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_MC_MCMACHOBJECTWRITER_H
10 #define LLVM_MC_MCMACHOBJECTWRITER_H
11 
12 #include "llvm/ADT/DenseMap.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/BinaryFormat/MachO.h"
15 #include "llvm/MC/MCDirectives.h"
16 #include "llvm/MC/MCExpr.h"
17 #include "llvm/MC/MCLinkerOptimizationHint.h"
18 #include "llvm/MC/MCObjectWriter.h"
19 #include "llvm/MC/MCSection.h"
20 #include "llvm/MC/StringTableBuilder.h"
21 #include "llvm/Support/Compiler.h"
22 #include "llvm/Support/EndianStream.h"
23 #include "llvm/Support/VersionTuple.h"
24 #include <cstdint>
25 #include <memory>
26 #include <string>
27 #include <vector>
28 
29 namespace llvm {
30 
31 class MachObjectWriter;
32 
33 class LLVM_ABI MCMachObjectTargetWriter : public MCObjectTargetWriter {
34   const unsigned Is64Bit : 1;
35   const uint32_t CPUType;
36 protected:
37   uint32_t CPUSubtype;
38 public:
39   unsigned LocalDifference_RIT = 0;
40 
41 protected:
42   MCMachObjectTargetWriter(bool Is64Bit_, uint32_t CPUType_,
43                            uint32_t CPUSubtype_);
44 
setLocalDifferenceRelocationType(unsigned Type)45   void setLocalDifferenceRelocationType(unsigned Type) {
46     LocalDifference_RIT = Type;
47   }
48 
49 public:
50   virtual ~MCMachObjectTargetWriter();
51 
getFormat()52   Triple::ObjectFormatType getFormat() const override { return Triple::MachO; }
classof(const MCObjectTargetWriter * W)53   static bool classof(const MCObjectTargetWriter *W) {
54     return W->getFormat() == Triple::MachO;
55   }
56 
57   /// \name Lifetime Management
58   /// @{
59 
reset()60   virtual void reset() {}
61 
62   /// @}
63 
64   /// \name Accessors
65   /// @{
66 
is64Bit()67   bool is64Bit() const { return Is64Bit; }
getCPUType()68   uint32_t getCPUType() const { return CPUType; }
getCPUSubtype()69   uint32_t getCPUSubtype() const { return CPUSubtype; }
getLocalDifferenceRelocationType()70   unsigned getLocalDifferenceRelocationType() const {
71     return LocalDifference_RIT;
72   }
73 
74   /// @}
75 
76   /// \name API
77   /// @{
78 
79   virtual void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
80                                 const MCFragment *Fragment,
81                                 const MCFixup &Fixup, MCValue Target,
82                                 uint64_t &FixedValue) = 0;
83 
84   /// @}
85 };
86 
87 class LLVM_ABI MachObjectWriter final : public MCObjectWriter {
88 public:
89   struct DataRegionData {
90     MachO::DataRegionType Kind;
91     MCSymbol *Start;
92     MCSymbol *End;
93   };
94 
95   // A Major version of 0 indicates that no version information was supplied
96   // and so the corresponding load command should not be emitted.
97   using VersionInfoType = struct {
98     bool EmitBuildVersion;
99     union {
100       MCVersionMinType Type;        ///< Used when EmitBuildVersion==false.
101       MachO::PlatformType Platform; ///< Used when EmitBuildVersion==true.
102     } TypeOrPlatform;
103     unsigned Major;
104     unsigned Minor;
105     unsigned Update;
106     /// An optional version of the SDK that was used to build the source.
107     VersionTuple SDKVersion;
108   };
109 
110 private:
111   /// Helper struct for containing some precomputed information on symbols.
112   struct MachSymbolData {
113     const MCSymbol *Symbol;
114     uint64_t StringIndex;
115     uint8_t SectionIndex;
116 
117     // Support lexicographic sorting.
118     LLVM_ABI bool operator<(const MachSymbolData &RHS) const;
119   };
120 
121   struct IndirectSymbolData {
122     MCSymbol *Symbol;
123     MCSection *Section;
124   };
125 
126   /// The target specific Mach-O writer instance.
127   std::unique_ptr<MCMachObjectTargetWriter> TargetObjectWriter;
128 
129   /// \name Relocation Data
130   /// @{
131 
132   struct RelAndSymbol {
133     const MCSymbol *Sym;
134     MachO::any_relocation_info MRE;
RelAndSymbolRelAndSymbol135     RelAndSymbol(const MCSymbol *Sym, const MachO::any_relocation_info &MRE)
136         : Sym(Sym), MRE(MRE) {}
137   };
138 
139   DenseMap<const MCSection *, std::vector<RelAndSymbol>> Relocations;
140   std::vector<IndirectSymbolData> IndirectSymbols;
141   DenseMap<const MCSection *, unsigned> IndirectSymBase;
142 
143   std::vector<DataRegionData> DataRegions;
144 
145   DenseMap<const MCSection *, uint64_t> SectionAddress;
146 
147   // List of sections in layout order. Virtual sections are after non-virtual
148   // sections.
149   SmallVector<MCSection *, 0> SectionOrder;
150 
151   /// @}
152   /// \name Symbol Table Data
153   /// @{
154 
155   StringTableBuilder StringTable;
156   std::vector<MachSymbolData> LocalSymbolData;
157   std::vector<MachSymbolData> ExternalSymbolData;
158   std::vector<MachSymbolData> UndefinedSymbolData;
159 
160   /// @}
161 
162   // Used to communicate Linker Optimization Hint information.
163   MCLOHContainer LOHContainer;
164 
165   VersionInfoType VersionInfo{};
166   VersionInfoType TargetVariantVersionInfo{};
167 
168   // The list of linker options for LC_LINKER_OPTION.
169   std::vector<std::vector<std::string>> LinkerOptions;
170 
171   MachSymbolData *findSymbolData(const MCSymbol &Sym);
172 
173   void writeWithPadding(StringRef Str, uint64_t Size);
174 
175 public:
MachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,raw_pwrite_stream & OS,bool IsLittleEndian)176   MachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
177                    raw_pwrite_stream &OS, bool IsLittleEndian)
178       : TargetObjectWriter(std::move(MOTW)),
179         StringTable(TargetObjectWriter->is64Bit() ? StringTableBuilder::MachO64
180                                                   : StringTableBuilder::MachO),
181         W(OS,
182           IsLittleEndian ? llvm::endianness::little : llvm::endianness::big) {}
183 
184   support::endian::Writer W;
185 
186   const MCSymbol &findAliasedSymbol(const MCSymbol &Sym) const;
187 
188   void reset() override;
189   void setAssembler(MCAssembler *Asm) override;
190 
191   /// \name Utility Methods
192   /// @{
193 
getIndirectSymbols()194   std::vector<IndirectSymbolData> &getIndirectSymbols() {
195     return IndirectSymbols;
196   }
getDataRegions()197   std::vector<DataRegionData> &getDataRegions() { return DataRegions; }
getSectionOrder()198   const llvm::SmallVectorImpl<MCSection *> &getSectionOrder() const {
199     return SectionOrder;
200   }
getLOHContainer()201   MCLOHContainer &getLOHContainer() { return LOHContainer; }
202 
getSectionAddress(const MCSection * Sec)203   uint64_t getSectionAddress(const MCSection *Sec) const {
204     return SectionAddress.lookup(Sec);
205   }
206   uint64_t getSymbolAddress(const MCSymbol &S) const;
207 
208   uint64_t getFragmentAddress(const MCAssembler &Asm,
209                               const MCFragment *Fragment) const;
210 
211   uint64_t getPaddingSize(const MCAssembler &Asm, const MCSection *SD) const;
212 
213   const MCSymbol *getAtom(const MCSymbol &S) const;
214 
215   bool doesSymbolRequireExternRelocation(const MCSymbol &S);
216 
217   /// Mach-O deployment target version information.
218   void setVersionMin(MCVersionMinType Type, unsigned Major, unsigned Minor,
219                      unsigned Update,
220                      VersionTuple SDKVersion = VersionTuple()) {
221     VersionInfo.EmitBuildVersion = false;
222     VersionInfo.TypeOrPlatform.Type = Type;
223     VersionInfo.Major = Major;
224     VersionInfo.Minor = Minor;
225     VersionInfo.Update = Update;
226     VersionInfo.SDKVersion = SDKVersion;
227   }
228   void setBuildVersion(MachO::PlatformType Platform, unsigned Major,
229                        unsigned Minor, unsigned Update,
230                        VersionTuple SDKVersion = VersionTuple()) {
231     VersionInfo.EmitBuildVersion = true;
232     VersionInfo.TypeOrPlatform.Platform = Platform;
233     VersionInfo.Major = Major;
234     VersionInfo.Minor = Minor;
235     VersionInfo.Update = Update;
236     VersionInfo.SDKVersion = SDKVersion;
237   }
setTargetVariantBuildVersion(MachO::PlatformType Platform,unsigned Major,unsigned Minor,unsigned Update,VersionTuple SDKVersion)238   void setTargetVariantBuildVersion(MachO::PlatformType Platform,
239                                     unsigned Major, unsigned Minor,
240                                     unsigned Update, VersionTuple SDKVersion) {
241     TargetVariantVersionInfo.EmitBuildVersion = true;
242     TargetVariantVersionInfo.TypeOrPlatform.Platform = Platform;
243     TargetVariantVersionInfo.Major = Major;
244     TargetVariantVersionInfo.Minor = Minor;
245     TargetVariantVersionInfo.Update = Update;
246     TargetVariantVersionInfo.SDKVersion = SDKVersion;
247   }
248 
getLinkerOptions()249   std::vector<std::vector<std::string>> &getLinkerOptions() {
250     return LinkerOptions;
251   }
252 
253   /// @}
254 
255   /// \name Target Writer Proxy Accessors
256   /// @{
257 
is64Bit()258   bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
isX86_64()259   bool isX86_64() const {
260     uint32_t CPUType = TargetObjectWriter->getCPUType();
261     return CPUType == MachO::CPU_TYPE_X86_64;
262   }
263 
264   /// @}
265 
266   void writeHeader(MachO::HeaderFileType Type, unsigned NumLoadCommands,
267                    unsigned LoadCommandsSize, bool SubsectionsViaSymbols);
268 
269   /// Write a segment load command.
270   ///
271   /// \param NumSections The number of sections in this segment.
272   /// \param SectionDataSize The total size of the sections.
273   void writeSegmentLoadCommand(StringRef Name, unsigned NumSections,
274                                uint64_t VMAddr, uint64_t VMSize,
275                                uint64_t SectionDataStartOffset,
276                                uint64_t SectionDataSize, uint32_t MaxProt,
277                                uint32_t InitProt);
278 
279   void writeSection(const MCAssembler &Asm, const MCSection &Sec,
280                     uint64_t VMAddr, uint64_t FileOffset, unsigned Flags,
281                     uint64_t RelocationsStart, unsigned NumRelocations);
282 
283   void writeSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
284                               uint32_t StringTableOffset,
285                               uint32_t StringTableSize);
286 
287   void writeDysymtabLoadCommand(
288       uint32_t FirstLocalSymbol, uint32_t NumLocalSymbols,
289       uint32_t FirstExternalSymbol, uint32_t NumExternalSymbols,
290       uint32_t FirstUndefinedSymbol, uint32_t NumUndefinedSymbols,
291       uint32_t IndirectSymbolOffset, uint32_t NumIndirectSymbols);
292 
293   void writeNlist(MachSymbolData &MSD, const MCAssembler &Asm);
294 
295   void writeLinkeditLoadCommand(uint32_t Type, uint32_t DataOffset,
296                                 uint32_t DataSize);
297 
298   void writeLinkerOptionsLoadCommand(const std::vector<std::string> &Options);
299 
300   // FIXME: We really need to improve the relocation validation. Basically, we
301   // want to implement a separate computation which evaluates the relocation
302   // entry as the linker would, and verifies that the resultant fixup value is
303   // exactly what the encoder wanted. This will catch several classes of
304   // problems:
305   //
306   //  - Relocation entry bugs, the two algorithms are unlikely to have the same
307   //    exact bug.
308   //
309   //  - Relaxation issues, where we forget to relax something.
310   //
311   //  - Input errors, where something cannot be correctly encoded. 'as' allows
312   //    these through in many cases.
313 
314   // Add a relocation to be output in the object file. At the time this is
315   // called, the symbol indexes are not know, so if the relocation refers
316   // to a symbol it should be passed as \p RelSymbol so that it can be updated
317   // afterwards. If the relocation doesn't refer to a symbol, nullptr should be
318   // used.
addRelocation(const MCSymbol * RelSymbol,const MCSection * Sec,MachO::any_relocation_info & MRE)319   void addRelocation(const MCSymbol *RelSymbol, const MCSection *Sec,
320                      MachO::any_relocation_info &MRE) {
321     RelAndSymbol P(RelSymbol, MRE);
322     Relocations[Sec].push_back(P);
323   }
324 
325   void recordRelocation(const MCFragment &F, const MCFixup &Fixup,
326                         MCValue Target, uint64_t &FixedValue) override;
327 
328   void bindIndirectSymbols(MCAssembler &Asm);
329 
330   /// Compute the symbol table data.
331   void computeSymbolTable(MCAssembler &Asm,
332                           std::vector<MachSymbolData> &LocalSymbolData,
333                           std::vector<MachSymbolData> &ExternalSymbolData,
334                           std::vector<MachSymbolData> &UndefinedSymbolData);
335 
336   void computeSectionAddresses(const MCAssembler &Asm);
337 
338   void executePostLayoutBinding() override;
339 
340   bool isSymbolRefDifferenceFullyResolvedImpl(const MCSymbol &SymA,
341                                               const MCFragment &FB, bool InSet,
342                                               bool IsPCRel) const override;
343 
344   void populateAddrSigSection(MCAssembler &Asm);
345 
346   uint64_t writeObject() override;
347 };
348 } // end namespace llvm
349 
350 #endif // LLVM_MC_MCMACHOBJECTWRITER_H
351