xref: /freebsd/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOReader.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
1  //===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  
9  #include "MachOReader.h"
10  #include "MachOObject.h"
11  #include "llvm/BinaryFormat/MachO.h"
12  #include "llvm/Object/MachO.h"
13  #include "llvm/Support/Errc.h"
14  #include "llvm/Support/SystemZ/zOSSupport.h"
15  #include <memory>
16  
17  using namespace llvm;
18  using namespace llvm::objcopy;
19  using namespace llvm::objcopy::macho;
20  
21  void MachOReader::readHeader(Object &O) const {
22    O.Header.Magic = MachOObj.getHeader().magic;
23    O.Header.CPUType = MachOObj.getHeader().cputype;
24    O.Header.CPUSubType = MachOObj.getHeader().cpusubtype;
25    O.Header.FileType = MachOObj.getHeader().filetype;
26    O.Header.NCmds = MachOObj.getHeader().ncmds;
27    O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds;
28    O.Header.Flags = MachOObj.getHeader().flags;
29  }
30  
31  template <typename SectionType>
32  static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) {
33    StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));
34    StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));
35    Section S(SegName, SectName);
36    S.Index = Index;
37    S.Addr = Sec.addr;
38    S.Size = Sec.size;
39    S.OriginalOffset = Sec.offset;
40    S.Align = Sec.align;
41    S.RelOff = Sec.reloff;
42    S.NReloc = Sec.nreloc;
43    S.Flags = Sec.flags;
44    S.Reserved1 = Sec.reserved1;
45    S.Reserved2 = Sec.reserved2;
46    S.Reserved3 = 0;
47    return S;
48  }
49  
50  Section constructSection(const MachO::section &Sec, uint32_t Index) {
51    return constructSectionCommon(Sec, Index);
52  }
53  
54  Section constructSection(const MachO::section_64 &Sec, uint32_t Index) {
55    Section S = constructSectionCommon(Sec, Index);
56    S.Reserved3 = Sec.reserved3;
57    return S;
58  }
59  
60  template <typename SectionType, typename SegmentType>
61  Expected<std::vector<std::unique_ptr<Section>>> static extractSections(
62      const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
63      const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) {
64    std::vector<std::unique_ptr<Section>> Sections;
65    for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
66                                                           sizeof(SegmentType)),
67              End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
68                                                          LoadCmd.C.cmdsize);
69         Curr < End; ++Curr) {
70      SectionType Sec;
71      memcpy((void *)&Sec, reinterpret_cast<const char *>(Curr),
72             sizeof(SectionType));
73  
74      if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
75        MachO::swapStruct(Sec);
76  
77      Sections.push_back(
78          std::make_unique<Section>(constructSection(Sec, NextSectionIndex)));
79  
80      Section &S = *Sections.back();
81  
82      Expected<object::SectionRef> SecRef =
83          MachOObj.getSection(NextSectionIndex++);
84      if (!SecRef)
85        return SecRef.takeError();
86  
87      Expected<ArrayRef<uint8_t>> Data =
88          MachOObj.getSectionContents(SecRef->getRawDataRefImpl());
89      if (!Data)
90        return Data.takeError();
91  
92      S.Content =
93          StringRef(reinterpret_cast<const char *>(Data->data()), Data->size());
94  
95      const uint32_t CPUType = MachOObj.getHeader().cputype;
96      S.Relocations.reserve(S.NReloc);
97      for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()),
98                RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl());
99           RI != RE; ++RI) {
100        RelocationInfo R;
101        R.Symbol = nullptr; // We'll fill this field later.
102        R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl());
103        R.Scattered = MachOObj.isRelocationScattered(R.Info);
104        unsigned Type = MachOObj.getAnyRelocationType(R.Info);
105        // TODO Support CPU_TYPE_ARM.
106        R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 &&
107                                      Type == MachO::ARM64_RELOC_ADDEND);
108        R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info);
109        S.Relocations.push_back(R);
110      }
111  
112      assert(S.NReloc == S.Relocations.size() &&
113             "Incorrect number of relocations");
114    }
115    return std::move(Sections);
116  }
117  
118  Error MachOReader::readLoadCommands(Object &O) const {
119    // For MachO sections indices start from 1.
120    uint32_t NextSectionIndex = 1;
121    static constexpr char TextSegmentName[] = "__TEXT";
122    for (auto LoadCmd : MachOObj.load_commands()) {
123      LoadCommand LC;
124      switch (LoadCmd.C.cmd) {
125      case MachO::LC_CODE_SIGNATURE:
126        O.CodeSignatureCommandIndex = O.LoadCommands.size();
127        break;
128      case MachO::LC_SEGMENT:
129        // LoadCmd.Ptr might not be aligned temporarily as
130        // MachO::segment_command requires, but the segname char pointer do not
131        // have alignment restrictions.
132        if (StringRef(reinterpret_cast<const char *>(
133                LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) ==
134            TextSegmentName)
135          O.TextSegmentCommandIndex = O.LoadCommands.size();
136  
137        if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
138                extractSections<MachO::section, MachO::segment_command>(
139                    LoadCmd, MachOObj, NextSectionIndex))
140          LC.Sections = std::move(*Sections);
141        else
142          return Sections.takeError();
143        break;
144      case MachO::LC_SEGMENT_64:
145        // LoadCmd.Ptr might not be aligned temporarily as
146        // MachO::segment_command_64 requires, but the segname char pointer do
147        // not have alignment restrictions.
148        if (StringRef(reinterpret_cast<const char *>(
149                LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) ==
150            TextSegmentName)
151          O.TextSegmentCommandIndex = O.LoadCommands.size();
152  
153        if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
154                extractSections<MachO::section_64, MachO::segment_command_64>(
155                    LoadCmd, MachOObj, NextSectionIndex))
156          LC.Sections = std::move(*Sections);
157        else
158          return Sections.takeError();
159        break;
160      case MachO::LC_SYMTAB:
161        O.SymTabCommandIndex = O.LoadCommands.size();
162        break;
163      case MachO::LC_DYSYMTAB:
164        O.DySymTabCommandIndex = O.LoadCommands.size();
165        break;
166      case MachO::LC_DYLD_INFO:
167      case MachO::LC_DYLD_INFO_ONLY:
168        O.DyLdInfoCommandIndex = O.LoadCommands.size();
169        break;
170      case MachO::LC_DATA_IN_CODE:
171        O.DataInCodeCommandIndex = O.LoadCommands.size();
172        break;
173      case MachO::LC_LINKER_OPTIMIZATION_HINT:
174        O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size();
175        break;
176      case MachO::LC_FUNCTION_STARTS:
177        O.FunctionStartsCommandIndex = O.LoadCommands.size();
178        break;
179      case MachO::LC_DYLIB_CODE_SIGN_DRS:
180        O.DylibCodeSignDRsIndex = O.LoadCommands.size();
181        break;
182      case MachO::LC_DYLD_EXPORTS_TRIE:
183        O.ExportsTrieCommandIndex = O.LoadCommands.size();
184        break;
185      case MachO::LC_DYLD_CHAINED_FIXUPS:
186        O.ChainedFixupsCommandIndex = O.LoadCommands.size();
187        break;
188      }
189  #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
190    case MachO::LCName:                                                          \
191      memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr,        \
192             sizeof(MachO::LCStruct));                                           \
193      if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)                  \
194        MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data);                  \
195      if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct))                           \
196        LC.Payload = ArrayRef<uint8_t>(                                          \
197            reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +       \
198                sizeof(MachO::LCStruct),                                         \
199            LoadCmd.C.cmdsize - sizeof(MachO::LCStruct));                        \
200      break;
201  
202      switch (LoadCmd.C.cmd) {
203      default:
204        memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr,
205               sizeof(MachO::load_command));
206        if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
207          MachO::swapStruct(LC.MachOLoadCommand.load_command_data);
208        if (LoadCmd.C.cmdsize > sizeof(MachO::load_command))
209          LC.Payload = ArrayRef<uint8_t>(
210              reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
211                  sizeof(MachO::load_command),
212              LoadCmd.C.cmdsize - sizeof(MachO::load_command));
213        break;
214  #include "llvm/BinaryFormat/MachO.def"
215      }
216      O.LoadCommands.push_back(std::move(LC));
217    }
218    return Error::success();
219  }
220  
221  template <typename nlist_t>
222  SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {
223    assert(nlist.n_strx < StrTable.size() &&
224           "n_strx exceeds the size of the string table");
225    SymbolEntry SE;
226    SE.Name = StringRef(StrTable.data() + nlist.n_strx).str();
227    SE.n_type = nlist.n_type;
228    SE.n_sect = nlist.n_sect;
229    SE.n_desc = nlist.n_desc;
230    SE.n_value = nlist.n_value;
231    return SE;
232  }
233  
234  void MachOReader::readSymbolTable(Object &O) const {
235    StringRef StrTable = MachOObj.getStringTableData();
236    for (auto Symbol : MachOObj.symbols()) {
237      SymbolEntry SE =
238          (MachOObj.is64Bit()
239               ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry(
240                                                    Symbol.getRawDataRefImpl()))
241               : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry(
242                                                    Symbol.getRawDataRefImpl())));
243  
244      O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE));
245    }
246  }
247  
248  void MachOReader::setSymbolInRelocationInfo(Object &O) const {
249    std::vector<const Section *> Sections;
250    for (auto &LC : O.LoadCommands)
251      for (std::unique_ptr<Section> &Sec : LC.Sections)
252        Sections.push_back(Sec.get());
253  
254    for (LoadCommand &LC : O.LoadCommands)
255      for (std::unique_ptr<Section> &Sec : LC.Sections)
256        for (auto &Reloc : Sec->Relocations)
257          if (!Reloc.Scattered && !Reloc.IsAddend) {
258            const uint32_t SymbolNum =
259                Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian());
260            if (Reloc.Extern) {
261              Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum);
262            } else {
263              // FIXME: Refactor error handling in MachOReader and report an error
264              // if we encounter an invalid relocation.
265              assert(SymbolNum >= 1 && SymbolNum <= Sections.size() &&
266                     "Invalid section index.");
267              Reloc.Sec = Sections[SymbolNum - 1];
268            }
269          }
270  }
271  
272  void MachOReader::readRebaseInfo(Object &O) const {
273    O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes();
274  }
275  
276  void MachOReader::readBindInfo(Object &O) const {
277    O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes();
278  }
279  
280  void MachOReader::readWeakBindInfo(Object &O) const {
281    O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes();
282  }
283  
284  void MachOReader::readLazyBindInfo(Object &O) const {
285    O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes();
286  }
287  
288  void MachOReader::readExportInfo(Object &O) const {
289    // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE
290    ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie();
291    if (Trie.empty())
292      Trie = MachOObj.getDyldExportsTrie();
293    O.Exports.Trie = Trie;
294  }
295  
296  void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex,
297                                 LinkData &LD) const {
298    if (!LCIndex)
299      return;
300    const MachO::linkedit_data_command &LC =
301        O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data;
302    LD.Data =
303        arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize));
304  }
305  
306  void MachOReader::readDataInCodeData(Object &O) const {
307    return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode);
308  }
309  
310  void MachOReader::readLinkerOptimizationHint(Object &O) const {
311    return readLinkData(O, O.LinkerOptimizationHintCommandIndex,
312                        O.LinkerOptimizationHint);
313  }
314  
315  void MachOReader::readFunctionStartsData(Object &O) const {
316    return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts);
317  }
318  
319  void MachOReader::readDylibCodeSignDRs(Object &O) const {
320    return readLinkData(O, O.DylibCodeSignDRsIndex, O.DylibCodeSignDRs);
321  }
322  
323  void MachOReader::readExportsTrie(Object &O) const {
324    return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie);
325  }
326  
327  void MachOReader::readChainedFixups(Object &O) const {
328    return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups);
329  }
330  
331  void MachOReader::readIndirectSymbolTable(Object &O) const {
332    MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
333    constexpr uint32_t AbsOrLocalMask =
334        MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS;
335    for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) {
336      uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i);
337      if ((Index & AbsOrLocalMask) != 0)
338        O.IndirectSymTable.Symbols.emplace_back(Index, std::nullopt);
339      else
340        O.IndirectSymTable.Symbols.emplace_back(
341            Index, O.SymTable.getSymbolByIndex(Index));
342    }
343  }
344  
345  void MachOReader::readSwiftVersion(Object &O) const {
346    struct ObjCImageInfo {
347      uint32_t Version;
348      uint32_t Flags;
349    } ImageInfo;
350  
351    for (const LoadCommand &LC : O.LoadCommands)
352      for (const std::unique_ptr<Section> &Sec : LC.Sections)
353        if (Sec->Sectname == "__objc_imageinfo" &&
354            (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" ||
355             Sec->Segname == "__DATA_DIRTY") &&
356            Sec->Content.size() >= sizeof(ObjCImageInfo)) {
357          memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo));
358          if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {
359            sys::swapByteOrder(ImageInfo.Version);
360            sys::swapByteOrder(ImageInfo.Flags);
361          }
362          O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff;
363          return;
364        }
365  }
366  
367  Expected<std::unique_ptr<Object>> MachOReader::create() const {
368    auto Obj = std::make_unique<Object>();
369    readHeader(*Obj);
370    if (Error E = readLoadCommands(*Obj))
371      return std::move(E);
372    readSymbolTable(*Obj);
373    setSymbolInRelocationInfo(*Obj);
374    readRebaseInfo(*Obj);
375    readBindInfo(*Obj);
376    readWeakBindInfo(*Obj);
377    readLazyBindInfo(*Obj);
378    readExportInfo(*Obj);
379    readDataInCodeData(*Obj);
380    readLinkerOptimizationHint(*Obj);
381    readFunctionStartsData(*Obj);
382    readDylibCodeSignDRs(*Obj);
383    readExportsTrie(*Obj);
384    readChainedFixups(*Obj);
385    readIndirectSymbolTable(*Obj);
386    readSwiftVersion(*Obj);
387    return std::move(Obj);
388  }
389