1 //===- MachOReader.cpp ------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MachOReader.h" 10 #include "MachOObject.h" 11 #include "llvm/BinaryFormat/MachO.h" 12 #include "llvm/Object/MachO.h" 13 #include "llvm/Support/SystemZ/zOSSupport.h" 14 #include <memory> 15 16 using namespace llvm; 17 using namespace llvm::objcopy; 18 using namespace llvm::objcopy::macho; 19 20 void MachOReader::readHeader(Object &O) const { 21 O.Header.Magic = MachOObj.getHeader().magic; 22 O.Header.CPUType = MachOObj.getHeader().cputype; 23 O.Header.CPUSubType = MachOObj.getHeader().cpusubtype; 24 O.Header.FileType = MachOObj.getHeader().filetype; 25 O.Header.NCmds = MachOObj.getHeader().ncmds; 26 O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds; 27 O.Header.Flags = MachOObj.getHeader().flags; 28 } 29 30 template <typename SectionType> 31 static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) { 32 StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); 33 StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); 34 Section S(SegName, SectName); 35 S.Index = Index; 36 S.Addr = Sec.addr; 37 S.Size = Sec.size; 38 S.OriginalOffset = Sec.offset; 39 S.Align = Sec.align; 40 S.RelOff = Sec.reloff; 41 S.NReloc = Sec.nreloc; 42 S.Flags = Sec.flags; 43 S.Reserved1 = Sec.reserved1; 44 S.Reserved2 = Sec.reserved2; 45 S.Reserved3 = 0; 46 return S; 47 } 48 49 Section constructSection(const MachO::section &Sec, uint32_t Index) { 50 return constructSectionCommon(Sec, Index); 51 } 52 53 Section constructSection(const MachO::section_64 &Sec, uint32_t Index) { 54 Section S = constructSectionCommon(Sec, Index); 55 S.Reserved3 = Sec.reserved3; 56 return S; 57 } 58 59 template <typename SectionType, typename SegmentType> 60 Expected<std::vector<std::unique_ptr<Section>>> static extractSections( 61 const object::MachOObjectFile::LoadCommandInfo &LoadCmd, 62 const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) { 63 std::vector<std::unique_ptr<Section>> Sections; 64 for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + 65 sizeof(SegmentType)), 66 End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + 67 LoadCmd.C.cmdsize); 68 Curr < End; ++Curr) { 69 SectionType Sec; 70 memcpy((void *)&Sec, reinterpret_cast<const char *>(Curr), 71 sizeof(SectionType)); 72 73 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) 74 MachO::swapStruct(Sec); 75 76 Sections.push_back( 77 std::make_unique<Section>(constructSection(Sec, NextSectionIndex))); 78 79 Section &S = *Sections.back(); 80 81 Expected<object::SectionRef> SecRef = 82 MachOObj.getSection(NextSectionIndex++); 83 if (!SecRef) 84 return SecRef.takeError(); 85 86 Expected<ArrayRef<uint8_t>> Data = 87 MachOObj.getSectionContents(SecRef->getRawDataRefImpl()); 88 if (!Data) 89 return Data.takeError(); 90 91 S.Content = 92 StringRef(reinterpret_cast<const char *>(Data->data()), Data->size()); 93 94 const uint32_t CPUType = MachOObj.getHeader().cputype; 95 S.Relocations.reserve(S.NReloc); 96 for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()), 97 RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl()); 98 RI != RE; ++RI) { 99 RelocationInfo R; 100 R.Symbol = nullptr; // We'll fill this field later. 101 R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl()); 102 R.Scattered = MachOObj.isRelocationScattered(R.Info); 103 unsigned Type = MachOObj.getAnyRelocationType(R.Info); 104 // TODO Support CPU_TYPE_ARM. 105 R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 && 106 Type == MachO::ARM64_RELOC_ADDEND); 107 R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info); 108 S.Relocations.push_back(R); 109 } 110 111 assert(S.NReloc == S.Relocations.size() && 112 "Incorrect number of relocations"); 113 } 114 return std::move(Sections); 115 } 116 117 Error MachOReader::readLoadCommands(Object &O) const { 118 // For MachO sections indices start from 1. 119 uint32_t NextSectionIndex = 1; 120 static constexpr char TextSegmentName[] = "__TEXT"; 121 for (auto LoadCmd : MachOObj.load_commands()) { 122 LoadCommand LC; 123 switch (LoadCmd.C.cmd) { 124 case MachO::LC_CODE_SIGNATURE: 125 O.CodeSignatureCommandIndex = O.LoadCommands.size(); 126 break; 127 case MachO::LC_SEGMENT: 128 // LoadCmd.Ptr might not be aligned temporarily as 129 // MachO::segment_command requires, but the segname char pointer do not 130 // have alignment restrictions. 131 if (StringRef(reinterpret_cast<const char *>( 132 LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) == 133 TextSegmentName) 134 O.TextSegmentCommandIndex = O.LoadCommands.size(); 135 136 if (Expected<std::vector<std::unique_ptr<Section>>> Sections = 137 extractSections<MachO::section, MachO::segment_command>( 138 LoadCmd, MachOObj, NextSectionIndex)) 139 LC.Sections = std::move(*Sections); 140 else 141 return Sections.takeError(); 142 break; 143 case MachO::LC_SEGMENT_64: 144 // LoadCmd.Ptr might not be aligned temporarily as 145 // MachO::segment_command_64 requires, but the segname char pointer do 146 // not have alignment restrictions. 147 if (StringRef(reinterpret_cast<const char *>( 148 LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) == 149 TextSegmentName) 150 O.TextSegmentCommandIndex = O.LoadCommands.size(); 151 152 if (Expected<std::vector<std::unique_ptr<Section>>> Sections = 153 extractSections<MachO::section_64, MachO::segment_command_64>( 154 LoadCmd, MachOObj, NextSectionIndex)) 155 LC.Sections = std::move(*Sections); 156 else 157 return Sections.takeError(); 158 break; 159 case MachO::LC_SYMTAB: 160 O.SymTabCommandIndex = O.LoadCommands.size(); 161 break; 162 case MachO::LC_DYSYMTAB: 163 O.DySymTabCommandIndex = O.LoadCommands.size(); 164 break; 165 case MachO::LC_DYLD_INFO: 166 case MachO::LC_DYLD_INFO_ONLY: 167 O.DyLdInfoCommandIndex = O.LoadCommands.size(); 168 break; 169 case MachO::LC_DATA_IN_CODE: 170 O.DataInCodeCommandIndex = O.LoadCommands.size(); 171 break; 172 case MachO::LC_LINKER_OPTIMIZATION_HINT: 173 O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size(); 174 break; 175 case MachO::LC_FUNCTION_STARTS: 176 O.FunctionStartsCommandIndex = O.LoadCommands.size(); 177 break; 178 case MachO::LC_DYLIB_CODE_SIGN_DRS: 179 O.DylibCodeSignDRsIndex = O.LoadCommands.size(); 180 break; 181 case MachO::LC_DYLD_EXPORTS_TRIE: 182 O.ExportsTrieCommandIndex = O.LoadCommands.size(); 183 break; 184 case MachO::LC_DYLD_CHAINED_FIXUPS: 185 O.ChainedFixupsCommandIndex = O.LoadCommands.size(); 186 break; 187 } 188 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 189 case MachO::LCName: \ 190 memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \ 191 sizeof(MachO::LCStruct)); \ 192 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \ 193 MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \ 194 if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \ 195 LC.Payload = ArrayRef<uint8_t>( \ 196 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \ 197 sizeof(MachO::LCStruct), \ 198 LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \ 199 break; 200 201 switch (LoadCmd.C.cmd) { 202 default: 203 memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr, 204 sizeof(MachO::load_command)); 205 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) 206 MachO::swapStruct(LC.MachOLoadCommand.load_command_data); 207 if (LoadCmd.C.cmdsize > sizeof(MachO::load_command)) 208 LC.Payload = ArrayRef<uint8_t>( 209 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + 210 sizeof(MachO::load_command), 211 LoadCmd.C.cmdsize - sizeof(MachO::load_command)); 212 break; 213 #include "llvm/BinaryFormat/MachO.def" 214 } 215 O.LoadCommands.push_back(std::move(LC)); 216 } 217 return Error::success(); 218 } 219 220 template <typename nlist_t> 221 SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) { 222 assert(nlist.n_strx < StrTable.size() && 223 "n_strx exceeds the size of the string table"); 224 SymbolEntry SE; 225 SE.Name = StringRef(StrTable.data() + nlist.n_strx).str(); 226 SE.n_type = nlist.n_type; 227 SE.n_sect = nlist.n_sect; 228 SE.n_desc = nlist.n_desc; 229 SE.n_value = nlist.n_value; 230 return SE; 231 } 232 233 void MachOReader::readSymbolTable(Object &O) const { 234 StringRef StrTable = MachOObj.getStringTableData(); 235 for (auto Symbol : MachOObj.symbols()) { 236 SymbolEntry SE = 237 (MachOObj.is64Bit() 238 ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry( 239 Symbol.getRawDataRefImpl())) 240 : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry( 241 Symbol.getRawDataRefImpl()))); 242 243 O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE)); 244 } 245 } 246 247 void MachOReader::setSymbolInRelocationInfo(Object &O) const { 248 std::vector<const Section *> Sections; 249 for (auto &LC : O.LoadCommands) 250 for (std::unique_ptr<Section> &Sec : LC.Sections) 251 Sections.push_back(Sec.get()); 252 253 for (LoadCommand &LC : O.LoadCommands) 254 for (std::unique_ptr<Section> &Sec : LC.Sections) 255 for (auto &Reloc : Sec->Relocations) 256 if (!Reloc.Scattered && !Reloc.IsAddend) { 257 const uint32_t SymbolNum = 258 Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian()); 259 if (Reloc.Extern) { 260 Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum); 261 } else { 262 // FIXME: Refactor error handling in MachOReader and report an error 263 // if we encounter an invalid relocation. 264 assert(SymbolNum >= 1 && SymbolNum <= Sections.size() && 265 "Invalid section index."); 266 Reloc.Sec = Sections[SymbolNum - 1]; 267 } 268 } 269 } 270 271 void MachOReader::readRebaseInfo(Object &O) const { 272 O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes(); 273 } 274 275 void MachOReader::readBindInfo(Object &O) const { 276 O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes(); 277 } 278 279 void MachOReader::readWeakBindInfo(Object &O) const { 280 O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes(); 281 } 282 283 void MachOReader::readLazyBindInfo(Object &O) const { 284 O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes(); 285 } 286 287 void MachOReader::readExportInfo(Object &O) const { 288 // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE 289 ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie(); 290 if (Trie.empty()) 291 Trie = MachOObj.getDyldExportsTrie(); 292 O.Exports.Trie = Trie; 293 } 294 295 void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex, 296 LinkData &LD) const { 297 if (!LCIndex) 298 return; 299 const MachO::linkedit_data_command &LC = 300 O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; 301 LD.Data = 302 arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize)); 303 } 304 305 void MachOReader::readDataInCodeData(Object &O) const { 306 return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode); 307 } 308 309 void MachOReader::readLinkerOptimizationHint(Object &O) const { 310 return readLinkData(O, O.LinkerOptimizationHintCommandIndex, 311 O.LinkerOptimizationHint); 312 } 313 314 void MachOReader::readFunctionStartsData(Object &O) const { 315 return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts); 316 } 317 318 void MachOReader::readDylibCodeSignDRs(Object &O) const { 319 return readLinkData(O, O.DylibCodeSignDRsIndex, O.DylibCodeSignDRs); 320 } 321 322 void MachOReader::readExportsTrie(Object &O) const { 323 return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie); 324 } 325 326 void MachOReader::readChainedFixups(Object &O) const { 327 return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups); 328 } 329 330 void MachOReader::readIndirectSymbolTable(Object &O) const { 331 MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); 332 constexpr uint32_t AbsOrLocalMask = 333 MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS; 334 for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) { 335 uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i); 336 if ((Index & AbsOrLocalMask) != 0) 337 O.IndirectSymTable.Symbols.emplace_back(Index, std::nullopt); 338 else 339 O.IndirectSymTable.Symbols.emplace_back( 340 Index, O.SymTable.getSymbolByIndex(Index)); 341 } 342 } 343 344 void MachOReader::readSwiftVersion(Object &O) const { 345 struct ObjCImageInfo { 346 uint32_t Version; 347 uint32_t Flags; 348 } ImageInfo; 349 350 for (const LoadCommand &LC : O.LoadCommands) 351 for (const std::unique_ptr<Section> &Sec : LC.Sections) 352 if (Sec->Sectname == "__objc_imageinfo" && 353 (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" || 354 Sec->Segname == "__DATA_DIRTY") && 355 Sec->Content.size() >= sizeof(ObjCImageInfo)) { 356 memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo)); 357 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { 358 sys::swapByteOrder(ImageInfo.Version); 359 sys::swapByteOrder(ImageInfo.Flags); 360 } 361 O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff; 362 return; 363 } 364 } 365 366 Expected<std::unique_ptr<Object>> MachOReader::create() const { 367 auto Obj = std::make_unique<Object>(); 368 readHeader(*Obj); 369 if (Error E = readLoadCommands(*Obj)) 370 return std::move(E); 371 readSymbolTable(*Obj); 372 setSymbolInRelocationInfo(*Obj); 373 readRebaseInfo(*Obj); 374 readBindInfo(*Obj); 375 readWeakBindInfo(*Obj); 376 readLazyBindInfo(*Obj); 377 readExportInfo(*Obj); 378 readDataInCodeData(*Obj); 379 readLinkerOptimizationHint(*Obj); 380 readFunctionStartsData(*Obj); 381 readDylibCodeSignDRs(*Obj); 382 readExportsTrie(*Obj); 383 readChainedFixups(*Obj); 384 readIndirectSymbolTable(*Obj); 385 readSwiftVersion(*Obj); 386 return std::move(Obj); 387 } 388