1 //===- MachOReader.cpp ------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MachOReader.h" 10 #include "MachOObject.h" 11 #include "llvm/BinaryFormat/MachO.h" 12 #include "llvm/Object/MachO.h" 13 #include "llvm/Support/Errc.h" 14 #include "llvm/Support/SystemZ/zOSSupport.h" 15 #include <memory> 16 17 using namespace llvm; 18 using namespace llvm::objcopy; 19 using namespace llvm::objcopy::macho; 20 21 void MachOReader::readHeader(Object &O) const { 22 O.Header.Magic = MachOObj.getHeader().magic; 23 O.Header.CPUType = MachOObj.getHeader().cputype; 24 O.Header.CPUSubType = MachOObj.getHeader().cpusubtype; 25 O.Header.FileType = MachOObj.getHeader().filetype; 26 O.Header.NCmds = MachOObj.getHeader().ncmds; 27 O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds; 28 O.Header.Flags = MachOObj.getHeader().flags; 29 } 30 31 template <typename SectionType> 32 static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) { 33 StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); 34 StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); 35 Section S(SegName, SectName); 36 S.Index = Index; 37 S.Addr = Sec.addr; 38 S.Size = Sec.size; 39 S.OriginalOffset = Sec.offset; 40 S.Align = Sec.align; 41 S.RelOff = Sec.reloff; 42 S.NReloc = Sec.nreloc; 43 S.Flags = Sec.flags; 44 S.Reserved1 = Sec.reserved1; 45 S.Reserved2 = Sec.reserved2; 46 S.Reserved3 = 0; 47 return S; 48 } 49 50 Section constructSection(const MachO::section &Sec, uint32_t Index) { 51 return constructSectionCommon(Sec, Index); 52 } 53 54 Section constructSection(const MachO::section_64 &Sec, uint32_t Index) { 55 Section S = constructSectionCommon(Sec, Index); 56 S.Reserved3 = Sec.reserved3; 57 return S; 58 } 59 60 template <typename SectionType, typename SegmentType> 61 Expected<std::vector<std::unique_ptr<Section>>> static extractSections( 62 const object::MachOObjectFile::LoadCommandInfo &LoadCmd, 63 const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) { 64 std::vector<std::unique_ptr<Section>> Sections; 65 for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + 66 sizeof(SegmentType)), 67 End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + 68 LoadCmd.C.cmdsize); 69 Curr < End; ++Curr) { 70 SectionType Sec; 71 memcpy((void *)&Sec, reinterpret_cast<const char *>(Curr), 72 sizeof(SectionType)); 73 74 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) 75 MachO::swapStruct(Sec); 76 77 Sections.push_back( 78 std::make_unique<Section>(constructSection(Sec, NextSectionIndex))); 79 80 Section &S = *Sections.back(); 81 82 Expected<object::SectionRef> SecRef = 83 MachOObj.getSection(NextSectionIndex++); 84 if (!SecRef) 85 return SecRef.takeError(); 86 87 Expected<ArrayRef<uint8_t>> Data = 88 MachOObj.getSectionContents(SecRef->getRawDataRefImpl()); 89 if (!Data) 90 return Data.takeError(); 91 92 S.Content = 93 StringRef(reinterpret_cast<const char *>(Data->data()), Data->size()); 94 95 const uint32_t CPUType = MachOObj.getHeader().cputype; 96 S.Relocations.reserve(S.NReloc); 97 for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()), 98 RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl()); 99 RI != RE; ++RI) { 100 RelocationInfo R; 101 R.Symbol = nullptr; // We'll fill this field later. 102 R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl()); 103 R.Scattered = MachOObj.isRelocationScattered(R.Info); 104 unsigned Type = MachOObj.getAnyRelocationType(R.Info); 105 // TODO Support CPU_TYPE_ARM. 106 R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 && 107 Type == MachO::ARM64_RELOC_ADDEND); 108 R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info); 109 S.Relocations.push_back(R); 110 } 111 112 assert(S.NReloc == S.Relocations.size() && 113 "Incorrect number of relocations"); 114 } 115 return std::move(Sections); 116 } 117 118 Error MachOReader::readLoadCommands(Object &O) const { 119 // For MachO sections indices start from 1. 120 uint32_t NextSectionIndex = 1; 121 static constexpr char TextSegmentName[] = "__TEXT"; 122 for (auto LoadCmd : MachOObj.load_commands()) { 123 LoadCommand LC; 124 switch (LoadCmd.C.cmd) { 125 case MachO::LC_CODE_SIGNATURE: 126 O.CodeSignatureCommandIndex = O.LoadCommands.size(); 127 break; 128 case MachO::LC_SEGMENT: 129 // LoadCmd.Ptr might not be aligned temporarily as 130 // MachO::segment_command requires, but the segname char pointer do not 131 // have alignment restrictions. 132 if (StringRef(reinterpret_cast<const char *>( 133 LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) == 134 TextSegmentName) 135 O.TextSegmentCommandIndex = O.LoadCommands.size(); 136 137 if (Expected<std::vector<std::unique_ptr<Section>>> Sections = 138 extractSections<MachO::section, MachO::segment_command>( 139 LoadCmd, MachOObj, NextSectionIndex)) 140 LC.Sections = std::move(*Sections); 141 else 142 return Sections.takeError(); 143 break; 144 case MachO::LC_SEGMENT_64: 145 // LoadCmd.Ptr might not be aligned temporarily as 146 // MachO::segment_command_64 requires, but the segname char pointer do 147 // not have alignment restrictions. 148 if (StringRef(reinterpret_cast<const char *>( 149 LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) == 150 TextSegmentName) 151 O.TextSegmentCommandIndex = O.LoadCommands.size(); 152 153 if (Expected<std::vector<std::unique_ptr<Section>>> Sections = 154 extractSections<MachO::section_64, MachO::segment_command_64>( 155 LoadCmd, MachOObj, NextSectionIndex)) 156 LC.Sections = std::move(*Sections); 157 else 158 return Sections.takeError(); 159 break; 160 case MachO::LC_SYMTAB: 161 O.SymTabCommandIndex = O.LoadCommands.size(); 162 break; 163 case MachO::LC_DYSYMTAB: 164 O.DySymTabCommandIndex = O.LoadCommands.size(); 165 break; 166 case MachO::LC_DYLD_INFO: 167 case MachO::LC_DYLD_INFO_ONLY: 168 O.DyLdInfoCommandIndex = O.LoadCommands.size(); 169 break; 170 case MachO::LC_DATA_IN_CODE: 171 O.DataInCodeCommandIndex = O.LoadCommands.size(); 172 break; 173 case MachO::LC_LINKER_OPTIMIZATION_HINT: 174 O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size(); 175 break; 176 case MachO::LC_FUNCTION_STARTS: 177 O.FunctionStartsCommandIndex = O.LoadCommands.size(); 178 break; 179 case MachO::LC_DYLIB_CODE_SIGN_DRS: 180 O.DylibCodeSignDRsIndex = O.LoadCommands.size(); 181 break; 182 case MachO::LC_DYLD_EXPORTS_TRIE: 183 O.ExportsTrieCommandIndex = O.LoadCommands.size(); 184 break; 185 case MachO::LC_DYLD_CHAINED_FIXUPS: 186 O.ChainedFixupsCommandIndex = O.LoadCommands.size(); 187 break; 188 } 189 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 190 case MachO::LCName: \ 191 memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \ 192 sizeof(MachO::LCStruct)); \ 193 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \ 194 MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \ 195 if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \ 196 LC.Payload = ArrayRef<uint8_t>( \ 197 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \ 198 sizeof(MachO::LCStruct), \ 199 LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \ 200 break; 201 202 switch (LoadCmd.C.cmd) { 203 default: 204 memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr, 205 sizeof(MachO::load_command)); 206 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) 207 MachO::swapStruct(LC.MachOLoadCommand.load_command_data); 208 if (LoadCmd.C.cmdsize > sizeof(MachO::load_command)) 209 LC.Payload = ArrayRef<uint8_t>( 210 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + 211 sizeof(MachO::load_command), 212 LoadCmd.C.cmdsize - sizeof(MachO::load_command)); 213 break; 214 #include "llvm/BinaryFormat/MachO.def" 215 } 216 O.LoadCommands.push_back(std::move(LC)); 217 } 218 return Error::success(); 219 } 220 221 template <typename nlist_t> 222 SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) { 223 assert(nlist.n_strx < StrTable.size() && 224 "n_strx exceeds the size of the string table"); 225 SymbolEntry SE; 226 SE.Name = StringRef(StrTable.data() + nlist.n_strx).str(); 227 SE.n_type = nlist.n_type; 228 SE.n_sect = nlist.n_sect; 229 SE.n_desc = nlist.n_desc; 230 SE.n_value = nlist.n_value; 231 return SE; 232 } 233 234 void MachOReader::readSymbolTable(Object &O) const { 235 StringRef StrTable = MachOObj.getStringTableData(); 236 for (auto Symbol : MachOObj.symbols()) { 237 SymbolEntry SE = 238 (MachOObj.is64Bit() 239 ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry( 240 Symbol.getRawDataRefImpl())) 241 : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry( 242 Symbol.getRawDataRefImpl()))); 243 244 O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE)); 245 } 246 } 247 248 void MachOReader::setSymbolInRelocationInfo(Object &O) const { 249 std::vector<const Section *> Sections; 250 for (auto &LC : O.LoadCommands) 251 for (std::unique_ptr<Section> &Sec : LC.Sections) 252 Sections.push_back(Sec.get()); 253 254 for (LoadCommand &LC : O.LoadCommands) 255 for (std::unique_ptr<Section> &Sec : LC.Sections) 256 for (auto &Reloc : Sec->Relocations) 257 if (!Reloc.Scattered && !Reloc.IsAddend) { 258 const uint32_t SymbolNum = 259 Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian()); 260 if (Reloc.Extern) { 261 Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum); 262 } else { 263 // FIXME: Refactor error handling in MachOReader and report an error 264 // if we encounter an invalid relocation. 265 assert(SymbolNum >= 1 && SymbolNum <= Sections.size() && 266 "Invalid section index."); 267 Reloc.Sec = Sections[SymbolNum - 1]; 268 } 269 } 270 } 271 272 void MachOReader::readRebaseInfo(Object &O) const { 273 O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes(); 274 } 275 276 void MachOReader::readBindInfo(Object &O) const { 277 O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes(); 278 } 279 280 void MachOReader::readWeakBindInfo(Object &O) const { 281 O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes(); 282 } 283 284 void MachOReader::readLazyBindInfo(Object &O) const { 285 O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes(); 286 } 287 288 void MachOReader::readExportInfo(Object &O) const { 289 // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE 290 ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie(); 291 if (Trie.empty()) 292 Trie = MachOObj.getDyldExportsTrie(); 293 O.Exports.Trie = Trie; 294 } 295 296 void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex, 297 LinkData &LD) const { 298 if (!LCIndex) 299 return; 300 const MachO::linkedit_data_command &LC = 301 O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; 302 LD.Data = 303 arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize)); 304 } 305 306 void MachOReader::readDataInCodeData(Object &O) const { 307 return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode); 308 } 309 310 void MachOReader::readLinkerOptimizationHint(Object &O) const { 311 return readLinkData(O, O.LinkerOptimizationHintCommandIndex, 312 O.LinkerOptimizationHint); 313 } 314 315 void MachOReader::readFunctionStartsData(Object &O) const { 316 return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts); 317 } 318 319 void MachOReader::readDylibCodeSignDRs(Object &O) const { 320 return readLinkData(O, O.DylibCodeSignDRsIndex, O.DylibCodeSignDRs); 321 } 322 323 void MachOReader::readExportsTrie(Object &O) const { 324 return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie); 325 } 326 327 void MachOReader::readChainedFixups(Object &O) const { 328 return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups); 329 } 330 331 void MachOReader::readIndirectSymbolTable(Object &O) const { 332 MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); 333 constexpr uint32_t AbsOrLocalMask = 334 MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS; 335 for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) { 336 uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i); 337 if ((Index & AbsOrLocalMask) != 0) 338 O.IndirectSymTable.Symbols.emplace_back(Index, std::nullopt); 339 else 340 O.IndirectSymTable.Symbols.emplace_back( 341 Index, O.SymTable.getSymbolByIndex(Index)); 342 } 343 } 344 345 void MachOReader::readSwiftVersion(Object &O) const { 346 struct ObjCImageInfo { 347 uint32_t Version; 348 uint32_t Flags; 349 } ImageInfo; 350 351 for (const LoadCommand &LC : O.LoadCommands) 352 for (const std::unique_ptr<Section> &Sec : LC.Sections) 353 if (Sec->Sectname == "__objc_imageinfo" && 354 (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" || 355 Sec->Segname == "__DATA_DIRTY") && 356 Sec->Content.size() >= sizeof(ObjCImageInfo)) { 357 memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo)); 358 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { 359 sys::swapByteOrder(ImageInfo.Version); 360 sys::swapByteOrder(ImageInfo.Flags); 361 } 362 O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff; 363 return; 364 } 365 } 366 367 Expected<std::unique_ptr<Object>> MachOReader::create() const { 368 auto Obj = std::make_unique<Object>(); 369 readHeader(*Obj); 370 if (Error E = readLoadCommands(*Obj)) 371 return std::move(E); 372 readSymbolTable(*Obj); 373 setSymbolInRelocationInfo(*Obj); 374 readRebaseInfo(*Obj); 375 readBindInfo(*Obj); 376 readWeakBindInfo(*Obj); 377 readLazyBindInfo(*Obj); 378 readExportInfo(*Obj); 379 readDataInCodeData(*Obj); 380 readLinkerOptimizationHint(*Obj); 381 readFunctionStartsData(*Obj); 382 readDylibCodeSignDRs(*Obj); 383 readExportsTrie(*Obj); 384 readChainedFixups(*Obj); 385 readIndirectSymbolTable(*Obj); 386 readSwiftVersion(*Obj); 387 return std::move(Obj); 388 } 389