1 //===- MachOReader.cpp ------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MachOReader.h" 10 #include "MachOObject.h" 11 #include "llvm/BinaryFormat/MachO.h" 12 #include "llvm/Object/MachO.h" 13 #include "llvm/Support/Errc.h" 14 #include <memory> 15 16 using namespace llvm; 17 using namespace llvm::objcopy; 18 using namespace llvm::objcopy::macho; 19 20 void MachOReader::readHeader(Object &O) const { 21 O.Header.Magic = MachOObj.getHeader().magic; 22 O.Header.CPUType = MachOObj.getHeader().cputype; 23 O.Header.CPUSubType = MachOObj.getHeader().cpusubtype; 24 O.Header.FileType = MachOObj.getHeader().filetype; 25 O.Header.NCmds = MachOObj.getHeader().ncmds; 26 O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds; 27 O.Header.Flags = MachOObj.getHeader().flags; 28 } 29 30 template <typename SectionType> 31 static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) { 32 StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); 33 StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); 34 Section S(SegName, SectName); 35 S.Index = Index; 36 S.Addr = Sec.addr; 37 S.Size = Sec.size; 38 S.OriginalOffset = Sec.offset; 39 S.Align = Sec.align; 40 S.RelOff = Sec.reloff; 41 S.NReloc = Sec.nreloc; 42 S.Flags = Sec.flags; 43 S.Reserved1 = Sec.reserved1; 44 S.Reserved2 = Sec.reserved2; 45 S.Reserved3 = 0; 46 return S; 47 } 48 49 Section constructSection(const MachO::section &Sec, uint32_t Index) { 50 return constructSectionCommon(Sec, Index); 51 } 52 53 Section constructSection(const MachO::section_64 &Sec, uint32_t Index) { 54 Section S = constructSectionCommon(Sec, Index); 55 S.Reserved3 = Sec.reserved3; 56 return S; 57 } 58 59 template <typename SectionType, typename SegmentType> 60 Expected<std::vector<std::unique_ptr<Section>>> static extractSections( 61 const object::MachOObjectFile::LoadCommandInfo &LoadCmd, 62 const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) { 63 std::vector<std::unique_ptr<Section>> Sections; 64 for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + 65 sizeof(SegmentType)), 66 End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + 67 LoadCmd.C.cmdsize); 68 Curr < End; ++Curr) { 69 SectionType Sec; 70 memcpy((void *)&Sec, Curr, sizeof(SectionType)); 71 72 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) 73 MachO::swapStruct(Sec); 74 75 Sections.push_back( 76 std::make_unique<Section>(constructSection(Sec, NextSectionIndex))); 77 78 Section &S = *Sections.back(); 79 80 Expected<object::SectionRef> SecRef = 81 MachOObj.getSection(NextSectionIndex++); 82 if (!SecRef) 83 return SecRef.takeError(); 84 85 Expected<ArrayRef<uint8_t>> Data = 86 MachOObj.getSectionContents(SecRef->getRawDataRefImpl()); 87 if (!Data) 88 return Data.takeError(); 89 90 S.Content = 91 StringRef(reinterpret_cast<const char *>(Data->data()), Data->size()); 92 93 const uint32_t CPUType = MachOObj.getHeader().cputype; 94 S.Relocations.reserve(S.NReloc); 95 for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()), 96 RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl()); 97 RI != RE; ++RI) { 98 RelocationInfo R; 99 R.Symbol = nullptr; // We'll fill this field later. 100 R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl()); 101 R.Scattered = MachOObj.isRelocationScattered(R.Info); 102 unsigned Type = MachOObj.getAnyRelocationType(R.Info); 103 // TODO Support CPU_TYPE_ARM. 104 R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 && 105 Type == MachO::ARM64_RELOC_ADDEND); 106 R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info); 107 S.Relocations.push_back(R); 108 } 109 110 assert(S.NReloc == S.Relocations.size() && 111 "Incorrect number of relocations"); 112 } 113 return std::move(Sections); 114 } 115 116 Error MachOReader::readLoadCommands(Object &O) const { 117 // For MachO sections indices start from 1. 118 uint32_t NextSectionIndex = 1; 119 static constexpr char TextSegmentName[] = "__TEXT"; 120 for (auto LoadCmd : MachOObj.load_commands()) { 121 LoadCommand LC; 122 switch (LoadCmd.C.cmd) { 123 case MachO::LC_CODE_SIGNATURE: 124 O.CodeSignatureCommandIndex = O.LoadCommands.size(); 125 break; 126 case MachO::LC_SEGMENT: 127 // LoadCmd.Ptr might not be aligned temporarily as 128 // MachO::segment_command requires, but the segname char pointer do not 129 // have alignment restrictions. 130 if (StringRef(reinterpret_cast<const char *>( 131 LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) == 132 TextSegmentName) 133 O.TextSegmentCommandIndex = O.LoadCommands.size(); 134 135 if (Expected<std::vector<std::unique_ptr<Section>>> Sections = 136 extractSections<MachO::section, MachO::segment_command>( 137 LoadCmd, MachOObj, NextSectionIndex)) 138 LC.Sections = std::move(*Sections); 139 else 140 return Sections.takeError(); 141 break; 142 case MachO::LC_SEGMENT_64: 143 // LoadCmd.Ptr might not be aligned temporarily as 144 // MachO::segment_command_64 requires, but the segname char pointer do 145 // not have alignment restrictions. 146 if (StringRef(reinterpret_cast<const char *>( 147 LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) == 148 TextSegmentName) 149 O.TextSegmentCommandIndex = O.LoadCommands.size(); 150 151 if (Expected<std::vector<std::unique_ptr<Section>>> Sections = 152 extractSections<MachO::section_64, MachO::segment_command_64>( 153 LoadCmd, MachOObj, NextSectionIndex)) 154 LC.Sections = std::move(*Sections); 155 else 156 return Sections.takeError(); 157 break; 158 case MachO::LC_SYMTAB: 159 O.SymTabCommandIndex = O.LoadCommands.size(); 160 break; 161 case MachO::LC_DYSYMTAB: 162 O.DySymTabCommandIndex = O.LoadCommands.size(); 163 break; 164 case MachO::LC_DYLD_INFO: 165 case MachO::LC_DYLD_INFO_ONLY: 166 O.DyLdInfoCommandIndex = O.LoadCommands.size(); 167 break; 168 case MachO::LC_DATA_IN_CODE: 169 O.DataInCodeCommandIndex = O.LoadCommands.size(); 170 break; 171 case MachO::LC_LINKER_OPTIMIZATION_HINT: 172 O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size(); 173 break; 174 case MachO::LC_FUNCTION_STARTS: 175 O.FunctionStartsCommandIndex = O.LoadCommands.size(); 176 break; 177 case MachO::LC_DYLIB_CODE_SIGN_DRS: 178 O.DylibCodeSignDRsIndex = O.LoadCommands.size(); 179 break; 180 case MachO::LC_DYLD_EXPORTS_TRIE: 181 O.ExportsTrieCommandIndex = O.LoadCommands.size(); 182 break; 183 case MachO::LC_DYLD_CHAINED_FIXUPS: 184 O.ChainedFixupsCommandIndex = O.LoadCommands.size(); 185 break; 186 } 187 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 188 case MachO::LCName: \ 189 memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \ 190 sizeof(MachO::LCStruct)); \ 191 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \ 192 MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \ 193 if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \ 194 LC.Payload = ArrayRef<uint8_t>( \ 195 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \ 196 sizeof(MachO::LCStruct), \ 197 LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \ 198 break; 199 200 switch (LoadCmd.C.cmd) { 201 default: 202 memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr, 203 sizeof(MachO::load_command)); 204 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) 205 MachO::swapStruct(LC.MachOLoadCommand.load_command_data); 206 if (LoadCmd.C.cmdsize > sizeof(MachO::load_command)) 207 LC.Payload = ArrayRef<uint8_t>( 208 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + 209 sizeof(MachO::load_command), 210 LoadCmd.C.cmdsize - sizeof(MachO::load_command)); 211 break; 212 #include "llvm/BinaryFormat/MachO.def" 213 } 214 O.LoadCommands.push_back(std::move(LC)); 215 } 216 return Error::success(); 217 } 218 219 template <typename nlist_t> 220 SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) { 221 assert(nlist.n_strx < StrTable.size() && 222 "n_strx exceeds the size of the string table"); 223 SymbolEntry SE; 224 SE.Name = StringRef(StrTable.data() + nlist.n_strx).str(); 225 SE.n_type = nlist.n_type; 226 SE.n_sect = nlist.n_sect; 227 SE.n_desc = nlist.n_desc; 228 SE.n_value = nlist.n_value; 229 return SE; 230 } 231 232 void MachOReader::readSymbolTable(Object &O) const { 233 StringRef StrTable = MachOObj.getStringTableData(); 234 for (auto Symbol : MachOObj.symbols()) { 235 SymbolEntry SE = 236 (MachOObj.is64Bit() 237 ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry( 238 Symbol.getRawDataRefImpl())) 239 : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry( 240 Symbol.getRawDataRefImpl()))); 241 242 O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE)); 243 } 244 } 245 246 void MachOReader::setSymbolInRelocationInfo(Object &O) const { 247 std::vector<const Section *> Sections; 248 for (auto &LC : O.LoadCommands) 249 for (std::unique_ptr<Section> &Sec : LC.Sections) 250 Sections.push_back(Sec.get()); 251 252 for (LoadCommand &LC : O.LoadCommands) 253 for (std::unique_ptr<Section> &Sec : LC.Sections) 254 for (auto &Reloc : Sec->Relocations) 255 if (!Reloc.Scattered && !Reloc.IsAddend) { 256 const uint32_t SymbolNum = 257 Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian()); 258 if (Reloc.Extern) { 259 Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum); 260 } else { 261 // FIXME: Refactor error handling in MachOReader and report an error 262 // if we encounter an invalid relocation. 263 assert(SymbolNum >= 1 && SymbolNum <= Sections.size() && 264 "Invalid section index."); 265 Reloc.Sec = Sections[SymbolNum - 1]; 266 } 267 } 268 } 269 270 void MachOReader::readRebaseInfo(Object &O) const { 271 O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes(); 272 } 273 274 void MachOReader::readBindInfo(Object &O) const { 275 O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes(); 276 } 277 278 void MachOReader::readWeakBindInfo(Object &O) const { 279 O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes(); 280 } 281 282 void MachOReader::readLazyBindInfo(Object &O) const { 283 O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes(); 284 } 285 286 void MachOReader::readExportInfo(Object &O) const { 287 // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE 288 ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie(); 289 if (Trie.empty()) 290 Trie = MachOObj.getDyldExportsTrie(); 291 O.Exports.Trie = Trie; 292 } 293 294 void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex, 295 LinkData &LD) const { 296 if (!LCIndex) 297 return; 298 const MachO::linkedit_data_command &LC = 299 O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; 300 LD.Data = 301 arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize)); 302 } 303 304 void MachOReader::readDataInCodeData(Object &O) const { 305 return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode); 306 } 307 308 void MachOReader::readLinkerOptimizationHint(Object &O) const { 309 return readLinkData(O, O.LinkerOptimizationHintCommandIndex, 310 O.LinkerOptimizationHint); 311 } 312 313 void MachOReader::readFunctionStartsData(Object &O) const { 314 return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts); 315 } 316 317 void MachOReader::readDylibCodeSignDRs(Object &O) const { 318 return readLinkData(O, O.DylibCodeSignDRsIndex, O.DylibCodeSignDRs); 319 } 320 321 void MachOReader::readExportsTrie(Object &O) const { 322 return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie); 323 } 324 325 void MachOReader::readChainedFixups(Object &O) const { 326 return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups); 327 } 328 329 void MachOReader::readIndirectSymbolTable(Object &O) const { 330 MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); 331 constexpr uint32_t AbsOrLocalMask = 332 MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS; 333 for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) { 334 uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i); 335 if ((Index & AbsOrLocalMask) != 0) 336 O.IndirectSymTable.Symbols.emplace_back(Index, std::nullopt); 337 else 338 O.IndirectSymTable.Symbols.emplace_back( 339 Index, O.SymTable.getSymbolByIndex(Index)); 340 } 341 } 342 343 void MachOReader::readSwiftVersion(Object &O) const { 344 struct ObjCImageInfo { 345 uint32_t Version; 346 uint32_t Flags; 347 } ImageInfo; 348 349 for (const LoadCommand &LC : O.LoadCommands) 350 for (const std::unique_ptr<Section> &Sec : LC.Sections) 351 if (Sec->Sectname == "__objc_imageinfo" && 352 (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" || 353 Sec->Segname == "__DATA_DIRTY") && 354 Sec->Content.size() >= sizeof(ObjCImageInfo)) { 355 memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo)); 356 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { 357 sys::swapByteOrder(ImageInfo.Version); 358 sys::swapByteOrder(ImageInfo.Flags); 359 } 360 O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff; 361 return; 362 } 363 } 364 365 Expected<std::unique_ptr<Object>> MachOReader::create() const { 366 auto Obj = std::make_unique<Object>(); 367 readHeader(*Obj); 368 if (Error E = readLoadCommands(*Obj)) 369 return std::move(E); 370 readSymbolTable(*Obj); 371 setSymbolInRelocationInfo(*Obj); 372 readRebaseInfo(*Obj); 373 readBindInfo(*Obj); 374 readWeakBindInfo(*Obj); 375 readLazyBindInfo(*Obj); 376 readExportInfo(*Obj); 377 readDataInCodeData(*Obj); 378 readLinkerOptimizationHint(*Obj); 379 readFunctionStartsData(*Obj); 380 readDylibCodeSignDRs(*Obj); 381 readExportsTrie(*Obj); 382 readChainedFixups(*Obj); 383 readIndirectSymbolTable(*Obj); 384 readSwiftVersion(*Obj); 385 return std::move(Obj); 386 } 387