1 //===- MachOReader.cpp ------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MachOReader.h" 10 #include "MachOObject.h" 11 #include "llvm/BinaryFormat/MachO.h" 12 #include "llvm/Object/MachO.h" 13 #include "llvm/Support/Errc.h" 14 #include <memory> 15 16 using namespace llvm; 17 using namespace llvm::objcopy; 18 using namespace llvm::objcopy::macho; 19 20 void MachOReader::readHeader(Object &O) const { 21 O.Header.Magic = MachOObj.getHeader().magic; 22 O.Header.CPUType = MachOObj.getHeader().cputype; 23 O.Header.CPUSubType = MachOObj.getHeader().cpusubtype; 24 O.Header.FileType = MachOObj.getHeader().filetype; 25 O.Header.NCmds = MachOObj.getHeader().ncmds; 26 O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds; 27 O.Header.Flags = MachOObj.getHeader().flags; 28 } 29 30 template <typename SectionType> 31 static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) { 32 StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); 33 StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); 34 Section S(SegName, SectName); 35 S.Index = Index; 36 S.Addr = Sec.addr; 37 S.Size = Sec.size; 38 S.OriginalOffset = Sec.offset; 39 S.Align = Sec.align; 40 S.RelOff = Sec.reloff; 41 S.NReloc = Sec.nreloc; 42 S.Flags = Sec.flags; 43 S.Reserved1 = Sec.reserved1; 44 S.Reserved2 = Sec.reserved2; 45 S.Reserved3 = 0; 46 return S; 47 } 48 49 Section constructSection(const MachO::section &Sec, uint32_t Index) { 50 return constructSectionCommon(Sec, Index); 51 } 52 53 Section constructSection(const MachO::section_64 &Sec, uint32_t Index) { 54 Section S = constructSectionCommon(Sec, Index); 55 S.Reserved3 = Sec.reserved3; 56 return S; 57 } 58 59 template <typename SectionType, typename SegmentType> 60 Expected<std::vector<std::unique_ptr<Section>>> static extractSections( 61 const object::MachOObjectFile::LoadCommandInfo &LoadCmd, 62 const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) { 63 std::vector<std::unique_ptr<Section>> Sections; 64 for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + 65 sizeof(SegmentType)), 66 End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + 67 LoadCmd.C.cmdsize); 68 Curr < End; ++Curr) { 69 SectionType Sec; 70 memcpy((void *)&Sec, Curr, sizeof(SectionType)); 71 72 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) 73 MachO::swapStruct(Sec); 74 75 Sections.push_back( 76 std::make_unique<Section>(constructSection(Sec, NextSectionIndex))); 77 78 Section &S = *Sections.back(); 79 80 Expected<object::SectionRef> SecRef = 81 MachOObj.getSection(NextSectionIndex++); 82 if (!SecRef) 83 return SecRef.takeError(); 84 85 Expected<ArrayRef<uint8_t>> Data = 86 MachOObj.getSectionContents(SecRef->getRawDataRefImpl()); 87 if (!Data) 88 return Data.takeError(); 89 90 S.Content = 91 StringRef(reinterpret_cast<const char *>(Data->data()), Data->size()); 92 93 const uint32_t CPUType = MachOObj.getHeader().cputype; 94 S.Relocations.reserve(S.NReloc); 95 for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()), 96 RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl()); 97 RI != RE; ++RI) { 98 RelocationInfo R; 99 R.Symbol = nullptr; // We'll fill this field later. 100 R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl()); 101 R.Scattered = MachOObj.isRelocationScattered(R.Info); 102 unsigned Type = MachOObj.getAnyRelocationType(R.Info); 103 // TODO Support CPU_TYPE_ARM. 104 R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 && 105 Type == MachO::ARM64_RELOC_ADDEND); 106 R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info); 107 S.Relocations.push_back(R); 108 } 109 110 assert(S.NReloc == S.Relocations.size() && 111 "Incorrect number of relocations"); 112 } 113 return std::move(Sections); 114 } 115 116 Error MachOReader::readLoadCommands(Object &O) const { 117 // For MachO sections indices start from 1. 118 uint32_t NextSectionIndex = 1; 119 static constexpr char TextSegmentName[] = "__TEXT"; 120 for (auto LoadCmd : MachOObj.load_commands()) { 121 LoadCommand LC; 122 switch (LoadCmd.C.cmd) { 123 case MachO::LC_CODE_SIGNATURE: 124 O.CodeSignatureCommandIndex = O.LoadCommands.size(); 125 break; 126 case MachO::LC_SEGMENT: 127 // LoadCmd.Ptr might not be aligned temporarily as 128 // MachO::segment_command requires, but the segname char pointer do not 129 // have alignment restrictions. 130 if (StringRef(reinterpret_cast<const char *>( 131 LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) == 132 TextSegmentName) 133 O.TextSegmentCommandIndex = O.LoadCommands.size(); 134 135 if (Expected<std::vector<std::unique_ptr<Section>>> Sections = 136 extractSections<MachO::section, MachO::segment_command>( 137 LoadCmd, MachOObj, NextSectionIndex)) 138 LC.Sections = std::move(*Sections); 139 else 140 return Sections.takeError(); 141 break; 142 case MachO::LC_SEGMENT_64: 143 // LoadCmd.Ptr might not be aligned temporarily as 144 // MachO::segment_command_64 requires, but the segname char pointer do 145 // not have alignment restrictions. 146 if (StringRef(reinterpret_cast<const char *>( 147 LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) == 148 TextSegmentName) 149 O.TextSegmentCommandIndex = O.LoadCommands.size(); 150 151 if (Expected<std::vector<std::unique_ptr<Section>>> Sections = 152 extractSections<MachO::section_64, MachO::segment_command_64>( 153 LoadCmd, MachOObj, NextSectionIndex)) 154 LC.Sections = std::move(*Sections); 155 else 156 return Sections.takeError(); 157 break; 158 case MachO::LC_SYMTAB: 159 O.SymTabCommandIndex = O.LoadCommands.size(); 160 break; 161 case MachO::LC_DYSYMTAB: 162 O.DySymTabCommandIndex = O.LoadCommands.size(); 163 break; 164 case MachO::LC_DYLD_INFO: 165 case MachO::LC_DYLD_INFO_ONLY: 166 O.DyLdInfoCommandIndex = O.LoadCommands.size(); 167 break; 168 case MachO::LC_DATA_IN_CODE: 169 O.DataInCodeCommandIndex = O.LoadCommands.size(); 170 break; 171 case MachO::LC_LINKER_OPTIMIZATION_HINT: 172 O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size(); 173 break; 174 case MachO::LC_FUNCTION_STARTS: 175 O.FunctionStartsCommandIndex = O.LoadCommands.size(); 176 break; 177 case MachO::LC_DYLD_EXPORTS_TRIE: 178 O.ExportsTrieCommandIndex = O.LoadCommands.size(); 179 break; 180 case MachO::LC_DYLD_CHAINED_FIXUPS: 181 O.ChainedFixupsCommandIndex = O.LoadCommands.size(); 182 break; 183 } 184 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 185 case MachO::LCName: \ 186 memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \ 187 sizeof(MachO::LCStruct)); \ 188 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \ 189 MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \ 190 if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \ 191 LC.Payload = ArrayRef<uint8_t>( \ 192 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \ 193 sizeof(MachO::LCStruct), \ 194 LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \ 195 break; 196 197 switch (LoadCmd.C.cmd) { 198 default: 199 memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr, 200 sizeof(MachO::load_command)); 201 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) 202 MachO::swapStruct(LC.MachOLoadCommand.load_command_data); 203 if (LoadCmd.C.cmdsize > sizeof(MachO::load_command)) 204 LC.Payload = ArrayRef<uint8_t>( 205 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + 206 sizeof(MachO::load_command), 207 LoadCmd.C.cmdsize - sizeof(MachO::load_command)); 208 break; 209 #include "llvm/BinaryFormat/MachO.def" 210 } 211 O.LoadCommands.push_back(std::move(LC)); 212 } 213 return Error::success(); 214 } 215 216 template <typename nlist_t> 217 SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) { 218 assert(nlist.n_strx < StrTable.size() && 219 "n_strx exceeds the size of the string table"); 220 SymbolEntry SE; 221 SE.Name = StringRef(StrTable.data() + nlist.n_strx).str(); 222 SE.n_type = nlist.n_type; 223 SE.n_sect = nlist.n_sect; 224 SE.n_desc = nlist.n_desc; 225 SE.n_value = nlist.n_value; 226 return SE; 227 } 228 229 void MachOReader::readSymbolTable(Object &O) const { 230 StringRef StrTable = MachOObj.getStringTableData(); 231 for (auto Symbol : MachOObj.symbols()) { 232 SymbolEntry SE = 233 (MachOObj.is64Bit() 234 ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry( 235 Symbol.getRawDataRefImpl())) 236 : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry( 237 Symbol.getRawDataRefImpl()))); 238 239 O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE)); 240 } 241 } 242 243 void MachOReader::setSymbolInRelocationInfo(Object &O) const { 244 std::vector<const Section *> Sections; 245 for (auto &LC : O.LoadCommands) 246 for (std::unique_ptr<Section> &Sec : LC.Sections) 247 Sections.push_back(Sec.get()); 248 249 for (LoadCommand &LC : O.LoadCommands) 250 for (std::unique_ptr<Section> &Sec : LC.Sections) 251 for (auto &Reloc : Sec->Relocations) 252 if (!Reloc.Scattered && !Reloc.IsAddend) { 253 const uint32_t SymbolNum = 254 Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian()); 255 if (Reloc.Extern) { 256 Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum); 257 } else { 258 // FIXME: Refactor error handling in MachOReader and report an error 259 // if we encounter an invalid relocation. 260 assert(SymbolNum >= 1 && SymbolNum <= Sections.size() && 261 "Invalid section index."); 262 Reloc.Sec = Sections[SymbolNum - 1]; 263 } 264 } 265 } 266 267 void MachOReader::readRebaseInfo(Object &O) const { 268 O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes(); 269 } 270 271 void MachOReader::readBindInfo(Object &O) const { 272 O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes(); 273 } 274 275 void MachOReader::readWeakBindInfo(Object &O) const { 276 O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes(); 277 } 278 279 void MachOReader::readLazyBindInfo(Object &O) const { 280 O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes(); 281 } 282 283 void MachOReader::readExportInfo(Object &O) const { 284 O.Exports.Trie = MachOObj.getDyldInfoExportsTrie(); 285 } 286 287 void MachOReader::readLinkData(Object &O, Optional<size_t> LCIndex, 288 LinkData &LD) const { 289 if (!LCIndex) 290 return; 291 const MachO::linkedit_data_command &LC = 292 O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; 293 LD.Data = 294 arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize)); 295 } 296 297 void MachOReader::readDataInCodeData(Object &O) const { 298 return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode); 299 } 300 301 void MachOReader::readLinkerOptimizationHint(Object &O) const { 302 return readLinkData(O, O.LinkerOptimizationHintCommandIndex, 303 O.LinkerOptimizationHint); 304 } 305 306 void MachOReader::readFunctionStartsData(Object &O) const { 307 return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts); 308 } 309 310 void MachOReader::readExportsTrie(Object &O) const { 311 return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie); 312 } 313 314 void MachOReader::readChainedFixups(Object &O) const { 315 return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups); 316 } 317 318 void MachOReader::readIndirectSymbolTable(Object &O) const { 319 MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); 320 constexpr uint32_t AbsOrLocalMask = 321 MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS; 322 for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) { 323 uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i); 324 if ((Index & AbsOrLocalMask) != 0) 325 O.IndirectSymTable.Symbols.emplace_back(Index, None); 326 else 327 O.IndirectSymTable.Symbols.emplace_back( 328 Index, O.SymTable.getSymbolByIndex(Index)); 329 } 330 } 331 332 void MachOReader::readSwiftVersion(Object &O) const { 333 struct ObjCImageInfo { 334 uint32_t Version; 335 uint32_t Flags; 336 } ImageInfo; 337 338 for (const LoadCommand &LC : O.LoadCommands) 339 for (const std::unique_ptr<Section> &Sec : LC.Sections) 340 if (Sec->Sectname == "__objc_imageinfo" && 341 (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" || 342 Sec->Segname == "__DATA_DIRTY") && 343 Sec->Content.size() >= sizeof(ObjCImageInfo)) { 344 memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo)); 345 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { 346 sys::swapByteOrder(ImageInfo.Version); 347 sys::swapByteOrder(ImageInfo.Flags); 348 } 349 O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff; 350 return; 351 } 352 } 353 354 Expected<std::unique_ptr<Object>> MachOReader::create() const { 355 auto Obj = std::make_unique<Object>(); 356 readHeader(*Obj); 357 if (Error E = readLoadCommands(*Obj)) 358 return std::move(E); 359 readSymbolTable(*Obj); 360 setSymbolInRelocationInfo(*Obj); 361 readRebaseInfo(*Obj); 362 readBindInfo(*Obj); 363 readWeakBindInfo(*Obj); 364 readLazyBindInfo(*Obj); 365 readExportInfo(*Obj); 366 readDataInCodeData(*Obj); 367 readLinkerOptimizationHint(*Obj); 368 readFunctionStartsData(*Obj); 369 readExportsTrie(*Obj); 370 readChainedFixups(*Obj); 371 readIndirectSymbolTable(*Obj); 372 readSwiftVersion(*Obj); 373 return std::move(Obj); 374 } 375