1 //===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MachOLayoutBuilder.h" 10 #include "llvm/Support/Alignment.h" 11 #include "llvm/Support/Errc.h" 12 #include "llvm/Support/ErrorHandling.h" 13 14 using namespace llvm; 15 using namespace llvm::objcopy::macho; 16 17 StringTableBuilder::Kind 18 MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) { 19 if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) 20 return Is64Bit ? StringTableBuilder::MachO64 : StringTableBuilder::MachO; 21 return Is64Bit ? StringTableBuilder::MachO64Linked 22 : StringTableBuilder::MachOLinked; 23 } 24 25 uint32_t MachOLayoutBuilder::computeSizeOfCmds() const { 26 uint32_t Size = 0; 27 for (const LoadCommand &LC : O.LoadCommands) { 28 const MachO::macho_load_command &MLC = LC.MachOLoadCommand; 29 auto cmd = MLC.load_command_data.cmd; 30 switch (cmd) { 31 case MachO::LC_SEGMENT: 32 Size += sizeof(MachO::segment_command) + 33 sizeof(MachO::section) * LC.Sections.size(); 34 continue; 35 case MachO::LC_SEGMENT_64: 36 Size += sizeof(MachO::segment_command_64) + 37 sizeof(MachO::section_64) * LC.Sections.size(); 38 continue; 39 } 40 41 switch (cmd) { 42 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 43 case MachO::LCName: \ 44 Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \ 45 break; 46 #include "llvm/BinaryFormat/MachO.def" 47 #undef HANDLE_LOAD_COMMAND 48 } 49 } 50 51 return Size; 52 } 53 54 void MachOLayoutBuilder::constructStringTable() { 55 for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols) 56 StrTableBuilder.add(Sym->Name); 57 StrTableBuilder.finalize(); 58 } 59 60 void MachOLayoutBuilder::updateSymbolIndexes() { 61 uint32_t Index = 0; 62 for (auto &Symbol : O.SymTable.Symbols) 63 Symbol->Index = Index++; 64 } 65 66 // Updates the index and the number of local/external/undefined symbols. 67 void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) { 68 assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB); 69 // Make sure that nlist entries in the symbol table are sorted by the those 70 // types. The order is: local < defined external < undefined external. 71 assert(llvm::is_sorted(O.SymTable.Symbols, 72 [](const std::unique_ptr<SymbolEntry> &A, 73 const std::unique_ptr<SymbolEntry> &B) { 74 bool AL = A->isLocalSymbol(), 75 BL = B->isLocalSymbol(); 76 if (AL != BL) 77 return AL; 78 return !AL && !A->isUndefinedSymbol() && 79 B->isUndefinedSymbol(); 80 }) && 81 "Symbols are not sorted by their types."); 82 83 uint32_t NumLocalSymbols = 0; 84 auto Iter = O.SymTable.Symbols.begin(); 85 auto End = O.SymTable.Symbols.end(); 86 for (; Iter != End; ++Iter) { 87 if ((*Iter)->isExternalSymbol()) 88 break; 89 90 ++NumLocalSymbols; 91 } 92 93 uint32_t NumExtDefSymbols = 0; 94 for (; Iter != End; ++Iter) { 95 if ((*Iter)->isUndefinedSymbol()) 96 break; 97 98 ++NumExtDefSymbols; 99 } 100 101 MLC.dysymtab_command_data.ilocalsym = 0; 102 MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols; 103 MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols; 104 MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols; 105 MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols; 106 MLC.dysymtab_command_data.nundefsym = 107 O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols); 108 } 109 110 // Recomputes and updates offset and size fields in load commands and sections 111 // since they could be modified. 112 uint64_t MachOLayoutBuilder::layoutSegments() { 113 auto HeaderSize = 114 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 115 const bool IsObjectFile = 116 O.Header.FileType == MachO::HeaderFileType::MH_OBJECT; 117 uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0; 118 for (LoadCommand &LC : O.LoadCommands) { 119 auto &MLC = LC.MachOLoadCommand; 120 StringRef Segname; 121 uint64_t SegmentVmAddr; 122 uint64_t SegmentVmSize; 123 switch (MLC.load_command_data.cmd) { 124 case MachO::LC_SEGMENT: 125 SegmentVmAddr = MLC.segment_command_data.vmaddr; 126 SegmentVmSize = MLC.segment_command_data.vmsize; 127 Segname = StringRef(MLC.segment_command_data.segname, 128 strnlen(MLC.segment_command_data.segname, 129 sizeof(MLC.segment_command_data.segname))); 130 break; 131 case MachO::LC_SEGMENT_64: 132 SegmentVmAddr = MLC.segment_command_64_data.vmaddr; 133 SegmentVmSize = MLC.segment_command_64_data.vmsize; 134 Segname = StringRef(MLC.segment_command_64_data.segname, 135 strnlen(MLC.segment_command_64_data.segname, 136 sizeof(MLC.segment_command_64_data.segname))); 137 break; 138 default: 139 continue; 140 } 141 142 if (Segname == "__LINKEDIT") { 143 // We update the __LINKEDIT segment later (in layoutTail). 144 assert(LC.Sections.empty() && "__LINKEDIT segment has sections"); 145 LinkEditLoadCommand = &MLC; 146 continue; 147 } 148 149 // Update file offsets and sizes of sections. 150 uint64_t SegOffset = Offset; 151 uint64_t SegFileSize = 0; 152 uint64_t VMSize = 0; 153 for (std::unique_ptr<Section> &Sec : LC.Sections) { 154 assert(SegmentVmAddr <= Sec->Addr && 155 "Section's address cannot be smaller than Segment's one"); 156 uint32_t SectOffset = Sec->Addr - SegmentVmAddr; 157 if (IsObjectFile) { 158 if (!Sec->hasValidOffset()) { 159 Sec->Offset = 0; 160 } else { 161 uint64_t PaddingSize = 162 offsetToAlignment(SegFileSize, Align(1ull << Sec->Align)); 163 Sec->Offset = SegOffset + SegFileSize + PaddingSize; 164 Sec->Size = Sec->Content.size(); 165 SegFileSize += PaddingSize + Sec->Size; 166 } 167 } else { 168 if (!Sec->hasValidOffset()) { 169 Sec->Offset = 0; 170 } else { 171 Sec->Offset = SegOffset + SectOffset; 172 Sec->Size = Sec->Content.size(); 173 SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size); 174 } 175 } 176 VMSize = std::max(VMSize, SectOffset + Sec->Size); 177 } 178 179 if (IsObjectFile) { 180 Offset += SegFileSize; 181 } else { 182 Offset = alignTo(Offset + SegFileSize, PageSize); 183 SegFileSize = alignTo(SegFileSize, PageSize); 184 // Use the original vmsize if the segment is __PAGEZERO. 185 VMSize = 186 Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize); 187 } 188 189 switch (MLC.load_command_data.cmd) { 190 case MachO::LC_SEGMENT: 191 MLC.segment_command_data.cmdsize = 192 sizeof(MachO::segment_command) + 193 sizeof(MachO::section) * LC.Sections.size(); 194 MLC.segment_command_data.nsects = LC.Sections.size(); 195 MLC.segment_command_data.fileoff = SegOffset; 196 MLC.segment_command_data.vmsize = VMSize; 197 MLC.segment_command_data.filesize = SegFileSize; 198 break; 199 case MachO::LC_SEGMENT_64: 200 MLC.segment_command_64_data.cmdsize = 201 sizeof(MachO::segment_command_64) + 202 sizeof(MachO::section_64) * LC.Sections.size(); 203 MLC.segment_command_64_data.nsects = LC.Sections.size(); 204 MLC.segment_command_64_data.fileoff = SegOffset; 205 MLC.segment_command_64_data.vmsize = VMSize; 206 MLC.segment_command_64_data.filesize = SegFileSize; 207 break; 208 } 209 } 210 211 return Offset; 212 } 213 214 uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) { 215 for (LoadCommand &LC : O.LoadCommands) 216 for (std::unique_ptr<Section> &Sec : LC.Sections) { 217 Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset; 218 Sec->NReloc = Sec->Relocations.size(); 219 Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc; 220 } 221 222 return Offset; 223 } 224 225 Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { 226 // If we are building the layout of an executable or dynamic library 227 // which does not have any segments other than __LINKEDIT, 228 // the Offset can be equal to zero by this time. It happens because of the 229 // convention that in such cases the file offsets specified by LC_SEGMENT 230 // start with zero (unlike the case of a relocatable object file). 231 const uint64_t HeaderSize = 232 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 233 assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) || 234 Offset >= HeaderSize + O.Header.SizeOfCmds) && 235 "Incorrect tail offset"); 236 Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds); 237 238 // The exports trie can be in either LC_DYLD_INFO or in 239 // LC_DYLD_EXPORTS_TRIE, but not both. 240 size_t DyldInfoExportsTrieSize = 0; 241 size_t DyldExportsTrieSize = 0; 242 for (const auto &LC : O.LoadCommands) { 243 switch (LC.MachOLoadCommand.load_command_data.cmd) { 244 case MachO::LC_DYLD_INFO: 245 case MachO::LC_DYLD_INFO_ONLY: 246 DyldInfoExportsTrieSize = O.Exports.Trie.size(); 247 break; 248 case MachO::LC_DYLD_EXPORTS_TRIE: 249 DyldExportsTrieSize = O.Exports.Trie.size(); 250 break; 251 default: 252 break; 253 } 254 } 255 assert((DyldInfoExportsTrieSize == 0 || DyldExportsTrieSize == 0) && 256 "Export trie in both LCs"); 257 258 uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); 259 uint64_t StartOfLinkEdit = Offset; 260 261 // The order of LINKEDIT elements is as follows: 262 // rebase info, binding info, weak binding info, lazy binding info, export 263 // trie, chained fixups, dyld exports trie, function starts, data-in-code, 264 // symbol table, indirect symbol table, symbol table strings, 265 // dylib codesign drs, and code signature. 266 auto updateOffset = [&Offset](size_t Size) { 267 uint64_t PreviousOffset = Offset; 268 Offset += Size; 269 return PreviousOffset; 270 }; 271 272 uint64_t StartOfRebaseInfo = updateOffset(O.Rebases.Opcodes.size()); 273 uint64_t StartOfBindingInfo = updateOffset(O.Binds.Opcodes.size()); 274 uint64_t StartOfWeakBindingInfo = updateOffset(O.WeakBinds.Opcodes.size()); 275 uint64_t StartOfLazyBindingInfo = updateOffset(O.LazyBinds.Opcodes.size()); 276 uint64_t StartOfExportTrie = updateOffset(DyldInfoExportsTrieSize); 277 uint64_t StartOfChainedFixups = updateOffset(O.ChainedFixups.Data.size()); 278 uint64_t StartOfDyldExportsTrie = updateOffset(DyldExportsTrieSize); 279 uint64_t StartOfFunctionStarts = updateOffset(O.FunctionStarts.Data.size()); 280 uint64_t StartOfDataInCode = updateOffset(O.DataInCode.Data.size()); 281 uint64_t StartOfLinkerOptimizationHint = 282 updateOffset(O.LinkerOptimizationHint.Data.size()); 283 uint64_t StartOfSymbols = updateOffset(NListSize * O.SymTable.Symbols.size()); 284 uint64_t StartOfIndirectSymbols = 285 updateOffset(sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); 286 uint64_t StartOfSymbolStrings = updateOffset(StrTableBuilder.getSize()); 287 uint64_t StartOfDylibCodeSignDRs = updateOffset(O.DylibCodeSignDRs.Data.size()); 288 289 uint64_t StartOfCodeSignature = Offset; 290 uint32_t CodeSignatureSize = 0; 291 if (O.CodeSignatureCommandIndex) { 292 StartOfCodeSignature = alignTo(StartOfCodeSignature, 16); 293 294 // Note: These calculations are to be kept in sync with the same 295 // calculations performed in LLD's CodeSignatureSection. 296 const uint32_t AllHeadersSize = 297 alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1, 298 CodeSignature.Align); 299 const uint32_t BlockCount = 300 (StartOfCodeSignature + CodeSignature.BlockSize - 1) / 301 CodeSignature.BlockSize; 302 const uint32_t Size = 303 alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize, 304 CodeSignature.Align); 305 306 CodeSignature.StartOffset = StartOfCodeSignature; 307 CodeSignature.AllHeadersSize = AllHeadersSize; 308 CodeSignature.BlockCount = BlockCount; 309 CodeSignature.OutputFileName = OutputFileName; 310 CodeSignature.Size = Size; 311 CodeSignatureSize = Size; 312 } 313 uint64_t LinkEditSize = 314 StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit; 315 316 // Now we have determined the layout of the contents of the __LINKEDIT 317 // segment. Update its load command. 318 if (LinkEditLoadCommand) { 319 MachO::macho_load_command *MLC = LinkEditLoadCommand; 320 switch (LinkEditLoadCommand->load_command_data.cmd) { 321 case MachO::LC_SEGMENT: 322 MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command); 323 MLC->segment_command_data.fileoff = StartOfLinkEdit; 324 MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize); 325 MLC->segment_command_data.filesize = LinkEditSize; 326 break; 327 case MachO::LC_SEGMENT_64: 328 MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64); 329 MLC->segment_command_64_data.fileoff = StartOfLinkEdit; 330 MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize); 331 MLC->segment_command_64_data.filesize = LinkEditSize; 332 break; 333 } 334 } 335 336 for (LoadCommand &LC : O.LoadCommands) { 337 auto &MLC = LC.MachOLoadCommand; 338 auto cmd = MLC.load_command_data.cmd; 339 switch (cmd) { 340 case MachO::LC_CODE_SIGNATURE: 341 MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature; 342 MLC.linkedit_data_command_data.datasize = CodeSignatureSize; 343 break; 344 case MachO::LC_DYLIB_CODE_SIGN_DRS: 345 MLC.linkedit_data_command_data.dataoff = StartOfDylibCodeSignDRs; 346 MLC.linkedit_data_command_data.datasize = O.DylibCodeSignDRs.Data.size(); 347 break; 348 case MachO::LC_SYMTAB: 349 MLC.symtab_command_data.symoff = StartOfSymbols; 350 MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); 351 MLC.symtab_command_data.stroff = StartOfSymbolStrings; 352 MLC.symtab_command_data.strsize = StrTableBuilder.getSize(); 353 break; 354 case MachO::LC_DYSYMTAB: { 355 if (MLC.dysymtab_command_data.ntoc != 0 || 356 MLC.dysymtab_command_data.nmodtab != 0 || 357 MLC.dysymtab_command_data.nextrefsyms != 0 || 358 MLC.dysymtab_command_data.nlocrel != 0 || 359 MLC.dysymtab_command_data.nextrel != 0) 360 return createStringError(llvm::errc::not_supported, 361 "shared library is not yet supported"); 362 363 if (!O.IndirectSymTable.Symbols.empty()) { 364 MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols; 365 MLC.dysymtab_command_data.nindirectsyms = 366 O.IndirectSymTable.Symbols.size(); 367 } 368 369 updateDySymTab(MLC); 370 break; 371 } 372 case MachO::LC_DATA_IN_CODE: 373 MLC.linkedit_data_command_data.dataoff = StartOfDataInCode; 374 MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size(); 375 break; 376 case MachO::LC_LINKER_OPTIMIZATION_HINT: 377 MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint; 378 MLC.linkedit_data_command_data.datasize = 379 O.LinkerOptimizationHint.Data.size(); 380 break; 381 case MachO::LC_FUNCTION_STARTS: 382 MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts; 383 MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size(); 384 break; 385 case MachO::LC_DYLD_CHAINED_FIXUPS: 386 MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups; 387 MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size(); 388 break; 389 case MachO::LC_DYLD_EXPORTS_TRIE: 390 MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie; 391 MLC.linkedit_data_command_data.datasize = DyldExportsTrieSize; 392 break; 393 case MachO::LC_DYLD_INFO: 394 case MachO::LC_DYLD_INFO_ONLY: 395 MLC.dyld_info_command_data.rebase_off = 396 O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo; 397 MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size(); 398 MLC.dyld_info_command_data.bind_off = 399 O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo; 400 MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size(); 401 MLC.dyld_info_command_data.weak_bind_off = 402 O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo; 403 MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size(); 404 MLC.dyld_info_command_data.lazy_bind_off = 405 O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo; 406 MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size(); 407 MLC.dyld_info_command_data.export_off = 408 O.Exports.Trie.empty() ? 0 : StartOfExportTrie; 409 MLC.dyld_info_command_data.export_size = DyldInfoExportsTrieSize; 410 break; 411 // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in 412 // <mach-o/loader.h> is not an offset in the binary file, instead, it is a 413 // relative virtual address. At the moment modification of the __TEXT 414 // segment of executables isn't supported anyway (e.g. data in code entries 415 // are not recalculated). Moreover, in general 416 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because 417 // without making additional assumptions (e.g. that the entire __TEXT 418 // segment should be encrypted) we do not know how to recalculate the 419 // boundaries of the encrypted part. For now just copy over these load 420 // commands until we encounter a real world usecase where 421 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted. 422 case MachO::LC_ENCRYPTION_INFO: 423 case MachO::LC_ENCRYPTION_INFO_64: 424 case MachO::LC_LOAD_DYLINKER: 425 case MachO::LC_MAIN: 426 case MachO::LC_RPATH: 427 case MachO::LC_SEGMENT: 428 case MachO::LC_SEGMENT_64: 429 case MachO::LC_VERSION_MIN_MACOSX: 430 case MachO::LC_VERSION_MIN_IPHONEOS: 431 case MachO::LC_VERSION_MIN_TVOS: 432 case MachO::LC_VERSION_MIN_WATCHOS: 433 case MachO::LC_BUILD_VERSION: 434 case MachO::LC_ID_DYLIB: 435 case MachO::LC_LOAD_DYLIB: 436 case MachO::LC_LOAD_WEAK_DYLIB: 437 case MachO::LC_UUID: 438 case MachO::LC_SOURCE_VERSION: 439 case MachO::LC_THREAD: 440 case MachO::LC_UNIXTHREAD: 441 case MachO::LC_SUB_FRAMEWORK: 442 case MachO::LC_SUB_UMBRELLA: 443 case MachO::LC_SUB_CLIENT: 444 case MachO::LC_SUB_LIBRARY: 445 case MachO::LC_LINKER_OPTION: 446 // Nothing to update. 447 break; 448 default: 449 // Abort if it's unsupported in order to prevent corrupting the object. 450 return createStringError(llvm::errc::not_supported, 451 "unsupported load command (cmd=0x%x)", cmd); 452 } 453 } 454 455 return Error::success(); 456 } 457 458 Error MachOLayoutBuilder::layout() { 459 O.Header.NCmds = O.LoadCommands.size(); 460 O.Header.SizeOfCmds = computeSizeOfCmds(); 461 constructStringTable(); 462 updateSymbolIndexes(); 463 uint64_t Offset = layoutSegments(); 464 Offset = layoutRelocations(Offset); 465 return layoutTail(Offset); 466 } 467