1 //===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MachOLayoutBuilder.h" 10 #include "llvm/Support/Alignment.h" 11 #include "llvm/Support/Errc.h" 12 #include "llvm/Support/ErrorHandling.h" 13 #include "llvm/Support/SystemZ/zOSSupport.h" 14 15 using namespace llvm; 16 using namespace llvm::objcopy::macho; 17 18 StringTableBuilder::Kind 19 MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) { 20 if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) 21 return Is64Bit ? StringTableBuilder::MachO64 : StringTableBuilder::MachO; 22 return Is64Bit ? StringTableBuilder::MachO64Linked 23 : StringTableBuilder::MachOLinked; 24 } 25 26 uint32_t MachOLayoutBuilder::computeSizeOfCmds() const { 27 uint32_t Size = 0; 28 for (const LoadCommand &LC : O.LoadCommands) { 29 const MachO::macho_load_command &MLC = LC.MachOLoadCommand; 30 auto cmd = MLC.load_command_data.cmd; 31 switch (cmd) { 32 case MachO::LC_SEGMENT: 33 Size += sizeof(MachO::segment_command) + 34 sizeof(MachO::section) * LC.Sections.size(); 35 continue; 36 case MachO::LC_SEGMENT_64: 37 Size += sizeof(MachO::segment_command_64) + 38 sizeof(MachO::section_64) * LC.Sections.size(); 39 continue; 40 } 41 42 switch (cmd) { 43 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 44 case MachO::LCName: \ 45 Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \ 46 break; 47 #include "llvm/BinaryFormat/MachO.def" 48 #undef HANDLE_LOAD_COMMAND 49 } 50 } 51 52 return Size; 53 } 54 55 void MachOLayoutBuilder::constructStringTable() { 56 for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols) 57 StrTableBuilder.add(Sym->Name); 58 StrTableBuilder.finalize(); 59 } 60 61 void MachOLayoutBuilder::updateSymbolIndexes() { 62 uint32_t Index = 0; 63 for (auto &Symbol : O.SymTable.Symbols) 64 Symbol->Index = Index++; 65 } 66 67 // Updates the index and the number of local/external/undefined symbols. 68 void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) { 69 assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB); 70 // Make sure that nlist entries in the symbol table are sorted by the those 71 // types. The order is: local < defined external < undefined external. 72 assert(llvm::is_sorted(O.SymTable.Symbols, 73 [](const std::unique_ptr<SymbolEntry> &A, 74 const std::unique_ptr<SymbolEntry> &B) { 75 bool AL = A->isLocalSymbol(), 76 BL = B->isLocalSymbol(); 77 if (AL != BL) 78 return AL; 79 return !AL && !A->isUndefinedSymbol() && 80 B->isUndefinedSymbol(); 81 }) && 82 "Symbols are not sorted by their types."); 83 84 uint32_t NumLocalSymbols = 0; 85 auto Iter = O.SymTable.Symbols.begin(); 86 auto End = O.SymTable.Symbols.end(); 87 for (; Iter != End; ++Iter) { 88 if ((*Iter)->isExternalSymbol()) 89 break; 90 91 ++NumLocalSymbols; 92 } 93 94 uint32_t NumExtDefSymbols = 0; 95 for (; Iter != End; ++Iter) { 96 if ((*Iter)->isUndefinedSymbol()) 97 break; 98 99 ++NumExtDefSymbols; 100 } 101 102 MLC.dysymtab_command_data.ilocalsym = 0; 103 MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols; 104 MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols; 105 MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols; 106 MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols; 107 MLC.dysymtab_command_data.nundefsym = 108 O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols); 109 } 110 111 // Recomputes and updates offset and size fields in load commands and sections 112 // since they could be modified. 113 uint64_t MachOLayoutBuilder::layoutSegments() { 114 auto HeaderSize = 115 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 116 const bool IsObjectFile = 117 O.Header.FileType == MachO::HeaderFileType::MH_OBJECT; 118 uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0; 119 for (LoadCommand &LC : O.LoadCommands) { 120 auto &MLC = LC.MachOLoadCommand; 121 StringRef Segname; 122 uint64_t SegmentVmAddr; 123 uint64_t SegmentVmSize; 124 switch (MLC.load_command_data.cmd) { 125 case MachO::LC_SEGMENT: 126 SegmentVmAddr = MLC.segment_command_data.vmaddr; 127 SegmentVmSize = MLC.segment_command_data.vmsize; 128 Segname = StringRef(MLC.segment_command_data.segname, 129 strnlen(MLC.segment_command_data.segname, 130 sizeof(MLC.segment_command_data.segname))); 131 break; 132 case MachO::LC_SEGMENT_64: 133 SegmentVmAddr = MLC.segment_command_64_data.vmaddr; 134 SegmentVmSize = MLC.segment_command_64_data.vmsize; 135 Segname = StringRef(MLC.segment_command_64_data.segname, 136 strnlen(MLC.segment_command_64_data.segname, 137 sizeof(MLC.segment_command_64_data.segname))); 138 break; 139 default: 140 continue; 141 } 142 143 if (Segname == "__LINKEDIT") { 144 // We update the __LINKEDIT segment later (in layoutTail). 145 assert(LC.Sections.empty() && "__LINKEDIT segment has sections"); 146 LinkEditLoadCommand = &MLC; 147 continue; 148 } 149 150 // Update file offsets and sizes of sections. 151 uint64_t SegOffset = Offset; 152 uint64_t SegFileSize = 0; 153 uint64_t VMSize = 0; 154 for (std::unique_ptr<Section> &Sec : LC.Sections) { 155 assert(SegmentVmAddr <= Sec->Addr && 156 "Section's address cannot be smaller than Segment's one"); 157 uint32_t SectOffset = Sec->Addr - SegmentVmAddr; 158 if (IsObjectFile) { 159 if (!Sec->hasValidOffset()) { 160 Sec->Offset = 0; 161 } else { 162 uint64_t PaddingSize = 163 offsetToAlignment(SegFileSize, Align(1ull << Sec->Align)); 164 Sec->Offset = SegOffset + SegFileSize + PaddingSize; 165 Sec->Size = Sec->Content.size(); 166 SegFileSize += PaddingSize + Sec->Size; 167 } 168 } else { 169 if (!Sec->hasValidOffset()) { 170 Sec->Offset = 0; 171 } else { 172 Sec->Offset = SegOffset + SectOffset; 173 Sec->Size = Sec->Content.size(); 174 SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size); 175 } 176 } 177 VMSize = std::max(VMSize, SectOffset + Sec->Size); 178 } 179 180 if (IsObjectFile) { 181 Offset += SegFileSize; 182 } else { 183 Offset = alignTo(Offset + SegFileSize, PageSize); 184 SegFileSize = alignTo(SegFileSize, PageSize); 185 // Use the original vmsize if the segment is __PAGEZERO. 186 VMSize = 187 Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize); 188 } 189 190 switch (MLC.load_command_data.cmd) { 191 case MachO::LC_SEGMENT: 192 MLC.segment_command_data.cmdsize = 193 sizeof(MachO::segment_command) + 194 sizeof(MachO::section) * LC.Sections.size(); 195 MLC.segment_command_data.nsects = LC.Sections.size(); 196 MLC.segment_command_data.fileoff = SegOffset; 197 MLC.segment_command_data.vmsize = VMSize; 198 MLC.segment_command_data.filesize = SegFileSize; 199 break; 200 case MachO::LC_SEGMENT_64: 201 MLC.segment_command_64_data.cmdsize = 202 sizeof(MachO::segment_command_64) + 203 sizeof(MachO::section_64) * LC.Sections.size(); 204 MLC.segment_command_64_data.nsects = LC.Sections.size(); 205 MLC.segment_command_64_data.fileoff = SegOffset; 206 MLC.segment_command_64_data.vmsize = VMSize; 207 MLC.segment_command_64_data.filesize = SegFileSize; 208 break; 209 } 210 } 211 212 return Offset; 213 } 214 215 uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) { 216 for (LoadCommand &LC : O.LoadCommands) 217 for (std::unique_ptr<Section> &Sec : LC.Sections) { 218 Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset; 219 Sec->NReloc = Sec->Relocations.size(); 220 Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc; 221 } 222 223 return Offset; 224 } 225 226 Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { 227 // If we are building the layout of an executable or dynamic library 228 // which does not have any segments other than __LINKEDIT, 229 // the Offset can be equal to zero by this time. It happens because of the 230 // convention that in such cases the file offsets specified by LC_SEGMENT 231 // start with zero (unlike the case of a relocatable object file). 232 const uint64_t HeaderSize = 233 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 234 assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) || 235 Offset >= HeaderSize + O.Header.SizeOfCmds) && 236 "Incorrect tail offset"); 237 Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds); 238 239 // The exports trie can be in either LC_DYLD_INFO or in 240 // LC_DYLD_EXPORTS_TRIE, but not both. 241 size_t DyldInfoExportsTrieSize = 0; 242 size_t DyldExportsTrieSize = 0; 243 for (const auto &LC : O.LoadCommands) { 244 switch (LC.MachOLoadCommand.load_command_data.cmd) { 245 case MachO::LC_DYLD_INFO: 246 case MachO::LC_DYLD_INFO_ONLY: 247 DyldInfoExportsTrieSize = O.Exports.Trie.size(); 248 break; 249 case MachO::LC_DYLD_EXPORTS_TRIE: 250 DyldExportsTrieSize = O.Exports.Trie.size(); 251 break; 252 default: 253 break; 254 } 255 } 256 assert((DyldInfoExportsTrieSize == 0 || DyldExportsTrieSize == 0) && 257 "Export trie in both LCs"); 258 259 uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); 260 uint64_t StartOfLinkEdit = Offset; 261 262 // The order of LINKEDIT elements is as follows: 263 // rebase info, binding info, weak binding info, lazy binding info, export 264 // trie, chained fixups, dyld exports trie, function starts, data-in-code, 265 // symbol table, indirect symbol table, symbol table strings, 266 // dylib codesign drs, and code signature. 267 auto updateOffset = [&Offset](size_t Size) { 268 uint64_t PreviousOffset = Offset; 269 Offset += Size; 270 return PreviousOffset; 271 }; 272 273 uint64_t StartOfRebaseInfo = updateOffset(O.Rebases.Opcodes.size()); 274 uint64_t StartOfBindingInfo = updateOffset(O.Binds.Opcodes.size()); 275 uint64_t StartOfWeakBindingInfo = updateOffset(O.WeakBinds.Opcodes.size()); 276 uint64_t StartOfLazyBindingInfo = updateOffset(O.LazyBinds.Opcodes.size()); 277 uint64_t StartOfExportTrie = updateOffset(DyldInfoExportsTrieSize); 278 uint64_t StartOfChainedFixups = updateOffset(O.ChainedFixups.Data.size()); 279 uint64_t StartOfDyldExportsTrie = updateOffset(DyldExportsTrieSize); 280 uint64_t StartOfFunctionStarts = updateOffset(O.FunctionStarts.Data.size()); 281 uint64_t StartOfDataInCode = updateOffset(O.DataInCode.Data.size()); 282 uint64_t StartOfLinkerOptimizationHint = 283 updateOffset(O.LinkerOptimizationHint.Data.size()); 284 uint64_t StartOfSymbols = updateOffset(NListSize * O.SymTable.Symbols.size()); 285 uint64_t StartOfIndirectSymbols = 286 updateOffset(sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); 287 uint64_t StartOfSymbolStrings = updateOffset(StrTableBuilder.getSize()); 288 uint64_t StartOfDylibCodeSignDRs = updateOffset(O.DylibCodeSignDRs.Data.size()); 289 290 uint64_t StartOfCodeSignature = Offset; 291 uint32_t CodeSignatureSize = 0; 292 if (O.CodeSignatureCommandIndex) { 293 StartOfCodeSignature = alignTo(StartOfCodeSignature, 16); 294 295 // Note: These calculations are to be kept in sync with the same 296 // calculations performed in LLD's CodeSignatureSection. 297 const uint32_t AllHeadersSize = 298 alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1, 299 CodeSignature.Align); 300 const uint32_t BlockCount = 301 (StartOfCodeSignature + CodeSignature.BlockSize - 1) / 302 CodeSignature.BlockSize; 303 const uint32_t Size = 304 alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize, 305 CodeSignature.Align); 306 307 CodeSignature.StartOffset = StartOfCodeSignature; 308 CodeSignature.AllHeadersSize = AllHeadersSize; 309 CodeSignature.BlockCount = BlockCount; 310 CodeSignature.OutputFileName = OutputFileName; 311 CodeSignature.Size = Size; 312 CodeSignatureSize = Size; 313 } 314 uint64_t LinkEditSize = 315 StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit; 316 317 // Now we have determined the layout of the contents of the __LINKEDIT 318 // segment. Update its load command. 319 if (LinkEditLoadCommand) { 320 MachO::macho_load_command *MLC = LinkEditLoadCommand; 321 switch (LinkEditLoadCommand->load_command_data.cmd) { 322 case MachO::LC_SEGMENT: 323 MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command); 324 MLC->segment_command_data.fileoff = StartOfLinkEdit; 325 MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize); 326 MLC->segment_command_data.filesize = LinkEditSize; 327 break; 328 case MachO::LC_SEGMENT_64: 329 MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64); 330 MLC->segment_command_64_data.fileoff = StartOfLinkEdit; 331 MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize); 332 MLC->segment_command_64_data.filesize = LinkEditSize; 333 break; 334 } 335 } 336 337 for (LoadCommand &LC : O.LoadCommands) { 338 auto &MLC = LC.MachOLoadCommand; 339 auto cmd = MLC.load_command_data.cmd; 340 switch (cmd) { 341 case MachO::LC_CODE_SIGNATURE: 342 MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature; 343 MLC.linkedit_data_command_data.datasize = CodeSignatureSize; 344 break; 345 case MachO::LC_DYLIB_CODE_SIGN_DRS: 346 MLC.linkedit_data_command_data.dataoff = StartOfDylibCodeSignDRs; 347 MLC.linkedit_data_command_data.datasize = O.DylibCodeSignDRs.Data.size(); 348 break; 349 case MachO::LC_SYMTAB: 350 MLC.symtab_command_data.symoff = StartOfSymbols; 351 MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); 352 MLC.symtab_command_data.stroff = StartOfSymbolStrings; 353 MLC.symtab_command_data.strsize = StrTableBuilder.getSize(); 354 break; 355 case MachO::LC_DYSYMTAB: { 356 if (MLC.dysymtab_command_data.ntoc != 0 || 357 MLC.dysymtab_command_data.nmodtab != 0 || 358 MLC.dysymtab_command_data.nextrefsyms != 0 || 359 MLC.dysymtab_command_data.nlocrel != 0 || 360 MLC.dysymtab_command_data.nextrel != 0) 361 return createStringError(llvm::errc::not_supported, 362 "shared library is not yet supported"); 363 364 if (!O.IndirectSymTable.Symbols.empty()) { 365 MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols; 366 MLC.dysymtab_command_data.nindirectsyms = 367 O.IndirectSymTable.Symbols.size(); 368 } 369 370 updateDySymTab(MLC); 371 break; 372 } 373 case MachO::LC_DATA_IN_CODE: 374 MLC.linkedit_data_command_data.dataoff = StartOfDataInCode; 375 MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size(); 376 break; 377 case MachO::LC_LINKER_OPTIMIZATION_HINT: 378 MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint; 379 MLC.linkedit_data_command_data.datasize = 380 O.LinkerOptimizationHint.Data.size(); 381 break; 382 case MachO::LC_FUNCTION_STARTS: 383 MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts; 384 MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size(); 385 break; 386 case MachO::LC_DYLD_CHAINED_FIXUPS: 387 MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups; 388 MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size(); 389 break; 390 case MachO::LC_DYLD_EXPORTS_TRIE: 391 MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie; 392 MLC.linkedit_data_command_data.datasize = DyldExportsTrieSize; 393 break; 394 case MachO::LC_DYLD_INFO: 395 case MachO::LC_DYLD_INFO_ONLY: 396 MLC.dyld_info_command_data.rebase_off = 397 O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo; 398 MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size(); 399 MLC.dyld_info_command_data.bind_off = 400 O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo; 401 MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size(); 402 MLC.dyld_info_command_data.weak_bind_off = 403 O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo; 404 MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size(); 405 MLC.dyld_info_command_data.lazy_bind_off = 406 O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo; 407 MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size(); 408 MLC.dyld_info_command_data.export_off = 409 O.Exports.Trie.empty() ? 0 : StartOfExportTrie; 410 MLC.dyld_info_command_data.export_size = DyldInfoExportsTrieSize; 411 break; 412 // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in 413 // <mach-o/loader.h> is not an offset in the binary file, instead, it is a 414 // relative virtual address. At the moment modification of the __TEXT 415 // segment of executables isn't supported anyway (e.g. data in code entries 416 // are not recalculated). Moreover, in general 417 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because 418 // without making additional assumptions (e.g. that the entire __TEXT 419 // segment should be encrypted) we do not know how to recalculate the 420 // boundaries of the encrypted part. For now just copy over these load 421 // commands until we encounter a real world usecase where 422 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted. 423 case MachO::LC_ENCRYPTION_INFO: 424 case MachO::LC_ENCRYPTION_INFO_64: 425 case MachO::LC_LOAD_DYLINKER: 426 case MachO::LC_MAIN: 427 case MachO::LC_RPATH: 428 case MachO::LC_SEGMENT: 429 case MachO::LC_SEGMENT_64: 430 case MachO::LC_VERSION_MIN_MACOSX: 431 case MachO::LC_VERSION_MIN_IPHONEOS: 432 case MachO::LC_VERSION_MIN_TVOS: 433 case MachO::LC_VERSION_MIN_WATCHOS: 434 case MachO::LC_BUILD_VERSION: 435 case MachO::LC_ID_DYLIB: 436 case MachO::LC_LOAD_DYLIB: 437 case MachO::LC_LOAD_WEAK_DYLIB: 438 case MachO::LC_UUID: 439 case MachO::LC_SOURCE_VERSION: 440 case MachO::LC_THREAD: 441 case MachO::LC_UNIXTHREAD: 442 case MachO::LC_SUB_FRAMEWORK: 443 case MachO::LC_SUB_UMBRELLA: 444 case MachO::LC_SUB_CLIENT: 445 case MachO::LC_SUB_LIBRARY: 446 case MachO::LC_LINKER_OPTION: 447 // Nothing to update. 448 break; 449 default: 450 // Abort if it's unsupported in order to prevent corrupting the object. 451 return createStringError(llvm::errc::not_supported, 452 "unsupported load command (cmd=0x%x)", cmd); 453 } 454 } 455 456 return Error::success(); 457 } 458 459 Error MachOLayoutBuilder::layout() { 460 O.Header.NCmds = O.LoadCommands.size(); 461 O.Header.SizeOfCmds = computeSizeOfCmds(); 462 constructStringTable(); 463 updateSymbolIndexes(); 464 uint64_t Offset = layoutSegments(); 465 Offset = layoutRelocations(Offset); 466 return layoutTail(Offset); 467 } 468