1 //===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MachOLayoutBuilder.h" 10 #include "llvm/Support/Alignment.h" 11 #include "llvm/Support/Errc.h" 12 #include "llvm/Support/ErrorHandling.h" 13 14 using namespace llvm; 15 using namespace llvm::objcopy::macho; 16 17 StringTableBuilder::Kind 18 MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) { 19 if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) 20 return Is64Bit ? StringTableBuilder::MachO64 : StringTableBuilder::MachO; 21 return Is64Bit ? StringTableBuilder::MachO64Linked 22 : StringTableBuilder::MachOLinked; 23 } 24 25 uint32_t MachOLayoutBuilder::computeSizeOfCmds() const { 26 uint32_t Size = 0; 27 for (const LoadCommand &LC : O.LoadCommands) { 28 const MachO::macho_load_command &MLC = LC.MachOLoadCommand; 29 auto cmd = MLC.load_command_data.cmd; 30 switch (cmd) { 31 case MachO::LC_SEGMENT: 32 Size += sizeof(MachO::segment_command) + 33 sizeof(MachO::section) * LC.Sections.size(); 34 continue; 35 case MachO::LC_SEGMENT_64: 36 Size += sizeof(MachO::segment_command_64) + 37 sizeof(MachO::section_64) * LC.Sections.size(); 38 continue; 39 } 40 41 switch (cmd) { 42 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 43 case MachO::LCName: \ 44 Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \ 45 break; 46 #include "llvm/BinaryFormat/MachO.def" 47 #undef HANDLE_LOAD_COMMAND 48 } 49 } 50 51 return Size; 52 } 53 54 void MachOLayoutBuilder::constructStringTable() { 55 for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols) 56 StrTableBuilder.add(Sym->Name); 57 StrTableBuilder.finalize(); 58 } 59 60 void MachOLayoutBuilder::updateSymbolIndexes() { 61 uint32_t Index = 0; 62 for (auto &Symbol : O.SymTable.Symbols) 63 Symbol->Index = Index++; 64 } 65 66 // Updates the index and the number of local/external/undefined symbols. 67 void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) { 68 assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB); 69 // Make sure that nlist entries in the symbol table are sorted by the those 70 // types. The order is: local < defined external < undefined external. 71 assert(llvm::is_sorted(O.SymTable.Symbols, 72 [](const std::unique_ptr<SymbolEntry> &A, 73 const std::unique_ptr<SymbolEntry> &B) { 74 bool AL = A->isLocalSymbol(), 75 BL = B->isLocalSymbol(); 76 if (AL != BL) 77 return AL; 78 return !AL && !A->isUndefinedSymbol() && 79 B->isUndefinedSymbol(); 80 }) && 81 "Symbols are not sorted by their types."); 82 83 uint32_t NumLocalSymbols = 0; 84 auto Iter = O.SymTable.Symbols.begin(); 85 auto End = O.SymTable.Symbols.end(); 86 for (; Iter != End; ++Iter) { 87 if ((*Iter)->isExternalSymbol()) 88 break; 89 90 ++NumLocalSymbols; 91 } 92 93 uint32_t NumExtDefSymbols = 0; 94 for (; Iter != End; ++Iter) { 95 if ((*Iter)->isUndefinedSymbol()) 96 break; 97 98 ++NumExtDefSymbols; 99 } 100 101 MLC.dysymtab_command_data.ilocalsym = 0; 102 MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols; 103 MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols; 104 MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols; 105 MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols; 106 MLC.dysymtab_command_data.nundefsym = 107 O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols); 108 } 109 110 // Recomputes and updates offset and size fields in load commands and sections 111 // since they could be modified. 112 uint64_t MachOLayoutBuilder::layoutSegments() { 113 auto HeaderSize = 114 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 115 const bool IsObjectFile = 116 O.Header.FileType == MachO::HeaderFileType::MH_OBJECT; 117 uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0; 118 for (LoadCommand &LC : O.LoadCommands) { 119 auto &MLC = LC.MachOLoadCommand; 120 StringRef Segname; 121 uint64_t SegmentVmAddr; 122 uint64_t SegmentVmSize; 123 switch (MLC.load_command_data.cmd) { 124 case MachO::LC_SEGMENT: 125 SegmentVmAddr = MLC.segment_command_data.vmaddr; 126 SegmentVmSize = MLC.segment_command_data.vmsize; 127 Segname = StringRef(MLC.segment_command_data.segname, 128 strnlen(MLC.segment_command_data.segname, 129 sizeof(MLC.segment_command_data.segname))); 130 break; 131 case MachO::LC_SEGMENT_64: 132 SegmentVmAddr = MLC.segment_command_64_data.vmaddr; 133 SegmentVmSize = MLC.segment_command_64_data.vmsize; 134 Segname = StringRef(MLC.segment_command_64_data.segname, 135 strnlen(MLC.segment_command_64_data.segname, 136 sizeof(MLC.segment_command_64_data.segname))); 137 break; 138 default: 139 continue; 140 } 141 142 if (Segname == "__LINKEDIT") { 143 // We update the __LINKEDIT segment later (in layoutTail). 144 assert(LC.Sections.empty() && "__LINKEDIT segment has sections"); 145 LinkEditLoadCommand = &MLC; 146 continue; 147 } 148 149 // Update file offsets and sizes of sections. 150 uint64_t SegOffset = Offset; 151 uint64_t SegFileSize = 0; 152 uint64_t VMSize = 0; 153 for (std::unique_ptr<Section> &Sec : LC.Sections) { 154 assert(SegmentVmAddr <= Sec->Addr && 155 "Section's address cannot be smaller than Segment's one"); 156 uint32_t SectOffset = Sec->Addr - SegmentVmAddr; 157 if (IsObjectFile) { 158 if (!Sec->hasValidOffset()) { 159 Sec->Offset = 0; 160 } else { 161 uint64_t PaddingSize = 162 offsetToAlignment(SegFileSize, Align(1ull << Sec->Align)); 163 Sec->Offset = SegOffset + SegFileSize + PaddingSize; 164 Sec->Size = Sec->Content.size(); 165 SegFileSize += PaddingSize + Sec->Size; 166 } 167 } else { 168 if (!Sec->hasValidOffset()) { 169 Sec->Offset = 0; 170 } else { 171 Sec->Offset = SegOffset + SectOffset; 172 Sec->Size = Sec->Content.size(); 173 SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size); 174 } 175 } 176 VMSize = std::max(VMSize, SectOffset + Sec->Size); 177 } 178 179 if (IsObjectFile) { 180 Offset += SegFileSize; 181 } else { 182 Offset = alignTo(Offset + SegFileSize, PageSize); 183 SegFileSize = alignTo(SegFileSize, PageSize); 184 // Use the original vmsize if the segment is __PAGEZERO. 185 VMSize = 186 Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize); 187 } 188 189 switch (MLC.load_command_data.cmd) { 190 case MachO::LC_SEGMENT: 191 MLC.segment_command_data.cmdsize = 192 sizeof(MachO::segment_command) + 193 sizeof(MachO::section) * LC.Sections.size(); 194 MLC.segment_command_data.nsects = LC.Sections.size(); 195 MLC.segment_command_data.fileoff = SegOffset; 196 MLC.segment_command_data.vmsize = VMSize; 197 MLC.segment_command_data.filesize = SegFileSize; 198 break; 199 case MachO::LC_SEGMENT_64: 200 MLC.segment_command_64_data.cmdsize = 201 sizeof(MachO::segment_command_64) + 202 sizeof(MachO::section_64) * LC.Sections.size(); 203 MLC.segment_command_64_data.nsects = LC.Sections.size(); 204 MLC.segment_command_64_data.fileoff = SegOffset; 205 MLC.segment_command_64_data.vmsize = VMSize; 206 MLC.segment_command_64_data.filesize = SegFileSize; 207 break; 208 } 209 } 210 211 return Offset; 212 } 213 214 uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) { 215 for (LoadCommand &LC : O.LoadCommands) 216 for (std::unique_ptr<Section> &Sec : LC.Sections) { 217 Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset; 218 Sec->NReloc = Sec->Relocations.size(); 219 Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc; 220 } 221 222 return Offset; 223 } 224 225 Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { 226 // If we are building the layout of an executable or dynamic library 227 // which does not have any segments other than __LINKEDIT, 228 // the Offset can be equal to zero by this time. It happens because of the 229 // convention that in such cases the file offsets specified by LC_SEGMENT 230 // start with zero (unlike the case of a relocatable object file). 231 const uint64_t HeaderSize = 232 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 233 assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) || 234 Offset >= HeaderSize + O.Header.SizeOfCmds) && 235 "Incorrect tail offset"); 236 Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds); 237 238 // The order of LINKEDIT elements is as follows: 239 // rebase info, binding info, weak binding info, lazy binding info, export 240 // trie, data-in-code, symbol table, indirect symbol table, symbol table 241 // strings, code signature. 242 uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); 243 uint64_t StartOfLinkEdit = Offset; 244 uint64_t StartOfRebaseInfo = StartOfLinkEdit; 245 uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size(); 246 uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size(); 247 uint64_t StartOfLazyBindingInfo = 248 StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size(); 249 uint64_t StartOfExportTrie = 250 StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size(); 251 uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size(); 252 uint64_t StartOfDyldExportsTrie = 253 StartOfFunctionStarts + O.FunctionStarts.Data.size(); 254 uint64_t StartOfChainedFixups = 255 StartOfDyldExportsTrie + O.ExportsTrie.Data.size(); 256 uint64_t StartOfDataInCode = 257 StartOfChainedFixups + O.ChainedFixups.Data.size(); 258 uint64_t StartOfLinkerOptimizationHint = 259 StartOfDataInCode + O.DataInCode.Data.size(); 260 uint64_t StartOfSymbols = 261 StartOfLinkerOptimizationHint + O.LinkerOptimizationHint.Data.size(); 262 uint64_t StartOfIndirectSymbols = 263 StartOfSymbols + NListSize * O.SymTable.Symbols.size(); 264 uint64_t StartOfSymbolStrings = 265 StartOfIndirectSymbols + 266 sizeof(uint32_t) * O.IndirectSymTable.Symbols.size(); 267 uint64_t StartOfCodeSignature = 268 StartOfSymbolStrings + StrTableBuilder.getSize(); 269 uint32_t CodeSignatureSize = 0; 270 if (O.CodeSignatureCommandIndex) { 271 StartOfCodeSignature = alignTo(StartOfCodeSignature, 16); 272 273 // Note: These calculations are to be kept in sync with the same 274 // calculations performed in LLD's CodeSignatureSection. 275 const uint32_t AllHeadersSize = 276 alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1, 277 CodeSignature.Align); 278 const uint32_t BlockCount = 279 (StartOfCodeSignature + CodeSignature.BlockSize - 1) / 280 CodeSignature.BlockSize; 281 const uint32_t Size = 282 alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize, 283 CodeSignature.Align); 284 285 CodeSignature.StartOffset = StartOfCodeSignature; 286 CodeSignature.AllHeadersSize = AllHeadersSize; 287 CodeSignature.BlockCount = BlockCount; 288 CodeSignature.OutputFileName = OutputFileName; 289 CodeSignature.Size = Size; 290 CodeSignatureSize = Size; 291 } 292 uint64_t LinkEditSize = 293 StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit; 294 295 // Now we have determined the layout of the contents of the __LINKEDIT 296 // segment. Update its load command. 297 if (LinkEditLoadCommand) { 298 MachO::macho_load_command *MLC = LinkEditLoadCommand; 299 switch (LinkEditLoadCommand->load_command_data.cmd) { 300 case MachO::LC_SEGMENT: 301 MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command); 302 MLC->segment_command_data.fileoff = StartOfLinkEdit; 303 MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize); 304 MLC->segment_command_data.filesize = LinkEditSize; 305 break; 306 case MachO::LC_SEGMENT_64: 307 MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64); 308 MLC->segment_command_64_data.fileoff = StartOfLinkEdit; 309 MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize); 310 MLC->segment_command_64_data.filesize = LinkEditSize; 311 break; 312 } 313 } 314 315 for (LoadCommand &LC : O.LoadCommands) { 316 auto &MLC = LC.MachOLoadCommand; 317 auto cmd = MLC.load_command_data.cmd; 318 switch (cmd) { 319 case MachO::LC_CODE_SIGNATURE: 320 MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature; 321 MLC.linkedit_data_command_data.datasize = CodeSignatureSize; 322 break; 323 case MachO::LC_SYMTAB: 324 MLC.symtab_command_data.symoff = StartOfSymbols; 325 MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); 326 MLC.symtab_command_data.stroff = StartOfSymbolStrings; 327 MLC.symtab_command_data.strsize = StrTableBuilder.getSize(); 328 break; 329 case MachO::LC_DYSYMTAB: { 330 if (MLC.dysymtab_command_data.ntoc != 0 || 331 MLC.dysymtab_command_data.nmodtab != 0 || 332 MLC.dysymtab_command_data.nextrefsyms != 0 || 333 MLC.dysymtab_command_data.nlocrel != 0 || 334 MLC.dysymtab_command_data.nextrel != 0) 335 return createStringError(llvm::errc::not_supported, 336 "shared library is not yet supported"); 337 338 if (!O.IndirectSymTable.Symbols.empty()) { 339 MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols; 340 MLC.dysymtab_command_data.nindirectsyms = 341 O.IndirectSymTable.Symbols.size(); 342 } 343 344 updateDySymTab(MLC); 345 break; 346 } 347 case MachO::LC_DATA_IN_CODE: 348 MLC.linkedit_data_command_data.dataoff = StartOfDataInCode; 349 MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size(); 350 break; 351 case MachO::LC_LINKER_OPTIMIZATION_HINT: 352 MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint; 353 MLC.linkedit_data_command_data.datasize = 354 O.LinkerOptimizationHint.Data.size(); 355 break; 356 case MachO::LC_FUNCTION_STARTS: 357 MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts; 358 MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size(); 359 break; 360 case MachO::LC_DYLD_CHAINED_FIXUPS: 361 MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups; 362 MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size(); 363 break; 364 case MachO::LC_DYLD_EXPORTS_TRIE: 365 MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie; 366 MLC.linkedit_data_command_data.datasize = O.ExportsTrie.Data.size(); 367 break; 368 case MachO::LC_DYLD_INFO: 369 case MachO::LC_DYLD_INFO_ONLY: 370 MLC.dyld_info_command_data.rebase_off = 371 O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo; 372 MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size(); 373 MLC.dyld_info_command_data.bind_off = 374 O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo; 375 MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size(); 376 MLC.dyld_info_command_data.weak_bind_off = 377 O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo; 378 MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size(); 379 MLC.dyld_info_command_data.lazy_bind_off = 380 O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo; 381 MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size(); 382 MLC.dyld_info_command_data.export_off = 383 O.Exports.Trie.empty() ? 0 : StartOfExportTrie; 384 MLC.dyld_info_command_data.export_size = O.Exports.Trie.size(); 385 break; 386 // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in 387 // <mach-o/loader.h> is not an offset in the binary file, instead, it is a 388 // relative virtual address. At the moment modification of the __TEXT 389 // segment of executables isn't supported anyway (e.g. data in code entries 390 // are not recalculated). Moreover, in general 391 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because 392 // without making additional assumptions (e.g. that the entire __TEXT 393 // segment should be encrypted) we do not know how to recalculate the 394 // boundaries of the encrypted part. For now just copy over these load 395 // commands until we encounter a real world usecase where 396 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted. 397 case MachO::LC_ENCRYPTION_INFO: 398 case MachO::LC_ENCRYPTION_INFO_64: 399 case MachO::LC_LOAD_DYLINKER: 400 case MachO::LC_MAIN: 401 case MachO::LC_RPATH: 402 case MachO::LC_SEGMENT: 403 case MachO::LC_SEGMENT_64: 404 case MachO::LC_VERSION_MIN_MACOSX: 405 case MachO::LC_VERSION_MIN_IPHONEOS: 406 case MachO::LC_VERSION_MIN_TVOS: 407 case MachO::LC_VERSION_MIN_WATCHOS: 408 case MachO::LC_BUILD_VERSION: 409 case MachO::LC_ID_DYLIB: 410 case MachO::LC_LOAD_DYLIB: 411 case MachO::LC_LOAD_WEAK_DYLIB: 412 case MachO::LC_UUID: 413 case MachO::LC_SOURCE_VERSION: 414 case MachO::LC_THREAD: 415 case MachO::LC_UNIXTHREAD: 416 case MachO::LC_SUB_FRAMEWORK: 417 case MachO::LC_SUB_UMBRELLA: 418 case MachO::LC_SUB_CLIENT: 419 case MachO::LC_SUB_LIBRARY: 420 case MachO::LC_LINKER_OPTION: 421 // Nothing to update. 422 break; 423 default: 424 // Abort if it's unsupported in order to prevent corrupting the object. 425 return createStringError(llvm::errc::not_supported, 426 "unsupported load command (cmd=0x%x)", cmd); 427 } 428 } 429 430 return Error::success(); 431 } 432 433 Error MachOLayoutBuilder::layout() { 434 O.Header.NCmds = O.LoadCommands.size(); 435 O.Header.SizeOfCmds = computeSizeOfCmds(); 436 constructStringTable(); 437 updateSymbolIndexes(); 438 uint64_t Offset = layoutSegments(); 439 Offset = layoutRelocations(Offset); 440 return layoutTail(Offset); 441 } 442