181ad6265SDimitry Andric //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===// 281ad6265SDimitry Andric // 381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 681ad6265SDimitry Andric // 781ad6265SDimitry Andric //===----------------------------------------------------------------------===// 881ad6265SDimitry Andric 981ad6265SDimitry Andric #include "MachOObject.h" 1081ad6265SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 11*5f757f3fSDimitry Andric #include "llvm/Support/SystemZ/zOSSupport.h" 1281ad6265SDimitry Andric #include <unordered_set> 1381ad6265SDimitry Andric 1481ad6265SDimitry Andric using namespace llvm; 1581ad6265SDimitry Andric using namespace llvm::objcopy::macho; 1681ad6265SDimitry Andric 17bdd1243dSDimitry Andric Section::Section(StringRef SegName, StringRef SectName) 18bdd1243dSDimitry Andric : Segname(SegName), Sectname(SectName), 19bdd1243dSDimitry Andric CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {} 20bdd1243dSDimitry Andric 21bdd1243dSDimitry Andric Section::Section(StringRef SegName, StringRef SectName, StringRef Content) 22bdd1243dSDimitry Andric : Segname(SegName), Sectname(SectName), 23bdd1243dSDimitry Andric CanonicalName((Twine(SegName) + Twine(',') + SectName).str()), 24bdd1243dSDimitry Andric Content(Content) {} 25bdd1243dSDimitry Andric 2681ad6265SDimitry Andric const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const { 2781ad6265SDimitry Andric assert(Index < Symbols.size() && "invalid symbol index"); 2881ad6265SDimitry Andric return Symbols[Index].get(); 2981ad6265SDimitry Andric } 3081ad6265SDimitry Andric 3181ad6265SDimitry Andric SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) { 3281ad6265SDimitry Andric return const_cast<SymbolEntry *>( 3381ad6265SDimitry Andric static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index)); 3481ad6265SDimitry Andric } 3581ad6265SDimitry Andric 3681ad6265SDimitry Andric void SymbolTable::removeSymbols( 3781ad6265SDimitry Andric function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) { 3881ad6265SDimitry Andric llvm::erase_if(Symbols, ToRemove); 3981ad6265SDimitry Andric } 4081ad6265SDimitry Andric 4181ad6265SDimitry Andric void Object::updateLoadCommandIndexes() { 4281ad6265SDimitry Andric static constexpr char TextSegmentName[] = "__TEXT"; 4381ad6265SDimitry Andric // Update indices of special load commands 4481ad6265SDimitry Andric for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) { 4581ad6265SDimitry Andric LoadCommand &LC = LoadCommands[Index]; 4681ad6265SDimitry Andric switch (LC.MachOLoadCommand.load_command_data.cmd) { 4781ad6265SDimitry Andric case MachO::LC_CODE_SIGNATURE: 4881ad6265SDimitry Andric CodeSignatureCommandIndex = Index; 4981ad6265SDimitry Andric break; 5081ad6265SDimitry Andric case MachO::LC_SEGMENT: 5181ad6265SDimitry Andric if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) == 5281ad6265SDimitry Andric TextSegmentName) 5381ad6265SDimitry Andric TextSegmentCommandIndex = Index; 5481ad6265SDimitry Andric break; 5581ad6265SDimitry Andric case MachO::LC_SEGMENT_64: 5681ad6265SDimitry Andric if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) == 5781ad6265SDimitry Andric TextSegmentName) 5881ad6265SDimitry Andric TextSegmentCommandIndex = Index; 5981ad6265SDimitry Andric break; 6081ad6265SDimitry Andric case MachO::LC_SYMTAB: 6181ad6265SDimitry Andric SymTabCommandIndex = Index; 6281ad6265SDimitry Andric break; 6381ad6265SDimitry Andric case MachO::LC_DYSYMTAB: 6481ad6265SDimitry Andric DySymTabCommandIndex = Index; 6581ad6265SDimitry Andric break; 6681ad6265SDimitry Andric case MachO::LC_DYLD_INFO: 6781ad6265SDimitry Andric case MachO::LC_DYLD_INFO_ONLY: 6881ad6265SDimitry Andric DyLdInfoCommandIndex = Index; 6981ad6265SDimitry Andric break; 7081ad6265SDimitry Andric case MachO::LC_DATA_IN_CODE: 7181ad6265SDimitry Andric DataInCodeCommandIndex = Index; 7281ad6265SDimitry Andric break; 7381ad6265SDimitry Andric case MachO::LC_LINKER_OPTIMIZATION_HINT: 7481ad6265SDimitry Andric LinkerOptimizationHintCommandIndex = Index; 7581ad6265SDimitry Andric break; 7681ad6265SDimitry Andric case MachO::LC_FUNCTION_STARTS: 7781ad6265SDimitry Andric FunctionStartsCommandIndex = Index; 7881ad6265SDimitry Andric break; 79bdd1243dSDimitry Andric case MachO::LC_DYLIB_CODE_SIGN_DRS: 80bdd1243dSDimitry Andric DylibCodeSignDRsIndex = Index; 81bdd1243dSDimitry Andric break; 8281ad6265SDimitry Andric case MachO::LC_DYLD_CHAINED_FIXUPS: 8381ad6265SDimitry Andric ChainedFixupsCommandIndex = Index; 8481ad6265SDimitry Andric break; 8581ad6265SDimitry Andric case MachO::LC_DYLD_EXPORTS_TRIE: 8681ad6265SDimitry Andric ExportsTrieCommandIndex = Index; 8781ad6265SDimitry Andric break; 8881ad6265SDimitry Andric } 8981ad6265SDimitry Andric } 9081ad6265SDimitry Andric } 9181ad6265SDimitry Andric 9281ad6265SDimitry Andric Error Object::removeLoadCommands( 9381ad6265SDimitry Andric function_ref<bool(const LoadCommand &)> ToRemove) { 9481ad6265SDimitry Andric auto It = std::stable_partition( 9581ad6265SDimitry Andric LoadCommands.begin(), LoadCommands.end(), 9681ad6265SDimitry Andric [&](const LoadCommand &LC) { return !ToRemove(LC); }); 9781ad6265SDimitry Andric LoadCommands.erase(It, LoadCommands.end()); 9881ad6265SDimitry Andric 9981ad6265SDimitry Andric updateLoadCommandIndexes(); 10081ad6265SDimitry Andric return Error::success(); 10181ad6265SDimitry Andric } 10281ad6265SDimitry Andric 10381ad6265SDimitry Andric Error Object::removeSections( 10481ad6265SDimitry Andric function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) { 10581ad6265SDimitry Andric DenseMap<uint32_t, const Section *> OldIndexToSection; 10681ad6265SDimitry Andric uint32_t NextSectionIndex = 1; 10781ad6265SDimitry Andric for (LoadCommand &LC : LoadCommands) { 10881ad6265SDimitry Andric auto It = std::stable_partition( 10981ad6265SDimitry Andric std::begin(LC.Sections), std::end(LC.Sections), 11081ad6265SDimitry Andric [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); }); 11181ad6265SDimitry Andric for (auto I = LC.Sections.begin(), End = It; I != End; ++I) { 11281ad6265SDimitry Andric OldIndexToSection[(*I)->Index] = I->get(); 11381ad6265SDimitry Andric (*I)->Index = NextSectionIndex++; 11481ad6265SDimitry Andric } 11581ad6265SDimitry Andric LC.Sections.erase(It, LC.Sections.end()); 11681ad6265SDimitry Andric } 11781ad6265SDimitry Andric 11881ad6265SDimitry Andric auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool { 119bdd1243dSDimitry Andric std::optional<uint32_t> Section = S->section(); 12081ad6265SDimitry Andric return (Section && !OldIndexToSection.count(*Section)); 12181ad6265SDimitry Andric }; 12281ad6265SDimitry Andric 12381ad6265SDimitry Andric SmallPtrSet<const SymbolEntry *, 2> DeadSymbols; 12481ad6265SDimitry Andric for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols) 12581ad6265SDimitry Andric if (IsDead(Sym)) 12681ad6265SDimitry Andric DeadSymbols.insert(Sym.get()); 12781ad6265SDimitry Andric 12881ad6265SDimitry Andric for (const LoadCommand &LC : LoadCommands) 12981ad6265SDimitry Andric for (const std::unique_ptr<Section> &Sec : LC.Sections) 13081ad6265SDimitry Andric for (const RelocationInfo &R : Sec->Relocations) 13181ad6265SDimitry Andric if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol)) 13281ad6265SDimitry Andric return createStringError(std::errc::invalid_argument, 13381ad6265SDimitry Andric "symbol '%s' defined in section with index " 13481ad6265SDimitry Andric "'%u' cannot be removed because it is " 13581ad6265SDimitry Andric "referenced by a relocation in section '%s'", 13681ad6265SDimitry Andric (*R.Symbol)->Name.c_str(), 13781ad6265SDimitry Andric *((*R.Symbol)->section()), 13881ad6265SDimitry Andric Sec->CanonicalName.c_str()); 13981ad6265SDimitry Andric SymTable.removeSymbols(IsDead); 14081ad6265SDimitry Andric for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols) 14181ad6265SDimitry Andric if (S->section()) 14281ad6265SDimitry Andric S->n_sect = OldIndexToSection[S->n_sect]->Index; 14381ad6265SDimitry Andric return Error::success(); 14481ad6265SDimitry Andric } 14581ad6265SDimitry Andric 14681ad6265SDimitry Andric uint64_t Object::nextAvailableSegmentAddress() const { 14781ad6265SDimitry Andric uint64_t HeaderSize = 14881ad6265SDimitry Andric is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 14981ad6265SDimitry Andric uint64_t Addr = HeaderSize + Header.SizeOfCmds; 15081ad6265SDimitry Andric for (const LoadCommand &LC : LoadCommands) { 15181ad6265SDimitry Andric const MachO::macho_load_command &MLC = LC.MachOLoadCommand; 15281ad6265SDimitry Andric switch (MLC.load_command_data.cmd) { 15381ad6265SDimitry Andric case MachO::LC_SEGMENT: 15481ad6265SDimitry Andric Addr = std::max(Addr, 15581ad6265SDimitry Andric static_cast<uint64_t>(MLC.segment_command_data.vmaddr) + 15681ad6265SDimitry Andric MLC.segment_command_data.vmsize); 15781ad6265SDimitry Andric break; 15881ad6265SDimitry Andric case MachO::LC_SEGMENT_64: 15981ad6265SDimitry Andric Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr + 16081ad6265SDimitry Andric MLC.segment_command_64_data.vmsize); 16181ad6265SDimitry Andric break; 16281ad6265SDimitry Andric default: 16381ad6265SDimitry Andric continue; 16481ad6265SDimitry Andric } 16581ad6265SDimitry Andric } 16681ad6265SDimitry Andric return Addr; 16781ad6265SDimitry Andric } 16881ad6265SDimitry Andric 16981ad6265SDimitry Andric template <typename SegmentType> 17081ad6265SDimitry Andric static void 17181ad6265SDimitry Andric constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType, 17281ad6265SDimitry Andric StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) { 17381ad6265SDimitry Andric assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name"); 17481ad6265SDimitry Andric memset(&Seg, 0, sizeof(SegmentType)); 17581ad6265SDimitry Andric Seg.cmd = CmdType; 17681ad6265SDimitry Andric strncpy(Seg.segname, SegName.data(), SegName.size()); 17781ad6265SDimitry Andric Seg.maxprot |= 17881ad6265SDimitry Andric (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 17981ad6265SDimitry Andric Seg.initprot |= 18081ad6265SDimitry Andric (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 18181ad6265SDimitry Andric Seg.vmaddr = SegVMAddr; 18281ad6265SDimitry Andric Seg.vmsize = SegVMSize; 18381ad6265SDimitry Andric } 18481ad6265SDimitry Andric 18581ad6265SDimitry Andric LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) { 18681ad6265SDimitry Andric LoadCommand LC; 18781ad6265SDimitry Andric const uint64_t SegVMAddr = nextAvailableSegmentAddress(); 18881ad6265SDimitry Andric if (is64Bit()) 18981ad6265SDimitry Andric constructSegment(LC.MachOLoadCommand.segment_command_64_data, 19081ad6265SDimitry Andric MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize); 19181ad6265SDimitry Andric else 19281ad6265SDimitry Andric constructSegment(LC.MachOLoadCommand.segment_command_data, 19381ad6265SDimitry Andric MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize); 19481ad6265SDimitry Andric 19581ad6265SDimitry Andric LoadCommands.push_back(std::move(LC)); 19681ad6265SDimitry Andric return LoadCommands.back(); 19781ad6265SDimitry Andric } 19881ad6265SDimitry Andric 19981ad6265SDimitry Andric /// Extracts a segment name from a string which is possibly non-null-terminated. 20081ad6265SDimitry Andric static StringRef extractSegmentName(const char *SegName) { 20181ad6265SDimitry Andric return StringRef(SegName, 20281ad6265SDimitry Andric strnlen(SegName, sizeof(MachO::segment_command::segname))); 20381ad6265SDimitry Andric } 20481ad6265SDimitry Andric 205bdd1243dSDimitry Andric std::optional<StringRef> LoadCommand::getSegmentName() const { 20681ad6265SDimitry Andric const MachO::macho_load_command &MLC = MachOLoadCommand; 20781ad6265SDimitry Andric switch (MLC.load_command_data.cmd) { 20881ad6265SDimitry Andric case MachO::LC_SEGMENT: 20981ad6265SDimitry Andric return extractSegmentName(MLC.segment_command_data.segname); 21081ad6265SDimitry Andric case MachO::LC_SEGMENT_64: 21181ad6265SDimitry Andric return extractSegmentName(MLC.segment_command_64_data.segname); 21281ad6265SDimitry Andric default: 213bdd1243dSDimitry Andric return std::nullopt; 21481ad6265SDimitry Andric } 21581ad6265SDimitry Andric } 21681ad6265SDimitry Andric 217bdd1243dSDimitry Andric std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const { 21881ad6265SDimitry Andric const MachO::macho_load_command &MLC = MachOLoadCommand; 21981ad6265SDimitry Andric switch (MLC.load_command_data.cmd) { 22081ad6265SDimitry Andric case MachO::LC_SEGMENT: 22181ad6265SDimitry Andric return MLC.segment_command_data.vmaddr; 22281ad6265SDimitry Andric case MachO::LC_SEGMENT_64: 22381ad6265SDimitry Andric return MLC.segment_command_64_data.vmaddr; 22481ad6265SDimitry Andric default: 225bdd1243dSDimitry Andric return std::nullopt; 22681ad6265SDimitry Andric } 22781ad6265SDimitry Andric } 228