1 //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MachOObject.h" 10 #include "llvm/ADT/SmallPtrSet.h" 11 #include "llvm/Support/SystemZ/zOSSupport.h" 12 #include <unordered_set> 13 14 using namespace llvm; 15 using namespace llvm::objcopy::macho; 16 17 Section::Section(StringRef SegName, StringRef SectName) 18 : Segname(SegName), Sectname(SectName), 19 CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {} 20 21 Section::Section(StringRef SegName, StringRef SectName, StringRef Content) 22 : Segname(SegName), Sectname(SectName), 23 CanonicalName((Twine(SegName) + Twine(',') + SectName).str()), 24 Content(Content) {} 25 26 const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const { 27 assert(Index < Symbols.size() && "invalid symbol index"); 28 return Symbols[Index].get(); 29 } 30 31 SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) { 32 return const_cast<SymbolEntry *>( 33 static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index)); 34 } 35 36 void SymbolTable::removeSymbols( 37 function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) { 38 llvm::erase_if(Symbols, ToRemove); 39 } 40 41 void Object::updateLoadCommandIndexes() { 42 static constexpr char TextSegmentName[] = "__TEXT"; 43 // Update indices of special load commands 44 for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) { 45 LoadCommand &LC = LoadCommands[Index]; 46 switch (LC.MachOLoadCommand.load_command_data.cmd) { 47 case MachO::LC_CODE_SIGNATURE: 48 CodeSignatureCommandIndex = Index; 49 break; 50 case MachO::LC_SEGMENT: 51 if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) == 52 TextSegmentName) 53 TextSegmentCommandIndex = Index; 54 break; 55 case MachO::LC_SEGMENT_64: 56 if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) == 57 TextSegmentName) 58 TextSegmentCommandIndex = Index; 59 break; 60 case MachO::LC_SYMTAB: 61 SymTabCommandIndex = Index; 62 break; 63 case MachO::LC_DYSYMTAB: 64 DySymTabCommandIndex = Index; 65 break; 66 case MachO::LC_DYLD_INFO: 67 case MachO::LC_DYLD_INFO_ONLY: 68 DyLdInfoCommandIndex = Index; 69 break; 70 case MachO::LC_DATA_IN_CODE: 71 DataInCodeCommandIndex = Index; 72 break; 73 case MachO::LC_LINKER_OPTIMIZATION_HINT: 74 LinkerOptimizationHintCommandIndex = Index; 75 break; 76 case MachO::LC_FUNCTION_STARTS: 77 FunctionStartsCommandIndex = Index; 78 break; 79 case MachO::LC_DYLIB_CODE_SIGN_DRS: 80 DylibCodeSignDRsIndex = Index; 81 break; 82 case MachO::LC_DYLD_CHAINED_FIXUPS: 83 ChainedFixupsCommandIndex = Index; 84 break; 85 case MachO::LC_DYLD_EXPORTS_TRIE: 86 ExportsTrieCommandIndex = Index; 87 break; 88 } 89 } 90 } 91 92 Error Object::removeLoadCommands( 93 function_ref<bool(const LoadCommand &)> ToRemove) { 94 auto It = std::stable_partition( 95 LoadCommands.begin(), LoadCommands.end(), 96 [&](const LoadCommand &LC) { return !ToRemove(LC); }); 97 LoadCommands.erase(It, LoadCommands.end()); 98 99 updateLoadCommandIndexes(); 100 return Error::success(); 101 } 102 103 Error Object::removeSections( 104 function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) { 105 DenseMap<uint32_t, const Section *> OldIndexToSection; 106 uint32_t NextSectionIndex = 1; 107 for (LoadCommand &LC : LoadCommands) { 108 auto It = std::stable_partition( 109 std::begin(LC.Sections), std::end(LC.Sections), 110 [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); }); 111 for (auto I = LC.Sections.begin(), End = It; I != End; ++I) { 112 OldIndexToSection[(*I)->Index] = I->get(); 113 (*I)->Index = NextSectionIndex++; 114 } 115 LC.Sections.erase(It, LC.Sections.end()); 116 } 117 118 auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool { 119 std::optional<uint32_t> Section = S->section(); 120 return (Section && !OldIndexToSection.count(*Section)); 121 }; 122 123 SmallPtrSet<const SymbolEntry *, 2> DeadSymbols; 124 for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols) 125 if (IsDead(Sym)) 126 DeadSymbols.insert(Sym.get()); 127 128 for (const LoadCommand &LC : LoadCommands) 129 for (const std::unique_ptr<Section> &Sec : LC.Sections) 130 for (const RelocationInfo &R : Sec->Relocations) 131 if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol)) 132 return createStringError(std::errc::invalid_argument, 133 "symbol '%s' defined in section with index " 134 "'%u' cannot be removed because it is " 135 "referenced by a relocation in section '%s'", 136 (*R.Symbol)->Name.c_str(), 137 *((*R.Symbol)->section()), 138 Sec->CanonicalName.c_str()); 139 SymTable.removeSymbols(IsDead); 140 for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols) 141 if (S->section()) 142 S->n_sect = OldIndexToSection[S->n_sect]->Index; 143 return Error::success(); 144 } 145 146 uint64_t Object::nextAvailableSegmentAddress() const { 147 uint64_t HeaderSize = 148 is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 149 uint64_t Addr = HeaderSize + Header.SizeOfCmds; 150 for (const LoadCommand &LC : LoadCommands) { 151 const MachO::macho_load_command &MLC = LC.MachOLoadCommand; 152 switch (MLC.load_command_data.cmd) { 153 case MachO::LC_SEGMENT: 154 Addr = std::max(Addr, 155 static_cast<uint64_t>(MLC.segment_command_data.vmaddr) + 156 MLC.segment_command_data.vmsize); 157 break; 158 case MachO::LC_SEGMENT_64: 159 Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr + 160 MLC.segment_command_64_data.vmsize); 161 break; 162 default: 163 continue; 164 } 165 } 166 return Addr; 167 } 168 169 template <typename SegmentType> 170 static void 171 constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType, 172 StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) { 173 assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name"); 174 memset(&Seg, 0, sizeof(SegmentType)); 175 Seg.cmd = CmdType; 176 strncpy(Seg.segname, SegName.data(), SegName.size()); 177 Seg.maxprot |= 178 (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 179 Seg.initprot |= 180 (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 181 Seg.vmaddr = SegVMAddr; 182 Seg.vmsize = SegVMSize; 183 } 184 185 LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) { 186 LoadCommand LC; 187 const uint64_t SegVMAddr = nextAvailableSegmentAddress(); 188 if (is64Bit()) 189 constructSegment(LC.MachOLoadCommand.segment_command_64_data, 190 MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize); 191 else 192 constructSegment(LC.MachOLoadCommand.segment_command_data, 193 MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize); 194 195 LoadCommands.push_back(std::move(LC)); 196 return LoadCommands.back(); 197 } 198 199 /// Extracts a segment name from a string which is possibly non-null-terminated. 200 static StringRef extractSegmentName(const char *SegName) { 201 return StringRef(SegName, 202 strnlen(SegName, sizeof(MachO::segment_command::segname))); 203 } 204 205 std::optional<StringRef> LoadCommand::getSegmentName() const { 206 const MachO::macho_load_command &MLC = MachOLoadCommand; 207 switch (MLC.load_command_data.cmd) { 208 case MachO::LC_SEGMENT: 209 return extractSegmentName(MLC.segment_command_data.segname); 210 case MachO::LC_SEGMENT_64: 211 return extractSegmentName(MLC.segment_command_64_data.segname); 212 default: 213 return std::nullopt; 214 } 215 } 216 217 std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const { 218 const MachO::macho_load_command &MLC = MachOLoadCommand; 219 switch (MLC.load_command_data.cmd) { 220 case MachO::LC_SEGMENT: 221 return MLC.segment_command_data.vmaddr; 222 case MachO::LC_SEGMENT_64: 223 return MLC.segment_command_64_data.vmaddr; 224 default: 225 return std::nullopt; 226 } 227 } 228