1 //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MachOObject.h" 10 #include "llvm/ADT/SmallPtrSet.h" 11 #include <unordered_set> 12 13 using namespace llvm; 14 using namespace llvm::objcopy::macho; 15 16 Section::Section(StringRef SegName, StringRef SectName) 17 : Segname(SegName), Sectname(SectName), 18 CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {} 19 20 Section::Section(StringRef SegName, StringRef SectName, StringRef Content) 21 : Segname(SegName), Sectname(SectName), 22 CanonicalName((Twine(SegName) + Twine(',') + SectName).str()), 23 Content(Content) {} 24 25 const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const { 26 assert(Index < Symbols.size() && "invalid symbol index"); 27 return Symbols[Index].get(); 28 } 29 30 SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) { 31 return const_cast<SymbolEntry *>( 32 static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index)); 33 } 34 35 void SymbolTable::removeSymbols( 36 function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) { 37 llvm::erase_if(Symbols, ToRemove); 38 } 39 40 void Object::updateLoadCommandIndexes() { 41 static constexpr char TextSegmentName[] = "__TEXT"; 42 // Update indices of special load commands 43 for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) { 44 LoadCommand &LC = LoadCommands[Index]; 45 switch (LC.MachOLoadCommand.load_command_data.cmd) { 46 case MachO::LC_CODE_SIGNATURE: 47 CodeSignatureCommandIndex = Index; 48 break; 49 case MachO::LC_SEGMENT: 50 if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) == 51 TextSegmentName) 52 TextSegmentCommandIndex = Index; 53 break; 54 case MachO::LC_SEGMENT_64: 55 if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) == 56 TextSegmentName) 57 TextSegmentCommandIndex = Index; 58 break; 59 case MachO::LC_SYMTAB: 60 SymTabCommandIndex = Index; 61 break; 62 case MachO::LC_DYSYMTAB: 63 DySymTabCommandIndex = Index; 64 break; 65 case MachO::LC_DYLD_INFO: 66 case MachO::LC_DYLD_INFO_ONLY: 67 DyLdInfoCommandIndex = Index; 68 break; 69 case MachO::LC_DATA_IN_CODE: 70 DataInCodeCommandIndex = Index; 71 break; 72 case MachO::LC_LINKER_OPTIMIZATION_HINT: 73 LinkerOptimizationHintCommandIndex = Index; 74 break; 75 case MachO::LC_FUNCTION_STARTS: 76 FunctionStartsCommandIndex = Index; 77 break; 78 case MachO::LC_DYLIB_CODE_SIGN_DRS: 79 DylibCodeSignDRsIndex = Index; 80 break; 81 case MachO::LC_DYLD_CHAINED_FIXUPS: 82 ChainedFixupsCommandIndex = Index; 83 break; 84 case MachO::LC_DYLD_EXPORTS_TRIE: 85 ExportsTrieCommandIndex = Index; 86 break; 87 } 88 } 89 } 90 91 Error Object::removeLoadCommands( 92 function_ref<bool(const LoadCommand &)> ToRemove) { 93 auto It = std::stable_partition( 94 LoadCommands.begin(), LoadCommands.end(), 95 [&](const LoadCommand &LC) { return !ToRemove(LC); }); 96 LoadCommands.erase(It, LoadCommands.end()); 97 98 updateLoadCommandIndexes(); 99 return Error::success(); 100 } 101 102 Error Object::removeSections( 103 function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) { 104 DenseMap<uint32_t, const Section *> OldIndexToSection; 105 uint32_t NextSectionIndex = 1; 106 for (LoadCommand &LC : LoadCommands) { 107 auto It = std::stable_partition( 108 std::begin(LC.Sections), std::end(LC.Sections), 109 [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); }); 110 for (auto I = LC.Sections.begin(), End = It; I != End; ++I) { 111 OldIndexToSection[(*I)->Index] = I->get(); 112 (*I)->Index = NextSectionIndex++; 113 } 114 LC.Sections.erase(It, LC.Sections.end()); 115 } 116 117 auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool { 118 std::optional<uint32_t> Section = S->section(); 119 return (Section && !OldIndexToSection.count(*Section)); 120 }; 121 122 SmallPtrSet<const SymbolEntry *, 2> DeadSymbols; 123 for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols) 124 if (IsDead(Sym)) 125 DeadSymbols.insert(Sym.get()); 126 127 for (const LoadCommand &LC : LoadCommands) 128 for (const std::unique_ptr<Section> &Sec : LC.Sections) 129 for (const RelocationInfo &R : Sec->Relocations) 130 if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol)) 131 return createStringError(std::errc::invalid_argument, 132 "symbol '%s' defined in section with index " 133 "'%u' cannot be removed because it is " 134 "referenced by a relocation in section '%s'", 135 (*R.Symbol)->Name.c_str(), 136 *((*R.Symbol)->section()), 137 Sec->CanonicalName.c_str()); 138 SymTable.removeSymbols(IsDead); 139 for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols) 140 if (S->section()) 141 S->n_sect = OldIndexToSection[S->n_sect]->Index; 142 return Error::success(); 143 } 144 145 uint64_t Object::nextAvailableSegmentAddress() const { 146 uint64_t HeaderSize = 147 is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 148 uint64_t Addr = HeaderSize + Header.SizeOfCmds; 149 for (const LoadCommand &LC : LoadCommands) { 150 const MachO::macho_load_command &MLC = LC.MachOLoadCommand; 151 switch (MLC.load_command_data.cmd) { 152 case MachO::LC_SEGMENT: 153 Addr = std::max(Addr, 154 static_cast<uint64_t>(MLC.segment_command_data.vmaddr) + 155 MLC.segment_command_data.vmsize); 156 break; 157 case MachO::LC_SEGMENT_64: 158 Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr + 159 MLC.segment_command_64_data.vmsize); 160 break; 161 default: 162 continue; 163 } 164 } 165 return Addr; 166 } 167 168 template <typename SegmentType> 169 static void 170 constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType, 171 StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) { 172 assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name"); 173 memset(&Seg, 0, sizeof(SegmentType)); 174 Seg.cmd = CmdType; 175 strncpy(Seg.segname, SegName.data(), SegName.size()); 176 Seg.maxprot |= 177 (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 178 Seg.initprot |= 179 (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 180 Seg.vmaddr = SegVMAddr; 181 Seg.vmsize = SegVMSize; 182 } 183 184 LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) { 185 LoadCommand LC; 186 const uint64_t SegVMAddr = nextAvailableSegmentAddress(); 187 if (is64Bit()) 188 constructSegment(LC.MachOLoadCommand.segment_command_64_data, 189 MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize); 190 else 191 constructSegment(LC.MachOLoadCommand.segment_command_data, 192 MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize); 193 194 LoadCommands.push_back(std::move(LC)); 195 return LoadCommands.back(); 196 } 197 198 /// Extracts a segment name from a string which is possibly non-null-terminated. 199 static StringRef extractSegmentName(const char *SegName) { 200 return StringRef(SegName, 201 strnlen(SegName, sizeof(MachO::segment_command::segname))); 202 } 203 204 std::optional<StringRef> LoadCommand::getSegmentName() const { 205 const MachO::macho_load_command &MLC = MachOLoadCommand; 206 switch (MLC.load_command_data.cmd) { 207 case MachO::LC_SEGMENT: 208 return extractSegmentName(MLC.segment_command_data.segname); 209 case MachO::LC_SEGMENT_64: 210 return extractSegmentName(MLC.segment_command_64_data.segname); 211 default: 212 return std::nullopt; 213 } 214 } 215 216 std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const { 217 const MachO::macho_load_command &MLC = MachOLoadCommand; 218 switch (MLC.load_command_data.cmd) { 219 case MachO::LC_SEGMENT: 220 return MLC.segment_command_data.vmaddr; 221 case MachO::LC_SEGMENT_64: 222 return MLC.segment_command_64_data.vmaddr; 223 default: 224 return std::nullopt; 225 } 226 } 227