1 //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MachOObject.h" 10 #include "llvm/ADT/SmallPtrSet.h" 11 #include "llvm/Support/SystemZ/zOSSupport.h" 12 #include <unordered_set> 13 14 using namespace llvm; 15 using namespace llvm::objcopy::macho; 16 17 Section::Section(StringRef SegName, StringRef SectName) 18 : Segname(SegName), Sectname(SectName), 19 CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {} 20 21 Section::Section(StringRef SegName, StringRef SectName, StringRef Content) 22 : Segname(SegName), Sectname(SectName), 23 CanonicalName((Twine(SegName) + Twine(',') + SectName).str()), 24 Content(Content) {} 25 26 const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const { 27 assert(Index < Symbols.size() && "invalid symbol index"); 28 return Symbols[Index].get(); 29 } 30 31 SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) { 32 return const_cast<SymbolEntry *>( 33 static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index)); 34 } 35 36 void SymbolTable::updateSymbols(function_ref<void(SymbolEntry &)> Callable) { 37 for (auto &Sym : Symbols) 38 Callable(*Sym); 39 40 // Partition symbols: local < defined external < undefined external. 41 auto ExternalBegin = std::stable_partition( 42 std::begin(Symbols), std::end(Symbols), 43 [](const auto &Sym) { return Sym->isLocalSymbol(); }); 44 std::stable_partition(ExternalBegin, std::end(Symbols), [](const auto &Sym) { 45 return !Sym->isUndefinedSymbol(); 46 }); 47 } 48 49 void SymbolTable::removeSymbols( 50 function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) { 51 llvm::erase_if(Symbols, ToRemove); 52 } 53 54 void Object::updateLoadCommandIndexes() { 55 static constexpr char TextSegmentName[] = "__TEXT"; 56 // Update indices of special load commands 57 for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) { 58 LoadCommand &LC = LoadCommands[Index]; 59 switch (LC.MachOLoadCommand.load_command_data.cmd) { 60 case MachO::LC_CODE_SIGNATURE: 61 CodeSignatureCommandIndex = Index; 62 break; 63 case MachO::LC_SEGMENT: 64 if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) == 65 TextSegmentName) 66 TextSegmentCommandIndex = Index; 67 break; 68 case MachO::LC_SEGMENT_64: 69 if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) == 70 TextSegmentName) 71 TextSegmentCommandIndex = Index; 72 break; 73 case MachO::LC_SYMTAB: 74 SymTabCommandIndex = Index; 75 break; 76 case MachO::LC_DYSYMTAB: 77 DySymTabCommandIndex = Index; 78 break; 79 case MachO::LC_DYLD_INFO: 80 case MachO::LC_DYLD_INFO_ONLY: 81 DyLdInfoCommandIndex = Index; 82 break; 83 case MachO::LC_DATA_IN_CODE: 84 DataInCodeCommandIndex = Index; 85 break; 86 case MachO::LC_LINKER_OPTIMIZATION_HINT: 87 LinkerOptimizationHintCommandIndex = Index; 88 break; 89 case MachO::LC_FUNCTION_STARTS: 90 FunctionStartsCommandIndex = Index; 91 break; 92 case MachO::LC_DYLIB_CODE_SIGN_DRS: 93 DylibCodeSignDRsIndex = Index; 94 break; 95 case MachO::LC_DYLD_CHAINED_FIXUPS: 96 ChainedFixupsCommandIndex = Index; 97 break; 98 case MachO::LC_DYLD_EXPORTS_TRIE: 99 ExportsTrieCommandIndex = Index; 100 break; 101 } 102 } 103 } 104 105 Error Object::removeLoadCommands( 106 function_ref<bool(const LoadCommand &)> ToRemove) { 107 auto It = std::stable_partition( 108 LoadCommands.begin(), LoadCommands.end(), 109 [&](const LoadCommand &LC) { return !ToRemove(LC); }); 110 LoadCommands.erase(It, LoadCommands.end()); 111 112 updateLoadCommandIndexes(); 113 return Error::success(); 114 } 115 116 Error Object::removeSections( 117 function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) { 118 DenseMap<uint32_t, const Section *> OldIndexToSection; 119 uint32_t NextSectionIndex = 1; 120 for (LoadCommand &LC : LoadCommands) { 121 auto It = std::stable_partition( 122 std::begin(LC.Sections), std::end(LC.Sections), 123 [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); }); 124 for (auto I = LC.Sections.begin(), End = It; I != End; ++I) { 125 OldIndexToSection[(*I)->Index] = I->get(); 126 (*I)->Index = NextSectionIndex++; 127 } 128 LC.Sections.erase(It, LC.Sections.end()); 129 } 130 131 auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool { 132 std::optional<uint32_t> Section = S->section(); 133 return (Section && !OldIndexToSection.count(*Section)); 134 }; 135 136 SmallPtrSet<const SymbolEntry *, 2> DeadSymbols; 137 for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols) 138 if (IsDead(Sym)) 139 DeadSymbols.insert(Sym.get()); 140 141 for (const LoadCommand &LC : LoadCommands) 142 for (const std::unique_ptr<Section> &Sec : LC.Sections) 143 for (const RelocationInfo &R : Sec->Relocations) 144 if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol)) 145 return createStringError(std::errc::invalid_argument, 146 "symbol '%s' defined in section with index " 147 "'%u' cannot be removed because it is " 148 "referenced by a relocation in section '%s'", 149 (*R.Symbol)->Name.c_str(), 150 *((*R.Symbol)->section()), 151 Sec->CanonicalName.c_str()); 152 SymTable.removeSymbols(IsDead); 153 for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols) 154 if (S->section()) 155 S->n_sect = OldIndexToSection[S->n_sect]->Index; 156 return Error::success(); 157 } 158 159 uint64_t Object::nextAvailableSegmentAddress() const { 160 uint64_t HeaderSize = 161 is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 162 uint64_t Addr = HeaderSize + Header.SizeOfCmds; 163 for (const LoadCommand &LC : LoadCommands) { 164 const MachO::macho_load_command &MLC = LC.MachOLoadCommand; 165 switch (MLC.load_command_data.cmd) { 166 case MachO::LC_SEGMENT: 167 Addr = std::max(Addr, 168 static_cast<uint64_t>(MLC.segment_command_data.vmaddr) + 169 MLC.segment_command_data.vmsize); 170 break; 171 case MachO::LC_SEGMENT_64: 172 Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr + 173 MLC.segment_command_64_data.vmsize); 174 break; 175 default: 176 continue; 177 } 178 } 179 return Addr; 180 } 181 182 template <typename SegmentType> 183 static void 184 constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType, 185 StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) { 186 assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name"); 187 memset(&Seg, 0, sizeof(SegmentType)); 188 Seg.cmd = CmdType; 189 strncpy(Seg.segname, SegName.data(), SegName.size()); 190 Seg.maxprot |= 191 (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 192 Seg.initprot |= 193 (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 194 Seg.vmaddr = SegVMAddr; 195 Seg.vmsize = SegVMSize; 196 } 197 198 LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) { 199 LoadCommand LC; 200 const uint64_t SegVMAddr = nextAvailableSegmentAddress(); 201 if (is64Bit()) 202 constructSegment(LC.MachOLoadCommand.segment_command_64_data, 203 MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize); 204 else 205 constructSegment(LC.MachOLoadCommand.segment_command_data, 206 MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize); 207 208 LoadCommands.push_back(std::move(LC)); 209 return LoadCommands.back(); 210 } 211 212 /// Extracts a segment name from a string which is possibly non-null-terminated. 213 static StringRef extractSegmentName(const char *SegName) { 214 return StringRef(SegName, 215 strnlen(SegName, sizeof(MachO::segment_command::segname))); 216 } 217 218 std::optional<StringRef> LoadCommand::getSegmentName() const { 219 const MachO::macho_load_command &MLC = MachOLoadCommand; 220 switch (MLC.load_command_data.cmd) { 221 case MachO::LC_SEGMENT: 222 return extractSegmentName(MLC.segment_command_data.segname); 223 case MachO::LC_SEGMENT_64: 224 return extractSegmentName(MLC.segment_command_64_data.segname); 225 default: 226 return std::nullopt; 227 } 228 } 229 230 std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const { 231 const MachO::macho_load_command &MLC = MachOLoadCommand; 232 switch (MLC.load_command_data.cmd) { 233 case MachO::LC_SEGMENT: 234 return MLC.segment_command_data.vmaddr; 235 case MachO::LC_SEGMENT_64: 236 return MLC.segment_command_64_data.vmaddr; 237 default: 238 return std::nullopt; 239 } 240 } 241