1 //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "MachOObject.h"
10 #include "llvm/ADT/SmallPtrSet.h"
11 #include "llvm/Support/SystemZ/zOSSupport.h"
12 #include <unordered_set>
13
14 using namespace llvm;
15 using namespace llvm::objcopy::macho;
16
Section(StringRef SegName,StringRef SectName)17 Section::Section(StringRef SegName, StringRef SectName)
18 : Segname(SegName), Sectname(SectName),
19 CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
20
Section(StringRef SegName,StringRef SectName,StringRef Content)21 Section::Section(StringRef SegName, StringRef SectName, StringRef Content)
22 : Segname(SegName), Sectname(SectName),
23 CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
24 Content(Content) {}
25
getSymbolByIndex(uint32_t Index) const26 const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
27 assert(Index < Symbols.size() && "invalid symbol index");
28 return Symbols[Index].get();
29 }
30
getSymbolByIndex(uint32_t Index)31 SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
32 return const_cast<SymbolEntry *>(
33 static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
34 }
35
updateSymbols(function_ref<void (SymbolEntry &)> Callable)36 void SymbolTable::updateSymbols(function_ref<void(SymbolEntry &)> Callable) {
37 for (auto &Sym : Symbols)
38 Callable(*Sym);
39
40 // Partition symbols: local < defined external < undefined external.
41 auto ExternalBegin = std::stable_partition(
42 std::begin(Symbols), std::end(Symbols),
43 [](const auto &Sym) { return Sym->isLocalSymbol(); });
44 std::stable_partition(ExternalBegin, std::end(Symbols), [](const auto &Sym) {
45 return !Sym->isUndefinedSymbol();
46 });
47 }
48
removeSymbols(function_ref<bool (const std::unique_ptr<SymbolEntry> &)> ToRemove)49 void SymbolTable::removeSymbols(
50 function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
51 llvm::erase_if(Symbols, ToRemove);
52 }
53
updateLoadCommandIndexes()54 void Object::updateLoadCommandIndexes() {
55 static constexpr char TextSegmentName[] = "__TEXT";
56 // Update indices of special load commands
57 for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
58 LoadCommand &LC = LoadCommands[Index];
59 switch (LC.MachOLoadCommand.load_command_data.cmd) {
60 case MachO::LC_CODE_SIGNATURE:
61 CodeSignatureCommandIndex = Index;
62 break;
63 case MachO::LC_SEGMENT:
64 if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
65 TextSegmentName)
66 TextSegmentCommandIndex = Index;
67 break;
68 case MachO::LC_SEGMENT_64:
69 if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
70 TextSegmentName)
71 TextSegmentCommandIndex = Index;
72 break;
73 case MachO::LC_SYMTAB:
74 SymTabCommandIndex = Index;
75 break;
76 case MachO::LC_DYSYMTAB:
77 DySymTabCommandIndex = Index;
78 break;
79 case MachO::LC_DYLD_INFO:
80 case MachO::LC_DYLD_INFO_ONLY:
81 DyLdInfoCommandIndex = Index;
82 break;
83 case MachO::LC_DATA_IN_CODE:
84 DataInCodeCommandIndex = Index;
85 break;
86 case MachO::LC_LINKER_OPTIMIZATION_HINT:
87 LinkerOptimizationHintCommandIndex = Index;
88 break;
89 case MachO::LC_FUNCTION_STARTS:
90 FunctionStartsCommandIndex = Index;
91 break;
92 case MachO::LC_DYLIB_CODE_SIGN_DRS:
93 DylibCodeSignDRsIndex = Index;
94 break;
95 case MachO::LC_DYLD_CHAINED_FIXUPS:
96 ChainedFixupsCommandIndex = Index;
97 break;
98 case MachO::LC_DYLD_EXPORTS_TRIE:
99 ExportsTrieCommandIndex = Index;
100 break;
101 }
102 }
103 }
104
removeLoadCommands(function_ref<bool (const LoadCommand &)> ToRemove)105 Error Object::removeLoadCommands(
106 function_ref<bool(const LoadCommand &)> ToRemove) {
107 auto It = std::stable_partition(
108 LoadCommands.begin(), LoadCommands.end(),
109 [&](const LoadCommand &LC) { return !ToRemove(LC); });
110 LoadCommands.erase(It, LoadCommands.end());
111
112 updateLoadCommandIndexes();
113 return Error::success();
114 }
115
removeSections(function_ref<bool (const std::unique_ptr<Section> &)> ToRemove)116 Error Object::removeSections(
117 function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
118 DenseMap<uint32_t, const Section *> OldIndexToSection;
119 uint32_t NextSectionIndex = 1;
120 for (LoadCommand &LC : LoadCommands) {
121 auto It = std::stable_partition(
122 std::begin(LC.Sections), std::end(LC.Sections),
123 [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
124 for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
125 OldIndexToSection[(*I)->Index] = I->get();
126 (*I)->Index = NextSectionIndex++;
127 }
128 LC.Sections.erase(It, LC.Sections.end());
129 }
130
131 auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
132 std::optional<uint32_t> Section = S->section();
133 return (Section && !OldIndexToSection.count(*Section));
134 };
135
136 SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
137 for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
138 if (IsDead(Sym))
139 DeadSymbols.insert(Sym.get());
140
141 for (const LoadCommand &LC : LoadCommands)
142 for (const std::unique_ptr<Section> &Sec : LC.Sections)
143 for (const RelocationInfo &R : Sec->Relocations)
144 if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
145 return createStringError(std::errc::invalid_argument,
146 "symbol '%s' defined in section with index "
147 "'%u' cannot be removed because it is "
148 "referenced by a relocation in section '%s'",
149 (*R.Symbol)->Name.c_str(),
150 *((*R.Symbol)->section()),
151 Sec->CanonicalName.c_str());
152 SymTable.removeSymbols(IsDead);
153 for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
154 if (S->section())
155 S->n_sect = OldIndexToSection[S->n_sect]->Index;
156 return Error::success();
157 }
158
nextAvailableSegmentAddress() const159 uint64_t Object::nextAvailableSegmentAddress() const {
160 uint64_t HeaderSize =
161 is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
162 uint64_t Addr = HeaderSize + Header.SizeOfCmds;
163 for (const LoadCommand &LC : LoadCommands) {
164 const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
165 switch (MLC.load_command_data.cmd) {
166 case MachO::LC_SEGMENT:
167 Addr = std::max(Addr,
168 static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
169 MLC.segment_command_data.vmsize);
170 break;
171 case MachO::LC_SEGMENT_64:
172 Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
173 MLC.segment_command_64_data.vmsize);
174 break;
175 default:
176 continue;
177 }
178 }
179 return Addr;
180 }
181
182 template <typename SegmentType>
183 static void
constructSegment(SegmentType & Seg,llvm::MachO::LoadCommandType CmdType,StringRef SegName,uint64_t SegVMAddr,uint64_t SegVMSize)184 constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
185 StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
186 assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
187 memset(&Seg, 0, sizeof(SegmentType));
188 Seg.cmd = CmdType;
189 strncpy(Seg.segname, SegName.data(), SegName.size());
190 Seg.maxprot |=
191 (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
192 Seg.initprot |=
193 (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
194 Seg.vmaddr = SegVMAddr;
195 Seg.vmsize = SegVMSize;
196 }
197
addSegment(StringRef SegName,uint64_t SegVMSize)198 LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
199 LoadCommand LC;
200 const uint64_t SegVMAddr = nextAvailableSegmentAddress();
201 if (is64Bit())
202 constructSegment(LC.MachOLoadCommand.segment_command_64_data,
203 MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
204 else
205 constructSegment(LC.MachOLoadCommand.segment_command_data,
206 MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
207
208 LoadCommands.push_back(std::move(LC));
209 return LoadCommands.back();
210 }
211
212 /// Extracts a segment name from a string which is possibly non-null-terminated.
extractSegmentName(const char * SegName)213 static StringRef extractSegmentName(const char *SegName) {
214 return StringRef(SegName,
215 strnlen(SegName, sizeof(MachO::segment_command::segname)));
216 }
217
getSegmentName() const218 std::optional<StringRef> LoadCommand::getSegmentName() const {
219 const MachO::macho_load_command &MLC = MachOLoadCommand;
220 switch (MLC.load_command_data.cmd) {
221 case MachO::LC_SEGMENT:
222 return extractSegmentName(MLC.segment_command_data.segname);
223 case MachO::LC_SEGMENT_64:
224 return extractSegmentName(MLC.segment_command_64_data.segname);
225 default:
226 return std::nullopt;
227 }
228 }
229
getSegmentVMAddr() const230 std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
231 const MachO::macho_load_command &MLC = MachOLoadCommand;
232 switch (MLC.load_command_data.cmd) {
233 case MachO::LC_SEGMENT:
234 return MLC.segment_command_data.vmaddr;
235 case MachO::LC_SEGMENT_64:
236 return MLC.segment_command_64_data.vmaddr;
237 default:
238 return std::nullopt;
239 }
240 }
241