xref: /freebsd/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObject.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
181ad6265SDimitry Andric //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
281ad6265SDimitry Andric //
381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
681ad6265SDimitry Andric //
781ad6265SDimitry Andric //===----------------------------------------------------------------------===//
881ad6265SDimitry Andric 
981ad6265SDimitry Andric #include "MachOObject.h"
1081ad6265SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
11*5f757f3fSDimitry Andric #include "llvm/Support/SystemZ/zOSSupport.h"
1281ad6265SDimitry Andric #include <unordered_set>
1381ad6265SDimitry Andric 
1481ad6265SDimitry Andric using namespace llvm;
1581ad6265SDimitry Andric using namespace llvm::objcopy::macho;
1681ad6265SDimitry Andric 
17bdd1243dSDimitry Andric Section::Section(StringRef SegName, StringRef SectName)
18bdd1243dSDimitry Andric     : Segname(SegName), Sectname(SectName),
19bdd1243dSDimitry Andric       CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
20bdd1243dSDimitry Andric 
21bdd1243dSDimitry Andric Section::Section(StringRef SegName, StringRef SectName, StringRef Content)
22bdd1243dSDimitry Andric     : Segname(SegName), Sectname(SectName),
23bdd1243dSDimitry Andric       CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
24bdd1243dSDimitry Andric       Content(Content) {}
25bdd1243dSDimitry Andric 
2681ad6265SDimitry Andric const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
2781ad6265SDimitry Andric   assert(Index < Symbols.size() && "invalid symbol index");
2881ad6265SDimitry Andric   return Symbols[Index].get();
2981ad6265SDimitry Andric }
3081ad6265SDimitry Andric 
3181ad6265SDimitry Andric SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
3281ad6265SDimitry Andric   return const_cast<SymbolEntry *>(
3381ad6265SDimitry Andric       static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
3481ad6265SDimitry Andric }
3581ad6265SDimitry Andric 
3681ad6265SDimitry Andric void SymbolTable::removeSymbols(
3781ad6265SDimitry Andric     function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
3881ad6265SDimitry Andric   llvm::erase_if(Symbols, ToRemove);
3981ad6265SDimitry Andric }
4081ad6265SDimitry Andric 
4181ad6265SDimitry Andric void Object::updateLoadCommandIndexes() {
4281ad6265SDimitry Andric   static constexpr char TextSegmentName[] = "__TEXT";
4381ad6265SDimitry Andric   // Update indices of special load commands
4481ad6265SDimitry Andric   for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
4581ad6265SDimitry Andric     LoadCommand &LC = LoadCommands[Index];
4681ad6265SDimitry Andric     switch (LC.MachOLoadCommand.load_command_data.cmd) {
4781ad6265SDimitry Andric     case MachO::LC_CODE_SIGNATURE:
4881ad6265SDimitry Andric       CodeSignatureCommandIndex = Index;
4981ad6265SDimitry Andric       break;
5081ad6265SDimitry Andric     case MachO::LC_SEGMENT:
5181ad6265SDimitry Andric       if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
5281ad6265SDimitry Andric           TextSegmentName)
5381ad6265SDimitry Andric         TextSegmentCommandIndex = Index;
5481ad6265SDimitry Andric       break;
5581ad6265SDimitry Andric     case MachO::LC_SEGMENT_64:
5681ad6265SDimitry Andric       if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
5781ad6265SDimitry Andric           TextSegmentName)
5881ad6265SDimitry Andric         TextSegmentCommandIndex = Index;
5981ad6265SDimitry Andric       break;
6081ad6265SDimitry Andric     case MachO::LC_SYMTAB:
6181ad6265SDimitry Andric       SymTabCommandIndex = Index;
6281ad6265SDimitry Andric       break;
6381ad6265SDimitry Andric     case MachO::LC_DYSYMTAB:
6481ad6265SDimitry Andric       DySymTabCommandIndex = Index;
6581ad6265SDimitry Andric       break;
6681ad6265SDimitry Andric     case MachO::LC_DYLD_INFO:
6781ad6265SDimitry Andric     case MachO::LC_DYLD_INFO_ONLY:
6881ad6265SDimitry Andric       DyLdInfoCommandIndex = Index;
6981ad6265SDimitry Andric       break;
7081ad6265SDimitry Andric     case MachO::LC_DATA_IN_CODE:
7181ad6265SDimitry Andric       DataInCodeCommandIndex = Index;
7281ad6265SDimitry Andric       break;
7381ad6265SDimitry Andric     case MachO::LC_LINKER_OPTIMIZATION_HINT:
7481ad6265SDimitry Andric       LinkerOptimizationHintCommandIndex = Index;
7581ad6265SDimitry Andric       break;
7681ad6265SDimitry Andric     case MachO::LC_FUNCTION_STARTS:
7781ad6265SDimitry Andric       FunctionStartsCommandIndex = Index;
7881ad6265SDimitry Andric       break;
79bdd1243dSDimitry Andric     case MachO::LC_DYLIB_CODE_SIGN_DRS:
80bdd1243dSDimitry Andric       DylibCodeSignDRsIndex = Index;
81bdd1243dSDimitry Andric       break;
8281ad6265SDimitry Andric     case MachO::LC_DYLD_CHAINED_FIXUPS:
8381ad6265SDimitry Andric       ChainedFixupsCommandIndex = Index;
8481ad6265SDimitry Andric       break;
8581ad6265SDimitry Andric     case MachO::LC_DYLD_EXPORTS_TRIE:
8681ad6265SDimitry Andric       ExportsTrieCommandIndex = Index;
8781ad6265SDimitry Andric       break;
8881ad6265SDimitry Andric     }
8981ad6265SDimitry Andric   }
9081ad6265SDimitry Andric }
9181ad6265SDimitry Andric 
9281ad6265SDimitry Andric Error Object::removeLoadCommands(
9381ad6265SDimitry Andric     function_ref<bool(const LoadCommand &)> ToRemove) {
9481ad6265SDimitry Andric   auto It = std::stable_partition(
9581ad6265SDimitry Andric       LoadCommands.begin(), LoadCommands.end(),
9681ad6265SDimitry Andric       [&](const LoadCommand &LC) { return !ToRemove(LC); });
9781ad6265SDimitry Andric   LoadCommands.erase(It, LoadCommands.end());
9881ad6265SDimitry Andric 
9981ad6265SDimitry Andric   updateLoadCommandIndexes();
10081ad6265SDimitry Andric   return Error::success();
10181ad6265SDimitry Andric }
10281ad6265SDimitry Andric 
10381ad6265SDimitry Andric Error Object::removeSections(
10481ad6265SDimitry Andric     function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
10581ad6265SDimitry Andric   DenseMap<uint32_t, const Section *> OldIndexToSection;
10681ad6265SDimitry Andric   uint32_t NextSectionIndex = 1;
10781ad6265SDimitry Andric   for (LoadCommand &LC : LoadCommands) {
10881ad6265SDimitry Andric     auto It = std::stable_partition(
10981ad6265SDimitry Andric         std::begin(LC.Sections), std::end(LC.Sections),
11081ad6265SDimitry Andric         [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
11181ad6265SDimitry Andric     for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
11281ad6265SDimitry Andric       OldIndexToSection[(*I)->Index] = I->get();
11381ad6265SDimitry Andric       (*I)->Index = NextSectionIndex++;
11481ad6265SDimitry Andric     }
11581ad6265SDimitry Andric     LC.Sections.erase(It, LC.Sections.end());
11681ad6265SDimitry Andric   }
11781ad6265SDimitry Andric 
11881ad6265SDimitry Andric   auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
119bdd1243dSDimitry Andric     std::optional<uint32_t> Section = S->section();
12081ad6265SDimitry Andric     return (Section && !OldIndexToSection.count(*Section));
12181ad6265SDimitry Andric   };
12281ad6265SDimitry Andric 
12381ad6265SDimitry Andric   SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
12481ad6265SDimitry Andric   for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
12581ad6265SDimitry Andric     if (IsDead(Sym))
12681ad6265SDimitry Andric       DeadSymbols.insert(Sym.get());
12781ad6265SDimitry Andric 
12881ad6265SDimitry Andric   for (const LoadCommand &LC : LoadCommands)
12981ad6265SDimitry Andric     for (const std::unique_ptr<Section> &Sec : LC.Sections)
13081ad6265SDimitry Andric       for (const RelocationInfo &R : Sec->Relocations)
13181ad6265SDimitry Andric         if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
13281ad6265SDimitry Andric           return createStringError(std::errc::invalid_argument,
13381ad6265SDimitry Andric                                    "symbol '%s' defined in section with index "
13481ad6265SDimitry Andric                                    "'%u' cannot be removed because it is "
13581ad6265SDimitry Andric                                    "referenced by a relocation in section '%s'",
13681ad6265SDimitry Andric                                    (*R.Symbol)->Name.c_str(),
13781ad6265SDimitry Andric                                    *((*R.Symbol)->section()),
13881ad6265SDimitry Andric                                    Sec->CanonicalName.c_str());
13981ad6265SDimitry Andric   SymTable.removeSymbols(IsDead);
14081ad6265SDimitry Andric   for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
14181ad6265SDimitry Andric     if (S->section())
14281ad6265SDimitry Andric       S->n_sect = OldIndexToSection[S->n_sect]->Index;
14381ad6265SDimitry Andric   return Error::success();
14481ad6265SDimitry Andric }
14581ad6265SDimitry Andric 
14681ad6265SDimitry Andric uint64_t Object::nextAvailableSegmentAddress() const {
14781ad6265SDimitry Andric   uint64_t HeaderSize =
14881ad6265SDimitry Andric       is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
14981ad6265SDimitry Andric   uint64_t Addr = HeaderSize + Header.SizeOfCmds;
15081ad6265SDimitry Andric   for (const LoadCommand &LC : LoadCommands) {
15181ad6265SDimitry Andric     const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
15281ad6265SDimitry Andric     switch (MLC.load_command_data.cmd) {
15381ad6265SDimitry Andric     case MachO::LC_SEGMENT:
15481ad6265SDimitry Andric       Addr = std::max(Addr,
15581ad6265SDimitry Andric                       static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
15681ad6265SDimitry Andric                           MLC.segment_command_data.vmsize);
15781ad6265SDimitry Andric       break;
15881ad6265SDimitry Andric     case MachO::LC_SEGMENT_64:
15981ad6265SDimitry Andric       Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
16081ad6265SDimitry Andric                                 MLC.segment_command_64_data.vmsize);
16181ad6265SDimitry Andric       break;
16281ad6265SDimitry Andric     default:
16381ad6265SDimitry Andric       continue;
16481ad6265SDimitry Andric     }
16581ad6265SDimitry Andric   }
16681ad6265SDimitry Andric   return Addr;
16781ad6265SDimitry Andric }
16881ad6265SDimitry Andric 
16981ad6265SDimitry Andric template <typename SegmentType>
17081ad6265SDimitry Andric static void
17181ad6265SDimitry Andric constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
17281ad6265SDimitry Andric                  StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
17381ad6265SDimitry Andric   assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
17481ad6265SDimitry Andric   memset(&Seg, 0, sizeof(SegmentType));
17581ad6265SDimitry Andric   Seg.cmd = CmdType;
17681ad6265SDimitry Andric   strncpy(Seg.segname, SegName.data(), SegName.size());
17781ad6265SDimitry Andric   Seg.maxprot |=
17881ad6265SDimitry Andric       (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
17981ad6265SDimitry Andric   Seg.initprot |=
18081ad6265SDimitry Andric       (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
18181ad6265SDimitry Andric   Seg.vmaddr = SegVMAddr;
18281ad6265SDimitry Andric   Seg.vmsize = SegVMSize;
18381ad6265SDimitry Andric }
18481ad6265SDimitry Andric 
18581ad6265SDimitry Andric LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
18681ad6265SDimitry Andric   LoadCommand LC;
18781ad6265SDimitry Andric   const uint64_t SegVMAddr = nextAvailableSegmentAddress();
18881ad6265SDimitry Andric   if (is64Bit())
18981ad6265SDimitry Andric     constructSegment(LC.MachOLoadCommand.segment_command_64_data,
19081ad6265SDimitry Andric                      MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
19181ad6265SDimitry Andric   else
19281ad6265SDimitry Andric     constructSegment(LC.MachOLoadCommand.segment_command_data,
19381ad6265SDimitry Andric                      MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
19481ad6265SDimitry Andric 
19581ad6265SDimitry Andric   LoadCommands.push_back(std::move(LC));
19681ad6265SDimitry Andric   return LoadCommands.back();
19781ad6265SDimitry Andric }
19881ad6265SDimitry Andric 
19981ad6265SDimitry Andric /// Extracts a segment name from a string which is possibly non-null-terminated.
20081ad6265SDimitry Andric static StringRef extractSegmentName(const char *SegName) {
20181ad6265SDimitry Andric   return StringRef(SegName,
20281ad6265SDimitry Andric                    strnlen(SegName, sizeof(MachO::segment_command::segname)));
20381ad6265SDimitry Andric }
20481ad6265SDimitry Andric 
205bdd1243dSDimitry Andric std::optional<StringRef> LoadCommand::getSegmentName() const {
20681ad6265SDimitry Andric   const MachO::macho_load_command &MLC = MachOLoadCommand;
20781ad6265SDimitry Andric   switch (MLC.load_command_data.cmd) {
20881ad6265SDimitry Andric   case MachO::LC_SEGMENT:
20981ad6265SDimitry Andric     return extractSegmentName(MLC.segment_command_data.segname);
21081ad6265SDimitry Andric   case MachO::LC_SEGMENT_64:
21181ad6265SDimitry Andric     return extractSegmentName(MLC.segment_command_64_data.segname);
21281ad6265SDimitry Andric   default:
213bdd1243dSDimitry Andric     return std::nullopt;
21481ad6265SDimitry Andric   }
21581ad6265SDimitry Andric }
21681ad6265SDimitry Andric 
217bdd1243dSDimitry Andric std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
21881ad6265SDimitry Andric   const MachO::macho_load_command &MLC = MachOLoadCommand;
21981ad6265SDimitry Andric   switch (MLC.load_command_data.cmd) {
22081ad6265SDimitry Andric   case MachO::LC_SEGMENT:
22181ad6265SDimitry Andric     return MLC.segment_command_data.vmaddr;
22281ad6265SDimitry Andric   case MachO::LC_SEGMENT_64:
22381ad6265SDimitry Andric     return MLC.segment_command_64_data.vmaddr;
22481ad6265SDimitry Andric   default:
225bdd1243dSDimitry Andric     return std::nullopt;
22681ad6265SDimitry Andric   }
22781ad6265SDimitry Andric }
228