xref: /freebsd/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp (revision 35c0a8c449fd2b7f75029ebed5e10852240f0865)
1 //===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ObjCopy/MachO/MachOObjcopy.h"
10 #include "Archive.h"
11 #include "MachOReader.h"
12 #include "MachOWriter.h"
13 #include "llvm/ADT/DenseSet.h"
14 #include "llvm/ObjCopy/CommonConfig.h"
15 #include "llvm/ObjCopy/MachO/MachOConfig.h"
16 #include "llvm/ObjCopy/MultiFormatConfig.h"
17 #include "llvm/ObjCopy/ObjCopy.h"
18 #include "llvm/Object/ArchiveWriter.h"
19 #include "llvm/Object/MachOUniversal.h"
20 #include "llvm/Object/MachOUniversalWriter.h"
21 #include "llvm/Support/Errc.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/FileOutputBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/SmallVectorMemoryBuffer.h"
26 
27 using namespace llvm;
28 using namespace llvm::objcopy;
29 using namespace llvm::objcopy::macho;
30 using namespace llvm::object;
31 
// Predicate over an owned section. In this file it is built by removeSections
// and handed to Object::removeSections to select the sections to drop.
using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>;
// Predicate over a load command. In this file it is handed to
// Object::removeLoadCommands to select the load commands to drop.
using LoadCommandPred = std::function<bool(const LoadCommand &LC)>;
34 
35 #ifndef NDEBUG
36 static bool isLoadCommandWithPayloadString(const LoadCommand &LC) {
37   // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and
38   // LC_LAZY_LOAD_DYLIB
39   return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH ||
40          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB ||
41          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB ||
42          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB;
43 }
44 #endif
45 
46 static StringRef getPayloadString(const LoadCommand &LC) {
47   assert(isLoadCommandWithPayloadString(LC) &&
48          "unsupported load command encountered");
49 
50   return StringRef(reinterpret_cast<const char *>(LC.Payload.data()),
51                    LC.Payload.size())
52       .rtrim('\0');
53 }
54 
55 static Error removeSections(const CommonConfig &Config, Object &Obj) {
56   SectionPred RemovePred = [](const std::unique_ptr<Section> &) {
57     return false;
58   };
59 
60   if (!Config.ToRemove.empty()) {
61     RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) {
62       return Config.ToRemove.matches(Sec->CanonicalName);
63     };
64   }
65 
66   if (Config.StripAll || Config.StripDebug) {
67     // Remove all debug sections.
68     RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) {
69       if (Sec->Segname == "__DWARF")
70         return true;
71 
72       return RemovePred(Sec);
73     };
74   }
75 
76   if (!Config.OnlySection.empty()) {
77     // Overwrite RemovePred because --only-section takes priority.
78     RemovePred = [&Config](const std::unique_ptr<Section> &Sec) {
79       return !Config.OnlySection.matches(Sec->CanonicalName);
80     };
81   }
82 
83   return Obj.removeSections(RemovePred);
84 }
85 
86 static void markSymbols(const CommonConfig &, Object &Obj) {
87   // Symbols referenced from the indirect symbol table must not be removed.
88   for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols)
89     if (ISE.Symbol)
90       (*ISE.Symbol)->Referenced = true;
91 }
92 
93 static void updateAndRemoveSymbols(const CommonConfig &Config,
94                                    const MachOConfig &MachOConfig,
95                                    Object &Obj) {
96   for (SymbolEntry &Sym : Obj.SymTable) {
97     // Weaken symbols first to match ELFObjcopy behavior.
98     bool IsExportedAndDefined =
99         (Sym.n_type & llvm::MachO::N_EXT) &&
100         (Sym.n_type & llvm::MachO::N_TYPE) != llvm::MachO::N_UNDF;
101     if (IsExportedAndDefined &&
102         (Config.Weaken || Config.SymbolsToWeaken.matches(Sym.Name)))
103       Sym.n_desc |= llvm::MachO::N_WEAK_DEF;
104 
105     auto I = Config.SymbolsToRename.find(Sym.Name);
106     if (I != Config.SymbolsToRename.end())
107       Sym.Name = std::string(I->getValue());
108   }
109 
110   auto RemovePred = [&Config, &MachOConfig,
111                      &Obj](const std::unique_ptr<SymbolEntry> &N) {
112     if (N->Referenced)
113       return false;
114     if (MachOConfig.KeepUndefined && N->isUndefinedSymbol())
115       return false;
116     if (N->n_desc & MachO::REFERENCED_DYNAMICALLY)
117       return false;
118     if (Config.StripAll)
119       return true;
120     if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT))
121       return true;
122     // This behavior is consistent with cctools' strip.
123     if (Config.StripDebug && (N->n_type & MachO::N_STAB))
124       return true;
125     // This behavior is consistent with cctools' strip.
126     if (MachOConfig.StripSwiftSymbols &&
127         (Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion &&
128         *Obj.SwiftVersion && N->isSwiftSymbol())
129       return true;
130     return false;
131   };
132 
133   Obj.SymTable.removeSymbols(RemovePred);
134 }
135 
136 template <typename LCType>
137 static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) {
138   assert(isLoadCommandWithPayloadString(LC) &&
139          "unsupported load command encountered");
140 
141   uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8);
142 
143   LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize;
144   LC.Payload.assign(NewCmdsize - sizeof(LCType), 0);
145   std::copy(S.begin(), S.end(), LC.Payload.begin());
146 }
147 
148 static LoadCommand buildRPathLoadCommand(StringRef Path) {
149   LoadCommand LC;
150   MachO::rpath_command RPathLC;
151   RPathLC.cmd = MachO::LC_RPATH;
152   RPathLC.path = sizeof(MachO::rpath_command);
153   RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8);
154   LC.MachOLoadCommand.rpath_command_data = RPathLC;
155   LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0);
156   std::copy(Path.begin(), Path.end(), LC.Payload.begin());
157   return LC;
158 }
159 
160 static Error processLoadCommands(const MachOConfig &MachOConfig, Object &Obj) {
161   // Remove RPaths.
162   DenseSet<StringRef> RPathsToRemove(MachOConfig.RPathsToRemove.begin(),
163                                      MachOConfig.RPathsToRemove.end());
164 
165   LoadCommandPred RemovePred = [&RPathsToRemove,
166                                 &MachOConfig](const LoadCommand &LC) {
167     if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) {
168       // When removing all RPaths we don't need to care
169       // about what it contains
170       if (MachOConfig.RemoveAllRpaths)
171         return true;
172 
173       StringRef RPath = getPayloadString(LC);
174       if (RPathsToRemove.count(RPath)) {
175         RPathsToRemove.erase(RPath);
176         return true;
177       }
178     }
179     return false;
180   };
181 
182   if (Error E = Obj.removeLoadCommands(RemovePred))
183     return E;
184 
185   // Emit an error if the Mach-O binary does not contain an rpath path name
186   // specified in -delete_rpath.
187   for (StringRef RPath : MachOConfig.RPathsToRemove) {
188     if (RPathsToRemove.count(RPath))
189       return createStringError(errc::invalid_argument,
190                                "no LC_RPATH load command with path: %s",
191                                RPath.str().c_str());
192   }
193 
194   DenseSet<StringRef> RPaths;
195 
196   // Get all existing RPaths.
197   for (LoadCommand &LC : Obj.LoadCommands) {
198     if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH)
199       RPaths.insert(getPayloadString(LC));
200   }
201 
202   // Throw errors for invalid RPaths.
203   for (const auto &OldNew : MachOConfig.RPathsToUpdate) {
204     StringRef Old = OldNew.getFirst();
205     StringRef New = OldNew.getSecond();
206     if (!RPaths.contains(Old))
207       return createStringError(errc::invalid_argument,
208                                "no LC_RPATH load command with path: " + Old);
209     if (RPaths.contains(New))
210       return createStringError(errc::invalid_argument,
211                                "rpath '" + New +
212                                    "' would create a duplicate load command");
213   }
214 
215   // Update load commands.
216   for (LoadCommand &LC : Obj.LoadCommands) {
217     switch (LC.MachOLoadCommand.load_command_data.cmd) {
218     case MachO::LC_ID_DYLIB:
219       if (MachOConfig.SharedLibId)
220         updateLoadCommandPayloadString<MachO::dylib_command>(
221             LC, *MachOConfig.SharedLibId);
222       break;
223 
224     case MachO::LC_RPATH: {
225       StringRef RPath = getPayloadString(LC);
226       StringRef NewRPath = MachOConfig.RPathsToUpdate.lookup(RPath);
227       if (!NewRPath.empty())
228         updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath);
229       break;
230     }
231 
232     // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB
233     // here once llvm-objcopy supports them.
234     case MachO::LC_LOAD_DYLIB:
235     case MachO::LC_LOAD_WEAK_DYLIB:
236       StringRef InstallName = getPayloadString(LC);
237       StringRef NewInstallName =
238           MachOConfig.InstallNamesToUpdate.lookup(InstallName);
239       if (!NewInstallName.empty())
240         updateLoadCommandPayloadString<MachO::dylib_command>(LC,
241                                                              NewInstallName);
242       break;
243     }
244   }
245 
246   // Add new RPaths.
247   for (StringRef RPath : MachOConfig.RPathToAdd) {
248     if (RPaths.contains(RPath))
249       return createStringError(errc::invalid_argument,
250                                "rpath '" + RPath +
251                                    "' would create a duplicate load command");
252     RPaths.insert(RPath);
253     Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath));
254   }
255 
256   for (StringRef RPath : MachOConfig.RPathToPrepend) {
257     if (RPaths.contains(RPath))
258       return createStringError(errc::invalid_argument,
259                                "rpath '" + RPath +
260                                    "' would create a duplicate load command");
261 
262     RPaths.insert(RPath);
263     Obj.LoadCommands.insert(Obj.LoadCommands.begin(),
264                             buildRPathLoadCommand(RPath));
265   }
266 
267   // Unlike appending rpaths, the indexes of subsequent load commands must
268   // be recalculated after prepending one.
269   if (!MachOConfig.RPathToPrepend.empty())
270     Obj.updateLoadCommandIndexes();
271 
272   // Remove any empty segments if required.
273   if (!MachOConfig.EmptySegmentsToRemove.empty()) {
274     auto RemovePred = [&MachOConfig](const LoadCommand &LC) {
275       if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT_64 ||
276           LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT) {
277         return LC.Sections.empty() &&
278                MachOConfig.EmptySegmentsToRemove.contains(*LC.getSegmentName());
279       }
280       return false;
281     };
282     if (Error E = Obj.removeLoadCommands(RemovePred))
283       return E;
284   }
285 
286   return Error::success();
287 }
288 
289 static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
290                                Object &Obj) {
291   for (LoadCommand &LC : Obj.LoadCommands)
292     for (const std::unique_ptr<Section> &Sec : LC.Sections) {
293       if (Sec->CanonicalName == SecName) {
294         Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
295             FileOutputBuffer::create(Filename, Sec->Content.size());
296         if (!BufferOrErr)
297           return BufferOrErr.takeError();
298         std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
299         llvm::copy(Sec->Content, Buf->getBufferStart());
300 
301         if (Error E = Buf->commit())
302           return E;
303         return Error::success();
304       }
305     }
306 
307   return createStringError(object_error::parse_failed, "section '%s' not found",
308                            SecName.str().c_str());
309 }
310 
311 static Error addSection(const NewSectionInfo &NewSection, Object &Obj) {
312   std::pair<StringRef, StringRef> Pair = NewSection.SectionName.split(',');
313   StringRef TargetSegName = Pair.first;
314   Section Sec(TargetSegName, Pair.second);
315   Sec.Content =
316       Obj.NewSectionsContents.save(NewSection.SectionData->getBuffer());
317   Sec.Size = Sec.Content.size();
318 
319   // Add the a section into an existing segment.
320   for (LoadCommand &LC : Obj.LoadCommands) {
321     std::optional<StringRef> SegName = LC.getSegmentName();
322     if (SegName && SegName == TargetSegName) {
323       uint64_t Addr = *LC.getSegmentVMAddr();
324       for (const std::unique_ptr<Section> &S : LC.Sections)
325         Addr = std::max(Addr, S->Addr + S->Size);
326       LC.Sections.push_back(std::make_unique<Section>(Sec));
327       LC.Sections.back()->Addr = Addr;
328       return Error::success();
329     }
330   }
331 
332   // There's no segment named TargetSegName. Create a new load command and
333   // Insert a new section into it.
334   LoadCommand &NewSegment =
335       Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384));
336   NewSegment.Sections.push_back(std::make_unique<Section>(Sec));
337   NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr();
338   return Error::success();
339 }
340 
341 static Expected<Section &> findSection(StringRef SecName, Object &O) {
342   StringRef SegName;
343   std::tie(SegName, SecName) = SecName.split(",");
344   auto FoundSeg =
345       llvm::find_if(O.LoadCommands, [SegName](const LoadCommand &LC) {
346         return LC.getSegmentName() == SegName;
347       });
348   if (FoundSeg == O.LoadCommands.end())
349     return createStringError(errc::invalid_argument,
350                              "could not find segment with name '%s'",
351                              SegName.str().c_str());
352   auto FoundSec = llvm::find_if(FoundSeg->Sections,
353                                 [SecName](const std::unique_ptr<Section> &Sec) {
354                                   return Sec->Sectname == SecName;
355                                 });
356   if (FoundSec == FoundSeg->Sections.end())
357     return createStringError(errc::invalid_argument,
358                              "could not find section with name '%s'",
359                              SecName.str().c_str());
360 
361   assert(FoundSec->get()->CanonicalName == (SegName + "," + SecName).str());
362   return **FoundSec;
363 }
364 
365 static Error updateSection(const NewSectionInfo &NewSection, Object &O) {
366   Expected<Section &> SecToUpdateOrErr = findSection(NewSection.SectionName, O);
367 
368   if (!SecToUpdateOrErr)
369     return SecToUpdateOrErr.takeError();
370   Section &Sec = *SecToUpdateOrErr;
371 
372   if (NewSection.SectionData->getBufferSize() > Sec.Size)
373     return createStringError(
374         errc::invalid_argument,
375         "new section cannot be larger than previous section");
376   Sec.Content = O.NewSectionsContents.save(NewSection.SectionData->getBuffer());
377   Sec.Size = Sec.Content.size();
378   return Error::success();
379 }
380 
381 // isValidMachOCannonicalName returns success if Name is a MachO cannonical name
382 // ("<segment>,<section>") and lengths of both segment and section names are
383 // valid.
384 static Error isValidMachOCannonicalName(StringRef Name) {
385   if (Name.count(',') != 1)
386     return createStringError(errc::invalid_argument,
387                              "invalid section name '%s' (should be formatted "
388                              "as '<segment name>,<section name>')",
389                              Name.str().c_str());
390 
391   std::pair<StringRef, StringRef> Pair = Name.split(',');
392   if (Pair.first.size() > 16)
393     return createStringError(errc::invalid_argument,
394                              "too long segment name: '%s'",
395                              Pair.first.str().c_str());
396   if (Pair.second.size() > 16)
397     return createStringError(errc::invalid_argument,
398                              "too long section name: '%s'",
399                              Pair.second.str().c_str());
400   return Error::success();
401 }
402 
403 static Error handleArgs(const CommonConfig &Config,
404                         const MachOConfig &MachOConfig, Object &Obj) {
405   // Dump sections before add/remove for compatibility with GNU objcopy.
406   for (StringRef Flag : Config.DumpSection) {
407     StringRef SectionName;
408     StringRef FileName;
409     std::tie(SectionName, FileName) = Flag.split('=');
410     if (Error E = dumpSectionToFile(SectionName, FileName, Obj))
411       return E;
412   }
413 
414   if (Error E = removeSections(Config, Obj))
415     return E;
416 
417   // Mark symbols to determine which symbols are still needed.
418   if (Config.StripAll)
419     markSymbols(Config, Obj);
420 
421   updateAndRemoveSymbols(Config, MachOConfig, Obj);
422 
423   if (Config.StripAll)
424     for (LoadCommand &LC : Obj.LoadCommands)
425       for (std::unique_ptr<Section> &Sec : LC.Sections)
426         Sec->Relocations.clear();
427 
428   for (const NewSectionInfo &NewSection : Config.AddSection) {
429     if (Error E = isValidMachOCannonicalName(NewSection.SectionName))
430       return E;
431     if (Error E = addSection(NewSection, Obj))
432       return E;
433   }
434 
435   for (const NewSectionInfo &NewSection : Config.UpdateSection) {
436     if (Error E = isValidMachOCannonicalName(NewSection.SectionName))
437       return E;
438     if (Error E = updateSection(NewSection, Obj))
439       return E;
440   }
441 
442   if (Error E = processLoadCommands(MachOConfig, Obj))
443     return E;
444 
445   return Error::success();
446 }
447 
448 Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config,
449                                              const MachOConfig &MachOConfig,
450                                              object::MachOObjectFile &In,
451                                              raw_ostream &Out) {
452   MachOReader Reader(In);
453   Expected<std::unique_ptr<Object>> O = Reader.create();
454   if (!O)
455     return createFileError(Config.InputFilename, O.takeError());
456 
457   if (O->get()->Header.FileType == MachO::HeaderFileType::MH_PRELOAD)
458     return createStringError(std::errc::not_supported,
459                              "%s: MH_PRELOAD files are not supported",
460                              Config.InputFilename.str().c_str());
461 
462   if (Error E = handleArgs(Config, MachOConfig, **O))
463     return createFileError(Config.InputFilename, std::move(E));
464 
465   // Page size used for alignment of segment sizes in Mach-O executables and
466   // dynamic libraries.
467   uint64_t PageSize;
468   switch (In.getArch()) {
469   case Triple::ArchType::arm:
470   case Triple::ArchType::aarch64:
471   case Triple::ArchType::aarch64_32:
472     PageSize = 16384;
473     break;
474   default:
475     PageSize = 4096;
476   }
477 
478   MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(),
479                      sys::path::filename(Config.OutputFilename), PageSize, Out);
480   if (auto E = Writer.finalize())
481     return E;
482   return Writer.write();
483 }
484 
485 Error objcopy::macho::executeObjcopyOnMachOUniversalBinary(
486     const MultiFormatConfig &Config, const MachOUniversalBinary &In,
487     raw_ostream &Out) {
488   SmallVector<OwningBinary<Binary>, 2> Binaries;
489   SmallVector<Slice, 2> Slices;
490   for (const auto &O : In.objects()) {
491     Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive();
492     if (ArOrErr) {
493       Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
494           createNewArchiveMembers(Config, **ArOrErr);
495       if (!NewArchiveMembersOrErr)
496         return NewArchiveMembersOrErr.takeError();
497       auto Kind = (*ArOrErr)->kind();
498       if (Kind == object::Archive::K_BSD)
499         Kind = object::Archive::K_DARWIN;
500       Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
501           writeArchiveToBuffer(
502               *NewArchiveMembersOrErr,
503               (*ArOrErr)->hasSymbolTable() ? SymtabWritingMode::NormalSymtab
504                                            : SymtabWritingMode::NoSymtab,
505               Kind, Config.getCommonConfig().DeterministicArchives,
506               (*ArOrErr)->isThin());
507       if (!OutputBufferOrErr)
508         return OutputBufferOrErr.takeError();
509       Expected<std::unique_ptr<Binary>> BinaryOrErr =
510           object::createBinary(**OutputBufferOrErr);
511       if (!BinaryOrErr)
512         return BinaryOrErr.takeError();
513       Binaries.emplace_back(std::move(*BinaryOrErr),
514                             std::move(*OutputBufferOrErr));
515       Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()),
516                           O.getCPUType(), O.getCPUSubType(),
517                           O.getArchFlagName(), O.getAlign());
518       continue;
519     }
520     // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
521     // ObjectForArch return an Error in case of the type mismatch. We need to
522     // check each in turn to see what kind of slice this is, so ignore errors
523     // produced along the way.
524     consumeError(ArOrErr.takeError());
525 
526     Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
527     if (!ObjOrErr) {
528       consumeError(ObjOrErr.takeError());
529       return createStringError(
530           std::errc::invalid_argument,
531           "slice for '%s' of the universal Mach-O binary "
532           "'%s' is not a Mach-O object or an archive",
533           O.getArchFlagName().c_str(),
534           Config.getCommonConfig().InputFilename.str().c_str());
535     }
536     std::string ArchFlagName = O.getArchFlagName();
537 
538     SmallVector<char, 0> Buffer;
539     raw_svector_ostream MemStream(Buffer);
540 
541     Expected<const MachOConfig &> MachO = Config.getMachOConfig();
542     if (!MachO)
543       return MachO.takeError();
544 
545     if (Error E = executeObjcopyOnBinary(Config.getCommonConfig(), *MachO,
546                                          **ObjOrErr, MemStream))
547       return E;
548 
549     auto MB = std::make_unique<SmallVectorMemoryBuffer>(
550         std::move(Buffer), ArchFlagName, /*RequiresNullTerminator=*/false);
551     Expected<std::unique_ptr<Binary>> BinaryOrErr = object::createBinary(*MB);
552     if (!BinaryOrErr)
553       return BinaryOrErr.takeError();
554     Binaries.emplace_back(std::move(*BinaryOrErr), std::move(MB));
555     Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()),
556                         O.getAlign());
557   }
558 
559   if (Error Err = writeUniversalBinaryToStream(Slices, Out))
560     return Err;
561 
562   return Error::success();
563 }
564