1 //===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/ObjCopy/MachO/MachOObjcopy.h" 10 #include "Archive.h" 11 #include "MachOReader.h" 12 #include "MachOWriter.h" 13 #include "llvm/ADT/DenseSet.h" 14 #include "llvm/ObjCopy/CommonConfig.h" 15 #include "llvm/ObjCopy/MachO/MachOConfig.h" 16 #include "llvm/ObjCopy/MultiFormatConfig.h" 17 #include "llvm/ObjCopy/ObjCopy.h" 18 #include "llvm/Object/ArchiveWriter.h" 19 #include "llvm/Object/MachOUniversal.h" 20 #include "llvm/Object/MachOUniversalWriter.h" 21 #include "llvm/Support/Errc.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/FileOutputBuffer.h" 24 #include "llvm/Support/Path.h" 25 #include "llvm/Support/SmallVectorMemoryBuffer.h" 26 27 using namespace llvm; 28 using namespace llvm::objcopy; 29 using namespace llvm::objcopy::macho; 30 using namespace llvm::object; 31 32 using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>; 33 using LoadCommandPred = std::function<bool(const LoadCommand &LC)>; 34 35 #ifndef NDEBUG 36 static bool isLoadCommandWithPayloadString(const LoadCommand &LC) { 37 // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and 38 // LC_LAZY_LOAD_DYLIB 39 return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH || 40 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB || 41 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB || 42 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB; 43 } 44 #endif 45 46 static StringRef getPayloadString(const LoadCommand &LC) { 47 assert(isLoadCommandWithPayloadString(LC) && 48 "unsupported load command encountered"); 49 50 return StringRef(reinterpret_cast<const char *>(LC.Payload.data()), 51 LC.Payload.size()) 52 .rtrim('\0'); 53 } 54 55 static Error removeSections(const CommonConfig &Config, Object &Obj) { 56 SectionPred RemovePred = [](const std::unique_ptr<Section> &) { 57 return false; 58 }; 59 60 if (!Config.ToRemove.empty()) { 61 RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) { 62 return Config.ToRemove.matches(Sec->CanonicalName); 63 }; 64 } 65 66 if (Config.StripAll || Config.StripDebug) { 67 // Remove all debug sections. 68 RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) { 69 if (Sec->Segname == "__DWARF") 70 return true; 71 72 return RemovePred(Sec); 73 }; 74 } 75 76 if (!Config.OnlySection.empty()) { 77 // Overwrite RemovePred because --only-section takes priority. 78 RemovePred = [&Config](const std::unique_ptr<Section> &Sec) { 79 return !Config.OnlySection.matches(Sec->CanonicalName); 80 }; 81 } 82 83 return Obj.removeSections(RemovePred); 84 } 85 86 static void markSymbols(const CommonConfig &, Object &Obj) { 87 // Symbols referenced from the indirect symbol table must not be removed. 88 for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols) 89 if (ISE.Symbol) 90 (*ISE.Symbol)->Referenced = true; 91 } 92 93 static void updateAndRemoveSymbols(const CommonConfig &Config, 94 const MachOConfig &MachOConfig, 95 Object &Obj) { 96 for (SymbolEntry &Sym : Obj.SymTable) { 97 // Weaken symbols first to match ELFObjcopy behavior. 98 bool IsExportedAndDefined = 99 (Sym.n_type & llvm::MachO::N_EXT) && 100 (Sym.n_type & llvm::MachO::N_TYPE) != llvm::MachO::N_UNDF; 101 if (IsExportedAndDefined && 102 (Config.Weaken || Config.SymbolsToWeaken.matches(Sym.Name))) 103 Sym.n_desc |= llvm::MachO::N_WEAK_DEF; 104 105 auto I = Config.SymbolsToRename.find(Sym.Name); 106 if (I != Config.SymbolsToRename.end()) 107 Sym.Name = std::string(I->getValue()); 108 } 109 110 auto RemovePred = [&Config, &MachOConfig, 111 &Obj](const std::unique_ptr<SymbolEntry> &N) { 112 if (N->Referenced) 113 return false; 114 if (MachOConfig.KeepUndefined && N->isUndefinedSymbol()) 115 return false; 116 if (N->n_desc & MachO::REFERENCED_DYNAMICALLY) 117 return false; 118 if (Config.StripAll) 119 return true; 120 if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT)) 121 return true; 122 // This behavior is consistent with cctools' strip. 123 if (Config.StripDebug && (N->n_type & MachO::N_STAB)) 124 return true; 125 // This behavior is consistent with cctools' strip. 126 if (MachOConfig.StripSwiftSymbols && 127 (Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion && 128 *Obj.SwiftVersion && N->isSwiftSymbol()) 129 return true; 130 return false; 131 }; 132 133 Obj.SymTable.removeSymbols(RemovePred); 134 } 135 136 template <typename LCType> 137 static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) { 138 assert(isLoadCommandWithPayloadString(LC) && 139 "unsupported load command encountered"); 140 141 uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8); 142 143 LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize; 144 LC.Payload.assign(NewCmdsize - sizeof(LCType), 0); 145 std::copy(S.begin(), S.end(), LC.Payload.begin()); 146 } 147 148 static LoadCommand buildRPathLoadCommand(StringRef Path) { 149 LoadCommand LC; 150 MachO::rpath_command RPathLC; 151 RPathLC.cmd = MachO::LC_RPATH; 152 RPathLC.path = sizeof(MachO::rpath_command); 153 RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8); 154 LC.MachOLoadCommand.rpath_command_data = RPathLC; 155 LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0); 156 std::copy(Path.begin(), Path.end(), LC.Payload.begin()); 157 return LC; 158 } 159 160 static Error processLoadCommands(const MachOConfig &MachOConfig, Object &Obj) { 161 // Remove RPaths. 162 DenseSet<StringRef> RPathsToRemove(MachOConfig.RPathsToRemove.begin(), 163 MachOConfig.RPathsToRemove.end()); 164 165 LoadCommandPred RemovePred = [&RPathsToRemove, 166 &MachOConfig](const LoadCommand &LC) { 167 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) { 168 // When removing all RPaths we don't need to care 169 // about what it contains 170 if (MachOConfig.RemoveAllRpaths) 171 return true; 172 173 StringRef RPath = getPayloadString(LC); 174 if (RPathsToRemove.count(RPath)) { 175 RPathsToRemove.erase(RPath); 176 return true; 177 } 178 } 179 return false; 180 }; 181 182 if (Error E = Obj.removeLoadCommands(RemovePred)) 183 return E; 184 185 // Emit an error if the Mach-O binary does not contain an rpath path name 186 // specified in -delete_rpath. 187 for (StringRef RPath : MachOConfig.RPathsToRemove) { 188 if (RPathsToRemove.count(RPath)) 189 return createStringError(errc::invalid_argument, 190 "no LC_RPATH load command with path: %s", 191 RPath.str().c_str()); 192 } 193 194 DenseSet<StringRef> RPaths; 195 196 // Get all existing RPaths. 197 for (LoadCommand &LC : Obj.LoadCommands) { 198 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) 199 RPaths.insert(getPayloadString(LC)); 200 } 201 202 // Throw errors for invalid RPaths. 203 for (const auto &OldNew : MachOConfig.RPathsToUpdate) { 204 StringRef Old = OldNew.getFirst(); 205 StringRef New = OldNew.getSecond(); 206 if (!RPaths.contains(Old)) 207 return createStringError(errc::invalid_argument, 208 "no LC_RPATH load command with path: " + Old); 209 if (RPaths.contains(New)) 210 return createStringError(errc::invalid_argument, 211 "rpath '" + New + 212 "' would create a duplicate load command"); 213 } 214 215 // Update load commands. 216 for (LoadCommand &LC : Obj.LoadCommands) { 217 switch (LC.MachOLoadCommand.load_command_data.cmd) { 218 case MachO::LC_ID_DYLIB: 219 if (MachOConfig.SharedLibId) 220 updateLoadCommandPayloadString<MachO::dylib_command>( 221 LC, *MachOConfig.SharedLibId); 222 break; 223 224 case MachO::LC_RPATH: { 225 StringRef RPath = getPayloadString(LC); 226 StringRef NewRPath = MachOConfig.RPathsToUpdate.lookup(RPath); 227 if (!NewRPath.empty()) 228 updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath); 229 break; 230 } 231 232 // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB 233 // here once llvm-objcopy supports them. 234 case MachO::LC_LOAD_DYLIB: 235 case MachO::LC_LOAD_WEAK_DYLIB: 236 StringRef InstallName = getPayloadString(LC); 237 StringRef NewInstallName = 238 MachOConfig.InstallNamesToUpdate.lookup(InstallName); 239 if (!NewInstallName.empty()) 240 updateLoadCommandPayloadString<MachO::dylib_command>(LC, 241 NewInstallName); 242 break; 243 } 244 } 245 246 // Add new RPaths. 247 for (StringRef RPath : MachOConfig.RPathToAdd) { 248 if (RPaths.contains(RPath)) 249 return createStringError(errc::invalid_argument, 250 "rpath '" + RPath + 251 "' would create a duplicate load command"); 252 RPaths.insert(RPath); 253 Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath)); 254 } 255 256 for (StringRef RPath : MachOConfig.RPathToPrepend) { 257 if (RPaths.contains(RPath)) 258 return createStringError(errc::invalid_argument, 259 "rpath '" + RPath + 260 "' would create a duplicate load command"); 261 262 RPaths.insert(RPath); 263 Obj.LoadCommands.insert(Obj.LoadCommands.begin(), 264 buildRPathLoadCommand(RPath)); 265 } 266 267 // Unlike appending rpaths, the indexes of subsequent load commands must 268 // be recalculated after prepending one. 269 if (!MachOConfig.RPathToPrepend.empty()) 270 Obj.updateLoadCommandIndexes(); 271 272 // Remove any empty segments if required. 273 if (!MachOConfig.EmptySegmentsToRemove.empty()) { 274 auto RemovePred = [&MachOConfig](const LoadCommand &LC) { 275 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT_64 || 276 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT) { 277 return LC.Sections.empty() && 278 MachOConfig.EmptySegmentsToRemove.contains(*LC.getSegmentName()); 279 } 280 return false; 281 }; 282 if (Error E = Obj.removeLoadCommands(RemovePred)) 283 return E; 284 } 285 286 return Error::success(); 287 } 288 289 static Error dumpSectionToFile(StringRef SecName, StringRef Filename, 290 Object &Obj) { 291 for (LoadCommand &LC : Obj.LoadCommands) 292 for (const std::unique_ptr<Section> &Sec : LC.Sections) { 293 if (Sec->CanonicalName == SecName) { 294 Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = 295 FileOutputBuffer::create(Filename, Sec->Content.size()); 296 if (!BufferOrErr) 297 return BufferOrErr.takeError(); 298 std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr); 299 llvm::copy(Sec->Content, Buf->getBufferStart()); 300 301 if (Error E = Buf->commit()) 302 return E; 303 return Error::success(); 304 } 305 } 306 307 return createStringError(object_error::parse_failed, "section '%s' not found", 308 SecName.str().c_str()); 309 } 310 311 static Error addSection(const NewSectionInfo &NewSection, Object &Obj) { 312 std::pair<StringRef, StringRef> Pair = NewSection.SectionName.split(','); 313 StringRef TargetSegName = Pair.first; 314 Section Sec(TargetSegName, Pair.second); 315 Sec.Content = 316 Obj.NewSectionsContents.save(NewSection.SectionData->getBuffer()); 317 Sec.Size = Sec.Content.size(); 318 319 // Add the a section into an existing segment. 320 for (LoadCommand &LC : Obj.LoadCommands) { 321 std::optional<StringRef> SegName = LC.getSegmentName(); 322 if (SegName && SegName == TargetSegName) { 323 uint64_t Addr = *LC.getSegmentVMAddr(); 324 for (const std::unique_ptr<Section> &S : LC.Sections) 325 Addr = std::max(Addr, S->Addr + S->Size); 326 LC.Sections.push_back(std::make_unique<Section>(Sec)); 327 LC.Sections.back()->Addr = Addr; 328 return Error::success(); 329 } 330 } 331 332 // There's no segment named TargetSegName. Create a new load command and 333 // Insert a new section into it. 334 LoadCommand &NewSegment = 335 Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384)); 336 NewSegment.Sections.push_back(std::make_unique<Section>(Sec)); 337 NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr(); 338 return Error::success(); 339 } 340 341 static Expected<Section &> findSection(StringRef SecName, Object &O) { 342 StringRef SegName; 343 std::tie(SegName, SecName) = SecName.split(","); 344 auto FoundSeg = 345 llvm::find_if(O.LoadCommands, [SegName](const LoadCommand &LC) { 346 return LC.getSegmentName() == SegName; 347 }); 348 if (FoundSeg == O.LoadCommands.end()) 349 return createStringError(errc::invalid_argument, 350 "could not find segment with name '%s'", 351 SegName.str().c_str()); 352 auto FoundSec = llvm::find_if(FoundSeg->Sections, 353 [SecName](const std::unique_ptr<Section> &Sec) { 354 return Sec->Sectname == SecName; 355 }); 356 if (FoundSec == FoundSeg->Sections.end()) 357 return createStringError(errc::invalid_argument, 358 "could not find section with name '%s'", 359 SecName.str().c_str()); 360 361 assert(FoundSec->get()->CanonicalName == (SegName + "," + SecName).str()); 362 return **FoundSec; 363 } 364 365 static Error updateSection(const NewSectionInfo &NewSection, Object &O) { 366 Expected<Section &> SecToUpdateOrErr = findSection(NewSection.SectionName, O); 367 368 if (!SecToUpdateOrErr) 369 return SecToUpdateOrErr.takeError(); 370 Section &Sec = *SecToUpdateOrErr; 371 372 if (NewSection.SectionData->getBufferSize() > Sec.Size) 373 return createStringError( 374 errc::invalid_argument, 375 "new section cannot be larger than previous section"); 376 Sec.Content = O.NewSectionsContents.save(NewSection.SectionData->getBuffer()); 377 Sec.Size = Sec.Content.size(); 378 return Error::success(); 379 } 380 381 // isValidMachOCannonicalName returns success if Name is a MachO cannonical name 382 // ("<segment>,<section>") and lengths of both segment and section names are 383 // valid. 384 static Error isValidMachOCannonicalName(StringRef Name) { 385 if (Name.count(',') != 1) 386 return createStringError(errc::invalid_argument, 387 "invalid section name '%s' (should be formatted " 388 "as '<segment name>,<section name>')", 389 Name.str().c_str()); 390 391 std::pair<StringRef, StringRef> Pair = Name.split(','); 392 if (Pair.first.size() > 16) 393 return createStringError(errc::invalid_argument, 394 "too long segment name: '%s'", 395 Pair.first.str().c_str()); 396 if (Pair.second.size() > 16) 397 return createStringError(errc::invalid_argument, 398 "too long section name: '%s'", 399 Pair.second.str().c_str()); 400 return Error::success(); 401 } 402 403 static Error handleArgs(const CommonConfig &Config, 404 const MachOConfig &MachOConfig, Object &Obj) { 405 // Dump sections before add/remove for compatibility with GNU objcopy. 406 for (StringRef Flag : Config.DumpSection) { 407 StringRef SectionName; 408 StringRef FileName; 409 std::tie(SectionName, FileName) = Flag.split('='); 410 if (Error E = dumpSectionToFile(SectionName, FileName, Obj)) 411 return E; 412 } 413 414 if (Error E = removeSections(Config, Obj)) 415 return E; 416 417 // Mark symbols to determine which symbols are still needed. 418 if (Config.StripAll) 419 markSymbols(Config, Obj); 420 421 updateAndRemoveSymbols(Config, MachOConfig, Obj); 422 423 if (Config.StripAll) 424 for (LoadCommand &LC : Obj.LoadCommands) 425 for (std::unique_ptr<Section> &Sec : LC.Sections) 426 Sec->Relocations.clear(); 427 428 for (const NewSectionInfo &NewSection : Config.AddSection) { 429 if (Error E = isValidMachOCannonicalName(NewSection.SectionName)) 430 return E; 431 if (Error E = addSection(NewSection, Obj)) 432 return E; 433 } 434 435 for (const NewSectionInfo &NewSection : Config.UpdateSection) { 436 if (Error E = isValidMachOCannonicalName(NewSection.SectionName)) 437 return E; 438 if (Error E = updateSection(NewSection, Obj)) 439 return E; 440 } 441 442 if (Error E = processLoadCommands(MachOConfig, Obj)) 443 return E; 444 445 return Error::success(); 446 } 447 448 Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config, 449 const MachOConfig &MachOConfig, 450 object::MachOObjectFile &In, 451 raw_ostream &Out) { 452 MachOReader Reader(In); 453 Expected<std::unique_ptr<Object>> O = Reader.create(); 454 if (!O) 455 return createFileError(Config.InputFilename, O.takeError()); 456 457 if (O->get()->Header.FileType == MachO::HeaderFileType::MH_PRELOAD) 458 return createStringError(std::errc::not_supported, 459 "%s: MH_PRELOAD files are not supported", 460 Config.InputFilename.str().c_str()); 461 462 if (Error E = handleArgs(Config, MachOConfig, **O)) 463 return createFileError(Config.InputFilename, std::move(E)); 464 465 // Page size used for alignment of segment sizes in Mach-O executables and 466 // dynamic libraries. 467 uint64_t PageSize; 468 switch (In.getArch()) { 469 case Triple::ArchType::arm: 470 case Triple::ArchType::aarch64: 471 case Triple::ArchType::aarch64_32: 472 PageSize = 16384; 473 break; 474 default: 475 PageSize = 4096; 476 } 477 478 MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(), 479 sys::path::filename(Config.OutputFilename), PageSize, Out); 480 if (auto E = Writer.finalize()) 481 return E; 482 return Writer.write(); 483 } 484 485 Error objcopy::macho::executeObjcopyOnMachOUniversalBinary( 486 const MultiFormatConfig &Config, const MachOUniversalBinary &In, 487 raw_ostream &Out) { 488 SmallVector<OwningBinary<Binary>, 2> Binaries; 489 SmallVector<Slice, 2> Slices; 490 for (const auto &O : In.objects()) { 491 Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive(); 492 if (ArOrErr) { 493 Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr = 494 createNewArchiveMembers(Config, **ArOrErr); 495 if (!NewArchiveMembersOrErr) 496 return NewArchiveMembersOrErr.takeError(); 497 auto Kind = (*ArOrErr)->kind(); 498 if (Kind == object::Archive::K_BSD) 499 Kind = object::Archive::K_DARWIN; 500 Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr = 501 writeArchiveToBuffer( 502 *NewArchiveMembersOrErr, 503 (*ArOrErr)->hasSymbolTable() ? SymtabWritingMode::NormalSymtab 504 : SymtabWritingMode::NoSymtab, 505 Kind, Config.getCommonConfig().DeterministicArchives, 506 (*ArOrErr)->isThin()); 507 if (!OutputBufferOrErr) 508 return OutputBufferOrErr.takeError(); 509 Expected<std::unique_ptr<Binary>> BinaryOrErr = 510 object::createBinary(**OutputBufferOrErr); 511 if (!BinaryOrErr) 512 return BinaryOrErr.takeError(); 513 Binaries.emplace_back(std::move(*BinaryOrErr), 514 std::move(*OutputBufferOrErr)); 515 Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()), 516 O.getCPUType(), O.getCPUSubType(), 517 O.getArchFlagName(), O.getAlign()); 518 continue; 519 } 520 // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class 521 // ObjectForArch return an Error in case of the type mismatch. We need to 522 // check each in turn to see what kind of slice this is, so ignore errors 523 // produced along the way. 524 consumeError(ArOrErr.takeError()); 525 526 Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile(); 527 if (!ObjOrErr) { 528 consumeError(ObjOrErr.takeError()); 529 return createStringError( 530 std::errc::invalid_argument, 531 "slice for '%s' of the universal Mach-O binary " 532 "'%s' is not a Mach-O object or an archive", 533 O.getArchFlagName().c_str(), 534 Config.getCommonConfig().InputFilename.str().c_str()); 535 } 536 std::string ArchFlagName = O.getArchFlagName(); 537 538 SmallVector<char, 0> Buffer; 539 raw_svector_ostream MemStream(Buffer); 540 541 Expected<const MachOConfig &> MachO = Config.getMachOConfig(); 542 if (!MachO) 543 return MachO.takeError(); 544 545 if (Error E = executeObjcopyOnBinary(Config.getCommonConfig(), *MachO, 546 **ObjOrErr, MemStream)) 547 return E; 548 549 auto MB = std::make_unique<SmallVectorMemoryBuffer>( 550 std::move(Buffer), ArchFlagName, /*RequiresNullTerminator=*/false); 551 Expected<std::unique_ptr<Binary>> BinaryOrErr = object::createBinary(*MB); 552 if (!BinaryOrErr) 553 return BinaryOrErr.takeError(); 554 Binaries.emplace_back(std::move(*BinaryOrErr), std::move(MB)); 555 Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()), 556 O.getAlign()); 557 } 558 559 if (Error Err = writeUniversalBinaryToStream(Slices, Out)) 560 return Err; 561 562 return Error::success(); 563 } 564