1 //===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/ObjCopy/MachO/MachOObjcopy.h" 10 #include "Archive.h" 11 #include "MachOReader.h" 12 #include "MachOWriter.h" 13 #include "llvm/ADT/DenseSet.h" 14 #include "llvm/ObjCopy/CommonConfig.h" 15 #include "llvm/ObjCopy/MachO/MachOConfig.h" 16 #include "llvm/ObjCopy/MultiFormatConfig.h" 17 #include "llvm/ObjCopy/ObjCopy.h" 18 #include "llvm/Object/ArchiveWriter.h" 19 #include "llvm/Object/MachOUniversal.h" 20 #include "llvm/Object/MachOUniversalWriter.h" 21 #include "llvm/Support/Errc.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/FileOutputBuffer.h" 24 #include "llvm/Support/Path.h" 25 #include "llvm/Support/SmallVectorMemoryBuffer.h" 26 27 using namespace llvm; 28 using namespace llvm::objcopy; 29 using namespace llvm::objcopy::macho; 30 using namespace llvm::object; 31 32 using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>; 33 using LoadCommandPred = std::function<bool(const LoadCommand &LC)>; 34 35 #ifndef NDEBUG 36 static bool isLoadCommandWithPayloadString(const LoadCommand &LC) { 37 // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and 38 // LC_LAZY_LOAD_DYLIB 39 return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH || 40 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB || 41 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB || 42 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB; 43 } 44 #endif 45 46 static StringRef getPayloadString(const LoadCommand &LC) { 47 assert(isLoadCommandWithPayloadString(LC) && 48 "unsupported load command encountered"); 49 50 return StringRef(reinterpret_cast<const char *>(LC.Payload.data()), 51 LC.Payload.size()) 52 .rtrim('\0'); 53 } 54 55 static Error removeSections(const CommonConfig &Config, Object &Obj) { 56 SectionPred RemovePred = [](const std::unique_ptr<Section> &) { 57 return false; 58 }; 59 60 if (!Config.ToRemove.empty()) { 61 RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) { 62 return Config.ToRemove.matches(Sec->CanonicalName); 63 }; 64 } 65 66 if (Config.StripAll || Config.StripDebug) { 67 // Remove all debug sections. 68 RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) { 69 if (Sec->Segname == "__DWARF") 70 return true; 71 72 return RemovePred(Sec); 73 }; 74 } 75 76 if (!Config.OnlySection.empty()) { 77 // Overwrite RemovePred because --only-section takes priority. 78 RemovePred = [&Config](const std::unique_ptr<Section> &Sec) { 79 return !Config.OnlySection.matches(Sec->CanonicalName); 80 }; 81 } 82 83 return Obj.removeSections(RemovePred); 84 } 85 86 static void markSymbols(const CommonConfig &, Object &Obj) { 87 // Symbols referenced from the indirect symbol table must not be removed. 88 for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols) 89 if (ISE.Symbol) 90 (*ISE.Symbol)->Referenced = true; 91 } 92 93 static void updateAndRemoveSymbols(const CommonConfig &Config, 94 const MachOConfig &MachOConfig, 95 Object &Obj) { 96 for (SymbolEntry &Sym : Obj.SymTable) { 97 auto I = Config.SymbolsToRename.find(Sym.Name); 98 if (I != Config.SymbolsToRename.end()) 99 Sym.Name = std::string(I->getValue()); 100 } 101 102 auto RemovePred = [&Config, &MachOConfig, 103 &Obj](const std::unique_ptr<SymbolEntry> &N) { 104 if (N->Referenced) 105 return false; 106 if (MachOConfig.KeepUndefined && N->isUndefinedSymbol()) 107 return false; 108 if (N->n_desc & MachO::REFERENCED_DYNAMICALLY) 109 return false; 110 if (Config.StripAll) 111 return true; 112 if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT)) 113 return true; 114 // This behavior is consistent with cctools' strip. 115 if (Config.StripDebug && (N->n_type & MachO::N_STAB)) 116 return true; 117 // This behavior is consistent with cctools' strip. 118 if (MachOConfig.StripSwiftSymbols && 119 (Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion && 120 *Obj.SwiftVersion && N->isSwiftSymbol()) 121 return true; 122 return false; 123 }; 124 125 Obj.SymTable.removeSymbols(RemovePred); 126 } 127 128 template <typename LCType> 129 static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) { 130 assert(isLoadCommandWithPayloadString(LC) && 131 "unsupported load command encountered"); 132 133 uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8); 134 135 LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize; 136 LC.Payload.assign(NewCmdsize - sizeof(LCType), 0); 137 std::copy(S.begin(), S.end(), LC.Payload.begin()); 138 } 139 140 static LoadCommand buildRPathLoadCommand(StringRef Path) { 141 LoadCommand LC; 142 MachO::rpath_command RPathLC; 143 RPathLC.cmd = MachO::LC_RPATH; 144 RPathLC.path = sizeof(MachO::rpath_command); 145 RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8); 146 LC.MachOLoadCommand.rpath_command_data = RPathLC; 147 LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0); 148 std::copy(Path.begin(), Path.end(), LC.Payload.begin()); 149 return LC; 150 } 151 152 static Error processLoadCommands(const MachOConfig &MachOConfig, Object &Obj) { 153 // Remove RPaths. 154 DenseSet<StringRef> RPathsToRemove(MachOConfig.RPathsToRemove.begin(), 155 MachOConfig.RPathsToRemove.end()); 156 157 LoadCommandPred RemovePred = [&RPathsToRemove, 158 &MachOConfig](const LoadCommand &LC) { 159 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) { 160 // When removing all RPaths we don't need to care 161 // about what it contains 162 if (MachOConfig.RemoveAllRpaths) 163 return true; 164 165 StringRef RPath = getPayloadString(LC); 166 if (RPathsToRemove.count(RPath)) { 167 RPathsToRemove.erase(RPath); 168 return true; 169 } 170 } 171 return false; 172 }; 173 174 if (Error E = Obj.removeLoadCommands(RemovePred)) 175 return E; 176 177 // Emit an error if the Mach-O binary does not contain an rpath path name 178 // specified in -delete_rpath. 179 for (StringRef RPath : MachOConfig.RPathsToRemove) { 180 if (RPathsToRemove.count(RPath)) 181 return createStringError(errc::invalid_argument, 182 "no LC_RPATH load command with path: %s", 183 RPath.str().c_str()); 184 } 185 186 DenseSet<StringRef> RPaths; 187 188 // Get all existing RPaths. 189 for (LoadCommand &LC : Obj.LoadCommands) { 190 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) 191 RPaths.insert(getPayloadString(LC)); 192 } 193 194 // Throw errors for invalid RPaths. 195 for (const auto &OldNew : MachOConfig.RPathsToUpdate) { 196 StringRef Old = OldNew.getFirst(); 197 StringRef New = OldNew.getSecond(); 198 if (!RPaths.contains(Old)) 199 return createStringError(errc::invalid_argument, 200 "no LC_RPATH load command with path: " + Old); 201 if (RPaths.contains(New)) 202 return createStringError(errc::invalid_argument, 203 "rpath '" + New + 204 "' would create a duplicate load command"); 205 } 206 207 // Update load commands. 208 for (LoadCommand &LC : Obj.LoadCommands) { 209 switch (LC.MachOLoadCommand.load_command_data.cmd) { 210 case MachO::LC_ID_DYLIB: 211 if (MachOConfig.SharedLibId) 212 updateLoadCommandPayloadString<MachO::dylib_command>( 213 LC, *MachOConfig.SharedLibId); 214 break; 215 216 case MachO::LC_RPATH: { 217 StringRef RPath = getPayloadString(LC); 218 StringRef NewRPath = MachOConfig.RPathsToUpdate.lookup(RPath); 219 if (!NewRPath.empty()) 220 updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath); 221 break; 222 } 223 224 // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB 225 // here once llvm-objcopy supports them. 226 case MachO::LC_LOAD_DYLIB: 227 case MachO::LC_LOAD_WEAK_DYLIB: 228 StringRef InstallName = getPayloadString(LC); 229 StringRef NewInstallName = 230 MachOConfig.InstallNamesToUpdate.lookup(InstallName); 231 if (!NewInstallName.empty()) 232 updateLoadCommandPayloadString<MachO::dylib_command>(LC, 233 NewInstallName); 234 break; 235 } 236 } 237 238 // Add new RPaths. 239 for (StringRef RPath : MachOConfig.RPathToAdd) { 240 if (RPaths.contains(RPath)) 241 return createStringError(errc::invalid_argument, 242 "rpath '" + RPath + 243 "' would create a duplicate load command"); 244 RPaths.insert(RPath); 245 Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath)); 246 } 247 248 for (StringRef RPath : MachOConfig.RPathToPrepend) { 249 if (RPaths.contains(RPath)) 250 return createStringError(errc::invalid_argument, 251 "rpath '" + RPath + 252 "' would create a duplicate load command"); 253 254 RPaths.insert(RPath); 255 Obj.LoadCommands.insert(Obj.LoadCommands.begin(), 256 buildRPathLoadCommand(RPath)); 257 } 258 259 // Unlike appending rpaths, the indexes of subsequent load commands must 260 // be recalculated after prepending one. 261 if (!MachOConfig.RPathToPrepend.empty()) 262 Obj.updateLoadCommandIndexes(); 263 264 // Remove any empty segments if required. 265 if (!MachOConfig.EmptySegmentsToRemove.empty()) { 266 auto RemovePred = [&MachOConfig](const LoadCommand &LC) { 267 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT_64 || 268 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT) { 269 return LC.Sections.empty() && 270 MachOConfig.EmptySegmentsToRemove.contains(*LC.getSegmentName()); 271 } 272 return false; 273 }; 274 if (Error E = Obj.removeLoadCommands(RemovePred)) 275 return E; 276 } 277 278 return Error::success(); 279 } 280 281 static Error dumpSectionToFile(StringRef SecName, StringRef Filename, 282 Object &Obj) { 283 for (LoadCommand &LC : Obj.LoadCommands) 284 for (const std::unique_ptr<Section> &Sec : LC.Sections) { 285 if (Sec->CanonicalName == SecName) { 286 Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = 287 FileOutputBuffer::create(Filename, Sec->Content.size()); 288 if (!BufferOrErr) 289 return BufferOrErr.takeError(); 290 std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr); 291 llvm::copy(Sec->Content, Buf->getBufferStart()); 292 293 if (Error E = Buf->commit()) 294 return E; 295 return Error::success(); 296 } 297 } 298 299 return createStringError(object_error::parse_failed, "section '%s' not found", 300 SecName.str().c_str()); 301 } 302 303 static Error addSection(const NewSectionInfo &NewSection, Object &Obj) { 304 std::pair<StringRef, StringRef> Pair = NewSection.SectionName.split(','); 305 StringRef TargetSegName = Pair.first; 306 Section Sec(TargetSegName, Pair.second); 307 Sec.Content = 308 Obj.NewSectionsContents.save(NewSection.SectionData->getBuffer()); 309 Sec.Size = Sec.Content.size(); 310 311 // Add the a section into an existing segment. 312 for (LoadCommand &LC : Obj.LoadCommands) { 313 std::optional<StringRef> SegName = LC.getSegmentName(); 314 if (SegName && SegName == TargetSegName) { 315 uint64_t Addr = *LC.getSegmentVMAddr(); 316 for (const std::unique_ptr<Section> &S : LC.Sections) 317 Addr = std::max(Addr, S->Addr + S->Size); 318 LC.Sections.push_back(std::make_unique<Section>(Sec)); 319 LC.Sections.back()->Addr = Addr; 320 return Error::success(); 321 } 322 } 323 324 // There's no segment named TargetSegName. Create a new load command and 325 // Insert a new section into it. 326 LoadCommand &NewSegment = 327 Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384)); 328 NewSegment.Sections.push_back(std::make_unique<Section>(Sec)); 329 NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr(); 330 return Error::success(); 331 } 332 333 static Expected<Section &> findSection(StringRef SecName, Object &O) { 334 StringRef SegName; 335 std::tie(SegName, SecName) = SecName.split(","); 336 auto FoundSeg = 337 llvm::find_if(O.LoadCommands, [SegName](const LoadCommand &LC) { 338 return LC.getSegmentName() == SegName; 339 }); 340 if (FoundSeg == O.LoadCommands.end()) 341 return createStringError(errc::invalid_argument, 342 "could not find segment with name '%s'", 343 SegName.str().c_str()); 344 auto FoundSec = llvm::find_if(FoundSeg->Sections, 345 [SecName](const std::unique_ptr<Section> &Sec) { 346 return Sec->Sectname == SecName; 347 }); 348 if (FoundSec == FoundSeg->Sections.end()) 349 return createStringError(errc::invalid_argument, 350 "could not find section with name '%s'", 351 SecName.str().c_str()); 352 353 assert(FoundSec->get()->CanonicalName == (SegName + "," + SecName).str()); 354 return **FoundSec; 355 } 356 357 static Error updateSection(const NewSectionInfo &NewSection, Object &O) { 358 Expected<Section &> SecToUpdateOrErr = findSection(NewSection.SectionName, O); 359 360 if (!SecToUpdateOrErr) 361 return SecToUpdateOrErr.takeError(); 362 Section &Sec = *SecToUpdateOrErr; 363 364 if (NewSection.SectionData->getBufferSize() > Sec.Size) 365 return createStringError( 366 errc::invalid_argument, 367 "new section cannot be larger than previous section"); 368 Sec.Content = O.NewSectionsContents.save(NewSection.SectionData->getBuffer()); 369 Sec.Size = Sec.Content.size(); 370 return Error::success(); 371 } 372 373 // isValidMachOCannonicalName returns success if Name is a MachO cannonical name 374 // ("<segment>,<section>") and lengths of both segment and section names are 375 // valid. 376 static Error isValidMachOCannonicalName(StringRef Name) { 377 if (Name.count(',') != 1) 378 return createStringError(errc::invalid_argument, 379 "invalid section name '%s' (should be formatted " 380 "as '<segment name>,<section name>')", 381 Name.str().c_str()); 382 383 std::pair<StringRef, StringRef> Pair = Name.split(','); 384 if (Pair.first.size() > 16) 385 return createStringError(errc::invalid_argument, 386 "too long segment name: '%s'", 387 Pair.first.str().c_str()); 388 if (Pair.second.size() > 16) 389 return createStringError(errc::invalid_argument, 390 "too long section name: '%s'", 391 Pair.second.str().c_str()); 392 return Error::success(); 393 } 394 395 static Error handleArgs(const CommonConfig &Config, 396 const MachOConfig &MachOConfig, Object &Obj) { 397 // Dump sections before add/remove for compatibility with GNU objcopy. 398 for (StringRef Flag : Config.DumpSection) { 399 StringRef SectionName; 400 StringRef FileName; 401 std::tie(SectionName, FileName) = Flag.split('='); 402 if (Error E = dumpSectionToFile(SectionName, FileName, Obj)) 403 return E; 404 } 405 406 if (Error E = removeSections(Config, Obj)) 407 return E; 408 409 // Mark symbols to determine which symbols are still needed. 410 if (Config.StripAll) 411 markSymbols(Config, Obj); 412 413 updateAndRemoveSymbols(Config, MachOConfig, Obj); 414 415 if (Config.StripAll) 416 for (LoadCommand &LC : Obj.LoadCommands) 417 for (std::unique_ptr<Section> &Sec : LC.Sections) 418 Sec->Relocations.clear(); 419 420 for (const NewSectionInfo &NewSection : Config.AddSection) { 421 if (Error E = isValidMachOCannonicalName(NewSection.SectionName)) 422 return E; 423 if (Error E = addSection(NewSection, Obj)) 424 return E; 425 } 426 427 for (const NewSectionInfo &NewSection : Config.UpdateSection) { 428 if (Error E = isValidMachOCannonicalName(NewSection.SectionName)) 429 return E; 430 if (Error E = updateSection(NewSection, Obj)) 431 return E; 432 } 433 434 if (Error E = processLoadCommands(MachOConfig, Obj)) 435 return E; 436 437 return Error::success(); 438 } 439 440 Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config, 441 const MachOConfig &MachOConfig, 442 object::MachOObjectFile &In, 443 raw_ostream &Out) { 444 MachOReader Reader(In); 445 Expected<std::unique_ptr<Object>> O = Reader.create(); 446 if (!O) 447 return createFileError(Config.InputFilename, O.takeError()); 448 449 if (O->get()->Header.FileType == MachO::HeaderFileType::MH_PRELOAD) 450 return createStringError(std::errc::not_supported, 451 "%s: MH_PRELOAD files are not supported", 452 Config.InputFilename.str().c_str()); 453 454 if (Error E = handleArgs(Config, MachOConfig, **O)) 455 return createFileError(Config.InputFilename, std::move(E)); 456 457 // Page size used for alignment of segment sizes in Mach-O executables and 458 // dynamic libraries. 459 uint64_t PageSize; 460 switch (In.getArch()) { 461 case Triple::ArchType::arm: 462 case Triple::ArchType::aarch64: 463 case Triple::ArchType::aarch64_32: 464 PageSize = 16384; 465 break; 466 default: 467 PageSize = 4096; 468 } 469 470 MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(), 471 sys::path::filename(Config.OutputFilename), PageSize, Out); 472 if (auto E = Writer.finalize()) 473 return E; 474 return Writer.write(); 475 } 476 477 Error objcopy::macho::executeObjcopyOnMachOUniversalBinary( 478 const MultiFormatConfig &Config, const MachOUniversalBinary &In, 479 raw_ostream &Out) { 480 SmallVector<OwningBinary<Binary>, 2> Binaries; 481 SmallVector<Slice, 2> Slices; 482 for (const auto &O : In.objects()) { 483 Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive(); 484 if (ArOrErr) { 485 Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr = 486 createNewArchiveMembers(Config, **ArOrErr); 487 if (!NewArchiveMembersOrErr) 488 return NewArchiveMembersOrErr.takeError(); 489 auto Kind = (*ArOrErr)->kind(); 490 if (Kind == object::Archive::K_BSD) 491 Kind = object::Archive::K_DARWIN; 492 Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr = 493 writeArchiveToBuffer(*NewArchiveMembersOrErr, 494 (*ArOrErr)->hasSymbolTable(), Kind, 495 Config.getCommonConfig().DeterministicArchives, 496 (*ArOrErr)->isThin()); 497 if (!OutputBufferOrErr) 498 return OutputBufferOrErr.takeError(); 499 Expected<std::unique_ptr<Binary>> BinaryOrErr = 500 object::createBinary(**OutputBufferOrErr); 501 if (!BinaryOrErr) 502 return BinaryOrErr.takeError(); 503 Binaries.emplace_back(std::move(*BinaryOrErr), 504 std::move(*OutputBufferOrErr)); 505 Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()), 506 O.getCPUType(), O.getCPUSubType(), 507 O.getArchFlagName(), O.getAlign()); 508 continue; 509 } 510 // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class 511 // ObjectForArch return an Error in case of the type mismatch. We need to 512 // check each in turn to see what kind of slice this is, so ignore errors 513 // produced along the way. 514 consumeError(ArOrErr.takeError()); 515 516 Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile(); 517 if (!ObjOrErr) { 518 consumeError(ObjOrErr.takeError()); 519 return createStringError( 520 std::errc::invalid_argument, 521 "slice for '%s' of the universal Mach-O binary " 522 "'%s' is not a Mach-O object or an archive", 523 O.getArchFlagName().c_str(), 524 Config.getCommonConfig().InputFilename.str().c_str()); 525 } 526 std::string ArchFlagName = O.getArchFlagName(); 527 528 SmallVector<char, 0> Buffer; 529 raw_svector_ostream MemStream(Buffer); 530 531 Expected<const MachOConfig &> MachO = Config.getMachOConfig(); 532 if (!MachO) 533 return MachO.takeError(); 534 535 if (Error E = executeObjcopyOnBinary(Config.getCommonConfig(), *MachO, 536 **ObjOrErr, MemStream)) 537 return E; 538 539 auto MB = std::make_unique<SmallVectorMemoryBuffer>( 540 std::move(Buffer), ArchFlagName, /*RequiresNullTerminator=*/false); 541 Expected<std::unique_ptr<Binary>> BinaryOrErr = object::createBinary(*MB); 542 if (!BinaryOrErr) 543 return BinaryOrErr.takeError(); 544 Binaries.emplace_back(std::move(*BinaryOrErr), std::move(MB)); 545 Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()), 546 O.getAlign()); 547 } 548 549 if (Error Err = writeUniversalBinaryToStream(Slices, Out)) 550 return Err; 551 552 return Error::success(); 553 } 554