1 //===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/ObjCopy/MachO/MachOObjcopy.h" 10 #include "Archive.h" 11 #include "MachOReader.h" 12 #include "MachOWriter.h" 13 #include "llvm/ADT/DenseSet.h" 14 #include "llvm/ObjCopy/CommonConfig.h" 15 #include "llvm/ObjCopy/MachO/MachOConfig.h" 16 #include "llvm/ObjCopy/MultiFormatConfig.h" 17 #include "llvm/ObjCopy/ObjCopy.h" 18 #include "llvm/Object/ArchiveWriter.h" 19 #include "llvm/Object/MachOUniversal.h" 20 #include "llvm/Object/MachOUniversalWriter.h" 21 #include "llvm/Support/Errc.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/FileOutputBuffer.h" 24 #include "llvm/Support/Path.h" 25 #include "llvm/Support/SmallVectorMemoryBuffer.h" 26 27 using namespace llvm; 28 using namespace llvm::objcopy; 29 using namespace llvm::objcopy::macho; 30 using namespace llvm::object; 31 32 using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>; 33 using LoadCommandPred = std::function<bool(const LoadCommand &LC)>; 34 35 #ifndef NDEBUG 36 static bool isLoadCommandWithPayloadString(const LoadCommand &LC) { 37 // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and 38 // LC_LAZY_LOAD_DYLIB 39 return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH || 40 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB || 41 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB || 42 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB; 43 } 44 #endif 45 46 static StringRef getPayloadString(const LoadCommand &LC) { 47 assert(isLoadCommandWithPayloadString(LC) && 48 "unsupported load command encountered"); 49 50 return StringRef(reinterpret_cast<const char *>(LC.Payload.data()), 51 LC.Payload.size()) 52 .rtrim('\0'); 53 } 54 55 static Error removeSections(const CommonConfig &Config, Object &Obj) { 56 SectionPred RemovePred = [](const std::unique_ptr<Section> &) { 57 return false; 58 }; 59 60 if (!Config.ToRemove.empty()) { 61 RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) { 62 return Config.ToRemove.matches(Sec->CanonicalName); 63 }; 64 } 65 66 if (Config.StripAll || Config.StripDebug) { 67 // Remove all debug sections. 68 RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) { 69 if (Sec->Segname == "__DWARF") 70 return true; 71 72 return RemovePred(Sec); 73 }; 74 } 75 76 if (!Config.OnlySection.empty()) { 77 // Overwrite RemovePred because --only-section takes priority. 78 RemovePred = [&Config](const std::unique_ptr<Section> &Sec) { 79 return !Config.OnlySection.matches(Sec->CanonicalName); 80 }; 81 } 82 83 return Obj.removeSections(RemovePred); 84 } 85 86 static void markSymbols(const CommonConfig &, Object &Obj) { 87 // Symbols referenced from the indirect symbol table must not be removed. 88 for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols) 89 if (ISE.Symbol) 90 (*ISE.Symbol)->Referenced = true; 91 } 92 93 static void updateAndRemoveSymbols(const CommonConfig &Config, 94 const MachOConfig &MachOConfig, 95 Object &Obj) { 96 for (SymbolEntry &Sym : Obj.SymTable) { 97 auto I = Config.SymbolsToRename.find(Sym.Name); 98 if (I != Config.SymbolsToRename.end()) 99 Sym.Name = std::string(I->getValue()); 100 } 101 102 auto RemovePred = [&Config, &MachOConfig, 103 &Obj](const std::unique_ptr<SymbolEntry> &N) { 104 if (N->Referenced) 105 return false; 106 if (MachOConfig.KeepUndefined && N->isUndefinedSymbol()) 107 return false; 108 if (N->n_desc & MachO::REFERENCED_DYNAMICALLY) 109 return false; 110 if (Config.StripAll) 111 return true; 112 if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT)) 113 return true; 114 // This behavior is consistent with cctools' strip. 115 if (MachOConfig.StripSwiftSymbols && 116 (Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion && 117 *Obj.SwiftVersion && N->isSwiftSymbol()) 118 return true; 119 return false; 120 }; 121 122 Obj.SymTable.removeSymbols(RemovePred); 123 } 124 125 template <typename LCType> 126 static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) { 127 assert(isLoadCommandWithPayloadString(LC) && 128 "unsupported load command encountered"); 129 130 uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8); 131 132 LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize; 133 LC.Payload.assign(NewCmdsize - sizeof(LCType), 0); 134 std::copy(S.begin(), S.end(), LC.Payload.begin()); 135 } 136 137 static LoadCommand buildRPathLoadCommand(StringRef Path) { 138 LoadCommand LC; 139 MachO::rpath_command RPathLC; 140 RPathLC.cmd = MachO::LC_RPATH; 141 RPathLC.path = sizeof(MachO::rpath_command); 142 RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8); 143 LC.MachOLoadCommand.rpath_command_data = RPathLC; 144 LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0); 145 std::copy(Path.begin(), Path.end(), LC.Payload.begin()); 146 return LC; 147 } 148 149 static Error processLoadCommands(const MachOConfig &MachOConfig, Object &Obj) { 150 // Remove RPaths. 151 DenseSet<StringRef> RPathsToRemove(MachOConfig.RPathsToRemove.begin(), 152 MachOConfig.RPathsToRemove.end()); 153 154 LoadCommandPred RemovePred = [&RPathsToRemove, 155 &MachOConfig](const LoadCommand &LC) { 156 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) { 157 // When removing all RPaths we don't need to care 158 // about what it contains 159 if (MachOConfig.RemoveAllRpaths) 160 return true; 161 162 StringRef RPath = getPayloadString(LC); 163 if (RPathsToRemove.count(RPath)) { 164 RPathsToRemove.erase(RPath); 165 return true; 166 } 167 } 168 return false; 169 }; 170 171 if (Error E = Obj.removeLoadCommands(RemovePred)) 172 return E; 173 174 // Emit an error if the Mach-O binary does not contain an rpath path name 175 // specified in -delete_rpath. 176 for (StringRef RPath : MachOConfig.RPathsToRemove) { 177 if (RPathsToRemove.count(RPath)) 178 return createStringError(errc::invalid_argument, 179 "no LC_RPATH load command with path: %s", 180 RPath.str().c_str()); 181 } 182 183 DenseSet<StringRef> RPaths; 184 185 // Get all existing RPaths. 186 for (LoadCommand &LC : Obj.LoadCommands) { 187 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) 188 RPaths.insert(getPayloadString(LC)); 189 } 190 191 // Throw errors for invalid RPaths. 192 for (const auto &OldNew : MachOConfig.RPathsToUpdate) { 193 StringRef Old = OldNew.getFirst(); 194 StringRef New = OldNew.getSecond(); 195 if (!RPaths.contains(Old)) 196 return createStringError(errc::invalid_argument, 197 "no LC_RPATH load command with path: " + Old); 198 if (RPaths.contains(New)) 199 return createStringError(errc::invalid_argument, 200 "rpath '" + New + 201 "' would create a duplicate load command"); 202 } 203 204 // Update load commands. 205 for (LoadCommand &LC : Obj.LoadCommands) { 206 switch (LC.MachOLoadCommand.load_command_data.cmd) { 207 case MachO::LC_ID_DYLIB: 208 if (MachOConfig.SharedLibId) 209 updateLoadCommandPayloadString<MachO::dylib_command>( 210 LC, *MachOConfig.SharedLibId); 211 break; 212 213 case MachO::LC_RPATH: { 214 StringRef RPath = getPayloadString(LC); 215 StringRef NewRPath = MachOConfig.RPathsToUpdate.lookup(RPath); 216 if (!NewRPath.empty()) 217 updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath); 218 break; 219 } 220 221 // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB 222 // here once llvm-objcopy supports them. 223 case MachO::LC_LOAD_DYLIB: 224 case MachO::LC_LOAD_WEAK_DYLIB: 225 StringRef InstallName = getPayloadString(LC); 226 StringRef NewInstallName = 227 MachOConfig.InstallNamesToUpdate.lookup(InstallName); 228 if (!NewInstallName.empty()) 229 updateLoadCommandPayloadString<MachO::dylib_command>(LC, 230 NewInstallName); 231 break; 232 } 233 } 234 235 // Add new RPaths. 236 for (StringRef RPath : MachOConfig.RPathToAdd) { 237 if (RPaths.contains(RPath)) 238 return createStringError(errc::invalid_argument, 239 "rpath '" + RPath + 240 "' would create a duplicate load command"); 241 RPaths.insert(RPath); 242 Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath)); 243 } 244 245 for (StringRef RPath : MachOConfig.RPathToPrepend) { 246 if (RPaths.contains(RPath)) 247 return createStringError(errc::invalid_argument, 248 "rpath '" + RPath + 249 "' would create a duplicate load command"); 250 251 RPaths.insert(RPath); 252 Obj.LoadCommands.insert(Obj.LoadCommands.begin(), 253 buildRPathLoadCommand(RPath)); 254 } 255 256 // Unlike appending rpaths, the indexes of subsequent load commands must 257 // be recalculated after prepending one. 258 if (!MachOConfig.RPathToPrepend.empty()) 259 Obj.updateLoadCommandIndexes(); 260 261 // Remove any empty segments if required. 262 if (!MachOConfig.EmptySegmentsToRemove.empty()) { 263 auto RemovePred = [&MachOConfig](const LoadCommand &LC) { 264 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT_64 || 265 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT) { 266 return LC.Sections.empty() && 267 MachOConfig.EmptySegmentsToRemove.contains(*LC.getSegmentName()); 268 } 269 return false; 270 }; 271 if (Error E = Obj.removeLoadCommands(RemovePred)) 272 return E; 273 } 274 275 return Error::success(); 276 } 277 278 static Error dumpSectionToFile(StringRef SecName, StringRef Filename, 279 Object &Obj) { 280 for (LoadCommand &LC : Obj.LoadCommands) 281 for (const std::unique_ptr<Section> &Sec : LC.Sections) { 282 if (Sec->CanonicalName == SecName) { 283 Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = 284 FileOutputBuffer::create(Filename, Sec->Content.size()); 285 if (!BufferOrErr) 286 return BufferOrErr.takeError(); 287 std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr); 288 llvm::copy(Sec->Content, Buf->getBufferStart()); 289 290 if (Error E = Buf->commit()) 291 return E; 292 return Error::success(); 293 } 294 } 295 296 return createStringError(object_error::parse_failed, "section '%s' not found", 297 SecName.str().c_str()); 298 } 299 300 static Error addSection(const NewSectionInfo &NewSection, Object &Obj) { 301 std::pair<StringRef, StringRef> Pair = NewSection.SectionName.split(','); 302 StringRef TargetSegName = Pair.first; 303 Section Sec(TargetSegName, Pair.second); 304 Sec.Content = 305 Obj.NewSectionsContents.save(NewSection.SectionData->getBuffer()); 306 Sec.Size = Sec.Content.size(); 307 308 // Add the a section into an existing segment. 309 for (LoadCommand &LC : Obj.LoadCommands) { 310 std::optional<StringRef> SegName = LC.getSegmentName(); 311 if (SegName && SegName == TargetSegName) { 312 uint64_t Addr = *LC.getSegmentVMAddr(); 313 for (const std::unique_ptr<Section> &S : LC.Sections) 314 Addr = std::max(Addr, S->Addr + S->Size); 315 LC.Sections.push_back(std::make_unique<Section>(Sec)); 316 LC.Sections.back()->Addr = Addr; 317 return Error::success(); 318 } 319 } 320 321 // There's no segment named TargetSegName. Create a new load command and 322 // Insert a new section into it. 323 LoadCommand &NewSegment = 324 Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384)); 325 NewSegment.Sections.push_back(std::make_unique<Section>(Sec)); 326 NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr(); 327 return Error::success(); 328 } 329 330 static Expected<Section &> findSection(StringRef SecName, Object &O) { 331 StringRef SegName; 332 std::tie(SegName, SecName) = SecName.split(","); 333 auto FoundSeg = 334 llvm::find_if(O.LoadCommands, [SegName](const LoadCommand &LC) { 335 return LC.getSegmentName() == SegName; 336 }); 337 if (FoundSeg == O.LoadCommands.end()) 338 return createStringError(errc::invalid_argument, 339 "could not find segment with name '%s'", 340 SegName.str().c_str()); 341 auto FoundSec = llvm::find_if(FoundSeg->Sections, 342 [SecName](const std::unique_ptr<Section> &Sec) { 343 return Sec->Sectname == SecName; 344 }); 345 if (FoundSec == FoundSeg->Sections.end()) 346 return createStringError(errc::invalid_argument, 347 "could not find section with name '%s'", 348 SecName.str().c_str()); 349 350 assert(FoundSec->get()->CanonicalName == (SegName + "," + SecName).str()); 351 return **FoundSec; 352 } 353 354 static Error updateSection(const NewSectionInfo &NewSection, Object &O) { 355 Expected<Section &> SecToUpdateOrErr = findSection(NewSection.SectionName, O); 356 357 if (!SecToUpdateOrErr) 358 return SecToUpdateOrErr.takeError(); 359 Section &Sec = *SecToUpdateOrErr; 360 361 if (NewSection.SectionData->getBufferSize() > Sec.Size) 362 return createStringError( 363 errc::invalid_argument, 364 "new section cannot be larger than previous section"); 365 Sec.Content = O.NewSectionsContents.save(NewSection.SectionData->getBuffer()); 366 Sec.Size = Sec.Content.size(); 367 return Error::success(); 368 } 369 370 // isValidMachOCannonicalName returns success if Name is a MachO cannonical name 371 // ("<segment>,<section>") and lengths of both segment and section names are 372 // valid. 373 static Error isValidMachOCannonicalName(StringRef Name) { 374 if (Name.count(',') != 1) 375 return createStringError(errc::invalid_argument, 376 "invalid section name '%s' (should be formatted " 377 "as '<segment name>,<section name>')", 378 Name.str().c_str()); 379 380 std::pair<StringRef, StringRef> Pair = Name.split(','); 381 if (Pair.first.size() > 16) 382 return createStringError(errc::invalid_argument, 383 "too long segment name: '%s'", 384 Pair.first.str().c_str()); 385 if (Pair.second.size() > 16) 386 return createStringError(errc::invalid_argument, 387 "too long section name: '%s'", 388 Pair.second.str().c_str()); 389 return Error::success(); 390 } 391 392 static Error handleArgs(const CommonConfig &Config, 393 const MachOConfig &MachOConfig, Object &Obj) { 394 // Dump sections before add/remove for compatibility with GNU objcopy. 395 for (StringRef Flag : Config.DumpSection) { 396 StringRef SectionName; 397 StringRef FileName; 398 std::tie(SectionName, FileName) = Flag.split('='); 399 if (Error E = dumpSectionToFile(SectionName, FileName, Obj)) 400 return E; 401 } 402 403 if (Error E = removeSections(Config, Obj)) 404 return E; 405 406 // Mark symbols to determine which symbols are still needed. 407 if (Config.StripAll) 408 markSymbols(Config, Obj); 409 410 updateAndRemoveSymbols(Config, MachOConfig, Obj); 411 412 if (Config.StripAll) 413 for (LoadCommand &LC : Obj.LoadCommands) 414 for (std::unique_ptr<Section> &Sec : LC.Sections) 415 Sec->Relocations.clear(); 416 417 for (const NewSectionInfo &NewSection : Config.AddSection) { 418 if (Error E = isValidMachOCannonicalName(NewSection.SectionName)) 419 return E; 420 if (Error E = addSection(NewSection, Obj)) 421 return E; 422 } 423 424 for (const NewSectionInfo &NewSection : Config.UpdateSection) { 425 if (Error E = isValidMachOCannonicalName(NewSection.SectionName)) 426 return E; 427 if (Error E = updateSection(NewSection, Obj)) 428 return E; 429 } 430 431 if (Error E = processLoadCommands(MachOConfig, Obj)) 432 return E; 433 434 return Error::success(); 435 } 436 437 Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config, 438 const MachOConfig &MachOConfig, 439 object::MachOObjectFile &In, 440 raw_ostream &Out) { 441 MachOReader Reader(In); 442 Expected<std::unique_ptr<Object>> O = Reader.create(); 443 if (!O) 444 return createFileError(Config.InputFilename, O.takeError()); 445 446 if (O->get()->Header.FileType == MachO::HeaderFileType::MH_PRELOAD) 447 return createStringError(std::errc::not_supported, 448 "%s: MH_PRELOAD files are not supported", 449 Config.InputFilename.str().c_str()); 450 451 if (Error E = handleArgs(Config, MachOConfig, **O)) 452 return createFileError(Config.InputFilename, std::move(E)); 453 454 // Page size used for alignment of segment sizes in Mach-O executables and 455 // dynamic libraries. 456 uint64_t PageSize; 457 switch (In.getArch()) { 458 case Triple::ArchType::arm: 459 case Triple::ArchType::aarch64: 460 case Triple::ArchType::aarch64_32: 461 PageSize = 16384; 462 break; 463 default: 464 PageSize = 4096; 465 } 466 467 MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(), 468 sys::path::filename(Config.OutputFilename), PageSize, Out); 469 if (auto E = Writer.finalize()) 470 return E; 471 return Writer.write(); 472 } 473 474 Error objcopy::macho::executeObjcopyOnMachOUniversalBinary( 475 const MultiFormatConfig &Config, const MachOUniversalBinary &In, 476 raw_ostream &Out) { 477 SmallVector<OwningBinary<Binary>, 2> Binaries; 478 SmallVector<Slice, 2> Slices; 479 for (const auto &O : In.objects()) { 480 Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive(); 481 if (ArOrErr) { 482 Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr = 483 createNewArchiveMembers(Config, **ArOrErr); 484 if (!NewArchiveMembersOrErr) 485 return NewArchiveMembersOrErr.takeError(); 486 auto Kind = (*ArOrErr)->kind(); 487 if (Kind == object::Archive::K_BSD) 488 Kind = object::Archive::K_DARWIN; 489 Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr = 490 writeArchiveToBuffer(*NewArchiveMembersOrErr, 491 (*ArOrErr)->hasSymbolTable(), Kind, 492 Config.getCommonConfig().DeterministicArchives, 493 (*ArOrErr)->isThin()); 494 if (!OutputBufferOrErr) 495 return OutputBufferOrErr.takeError(); 496 Expected<std::unique_ptr<Binary>> BinaryOrErr = 497 object::createBinary(**OutputBufferOrErr); 498 if (!BinaryOrErr) 499 return BinaryOrErr.takeError(); 500 Binaries.emplace_back(std::move(*BinaryOrErr), 501 std::move(*OutputBufferOrErr)); 502 Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()), 503 O.getCPUType(), O.getCPUSubType(), 504 O.getArchFlagName(), O.getAlign()); 505 continue; 506 } 507 // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class 508 // ObjectForArch return an Error in case of the type mismatch. We need to 509 // check each in turn to see what kind of slice this is, so ignore errors 510 // produced along the way. 511 consumeError(ArOrErr.takeError()); 512 513 Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile(); 514 if (!ObjOrErr) { 515 consumeError(ObjOrErr.takeError()); 516 return createStringError( 517 std::errc::invalid_argument, 518 "slice for '%s' of the universal Mach-O binary " 519 "'%s' is not a Mach-O object or an archive", 520 O.getArchFlagName().c_str(), 521 Config.getCommonConfig().InputFilename.str().c_str()); 522 } 523 std::string ArchFlagName = O.getArchFlagName(); 524 525 SmallVector<char, 0> Buffer; 526 raw_svector_ostream MemStream(Buffer); 527 528 Expected<const MachOConfig &> MachO = Config.getMachOConfig(); 529 if (!MachO) 530 return MachO.takeError(); 531 532 if (Error E = executeObjcopyOnBinary(Config.getCommonConfig(), *MachO, 533 **ObjOrErr, MemStream)) 534 return E; 535 536 auto MB = std::make_unique<SmallVectorMemoryBuffer>( 537 std::move(Buffer), ArchFlagName, /*RequiresNullTerminator=*/false); 538 Expected<std::unique_ptr<Binary>> BinaryOrErr = object::createBinary(*MB); 539 if (!BinaryOrErr) 540 return BinaryOrErr.takeError(); 541 Binaries.emplace_back(std::move(*BinaryOrErr), std::move(MB)); 542 Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()), 543 O.getAlign()); 544 } 545 546 if (Error Err = writeUniversalBinaryToStream(Slices, Out)) 547 return Err; 548 549 return Error::success(); 550 } 551