//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolMachO.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "mc"

/// Drop all per-object state accumulated by this writer (relocations,
/// indirect symbols, data regions, section layout, string table, symbol
/// arrays, LOH container, version info, linker options) and then reset the
/// MCObjectWriter base.
void MachObjectWriter::reset() {
  Relocations.clear();
  IndirectSymBase.clear();
  IndirectSymbols.clear();
  DataRegions.clear();
  SectionAddress.clear();
  SectionOrder.clear();
  StringTable.clear();
  LocalSymbolData.clear();
  ExternalSymbolData.clear();
  UndefinedSymbolData.clear();
  LOHContainer.reset();
  // writeObject() only emits a version load command when Major is non-zero,
  // so zeroing Major is what marks the version info as "not set".
  VersionInfo.Major = 0;
  VersionInfo.SDKVersion = VersionTuple();
  TargetVariantVersionInfo.Major = 0;
  TargetVariantVersionInfo.SDKVersion = VersionTuple();
  LinkerOptions.clear();
  MCObjectWriter::reset();
}

/// Record the assembler on the base class and forward it to the target
/// object writer as well.
void MachObjectWriter::setAssembler(MCAssembler *Asm) {
  MCObjectWriter::setAssembler(Asm);
  TargetObjectWriter->setAssembler(Asm);
}

/// Return true if a relocation against \p S must be an external relocation,
/// i.e. \p S is undefined or is a weak definition.
bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
  // Undefined symbols are always extern.
  if (S.isUndefined())
    return true;

  // References to weak definitions require external relocation entries; the
  // definition may not always be the one in the same object file.
  if (cast<MCSymbolMachO>(S).isWeakDefinition())
    return true;

  // Otherwise, we can use an internal relocation.
  return false;
}

/// Order symbol table entries by symbol name. computeSymbolTable() relies on
/// this (via llvm::sort) to put external and undefined symbols in
/// lexicographic order.
bool MachObjectWriter::
MachSymbolData::operator<(const MachSymbolData &RHS) const {
  return Symbol->getName() < RHS.Symbol->getName();
}

/// Return the address of \p Fragment: the address assigned to its parent
/// section plus the fragment's offset as reported by \p Asm.
uint64_t
MachObjectWriter::getFragmentAddress(const MCAssembler &Asm,
                                     const MCFragment *Fragment) const {
  return getSectionAddress(Fragment->getParent()) +
         Asm.getFragmentOffset(*Fragment);
}

/// Compute the address of symbol \p S.
///
/// For a variable symbol, the value expression is evaluated: a constant
/// expression yields its value directly; otherwise the expression is
/// evaluated as relocatable and the result is
///   constant + address(AddSym) - address(SubSym)
/// computed recursively. A fatal error is reported if the expression cannot
/// be evaluated or if it refers to an undefined symbol.
///
/// For a non-variable symbol, the result is the address of the section that
/// contains the symbol's fragment plus the symbol's offset.
uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S) const {
  // If this is a variable, then recursively evaluate now.
  if (S.isVariable()) {
    if (const MCConstantExpr *C =
          dyn_cast<const MCConstantExpr>(S.getVariableValue()))
      return C->getValue();

    MCValue Target;
    if (!S.getVariableValue()->evaluateAsRelocatable(Target, Asm))
      report_fatal_error("unable to evaluate offset for variable '" +
                         S.getName() + "'");

    // Verify that any used symbols are defined.
    if (Target.getAddSym() && Target.getAddSym()->isUndefined())
      report_fatal_error("unable to evaluate offset to undefined symbol '" +
                         Target.getAddSym()->getName() + "'");
    if (Target.getSubSym() && Target.getSubSym()->isUndefined())
      report_fatal_error("unable to evaluate offset to undefined symbol '" +
                         Target.getSubSym()->getName() + "'");

    uint64_t Address = Target.getConstant();
    if (Target.getAddSym())
      Address += getSymbolAddress(*Target.getAddSym());
    if (Target.getSubSym())
      Address -= getSymbolAddress(*Target.getSubSym());
    return Address;
  }

  // NOTE(review): this dereferences S.getFragment() unconditionally, so
  // callers presumably only reach here for symbols that have a fragment.
  return getSectionAddress(S.getFragment()->getParent()) +
         Asm->getSymbolOffset(S);
}

/// Return the number of padding bytes that follow \p Sec so that the next
/// section in SectionOrder starts at its required alignment.
///
/// Returns 0 when \p Sec is the last section in the layout order or when the
/// next section is a virtual section.
uint64_t MachObjectWriter::getPaddingSize(const MCAssembler &Asm,
                                          const MCSection *Sec) const {
  uint64_t EndAddr = getSectionAddress(Sec) + Asm.getSectionAddressSize(*Sec);
  unsigned Next = cast<MCSectionMachO>(Sec)->getLayoutOrder() + 1;
  if (Next >= SectionOrder.size())
    return 0;

  const MCSection &NextSec = *SectionOrder[Next];
  if (NextSec.isVirtualSection())
    return 0;
  return offsetToAlignment(EndAddr, NextSec.getAlign());
}

/// Return true if \p Symbol is visible to the linker: either it is not a
/// temporary symbol, or it is a temporary that is used in a relocation.
static bool isSymbolLinkerVisible(const MCSymbol &Symbol) {
  // Non-temporary labels should always be visible to the linker.
  if (!Symbol.isTemporary())
    return true;

  if (Symbol.isUsedInReloc())
    return true;

  return false;
}

/// Return the atom-defining symbol for \p S, or nullptr if it has none.
///
/// A linker-visible symbol is its own atom. Otherwise the atom is the one
/// recorded on the symbol's fragment, provided the symbol is in a section
/// and that section can be atomized by symbols.
const MCSymbol *MachObjectWriter::getAtom(const MCSymbol &S) const {
  // Linker visible symbols define atoms.
  if (isSymbolLinkerVisible(S))
    return &S;

  // Absolute and undefined symbols have no defining atom.
  if (!S.isInSection())
    return nullptr;

  // Non-linker visible symbols in sections which can't be atomized have no
  // defining atom.
  if (!MCAsmInfoDarwin::isSectionAtomizableBySymbols(
          *S.getFragment()->getParent()))
    return nullptr;

  // Otherwise, return the atom for the containing fragment.
  return S.getFragment()->getAtom();
}

/// Write the Mach-O file header (mach_header or mach_header_64).
///
/// \param Type The file type written to the header.
/// \param NumLoadCommands The number of load commands that follow.
/// \param LoadCommandsSize The total size in bytes of those load commands.
/// \param SubsectionsViaSymbols If true, MH_SUBSECTIONS_VIA_SYMBOLS is set in
/// the header flags.
void MachObjectWriter::writeHeader(MachO::HeaderFileType Type,
                                   unsigned NumLoadCommands,
                                   unsigned LoadCommandsSize,
                                   bool SubsectionsViaSymbols) {
  uint32_t Flags = 0;

  if (SubsectionsViaSymbols)
    Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS;

  // struct mach_header (28 bytes) or
  // struct mach_header_64 (32 bytes)

  // Start is only read by the size assertion at the end; the void cast keeps
  // it from being reported as unused when assertions are compiled out.
  uint64_t Start = W.OS.tell();
  (void) Start;

  W.write<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);

  W.write<uint32_t>(TargetObjectWriter->getCPUType());

  uint32_t Cpusubtype = TargetObjectWriter->getCPUSubtype();

  // Promote arm64e subtypes to always be ptrauth-ABI-versioned, at version 0.
  // We never need to emit unversioned binaries.
  // And we don't support arbitrary ABI versions (or the kernel flag) yet.
  if (TargetObjectWriter->getCPUType() == MachO::CPU_TYPE_ARM64 &&
      Cpusubtype == MachO::CPU_SUBTYPE_ARM64E)
    Cpusubtype = MachO::CPU_SUBTYPE_ARM64E_WITH_PTRAUTH_VERSION(
        /*PtrAuthABIVersion=*/0, /*PtrAuthKernelABIVersion=*/false);

  W.write<uint32_t>(Cpusubtype);

  W.write<uint32_t>(Type);
  W.write<uint32_t>(NumLoadCommands);
  W.write<uint32_t>(LoadCommandsSize);
  W.write<uint32_t>(Flags);
  if (is64Bit())
    W.write<uint32_t>(0); // reserved

  // Check that exactly one header's worth of bytes was emitted.
  assert(W.OS.tell() - Start == (is64Bit() ? sizeof(MachO::mach_header_64)
                                           : sizeof(MachO::mach_header)));
}

/// Write \p Str followed by zero bytes so that exactly \p Size bytes are
/// emitted. \p Str must not be longer than \p Size; this is only checked by
/// an assertion.
void MachObjectWriter::writeWithPadding(StringRef Str, uint64_t Size) {
  assert(Size >= Str.size());
  W.OS << Str;
  W.OS.write_zeros(Size - Str.size());
}

/// writeSegmentLoadCommand - Write a segment load command.
///
/// Only the segment_command / segment_command_64 structure itself is written
/// here; the command size field additionally accounts for \p NumSections
/// section headers, which the caller is expected to write afterwards.
///
/// \param Name The segment name, written zero-padded to 16 bytes.
/// \param NumSections The number of sections in this segment.
/// \param VMAddr The value written to the vmaddr field.
/// \param VMSize The value written to the vmsize field.
/// \param SectionDataStartOffset The file offset of the section data.
/// \param SectionDataSize The total size of the sections.
/// \param MaxProt The value written to the maxprot field.
/// \param InitProt The value written to the initprot field.
void MachObjectWriter::writeSegmentLoadCommand(
    StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize,
    uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt,
    uint32_t InitProt) {
  // struct segment_command (56 bytes) or
  // struct segment_command_64 (72 bytes)

  // Start is only read by the size assertion at the end.
  uint64_t Start = W.OS.tell();
  (void) Start;

  unsigned SegmentLoadCommandSize =
    is64Bit() ? sizeof(MachO::segment_command_64):
    sizeof(MachO::segment_command);
  W.write<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
  W.write<uint32_t>(SegmentLoadCommandSize +
          NumSections * (is64Bit() ? sizeof(MachO::section_64) :
                         sizeof(MachO::section)));

  writeWithPadding(Name, 16);
  if (is64Bit()) {
    W.write<uint64_t>(VMAddr);                 // vmaddr
    W.write<uint64_t>(VMSize); // vmsize
    W.write<uint64_t>(SectionDataStartOffset); // file offset
    W.write<uint64_t>(SectionDataSize); // file size
  } else {
    // The 32-bit layout stores these fields as 32-bit values, so the 64-bit
    // arguments are narrowed here.
    W.write<uint32_t>(VMAddr);                 // vmaddr
    W.write<uint32_t>(VMSize); // vmsize
    W.write<uint32_t>(SectionDataStartOffset); // file offset
    W.write<uint32_t>(SectionDataSize); // file size
  }
  // maxprot
  W.write<uint32_t>(MaxProt);
  // initprot
  W.write<uint32_t>(InitProt);
  W.write<uint32_t>(NumSections);
  W.write<uint32_t>(0); // flags

  assert(W.OS.tell() - Start == SegmentLoadCommandSize);
}

/// Write one section header (struct section or struct section_64).
///
/// \param Asm The assembler, queried for the section's address size.
/// \param Sec The section being described; must be an MCSectionMachO.
/// \param VMAddr The value written to the address field.
/// \param FileOffset The file offset of the section contents; forced to 0 for
/// virtual sections.
/// \param Flags The value written to the flags field.
/// \param RelocationsStart The file offset of this section's relocation
/// entries; written as 0 when \p NumRelocations is 0.
/// \param NumRelocations The number of relocation entries for this section.
void MachObjectWriter::writeSection(const MCAssembler &Asm,
                                    const MCSection &Sec, uint64_t VMAddr,
                                    uint64_t FileOffset, unsigned Flags,
                                    uint64_t RelocationsStart,
                                    unsigned NumRelocations) {
  uint64_t SectionSize = Asm.getSectionAddressSize(Sec);
  const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);

  // The offset is unused for virtual sections.
  if (Section.isVirtualSection()) {
    assert(Asm.getSectionFileSize(Sec) == 0 && "Invalid file size!");
    FileOffset = 0;
  }

  // struct section (68 bytes) or
  // struct section_64 (80 bytes)

  // Start is only read by the size assertion at the end.
  uint64_t Start = W.OS.tell();
  (void) Start;

  writeWithPadding(Section.getName(), 16);
  writeWithPadding(Section.getSegmentName(), 16);
  if (is64Bit()) {
    W.write<uint64_t>(VMAddr);      // address
    W.write<uint64_t>(SectionSize); // size
  } else {
    W.write<uint32_t>(VMAddr);      // address
    W.write<uint32_t>(SectionSize); // size
  }
  // The offset field is 32 bits wide in both the 32-bit and 64-bit layouts.
  assert(isUInt<32>(FileOffset) && "Cannot encode offset of section");
  W.write<uint32_t>(FileOffset);

  // The alignment is stored as a power-of-two exponent.
  W.write<uint32_t>(Log2(Section.getAlign()));
  assert((!NumRelocations || isUInt<32>(RelocationsStart)) &&
         "Cannot encode offset of relocations");
  W.write<uint32_t>(NumRelocations ? RelocationsStart : 0);
  W.write<uint32_t>(NumRelocations);
  W.write<uint32_t>(Flags);
  // lookup() yields 0 for sections that have no indirect symbol base
  // recorded in IndirectSymBase.
  W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1
  W.write<uint32_t>(Section.getStubSize()); // reserved2
  if (is64Bit())
    W.write<uint32_t>(0); // reserved3

  assert(W.OS.tell() - Start ==
         (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section)));
}

/// Write the LC_SYMTAB load command.
///
/// \param SymbolOffset The file offset of the symbol table.
/// \param NumSymbols The number of symbol table entries.
/// \param StringTableOffset The file offset of the string table.
/// \param StringTableSize The size in bytes of the string table.
void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
                                              uint32_t NumSymbols,
                                              uint32_t StringTableOffset,
                                              uint32_t StringTableSize) {
  // struct symtab_command (24 bytes)

  // Start is only read by the size assertion at the end.
  uint64_t Start = W.OS.tell();
  (void) Start;

  W.write<uint32_t>(MachO::LC_SYMTAB);
  W.write<uint32_t>(sizeof(MachO::symtab_command));
  W.write<uint32_t>(SymbolOffset);
  W.write<uint32_t>(NumSymbols);
  W.write<uint32_t>(StringTableOffset);
  W.write<uint32_t>(StringTableSize);

  assert(W.OS.tell() - Start == sizeof(MachO::symtab_command));
}

/// Write the LC_DYSYMTAB load command.
///
/// The first-index/count pairs describe the local, external and undefined
/// symbol groups; \p IndirectSymbolOffset and \p NumIndirectSymbols describe
/// the indirect symbol table. All other fields of the command (table of
/// contents, module table, external reference table, external and local
/// relocation tables) are written as zero.
void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
                                                uint32_t NumLocalSymbols,
                                                uint32_t FirstExternalSymbol,
                                                uint32_t NumExternalSymbols,
                                                uint32_t FirstUndefinedSymbol,
                                                uint32_t NumUndefinedSymbols,
                                                uint32_t IndirectSymbolOffset,
                                                uint32_t NumIndirectSymbols) {
  // struct dysymtab_command (80 bytes)

  // Start is only read by the size assertion at the end.
  uint64_t Start = W.OS.tell();
  (void) Start;

  W.write<uint32_t>(MachO::LC_DYSYMTAB);
  W.write<uint32_t>(sizeof(MachO::dysymtab_command));
  W.write<uint32_t>(FirstLocalSymbol);
  W.write<uint32_t>(NumLocalSymbols);
  W.write<uint32_t>(FirstExternalSymbol);
  W.write<uint32_t>(NumExternalSymbols);
  W.write<uint32_t>(FirstUndefinedSymbol);
  W.write<uint32_t>(NumUndefinedSymbols);
  W.write<uint32_t>(0); // tocoff
  W.write<uint32_t>(0); // ntoc
  W.write<uint32_t>(0); // modtaboff
  W.write<uint32_t>(0); // nmodtab
  W.write<uint32_t>(0); // extrefsymoff
  W.write<uint32_t>(0); // nextrefsyms
  W.write<uint32_t>(IndirectSymbolOffset);
  W.write<uint32_t>(NumIndirectSymbols);
  W.write<uint32_t>(0); // extreloff
  W.write<uint32_t>(0); // nextrel
  W.write<uint32_t>(0); // locreloff
  W.write<uint32_t>(0); // nlocrel

  assert(W.OS.tell() - Start == sizeof(MachO::dysymtab_command));
}

MachObjectWriter::MachSymbolData * 365 MachObjectWriter::findSymbolData(const MCSymbol &Sym) { 366 for (auto *SymbolData : 367 {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData}) 368 for (MachSymbolData &Entry : *SymbolData) 369 if (Entry.Symbol == &Sym) 370 return &Entry; 371 372 return nullptr; 373 } 374 375 const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const { 376 const MCSymbol *S = &Sym; 377 while (S->isVariable()) { 378 const MCExpr *Value = S->getVariableValue(); 379 const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value); 380 if (!Ref) 381 return *S; 382 S = &Ref->getSymbol(); 383 } 384 return *S; 385 } 386 387 void MachObjectWriter::writeNlist(MachSymbolData &MSD, const MCAssembler &Asm) { 388 const MCSymbol *Symbol = MSD.Symbol; 389 const auto &Data = cast<MCSymbolMachO>(*Symbol); 390 const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol); 391 uint8_t SectionIndex = MSD.SectionIndex; 392 uint8_t Type = 0; 393 uint64_t Address = 0; 394 bool IsAlias = Symbol != AliasedSymbol; 395 396 const MCSymbol &OrigSymbol = *Symbol; 397 MachSymbolData *AliaseeInfo; 398 if (IsAlias) { 399 AliaseeInfo = findSymbolData(*AliasedSymbol); 400 if (AliaseeInfo) 401 SectionIndex = AliaseeInfo->SectionIndex; 402 Symbol = AliasedSymbol; 403 // FIXME: Should this update Data as well? 404 } 405 406 // Set the N_TYPE bits. See <mach-o/nlist.h>. 407 // 408 // FIXME: Are the prebound or indirect fields possible here? 409 if (IsAlias && Symbol->isUndefined()) 410 Type = MachO::N_INDR; 411 else if (Symbol->isUndefined()) 412 Type = MachO::N_UNDF; 413 else if (Symbol->isAbsolute()) 414 Type = MachO::N_ABS; 415 else 416 Type = MachO::N_SECT; 417 418 // FIXME: Set STAB bits. 419 420 if (Data.isPrivateExtern()) 421 Type |= MachO::N_PEXT; 422 423 // Set external bit. 424 if (Data.isExternal() || (!IsAlias && Symbol->isUndefined())) 425 Type |= MachO::N_EXT; 426 427 // Compute the symbol address. 
428 if (IsAlias && Symbol->isUndefined()) 429 Address = AliaseeInfo->StringIndex; 430 else if (Symbol->isDefined()) 431 Address = getSymbolAddress(OrigSymbol); 432 else if (Symbol->isCommon()) { 433 // Common symbols are encoded with the size in the address 434 // field, and their alignment in the flags. 435 Address = Symbol->getCommonSize(); 436 } 437 438 // struct nlist (12 bytes) 439 440 W.write<uint32_t>(MSD.StringIndex); 441 W.OS << char(Type); 442 W.OS << char(SectionIndex); 443 444 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' 445 // value. 446 bool EncodeAsAltEntry = 447 IsAlias && cast<MCSymbolMachO>(OrigSymbol).isAltEntry(); 448 W.write<uint16_t>(cast<MCSymbolMachO>(Symbol)->getEncodedFlags(EncodeAsAltEntry)); 449 if (is64Bit()) 450 W.write<uint64_t>(Address); 451 else 452 W.write<uint32_t>(Address); 453 } 454 455 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type, 456 uint32_t DataOffset, 457 uint32_t DataSize) { 458 uint64_t Start = W.OS.tell(); 459 (void) Start; 460 461 W.write<uint32_t>(Type); 462 W.write<uint32_t>(sizeof(MachO::linkedit_data_command)); 463 W.write<uint32_t>(DataOffset); 464 W.write<uint32_t>(DataSize); 465 466 assert(W.OS.tell() - Start == sizeof(MachO::linkedit_data_command)); 467 } 468 469 static unsigned ComputeLinkerOptionsLoadCommandSize( 470 const std::vector<std::string> &Options, bool is64Bit) 471 { 472 unsigned Size = sizeof(MachO::linker_option_command); 473 for (const std::string &Option : Options) 474 Size += Option.size() + 1; 475 return alignTo(Size, is64Bit ? 
8 : 4); 476 } 477 478 void MachObjectWriter::writeLinkerOptionsLoadCommand( 479 const std::vector<std::string> &Options) 480 { 481 unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit()); 482 uint64_t Start = W.OS.tell(); 483 (void) Start; 484 485 W.write<uint32_t>(MachO::LC_LINKER_OPTION); 486 W.write<uint32_t>(Size); 487 W.write<uint32_t>(Options.size()); 488 uint64_t BytesWritten = sizeof(MachO::linker_option_command); 489 for (const std::string &Option : Options) { 490 // Write each string, including the null byte. 491 W.OS << Option << '\0'; 492 BytesWritten += Option.size() + 1; 493 } 494 495 // Pad to a multiple of the pointer size. 496 W.OS.write_zeros( 497 offsetToAlignment(BytesWritten, is64Bit() ? Align(8) : Align(4))); 498 499 assert(W.OS.tell() - Start == Size); 500 } 501 502 static bool isFixupTargetValid(const MCValue &Target) { 503 // Target is (LHS - RHS + cst). 504 // We don't support the form where LHS is null: -RHS + cst 505 if (!Target.getAddSym() && Target.getSubSym()) 506 return false; 507 return true; 508 } 509 510 void MachObjectWriter::recordRelocation(const MCFragment &F, 511 const MCFixup &Fixup, MCValue Target, 512 uint64_t &FixedValue) { 513 if (!isFixupTargetValid(Target)) { 514 getContext().reportError(Fixup.getLoc(), 515 "unsupported relocation expression"); 516 return; 517 } 518 519 TargetObjectWriter->recordRelocation(this, *Asm, &F, Fixup, Target, 520 FixedValue); 521 } 522 523 void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) { 524 // This is the point where 'as' creates actual symbols for indirect symbols 525 // (in the following two passes). It would be easier for us to do this sooner 526 // when we see the attribute, but that makes getting the order in the symbol 527 // table much more complicated than it is worth. 528 // 529 // FIXME: Revisit this when the dust settles. 530 531 // Report errors for use of .indirect_symbol not in a symbol pointer section 532 // or stub section. 
533 for (IndirectSymbolData &ISD : IndirectSymbols) { 534 const MCSectionMachO &Section = cast<MCSectionMachO>(*ISD.Section); 535 536 if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS && 537 Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && 538 Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS && 539 Section.getType() != MachO::S_SYMBOL_STUBS) { 540 MCSymbol &Symbol = *ISD.Symbol; 541 report_fatal_error("indirect symbol '" + Symbol.getName() + 542 "' not in a symbol pointer or stub section"); 543 } 544 } 545 546 // Bind non-lazy symbol pointers first. 547 for (auto [IndirectIndex, ISD] : enumerate(IndirectSymbols)) { 548 const auto &Section = cast<MCSectionMachO>(*ISD.Section); 549 550 if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS && 551 Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS) 552 continue; 553 554 // Initialize the section indirect symbol base, if necessary. 555 IndirectSymBase.insert(std::make_pair(ISD.Section, IndirectIndex)); 556 557 Asm.registerSymbol(*ISD.Symbol); 558 } 559 560 // Then lazy symbol pointers and symbol stubs. 561 for (auto [IndirectIndex, ISD] : enumerate(IndirectSymbols)) { 562 const auto &Section = cast<MCSectionMachO>(*ISD.Section); 563 564 if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && 565 Section.getType() != MachO::S_SYMBOL_STUBS) 566 continue; 567 568 // Initialize the section indirect symbol base, if necessary. 569 IndirectSymBase.insert(std::make_pair(ISD.Section, IndirectIndex)); 570 571 // Set the symbol type to undefined lazy, but only on construction. 572 // 573 // FIXME: Do not hardcode. 
574 if (Asm.registerSymbol(*ISD.Symbol)) 575 cast<MCSymbolMachO>(ISD.Symbol)->setReferenceTypeUndefinedLazy(true); 576 } 577 } 578 579 /// computeSymbolTable - Compute the symbol table data 580 void MachObjectWriter::computeSymbolTable( 581 MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData, 582 std::vector<MachSymbolData> &ExternalSymbolData, 583 std::vector<MachSymbolData> &UndefinedSymbolData) { 584 // Build section lookup table. 585 DenseMap<const MCSection*, uint8_t> SectionIndexMap; 586 unsigned Index = 1; 587 for (MCSection &Sec : Asm) 588 SectionIndexMap[&Sec] = Index++; 589 assert(Index <= 256 && "Too many sections!"); 590 591 // Build the string table. 592 for (const MCSymbol &Symbol : Asm.symbols()) { 593 if (!cast<MCSymbolMachO>(Symbol).isSymbolLinkerVisible()) 594 continue; 595 596 StringTable.add(Symbol.getName()); 597 } 598 StringTable.finalize(); 599 600 // Build the symbol arrays but only for non-local symbols. 601 // 602 // The particular order that we collect and then sort the symbols is chosen to 603 // match 'as'. Even though it doesn't matter for correctness, this is 604 // important for letting us diff .o files. 605 for (const MCSymbol &Symbol : Asm.symbols()) { 606 // Ignore non-linker visible symbols. 607 if (!cast<MCSymbolMachO>(Symbol).isSymbolLinkerVisible()) 608 continue; 609 610 if (!Symbol.isExternal() && !Symbol.isUndefined()) 611 continue; 612 613 MachSymbolData MSD; 614 MSD.Symbol = &Symbol; 615 MSD.StringIndex = StringTable.getOffset(Symbol.getName()); 616 617 if (Symbol.isUndefined()) { 618 MSD.SectionIndex = 0; 619 UndefinedSymbolData.push_back(MSD); 620 } else if (Symbol.isAbsolute()) { 621 MSD.SectionIndex = 0; 622 ExternalSymbolData.push_back(MSD); 623 } else { 624 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 625 assert(MSD.SectionIndex && "Invalid section index!"); 626 ExternalSymbolData.push_back(MSD); 627 } 628 } 629 630 // Now add the data for local symbols. 
631 for (const MCSymbol &Symbol : Asm.symbols()) { 632 // Ignore non-linker visible symbols. 633 if (!cast<MCSymbolMachO>(Symbol).isSymbolLinkerVisible()) 634 continue; 635 636 if (Symbol.isExternal() || Symbol.isUndefined()) 637 continue; 638 639 MachSymbolData MSD; 640 MSD.Symbol = &Symbol; 641 MSD.StringIndex = StringTable.getOffset(Symbol.getName()); 642 643 if (Symbol.isAbsolute()) { 644 MSD.SectionIndex = 0; 645 LocalSymbolData.push_back(MSD); 646 } else { 647 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 648 assert(MSD.SectionIndex && "Invalid section index!"); 649 LocalSymbolData.push_back(MSD); 650 } 651 } 652 653 // External and undefined symbols are required to be in lexicographic order. 654 llvm::sort(ExternalSymbolData); 655 llvm::sort(UndefinedSymbolData); 656 657 // Set the symbol indices. 658 Index = 0; 659 for (auto *SymbolData : 660 {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData}) 661 for (MachSymbolData &Entry : *SymbolData) 662 Entry.Symbol->setIndex(Index++); 663 664 for (const MCSection &Section : Asm) { 665 for (RelAndSymbol &Rel : Relocations[&Section]) { 666 if (!Rel.Sym) 667 continue; 668 669 // Set the Index and the IsExtern bit. 670 unsigned Index = Rel.Sym->getIndex(); 671 assert(isInt<24>(Index)); 672 if (W.Endian == llvm::endianness::little) 673 Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27); 674 else 675 Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4); 676 } 677 } 678 } 679 680 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm) { 681 // Assign layout order indices to sections. 682 unsigned i = 0; 683 // Compute the section layout order. Virtual sections must go last. 
684 for (MCSection &Sec : Asm) { 685 if (!Sec.isVirtualSection()) { 686 SectionOrder.push_back(&Sec); 687 cast<MCSectionMachO>(Sec).setLayoutOrder(i++); 688 } 689 } 690 for (MCSection &Sec : Asm) { 691 if (Sec.isVirtualSection()) { 692 SectionOrder.push_back(&Sec); 693 cast<MCSectionMachO>(Sec).setLayoutOrder(i++); 694 } 695 } 696 697 uint64_t StartAddress = 0; 698 for (const MCSection *Sec : SectionOrder) { 699 StartAddress = alignTo(StartAddress, Sec->getAlign()); 700 SectionAddress[Sec] = StartAddress; 701 StartAddress += Asm.getSectionAddressSize(*Sec); 702 703 // Explicitly pad the section to match the alignment requirements of the 704 // following one. This is for 'gas' compatibility, it shouldn't 705 /// strictly be necessary. 706 StartAddress += getPaddingSize(Asm, Sec); 707 } 708 } 709 710 void MachObjectWriter::executePostLayoutBinding() { 711 computeSectionAddresses(*Asm); 712 713 // Create symbol data for any indirect symbols. 714 bindIndirectSymbols(*Asm); 715 } 716 717 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( 718 const MCSymbol &SymA, const MCFragment &FB, bool InSet, 719 bool IsPCRel) const { 720 if (InSet) 721 return true; 722 723 // The effective address is 724 // addr(atom(A)) + offset(A) 725 // - addr(atom(B)) - offset(B) 726 // and the offsets are not relocatable, so the fixup is fully resolved when 727 // addr(atom(A)) - addr(atom(B)) == 0. 728 const MCSymbol &SA = findAliasedSymbol(SymA); 729 const MCSection &SecA = SA.getSection(); 730 const MCSection &SecB = *FB.getParent(); 731 732 if (IsPCRel) { 733 // The simple (Darwin, except on x86_64) way of dealing with this was to 734 // assume that any reference to a temporary symbol *must* be a temporary 735 // symbol in the same atom, unless the sections differ. Therefore, any PCrel 736 // relocation to a temporary symbol (in the same section) is fully 737 // resolved. 
This also works in conjunction with absolutized .set, which 738 // requires the compiler to use .set to absolutize the differences between 739 // symbols which the compiler knows to be assembly time constants, so we 740 // don't need to worry about considering symbol differences fully resolved. 741 // 742 // If the file isn't using sub-sections-via-symbols, we can make the 743 // same assumptions about any symbol that we normally make about 744 // assembler locals. 745 746 bool hasReliableSymbolDifference = isX86_64(); 747 if (!hasReliableSymbolDifference) { 748 if (!SA.isInSection() || &SecA != &SecB || 749 (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() && 750 SubsectionsViaSymbols)) 751 return false; 752 return true; 753 } 754 } 755 756 // If they are not in the same section, we can't compute the diff. 757 if (&SecA != &SecB) 758 return false; 759 760 // If the atoms are the same, they are guaranteed to have the same address. 761 return SA.getFragment()->getAtom() == FB.getAtom(); 762 } 763 764 static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) { 765 switch (Type) { 766 case MCVM_OSXVersionMin: return MachO::LC_VERSION_MIN_MACOSX; 767 case MCVM_IOSVersionMin: return MachO::LC_VERSION_MIN_IPHONEOS; 768 case MCVM_TvOSVersionMin: return MachO::LC_VERSION_MIN_TVOS; 769 case MCVM_WatchOSVersionMin: return MachO::LC_VERSION_MIN_WATCHOS; 770 } 771 llvm_unreachable("Invalid mc version min type"); 772 } 773 774 void MachObjectWriter::populateAddrSigSection(MCAssembler &Asm) { 775 MCSection *AddrSigSection = 776 getContext().getObjectFileInfo()->getAddrSigSection(); 777 unsigned Log2Size = is64Bit() ? 
3 : 2; 778 for (const MCSymbol *S : getAddrsigSyms()) { 779 if (!S->isRegistered()) 780 continue; 781 MachO::any_relocation_info MRE; 782 MRE.r_word0 = 0; 783 MRE.r_word1 = (Log2Size << 25) | (MachO::GENERIC_RELOC_VANILLA << 28); 784 addRelocation(S, AddrSigSection, MRE); 785 } 786 } 787 788 uint64_t MachObjectWriter::writeObject() { 789 auto &Asm = *this->Asm; 790 uint64_t StartOffset = W.OS.tell(); 791 auto NumBytesWritten = [&] { return W.OS.tell() - StartOffset; }; 792 793 populateAddrSigSection(Asm); 794 795 // Compute symbol table information and bind symbol indices. 796 computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData, 797 UndefinedSymbolData); 798 799 if (!CGProfile.empty()) { 800 MCSection *CGProfileSection = getContext().getMachOSection( 801 "__LLVM", "__cg_profile", 0, SectionKind::getMetadata()); 802 auto &Frag = cast<MCDataFragment>(*CGProfileSection->begin()); 803 Frag.clearContents(); 804 raw_svector_ostream OS(Frag.getContentsForAppending()); 805 for (const MCObjectWriter::CGProfileEntry &CGPE : CGProfile) { 806 uint32_t FromIndex = CGPE.From->getSymbol().getIndex(); 807 uint32_t ToIndex = CGPE.To->getSymbol().getIndex(); 808 support::endian::write(OS, FromIndex, W.Endian); 809 support::endian::write(OS, ToIndex, W.Endian); 810 support::endian::write(OS, CGPE.Count, W.Endian); 811 } 812 Frag.doneAppending(); 813 } 814 815 unsigned NumSections = Asm.end() - Asm.begin(); 816 817 // The section data starts after the header, the segment load command (and 818 // section headers) and the symbol table. 819 unsigned NumLoadCommands = 1; 820 uint64_t LoadCommandsSize = is64Bit() ? 821 sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64): 822 sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section); 823 824 // Add the deployment target version info load command size, if used. 
825 if (VersionInfo.Major != 0) { 826 ++NumLoadCommands; 827 if (VersionInfo.EmitBuildVersion) 828 LoadCommandsSize += sizeof(MachO::build_version_command); 829 else 830 LoadCommandsSize += sizeof(MachO::version_min_command); 831 } 832 833 // Add the target variant version info load command size, if used. 834 if (TargetVariantVersionInfo.Major != 0) { 835 ++NumLoadCommands; 836 assert(TargetVariantVersionInfo.EmitBuildVersion && 837 "target variant should use build version"); 838 LoadCommandsSize += sizeof(MachO::build_version_command); 839 } 840 841 // Add the data-in-code load command size, if used. 842 unsigned NumDataRegions = DataRegions.size(); 843 if (NumDataRegions) { 844 ++NumLoadCommands; 845 LoadCommandsSize += sizeof(MachO::linkedit_data_command); 846 } 847 848 // Add the loh load command size, if used. 849 uint64_t LOHRawSize = LOHContainer.getEmitSize(Asm, *this); 850 uint64_t LOHSize = alignTo(LOHRawSize, is64Bit() ? 8 : 4); 851 if (LOHSize) { 852 ++NumLoadCommands; 853 LoadCommandsSize += sizeof(MachO::linkedit_data_command); 854 } 855 856 // Add the symbol table load command sizes, if used. 857 unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + 858 UndefinedSymbolData.size(); 859 if (NumSymbols) { 860 NumLoadCommands += 2; 861 LoadCommandsSize += (sizeof(MachO::symtab_command) + 862 sizeof(MachO::dysymtab_command)); 863 } 864 865 // Add the linker option load commands sizes. 866 for (const auto &Option : LinkerOptions) { 867 ++NumLoadCommands; 868 LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit()); 869 } 870 871 // Compute the total size of the section data, as well as its file size and vm 872 // size. 873 uint64_t SectionDataStart = (is64Bit() ? 
sizeof(MachO::mach_header_64) : 874 sizeof(MachO::mach_header)) + LoadCommandsSize; 875 uint64_t SectionDataSize = 0; 876 uint64_t SectionDataFileSize = 0; 877 uint64_t VMSize = 0; 878 for (const MCSection &Sec : Asm) { 879 uint64_t Address = getSectionAddress(&Sec); 880 uint64_t Size = Asm.getSectionAddressSize(Sec); 881 uint64_t FileSize = Asm.getSectionFileSize(Sec); 882 FileSize += getPaddingSize(Asm, &Sec); 883 884 VMSize = std::max(VMSize, Address + Size); 885 886 if (Sec.isVirtualSection()) 887 continue; 888 889 SectionDataSize = std::max(SectionDataSize, Address + Size); 890 SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); 891 } 892 893 // The section data is padded to pointer size bytes. 894 // 895 // FIXME: Is this machine dependent? 896 unsigned SectionDataPadding = 897 offsetToAlignment(SectionDataFileSize, is64Bit() ? Align(8) : Align(4)); 898 SectionDataFileSize += SectionDataPadding; 899 900 // Write the prolog, starting with the header and load command... 901 writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize, 902 SubsectionsViaSymbols); 903 uint32_t Prot = 904 MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE; 905 writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart, 906 SectionDataSize, Prot, Prot); 907 908 // ... and then the section headers. 
909 uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; 910 for (const MCSection &Section : Asm) { 911 const auto &Sec = cast<MCSectionMachO>(Section); 912 std::vector<RelAndSymbol> &Relocs = Relocations[&Sec]; 913 unsigned NumRelocs = Relocs.size(); 914 uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec); 915 unsigned Flags = Sec.getTypeAndAttributes(); 916 if (Sec.hasInstructions()) 917 Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS; 918 if (!cast<MCSectionMachO>(Sec).isVirtualSection() && 919 !isUInt<32>(SectionStart)) { 920 getContext().reportError( 921 SMLoc(), "cannot encode offset of section; object file too large"); 922 return NumBytesWritten(); 923 } 924 if (NumRelocs && !isUInt<32>(RelocTableEnd)) { 925 getContext().reportError( 926 SMLoc(), 927 "cannot encode offset of relocations; object file too large"); 928 return NumBytesWritten(); 929 } 930 writeSection(Asm, Sec, getSectionAddress(&Sec), SectionStart, Flags, 931 RelocTableEnd, NumRelocs); 932 RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info); 933 } 934 935 // Write out the deployment target information, if it's available. 936 auto EmitDeploymentTargetVersion = 937 [&](const VersionInfoType &VersionInfo) { 938 auto EncodeVersion = [](VersionTuple V) -> uint32_t { 939 assert(!V.empty() && "empty version"); 940 unsigned Update = V.getSubminor().value_or(0); 941 unsigned Minor = V.getMinor().value_or(0); 942 assert(Update < 256 && "unencodable update target version"); 943 assert(Minor < 256 && "unencodable minor target version"); 944 assert(V.getMajor() < 65536 && "unencodable major target version"); 945 return Update | (Minor << 8) | (V.getMajor() << 16); 946 }; 947 uint32_t EncodedVersion = EncodeVersion(VersionTuple( 948 VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update)); 949 uint32_t SDKVersion = !VersionInfo.SDKVersion.empty() 950 ? EncodeVersion(VersionInfo.SDKVersion) 951 : 0; 952 if (VersionInfo.EmitBuildVersion) { 953 // FIXME: Currently empty tools. 
Add clang version in the future. 954 W.write<uint32_t>(MachO::LC_BUILD_VERSION); 955 W.write<uint32_t>(sizeof(MachO::build_version_command)); 956 W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform); 957 W.write<uint32_t>(EncodedVersion); 958 W.write<uint32_t>(SDKVersion); 959 W.write<uint32_t>(0); // Empty tools list. 960 } else { 961 MachO::LoadCommandType LCType = 962 getLCFromMCVM(VersionInfo.TypeOrPlatform.Type); 963 W.write<uint32_t>(LCType); 964 W.write<uint32_t>(sizeof(MachO::version_min_command)); 965 W.write<uint32_t>(EncodedVersion); 966 W.write<uint32_t>(SDKVersion); 967 } 968 }; 969 if (VersionInfo.Major != 0) 970 EmitDeploymentTargetVersion(VersionInfo); 971 if (TargetVariantVersionInfo.Major != 0) 972 EmitDeploymentTargetVersion(TargetVariantVersionInfo); 973 974 // Write the data-in-code load command, if used. 975 uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8; 976 if (NumDataRegions) { 977 uint64_t DataRegionsOffset = RelocTableEnd; 978 uint64_t DataRegionsSize = NumDataRegions * 8; 979 writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset, 980 DataRegionsSize); 981 } 982 983 // Write the loh load command, if used. 984 uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize; 985 if (LOHSize) 986 writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT, 987 DataInCodeTableEnd, LOHSize); 988 989 // Write the symbol table load command, if used. 
990 if (NumSymbols) { 991 unsigned FirstLocalSymbol = 0; 992 unsigned NumLocalSymbols = LocalSymbolData.size(); 993 unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; 994 unsigned NumExternalSymbols = ExternalSymbolData.size(); 995 unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; 996 unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); 997 unsigned NumIndirectSymbols = IndirectSymbols.size(); 998 unsigned NumSymTabSymbols = 999 NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; 1000 uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; 1001 uint64_t IndirectSymbolOffset = 0; 1002 1003 // If used, the indirect symbols are written after the section data. 1004 if (NumIndirectSymbols) 1005 IndirectSymbolOffset = LOHTableEnd; 1006 1007 // The symbol table is written after the indirect symbol data. 1008 uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize; 1009 1010 // The string table is written after symbol table. 1011 uint64_t StringTableOffset = 1012 SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? 1013 sizeof(MachO::nlist_64) : 1014 sizeof(MachO::nlist)); 1015 writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, 1016 StringTableOffset, StringTable.getSize()); 1017 1018 writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, 1019 FirstExternalSymbol, NumExternalSymbols, 1020 FirstUndefinedSymbol, NumUndefinedSymbols, 1021 IndirectSymbolOffset, NumIndirectSymbols); 1022 } 1023 1024 // Write the linker options load commands. 1025 for (const auto &Option : LinkerOptions) 1026 writeLinkerOptionsLoadCommand(Option); 1027 1028 // Write the actual section data. 1029 for (const MCSection &Sec : Asm) { 1030 Asm.writeSectionData(W.OS, &Sec); 1031 1032 uint64_t Pad = getPaddingSize(Asm, &Sec); 1033 W.OS.write_zeros(Pad); 1034 } 1035 1036 // Write the extra padding. 1037 W.OS.write_zeros(SectionDataPadding); 1038 1039 // Write the relocation entries. 
1040 for (const MCSection &Sec : Asm) { 1041 // Write the section relocation entries, in reverse order to match 'as' 1042 // (approximately, the exact algorithm is more complicated than this). 1043 std::vector<RelAndSymbol> &Relocs = Relocations[&Sec]; 1044 for (const RelAndSymbol &Rel : llvm::reverse(Relocs)) { 1045 W.write<uint32_t>(Rel.MRE.r_word0); 1046 W.write<uint32_t>(Rel.MRE.r_word1); 1047 } 1048 } 1049 1050 // Write out the data-in-code region payload, if there is one. 1051 for (DataRegionData Data : DataRegions) { 1052 uint64_t Start = getSymbolAddress(*Data.Start); 1053 uint64_t End; 1054 if (Data.End) 1055 End = getSymbolAddress(*Data.End); 1056 else 1057 report_fatal_error("Data region not terminated"); 1058 1059 LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data.Kind 1060 << " start: " << Start << "(" << Data.Start->getName() 1061 << ")" << " end: " << End << "(" << Data.End->getName() 1062 << ")" << " size: " << End - Start << "\n"); 1063 W.write<uint32_t>(Start); 1064 W.write<uint16_t>(End - Start); 1065 W.write<uint16_t>(Data.Kind); 1066 } 1067 1068 // Write out the loh commands, if there is one. 1069 if (LOHSize) { 1070 #ifndef NDEBUG 1071 unsigned Start = W.OS.tell(); 1072 #endif 1073 LOHContainer.emit(Asm, *this); 1074 // Pad to a multiple of the pointer size. 1075 W.OS.write_zeros( 1076 offsetToAlignment(LOHRawSize, is64Bit() ? Align(8) : Align(4))); 1077 assert(W.OS.tell() - Start == LOHSize); 1078 } 1079 1080 // Write the symbol table data, if used. 1081 if (NumSymbols) { 1082 // Write the indirect symbol entries. 1083 for (auto &ISD : IndirectSymbols) { 1084 // Indirect symbols in the non-lazy symbol pointer section have some 1085 // special handling. 1086 const MCSectionMachO &Section = 1087 static_cast<const MCSectionMachO &>(*ISD.Section); 1088 if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) { 1089 // If this symbol is defined and internal, mark it as such. 
1090 if (ISD.Symbol->isDefined() && !ISD.Symbol->isExternal()) { 1091 uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL; 1092 if (ISD.Symbol->isAbsolute()) 1093 Flags |= MachO::INDIRECT_SYMBOL_ABS; 1094 W.write<uint32_t>(Flags); 1095 continue; 1096 } 1097 } 1098 1099 W.write<uint32_t>(ISD.Symbol->getIndex()); 1100 } 1101 1102 // FIXME: Check that offsets match computed ones. 1103 1104 // Write the symbol table entries. 1105 for (auto *SymbolData : 1106 {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData}) 1107 for (MachSymbolData &Entry : *SymbolData) 1108 writeNlist(Entry, Asm); 1109 1110 // Write the string table. 1111 StringTable.write(W.OS); 1112 } 1113 1114 return NumBytesWritten(); 1115 } 1116