1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/ADT/DenseMap.h" 10 #include "llvm/ADT/Twine.h" 11 #include "llvm/ADT/iterator_range.h" 12 #include "llvm/BinaryFormat/MachO.h" 13 #include "llvm/MC/MCAsmBackend.h" 14 #include "llvm/MC/MCAsmLayout.h" 15 #include "llvm/MC/MCAssembler.h" 16 #include "llvm/MC/MCContext.h" 17 #include "llvm/MC/MCDirectives.h" 18 #include "llvm/MC/MCExpr.h" 19 #include "llvm/MC/MCFixupKindInfo.h" 20 #include "llvm/MC/MCFragment.h" 21 #include "llvm/MC/MCMachObjectWriter.h" 22 #include "llvm/MC/MCObjectFileInfo.h" 23 #include "llvm/MC/MCObjectWriter.h" 24 #include "llvm/MC/MCSection.h" 25 #include "llvm/MC/MCSectionMachO.h" 26 #include "llvm/MC/MCSymbol.h" 27 #include "llvm/MC/MCSymbolMachO.h" 28 #include "llvm/MC/MCValue.h" 29 #include "llvm/Support/Alignment.h" 30 #include "llvm/Support/Casting.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Support/ErrorHandling.h" 33 #include "llvm/Support/LEB128.h" 34 #include "llvm/Support/MathExtras.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include <algorithm> 37 #include <cassert> 38 #include <cstdint> 39 #include <string> 40 #include <utility> 41 #include <vector> 42 43 using namespace llvm; 44 45 #define DEBUG_TYPE "mc" 46 47 void MachObjectWriter::reset() { 48 Relocations.clear(); 49 IndirectSymBase.clear(); 50 StringTable.clear(); 51 LocalSymbolData.clear(); 52 ExternalSymbolData.clear(); 53 UndefinedSymbolData.clear(); 54 MCObjectWriter::reset(); 55 } 56 57 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) { 58 // Undefined symbols are always extern. 59 if (S.isUndefined()) 60 return true; 61 62 // References to weak definitions require external relocation entries; the 63 // definition may not always be the one in the same object file. 64 if (cast<MCSymbolMachO>(S).isWeakDefinition()) 65 return true; 66 67 // Otherwise, we can use an internal relocation. 68 return false; 69 } 70 71 bool MachObjectWriter:: 72 MachSymbolData::operator<(const MachSymbolData &RHS) const { 73 return Symbol->getName() < RHS.Symbol->getName(); 74 } 75 76 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { 77 const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo( 78 (MCFixupKind) Kind); 79 80 return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; 81 } 82 83 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment, 84 const MCAsmLayout &Layout) const { 85 return getSectionAddress(Fragment->getParent()) + 86 Layout.getFragmentOffset(Fragment); 87 } 88 89 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S, 90 const MCAsmLayout &Layout) const { 91 // If this is a variable, then recursively evaluate now. 92 if (S.isVariable()) { 93 if (const MCConstantExpr *C = 94 dyn_cast<const MCConstantExpr>(S.getVariableValue())) 95 return C->getValue(); 96 97 MCValue Target; 98 if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr)) 99 report_fatal_error("unable to evaluate offset for variable '" + 100 S.getName() + "'"); 101 102 // Verify that any used symbols are defined. 103 if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined()) 104 report_fatal_error("unable to evaluate offset to undefined symbol '" + 105 Target.getSymA()->getSymbol().getName() + "'"); 106 if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) 107 report_fatal_error("unable to evaluate offset to undefined symbol '" + 108 Target.getSymB()->getSymbol().getName() + "'"); 109 110 uint64_t Address = Target.getConstant(); 111 if (Target.getSymA()) 112 Address += getSymbolAddress(Target.getSymA()->getSymbol(), Layout); 113 if (Target.getSymB()) 114 Address += getSymbolAddress(Target.getSymB()->getSymbol(), Layout); 115 return Address; 116 } 117 118 return getSectionAddress(S.getFragment()->getParent()) + 119 Layout.getSymbolOffset(S); 120 } 121 122 uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec, 123 const MCAsmLayout &Layout) const { 124 uint64_t EndAddr = getSectionAddress(Sec) + Layout.getSectionAddressSize(Sec); 125 unsigned Next = Sec->getLayoutOrder() + 1; 126 if (Next >= Layout.getSectionOrder().size()) 127 return 0; 128 129 const MCSection &NextSec = *Layout.getSectionOrder()[Next]; 130 if (NextSec.isVirtualSection()) 131 return 0; 132 return offsetToAlignment(EndAddr, Align(NextSec.getAlignment())); 133 } 134 135 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type, 136 unsigned NumLoadCommands, 137 unsigned LoadCommandsSize, 138 bool SubsectionsViaSymbols) { 139 uint32_t Flags = 0; 140 141 if (SubsectionsViaSymbols) 142 Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS; 143 144 // struct mach_header (28 bytes) or 145 // struct mach_header_64 (32 bytes) 146 147 uint64_t Start = W.OS.tell(); 148 (void) Start; 149 150 W.write<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC); 151 152 W.write<uint32_t>(TargetObjectWriter->getCPUType()); 153 W.write<uint32_t>(TargetObjectWriter->getCPUSubtype()); 154 155 W.write<uint32_t>(Type); 156 W.write<uint32_t>(NumLoadCommands); 157 W.write<uint32_t>(LoadCommandsSize); 158 W.write<uint32_t>(Flags); 159 if (is64Bit()) 160 W.write<uint32_t>(0); // reserved 161 162 assert(W.OS.tell() - Start == (is64Bit() ? sizeof(MachO::mach_header_64) 163 : sizeof(MachO::mach_header))); 164 } 165 166 void MachObjectWriter::writeWithPadding(StringRef Str, uint64_t Size) { 167 assert(Size >= Str.size()); 168 W.OS << Str; 169 W.OS.write_zeros(Size - Str.size()); 170 } 171 172 /// writeSegmentLoadCommand - Write a segment load command. 173 /// 174 /// \param NumSections The number of sections in this segment. 175 /// \param SectionDataSize The total size of the sections. 176 void MachObjectWriter::writeSegmentLoadCommand( 177 StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize, 178 uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt, 179 uint32_t InitProt) { 180 // struct segment_command (56 bytes) or 181 // struct segment_command_64 (72 bytes) 182 183 uint64_t Start = W.OS.tell(); 184 (void) Start; 185 186 unsigned SegmentLoadCommandSize = 187 is64Bit() ? sizeof(MachO::segment_command_64): 188 sizeof(MachO::segment_command); 189 W.write<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT); 190 W.write<uint32_t>(SegmentLoadCommandSize + 191 NumSections * (is64Bit() ? sizeof(MachO::section_64) : 192 sizeof(MachO::section))); 193 194 writeWithPadding(Name, 16); 195 if (is64Bit()) { 196 W.write<uint64_t>(VMAddr); // vmaddr 197 W.write<uint64_t>(VMSize); // vmsize 198 W.write<uint64_t>(SectionDataStartOffset); // file offset 199 W.write<uint64_t>(SectionDataSize); // file size 200 } else { 201 W.write<uint32_t>(VMAddr); // vmaddr 202 W.write<uint32_t>(VMSize); // vmsize 203 W.write<uint32_t>(SectionDataStartOffset); // file offset 204 W.write<uint32_t>(SectionDataSize); // file size 205 } 206 // maxprot 207 W.write<uint32_t>(MaxProt); 208 // initprot 209 W.write<uint32_t>(InitProt); 210 W.write<uint32_t>(NumSections); 211 W.write<uint32_t>(0); // flags 212 213 assert(W.OS.tell() - Start == SegmentLoadCommandSize); 214 } 215 216 void MachObjectWriter::writeSection(const MCAsmLayout &Layout, 217 const MCSection &Sec, uint64_t VMAddr, 218 uint64_t FileOffset, unsigned Flags, 219 uint64_t RelocationsStart, 220 unsigned NumRelocations) { 221 uint64_t SectionSize = Layout.getSectionAddressSize(&Sec); 222 const MCSectionMachO &Section = cast<MCSectionMachO>(Sec); 223 224 // The offset is unused for virtual sections. 225 if (Section.isVirtualSection()) { 226 assert(Layout.getSectionFileSize(&Sec) == 0 && "Invalid file size!"); 227 FileOffset = 0; 228 } 229 230 // struct section (68 bytes) or 231 // struct section_64 (80 bytes) 232 233 uint64_t Start = W.OS.tell(); 234 (void) Start; 235 236 writeWithPadding(Section.getName(), 16); 237 writeWithPadding(Section.getSegmentName(), 16); 238 if (is64Bit()) { 239 W.write<uint64_t>(VMAddr); // address 240 W.write<uint64_t>(SectionSize); // size 241 } else { 242 W.write<uint32_t>(VMAddr); // address 243 W.write<uint32_t>(SectionSize); // size 244 } 245 W.write<uint32_t>(FileOffset); 246 247 assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!"); 248 W.write<uint32_t>(Log2_32(Section.getAlignment())); 249 W.write<uint32_t>(NumRelocations ? RelocationsStart : 0); 250 W.write<uint32_t>(NumRelocations); 251 W.write<uint32_t>(Flags); 252 W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1 253 W.write<uint32_t>(Section.getStubSize()); // reserved2 254 if (is64Bit()) 255 W.write<uint32_t>(0); // reserved3 256 257 assert(W.OS.tell() - Start == 258 (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section))); 259 } 260 261 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset, 262 uint32_t NumSymbols, 263 uint32_t StringTableOffset, 264 uint32_t StringTableSize) { 265 // struct symtab_command (24 bytes) 266 267 uint64_t Start = W.OS.tell(); 268 (void) Start; 269 270 W.write<uint32_t>(MachO::LC_SYMTAB); 271 W.write<uint32_t>(sizeof(MachO::symtab_command)); 272 W.write<uint32_t>(SymbolOffset); 273 W.write<uint32_t>(NumSymbols); 274 W.write<uint32_t>(StringTableOffset); 275 W.write<uint32_t>(StringTableSize); 276 277 assert(W.OS.tell() - Start == sizeof(MachO::symtab_command)); 278 } 279 280 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol, 281 uint32_t NumLocalSymbols, 282 uint32_t FirstExternalSymbol, 283 uint32_t NumExternalSymbols, 284 uint32_t FirstUndefinedSymbol, 285 uint32_t NumUndefinedSymbols, 286 uint32_t IndirectSymbolOffset, 287 uint32_t NumIndirectSymbols) { 288 // struct dysymtab_command (80 bytes) 289 290 uint64_t Start = W.OS.tell(); 291 (void) Start; 292 293 W.write<uint32_t>(MachO::LC_DYSYMTAB); 294 W.write<uint32_t>(sizeof(MachO::dysymtab_command)); 295 W.write<uint32_t>(FirstLocalSymbol); 296 W.write<uint32_t>(NumLocalSymbols); 297 W.write<uint32_t>(FirstExternalSymbol); 298 W.write<uint32_t>(NumExternalSymbols); 299 W.write<uint32_t>(FirstUndefinedSymbol); 300 W.write<uint32_t>(NumUndefinedSymbols); 301 W.write<uint32_t>(0); // tocoff 302 W.write<uint32_t>(0); // ntoc 303 W.write<uint32_t>(0); // modtaboff 304 W.write<uint32_t>(0); // nmodtab 305 W.write<uint32_t>(0); // extrefsymoff 306 W.write<uint32_t>(0); // nextrefsyms 307 W.write<uint32_t>(IndirectSymbolOffset); 308 W.write<uint32_t>(NumIndirectSymbols); 309 W.write<uint32_t>(0); // extreloff 310 W.write<uint32_t>(0); // nextrel 311 W.write<uint32_t>(0); // locreloff 312 W.write<uint32_t>(0); // nlocrel 313 314 assert(W.OS.tell() - Start == sizeof(MachO::dysymtab_command)); 315 } 316 317 MachObjectWriter::MachSymbolData * 318 MachObjectWriter::findSymbolData(const MCSymbol &Sym) { 319 for (auto *SymbolData : 320 {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData}) 321 for (MachSymbolData &Entry : *SymbolData) 322 if (Entry.Symbol == &Sym) 323 return &Entry; 324 325 return nullptr; 326 } 327 328 const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const { 329 const MCSymbol *S = &Sym; 330 while (S->isVariable()) { 331 const MCExpr *Value = S->getVariableValue(); 332 const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value); 333 if (!Ref) 334 return *S; 335 S = &Ref->getSymbol(); 336 } 337 return *S; 338 } 339 340 void MachObjectWriter::writeNlist(MachSymbolData &MSD, 341 const MCAsmLayout &Layout) { 342 const MCSymbol *Symbol = MSD.Symbol; 343 const MCSymbol &Data = *Symbol; 344 const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol); 345 uint8_t SectionIndex = MSD.SectionIndex; 346 uint8_t Type = 0; 347 uint64_t Address = 0; 348 bool IsAlias = Symbol != AliasedSymbol; 349 350 const MCSymbol &OrigSymbol = *Symbol; 351 MachSymbolData *AliaseeInfo; 352 if (IsAlias) { 353 AliaseeInfo = findSymbolData(*AliasedSymbol); 354 if (AliaseeInfo) 355 SectionIndex = AliaseeInfo->SectionIndex; 356 Symbol = AliasedSymbol; 357 // FIXME: Should this update Data as well? 358 } 359 360 // Set the N_TYPE bits. See <mach-o/nlist.h>. 361 // 362 // FIXME: Are the prebound or indirect fields possible here? 363 if (IsAlias && Symbol->isUndefined()) 364 Type = MachO::N_INDR; 365 else if (Symbol->isUndefined()) 366 Type = MachO::N_UNDF; 367 else if (Symbol->isAbsolute()) 368 Type = MachO::N_ABS; 369 else 370 Type = MachO::N_SECT; 371 372 // FIXME: Set STAB bits. 373 374 if (Data.isPrivateExtern()) 375 Type |= MachO::N_PEXT; 376 377 // Set external bit. 378 if (Data.isExternal() || (!IsAlias && Symbol->isUndefined())) 379 Type |= MachO::N_EXT; 380 381 // Compute the symbol address. 382 if (IsAlias && Symbol->isUndefined()) 383 Address = AliaseeInfo->StringIndex; 384 else if (Symbol->isDefined()) 385 Address = getSymbolAddress(OrigSymbol, Layout); 386 else if (Symbol->isCommon()) { 387 // Common symbols are encoded with the size in the address 388 // field, and their alignment in the flags. 389 Address = Symbol->getCommonSize(); 390 } 391 392 // struct nlist (12 bytes) 393 394 W.write<uint32_t>(MSD.StringIndex); 395 W.OS << char(Type); 396 W.OS << char(SectionIndex); 397 398 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' 399 // value. 400 bool EncodeAsAltEntry = 401 IsAlias && cast<MCSymbolMachO>(OrigSymbol).isAltEntry(); 402 W.write<uint16_t>(cast<MCSymbolMachO>(Symbol)->getEncodedFlags(EncodeAsAltEntry)); 403 if (is64Bit()) 404 W.write<uint64_t>(Address); 405 else 406 W.write<uint32_t>(Address); 407 } 408 409 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type, 410 uint32_t DataOffset, 411 uint32_t DataSize) { 412 uint64_t Start = W.OS.tell(); 413 (void) Start; 414 415 W.write<uint32_t>(Type); 416 W.write<uint32_t>(sizeof(MachO::linkedit_data_command)); 417 W.write<uint32_t>(DataOffset); 418 W.write<uint32_t>(DataSize); 419 420 assert(W.OS.tell() - Start == sizeof(MachO::linkedit_data_command)); 421 } 422 423 static unsigned ComputeLinkerOptionsLoadCommandSize( 424 const std::vector<std::string> &Options, bool is64Bit) 425 { 426 unsigned Size = sizeof(MachO::linker_option_command); 427 for (const std::string &Option : Options) 428 Size += Option.size() + 1; 429 return alignTo(Size, is64Bit ? 8 : 4); 430 } 431 432 void MachObjectWriter::writeLinkerOptionsLoadCommand( 433 const std::vector<std::string> &Options) 434 { 435 unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit()); 436 uint64_t Start = W.OS.tell(); 437 (void) Start; 438 439 W.write<uint32_t>(MachO::LC_LINKER_OPTION); 440 W.write<uint32_t>(Size); 441 W.write<uint32_t>(Options.size()); 442 uint64_t BytesWritten = sizeof(MachO::linker_option_command); 443 for (const std::string &Option : Options) { 444 // Write each string, including the null byte. 445 W.OS << Option << '\0'; 446 BytesWritten += Option.size() + 1; 447 } 448 449 // Pad to a multiple of the pointer size. 450 W.OS.write_zeros( 451 offsetToAlignment(BytesWritten, is64Bit() ? Align(8) : Align(4))); 452 453 assert(W.OS.tell() - Start == Size); 454 } 455 456 static bool isFixupTargetValid(const MCValue &Target) { 457 // Target is (LHS - RHS + cst). 458 // We don't support the form where LHS is null: -RHS + cst 459 if (!Target.getSymA() && Target.getSymB()) 460 return false; 461 return true; 462 } 463 464 void MachObjectWriter::recordRelocation(MCAssembler &Asm, 465 const MCAsmLayout &Layout, 466 const MCFragment *Fragment, 467 const MCFixup &Fixup, MCValue Target, 468 uint64_t &FixedValue) { 469 if (!isFixupTargetValid(Target)) { 470 Asm.getContext().reportError(Fixup.getLoc(), 471 "unsupported relocation expression"); 472 return; 473 } 474 475 TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup, 476 Target, FixedValue); 477 } 478 479 void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) { 480 // This is the point where 'as' creates actual symbols for indirect symbols 481 // (in the following two passes). It would be easier for us to do this sooner 482 // when we see the attribute, but that makes getting the order in the symbol 483 // table much more complicated than it is worth. 484 // 485 // FIXME: Revisit this when the dust settles. 486 487 // Report errors for use of .indirect_symbol not in a symbol pointer section 488 // or stub section. 489 for (IndirectSymbolData &ISD : llvm::make_range(Asm.indirect_symbol_begin(), 490 Asm.indirect_symbol_end())) { 491 const MCSectionMachO &Section = cast<MCSectionMachO>(*ISD.Section); 492 493 if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS && 494 Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && 495 Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS && 496 Section.getType() != MachO::S_SYMBOL_STUBS) { 497 MCSymbol &Symbol = *ISD.Symbol; 498 report_fatal_error("indirect symbol '" + Symbol.getName() + 499 "' not in a symbol pointer or stub section"); 500 } 501 } 502 503 // Bind non-lazy symbol pointers first. 504 unsigned IndirectIndex = 0; 505 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 506 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { 507 const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section); 508 509 if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS && 510 Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS) 511 continue; 512 513 // Initialize the section indirect symbol base, if necessary. 514 IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex)); 515 516 Asm.registerSymbol(*it->Symbol); 517 } 518 519 // Then lazy symbol pointers and symbol stubs. 520 IndirectIndex = 0; 521 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 522 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { 523 const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section); 524 525 if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && 526 Section.getType() != MachO::S_SYMBOL_STUBS) 527 continue; 528 529 // Initialize the section indirect symbol base, if necessary. 530 IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex)); 531 532 // Set the symbol type to undefined lazy, but only on construction. 533 // 534 // FIXME: Do not hardcode. 535 bool Created; 536 Asm.registerSymbol(*it->Symbol, &Created); 537 if (Created) 538 cast<MCSymbolMachO>(it->Symbol)->setReferenceTypeUndefinedLazy(true); 539 } 540 } 541 542 /// computeSymbolTable - Compute the symbol table data 543 void MachObjectWriter::computeSymbolTable( 544 MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData, 545 std::vector<MachSymbolData> &ExternalSymbolData, 546 std::vector<MachSymbolData> &UndefinedSymbolData) { 547 // Build section lookup table. 548 DenseMap<const MCSection*, uint8_t> SectionIndexMap; 549 unsigned Index = 1; 550 for (MCAssembler::iterator it = Asm.begin(), 551 ie = Asm.end(); it != ie; ++it, ++Index) 552 SectionIndexMap[&*it] = Index; 553 assert(Index <= 256 && "Too many sections!"); 554 555 // Build the string table. 556 for (const MCSymbol &Symbol : Asm.symbols()) { 557 if (!Asm.isSymbolLinkerVisible(Symbol)) 558 continue; 559 560 StringTable.add(Symbol.getName()); 561 } 562 StringTable.finalize(); 563 564 // Build the symbol arrays but only for non-local symbols. 565 // 566 // The particular order that we collect and then sort the symbols is chosen to 567 // match 'as'. Even though it doesn't matter for correctness, this is 568 // important for letting us diff .o files. 569 for (const MCSymbol &Symbol : Asm.symbols()) { 570 // Ignore non-linker visible symbols. 571 if (!Asm.isSymbolLinkerVisible(Symbol)) 572 continue; 573 574 if (!Symbol.isExternal() && !Symbol.isUndefined()) 575 continue; 576 577 MachSymbolData MSD; 578 MSD.Symbol = &Symbol; 579 MSD.StringIndex = StringTable.getOffset(Symbol.getName()); 580 581 if (Symbol.isUndefined()) { 582 MSD.SectionIndex = 0; 583 UndefinedSymbolData.push_back(MSD); 584 } else if (Symbol.isAbsolute()) { 585 MSD.SectionIndex = 0; 586 ExternalSymbolData.push_back(MSD); 587 } else { 588 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 589 assert(MSD.SectionIndex && "Invalid section index!"); 590 ExternalSymbolData.push_back(MSD); 591 } 592 } 593 594 // Now add the data for local symbols. 595 for (const MCSymbol &Symbol : Asm.symbols()) { 596 // Ignore non-linker visible symbols. 597 if (!Asm.isSymbolLinkerVisible(Symbol)) 598 continue; 599 600 if (Symbol.isExternal() || Symbol.isUndefined()) 601 continue; 602 603 MachSymbolData MSD; 604 MSD.Symbol = &Symbol; 605 MSD.StringIndex = StringTable.getOffset(Symbol.getName()); 606 607 if (Symbol.isAbsolute()) { 608 MSD.SectionIndex = 0; 609 LocalSymbolData.push_back(MSD); 610 } else { 611 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 612 assert(MSD.SectionIndex && "Invalid section index!"); 613 LocalSymbolData.push_back(MSD); 614 } 615 } 616 617 // External and undefined symbols are required to be in lexicographic order. 618 llvm::sort(ExternalSymbolData); 619 llvm::sort(UndefinedSymbolData); 620 621 // Set the symbol indices. 622 Index = 0; 623 for (auto *SymbolData : 624 {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData}) 625 for (MachSymbolData &Entry : *SymbolData) 626 Entry.Symbol->setIndex(Index++); 627 628 for (const MCSection &Section : Asm) { 629 for (RelAndSymbol &Rel : Relocations[&Section]) { 630 if (!Rel.Sym) 631 continue; 632 633 // Set the Index and the IsExtern bit. 634 unsigned Index = Rel.Sym->getIndex(); 635 assert(isInt<24>(Index)); 636 if (W.Endian == support::little) 637 Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27); 638 else 639 Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4); 640 } 641 } 642 } 643 644 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm, 645 const MCAsmLayout &Layout) { 646 uint64_t StartAddress = 0; 647 for (const MCSection *Sec : Layout.getSectionOrder()) { 648 StartAddress = alignTo(StartAddress, Sec->getAlignment()); 649 SectionAddress[Sec] = StartAddress; 650 StartAddress += Layout.getSectionAddressSize(Sec); 651 652 // Explicitly pad the section to match the alignment requirements of the 653 // following one. This is for 'gas' compatibility, it shouldn't 654 /// strictly be necessary. 655 StartAddress += getPaddingSize(Sec, Layout); 656 } 657 } 658 659 void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm, 660 const MCAsmLayout &Layout) { 661 computeSectionAddresses(Asm, Layout); 662 663 // Create symbol data for any indirect symbols. 664 bindIndirectSymbols(Asm); 665 } 666 667 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( 668 const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B, 669 bool InSet) const { 670 // FIXME: We don't handle things like 671 // foo = . 672 // creating atoms. 673 if (A.isVariable() || B.isVariable()) 674 return false; 675 return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, A, B, 676 InSet); 677 } 678 679 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( 680 const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, 681 bool InSet, bool IsPCRel) const { 682 if (InSet) 683 return true; 684 685 // The effective address is 686 // addr(atom(A)) + offset(A) 687 // - addr(atom(B)) - offset(B) 688 // and the offsets are not relocatable, so the fixup is fully resolved when 689 // addr(atom(A)) - addr(atom(B)) == 0. 690 const MCSymbol &SA = findAliasedSymbol(SymA); 691 const MCSection &SecA = SA.getSection(); 692 const MCSection &SecB = *FB.getParent(); 693 694 if (IsPCRel) { 695 // The simple (Darwin, except on x86_64) way of dealing with this was to 696 // assume that any reference to a temporary symbol *must* be a temporary 697 // symbol in the same atom, unless the sections differ. Therefore, any PCrel 698 // relocation to a temporary symbol (in the same section) is fully 699 // resolved. This also works in conjunction with absolutized .set, which 700 // requires the compiler to use .set to absolutize the differences between 701 // symbols which the compiler knows to be assembly time constants, so we 702 // don't need to worry about considering symbol differences fully resolved. 703 // 704 // If the file isn't using sub-sections-via-symbols, we can make the 705 // same assumptions about any symbol that we normally make about 706 // assembler locals. 707 708 bool hasReliableSymbolDifference = isX86_64(); 709 if (!hasReliableSymbolDifference) { 710 if (!SA.isInSection() || &SecA != &SecB || 711 (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() && 712 Asm.getSubsectionsViaSymbols())) 713 return false; 714 return true; 715 } 716 // For Darwin x86_64, there is one special case when the reference IsPCRel. 717 // If the fragment with the reference does not have a base symbol but meets 718 // the simple way of dealing with this, in that it is a temporary symbol in 719 // the same atom then it is assumed to be fully resolved. This is needed so 720 // a relocation entry is not created and so the static linker does not 721 // mess up the reference later. 722 else if(!FB.getAtom() && 723 SA.isTemporary() && SA.isInSection() && &SecA == &SecB){ 724 return true; 725 } 726 } 727 728 // If they are not in the same section, we can't compute the diff. 729 if (&SecA != &SecB) 730 return false; 731 732 const MCFragment *FA = SA.getFragment(); 733 734 // Bail if the symbol has no fragment. 735 if (!FA) 736 return false; 737 738 // If the atoms are the same, they are guaranteed to have the same address. 739 if (FA->getAtom() == FB.getAtom()) 740 return true; 741 742 // Otherwise, we can't prove this is fully resolved. 743 return false; 744 } 745 746 static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) { 747 switch (Type) { 748 case MCVM_OSXVersionMin: return MachO::LC_VERSION_MIN_MACOSX; 749 case MCVM_IOSVersionMin: return MachO::LC_VERSION_MIN_IPHONEOS; 750 case MCVM_TvOSVersionMin: return MachO::LC_VERSION_MIN_TVOS; 751 case MCVM_WatchOSVersionMin: return MachO::LC_VERSION_MIN_WATCHOS; 752 } 753 llvm_unreachable("Invalid mc version min type"); 754 } 755 756 void MachObjectWriter::populateAddrSigSection(MCAssembler &Asm) { 757 MCSection *AddrSigSection = 758 Asm.getContext().getObjectFileInfo()->getAddrSigSection(); 759 unsigned Log2Size = is64Bit() ? 3 : 2; 760 for (const MCSymbol *S : getAddrsigSyms()) { 761 MachO::any_relocation_info MRE; 762 MRE.r_word0 = 0; 763 MRE.r_word1 = (Log2Size << 25) | (MachO::GENERIC_RELOC_VANILLA << 28); 764 addRelocation(S, AddrSigSection, MRE); 765 } 766 } 767 768 uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, 769 const MCAsmLayout &Layout) { 770 uint64_t StartOffset = W.OS.tell(); 771 772 populateAddrSigSection(Asm); 773 774 // Compute symbol table information and bind symbol indices. 775 computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData, 776 UndefinedSymbolData); 777 778 if (!Asm.CGProfile.empty()) { 779 MCSection *CGProfileSection = Asm.getContext().getMachOSection( 780 "__LLVM", "__cg_profile", 0, SectionKind::getMetadata()); 781 MCDataFragment *Frag = dyn_cast_or_null<MCDataFragment>( 782 &*CGProfileSection->getFragmentList().begin()); 783 assert(Frag && "call graph profile section not reserved"); 784 Frag->getContents().clear(); 785 raw_svector_ostream OS(Frag->getContents()); 786 for (const MCAssembler::CGProfileEntry &CGPE : Asm.CGProfile) { 787 uint32_t FromIndex = CGPE.From->getSymbol().getIndex(); 788 uint32_t ToIndex = CGPE.To->getSymbol().getIndex(); 789 support::endian::write(OS, FromIndex, W.Endian); 790 support::endian::write(OS, ToIndex, W.Endian); 791 support::endian::write(OS, CGPE.Count, W.Endian); 792 } 793 } 794 795 unsigned NumSections = Asm.size(); 796 const MCAssembler::VersionInfoType &VersionInfo = 797 Layout.getAssembler().getVersionInfo(); 798 799 // The section data starts after the header, the segment load command (and 800 // section headers) and the symbol table. 801 unsigned NumLoadCommands = 1; 802 uint64_t LoadCommandsSize = is64Bit() ? 803 sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64): 804 sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section); 805 806 // Add the deployment target version info load command size, if used. 807 if (VersionInfo.Major != 0) { 808 ++NumLoadCommands; 809 if (VersionInfo.EmitBuildVersion) 810 LoadCommandsSize += sizeof(MachO::build_version_command); 811 else 812 LoadCommandsSize += sizeof(MachO::version_min_command); 813 } 814 815 const MCAssembler::VersionInfoType &TargetVariantVersionInfo = 816 Layout.getAssembler().getDarwinTargetVariantVersionInfo(); 817 818 // Add the target variant version info load command size, if used. 819 if (TargetVariantVersionInfo.Major != 0) { 820 ++NumLoadCommands; 821 assert(TargetVariantVersionInfo.EmitBuildVersion && 822 "target variant should use build version"); 823 LoadCommandsSize += sizeof(MachO::build_version_command); 824 } 825 826 // Add the data-in-code load command size, if used. 827 unsigned NumDataRegions = Asm.getDataRegions().size(); 828 if (NumDataRegions) { 829 ++NumLoadCommands; 830 LoadCommandsSize += sizeof(MachO::linkedit_data_command); 831 } 832 833 // Add the loh load command size, if used. 834 uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout); 835 uint64_t LOHSize = alignTo(LOHRawSize, is64Bit() ? 8 : 4); 836 if (LOHSize) { 837 ++NumLoadCommands; 838 LoadCommandsSize += sizeof(MachO::linkedit_data_command); 839 } 840 841 // Add the symbol table load command sizes, if used. 842 unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + 843 UndefinedSymbolData.size(); 844 if (NumSymbols) { 845 NumLoadCommands += 2; 846 LoadCommandsSize += (sizeof(MachO::symtab_command) + 847 sizeof(MachO::dysymtab_command)); 848 } 849 850 // Add the linker option load commands sizes. 851 for (const auto &Option : Asm.getLinkerOptions()) { 852 ++NumLoadCommands; 853 LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit()); 854 } 855 856 // Compute the total size of the section data, as well as its file size and vm 857 // size. 858 uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) : 859 sizeof(MachO::mach_header)) + LoadCommandsSize; 860 uint64_t SectionDataSize = 0; 861 uint64_t SectionDataFileSize = 0; 862 uint64_t VMSize = 0; 863 for (const MCSection &Sec : Asm) { 864 uint64_t Address = getSectionAddress(&Sec); 865 uint64_t Size = Layout.getSectionAddressSize(&Sec); 866 uint64_t FileSize = Layout.getSectionFileSize(&Sec); 867 FileSize += getPaddingSize(&Sec, Layout); 868 869 VMSize = std::max(VMSize, Address + Size); 870 871 if (Sec.isVirtualSection()) 872 continue; 873 874 SectionDataSize = std::max(SectionDataSize, Address + Size); 875 SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); 876 } 877 878 // The section data is padded to pointer size bytes. 879 // 880 // FIXME: Is this machine dependent? 881 unsigned SectionDataPadding = 882 offsetToAlignment(SectionDataFileSize, is64Bit() ? Align(8) : Align(4)); 883 SectionDataFileSize += SectionDataPadding; 884 885 // Write the prolog, starting with the header and load command... 886 writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize, 887 Asm.getSubsectionsViaSymbols()); 888 uint32_t Prot = 889 MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE; 890 writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart, 891 SectionDataSize, Prot, Prot); 892 893 // ... and then the section headers. 894 uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; 895 for (const MCSection &Section : Asm) { 896 const auto &Sec = cast<MCSectionMachO>(Section); 897 std::vector<RelAndSymbol> &Relocs = Relocations[&Sec]; 898 unsigned NumRelocs = Relocs.size(); 899 uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec); 900 unsigned Flags = Sec.getTypeAndAttributes(); 901 if (Sec.hasInstructions()) 902 Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS; 903 writeSection(Layout, Sec, getSectionAddress(&Sec), SectionStart, Flags, 904 RelocTableEnd, NumRelocs); 905 RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info); 906 } 907 908 // Write out the deployment target information, if it's available. 909 auto EmitDeploymentTargetVersion = 910 [&](const MCAssembler::VersionInfoType &VersionInfo) { 911 auto EncodeVersion = [](VersionTuple V) -> uint32_t { 912 assert(!V.empty() && "empty version"); 913 unsigned Update = V.getSubminor().value_or(0); 914 unsigned Minor = V.getMinor().value_or(0); 915 assert(Update < 256 && "unencodable update target version"); 916 assert(Minor < 256 && "unencodable minor target version"); 917 assert(V.getMajor() < 65536 && "unencodable major target version"); 918 return Update | (Minor << 8) | (V.getMajor() << 16); 919 }; 920 uint32_t EncodedVersion = EncodeVersion(VersionTuple( 921 VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update)); 922 uint32_t SDKVersion = !VersionInfo.SDKVersion.empty() 923 ? EncodeVersion(VersionInfo.SDKVersion) 924 : 0; 925 if (VersionInfo.EmitBuildVersion) { 926 // FIXME: Currently empty tools. Add clang version in the future. 927 W.write<uint32_t>(MachO::LC_BUILD_VERSION); 928 W.write<uint32_t>(sizeof(MachO::build_version_command)); 929 W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform); 930 W.write<uint32_t>(EncodedVersion); 931 W.write<uint32_t>(SDKVersion); 932 W.write<uint32_t>(0); // Empty tools list. 933 } else { 934 MachO::LoadCommandType LCType = 935 getLCFromMCVM(VersionInfo.TypeOrPlatform.Type); 936 W.write<uint32_t>(LCType); 937 W.write<uint32_t>(sizeof(MachO::version_min_command)); 938 W.write<uint32_t>(EncodedVersion); 939 W.write<uint32_t>(SDKVersion); 940 } 941 }; 942 if (VersionInfo.Major != 0) 943 EmitDeploymentTargetVersion(VersionInfo); 944 if (TargetVariantVersionInfo.Major != 0) 945 EmitDeploymentTargetVersion(TargetVariantVersionInfo); 946 947 // Write the data-in-code load command, if used. 948 uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8; 949 if (NumDataRegions) { 950 uint64_t DataRegionsOffset = RelocTableEnd; 951 uint64_t DataRegionsSize = NumDataRegions * 8; 952 writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset, 953 DataRegionsSize); 954 } 955 956 // Write the loh load command, if used. 957 uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize; 958 if (LOHSize) 959 writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT, 960 DataInCodeTableEnd, LOHSize); 961 962 // Write the symbol table load command, if used. 963 if (NumSymbols) { 964 unsigned FirstLocalSymbol = 0; 965 unsigned NumLocalSymbols = LocalSymbolData.size(); 966 unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; 967 unsigned NumExternalSymbols = ExternalSymbolData.size(); 968 unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; 969 unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); 970 unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); 971 unsigned NumSymTabSymbols = 972 NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; 973 uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; 974 uint64_t IndirectSymbolOffset = 0; 975 976 // If used, the indirect symbols are written after the section data. 977 if (NumIndirectSymbols) 978 IndirectSymbolOffset = LOHTableEnd; 979 980 // The symbol table is written after the indirect symbol data. 981 uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize; 982 983 // The string table is written after symbol table. 984 uint64_t StringTableOffset = 985 SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? 986 sizeof(MachO::nlist_64) : 987 sizeof(MachO::nlist)); 988 writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, 989 StringTableOffset, StringTable.getSize()); 990 991 writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, 992 FirstExternalSymbol, NumExternalSymbols, 993 FirstUndefinedSymbol, NumUndefinedSymbols, 994 IndirectSymbolOffset, NumIndirectSymbols); 995 } 996 997 // Write the linker options load commands. 998 for (const auto &Option : Asm.getLinkerOptions()) 999 writeLinkerOptionsLoadCommand(Option); 1000 1001 // Write the actual section data. 1002 for (const MCSection &Sec : Asm) { 1003 Asm.writeSectionData(W.OS, &Sec, Layout); 1004 1005 uint64_t Pad = getPaddingSize(&Sec, Layout); 1006 W.OS.write_zeros(Pad); 1007 } 1008 1009 // Write the extra padding. 1010 W.OS.write_zeros(SectionDataPadding); 1011 1012 // Write the relocation entries. 1013 for (const MCSection &Sec : Asm) { 1014 // Write the section relocation entries, in reverse order to match 'as' 1015 // (approximately, the exact algorithm is more complicated than this). 1016 std::vector<RelAndSymbol> &Relocs = Relocations[&Sec]; 1017 for (const RelAndSymbol &Rel : llvm::reverse(Relocs)) { 1018 W.write<uint32_t>(Rel.MRE.r_word0); 1019 W.write<uint32_t>(Rel.MRE.r_word1); 1020 } 1021 } 1022 1023 // Write out the data-in-code region payload, if there is one. 1024 for (MCAssembler::const_data_region_iterator 1025 it = Asm.data_region_begin(), ie = Asm.data_region_end(); 1026 it != ie; ++it) { 1027 const DataRegionData *Data = &(*it); 1028 uint64_t Start = getSymbolAddress(*Data->Start, Layout); 1029 uint64_t End; 1030 if (Data->End) 1031 End = getSymbolAddress(*Data->End, Layout); 1032 else 1033 report_fatal_error("Data region not terminated"); 1034 1035 LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind 1036 << " start: " << Start << "(" << Data->Start->getName() 1037 << ")" 1038 << " end: " << End << "(" << Data->End->getName() << ")" 1039 << " size: " << End - Start << "\n"); 1040 W.write<uint32_t>(Start); 1041 W.write<uint16_t>(End - Start); 1042 W.write<uint16_t>(Data->Kind); 1043 } 1044 1045 // Write out the loh commands, if there is one. 1046 if (LOHSize) { 1047 #ifndef NDEBUG 1048 unsigned Start = W.OS.tell(); 1049 #endif 1050 Asm.getLOHContainer().emit(*this, Layout); 1051 // Pad to a multiple of the pointer size. 1052 W.OS.write_zeros( 1053 offsetToAlignment(LOHRawSize, is64Bit() ? Align(8) : Align(4))); 1054 assert(W.OS.tell() - Start == LOHSize); 1055 } 1056 1057 // Write the symbol table data, if used. 1058 if (NumSymbols) { 1059 // Write the indirect symbol entries. 1060 for (MCAssembler::const_indirect_symbol_iterator 1061 it = Asm.indirect_symbol_begin(), 1062 ie = Asm.indirect_symbol_end(); it != ie; ++it) { 1063 // Indirect symbols in the non-lazy symbol pointer section have some 1064 // special handling. 1065 const MCSectionMachO &Section = 1066 static_cast<const MCSectionMachO &>(*it->Section); 1067 if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) { 1068 // If this symbol is defined and internal, mark it as such. 1069 if (it->Symbol->isDefined() && !it->Symbol->isExternal()) { 1070 uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL; 1071 if (it->Symbol->isAbsolute()) 1072 Flags |= MachO::INDIRECT_SYMBOL_ABS; 1073 W.write<uint32_t>(Flags); 1074 continue; 1075 } 1076 } 1077 1078 W.write<uint32_t>(it->Symbol->getIndex()); 1079 } 1080 1081 // FIXME: Check that offsets match computed ones. 1082 1083 // Write the symbol table entries. 1084 for (auto *SymbolData : 1085 {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData}) 1086 for (MachSymbolData &Entry : *SymbolData) 1087 writeNlist(Entry, Layout); 1088 1089 // Write the string table. 1090 StringTable.write(W.OS); 1091 } 1092 1093 return W.OS.tell() - StartOffset; 1094 } 1095 1096 std::unique_ptr<MCObjectWriter> 1097 llvm::createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW, 1098 raw_pwrite_stream &OS, bool IsLittleEndian) { 1099 return std::make_unique<MachObjectWriter>(std::move(MOTW), OS, 1100 IsLittleEndian); 1101 } 1102