1 //=--------- MachOLinkGraphBuilder.cpp - MachO LinkGraph builder ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Generic MachO LinkGraph buliding code. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "MachOLinkGraphBuilder.h" 14 15 #define DEBUG_TYPE "jitlink" 16 17 static const char *CommonSectionName = "__common"; 18 19 namespace llvm { 20 namespace jitlink { 21 22 MachOLinkGraphBuilder::~MachOLinkGraphBuilder() {} 23 24 Expected<std::unique_ptr<LinkGraph>> MachOLinkGraphBuilder::buildGraph() { 25 26 // Sanity check: we only operate on relocatable objects. 27 if (!Obj.isRelocatableObject()) 28 return make_error<JITLinkError>("Object is not a relocatable MachO"); 29 30 if (auto Err = createNormalizedSections()) 31 return std::move(Err); 32 33 if (auto Err = createNormalizedSymbols()) 34 return std::move(Err); 35 36 if (auto Err = graphifyRegularSymbols()) 37 return std::move(Err); 38 39 if (auto Err = graphifySectionsWithCustomParsers()) 40 return std::move(Err); 41 42 if (auto Err = addRelocations()) 43 return std::move(Err); 44 45 return std::move(G); 46 } 47 48 MachOLinkGraphBuilder::MachOLinkGraphBuilder(const object::MachOObjectFile &Obj) 49 : Obj(Obj), 50 G(std::make_unique<LinkGraph>(std::string(Obj.getFileName()), 51 getPointerSize(Obj), getEndianness(Obj))) {} 52 53 void MachOLinkGraphBuilder::addCustomSectionParser( 54 StringRef SectionName, SectionParserFunction Parser) { 55 assert(!CustomSectionParserFunctions.count(SectionName) && 56 "Custom parser for this section already exists"); 57 CustomSectionParserFunctions[SectionName] = std::move(Parser); 58 } 59 60 Linkage MachOLinkGraphBuilder::getLinkage(uint16_t Desc) { 61 if ((Desc & MachO::N_WEAK_DEF) || (Desc & MachO::N_WEAK_REF)) 62 return Linkage::Weak; 63 return Linkage::Strong; 64 } 65 66 Scope MachOLinkGraphBuilder::getScope(StringRef Name, uint8_t Type) { 67 if (Type & MachO::N_PEXT) 68 return Scope::Hidden; 69 if (Type & MachO::N_EXT) { 70 if (Name.startswith("l")) 71 return Scope::Hidden; 72 else 73 return Scope::Default; 74 } 75 return Scope::Local; 76 } 77 78 bool MachOLinkGraphBuilder::isAltEntry(const NormalizedSymbol &NSym) { 79 return NSym.Desc & MachO::N_ALT_ENTRY; 80 } 81 82 bool MachOLinkGraphBuilder::isDebugSection(const NormalizedSection &NSec) { 83 return (NSec.Flags & MachO::S_ATTR_DEBUG && 84 strcmp(NSec.SegName, "__DWARF") == 0); 85 } 86 87 unsigned 88 MachOLinkGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) { 89 return Obj.is64Bit() ? 8 : 4; 90 } 91 92 support::endianness 93 MachOLinkGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) { 94 return Obj.isLittleEndian() ? support::little : support::big; 95 } 96 97 Section &MachOLinkGraphBuilder::getCommonSection() { 98 if (!CommonSection) { 99 auto Prot = static_cast<sys::Memory::ProtectionFlags>( 100 sys::Memory::MF_READ | sys::Memory::MF_WRITE); 101 CommonSection = &G->createSection(CommonSectionName, Prot); 102 } 103 return *CommonSection; 104 } 105 106 Error MachOLinkGraphBuilder::createNormalizedSections() { 107 // Build normalized sections. Verifies that section data is in-range (for 108 // sections with content) and that address ranges are non-overlapping. 109 110 LLVM_DEBUG(dbgs() << "Creating normalized sections...\n"); 111 112 for (auto &SecRef : Obj.sections()) { 113 NormalizedSection NSec; 114 uint32_t DataOffset = 0; 115 116 auto SecIndex = Obj.getSectionIndex(SecRef.getRawDataRefImpl()); 117 118 auto Name = SecRef.getName(); 119 if (!Name) 120 return Name.takeError(); 121 122 if (Obj.is64Bit()) { 123 const MachO::section_64 &Sec64 = 124 Obj.getSection64(SecRef.getRawDataRefImpl()); 125 126 memcpy(&NSec.SectName, &Sec64.sectname, 16); 127 NSec.SectName[16] = '\0'; 128 memcpy(&NSec.SegName, Sec64.segname, 16); 129 NSec.SegName[16] = '\0'; 130 131 NSec.Address = Sec64.addr; 132 NSec.Size = Sec64.size; 133 NSec.Alignment = 1ULL << Sec64.align; 134 NSec.Flags = Sec64.flags; 135 DataOffset = Sec64.offset; 136 } else { 137 const MachO::section &Sec32 = Obj.getSection(SecRef.getRawDataRefImpl()); 138 139 memcpy(&NSec.SectName, &Sec32.sectname, 16); 140 NSec.SectName[16] = '\0'; 141 memcpy(&NSec.SegName, Sec32.segname, 16); 142 NSec.SegName[16] = '\0'; 143 144 NSec.Address = Sec32.addr; 145 NSec.Size = Sec32.size; 146 NSec.Alignment = 1ULL << Sec32.align; 147 NSec.Flags = Sec32.flags; 148 DataOffset = Sec32.offset; 149 } 150 151 LLVM_DEBUG({ 152 dbgs() << " " << *Name << ": " << formatv("{0:x16}", NSec.Address) 153 << " -- " << formatv("{0:x16}", NSec.Address + NSec.Size) 154 << ", align: " << NSec.Alignment << ", index: " << SecIndex 155 << "\n"; 156 }); 157 158 // Get the section data if any. 159 { 160 unsigned SectionType = NSec.Flags & MachO::SECTION_TYPE; 161 if (SectionType != MachO::S_ZEROFILL && 162 SectionType != MachO::S_GB_ZEROFILL) { 163 164 if (DataOffset + NSec.Size > Obj.getData().size()) 165 return make_error<JITLinkError>( 166 "Section data extends past end of file"); 167 168 NSec.Data = Obj.getData().data() + DataOffset; 169 } 170 } 171 172 // Get prot flags. 173 // FIXME: Make sure this test is correct (it's probably missing cases 174 // as-is). 175 sys::Memory::ProtectionFlags Prot; 176 if (NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) 177 Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ | 178 sys::Memory::MF_EXEC); 179 else 180 Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ | 181 sys::Memory::MF_WRITE); 182 183 if (!isDebugSection(NSec)) 184 NSec.GraphSection = &G->createSection(*Name, Prot); 185 else 186 LLVM_DEBUG({ 187 dbgs() << " " << *Name 188 << " is a debug section: No graph section will be created.\n"; 189 }); 190 191 IndexToSection.insert(std::make_pair(SecIndex, std::move(NSec))); 192 } 193 194 std::vector<NormalizedSection *> Sections; 195 Sections.reserve(IndexToSection.size()); 196 for (auto &KV : IndexToSection) 197 Sections.push_back(&KV.second); 198 199 // If we didn't end up creating any sections then bail out. The code below 200 // assumes that we have at least one section. 201 if (Sections.empty()) 202 return Error::success(); 203 204 llvm::sort(Sections, 205 [](const NormalizedSection *LHS, const NormalizedSection *RHS) { 206 assert(LHS && RHS && "Null section?"); 207 if (LHS->Address != RHS->Address) 208 return LHS->Address < RHS->Address; 209 return LHS->Size < RHS->Size; 210 }); 211 212 for (unsigned I = 0, E = Sections.size() - 1; I != E; ++I) { 213 auto &Cur = *Sections[I]; 214 auto &Next = *Sections[I + 1]; 215 if (Next.Address < Cur.Address + Cur.Size) 216 return make_error<JITLinkError>( 217 "Address range for section " + 218 formatv("\"{0}/{1}\" [ {2:x16} -- {3:x16} ] ", Cur.SegName, 219 Cur.SectName, Cur.Address, Cur.Address + Cur.Size) + 220 "overlaps section \"" + Next.SegName + "/" + Next.SectName + "\"" + 221 formatv("\"{0}/{1}\" [ {2:x16} -- {3:x16} ] ", Next.SegName, 222 Next.SectName, Next.Address, Next.Address + Next.Size)); 223 } 224 225 return Error::success(); 226 } 227 228 Error MachOLinkGraphBuilder::createNormalizedSymbols() { 229 LLVM_DEBUG(dbgs() << "Creating normalized symbols...\n"); 230 231 for (auto &SymRef : Obj.symbols()) { 232 233 unsigned SymbolIndex = Obj.getSymbolIndex(SymRef.getRawDataRefImpl()); 234 uint64_t Value; 235 uint32_t NStrX; 236 uint8_t Type; 237 uint8_t Sect; 238 uint16_t Desc; 239 240 if (Obj.is64Bit()) { 241 const MachO::nlist_64 &NL64 = 242 Obj.getSymbol64TableEntry(SymRef.getRawDataRefImpl()); 243 Value = NL64.n_value; 244 NStrX = NL64.n_strx; 245 Type = NL64.n_type; 246 Sect = NL64.n_sect; 247 Desc = NL64.n_desc; 248 } else { 249 const MachO::nlist &NL32 = 250 Obj.getSymbolTableEntry(SymRef.getRawDataRefImpl()); 251 Value = NL32.n_value; 252 NStrX = NL32.n_strx; 253 Type = NL32.n_type; 254 Sect = NL32.n_sect; 255 Desc = NL32.n_desc; 256 } 257 258 // Skip stabs. 259 // FIXME: Are there other symbols we should be skipping? 260 if (Type & MachO::N_STAB) 261 continue; 262 263 Optional<StringRef> Name; 264 if (NStrX) { 265 if (auto NameOrErr = SymRef.getName()) 266 Name = *NameOrErr; 267 else 268 return NameOrErr.takeError(); 269 } 270 271 LLVM_DEBUG({ 272 dbgs() << " "; 273 if (!Name) 274 dbgs() << "<anonymous symbol>"; 275 else 276 dbgs() << *Name; 277 dbgs() << ": value = " << formatv("{0:x16}", Value) 278 << ", type = " << formatv("{0:x2}", Type) 279 << ", desc = " << formatv("{0:x4}", Desc) << ", sect = "; 280 if (Sect) 281 dbgs() << static_cast<unsigned>(Sect - 1); 282 else 283 dbgs() << "none"; 284 dbgs() << "\n"; 285 }); 286 287 // If this symbol has a section, sanity check that the addresses line up. 288 if (Sect != 0) { 289 auto NSec = findSectionByIndex(Sect - 1); 290 if (!NSec) 291 return NSec.takeError(); 292 293 if (Value < NSec->Address || Value > NSec->Address + NSec->Size) 294 return make_error<JITLinkError>("Symbol address does not fall within " 295 "section"); 296 297 if (!NSec->GraphSection) { 298 LLVM_DEBUG({ 299 dbgs() << " Skipping: Symbol is in section " << NSec->SegName << "/" 300 << NSec->SectName 301 << " which has no associated graph section.\n"; 302 }); 303 continue; 304 } 305 } 306 307 IndexToSymbol[SymbolIndex] = 308 &createNormalizedSymbol(*Name, Value, Type, Sect, Desc, 309 getLinkage(Desc), getScope(*Name, Type)); 310 } 311 312 return Error::success(); 313 } 314 315 void MachOLinkGraphBuilder::addSectionStartSymAndBlock( 316 Section &GraphSec, uint64_t Address, const char *Data, uint64_t Size, 317 uint32_t Alignment, bool IsLive) { 318 Block &B = 319 Data ? G->createContentBlock(GraphSec, StringRef(Data, Size), Address, 320 Alignment, 0) 321 : G->createZeroFillBlock(GraphSec, Size, Address, Alignment, 0); 322 auto &Sym = G->addAnonymousSymbol(B, 0, Size, false, IsLive); 323 assert(!AddrToCanonicalSymbol.count(Sym.getAddress()) && 324 "Anonymous block start symbol clashes with existing symbol address"); 325 AddrToCanonicalSymbol[Sym.getAddress()] = &Sym; 326 } 327 328 Error MachOLinkGraphBuilder::graphifyRegularSymbols() { 329 330 LLVM_DEBUG(dbgs() << "Creating graph symbols...\n"); 331 332 /// We only have 256 section indexes: Use a vector rather than a map. 333 std::vector<std::vector<NormalizedSymbol *>> SecIndexToSymbols; 334 SecIndexToSymbols.resize(256); 335 336 // Create commons, externs, and absolutes, and partition all other symbols by 337 // section. 338 for (auto &KV : IndexToSymbol) { 339 auto &NSym = *KV.second; 340 341 switch (NSym.Type & MachO::N_TYPE) { 342 case MachO::N_UNDF: 343 if (NSym.Value) { 344 if (!NSym.Name) 345 return make_error<JITLinkError>("Anonymous common symbol at index " + 346 Twine(KV.first)); 347 NSym.GraphSymbol = &G->addCommonSymbol( 348 *NSym.Name, NSym.S, getCommonSection(), 0, NSym.Value, 349 1ull << MachO::GET_COMM_ALIGN(NSym.Desc), 350 NSym.Desc & MachO::N_NO_DEAD_STRIP); 351 } else { 352 if (!NSym.Name) 353 return make_error<JITLinkError>("Anonymous external symbol at " 354 "index " + 355 Twine(KV.first)); 356 NSym.GraphSymbol = &G->addExternalSymbol( 357 *NSym.Name, 0, 358 NSym.Desc & MachO::N_WEAK_REF ? Linkage::Weak : Linkage::Strong); 359 } 360 break; 361 case MachO::N_ABS: 362 if (!NSym.Name) 363 return make_error<JITLinkError>("Anonymous absolute symbol at index " + 364 Twine(KV.first)); 365 NSym.GraphSymbol = &G->addAbsoluteSymbol( 366 *NSym.Name, NSym.Value, 0, Linkage::Strong, Scope::Default, 367 NSym.Desc & MachO::N_NO_DEAD_STRIP); 368 break; 369 case MachO::N_SECT: 370 SecIndexToSymbols[NSym.Sect - 1].push_back(&NSym); 371 break; 372 case MachO::N_PBUD: 373 return make_error<JITLinkError>( 374 "Unupported N_PBUD symbol " + 375 (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) + 376 " at index " + Twine(KV.first)); 377 case MachO::N_INDR: 378 return make_error<JITLinkError>( 379 "Unupported N_INDR symbol " + 380 (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) + 381 " at index " + Twine(KV.first)); 382 default: 383 return make_error<JITLinkError>( 384 "Unrecognized symbol type " + Twine(NSym.Type & MachO::N_TYPE) + 385 " for symbol " + 386 (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) + 387 " at index " + Twine(KV.first)); 388 } 389 } 390 391 // Loop over sections performing regular graphification for those that 392 // don't have custom parsers. 393 for (auto &KV : IndexToSection) { 394 auto SecIndex = KV.first; 395 auto &NSec = KV.second; 396 397 if (!NSec.GraphSection) { 398 LLVM_DEBUG({ 399 dbgs() << " " << NSec.SegName << "/" << NSec.SectName 400 << " has no graph section. Skipping.\n"; 401 }); 402 continue; 403 } 404 405 // Skip sections with custom parsers. 406 if (CustomSectionParserFunctions.count(NSec.GraphSection->getName())) { 407 LLVM_DEBUG({ 408 dbgs() << " Skipping section " << NSec.GraphSection->getName() 409 << " as it has a custom parser.\n"; 410 }); 411 continue; 412 } else 413 LLVM_DEBUG({ 414 dbgs() << " Processing section " << NSec.GraphSection->getName() 415 << "...\n"; 416 }); 417 418 bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP; 419 bool SectionIsText = NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS; 420 421 auto &SecNSymStack = SecIndexToSymbols[SecIndex]; 422 423 // If this section is non-empty but there are no symbols covering it then 424 // create one block and anonymous symbol to cover the entire section. 425 if (SecNSymStack.empty()) { 426 if (NSec.Size > 0) { 427 LLVM_DEBUG({ 428 dbgs() << " Section non-empty, but contains no symbols. " 429 "Creating anonymous block to cover " 430 << formatv("{0:x16}", NSec.Address) << " -- " 431 << formatv("{0:x16}", NSec.Address + NSec.Size) << "\n"; 432 }); 433 addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data, 434 NSec.Size, NSec.Alignment, 435 SectionIsNoDeadStrip); 436 } else 437 LLVM_DEBUG({ 438 dbgs() << " Section empty and contains no symbols. Skipping.\n"; 439 }); 440 continue; 441 } 442 443 // Sort the symbol stack in by address, alt-entry status, scope, and name. 444 // We sort in reverse order so that symbols will be visited in the right 445 // order when we pop off the stack below. 446 llvm::sort(SecNSymStack, [](const NormalizedSymbol *LHS, 447 const NormalizedSymbol *RHS) { 448 if (LHS->Value != RHS->Value) 449 return LHS->Value > RHS->Value; 450 if (isAltEntry(*LHS) != isAltEntry(*RHS)) 451 return isAltEntry(*RHS); 452 if (LHS->S != RHS->S) 453 return static_cast<uint8_t>(LHS->S) < static_cast<uint8_t>(RHS->S); 454 return LHS->Name < RHS->Name; 455 }); 456 457 // The first symbol in a section can not be an alt-entry symbol. 458 if (!SecNSymStack.empty() && isAltEntry(*SecNSymStack.back())) 459 return make_error<JITLinkError>( 460 "First symbol in " + NSec.GraphSection->getName() + " is alt-entry"); 461 462 // If the section is non-empty but there is no symbol covering the start 463 // address then add an anonymous one. 464 if (SecNSymStack.back()->Value != NSec.Address) { 465 auto AnonBlockSize = SecNSymStack.back()->Value - NSec.Address; 466 LLVM_DEBUG({ 467 dbgs() << " Section start not covered by symbol. " 468 << "Creating anonymous block to cover [ " 469 << formatv("{0:x16}", NSec.Address) << " -- " 470 << formatv("{0:x16}", NSec.Address + AnonBlockSize) << " ]\n"; 471 }); 472 addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data, 473 AnonBlockSize, NSec.Alignment, 474 SectionIsNoDeadStrip); 475 } 476 477 // Visit section symbols in order by popping off the reverse-sorted stack, 478 // building blocks for each alt-entry chain and creating symbols as we go. 479 while (!SecNSymStack.empty()) { 480 SmallVector<NormalizedSymbol *, 8> BlockSyms; 481 482 BlockSyms.push_back(SecNSymStack.back()); 483 SecNSymStack.pop_back(); 484 while (!SecNSymStack.empty() && 485 (isAltEntry(*SecNSymStack.back()) || 486 SecNSymStack.back()->Value == BlockSyms.back()->Value)) { 487 BlockSyms.push_back(SecNSymStack.back()); 488 SecNSymStack.pop_back(); 489 } 490 491 // BlockNSyms now contains the block symbols in reverse canonical order. 492 JITTargetAddress BlockStart = BlockSyms.front()->Value; 493 JITTargetAddress BlockEnd = SecNSymStack.empty() 494 ? NSec.Address + NSec.Size 495 : SecNSymStack.back()->Value; 496 JITTargetAddress BlockOffset = BlockStart - NSec.Address; 497 JITTargetAddress BlockSize = BlockEnd - BlockStart; 498 499 LLVM_DEBUG({ 500 dbgs() << " Creating block for " << formatv("{0:x16}", BlockStart) 501 << " -- " << formatv("{0:x16}", BlockEnd) << ": " 502 << NSec.GraphSection->getName() << " + " 503 << formatv("{0:x16}", BlockOffset) << " with " 504 << BlockSyms.size() << " symbol(s)...\n"; 505 }); 506 507 Block &B = 508 NSec.Data 509 ? G->createContentBlock( 510 *NSec.GraphSection, 511 StringRef(NSec.Data + BlockOffset, BlockSize), BlockStart, 512 NSec.Alignment, BlockStart % NSec.Alignment) 513 : G->createZeroFillBlock(*NSec.GraphSection, BlockSize, 514 BlockStart, NSec.Alignment, 515 BlockStart % NSec.Alignment); 516 517 Optional<JITTargetAddress> LastCanonicalAddr; 518 JITTargetAddress SymEnd = BlockEnd; 519 while (!BlockSyms.empty()) { 520 auto &NSym = *BlockSyms.back(); 521 BlockSyms.pop_back(); 522 523 bool SymLive = 524 (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip; 525 526 LLVM_DEBUG({ 527 dbgs() << " " << formatv("{0:x16}", NSym.Value) << " -- " 528 << formatv("{0:x16}", SymEnd) << ": "; 529 if (!NSym.Name) 530 dbgs() << "<anonymous symbol>"; 531 else 532 dbgs() << NSym.Name; 533 if (SymLive) 534 dbgs() << " [no-dead-strip]"; 535 if (LastCanonicalAddr == NSym.Value) 536 dbgs() << " [non-canonical]"; 537 dbgs() << "\n"; 538 }); 539 540 auto &Sym = 541 NSym.Name 542 ? G->addDefinedSymbol(B, NSym.Value - BlockStart, *NSym.Name, 543 SymEnd - NSym.Value, NSym.L, NSym.S, 544 SectionIsText, SymLive) 545 : G->addAnonymousSymbol(B, NSym.Value - BlockStart, 546 SymEnd - NSym.Value, SectionIsText, 547 SymLive); 548 NSym.GraphSymbol = &Sym; 549 if (LastCanonicalAddr != Sym.getAddress()) { 550 if (LastCanonicalAddr) 551 SymEnd = *LastCanonicalAddr; 552 LastCanonicalAddr = Sym.getAddress(); 553 setCanonicalSymbol(Sym); 554 } 555 } 556 } 557 } 558 559 return Error::success(); 560 } 561 562 Error MachOLinkGraphBuilder::graphifySectionsWithCustomParsers() { 563 // Graphify special sections. 564 for (auto &KV : IndexToSection) { 565 auto &NSec = KV.second; 566 567 // Skip non-graph sections. 568 if (!NSec.GraphSection) 569 continue; 570 571 auto HI = CustomSectionParserFunctions.find(NSec.GraphSection->getName()); 572 if (HI != CustomSectionParserFunctions.end()) { 573 auto &Parse = HI->second; 574 if (auto Err = Parse(NSec)) 575 return Err; 576 } 577 } 578 579 return Error::success(); 580 } 581 582 } // end namespace jitlink 583 } // end namespace llvm 584