1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputFiles.h" 10 #include "Config.h" 11 #include "DWARF.h" 12 #include "Driver.h" 13 #include "InputSection.h" 14 #include "LinkerScript.h" 15 #include "SymbolTable.h" 16 #include "Symbols.h" 17 #include "SyntheticSections.h" 18 #include "Target.h" 19 #include "lld/Common/DWARF.h" 20 #include "llvm/ADT/CachedHashString.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/LTO/LTO.h" 23 #include "llvm/Object/Archive.h" 24 #include "llvm/Object/IRObjectFile.h" 25 #include "llvm/Support/ARMAttributeParser.h" 26 #include "llvm/Support/ARMBuildAttributes.h" 27 #include "llvm/Support/Endian.h" 28 #include "llvm/Support/FileSystem.h" 29 #include "llvm/Support/Path.h" 30 #include "llvm/Support/TimeProfiler.h" 31 #include "llvm/Support/raw_ostream.h" 32 #include <optional> 33 34 using namespace llvm; 35 using namespace llvm::ELF; 36 using namespace llvm::object; 37 using namespace llvm::sys; 38 using namespace llvm::sys::fs; 39 using namespace llvm::support::endian; 40 using namespace lld; 41 using namespace lld::elf; 42 43 // This function is explicitly instantiated in ARM.cpp, don't do it here to 44 // avoid warnings with MSVC. 45 extern template void ObjFile<ELF32LE>::importCmseSymbols(); 46 extern template void ObjFile<ELF32BE>::importCmseSymbols(); 47 extern template void ObjFile<ELF64LE>::importCmseSymbols(); 48 extern template void ObjFile<ELF64BE>::importCmseSymbols(); 49 50 // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 
51 std::string elf::toStr(Ctx &ctx, const InputFile *f) { 52 static std::mutex mu; 53 if (!f) 54 return "<internal>"; 55 56 { 57 std::lock_guard<std::mutex> lock(mu); 58 if (f->toStringCache.empty()) { 59 if (f->archiveName.empty()) 60 f->toStringCache = f->getName(); 61 else 62 (f->archiveName + "(" + f->getName() + ")").toVector(f->toStringCache); 63 } 64 } 65 return std::string(f->toStringCache); 66 } 67 68 const ELFSyncStream &elf::operator<<(const ELFSyncStream &s, 69 const InputFile *f) { 70 return s << toStr(s.ctx, f); 71 } 72 73 static ELFKind getELFKind(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName) { 74 unsigned char size; 75 unsigned char endian; 76 std::tie(size, endian) = getElfArchType(mb.getBuffer()); 77 78 auto report = [&](StringRef msg) { 79 StringRef filename = mb.getBufferIdentifier(); 80 if (archiveName.empty()) 81 Fatal(ctx) << filename << ": " << msg; 82 else 83 Fatal(ctx) << archiveName << "(" << filename << "): " << msg; 84 }; 85 86 if (!mb.getBuffer().starts_with(ElfMagic)) 87 report("not an ELF file"); 88 if (endian != ELFDATA2LSB && endian != ELFDATA2MSB) 89 report("corrupted ELF file: invalid data encoding"); 90 if (size != ELFCLASS32 && size != ELFCLASS64) 91 report("corrupted ELF file: invalid file class"); 92 93 size_t bufSize = mb.getBuffer().size(); 94 if ((size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr)) || 95 (size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr))) 96 report("corrupted ELF file: file is too short"); 97 98 if (size == ELFCLASS32) 99 return (endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind; 100 return (endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind; 101 } 102 103 // For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD 104 // flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how 105 // the input objects have been compiled. 
106 static void updateARMVFPArgs(Ctx &ctx, const ARMAttributeParser &attributes, 107 const InputFile *f) { 108 std::optional<unsigned> attr = 109 attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args); 110 if (!attr) 111 // If an ABI tag isn't present then it is implicitly given the value of 0 112 // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files, 113 // including some in glibc that don't use FP args (and should have value 3) 114 // don't have the attribute so we do not consider an implicit value of 0 115 // as a clash. 116 return; 117 118 unsigned vfpArgs = *attr; 119 ARMVFPArgKind arg; 120 switch (vfpArgs) { 121 case ARMBuildAttrs::BaseAAPCS: 122 arg = ARMVFPArgKind::Base; 123 break; 124 case ARMBuildAttrs::HardFPAAPCS: 125 arg = ARMVFPArgKind::VFP; 126 break; 127 case ARMBuildAttrs::ToolChainFPPCS: 128 // Tool chain specific convention that conforms to neither AAPCS variant. 129 arg = ARMVFPArgKind::ToolChain; 130 break; 131 case ARMBuildAttrs::CompatibleFPAAPCS: 132 // Object compatible with all conventions. 133 return; 134 default: 135 ErrAlways(ctx) << f << ": unknown Tag_ABI_VFP_args value: " << vfpArgs; 136 return; 137 } 138 // Follow ld.bfd and error if there is a mix of calling conventions. 139 if (ctx.arg.armVFPArgs != arg && ctx.arg.armVFPArgs != ARMVFPArgKind::Default) 140 ErrAlways(ctx) << f << ": incompatible Tag_ABI_VFP_args"; 141 else 142 ctx.arg.armVFPArgs = arg; 143 } 144 145 // The ARM support in lld makes some use of instructions that are not available 146 // on all ARM architectures. Namely: 147 // - Use of BLX instruction for interworking between ARM and Thumb state. 148 // - Use of the extended Thumb branch encoding in relocation. 149 // - Use of the MOVT/MOVW instructions in Thumb Thunks. 150 // The ARM Attributes section contains information about the architecture chosen 151 // at compile time. 
We follow the convention that if at least one input object 152 // is compiled with an architecture that supports these features then lld is 153 // permitted to use them. 154 static void updateSupportedARMFeatures(Ctx &ctx, 155 const ARMAttributeParser &attributes) { 156 std::optional<unsigned> attr = 157 attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); 158 if (!attr) 159 return; 160 auto arch = *attr; 161 switch (arch) { 162 case ARMBuildAttrs::Pre_v4: 163 case ARMBuildAttrs::v4: 164 case ARMBuildAttrs::v4T: 165 // Architectures prior to v5 do not support BLX instruction 166 break; 167 case ARMBuildAttrs::v5T: 168 case ARMBuildAttrs::v5TE: 169 case ARMBuildAttrs::v5TEJ: 170 case ARMBuildAttrs::v6: 171 case ARMBuildAttrs::v6KZ: 172 case ARMBuildAttrs::v6K: 173 ctx.arg.armHasBlx = true; 174 // Architectures used in pre-Cortex processors do not support 175 // The J1 = 1 J2 = 1 Thumb branch range extension, with the exception 176 // of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do. 177 break; 178 default: 179 // All other Architectures have BLX and extended branch encoding 180 ctx.arg.armHasBlx = true; 181 ctx.arg.armJ1J2BranchEncoding = true; 182 if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M) 183 // All Architectures used in Cortex processors with the exception 184 // of v6-M and v6S-M have the MOVT and MOVW instructions. 185 ctx.arg.armHasMovtMovw = true; 186 break; 187 } 188 189 // Only ARMv8-M or later architectures have CMSE support. 190 std::optional<unsigned> profile = 191 attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile); 192 if (!profile) 193 return; 194 if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base && 195 profile == ARMBuildAttrs::MicroControllerProfile) 196 ctx.arg.armCMSESupport = true; 197 198 // The thumb PLT entries require Thumb2 which can be used on multiple archs. 199 // For now, let's limit it to ones where ARM isn't available and we know have 200 // Thumb2. 
201 std::optional<unsigned> armISA = 202 attributes.getAttributeValue(ARMBuildAttrs::ARM_ISA_use); 203 std::optional<unsigned> thumb = 204 attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use); 205 ctx.arg.armHasArmISA |= armISA && *armISA >= ARMBuildAttrs::Allowed; 206 ctx.arg.armHasThumb2ISA |= thumb && *thumb >= ARMBuildAttrs::AllowThumb32; 207 } 208 209 InputFile::InputFile(Ctx &ctx, Kind k, MemoryBufferRef m) 210 : ctx(ctx), mb(m), groupId(ctx.driver.nextGroupId), fileKind(k) { 211 // All files within the same --{start,end}-group get the same group ID. 212 // Otherwise, a new file will get a new group ID. 213 if (!ctx.driver.isInGroup) 214 ++ctx.driver.nextGroupId; 215 } 216 217 InputFile::~InputFile() {} 218 219 std::optional<MemoryBufferRef> elf::readFile(Ctx &ctx, StringRef path) { 220 llvm::TimeTraceScope timeScope("Load input files", path); 221 222 // The --chroot option changes our virtual root directory. 223 // This is useful when you are dealing with files created by --reproduce. 224 if (!ctx.arg.chroot.empty() && path.starts_with("/")) 225 path = ctx.saver.save(ctx.arg.chroot + path); 226 227 bool remapped = false; 228 auto it = ctx.arg.remapInputs.find(path); 229 if (it != ctx.arg.remapInputs.end()) { 230 path = it->second; 231 remapped = true; 232 } else { 233 for (const auto &[pat, toFile] : ctx.arg.remapInputsWildcards) { 234 if (pat.match(path)) { 235 path = toFile; 236 remapped = true; 237 break; 238 } 239 } 240 } 241 if (remapped) { 242 // Use /dev/null to indicate an input file that should be ignored. Change 243 // the path to NUL on Windows. 
244 #ifdef _WIN32 245 if (path == "/dev/null") 246 path = "NUL"; 247 #endif 248 } 249 250 Log(ctx) << path; 251 ctx.arg.dependencyFiles.insert(llvm::CachedHashString(path)); 252 253 auto mbOrErr = MemoryBuffer::getFile(path, /*IsText=*/false, 254 /*RequiresNullTerminator=*/false); 255 if (auto ec = mbOrErr.getError()) { 256 ErrAlways(ctx) << "cannot open " << path << ": " << ec.message(); 257 return std::nullopt; 258 } 259 260 MemoryBufferRef mbref = (*mbOrErr)->getMemBufferRef(); 261 ctx.memoryBuffers.push_back(std::move(*mbOrErr)); // take MB ownership 262 263 if (ctx.tar) 264 ctx.tar->append(relativeToRoot(path), mbref.getBuffer()); 265 return mbref; 266 } 267 268 // All input object files must be for the same architecture 269 // (e.g. it does not make sense to link x86 object files with 270 // MIPS object files.) This function checks for that error. 271 static bool isCompatible(Ctx &ctx, InputFile *file) { 272 if (!file->isElf() && !isa<BitcodeFile>(file)) 273 return true; 274 275 if (file->ekind == ctx.arg.ekind && file->emachine == ctx.arg.emachine) { 276 if (ctx.arg.emachine != EM_MIPS) 277 return true; 278 if (isMipsN32Abi(ctx, *file) == ctx.arg.mipsN32Abi) 279 return true; 280 } 281 282 StringRef target = 283 !ctx.arg.bfdname.empty() ? 
ctx.arg.bfdname : ctx.arg.emulation; 284 if (!target.empty()) { 285 Err(ctx) << file << " is incompatible with " << target; 286 return false; 287 } 288 289 InputFile *existing = nullptr; 290 if (!ctx.objectFiles.empty()) 291 existing = ctx.objectFiles[0]; 292 else if (!ctx.sharedFiles.empty()) 293 existing = ctx.sharedFiles[0]; 294 else if (!ctx.bitcodeFiles.empty()) 295 existing = ctx.bitcodeFiles[0]; 296 auto diag = Err(ctx); 297 diag << file << " is incompatible"; 298 if (existing) 299 diag << " with " << existing; 300 return false; 301 } 302 303 template <class ELFT> static void doParseFile(Ctx &ctx, InputFile *file) { 304 if (!isCompatible(ctx, file)) 305 return; 306 307 // Lazy object file 308 if (file->lazy) { 309 if (auto *f = dyn_cast<BitcodeFile>(file)) { 310 ctx.lazyBitcodeFiles.push_back(f); 311 f->parseLazy(); 312 } else { 313 cast<ObjFile<ELFT>>(file)->parseLazy(); 314 } 315 return; 316 } 317 318 if (ctx.arg.trace) 319 Msg(ctx) << file; 320 321 if (file->kind() == InputFile::ObjKind) { 322 ctx.objectFiles.push_back(cast<ELFFileBase>(file)); 323 cast<ObjFile<ELFT>>(file)->parse(); 324 } else if (auto *f = dyn_cast<SharedFile>(file)) { 325 f->parse<ELFT>(); 326 } else if (auto *f = dyn_cast<BitcodeFile>(file)) { 327 ctx.bitcodeFiles.push_back(f); 328 f->parse(); 329 } else { 330 ctx.binaryFiles.push_back(cast<BinaryFile>(file)); 331 cast<BinaryFile>(file)->parse(); 332 } 333 } 334 335 // Add symbols in File to the symbol table. 336 void elf::parseFile(Ctx &ctx, InputFile *file) { 337 invokeELFT(doParseFile, ctx, file); 338 } 339 340 // This function is explicitly instantiated in ARM.cpp. Mark it extern here, 341 // to avoid warnings when building with MSVC. 
342 extern template void ObjFile<ELF32LE>::importCmseSymbols(); 343 extern template void ObjFile<ELF32BE>::importCmseSymbols(); 344 extern template void ObjFile<ELF64LE>::importCmseSymbols(); 345 extern template void ObjFile<ELF64BE>::importCmseSymbols(); 346 347 template <class ELFT> 348 static void 349 doParseFiles(Ctx &ctx, 350 const SmallVector<std::unique_ptr<InputFile>, 0> &files) { 351 // Add all files to the symbol table. This will add almost all symbols that we 352 // need to the symbol table. This process might add files to the link due to 353 // addDependentLibrary. 354 for (size_t i = 0; i < files.size(); ++i) { 355 llvm::TimeTraceScope timeScope("Parse input files", files[i]->getName()); 356 doParseFile<ELFT>(ctx, files[i].get()); 357 } 358 if (ctx.driver.armCmseImpLib) 359 cast<ObjFile<ELFT>>(*ctx.driver.armCmseImpLib).importCmseSymbols(); 360 } 361 362 void elf::parseFiles(Ctx &ctx, 363 const SmallVector<std::unique_ptr<InputFile>, 0> &files) { 364 llvm::TimeTraceScope timeScope("Parse input files"); 365 invokeELFT(doParseFiles, ctx, files); 366 } 367 368 // Concatenates arguments to construct a string representing an error location. 369 StringRef InputFile::getNameForScript() const { 370 if (archiveName.empty()) 371 return getName(); 372 373 if (nameForScriptCache.empty()) 374 nameForScriptCache = (archiveName + Twine(':') + getName()).str(); 375 376 return nameForScriptCache; 377 } 378 379 // An ELF object file may contain a `.deplibs` section. If it exists, the 380 // section contains a list of library specifiers such as `m` for libm. This 381 // function resolves a given name by finding the first matching library checking 382 // the various ways that a library can be specified to LLD. This ELF extension 383 // is a form of autolinking and is called `dependent libraries`. It is currently 384 // unique to LLVM and lld. 
static void addDependentLibrary(Ctx &ctx, StringRef specifier,
                                const InputFile *f) {
  // Dependent libraries can be switched off entirely.
  if (!ctx.arg.dependentLibraries)
    return;
  // Try, in order: the specifier as a library base name, the specifier looked
  // up in the search paths, and finally the specifier as a path that exists
  // as given. The first lookup that succeeds adds the file to the link.
  if (std::optional<std::string> s = searchLibraryBaseName(ctx, specifier))
    ctx.driver.addFile(ctx.saver.save(*s), /*withLOption=*/true);
  else if (std::optional<std::string> s = findFromSearchPaths(ctx, specifier))
    ctx.driver.addFile(ctx.saver.save(*s), /*withLOption=*/true);
  else if (fs::exists(specifier))
    ctx.driver.addFile(specifier, /*withLOption=*/false);
  else
    ErrAlways(ctx)
        << f << ": unable to find library from dependent library specifier: "
        << specifier;
}

// Record the membership of a section group so that in the garbage collection
// pass, section group members are kept or discarded as a unit.
//
// `entries` is the raw content of the SHT_GROUP section: entries[0] is the
// flag word and the remaining words are section indices into `sections`.
template <class ELFT>
static void handleSectionGroup(ArrayRef<InputSectionBase *> sections,
                               ArrayRef<typename ELFT::Word> entries) {
  bool hasAlloc = false;
  for (uint32_t index : entries.slice(1)) {
    // An out-of-range member index makes the whole group be ignored.
    if (index >= sections.size())
      return;
    if (InputSectionBase *s = sections[index])
      if (s != &InputSection::discarded && s->flags & SHF_ALLOC)
        hasAlloc = true;
  }

  // If any member has the SHF_ALLOC flag, the whole group is subject to garbage
  // collection. See the comment in markLive(). This rule retains .debug_types
  // and .rela.debug_types.
  if (!hasAlloc)
    return;

  // Connect the members in a circular doubly-linked list via
  // nextInSectionGroup.
  // `head` is assigned on the first live member, which is also the only way
  // `prev` becomes non-null, so it is always set before it is read below.
  InputSectionBase *head;
  InputSectionBase *prev = nullptr;
  for (uint32_t index : entries.slice(1)) {
    InputSectionBase *s = sections[index];
    // Null and discarded slots are not linked into the ring.
    if (!s || s == &InputSection::discarded)
      continue;
    if (prev)
      prev->nextInSectionGroup = s;
    else
      head = s;
    prev = s;
  }
  // Close the ring: the last live member points back to the first.
  if (prev)
    prev->nextInSectionGroup = head;
}

// Builds the DWARF cache for this object. Both handlers passed to the
// DWARFContext turn the received Error into a warning prefixed with the file
// name.
template <class ELFT> void ObjFile<ELFT>::initDwarf() {
  dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>(
      std::make_unique<LLDDwarfObj<ELFT>>(this), "",
      [&](Error err) { Warn(ctx) << getName() + ": " << std::move(err); },
      [&](Error warning) {
        Warn(ctx) << getName() << ": " << std::move(warning);
      }));
}

// Returns the DWARF cache, creating it on first use. Only valid for object
// files (asserted). call_once ensures initDwarf runs a single time, and the
// switch selects the ObjFile instantiation that matches this file's ELF kind.
DWARFCache *ELFFileBase::getDwarf() {
  assert(fileKind == ObjKind);
  llvm::call_once(initDwarf, [this]() {
    switch (ekind) {
    default:
      llvm_unreachable("");
    case ELF32LEKind:
      return cast<ObjFile<ELF32LE>>(this)->initDwarf();
    case ELF32BEKind:
      return cast<ObjFile<ELF32BE>>(this)->initDwarf();
    case ELF64LEKind:
      return cast<ObjFile<ELF64LE>>(this)->initDwarf();
    case ELF64BEKind:
      return cast<ObjFile<ELF64BE>>(this)->initDwarf();
    }
  });
  return dwarf.get();
}

// Records the ELF kind; the remaining ELF-specific members are filled in by
// init().
ELFFileBase::ELFFileBase(Ctx &ctx, Kind k, ELFKind ekind, MemoryBufferRef mb)
    : InputFile(ctx, k, mb) {
  this->ekind = ekind;
}

ELFFileBase::~ELFFileBase() {}

// Returns the first section header whose sh_type equals `type`, or nullptr if
// there is none.
template <typename Elf_Shdr>
static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> sections, uint32_t type) {
  for (const Elf_Shdr &sec : sections)
    if (sec.sh_type == type)
      return &sec;
  return nullptr;
}

// Dispatches to the init<ELFT>() instantiation that matches this file's ELF
// kind.
void ELFFileBase::init() {
  switch (ekind) {
  case ELF32LEKind:
    init<ELF32LE>(fileKind);
    break;
  case ELF32BEKind:
    init<ELF32BE>(fileKind);
    break;
  case ELF64LEKind:
    init<ELF64LE>(fileKind);
    break;
  case ELF64BEKind:
    init<ELF64BE>(fileKind);
    break;
  default:
    llvm_unreachable("getELFKind");
  }
}

// Reads the ELF header, the section header table and, if present, the symbol
// table (SHT_DYNSYM for shared objects, SHT_SYMTAB otherwise) together with
// its string table.
template <class ELFT> void ELFFileBase::init(InputFile::Kind k) {
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Sym = typename ELFT::Sym;

  // Initialize trivial attributes.
  const ELFFile<ELFT> &obj = getObj<ELFT>();
  emachine = obj.getHeader().e_machine;
  osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI];
  abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION];

  ArrayRef<Elf_Shdr> sections = CHECK2(obj.sections(), this);
  elfShdrs = sections.data();
  numELFShdrs = sections.size();

  // Find a symbol table.
  const Elf_Shdr *symtabSec =
      findSection(sections, k == SharedKind ? SHT_DYNSYM : SHT_SYMTAB);

  // A file without a symbol table leaves the symbol members untouched.
  if (!symtabSec)
    return;

  // Initialize members corresponding to a symbol table.
  // sh_info of a symbol table is used here as the index of the first global
  // symbol.
  firstGlobal = symtabSec->sh_info;

  ArrayRef<Elf_Sym> eSyms = CHECK2(obj.symbols(symtabSec), this);
  if (firstGlobal == 0 || firstGlobal > eSyms.size())
    Fatal(ctx) << this << ": invalid sh_info in symbol table";

  elfSyms = reinterpret_cast<const void *>(eSyms.data());
  numSymbols = eSyms.size();
  stringTable = CHECK2(obj.getStringTableForSymtab(*symtabSec, sections), this);
}

// Returns the section index of `sym`, consulting `shndxTable` (set from a
// SHT_SYMTAB_SHNDX section) through ELFFile::getSectionIndex.
template <class ELFT>
uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const {
  return CHECK2(
      this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable),
      this);
}

// First parsing pass over a relocatable object: resolves COMDAT groups,
// processes dependent-library sections and target-specific attribute
// sections, then reads the symbol table. SHT_PROGBITS sections are skipped
// here.
template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) {
  object::ELFFile<ELFT> obj = this->getObj();
  // Read a section table. justSymbols is usually false.
  if (this->justSymbols) {
    initializeJustSymbols();
    initializeSymbols(obj);
    return;
  }

  // Handle dependent libraries and selection of section groups as these are not
  // done in parallel.
  ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>();
  StringRef shstrtab = CHECK2(obj.getSectionStringTable(objSections), this);
  uint64_t size = objSections.size();
  sections.resize(size);
  for (size_t i = 0; i != size; ++i) {
    const Elf_Shdr &sec = objSections[i];
    if (LLVM_LIKELY(sec.sh_type == SHT_PROGBITS))
      continue;
    if (LLVM_LIKELY(sec.sh_type == SHT_GROUP)) {
      StringRef signature = getShtGroupSignature(objSections, sec);
      ArrayRef<Elf_Word> entries =
          CHECK2(obj.template getSectionContentsAsArray<Elf_Word>(sec), this);
      if (entries.empty())
        Fatal(ctx) << this << ": empty SHT_GROUP";

      // The first word is the group flag; only 0 and GRP_COMDAT are accepted.
      Elf_Word flag = entries[0];
      if (flag && flag != GRP_COMDAT)
        Fatal(ctx) << this << ": unsupported SHT_GROUP format";

      // A COMDAT group is kept only by the first file that registers its
      // signature in comdatGroups. Non-COMDAT groups, and all groups when
      // ignoreComdats is set, are always kept.
      bool keepGroup = !flag || ignoreComdats ||
                       ctx.symtab->comdatGroups
                           .try_emplace(CachedHashStringRef(signature), this)
                           .second;
      if (keepGroup) {
        if (!ctx.arg.resolveGroups)
          sections[i] = createInputSection(
              i, sec, check(obj.getSectionName(sec, shstrtab)));
      } else {
        // Otherwise, discard group members.
        for (uint32_t secIndex : entries.slice(1)) {
          if (secIndex >= size)
            Fatal(ctx) << this
                       << ": invalid section index in group: " << secIndex;
          sections[secIndex] = &InputSection::discarded;
        }
      }
      continue;
    }

    if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !ctx.arg.relocatable) {
      StringRef name = check(obj.getSectionName(sec, shstrtab));
      ArrayRef<char> data = CHECK2(
          this->getObj().template getSectionContentsAsArray<char>(sec), this);
      // The section is a sequence of NUL-terminated specifiers. The loop below
      // builds a StringRef from a bare pointer, so the data must end with NUL.
      if (!data.empty() && data.back() != '\0') {
        Err(ctx)
            << this
            << ": corrupted dependent libraries section (unterminated string): "
            << name;
      } else {
        for (const char *d = data.begin(), *e = data.end(); d < e;) {
          StringRef s(d);
          addDependentLibrary(ctx, s, this);
          // Skip the specifier and its terminating NUL.
          d += s.size() + 1;
        }
      }
      sections[i] = &InputSection::discarded;
      continue;
    }

    switch (ctx.arg.emachine) {
    case EM_ARM:
      if (sec.sh_type == SHT_ARM_ATTRIBUTES) {
        ARMAttributeParser attributes;
        ArrayRef<uint8_t> contents =
            check(this->getObj().getSectionContents(sec));
        StringRef name = check(obj.getSectionName(sec, shstrtab));
        // Discarded by default; the first successfully parsed attribute
        // section is retained below.
        sections[i] = &InputSection::discarded;
        if (Error e = attributes.parse(contents, ekind == ELF32LEKind
                                                     ? llvm::endianness::little
                                                     : llvm::endianness::big)) {
          // A temporary InputSection is created only to name the section in
          // the warning.
          InputSection isec(*this, sec, name);
          Warn(ctx) << &isec << ": " << std::move(e);
        } else {
          updateSupportedARMFeatures(ctx, attributes);
          updateARMVFPArgs(ctx, attributes, this);

          // FIXME: Retain the first attribute section we see. The eglibc ARM
          // dynamic loaders require the presence of an attribute section for
          // dlopen to work. In a full implementation we would merge all
          // attribute sections.
          if (ctx.in.attributes == nullptr) {
            ctx.in.attributes =
                std::make_unique<InputSection>(*this, sec, name);
            sections[i] = ctx.in.attributes.get();
          }
        }
      }
      break;
    case EM_AARCH64:
      // FIXME: BuildAttributes have been implemented in llvm, but not yet in
      // lld. Remove the section so that it does not accumulate in the output
      // file. When support is implemented we expect not to output a build
      // attributes section in files of type ET_EXEC or ET_SHARED, but ld -r
      // output will need a single merged attributes section.
      if (sec.sh_type == SHT_AARCH64_ATTRIBUTES)
        sections[i] = &InputSection::discarded;
      // Producing a static binary with MTE globals is not currently supported,
      // remove all SHT_AARCH64_MEMTAG_GLOBALS_STATIC sections as they're unused
      // metadata, and we don't want them to end up in the output file for
      // static executables.
      if (sec.sh_type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC &&
          !canHaveMemtagGlobals(ctx))
        sections[i] = &InputSection::discarded;
      break;
    }
  }

  // Read a symbol table.
  initializeSymbols(obj);
}

// Sections with SHT_GROUP and comdat bits define comdat section groups.
// They are identified and deduplicated by group name. This function
// returns a group name.
//
// The name is that of the symbol whose index is stored in the group section's
// sh_info; an out-of-range index is a fatal error.
template <class ELFT>
StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
                                              const Elf_Shdr &sec) {
  typename ELFT::SymRange symbols = this->getELFSyms<ELFT>();
  if (sec.sh_info >= symbols.size())
    Fatal(ctx) << this << ": invalid symbol index";
  const typename ELFT::Sym &sym = symbols[sec.sh_info];
  return CHECK2(sym.getName(this->stringTable), this);
}

// Returns true if the section described by `sec` should be handled as a
// mergeable section. `name` is used only in diagnostics.
template <class ELFT>
bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) {
  // On a regular link we don't merge sections if -O0 (default is -O1). This
  // sometimes makes the linker significantly faster, although the output will
  // be bigger.
  //
  // Doing the same for -r would create a problem as it would combine sections
  // with different sh_entsize. One option would be to just copy every SHF_MERGE
  // section as is to the output. While this would produce a valid ELF file with
  // usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when
  // they see two .debug_str. We could have separate logic for combining
  // SHF_MERGE sections based both on their name and sh_entsize, but that seems
  // to be more trouble than it is worth. Instead, we just use the regular (-O1)
  // logic for -r.
  if (ctx.arg.optimize == 0 && !ctx.arg.relocatable)
    return false;

  // A mergeable section with size 0 is useless because they don't have
  // any data to merge. A mergeable string section with size 0 can be
  // argued as invalid because it doesn't end with a null character.
  // We'll avoid a mess by handling them as if they were non-mergeable.
  if (sec.sh_size == 0)
    return false;

  // Check for sh_entsize. The ELF spec is not clear about the zero
  // sh_entsize. It says that "the member [sh_entsize] contains 0 if
  // the section does not hold a table of fixed-size entries". We know
  // that Rust 1.13 produces a string mergeable section with a zero
  // sh_entsize. Here we just accept it rather than being picky about it:
  // no diagnostic is issued and the section is treated as non-mergeable.
  uint64_t entSize = sec.sh_entsize;
  if (entSize == 0)
    return false;
  // Both problems below are reported, but the function still returns true.
  if (sec.sh_size % entSize)
    ErrAlways(ctx) << this << ":(" << name << "): SHF_MERGE section size ("
                   << uint64_t(sec.sh_size)
                   << ") must be a multiple of sh_entsize (" << entSize << ")";
  if (sec.sh_flags & SHF_WRITE)
    Err(ctx) << this << ":(" << name
             << "): writable SHF_MERGE section is not supported";

  return true;
}

// This is for --just-symbols.
720 // 721 // --just-symbols is a very minor feature that allows you to link your 722 // output against other existing program, so that if you load both your 723 // program and the other program into memory, your output can refer the 724 // other program's symbols. 725 // 726 // When the option is given, we link "just symbols". The section table is 727 // initialized with null pointers. 728 template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() { 729 sections.resize(numELFShdrs); 730 } 731 732 static bool isKnownSpecificSectionType(uint32_t t, uint32_t flags) { 733 if (SHT_LOUSER <= t && t <= SHT_HIUSER && !(flags & SHF_ALLOC)) 734 return true; 735 if (SHT_LOOS <= t && t <= SHT_HIOS && !(flags & SHF_OS_NONCONFORMING)) 736 return true; 737 // Allow all processor-specific types. This is different from GNU ld. 738 return SHT_LOPROC <= t && t <= SHT_HIPROC; 739 } 740 741 template <class ELFT> 742 void ObjFile<ELFT>::initializeSections(bool ignoreComdats, 743 const llvm::object::ELFFile<ELFT> &obj) { 744 ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>(); 745 StringRef shstrtab = CHECK2(obj.getSectionStringTable(objSections), this); 746 uint64_t size = objSections.size(); 747 SmallVector<ArrayRef<Elf_Word>, 0> selectedGroups; 748 for (size_t i = 0; i != size; ++i) { 749 if (this->sections[i] == &InputSection::discarded) 750 continue; 751 const Elf_Shdr &sec = objSections[i]; 752 const uint32_t type = sec.sh_type; 753 754 // SHF_EXCLUDE'ed sections are discarded by the linker. However, 755 // if -r is given, we'll let the final link discard such sections. 756 // This is compatible with GNU. 757 if ((sec.sh_flags & SHF_EXCLUDE) && !ctx.arg.relocatable) { 758 if (type == SHT_LLVM_CALL_GRAPH_PROFILE) 759 cgProfileSectionIndex = i; 760 if (type == SHT_LLVM_ADDRSIG) { 761 // We ignore the address-significance table if we know that the object 762 // file was created by objcopy or ld -r. 
This is because these tools 763 // will reorder the symbols in the symbol table, invalidating the data 764 // in the address-significance table, which refers to symbols by index. 765 if (sec.sh_link != 0) 766 this->addrsigSec = &sec; 767 else if (ctx.arg.icf == ICFLevel::Safe) 768 Warn(ctx) << this 769 << ": --icf=safe conservatively ignores " 770 "SHT_LLVM_ADDRSIG [index " 771 << i 772 << "] with sh_link=0 " 773 "(likely created using objcopy or ld -r)"; 774 } 775 this->sections[i] = &InputSection::discarded; 776 continue; 777 } 778 779 switch (type) { 780 case SHT_GROUP: { 781 if (!ctx.arg.relocatable) 782 sections[i] = &InputSection::discarded; 783 StringRef signature = 784 cantFail(this->getELFSyms<ELFT>()[sec.sh_info].getName(stringTable)); 785 ArrayRef<Elf_Word> entries = 786 cantFail(obj.template getSectionContentsAsArray<Elf_Word>(sec)); 787 if ((entries[0] & GRP_COMDAT) == 0 || ignoreComdats || 788 ctx.symtab->comdatGroups.find(CachedHashStringRef(signature)) 789 ->second == this) 790 selectedGroups.push_back(entries); 791 break; 792 } 793 case SHT_SYMTAB_SHNDX: 794 shndxTable = CHECK2(obj.getSHNDXTable(sec, objSections), this); 795 break; 796 case SHT_SYMTAB: 797 case SHT_STRTAB: 798 case SHT_REL: 799 case SHT_RELA: 800 case SHT_CREL: 801 case SHT_NULL: 802 break; 803 case SHT_PROGBITS: 804 case SHT_NOTE: 805 case SHT_NOBITS: 806 case SHT_INIT_ARRAY: 807 case SHT_FINI_ARRAY: 808 case SHT_PREINIT_ARRAY: 809 this->sections[i] = 810 createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab))); 811 break; 812 case SHT_LLVM_LTO: 813 // Discard .llvm.lto in a relocatable link that does not use the bitcode. 814 // The concatenated output does not properly reflect the linking 815 // semantics. In addition, since we do not use the bitcode wrapper format, 816 // the concatenated raw bitcode would be invalid. 
817 if (ctx.arg.relocatable && !ctx.arg.fatLTOObjects) { 818 sections[i] = &InputSection::discarded; 819 break; 820 } 821 [[fallthrough]]; 822 default: 823 this->sections[i] = 824 createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab))); 825 if (type == SHT_LLVM_SYMPART) 826 ctx.hasSympart.store(true, std::memory_order_relaxed); 827 else if (ctx.arg.rejectMismatch && 828 !isKnownSpecificSectionType(type, sec.sh_flags)) 829 Err(ctx) << this->sections[i] << ": unknown section type 0x" 830 << Twine::utohexstr(type); 831 break; 832 } 833 } 834 835 // We have a second loop. It is used to: 836 // 1) handle SHF_LINK_ORDER sections. 837 // 2) create relocation sections. In some cases the section header index of a 838 // relocation section may be smaller than that of the relocated section. In 839 // such cases, the relocation section would attempt to reference a target 840 // section that has not yet been created. For simplicity, delay creation of 841 // relocation sections until now. 842 for (size_t i = 0; i != size; ++i) { 843 if (this->sections[i] == &InputSection::discarded) 844 continue; 845 const Elf_Shdr &sec = objSections[i]; 846 847 if (isStaticRelSecType(sec.sh_type)) { 848 // Find a relocation target section and associate this section with that. 849 // Target may have been discarded if it is in a different section group 850 // and the group is discarded, even though it's a violation of the spec. 851 // We handle that situation gracefully by discarding dangling relocation 852 // sections. 853 const uint32_t info = sec.sh_info; 854 InputSectionBase *s = getRelocTarget(i, info); 855 if (!s) 856 continue; 857 858 // ELF spec allows mergeable sections with relocations, but they are rare, 859 // and it is in practice hard to merge such sections by contents, because 860 // applying relocations at end of linking changes section contents. So, we 861 // simply handle such sections as non-mergeable ones. 
Degrading like this 862 // is acceptable because section merging is optional. 863 if (auto *ms = dyn_cast<MergeInputSection>(s)) { 864 s = makeThreadLocal<InputSection>(ms->file, ms->name, ms->type, 865 ms->flags, ms->addralign, ms->entsize, 866 ms->contentMaybeDecompress()); 867 sections[info] = s; 868 } 869 870 if (s->relSecIdx != 0) 871 ErrAlways(ctx) << s 872 << ": multiple relocation sections to one section are " 873 "not supported"; 874 s->relSecIdx = i; 875 876 // Relocation sections are usually removed from the output, so return 877 // `nullptr` for the normal case. However, if -r or --emit-relocs is 878 // specified, we need to copy them to the output. (Some post link analysis 879 // tools specify --emit-relocs to obtain the information.) 880 if (ctx.arg.copyRelocs) { 881 auto *isec = makeThreadLocal<InputSection>( 882 *this, sec, check(obj.getSectionName(sec, shstrtab))); 883 // If the relocated section is discarded (due to /DISCARD/ or 884 // --gc-sections), the relocation section should be discarded as well. 885 s->dependentSections.push_back(isec); 886 sections[i] = isec; 887 } 888 continue; 889 } 890 891 // A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have 892 // the flag. 893 if (!sec.sh_link || !(sec.sh_flags & SHF_LINK_ORDER)) 894 continue; 895 896 InputSectionBase *linkSec = nullptr; 897 if (sec.sh_link < size) 898 linkSec = this->sections[sec.sh_link]; 899 if (!linkSec) { 900 ErrAlways(ctx) << this 901 << ": invalid sh_link index: " << uint32_t(sec.sh_link); 902 continue; 903 } 904 905 // A SHF_LINK_ORDER section is discarded if its linked-to section is 906 // discarded. 
907 InputSection *isec = cast<InputSection>(this->sections[i]); 908 linkSec->dependentSections.push_back(isec); 909 if (!isa<InputSection>(linkSec)) 910 ErrAlways(ctx) 911 << "a section " << isec->name 912 << " with SHF_LINK_ORDER should not refer a non-regular section: " 913 << linkSec; 914 } 915 916 for (ArrayRef<Elf_Word> entries : selectedGroups) 917 handleSectionGroup<ELFT>(this->sections, entries); 918 } 919 920 template <typename ELFT> 921 static void parseGnuPropertyNote(Ctx &ctx, ELFFileBase &f, 922 uint32_t featureAndType, 923 ArrayRef<uint8_t> &desc, const uint8_t *base, 924 ArrayRef<uint8_t> *data = nullptr) { 925 auto err = [&](const uint8_t *place) -> ELFSyncStream { 926 auto diag = Err(ctx); 927 diag << &f << ":(" << ".note.gnu.property+0x" 928 << Twine::utohexstr(place - base) << "): "; 929 return diag; 930 }; 931 932 while (!desc.empty()) { 933 const uint8_t *place = desc.data(); 934 if (desc.size() < 8) 935 return void(err(place) << "program property is too short"); 936 uint32_t type = read32<ELFT::Endianness>(desc.data()); 937 uint32_t size = read32<ELFT::Endianness>(desc.data() + 4); 938 desc = desc.slice(8); 939 if (desc.size() < size) 940 return void(err(place) << "program property is too short"); 941 942 if (type == featureAndType) { 943 // We found a FEATURE_1_AND field. There may be more than one of these 944 // in a .note.gnu.property section, for a relocatable object we 945 // accumulate the bits set. 946 if (size < 4) 947 return void(err(place) << "FEATURE_1_AND entry is too short"); 948 f.andFeatures |= read32<ELFT::Endianness>(desc.data()); 949 } else if (ctx.arg.emachine == EM_AARCH64 && 950 type == GNU_PROPERTY_AARCH64_FEATURE_PAUTH) { 951 ArrayRef<uint8_t> contents = data ? 
*data : desc; 952 if (f.aarch64PauthAbiCoreInfo) { 953 return void( 954 err(contents.data()) 955 << "multiple GNU_PROPERTY_AARCH64_FEATURE_PAUTH entries are " 956 "not supported"); 957 } else if (size != 16) { 958 return void(err(contents.data()) 959 << "GNU_PROPERTY_AARCH64_FEATURE_PAUTH entry " 960 "is invalid: expected 16 bytes, but got " 961 << size); 962 } 963 f.aarch64PauthAbiCoreInfo = { 964 support::endian::read64<ELFT::Endianness>(&desc[0]), 965 support::endian::read64<ELFT::Endianness>(&desc[8])}; 966 } 967 968 // Padding is present in the note descriptor, if necessary. 969 desc = desc.slice(alignTo<(ELFT::Is64Bits ? 8 : 4)>(size)); 970 } 971 } 972 // Read the following info from the .note.gnu.property section and write it to 973 // the corresponding fields in `ObjFile`: 974 // - Feature flags (32 bits) representing x86, AArch64 or RISC-V features for 975 // hardware-assisted call flow control; 976 // - AArch64 PAuth ABI core info (16 bytes). 977 template <class ELFT> 978 static void readGnuProperty(Ctx &ctx, const InputSection &sec, 979 ObjFile<ELFT> &f) { 980 using Elf_Nhdr = typename ELFT::Nhdr; 981 using Elf_Note = typename ELFT::Note; 982 983 uint32_t featureAndType; 984 switch (ctx.arg.emachine) { 985 case EM_386: 986 case EM_X86_64: 987 featureAndType = GNU_PROPERTY_X86_FEATURE_1_AND; 988 break; 989 case EM_AARCH64: 990 featureAndType = GNU_PROPERTY_AARCH64_FEATURE_1_AND; 991 break; 992 case EM_RISCV: 993 featureAndType = GNU_PROPERTY_RISCV_FEATURE_1_AND; 994 break; 995 default: 996 return; 997 } 998 999 ArrayRef<uint8_t> data = sec.content(); 1000 auto err = [&](const uint8_t *place) -> ELFSyncStream { 1001 auto diag = Err(ctx); 1002 diag << sec.file << ":(" << sec.name << "+0x" 1003 << Twine::utohexstr(place - sec.content().data()) << "): "; 1004 return diag; 1005 }; 1006 while (!data.empty()) { 1007 // Read one NOTE record. 
1008 auto *nhdr = reinterpret_cast<const Elf_Nhdr *>(data.data()); 1009 if (data.size() < sizeof(Elf_Nhdr) || 1010 data.size() < nhdr->getSize(sec.addralign)) 1011 return void(err(data.data()) << "data is too short"); 1012 1013 Elf_Note note(*nhdr); 1014 if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") { 1015 data = data.slice(nhdr->getSize(sec.addralign)); 1016 continue; 1017 } 1018 1019 // Read a body of a NOTE record, which consists of type-length-value fields. 1020 ArrayRef<uint8_t> desc = note.getDesc(sec.addralign); 1021 const uint8_t *base = sec.content().data(); 1022 parseGnuPropertyNote<ELFT>(ctx, f, featureAndType, desc, base, &data); 1023 1024 // Go to next NOTE record to look for more FEATURE_1_AND descriptions. 1025 data = data.slice(nhdr->getSize(sec.addralign)); 1026 } 1027 } 1028 1029 template <class ELFT> 1030 InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx, uint32_t info) { 1031 if (info < this->sections.size()) { 1032 InputSectionBase *target = this->sections[info]; 1033 1034 // Strictly speaking, a relocation section must be included in the 1035 // group of the section it relocates. However, LLVM 3.3 and earlier 1036 // would fail to do so, so we gracefully handle that case. 1037 if (target == &InputSection::discarded) 1038 return nullptr; 1039 1040 if (target != nullptr) 1041 return target; 1042 } 1043 1044 Err(ctx) << this << ": relocation section (index " << idx 1045 << ") has invalid sh_info (" << info << ')'; 1046 return nullptr; 1047 } 1048 1049 // The function may be called concurrently for different input files. For 1050 // allocation, prefer makeThreadLocal which does not require holding a lock. 
1051 template <class ELFT> 1052 InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx, 1053 const Elf_Shdr &sec, 1054 StringRef name) { 1055 if (name.starts_with(".n")) { 1056 // The GNU linker uses .note.GNU-stack section as a marker indicating 1057 // that the code in the object file does not expect that the stack is 1058 // executable (in terms of NX bit). If all input files have the marker, 1059 // the GNU linker adds a PT_GNU_STACK segment to tells the loader to 1060 // make the stack non-executable. Most object files have this section as 1061 // of 2017. 1062 // 1063 // But making the stack non-executable is a norm today for security 1064 // reasons. Failure to do so may result in a serious security issue. 1065 // Therefore, we make LLD always add PT_GNU_STACK unless it is 1066 // explicitly told to do otherwise (by -z execstack). Because the stack 1067 // executable-ness is controlled solely by command line options, 1068 // .note.GNU-stack sections are, with one exception, ignored. Report 1069 // an error if we encounter an executable .note.GNU-stack to force the 1070 // user to explicitly request an executable stack. 1071 if (name == ".note.GNU-stack") { 1072 if ((sec.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable && 1073 ctx.arg.zGnustack != GnuStackKind::Exec) { 1074 Err(ctx) << this 1075 << ": requires an executable stack, but -z execstack is not " 1076 "specified"; 1077 } 1078 return &InputSection::discarded; 1079 } 1080 1081 // Object files that use processor features such as Intel Control-Flow 1082 // Enforcement (CET), AArch64 Branch Target Identification BTI or RISC-V 1083 // Zicfilp/Zicfiss extensions, use a .note.gnu.property section containing 1084 // a bitfield of feature bits like the GNU_PROPERTY_X86_FEATURE_1_IBT flag. 1085 // 1086 // Since we merge bitmaps from multiple object files to create a new 1087 // .note.gnu.property containing a single AND'ed bitmap, we discard an input 1088 // file's .note.gnu.property section. 
1089 if (name == ".note.gnu.property") { 1090 readGnuProperty<ELFT>(ctx, InputSection(*this, sec, name), *this); 1091 return &InputSection::discarded; 1092 } 1093 1094 // Split stacks is a feature to support a discontiguous stack, 1095 // commonly used in the programming language Go. For the details, 1096 // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled 1097 // for split stack will include a .note.GNU-split-stack section. 1098 if (name == ".note.GNU-split-stack") { 1099 if (ctx.arg.relocatable) { 1100 ErrAlways(ctx) << "cannot mix split-stack and non-split-stack in a " 1101 "relocatable link"; 1102 return &InputSection::discarded; 1103 } 1104 this->splitStack = true; 1105 return &InputSection::discarded; 1106 } 1107 1108 // An object file compiled for split stack, but where some of the 1109 // functions were compiled with the no_split_stack_attribute will 1110 // include a .note.GNU-no-split-stack section. 1111 if (name == ".note.GNU-no-split-stack") { 1112 this->someNoSplitStack = true; 1113 return &InputSection::discarded; 1114 } 1115 1116 // Strip existing .note.gnu.build-id sections so that the output won't have 1117 // more than one build-id. This is not usually a problem because input 1118 // object files normally don't have .build-id sections, but you can create 1119 // such files by "ld.{bfd,gold,lld} -r --build-id", and we want to guard 1120 // against it. 1121 if (name == ".note.gnu.build-id") 1122 return &InputSection::discarded; 1123 } 1124 1125 // The linker merges EH (exception handling) frames and creates a 1126 // .eh_frame_hdr section for runtime. So we handle them with a special 1127 // class. For relocatable outputs, they are just passed through. 
1128 if (name == ".eh_frame" && !ctx.arg.relocatable) 1129 return makeThreadLocal<EhInputSection>(*this, sec, name); 1130 1131 if ((sec.sh_flags & SHF_MERGE) && shouldMerge(sec, name)) 1132 return makeThreadLocal<MergeInputSection>(*this, sec, name); 1133 return makeThreadLocal<InputSection>(*this, sec, name); 1134 } 1135 1136 // Initialize symbols. symbols is a parallel array to the corresponding ELF 1137 // symbol table. 1138 template <class ELFT> 1139 void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) { 1140 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 1141 if (!symbols) 1142 symbols = std::make_unique<Symbol *[]>(numSymbols); 1143 1144 // Some entries have been filled by LazyObjFile. 1145 auto *symtab = ctx.symtab.get(); 1146 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) 1147 if (!symbols[i]) 1148 symbols[i] = symtab->insert(CHECK2(eSyms[i].getName(stringTable), this)); 1149 1150 // Perform symbol resolution on non-local symbols. 1151 SmallVector<unsigned, 32> undefineds; 1152 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 1153 const Elf_Sym &eSym = eSyms[i]; 1154 uint32_t secIdx = eSym.st_shndx; 1155 if (secIdx == SHN_UNDEF) { 1156 undefineds.push_back(i); 1157 continue; 1158 } 1159 1160 uint8_t binding = eSym.getBinding(); 1161 uint8_t stOther = eSym.st_other; 1162 uint8_t type = eSym.getType(); 1163 uint64_t value = eSym.st_value; 1164 uint64_t size = eSym.st_size; 1165 1166 Symbol *sym = symbols[i]; 1167 sym->isUsedInRegularObj = true; 1168 if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) { 1169 if (value == 0 || value >= UINT32_MAX) 1170 Err(ctx) << this << ": common symbol '" << sym->getName() 1171 << "' has invalid alignment: " << value; 1172 hasCommonSyms = true; 1173 sym->resolve(ctx, CommonSymbol{ctx, this, StringRef(), binding, stOther, 1174 type, value, size}); 1175 continue; 1176 } 1177 1178 // Handle global defined symbols. Defined::section will be set in postParse. 
1179 sym->resolve(ctx, Defined{ctx, this, StringRef(), binding, stOther, type, 1180 value, size, nullptr}); 1181 } 1182 1183 // Undefined symbols (excluding those defined relative to non-prevailing 1184 // sections) can trigger recursive extract. Process defined symbols first so 1185 // that the relative order between a defined symbol and an undefined symbol 1186 // does not change the symbol resolution behavior. In addition, a set of 1187 // interconnected symbols will all be resolved to the same file, instead of 1188 // being resolved to different files. 1189 for (unsigned i : undefineds) { 1190 const Elf_Sym &eSym = eSyms[i]; 1191 Symbol *sym = symbols[i]; 1192 sym->resolve(ctx, Undefined{this, StringRef(), eSym.getBinding(), 1193 eSym.st_other, eSym.getType()}); 1194 sym->isUsedInRegularObj = true; 1195 sym->referenced = true; 1196 } 1197 } 1198 1199 template <class ELFT> 1200 void ObjFile<ELFT>::initSectionsAndLocalSyms(bool ignoreComdats) { 1201 if (!justSymbols) 1202 initializeSections(ignoreComdats, getObj()); 1203 1204 if (!firstGlobal) 1205 return; 1206 SymbolUnion *locals = makeThreadLocalN<SymbolUnion>(firstGlobal); 1207 memset(locals, 0, sizeof(SymbolUnion) * firstGlobal); 1208 1209 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 1210 for (size_t i = 0, end = firstGlobal; i != end; ++i) { 1211 const Elf_Sym &eSym = eSyms[i]; 1212 uint32_t secIdx = eSym.st_shndx; 1213 if (LLVM_UNLIKELY(secIdx == SHN_XINDEX)) 1214 secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable)); 1215 else if (secIdx >= SHN_LORESERVE) 1216 secIdx = 0; 1217 if (LLVM_UNLIKELY(secIdx >= sections.size())) { 1218 Err(ctx) << this << ": invalid section index: " << secIdx; 1219 secIdx = 0; 1220 } 1221 if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL)) 1222 ErrAlways(ctx) << this << ": non-local symbol (" << i 1223 << ") found at index < .symtab's sh_info (" << end << ")"; 1224 1225 InputSectionBase *sec = sections[secIdx]; 1226 uint8_t type = eSym.getType(); 1227 if 
(type == STT_FILE) 1228 sourceFile = CHECK2(eSym.getName(stringTable), this); 1229 unsigned stName = eSym.st_name; 1230 if (LLVM_UNLIKELY(stringTable.size() <= stName)) { 1231 Err(ctx) << this << ": invalid symbol name offset"; 1232 stName = 0; 1233 } 1234 StringRef name(stringTable.data() + stName); 1235 1236 symbols[i] = reinterpret_cast<Symbol *>(locals + i); 1237 if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded) 1238 new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type, 1239 /*discardedSecIdx=*/secIdx); 1240 else 1241 new (symbols[i]) Defined(ctx, this, name, STB_LOCAL, eSym.st_other, type, 1242 eSym.st_value, eSym.st_size, sec); 1243 symbols[i]->partition = 1; 1244 symbols[i]->isUsedInRegularObj = true; 1245 } 1246 } 1247 1248 // Called after all ObjFile::parse is called for all ObjFiles. This checks 1249 // duplicate symbols and may do symbol property merge in the future. 1250 template <class ELFT> void ObjFile<ELFT>::postParse() { 1251 static std::mutex mu; 1252 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 1253 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 1254 const Elf_Sym &eSym = eSyms[i]; 1255 Symbol &sym = *symbols[i]; 1256 uint32_t secIdx = eSym.st_shndx; 1257 uint8_t binding = eSym.getBinding(); 1258 if (LLVM_UNLIKELY(binding != STB_GLOBAL && binding != STB_WEAK && 1259 binding != STB_GNU_UNIQUE)) 1260 Err(ctx) << this << ": symbol (" << i 1261 << ") has invalid binding: " << (int)binding; 1262 1263 // st_value of STT_TLS represents the assigned offset, not the actual 1264 // address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can 1265 // only be referenced by special TLS relocations. It is usually an error if 1266 // a STT_TLS symbol is replaced by a non-STT_TLS symbol, vice versa. 
1267 if (LLVM_UNLIKELY(sym.isTls()) && eSym.getType() != STT_TLS && 1268 eSym.getType() != STT_NOTYPE) 1269 Err(ctx) << "TLS attribute mismatch: " << &sym << "\n>>> in " << sym.file 1270 << "\n>>> in " << this; 1271 1272 // Handle non-COMMON defined symbol below. !sym.file allows a symbol 1273 // assignment to redefine a symbol without an error. 1274 if (!sym.isDefined() || secIdx == SHN_UNDEF) 1275 continue; 1276 if (LLVM_UNLIKELY(secIdx >= SHN_LORESERVE)) { 1277 if (secIdx == SHN_COMMON) 1278 continue; 1279 if (secIdx == SHN_XINDEX) 1280 secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable)); 1281 else 1282 secIdx = 0; 1283 } 1284 1285 if (LLVM_UNLIKELY(secIdx >= sections.size())) { 1286 Err(ctx) << this << ": invalid section index: " << secIdx; 1287 continue; 1288 } 1289 InputSectionBase *sec = sections[secIdx]; 1290 if (sec == &InputSection::discarded) { 1291 if (sym.traced) { 1292 printTraceSymbol(Undefined{this, sym.getName(), sym.binding, 1293 sym.stOther, sym.type, secIdx}, 1294 sym.getName()); 1295 } 1296 if (sym.file == this) { 1297 std::lock_guard<std::mutex> lock(mu); 1298 ctx.nonPrevailingSyms.emplace_back(&sym, secIdx); 1299 } 1300 continue; 1301 } 1302 1303 if (sym.file == this) { 1304 cast<Defined>(sym).section = sec; 1305 continue; 1306 } 1307 1308 if (sym.binding == STB_WEAK || binding == STB_WEAK) 1309 continue; 1310 std::lock_guard<std::mutex> lock(mu); 1311 ctx.duplicates.push_back({&sym, this, sec, eSym.st_value}); 1312 } 1313 } 1314 1315 // The handling of tentative definitions (COMMON symbols) in archives is murky. 1316 // A tentative definition will be promoted to a global definition if there are 1317 // no non-tentative definitions to dominate it. 
// When we hold a tentative
// definition to a symbol and are inspecting archive members for inclusion
// there are 2 ways we can proceed:
//
// 1) Consider the tentative definition a 'real' definition (i.e. promotion
//    from tentative to real definition has already happened) and not inspect
//    archive members for Global/Weak definitions to replace the tentative
//    definition. An archive member would only be included if it satisfies some
//    other undefined symbol. This is the behavior Gold uses.
//
// 2) Consider the tentative definition as still undefined (i.e. the promotion
//    to a real definition happens only after all symbol resolution is done).
//    The linker searches archive members for STB_GLOBAL definitions to
//    replace the tentative definition with. This is the behavior used by
//    GNU ld.
//
// The second behavior is inherited from SysVR4, which based it on the FORTRAN
// COMMON BLOCK model. This behavior is needed for proper initialization in old
// (pre F90) FORTRAN code that is packaged into an archive.
//
// The following functions search archive members for definitions to replace
// tentative definitions (implementing behavior 2).
1339 static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName, 1340 StringRef archiveName) { 1341 IRSymtabFile symtabFile = check(readIRSymtab(mb)); 1342 for (const irsymtab::Reader::SymbolRef &sym : 1343 symtabFile.TheReader.symbols()) { 1344 if (sym.isGlobal() && sym.getName() == symName) 1345 return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon(); 1346 } 1347 return false; 1348 } 1349 1350 template <class ELFT> 1351 static bool isNonCommonDef(Ctx &ctx, ELFKind ekind, MemoryBufferRef mb, 1352 StringRef symName, StringRef archiveName) { 1353 ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(ctx, ekind, mb, archiveName); 1354 obj->init(); 1355 StringRef stringtable = obj->getStringTable(); 1356 1357 for (auto sym : obj->template getGlobalELFSyms<ELFT>()) { 1358 Expected<StringRef> name = sym.getName(stringtable); 1359 if (name && name.get() == symName) 1360 return sym.isDefined() && sym.getBinding() == STB_GLOBAL && 1361 !sym.isCommon(); 1362 } 1363 return false; 1364 } 1365 1366 static bool isNonCommonDef(Ctx &ctx, MemoryBufferRef mb, StringRef symName, 1367 StringRef archiveName) { 1368 switch (getELFKind(ctx, mb, archiveName)) { 1369 case ELF32LEKind: 1370 return isNonCommonDef<ELF32LE>(ctx, ELF32LEKind, mb, symName, archiveName); 1371 case ELF32BEKind: 1372 return isNonCommonDef<ELF32BE>(ctx, ELF32BEKind, mb, symName, archiveName); 1373 case ELF64LEKind: 1374 return isNonCommonDef<ELF64LE>(ctx, ELF64LEKind, mb, symName, archiveName); 1375 case ELF64BEKind: 1376 return isNonCommonDef<ELF64BE>(ctx, ELF64BEKind, mb, symName, archiveName); 1377 default: 1378 llvm_unreachable("getELFKind"); 1379 } 1380 } 1381 1382 SharedFile::SharedFile(Ctx &ctx, MemoryBufferRef m, StringRef defaultSoName) 1383 : ELFFileBase(ctx, SharedKind, getELFKind(ctx, m, ""), m), 1384 soName(defaultSoName), isNeeded(!ctx.arg.asNeeded) {} 1385 1386 // Parse the version definitions in the object file if present, and return a 1387 // vector whose nth element contains a pointer to the 
Elf_Verdef for version 1388 // identifier n. Version identifiers that are not definitions map to nullptr. 1389 template <typename ELFT> 1390 static SmallVector<const void *, 0> 1391 parseVerdefs(const uint8_t *base, const typename ELFT::Shdr *sec) { 1392 if (!sec) 1393 return {}; 1394 1395 // Build the Verdefs array by following the chain of Elf_Verdef objects 1396 // from the start of the .gnu.version_d section. 1397 SmallVector<const void *, 0> verdefs; 1398 const uint8_t *verdef = base + sec->sh_offset; 1399 for (unsigned i = 0, e = sec->sh_info; i != e; ++i) { 1400 auto *curVerdef = reinterpret_cast<const typename ELFT::Verdef *>(verdef); 1401 verdef += curVerdef->vd_next; 1402 unsigned verdefIndex = curVerdef->vd_ndx; 1403 if (verdefIndex >= verdefs.size()) 1404 verdefs.resize(verdefIndex + 1); 1405 verdefs[verdefIndex] = curVerdef; 1406 } 1407 return verdefs; 1408 } 1409 1410 // Parse SHT_GNU_verneed to properly set the name of a versioned undefined 1411 // symbol. We detect fatal issues which would cause vulnerabilities, but do not 1412 // implement sophisticated error checking like in llvm-readobj because the value 1413 // of such diagnostics is low. 
1414 template <typename ELFT> 1415 std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj, 1416 const typename ELFT::Shdr *sec) { 1417 if (!sec) 1418 return {}; 1419 std::vector<uint32_t> verneeds; 1420 ArrayRef<uint8_t> data = CHECK2(obj.getSectionContents(*sec), this); 1421 const uint8_t *verneedBuf = data.begin(); 1422 for (unsigned i = 0; i != sec->sh_info; ++i) { 1423 if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end()) { 1424 Err(ctx) << this << " has an invalid Verneed"; 1425 break; 1426 } 1427 auto *vn = reinterpret_cast<const typename ELFT::Verneed *>(verneedBuf); 1428 const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux; 1429 for (unsigned j = 0; j != vn->vn_cnt; ++j) { 1430 if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end()) { 1431 Err(ctx) << this << " has an invalid Vernaux"; 1432 break; 1433 } 1434 auto *aux = reinterpret_cast<const typename ELFT::Vernaux *>(vernauxBuf); 1435 if (aux->vna_name >= this->stringTable.size()) { 1436 Err(ctx) << this << " has a Vernaux with an invalid vna_name"; 1437 break; 1438 } 1439 uint16_t version = aux->vna_other & VERSYM_VERSION; 1440 if (version >= verneeds.size()) 1441 verneeds.resize(version + 1); 1442 verneeds[version] = aux->vna_name; 1443 vernauxBuf += aux->vna_next; 1444 } 1445 verneedBuf += vn->vn_next; 1446 } 1447 return verneeds; 1448 } 1449 1450 // Parse PT_GNU_PROPERTY segments in DSO. The process is similar to 1451 // readGnuProperty, but we don't have the InputSection information. 
1452 template <typename ELFT> 1453 void SharedFile::parseGnuAndFeatures(const ELFFile<ELFT> &obj) { 1454 if (ctx.arg.emachine != EM_AARCH64) 1455 return; 1456 const uint8_t *base = obj.base(); 1457 auto phdrs = CHECK2(obj.program_headers(), this); 1458 for (auto phdr : phdrs) { 1459 if (phdr.p_type != PT_GNU_PROPERTY) 1460 continue; 1461 typename ELFT::Note note( 1462 *reinterpret_cast<const typename ELFT::Nhdr *>(base + phdr.p_offset)); 1463 if (note.getType() != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") 1464 continue; 1465 1466 ArrayRef<uint8_t> desc = note.getDesc(phdr.p_align); 1467 parseGnuPropertyNote<ELFT>(ctx, *this, GNU_PROPERTY_AARCH64_FEATURE_1_AND, 1468 desc, base); 1469 } 1470 } 1471 1472 // We do not usually care about alignments of data in shared object 1473 // files because the loader takes care of it. However, if we promote a 1474 // DSO symbol to point to .bss due to copy relocation, we need to keep 1475 // the original alignment requirements. We infer it in this function. 1476 template <typename ELFT> 1477 static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> sections, 1478 const typename ELFT::Sym &sym) { 1479 uint64_t ret = UINT64_MAX; 1480 if (sym.st_value) 1481 ret = 1ULL << llvm::countr_zero((uint64_t)sym.st_value); 1482 if (0 < sym.st_shndx && sym.st_shndx < sections.size()) 1483 ret = std::min<uint64_t>(ret, sections[sym.st_shndx].sh_addralign); 1484 return (ret > UINT32_MAX) ? 0 : ret; 1485 } 1486 1487 // Fully parse the shared object file. 1488 // 1489 // This function parses symbol versions. If a DSO has version information, 1490 // the file has a ".gnu.version_d" section which contains symbol version 1491 // definitions. Each symbol is associated to one version through a table in 1492 // ".gnu.version" section. That table is a parallel array for the symbol 1493 // table, and each table entry contains an index in ".gnu.version_d". 
1494 // 1495 // The special index 0 is reserved for VERF_NDX_LOCAL and 1 is for 1496 // VER_NDX_GLOBAL. There's no table entry for these special versions in 1497 // ".gnu.version_d". 1498 // 1499 // The file format for symbol versioning is perhaps a bit more complicated 1500 // than necessary, but you can easily understand the code if you wrap your 1501 // head around the data structure described above. 1502 template <class ELFT> void SharedFile::parse() { 1503 using Elf_Dyn = typename ELFT::Dyn; 1504 using Elf_Shdr = typename ELFT::Shdr; 1505 using Elf_Sym = typename ELFT::Sym; 1506 using Elf_Verdef = typename ELFT::Verdef; 1507 using Elf_Versym = typename ELFT::Versym; 1508 1509 ArrayRef<Elf_Dyn> dynamicTags; 1510 const ELFFile<ELFT> obj = this->getObj<ELFT>(); 1511 ArrayRef<Elf_Shdr> sections = getELFShdrs<ELFT>(); 1512 1513 const Elf_Shdr *versymSec = nullptr; 1514 const Elf_Shdr *verdefSec = nullptr; 1515 const Elf_Shdr *verneedSec = nullptr; 1516 symbols = std::make_unique<Symbol *[]>(numSymbols); 1517 1518 // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d. 1519 for (const Elf_Shdr &sec : sections) { 1520 switch (sec.sh_type) { 1521 default: 1522 continue; 1523 case SHT_DYNAMIC: 1524 dynamicTags = 1525 CHECK2(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this); 1526 break; 1527 case SHT_GNU_versym: 1528 versymSec = &sec; 1529 break; 1530 case SHT_GNU_verdef: 1531 verdefSec = &sec; 1532 break; 1533 case SHT_GNU_verneed: 1534 verneedSec = &sec; 1535 break; 1536 } 1537 } 1538 1539 if (versymSec && numSymbols == 0) { 1540 ErrAlways(ctx) << "SHT_GNU_versym should be associated with symbol table"; 1541 return; 1542 } 1543 1544 // Search for a DT_SONAME tag to initialize this->soName. 
1545 for (const Elf_Dyn &dyn : dynamicTags) { 1546 if (dyn.d_tag == DT_NEEDED) { 1547 uint64_t val = dyn.getVal(); 1548 if (val >= this->stringTable.size()) { 1549 Err(ctx) << this << ": invalid DT_NEEDED entry"; 1550 return; 1551 } 1552 dtNeeded.push_back(this->stringTable.data() + val); 1553 } else if (dyn.d_tag == DT_SONAME) { 1554 uint64_t val = dyn.getVal(); 1555 if (val >= this->stringTable.size()) { 1556 Err(ctx) << this << ": invalid DT_SONAME entry"; 1557 return; 1558 } 1559 soName = this->stringTable.data() + val; 1560 } 1561 } 1562 1563 // DSOs are uniquified not by filename but by soname. 1564 StringSaver &ss = ctx.saver; 1565 DenseMap<CachedHashStringRef, SharedFile *>::iterator it; 1566 bool wasInserted; 1567 std::tie(it, wasInserted) = 1568 ctx.symtab->soNames.try_emplace(CachedHashStringRef(soName), this); 1569 1570 // If a DSO appears more than once on the command line with and without 1571 // --as-needed, --no-as-needed takes precedence over --as-needed because a 1572 // user can add an extra DSO with --no-as-needed to force it to be added to 1573 // the dependency list. 1574 it->second->isNeeded |= isNeeded; 1575 if (!wasInserted) 1576 return; 1577 1578 ctx.sharedFiles.push_back(this); 1579 1580 verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec); 1581 std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec); 1582 parseGnuAndFeatures<ELFT>(obj); 1583 1584 // Parse ".gnu.version" section which is a parallel array for the symbol 1585 // table. If a given file doesn't have a ".gnu.version" section, we use 1586 // VER_NDX_GLOBAL. 
1587 size_t size = numSymbols - firstGlobal; 1588 std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL); 1589 if (versymSec) { 1590 ArrayRef<Elf_Versym> versym = 1591 CHECK2(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec), 1592 this) 1593 .slice(firstGlobal); 1594 for (size_t i = 0; i < size; ++i) 1595 versyms[i] = versym[i].vs_index; 1596 } 1597 1598 // System libraries can have a lot of symbols with versions. Using a 1599 // fixed buffer for computing the versions name (foo@ver) can save a 1600 // lot of allocations. 1601 SmallString<0> versionedNameBuffer; 1602 1603 // Add symbols to the symbol table. 1604 ArrayRef<Elf_Sym> syms = this->getGlobalELFSyms<ELFT>(); 1605 for (size_t i = 0, e = syms.size(); i != e; ++i) { 1606 const Elf_Sym &sym = syms[i]; 1607 1608 // ELF spec requires that all local symbols precede weak or global 1609 // symbols in each symbol table, and the index of first non-local symbol 1610 // is stored to sh_info. If a local symbol appears after some non-local 1611 // symbol, that's a violation of the spec. 1612 StringRef name = CHECK2(sym.getName(stringTable), this); 1613 if (sym.getBinding() == STB_LOCAL) { 1614 Err(ctx) << this << ": invalid local symbol '" << name 1615 << "' in global part of symbol table"; 1616 continue; 1617 } 1618 1619 const uint16_t ver = versyms[i], idx = ver & ~VERSYM_HIDDEN; 1620 if (sym.isUndefined()) { 1621 // For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but 1622 // as of binutils 2.34, GNU ld produces VER_NDX_LOCAL. 
1623 if (ver != VER_NDX_LOCAL && ver != VER_NDX_GLOBAL) { 1624 if (idx >= verneeds.size()) { 1625 ErrAlways(ctx) << "corrupt input file: version need index " << idx 1626 << " for symbol " << name 1627 << " is out of bounds\n>>> defined in " << this; 1628 continue; 1629 } 1630 StringRef verName = stringTable.data() + verneeds[idx]; 1631 versionedNameBuffer.clear(); 1632 name = ss.save((name + "@" + verName).toStringRef(versionedNameBuffer)); 1633 } 1634 Symbol *s = ctx.symtab->addSymbol( 1635 Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); 1636 s->isExported = true; 1637 if (sym.getBinding() != STB_WEAK && 1638 ctx.arg.unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore) 1639 requiredSymbols.push_back(s); 1640 continue; 1641 } 1642 1643 if (ver == VER_NDX_LOCAL || 1644 (ver != VER_NDX_GLOBAL && idx >= verdefs.size())) { 1645 // In GNU ld < 2.31 (before 3be08ea4728b56d35e136af4e6fd3086ade17764), the 1646 // MIPS port puts _gp_disp symbol into DSO files and incorrectly assigns 1647 // VER_NDX_LOCAL. Workaround this bug. 1648 if (ctx.arg.emachine == EM_MIPS && name == "_gp_disp") 1649 continue; 1650 ErrAlways(ctx) << "corrupt input file: version definition index " << idx 1651 << " for symbol " << name 1652 << " is out of bounds\n>>> defined in " << this; 1653 continue; 1654 } 1655 1656 uint32_t alignment = getAlignment<ELFT>(sections, sym); 1657 if (ver == idx) { 1658 auto *s = ctx.symtab->addSymbol( 1659 SharedSymbol{*this, name, sym.getBinding(), sym.st_other, 1660 sym.getType(), sym.st_value, sym.st_size, alignment}); 1661 s->dsoDefined = true; 1662 if (s->file == this) 1663 s->versionId = ver; 1664 } 1665 1666 // Also add the symbol with the versioned name to handle undefined symbols 1667 // with explicit versions. 
1668 if (ver == VER_NDX_GLOBAL) 1669 continue; 1670 1671 StringRef verName = 1672 stringTable.data() + 1673 reinterpret_cast<const Elf_Verdef *>(verdefs[idx])->getAux()->vda_name; 1674 versionedNameBuffer.clear(); 1675 name = (name + "@" + verName).toStringRef(versionedNameBuffer); 1676 auto *s = ctx.symtab->addSymbol( 1677 SharedSymbol{*this, ss.save(name), sym.getBinding(), sym.st_other, 1678 sym.getType(), sym.st_value, sym.st_size, alignment}); 1679 s->dsoDefined = true; 1680 if (s->file == this) 1681 s->versionId = idx; 1682 } 1683 } 1684 1685 static ELFKind getBitcodeELFKind(const Triple &t) { 1686 if (t.isLittleEndian()) 1687 return t.isArch64Bit() ? ELF64LEKind : ELF32LEKind; 1688 return t.isArch64Bit() ? ELF64BEKind : ELF32BEKind; 1689 } 1690 1691 static uint16_t getBitcodeMachineKind(Ctx &ctx, StringRef path, 1692 const Triple &t) { 1693 switch (t.getArch()) { 1694 case Triple::aarch64: 1695 case Triple::aarch64_be: 1696 return EM_AARCH64; 1697 case Triple::amdgcn: 1698 case Triple::r600: 1699 return EM_AMDGPU; 1700 case Triple::arm: 1701 case Triple::armeb: 1702 case Triple::thumb: 1703 case Triple::thumbeb: 1704 return EM_ARM; 1705 case Triple::avr: 1706 return EM_AVR; 1707 case Triple::hexagon: 1708 return EM_HEXAGON; 1709 case Triple::loongarch32: 1710 case Triple::loongarch64: 1711 return EM_LOONGARCH; 1712 case Triple::mips: 1713 case Triple::mipsel: 1714 case Triple::mips64: 1715 case Triple::mips64el: 1716 return EM_MIPS; 1717 case Triple::msp430: 1718 return EM_MSP430; 1719 case Triple::ppc: 1720 case Triple::ppcle: 1721 return EM_PPC; 1722 case Triple::ppc64: 1723 case Triple::ppc64le: 1724 return EM_PPC64; 1725 case Triple::riscv32: 1726 case Triple::riscv64: 1727 return EM_RISCV; 1728 case Triple::sparcv9: 1729 return EM_SPARCV9; 1730 case Triple::systemz: 1731 return EM_S390; 1732 case Triple::x86: 1733 return t.isOSIAMCU() ? 
EM_IAMCU : EM_386; 1734 case Triple::x86_64: 1735 return EM_X86_64; 1736 default: 1737 ErrAlways(ctx) << path 1738 << ": could not infer e_machine from bitcode target triple " 1739 << t.str(); 1740 return EM_NONE; 1741 } 1742 } 1743 1744 static uint8_t getOsAbi(const Triple &t) { 1745 switch (t.getOS()) { 1746 case Triple::AMDHSA: 1747 return ELF::ELFOSABI_AMDGPU_HSA; 1748 case Triple::AMDPAL: 1749 return ELF::ELFOSABI_AMDGPU_PAL; 1750 case Triple::Mesa3D: 1751 return ELF::ELFOSABI_AMDGPU_MESA3D; 1752 default: 1753 return ELF::ELFOSABI_NONE; 1754 } 1755 } 1756 1757 // For DTLTO, bitcode member names must be valid paths to files on disk. 1758 // For thin archives, resolve `memberPath` relative to the archive's location. 1759 // Returns true if adjusted; false otherwise. Non-thin archives are unsupported. 1760 static bool dtltoAdjustMemberPathIfThinArchive(Ctx &ctx, StringRef archivePath, 1761 std::string &memberPath) { 1762 assert(!archivePath.empty()); 1763 1764 if (ctx.arg.dtltoDistributor.empty()) 1765 return false; 1766 1767 // Read the archive header to determine if it's a thin archive. 
1768 auto bufferOrErr = 1769 MemoryBuffer::getFileSlice(archivePath, sizeof(ThinArchiveMagic) - 1, 0); 1770 if (std::error_code ec = bufferOrErr.getError()) { 1771 ErrAlways(ctx) << "cannot open " << archivePath << ": " << ec.message(); 1772 return false; 1773 } 1774 1775 if (!bufferOrErr->get()->getBuffer().starts_with(ThinArchiveMagic)) 1776 return false; 1777 1778 SmallString<128> resolvedPath; 1779 if (path::is_relative(memberPath)) { 1780 resolvedPath = path::parent_path(archivePath); 1781 path::append(resolvedPath, memberPath); 1782 } else 1783 resolvedPath = memberPath; 1784 1785 path::remove_dots(resolvedPath, /*remove_dot_dot=*/true); 1786 memberPath = resolvedPath.str(); 1787 return true; 1788 } 1789 1790 BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName, 1791 uint64_t offsetInArchive, bool lazy) 1792 : InputFile(ctx, BitcodeKind, mb) { 1793 this->archiveName = archiveName; 1794 this->lazy = lazy; 1795 1796 std::string path = mb.getBufferIdentifier().str(); 1797 if (ctx.arg.thinLTOIndexOnly) 1798 path = replaceThinLTOSuffix(ctx, mb.getBufferIdentifier()); 1799 1800 StringSaver &ss = ctx.saver; 1801 StringRef name; 1802 if (archiveName.empty() || 1803 dtltoAdjustMemberPathIfThinArchive(ctx, archiveName, path)) { 1804 name = ss.save(path); 1805 } else { 1806 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique 1807 // name. If two archives define two members with the same name, this 1808 // causes a collision which result in only one of the objects being taken 1809 // into consideration at LTO time (which very likely causes undefined 1810 // symbols later in the link stage). So we append file offset to make 1811 // filename unique. 
1812 name = ss.save(archiveName + "(" + path::filename(path) + " at " + 1813 utostr(offsetInArchive) + ")"); 1814 } 1815 1816 MemoryBufferRef mbref(mb.getBuffer(), name); 1817 1818 obj = CHECK2(lto::InputFile::create(mbref), this); 1819 1820 Triple t(obj->getTargetTriple()); 1821 ekind = getBitcodeELFKind(t); 1822 emachine = getBitcodeMachineKind(ctx, mb.getBufferIdentifier(), t); 1823 osabi = getOsAbi(t); 1824 } 1825 1826 static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { 1827 switch (gvVisibility) { 1828 case GlobalValue::DefaultVisibility: 1829 return STV_DEFAULT; 1830 case GlobalValue::HiddenVisibility: 1831 return STV_HIDDEN; 1832 case GlobalValue::ProtectedVisibility: 1833 return STV_PROTECTED; 1834 } 1835 llvm_unreachable("unknown visibility"); 1836 } 1837 1838 static void createBitcodeSymbol(Ctx &ctx, Symbol *&sym, 1839 const lto::InputFile::Symbol &objSym, 1840 BitcodeFile &f) { 1841 uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL; 1842 uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE; 1843 uint8_t visibility = mapVisibility(objSym.getVisibility()); 1844 1845 if (!sym) { 1846 // Symbols can be duplicated in bitcode files because of '#include' and 1847 // linkonce_odr. Use uniqueSaver to save symbol names for de-duplication. 1848 // Update objSym.Name to reference (via StringRef) the string saver's copy; 1849 // this way LTO can reference the same string saver's copy rather than 1850 // keeping copies of its own. 
1851 objSym.Name = ctx.uniqueSaver.save(objSym.getName()); 1852 sym = ctx.symtab->insert(objSym.getName()); 1853 } 1854 1855 if (objSym.isUndefined()) { 1856 Undefined newSym(&f, StringRef(), binding, visibility, type); 1857 sym->resolve(ctx, newSym); 1858 sym->referenced = true; 1859 return; 1860 } 1861 1862 if (objSym.isCommon()) { 1863 sym->resolve(ctx, CommonSymbol{ctx, &f, StringRef(), binding, visibility, 1864 STT_OBJECT, objSym.getCommonAlignment(), 1865 objSym.getCommonSize()}); 1866 } else { 1867 Defined newSym(ctx, &f, StringRef(), binding, visibility, type, 0, 0, 1868 nullptr); 1869 // The definition can be omitted if all bitcode definitions satisfy 1870 // `canBeOmittedFromSymbolTable()` and isUsedInRegularObj is false. 1871 // The latter condition is tested in parseVersionAndComputeIsPreemptible. 1872 sym->ltoCanOmit = objSym.canBeOmittedFromSymbolTable() && 1873 (!sym->isDefined() || sym->ltoCanOmit); 1874 sym->resolve(ctx, newSym); 1875 } 1876 } 1877 1878 void BitcodeFile::parse() { 1879 for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) { 1880 keptComdats.push_back( 1881 s.second == Comdat::NoDeduplicate || 1882 ctx.symtab->comdatGroups.try_emplace(CachedHashStringRef(s.first), this) 1883 .second); 1884 } 1885 1886 if (numSymbols == 0) { 1887 numSymbols = obj->symbols().size(); 1888 symbols = std::make_unique<Symbol *[]>(numSymbols); 1889 } 1890 // Process defined symbols first. See the comment in 1891 // ObjFile<ELFT>::initializeSymbols. 
1892 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) 1893 if (!irSym.isUndefined()) 1894 createBitcodeSymbol(ctx, symbols[i], irSym, *this); 1895 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) 1896 if (irSym.isUndefined()) 1897 createBitcodeSymbol(ctx, symbols[i], irSym, *this); 1898 1899 for (auto l : obj->getDependentLibraries()) 1900 addDependentLibrary(ctx, l, this); 1901 } 1902 1903 void BitcodeFile::parseLazy() { 1904 numSymbols = obj->symbols().size(); 1905 symbols = std::make_unique<Symbol *[]>(numSymbols); 1906 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) { 1907 // Symbols can be duplicated in bitcode files because of '#include' and 1908 // linkonce_odr. Use uniqueSaver to save symbol names for de-duplication. 1909 // Update objSym.Name to reference (via StringRef) the string saver's copy; 1910 // this way LTO can reference the same string saver's copy rather than 1911 // keeping copies of its own. 1912 irSym.Name = ctx.uniqueSaver.save(irSym.getName()); 1913 if (!irSym.isUndefined()) { 1914 auto *sym = ctx.symtab->insert(irSym.getName()); 1915 sym->resolve(ctx, LazySymbol{*this}); 1916 symbols[i] = sym; 1917 } 1918 } 1919 } 1920 1921 void BitcodeFile::postParse() { 1922 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) { 1923 const Symbol &sym = *symbols[i]; 1924 if (sym.file == this || !sym.isDefined() || irSym.isUndefined() || 1925 irSym.isCommon() || irSym.isWeak()) 1926 continue; 1927 int c = irSym.getComdatIndex(); 1928 if (c != -1 && !keptComdats[c]) 1929 continue; 1930 reportDuplicate(ctx, sym, this, nullptr, 0); 1931 } 1932 } 1933 1934 void BinaryFile::parse() { 1935 ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer()); 1936 auto *section = 1937 make<InputSection>(this, ".data", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE, 1938 /*addralign=*/8, /*entsize=*/0, data); 1939 sections.push_back(section); 1940 1941 // For each input file foo that is embedded to a result as a binary 1942 // blob, we define 
_binary_foo_{start,end,size} symbols, so that 1943 // user programs can access blobs by name. Non-alphanumeric 1944 // characters in a filename are replaced with underscore. 1945 std::string s = "_binary_" + mb.getBufferIdentifier().str(); 1946 for (char &c : s) 1947 if (!isAlnum(c)) 1948 c = '_'; 1949 1950 llvm::StringSaver &ss = ctx.saver; 1951 ctx.symtab->addAndCheckDuplicate( 1952 ctx, Defined{ctx, this, ss.save(s + "_start"), STB_GLOBAL, STV_DEFAULT, 1953 STT_OBJECT, 0, 0, section}); 1954 ctx.symtab->addAndCheckDuplicate( 1955 ctx, Defined{ctx, this, ss.save(s + "_end"), STB_GLOBAL, STV_DEFAULT, 1956 STT_OBJECT, data.size(), 0, section}); 1957 ctx.symtab->addAndCheckDuplicate( 1958 ctx, Defined{ctx, this, ss.save(s + "_size"), STB_GLOBAL, STV_DEFAULT, 1959 STT_OBJECT, data.size(), 0, nullptr}); 1960 } 1961 1962 InputFile *elf::createInternalFile(Ctx &ctx, StringRef name) { 1963 auto *file = 1964 make<InputFile>(ctx, InputFile::InternalKind, MemoryBufferRef("", name)); 1965 // References from an internal file do not lead to --warn-backrefs 1966 // diagnostics. 
1967 file->groupId = 0; 1968 return file; 1969 } 1970 1971 std::unique_ptr<ELFFileBase> elf::createObjFile(Ctx &ctx, MemoryBufferRef mb, 1972 StringRef archiveName, 1973 bool lazy) { 1974 std::unique_ptr<ELFFileBase> f; 1975 switch (getELFKind(ctx, mb, archiveName)) { 1976 case ELF32LEKind: 1977 f = std::make_unique<ObjFile<ELF32LE>>(ctx, ELF32LEKind, mb, archiveName); 1978 break; 1979 case ELF32BEKind: 1980 f = std::make_unique<ObjFile<ELF32BE>>(ctx, ELF32BEKind, mb, archiveName); 1981 break; 1982 case ELF64LEKind: 1983 f = std::make_unique<ObjFile<ELF64LE>>(ctx, ELF64LEKind, mb, archiveName); 1984 break; 1985 case ELF64BEKind: 1986 f = std::make_unique<ObjFile<ELF64BE>>(ctx, ELF64BEKind, mb, archiveName); 1987 break; 1988 default: 1989 llvm_unreachable("getELFKind"); 1990 } 1991 f->init(); 1992 f->lazy = lazy; 1993 return f; 1994 } 1995 1996 template <class ELFT> void ObjFile<ELFT>::parseLazy() { 1997 const ArrayRef<typename ELFT::Sym> eSyms = this->getELFSyms<ELFT>(); 1998 numSymbols = eSyms.size(); 1999 symbols = std::make_unique<Symbol *[]>(numSymbols); 2000 2001 // resolve() may trigger this->extract() if an existing symbol is an undefined 2002 // symbol. If that happens, this function has served its purpose, and we can 2003 // exit from the loop early. 
2004 auto *symtab = ctx.symtab.get(); 2005 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 2006 if (eSyms[i].st_shndx == SHN_UNDEF) 2007 continue; 2008 symbols[i] = symtab->insert(CHECK2(eSyms[i].getName(stringTable), this)); 2009 symbols[i]->resolve(ctx, LazySymbol{*this}); 2010 if (!lazy) 2011 break; 2012 } 2013 } 2014 2015 bool InputFile::shouldExtractForCommon(StringRef name) const { 2016 if (isa<BitcodeFile>(this)) 2017 return isBitcodeNonCommonDef(mb, name, archiveName); 2018 2019 return isNonCommonDef(ctx, mb, name, archiveName); 2020 } 2021 2022 std::string elf::replaceThinLTOSuffix(Ctx &ctx, StringRef path) { 2023 auto [suffix, repl] = ctx.arg.thinLTOObjectSuffixReplace; 2024 if (path.consume_back(suffix)) 2025 return (path + repl).str(); 2026 return std::string(path); 2027 } 2028 2029 template class elf::ObjFile<ELF32LE>; 2030 template class elf::ObjFile<ELF32BE>; 2031 template class elf::ObjFile<ELF64LE>; 2032 template class elf::ObjFile<ELF64BE>; 2033 2034 template void SharedFile::parse<ELF32LE>(); 2035 template void SharedFile::parse<ELF32BE>(); 2036 template void SharedFile::parse<ELF64LE>(); 2037 template void SharedFile::parse<ELF64BE>(); 2038