1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputFiles.h" 10 #include "Config.h" 11 #include "DWARF.h" 12 #include "Driver.h" 13 #include "InputSection.h" 14 #include "LinkerScript.h" 15 #include "SymbolTable.h" 16 #include "Symbols.h" 17 #include "SyntheticSections.h" 18 #include "Target.h" 19 #include "lld/Common/DWARF.h" 20 #include "llvm/ADT/CachedHashString.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/LTO/LTO.h" 23 #include "llvm/Object/IRObjectFile.h" 24 #include "llvm/Support/ARMAttributeParser.h" 25 #include "llvm/Support/ARMBuildAttributes.h" 26 #include "llvm/Support/Endian.h" 27 #include "llvm/Support/FileSystem.h" 28 #include "llvm/Support/Path.h" 29 #include "llvm/Support/TimeProfiler.h" 30 #include "llvm/Support/raw_ostream.h" 31 #include <optional> 32 33 using namespace llvm; 34 using namespace llvm::ELF; 35 using namespace llvm::object; 36 using namespace llvm::sys; 37 using namespace llvm::sys::fs; 38 using namespace llvm::support::endian; 39 using namespace lld; 40 using namespace lld::elf; 41 42 // This function is explicitly instantiated in ARM.cpp, don't do it here to 43 // avoid warnings with MSVC. 44 extern template void ObjFile<ELF32LE>::importCmseSymbols(); 45 extern template void ObjFile<ELF32BE>::importCmseSymbols(); 46 extern template void ObjFile<ELF64LE>::importCmseSymbols(); 47 extern template void ObjFile<ELF64BE>::importCmseSymbols(); 48 49 // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 50 std::string elf::toStr(Ctx &ctx, const InputFile *f) { 51 static std::mutex mu; 52 if (!f) 53 return "<internal>"; 54 55 { 56 std::lock_guard<std::mutex> lock(mu); 57 if (f->toStringCache.empty()) { 58 if (f->archiveName.empty()) 59 f->toStringCache = f->getName(); 60 else 61 (f->archiveName + "(" + f->getName() + ")").toVector(f->toStringCache); 62 } 63 } 64 return std::string(f->toStringCache); 65 } 66 67 const ELFSyncStream &elf::operator<<(const ELFSyncStream &s, 68 const InputFile *f) { 69 return s << toStr(s.ctx, f); 70 } 71 72 static ELFKind getELFKind(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName) { 73 unsigned char size; 74 unsigned char endian; 75 std::tie(size, endian) = getElfArchType(mb.getBuffer()); 76 77 auto report = [&](StringRef msg) { 78 StringRef filename = mb.getBufferIdentifier(); 79 if (archiveName.empty()) 80 Fatal(ctx) << filename << ": " << msg; 81 else 82 Fatal(ctx) << archiveName << "(" << filename << "): " << msg; 83 }; 84 85 if (!mb.getBuffer().starts_with(ElfMagic)) 86 report("not an ELF file"); 87 if (endian != ELFDATA2LSB && endian != ELFDATA2MSB) 88 report("corrupted ELF file: invalid data encoding"); 89 if (size != ELFCLASS32 && size != ELFCLASS64) 90 report("corrupted ELF file: invalid file class"); 91 92 size_t bufSize = mb.getBuffer().size(); 93 if ((size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr)) || 94 (size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr))) 95 report("corrupted ELF file: file is too short"); 96 97 if (size == ELFCLASS32) 98 return (endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind; 99 return (endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind; 100 } 101 102 // For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD 103 // flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how 104 // the input objects have been compiled. 105 static void updateARMVFPArgs(Ctx &ctx, const ARMAttributeParser &attributes, 106 const InputFile *f) { 107 std::optional<unsigned> attr = 108 attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args); 109 if (!attr) 110 // If an ABI tag isn't present then it is implicitly given the value of 0 111 // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files, 112 // including some in glibc that don't use FP args (and should have value 3) 113 // don't have the attribute so we do not consider an implicit value of 0 114 // as a clash. 115 return; 116 117 unsigned vfpArgs = *attr; 118 ARMVFPArgKind arg; 119 switch (vfpArgs) { 120 case ARMBuildAttrs::BaseAAPCS: 121 arg = ARMVFPArgKind::Base; 122 break; 123 case ARMBuildAttrs::HardFPAAPCS: 124 arg = ARMVFPArgKind::VFP; 125 break; 126 case ARMBuildAttrs::ToolChainFPPCS: 127 // Tool chain specific convention that conforms to neither AAPCS variant. 128 arg = ARMVFPArgKind::ToolChain; 129 break; 130 case ARMBuildAttrs::CompatibleFPAAPCS: 131 // Object compatible with all conventions. 132 return; 133 default: 134 ErrAlways(ctx) << f << ": unknown Tag_ABI_VFP_args value: " << vfpArgs; 135 return; 136 } 137 // Follow ld.bfd and error if there is a mix of calling conventions. 138 if (ctx.arg.armVFPArgs != arg && ctx.arg.armVFPArgs != ARMVFPArgKind::Default) 139 ErrAlways(ctx) << f << ": incompatible Tag_ABI_VFP_args"; 140 else 141 ctx.arg.armVFPArgs = arg; 142 } 143 144 // The ARM support in lld makes some use of instructions that are not available 145 // on all ARM architectures. Namely: 146 // - Use of BLX instruction for interworking between ARM and Thumb state. 147 // - Use of the extended Thumb branch encoding in relocation. 148 // - Use of the MOVT/MOVW instructions in Thumb Thunks. 149 // The ARM Attributes section contains information about the architecture chosen 150 // at compile time. We follow the convention that if at least one input object 151 // is compiled with an architecture that supports these features then lld is 152 // permitted to use them. 153 static void updateSupportedARMFeatures(Ctx &ctx, 154 const ARMAttributeParser &attributes) { 155 std::optional<unsigned> attr = 156 attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); 157 if (!attr) 158 return; 159 auto arch = *attr; 160 switch (arch) { 161 case ARMBuildAttrs::Pre_v4: 162 case ARMBuildAttrs::v4: 163 case ARMBuildAttrs::v4T: 164 // Architectures prior to v5 do not support BLX instruction 165 break; 166 case ARMBuildAttrs::v5T: 167 case ARMBuildAttrs::v5TE: 168 case ARMBuildAttrs::v5TEJ: 169 case ARMBuildAttrs::v6: 170 case ARMBuildAttrs::v6KZ: 171 case ARMBuildAttrs::v6K: 172 ctx.arg.armHasBlx = true; 173 // Architectures used in pre-Cortex processors do not support 174 // The J1 = 1 J2 = 1 Thumb branch range extension, with the exception 175 // of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do. 176 break; 177 default: 178 // All other Architectures have BLX and extended branch encoding 179 ctx.arg.armHasBlx = true; 180 ctx.arg.armJ1J2BranchEncoding = true; 181 if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M) 182 // All Architectures used in Cortex processors with the exception 183 // of v6-M and v6S-M have the MOVT and MOVW instructions. 184 ctx.arg.armHasMovtMovw = true; 185 break; 186 } 187 188 // Only ARMv8-M or later architectures have CMSE support. 189 std::optional<unsigned> profile = 190 attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile); 191 if (!profile) 192 return; 193 if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base && 194 profile == ARMBuildAttrs::MicroControllerProfile) 195 ctx.arg.armCMSESupport = true; 196 197 // The thumb PLT entries require Thumb2 which can be used on multiple archs. 198 // For now, let's limit it to ones where ARM isn't available and we know have 199 // Thumb2. 200 std::optional<unsigned> armISA = 201 attributes.getAttributeValue(ARMBuildAttrs::ARM_ISA_use); 202 std::optional<unsigned> thumb = 203 attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use); 204 ctx.arg.armHasArmISA |= armISA && *armISA >= ARMBuildAttrs::Allowed; 205 ctx.arg.armHasThumb2ISA |= thumb && *thumb >= ARMBuildAttrs::AllowThumb32; 206 } 207 208 InputFile::InputFile(Ctx &ctx, Kind k, MemoryBufferRef m) 209 : ctx(ctx), mb(m), groupId(ctx.driver.nextGroupId), fileKind(k) { 210 // All files within the same --{start,end}-group get the same group ID. 211 // Otherwise, a new file will get a new group ID. 212 if (!ctx.driver.isInGroup) 213 ++ctx.driver.nextGroupId; 214 } 215 216 InputFile::~InputFile() {} 217 218 std::optional<MemoryBufferRef> elf::readFile(Ctx &ctx, StringRef path) { 219 llvm::TimeTraceScope timeScope("Load input files", path); 220 221 // The --chroot option changes our virtual root directory. 222 // This is useful when you are dealing with files created by --reproduce. 223 if (!ctx.arg.chroot.empty() && path.starts_with("/")) 224 path = ctx.saver.save(ctx.arg.chroot + path); 225 226 bool remapped = false; 227 auto it = ctx.arg.remapInputs.find(path); 228 if (it != ctx.arg.remapInputs.end()) { 229 path = it->second; 230 remapped = true; 231 } else { 232 for (const auto &[pat, toFile] : ctx.arg.remapInputsWildcards) { 233 if (pat.match(path)) { 234 path = toFile; 235 remapped = true; 236 break; 237 } 238 } 239 } 240 if (remapped) { 241 // Use /dev/null to indicate an input file that should be ignored. Change 242 // the path to NUL on Windows. 243 #ifdef _WIN32 244 if (path == "/dev/null") 245 path = "NUL"; 246 #endif 247 } 248 249 Log(ctx) << path; 250 ctx.arg.dependencyFiles.insert(llvm::CachedHashString(path)); 251 252 auto mbOrErr = MemoryBuffer::getFile(path, /*IsText=*/false, 253 /*RequiresNullTerminator=*/false); 254 if (auto ec = mbOrErr.getError()) { 255 ErrAlways(ctx) << "cannot open " << path << ": " << ec.message(); 256 return std::nullopt; 257 } 258 259 MemoryBufferRef mbref = (*mbOrErr)->getMemBufferRef(); 260 ctx.memoryBuffers.push_back(std::move(*mbOrErr)); // take MB ownership 261 262 if (ctx.tar) 263 ctx.tar->append(relativeToRoot(path), mbref.getBuffer()); 264 return mbref; 265 } 266 267 // All input object files must be for the same architecture 268 // (e.g. it does not make sense to link x86 object files with 269 // MIPS object files.) This function checks for that error. 270 static bool isCompatible(Ctx &ctx, InputFile *file) { 271 if (!file->isElf() && !isa<BitcodeFile>(file)) 272 return true; 273 274 if (file->ekind == ctx.arg.ekind && file->emachine == ctx.arg.emachine) { 275 if (ctx.arg.emachine != EM_MIPS) 276 return true; 277 if (isMipsN32Abi(ctx, *file) == ctx.arg.mipsN32Abi) 278 return true; 279 } 280 281 StringRef target = 282 !ctx.arg.bfdname.empty() ? ctx.arg.bfdname : ctx.arg.emulation; 283 if (!target.empty()) { 284 Err(ctx) << file << " is incompatible with " << target; 285 return false; 286 } 287 288 InputFile *existing = nullptr; 289 if (!ctx.objectFiles.empty()) 290 existing = ctx.objectFiles[0]; 291 else if (!ctx.sharedFiles.empty()) 292 existing = ctx.sharedFiles[0]; 293 else if (!ctx.bitcodeFiles.empty()) 294 existing = ctx.bitcodeFiles[0]; 295 auto diag = Err(ctx); 296 diag << file << " is incompatible"; 297 if (existing) 298 diag << " with " << existing; 299 return false; 300 } 301 302 template <class ELFT> static void doParseFile(Ctx &ctx, InputFile *file) { 303 if (!isCompatible(ctx, file)) 304 return; 305 306 // Lazy object file 307 if (file->lazy) { 308 if (auto *f = dyn_cast<BitcodeFile>(file)) { 309 ctx.lazyBitcodeFiles.push_back(f); 310 f->parseLazy(); 311 } else { 312 cast<ObjFile<ELFT>>(file)->parseLazy(); 313 } 314 return; 315 } 316 317 if (ctx.arg.trace) 318 Msg(ctx) << file; 319 320 if (file->kind() == InputFile::ObjKind) { 321 ctx.objectFiles.push_back(cast<ELFFileBase>(file)); 322 cast<ObjFile<ELFT>>(file)->parse(); 323 } else if (auto *f = dyn_cast<SharedFile>(file)) { 324 f->parse<ELFT>(); 325 } else if (auto *f = dyn_cast<BitcodeFile>(file)) { 326 ctx.bitcodeFiles.push_back(f); 327 f->parse(); 328 } else { 329 ctx.binaryFiles.push_back(cast<BinaryFile>(file)); 330 cast<BinaryFile>(file)->parse(); 331 } 332 } 333 334 // Add symbols in File to the symbol table. 335 void elf::parseFile(Ctx &ctx, InputFile *file) { 336 invokeELFT(doParseFile, ctx, file); 337 } 338 339 // This function is explicitly instantiated in ARM.cpp. Mark it extern here, 340 // to avoid warnings when building with MSVC. 341 extern template void ObjFile<ELF32LE>::importCmseSymbols(); 342 extern template void ObjFile<ELF32BE>::importCmseSymbols(); 343 extern template void ObjFile<ELF64LE>::importCmseSymbols(); 344 extern template void ObjFile<ELF64BE>::importCmseSymbols(); 345 346 template <class ELFT> 347 static void 348 doParseFiles(Ctx &ctx, 349 const SmallVector<std::unique_ptr<InputFile>, 0> &files) { 350 // Add all files to the symbol table. This will add almost all symbols that we 351 // need to the symbol table. This process might add files to the link due to 352 // addDependentLibrary. 353 for (size_t i = 0; i < files.size(); ++i) { 354 llvm::TimeTraceScope timeScope("Parse input files", files[i]->getName()); 355 doParseFile<ELFT>(ctx, files[i].get()); 356 } 357 if (ctx.driver.armCmseImpLib) 358 cast<ObjFile<ELFT>>(*ctx.driver.armCmseImpLib).importCmseSymbols(); 359 } 360 361 void elf::parseFiles(Ctx &ctx, 362 const SmallVector<std::unique_ptr<InputFile>, 0> &files) { 363 llvm::TimeTraceScope timeScope("Parse input files"); 364 invokeELFT(doParseFiles, ctx, files); 365 } 366 367 // Concatenates arguments to construct a string representing an error location. 368 StringRef InputFile::getNameForScript() const { 369 if (archiveName.empty()) 370 return getName(); 371 372 if (nameForScriptCache.empty()) 373 nameForScriptCache = (archiveName + Twine(':') + getName()).str(); 374 375 return nameForScriptCache; 376 } 377 378 // An ELF object file may contain a `.deplibs` section. If it exists, the 379 // section contains a list of library specifiers such as `m` for libm. This 380 // function resolves a given name by finding the first matching library checking 381 // the various ways that a library can be specified to LLD. This ELF extension 382 // is a form of autolinking and is called `dependent libraries`. It is currently 383 // unique to LLVM and lld. 384 static void addDependentLibrary(Ctx &ctx, StringRef specifier, 385 const InputFile *f) { 386 if (!ctx.arg.dependentLibraries) 387 return; 388 if (std::optional<std::string> s = searchLibraryBaseName(ctx, specifier)) 389 ctx.driver.addFile(ctx.saver.save(*s), /*withLOption=*/true); 390 else if (std::optional<std::string> s = findFromSearchPaths(ctx, specifier)) 391 ctx.driver.addFile(ctx.saver.save(*s), /*withLOption=*/true); 392 else if (fs::exists(specifier)) 393 ctx.driver.addFile(specifier, /*withLOption=*/false); 394 else 395 ErrAlways(ctx) 396 << f << ": unable to find library from dependent library specifier: " 397 << specifier; 398 } 399 400 // Record the membership of a section group so that in the garbage collection 401 // pass, section group members are kept or discarded as a unit. 402 template <class ELFT> 403 static void handleSectionGroup(ArrayRef<InputSectionBase *> sections, 404 ArrayRef<typename ELFT::Word> entries) { 405 bool hasAlloc = false; 406 for (uint32_t index : entries.slice(1)) { 407 if (index >= sections.size()) 408 return; 409 if (InputSectionBase *s = sections[index]) 410 if (s != &InputSection::discarded && s->flags & SHF_ALLOC) 411 hasAlloc = true; 412 } 413 414 // If any member has the SHF_ALLOC flag, the whole group is subject to garbage 415 // collection. See the comment in markLive(). This rule retains .debug_types 416 // and .rela.debug_types. 417 if (!hasAlloc) 418 return; 419 420 // Connect the members in a circular doubly-linked list via 421 // nextInSectionGroup. 422 InputSectionBase *head; 423 InputSectionBase *prev = nullptr; 424 for (uint32_t index : entries.slice(1)) { 425 InputSectionBase *s = sections[index]; 426 if (!s || s == &InputSection::discarded) 427 continue; 428 if (prev) 429 prev->nextInSectionGroup = s; 430 else 431 head = s; 432 prev = s; 433 } 434 if (prev) 435 prev->nextInSectionGroup = head; 436 } 437 438 template <class ELFT> void ObjFile<ELFT>::initDwarf() { 439 dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>( 440 std::make_unique<LLDDwarfObj<ELFT>>(this), "", 441 [&](Error err) { Warn(ctx) << getName() + ": " << std::move(err); }, 442 [&](Error warning) { 443 Warn(ctx) << getName() << ": " << std::move(warning); 444 })); 445 } 446 447 DWARFCache *ELFFileBase::getDwarf() { 448 assert(fileKind == ObjKind); 449 llvm::call_once(initDwarf, [this]() { 450 switch (ekind) { 451 default: 452 llvm_unreachable(""); 453 case ELF32LEKind: 454 return cast<ObjFile<ELF32LE>>(this)->initDwarf(); 455 case ELF32BEKind: 456 return cast<ObjFile<ELF32BE>>(this)->initDwarf(); 457 case ELF64LEKind: 458 return cast<ObjFile<ELF64LE>>(this)->initDwarf(); 459 case ELF64BEKind: 460 return cast<ObjFile<ELF64BE>>(this)->initDwarf(); 461 } 462 }); 463 return dwarf.get(); 464 } 465 466 ELFFileBase::ELFFileBase(Ctx &ctx, Kind k, ELFKind ekind, MemoryBufferRef mb) 467 : InputFile(ctx, k, mb) { 468 this->ekind = ekind; 469 } 470 471 ELFFileBase::~ELFFileBase() {} 472 473 template <typename Elf_Shdr> 474 static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> sections, uint32_t type) { 475 for (const Elf_Shdr &sec : sections) 476 if (sec.sh_type == type) 477 return &sec; 478 return nullptr; 479 } 480 481 void ELFFileBase::init() { 482 switch (ekind) { 483 case ELF32LEKind: 484 init<ELF32LE>(fileKind); 485 break; 486 case ELF32BEKind: 487 init<ELF32BE>(fileKind); 488 break; 489 case ELF64LEKind: 490 init<ELF64LE>(fileKind); 491 break; 492 case ELF64BEKind: 493 init<ELF64BE>(fileKind); 494 break; 495 default: 496 llvm_unreachable("getELFKind"); 497 } 498 } 499 500 template <class ELFT> void ELFFileBase::init(InputFile::Kind k) { 501 using Elf_Shdr = typename ELFT::Shdr; 502 using Elf_Sym = typename ELFT::Sym; 503 504 // Initialize trivial attributes. 505 const ELFFile<ELFT> &obj = getObj<ELFT>(); 506 emachine = obj.getHeader().e_machine; 507 osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI]; 508 abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION]; 509 510 ArrayRef<Elf_Shdr> sections = CHECK2(obj.sections(), this); 511 elfShdrs = sections.data(); 512 numELFShdrs = sections.size(); 513 514 // Find a symbol table. 515 const Elf_Shdr *symtabSec = 516 findSection(sections, k == SharedKind ? SHT_DYNSYM : SHT_SYMTAB); 517 518 if (!symtabSec) 519 return; 520 521 // Initialize members corresponding to a symbol table. 522 firstGlobal = symtabSec->sh_info; 523 524 ArrayRef<Elf_Sym> eSyms = CHECK2(obj.symbols(symtabSec), this); 525 if (firstGlobal == 0 || firstGlobal > eSyms.size()) 526 Fatal(ctx) << this << ": invalid sh_info in symbol table"; 527 528 elfSyms = reinterpret_cast<const void *>(eSyms.data()); 529 numSymbols = eSyms.size(); 530 stringTable = CHECK2(obj.getStringTableForSymtab(*symtabSec, sections), this); 531 } 532 533 template <class ELFT> 534 uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const { 535 return CHECK2( 536 this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable), 537 this); 538 } 539 540 template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) { 541 object::ELFFile<ELFT> obj = this->getObj(); 542 // Read a section table. justSymbols is usually false. 543 if (this->justSymbols) { 544 initializeJustSymbols(); 545 initializeSymbols(obj); 546 return; 547 } 548 549 // Handle dependent libraries and selection of section groups as these are not 550 // done in parallel. 551 ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>(); 552 StringRef shstrtab = CHECK2(obj.getSectionStringTable(objSections), this); 553 uint64_t size = objSections.size(); 554 sections.resize(size); 555 for (size_t i = 0; i != size; ++i) { 556 const Elf_Shdr &sec = objSections[i]; 557 if (LLVM_LIKELY(sec.sh_type == SHT_PROGBITS)) 558 continue; 559 if (LLVM_LIKELY(sec.sh_type == SHT_GROUP)) { 560 StringRef signature = getShtGroupSignature(objSections, sec); 561 ArrayRef<Elf_Word> entries = 562 CHECK2(obj.template getSectionContentsAsArray<Elf_Word>(sec), this); 563 if (entries.empty()) 564 Fatal(ctx) << this << ": empty SHT_GROUP"; 565 566 Elf_Word flag = entries[0]; 567 if (flag && flag != GRP_COMDAT) 568 Fatal(ctx) << this << ": unsupported SHT_GROUP format"; 569 570 bool keepGroup = !flag || ignoreComdats || 571 ctx.symtab->comdatGroups 572 .try_emplace(CachedHashStringRef(signature), this) 573 .second; 574 if (keepGroup) { 575 if (!ctx.arg.resolveGroups) 576 sections[i] = createInputSection( 577 i, sec, check(obj.getSectionName(sec, shstrtab))); 578 } else { 579 // Otherwise, discard group members. 580 for (uint32_t secIndex : entries.slice(1)) { 581 if (secIndex >= size) 582 Fatal(ctx) << this 583 << ": invalid section index in group: " << secIndex; 584 sections[secIndex] = &InputSection::discarded; 585 } 586 } 587 continue; 588 } 589 590 if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !ctx.arg.relocatable) { 591 StringRef name = check(obj.getSectionName(sec, shstrtab)); 592 ArrayRef<char> data = CHECK2( 593 this->getObj().template getSectionContentsAsArray<char>(sec), this); 594 if (!data.empty() && data.back() != '\0') { 595 Err(ctx) 596 << this 597 << ": corrupted dependent libraries section (unterminated string): " 598 << name; 599 } else { 600 for (const char *d = data.begin(), *e = data.end(); d < e;) { 601 StringRef s(d); 602 addDependentLibrary(ctx, s, this); 603 d += s.size() + 1; 604 } 605 } 606 sections[i] = &InputSection::discarded; 607 continue; 608 } 609 610 switch (ctx.arg.emachine) { 611 case EM_ARM: 612 if (sec.sh_type == SHT_ARM_ATTRIBUTES) { 613 ARMAttributeParser attributes; 614 ArrayRef<uint8_t> contents = 615 check(this->getObj().getSectionContents(sec)); 616 StringRef name = check(obj.getSectionName(sec, shstrtab)); 617 sections[i] = &InputSection::discarded; 618 if (Error e = attributes.parse(contents, ekind == ELF32LEKind 619 ? llvm::endianness::little 620 : llvm::endianness::big)) { 621 InputSection isec(*this, sec, name); 622 Warn(ctx) << &isec << ": " << std::move(e); 623 } else { 624 updateSupportedARMFeatures(ctx, attributes); 625 updateARMVFPArgs(ctx, attributes, this); 626 627 // FIXME: Retain the first attribute section we see. The eglibc ARM 628 // dynamic loaders require the presence of an attribute section for 629 // dlopen to work. In a full implementation we would merge all 630 // attribute sections. 631 if (ctx.in.attributes == nullptr) { 632 ctx.in.attributes = 633 std::make_unique<InputSection>(*this, sec, name); 634 sections[i] = ctx.in.attributes.get(); 635 } 636 } 637 } 638 break; 639 case EM_AARCH64: 640 // FIXME: BuildAttributes have been implemented in llvm, but not yet in 641 // lld. Remove the section so that it does not accumulate in the output 642 // file. When support is implemented we expect not to output a build 643 // attributes section in files of type ET_EXEC or ET_SHARED, but ld -r 644 // ouptut will need a single merged attributes section. 645 if (sec.sh_type == SHT_AARCH64_ATTRIBUTES) 646 sections[i] = &InputSection::discarded; 647 // Producing a static binary with MTE globals is not currently supported, 648 // remove all SHT_AARCH64_MEMTAG_GLOBALS_STATIC sections as they're unused 649 // medatada, and we don't want them to end up in the output file for 650 // static executables. 651 if (sec.sh_type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC && 652 !canHaveMemtagGlobals(ctx)) 653 sections[i] = &InputSection::discarded; 654 break; 655 } 656 } 657 658 // Read a symbol table. 659 initializeSymbols(obj); 660 } 661 662 // Sections with SHT_GROUP and comdat bits define comdat section groups. 663 // They are identified and deduplicated by group name. This function 664 // returns a group name. 665 template <class ELFT> 666 StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections, 667 const Elf_Shdr &sec) { 668 typename ELFT::SymRange symbols = this->getELFSyms<ELFT>(); 669 if (sec.sh_info >= symbols.size()) 670 Fatal(ctx) << this << ": invalid symbol index"; 671 const typename ELFT::Sym &sym = symbols[sec.sh_info]; 672 return CHECK2(sym.getName(this->stringTable), this); 673 } 674 675 template <class ELFT> 676 bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) { 677 // On a regular link we don't merge sections if -O0 (default is -O1). This 678 // sometimes makes the linker significantly faster, although the output will 679 // be bigger. 680 // 681 // Doing the same for -r would create a problem as it would combine sections 682 // with different sh_entsize. One option would be to just copy every SHF_MERGE 683 // section as is to the output. While this would produce a valid ELF file with 684 // usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when 685 // they see two .debug_str. We could have separate logic for combining 686 // SHF_MERGE sections based both on their name and sh_entsize, but that seems 687 // to be more trouble than it is worth. Instead, we just use the regular (-O1) 688 // logic for -r. 689 if (ctx.arg.optimize == 0 && !ctx.arg.relocatable) 690 return false; 691 692 // A mergeable section with size 0 is useless because they don't have 693 // any data to merge. A mergeable string section with size 0 can be 694 // argued as invalid because it doesn't end with a null character. 695 // We'll avoid a mess by handling them as if they were non-mergeable. 696 if (sec.sh_size == 0) 697 return false; 698 699 // Check for sh_entsize. The ELF spec is not clear about the zero 700 // sh_entsize. It says that "the member [sh_entsize] contains 0 if 701 // the section does not hold a table of fixed-size entries". We know 702 // that Rust 1.13 produces a string mergeable section with a zero 703 // sh_entsize. Here we just accept it rather than being picky about it. 704 uint64_t entSize = sec.sh_entsize; 705 if (entSize == 0) 706 return false; 707 if (sec.sh_size % entSize) 708 ErrAlways(ctx) << this << ":(" << name << "): SHF_MERGE section size (" 709 << uint64_t(sec.sh_size) 710 << ") must be a multiple of sh_entsize (" << entSize << ")"; 711 if (sec.sh_flags & SHF_WRITE) 712 Err(ctx) << this << ":(" << name 713 << "): writable SHF_MERGE section is not supported"; 714 715 return true; 716 } 717 718 // This is for --just-symbols. 719 // 720 // --just-symbols is a very minor feature that allows you to link your 721 // output against other existing program, so that if you load both your 722 // program and the other program into memory, your output can refer the 723 // other program's symbols. 724 // 725 // When the option is given, we link "just symbols". The section table is 726 // initialized with null pointers. 727 template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() { 728 sections.resize(numELFShdrs); 729 } 730 731 static bool isKnownSpecificSectionType(uint32_t t, uint32_t flags) { 732 if (SHT_LOUSER <= t && t <= SHT_HIUSER && !(flags & SHF_ALLOC)) 733 return true; 734 if (SHT_LOOS <= t && t <= SHT_HIOS && !(flags & SHF_OS_NONCONFORMING)) 735 return true; 736 // Allow all processor-specific types. This is different from GNU ld. 737 return SHT_LOPROC <= t && t <= SHT_HIPROC; 738 } 739 740 template <class ELFT> 741 void ObjFile<ELFT>::initializeSections(bool ignoreComdats, 742 const llvm::object::ELFFile<ELFT> &obj) { 743 ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>(); 744 StringRef shstrtab = CHECK2(obj.getSectionStringTable(objSections), this); 745 uint64_t size = objSections.size(); 746 SmallVector<ArrayRef<Elf_Word>, 0> selectedGroups; 747 for (size_t i = 0; i != size; ++i) { 748 if (this->sections[i] == &InputSection::discarded) 749 continue; 750 const Elf_Shdr &sec = objSections[i]; 751 const uint32_t type = sec.sh_type; 752 753 // SHF_EXCLUDE'ed sections are discarded by the linker. However, 754 // if -r is given, we'll let the final link discard such sections. 755 // This is compatible with GNU. 756 if ((sec.sh_flags & SHF_EXCLUDE) && !ctx.arg.relocatable) { 757 if (type == SHT_LLVM_CALL_GRAPH_PROFILE) 758 cgProfileSectionIndex = i; 759 if (type == SHT_LLVM_ADDRSIG) { 760 // We ignore the address-significance table if we know that the object 761 // file was created by objcopy or ld -r. This is because these tools 762 // will reorder the symbols in the symbol table, invalidating the data 763 // in the address-significance table, which refers to symbols by index. 764 if (sec.sh_link != 0) 765 this->addrsigSec = &sec; 766 else if (ctx.arg.icf == ICFLevel::Safe) 767 Warn(ctx) << this 768 << ": --icf=safe conservatively ignores " 769 "SHT_LLVM_ADDRSIG [index " 770 << i 771 << "] with sh_link=0 " 772 "(likely created using objcopy or ld -r)"; 773 } 774 this->sections[i] = &InputSection::discarded; 775 continue; 776 } 777 778 switch (type) { 779 case SHT_GROUP: { 780 if (!ctx.arg.relocatable) 781 sections[i] = &InputSection::discarded; 782 StringRef signature = 783 cantFail(this->getELFSyms<ELFT>()[sec.sh_info].getName(stringTable)); 784 ArrayRef<Elf_Word> entries = 785 cantFail(obj.template getSectionContentsAsArray<Elf_Word>(sec)); 786 if ((entries[0] & GRP_COMDAT) == 0 || ignoreComdats || 787 ctx.symtab->comdatGroups.find(CachedHashStringRef(signature)) 788 ->second == this) 789 selectedGroups.push_back(entries); 790 break; 791 } 792 case SHT_SYMTAB_SHNDX: 793 shndxTable = CHECK2(obj.getSHNDXTable(sec, objSections), this); 794 break; 795 case SHT_SYMTAB: 796 case SHT_STRTAB: 797 case SHT_REL: 798 case SHT_RELA: 799 case SHT_CREL: 800 case SHT_NULL: 801 break; 802 case SHT_PROGBITS: 803 case SHT_NOTE: 804 case SHT_NOBITS: 805 case SHT_INIT_ARRAY: 806 case SHT_FINI_ARRAY: 807 case SHT_PREINIT_ARRAY: 808 this->sections[i] = 809 createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab))); 810 break; 811 case SHT_LLVM_LTO: 812 // Discard .llvm.lto in a relocatable link that does not use the bitcode. 813 // The concatenated output does not properly reflect the linking 814 // semantics. In addition, since we do not use the bitcode wrapper format, 815 // the concatenated raw bitcode would be invalid. 816 if (ctx.arg.relocatable && !ctx.arg.fatLTOObjects) { 817 sections[i] = &InputSection::discarded; 818 break; 819 } 820 [[fallthrough]]; 821 default: 822 this->sections[i] = 823 createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab))); 824 if (type == SHT_LLVM_SYMPART) 825 ctx.hasSympart.store(true, std::memory_order_relaxed); 826 else if (ctx.arg.rejectMismatch && 827 !isKnownSpecificSectionType(type, sec.sh_flags)) 828 Err(ctx) << this->sections[i] << ": unknown section type 0x" 829 << Twine::utohexstr(type); 830 break; 831 } 832 } 833 834 // We have a second loop. It is used to: 835 // 1) handle SHF_LINK_ORDER sections. 836 // 2) create relocation sections. In some cases the section header index of a 837 // relocation section may be smaller than that of the relocated section. In 838 // such cases, the relocation section would attempt to reference a target 839 // section that has not yet been created. For simplicity, delay creation of 840 // relocation sections until now. 841 for (size_t i = 0; i != size; ++i) { 842 if (this->sections[i] == &InputSection::discarded) 843 continue; 844 const Elf_Shdr &sec = objSections[i]; 845 846 if (isStaticRelSecType(sec.sh_type)) { 847 // Find a relocation target section and associate this section with that. 848 // Target may have been discarded if it is in a different section group 849 // and the group is discarded, even though it's a violation of the spec. 850 // We handle that situation gracefully by discarding dangling relocation 851 // sections. 852 const uint32_t info = sec.sh_info; 853 InputSectionBase *s = getRelocTarget(i, info); 854 if (!s) 855 continue; 856 857 // ELF spec allows mergeable sections with relocations, but they are rare, 858 // and it is in practice hard to merge such sections by contents, because 859 // applying relocations at end of linking changes section contents. So, we 860 // simply handle such sections as non-mergeable ones. Degrading like this 861 // is acceptable because section merging is optional. 862 if (auto *ms = dyn_cast<MergeInputSection>(s)) { 863 s = makeThreadLocal<InputSection>(ms->file, ms->name, ms->type, 864 ms->flags, ms->addralign, ms->entsize, 865 ms->contentMaybeDecompress()); 866 sections[info] = s; 867 } 868 869 if (s->relSecIdx != 0) 870 ErrAlways(ctx) << s 871 << ": multiple relocation sections to one section are " 872 "not supported"; 873 s->relSecIdx = i; 874 875 // Relocation sections are usually removed from the output, so return 876 // `nullptr` for the normal case. However, if -r or --emit-relocs is 877 // specified, we need to copy them to the output. (Some post link analysis 878 // tools specify --emit-relocs to obtain the information.) 879 if (ctx.arg.copyRelocs) { 880 auto *isec = makeThreadLocal<InputSection>( 881 *this, sec, check(obj.getSectionName(sec, shstrtab))); 882 // If the relocated section is discarded (due to /DISCARD/ or 883 // --gc-sections), the relocation section should be discarded as well. 884 s->dependentSections.push_back(isec); 885 sections[i] = isec; 886 } 887 continue; 888 } 889 890 // A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have 891 // the flag. 892 if (!sec.sh_link || !(sec.sh_flags & SHF_LINK_ORDER)) 893 continue; 894 895 InputSectionBase *linkSec = nullptr; 896 if (sec.sh_link < size) 897 linkSec = this->sections[sec.sh_link]; 898 if (!linkSec) { 899 ErrAlways(ctx) << this 900 << ": invalid sh_link index: " << uint32_t(sec.sh_link); 901 continue; 902 } 903 904 // A SHF_LINK_ORDER section is discarded if its linked-to section is 905 // discarded. 906 InputSection *isec = cast<InputSection>(this->sections[i]); 907 linkSec->dependentSections.push_back(isec); 908 if (!isa<InputSection>(linkSec)) 909 ErrAlways(ctx) 910 << "a section " << isec->name 911 << " with SHF_LINK_ORDER should not refer a non-regular section: " 912 << linkSec; 913 } 914 915 for (ArrayRef<Elf_Word> entries : selectedGroups) 916 handleSectionGroup<ELFT>(this->sections, entries); 917 } 918 919 template <typename ELFT> 920 static void parseGnuPropertyNote(Ctx &ctx, ELFFileBase &f, 921 uint32_t featureAndType, 922 ArrayRef<uint8_t> &desc, const uint8_t *base, 923 ArrayRef<uint8_t> *data = nullptr) { 924 auto err = [&](const uint8_t *place) -> ELFSyncStream { 925 auto diag = Err(ctx); 926 diag << &f << ":(" << ".note.gnu.property+0x" 927 << Twine::utohexstr(place - base) << "): "; 928 return diag; 929 }; 930 931 while (!desc.empty()) { 932 const uint8_t *place = desc.data(); 933 if (desc.size() < 8) 934 return void(err(place) << "program property is too short"); 935 uint32_t type = read32<ELFT::Endianness>(desc.data()); 936 uint32_t size = read32<ELFT::Endianness>(desc.data() + 4); 937 desc = desc.slice(8); 938 if (desc.size() < size) 939 return void(err(place) << "program property is too short"); 940 941 if (type == featureAndType) { 942 // We found a FEATURE_1_AND field. There may be more than one of these 943 // in a .note.gnu.property section, for a relocatable object we 944 // accumulate the bits set. 945 if (size < 4) 946 return void(err(place) << "FEATURE_1_AND entry is too short"); 947 f.andFeatures |= read32<ELFT::Endianness>(desc.data()); 948 } else if (ctx.arg.emachine == EM_AARCH64 && 949 type == GNU_PROPERTY_AARCH64_FEATURE_PAUTH) { 950 ArrayRef<uint8_t> contents = data ? *data : desc; 951 if (f.aarch64PauthAbiCoreInfo) { 952 return void( 953 err(contents.data()) 954 << "multiple GNU_PROPERTY_AARCH64_FEATURE_PAUTH entries are " 955 "not supported"); 956 } else if (size != 16) { 957 return void(err(contents.data()) 958 << "GNU_PROPERTY_AARCH64_FEATURE_PAUTH entry " 959 "is invalid: expected 16 bytes, but got " 960 << size); 961 } 962 f.aarch64PauthAbiCoreInfo = { 963 support::endian::read64<ELFT::Endianness>(&desc[0]), 964 support::endian::read64<ELFT::Endianness>(&desc[8])}; 965 } 966 967 // Padding is present in the note descriptor, if necessary. 968 desc = desc.slice(alignTo<(ELFT::Is64Bits ? 8 : 4)>(size)); 969 } 970 } 971 // Read the following info from the .note.gnu.property section and write it to 972 // the corresponding fields in `ObjFile`: 973 // - Feature flags (32 bits) representing x86, AArch64 or RISC-V features for 974 // hardware-assisted call flow control; 975 // - AArch64 PAuth ABI core info (16 bytes). 976 template <class ELFT> 977 static void readGnuProperty(Ctx &ctx, const InputSection &sec, 978 ObjFile<ELFT> &f) { 979 using Elf_Nhdr = typename ELFT::Nhdr; 980 using Elf_Note = typename ELFT::Note; 981 982 uint32_t featureAndType; 983 switch (ctx.arg.emachine) { 984 case EM_386: 985 case EM_X86_64: 986 featureAndType = GNU_PROPERTY_X86_FEATURE_1_AND; 987 break; 988 case EM_AARCH64: 989 featureAndType = GNU_PROPERTY_AARCH64_FEATURE_1_AND; 990 break; 991 case EM_RISCV: 992 featureAndType = GNU_PROPERTY_RISCV_FEATURE_1_AND; 993 break; 994 default: 995 return; 996 } 997 998 ArrayRef<uint8_t> data = sec.content(); 999 auto err = [&](const uint8_t *place) -> ELFSyncStream { 1000 auto diag = Err(ctx); 1001 diag << sec.file << ":(" << sec.name << "+0x" 1002 << Twine::utohexstr(place - sec.content().data()) << "): "; 1003 return diag; 1004 }; 1005 while (!data.empty()) { 1006 // Read one NOTE record. 1007 auto *nhdr = reinterpret_cast<const Elf_Nhdr *>(data.data()); 1008 if (data.size() < sizeof(Elf_Nhdr) || 1009 data.size() < nhdr->getSize(sec.addralign)) 1010 return void(err(data.data()) << "data is too short"); 1011 1012 Elf_Note note(*nhdr); 1013 if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") { 1014 data = data.slice(nhdr->getSize(sec.addralign)); 1015 continue; 1016 } 1017 1018 // Read a body of a NOTE record, which consists of type-length-value fields. 1019 ArrayRef<uint8_t> desc = note.getDesc(sec.addralign); 1020 const uint8_t *base = sec.content().data(); 1021 parseGnuPropertyNote<ELFT>(ctx, f, featureAndType, desc, base, &data); 1022 1023 // Go to next NOTE record to look for more FEATURE_1_AND descriptions. 1024 data = data.slice(nhdr->getSize(sec.addralign)); 1025 } 1026 } 1027 1028 template <class ELFT> 1029 InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx, uint32_t info) { 1030 if (info < this->sections.size()) { 1031 InputSectionBase *target = this->sections[info]; 1032 1033 // Strictly speaking, a relocation section must be included in the 1034 // group of the section it relocates. However, LLVM 3.3 and earlier 1035 // would fail to do so, so we gracefully handle that case. 1036 if (target == &InputSection::discarded) 1037 return nullptr; 1038 1039 if (target != nullptr) 1040 return target; 1041 } 1042 1043 Err(ctx) << this << ": relocation section (index " << idx 1044 << ") has invalid sh_info (" << info << ')'; 1045 return nullptr; 1046 } 1047 1048 // The function may be called concurrently for different input files. For 1049 // allocation, prefer makeThreadLocal which does not require holding a lock. 1050 template <class ELFT> 1051 InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx, 1052 const Elf_Shdr &sec, 1053 StringRef name) { 1054 if (name.starts_with(".n")) { 1055 // The GNU linker uses .note.GNU-stack section as a marker indicating 1056 // that the code in the object file does not expect that the stack is 1057 // executable (in terms of NX bit). If all input files have the marker, 1058 // the GNU linker adds a PT_GNU_STACK segment to tells the loader to 1059 // make the stack non-executable. Most object files have this section as 1060 // of 2017. 1061 // 1062 // But making the stack non-executable is a norm today for security 1063 // reasons. Failure to do so may result in a serious security issue. 1064 // Therefore, we make LLD always add PT_GNU_STACK unless it is 1065 // explicitly told to do otherwise (by -z execstack). Because the stack 1066 // executable-ness is controlled solely by command line options, 1067 // .note.GNU-stack sections are, with one exception, ignored. Report 1068 // an error if we encounter an executable .note.GNU-stack to force the 1069 // user to explicitly request an executable stack. 1070 if (name == ".note.GNU-stack") { 1071 if ((sec.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable && 1072 ctx.arg.zGnustack != GnuStackKind::Exec) { 1073 Err(ctx) << this 1074 << ": requires an executable stack, but -z execstack is not " 1075 "specified"; 1076 } 1077 return &InputSection::discarded; 1078 } 1079 1080 // Object files that use processor features such as Intel Control-Flow 1081 // Enforcement (CET), AArch64 Branch Target Identification BTI or RISC-V 1082 // Zicfilp/Zicfiss extensions, use a .note.gnu.property section containing 1083 // a bitfield of feature bits like the GNU_PROPERTY_X86_FEATURE_1_IBT flag. 1084 // 1085 // Since we merge bitmaps from multiple object files to create a new 1086 // .note.gnu.property containing a single AND'ed bitmap, we discard an input 1087 // file's .note.gnu.property section. 1088 if (name == ".note.gnu.property") { 1089 readGnuProperty<ELFT>(ctx, InputSection(*this, sec, name), *this); 1090 return &InputSection::discarded; 1091 } 1092 1093 // Split stacks is a feature to support a discontiguous stack, 1094 // commonly used in the programming language Go. For the details, 1095 // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled 1096 // for split stack will include a .note.GNU-split-stack section. 1097 if (name == ".note.GNU-split-stack") { 1098 if (ctx.arg.relocatable) { 1099 ErrAlways(ctx) << "cannot mix split-stack and non-split-stack in a " 1100 "relocatable link"; 1101 return &InputSection::discarded; 1102 } 1103 this->splitStack = true; 1104 return &InputSection::discarded; 1105 } 1106 1107 // An object file compiled for split stack, but where some of the 1108 // functions were compiled with the no_split_stack_attribute will 1109 // include a .note.GNU-no-split-stack section. 1110 if (name == ".note.GNU-no-split-stack") { 1111 this->someNoSplitStack = true; 1112 return &InputSection::discarded; 1113 } 1114 1115 // Strip existing .note.gnu.build-id sections so that the output won't have 1116 // more than one build-id. This is not usually a problem because input 1117 // object files normally don't have .build-id sections, but you can create 1118 // such files by "ld.{bfd,gold,lld} -r --build-id", and we want to guard 1119 // against it. 1120 if (name == ".note.gnu.build-id") 1121 return &InputSection::discarded; 1122 } 1123 1124 // The linker merges EH (exception handling) frames and creates a 1125 // .eh_frame_hdr section for runtime. So we handle them with a special 1126 // class. For relocatable outputs, they are just passed through. 1127 if (name == ".eh_frame" && !ctx.arg.relocatable) 1128 return makeThreadLocal<EhInputSection>(*this, sec, name); 1129 1130 if ((sec.sh_flags & SHF_MERGE) && shouldMerge(sec, name)) 1131 return makeThreadLocal<MergeInputSection>(*this, sec, name); 1132 return makeThreadLocal<InputSection>(*this, sec, name); 1133 } 1134 1135 // Initialize symbols. symbols is a parallel array to the corresponding ELF 1136 // symbol table. 1137 template <class ELFT> 1138 void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) { 1139 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 1140 if (!symbols) 1141 symbols = std::make_unique<Symbol *[]>(numSymbols); 1142 1143 // Some entries have been filled by LazyObjFile. 1144 auto *symtab = ctx.symtab.get(); 1145 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) 1146 if (!symbols[i]) 1147 symbols[i] = symtab->insert(CHECK2(eSyms[i].getName(stringTable), this)); 1148 1149 // Perform symbol resolution on non-local symbols. 1150 SmallVector<unsigned, 32> undefineds; 1151 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 1152 const Elf_Sym &eSym = eSyms[i]; 1153 uint32_t secIdx = eSym.st_shndx; 1154 if (secIdx == SHN_UNDEF) { 1155 undefineds.push_back(i); 1156 continue; 1157 } 1158 1159 uint8_t binding = eSym.getBinding(); 1160 uint8_t stOther = eSym.st_other; 1161 uint8_t type = eSym.getType(); 1162 uint64_t value = eSym.st_value; 1163 uint64_t size = eSym.st_size; 1164 1165 Symbol *sym = symbols[i]; 1166 sym->isUsedInRegularObj = true; 1167 if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) { 1168 if (value == 0 || value >= UINT32_MAX) 1169 Err(ctx) << this << ": common symbol '" << sym->getName() 1170 << "' has invalid alignment: " << value; 1171 hasCommonSyms = true; 1172 sym->resolve(ctx, CommonSymbol{ctx, this, StringRef(), binding, stOther, 1173 type, value, size}); 1174 continue; 1175 } 1176 1177 // Handle global defined symbols. Defined::section will be set in postParse. 1178 sym->resolve(ctx, Defined{ctx, this, StringRef(), binding, stOther, type, 1179 value, size, nullptr}); 1180 } 1181 1182 // Undefined symbols (excluding those defined relative to non-prevailing 1183 // sections) can trigger recursive extract. Process defined symbols first so 1184 // that the relative order between a defined symbol and an undefined symbol 1185 // does not change the symbol resolution behavior. In addition, a set of 1186 // interconnected symbols will all be resolved to the same file, instead of 1187 // being resolved to different files. 1188 for (unsigned i : undefineds) { 1189 const Elf_Sym &eSym = eSyms[i]; 1190 Symbol *sym = symbols[i]; 1191 sym->resolve(ctx, Undefined{this, StringRef(), eSym.getBinding(), 1192 eSym.st_other, eSym.getType()}); 1193 sym->isUsedInRegularObj = true; 1194 sym->referenced = true; 1195 } 1196 } 1197 1198 template <class ELFT> 1199 void ObjFile<ELFT>::initSectionsAndLocalSyms(bool ignoreComdats) { 1200 if (!justSymbols) 1201 initializeSections(ignoreComdats, getObj()); 1202 1203 if (!firstGlobal) 1204 return; 1205 SymbolUnion *locals = makeThreadLocalN<SymbolUnion>(firstGlobal); 1206 memset(locals, 0, sizeof(SymbolUnion) * firstGlobal); 1207 1208 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 1209 for (size_t i = 0, end = firstGlobal; i != end; ++i) { 1210 const Elf_Sym &eSym = eSyms[i]; 1211 uint32_t secIdx = eSym.st_shndx; 1212 if (LLVM_UNLIKELY(secIdx == SHN_XINDEX)) 1213 secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable)); 1214 else if (secIdx >= SHN_LORESERVE) 1215 secIdx = 0; 1216 if (LLVM_UNLIKELY(secIdx >= sections.size())) { 1217 Err(ctx) << this << ": invalid section index: " << secIdx; 1218 secIdx = 0; 1219 } 1220 if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL)) 1221 ErrAlways(ctx) << this << ": non-local symbol (" << i 1222 << ") found at index < .symtab's sh_info (" << end << ")"; 1223 1224 InputSectionBase *sec = sections[secIdx]; 1225 uint8_t type = eSym.getType(); 1226 if (type == STT_FILE) 1227 sourceFile = CHECK2(eSym.getName(stringTable), this); 1228 unsigned stName = eSym.st_name; 1229 if (LLVM_UNLIKELY(stringTable.size() <= stName)) { 1230 Err(ctx) << this << ": invalid symbol name offset"; 1231 stName = 0; 1232 } 1233 StringRef name(stringTable.data() + stName); 1234 1235 symbols[i] = reinterpret_cast<Symbol *>(locals + i); 1236 if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded) 1237 new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type, 1238 /*discardedSecIdx=*/secIdx); 1239 else 1240 new (symbols[i]) Defined(ctx, this, name, STB_LOCAL, eSym.st_other, type, 1241 eSym.st_value, eSym.st_size, sec); 1242 symbols[i]->partition = 1; 1243 symbols[i]->isUsedInRegularObj = true; 1244 } 1245 } 1246 1247 // Called after all ObjFile::parse is called for all ObjFiles. This checks 1248 // duplicate symbols and may do symbol property merge in the future. 1249 template <class ELFT> void ObjFile<ELFT>::postParse() { 1250 static std::mutex mu; 1251 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 1252 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 1253 const Elf_Sym &eSym = eSyms[i]; 1254 Symbol &sym = *symbols[i]; 1255 uint32_t secIdx = eSym.st_shndx; 1256 uint8_t binding = eSym.getBinding(); 1257 if (LLVM_UNLIKELY(binding != STB_GLOBAL && binding != STB_WEAK && 1258 binding != STB_GNU_UNIQUE)) 1259 Err(ctx) << this << ": symbol (" << i 1260 << ") has invalid binding: " << (int)binding; 1261 1262 // st_value of STT_TLS represents the assigned offset, not the actual 1263 // address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can 1264 // only be referenced by special TLS relocations. It is usually an error if 1265 // a STT_TLS symbol is replaced by a non-STT_TLS symbol, vice versa. 1266 if (LLVM_UNLIKELY(sym.isTls()) && eSym.getType() != STT_TLS && 1267 eSym.getType() != STT_NOTYPE) 1268 Err(ctx) << "TLS attribute mismatch: " << &sym << "\n>>> in " << sym.file 1269 << "\n>>> in " << this; 1270 1271 // Handle non-COMMON defined symbol below. !sym.file allows a symbol 1272 // assignment to redefine a symbol without an error. 1273 if (!sym.isDefined() || secIdx == SHN_UNDEF) 1274 continue; 1275 if (LLVM_UNLIKELY(secIdx >= SHN_LORESERVE)) { 1276 if (secIdx == SHN_COMMON) 1277 continue; 1278 if (secIdx == SHN_XINDEX) 1279 secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable)); 1280 else 1281 secIdx = 0; 1282 } 1283 1284 if (LLVM_UNLIKELY(secIdx >= sections.size())) { 1285 Err(ctx) << this << ": invalid section index: " << secIdx; 1286 continue; 1287 } 1288 InputSectionBase *sec = sections[secIdx]; 1289 if (sec == &InputSection::discarded) { 1290 if (sym.traced) { 1291 printTraceSymbol(Undefined{this, sym.getName(), sym.binding, 1292 sym.stOther, sym.type, secIdx}, 1293 sym.getName()); 1294 } 1295 if (sym.file == this) { 1296 std::lock_guard<std::mutex> lock(mu); 1297 ctx.nonPrevailingSyms.emplace_back(&sym, secIdx); 1298 } 1299 continue; 1300 } 1301 1302 if (sym.file == this) { 1303 cast<Defined>(sym).section = sec; 1304 continue; 1305 } 1306 1307 if (sym.binding == STB_WEAK || binding == STB_WEAK) 1308 continue; 1309 std::lock_guard<std::mutex> lock(mu); 1310 ctx.duplicates.push_back({&sym, this, sec, eSym.st_value}); 1311 } 1312 } 1313 1314 // The handling of tentative definitions (COMMON symbols) in archives is murky. 1315 // A tentative definition will be promoted to a global definition if there are 1316 // no non-tentative definitions to dominate it. When we hold a tentative 1317 // definition to a symbol and are inspecting archive members for inclusion 1318 // there are 2 ways we can proceed: 1319 // 1320 // 1) Consider the tentative definition a 'real' definition (ie promotion from 1321 // tentative to real definition has already happened) and not inspect 1322 // archive members for Global/Weak definitions to replace the tentative 1323 // definition. An archive member would only be included if it satisfies some 1324 // other undefined symbol. This is the behavior Gold uses. 1325 // 1326 // 2) Consider the tentative definition as still undefined (ie the promotion to 1327 // a real definition happens only after all symbol resolution is done). 1328 // The linker searches archive members for STB_GLOBAL definitions to 1329 // replace the tentative definition with. This is the behavior used by 1330 // GNU ld. 1331 // 1332 // The second behavior is inherited from SysVR4, which based it on the FORTRAN 1333 // COMMON BLOCK model. This behavior is needed for proper initialization in old 1334 // (pre F90) FORTRAN code that is packaged into an archive. 1335 // 1336 // The following functions search archive members for definitions to replace 1337 // tentative definitions (implementing behavior 2). 1338 static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName, 1339 StringRef archiveName) { 1340 IRSymtabFile symtabFile = check(readIRSymtab(mb)); 1341 for (const irsymtab::Reader::SymbolRef &sym : 1342 symtabFile.TheReader.symbols()) { 1343 if (sym.isGlobal() && sym.getName() == symName) 1344 return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon(); 1345 } 1346 return false; 1347 } 1348 1349 template <class ELFT> 1350 static bool isNonCommonDef(Ctx &ctx, ELFKind ekind, MemoryBufferRef mb, 1351 StringRef symName, StringRef archiveName) { 1352 ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(ctx, ekind, mb, archiveName); 1353 obj->init(); 1354 StringRef stringtable = obj->getStringTable(); 1355 1356 for (auto sym : obj->template getGlobalELFSyms<ELFT>()) { 1357 Expected<StringRef> name = sym.getName(stringtable); 1358 if (name && name.get() == symName) 1359 return sym.isDefined() && sym.getBinding() == STB_GLOBAL && 1360 !sym.isCommon(); 1361 } 1362 return false; 1363 } 1364 1365 static bool isNonCommonDef(Ctx &ctx, MemoryBufferRef mb, StringRef symName, 1366 StringRef archiveName) { 1367 switch (getELFKind(ctx, mb, archiveName)) { 1368 case ELF32LEKind: 1369 return isNonCommonDef<ELF32LE>(ctx, ELF32LEKind, mb, symName, archiveName); 1370 case ELF32BEKind: 1371 return isNonCommonDef<ELF32BE>(ctx, ELF32BEKind, mb, symName, archiveName); 1372 case ELF64LEKind: 1373 return isNonCommonDef<ELF64LE>(ctx, ELF64LEKind, mb, symName, archiveName); 1374 case ELF64BEKind: 1375 return isNonCommonDef<ELF64BE>(ctx, ELF64BEKind, mb, symName, archiveName); 1376 default: 1377 llvm_unreachable("getELFKind"); 1378 } 1379 } 1380 1381 SharedFile::SharedFile(Ctx &ctx, MemoryBufferRef m, StringRef defaultSoName) 1382 : ELFFileBase(ctx, SharedKind, getELFKind(ctx, m, ""), m), 1383 soName(defaultSoName), isNeeded(!ctx.arg.asNeeded) {} 1384 1385 // Parse the version definitions in the object file if present, and return a 1386 // vector whose nth element contains a pointer to the Elf_Verdef for version 1387 // identifier n. Version identifiers that are not definitions map to nullptr. 1388 template <typename ELFT> 1389 static SmallVector<const void *, 0> 1390 parseVerdefs(const uint8_t *base, const typename ELFT::Shdr *sec) { 1391 if (!sec) 1392 return {}; 1393 1394 // Build the Verdefs array by following the chain of Elf_Verdef objects 1395 // from the start of the .gnu.version_d section. 1396 SmallVector<const void *, 0> verdefs; 1397 const uint8_t *verdef = base + sec->sh_offset; 1398 for (unsigned i = 0, e = sec->sh_info; i != e; ++i) { 1399 auto *curVerdef = reinterpret_cast<const typename ELFT::Verdef *>(verdef); 1400 verdef += curVerdef->vd_next; 1401 unsigned verdefIndex = curVerdef->vd_ndx; 1402 if (verdefIndex >= verdefs.size()) 1403 verdefs.resize(verdefIndex + 1); 1404 verdefs[verdefIndex] = curVerdef; 1405 } 1406 return verdefs; 1407 } 1408 1409 // Parse SHT_GNU_verneed to properly set the name of a versioned undefined 1410 // symbol. We detect fatal issues which would cause vulnerabilities, but do not 1411 // implement sophisticated error checking like in llvm-readobj because the value 1412 // of such diagnostics is low. 1413 template <typename ELFT> 1414 std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj, 1415 const typename ELFT::Shdr *sec) { 1416 if (!sec) 1417 return {}; 1418 std::vector<uint32_t> verneeds; 1419 ArrayRef<uint8_t> data = CHECK2(obj.getSectionContents(*sec), this); 1420 const uint8_t *verneedBuf = data.begin(); 1421 for (unsigned i = 0; i != sec->sh_info; ++i) { 1422 if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end()) { 1423 Err(ctx) << this << " has an invalid Verneed"; 1424 break; 1425 } 1426 auto *vn = reinterpret_cast<const typename ELFT::Verneed *>(verneedBuf); 1427 const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux; 1428 for (unsigned j = 0; j != vn->vn_cnt; ++j) { 1429 if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end()) { 1430 Err(ctx) << this << " has an invalid Vernaux"; 1431 break; 1432 } 1433 auto *aux = reinterpret_cast<const typename ELFT::Vernaux *>(vernauxBuf); 1434 if (aux->vna_name >= this->stringTable.size()) { 1435 Err(ctx) << this << " has a Vernaux with an invalid vna_name"; 1436 break; 1437 } 1438 uint16_t version = aux->vna_other & VERSYM_VERSION; 1439 if (version >= verneeds.size()) 1440 verneeds.resize(version + 1); 1441 verneeds[version] = aux->vna_name; 1442 vernauxBuf += aux->vna_next; 1443 } 1444 verneedBuf += vn->vn_next; 1445 } 1446 return verneeds; 1447 } 1448 1449 // Parse PT_GNU_PROPERTY segments in DSO. The process is similar to 1450 // readGnuProperty, but we don't have the InputSection information. 1451 template <typename ELFT> 1452 void SharedFile::parseGnuAndFeatures(const ELFFile<ELFT> &obj) { 1453 if (ctx.arg.emachine != EM_AARCH64) 1454 return; 1455 const uint8_t *base = obj.base(); 1456 auto phdrs = CHECK2(obj.program_headers(), this); 1457 for (auto phdr : phdrs) { 1458 if (phdr.p_type != PT_GNU_PROPERTY) 1459 continue; 1460 typename ELFT::Note note( 1461 *reinterpret_cast<const typename ELFT::Nhdr *>(base + phdr.p_offset)); 1462 if (note.getType() != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") 1463 continue; 1464 1465 ArrayRef<uint8_t> desc = note.getDesc(phdr.p_align); 1466 parseGnuPropertyNote<ELFT>(ctx, *this, GNU_PROPERTY_AARCH64_FEATURE_1_AND, 1467 desc, base); 1468 } 1469 } 1470 1471 // We do not usually care about alignments of data in shared object 1472 // files because the loader takes care of it. However, if we promote a 1473 // DSO symbol to point to .bss due to copy relocation, we need to keep 1474 // the original alignment requirements. We infer it in this function. 1475 template <typename ELFT> 1476 static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> sections, 1477 const typename ELFT::Sym &sym) { 1478 uint64_t ret = UINT64_MAX; 1479 if (sym.st_value) 1480 ret = 1ULL << llvm::countr_zero((uint64_t)sym.st_value); 1481 if (0 < sym.st_shndx && sym.st_shndx < sections.size()) 1482 ret = std::min<uint64_t>(ret, sections[sym.st_shndx].sh_addralign); 1483 return (ret > UINT32_MAX) ? 0 : ret; 1484 } 1485 1486 // Fully parse the shared object file. 1487 // 1488 // This function parses symbol versions. If a DSO has version information, 1489 // the file has a ".gnu.version_d" section which contains symbol version 1490 // definitions. Each symbol is associated to one version through a table in 1491 // ".gnu.version" section. That table is a parallel array for the symbol 1492 // table, and each table entry contains an index in ".gnu.version_d". 1493 // 1494 // The special index 0 is reserved for VERF_NDX_LOCAL and 1 is for 1495 // VER_NDX_GLOBAL. There's no table entry for these special versions in 1496 // ".gnu.version_d". 1497 // 1498 // The file format for symbol versioning is perhaps a bit more complicated 1499 // than necessary, but you can easily understand the code if you wrap your 1500 // head around the data structure described above. 1501 template <class ELFT> void SharedFile::parse() { 1502 using Elf_Dyn = typename ELFT::Dyn; 1503 using Elf_Shdr = typename ELFT::Shdr; 1504 using Elf_Sym = typename ELFT::Sym; 1505 using Elf_Verdef = typename ELFT::Verdef; 1506 using Elf_Versym = typename ELFT::Versym; 1507 1508 ArrayRef<Elf_Dyn> dynamicTags; 1509 const ELFFile<ELFT> obj = this->getObj<ELFT>(); 1510 ArrayRef<Elf_Shdr> sections = getELFShdrs<ELFT>(); 1511 1512 const Elf_Shdr *versymSec = nullptr; 1513 const Elf_Shdr *verdefSec = nullptr; 1514 const Elf_Shdr *verneedSec = nullptr; 1515 symbols = std::make_unique<Symbol *[]>(numSymbols); 1516 1517 // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d. 1518 for (const Elf_Shdr &sec : sections) { 1519 switch (sec.sh_type) { 1520 default: 1521 continue; 1522 case SHT_DYNAMIC: 1523 dynamicTags = 1524 CHECK2(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this); 1525 break; 1526 case SHT_GNU_versym: 1527 versymSec = &sec; 1528 break; 1529 case SHT_GNU_verdef: 1530 verdefSec = &sec; 1531 break; 1532 case SHT_GNU_verneed: 1533 verneedSec = &sec; 1534 break; 1535 } 1536 } 1537 1538 if (versymSec && numSymbols == 0) { 1539 ErrAlways(ctx) << "SHT_GNU_versym should be associated with symbol table"; 1540 return; 1541 } 1542 1543 // Search for a DT_SONAME tag to initialize this->soName. 1544 for (const Elf_Dyn &dyn : dynamicTags) { 1545 if (dyn.d_tag == DT_NEEDED) { 1546 uint64_t val = dyn.getVal(); 1547 if (val >= this->stringTable.size()) { 1548 Err(ctx) << this << ": invalid DT_NEEDED entry"; 1549 return; 1550 } 1551 dtNeeded.push_back(this->stringTable.data() + val); 1552 } else if (dyn.d_tag == DT_SONAME) { 1553 uint64_t val = dyn.getVal(); 1554 if (val >= this->stringTable.size()) { 1555 Err(ctx) << this << ": invalid DT_SONAME entry"; 1556 return; 1557 } 1558 soName = this->stringTable.data() + val; 1559 } 1560 } 1561 1562 // DSOs are uniquified not by filename but by soname. 1563 StringSaver &ss = ctx.saver; 1564 DenseMap<CachedHashStringRef, SharedFile *>::iterator it; 1565 bool wasInserted; 1566 std::tie(it, wasInserted) = 1567 ctx.symtab->soNames.try_emplace(CachedHashStringRef(soName), this); 1568 1569 // If a DSO appears more than once on the command line with and without 1570 // --as-needed, --no-as-needed takes precedence over --as-needed because a 1571 // user can add an extra DSO with --no-as-needed to force it to be added to 1572 // the dependency list. 1573 it->second->isNeeded |= isNeeded; 1574 if (!wasInserted) 1575 return; 1576 1577 ctx.sharedFiles.push_back(this); 1578 1579 verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec); 1580 std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec); 1581 parseGnuAndFeatures<ELFT>(obj); 1582 1583 // Parse ".gnu.version" section which is a parallel array for the symbol 1584 // table. If a given file doesn't have a ".gnu.version" section, we use 1585 // VER_NDX_GLOBAL. 1586 size_t size = numSymbols - firstGlobal; 1587 std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL); 1588 if (versymSec) { 1589 ArrayRef<Elf_Versym> versym = 1590 CHECK2(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec), 1591 this) 1592 .slice(firstGlobal); 1593 for (size_t i = 0; i < size; ++i) 1594 versyms[i] = versym[i].vs_index; 1595 } 1596 1597 // System libraries can have a lot of symbols with versions. Using a 1598 // fixed buffer for computing the versions name (foo@ver) can save a 1599 // lot of allocations. 1600 SmallString<0> versionedNameBuffer; 1601 1602 // Add symbols to the symbol table. 1603 ArrayRef<Elf_Sym> syms = this->getGlobalELFSyms<ELFT>(); 1604 for (size_t i = 0, e = syms.size(); i != e; ++i) { 1605 const Elf_Sym &sym = syms[i]; 1606 1607 // ELF spec requires that all local symbols precede weak or global 1608 // symbols in each symbol table, and the index of first non-local symbol 1609 // is stored to sh_info. If a local symbol appears after some non-local 1610 // symbol, that's a violation of the spec. 1611 StringRef name = CHECK2(sym.getName(stringTable), this); 1612 if (sym.getBinding() == STB_LOCAL) { 1613 Err(ctx) << this << ": invalid local symbol '" << name 1614 << "' in global part of symbol table"; 1615 continue; 1616 } 1617 1618 const uint16_t ver = versyms[i], idx = ver & ~VERSYM_HIDDEN; 1619 if (sym.isUndefined()) { 1620 // For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but 1621 // as of binutils 2.34, GNU ld produces VER_NDX_LOCAL. 1622 if (ver != VER_NDX_LOCAL && ver != VER_NDX_GLOBAL) { 1623 if (idx >= verneeds.size()) { 1624 ErrAlways(ctx) << "corrupt input file: version need index " << idx 1625 << " for symbol " << name 1626 << " is out of bounds\n>>> defined in " << this; 1627 continue; 1628 } 1629 StringRef verName = stringTable.data() + verneeds[idx]; 1630 versionedNameBuffer.clear(); 1631 name = ss.save((name + "@" + verName).toStringRef(versionedNameBuffer)); 1632 } 1633 Symbol *s = ctx.symtab->addSymbol( 1634 Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); 1635 s->isExported = true; 1636 if (sym.getBinding() != STB_WEAK && 1637 ctx.arg.unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore) 1638 requiredSymbols.push_back(s); 1639 continue; 1640 } 1641 1642 if (ver == VER_NDX_LOCAL || 1643 (ver != VER_NDX_GLOBAL && idx >= verdefs.size())) { 1644 // In GNU ld < 2.31 (before 3be08ea4728b56d35e136af4e6fd3086ade17764), the 1645 // MIPS port puts _gp_disp symbol into DSO files and incorrectly assigns 1646 // VER_NDX_LOCAL. Workaround this bug. 1647 if (ctx.arg.emachine == EM_MIPS && name == "_gp_disp") 1648 continue; 1649 ErrAlways(ctx) << "corrupt input file: version definition index " << idx 1650 << " for symbol " << name 1651 << " is out of bounds\n>>> defined in " << this; 1652 continue; 1653 } 1654 1655 uint32_t alignment = getAlignment<ELFT>(sections, sym); 1656 if (ver == idx) { 1657 auto *s = ctx.symtab->addSymbol( 1658 SharedSymbol{*this, name, sym.getBinding(), sym.st_other, 1659 sym.getType(), sym.st_value, sym.st_size, alignment}); 1660 s->dsoDefined = true; 1661 if (s->file == this) 1662 s->versionId = ver; 1663 } 1664 1665 // Also add the symbol with the versioned name to handle undefined symbols 1666 // with explicit versions. 1667 if (ver == VER_NDX_GLOBAL) 1668 continue; 1669 1670 StringRef verName = 1671 stringTable.data() + 1672 reinterpret_cast<const Elf_Verdef *>(verdefs[idx])->getAux()->vda_name; 1673 versionedNameBuffer.clear(); 1674 name = (name + "@" + verName).toStringRef(versionedNameBuffer); 1675 auto *s = ctx.symtab->addSymbol( 1676 SharedSymbol{*this, ss.save(name), sym.getBinding(), sym.st_other, 1677 sym.getType(), sym.st_value, sym.st_size, alignment}); 1678 s->dsoDefined = true; 1679 if (s->file == this) 1680 s->versionId = idx; 1681 } 1682 } 1683 1684 static ELFKind getBitcodeELFKind(const Triple &t) { 1685 if (t.isLittleEndian()) 1686 return t.isArch64Bit() ? ELF64LEKind : ELF32LEKind; 1687 return t.isArch64Bit() ? ELF64BEKind : ELF32BEKind; 1688 } 1689 1690 static uint16_t getBitcodeMachineKind(Ctx &ctx, StringRef path, 1691 const Triple &t) { 1692 switch (t.getArch()) { 1693 case Triple::aarch64: 1694 case Triple::aarch64_be: 1695 return EM_AARCH64; 1696 case Triple::amdgcn: 1697 case Triple::r600: 1698 return EM_AMDGPU; 1699 case Triple::arm: 1700 case Triple::armeb: 1701 case Triple::thumb: 1702 case Triple::thumbeb: 1703 return EM_ARM; 1704 case Triple::avr: 1705 return EM_AVR; 1706 case Triple::hexagon: 1707 return EM_HEXAGON; 1708 case Triple::loongarch32: 1709 case Triple::loongarch64: 1710 return EM_LOONGARCH; 1711 case Triple::mips: 1712 case Triple::mipsel: 1713 case Triple::mips64: 1714 case Triple::mips64el: 1715 return EM_MIPS; 1716 case Triple::msp430: 1717 return EM_MSP430; 1718 case Triple::ppc: 1719 case Triple::ppcle: 1720 return EM_PPC; 1721 case Triple::ppc64: 1722 case Triple::ppc64le: 1723 return EM_PPC64; 1724 case Triple::riscv32: 1725 case Triple::riscv64: 1726 return EM_RISCV; 1727 case Triple::sparcv9: 1728 return EM_SPARCV9; 1729 case Triple::systemz: 1730 return EM_S390; 1731 case Triple::x86: 1732 return t.isOSIAMCU() ? EM_IAMCU : EM_386; 1733 case Triple::x86_64: 1734 return EM_X86_64; 1735 default: 1736 ErrAlways(ctx) << path 1737 << ": could not infer e_machine from bitcode target triple " 1738 << t.str(); 1739 return EM_NONE; 1740 } 1741 } 1742 1743 static uint8_t getOsAbi(const Triple &t) { 1744 switch (t.getOS()) { 1745 case Triple::AMDHSA: 1746 return ELF::ELFOSABI_AMDGPU_HSA; 1747 case Triple::AMDPAL: 1748 return ELF::ELFOSABI_AMDGPU_PAL; 1749 case Triple::Mesa3D: 1750 return ELF::ELFOSABI_AMDGPU_MESA3D; 1751 default: 1752 return ELF::ELFOSABI_NONE; 1753 } 1754 } 1755 1756 BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName, 1757 uint64_t offsetInArchive, bool lazy) 1758 : InputFile(ctx, BitcodeKind, mb) { 1759 this->archiveName = archiveName; 1760 this->lazy = lazy; 1761 1762 std::string path = mb.getBufferIdentifier().str(); 1763 if (ctx.arg.thinLTOIndexOnly) 1764 path = replaceThinLTOSuffix(ctx, mb.getBufferIdentifier()); 1765 1766 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique 1767 // name. If two archives define two members with the same name, this 1768 // causes a collision which result in only one of the objects being taken 1769 // into consideration at LTO time (which very likely causes undefined 1770 // symbols later in the link stage). So we append file offset to make 1771 // filename unique. 1772 StringSaver &ss = ctx.saver; 1773 StringRef name = archiveName.empty() 1774 ? ss.save(path) 1775 : ss.save(archiveName + "(" + path::filename(path) + 1776 " at " + utostr(offsetInArchive) + ")"); 1777 MemoryBufferRef mbref(mb.getBuffer(), name); 1778 1779 obj = CHECK2(lto::InputFile::create(mbref), this); 1780 1781 Triple t(obj->getTargetTriple()); 1782 ekind = getBitcodeELFKind(t); 1783 emachine = getBitcodeMachineKind(ctx, mb.getBufferIdentifier(), t); 1784 osabi = getOsAbi(t); 1785 } 1786 1787 static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { 1788 switch (gvVisibility) { 1789 case GlobalValue::DefaultVisibility: 1790 return STV_DEFAULT; 1791 case GlobalValue::HiddenVisibility: 1792 return STV_HIDDEN; 1793 case GlobalValue::ProtectedVisibility: 1794 return STV_PROTECTED; 1795 } 1796 llvm_unreachable("unknown visibility"); 1797 } 1798 1799 static void createBitcodeSymbol(Ctx &ctx, Symbol *&sym, 1800 const lto::InputFile::Symbol &objSym, 1801 BitcodeFile &f) { 1802 uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL; 1803 uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE; 1804 uint8_t visibility = mapVisibility(objSym.getVisibility()); 1805 1806 if (!sym) { 1807 // Symbols can be duplicated in bitcode files because of '#include' and 1808 // linkonce_odr. Use uniqueSaver to save symbol names for de-duplication. 1809 // Update objSym.Name to reference (via StringRef) the string saver's copy; 1810 // this way LTO can reference the same string saver's copy rather than 1811 // keeping copies of its own. 1812 objSym.Name = ctx.uniqueSaver.save(objSym.getName()); 1813 sym = ctx.symtab->insert(objSym.getName()); 1814 } 1815 1816 if (objSym.isUndefined()) { 1817 Undefined newSym(&f, StringRef(), binding, visibility, type); 1818 sym->resolve(ctx, newSym); 1819 sym->referenced = true; 1820 return; 1821 } 1822 1823 if (objSym.isCommon()) { 1824 sym->resolve(ctx, CommonSymbol{ctx, &f, StringRef(), binding, visibility, 1825 STT_OBJECT, objSym.getCommonAlignment(), 1826 objSym.getCommonSize()}); 1827 } else { 1828 Defined newSym(ctx, &f, StringRef(), binding, visibility, type, 0, 0, 1829 nullptr); 1830 // The definition can be omitted if all bitcode definitions satisfy 1831 // `canBeOmittedFromSymbolTable()` and isUsedInRegularObj is false. 1832 // The latter condition is tested in parseVersionAndComputeIsPreemptible. 1833 sym->ltoCanOmit = objSym.canBeOmittedFromSymbolTable() && 1834 (!sym->isDefined() || sym->ltoCanOmit); 1835 sym->resolve(ctx, newSym); 1836 } 1837 } 1838 1839 void BitcodeFile::parse() { 1840 for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) { 1841 keptComdats.push_back( 1842 s.second == Comdat::NoDeduplicate || 1843 ctx.symtab->comdatGroups.try_emplace(CachedHashStringRef(s.first), this) 1844 .second); 1845 } 1846 1847 if (numSymbols == 0) { 1848 numSymbols = obj->symbols().size(); 1849 symbols = std::make_unique<Symbol *[]>(numSymbols); 1850 } 1851 // Process defined symbols first. See the comment in 1852 // ObjFile<ELFT>::initializeSymbols. 1853 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) 1854 if (!irSym.isUndefined()) 1855 createBitcodeSymbol(ctx, symbols[i], irSym, *this); 1856 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) 1857 if (irSym.isUndefined()) 1858 createBitcodeSymbol(ctx, symbols[i], irSym, *this); 1859 1860 for (auto l : obj->getDependentLibraries()) 1861 addDependentLibrary(ctx, l, this); 1862 } 1863 1864 void BitcodeFile::parseLazy() { 1865 numSymbols = obj->symbols().size(); 1866 symbols = std::make_unique<Symbol *[]>(numSymbols); 1867 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) { 1868 // Symbols can be duplicated in bitcode files because of '#include' and 1869 // linkonce_odr. Use uniqueSaver to save symbol names for de-duplication. 1870 // Update objSym.Name to reference (via StringRef) the string saver's copy; 1871 // this way LTO can reference the same string saver's copy rather than 1872 // keeping copies of its own. 1873 irSym.Name = ctx.uniqueSaver.save(irSym.getName()); 1874 if (!irSym.isUndefined()) { 1875 auto *sym = ctx.symtab->insert(irSym.getName()); 1876 sym->resolve(ctx, LazySymbol{*this}); 1877 symbols[i] = sym; 1878 } 1879 } 1880 } 1881 1882 void BitcodeFile::postParse() { 1883 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) { 1884 const Symbol &sym = *symbols[i]; 1885 if (sym.file == this || !sym.isDefined() || irSym.isUndefined() || 1886 irSym.isCommon() || irSym.isWeak()) 1887 continue; 1888 int c = irSym.getComdatIndex(); 1889 if (c != -1 && !keptComdats[c]) 1890 continue; 1891 reportDuplicate(ctx, sym, this, nullptr, 0); 1892 } 1893 } 1894 1895 void BinaryFile::parse() { 1896 ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer()); 1897 auto *section = 1898 make<InputSection>(this, ".data", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE, 1899 /*addralign=*/8, /*entsize=*/0, data); 1900 sections.push_back(section); 1901 1902 // For each input file foo that is embedded to a result as a binary 1903 // blob, we define _binary_foo_{start,end,size} symbols, so that 1904 // user programs can access blobs by name. Non-alphanumeric 1905 // characters in a filename are replaced with underscore. 1906 std::string s = "_binary_" + mb.getBufferIdentifier().str(); 1907 for (char &c : s) 1908 if (!isAlnum(c)) 1909 c = '_'; 1910 1911 llvm::StringSaver &ss = ctx.saver; 1912 ctx.symtab->addAndCheckDuplicate( 1913 ctx, Defined{ctx, this, ss.save(s + "_start"), STB_GLOBAL, STV_DEFAULT, 1914 STT_OBJECT, 0, 0, section}); 1915 ctx.symtab->addAndCheckDuplicate( 1916 ctx, Defined{ctx, this, ss.save(s + "_end"), STB_GLOBAL, STV_DEFAULT, 1917 STT_OBJECT, data.size(), 0, section}); 1918 ctx.symtab->addAndCheckDuplicate( 1919 ctx, Defined{ctx, this, ss.save(s + "_size"), STB_GLOBAL, STV_DEFAULT, 1920 STT_OBJECT, data.size(), 0, nullptr}); 1921 } 1922 1923 InputFile *elf::createInternalFile(Ctx &ctx, StringRef name) { 1924 auto *file = 1925 make<InputFile>(ctx, InputFile::InternalKind, MemoryBufferRef("", name)); 1926 // References from an internal file do not lead to --warn-backrefs 1927 // diagnostics. 1928 file->groupId = 0; 1929 return file; 1930 } 1931 1932 std::unique_ptr<ELFFileBase> elf::createObjFile(Ctx &ctx, MemoryBufferRef mb, 1933 StringRef archiveName, 1934 bool lazy) { 1935 std::unique_ptr<ELFFileBase> f; 1936 switch (getELFKind(ctx, mb, archiveName)) { 1937 case ELF32LEKind: 1938 f = std::make_unique<ObjFile<ELF32LE>>(ctx, ELF32LEKind, mb, archiveName); 1939 break; 1940 case ELF32BEKind: 1941 f = std::make_unique<ObjFile<ELF32BE>>(ctx, ELF32BEKind, mb, archiveName); 1942 break; 1943 case ELF64LEKind: 1944 f = std::make_unique<ObjFile<ELF64LE>>(ctx, ELF64LEKind, mb, archiveName); 1945 break; 1946 case ELF64BEKind: 1947 f = std::make_unique<ObjFile<ELF64BE>>(ctx, ELF64BEKind, mb, archiveName); 1948 break; 1949 default: 1950 llvm_unreachable("getELFKind"); 1951 } 1952 f->init(); 1953 f->lazy = lazy; 1954 return f; 1955 } 1956 1957 template <class ELFT> void ObjFile<ELFT>::parseLazy() { 1958 const ArrayRef<typename ELFT::Sym> eSyms = this->getELFSyms<ELFT>(); 1959 numSymbols = eSyms.size(); 1960 symbols = std::make_unique<Symbol *[]>(numSymbols); 1961 1962 // resolve() may trigger this->extract() if an existing symbol is an undefined 1963 // symbol. If that happens, this function has served its purpose, and we can 1964 // exit from the loop early. 1965 auto *symtab = ctx.symtab.get(); 1966 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 1967 if (eSyms[i].st_shndx == SHN_UNDEF) 1968 continue; 1969 symbols[i] = symtab->insert(CHECK2(eSyms[i].getName(stringTable), this)); 1970 symbols[i]->resolve(ctx, LazySymbol{*this}); 1971 if (!lazy) 1972 break; 1973 } 1974 } 1975 1976 bool InputFile::shouldExtractForCommon(StringRef name) const { 1977 if (isa<BitcodeFile>(this)) 1978 return isBitcodeNonCommonDef(mb, name, archiveName); 1979 1980 return isNonCommonDef(ctx, mb, name, archiveName); 1981 } 1982 1983 std::string elf::replaceThinLTOSuffix(Ctx &ctx, StringRef path) { 1984 auto [suffix, repl] = ctx.arg.thinLTOObjectSuffixReplace; 1985 if (path.consume_back(suffix)) 1986 return (path + repl).str(); 1987 return std::string(path); 1988 } 1989 1990 template class elf::ObjFile<ELF32LE>; 1991 template class elf::ObjFile<ELF32BE>; 1992 template class elf::ObjFile<ELF64LE>; 1993 template class elf::ObjFile<ELF64BE>; 1994 1995 template void SharedFile::parse<ELF32LE>(); 1996 template void SharedFile::parse<ELF32BE>(); 1997 template void SharedFile::parse<ELF64LE>(); 1998 template void SharedFile::parse<ELF64BE>(); 1999