1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputFiles.h" 10 #include "Config.h" 11 #include "DWARF.h" 12 #include "Driver.h" 13 #include "InputSection.h" 14 #include "LinkerScript.h" 15 #include "SymbolTable.h" 16 #include "Symbols.h" 17 #include "SyntheticSections.h" 18 #include "Target.h" 19 #include "lld/Common/CommonLinkerContext.h" 20 #include "lld/Common/DWARF.h" 21 #include "llvm/ADT/CachedHashString.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/LTO/LTO.h" 24 #include "llvm/Object/IRObjectFile.h" 25 #include "llvm/Support/ARMAttributeParser.h" 26 #include "llvm/Support/ARMBuildAttributes.h" 27 #include "llvm/Support/Endian.h" 28 #include "llvm/Support/FileSystem.h" 29 #include "llvm/Support/Path.h" 30 #include "llvm/Support/RISCVAttributeParser.h" 31 #include "llvm/Support/TarWriter.h" 32 #include "llvm/Support/raw_ostream.h" 33 #include <optional> 34 35 using namespace llvm; 36 using namespace llvm::ELF; 37 using namespace llvm::object; 38 using namespace llvm::sys; 39 using namespace llvm::sys::fs; 40 using namespace llvm::support::endian; 41 using namespace lld; 42 using namespace lld::elf; 43 44 // This function is explicity instantiated in ARM.cpp, don't do it here to avoid 45 // warnings with MSVC. 46 extern template void ObjFile<ELF32LE>::importCmseSymbols(); 47 extern template void ObjFile<ELF32BE>::importCmseSymbols(); 48 extern template void ObjFile<ELF64LE>::importCmseSymbols(); 49 extern template void ObjFile<ELF64BE>::importCmseSymbols(); 50 51 bool InputFile::isInGroup; 52 uint32_t InputFile::nextGroupId; 53 54 std::unique_ptr<TarWriter> elf::tar; 55 56 // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 57 std::string lld::toString(const InputFile *f) { 58 static std::mutex mu; 59 if (!f) 60 return "<internal>"; 61 62 { 63 std::lock_guard<std::mutex> lock(mu); 64 if (f->toStringCache.empty()) { 65 if (f->archiveName.empty()) 66 f->toStringCache = f->getName(); 67 else 68 (f->archiveName + "(" + f->getName() + ")").toVector(f->toStringCache); 69 } 70 } 71 return std::string(f->toStringCache); 72 } 73 74 static ELFKind getELFKind(MemoryBufferRef mb, StringRef archiveName) { 75 unsigned char size; 76 unsigned char endian; 77 std::tie(size, endian) = getElfArchType(mb.getBuffer()); 78 79 auto report = [&](StringRef msg) { 80 StringRef filename = mb.getBufferIdentifier(); 81 if (archiveName.empty()) 82 fatal(filename + ": " + msg); 83 else 84 fatal(archiveName + "(" + filename + "): " + msg); 85 }; 86 87 if (!mb.getBuffer().starts_with(ElfMagic)) 88 report("not an ELF file"); 89 if (endian != ELFDATA2LSB && endian != ELFDATA2MSB) 90 report("corrupted ELF file: invalid data encoding"); 91 if (size != ELFCLASS32 && size != ELFCLASS64) 92 report("corrupted ELF file: invalid file class"); 93 94 size_t bufSize = mb.getBuffer().size(); 95 if ((size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr)) || 96 (size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr))) 97 report("corrupted ELF file: file is too short"); 98 99 if (size == ELFCLASS32) 100 return (endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind; 101 return (endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind; 102 } 103 104 // For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD 105 // flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how 106 // the input objects have been compiled. 107 static void updateARMVFPArgs(const ARMAttributeParser &attributes, 108 const InputFile *f) { 109 std::optional<unsigned> attr = 110 attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args); 111 if (!attr) 112 // If an ABI tag isn't present then it is implicitly given the value of 0 113 // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files, 114 // including some in glibc that don't use FP args (and should have value 3) 115 // don't have the attribute so we do not consider an implicit value of 0 116 // as a clash. 117 return; 118 119 unsigned vfpArgs = *attr; 120 ARMVFPArgKind arg; 121 switch (vfpArgs) { 122 case ARMBuildAttrs::BaseAAPCS: 123 arg = ARMVFPArgKind::Base; 124 break; 125 case ARMBuildAttrs::HardFPAAPCS: 126 arg = ARMVFPArgKind::VFP; 127 break; 128 case ARMBuildAttrs::ToolChainFPPCS: 129 // Tool chain specific convention that conforms to neither AAPCS variant. 130 arg = ARMVFPArgKind::ToolChain; 131 break; 132 case ARMBuildAttrs::CompatibleFPAAPCS: 133 // Object compatible with all conventions. 134 return; 135 default: 136 error(toString(f) + ": unknown Tag_ABI_VFP_args value: " + Twine(vfpArgs)); 137 return; 138 } 139 // Follow ld.bfd and error if there is a mix of calling conventions. 140 if (config->armVFPArgs != arg && config->armVFPArgs != ARMVFPArgKind::Default) 141 error(toString(f) + ": incompatible Tag_ABI_VFP_args"); 142 else 143 config->armVFPArgs = arg; 144 } 145 146 // The ARM support in lld makes some use of instructions that are not available 147 // on all ARM architectures. Namely: 148 // - Use of BLX instruction for interworking between ARM and Thumb state. 149 // - Use of the extended Thumb branch encoding in relocation. 150 // - Use of the MOVT/MOVW instructions in Thumb Thunks. 151 // The ARM Attributes section contains information about the architecture chosen 152 // at compile time. We follow the convention that if at least one input object 153 // is compiled with an architecture that supports these features then lld is 154 // permitted to use them. 155 static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { 156 std::optional<unsigned> attr = 157 attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); 158 if (!attr) 159 return; 160 auto arch = *attr; 161 switch (arch) { 162 case ARMBuildAttrs::Pre_v4: 163 case ARMBuildAttrs::v4: 164 case ARMBuildAttrs::v4T: 165 // Architectures prior to v5 do not support BLX instruction 166 break; 167 case ARMBuildAttrs::v5T: 168 case ARMBuildAttrs::v5TE: 169 case ARMBuildAttrs::v5TEJ: 170 case ARMBuildAttrs::v6: 171 case ARMBuildAttrs::v6KZ: 172 case ARMBuildAttrs::v6K: 173 config->armHasBlx = true; 174 // Architectures used in pre-Cortex processors do not support 175 // The J1 = 1 J2 = 1 Thumb branch range extension, with the exception 176 // of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do. 177 break; 178 default: 179 // All other Architectures have BLX and extended branch encoding 180 config->armHasBlx = true; 181 config->armJ1J2BranchEncoding = true; 182 if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M) 183 // All Architectures used in Cortex processors with the exception 184 // of v6-M and v6S-M have the MOVT and MOVW instructions. 185 config->armHasMovtMovw = true; 186 break; 187 } 188 189 // Only ARMv8-M or later architectures have CMSE support. 190 std::optional<unsigned> profile = 191 attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile); 192 if (!profile) 193 return; 194 if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base && 195 profile == ARMBuildAttrs::MicroControllerProfile) 196 config->armCMSESupport = true; 197 } 198 199 InputFile::InputFile(Kind k, MemoryBufferRef m) 200 : mb(m), groupId(nextGroupId), fileKind(k) { 201 // All files within the same --{start,end}-group get the same group ID. 202 // Otherwise, a new file will get a new group ID. 203 if (!isInGroup) 204 ++nextGroupId; 205 } 206 207 std::optional<MemoryBufferRef> elf::readFile(StringRef path) { 208 llvm::TimeTraceScope timeScope("Load input files", path); 209 210 // The --chroot option changes our virtual root directory. 211 // This is useful when you are dealing with files created by --reproduce. 212 if (!config->chroot.empty() && path.starts_with("/")) 213 path = saver().save(config->chroot + path); 214 215 bool remapped = false; 216 auto it = config->remapInputs.find(path); 217 if (it != config->remapInputs.end()) { 218 path = it->second; 219 remapped = true; 220 } else { 221 for (const auto &[pat, toFile] : config->remapInputsWildcards) { 222 if (pat.match(path)) { 223 path = toFile; 224 remapped = true; 225 break; 226 } 227 } 228 } 229 if (remapped) { 230 // Use /dev/null to indicate an input file that should be ignored. Change 231 // the path to NUL on Windows. 232 #ifdef _WIN32 233 if (path == "/dev/null") 234 path = "NUL"; 235 #endif 236 } 237 238 log(path); 239 config->dependencyFiles.insert(llvm::CachedHashString(path)); 240 241 auto mbOrErr = MemoryBuffer::getFile(path, /*IsText=*/false, 242 /*RequiresNullTerminator=*/false); 243 if (auto ec = mbOrErr.getError()) { 244 error("cannot open " + path + ": " + ec.message()); 245 return std::nullopt; 246 } 247 248 MemoryBufferRef mbref = (*mbOrErr)->getMemBufferRef(); 249 ctx.memoryBuffers.push_back(std::move(*mbOrErr)); // take MB ownership 250 251 if (tar) 252 tar->append(relativeToRoot(path), mbref.getBuffer()); 253 return mbref; 254 } 255 256 // All input object files must be for the same architecture 257 // (e.g. it does not make sense to link x86 object files with 258 // MIPS object files.) This function checks for that error. 259 static bool isCompatible(InputFile *file) { 260 if (!file->isElf() && !isa<BitcodeFile>(file)) 261 return true; 262 263 if (file->ekind == config->ekind && file->emachine == config->emachine) { 264 if (config->emachine != EM_MIPS) 265 return true; 266 if (isMipsN32Abi(file) == config->mipsN32Abi) 267 return true; 268 } 269 270 StringRef target = 271 !config->bfdname.empty() ? config->bfdname : config->emulation; 272 if (!target.empty()) { 273 error(toString(file) + " is incompatible with " + target); 274 return false; 275 } 276 277 InputFile *existing = nullptr; 278 if (!ctx.objectFiles.empty()) 279 existing = ctx.objectFiles[0]; 280 else if (!ctx.sharedFiles.empty()) 281 existing = ctx.sharedFiles[0]; 282 else if (!ctx.bitcodeFiles.empty()) 283 existing = ctx.bitcodeFiles[0]; 284 std::string with; 285 if (existing) 286 with = " with " + toString(existing); 287 error(toString(file) + " is incompatible" + with); 288 return false; 289 } 290 291 template <class ELFT> static void doParseFile(InputFile *file) { 292 if (!isCompatible(file)) 293 return; 294 295 // Lazy object file 296 if (file->lazy) { 297 if (auto *f = dyn_cast<BitcodeFile>(file)) { 298 ctx.lazyBitcodeFiles.push_back(f); 299 f->parseLazy(); 300 } else { 301 cast<ObjFile<ELFT>>(file)->parseLazy(); 302 } 303 return; 304 } 305 306 if (config->trace) 307 message(toString(file)); 308 309 if (file->kind() == InputFile::ObjKind) { 310 ctx.objectFiles.push_back(cast<ELFFileBase>(file)); 311 cast<ObjFile<ELFT>>(file)->parse(); 312 } else if (auto *f = dyn_cast<SharedFile>(file)) { 313 f->parse<ELFT>(); 314 } else if (auto *f = dyn_cast<BitcodeFile>(file)) { 315 ctx.bitcodeFiles.push_back(f); 316 f->parse(); 317 } else { 318 ctx.binaryFiles.push_back(cast<BinaryFile>(file)); 319 cast<BinaryFile>(file)->parse(); 320 } 321 } 322 323 // Add symbols in File to the symbol table. 324 void elf::parseFile(InputFile *file) { invokeELFT(doParseFile, file); } 325 326 // This function is explicity instantiated in ARM.cpp. Mark it extern here, 327 // to avoid warnings when building with MSVC. 328 extern template void ObjFile<ELF32LE>::importCmseSymbols(); 329 extern template void ObjFile<ELF32BE>::importCmseSymbols(); 330 extern template void ObjFile<ELF64LE>::importCmseSymbols(); 331 extern template void ObjFile<ELF64BE>::importCmseSymbols(); 332 333 template <class ELFT> static void doParseArmCMSEImportLib(InputFile *file) { 334 cast<ObjFile<ELFT>>(file)->importCmseSymbols(); 335 } 336 337 void elf::parseArmCMSEImportLib(InputFile *file) { 338 invokeELFT(doParseArmCMSEImportLib, file); 339 } 340 341 // Concatenates arguments to construct a string representing an error location. 342 static std::string createFileLineMsg(StringRef path, unsigned line) { 343 std::string filename = std::string(path::filename(path)); 344 std::string lineno = ":" + std::to_string(line); 345 if (filename == path) 346 return filename + lineno; 347 return filename + lineno + " (" + path.str() + lineno + ")"; 348 } 349 350 template <class ELFT> 351 static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym, 352 const InputSectionBase &sec, uint64_t offset) { 353 // In DWARF, functions and variables are stored to different places. 354 // First, look up a function for a given offset. 355 if (std::optional<DILineInfo> info = file.getDILineInfo(&sec, offset)) 356 return createFileLineMsg(info->FileName, info->Line); 357 358 // If it failed, look up again as a variable. 359 if (std::optional<std::pair<std::string, unsigned>> fileLine = 360 file.getVariableLoc(sym.getName())) 361 return createFileLineMsg(fileLine->first, fileLine->second); 362 363 // File.sourceFile contains STT_FILE symbol, and that is a last resort. 364 return std::string(file.sourceFile); 365 } 366 367 std::string InputFile::getSrcMsg(const Symbol &sym, const InputSectionBase &sec, 368 uint64_t offset) { 369 if (kind() != ObjKind) 370 return ""; 371 switch (ekind) { 372 default: 373 llvm_unreachable("Invalid kind"); 374 case ELF32LEKind: 375 return getSrcMsgAux(cast<ObjFile<ELF32LE>>(*this), sym, sec, offset); 376 case ELF32BEKind: 377 return getSrcMsgAux(cast<ObjFile<ELF32BE>>(*this), sym, sec, offset); 378 case ELF64LEKind: 379 return getSrcMsgAux(cast<ObjFile<ELF64LE>>(*this), sym, sec, offset); 380 case ELF64BEKind: 381 return getSrcMsgAux(cast<ObjFile<ELF64BE>>(*this), sym, sec, offset); 382 } 383 } 384 385 StringRef InputFile::getNameForScript() const { 386 if (archiveName.empty()) 387 return getName(); 388 389 if (nameForScriptCache.empty()) 390 nameForScriptCache = (archiveName + Twine(':') + getName()).str(); 391 392 return nameForScriptCache; 393 } 394 395 // An ELF object file may contain a `.deplibs` section. If it exists, the 396 // section contains a list of library specifiers such as `m` for libm. This 397 // function resolves a given name by finding the first matching library checking 398 // the various ways that a library can be specified to LLD. This ELF extension 399 // is a form of autolinking and is called `dependent libraries`. It is currently 400 // unique to LLVM and lld. 401 static void addDependentLibrary(StringRef specifier, const InputFile *f) { 402 if (!config->dependentLibraries) 403 return; 404 if (std::optional<std::string> s = searchLibraryBaseName(specifier)) 405 ctx.driver.addFile(saver().save(*s), /*withLOption=*/true); 406 else if (std::optional<std::string> s = findFromSearchPaths(specifier)) 407 ctx.driver.addFile(saver().save(*s), /*withLOption=*/true); 408 else if (fs::exists(specifier)) 409 ctx.driver.addFile(specifier, /*withLOption=*/false); 410 else 411 error(toString(f) + 412 ": unable to find library from dependent library specifier: " + 413 specifier); 414 } 415 416 // Record the membership of a section group so that in the garbage collection 417 // pass, section group members are kept or discarded as a unit. 418 template <class ELFT> 419 static void handleSectionGroup(ArrayRef<InputSectionBase *> sections, 420 ArrayRef<typename ELFT::Word> entries) { 421 bool hasAlloc = false; 422 for (uint32_t index : entries.slice(1)) { 423 if (index >= sections.size()) 424 return; 425 if (InputSectionBase *s = sections[index]) 426 if (s != &InputSection::discarded && s->flags & SHF_ALLOC) 427 hasAlloc = true; 428 } 429 430 // If any member has the SHF_ALLOC flag, the whole group is subject to garbage 431 // collection. See the comment in markLive(). This rule retains .debug_types 432 // and .rela.debug_types. 433 if (!hasAlloc) 434 return; 435 436 // Connect the members in a circular doubly-linked list via 437 // nextInSectionGroup. 438 InputSectionBase *head; 439 InputSectionBase *prev = nullptr; 440 for (uint32_t index : entries.slice(1)) { 441 InputSectionBase *s = sections[index]; 442 if (!s || s == &InputSection::discarded) 443 continue; 444 if (prev) 445 prev->nextInSectionGroup = s; 446 else 447 head = s; 448 prev = s; 449 } 450 if (prev) 451 prev->nextInSectionGroup = head; 452 } 453 454 template <class ELFT> DWARFCache *ObjFile<ELFT>::getDwarf() { 455 llvm::call_once(initDwarf, [this]() { 456 dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>( 457 std::make_unique<LLDDwarfObj<ELFT>>(this), "", 458 [&](Error err) { warn(getName() + ": " + toString(std::move(err))); }, 459 [&](Error warning) { 460 warn(getName() + ": " + toString(std::move(warning))); 461 })); 462 }); 463 464 return dwarf.get(); 465 } 466 467 // Returns the pair of file name and line number describing location of data 468 // object (variable, array, etc) definition. 469 template <class ELFT> 470 std::optional<std::pair<std::string, unsigned>> 471 ObjFile<ELFT>::getVariableLoc(StringRef name) { 472 return getDwarf()->getVariableLoc(name); 473 } 474 475 // Returns source line information for a given offset 476 // using DWARF debug info. 477 template <class ELFT> 478 std::optional<DILineInfo> 479 ObjFile<ELFT>::getDILineInfo(const InputSectionBase *s, uint64_t offset) { 480 // Detect SectionIndex for specified section. 481 uint64_t sectionIndex = object::SectionedAddress::UndefSection; 482 ArrayRef<InputSectionBase *> sections = s->file->getSections(); 483 for (uint64_t curIndex = 0; curIndex < sections.size(); ++curIndex) { 484 if (s == sections[curIndex]) { 485 sectionIndex = curIndex; 486 break; 487 } 488 } 489 490 return getDwarf()->getDILineInfo(offset, sectionIndex); 491 } 492 493 ELFFileBase::ELFFileBase(Kind k, ELFKind ekind, MemoryBufferRef mb) 494 : InputFile(k, mb) { 495 this->ekind = ekind; 496 } 497 498 template <typename Elf_Shdr> 499 static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> sections, uint32_t type) { 500 for (const Elf_Shdr &sec : sections) 501 if (sec.sh_type == type) 502 return &sec; 503 return nullptr; 504 } 505 506 void ELFFileBase::init() { 507 switch (ekind) { 508 case ELF32LEKind: 509 init<ELF32LE>(fileKind); 510 break; 511 case ELF32BEKind: 512 init<ELF32BE>(fileKind); 513 break; 514 case ELF64LEKind: 515 init<ELF64LE>(fileKind); 516 break; 517 case ELF64BEKind: 518 init<ELF64BE>(fileKind); 519 break; 520 default: 521 llvm_unreachable("getELFKind"); 522 } 523 } 524 525 template <class ELFT> void ELFFileBase::init(InputFile::Kind k) { 526 using Elf_Shdr = typename ELFT::Shdr; 527 using Elf_Sym = typename ELFT::Sym; 528 529 // Initialize trivial attributes. 530 const ELFFile<ELFT> &obj = getObj<ELFT>(); 531 emachine = obj.getHeader().e_machine; 532 osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI]; 533 abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION]; 534 535 ArrayRef<Elf_Shdr> sections = CHECK(obj.sections(), this); 536 elfShdrs = sections.data(); 537 numELFShdrs = sections.size(); 538 539 // Find a symbol table. 540 const Elf_Shdr *symtabSec = 541 findSection(sections, k == SharedKind ? SHT_DYNSYM : SHT_SYMTAB); 542 543 if (!symtabSec) 544 return; 545 546 // Initialize members corresponding to a symbol table. 547 firstGlobal = symtabSec->sh_info; 548 549 ArrayRef<Elf_Sym> eSyms = CHECK(obj.symbols(symtabSec), this); 550 if (firstGlobal == 0 || firstGlobal > eSyms.size()) 551 fatal(toString(this) + ": invalid sh_info in symbol table"); 552 553 elfSyms = reinterpret_cast<const void *>(eSyms.data()); 554 numELFSyms = uint32_t(eSyms.size()); 555 stringTable = CHECK(obj.getStringTableForSymtab(*symtabSec, sections), this); 556 } 557 558 template <class ELFT> 559 uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const { 560 return CHECK( 561 this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable), 562 this); 563 } 564 565 template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) { 566 object::ELFFile<ELFT> obj = this->getObj(); 567 // Read a section table. justSymbols is usually false. 568 if (this->justSymbols) { 569 initializeJustSymbols(); 570 initializeSymbols(obj); 571 return; 572 } 573 574 // Handle dependent libraries and selection of section groups as these are not 575 // done in parallel. 576 ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>(); 577 StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this); 578 uint64_t size = objSections.size(); 579 sections.resize(size); 580 for (size_t i = 0; i != size; ++i) { 581 const Elf_Shdr &sec = objSections[i]; 582 if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !config->relocatable) { 583 StringRef name = check(obj.getSectionName(sec, shstrtab)); 584 ArrayRef<char> data = CHECK( 585 this->getObj().template getSectionContentsAsArray<char>(sec), this); 586 if (!data.empty() && data.back() != '\0') { 587 error( 588 toString(this) + 589 ": corrupted dependent libraries section (unterminated string): " + 590 name); 591 } else { 592 for (const char *d = data.begin(), *e = data.end(); d < e;) { 593 StringRef s(d); 594 addDependentLibrary(s, this); 595 d += s.size() + 1; 596 } 597 } 598 this->sections[i] = &InputSection::discarded; 599 continue; 600 } 601 602 if (sec.sh_type == SHT_ARM_ATTRIBUTES && config->emachine == EM_ARM) { 603 ARMAttributeParser attributes; 604 ArrayRef<uint8_t> contents = 605 check(this->getObj().getSectionContents(sec)); 606 StringRef name = check(obj.getSectionName(sec, shstrtab)); 607 this->sections[i] = &InputSection::discarded; 608 if (Error e = attributes.parse(contents, ekind == ELF32LEKind 609 ? llvm::endianness::little 610 : llvm::endianness::big)) { 611 InputSection isec(*this, sec, name); 612 warn(toString(&isec) + ": " + llvm::toString(std::move(e))); 613 } else { 614 updateSupportedARMFeatures(attributes); 615 updateARMVFPArgs(attributes, this); 616 617 // FIXME: Retain the first attribute section we see. The eglibc ARM 618 // dynamic loaders require the presence of an attribute section for 619 // dlopen to work. In a full implementation we would merge all attribute 620 // sections. 621 if (in.attributes == nullptr) { 622 in.attributes = std::make_unique<InputSection>(*this, sec, name); 623 this->sections[i] = in.attributes.get(); 624 } 625 } 626 } 627 628 // Producing a static binary with MTE globals is not currently supported, 629 // remove all SHT_AARCH64_MEMTAG_GLOBALS_STATIC sections as they're unused 630 // medatada, and we don't want them to end up in the output file for static 631 // executables. 632 if (sec.sh_type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC && 633 !canHaveMemtagGlobals()) { 634 this->sections[i] = &InputSection::discarded; 635 continue; 636 } 637 638 if (sec.sh_type != SHT_GROUP) 639 continue; 640 StringRef signature = getShtGroupSignature(objSections, sec); 641 ArrayRef<Elf_Word> entries = 642 CHECK(obj.template getSectionContentsAsArray<Elf_Word>(sec), this); 643 if (entries.empty()) 644 fatal(toString(this) + ": empty SHT_GROUP"); 645 646 Elf_Word flag = entries[0]; 647 if (flag && flag != GRP_COMDAT) 648 fatal(toString(this) + ": unsupported SHT_GROUP format"); 649 650 bool keepGroup = 651 (flag & GRP_COMDAT) == 0 || ignoreComdats || 652 symtab.comdatGroups.try_emplace(CachedHashStringRef(signature), this) 653 .second; 654 if (keepGroup) { 655 if (config->relocatable) 656 this->sections[i] = createInputSection( 657 i, sec, check(obj.getSectionName(sec, shstrtab))); 658 continue; 659 } 660 661 // Otherwise, discard group members. 662 for (uint32_t secIndex : entries.slice(1)) { 663 if (secIndex >= size) 664 fatal(toString(this) + 665 ": invalid section index in group: " + Twine(secIndex)); 666 this->sections[secIndex] = &InputSection::discarded; 667 } 668 } 669 670 // Read a symbol table. 671 initializeSymbols(obj); 672 } 673 674 // Sections with SHT_GROUP and comdat bits define comdat section groups. 675 // They are identified and deduplicated by group name. This function 676 // returns a group name. 677 template <class ELFT> 678 StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections, 679 const Elf_Shdr &sec) { 680 typename ELFT::SymRange symbols = this->getELFSyms<ELFT>(); 681 if (sec.sh_info >= symbols.size()) 682 fatal(toString(this) + ": invalid symbol index"); 683 const typename ELFT::Sym &sym = symbols[sec.sh_info]; 684 return CHECK(sym.getName(this->stringTable), this); 685 } 686 687 template <class ELFT> 688 bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) { 689 // On a regular link we don't merge sections if -O0 (default is -O1). This 690 // sometimes makes the linker significantly faster, although the output will 691 // be bigger. 692 // 693 // Doing the same for -r would create a problem as it would combine sections 694 // with different sh_entsize. One option would be to just copy every SHF_MERGE 695 // section as is to the output. While this would produce a valid ELF file with 696 // usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when 697 // they see two .debug_str. We could have separate logic for combining 698 // SHF_MERGE sections based both on their name and sh_entsize, but that seems 699 // to be more trouble than it is worth. Instead, we just use the regular (-O1) 700 // logic for -r. 701 if (config->optimize == 0 && !config->relocatable) 702 return false; 703 704 // A mergeable section with size 0 is useless because they don't have 705 // any data to merge. A mergeable string section with size 0 can be 706 // argued as invalid because it doesn't end with a null character. 707 // We'll avoid a mess by handling them as if they were non-mergeable. 708 if (sec.sh_size == 0) 709 return false; 710 711 // Check for sh_entsize. The ELF spec is not clear about the zero 712 // sh_entsize. It says that "the member [sh_entsize] contains 0 if 713 // the section does not hold a table of fixed-size entries". We know 714 // that Rust 1.13 produces a string mergeable section with a zero 715 // sh_entsize. Here we just accept it rather than being picky about it. 716 uint64_t entSize = sec.sh_entsize; 717 if (entSize == 0) 718 return false; 719 if (sec.sh_size % entSize) 720 fatal(toString(this) + ":(" + name + "): SHF_MERGE section size (" + 721 Twine(sec.sh_size) + ") must be a multiple of sh_entsize (" + 722 Twine(entSize) + ")"); 723 724 if (sec.sh_flags & SHF_WRITE) 725 fatal(toString(this) + ":(" + name + 726 "): writable SHF_MERGE section is not supported"); 727 728 return true; 729 } 730 731 // This is for --just-symbols. 732 // 733 // --just-symbols is a very minor feature that allows you to link your 734 // output against other existing program, so that if you load both your 735 // program and the other program into memory, your output can refer the 736 // other program's symbols. 737 // 738 // When the option is given, we link "just symbols". The section table is 739 // initialized with null pointers. 740 template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() { 741 sections.resize(numELFShdrs); 742 } 743 744 template <class ELFT> 745 void ObjFile<ELFT>::initializeSections(bool ignoreComdats, 746 const llvm::object::ELFFile<ELFT> &obj) { 747 ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>(); 748 StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this); 749 uint64_t size = objSections.size(); 750 SmallVector<ArrayRef<Elf_Word>, 0> selectedGroups; 751 for (size_t i = 0; i != size; ++i) { 752 if (this->sections[i] == &InputSection::discarded) 753 continue; 754 const Elf_Shdr &sec = objSections[i]; 755 756 // SHF_EXCLUDE'ed sections are discarded by the linker. However, 757 // if -r is given, we'll let the final link discard such sections. 758 // This is compatible with GNU. 759 if ((sec.sh_flags & SHF_EXCLUDE) && !config->relocatable) { 760 if (sec.sh_type == SHT_LLVM_CALL_GRAPH_PROFILE) 761 cgProfileSectionIndex = i; 762 if (sec.sh_type == SHT_LLVM_ADDRSIG) { 763 // We ignore the address-significance table if we know that the object 764 // file was created by objcopy or ld -r. This is because these tools 765 // will reorder the symbols in the symbol table, invalidating the data 766 // in the address-significance table, which refers to symbols by index. 767 if (sec.sh_link != 0) 768 this->addrsigSec = &sec; 769 else if (config->icf == ICFLevel::Safe) 770 warn(toString(this) + 771 ": --icf=safe conservatively ignores " 772 "SHT_LLVM_ADDRSIG [index " + 773 Twine(i) + 774 "] with sh_link=0 " 775 "(likely created using objcopy or ld -r)"); 776 } 777 this->sections[i] = &InputSection::discarded; 778 continue; 779 } 780 781 switch (sec.sh_type) { 782 case SHT_GROUP: { 783 if (!config->relocatable) 784 sections[i] = &InputSection::discarded; 785 StringRef signature = 786 cantFail(this->getELFSyms<ELFT>()[sec.sh_info].getName(stringTable)); 787 ArrayRef<Elf_Word> entries = 788 cantFail(obj.template getSectionContentsAsArray<Elf_Word>(sec)); 789 if ((entries[0] & GRP_COMDAT) == 0 || ignoreComdats || 790 symtab.comdatGroups.find(CachedHashStringRef(signature))->second == 791 this) 792 selectedGroups.push_back(entries); 793 break; 794 } 795 case SHT_SYMTAB_SHNDX: 796 shndxTable = CHECK(obj.getSHNDXTable(sec, objSections), this); 797 break; 798 case SHT_SYMTAB: 799 case SHT_STRTAB: 800 case SHT_REL: 801 case SHT_RELA: 802 case SHT_NULL: 803 break; 804 case SHT_LLVM_SYMPART: 805 ctx.hasSympart.store(true, std::memory_order_relaxed); 806 [[fallthrough]]; 807 default: 808 this->sections[i] = 809 createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab))); 810 } 811 } 812 813 // We have a second loop. It is used to: 814 // 1) handle SHF_LINK_ORDER sections. 815 // 2) create SHT_REL[A] sections. In some cases the section header index of a 816 // relocation section may be smaller than that of the relocated section. In 817 // such cases, the relocation section would attempt to reference a target 818 // section that has not yet been created. For simplicity, delay creation of 819 // relocation sections until now. 820 for (size_t i = 0; i != size; ++i) { 821 if (this->sections[i] == &InputSection::discarded) 822 continue; 823 const Elf_Shdr &sec = objSections[i]; 824 825 if (sec.sh_type == SHT_REL || sec.sh_type == SHT_RELA) { 826 // Find a relocation target section and associate this section with that. 827 // Target may have been discarded if it is in a different section group 828 // and the group is discarded, even though it's a violation of the spec. 829 // We handle that situation gracefully by discarding dangling relocation 830 // sections. 831 const uint32_t info = sec.sh_info; 832 InputSectionBase *s = getRelocTarget(i, sec, info); 833 if (!s) 834 continue; 835 836 // ELF spec allows mergeable sections with relocations, but they are rare, 837 // and it is in practice hard to merge such sections by contents, because 838 // applying relocations at end of linking changes section contents. So, we 839 // simply handle such sections as non-mergeable ones. Degrading like this 840 // is acceptable because section merging is optional. 841 if (auto *ms = dyn_cast<MergeInputSection>(s)) { 842 s = makeThreadLocal<InputSection>( 843 ms->file, ms->flags, ms->type, ms->addralign, 844 ms->contentMaybeDecompress(), ms->name); 845 sections[info] = s; 846 } 847 848 if (s->relSecIdx != 0) 849 error( 850 toString(s) + 851 ": multiple relocation sections to one section are not supported"); 852 s->relSecIdx = i; 853 854 // Relocation sections are usually removed from the output, so return 855 // `nullptr` for the normal case. However, if -r or --emit-relocs is 856 // specified, we need to copy them to the output. (Some post link analysis 857 // tools specify --emit-relocs to obtain the information.) 858 if (config->copyRelocs) { 859 auto *isec = makeThreadLocal<InputSection>( 860 *this, sec, check(obj.getSectionName(sec, shstrtab))); 861 // If the relocated section is discarded (due to /DISCARD/ or 862 // --gc-sections), the relocation section should be discarded as well. 863 s->dependentSections.push_back(isec); 864 sections[i] = isec; 865 } 866 continue; 867 } 868 869 // A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have 870 // the flag. 871 if (!sec.sh_link || !(sec.sh_flags & SHF_LINK_ORDER)) 872 continue; 873 874 InputSectionBase *linkSec = nullptr; 875 if (sec.sh_link < size) 876 linkSec = this->sections[sec.sh_link]; 877 if (!linkSec) 878 fatal(toString(this) + ": invalid sh_link index: " + Twine(sec.sh_link)); 879 880 // A SHF_LINK_ORDER section is discarded if its linked-to section is 881 // discarded. 882 InputSection *isec = cast<InputSection>(this->sections[i]); 883 linkSec->dependentSections.push_back(isec); 884 if (!isa<InputSection>(linkSec)) 885 error("a section " + isec->name + 886 " with SHF_LINK_ORDER should not refer a non-regular section: " + 887 toString(linkSec)); 888 } 889 890 for (ArrayRef<Elf_Word> entries : selectedGroups) 891 handleSectionGroup<ELFT>(this->sections, entries); 892 } 893 894 // If a source file is compiled with x86 hardware-assisted call flow control 895 // enabled, the generated object file contains feature flags indicating that 896 // fact. This function reads the feature flags and returns it. 897 // 898 // Essentially we want to read a single 32-bit value in this function, but this 899 // function is rather complicated because the value is buried deep inside a 900 // .note.gnu.property section. 901 // 902 // The section consists of one or more NOTE records. Each NOTE record consists 903 // of zero or more type-length-value fields. We want to find a field of a 904 // certain type. It seems a bit too much to just store a 32-bit value, perhaps 905 // the ABI is unnecessarily complicated. 906 template <class ELFT> static uint32_t readAndFeatures(const InputSection &sec) { 907 using Elf_Nhdr = typename ELFT::Nhdr; 908 using Elf_Note = typename ELFT::Note; 909 910 uint32_t featuresSet = 0; 911 ArrayRef<uint8_t> data = sec.content(); 912 auto reportFatal = [&](const uint8_t *place, const char *msg) { 913 fatal(toString(sec.file) + ":(" + sec.name + "+0x" + 914 Twine::utohexstr(place - sec.content().data()) + "): " + msg); 915 }; 916 while (!data.empty()) { 917 // Read one NOTE record. 918 auto *nhdr = reinterpret_cast<const Elf_Nhdr *>(data.data()); 919 if (data.size() < sizeof(Elf_Nhdr) || 920 data.size() < nhdr->getSize(sec.addralign)) 921 reportFatal(data.data(), "data is too short"); 922 923 Elf_Note note(*nhdr); 924 if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") { 925 data = data.slice(nhdr->getSize(sec.addralign)); 926 continue; 927 } 928 929 uint32_t featureAndType = config->emachine == EM_AARCH64 930 ? GNU_PROPERTY_AARCH64_FEATURE_1_AND 931 : GNU_PROPERTY_X86_FEATURE_1_AND; 932 933 // Read a body of a NOTE record, which consists of type-length-value fields. 934 ArrayRef<uint8_t> desc = note.getDesc(sec.addralign); 935 while (!desc.empty()) { 936 const uint8_t *place = desc.data(); 937 if (desc.size() < 8) 938 reportFatal(place, "program property is too short"); 939 uint32_t type = read32<ELFT::TargetEndianness>(desc.data()); 940 uint32_t size = read32<ELFT::TargetEndianness>(desc.data() + 4); 941 desc = desc.slice(8); 942 if (desc.size() < size) 943 reportFatal(place, "program property is too short"); 944 945 if (type == featureAndType) { 946 // We found a FEATURE_1_AND field. There may be more than one of these 947 // in a .note.gnu.property section, for a relocatable object we 948 // accumulate the bits set. 949 if (size < 4) 950 reportFatal(place, "FEATURE_1_AND entry is too short"); 951 featuresSet |= read32<ELFT::TargetEndianness>(desc.data()); 952 } 953 954 // Padding is present in the note descriptor, if necessary. 955 desc = desc.slice(alignTo<(ELFT::Is64Bits ? 8 : 4)>(size)); 956 } 957 958 // Go to next NOTE record to look for more FEATURE_1_AND descriptions. 959 data = data.slice(nhdr->getSize(sec.addralign)); 960 } 961 962 return featuresSet; 963 } 964 965 template <class ELFT> 966 InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx, 967 const Elf_Shdr &sec, 968 uint32_t info) { 969 if (info < this->sections.size()) { 970 InputSectionBase *target = this->sections[info]; 971 972 // Strictly speaking, a relocation section must be included in the 973 // group of the section it relocates. However, LLVM 3.3 and earlier 974 // would fail to do so, so we gracefully handle that case. 975 if (target == &InputSection::discarded) 976 return nullptr; 977 978 if (target != nullptr) 979 return target; 980 } 981 982 error(toString(this) + Twine(": relocation section (index ") + Twine(idx) + 983 ") has invalid sh_info (" + Twine(info) + ")"); 984 return nullptr; 985 } 986 987 // The function may be called concurrently for different input files. For 988 // allocation, prefer makeThreadLocal which does not require holding a lock. 989 template <class ELFT> 990 InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx, 991 const Elf_Shdr &sec, 992 StringRef name) { 993 if (name.starts_with(".n")) { 994 // The GNU linker uses .note.GNU-stack section as a marker indicating 995 // that the code in the object file does not expect that the stack is 996 // executable (in terms of NX bit). If all input files have the marker, 997 // the GNU linker adds a PT_GNU_STACK segment to tells the loader to 998 // make the stack non-executable. Most object files have this section as 999 // of 2017. 1000 // 1001 // But making the stack non-executable is a norm today for security 1002 // reasons. Failure to do so may result in a serious security issue. 1003 // Therefore, we make LLD always add PT_GNU_STACK unless it is 1004 // explicitly told to do otherwise (by -z execstack). Because the stack 1005 // executable-ness is controlled solely by command line options, 1006 // .note.GNU-stack sections are simply ignored. 1007 if (name == ".note.GNU-stack") 1008 return &InputSection::discarded; 1009 1010 // Object files that use processor features such as Intel Control-Flow 1011 // Enforcement (CET) or AArch64 Branch Target Identification BTI, use a 1012 // .note.gnu.property section containing a bitfield of feature bits like the 1013 // GNU_PROPERTY_X86_FEATURE_1_IBT flag. Read a bitmap containing the flag. 1014 // 1015 // Since we merge bitmaps from multiple object files to create a new 1016 // .note.gnu.property containing a single AND'ed bitmap, we discard an input 1017 // file's .note.gnu.property section. 1018 if (name == ".note.gnu.property") { 1019 this->andFeatures = readAndFeatures<ELFT>(InputSection(*this, sec, name)); 1020 return &InputSection::discarded; 1021 } 1022 1023 // Split stacks is a feature to support a discontiguous stack, 1024 // commonly used in the programming language Go. For the details, 1025 // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled 1026 // for split stack will include a .note.GNU-split-stack section. 1027 if (name == ".note.GNU-split-stack") { 1028 if (config->relocatable) { 1029 error( 1030 "cannot mix split-stack and non-split-stack in a relocatable link"); 1031 return &InputSection::discarded; 1032 } 1033 this->splitStack = true; 1034 return &InputSection::discarded; 1035 } 1036 1037 // An object file compiled for split stack, but where some of the 1038 // functions were compiled with the no_split_stack_attribute will 1039 // include a .note.GNU-no-split-stack section. 1040 if (name == ".note.GNU-no-split-stack") { 1041 this->someNoSplitStack = true; 1042 return &InputSection::discarded; 1043 } 1044 1045 // Strip existing .note.gnu.build-id sections so that the output won't have 1046 // more than one build-id. This is not usually a problem because input 1047 // object files normally don't have .build-id sections, but you can create 1048 // such files by "ld.{bfd,gold,lld} -r --build-id", and we want to guard 1049 // against it. 1050 if (name == ".note.gnu.build-id") 1051 return &InputSection::discarded; 1052 } 1053 1054 // The linker merges EH (exception handling) frames and creates a 1055 // .eh_frame_hdr section for runtime. So we handle them with a special 1056 // class. For relocatable outputs, they are just passed through. 1057 if (name == ".eh_frame" && !config->relocatable) 1058 return makeThreadLocal<EhInputSection>(*this, sec, name); 1059 1060 if ((sec.sh_flags & SHF_MERGE) && shouldMerge(sec, name)) 1061 return makeThreadLocal<MergeInputSection>(*this, sec, name); 1062 return makeThreadLocal<InputSection>(*this, sec, name); 1063 } 1064 1065 // Initialize symbols. symbols is a parallel array to the corresponding ELF 1066 // symbol table. 1067 template <class ELFT> 1068 void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) { 1069 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 1070 if (numSymbols == 0) { 1071 numSymbols = eSyms.size(); 1072 symbols = std::make_unique<Symbol *[]>(numSymbols); 1073 } 1074 1075 // Some entries have been filled by LazyObjFile. 1076 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) 1077 if (!symbols[i]) 1078 symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this)); 1079 1080 // Perform symbol resolution on non-local symbols. 1081 SmallVector<unsigned, 32> undefineds; 1082 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 1083 const Elf_Sym &eSym = eSyms[i]; 1084 uint32_t secIdx = eSym.st_shndx; 1085 if (secIdx == SHN_UNDEF) { 1086 undefineds.push_back(i); 1087 continue; 1088 } 1089 1090 uint8_t binding = eSym.getBinding(); 1091 uint8_t stOther = eSym.st_other; 1092 uint8_t type = eSym.getType(); 1093 uint64_t value = eSym.st_value; 1094 uint64_t size = eSym.st_size; 1095 1096 Symbol *sym = symbols[i]; 1097 sym->isUsedInRegularObj = true; 1098 if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) { 1099 if (value == 0 || value >= UINT32_MAX) 1100 fatal(toString(this) + ": common symbol '" + sym->getName() + 1101 "' has invalid alignment: " + Twine(value)); 1102 hasCommonSyms = true; 1103 sym->resolve( 1104 CommonSymbol{this, StringRef(), binding, stOther, type, value, size}); 1105 continue; 1106 } 1107 1108 // Handle global defined symbols. Defined::section will be set in postParse. 1109 sym->resolve(Defined{this, StringRef(), binding, stOther, type, value, size, 1110 nullptr}); 1111 } 1112 1113 // Undefined symbols (excluding those defined relative to non-prevailing 1114 // sections) can trigger recursive extract. Process defined symbols first so 1115 // that the relative order between a defined symbol and an undefined symbol 1116 // does not change the symbol resolution behavior. In addition, a set of 1117 // interconnected symbols will all be resolved to the same file, instead of 1118 // being resolved to different files. 1119 for (unsigned i : undefineds) { 1120 const Elf_Sym &eSym = eSyms[i]; 1121 Symbol *sym = symbols[i]; 1122 sym->resolve(Undefined{this, StringRef(), eSym.getBinding(), eSym.st_other, 1123 eSym.getType()}); 1124 sym->isUsedInRegularObj = true; 1125 sym->referenced = true; 1126 } 1127 } 1128 1129 template <class ELFT> 1130 void ObjFile<ELFT>::initSectionsAndLocalSyms(bool ignoreComdats) { 1131 if (!justSymbols) 1132 initializeSections(ignoreComdats, getObj()); 1133 1134 if (!firstGlobal) 1135 return; 1136 SymbolUnion *locals = makeThreadLocalN<SymbolUnion>(firstGlobal); 1137 memset(locals, 0, sizeof(SymbolUnion) * firstGlobal); 1138 1139 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 1140 for (size_t i = 0, end = firstGlobal; i != end; ++i) { 1141 const Elf_Sym &eSym = eSyms[i]; 1142 uint32_t secIdx = eSym.st_shndx; 1143 if (LLVM_UNLIKELY(secIdx == SHN_XINDEX)) 1144 secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable)); 1145 else if (secIdx >= SHN_LORESERVE) 1146 secIdx = 0; 1147 if (LLVM_UNLIKELY(secIdx >= sections.size())) 1148 fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); 1149 if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL)) 1150 error(toString(this) + ": non-local symbol (" + Twine(i) + 1151 ") found at index < .symtab's sh_info (" + Twine(end) + ")"); 1152 1153 InputSectionBase *sec = sections[secIdx]; 1154 uint8_t type = eSym.getType(); 1155 if (type == STT_FILE) 1156 sourceFile = CHECK(eSym.getName(stringTable), this); 1157 if (LLVM_UNLIKELY(stringTable.size() <= eSym.st_name)) 1158 fatal(toString(this) + ": invalid symbol name offset"); 1159 StringRef name(stringTable.data() + eSym.st_name); 1160 1161 symbols[i] = reinterpret_cast<Symbol *>(locals + i); 1162 if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded) 1163 new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type, 1164 /*discardedSecIdx=*/secIdx); 1165 else 1166 new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type, 1167 eSym.st_value, eSym.st_size, sec); 1168 symbols[i]->partition = 1; 1169 symbols[i]->isUsedInRegularObj = true; 1170 } 1171 } 1172 1173 // Called after all ObjFile::parse is called for all ObjFiles. This checks 1174 // duplicate symbols and may do symbol property merge in the future. 1175 template <class ELFT> void ObjFile<ELFT>::postParse() { 1176 static std::mutex mu; 1177 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 1178 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 1179 const Elf_Sym &eSym = eSyms[i]; 1180 Symbol &sym = *symbols[i]; 1181 uint32_t secIdx = eSym.st_shndx; 1182 uint8_t binding = eSym.getBinding(); 1183 if (LLVM_UNLIKELY(binding != STB_GLOBAL && binding != STB_WEAK && 1184 binding != STB_GNU_UNIQUE)) 1185 errorOrWarn(toString(this) + ": symbol (" + Twine(i) + 1186 ") has invalid binding: " + Twine((int)binding)); 1187 1188 // st_value of STT_TLS represents the assigned offset, not the actual 1189 // address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can 1190 // only be referenced by special TLS relocations. It is usually an error if 1191 // a STT_TLS symbol is replaced by a non-STT_TLS symbol, vice versa. 1192 if (LLVM_UNLIKELY(sym.isTls()) && eSym.getType() != STT_TLS && 1193 eSym.getType() != STT_NOTYPE) 1194 errorOrWarn("TLS attribute mismatch: " + toString(sym) + "\n>>> in " + 1195 toString(sym.file) + "\n>>> in " + toString(this)); 1196 1197 // Handle non-COMMON defined symbol below. !sym.file allows a symbol 1198 // assignment to redefine a symbol without an error. 1199 if (!sym.file || !sym.isDefined() || secIdx == SHN_UNDEF || 1200 secIdx == SHN_COMMON) 1201 continue; 1202 1203 if (LLVM_UNLIKELY(secIdx == SHN_XINDEX)) 1204 secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable)); 1205 else if (secIdx >= SHN_LORESERVE) 1206 secIdx = 0; 1207 if (LLVM_UNLIKELY(secIdx >= sections.size())) 1208 fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); 1209 InputSectionBase *sec = sections[secIdx]; 1210 if (sec == &InputSection::discarded) { 1211 if (sym.traced) { 1212 printTraceSymbol(Undefined{this, sym.getName(), sym.binding, 1213 sym.stOther, sym.type, secIdx}, 1214 sym.getName()); 1215 } 1216 if (sym.file == this) { 1217 std::lock_guard<std::mutex> lock(mu); 1218 ctx.nonPrevailingSyms.emplace_back(&sym, secIdx); 1219 } 1220 continue; 1221 } 1222 1223 if (sym.file == this) { 1224 cast<Defined>(sym).section = sec; 1225 continue; 1226 } 1227 1228 if (sym.binding == STB_WEAK || binding == STB_WEAK) 1229 continue; 1230 std::lock_guard<std::mutex> lock(mu); 1231 ctx.duplicates.push_back({&sym, this, sec, eSym.st_value}); 1232 } 1233 } 1234 1235 // The handling of tentative definitions (COMMON symbols) in archives is murky. 1236 // A tentative definition will be promoted to a global definition if there are 1237 // no non-tentative definitions to dominate it. When we hold a tentative 1238 // definition to a symbol and are inspecting archive members for inclusion 1239 // there are 2 ways we can proceed: 1240 // 1241 // 1) Consider the tentative definition a 'real' definition (ie promotion from 1242 // tentative to real definition has already happened) and not inspect 1243 // archive members for Global/Weak definitions to replace the tentative 1244 // definition. An archive member would only be included if it satisfies some 1245 // other undefined symbol. This is the behavior Gold uses. 1246 // 1247 // 2) Consider the tentative definition as still undefined (ie the promotion to 1248 // a real definition happens only after all symbol resolution is done). 1249 // The linker searches archive members for STB_GLOBAL definitions to 1250 // replace the tentative definition with. This is the behavior used by 1251 // GNU ld. 1252 // 1253 // The second behavior is inherited from SysVR4, which based it on the FORTRAN 1254 // COMMON BLOCK model. This behavior is needed for proper initialization in old 1255 // (pre F90) FORTRAN code that is packaged into an archive. 1256 // 1257 // The following functions search archive members for definitions to replace 1258 // tentative definitions (implementing behavior 2). 1259 static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName, 1260 StringRef archiveName) { 1261 IRSymtabFile symtabFile = check(readIRSymtab(mb)); 1262 for (const irsymtab::Reader::SymbolRef &sym : 1263 symtabFile.TheReader.symbols()) { 1264 if (sym.isGlobal() && sym.getName() == symName) 1265 return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon(); 1266 } 1267 return false; 1268 } 1269 1270 template <class ELFT> 1271 static bool isNonCommonDef(ELFKind ekind, MemoryBufferRef mb, StringRef symName, 1272 StringRef archiveName) { 1273 ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(ekind, mb, archiveName); 1274 obj->init(); 1275 StringRef stringtable = obj->getStringTable(); 1276 1277 for (auto sym : obj->template getGlobalELFSyms<ELFT>()) { 1278 Expected<StringRef> name = sym.getName(stringtable); 1279 if (name && name.get() == symName) 1280 return sym.isDefined() && sym.getBinding() == STB_GLOBAL && 1281 !sym.isCommon(); 1282 } 1283 return false; 1284 } 1285 1286 static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName, 1287 StringRef archiveName) { 1288 switch (getELFKind(mb, archiveName)) { 1289 case ELF32LEKind: 1290 return isNonCommonDef<ELF32LE>(ELF32LEKind, mb, symName, archiveName); 1291 case ELF32BEKind: 1292 return isNonCommonDef<ELF32BE>(ELF32BEKind, mb, symName, archiveName); 1293 case ELF64LEKind: 1294 return isNonCommonDef<ELF64LE>(ELF64LEKind, mb, symName, archiveName); 1295 case ELF64BEKind: 1296 return isNonCommonDef<ELF64BE>(ELF64BEKind, mb, symName, archiveName); 1297 default: 1298 llvm_unreachable("getELFKind"); 1299 } 1300 } 1301 1302 unsigned SharedFile::vernauxNum; 1303 1304 SharedFile::SharedFile(MemoryBufferRef m, StringRef defaultSoName) 1305 : ELFFileBase(SharedKind, getELFKind(m, ""), m), soName(defaultSoName), 1306 isNeeded(!config->asNeeded) {} 1307 1308 // Parse the version definitions in the object file if present, and return a 1309 // vector whose nth element contains a pointer to the Elf_Verdef for version 1310 // identifier n. Version identifiers that are not definitions map to nullptr. 1311 template <typename ELFT> 1312 static SmallVector<const void *, 0> 1313 parseVerdefs(const uint8_t *base, const typename ELFT::Shdr *sec) { 1314 if (!sec) 1315 return {}; 1316 1317 // Build the Verdefs array by following the chain of Elf_Verdef objects 1318 // from the start of the .gnu.version_d section. 1319 SmallVector<const void *, 0> verdefs; 1320 const uint8_t *verdef = base + sec->sh_offset; 1321 for (unsigned i = 0, e = sec->sh_info; i != e; ++i) { 1322 auto *curVerdef = reinterpret_cast<const typename ELFT::Verdef *>(verdef); 1323 verdef += curVerdef->vd_next; 1324 unsigned verdefIndex = curVerdef->vd_ndx; 1325 if (verdefIndex >= verdefs.size()) 1326 verdefs.resize(verdefIndex + 1); 1327 verdefs[verdefIndex] = curVerdef; 1328 } 1329 return verdefs; 1330 } 1331 1332 // Parse SHT_GNU_verneed to properly set the name of a versioned undefined 1333 // symbol. We detect fatal issues which would cause vulnerabilities, but do not 1334 // implement sophisticated error checking like in llvm-readobj because the value 1335 // of such diagnostics is low. 1336 template <typename ELFT> 1337 std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj, 1338 const typename ELFT::Shdr *sec) { 1339 if (!sec) 1340 return {}; 1341 std::vector<uint32_t> verneeds; 1342 ArrayRef<uint8_t> data = CHECK(obj.getSectionContents(*sec), this); 1343 const uint8_t *verneedBuf = data.begin(); 1344 for (unsigned i = 0; i != sec->sh_info; ++i) { 1345 if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end()) 1346 fatal(toString(this) + " has an invalid Verneed"); 1347 auto *vn = reinterpret_cast<const typename ELFT::Verneed *>(verneedBuf); 1348 const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux; 1349 for (unsigned j = 0; j != vn->vn_cnt; ++j) { 1350 if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end()) 1351 fatal(toString(this) + " has an invalid Vernaux"); 1352 auto *aux = reinterpret_cast<const typename ELFT::Vernaux *>(vernauxBuf); 1353 if (aux->vna_name >= this->stringTable.size()) 1354 fatal(toString(this) + " has a Vernaux with an invalid vna_name"); 1355 uint16_t version = aux->vna_other & VERSYM_VERSION; 1356 if (version >= verneeds.size()) 1357 verneeds.resize(version + 1); 1358 verneeds[version] = aux->vna_name; 1359 vernauxBuf += aux->vna_next; 1360 } 1361 verneedBuf += vn->vn_next; 1362 } 1363 return verneeds; 1364 } 1365 1366 // We do not usually care about alignments of data in shared object 1367 // files because the loader takes care of it. However, if we promote a 1368 // DSO symbol to point to .bss due to copy relocation, we need to keep 1369 // the original alignment requirements. We infer it in this function. 1370 template <typename ELFT> 1371 static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> sections, 1372 const typename ELFT::Sym &sym) { 1373 uint64_t ret = UINT64_MAX; 1374 if (sym.st_value) 1375 ret = 1ULL << llvm::countr_zero((uint64_t)sym.st_value); 1376 if (0 < sym.st_shndx && sym.st_shndx < sections.size()) 1377 ret = std::min<uint64_t>(ret, sections[sym.st_shndx].sh_addralign); 1378 return (ret > UINT32_MAX) ? 0 : ret; 1379 } 1380 1381 // Fully parse the shared object file. 1382 // 1383 // This function parses symbol versions. If a DSO has version information, 1384 // the file has a ".gnu.version_d" section which contains symbol version 1385 // definitions. Each symbol is associated to one version through a table in 1386 // ".gnu.version" section. That table is a parallel array for the symbol 1387 // table, and each table entry contains an index in ".gnu.version_d". 1388 // 1389 // The special index 0 is reserved for VERF_NDX_LOCAL and 1 is for 1390 // VER_NDX_GLOBAL. There's no table entry for these special versions in 1391 // ".gnu.version_d". 1392 // 1393 // The file format for symbol versioning is perhaps a bit more complicated 1394 // than necessary, but you can easily understand the code if you wrap your 1395 // head around the data structure described above. 1396 template <class ELFT> void SharedFile::parse() { 1397 using Elf_Dyn = typename ELFT::Dyn; 1398 using Elf_Shdr = typename ELFT::Shdr; 1399 using Elf_Sym = typename ELFT::Sym; 1400 using Elf_Verdef = typename ELFT::Verdef; 1401 using Elf_Versym = typename ELFT::Versym; 1402 1403 ArrayRef<Elf_Dyn> dynamicTags; 1404 const ELFFile<ELFT> obj = this->getObj<ELFT>(); 1405 ArrayRef<Elf_Shdr> sections = getELFShdrs<ELFT>(); 1406 1407 const Elf_Shdr *versymSec = nullptr; 1408 const Elf_Shdr *verdefSec = nullptr; 1409 const Elf_Shdr *verneedSec = nullptr; 1410 1411 // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d. 1412 for (const Elf_Shdr &sec : sections) { 1413 switch (sec.sh_type) { 1414 default: 1415 continue; 1416 case SHT_DYNAMIC: 1417 dynamicTags = 1418 CHECK(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this); 1419 break; 1420 case SHT_GNU_versym: 1421 versymSec = &sec; 1422 break; 1423 case SHT_GNU_verdef: 1424 verdefSec = &sec; 1425 break; 1426 case SHT_GNU_verneed: 1427 verneedSec = &sec; 1428 break; 1429 } 1430 } 1431 1432 if (versymSec && numELFSyms == 0) { 1433 error("SHT_GNU_versym should be associated with symbol table"); 1434 return; 1435 } 1436 1437 // Search for a DT_SONAME tag to initialize this->soName. 1438 for (const Elf_Dyn &dyn : dynamicTags) { 1439 if (dyn.d_tag == DT_NEEDED) { 1440 uint64_t val = dyn.getVal(); 1441 if (val >= this->stringTable.size()) 1442 fatal(toString(this) + ": invalid DT_NEEDED entry"); 1443 dtNeeded.push_back(this->stringTable.data() + val); 1444 } else if (dyn.d_tag == DT_SONAME) { 1445 uint64_t val = dyn.getVal(); 1446 if (val >= this->stringTable.size()) 1447 fatal(toString(this) + ": invalid DT_SONAME entry"); 1448 soName = this->stringTable.data() + val; 1449 } 1450 } 1451 1452 // DSOs are uniquified not by filename but by soname. 1453 DenseMap<CachedHashStringRef, SharedFile *>::iterator it; 1454 bool wasInserted; 1455 std::tie(it, wasInserted) = 1456 symtab.soNames.try_emplace(CachedHashStringRef(soName), this); 1457 1458 // If a DSO appears more than once on the command line with and without 1459 // --as-needed, --no-as-needed takes precedence over --as-needed because a 1460 // user can add an extra DSO with --no-as-needed to force it to be added to 1461 // the dependency list. 1462 it->second->isNeeded |= isNeeded; 1463 if (!wasInserted) 1464 return; 1465 1466 ctx.sharedFiles.push_back(this); 1467 1468 verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec); 1469 std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec); 1470 1471 // Parse ".gnu.version" section which is a parallel array for the symbol 1472 // table. If a given file doesn't have a ".gnu.version" section, we use 1473 // VER_NDX_GLOBAL. 1474 size_t size = numELFSyms - firstGlobal; 1475 std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL); 1476 if (versymSec) { 1477 ArrayRef<Elf_Versym> versym = 1478 CHECK(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec), 1479 this) 1480 .slice(firstGlobal); 1481 for (size_t i = 0; i < size; ++i) 1482 versyms[i] = versym[i].vs_index; 1483 } 1484 1485 // System libraries can have a lot of symbols with versions. Using a 1486 // fixed buffer for computing the versions name (foo@ver) can save a 1487 // lot of allocations. 1488 SmallString<0> versionedNameBuffer; 1489 1490 // Add symbols to the symbol table. 1491 ArrayRef<Elf_Sym> syms = this->getGlobalELFSyms<ELFT>(); 1492 for (size_t i = 0, e = syms.size(); i != e; ++i) { 1493 const Elf_Sym &sym = syms[i]; 1494 1495 // ELF spec requires that all local symbols precede weak or global 1496 // symbols in each symbol table, and the index of first non-local symbol 1497 // is stored to sh_info. If a local symbol appears after some non-local 1498 // symbol, that's a violation of the spec. 1499 StringRef name = CHECK(sym.getName(stringTable), this); 1500 if (sym.getBinding() == STB_LOCAL) { 1501 errorOrWarn(toString(this) + ": invalid local symbol '" + name + 1502 "' in global part of symbol table"); 1503 continue; 1504 } 1505 1506 const uint16_t ver = versyms[i], idx = ver & ~VERSYM_HIDDEN; 1507 if (sym.isUndefined()) { 1508 // For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but 1509 // as of binutils 2.34, GNU ld produces VER_NDX_LOCAL. 1510 if (ver != VER_NDX_LOCAL && ver != VER_NDX_GLOBAL) { 1511 if (idx >= verneeds.size()) { 1512 error("corrupt input file: version need index " + Twine(idx) + 1513 " for symbol " + name + " is out of bounds\n>>> defined in " + 1514 toString(this)); 1515 continue; 1516 } 1517 StringRef verName = stringTable.data() + verneeds[idx]; 1518 versionedNameBuffer.clear(); 1519 name = saver().save( 1520 (name + "@" + verName).toStringRef(versionedNameBuffer)); 1521 } 1522 Symbol *s = symtab.addSymbol( 1523 Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); 1524 s->exportDynamic = true; 1525 if (s->isUndefined() && sym.getBinding() != STB_WEAK && 1526 config->unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore) 1527 requiredSymbols.push_back(s); 1528 continue; 1529 } 1530 1531 if (ver == VER_NDX_LOCAL || 1532 (ver != VER_NDX_GLOBAL && idx >= verdefs.size())) { 1533 // In GNU ld < 2.31 (before 3be08ea4728b56d35e136af4e6fd3086ade17764), the 1534 // MIPS port puts _gp_disp symbol into DSO files and incorrectly assigns 1535 // VER_NDX_LOCAL. Workaround this bug. 1536 if (config->emachine == EM_MIPS && name == "_gp_disp") 1537 continue; 1538 error("corrupt input file: version definition index " + Twine(idx) + 1539 " for symbol " + name + " is out of bounds\n>>> defined in " + 1540 toString(this)); 1541 continue; 1542 } 1543 1544 uint32_t alignment = getAlignment<ELFT>(sections, sym); 1545 if (ver == idx) { 1546 auto *s = symtab.addSymbol( 1547 SharedSymbol{*this, name, sym.getBinding(), sym.st_other, 1548 sym.getType(), sym.st_value, sym.st_size, alignment}); 1549 s->dsoDefined = true; 1550 if (s->file == this) 1551 s->versionId = ver; 1552 } 1553 1554 // Also add the symbol with the versioned name to handle undefined symbols 1555 // with explicit versions. 1556 if (ver == VER_NDX_GLOBAL) 1557 continue; 1558 1559 StringRef verName = 1560 stringTable.data() + 1561 reinterpret_cast<const Elf_Verdef *>(verdefs[idx])->getAux()->vda_name; 1562 versionedNameBuffer.clear(); 1563 name = (name + "@" + verName).toStringRef(versionedNameBuffer); 1564 auto *s = symtab.addSymbol( 1565 SharedSymbol{*this, saver().save(name), sym.getBinding(), sym.st_other, 1566 sym.getType(), sym.st_value, sym.st_size, alignment}); 1567 s->dsoDefined = true; 1568 if (s->file == this) 1569 s->versionId = idx; 1570 } 1571 } 1572 1573 static ELFKind getBitcodeELFKind(const Triple &t) { 1574 if (t.isLittleEndian()) 1575 return t.isArch64Bit() ? ELF64LEKind : ELF32LEKind; 1576 return t.isArch64Bit() ? ELF64BEKind : ELF32BEKind; 1577 } 1578 1579 static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { 1580 switch (t.getArch()) { 1581 case Triple::aarch64: 1582 case Triple::aarch64_be: 1583 return EM_AARCH64; 1584 case Triple::amdgcn: 1585 case Triple::r600: 1586 return EM_AMDGPU; 1587 case Triple::arm: 1588 case Triple::armeb: 1589 case Triple::thumb: 1590 case Triple::thumbeb: 1591 return EM_ARM; 1592 case Triple::avr: 1593 return EM_AVR; 1594 case Triple::hexagon: 1595 return EM_HEXAGON; 1596 case Triple::loongarch32: 1597 case Triple::loongarch64: 1598 return EM_LOONGARCH; 1599 case Triple::mips: 1600 case Triple::mipsel: 1601 case Triple::mips64: 1602 case Triple::mips64el: 1603 return EM_MIPS; 1604 case Triple::msp430: 1605 return EM_MSP430; 1606 case Triple::ppc: 1607 case Triple::ppcle: 1608 return EM_PPC; 1609 case Triple::ppc64: 1610 case Triple::ppc64le: 1611 return EM_PPC64; 1612 case Triple::riscv32: 1613 case Triple::riscv64: 1614 return EM_RISCV; 1615 case Triple::sparcv9: 1616 return EM_SPARCV9; 1617 case Triple::systemz: 1618 return EM_S390; 1619 case Triple::x86: 1620 return t.isOSIAMCU() ? EM_IAMCU : EM_386; 1621 case Triple::x86_64: 1622 return EM_X86_64; 1623 default: 1624 error(path + ": could not infer e_machine from bitcode target triple " + 1625 t.str()); 1626 return EM_NONE; 1627 } 1628 } 1629 1630 static uint8_t getOsAbi(const Triple &t) { 1631 switch (t.getOS()) { 1632 case Triple::AMDHSA: 1633 return ELF::ELFOSABI_AMDGPU_HSA; 1634 case Triple::AMDPAL: 1635 return ELF::ELFOSABI_AMDGPU_PAL; 1636 case Triple::Mesa3D: 1637 return ELF::ELFOSABI_AMDGPU_MESA3D; 1638 default: 1639 return ELF::ELFOSABI_NONE; 1640 } 1641 } 1642 1643 BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, 1644 uint64_t offsetInArchive, bool lazy) 1645 : InputFile(BitcodeKind, mb) { 1646 this->archiveName = archiveName; 1647 this->lazy = lazy; 1648 1649 std::string path = mb.getBufferIdentifier().str(); 1650 if (config->thinLTOIndexOnly) 1651 path = replaceThinLTOSuffix(mb.getBufferIdentifier()); 1652 1653 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique 1654 // name. If two archives define two members with the same name, this 1655 // causes a collision which result in only one of the objects being taken 1656 // into consideration at LTO time (which very likely causes undefined 1657 // symbols later in the link stage). So we append file offset to make 1658 // filename unique. 1659 StringRef name = archiveName.empty() 1660 ? saver().save(path) 1661 : saver().save(archiveName + "(" + path::filename(path) + 1662 " at " + utostr(offsetInArchive) + ")"); 1663 MemoryBufferRef mbref(mb.getBuffer(), name); 1664 1665 obj = CHECK(lto::InputFile::create(mbref), this); 1666 1667 Triple t(obj->getTargetTriple()); 1668 ekind = getBitcodeELFKind(t); 1669 emachine = getBitcodeMachineKind(mb.getBufferIdentifier(), t); 1670 osabi = getOsAbi(t); 1671 } 1672 1673 static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { 1674 switch (gvVisibility) { 1675 case GlobalValue::DefaultVisibility: 1676 return STV_DEFAULT; 1677 case GlobalValue::HiddenVisibility: 1678 return STV_HIDDEN; 1679 case GlobalValue::ProtectedVisibility: 1680 return STV_PROTECTED; 1681 } 1682 llvm_unreachable("unknown visibility"); 1683 } 1684 1685 static void 1686 createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats, 1687 const lto::InputFile::Symbol &objSym, BitcodeFile &f) { 1688 uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL; 1689 uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE; 1690 uint8_t visibility = mapVisibility(objSym.getVisibility()); 1691 1692 if (!sym) 1693 sym = symtab.insert(saver().save(objSym.getName())); 1694 1695 int c = objSym.getComdatIndex(); 1696 if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) { 1697 Undefined newSym(&f, StringRef(), binding, visibility, type); 1698 sym->resolve(newSym); 1699 sym->referenced = true; 1700 return; 1701 } 1702 1703 if (objSym.isCommon()) { 1704 sym->resolve(CommonSymbol{&f, StringRef(), binding, visibility, STT_OBJECT, 1705 objSym.getCommonAlignment(), 1706 objSym.getCommonSize()}); 1707 } else { 1708 Defined newSym(&f, StringRef(), binding, visibility, type, 0, 0, nullptr); 1709 if (objSym.canBeOmittedFromSymbolTable()) 1710 newSym.exportDynamic = false; 1711 sym->resolve(newSym); 1712 } 1713 } 1714 1715 void BitcodeFile::parse() { 1716 for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) { 1717 keptComdats.push_back( 1718 s.second == Comdat::NoDeduplicate || 1719 symtab.comdatGroups.try_emplace(CachedHashStringRef(s.first), this) 1720 .second); 1721 } 1722 1723 if (numSymbols == 0) { 1724 numSymbols = obj->symbols().size(); 1725 symbols = std::make_unique<Symbol *[]>(numSymbols); 1726 } 1727 // Process defined symbols first. See the comment in 1728 // ObjFile<ELFT>::initializeSymbols. 1729 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) 1730 if (!irSym.isUndefined()) 1731 createBitcodeSymbol(symbols[i], keptComdats, irSym, *this); 1732 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) 1733 if (irSym.isUndefined()) 1734 createBitcodeSymbol(symbols[i], keptComdats, irSym, *this); 1735 1736 for (auto l : obj->getDependentLibraries()) 1737 addDependentLibrary(l, this); 1738 } 1739 1740 void BitcodeFile::parseLazy() { 1741 numSymbols = obj->symbols().size(); 1742 symbols = std::make_unique<Symbol *[]>(numSymbols); 1743 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) 1744 if (!irSym.isUndefined()) { 1745 auto *sym = symtab.insert(saver().save(irSym.getName())); 1746 sym->resolve(LazySymbol{*this}); 1747 symbols[i] = sym; 1748 } 1749 } 1750 1751 void BitcodeFile::postParse() { 1752 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) { 1753 const Symbol &sym = *symbols[i]; 1754 if (sym.file == this || !sym.isDefined() || irSym.isUndefined() || 1755 irSym.isCommon() || irSym.isWeak()) 1756 continue; 1757 int c = irSym.getComdatIndex(); 1758 if (c != -1 && !keptComdats[c]) 1759 continue; 1760 reportDuplicate(sym, this, nullptr, 0); 1761 } 1762 } 1763 1764 void BinaryFile::parse() { 1765 ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer()); 1766 auto *section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 1767 8, data, ".data"); 1768 sections.push_back(section); 1769 1770 // For each input file foo that is embedded to a result as a binary 1771 // blob, we define _binary_foo_{start,end,size} symbols, so that 1772 // user programs can access blobs by name. Non-alphanumeric 1773 // characters in a filename are replaced with underscore. 1774 std::string s = "_binary_" + mb.getBufferIdentifier().str(); 1775 for (char &c : s) 1776 if (!isAlnum(c)) 1777 c = '_'; 1778 1779 llvm::StringSaver &saver = lld::saver(); 1780 1781 symtab.addAndCheckDuplicate(Defined{this, saver.save(s + "_start"), 1782 STB_GLOBAL, STV_DEFAULT, STT_OBJECT, 0, 0, 1783 section}); 1784 symtab.addAndCheckDuplicate(Defined{this, saver.save(s + "_end"), STB_GLOBAL, 1785 STV_DEFAULT, STT_OBJECT, data.size(), 0, 1786 section}); 1787 symtab.addAndCheckDuplicate(Defined{this, saver.save(s + "_size"), STB_GLOBAL, 1788 STV_DEFAULT, STT_OBJECT, data.size(), 0, 1789 nullptr}); 1790 } 1791 1792 InputFile *elf::createInternalFile(StringRef name) { 1793 auto *file = 1794 make<InputFile>(InputFile::InternalKind, MemoryBufferRef("", name)); 1795 // References from an internal file do not lead to --warn-backrefs 1796 // diagnostics. 1797 file->groupId = 0; 1798 return file; 1799 } 1800 1801 ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName, 1802 bool lazy) { 1803 ELFFileBase *f; 1804 switch (getELFKind(mb, archiveName)) { 1805 case ELF32LEKind: 1806 f = make<ObjFile<ELF32LE>>(ELF32LEKind, mb, archiveName); 1807 break; 1808 case ELF32BEKind: 1809 f = make<ObjFile<ELF32BE>>(ELF32BEKind, mb, archiveName); 1810 break; 1811 case ELF64LEKind: 1812 f = make<ObjFile<ELF64LE>>(ELF64LEKind, mb, archiveName); 1813 break; 1814 case ELF64BEKind: 1815 f = make<ObjFile<ELF64BE>>(ELF64BEKind, mb, archiveName); 1816 break; 1817 default: 1818 llvm_unreachable("getELFKind"); 1819 } 1820 f->init(); 1821 f->lazy = lazy; 1822 return f; 1823 } 1824 1825 template <class ELFT> void ObjFile<ELFT>::parseLazy() { 1826 const ArrayRef<typename ELFT::Sym> eSyms = this->getELFSyms<ELFT>(); 1827 numSymbols = eSyms.size(); 1828 symbols = std::make_unique<Symbol *[]>(numSymbols); 1829 1830 // resolve() may trigger this->extract() if an existing symbol is an undefined 1831 // symbol. If that happens, this function has served its purpose, and we can 1832 // exit from the loop early. 1833 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 1834 if (eSyms[i].st_shndx == SHN_UNDEF) 1835 continue; 1836 symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this)); 1837 symbols[i]->resolve(LazySymbol{*this}); 1838 if (!lazy) 1839 break; 1840 } 1841 } 1842 1843 bool InputFile::shouldExtractForCommon(StringRef name) const { 1844 if (isa<BitcodeFile>(this)) 1845 return isBitcodeNonCommonDef(mb, name, archiveName); 1846 1847 return isNonCommonDef(mb, name, archiveName); 1848 } 1849 1850 std::string elf::replaceThinLTOSuffix(StringRef path) { 1851 auto [suffix, repl] = config->thinLTOObjectSuffixReplace; 1852 if (path.consume_back(suffix)) 1853 return (path + repl).str(); 1854 return std::string(path); 1855 } 1856 1857 template class elf::ObjFile<ELF32LE>; 1858 template class elf::ObjFile<ELF32BE>; 1859 template class elf::ObjFile<ELF64LE>; 1860 template class elf::ObjFile<ELF64BE>; 1861 1862 template void SharedFile::parse<ELF32LE>(); 1863 template void SharedFile::parse<ELF32BE>(); 1864 template void SharedFile::parse<ELF64LE>(); 1865 template void SharedFile::parse<ELF64BE>(); 1866