10b57cec5SDimitry Andric //===- InputFiles.cpp -----------------------------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "InputFiles.h" 1081ad6265SDimitry Andric #include "Config.h" 1181ad6265SDimitry Andric #include "DWARF.h" 120b57cec5SDimitry Andric #include "Driver.h" 130b57cec5SDimitry Andric #include "InputSection.h" 140b57cec5SDimitry Andric #include "LinkerScript.h" 150b57cec5SDimitry Andric #include "SymbolTable.h" 160b57cec5SDimitry Andric #include "Symbols.h" 170b57cec5SDimitry Andric #include "SyntheticSections.h" 181fd87a68SDimitry Andric #include "Target.h" 1904eeddc0SDimitry Andric #include "lld/Common/CommonLinkerContext.h" 20480093f4SDimitry Andric #include "lld/Common/DWARF.h" 2181ad6265SDimitry Andric #include "llvm/ADT/CachedHashString.h" 220b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 230b57cec5SDimitry Andric #include "llvm/LTO/LTO.h" 2481ad6265SDimitry Andric #include "llvm/Object/IRObjectFile.h" 250b57cec5SDimitry Andric #include "llvm/Support/ARMAttributeParser.h" 260b57cec5SDimitry Andric #include "llvm/Support/ARMBuildAttributes.h" 270b57cec5SDimitry Andric #include "llvm/Support/Endian.h" 2881ad6265SDimitry Andric #include "llvm/Support/FileSystem.h" 290b57cec5SDimitry Andric #include "llvm/Support/Path.h" 30e8d8bef9SDimitry Andric #include "llvm/Support/RISCVAttributeParser.h" 310b57cec5SDimitry Andric #include "llvm/Support/TarWriter.h" 32*0fca6ea1SDimitry Andric #include "llvm/Support/TimeProfiler.h" 330b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 345f757f3fSDimitry Andric #include <optional> 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric using namespace llvm; 370b57cec5SDimitry Andric using namespace llvm::ELF; 380b57cec5SDimitry Andric using namespace llvm::object; 390b57cec5SDimitry Andric using namespace llvm::sys; 400b57cec5SDimitry Andric using namespace llvm::sys::fs; 410b57cec5SDimitry Andric using namespace llvm::support::endian; 425ffd83dbSDimitry Andric using namespace lld; 435ffd83dbSDimitry Andric using namespace lld::elf; 440b57cec5SDimitry Andric 45*0fca6ea1SDimitry Andric // This function is explicitly instantiated in ARM.cpp, don't do it here to 46*0fca6ea1SDimitry Andric // avoid warnings with MSVC. 475f757f3fSDimitry Andric extern template void ObjFile<ELF32LE>::importCmseSymbols(); 485f757f3fSDimitry Andric extern template void ObjFile<ELF32BE>::importCmseSymbols(); 495f757f3fSDimitry Andric extern template void ObjFile<ELF64LE>::importCmseSymbols(); 505f757f3fSDimitry Andric extern template void ObjFile<ELF64BE>::importCmseSymbols(); 515f757f3fSDimitry Andric 525ffd83dbSDimitry Andric bool InputFile::isInGroup; 535ffd83dbSDimitry Andric uint32_t InputFile::nextGroupId; 545ffd83dbSDimitry Andric 555ffd83dbSDimitry Andric std::unique_ptr<TarWriter> elf::tar; 565ffd83dbSDimitry Andric 5785868e8aSDimitry Andric // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 585ffd83dbSDimitry Andric std::string lld::toString(const InputFile *f) { 59bdd1243dSDimitry Andric static std::mutex mu; 6085868e8aSDimitry Andric if (!f) 6185868e8aSDimitry Andric return "<internal>"; 620b57cec5SDimitry Andric 63bdd1243dSDimitry Andric { 64bdd1243dSDimitry Andric std::lock_guard<std::mutex> lock(mu); 6585868e8aSDimitry Andric if (f->toStringCache.empty()) { 6685868e8aSDimitry Andric if (f->archiveName.empty()) 670eae32dcSDimitry Andric f->toStringCache = f->getName(); 6885868e8aSDimitry Andric else 690eae32dcSDimitry Andric (f->archiveName + "(" + f->getName() + ")").toVector(f->toStringCache); 7085868e8aSDimitry Andric } 71bdd1243dSDimitry Andric } 720eae32dcSDimitry Andric return std::string(f->toStringCache); 7385868e8aSDimitry Andric } 7485868e8aSDimitry Andric 750b57cec5SDimitry Andric static ELFKind getELFKind(MemoryBufferRef mb, StringRef archiveName) { 760b57cec5SDimitry Andric unsigned char size; 770b57cec5SDimitry Andric unsigned char endian; 780b57cec5SDimitry Andric std::tie(size, endian) = getElfArchType(mb.getBuffer()); 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric auto report = [&](StringRef msg) { 810b57cec5SDimitry Andric StringRef filename = mb.getBufferIdentifier(); 820b57cec5SDimitry Andric if (archiveName.empty()) 830b57cec5SDimitry Andric fatal(filename + ": " + msg); 840b57cec5SDimitry Andric else 850b57cec5SDimitry Andric fatal(archiveName + "(" + filename + "): " + msg); 860b57cec5SDimitry Andric }; 870b57cec5SDimitry Andric 8806c3fb27SDimitry Andric if (!mb.getBuffer().starts_with(ElfMagic)) 890b57cec5SDimitry Andric report("not an ELF file"); 900b57cec5SDimitry Andric if (endian != ELFDATA2LSB && endian != ELFDATA2MSB) 910b57cec5SDimitry Andric report("corrupted ELF file: invalid data encoding"); 920b57cec5SDimitry Andric if (size != ELFCLASS32 && size != ELFCLASS64) 930b57cec5SDimitry Andric report("corrupted ELF file: invalid file class"); 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric size_t bufSize = mb.getBuffer().size(); 960b57cec5SDimitry Andric if ((size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr)) || 970b57cec5SDimitry Andric (size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr))) 980b57cec5SDimitry Andric report("corrupted ELF file: file is too short"); 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric if (size == ELFCLASS32) 1010b57cec5SDimitry Andric return (endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind; 1020b57cec5SDimitry Andric return (endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind; 1030b57cec5SDimitry Andric } 1040b57cec5SDimitry Andric 105bdd1243dSDimitry Andric // For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD 106bdd1243dSDimitry Andric // flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how 107bdd1243dSDimitry Andric // the input objects have been compiled. 108bdd1243dSDimitry Andric static void updateARMVFPArgs(const ARMAttributeParser &attributes, 109bdd1243dSDimitry Andric const InputFile *f) { 110bdd1243dSDimitry Andric std::optional<unsigned> attr = 111bdd1243dSDimitry Andric attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args); 112bdd1243dSDimitry Andric if (!attr) 113bdd1243dSDimitry Andric // If an ABI tag isn't present then it is implicitly given the value of 0 114bdd1243dSDimitry Andric // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files, 115bdd1243dSDimitry Andric // including some in glibc that don't use FP args (and should have value 3) 116bdd1243dSDimitry Andric // don't have the attribute so we do not consider an implicit value of 0 117bdd1243dSDimitry Andric // as a clash. 118bdd1243dSDimitry Andric return; 119bdd1243dSDimitry Andric 120bdd1243dSDimitry Andric unsigned vfpArgs = *attr; 121bdd1243dSDimitry Andric ARMVFPArgKind arg; 122bdd1243dSDimitry Andric switch (vfpArgs) { 123bdd1243dSDimitry Andric case ARMBuildAttrs::BaseAAPCS: 124bdd1243dSDimitry Andric arg = ARMVFPArgKind::Base; 125bdd1243dSDimitry Andric break; 126bdd1243dSDimitry Andric case ARMBuildAttrs::HardFPAAPCS: 127bdd1243dSDimitry Andric arg = ARMVFPArgKind::VFP; 128bdd1243dSDimitry Andric break; 129bdd1243dSDimitry Andric case ARMBuildAttrs::ToolChainFPPCS: 130bdd1243dSDimitry Andric // Tool chain specific convention that conforms to neither AAPCS variant. 131bdd1243dSDimitry Andric arg = ARMVFPArgKind::ToolChain; 132bdd1243dSDimitry Andric break; 133bdd1243dSDimitry Andric case ARMBuildAttrs::CompatibleFPAAPCS: 134bdd1243dSDimitry Andric // Object compatible with all conventions. 135bdd1243dSDimitry Andric return; 136bdd1243dSDimitry Andric default: 137bdd1243dSDimitry Andric error(toString(f) + ": unknown Tag_ABI_VFP_args value: " + Twine(vfpArgs)); 138bdd1243dSDimitry Andric return; 139bdd1243dSDimitry Andric } 140bdd1243dSDimitry Andric // Follow ld.bfd and error if there is a mix of calling conventions. 141bdd1243dSDimitry Andric if (config->armVFPArgs != arg && config->armVFPArgs != ARMVFPArgKind::Default) 142bdd1243dSDimitry Andric error(toString(f) + ": incompatible Tag_ABI_VFP_args"); 143bdd1243dSDimitry Andric else 144bdd1243dSDimitry Andric config->armVFPArgs = arg; 145bdd1243dSDimitry Andric } 146bdd1243dSDimitry Andric 147bdd1243dSDimitry Andric // The ARM support in lld makes some use of instructions that are not available 148bdd1243dSDimitry Andric // on all ARM architectures. Namely: 149bdd1243dSDimitry Andric // - Use of BLX instruction for interworking between ARM and Thumb state. 150bdd1243dSDimitry Andric // - Use of the extended Thumb branch encoding in relocation. 151bdd1243dSDimitry Andric // - Use of the MOVT/MOVW instructions in Thumb Thunks. 152bdd1243dSDimitry Andric // The ARM Attributes section contains information about the architecture chosen 153bdd1243dSDimitry Andric // at compile time. We follow the convention that if at least one input object 154bdd1243dSDimitry Andric // is compiled with an architecture that supports these features then lld is 155bdd1243dSDimitry Andric // permitted to use them. 156bdd1243dSDimitry Andric static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { 157bdd1243dSDimitry Andric std::optional<unsigned> attr = 158bdd1243dSDimitry Andric attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); 159bdd1243dSDimitry Andric if (!attr) 160bdd1243dSDimitry Andric return; 161bdd1243dSDimitry Andric auto arch = *attr; 162bdd1243dSDimitry Andric switch (arch) { 163bdd1243dSDimitry Andric case ARMBuildAttrs::Pre_v4: 164bdd1243dSDimitry Andric case ARMBuildAttrs::v4: 165bdd1243dSDimitry Andric case ARMBuildAttrs::v4T: 166bdd1243dSDimitry Andric // Architectures prior to v5 do not support BLX instruction 167bdd1243dSDimitry Andric break; 168bdd1243dSDimitry Andric case ARMBuildAttrs::v5T: 169bdd1243dSDimitry Andric case ARMBuildAttrs::v5TE: 170bdd1243dSDimitry Andric case ARMBuildAttrs::v5TEJ: 171bdd1243dSDimitry Andric case ARMBuildAttrs::v6: 172bdd1243dSDimitry Andric case ARMBuildAttrs::v6KZ: 173bdd1243dSDimitry Andric case ARMBuildAttrs::v6K: 174bdd1243dSDimitry Andric config->armHasBlx = true; 175bdd1243dSDimitry Andric // Architectures used in pre-Cortex processors do not support 176bdd1243dSDimitry Andric // The J1 = 1 J2 = 1 Thumb branch range extension, with the exception 177bdd1243dSDimitry Andric // of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do. 178bdd1243dSDimitry Andric break; 179bdd1243dSDimitry Andric default: 180bdd1243dSDimitry Andric // All other Architectures have BLX and extended branch encoding 181bdd1243dSDimitry Andric config->armHasBlx = true; 182bdd1243dSDimitry Andric config->armJ1J2BranchEncoding = true; 183bdd1243dSDimitry Andric if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M) 184bdd1243dSDimitry Andric // All Architectures used in Cortex processors with the exception 185bdd1243dSDimitry Andric // of v6-M and v6S-M have the MOVT and MOVW instructions. 186bdd1243dSDimitry Andric config->armHasMovtMovw = true; 187bdd1243dSDimitry Andric break; 188bdd1243dSDimitry Andric } 18906c3fb27SDimitry Andric 19006c3fb27SDimitry Andric // Only ARMv8-M or later architectures have CMSE support. 19106c3fb27SDimitry Andric std::optional<unsigned> profile = 19206c3fb27SDimitry Andric attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile); 19306c3fb27SDimitry Andric if (!profile) 19406c3fb27SDimitry Andric return; 19506c3fb27SDimitry Andric if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base && 19606c3fb27SDimitry Andric profile == ARMBuildAttrs::MicroControllerProfile) 19706c3fb27SDimitry Andric config->armCMSESupport = true; 198*0fca6ea1SDimitry Andric 199*0fca6ea1SDimitry Andric // The thumb PLT entries require Thumb2 which can be used on multiple archs. 200*0fca6ea1SDimitry Andric // For now, let's limit it to ones where ARM isn't available and we know have 201*0fca6ea1SDimitry Andric // Thumb2. 202*0fca6ea1SDimitry Andric std::optional<unsigned> armISA = 203*0fca6ea1SDimitry Andric attributes.getAttributeValue(ARMBuildAttrs::ARM_ISA_use); 204*0fca6ea1SDimitry Andric std::optional<unsigned> thumb = 205*0fca6ea1SDimitry Andric attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use); 206*0fca6ea1SDimitry Andric bool noArmISA = !armISA || *armISA == ARMBuildAttrs::Not_Allowed; 207*0fca6ea1SDimitry Andric bool hasThumb2 = thumb && *thumb >= ARMBuildAttrs::AllowThumb32; 208*0fca6ea1SDimitry Andric if (noArmISA && hasThumb2) 209*0fca6ea1SDimitry Andric config->armThumbPLTs = true; 210bdd1243dSDimitry Andric } 211bdd1243dSDimitry Andric 2120b57cec5SDimitry Andric InputFile::InputFile(Kind k, MemoryBufferRef m) 2130b57cec5SDimitry Andric : mb(m), groupId(nextGroupId), fileKind(k) { 2140b57cec5SDimitry Andric // All files within the same --{start,end}-group get the same group ID. 2150b57cec5SDimitry Andric // Otherwise, a new file will get a new group ID. 2160b57cec5SDimitry Andric if (!isInGroup) 2170b57cec5SDimitry Andric ++nextGroupId; 2180b57cec5SDimitry Andric } 2190b57cec5SDimitry Andric 220bdd1243dSDimitry Andric std::optional<MemoryBufferRef> elf::readFile(StringRef path) { 221e8d8bef9SDimitry Andric llvm::TimeTraceScope timeScope("Load input files", path); 222e8d8bef9SDimitry Andric 2230b57cec5SDimitry Andric // The --chroot option changes our virtual root directory. 2240b57cec5SDimitry Andric // This is useful when you are dealing with files created by --reproduce. 22506c3fb27SDimitry Andric if (!config->chroot.empty() && path.starts_with("/")) 22604eeddc0SDimitry Andric path = saver().save(config->chroot + path); 2270b57cec5SDimitry Andric 22806c3fb27SDimitry Andric bool remapped = false; 22906c3fb27SDimitry Andric auto it = config->remapInputs.find(path); 23006c3fb27SDimitry Andric if (it != config->remapInputs.end()) { 23106c3fb27SDimitry Andric path = it->second; 23206c3fb27SDimitry Andric remapped = true; 23306c3fb27SDimitry Andric } else { 23406c3fb27SDimitry Andric for (const auto &[pat, toFile] : config->remapInputsWildcards) { 23506c3fb27SDimitry Andric if (pat.match(path)) { 23606c3fb27SDimitry Andric path = toFile; 23706c3fb27SDimitry Andric remapped = true; 23806c3fb27SDimitry Andric break; 23906c3fb27SDimitry Andric } 24006c3fb27SDimitry Andric } 24106c3fb27SDimitry Andric } 24206c3fb27SDimitry Andric if (remapped) { 24306c3fb27SDimitry Andric // Use /dev/null to indicate an input file that should be ignored. Change 24406c3fb27SDimitry Andric // the path to NUL on Windows. 24506c3fb27SDimitry Andric #ifdef _WIN32 24606c3fb27SDimitry Andric if (path == "/dev/null") 24706c3fb27SDimitry Andric path = "NUL"; 24806c3fb27SDimitry Andric #endif 24906c3fb27SDimitry Andric } 25006c3fb27SDimitry Andric 2510b57cec5SDimitry Andric log(path); 252e8d8bef9SDimitry Andric config->dependencyFiles.insert(llvm::CachedHashString(path)); 2530b57cec5SDimitry Andric 254fe6060f1SDimitry Andric auto mbOrErr = MemoryBuffer::getFile(path, /*IsText=*/false, 255fe6060f1SDimitry Andric /*RequiresNullTerminator=*/false); 2560b57cec5SDimitry Andric if (auto ec = mbOrErr.getError()) { 2570b57cec5SDimitry Andric error("cannot open " + path + ": " + ec.message()); 258bdd1243dSDimitry Andric return std::nullopt; 2590b57cec5SDimitry Andric } 2600b57cec5SDimitry Andric 26104eeddc0SDimitry Andric MemoryBufferRef mbref = (*mbOrErr)->getMemBufferRef(); 262bdd1243dSDimitry Andric ctx.memoryBuffers.push_back(std::move(*mbOrErr)); // take MB ownership 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric if (tar) 2650b57cec5SDimitry Andric tar->append(relativeToRoot(path), mbref.getBuffer()); 2660b57cec5SDimitry Andric return mbref; 2670b57cec5SDimitry Andric } 2680b57cec5SDimitry Andric 2690b57cec5SDimitry Andric // All input object files must be for the same architecture 2700b57cec5SDimitry Andric // (e.g. it does not make sense to link x86 object files with 2710b57cec5SDimitry Andric // MIPS object files.) This function checks for that error. 2720b57cec5SDimitry Andric static bool isCompatible(InputFile *file) { 2730b57cec5SDimitry Andric if (!file->isElf() && !isa<BitcodeFile>(file)) 2740b57cec5SDimitry Andric return true; 2750b57cec5SDimitry Andric 2760b57cec5SDimitry Andric if (file->ekind == config->ekind && file->emachine == config->emachine) { 2770b57cec5SDimitry Andric if (config->emachine != EM_MIPS) 2780b57cec5SDimitry Andric return true; 2790b57cec5SDimitry Andric if (isMipsN32Abi(file) == config->mipsN32Abi) 2800b57cec5SDimitry Andric return true; 2810b57cec5SDimitry Andric } 2820b57cec5SDimitry Andric 2835ffd83dbSDimitry Andric StringRef target = 2845ffd83dbSDimitry Andric !config->bfdname.empty() ? config->bfdname : config->emulation; 2855ffd83dbSDimitry Andric if (!target.empty()) { 2865ffd83dbSDimitry Andric error(toString(file) + " is incompatible with " + target); 28785868e8aSDimitry Andric return false; 28885868e8aSDimitry Andric } 28985868e8aSDimitry Andric 290d56accc7SDimitry Andric InputFile *existing = nullptr; 291bdd1243dSDimitry Andric if (!ctx.objectFiles.empty()) 292bdd1243dSDimitry Andric existing = ctx.objectFiles[0]; 293bdd1243dSDimitry Andric else if (!ctx.sharedFiles.empty()) 294bdd1243dSDimitry Andric existing = ctx.sharedFiles[0]; 295bdd1243dSDimitry Andric else if (!ctx.bitcodeFiles.empty()) 296bdd1243dSDimitry Andric existing = ctx.bitcodeFiles[0]; 297d56accc7SDimitry Andric std::string with; 298d56accc7SDimitry Andric if (existing) 299d56accc7SDimitry Andric with = " with " + toString(existing); 300d56accc7SDimitry Andric error(toString(file) + " is incompatible" + with); 3010b57cec5SDimitry Andric return false; 3020b57cec5SDimitry Andric } 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric template <class ELFT> static void doParseFile(InputFile *file) { 3050b57cec5SDimitry Andric if (!isCompatible(file)) 3060b57cec5SDimitry Andric return; 3070b57cec5SDimitry Andric 3080b57cec5SDimitry Andric // Lazy object file 3090eae32dcSDimitry Andric if (file->lazy) { 3100eae32dcSDimitry Andric if (auto *f = dyn_cast<BitcodeFile>(file)) { 311bdd1243dSDimitry Andric ctx.lazyBitcodeFiles.push_back(f); 3120eae32dcSDimitry Andric f->parseLazy(); 3130eae32dcSDimitry Andric } else { 3140eae32dcSDimitry Andric cast<ObjFile<ELFT>>(file)->parseLazy(); 3150eae32dcSDimitry Andric } 3160b57cec5SDimitry Andric return; 3170b57cec5SDimitry Andric } 3180b57cec5SDimitry Andric 3190b57cec5SDimitry Andric if (config->trace) 3200b57cec5SDimitry Andric message(toString(file)); 3210b57cec5SDimitry Andric 3225f757f3fSDimitry Andric if (file->kind() == InputFile::ObjKind) { 323bdd1243dSDimitry Andric ctx.objectFiles.push_back(cast<ELFFileBase>(file)); 3240b57cec5SDimitry Andric cast<ObjFile<ELFT>>(file)->parse(); 3255f757f3fSDimitry Andric } else if (auto *f = dyn_cast<SharedFile>(file)) { 3265f757f3fSDimitry Andric f->parse<ELFT>(); 3275f757f3fSDimitry Andric } else if (auto *f = dyn_cast<BitcodeFile>(file)) { 3285f757f3fSDimitry Andric ctx.bitcodeFiles.push_back(f); 3295f757f3fSDimitry Andric f->parse(); 3305f757f3fSDimitry Andric } else { 3315f757f3fSDimitry Andric ctx.binaryFiles.push_back(cast<BinaryFile>(file)); 3325f757f3fSDimitry Andric cast<BinaryFile>(file)->parse(); 3335f757f3fSDimitry Andric } 3340b57cec5SDimitry Andric } 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric // Add symbols in File to the symbol table. 3371fd87a68SDimitry Andric void elf::parseFile(InputFile *file) { invokeELFT(doParseFile, file); } 3380b57cec5SDimitry Andric 339*0fca6ea1SDimitry Andric // This function is explicitly instantiated in ARM.cpp. Mark it extern here, 3405f757f3fSDimitry Andric // to avoid warnings when building with MSVC. 3415f757f3fSDimitry Andric extern template void ObjFile<ELF32LE>::importCmseSymbols(); 3425f757f3fSDimitry Andric extern template void ObjFile<ELF32BE>::importCmseSymbols(); 3435f757f3fSDimitry Andric extern template void ObjFile<ELF64LE>::importCmseSymbols(); 3445f757f3fSDimitry Andric extern template void ObjFile<ELF64BE>::importCmseSymbols(); 3455f757f3fSDimitry Andric 346*0fca6ea1SDimitry Andric template <class ELFT> 347*0fca6ea1SDimitry Andric static void doParseFiles(const std::vector<InputFile *> &files, 348*0fca6ea1SDimitry Andric InputFile *armCmseImpLib) { 349*0fca6ea1SDimitry Andric // Add all files to the symbol table. This will add almost all symbols that we 350*0fca6ea1SDimitry Andric // need to the symbol table. This process might add files to the link due to 351*0fca6ea1SDimitry Andric // addDependentLibrary. 352*0fca6ea1SDimitry Andric for (size_t i = 0; i < files.size(); ++i) { 353*0fca6ea1SDimitry Andric llvm::TimeTraceScope timeScope("Parse input files", files[i]->getName()); 354*0fca6ea1SDimitry Andric doParseFile<ELFT>(files[i]); 355*0fca6ea1SDimitry Andric } 356*0fca6ea1SDimitry Andric if (armCmseImpLib) 357*0fca6ea1SDimitry Andric cast<ObjFile<ELFT>>(*armCmseImpLib).importCmseSymbols(); 35806c3fb27SDimitry Andric } 35906c3fb27SDimitry Andric 360*0fca6ea1SDimitry Andric void elf::parseFiles(const std::vector<InputFile *> &files, 361*0fca6ea1SDimitry Andric InputFile *armCmseImpLib) { 362*0fca6ea1SDimitry Andric llvm::TimeTraceScope timeScope("Parse input files"); 363*0fca6ea1SDimitry Andric invokeELFT(doParseFiles, files, armCmseImpLib); 36406c3fb27SDimitry Andric } 36506c3fb27SDimitry Andric 3660b57cec5SDimitry Andric // Concatenates arguments to construct a string representing an error location. 3670b57cec5SDimitry Andric static std::string createFileLineMsg(StringRef path, unsigned line) { 3685ffd83dbSDimitry Andric std::string filename = std::string(path::filename(path)); 3690b57cec5SDimitry Andric std::string lineno = ":" + std::to_string(line); 3700b57cec5SDimitry Andric if (filename == path) 3710b57cec5SDimitry Andric return filename + lineno; 3720b57cec5SDimitry Andric return filename + lineno + " (" + path.str() + lineno + ")"; 3730b57cec5SDimitry Andric } 3740b57cec5SDimitry Andric 3750b57cec5SDimitry Andric template <class ELFT> 3760b57cec5SDimitry Andric static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym, 3775f757f3fSDimitry Andric const InputSectionBase &sec, uint64_t offset) { 3780b57cec5SDimitry Andric // In DWARF, functions and variables are stored to different places. 3790b57cec5SDimitry Andric // First, look up a function for a given offset. 380bdd1243dSDimitry Andric if (std::optional<DILineInfo> info = file.getDILineInfo(&sec, offset)) 3810b57cec5SDimitry Andric return createFileLineMsg(info->FileName, info->Line); 3820b57cec5SDimitry Andric 3830b57cec5SDimitry Andric // If it failed, look up again as a variable. 384bdd1243dSDimitry Andric if (std::optional<std::pair<std::string, unsigned>> fileLine = 3850b57cec5SDimitry Andric file.getVariableLoc(sym.getName())) 3860b57cec5SDimitry Andric return createFileLineMsg(fileLine->first, fileLine->second); 3870b57cec5SDimitry Andric 3880b57cec5SDimitry Andric // File.sourceFile contains STT_FILE symbol, and that is a last resort. 3895ffd83dbSDimitry Andric return std::string(file.sourceFile); 3900b57cec5SDimitry Andric } 3910b57cec5SDimitry Andric 3925f757f3fSDimitry Andric std::string InputFile::getSrcMsg(const Symbol &sym, const InputSectionBase &sec, 3930b57cec5SDimitry Andric uint64_t offset) { 3940b57cec5SDimitry Andric if (kind() != ObjKind) 3950b57cec5SDimitry Andric return ""; 396bdd1243dSDimitry Andric switch (ekind) { 3970b57cec5SDimitry Andric default: 3980b57cec5SDimitry Andric llvm_unreachable("Invalid kind"); 3990b57cec5SDimitry Andric case ELF32LEKind: 4000b57cec5SDimitry Andric return getSrcMsgAux(cast<ObjFile<ELF32LE>>(*this), sym, sec, offset); 4010b57cec5SDimitry Andric case ELF32BEKind: 4020b57cec5SDimitry Andric return getSrcMsgAux(cast<ObjFile<ELF32BE>>(*this), sym, sec, offset); 4030b57cec5SDimitry Andric case ELF64LEKind: 4040b57cec5SDimitry Andric return getSrcMsgAux(cast<ObjFile<ELF64LE>>(*this), sym, sec, offset); 4050b57cec5SDimitry Andric case ELF64BEKind: 4060b57cec5SDimitry Andric return getSrcMsgAux(cast<ObjFile<ELF64BE>>(*this), sym, sec, offset); 4070b57cec5SDimitry Andric } 4080b57cec5SDimitry Andric } 4090b57cec5SDimitry Andric 410e8d8bef9SDimitry Andric StringRef InputFile::getNameForScript() const { 411e8d8bef9SDimitry Andric if (archiveName.empty()) 412e8d8bef9SDimitry Andric return getName(); 413e8d8bef9SDimitry Andric 414e8d8bef9SDimitry Andric if (nameForScriptCache.empty()) 415e8d8bef9SDimitry Andric nameForScriptCache = (archiveName + Twine(':') + getName()).str(); 416e8d8bef9SDimitry Andric 417e8d8bef9SDimitry Andric return nameForScriptCache; 418e8d8bef9SDimitry Andric } 419e8d8bef9SDimitry Andric 420bdd1243dSDimitry Andric // An ELF object file may contain a `.deplibs` section. If it exists, the 421bdd1243dSDimitry Andric // section contains a list of library specifiers such as `m` for libm. This 422bdd1243dSDimitry Andric // function resolves a given name by finding the first matching library checking 423bdd1243dSDimitry Andric // the various ways that a library can be specified to LLD. This ELF extension 424bdd1243dSDimitry Andric // is a form of autolinking and is called `dependent libraries`. It is currently 425bdd1243dSDimitry Andric // unique to LLVM and lld. 426bdd1243dSDimitry Andric static void addDependentLibrary(StringRef specifier, const InputFile *f) { 427bdd1243dSDimitry Andric if (!config->dependentLibraries) 428bdd1243dSDimitry Andric return; 429bdd1243dSDimitry Andric if (std::optional<std::string> s = searchLibraryBaseName(specifier)) 430bdd1243dSDimitry Andric ctx.driver.addFile(saver().save(*s), /*withLOption=*/true); 431bdd1243dSDimitry Andric else if (std::optional<std::string> s = findFromSearchPaths(specifier)) 432bdd1243dSDimitry Andric ctx.driver.addFile(saver().save(*s), /*withLOption=*/true); 433bdd1243dSDimitry Andric else if (fs::exists(specifier)) 434bdd1243dSDimitry Andric ctx.driver.addFile(specifier, /*withLOption=*/false); 435bdd1243dSDimitry Andric else 436bdd1243dSDimitry Andric error(toString(f) + 437bdd1243dSDimitry Andric ": unable to find library from dependent library specifier: " + 438bdd1243dSDimitry Andric specifier); 439bdd1243dSDimitry Andric } 440bdd1243dSDimitry Andric 441bdd1243dSDimitry Andric // Record the membership of a section group so that in the garbage collection 442bdd1243dSDimitry Andric // pass, section group members are kept or discarded as a unit. 443bdd1243dSDimitry Andric template <class ELFT> 444bdd1243dSDimitry Andric static void handleSectionGroup(ArrayRef<InputSectionBase *> sections, 445bdd1243dSDimitry Andric ArrayRef<typename ELFT::Word> entries) { 446bdd1243dSDimitry Andric bool hasAlloc = false; 447bdd1243dSDimitry Andric for (uint32_t index : entries.slice(1)) { 448bdd1243dSDimitry Andric if (index >= sections.size()) 449bdd1243dSDimitry Andric return; 450bdd1243dSDimitry Andric if (InputSectionBase *s = sections[index]) 451bdd1243dSDimitry Andric if (s != &InputSection::discarded && s->flags & SHF_ALLOC) 452bdd1243dSDimitry Andric hasAlloc = true; 453bdd1243dSDimitry Andric } 454bdd1243dSDimitry Andric 455bdd1243dSDimitry Andric // If any member has the SHF_ALLOC flag, the whole group is subject to garbage 456bdd1243dSDimitry Andric // collection. See the comment in markLive(). This rule retains .debug_types 457bdd1243dSDimitry Andric // and .rela.debug_types. 458bdd1243dSDimitry Andric if (!hasAlloc) 459bdd1243dSDimitry Andric return; 460bdd1243dSDimitry Andric 461bdd1243dSDimitry Andric // Connect the members in a circular doubly-linked list via 462bdd1243dSDimitry Andric // nextInSectionGroup. 463bdd1243dSDimitry Andric InputSectionBase *head; 464bdd1243dSDimitry Andric InputSectionBase *prev = nullptr; 465bdd1243dSDimitry Andric for (uint32_t index : entries.slice(1)) { 466bdd1243dSDimitry Andric InputSectionBase *s = sections[index]; 467bdd1243dSDimitry Andric if (!s || s == &InputSection::discarded) 468bdd1243dSDimitry Andric continue; 469bdd1243dSDimitry Andric if (prev) 470bdd1243dSDimitry Andric prev->nextInSectionGroup = s; 471bdd1243dSDimitry Andric else 472bdd1243dSDimitry Andric head = s; 473bdd1243dSDimitry Andric prev = s; 474bdd1243dSDimitry Andric } 475bdd1243dSDimitry Andric if (prev) 476bdd1243dSDimitry Andric prev->nextInSectionGroup = head; 477bdd1243dSDimitry Andric } 478bdd1243dSDimitry Andric 4795ffd83dbSDimitry Andric template <class ELFT> DWARFCache *ObjFile<ELFT>::getDwarf() { 4805ffd83dbSDimitry Andric llvm::call_once(initDwarf, [this]() { 4815ffd83dbSDimitry Andric dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>( 4825ffd83dbSDimitry Andric std::make_unique<LLDDwarfObj<ELFT>>(this), "", 4835ffd83dbSDimitry Andric [&](Error err) { warn(getName() + ": " + toString(std::move(err))); }, 4845ffd83dbSDimitry Andric [&](Error warning) { 4855ffd83dbSDimitry Andric warn(getName() + ": " + toString(std::move(warning))); 4865ffd83dbSDimitry Andric })); 4875ffd83dbSDimitry Andric }); 4885ffd83dbSDimitry Andric 4895ffd83dbSDimitry Andric return dwarf.get(); 4900b57cec5SDimitry Andric } 4910b57cec5SDimitry Andric 4920b57cec5SDimitry Andric // Returns the pair of file name and line number describing location of data 4930b57cec5SDimitry Andric // object (variable, array, etc) definition. 4940b57cec5SDimitry Andric template <class ELFT> 495bdd1243dSDimitry Andric std::optional<std::pair<std::string, unsigned>> 4960b57cec5SDimitry Andric ObjFile<ELFT>::getVariableLoc(StringRef name) { 4975ffd83dbSDimitry Andric return getDwarf()->getVariableLoc(name); 4980b57cec5SDimitry Andric } 4990b57cec5SDimitry Andric 5000b57cec5SDimitry Andric // Returns source line information for a given offset 5010b57cec5SDimitry Andric // using DWARF debug info. 5020b57cec5SDimitry Andric template <class ELFT> 5035f757f3fSDimitry Andric std::optional<DILineInfo> 5045f757f3fSDimitry Andric ObjFile<ELFT>::getDILineInfo(const InputSectionBase *s, uint64_t offset) { 5050b57cec5SDimitry Andric // Detect SectionIndex for specified section. 5060b57cec5SDimitry Andric uint64_t sectionIndex = object::SectionedAddress::UndefSection; 5070b57cec5SDimitry Andric ArrayRef<InputSectionBase *> sections = s->file->getSections(); 5080b57cec5SDimitry Andric for (uint64_t curIndex = 0; curIndex < sections.size(); ++curIndex) { 5090b57cec5SDimitry Andric if (s == sections[curIndex]) { 5100b57cec5SDimitry Andric sectionIndex = curIndex; 5110b57cec5SDimitry Andric break; 5120b57cec5SDimitry Andric } 5130b57cec5SDimitry Andric } 5140b57cec5SDimitry Andric 5155ffd83dbSDimitry Andric return getDwarf()->getDILineInfo(offset, sectionIndex); 5160b57cec5SDimitry Andric } 5170b57cec5SDimitry Andric 518bdd1243dSDimitry Andric ELFFileBase::ELFFileBase(Kind k, ELFKind ekind, MemoryBufferRef mb) 519bdd1243dSDimitry Andric : InputFile(k, mb) { 520bdd1243dSDimitry Andric this->ekind = ekind; 5210b57cec5SDimitry Andric } 5220b57cec5SDimitry Andric 5230b57cec5SDimitry Andric template <typename Elf_Shdr> 5240b57cec5SDimitry Andric static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> sections, uint32_t type) { 5250b57cec5SDimitry Andric for (const Elf_Shdr &sec : sections) 5260b57cec5SDimitry Andric if (sec.sh_type == type) 5270b57cec5SDimitry Andric return &sec; 5280b57cec5SDimitry Andric return nullptr; 5290b57cec5SDimitry Andric } 5300b57cec5SDimitry Andric 531bdd1243dSDimitry Andric void ELFFileBase::init() { 532bdd1243dSDimitry Andric switch (ekind) { 533bdd1243dSDimitry Andric case ELF32LEKind: 534bdd1243dSDimitry Andric init<ELF32LE>(fileKind); 535bdd1243dSDimitry Andric break; 536bdd1243dSDimitry Andric case ELF32BEKind: 537bdd1243dSDimitry Andric init<ELF32BE>(fileKind); 538bdd1243dSDimitry Andric break; 539bdd1243dSDimitry Andric case ELF64LEKind: 540bdd1243dSDimitry Andric init<ELF64LE>(fileKind); 541bdd1243dSDimitry Andric break; 542bdd1243dSDimitry Andric case ELF64BEKind: 543bdd1243dSDimitry Andric init<ELF64BE>(fileKind); 544bdd1243dSDimitry Andric break; 545bdd1243dSDimitry Andric default: 546bdd1243dSDimitry Andric llvm_unreachable("getELFKind"); 547bdd1243dSDimitry Andric } 548bdd1243dSDimitry Andric } 549bdd1243dSDimitry Andric 550bdd1243dSDimitry Andric template <class ELFT> void ELFFileBase::init(InputFile::Kind k) { 5510b57cec5SDimitry Andric using Elf_Shdr = typename ELFT::Shdr; 5520b57cec5SDimitry Andric using Elf_Sym = typename ELFT::Sym; 5530b57cec5SDimitry Andric 5540b57cec5SDimitry Andric // Initialize trivial attributes. 5550b57cec5SDimitry Andric const ELFFile<ELFT> &obj = getObj<ELFT>(); 556e8d8bef9SDimitry Andric emachine = obj.getHeader().e_machine; 557e8d8bef9SDimitry Andric osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI]; 558e8d8bef9SDimitry Andric abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION]; 5590b57cec5SDimitry Andric 5600b57cec5SDimitry Andric ArrayRef<Elf_Shdr> sections = CHECK(obj.sections(), this); 5610eae32dcSDimitry Andric elfShdrs = sections.data(); 5620eae32dcSDimitry Andric numELFShdrs = sections.size(); 5630b57cec5SDimitry Andric 5640b57cec5SDimitry Andric // Find a symbol table. 5650b57cec5SDimitry Andric const Elf_Shdr *symtabSec = 566bdd1243dSDimitry Andric findSection(sections, k == SharedKind ? SHT_DYNSYM : SHT_SYMTAB); 5670b57cec5SDimitry Andric 5680b57cec5SDimitry Andric if (!symtabSec) 5690b57cec5SDimitry Andric return; 5700b57cec5SDimitry Andric 5710b57cec5SDimitry Andric // Initialize members corresponding to a symbol table. 5720b57cec5SDimitry Andric firstGlobal = symtabSec->sh_info; 5730b57cec5SDimitry Andric 5740b57cec5SDimitry Andric ArrayRef<Elf_Sym> eSyms = CHECK(obj.symbols(symtabSec), this); 5750b57cec5SDimitry Andric if (firstGlobal == 0 || firstGlobal > eSyms.size()) 5760b57cec5SDimitry Andric fatal(toString(this) + ": invalid sh_info in symbol table"); 5770b57cec5SDimitry Andric 5780b57cec5SDimitry Andric elfSyms = reinterpret_cast<const void *>(eSyms.data()); 5790eae32dcSDimitry Andric numELFSyms = uint32_t(eSyms.size()); 5800b57cec5SDimitry Andric stringTable = CHECK(obj.getStringTableForSymtab(*symtabSec, sections), this); 5810b57cec5SDimitry Andric } 5820b57cec5SDimitry Andric 5830b57cec5SDimitry Andric template <class ELFT> 5840b57cec5SDimitry Andric uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const { 5850b57cec5SDimitry Andric return CHECK( 586e8d8bef9SDimitry Andric this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable), 5870b57cec5SDimitry Andric this); 5880b57cec5SDimitry Andric } 5890b57cec5SDimitry Andric 5900b57cec5SDimitry Andric template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) { 5911fd87a68SDimitry Andric object::ELFFile<ELFT> obj = this->getObj(); 5920b57cec5SDimitry Andric // Read a section table. justSymbols is usually false. 593bdd1243dSDimitry Andric if (this->justSymbols) { 5940b57cec5SDimitry Andric initializeJustSymbols(); 595bdd1243dSDimitry Andric initializeSymbols(obj); 596bdd1243dSDimitry Andric return; 597bdd1243dSDimitry Andric } 598bdd1243dSDimitry Andric 599bdd1243dSDimitry Andric // Handle dependent libraries and selection of section groups as these are not 600bdd1243dSDimitry Andric // done in parallel. 601bdd1243dSDimitry Andric ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>(); 602bdd1243dSDimitry Andric StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this); 603bdd1243dSDimitry Andric uint64_t size = objSections.size(); 604bdd1243dSDimitry Andric sections.resize(size); 605bdd1243dSDimitry Andric for (size_t i = 0; i != size; ++i) { 606bdd1243dSDimitry Andric const Elf_Shdr &sec = objSections[i]; 607bdd1243dSDimitry Andric if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !config->relocatable) { 608bdd1243dSDimitry Andric StringRef name = check(obj.getSectionName(sec, shstrtab)); 609bdd1243dSDimitry Andric ArrayRef<char> data = CHECK( 610bdd1243dSDimitry Andric this->getObj().template getSectionContentsAsArray<char>(sec), this); 611bdd1243dSDimitry Andric if (!data.empty() && data.back() != '\0') { 612bdd1243dSDimitry Andric error( 613bdd1243dSDimitry Andric toString(this) + 614bdd1243dSDimitry Andric ": corrupted dependent libraries section (unterminated string): " + 615bdd1243dSDimitry Andric name); 616bdd1243dSDimitry Andric } else { 617bdd1243dSDimitry Andric for (const char *d = data.begin(), *e = data.end(); d < e;) { 618bdd1243dSDimitry Andric StringRef s(d); 619bdd1243dSDimitry Andric addDependentLibrary(s, this); 620bdd1243dSDimitry Andric d += s.size() + 1; 621bdd1243dSDimitry Andric } 622bdd1243dSDimitry Andric } 623bdd1243dSDimitry Andric this->sections[i] = &InputSection::discarded; 624bdd1243dSDimitry Andric continue; 625bdd1243dSDimitry Andric } 626bdd1243dSDimitry Andric 627bdd1243dSDimitry Andric if (sec.sh_type == SHT_ARM_ATTRIBUTES && config->emachine == EM_ARM) { 628bdd1243dSDimitry Andric ARMAttributeParser attributes; 629bdd1243dSDimitry Andric ArrayRef<uint8_t> contents = 630bdd1243dSDimitry Andric check(this->getObj().getSectionContents(sec)); 631bdd1243dSDimitry Andric StringRef name = check(obj.getSectionName(sec, shstrtab)); 632bdd1243dSDimitry Andric this->sections[i] = &InputSection::discarded; 6335f757f3fSDimitry Andric if (Error e = attributes.parse(contents, ekind == ELF32LEKind 6345f757f3fSDimitry Andric ? llvm::endianness::little 6355f757f3fSDimitry Andric : llvm::endianness::big)) { 636bdd1243dSDimitry Andric InputSection isec(*this, sec, name); 637bdd1243dSDimitry Andric warn(toString(&isec) + ": " + llvm::toString(std::move(e))); 638bdd1243dSDimitry Andric } else { 639bdd1243dSDimitry Andric updateSupportedARMFeatures(attributes); 640bdd1243dSDimitry Andric updateARMVFPArgs(attributes, this); 641bdd1243dSDimitry Andric 642bdd1243dSDimitry Andric // FIXME: Retain the first attribute section we see. The eglibc ARM 643bdd1243dSDimitry Andric // dynamic loaders require the presence of an attribute section for 644bdd1243dSDimitry Andric // dlopen to work. In a full implementation we would merge all attribute 645bdd1243dSDimitry Andric // sections. 646bdd1243dSDimitry Andric if (in.attributes == nullptr) { 647bdd1243dSDimitry Andric in.attributes = std::make_unique<InputSection>(*this, sec, name); 648bdd1243dSDimitry Andric this->sections[i] = in.attributes.get(); 649bdd1243dSDimitry Andric } 650bdd1243dSDimitry Andric } 651bdd1243dSDimitry Andric } 652bdd1243dSDimitry Andric 6535f757f3fSDimitry Andric // Producing a static binary with MTE globals is not currently supported, 6545f757f3fSDimitry Andric // remove all SHT_AARCH64_MEMTAG_GLOBALS_STATIC sections as they're unused 6555f757f3fSDimitry Andric // medatada, and we don't want them to end up in the output file for static 6565f757f3fSDimitry Andric // executables. 6575f757f3fSDimitry Andric if (sec.sh_type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC && 6585f757f3fSDimitry Andric !canHaveMemtagGlobals()) { 6595f757f3fSDimitry Andric this->sections[i] = &InputSection::discarded; 6605f757f3fSDimitry Andric continue; 6615f757f3fSDimitry Andric } 6625f757f3fSDimitry Andric 663bdd1243dSDimitry Andric if (sec.sh_type != SHT_GROUP) 664bdd1243dSDimitry Andric continue; 665bdd1243dSDimitry Andric StringRef signature = getShtGroupSignature(objSections, sec); 666bdd1243dSDimitry Andric ArrayRef<Elf_Word> entries = 667bdd1243dSDimitry Andric CHECK(obj.template getSectionContentsAsArray<Elf_Word>(sec), this); 668bdd1243dSDimitry Andric if (entries.empty()) 669bdd1243dSDimitry Andric fatal(toString(this) + ": empty SHT_GROUP"); 670bdd1243dSDimitry Andric 671bdd1243dSDimitry Andric Elf_Word flag = entries[0]; 672bdd1243dSDimitry Andric if (flag && flag != GRP_COMDAT) 673bdd1243dSDimitry Andric fatal(toString(this) + ": unsupported SHT_GROUP format"); 674bdd1243dSDimitry Andric 675bdd1243dSDimitry Andric bool keepGroup = 676bdd1243dSDimitry Andric (flag & GRP_COMDAT) == 0 || ignoreComdats || 677bdd1243dSDimitry Andric symtab.comdatGroups.try_emplace(CachedHashStringRef(signature), this) 678bdd1243dSDimitry Andric .second; 679bdd1243dSDimitry Andric if (keepGroup) { 680*0fca6ea1SDimitry Andric if (!config->resolveGroups) 681bdd1243dSDimitry Andric this->sections[i] = createInputSection( 682bdd1243dSDimitry Andric i, sec, check(obj.getSectionName(sec, shstrtab))); 683bdd1243dSDimitry Andric continue; 684bdd1243dSDimitry Andric } 685bdd1243dSDimitry Andric 686bdd1243dSDimitry Andric // Otherwise, discard group members. 687bdd1243dSDimitry Andric for (uint32_t secIndex : entries.slice(1)) { 688bdd1243dSDimitry Andric if (secIndex >= size) 689bdd1243dSDimitry Andric fatal(toString(this) + 690bdd1243dSDimitry Andric ": invalid section index in group: " + Twine(secIndex)); 691bdd1243dSDimitry Andric this->sections[secIndex] = &InputSection::discarded; 692bdd1243dSDimitry Andric } 693bdd1243dSDimitry Andric } 6940b57cec5SDimitry Andric 6950b57cec5SDimitry Andric // Read a symbol table. 6961fd87a68SDimitry Andric initializeSymbols(obj); 6970b57cec5SDimitry Andric } 6980b57cec5SDimitry Andric 6990b57cec5SDimitry Andric // Sections with SHT_GROUP and comdat bits define comdat section groups. 7000b57cec5SDimitry Andric // They are identified and deduplicated by group name. This function 7010b57cec5SDimitry Andric // returns a group name. 7020b57cec5SDimitry Andric template <class ELFT> 7030b57cec5SDimitry Andric StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections, 7040b57cec5SDimitry Andric const Elf_Shdr &sec) { 7050b57cec5SDimitry Andric typename ELFT::SymRange symbols = this->getELFSyms<ELFT>(); 7060b57cec5SDimitry Andric if (sec.sh_info >= symbols.size()) 7070b57cec5SDimitry Andric fatal(toString(this) + ": invalid symbol index"); 7080b57cec5SDimitry Andric const typename ELFT::Sym &sym = symbols[sec.sh_info]; 709349cc55cSDimitry Andric return CHECK(sym.getName(this->stringTable), this); 7100b57cec5SDimitry Andric } 7110b57cec5SDimitry Andric 71285868e8aSDimitry Andric template <class ELFT> 71385868e8aSDimitry Andric bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) { 7140b57cec5SDimitry Andric // On a regular link we don't merge sections if -O0 (default is -O1). This 7150b57cec5SDimitry Andric // sometimes makes the linker significantly faster, although the output will 7160b57cec5SDimitry Andric // be bigger. 7170b57cec5SDimitry Andric // 7180b57cec5SDimitry Andric // Doing the same for -r would create a problem as it would combine sections 7190b57cec5SDimitry Andric // with different sh_entsize. One option would be to just copy every SHF_MERGE 7200b57cec5SDimitry Andric // section as is to the output. While this would produce a valid ELF file with 7210b57cec5SDimitry Andric // usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when 7220b57cec5SDimitry Andric // they see two .debug_str. We could have separate logic for combining 7230b57cec5SDimitry Andric // SHF_MERGE sections based both on their name and sh_entsize, but that seems 7240b57cec5SDimitry Andric // to be more trouble than it is worth. Instead, we just use the regular (-O1) 7250b57cec5SDimitry Andric // logic for -r. 7260b57cec5SDimitry Andric if (config->optimize == 0 && !config->relocatable) 7270b57cec5SDimitry Andric return false; 7280b57cec5SDimitry Andric 7290b57cec5SDimitry Andric // A mergeable section with size 0 is useless because they don't have 7300b57cec5SDimitry Andric // any data to merge. A mergeable string section with size 0 can be 7310b57cec5SDimitry Andric // argued as invalid because it doesn't end with a null character. 7320b57cec5SDimitry Andric // We'll avoid a mess by handling them as if they were non-mergeable. 7330b57cec5SDimitry Andric if (sec.sh_size == 0) 7340b57cec5SDimitry Andric return false; 7350b57cec5SDimitry Andric 7360b57cec5SDimitry Andric // Check for sh_entsize. The ELF spec is not clear about the zero 7370b57cec5SDimitry Andric // sh_entsize. It says that "the member [sh_entsize] contains 0 if 7380b57cec5SDimitry Andric // the section does not hold a table of fixed-size entries". We know 7390b57cec5SDimitry Andric // that Rust 1.13 produces a string mergeable section with a zero 7400b57cec5SDimitry Andric // sh_entsize. Here we just accept it rather than being picky about it. 7410b57cec5SDimitry Andric uint64_t entSize = sec.sh_entsize; 7420b57cec5SDimitry Andric if (entSize == 0) 7430b57cec5SDimitry Andric return false; 7440b57cec5SDimitry Andric if (sec.sh_size % entSize) 74585868e8aSDimitry Andric fatal(toString(this) + ":(" + name + "): SHF_MERGE section size (" + 74685868e8aSDimitry Andric Twine(sec.sh_size) + ") must be a multiple of sh_entsize (" + 74785868e8aSDimitry Andric Twine(entSize) + ")"); 7480b57cec5SDimitry Andric 7495ffd83dbSDimitry Andric if (sec.sh_flags & SHF_WRITE) 75085868e8aSDimitry Andric fatal(toString(this) + ":(" + name + 75185868e8aSDimitry Andric "): writable SHF_MERGE section is not supported"); 7520b57cec5SDimitry Andric 7530b57cec5SDimitry Andric return true; 7540b57cec5SDimitry Andric } 7550b57cec5SDimitry Andric 7560b57cec5SDimitry Andric // This is for --just-symbols. 7570b57cec5SDimitry Andric // 7580b57cec5SDimitry Andric // --just-symbols is a very minor feature that allows you to link your 7590b57cec5SDimitry Andric // output against other existing program, so that if you load both your 7600b57cec5SDimitry Andric // program and the other program into memory, your output can refer the 7610b57cec5SDimitry Andric // other program's symbols. 7620b57cec5SDimitry Andric // 7630b57cec5SDimitry Andric // When the option is given, we link "just symbols". The section table is 7640b57cec5SDimitry Andric // initialized with null pointers. 7650b57cec5SDimitry Andric template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() { 7660eae32dcSDimitry Andric sections.resize(numELFShdrs); 7670b57cec5SDimitry Andric } 7680b57cec5SDimitry Andric 769*0fca6ea1SDimitry Andric static bool isKnownSpecificSectionType(uint32_t t, uint32_t flags) { 770*0fca6ea1SDimitry Andric if (SHT_LOUSER <= t && t <= SHT_HIUSER && !(flags & SHF_ALLOC)) 771*0fca6ea1SDimitry Andric return true; 772*0fca6ea1SDimitry Andric if (SHT_LOOS <= t && t <= SHT_HIOS && !(flags & SHF_OS_NONCONFORMING)) 773*0fca6ea1SDimitry Andric return true; 774*0fca6ea1SDimitry Andric // Allow all processor-specific types. This is different from GNU ld. 775*0fca6ea1SDimitry Andric return SHT_LOPROC <= t && t <= SHT_HIPROC; 776*0fca6ea1SDimitry Andric } 777*0fca6ea1SDimitry Andric 7780b57cec5SDimitry Andric template <class ELFT> 7791fd87a68SDimitry Andric void ObjFile<ELFT>::initializeSections(bool ignoreComdats, 7801fd87a68SDimitry Andric const llvm::object::ELFFile<ELFT> &obj) { 7810eae32dcSDimitry Andric ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>(); 782349cc55cSDimitry Andric StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this); 7830b57cec5SDimitry Andric uint64_t size = objSections.size(); 784bdd1243dSDimitry Andric SmallVector<ArrayRef<Elf_Word>, 0> selectedGroups; 78504eeddc0SDimitry Andric for (size_t i = 0; i != size; ++i) { 7860b57cec5SDimitry Andric if (this->sections[i] == &InputSection::discarded) 7870b57cec5SDimitry Andric continue; 7880b57cec5SDimitry Andric const Elf_Shdr &sec = objSections[i]; 789*0fca6ea1SDimitry Andric const uint32_t type = sec.sh_type; 7900b57cec5SDimitry Andric 7910b57cec5SDimitry Andric // SHF_EXCLUDE'ed sections are discarded by the linker. However, 7920b57cec5SDimitry Andric // if -r is given, we'll let the final link discard such sections. 7930b57cec5SDimitry Andric // This is compatible with GNU. 7940b57cec5SDimitry Andric if ((sec.sh_flags & SHF_EXCLUDE) && !config->relocatable) { 795*0fca6ea1SDimitry Andric if (type == SHT_LLVM_CALL_GRAPH_PROFILE) 7960eae32dcSDimitry Andric cgProfileSectionIndex = i; 797*0fca6ea1SDimitry Andric if (type == SHT_LLVM_ADDRSIG) { 7980b57cec5SDimitry Andric // We ignore the address-significance table if we know that the object 7990b57cec5SDimitry Andric // file was created by objcopy or ld -r. This is because these tools 8000b57cec5SDimitry Andric // will reorder the symbols in the symbol table, invalidating the data 8010b57cec5SDimitry Andric // in the address-significance table, which refers to symbols by index. 8020b57cec5SDimitry Andric if (sec.sh_link != 0) 8030b57cec5SDimitry Andric this->addrsigSec = &sec; 8040b57cec5SDimitry Andric else if (config->icf == ICFLevel::Safe) 805fe6060f1SDimitry Andric warn(toString(this) + 806fe6060f1SDimitry Andric ": --icf=safe conservatively ignores " 807fe6060f1SDimitry Andric "SHT_LLVM_ADDRSIG [index " + 808fe6060f1SDimitry Andric Twine(i) + 809fe6060f1SDimitry Andric "] with sh_link=0 " 810fe6060f1SDimitry Andric "(likely created using objcopy or ld -r)"); 8110b57cec5SDimitry Andric } 8120b57cec5SDimitry Andric this->sections[i] = &InputSection::discarded; 8130b57cec5SDimitry Andric continue; 8140b57cec5SDimitry Andric } 8150b57cec5SDimitry Andric 816*0fca6ea1SDimitry Andric switch (type) { 8170b57cec5SDimitry Andric case SHT_GROUP: { 818bdd1243dSDimitry Andric if (!config->relocatable) 819bdd1243dSDimitry Andric sections[i] = &InputSection::discarded; 820bdd1243dSDimitry Andric StringRef signature = 821bdd1243dSDimitry Andric cantFail(this->getELFSyms<ELFT>()[sec.sh_info].getName(stringTable)); 8220b57cec5SDimitry Andric ArrayRef<Elf_Word> entries = 823bdd1243dSDimitry Andric cantFail(obj.template getSectionContentsAsArray<Elf_Word>(sec)); 824bdd1243dSDimitry Andric if ((entries[0] & GRP_COMDAT) == 0 || ignoreComdats || 825bdd1243dSDimitry Andric symtab.comdatGroups.find(CachedHashStringRef(signature))->second == 826bdd1243dSDimitry Andric this) 827480093f4SDimitry Andric selectedGroups.push_back(entries); 8280b57cec5SDimitry Andric break; 8290b57cec5SDimitry Andric } 8300b57cec5SDimitry Andric case SHT_SYMTAB_SHNDX: 8310b57cec5SDimitry Andric shndxTable = CHECK(obj.getSHNDXTable(sec, objSections), this); 8320b57cec5SDimitry Andric break; 8330b57cec5SDimitry Andric case SHT_SYMTAB: 8340b57cec5SDimitry Andric case SHT_STRTAB: 8355ffd83dbSDimitry Andric case SHT_REL: 8365ffd83dbSDimitry Andric case SHT_RELA: 8370b57cec5SDimitry Andric case SHT_NULL: 8380b57cec5SDimitry Andric break; 839*0fca6ea1SDimitry Andric case SHT_PROGBITS: 840*0fca6ea1SDimitry Andric case SHT_NOTE: 841*0fca6ea1SDimitry Andric case SHT_NOBITS: 842*0fca6ea1SDimitry Andric case SHT_INIT_ARRAY: 843*0fca6ea1SDimitry Andric case SHT_FINI_ARRAY: 844*0fca6ea1SDimitry Andric case SHT_PREINIT_ARRAY: 845*0fca6ea1SDimitry Andric this->sections[i] = 846*0fca6ea1SDimitry Andric createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab))); 847*0fca6ea1SDimitry Andric break; 848*0fca6ea1SDimitry Andric case SHT_LLVM_LTO: 849*0fca6ea1SDimitry Andric // Discard .llvm.lto in a relocatable link that does not use the bitcode. 850*0fca6ea1SDimitry Andric // The concatenated output does not properly reflect the linking 851*0fca6ea1SDimitry Andric // semantics. In addition, since we do not use the bitcode wrapper format, 852*0fca6ea1SDimitry Andric // the concatenated raw bitcode would be invalid. 853*0fca6ea1SDimitry Andric if (config->relocatable && !config->fatLTOObjects) { 854*0fca6ea1SDimitry Andric sections[i] = &InputSection::discarded; 855*0fca6ea1SDimitry Andric break; 856*0fca6ea1SDimitry Andric } 857bdd1243dSDimitry Andric [[fallthrough]]; 8580b57cec5SDimitry Andric default: 8591fd87a68SDimitry Andric this->sections[i] = 8601fd87a68SDimitry Andric createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab))); 861*0fca6ea1SDimitry Andric if (type == SHT_LLVM_SYMPART) 862*0fca6ea1SDimitry Andric ctx.hasSympart.store(true, std::memory_order_relaxed); 863*0fca6ea1SDimitry Andric else if (config->rejectMismatch && 864*0fca6ea1SDimitry Andric !isKnownSpecificSectionType(type, sec.sh_flags)) 865*0fca6ea1SDimitry Andric errorOrWarn(toString(this->sections[i]) + ": unknown section type 0x" + 866*0fca6ea1SDimitry Andric Twine::utohexstr(type)); 867*0fca6ea1SDimitry Andric break; 8680b57cec5SDimitry Andric } 86985868e8aSDimitry Andric } 87085868e8aSDimitry Andric 8715ffd83dbSDimitry Andric // We have a second loop. It is used to: 8725ffd83dbSDimitry Andric // 1) handle SHF_LINK_ORDER sections. 873*0fca6ea1SDimitry Andric // 2) create relocation sections. In some cases the section header index of a 8745ffd83dbSDimitry Andric // relocation section may be smaller than that of the relocated section. In 8755ffd83dbSDimitry Andric // such cases, the relocation section would attempt to reference a target 8765ffd83dbSDimitry Andric // section that has not yet been created. For simplicity, delay creation of 8775ffd83dbSDimitry Andric // relocation sections until now. 87804eeddc0SDimitry Andric for (size_t i = 0; i != size; ++i) { 87985868e8aSDimitry Andric if (this->sections[i] == &InputSection::discarded) 88085868e8aSDimitry Andric continue; 88185868e8aSDimitry Andric const Elf_Shdr &sec = objSections[i]; 8825ffd83dbSDimitry Andric 883*0fca6ea1SDimitry Andric if (isStaticRelSecType(sec.sh_type)) { 88404eeddc0SDimitry Andric // Find a relocation target section and associate this section with that. 88504eeddc0SDimitry Andric // Target may have been discarded if it is in a different section group 88604eeddc0SDimitry Andric // and the group is discarded, even though it's a violation of the spec. 88704eeddc0SDimitry Andric // We handle that situation gracefully by discarding dangling relocation 88804eeddc0SDimitry Andric // sections. 88904eeddc0SDimitry Andric const uint32_t info = sec.sh_info; 890*0fca6ea1SDimitry Andric InputSectionBase *s = getRelocTarget(i, info); 89104eeddc0SDimitry Andric if (!s) 89204eeddc0SDimitry Andric continue; 89304eeddc0SDimitry Andric 89404eeddc0SDimitry Andric // ELF spec allows mergeable sections with relocations, but they are rare, 89504eeddc0SDimitry Andric // and it is in practice hard to merge such sections by contents, because 89604eeddc0SDimitry Andric // applying relocations at end of linking changes section contents. So, we 89704eeddc0SDimitry Andric // simply handle such sections as non-mergeable ones. Degrading like this 89804eeddc0SDimitry Andric // is acceptable because section merging is optional. 89904eeddc0SDimitry Andric if (auto *ms = dyn_cast<MergeInputSection>(s)) { 900bdd1243dSDimitry Andric s = makeThreadLocal<InputSection>( 901bdd1243dSDimitry Andric ms->file, ms->flags, ms->type, ms->addralign, 902bdd1243dSDimitry Andric ms->contentMaybeDecompress(), ms->name); 90304eeddc0SDimitry Andric sections[info] = s; 90404eeddc0SDimitry Andric } 90504eeddc0SDimitry Andric 90604eeddc0SDimitry Andric if (s->relSecIdx != 0) 90704eeddc0SDimitry Andric error( 90804eeddc0SDimitry Andric toString(s) + 90904eeddc0SDimitry Andric ": multiple relocation sections to one section are not supported"); 91004eeddc0SDimitry Andric s->relSecIdx = i; 91104eeddc0SDimitry Andric 91204eeddc0SDimitry Andric // Relocation sections are usually removed from the output, so return 91304eeddc0SDimitry Andric // `nullptr` for the normal case. However, if -r or --emit-relocs is 91404eeddc0SDimitry Andric // specified, we need to copy them to the output. (Some post link analysis 91504eeddc0SDimitry Andric // tools specify --emit-relocs to obtain the information.) 91604eeddc0SDimitry Andric if (config->copyRelocs) { 917bdd1243dSDimitry Andric auto *isec = makeThreadLocal<InputSection>( 91804eeddc0SDimitry Andric *this, sec, check(obj.getSectionName(sec, shstrtab))); 91904eeddc0SDimitry Andric // If the relocated section is discarded (due to /DISCARD/ or 92004eeddc0SDimitry Andric // --gc-sections), the relocation section should be discarded as well. 92104eeddc0SDimitry Andric s->dependentSections.push_back(isec); 92204eeddc0SDimitry Andric sections[i] = isec; 92304eeddc0SDimitry Andric } 92404eeddc0SDimitry Andric continue; 92504eeddc0SDimitry Andric } 9265ffd83dbSDimitry Andric 927e8d8bef9SDimitry Andric // A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have 928e8d8bef9SDimitry Andric // the flag. 92904eeddc0SDimitry Andric if (!sec.sh_link || !(sec.sh_flags & SHF_LINK_ORDER)) 93085868e8aSDimitry Andric continue; 9310b57cec5SDimitry Andric 9320b57cec5SDimitry Andric InputSectionBase *linkSec = nullptr; 93304eeddc0SDimitry Andric if (sec.sh_link < size) 9340b57cec5SDimitry Andric linkSec = this->sections[sec.sh_link]; 9350b57cec5SDimitry Andric if (!linkSec) 93685868e8aSDimitry Andric fatal(toString(this) + ": invalid sh_link index: " + Twine(sec.sh_link)); 9370b57cec5SDimitry Andric 938e8d8bef9SDimitry Andric // A SHF_LINK_ORDER section is discarded if its linked-to section is 939e8d8bef9SDimitry Andric // discarded. 9400b57cec5SDimitry Andric InputSection *isec = cast<InputSection>(this->sections[i]); 9410b57cec5SDimitry Andric linkSec->dependentSections.push_back(isec); 9420b57cec5SDimitry Andric if (!isa<InputSection>(linkSec)) 9430b57cec5SDimitry Andric error("a section " + isec->name + 94485868e8aSDimitry Andric " with SHF_LINK_ORDER should not refer a non-regular section: " + 9450b57cec5SDimitry Andric toString(linkSec)); 9460b57cec5SDimitry Andric } 947480093f4SDimitry Andric 948480093f4SDimitry Andric for (ArrayRef<Elf_Word> entries : selectedGroups) 949480093f4SDimitry Andric handleSectionGroup<ELFT>(this->sections, entries); 9500b57cec5SDimitry Andric } 9510b57cec5SDimitry Andric 952*0fca6ea1SDimitry Andric // Read the following info from the .note.gnu.property section and write it to 953*0fca6ea1SDimitry Andric // the corresponding fields in `ObjFile`: 954*0fca6ea1SDimitry Andric // - Feature flags (32 bits) representing x86 or AArch64 features for 955*0fca6ea1SDimitry Andric // hardware-assisted call flow control; 956*0fca6ea1SDimitry Andric // - AArch64 PAuth ABI core info (16 bytes). 957*0fca6ea1SDimitry Andric template <class ELFT> 958*0fca6ea1SDimitry Andric void readGnuProperty(const InputSection &sec, ObjFile<ELFT> &f) { 9590b57cec5SDimitry Andric using Elf_Nhdr = typename ELFT::Nhdr; 9600b57cec5SDimitry Andric using Elf_Note = typename ELFT::Note; 9610b57cec5SDimitry Andric 962bdd1243dSDimitry Andric ArrayRef<uint8_t> data = sec.content(); 963*0fca6ea1SDimitry Andric auto reportFatal = [&](const uint8_t *place, const Twine &msg) { 964e8d8bef9SDimitry Andric fatal(toString(sec.file) + ":(" + sec.name + "+0x" + 965bdd1243dSDimitry Andric Twine::utohexstr(place - sec.content().data()) + "): " + msg); 966e8d8bef9SDimitry Andric }; 9670b57cec5SDimitry Andric while (!data.empty()) { 9680b57cec5SDimitry Andric // Read one NOTE record. 9690b57cec5SDimitry Andric auto *nhdr = reinterpret_cast<const Elf_Nhdr *>(data.data()); 97006c3fb27SDimitry Andric if (data.size() < sizeof(Elf_Nhdr) || 97106c3fb27SDimitry Andric data.size() < nhdr->getSize(sec.addralign)) 972e8d8bef9SDimitry Andric reportFatal(data.data(), "data is too short"); 9730b57cec5SDimitry Andric 9740b57cec5SDimitry Andric Elf_Note note(*nhdr); 9750b57cec5SDimitry Andric if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") { 97606c3fb27SDimitry Andric data = data.slice(nhdr->getSize(sec.addralign)); 9770b57cec5SDimitry Andric continue; 9780b57cec5SDimitry Andric } 9790b57cec5SDimitry Andric 9800b57cec5SDimitry Andric uint32_t featureAndType = config->emachine == EM_AARCH64 9810b57cec5SDimitry Andric ? GNU_PROPERTY_AARCH64_FEATURE_1_AND 9820b57cec5SDimitry Andric : GNU_PROPERTY_X86_FEATURE_1_AND; 9830b57cec5SDimitry Andric 9840b57cec5SDimitry Andric // Read a body of a NOTE record, which consists of type-length-value fields. 98506c3fb27SDimitry Andric ArrayRef<uint8_t> desc = note.getDesc(sec.addralign); 9860b57cec5SDimitry Andric while (!desc.empty()) { 987e8d8bef9SDimitry Andric const uint8_t *place = desc.data(); 9880b57cec5SDimitry Andric if (desc.size() < 8) 989e8d8bef9SDimitry Andric reportFatal(place, "program property is too short"); 990*0fca6ea1SDimitry Andric uint32_t type = read32<ELFT::Endianness>(desc.data()); 991*0fca6ea1SDimitry Andric uint32_t size = read32<ELFT::Endianness>(desc.data() + 4); 992e8d8bef9SDimitry Andric desc = desc.slice(8); 993e8d8bef9SDimitry Andric if (desc.size() < size) 994e8d8bef9SDimitry Andric reportFatal(place, "program property is too short"); 9950b57cec5SDimitry Andric 9960b57cec5SDimitry Andric if (type == featureAndType) { 9970b57cec5SDimitry Andric // We found a FEATURE_1_AND field. There may be more than one of these 998480093f4SDimitry Andric // in a .note.gnu.property section, for a relocatable object we 9990b57cec5SDimitry Andric // accumulate the bits set. 1000e8d8bef9SDimitry Andric if (size < 4) 1001e8d8bef9SDimitry Andric reportFatal(place, "FEATURE_1_AND entry is too short"); 1002*0fca6ea1SDimitry Andric f.andFeatures |= read32<ELFT::Endianness>(desc.data()); 1003*0fca6ea1SDimitry Andric } else if (config->emachine == EM_AARCH64 && 1004*0fca6ea1SDimitry Andric type == GNU_PROPERTY_AARCH64_FEATURE_PAUTH) { 1005*0fca6ea1SDimitry Andric if (!f.aarch64PauthAbiCoreInfo.empty()) { 1006*0fca6ea1SDimitry Andric reportFatal(data.data(), 1007*0fca6ea1SDimitry Andric "multiple GNU_PROPERTY_AARCH64_FEATURE_PAUTH entries are " 1008*0fca6ea1SDimitry Andric "not supported"); 1009*0fca6ea1SDimitry Andric } else if (size != 16) { 1010*0fca6ea1SDimitry Andric reportFatal(data.data(), "GNU_PROPERTY_AARCH64_FEATURE_PAUTH entry " 1011*0fca6ea1SDimitry Andric "is invalid: expected 16 bytes, but got " + 1012*0fca6ea1SDimitry Andric Twine(size)); 1013*0fca6ea1SDimitry Andric } 1014*0fca6ea1SDimitry Andric f.aarch64PauthAbiCoreInfo = desc; 10150b57cec5SDimitry Andric } 10160b57cec5SDimitry Andric 1017e8d8bef9SDimitry Andric // Padding is present in the note descriptor, if necessary. 1018e8d8bef9SDimitry Andric desc = desc.slice(alignTo<(ELFT::Is64Bits ? 8 : 4)>(size)); 10190b57cec5SDimitry Andric } 10200b57cec5SDimitry Andric 10210b57cec5SDimitry Andric // Go to next NOTE record to look for more FEATURE_1_AND descriptions. 102206c3fb27SDimitry Andric data = data.slice(nhdr->getSize(sec.addralign)); 10230b57cec5SDimitry Andric } 10240b57cec5SDimitry Andric } 10250b57cec5SDimitry Andric 10260b57cec5SDimitry Andric template <class ELFT> 1027*0fca6ea1SDimitry Andric InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx, uint32_t info) { 1028349cc55cSDimitry Andric if (info < this->sections.size()) { 1029349cc55cSDimitry Andric InputSectionBase *target = this->sections[info]; 10300b57cec5SDimitry Andric 10310b57cec5SDimitry Andric // Strictly speaking, a relocation section must be included in the 10320b57cec5SDimitry Andric // group of the section it relocates. However, LLVM 3.3 and earlier 10330b57cec5SDimitry Andric // would fail to do so, so we gracefully handle that case. 10340b57cec5SDimitry Andric if (target == &InputSection::discarded) 10350b57cec5SDimitry Andric return nullptr; 10360b57cec5SDimitry Andric 1037349cc55cSDimitry Andric if (target != nullptr) 10380b57cec5SDimitry Andric return target; 10390b57cec5SDimitry Andric } 10400b57cec5SDimitry Andric 104104eeddc0SDimitry Andric error(toString(this) + Twine(": relocation section (index ") + Twine(idx) + 104204eeddc0SDimitry Andric ") has invalid sh_info (" + Twine(info) + ")"); 1043349cc55cSDimitry Andric return nullptr; 1044349cc55cSDimitry Andric } 1045349cc55cSDimitry Andric 1046bdd1243dSDimitry Andric // The function may be called concurrently for different input files. For 1047bdd1243dSDimitry Andric // allocation, prefer makeThreadLocal which does not require holding a lock. 10480b57cec5SDimitry Andric template <class ELFT> 1049349cc55cSDimitry Andric InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx, 1050349cc55cSDimitry Andric const Elf_Shdr &sec, 10511fd87a68SDimitry Andric StringRef name) { 105206c3fb27SDimitry Andric if (name.starts_with(".n")) { 10530b57cec5SDimitry Andric // The GNU linker uses .note.GNU-stack section as a marker indicating 10540b57cec5SDimitry Andric // that the code in the object file does not expect that the stack is 10550b57cec5SDimitry Andric // executable (in terms of NX bit). If all input files have the marker, 10560b57cec5SDimitry Andric // the GNU linker adds a PT_GNU_STACK segment to tells the loader to 10570b57cec5SDimitry Andric // make the stack non-executable. Most object files have this section as 10580b57cec5SDimitry Andric // of 2017. 10590b57cec5SDimitry Andric // 10600b57cec5SDimitry Andric // But making the stack non-executable is a norm today for security 10610b57cec5SDimitry Andric // reasons. Failure to do so may result in a serious security issue. 10620b57cec5SDimitry Andric // Therefore, we make LLD always add PT_GNU_STACK unless it is 10630b57cec5SDimitry Andric // explicitly told to do otherwise (by -z execstack). Because the stack 10640b57cec5SDimitry Andric // executable-ness is controlled solely by command line options, 10650b57cec5SDimitry Andric // .note.GNU-stack sections are simply ignored. 10660b57cec5SDimitry Andric if (name == ".note.GNU-stack") 10670b57cec5SDimitry Andric return &InputSection::discarded; 10680b57cec5SDimitry Andric 10690b57cec5SDimitry Andric // Object files that use processor features such as Intel Control-Flow 10700b57cec5SDimitry Andric // Enforcement (CET) or AArch64 Branch Target Identification BTI, use a 10710b57cec5SDimitry Andric // .note.gnu.property section containing a bitfield of feature bits like the 10720b57cec5SDimitry Andric // GNU_PROPERTY_X86_FEATURE_1_IBT flag. Read a bitmap containing the flag. 10730b57cec5SDimitry Andric // 10740b57cec5SDimitry Andric // Since we merge bitmaps from multiple object files to create a new 10750b57cec5SDimitry Andric // .note.gnu.property containing a single AND'ed bitmap, we discard an input 10760b57cec5SDimitry Andric // file's .note.gnu.property section. 10770b57cec5SDimitry Andric if (name == ".note.gnu.property") { 1078*0fca6ea1SDimitry Andric readGnuProperty<ELFT>(InputSection(*this, sec, name), *this); 10790b57cec5SDimitry Andric return &InputSection::discarded; 10800b57cec5SDimitry Andric } 10810b57cec5SDimitry Andric 10820b57cec5SDimitry Andric // Split stacks is a feature to support a discontiguous stack, 10830b57cec5SDimitry Andric // commonly used in the programming language Go. For the details, 10840b57cec5SDimitry Andric // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled 10850b57cec5SDimitry Andric // for split stack will include a .note.GNU-split-stack section. 10860b57cec5SDimitry Andric if (name == ".note.GNU-split-stack") { 10870b57cec5SDimitry Andric if (config->relocatable) { 10880eae32dcSDimitry Andric error( 10890eae32dcSDimitry Andric "cannot mix split-stack and non-split-stack in a relocatable link"); 10900b57cec5SDimitry Andric return &InputSection::discarded; 10910b57cec5SDimitry Andric } 10920b57cec5SDimitry Andric this->splitStack = true; 10930b57cec5SDimitry Andric return &InputSection::discarded; 10940b57cec5SDimitry Andric } 10950b57cec5SDimitry Andric 1096bdd1243dSDimitry Andric // An object file compiled for split stack, but where some of the 10970b57cec5SDimitry Andric // functions were compiled with the no_split_stack_attribute will 10980b57cec5SDimitry Andric // include a .note.GNU-no-split-stack section. 10990b57cec5SDimitry Andric if (name == ".note.GNU-no-split-stack") { 11000b57cec5SDimitry Andric this->someNoSplitStack = true; 11010b57cec5SDimitry Andric return &InputSection::discarded; 11020b57cec5SDimitry Andric } 11030b57cec5SDimitry Andric 11040eae32dcSDimitry Andric // Strip existing .note.gnu.build-id sections so that the output won't have 11050eae32dcSDimitry Andric // more than one build-id. This is not usually a problem because input 11060eae32dcSDimitry Andric // object files normally don't have .build-id sections, but you can create 11070eae32dcSDimitry Andric // such files by "ld.{bfd,gold,lld} -r --build-id", and we want to guard 11080eae32dcSDimitry Andric // against it. 11090eae32dcSDimitry Andric if (name == ".note.gnu.build-id") 11100eae32dcSDimitry Andric return &InputSection::discarded; 11110eae32dcSDimitry Andric } 11120eae32dcSDimitry Andric 11130b57cec5SDimitry Andric // The linker merges EH (exception handling) frames and creates a 11140b57cec5SDimitry Andric // .eh_frame_hdr section for runtime. So we handle them with a special 11150b57cec5SDimitry Andric // class. For relocatable outputs, they are just passed through. 11160b57cec5SDimitry Andric if (name == ".eh_frame" && !config->relocatable) 1117bdd1243dSDimitry Andric return makeThreadLocal<EhInputSection>(*this, sec, name); 11180b57cec5SDimitry Andric 11190eae32dcSDimitry Andric if ((sec.sh_flags & SHF_MERGE) && shouldMerge(sec, name)) 1120bdd1243dSDimitry Andric return makeThreadLocal<MergeInputSection>(*this, sec, name); 1121bdd1243dSDimitry Andric return makeThreadLocal<InputSection>(*this, sec, name); 11220b57cec5SDimitry Andric } 11230b57cec5SDimitry Andric 112406c3fb27SDimitry Andric // Initialize symbols. symbols is a parallel array to the corresponding ELF 112506c3fb27SDimitry Andric // symbol table. 11261fd87a68SDimitry Andric template <class ELFT> 11271fd87a68SDimitry Andric void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) { 11280eae32dcSDimitry Andric ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 1129bdd1243dSDimitry Andric if (numSymbols == 0) { 1130bdd1243dSDimitry Andric numSymbols = eSyms.size(); 1131bdd1243dSDimitry Andric symbols = std::make_unique<Symbol *[]>(numSymbols); 1132bdd1243dSDimitry Andric } 11330eae32dcSDimitry Andric 113481ad6265SDimitry Andric // Some entries have been filled by LazyObjFile. 113581ad6265SDimitry Andric for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) 113681ad6265SDimitry Andric if (!symbols[i]) 113781ad6265SDimitry Andric symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this)); 113881ad6265SDimitry Andric 113981ad6265SDimitry Andric // Perform symbol resolution on non-local symbols. 114081ad6265SDimitry Andric SmallVector<unsigned, 32> undefineds; 114181ad6265SDimitry Andric for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 114281ad6265SDimitry Andric const Elf_Sym &eSym = eSyms[i]; 114381ad6265SDimitry Andric uint32_t secIdx = eSym.st_shndx; 114481ad6265SDimitry Andric if (secIdx == SHN_UNDEF) { 114581ad6265SDimitry Andric undefineds.push_back(i); 114681ad6265SDimitry Andric continue; 114781ad6265SDimitry Andric } 114881ad6265SDimitry Andric 114981ad6265SDimitry Andric uint8_t binding = eSym.getBinding(); 115081ad6265SDimitry Andric uint8_t stOther = eSym.st_other; 115181ad6265SDimitry Andric uint8_t type = eSym.getType(); 115281ad6265SDimitry Andric uint64_t value = eSym.st_value; 115381ad6265SDimitry Andric uint64_t size = eSym.st_size; 115481ad6265SDimitry Andric 115581ad6265SDimitry Andric Symbol *sym = symbols[i]; 115681ad6265SDimitry Andric sym->isUsedInRegularObj = true; 115781ad6265SDimitry Andric if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) { 115881ad6265SDimitry Andric if (value == 0 || value >= UINT32_MAX) 115981ad6265SDimitry Andric fatal(toString(this) + ": common symbol '" + sym->getName() + 116081ad6265SDimitry Andric "' has invalid alignment: " + Twine(value)); 116181ad6265SDimitry Andric hasCommonSyms = true; 116281ad6265SDimitry Andric sym->resolve( 116381ad6265SDimitry Andric CommonSymbol{this, StringRef(), binding, stOther, type, value, size}); 116481ad6265SDimitry Andric continue; 116581ad6265SDimitry Andric } 116681ad6265SDimitry Andric 116781ad6265SDimitry Andric // Handle global defined symbols. Defined::section will be set in postParse. 116881ad6265SDimitry Andric sym->resolve(Defined{this, StringRef(), binding, stOther, type, value, size, 116981ad6265SDimitry Andric nullptr}); 117081ad6265SDimitry Andric } 117181ad6265SDimitry Andric 117281ad6265SDimitry Andric // Undefined symbols (excluding those defined relative to non-prevailing 117381ad6265SDimitry Andric // sections) can trigger recursive extract. Process defined symbols first so 117481ad6265SDimitry Andric // that the relative order between a defined symbol and an undefined symbol 117581ad6265SDimitry Andric // does not change the symbol resolution behavior. In addition, a set of 117681ad6265SDimitry Andric // interconnected symbols will all be resolved to the same file, instead of 117781ad6265SDimitry Andric // being resolved to different files. 117881ad6265SDimitry Andric for (unsigned i : undefineds) { 117981ad6265SDimitry Andric const Elf_Sym &eSym = eSyms[i]; 118081ad6265SDimitry Andric Symbol *sym = symbols[i]; 118181ad6265SDimitry Andric sym->resolve(Undefined{this, StringRef(), eSym.getBinding(), eSym.st_other, 118281ad6265SDimitry Andric eSym.getType()}); 118381ad6265SDimitry Andric sym->isUsedInRegularObj = true; 118481ad6265SDimitry Andric sym->referenced = true; 118581ad6265SDimitry Andric } 118681ad6265SDimitry Andric } 118781ad6265SDimitry Andric 1188bdd1243dSDimitry Andric template <class ELFT> 1189bdd1243dSDimitry Andric void ObjFile<ELFT>::initSectionsAndLocalSyms(bool ignoreComdats) { 1190bdd1243dSDimitry Andric if (!justSymbols) 1191bdd1243dSDimitry Andric initializeSections(ignoreComdats, getObj()); 1192bdd1243dSDimitry Andric 119381ad6265SDimitry Andric if (!firstGlobal) 119481ad6265SDimitry Andric return; 1195bdd1243dSDimitry Andric SymbolUnion *locals = makeThreadLocalN<SymbolUnion>(firstGlobal); 1196bdd1243dSDimitry Andric memset(locals, 0, sizeof(SymbolUnion) * firstGlobal); 119781ad6265SDimitry Andric 119881ad6265SDimitry Andric ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 11990eae32dcSDimitry Andric for (size_t i = 0, end = firstGlobal; i != end; ++i) { 12000b57cec5SDimitry Andric const Elf_Sym &eSym = eSyms[i]; 12011fd87a68SDimitry Andric uint32_t secIdx = eSym.st_shndx; 12021fd87a68SDimitry Andric if (LLVM_UNLIKELY(secIdx == SHN_XINDEX)) 12031fd87a68SDimitry Andric secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable)); 12041fd87a68SDimitry Andric else if (secIdx >= SHN_LORESERVE) 12051fd87a68SDimitry Andric secIdx = 0; 12060eae32dcSDimitry Andric if (LLVM_UNLIKELY(secIdx >= sections.size())) 12070b57cec5SDimitry Andric fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); 12080eae32dcSDimitry Andric if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL)) 12095ffd83dbSDimitry Andric error(toString(this) + ": non-local symbol (" + Twine(i) + 12100eae32dcSDimitry Andric ") found at index < .symtab's sh_info (" + Twine(end) + ")"); 12115ffd83dbSDimitry Andric 12120eae32dcSDimitry Andric InputSectionBase *sec = sections[secIdx]; 12135ffd83dbSDimitry Andric uint8_t type = eSym.getType(); 12145ffd83dbSDimitry Andric if (type == STT_FILE) 12150eae32dcSDimitry Andric sourceFile = CHECK(eSym.getName(stringTable), this); 12160eae32dcSDimitry Andric if (LLVM_UNLIKELY(stringTable.size() <= eSym.st_name)) 12175ffd83dbSDimitry Andric fatal(toString(this) + ": invalid symbol name offset"); 121804eeddc0SDimitry Andric StringRef name(stringTable.data() + eSym.st_name); 12195ffd83dbSDimitry Andric 12200eae32dcSDimitry Andric symbols[i] = reinterpret_cast<Symbol *>(locals + i); 12210eae32dcSDimitry Andric if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded) 12220eae32dcSDimitry Andric new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type, 12235ffd83dbSDimitry Andric /*discardedSecIdx=*/secIdx); 12245ffd83dbSDimitry Andric else 12250eae32dcSDimitry Andric new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type, 12260eae32dcSDimitry Andric eSym.st_value, eSym.st_size, sec); 1227bdd1243dSDimitry Andric symbols[i]->partition = 1; 122881ad6265SDimitry Andric symbols[i]->isUsedInRegularObj = true; 122981ad6265SDimitry Andric } 12305ffd83dbSDimitry Andric } 12315ffd83dbSDimitry Andric 123281ad6265SDimitry Andric // Called after all ObjFile::parse is called for all ObjFiles. This checks 123381ad6265SDimitry Andric // duplicate symbols and may do symbol property merge in the future. 123481ad6265SDimitry Andric template <class ELFT> void ObjFile<ELFT>::postParse() { 123581ad6265SDimitry Andric static std::mutex mu; 123681ad6265SDimitry Andric ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 12375ffd83dbSDimitry Andric for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 12385ffd83dbSDimitry Andric const Elf_Sym &eSym = eSyms[i]; 123981ad6265SDimitry Andric Symbol &sym = *symbols[i]; 12401fd87a68SDimitry Andric uint32_t secIdx = eSym.st_shndx; 124181ad6265SDimitry Andric uint8_t binding = eSym.getBinding(); 124281ad6265SDimitry Andric if (LLVM_UNLIKELY(binding != STB_GLOBAL && binding != STB_WEAK && 124381ad6265SDimitry Andric binding != STB_GNU_UNIQUE)) 124481ad6265SDimitry Andric errorOrWarn(toString(this) + ": symbol (" + Twine(i) + 124581ad6265SDimitry Andric ") has invalid binding: " + Twine((int)binding)); 124681ad6265SDimitry Andric 124781ad6265SDimitry Andric // st_value of STT_TLS represents the assigned offset, not the actual 124881ad6265SDimitry Andric // address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can 124981ad6265SDimitry Andric // only be referenced by special TLS relocations. It is usually an error if 125081ad6265SDimitry Andric // a STT_TLS symbol is replaced by a non-STT_TLS symbol, vice versa. 125181ad6265SDimitry Andric if (LLVM_UNLIKELY(sym.isTls()) && eSym.getType() != STT_TLS && 125281ad6265SDimitry Andric eSym.getType() != STT_NOTYPE) 125381ad6265SDimitry Andric errorOrWarn("TLS attribute mismatch: " + toString(sym) + "\n>>> in " + 125481ad6265SDimitry Andric toString(sym.file) + "\n>>> in " + toString(this)); 125581ad6265SDimitry Andric 125681ad6265SDimitry Andric // Handle non-COMMON defined symbol below. !sym.file allows a symbol 125781ad6265SDimitry Andric // assignment to redefine a symbol without an error. 125881ad6265SDimitry Andric if (!sym.file || !sym.isDefined() || secIdx == SHN_UNDEF || 125981ad6265SDimitry Andric secIdx == SHN_COMMON) 126081ad6265SDimitry Andric continue; 126181ad6265SDimitry Andric 12621fd87a68SDimitry Andric if (LLVM_UNLIKELY(secIdx == SHN_XINDEX)) 12631fd87a68SDimitry Andric secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable)); 12641fd87a68SDimitry Andric else if (secIdx >= SHN_LORESERVE) 12651fd87a68SDimitry Andric secIdx = 0; 12660eae32dcSDimitry Andric if (LLVM_UNLIKELY(secIdx >= sections.size())) 12670eae32dcSDimitry Andric fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); 12680eae32dcSDimitry Andric InputSectionBase *sec = sections[secIdx]; 12690b57cec5SDimitry Andric if (sec == &InputSection::discarded) { 127081ad6265SDimitry Andric if (sym.traced) { 127181ad6265SDimitry Andric printTraceSymbol(Undefined{this, sym.getName(), sym.binding, 127281ad6265SDimitry Andric sym.stOther, sym.type, secIdx}, 127381ad6265SDimitry Andric sym.getName()); 127481ad6265SDimitry Andric } 127581ad6265SDimitry Andric if (sym.file == this) { 127681ad6265SDimitry Andric std::lock_guard<std::mutex> lock(mu); 1277bdd1243dSDimitry Andric ctx.nonPrevailingSyms.emplace_back(&sym, secIdx); 127881ad6265SDimitry Andric } 12790b57cec5SDimitry Andric continue; 12800b57cec5SDimitry Andric } 12810b57cec5SDimitry Andric 128281ad6265SDimitry Andric if (sym.file == this) { 128381ad6265SDimitry Andric cast<Defined>(sym).section = sec; 12840b57cec5SDimitry Andric continue; 12850b57cec5SDimitry Andric } 12860b57cec5SDimitry Andric 1287f3fd488fSDimitry Andric if (sym.binding == STB_WEAK || binding == STB_WEAK) 128881ad6265SDimitry Andric continue; 128981ad6265SDimitry Andric std::lock_guard<std::mutex> lock(mu); 1290bdd1243dSDimitry Andric ctx.duplicates.push_back({&sym, this, sec, eSym.st_value}); 12910b57cec5SDimitry Andric } 12920b57cec5SDimitry Andric } 12930b57cec5SDimitry Andric 1294e8d8bef9SDimitry Andric // The handling of tentative definitions (COMMON symbols) in archives is murky. 1295fe6060f1SDimitry Andric // A tentative definition will be promoted to a global definition if there are 1296fe6060f1SDimitry Andric // no non-tentative definitions to dominate it. When we hold a tentative 1297fe6060f1SDimitry Andric // definition to a symbol and are inspecting archive members for inclusion 1298fe6060f1SDimitry Andric // there are 2 ways we can proceed: 1299e8d8bef9SDimitry Andric // 1300e8d8bef9SDimitry Andric // 1) Consider the tentative definition a 'real' definition (ie promotion from 1301e8d8bef9SDimitry Andric // tentative to real definition has already happened) and not inspect 1302e8d8bef9SDimitry Andric // archive members for Global/Weak definitions to replace the tentative 1303e8d8bef9SDimitry Andric // definition. An archive member would only be included if it satisfies some 1304e8d8bef9SDimitry Andric // other undefined symbol. This is the behavior Gold uses. 1305e8d8bef9SDimitry Andric // 1306e8d8bef9SDimitry Andric // 2) Consider the tentative definition as still undefined (ie the promotion to 1307fe6060f1SDimitry Andric // a real definition happens only after all symbol resolution is done). 1308fe6060f1SDimitry Andric // The linker searches archive members for STB_GLOBAL definitions to 1309e8d8bef9SDimitry Andric // replace the tentative definition with. This is the behavior used by 1310e8d8bef9SDimitry Andric // GNU ld. 1311e8d8bef9SDimitry Andric // 1312e8d8bef9SDimitry Andric // The second behavior is inherited from SysVR4, which based it on the FORTRAN 1313fe6060f1SDimitry Andric // COMMON BLOCK model. This behavior is needed for proper initialization in old 1314e8d8bef9SDimitry Andric // (pre F90) FORTRAN code that is packaged into an archive. 1315e8d8bef9SDimitry Andric // 1316fe6060f1SDimitry Andric // The following functions search archive members for definitions to replace 1317fe6060f1SDimitry Andric // tentative definitions (implementing behavior 2). 1318e8d8bef9SDimitry Andric static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName, 1319e8d8bef9SDimitry Andric StringRef archiveName) { 1320e8d8bef9SDimitry Andric IRSymtabFile symtabFile = check(readIRSymtab(mb)); 1321e8d8bef9SDimitry Andric for (const irsymtab::Reader::SymbolRef &sym : 1322e8d8bef9SDimitry Andric symtabFile.TheReader.symbols()) { 1323e8d8bef9SDimitry Andric if (sym.isGlobal() && sym.getName() == symName) 1324fe6060f1SDimitry Andric return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon(); 1325e8d8bef9SDimitry Andric } 1326e8d8bef9SDimitry Andric return false; 1327e8d8bef9SDimitry Andric } 1328e8d8bef9SDimitry Andric 1329e8d8bef9SDimitry Andric template <class ELFT> 1330bdd1243dSDimitry Andric static bool isNonCommonDef(ELFKind ekind, MemoryBufferRef mb, StringRef symName, 1331e8d8bef9SDimitry Andric StringRef archiveName) { 1332bdd1243dSDimitry Andric ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(ekind, mb, archiveName); 1333bdd1243dSDimitry Andric obj->init(); 1334e8d8bef9SDimitry Andric StringRef stringtable = obj->getStringTable(); 1335e8d8bef9SDimitry Andric 1336e8d8bef9SDimitry Andric for (auto sym : obj->template getGlobalELFSyms<ELFT>()) { 1337e8d8bef9SDimitry Andric Expected<StringRef> name = sym.getName(stringtable); 1338e8d8bef9SDimitry Andric if (name && name.get() == symName) 1339fe6060f1SDimitry Andric return sym.isDefined() && sym.getBinding() == STB_GLOBAL && 1340fe6060f1SDimitry Andric !sym.isCommon(); 1341e8d8bef9SDimitry Andric } 1342e8d8bef9SDimitry Andric return false; 1343e8d8bef9SDimitry Andric } 1344e8d8bef9SDimitry Andric 1345e8d8bef9SDimitry Andric static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName, 1346e8d8bef9SDimitry Andric StringRef archiveName) { 1347e8d8bef9SDimitry Andric switch (getELFKind(mb, archiveName)) { 1348e8d8bef9SDimitry Andric case ELF32LEKind: 1349bdd1243dSDimitry Andric return isNonCommonDef<ELF32LE>(ELF32LEKind, mb, symName, archiveName); 1350e8d8bef9SDimitry Andric case ELF32BEKind: 1351bdd1243dSDimitry Andric return isNonCommonDef<ELF32BE>(ELF32BEKind, mb, symName, archiveName); 1352e8d8bef9SDimitry Andric case ELF64LEKind: 1353bdd1243dSDimitry Andric return isNonCommonDef<ELF64LE>(ELF64LEKind, mb, symName, archiveName); 1354e8d8bef9SDimitry Andric case ELF64BEKind: 1355bdd1243dSDimitry Andric return isNonCommonDef<ELF64BE>(ELF64BEKind, mb, symName, archiveName); 1356e8d8bef9SDimitry Andric default: 1357e8d8bef9SDimitry Andric llvm_unreachable("getELFKind"); 1358e8d8bef9SDimitry Andric } 1359e8d8bef9SDimitry Andric } 1360e8d8bef9SDimitry Andric 13610b57cec5SDimitry Andric unsigned SharedFile::vernauxNum; 13620b57cec5SDimitry Andric 1363bdd1243dSDimitry Andric SharedFile::SharedFile(MemoryBufferRef m, StringRef defaultSoName) 1364bdd1243dSDimitry Andric : ELFFileBase(SharedKind, getELFKind(m, ""), m), soName(defaultSoName), 1365bdd1243dSDimitry Andric isNeeded(!config->asNeeded) {} 1366bdd1243dSDimitry Andric 13670b57cec5SDimitry Andric // Parse the version definitions in the object file if present, and return a 13680b57cec5SDimitry Andric // vector whose nth element contains a pointer to the Elf_Verdef for version 13690b57cec5SDimitry Andric // identifier n. Version identifiers that are not definitions map to nullptr. 13700b57cec5SDimitry Andric template <typename ELFT> 13710eae32dcSDimitry Andric static SmallVector<const void *, 0> 13720eae32dcSDimitry Andric parseVerdefs(const uint8_t *base, const typename ELFT::Shdr *sec) { 13730b57cec5SDimitry Andric if (!sec) 13740b57cec5SDimitry Andric return {}; 13750b57cec5SDimitry Andric 13760b57cec5SDimitry Andric // Build the Verdefs array by following the chain of Elf_Verdef objects 13770b57cec5SDimitry Andric // from the start of the .gnu.version_d section. 13780eae32dcSDimitry Andric SmallVector<const void *, 0> verdefs; 13790b57cec5SDimitry Andric const uint8_t *verdef = base + sec->sh_offset; 13800eae32dcSDimitry Andric for (unsigned i = 0, e = sec->sh_info; i != e; ++i) { 13810b57cec5SDimitry Andric auto *curVerdef = reinterpret_cast<const typename ELFT::Verdef *>(verdef); 13820b57cec5SDimitry Andric verdef += curVerdef->vd_next; 13830b57cec5SDimitry Andric unsigned verdefIndex = curVerdef->vd_ndx; 13840eae32dcSDimitry Andric if (verdefIndex >= verdefs.size()) 13850b57cec5SDimitry Andric verdefs.resize(verdefIndex + 1); 13860b57cec5SDimitry Andric verdefs[verdefIndex] = curVerdef; 13870b57cec5SDimitry Andric } 13880b57cec5SDimitry Andric return verdefs; 13890b57cec5SDimitry Andric } 13900b57cec5SDimitry Andric 13915ffd83dbSDimitry Andric // Parse SHT_GNU_verneed to properly set the name of a versioned undefined 13925ffd83dbSDimitry Andric // symbol. We detect fatal issues which would cause vulnerabilities, but do not 13935ffd83dbSDimitry Andric // implement sophisticated error checking like in llvm-readobj because the value 13945ffd83dbSDimitry Andric // of such diagnostics is low. 13955ffd83dbSDimitry Andric template <typename ELFT> 13965ffd83dbSDimitry Andric std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj, 13975ffd83dbSDimitry Andric const typename ELFT::Shdr *sec) { 13985ffd83dbSDimitry Andric if (!sec) 13995ffd83dbSDimitry Andric return {}; 14005ffd83dbSDimitry Andric std::vector<uint32_t> verneeds; 1401e8d8bef9SDimitry Andric ArrayRef<uint8_t> data = CHECK(obj.getSectionContents(*sec), this); 14025ffd83dbSDimitry Andric const uint8_t *verneedBuf = data.begin(); 14035ffd83dbSDimitry Andric for (unsigned i = 0; i != sec->sh_info; ++i) { 14045ffd83dbSDimitry Andric if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end()) 14055ffd83dbSDimitry Andric fatal(toString(this) + " has an invalid Verneed"); 14065ffd83dbSDimitry Andric auto *vn = reinterpret_cast<const typename ELFT::Verneed *>(verneedBuf); 14075ffd83dbSDimitry Andric const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux; 14085ffd83dbSDimitry Andric for (unsigned j = 0; j != vn->vn_cnt; ++j) { 14095ffd83dbSDimitry Andric if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end()) 14105ffd83dbSDimitry Andric fatal(toString(this) + " has an invalid Vernaux"); 14115ffd83dbSDimitry Andric auto *aux = reinterpret_cast<const typename ELFT::Vernaux *>(vernauxBuf); 14125ffd83dbSDimitry Andric if (aux->vna_name >= this->stringTable.size()) 14135ffd83dbSDimitry Andric fatal(toString(this) + " has a Vernaux with an invalid vna_name"); 14145ffd83dbSDimitry Andric uint16_t version = aux->vna_other & VERSYM_VERSION; 14155ffd83dbSDimitry Andric if (version >= verneeds.size()) 14165ffd83dbSDimitry Andric verneeds.resize(version + 1); 14175ffd83dbSDimitry Andric verneeds[version] = aux->vna_name; 14185ffd83dbSDimitry Andric vernauxBuf += aux->vna_next; 14195ffd83dbSDimitry Andric } 14205ffd83dbSDimitry Andric verneedBuf += vn->vn_next; 14215ffd83dbSDimitry Andric } 14225ffd83dbSDimitry Andric return verneeds; 14235ffd83dbSDimitry Andric } 14245ffd83dbSDimitry Andric 14250b57cec5SDimitry Andric // We do not usually care about alignments of data in shared object 14260b57cec5SDimitry Andric // files because the loader takes care of it. However, if we promote a 14270b57cec5SDimitry Andric // DSO symbol to point to .bss due to copy relocation, we need to keep 14280b57cec5SDimitry Andric // the original alignment requirements. We infer it in this function. 14290b57cec5SDimitry Andric template <typename ELFT> 14300b57cec5SDimitry Andric static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> sections, 14310b57cec5SDimitry Andric const typename ELFT::Sym &sym) { 14320b57cec5SDimitry Andric uint64_t ret = UINT64_MAX; 14330b57cec5SDimitry Andric if (sym.st_value) 143406c3fb27SDimitry Andric ret = 1ULL << llvm::countr_zero((uint64_t)sym.st_value); 14350b57cec5SDimitry Andric if (0 < sym.st_shndx && sym.st_shndx < sections.size()) 14360b57cec5SDimitry Andric ret = std::min<uint64_t>(ret, sections[sym.st_shndx].sh_addralign); 14370b57cec5SDimitry Andric return (ret > UINT32_MAX) ? 0 : ret; 14380b57cec5SDimitry Andric } 14390b57cec5SDimitry Andric 14400b57cec5SDimitry Andric // Fully parse the shared object file. 14410b57cec5SDimitry Andric // 14420b57cec5SDimitry Andric // This function parses symbol versions. If a DSO has version information, 14430b57cec5SDimitry Andric // the file has a ".gnu.version_d" section which contains symbol version 14440b57cec5SDimitry Andric // definitions. Each symbol is associated to one version through a table in 14450b57cec5SDimitry Andric // ".gnu.version" section. That table is a parallel array for the symbol 14460b57cec5SDimitry Andric // table, and each table entry contains an index in ".gnu.version_d". 14470b57cec5SDimitry Andric // 14480b57cec5SDimitry Andric // The special index 0 is reserved for VERF_NDX_LOCAL and 1 is for 14490b57cec5SDimitry Andric // VER_NDX_GLOBAL. There's no table entry for these special versions in 14500b57cec5SDimitry Andric // ".gnu.version_d". 14510b57cec5SDimitry Andric // 14520b57cec5SDimitry Andric // The file format for symbol versioning is perhaps a bit more complicated 14530b57cec5SDimitry Andric // than necessary, but you can easily understand the code if you wrap your 14540b57cec5SDimitry Andric // head around the data structure described above. 14550b57cec5SDimitry Andric template <class ELFT> void SharedFile::parse() { 14560b57cec5SDimitry Andric using Elf_Dyn = typename ELFT::Dyn; 14570b57cec5SDimitry Andric using Elf_Shdr = typename ELFT::Shdr; 14580b57cec5SDimitry Andric using Elf_Sym = typename ELFT::Sym; 14590b57cec5SDimitry Andric using Elf_Verdef = typename ELFT::Verdef; 14600b57cec5SDimitry Andric using Elf_Versym = typename ELFT::Versym; 14610b57cec5SDimitry Andric 14620b57cec5SDimitry Andric ArrayRef<Elf_Dyn> dynamicTags; 14630b57cec5SDimitry Andric const ELFFile<ELFT> obj = this->getObj<ELFT>(); 14640eae32dcSDimitry Andric ArrayRef<Elf_Shdr> sections = getELFShdrs<ELFT>(); 14650b57cec5SDimitry Andric 14660b57cec5SDimitry Andric const Elf_Shdr *versymSec = nullptr; 14670b57cec5SDimitry Andric const Elf_Shdr *verdefSec = nullptr; 14685ffd83dbSDimitry Andric const Elf_Shdr *verneedSec = nullptr; 14690b57cec5SDimitry Andric 14700b57cec5SDimitry Andric // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d. 14710b57cec5SDimitry Andric for (const Elf_Shdr &sec : sections) { 14720b57cec5SDimitry Andric switch (sec.sh_type) { 14730b57cec5SDimitry Andric default: 14740b57cec5SDimitry Andric continue; 14750b57cec5SDimitry Andric case SHT_DYNAMIC: 14760b57cec5SDimitry Andric dynamicTags = 1477e8d8bef9SDimitry Andric CHECK(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this); 14780b57cec5SDimitry Andric break; 14790b57cec5SDimitry Andric case SHT_GNU_versym: 14800b57cec5SDimitry Andric versymSec = &sec; 14810b57cec5SDimitry Andric break; 14820b57cec5SDimitry Andric case SHT_GNU_verdef: 14830b57cec5SDimitry Andric verdefSec = &sec; 14840b57cec5SDimitry Andric break; 14855ffd83dbSDimitry Andric case SHT_GNU_verneed: 14865ffd83dbSDimitry Andric verneedSec = &sec; 14875ffd83dbSDimitry Andric break; 14880b57cec5SDimitry Andric } 14890b57cec5SDimitry Andric } 14900b57cec5SDimitry Andric 14910b57cec5SDimitry Andric if (versymSec && numELFSyms == 0) { 14920b57cec5SDimitry Andric error("SHT_GNU_versym should be associated with symbol table"); 14930b57cec5SDimitry Andric return; 14940b57cec5SDimitry Andric } 14950b57cec5SDimitry Andric 14960b57cec5SDimitry Andric // Search for a DT_SONAME tag to initialize this->soName. 14970b57cec5SDimitry Andric for (const Elf_Dyn &dyn : dynamicTags) { 14980b57cec5SDimitry Andric if (dyn.d_tag == DT_NEEDED) { 14990b57cec5SDimitry Andric uint64_t val = dyn.getVal(); 15000b57cec5SDimitry Andric if (val >= this->stringTable.size()) 15010b57cec5SDimitry Andric fatal(toString(this) + ": invalid DT_NEEDED entry"); 15020b57cec5SDimitry Andric dtNeeded.push_back(this->stringTable.data() + val); 15030b57cec5SDimitry Andric } else if (dyn.d_tag == DT_SONAME) { 15040b57cec5SDimitry Andric uint64_t val = dyn.getVal(); 15050b57cec5SDimitry Andric if (val >= this->stringTable.size()) 15060b57cec5SDimitry Andric fatal(toString(this) + ": invalid DT_SONAME entry"); 15070b57cec5SDimitry Andric soName = this->stringTable.data() + val; 15080b57cec5SDimitry Andric } 15090b57cec5SDimitry Andric } 15100b57cec5SDimitry Andric 15110b57cec5SDimitry Andric // DSOs are uniquified not by filename but by soname. 151204eeddc0SDimitry Andric DenseMap<CachedHashStringRef, SharedFile *>::iterator it; 15130b57cec5SDimitry Andric bool wasInserted; 151404eeddc0SDimitry Andric std::tie(it, wasInserted) = 1515bdd1243dSDimitry Andric symtab.soNames.try_emplace(CachedHashStringRef(soName), this); 15160b57cec5SDimitry Andric 15170b57cec5SDimitry Andric // If a DSO appears more than once on the command line with and without 15180b57cec5SDimitry Andric // --as-needed, --no-as-needed takes precedence over --as-needed because a 15190b57cec5SDimitry Andric // user can add an extra DSO with --no-as-needed to force it to be added to 15200b57cec5SDimitry Andric // the dependency list. 15210b57cec5SDimitry Andric it->second->isNeeded |= isNeeded; 15220b57cec5SDimitry Andric if (!wasInserted) 15230b57cec5SDimitry Andric return; 15240b57cec5SDimitry Andric 1525bdd1243dSDimitry Andric ctx.sharedFiles.push_back(this); 15260b57cec5SDimitry Andric 15270b57cec5SDimitry Andric verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec); 15285ffd83dbSDimitry Andric std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec); 15290b57cec5SDimitry Andric 15300b57cec5SDimitry Andric // Parse ".gnu.version" section which is a parallel array for the symbol 15310b57cec5SDimitry Andric // table. If a given file doesn't have a ".gnu.version" section, we use 15320b57cec5SDimitry Andric // VER_NDX_GLOBAL. 15330b57cec5SDimitry Andric size_t size = numELFSyms - firstGlobal; 15345ffd83dbSDimitry Andric std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL); 15350b57cec5SDimitry Andric if (versymSec) { 15360b57cec5SDimitry Andric ArrayRef<Elf_Versym> versym = 1537e8d8bef9SDimitry Andric CHECK(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec), 15380b57cec5SDimitry Andric this) 15390b57cec5SDimitry Andric .slice(firstGlobal); 15400b57cec5SDimitry Andric for (size_t i = 0; i < size; ++i) 15410b57cec5SDimitry Andric versyms[i] = versym[i].vs_index; 15420b57cec5SDimitry Andric } 15430b57cec5SDimitry Andric 15440b57cec5SDimitry Andric // System libraries can have a lot of symbols with versions. Using a 15450b57cec5SDimitry Andric // fixed buffer for computing the versions name (foo@ver) can save a 15460b57cec5SDimitry Andric // lot of allocations. 15470b57cec5SDimitry Andric SmallString<0> versionedNameBuffer; 15480b57cec5SDimitry Andric 15490b57cec5SDimitry Andric // Add symbols to the symbol table. 15500b57cec5SDimitry Andric ArrayRef<Elf_Sym> syms = this->getGlobalELFSyms<ELFT>(); 15510eae32dcSDimitry Andric for (size_t i = 0, e = syms.size(); i != e; ++i) { 15520b57cec5SDimitry Andric const Elf_Sym &sym = syms[i]; 15530b57cec5SDimitry Andric 15540b57cec5SDimitry Andric // ELF spec requires that all local symbols precede weak or global 15550b57cec5SDimitry Andric // symbols in each symbol table, and the index of first non-local symbol 15560b57cec5SDimitry Andric // is stored to sh_info. If a local symbol appears after some non-local 15570b57cec5SDimitry Andric // symbol, that's a violation of the spec. 15580eae32dcSDimitry Andric StringRef name = CHECK(sym.getName(stringTable), this); 15590b57cec5SDimitry Andric if (sym.getBinding() == STB_LOCAL) { 1560bdd1243dSDimitry Andric errorOrWarn(toString(this) + ": invalid local symbol '" + name + 1561bdd1243dSDimitry Andric "' in global part of symbol table"); 15620b57cec5SDimitry Andric continue; 15630b57cec5SDimitry Andric } 15640b57cec5SDimitry Andric 1565bdd1243dSDimitry Andric const uint16_t ver = versyms[i], idx = ver & ~VERSYM_HIDDEN; 15660b57cec5SDimitry Andric if (sym.isUndefined()) { 15675ffd83dbSDimitry Andric // For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but 15685ffd83dbSDimitry Andric // as of binutils 2.34, GNU ld produces VER_NDX_LOCAL. 1569bdd1243dSDimitry Andric if (ver != VER_NDX_LOCAL && ver != VER_NDX_GLOBAL) { 15705ffd83dbSDimitry Andric if (idx >= verneeds.size()) { 15715ffd83dbSDimitry Andric error("corrupt input file: version need index " + Twine(idx) + 15725ffd83dbSDimitry Andric " for symbol " + name + " is out of bounds\n>>> defined in " + 15735ffd83dbSDimitry Andric toString(this)); 15745ffd83dbSDimitry Andric continue; 15755ffd83dbSDimitry Andric } 15760eae32dcSDimitry Andric StringRef verName = stringTable.data() + verneeds[idx]; 15775ffd83dbSDimitry Andric versionedNameBuffer.clear(); 157804eeddc0SDimitry Andric name = saver().save( 157904eeddc0SDimitry Andric (name + "@" + verName).toStringRef(versionedNameBuffer)); 15805ffd83dbSDimitry Andric } 15810eae32dcSDimitry Andric Symbol *s = symtab.addSymbol( 15820b57cec5SDimitry Andric Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); 15830b57cec5SDimitry Andric s->exportDynamic = true; 1584*0fca6ea1SDimitry Andric if (sym.getBinding() != STB_WEAK && 1585fe6060f1SDimitry Andric config->unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore) 1586fe6060f1SDimitry Andric requiredSymbols.push_back(s); 15870b57cec5SDimitry Andric continue; 15880b57cec5SDimitry Andric } 15890b57cec5SDimitry Andric 1590bdd1243dSDimitry Andric if (ver == VER_NDX_LOCAL || 1591bdd1243dSDimitry Andric (ver != VER_NDX_GLOBAL && idx >= verdefs.size())) { 1592bdd1243dSDimitry Andric // In GNU ld < 2.31 (before 3be08ea4728b56d35e136af4e6fd3086ade17764), the 1593bdd1243dSDimitry Andric // MIPS port puts _gp_disp symbol into DSO files and incorrectly assigns 1594bdd1243dSDimitry Andric // VER_NDX_LOCAL. Workaround this bug. 1595bdd1243dSDimitry Andric if (config->emachine == EM_MIPS && name == "_gp_disp") 15960b57cec5SDimitry Andric continue; 15970b57cec5SDimitry Andric error("corrupt input file: version definition index " + Twine(idx) + 15980b57cec5SDimitry Andric " for symbol " + name + " is out of bounds\n>>> defined in " + 15990b57cec5SDimitry Andric toString(this)); 16000b57cec5SDimitry Andric continue; 16010b57cec5SDimitry Andric } 16020b57cec5SDimitry Andric 1603bdd1243dSDimitry Andric uint32_t alignment = getAlignment<ELFT>(sections, sym); 1604bdd1243dSDimitry Andric if (ver == idx) { 1605bdd1243dSDimitry Andric auto *s = symtab.addSymbol( 1606bdd1243dSDimitry Andric SharedSymbol{*this, name, sym.getBinding(), sym.st_other, 1607bdd1243dSDimitry Andric sym.getType(), sym.st_value, sym.st_size, alignment}); 16087a6dacacSDimitry Andric s->dsoDefined = true; 1609bdd1243dSDimitry Andric if (s->file == this) 16105f757f3fSDimitry Andric s->versionId = ver; 1611bdd1243dSDimitry Andric } 1612bdd1243dSDimitry Andric 1613bdd1243dSDimitry Andric // Also add the symbol with the versioned name to handle undefined symbols 1614bdd1243dSDimitry Andric // with explicit versions. 1615bdd1243dSDimitry Andric if (ver == VER_NDX_GLOBAL) 1616bdd1243dSDimitry Andric continue; 1617bdd1243dSDimitry Andric 16180b57cec5SDimitry Andric StringRef verName = 16190eae32dcSDimitry Andric stringTable.data() + 16200b57cec5SDimitry Andric reinterpret_cast<const Elf_Verdef *>(verdefs[idx])->getAux()->vda_name; 16210b57cec5SDimitry Andric versionedNameBuffer.clear(); 16220b57cec5SDimitry Andric name = (name + "@" + verName).toStringRef(versionedNameBuffer); 162381ad6265SDimitry Andric auto *s = symtab.addSymbol( 162481ad6265SDimitry Andric SharedSymbol{*this, saver().save(name), sym.getBinding(), sym.st_other, 162581ad6265SDimitry Andric sym.getType(), sym.st_value, sym.st_size, alignment}); 16267a6dacacSDimitry Andric s->dsoDefined = true; 162781ad6265SDimitry Andric if (s->file == this) 16285f757f3fSDimitry Andric s->versionId = idx; 16290b57cec5SDimitry Andric } 16300b57cec5SDimitry Andric } 16310b57cec5SDimitry Andric 16320b57cec5SDimitry Andric static ELFKind getBitcodeELFKind(const Triple &t) { 16330b57cec5SDimitry Andric if (t.isLittleEndian()) 16340b57cec5SDimitry Andric return t.isArch64Bit() ? ELF64LEKind : ELF32LEKind; 16350b57cec5SDimitry Andric return t.isArch64Bit() ? ELF64BEKind : ELF32BEKind; 16360b57cec5SDimitry Andric } 16370b57cec5SDimitry Andric 1638e8d8bef9SDimitry Andric static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { 16390b57cec5SDimitry Andric switch (t.getArch()) { 16400b57cec5SDimitry Andric case Triple::aarch64: 1641fe6060f1SDimitry Andric case Triple::aarch64_be: 16420b57cec5SDimitry Andric return EM_AARCH64; 16430b57cec5SDimitry Andric case Triple::amdgcn: 16440b57cec5SDimitry Andric case Triple::r600: 16450b57cec5SDimitry Andric return EM_AMDGPU; 16460b57cec5SDimitry Andric case Triple::arm: 16475f757f3fSDimitry Andric case Triple::armeb: 16480b57cec5SDimitry Andric case Triple::thumb: 16495f757f3fSDimitry Andric case Triple::thumbeb: 16500b57cec5SDimitry Andric return EM_ARM; 16510b57cec5SDimitry Andric case Triple::avr: 16520b57cec5SDimitry Andric return EM_AVR; 1653349cc55cSDimitry Andric case Triple::hexagon: 1654349cc55cSDimitry Andric return EM_HEXAGON; 165506c3fb27SDimitry Andric case Triple::loongarch32: 165606c3fb27SDimitry Andric case Triple::loongarch64: 165706c3fb27SDimitry Andric return EM_LOONGARCH; 16580b57cec5SDimitry Andric case Triple::mips: 16590b57cec5SDimitry Andric case Triple::mipsel: 16600b57cec5SDimitry Andric case Triple::mips64: 16610b57cec5SDimitry Andric case Triple::mips64el: 16620b57cec5SDimitry Andric return EM_MIPS; 16630b57cec5SDimitry Andric case Triple::msp430: 16640b57cec5SDimitry Andric return EM_MSP430; 16650b57cec5SDimitry Andric case Triple::ppc: 1666e8d8bef9SDimitry Andric case Triple::ppcle: 16670b57cec5SDimitry Andric return EM_PPC; 16680b57cec5SDimitry Andric case Triple::ppc64: 16690b57cec5SDimitry Andric case Triple::ppc64le: 16700b57cec5SDimitry Andric return EM_PPC64; 16710b57cec5SDimitry Andric case Triple::riscv32: 16720b57cec5SDimitry Andric case Triple::riscv64: 16730b57cec5SDimitry Andric return EM_RISCV; 16745f757f3fSDimitry Andric case Triple::sparcv9: 16755f757f3fSDimitry Andric return EM_SPARCV9; 167674626c16SDimitry Andric case Triple::systemz: 167774626c16SDimitry Andric return EM_S390; 16780b57cec5SDimitry Andric case Triple::x86: 16790b57cec5SDimitry Andric return t.isOSIAMCU() ? EM_IAMCU : EM_386; 16800b57cec5SDimitry Andric case Triple::x86_64: 16810b57cec5SDimitry Andric return EM_X86_64; 16820b57cec5SDimitry Andric default: 16830b57cec5SDimitry Andric error(path + ": could not infer e_machine from bitcode target triple " + 16840b57cec5SDimitry Andric t.str()); 16850b57cec5SDimitry Andric return EM_NONE; 16860b57cec5SDimitry Andric } 16870b57cec5SDimitry Andric } 16880b57cec5SDimitry Andric 1689e8d8bef9SDimitry Andric static uint8_t getOsAbi(const Triple &t) { 1690e8d8bef9SDimitry Andric switch (t.getOS()) { 1691e8d8bef9SDimitry Andric case Triple::AMDHSA: 1692e8d8bef9SDimitry Andric return ELF::ELFOSABI_AMDGPU_HSA; 1693e8d8bef9SDimitry Andric case Triple::AMDPAL: 1694e8d8bef9SDimitry Andric return ELF::ELFOSABI_AMDGPU_PAL; 1695e8d8bef9SDimitry Andric case Triple::Mesa3D: 1696e8d8bef9SDimitry Andric return ELF::ELFOSABI_AMDGPU_MESA3D; 1697e8d8bef9SDimitry Andric default: 1698e8d8bef9SDimitry Andric return ELF::ELFOSABI_NONE; 1699e8d8bef9SDimitry Andric } 1700e8d8bef9SDimitry Andric } 1701e8d8bef9SDimitry Andric 17020b57cec5SDimitry Andric BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, 17030eae32dcSDimitry Andric uint64_t offsetInArchive, bool lazy) 17040b57cec5SDimitry Andric : InputFile(BitcodeKind, mb) { 17050eae32dcSDimitry Andric this->archiveName = archiveName; 17060eae32dcSDimitry Andric this->lazy = lazy; 17070b57cec5SDimitry Andric 17080b57cec5SDimitry Andric std::string path = mb.getBufferIdentifier().str(); 17090b57cec5SDimitry Andric if (config->thinLTOIndexOnly) 17100b57cec5SDimitry Andric path = replaceThinLTOSuffix(mb.getBufferIdentifier()); 17110b57cec5SDimitry Andric 17120b57cec5SDimitry Andric // ThinLTO assumes that all MemoryBufferRefs given to it have a unique 17130b57cec5SDimitry Andric // name. If two archives define two members with the same name, this 17140b57cec5SDimitry Andric // causes a collision which result in only one of the objects being taken 17150b57cec5SDimitry Andric // into consideration at LTO time (which very likely causes undefined 17160b57cec5SDimitry Andric // symbols later in the link stage). So we append file offset to make 17170b57cec5SDimitry Andric // filename unique. 171804eeddc0SDimitry Andric StringRef name = archiveName.empty() 171904eeddc0SDimitry Andric ? saver().save(path) 172004eeddc0SDimitry Andric : saver().save(archiveName + "(" + path::filename(path) + 172104eeddc0SDimitry Andric " at " + utostr(offsetInArchive) + ")"); 17220b57cec5SDimitry Andric MemoryBufferRef mbref(mb.getBuffer(), name); 17230b57cec5SDimitry Andric 17240b57cec5SDimitry Andric obj = CHECK(lto::InputFile::create(mbref), this); 17250b57cec5SDimitry Andric 17260b57cec5SDimitry Andric Triple t(obj->getTargetTriple()); 17270b57cec5SDimitry Andric ekind = getBitcodeELFKind(t); 17280b57cec5SDimitry Andric emachine = getBitcodeMachineKind(mb.getBufferIdentifier(), t); 1729e8d8bef9SDimitry Andric osabi = getOsAbi(t); 17300b57cec5SDimitry Andric } 17310b57cec5SDimitry Andric 17320b57cec5SDimitry Andric static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { 17330b57cec5SDimitry Andric switch (gvVisibility) { 17340b57cec5SDimitry Andric case GlobalValue::DefaultVisibility: 17350b57cec5SDimitry Andric return STV_DEFAULT; 17360b57cec5SDimitry Andric case GlobalValue::HiddenVisibility: 17370b57cec5SDimitry Andric return STV_HIDDEN; 17380b57cec5SDimitry Andric case GlobalValue::ProtectedVisibility: 17390b57cec5SDimitry Andric return STV_PROTECTED; 17400b57cec5SDimitry Andric } 17410b57cec5SDimitry Andric llvm_unreachable("unknown visibility"); 17420b57cec5SDimitry Andric } 17430b57cec5SDimitry Andric 174404eeddc0SDimitry Andric static void 174504eeddc0SDimitry Andric createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats, 174604eeddc0SDimitry Andric const lto::InputFile::Symbol &objSym, BitcodeFile &f) { 17470b57cec5SDimitry Andric uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL; 17480b57cec5SDimitry Andric uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE; 17490b57cec5SDimitry Andric uint8_t visibility = mapVisibility(objSym.getVisibility()); 17500b57cec5SDimitry Andric 175181ad6265SDimitry Andric if (!sym) 1752bdd1243dSDimitry Andric sym = symtab.insert(saver().save(objSym.getName())); 175304eeddc0SDimitry Andric 17540b57cec5SDimitry Andric int c = objSym.getComdatIndex(); 17550b57cec5SDimitry Andric if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) { 175681ad6265SDimitry Andric Undefined newSym(&f, StringRef(), binding, visibility, type); 175704eeddc0SDimitry Andric sym->resolve(newSym); 175804eeddc0SDimitry Andric sym->referenced = true; 175904eeddc0SDimitry Andric return; 17600b57cec5SDimitry Andric } 17610b57cec5SDimitry Andric 176204eeddc0SDimitry Andric if (objSym.isCommon()) { 176381ad6265SDimitry Andric sym->resolve(CommonSymbol{&f, StringRef(), binding, visibility, STT_OBJECT, 176404eeddc0SDimitry Andric objSym.getCommonAlignment(), 176504eeddc0SDimitry Andric objSym.getCommonSize()}); 176604eeddc0SDimitry Andric } else { 176781ad6265SDimitry Andric Defined newSym(&f, StringRef(), binding, visibility, type, 0, 0, nullptr); 176881ad6265SDimitry Andric if (objSym.canBeOmittedFromSymbolTable()) 176985868e8aSDimitry Andric newSym.exportDynamic = false; 177004eeddc0SDimitry Andric sym->resolve(newSym); 177104eeddc0SDimitry Andric } 17720b57cec5SDimitry Andric } 17730b57cec5SDimitry Andric 1774bdd1243dSDimitry Andric void BitcodeFile::parse() { 1775fe6060f1SDimitry Andric for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) { 17760b57cec5SDimitry Andric keptComdats.push_back( 1777fe6060f1SDimitry Andric s.second == Comdat::NoDeduplicate || 1778bdd1243dSDimitry Andric symtab.comdatGroups.try_emplace(CachedHashStringRef(s.first), this) 1779fe6060f1SDimitry Andric .second); 1780fe6060f1SDimitry Andric } 17810b57cec5SDimitry Andric 1782bdd1243dSDimitry Andric if (numSymbols == 0) { 1783bdd1243dSDimitry Andric numSymbols = obj->symbols().size(); 1784bdd1243dSDimitry Andric symbols = std::make_unique<Symbol *[]>(numSymbols); 1785bdd1243dSDimitry Andric } 178681ad6265SDimitry Andric // Process defined symbols first. See the comment in 178781ad6265SDimitry Andric // ObjFile<ELFT>::initializeSymbols. 1788bdd1243dSDimitry Andric for (auto [i, irSym] : llvm::enumerate(obj->symbols())) 1789bdd1243dSDimitry Andric if (!irSym.isUndefined()) 1790bdd1243dSDimitry Andric createBitcodeSymbol(symbols[i], keptComdats, irSym, *this); 1791bdd1243dSDimitry Andric for (auto [i, irSym] : llvm::enumerate(obj->symbols())) 1792bdd1243dSDimitry Andric if (irSym.isUndefined()) 1793bdd1243dSDimitry Andric createBitcodeSymbol(symbols[i], keptComdats, irSym, *this); 17940b57cec5SDimitry Andric 17950b57cec5SDimitry Andric for (auto l : obj->getDependentLibraries()) 17960b57cec5SDimitry Andric addDependentLibrary(l, this); 17970b57cec5SDimitry Andric } 17980b57cec5SDimitry Andric 17990eae32dcSDimitry Andric void BitcodeFile::parseLazy() { 1800bdd1243dSDimitry Andric numSymbols = obj->symbols().size(); 1801bdd1243dSDimitry Andric symbols = std::make_unique<Symbol *[]>(numSymbols); 1802bdd1243dSDimitry Andric for (auto [i, irSym] : llvm::enumerate(obj->symbols())) 1803bdd1243dSDimitry Andric if (!irSym.isUndefined()) { 1804bdd1243dSDimitry Andric auto *sym = symtab.insert(saver().save(irSym.getName())); 18057a6dacacSDimitry Andric sym->resolve(LazySymbol{*this}); 1806bdd1243dSDimitry Andric symbols[i] = sym; 180781ad6265SDimitry Andric } 180881ad6265SDimitry Andric } 180981ad6265SDimitry Andric 181081ad6265SDimitry Andric void BitcodeFile::postParse() { 1811bdd1243dSDimitry Andric for (auto [i, irSym] : llvm::enumerate(obj->symbols())) { 1812bdd1243dSDimitry Andric const Symbol &sym = *symbols[i]; 1813bdd1243dSDimitry Andric if (sym.file == this || !sym.isDefined() || irSym.isUndefined() || 1814bdd1243dSDimitry Andric irSym.isCommon() || irSym.isWeak()) 181581ad6265SDimitry Andric continue; 1816bdd1243dSDimitry Andric int c = irSym.getComdatIndex(); 181781ad6265SDimitry Andric if (c != -1 && !keptComdats[c]) 181881ad6265SDimitry Andric continue; 181981ad6265SDimitry Andric reportDuplicate(sym, this, nullptr, 0); 182081ad6265SDimitry Andric } 18210eae32dcSDimitry Andric } 18220eae32dcSDimitry Andric 18230b57cec5SDimitry Andric void BinaryFile::parse() { 18240b57cec5SDimitry Andric ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer()); 18250b57cec5SDimitry Andric auto *section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 18260b57cec5SDimitry Andric 8, data, ".data"); 18270b57cec5SDimitry Andric sections.push_back(section); 18280b57cec5SDimitry Andric 18290b57cec5SDimitry Andric // For each input file foo that is embedded to a result as a binary 18300b57cec5SDimitry Andric // blob, we define _binary_foo_{start,end,size} symbols, so that 18310b57cec5SDimitry Andric // user programs can access blobs by name. Non-alphanumeric 18320b57cec5SDimitry Andric // characters in a filename are replaced with underscore. 18330b57cec5SDimitry Andric std::string s = "_binary_" + mb.getBufferIdentifier().str(); 183406c3fb27SDimitry Andric for (char &c : s) 183506c3fb27SDimitry Andric if (!isAlnum(c)) 183606c3fb27SDimitry Andric c = '_'; 18370b57cec5SDimitry Andric 183804eeddc0SDimitry Andric llvm::StringSaver &saver = lld::saver(); 183904eeddc0SDimitry Andric 18405f757f3fSDimitry Andric symtab.addAndCheckDuplicate(Defined{this, saver.save(s + "_start"), 1841bdd1243dSDimitry Andric STB_GLOBAL, STV_DEFAULT, STT_OBJECT, 0, 0, 1842bdd1243dSDimitry Andric section}); 18435f757f3fSDimitry Andric symtab.addAndCheckDuplicate(Defined{this, saver.save(s + "_end"), STB_GLOBAL, 18445f757f3fSDimitry Andric STV_DEFAULT, STT_OBJECT, data.size(), 0, 18455f757f3fSDimitry Andric section}); 18465f757f3fSDimitry Andric symtab.addAndCheckDuplicate(Defined{this, saver.save(s + "_size"), STB_GLOBAL, 18475f757f3fSDimitry Andric STV_DEFAULT, STT_OBJECT, data.size(), 0, 18485f757f3fSDimitry Andric nullptr}); 18490b57cec5SDimitry Andric } 18500b57cec5SDimitry Andric 18517a6dacacSDimitry Andric InputFile *elf::createInternalFile(StringRef name) { 1852b3edf446SDimitry Andric auto *file = 1853b3edf446SDimitry Andric make<InputFile>(InputFile::InternalKind, MemoryBufferRef("", name)); 1854b3edf446SDimitry Andric // References from an internal file do not lead to --warn-backrefs 1855b3edf446SDimitry Andric // diagnostics. 1856b3edf446SDimitry Andric file->groupId = 0; 1857b3edf446SDimitry Andric return file; 18587a6dacacSDimitry Andric } 18597a6dacacSDimitry Andric 1860fcaf7f86SDimitry Andric ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName, 1861fcaf7f86SDimitry Andric bool lazy) { 1862fcaf7f86SDimitry Andric ELFFileBase *f; 18630b57cec5SDimitry Andric switch (getELFKind(mb, archiveName)) { 18640b57cec5SDimitry Andric case ELF32LEKind: 1865bdd1243dSDimitry Andric f = make<ObjFile<ELF32LE>>(ELF32LEKind, mb, archiveName); 1866fcaf7f86SDimitry Andric break; 18670b57cec5SDimitry Andric case ELF32BEKind: 1868bdd1243dSDimitry Andric f = make<ObjFile<ELF32BE>>(ELF32BEKind, mb, archiveName); 1869fcaf7f86SDimitry Andric break; 18700b57cec5SDimitry Andric case ELF64LEKind: 1871bdd1243dSDimitry Andric f = make<ObjFile<ELF64LE>>(ELF64LEKind, mb, archiveName); 1872fcaf7f86SDimitry Andric break; 18730b57cec5SDimitry Andric case ELF64BEKind: 1874bdd1243dSDimitry Andric f = make<ObjFile<ELF64BE>>(ELF64BEKind, mb, archiveName); 1875fcaf7f86SDimitry Andric break; 18760b57cec5SDimitry Andric default: 18770b57cec5SDimitry Andric llvm_unreachable("getELFKind"); 18780b57cec5SDimitry Andric } 1879bdd1243dSDimitry Andric f->init(); 1880fcaf7f86SDimitry Andric f->lazy = lazy; 1881fcaf7f86SDimitry Andric return f; 18820b57cec5SDimitry Andric } 18830b57cec5SDimitry Andric 18840eae32dcSDimitry Andric template <class ELFT> void ObjFile<ELFT>::parseLazy() { 18850eae32dcSDimitry Andric const ArrayRef<typename ELFT::Sym> eSyms = this->getELFSyms<ELFT>(); 1886bdd1243dSDimitry Andric numSymbols = eSyms.size(); 1887bdd1243dSDimitry Andric symbols = std::make_unique<Symbol *[]>(numSymbols); 18880b57cec5SDimitry Andric 18890eae32dcSDimitry Andric // resolve() may trigger this->extract() if an existing symbol is an undefined 18900eae32dcSDimitry Andric // symbol. If that happens, this function has served its purpose, and we can 18910eae32dcSDimitry Andric // exit from the loop early. 1892bdd1243dSDimitry Andric for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 1893bdd1243dSDimitry Andric if (eSyms[i].st_shndx == SHN_UNDEF) 1894bdd1243dSDimitry Andric continue; 1895bdd1243dSDimitry Andric symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this)); 18967a6dacacSDimitry Andric symbols[i]->resolve(LazySymbol{*this}); 18970eae32dcSDimitry Andric if (!lazy) 1898bdd1243dSDimitry Andric break; 18990b57cec5SDimitry Andric } 19000b57cec5SDimitry Andric } 19010b57cec5SDimitry Andric 19025f757f3fSDimitry Andric bool InputFile::shouldExtractForCommon(StringRef name) const { 1903fcaf7f86SDimitry Andric if (isa<BitcodeFile>(this)) 1904e8d8bef9SDimitry Andric return isBitcodeNonCommonDef(mb, name, archiveName); 1905e8d8bef9SDimitry Andric 1906e8d8bef9SDimitry Andric return isNonCommonDef(mb, name, archiveName); 1907e8d8bef9SDimitry Andric } 1908e8d8bef9SDimitry Andric 19095ffd83dbSDimitry Andric std::string elf::replaceThinLTOSuffix(StringRef path) { 1910bdd1243dSDimitry Andric auto [suffix, repl] = config->thinLTOObjectSuffixReplace; 19110b57cec5SDimitry Andric if (path.consume_back(suffix)) 19120b57cec5SDimitry Andric return (path + repl).str(); 19135ffd83dbSDimitry Andric return std::string(path); 19140b57cec5SDimitry Andric } 19150b57cec5SDimitry Andric 19165ffd83dbSDimitry Andric template class elf::ObjFile<ELF32LE>; 19175ffd83dbSDimitry Andric template class elf::ObjFile<ELF32BE>; 19185ffd83dbSDimitry Andric template class elf::ObjFile<ELF64LE>; 19195ffd83dbSDimitry Andric template class elf::ObjFile<ELF64BE>; 19200b57cec5SDimitry Andric 19210b57cec5SDimitry Andric template void SharedFile::parse<ELF32LE>(); 19220b57cec5SDimitry Andric template void SharedFile::parse<ELF32BE>(); 19230b57cec5SDimitry Andric template void SharedFile::parse<ELF64LE>(); 19240b57cec5SDimitry Andric template void SharedFile::parse<ELF64BE>(); 1925