1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_INPUT_FILES_H 10 #define LLD_ELF_INPUT_FILES_H 11 12 #include "Config.h" 13 #include "Symbols.h" 14 #include "lld/Common/ErrorHandler.h" 15 #include "lld/Common/LLVM.h" 16 #include "lld/Common/Reproduce.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/BinaryFormat/Magic.h" 19 #include "llvm/Object/ELF.h" 20 #include "llvm/Support/MemoryBufferRef.h" 21 #include "llvm/Support/Threading.h" 22 23 namespace llvm { 24 struct DILineInfo; 25 class TarWriter; 26 namespace lto { 27 class InputFile; 28 } 29 } // namespace llvm 30 31 namespace lld { 32 class DWARFCache; 33 34 // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 35 std::string toString(const elf::InputFile *f); 36 37 namespace elf { 38 39 class InputSection; 40 class Symbol; 41 42 // If --reproduce is specified, all input files are written to this tar archive. 43 extern std::unique_ptr<llvm::TarWriter> tar; 44 45 // Opens a given file. 46 llvm::Optional<MemoryBufferRef> readFile(StringRef path); 47 48 // Add symbols in File to the symbol table. 49 void parseFile(InputFile *file); 50 51 // The root class of input files. 52 class InputFile { 53 protected: 54 SmallVector<Symbol *, 0> symbols; 55 SmallVector<InputSectionBase *, 0> sections; 56 57 public: 58 enum Kind : uint8_t { 59 ObjKind, 60 SharedKind, 61 ArchiveKind, 62 BitcodeKind, 63 BinaryKind, 64 }; 65 66 Kind kind() const { return fileKind; } 67 68 bool isElf() const { 69 Kind k = kind(); 70 return k == ObjKind || k == SharedKind; 71 } 72 73 StringRef getName() const { return mb.getBufferIdentifier(); } 74 MemoryBufferRef mb; 75 76 // Returns sections. It is a runtime error to call this function 77 // on files that don't have the notion of sections. 78 ArrayRef<InputSectionBase *> getSections() const { 79 assert(fileKind == ObjKind || fileKind == BinaryKind); 80 return sections; 81 } 82 83 // Returns object file symbols. It is a runtime error to call this 84 // function on files of other types. 85 ArrayRef<Symbol *> getSymbols() const { 86 assert(fileKind == BinaryKind || fileKind == ObjKind || 87 fileKind == BitcodeKind); 88 return symbols; 89 } 90 91 // Get filename to use for linker script processing. 92 StringRef getNameForScript() const; 93 94 // Check if a non-common symbol should be extracted to override a common 95 // definition. 96 bool shouldExtractForCommon(StringRef name); 97 98 // .got2 in the current file. This is used by PPC32 -fPIC/-fPIE to compute 99 // offsets in PLT call stubs. 100 InputSection *ppc32Got2 = nullptr; 101 102 // Index of MIPS GOT built for this file. 103 uint32_t mipsGotIndex = -1; 104 105 // groupId is used for --warn-backrefs which is an optional error 106 // checking feature. All files within the same --{start,end}-group or 107 // --{start,end}-lib get the same group ID. Otherwise, each file gets a new 108 // group ID. For more info, see checkDependency() in SymbolTable.cpp. 109 uint32_t groupId; 110 static bool isInGroup; 111 static uint32_t nextGroupId; 112 113 // If this is an architecture-specific file, the following members 114 // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. 115 uint16_t emachine = llvm::ELF::EM_NONE; 116 const Kind fileKind; 117 ELFKind ekind = ELFNoneKind; 118 uint8_t osabi = 0; 119 uint8_t abiVersion = 0; 120 121 // True if this is a relocatable object file/bitcode file between --start-lib 122 // and --end-lib. 123 bool lazy = false; 124 125 // True if this is an argument for --just-symbols. Usually false. 126 bool justSymbols = false; 127 128 std::string getSrcMsg(const Symbol &sym, InputSectionBase &sec, 129 uint64_t offset); 130 131 // On PPC64 we need to keep track of which files contain small code model 132 // relocations that access the .toc section. To minimize the chance of a 133 // relocation overflow, files that do contain said relocations should have 134 // their .toc sections sorted closer to the .got section than files that do 135 // not contain any small code model relocations. Thats because the toc-pointer 136 // is defined to point at .got + 0x8000 and the instructions used with small 137 // code model relocations support immediates in the range [-0x8000, 0x7FFC], 138 // making the addressable range relative to the toc pointer 139 // [.got, .got + 0xFFFC]. 140 bool ppc64SmallCodeModelTocRelocs = false; 141 142 // True if the file has TLSGD/TLSLD GOT relocations without R_PPC64_TLSGD or 143 // R_PPC64_TLSLD. Disable TLS relaxation to avoid bad code generation. 144 bool ppc64DisableTLSRelax = false; 145 146 protected: 147 InputFile(Kind k, MemoryBufferRef m); 148 149 public: 150 // If not empty, this stores the name of the archive containing this file. 151 // We use this string for creating error messages. 152 SmallString<0> archiveName; 153 // Cache for toString(). Only toString() should use this member. 154 mutable SmallString<0> toStringCache; 155 156 private: 157 // Cache for getNameForScript(). 158 mutable SmallString<0> nameForScriptCache; 159 }; 160 161 class ELFFileBase : public InputFile { 162 public: 163 ELFFileBase(Kind k, MemoryBufferRef m); 164 static bool classof(const InputFile *f) { return f->isElf(); } 165 166 template <typename ELFT> llvm::object::ELFFile<ELFT> getObj() const { 167 return check(llvm::object::ELFFile<ELFT>::create(mb.getBuffer())); 168 } 169 170 StringRef getStringTable() const { return stringTable; } 171 172 ArrayRef<Symbol *> getLocalSymbols() { 173 if (symbols.empty()) 174 return {}; 175 return llvm::makeArrayRef(symbols).slice(1, firstGlobal - 1); 176 } 177 ArrayRef<Symbol *> getGlobalSymbols() { 178 return llvm::makeArrayRef(symbols).slice(firstGlobal); 179 } 180 MutableArrayRef<Symbol *> getMutableGlobalSymbols() { 181 return llvm::makeMutableArrayRef(symbols.data(), symbols.size()) 182 .slice(firstGlobal); 183 } 184 185 template <typename ELFT> typename ELFT::ShdrRange getELFShdrs() const { 186 return typename ELFT::ShdrRange( 187 reinterpret_cast<const typename ELFT::Shdr *>(elfShdrs), numELFShdrs); 188 } 189 template <typename ELFT> typename ELFT::SymRange getELFSyms() const { 190 return typename ELFT::SymRange( 191 reinterpret_cast<const typename ELFT::Sym *>(elfSyms), numELFSyms); 192 } 193 template <typename ELFT> typename ELFT::SymRange getGlobalELFSyms() const { 194 return getELFSyms<ELFT>().slice(firstGlobal); 195 } 196 197 protected: 198 // Initializes this class's member variables. 199 template <typename ELFT> void init(); 200 201 StringRef stringTable; 202 const void *elfShdrs = nullptr; 203 const void *elfSyms = nullptr; 204 uint32_t numELFShdrs = 0; 205 uint32_t numELFSyms = 0; 206 uint32_t firstGlobal = 0; 207 208 public: 209 uint32_t andFeatures = 0; 210 bool hasCommonSyms = false; 211 }; 212 213 // .o file. 214 template <class ELFT> class ObjFile : public ELFFileBase { 215 LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) 216 217 public: 218 static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 219 220 llvm::object::ELFFile<ELFT> getObj() const { 221 return this->ELFFileBase::getObj<ELFT>(); 222 } 223 224 ObjFile(MemoryBufferRef m, StringRef archiveName) : ELFFileBase(ObjKind, m) { 225 this->archiveName = archiveName; 226 } 227 228 void parse(bool ignoreComdats = false); 229 void parseLazy(); 230 231 StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> sections, 232 const Elf_Shdr &sec); 233 234 Symbol &getSymbol(uint32_t symbolIndex) const { 235 if (symbolIndex >= this->symbols.size()) 236 fatal(toString(this) + ": invalid symbol index"); 237 return *this->symbols[symbolIndex]; 238 } 239 240 uint32_t getSectionIndex(const Elf_Sym &sym) const; 241 242 template <typename RelT> Symbol &getRelocTargetSym(const RelT &rel) const { 243 uint32_t symIndex = rel.getSymbol(config->isMips64EL); 244 return getSymbol(symIndex); 245 } 246 247 llvm::Optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t); 248 llvm::Optional<std::pair<std::string, unsigned>> getVariableLoc(StringRef name); 249 250 // Name of source file obtained from STT_FILE symbol value, 251 // or empty string if there is no such symbol in object file 252 // symbol table. 253 StringRef sourceFile; 254 255 // Pointer to this input file's .llvm_addrsig section, if it has one. 256 const Elf_Shdr *addrsigSec = nullptr; 257 258 // SHT_LLVM_CALL_GRAPH_PROFILE section index. 259 uint32_t cgProfileSectionIndex = 0; 260 261 // MIPS GP0 value defined by this file. This value represents the gp value 262 // used to create the relocatable object and required to support 263 // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. 264 uint32_t mipsGp0 = 0; 265 266 // True if the file defines functions compiled with 267 // -fsplit-stack. Usually false. 268 bool splitStack = false; 269 270 // True if the file defines functions compiled with -fsplit-stack, 271 // but had one or more functions with the no_split_stack attribute. 272 bool someNoSplitStack = false; 273 274 // Get cached DWARF information. 275 DWARFCache *getDwarf(); 276 277 void initializeLocalSymbols(); 278 void postParse(); 279 280 private: 281 void initializeSections(bool ignoreComdats, 282 const llvm::object::ELFFile<ELFT> &obj); 283 void initializeSymbols(const llvm::object::ELFFile<ELFT> &obj); 284 void initializeJustSymbols(); 285 286 InputSectionBase *getRelocTarget(uint32_t idx, const Elf_Shdr &sec, 287 uint32_t info); 288 InputSectionBase *createInputSection(uint32_t idx, const Elf_Shdr &sec, 289 StringRef name); 290 291 bool shouldMerge(const Elf_Shdr &sec, StringRef name); 292 293 // Each ELF symbol contains a section index which the symbol belongs to. 294 // However, because the number of bits dedicated for that is limited, a 295 // symbol can directly point to a section only when the section index is 296 // equal to or smaller than 65280. 297 // 298 // If an object file contains more than 65280 sections, the file must 299 // contain .symtab_shndx section. The section contains an array of 300 // 32-bit integers whose size is the same as the number of symbols. 301 // Nth symbol's section index is in the Nth entry of .symtab_shndx. 302 // 303 // The following variable contains the contents of .symtab_shndx. 304 // If the section does not exist (which is common), the array is empty. 305 ArrayRef<Elf_Word> shndxTable; 306 307 // Storage for local symbols. 308 std::unique_ptr<SymbolUnion[]> localSymStorage; 309 310 // Debugging information to retrieve source file and line for error 311 // reporting. Linker may find reasonable number of errors in a 312 // single object file, so we cache debugging information in order to 313 // parse it only once for each object file we link. 314 std::unique_ptr<DWARFCache> dwarf; 315 llvm::once_flag initDwarf; 316 }; 317 318 class BitcodeFile : public InputFile { 319 public: 320 BitcodeFile(MemoryBufferRef m, StringRef archiveName, 321 uint64_t offsetInArchive, bool lazy); 322 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 323 template <class ELFT> void parse(); 324 void parseLazy(); 325 void postParse(); 326 std::unique_ptr<llvm::lto::InputFile> obj; 327 std::vector<bool> keptComdats; 328 }; 329 330 // .so file. 331 class SharedFile : public ELFFileBase { 332 public: 333 SharedFile(MemoryBufferRef m, StringRef defaultSoName) 334 : ELFFileBase(SharedKind, m), soName(defaultSoName), 335 isNeeded(!config->asNeeded) {} 336 337 // This is actually a vector of Elf_Verdef pointers. 338 SmallVector<const void *, 0> verdefs; 339 340 // If the output file needs Elf_Verneed data structures for this file, this is 341 // a vector of Elf_Vernaux version identifiers that map onto the entries in 342 // Verdefs, otherwise it is empty. 343 SmallVector<uint32_t, 0> vernauxs; 344 345 static unsigned vernauxNum; 346 347 SmallVector<StringRef, 0> dtNeeded; 348 StringRef soName; 349 350 static bool classof(const InputFile *f) { return f->kind() == SharedKind; } 351 352 template <typename ELFT> void parse(); 353 354 // Used for --as-needed 355 bool isNeeded; 356 357 // Non-weak undefined symbols which are not yet resolved when the SO is 358 // parsed. Only filled for `--no-allow-shlib-undefined`. 359 SmallVector<Symbol *, 0> requiredSymbols; 360 361 private: 362 template <typename ELFT> 363 std::vector<uint32_t> parseVerneed(const llvm::object::ELFFile<ELFT> &obj, 364 const typename ELFT::Shdr *sec); 365 }; 366 367 class BinaryFile : public InputFile { 368 public: 369 explicit BinaryFile(MemoryBufferRef m) : InputFile(BinaryKind, m) {} 370 static bool classof(const InputFile *f) { return f->kind() == BinaryKind; } 371 void parse(); 372 }; 373 374 ELFFileBase *createObjFile(MemoryBufferRef mb, StringRef archiveName = "", 375 bool lazy = false); 376 377 std::string replaceThinLTOSuffix(StringRef path); 378 379 } // namespace elf 380 } // namespace lld 381 382 #endif 383