1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_INPUT_FILES_H 10 #define LLD_ELF_INPUT_FILES_H 11 12 #include "Config.h" 13 #include "lld/Common/ErrorHandler.h" 14 #include "lld/Common/LLVM.h" 15 #include "lld/Common/Reproduce.h" 16 #include "llvm/ADT/CachedHashString.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/IR/Comdat.h" 20 #include "llvm/Object/Archive.h" 21 #include "llvm/Object/ELF.h" 22 #include "llvm/Object/IRObjectFile.h" 23 #include "llvm/Support/Threading.h" 24 #include <map> 25 26 namespace llvm { 27 struct DILineInfo; 28 class TarWriter; 29 namespace lto { 30 class InputFile; 31 } 32 } // namespace llvm 33 34 namespace lld { 35 class DWARFCache; 36 37 // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 38 std::string toString(const elf::InputFile *f); 39 40 namespace elf { 41 42 using llvm::object::Archive; 43 44 class Symbol; 45 46 // If -reproduce option is given, all input files are written 47 // to this tar archive. 48 extern std::unique_ptr<llvm::TarWriter> tar; 49 50 // Opens a given file. 51 llvm::Optional<MemoryBufferRef> readFile(StringRef path); 52 53 // Add symbols in File to the symbol table. 54 void parseFile(InputFile *file); 55 56 // The root class of input files. 57 class InputFile { 58 public: 59 enum Kind { 60 ObjKind, 61 SharedKind, 62 LazyObjKind, 63 ArchiveKind, 64 BitcodeKind, 65 BinaryKind, 66 }; 67 68 Kind kind() const { return fileKind; } 69 70 bool isElf() const { 71 Kind k = kind(); 72 return k == ObjKind || k == SharedKind; 73 } 74 75 StringRef getName() const { return mb.getBufferIdentifier(); } 76 MemoryBufferRef mb; 77 78 // Returns sections. It is a runtime error to call this function 79 // on files that don't have the notion of sections. 80 ArrayRef<InputSectionBase *> getSections() const { 81 assert(fileKind == ObjKind || fileKind == BinaryKind); 82 return sections; 83 } 84 85 // Returns object file symbols. It is a runtime error to call this 86 // function on files of other types. 87 ArrayRef<Symbol *> getSymbols() { return getMutableSymbols(); } 88 89 MutableArrayRef<Symbol *> getMutableSymbols() { 90 assert(fileKind == BinaryKind || fileKind == ObjKind || 91 fileKind == BitcodeKind); 92 return symbols; 93 } 94 95 // Get filename to use for linker script processing. 96 StringRef getNameForScript() const; 97 98 // If not empty, this stores the name of the archive containing this file. 99 // We use this string for creating error messages. 100 std::string archiveName; 101 102 // If this is an architecture-specific file, the following members 103 // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. 104 ELFKind ekind = ELFNoneKind; 105 uint16_t emachine = llvm::ELF::EM_NONE; 106 uint8_t osabi = 0; 107 uint8_t abiVersion = 0; 108 109 // Cache for toString(). Only toString() should use this member. 110 mutable std::string toStringCache; 111 112 std::string getSrcMsg(const Symbol &sym, InputSectionBase &sec, 113 uint64_t offset); 114 115 // True if this is an argument for --just-symbols. Usually false. 116 bool justSymbols = false; 117 118 // outSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE 119 // to compute offsets in PLT call stubs. 120 uint32_t ppc32Got2OutSecOff = 0; 121 122 // On PPC64 we need to keep track of which files contain small code model 123 // relocations that access the .toc section. To minimize the chance of a 124 // relocation overflow, files that do contain said relocations should have 125 // their .toc sections sorted closer to the .got section than files that do 126 // not contain any small code model relocations. Thats because the toc-pointer 127 // is defined to point at .got + 0x8000 and the instructions used with small 128 // code model relocations support immediates in the range [-0x8000, 0x7FFC], 129 // making the addressable range relative to the toc pointer 130 // [.got, .got + 0xFFFC]. 131 bool ppc64SmallCodeModelTocRelocs = false; 132 133 // True if the file has TLSGD/TLSLD GOT relocations without R_PPC64_TLSGD or 134 // R_PPC64_TLSLD. Disable TLS relaxation to avoid bad code generation. 135 bool ppc64DisableTLSRelax = false; 136 137 // groupId is used for --warn-backrefs which is an optional error 138 // checking feature. All files within the same --{start,end}-group or 139 // --{start,end}-lib get the same group ID. Otherwise, each file gets a new 140 // group ID. For more info, see checkDependency() in SymbolTable.cpp. 141 uint32_t groupId; 142 static bool isInGroup; 143 static uint32_t nextGroupId; 144 145 // Index of MIPS GOT built for this file. 146 llvm::Optional<size_t> mipsGotIndex; 147 148 std::vector<Symbol *> symbols; 149 150 protected: 151 InputFile(Kind k, MemoryBufferRef m); 152 std::vector<InputSectionBase *> sections; 153 154 private: 155 const Kind fileKind; 156 157 // Cache for getNameForScript(). 158 mutable std::string nameForScriptCache; 159 }; 160 161 class ELFFileBase : public InputFile { 162 public: 163 ELFFileBase(Kind k, MemoryBufferRef m); 164 static bool classof(const InputFile *f) { return f->isElf(); } 165 166 template <typename ELFT> llvm::object::ELFFile<ELFT> getObj() const { 167 return check(llvm::object::ELFFile<ELFT>::create(mb.getBuffer())); 168 } 169 170 StringRef getStringTable() const { return stringTable; } 171 172 template <typename ELFT> typename ELFT::SymRange getELFSyms() const { 173 return typename ELFT::SymRange( 174 reinterpret_cast<const typename ELFT::Sym *>(elfSyms), numELFSyms); 175 } 176 template <typename ELFT> typename ELFT::SymRange getGlobalELFSyms() const { 177 return getELFSyms<ELFT>().slice(firstGlobal); 178 } 179 180 protected: 181 // Initializes this class's member variables. 182 template <typename ELFT> void init(); 183 184 const void *elfSyms = nullptr; 185 size_t numELFSyms = 0; 186 uint32_t firstGlobal = 0; 187 StringRef stringTable; 188 }; 189 190 // .o file. 191 template <class ELFT> class ObjFile : public ELFFileBase { 192 LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) 193 194 public: 195 static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 196 197 llvm::object::ELFFile<ELFT> getObj() const { 198 return this->ELFFileBase::getObj<ELFT>(); 199 } 200 201 ArrayRef<Symbol *> getLocalSymbols(); 202 ArrayRef<Symbol *> getGlobalSymbols(); 203 204 ObjFile(MemoryBufferRef m, StringRef archiveName) : ELFFileBase(ObjKind, m) { 205 this->archiveName = std::string(archiveName); 206 } 207 208 void parse(bool ignoreComdats = false); 209 210 StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> sections, 211 const Elf_Shdr &sec); 212 213 Symbol &getSymbol(uint32_t symbolIndex) const { 214 if (symbolIndex >= this->symbols.size()) 215 fatal(toString(this) + ": invalid symbol index"); 216 return *this->symbols[symbolIndex]; 217 } 218 219 uint32_t getSectionIndex(const Elf_Sym &sym) const; 220 221 template <typename RelT> Symbol &getRelocTargetSym(const RelT &rel) const { 222 uint32_t symIndex = rel.getSymbol(config->isMips64EL); 223 return getSymbol(symIndex); 224 } 225 226 llvm::Optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t); 227 llvm::Optional<std::pair<std::string, unsigned>> getVariableLoc(StringRef name); 228 229 // MIPS GP0 value defined by this file. This value represents the gp value 230 // used to create the relocatable object and required to support 231 // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. 232 uint32_t mipsGp0 = 0; 233 234 uint32_t andFeatures = 0; 235 236 // Name of source file obtained from STT_FILE symbol value, 237 // or empty string if there is no such symbol in object file 238 // symbol table. 239 StringRef sourceFile; 240 241 // True if the file defines functions compiled with 242 // -fsplit-stack. Usually false. 243 bool splitStack = false; 244 245 // True if the file defines functions compiled with -fsplit-stack, 246 // but had one or more functions with the no_split_stack attribute. 247 bool someNoSplitStack = false; 248 249 // Pointer to this input file's .llvm_addrsig section, if it has one. 250 const Elf_Shdr *addrsigSec = nullptr; 251 252 // SHT_LLVM_CALL_GRAPH_PROFILE section index. 253 uint32_t cgProfileSectionIndex = 0; 254 255 // Get cached DWARF information. 256 DWARFCache *getDwarf(); 257 258 private: 259 void initializeSections(bool ignoreComdats); 260 void initializeSymbols(); 261 void initializeJustSymbols(); 262 263 InputSectionBase *getRelocTarget(const Elf_Shdr &sec); 264 InputSectionBase *createInputSection(const Elf_Shdr &sec); 265 StringRef getSectionName(const Elf_Shdr &sec); 266 267 bool shouldMerge(const Elf_Shdr &sec, StringRef name); 268 269 // Each ELF symbol contains a section index which the symbol belongs to. 270 // However, because the number of bits dedicated for that is limited, a 271 // symbol can directly point to a section only when the section index is 272 // equal to or smaller than 65280. 273 // 274 // If an object file contains more than 65280 sections, the file must 275 // contain .symtab_shndx section. The section contains an array of 276 // 32-bit integers whose size is the same as the number of symbols. 277 // Nth symbol's section index is in the Nth entry of .symtab_shndx. 278 // 279 // The following variable contains the contents of .symtab_shndx. 280 // If the section does not exist (which is common), the array is empty. 281 ArrayRef<Elf_Word> shndxTable; 282 283 // .shstrtab contents. 284 StringRef sectionStringTable; 285 286 // Debugging information to retrieve source file and line for error 287 // reporting. Linker may find reasonable number of errors in a 288 // single object file, so we cache debugging information in order to 289 // parse it only once for each object file we link. 290 std::unique_ptr<DWARFCache> dwarf; 291 llvm::once_flag initDwarf; 292 }; 293 294 // LazyObjFile is analogous to ArchiveFile in the sense that 295 // the file contains lazy symbols. The difference is that 296 // LazyObjFile wraps a single file instead of multiple files. 297 // 298 // This class is used for --start-lib and --end-lib options which 299 // instruct the linker to link object files between them with the 300 // archive file semantics. 301 class LazyObjFile : public InputFile { 302 public: 303 LazyObjFile(MemoryBufferRef m, StringRef archiveName, 304 uint64_t offsetInArchive) 305 : InputFile(LazyObjKind, m), offsetInArchive(offsetInArchive) { 306 this->archiveName = std::string(archiveName); 307 } 308 309 static bool classof(const InputFile *f) { return f->kind() == LazyObjKind; } 310 311 template <class ELFT> void parse(); 312 void fetch(); 313 314 // Check if a non-common symbol should be fetched to override a common 315 // definition. 316 bool shouldFetchForCommon(const StringRef &name); 317 318 bool fetched = false; 319 320 private: 321 uint64_t offsetInArchive; 322 }; 323 324 // An ArchiveFile object represents a .a file. 325 class ArchiveFile : public InputFile { 326 public: 327 explicit ArchiveFile(std::unique_ptr<Archive> &&file); 328 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 329 void parse(); 330 331 // Pulls out an object file that contains a definition for Sym and 332 // returns it. If the same file was instantiated before, this 333 // function does nothing (so we don't instantiate the same file 334 // more than once.) 335 void fetch(const Archive::Symbol &sym); 336 337 // Check if a non-common symbol should be fetched to override a common 338 // definition. 339 bool shouldFetchForCommon(const Archive::Symbol &sym); 340 341 size_t getMemberCount() const; 342 size_t getFetchedMemberCount() const { return seen.size(); } 343 344 bool parsed = false; 345 346 private: 347 std::unique_ptr<Archive> file; 348 llvm::DenseSet<uint64_t> seen; 349 }; 350 351 class BitcodeFile : public InputFile { 352 public: 353 BitcodeFile(MemoryBufferRef m, StringRef archiveName, 354 uint64_t offsetInArchive); 355 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 356 template <class ELFT> void parse(); 357 std::unique_ptr<llvm::lto::InputFile> obj; 358 }; 359 360 // .so file. 361 class SharedFile : public ELFFileBase { 362 public: 363 SharedFile(MemoryBufferRef m, StringRef defaultSoName) 364 : ELFFileBase(SharedKind, m), soName(std::string(defaultSoName)), 365 isNeeded(!config->asNeeded) {} 366 367 // This is actually a vector of Elf_Verdef pointers. 368 std::vector<const void *> verdefs; 369 370 // If the output file needs Elf_Verneed data structures for this file, this is 371 // a vector of Elf_Vernaux version identifiers that map onto the entries in 372 // Verdefs, otherwise it is empty. 373 std::vector<unsigned> vernauxs; 374 375 static unsigned vernauxNum; 376 377 std::vector<StringRef> dtNeeded; 378 std::string soName; 379 380 static bool classof(const InputFile *f) { return f->kind() == SharedKind; } 381 382 template <typename ELFT> void parse(); 383 384 // Used for --as-needed 385 bool isNeeded; 386 387 // Non-weak undefined symbols which are not yet resolved when the SO is 388 // parsed. Only filled for `--no-allow-shlib-undefined`. 389 std::vector<Symbol *> requiredSymbols; 390 391 private: 392 template <typename ELFT> 393 std::vector<uint32_t> parseVerneed(const llvm::object::ELFFile<ELFT> &obj, 394 const typename ELFT::Shdr *sec); 395 }; 396 397 class BinaryFile : public InputFile { 398 public: 399 explicit BinaryFile(MemoryBufferRef m) : InputFile(BinaryKind, m) {} 400 static bool classof(const InputFile *f) { return f->kind() == BinaryKind; } 401 void parse(); 402 }; 403 404 InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "", 405 uint64_t offsetInArchive = 0); 406 407 inline bool isBitcode(MemoryBufferRef mb) { 408 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; 409 } 410 411 std::string replaceThinLTOSuffix(StringRef path); 412 413 extern std::vector<ArchiveFile *> archiveFiles; 414 extern std::vector<BinaryFile *> binaryFiles; 415 extern std::vector<BitcodeFile *> bitcodeFiles; 416 extern std::vector<LazyObjFile *> lazyObjFiles; 417 extern std::vector<InputFile *> objectFiles; 418 extern std::vector<SharedFile *> sharedFiles; 419 420 } // namespace elf 421 } // namespace lld 422 423 #endif 424