1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_INPUT_FILES_H 10 #define LLD_ELF_INPUT_FILES_H 11 12 #include "Config.h" 13 #include "lld/Common/ErrorHandler.h" 14 #include "lld/Common/LLVM.h" 15 #include "lld/Common/Reproduce.h" 16 #include "llvm/ADT/CachedHashString.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/IR/Comdat.h" 20 #include "llvm/Object/Archive.h" 21 #include "llvm/Object/ELF.h" 22 #include "llvm/Object/IRObjectFile.h" 23 #include "llvm/Support/Threading.h" 24 #include <map> 25 26 namespace llvm { 27 struct DILineInfo; 28 class TarWriter; 29 namespace lto { 30 class InputFile; 31 } 32 } // namespace llvm 33 34 namespace lld { 35 class DWARFCache; 36 37 // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 38 std::string toString(const elf::InputFile *f); 39 40 namespace elf { 41 42 using llvm::object::Archive; 43 44 class Symbol; 45 46 // If -reproduce option is given, all input files are written 47 // to this tar archive. 48 extern std::unique_ptr<llvm::TarWriter> tar; 49 50 // Opens a given file. 51 llvm::Optional<MemoryBufferRef> readFile(StringRef path); 52 53 // Add symbols in File to the symbol table. 54 void parseFile(InputFile *file); 55 56 // The root class of input files. 57 class InputFile { 58 public: 59 enum Kind { 60 ObjKind, 61 SharedKind, 62 LazyObjKind, 63 ArchiveKind, 64 BitcodeKind, 65 BinaryKind, 66 }; 67 68 Kind kind() const { return fileKind; } 69 70 bool isElf() const { 71 Kind k = kind(); 72 return k == ObjKind || k == SharedKind; 73 } 74 75 StringRef getName() const { return mb.getBufferIdentifier(); } 76 MemoryBufferRef mb; 77 78 // Returns sections. It is a runtime error to call this function 79 // on files that don't have the notion of sections. 80 ArrayRef<InputSectionBase *> getSections() const { 81 assert(fileKind == ObjKind || fileKind == BinaryKind); 82 return sections; 83 } 84 85 // Returns object file symbols. It is a runtime error to call this 86 // function on files of other types. 87 ArrayRef<Symbol *> getSymbols() { return getMutableSymbols(); } 88 89 MutableArrayRef<Symbol *> getMutableSymbols() { 90 assert(fileKind == BinaryKind || fileKind == ObjKind || 91 fileKind == BitcodeKind); 92 return symbols; 93 } 94 95 // Filename of .a which contained this file. If this file was 96 // not in an archive file, it is the empty string. We use this 97 // string for creating error messages. 98 std::string archiveName; 99 100 // If this is an architecture-specific file, the following members 101 // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. 102 ELFKind ekind = ELFNoneKind; 103 uint16_t emachine = llvm::ELF::EM_NONE; 104 uint8_t osabi = 0; 105 uint8_t abiVersion = 0; 106 107 // Cache for toString(). Only toString() should use this member. 108 mutable std::string toStringCache; 109 110 std::string getSrcMsg(const Symbol &sym, InputSectionBase &sec, 111 uint64_t offset); 112 113 // True if this is an argument for --just-symbols. Usually false. 114 bool justSymbols = false; 115 116 // outSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE 117 // to compute offsets in PLT call stubs. 118 uint32_t ppc32Got2OutSecOff = 0; 119 120 // On PPC64 we need to keep track of which files contain small code model 121 // relocations that access the .toc section. To minimize the chance of a 122 // relocation overflow, files that do contain said relocations should have 123 // their .toc sections sorted closer to the .got section than files that do 124 // not contain any small code model relocations. Thats because the toc-pointer 125 // is defined to point at .got + 0x8000 and the instructions used with small 126 // code model relocations support immediates in the range [-0x8000, 0x7FFC], 127 // making the addressable range relative to the toc pointer 128 // [.got, .got + 0xFFFC]. 129 bool ppc64SmallCodeModelTocRelocs = false; 130 131 // groupId is used for --warn-backrefs which is an optional error 132 // checking feature. All files within the same --{start,end}-group or 133 // --{start,end}-lib get the same group ID. Otherwise, each file gets a new 134 // group ID. For more info, see checkDependency() in SymbolTable.cpp. 135 uint32_t groupId; 136 static bool isInGroup; 137 static uint32_t nextGroupId; 138 139 // Index of MIPS GOT built for this file. 140 llvm::Optional<size_t> mipsGotIndex; 141 142 std::vector<Symbol *> symbols; 143 144 protected: 145 InputFile(Kind k, MemoryBufferRef m); 146 std::vector<InputSectionBase *> sections; 147 148 private: 149 const Kind fileKind; 150 }; 151 152 class ELFFileBase : public InputFile { 153 public: 154 ELFFileBase(Kind k, MemoryBufferRef m); 155 static bool classof(const InputFile *f) { return f->isElf(); } 156 157 template <typename ELFT> llvm::object::ELFFile<ELFT> getObj() const { 158 return check(llvm::object::ELFFile<ELFT>::create(mb.getBuffer())); 159 } 160 161 StringRef getStringTable() const { return stringTable; } 162 163 template <typename ELFT> typename ELFT::SymRange getELFSyms() const { 164 return typename ELFT::SymRange( 165 reinterpret_cast<const typename ELFT::Sym *>(elfSyms), numELFSyms); 166 } 167 template <typename ELFT> typename ELFT::SymRange getGlobalELFSyms() const { 168 return getELFSyms<ELFT>().slice(firstGlobal); 169 } 170 171 protected: 172 // Initializes this class's member variables. 173 template <typename ELFT> void init(); 174 175 const void *elfSyms = nullptr; 176 size_t numELFSyms = 0; 177 uint32_t firstGlobal = 0; 178 StringRef stringTable; 179 }; 180 181 // .o file. 182 template <class ELFT> class ObjFile : public ELFFileBase { 183 using Elf_Rel = typename ELFT::Rel; 184 using Elf_Rela = typename ELFT::Rela; 185 using Elf_Sym = typename ELFT::Sym; 186 using Elf_Shdr = typename ELFT::Shdr; 187 using Elf_Word = typename ELFT::Word; 188 using Elf_CGProfile = typename ELFT::CGProfile; 189 190 public: 191 static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 192 193 llvm::object::ELFFile<ELFT> getObj() const { 194 return this->ELFFileBase::getObj<ELFT>(); 195 } 196 197 ArrayRef<Symbol *> getLocalSymbols(); 198 ArrayRef<Symbol *> getGlobalSymbols(); 199 200 ObjFile(MemoryBufferRef m, StringRef archiveName) : ELFFileBase(ObjKind, m) { 201 this->archiveName = std::string(archiveName); 202 } 203 204 void parse(bool ignoreComdats = false); 205 206 StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> sections, 207 const Elf_Shdr &sec); 208 209 Symbol &getSymbol(uint32_t symbolIndex) const { 210 if (symbolIndex >= this->symbols.size()) 211 fatal(toString(this) + ": invalid symbol index"); 212 return *this->symbols[symbolIndex]; 213 } 214 215 uint32_t getSectionIndex(const Elf_Sym &sym) const; 216 217 template <typename RelT> Symbol &getRelocTargetSym(const RelT &rel) const { 218 uint32_t symIndex = rel.getSymbol(config->isMips64EL); 219 return getSymbol(symIndex); 220 } 221 222 llvm::Optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t); 223 llvm::Optional<std::pair<std::string, unsigned>> getVariableLoc(StringRef name); 224 225 // MIPS GP0 value defined by this file. This value represents the gp value 226 // used to create the relocatable object and required to support 227 // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. 228 uint32_t mipsGp0 = 0; 229 230 uint32_t andFeatures = 0; 231 232 // Name of source file obtained from STT_FILE symbol value, 233 // or empty string if there is no such symbol in object file 234 // symbol table. 235 StringRef sourceFile; 236 237 // True if the file defines functions compiled with 238 // -fsplit-stack. Usually false. 239 bool splitStack = false; 240 241 // True if the file defines functions compiled with -fsplit-stack, 242 // but had one or more functions with the no_split_stack attribute. 243 bool someNoSplitStack = false; 244 245 // Pointer to this input file's .llvm_addrsig section, if it has one. 246 const Elf_Shdr *addrsigSec = nullptr; 247 248 // SHT_LLVM_CALL_GRAPH_PROFILE table 249 ArrayRef<Elf_CGProfile> cgProfile; 250 251 // Get cached DWARF information. 252 DWARFCache *getDwarf(); 253 254 private: 255 void initializeSections(bool ignoreComdats); 256 void initializeSymbols(); 257 void initializeJustSymbols(); 258 259 InputSectionBase *getRelocTarget(const Elf_Shdr &sec); 260 InputSectionBase *createInputSection(const Elf_Shdr &sec); 261 StringRef getSectionName(const Elf_Shdr &sec); 262 263 bool shouldMerge(const Elf_Shdr &sec, StringRef name); 264 265 // Each ELF symbol contains a section index which the symbol belongs to. 266 // However, because the number of bits dedicated for that is limited, a 267 // symbol can directly point to a section only when the section index is 268 // equal to or smaller than 65280. 269 // 270 // If an object file contains more than 65280 sections, the file must 271 // contain .symtab_shndx section. The section contains an array of 272 // 32-bit integers whose size is the same as the number of symbols. 273 // Nth symbol's section index is in the Nth entry of .symtab_shndx. 274 // 275 // The following variable contains the contents of .symtab_shndx. 276 // If the section does not exist (which is common), the array is empty. 277 ArrayRef<Elf_Word> shndxTable; 278 279 // .shstrtab contents. 280 StringRef sectionStringTable; 281 282 // Debugging information to retrieve source file and line for error 283 // reporting. Linker may find reasonable number of errors in a 284 // single object file, so we cache debugging information in order to 285 // parse it only once for each object file we link. 286 std::unique_ptr<DWARFCache> dwarf; 287 llvm::once_flag initDwarf; 288 }; 289 290 // LazyObjFile is analogous to ArchiveFile in the sense that 291 // the file contains lazy symbols. The difference is that 292 // LazyObjFile wraps a single file instead of multiple files. 293 // 294 // This class is used for --start-lib and --end-lib options which 295 // instruct the linker to link object files between them with the 296 // archive file semantics. 297 class LazyObjFile : public InputFile { 298 public: 299 LazyObjFile(MemoryBufferRef m, StringRef archiveName, 300 uint64_t offsetInArchive) 301 : InputFile(LazyObjKind, m), offsetInArchive(offsetInArchive) { 302 this->archiveName = std::string(archiveName); 303 } 304 305 static bool classof(const InputFile *f) { return f->kind() == LazyObjKind; } 306 307 template <class ELFT> void parse(); 308 void fetch(); 309 310 bool fetched = false; 311 312 private: 313 uint64_t offsetInArchive; 314 }; 315 316 // An ArchiveFile object represents a .a file. 317 class ArchiveFile : public InputFile { 318 public: 319 explicit ArchiveFile(std::unique_ptr<Archive> &&file); 320 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 321 void parse(); 322 323 // Pulls out an object file that contains a definition for Sym and 324 // returns it. If the same file was instantiated before, this 325 // function does nothing (so we don't instantiate the same file 326 // more than once.) 327 void fetch(const Archive::Symbol &sym); 328 329 size_t getMemberCount() const; 330 size_t getFetchedMemberCount() const { return seen.size(); } 331 332 bool parsed = false; 333 334 private: 335 std::unique_ptr<Archive> file; 336 llvm::DenseSet<uint64_t> seen; 337 }; 338 339 class BitcodeFile : public InputFile { 340 public: 341 BitcodeFile(MemoryBufferRef m, StringRef archiveName, 342 uint64_t offsetInArchive); 343 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 344 template <class ELFT> void parse(); 345 std::unique_ptr<llvm::lto::InputFile> obj; 346 }; 347 348 // .so file. 349 class SharedFile : public ELFFileBase { 350 public: 351 SharedFile(MemoryBufferRef m, StringRef defaultSoName) 352 : ELFFileBase(SharedKind, m), soName(std::string(defaultSoName)), 353 isNeeded(!config->asNeeded) {} 354 355 // This is actually a vector of Elf_Verdef pointers. 356 std::vector<const void *> verdefs; 357 358 // If the output file needs Elf_Verneed data structures for this file, this is 359 // a vector of Elf_Vernaux version identifiers that map onto the entries in 360 // Verdefs, otherwise it is empty. 361 std::vector<unsigned> vernauxs; 362 363 static unsigned vernauxNum; 364 365 std::vector<StringRef> dtNeeded; 366 std::string soName; 367 368 static bool classof(const InputFile *f) { return f->kind() == SharedKind; } 369 370 template <typename ELFT> void parse(); 371 372 // Used for --no-allow-shlib-undefined. 373 bool allNeededIsKnown; 374 375 // Used for --as-needed 376 bool isNeeded; 377 378 private: 379 template <typename ELFT> 380 std::vector<uint32_t> parseVerneed(const llvm::object::ELFFile<ELFT> &obj, 381 const typename ELFT::Shdr *sec); 382 }; 383 384 class BinaryFile : public InputFile { 385 public: 386 explicit BinaryFile(MemoryBufferRef m) : InputFile(BinaryKind, m) {} 387 static bool classof(const InputFile *f) { return f->kind() == BinaryKind; } 388 void parse(); 389 }; 390 391 InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "", 392 uint64_t offsetInArchive = 0); 393 394 inline bool isBitcode(MemoryBufferRef mb) { 395 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; 396 } 397 398 std::string replaceThinLTOSuffix(StringRef path); 399 400 extern std::vector<ArchiveFile *> archiveFiles; 401 extern std::vector<BinaryFile *> binaryFiles; 402 extern std::vector<BitcodeFile *> bitcodeFiles; 403 extern std::vector<LazyObjFile *> lazyObjFiles; 404 extern std::vector<InputFile *> objectFiles; 405 extern std::vector<SharedFile *> sharedFiles; 406 407 } // namespace elf 408 } // namespace lld 409 410 #endif 411