1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_INPUT_FILES_H 10 #define LLD_ELF_INPUT_FILES_H 11 12 #include "Config.h" 13 #include "lld/Common/ErrorHandler.h" 14 #include "lld/Common/LLVM.h" 15 #include "lld/Common/Reproduce.h" 16 #include "llvm/ADT/CachedHashString.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/IR/Comdat.h" 20 #include "llvm/Object/Archive.h" 21 #include "llvm/Object/ELF.h" 22 #include "llvm/Object/IRObjectFile.h" 23 #include "llvm/Support/Threading.h" 24 #include <map> 25 26 namespace llvm { 27 struct DILineInfo; 28 class TarWriter; 29 namespace lto { 30 class InputFile; 31 } 32 } // namespace llvm 33 34 namespace lld { 35 class DWARFCache; 36 37 // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 38 std::string toString(const elf::InputFile *f); 39 40 namespace elf { 41 class InputFile; 42 class InputSectionBase; 43 44 using llvm::object::Archive; 45 46 class Symbol; 47 48 // If -reproduce option is given, all input files are written 49 // to this tar archive. 50 extern std::unique_ptr<llvm::TarWriter> tar; 51 52 // Opens a given file. 53 llvm::Optional<MemoryBufferRef> readFile(StringRef path); 54 55 // Add symbols in File to the symbol table. 56 void parseFile(InputFile *file); 57 58 // The root class of input files. 59 class InputFile { 60 public: 61 enum Kind { 62 ObjKind, 63 SharedKind, 64 LazyObjKind, 65 ArchiveKind, 66 BitcodeKind, 67 BinaryKind, 68 }; 69 70 Kind kind() const { return fileKind; } 71 72 bool isElf() const { 73 Kind k = kind(); 74 return k == ObjKind || k == SharedKind; 75 } 76 77 StringRef getName() const { return mb.getBufferIdentifier(); } 78 MemoryBufferRef mb; 79 80 // Returns sections. It is a runtime error to call this function 81 // on files that don't have the notion of sections. 82 ArrayRef<InputSectionBase *> getSections() const { 83 assert(fileKind == ObjKind || fileKind == BinaryKind); 84 return sections; 85 } 86 87 // Returns object file symbols. It is a runtime error to call this 88 // function on files of other types. 89 ArrayRef<Symbol *> getSymbols() { return getMutableSymbols(); } 90 91 MutableArrayRef<Symbol *> getMutableSymbols() { 92 assert(fileKind == BinaryKind || fileKind == ObjKind || 93 fileKind == BitcodeKind); 94 return symbols; 95 } 96 97 // Filename of .a which contained this file. If this file was 98 // not in an archive file, it is the empty string. We use this 99 // string for creating error messages. 100 std::string archiveName; 101 102 // If this is an architecture-specific file, the following members 103 // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. 104 ELFKind ekind = ELFNoneKind; 105 uint16_t emachine = llvm::ELF::EM_NONE; 106 uint8_t osabi = 0; 107 uint8_t abiVersion = 0; 108 109 // Cache for toString(). Only toString() should use this member. 110 mutable std::string toStringCache; 111 112 std::string getSrcMsg(const Symbol &sym, InputSectionBase &sec, 113 uint64_t offset); 114 115 // True if this is an argument for --just-symbols. Usually false. 116 bool justSymbols = false; 117 118 // outSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE 119 // to compute offsets in PLT call stubs. 120 uint32_t ppc32Got2OutSecOff = 0; 121 122 // On PPC64 we need to keep track of which files contain small code model 123 // relocations that access the .toc section. To minimize the chance of a 124 // relocation overflow, files that do contain said relocations should have 125 // their .toc sections sorted closer to the .got section than files that do 126 // not contain any small code model relocations. Thats because the toc-pointer 127 // is defined to point at .got + 0x8000 and the instructions used with small 128 // code model relocations support immediates in the range [-0x8000, 0x7FFC], 129 // making the addressable range relative to the toc pointer 130 // [.got, .got + 0xFFFC]. 131 bool ppc64SmallCodeModelTocRelocs = false; 132 133 // groupId is used for --warn-backrefs which is an optional error 134 // checking feature. All files within the same --{start,end}-group or 135 // --{start,end}-lib get the same group ID. Otherwise, each file gets a new 136 // group ID. For more info, see checkDependency() in SymbolTable.cpp. 137 uint32_t groupId; 138 static bool isInGroup; 139 static uint32_t nextGroupId; 140 141 // Index of MIPS GOT built for this file. 142 llvm::Optional<size_t> mipsGotIndex; 143 144 std::vector<Symbol *> symbols; 145 146 protected: 147 InputFile(Kind k, MemoryBufferRef m); 148 std::vector<InputSectionBase *> sections; 149 150 private: 151 const Kind fileKind; 152 }; 153 154 class ELFFileBase : public InputFile { 155 public: 156 ELFFileBase(Kind k, MemoryBufferRef m); 157 static bool classof(const InputFile *f) { return f->isElf(); } 158 159 template <typename ELFT> llvm::object::ELFFile<ELFT> getObj() const { 160 return check(llvm::object::ELFFile<ELFT>::create(mb.getBuffer())); 161 } 162 163 StringRef getStringTable() const { return stringTable; } 164 165 template <typename ELFT> typename ELFT::SymRange getELFSyms() const { 166 return typename ELFT::SymRange( 167 reinterpret_cast<const typename ELFT::Sym *>(elfSyms), numELFSyms); 168 } 169 template <typename ELFT> typename ELFT::SymRange getGlobalELFSyms() const { 170 return getELFSyms<ELFT>().slice(firstGlobal); 171 } 172 173 protected: 174 // Initializes this class's member variables. 175 template <typename ELFT> void init(); 176 177 const void *elfSyms = nullptr; 178 size_t numELFSyms = 0; 179 uint32_t firstGlobal = 0; 180 StringRef stringTable; 181 }; 182 183 // .o file. 184 template <class ELFT> class ObjFile : public ELFFileBase { 185 using Elf_Rel = typename ELFT::Rel; 186 using Elf_Rela = typename ELFT::Rela; 187 using Elf_Sym = typename ELFT::Sym; 188 using Elf_Shdr = typename ELFT::Shdr; 189 using Elf_Word = typename ELFT::Word; 190 using Elf_CGProfile = typename ELFT::CGProfile; 191 192 public: 193 static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 194 195 llvm::object::ELFFile<ELFT> getObj() const { 196 return this->ELFFileBase::getObj<ELFT>(); 197 } 198 199 ArrayRef<Symbol *> getLocalSymbols(); 200 ArrayRef<Symbol *> getGlobalSymbols(); 201 202 ObjFile(MemoryBufferRef m, StringRef archiveName) : ELFFileBase(ObjKind, m) { 203 this->archiveName = archiveName; 204 } 205 206 void parse(bool ignoreComdats = false); 207 208 StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> sections, 209 const Elf_Shdr &sec); 210 211 Symbol &getSymbol(uint32_t symbolIndex) const { 212 if (symbolIndex >= this->symbols.size()) 213 fatal(toString(this) + ": invalid symbol index"); 214 return *this->symbols[symbolIndex]; 215 } 216 217 uint32_t getSectionIndex(const Elf_Sym &sym) const; 218 219 template <typename RelT> Symbol &getRelocTargetSym(const RelT &rel) const { 220 uint32_t symIndex = rel.getSymbol(config->isMips64EL); 221 return getSymbol(symIndex); 222 } 223 224 llvm::Optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t); 225 llvm::Optional<std::pair<std::string, unsigned>> getVariableLoc(StringRef name); 226 227 // MIPS GP0 value defined by this file. This value represents the gp value 228 // used to create the relocatable object and required to support 229 // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. 230 uint32_t mipsGp0 = 0; 231 232 uint32_t andFeatures = 0; 233 234 // Name of source file obtained from STT_FILE symbol value, 235 // or empty string if there is no such symbol in object file 236 // symbol table. 237 StringRef sourceFile; 238 239 // True if the file defines functions compiled with 240 // -fsplit-stack. Usually false. 241 bool splitStack = false; 242 243 // True if the file defines functions compiled with -fsplit-stack, 244 // but had one or more functions with the no_split_stack attribute. 245 bool someNoSplitStack = false; 246 247 // Pointer to this input file's .llvm_addrsig section, if it has one. 248 const Elf_Shdr *addrsigSec = nullptr; 249 250 // SHT_LLVM_CALL_GRAPH_PROFILE table 251 ArrayRef<Elf_CGProfile> cgProfile; 252 253 private: 254 void initializeSections(bool ignoreComdats); 255 void initializeSymbols(); 256 void initializeJustSymbols(); 257 void initializeDwarf(); 258 InputSectionBase *getRelocTarget(const Elf_Shdr &sec); 259 InputSectionBase *createInputSection(const Elf_Shdr &sec); 260 StringRef getSectionName(const Elf_Shdr &sec); 261 262 bool shouldMerge(const Elf_Shdr &sec, StringRef name); 263 264 // Each ELF symbol contains a section index which the symbol belongs to. 265 // However, because the number of bits dedicated for that is limited, a 266 // symbol can directly point to a section only when the section index is 267 // equal to or smaller than 65280. 268 // 269 // If an object file contains more than 65280 sections, the file must 270 // contain .symtab_shndx section. The section contains an array of 271 // 32-bit integers whose size is the same as the number of symbols. 272 // Nth symbol's section index is in the Nth entry of .symtab_shndx. 273 // 274 // The following variable contains the contents of .symtab_shndx. 275 // If the section does not exist (which is common), the array is empty. 276 ArrayRef<Elf_Word> shndxTable; 277 278 // .shstrtab contents. 279 StringRef sectionStringTable; 280 281 // Debugging information to retrieve source file and line for error 282 // reporting. Linker may find reasonable number of errors in a 283 // single object file, so we cache debugging information in order to 284 // parse it only once for each object file we link. 285 DWARFCache *dwarf; 286 llvm::once_flag initDwarfLine; 287 }; 288 289 // LazyObjFile is analogous to ArchiveFile in the sense that 290 // the file contains lazy symbols. The difference is that 291 // LazyObjFile wraps a single file instead of multiple files. 292 // 293 // This class is used for --start-lib and --end-lib options which 294 // instruct the linker to link object files between them with the 295 // archive file semantics. 296 class LazyObjFile : public InputFile { 297 public: 298 LazyObjFile(MemoryBufferRef m, StringRef archiveName, 299 uint64_t offsetInArchive) 300 : InputFile(LazyObjKind, m), offsetInArchive(offsetInArchive) { 301 this->archiveName = archiveName; 302 } 303 304 static bool classof(const InputFile *f) { return f->kind() == LazyObjKind; } 305 306 template <class ELFT> void parse(); 307 void fetch(); 308 309 private: 310 uint64_t offsetInArchive; 311 }; 312 313 // An ArchiveFile object represents a .a file. 314 class ArchiveFile : public InputFile { 315 public: 316 explicit ArchiveFile(std::unique_ptr<Archive> &&file); 317 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 318 void parse(); 319 320 // Pulls out an object file that contains a definition for Sym and 321 // returns it. If the same file was instantiated before, this 322 // function does nothing (so we don't instantiate the same file 323 // more than once.) 324 void fetch(const Archive::Symbol &sym); 325 326 private: 327 std::unique_ptr<Archive> file; 328 llvm::DenseSet<uint64_t> seen; 329 }; 330 331 class BitcodeFile : public InputFile { 332 public: 333 BitcodeFile(MemoryBufferRef m, StringRef archiveName, 334 uint64_t offsetInArchive); 335 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 336 template <class ELFT> void parse(); 337 std::unique_ptr<llvm::lto::InputFile> obj; 338 }; 339 340 // .so file. 341 class SharedFile : public ELFFileBase { 342 public: 343 SharedFile(MemoryBufferRef m, StringRef defaultSoName) 344 : ELFFileBase(SharedKind, m), soName(defaultSoName), 345 isNeeded(!config->asNeeded) {} 346 347 // This is actually a vector of Elf_Verdef pointers. 348 std::vector<const void *> verdefs; 349 350 // If the output file needs Elf_Verneed data structures for this file, this is 351 // a vector of Elf_Vernaux version identifiers that map onto the entries in 352 // Verdefs, otherwise it is empty. 353 std::vector<unsigned> vernauxs; 354 355 static unsigned vernauxNum; 356 357 std::vector<StringRef> dtNeeded; 358 std::string soName; 359 360 static bool classof(const InputFile *f) { return f->kind() == SharedKind; } 361 362 template <typename ELFT> void parse(); 363 364 // Used for --no-allow-shlib-undefined. 365 bool allNeededIsKnown; 366 367 // Used for --as-needed 368 bool isNeeded; 369 }; 370 371 class BinaryFile : public InputFile { 372 public: 373 explicit BinaryFile(MemoryBufferRef m) : InputFile(BinaryKind, m) {} 374 static bool classof(const InputFile *f) { return f->kind() == BinaryKind; } 375 void parse(); 376 }; 377 378 InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "", 379 uint64_t offsetInArchive = 0); 380 381 inline bool isBitcode(MemoryBufferRef mb) { 382 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; 383 } 384 385 std::string replaceThinLTOSuffix(StringRef path); 386 387 extern std::vector<BinaryFile *> binaryFiles; 388 extern std::vector<BitcodeFile *> bitcodeFiles; 389 extern std::vector<LazyObjFile *> lazyObjFiles; 390 extern std::vector<InputFile *> objectFiles; 391 extern std::vector<SharedFile *> sharedFiles; 392 393 } // namespace elf 394 } // namespace lld 395 396 #endif 397