1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_INPUT_FILES_H 10 #define LLD_ELF_INPUT_FILES_H 11 12 #include "Config.h" 13 #include "lld/Common/DWARF.h" 14 #include "lld/Common/ErrorHandler.h" 15 #include "lld/Common/LLVM.h" 16 #include "lld/Common/Reproduce.h" 17 #include "llvm/ADT/CachedHashString.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/IR/Comdat.h" 21 #include "llvm/Object/Archive.h" 22 #include "llvm/Object/ELF.h" 23 #include "llvm/Object/IRObjectFile.h" 24 #include "llvm/Support/Threading.h" 25 #include <map> 26 27 namespace llvm { 28 class TarWriter; 29 namespace lto { 30 class InputFile; 31 } 32 } // namespace llvm 33 34 namespace lld { 35 36 // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 37 std::string toString(const elf::InputFile *f); 38 39 namespace elf { 40 class InputFile; 41 class InputSectionBase; 42 43 using llvm::object::Archive; 44 45 class Symbol; 46 47 // If -reproduce option is given, all input files are written 48 // to this tar archive. 49 extern std::unique_ptr<llvm::TarWriter> tar; 50 51 // Opens a given file. 52 llvm::Optional<MemoryBufferRef> readFile(StringRef path); 53 54 // Add symbols in File to the symbol table. 55 void parseFile(InputFile *file); 56 57 // The root class of input files. 58 class InputFile { 59 public: 60 enum Kind { 61 ObjKind, 62 SharedKind, 63 LazyObjKind, 64 ArchiveKind, 65 BitcodeKind, 66 BinaryKind, 67 }; 68 69 Kind kind() const { return fileKind; } 70 71 bool isElf() const { 72 Kind k = kind(); 73 return k == ObjKind || k == SharedKind; 74 } 75 76 StringRef getName() const { return mb.getBufferIdentifier(); } 77 MemoryBufferRef mb; 78 79 // Returns sections. It is a runtime error to call this function 80 // on files that don't have the notion of sections. 81 ArrayRef<InputSectionBase *> getSections() const { 82 assert(fileKind == ObjKind || fileKind == BinaryKind); 83 return sections; 84 } 85 86 // Returns object file symbols. It is a runtime error to call this 87 // function on files of other types. 88 ArrayRef<Symbol *> getSymbols() { return getMutableSymbols(); } 89 90 MutableArrayRef<Symbol *> getMutableSymbols() { 91 assert(fileKind == BinaryKind || fileKind == ObjKind || 92 fileKind == BitcodeKind); 93 return symbols; 94 } 95 96 // Filename of .a which contained this file. If this file was 97 // not in an archive file, it is the empty string. We use this 98 // string for creating error messages. 99 std::string archiveName; 100 101 // If this is an architecture-specific file, the following members 102 // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. 103 ELFKind ekind = ELFNoneKind; 104 uint16_t emachine = llvm::ELF::EM_NONE; 105 uint8_t osabi = 0; 106 uint8_t abiVersion = 0; 107 108 // Cache for toString(). Only toString() should use this member. 109 mutable std::string toStringCache; 110 111 std::string getSrcMsg(const Symbol &sym, InputSectionBase &sec, 112 uint64_t offset); 113 114 // True if this is an argument for --just-symbols. Usually false. 115 bool justSymbols = false; 116 117 // outSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE 118 // to compute offsets in PLT call stubs. 119 uint32_t ppc32Got2OutSecOff = 0; 120 121 // On PPC64 we need to keep track of which files contain small code model 122 // relocations that access the .toc section. To minimize the chance of a 123 // relocation overflow, files that do contain said relocations should have 124 // their .toc sections sorted closer to the .got section than files that do 125 // not contain any small code model relocations. Thats because the toc-pointer 126 // is defined to point at .got + 0x8000 and the instructions used with small 127 // code model relocations support immediates in the range [-0x8000, 0x7FFC], 128 // making the addressable range relative to the toc pointer 129 // [.got, .got + 0xFFFC]. 130 bool ppc64SmallCodeModelTocRelocs = false; 131 132 // groupId is used for --warn-backrefs which is an optional error 133 // checking feature. All files within the same --{start,end}-group or 134 // --{start,end}-lib get the same group ID. Otherwise, each file gets a new 135 // group ID. For more info, see checkDependency() in SymbolTable.cpp. 136 uint32_t groupId; 137 static bool isInGroup; 138 static uint32_t nextGroupId; 139 140 // Index of MIPS GOT built for this file. 141 llvm::Optional<size_t> mipsGotIndex; 142 143 std::vector<Symbol *> symbols; 144 145 protected: 146 InputFile(Kind k, MemoryBufferRef m); 147 std::vector<InputSectionBase *> sections; 148 149 private: 150 const Kind fileKind; 151 }; 152 153 class ELFFileBase : public InputFile { 154 public: 155 ELFFileBase(Kind k, MemoryBufferRef m); 156 static bool classof(const InputFile *f) { return f->isElf(); } 157 158 template <typename ELFT> llvm::object::ELFFile<ELFT> getObj() const { 159 return check(llvm::object::ELFFile<ELFT>::create(mb.getBuffer())); 160 } 161 162 StringRef getStringTable() const { return stringTable; } 163 164 template <typename ELFT> typename ELFT::SymRange getELFSyms() const { 165 return typename ELFT::SymRange( 166 reinterpret_cast<const typename ELFT::Sym *>(elfSyms), numELFSyms); 167 } 168 template <typename ELFT> typename ELFT::SymRange getGlobalELFSyms() const { 169 return getELFSyms<ELFT>().slice(firstGlobal); 170 } 171 172 protected: 173 // Initializes this class's member variables. 174 template <typename ELFT> void init(); 175 176 const void *elfSyms = nullptr; 177 size_t numELFSyms = 0; 178 uint32_t firstGlobal = 0; 179 StringRef stringTable; 180 }; 181 182 // .o file. 183 template <class ELFT> class ObjFile : public ELFFileBase { 184 using Elf_Rel = typename ELFT::Rel; 185 using Elf_Rela = typename ELFT::Rela; 186 using Elf_Sym = typename ELFT::Sym; 187 using Elf_Shdr = typename ELFT::Shdr; 188 using Elf_Word = typename ELFT::Word; 189 using Elf_CGProfile = typename ELFT::CGProfile; 190 191 public: 192 static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 193 194 llvm::object::ELFFile<ELFT> getObj() const { 195 return this->ELFFileBase::getObj<ELFT>(); 196 } 197 198 ArrayRef<Symbol *> getLocalSymbols(); 199 ArrayRef<Symbol *> getGlobalSymbols(); 200 201 ObjFile(MemoryBufferRef m, StringRef archiveName) : ELFFileBase(ObjKind, m) { 202 this->archiveName = archiveName; 203 } 204 205 void parse(bool ignoreComdats = false); 206 207 StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> sections, 208 const Elf_Shdr &sec); 209 210 Symbol &getSymbol(uint32_t symbolIndex) const { 211 if (symbolIndex >= this->symbols.size()) 212 fatal(toString(this) + ": invalid symbol index"); 213 return *this->symbols[symbolIndex]; 214 } 215 216 uint32_t getSectionIndex(const Elf_Sym &sym) const; 217 218 template <typename RelT> Symbol &getRelocTargetSym(const RelT &rel) const { 219 uint32_t symIndex = rel.getSymbol(config->isMips64EL); 220 return getSymbol(symIndex); 221 } 222 223 llvm::Optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t); 224 llvm::Optional<std::pair<std::string, unsigned>> getVariableLoc(StringRef name); 225 226 // MIPS GP0 value defined by this file. This value represents the gp value 227 // used to create the relocatable object and required to support 228 // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. 229 uint32_t mipsGp0 = 0; 230 231 uint32_t andFeatures = 0; 232 233 // Name of source file obtained from STT_FILE symbol value, 234 // or empty string if there is no such symbol in object file 235 // symbol table. 236 StringRef sourceFile; 237 238 // True if the file defines functions compiled with 239 // -fsplit-stack. Usually false. 240 bool splitStack = false; 241 242 // True if the file defines functions compiled with -fsplit-stack, 243 // but had one or more functions with the no_split_stack attribute. 244 bool someNoSplitStack = false; 245 246 // Pointer to this input file's .llvm_addrsig section, if it has one. 247 const Elf_Shdr *addrsigSec = nullptr; 248 249 // SHT_LLVM_CALL_GRAPH_PROFILE table 250 ArrayRef<Elf_CGProfile> cgProfile; 251 252 private: 253 void initializeSections(bool ignoreComdats); 254 void initializeSymbols(); 255 void initializeJustSymbols(); 256 void initializeDwarf(); 257 InputSectionBase *getRelocTarget(const Elf_Shdr &sec); 258 InputSectionBase *createInputSection(const Elf_Shdr &sec); 259 StringRef getSectionName(const Elf_Shdr &sec); 260 261 bool shouldMerge(const Elf_Shdr &sec, StringRef name); 262 263 // Each ELF symbol contains a section index which the symbol belongs to. 264 // However, because the number of bits dedicated for that is limited, a 265 // symbol can directly point to a section only when the section index is 266 // equal to or smaller than 65280. 267 // 268 // If an object file contains more than 65280 sections, the file must 269 // contain .symtab_shndx section. The section contains an array of 270 // 32-bit integers whose size is the same as the number of symbols. 271 // Nth symbol's section index is in the Nth entry of .symtab_shndx. 272 // 273 // The following variable contains the contents of .symtab_shndx. 274 // If the section does not exist (which is common), the array is empty. 275 ArrayRef<Elf_Word> shndxTable; 276 277 // .shstrtab contents. 278 StringRef sectionStringTable; 279 280 // Debugging information to retrieve source file and line for error 281 // reporting. Linker may find reasonable number of errors in a 282 // single object file, so we cache debugging information in order to 283 // parse it only once for each object file we link. 284 DWARFCache *dwarf; 285 llvm::once_flag initDwarfLine; 286 }; 287 288 // LazyObjFile is analogous to ArchiveFile in the sense that 289 // the file contains lazy symbols. The difference is that 290 // LazyObjFile wraps a single file instead of multiple files. 291 // 292 // This class is used for --start-lib and --end-lib options which 293 // instruct the linker to link object files between them with the 294 // archive file semantics. 295 class LazyObjFile : public InputFile { 296 public: 297 LazyObjFile(MemoryBufferRef m, StringRef archiveName, 298 uint64_t offsetInArchive) 299 : InputFile(LazyObjKind, m), offsetInArchive(offsetInArchive) { 300 this->archiveName = archiveName; 301 } 302 303 static bool classof(const InputFile *f) { return f->kind() == LazyObjKind; } 304 305 template <class ELFT> void parse(); 306 void fetch(); 307 308 private: 309 uint64_t offsetInArchive; 310 }; 311 312 // An ArchiveFile object represents a .a file. 313 class ArchiveFile : public InputFile { 314 public: 315 explicit ArchiveFile(std::unique_ptr<Archive> &&file); 316 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 317 void parse(); 318 319 // Pulls out an object file that contains a definition for Sym and 320 // returns it. If the same file was instantiated before, this 321 // function does nothing (so we don't instantiate the same file 322 // more than once.) 323 void fetch(const Archive::Symbol &sym); 324 325 private: 326 std::unique_ptr<Archive> file; 327 llvm::DenseSet<uint64_t> seen; 328 }; 329 330 class BitcodeFile : public InputFile { 331 public: 332 BitcodeFile(MemoryBufferRef m, StringRef archiveName, 333 uint64_t offsetInArchive); 334 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 335 template <class ELFT> void parse(); 336 std::unique_ptr<llvm::lto::InputFile> obj; 337 }; 338 339 // .so file. 340 class SharedFile : public ELFFileBase { 341 public: 342 SharedFile(MemoryBufferRef m, StringRef defaultSoName) 343 : ELFFileBase(SharedKind, m), soName(defaultSoName), 344 isNeeded(!config->asNeeded) {} 345 346 // This is actually a vector of Elf_Verdef pointers. 347 std::vector<const void *> verdefs; 348 349 // If the output file needs Elf_Verneed data structures for this file, this is 350 // a vector of Elf_Vernaux version identifiers that map onto the entries in 351 // Verdefs, otherwise it is empty. 352 std::vector<unsigned> vernauxs; 353 354 static unsigned vernauxNum; 355 356 std::vector<StringRef> dtNeeded; 357 std::string soName; 358 359 static bool classof(const InputFile *f) { return f->kind() == SharedKind; } 360 361 template <typename ELFT> void parse(); 362 363 // Used for --no-allow-shlib-undefined. 364 bool allNeededIsKnown; 365 366 // Used for --as-needed 367 bool isNeeded; 368 }; 369 370 class BinaryFile : public InputFile { 371 public: 372 explicit BinaryFile(MemoryBufferRef m) : InputFile(BinaryKind, m) {} 373 static bool classof(const InputFile *f) { return f->kind() == BinaryKind; } 374 void parse(); 375 }; 376 377 InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "", 378 uint64_t offsetInArchive = 0); 379 380 inline bool isBitcode(MemoryBufferRef mb) { 381 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; 382 } 383 384 std::string replaceThinLTOSuffix(StringRef path); 385 386 extern std::vector<BinaryFile *> binaryFiles; 387 extern std::vector<BitcodeFile *> bitcodeFiles; 388 extern std::vector<LazyObjFile *> lazyObjFiles; 389 extern std::vector<InputFile *> objectFiles; 390 extern std::vector<SharedFile *> sharedFiles; 391 392 } // namespace elf 393 } // namespace lld 394 395 #endif 396