1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_INPUT_FILES_H 10 #define LLD_ELF_INPUT_FILES_H 11 12 #include "Config.h" 13 #include "lld/Common/ErrorHandler.h" 14 #include "lld/Common/LLVM.h" 15 #include "lld/Common/Reproduce.h" 16 #include "llvm/ADT/CachedHashString.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" 20 #include "llvm/IR/Comdat.h" 21 #include "llvm/Object/Archive.h" 22 #include "llvm/Object/ELF.h" 23 #include "llvm/Object/IRObjectFile.h" 24 #include "llvm/Support/Threading.h" 25 #include <map> 26 27 namespace llvm { 28 class TarWriter; 29 struct DILineInfo; 30 namespace lto { 31 class InputFile; 32 } 33 } // namespace llvm 34 35 namespace lld { 36 namespace elf { 37 class InputFile; 38 class InputSectionBase; 39 } 40 41 // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 42 std::string toString(const elf::InputFile *f); 43 44 namespace elf { 45 46 using llvm::object::Archive; 47 48 class Symbol; 49 50 // If -reproduce option is given, all input files are written 51 // to this tar archive. 52 extern std::unique_ptr<llvm::TarWriter> tar; 53 54 // Opens a given file. 55 llvm::Optional<MemoryBufferRef> readFile(StringRef path); 56 57 // Add symbols in File to the symbol table. 58 void parseFile(InputFile *file); 59 60 // The root class of input files. 61 class InputFile { 62 public: 63 enum Kind { 64 ObjKind, 65 SharedKind, 66 LazyObjKind, 67 ArchiveKind, 68 BitcodeKind, 69 BinaryKind, 70 }; 71 72 Kind kind() const { return fileKind; } 73 74 bool isElf() const { 75 Kind k = kind(); 76 return k == ObjKind || k == SharedKind; 77 } 78 79 StringRef getName() const { return mb.getBufferIdentifier(); } 80 MemoryBufferRef mb; 81 82 // Returns sections. It is a runtime error to call this function 83 // on files that don't have the notion of sections. 84 ArrayRef<InputSectionBase *> getSections() const { 85 assert(fileKind == ObjKind || fileKind == BinaryKind); 86 return sections; 87 } 88 89 // Returns object file symbols. It is a runtime error to call this 90 // function on files of other types. 91 ArrayRef<Symbol *> getSymbols() { return getMutableSymbols(); } 92 93 MutableArrayRef<Symbol *> getMutableSymbols() { 94 assert(fileKind == BinaryKind || fileKind == ObjKind || 95 fileKind == BitcodeKind); 96 return symbols; 97 } 98 99 // Filename of .a which contained this file. If this file was 100 // not in an archive file, it is the empty string. We use this 101 // string for creating error messages. 102 std::string archiveName; 103 104 // If this is an architecture-specific file, the following members 105 // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. 106 ELFKind ekind = ELFNoneKind; 107 uint16_t emachine = llvm::ELF::EM_NONE; 108 uint8_t osabi = 0; 109 uint8_t abiVersion = 0; 110 111 // Cache for toString(). Only toString() should use this member. 112 mutable std::string toStringCache; 113 114 std::string getSrcMsg(const Symbol &sym, InputSectionBase &sec, 115 uint64_t offset); 116 117 // True if this is an argument for --just-symbols. Usually false. 118 bool justSymbols = false; 119 120 // outSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE 121 // to compute offsets in PLT call stubs. 122 uint32_t ppc32Got2OutSecOff = 0; 123 124 // On PPC64 we need to keep track of which files contain small code model 125 // relocations that access the .toc section. To minimize the chance of a 126 // relocation overflow, files that do contain said relocations should have 127 // their .toc sections sorted closer to the .got section than files that do 128 // not contain any small code model relocations. Thats because the toc-pointer 129 // is defined to point at .got + 0x8000 and the instructions used with small 130 // code model relocations support immediates in the range [-0x8000, 0x7FFC], 131 // making the addressable range relative to the toc pointer 132 // [.got, .got + 0xFFFC]. 133 bool ppc64SmallCodeModelTocRelocs = false; 134 135 // groupId is used for --warn-backrefs which is an optional error 136 // checking feature. All files within the same --{start,end}-group or 137 // --{start,end}-lib get the same group ID. Otherwise, each file gets a new 138 // group ID. For more info, see checkDependency() in SymbolTable.cpp. 139 uint32_t groupId; 140 static bool isInGroup; 141 static uint32_t nextGroupId; 142 143 // Index of MIPS GOT built for this file. 144 llvm::Optional<size_t> mipsGotIndex; 145 146 std::vector<Symbol *> symbols; 147 148 protected: 149 InputFile(Kind k, MemoryBufferRef m); 150 std::vector<InputSectionBase *> sections; 151 152 private: 153 const Kind fileKind; 154 }; 155 156 class ELFFileBase : public InputFile { 157 public: 158 ELFFileBase(Kind k, MemoryBufferRef m); 159 static bool classof(const InputFile *f) { return f->isElf(); } 160 161 template <typename ELFT> llvm::object::ELFFile<ELFT> getObj() const { 162 return check(llvm::object::ELFFile<ELFT>::create(mb.getBuffer())); 163 } 164 165 StringRef getStringTable() const { return stringTable; } 166 167 template <typename ELFT> typename ELFT::SymRange getELFSyms() const { 168 return typename ELFT::SymRange( 169 reinterpret_cast<const typename ELFT::Sym *>(elfSyms), numELFSyms); 170 } 171 template <typename ELFT> typename ELFT::SymRange getGlobalELFSyms() const { 172 return getELFSyms<ELFT>().slice(firstGlobal); 173 } 174 175 protected: 176 // Initializes this class's member variables. 177 template <typename ELFT> void init(); 178 179 const void *elfSyms = nullptr; 180 size_t numELFSyms = 0; 181 uint32_t firstGlobal = 0; 182 StringRef stringTable; 183 }; 184 185 // .o file. 186 template <class ELFT> class ObjFile : public ELFFileBase { 187 using Elf_Rel = typename ELFT::Rel; 188 using Elf_Rela = typename ELFT::Rela; 189 using Elf_Sym = typename ELFT::Sym; 190 using Elf_Shdr = typename ELFT::Shdr; 191 using Elf_Word = typename ELFT::Word; 192 using Elf_CGProfile = typename ELFT::CGProfile; 193 194 public: 195 static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 196 197 llvm::object::ELFFile<ELFT> getObj() const { 198 return this->ELFFileBase::getObj<ELFT>(); 199 } 200 201 ArrayRef<Symbol *> getLocalSymbols(); 202 ArrayRef<Symbol *> getGlobalSymbols(); 203 204 ObjFile(MemoryBufferRef m, StringRef archiveName) : ELFFileBase(ObjKind, m) { 205 this->archiveName = archiveName; 206 } 207 208 void parse(bool ignoreComdats = false); 209 210 StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> sections, 211 const Elf_Shdr &sec); 212 213 Symbol &getSymbol(uint32_t symbolIndex) const { 214 if (symbolIndex >= this->symbols.size()) 215 fatal(toString(this) + ": invalid symbol index"); 216 return *this->symbols[symbolIndex]; 217 } 218 219 uint32_t getSectionIndex(const Elf_Sym &sym) const; 220 221 template <typename RelT> Symbol &getRelocTargetSym(const RelT &rel) const { 222 uint32_t symIndex = rel.getSymbol(config->isMips64EL); 223 return getSymbol(symIndex); 224 } 225 226 llvm::Optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t); 227 llvm::Optional<std::pair<std::string, unsigned>> getVariableLoc(StringRef name); 228 229 // MIPS GP0 value defined by this file. This value represents the gp value 230 // used to create the relocatable object and required to support 231 // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. 232 uint32_t mipsGp0 = 0; 233 234 uint32_t andFeatures = 0; 235 236 // Name of source file obtained from STT_FILE symbol value, 237 // or empty string if there is no such symbol in object file 238 // symbol table. 239 StringRef sourceFile; 240 241 // True if the file defines functions compiled with 242 // -fsplit-stack. Usually false. 243 bool splitStack = false; 244 245 // True if the file defines functions compiled with -fsplit-stack, 246 // but had one or more functions with the no_split_stack attribute. 247 bool someNoSplitStack = false; 248 249 // Pointer to this input file's .llvm_addrsig section, if it has one. 250 const Elf_Shdr *addrsigSec = nullptr; 251 252 // SHT_LLVM_CALL_GRAPH_PROFILE table 253 ArrayRef<Elf_CGProfile> cgProfile; 254 255 private: 256 void initializeSections(bool ignoreComdats); 257 void initializeSymbols(); 258 void initializeJustSymbols(); 259 void initializeDwarf(); 260 InputSectionBase *getRelocTarget(const Elf_Shdr &sec); 261 InputSectionBase *createInputSection(const Elf_Shdr &sec); 262 StringRef getSectionName(const Elf_Shdr &sec); 263 264 bool shouldMerge(const Elf_Shdr &sec); 265 266 // Each ELF symbol contains a section index which the symbol belongs to. 267 // However, because the number of bits dedicated for that is limited, a 268 // symbol can directly point to a section only when the section index is 269 // equal to or smaller than 65280. 270 // 271 // If an object file contains more than 65280 sections, the file must 272 // contain .symtab_shndx section. The section contains an array of 273 // 32-bit integers whose size is the same as the number of symbols. 274 // Nth symbol's section index is in the Nth entry of .symtab_shndx. 275 // 276 // The following variable contains the contents of .symtab_shndx. 277 // If the section does not exist (which is common), the array is empty. 278 ArrayRef<Elf_Word> shndxTable; 279 280 // .shstrtab contents. 281 StringRef sectionStringTable; 282 283 // Debugging information to retrieve source file and line for error 284 // reporting. Linker may find reasonable number of errors in a 285 // single object file, so we cache debugging information in order to 286 // parse it only once for each object file we link. 287 std::unique_ptr<llvm::DWARFContext> dwarf; 288 std::vector<const llvm::DWARFDebugLine::LineTable *> lineTables; 289 struct VarLoc { 290 const llvm::DWARFDebugLine::LineTable *lt; 291 unsigned file; 292 unsigned line; 293 }; 294 llvm::DenseMap<StringRef, VarLoc> variableLoc; 295 llvm::once_flag initDwarfLine; 296 }; 297 298 // LazyObjFile is analogous to ArchiveFile in the sense that 299 // the file contains lazy symbols. The difference is that 300 // LazyObjFile wraps a single file instead of multiple files. 301 // 302 // This class is used for --start-lib and --end-lib options which 303 // instruct the linker to link object files between them with the 304 // archive file semantics. 305 class LazyObjFile : public InputFile { 306 public: 307 LazyObjFile(MemoryBufferRef m, StringRef archiveName, 308 uint64_t offsetInArchive) 309 : InputFile(LazyObjKind, m), offsetInArchive(offsetInArchive) { 310 this->archiveName = archiveName; 311 } 312 313 static bool classof(const InputFile *f) { return f->kind() == LazyObjKind; } 314 315 template <class ELFT> void parse(); 316 void fetch(); 317 318 private: 319 uint64_t offsetInArchive; 320 }; 321 322 // An ArchiveFile object represents a .a file. 323 class ArchiveFile : public InputFile { 324 public: 325 explicit ArchiveFile(std::unique_ptr<Archive> &&file); 326 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 327 void parse(); 328 329 // Pulls out an object file that contains a definition for Sym and 330 // returns it. If the same file was instantiated before, this 331 // function does nothing (so we don't instantiate the same file 332 // more than once.) 333 void fetch(const Archive::Symbol &sym); 334 335 private: 336 std::unique_ptr<Archive> file; 337 llvm::DenseSet<uint64_t> seen; 338 }; 339 340 class BitcodeFile : public InputFile { 341 public: 342 BitcodeFile(MemoryBufferRef m, StringRef archiveName, 343 uint64_t offsetInArchive); 344 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 345 template <class ELFT> void parse(); 346 std::unique_ptr<llvm::lto::InputFile> obj; 347 }; 348 349 // .so file. 350 class SharedFile : public ELFFileBase { 351 public: 352 SharedFile(MemoryBufferRef m, StringRef defaultSoName) 353 : ELFFileBase(SharedKind, m), soName(defaultSoName), 354 isNeeded(!config->asNeeded) {} 355 356 // This is actually a vector of Elf_Verdef pointers. 357 std::vector<const void *> verdefs; 358 359 // If the output file needs Elf_Verneed data structures for this file, this is 360 // a vector of Elf_Vernaux version identifiers that map onto the entries in 361 // Verdefs, otherwise it is empty. 362 std::vector<unsigned> vernauxs; 363 364 static unsigned vernauxNum; 365 366 std::vector<StringRef> dtNeeded; 367 std::string soName; 368 369 static bool classof(const InputFile *f) { return f->kind() == SharedKind; } 370 371 template <typename ELFT> void parse(); 372 373 // Used for --no-allow-shlib-undefined. 374 bool allNeededIsKnown; 375 376 // Used for --as-needed 377 bool isNeeded; 378 }; 379 380 class BinaryFile : public InputFile { 381 public: 382 explicit BinaryFile(MemoryBufferRef m) : InputFile(BinaryKind, m) {} 383 static bool classof(const InputFile *f) { return f->kind() == BinaryKind; } 384 void parse(); 385 }; 386 387 InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "", 388 uint64_t offsetInArchive = 0); 389 390 inline bool isBitcode(MemoryBufferRef mb) { 391 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; 392 } 393 394 std::string replaceThinLTOSuffix(StringRef path); 395 396 extern std::vector<BinaryFile *> binaryFiles; 397 extern std::vector<BitcodeFile *> bitcodeFiles; 398 extern std::vector<LazyObjFile *> lazyObjFiles; 399 extern std::vector<InputFile *> objectFiles; 400 extern std::vector<SharedFile *> sharedFiles; 401 402 } // namespace elf 403 } // namespace lld 404 405 #endif 406