1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_COFF_INPUT_FILES_H 10 #define LLD_COFF_INPUT_FILES_H 11 12 #include "Config.h" 13 #include "lld/Common/LLVM.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/StringSet.h" 18 #include "llvm/BinaryFormat/Magic.h" 19 #include "llvm/Object/Archive.h" 20 #include "llvm/Object/COFF.h" 21 #include "llvm/Support/StringSaver.h" 22 #include <memory> 23 #include <set> 24 #include <vector> 25 26 namespace llvm { 27 struct DILineInfo; 28 namespace pdb { 29 class DbiModuleDescriptorBuilder; 30 class NativeSession; 31 } 32 namespace lto { 33 class InputFile; 34 } 35 } 36 37 namespace lld { 38 class DWARFCache; 39 40 namespace coff { 41 42 std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file); 43 44 using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; 45 using llvm::COFF::MachineTypes; 46 using llvm::object::Archive; 47 using llvm::object::COFFObjectFile; 48 using llvm::object::COFFSymbolRef; 49 using llvm::object::coff_import_header; 50 using llvm::object::coff_section; 51 52 class Chunk; 53 class Defined; 54 class DefinedImportData; 55 class DefinedImportThunk; 56 class DefinedRegular; 57 class SectionChunk; 58 class Symbol; 59 class Undefined; 60 class TpiSource; 61 62 // The root class of input files. 63 class InputFile { 64 public: 65 enum Kind { 66 ArchiveKind, 67 ObjectKind, 68 LazyObjectKind, 69 PDBKind, 70 ImportKind, 71 BitcodeKind, 72 DLLKind 73 }; 74 Kind kind() const { return fileKind; } 75 virtual ~InputFile() {} 76 77 // Returns the filename. 78 StringRef getName() const { return mb.getBufferIdentifier(); } 79 80 // Reads a file (the constructor doesn't do that). 81 virtual void parse() = 0; 82 83 // Returns the CPU type this file was compiled to. 84 virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; } 85 86 MemoryBufferRef mb; 87 88 // An archive file name if this file is created from an archive. 89 StringRef parentName; 90 91 // Returns .drectve section contents if exist. 92 StringRef getDirectives() { return directives; } 93 94 protected: 95 InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {} 96 97 StringRef directives; 98 99 private: 100 const Kind fileKind; 101 }; 102 103 // .lib or .a file. 104 class ArchiveFile : public InputFile { 105 public: 106 explicit ArchiveFile(MemoryBufferRef m); 107 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 108 void parse() override; 109 110 // Enqueues an archive member load for the given symbol. If we've already 111 // enqueued a load for the same archive member, this function does nothing, 112 // which ensures that we don't load the same member more than once. 113 void addMember(const Archive::Symbol &sym); 114 115 private: 116 std::unique_ptr<Archive> file; 117 llvm::DenseSet<uint64_t> seen; 118 }; 119 120 // .obj or .o file between -start-lib and -end-lib. 121 class LazyObjFile : public InputFile { 122 public: 123 explicit LazyObjFile(MemoryBufferRef m) : InputFile(LazyObjectKind, m) {} 124 static bool classof(const InputFile *f) { 125 return f->kind() == LazyObjectKind; 126 } 127 // Makes this object file part of the link. 128 void fetch(); 129 // Adds the symbols in this file to the symbol table as LazyObject symbols. 130 void parse() override; 131 132 private: 133 std::vector<Symbol *> symbols; 134 }; 135 136 // .obj or .o file. This may be a member of an archive file. 137 class ObjFile : public InputFile { 138 public: 139 explicit ObjFile(MemoryBufferRef m) : InputFile(ObjectKind, m) {} 140 explicit ObjFile(MemoryBufferRef m, std::vector<Symbol *> &&symbols) 141 : InputFile(ObjectKind, m), symbols(std::move(symbols)) {} 142 static bool classof(const InputFile *f) { return f->kind() == ObjectKind; } 143 void parse() override; 144 MachineTypes getMachineType() override; 145 ArrayRef<Chunk *> getChunks() { return chunks; } 146 ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; } 147 ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; } 148 ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; } 149 ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; } 150 ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; } 151 ArrayRef<SectionChunk *> getGuardEHContChunks() { return guardEHContChunks; } 152 ArrayRef<Symbol *> getSymbols() { return symbols; } 153 154 MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; } 155 156 ArrayRef<uint8_t> getDebugSection(StringRef secName); 157 158 // Returns a Symbol object for the symbolIndex'th symbol in the 159 // underlying object file. 160 Symbol *getSymbol(uint32_t symbolIndex) { 161 return symbols[symbolIndex]; 162 } 163 164 // Returns the underlying COFF file. 165 COFFObjectFile *getCOFFObj() { return coffObj.get(); } 166 167 // Add a symbol for a range extension thunk. Return the new symbol table 168 // index. This index can be used to modify a relocation. 169 uint32_t addRangeThunkSymbol(Symbol *thunk) { 170 symbols.push_back(thunk); 171 return symbols.size() - 1; 172 } 173 174 void includeResourceChunks(); 175 176 bool isResourceObjFile() const { return !resourceChunks.empty(); } 177 178 static std::vector<ObjFile *> instances; 179 180 // Flags in the absolute @feat.00 symbol if it is present. These usually 181 // indicate if an object was compiled with certain security features enabled 182 // like stack guard, safeseh, /guard:cf, or other things. 183 uint32_t feat00Flags = 0; 184 185 // True if this object file is compatible with SEH. COFF-specific and 186 // x86-only. COFF spec 5.10.1. The .sxdata section. 187 bool hasSafeSEH() { return feat00Flags & 0x1; } 188 189 // True if this file was compiled with /guard:cf. 190 bool hasGuardCF() { return feat00Flags & 0x4800; } 191 192 // Pointer to the PDB module descriptor builder. Various debug info records 193 // will reference object files by "module index", which is here. Things like 194 // source files and section contributions are also recorded here. Will be null 195 // if we are not producing a PDB. 196 llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr; 197 198 const coff_section *addrsigSec = nullptr; 199 200 const coff_section *callgraphSec = nullptr; 201 202 // When using Microsoft precompiled headers, this is the PCH's key. 203 // The same key is used by both the precompiled object, and objects using the 204 // precompiled object. Any difference indicates out-of-date objects. 205 llvm::Optional<uint32_t> pchSignature; 206 207 // Whether this file was compiled with /hotpatch. 208 bool hotPatchable = false; 209 210 // Whether the object was already merged into the final PDB. 211 bool mergedIntoPDB = false; 212 213 // If the OBJ has a .debug$T stream, this tells how it will be handled. 214 TpiSource *debugTypesObj = nullptr; 215 216 // The .debug$P or .debug$T section data if present. Empty otherwise. 217 ArrayRef<uint8_t> debugTypes; 218 219 llvm::Optional<std::pair<StringRef, uint32_t>> 220 getVariableLocation(StringRef var); 221 222 llvm::Optional<llvm::DILineInfo> getDILineInfo(uint32_t offset, 223 uint32_t sectionIndex); 224 225 private: 226 const coff_section* getSection(uint32_t i); 227 const coff_section *getSection(COFFSymbolRef sym) { 228 return getSection(sym.getSectionNumber()); 229 } 230 231 void initializeChunks(); 232 void initializeSymbols(); 233 void initializeFlags(); 234 void initializeDependencies(); 235 236 SectionChunk * 237 readSection(uint32_t sectionNumber, 238 const llvm::object::coff_aux_section_definition *def, 239 StringRef leaderName); 240 241 void readAssociativeDefinition( 242 COFFSymbolRef coffSym, 243 const llvm::object::coff_aux_section_definition *def); 244 245 void readAssociativeDefinition( 246 COFFSymbolRef coffSym, 247 const llvm::object::coff_aux_section_definition *def, 248 uint32_t parentSection); 249 250 void recordPrevailingSymbolForMingw( 251 COFFSymbolRef coffSym, 252 llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap); 253 254 void maybeAssociateSEHForMingw( 255 COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def, 256 const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap); 257 258 // Given a new symbol Sym with comdat selection Selection, if the new 259 // symbol is not (yet) Prevailing and the existing comdat leader set to 260 // Leader, emits a diagnostic if the new symbol and its selection doesn't 261 // match the existing symbol and its selection. If either old or new 262 // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace 263 // the existing leader. In that case, Prevailing is set to true. 264 void 265 handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection, 266 bool &prevailing, DefinedRegular *leader, 267 const llvm::object::coff_aux_section_definition *def); 268 269 llvm::Optional<Symbol *> 270 createDefined(COFFSymbolRef sym, 271 std::vector<const llvm::object::coff_aux_section_definition *> 272 &comdatDefs, 273 bool &prevailingComdat); 274 Symbol *createRegular(COFFSymbolRef sym); 275 Symbol *createUndefined(COFFSymbolRef sym); 276 277 std::unique_ptr<COFFObjectFile> coffObj; 278 279 // List of all chunks defined by this file. This includes both section 280 // chunks and non-section chunks for common symbols. 281 std::vector<Chunk *> chunks; 282 283 std::vector<SectionChunk *> resourceChunks; 284 285 // CodeView debug info sections. 286 std::vector<SectionChunk *> debugChunks; 287 288 // Chunks containing symbol table indices of exception handlers. Only used for 289 // 32-bit x86. 290 std::vector<SectionChunk *> sxDataChunks; 291 292 // Chunks containing symbol table indices of address taken symbols, address 293 // taken IAT entries, longjmp and ehcont targets. These are not linked into 294 // the final binary when /guard:cf is set. 295 std::vector<SectionChunk *> guardFidChunks; 296 std::vector<SectionChunk *> guardIATChunks; 297 std::vector<SectionChunk *> guardLJmpChunks; 298 std::vector<SectionChunk *> guardEHContChunks; 299 300 // This vector contains a list of all symbols defined or referenced by this 301 // file. They are indexed such that you can get a Symbol by symbol 302 // index. Nonexistent indices (which are occupied by auxiliary 303 // symbols in the real symbol table) are filled with null pointers. 304 std::vector<Symbol *> symbols; 305 306 // This vector contains the same chunks as Chunks, but they are 307 // indexed such that you can get a SectionChunk by section index. 308 // Nonexistent section indices are filled with null pointers. 309 // (Because section number is 1-based, the first slot is always a 310 // null pointer.) This vector is only valid during initialization. 311 std::vector<SectionChunk *> sparseChunks; 312 313 DWARFCache *dwarf = nullptr; 314 }; 315 316 // This is a PDB type server dependency, that is not a input file per se, but 317 // needs to be treated like one. Such files are discovered from the debug type 318 // stream. 319 class PDBInputFile : public InputFile { 320 public: 321 explicit PDBInputFile(MemoryBufferRef m); 322 ~PDBInputFile(); 323 static bool classof(const InputFile *f) { return f->kind() == PDBKind; } 324 void parse() override; 325 326 static void enqueue(StringRef path, ObjFile *fromFile); 327 328 static PDBInputFile *findFromRecordPath(StringRef path, ObjFile *fromFile); 329 330 static std::map<std::string, PDBInputFile *> instances; 331 332 // Record possible errors while opening the PDB file 333 llvm::Optional<Error> loadErr; 334 335 // This is the actual interface to the PDB (if it was opened successfully) 336 std::unique_ptr<llvm::pdb::NativeSession> session; 337 338 // If the PDB has a .debug$T stream, this tells how it will be handled. 339 TpiSource *debugTypesObj = nullptr; 340 }; 341 342 // This type represents import library members that contain DLL names 343 // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7 344 // for details about the format. 345 class ImportFile : public InputFile { 346 public: 347 explicit ImportFile(MemoryBufferRef m) : InputFile(ImportKind, m) {} 348 349 static bool classof(const InputFile *f) { return f->kind() == ImportKind; } 350 351 static std::vector<ImportFile *> instances; 352 353 Symbol *impSym = nullptr; 354 Symbol *thunkSym = nullptr; 355 std::string dllName; 356 357 private: 358 void parse() override; 359 360 public: 361 StringRef externalName; 362 const coff_import_header *hdr; 363 Chunk *location = nullptr; 364 365 // We want to eliminate dllimported symbols if no one actually refers to them. 366 // These "Live" bits are used to keep track of which import library members 367 // are actually in use. 368 // 369 // If the Live bit is turned off by MarkLive, Writer will ignore dllimported 370 // symbols provided by this import library member. We also track whether the 371 // imported symbol is used separately from whether the thunk is used in order 372 // to avoid creating unnecessary thunks. 373 bool live = !config->doGC; 374 bool thunkLive = !config->doGC; 375 }; 376 377 // Used for LTO. 378 class BitcodeFile : public InputFile { 379 public: 380 BitcodeFile(MemoryBufferRef mb, StringRef archiveName, 381 uint64_t offsetInArchive); 382 explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName, 383 uint64_t offsetInArchive, 384 std::vector<Symbol *> &&symbols); 385 ~BitcodeFile(); 386 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 387 ArrayRef<Symbol *> getSymbols() { return symbols; } 388 MachineTypes getMachineType() override; 389 static std::vector<BitcodeFile *> instances; 390 std::unique_ptr<llvm::lto::InputFile> obj; 391 392 private: 393 void parse() override; 394 395 std::vector<Symbol *> symbols; 396 }; 397 398 // .dll file. MinGW only. 399 class DLLFile : public InputFile { 400 public: 401 explicit DLLFile(MemoryBufferRef m) : InputFile(DLLKind, m) {} 402 static bool classof(const InputFile *f) { return f->kind() == DLLKind; } 403 void parse() override; 404 MachineTypes getMachineType() override; 405 406 struct Symbol { 407 StringRef dllName; 408 StringRef symbolName; 409 llvm::COFF::ImportNameType nameType; 410 llvm::COFF::ImportType importType; 411 }; 412 413 void makeImport(Symbol *s); 414 415 private: 416 std::unique_ptr<COFFObjectFile> coffObj; 417 llvm::StringSet<> seen; 418 }; 419 420 inline bool isBitcode(MemoryBufferRef mb) { 421 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; 422 } 423 424 std::string replaceThinLTOSuffix(StringRef path); 425 } // namespace coff 426 427 std::string toString(const coff::InputFile *file); 428 } // namespace lld 429 430 #endif 431