1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_COFF_INPUT_FILES_H 10 #define LLD_COFF_INPUT_FILES_H 11 12 #include "Config.h" 13 #include "lld/Common/LLVM.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/BinaryFormat/Magic.h" 18 #include "llvm/Object/Archive.h" 19 #include "llvm/Object/COFF.h" 20 #include "llvm/Support/StringSaver.h" 21 #include <memory> 22 #include <set> 23 #include <vector> 24 25 namespace llvm { 26 struct DILineInfo; 27 namespace pdb { 28 class DbiModuleDescriptorBuilder; 29 class NativeSession; 30 } 31 namespace lto { 32 class InputFile; 33 } 34 } 35 36 namespace lld { 37 class DWARFCache; 38 39 namespace coff { 40 41 std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file); 42 43 using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; 44 using llvm::COFF::MachineTypes; 45 using llvm::object::Archive; 46 using llvm::object::COFFObjectFile; 47 using llvm::object::COFFSymbolRef; 48 using llvm::object::coff_import_header; 49 using llvm::object::coff_section; 50 51 class Chunk; 52 class Defined; 53 class DefinedImportData; 54 class DefinedImportThunk; 55 class DefinedRegular; 56 class SectionChunk; 57 class Symbol; 58 class Undefined; 59 class TpiSource; 60 61 // The root class of input files. 62 class InputFile { 63 public: 64 enum Kind { 65 ArchiveKind, 66 ObjectKind, 67 LazyObjectKind, 68 PDBKind, 69 ImportKind, 70 BitcodeKind 71 }; 72 Kind kind() const { return fileKind; } 73 virtual ~InputFile() {} 74 75 // Returns the filename. 76 StringRef getName() const { return mb.getBufferIdentifier(); } 77 78 // Reads a file (the constructor doesn't do that). 79 virtual void parse() = 0; 80 81 // Returns the CPU type this file was compiled to. 82 virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; } 83 84 MemoryBufferRef mb; 85 86 // An archive file name if this file is created from an archive. 87 StringRef parentName; 88 89 // Returns .drectve section contents if exist. 90 StringRef getDirectives() { return directives; } 91 92 protected: 93 InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {} 94 95 StringRef directives; 96 97 private: 98 const Kind fileKind; 99 }; 100 101 // .lib or .a file. 102 class ArchiveFile : public InputFile { 103 public: 104 explicit ArchiveFile(MemoryBufferRef m); 105 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 106 void parse() override; 107 108 // Enqueues an archive member load for the given symbol. If we've already 109 // enqueued a load for the same archive member, this function does nothing, 110 // which ensures that we don't load the same member more than once. 111 void addMember(const Archive::Symbol &sym); 112 113 private: 114 std::unique_ptr<Archive> file; 115 llvm::DenseSet<uint64_t> seen; 116 }; 117 118 // .obj or .o file between -start-lib and -end-lib. 119 class LazyObjFile : public InputFile { 120 public: 121 explicit LazyObjFile(MemoryBufferRef m) : InputFile(LazyObjectKind, m) {} 122 static bool classof(const InputFile *f) { 123 return f->kind() == LazyObjectKind; 124 } 125 // Makes this object file part of the link. 126 void fetch(); 127 // Adds the symbols in this file to the symbol table as LazyObject symbols. 128 void parse() override; 129 130 private: 131 std::vector<Symbol *> symbols; 132 }; 133 134 // .obj or .o file. This may be a member of an archive file. 135 class ObjFile : public InputFile { 136 public: 137 explicit ObjFile(MemoryBufferRef m) : InputFile(ObjectKind, m) {} 138 explicit ObjFile(MemoryBufferRef m, std::vector<Symbol *> &&symbols) 139 : InputFile(ObjectKind, m), symbols(std::move(symbols)) {} 140 static bool classof(const InputFile *f) { return f->kind() == ObjectKind; } 141 void parse() override; 142 MachineTypes getMachineType() override; 143 ArrayRef<Chunk *> getChunks() { return chunks; } 144 ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; } 145 ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; } 146 ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; } 147 ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; } 148 ArrayRef<Symbol *> getSymbols() { return symbols; } 149 150 ArrayRef<uint8_t> getDebugSection(StringRef secName); 151 152 // Returns a Symbol object for the symbolIndex'th symbol in the 153 // underlying object file. 154 Symbol *getSymbol(uint32_t symbolIndex) { 155 return symbols[symbolIndex]; 156 } 157 158 // Returns the underlying COFF file. 159 COFFObjectFile *getCOFFObj() { return coffObj.get(); } 160 161 // Add a symbol for a range extension thunk. Return the new symbol table 162 // index. This index can be used to modify a relocation. 163 uint32_t addRangeThunkSymbol(Symbol *thunk) { 164 symbols.push_back(thunk); 165 return symbols.size() - 1; 166 } 167 168 void includeResourceChunks(); 169 170 bool isResourceObjFile() const { return !resourceChunks.empty(); } 171 172 static std::vector<ObjFile *> instances; 173 174 // Flags in the absolute @feat.00 symbol if it is present. These usually 175 // indicate if an object was compiled with certain security features enabled 176 // like stack guard, safeseh, /guard:cf, or other things. 177 uint32_t feat00Flags = 0; 178 179 // True if this object file is compatible with SEH. COFF-specific and 180 // x86-only. COFF spec 5.10.1. The .sxdata section. 181 bool hasSafeSEH() { return feat00Flags & 0x1; } 182 183 // True if this file was compiled with /guard:cf. 184 bool hasGuardCF() { return feat00Flags & 0x800; } 185 186 // Pointer to the PDB module descriptor builder. Various debug info records 187 // will reference object files by "module index", which is here. Things like 188 // source files and section contributions are also recorded here. Will be null 189 // if we are not producing a PDB. 190 llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr; 191 192 const coff_section *addrsigSec = nullptr; 193 194 // When using Microsoft precompiled headers, this is the PCH's key. 195 // The same key is used by both the precompiled object, and objects using the 196 // precompiled object. Any difference indicates out-of-date objects. 197 llvm::Optional<uint32_t> pchSignature; 198 199 // Whether this file was compiled with /hotpatch. 200 bool hotPatchable = false; 201 202 // Whether the object was already merged into the final PDB. 203 bool mergedIntoPDB = false; 204 205 // If the OBJ has a .debug$T stream, this tells how it will be handled. 206 TpiSource *debugTypesObj = nullptr; 207 208 // The .debug$P or .debug$T section data if present. Empty otherwise. 209 ArrayRef<uint8_t> debugTypes; 210 211 llvm::Optional<std::pair<StringRef, uint32_t>> 212 getVariableLocation(StringRef var); 213 214 llvm::Optional<llvm::DILineInfo> getDILineInfo(uint32_t offset, 215 uint32_t sectionIndex); 216 217 private: 218 const coff_section* getSection(uint32_t i); 219 const coff_section *getSection(COFFSymbolRef sym) { 220 return getSection(sym.getSectionNumber()); 221 } 222 223 void initializeChunks(); 224 void initializeSymbols(); 225 void initializeFlags(); 226 void initializeDependencies(); 227 228 SectionChunk * 229 readSection(uint32_t sectionNumber, 230 const llvm::object::coff_aux_section_definition *def, 231 StringRef leaderName); 232 233 void readAssociativeDefinition( 234 COFFSymbolRef coffSym, 235 const llvm::object::coff_aux_section_definition *def); 236 237 void readAssociativeDefinition( 238 COFFSymbolRef coffSym, 239 const llvm::object::coff_aux_section_definition *def, 240 uint32_t parentSection); 241 242 void recordPrevailingSymbolForMingw( 243 COFFSymbolRef coffSym, 244 llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap); 245 246 void maybeAssociateSEHForMingw( 247 COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def, 248 const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap); 249 250 // Given a new symbol Sym with comdat selection Selection, if the new 251 // symbol is not (yet) Prevailing and the existing comdat leader set to 252 // Leader, emits a diagnostic if the new symbol and its selection doesn't 253 // match the existing symbol and its selection. If either old or new 254 // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace 255 // the existing leader. In that case, Prevailing is set to true. 256 void handleComdatSelection(COFFSymbolRef sym, 257 llvm::COFF::COMDATType &selection, 258 bool &prevailing, DefinedRegular *leader); 259 260 llvm::Optional<Symbol *> 261 createDefined(COFFSymbolRef sym, 262 std::vector<const llvm::object::coff_aux_section_definition *> 263 &comdatDefs, 264 bool &prevailingComdat); 265 Symbol *createRegular(COFFSymbolRef sym); 266 Symbol *createUndefined(COFFSymbolRef sym); 267 268 std::unique_ptr<COFFObjectFile> coffObj; 269 270 // List of all chunks defined by this file. This includes both section 271 // chunks and non-section chunks for common symbols. 272 std::vector<Chunk *> chunks; 273 274 std::vector<SectionChunk *> resourceChunks; 275 276 // CodeView debug info sections. 277 std::vector<SectionChunk *> debugChunks; 278 279 // Chunks containing symbol table indices of exception handlers. Only used for 280 // 32-bit x86. 281 std::vector<SectionChunk *> sxDataChunks; 282 283 // Chunks containing symbol table indices of address taken symbols and longjmp 284 // targets. These are not linked into the final binary when /guard:cf is set. 285 std::vector<SectionChunk *> guardFidChunks; 286 std::vector<SectionChunk *> guardLJmpChunks; 287 288 // This vector contains a list of all symbols defined or referenced by this 289 // file. They are indexed such that you can get a Symbol by symbol 290 // index. Nonexistent indices (which are occupied by auxiliary 291 // symbols in the real symbol table) are filled with null pointers. 292 std::vector<Symbol *> symbols; 293 294 // This vector contains the same chunks as Chunks, but they are 295 // indexed such that you can get a SectionChunk by section index. 296 // Nonexistent section indices are filled with null pointers. 297 // (Because section number is 1-based, the first slot is always a 298 // null pointer.) This vector is only valid during initialization. 299 std::vector<SectionChunk *> sparseChunks; 300 301 DWARFCache *dwarf = nullptr; 302 }; 303 304 // This is a PDB type server dependency, that is not a input file per se, but 305 // needs to be treated like one. Such files are discovered from the debug type 306 // stream. 307 class PDBInputFile : public InputFile { 308 public: 309 explicit PDBInputFile(MemoryBufferRef m); 310 ~PDBInputFile(); 311 static bool classof(const InputFile *f) { return f->kind() == PDBKind; } 312 void parse() override; 313 314 static void enqueue(StringRef path, ObjFile *fromFile); 315 316 static PDBInputFile *findFromRecordPath(StringRef path, ObjFile *fromFile); 317 318 static std::map<std::string, PDBInputFile *> instances; 319 320 // Record possible errors while opening the PDB file 321 llvm::Optional<Error> loadErr; 322 323 // This is the actual interface to the PDB (if it was opened successfully) 324 std::unique_ptr<llvm::pdb::NativeSession> session; 325 326 // If the PDB has a .debug$T stream, this tells how it will be handled. 327 TpiSource *debugTypesObj = nullptr; 328 }; 329 330 // This type represents import library members that contain DLL names 331 // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7 332 // for details about the format. 333 class ImportFile : public InputFile { 334 public: 335 explicit ImportFile(MemoryBufferRef m) : InputFile(ImportKind, m) {} 336 337 static bool classof(const InputFile *f) { return f->kind() == ImportKind; } 338 339 static std::vector<ImportFile *> instances; 340 341 Symbol *impSym = nullptr; 342 Symbol *thunkSym = nullptr; 343 std::string dllName; 344 345 private: 346 void parse() override; 347 348 public: 349 StringRef externalName; 350 const coff_import_header *hdr; 351 Chunk *location = nullptr; 352 353 // We want to eliminate dllimported symbols if no one actually refers them. 354 // These "Live" bits are used to keep track of which import library members 355 // are actually in use. 356 // 357 // If the Live bit is turned off by MarkLive, Writer will ignore dllimported 358 // symbols provided by this import library member. We also track whether the 359 // imported symbol is used separately from whether the thunk is used in order 360 // to avoid creating unnecessary thunks. 361 bool live = !config->doGC; 362 bool thunkLive = !config->doGC; 363 }; 364 365 // Used for LTO. 366 class BitcodeFile : public InputFile { 367 public: 368 BitcodeFile(MemoryBufferRef mb, StringRef archiveName, 369 uint64_t offsetInArchive); 370 explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName, 371 uint64_t offsetInArchive, 372 std::vector<Symbol *> &&symbols); 373 ~BitcodeFile(); 374 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 375 ArrayRef<Symbol *> getSymbols() { return symbols; } 376 MachineTypes getMachineType() override; 377 static std::vector<BitcodeFile *> instances; 378 std::unique_ptr<llvm::lto::InputFile> obj; 379 380 private: 381 void parse() override; 382 383 std::vector<Symbol *> symbols; 384 }; 385 386 inline bool isBitcode(MemoryBufferRef mb) { 387 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; 388 } 389 390 std::string replaceThinLTOSuffix(StringRef path); 391 } // namespace coff 392 393 std::string toString(const coff::InputFile *file); 394 } // namespace lld 395 396 #endif 397