1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_COFF_INPUT_FILES_H 10 #define LLD_COFF_INPUT_FILES_H 11 12 #include "Config.h" 13 #include "lld/Common/LLVM.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/StringSet.h" 18 #include "llvm/BinaryFormat/Magic.h" 19 #include "llvm/Object/Archive.h" 20 #include "llvm/Object/COFF.h" 21 #include "llvm/Support/StringSaver.h" 22 #include <memory> 23 #include <set> 24 #include <vector> 25 26 namespace llvm { 27 struct DILineInfo; 28 namespace pdb { 29 class DbiModuleDescriptorBuilder; 30 class NativeSession; 31 } 32 namespace lto { 33 class InputFile; 34 } 35 } 36 37 namespace lld { 38 class DWARFCache; 39 40 namespace coff { 41 class COFFLinkerContext; 42 43 std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file); 44 45 using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; 46 using llvm::COFF::MachineTypes; 47 using llvm::object::Archive; 48 using llvm::object::COFFObjectFile; 49 using llvm::object::COFFSymbolRef; 50 using llvm::object::coff_import_header; 51 using llvm::object::coff_section; 52 53 class Chunk; 54 class Defined; 55 class DefinedImportData; 56 class DefinedImportThunk; 57 class DefinedRegular; 58 class SectionChunk; 59 class Symbol; 60 class Undefined; 61 class TpiSource; 62 63 // The root class of input files. 64 class InputFile { 65 public: 66 enum Kind { 67 ArchiveKind, 68 ObjectKind, 69 LazyObjectKind, 70 PDBKind, 71 ImportKind, 72 BitcodeKind, 73 DLLKind 74 }; 75 Kind kind() const { return fileKind; } 76 virtual ~InputFile() {} 77 78 // Returns the filename. 79 StringRef getName() const { return mb.getBufferIdentifier(); } 80 81 // Reads a file (the constructor doesn't do that). 82 virtual void parse() = 0; 83 84 // Returns the CPU type this file was compiled to. 85 virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; } 86 87 MemoryBufferRef mb; 88 89 // An archive file name if this file is created from an archive. 90 StringRef parentName; 91 92 // Returns .drectve section contents if exist. 93 StringRef getDirectives() { return directives; } 94 95 COFFLinkerContext &ctx; 96 97 protected: 98 InputFile(COFFLinkerContext &c, Kind k, MemoryBufferRef m, bool lazy = false) 99 : mb(m), ctx(c), fileKind(k), lazy(lazy) {} 100 101 StringRef directives; 102 103 private: 104 const Kind fileKind; 105 106 public: 107 // True if this is a lazy ObjFile or BitcodeFile. 108 bool lazy = false; 109 }; 110 111 // .lib or .a file. 112 class ArchiveFile : public InputFile { 113 public: 114 explicit ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m); 115 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 116 void parse() override; 117 118 // Enqueues an archive member load for the given symbol. If we've already 119 // enqueued a load for the same archive member, this function does nothing, 120 // which ensures that we don't load the same member more than once. 121 void addMember(const Archive::Symbol &sym); 122 123 private: 124 std::unique_ptr<Archive> file; 125 llvm::DenseSet<uint64_t> seen; 126 }; 127 128 // .obj or .o file. This may be a member of an archive file. 129 class ObjFile : public InputFile { 130 public: 131 explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy = false) 132 : InputFile(ctx, ObjectKind, m, lazy) {} 133 static bool classof(const InputFile *f) { return f->kind() == ObjectKind; } 134 void parse() override; 135 void parseLazy(); 136 MachineTypes getMachineType() override; 137 ArrayRef<Chunk *> getChunks() { return chunks; } 138 ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; } 139 ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; } 140 ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; } 141 ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; } 142 ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; } 143 ArrayRef<SectionChunk *> getGuardEHContChunks() { return guardEHContChunks; } 144 ArrayRef<Symbol *> getSymbols() { return symbols; } 145 146 MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; } 147 148 ArrayRef<uint8_t> getDebugSection(StringRef secName); 149 150 // Returns a Symbol object for the symbolIndex'th symbol in the 151 // underlying object file. 152 Symbol *getSymbol(uint32_t symbolIndex) { 153 return symbols[symbolIndex]; 154 } 155 156 // Returns the underlying COFF file. 157 COFFObjectFile *getCOFFObj() { return coffObj.get(); } 158 159 // Add a symbol for a range extension thunk. Return the new symbol table 160 // index. This index can be used to modify a relocation. 161 uint32_t addRangeThunkSymbol(Symbol *thunk) { 162 symbols.push_back(thunk); 163 return symbols.size() - 1; 164 } 165 166 void includeResourceChunks(); 167 168 bool isResourceObjFile() const { return !resourceChunks.empty(); } 169 170 // Flags in the absolute @feat.00 symbol if it is present. These usually 171 // indicate if an object was compiled with certain security features enabled 172 // like stack guard, safeseh, /guard:cf, or other things. 173 uint32_t feat00Flags = 0; 174 175 // True if this object file is compatible with SEH. COFF-specific and 176 // x86-only. COFF spec 5.10.1. The .sxdata section. 177 bool hasSafeSEH() { return feat00Flags & 0x1; } 178 179 // True if this file was compiled with /guard:cf. 180 bool hasGuardCF() { return feat00Flags & 0x800; } 181 182 // True if this file was compiled with /guard:ehcont. 183 bool hasGuardEHCont() { return feat00Flags & 0x4000; } 184 185 // Pointer to the PDB module descriptor builder. Various debug info records 186 // will reference object files by "module index", which is here. Things like 187 // source files and section contributions are also recorded here. Will be null 188 // if we are not producing a PDB. 189 llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr; 190 191 const coff_section *addrsigSec = nullptr; 192 193 const coff_section *callgraphSec = nullptr; 194 195 // When using Microsoft precompiled headers, this is the PCH's key. 196 // The same key is used by both the precompiled object, and objects using the 197 // precompiled object. Any difference indicates out-of-date objects. 198 std::optional<uint32_t> pchSignature; 199 200 // Whether this file was compiled with /hotpatch. 201 bool hotPatchable = false; 202 203 // Whether the object was already merged into the final PDB. 204 bool mergedIntoPDB = false; 205 206 // If the OBJ has a .debug$T stream, this tells how it will be handled. 207 TpiSource *debugTypesObj = nullptr; 208 209 // The .debug$P or .debug$T section data if present. Empty otherwise. 210 ArrayRef<uint8_t> debugTypes; 211 212 std::optional<std::pair<StringRef, uint32_t>> 213 getVariableLocation(StringRef var); 214 215 std::optional<llvm::DILineInfo> getDILineInfo(uint32_t offset, 216 uint32_t sectionIndex); 217 218 private: 219 const coff_section* getSection(uint32_t i); 220 const coff_section *getSection(COFFSymbolRef sym) { 221 return getSection(sym.getSectionNumber()); 222 } 223 224 void enqueuePdbFile(StringRef path, ObjFile *fromFile); 225 226 void initializeChunks(); 227 void initializeSymbols(); 228 void initializeFlags(); 229 void initializeDependencies(); 230 void initializeECThunks(); 231 232 SectionChunk * 233 readSection(uint32_t sectionNumber, 234 const llvm::object::coff_aux_section_definition *def, 235 StringRef leaderName); 236 237 void readAssociativeDefinition( 238 COFFSymbolRef coffSym, 239 const llvm::object::coff_aux_section_definition *def); 240 241 void readAssociativeDefinition( 242 COFFSymbolRef coffSym, 243 const llvm::object::coff_aux_section_definition *def, 244 uint32_t parentSection); 245 246 void recordPrevailingSymbolForMingw( 247 COFFSymbolRef coffSym, 248 llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap); 249 250 void maybeAssociateSEHForMingw( 251 COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def, 252 const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap); 253 254 // Given a new symbol Sym with comdat selection Selection, if the new 255 // symbol is not (yet) Prevailing and the existing comdat leader set to 256 // Leader, emits a diagnostic if the new symbol and its selection doesn't 257 // match the existing symbol and its selection. If either old or new 258 // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace 259 // the existing leader. In that case, Prevailing is set to true. 260 void 261 handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection, 262 bool &prevailing, DefinedRegular *leader, 263 const llvm::object::coff_aux_section_definition *def); 264 265 std::optional<Symbol *> 266 createDefined(COFFSymbolRef sym, 267 std::vector<const llvm::object::coff_aux_section_definition *> 268 &comdatDefs, 269 bool &prevailingComdat); 270 Symbol *createRegular(COFFSymbolRef sym); 271 Symbol *createUndefined(COFFSymbolRef sym); 272 273 std::unique_ptr<COFFObjectFile> coffObj; 274 275 // List of all chunks defined by this file. This includes both section 276 // chunks and non-section chunks for common symbols. 277 std::vector<Chunk *> chunks; 278 279 std::vector<SectionChunk *> resourceChunks; 280 281 // CodeView debug info sections. 282 std::vector<SectionChunk *> debugChunks; 283 284 // Chunks containing symbol table indices of exception handlers. Only used for 285 // 32-bit x86. 286 std::vector<SectionChunk *> sxDataChunks; 287 288 // Chunks containing symbol table indices of address taken symbols, address 289 // taken IAT entries, longjmp and ehcont targets. These are not linked into 290 // the final binary when /guard:cf is set. 291 std::vector<SectionChunk *> guardFidChunks; 292 std::vector<SectionChunk *> guardIATChunks; 293 std::vector<SectionChunk *> guardLJmpChunks; 294 std::vector<SectionChunk *> guardEHContChunks; 295 296 std::vector<SectionChunk *> hybmpChunks; 297 298 // This vector contains a list of all symbols defined or referenced by this 299 // file. They are indexed such that you can get a Symbol by symbol 300 // index. Nonexistent indices (which are occupied by auxiliary 301 // symbols in the real symbol table) are filled with null pointers. 302 std::vector<Symbol *> symbols; 303 304 // This vector contains the same chunks as Chunks, but they are 305 // indexed such that you can get a SectionChunk by section index. 306 // Nonexistent section indices are filled with null pointers. 307 // (Because section number is 1-based, the first slot is always a 308 // null pointer.) This vector is only valid during initialization. 309 std::vector<SectionChunk *> sparseChunks; 310 311 DWARFCache *dwarf = nullptr; 312 }; 313 314 // This is a PDB type server dependency, that is not a input file per se, but 315 // needs to be treated like one. Such files are discovered from the debug type 316 // stream. 317 class PDBInputFile : public InputFile { 318 public: 319 explicit PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m); 320 ~PDBInputFile(); 321 static bool classof(const InputFile *f) { return f->kind() == PDBKind; } 322 void parse() override; 323 324 static PDBInputFile *findFromRecordPath(const COFFLinkerContext &ctx, 325 StringRef path, ObjFile *fromFile); 326 327 // Record possible errors while opening the PDB file 328 std::optional<std::string> loadErrorStr; 329 330 // This is the actual interface to the PDB (if it was opened successfully) 331 std::unique_ptr<llvm::pdb::NativeSession> session; 332 333 // If the PDB has a .debug$T stream, this tells how it will be handled. 334 TpiSource *debugTypesObj = nullptr; 335 }; 336 337 // This type represents import library members that contain DLL names 338 // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7 339 // for details about the format. 340 class ImportFile : public InputFile { 341 public: 342 explicit ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m); 343 344 static bool classof(const InputFile *f) { return f->kind() == ImportKind; } 345 346 Symbol *impSym = nullptr; 347 Symbol *thunkSym = nullptr; 348 std::string dllName; 349 350 private: 351 void parse() override; 352 353 public: 354 StringRef externalName; 355 const coff_import_header *hdr; 356 Chunk *location = nullptr; 357 358 // We want to eliminate dllimported symbols if no one actually refers to them. 359 // These "Live" bits are used to keep track of which import library members 360 // are actually in use. 361 // 362 // If the Live bit is turned off by MarkLive, Writer will ignore dllimported 363 // symbols provided by this import library member. We also track whether the 364 // imported symbol is used separately from whether the thunk is used in order 365 // to avoid creating unnecessary thunks. 366 bool live; 367 bool thunkLive; 368 }; 369 370 // Used for LTO. 371 class BitcodeFile : public InputFile { 372 public: 373 explicit BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb, 374 StringRef archiveName, uint64_t offsetInArchive, 375 bool lazy); 376 ~BitcodeFile(); 377 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 378 ArrayRef<Symbol *> getSymbols() { return symbols; } 379 MachineTypes getMachineType() override; 380 void parseLazy(); 381 std::unique_ptr<llvm::lto::InputFile> obj; 382 383 private: 384 void parse() override; 385 386 std::vector<Symbol *> symbols; 387 }; 388 389 // .dll file. MinGW only. 390 class DLLFile : public InputFile { 391 public: 392 explicit DLLFile(COFFLinkerContext &ctx, MemoryBufferRef m) 393 : InputFile(ctx, DLLKind, m) {} 394 static bool classof(const InputFile *f) { return f->kind() == DLLKind; } 395 void parse() override; 396 MachineTypes getMachineType() override; 397 398 struct Symbol { 399 StringRef dllName; 400 StringRef symbolName; 401 llvm::COFF::ImportNameType nameType; 402 llvm::COFF::ImportType importType; 403 }; 404 405 void makeImport(Symbol *s); 406 407 private: 408 std::unique_ptr<COFFObjectFile> coffObj; 409 llvm::StringSet<> seen; 410 }; 411 412 inline bool isBitcode(MemoryBufferRef mb) { 413 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; 414 } 415 416 std::string replaceThinLTOSuffix(StringRef path, StringRef suffix, 417 StringRef repl); 418 } // namespace coff 419 420 std::string toString(const coff::InputFile *file); 421 } // namespace lld 422 423 #endif 424