1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_COFF_INPUT_FILES_H 10 #define LLD_COFF_INPUT_FILES_H 11 12 #include "Config.h" 13 #include "lld/Common/LLVM.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/StringSet.h" 18 #include "llvm/BinaryFormat/Magic.h" 19 #include "llvm/Object/Archive.h" 20 #include "llvm/Object/COFF.h" 21 #include "llvm/Support/StringSaver.h" 22 #include <memory> 23 #include <set> 24 #include <vector> 25 26 namespace llvm { 27 struct DILineInfo; 28 namespace pdb { 29 class DbiModuleDescriptorBuilder; 30 class NativeSession; 31 } 32 namespace lto { 33 class InputFile; 34 } 35 } 36 37 namespace lld { 38 class DWARFCache; 39 40 namespace coff { 41 class COFFLinkerContext; 42 43 std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file); 44 45 using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; 46 using llvm::COFF::MachineTypes; 47 using llvm::object::Archive; 48 using llvm::object::COFFObjectFile; 49 using llvm::object::COFFSymbolRef; 50 using llvm::object::coff_import_header; 51 using llvm::object::coff_section; 52 53 class Chunk; 54 class Defined; 55 class DefinedImportData; 56 class DefinedImportThunk; 57 class DefinedRegular; 58 class SectionChunk; 59 class Symbol; 60 class Undefined; 61 class TpiSource; 62 63 // The root class of input files. 64 class InputFile { 65 public: 66 enum Kind { 67 ArchiveKind, 68 ObjectKind, 69 LazyObjectKind, 70 PDBKind, 71 ImportKind, 72 BitcodeKind, 73 DLLKind 74 }; 75 Kind kind() const { return fileKind; } 76 virtual ~InputFile() {} 77 78 // Returns the filename. 79 StringRef getName() const { return mb.getBufferIdentifier(); } 80 81 // Reads a file (the constructor doesn't do that). 82 virtual void parse() = 0; 83 84 // Returns the CPU type this file was compiled to. 85 virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; } 86 87 MemoryBufferRef mb; 88 89 // An archive file name if this file is created from an archive. 90 StringRef parentName; 91 92 // Returns .drectve section contents if exist. 93 StringRef getDirectives() { return directives; } 94 95 COFFLinkerContext &ctx; 96 97 protected: 98 InputFile(COFFLinkerContext &c, Kind k, MemoryBufferRef m, bool lazy = false) 99 : mb(m), ctx(c), fileKind(k), lazy(lazy) {} 100 101 StringRef directives; 102 103 private: 104 const Kind fileKind; 105 106 public: 107 // True if this is a lazy ObjFile or BitcodeFile. 108 bool lazy = false; 109 }; 110 111 // .lib or .a file. 112 class ArchiveFile : public InputFile { 113 public: 114 explicit ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m); 115 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 116 void parse() override; 117 118 // Enqueues an archive member load for the given symbol. If we've already 119 // enqueued a load for the same archive member, this function does nothing, 120 // which ensures that we don't load the same member more than once. 121 void addMember(const Archive::Symbol &sym); 122 123 private: 124 std::unique_ptr<Archive> file; 125 llvm::DenseSet<uint64_t> seen; 126 }; 127 128 // .obj or .o file. This may be a member of an archive file. 129 class ObjFile : public InputFile { 130 public: 131 explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy = false) 132 : InputFile(ctx, ObjectKind, m, lazy) {} 133 static bool classof(const InputFile *f) { return f->kind() == ObjectKind; } 134 void parse() override; 135 void parseLazy(); 136 MachineTypes getMachineType() override; 137 ArrayRef<Chunk *> getChunks() { return chunks; } 138 ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; } 139 ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; } 140 ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; } 141 ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; } 142 ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; } 143 ArrayRef<SectionChunk *> getGuardEHContChunks() { return guardEHContChunks; } 144 ArrayRef<Symbol *> getSymbols() { return symbols; } 145 146 MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; } 147 148 ArrayRef<uint8_t> getDebugSection(StringRef secName); 149 150 // Returns a Symbol object for the symbolIndex'th symbol in the 151 // underlying object file. 152 Symbol *getSymbol(uint32_t symbolIndex) { 153 return symbols[symbolIndex]; 154 } 155 156 // Returns the underlying COFF file. 157 COFFObjectFile *getCOFFObj() { return coffObj.get(); } 158 159 // Add a symbol for a range extension thunk. Return the new symbol table 160 // index. This index can be used to modify a relocation. 161 uint32_t addRangeThunkSymbol(Symbol *thunk) { 162 symbols.push_back(thunk); 163 return symbols.size() - 1; 164 } 165 166 void includeResourceChunks(); 167 168 bool isResourceObjFile() const { return !resourceChunks.empty(); } 169 170 // Flags in the absolute @feat.00 symbol if it is present. These usually 171 // indicate if an object was compiled with certain security features enabled 172 // like stack guard, safeseh, /guard:cf, or other things. 173 uint32_t feat00Flags = 0; 174 175 // True if this object file is compatible with SEH. COFF-specific and 176 // x86-only. COFF spec 5.10.1. The .sxdata section. 177 bool hasSafeSEH() { return feat00Flags & 0x1; } 178 179 // True if this file was compiled with /guard:cf. 180 bool hasGuardCF() { return feat00Flags & 0x4800; } 181 182 // Pointer to the PDB module descriptor builder. Various debug info records 183 // will reference object files by "module index", which is here. Things like 184 // source files and section contributions are also recorded here. Will be null 185 // if we are not producing a PDB. 186 llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr; 187 188 const coff_section *addrsigSec = nullptr; 189 190 const coff_section *callgraphSec = nullptr; 191 192 // When using Microsoft precompiled headers, this is the PCH's key. 193 // The same key is used by both the precompiled object, and objects using the 194 // precompiled object. Any difference indicates out-of-date objects. 195 std::optional<uint32_t> pchSignature; 196 197 // Whether this file was compiled with /hotpatch. 198 bool hotPatchable = false; 199 200 // Whether the object was already merged into the final PDB. 201 bool mergedIntoPDB = false; 202 203 // If the OBJ has a .debug$T stream, this tells how it will be handled. 204 TpiSource *debugTypesObj = nullptr; 205 206 // The .debug$P or .debug$T section data if present. Empty otherwise. 207 ArrayRef<uint8_t> debugTypes; 208 209 std::optional<std::pair<StringRef, uint32_t>> 210 getVariableLocation(StringRef var); 211 212 std::optional<llvm::DILineInfo> getDILineInfo(uint32_t offset, 213 uint32_t sectionIndex); 214 215 private: 216 const coff_section* getSection(uint32_t i); 217 const coff_section *getSection(COFFSymbolRef sym) { 218 return getSection(sym.getSectionNumber()); 219 } 220 221 void enqueuePdbFile(StringRef path, ObjFile *fromFile); 222 223 void initializeChunks(); 224 void initializeSymbols(); 225 void initializeFlags(); 226 void initializeDependencies(); 227 228 SectionChunk * 229 readSection(uint32_t sectionNumber, 230 const llvm::object::coff_aux_section_definition *def, 231 StringRef leaderName); 232 233 void readAssociativeDefinition( 234 COFFSymbolRef coffSym, 235 const llvm::object::coff_aux_section_definition *def); 236 237 void readAssociativeDefinition( 238 COFFSymbolRef coffSym, 239 const llvm::object::coff_aux_section_definition *def, 240 uint32_t parentSection); 241 242 void recordPrevailingSymbolForMingw( 243 COFFSymbolRef coffSym, 244 llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap); 245 246 void maybeAssociateSEHForMingw( 247 COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def, 248 const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap); 249 250 // Given a new symbol Sym with comdat selection Selection, if the new 251 // symbol is not (yet) Prevailing and the existing comdat leader set to 252 // Leader, emits a diagnostic if the new symbol and its selection doesn't 253 // match the existing symbol and its selection. If either old or new 254 // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace 255 // the existing leader. In that case, Prevailing is set to true. 256 void 257 handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection, 258 bool &prevailing, DefinedRegular *leader, 259 const llvm::object::coff_aux_section_definition *def); 260 261 std::optional<Symbol *> 262 createDefined(COFFSymbolRef sym, 263 std::vector<const llvm::object::coff_aux_section_definition *> 264 &comdatDefs, 265 bool &prevailingComdat); 266 Symbol *createRegular(COFFSymbolRef sym); 267 Symbol *createUndefined(COFFSymbolRef sym); 268 269 std::unique_ptr<COFFObjectFile> coffObj; 270 271 // List of all chunks defined by this file. This includes both section 272 // chunks and non-section chunks for common symbols. 273 std::vector<Chunk *> chunks; 274 275 std::vector<SectionChunk *> resourceChunks; 276 277 // CodeView debug info sections. 278 std::vector<SectionChunk *> debugChunks; 279 280 // Chunks containing symbol table indices of exception handlers. Only used for 281 // 32-bit x86. 282 std::vector<SectionChunk *> sxDataChunks; 283 284 // Chunks containing symbol table indices of address taken symbols, address 285 // taken IAT entries, longjmp and ehcont targets. These are not linked into 286 // the final binary when /guard:cf is set. 287 std::vector<SectionChunk *> guardFidChunks; 288 std::vector<SectionChunk *> guardIATChunks; 289 std::vector<SectionChunk *> guardLJmpChunks; 290 std::vector<SectionChunk *> guardEHContChunks; 291 292 // This vector contains a list of all symbols defined or referenced by this 293 // file. They are indexed such that you can get a Symbol by symbol 294 // index. Nonexistent indices (which are occupied by auxiliary 295 // symbols in the real symbol table) are filled with null pointers. 296 std::vector<Symbol *> symbols; 297 298 // This vector contains the same chunks as Chunks, but they are 299 // indexed such that you can get a SectionChunk by section index. 300 // Nonexistent section indices are filled with null pointers. 301 // (Because section number is 1-based, the first slot is always a 302 // null pointer.) This vector is only valid during initialization. 303 std::vector<SectionChunk *> sparseChunks; 304 305 DWARFCache *dwarf = nullptr; 306 }; 307 308 // This is a PDB type server dependency, that is not a input file per se, but 309 // needs to be treated like one. Such files are discovered from the debug type 310 // stream. 311 class PDBInputFile : public InputFile { 312 public: 313 explicit PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m); 314 ~PDBInputFile(); 315 static bool classof(const InputFile *f) { return f->kind() == PDBKind; } 316 void parse() override; 317 318 static PDBInputFile *findFromRecordPath(const COFFLinkerContext &ctx, 319 StringRef path, ObjFile *fromFile); 320 321 // Record possible errors while opening the PDB file 322 std::optional<std::string> loadErrorStr; 323 324 // This is the actual interface to the PDB (if it was opened successfully) 325 std::unique_ptr<llvm::pdb::NativeSession> session; 326 327 // If the PDB has a .debug$T stream, this tells how it will be handled. 328 TpiSource *debugTypesObj = nullptr; 329 }; 330 331 // This type represents import library members that contain DLL names 332 // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7 333 // for details about the format. 334 class ImportFile : public InputFile { 335 public: 336 explicit ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m); 337 338 static bool classof(const InputFile *f) { return f->kind() == ImportKind; } 339 340 Symbol *impSym = nullptr; 341 Symbol *thunkSym = nullptr; 342 std::string dllName; 343 344 private: 345 void parse() override; 346 347 public: 348 StringRef externalName; 349 const coff_import_header *hdr; 350 Chunk *location = nullptr; 351 352 // We want to eliminate dllimported symbols if no one actually refers to them. 353 // These "Live" bits are used to keep track of which import library members 354 // are actually in use. 355 // 356 // If the Live bit is turned off by MarkLive, Writer will ignore dllimported 357 // symbols provided by this import library member. We also track whether the 358 // imported symbol is used separately from whether the thunk is used in order 359 // to avoid creating unnecessary thunks. 360 bool live; 361 bool thunkLive; 362 }; 363 364 // Used for LTO. 365 class BitcodeFile : public InputFile { 366 public: 367 explicit BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb, 368 StringRef archiveName, uint64_t offsetInArchive, 369 bool lazy); 370 ~BitcodeFile(); 371 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 372 ArrayRef<Symbol *> getSymbols() { return symbols; } 373 MachineTypes getMachineType() override; 374 void parseLazy(); 375 std::unique_ptr<llvm::lto::InputFile> obj; 376 377 private: 378 void parse() override; 379 380 std::vector<Symbol *> symbols; 381 }; 382 383 // .dll file. MinGW only. 384 class DLLFile : public InputFile { 385 public: 386 explicit DLLFile(COFFLinkerContext &ctx, MemoryBufferRef m) 387 : InputFile(ctx, DLLKind, m) {} 388 static bool classof(const InputFile *f) { return f->kind() == DLLKind; } 389 void parse() override; 390 MachineTypes getMachineType() override; 391 392 struct Symbol { 393 StringRef dllName; 394 StringRef symbolName; 395 llvm::COFF::ImportNameType nameType; 396 llvm::COFF::ImportType importType; 397 }; 398 399 void makeImport(Symbol *s); 400 401 private: 402 std::unique_ptr<COFFObjectFile> coffObj; 403 llvm::StringSet<> seen; 404 }; 405 406 inline bool isBitcode(MemoryBufferRef mb) { 407 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; 408 } 409 410 std::string replaceThinLTOSuffix(StringRef path, StringRef suffix, 411 StringRef repl); 412 } // namespace coff 413 414 std::string toString(const coff::InputFile *file); 415 } // namespace lld 416 417 #endif 418