1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_COFF_INPUT_FILES_H 10 #define LLD_COFF_INPUT_FILES_H 11 12 #include "Config.h" 13 #include "lld/Common/LLVM.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/StringSet.h" 18 #include "llvm/BinaryFormat/Magic.h" 19 #include "llvm/Object/Archive.h" 20 #include "llvm/Object/COFF.h" 21 #include "llvm/Support/StringSaver.h" 22 #include <memory> 23 #include <set> 24 #include <vector> 25 26 namespace llvm { 27 struct DILineInfo; 28 namespace pdb { 29 class DbiModuleDescriptorBuilder; 30 class NativeSession; 31 } 32 namespace lto { 33 class InputFile; 34 } 35 } 36 37 namespace lld { 38 class DWARFCache; 39 40 namespace coff { 41 class COFFLinkerContext; 42 43 std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file); 44 45 using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; 46 using llvm::COFF::MachineTypes; 47 using llvm::object::Archive; 48 using llvm::object::COFFObjectFile; 49 using llvm::object::COFFSymbolRef; 50 using llvm::object::coff_import_header; 51 using llvm::object::coff_section; 52 53 class Chunk; 54 class Defined; 55 class DefinedImportData; 56 class DefinedImportThunk; 57 class DefinedRegular; 58 class SectionChunk; 59 class Symbol; 60 class Undefined; 61 class TpiSource; 62 63 // The root class of input files. 64 class InputFile { 65 public: 66 enum Kind { 67 ArchiveKind, 68 ObjectKind, 69 LazyObjectKind, 70 PDBKind, 71 ImportKind, 72 BitcodeKind, 73 DLLKind 74 }; 75 Kind kind() const { return fileKind; } 76 virtual ~InputFile() {} 77 78 // Returns the filename. 79 StringRef getName() const { return mb.getBufferIdentifier(); } 80 81 // Reads a file (the constructor doesn't do that). 82 virtual void parse() = 0; 83 84 // Returns the CPU type this file was compiled to. 85 virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; } 86 87 MemoryBufferRef mb; 88 89 // An archive file name if this file is created from an archive. 90 StringRef parentName; 91 92 // Returns .drectve section contents if exist. 93 StringRef getDirectives() { return directives; } 94 95 COFFLinkerContext &ctx; 96 97 protected: 98 InputFile(COFFLinkerContext &c, Kind k, MemoryBufferRef m, bool lazy = false) 99 : mb(m), ctx(c), fileKind(k), lazy(lazy) {} 100 101 StringRef directives; 102 103 private: 104 const Kind fileKind; 105 106 public: 107 // True if this is a lazy ObjFile or BitcodeFile. 108 bool lazy = false; 109 }; 110 111 // .lib or .a file. 112 class ArchiveFile : public InputFile { 113 public: 114 explicit ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m); 115 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 116 void parse() override; 117 118 // Enqueues an archive member load for the given symbol. If we've already 119 // enqueued a load for the same archive member, this function does nothing, 120 // which ensures that we don't load the same member more than once. 121 void addMember(const Archive::Symbol &sym); 122 123 private: 124 std::unique_ptr<Archive> file; 125 llvm::DenseSet<uint64_t> seen; 126 }; 127 128 // .obj or .o file. This may be a member of an archive file. 129 class ObjFile : public InputFile { 130 public: 131 explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy = false) 132 : InputFile(ctx, ObjectKind, m, lazy) {} 133 static bool classof(const InputFile *f) { return f->kind() == ObjectKind; } 134 void parse() override; 135 void parseLazy(); 136 MachineTypes getMachineType() override; 137 ArrayRef<Chunk *> getChunks() { return chunks; } 138 ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; } 139 ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; } 140 ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; } 141 ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; } 142 ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; } 143 ArrayRef<SectionChunk *> getGuardEHContChunks() { return guardEHContChunks; } 144 ArrayRef<Symbol *> getSymbols() { return symbols; } 145 146 MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; } 147 148 ArrayRef<uint8_t> getDebugSection(StringRef secName); 149 150 // Returns a Symbol object for the symbolIndex'th symbol in the 151 // underlying object file. 152 Symbol *getSymbol(uint32_t symbolIndex) { 153 return symbols[symbolIndex]; 154 } 155 156 // Returns the underlying COFF file. 157 COFFObjectFile *getCOFFObj() { return coffObj.get(); } 158 159 // Add a symbol for a range extension thunk. Return the new symbol table 160 // index. This index can be used to modify a relocation. 161 uint32_t addRangeThunkSymbol(Symbol *thunk) { 162 symbols.push_back(thunk); 163 return symbols.size() - 1; 164 } 165 166 void includeResourceChunks(); 167 168 bool isResourceObjFile() const { return !resourceChunks.empty(); } 169 170 // Flags in the absolute @feat.00 symbol if it is present. These usually 171 // indicate if an object was compiled with certain security features enabled 172 // like stack guard, safeseh, /guard:cf, or other things. 173 uint32_t feat00Flags = 0; 174 175 // True if this object file is compatible with SEH. COFF-specific and 176 // x86-only. COFF spec 5.10.1. The .sxdata section. 177 bool hasSafeSEH() { return feat00Flags & 0x1; } 178 179 // True if this file was compiled with /guard:cf. 180 bool hasGuardCF() { return feat00Flags & 0x800; } 181 182 // True if this file was compiled with /guard:ehcont. 183 bool hasGuardEHCont() { return feat00Flags & 0x4000; } 184 185 // Pointer to the PDB module descriptor builder. Various debug info records 186 // will reference object files by "module index", which is here. Things like 187 // source files and section contributions are also recorded here. Will be null 188 // if we are not producing a PDB. 189 llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr; 190 191 const coff_section *addrsigSec = nullptr; 192 193 const coff_section *callgraphSec = nullptr; 194 195 // When using Microsoft precompiled headers, this is the PCH's key. 196 // The same key is used by both the precompiled object, and objects using the 197 // precompiled object. Any difference indicates out-of-date objects. 198 std::optional<uint32_t> pchSignature; 199 200 // Whether this file was compiled with /hotpatch. 201 bool hotPatchable = false; 202 203 // Whether the object was already merged into the final PDB. 204 bool mergedIntoPDB = false; 205 206 // If the OBJ has a .debug$T stream, this tells how it will be handled. 207 TpiSource *debugTypesObj = nullptr; 208 209 // The .debug$P or .debug$T section data if present. Empty otherwise. 210 ArrayRef<uint8_t> debugTypes; 211 212 std::optional<std::pair<StringRef, uint32_t>> 213 getVariableLocation(StringRef var); 214 215 std::optional<llvm::DILineInfo> getDILineInfo(uint32_t offset, 216 uint32_t sectionIndex); 217 218 private: 219 const coff_section* getSection(uint32_t i); 220 const coff_section *getSection(COFFSymbolRef sym) { 221 return getSection(sym.getSectionNumber()); 222 } 223 224 void enqueuePdbFile(StringRef path, ObjFile *fromFile); 225 226 void initializeChunks(); 227 void initializeSymbols(); 228 void initializeFlags(); 229 void initializeDependencies(); 230 231 SectionChunk * 232 readSection(uint32_t sectionNumber, 233 const llvm::object::coff_aux_section_definition *def, 234 StringRef leaderName); 235 236 void readAssociativeDefinition( 237 COFFSymbolRef coffSym, 238 const llvm::object::coff_aux_section_definition *def); 239 240 void readAssociativeDefinition( 241 COFFSymbolRef coffSym, 242 const llvm::object::coff_aux_section_definition *def, 243 uint32_t parentSection); 244 245 void recordPrevailingSymbolForMingw( 246 COFFSymbolRef coffSym, 247 llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap); 248 249 void maybeAssociateSEHForMingw( 250 COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def, 251 const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap); 252 253 // Given a new symbol Sym with comdat selection Selection, if the new 254 // symbol is not (yet) Prevailing and the existing comdat leader set to 255 // Leader, emits a diagnostic if the new symbol and its selection doesn't 256 // match the existing symbol and its selection. If either old or new 257 // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace 258 // the existing leader. In that case, Prevailing is set to true. 259 void 260 handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection, 261 bool &prevailing, DefinedRegular *leader, 262 const llvm::object::coff_aux_section_definition *def); 263 264 std::optional<Symbol *> 265 createDefined(COFFSymbolRef sym, 266 std::vector<const llvm::object::coff_aux_section_definition *> 267 &comdatDefs, 268 bool &prevailingComdat); 269 Symbol *createRegular(COFFSymbolRef sym); 270 Symbol *createUndefined(COFFSymbolRef sym); 271 272 std::unique_ptr<COFFObjectFile> coffObj; 273 274 // List of all chunks defined by this file. This includes both section 275 // chunks and non-section chunks for common symbols. 276 std::vector<Chunk *> chunks; 277 278 std::vector<SectionChunk *> resourceChunks; 279 280 // CodeView debug info sections. 281 std::vector<SectionChunk *> debugChunks; 282 283 // Chunks containing symbol table indices of exception handlers. Only used for 284 // 32-bit x86. 285 std::vector<SectionChunk *> sxDataChunks; 286 287 // Chunks containing symbol table indices of address taken symbols, address 288 // taken IAT entries, longjmp and ehcont targets. These are not linked into 289 // the final binary when /guard:cf is set. 290 std::vector<SectionChunk *> guardFidChunks; 291 std::vector<SectionChunk *> guardIATChunks; 292 std::vector<SectionChunk *> guardLJmpChunks; 293 std::vector<SectionChunk *> guardEHContChunks; 294 295 // This vector contains a list of all symbols defined or referenced by this 296 // file. They are indexed such that you can get a Symbol by symbol 297 // index. Nonexistent indices (which are occupied by auxiliary 298 // symbols in the real symbol table) are filled with null pointers. 299 std::vector<Symbol *> symbols; 300 301 // This vector contains the same chunks as Chunks, but they are 302 // indexed such that you can get a SectionChunk by section index. 303 // Nonexistent section indices are filled with null pointers. 304 // (Because section number is 1-based, the first slot is always a 305 // null pointer.) This vector is only valid during initialization. 306 std::vector<SectionChunk *> sparseChunks; 307 308 DWARFCache *dwarf = nullptr; 309 }; 310 311 // This is a PDB type server dependency, that is not a input file per se, but 312 // needs to be treated like one. Such files are discovered from the debug type 313 // stream. 314 class PDBInputFile : public InputFile { 315 public: 316 explicit PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m); 317 ~PDBInputFile(); 318 static bool classof(const InputFile *f) { return f->kind() == PDBKind; } 319 void parse() override; 320 321 static PDBInputFile *findFromRecordPath(const COFFLinkerContext &ctx, 322 StringRef path, ObjFile *fromFile); 323 324 // Record possible errors while opening the PDB file 325 std::optional<std::string> loadErrorStr; 326 327 // This is the actual interface to the PDB (if it was opened successfully) 328 std::unique_ptr<llvm::pdb::NativeSession> session; 329 330 // If the PDB has a .debug$T stream, this tells how it will be handled. 331 TpiSource *debugTypesObj = nullptr; 332 }; 333 334 // This type represents import library members that contain DLL names 335 // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7 336 // for details about the format. 337 class ImportFile : public InputFile { 338 public: 339 explicit ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m); 340 341 static bool classof(const InputFile *f) { return f->kind() == ImportKind; } 342 343 Symbol *impSym = nullptr; 344 Symbol *thunkSym = nullptr; 345 std::string dllName; 346 347 private: 348 void parse() override; 349 350 public: 351 StringRef externalName; 352 const coff_import_header *hdr; 353 Chunk *location = nullptr; 354 355 // We want to eliminate dllimported symbols if no one actually refers to them. 356 // These "Live" bits are used to keep track of which import library members 357 // are actually in use. 358 // 359 // If the Live bit is turned off by MarkLive, Writer will ignore dllimported 360 // symbols provided by this import library member. We also track whether the 361 // imported symbol is used separately from whether the thunk is used in order 362 // to avoid creating unnecessary thunks. 363 bool live; 364 bool thunkLive; 365 }; 366 367 // Used for LTO. 368 class BitcodeFile : public InputFile { 369 public: 370 explicit BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb, 371 StringRef archiveName, uint64_t offsetInArchive, 372 bool lazy); 373 ~BitcodeFile(); 374 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 375 ArrayRef<Symbol *> getSymbols() { return symbols; } 376 MachineTypes getMachineType() override; 377 void parseLazy(); 378 std::unique_ptr<llvm::lto::InputFile> obj; 379 380 private: 381 void parse() override; 382 383 std::vector<Symbol *> symbols; 384 }; 385 386 // .dll file. MinGW only. 387 class DLLFile : public InputFile { 388 public: 389 explicit DLLFile(COFFLinkerContext &ctx, MemoryBufferRef m) 390 : InputFile(ctx, DLLKind, m) {} 391 static bool classof(const InputFile *f) { return f->kind() == DLLKind; } 392 void parse() override; 393 MachineTypes getMachineType() override; 394 395 struct Symbol { 396 StringRef dllName; 397 StringRef symbolName; 398 llvm::COFF::ImportNameType nameType; 399 llvm::COFF::ImportType importType; 400 }; 401 402 void makeImport(Symbol *s); 403 404 private: 405 std::unique_ptr<COFFObjectFile> coffObj; 406 llvm::StringSet<> seen; 407 }; 408 409 inline bool isBitcode(MemoryBufferRef mb) { 410 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; 411 } 412 413 std::string replaceThinLTOSuffix(StringRef path, StringRef suffix, 414 StringRef repl); 415 } // namespace coff 416 417 std::string toString(const coff::InputFile *file); 418 } // namespace lld 419 420 #endif 421