1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_COFF_INPUT_FILES_H 10 #define LLD_COFF_INPUT_FILES_H 11 12 #include "Config.h" 13 #include "lld/Common/LLVM.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/BinaryFormat/Magic.h" 18 #include "llvm/Object/Archive.h" 19 #include "llvm/Object/COFF.h" 20 #include "llvm/Support/StringSaver.h" 21 #include <memory> 22 #include <set> 23 #include <vector> 24 25 namespace llvm { 26 struct DILineInfo; 27 namespace pdb { 28 class DbiModuleDescriptorBuilder; 29 class NativeSession; 30 } 31 namespace lto { 32 class InputFile; 33 } 34 } 35 36 namespace lld { 37 class DWARFCache; 38 39 namespace coff { 40 41 std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file); 42 43 using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; 44 using llvm::COFF::MachineTypes; 45 using llvm::object::Archive; 46 using llvm::object::COFFObjectFile; 47 using llvm::object::COFFSymbolRef; 48 using llvm::object::coff_import_header; 49 using llvm::object::coff_section; 50 51 class Chunk; 52 class Defined; 53 class DefinedImportData; 54 class DefinedImportThunk; 55 class DefinedRegular; 56 class SectionChunk; 57 class Symbol; 58 class Undefined; 59 class TpiSource; 60 61 // The root class of input files. 62 class InputFile { 63 public: 64 enum Kind { 65 ArchiveKind, 66 ObjectKind, 67 LazyObjectKind, 68 PDBKind, 69 ImportKind, 70 BitcodeKind 71 }; 72 Kind kind() const { return fileKind; } 73 virtual ~InputFile() {} 74 75 // Returns the filename. 76 StringRef getName() const { return mb.getBufferIdentifier(); } 77 78 // Reads a file (the constructor doesn't do that). 79 virtual void parse() = 0; 80 81 // Returns the CPU type this file was compiled to. 82 virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; } 83 84 MemoryBufferRef mb; 85 86 // An archive file name if this file is created from an archive. 87 StringRef parentName; 88 89 // Returns .drectve section contents if exist. 90 StringRef getDirectives() { return directives; } 91 92 protected: 93 InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {} 94 95 StringRef directives; 96 97 private: 98 const Kind fileKind; 99 }; 100 101 // .lib or .a file. 102 class ArchiveFile : public InputFile { 103 public: 104 explicit ArchiveFile(MemoryBufferRef m); 105 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } 106 void parse() override; 107 108 // Enqueues an archive member load for the given symbol. If we've already 109 // enqueued a load for the same archive member, this function does nothing, 110 // which ensures that we don't load the same member more than once. 111 void addMember(const Archive::Symbol &sym); 112 113 private: 114 std::unique_ptr<Archive> file; 115 llvm::DenseSet<uint64_t> seen; 116 }; 117 118 // .obj or .o file between -start-lib and -end-lib. 119 class LazyObjFile : public InputFile { 120 public: 121 explicit LazyObjFile(MemoryBufferRef m) : InputFile(LazyObjectKind, m) {} 122 static bool classof(const InputFile *f) { 123 return f->kind() == LazyObjectKind; 124 } 125 // Makes this object file part of the link. 126 void fetch(); 127 // Adds the symbols in this file to the symbol table as LazyObject symbols. 128 void parse() override; 129 130 private: 131 std::vector<Symbol *> symbols; 132 }; 133 134 // .obj or .o file. This may be a member of an archive file. 135 class ObjFile : public InputFile { 136 public: 137 explicit ObjFile(MemoryBufferRef m) : InputFile(ObjectKind, m) {} 138 explicit ObjFile(MemoryBufferRef m, std::vector<Symbol *> &&symbols) 139 : InputFile(ObjectKind, m), symbols(std::move(symbols)) {} 140 static bool classof(const InputFile *f) { return f->kind() == ObjectKind; } 141 void parse() override; 142 MachineTypes getMachineType() override; 143 ArrayRef<Chunk *> getChunks() { return chunks; } 144 ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; } 145 ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; } 146 ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; } 147 ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; } 148 ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; } 149 ArrayRef<Symbol *> getSymbols() { return symbols; } 150 151 MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; } 152 153 ArrayRef<uint8_t> getDebugSection(StringRef secName); 154 155 // Returns a Symbol object for the symbolIndex'th symbol in the 156 // underlying object file. 157 Symbol *getSymbol(uint32_t symbolIndex) { 158 return symbols[symbolIndex]; 159 } 160 161 // Returns the underlying COFF file. 162 COFFObjectFile *getCOFFObj() { return coffObj.get(); } 163 164 // Add a symbol for a range extension thunk. Return the new symbol table 165 // index. This index can be used to modify a relocation. 166 uint32_t addRangeThunkSymbol(Symbol *thunk) { 167 symbols.push_back(thunk); 168 return symbols.size() - 1; 169 } 170 171 void includeResourceChunks(); 172 173 bool isResourceObjFile() const { return !resourceChunks.empty(); } 174 175 static std::vector<ObjFile *> instances; 176 177 // Flags in the absolute @feat.00 symbol if it is present. These usually 178 // indicate if an object was compiled with certain security features enabled 179 // like stack guard, safeseh, /guard:cf, or other things. 180 uint32_t feat00Flags = 0; 181 182 // True if this object file is compatible with SEH. COFF-specific and 183 // x86-only. COFF spec 5.10.1. The .sxdata section. 184 bool hasSafeSEH() { return feat00Flags & 0x1; } 185 186 // True if this file was compiled with /guard:cf. 187 bool hasGuardCF() { return feat00Flags & 0x800; } 188 189 // Pointer to the PDB module descriptor builder. Various debug info records 190 // will reference object files by "module index", which is here. Things like 191 // source files and section contributions are also recorded here. Will be null 192 // if we are not producing a PDB. 193 llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr; 194 195 const coff_section *addrsigSec = nullptr; 196 197 const coff_section *callgraphSec = nullptr; 198 199 // When using Microsoft precompiled headers, this is the PCH's key. 200 // The same key is used by both the precompiled object, and objects using the 201 // precompiled object. Any difference indicates out-of-date objects. 202 llvm::Optional<uint32_t> pchSignature; 203 204 // Whether this file was compiled with /hotpatch. 205 bool hotPatchable = false; 206 207 // Whether the object was already merged into the final PDB. 208 bool mergedIntoPDB = false; 209 210 // If the OBJ has a .debug$T stream, this tells how it will be handled. 211 TpiSource *debugTypesObj = nullptr; 212 213 // The .debug$P or .debug$T section data if present. Empty otherwise. 214 ArrayRef<uint8_t> debugTypes; 215 216 llvm::Optional<std::pair<StringRef, uint32_t>> 217 getVariableLocation(StringRef var); 218 219 llvm::Optional<llvm::DILineInfo> getDILineInfo(uint32_t offset, 220 uint32_t sectionIndex); 221 222 private: 223 const coff_section* getSection(uint32_t i); 224 const coff_section *getSection(COFFSymbolRef sym) { 225 return getSection(sym.getSectionNumber()); 226 } 227 228 void initializeChunks(); 229 void initializeSymbols(); 230 void initializeFlags(); 231 void initializeDependencies(); 232 233 SectionChunk * 234 readSection(uint32_t sectionNumber, 235 const llvm::object::coff_aux_section_definition *def, 236 StringRef leaderName); 237 238 void readAssociativeDefinition( 239 COFFSymbolRef coffSym, 240 const llvm::object::coff_aux_section_definition *def); 241 242 void readAssociativeDefinition( 243 COFFSymbolRef coffSym, 244 const llvm::object::coff_aux_section_definition *def, 245 uint32_t parentSection); 246 247 void recordPrevailingSymbolForMingw( 248 COFFSymbolRef coffSym, 249 llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap); 250 251 void maybeAssociateSEHForMingw( 252 COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def, 253 const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap); 254 255 // Given a new symbol Sym with comdat selection Selection, if the new 256 // symbol is not (yet) Prevailing and the existing comdat leader set to 257 // Leader, emits a diagnostic if the new symbol and its selection doesn't 258 // match the existing symbol and its selection. If either old or new 259 // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace 260 // the existing leader. In that case, Prevailing is set to true. 261 void 262 handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection, 263 bool &prevailing, DefinedRegular *leader, 264 const llvm::object::coff_aux_section_definition *def); 265 266 llvm::Optional<Symbol *> 267 createDefined(COFFSymbolRef sym, 268 std::vector<const llvm::object::coff_aux_section_definition *> 269 &comdatDefs, 270 bool &prevailingComdat); 271 Symbol *createRegular(COFFSymbolRef sym); 272 Symbol *createUndefined(COFFSymbolRef sym); 273 274 std::unique_ptr<COFFObjectFile> coffObj; 275 276 // List of all chunks defined by this file. This includes both section 277 // chunks and non-section chunks for common symbols. 278 std::vector<Chunk *> chunks; 279 280 std::vector<SectionChunk *> resourceChunks; 281 282 // CodeView debug info sections. 283 std::vector<SectionChunk *> debugChunks; 284 285 // Chunks containing symbol table indices of exception handlers. Only used for 286 // 32-bit x86. 287 std::vector<SectionChunk *> sxDataChunks; 288 289 // Chunks containing symbol table indices of address taken symbols, address 290 // taken IAT entries, and longjmp targets. These are not linked into the 291 // final binary when /guard:cf is set. 292 std::vector<SectionChunk *> guardFidChunks; 293 std::vector<SectionChunk *> guardIATChunks; 294 std::vector<SectionChunk *> guardLJmpChunks; 295 296 // This vector contains a list of all symbols defined or referenced by this 297 // file. They are indexed such that you can get a Symbol by symbol 298 // index. Nonexistent indices (which are occupied by auxiliary 299 // symbols in the real symbol table) are filled with null pointers. 300 std::vector<Symbol *> symbols; 301 302 // This vector contains the same chunks as Chunks, but they are 303 // indexed such that you can get a SectionChunk by section index. 304 // Nonexistent section indices are filled with null pointers. 305 // (Because section number is 1-based, the first slot is always a 306 // null pointer.) This vector is only valid during initialization. 307 std::vector<SectionChunk *> sparseChunks; 308 309 DWARFCache *dwarf = nullptr; 310 }; 311 312 // This is a PDB type server dependency, that is not a input file per se, but 313 // needs to be treated like one. Such files are discovered from the debug type 314 // stream. 315 class PDBInputFile : public InputFile { 316 public: 317 explicit PDBInputFile(MemoryBufferRef m); 318 ~PDBInputFile(); 319 static bool classof(const InputFile *f) { return f->kind() == PDBKind; } 320 void parse() override; 321 322 static void enqueue(StringRef path, ObjFile *fromFile); 323 324 static PDBInputFile *findFromRecordPath(StringRef path, ObjFile *fromFile); 325 326 static std::map<std::string, PDBInputFile *> instances; 327 328 // Record possible errors while opening the PDB file 329 llvm::Optional<Error> loadErr; 330 331 // This is the actual interface to the PDB (if it was opened successfully) 332 std::unique_ptr<llvm::pdb::NativeSession> session; 333 334 // If the PDB has a .debug$T stream, this tells how it will be handled. 335 TpiSource *debugTypesObj = nullptr; 336 }; 337 338 // This type represents import library members that contain DLL names 339 // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7 340 // for details about the format. 341 class ImportFile : public InputFile { 342 public: 343 explicit ImportFile(MemoryBufferRef m) : InputFile(ImportKind, m) {} 344 345 static bool classof(const InputFile *f) { return f->kind() == ImportKind; } 346 347 static std::vector<ImportFile *> instances; 348 349 Symbol *impSym = nullptr; 350 Symbol *thunkSym = nullptr; 351 std::string dllName; 352 353 private: 354 void parse() override; 355 356 public: 357 StringRef externalName; 358 const coff_import_header *hdr; 359 Chunk *location = nullptr; 360 361 // We want to eliminate dllimported symbols if no one actually refers to them. 362 // These "Live" bits are used to keep track of which import library members 363 // are actually in use. 364 // 365 // If the Live bit is turned off by MarkLive, Writer will ignore dllimported 366 // symbols provided by this import library member. We also track whether the 367 // imported symbol is used separately from whether the thunk is used in order 368 // to avoid creating unnecessary thunks. 369 bool live = !config->doGC; 370 bool thunkLive = !config->doGC; 371 }; 372 373 // Used for LTO. 374 class BitcodeFile : public InputFile { 375 public: 376 BitcodeFile(MemoryBufferRef mb, StringRef archiveName, 377 uint64_t offsetInArchive); 378 explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName, 379 uint64_t offsetInArchive, 380 std::vector<Symbol *> &&symbols); 381 ~BitcodeFile(); 382 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 383 ArrayRef<Symbol *> getSymbols() { return symbols; } 384 MachineTypes getMachineType() override; 385 static std::vector<BitcodeFile *> instances; 386 std::unique_ptr<llvm::lto::InputFile> obj; 387 388 private: 389 void parse() override; 390 391 std::vector<Symbol *> symbols; 392 }; 393 394 inline bool isBitcode(MemoryBufferRef mb) { 395 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; 396 } 397 398 std::string replaceThinLTOSuffix(StringRef path); 399 } // namespace coff 400 401 std::string toString(const coff::InputFile *file); 402 } // namespace lld 403 404 #endif 405