xref: /freebsd/contrib/llvm-project/lld/COFF/InputFiles.h (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_COFF_INPUT_FILES_H
10 #define LLD_COFF_INPUT_FILES_H
11 
12 #include "Config.h"
13 #include "lld/Common/LLVM.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/StringSet.h"
18 #include "llvm/BinaryFormat/Magic.h"
19 #include "llvm/Object/Archive.h"
20 #include "llvm/Object/COFF.h"
21 #include "llvm/Support/StringSaver.h"
22 #include <memory>
23 #include <set>
24 #include <vector>
25 
26 namespace llvm {
27 struct DILineInfo;
28 namespace pdb {
29 class DbiModuleDescriptorBuilder;
30 class NativeSession;
31 }
32 namespace lto {
33 class InputFile;
34 }
35 }
36 
37 namespace lld {
38 class DWARFCache;
39 
40 namespace coff {
41 class COFFLinkerContext;
42 
43 std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file);
44 
45 using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
46 using llvm::COFF::MachineTypes;
47 using llvm::object::Archive;
48 using llvm::object::COFFObjectFile;
49 using llvm::object::COFFSymbolRef;
50 using llvm::object::coff_import_header;
51 using llvm::object::coff_section;
52 
53 class Chunk;
54 class Defined;
55 class DefinedImportData;
56 class DefinedImportThunk;
57 class DefinedRegular;
58 class SectionChunk;
59 class Symbol;
60 class Undefined;
61 class TpiSource;
62 
63 // The root class of input files.
64 class InputFile {
65 public:
66   enum Kind {
67     ArchiveKind,
68     ObjectKind,
69     LazyObjectKind,
70     PDBKind,
71     ImportKind,
72     BitcodeKind,
73     DLLKind
74   };
75   Kind kind() const { return fileKind; }
76   virtual ~InputFile() {}
77 
78   // Returns the filename.
79   StringRef getName() const { return mb.getBufferIdentifier(); }
80 
81   // Reads a file (the constructor doesn't do that).
82   virtual void parse() = 0;
83 
84   // Returns the CPU type this file was compiled to.
85   virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
86 
87   MemoryBufferRef mb;
88 
89   // An archive file name if this file is created from an archive.
90   StringRef parentName;
91 
92   // Returns .drectve section contents if exist.
93   StringRef getDirectives() { return directives; }
94 
95   COFFLinkerContext &ctx;
96 
97 protected:
98   InputFile(COFFLinkerContext &c, Kind k, MemoryBufferRef m)
99       : mb(m), ctx(c), fileKind(k) {}
100 
101   StringRef directives;
102 
103 private:
104   const Kind fileKind;
105 };
106 
107 // .lib or .a file.
108 class ArchiveFile : public InputFile {
109 public:
110   explicit ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m);
111   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
112   void parse() override;
113 
114   // Enqueues an archive member load for the given symbol. If we've already
115   // enqueued a load for the same archive member, this function does nothing,
116   // which ensures that we don't load the same member more than once.
117   void addMember(const Archive::Symbol &sym);
118 
119 private:
120   std::unique_ptr<Archive> file;
121   llvm::DenseSet<uint64_t> seen;
122 };
123 
124 // .obj or .o file between -start-lib and -end-lib.
125 class LazyObjFile : public InputFile {
126 public:
127   explicit LazyObjFile(COFFLinkerContext &ctx, MemoryBufferRef m)
128       : InputFile(ctx, LazyObjectKind, m) {}
129   static bool classof(const InputFile *f) {
130     return f->kind() == LazyObjectKind;
131   }
132   // Makes this object file part of the link.
133   void fetch();
134   // Adds the symbols in this file to the symbol table as LazyObject symbols.
135   void parse() override;
136 
137 private:
138   std::vector<Symbol *> symbols;
139 };
140 
141 // .obj or .o file. This may be a member of an archive file.
142 class ObjFile : public InputFile {
143 public:
144   explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m)
145       : InputFile(ctx, ObjectKind, m) {}
146   explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m,
147                    std::vector<Symbol *> &&symbols)
148       : InputFile(ctx, ObjectKind, m), symbols(std::move(symbols)) {}
149   static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
150   void parse() override;
151   MachineTypes getMachineType() override;
152   ArrayRef<Chunk *> getChunks() { return chunks; }
153   ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
154   ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; }
155   ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; }
156   ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; }
157   ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; }
158   ArrayRef<SectionChunk *> getGuardEHContChunks() { return guardEHContChunks; }
159   ArrayRef<Symbol *> getSymbols() { return symbols; }
160 
161   MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
162 
163   ArrayRef<uint8_t> getDebugSection(StringRef secName);
164 
165   // Returns a Symbol object for the symbolIndex'th symbol in the
166   // underlying object file.
167   Symbol *getSymbol(uint32_t symbolIndex) {
168     return symbols[symbolIndex];
169   }
170 
171   // Returns the underlying COFF file.
172   COFFObjectFile *getCOFFObj() { return coffObj.get(); }
173 
174   // Add a symbol for a range extension thunk. Return the new symbol table
175   // index. This index can be used to modify a relocation.
176   uint32_t addRangeThunkSymbol(Symbol *thunk) {
177     symbols.push_back(thunk);
178     return symbols.size() - 1;
179   }
180 
181   void includeResourceChunks();
182 
183   bool isResourceObjFile() const { return !resourceChunks.empty(); }
184 
185   // Flags in the absolute @feat.00 symbol if it is present. These usually
186   // indicate if an object was compiled with certain security features enabled
187   // like stack guard, safeseh, /guard:cf, or other things.
188   uint32_t feat00Flags = 0;
189 
190   // True if this object file is compatible with SEH.  COFF-specific and
191   // x86-only. COFF spec 5.10.1. The .sxdata section.
192   bool hasSafeSEH() { return feat00Flags & 0x1; }
193 
194   // True if this file was compiled with /guard:cf.
195   bool hasGuardCF() { return feat00Flags & 0x4800; }
196 
197   // Pointer to the PDB module descriptor builder. Various debug info records
198   // will reference object files by "module index", which is here. Things like
199   // source files and section contributions are also recorded here. Will be null
200   // if we are not producing a PDB.
201   llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr;
202 
203   const coff_section *addrsigSec = nullptr;
204 
205   const coff_section *callgraphSec = nullptr;
206 
207   // When using Microsoft precompiled headers, this is the PCH's key.
208   // The same key is used by both the precompiled object, and objects using the
209   // precompiled object. Any difference indicates out-of-date objects.
210   llvm::Optional<uint32_t> pchSignature;
211 
212   // Whether this file was compiled with /hotpatch.
213   bool hotPatchable = false;
214 
215   // Whether the object was already merged into the final PDB.
216   bool mergedIntoPDB = false;
217 
218   // If the OBJ has a .debug$T stream, this tells how it will be handled.
219   TpiSource *debugTypesObj = nullptr;
220 
221   // The .debug$P or .debug$T section data if present. Empty otherwise.
222   ArrayRef<uint8_t> debugTypes;
223 
224   llvm::Optional<std::pair<StringRef, uint32_t>>
225   getVariableLocation(StringRef var);
226 
227   llvm::Optional<llvm::DILineInfo> getDILineInfo(uint32_t offset,
228                                                  uint32_t sectionIndex);
229 
230 private:
231   const coff_section* getSection(uint32_t i);
232   const coff_section *getSection(COFFSymbolRef sym) {
233     return getSection(sym.getSectionNumber());
234   }
235 
236   void enqueuePdbFile(StringRef path, ObjFile *fromFile);
237 
238   void initializeChunks();
239   void initializeSymbols();
240   void initializeFlags();
241   void initializeDependencies();
242 
243   SectionChunk *
244   readSection(uint32_t sectionNumber,
245               const llvm::object::coff_aux_section_definition *def,
246               StringRef leaderName);
247 
248   void readAssociativeDefinition(
249       COFFSymbolRef coffSym,
250       const llvm::object::coff_aux_section_definition *def);
251 
252   void readAssociativeDefinition(
253       COFFSymbolRef coffSym,
254       const llvm::object::coff_aux_section_definition *def,
255       uint32_t parentSection);
256 
257   void recordPrevailingSymbolForMingw(
258       COFFSymbolRef coffSym,
259       llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
260 
261   void maybeAssociateSEHForMingw(
262       COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def,
263       const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
264 
265   // Given a new symbol Sym with comdat selection Selection, if the new
266   // symbol is not (yet) Prevailing and the existing comdat leader set to
267   // Leader, emits a diagnostic if the new symbol and its selection doesn't
268   // match the existing symbol and its selection. If either old or new
269   // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace
270   // the existing leader. In that case, Prevailing is set to true.
271   void
272   handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection,
273                         bool &prevailing, DefinedRegular *leader,
274                         const llvm::object::coff_aux_section_definition *def);
275 
276   llvm::Optional<Symbol *>
277   createDefined(COFFSymbolRef sym,
278                 std::vector<const llvm::object::coff_aux_section_definition *>
279                     &comdatDefs,
280                 bool &prevailingComdat);
281   Symbol *createRegular(COFFSymbolRef sym);
282   Symbol *createUndefined(COFFSymbolRef sym);
283 
284   std::unique_ptr<COFFObjectFile> coffObj;
285 
286   // List of all chunks defined by this file. This includes both section
287   // chunks and non-section chunks for common symbols.
288   std::vector<Chunk *> chunks;
289 
290   std::vector<SectionChunk *> resourceChunks;
291 
292   // CodeView debug info sections.
293   std::vector<SectionChunk *> debugChunks;
294 
295   // Chunks containing symbol table indices of exception handlers. Only used for
296   // 32-bit x86.
297   std::vector<SectionChunk *> sxDataChunks;
298 
299   // Chunks containing symbol table indices of address taken symbols, address
300   // taken IAT entries, longjmp and ehcont targets. These are not linked into
301   // the final binary when /guard:cf is set.
302   std::vector<SectionChunk *> guardFidChunks;
303   std::vector<SectionChunk *> guardIATChunks;
304   std::vector<SectionChunk *> guardLJmpChunks;
305   std::vector<SectionChunk *> guardEHContChunks;
306 
307   // This vector contains a list of all symbols defined or referenced by this
308   // file. They are indexed such that you can get a Symbol by symbol
309   // index. Nonexistent indices (which are occupied by auxiliary
310   // symbols in the real symbol table) are filled with null pointers.
311   std::vector<Symbol *> symbols;
312 
313   // This vector contains the same chunks as Chunks, but they are
314   // indexed such that you can get a SectionChunk by section index.
315   // Nonexistent section indices are filled with null pointers.
316   // (Because section number is 1-based, the first slot is always a
317   // null pointer.) This vector is only valid during initialization.
318   std::vector<SectionChunk *> sparseChunks;
319 
320   DWARFCache *dwarf = nullptr;
321 };
322 
323 // This is a PDB type server dependency, that is not a input file per se, but
324 // needs to be treated like one. Such files are discovered from the debug type
325 // stream.
326 class PDBInputFile : public InputFile {
327 public:
328   explicit PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m);
329   ~PDBInputFile();
330   static bool classof(const InputFile *f) { return f->kind() == PDBKind; }
331   void parse() override;
332 
333   static PDBInputFile *findFromRecordPath(const COFFLinkerContext &ctx,
334                                           StringRef path, ObjFile *fromFile);
335 
336   // Record possible errors while opening the PDB file
337   llvm::Optional<Error> loadErr;
338 
339   // This is the actual interface to the PDB (if it was opened successfully)
340   std::unique_ptr<llvm::pdb::NativeSession> session;
341 
342   // If the PDB has a .debug$T stream, this tells how it will be handled.
343   TpiSource *debugTypesObj = nullptr;
344 };
345 
346 // This type represents import library members that contain DLL names
347 // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
348 // for details about the format.
349 class ImportFile : public InputFile {
350 public:
351   explicit ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m)
352       : InputFile(ctx, ImportKind, m) {}
353 
354   static bool classof(const InputFile *f) { return f->kind() == ImportKind; }
355 
356   Symbol *impSym = nullptr;
357   Symbol *thunkSym = nullptr;
358   std::string dllName;
359 
360 private:
361   void parse() override;
362 
363 public:
364   StringRef externalName;
365   const coff_import_header *hdr;
366   Chunk *location = nullptr;
367 
368   // We want to eliminate dllimported symbols if no one actually refers to them.
369   // These "Live" bits are used to keep track of which import library members
370   // are actually in use.
371   //
372   // If the Live bit is turned off by MarkLive, Writer will ignore dllimported
373   // symbols provided by this import library member. We also track whether the
374   // imported symbol is used separately from whether the thunk is used in order
375   // to avoid creating unnecessary thunks.
376   bool live = !config->doGC;
377   bool thunkLive = !config->doGC;
378 };
379 
380 // Used for LTO.
381 class BitcodeFile : public InputFile {
382 public:
383   BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb, StringRef archiveName,
384               uint64_t offsetInArchive);
385   explicit BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef m,
386                        StringRef archiveName, uint64_t offsetInArchive,
387                        std::vector<Symbol *> &&symbols);
388   ~BitcodeFile();
389   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
390   ArrayRef<Symbol *> getSymbols() { return symbols; }
391   MachineTypes getMachineType() override;
392   std::unique_ptr<llvm::lto::InputFile> obj;
393 
394 private:
395   void parse() override;
396 
397   std::vector<Symbol *> symbols;
398 };
399 
400 // .dll file. MinGW only.
401 class DLLFile : public InputFile {
402 public:
403   explicit DLLFile(COFFLinkerContext &ctx, MemoryBufferRef m)
404       : InputFile(ctx, DLLKind, m) {}
405   static bool classof(const InputFile *f) { return f->kind() == DLLKind; }
406   void parse() override;
407   MachineTypes getMachineType() override;
408 
409   struct Symbol {
410     StringRef dllName;
411     StringRef symbolName;
412     llvm::COFF::ImportNameType nameType;
413     llvm::COFF::ImportType importType;
414   };
415 
416   void makeImport(Symbol *s);
417 
418 private:
419   std::unique_ptr<COFFObjectFile> coffObj;
420   llvm::StringSet<> seen;
421 };
422 
423 inline bool isBitcode(MemoryBufferRef mb) {
424   return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
425 }
426 
427 std::string replaceThinLTOSuffix(StringRef path);
428 } // namespace coff
429 
430 std::string toString(const coff::InputFile *file);
431 } // namespace lld
432 
433 #endif
434