xref: /freebsd/contrib/llvm-project/lld/COFF/Chunks.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- Chunks.h -------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_COFF_CHUNKS_H
10 #define LLD_COFF_CHUNKS_H
11 
12 #include "Config.h"
13 #include "InputFiles.h"
14 #include "lld/Common/LLVM.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/PointerIntPair.h"
17 #include "llvm/ADT/iterator.h"
18 #include "llvm/ADT/iterator_range.h"
19 #include "llvm/MC/StringTableBuilder.h"
20 #include "llvm/Object/COFF.h"
21 #include "llvm/Object/WindowsMachineFlag.h"
22 #include <utility>
23 #include <vector>
24 
25 namespace lld::coff {
26 
27 using llvm::COFF::ImportDirectoryTableEntry;
28 using llvm::object::chpe_range_type;
29 using llvm::object::coff_relocation;
30 using llvm::object::coff_section;
31 using llvm::object::COFFSymbolRef;
32 using llvm::object::SectionRef;
33 
34 class Baserel;
35 class Defined;
36 class DefinedImportData;
37 class DefinedRegular;
38 class ObjFile;
39 class OutputSection;
40 class RuntimePseudoReloc;
41 class Symbol;
42 
// Mask selecting the permission bits of the section characteristics
// (discardable, writable, readable, executable, etc).
// Declared `inline constexpr` so every translation unit including this header
// shares one definition and the value is usable in constant expressions.
inline constexpr uint32_t permMask = 0xFE000000;

// Mask selecting the section type bits (code, data, bss).
inline constexpr uint32_t typeMask = 0x000000E0;

// The log base 2 of the largest section alignment, which is log2(8192), or 13.
enum : unsigned { Log2MaxSectionAlignment = 13 };
51 
52 // A Chunk represents a chunk of data that will occupy space in the
53 // output (if the resolver chose that). It may or may not be backed by
54 // a section of an input file. It could be linker-created data, or
55 // doesn't even have actual data (if common or bss).
56 class Chunk {
57 public:
58   enum Kind : uint8_t {
59     SectionKind,
60     SectionECKind,
61     OtherKind,
62     ImportThunkKind,
63     ECExportThunkKind
64   };
kind()65   Kind kind() const { return chunkKind; }
66 
67   // Returns the size of this chunk (even if this is a common or BSS.)
68   size_t getSize() const;
69 
70   // Returns chunk alignment in power of two form. Value values are powers of
71   // two from 1 to 8192.
getAlignment()72   uint32_t getAlignment() const { return 1U << p2Align; }
73 
74   // Update the chunk section alignment measured in bytes. Internally alignment
75   // is stored in log2.
setAlignment(uint32_t align)76   void setAlignment(uint32_t align) {
77     // Treat zero byte alignment as 1 byte alignment.
78     align = align ? align : 1;
79     assert(llvm::isPowerOf2_32(align) && "alignment is not a power of 2");
80     p2Align = llvm::Log2_32(align);
81     assert(p2Align <= Log2MaxSectionAlignment &&
82            "impossible requested alignment");
83   }
84 
85   // Write this chunk to a mmap'ed file, assuming Buf is pointing to
86   // beginning of the file. Because this function may use RVA values
87   // of other chunks for relocations, you need to set them properly
88   // before calling this function.
89   void writeTo(uint8_t *buf) const;
90 
91   // The writer sets and uses the addresses. In practice, PE images cannot be
92   // larger than 2GB. Chunks are always laid as part of the image, so Chunk RVAs
93   // can be stored with 32 bits.
getRVA()94   uint32_t getRVA() const { return rva; }
setRVA(uint64_t v)95   void setRVA(uint64_t v) {
96     // This may truncate. The writer checks for overflow later.
97     rva = (uint32_t)v;
98   }
99 
100   // Returns readable/writable/executable bits.
101   uint32_t getOutputCharacteristics() const;
102 
103   // Returns the section name if this is a section chunk.
104   // It is illegal to call this function on non-section chunks.
105   StringRef getSectionName() const;
106 
107   // An output section has pointers to chunks in the section, and each
108   // chunk has a back pointer to an output section.
setOutputSectionIdx(uint16_t o)109   void setOutputSectionIdx(uint16_t o) { osidx = o; }
getOutputSectionIdx()110   uint16_t getOutputSectionIdx() const { return osidx; }
111 
112   // Windows-specific.
113   // Collect all locations that contain absolute addresses for base relocations.
114   void getBaserels(std::vector<Baserel> *res);
115 
116   // Returns a human-readable name of this chunk. Chunks are unnamed chunks of
117   // bytes, so this is used only for logging or debugging.
118   StringRef getDebugName() const;
119 
120   // Return true if this file has the hotpatch flag set to true in the
121   // S_COMPILE3 record in codeview debug info. Also returns true for some thunks
122   // synthesized by the linker.
123   bool isHotPatchable() const;
124 
125   MachineTypes getMachine() const;
126   llvm::Triple::ArchType getArch() const;
127   std::optional<chpe_range_type> getArm64ECRangeType() const;
128 
129   // ARM64EC entry thunk associated with the chunk.
130   Defined *getEntryThunk() const;
131   void setEntryThunk(Defined *entryThunk);
132 
133 protected:
chunkKind(k)134   Chunk(Kind k = OtherKind) : chunkKind(k), hasData(true), p2Align(0) {}
135 
136   const Kind chunkKind;
137 
138 public:
139   // Returns true if this has non-zero data. BSS chunks return
140   // false. If false is returned, the space occupied by this chunk
141   // will be filled with zeros. Corresponds to the
142   // IMAGE_SCN_CNT_UNINITIALIZED_DATA section characteristic bit.
143   uint8_t hasData : 1;
144 
145 public:
146   // The alignment of this chunk, stored in log2 form. The writer uses the
147   // value.
148   uint8_t p2Align : 7;
149 
150   // The output section index for this chunk. The first valid section number is
151   // one.
152   uint16_t osidx = 0;
153 
154   // The RVA of this chunk in the output. The writer sets a value.
155   uint32_t rva = 0;
156 };
157 
158 class NonSectionChunk : public Chunk {
159 public:
160   virtual ~NonSectionChunk() = default;
161 
162   // Returns the size of this chunk (even if this is a common or BSS.)
163   virtual size_t getSize() const = 0;
164 
getOutputCharacteristics()165   virtual uint32_t getOutputCharacteristics() const { return 0; }
166 
167   // Write this chunk to a mmap'ed file, assuming Buf is pointing to
168   // beginning of the file. Because this function may use RVA values
169   // of other chunks for relocations, you need to set them properly
170   // before calling this function.
writeTo(uint8_t * buf)171   virtual void writeTo(uint8_t *buf) const {}
172 
173   // Returns the section name if this is a section chunk.
174   // It is illegal to call this function on non-section chunks.
getSectionName()175   virtual StringRef getSectionName() const {
176     llvm_unreachable("unimplemented getSectionName");
177   }
178 
179   // Windows-specific.
180   // Collect all locations that contain absolute addresses for base relocations.
getBaserels(std::vector<Baserel> * res)181   virtual void getBaserels(std::vector<Baserel> *res) {}
182 
getMachine()183   virtual MachineTypes getMachine() const { return IMAGE_FILE_MACHINE_UNKNOWN; }
184 
185   // Returns a human-readable name of this chunk. Chunks are unnamed chunks of
186   // bytes, so this is used only for logging or debugging.
getDebugName()187   virtual StringRef getDebugName() const { return ""; }
188 
189   // Verify that chunk relocations are within their ranges.
verifyRanges()190   virtual bool verifyRanges() { return true; };
191 
192   // If needed, extend the chunk to ensure all relocations are within the
193   // allowed ranges. Return the additional space required for the extension.
extendRanges()194   virtual uint32_t extendRanges() { return 0; };
195 
classof(const Chunk * c)196   static bool classof(const Chunk *c) { return c->kind() >= OtherKind; }
197 
198 protected:
Chunk(k)199   NonSectionChunk(Kind k = OtherKind) : Chunk(k) {}
200 };
201 
202 class NonSectionCodeChunk : public NonSectionChunk {
203 public:
getOutputCharacteristics()204   virtual uint32_t getOutputCharacteristics() const override {
205     return llvm::COFF::IMAGE_SCN_MEM_READ | llvm::COFF::IMAGE_SCN_MEM_EXECUTE;
206   }
207 
208 protected:
NonSectionChunk(k)209   NonSectionCodeChunk(Kind k = OtherKind) : NonSectionChunk(k) {}
210 };
211 
212 // MinGW specific; information about one individual location in the image
213 // that needs to be fixed up at runtime after loading. This represents
214 // one individual element in the PseudoRelocTableChunk table.
215 class RuntimePseudoReloc {
216 public:
RuntimePseudoReloc(Defined * sym,SectionChunk * target,uint32_t targetOffset,int flags)217   RuntimePseudoReloc(Defined *sym, SectionChunk *target, uint32_t targetOffset,
218                      int flags)
219       : sym(sym), target(target), targetOffset(targetOffset), flags(flags) {}
220 
221   Defined *sym;
222   SectionChunk *target;
223   uint32_t targetOffset;
224   // The Flags field contains the size of the relocation, in bits. No other
225   // flags are currently defined.
226   int flags;
227 };
228 
229 // A chunk corresponding a section of an input file.
230 class SectionChunk : public Chunk {
231   // Identical COMDAT Folding feature accesses section internal data.
232   friend class ICF;
233 
234 public:
235   class symbol_iterator : public llvm::iterator_adaptor_base<
236                               symbol_iterator, const coff_relocation *,
237                               std::random_access_iterator_tag, Symbol *> {
238     friend SectionChunk;
239 
240     ObjFile *file;
241 
symbol_iterator(ObjFile * file,const coff_relocation * i)242     symbol_iterator(ObjFile *file, const coff_relocation *i)
243         : symbol_iterator::iterator_adaptor_base(i), file(file) {}
244 
245   public:
246     symbol_iterator() = default;
247 
248     Symbol *operator*() const { return file->getSymbol(I->SymbolTableIndex); }
249   };
250 
251   SectionChunk(ObjFile *file, const coff_section *header, Kind k = SectionKind);
classof(const Chunk * c)252   static bool classof(const Chunk *c) { return c->kind() <= SectionECKind; }
getSize()253   size_t getSize() const { return header->SizeOfRawData; }
254   ArrayRef<uint8_t> getContents() const;
255   void writeTo(uint8_t *buf) const;
256   MachineTypes getMachine() const;
257 
258   // Defend against unsorted relocations. This may be overly conservative.
259   void sortRelocations();
260 
261   // Write and relocate a portion of the section. This is intended to be called
262   // in a loop. Relocations must be sorted first.
263   void writeAndRelocateSubsection(ArrayRef<uint8_t> sec,
264                                   ArrayRef<uint8_t> subsec,
265                                   uint32_t &nextRelocIndex, uint8_t *buf) const;
266 
getOutputCharacteristics()267   uint32_t getOutputCharacteristics() const {
268     return header->Characteristics & (permMask | typeMask);
269   }
getSectionName()270   StringRef getSectionName() const {
271     return StringRef(sectionNameData, sectionNameSize);
272   }
273   void getBaserels(std::vector<Baserel> *res);
274   bool isCOMDAT() const;
275   void applyRelocation(uint8_t *off, const coff_relocation &rel) const;
276   void applyRelX64(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
277                    uint64_t p, uint64_t imageBase) const;
278   void applyRelX86(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
279                    uint64_t p, uint64_t imageBase) const;
280   void applyRelARM(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
281                    uint64_t p, uint64_t imageBase) const;
282   void applyRelARM64(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
283                      uint64_t p, uint64_t imageBase) const;
284 
285   void getRuntimePseudoRelocs(std::vector<RuntimePseudoReloc> &res);
286 
287   // Called if the garbage collector decides to not include this chunk
288   // in a final output. It's supposed to print out a log message to stdout.
289   void printDiscardedMessage() const;
290 
291   // Adds COMDAT associative sections to this COMDAT section. A chunk
292   // and its children are treated as a group by the garbage collector.
293   void addAssociative(SectionChunk *child);
294 
295   StringRef getDebugName() const;
296 
297   // True if this is a codeview debug info chunk. These will not be laid out in
298   // the image. Instead they will end up in the PDB, if one is requested.
isCodeView()299   bool isCodeView() const {
300     return getSectionName() == ".debug" || getSectionName().starts_with(".debug$");
301   }
302 
303   // True if this is a DWARF debug info or exception handling chunk.
isDWARF()304   bool isDWARF() const {
305     return getSectionName().starts_with(".debug_") || getSectionName() == ".eh_frame";
306   }
307 
308   // Allow iteration over the bodies of this chunk's relocated symbols.
symbols()309   llvm::iterator_range<symbol_iterator> symbols() const {
310     return llvm::make_range(symbol_iterator(file, relocsData),
311                             symbol_iterator(file, relocsData + relocsSize));
312   }
313 
getRelocs()314   ArrayRef<coff_relocation> getRelocs() const {
315     return llvm::ArrayRef(relocsData, relocsSize);
316   }
317 
318   // Reloc setter used by ARM range extension thunk insertion.
setRelocs(ArrayRef<coff_relocation> newRelocs)319   void setRelocs(ArrayRef<coff_relocation> newRelocs) {
320     relocsData = newRelocs.data();
321     relocsSize = newRelocs.size();
322     assert(relocsSize == newRelocs.size() && "reloc size truncation");
323   }
324 
325   // Single linked list iterator for associated comdat children.
326   class AssociatedIterator
327       : public llvm::iterator_facade_base<
328             AssociatedIterator, std::forward_iterator_tag, SectionChunk> {
329   public:
330     AssociatedIterator() = default;
AssociatedIterator(SectionChunk * head)331     AssociatedIterator(SectionChunk *head) : cur(head) {}
332     bool operator==(const AssociatedIterator &r) const { return cur == r.cur; }
333     // FIXME: Wrong const-ness, but it makes filter ranges work.
334     SectionChunk &operator*() const { return *cur; }
335     SectionChunk &operator*() { return *cur; }
336     AssociatedIterator &operator++() {
337       cur = cur->assocChildren;
338       return *this;
339     }
340 
341   private:
342     SectionChunk *cur = nullptr;
343   };
344 
345   // Allow iteration over the associated child chunks for this section.
children()346   llvm::iterator_range<AssociatedIterator> children() const {
347     // Associated sections do not have children. The assocChildren field is
348     // part of the parent's list of children.
349     bool isAssoc = selection == llvm::COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE;
350     return llvm::make_range(
351         AssociatedIterator(isAssoc ? nullptr : assocChildren),
352         AssociatedIterator(nullptr));
353   }
354 
355   // The section ID this chunk belongs to in its Obj.
356   uint32_t getSectionNumber() const;
357 
358   ArrayRef<uint8_t> consumeDebugMagic();
359 
360   static ArrayRef<uint8_t> consumeDebugMagic(ArrayRef<uint8_t> data,
361                                              StringRef sectionName);
362 
363   static SectionChunk *findByName(ArrayRef<SectionChunk *> sections,
364                                   StringRef name);
365 
366   // The file that this chunk was created from.
367   ObjFile *file;
368 
369   // Pointer to the COFF section header in the input file.
370   const coff_section *header;
371 
372   // The COMDAT leader symbol if this is a COMDAT chunk.
373   DefinedRegular *sym = nullptr;
374 
375   // The CRC of the contents as described in the COFF spec 4.5.5.
376   // Auxiliary Format 5: Section Definitions. Used for ICF.
377   uint32_t checksum = 0;
378 
379   // Used by the garbage collector.
380   bool live;
381 
382   // Whether this section needs to be kept distinct from other sections during
383   // ICF. This is set by the driver using address-significance tables.
384   bool keepUnique = false;
385 
386   // The COMDAT selection if this is a COMDAT chunk.
387   llvm::COFF::COMDATType selection = (llvm::COFF::COMDATType)0;
388 
389   // A pointer pointing to a replacement for this chunk.
390   // Initially it points to "this" object. If this chunk is merged
391   // with other chunk by ICF, it points to another chunk,
392   // and this chunk is considered as dead.
393   SectionChunk *repl;
394 
395 private:
396   SectionChunk *assocChildren = nullptr;
397 
398   // Used for ICF (Identical COMDAT Folding)
399   void replace(SectionChunk *other);
400   uint32_t eqClass[2] = {0, 0};
401 
402   // Relocations for this section. Size is stored below.
403   const coff_relocation *relocsData;
404 
405   // Section name string. Size is stored below.
406   const char *sectionNameData;
407 
408   uint32_t relocsSize = 0;
409   uint32_t sectionNameSize = 0;
410 };
411 
412 // A section chunk corresponding a section of an EC input file.
413 class SectionChunkEC final : public SectionChunk {
414 public:
classof(const Chunk * c)415   static bool classof(const Chunk *c) { return c->kind() == SectionECKind; }
416 
SectionChunkEC(ObjFile * file,const coff_section * header)417   SectionChunkEC(ObjFile *file, const coff_section *header)
418       : SectionChunk(file, header, SectionECKind) {}
419   Defined *entryThunk = nullptr;
420 };
421 
422 // Inline methods to implement faux-virtual dispatch for SectionChunk.
423 
getSize()424 inline size_t Chunk::getSize() const {
425   if (isa<SectionChunk>(this))
426     return static_cast<const SectionChunk *>(this)->getSize();
427   return static_cast<const NonSectionChunk *>(this)->getSize();
428 }
429 
getOutputCharacteristics()430 inline uint32_t Chunk::getOutputCharacteristics() const {
431   if (isa<SectionChunk>(this))
432     return static_cast<const SectionChunk *>(this)->getOutputCharacteristics();
433   return static_cast<const NonSectionChunk *>(this)->getOutputCharacteristics();
434 }
435 
writeTo(uint8_t * buf)436 inline void Chunk::writeTo(uint8_t *buf) const {
437   if (isa<SectionChunk>(this))
438     static_cast<const SectionChunk *>(this)->writeTo(buf);
439   else
440     static_cast<const NonSectionChunk *>(this)->writeTo(buf);
441 }
442 
getSectionName()443 inline StringRef Chunk::getSectionName() const {
444   if (isa<SectionChunk>(this))
445     return static_cast<const SectionChunk *>(this)->getSectionName();
446   return static_cast<const NonSectionChunk *>(this)->getSectionName();
447 }
448 
getBaserels(std::vector<Baserel> * res)449 inline void Chunk::getBaserels(std::vector<Baserel> *res) {
450   if (isa<SectionChunk>(this))
451     static_cast<SectionChunk *>(this)->getBaserels(res);
452   else
453     static_cast<NonSectionChunk *>(this)->getBaserels(res);
454 }
455 
getDebugName()456 inline StringRef Chunk::getDebugName() const {
457   if (isa<SectionChunk>(this))
458     return static_cast<const SectionChunk *>(this)->getDebugName();
459   return static_cast<const NonSectionChunk *>(this)->getDebugName();
460 }
461 
getMachine()462 inline MachineTypes Chunk::getMachine() const {
463   if (isa<SectionChunk>(this))
464     return static_cast<const SectionChunk *>(this)->getMachine();
465   return static_cast<const NonSectionChunk *>(this)->getMachine();
466 }
467 
getArch()468 inline llvm::Triple::ArchType Chunk::getArch() const {
469   return llvm::getMachineArchType(getMachine());
470 }
471 
getArm64ECRangeType()472 inline std::optional<chpe_range_type> Chunk::getArm64ECRangeType() const {
473   // Data sections don't need codemap entries.
474   if (!(getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE))
475     return std::nullopt;
476 
477   switch (getMachine()) {
478   case AMD64:
479     return chpe_range_type::Amd64;
480   case ARM64EC:
481     return chpe_range_type::Arm64EC;
482   default:
483     return chpe_range_type::Arm64;
484   }
485 }
486 
487 // This class is used to implement an lld-specific feature (not implemented in
488 // MSVC) that minimizes the output size by finding string literals sharing tail
489 // parts and merging them.
490 //
491 // If string tail merging is enabled and a section is identified as containing a
492 // string literal, it is added to a MergeChunk with an appropriate alignment.
493 // The MergeChunk then tail merges the strings using the StringTableBuilder
494 // class and assigns RVAs and section offsets to each of the member chunks based
495 // on the offsets assigned by the StringTableBuilder.
496 class MergeChunk : public NonSectionChunk {
497 public:
498   MergeChunk(uint32_t alignment);
499   static void addSection(COFFLinkerContext &ctx, SectionChunk *c);
500   void finalizeContents();
501   void assignSubsectionRVAs();
502 
503   uint32_t getOutputCharacteristics() const override;
getSectionName()504   StringRef getSectionName() const override { return ".rdata"; }
505   size_t getSize() const override;
506   void writeTo(uint8_t *buf) const override;
507 
508   std::vector<SectionChunk *> sections;
509 
510 private:
511   llvm::StringTableBuilder builder;
512   bool finalized = false;
513 };
514 
515 // A chunk for common symbols. Common chunks don't have actual data.
516 class CommonChunk : public NonSectionChunk {
517 public:
518   CommonChunk(const COFFSymbolRef sym);
getSize()519   size_t getSize() const override { return sym.getValue(); }
520   uint32_t getOutputCharacteristics() const override;
getSectionName()521   StringRef getSectionName() const override { return ".bss"; }
522 
523 private:
524   const COFFSymbolRef sym;
525 };
526 
527 // A chunk for linker-created strings.
528 class StringChunk : public NonSectionChunk {
529 public:
StringChunk(StringRef s)530   explicit StringChunk(StringRef s) : str(s) {}
getSize()531   size_t getSize() const override { return str.size() + 1; }
532   void writeTo(uint8_t *buf) const override;
533 
534 private:
535   StringRef str;
536 };
537 
// Machine code templates for import thunks. The zeroed immediates/offsets are
// placeholders; the import thunk chunk classes in this header size themselves
// from these arrays.
//
// These are `inline constexpr` rather than `static const` so that all
// translation units including this header share a single copy of each array.

// x86 / x64 template.
inline constexpr uint8_t importThunkX86[] = {
    0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0
};

// 32-bit ARM (Thumb-2) template.
inline constexpr uint8_t importThunkARM[] = {
    0x40, 0xf2, 0x00, 0x0c, // movw  ip, #0
    0xc0, 0xf2, 0x00, 0x0c, // movt  ip, #0
    0xdc, 0xf8, 0x00, 0xf0, // ldr.w pc, [ip]
};

// ARM64 template.
inline constexpr uint8_t importThunkARM64[] = {
    0x10, 0x00, 0x00, 0x90, // adrp x16, #0
    0x10, 0x02, 0x40, 0xf9, // ldr  x16, [x16]
    0x00, 0x02, 0x1f, 0xd6, // br   x16
};

// ARM64EC template.
inline constexpr uint8_t importThunkARM64EC[] = {
    0x0b, 0x00, 0x00, 0x90, // adrp x11, 0x0
    0x6b, 0x01, 0x40, 0xf9, // ldr  x11, [x11]
    0x0a, 0x00, 0x00, 0x90, // adrp x10, 0x0
    0x4a, 0x01, 0x00, 0x91, // add  x10, x10, #0x0
    0x00, 0x00, 0x00, 0x14  // b    0x0
};
561 
562 // Windows-specific.
563 // A chunk for DLL import jump table entry. In a final output, its
564 // contents will be a JMP instruction to some __imp_ symbol.
565 class ImportThunkChunk : public NonSectionCodeChunk {
566 public:
567   ImportThunkChunk(COFFLinkerContext &ctx, Defined *s);
classof(const Chunk * c)568   static bool classof(const Chunk *c) { return c->kind() == ImportThunkKind; }
569 
570   // We track the usage of the thunk symbol separately from the import file
571   // to avoid generating unnecessary thunks.
572   bool live;
573 
574 protected:
575   Defined *impSymbol;
576   COFFLinkerContext &ctx;
577 };
578 
579 class ImportThunkChunkX64 : public ImportThunkChunk {
580 public:
581   explicit ImportThunkChunkX64(COFFLinkerContext &ctx, Defined *s);
getSize()582   size_t getSize() const override { return sizeof(importThunkX86); }
583   void writeTo(uint8_t *buf) const override;
getMachine()584   MachineTypes getMachine() const override { return AMD64; }
585 };
586 
587 class ImportThunkChunkX86 : public ImportThunkChunk {
588 public:
ImportThunkChunkX86(COFFLinkerContext & ctx,Defined * s)589   explicit ImportThunkChunkX86(COFFLinkerContext &ctx, Defined *s)
590       : ImportThunkChunk(ctx, s) {}
getSize()591   size_t getSize() const override { return sizeof(importThunkX86); }
592   void getBaserels(std::vector<Baserel> *res) override;
593   void writeTo(uint8_t *buf) const override;
getMachine()594   MachineTypes getMachine() const override { return I386; }
595 };
596 
597 class ImportThunkChunkARM : public ImportThunkChunk {
598 public:
ImportThunkChunkARM(COFFLinkerContext & ctx,Defined * s)599   explicit ImportThunkChunkARM(COFFLinkerContext &ctx, Defined *s)
600       : ImportThunkChunk(ctx, s) {
601     setAlignment(2);
602   }
getSize()603   size_t getSize() const override { return sizeof(importThunkARM); }
604   void getBaserels(std::vector<Baserel> *res) override;
605   void writeTo(uint8_t *buf) const override;
getMachine()606   MachineTypes getMachine() const override { return ARMNT; }
607 };
608 
609 class ImportThunkChunkARM64 : public ImportThunkChunk {
610 public:
ImportThunkChunkARM64(COFFLinkerContext & ctx,Defined * s,MachineTypes machine)611   explicit ImportThunkChunkARM64(COFFLinkerContext &ctx, Defined *s,
612                                  MachineTypes machine)
613       : ImportThunkChunk(ctx, s), machine(machine) {
614     setAlignment(4);
615   }
getSize()616   size_t getSize() const override { return sizeof(importThunkARM64); }
617   void writeTo(uint8_t *buf) const override;
getMachine()618   MachineTypes getMachine() const override { return machine; }
619 
620 private:
621   MachineTypes machine;
622 };
623 
624 // ARM64EC __impchk_* thunk implementation.
625 // Performs an indirect call to an imported function pointer
626 // using the __icall_helper_arm64ec helper function.
627 class ImportThunkChunkARM64EC : public ImportThunkChunk {
628 public:
629   explicit ImportThunkChunkARM64EC(ImportFile *file);
630   size_t getSize() const override;
getMachine()631   MachineTypes getMachine() const override { return ARM64EC; }
632   void writeTo(uint8_t *buf) const override;
633   bool verifyRanges() override;
634   uint32_t extendRanges() override;
635 
636   Defined *exitThunk;
637   Defined *sym = nullptr;
638   bool extended = false;
639 
640 private:
641   ImportFile *file;
642 };
643 
644 class RangeExtensionThunkARM : public NonSectionCodeChunk {
645 public:
RangeExtensionThunkARM(COFFLinkerContext & ctx,Defined * t)646   explicit RangeExtensionThunkARM(COFFLinkerContext &ctx, Defined *t)
647       : target(t), ctx(ctx) {
648     setAlignment(2);
649   }
650   size_t getSize() const override;
651   void writeTo(uint8_t *buf) const override;
getMachine()652   MachineTypes getMachine() const override { return ARMNT; }
653 
654   Defined *target;
655 
656 private:
657   COFFLinkerContext &ctx;
658 };
659 
660 // A ragnge extension thunk used for both ARM64EC and ARM64 machine types.
661 class RangeExtensionThunkARM64 : public NonSectionCodeChunk {
662 public:
RangeExtensionThunkARM64(MachineTypes machine,Defined * t)663   explicit RangeExtensionThunkARM64(MachineTypes machine, Defined *t)
664       : target(t), machine(machine) {
665     setAlignment(4);
666     assert(llvm::COFF::isAnyArm64(machine));
667   }
668   size_t getSize() const override;
669   void writeTo(uint8_t *buf) const override;
getMachine()670   MachineTypes getMachine() const override { return machine; }
671 
672   Defined *target;
673 
674 private:
675   MachineTypes machine;
676 };
677 
678 // Windows-specific.
679 // See comments for DefinedLocalImport class.
680 class LocalImportChunk : public NonSectionChunk {
681 public:
682   explicit LocalImportChunk(COFFLinkerContext &ctx, Defined *s);
683   size_t getSize() const override;
684   void getBaserels(std::vector<Baserel> *res) override;
685   void writeTo(uint8_t *buf) const override;
686 
687 private:
688   Defined *sym;
689   COFFLinkerContext &ctx;
690 };
691 
692 // Duplicate RVAs are not allowed in RVA tables, so unique symbols by chunk and
693 // offset into the chunk. Order does not matter as the RVA table will be sorted
694 // later.
695 struct ChunkAndOffset {
696   Chunk *inputChunk;
697   uint32_t offset;
698 
699   struct DenseMapInfo {
getEmptyKeyChunkAndOffset::DenseMapInfo700     static ChunkAndOffset getEmptyKey() {
701       return {llvm::DenseMapInfo<Chunk *>::getEmptyKey(), 0};
702     }
getTombstoneKeyChunkAndOffset::DenseMapInfo703     static ChunkAndOffset getTombstoneKey() {
704       return {llvm::DenseMapInfo<Chunk *>::getTombstoneKey(), 0};
705     }
getHashValueChunkAndOffset::DenseMapInfo706     static unsigned getHashValue(const ChunkAndOffset &co) {
707       return llvm::DenseMapInfo<std::pair<Chunk *, uint32_t>>::getHashValue(
708           {co.inputChunk, co.offset});
709     }
isEqualChunkAndOffset::DenseMapInfo710     static bool isEqual(const ChunkAndOffset &lhs, const ChunkAndOffset &rhs) {
711       return lhs.inputChunk == rhs.inputChunk && lhs.offset == rhs.offset;
712     }
713   };
714 };
715 
716 using SymbolRVASet = llvm::DenseSet<ChunkAndOffset>;
717 
718 // Table which contains symbol RVAs. Used for /safeseh and /guard:cf.
719 class RVATableChunk : public NonSectionChunk {
720 public:
RVATableChunk(SymbolRVASet s)721   explicit RVATableChunk(SymbolRVASet s) : syms(std::move(s)) {}
getSize()722   size_t getSize() const override { return syms.size() * 4; }
723   void writeTo(uint8_t *buf) const override;
724 
725 private:
726   SymbolRVASet syms;
727 };
728 
729 // Table which contains symbol RVAs with flags. Used for /guard:ehcont.
730 class RVAFlagTableChunk : public NonSectionChunk {
731 public:
RVAFlagTableChunk(SymbolRVASet s)732   explicit RVAFlagTableChunk(SymbolRVASet s) : syms(std::move(s)) {}
getSize()733   size_t getSize() const override { return syms.size() * 5; }
734   void writeTo(uint8_t *buf) const override;
735 
736 private:
737   SymbolRVASet syms;
738 };
739 
740 // Windows-specific.
741 // This class represents a block in .reloc section.
742 // See the PE/COFF spec 5.6 for details.
743 class BaserelChunk : public NonSectionChunk {
744 public:
745   BaserelChunk(uint32_t page, Baserel *begin, Baserel *end);
getSize()746   size_t getSize() const override { return data.size(); }
747   void writeTo(uint8_t *buf) const override;
748 
749 private:
750   std::vector<uint8_t> data;
751 };
752 
753 class Baserel {
754 public:
Baserel(uint32_t v,uint8_t ty)755   Baserel(uint32_t v, uint8_t ty) : rva(v), type(ty) {}
Baserel(uint32_t v,llvm::COFF::MachineTypes machine)756   explicit Baserel(uint32_t v, llvm::COFF::MachineTypes machine)
757       : Baserel(v, getDefaultType(machine)) {}
758   static uint8_t getDefaultType(llvm::COFF::MachineTypes machine);
759 
760   uint32_t rva;
761   uint8_t type;
762 };
763 
764 // This is a placeholder Chunk, to allow attaching a DefinedSynthetic to a
765 // specific place in a section, without any data. This is used for the MinGW
766 // specific symbol __RUNTIME_PSEUDO_RELOC_LIST_END__, even though the concept
767 // of an empty chunk isn't MinGW specific.
768 class EmptyChunk : public NonSectionChunk {
769 public:
EmptyChunk()770   EmptyChunk() {}
getSize()771   size_t getSize() const override { return 0; }
writeTo(uint8_t * buf)772   void writeTo(uint8_t *buf) const override {}
773 };
774 
775 class ECCodeMapEntry {
776 public:
ECCodeMapEntry(Chunk * first,Chunk * last,chpe_range_type type)777   ECCodeMapEntry(Chunk *first, Chunk *last, chpe_range_type type)
778       : first(first), last(last), type(type) {}
779   Chunk *first;
780   Chunk *last;
781   chpe_range_type type;
782 };
783 
784 // This is a chunk containing CHPE code map on EC targets. It's a table
785 // of address ranges and their types.
786 class ECCodeMapChunk : public NonSectionChunk {
787 public:
ECCodeMapChunk(std::vector<ECCodeMapEntry> & map)788   ECCodeMapChunk(std::vector<ECCodeMapEntry> &map) : map(map) {}
789   size_t getSize() const override;
790   void writeTo(uint8_t *buf) const override;
791 
792 private:
793   std::vector<ECCodeMapEntry> &map;
794 };
795 
796 class CHPECodeRangesChunk : public NonSectionChunk {
797 public:
CHPECodeRangesChunk(std::vector<std::pair<Chunk *,Defined * >> & exportThunks)798   CHPECodeRangesChunk(std::vector<std::pair<Chunk *, Defined *>> &exportThunks)
799       : exportThunks(exportThunks) {}
800   size_t getSize() const override;
801   void writeTo(uint8_t *buf) const override;
802 
803 private:
804   std::vector<std::pair<Chunk *, Defined *>> &exportThunks;
805 };
806 
807 class CHPERedirectionChunk : public NonSectionChunk {
808 public:
CHPERedirectionChunk(std::vector<std::pair<Chunk *,Defined * >> & exportThunks)809   CHPERedirectionChunk(std::vector<std::pair<Chunk *, Defined *>> &exportThunks)
810       : exportThunks(exportThunks) {}
811   size_t getSize() const override;
812   void writeTo(uint8_t *buf) const override;
813 
814 private:
815   std::vector<std::pair<Chunk *, Defined *>> &exportThunks;
816 };
817 
// x86-64 machine-code template for an EC export thunk.
//
// Declared `inline constexpr` so that the program contains one definition of
// the table rather than a separate internal-linkage copy in every translation
// unit that includes this header.
inline constexpr uint8_t ECExportThunkCode[] = {
    0x48, 0x8b, 0xc4,          // movq    %rsp, %rax
    0x48, 0x89, 0x58, 0x20,    // movq    %rbx, 0x20(%rax)
    0x55,                      // pushq   %rbp
    0x5d,                      // popq    %rbp
    0xe9, 0,    0,    0,    0, // jmp     <rel32>; displacement is zero in
                               // this template (presumably patched when the
                               // thunk is written)
    0xcc,                      // int3
    0xcc                       // int3
};
827 
828 class ECExportThunkChunk : public NonSectionCodeChunk {
829 public:
ECExportThunkChunk(Defined * targetSym)830   explicit ECExportThunkChunk(Defined *targetSym)
831       : NonSectionCodeChunk(ECExportThunkKind), target(targetSym) {}
classof(const Chunk * c)832   static bool classof(const Chunk *c) { return c->kind() == ECExportThunkKind; }
833 
getSize()834   size_t getSize() const override { return sizeof(ECExportThunkCode); };
835   void writeTo(uint8_t *buf) const override;
getMachine()836   MachineTypes getMachine() const override { return AMD64; }
837 
838   Defined *target;
839 };
840 
841 // ARM64X relocation value, potentially relative to a symbol.
842 class Arm64XRelocVal {
843 public:
value(value)844   Arm64XRelocVal(uint64_t value = 0) : value(value) {}
sym(sym)845   Arm64XRelocVal(Defined *sym, int32_t offset = 0) : sym(sym), value(offset) {}
846   Arm64XRelocVal(Chunk *chunk, int32_t offset = 0)
chunk(chunk)847       : chunk(chunk), value(offset) {}
848   uint64_t get() const;
849 
850 private:
851   Defined *sym = nullptr;
852   Chunk *chunk = nullptr;
853   uint64_t value;
854 };
855 
856 // ARM64X entry for dynamic relocations.
857 class Arm64XDynamicRelocEntry {
858 public:
Arm64XDynamicRelocEntry(llvm::COFF::Arm64XFixupType type,uint8_t size,Arm64XRelocVal offset,Arm64XRelocVal value)859   Arm64XDynamicRelocEntry(llvm::COFF::Arm64XFixupType type, uint8_t size,
860                           Arm64XRelocVal offset, Arm64XRelocVal value)
861       : offset(offset), value(value), type(type), size(size) {}
862 
863   size_t getSize() const;
864   void writeTo(uint8_t *buf) const;
865 
866   Arm64XRelocVal offset;
867   Arm64XRelocVal value;
868 
869 private:
870   llvm::COFF::Arm64XFixupType type;
871   uint8_t size;
872 };
873 
874 // Dynamic relocation chunk containing ARM64X relocations for the hybrid image.
875 class DynamicRelocsChunk : public NonSectionChunk {
876 public:
DynamicRelocsChunk()877   DynamicRelocsChunk() {}
getSize()878   size_t getSize() const override { return size; }
879   void writeTo(uint8_t *buf) const override;
880   void finalize();
881 
882   void add(llvm::COFF::Arm64XFixupType type, uint8_t size,
883            Arm64XRelocVal offset, Arm64XRelocVal value = Arm64XRelocVal()) {
884     arm64xRelocs.emplace_back(type, size, offset, value);
885   }
886 
887   void set(uint32_t rva, Arm64XRelocVal value);
888 
889 private:
890   std::vector<Arm64XDynamicRelocEntry> arm64xRelocs;
891   size_t size;
892 };
893 
894 // MinGW specific, for the "automatic import of variables from DLLs" feature.
895 // This provides the table of runtime pseudo relocations, for variable
896 // references that turned out to need to be imported from a DLL even though
897 // the reference didn't use the dllimport attribute. The MinGW runtime will
898 // process this table after loading, before handling control over to user
899 // code.
900 class PseudoRelocTableChunk : public NonSectionChunk {
901 public:
PseudoRelocTableChunk(std::vector<RuntimePseudoReloc> & relocs)902   PseudoRelocTableChunk(std::vector<RuntimePseudoReloc> &relocs)
903       : relocs(std::move(relocs)) {
904     setAlignment(4);
905   }
906   size_t getSize() const override;
907   void writeTo(uint8_t *buf) const override;
908 
909 private:
910   std::vector<RuntimePseudoReloc> relocs;
911 };
912 
913 // MinGW specific. A Chunk that contains one pointer-sized absolute value.
914 class AbsolutePointerChunk : public NonSectionChunk {
915 public:
AbsolutePointerChunk(SymbolTable & symtab,uint64_t value)916   AbsolutePointerChunk(SymbolTable &symtab, uint64_t value)
917       : value(value), symtab(symtab) {
918     setAlignment(getSize());
919   }
920   size_t getSize() const override;
921   void writeTo(uint8_t *buf) const override;
922   MachineTypes getMachine() const override;
923 
924 private:
925   uint64_t value;
926   SymbolTable &symtab;
927 };
928 
929 // Return true if this file has the hotpatch flag set to true in the S_COMPILE3
930 // record in codeview debug info. Also returns true for some thunks synthesized
931 // by the linker.
isHotPatchable()932 inline bool Chunk::isHotPatchable() const {
933   if (auto *sc = dyn_cast<SectionChunk>(this))
934     return sc->file->hotPatchable;
935   else if (isa<ImportThunkChunk>(this))
936     return true;
937   return false;
938 }
939 
getEntryThunk()940 inline Defined *Chunk::getEntryThunk() const {
941   if (auto *c = dyn_cast<const SectionChunkEC>(this))
942     return c->entryThunk;
943   return nullptr;
944 }
945 
setEntryThunk(Defined * entryThunk)946 inline void Chunk::setEntryThunk(Defined *entryThunk) {
947   if (auto c = dyn_cast<SectionChunkEC>(this))
948     c->entryThunk = entryThunk;
949 }
950 
// Helpers that patch the bytes at `off`. They are only declared here; the
// definitions live elsewhere. Judging by the names these two handle Thumb
// MOV32T and 24-bit branch fixups (TODO confirm against the definitions).
void applyMOV32T(uint8_t *off, uint32_t v);
void applyBranch24T(uint8_t *off, int32_t v);

// Presumably the ARM64 counterparts: address, immediate and 26-bit branch
// fixups (TODO confirm against the definitions).
void applyArm64Addr(uint8_t *off, uint64_t s, uint64_t p, int shift);
void applyArm64Imm(uint8_t *off, uint64_t imm, uint32_t rangeLimit);
void applyArm64Branch26(uint8_t *off, int64_t v);
957 
958 // Convenience class for initializing a coff_section with specific flags.
959 class FakeSection {
960 public:
FakeSection(int c)961   FakeSection(int c) { section.Characteristics = c; }
962 
963   coff_section section;
964 };
965 
966 // Convenience class for initializing a SectionChunk with specific flags.
967 class FakeSectionChunk {
968 public:
FakeSectionChunk(const coff_section * section)969   FakeSectionChunk(const coff_section *section) : chunk(nullptr, section) {
970     // Comdats from LTO files can't be fully treated as regular comdats
971     // at this point; we don't know what size or contents they are going to
972     // have, so we can't do proper checking of such aspects of them.
973     chunk.selection = llvm::COFF::IMAGE_COMDAT_SELECT_ANY;
974   }
975 
976   SectionChunk chunk;
977 };
978 
979 } // namespace lld::coff
980 
981 namespace llvm {
982 template <>
983 struct DenseMapInfo<lld::coff::ChunkAndOffset>
984     : lld::coff::ChunkAndOffset::DenseMapInfo {};
985 }
986 
987 #endif
988