xref: /freebsd/contrib/llvm-project/lld/COFF/Writer.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===- Writer.cpp ---------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Writer.h"
10 #include "COFFLinkerContext.h"
11 #include "CallGraphSort.h"
12 #include "Config.h"
13 #include "DLL.h"
14 #include "InputFiles.h"
15 #include "LLDMapFile.h"
16 #include "MapFile.h"
17 #include "PDB.h"
18 #include "SymbolTable.h"
19 #include "Symbols.h"
20 #include "lld/Common/ErrorHandler.h"
21 #include "lld/Common/Memory.h"
22 #include "lld/Common/Timer.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringSet.h"
26 #include "llvm/BinaryFormat/COFF.h"
27 #include "llvm/MC/StringTableBuilder.h"
28 #include "llvm/Support/Endian.h"
29 #include "llvm/Support/FileOutputBuffer.h"
30 #include "llvm/Support/Parallel.h"
31 #include "llvm/Support/RandomNumberGenerator.h"
32 #include "llvm/Support/TimeProfiler.h"
33 #include "llvm/Support/xxhash.h"
34 #include <algorithm>
35 #include <cstdio>
36 #include <map>
37 #include <memory>
38 #include <utility>
39 
40 using namespace llvm;
41 using namespace llvm::COFF;
42 using namespace llvm::object;
43 using namespace llvm::support;
44 using namespace llvm::support::endian;
45 using namespace lld;
46 using namespace lld::coff;
47 
48 /* To re-generate DOSProgram:
49 $ cat > /tmp/DOSProgram.asm
50 org 0
51         ; Copy cs to ds.
52         push cs
53         pop ds
54         ; Point ds:dx at the $-terminated string.
55         mov dx, str
56         ; Int 21/AH=09h: Write string to standard output.
57         mov ah, 0x9
58         int 0x21
59         ; Int 21/AH=4Ch: Exit with return code (in AL).
60         mov ax, 0x4C01
61         int 0x21
62 str:
63         db 'This program cannot be run in DOS mode.$'
64 align 8, db 0
65 $ nasm -fbin /tmp/DOSProgram.asm -o /tmp/DOSProgram.bin
66 $ xxd -i /tmp/DOSProgram.bin
67 */
// Machine code of the MS-DOS stub program (see the NASM listing above for how
// to regenerate it). It prints "This program cannot be run in DOS mode." via
// Int 21/AH=09h and exits via Int 21/AH=4Ch with AL = 1. The last two bytes are
// zero padding so that the size is a multiple of 8.
//
// The table is only ever read, so it is declared const to keep it out of
// writable data and to make accidental modification a compile error.
static const unsigned char dosProgram[] = {
  0x0e, 0x1f, 0xba, 0x0e, 0x00, 0xb4, 0x09, 0xcd, 0x21, 0xb8, 0x01, 0x4c,
  0xcd, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6f, 0x67, 0x72,
  0x61, 0x6d, 0x20, 0x63, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x20, 0x62, 0x65,
  0x20, 0x72, 0x75, 0x6e, 0x20, 0x69, 0x6e, 0x20, 0x44, 0x4f, 0x53, 0x20,
  0x6d, 0x6f, 0x64, 0x65, 0x2e, 0x24, 0x00, 0x00
};
75 static_assert(sizeof(dosProgram) % 8 == 0,
76               "DOSProgram size must be multiple of 8");
77 static_assert((sizeof(dos_header) + sizeof(dosProgram)) % 8 == 0,
78               "DOSStub size must be multiple of 8");
79 
80 static const int numberOfDataDirectory = 16;
81 
82 namespace {
83 
84 class DebugDirectoryChunk : public NonSectionChunk {
85 public:
DebugDirectoryChunk(const COFFLinkerContext & c,const std::vector<std::pair<COFF::DebugType,Chunk * >> & r,bool writeRepro)86   DebugDirectoryChunk(const COFFLinkerContext &c,
87                       const std::vector<std::pair<COFF::DebugType, Chunk *>> &r,
88                       bool writeRepro)
89       : records(r), writeRepro(writeRepro), ctx(c) {}
90 
getSize() const91   size_t getSize() const override {
92     return (records.size() + int(writeRepro)) * sizeof(debug_directory);
93   }
94 
writeTo(uint8_t * b) const95   void writeTo(uint8_t *b) const override {
96     auto *d = reinterpret_cast<debug_directory *>(b);
97 
98     for (const std::pair<COFF::DebugType, Chunk *>& record : records) {
99       Chunk *c = record.second;
100       const OutputSection *os = ctx.getOutputSection(c);
101       uint64_t offs = os->getFileOff() + (c->getRVA() - os->getRVA());
102       fillEntry(d, record.first, c->getSize(), c->getRVA(), offs);
103       ++d;
104     }
105 
106     if (writeRepro) {
107       // FIXME: The COFF spec allows either a 0-sized entry to just say
108       // "the timestamp field is really a hash", or a 4-byte size field
109       // followed by that many bytes containing a longer hash (with the
110       // lowest 4 bytes usually being the timestamp in little-endian order).
111       // Consider storing the full 8 bytes computed by xxh3_64bits here.
112       fillEntry(d, COFF::IMAGE_DEBUG_TYPE_REPRO, 0, 0, 0);
113     }
114   }
115 
setTimeDateStamp(uint32_t timeDateStamp)116   void setTimeDateStamp(uint32_t timeDateStamp) {
117     for (support::ulittle32_t *tds : timeDateStamps)
118       *tds = timeDateStamp;
119   }
120 
121 private:
fillEntry(debug_directory * d,COFF::DebugType debugType,size_t size,uint64_t rva,uint64_t offs) const122   void fillEntry(debug_directory *d, COFF::DebugType debugType, size_t size,
123                  uint64_t rva, uint64_t offs) const {
124     d->Characteristics = 0;
125     d->TimeDateStamp = 0;
126     d->MajorVersion = 0;
127     d->MinorVersion = 0;
128     d->Type = debugType;
129     d->SizeOfData = size;
130     d->AddressOfRawData = rva;
131     d->PointerToRawData = offs;
132 
133     timeDateStamps.push_back(&d->TimeDateStamp);
134   }
135 
136   mutable std::vector<support::ulittle32_t *> timeDateStamps;
137   const std::vector<std::pair<COFF::DebugType, Chunk *>> &records;
138   bool writeRepro;
139   const COFFLinkerContext &ctx;
140 };
141 
142 class CVDebugRecordChunk : public NonSectionChunk {
143 public:
CVDebugRecordChunk(const COFFLinkerContext & c)144   CVDebugRecordChunk(const COFFLinkerContext &c) : ctx(c) {}
145 
getSize() const146   size_t getSize() const override {
147     return sizeof(codeview::DebugInfo) + ctx.config.pdbAltPath.size() + 1;
148   }
149 
writeTo(uint8_t * b) const150   void writeTo(uint8_t *b) const override {
151     // Save off the DebugInfo entry to backfill the file signature (build id)
152     // in Writer::writeBuildId
153     buildId = reinterpret_cast<codeview::DebugInfo *>(b);
154 
155     // variable sized field (PDB Path)
156     char *p = reinterpret_cast<char *>(b + sizeof(*buildId));
157     if (!ctx.config.pdbAltPath.empty())
158       memcpy(p, ctx.config.pdbAltPath.data(), ctx.config.pdbAltPath.size());
159     p[ctx.config.pdbAltPath.size()] = '\0';
160   }
161 
162   mutable codeview::DebugInfo *buildId = nullptr;
163 
164 private:
165   const COFFLinkerContext &ctx;
166 };
167 
168 class ExtendedDllCharacteristicsChunk : public NonSectionChunk {
169 public:
ExtendedDllCharacteristicsChunk(uint32_t c)170   ExtendedDllCharacteristicsChunk(uint32_t c) : characteristics(c) {}
171 
getSize() const172   size_t getSize() const override { return 4; }
173 
writeTo(uint8_t * buf) const174   void writeTo(uint8_t *buf) const override { write32le(buf, characteristics); }
175 
176   uint32_t characteristics = 0;
177 };
178 
179 // PartialSection represents a group of chunks that contribute to an
180 // OutputSection. Collating a collection of PartialSections of same name and
181 // characteristics constitutes the OutputSection.
182 class PartialSectionKey {
183 public:
184   StringRef name;
185   unsigned characteristics;
186 
operator <(const PartialSectionKey & other) const187   bool operator<(const PartialSectionKey &other) const {
188     int c = name.compare(other.name);
189     if (c > 0)
190       return false;
191     if (c == 0)
192       return characteristics < other.characteristics;
193     return true;
194   }
195 };
196 
197 struct ChunkRange {
198   Chunk *first = nullptr, *last;
199 };
200 
201 // The writer writes a SymbolTable result to a file.
202 class Writer {
203 public:
Writer(COFFLinkerContext & c)204   Writer(COFFLinkerContext &c)
205       : buffer(c.e.outputBuffer), strtab(StringTableBuilder::WinCOFF),
206         delayIdata(c), ctx(c) {}
207   void run();
208 
209 private:
210   void calculateStubDependentSizes();
211   void createSections();
212   void createMiscChunks();
213   void createImportTables();
214   void appendImportThunks();
215   void locateImportTables();
216   void createExportTable();
217   void mergeSection(const std::map<StringRef, StringRef>::value_type &p);
218   void mergeSections();
219   void sortECChunks();
220   void appendECImportTables();
221   void removeUnusedSections();
222   void assignAddresses();
223   bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin,
224                  MachineTypes machine);
225   std::pair<Defined *, bool> getThunk(DenseMap<uint64_t, Defined *> &lastThunks,
226                                       Defined *target, uint64_t p,
227                                       uint16_t type, int margin,
228                                       MachineTypes machine);
229   bool createThunks(OutputSection *os, int margin);
230   bool verifyRanges(const std::vector<Chunk *> chunks);
231   void createECCodeMap();
232   void finalizeAddresses();
233   void removeEmptySections();
234   void assignOutputSectionIndices();
235   void createSymbolAndStringTable();
236   void openFile(StringRef outputPath);
237   template <typename PEHeaderTy> void writeHeader();
238   void createSEHTable();
239   void createRuntimePseudoRelocs();
240   void createECChunks();
241   void insertCtorDtorSymbols();
242   void insertBssDataStartEndSymbols();
243   void markSymbolsWithRelocations(ObjFile *file, SymbolRVASet &usedSymbols);
244   void createGuardCFTables();
245   void markSymbolsForRVATable(ObjFile *file,
246                               ArrayRef<SectionChunk *> symIdxChunks,
247                               SymbolRVASet &tableSymbols);
248   void getSymbolsFromSections(ObjFile *file,
249                               ArrayRef<SectionChunk *> symIdxChunks,
250                               std::vector<Symbol *> &symbols);
251   void maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym,
252                         StringRef countSym, bool hasFlag=false);
253   void setSectionPermissions();
254   void setECSymbols();
255   void writeSections();
256   void writeBuildId();
257   void writePEChecksum();
258   void sortSections();
259   template <typename T> void sortExceptionTable(ChunkRange &exceptionTable);
260   void sortExceptionTables();
261   void sortCRTSectionChunks(std::vector<Chunk *> &chunks);
262   void addSyntheticIdata();
263   void sortBySectionOrder(std::vector<Chunk *> &chunks);
264   void fixPartialSectionChars(StringRef name, uint32_t chars);
265   bool fixGnuImportChunks();
266   void fixTlsAlignment();
267   PartialSection *createPartialSection(StringRef name, uint32_t outChars);
268   PartialSection *findPartialSection(StringRef name, uint32_t outChars);
269 
270   std::optional<coff_symbol16> createSymbol(Defined *d);
271   size_t addEntryToStringTable(StringRef str);
272 
273   OutputSection *findSection(StringRef name);
274   void addBaserels();
275   void addBaserelBlocks(std::vector<Baserel> &v);
276   void createDynamicRelocs();
277 
278   uint32_t getSizeOfInitializedData();
279 
280   void prepareLoadConfig();
281   template <typename T>
282   void prepareLoadConfig(SymbolTable &symtab, T *loadConfig);
283 
284   std::unique_ptr<FileOutputBuffer> &buffer;
285   std::map<PartialSectionKey, PartialSection *> partialSections;
286   StringTableBuilder strtab;
287   std::vector<llvm::object::coff_symbol16> outputSymtab;
288   std::vector<ECCodeMapEntry> codeMap;
289   IdataContents idata;
290   Chunk *importTableStart = nullptr;
291   uint64_t importTableSize = 0;
292   Chunk *iatStart = nullptr;
293   uint64_t iatSize = 0;
294   DelayLoadContents delayIdata;
295   bool setNoSEHCharacteristic = false;
296   uint32_t tlsAlignment = 0;
297 
298   DebugDirectoryChunk *debugDirectory = nullptr;
299   std::vector<std::pair<COFF::DebugType, Chunk *>> debugRecords;
300   CVDebugRecordChunk *buildId = nullptr;
301   ArrayRef<uint8_t> sectionTable;
302 
303   // List of Arm64EC export thunks.
304   std::vector<std::pair<Chunk *, Defined *>> exportThunks;
305 
306   uint64_t fileSize;
307   uint32_t pointerToSymbolTable = 0;
308   uint64_t sizeOfImage;
309   uint64_t sizeOfHeaders;
310 
311   uint32_t dosStubSize;
312   uint32_t coffHeaderOffset;
313   uint32_t peHeaderOffset;
314   uint32_t dataDirOffset64;
315 
316   OutputSection *textSec;
317   OutputSection *hexpthkSec;
318   OutputSection *bssSec;
319   OutputSection *rdataSec;
320   OutputSection *buildidSec;
321   OutputSection *dataSec;
322   OutputSection *pdataSec;
323   OutputSection *idataSec;
324   OutputSection *edataSec;
325   OutputSection *didatSec;
326   OutputSection *a64xrmSec;
327   OutputSection *rsrcSec;
328   OutputSection *relocSec;
329   OutputSection *ctorsSec;
330   OutputSection *dtorsSec;
331   // Either .rdata section or .buildid section.
332   OutputSection *debugInfoSec;
333 
334   // The range of .pdata sections in the output file.
335   //
336   // We need to keep track of the location of .pdata in whichever section it
337   // gets merged into so that we can sort its contents and emit a correct data
338   // directory entry for the exception table. This is also the case for some
339   // other sections (such as .edata) but because the contents of those sections
340   // are entirely linker-generated we can keep track of their locations using
341   // the chunks that the linker creates. All .pdata chunks come from input
342   // files, so we need to keep track of them separately.
343   ChunkRange pdata;
344 
345   // x86_64 .pdata sections on ARM64EC/ARM64X targets.
346   ChunkRange hybridPdata;
347 
348   // CHPE metadata symbol on ARM64C target.
349   DefinedRegular *chpeSym = nullptr;
350 
351   COFFLinkerContext &ctx;
352 };
353 } // anonymous namespace
354 
writeResult(COFFLinkerContext & ctx)355 void lld::coff::writeResult(COFFLinkerContext &ctx) {
356   llvm::TimeTraceScope timeScope("Write output(s)");
357   Writer(ctx).run();
358 }
359 
addChunk(Chunk * c)360 void OutputSection::addChunk(Chunk *c) {
361   chunks.push_back(c);
362 }
363 
insertChunkAtStart(Chunk * c)364 void OutputSection::insertChunkAtStart(Chunk *c) {
365   chunks.insert(chunks.begin(), c);
366 }
367 
setPermissions(uint32_t c)368 void OutputSection::setPermissions(uint32_t c) {
369   header.Characteristics &= ~permMask;
370   header.Characteristics |= c;
371 }
372 
merge(OutputSection * other)373 void OutputSection::merge(OutputSection *other) {
374   chunks.insert(chunks.end(), other->chunks.begin(), other->chunks.end());
375   other->chunks.clear();
376   contribSections.insert(contribSections.end(), other->contribSections.begin(),
377                          other->contribSections.end());
378   other->contribSections.clear();
379 
380   // MS link.exe compatibility: when merging a code section into a data section,
381   // mark the target section as a code section.
382   if (other->header.Characteristics & IMAGE_SCN_CNT_CODE) {
383     header.Characteristics |= IMAGE_SCN_CNT_CODE;
384     header.Characteristics &=
385         ~(IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_CNT_UNINITIALIZED_DATA);
386   }
387 }
388 
389 // Write the section header to a given buffer.
writeHeaderTo(uint8_t * buf,bool isDebug)390 void OutputSection::writeHeaderTo(uint8_t *buf, bool isDebug) {
391   auto *hdr = reinterpret_cast<coff_section *>(buf);
392   *hdr = header;
393   if (stringTableOff) {
394     // If name is too long, write offset into the string table as a name.
395     encodeSectionName(hdr->Name, stringTableOff);
396   } else {
397     assert(!isDebug || name.size() <= COFF::NameSize ||
398            (hdr->Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0);
399     strncpy(hdr->Name, name.data(),
400             std::min(name.size(), (size_t)COFF::NameSize));
401   }
402 }
403 
addContributingPartialSection(PartialSection * sec)404 void OutputSection::addContributingPartialSection(PartialSection *sec) {
405   contribSections.push_back(sec);
406 }
407 
splitECChunks()408 void OutputSection::splitECChunks() {
409   llvm::stable_sort(chunks, [=](const Chunk *a, const Chunk *b) {
410     return (a->getMachine() != ARM64) < (b->getMachine() != ARM64);
411   });
412 }
413 
414 // Check whether the target address S is in range from a relocation
415 // of type relType at address P.
isInRange(uint16_t relType,uint64_t s,uint64_t p,int margin,MachineTypes machine)416 bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin,
417                        MachineTypes machine) {
418   if (machine == ARMNT) {
419     int64_t diff = AbsoluteDifference(s, p + 4) + margin;
420     switch (relType) {
421     case IMAGE_REL_ARM_BRANCH20T:
422       return isInt<21>(diff);
423     case IMAGE_REL_ARM_BRANCH24T:
424     case IMAGE_REL_ARM_BLX23T:
425       return isInt<25>(diff);
426     default:
427       return true;
428     }
429   } else if (isAnyArm64(machine)) {
430     int64_t diff = AbsoluteDifference(s, p) + margin;
431     switch (relType) {
432     case IMAGE_REL_ARM64_BRANCH26:
433       return isInt<28>(diff);
434     case IMAGE_REL_ARM64_BRANCH19:
435       return isInt<21>(diff);
436     case IMAGE_REL_ARM64_BRANCH14:
437       return isInt<16>(diff);
438     default:
439       return true;
440     }
441   } else {
442     return true;
443   }
444 }
445 
446 // Return the last thunk for the given target if it is in range,
447 // or create a new one.
448 std::pair<Defined *, bool>
getThunk(DenseMap<uint64_t,Defined * > & lastThunks,Defined * target,uint64_t p,uint16_t type,int margin,MachineTypes machine)449 Writer::getThunk(DenseMap<uint64_t, Defined *> &lastThunks, Defined *target,
450                  uint64_t p, uint16_t type, int margin, MachineTypes machine) {
451   Defined *&lastThunk = lastThunks[target->getRVA()];
452   if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin, machine))
453     return {lastThunk, false};
454   Chunk *c;
455   switch (getMachineArchType(machine)) {
456   case Triple::thumb:
457     c = make<RangeExtensionThunkARM>(ctx, target);
458     break;
459   case Triple::aarch64:
460     c = make<RangeExtensionThunkARM64>(machine, target);
461     break;
462   default:
463     llvm_unreachable("Unexpected architecture");
464   }
465   Defined *d = make<DefinedSynthetic>("range_extension_thunk", c);
466   lastThunk = d;
467   return {d, true};
468 }
469 
470 // This checks all relocations, and for any relocation which isn't in range
471 // it adds a thunk after the section chunk that contains the relocation.
472 // If the latest thunk for the specific target is in range, that is used
473 // instead of creating a new thunk. All range checks are done with the
474 // specified margin, to make sure that relocations that originally are in
475 // range, but only barely, also get thunks - in case other added thunks makes
476 // the target go out of range.
477 //
478 // After adding thunks, we verify that all relocations are in range (with
479 // no extra margin requirements). If this failed, we restart (throwing away
480 // the previously created thunks) and retry with a wider margin.
createThunks(OutputSection * os,int margin)481 bool Writer::createThunks(OutputSection *os, int margin) {
482   bool addressesChanged = false;
483   DenseMap<uint64_t, Defined *> lastThunks;
484   DenseMap<std::pair<ObjFile *, Defined *>, uint32_t> thunkSymtabIndices;
485   size_t thunksSize = 0;
486   // Recheck Chunks.size() each iteration, since we can insert more
487   // elements into it.
488   for (size_t i = 0; i != os->chunks.size(); ++i) {
489     SectionChunk *sc = dyn_cast<SectionChunk>(os->chunks[i]);
490     if (!sc) {
491       auto chunk = cast<NonSectionChunk>(os->chunks[i]);
492       if (uint32_t size = chunk->extendRanges()) {
493         thunksSize += size;
494         addressesChanged = true;
495       }
496       continue;
497     }
498     MachineTypes machine = sc->getMachine();
499     size_t thunkInsertionSpot = i + 1;
500 
501     // Try to get a good enough estimate of where new thunks will be placed.
502     // Offset this by the size of the new thunks added so far, to make the
503     // estimate slightly better.
504     size_t thunkInsertionRVA = sc->getRVA() + sc->getSize() + thunksSize;
505     ObjFile *file = sc->file;
506     std::vector<std::pair<uint32_t, uint32_t>> relocReplacements;
507     ArrayRef<coff_relocation> originalRelocs =
508         file->getCOFFObj()->getRelocations(sc->header);
509     for (size_t j = 0, e = originalRelocs.size(); j < e; ++j) {
510       const coff_relocation &rel = originalRelocs[j];
511       Symbol *relocTarget = file->getSymbol(rel.SymbolTableIndex);
512 
513       // The estimate of the source address P should be pretty accurate,
514       // but we don't know whether the target Symbol address should be
515       // offset by thunksSize or not (or by some of thunksSize but not all of
516       // it), giving us some uncertainty once we have added one thunk.
517       uint64_t p = sc->getRVA() + rel.VirtualAddress + thunksSize;
518 
519       Defined *sym = dyn_cast_or_null<Defined>(relocTarget);
520       if (!sym)
521         continue;
522 
523       uint64_t s = sym->getRVA();
524 
525       if (isInRange(rel.Type, s, p, margin, machine))
526         continue;
527 
528       // If the target isn't in range, hook it up to an existing or new thunk.
529       auto [thunk, wasNew] =
530           getThunk(lastThunks, sym, p, rel.Type, margin, machine);
531       if (wasNew) {
532         Chunk *thunkChunk = thunk->getChunk();
533         thunkChunk->setRVA(
534             thunkInsertionRVA); // Estimate of where it will be located.
535         os->chunks.insert(os->chunks.begin() + thunkInsertionSpot, thunkChunk);
536         thunkInsertionSpot++;
537         thunksSize += thunkChunk->getSize();
538         thunkInsertionRVA += thunkChunk->getSize();
539         addressesChanged = true;
540       }
541 
542       // To redirect the relocation, add a symbol to the parent object file's
543       // symbol table, and replace the relocation symbol table index with the
544       // new index.
545       auto insertion = thunkSymtabIndices.insert({{file, thunk}, ~0U});
546       uint32_t &thunkSymbolIndex = insertion.first->second;
547       if (insertion.second)
548         thunkSymbolIndex = file->addRangeThunkSymbol(thunk);
549       relocReplacements.emplace_back(j, thunkSymbolIndex);
550     }
551 
552     // Get a writable copy of this section's relocations so they can be
553     // modified. If the relocations point into the object file, allocate new
554     // memory. Otherwise, this must be previously allocated memory that can be
555     // modified in place.
556     ArrayRef<coff_relocation> curRelocs = sc->getRelocs();
557     MutableArrayRef<coff_relocation> newRelocs;
558     if (originalRelocs.data() == curRelocs.data()) {
559       newRelocs = MutableArrayRef(
560           bAlloc().Allocate<coff_relocation>(originalRelocs.size()),
561           originalRelocs.size());
562     } else {
563       newRelocs = MutableArrayRef(
564           const_cast<coff_relocation *>(curRelocs.data()), curRelocs.size());
565     }
566 
567     // Copy each relocation, but replace the symbol table indices which need
568     // thunks.
569     auto nextReplacement = relocReplacements.begin();
570     auto endReplacement = relocReplacements.end();
571     for (size_t i = 0, e = originalRelocs.size(); i != e; ++i) {
572       newRelocs[i] = originalRelocs[i];
573       if (nextReplacement != endReplacement && nextReplacement->first == i) {
574         newRelocs[i].SymbolTableIndex = nextReplacement->second;
575         ++nextReplacement;
576       }
577     }
578 
579     sc->setRelocs(newRelocs);
580   }
581   return addressesChanged;
582 }
583 
584 // Create a code map for CHPE metadata.
createECCodeMap()585 void Writer::createECCodeMap() {
586   if (!ctx.symtab.isEC())
587     return;
588 
589   // Clear the map in case we were're recomputing the map after adding
590   // a range extension thunk.
591   codeMap.clear();
592 
593   std::optional<chpe_range_type> lastType;
594   Chunk *first, *last;
595 
596   auto closeRange = [&]() {
597     if (lastType) {
598       codeMap.push_back({first, last, *lastType});
599       lastType.reset();
600     }
601   };
602 
603   for (OutputSection *sec : ctx.outputSections) {
604     for (Chunk *c : sec->chunks) {
605       // Skip empty section chunks. MS link.exe does not seem to do that and
606       // generates empty code ranges in some cases.
607       if (isa<SectionChunk>(c) && !c->getSize())
608         continue;
609 
610       std::optional<chpe_range_type> chunkType = c->getArm64ECRangeType();
611       if (chunkType != lastType) {
612         closeRange();
613         first = c;
614         lastType = chunkType;
615       }
616       last = c;
617     }
618   }
619 
620   closeRange();
621 
622   Symbol *tableCountSym = ctx.symtab.findUnderscore("__hybrid_code_map_count");
623   cast<DefinedAbsolute>(tableCountSym)->setVA(codeMap.size());
624 }
625 
626 // Verify that all relocations are in range, with no extra margin requirements.
verifyRanges(const std::vector<Chunk * > chunks)627 bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
628   for (Chunk *c : chunks) {
629     SectionChunk *sc = dyn_cast<SectionChunk>(c);
630     if (!sc) {
631       if (!cast<NonSectionChunk>(c)->verifyRanges())
632         return false;
633       continue;
634     }
635     MachineTypes machine = sc->getMachine();
636 
637     ArrayRef<coff_relocation> relocs = sc->getRelocs();
638     for (const coff_relocation &rel : relocs) {
639       Symbol *relocTarget = sc->file->getSymbol(rel.SymbolTableIndex);
640 
641       Defined *sym = dyn_cast_or_null<Defined>(relocTarget);
642       if (!sym)
643         continue;
644 
645       uint64_t p = sc->getRVA() + rel.VirtualAddress;
646       uint64_t s = sym->getRVA();
647 
648       if (!isInRange(rel.Type, s, p, 0, machine))
649         return false;
650     }
651   }
652   return true;
653 }
654 
655 // Assign addresses and add thunks if necessary.
finalizeAddresses()656 void Writer::finalizeAddresses() {
657   assignAddresses();
658   if (ctx.config.machine != ARMNT && !isAnyArm64(ctx.config.machine))
659     return;
660 
661   size_t origNumChunks = 0;
662   for (OutputSection *sec : ctx.outputSections) {
663     sec->origChunks = sec->chunks;
664     origNumChunks += sec->chunks.size();
665   }
666 
667   int pass = 0;
668   int margin = 1024 * 100;
669   while (true) {
670     llvm::TimeTraceScope timeScope2("Add thunks pass");
671 
672     // First check whether we need thunks at all, or if the previous pass of
673     // adding them turned out ok.
674     bool rangesOk = true;
675     size_t numChunks = 0;
676     {
677       llvm::TimeTraceScope timeScope3("Verify ranges");
678       for (OutputSection *sec : ctx.outputSections) {
679         if (!verifyRanges(sec->chunks)) {
680           rangesOk = false;
681           break;
682         }
683         numChunks += sec->chunks.size();
684       }
685     }
686     if (rangesOk) {
687       if (pass > 0)
688         Log(ctx) << "Added " << (numChunks - origNumChunks) << " thunks with "
689                  << "margin " << margin << " in " << pass << " passes";
690       return;
691     }
692 
693     if (pass >= 10)
694       Fatal(ctx) << "adding thunks hasn't converged after " << pass
695                  << " passes";
696 
697     if (pass > 0) {
698       // If the previous pass didn't work out, reset everything back to the
699       // original conditions before retrying with a wider margin. This should
700       // ideally never happen under real circumstances.
701       for (OutputSection *sec : ctx.outputSections)
702         sec->chunks = sec->origChunks;
703       margin *= 2;
704     }
705 
706     // Try adding thunks everywhere where it is needed, with a margin
707     // to avoid things going out of range due to the added thunks.
708     bool addressesChanged = false;
709     {
710       llvm::TimeTraceScope timeScope3("Create thunks");
711       for (OutputSection *sec : ctx.outputSections)
712         addressesChanged |= createThunks(sec, margin);
713     }
714     // If the verification above thought we needed thunks, we should have
715     // added some.
716     assert(addressesChanged);
717     (void)addressesChanged;
718 
719     // Recalculate the layout for the whole image (and verify the ranges at
720     // the start of the next round).
721     assignAddresses();
722 
723     pass++;
724   }
725 }
726 
writePEChecksum()727 void Writer::writePEChecksum() {
728   if (!ctx.config.writeCheckSum) {
729     return;
730   }
731 
732   llvm::TimeTraceScope timeScope("PE checksum");
733 
734   // https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#checksum
735   uint32_t *buf = (uint32_t *)buffer->getBufferStart();
736   uint32_t size = (uint32_t)(buffer->getBufferSize());
737 
738   pe32_header *peHeader = (pe32_header *)((uint8_t *)buf + coffHeaderOffset +
739                                           sizeof(coff_file_header));
740 
741   uint64_t sum = 0;
742   uint32_t count = size;
743   ulittle16_t *addr = (ulittle16_t *)buf;
744 
745   // The PE checksum algorithm, implemented as suggested in RFC1071
746   while (count > 1) {
747     sum += *addr++;
748     count -= 2;
749   }
750 
751   // Add left-over byte, if any
752   if (count > 0)
753     sum += *(unsigned char *)addr;
754 
755   // Fold 32-bit sum to 16 bits
756   while (sum >> 16) {
757     sum = (sum & 0xffff) + (sum >> 16);
758   }
759 
760   sum += size;
761   peHeader->CheckSum = sum;
762 }
763 
764 // The main function of the writer.
run()765 void Writer::run() {
766   {
767     llvm::TimeTraceScope timeScope("Write PE");
768     ScopedTimer t1(ctx.codeLayoutTimer);
769 
770     calculateStubDependentSizes();
771     if (ctx.config.machine == ARM64X)
772       ctx.dynamicRelocs = make<DynamicRelocsChunk>();
773     createImportTables();
774     createSections();
775     appendImportThunks();
776     // Import thunks must be added before the Control Flow Guard tables are
777     // added.
778     createMiscChunks();
779     createExportTable();
780     mergeSections();
781     sortECChunks();
782     appendECImportTables();
783     createDynamicRelocs();
784     removeUnusedSections();
785     finalizeAddresses();
786     removeEmptySections();
787     assignOutputSectionIndices();
788     setSectionPermissions();
789     setECSymbols();
790     createSymbolAndStringTable();
791 
792     if (fileSize > UINT32_MAX)
793       Fatal(ctx) << "image size (" << fileSize << ") "
794                  << "exceeds maximum allowable size (" << UINT32_MAX << ")";
795 
796     openFile(ctx.config.outputFile);
797     if (ctx.config.is64()) {
798       writeHeader<pe32plus_header>();
799     } else {
800       writeHeader<pe32_header>();
801     }
802     writeSections();
803     prepareLoadConfig();
804     sortExceptionTables();
805 
806     // Fix up the alignment in the TLS Directory's characteristic field,
807     // if a specific alignment value is needed
808     if (tlsAlignment)
809       fixTlsAlignment();
810   }
811 
812   if (!ctx.config.pdbPath.empty() && ctx.config.debug) {
813     assert(buildId);
814     createPDB(ctx, sectionTable, buildId->buildId);
815   }
816   writeBuildId();
817 
818   writeLLDMapFile(ctx);
819   writeMapFile(ctx);
820 
821   writePEChecksum();
822 
823   if (errorCount())
824     return;
825 
826   llvm::TimeTraceScope timeScope("Commit PE to disk");
827   ScopedTimer t2(ctx.outputCommitTimer);
828   if (auto e = buffer->commit())
829     Fatal(ctx) << "failed to write output '" << buffer->getPath()
830                << "': " << toString(std::move(e));
831 }
832 
getOutputSectionName(StringRef name)833 static StringRef getOutputSectionName(StringRef name) {
834   StringRef s = name.split('$').first;
835 
836   // Treat a later period as a separator for MinGW, for sections like
837   // ".ctors.01234".
838   return s.substr(0, s.find('.', 1));
839 }
840 
841 // For /order.
sortBySectionOrder(std::vector<Chunk * > & chunks)842 void Writer::sortBySectionOrder(std::vector<Chunk *> &chunks) {
843   auto getPriority = [&ctx = ctx](const Chunk *c) {
844     if (auto *sec = dyn_cast<SectionChunk>(c))
845       if (sec->sym)
846         return ctx.config.order.lookup(sec->sym->getName());
847     return 0;
848   };
849 
850   llvm::stable_sort(chunks, [=](const Chunk *a, const Chunk *b) {
851     return getPriority(a) < getPriority(b);
852   });
853 }
854 
855 // Change the characteristics of existing PartialSections that belong to the
856 // section Name to Chars.
fixPartialSectionChars(StringRef name,uint32_t chars)857 void Writer::fixPartialSectionChars(StringRef name, uint32_t chars) {
858   for (auto it : partialSections) {
859     PartialSection *pSec = it.second;
860     StringRef curName = pSec->name;
861     if (!curName.consume_front(name) ||
862         (!curName.empty() && !curName.starts_with("$")))
863       continue;
864     if (pSec->characteristics == chars)
865       continue;
866     PartialSection *destSec = createPartialSection(pSec->name, chars);
867     destSec->chunks.insert(destSec->chunks.end(), pSec->chunks.begin(),
868                            pSec->chunks.end());
869     pSec->chunks.clear();
870   }
871 }
872 
873 // Sort concrete section chunks from GNU import libraries.
874 //
875 // GNU binutils doesn't use short import files, but instead produces import
876 // libraries that consist of object files, with section chunks for the .idata$*
877 // sections. These are linked just as regular static libraries. Each import
878 // library consists of one header object, one object file for every imported
879 // symbol, and one trailer object. In order for the .idata tables/lists to
880 // be formed correctly, the section chunks within each .idata$* section need
881 // to be grouped by library, and sorted alphabetically within each library
882 // (which makes sure the header comes first and the trailer last).
fixGnuImportChunks()883 bool Writer::fixGnuImportChunks() {
884   uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
885 
886   // Make sure all .idata$* section chunks are mapped as RDATA in order to
887   // be sorted into the same sections as our own synthesized .idata chunks.
888   fixPartialSectionChars(".idata", rdata);
889 
890   bool hasIdata = false;
891   // Sort all .idata$* chunks, grouping chunks from the same library,
892   // with alphabetical ordering of the object files within a library.
893   for (auto it : partialSections) {
894     PartialSection *pSec = it.second;
895     if (!pSec->name.starts_with(".idata"))
896       continue;
897 
898     if (!pSec->chunks.empty())
899       hasIdata = true;
900     llvm::stable_sort(pSec->chunks, [&](Chunk *s, Chunk *t) {
901       SectionChunk *sc1 = dyn_cast<SectionChunk>(s);
902       SectionChunk *sc2 = dyn_cast<SectionChunk>(t);
903       if (!sc1 || !sc2) {
904         // if SC1, order them ascending. If SC2 or both null,
905         // S is not less than T.
906         return sc1 != nullptr;
907       }
908       // Make a string with "libraryname/objectfile" for sorting, achieving
909       // both grouping by library and sorting of objects within a library,
910       // at once.
911       std::string key1 =
912           (sc1->file->parentName + "/" + sc1->file->getName()).str();
913       std::string key2 =
914           (sc2->file->parentName + "/" + sc2->file->getName()).str();
915       return key1 < key2;
916     });
917   }
918   return hasIdata;
919 }
920 
921 // Add generated idata chunks, for imported symbols and DLLs, and a
922 // terminator in .idata$2.
addSyntheticIdata()923 void Writer::addSyntheticIdata() {
924   uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
925   idata.create(ctx);
926 
927   // Add the .idata content in the right section groups, to allow
928   // chunks from other linked in object files to be grouped together.
929   // See Microsoft PE/COFF spec 5.4 for details.
930   auto add = [&](StringRef n, std::vector<Chunk *> &v) {
931     PartialSection *pSec = createPartialSection(n, rdata);
932     pSec->chunks.insert(pSec->chunks.end(), v.begin(), v.end());
933   };
934 
935   // The loader assumes a specific order of data.
936   // Add each type in the correct order.
937   add(".idata$2", idata.dirs);
938   add(".idata$4", idata.lookups);
939   add(".idata$5", idata.addresses);
940   if (!idata.hints.empty())
941     add(".idata$6", idata.hints);
942   add(".idata$7", idata.dllNames);
943   if (!idata.auxIat.empty())
944     add(".idata$9", idata.auxIat);
945   if (!idata.auxIatCopy.empty())
946     add(".idata$a", idata.auxIatCopy);
947 }
948 
appendECImportTables()949 void Writer::appendECImportTables() {
950   if (!isArm64EC(ctx.config.machine))
951     return;
952 
953   const uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
954 
955   // IAT is always placed at the beginning of .rdata section and its size
956   // is aligned to 4KB. Insert it here, after all merges all done.
957   if (PartialSection *importAddresses = findPartialSection(".idata$5", rdata)) {
958     if (!rdataSec->chunks.empty())
959       rdataSec->chunks.front()->setAlignment(
960           std::max(0x1000u, rdataSec->chunks.front()->getAlignment()));
961     iatSize = alignTo(iatSize, 0x1000);
962 
963     rdataSec->chunks.insert(rdataSec->chunks.begin(),
964                             importAddresses->chunks.begin(),
965                             importAddresses->chunks.end());
966     rdataSec->contribSections.insert(rdataSec->contribSections.begin(),
967                                      importAddresses);
968   }
969 
970   // The auxiliary IAT is always placed at the end of the .rdata section
971   // and is aligned to 4KB.
972   if (PartialSection *auxIat = findPartialSection(".idata$9", rdata)) {
973     auxIat->chunks.front()->setAlignment(0x1000);
974     rdataSec->chunks.insert(rdataSec->chunks.end(), auxIat->chunks.begin(),
975                             auxIat->chunks.end());
976     rdataSec->addContributingPartialSection(auxIat);
977   }
978 
979   if (!delayIdata.getAuxIat().empty()) {
980     delayIdata.getAuxIat().front()->setAlignment(0x1000);
981     rdataSec->chunks.insert(rdataSec->chunks.end(),
982                             delayIdata.getAuxIat().begin(),
983                             delayIdata.getAuxIat().end());
984   }
985 }
986 
987 // Locate the first Chunk and size of the import directory list and the
988 // IAT.
locateImportTables()989 void Writer::locateImportTables() {
990   uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
991 
992   if (PartialSection *importDirs = findPartialSection(".idata$2", rdata)) {
993     if (!importDirs->chunks.empty())
994       importTableStart = importDirs->chunks.front();
995     for (Chunk *c : importDirs->chunks)
996       importTableSize += c->getSize();
997   }
998 
999   if (PartialSection *importAddresses = findPartialSection(".idata$5", rdata)) {
1000     if (!importAddresses->chunks.empty())
1001       iatStart = importAddresses->chunks.front();
1002     for (Chunk *c : importAddresses->chunks)
1003       iatSize += c->getSize();
1004   }
1005 }
1006 
1007 // Return whether a SectionChunk's suffix (the dollar and any trailing
1008 // suffix) should be removed and sorted into the main suffixless
1009 // PartialSection.
shouldStripSectionSuffix(SectionChunk * sc,StringRef name,bool isMinGW)1010 static bool shouldStripSectionSuffix(SectionChunk *sc, StringRef name,
1011                                      bool isMinGW) {
1012   // On MinGW, comdat groups are formed by putting the comdat group name
1013   // after the '$' in the section name. For .eh_frame$<symbol>, that must
1014   // still be sorted before the .eh_frame trailer from crtend.o, thus just
1015   // strip the section name trailer. For other sections, such as
1016   // .tls$$<symbol> (where non-comdat .tls symbols are otherwise stored in
1017   // ".tls$"), they must be strictly sorted after .tls. And for the
1018   // hypothetical case of comdat .CRT$XCU, we definitely need to keep the
1019   // suffix for sorting. Thus, to play it safe, only strip the suffix for
1020   // the standard sections.
1021   if (!isMinGW)
1022     return false;
1023   if (!sc || !sc->isCOMDAT())
1024     return false;
1025   return name.starts_with(".text$") || name.starts_with(".data$") ||
1026          name.starts_with(".rdata$") || name.starts_with(".pdata$") ||
1027          name.starts_with(".xdata$") || name.starts_with(".eh_frame$");
1028 }
1029 
sortSections()1030 void Writer::sortSections() {
1031   if (!ctx.config.callGraphProfile.empty()) {
1032     DenseMap<const SectionChunk *, int> order =
1033         computeCallGraphProfileOrder(ctx);
1034     for (auto it : order) {
1035       if (DefinedRegular *sym = it.first->sym)
1036         ctx.config.order[sym->getName()] = it.second;
1037     }
1038   }
1039   if (!ctx.config.order.empty())
1040     for (auto it : partialSections)
1041       sortBySectionOrder(it.second->chunks);
1042 }
1043 
calculateStubDependentSizes()1044 void Writer::calculateStubDependentSizes() {
1045   if (ctx.config.dosStub)
1046     dosStubSize = alignTo(ctx.config.dosStub->getBufferSize(), 8);
1047   else
1048     dosStubSize = sizeof(dos_header) + sizeof(dosProgram);
1049 
1050   coffHeaderOffset = dosStubSize + sizeof(PEMagic);
1051   peHeaderOffset = coffHeaderOffset + sizeof(coff_file_header);
1052   dataDirOffset64 = peHeaderOffset + sizeof(pe32plus_header);
1053 }
1054 
1055 // Create output section objects and add them to OutputSections.
createSections()1056 void Writer::createSections() {
1057   llvm::TimeTraceScope timeScope("Output sections");
1058   // First, create the builtin sections.
1059   const uint32_t data = IMAGE_SCN_CNT_INITIALIZED_DATA;
1060   const uint32_t bss = IMAGE_SCN_CNT_UNINITIALIZED_DATA;
1061   const uint32_t code = IMAGE_SCN_CNT_CODE;
1062   const uint32_t discardable = IMAGE_SCN_MEM_DISCARDABLE;
1063   const uint32_t r = IMAGE_SCN_MEM_READ;
1064   const uint32_t w = IMAGE_SCN_MEM_WRITE;
1065   const uint32_t x = IMAGE_SCN_MEM_EXECUTE;
1066 
1067   SmallDenseMap<std::pair<StringRef, uint32_t>, OutputSection *> sections;
1068   auto createSection = [&](StringRef name, uint32_t outChars) {
1069     OutputSection *&sec = sections[{name, outChars}];
1070     if (!sec) {
1071       sec = make<OutputSection>(name, outChars);
1072       ctx.outputSections.push_back(sec);
1073     }
1074     return sec;
1075   };
1076 
1077   // Try to match the section order used by link.exe.
1078   textSec = createSection(".text", code | r | x);
1079   if (isArm64EC(ctx.config.machine))
1080     hexpthkSec = createSection(".hexpthk", code | r | x);
1081   bssSec = createSection(".bss", bss | r | w);
1082   rdataSec = createSection(".rdata", data | r);
1083   buildidSec = createSection(".buildid", data | r);
1084   dataSec = createSection(".data", data | r | w);
1085   pdataSec = createSection(".pdata", data | r);
1086   idataSec = createSection(".idata", data | r);
1087   edataSec = createSection(".edata", data | r);
1088   didatSec = createSection(".didat", data | r);
1089   if (isArm64EC(ctx.config.machine))
1090     a64xrmSec = createSection(".a64xrm", data | r);
1091   rsrcSec = createSection(".rsrc", data | r);
1092   relocSec = createSection(".reloc", data | discardable | r);
1093   ctorsSec = createSection(".ctors", data | r | w);
1094   dtorsSec = createSection(".dtors", data | r | w);
1095 
1096   // Then bin chunks by name and output characteristics.
1097   for (Chunk *c : ctx.driver.getChunks()) {
1098     auto *sc = dyn_cast<SectionChunk>(c);
1099     if (sc && !sc->live) {
1100       if (ctx.config.verbose)
1101         sc->printDiscardedMessage();
1102       continue;
1103     }
1104     StringRef name = c->getSectionName();
1105     if (shouldStripSectionSuffix(sc, name, ctx.config.mingw))
1106       name = name.split('$').first;
1107 
1108     if (name.starts_with(".tls"))
1109       tlsAlignment = std::max(tlsAlignment, c->getAlignment());
1110 
1111     PartialSection *pSec = createPartialSection(name,
1112                                                 c->getOutputCharacteristics());
1113     pSec->chunks.push_back(c);
1114   }
1115 
1116   fixPartialSectionChars(".rsrc", data | r);
1117   fixPartialSectionChars(".edata", data | r);
1118   // Even in non MinGW cases, we might need to link against GNU import
1119   // libraries.
1120   bool hasIdata = fixGnuImportChunks();
1121   if (!idata.empty())
1122     hasIdata = true;
1123 
1124   if (hasIdata)
1125     addSyntheticIdata();
1126 
1127   sortSections();
1128 
1129   if (hasIdata)
1130     locateImportTables();
1131 
1132   // Then create an OutputSection for each section.
1133   // '$' and all following characters in input section names are
1134   // discarded when determining output section. So, .text$foo
1135   // contributes to .text, for example. See PE/COFF spec 3.2.
1136   for (auto it : partialSections) {
1137     PartialSection *pSec = it.second;
1138     StringRef name = getOutputSectionName(pSec->name);
1139     uint32_t outChars = pSec->characteristics;
1140 
1141     if (name == ".CRT") {
1142       // In link.exe, there is a special case for the I386 target where .CRT
1143       // sections are treated as if they have output characteristics DATA | R if
1144       // their characteristics are DATA | R | W. This implements the same
1145       // special case for all architectures.
1146       outChars = data | r;
1147 
1148       Log(ctx) << "Processing section " << pSec->name << " -> " << name;
1149 
1150       sortCRTSectionChunks(pSec->chunks);
1151     }
1152 
1153     // ARM64EC has specific placement and alignment requirements for the IAT.
1154     // Delay adding its chunks until appendECImportTables.
1155     if (isArm64EC(ctx.config.machine) &&
1156         (pSec->name == ".idata$5" || pSec->name == ".idata$9"))
1157       continue;
1158 
1159     OutputSection *sec = createSection(name, outChars);
1160     for (Chunk *c : pSec->chunks)
1161       sec->addChunk(c);
1162 
1163     sec->addContributingPartialSection(pSec);
1164   }
1165 
1166   if (ctx.hybridSymtab) {
1167     if (OutputSection *sec = findSection(".CRT"))
1168       sec->splitECChunks();
1169   }
1170 
1171   // Finally, move some output sections to the end.
1172   auto sectionOrder = [&](const OutputSection *s) {
1173     // Move DISCARDABLE (or non-memory-mapped) sections to the end of file
1174     // because the loader cannot handle holes. Stripping can remove other
1175     // discardable ones than .reloc, which is first of them (created early).
1176     if (s->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) {
1177       // Move discardable sections named .debug_ to the end, after other
1178       // discardable sections. Stripping only removes the sections named
1179       // .debug_* - thus try to avoid leaving holes after stripping.
1180       if (s->name.starts_with(".debug_"))
1181         return 3;
1182       return 2;
1183     }
1184     // .rsrc should come at the end of the non-discardable sections because its
1185     // size may change by the Win32 UpdateResources() function, causing
1186     // subsequent sections to move (see https://crbug.com/827082).
1187     if (s == rsrcSec)
1188       return 1;
1189     return 0;
1190   };
1191   llvm::stable_sort(ctx.outputSections,
1192                     [&](const OutputSection *s, const OutputSection *t) {
1193                       return sectionOrder(s) < sectionOrder(t);
1194                     });
1195 }
1196 
createMiscChunks()1197 void Writer::createMiscChunks() {
1198   llvm::TimeTraceScope timeScope("Misc chunks");
1199   Configuration *config = &ctx.config;
1200 
1201   for (MergeChunk *p : ctx.mergeChunkInstances) {
1202     if (p) {
1203       p->finalizeContents();
1204       rdataSec->addChunk(p);
1205     }
1206   }
1207 
1208   // Create thunks for locally-dllimported symbols.
1209   ctx.forEachSymtab([&](SymbolTable &symtab) {
1210     if (!symtab.localImportChunks.empty()) {
1211       for (Chunk *c : symtab.localImportChunks)
1212         rdataSec->addChunk(c);
1213     }
1214   });
1215 
1216   // Create Debug Information Chunks
1217   debugInfoSec = config->mingw ? buildidSec : rdataSec;
1218   if (config->buildIDHash != BuildIDHash::None || config->debug ||
1219       config->repro || config->cetCompat) {
1220     debugDirectory =
1221         make<DebugDirectoryChunk>(ctx, debugRecords, config->repro);
1222     debugDirectory->setAlignment(4);
1223     debugInfoSec->addChunk(debugDirectory);
1224   }
1225 
1226   if (config->debug || config->buildIDHash != BuildIDHash::None) {
1227     // Make a CVDebugRecordChunk even when /DEBUG:CV is not specified.  We
1228     // output a PDB no matter what, and this chunk provides the only means of
1229     // allowing a debugger to match a PDB and an executable.  So we need it even
1230     // if we're ultimately not going to write CodeView data to the PDB.
1231     buildId = make<CVDebugRecordChunk>(ctx);
1232     debugRecords.emplace_back(COFF::IMAGE_DEBUG_TYPE_CODEVIEW, buildId);
1233     ctx.forEachSymtab([&](SymbolTable &symtab) {
1234       if (Symbol *buildidSym = symtab.findUnderscore("__buildid"))
1235         replaceSymbol<DefinedSynthetic>(buildidSym, buildidSym->getName(),
1236                                         buildId, 4);
1237     });
1238   }
1239 
1240   if (config->cetCompat) {
1241     debugRecords.emplace_back(COFF::IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS,
1242                               make<ExtendedDllCharacteristicsChunk>(
1243                                   IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT));
1244   }
1245 
1246   // Align and add each chunk referenced by the debug data directory.
1247   for (std::pair<COFF::DebugType, Chunk *> r : debugRecords) {
1248     r.second->setAlignment(4);
1249     debugInfoSec->addChunk(r.second);
1250   }
1251 
1252   // Create SEH table. x86-only.
1253   if (config->safeSEH)
1254     createSEHTable();
1255 
1256   // Create /guard:cf tables if requested.
1257   createGuardCFTables();
1258 
1259   createECChunks();
1260 
1261   if (config->autoImport)
1262     createRuntimePseudoRelocs();
1263 
1264   if (config->mingw) {
1265     insertCtorDtorSymbols();
1266     insertBssDataStartEndSymbols();
1267   }
1268 }
1269 
1270 // Create .idata section for the DLL-imported symbol table.
1271 // The format of this section is inherently Windows-specific.
1272 // IdataContents class abstracted away the details for us,
1273 // so we just let it create chunks and add them to the section.
createImportTables()1274 void Writer::createImportTables() {
1275   llvm::TimeTraceScope timeScope("Import tables");
1276   // Initialize DLLOrder so that import entries are ordered in
1277   // the same order as in the command line. (That affects DLL
1278   // initialization order, and this ordering is MSVC-compatible.)
1279   for (ImportFile *file : ctx.importFileInstances) {
1280     if (!file->live)
1281       continue;
1282 
1283     std::string dll = StringRef(file->dllName).lower();
1284     ctx.config.dllOrder.try_emplace(dll, ctx.config.dllOrder.size());
1285 
1286     if (file->impSym && !isa<DefinedImportData>(file->impSym))
1287       Fatal(ctx) << file->symtab.printSymbol(file->impSym) << " was replaced";
1288     DefinedImportData *impSym = cast_or_null<DefinedImportData>(file->impSym);
1289     if (ctx.config.delayLoads.count(StringRef(file->dllName).lower())) {
1290       if (!file->thunkSym)
1291         Fatal(ctx) << "cannot delay-load " << toString(file)
1292                    << " due to import of data: "
1293                    << file->symtab.printSymbol(impSym);
1294       delayIdata.add(impSym);
1295     } else {
1296       idata.add(impSym);
1297     }
1298   }
1299 }
1300 
appendImportThunks()1301 void Writer::appendImportThunks() {
1302   if (ctx.importFileInstances.empty())
1303     return;
1304 
1305   llvm::TimeTraceScope timeScope("Import thunks");
1306   for (ImportFile *file : ctx.importFileInstances) {
1307     if (!file->live)
1308       continue;
1309 
1310     if (file->thunkSym) {
1311       if (!isa<DefinedImportThunk>(file->thunkSym))
1312         Fatal(ctx) << file->symtab.printSymbol(file->thunkSym)
1313                    << " was replaced";
1314       auto *chunk = cast<DefinedImportThunk>(file->thunkSym)->getChunk();
1315       if (chunk->live)
1316         textSec->addChunk(chunk);
1317     }
1318 
1319     if (file->auxThunkSym) {
1320       if (!isa<DefinedImportThunk>(file->auxThunkSym))
1321         Fatal(ctx) << file->symtab.printSymbol(file->auxThunkSym)
1322                    << " was replaced";
1323       auto *chunk = cast<DefinedImportThunk>(file->auxThunkSym)->getChunk();
1324       if (chunk->live)
1325         textSec->addChunk(chunk);
1326     }
1327 
1328     if (file->impchkThunk)
1329       textSec->addChunk(file->impchkThunk);
1330   }
1331 
1332   if (!delayIdata.empty()) {
1333     delayIdata.create();
1334     for (Chunk *c : delayIdata.getChunks())
1335       didatSec->addChunk(c);
1336     for (Chunk *c : delayIdata.getDataChunks())
1337       dataSec->addChunk(c);
1338     for (Chunk *c : delayIdata.getCodeChunks())
1339       textSec->addChunk(c);
1340     for (Chunk *c : delayIdata.getCodePData())
1341       pdataSec->addChunk(c);
1342     for (Chunk *c : delayIdata.getAuxIatCopy())
1343       rdataSec->addChunk(c);
1344     for (Chunk *c : delayIdata.getCodeUnwindInfo())
1345       rdataSec->addChunk(c);
1346   }
1347 }
1348 
createExportTable()1349 void Writer::createExportTable() {
1350   llvm::TimeTraceScope timeScope("Export table");
1351   if (!edataSec->chunks.empty()) {
1352     // Allow using a custom built export table from input object files, instead
1353     // of having the linker synthesize the tables.
1354     if (!ctx.hybridSymtab) {
1355       ctx.symtab.edataStart = edataSec->chunks.front();
1356       ctx.symtab.edataEnd = edataSec->chunks.back();
1357     } else {
1358       // On hybrid target, split EC and native chunks.
1359       llvm::stable_sort(edataSec->chunks, [=](const Chunk *a, const Chunk *b) {
1360         return (a->getMachine() != ARM64) < (b->getMachine() != ARM64);
1361       });
1362 
1363       for (auto chunk : edataSec->chunks) {
1364         if (chunk->getMachine() != ARM64) {
1365           ctx.symtab.edataStart = chunk;
1366           ctx.symtab.edataEnd = edataSec->chunks.back();
1367           break;
1368         }
1369 
1370         if (!ctx.hybridSymtab->edataStart)
1371           ctx.hybridSymtab->edataStart = chunk;
1372         ctx.hybridSymtab->edataEnd = chunk;
1373       }
1374     }
1375   }
1376   ctx.forEachActiveSymtab([&](SymbolTable &symtab) {
1377     if (symtab.edataStart) {
1378       if (symtab.hadExplicitExports)
1379         Warn(ctx) << "literal .edata sections override exports";
1380     } else if (!symtab.exports.empty()) {
1381       std::vector<Chunk *> edataChunks;
1382       createEdataChunks(symtab, edataChunks);
1383       for (Chunk *c : edataChunks)
1384         edataSec->addChunk(c);
1385       symtab.edataStart = edataChunks.front();
1386       symtab.edataEnd = edataChunks.back();
1387     }
1388 
1389     // Warn on exported deleting destructor.
1390     for (auto e : symtab.exports)
1391       if (e.sym && e.sym->getName().starts_with("??_G"))
1392         Warn(ctx) << "export of deleting dtor: " << toString(ctx, *e.sym);
1393   });
1394 }
1395 
removeUnusedSections()1396 void Writer::removeUnusedSections() {
1397   llvm::TimeTraceScope timeScope("Remove unused sections");
1398   // Remove sections that we can be sure won't get content, to avoid
1399   // allocating space for their section headers.
1400   auto isUnused = [this](OutputSection *s) {
1401     if (s == relocSec)
1402       return false; // This section is populated later.
1403     // MergeChunks have zero size at this point, as their size is finalized
1404     // later. Only remove sections that have no Chunks at all.
1405     return s->chunks.empty();
1406   };
1407   llvm::erase_if(ctx.outputSections, isUnused);
1408 }
1409 
1410 // The Windows loader doesn't seem to like empty sections,
1411 // so we remove them if any.
removeEmptySections()1412 void Writer::removeEmptySections() {
1413   llvm::TimeTraceScope timeScope("Remove empty sections");
1414   auto isEmpty = [](OutputSection *s) { return s->getVirtualSize() == 0; };
1415   llvm::erase_if(ctx.outputSections, isEmpty);
1416 }
1417 
assignOutputSectionIndices()1418 void Writer::assignOutputSectionIndices() {
1419   llvm::TimeTraceScope timeScope("Output sections indices");
1420   // Assign final output section indices, and assign each chunk to its output
1421   // section.
1422   uint32_t idx = 1;
1423   for (OutputSection *os : ctx.outputSections) {
1424     os->sectionIndex = idx;
1425     for (Chunk *c : os->chunks)
1426       c->setOutputSectionIdx(idx);
1427     ++idx;
1428   }
1429 
1430   // Merge chunks are containers of chunks, so assign those an output section
1431   // too.
1432   for (MergeChunk *mc : ctx.mergeChunkInstances)
1433     if (mc)
1434       for (SectionChunk *sc : mc->sections)
1435         if (sc && sc->live)
1436           sc->setOutputSectionIdx(mc->getOutputSectionIdx());
1437 }
1438 
createSymbol(Defined * def)1439 std::optional<coff_symbol16> Writer::createSymbol(Defined *def) {
1440   coff_symbol16 sym;
1441   switch (def->kind()) {
1442   case Symbol::DefinedAbsoluteKind: {
1443     auto *da = dyn_cast<DefinedAbsolute>(def);
1444     // Note: COFF symbol can only store 32-bit values, so 64-bit absolute
1445     // values will be truncated.
1446     sym.Value = da->getVA();
1447     sym.SectionNumber = IMAGE_SYM_ABSOLUTE;
1448     break;
1449   }
1450   default: {
1451     // Don't write symbols that won't be written to the output to the symbol
1452     // table.
1453     // We also try to write DefinedSynthetic as a normal symbol. Some of these
1454     // symbols do point to an actual chunk, like __safe_se_handler_table. Others
1455     // like __ImageBase are outside of sections and thus cannot be represented.
1456     Chunk *c = def->getChunk();
1457     if (!c)
1458       return std::nullopt;
1459     OutputSection *os = ctx.getOutputSection(c);
1460     if (!os)
1461       return std::nullopt;
1462 
1463     sym.Value = def->getRVA() - os->getRVA();
1464     sym.SectionNumber = os->sectionIndex;
1465     break;
1466   }
1467   }
1468 
1469   // Symbols that are runtime pseudo relocations don't point to the actual
1470   // symbol data itself (as they are imported), but points to the IAT entry
1471   // instead. Avoid emitting them to the symbol table, as they can confuse
1472   // debuggers.
1473   if (def->isRuntimePseudoReloc)
1474     return std::nullopt;
1475 
1476   StringRef name = def->getName();
1477   if (name.size() > COFF::NameSize) {
1478     sym.Name.Offset.Zeroes = 0;
1479     sym.Name.Offset.Offset = 0; // Filled in later.
1480     strtab.add(name);
1481   } else {
1482     memset(sym.Name.ShortName, 0, COFF::NameSize);
1483     memcpy(sym.Name.ShortName, name.data(), name.size());
1484   }
1485 
1486   if (auto *d = dyn_cast<DefinedCOFF>(def)) {
1487     COFFSymbolRef ref = d->getCOFFSymbol();
1488     sym.Type = ref.getType();
1489     sym.StorageClass = ref.getStorageClass();
1490   } else if (def->kind() == Symbol::DefinedImportThunkKind) {
1491     sym.Type = (IMAGE_SYM_DTYPE_FUNCTION << SCT_COMPLEX_TYPE_SHIFT) |
1492                IMAGE_SYM_TYPE_NULL;
1493     sym.StorageClass = IMAGE_SYM_CLASS_EXTERNAL;
1494   } else {
1495     sym.Type = IMAGE_SYM_TYPE_NULL;
1496     sym.StorageClass = IMAGE_SYM_CLASS_EXTERNAL;
1497   }
1498   sym.NumberOfAuxSymbols = 0;
1499   return sym;
1500 }
1501 
createSymbolAndStringTable()1502 void Writer::createSymbolAndStringTable() {
1503   llvm::TimeTraceScope timeScope("Symbol and string table");
1504   // PE/COFF images are limited to 8 byte section names. Longer names can be
1505   // supported by writing a non-standard string table, but this string table is
1506   // not mapped at runtime and the long names will therefore be inaccessible.
1507   // link.exe always truncates section names to 8 bytes, whereas binutils always
1508   // preserves long section names via the string table. LLD adopts a hybrid
1509   // solution where discardable sections have long names preserved and
1510   // non-discardable sections have their names truncated, to ensure that any
1511   // section which is mapped at runtime also has its name mapped at runtime.
1512   SmallVector<OutputSection *> longNameSections;
1513   for (OutputSection *sec : ctx.outputSections) {
1514     if (sec->name.size() <= COFF::NameSize)
1515       continue;
1516     if ((sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0)
1517       continue;
1518     if (ctx.config.warnLongSectionNames) {
1519       Warn(ctx)
1520           << "section name " << sec->name
1521           << " is longer than 8 characters and will use a non-standard string "
1522              "table";
1523     }
1524     // Put the section name in the begin of strtab so that its offset is less
1525     // than Max7DecimalOffset otherwise lldb/gdb will not read it.
1526     strtab.add(sec->name, /*Priority=*/UINT8_MAX);
1527     longNameSections.push_back(sec);
1528   }
1529 
1530   std::vector<std::pair<size_t, StringRef>> longNameSymbols;
1531   if (ctx.config.writeSymtab) {
1532     for (ObjFile *file : ctx.objFileInstances) {
1533       for (Symbol *b : file->getSymbols()) {
1534         auto *d = dyn_cast_or_null<Defined>(b);
1535         if (!d || d->writtenToSymtab)
1536           continue;
1537         d->writtenToSymtab = true;
1538         if (auto *dc = dyn_cast_or_null<DefinedCOFF>(d)) {
1539           COFFSymbolRef symRef = dc->getCOFFSymbol();
1540           if (symRef.isSectionDefinition() ||
1541               symRef.getStorageClass() == COFF::IMAGE_SYM_CLASS_LABEL)
1542             continue;
1543         }
1544 
1545         if (std::optional<coff_symbol16> sym = createSymbol(d)) {
1546           if (d->getName().size() > COFF::NameSize)
1547             longNameSymbols.emplace_back(outputSymtab.size(), d->getName());
1548           outputSymtab.push_back(*sym);
1549         }
1550 
1551         if (auto *dthunk = dyn_cast<DefinedImportThunk>(d)) {
1552           if (!dthunk->wrappedSym->writtenToSymtab) {
1553             dthunk->wrappedSym->writtenToSymtab = true;
1554             if (std::optional<coff_symbol16> sym =
1555                     createSymbol(dthunk->wrappedSym)) {
1556               if (dthunk->wrappedSym->getName().size() > COFF::NameSize)
1557                 longNameSymbols.emplace_back(outputSymtab.size(),
1558                                              dthunk->wrappedSym->getName());
1559               outputSymtab.push_back(*sym);
1560             }
1561           }
1562         }
1563       }
1564     }
1565   }
1566 
1567   if (outputSymtab.empty() && strtab.empty())
1568     return;
1569 
1570   strtab.finalize();
1571   for (OutputSection *sec : longNameSections)
1572     sec->setStringTableOff(strtab.getOffset(sec->name));
1573   for (auto P : longNameSymbols) {
1574     coff_symbol16 &sym = outputSymtab[P.first];
1575     sym.Name.Offset.Offset = strtab.getOffset(P.second);
1576   }
1577 
1578   // We position the symbol table to be adjacent to the end of the last section.
1579   uint64_t fileOff = fileSize;
1580   pointerToSymbolTable = fileOff;
1581   fileOff += outputSymtab.size() * sizeof(coff_symbol16);
1582   fileOff += strtab.getSize();
1583   fileSize = alignTo(fileOff, ctx.config.fileAlign);
1584 }
1585 
mergeSection(const std::map<StringRef,StringRef>::value_type & p)1586 void Writer::mergeSection(const std::map<StringRef, StringRef>::value_type &p) {
1587   StringRef toName = p.second;
1588   if (p.first == toName)
1589     return;
1590   StringSet<> names;
1591   while (true) {
1592     if (!names.insert(toName).second)
1593       Fatal(ctx) << "/merge: cycle found for section '" << p.first << "'";
1594     auto i = ctx.config.merge.find(toName);
1595     if (i == ctx.config.merge.end())
1596       break;
1597     toName = i->second;
1598   }
1599   OutputSection *from = findSection(p.first);
1600   OutputSection *to = findSection(toName);
1601   if (!from)
1602     return;
1603   if (!to) {
1604     from->name = toName;
1605     return;
1606   }
1607   to->merge(from);
1608 }
1609 
mergeSections()1610 void Writer::mergeSections() {
1611   llvm::TimeTraceScope timeScope("Merge sections");
1612   if (!pdataSec->chunks.empty()) {
1613     if (isArm64EC(ctx.config.machine)) {
1614       // On ARM64EC .pdata may contain both ARM64 and X64 data. Split them by
1615       // sorting and store their regions separately.
1616       llvm::stable_sort(pdataSec->chunks, [=](const Chunk *a, const Chunk *b) {
1617         return (a->getMachine() == AMD64) < (b->getMachine() == AMD64);
1618       });
1619 
1620       for (auto chunk : pdataSec->chunks) {
1621         if (chunk->getMachine() == AMD64) {
1622           hybridPdata.first = chunk;
1623           hybridPdata.last = pdataSec->chunks.back();
1624           break;
1625         }
1626 
1627         if (!pdata.first)
1628           pdata.first = chunk;
1629         pdata.last = chunk;
1630       }
1631     } else {
1632       pdata.first = pdataSec->chunks.front();
1633       pdata.last = pdataSec->chunks.back();
1634     }
1635   }
1636 
1637   for (auto &p : ctx.config.merge) {
1638     if (p.first != ".bss")
1639       mergeSection(p);
1640   }
1641 
1642   // Because .bss contains all zeros, it should be merged at the end of
1643   // whatever section it is being merged into (usually .data) so that the image
1644   // need not actually contain all of the zeros.
1645   auto it = ctx.config.merge.find(".bss");
1646   if (it != ctx.config.merge.end())
1647     mergeSection(*it);
1648 }
1649 
1650 // EC targets may have chunks of various architectures mixed together at this
1651 // point. Group code chunks of the same architecture together by sorting chunks
1652 // by their EC range type.
sortECChunks()1653 void Writer::sortECChunks() {
1654   if (!isArm64EC(ctx.config.machine))
1655     return;
1656 
1657   for (OutputSection *sec : ctx.outputSections) {
1658     if (sec->isCodeSection())
1659       llvm::stable_sort(sec->chunks, [=](const Chunk *a, const Chunk *b) {
1660         std::optional<chpe_range_type> aType = a->getArm64ECRangeType(),
1661                                        bType = b->getArm64ECRangeType();
1662         return bType && (!aType || *aType < *bType);
1663       });
1664   }
1665 }
1666 
1667 // Visits all sections to assign incremental, non-overlapping RVAs and
1668 // file offsets.
assignAddresses()1669 void Writer::assignAddresses() {
1670   llvm::TimeTraceScope timeScope("Assign addresses");
1671   Configuration *config = &ctx.config;
1672 
1673   // We need to create EC code map so that ECCodeMapChunk knows its size.
1674   // We do it here to make sure that we account for range extension chunks.
1675   createECCodeMap();
1676 
1677   sizeOfHeaders = dosStubSize + sizeof(PEMagic) + sizeof(coff_file_header) +
1678                   sizeof(data_directory) * numberOfDataDirectory +
1679                   sizeof(coff_section) * ctx.outputSections.size();
1680   sizeOfHeaders +=
1681       config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header);
1682   sizeOfHeaders = alignTo(sizeOfHeaders, config->fileAlign);
1683   fileSize = sizeOfHeaders;
1684 
1685   // The first page is kept unmapped.
1686   uint64_t rva = alignTo(sizeOfHeaders, config->align);
1687 
1688   for (OutputSection *sec : ctx.outputSections) {
1689     llvm::TimeTraceScope timeScope("Section: ", sec->name);
1690     if (sec == relocSec) {
1691       sec->chunks.clear();
1692       addBaserels();
1693       if (ctx.dynamicRelocs) {
1694         ctx.dynamicRelocs->finalize();
1695         relocSec->addChunk(ctx.dynamicRelocs);
1696       }
1697     }
1698     uint64_t rawSize = 0, virtualSize = 0;
1699     sec->header.VirtualAddress = rva;
1700 
1701     // If /FUNCTIONPADMIN is used, functions are padded in order to create a
1702     // hotpatchable image.
1703     uint32_t padding = sec->isCodeSection() ? config->functionPadMin : 0;
1704     std::optional<chpe_range_type> prevECRange;
1705 
1706     for (Chunk *c : sec->chunks) {
1707       // Alignment EC code range baudaries.
1708       if (isArm64EC(ctx.config.machine) && sec->isCodeSection()) {
1709         std::optional<chpe_range_type> rangeType = c->getArm64ECRangeType();
1710         if (rangeType != prevECRange) {
1711           virtualSize = alignTo(virtualSize, 4096);
1712           prevECRange = rangeType;
1713         }
1714       }
1715       if (padding && c->isHotPatchable())
1716         virtualSize += padding;
1717       // If chunk has EC entry thunk, reserve a space for an offset to the
1718       // thunk.
1719       if (c->getEntryThunk())
1720         virtualSize += sizeof(uint32_t);
1721       virtualSize = alignTo(virtualSize, c->getAlignment());
1722       c->setRVA(rva + virtualSize);
1723       virtualSize += c->getSize();
1724       if (c->hasData)
1725         rawSize = alignTo(virtualSize, config->fileAlign);
1726     }
1727     if (virtualSize > UINT32_MAX)
1728       Err(ctx) << "section larger than 4 GiB: " << sec->name;
1729     sec->header.VirtualSize = virtualSize;
1730     sec->header.SizeOfRawData = rawSize;
1731     if (rawSize != 0)
1732       sec->header.PointerToRawData = fileSize;
1733     rva += alignTo(virtualSize, config->align);
1734     fileSize += alignTo(rawSize, config->fileAlign);
1735   }
1736   sizeOfImage = alignTo(rva, config->align);
1737 
1738   // Assign addresses to sections in MergeChunks.
1739   for (MergeChunk *mc : ctx.mergeChunkInstances)
1740     if (mc)
1741       mc->assignSubsectionRVAs();
1742 }
1743 
writeHeader()1744 template <typename PEHeaderTy> void Writer::writeHeader() {
1745   // Write DOS header. For backwards compatibility, the first part of a PE/COFF
1746   // executable consists of an MS-DOS MZ executable. If the executable is run
1747   // under DOS, that program gets run (usually to just print an error message).
1748   // When run under Windows, the loader looks at AddressOfNewExeHeader and uses
1749   // the PE header instead.
1750   Configuration *config = &ctx.config;
1751 
1752   uint8_t *buf = buffer->getBufferStart();
1753   auto *dos = reinterpret_cast<dos_header *>(buf);
1754 
1755   // Write DOS program.
1756   if (config->dosStub) {
1757     memcpy(buf, config->dosStub->getBufferStart(),
1758            config->dosStub->getBufferSize());
1759     // MS link.exe accepts an invalid `e_lfanew` (AddressOfNewExeHeader) and
1760     // updates it automatically. Replicate the same behaviour.
1761     dos->AddressOfNewExeHeader = alignTo(config->dosStub->getBufferSize(), 8);
1762     // Unlike MS link.exe, LLD accepts non-8-byte-aligned stubs.
1763     // In that case, we add zero paddings ourselves.
1764     buf += alignTo(config->dosStub->getBufferSize(), 8);
1765   } else {
1766     buf += sizeof(dos_header);
1767     dos->Magic[0] = 'M';
1768     dos->Magic[1] = 'Z';
1769     dos->UsedBytesInTheLastPage = dosStubSize % 512;
1770     dos->FileSizeInPages = divideCeil(dosStubSize, 512);
1771     dos->HeaderSizeInParagraphs = sizeof(dos_header) / 16;
1772 
1773     dos->AddressOfRelocationTable = sizeof(dos_header);
1774     dos->AddressOfNewExeHeader = dosStubSize;
1775 
1776     memcpy(buf, dosProgram, sizeof(dosProgram));
1777     buf += sizeof(dosProgram);
1778   }
1779 
1780   // Make sure DOS stub is aligned to 8 bytes at this point
1781   assert((buf - buffer->getBufferStart()) % 8 == 0);
1782 
1783   // Write PE magic
1784   memcpy(buf, PEMagic, sizeof(PEMagic));
1785   buf += sizeof(PEMagic);
1786 
1787   // Write COFF header
1788   assert(coffHeaderOffset == buf - buffer->getBufferStart());
1789   auto *coff = reinterpret_cast<coff_file_header *>(buf);
1790   buf += sizeof(*coff);
1791   SymbolTable &symtab =
1792       ctx.config.machine == ARM64X ? *ctx.hybridSymtab : ctx.symtab;
1793   coff->Machine = symtab.isEC() ? AMD64 : symtab.machine;
1794   coff->NumberOfSections = ctx.outputSections.size();
1795   coff->Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE;
1796   if (config->largeAddressAware)
1797     coff->Characteristics |= IMAGE_FILE_LARGE_ADDRESS_AWARE;
1798   if (!config->is64())
1799     coff->Characteristics |= IMAGE_FILE_32BIT_MACHINE;
1800   if (config->dll)
1801     coff->Characteristics |= IMAGE_FILE_DLL;
1802   if (config->driverUponly)
1803     coff->Characteristics |= IMAGE_FILE_UP_SYSTEM_ONLY;
1804   if (!config->relocatable)
1805     coff->Characteristics |= IMAGE_FILE_RELOCS_STRIPPED;
1806   if (config->swaprunCD)
1807     coff->Characteristics |= IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP;
1808   if (config->swaprunNet)
1809     coff->Characteristics |= IMAGE_FILE_NET_RUN_FROM_SWAP;
1810   coff->SizeOfOptionalHeader =
1811       sizeof(PEHeaderTy) + sizeof(data_directory) * numberOfDataDirectory;
1812 
1813   // Write PE header
1814   assert(peHeaderOffset == buf - buffer->getBufferStart());
1815   auto *pe = reinterpret_cast<PEHeaderTy *>(buf);
1816   buf += sizeof(*pe);
1817   pe->Magic = config->is64() ? PE32Header::PE32_PLUS : PE32Header::PE32;
1818 
1819   // If {Major,Minor}LinkerVersion is left at 0.0, then for some
1820   // reason signing the resulting PE file with Authenticode produces a
1821   // signature that fails to validate on Windows 7 (but is OK on 10).
1822   // Set it to 14.0, which is what VS2015 outputs, and which avoids
1823   // that problem.
1824   pe->MajorLinkerVersion = 14;
1825   pe->MinorLinkerVersion = 0;
1826 
1827   pe->ImageBase = config->imageBase;
1828   pe->SectionAlignment = config->align;
1829   pe->FileAlignment = config->fileAlign;
1830   pe->MajorImageVersion = config->majorImageVersion;
1831   pe->MinorImageVersion = config->minorImageVersion;
1832   pe->MajorOperatingSystemVersion = config->majorOSVersion;
1833   pe->MinorOperatingSystemVersion = config->minorOSVersion;
1834   pe->MajorSubsystemVersion = config->majorSubsystemVersion;
1835   pe->MinorSubsystemVersion = config->minorSubsystemVersion;
1836   pe->Subsystem = config->subsystem;
1837   pe->SizeOfImage = sizeOfImage;
1838   pe->SizeOfHeaders = sizeOfHeaders;
1839   if (!config->noEntry) {
1840     Defined *entry = cast<Defined>(symtab.entry);
1841     pe->AddressOfEntryPoint = entry->getRVA();
1842     // Pointer to thumb code must have the LSB set, so adjust it.
1843     if (config->machine == ARMNT)
1844       pe->AddressOfEntryPoint |= 1;
1845   }
1846   pe->SizeOfStackReserve = config->stackReserve;
1847   pe->SizeOfStackCommit = config->stackCommit;
1848   pe->SizeOfHeapReserve = config->heapReserve;
1849   pe->SizeOfHeapCommit = config->heapCommit;
1850   if (config->appContainer)
1851     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_APPCONTAINER;
1852   if (config->driverWdm)
1853     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER;
1854   if (config->dynamicBase)
1855     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE;
1856   if (config->highEntropyVA)
1857     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA;
1858   if (!config->allowBind)
1859     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_BIND;
1860   if (config->nxCompat)
1861     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NX_COMPAT;
1862   if (!config->allowIsolation)
1863     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION;
1864   if (config->guardCF != GuardCFLevel::Off)
1865     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_GUARD_CF;
1866   if (config->integrityCheck)
1867     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY;
1868   if (setNoSEHCharacteristic || config->noSEH)
1869     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_SEH;
1870   if (config->terminalServerAware)
1871     pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE;
1872   pe->NumberOfRvaAndSize = numberOfDataDirectory;
1873   if (textSec->getVirtualSize()) {
1874     pe->BaseOfCode = textSec->getRVA();
1875     pe->SizeOfCode = textSec->getRawSize();
1876   }
1877   pe->SizeOfInitializedData = getSizeOfInitializedData();
1878 
1879   // Write data directory
1880   assert(!ctx.config.is64() ||
1881          dataDirOffset64 == buf - buffer->getBufferStart());
1882   auto *dir = reinterpret_cast<data_directory *>(buf);
1883   buf += sizeof(*dir) * numberOfDataDirectory;
1884   if (symtab.edataStart) {
1885     dir[EXPORT_TABLE].RelativeVirtualAddress = symtab.edataStart->getRVA();
1886     dir[EXPORT_TABLE].Size = symtab.edataEnd->getRVA() +
1887                              symtab.edataEnd->getSize() -
1888                              symtab.edataStart->getRVA();
1889   }
1890   if (importTableStart) {
1891     dir[IMPORT_TABLE].RelativeVirtualAddress = importTableStart->getRVA();
1892     dir[IMPORT_TABLE].Size = importTableSize;
1893   }
1894   if (iatStart) {
1895     dir[IAT].RelativeVirtualAddress = iatStart->getRVA();
1896     dir[IAT].Size = iatSize;
1897   }
1898   if (rsrcSec->getVirtualSize()) {
1899     dir[RESOURCE_TABLE].RelativeVirtualAddress = rsrcSec->getRVA();
1900     dir[RESOURCE_TABLE].Size = rsrcSec->getVirtualSize();
1901   }
1902   // ARM64EC (but not ARM64X) contains x86_64 exception table in data directory.
1903   ChunkRange &exceptionTable =
1904       ctx.config.machine == ARM64EC ? hybridPdata : pdata;
1905   if (exceptionTable.first) {
1906     dir[EXCEPTION_TABLE].RelativeVirtualAddress =
1907         exceptionTable.first->getRVA();
1908     dir[EXCEPTION_TABLE].Size = exceptionTable.last->getRVA() +
1909                                 exceptionTable.last->getSize() -
1910                                 exceptionTable.first->getRVA();
1911   }
1912   size_t relocSize = relocSec->getVirtualSize();
1913   if (ctx.dynamicRelocs)
1914     relocSize -= ctx.dynamicRelocs->getSize();
1915   if (relocSize) {
1916     dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = relocSec->getRVA();
1917     dir[BASE_RELOCATION_TABLE].Size = relocSize;
1918   }
1919   if (Symbol *sym = symtab.findUnderscore("_tls_used")) {
1920     if (Defined *b = dyn_cast<Defined>(sym)) {
1921       dir[TLS_TABLE].RelativeVirtualAddress = b->getRVA();
1922       dir[TLS_TABLE].Size = config->is64()
1923                                 ? sizeof(object::coff_tls_directory64)
1924                                 : sizeof(object::coff_tls_directory32);
1925     }
1926   }
1927   if (debugDirectory) {
1928     dir[DEBUG_DIRECTORY].RelativeVirtualAddress = debugDirectory->getRVA();
1929     dir[DEBUG_DIRECTORY].Size = debugDirectory->getSize();
1930   }
1931   if (symtab.loadConfigSym) {
1932     dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress =
1933         symtab.loadConfigSym->getRVA();
1934     dir[LOAD_CONFIG_TABLE].Size = symtab.loadConfigSize;
1935   }
1936   if (!delayIdata.empty()) {
1937     dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress =
1938         delayIdata.getDirRVA();
1939     dir[DELAY_IMPORT_DESCRIPTOR].Size = delayIdata.getDirSize();
1940   }
1941 
1942   // Write section table
1943   for (OutputSection *sec : ctx.outputSections) {
1944     sec->writeHeaderTo(buf, config->debug);
1945     buf += sizeof(coff_section);
1946   }
1947   sectionTable = ArrayRef<uint8_t>(
1948       buf - ctx.outputSections.size() * sizeof(coff_section), buf);
1949 
1950   if (outputSymtab.empty() && strtab.empty())
1951     return;
1952 
1953   coff->PointerToSymbolTable = pointerToSymbolTable;
1954   uint32_t numberOfSymbols = outputSymtab.size();
1955   coff->NumberOfSymbols = numberOfSymbols;
1956   auto *symbolTable = reinterpret_cast<coff_symbol16 *>(
1957       buffer->getBufferStart() + coff->PointerToSymbolTable);
1958   for (size_t i = 0; i != numberOfSymbols; ++i)
1959     symbolTable[i] = outputSymtab[i];
1960   // Create the string table, it follows immediately after the symbol table.
1961   // The first 4 bytes is length including itself.
1962   buf = reinterpret_cast<uint8_t *>(&symbolTable[numberOfSymbols]);
1963   strtab.write(buf);
1964 }
1965 
openFile(StringRef path)1966 void Writer::openFile(StringRef path) {
1967   buffer = CHECK(
1968       FileOutputBuffer::create(path, fileSize, FileOutputBuffer::F_executable),
1969       "failed to open " + path);
1970 }
1971 
createSEHTable()1972 void Writer::createSEHTable() {
1973   SymbolRVASet handlers;
1974   for (ObjFile *file : ctx.objFileInstances) {
1975     if (!file->hasSafeSEH())
1976       Err(ctx) << "/safeseh: " << file->getName()
1977                << " is not compatible with SEH";
1978     markSymbolsForRVATable(file, file->getSXDataChunks(), handlers);
1979   }
1980 
1981   // Set the "no SEH" characteristic if there really were no handlers, or if
1982   // there is no load config object to point to the table of handlers.
1983   setNoSEHCharacteristic =
1984       handlers.empty() || !ctx.symtab.findUnderscore("_load_config_used");
1985 
1986   maybeAddRVATable(std::move(handlers), "__safe_se_handler_table",
1987                    "__safe_se_handler_count");
1988 }
1989 
1990 // Add a symbol to an RVA set. Two symbols may have the same RVA, but an RVA set
1991 // cannot contain duplicates. Therefore, the set is uniqued by Chunk and the
1992 // symbol's offset into that Chunk.
addSymbolToRVASet(SymbolRVASet & rvaSet,Defined * s)1993 static void addSymbolToRVASet(SymbolRVASet &rvaSet, Defined *s) {
1994   Chunk *c = s->getChunk();
1995   if (!c)
1996     return;
1997   if (auto *sc = dyn_cast<SectionChunk>(c))
1998     c = sc->repl; // Look through ICF replacement.
1999   uint32_t off = s->getRVA() - (c ? c->getRVA() : 0);
2000   rvaSet.insert({c, off});
2001 }
2002 
2003 // Given a symbol, add it to the GFIDs table if it is a live, defined, function
2004 // symbol in an executable section.
maybeAddAddressTakenFunction(SymbolRVASet & addressTakenSyms,Symbol * s)2005 static void maybeAddAddressTakenFunction(SymbolRVASet &addressTakenSyms,
2006                                          Symbol *s) {
2007   if (!s)
2008     return;
2009 
2010   switch (s->kind()) {
2011   case Symbol::DefinedLocalImportKind:
2012   case Symbol::DefinedImportDataKind:
2013     // Defines an __imp_ pointer, so it is data, so it is ignored.
2014     break;
2015   case Symbol::DefinedCommonKind:
2016     // Common is always data, so it is ignored.
2017     break;
2018   case Symbol::DefinedAbsoluteKind:
2019     // Absolute is never code, synthetic generally isn't and usually isn't
2020     // determinable.
2021     break;
2022   case Symbol::DefinedSyntheticKind:
2023     // For EC export thunks, mark both the thunk itself and its target.
2024     if (auto expChunk = dyn_cast_or_null<ECExportThunkChunk>(
2025             cast<Defined>(s)->getChunk())) {
2026       addSymbolToRVASet(addressTakenSyms, cast<Defined>(s));
2027       addSymbolToRVASet(addressTakenSyms, expChunk->target);
2028     }
2029     break;
2030   case Symbol::LazyArchiveKind:
2031   case Symbol::LazyObjectKind:
2032   case Symbol::LazyDLLSymbolKind:
2033   case Symbol::UndefinedKind:
2034     // Undefined symbols resolve to zero, so they don't have an RVA. Lazy
2035     // symbols shouldn't have relocations.
2036     break;
2037 
2038   case Symbol::DefinedImportThunkKind:
2039     // Thunks are always code, include them.
2040     addSymbolToRVASet(addressTakenSyms, cast<Defined>(s));
2041     break;
2042 
2043   case Symbol::DefinedRegularKind: {
2044     // This is a regular, defined, symbol from a COFF file. Mark the symbol as
2045     // address taken if the symbol type is function and it's in an executable
2046     // section.
2047     auto *d = cast<DefinedRegular>(s);
2048     if (d->getCOFFSymbol().getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) {
2049       SectionChunk *sc = dyn_cast<SectionChunk>(d->getChunk());
2050       if (sc && sc->live &&
2051           sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE)
2052         addSymbolToRVASet(addressTakenSyms, d);
2053     }
2054     break;
2055   }
2056   }
2057 }
2058 
2059 // Visit all relocations from all section contributions of this object file and
2060 // mark the relocation target as address-taken.
markSymbolsWithRelocations(ObjFile * file,SymbolRVASet & usedSymbols)2061 void Writer::markSymbolsWithRelocations(ObjFile *file,
2062                                         SymbolRVASet &usedSymbols) {
2063   for (Chunk *c : file->getChunks()) {
2064     // We only care about live section chunks. Common chunks and other chunks
2065     // don't generally contain relocations.
2066     SectionChunk *sc = dyn_cast<SectionChunk>(c);
2067     if (!sc || !sc->live)
2068       continue;
2069 
2070     for (const coff_relocation &reloc : sc->getRelocs()) {
2071       if (ctx.config.machine == I386 &&
2072           reloc.Type == COFF::IMAGE_REL_I386_REL32)
2073         // Ignore relative relocations on x86. On x86_64 they can't be ignored
2074         // since they're also used to compute absolute addresses.
2075         continue;
2076 
2077       Symbol *ref = sc->file->getSymbol(reloc.SymbolTableIndex);
2078       maybeAddAddressTakenFunction(usedSymbols, ref);
2079     }
2080   }
2081 }
2082 
2083 // Create the guard function id table. This is a table of RVAs of all
2084 // address-taken functions. It is sorted and uniqued, just like the safe SEH
2085 // table.
createGuardCFTables()2086 void Writer::createGuardCFTables() {
2087   Configuration *config = &ctx.config;
2088 
2089   if (config->guardCF == GuardCFLevel::Off) {
2090     // MSVC marks the entire image as instrumented if any input object was built
2091     // with /guard:cf.
2092     for (ObjFile *file : ctx.objFileInstances) {
2093       if (file->hasGuardCF()) {
2094         ctx.forEachSymtab([&](SymbolTable &symtab) {
2095           Symbol *flagSym = symtab.findUnderscore("__guard_flags");
2096           cast<DefinedAbsolute>(flagSym)->setVA(
2097               uint32_t(GuardFlags::CF_INSTRUMENTED));
2098         });
2099         break;
2100       }
2101     }
2102     return;
2103   }
2104 
2105   SymbolRVASet addressTakenSyms;
2106   SymbolRVASet giatsRVASet;
2107   std::vector<Symbol *> giatsSymbols;
2108   SymbolRVASet longJmpTargets;
2109   SymbolRVASet ehContTargets;
2110   for (ObjFile *file : ctx.objFileInstances) {
2111     // If the object was compiled with /guard:cf, the address taken symbols
2112     // are in .gfids$y sections, and the longjmp targets are in .gljmp$y
2113     // sections. If the object was not compiled with /guard:cf, we assume there
2114     // were no setjmp targets, and that all code symbols with relocations are
2115     // possibly address-taken.
2116     if (file->hasGuardCF()) {
2117       markSymbolsForRVATable(file, file->getGuardFidChunks(), addressTakenSyms);
2118       markSymbolsForRVATable(file, file->getGuardIATChunks(), giatsRVASet);
2119       getSymbolsFromSections(file, file->getGuardIATChunks(), giatsSymbols);
2120       markSymbolsForRVATable(file, file->getGuardLJmpChunks(), longJmpTargets);
2121     } else {
2122       markSymbolsWithRelocations(file, addressTakenSyms);
2123     }
2124     // If the object was compiled with /guard:ehcont, the ehcont targets are in
2125     // .gehcont$y sections.
2126     if (file->hasGuardEHCont())
2127       markSymbolsForRVATable(file, file->getGuardEHContChunks(), ehContTargets);
2128   }
2129 
2130   // Mark the image entry as address-taken.
2131   ctx.forEachSymtab([&](SymbolTable &symtab) {
2132     if (symtab.entry)
2133       maybeAddAddressTakenFunction(addressTakenSyms, symtab.entry);
2134 
2135     // Mark exported symbols in executable sections as address-taken.
2136     for (Export &e : symtab.exports)
2137       maybeAddAddressTakenFunction(addressTakenSyms, e.sym);
2138   });
2139 
2140   // For each entry in the .giats table, check if it has a corresponding load
2141   // thunk (e.g. because the DLL that defines it will be delay-loaded) and, if
2142   // so, add the load thunk to the address taken (.gfids) table.
2143   for (Symbol *s : giatsSymbols) {
2144     if (auto *di = dyn_cast<DefinedImportData>(s)) {
2145       if (di->loadThunkSym)
2146         addSymbolToRVASet(addressTakenSyms, di->loadThunkSym);
2147     }
2148   }
2149 
2150   // Ensure sections referenced in the gfid table are 16-byte aligned.
2151   for (const ChunkAndOffset &c : addressTakenSyms)
2152     if (c.inputChunk->getAlignment() < 16)
2153       c.inputChunk->setAlignment(16);
2154 
2155   maybeAddRVATable(std::move(addressTakenSyms), "__guard_fids_table",
2156                    "__guard_fids_count");
2157 
2158   // Add the Guard Address Taken IAT Entry Table (.giats).
2159   maybeAddRVATable(std::move(giatsRVASet), "__guard_iat_table",
2160                    "__guard_iat_count");
2161 
2162   // Add the longjmp target table unless the user told us not to.
2163   if (config->guardCF & GuardCFLevel::LongJmp)
2164     maybeAddRVATable(std::move(longJmpTargets), "__guard_longjmp_table",
2165                      "__guard_longjmp_count");
2166 
2167   // Add the ehcont target table unless the user told us not to.
2168   if (config->guardCF & GuardCFLevel::EHCont)
2169     maybeAddRVATable(std::move(ehContTargets), "__guard_eh_cont_table",
2170                      "__guard_eh_cont_count");
2171 
2172   // Set __guard_flags, which will be used in the load config to indicate that
2173   // /guard:cf was enabled.
2174   uint32_t guardFlags = uint32_t(GuardFlags::CF_INSTRUMENTED) |
2175                         uint32_t(GuardFlags::CF_FUNCTION_TABLE_PRESENT);
2176   if (config->guardCF & GuardCFLevel::LongJmp)
2177     guardFlags |= uint32_t(GuardFlags::CF_LONGJUMP_TABLE_PRESENT);
2178   if (config->guardCF & GuardCFLevel::EHCont)
2179     guardFlags |= uint32_t(GuardFlags::EH_CONTINUATION_TABLE_PRESENT);
2180   ctx.forEachSymtab([guardFlags](SymbolTable &symtab) {
2181     Symbol *flagSym = symtab.findUnderscore("__guard_flags");
2182     cast<DefinedAbsolute>(flagSym)->setVA(guardFlags);
2183   });
2184 }
2185 
2186 // Take a list of input sections containing symbol table indices and add those
2187 // symbols to a vector. The challenge is that symbol RVAs are not known and
2188 // depend on the table size, so we can't directly build a set of integers.
getSymbolsFromSections(ObjFile * file,ArrayRef<SectionChunk * > symIdxChunks,std::vector<Symbol * > & symbols)2189 void Writer::getSymbolsFromSections(ObjFile *file,
2190                                     ArrayRef<SectionChunk *> symIdxChunks,
2191                                     std::vector<Symbol *> &symbols) {
2192   for (SectionChunk *c : symIdxChunks) {
2193     // Skip sections discarded by linker GC. This comes up when a .gfids section
2194     // is associated with something like a vtable and the vtable is discarded.
2195     // In this case, the associated gfids section is discarded, and we don't
2196     // mark the virtual member functions as address-taken by the vtable.
2197     if (!c->live)
2198       continue;
2199 
2200     // Validate that the contents look like symbol table indices.
2201     ArrayRef<uint8_t> data = c->getContents();
2202     if (data.size() % 4 != 0) {
2203       Warn(ctx) << "ignoring " << c->getSectionName()
2204                 << " symbol table index section in object " << file;
2205       continue;
2206     }
2207 
2208     // Read each symbol table index and check if that symbol was included in the
2209     // final link. If so, add it to the vector of symbols.
2210     ArrayRef<ulittle32_t> symIndices(
2211         reinterpret_cast<const ulittle32_t *>(data.data()), data.size() / 4);
2212     ArrayRef<Symbol *> objSymbols = file->getSymbols();
2213     for (uint32_t symIndex : symIndices) {
2214       if (symIndex >= objSymbols.size()) {
2215         Warn(ctx) << "ignoring invalid symbol table index in section "
2216                   << c->getSectionName() << " in object " << file;
2217         continue;
2218       }
2219       if (Symbol *s = objSymbols[symIndex]) {
2220         if (s->isLive())
2221           symbols.push_back(cast<Symbol>(s));
2222       }
2223     }
2224   }
2225 }
2226 
2227 // Take a list of input sections containing symbol table indices and add those
2228 // symbols to an RVA table.
markSymbolsForRVATable(ObjFile * file,ArrayRef<SectionChunk * > symIdxChunks,SymbolRVASet & tableSymbols)2229 void Writer::markSymbolsForRVATable(ObjFile *file,
2230                                     ArrayRef<SectionChunk *> symIdxChunks,
2231                                     SymbolRVASet &tableSymbols) {
2232   std::vector<Symbol *> syms;
2233   getSymbolsFromSections(file, symIdxChunks, syms);
2234 
2235   for (Symbol *s : syms)
2236     addSymbolToRVASet(tableSymbols, cast<Defined>(s));
2237 }
2238 
2239 // Replace the absolute table symbol with a synthetic symbol pointing to
2240 // tableChunk so that we can emit base relocations for it and resolve section
2241 // relative relocations.
maybeAddRVATable(SymbolRVASet tableSymbols,StringRef tableSym,StringRef countSym,bool hasFlag)2242 void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym,
2243                               StringRef countSym, bool hasFlag) {
2244   if (tableSymbols.empty())
2245     return;
2246 
2247   NonSectionChunk *tableChunk;
2248   if (hasFlag)
2249     tableChunk = make<RVAFlagTableChunk>(std::move(tableSymbols));
2250   else
2251     tableChunk = make<RVATableChunk>(std::move(tableSymbols));
2252   rdataSec->addChunk(tableChunk);
2253 
2254   ctx.forEachSymtab([&](SymbolTable &symtab) {
2255     Symbol *t = symtab.findUnderscore(tableSym);
2256     Symbol *c = symtab.findUnderscore(countSym);
2257     replaceSymbol<DefinedSynthetic>(t, t->getName(), tableChunk);
2258     cast<DefinedAbsolute>(c)->setVA(tableChunk->getSize() / (hasFlag ? 5 : 4));
2259   });
2260 }
2261 
2262 // Create CHPE metadata chunks.
createECChunks()2263 void Writer::createECChunks() {
2264   if (!ctx.symtab.isEC())
2265     return;
2266 
2267   for (Symbol *s : ctx.symtab.expSymbols) {
2268     auto sym = dyn_cast<Defined>(s);
2269     if (!sym || !sym->getChunk())
2270       continue;
2271     if (auto thunk = dyn_cast<ECExportThunkChunk>(sym->getChunk())) {
2272       hexpthkSec->addChunk(thunk);
2273       exportThunks.push_back({thunk, thunk->target});
2274     } else if (auto def = dyn_cast<DefinedRegular>(sym)) {
2275       // Allow section chunk to be treated as an export thunk if it looks like
2276       // one.
2277       SectionChunk *chunk = def->getChunk();
2278       if (!chunk->live || chunk->getMachine() != AMD64)
2279         continue;
2280       assert(sym->getName().starts_with("EXP+"));
2281       StringRef targetName = sym->getName().substr(strlen("EXP+"));
2282       // If EXP+#foo is an export thunk of a hybrid patchable function,
2283       // we should use the #foo$hp_target symbol as the redirection target.
2284       // First, try to look up the $hp_target symbol. If it can't be found,
2285       // assume it's a regular function and look for #foo instead.
2286       Symbol *targetSym = ctx.symtab.find((targetName + "$hp_target").str());
2287       if (!targetSym)
2288         targetSym = ctx.symtab.find(targetName);
2289       Defined *t = dyn_cast_or_null<Defined>(targetSym);
2290       if (t && isArm64EC(t->getChunk()->getMachine()))
2291         exportThunks.push_back({chunk, t});
2292     }
2293   }
2294 
2295   auto codeMapChunk = make<ECCodeMapChunk>(codeMap);
2296   rdataSec->addChunk(codeMapChunk);
2297   Symbol *codeMapSym = ctx.symtab.findUnderscore("__hybrid_code_map");
2298   replaceSymbol<DefinedSynthetic>(codeMapSym, codeMapSym->getName(),
2299                                   codeMapChunk);
2300 
2301   CHPECodeRangesChunk *ranges = make<CHPECodeRangesChunk>(exportThunks);
2302   rdataSec->addChunk(ranges);
2303   Symbol *rangesSym =
2304       ctx.symtab.findUnderscore("__x64_code_ranges_to_entry_points");
2305   replaceSymbol<DefinedSynthetic>(rangesSym, rangesSym->getName(), ranges);
2306 
2307   CHPERedirectionChunk *entryPoints = make<CHPERedirectionChunk>(exportThunks);
2308   a64xrmSec->addChunk(entryPoints);
2309   Symbol *entryPointsSym =
2310       ctx.symtab.findUnderscore("__arm64x_redirection_metadata");
2311   replaceSymbol<DefinedSynthetic>(entryPointsSym, entryPointsSym->getName(),
2312                                   entryPoints);
2313 }
2314 
2315 // MinGW specific. Gather all relocations that are imported from a DLL even
2316 // though the code didn't expect it to, produce the table that the runtime
2317 // uses for fixing them up, and provide the synthetic symbols that the
2318 // runtime uses for finding the table.
createRuntimePseudoRelocs()2319 void Writer::createRuntimePseudoRelocs() {
2320   ctx.forEachSymtab([&](SymbolTable &symtab) {
2321     std::vector<RuntimePseudoReloc> rels;
2322 
2323     for (Chunk *c : ctx.driver.getChunks()) {
2324       auto *sc = dyn_cast<SectionChunk>(c);
2325       if (!sc || !sc->live || &sc->file->symtab != &symtab)
2326         continue;
2327       // Don't create pseudo relocations for sections that won't be
2328       // mapped at runtime.
2329       if (sc->header->Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
2330         continue;
2331       sc->getRuntimePseudoRelocs(rels);
2332     }
2333 
2334     if (!ctx.config.pseudoRelocs) {
2335       // Not writing any pseudo relocs; if some were needed, error out and
2336       // indicate what required them.
2337       for (const RuntimePseudoReloc &rpr : rels)
2338         Err(ctx) << "automatic dllimport of " << rpr.sym->getName() << " in "
2339                  << toString(rpr.target->file)
2340                  << " requires pseudo relocations";
2341       return;
2342     }
2343 
2344     if (!rels.empty()) {
2345       Log(ctx) << "Writing " << Twine(rels.size())
2346                << " runtime pseudo relocations";
2347       const char *symbolName = "_pei386_runtime_relocator";
2348       Symbol *relocator = symtab.findUnderscore(symbolName);
2349       if (!relocator)
2350         Err(ctx)
2351             << "output image has runtime pseudo relocations, but the function "
2352             << symbolName
2353             << " is missing; it is needed for fixing the relocations at "
2354                "runtime";
2355     }
2356 
2357     PseudoRelocTableChunk *table = make<PseudoRelocTableChunk>(rels);
2358     rdataSec->addChunk(table);
2359     EmptyChunk *endOfList = make<EmptyChunk>();
2360     rdataSec->addChunk(endOfList);
2361 
2362     Symbol *headSym = symtab.findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST__");
2363     Symbol *endSym = symtab.findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST_END__");
2364     replaceSymbol<DefinedSynthetic>(headSym, headSym->getName(), table);
2365     replaceSymbol<DefinedSynthetic>(endSym, endSym->getName(), endOfList);
2366   });
2367 }
2368 
2369 // MinGW specific.
2370 // The MinGW .ctors and .dtors lists have sentinels at each end;
2371 // a (uintptr_t)-1 at the start and a (uintptr_t)0 at the end.
2372 // There's a symbol pointing to the start sentinel pointer, __CTOR_LIST__
2373 // and __DTOR_LIST__ respectively.
insertCtorDtorSymbols()2374 void Writer::insertCtorDtorSymbols() {
2375   ctx.forEachSymtab([&](SymbolTable &symtab) {
2376     AbsolutePointerChunk *ctorListHead = make<AbsolutePointerChunk>(symtab, -1);
2377     AbsolutePointerChunk *ctorListEnd = make<AbsolutePointerChunk>(symtab, 0);
2378     AbsolutePointerChunk *dtorListHead = make<AbsolutePointerChunk>(symtab, -1);
2379     AbsolutePointerChunk *dtorListEnd = make<AbsolutePointerChunk>(symtab, 0);
2380     ctorsSec->insertChunkAtStart(ctorListHead);
2381     ctorsSec->addChunk(ctorListEnd);
2382     dtorsSec->insertChunkAtStart(dtorListHead);
2383     dtorsSec->addChunk(dtorListEnd);
2384 
2385     Symbol *ctorListSym = symtab.findUnderscore("__CTOR_LIST__");
2386     Symbol *dtorListSym = symtab.findUnderscore("__DTOR_LIST__");
2387     replaceSymbol<DefinedSynthetic>(ctorListSym, ctorListSym->getName(),
2388                                     ctorListHead);
2389     replaceSymbol<DefinedSynthetic>(dtorListSym, dtorListSym->getName(),
2390                                     dtorListHead);
2391   });
2392 
2393   if (ctx.hybridSymtab) {
2394     ctorsSec->splitECChunks();
2395     dtorsSec->splitECChunks();
2396   }
2397 }
2398 
2399 // MinGW (really, Cygwin) specific.
2400 // The Cygwin startup code uses __data_start__ __data_end__ __bss_start__
2401 // and __bss_end__ to know what to copy during fork emulation.
insertBssDataStartEndSymbols()2402 void Writer::insertBssDataStartEndSymbols() {
2403   if (!dataSec->chunks.empty()) {
2404     Symbol *dataStartSym = ctx.symtab.find("__data_start__");
2405     Symbol *dataEndSym = ctx.symtab.find("__data_end__");
2406     Chunk *endChunk = dataSec->chunks.back();
2407     replaceSymbol<DefinedSynthetic>(dataStartSym, dataStartSym->getName(),
2408                                     dataSec->chunks.front());
2409     replaceSymbol<DefinedSynthetic>(dataEndSym, dataEndSym->getName(), endChunk,
2410                                     endChunk->getSize());
2411   }
2412 
2413   if (!bssSec->chunks.empty()) {
2414     Symbol *bssStartSym = ctx.symtab.find("__bss_start__");
2415     Symbol *bssEndSym = ctx.symtab.find("__bss_end__");
2416     Chunk *endChunk = bssSec->chunks.back();
2417     replaceSymbol<DefinedSynthetic>(bssStartSym, bssStartSym->getName(),
2418                                     bssSec->chunks.front());
2419     replaceSymbol<DefinedSynthetic>(bssEndSym, bssEndSym->getName(), endChunk,
2420                                     endChunk->getSize());
2421   }
2422 }
2423 
2424 // Handles /section options to allow users to overwrite
2425 // section attributes.
setSectionPermissions()2426 void Writer::setSectionPermissions() {
2427   llvm::TimeTraceScope timeScope("Sections permissions");
2428   for (auto &p : ctx.config.section) {
2429     StringRef name = p.first;
2430     uint32_t perm = p.second;
2431     for (OutputSection *sec : ctx.outputSections)
2432       if (sec->name == name)
2433         sec->setPermissions(perm);
2434   }
2435 }
2436 
2437 // Set symbols used by ARM64EC metadata.
setECSymbols()2438 void Writer::setECSymbols() {
2439   if (!ctx.symtab.isEC())
2440     return;
2441 
2442   llvm::stable_sort(exportThunks, [](const std::pair<Chunk *, Defined *> &a,
2443                                      const std::pair<Chunk *, Defined *> &b) {
2444     return a.first->getRVA() < b.first->getRVA();
2445   });
2446 
2447   ChunkRange &chpePdata = ctx.config.machine == ARM64X ? hybridPdata : pdata;
2448   Symbol *rfeTableSym = ctx.symtab.findUnderscore("__arm64x_extra_rfe_table");
2449   replaceSymbol<DefinedSynthetic>(rfeTableSym, "__arm64x_extra_rfe_table",
2450                                   chpePdata.first);
2451 
2452   if (chpePdata.first) {
2453     Symbol *rfeSizeSym =
2454         ctx.symtab.findUnderscore("__arm64x_extra_rfe_table_size");
2455     cast<DefinedAbsolute>(rfeSizeSym)
2456         ->setVA(chpePdata.last->getRVA() + chpePdata.last->getSize() -
2457                 chpePdata.first->getRVA());
2458   }
2459 
2460   Symbol *rangesCountSym =
2461       ctx.symtab.findUnderscore("__x64_code_ranges_to_entry_points_count");
2462   cast<DefinedAbsolute>(rangesCountSym)->setVA(exportThunks.size());
2463 
2464   Symbol *entryPointCountSym =
2465       ctx.symtab.findUnderscore("__arm64x_redirection_metadata_count");
2466   cast<DefinedAbsolute>(entryPointCountSym)->setVA(exportThunks.size());
2467 
2468   Symbol *iatSym = ctx.symtab.findUnderscore("__hybrid_auxiliary_iat");
2469   replaceSymbol<DefinedSynthetic>(iatSym, "__hybrid_auxiliary_iat",
2470                                   idata.auxIat.empty() ? nullptr
2471                                                        : idata.auxIat.front());
2472 
2473   Symbol *iatCopySym = ctx.symtab.findUnderscore("__hybrid_auxiliary_iat_copy");
2474   replaceSymbol<DefinedSynthetic>(
2475       iatCopySym, "__hybrid_auxiliary_iat_copy",
2476       idata.auxIatCopy.empty() ? nullptr : idata.auxIatCopy.front());
2477 
2478   Symbol *delayIatSym =
2479       ctx.symtab.findUnderscore("__hybrid_auxiliary_delayload_iat");
2480   replaceSymbol<DefinedSynthetic>(
2481       delayIatSym, "__hybrid_auxiliary_delayload_iat",
2482       delayIdata.getAuxIat().empty() ? nullptr
2483                                      : delayIdata.getAuxIat().front());
2484 
2485   Symbol *delayIatCopySym =
2486       ctx.symtab.findUnderscore("__hybrid_auxiliary_delayload_iat_copy");
2487   replaceSymbol<DefinedSynthetic>(
2488       delayIatCopySym, "__hybrid_auxiliary_delayload_iat_copy",
2489       delayIdata.getAuxIatCopy().empty() ? nullptr
2490                                          : delayIdata.getAuxIatCopy().front());
2491 
2492   if (ctx.config.machine == ARM64X) {
2493     // For the hybrid image, set the alternate entry point to the EC entry
2494     // point. In the hybrid view, it is swapped to the native entry point
2495     // using ARM64X relocations.
2496     if (auto altEntrySym = cast_or_null<Defined>(ctx.symtab.entry)) {
2497       // If the entry is an EC export thunk, use its target instead.
2498       if (auto thunkChunk =
2499               dyn_cast<ECExportThunkChunk>(altEntrySym->getChunk()))
2500         altEntrySym = thunkChunk->target;
2501       ctx.symtab.findUnderscore("__arm64x_native_entrypoint")
2502           ->replaceKeepingName(altEntrySym, sizeof(SymbolUnion));
2503     }
2504 
2505     if (ctx.symtab.edataStart)
2506       ctx.dynamicRelocs->set(
2507           dataDirOffset64 + EXPORT_TABLE * sizeof(data_directory) +
2508               offsetof(data_directory, Size),
2509           ctx.symtab.edataEnd->getRVA() - ctx.symtab.edataStart->getRVA() +
2510               ctx.symtab.edataEnd->getSize());
2511     if (hybridPdata.first)
2512       ctx.dynamicRelocs->set(
2513           dataDirOffset64 + EXCEPTION_TABLE * sizeof(data_directory) +
2514               offsetof(data_directory, Size),
2515           hybridPdata.last->getRVA() - hybridPdata.first->getRVA() +
2516               hybridPdata.last->getSize());
2517     if (chpeSym && pdata.first)
2518       ctx.dynamicRelocs->set(
2519           chpeSym->getRVA() + offsetof(chpe_metadata, ExtraRFETableSize),
2520           pdata.last->getRVA() + pdata.last->getSize() - pdata.first->getRVA());
2521   }
2522 }
2523 
2524 // Write section contents to a mmap'ed file.
writeSections()2525 void Writer::writeSections() {
2526   llvm::TimeTraceScope timeScope("Write sections");
2527   uint8_t *buf = buffer->getBufferStart();
2528   for (OutputSection *sec : ctx.outputSections) {
2529     uint8_t *secBuf = buf + sec->getFileOff();
2530     // Fill gaps between functions in .text with INT3 instructions
2531     // instead of leaving as NUL bytes (which can be interpreted as
2532     // ADD instructions). Only fill the gaps between chunks. Most
2533     // chunks overwrite it anyway, but uninitialized data chunks
2534     // merged into a code section don't.
2535     if ((sec->header.Characteristics & IMAGE_SCN_CNT_CODE) &&
2536         (ctx.config.machine == AMD64 || ctx.config.machine == I386)) {
2537       uint32_t prevEnd = 0;
2538       for (Chunk *c : sec->chunks) {
2539         uint32_t off = c->getRVA() - sec->getRVA();
2540         memset(secBuf + prevEnd, 0xCC, off - prevEnd);
2541         prevEnd = off + c->getSize();
2542       }
2543       memset(secBuf + prevEnd, 0xCC, sec->getRawSize() - prevEnd);
2544     }
2545 
2546     parallelForEach(sec->chunks, [&](Chunk *c) {
2547       c->writeTo(secBuf + c->getRVA() - sec->getRVA());
2548     });
2549   }
2550 }
2551 
writeBuildId()2552 void Writer::writeBuildId() {
2553   llvm::TimeTraceScope timeScope("Write build ID");
2554 
2555   // There are two important parts to the build ID.
2556   // 1) If building with debug info, the COFF debug directory contains a
2557   //    timestamp as well as a Guid and Age of the PDB.
2558   // 2) In all cases, the PE COFF file header also contains a timestamp.
2559   // For reproducibility, instead of a timestamp we want to use a hash of the
2560   // PE contents.
2561   Configuration *config = &ctx.config;
2562   bool generateSyntheticBuildId = config->buildIDHash == BuildIDHash::Binary;
2563   if (generateSyntheticBuildId) {
2564     assert(buildId && "BuildId is not set!");
2565     // BuildId->BuildId was filled in when the PDB was written.
2566   }
2567 
2568   // At this point the only fields in the COFF file which remain unset are the
2569   // "timestamp" in the COFF file header, and the ones in the coff debug
2570   // directory.  Now we can hash the file and write that hash to the various
2571   // timestamp fields in the file.
2572   StringRef outputFileData(
2573       reinterpret_cast<const char *>(buffer->getBufferStart()),
2574       buffer->getBufferSize());
2575 
2576   uint32_t timestamp = config->timestamp;
2577   uint64_t hash = 0;
2578 
2579   if (config->repro || generateSyntheticBuildId)
2580     hash = xxh3_64bits(outputFileData);
2581 
2582   if (config->repro)
2583     timestamp = static_cast<uint32_t>(hash);
2584 
2585   if (generateSyntheticBuildId) {
2586     buildId->buildId->PDB70.CVSignature = OMF::Signature::PDB70;
2587     buildId->buildId->PDB70.Age = 1;
2588     memcpy(buildId->buildId->PDB70.Signature, &hash, 8);
2589     // xxhash only gives us 8 bytes, so put some fixed data in the other half.
2590     memcpy(&buildId->buildId->PDB70.Signature[8], "LLD PDB.", 8);
2591   }
2592 
2593   if (debugDirectory)
2594     debugDirectory->setTimeDateStamp(timestamp);
2595 
2596   uint8_t *buf = buffer->getBufferStart();
2597   buf += dosStubSize + sizeof(PEMagic);
2598   object::coff_file_header *coffHeader =
2599       reinterpret_cast<coff_file_header *>(buf);
2600   coffHeader->TimeDateStamp = timestamp;
2601 }
2602 
2603 // Sort .pdata section contents according to PE/COFF spec 5.5.
2604 template <typename T>
sortExceptionTable(ChunkRange & exceptionTable)2605 void Writer::sortExceptionTable(ChunkRange &exceptionTable) {
2606   if (!exceptionTable.first)
2607     return;
2608 
2609   // We assume .pdata contains function table entries only.
2610   auto bufAddr = [&](Chunk *c) {
2611     OutputSection *os = ctx.getOutputSection(c);
2612     return buffer->getBufferStart() + os->getFileOff() + c->getRVA() -
2613            os->getRVA();
2614   };
2615   uint8_t *begin = bufAddr(exceptionTable.first);
2616   uint8_t *end = bufAddr(exceptionTable.last) + exceptionTable.last->getSize();
2617   if ((end - begin) % sizeof(T) != 0) {
2618     Fatal(ctx) << "unexpected .pdata size: " << (end - begin)
2619                << " is not a multiple of " << sizeof(T);
2620   }
2621 
2622   parallelSort(MutableArrayRef<T>(reinterpret_cast<T *>(begin),
2623                                   reinterpret_cast<T *>(end)),
2624                [](const T &a, const T &b) { return a.begin < b.begin; });
2625 }
2626 
2627 // Sort .pdata section contents according to PE/COFF spec 5.5.
sortExceptionTables()2628 void Writer::sortExceptionTables() {
2629   llvm::TimeTraceScope timeScope("Sort exception table");
2630 
2631   struct EntryX64 {
2632     ulittle32_t begin, end, unwind;
2633   };
2634   struct EntryArm {
2635     ulittle32_t begin, unwind;
2636   };
2637 
2638   switch (ctx.config.machine) {
2639   case AMD64:
2640     sortExceptionTable<EntryX64>(pdata);
2641     break;
2642   case ARM64EC:
2643   case ARM64X:
2644     sortExceptionTable<EntryX64>(hybridPdata);
2645     [[fallthrough]];
2646   case ARMNT:
2647   case ARM64:
2648     sortExceptionTable<EntryArm>(pdata);
2649     break;
2650   default:
2651     if (pdata.first)
2652       ctx.e.errs() << "warning: don't know how to handle .pdata\n";
2653     break;
2654   }
2655 }
2656 
2657 // The CRT section contains, among other things, the array of function
2658 // pointers that initialize every global variable that is not trivially
2659 // constructed. The CRT calls them one after the other prior to invoking
2660 // main().
2661 //
2662 // As per C++ spec, 3.6.2/2.3,
2663 // "Variables with ordered initialization defined within a single
2664 // translation unit shall be initialized in the order of their definitions
2665 // in the translation unit"
2666 //
2667 // It is therefore critical to sort the chunks containing the function
2668 // pointers in the order that they are listed in the object file (top to
2669 // bottom), otherwise global objects might not be initialized in the
2670 // correct order.
sortCRTSectionChunks(std::vector<Chunk * > & chunks)2671 void Writer::sortCRTSectionChunks(std::vector<Chunk *> &chunks) {
2672   auto sectionChunkOrder = [](const Chunk *a, const Chunk *b) {
2673     auto sa = dyn_cast<SectionChunk>(a);
2674     auto sb = dyn_cast<SectionChunk>(b);
2675     assert(sa && sb && "Non-section chunks in CRT section!");
2676 
2677     StringRef sAObj = sa->file->mb.getBufferIdentifier();
2678     StringRef sBObj = sb->file->mb.getBufferIdentifier();
2679 
2680     return sAObj == sBObj && sa->getSectionNumber() < sb->getSectionNumber();
2681   };
2682   llvm::stable_sort(chunks, sectionChunkOrder);
2683 
2684   if (ctx.config.verbose) {
2685     for (auto &c : chunks) {
2686       auto sc = dyn_cast<SectionChunk>(c);
2687       Log(ctx) << "  " << sc->file->mb.getBufferIdentifier().str()
2688                << ", SectionID: " << sc->getSectionNumber();
2689     }
2690   }
2691 }
2692 
findSection(StringRef name)2693 OutputSection *Writer::findSection(StringRef name) {
2694   for (OutputSection *sec : ctx.outputSections)
2695     if (sec->name == name)
2696       return sec;
2697   return nullptr;
2698 }
2699 
getSizeOfInitializedData()2700 uint32_t Writer::getSizeOfInitializedData() {
2701   uint32_t res = 0;
2702   for (OutputSection *s : ctx.outputSections)
2703     if (s->header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)
2704       res += s->getRawSize();
2705   return res;
2706 }
2707 
2708 // Add base relocations to .reloc section.
addBaserels()2709 void Writer::addBaserels() {
2710   if (!ctx.config.relocatable)
2711     return;
2712   std::vector<Baserel> v;
2713   for (OutputSection *sec : ctx.outputSections) {
2714     if (sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
2715       continue;
2716     llvm::TimeTraceScope timeScope("Base relocations: ", sec->name);
2717     // Collect all locations for base relocations.
2718     for (Chunk *c : sec->chunks)
2719       c->getBaserels(&v);
2720     // Add the addresses to .reloc section.
2721     if (!v.empty())
2722       addBaserelBlocks(v);
2723     v.clear();
2724   }
2725 }
2726 
2727 // Add addresses to .reloc section. Note that addresses are grouped by page.
addBaserelBlocks(std::vector<Baserel> & v)2728 void Writer::addBaserelBlocks(std::vector<Baserel> &v) {
2729   const uint32_t mask = ~uint32_t(pageSize - 1);
2730   uint32_t page = v[0].rva & mask;
2731   size_t i = 0, j = 1;
2732   llvm::sort(v,
2733              [](const Baserel &x, const Baserel &y) { return x.rva < y.rva; });
2734   for (size_t e = v.size(); j < e; ++j) {
2735     uint32_t p = v[j].rva & mask;
2736     if (p == page)
2737       continue;
2738     relocSec->addChunk(make<BaserelChunk>(page, &v[i], &v[0] + j));
2739     i = j;
2740     page = p;
2741   }
2742   if (i == j)
2743     return;
2744   relocSec->addChunk(make<BaserelChunk>(page, &v[i], &v[0] + j));
2745 }
2746 
createDynamicRelocs()2747 void Writer::createDynamicRelocs() {
2748   if (!ctx.dynamicRelocs)
2749     return;
2750 
2751   // Adjust the Machine field in the COFF header to AMD64.
2752   ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint16_t),
2753                          coffHeaderOffset + offsetof(coff_file_header, Machine),
2754                          AMD64);
2755 
2756   if (ctx.symtab.entry != ctx.hybridSymtab->entry ||
2757       pdata.first != hybridPdata.first) {
2758     chpeSym = cast_or_null<DefinedRegular>(
2759         ctx.symtab.findUnderscore("__chpe_metadata"));
2760     if (!chpeSym)
2761       Warn(ctx) << "'__chpe_metadata' is missing for ARM64X target";
2762   }
2763 
2764   if (ctx.symtab.entry != ctx.hybridSymtab->entry) {
2765     ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
2766                            peHeaderOffset +
2767                                offsetof(pe32plus_header, AddressOfEntryPoint),
2768                            cast_or_null<Defined>(ctx.symtab.entry));
2769 
2770     // Swap the alternate entry point in the CHPE metadata.
2771     if (chpeSym)
2772       ctx.dynamicRelocs->add(
2773           IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
2774           Arm64XRelocVal(chpeSym, offsetof(chpe_metadata, AlternateEntryPoint)),
2775           cast_or_null<Defined>(ctx.hybridSymtab->entry));
2776   }
2777 
2778   if (ctx.symtab.edataStart != ctx.hybridSymtab->edataStart) {
2779     ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
2780                            dataDirOffset64 +
2781                                EXPORT_TABLE * sizeof(data_directory) +
2782                                offsetof(data_directory, RelativeVirtualAddress),
2783                            ctx.symtab.edataStart);
2784     // The Size value is assigned after addresses are finalized.
2785     ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
2786                            dataDirOffset64 +
2787                                EXPORT_TABLE * sizeof(data_directory) +
2788                                offsetof(data_directory, Size));
2789   }
2790 
2791   if (pdata.first != hybridPdata.first) {
2792     ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
2793                            dataDirOffset64 +
2794                                EXCEPTION_TABLE * sizeof(data_directory) +
2795                                offsetof(data_directory, RelativeVirtualAddress),
2796                            hybridPdata.first);
2797     // The Size value is assigned after addresses are finalized.
2798     ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
2799                            dataDirOffset64 +
2800                                EXCEPTION_TABLE * sizeof(data_directory) +
2801                                offsetof(data_directory, Size));
2802 
2803     // Swap ExtraRFETable in the CHPE metadata.
2804     if (chpeSym) {
2805       ctx.dynamicRelocs->add(
2806           IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
2807           Arm64XRelocVal(chpeSym, offsetof(chpe_metadata, ExtraRFETable)),
2808           pdata.first);
2809       // The Size value is assigned after addresses are finalized.
2810       ctx.dynamicRelocs->add(
2811           IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
2812           Arm64XRelocVal(chpeSym, offsetof(chpe_metadata, ExtraRFETableSize)));
2813     }
2814   }
2815 
2816   // Set the hybrid load config to the EC load config.
2817   ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
2818                          dataDirOffset64 +
2819                              LOAD_CONFIG_TABLE * sizeof(data_directory) +
2820                              offsetof(data_directory, RelativeVirtualAddress),
2821                          ctx.symtab.loadConfigSym);
2822   ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
2823                          dataDirOffset64 +
2824                              LOAD_CONFIG_TABLE * sizeof(data_directory) +
2825                              offsetof(data_directory, Size),
2826                          ctx.symtab.loadConfigSize);
2827 }
2828 
createPartialSection(StringRef name,uint32_t outChars)2829 PartialSection *Writer::createPartialSection(StringRef name,
2830                                              uint32_t outChars) {
2831   PartialSection *&pSec = partialSections[{name, outChars}];
2832   if (pSec)
2833     return pSec;
2834   pSec = make<PartialSection>(name, outChars);
2835   return pSec;
2836 }
2837 
findPartialSection(StringRef name,uint32_t outChars)2838 PartialSection *Writer::findPartialSection(StringRef name, uint32_t outChars) {
2839   auto it = partialSections.find({name, outChars});
2840   if (it != partialSections.end())
2841     return it->second;
2842   return nullptr;
2843 }
2844 
fixTlsAlignment()2845 void Writer::fixTlsAlignment() {
2846   Defined *tlsSym =
2847       dyn_cast_or_null<Defined>(ctx.symtab.findUnderscore("_tls_used"));
2848   if (!tlsSym)
2849     return;
2850 
2851   OutputSection *sec = ctx.getOutputSection(tlsSym->getChunk());
2852   assert(sec && tlsSym->getRVA() >= sec->getRVA() &&
2853          "no output section for _tls_used");
2854 
2855   uint8_t *secBuf = buffer->getBufferStart() + sec->getFileOff();
2856   uint64_t tlsOffset = tlsSym->getRVA() - sec->getRVA();
2857   uint64_t directorySize = ctx.config.is64()
2858                                ? sizeof(object::coff_tls_directory64)
2859                                : sizeof(object::coff_tls_directory32);
2860 
2861   if (tlsOffset + directorySize > sec->getRawSize())
2862     Fatal(ctx) << "_tls_used sym is malformed";
2863 
2864   if (ctx.config.is64()) {
2865     object::coff_tls_directory64 *tlsDir =
2866         reinterpret_cast<object::coff_tls_directory64 *>(&secBuf[tlsOffset]);
2867     tlsDir->setAlignment(tlsAlignment);
2868   } else {
2869     object::coff_tls_directory32 *tlsDir =
2870         reinterpret_cast<object::coff_tls_directory32 *>(&secBuf[tlsOffset]);
2871     tlsDir->setAlignment(tlsAlignment);
2872   }
2873 }
2874 
prepareLoadConfig()2875 void Writer::prepareLoadConfig() {
2876   ctx.forEachActiveSymtab([&](SymbolTable &symtab) {
2877     if (!symtab.loadConfigSym)
2878       return;
2879 
2880     OutputSection *sec = ctx.getOutputSection(symtab.loadConfigSym->getChunk());
2881     uint8_t *secBuf = buffer->getBufferStart() + sec->getFileOff();
2882     uint8_t *symBuf = secBuf + (symtab.loadConfigSym->getRVA() - sec->getRVA());
2883 
2884     if (ctx.config.is64())
2885       prepareLoadConfig(symtab,
2886                         reinterpret_cast<coff_load_configuration64 *>(symBuf));
2887     else
2888       prepareLoadConfig(symtab,
2889                         reinterpret_cast<coff_load_configuration32 *>(symBuf));
2890   });
2891 }
2892 
2893 template <typename T>
prepareLoadConfig(SymbolTable & symtab,T * loadConfig)2894 void Writer::prepareLoadConfig(SymbolTable &symtab, T *loadConfig) {
2895   size_t loadConfigSize = loadConfig->Size;
2896 
2897 #define RETURN_IF_NOT_CONTAINS(field)                                          \
2898   if (loadConfigSize < offsetof(T, field) + sizeof(T::field)) {                \
2899     Warn(ctx) << "'_load_config_used' structure too small to include " #field; \
2900     return;                                                                    \
2901   }
2902 
2903 #define IF_CONTAINS(field)                                                     \
2904   if (loadConfigSize >= offsetof(T, field) + sizeof(T::field))
2905 
2906 #define CHECK_VA(field, sym)                                                   \
2907   if (auto *s = dyn_cast<DefinedSynthetic>(symtab.findUnderscore(sym)))        \
2908     if (loadConfig->field != ctx.config.imageBase + s->getRVA())               \
2909       Warn(ctx) << #field " not set correctly in '_load_config_used'";
2910 
2911 #define CHECK_ABSOLUTE(field, sym)                                             \
2912   if (auto *s = dyn_cast<DefinedAbsolute>(symtab.findUnderscore(sym)))         \
2913     if (loadConfig->field != s->getVA())                                       \
2914       Warn(ctx) << #field " not set correctly in '_load_config_used'";
2915 
2916   if (ctx.config.dependentLoadFlags) {
2917     RETURN_IF_NOT_CONTAINS(DependentLoadFlags)
2918     loadConfig->DependentLoadFlags = ctx.config.dependentLoadFlags;
2919   }
2920 
2921   if (ctx.dynamicRelocs) {
2922     IF_CONTAINS(DynamicValueRelocTableSection) {
2923       loadConfig->DynamicValueRelocTableSection = relocSec->sectionIndex;
2924       loadConfig->DynamicValueRelocTableOffset =
2925           ctx.dynamicRelocs->getRVA() - relocSec->getRVA();
2926     }
2927     else {
2928       Warn(ctx) << "'_load_config_used' structure too small to include dynamic "
2929                    "relocations";
2930     }
2931   }
2932 
2933   IF_CONTAINS(CHPEMetadataPointer) {
2934     // On ARM64X, only the EC version of the load config contains
2935     // CHPEMetadataPointer. Copy its value to the native load config.
2936     if (ctx.config.machine == ARM64X && !symtab.isEC() &&
2937         ctx.symtab.loadConfigSize >=
2938             offsetof(T, CHPEMetadataPointer) + sizeof(T::CHPEMetadataPointer)) {
2939       OutputSection *sec =
2940           ctx.getOutputSection(ctx.symtab.loadConfigSym->getChunk());
2941       uint8_t *secBuf = buffer->getBufferStart() + sec->getFileOff();
2942       auto hybridLoadConfig =
2943           reinterpret_cast<const coff_load_configuration64 *>(
2944               secBuf + (ctx.symtab.loadConfigSym->getRVA() - sec->getRVA()));
2945       loadConfig->CHPEMetadataPointer = hybridLoadConfig->CHPEMetadataPointer;
2946     }
2947   }
2948 
2949   if (ctx.config.guardCF == GuardCFLevel::Off)
2950     return;
2951   RETURN_IF_NOT_CONTAINS(GuardFlags)
2952   CHECK_VA(GuardCFFunctionTable, "__guard_fids_table")
2953   CHECK_ABSOLUTE(GuardCFFunctionCount, "__guard_fids_count")
2954   CHECK_ABSOLUTE(GuardFlags, "__guard_flags")
2955   IF_CONTAINS(GuardAddressTakenIatEntryCount) {
2956     CHECK_VA(GuardAddressTakenIatEntryTable, "__guard_iat_table")
2957     CHECK_ABSOLUTE(GuardAddressTakenIatEntryCount, "__guard_iat_count")
2958   }
2959 
2960   if (!(ctx.config.guardCF & GuardCFLevel::LongJmp))
2961     return;
2962   RETURN_IF_NOT_CONTAINS(GuardLongJumpTargetCount)
2963   CHECK_VA(GuardLongJumpTargetTable, "__guard_longjmp_table")
2964   CHECK_ABSOLUTE(GuardLongJumpTargetCount, "__guard_longjmp_count")
2965 
2966   if (!(ctx.config.guardCF & GuardCFLevel::EHCont))
2967     return;
2968   RETURN_IF_NOT_CONTAINS(GuardEHContinuationCount)
2969   CHECK_VA(GuardEHContinuationTable, "__guard_eh_cont_table")
2970   CHECK_ABSOLUTE(GuardEHContinuationCount, "__guard_eh_cont_count")
2971 
2972 #undef RETURN_IF_NOT_CONTAINS
2973 #undef IF_CONTAINS
2974 #undef CHECK_VA
2975 #undef CHECK_ABSOLUTE
2976 }
2977