xref: /freebsd/contrib/llvm-project/lld/ELF/Arch/X86_64.cpp (revision d13def78ccef6dbc25c2e197089ee5fc4d7b82c3)
1 //===- X86_64.cpp ---------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "Symbols.h"
11 #include "SyntheticSections.h"
12 #include "Target.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Object/ELF.h"
15 #include "llvm/Support/Endian.h"
16 
17 using namespace llvm;
18 using namespace llvm::object;
19 using namespace llvm::support::endian;
20 using namespace llvm::ELF;
21 
22 namespace lld {
23 namespace elf {
24 
25 namespace {
// Target description for x86-64. Every member function below overrides a
// TargetInfo hook; the definitions follow later in this file.
class X86_64 : public TargetInfo {
public:
  // Fills in the relocation types, PLT sizes, trap bytes and default image
  // base used for this target.
  X86_64();

  // Relocation classification.
  int getTlsGdRelaxSkip(RelType type) const override;
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;

  // Writers for .got.plt and PLT contents.
  void writeGotPltHeader(uint8_t *buf) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;

  // Stores the computed value `val` at `loc` in the width implied by `type`.
  void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override;

  // GOT and TLS relaxations: each rewrites the instruction bytes around
  // `loc` in place.
  RelExpr adjustRelaxExpr(RelType type, const uint8_t *data,
                          RelExpr expr) const override;
  void relaxGot(uint8_t *loc, RelType type, uint64_t val) const override;
  void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override;
  void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
  void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override;
  void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override;

  // Patches a split-stack prologue located in [loc, end); returns true when
  // one of the recognized prologue forms was rewritten.
  bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
                                        uint8_t stOther) const override;
};
50 } // namespace
51 
52 X86_64::X86_64() {
53   copyRel = R_X86_64_COPY;
54   gotRel = R_X86_64_GLOB_DAT;
55   noneRel = R_X86_64_NONE;
56   pltRel = R_X86_64_JUMP_SLOT;
57   relativeRel = R_X86_64_RELATIVE;
58   iRelativeRel = R_X86_64_IRELATIVE;
59   symbolicRel = R_X86_64_64;
60   tlsDescRel = R_X86_64_TLSDESC;
61   tlsGotRel = R_X86_64_TPOFF64;
62   tlsModuleIndexRel = R_X86_64_DTPMOD64;
63   tlsOffsetRel = R_X86_64_DTPOFF64;
64   pltHeaderSize = 16;
65   pltEntrySize = 16;
66   ipltEntrySize = 16;
67   trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
68 
69   // Align to the large page size (known as a superpage or huge page).
70   // FreeBSD automatically promotes large, superpage-aligned allocations.
71   defaultImageBase = 0x200000;
72 }
73 
74 int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; }
75 
76 RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
77                            const uint8_t *loc) const {
78   if (type == R_X86_64_GOTTPOFF)
79     config->hasStaticTlsModel = true;
80 
81   switch (type) {
82   case R_X86_64_8:
83   case R_X86_64_16:
84   case R_X86_64_32:
85   case R_X86_64_32S:
86   case R_X86_64_64:
87     return R_ABS;
88   case R_X86_64_DTPOFF32:
89   case R_X86_64_DTPOFF64:
90     return R_DTPREL;
91   case R_X86_64_TPOFF32:
92     return R_TLS;
93   case R_X86_64_TLSDESC_CALL:
94     return R_TLSDESC_CALL;
95   case R_X86_64_TLSLD:
96     return R_TLSLD_PC;
97   case R_X86_64_TLSGD:
98     return R_TLSGD_PC;
99   case R_X86_64_SIZE32:
100   case R_X86_64_SIZE64:
101     return R_SIZE;
102   case R_X86_64_PLT32:
103     return R_PLT_PC;
104   case R_X86_64_PC8:
105   case R_X86_64_PC16:
106   case R_X86_64_PC32:
107   case R_X86_64_PC64:
108     return R_PC;
109   case R_X86_64_GOT32:
110   case R_X86_64_GOT64:
111     return R_GOTPLT;
112   case R_X86_64_GOTPC32_TLSDESC:
113     return R_TLSDESC_PC;
114   case R_X86_64_GOTPCREL:
115   case R_X86_64_GOTPCRELX:
116   case R_X86_64_REX_GOTPCRELX:
117   case R_X86_64_GOTTPOFF:
118     return R_GOT_PC;
119   case R_X86_64_GOTOFF64:
120     return R_GOTPLTREL;
121   case R_X86_64_GOTPC32:
122   case R_X86_64_GOTPC64:
123     return R_GOTPLTONLY_PC;
124   case R_X86_64_NONE:
125     return R_NONE;
126   default:
127     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
128           ") against symbol " + toString(s));
129     return R_NONE;
130   }
131 }
132 
133 void X86_64::writeGotPltHeader(uint8_t *buf) const {
134   // The first entry holds the value of _DYNAMIC. It is not clear why that is
135   // required, but it is documented in the psabi and the glibc dynamic linker
136   // seems to use it (note that this is relevant for linking ld.so, not any
137   // other program).
138   write64le(buf, mainPart->dynamic->getVA());
139 }
140 
141 void X86_64::writeGotPlt(uint8_t *buf, const Symbol &s) const {
142   // See comments in X86::writeGotPlt.
143   write64le(buf, s.getPltVA() + 6);
144 }
145 
146 void X86_64::writePltHeader(uint8_t *buf) const {
147   const uint8_t pltData[] = {
148       0xff, 0x35, 0, 0, 0, 0, // pushq GOTPLT+8(%rip)
149       0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip)
150       0x0f, 0x1f, 0x40, 0x00, // nop
151   };
152   memcpy(buf, pltData, sizeof(pltData));
153   uint64_t gotPlt = in.gotPlt->getVA();
154   uint64_t plt = in.ibtPlt ? in.ibtPlt->getVA() : in.plt->getVA();
155   write32le(buf + 2, gotPlt - plt + 2); // GOTPLT+8
156   write32le(buf + 8, gotPlt - plt + 4); // GOTPLT+16
157 }
158 
159 void X86_64::writePlt(uint8_t *buf, const Symbol &sym,
160                       uint64_t pltEntryAddr) const {
161   const uint8_t inst[] = {
162       0xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip)
163       0x68, 0, 0, 0, 0,       // pushq <relocation index>
164       0xe9, 0, 0, 0, 0,       // jmpq plt[0]
165   };
166   memcpy(buf, inst, sizeof(inst));
167 
168   write32le(buf + 2, sym.getGotPltVA() - pltEntryAddr - 6);
169   write32le(buf + 7, sym.pltIndex);
170   write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
171 }
172 
173 RelType X86_64::getDynRel(RelType type) const {
174   if (type == R_X86_64_64 || type == R_X86_64_PC64 || type == R_X86_64_SIZE32 ||
175       type == R_X86_64_SIZE64)
176     return type;
177   return R_X86_64_NONE;
178 }
179 
180 void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
181   if (type == R_X86_64_TLSGD) {
182     // Convert
183     //   .byte 0x66
184     //   leaq x@tlsgd(%rip), %rdi
185     //   .word 0x6666
186     //   rex64
187     //   call __tls_get_addr@plt
188     // to the following two instructions.
189     const uint8_t inst[] = {
190         0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,
191         0x00, 0x00,                            // mov %fs:0x0,%rax
192         0x48, 0x8d, 0x80, 0,    0,    0,    0, // lea x@tpoff,%rax
193     };
194     memcpy(loc - 4, inst, sizeof(inst));
195 
196     // The original code used a pc relative relocation and so we have to
197     // compensate for the -4 in had in the addend.
198     write32le(loc + 8, val + 4);
199   } else {
200     // Convert
201     //   lea x@tlsgd(%rip), %rax
202     //   call *(%rax)
203     // to the following two instructions.
204     assert(type == R_X86_64_GOTPC32_TLSDESC);
205     if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
206       error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
207                                         "in callq *x@tlsdesc(%rip), %rax");
208       return;
209     }
210     // movq $x@tpoff(%rip),%rax
211     loc[-2] = 0xc7;
212     loc[-1] = 0xc0;
213     write32le(loc, val + 4);
214     // xchg ax,ax
215     loc[4] = 0x66;
216     loc[5] = 0x90;
217   }
218 }
219 
220 void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
221   if (type == R_X86_64_TLSGD) {
222     // Convert
223     //   .byte 0x66
224     //   leaq x@tlsgd(%rip), %rdi
225     //   .word 0x6666
226     //   rex64
227     //   call __tls_get_addr@plt
228     // to the following two instructions.
229     const uint8_t inst[] = {
230         0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,
231         0x00, 0x00,                            // mov %fs:0x0,%rax
232         0x48, 0x03, 0x05, 0,    0,    0,    0, // addq x@gottpoff(%rip),%rax
233     };
234     memcpy(loc - 4, inst, sizeof(inst));
235 
236     // Both code sequences are PC relatives, but since we are moving the
237     // constant forward by 8 bytes we have to subtract the value by 8.
238     write32le(loc + 8, val - 8);
239   } else {
240     // Convert
241     //   lea x@tlsgd(%rip), %rax
242     //   call *(%rax)
243     // to the following two instructions.
244     assert(type == R_X86_64_GOTPC32_TLSDESC);
245     if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
246       error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
247                                         "in callq *x@tlsdesc(%rip), %rax");
248       return;
249     }
250     // movq x@gottpoff(%rip),%rax
251     loc[-2] = 0x8b;
252     write32le(loc, val);
253     // xchg ax,ax
254     loc[4] = 0x66;
255     loc[5] = 0x90;
256   }
257 }
258 
259 // In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
260 // R_X86_64_TPOFF32 so that it does not use GOT.
261 void X86_64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const {
262   uint8_t *inst = loc - 3;
263   uint8_t reg = loc[-1] >> 3;
264   uint8_t *regSlot = loc - 1;
265 
266   // Note that ADD with RSP or R12 is converted to ADD instead of LEA
267   // because LEA with these registers needs 4 bytes to encode and thus
268   // wouldn't fit the space.
269 
270   if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
271     // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
272     memcpy(inst, "\x48\x81\xc4", 3);
273   } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
274     // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
275     memcpy(inst, "\x49\x81\xc4", 3);
276   } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
277     // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
278     memcpy(inst, "\x4d\x8d", 2);
279     *regSlot = 0x80 | (reg << 3) | reg;
280   } else if (memcmp(inst, "\x48\x03", 2) == 0) {
281     // "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
282     memcpy(inst, "\x48\x8d", 2);
283     *regSlot = 0x80 | (reg << 3) | reg;
284   } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
285     // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
286     memcpy(inst, "\x49\xc7", 2);
287     *regSlot = 0xc0 | reg;
288   } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
289     // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
290     memcpy(inst, "\x48\xc7", 2);
291     *regSlot = 0xc0 | reg;
292   } else {
293     error(getErrorLocation(loc - 3) +
294           "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only");
295   }
296 
297   // The original code used a PC relative relocation.
298   // Need to compensate for the -4 it had in the addend.
299   write32le(loc, val + 4);
300 }
301 
302 void X86_64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
303   if (type == R_X86_64_DTPOFF64) {
304     write64le(loc, val);
305     return;
306   }
307   if (type == R_X86_64_DTPOFF32) {
308     write32le(loc, val);
309     return;
310   }
311 
312   const uint8_t inst[] = {
313       0x66, 0x66,                                           // .word 0x6666
314       0x66,                                                 // .byte 0x66
315       0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0,%rax
316   };
317 
318   if (loc[4] == 0xe8) {
319     // Convert
320     //   leaq bar@tlsld(%rip), %rdi           # 48 8d 3d <Loc>
321     //   callq __tls_get_addr@PLT             # e8 <disp32>
322     //   leaq bar@dtpoff(%rax), %rcx
323     // to
324     //   .word 0x6666
325     //   .byte 0x66
326     //   mov %fs:0,%rax
327     //   leaq bar@tpoff(%rax), %rcx
328     memcpy(loc - 3, inst, sizeof(inst));
329     return;
330   }
331 
332   if (loc[4] == 0xff && loc[5] == 0x15) {
333     // Convert
334     //   leaq  x@tlsld(%rip),%rdi               # 48 8d 3d <Loc>
335     //   call *__tls_get_addr@GOTPCREL(%rip)    # ff 15 <disp32>
336     // to
337     //   .long  0x66666666
338     //   movq   %fs:0,%rax
339     // See "Table 11.9: LD -> LE Code Transition (LP64)" in
340     // https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf
341     loc[-3] = 0x66;
342     memcpy(loc - 2, inst, sizeof(inst));
343     return;
344   }
345 
346   error(getErrorLocation(loc - 3) +
347         "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
348 }
349 
350 void X86_64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
351   switch (type) {
352   case R_X86_64_8:
353     checkIntUInt(loc, val, 8, type);
354     *loc = val;
355     break;
356   case R_X86_64_PC8:
357     checkInt(loc, val, 8, type);
358     *loc = val;
359     break;
360   case R_X86_64_16:
361     checkIntUInt(loc, val, 16, type);
362     write16le(loc, val);
363     break;
364   case R_X86_64_PC16:
365     checkInt(loc, val, 16, type);
366     write16le(loc, val);
367     break;
368   case R_X86_64_32:
369     checkUInt(loc, val, 32, type);
370     write32le(loc, val);
371     break;
372   case R_X86_64_32S:
373   case R_X86_64_TPOFF32:
374   case R_X86_64_GOT32:
375   case R_X86_64_GOTPC32:
376   case R_X86_64_GOTPC32_TLSDESC:
377   case R_X86_64_GOTPCREL:
378   case R_X86_64_GOTPCRELX:
379   case R_X86_64_REX_GOTPCRELX:
380   case R_X86_64_PC32:
381   case R_X86_64_GOTTPOFF:
382   case R_X86_64_PLT32:
383   case R_X86_64_TLSGD:
384   case R_X86_64_TLSLD:
385   case R_X86_64_DTPOFF32:
386   case R_X86_64_SIZE32:
387     checkInt(loc, val, 32, type);
388     write32le(loc, val);
389     break;
390   case R_X86_64_64:
391   case R_X86_64_DTPOFF64:
392   case R_X86_64_PC64:
393   case R_X86_64_SIZE64:
394   case R_X86_64_GOT64:
395   case R_X86_64_GOTOFF64:
396   case R_X86_64_GOTPC64:
397     write64le(loc, val);
398     break;
399   default:
400     llvm_unreachable("unknown relocation");
401   }
402 }
403 
404 RelExpr X86_64::adjustRelaxExpr(RelType type, const uint8_t *data,
405                                 RelExpr relExpr) const {
406   if (type != R_X86_64_GOTPCRELX && type != R_X86_64_REX_GOTPCRELX)
407     return relExpr;
408   const uint8_t op = data[-2];
409   const uint8_t modRm = data[-1];
410 
411   // FIXME: When PIC is disabled and foo is defined locally in the
412   // lower 32 bit address space, memory operand in mov can be converted into
413   // immediate operand. Otherwise, mov must be changed to lea. We support only
414   // latter relaxation at this moment.
415   if (op == 0x8b)
416     return R_RELAX_GOT_PC;
417 
418   // Relax call and jmp.
419   if (op == 0xff && (modRm == 0x15 || modRm == 0x25))
420     return R_RELAX_GOT_PC;
421 
422   // Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor.
423   // If PIC then no relaxation is available.
424   // We also don't relax test/binop instructions without REX byte,
425   // they are 32bit operations and not common to have.
426   assert(type == R_X86_64_REX_GOTPCRELX);
427   return config->isPic ? relExpr : R_RELAX_GOT_PC_NOPIC;
428 }
429 
430 // A subset of relaxations can only be applied for no-PIC. This method
431 // handles such relaxations. Instructions encoding information was taken from:
432 // "Intel 64 and IA-32 Architectures Software Developer's Manual V2"
433 // (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/
434 //    64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf)
435 static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op,
436                           uint8_t modRm) {
437   const uint8_t rex = loc[-3];
438   // Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg".
439   if (op == 0x85) {
440     // See "TEST-Logical Compare" (4-428 Vol. 2B),
441     // TEST r/m64, r64 uses "full" ModR / M byte (no opcode extension).
442 
443     // ModR/M byte has form XX YYY ZZZ, where
444     // YYY is MODRM.reg(register 2), ZZZ is MODRM.rm(register 1).
445     // XX has different meanings:
446     // 00: The operand's memory address is in reg1.
447     // 01: The operand's memory address is reg1 + a byte-sized displacement.
448     // 10: The operand's memory address is reg1 + a word-sized displacement.
449     // 11: The operand is reg1 itself.
450     // If an instruction requires only one operand, the unused reg2 field
451     // holds extra opcode bits rather than a register code
452     // 0xC0 == 11 000 000 binary.
453     // 0x38 == 00 111 000 binary.
454     // We transfer reg2 to reg1 here as operand.
455     // See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3).
456     loc[-1] = 0xc0 | (modRm & 0x38) >> 3; // ModR/M byte.
457 
458     // Change opcode from TEST r/m64, r64 to TEST r/m64, imm32
459     // See "TEST-Logical Compare" (4-428 Vol. 2B).
460     loc[-2] = 0xf7;
461 
462     // Move R bit to the B bit in REX byte.
463     // REX byte is encoded as 0100WRXB, where
464     // 0100 is 4bit fixed pattern.
465     // REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the
466     //   default operand size is used (which is 32-bit for most but not all
467     //   instructions).
468     // REX.R This 1-bit value is an extension to the MODRM.reg field.
469     // REX.X This 1-bit value is an extension to the SIB.index field.
470     // REX.B This 1-bit value is an extension to the MODRM.rm field or the
471     // SIB.base field.
472     // See "2.2.1.2 More on REX Prefix Fields " (2-8 Vol. 2A).
473     loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2;
474     write32le(loc, val);
475     return;
476   }
477 
478   // If we are here then we need to relax the adc, add, and, cmp, or, sbb, sub
479   // or xor operations.
480 
481   // Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg".
482   // Logic is close to one for test instruction above, but we also
483   // write opcode extension here, see below for details.
484   loc[-1] = 0xc0 | (modRm & 0x38) >> 3 | (op & 0x3c); // ModR/M byte.
485 
486   // Primary opcode is 0x81, opcode extension is one of:
487   // 000b = ADD, 001b is OR, 010b is ADC, 011b is SBB,
488   // 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP.
489   // This value was wrote to MODRM.reg in a line above.
490   // See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15),
491   // "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for
492   // descriptions about each operation.
493   loc[-2] = 0x81;
494   loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2;
495   write32le(loc, val);
496 }
497 
498 void X86_64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const {
499   const uint8_t op = loc[-2];
500   const uint8_t modRm = loc[-1];
501 
502   // Convert "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg".
503   if (op == 0x8b) {
504     loc[-2] = 0x8d;
505     write32le(loc, val);
506     return;
507   }
508 
509   if (op != 0xff) {
510     // We are relaxing a rip relative to an absolute, so compensate
511     // for the old -4 addend.
512     assert(!config->isPic);
513     relaxGotNoPic(loc, val + 4, op, modRm);
514     return;
515   }
516 
517   // Convert call/jmp instructions.
518   if (modRm == 0x15) {
519     // ABI says we can convert "call *foo@GOTPCREL(%rip)" to "nop; call foo".
520     // Instead we convert to "addr32 call foo" where addr32 is an instruction
521     // prefix. That makes result expression to be a single instruction.
522     loc[-2] = 0x67; // addr32 prefix
523     loc[-1] = 0xe8; // call
524     write32le(loc, val);
525     return;
526   }
527 
528   // Convert "jmp *foo@GOTPCREL(%rip)" to "jmp foo; nop".
529   // jmp doesn't return, so it is fine to use nop here, it is just a stub.
530   assert(modRm == 0x25);
531   loc[-2] = 0xe9; // jmp
532   loc[3] = 0x90;  // nop
533   write32le(loc - 1, val + 1);
534 }
535 
536 // A split-stack prologue starts by checking the amount of stack remaining
537 // in one of two ways:
538 // A) Comparing of the stack pointer to a field in the tcb.
539 // B) Or a load of a stack pointer offset with an lea to r10 or r11.
540 bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
541                                               uint8_t stOther) const {
542   if (!config->is64) {
543     error("Target doesn't support split stacks.");
544     return false;
545   }
546 
547   if (loc + 8 >= end)
548     return false;
549 
550   // Replace "cmp %fs:0x70,%rsp" and subsequent branch
551   // with "stc, nopl 0x0(%rax,%rax,1)"
552   if (memcmp(loc, "\x64\x48\x3b\x24\x25", 5) == 0) {
553     memcpy(loc, "\xf9\x0f\x1f\x84\x00\x00\x00\x00", 8);
554     return true;
555   }
556 
557   // Adjust "lea X(%rsp),%rYY" to lea "(X - 0x4000)(%rsp),%rYY" where rYY could
558   // be r10 or r11. The lea instruction feeds a subsequent compare which checks
559   // if there is X available stack space. Making X larger effectively reserves
560   // that much additional space. The stack grows downward so subtract the value.
561   if (memcmp(loc, "\x4c\x8d\x94\x24", 4) == 0 ||
562       memcmp(loc, "\x4c\x8d\x9c\x24", 4) == 0) {
563     // The offset bytes are encoded four bytes after the start of the
564     // instruction.
565     write32le(loc + 4, read32le(loc + 4) - 0x4000);
566     return true;
567   }
568   return false;
569 }
570 
571 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
572 // entries containing endbr64 instructions. A PLT entry will be split into two
573 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
574 namespace {
// X86_64 variant whose PLT entries begin with endbr64. It overrides the
// .got.plt and PLT writers and adds writeIBTPlt.
class IntelIBT : public X86_64 {
public:
  // Sets pltHeaderSize to 0.
  IntelIBT();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  // Writes the PLT header followed by `numEntries` entries into `buf`.
  void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;

  // Number of bytes the header written by writeIBTPlt occupies.
  static const unsigned IBTPltHeaderSize = 16;
};
585 } // namespace
586 
587 IntelIBT::IntelIBT() { pltHeaderSize = 0; }
588 
589 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
590   uint64_t va =
591       in.ibtPlt->getVA() + IBTPltHeaderSize + s.pltIndex * pltEntrySize;
592   write64le(buf, va);
593 }
594 
595 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
596                         uint64_t pltEntryAddr) const {
597   const uint8_t Inst[] = {
598       0xf3, 0x0f, 0x1e, 0xfa,       // endbr64
599       0xff, 0x25, 0,    0,    0, 0, // jmpq *got(%rip)
600       0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
601   };
602   memcpy(buf, Inst, sizeof(Inst));
603   write32le(buf + 6, sym.getGotPltVA() - pltEntryAddr - 10);
604 }
605 
606 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
607   writePltHeader(buf);
608   buf += IBTPltHeaderSize;
609 
610   const uint8_t inst[] = {
611       0xf3, 0x0f, 0x1e, 0xfa,    // endbr64
612       0x68, 0,    0,    0,    0, // pushq <relocation index>
613       0xe9, 0,    0,    0,    0, // jmpq plt[0]
614       0x66, 0x90,                // nop
615   };
616 
617   for (size_t i = 0; i < numEntries; ++i) {
618     memcpy(buf, inst, sizeof(inst));
619     write32le(buf + 5, i);
620     write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
621     buf += sizeof(inst);
622   }
623 }
624 
// These nonstandard PLT entries are to mitigate Spectre v2 security
626 // vulnerability. In order to mitigate Spectre v2, we want to avoid indirect
627 // branch instructions such as `jmp *GOTPLT(%rip)`. So, in the following PLT
628 // entries, we use a CALL followed by MOV and RET to do the same thing as an
629 // indirect jump. That instruction sequence is so-called "retpoline".
630 //
631 // We have two types of retpoline PLTs as a size optimization. If `-z now`
632 // is specified, all dynamic symbols are resolved at load-time. Thus, when
633 // that option is given, we can omit code for symbol lazy resolution.
634 namespace {
// Retpoline PLT variant selected when `-z now` is not in effect. It
// overrides the .got.plt entry writer and both PLT writers of X86_64.
class Retpoline : public X86_64 {
public:
  // Sets the PLT header and entry sizes used by this variant.
  Retpoline();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};
643 
// Retpoline PLT variant selected when `-z now` is in effect. It overrides
// both PLT writers of X86_64.
class RetpolineZNow : public X86_64 {
public:
  // Sets the PLT header and entry sizes used by this variant.
  RetpolineZNow();
  // Deliberately writes nothing into the .got.plt slot.
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override {}
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};
652 } // namespace
653 
654 Retpoline::Retpoline() {
655   pltHeaderSize = 48;
656   pltEntrySize = 32;
657   ipltEntrySize = 32;
658 }
659 
660 void Retpoline::writeGotPlt(uint8_t *buf, const Symbol &s) const {
661   write64le(buf, s.getPltVA() + 17);
662 }
663 
664 void Retpoline::writePltHeader(uint8_t *buf) const {
665   const uint8_t insn[] = {
666       0xff, 0x35, 0,    0,    0,    0,          // 0:    pushq GOTPLT+8(%rip)
667       0x4c, 0x8b, 0x1d, 0,    0,    0,    0,    // 6:    mov GOTPLT+16(%rip), %r11
668       0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    callq next
669       0xf3, 0x90,                               // 12: loop: pause
670       0x0f, 0xae, 0xe8,                         // 14:   lfence
671       0xeb, 0xf9,                               // 17:   jmp loop
672       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
673       0x4c, 0x89, 0x1c, 0x24,                   // 20: next: mov %r11, (%rsp)
674       0xc3,                                     // 24:   ret
675       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 25:   int3; padding
676       0xcc, 0xcc, 0xcc, 0xcc,                   // 2c:   int3; padding
677   };
678   memcpy(buf, insn, sizeof(insn));
679 
680   uint64_t gotPlt = in.gotPlt->getVA();
681   uint64_t plt = in.plt->getVA();
682   write32le(buf + 2, gotPlt - plt - 6 + 8);
683   write32le(buf + 9, gotPlt - plt - 13 + 16);
684 }
685 
686 void Retpoline::writePlt(uint8_t *buf, const Symbol &sym,
687                          uint64_t pltEntryAddr) const {
688   const uint8_t insn[] = {
689       0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // 0:  mov foo@GOTPLT(%rip), %r11
690       0xe8, 0,    0,    0,    0,    // 7:  callq plt+0x20
691       0xe9, 0,    0,    0,    0,    // c:  jmp plt+0x12
692       0x68, 0,    0,    0,    0,    // 11: pushq <relocation index>
693       0xe9, 0,    0,    0,    0,    // 16: jmp plt+0
694       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1b: int3; padding
695   };
696   memcpy(buf, insn, sizeof(insn));
697 
698   uint64_t off = pltEntryAddr - in.plt->getVA();
699 
700   write32le(buf + 3, sym.getGotPltVA() - pltEntryAddr - 7);
701   write32le(buf + 8, -off - 12 + 32);
702   write32le(buf + 13, -off - 17 + 18);
703   write32le(buf + 18, sym.pltIndex);
704   write32le(buf + 23, -off - 27);
705 }
706 
707 RetpolineZNow::RetpolineZNow() {
708   pltHeaderSize = 32;
709   pltEntrySize = 16;
710   ipltEntrySize = 16;
711 }
712 
713 void RetpolineZNow::writePltHeader(uint8_t *buf) const {
714   const uint8_t insn[] = {
715       0xe8, 0x0b, 0x00, 0x00, 0x00, // 0:    call next
716       0xf3, 0x90,                   // 5:  loop: pause
717       0x0f, 0xae, 0xe8,             // 7:    lfence
718       0xeb, 0xf9,                   // a:    jmp loop
719       0xcc, 0xcc, 0xcc, 0xcc,       // c:    int3; .align 16
720       0x4c, 0x89, 0x1c, 0x24,       // 10: next: mov %r11, (%rsp)
721       0xc3,                         // 14:   ret
722       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 15:   int3; padding
723       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a:   int3; padding
724       0xcc,                         // 1f:   int3; padding
725   };
726   memcpy(buf, insn, sizeof(insn));
727 }
728 
729 void RetpolineZNow::writePlt(uint8_t *buf, const Symbol &sym,
730                              uint64_t pltEntryAddr) const {
731   const uint8_t insn[] = {
732       0x4c, 0x8b, 0x1d, 0,    0, 0, 0, // mov foo@GOTPLT(%rip), %r11
733       0xe9, 0,    0,    0,    0,       // jmp plt+0
734       0xcc, 0xcc, 0xcc, 0xcc,          // int3; padding
735   };
736   memcpy(buf, insn, sizeof(insn));
737 
738   write32le(buf + 3, sym.getGotPltVA() - pltEntryAddr - 7);
739   write32le(buf + 8, in.plt->getVA() - pltEntryAddr - 12);
740 }
741 
742 static TargetInfo *getTargetInfo() {
743   if (config->zRetpolineplt) {
744     if (config->zNow) {
745       static RetpolineZNow t;
746       return &t;
747     }
748     static Retpoline t;
749     return &t;
750   }
751 
752   if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
753     static IntelIBT t;
754     return &t;
755   }
756 
757   static X86_64 t;
758   return &t;
759 }
760 
761 TargetInfo *getX86_64TargetInfo() { return getTargetInfo(); }
762 
763 } // namespace elf
764 } // namespace lld
765