xref: /freebsd/contrib/llvm-project/lld/ELF/Arch/X86.cpp (revision 29fc4075e69fd27de0cded313ac6000165d99f8b)
1 //===- X86.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "Symbols.h"
11 #include "SyntheticSections.h"
12 #include "Target.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Support/Endian.h"
15 
16 using namespace llvm;
17 using namespace llvm::support::endian;
18 using namespace llvm::ELF;
19 using namespace lld;
20 using namespace lld::elf;
21 
22 namespace {
23 class X86 : public TargetInfo {
24 public:
25   X86();
26   int getTlsGdRelaxSkip(RelType type) const override;
27   RelExpr getRelExpr(RelType type, const Symbol &s,
28                      const uint8_t *loc) const override;
29   int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
30   void writeGotPltHeader(uint8_t *buf) const override;
31   RelType getDynRel(RelType type) const override;
32   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
33   void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
34   void writePltHeader(uint8_t *buf) const override;
35   void writePlt(uint8_t *buf, const Symbol &sym,
36                 uint64_t pltEntryAddr) const override;
37   void relocate(uint8_t *loc, const Relocation &rel,
38                 uint64_t val) const override;
39 
40   RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
41   void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
42                       uint64_t val) const override;
43   void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
44                       uint64_t val) const override;
45   void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
46                       uint64_t val) const override;
47   void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
48                       uint64_t val) const override;
49 };
50 } // namespace
51 
52 X86::X86() {
53   copyRel = R_386_COPY;
54   gotRel = R_386_GLOB_DAT;
55   pltRel = R_386_JUMP_SLOT;
56   iRelativeRel = R_386_IRELATIVE;
57   relativeRel = R_386_RELATIVE;
58   symbolicRel = R_386_32;
59   tlsDescRel = R_386_TLS_DESC;
60   tlsGotRel = R_386_TLS_TPOFF;
61   tlsModuleIndexRel = R_386_TLS_DTPMOD32;
62   tlsOffsetRel = R_386_TLS_DTPOFF32;
63   gotBaseSymInGotPlt = true;
64   pltHeaderSize = 16;
65   pltEntrySize = 16;
66   ipltEntrySize = 16;
67   trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
68 
69   // Align to the non-PAE large page size (known as a superpage or huge page).
70   // FreeBSD automatically promotes large, superpage-aligned allocations.
71   defaultImageBase = 0x400000;
72 }
73 
74 int X86::getTlsGdRelaxSkip(RelType type) const {
75   // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
76   return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
77 }
78 
79 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
80                         const uint8_t *loc) const {
81   if (type == R_386_TLS_IE || type == R_386_TLS_GOTIE)
82     config->hasTlsIe = true;
83 
84   switch (type) {
85   case R_386_8:
86   case R_386_16:
87   case R_386_32:
88     return R_ABS;
89   case R_386_TLS_LDO_32:
90     return R_DTPREL;
91   case R_386_TLS_GD:
92     return R_TLSGD_GOTPLT;
93   case R_386_TLS_LDM:
94     return R_TLSLD_GOTPLT;
95   case R_386_PLT32:
96     return R_PLT_PC;
97   case R_386_PC8:
98   case R_386_PC16:
99   case R_386_PC32:
100     return R_PC;
101   case R_386_GOTPC:
102     return R_GOTPLTONLY_PC;
103   case R_386_TLS_IE:
104     return R_GOT;
105   case R_386_GOT32:
106   case R_386_GOT32X:
107     // These relocations are arguably mis-designed because their calculations
108     // depend on the instructions they are applied to. This is bad because we
109     // usually don't care about whether the target section contains valid
110     // machine instructions or not. But this is part of the documented ABI, so
111     // we had to implement as the standard requires.
112     //
113     // x86 does not support PC-relative data access. Therefore, in order to
114     // access GOT contents, a GOT address needs to be known at link-time
115     // (which means non-PIC) or compilers have to emit code to get a GOT
116     // address at runtime (which means code is position-independent but
117     // compilers need to emit extra code for each GOT access.) This decision
118     // is made at compile-time. In the latter case, compilers emit code to
119     // load a GOT address to a register, which is usually %ebx.
120     //
121     // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
122     // foo@GOT(%ebx).
123     //
124     // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
125     // find such relocation, we should report an error. foo@GOT is resolved to
126     // an *absolute* address of foo's GOT entry, because both GOT address and
127     // foo's offset are known. In other words, it's G + A.
128     //
129     // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
130     // foo's GOT entry in the table, because GOT address is not known but foo's
131     // offset in the table is known. It's G + A - GOT.
132     //
133     // It's unfortunate that compilers emit the same relocation for these
134     // different use cases. In order to distinguish them, we have to read a
135     // machine instruction.
136     //
137     // The following code implements it. We assume that Loc[0] is the first byte
138     // of a displacement or an immediate field of a valid machine
139     // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
140     // the byte, we can determine whether the instruction uses the operand as an
141     // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
142     return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
143   case R_386_TLS_GOTDESC:
144     return R_TLSDESC_GOTPLT;
145   case R_386_TLS_DESC_CALL:
146     return R_TLSDESC_CALL;
147   case R_386_TLS_GOTIE:
148     return R_GOTPLT;
149   case R_386_GOTOFF:
150     return R_GOTPLTREL;
151   case R_386_TLS_LE:
152     return R_TPREL;
153   case R_386_TLS_LE_32:
154     return R_TPREL_NEG;
155   case R_386_NONE:
156     return R_NONE;
157   default:
158     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
159           ") against symbol " + toString(s));
160     return R_NONE;
161   }
162 }
163 
164 RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
165   switch (expr) {
166   default:
167     return expr;
168   case R_RELAX_TLS_GD_TO_IE:
169     return R_RELAX_TLS_GD_TO_IE_GOTPLT;
170   case R_RELAX_TLS_GD_TO_LE:
171     return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
172                                 : R_RELAX_TLS_GD_TO_LE;
173   }
174 }
175 
176 void X86::writeGotPltHeader(uint8_t *buf) const {
177   write32le(buf, mainPart->dynamic->getVA());
178 }
179 
180 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
181   // Entries in .got.plt initially points back to the corresponding
182   // PLT entries with a fixed offset to skip the first instruction.
183   write32le(buf, s.getPltVA() + 6);
184 }
185 
186 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
187   // An x86 entry is the address of the ifunc resolver function.
188   write32le(buf, s.getVA());
189 }
190 
191 RelType X86::getDynRel(RelType type) const {
192   if (type == R_386_TLS_LE)
193     return R_386_TLS_TPOFF;
194   if (type == R_386_TLS_LE_32)
195     return R_386_TLS_TPOFF32;
196   return type;
197 }
198 
199 void X86::writePltHeader(uint8_t *buf) const {
200   if (config->isPic) {
201     const uint8_t v[] = {
202         0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
203         0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
204         0x90, 0x90, 0x90, 0x90              // nop
205     };
206     memcpy(buf, v, sizeof(v));
207     return;
208   }
209 
210   const uint8_t pltData[] = {
211       0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
212       0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
213       0x90, 0x90, 0x90, 0x90, // nop
214   };
215   memcpy(buf, pltData, sizeof(pltData));
216   uint32_t gotPlt = in.gotPlt->getVA();
217   write32le(buf + 2, gotPlt + 4);
218   write32le(buf + 8, gotPlt + 8);
219 }
220 
221 void X86::writePlt(uint8_t *buf, const Symbol &sym,
222                    uint64_t pltEntryAddr) const {
223   unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
224   if (config->isPic) {
225     const uint8_t inst[] = {
226         0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
227         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
228         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
229     };
230     memcpy(buf, inst, sizeof(inst));
231     write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA());
232   } else {
233     const uint8_t inst[] = {
234         0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
235         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
236         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
237     };
238     memcpy(buf, inst, sizeof(inst));
239     write32le(buf + 2, sym.getGotPltVA());
240   }
241 
242   write32le(buf + 7, relOff);
243   write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
244 }
245 
246 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
247   switch (type) {
248   case R_386_8:
249   case R_386_PC8:
250     return SignExtend64<8>(*buf);
251   case R_386_16:
252   case R_386_PC16:
253     return SignExtend64<16>(read16le(buf));
254   case R_386_32:
255   case R_386_GLOB_DAT:
256   case R_386_GOT32:
257   case R_386_GOT32X:
258   case R_386_GOTOFF:
259   case R_386_GOTPC:
260   case R_386_IRELATIVE:
261   case R_386_PC32:
262   case R_386_PLT32:
263   case R_386_RELATIVE:
264   case R_386_TLS_GOTDESC:
265   case R_386_TLS_DESC_CALL:
266   case R_386_TLS_DTPMOD32:
267   case R_386_TLS_DTPOFF32:
268   case R_386_TLS_LDO_32:
269   case R_386_TLS_LDM:
270   case R_386_TLS_IE:
271   case R_386_TLS_IE_32:
272   case R_386_TLS_LE:
273   case R_386_TLS_LE_32:
274   case R_386_TLS_GD:
275   case R_386_TLS_GD_32:
276   case R_386_TLS_GOTIE:
277   case R_386_TLS_TPOFF:
278   case R_386_TLS_TPOFF32:
279     return SignExtend64<32>(read32le(buf));
280   case R_386_TLS_DESC:
281     return SignExtend64<32>(read32le(buf + 4));
282   case R_386_NONE:
283   case R_386_JUMP_SLOT:
284     // These relocations are defined as not having an implicit addend.
285     return 0;
286   default:
287     internalLinkerError(getErrorLocation(buf),
288                         "cannot read addend for relocation " + toString(type));
289     return 0;
290   }
291 }
292 
293 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
294   switch (rel.type) {
295   case R_386_8:
296     // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
297     // being used for some 16-bit programs such as boot loaders, so
298     // we want to support them.
299     checkIntUInt(loc, val, 8, rel);
300     *loc = val;
301     break;
302   case R_386_PC8:
303     checkInt(loc, val, 8, rel);
304     *loc = val;
305     break;
306   case R_386_16:
307     checkIntUInt(loc, val, 16, rel);
308     write16le(loc, val);
309     break;
310   case R_386_PC16:
311     // R_386_PC16 is normally used with 16 bit code. In that situation
312     // the PC is 16 bits, just like the addend. This means that it can
313     // point from any 16 bit address to any other if the possibility
314     // of wrapping is included.
315     // The only restriction we have to check then is that the destination
316     // address fits in 16 bits. That is impossible to do here. The problem is
317     // that we are passed the final value, which already had the
318     // current location subtracted from it.
319     // We just check that Val fits in 17 bits. This misses some cases, but
320     // should have no false positives.
321     checkInt(loc, val, 17, rel);
322     write16le(loc, val);
323     break;
324   case R_386_32:
325   case R_386_GOT32:
326   case R_386_GOT32X:
327   case R_386_GOTOFF:
328   case R_386_GOTPC:
329   case R_386_PC32:
330   case R_386_PLT32:
331   case R_386_RELATIVE:
332   case R_386_TLS_GOTDESC:
333   case R_386_TLS_DESC_CALL:
334   case R_386_TLS_DTPMOD32:
335   case R_386_TLS_DTPOFF32:
336   case R_386_TLS_GD:
337   case R_386_TLS_GOTIE:
338   case R_386_TLS_IE:
339   case R_386_TLS_LDM:
340   case R_386_TLS_LDO_32:
341   case R_386_TLS_LE:
342   case R_386_TLS_LE_32:
343   case R_386_TLS_TPOFF:
344   case R_386_TLS_TPOFF32:
345     checkInt(loc, val, 32, rel);
346     write32le(loc, val);
347     break;
348   case R_386_TLS_DESC:
349     // The addend is stored in the second 32-bit word.
350     write32le(loc + 4, val);
351     break;
352   default:
353     llvm_unreachable("unknown relocation");
354   }
355 }
356 
357 void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
358                          uint64_t val) const {
359   if (rel.type == R_386_TLS_GD) {
360     // Convert
361     //   leal x@tlsgd(, %ebx, 1), %eax
362     //   call __tls_get_addr@plt
363     // to
364     //   movl %gs:0, %eax
365     //   subl $x@tpoff, %eax
366     const uint8_t inst[] = {
367         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
368         0x81, 0xe8, 0,    0,    0,    0,    // subl val(%ebx), %eax
369     };
370     memcpy(loc - 3, inst, sizeof(inst));
371     write32le(loc + 5, val);
372   } else if (rel.type == R_386_TLS_GOTDESC) {
373     // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
374     //
375     // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
376     if (memcmp(loc - 2, "\x8d\x83", 2)) {
377       error(getErrorLocation(loc - 2) +
378             "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
379       return;
380     }
381     loc[-1] = 0x05;
382     write32le(loc, val);
383   } else {
384     // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
385     assert(rel.type == R_386_TLS_DESC_CALL);
386     loc[0] = 0x66;
387     loc[1] = 0x90;
388   }
389 }
390 
391 void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
392                          uint64_t val) const {
393   if (rel.type == R_386_TLS_GD) {
394     // Convert
395     //   leal x@tlsgd(, %ebx, 1), %eax
396     //   call __tls_get_addr@plt
397     // to
398     //   movl %gs:0, %eax
399     //   addl x@gotntpoff(%ebx), %eax
400     const uint8_t inst[] = {
401         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
402         0x03, 0x83, 0,    0,    0,    0,    // addl val(%ebx), %eax
403     };
404     memcpy(loc - 3, inst, sizeof(inst));
405     write32le(loc + 5, val);
406   } else if (rel.type == R_386_TLS_GOTDESC) {
407     // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
408     if (memcmp(loc - 2, "\x8d\x83", 2)) {
409       error(getErrorLocation(loc - 2) +
410             "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
411       return;
412     }
413     loc[-2] = 0x8b;
414     write32le(loc, val);
415   } else {
416     // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
417     assert(rel.type == R_386_TLS_DESC_CALL);
418     loc[0] = 0x66;
419     loc[1] = 0x90;
420   }
421 }
422 
423 // In some conditions, relocations can be optimized to avoid using GOT.
424 // This function does that for Initial Exec to Local Exec case.
425 void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
426                          uint64_t val) const {
427   // Ulrich's document section 6.2 says that @gotntpoff can
428   // be used with MOVL or ADDL instructions.
429   // @indntpoff is similar to @gotntpoff, but for use in
430   // position dependent code.
431   uint8_t reg = (loc[-1] >> 3) & 7;
432 
433   if (rel.type == R_386_TLS_IE) {
434     if (loc[-1] == 0xa1) {
435       // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
436       // This case is different from the generic case below because
437       // this is a 5 byte instruction while below is 6 bytes.
438       loc[-1] = 0xb8;
439     } else if (loc[-2] == 0x8b) {
440       // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
441       loc[-2] = 0xc7;
442       loc[-1] = 0xc0 | reg;
443     } else {
444       // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
445       loc[-2] = 0x81;
446       loc[-1] = 0xc0 | reg;
447     }
448   } else {
449     assert(rel.type == R_386_TLS_GOTIE);
450     if (loc[-2] == 0x8b) {
451       // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
452       loc[-2] = 0xc7;
453       loc[-1] = 0xc0 | reg;
454     } else {
455       // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
456       loc[-2] = 0x8d;
457       loc[-1] = 0x80 | (reg << 3) | reg;
458     }
459   }
460   write32le(loc, val);
461 }
462 
463 void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
464                          uint64_t val) const {
465   if (rel.type == R_386_TLS_LDO_32) {
466     write32le(loc, val);
467     return;
468   }
469 
470   // Convert
471   //   leal foo(%reg),%eax
472   //   call ___tls_get_addr
473   // to
474   //   movl %gs:0,%eax
475   //   nop
476   //   leal 0(%esi,1),%esi
477   const uint8_t inst[] = {
478       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
479       0x90,                               // nop
480       0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
481   };
482   memcpy(loc - 2, inst, sizeof(inst));
483 }
484 
485 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
486 // entries containing endbr32 instructions. A PLT entry will be split into two
487 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
488 namespace {
489 class IntelIBT : public X86 {
490 public:
491   IntelIBT();
492   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
493   void writePlt(uint8_t *buf, const Symbol &sym,
494                 uint64_t pltEntryAddr) const override;
495   void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
496 
497   static const unsigned IBTPltHeaderSize = 16;
498 };
499 } // namespace
500 
501 IntelIBT::IntelIBT() { pltHeaderSize = 0; }
502 
503 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
504   uint64_t va =
505       in.ibtPlt->getVA() + IBTPltHeaderSize + s.getPltIdx() * pltEntrySize;
506   write32le(buf, va);
507 }
508 
509 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
510                         uint64_t /*pltEntryAddr*/) const {
511   if (config->isPic) {
512     const uint8_t inst[] = {
513         0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
514         0xff, 0xa3, 0,    0,    0, 0, // jmp *name@GOT(%ebx)
515         0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
516     };
517     memcpy(buf, inst, sizeof(inst));
518     write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA());
519     return;
520   }
521 
522   const uint8_t inst[] = {
523       0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
524       0xff, 0x25, 0,    0,    0, 0, // jmp *foo@GOT
525       0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
526   };
527   memcpy(buf, inst, sizeof(inst));
528   write32le(buf + 6, sym.getGotPltVA());
529 }
530 
531 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
532   writePltHeader(buf);
533   buf += IBTPltHeaderSize;
534 
535   const uint8_t inst[] = {
536       0xf3, 0x0f, 0x1e, 0xfb,    // endbr32
537       0x68, 0,    0,    0,    0, // pushl $reloc_offset
538       0xe9, 0,    0,    0,    0, // jmpq .PLT0@PC
539       0x66, 0x90,                // nop
540   };
541 
542   for (size_t i = 0; i < numEntries; ++i) {
543     memcpy(buf, inst, sizeof(inst));
544     write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
545     write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
546     buf += sizeof(inst);
547   }
548 }
549 
550 namespace {
551 class RetpolinePic : public X86 {
552 public:
553   RetpolinePic();
554   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
555   void writePltHeader(uint8_t *buf) const override;
556   void writePlt(uint8_t *buf, const Symbol &sym,
557                 uint64_t pltEntryAddr) const override;
558 };
559 
560 class RetpolineNoPic : public X86 {
561 public:
562   RetpolineNoPic();
563   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
564   void writePltHeader(uint8_t *buf) const override;
565   void writePlt(uint8_t *buf, const Symbol &sym,
566                 uint64_t pltEntryAddr) const override;
567 };
568 } // namespace
569 
570 RetpolinePic::RetpolinePic() {
571   pltHeaderSize = 48;
572   pltEntrySize = 32;
573   ipltEntrySize = 32;
574 }
575 
576 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
577   write32le(buf, s.getPltVA() + 17);
578 }
579 
580 void RetpolinePic::writePltHeader(uint8_t *buf) const {
581   const uint8_t insn[] = {
582       0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
583       0x50,                                     // 6:    pushl %eax
584       0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
585       0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
586       0xf3, 0x90,                               // 12: loop: pause
587       0x0f, 0xae, 0xe8,                         // 14:   lfence
588       0xeb, 0xf9,                               // 17:   jmp loop
589       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
590       0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
591       0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
592       0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
593       0x89, 0xc8,                               // 2b:   mov %ecx, %eax
594       0x59,                                     // 2d:   pop %ecx
595       0xc3,                                     // 2e:   ret
596       0xcc,                                     // 2f:   int3; padding
597   };
598   memcpy(buf, insn, sizeof(insn));
599 }
600 
601 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
602                             uint64_t pltEntryAddr) const {
603   unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
604   const uint8_t insn[] = {
605       0x50,                            // pushl %eax
606       0x8b, 0x83, 0,    0,    0,    0, // mov foo@GOT(%ebx), %eax
607       0xe8, 0,    0,    0,    0,       // call plt+0x20
608       0xe9, 0,    0,    0,    0,       // jmp plt+0x12
609       0x68, 0,    0,    0,    0,       // pushl $reloc_offset
610       0xe9, 0,    0,    0,    0,       // jmp plt+0
611       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // int3; padding
612   };
613   memcpy(buf, insn, sizeof(insn));
614 
615   uint32_t ebx = in.gotPlt->getVA();
616   unsigned off = pltEntryAddr - in.plt->getVA();
617   write32le(buf + 3, sym.getGotPltVA() - ebx);
618   write32le(buf + 8, -off - 12 + 32);
619   write32le(buf + 13, -off - 17 + 18);
620   write32le(buf + 18, relOff);
621   write32le(buf + 23, -off - 27);
622 }
623 
624 RetpolineNoPic::RetpolineNoPic() {
625   pltHeaderSize = 48;
626   pltEntrySize = 32;
627   ipltEntrySize = 32;
628 }
629 
630 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
631   write32le(buf, s.getPltVA() + 16);
632 }
633 
634 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
635   const uint8_t insn[] = {
636       0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
637       0x50,                            // 6:    pushl %eax
638       0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
639       0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
640       0xf3, 0x90,                      // 11: loop: pause
641       0x0f, 0xae, 0xe8,                // 13:   lfence
642       0xeb, 0xf9,                      // 16:   jmp loop
643       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
644       0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
645       0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
646       0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
647       0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
648       0x89, 0xc8,                      // 2b:   mov %ecx, %eax
649       0x59,                            // 2d:   pop %ecx
650       0xc3,                            // 2e:   ret
651       0xcc,                            // 2f:   int3; padding
652   };
653   memcpy(buf, insn, sizeof(insn));
654 
655   uint32_t gotPlt = in.gotPlt->getVA();
656   write32le(buf + 2, gotPlt + 4);
657   write32le(buf + 8, gotPlt + 8);
658 }
659 
660 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
661                               uint64_t pltEntryAddr) const {
662   unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
663   const uint8_t insn[] = {
664       0x50,                         // 0:  pushl %eax
665       0xa1, 0,    0,    0,    0,    // 1:  mov foo_in_GOT, %eax
666       0xe8, 0,    0,    0,    0,    // 6:  call plt+0x20
667       0xe9, 0,    0,    0,    0,    // b:  jmp plt+0x11
668       0x68, 0,    0,    0,    0,    // 10: pushl $reloc_offset
669       0xe9, 0,    0,    0,    0,    // 15: jmp plt+0
670       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
671       0xcc,                         // 1f: int3; padding
672   };
673   memcpy(buf, insn, sizeof(insn));
674 
675   unsigned off = pltEntryAddr - in.plt->getVA();
676   write32le(buf + 2, sym.getGotPltVA());
677   write32le(buf + 7, -off - 11 + 32);
678   write32le(buf + 12, -off - 16 + 17);
679   write32le(buf + 17, relOff);
680   write32le(buf + 22, -off - 26);
681 }
682 
683 TargetInfo *elf::getX86TargetInfo() {
684   if (config->zRetpolineplt) {
685     if (config->isPic) {
686       static RetpolinePic t;
687       return &t;
688     }
689     static RetpolineNoPic t;
690     return &t;
691   }
692 
693   if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
694     static IntelIBT t;
695     return &t;
696   }
697 
698   static X86 t;
699   return &t;
700 }
701