xref: /freebsd/contrib/llvm-project/lld/ELF/Arch/X86.cpp (revision c66ec88fed842fbaad62c30d510644ceb7bd2d71)
1 //===- X86.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "Symbols.h"
11 #include "SyntheticSections.h"
12 #include "Target.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Support/Endian.h"
15 
16 using namespace llvm;
17 using namespace llvm::support::endian;
18 using namespace llvm::ELF;
19 using namespace lld;
20 using namespace lld::elf;
21 
22 namespace {
23 class X86 : public TargetInfo {
24 public:
25   X86();
26   int getTlsGdRelaxSkip(RelType type) const override;
27   RelExpr getRelExpr(RelType type, const Symbol &s,
28                      const uint8_t *loc) const override;
29   int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
30   void writeGotPltHeader(uint8_t *buf) const override;
31   RelType getDynRel(RelType type) const override;
32   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
33   void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
34   void writePltHeader(uint8_t *buf) const override;
35   void writePlt(uint8_t *buf, const Symbol &sym,
36                 uint64_t pltEntryAddr) const override;
37   void relocate(uint8_t *loc, const Relocation &rel,
38                 uint64_t val) const override;
39 
40   RelExpr adjustRelaxExpr(RelType type, const uint8_t *data,
41                           RelExpr expr) const override;
42   void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
43                       uint64_t val) const override;
44   void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
45                       uint64_t val) const override;
46   void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
47                       uint64_t val) const override;
48   void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
49                       uint64_t val) const override;
50 };
51 } // namespace
52 
53 X86::X86() {
54   copyRel = R_386_COPY;
55   gotRel = R_386_GLOB_DAT;
56   noneRel = R_386_NONE;
57   pltRel = R_386_JUMP_SLOT;
58   iRelativeRel = R_386_IRELATIVE;
59   relativeRel = R_386_RELATIVE;
60   symbolicRel = R_386_32;
61   tlsGotRel = R_386_TLS_TPOFF;
62   tlsModuleIndexRel = R_386_TLS_DTPMOD32;
63   tlsOffsetRel = R_386_TLS_DTPOFF32;
64   pltHeaderSize = 16;
65   pltEntrySize = 16;
66   ipltEntrySize = 16;
67   trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
68 
69   // Align to the non-PAE large page size (known as a superpage or huge page).
70   // FreeBSD automatically promotes large, superpage-aligned allocations.
71   defaultImageBase = 0x400000;
72 }
73 
74 int X86::getTlsGdRelaxSkip(RelType type) const {
75   return 2;
76 }
77 
78 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
79                         const uint8_t *loc) const {
80   // There are 4 different TLS variable models with varying degrees of
81   // flexibility and performance. LocalExec and InitialExec models are fast but
82   // less-flexible models. If they are in use, we set DF_STATIC_TLS flag in the
83   // dynamic section to let runtime know about that.
84   if (type == R_386_TLS_LE || type == R_386_TLS_LE_32 || type == R_386_TLS_IE ||
85       type == R_386_TLS_GOTIE)
86     config->hasStaticTlsModel = true;
87 
88   switch (type) {
89   case R_386_8:
90   case R_386_16:
91   case R_386_32:
92     return R_ABS;
93   case R_386_TLS_LDO_32:
94     return R_DTPREL;
95   case R_386_TLS_GD:
96     return R_TLSGD_GOTPLT;
97   case R_386_TLS_LDM:
98     return R_TLSLD_GOTPLT;
99   case R_386_PLT32:
100     return R_PLT_PC;
101   case R_386_PC8:
102   case R_386_PC16:
103   case R_386_PC32:
104     return R_PC;
105   case R_386_GOTPC:
106     return R_GOTPLTONLY_PC;
107   case R_386_TLS_IE:
108     return R_GOT;
109   case R_386_GOT32:
110   case R_386_GOT32X:
111     // These relocations are arguably mis-designed because their calculations
112     // depend on the instructions they are applied to. This is bad because we
113     // usually don't care about whether the target section contains valid
114     // machine instructions or not. But this is part of the documented ABI, so
115     // we had to implement as the standard requires.
116     //
117     // x86 does not support PC-relative data access. Therefore, in order to
118     // access GOT contents, a GOT address needs to be known at link-time
119     // (which means non-PIC) or compilers have to emit code to get a GOT
120     // address at runtime (which means code is position-independent but
121     // compilers need to emit extra code for each GOT access.) This decision
122     // is made at compile-time. In the latter case, compilers emit code to
123     // load a GOT address to a register, which is usually %ebx.
124     //
125     // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
126     // foo@GOT(%ebx).
127     //
128     // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
129     // find such relocation, we should report an error. foo@GOT is resolved to
130     // an *absolute* address of foo's GOT entry, because both GOT address and
131     // foo's offset are known. In other words, it's G + A.
132     //
133     // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
134     // foo's GOT entry in the table, because GOT address is not known but foo's
135     // offset in the table is known. It's G + A - GOT.
136     //
137     // It's unfortunate that compilers emit the same relocation for these
138     // different use cases. In order to distinguish them, we have to read a
139     // machine instruction.
140     //
141     // The following code implements it. We assume that Loc[0] is the first byte
142     // of a displacement or an immediate field of a valid machine
143     // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
144     // the byte, we can determine whether the instruction uses the operand as an
145     // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
146     return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
147   case R_386_TLS_GOTIE:
148     return R_GOTPLT;
149   case R_386_GOTOFF:
150     return R_GOTPLTREL;
151   case R_386_TLS_LE:
152     return R_TLS;
153   case R_386_TLS_LE_32:
154     return R_NEG_TLS;
155   case R_386_NONE:
156     return R_NONE;
157   default:
158     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
159           ") against symbol " + toString(s));
160     return R_NONE;
161   }
162 }
163 
164 RelExpr X86::adjustRelaxExpr(RelType type, const uint8_t *data,
165                              RelExpr expr) const {
166   switch (expr) {
167   default:
168     return expr;
169   case R_RELAX_TLS_GD_TO_IE:
170     return R_RELAX_TLS_GD_TO_IE_GOTPLT;
171   case R_RELAX_TLS_GD_TO_LE:
172     return R_RELAX_TLS_GD_TO_LE_NEG;
173   }
174 }
175 
176 void X86::writeGotPltHeader(uint8_t *buf) const {
177   write32le(buf, mainPart->dynamic->getVA());
178 }
179 
180 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
181   // Entries in .got.plt initially points back to the corresponding
182   // PLT entries with a fixed offset to skip the first instruction.
183   write32le(buf, s.getPltVA() + 6);
184 }
185 
186 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
187   // An x86 entry is the address of the ifunc resolver function.
188   write32le(buf, s.getVA());
189 }
190 
191 RelType X86::getDynRel(RelType type) const {
192   if (type == R_386_TLS_LE)
193     return R_386_TLS_TPOFF;
194   if (type == R_386_TLS_LE_32)
195     return R_386_TLS_TPOFF32;
196   return type;
197 }
198 
199 void X86::writePltHeader(uint8_t *buf) const {
200   if (config->isPic) {
201     const uint8_t v[] = {
202         0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
203         0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
204         0x90, 0x90, 0x90, 0x90              // nop
205     };
206     memcpy(buf, v, sizeof(v));
207     return;
208   }
209 
210   const uint8_t pltData[] = {
211       0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
212       0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
213       0x90, 0x90, 0x90, 0x90, // nop
214   };
215   memcpy(buf, pltData, sizeof(pltData));
216   uint32_t gotPlt = in.gotPlt->getVA();
217   write32le(buf + 2, gotPlt + 4);
218   write32le(buf + 8, gotPlt + 8);
219 }
220 
221 void X86::writePlt(uint8_t *buf, const Symbol &sym,
222                    uint64_t pltEntryAddr) const {
223   unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
224   if (config->isPic) {
225     const uint8_t inst[] = {
226         0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
227         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
228         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
229     };
230     memcpy(buf, inst, sizeof(inst));
231     write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA());
232   } else {
233     const uint8_t inst[] = {
234         0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
235         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
236         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
237     };
238     memcpy(buf, inst, sizeof(inst));
239     write32le(buf + 2, sym.getGotPltVA());
240   }
241 
242   write32le(buf + 7, relOff);
243   write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
244 }
245 
246 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
247   switch (type) {
248   case R_386_8:
249   case R_386_PC8:
250     return SignExtend64<8>(*buf);
251   case R_386_16:
252   case R_386_PC16:
253     return SignExtend64<16>(read16le(buf));
254   case R_386_32:
255   case R_386_GOT32:
256   case R_386_GOT32X:
257   case R_386_GOTOFF:
258   case R_386_GOTPC:
259   case R_386_PC32:
260   case R_386_PLT32:
261   case R_386_TLS_LDO_32:
262   case R_386_TLS_LE:
263     return SignExtend64<32>(read32le(buf));
264   default:
265     return 0;
266   }
267 }
268 
269 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
270   switch (rel.type) {
271   case R_386_8:
272     // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
273     // being used for some 16-bit programs such as boot loaders, so
274     // we want to support them.
275     checkIntUInt(loc, val, 8, rel);
276     *loc = val;
277     break;
278   case R_386_PC8:
279     checkInt(loc, val, 8, rel);
280     *loc = val;
281     break;
282   case R_386_16:
283     checkIntUInt(loc, val, 16, rel);
284     write16le(loc, val);
285     break;
286   case R_386_PC16:
287     // R_386_PC16 is normally used with 16 bit code. In that situation
288     // the PC is 16 bits, just like the addend. This means that it can
289     // point from any 16 bit address to any other if the possibility
290     // of wrapping is included.
291     // The only restriction we have to check then is that the destination
292     // address fits in 16 bits. That is impossible to do here. The problem is
293     // that we are passed the final value, which already had the
294     // current location subtracted from it.
295     // We just check that Val fits in 17 bits. This misses some cases, but
296     // should have no false positives.
297     checkInt(loc, val, 17, rel);
298     write16le(loc, val);
299     break;
300   case R_386_32:
301   case R_386_GOT32:
302   case R_386_GOT32X:
303   case R_386_GOTOFF:
304   case R_386_GOTPC:
305   case R_386_PC32:
306   case R_386_PLT32:
307   case R_386_RELATIVE:
308   case R_386_TLS_DTPMOD32:
309   case R_386_TLS_DTPOFF32:
310   case R_386_TLS_GD:
311   case R_386_TLS_GOTIE:
312   case R_386_TLS_IE:
313   case R_386_TLS_LDM:
314   case R_386_TLS_LDO_32:
315   case R_386_TLS_LE:
316   case R_386_TLS_LE_32:
317   case R_386_TLS_TPOFF:
318   case R_386_TLS_TPOFF32:
319     checkInt(loc, val, 32, rel);
320     write32le(loc, val);
321     break;
322   default:
323     llvm_unreachable("unknown relocation");
324   }
325 }
326 
327 void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &, uint64_t val) const {
328   // Convert
329   //   leal x@tlsgd(, %ebx, 1),
330   //   call __tls_get_addr@plt
331   // to
332   //   movl %gs:0,%eax
333   //   subl $x@ntpoff,%eax
334   const uint8_t inst[] = {
335       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
336       0x81, 0xe8, 0, 0, 0, 0,             // subl Val(%ebx), %eax
337   };
338   memcpy(loc - 3, inst, sizeof(inst));
339   write32le(loc + 5, val);
340 }
341 
342 void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &, uint64_t val) const {
343   // Convert
344   //   leal x@tlsgd(, %ebx, 1),
345   //   call __tls_get_addr@plt
346   // to
347   //   movl %gs:0, %eax
348   //   addl x@gotntpoff(%ebx), %eax
349   const uint8_t inst[] = {
350       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
351       0x03, 0x83, 0, 0, 0, 0,             // addl Val(%ebx), %eax
352   };
353   memcpy(loc - 3, inst, sizeof(inst));
354   write32le(loc + 5, val);
355 }
356 
357 // In some conditions, relocations can be optimized to avoid using GOT.
358 // This function does that for Initial Exec to Local Exec case.
359 void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
360                          uint64_t val) const {
361   // Ulrich's document section 6.2 says that @gotntpoff can
362   // be used with MOVL or ADDL instructions.
363   // @indntpoff is similar to @gotntpoff, but for use in
364   // position dependent code.
365   uint8_t reg = (loc[-1] >> 3) & 7;
366 
367   if (rel.type == R_386_TLS_IE) {
368     if (loc[-1] == 0xa1) {
369       // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
370       // This case is different from the generic case below because
371       // this is a 5 byte instruction while below is 6 bytes.
372       loc[-1] = 0xb8;
373     } else if (loc[-2] == 0x8b) {
374       // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
375       loc[-2] = 0xc7;
376       loc[-1] = 0xc0 | reg;
377     } else {
378       // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
379       loc[-2] = 0x81;
380       loc[-1] = 0xc0 | reg;
381     }
382   } else {
383     assert(rel.type == R_386_TLS_GOTIE);
384     if (loc[-2] == 0x8b) {
385       // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
386       loc[-2] = 0xc7;
387       loc[-1] = 0xc0 | reg;
388     } else {
389       // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
390       loc[-2] = 0x8d;
391       loc[-1] = 0x80 | (reg << 3) | reg;
392     }
393   }
394   write32le(loc, val);
395 }
396 
397 void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
398                          uint64_t val) const {
399   if (rel.type == R_386_TLS_LDO_32) {
400     write32le(loc, val);
401     return;
402   }
403 
404   // Convert
405   //   leal foo(%reg),%eax
406   //   call ___tls_get_addr
407   // to
408   //   movl %gs:0,%eax
409   //   nop
410   //   leal 0(%esi,1),%esi
411   const uint8_t inst[] = {
412       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
413       0x90,                               // nop
414       0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
415   };
416   memcpy(loc - 2, inst, sizeof(inst));
417 }
418 
419 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
420 // entries containing endbr32 instructions. A PLT entry will be split into two
421 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
422 namespace {
423 class IntelIBT : public X86 {
424 public:
425   IntelIBT();
426   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
427   void writePlt(uint8_t *buf, const Symbol &sym,
428                 uint64_t pltEntryAddr) const override;
429   void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
430 
431   static const unsigned IBTPltHeaderSize = 16;
432 };
433 } // namespace
434 
435 IntelIBT::IntelIBT() { pltHeaderSize = 0; }
436 
437 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
438   uint64_t va =
439       in.ibtPlt->getVA() + IBTPltHeaderSize + s.pltIndex * pltEntrySize;
440   write32le(buf, va);
441 }
442 
443 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
444                         uint64_t /*pltEntryAddr*/) const {
445   if (config->isPic) {
446     const uint8_t inst[] = {
447         0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
448         0xff, 0xa3, 0,    0,    0, 0, // jmp *name@GOT(%ebx)
449         0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
450     };
451     memcpy(buf, inst, sizeof(inst));
452     write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA());
453     return;
454   }
455 
456   const uint8_t inst[] = {
457       0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
458       0xff, 0x25, 0,    0,    0, 0, // jmp *foo@GOT
459       0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
460   };
461   memcpy(buf, inst, sizeof(inst));
462   write32le(buf + 6, sym.getGotPltVA());
463 }
464 
465 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
466   writePltHeader(buf);
467   buf += IBTPltHeaderSize;
468 
469   const uint8_t inst[] = {
470       0xf3, 0x0f, 0x1e, 0xfb,    // endbr32
471       0x68, 0,    0,    0,    0, // pushl $reloc_offset
472       0xe9, 0,    0,    0,    0, // jmpq .PLT0@PC
473       0x66, 0x90,                // nop
474   };
475 
476   for (size_t i = 0; i < numEntries; ++i) {
477     memcpy(buf, inst, sizeof(inst));
478     write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
479     write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
480     buf += sizeof(inst);
481   }
482 }
483 
484 namespace {
485 class RetpolinePic : public X86 {
486 public:
487   RetpolinePic();
488   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
489   void writePltHeader(uint8_t *buf) const override;
490   void writePlt(uint8_t *buf, const Symbol &sym,
491                 uint64_t pltEntryAddr) const override;
492 };
493 
494 class RetpolineNoPic : public X86 {
495 public:
496   RetpolineNoPic();
497   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
498   void writePltHeader(uint8_t *buf) const override;
499   void writePlt(uint8_t *buf, const Symbol &sym,
500                 uint64_t pltEntryAddr) const override;
501 };
502 } // namespace
503 
504 RetpolinePic::RetpolinePic() {
505   pltHeaderSize = 48;
506   pltEntrySize = 32;
507   ipltEntrySize = 32;
508 }
509 
510 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
511   write32le(buf, s.getPltVA() + 17);
512 }
513 
514 void RetpolinePic::writePltHeader(uint8_t *buf) const {
515   const uint8_t insn[] = {
516       0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
517       0x50,                                     // 6:    pushl %eax
518       0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
519       0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
520       0xf3, 0x90,                               // 12: loop: pause
521       0x0f, 0xae, 0xe8,                         // 14:   lfence
522       0xeb, 0xf9,                               // 17:   jmp loop
523       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
524       0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
525       0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
526       0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
527       0x89, 0xc8,                               // 2b:   mov %ecx, %eax
528       0x59,                                     // 2d:   pop %ecx
529       0xc3,                                     // 2e:   ret
530       0xcc,                                     // 2f:   int3; padding
531   };
532   memcpy(buf, insn, sizeof(insn));
533 }
534 
535 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
536                             uint64_t pltEntryAddr) const {
537   unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
538   const uint8_t insn[] = {
539       0x50,                            // pushl %eax
540       0x8b, 0x83, 0,    0,    0,    0, // mov foo@GOT(%ebx), %eax
541       0xe8, 0,    0,    0,    0,       // call plt+0x20
542       0xe9, 0,    0,    0,    0,       // jmp plt+0x12
543       0x68, 0,    0,    0,    0,       // pushl $reloc_offset
544       0xe9, 0,    0,    0,    0,       // jmp plt+0
545       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // int3; padding
546   };
547   memcpy(buf, insn, sizeof(insn));
548 
549   uint32_t ebx = in.gotPlt->getVA();
550   unsigned off = pltEntryAddr - in.plt->getVA();
551   write32le(buf + 3, sym.getGotPltVA() - ebx);
552   write32le(buf + 8, -off - 12 + 32);
553   write32le(buf + 13, -off - 17 + 18);
554   write32le(buf + 18, relOff);
555   write32le(buf + 23, -off - 27);
556 }
557 
558 RetpolineNoPic::RetpolineNoPic() {
559   pltHeaderSize = 48;
560   pltEntrySize = 32;
561   ipltEntrySize = 32;
562 }
563 
564 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
565   write32le(buf, s.getPltVA() + 16);
566 }
567 
568 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
569   const uint8_t insn[] = {
570       0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
571       0x50,                            // 6:    pushl %eax
572       0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
573       0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
574       0xf3, 0x90,                      // 11: loop: pause
575       0x0f, 0xae, 0xe8,                // 13:   lfence
576       0xeb, 0xf9,                      // 16:   jmp loop
577       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
578       0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
579       0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
580       0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
581       0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
582       0x89, 0xc8,                      // 2b:   mov %ecx, %eax
583       0x59,                            // 2d:   pop %ecx
584       0xc3,                            // 2e:   ret
585       0xcc,                            // 2f:   int3; padding
586   };
587   memcpy(buf, insn, sizeof(insn));
588 
589   uint32_t gotPlt = in.gotPlt->getVA();
590   write32le(buf + 2, gotPlt + 4);
591   write32le(buf + 8, gotPlt + 8);
592 }
593 
594 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
595                               uint64_t pltEntryAddr) const {
596   unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
597   const uint8_t insn[] = {
598       0x50,                         // 0:  pushl %eax
599       0xa1, 0,    0,    0,    0,    // 1:  mov foo_in_GOT, %eax
600       0xe8, 0,    0,    0,    0,    // 6:  call plt+0x20
601       0xe9, 0,    0,    0,    0,    // b:  jmp plt+0x11
602       0x68, 0,    0,    0,    0,    // 10: pushl $reloc_offset
603       0xe9, 0,    0,    0,    0,    // 15: jmp plt+0
604       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
605       0xcc,                         // 1f: int3; padding
606   };
607   memcpy(buf, insn, sizeof(insn));
608 
609   unsigned off = pltEntryAddr - in.plt->getVA();
610   write32le(buf + 2, sym.getGotPltVA());
611   write32le(buf + 7, -off - 11 + 32);
612   write32le(buf + 12, -off - 16 + 17);
613   write32le(buf + 17, relOff);
614   write32le(buf + 22, -off - 26);
615 }
616 
617 TargetInfo *elf::getX86TargetInfo() {
618   if (config->zRetpolineplt) {
619     if (config->isPic) {
620       static RetpolinePic t;
621       return &t;
622     }
623     static RetpolineNoPic t;
624     return &t;
625   }
626 
627   if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
628     static IntelIBT t;
629     return &t;
630   }
631 
632   static X86 t;
633   return &t;
634 }
635