xref: /freebsd/contrib/llvm-project/lld/ELF/Arch/X86.cpp (revision 0c428864495af9dc7d2af4d0a5ae21732af9c739)
1 //===- X86.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Symbols.h"
10 #include "SyntheticSections.h"
11 #include "Target.h"
12 #include "lld/Common/ErrorHandler.h"
13 #include "llvm/Support/Endian.h"
14 
15 using namespace llvm;
16 using namespace llvm::support::endian;
17 using namespace llvm::ELF;
18 using namespace lld;
19 using namespace lld::elf;
20 
21 namespace {
22 class X86 : public TargetInfo {
23 public:
24   X86();
25   int getTlsGdRelaxSkip(RelType type) const override;
26   RelExpr getRelExpr(RelType type, const Symbol &s,
27                      const uint8_t *loc) const override;
28   int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
29   void writeGotPltHeader(uint8_t *buf) const override;
30   RelType getDynRel(RelType type) const override;
31   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
32   void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
33   void writePltHeader(uint8_t *buf) const override;
34   void writePlt(uint8_t *buf, const Symbol &sym,
35                 uint64_t pltEntryAddr) const override;
36   void relocate(uint8_t *loc, const Relocation &rel,
37                 uint64_t val) const override;
38 
39   RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
40   void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
41                       uint64_t val) const override;
42   void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
43                       uint64_t val) const override;
44   void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
45                       uint64_t val) const override;
46   void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
47                       uint64_t val) const override;
48 };
49 } // namespace
50 
51 X86::X86() {
52   copyRel = R_386_COPY;
53   gotRel = R_386_GLOB_DAT;
54   pltRel = R_386_JUMP_SLOT;
55   iRelativeRel = R_386_IRELATIVE;
56   relativeRel = R_386_RELATIVE;
57   symbolicRel = R_386_32;
58   tlsDescRel = R_386_TLS_DESC;
59   tlsGotRel = R_386_TLS_TPOFF;
60   tlsModuleIndexRel = R_386_TLS_DTPMOD32;
61   tlsOffsetRel = R_386_TLS_DTPOFF32;
62   gotBaseSymInGotPlt = true;
63   pltHeaderSize = 16;
64   pltEntrySize = 16;
65   ipltEntrySize = 16;
66   trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
67 
68   // Align to the non-PAE large page size (known as a superpage or huge page).
69   // FreeBSD automatically promotes large, superpage-aligned allocations.
70   defaultImageBase = 0x400000;
71 }
72 
73 int X86::getTlsGdRelaxSkip(RelType type) const {
74   // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
75   return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
76 }
77 
78 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
79                         const uint8_t *loc) const {
80   if (type == R_386_TLS_IE || type == R_386_TLS_GOTIE)
81     config->hasTlsIe = true;
82 
83   switch (type) {
84   case R_386_8:
85   case R_386_16:
86   case R_386_32:
87     return R_ABS;
88   case R_386_TLS_LDO_32:
89     return R_DTPREL;
90   case R_386_TLS_GD:
91     return R_TLSGD_GOTPLT;
92   case R_386_TLS_LDM:
93     return R_TLSLD_GOTPLT;
94   case R_386_PLT32:
95     return R_PLT_PC;
96   case R_386_PC8:
97   case R_386_PC16:
98   case R_386_PC32:
99     return R_PC;
100   case R_386_GOTPC:
101     return R_GOTPLTONLY_PC;
102   case R_386_TLS_IE:
103     return R_GOT;
104   case R_386_GOT32:
105   case R_386_GOT32X:
106     // These relocations are arguably mis-designed because their calculations
107     // depend on the instructions they are applied to. This is bad because we
108     // usually don't care about whether the target section contains valid
109     // machine instructions or not. But this is part of the documented ABI, so
110     // we had to implement as the standard requires.
111     //
112     // x86 does not support PC-relative data access. Therefore, in order to
113     // access GOT contents, a GOT address needs to be known at link-time
114     // (which means non-PIC) or compilers have to emit code to get a GOT
115     // address at runtime (which means code is position-independent but
116     // compilers need to emit extra code for each GOT access.) This decision
117     // is made at compile-time. In the latter case, compilers emit code to
118     // load a GOT address to a register, which is usually %ebx.
119     //
120     // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
121     // foo@GOT(%ebx).
122     //
123     // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
124     // find such relocation, we should report an error. foo@GOT is resolved to
125     // an *absolute* address of foo's GOT entry, because both GOT address and
126     // foo's offset are known. In other words, it's G + A.
127     //
128     // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
129     // foo's GOT entry in the table, because GOT address is not known but foo's
130     // offset in the table is known. It's G + A - GOT.
131     //
132     // It's unfortunate that compilers emit the same relocation for these
133     // different use cases. In order to distinguish them, we have to read a
134     // machine instruction.
135     //
136     // The following code implements it. We assume that Loc[0] is the first byte
137     // of a displacement or an immediate field of a valid machine
138     // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
139     // the byte, we can determine whether the instruction uses the operand as an
140     // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
141     return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
142   case R_386_TLS_GOTDESC:
143     return R_TLSDESC_GOTPLT;
144   case R_386_TLS_DESC_CALL:
145     return R_TLSDESC_CALL;
146   case R_386_TLS_GOTIE:
147     return R_GOTPLT;
148   case R_386_GOTOFF:
149     return R_GOTPLTREL;
150   case R_386_TLS_LE:
151     return R_TPREL;
152   case R_386_TLS_LE_32:
153     return R_TPREL_NEG;
154   case R_386_NONE:
155     return R_NONE;
156   default:
157     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
158           ") against symbol " + toString(s));
159     return R_NONE;
160   }
161 }
162 
163 RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
164   switch (expr) {
165   default:
166     return expr;
167   case R_RELAX_TLS_GD_TO_IE:
168     return R_RELAX_TLS_GD_TO_IE_GOTPLT;
169   case R_RELAX_TLS_GD_TO_LE:
170     return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
171                                 : R_RELAX_TLS_GD_TO_LE;
172   }
173 }
174 
175 void X86::writeGotPltHeader(uint8_t *buf) const {
176   write32le(buf, mainPart->dynamic->getVA());
177 }
178 
179 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
180   // Entries in .got.plt initially points back to the corresponding
181   // PLT entries with a fixed offset to skip the first instruction.
182   write32le(buf, s.getPltVA() + 6);
183 }
184 
185 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
186   // An x86 entry is the address of the ifunc resolver function.
187   write32le(buf, s.getVA());
188 }
189 
190 RelType X86::getDynRel(RelType type) const {
191   if (type == R_386_TLS_LE)
192     return R_386_TLS_TPOFF;
193   if (type == R_386_TLS_LE_32)
194     return R_386_TLS_TPOFF32;
195   return type;
196 }
197 
198 void X86::writePltHeader(uint8_t *buf) const {
199   if (config->isPic) {
200     const uint8_t v[] = {
201         0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
202         0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
203         0x90, 0x90, 0x90, 0x90              // nop
204     };
205     memcpy(buf, v, sizeof(v));
206     return;
207   }
208 
209   const uint8_t pltData[] = {
210       0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
211       0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
212       0x90, 0x90, 0x90, 0x90, // nop
213   };
214   memcpy(buf, pltData, sizeof(pltData));
215   uint32_t gotPlt = in.gotPlt->getVA();
216   write32le(buf + 2, gotPlt + 4);
217   write32le(buf + 8, gotPlt + 8);
218 }
219 
220 void X86::writePlt(uint8_t *buf, const Symbol &sym,
221                    uint64_t pltEntryAddr) const {
222   unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
223   if (config->isPic) {
224     const uint8_t inst[] = {
225         0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
226         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
227         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
228     };
229     memcpy(buf, inst, sizeof(inst));
230     write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA());
231   } else {
232     const uint8_t inst[] = {
233         0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
234         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
235         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
236     };
237     memcpy(buf, inst, sizeof(inst));
238     write32le(buf + 2, sym.getGotPltVA());
239   }
240 
241   write32le(buf + 7, relOff);
242   write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
243 }
244 
245 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
246   switch (type) {
247   case R_386_8:
248   case R_386_PC8:
249     return SignExtend64<8>(*buf);
250   case R_386_16:
251   case R_386_PC16:
252     return SignExtend64<16>(read16le(buf));
253   case R_386_32:
254   case R_386_GLOB_DAT:
255   case R_386_GOT32:
256   case R_386_GOT32X:
257   case R_386_GOTOFF:
258   case R_386_GOTPC:
259   case R_386_IRELATIVE:
260   case R_386_PC32:
261   case R_386_PLT32:
262   case R_386_RELATIVE:
263   case R_386_TLS_GOTDESC:
264   case R_386_TLS_DESC_CALL:
265   case R_386_TLS_DTPMOD32:
266   case R_386_TLS_DTPOFF32:
267   case R_386_TLS_LDO_32:
268   case R_386_TLS_LDM:
269   case R_386_TLS_IE:
270   case R_386_TLS_IE_32:
271   case R_386_TLS_LE:
272   case R_386_TLS_LE_32:
273   case R_386_TLS_GD:
274   case R_386_TLS_GD_32:
275   case R_386_TLS_GOTIE:
276   case R_386_TLS_TPOFF:
277   case R_386_TLS_TPOFF32:
278     return SignExtend64<32>(read32le(buf));
279   case R_386_TLS_DESC:
280     return SignExtend64<32>(read32le(buf + 4));
281   case R_386_NONE:
282   case R_386_JUMP_SLOT:
283     // These relocations are defined as not having an implicit addend.
284     return 0;
285   default:
286     internalLinkerError(getErrorLocation(buf),
287                         "cannot read addend for relocation " + toString(type));
288     return 0;
289   }
290 }
291 
292 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
293   switch (rel.type) {
294   case R_386_8:
295     // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
296     // being used for some 16-bit programs such as boot loaders, so
297     // we want to support them.
298     checkIntUInt(loc, val, 8, rel);
299     *loc = val;
300     break;
301   case R_386_PC8:
302     checkInt(loc, val, 8, rel);
303     *loc = val;
304     break;
305   case R_386_16:
306     checkIntUInt(loc, val, 16, rel);
307     write16le(loc, val);
308     break;
309   case R_386_PC16:
310     // R_386_PC16 is normally used with 16 bit code. In that situation
311     // the PC is 16 bits, just like the addend. This means that it can
312     // point from any 16 bit address to any other if the possibility
313     // of wrapping is included.
314     // The only restriction we have to check then is that the destination
315     // address fits in 16 bits. That is impossible to do here. The problem is
316     // that we are passed the final value, which already had the
317     // current location subtracted from it.
318     // We just check that Val fits in 17 bits. This misses some cases, but
319     // should have no false positives.
320     checkInt(loc, val, 17, rel);
321     write16le(loc, val);
322     break;
323   case R_386_32:
324   case R_386_GOT32:
325   case R_386_GOT32X:
326   case R_386_GOTOFF:
327   case R_386_GOTPC:
328   case R_386_PC32:
329   case R_386_PLT32:
330   case R_386_RELATIVE:
331   case R_386_TLS_GOTDESC:
332   case R_386_TLS_DESC_CALL:
333   case R_386_TLS_DTPMOD32:
334   case R_386_TLS_DTPOFF32:
335   case R_386_TLS_GD:
336   case R_386_TLS_GOTIE:
337   case R_386_TLS_IE:
338   case R_386_TLS_LDM:
339   case R_386_TLS_LDO_32:
340   case R_386_TLS_LE:
341   case R_386_TLS_LE_32:
342   case R_386_TLS_TPOFF:
343   case R_386_TLS_TPOFF32:
344     checkInt(loc, val, 32, rel);
345     write32le(loc, val);
346     break;
347   case R_386_TLS_DESC:
348     // The addend is stored in the second 32-bit word.
349     write32le(loc + 4, val);
350     break;
351   default:
352     llvm_unreachable("unknown relocation");
353   }
354 }
355 
356 void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
357                          uint64_t val) const {
358   if (rel.type == R_386_TLS_GD) {
359     // Convert
360     //   leal x@tlsgd(, %ebx, 1), %eax
361     //   call __tls_get_addr@plt
362     // to
363     //   movl %gs:0, %eax
364     //   subl $x@tpoff, %eax
365     const uint8_t inst[] = {
366         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
367         0x81, 0xe8, 0,    0,    0,    0,    // subl val(%ebx), %eax
368     };
369     memcpy(loc - 3, inst, sizeof(inst));
370     write32le(loc + 5, val);
371   } else if (rel.type == R_386_TLS_GOTDESC) {
372     // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
373     //
374     // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
375     if (memcmp(loc - 2, "\x8d\x83", 2)) {
376       error(getErrorLocation(loc - 2) +
377             "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
378       return;
379     }
380     loc[-1] = 0x05;
381     write32le(loc, val);
382   } else {
383     // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
384     assert(rel.type == R_386_TLS_DESC_CALL);
385     loc[0] = 0x66;
386     loc[1] = 0x90;
387   }
388 }
389 
390 void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
391                          uint64_t val) const {
392   if (rel.type == R_386_TLS_GD) {
393     // Convert
394     //   leal x@tlsgd(, %ebx, 1), %eax
395     //   call __tls_get_addr@plt
396     // to
397     //   movl %gs:0, %eax
398     //   addl x@gotntpoff(%ebx), %eax
399     const uint8_t inst[] = {
400         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
401         0x03, 0x83, 0,    0,    0,    0,    // addl val(%ebx), %eax
402     };
403     memcpy(loc - 3, inst, sizeof(inst));
404     write32le(loc + 5, val);
405   } else if (rel.type == R_386_TLS_GOTDESC) {
406     // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
407     if (memcmp(loc - 2, "\x8d\x83", 2)) {
408       error(getErrorLocation(loc - 2) +
409             "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
410       return;
411     }
412     loc[-2] = 0x8b;
413     write32le(loc, val);
414   } else {
415     // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
416     assert(rel.type == R_386_TLS_DESC_CALL);
417     loc[0] = 0x66;
418     loc[1] = 0x90;
419   }
420 }
421 
422 // In some conditions, relocations can be optimized to avoid using GOT.
423 // This function does that for Initial Exec to Local Exec case.
424 void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
425                          uint64_t val) const {
426   // Ulrich's document section 6.2 says that @gotntpoff can
427   // be used with MOVL or ADDL instructions.
428   // @indntpoff is similar to @gotntpoff, but for use in
429   // position dependent code.
430   uint8_t reg = (loc[-1] >> 3) & 7;
431 
432   if (rel.type == R_386_TLS_IE) {
433     if (loc[-1] == 0xa1) {
434       // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
435       // This case is different from the generic case below because
436       // this is a 5 byte instruction while below is 6 bytes.
437       loc[-1] = 0xb8;
438     } else if (loc[-2] == 0x8b) {
439       // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
440       loc[-2] = 0xc7;
441       loc[-1] = 0xc0 | reg;
442     } else {
443       // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
444       loc[-2] = 0x81;
445       loc[-1] = 0xc0 | reg;
446     }
447   } else {
448     assert(rel.type == R_386_TLS_GOTIE);
449     if (loc[-2] == 0x8b) {
450       // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
451       loc[-2] = 0xc7;
452       loc[-1] = 0xc0 | reg;
453     } else {
454       // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
455       loc[-2] = 0x8d;
456       loc[-1] = 0x80 | (reg << 3) | reg;
457     }
458   }
459   write32le(loc, val);
460 }
461 
462 void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
463                          uint64_t val) const {
464   if (rel.type == R_386_TLS_LDO_32) {
465     write32le(loc, val);
466     return;
467   }
468 
469   // Convert
470   //   leal foo(%reg),%eax
471   //   call ___tls_get_addr
472   // to
473   //   movl %gs:0,%eax
474   //   nop
475   //   leal 0(%esi,1),%esi
476   const uint8_t inst[] = {
477       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
478       0x90,                               // nop
479       0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
480   };
481   memcpy(loc - 2, inst, sizeof(inst));
482 }
483 
484 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
485 // entries containing endbr32 instructions. A PLT entry will be split into two
486 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
487 namespace {
488 class IntelIBT : public X86 {
489 public:
490   IntelIBT();
491   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
492   void writePlt(uint8_t *buf, const Symbol &sym,
493                 uint64_t pltEntryAddr) const override;
494   void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
495 
496   static const unsigned IBTPltHeaderSize = 16;
497 };
498 } // namespace
499 
500 IntelIBT::IntelIBT() { pltHeaderSize = 0; }
501 
502 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
503   uint64_t va =
504       in.ibtPlt->getVA() + IBTPltHeaderSize + s.getPltIdx() * pltEntrySize;
505   write32le(buf, va);
506 }
507 
508 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
509                         uint64_t /*pltEntryAddr*/) const {
510   if (config->isPic) {
511     const uint8_t inst[] = {
512         0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
513         0xff, 0xa3, 0,    0,    0, 0, // jmp *name@GOT(%ebx)
514         0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
515     };
516     memcpy(buf, inst, sizeof(inst));
517     write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA());
518     return;
519   }
520 
521   const uint8_t inst[] = {
522       0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
523       0xff, 0x25, 0,    0,    0, 0, // jmp *foo@GOT
524       0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
525   };
526   memcpy(buf, inst, sizeof(inst));
527   write32le(buf + 6, sym.getGotPltVA());
528 }
529 
530 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
531   writePltHeader(buf);
532   buf += IBTPltHeaderSize;
533 
534   const uint8_t inst[] = {
535       0xf3, 0x0f, 0x1e, 0xfb,    // endbr32
536       0x68, 0,    0,    0,    0, // pushl $reloc_offset
537       0xe9, 0,    0,    0,    0, // jmpq .PLT0@PC
538       0x66, 0x90,                // nop
539   };
540 
541   for (size_t i = 0; i < numEntries; ++i) {
542     memcpy(buf, inst, sizeof(inst));
543     write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
544     write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
545     buf += sizeof(inst);
546   }
547 }
548 
549 namespace {
550 class RetpolinePic : public X86 {
551 public:
552   RetpolinePic();
553   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
554   void writePltHeader(uint8_t *buf) const override;
555   void writePlt(uint8_t *buf, const Symbol &sym,
556                 uint64_t pltEntryAddr) const override;
557 };
558 
559 class RetpolineNoPic : public X86 {
560 public:
561   RetpolineNoPic();
562   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
563   void writePltHeader(uint8_t *buf) const override;
564   void writePlt(uint8_t *buf, const Symbol &sym,
565                 uint64_t pltEntryAddr) const override;
566 };
567 } // namespace
568 
569 RetpolinePic::RetpolinePic() {
570   pltHeaderSize = 48;
571   pltEntrySize = 32;
572   ipltEntrySize = 32;
573 }
574 
575 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
576   write32le(buf, s.getPltVA() + 17);
577 }
578 
579 void RetpolinePic::writePltHeader(uint8_t *buf) const {
580   const uint8_t insn[] = {
581       0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
582       0x50,                                     // 6:    pushl %eax
583       0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
584       0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
585       0xf3, 0x90,                               // 12: loop: pause
586       0x0f, 0xae, 0xe8,                         // 14:   lfence
587       0xeb, 0xf9,                               // 17:   jmp loop
588       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
589       0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
590       0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
591       0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
592       0x89, 0xc8,                               // 2b:   mov %ecx, %eax
593       0x59,                                     // 2d:   pop %ecx
594       0xc3,                                     // 2e:   ret
595       0xcc,                                     // 2f:   int3; padding
596   };
597   memcpy(buf, insn, sizeof(insn));
598 }
599 
600 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
601                             uint64_t pltEntryAddr) const {
602   unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
603   const uint8_t insn[] = {
604       0x50,                            // pushl %eax
605       0x8b, 0x83, 0,    0,    0,    0, // mov foo@GOT(%ebx), %eax
606       0xe8, 0,    0,    0,    0,       // call plt+0x20
607       0xe9, 0,    0,    0,    0,       // jmp plt+0x12
608       0x68, 0,    0,    0,    0,       // pushl $reloc_offset
609       0xe9, 0,    0,    0,    0,       // jmp plt+0
610       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // int3; padding
611   };
612   memcpy(buf, insn, sizeof(insn));
613 
614   uint32_t ebx = in.gotPlt->getVA();
615   unsigned off = pltEntryAddr - in.plt->getVA();
616   write32le(buf + 3, sym.getGotPltVA() - ebx);
617   write32le(buf + 8, -off - 12 + 32);
618   write32le(buf + 13, -off - 17 + 18);
619   write32le(buf + 18, relOff);
620   write32le(buf + 23, -off - 27);
621 }
622 
623 RetpolineNoPic::RetpolineNoPic() {
624   pltHeaderSize = 48;
625   pltEntrySize = 32;
626   ipltEntrySize = 32;
627 }
628 
629 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
630   write32le(buf, s.getPltVA() + 16);
631 }
632 
633 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
634   const uint8_t insn[] = {
635       0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
636       0x50,                            // 6:    pushl %eax
637       0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
638       0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
639       0xf3, 0x90,                      // 11: loop: pause
640       0x0f, 0xae, 0xe8,                // 13:   lfence
641       0xeb, 0xf9,                      // 16:   jmp loop
642       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
643       0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
644       0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
645       0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
646       0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
647       0x89, 0xc8,                      // 2b:   mov %ecx, %eax
648       0x59,                            // 2d:   pop %ecx
649       0xc3,                            // 2e:   ret
650       0xcc,                            // 2f:   int3; padding
651   };
652   memcpy(buf, insn, sizeof(insn));
653 
654   uint32_t gotPlt = in.gotPlt->getVA();
655   write32le(buf + 2, gotPlt + 4);
656   write32le(buf + 8, gotPlt + 8);
657 }
658 
659 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
660                               uint64_t pltEntryAddr) const {
661   unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
662   const uint8_t insn[] = {
663       0x50,                         // 0:  pushl %eax
664       0xa1, 0,    0,    0,    0,    // 1:  mov foo_in_GOT, %eax
665       0xe8, 0,    0,    0,    0,    // 6:  call plt+0x20
666       0xe9, 0,    0,    0,    0,    // b:  jmp plt+0x11
667       0x68, 0,    0,    0,    0,    // 10: pushl $reloc_offset
668       0xe9, 0,    0,    0,    0,    // 15: jmp plt+0
669       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
670       0xcc,                         // 1f: int3; padding
671   };
672   memcpy(buf, insn, sizeof(insn));
673 
674   unsigned off = pltEntryAddr - in.plt->getVA();
675   write32le(buf + 2, sym.getGotPltVA());
676   write32le(buf + 7, -off - 11 + 32);
677   write32le(buf + 12, -off - 16 + 17);
678   write32le(buf + 17, relOff);
679   write32le(buf + 22, -off - 26);
680 }
681 
682 TargetInfo *elf::getX86TargetInfo() {
683   if (config->zRetpolineplt) {
684     if (config->isPic) {
685       static RetpolinePic t;
686       return &t;
687     }
688     static RetpolineNoPic t;
689     return &t;
690   }
691 
692   if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
693     static IntelIBT t;
694     return &t;
695   }
696 
697   static X86 t;
698   return &t;
699 }
700