xref: /freebsd/contrib/llvm-project/lld/ELF/Arch/X86.cpp (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 //===- X86.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "Symbols.h"
11 #include "SyntheticSections.h"
12 #include "Target.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Support/Endian.h"
15 
16 using namespace llvm;
17 using namespace llvm::support::endian;
18 using namespace llvm::ELF;
19 using namespace lld;
20 using namespace lld::elf;
21 
22 namespace {
23 class X86 : public TargetInfo {
24 public:
25   X86();
26   int getTlsGdRelaxSkip(RelType type) const override;
27   RelExpr getRelExpr(RelType type, const Symbol &s,
28                      const uint8_t *loc) const override;
29   int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
30   void writeGotPltHeader(uint8_t *buf) const override;
31   RelType getDynRel(RelType type) const override;
32   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
33   void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
34   void writePltHeader(uint8_t *buf) const override;
35   void writePlt(uint8_t *buf, const Symbol &sym,
36                 uint64_t pltEntryAddr) const override;
37   void relocate(uint8_t *loc, const Relocation &rel,
38                 uint64_t val) const override;
39 
40   RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
41   void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
42                       uint64_t val) const override;
43   void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
44                       uint64_t val) const override;
45   void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
46                       uint64_t val) const override;
47   void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
48                       uint64_t val) const override;
49 };
50 } // namespace
51 
52 X86::X86() {
53   copyRel = R_386_COPY;
54   gotRel = R_386_GLOB_DAT;
55   pltRel = R_386_JUMP_SLOT;
56   iRelativeRel = R_386_IRELATIVE;
57   relativeRel = R_386_RELATIVE;
58   symbolicRel = R_386_32;
59   tlsDescRel = R_386_TLS_DESC;
60   tlsGotRel = R_386_TLS_TPOFF;
61   tlsModuleIndexRel = R_386_TLS_DTPMOD32;
62   tlsOffsetRel = R_386_TLS_DTPOFF32;
63   gotBaseSymInGotPlt = true;
64   pltHeaderSize = 16;
65   pltEntrySize = 16;
66   ipltEntrySize = 16;
67   trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
68 
69   // Align to the non-PAE large page size (known as a superpage or huge page).
70   // FreeBSD automatically promotes large, superpage-aligned allocations.
71   defaultImageBase = 0x400000;
72 }
73 
74 int X86::getTlsGdRelaxSkip(RelType type) const {
75   // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
76   return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
77 }
78 
79 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
80                         const uint8_t *loc) const {
81   // There are 4 different TLS variable models with varying degrees of
82   // flexibility and performance. LocalExec and InitialExec models are fast but
83   // less-flexible models. If they are in use, we set DF_STATIC_TLS flag in the
84   // dynamic section to let runtime know about that.
85   if (type == R_386_TLS_LE || type == R_386_TLS_LE_32 || type == R_386_TLS_IE ||
86       type == R_386_TLS_GOTIE)
87     config->hasStaticTlsModel = true;
88 
89   switch (type) {
90   case R_386_8:
91   case R_386_16:
92   case R_386_32:
93     return R_ABS;
94   case R_386_TLS_LDO_32:
95     return R_DTPREL;
96   case R_386_TLS_GD:
97     return R_TLSGD_GOTPLT;
98   case R_386_TLS_LDM:
99     return R_TLSLD_GOTPLT;
100   case R_386_PLT32:
101     return R_PLT_PC;
102   case R_386_PC8:
103   case R_386_PC16:
104   case R_386_PC32:
105     return R_PC;
106   case R_386_GOTPC:
107     return R_GOTPLTONLY_PC;
108   case R_386_TLS_IE:
109     return R_GOT;
110   case R_386_GOT32:
111   case R_386_GOT32X:
112     // These relocations are arguably mis-designed because their calculations
113     // depend on the instructions they are applied to. This is bad because we
114     // usually don't care about whether the target section contains valid
115     // machine instructions or not. But this is part of the documented ABI, so
116     // we had to implement as the standard requires.
117     //
118     // x86 does not support PC-relative data access. Therefore, in order to
119     // access GOT contents, a GOT address needs to be known at link-time
120     // (which means non-PIC) or compilers have to emit code to get a GOT
121     // address at runtime (which means code is position-independent but
122     // compilers need to emit extra code for each GOT access.) This decision
123     // is made at compile-time. In the latter case, compilers emit code to
124     // load a GOT address to a register, which is usually %ebx.
125     //
126     // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
127     // foo@GOT(%ebx).
128     //
129     // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
130     // find such relocation, we should report an error. foo@GOT is resolved to
131     // an *absolute* address of foo's GOT entry, because both GOT address and
132     // foo's offset are known. In other words, it's G + A.
133     //
134     // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
135     // foo's GOT entry in the table, because GOT address is not known but foo's
136     // offset in the table is known. It's G + A - GOT.
137     //
138     // It's unfortunate that compilers emit the same relocation for these
139     // different use cases. In order to distinguish them, we have to read a
140     // machine instruction.
141     //
142     // The following code implements it. We assume that Loc[0] is the first byte
143     // of a displacement or an immediate field of a valid machine
144     // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
145     // the byte, we can determine whether the instruction uses the operand as an
146     // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
147     return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
148   case R_386_TLS_GOTDESC:
149     return R_TLSDESC_GOTPLT;
150   case R_386_TLS_DESC_CALL:
151     return R_TLSDESC_CALL;
152   case R_386_TLS_GOTIE:
153     return R_GOTPLT;
154   case R_386_GOTOFF:
155     return R_GOTPLTREL;
156   case R_386_TLS_LE:
157     return R_TPREL;
158   case R_386_TLS_LE_32:
159     return R_TPREL_NEG;
160   case R_386_NONE:
161     return R_NONE;
162   default:
163     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
164           ") against symbol " + toString(s));
165     return R_NONE;
166   }
167 }
168 
169 RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
170   switch (expr) {
171   default:
172     return expr;
173   case R_RELAX_TLS_GD_TO_IE:
174     return R_RELAX_TLS_GD_TO_IE_GOTPLT;
175   case R_RELAX_TLS_GD_TO_LE:
176     return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
177                                 : R_RELAX_TLS_GD_TO_LE;
178   }
179 }
180 
181 void X86::writeGotPltHeader(uint8_t *buf) const {
182   write32le(buf, mainPart->dynamic->getVA());
183 }
184 
185 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
186   // Entries in .got.plt initially points back to the corresponding
187   // PLT entries with a fixed offset to skip the first instruction.
188   write32le(buf, s.getPltVA() + 6);
189 }
190 
191 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
192   // An x86 entry is the address of the ifunc resolver function.
193   write32le(buf, s.getVA());
194 }
195 
196 RelType X86::getDynRel(RelType type) const {
197   if (type == R_386_TLS_LE)
198     return R_386_TLS_TPOFF;
199   if (type == R_386_TLS_LE_32)
200     return R_386_TLS_TPOFF32;
201   return type;
202 }
203 
204 void X86::writePltHeader(uint8_t *buf) const {
205   if (config->isPic) {
206     const uint8_t v[] = {
207         0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
208         0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
209         0x90, 0x90, 0x90, 0x90              // nop
210     };
211     memcpy(buf, v, sizeof(v));
212     return;
213   }
214 
215   const uint8_t pltData[] = {
216       0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
217       0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
218       0x90, 0x90, 0x90, 0x90, // nop
219   };
220   memcpy(buf, pltData, sizeof(pltData));
221   uint32_t gotPlt = in.gotPlt->getVA();
222   write32le(buf + 2, gotPlt + 4);
223   write32le(buf + 8, gotPlt + 8);
224 }
225 
226 void X86::writePlt(uint8_t *buf, const Symbol &sym,
227                    uint64_t pltEntryAddr) const {
228   unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
229   if (config->isPic) {
230     const uint8_t inst[] = {
231         0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
232         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
233         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
234     };
235     memcpy(buf, inst, sizeof(inst));
236     write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA());
237   } else {
238     const uint8_t inst[] = {
239         0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
240         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
241         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
242     };
243     memcpy(buf, inst, sizeof(inst));
244     write32le(buf + 2, sym.getGotPltVA());
245   }
246 
247   write32le(buf + 7, relOff);
248   write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
249 }
250 
251 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
252   switch (type) {
253   case R_386_8:
254   case R_386_PC8:
255     return SignExtend64<8>(*buf);
256   case R_386_16:
257   case R_386_PC16:
258     return SignExtend64<16>(read16le(buf));
259   case R_386_32:
260   case R_386_GLOB_DAT:
261   case R_386_GOT32:
262   case R_386_GOT32X:
263   case R_386_GOTOFF:
264   case R_386_GOTPC:
265   case R_386_IRELATIVE:
266   case R_386_PC32:
267   case R_386_PLT32:
268   case R_386_RELATIVE:
269   case R_386_TLS_GOTDESC:
270   case R_386_TLS_DESC_CALL:
271   case R_386_TLS_DTPMOD32:
272   case R_386_TLS_DTPOFF32:
273   case R_386_TLS_LDO_32:
274   case R_386_TLS_LDM:
275   case R_386_TLS_IE:
276   case R_386_TLS_IE_32:
277   case R_386_TLS_LE:
278   case R_386_TLS_LE_32:
279   case R_386_TLS_GD:
280   case R_386_TLS_GD_32:
281   case R_386_TLS_GOTIE:
282   case R_386_TLS_TPOFF:
283   case R_386_TLS_TPOFF32:
284     return SignExtend64<32>(read32le(buf));
285   case R_386_TLS_DESC:
286     return SignExtend64<32>(read32le(buf + 4));
287   case R_386_NONE:
288   case R_386_JUMP_SLOT:
289     // These relocations are defined as not having an implicit addend.
290     return 0;
291   default:
292     internalLinkerError(getErrorLocation(buf),
293                         "cannot read addend for relocation " + toString(type));
294     return 0;
295   }
296 }
297 
298 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
299   switch (rel.type) {
300   case R_386_8:
301     // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
302     // being used for some 16-bit programs such as boot loaders, so
303     // we want to support them.
304     checkIntUInt(loc, val, 8, rel);
305     *loc = val;
306     break;
307   case R_386_PC8:
308     checkInt(loc, val, 8, rel);
309     *loc = val;
310     break;
311   case R_386_16:
312     checkIntUInt(loc, val, 16, rel);
313     write16le(loc, val);
314     break;
315   case R_386_PC16:
316     // R_386_PC16 is normally used with 16 bit code. In that situation
317     // the PC is 16 bits, just like the addend. This means that it can
318     // point from any 16 bit address to any other if the possibility
319     // of wrapping is included.
320     // The only restriction we have to check then is that the destination
321     // address fits in 16 bits. That is impossible to do here. The problem is
322     // that we are passed the final value, which already had the
323     // current location subtracted from it.
324     // We just check that Val fits in 17 bits. This misses some cases, but
325     // should have no false positives.
326     checkInt(loc, val, 17, rel);
327     write16le(loc, val);
328     break;
329   case R_386_32:
330   case R_386_GOT32:
331   case R_386_GOT32X:
332   case R_386_GOTOFF:
333   case R_386_GOTPC:
334   case R_386_PC32:
335   case R_386_PLT32:
336   case R_386_RELATIVE:
337   case R_386_TLS_GOTDESC:
338   case R_386_TLS_DESC_CALL:
339   case R_386_TLS_DTPMOD32:
340   case R_386_TLS_DTPOFF32:
341   case R_386_TLS_GD:
342   case R_386_TLS_GOTIE:
343   case R_386_TLS_IE:
344   case R_386_TLS_LDM:
345   case R_386_TLS_LDO_32:
346   case R_386_TLS_LE:
347   case R_386_TLS_LE_32:
348   case R_386_TLS_TPOFF:
349   case R_386_TLS_TPOFF32:
350     checkInt(loc, val, 32, rel);
351     write32le(loc, val);
352     break;
353   case R_386_TLS_DESC:
354     // The addend is stored in the second 32-bit word.
355     write32le(loc + 4, val);
356     break;
357   default:
358     llvm_unreachable("unknown relocation");
359   }
360 }
361 
362 void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
363                          uint64_t val) const {
364   if (rel.type == R_386_TLS_GD) {
365     // Convert
366     //   leal x@tlsgd(, %ebx, 1), %eax
367     //   call __tls_get_addr@plt
368     // to
369     //   movl %gs:0, %eax
370     //   subl $x@tpoff, %eax
371     const uint8_t inst[] = {
372         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
373         0x81, 0xe8, 0,    0,    0,    0,    // subl val(%ebx), %eax
374     };
375     memcpy(loc - 3, inst, sizeof(inst));
376     write32le(loc + 5, val);
377   } else if (rel.type == R_386_TLS_GOTDESC) {
378     // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
379     //
380     // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
381     if (memcmp(loc - 2, "\x8d\x83", 2)) {
382       error(getErrorLocation(loc - 2) +
383             "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
384       return;
385     }
386     loc[-1] = 0x05;
387     write32le(loc, val);
388   } else {
389     // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
390     assert(rel.type == R_386_TLS_DESC_CALL);
391     loc[0] = 0x66;
392     loc[1] = 0x90;
393   }
394 }
395 
396 void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
397                          uint64_t val) const {
398   if (rel.type == R_386_TLS_GD) {
399     // Convert
400     //   leal x@tlsgd(, %ebx, 1), %eax
401     //   call __tls_get_addr@plt
402     // to
403     //   movl %gs:0, %eax
404     //   addl x@gotntpoff(%ebx), %eax
405     const uint8_t inst[] = {
406         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
407         0x03, 0x83, 0,    0,    0,    0,    // addl val(%ebx), %eax
408     };
409     memcpy(loc - 3, inst, sizeof(inst));
410     write32le(loc + 5, val);
411   } else if (rel.type == R_386_TLS_GOTDESC) {
412     // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
413     if (memcmp(loc - 2, "\x8d\x83", 2)) {
414       error(getErrorLocation(loc - 2) +
415             "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
416       return;
417     }
418     loc[-2] = 0x8b;
419     write32le(loc, val);
420   } else {
421     // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
422     assert(rel.type == R_386_TLS_DESC_CALL);
423     loc[0] = 0x66;
424     loc[1] = 0x90;
425   }
426 }
427 
428 // In some conditions, relocations can be optimized to avoid using GOT.
429 // This function does that for Initial Exec to Local Exec case.
430 void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
431                          uint64_t val) const {
432   // Ulrich's document section 6.2 says that @gotntpoff can
433   // be used with MOVL or ADDL instructions.
434   // @indntpoff is similar to @gotntpoff, but for use in
435   // position dependent code.
436   uint8_t reg = (loc[-1] >> 3) & 7;
437 
438   if (rel.type == R_386_TLS_IE) {
439     if (loc[-1] == 0xa1) {
440       // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
441       // This case is different from the generic case below because
442       // this is a 5 byte instruction while below is 6 bytes.
443       loc[-1] = 0xb8;
444     } else if (loc[-2] == 0x8b) {
445       // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
446       loc[-2] = 0xc7;
447       loc[-1] = 0xc0 | reg;
448     } else {
449       // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
450       loc[-2] = 0x81;
451       loc[-1] = 0xc0 | reg;
452     }
453   } else {
454     assert(rel.type == R_386_TLS_GOTIE);
455     if (loc[-2] == 0x8b) {
456       // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
457       loc[-2] = 0xc7;
458       loc[-1] = 0xc0 | reg;
459     } else {
460       // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
461       loc[-2] = 0x8d;
462       loc[-1] = 0x80 | (reg << 3) | reg;
463     }
464   }
465   write32le(loc, val);
466 }
467 
468 void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
469                          uint64_t val) const {
470   if (rel.type == R_386_TLS_LDO_32) {
471     write32le(loc, val);
472     return;
473   }
474 
475   // Convert
476   //   leal foo(%reg),%eax
477   //   call ___tls_get_addr
478   // to
479   //   movl %gs:0,%eax
480   //   nop
481   //   leal 0(%esi,1),%esi
482   const uint8_t inst[] = {
483       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
484       0x90,                               // nop
485       0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
486   };
487   memcpy(loc - 2, inst, sizeof(inst));
488 }
489 
490 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
491 // entries containing endbr32 instructions. A PLT entry will be split into two
492 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
493 namespace {
494 class IntelIBT : public X86 {
495 public:
496   IntelIBT();
497   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
498   void writePlt(uint8_t *buf, const Symbol &sym,
499                 uint64_t pltEntryAddr) const override;
500   void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
501 
502   static const unsigned IBTPltHeaderSize = 16;
503 };
504 } // namespace
505 
506 IntelIBT::IntelIBT() { pltHeaderSize = 0; }
507 
508 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
509   uint64_t va =
510       in.ibtPlt->getVA() + IBTPltHeaderSize + s.pltIndex * pltEntrySize;
511   write32le(buf, va);
512 }
513 
514 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
515                         uint64_t /*pltEntryAddr*/) const {
516   if (config->isPic) {
517     const uint8_t inst[] = {
518         0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
519         0xff, 0xa3, 0,    0,    0, 0, // jmp *name@GOT(%ebx)
520         0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
521     };
522     memcpy(buf, inst, sizeof(inst));
523     write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA());
524     return;
525   }
526 
527   const uint8_t inst[] = {
528       0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
529       0xff, 0x25, 0,    0,    0, 0, // jmp *foo@GOT
530       0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
531   };
532   memcpy(buf, inst, sizeof(inst));
533   write32le(buf + 6, sym.getGotPltVA());
534 }
535 
536 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
537   writePltHeader(buf);
538   buf += IBTPltHeaderSize;
539 
540   const uint8_t inst[] = {
541       0xf3, 0x0f, 0x1e, 0xfb,    // endbr32
542       0x68, 0,    0,    0,    0, // pushl $reloc_offset
543       0xe9, 0,    0,    0,    0, // jmpq .PLT0@PC
544       0x66, 0x90,                // nop
545   };
546 
547   for (size_t i = 0; i < numEntries; ++i) {
548     memcpy(buf, inst, sizeof(inst));
549     write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
550     write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
551     buf += sizeof(inst);
552   }
553 }
554 
555 namespace {
556 class RetpolinePic : public X86 {
557 public:
558   RetpolinePic();
559   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
560   void writePltHeader(uint8_t *buf) const override;
561   void writePlt(uint8_t *buf, const Symbol &sym,
562                 uint64_t pltEntryAddr) const override;
563 };
564 
565 class RetpolineNoPic : public X86 {
566 public:
567   RetpolineNoPic();
568   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
569   void writePltHeader(uint8_t *buf) const override;
570   void writePlt(uint8_t *buf, const Symbol &sym,
571                 uint64_t pltEntryAddr) const override;
572 };
573 } // namespace
574 
575 RetpolinePic::RetpolinePic() {
576   pltHeaderSize = 48;
577   pltEntrySize = 32;
578   ipltEntrySize = 32;
579 }
580 
581 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
582   write32le(buf, s.getPltVA() + 17);
583 }
584 
585 void RetpolinePic::writePltHeader(uint8_t *buf) const {
586   const uint8_t insn[] = {
587       0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
588       0x50,                                     // 6:    pushl %eax
589       0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
590       0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
591       0xf3, 0x90,                               // 12: loop: pause
592       0x0f, 0xae, 0xe8,                         // 14:   lfence
593       0xeb, 0xf9,                               // 17:   jmp loop
594       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
595       0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
596       0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
597       0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
598       0x89, 0xc8,                               // 2b:   mov %ecx, %eax
599       0x59,                                     // 2d:   pop %ecx
600       0xc3,                                     // 2e:   ret
601       0xcc,                                     // 2f:   int3; padding
602   };
603   memcpy(buf, insn, sizeof(insn));
604 }
605 
606 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
607                             uint64_t pltEntryAddr) const {
608   unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
609   const uint8_t insn[] = {
610       0x50,                            // pushl %eax
611       0x8b, 0x83, 0,    0,    0,    0, // mov foo@GOT(%ebx), %eax
612       0xe8, 0,    0,    0,    0,       // call plt+0x20
613       0xe9, 0,    0,    0,    0,       // jmp plt+0x12
614       0x68, 0,    0,    0,    0,       // pushl $reloc_offset
615       0xe9, 0,    0,    0,    0,       // jmp plt+0
616       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // int3; padding
617   };
618   memcpy(buf, insn, sizeof(insn));
619 
620   uint32_t ebx = in.gotPlt->getVA();
621   unsigned off = pltEntryAddr - in.plt->getVA();
622   write32le(buf + 3, sym.getGotPltVA() - ebx);
623   write32le(buf + 8, -off - 12 + 32);
624   write32le(buf + 13, -off - 17 + 18);
625   write32le(buf + 18, relOff);
626   write32le(buf + 23, -off - 27);
627 }
628 
629 RetpolineNoPic::RetpolineNoPic() {
630   pltHeaderSize = 48;
631   pltEntrySize = 32;
632   ipltEntrySize = 32;
633 }
634 
635 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
636   write32le(buf, s.getPltVA() + 16);
637 }
638 
639 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
640   const uint8_t insn[] = {
641       0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
642       0x50,                            // 6:    pushl %eax
643       0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
644       0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
645       0xf3, 0x90,                      // 11: loop: pause
646       0x0f, 0xae, 0xe8,                // 13:   lfence
647       0xeb, 0xf9,                      // 16:   jmp loop
648       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
649       0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
650       0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
651       0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
652       0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
653       0x89, 0xc8,                      // 2b:   mov %ecx, %eax
654       0x59,                            // 2d:   pop %ecx
655       0xc3,                            // 2e:   ret
656       0xcc,                            // 2f:   int3; padding
657   };
658   memcpy(buf, insn, sizeof(insn));
659 
660   uint32_t gotPlt = in.gotPlt->getVA();
661   write32le(buf + 2, gotPlt + 4);
662   write32le(buf + 8, gotPlt + 8);
663 }
664 
665 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
666                               uint64_t pltEntryAddr) const {
667   unsigned relOff = in.relaPlt->entsize * sym.pltIndex;
668   const uint8_t insn[] = {
669       0x50,                         // 0:  pushl %eax
670       0xa1, 0,    0,    0,    0,    // 1:  mov foo_in_GOT, %eax
671       0xe8, 0,    0,    0,    0,    // 6:  call plt+0x20
672       0xe9, 0,    0,    0,    0,    // b:  jmp plt+0x11
673       0x68, 0,    0,    0,    0,    // 10: pushl $reloc_offset
674       0xe9, 0,    0,    0,    0,    // 15: jmp plt+0
675       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
676       0xcc,                         // 1f: int3; padding
677   };
678   memcpy(buf, insn, sizeof(insn));
679 
680   unsigned off = pltEntryAddr - in.plt->getVA();
681   write32le(buf + 2, sym.getGotPltVA());
682   write32le(buf + 7, -off - 11 + 32);
683   write32le(buf + 12, -off - 16 + 17);
684   write32le(buf + 17, relOff);
685   write32le(buf + 22, -off - 26);
686 }
687 
688 TargetInfo *elf::getX86TargetInfo() {
689   if (config->zRetpolineplt) {
690     if (config->isPic) {
691       static RetpolinePic t;
692       return &t;
693     }
694     static RetpolineNoPic t;
695     return &t;
696   }
697 
698   if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
699     static IntelIBT t;
700     return &t;
701   }
702 
703   static X86 t;
704   return &t;
705 }
706