xref: /freebsd/contrib/llvm-project/lld/ELF/Arch/X86.cpp (revision 9c77fb6aaa366cbabc80ee1b834bcfe4df135491)
1 //===- X86.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "OutputSections.h"
10 #include "Symbols.h"
11 #include "SyntheticSections.h"
12 #include "Target.h"
13 #include "llvm/Support/Endian.h"
14 
15 using namespace llvm;
16 using namespace llvm::support::endian;
17 using namespace llvm::ELF;
18 using namespace lld;
19 using namespace lld::elf;
20 
21 namespace {
22 class X86 : public TargetInfo {
23 public:
24   X86(Ctx &);
25   int getTlsGdRelaxSkip(RelType type) const override;
26   RelExpr getRelExpr(RelType type, const Symbol &s,
27                      const uint8_t *loc) const override;
28   int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
29   void writeGotPltHeader(uint8_t *buf) const override;
30   RelType getDynRel(RelType type) const override;
31   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
32   void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
33   void writePltHeader(uint8_t *buf) const override;
34   void writePlt(uint8_t *buf, const Symbol &sym,
35                 uint64_t pltEntryAddr) const override;
36   void relocate(uint8_t *loc, const Relocation &rel,
37                 uint64_t val) const override;
38 
39   RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
40   void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
41 
42 private:
43   void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
44   void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
45   void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
46   void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
47 };
48 } // namespace
49 
50 X86::X86(Ctx &ctx) : TargetInfo(ctx) {
51   copyRel = R_386_COPY;
52   gotRel = R_386_GLOB_DAT;
53   pltRel = R_386_JUMP_SLOT;
54   iRelativeRel = R_386_IRELATIVE;
55   relativeRel = R_386_RELATIVE;
56   symbolicRel = R_386_32;
57   tlsDescRel = R_386_TLS_DESC;
58   tlsGotRel = R_386_TLS_TPOFF;
59   tlsModuleIndexRel = R_386_TLS_DTPMOD32;
60   tlsOffsetRel = R_386_TLS_DTPOFF32;
61   gotBaseSymInGotPlt = true;
62   pltHeaderSize = 16;
63   pltEntrySize = 16;
64   ipltEntrySize = 16;
65   trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
66 
67   // Align to the non-PAE large page size (known as a superpage or huge page).
68   // FreeBSD automatically promotes large, superpage-aligned allocations.
69   defaultImageBase = 0x400000;
70 }
71 
72 int X86::getTlsGdRelaxSkip(RelType type) const {
73   // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
74   return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
75 }
76 
77 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
78                         const uint8_t *loc) const {
79   switch (type) {
80   case R_386_8:
81   case R_386_16:
82   case R_386_32:
83     return R_ABS;
84   case R_386_TLS_LDO_32:
85     return R_DTPREL;
86   case R_386_TLS_GD:
87     return R_TLSGD_GOTPLT;
88   case R_386_TLS_LDM:
89     return R_TLSLD_GOTPLT;
90   case R_386_PLT32:
91     return R_PLT_PC;
92   case R_386_PC8:
93   case R_386_PC16:
94   case R_386_PC32:
95     return R_PC;
96   case R_386_GOTPC:
97     return R_GOTPLTONLY_PC;
98   case R_386_TLS_IE:
99     return R_GOT;
100   case R_386_GOT32:
101   case R_386_GOT32X:
102     // These relocations are arguably mis-designed because their calculations
103     // depend on the instructions they are applied to. This is bad because we
104     // usually don't care about whether the target section contains valid
105     // machine instructions or not. But this is part of the documented ABI, so
106     // we had to implement as the standard requires.
107     //
108     // x86 does not support PC-relative data access. Therefore, in order to
109     // access GOT contents, a GOT address needs to be known at link-time
110     // (which means non-PIC) or compilers have to emit code to get a GOT
111     // address at runtime (which means code is position-independent but
112     // compilers need to emit extra code for each GOT access.) This decision
113     // is made at compile-time. In the latter case, compilers emit code to
114     // load a GOT address to a register, which is usually %ebx.
115     //
116     // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
117     // foo@GOT(%ebx).
118     //
119     // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
120     // find such relocation, we should report an error. foo@GOT is resolved to
121     // an *absolute* address of foo's GOT entry, because both GOT address and
122     // foo's offset are known. In other words, it's G + A.
123     //
124     // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
125     // foo's GOT entry in the table, because GOT address is not known but foo's
126     // offset in the table is known. It's G + A - GOT.
127     //
128     // It's unfortunate that compilers emit the same relocation for these
129     // different use cases. In order to distinguish them, we have to read a
130     // machine instruction.
131     //
132     // The following code implements it. We assume that Loc[0] is the first byte
133     // of a displacement or an immediate field of a valid machine
134     // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
135     // the byte, we can determine whether the instruction uses the operand as an
136     // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
137     return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
138   case R_386_TLS_GOTDESC:
139     return R_TLSDESC_GOTPLT;
140   case R_386_TLS_DESC_CALL:
141     return R_TLSDESC_CALL;
142   case R_386_TLS_GOTIE:
143     return R_GOTPLT;
144   case R_386_GOTOFF:
145     return R_GOTPLTREL;
146   case R_386_TLS_LE:
147     return R_TPREL;
148   case R_386_TLS_LE_32:
149     return R_TPREL_NEG;
150   case R_386_NONE:
151     return R_NONE;
152   default:
153     Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
154              << ") against symbol " << &s;
155     return R_NONE;
156   }
157 }
158 
159 RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
160   switch (expr) {
161   default:
162     return expr;
163   case R_RELAX_TLS_GD_TO_IE:
164     return R_RELAX_TLS_GD_TO_IE_GOTPLT;
165   case R_RELAX_TLS_GD_TO_LE:
166     return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
167                                 : R_RELAX_TLS_GD_TO_LE;
168   }
169 }
170 
171 void X86::writeGotPltHeader(uint8_t *buf) const {
172   write32le(buf, ctx.mainPart->dynamic->getVA());
173 }
174 
175 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
176   // Entries in .got.plt initially points back to the corresponding
177   // PLT entries with a fixed offset to skip the first instruction.
178   write32le(buf, s.getPltVA(ctx) + 6);
179 }
180 
181 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
182   // An x86 entry is the address of the ifunc resolver function.
183   write32le(buf, s.getVA(ctx));
184 }
185 
186 RelType X86::getDynRel(RelType type) const {
187   if (type == R_386_TLS_LE)
188     return R_386_TLS_TPOFF;
189   if (type == R_386_TLS_LE_32)
190     return R_386_TLS_TPOFF32;
191   return type;
192 }
193 
194 void X86::writePltHeader(uint8_t *buf) const {
195   if (ctx.arg.isPic) {
196     const uint8_t v[] = {
197         0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
198         0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
199         0x90, 0x90, 0x90, 0x90              // nop
200     };
201     memcpy(buf, v, sizeof(v));
202     return;
203   }
204 
205   const uint8_t pltData[] = {
206       0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
207       0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
208       0x90, 0x90, 0x90, 0x90, // nop
209   };
210   memcpy(buf, pltData, sizeof(pltData));
211   uint32_t gotPlt = ctx.in.gotPlt->getVA();
212   write32le(buf + 2, gotPlt + 4);
213   write32le(buf + 8, gotPlt + 8);
214 }
215 
216 void X86::writePlt(uint8_t *buf, const Symbol &sym,
217                    uint64_t pltEntryAddr) const {
218   unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
219   if (ctx.arg.isPic) {
220     const uint8_t inst[] = {
221         0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
222         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
223         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
224     };
225     memcpy(buf, inst, sizeof(inst));
226     write32le(buf + 2, sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
227   } else {
228     const uint8_t inst[] = {
229         0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
230         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
231         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
232     };
233     memcpy(buf, inst, sizeof(inst));
234     write32le(buf + 2, sym.getGotPltVA(ctx));
235   }
236 
237   write32le(buf + 7, relOff);
238   write32le(buf + 12, ctx.in.plt->getVA() - pltEntryAddr - 16);
239 }
240 
241 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
242   switch (type) {
243   case R_386_8:
244   case R_386_PC8:
245     return SignExtend64<8>(*buf);
246   case R_386_16:
247   case R_386_PC16:
248     return SignExtend64<16>(read16le(buf));
249   case R_386_32:
250   case R_386_GLOB_DAT:
251   case R_386_GOT32:
252   case R_386_GOT32X:
253   case R_386_GOTOFF:
254   case R_386_GOTPC:
255   case R_386_IRELATIVE:
256   case R_386_PC32:
257   case R_386_PLT32:
258   case R_386_RELATIVE:
259   case R_386_TLS_GOTDESC:
260   case R_386_TLS_DESC_CALL:
261   case R_386_TLS_DTPMOD32:
262   case R_386_TLS_DTPOFF32:
263   case R_386_TLS_LDO_32:
264   case R_386_TLS_LDM:
265   case R_386_TLS_IE:
266   case R_386_TLS_IE_32:
267   case R_386_TLS_LE:
268   case R_386_TLS_LE_32:
269   case R_386_TLS_GD:
270   case R_386_TLS_GD_32:
271   case R_386_TLS_GOTIE:
272   case R_386_TLS_TPOFF:
273   case R_386_TLS_TPOFF32:
274     return SignExtend64<32>(read32le(buf));
275   case R_386_TLS_DESC:
276     return SignExtend64<32>(read32le(buf + 4));
277   case R_386_NONE:
278   case R_386_JUMP_SLOT:
279     // These relocations are defined as not having an implicit addend.
280     return 0;
281   default:
282     InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
283     return 0;
284   }
285 }
286 
287 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
288   switch (rel.type) {
289   case R_386_8:
290     // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
291     // being used for some 16-bit programs such as boot loaders, so
292     // we want to support them.
293     checkIntUInt(ctx, loc, val, 8, rel);
294     *loc = val;
295     break;
296   case R_386_PC8:
297     checkInt(ctx, loc, val, 8, rel);
298     *loc = val;
299     break;
300   case R_386_16:
301     checkIntUInt(ctx, loc, val, 16, rel);
302     write16le(loc, val);
303     break;
304   case R_386_PC16:
305     // R_386_PC16 is normally used with 16 bit code. In that situation
306     // the PC is 16 bits, just like the addend. This means that it can
307     // point from any 16 bit address to any other if the possibility
308     // of wrapping is included.
309     // The only restriction we have to check then is that the destination
310     // address fits in 16 bits. That is impossible to do here. The problem is
311     // that we are passed the final value, which already had the
312     // current location subtracted from it.
313     // We just check that Val fits in 17 bits. This misses some cases, but
314     // should have no false positives.
315     checkInt(ctx, loc, val, 17, rel);
316     write16le(loc, val);
317     break;
318   case R_386_32:
319   case R_386_GOT32:
320   case R_386_GOT32X:
321   case R_386_GOTOFF:
322   case R_386_GOTPC:
323   case R_386_PC32:
324   case R_386_PLT32:
325   case R_386_RELATIVE:
326   case R_386_TLS_GOTDESC:
327   case R_386_TLS_DESC_CALL:
328   case R_386_TLS_DTPMOD32:
329   case R_386_TLS_DTPOFF32:
330   case R_386_TLS_GD:
331   case R_386_TLS_GOTIE:
332   case R_386_TLS_IE:
333   case R_386_TLS_LDM:
334   case R_386_TLS_LDO_32:
335   case R_386_TLS_LE:
336   case R_386_TLS_LE_32:
337   case R_386_TLS_TPOFF:
338   case R_386_TLS_TPOFF32:
339     checkInt(ctx, loc, val, 32, rel);
340     write32le(loc, val);
341     break;
342   case R_386_TLS_DESC:
343     // The addend is stored in the second 32-bit word.
344     write32le(loc + 4, val);
345     break;
346   default:
347     llvm_unreachable("unknown relocation");
348   }
349 }
350 
351 void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
352                          uint64_t val) const {
353   if (rel.type == R_386_TLS_GD) {
354     // Convert (loc[-2] == 0x04)
355     //   leal x@tlsgd(, %ebx, 1), %eax
356     //   call ___tls_get_addr@plt
357     // or
358     //   leal x@tlsgd(%reg), %eax
359     //   call *___tls_get_addr@got(%reg)
360     // to
361     const uint8_t inst[] = {
362         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
363         0x81, 0xe8, 0,    0,    0,    0,    // subl x@ntpoff(%ebx), %eax
364     };
365     uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
366     memcpy(w, inst, sizeof(inst));
367     write32le(w + 8, val);
368   } else if (rel.type == R_386_TLS_GOTDESC) {
369     // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
370     //
371     // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
372     if (memcmp(loc - 2, "\x8d\x83", 2)) {
373       ErrAlways(ctx)
374           << getErrorLoc(ctx, loc - 2)
375           << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
376       return;
377     }
378     loc[-1] = 0x05;
379     write32le(loc, val);
380   } else {
381     // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
382     assert(rel.type == R_386_TLS_DESC_CALL);
383     loc[0] = 0x66;
384     loc[1] = 0x90;
385   }
386 }
387 
388 void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
389                          uint64_t val) const {
390   if (rel.type == R_386_TLS_GD) {
391     // Convert (loc[-2] == 0x04)
392     //   leal x@tlsgd(, %ebx, 1), %eax
393     //   call ___tls_get_addr@plt
394     // or
395     //   leal x@tlsgd(%reg), %eax
396     //   call *___tls_get_addr@got(%reg)
397     const uint8_t inst[] = {
398         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
399         0x03, 0x83, 0,    0,    0,    0,    // addl x@gottpoff(%ebx), %eax
400     };
401     uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
402     memcpy(w, inst, sizeof(inst));
403     write32le(w + 8, val);
404   } else if (rel.type == R_386_TLS_GOTDESC) {
405     // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
406     if (memcmp(loc - 2, "\x8d\x83", 2)) {
407       ErrAlways(ctx)
408           << getErrorLoc(ctx, loc - 2)
409           << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
410       return;
411     }
412     loc[-2] = 0x8b;
413     write32le(loc, val);
414   } else {
415     // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
416     assert(rel.type == R_386_TLS_DESC_CALL);
417     loc[0] = 0x66;
418     loc[1] = 0x90;
419   }
420 }
421 
422 // In some conditions, relocations can be optimized to avoid using GOT.
423 // This function does that for Initial Exec to Local Exec case.
424 void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
425                          uint64_t val) const {
426   // Ulrich's document section 6.2 says that @gotntpoff can
427   // be used with MOVL or ADDL instructions.
428   // @indntpoff is similar to @gotntpoff, but for use in
429   // position dependent code.
430   uint8_t reg = (loc[-1] >> 3) & 7;
431 
432   if (rel.type == R_386_TLS_IE) {
433     if (loc[-1] == 0xa1) {
434       // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
435       // This case is different from the generic case below because
436       // this is a 5 byte instruction while below is 6 bytes.
437       loc[-1] = 0xb8;
438     } else if (loc[-2] == 0x8b) {
439       // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
440       loc[-2] = 0xc7;
441       loc[-1] = 0xc0 | reg;
442     } else {
443       // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
444       loc[-2] = 0x81;
445       loc[-1] = 0xc0 | reg;
446     }
447   } else {
448     assert(rel.type == R_386_TLS_GOTIE);
449     if (loc[-2] == 0x8b) {
450       // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
451       loc[-2] = 0xc7;
452       loc[-1] = 0xc0 | reg;
453     } else {
454       // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
455       loc[-2] = 0x8d;
456       loc[-1] = 0x80 | (reg << 3) | reg;
457     }
458   }
459   write32le(loc, val);
460 }
461 
462 void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
463                          uint64_t val) const {
464   if (rel.type == R_386_TLS_LDO_32) {
465     write32le(loc, val);
466     return;
467   }
468 
469   if (loc[4] == 0xe8) {
470     // Convert
471     //   leal x(%reg),%eax
472     //   call ___tls_get_addr@plt
473     // to
474     const uint8_t inst[] = {
475         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
476         0x90,                               // nop
477         0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
478     };
479     memcpy(loc - 2, inst, sizeof(inst));
480     return;
481   }
482 
483   // Convert
484   //   leal x(%reg),%eax
485   //   call *___tls_get_addr@got(%reg)
486   // to
487   const uint8_t inst[] = {
488       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
489       0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi
490   };
491   memcpy(loc - 2, inst, sizeof(inst));
492 }
493 
494 void X86::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
495   uint64_t secAddr = sec.getOutputSection()->addr;
496   if (auto *s = dyn_cast<InputSection>(&sec))
497     secAddr += s->outSecOff;
498   for (const Relocation &rel : sec.relocs()) {
499     uint8_t *loc = buf + rel.offset;
500     const uint64_t val =
501         SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), 32);
502     switch (rel.expr) {
503     case R_RELAX_TLS_GD_TO_IE_GOTPLT:
504       relaxTlsGdToIe(loc, rel, val);
505       continue;
506     case R_RELAX_TLS_GD_TO_LE:
507     case R_RELAX_TLS_GD_TO_LE_NEG:
508       relaxTlsGdToLe(loc, rel, val);
509       continue;
510     case R_RELAX_TLS_LD_TO_LE:
511       relaxTlsLdToLe(loc, rel, val);
512       break;
513     case R_RELAX_TLS_IE_TO_LE:
514       relaxTlsIeToLe(loc, rel, val);
515       continue;
516     default:
517       relocate(loc, rel, val);
518       break;
519     }
520   }
521 }
522 
523 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
524 // entries containing endbr32 instructions. A PLT entry will be split into two
525 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
526 namespace {
527 class IntelIBT : public X86 {
528 public:
529   IntelIBT(Ctx &ctx) : X86(ctx) { pltHeaderSize = 0; }
530   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
531   void writePlt(uint8_t *buf, const Symbol &sym,
532                 uint64_t pltEntryAddr) const override;
533   void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
534 
535   static const unsigned IBTPltHeaderSize = 16;
536 };
537 } // namespace
538 
539 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
540   uint64_t va = ctx.in.ibtPlt->getVA() + IBTPltHeaderSize +
541                 s.getPltIdx(ctx) * pltEntrySize;
542   write32le(buf, va);
543 }
544 
545 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
546                         uint64_t /*pltEntryAddr*/) const {
547   if (ctx.arg.isPic) {
548     const uint8_t inst[] = {
549         0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
550         0xff, 0xa3, 0,    0,    0, 0, // jmp *name@GOT(%ebx)
551         0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
552     };
553     memcpy(buf, inst, sizeof(inst));
554     write32le(buf + 6, sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
555     return;
556   }
557 
558   const uint8_t inst[] = {
559       0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
560       0xff, 0x25, 0,    0,    0, 0, // jmp *foo@GOT
561       0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
562   };
563   memcpy(buf, inst, sizeof(inst));
564   write32le(buf + 6, sym.getGotPltVA(ctx));
565 }
566 
567 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
568   writePltHeader(buf);
569   buf += IBTPltHeaderSize;
570 
571   const uint8_t inst[] = {
572       0xf3, 0x0f, 0x1e, 0xfb,    // endbr32
573       0x68, 0,    0,    0,    0, // pushl $reloc_offset
574       0xe9, 0,    0,    0,    0, // jmpq .PLT0@PC
575       0x66, 0x90,                // nop
576   };
577 
578   for (size_t i = 0; i < numEntries; ++i) {
579     memcpy(buf, inst, sizeof(inst));
580     write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
581     write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
582     buf += sizeof(inst);
583   }
584 }
585 
586 namespace {
587 class RetpolinePic : public X86 {
588 public:
589   RetpolinePic(Ctx &);
590   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
591   void writePltHeader(uint8_t *buf) const override;
592   void writePlt(uint8_t *buf, const Symbol &sym,
593                 uint64_t pltEntryAddr) const override;
594 };
595 
596 class RetpolineNoPic : public X86 {
597 public:
598   RetpolineNoPic(Ctx &);
599   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
600   void writePltHeader(uint8_t *buf) const override;
601   void writePlt(uint8_t *buf, const Symbol &sym,
602                 uint64_t pltEntryAddr) const override;
603 };
604 } // namespace
605 
606 RetpolinePic::RetpolinePic(Ctx &ctx) : X86(ctx) {
607   pltHeaderSize = 48;
608   pltEntrySize = 32;
609   ipltEntrySize = 32;
610 }
611 
612 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
613   write32le(buf, s.getPltVA(ctx) + 17);
614 }
615 
616 void RetpolinePic::writePltHeader(uint8_t *buf) const {
617   const uint8_t insn[] = {
618       0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
619       0x50,                                     // 6:    pushl %eax
620       0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
621       0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
622       0xf3, 0x90,                               // 12: loop: pause
623       0x0f, 0xae, 0xe8,                         // 14:   lfence
624       0xeb, 0xf9,                               // 17:   jmp loop
625       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
626       0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
627       0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
628       0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
629       0x89, 0xc8,                               // 2b:   mov %ecx, %eax
630       0x59,                                     // 2d:   pop %ecx
631       0xc3,                                     // 2e:   ret
632       0xcc,                                     // 2f:   int3; padding
633   };
634   memcpy(buf, insn, sizeof(insn));
635 }
636 
637 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
638                             uint64_t pltEntryAddr) const {
639   unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
640   const uint8_t insn[] = {
641       0x50,                            // pushl %eax
642       0x8b, 0x83, 0,    0,    0,    0, // mov foo@GOT(%ebx), %eax
643       0xe8, 0,    0,    0,    0,       // call plt+0x20
644       0xe9, 0,    0,    0,    0,       // jmp plt+0x12
645       0x68, 0,    0,    0,    0,       // pushl $reloc_offset
646       0xe9, 0,    0,    0,    0,       // jmp plt+0
647       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // int3; padding
648   };
649   memcpy(buf, insn, sizeof(insn));
650 
651   uint32_t ebx = ctx.in.gotPlt->getVA();
652   unsigned off = pltEntryAddr - ctx.in.plt->getVA();
653   write32le(buf + 3, sym.getGotPltVA(ctx) - ebx);
654   write32le(buf + 8, -off - 12 + 32);
655   write32le(buf + 13, -off - 17 + 18);
656   write32le(buf + 18, relOff);
657   write32le(buf + 23, -off - 27);
658 }
659 
660 RetpolineNoPic::RetpolineNoPic(Ctx &ctx) : X86(ctx) {
661   pltHeaderSize = 48;
662   pltEntrySize = 32;
663   ipltEntrySize = 32;
664 }
665 
666 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
667   write32le(buf, s.getPltVA(ctx) + 16);
668 }
669 
670 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
671   const uint8_t insn[] = {
672       0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
673       0x50,                            // 6:    pushl %eax
674       0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
675       0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
676       0xf3, 0x90,                      // 11: loop: pause
677       0x0f, 0xae, 0xe8,                // 13:   lfence
678       0xeb, 0xf9,                      // 16:   jmp loop
679       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
680       0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
681       0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
682       0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
683       0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
684       0x89, 0xc8,                      // 2b:   mov %ecx, %eax
685       0x59,                            // 2d:   pop %ecx
686       0xc3,                            // 2e:   ret
687       0xcc,                            // 2f:   int3; padding
688   };
689   memcpy(buf, insn, sizeof(insn));
690 
691   uint32_t gotPlt = ctx.in.gotPlt->getVA();
692   write32le(buf + 2, gotPlt + 4);
693   write32le(buf + 8, gotPlt + 8);
694 }
695 
696 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
697                               uint64_t pltEntryAddr) const {
698   unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
699   const uint8_t insn[] = {
700       0x50,                         // 0:  pushl %eax
701       0xa1, 0,    0,    0,    0,    // 1:  mov foo_in_GOT, %eax
702       0xe8, 0,    0,    0,    0,    // 6:  call plt+0x20
703       0xe9, 0,    0,    0,    0,    // b:  jmp plt+0x11
704       0x68, 0,    0,    0,    0,    // 10: pushl $reloc_offset
705       0xe9, 0,    0,    0,    0,    // 15: jmp plt+0
706       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
707       0xcc,                         // 1f: int3; padding
708   };
709   memcpy(buf, insn, sizeof(insn));
710 
711   unsigned off = pltEntryAddr - ctx.in.plt->getVA();
712   write32le(buf + 2, sym.getGotPltVA(ctx));
713   write32le(buf + 7, -off - 11 + 32);
714   write32le(buf + 12, -off - 16 + 17);
715   write32le(buf + 17, relOff);
716   write32le(buf + 22, -off - 26);
717 }
718 
719 void elf::setX86TargetInfo(Ctx &ctx) {
720   if (ctx.arg.zRetpolineplt) {
721     if (ctx.arg.isPic)
722       ctx.target.reset(new RetpolinePic(ctx));
723     else
724       ctx.target.reset(new RetpolineNoPic(ctx));
725     return;
726   }
727 
728   if (ctx.arg.andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)
729     ctx.target.reset(new IntelIBT(ctx));
730   else
731     ctx.target.reset(new X86(ctx));
732 }
733