xref: /freebsd/contrib/llvm-project/lld/ELF/Arch/X86.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1 //===- X86.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "OutputSections.h"
10 #include "Symbols.h"
11 #include "SyntheticSections.h"
12 #include "Target.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Support/Endian.h"
15 
16 using namespace llvm;
17 using namespace llvm::support::endian;
18 using namespace llvm::ELF;
19 using namespace lld;
20 using namespace lld::elf;
21 
22 namespace {
23 class X86 : public TargetInfo {
24 public:
25   X86();
26   int getTlsGdRelaxSkip(RelType type) const override;
27   RelExpr getRelExpr(RelType type, const Symbol &s,
28                      const uint8_t *loc) const override;
29   int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
30   void writeGotPltHeader(uint8_t *buf) const override;
31   RelType getDynRel(RelType type) const override;
32   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
33   void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
34   void writePltHeader(uint8_t *buf) const override;
35   void writePlt(uint8_t *buf, const Symbol &sym,
36                 uint64_t pltEntryAddr) const override;
37   void relocate(uint8_t *loc, const Relocation &rel,
38                 uint64_t val) const override;
39 
40   RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
41   void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
42 };
43 } // namespace
44 
X86()45 X86::X86() {
46   copyRel = R_386_COPY;
47   gotRel = R_386_GLOB_DAT;
48   pltRel = R_386_JUMP_SLOT;
49   iRelativeRel = R_386_IRELATIVE;
50   relativeRel = R_386_RELATIVE;
51   symbolicRel = R_386_32;
52   tlsDescRel = R_386_TLS_DESC;
53   tlsGotRel = R_386_TLS_TPOFF;
54   tlsModuleIndexRel = R_386_TLS_DTPMOD32;
55   tlsOffsetRel = R_386_TLS_DTPOFF32;
56   gotBaseSymInGotPlt = true;
57   pltHeaderSize = 16;
58   pltEntrySize = 16;
59   ipltEntrySize = 16;
60   trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
61 
62   // Align to the non-PAE large page size (known as a superpage or huge page).
63   // FreeBSD automatically promotes large, superpage-aligned allocations.
64   defaultImageBase = 0x400000;
65 }
66 
getTlsGdRelaxSkip(RelType type) const67 int X86::getTlsGdRelaxSkip(RelType type) const {
68   // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
69   return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
70 }
71 
getRelExpr(RelType type,const Symbol & s,const uint8_t * loc) const72 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
73                         const uint8_t *loc) const {
74   switch (type) {
75   case R_386_8:
76   case R_386_16:
77   case R_386_32:
78     return R_ABS;
79   case R_386_TLS_LDO_32:
80     return R_DTPREL;
81   case R_386_TLS_GD:
82     return R_TLSGD_GOTPLT;
83   case R_386_TLS_LDM:
84     return R_TLSLD_GOTPLT;
85   case R_386_PLT32:
86     return R_PLT_PC;
87   case R_386_PC8:
88   case R_386_PC16:
89   case R_386_PC32:
90     return R_PC;
91   case R_386_GOTPC:
92     return R_GOTPLTONLY_PC;
93   case R_386_TLS_IE:
94     return R_GOT;
95   case R_386_GOT32:
96   case R_386_GOT32X:
97     // These relocations are arguably mis-designed because their calculations
98     // depend on the instructions they are applied to. This is bad because we
99     // usually don't care about whether the target section contains valid
100     // machine instructions or not. But this is part of the documented ABI, so
101     // we had to implement as the standard requires.
102     //
103     // x86 does not support PC-relative data access. Therefore, in order to
104     // access GOT contents, a GOT address needs to be known at link-time
105     // (which means non-PIC) or compilers have to emit code to get a GOT
106     // address at runtime (which means code is position-independent but
107     // compilers need to emit extra code for each GOT access.) This decision
108     // is made at compile-time. In the latter case, compilers emit code to
109     // load a GOT address to a register, which is usually %ebx.
110     //
111     // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
112     // foo@GOT(%ebx).
113     //
114     // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
115     // find such relocation, we should report an error. foo@GOT is resolved to
116     // an *absolute* address of foo's GOT entry, because both GOT address and
117     // foo's offset are known. In other words, it's G + A.
118     //
119     // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
120     // foo's GOT entry in the table, because GOT address is not known but foo's
121     // offset in the table is known. It's G + A - GOT.
122     //
123     // It's unfortunate that compilers emit the same relocation for these
124     // different use cases. In order to distinguish them, we have to read a
125     // machine instruction.
126     //
127     // The following code implements it. We assume that Loc[0] is the first byte
128     // of a displacement or an immediate field of a valid machine
129     // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
130     // the byte, we can determine whether the instruction uses the operand as an
131     // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
132     return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
133   case R_386_TLS_GOTDESC:
134     return R_TLSDESC_GOTPLT;
135   case R_386_TLS_DESC_CALL:
136     return R_TLSDESC_CALL;
137   case R_386_TLS_GOTIE:
138     return R_GOTPLT;
139   case R_386_GOTOFF:
140     return R_GOTPLTREL;
141   case R_386_TLS_LE:
142     return R_TPREL;
143   case R_386_TLS_LE_32:
144     return R_TPREL_NEG;
145   case R_386_NONE:
146     return R_NONE;
147   default:
148     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
149           ") against symbol " + toString(s));
150     return R_NONE;
151   }
152 }
153 
adjustTlsExpr(RelType type,RelExpr expr) const154 RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
155   switch (expr) {
156   default:
157     return expr;
158   case R_RELAX_TLS_GD_TO_IE:
159     return R_RELAX_TLS_GD_TO_IE_GOTPLT;
160   case R_RELAX_TLS_GD_TO_LE:
161     return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
162                                 : R_RELAX_TLS_GD_TO_LE;
163   }
164 }
165 
writeGotPltHeader(uint8_t * buf) const166 void X86::writeGotPltHeader(uint8_t *buf) const {
167   write32le(buf, mainPart->dynamic->getVA());
168 }
169 
writeGotPlt(uint8_t * buf,const Symbol & s) const170 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
171   // Entries in .got.plt initially points back to the corresponding
172   // PLT entries with a fixed offset to skip the first instruction.
173   write32le(buf, s.getPltVA() + 6);
174 }
175 
writeIgotPlt(uint8_t * buf,const Symbol & s) const176 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
177   // An x86 entry is the address of the ifunc resolver function.
178   write32le(buf, s.getVA());
179 }
180 
getDynRel(RelType type) const181 RelType X86::getDynRel(RelType type) const {
182   if (type == R_386_TLS_LE)
183     return R_386_TLS_TPOFF;
184   if (type == R_386_TLS_LE_32)
185     return R_386_TLS_TPOFF32;
186   return type;
187 }
188 
writePltHeader(uint8_t * buf) const189 void X86::writePltHeader(uint8_t *buf) const {
190   if (config->isPic) {
191     const uint8_t v[] = {
192         0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
193         0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
194         0x90, 0x90, 0x90, 0x90              // nop
195     };
196     memcpy(buf, v, sizeof(v));
197     return;
198   }
199 
200   const uint8_t pltData[] = {
201       0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
202       0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
203       0x90, 0x90, 0x90, 0x90, // nop
204   };
205   memcpy(buf, pltData, sizeof(pltData));
206   uint32_t gotPlt = in.gotPlt->getVA();
207   write32le(buf + 2, gotPlt + 4);
208   write32le(buf + 8, gotPlt + 8);
209 }
210 
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const211 void X86::writePlt(uint8_t *buf, const Symbol &sym,
212                    uint64_t pltEntryAddr) const {
213   unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
214   if (config->isPic) {
215     const uint8_t inst[] = {
216         0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
217         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
218         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
219     };
220     memcpy(buf, inst, sizeof(inst));
221     write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA());
222   } else {
223     const uint8_t inst[] = {
224         0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
225         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
226         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
227     };
228     memcpy(buf, inst, sizeof(inst));
229     write32le(buf + 2, sym.getGotPltVA());
230   }
231 
232   write32le(buf + 7, relOff);
233   write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
234 }
235 
getImplicitAddend(const uint8_t * buf,RelType type) const236 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
237   switch (type) {
238   case R_386_8:
239   case R_386_PC8:
240     return SignExtend64<8>(*buf);
241   case R_386_16:
242   case R_386_PC16:
243     return SignExtend64<16>(read16le(buf));
244   case R_386_32:
245   case R_386_GLOB_DAT:
246   case R_386_GOT32:
247   case R_386_GOT32X:
248   case R_386_GOTOFF:
249   case R_386_GOTPC:
250   case R_386_IRELATIVE:
251   case R_386_PC32:
252   case R_386_PLT32:
253   case R_386_RELATIVE:
254   case R_386_TLS_GOTDESC:
255   case R_386_TLS_DESC_CALL:
256   case R_386_TLS_DTPMOD32:
257   case R_386_TLS_DTPOFF32:
258   case R_386_TLS_LDO_32:
259   case R_386_TLS_LDM:
260   case R_386_TLS_IE:
261   case R_386_TLS_IE_32:
262   case R_386_TLS_LE:
263   case R_386_TLS_LE_32:
264   case R_386_TLS_GD:
265   case R_386_TLS_GD_32:
266   case R_386_TLS_GOTIE:
267   case R_386_TLS_TPOFF:
268   case R_386_TLS_TPOFF32:
269     return SignExtend64<32>(read32le(buf));
270   case R_386_TLS_DESC:
271     return SignExtend64<32>(read32le(buf + 4));
272   case R_386_NONE:
273   case R_386_JUMP_SLOT:
274     // These relocations are defined as not having an implicit addend.
275     return 0;
276   default:
277     internalLinkerError(getErrorLocation(buf),
278                         "cannot read addend for relocation " + toString(type));
279     return 0;
280   }
281 }
282 
relocate(uint8_t * loc,const Relocation & rel,uint64_t val) const283 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
284   switch (rel.type) {
285   case R_386_8:
286     // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
287     // being used for some 16-bit programs such as boot loaders, so
288     // we want to support them.
289     checkIntUInt(loc, val, 8, rel);
290     *loc = val;
291     break;
292   case R_386_PC8:
293     checkInt(loc, val, 8, rel);
294     *loc = val;
295     break;
296   case R_386_16:
297     checkIntUInt(loc, val, 16, rel);
298     write16le(loc, val);
299     break;
300   case R_386_PC16:
301     // R_386_PC16 is normally used with 16 bit code. In that situation
302     // the PC is 16 bits, just like the addend. This means that it can
303     // point from any 16 bit address to any other if the possibility
304     // of wrapping is included.
305     // The only restriction we have to check then is that the destination
306     // address fits in 16 bits. That is impossible to do here. The problem is
307     // that we are passed the final value, which already had the
308     // current location subtracted from it.
309     // We just check that Val fits in 17 bits. This misses some cases, but
310     // should have no false positives.
311     checkInt(loc, val, 17, rel);
312     write16le(loc, val);
313     break;
314   case R_386_32:
315   case R_386_GOT32:
316   case R_386_GOT32X:
317   case R_386_GOTOFF:
318   case R_386_GOTPC:
319   case R_386_PC32:
320   case R_386_PLT32:
321   case R_386_RELATIVE:
322   case R_386_TLS_GOTDESC:
323   case R_386_TLS_DESC_CALL:
324   case R_386_TLS_DTPMOD32:
325   case R_386_TLS_DTPOFF32:
326   case R_386_TLS_GD:
327   case R_386_TLS_GOTIE:
328   case R_386_TLS_IE:
329   case R_386_TLS_LDM:
330   case R_386_TLS_LDO_32:
331   case R_386_TLS_LE:
332   case R_386_TLS_LE_32:
333   case R_386_TLS_TPOFF:
334   case R_386_TLS_TPOFF32:
335     checkInt(loc, val, 32, rel);
336     write32le(loc, val);
337     break;
338   case R_386_TLS_DESC:
339     // The addend is stored in the second 32-bit word.
340     write32le(loc + 4, val);
341     break;
342   default:
343     llvm_unreachable("unknown relocation");
344   }
345 }
346 
relaxTlsGdToLe(uint8_t * loc,const Relocation & rel,uint64_t val)347 static void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
348   if (rel.type == R_386_TLS_GD) {
349     // Convert (loc[-2] == 0x04)
350     //   leal x@tlsgd(, %ebx, 1), %eax
351     //   call ___tls_get_addr@plt
352     // or
353     //   leal x@tlsgd(%reg), %eax
354     //   call *___tls_get_addr@got(%reg)
355     // to
356     const uint8_t inst[] = {
357         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
358         0x81, 0xe8, 0,    0,    0,    0,    // subl x@ntpoff(%ebx), %eax
359     };
360     uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
361     memcpy(w, inst, sizeof(inst));
362     write32le(w + 8, val);
363   } else if (rel.type == R_386_TLS_GOTDESC) {
364     // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
365     //
366     // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
367     if (memcmp(loc - 2, "\x8d\x83", 2)) {
368       error(getErrorLocation(loc - 2) +
369             "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
370       return;
371     }
372     loc[-1] = 0x05;
373     write32le(loc, val);
374   } else {
375     // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
376     assert(rel.type == R_386_TLS_DESC_CALL);
377     loc[0] = 0x66;
378     loc[1] = 0x90;
379   }
380 }
381 
relaxTlsGdToIe(uint8_t * loc,const Relocation & rel,uint64_t val)382 static void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) {
383   if (rel.type == R_386_TLS_GD) {
384     // Convert (loc[-2] == 0x04)
385     //   leal x@tlsgd(, %ebx, 1), %eax
386     //   call ___tls_get_addr@plt
387     // or
388     //   leal x@tlsgd(%reg), %eax
389     //   call *___tls_get_addr@got(%reg)
390     const uint8_t inst[] = {
391         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
392         0x03, 0x83, 0,    0,    0,    0,    // addl x@gottpoff(%ebx), %eax
393     };
394     uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
395     memcpy(w, inst, sizeof(inst));
396     write32le(w + 8, val);
397   } else if (rel.type == R_386_TLS_GOTDESC) {
398     // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
399     if (memcmp(loc - 2, "\x8d\x83", 2)) {
400       error(getErrorLocation(loc - 2) +
401             "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
402       return;
403     }
404     loc[-2] = 0x8b;
405     write32le(loc, val);
406   } else {
407     // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
408     assert(rel.type == R_386_TLS_DESC_CALL);
409     loc[0] = 0x66;
410     loc[1] = 0x90;
411   }
412 }
413 
414 // In some conditions, relocations can be optimized to avoid using GOT.
415 // This function does that for Initial Exec to Local Exec case.
relaxTlsIeToLe(uint8_t * loc,const Relocation & rel,uint64_t val)416 static void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
417   // Ulrich's document section 6.2 says that @gotntpoff can
418   // be used with MOVL or ADDL instructions.
419   // @indntpoff is similar to @gotntpoff, but for use in
420   // position dependent code.
421   uint8_t reg = (loc[-1] >> 3) & 7;
422 
423   if (rel.type == R_386_TLS_IE) {
424     if (loc[-1] == 0xa1) {
425       // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
426       // This case is different from the generic case below because
427       // this is a 5 byte instruction while below is 6 bytes.
428       loc[-1] = 0xb8;
429     } else if (loc[-2] == 0x8b) {
430       // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
431       loc[-2] = 0xc7;
432       loc[-1] = 0xc0 | reg;
433     } else {
434       // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
435       loc[-2] = 0x81;
436       loc[-1] = 0xc0 | reg;
437     }
438   } else {
439     assert(rel.type == R_386_TLS_GOTIE);
440     if (loc[-2] == 0x8b) {
441       // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
442       loc[-2] = 0xc7;
443       loc[-1] = 0xc0 | reg;
444     } else {
445       // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
446       loc[-2] = 0x8d;
447       loc[-1] = 0x80 | (reg << 3) | reg;
448     }
449   }
450   write32le(loc, val);
451 }
452 
relaxTlsLdToLe(uint8_t * loc,const Relocation & rel,uint64_t val)453 static void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
454   if (rel.type == R_386_TLS_LDO_32) {
455     write32le(loc, val);
456     return;
457   }
458 
459   if (loc[4] == 0xe8) {
460     // Convert
461     //   leal x(%reg),%eax
462     //   call ___tls_get_addr@plt
463     // to
464     const uint8_t inst[] = {
465         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
466         0x90,                               // nop
467         0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
468     };
469     memcpy(loc - 2, inst, sizeof(inst));
470     return;
471   }
472 
473   // Convert
474   //   leal x(%reg),%eax
475   //   call *___tls_get_addr@got(%reg)
476   // to
477   const uint8_t inst[] = {
478       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
479       0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi
480   };
481   memcpy(loc - 2, inst, sizeof(inst));
482 }
483 
relocateAlloc(InputSectionBase & sec,uint8_t * buf) const484 void X86::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
485   uint64_t secAddr = sec.getOutputSection()->addr;
486   if (auto *s = dyn_cast<InputSection>(&sec))
487     secAddr += s->outSecOff;
488   for (const Relocation &rel : sec.relocs()) {
489     uint8_t *loc = buf + rel.offset;
490     const uint64_t val = SignExtend64(
491         sec.getRelocTargetVA(sec.file, rel.type, rel.addend,
492                              secAddr + rel.offset, *rel.sym, rel.expr),
493         32);
494     switch (rel.expr) {
495     case R_RELAX_TLS_GD_TO_IE_GOTPLT:
496       relaxTlsGdToIe(loc, rel, val);
497       continue;
498     case R_RELAX_TLS_GD_TO_LE:
499     case R_RELAX_TLS_GD_TO_LE_NEG:
500       relaxTlsGdToLe(loc, rel, val);
501       continue;
502     case R_RELAX_TLS_LD_TO_LE:
503       relaxTlsLdToLe(loc, rel, val);
504       break;
505     case R_RELAX_TLS_IE_TO_LE:
506       relaxTlsIeToLe(loc, rel, val);
507       continue;
508     default:
509       relocate(loc, rel, val);
510       break;
511     }
512   }
513 }
514 
515 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
516 // entries containing endbr32 instructions. A PLT entry will be split into two
517 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
518 namespace {
519 class IntelIBT : public X86 {
520 public:
521   IntelIBT();
522   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
523   void writePlt(uint8_t *buf, const Symbol &sym,
524                 uint64_t pltEntryAddr) const override;
525   void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
526 
527   static const unsigned IBTPltHeaderSize = 16;
528 };
529 } // namespace
530 
IntelIBT()531 IntelIBT::IntelIBT() { pltHeaderSize = 0; }
532 
writeGotPlt(uint8_t * buf,const Symbol & s) const533 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
534   uint64_t va =
535       in.ibtPlt->getVA() + IBTPltHeaderSize + s.getPltIdx() * pltEntrySize;
536   write32le(buf, va);
537 }
538 
writePlt(uint8_t * buf,const Symbol & sym,uint64_t) const539 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
540                         uint64_t /*pltEntryAddr*/) const {
541   if (config->isPic) {
542     const uint8_t inst[] = {
543         0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
544         0xff, 0xa3, 0,    0,    0, 0, // jmp *name@GOT(%ebx)
545         0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
546     };
547     memcpy(buf, inst, sizeof(inst));
548     write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA());
549     return;
550   }
551 
552   const uint8_t inst[] = {
553       0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
554       0xff, 0x25, 0,    0,    0, 0, // jmp *foo@GOT
555       0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
556   };
557   memcpy(buf, inst, sizeof(inst));
558   write32le(buf + 6, sym.getGotPltVA());
559 }
560 
writeIBTPlt(uint8_t * buf,size_t numEntries) const561 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
562   writePltHeader(buf);
563   buf += IBTPltHeaderSize;
564 
565   const uint8_t inst[] = {
566       0xf3, 0x0f, 0x1e, 0xfb,    // endbr32
567       0x68, 0,    0,    0,    0, // pushl $reloc_offset
568       0xe9, 0,    0,    0,    0, // jmpq .PLT0@PC
569       0x66, 0x90,                // nop
570   };
571 
572   for (size_t i = 0; i < numEntries; ++i) {
573     memcpy(buf, inst, sizeof(inst));
574     write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
575     write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
576     buf += sizeof(inst);
577   }
578 }
579 
580 namespace {
581 class RetpolinePic : public X86 {
582 public:
583   RetpolinePic();
584   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
585   void writePltHeader(uint8_t *buf) const override;
586   void writePlt(uint8_t *buf, const Symbol &sym,
587                 uint64_t pltEntryAddr) const override;
588 };
589 
590 class RetpolineNoPic : public X86 {
591 public:
592   RetpolineNoPic();
593   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
594   void writePltHeader(uint8_t *buf) const override;
595   void writePlt(uint8_t *buf, const Symbol &sym,
596                 uint64_t pltEntryAddr) const override;
597 };
598 } // namespace
599 
RetpolinePic()600 RetpolinePic::RetpolinePic() {
601   pltHeaderSize = 48;
602   pltEntrySize = 32;
603   ipltEntrySize = 32;
604 }
605 
writeGotPlt(uint8_t * buf,const Symbol & s) const606 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
607   write32le(buf, s.getPltVA() + 17);
608 }
609 
writePltHeader(uint8_t * buf) const610 void RetpolinePic::writePltHeader(uint8_t *buf) const {
611   const uint8_t insn[] = {
612       0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
613       0x50,                                     // 6:    pushl %eax
614       0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
615       0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
616       0xf3, 0x90,                               // 12: loop: pause
617       0x0f, 0xae, 0xe8,                         // 14:   lfence
618       0xeb, 0xf9,                               // 17:   jmp loop
619       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
620       0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
621       0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
622       0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
623       0x89, 0xc8,                               // 2b:   mov %ecx, %eax
624       0x59,                                     // 2d:   pop %ecx
625       0xc3,                                     // 2e:   ret
626       0xcc,                                     // 2f:   int3; padding
627   };
628   memcpy(buf, insn, sizeof(insn));
629 }
630 
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const631 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
632                             uint64_t pltEntryAddr) const {
633   unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
634   const uint8_t insn[] = {
635       0x50,                            // pushl %eax
636       0x8b, 0x83, 0,    0,    0,    0, // mov foo@GOT(%ebx), %eax
637       0xe8, 0,    0,    0,    0,       // call plt+0x20
638       0xe9, 0,    0,    0,    0,       // jmp plt+0x12
639       0x68, 0,    0,    0,    0,       // pushl $reloc_offset
640       0xe9, 0,    0,    0,    0,       // jmp plt+0
641       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // int3; padding
642   };
643   memcpy(buf, insn, sizeof(insn));
644 
645   uint32_t ebx = in.gotPlt->getVA();
646   unsigned off = pltEntryAddr - in.plt->getVA();
647   write32le(buf + 3, sym.getGotPltVA() - ebx);
648   write32le(buf + 8, -off - 12 + 32);
649   write32le(buf + 13, -off - 17 + 18);
650   write32le(buf + 18, relOff);
651   write32le(buf + 23, -off - 27);
652 }
653 
RetpolineNoPic()654 RetpolineNoPic::RetpolineNoPic() {
655   pltHeaderSize = 48;
656   pltEntrySize = 32;
657   ipltEntrySize = 32;
658 }
659 
writeGotPlt(uint8_t * buf,const Symbol & s) const660 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
661   write32le(buf, s.getPltVA() + 16);
662 }
663 
writePltHeader(uint8_t * buf) const664 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
665   const uint8_t insn[] = {
666       0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
667       0x50,                            // 6:    pushl %eax
668       0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
669       0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
670       0xf3, 0x90,                      // 11: loop: pause
671       0x0f, 0xae, 0xe8,                // 13:   lfence
672       0xeb, 0xf9,                      // 16:   jmp loop
673       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
674       0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
675       0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
676       0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
677       0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
678       0x89, 0xc8,                      // 2b:   mov %ecx, %eax
679       0x59,                            // 2d:   pop %ecx
680       0xc3,                            // 2e:   ret
681       0xcc,                            // 2f:   int3; padding
682   };
683   memcpy(buf, insn, sizeof(insn));
684 
685   uint32_t gotPlt = in.gotPlt->getVA();
686   write32le(buf + 2, gotPlt + 4);
687   write32le(buf + 8, gotPlt + 8);
688 }
689 
writePlt(uint8_t * buf,const Symbol & sym,uint64_t pltEntryAddr) const690 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
691                               uint64_t pltEntryAddr) const {
692   unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
693   const uint8_t insn[] = {
694       0x50,                         // 0:  pushl %eax
695       0xa1, 0,    0,    0,    0,    // 1:  mov foo_in_GOT, %eax
696       0xe8, 0,    0,    0,    0,    // 6:  call plt+0x20
697       0xe9, 0,    0,    0,    0,    // b:  jmp plt+0x11
698       0x68, 0,    0,    0,    0,    // 10: pushl $reloc_offset
699       0xe9, 0,    0,    0,    0,    // 15: jmp plt+0
700       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
701       0xcc,                         // 1f: int3; padding
702   };
703   memcpy(buf, insn, sizeof(insn));
704 
705   unsigned off = pltEntryAddr - in.plt->getVA();
706   write32le(buf + 2, sym.getGotPltVA());
707   write32le(buf + 7, -off - 11 + 32);
708   write32le(buf + 12, -off - 16 + 17);
709   write32le(buf + 17, relOff);
710   write32le(buf + 22, -off - 26);
711 }
712 
getX86TargetInfo()713 TargetInfo *elf::getX86TargetInfo() {
714   if (config->zRetpolineplt) {
715     if (config->isPic) {
716       static RetpolinePic t;
717       return &t;
718     }
719     static RetpolineNoPic t;
720     return &t;
721   }
722 
723   if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
724     static IntelIBT t;
725     return &t;
726   }
727 
728   static X86 t;
729   return &t;
730 }
731