1 //===- X86.cpp ------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "OutputSections.h" 10 #include "Symbols.h" 11 #include "SyntheticSections.h" 12 #include "Target.h" 13 #include "llvm/Support/Endian.h" 14 15 using namespace llvm; 16 using namespace llvm::support::endian; 17 using namespace llvm::ELF; 18 using namespace lld; 19 using namespace lld::elf; 20 21 namespace { 22 class X86 : public TargetInfo { 23 public: 24 X86(Ctx &); 25 int getTlsGdRelaxSkip(RelType type) const override; 26 RelExpr getRelExpr(RelType type, const Symbol &s, 27 const uint8_t *loc) const override; 28 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; 29 void writeGotPltHeader(uint8_t *buf) const override; 30 RelType getDynRel(RelType type) const override; 31 void writeGotPlt(uint8_t *buf, const Symbol &s) const override; 32 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; 33 void writePltHeader(uint8_t *buf) const override; 34 void writePlt(uint8_t *buf, const Symbol &sym, 35 uint64_t pltEntryAddr) const override; 36 void relocate(uint8_t *loc, const Relocation &rel, 37 uint64_t val) const override; 38 39 RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override; 40 void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; 41 42 private: 43 void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; 44 void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const; 45 void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; 46 void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; 47 }; 48 } // namespace 49 50 X86::X86(Ctx &ctx) : TargetInfo(ctx) { 51 copyRel = R_386_COPY; 52 gotRel = R_386_GLOB_DAT; 53 pltRel = R_386_JUMP_SLOT; 54 iRelativeRel = R_386_IRELATIVE; 55 relativeRel = R_386_RELATIVE; 56 symbolicRel = R_386_32; 57 tlsDescRel = R_386_TLS_DESC; 58 tlsGotRel = R_386_TLS_TPOFF; 59 tlsModuleIndexRel = R_386_TLS_DTPMOD32; 60 tlsOffsetRel = R_386_TLS_DTPOFF32; 61 gotBaseSymInGotPlt = true; 62 pltHeaderSize = 16; 63 pltEntrySize = 16; 64 ipltEntrySize = 16; 65 trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3 66 67 // Align to the non-PAE large page size (known as a superpage or huge page). 68 // FreeBSD automatically promotes large, superpage-aligned allocations. 69 defaultImageBase = 0x400000; 70 } 71 72 int X86::getTlsGdRelaxSkip(RelType type) const { 73 // TLSDESC relocations are processed separately. See relaxTlsGdToLe below. 74 return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2; 75 } 76 77 RelExpr X86::getRelExpr(RelType type, const Symbol &s, 78 const uint8_t *loc) const { 79 switch (type) { 80 case R_386_8: 81 case R_386_16: 82 case R_386_32: 83 return R_ABS; 84 case R_386_TLS_LDO_32: 85 return R_DTPREL; 86 case R_386_TLS_GD: 87 return R_TLSGD_GOTPLT; 88 case R_386_TLS_LDM: 89 return R_TLSLD_GOTPLT; 90 case R_386_PLT32: 91 return R_PLT_PC; 92 case R_386_PC8: 93 case R_386_PC16: 94 case R_386_PC32: 95 return R_PC; 96 case R_386_GOTPC: 97 return R_GOTPLTONLY_PC; 98 case R_386_TLS_IE: 99 return R_GOT; 100 case R_386_GOT32: 101 case R_386_GOT32X: 102 // These relocations are arguably mis-designed because their calculations 103 // depend on the instructions they are applied to. This is bad because we 104 // usually don't care about whether the target section contains valid 105 // machine instructions or not. But this is part of the documented ABI, so 106 // we had to implement as the standard requires. 107 // 108 // x86 does not support PC-relative data access. Therefore, in order to 109 // access GOT contents, a GOT address needs to be known at link-time 110 // (which means non-PIC) or compilers have to emit code to get a GOT 111 // address at runtime (which means code is position-independent but 112 // compilers need to emit extra code for each GOT access.) This decision 113 // is made at compile-time. In the latter case, compilers emit code to 114 // load a GOT address to a register, which is usually %ebx. 115 // 116 // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or 117 // foo@GOT(%ebx). 118 // 119 // foo@GOT is not usable in PIC. If we are creating a PIC output and if we 120 // find such relocation, we should report an error. foo@GOT is resolved to 121 // an *absolute* address of foo's GOT entry, because both GOT address and 122 // foo's offset are known. In other words, it's G + A. 123 // 124 // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to 125 // foo's GOT entry in the table, because GOT address is not known but foo's 126 // offset in the table is known. It's G + A - GOT. 127 // 128 // It's unfortunate that compilers emit the same relocation for these 129 // different use cases. In order to distinguish them, we have to read a 130 // machine instruction. 131 // 132 // The following code implements it. We assume that Loc[0] is the first byte 133 // of a displacement or an immediate field of a valid machine 134 // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at 135 // the byte, we can determine whether the instruction uses the operand as an 136 // absolute address (R_GOT) or a register-relative address (R_GOTPLT). 137 return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT; 138 case R_386_TLS_GOTDESC: 139 return R_TLSDESC_GOTPLT; 140 case R_386_TLS_DESC_CALL: 141 return R_TLSDESC_CALL; 142 case R_386_TLS_GOTIE: 143 return R_GOTPLT; 144 case R_386_GOTOFF: 145 return R_GOTPLTREL; 146 case R_386_TLS_LE: 147 return R_TPREL; 148 case R_386_TLS_LE_32: 149 return R_TPREL_NEG; 150 case R_386_NONE: 151 return R_NONE; 152 default: 153 Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v 154 << ") against symbol " << &s; 155 return R_NONE; 156 } 157 } 158 159 RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const { 160 switch (expr) { 161 default: 162 return expr; 163 case R_RELAX_TLS_GD_TO_IE: 164 return R_RELAX_TLS_GD_TO_IE_GOTPLT; 165 case R_RELAX_TLS_GD_TO_LE: 166 return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG 167 : R_RELAX_TLS_GD_TO_LE; 168 } 169 } 170 171 void X86::writeGotPltHeader(uint8_t *buf) const { 172 write32le(buf, ctx.mainPart->dynamic->getVA()); 173 } 174 175 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const { 176 // Entries in .got.plt initially points back to the corresponding 177 // PLT entries with a fixed offset to skip the first instruction. 178 write32le(buf, s.getPltVA(ctx) + 6); 179 } 180 181 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const { 182 // An x86 entry is the address of the ifunc resolver function. 183 write32le(buf, s.getVA(ctx)); 184 } 185 186 RelType X86::getDynRel(RelType type) const { 187 if (type == R_386_TLS_LE) 188 return R_386_TLS_TPOFF; 189 if (type == R_386_TLS_LE_32) 190 return R_386_TLS_TPOFF32; 191 return type; 192 } 193 194 void X86::writePltHeader(uint8_t *buf) const { 195 if (ctx.arg.isPic) { 196 const uint8_t v[] = { 197 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx) 198 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx) 199 0x90, 0x90, 0x90, 0x90 // nop 200 }; 201 memcpy(buf, v, sizeof(v)); 202 return; 203 } 204 205 const uint8_t pltData[] = { 206 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4) 207 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8) 208 0x90, 0x90, 0x90, 0x90, // nop 209 }; 210 memcpy(buf, pltData, sizeof(pltData)); 211 uint32_t gotPlt = ctx.in.gotPlt->getVA(); 212 write32le(buf + 2, gotPlt + 4); 213 write32le(buf + 8, gotPlt + 8); 214 } 215 216 void X86::writePlt(uint8_t *buf, const Symbol &sym, 217 uint64_t pltEntryAddr) const { 218 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx); 219 if (ctx.arg.isPic) { 220 const uint8_t inst[] = { 221 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx) 222 0x68, 0, 0, 0, 0, // pushl $reloc_offset 223 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC 224 }; 225 memcpy(buf, inst, sizeof(inst)); 226 write32le(buf + 2, sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA()); 227 } else { 228 const uint8_t inst[] = { 229 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT 230 0x68, 0, 0, 0, 0, // pushl $reloc_offset 231 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC 232 }; 233 memcpy(buf, inst, sizeof(inst)); 234 write32le(buf + 2, sym.getGotPltVA(ctx)); 235 } 236 237 write32le(buf + 7, relOff); 238 write32le(buf + 12, ctx.in.plt->getVA() - pltEntryAddr - 16); 239 } 240 241 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const { 242 switch (type) { 243 case R_386_8: 244 case R_386_PC8: 245 return SignExtend64<8>(*buf); 246 case R_386_16: 247 case R_386_PC16: 248 return SignExtend64<16>(read16le(buf)); 249 case R_386_32: 250 case R_386_GLOB_DAT: 251 case R_386_GOT32: 252 case R_386_GOT32X: 253 case R_386_GOTOFF: 254 case R_386_GOTPC: 255 case R_386_IRELATIVE: 256 case R_386_PC32: 257 case R_386_PLT32: 258 case R_386_RELATIVE: 259 case R_386_TLS_GOTDESC: 260 case R_386_TLS_DESC_CALL: 261 case R_386_TLS_DTPMOD32: 262 case R_386_TLS_DTPOFF32: 263 case R_386_TLS_LDO_32: 264 case R_386_TLS_LDM: 265 case R_386_TLS_IE: 266 case R_386_TLS_IE_32: 267 case R_386_TLS_LE: 268 case R_386_TLS_LE_32: 269 case R_386_TLS_GD: 270 case R_386_TLS_GD_32: 271 case R_386_TLS_GOTIE: 272 case R_386_TLS_TPOFF: 273 case R_386_TLS_TPOFF32: 274 return SignExtend64<32>(read32le(buf)); 275 case R_386_TLS_DESC: 276 return SignExtend64<32>(read32le(buf + 4)); 277 case R_386_NONE: 278 case R_386_JUMP_SLOT: 279 // These relocations are defined as not having an implicit addend. 280 return 0; 281 default: 282 InternalErr(ctx, buf) << "cannot read addend for relocation " << type; 283 return 0; 284 } 285 } 286 287 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { 288 switch (rel.type) { 289 case R_386_8: 290 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are 291 // being used for some 16-bit programs such as boot loaders, so 292 // we want to support them. 293 checkIntUInt(ctx, loc, val, 8, rel); 294 *loc = val; 295 break; 296 case R_386_PC8: 297 checkInt(ctx, loc, val, 8, rel); 298 *loc = val; 299 break; 300 case R_386_16: 301 checkIntUInt(ctx, loc, val, 16, rel); 302 write16le(loc, val); 303 break; 304 case R_386_PC16: 305 // R_386_PC16 is normally used with 16 bit code. In that situation 306 // the PC is 16 bits, just like the addend. This means that it can 307 // point from any 16 bit address to any other if the possibility 308 // of wrapping is included. 309 // The only restriction we have to check then is that the destination 310 // address fits in 16 bits. That is impossible to do here. The problem is 311 // that we are passed the final value, which already had the 312 // current location subtracted from it. 313 // We just check that Val fits in 17 bits. This misses some cases, but 314 // should have no false positives. 315 checkInt(ctx, loc, val, 17, rel); 316 write16le(loc, val); 317 break; 318 case R_386_32: 319 case R_386_GOT32: 320 case R_386_GOT32X: 321 case R_386_GOTOFF: 322 case R_386_GOTPC: 323 case R_386_PC32: 324 case R_386_PLT32: 325 case R_386_RELATIVE: 326 case R_386_TLS_GOTDESC: 327 case R_386_TLS_DESC_CALL: 328 case R_386_TLS_DTPMOD32: 329 case R_386_TLS_DTPOFF32: 330 case R_386_TLS_GD: 331 case R_386_TLS_GOTIE: 332 case R_386_TLS_IE: 333 case R_386_TLS_LDM: 334 case R_386_TLS_LDO_32: 335 case R_386_TLS_LE: 336 case R_386_TLS_LE_32: 337 case R_386_TLS_TPOFF: 338 case R_386_TLS_TPOFF32: 339 checkInt(ctx, loc, val, 32, rel); 340 write32le(loc, val); 341 break; 342 case R_386_TLS_DESC: 343 // The addend is stored in the second 32-bit word. 344 write32le(loc + 4, val); 345 break; 346 default: 347 llvm_unreachable("unknown relocation"); 348 } 349 } 350 351 void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, 352 uint64_t val) const { 353 if (rel.type == R_386_TLS_GD) { 354 // Convert (loc[-2] == 0x04) 355 // leal x@tlsgd(, %ebx, 1), %eax 356 // call ___tls_get_addr@plt 357 // or 358 // leal x@tlsgd(%reg), %eax 359 // call *___tls_get_addr@got(%reg) 360 // to 361 const uint8_t inst[] = { 362 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax 363 0x81, 0xe8, 0, 0, 0, 0, // subl x@ntpoff(%ebx), %eax 364 }; 365 uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2; 366 memcpy(w, inst, sizeof(inst)); 367 write32le(w + 8, val); 368 } else if (rel.type == R_386_TLS_GOTDESC) { 369 // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax. 370 // 371 // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction. 372 if (memcmp(loc - 2, "\x8d\x83", 2)) { 373 ErrAlways(ctx) 374 << getErrorLoc(ctx, loc - 2) 375 << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax"; 376 return; 377 } 378 loc[-1] = 0x05; 379 write32le(loc, val); 380 } else { 381 // Convert call *x@tlsdesc(%eax) to xchg ax, ax. 382 assert(rel.type == R_386_TLS_DESC_CALL); 383 loc[0] = 0x66; 384 loc[1] = 0x90; 385 } 386 } 387 388 void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, 389 uint64_t val) const { 390 if (rel.type == R_386_TLS_GD) { 391 // Convert (loc[-2] == 0x04) 392 // leal x@tlsgd(, %ebx, 1), %eax 393 // call ___tls_get_addr@plt 394 // or 395 // leal x@tlsgd(%reg), %eax 396 // call *___tls_get_addr@got(%reg) 397 const uint8_t inst[] = { 398 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax 399 0x03, 0x83, 0, 0, 0, 0, // addl x@gottpoff(%ebx), %eax 400 }; 401 uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2; 402 memcpy(w, inst, sizeof(inst)); 403 write32le(w + 8, val); 404 } else if (rel.type == R_386_TLS_GOTDESC) { 405 // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax. 406 if (memcmp(loc - 2, "\x8d\x83", 2)) { 407 ErrAlways(ctx) 408 << getErrorLoc(ctx, loc - 2) 409 << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax"; 410 return; 411 } 412 loc[-2] = 0x8b; 413 write32le(loc, val); 414 } else { 415 // Convert call *x@tlsdesc(%eax) to xchg ax, ax. 416 assert(rel.type == R_386_TLS_DESC_CALL); 417 loc[0] = 0x66; 418 loc[1] = 0x90; 419 } 420 } 421 422 // In some conditions, relocations can be optimized to avoid using GOT. 423 // This function does that for Initial Exec to Local Exec case. 424 void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, 425 uint64_t val) const { 426 // Ulrich's document section 6.2 says that @gotntpoff can 427 // be used with MOVL or ADDL instructions. 428 // @indntpoff is similar to @gotntpoff, but for use in 429 // position dependent code. 430 uint8_t reg = (loc[-1] >> 3) & 7; 431 432 if (rel.type == R_386_TLS_IE) { 433 if (loc[-1] == 0xa1) { 434 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax" 435 // This case is different from the generic case below because 436 // this is a 5 byte instruction while below is 6 bytes. 437 loc[-1] = 0xb8; 438 } else if (loc[-2] == 0x8b) { 439 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg" 440 loc[-2] = 0xc7; 441 loc[-1] = 0xc0 | reg; 442 } else { 443 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg" 444 loc[-2] = 0x81; 445 loc[-1] = 0xc0 | reg; 446 } 447 } else { 448 assert(rel.type == R_386_TLS_GOTIE); 449 if (loc[-2] == 0x8b) { 450 // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg" 451 loc[-2] = 0xc7; 452 loc[-1] = 0xc0 | reg; 453 } else { 454 // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg" 455 loc[-2] = 0x8d; 456 loc[-1] = 0x80 | (reg << 3) | reg; 457 } 458 } 459 write32le(loc, val); 460 } 461 462 void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, 463 uint64_t val) const { 464 if (rel.type == R_386_TLS_LDO_32) { 465 write32le(loc, val); 466 return; 467 } 468 469 if (loc[4] == 0xe8) { 470 // Convert 471 // leal x(%reg),%eax 472 // call ___tls_get_addr@plt 473 // to 474 const uint8_t inst[] = { 475 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax 476 0x90, // nop 477 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi 478 }; 479 memcpy(loc - 2, inst, sizeof(inst)); 480 return; 481 } 482 483 // Convert 484 // leal x(%reg),%eax 485 // call *___tls_get_addr@got(%reg) 486 // to 487 const uint8_t inst[] = { 488 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax 489 0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi 490 }; 491 memcpy(loc - 2, inst, sizeof(inst)); 492 } 493 494 void X86::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { 495 uint64_t secAddr = sec.getOutputSection()->addr; 496 if (auto *s = dyn_cast<InputSection>(&sec)) 497 secAddr += s->outSecOff; 498 for (const Relocation &rel : sec.relocs()) { 499 uint8_t *loc = buf + rel.offset; 500 const uint64_t val = 501 SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), 32); 502 switch (rel.expr) { 503 case R_RELAX_TLS_GD_TO_IE_GOTPLT: 504 relaxTlsGdToIe(loc, rel, val); 505 continue; 506 case R_RELAX_TLS_GD_TO_LE: 507 case R_RELAX_TLS_GD_TO_LE_NEG: 508 relaxTlsGdToLe(loc, rel, val); 509 continue; 510 case R_RELAX_TLS_LD_TO_LE: 511 relaxTlsLdToLe(loc, rel, val); 512 break; 513 case R_RELAX_TLS_IE_TO_LE: 514 relaxTlsIeToLe(loc, rel, val); 515 continue; 516 default: 517 relocate(loc, rel, val); 518 break; 519 } 520 } 521 } 522 523 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT 524 // entries containing endbr32 instructions. A PLT entry will be split into two 525 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt). 526 namespace { 527 class IntelIBT : public X86 { 528 public: 529 IntelIBT(Ctx &ctx) : X86(ctx) { pltHeaderSize = 0; } 530 void writeGotPlt(uint8_t *buf, const Symbol &s) const override; 531 void writePlt(uint8_t *buf, const Symbol &sym, 532 uint64_t pltEntryAddr) const override; 533 void writeIBTPlt(uint8_t *buf, size_t numEntries) const override; 534 535 static const unsigned IBTPltHeaderSize = 16; 536 }; 537 } // namespace 538 539 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const { 540 uint64_t va = ctx.in.ibtPlt->getVA() + IBTPltHeaderSize + 541 s.getPltIdx(ctx) * pltEntrySize; 542 write32le(buf, va); 543 } 544 545 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym, 546 uint64_t /*pltEntryAddr*/) const { 547 if (ctx.arg.isPic) { 548 const uint8_t inst[] = { 549 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 550 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx) 551 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop 552 }; 553 memcpy(buf, inst, sizeof(inst)); 554 write32le(buf + 6, sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA()); 555 return; 556 } 557 558 const uint8_t inst[] = { 559 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 560 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT 561 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop 562 }; 563 memcpy(buf, inst, sizeof(inst)); 564 write32le(buf + 6, sym.getGotPltVA(ctx)); 565 } 566 567 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const { 568 writePltHeader(buf); 569 buf += IBTPltHeaderSize; 570 571 const uint8_t inst[] = { 572 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 573 0x68, 0, 0, 0, 0, // pushl $reloc_offset 574 0xe9, 0, 0, 0, 0, // jmpq .PLT0@PC 575 0x66, 0x90, // nop 576 }; 577 578 for (size_t i = 0; i < numEntries; ++i) { 579 memcpy(buf, inst, sizeof(inst)); 580 write32le(buf + 5, i * sizeof(object::ELF32LE::Rel)); 581 write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30); 582 buf += sizeof(inst); 583 } 584 } 585 586 namespace { 587 class RetpolinePic : public X86 { 588 public: 589 RetpolinePic(Ctx &); 590 void writeGotPlt(uint8_t *buf, const Symbol &s) const override; 591 void writePltHeader(uint8_t *buf) const override; 592 void writePlt(uint8_t *buf, const Symbol &sym, 593 uint64_t pltEntryAddr) const override; 594 }; 595 596 class RetpolineNoPic : public X86 { 597 public: 598 RetpolineNoPic(Ctx &); 599 void writeGotPlt(uint8_t *buf, const Symbol &s) const override; 600 void writePltHeader(uint8_t *buf) const override; 601 void writePlt(uint8_t *buf, const Symbol &sym, 602 uint64_t pltEntryAddr) const override; 603 }; 604 } // namespace 605 606 RetpolinePic::RetpolinePic(Ctx &ctx) : X86(ctx) { 607 pltHeaderSize = 48; 608 pltEntrySize = 32; 609 ipltEntrySize = 32; 610 } 611 612 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const { 613 write32le(buf, s.getPltVA(ctx) + 17); 614 } 615 616 void RetpolinePic::writePltHeader(uint8_t *buf) const { 617 const uint8_t insn[] = { 618 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx) 619 0x50, // 6: pushl %eax 620 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax 621 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next 622 0xf3, 0x90, // 12: loop: pause 623 0x0f, 0xae, 0xe8, // 14: lfence 624 0xeb, 0xf9, // 17: jmp loop 625 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16 626 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp) 627 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx 628 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp) 629 0x89, 0xc8, // 2b: mov %ecx, %eax 630 0x59, // 2d: pop %ecx 631 0xc3, // 2e: ret 632 0xcc, // 2f: int3; padding 633 }; 634 memcpy(buf, insn, sizeof(insn)); 635 } 636 637 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym, 638 uint64_t pltEntryAddr) const { 639 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx); 640 const uint8_t insn[] = { 641 0x50, // pushl %eax 642 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax 643 0xe8, 0, 0, 0, 0, // call plt+0x20 644 0xe9, 0, 0, 0, 0, // jmp plt+0x12 645 0x68, 0, 0, 0, 0, // pushl $reloc_offset 646 0xe9, 0, 0, 0, 0, // jmp plt+0 647 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding 648 }; 649 memcpy(buf, insn, sizeof(insn)); 650 651 uint32_t ebx = ctx.in.gotPlt->getVA(); 652 unsigned off = pltEntryAddr - ctx.in.plt->getVA(); 653 write32le(buf + 3, sym.getGotPltVA(ctx) - ebx); 654 write32le(buf + 8, -off - 12 + 32); 655 write32le(buf + 13, -off - 17 + 18); 656 write32le(buf + 18, relOff); 657 write32le(buf + 23, -off - 27); 658 } 659 660 RetpolineNoPic::RetpolineNoPic(Ctx &ctx) : X86(ctx) { 661 pltHeaderSize = 48; 662 pltEntrySize = 32; 663 ipltEntrySize = 32; 664 } 665 666 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const { 667 write32le(buf, s.getPltVA(ctx) + 16); 668 } 669 670 void RetpolineNoPic::writePltHeader(uint8_t *buf) const { 671 const uint8_t insn[] = { 672 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4 673 0x50, // 6: pushl %eax 674 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax 675 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next 676 0xf3, 0x90, // 11: loop: pause 677 0x0f, 0xae, 0xe8, // 13: lfence 678 0xeb, 0xf9, // 16: jmp loop 679 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3 680 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16 681 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp) 682 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx 683 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp) 684 0x89, 0xc8, // 2b: mov %ecx, %eax 685 0x59, // 2d: pop %ecx 686 0xc3, // 2e: ret 687 0xcc, // 2f: int3; padding 688 }; 689 memcpy(buf, insn, sizeof(insn)); 690 691 uint32_t gotPlt = ctx.in.gotPlt->getVA(); 692 write32le(buf + 2, gotPlt + 4); 693 write32le(buf + 8, gotPlt + 8); 694 } 695 696 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym, 697 uint64_t pltEntryAddr) const { 698 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx); 699 const uint8_t insn[] = { 700 0x50, // 0: pushl %eax 701 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax 702 0xe8, 0, 0, 0, 0, // 6: call plt+0x20 703 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11 704 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset 705 0xe9, 0, 0, 0, 0, // 15: jmp plt+0 706 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding 707 0xcc, // 1f: int3; padding 708 }; 709 memcpy(buf, insn, sizeof(insn)); 710 711 unsigned off = pltEntryAddr - ctx.in.plt->getVA(); 712 write32le(buf + 2, sym.getGotPltVA(ctx)); 713 write32le(buf + 7, -off - 11 + 32); 714 write32le(buf + 12, -off - 16 + 17); 715 write32le(buf + 17, relOff); 716 write32le(buf + 22, -off - 26); 717 } 718 719 void elf::setX86TargetInfo(Ctx &ctx) { 720 if (ctx.arg.zRetpolineplt) { 721 if (ctx.arg.isPic) 722 ctx.target.reset(new RetpolinePic(ctx)); 723 else 724 ctx.target.reset(new RetpolineNoPic(ctx)); 725 return; 726 } 727 728 if (ctx.arg.andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) 729 ctx.target.reset(new IntelIBT(ctx)); 730 else 731 ctx.target.reset(new X86(ctx)); 732 } 733