1 //===- X86.cpp ------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputFiles.h" 10 #include "Symbols.h" 11 #include "SyntheticSections.h" 12 #include "Target.h" 13 #include "lld/Common/ErrorHandler.h" 14 #include "llvm/Support/Endian.h" 15 16 using namespace llvm; 17 using namespace llvm::support::endian; 18 using namespace llvm::ELF; 19 using namespace lld; 20 using namespace lld::elf; 21 22 namespace { 23 class X86 : public TargetInfo { 24 public: 25 X86(); 26 int getTlsGdRelaxSkip(RelType type) const override; 27 RelExpr getRelExpr(RelType type, const Symbol &s, 28 const uint8_t *loc) const override; 29 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; 30 void writeGotPltHeader(uint8_t *buf) const override; 31 RelType getDynRel(RelType type) const override; 32 void writeGotPlt(uint8_t *buf, const Symbol &s) const override; 33 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; 34 void writePltHeader(uint8_t *buf) const override; 35 void writePlt(uint8_t *buf, const Symbol &sym, 36 uint64_t pltEntryAddr) const override; 37 void relocate(uint8_t *loc, const Relocation &rel, 38 uint64_t val) const override; 39 40 RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override; 41 void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, 42 uint64_t val) const override; 43 void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, 44 uint64_t val) const override; 45 void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, 46 uint64_t val) const override; 47 void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, 48 uint64_t val) const override; 49 }; 50 } // namespace 51 52 X86::X86() { 53 copyRel = R_386_COPY; 54 gotRel = R_386_GLOB_DAT; 55 pltRel = R_386_JUMP_SLOT; 56 iRelativeRel = R_386_IRELATIVE; 57 relativeRel = R_386_RELATIVE; 58 symbolicRel = R_386_32; 59 tlsDescRel = R_386_TLS_DESC; 60 tlsGotRel = R_386_TLS_TPOFF; 61 tlsModuleIndexRel = R_386_TLS_DTPMOD32; 62 tlsOffsetRel = R_386_TLS_DTPOFF32; 63 gotBaseSymInGotPlt = true; 64 pltHeaderSize = 16; 65 pltEntrySize = 16; 66 ipltEntrySize = 16; 67 trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3 68 69 // Align to the non-PAE large page size (known as a superpage or huge page). 70 // FreeBSD automatically promotes large, superpage-aligned allocations. 71 defaultImageBase = 0x400000; 72 } 73 74 int X86::getTlsGdRelaxSkip(RelType type) const { 75 // TLSDESC relocations are processed separately. See relaxTlsGdToLe below. 76 return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2; 77 } 78 79 RelExpr X86::getRelExpr(RelType type, const Symbol &s, 80 const uint8_t *loc) const { 81 // There are 4 different TLS variable models with varying degrees of 82 // flexibility and performance. LocalExec and InitialExec models are fast but 83 // less-flexible models. If they are in use, we set DF_STATIC_TLS flag in the 84 // dynamic section to let runtime know about that. 85 if (type == R_386_TLS_LE || type == R_386_TLS_LE_32 || type == R_386_TLS_IE || 86 type == R_386_TLS_GOTIE) 87 config->hasStaticTlsModel = true; 88 89 switch (type) { 90 case R_386_8: 91 case R_386_16: 92 case R_386_32: 93 return R_ABS; 94 case R_386_TLS_LDO_32: 95 return R_DTPREL; 96 case R_386_TLS_GD: 97 return R_TLSGD_GOTPLT; 98 case R_386_TLS_LDM: 99 return R_TLSLD_GOTPLT; 100 case R_386_PLT32: 101 return R_PLT_PC; 102 case R_386_PC8: 103 case R_386_PC16: 104 case R_386_PC32: 105 return R_PC; 106 case R_386_GOTPC: 107 return R_GOTPLTONLY_PC; 108 case R_386_TLS_IE: 109 return R_GOT; 110 case R_386_GOT32: 111 case R_386_GOT32X: 112 // These relocations are arguably mis-designed because their calculations 113 // depend on the instructions they are applied to. This is bad because we 114 // usually don't care about whether the target section contains valid 115 // machine instructions or not. But this is part of the documented ABI, so 116 // we had to implement as the standard requires. 117 // 118 // x86 does not support PC-relative data access. Therefore, in order to 119 // access GOT contents, a GOT address needs to be known at link-time 120 // (which means non-PIC) or compilers have to emit code to get a GOT 121 // address at runtime (which means code is position-independent but 122 // compilers need to emit extra code for each GOT access.) This decision 123 // is made at compile-time. In the latter case, compilers emit code to 124 // load a GOT address to a register, which is usually %ebx. 125 // 126 // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or 127 // foo@GOT(%ebx). 128 // 129 // foo@GOT is not usable in PIC. If we are creating a PIC output and if we 130 // find such relocation, we should report an error. foo@GOT is resolved to 131 // an *absolute* address of foo's GOT entry, because both GOT address and 132 // foo's offset are known. In other words, it's G + A. 133 // 134 // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to 135 // foo's GOT entry in the table, because GOT address is not known but foo's 136 // offset in the table is known. It's G + A - GOT. 137 // 138 // It's unfortunate that compilers emit the same relocation for these 139 // different use cases. In order to distinguish them, we have to read a 140 // machine instruction. 141 // 142 // The following code implements it. We assume that Loc[0] is the first byte 143 // of a displacement or an immediate field of a valid machine 144 // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at 145 // the byte, we can determine whether the instruction uses the operand as an 146 // absolute address (R_GOT) or a register-relative address (R_GOTPLT). 147 return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT; 148 case R_386_TLS_GOTDESC: 149 return R_TLSDESC_GOTPLT; 150 case R_386_TLS_DESC_CALL: 151 return R_TLSDESC_CALL; 152 case R_386_TLS_GOTIE: 153 return R_GOTPLT; 154 case R_386_GOTOFF: 155 return R_GOTPLTREL; 156 case R_386_TLS_LE: 157 return R_TPREL; 158 case R_386_TLS_LE_32: 159 return R_TPREL_NEG; 160 case R_386_NONE: 161 return R_NONE; 162 default: 163 error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + 164 ") against symbol " + toString(s)); 165 return R_NONE; 166 } 167 } 168 169 RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const { 170 switch (expr) { 171 default: 172 return expr; 173 case R_RELAX_TLS_GD_TO_IE: 174 return R_RELAX_TLS_GD_TO_IE_GOTPLT; 175 case R_RELAX_TLS_GD_TO_LE: 176 return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG 177 : R_RELAX_TLS_GD_TO_LE; 178 } 179 } 180 181 void X86::writeGotPltHeader(uint8_t *buf) const { 182 write32le(buf, mainPart->dynamic->getVA()); 183 } 184 185 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const { 186 // Entries in .got.plt initially points back to the corresponding 187 // PLT entries with a fixed offset to skip the first instruction. 188 write32le(buf, s.getPltVA() + 6); 189 } 190 191 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const { 192 // An x86 entry is the address of the ifunc resolver function. 193 write32le(buf, s.getVA()); 194 } 195 196 RelType X86::getDynRel(RelType type) const { 197 if (type == R_386_TLS_LE) 198 return R_386_TLS_TPOFF; 199 if (type == R_386_TLS_LE_32) 200 return R_386_TLS_TPOFF32; 201 return type; 202 } 203 204 void X86::writePltHeader(uint8_t *buf) const { 205 if (config->isPic) { 206 const uint8_t v[] = { 207 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx) 208 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx) 209 0x90, 0x90, 0x90, 0x90 // nop 210 }; 211 memcpy(buf, v, sizeof(v)); 212 return; 213 } 214 215 const uint8_t pltData[] = { 216 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4) 217 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8) 218 0x90, 0x90, 0x90, 0x90, // nop 219 }; 220 memcpy(buf, pltData, sizeof(pltData)); 221 uint32_t gotPlt = in.gotPlt->getVA(); 222 write32le(buf + 2, gotPlt + 4); 223 write32le(buf + 8, gotPlt + 8); 224 } 225 226 void X86::writePlt(uint8_t *buf, const Symbol &sym, 227 uint64_t pltEntryAddr) const { 228 unsigned relOff = in.relaPlt->entsize * sym.pltIndex; 229 if (config->isPic) { 230 const uint8_t inst[] = { 231 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx) 232 0x68, 0, 0, 0, 0, // pushl $reloc_offset 233 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC 234 }; 235 memcpy(buf, inst, sizeof(inst)); 236 write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA()); 237 } else { 238 const uint8_t inst[] = { 239 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT 240 0x68, 0, 0, 0, 0, // pushl $reloc_offset 241 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC 242 }; 243 memcpy(buf, inst, sizeof(inst)); 244 write32le(buf + 2, sym.getGotPltVA()); 245 } 246 247 write32le(buf + 7, relOff); 248 write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16); 249 } 250 251 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const { 252 switch (type) { 253 case R_386_8: 254 case R_386_PC8: 255 return SignExtend64<8>(*buf); 256 case R_386_16: 257 case R_386_PC16: 258 return SignExtend64<16>(read16le(buf)); 259 case R_386_32: 260 case R_386_GLOB_DAT: 261 case R_386_GOT32: 262 case R_386_GOT32X: 263 case R_386_GOTOFF: 264 case R_386_GOTPC: 265 case R_386_IRELATIVE: 266 case R_386_PC32: 267 case R_386_PLT32: 268 case R_386_RELATIVE: 269 case R_386_TLS_GOTDESC: 270 case R_386_TLS_DESC_CALL: 271 case R_386_TLS_DTPMOD32: 272 case R_386_TLS_DTPOFF32: 273 case R_386_TLS_LDO_32: 274 case R_386_TLS_LDM: 275 case R_386_TLS_IE: 276 case R_386_TLS_IE_32: 277 case R_386_TLS_LE: 278 case R_386_TLS_LE_32: 279 case R_386_TLS_GD: 280 case R_386_TLS_GD_32: 281 case R_386_TLS_GOTIE: 282 case R_386_TLS_TPOFF: 283 case R_386_TLS_TPOFF32: 284 return SignExtend64<32>(read32le(buf)); 285 case R_386_TLS_DESC: 286 return SignExtend64<32>(read32le(buf + 4)); 287 case R_386_NONE: 288 case R_386_JUMP_SLOT: 289 // These relocations are defined as not having an implicit addend. 290 return 0; 291 default: 292 internalLinkerError(getErrorLocation(buf), 293 "cannot read addend for relocation " + toString(type)); 294 return 0; 295 } 296 } 297 298 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { 299 switch (rel.type) { 300 case R_386_8: 301 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are 302 // being used for some 16-bit programs such as boot loaders, so 303 // we want to support them. 304 checkIntUInt(loc, val, 8, rel); 305 *loc = val; 306 break; 307 case R_386_PC8: 308 checkInt(loc, val, 8, rel); 309 *loc = val; 310 break; 311 case R_386_16: 312 checkIntUInt(loc, val, 16, rel); 313 write16le(loc, val); 314 break; 315 case R_386_PC16: 316 // R_386_PC16 is normally used with 16 bit code. In that situation 317 // the PC is 16 bits, just like the addend. This means that it can 318 // point from any 16 bit address to any other if the possibility 319 // of wrapping is included. 320 // The only restriction we have to check then is that the destination 321 // address fits in 16 bits. That is impossible to do here. The problem is 322 // that we are passed the final value, which already had the 323 // current location subtracted from it. 324 // We just check that Val fits in 17 bits. This misses some cases, but 325 // should have no false positives. 326 checkInt(loc, val, 17, rel); 327 write16le(loc, val); 328 break; 329 case R_386_32: 330 case R_386_GOT32: 331 case R_386_GOT32X: 332 case R_386_GOTOFF: 333 case R_386_GOTPC: 334 case R_386_PC32: 335 case R_386_PLT32: 336 case R_386_RELATIVE: 337 case R_386_TLS_GOTDESC: 338 case R_386_TLS_DESC_CALL: 339 case R_386_TLS_DTPMOD32: 340 case R_386_TLS_DTPOFF32: 341 case R_386_TLS_GD: 342 case R_386_TLS_GOTIE: 343 case R_386_TLS_IE: 344 case R_386_TLS_LDM: 345 case R_386_TLS_LDO_32: 346 case R_386_TLS_LE: 347 case R_386_TLS_LE_32: 348 case R_386_TLS_TPOFF: 349 case R_386_TLS_TPOFF32: 350 checkInt(loc, val, 32, rel); 351 write32le(loc, val); 352 break; 353 case R_386_TLS_DESC: 354 // The addend is stored in the second 32-bit word. 355 write32le(loc + 4, val); 356 break; 357 default: 358 llvm_unreachable("unknown relocation"); 359 } 360 } 361 362 void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, 363 uint64_t val) const { 364 if (rel.type == R_386_TLS_GD) { 365 // Convert 366 // leal x@tlsgd(, %ebx, 1), %eax 367 // call __tls_get_addr@plt 368 // to 369 // movl %gs:0, %eax 370 // subl $x@tpoff, %eax 371 const uint8_t inst[] = { 372 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax 373 0x81, 0xe8, 0, 0, 0, 0, // subl val(%ebx), %eax 374 }; 375 memcpy(loc - 3, inst, sizeof(inst)); 376 write32le(loc + 5, val); 377 } else if (rel.type == R_386_TLS_GOTDESC) { 378 // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax. 379 // 380 // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction. 381 if (memcmp(loc - 2, "\x8d\x83", 2)) { 382 error(getErrorLocation(loc - 2) + 383 "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax"); 384 return; 385 } 386 loc[-1] = 0x05; 387 write32le(loc, val); 388 } else { 389 // Convert call *x@tlsdesc(%eax) to xchg ax, ax. 390 assert(rel.type == R_386_TLS_DESC_CALL); 391 loc[0] = 0x66; 392 loc[1] = 0x90; 393 } 394 } 395 396 void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, 397 uint64_t val) const { 398 if (rel.type == R_386_TLS_GD) { 399 // Convert 400 // leal x@tlsgd(, %ebx, 1), %eax 401 // call __tls_get_addr@plt 402 // to 403 // movl %gs:0, %eax 404 // addl x@gotntpoff(%ebx), %eax 405 const uint8_t inst[] = { 406 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax 407 0x03, 0x83, 0, 0, 0, 0, // addl val(%ebx), %eax 408 }; 409 memcpy(loc - 3, inst, sizeof(inst)); 410 write32le(loc + 5, val); 411 } else if (rel.type == R_386_TLS_GOTDESC) { 412 // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax. 413 if (memcmp(loc - 2, "\x8d\x83", 2)) { 414 error(getErrorLocation(loc - 2) + 415 "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax"); 416 return; 417 } 418 loc[-2] = 0x8b; 419 write32le(loc, val); 420 } else { 421 // Convert call *x@tlsdesc(%eax) to xchg ax, ax. 422 assert(rel.type == R_386_TLS_DESC_CALL); 423 loc[0] = 0x66; 424 loc[1] = 0x90; 425 } 426 } 427 428 // In some conditions, relocations can be optimized to avoid using GOT. 429 // This function does that for Initial Exec to Local Exec case. 430 void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, 431 uint64_t val) const { 432 // Ulrich's document section 6.2 says that @gotntpoff can 433 // be used with MOVL or ADDL instructions. 434 // @indntpoff is similar to @gotntpoff, but for use in 435 // position dependent code. 436 uint8_t reg = (loc[-1] >> 3) & 7; 437 438 if (rel.type == R_386_TLS_IE) { 439 if (loc[-1] == 0xa1) { 440 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax" 441 // This case is different from the generic case below because 442 // this is a 5 byte instruction while below is 6 bytes. 443 loc[-1] = 0xb8; 444 } else if (loc[-2] == 0x8b) { 445 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg" 446 loc[-2] = 0xc7; 447 loc[-1] = 0xc0 | reg; 448 } else { 449 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg" 450 loc[-2] = 0x81; 451 loc[-1] = 0xc0 | reg; 452 } 453 } else { 454 assert(rel.type == R_386_TLS_GOTIE); 455 if (loc[-2] == 0x8b) { 456 // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg" 457 loc[-2] = 0xc7; 458 loc[-1] = 0xc0 | reg; 459 } else { 460 // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg" 461 loc[-2] = 0x8d; 462 loc[-1] = 0x80 | (reg << 3) | reg; 463 } 464 } 465 write32le(loc, val); 466 } 467 468 void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, 469 uint64_t val) const { 470 if (rel.type == R_386_TLS_LDO_32) { 471 write32le(loc, val); 472 return; 473 } 474 475 // Convert 476 // leal foo(%reg),%eax 477 // call ___tls_get_addr 478 // to 479 // movl %gs:0,%eax 480 // nop 481 // leal 0(%esi,1),%esi 482 const uint8_t inst[] = { 483 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax 484 0x90, // nop 485 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi 486 }; 487 memcpy(loc - 2, inst, sizeof(inst)); 488 } 489 490 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT 491 // entries containing endbr32 instructions. A PLT entry will be split into two 492 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt). 493 namespace { 494 class IntelIBT : public X86 { 495 public: 496 IntelIBT(); 497 void writeGotPlt(uint8_t *buf, const Symbol &s) const override; 498 void writePlt(uint8_t *buf, const Symbol &sym, 499 uint64_t pltEntryAddr) const override; 500 void writeIBTPlt(uint8_t *buf, size_t numEntries) const override; 501 502 static const unsigned IBTPltHeaderSize = 16; 503 }; 504 } // namespace 505 506 IntelIBT::IntelIBT() { pltHeaderSize = 0; } 507 508 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const { 509 uint64_t va = 510 in.ibtPlt->getVA() + IBTPltHeaderSize + s.pltIndex * pltEntrySize; 511 write32le(buf, va); 512 } 513 514 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym, 515 uint64_t /*pltEntryAddr*/) const { 516 if (config->isPic) { 517 const uint8_t inst[] = { 518 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 519 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx) 520 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop 521 }; 522 memcpy(buf, inst, sizeof(inst)); 523 write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA()); 524 return; 525 } 526 527 const uint8_t inst[] = { 528 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 529 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT 530 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop 531 }; 532 memcpy(buf, inst, sizeof(inst)); 533 write32le(buf + 6, sym.getGotPltVA()); 534 } 535 536 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const { 537 writePltHeader(buf); 538 buf += IBTPltHeaderSize; 539 540 const uint8_t inst[] = { 541 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 542 0x68, 0, 0, 0, 0, // pushl $reloc_offset 543 0xe9, 0, 0, 0, 0, // jmpq .PLT0@PC 544 0x66, 0x90, // nop 545 }; 546 547 for (size_t i = 0; i < numEntries; ++i) { 548 memcpy(buf, inst, sizeof(inst)); 549 write32le(buf + 5, i * sizeof(object::ELF32LE::Rel)); 550 write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30); 551 buf += sizeof(inst); 552 } 553 } 554 555 namespace { 556 class RetpolinePic : public X86 { 557 public: 558 RetpolinePic(); 559 void writeGotPlt(uint8_t *buf, const Symbol &s) const override; 560 void writePltHeader(uint8_t *buf) const override; 561 void writePlt(uint8_t *buf, const Symbol &sym, 562 uint64_t pltEntryAddr) const override; 563 }; 564 565 class RetpolineNoPic : public X86 { 566 public: 567 RetpolineNoPic(); 568 void writeGotPlt(uint8_t *buf, const Symbol &s) const override; 569 void writePltHeader(uint8_t *buf) const override; 570 void writePlt(uint8_t *buf, const Symbol &sym, 571 uint64_t pltEntryAddr) const override; 572 }; 573 } // namespace 574 575 RetpolinePic::RetpolinePic() { 576 pltHeaderSize = 48; 577 pltEntrySize = 32; 578 ipltEntrySize = 32; 579 } 580 581 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const { 582 write32le(buf, s.getPltVA() + 17); 583 } 584 585 void RetpolinePic::writePltHeader(uint8_t *buf) const { 586 const uint8_t insn[] = { 587 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx) 588 0x50, // 6: pushl %eax 589 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax 590 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next 591 0xf3, 0x90, // 12: loop: pause 592 0x0f, 0xae, 0xe8, // 14: lfence 593 0xeb, 0xf9, // 17: jmp loop 594 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16 595 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp) 596 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx 597 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp) 598 0x89, 0xc8, // 2b: mov %ecx, %eax 599 0x59, // 2d: pop %ecx 600 0xc3, // 2e: ret 601 0xcc, // 2f: int3; padding 602 }; 603 memcpy(buf, insn, sizeof(insn)); 604 } 605 606 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym, 607 uint64_t pltEntryAddr) const { 608 unsigned relOff = in.relaPlt->entsize * sym.pltIndex; 609 const uint8_t insn[] = { 610 0x50, // pushl %eax 611 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax 612 0xe8, 0, 0, 0, 0, // call plt+0x20 613 0xe9, 0, 0, 0, 0, // jmp plt+0x12 614 0x68, 0, 0, 0, 0, // pushl $reloc_offset 615 0xe9, 0, 0, 0, 0, // jmp plt+0 616 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding 617 }; 618 memcpy(buf, insn, sizeof(insn)); 619 620 uint32_t ebx = in.gotPlt->getVA(); 621 unsigned off = pltEntryAddr - in.plt->getVA(); 622 write32le(buf + 3, sym.getGotPltVA() - ebx); 623 write32le(buf + 8, -off - 12 + 32); 624 write32le(buf + 13, -off - 17 + 18); 625 write32le(buf + 18, relOff); 626 write32le(buf + 23, -off - 27); 627 } 628 629 RetpolineNoPic::RetpolineNoPic() { 630 pltHeaderSize = 48; 631 pltEntrySize = 32; 632 ipltEntrySize = 32; 633 } 634 635 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const { 636 write32le(buf, s.getPltVA() + 16); 637 } 638 639 void RetpolineNoPic::writePltHeader(uint8_t *buf) const { 640 const uint8_t insn[] = { 641 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4 642 0x50, // 6: pushl %eax 643 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax 644 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next 645 0xf3, 0x90, // 11: loop: pause 646 0x0f, 0xae, 0xe8, // 13: lfence 647 0xeb, 0xf9, // 16: jmp loop 648 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3 649 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16 650 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp) 651 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx 652 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp) 653 0x89, 0xc8, // 2b: mov %ecx, %eax 654 0x59, // 2d: pop %ecx 655 0xc3, // 2e: ret 656 0xcc, // 2f: int3; padding 657 }; 658 memcpy(buf, insn, sizeof(insn)); 659 660 uint32_t gotPlt = in.gotPlt->getVA(); 661 write32le(buf + 2, gotPlt + 4); 662 write32le(buf + 8, gotPlt + 8); 663 } 664 665 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym, 666 uint64_t pltEntryAddr) const { 667 unsigned relOff = in.relaPlt->entsize * sym.pltIndex; 668 const uint8_t insn[] = { 669 0x50, // 0: pushl %eax 670 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax 671 0xe8, 0, 0, 0, 0, // 6: call plt+0x20 672 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11 673 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset 674 0xe9, 0, 0, 0, 0, // 15: jmp plt+0 675 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding 676 0xcc, // 1f: int3; padding 677 }; 678 memcpy(buf, insn, sizeof(insn)); 679 680 unsigned off = pltEntryAddr - in.plt->getVA(); 681 write32le(buf + 2, sym.getGotPltVA()); 682 write32le(buf + 7, -off - 11 + 32); 683 write32le(buf + 12, -off - 16 + 17); 684 write32le(buf + 17, relOff); 685 write32le(buf + 22, -off - 26); 686 } 687 688 TargetInfo *elf::getX86TargetInfo() { 689 if (config->zRetpolineplt) { 690 if (config->isPic) { 691 static RetpolinePic t; 692 return &t; 693 } 694 static RetpolineNoPic t; 695 return &t; 696 } 697 698 if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) { 699 static IntelIBT t; 700 return &t; 701 } 702 703 static X86 t; 704 return &t; 705 } 706