//===- X86.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;

namespace {
// TargetInfo implementation that handles the R_386_* relocation types and
// emits the matching GOT/PLT contents. All multi-byte values are written
// little-endian.
class X86 : public TargetInfo {
public:
  X86();
  int getTlsGdRelaxSkip(RelType type) const override;
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPltHeader(uint8_t *buf) const override;
  RelType getDynRel(RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;

  RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
  void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
};
} // namespace

// Selects the dynamic relocation types and the PLT geometry (16-byte header,
// 16-byte entries) used by the default, non-retpoline, non-IBT PLT.
X86::X86() {
  copyRel = R_386_COPY;
  gotRel = R_386_GLOB_DAT;
  pltRel = R_386_JUMP_SLOT;
  iRelativeRel = R_386_IRELATIVE;
  relativeRel = R_386_RELATIVE;
  symbolicRel = R_386_32;
  tlsDescRel = R_386_TLS_DESC;
  tlsGotRel = R_386_TLS_TPOFF;
  tlsModuleIndexRel = R_386_TLS_DTPMOD32;
  tlsOffsetRel = R_386_TLS_DTPOFF32;
  gotBaseSymInGotPlt = true;
  pltHeaderSize = 16;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3

  // Align to the non-PAE large page size (known as a superpage or huge page).
  // FreeBSD automatically promotes large, superpage-aligned allocations.
  defaultImageBase = 0x400000;
}

// Returns 1 for the two TLSDESC relocation types and 2 for everything else.
// NOTE(review): presumably the number of relocations the caller advances past
// after relaxing a general-dynamic access -- confirm against the caller.
int X86::getTlsGdRelaxSkip(RelType type) const {
  // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
  return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
}

// Maps an R_386_* relocation type to the RelExpr describing how its value is
// computed. `loc` points at the relocated field; it is only dereferenced (at
// loc[-1]) for R_386_GOT32/R_386_GOT32X. Unknown types are reported via
// error() and mapped to R_NONE.
RelExpr X86::getRelExpr(RelType type, const Symbol &s,
                        const uint8_t *loc) const {
  switch (type) {
  case R_386_8:
  case R_386_16:
  case R_386_32:
    return R_ABS;
  case R_386_TLS_LDO_32:
    return R_DTPREL;
  case R_386_TLS_GD:
    return R_TLSGD_GOTPLT;
  case R_386_TLS_LDM:
    return R_TLSLD_GOTPLT;
  case R_386_PLT32:
    return R_PLT_PC;
  case R_386_PC8:
  case R_386_PC16:
  case R_386_PC32:
    return R_PC;
  case R_386_GOTPC:
    return R_GOTPLTONLY_PC;
  case R_386_TLS_IE:
    return R_GOT;
  case R_386_GOT32:
  case R_386_GOT32X:
    // These relocations are arguably mis-designed because their calculations
    // depend on the instructions they are applied to. This is bad because we
    // usually don't care about whether the target section contains valid
    // machine instructions or not. But this is part of the documented ABI, so
    // we had to implement as the standard requires.
    //
    // x86 does not support PC-relative data access. Therefore, in order to
    // access GOT contents, a GOT address needs to be known at link-time
    // (which means non-PIC) or compilers have to emit code to get a GOT
    // address at runtime (which means code is position-independent but
    // compilers need to emit extra code for each GOT access.) This decision
    // is made at compile-time. In the latter case, compilers emit code to
    // load a GOT address to a register, which is usually %ebx.
    //
    // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
    // foo@GOT(%ebx).
    //
    // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
    // find such relocation, we should report an error. foo@GOT is resolved to
    // an *absolute* address of foo's GOT entry, because both GOT address and
    // foo's offset are known. In other words, it's G + A.
    //
    // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
    // foo's GOT entry in the table, because GOT address is not known but foo's
    // offset in the table is known. It's G + A - GOT.
    //
    // It's unfortunate that compilers emit the same relocation for these
    // different use cases. In order to distinguish them, we have to read a
    // machine instruction.
    //
    // The following code implements it. We assume that Loc[0] is the first byte
    // of a displacement or an immediate field of a valid machine
    // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
    // the byte, we can determine whether the instruction uses the operand as an
    // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
    //
    // The 0xc7 mask keeps the ModRM mod and r/m fields; mod=00 with r/m=101
    // (0x5) is the disp32-only form, i.e. no base register.
    return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
  case R_386_TLS_GOTDESC:
    return R_TLSDESC_GOTPLT;
  case R_386_TLS_DESC_CALL:
    return R_TLSDESC_CALL;
  case R_386_TLS_GOTIE:
    return R_GOTPLT;
  case R_386_GOTOFF:
    return R_GOTPLTREL;
  case R_386_TLS_LE:
    return R_TPREL;
  case R_386_TLS_LE_32:
    return R_TPREL_NEG;
  case R_386_NONE:
    return R_NONE;
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}

// Rewrites the generic TLS relaxation expressions into the variants this
// target needs: GD->IE always becomes the GOTPLT-relative form, and GD->LE
// becomes the negated form for R_386_TLS_GD only (the TLSDESC types keep
// R_RELAX_TLS_GD_TO_LE). Every other expression is returned unchanged.
RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
  switch (expr) {
  default:
    return expr;
  case R_RELAX_TLS_GD_TO_IE:
    return R_RELAX_TLS_GD_TO_IE_GOTPLT;
  case R_RELAX_TLS_GD_TO_LE:
    return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
                                : R_RELAX_TLS_GD_TO_LE;
  }
}

// Writes the address of the main partition's dynamic section as the first
// 32-bit word of the .got.plt header.
void X86::writeGotPltHeader(uint8_t *buf) const {
  write32le(buf, mainPart->dynamic->getVA());
}

void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  // Entries in .got.plt initially point back to the corresponding
  // PLT entries with a fixed offset to skip the first instruction.
  // (The first instruction of a PLT entry written by X86::writePlt is the
  // 6-byte indirect jmp, hence +6.)
  write32le(buf, s.getPltVA() + 6);
}

void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
  // An x86 entry is the address of the ifunc resolver function.
  write32le(buf, s.getVA());
}

// Translates the local-exec TLS relocation types into the corresponding
// dynamic relocation types; all other types are returned unchanged.
RelType X86::getDynRel(RelType type) const {
  if (type == R_386_TLS_LE)
    return R_386_TLS_TPOFF;
  if (type == R_386_TLS_LE_32)
    return R_386_TLS_TPOFF32;
  return type;
}

// Writes the 16-byte PLT header (.PLT0) into buf. The PIC form addresses
// .got.plt slots 4 and 8 through %ebx; the non-PIC form embeds their absolute
// addresses.
void X86::writePltHeader(uint8_t *buf) const {
  if (config->isPic) {
    const uint8_t v[] = {
        0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
        0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
        0x90, 0x90, 0x90, 0x90              // nop
    };
    memcpy(buf, v, sizeof(v));
    return;
  }

  const uint8_t pltData[] = {
      0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
      0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
      0x90, 0x90, 0x90, 0x90, // nop
  };
  memcpy(buf, pltData, sizeof(pltData));
  uint32_t gotPlt = in.gotPlt->getVA();
  // Patch the two absolute disp32 operands, each 2 bytes into its instruction.
  write32le(buf + 2, gotPlt + 4);
  write32le(buf + 8, gotPlt + 8);
}

// Writes one 16-byte PLT entry for `sym` into buf. `pltEntryAddr` is the
// address of this entry and is used to compute the jump back to .PLT0.
void X86::writePlt(uint8_t *buf, const Symbol &sym,
                   uint64_t pltEntryAddr) const {
  // Byte offset of this symbol's relocation within in.relaPlt.
  unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
  if (config->isPic) {
    const uint8_t inst[] = {
        0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
        0x68, 0,    0, 0, 0,    // pushl $reloc_offset
        0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
    };
    memcpy(buf, inst, sizeof(inst));
    // PIC: the slot is addressed relative to the start of .got.plt.
    write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA());
  } else {
    const uint8_t inst[] = {
        0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
        0x68, 0,    0, 0, 0,    // pushl $reloc_offset
        0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
    };
    memcpy(buf, inst, sizeof(inst));
    // Non-PIC: the slot is addressed absolutely.
    write32le(buf + 2, sym.getGotPltVA());
  }

  write32le(buf + 7, relOff);
  // The final jmp ends at the end of the 16-byte entry, so its rel32 is
  // measured from pltEntryAddr + 16.
  write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
}

// Reads the addend stored in the relocated field at buf for the given type,
// sign-extended from the field width (8, 16 or 32 bits). Types without an
// implicit addend yield 0; unhandled types raise an internal linker error and
// also yield 0.
int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  case R_386_8:
  case R_386_PC8:
    return SignExtend64<8>(*buf);
  case R_386_16:
  case R_386_PC16:
    return SignExtend64<16>(read16le(buf));
  case R_386_32:
  case R_386_GLOB_DAT:
  case R_386_GOT32:
  case R_386_GOT32X:
  case R_386_GOTOFF:
  case R_386_GOTPC:
  case R_386_IRELATIVE:
  case R_386_PC32:
  case R_386_PLT32:
  case R_386_RELATIVE:
  case R_386_TLS_GOTDESC:
  case R_386_TLS_DESC_CALL:
  case R_386_TLS_DTPMOD32:
  case R_386_TLS_DTPOFF32:
  case R_386_TLS_LDO_32:
  case R_386_TLS_LDM:
  case R_386_TLS_IE:
  case R_386_TLS_IE_32:
  case R_386_TLS_LE:
  case R_386_TLS_LE_32:
  case R_386_TLS_GD:
  case R_386_TLS_GD_32:
  case R_386_TLS_GOTIE:
  case R_386_TLS_TPOFF:
  case R_386_TLS_TPOFF32:
    return SignExtend64<32>(read32le(buf));
  case R_386_TLS_DESC:
    // Mirrors relocate(): the addend lives in the second 32-bit word.
    return SignExtend64<32>(read32le(buf + 4));
  case R_386_NONE:
  case R_386_JUMP_SLOT:
    // These relocations are defined as not having an implicit addend.
    return 0;
  default:
    internalLinkerError(getErrorLocation(buf),
                        "cannot read addend for relocation " + toString(type));
    return 0;
  }
}

// Writes the resolved value `val` into the field at `loc`, using the width
// implied by rel.type and range-checking it first where a check is listed.
void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
  switch (rel.type) {
  case R_386_8:
    // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
    // being used for some 16-bit programs such as boot loaders, so
    // we want to support them.
    checkIntUInt(loc, val, 8, rel);
    *loc = val;
    break;
  case R_386_PC8:
    checkInt(loc, val, 8, rel);
    *loc = val;
    break;
  case R_386_16:
    checkIntUInt(loc, val, 16, rel);
    write16le(loc, val);
    break;
  case R_386_PC16:
    // R_386_PC16 is normally used with 16 bit code. In that situation
    // the PC is 16 bits, just like the addend. This means that it can
    // point from any 16 bit address to any other if the possibility
    // of wrapping is included.
    // The only restriction we have to check then is that the destination
    // address fits in 16 bits. That is impossible to do here. The problem is
    // that we are passed the final value, which already had the
    // current location subtracted from it.
    // We just check that Val fits in 17 bits. This misses some cases, but
    // should have no false positives.
    checkInt(loc, val, 17, rel);
    write16le(loc, val);
    break;
  case R_386_32:
  case R_386_GOT32:
  case R_386_GOT32X:
  case R_386_GOTOFF:
  case R_386_GOTPC:
  case R_386_PC32:
  case R_386_PLT32:
  case R_386_RELATIVE:
  case R_386_TLS_GOTDESC:
  case R_386_TLS_DESC_CALL:
  case R_386_TLS_DTPMOD32:
  case R_386_TLS_DTPOFF32:
  case R_386_TLS_GD:
  case R_386_TLS_GOTIE:
  case R_386_TLS_IE:
  case R_386_TLS_LDM:
  case R_386_TLS_LDO_32:
  case R_386_TLS_LE:
  case R_386_TLS_LE_32:
  case R_386_TLS_TPOFF:
  case R_386_TLS_TPOFF32:
    checkInt(loc, val, 32, rel);
    write32le(loc, val);
    break;
  case R_386_TLS_DESC:
    // The addend is stored in the second 32-bit word.
    write32le(loc + 4, val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}

// Rewrites a general-dynamic (or TLSDESC) TLS access at `loc` into the
// local-exec form. Handles R_386_TLS_GD, R_386_TLS_GOTDESC and
// R_386_TLS_DESC_CALL; any other type trips the assert in the last branch.
static void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
  if (rel.type == R_386_TLS_GD) {
    // Convert (loc[-2] == 0x04)
    //   leal x@tlsgd(, %ebx, 1), %eax
    //   call ___tls_get_addr@plt
    // or
    //   leal x@tlsgd(%reg), %eax
    //   call *___tls_get_addr@got(%reg)
    // to
    const uint8_t inst[] = {
        0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
        0x81, 0xe8, 0,    0,    0,    0,    // subl $x@tpoff, %eax
    };
    // The SIB form of the leal has one extra byte before the displacement, so
    // the sequence starts 3 bytes before loc instead of 2.
    uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
    memcpy(w, inst, sizeof(inst));
    // Fill in the 32-bit immediate of the subl.
    write32le(w + 8, val);
  } else if (rel.type == R_386_TLS_GOTDESC) {
    // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
    //
    // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
    if (memcmp(loc - 2, "\x8d\x83", 2)) {
      error(getErrorLocation(loc - 2) +
            "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
      return;
    }
    // ModRM 0x05 selects the disp32-only addressing form with %eax as the
    // destination.
    loc[-1] = 0x05;
    write32le(loc, val);
  } else {
    // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
    assert(rel.type == R_386_TLS_DESC_CALL);
    loc[0] = 0x66;
    loc[1] = 0x90;
  }
}

// Rewrites a general-dynamic (or TLSDESC) TLS access at `loc` into the
// initial-exec form. Handles the same three relocation types as
// relaxTlsGdToLe.
static void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) {
  if (rel.type == R_386_TLS_GD) {
    // Convert (loc[-2] == 0x04)
    //   leal x@tlsgd(, %ebx, 1), %eax
    //   call ___tls_get_addr@plt
    // or
    //   leal x@tlsgd(%reg), %eax
    //   call *___tls_get_addr@got(%reg)
    // to
    const uint8_t inst[] = {
        0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
        0x03, 0x83, 0,    0,    0,    0,    // addl x@gottpoff(%ebx), %eax
    };
    // Same start-of-sequence adjustment as in relaxTlsGdToLe.
    uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
    memcpy(w, inst, sizeof(inst));
    // Fill in the disp32 of the addl.
    write32le(w + 8, val);
  } else if (rel.type == R_386_TLS_GOTDESC) {
    // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
    if (memcmp(loc - 2, "\x8d\x83", 2)) {
      error(getErrorLocation(loc - 2) +
            "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
      return;
    }
    // Only the opcode changes (lea 0x8d -> mov 0x8b); the ModRM byte is kept.
    loc[-2] = 0x8b;
    write32le(loc, val);
  } else {
    // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
    assert(rel.type == R_386_TLS_DESC_CALL);
    loc[0] = 0x66;
    loc[1] = 0x90;
  }
}

// In some conditions, relocations can be optimized to avoid using GOT.
// This function does that for Initial Exec to Local Exec case.
static void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
  // Ulrich's document section 6.2 says that @gotntpoff can
  // be used with MOVL or ADDL instructions.
  // @indntpoff is similar to @gotntpoff, but for use in
  // position dependent code.
  //
  // `reg` is the ModRM reg field (bits 5:3) of the byte preceding the
  // relocated field. In the 0xa1 case below loc[-1] is an opcode rather than
  // a ModRM byte, and `reg` is not used.
  uint8_t reg = (loc[-1] >> 3) & 7;

  if (rel.type == R_386_TLS_IE) {
    if (loc[-1] == 0xa1) {
      // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
      // This case is different from the generic case below because
      // this is a 5 byte instruction while below is 6 bytes.
      loc[-1] = 0xb8;
    } else if (loc[-2] == 0x8b) {
      // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
      loc[-2] = 0xc7;
      loc[-1] = 0xc0 | reg;
    } else {
      // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
      loc[-2] = 0x81;
      loc[-1] = 0xc0 | reg;
    }
  } else {
    assert(rel.type == R_386_TLS_GOTIE);
    // i386 has no PC-relative data addressing; the GOT base is in a register
    // (shown as %ebx here).
    if (loc[-2] == 0x8b) {
      // "movl foo@gotntpoff(%ebx),%reg" -> "movl $foo,%reg"
      loc[-2] = 0xc7;
      loc[-1] = 0xc0 | reg;
    } else {
      // "addl foo@gotntpoff(%ebx),%reg" -> "leal foo(%reg),%reg"
      loc[-2] = 0x8d;
      loc[-1] = 0x80 | (reg << 3) | reg;
    }
  }
  write32le(loc, val);
}

// Rewrites a local-dynamic TLS access into the local-exec form.
// R_386_TLS_LDO_32 only has its value stored; for any other type the
// leal+call pair around `loc` is replaced by a same-length sequence that
// loads %gs:0 into %eax followed by padding.
static void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
  if (rel.type == R_386_TLS_LDO_32) {
    write32le(loc, val);
    return;
  }

  // loc[4] is the first byte after the 4-byte relocated field; 0xe8 is the
  // opcode of a direct call.
  if (loc[4] == 0xe8) {
    // Convert
    //   leal x(%reg),%eax
    //   call ___tls_get_addr@plt
    // to
    const uint8_t inst[] = {
        0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
        0x90,                               // nop
        0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
    };
    memcpy(loc - 2, inst, sizeof(inst));
    return;
  }

  // Convert
  //   leal x(%reg),%eax
  //   call *___tls_get_addr@got(%reg)
  // to
  const uint8_t inst[] = {
      0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
      0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi
  };
  memcpy(loc - 2, inst, sizeof(inst));
}

// Applies every relocation of `sec` to its contents in `buf`, dispatching TLS
// relaxation expressions to the helpers above and everything else to
// relocate().
void X86::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
  uint64_t secAddr = sec.getOutputSection()->addr;
  if (auto *s = dyn_cast<InputSection>(&sec))
    secAddr += s->outSecOff;
  for (const Relocation &rel : sec.relocs()) {
    uint8_t *loc = buf + rel.offset;
    // The computed target value is sign-extended from 32 bits.
    const uint64_t val = SignExtend64(
        sec.getRelocTargetVA(sec.file, rel.type, rel.addend,
                             secAddr + rel.offset, *rel.sym, rel.expr),
        32);
    // Nothing follows the switch inside the loop body, so `break` and
    // `continue` both simply move on to the next relocation.
    switch (rel.expr) {
    case R_RELAX_TLS_GD_TO_IE_GOTPLT:
      relaxTlsGdToIe(loc, rel, val);
      continue;
    case R_RELAX_TLS_GD_TO_LE:
    case R_RELAX_TLS_GD_TO_LE_NEG:
      relaxTlsGdToLe(loc, rel, val);
      continue;
    case R_RELAX_TLS_LD_TO_LE:
      relaxTlsLdToLe(loc, rel, val);
      break;
    case R_RELAX_TLS_IE_TO_LE:
      relaxTlsIeToLe(loc, rel, val);
      continue;
    default:
      relocate(loc, rel, val);
      break;
    }
  }
}

// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
// entries containing endbr32 instructions. A PLT entry will be split into two
// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
namespace {
class IntelIBT : public X86 {
public:
  IntelIBT();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;

  // Size in bytes of the header that writeIBTPlt emits before its entries.
  static const unsigned IBTPltHeaderSize = 16;
};
} // namespace

// pltHeaderSize is zeroed here; the 16-byte header is written by writeIBTPlt
// and accounted for through IBTPltHeaderSize instead.
IntelIBT::IntelIBT() { pltHeaderSize = 0; }

// Points the .got.plt slot at this symbol's entry inside in.ibtPlt (header
// plus one pltEntrySize-sized entry per PLT index).
void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  uint64_t va =
      in.ibtPlt->getVA() + IBTPltHeaderSize + s.getPltIdx() * pltEntrySize;
  write32le(buf, va);
}

// Writes the 16-byte entry consisting of endbr32, an indirect jmp through the
// symbol's .got.plt slot, and a nop. The entry address is not needed because
// nothing in this entry is PC-relative.
void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
                        uint64_t /*pltEntryAddr*/) const {
  if (config->isPic) {
    const uint8_t inst[] = {
        0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
        0xff, 0xa3, 0,    0,    0, 0, // jmp *name@GOT(%ebx)
        0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
    };
    memcpy(buf, inst, sizeof(inst));
    // disp32 of the jmp: slot offset from the start of .got.plt.
    write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA());
    return;
  }

  const uint8_t inst[] = {
      0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
      0xff, 0x25, 0,    0,    0, 0, // jmp *foo@GOT
      0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
  };
  memcpy(buf, inst, sizeof(inst));
  // disp32 of the jmp: absolute address of the slot.
  write32le(buf + 6, sym.getGotPltVA());
}

// Writes the PLT header followed by `numEntries` 16-byte entries, each of
// which pushes its relocation offset and jumps back to the header.
void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
  writePltHeader(buf);
  buf += IBTPltHeaderSize;

  const uint8_t inst[] = {
      0xf3, 0x0f, 0x1e, 0xfb,    // endbr32
      0x68, 0,    0,    0,    0, // pushl $reloc_offset
      0xe9, 0,    0,    0,    0, // jmp .PLT0@PC
      0x66, 0x90,                // nop
  };

  for (size_t i = 0; i < numEntries; ++i) {
    memcpy(buf, inst, sizeof(inst));
    write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
    // The jmp ends 14 bytes into entry i, which itself starts
    // IBTPltHeaderSize (16) + 16 * i bytes after .PLT0; 16 + 14 = 30.
    // pltHeaderSize is 0 for this class (see the constructor).
    write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
    buf += sizeof(inst);
  }
}

namespace {
// PLT layout used when config->zRetpolineplt and config->isPic are both set
// (see getX86TargetInfo).
class RetpolinePic : public X86 {
public:
  RetpolinePic();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};

// PLT layout used when config->zRetpolineplt is set and config->isPic is not
// (see getX86TargetInfo).
class RetpolineNoPic : public X86 {
public:
  RetpolineNoPic();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};
} // namespace

// Retpoline PLTs use a 48-byte header and 32-byte entries.
RetpolinePic::RetpolinePic() {
  pltHeaderSize = 48;
  pltEntrySize = 32;
  ipltEntrySize = 32;
}

void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  // Offset 17 is the "pushl $reloc_offset" instruction of the entry written
  // by RetpolinePic::writePlt.
  write32le(buf, s.getPltVA() + 17);
}

// Writes the 48-byte retpoline PLT header. All operands are %ebx-relative or
// fixed, so nothing needs patching after the copy.
void RetpolinePic::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
      0x50,                                     // 6:    pushl %eax
      0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
      0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
      0xf3, 0x90,                               // 12: loop: pause
      0x0f, 0xae, 0xe8,                         // 14:   lfence
      0xeb, 0xf9,                               // 17:   jmp loop
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
      0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
      0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
      0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
      0x89, 0xc8,                               // 2b:   mov %ecx, %eax
      0x59,                                     // 2d:   pop %ecx
      0xc3,                                     // 2e:   ret
      0xcc,                                     // 2f:   int3; padding
  };
  memcpy(buf, insn, sizeof(insn));
}

// Writes one 32-byte retpoline PLT entry (PIC form) and patches its five
// 32-bit operands.
void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
                            uint64_t pltEntryAddr) const {
  unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
  const uint8_t insn[] = {
      0x50,                            // pushl %eax
      0x8b, 0x83, 0,    0,    0,    0, // mov foo@GOT(%ebx), %eax
      0xe8, 0,    0,    0,    0,       // call plt+0x20
      0xe9, 0,    0,    0,    0,       // jmp plt+0x12
      0x68, 0,    0,    0,    0,       // pushl $reloc_offset
      0xe9, 0,    0,    0,    0,       // jmp plt+0
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint32_t ebx = in.gotPlt->getVA();
  // Offset of this entry from the start of .plt. Each rel32 below is
  // target - (off + end offset of the instruction within the entry).
  unsigned off = pltEntryAddr - in.plt->getVA();
  write32le(buf + 3, sym.getGotPltVA() - ebx);
  write32le(buf + 8, -off - 12 + 32);  // call ends at 12, target plt+0x20
  write32le(buf + 13, -off - 17 + 18); // jmp ends at 17, target plt+0x12
  write32le(buf + 18, relOff);
  write32le(buf + 23, -off - 27);      // jmp ends at 27, target plt+0
}

// Retpoline PLTs use a 48-byte header and 32-byte entries.
RetpolineNoPic::RetpolineNoPic() {
  pltHeaderSize = 48;
  pltEntrySize = 32;
  ipltEntrySize = 32;
}

void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  // Offset 16 (0x10) is the "pushl $reloc_offset" instruction of the entry
  // written by RetpolineNoPic::writePlt.
  write32le(buf, s.getPltVA() + 16);
}

// Writes the 48-byte retpoline PLT header (non-PIC form) and patches in the
// absolute addresses of .got.plt slots 4 and 8.
void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
      0x50,                            // 6:    pushl %eax
      0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
      0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
      0xf3, 0x90,                      // 11: loop: pause
      0x0f, 0xae, 0xe8,                // 13:   lfence
      0xeb, 0xf9,                      // 16:   jmp loop
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
      0xcc, 0xcc, 0xcc,                // 1d:   int3; .align 16
      0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
      0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
      0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
      0x89, 0xc8,                      // 2b:   mov %ecx, %eax
      0x59,                            // 2d:   pop %ecx
      0xc3,                            // 2e:   ret
      0xcc,                            // 2f:   int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint32_t gotPlt = in.gotPlt->getVA();
  write32le(buf + 2, gotPlt + 4); // operand of the pushl at offset 0
  write32le(buf + 8, gotPlt + 8); // operand of the mov at offset 7
}

// Writes one 32-byte retpoline PLT entry (non-PIC form) and patches its five
// 32-bit operands.
void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
                              uint64_t pltEntryAddr) const {
  unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
  const uint8_t insn[] = {
      0x50,                         // 0:  pushl %eax
      0xa1, 0,    0,    0,    0,    // 1:  mov foo_in_GOT, %eax
      0xe8, 0,    0,    0,    0,    // 6:  call plt+0x20
      0xe9, 0,    0,    0,    0,    // b:  jmp plt+0x11
      0x68, 0,    0,    0,    0,    // 10: pushl $reloc_offset
      0xe9, 0,    0,    0,    0,    // 15: jmp plt+0
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
      0xcc,                         // 1f: int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  // Offset of this entry from the start of .plt. Each rel32 below is
  // target - (off + end offset of the instruction within the entry).
  unsigned off = pltEntryAddr - in.plt->getVA();
  write32le(buf + 2, sym.getGotPltVA());
  write32le(buf + 7, -off - 11 + 32);  // call ends at 11, target plt+0x20
  write32le(buf + 12, -off - 16 + 17); // jmp ends at 16, target plt+0x11
  write32le(buf + 17, relOff);
  write32le(buf + 22, -off - 26);      // jmp ends at 26, target plt+0
}

// Returns the TargetInfo singleton matching the current configuration:
// a retpoline variant (PIC or non-PIC) if config->zRetpolineplt is set,
// otherwise IntelIBT if the IBT feature bit is set in config->andFeatures,
// otherwise plain X86.
TargetInfo *elf::getX86TargetInfo() {
  if (config->zRetpolineplt) {
    if (config->isPic) {
      static RetpolinePic t;
      return &t;
    }
    static RetpolineNoPic t;
    return &t;
  }

  if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
    static IntelIBT t;
    return &t;
  }

  static X86 t;
  return &t;
}