//===- X86_64.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InputFiles.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;

namespace {
class X86_64 : public TargetInfo {
public:
  X86_64();
  int getTlsGdRelaxSkip(RelType type) const override;
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  void writeGotPltHeader(uint8_t *buf) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr,
                int32_t index, unsigned relOff) const override;
  void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override;

  RelExpr adjustRelaxExpr(RelType type, const uint8_t *data,
                          RelExpr expr) const override;
  void relaxGot(uint8_t *loc, RelType type, uint64_t val) const override;
  void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override;
  void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
  void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override;
  void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
  bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
                                        uint8_t stOther) const override;
};
} // namespace

X86_64::X86_64() {
  copyRel = R_X86_64_COPY;
  gotRel = R_X86_64_GLOB_DAT;
  noneRel = R_X86_64_NONE;
  pltRel = R_X86_64_JUMP_SLOT;
  relativeRel = R_X86_64_RELATIVE;
  iRelativeRel = R_X86_64_IRELATIVE;
  symbolicRel = R_X86_64_64;
  tlsDescRel = R_X86_64_TLSDESC;
  tlsGotRel = R_X86_64_TPOFF64;
  tlsModuleIndexRel = R_X86_64_DTPMOD64;
  tlsOffsetRel = R_X86_64_DTPOFF64;
  pltEntrySize = 16;
  pltHeaderSize = 16;
  trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3

  // Align to the large page size (known as a superpage or huge page).
  // FreeBSD automatically promotes large, superpage-aligned allocations.
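  // (0x200000 is 2 MiB, the x86-64 superpage size.)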
  defaultImageBase = 0x200000;
}

// Relaxing a TLS GD sequence rewrites code covered by two consecutive
// relocations (the TLSGD one and the following call to __tls_get_addr), so
// the relocation scan skips both.
int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; }

RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
                           const uint8_t *loc) const {
  if (type == R_X86_64_GOTTPOFF)
    config->hasStaticTlsModel = true;

  switch (type) {
  case R_X86_64_8:
  case R_X86_64_16:
  case R_X86_64_32:
  case R_X86_64_32S:
  case R_X86_64_64:
    return R_ABS;
  case R_X86_64_DTPOFF32:
  case R_X86_64_DTPOFF64:
    return R_DTPREL;
  case R_X86_64_TPOFF32:
    return R_TLS;
  case R_X86_64_TLSDESC_CALL:
    return R_TLSDESC_CALL;
  case R_X86_64_TLSLD:
    return R_TLSLD_PC;
  case R_X86_64_TLSGD:
    return R_TLSGD_PC;
  case R_X86_64_SIZE32:
  case R_X86_64_SIZE64:
    return R_SIZE;
  case R_X86_64_PLT32:
    return R_PLT_PC;
  case R_X86_64_PC8:
  case R_X86_64_PC16:
  case R_X86_64_PC32:
  case R_X86_64_PC64:
    return R_PC;
  case R_X86_64_GOT32:
  case R_X86_64_GOT64:
    return R_GOTPLT;
  case R_X86_64_GOTPC32_TLSDESC:
    return R_TLSDESC_PC;
  case R_X86_64_GOTPCREL:
  case R_X86_64_GOTPCRELX:
  case R_X86_64_REX_GOTPCRELX:
  case R_X86_64_GOTTPOFF:
    return R_GOT_PC;
  case R_X86_64_GOTOFF64:
    return R_GOTPLTREL;
  case R_X86_64_GOTPC32:
  case R_X86_64_GOTPC64:
    return R_GOTPLTONLY_PC;
  case R_X86_64_NONE:
    return R_NONE;
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}

void X86_64::writeGotPltHeader(uint8_t *buf) const {
  // The first entry holds the value of _DYNAMIC. It is not clear why that is
  // required, but it is documented in the psABI and the glibc dynamic linker
  // seems to use it (note that this is relevant for linking ld.so, not any
  // other program).
  write64le(buf, mainPart->dynamic->getVA());
}

void X86_64::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  // See comments in X86::writeGotPlt.
  write64le(buf, s.getPltVA() + 6);
}

void X86_64::writePltHeader(uint8_t *buf) const {
  const uint8_t pltData[] = {
      0xff, 0x35, 0, 0, 0, 0, // pushq GOTPLT+8(%rip)
      0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip)
      0x0f, 0x1f, 0x40, 0x00, // nop
  };
  memcpy(buf, pltData, sizeof(pltData));
  uint64_t gotPlt = in.gotPlt->getVA();
  uint64_t plt = in.plt->getVA();
  write32le(buf + 2, gotPlt - plt + 2); // GOTPLT+8
  write32le(buf + 8, gotPlt - plt + 4); // GOTPLT+16
}

void X86_64::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr,
                      uint64_t pltEntryAddr, int32_t index,
                      unsigned relOff) const {
  const uint8_t inst[] = {
      0xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip)
      0x68, 0, 0, 0, 0,       // pushq <relocation index>
      0xe9, 0, 0, 0, 0,       // jmpq plt[0]
  };
  memcpy(buf, inst, sizeof(inst));

  write32le(buf + 2, gotPltEntryAddr - pltEntryAddr - 6);
  write32le(buf + 7, index);
  write32le(buf + 12, -pltHeaderSize - pltEntrySize * index - 16);
}

RelType X86_64::getDynRel(RelType type) const {
  if (type == R_X86_64_64 || type == R_X86_64_PC64 || type == R_X86_64_SIZE32 ||
      type == R_X86_64_SIZE64)
    return type;
  return R_X86_64_NONE;
}

void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
  if (type == R_X86_64_TLSGD) {
    // Convert
    //   .byte 0x66
    //   leaq x@tlsgd(%rip), %rdi
    //   .word 0x6666
    //   rex64
    //   call __tls_get_addr@plt
    // to the following two instructions.
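    // Both the original code sequence and the replacement below are 16 bytes,
    // so the rewrite is done in place, starting 4 bytes before the relocated
    // address (i.e. at the 0x66 prefix byte).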
    const uint8_t inst[] = {
        0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,
        0x00, 0x00,                            // mov %fs:0x0,%rax
        0x48, 0x8d, 0x80, 0,    0,    0,    0, // lea x@tpoff,%rax
    };
    memcpy(loc - 4, inst, sizeof(inst));

    // The original code used a PC-relative relocation, so we have to
    // compensate for the -4 it had in the addend.
    write32le(loc + 8, val + 4);
  } else {
    // Convert
    //   lea x@tlsdesc(%rip), %rax
    //   call *(%rax)
    // to the following two instructions.
    assert(type == R_X86_64_GOTPC32_TLSDESC);
    if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
      error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
                                        "in callq *x@tlsdesc(%rip), %rax");
      return;
    }
    // movq $x@tpoff,%rax
    loc[-2] = 0xc7;
    loc[-1] = 0xc0;
    write32le(loc, val + 4);
    // xchg ax,ax
    loc[4] = 0x66;
    loc[5] = 0x90;
  }
}

void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
  if (type == R_X86_64_TLSGD) {
    // Convert
    //   .byte 0x66
    //   leaq x@tlsgd(%rip), %rdi
    //   .word 0x6666
    //   rex64
    //   call __tls_get_addr@plt
    // to the following two instructions.
    const uint8_t inst[] = {
        0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,
        0x00, 0x00,                            // mov %fs:0x0,%rax
        0x48, 0x03, 0x05, 0,    0,    0,    0, // addq x@gottpoff(%rip),%rax
    };
    memcpy(loc - 4, inst, sizeof(inst));

    // Both code sequences are PC-relative, but since we are moving the
    // constant forward by 8 bytes, we have to subtract 8 from the value.
    write32le(loc + 8, val - 8);
  } else {
    // Convert
    //   lea x@tlsdesc(%rip), %rax
    //   call *(%rax)
    // to the following two instructions.
    assert(type == R_X86_64_GOTPC32_TLSDESC);
    if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
      error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
                                        "in callq *x@tlsdesc(%rip), %rax");
      return;
    }
    // movq x@gottpoff(%rip),%rax
    loc[-2] = 0x8b;
    write32le(loc, val);
    // xchg ax,ax
    loc[4] = 0x66;
    loc[5] = 0x90;
  }
}

// In some conditions, an R_X86_64_GOTTPOFF relocation can be optimized to
// R_X86_64_TPOFF32 so that it does not use the GOT.
void X86_64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const {
  uint8_t *inst = loc - 3;
  uint8_t reg = loc[-1] >> 3;
  uint8_t *regSlot = loc - 1;

  // Note that ADD with RSP or R12 is converted to ADD instead of LEA
  // because LEA with these registers needs 4 bytes to encode and thus
  // wouldn't fit the space.
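  // In each case below, both the original instruction and its replacement
  // occupy the same 7 bytes (REX prefix, opcode, ModR/M byte, 32-bit
  // operand), so the rewrite fits exactly in place.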

  if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
    // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
    memcpy(inst, "\x48\x81\xc4", 3);
  } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
    // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
    memcpy(inst, "\x49\x81\xc4", 3);
  } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
    // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
    memcpy(inst, "\x4d\x8d", 2);
    *regSlot = 0x80 | (reg << 3) | reg;
  } else if (memcmp(inst, "\x48\x03", 2) == 0) {
    // "addq foo@gottpoff(%rip),%reg" -> "leaq foo(%reg),%reg"
    memcpy(inst, "\x48\x8d", 2);
    *regSlot = 0x80 | (reg << 3) | reg;
  } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
    // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
    memcpy(inst, "\x49\xc7", 2);
    *regSlot = 0xc0 | reg;
  } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
    // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
    memcpy(inst, "\x48\xc7", 2);
    *regSlot = 0xc0 | reg;
  } else {
    error(getErrorLocation(loc - 3) +
          "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only");
  }

  // The original code used a PC-relative relocation, so we need to
  // compensate for the -4 it had in the addend.
  write32le(loc, val + 4);
}

void X86_64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
  if (type == R_X86_64_DTPOFF64) {
    write64le(loc, val);
    return;
  }
  if (type == R_X86_64_DTPOFF32) {
    write32le(loc, val);
    return;
  }

  const uint8_t inst[] = {
      0x66, 0x66, // .word 0x6666
      0x66,       // .byte 0x66
      0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0,%rax
  };

  if (loc[4] == 0xe8) {
    // Convert
    //   leaq bar@tlsld(%rip), %rdi           # 48 8d 3d <Loc>
    //   callq __tls_get_addr@PLT             # e8 <disp32>
    //   leaq bar@dtpoff(%rax), %rcx
    // to
    //   .word 0x6666
    //   .byte 0x66
    //   mov %fs:0,%rax
    //   leaq bar@tpoff(%rax), %rcx
    memcpy(loc - 3, inst, sizeof(inst));
    return;
  }

  if (loc[4] == 0xff && loc[5] == 0x15) {
    // Convert
    //   leaq x@tlsld(%rip),%rdi               # 48 8d 3d <Loc>
    //   call *__tls_get_addr@GOTPCREL(%rip)   # ff 15 <disp32>
    // to
    //   .long 0x66666666
    //   movq %fs:0,%rax
    // See "Table 11.9: LD -> LE Code Transition (LP64)" in
    // https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf
    loc[-3] = 0x66;
    memcpy(loc - 2, inst, sizeof(inst));
    return;
  }

  error(getErrorLocation(loc - 3) +
        "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
}

void X86_64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
  switch (type) {
  case R_X86_64_8:
    checkIntUInt(loc, val, 8, type);
    *loc = val;
    break;
  case R_X86_64_PC8:
    checkInt(loc, val, 8, type);
    *loc = val;
    break;
  case R_X86_64_16:
    checkIntUInt(loc, val, 16, type);
    write16le(loc, val);
    break;
  case R_X86_64_PC16:
    checkInt(loc, val, 16, type);
    write16le(loc, val);
    break;
  case R_X86_64_32:
    checkUInt(loc, val, 32, type);
    write32le(loc, val);
    break;
  case R_X86_64_32S:
  case R_X86_64_TPOFF32:
  case R_X86_64_GOT32:
  case R_X86_64_GOTPC32:
  case R_X86_64_GOTPC32_TLSDESC:
  case R_X86_64_GOTPCREL:
  case R_X86_64_GOTPCRELX:
  case R_X86_64_REX_GOTPCRELX:
  case R_X86_64_PC32:
  case R_X86_64_GOTTPOFF:
  case R_X86_64_PLT32:
  case R_X86_64_TLSGD:
  case R_X86_64_TLSLD:
  case R_X86_64_DTPOFF32:
  case R_X86_64_SIZE32:
    checkInt(loc, val, 32, type);
    write32le(loc, val);
    break;
  case R_X86_64_64:
  case R_X86_64_DTPOFF64:
  case R_X86_64_PC64:
  case R_X86_64_SIZE64:
  case R_X86_64_GOT64:
  case R_X86_64_GOTOFF64:
  case R_X86_64_GOTPC64:
    write64le(loc, val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}

RelExpr X86_64::adjustRelaxExpr(RelType type, const uint8_t *data,
                                RelExpr relExpr) const {
  if (type != R_X86_64_GOTPCRELX && type != R_X86_64_REX_GOTPCRELX)
    return relExpr;
  const uint8_t op = data[-2];
  const uint8_t modRm = data[-1];

  // FIXME: When PIC is disabled and foo is defined locally in the
  // lower 32-bit address space, the memory operand in mov can be converted
  // into an immediate operand. Otherwise, mov must be changed to lea. We
  // support only the latter relaxation at the moment.
  if (op == 0x8b)
    return R_RELAX_GOT_PC;

  // Relax call and jmp.
  if (op == 0xff && (modRm == 0x15 || modRm == 0x25))
    return R_RELAX_GOT_PC;

  // Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor.
  // If PIC, no relaxation is available.
  // We also don't relax test/binop instructions without a REX prefix;
  // they are 32-bit operations and are uncommon.
  assert(type == R_X86_64_REX_GOTPCRELX);
  return config->isPic ? relExpr : R_RELAX_GOT_PC_NOPIC;
}

// A subset of relaxations can only be applied for no-PIC. This method
// handles such relaxations. Instruction encoding information was taken from:
// "Intel 64 and IA-32 Architectures Software Developer's Manual V2"
// (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/
// 64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf)
static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op,
                          uint8_t modRm) {
  const uint8_t rex = loc[-3];
  // Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg".
  if (op == 0x85) {
    // See "TEST-Logical Compare" (4-428 Vol. 2B);
    // TEST r/m64, r64 uses the "full" ModR/M byte (no opcode extension).

    // The ModR/M byte has the form XX YYY ZZZ, where
    // YYY is MODRM.reg (register 2) and ZZZ is MODRM.rm (register 1).
    // XX has different meanings:
    // 00: The operand's memory address is in reg1.
    // 01: The operand's memory address is reg1 + a byte-sized displacement.
    // 10: The operand's memory address is reg1 + a four-byte displacement.
    // 11: The operand is reg1 itself.
    // If an instruction requires only one operand, the unused reg2 field
    // holds extra opcode bits rather than a register code.
    // 0xC0 == 11 000 000 binary.
    // 0x38 == 00 111 000 binary.
    // We transfer reg2 to reg1 here as the operand.
    // See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3).
    loc[-1] = 0xc0 | (modRm & 0x38) >> 3; // ModR/M byte.

    // Change the opcode from TEST r/m64, r64 to TEST r/m64, imm32.
    // See "TEST-Logical Compare" (4-428 Vol. 2B).
    loc[-2] = 0xf7;

    // Move the R bit to the B bit in the REX byte.
    // The REX byte is encoded as 0100WRXB, where
    // 0100 is a 4-bit fixed pattern.
    // REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the
    //   default operand size is used (which is 32-bit for most but not all
    //   instructions).
    // REX.R This 1-bit value is an extension to the MODRM.reg field.
    // REX.X This 1-bit value is an extension to the SIB.index field.
    // REX.B This 1-bit value is an extension to the MODRM.rm field or the
    //   SIB.base field.
    // See "2.2.1.2 More on REX Prefix Fields" (2-8 Vol. 2A).
    loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2;
    write32le(loc, val);
    return;
  }

  // If we are here, then we need to relax an adc, add, and, cmp, or, sbb,
  // sub, or xor operation.

  // Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg".
  // The logic is close to the one for the test instruction above, but we
  // also write the opcode extension here; see below for details.
  loc[-1] = 0xc0 | (modRm & 0x38) >> 3 | (op & 0x3c); // ModR/M byte.

  // The primary opcode is 0x81, and the opcode extension is one of:
  // 000b = ADD, 001b is OR, 010b is ADC, 011b is SBB,
  // 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP.
  // This value was written to MODRM.reg in the line above.
  // See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15) and
  // "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for
  // descriptions of each operation.
  loc[-2] = 0x81;
  loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2;
  write32le(loc, val);
}

void X86_64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const {
  const uint8_t op = loc[-2];
  const uint8_t modRm = loc[-1];

  // Convert "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg".
  if (op == 0x8b) {
    loc[-2] = 0x8d;
    write32le(loc, val);
    return;
  }

  if (op != 0xff) {
    // We are relaxing a RIP-relative reference to an absolute value, so
    // compensate for the -4 addend the old instruction had.
    assert(!config->isPic);
    relaxGotNoPic(loc, val + 4, op, modRm);
    return;
  }

  // Convert call/jmp instructions.
  if (modRm == 0x15) {
    // The ABI says we can convert "call *foo@GOTPCREL(%rip)" to
    // "nop; call foo". Instead we convert to "addr32 call foo", where addr32
    // is an instruction prefix. That keeps the result a single instruction.
    loc[-2] = 0x67; // addr32 prefix
    loc[-1] = 0xe8; // call
    write32le(loc, val);
    return;
  }

  // Convert "jmp *foo@GOTPCREL(%rip)" to "jmp foo; nop".
  // jmp doesn't return, so it is fine to use a nop here; it is just padding.
  assert(modRm == 0x25);
  loc[-2] = 0xe9; // jmp
  loc[3] = 0x90;  // nop
  write32le(loc - 1, val + 1);
}

// A split-stack prologue starts by checking the amount of stack remaining
// in one of two ways:
// A) Comparing the stack pointer to a field in the TCB.
// B) A load of a stack-pointer offset with an lea into r10 or r11.
bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
                                              uint8_t stOther) const {
  if (!config->is64) {
    error("Target doesn't support split stacks.");
    return false;
  }

  if (loc + 8 >= end)
    return false;

  // Replace "cmp %fs:0x70,%rsp" and the subsequent branch
  // with "stc; nopl 0x0(%rax,%rax,1)".
  if (memcmp(loc, "\x64\x48\x3b\x24\x25", 5) == 0) {
    memcpy(loc, "\xf9\x0f\x1f\x84\x00\x00\x00\x00", 8);
    return true;
  }

  // Adjust "lea X(%rsp),%rYY" to "lea (X - 0x4000)(%rsp),%rYY", where rYY can
  // be r10 or r11. The lea instruction feeds a subsequent compare which
  // checks whether X bytes of stack are available. Making X larger
  // effectively reserves that much additional space. The stack grows
  // downward, so we subtract the value.
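  // "4c 8d 94 24 <disp32>" encodes "leaq X(%rsp),%r10" and
  // "4c 8d 9c 24 <disp32>" encodes "leaq X(%rsp),%r11"; the two patterns
  // differ only in the ModR/M reg field.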
  if (memcmp(loc, "\x4c\x8d\x94\x24", 4) == 0 ||
      memcmp(loc, "\x4c\x8d\x9c\x24", 4) == 0) {
    // The offset bytes are encoded four bytes after the start of the
    // instruction.
    write32le(loc + 4, read32le(loc + 4) - 0x4000);
    return true;
  }
  return false;
}

// These nonstandard PLT entries are to mitigate the Spectre v2 security
// vulnerability. In order to mitigate Spectre v2, we want to avoid indirect
// branch instructions such as `jmp *GOTPLT(%rip)`. So, in the following PLT
// entries, we use a CALL followed by MOV and RET to do the same thing as an
// indirect jump. That instruction sequence is the so-called "retpoline".
//
// We have two types of retpoline PLTs as a size optimization. If `-z now`
// is specified, all dynamic symbols are resolved at load-time. Thus, when
// that option is given, we can omit the code for lazy symbol resolution.
namespace {
class Retpoline : public X86_64 {
public:
  Retpoline();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr,
                int32_t index, unsigned relOff) const override;
};

class RetpolineZNow : public X86_64 {
public:
  RetpolineZNow();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override {}
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr,
                int32_t index, unsigned relOff) const override;
};
} // namespace

Retpoline::Retpoline() {
  pltHeaderSize = 48;
  pltEntrySize = 32;
}

void Retpoline::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  write64le(buf, s.getPltVA() + 17);
}

void Retpoline::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xff, 0x35, 0,    0,    0,    0,          // 0:  pushq GOTPLT+8(%rip)
      0x4c, 0x8b, 0x1d, 0,    0,    0,    0,    // 6:  mov GOTPLT+16(%rip), %r11
      0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:  callq next
      0xf3, 0x90,                               // 12: loop: pause
      0x0f, 0xae, 0xe8,                         // 14: lfence
      0xeb, 0xf9,                               // 17: jmp loop
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
      0x4c, 0x89, 0x1c, 0x24,                   // 20: next: mov %r11, (%rsp)
      0xc3,                                     // 24: ret
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 25: int3; padding
      0xcc, 0xcc, 0xcc, 0xcc,                   // 2c: int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint64_t gotPlt = in.gotPlt->getVA();
  uint64_t plt = in.plt->getVA();
  write32le(buf + 2, gotPlt - plt - 6 + 8);
  write32le(buf + 9, gotPlt - plt - 13 + 16);
}

void Retpoline::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr,
                         uint64_t pltEntryAddr, int32_t index,
                         unsigned relOff) const {
  const uint8_t insn[] = {
      0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // 0:  mov foo@GOTPLT(%rip), %r11
      0xe8, 0,    0,    0, 0,       // 7:  callq plt+0x20
      0xe9, 0,    0,    0, 0,       // c:  jmp plt+0x12
      0x68, 0,    0,    0, 0,       // 11: pushq <relocation index>
      0xe9, 0,    0,    0, 0,       // 16: jmp plt+0
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1b: int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint64_t off = pltHeaderSize + pltEntrySize * index;

  write32le(buf + 3, gotPltEntryAddr - pltEntryAddr - 7);
  write32le(buf + 8, -off - 12 + 32);
  write32le(buf + 13, -off - 17 + 18);
  write32le(buf + 18, index);
  write32le(buf + 23, -off - 27);
}

RetpolineZNow::RetpolineZNow() {
  pltHeaderSize = 32;
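  // With -z now there is no lazy-binding path, so entries are half the size
  // of the lazy retpoline variant (16 vs. 32 bytes).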
  pltEntrySize = 16;
}

void RetpolineZNow::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xe8, 0x0b, 0x00, 0x00, 0x00, // 0:  call next
      0xf3, 0x90,                   // 5:  loop: pause
      0x0f, 0xae, 0xe8,             // 7:  lfence
      0xeb, 0xf9,                   // a:  jmp loop
      0xcc, 0xcc, 0xcc, 0xcc,       // c:  int3; .align 16
      0x4c, 0x89, 0x1c, 0x24,       // 10: next: mov %r11, (%rsp)
      0xc3,                         // 14: ret
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 15: int3; padding
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
      0xcc,                         // 1f: int3; padding
  };
  memcpy(buf, insn, sizeof(insn));
}

void RetpolineZNow::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr,
                             uint64_t pltEntryAddr, int32_t index,
                             unsigned relOff) const {
  const uint8_t insn[] = {
      0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // mov foo@GOTPLT(%rip), %r11
      0xe9, 0,    0,    0, 0,       // jmp plt+0
      0xcc, 0xcc, 0xcc, 0xcc,       // int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  write32le(buf + 3, gotPltEntryAddr - pltEntryAddr - 7);
  write32le(buf + 8, -pltHeaderSize - pltEntrySize * index - 12);
}

static TargetInfo *getTargetInfo() {
  if (config->zRetpolineplt) {
    if (config->zNow) {
      static RetpolineZNow t;
      return &t;
    }
    static Retpoline t;
    return &t;
  }

  static X86_64 t;
  return &t;
}

TargetInfo *elf::getX86_64TargetInfo() { return getTargetInfo(); }