//===- X86_64.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InputFiles.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;

namespace lld {
namespace elf {

namespace {
class X86_64 : public TargetInfo {
public:
  X86_64();
  int getTlsGdRelaxSkip(RelType type) const override;
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  void writeGotPltHeader(uint8_t *buf) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override;

  RelExpr adjustRelaxExpr(RelType type, const uint8_t *data,
                          RelExpr expr) const override;
  void relaxGot(uint8_t *loc, RelType type, uint64_t val) const override;
  void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override;
  void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
  void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override;
  void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
  bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
                                        uint8_t stOther) const override;
};
} // namespace

X86_64::X86_64() {
  copyRel = R_X86_64_COPY;
  gotRel = R_X86_64_GLOB_DAT;
  noneRel = R_X86_64_NONE;
  pltRel = R_X86_64_JUMP_SLOT;
  relativeRel = R_X86_64_RELATIVE;
  iRelativeRel = R_X86_64_IRELATIVE;
  symbolicRel = R_X86_64_64;
  tlsDescRel = R_X86_64_TLSDESC;
  tlsGotRel = R_X86_64_TPOFF64;
  tlsModuleIndexRel = R_X86_64_DTPMOD64;
  tlsOffsetRel = R_X86_64_DTPOFF64;
  pltHeaderSize = 16;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3

  // Align to the large page size (known as a superpage or huge page).
  // FreeBSD automatically promotes large, superpage-aligned allocations.
  defaultImageBase = 0x200000;
}
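
// A TLS global-dynamic code sequence such as
//
//   .byte 0x66
//   leaq x@tlsgd(%rip), %rdi
//   .word 0x6666
//   rex64
//   call __tls_get_addr@plt
//
// is covered by two relocations: R_X86_64_TLSGD on the leaq and a call
// relocation (e.g. R_X86_64_PLT32) on the call. Relaxing the sequence
// rewrites both instructions at once, so the relocation scan skips the
// relocation that follows R_X86_64_TLSGD.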
int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; }

RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
                           const uint8_t *loc) const {
  if (type == R_X86_64_GOTTPOFF)
    config->hasStaticTlsModel = true;

  switch (type) {
  case R_X86_64_8:
  case R_X86_64_16:
  case R_X86_64_32:
  case R_X86_64_32S:
  case R_X86_64_64:
    return R_ABS;
  case R_X86_64_DTPOFF32:
  case R_X86_64_DTPOFF64:
    return R_DTPREL;
  case R_X86_64_TPOFF32:
    return R_TLS;
  case R_X86_64_TLSDESC_CALL:
    return R_TLSDESC_CALL;
  case R_X86_64_TLSLD:
    return R_TLSLD_PC;
  case R_X86_64_TLSGD:
    return R_TLSGD_PC;
  case R_X86_64_SIZE32:
  case R_X86_64_SIZE64:
    return R_SIZE;
  case R_X86_64_PLT32:
    return R_PLT_PC;
  case R_X86_64_PC8:
  case R_X86_64_PC16:
  case R_X86_64_PC32:
  case R_X86_64_PC64:
    return R_PC;
  case R_X86_64_GOT32:
  case R_X86_64_GOT64:
    return R_GOTPLT;
  case R_X86_64_GOTPC32_TLSDESC:
    return R_TLSDESC_PC;
  case R_X86_64_GOTPCREL:
  case R_X86_64_GOTPCRELX:
  case R_X86_64_REX_GOTPCRELX:
  case R_X86_64_GOTTPOFF:
    return R_GOT_PC;
  case R_X86_64_GOTOFF64:
    return R_GOTPLTREL;
  case R_X86_64_GOTPC32:
  case R_X86_64_GOTPC64:
    return R_GOTPLTONLY_PC;
  case R_X86_64_NONE:
    return R_NONE;
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}

void X86_64::writeGotPltHeader(uint8_t *buf) const {
  // The first entry holds the value of _DYNAMIC. It is not clear why that is
  // required, but it is documented in the psabi and the glibc dynamic linker
  // seems to use it (note that this is relevant for linking ld.so, not any
  // other program).
  write64le(buf, mainPart->dynamic->getVA());
}

void X86_64::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  // See comments in X86::writeGotPlt.
  write64le(buf, s.getPltVA() + 6);
}

void X86_64::writePltHeader(uint8_t *buf) const {
  const uint8_t pltData[] = {
      0xff, 0x35, 0, 0, 0, 0, // pushq GOTPLT+8(%rip)
      0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip)
      0x0f, 0x1f, 0x40, 0x00, // nop
  };
  memcpy(buf, pltData, sizeof(pltData));
  uint64_t gotPlt = in.gotPlt->getVA();
  uint64_t plt = in.ibtPlt ? in.ibtPlt->getVA() : in.plt->getVA();
  write32le(buf + 2, gotPlt - plt + 2); // GOTPLT+8
  write32le(buf + 8, gotPlt - plt + 4); // GOTPLT+16
}

void X86_64::writePlt(uint8_t *buf, const Symbol &sym,
                      uint64_t pltEntryAddr) const {
  const uint8_t inst[] = {
      0xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip)
      0x68, 0, 0, 0, 0,       // pushq <relocation index>
      0xe9, 0, 0, 0, 0,       // jmpq plt[0]
  };
  memcpy(buf, inst, sizeof(inst));

  write32le(buf + 2, sym.getGotPltVA() - pltEntryAddr - 6);
  write32le(buf + 7, sym.pltIndex);
  write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
}

RelType X86_64::getDynRel(RelType type) const {
  if (type == R_X86_64_64 || type == R_X86_64_PC64 || type == R_X86_64_SIZE32 ||
      type == R_X86_64_SIZE64)
    return type;
  return R_X86_64_NONE;
}
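
// In the TLS relaxations below, loc points at the 32-bit immediate of the
// relocated instruction, not at the instruction start. The general-dynamic
// sequence is laid out as
//
//   66 48 8d 3d 00 00 00 00  .byte 0x66; leaq x@tlsgd(%rip), %rdi
//   66 66 48 e8 00 00 00 00  .word 0x6666; rex64; call __tls_get_addr@plt
//
// so loc - 4 is the start of the 16 bytes that get rewritten.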
void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
  if (type == R_X86_64_TLSGD) {
    // Convert
    //   .byte 0x66
    //   leaq x@tlsgd(%rip), %rdi
    //   .word 0x6666
    //   rex64
    //   call __tls_get_addr@plt
    // to the following two instructions.
    const uint8_t inst[] = {
        0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,
        0x00, 0x00,                            // mov %fs:0x0,%rax
        0x48, 0x8d, 0x80, 0,    0,    0,    0, // lea x@tpoff,%rax
    };
    memcpy(loc - 4, inst, sizeof(inst));

    // The original code used a pc relative relocation and so we have to
    // compensate for the -4 it had in the addend.
    write32le(loc + 8, val + 4);
  } else {
    // Convert
    //   lea x@tlsdesc(%rip), %rax
    //   call *(%rax)
    // to the following two instructions.
    assert(type == R_X86_64_GOTPC32_TLSDESC);
    if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
      error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
                                        "in callq *x@tlsdesc(%rip), %rax");
      return;
    }
    // movq $x@tpoff(%rip),%rax
    loc[-2] = 0xc7;
    loc[-1] = 0xc0;
    write32le(loc, val + 4);
    // xchg ax,ax
    loc[4] = 0x66;
    loc[5] = 0x90;
  }
}

void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
  if (type == R_X86_64_TLSGD) {
    // Convert
    //   .byte 0x66
    //   leaq x@tlsgd(%rip), %rdi
    //   .word 0x6666
    //   rex64
    //   call __tls_get_addr@plt
    // to the following two instructions.
    const uint8_t inst[] = {
        0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,
        0x00, 0x00,                            // mov %fs:0x0,%rax
        0x48, 0x03, 0x05, 0,    0,    0,    0, // addq x@gottpoff(%rip),%rax
    };
    memcpy(loc - 4, inst, sizeof(inst));

    // Both code sequences are PC relative, but since we are moving the
    // constant forward by 8 bytes we have to subtract 8 from the value.
    write32le(loc + 8, val - 8);
  } else {
    // Convert
    //   lea x@tlsdesc(%rip), %rax
    //   call *(%rax)
    // to the following two instructions.
    assert(type == R_X86_64_GOTPC32_TLSDESC);
    if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
      error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
                                        "in callq *x@tlsdesc(%rip), %rax");
      return;
    }
    // movq x@gottpoff(%rip),%rax
    loc[-2] = 0x8b;
    write32le(loc, val);
    // xchg ax,ax
    loc[4] = 0x66;
    loc[5] = 0x90;
  }
}

// Under some conditions, an R_X86_64_GOTTPOFF relocation can be relaxed to
// R_X86_64_TPOFF32 so that it does not use the GOT.
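// For example,
//
//   48 8b 05 00 00 00 00  movq x@gottpoff(%rip), %rax
//
// becomes
//
//   48 c7 c0 00 00 00 00  movq $x@tpoff, %rax
//
// Only the opcode and the ModR/M byte change, so the rewritten instruction
// has the same length as the original.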
void X86_64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const {
  uint8_t *inst = loc - 3;
  uint8_t reg = loc[-1] >> 3;
  uint8_t *regSlot = loc - 1;

  // Note that ADD with RSP or R12 is converted to ADD instead of LEA
  // because LEA with these registers needs 4 bytes to encode and thus
  // wouldn't fit the space.

  if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
    // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
    memcpy(inst, "\x48\x81\xc4", 3);
  } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
    // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
    memcpy(inst, "\x49\x81\xc4", 3);
  } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
    // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
    memcpy(inst, "\x4d\x8d", 2);
    *regSlot = 0x80 | (reg << 3) | reg;
  } else if (memcmp(inst, "\x48\x03", 2) == 0) {
    // "addq foo@gottpoff(%rip),%reg" -> "leaq foo(%reg),%reg"
    memcpy(inst, "\x48\x8d", 2);
    *regSlot = 0x80 | (reg << 3) | reg;
  } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
    // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
    memcpy(inst, "\x49\xc7", 2);
    *regSlot = 0xc0 | reg;
  } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
    // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
    memcpy(inst, "\x48\xc7", 2);
    *regSlot = 0xc0 | reg;
  } else {
    error(getErrorLocation(loc - 3) +
          "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only");
  }

  // The original code used a PC relative relocation.
  // Need to compensate for the -4 it had in the addend.
  write32le(loc, val + 4);
}

void X86_64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
  if (type == R_X86_64_DTPOFF64) {
    write64le(loc, val);
    return;
  }
  if (type == R_X86_64_DTPOFF32) {
    write32le(loc, val);
    return;
  }

  const uint8_t inst[] = {
      0x66, 0x66,                                           // .word 0x6666
      0x66,                                                 // .byte 0x66
      0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0,%rax
  };

  if (loc[4] == 0xe8) {
    // Convert
    //   leaq bar@tlsld(%rip), %rdi  # 48 8d 3d <Loc>
    //   callq __tls_get_addr@PLT    # e8 <disp32>
    //   leaq bar@dtpoff(%rax), %rcx
    // to
    //   .word 0x6666
    //   .byte 0x66
    //   mov %fs:0,%rax
    //   leaq bar@tpoff(%rax), %rcx
    memcpy(loc - 3, inst, sizeof(inst));
    return;
  }

  if (loc[4] == 0xff && loc[5] == 0x15) {
    // Convert
    //   leaq x@tlsld(%rip),%rdi               # 48 8d 3d <Loc>
    //   call *__tls_get_addr@GOTPCREL(%rip)   # ff 15 <disp32>
    // to
    //   .long 0x66666666
    //   movq %fs:0,%rax
    // See "Table 11.9: LD -> LE Code Transition (LP64)" in
    // https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf
    loc[-3] = 0x66;
    memcpy(loc - 2, inst, sizeof(inst));
    return;
  }

  error(getErrorLocation(loc - 3) +
        "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
}
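
// The psABI defines R_X86_64_32 as a zero-extended word32 and R_X86_64_32S
// as sign-extended, hence the checkUInt/checkInt split below. The 8-bit and
// 16-bit absolute relocations have no separate signed and unsigned variants,
// so values in either range are accepted via checkIntUInt.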
void X86_64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
  switch (type) {
  case R_X86_64_8:
    checkIntUInt(loc, val, 8, type);
    *loc = val;
    break;
  case R_X86_64_PC8:
    checkInt(loc, val, 8, type);
    *loc = val;
    break;
  case R_X86_64_16:
    checkIntUInt(loc, val, 16, type);
    write16le(loc, val);
    break;
  case R_X86_64_PC16:
    checkInt(loc, val, 16, type);
    write16le(loc, val);
    break;
  case R_X86_64_32:
    checkUInt(loc, val, 32, type);
    write32le(loc, val);
    break;
  case R_X86_64_32S:
  case R_X86_64_TPOFF32:
  case R_X86_64_GOT32:
  case R_X86_64_GOTPC32:
  case R_X86_64_GOTPC32_TLSDESC:
  case R_X86_64_GOTPCREL:
  case R_X86_64_GOTPCRELX:
  case R_X86_64_REX_GOTPCRELX:
  case R_X86_64_PC32:
  case R_X86_64_GOTTPOFF:
  case R_X86_64_PLT32:
  case R_X86_64_TLSGD:
  case R_X86_64_TLSLD:
  case R_X86_64_DTPOFF32:
  case R_X86_64_SIZE32:
    checkInt(loc, val, 32, type);
    write32le(loc, val);
    break;
  case R_X86_64_64:
  case R_X86_64_DTPOFF64:
  case R_X86_64_PC64:
  case R_X86_64_SIZE64:
  case R_X86_64_GOT64:
  case R_X86_64_GOTOFF64:
  case R_X86_64_GOTPC64:
    write64le(loc, val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}
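
// A GOTPCRELX-relocated instruction ends with the 4-byte displacement that
// the relocation points at. For example, in
//
//   48 8b 05 00 00 00 00  movq foo@GOTPCREL(%rip), %rax
//
// data points at the disp32, so data[-2] is the primary opcode (0x8b for
// mov) and data[-1] is the ModR/M byte.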
RelExpr X86_64::adjustRelaxExpr(RelType type, const uint8_t *data,
                                RelExpr relExpr) const {
  if (type != R_X86_64_GOTPCRELX && type != R_X86_64_REX_GOTPCRELX)
    return relExpr;
  const uint8_t op = data[-2];
  const uint8_t modRm = data[-1];

  // FIXME: When PIC is disabled and foo is defined locally in the
  // lower 32 bit address space, the memory operand in mov can be converted
  // into an immediate operand. Otherwise, mov must be changed to lea. We
  // support only the latter relaxation at this moment.
  if (op == 0x8b)
    return R_RELAX_GOT_PC;

  // Relax call and jmp.
  if (op == 0xff && (modRm == 0x15 || modRm == 0x25))
    return R_RELAX_GOT_PC;

  // Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor.
  // If PIC then no relaxation is available.
  // We also don't relax test/binop instructions without a REX prefix;
  // they are 32-bit operations and not common.
  assert(type == R_X86_64_REX_GOTPCRELX);
  return config->isPic ? relExpr : R_RELAX_GOT_PC_NOPIC;
}

// A subset of relaxations can only be applied for no-PIC. This method
// handles such relaxations. Instruction encoding information was taken from:
// "Intel 64 and IA-32 Architectures Software Developer's Manual V2"
// (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/
//  64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf)
static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op,
                          uint8_t modRm) {
  const uint8_t rex = loc[-3];
  // Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg".
  if (op == 0x85) {
    // See "TEST-Logical Compare" (4-428 Vol. 2B),
    // TEST r/m64, r64 uses the "full" ModR/M byte (no opcode extension).

    // The ModR/M byte has the form XX YYY ZZZ, where
    // YYY is MODRM.reg (register 2) and ZZZ is MODRM.rm (register 1).
    // XX has different meanings:
    // 00: The operand's memory address is in reg1.
    // 01: The operand's memory address is reg1 + a byte-sized displacement.
    // 10: The operand's memory address is reg1 + a word-sized displacement.
    // 11: The operand is reg1 itself.
    // If an instruction requires only one operand, the unused reg2 field
    // holds extra opcode bits rather than a register code.
    // 0xC0 == 11 000 000 binary.
    // 0x38 == 00 111 000 binary.
    // We transfer reg2 to reg1 here as the operand.
    // See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3).
    loc[-1] = 0xc0 | (modRm & 0x38) >> 3; // ModR/M byte.

    // Change the opcode from TEST r/m64, r64 to TEST r/m64, imm32.
    // See "TEST-Logical Compare" (4-428 Vol. 2B).
    loc[-2] = 0xf7;

    // Move the R bit to the B bit in the REX byte.
    // The REX byte is encoded as 0100WRXB, where
    // 0100 is a 4-bit fixed pattern.
    // REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the
    //   default operand size is used (which is 32-bit for most but not all
    //   instructions).
    // REX.R This 1-bit value is an extension to the MODRM.reg field.
    // REX.X This 1-bit value is an extension to the SIB.index field.
    // REX.B This 1-bit value is an extension to the MODRM.rm field or the
    //   SIB.base field.
    // See "2.2.1.2 More on REX Prefix Fields" (2-8 Vol. 2A).
    loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2;
    write32le(loc, val);
    return;
  }

  // If we are here then we need to relax the adc, add, and, cmp, or, sbb,
  // sub or xor operations.

  // Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg".
  // The logic is close to that for the test instruction above, but we also
  // write an opcode extension here; see below for details.
  loc[-1] = 0xc0 | (modRm & 0x38) >> 3 | (op & 0x3c); // ModR/M byte.

  // The primary opcode is 0x81, and the opcode extension is one of:
  // 000b = ADD, 001b is OR, 010b is ADC, 011b is SBB,
  // 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP.
  // This value was written to MODRM.reg in the line above.
  // See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15) and
  // "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for
  // descriptions of each operation.
  loc[-2] = 0x81;
  loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2;
  write32le(loc, val);
}

void X86_64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const {
  const uint8_t op = loc[-2];
  const uint8_t modRm = loc[-1];

  // Convert "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg".
  if (op == 0x8b) {
    loc[-2] = 0x8d;
    write32le(loc, val);
    return;
  }

  if (op != 0xff) {
    // We are relaxing a rip-relative access to an absolute one, so compensate
    // for the old -4 addend.
    assert(!config->isPic);
    relaxGotNoPic(loc, val + 4, op, modRm);
    return;
  }

  // Convert call/jmp instructions.
  if (modRm == 0x15) {
    // The ABI says we can convert "call *foo@GOTPCREL(%rip)" to "nop; call
    // foo". Instead we convert to "addr32 call foo" where addr32 is an
    // instruction prefix. That makes the result a single instruction.
    loc[-2] = 0x67; // addr32 prefix
    loc[-1] = 0xe8; // call
    write32le(loc, val);
    return;
  }

  // Convert "jmp *foo@GOTPCREL(%rip)" to "jmp foo; nop".
  // jmp doesn't return, so it is fine to use a nop here; it is just a stub.
  assert(modRm == 0x25);
  loc[-2] = 0xe9; // jmp
  loc[3] = 0x90;  // nop
  write32le(loc - 1, val + 1);
}

// A split-stack prologue starts by checking the amount of stack remaining
// in one of two ways:
// A) Comparing the stack pointer to a field in the TCB.
// B) An lea of a stack pointer offset into r10 or r11.
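//
// For example, a prologue of form (A) begins with
//
//   64 48 3b 24 25 XX XX XX XX  cmp %fs:<offset>, %rsp
//
// Only the first five bytes are matched below, since the TCB offset in the
// disp32 can vary.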
bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
                                              uint8_t stOther) const {
  if (!config->is64) {
    error("Target doesn't support split stacks.");
    return false;
  }

  if (loc + 8 >= end)
    return false;

  // Replace "cmp %fs:0x70,%rsp" and the subsequent branch
  // with "stc, nopl 0x0(%rax,%rax,1)".
  if (memcmp(loc, "\x64\x48\x3b\x24\x25", 5) == 0) {
    memcpy(loc, "\xf9\x0f\x1f\x84\x00\x00\x00\x00", 8);
    return true;
  }

  // Adjust "lea X(%rsp),%rYY" to "lea (X - 0x4000)(%rsp),%rYY" where rYY could
  // be r10 or r11. The lea instruction feeds a subsequent compare which checks
  // whether X bytes of stack are available. Making X larger effectively
  // reserves that much additional space. The stack grows downward, so subtract
  // the value.
  if (memcmp(loc, "\x4c\x8d\x94\x24", 4) == 0 ||
      memcmp(loc, "\x4c\x8d\x9c\x24", 4) == 0) {
    // The offset bytes are encoded four bytes after the start of the
    // instruction.
    write32le(loc + 4, read32le(loc + 4) - 0x4000);
    return true;
  }
  return false;
}

// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
// entries containing endbr64 instructions. A PLT entry will be split into two
// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
namespace {
class IntelIBT : public X86_64 {
public:
  IntelIBT();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;

  static const unsigned IBTPltHeaderSize = 16;
};
} // namespace

IntelIBT::IntelIBT() { pltHeaderSize = 0; }

void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  uint64_t va =
      in.ibtPlt->getVA() + IBTPltHeaderSize + s.pltIndex * pltEntrySize;
  write64le(buf, va);
}

void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
                        uint64_t pltEntryAddr) const {
  const uint8_t Inst[] = {
      0xf3, 0x0f, 0x1e, 0xfa,       // endbr64
      0xff, 0x25, 0,    0,    0, 0, // jmpq *got(%rip)
      0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
  };
  memcpy(buf, Inst, sizeof(Inst));
  write32le(buf + 6, sym.getGotPltVA() - pltEntryAddr - 10);
}
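
// Write the lazy-binding stubs that live in .plt when IBT is enabled. Each
// .got.plt slot initially points at one of these stubs (see writeGotPlt
// above), so even the first, not-yet-resolved indirect jump through the PLT
// lands on an endbr64.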
void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
  writePltHeader(buf);
  buf += IBTPltHeaderSize;

  const uint8_t inst[] = {
      0xf3, 0x0f, 0x1e, 0xfa, // endbr64
      0x68, 0, 0, 0, 0,       // pushq <relocation index>
      0xe9, 0, 0, 0, 0,       // jmpq plt[0]
      0x66, 0x90,             // nop
  };

  for (size_t i = 0; i < numEntries; ++i) {
    memcpy(buf, inst, sizeof(inst));
    write32le(buf + 5, i);
    write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
    buf += sizeof(inst);
  }
}

// These nonstandard PLT entries are to mitigate the Spectre v2 security
// vulnerability. In order to mitigate Spectre v2, we want to avoid indirect
// branch instructions such as `jmp *GOTPLT(%rip)`. So, in the following PLT
// entries, we use a CALL followed by MOV and RET to do the same thing as an
// indirect jump. That instruction sequence is the so-called "retpoline".
//
// We have two types of retpoline PLTs as a size optimization. If `-z now`
// is specified, all dynamic symbols are resolved at load-time. Thus, when
// that option is given, we can omit code for lazy symbol resolution.
namespace {
class Retpoline : public X86_64 {
public:
  Retpoline();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};

class RetpolineZNow : public X86_64 {
public:
  RetpolineZNow();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override {}
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};
} // namespace

Retpoline::Retpoline() {
  pltHeaderSize = 48;
  pltEntrySize = 32;
  ipltEntrySize = 32;
}

void Retpoline::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  write64le(buf, s.getPltVA() + 17);
}

void Retpoline::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xff, 0x35, 0,    0,    0,    0,          // 0:  pushq GOTPLT+8(%rip)
      0x4c, 0x8b, 0x1d, 0,    0,    0,    0,    // 6:  mov GOTPLT+16(%rip), %r11
      0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:  callq next
      0xf3, 0x90,                               // 12: loop: pause
      0x0f, 0xae, 0xe8,                         // 14: lfence
      0xeb, 0xf9,                               // 17: jmp loop
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
      0x4c, 0x89, 0x1c, 0x24,                   // 20: next: mov %r11, (%rsp)
      0xc3,                                     // 24: ret
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 25: int3; padding
      0xcc, 0xcc, 0xcc, 0xcc,                   // 2c: int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint64_t gotPlt = in.gotPlt->getVA();
  uint64_t plt = in.plt->getVA();
  write32le(buf + 2, gotPlt - plt - 6 + 8);
  write32le(buf + 9, gotPlt - plt - 13 + 16);
}

void Retpoline::writePlt(uint8_t *buf, const Symbol &sym,
                         uint64_t pltEntryAddr) const {
  const uint8_t insn[] = {
      0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // 0:  mov foo@GOTPLT(%rip), %r11
      0xe8, 0,    0,    0, 0,       // 7:  callq plt+0x20
      0xe9, 0,    0,    0, 0,       // c:  jmp plt+0x12
      0x68, 0,    0,    0, 0,       // 11: pushq <relocation index>
      0xe9, 0,    0,    0, 0,       // 16: jmp plt+0
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1b: int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint64_t off = pltEntryAddr - in.plt->getVA();

  write32le(buf + 3, sym.getGotPltVA() - pltEntryAddr - 7);
  write32le(buf + 8, -off - 12 + 32);
  write32le(buf + 13, -off - 17 + 18);
  write32le(buf + 18, sym.pltIndex);
  write32le(buf + 23, -off - 27);
}

RetpolineZNow::RetpolineZNow() {
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
}

void RetpolineZNow::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xe8, 0x0b, 0x00, 0x00, 0x00, // 0:  call next
      0xf3, 0x90,                   // 5:  loop: pause
      0x0f, 0xae, 0xe8,             // 7:  lfence
      0xeb, 0xf9,                   // a:  jmp loop
      0xcc, 0xcc, 0xcc, 0xcc,       // c:  int3; .align 16
      0x4c, 0x89, 0x1c, 0x24,       // 10: next: mov %r11, (%rsp)
      0xc3,                         // 14: ret
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 15: int3; padding
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
      0xcc,                         // 1f: int3; padding
  };
  memcpy(buf, insn, sizeof(insn));
}
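
// With -z now there is no lazy binding, so an entry only has to load the
// already-resolved target into %r11 and jump to the retpoline thunk in the
// header.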
void RetpolineZNow::writePlt(uint8_t *buf, const Symbol &sym,
                             uint64_t pltEntryAddr) const {
  const uint8_t insn[] = {
      0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // mov foo@GOTPLT(%rip), %r11
      0xe9, 0,    0,    0, 0,       // jmp plt+0
      0xcc, 0xcc, 0xcc, 0xcc,       // int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  write32le(buf + 3, sym.getGotPltVA() - pltEntryAddr - 7);
  write32le(buf + 8, in.plt->getVA() - pltEntryAddr - 12);
}

static TargetInfo *getTargetInfo() {
  if (config->zRetpolineplt) {
    if (config->zNow) {
      static RetpolineZNow t;
      return &t;
    }
    static Retpoline t;
    return &t;
  }

  if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
    static IntelIBT t;
    return &t;
  }

  static X86_64 t;
  return &t;
}

TargetInfo *getX86_64TargetInfo() { return getTargetInfo(); }

} // namespace elf
} // namespace lld