Lines Matching +full:loc +full:- +full:code
1 //===- X86_64.cpp ---------------------------------------------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
32 const uint8_t *loc) const override;
40 void relocate(uint8_t *loc, const Relocation &rel,
43 void applyJumpInstrMod(uint8_t *loc, JumpModType type,
46 const uint8_t *loc) const override;
48 bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
90 // FreeBSD automatically promotes large, superpage-aligned allocations. in X86_64()
158 for (unsigned i = size - 1; i + 1 > 0; --i) { in getRelocationWithOffset()
182 uint64_t addrLoc = is.getOutputSection()->addr + is.outSecOff + r.offset; in isFallThruRelocation()
189 nextIS->getOutputSection()->addr + nextIS->outSecOff; in isFallThruRelocation()
252 unsigned rIndex = getRelocationWithOffset(is, is.getSize() - 4); in deleteFallThruJmpInsn()
261 if (*(secContents + r.offset - 1) != 0xe9) in deleteFallThruJmpInsn()
280 getRelocationWithOffset(is, (is.getSize() - sizeOfDirectJmpInsn - 4)); in deleteFallThruJmpInsn()
286 const uint8_t *jmpInsnB = secContents + rB.offset - 1; in deleteFallThruJmpInsn()
287 JmpInsnOpcode jmpOpcodeB = getJmpInsnType(jmpInsnB - 1, jmpInsnB); in deleteFallThruJmpInsn()
300 *is.jumpInstrMod = {rB.offset - 1, jInvert, 4}; in deleteFallThruJmpInsn()
314 minVA = std::min(minVA, osec->addr); in relaxOnce()
315 maxVA = std::max(maxVA, osec->addr + osec->size); in relaxOnce()
318 // -pie/-shared, the condition can be relaxed to test the max VA difference as in relaxOnce()
320 if (isUInt<31>(maxVA) || (isUInt<31>(maxVA - minVA) && config->isPic)) in relaxOnce()
326 if (!(osec->flags & SHF_EXECINSTR)) in relaxOnce()
329 for (Relocation &rel : sec->relocs()) { in relaxOnce()
332 assert(rel.addend == -4); in relaxOnce()
334 uint64_t v = sec->getRelocTargetVA( in relaxOnce()
335 sec->file, rel.type, rel.expr == R_RELAX_GOT_PC_NOPIC ? 0 : -4, in relaxOnce()
336 sec->getOutputSection()->addr + sec->outSecOff + rel.offset, in relaxOnce()
340 if (rel.sym->auxIdx == 0) { in relaxOnce()
341 rel.sym->allocateAux(); in relaxOnce()
353 const uint8_t *loc) const { in getRelExpr()
403 error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + in getRelExpr()
410 // The first entry holds the link-time address of _DYNAMIC. It is documented in writeGotPltHeader()
411 // in the psABI and glibc before Aug 2021 used the entry to compute run-time in writeGotPltHeader()
414 write64le(buf, mainPart->dynamic->getVA()); in writeGotPltHeader()
423 // An x86 entry is the address of the ifunc resolver function (for -z rel). in writeIgotPlt()
424 if (config->writeAddends) in writeIgotPlt()
435 uint64_t gotPlt = in.gotPlt->getVA(); in writePltHeader()
436 uint64_t plt = in.ibtPlt ? in.ibtPlt->getVA() : in.plt->getVA(); in writePltHeader()
437 write32le(buf + 2, gotPlt - plt + 2); // GOTPLT+8 in writePltHeader()
438 write32le(buf + 8, gotPlt - plt + 4); // GOTPLT+16 in writePltHeader()
450 write32le(buf + 2, sym.getGotPltVA() - pltEntryAddr - 6); in writePlt()
452 write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16); in writePlt()
462 static void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) { in relaxTlsGdToLe() argument
476 memcpy(loc - 4, inst, sizeof(inst)); in relaxTlsGdToLe()
478 // The original code used a pc relative relocation and so we have to in relaxTlsGdToLe()
479 // compensate for the -4 it had in the addend. in relaxTlsGdToLe()
480 write32le(loc + 8, val + 4); in relaxTlsGdToLe()
483 if ((loc[-3] & 0xfb) != 0x48 || loc[-2] != 0x8d || in relaxTlsGdToLe()
484 (loc[-1] & 0xc7) != 0x05) { in relaxTlsGdToLe()
485 errorOrWarn(getErrorLocation(loc - 3) + in relaxTlsGdToLe()
490 loc[-3] = 0x48 | ((loc[-3] >> 2) & 1); in relaxTlsGdToLe()
491 loc[-2] = 0xc7; in relaxTlsGdToLe()
492 loc[-1] = 0xc0 | ((loc[-1] >> 3) & 7); in relaxTlsGdToLe()
493 write32le(loc, val + 4); in relaxTlsGdToLe()
497 loc[0] = 0x66; in relaxTlsGdToLe()
498 loc[1] = 0x90; in relaxTlsGdToLe()
502 static void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) { in relaxTlsGdToIe() argument
516 memcpy(loc - 4, inst, sizeof(inst)); in relaxTlsGdToIe()
518 // Both code sequences are PC relative, but since we are moving the in relaxTlsGdToIe()
520 write32le(loc + 8, val - 8); in relaxTlsGdToIe()
524 if ((loc[-3] & 0xfb) != 0x48 || loc[-2] != 0x8d || in relaxTlsGdToIe()
525 (loc[-1] & 0xc7) != 0x05) { in relaxTlsGdToIe()
526 errorOrWarn(getErrorLocation(loc - 3) + in relaxTlsGdToIe()
531 loc[-2] = 0x8b; in relaxTlsGdToIe()
532 write32le(loc, val); in relaxTlsGdToIe()
536 loc[0] = 0x66; in relaxTlsGdToIe()
537 loc[1] = 0x90; in relaxTlsGdToIe()
543 static void relaxTlsIeToLe(uint8_t *loc, const Relocation &, uint64_t val) { in relaxTlsIeToLe() argument
544 uint8_t *inst = loc - 3; in relaxTlsIeToLe()
545 uint8_t reg = loc[-1] >> 3; in relaxTlsIeToLe()
546 uint8_t *regSlot = loc - 1; in relaxTlsIeToLe()
553 // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp" in relaxTlsIeToLe()
556 // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12" in relaxTlsIeToLe()
559 // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]" in relaxTlsIeToLe()
563 // "addq foo@gottpoff(%rip),%reg" -> "leaq foo(%reg),%reg" in relaxTlsIeToLe()
567 // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]" in relaxTlsIeToLe()
571 // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg" in relaxTlsIeToLe()
575 error(getErrorLocation(loc - 3) + in relaxTlsIeToLe()
579 // The original code used a PC relative relocation. in relaxTlsIeToLe()
580 // Need to compensate for the -4 it had in the addend. in relaxTlsIeToLe()
581 write32le(loc, val + 4); in relaxTlsIeToLe()
584 static void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) { in relaxTlsLdToLe() argument
591 if (loc[4] == 0xe8) { in relaxTlsLdToLe()
593 // leaq bar@tlsld(%rip), %rdi # 48 8d 3d <Loc> in relaxTlsLdToLe()
601 memcpy(loc - 3, inst, sizeof(inst)); in relaxTlsLdToLe()
605 if (loc[4] == 0xff && loc[5] == 0x15) { in relaxTlsLdToLe()
607 // leaq x@tlsld(%rip),%rdi # 48 8d 3d <Loc> in relaxTlsLdToLe()
612 // See "Table 11.9: LD -> LE Code Transition (LP64)" in in relaxTlsLdToLe()
613 // https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf in relaxTlsLdToLe()
614 loc[-3] = 0x66; in relaxTlsLdToLe()
615 memcpy(loc - 2, inst, sizeof(inst)); in relaxTlsLdToLe()
619 error(getErrorLocation(loc - 3) + in relaxTlsLdToLe()
627 void X86_64::applyJumpInstrMod(uint8_t *loc, JumpModType type, in applyJumpInstrMod() argument
632 *loc = 0xe9; in applyJumpInstrMod()
634 *loc = 0xeb; in applyJumpInstrMod()
638 loc[-1] = 0x0f; in applyJumpInstrMod()
639 *loc = 0x84; in applyJumpInstrMod()
641 *loc = 0x74; in applyJumpInstrMod()
645 loc[-1] = 0x0f; in applyJumpInstrMod()
646 *loc = 0x85; in applyJumpInstrMod()
648 *loc = 0x75; in applyJumpInstrMod()
652 loc[-1] = 0x0f; in applyJumpInstrMod()
653 *loc = 0x8f; in applyJumpInstrMod()
655 *loc = 0x7f; in applyJumpInstrMod()
659 loc[-1] = 0x0f; in applyJumpInstrMod()
660 *loc = 0x8d; in applyJumpInstrMod()
662 *loc = 0x7d; in applyJumpInstrMod()
666 loc[-1] = 0x0f; in applyJumpInstrMod()
667 *loc = 0x82; in applyJumpInstrMod()
669 *loc = 0x72; in applyJumpInstrMod()
673 loc[-1] = 0x0f; in applyJumpInstrMod()
674 *loc = 0x86; in applyJumpInstrMod()
676 *loc = 0x76; in applyJumpInstrMod()
680 loc[-1] = 0x0f; in applyJumpInstrMod()
681 *loc = 0x8c; in applyJumpInstrMod()
683 *loc = 0x7c; in applyJumpInstrMod()
687 loc[-1] = 0x0f; in applyJumpInstrMod()
688 *loc = 0x8e; in applyJumpInstrMod()
690 *loc = 0x7e; in applyJumpInstrMod()
694 loc[-1] = 0x0f; in applyJumpInstrMod()
695 *loc = 0x87; in applyJumpInstrMod()
697 *loc = 0x77; in applyJumpInstrMod()
701 loc[-1] = 0x0f; in applyJumpInstrMod()
702 *loc = 0x83; in applyJumpInstrMod()
704 *loc = 0x73; in applyJumpInstrMod()
763 static void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val);
765 void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { in relocate() argument
768 checkIntUInt(loc, val, 8, rel); in relocate()
769 *loc = val; in relocate()
772 checkInt(loc, val, 8, rel); in relocate()
773 *loc = val; in relocate()
776 checkIntUInt(loc, val, 16, rel); in relocate()
777 write16le(loc, val); in relocate()
780 checkInt(loc, val, 16, rel); in relocate()
781 write16le(loc, val); in relocate()
784 checkUInt(loc, val, 32, rel); in relocate()
785 write32le(loc, val); in relocate()
795 checkInt(loc, val, 32, rel); in relocate()
796 write32le(loc, val); in relocate()
807 write64le(loc, val); in relocate()
812 relaxGot(loc, rel, val); in relocate()
814 checkInt(loc, val, 32, rel); in relocate()
815 write32le(loc, val); in relocate()
822 relaxTlsGdToLe(loc, rel, val); in relocate()
824 relaxTlsGdToIe(loc, rel, val); in relocate()
826 checkInt(loc, val, 32, rel); in relocate()
827 write32le(loc, val); in relocate()
832 relaxTlsLdToLe(loc, rel, val); in relocate()
834 checkInt(loc, val, 32, rel); in relocate()
835 write32le(loc, val); in relocate()
840 relaxTlsIeToLe(loc, rel, val); in relocate()
842 checkInt(loc, val, 32, rel); in relocate()
843 write32le(loc, val); in relocate()
847 checkInt(loc, val, 32, rel); in relocate()
848 write32le(loc, val); in relocate()
852 // The addend is stored in the second 64-bit word. in relocate()
853 write64le(loc + 8, val); in relocate()
861 const uint8_t *loc) const { in adjustGotPcExpr()
863 // with addend != -4. Such an instruction does not load the full GOT entry, so in adjustGotPcExpr()
866 if (!config->relax || addend != -4 || in adjustGotPcExpr()
869 const uint8_t op = loc[-2]; in adjustGotPcExpr()
870 const uint8_t modRm = loc[-1]; in adjustGotPcExpr()
889 return config->isPic ? R_GOT_PC : R_RELAX_GOT_PC_NOPIC; in adjustGotPcExpr()
892 // A subset of relaxations can only be applied for no-PIC. This method
894 // "Intel 64 and IA-32 Architectures Software Developer's Manual V2"
896 // 64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf)
897 static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op, in relaxGotNoPic() argument
899 const uint8_t rex = loc[-3]; in relaxGotNoPic()
902 // See "TEST-Logical Compare" (4-428 Vol. 2B), in relaxGotNoPic()
909 // 01: The operand's memory address is reg1 + a byte-sized displacement. in relaxGotNoPic()
910 // 10: The operand's memory address is reg1 + a word-sized displacement. in relaxGotNoPic()
913 // holds extra opcode bits rather than a register code in relaxGotNoPic()
917 // See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3). in relaxGotNoPic()
918 loc[-1] = 0xc0 | (modRm & 0x38) >> 3; // ModR/M byte. in relaxGotNoPic()
921 // See "TEST-Logical Compare" (4-428 Vol. 2B). in relaxGotNoPic()
922 loc[-2] = 0xf7; in relaxGotNoPic()
927 // REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the in relaxGotNoPic()
928 // default operand size is used (which is 32-bit for most but not all in relaxGotNoPic()
930 // REX.R This 1-bit value is an extension to the MODRM.reg field. in relaxGotNoPic()
931 // REX.X This 1-bit value is an extension to the SIB.index field. in relaxGotNoPic()
932 // REX.B This 1-bit value is an extension to the MODRM.rm field or the in relaxGotNoPic()
934 // See "2.2.1.2 More on REX Prefix Fields " (2-8 Vol. 2A). in relaxGotNoPic()
935 loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2; in relaxGotNoPic()
936 write32le(loc, val); in relaxGotNoPic()
946 loc[-1] = 0xc0 | (modRm & 0x38) >> 3 | (op & 0x3c); // ModR/M byte. in relaxGotNoPic()
952 // See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15), in relaxGotNoPic()
953 // "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for in relaxGotNoPic()
955 loc[-2] = 0x81; in relaxGotNoPic()
956 loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2; in relaxGotNoPic()
957 write32le(loc, val); in relaxGotNoPic()
960 static void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) { in relaxGot() argument
963 const uint8_t op = loc[-2]; in relaxGot()
964 const uint8_t modRm = loc[-1]; in relaxGot()
968 loc[-2] = 0x8d; in relaxGot()
969 write32le(loc, val); in relaxGot()
975 // for the old -4 addend. in relaxGot()
976 assert(!config->isPic); in relaxGot()
977 relaxGotNoPic(loc, val + 4, op, modRm); in relaxGot()
986 loc[-2] = 0x67; // addr32 prefix in relaxGot()
987 loc[-1] = 0xe8; // call in relaxGot()
988 write32le(loc, val); in relaxGot()
995 loc[-2] = 0xe9; // jmp in relaxGot()
996 loc[3] = 0x90; // nop in relaxGot()
997 write32le(loc - 1, val + 1); in relaxGot()
1000 // A split-stack prologue starts by checking the amount of stack remaining
1004 bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, in adjustPrologueForCrossSplitStack() argument
1006 if (!config->is64) { in adjustPrologueForCrossSplitStack()
1011 if (loc + 8 >= end) in adjustPrologueForCrossSplitStack()
1016 if (memcmp(loc, "\x64\x48\x3b\x24\x25", 5) == 0) { in adjustPrologueForCrossSplitStack()
1017 memcpy(loc, "\xf9\x0f\x1f\x84\x00\x00\x00\x00", 8); in adjustPrologueForCrossSplitStack()
1021 // Adjust "lea X(%rsp),%rYY" to "lea (X - 0x4000)(%rsp),%rYY" where rYY could in adjustPrologueForCrossSplitStack()
1025 if (memcmp(loc, "\x4c\x8d\x94\x24", 4) == 0 || in adjustPrologueForCrossSplitStack()
1026 memcmp(loc, "\x4c\x8d\x9c\x24", 4) == 0) { in adjustPrologueForCrossSplitStack()
1029 write32le(loc + 4, read32le(loc + 4) - 0x4000); in adjustPrologueForCrossSplitStack()
1036 uint64_t secAddr = sec.getOutputSection()->addr; in relocateAlloc()
1038 secAddr += s->outSecOff; in relocateAlloc()
1040 secAddr += ehIn->getParent()->outSecOff; in relocateAlloc()
1044 uint8_t *loc = buf + rel.offset; in relocateAlloc() local
1048 relocate(loc, rel, val); in relocateAlloc()
1051 applyJumpInstrMod(buf + sec.jumpInstrMod->offset, in relocateAlloc()
1052 sec.jumpInstrMod->original, sec.jumpInstrMod->size); in relocateAlloc()
1076 in.ibtPlt->getVA() + IBTPltHeaderSize + s.getPltIdx() * pltEntrySize; in writeGotPlt()
1088 write32le(buf + 6, sym.getGotPltVA() - pltEntryAddr - 10); in writePlt()
1105 write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30); in writeIBTPlt()
1114 // indirect jump. That instruction sequence is so-called "retpoline".
1116 // We have two types of retpoline PLTs as a size optimization. If `-z now`
1117 // is specified, all dynamic symbols are resolved at load-time. Thus, when
1118 // that option is given, we can omit code for symbol lazy resolution.
1165 uint64_t gotPlt = in.gotPlt->getVA(); in writePltHeader()
1166 uint64_t plt = in.plt->getVA(); in writePltHeader()
1167 write32le(buf + 2, gotPlt - plt - 6 + 8); in writePltHeader()
1168 write32le(buf + 9, gotPlt - plt - 13 + 16); in writePltHeader()
1183 uint64_t off = pltEntryAddr - in.plt->getVA(); in writePlt()
1185 write32le(buf + 3, sym.getGotPltVA() - pltEntryAddr - 7); in writePlt()
1186 write32le(buf + 8, -off - 12 + 32); in writePlt()
1187 write32le(buf + 13, -off - 17 + 18); in writePlt()
1189 write32le(buf + 23, -off - 27); in writePlt()
1223 write32le(buf + 3, sym.getGotPltVA() - pltEntryAddr - 7); in writePlt()
1224 write32le(buf + 8, in.plt->getVA() - pltEntryAddr - 12); in writePlt()
1228 if (config->zRetpolineplt) { in getTargetInfo()
1229 if (config->zNow) { in getTargetInfo()
1237 if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) { in getTargetInfo()