entry_64.S: old 39d64ee59ceee0fb61243eab3c4b7b4492f80df2 vs. new ea4654e0885348f0faa47f6d7b44a08d75ad16e9

--- in both (old 1-20 / new 1-20) ---
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * linux/arch/x86_64/entry.S
4 *
5 * Copyright (C) 1991, 1992 Linus Torvalds
6 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
7 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
8 *

--- 4 unchanged lines hidden ---

13 * A note on terminology:
14 * - iret frame: Architecture defined interrupt frame from SS to RIP
15 * at the top of the kernel process stack.
16 *
17 * Some macro usage:
18 * - SYM_FUNC_START/END:Define functions in the symbol table.
19 * - idtentry: Define exception entry points.
20 */
--- only in new (ea4654e0885) ---
21#include <linux/export.h>
--- in both (old 21-36 / new 22-37) ---
21#include <linux/linkage.h>
22#include <asm/segment.h>
23#include <asm/cache.h>
24#include <asm/errno.h>
25#include <asm/asm-offsets.h>
26#include <asm/msr.h>
27#include <asm/unistd.h>
28#include <asm/thread_info.h>
29#include <asm/hw_irq.h>
30#include <asm/page_types.h>
31#include <asm/irqflags.h>
32#include <asm/paravirt.h>
33#include <asm/percpu.h>
34#include <asm/asm.h>
35#include <asm/smap.h>
36#include <asm/pgtable_types.h>
--- only in old (39d64ee59ce) ---
37#include <asm/export.h>
--- in both (old 38-45 / new 38-45) ---
38#include <asm/frame.h>
39#include <asm/trapnr.h>
40#include <asm/nospec-branch.h>
41#include <asm/fsgsbase.h>
42#include <linux/err.h>
43
44#include "calling.h"
45

--- 75 unchanged lines hidden ---

121
122 /*
123 * Try to use SYSRET instead of IRET if we're returning to
124 * a completely clean 64-bit userspace context. If we're not,
125 * go to the slow exit path.
126 * In the Xen PV case we must use iret anyway.
127 */
128
--- only in old (39d64ee59ce) ---
129 ALTERNATIVE "", "jmp swapgs_restore_regs_and_return_to_usermode", \
130 X86_FEATURE_XENPV
--- only in new (ea4654e0885) ---
129 ALTERNATIVE "testb %al, %al; jz swapgs_restore_regs_and_return_to_usermode", \
130 "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
--- in both ---
131
--- only in old (39d64ee59ce), lines 132-193 ---
132 movq RCX(%rsp), %rcx
133 movq RIP(%rsp), %r11
134
135 cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
136 jne swapgs_restore_regs_and_return_to_usermode
137
138 /*
139 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
140 * in kernel space. This essentially lets the user take over
141 * the kernel, since userspace controls RSP.
142 *
143 * If width of "canonical tail" ever becomes variable, this will need
144 * to be updated to remain correct on both old and new CPUs.
145 *
146 * Change top bits to match most significant bit (47th or 56th bit
147 * depending on paging mode) in the address.
148 */
149#ifdef CONFIG_X86_5LEVEL
150 ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
151 "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
152#else
153 shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
154 sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
155#endif
156
157 /* If this changed %rcx, it was not canonical */
158 cmpq %rcx, %r11
159 jne swapgs_restore_regs_and_return_to_usermode
160
161 cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
162 jne swapgs_restore_regs_and_return_to_usermode
163
164 movq R11(%rsp), %r11
165 cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
166 jne swapgs_restore_regs_and_return_to_usermode
167
168 /*
169 * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
170 * restore RF properly. If the slowpath sets it for whatever reason, we
171 * need to restore it correctly.
172 *
173 * SYSRET can restore TF, but unlike IRET, restoring TF results in a
174 * trap from userspace immediately after SYSRET. This would cause an
175 * infinite loop whenever #DB happens with register state that satisfies
176 * the opportunistic SYSRET conditions. For example, single-stepping
177 * this user code:
178 *
179 * movq $stuck_here, %rcx
180 * pushfq
181 * popq %r11
182 * stuck_here:
183 *
184 * would never get past 'stuck_here'.
185 */
186 testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
187 jnz swapgs_restore_regs_and_return_to_usermode
188
189 /* nothing to check for RSP */
190
191 cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
192 jne swapgs_restore_regs_and_return_to_usermode
193
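The assembly above (old lines 132-193) is the inline opportunistic-SYSRET eligibility test; the new revision's "testb %al, %al" dispatch earlier suggests that the same decision is now made by the C syscall-entry code and handed back as a boolean in %al, so only the cheap test remains in assembly. A hedged C sketch of that predicate, assuming hypothetical names (pt_regs_sketch, sysret_allowed) and the usual x86-64 selector and flag values; this is an illustration, not the kernel's actual implementation:

#include <stdbool.h>
#include <stdint.h>

/* Saved user register state; field names modeled on struct pt_regs. */
struct pt_regs_sketch {
	uint64_t ip, cs, flags, sp, ss;		/* hardware iret frame  */
	uint64_t cx, r11;			/* clobbered by SYSCALL */
};

#define SKETCH_USER_CS		0x33ULL		/* __USER_CS on x86-64  */
#define SKETCH_USER_DS		0x2bULL		/* __USER_DS on x86-64  */
#define SKETCH_EFLAGS_TF	(1ULL << 8)
#define SKETCH_EFLAGS_RF	(1ULL << 16)

/* vaddr_bits is 48, or 57 with 5-level paging (X86_FEATURE_LA57). */
static bool sysret_allowed(const struct pt_regs_sketch *regs, int vaddr_bits)
{
	int shift = 64 - vaddr_bits;

	/* SYSRET reloads RIP from RCX and RFLAGS from R11. */
	if (regs->cx != regs->ip || regs->r11 != regs->flags)
		return false;

	/* A non-canonical RCX would make SYSRET #GP in kernel mode; the
	 * shl/sar pair above is this sign-extension check. */
	if ((uint64_t)(((int64_t)(regs->cx << shift)) >> shift) != regs->cx)
		return false;

	/* Segments must be exactly what SYSRET installs. */
	if (regs->cs != SKETCH_USER_CS || regs->ss != SKETCH_USER_DS)
		return false;

	/* RF cannot be restored and TF would re-trap right after SYSRET. */
	if (regs->flags & (SKETCH_EFLAGS_RF | SKETCH_EFLAGS_TF))
		return false;

	return true;				/* nothing to check for RSP */
}

Any failing condition corresponds to one of the "jne/jnz swapgs_restore_regs_and_return_to_usermode" branches above.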
--- in both (old 194-202 / new 132-140) ---
194 /*
195 * We win! This label is here just for ease of understanding
196 * perf profiles. Nothing jumps here.
197 */
198syscall_return_via_sysret:
199 IBRS_EXIT
200 POP_REGS pop_rdi=0
201
202 /*

--- 44 unchanged lines hidden ---

--- in both (old 247-254 / new 185-192) ---
247 pushq %r15
248
249 /* switch stack */
250 movq %rsp, TASK_threadsp(%rdi)
251 movq TASK_threadsp(%rsi), %rsp
252
253#ifdef CONFIG_STACKPROTECTOR
254 movq TASK_stack_canary(%rsi), %rbx
--- only in old (39d64ee59ce) ---
255 movq %rbx, PER_CPU_VAR(fixed_percpu_data + FIXED_stack_canary)
--- only in new (ea4654e0885) ---
193 movq %rbx, PER_CPU_VAR(fixed_percpu_data) + FIXED_stack_canary
--- in both (old 256-263 / new 194-201) ---
256#endif
257
258 /*
259 * When switching from a shallower to a deeper call stack
260 * the RSB may either underflow or use entries populated
261 * with userspace addresses. On CPUs where those concerns
262 * exist, overwrite the RSB with entries which capture
263 * speculative execution to prevent attack.

--- 352 unchanged lines hidden ---

--- in both (old 616-623 / new 554-561) ---
616 __ALIGN
617 .globl __irqentry_text_end
618__irqentry_text_end:
619 ANNOTATE_NOENDBR
620
621SYM_CODE_START_LOCAL(common_interrupt_return)
622SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
623 IBRS_EXIT
--- only in old (39d64ee59ce) ---
624#ifdef CONFIG_DEBUG_ENTRY
625 /* Assert that pt_regs indicates user mode. */
626 testb $3, CS(%rsp)
627 jnz 1f
628 ud2
6291:
630#endif
--- in both (old 631-633 / new 562-564) ---
631#ifdef CONFIG_XEN_PV
632 ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
633#endif
--- only in new (ea4654e0885) ---
565#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
566 ALTERNATIVE "", "jmp .Lpti_restore_regs_and_return_to_usermode", X86_FEATURE_PTI
567#endif
568
569 STACKLEAK_ERASE
570 POP_REGS
571 add $8, %rsp /* orig_ax */
572 UNWIND_HINT_IRET_REGS
573
574.Lswapgs_and_iret:
575 swapgs
576 /* Assert that the IRET frame indicates user mode. */
577 testb $3, 8(%rsp)
578 jnz .Lnative_iret
579 ud2
580
581#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
582.Lpti_restore_regs_and_return_to_usermode:
--- in both (old 635-642 / new 583-590) ---
635 POP_REGS pop_rdi=0
636
637 /*
638 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
639 * Save old stack pointer and switch to trampoline stack.
640 */
641 movq %rsp, %rdi
642 movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp

--- 10 unchanged lines hidden ---

--- in both (old 653-660 / new 601-608) ---
653 pushq (%rdi)
654
655 /*
656 * We are on the trampoline stack. All regs except RDI are live.
657 * We can do future final exit work right here.
658 */
659 STACKLEAK_ERASE_NOCLOBBER
660
--- only in old (39d64ee59ce) ---
661 SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
--- only in new (ea4654e0885) ---
609 push %rax
610 SWITCH_TO_USER_CR3 scratch_reg=%rdi scratch_reg2=%rax
611 pop %rax
--- in both (old 662-664 / new 612-614) ---
662
663 /* Restore RDI. */
664 popq %rdi
--- only in old (39d64ee59ce) ---
665 swapgs
666 jmp .Lnative_iret
--- only in new (ea4654e0885) ---
615 jmp .Lswapgs_and_iret
616#endif
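The "testb $3, 8(%rsp)" in the new .Lswapgs_and_iret stanza above inspects the saved CS selector of the iret frame described in the header comment: after POP_REGS and the "add $8, %rsp" that drops orig_ax, %rsp points at RIP, so CS sits at offset 8 and its low two bits hold the privilege level. A minimal sketch of that layout, with struct and helper names of my own (not the kernel's):

#include <stdbool.h>
#include <stdint.h>

/* The five-quadword frame that IRETQ consumes, lowest address first. */
struct iret_frame_sketch {
	uint64_t rip;		/*  0(%rsp) */
	uint64_t cs;		/*  8(%rsp) */
	uint64_t rflags;	/* 16(%rsp) */
	uint64_t rsp;		/* 24(%rsp) */
	uint64_t ss;		/* 32(%rsp) */
};

/* Equivalent of "testb $3, 8(%rsp)": CPL lives in bits 1:0 of CS. */
static bool frame_returns_to_user(const struct iret_frame_sketch *f)
{
	return (f->cs & 3) != 0;
}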
--- in both (old 667-676 / new 617-625) ---
667
668
669SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)
670#ifdef CONFIG_DEBUG_ENTRY
671 /* Assert that pt_regs indicates kernel mode. */
672 testb $3, CS(%rsp)
673 jz 1f
674 ud2
6751:
676#endif

--- 465 unchanged lines hidden ---

--- in both (old 1142-1149 / new 1091-1098) ---
1142SYM_CODE_END(error_return)
1143
1144/*
1145 * Runs on exception stack. Xen PV does not go through this path at all,
1146 * so we can use real assembly here.
1147 *
1148 * Registers:
1149 * %r14: Used to save/restore the CR3 of the interrupted context
--- only in old (39d64ee59ce) ---
1150 * when PAGE_TABLE_ISOLATION is in use. Do not clobber.
--- only in new (ea4654e0885) ---
1099 * when MITIGATION_PAGE_TABLE_ISOLATION is in use. Do not clobber.
--- in both (old 1151-1158 / new 1100-1107) ---
1151 */
1152SYM_CODE_START(asm_exc_nmi)
1153 UNWIND_HINT_IRET_ENTRY
1154 ENDBR
1155
1156 /*
1157 * We allow breakpoints in NMIs. If a breakpoint occurs, then
1158 * the iretq it performs will take us out of NMI context.

--- 345 unchanged lines hidden ---

--- in both (old 1504-1511 / new 1453-1460) ---
1504 * iretq reads the "iret" frame and exits the NMI stack in a
1505 * single instruction. We are returning to kernel mode, so this
1506 * cannot result in a fault. Similarly, we don't need to worry
1507 * about espfix64 on the way back to kernel mode.
1508 */
1509 iretq
1510SYM_CODE_END(asm_exc_nmi)
1511
--- only in old (39d64ee59ce) ---
1512#ifndef CONFIG_IA32_EMULATION
--- in both (old 1513-1516 / new 1461-1464) ---
1513/*
1514 * This handles SYSCALL from 32-bit code. There is no way to program
1515 * MSRs to fully disable 32-bit SYSCALL.
1516 */
--- only in old (39d64ee59ce) ---
1517SYM_CODE_START(ignore_sysret)
--- only in new (ea4654e0885) ---
1465SYM_CODE_START(entry_SYSCALL32_ignore)
--- in both (old 1518-1521 / new 1466-1469) ---
1518 UNWIND_HINT_END_OF_STACK
1519 ENDBR
1520 mov $-ENOSYS, %eax
1521 sysretl
--- only in old (39d64ee59ce) ---
1522SYM_CODE_END(ignore_sysret)
1523#endif
--- only in new (ea4654e0885) ---
1470SYM_CODE_END(entry_SYSCALL32_ignore)
--- in both (old 1524-1538 / new 1471-1485) ---
1524
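The stub above exists because of how 32-bit SYSCALL reaches the kernel: on CPUs that support SYSCALL from compat mode (AMD), the instruction always vectors through MSR_CSTAR and cannot be disabled on its own, so a kernel without IA32 emulation points CSTAR at a stub that simply fails the call with -ENOSYS. A hedged sketch of that wiring; the wrmsr wrapper and setup function names are illustrative, only the architectural MSR index and the stub symbol are taken as given:

#include <stdint.h>

#define MSR_CSTAR 0xc0000083u	/* SYSCALL target for compat (32-bit) code */

/* Minimal wrmsr wrapper: ECX selects the MSR, EDX:EAX carries the value. */
static inline void wrmsr64(uint32_t msr, uint64_t val)
{
	__asm__ volatile("wrmsr" :: "c"(msr),
			 "a"((uint32_t)val), "d"((uint32_t)(val >> 32)));
}

extern char entry_SYSCALL32_ignore[];	/* the stub above (ignore_sysret in the old revision) */

static void point_cstar_at_stub(void)
{
	wrmsr64(MSR_CSTAR, (uint64_t)(uintptr_t)entry_SYSCALL32_ignore);
}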
1525.pushsection .text, "ax"
1526 __FUNC_ALIGN
1527SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
1528 UNWIND_HINT_FUNC
1529 /* Prevent any naive code from trying to unwind to our caller. */
1530 xorl %ebp, %ebp
1531
1532 movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rax
1533 leaq -PTREGS_SIZE(%rax), %rsp
1534 UNWIND_HINT_REGS
1535
1536 call make_task_dead
1537SYM_CODE_END(rewind_stack_and_make_dead)
1538.popsection
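A short illustration of the "xorl %ebp, %ebp" above: a naive frame-pointer unwinder follows saved (%rbp, return address) pairs and stops at a NULL frame pointer, so zeroing %rbp before the stack switch keeps such a walker from trying to unwind past rewind_stack_and_make_dead. A sketch under that assumption; the struct and function are hypothetical, not kernel code:

#include <stdint.h>
#include <stdio.h>

/* What a frame-pointer-based stack walk sees at each %rbp. */
struct fp_frame {
	struct fp_frame *prev_rbp;	/* saved caller %rbp             */
	uint64_t ret_addr;		/* return address pushed by call */
};

static void walk_frames(const struct fp_frame *fp)
{
	while (fp) {			/* a zeroed %rbp ends the walk   */
		printf("  return address %#llx\n",
		       (unsigned long long)fp->ret_addr);
		fp = fp->prev_rbp;
	}
}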