// SPDX-License-Identifier: GPL-2.0-only
/* 64-bit system call dispatch */

#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
#include <linux/syscalls.h>
#include <linux/entry-common.h>
#include <linux/nospec.h>
#include <asm/syscall.h>

#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *);
#include <asm/syscalls_64.h>
#ifdef CONFIG_X86_X32_ABI
#include <asm/syscalls_x32.h>
#endif
#undef __SYSCALL

#undef __SYSCALL_NORETURN
#define __SYSCALL_NORETURN __SYSCALL

/*
 * The sys_call_table[] is no longer used for system calls, but
 * kernel/trace/trace_syscalls.c still wants to know the system
 * call address.
 */
#define __SYSCALL(nr, sym) __x64_##sym,
const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
#undef __SYSCALL

#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
{
	switch (nr) {
	#include <asm/syscalls_64.h>
	default: return __x64_sys_ni_syscall(regs);
	}
}

#ifdef CONFIG_X86_X32_ABI
long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
{
	switch (nr) {
	#include <asm/syscalls_x32.h>
	default: return __x64_sys_ni_syscall(regs);
	}
}
#endif

static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
{
	/*
	 * Convert negative numbers to very high and thus out of range
	 * numbers for comparisons.
	 */
	unsigned int unr = nr;

	if (likely(unr < NR_syscalls)) {
		unr = array_index_nospec(unr, NR_syscalls);
		regs->ax = x64_sys_call(regs, unr);
		return true;
	}
	return false;
}

static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
{
	/*
	 * Adjust the starting offset of the table, and convert numbers
	 * < __X32_SYSCALL_BIT to very high and thus out of range
	 * numbers for comparisons.
	 */
	unsigned int xnr = nr - __X32_SYSCALL_BIT;

	if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
		xnr = array_index_nospec(xnr, X32_NR_syscalls);
		regs->ax = x32_sys_call(regs, xnr);
		return true;
	}
	return false;
}
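/*
 * Illustrative note, not upstream comment text: the unsigned compares in
 * do_syscall_x64() and do_syscall_x32() above fold the "nr < 0" check and
 * the upper-bound check into a single comparison. Assuming NR_syscalls is
 * 454, for example:
 *
 *	nr ==  17 -> unr == 17         -> in range, dispatched
 *	nr ==  -1 -> unr == 0xffffffff -> out of range, returns false
 *	nr == 454 -> unr == 454        -> out of range, returns false
 *
 * array_index_nospec() then clamps the number to the valid range even
 * under speculative execution, so a mispredicted bounds check cannot
 * steer the dispatch with an out-of-range value (Spectre v1 hardening).
 */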
/* Returns true to return using SYSRET, or false to use IRET */
__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
{
	add_random_kstack_offset();
	nr = syscall_enter_from_user_mode(regs, nr);

	instrumentation_begin();

	if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
		/* Invalid system call, but still a system call. */
		regs->ax = __x64_sys_ni_syscall(regs);
	}

	instrumentation_end();
	syscall_exit_to_user_mode(regs);

	/*
	 * Check that the register state is valid for using SYSRET to exit
	 * to userspace. Otherwise use the slower but fully capable IRET
	 * exit path.
	 */

	/* XEN PV guests always use the IRET path */
	if (cpu_feature_enabled(X86_FEATURE_XENPV))
		return false;

	/* SYSRET requires RCX == RIP and R11 == EFLAGS */
	if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
		return false;

	/* CS and SS must match the values set in MSR_STAR */
	if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
		return false;

	/*
	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
	 * in kernel space.  This essentially lets the user take over
	 * the kernel, since userspace controls RSP.
	 *
	 * TASK_SIZE_MAX covers all user-accessible addresses other than
	 * the deprecated vsyscall page.
	 */
	if (unlikely(regs->ip >= TASK_SIZE_MAX))
		return false;

	/*
	 * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
	 * restoring TF results in a trap from userspace immediately after
	 * SYSRET.
	 */
	if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
		return false;

	/* Use SYSRET to exit to userspace */
	return true;
}
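/*
 * Illustrative sketch, not part of the kernel source above: a minimal
 * standalone userspace program that exercises this dispatch path through
 * the libc syscall(2) wrapper. A valid number is routed through
 * x64_sys_call(); an out-of-range number falls through to
 * __x64_sys_ni_syscall() and shows up in userspace as -1 with errno set
 * to ENOSYS. The number 100000 is merely assumed to be an unused slot.
 */
#include <errno.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	long pid = syscall(SYS_getpid);	/* valid nr: dispatched */
	long bad = syscall(100000L);	/* invalid nr: rejected */

	printf("getpid()  -> %ld\n", pid);
	printf("nr 100000 -> %ld (errno=%d, ENOSYS=%d)\n", bad, errno, ENOSYS);
	return 0;
}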