1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef __LINUX_ENTRYCOMMON_H 3 #define __LINUX_ENTRYCOMMON_H 4 5 #include <linux/irq-entry-common.h> 6 #include <linux/ptrace.h> 7 #include <linux/seccomp.h> 8 #include <linux/sched.h> 9 #include <linux/livepatch.h> 10 #include <linux/resume_user_mode.h> 11 12 #include <asm/entry-common.h> 13 #include <asm/syscall.h> 14 15 #ifndef _TIF_UPROBE 16 # define _TIF_UPROBE (0) 17 #endif 18 19 /* 20 * SYSCALL_WORK flags handled in syscall_enter_from_user_mode() 21 */ 22 #ifndef ARCH_SYSCALL_WORK_ENTER 23 # define ARCH_SYSCALL_WORK_ENTER (0) 24 #endif 25 26 /* 27 * SYSCALL_WORK flags handled in syscall_exit_to_user_mode() 28 */ 29 #ifndef ARCH_SYSCALL_WORK_EXIT 30 # define ARCH_SYSCALL_WORK_EXIT (0) 31 #endif 32 33 #define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ 34 SYSCALL_WORK_SYSCALL_TRACEPOINT | \ 35 SYSCALL_WORK_SYSCALL_TRACE | \ 36 SYSCALL_WORK_SYSCALL_EMU | \ 37 SYSCALL_WORK_SYSCALL_AUDIT | \ 38 SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ 39 ARCH_SYSCALL_WORK_ENTER) 40 #define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ 41 SYSCALL_WORK_SYSCALL_TRACE | \ 42 SYSCALL_WORK_SYSCALL_AUDIT | \ 43 SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ 44 SYSCALL_WORK_SYSCALL_EXIT_TRAP | \ 45 ARCH_SYSCALL_WORK_EXIT) 46 47 /** 48 * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts 49 * @regs: Pointer to currents pt_regs 50 * 51 * Invoked from architecture specific syscall entry code with interrupts 52 * disabled. The calling code has to be non-instrumentable. When the 53 * function returns all state is correct, interrupts are enabled and the 54 * subsequent functions can be instrumented. 55 * 56 * This handles lockdep, RCU (context tracking) and tracing state, i.e. 57 * the functionality provided by enter_from_user_mode(). 58 * 59 * This is invoked when there is extra architecture specific functionality 60 * to be done between establishing state and handling user mode entry work. 61 */ 62 void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); 63 64 long syscall_trace_enter(struct pt_regs *regs, long syscall, 65 unsigned long work); 66 67 /** 68 * syscall_enter_from_user_mode_work - Check and handle work before invoking 69 * a syscall 70 * @regs: Pointer to currents pt_regs 71 * @syscall: The syscall number 72 * 73 * Invoked from architecture specific syscall entry code with interrupts 74 * enabled after invoking syscall_enter_from_user_mode_prepare() and extra 75 * architecture specific work. 76 * 77 * Returns: The original or a modified syscall number 78 * 79 * If the returned syscall number is -1 then the syscall should be 80 * skipped. In this case the caller may invoke syscall_set_error() or 81 * syscall_set_return_value() first. If neither of those are called and -1 82 * is returned, then the syscall will fail with ENOSYS. 83 * 84 * It handles the following work items: 85 * 86 * 1) syscall_work flag dependent invocations of 87 * ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter() 88 * 2) Invocation of audit_syscall_entry() 89 */ 90 static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall) 91 { 92 unsigned long work = READ_ONCE(current_thread_info()->syscall_work); 93 94 if (work & SYSCALL_WORK_ENTER) 95 syscall = syscall_trace_enter(regs, syscall, work); 96 97 return syscall; 98 } 99 100 /** 101 * syscall_enter_from_user_mode - Establish state and check and handle work 102 * before invoking a syscall 103 * @regs: Pointer to currents pt_regs 104 * @syscall: The syscall number 105 * 106 * Invoked from architecture specific syscall entry code with interrupts 107 * disabled. The calling code has to be non-instrumentable. When the 108 * function returns all state is correct, interrupts are enabled and the 109 * subsequent functions can be instrumented. 110 * 111 * This is combination of syscall_enter_from_user_mode_prepare() and 112 * syscall_enter_from_user_mode_work(). 113 * 114 * Returns: The original or a modified syscall number. See 115 * syscall_enter_from_user_mode_work() for further explanation. 116 */ 117 static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) 118 { 119 long ret; 120 121 enter_from_user_mode(regs); 122 123 instrumentation_begin(); 124 local_irq_enable(); 125 ret = syscall_enter_from_user_mode_work(regs, syscall); 126 instrumentation_end(); 127 128 return ret; 129 } 130 131 /** 132 * syscall_exit_work - Handle work before returning to user mode 133 * @regs: Pointer to current pt_regs 134 * @work: Current thread syscall work 135 * 136 * Do one-time syscall specific work. 137 */ 138 void syscall_exit_work(struct pt_regs *regs, unsigned long work); 139 140 /** 141 * syscall_exit_to_user_mode_work - Handle work before returning to user mode 142 * @regs: Pointer to currents pt_regs 143 * 144 * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling 145 * exit_to_user_mode() to perform the final transition to user mode. 146 * 147 * Calling convention is the same as for syscall_exit_to_user_mode() and it 148 * returns with all work handled and interrupts disabled. The caller must 149 * invoke exit_to_user_mode() before actually switching to user mode to 150 * make the final state transitions. Interrupts must stay disabled between 151 * return from this function and the invocation of exit_to_user_mode(). 152 */ 153 static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) 154 { 155 unsigned long work = READ_ONCE(current_thread_info()->syscall_work); 156 unsigned long nr = syscall_get_nr(current, regs); 157 158 CT_WARN_ON(ct_state() != CT_STATE_KERNEL); 159 160 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 161 if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) 162 local_irq_enable(); 163 } 164 165 rseq_syscall(regs); 166 167 /* 168 * Do one-time syscall specific work. If these work items are 169 * enabled, we want to run them exactly once per syscall exit with 170 * interrupts enabled. 171 */ 172 if (unlikely(work & SYSCALL_WORK_EXIT)) 173 syscall_exit_work(regs, work); 174 local_irq_disable_exit_to_user(); 175 exit_to_user_mode_prepare(regs); 176 } 177 178 /** 179 * syscall_exit_to_user_mode - Handle work before returning to user mode 180 * @regs: Pointer to currents pt_regs 181 * 182 * Invoked with interrupts enabled and fully valid regs. Returns with all 183 * work handled, interrupts disabled such that the caller can immediately 184 * switch to user mode. Called from architecture specific syscall and ret 185 * from fork code. 186 * 187 * The call order is: 188 * 1) One-time syscall exit work: 189 * - rseq syscall exit 190 * - audit 191 * - syscall tracing 192 * - ptrace (single stepping) 193 * 194 * 2) Preparatory work 195 * - Exit to user mode loop (common TIF handling). Invokes 196 * arch_exit_to_user_mode_work() for architecture specific TIF work 197 * - Architecture specific one time work arch_exit_to_user_mode_prepare() 198 * - Address limit and lockdep checks 199 * 200 * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the 201 * functionality in exit_to_user_mode(). 202 * 203 * This is a combination of syscall_exit_to_user_mode_work() (1,2) and 204 * exit_to_user_mode(). This function is preferred unless there is a 205 * compelling architectural reason to use the separate functions. 206 */ 207 static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs) 208 { 209 instrumentation_begin(); 210 syscall_exit_to_user_mode_work(regs); 211 instrumentation_end(); 212 exit_to_user_mode(); 213 } 214 215 #endif 216