1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef __LINUX_ENTRYCOMMON_H 3 #define __LINUX_ENTRYCOMMON_H 4 5 #include <linux/irq-entry-common.h> 6 #include <linux/livepatch.h> 7 #include <linux/ptrace.h> 8 #include <linux/resume_user_mode.h> 9 #include <linux/seccomp.h> 10 #include <linux/sched.h> 11 12 #include <asm/entry-common.h> 13 #include <asm/syscall.h> 14 15 #ifndef _TIF_UPROBE 16 # define _TIF_UPROBE (0) 17 #endif 18 19 /* 20 * SYSCALL_WORK flags handled in syscall_enter_from_user_mode() 21 */ 22 #ifndef ARCH_SYSCALL_WORK_ENTER 23 # define ARCH_SYSCALL_WORK_ENTER (0) 24 #endif 25 26 /* 27 * SYSCALL_WORK flags handled in syscall_exit_to_user_mode() 28 */ 29 #ifndef ARCH_SYSCALL_WORK_EXIT 30 # define ARCH_SYSCALL_WORK_EXIT (0) 31 #endif 32 33 #define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ 34 SYSCALL_WORK_SYSCALL_TRACEPOINT | \ 35 SYSCALL_WORK_SYSCALL_TRACE | \ 36 SYSCALL_WORK_SYSCALL_EMU | \ 37 SYSCALL_WORK_SYSCALL_AUDIT | \ 38 SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ 39 ARCH_SYSCALL_WORK_ENTER) 40 41 #define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ 42 SYSCALL_WORK_SYSCALL_TRACE | \ 43 SYSCALL_WORK_SYSCALL_AUDIT | \ 44 SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ 45 SYSCALL_WORK_SYSCALL_EXIT_TRAP | \ 46 ARCH_SYSCALL_WORK_EXIT) 47 48 long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work); 49 50 /** 51 * syscall_enter_from_user_mode_work - Check and handle work before invoking 52 * a syscall 53 * @regs: Pointer to currents pt_regs 54 * @syscall: The syscall number 55 * 56 * Invoked from architecture specific syscall entry code with interrupts 57 * enabled after invoking enter_from_user_mode(), enabling interrupts and 58 * extra architecture specific work. 59 * 60 * Returns: The original or a modified syscall number 61 * 62 * If the returned syscall number is -1 then the syscall should be 63 * skipped. In this case the caller may invoke syscall_set_error() or 64 * syscall_set_return_value() first. If neither of those are called and -1 65 * is returned, then the syscall will fail with ENOSYS. 66 * 67 * It handles the following work items: 68 * 69 * 1) syscall_work flag dependent invocations of 70 * ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter() 71 * 2) Invocation of audit_syscall_entry() 72 */ 73 static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall) 74 { 75 unsigned long work = READ_ONCE(current_thread_info()->syscall_work); 76 77 if (work & SYSCALL_WORK_ENTER) 78 syscall = syscall_trace_enter(regs, syscall, work); 79 80 return syscall; 81 } 82 83 /** 84 * syscall_enter_from_user_mode - Establish state and check and handle work 85 * before invoking a syscall 86 * @regs: Pointer to currents pt_regs 87 * @syscall: The syscall number 88 * 89 * Invoked from architecture specific syscall entry code with interrupts 90 * disabled. The calling code has to be non-instrumentable. When the 91 * function returns all state is correct, interrupts are enabled and the 92 * subsequent functions can be instrumented. 93 * 94 * This is the combination of enter_from_user_mode() and 95 * syscall_enter_from_user_mode_work() to be used when there is no 96 * architecture specific work to be done between the two. 97 * 98 * Returns: The original or a modified syscall number. See 99 * syscall_enter_from_user_mode_work() for further explanation. 100 */ 101 static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) 102 { 103 long ret; 104 105 enter_from_user_mode(regs); 106 107 instrumentation_begin(); 108 local_irq_enable(); 109 ret = syscall_enter_from_user_mode_work(regs, syscall); 110 instrumentation_end(); 111 112 return ret; 113 } 114 115 /** 116 * syscall_exit_work - Handle work before returning to user mode 117 * @regs: Pointer to current pt_regs 118 * @work: Current thread syscall work 119 * 120 * Do one-time syscall specific work. 121 */ 122 void syscall_exit_work(struct pt_regs *regs, unsigned long work); 123 124 /** 125 * syscall_exit_to_user_mode_work - Handle work before returning to user mode 126 * @regs: Pointer to currents pt_regs 127 * 128 * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling 129 * exit_to_user_mode() to perform the final transition to user mode. 130 * 131 * Calling convention is the same as for syscall_exit_to_user_mode() and it 132 * returns with all work handled and interrupts disabled. The caller must 133 * invoke exit_to_user_mode() before actually switching to user mode to 134 * make the final state transitions. Interrupts must stay disabled between 135 * return from this function and the invocation of exit_to_user_mode(). 136 */ 137 static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) 138 { 139 unsigned long work = READ_ONCE(current_thread_info()->syscall_work); 140 unsigned long nr = syscall_get_nr(current, regs); 141 142 CT_WARN_ON(ct_state() != CT_STATE_KERNEL); 143 144 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 145 if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) 146 local_irq_enable(); 147 } 148 149 rseq_debug_syscall_return(regs); 150 151 /* 152 * Do one-time syscall specific work. If these work items are 153 * enabled, we want to run them exactly once per syscall exit with 154 * interrupts enabled. 155 */ 156 if (unlikely(work & SYSCALL_WORK_EXIT)) 157 syscall_exit_work(regs, work); 158 local_irq_disable_exit_to_user(); 159 syscall_exit_to_user_mode_prepare(regs); 160 } 161 162 /** 163 * syscall_exit_to_user_mode - Handle work before returning to user mode 164 * @regs: Pointer to currents pt_regs 165 * 166 * Invoked with interrupts enabled and fully valid regs. Returns with all 167 * work handled, interrupts disabled such that the caller can immediately 168 * switch to user mode. Called from architecture specific syscall and ret 169 * from fork code. 170 * 171 * The call order is: 172 * 1) One-time syscall exit work: 173 * - rseq syscall exit 174 * - audit 175 * - syscall tracing 176 * - ptrace (single stepping) 177 * 178 * 2) Preparatory work 179 * - Exit to user mode loop (common TIF handling). Invokes 180 * arch_exit_to_user_mode_work() for architecture specific TIF work 181 * - Architecture specific one time work arch_exit_to_user_mode_prepare() 182 * - Address limit and lockdep checks 183 * 184 * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the 185 * functionality in exit_to_user_mode(). 186 * 187 * This is a combination of syscall_exit_to_user_mode_work() (1,2) and 188 * exit_to_user_mode(). This function is preferred unless there is a 189 * compelling architectural reason to use the separate functions. 190 */ 191 static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs) 192 { 193 instrumentation_begin(); 194 syscall_exit_to_user_mode_work(regs); 195 instrumentation_end(); 196 exit_to_user_mode(); 197 } 198 199 #endif 200