1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef __LINUX_ENTRYCOMMON_H
3 #define __LINUX_ENTRYCOMMON_H
4
5 #include <linux/irq-entry-common.h>
6 #include <linux/ptrace.h>
7 #include <linux/seccomp.h>
8 #include <linux/sched.h>
9 #include <linux/livepatch.h>
10 #include <linux/resume_user_mode.h>
11
12 #include <asm/entry-common.h>
13 #include <asm/syscall.h>
14
/*
 * Provide an empty mask for _TIF_UPROBE when no earlier header has
 * defined it.
 */
#ifndef _TIF_UPROBE
# define _TIF_UPROBE			(0)
#endif
18
/*
 * SYSCALL_WORK flags handled in syscall_enter_from_user_mode()
 *
 * Defaults to an empty mask unless ARCH_SYSCALL_WORK_ENTER has already
 * been defined before this point.
 */
#ifndef ARCH_SYSCALL_WORK_ENTER
# define ARCH_SYSCALL_WORK_ENTER	(0)
#endif
25
/*
 * SYSCALL_WORK flags handled in syscall_exit_to_user_mode()
 *
 * Defaults to an empty mask unless ARCH_SYSCALL_WORK_EXIT has already
 * been defined before this point.
 */
#ifndef ARCH_SYSCALL_WORK_EXIT
# define ARCH_SYSCALL_WORK_EXIT		(0)
#endif
32
/*
 * Mask of the syscall_work bits which make
 * syscall_enter_from_user_mode_work() call syscall_trace_enter().
 */
#define SYSCALL_WORK_ENTER	(SYSCALL_WORK_SECCOMP |			\
				 SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
				 SYSCALL_WORK_SYSCALL_TRACE |		\
				 SYSCALL_WORK_SYSCALL_EMU |		\
				 SYSCALL_WORK_SYSCALL_AUDIT |		\
				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
				 ARCH_SYSCALL_WORK_ENTER)
/*
 * Mask of the syscall_work bits which make
 * syscall_exit_to_user_mode_work() call syscall_exit_work().
 */
#define SYSCALL_WORK_EXIT	(SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
				 SYSCALL_WORK_SYSCALL_TRACE |		\
				 SYSCALL_WORK_SYSCALL_AUDIT |		\
				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
				 SYSCALL_WORK_SYSCALL_EXIT_TRAP |	\
				 ARCH_SYSCALL_WORK_EXIT)
46
/**
 * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * disabled. The calling code has to be non-instrumentable. When the
 * function returns all state is correct, interrupts are enabled and the
 * subsequent functions can be instrumented.
 *
 * This handles lockdep, RCU (context tracking) and tracing state, i.e.
 * the functionality provided by enter_from_user_mode().
 *
 * This is invoked when there is extra architecture specific functionality
 * to be done between establishing state and handling user mode entry work.
 */
void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
63
/**
 * syscall_trace_enter - Handle pending syscall entry work
 * @regs:	Pointer to current's pt_regs
 * @syscall:	The syscall number
 * @work:	Snapshot of the current thread's syscall_work flags
 *
 * Called by syscall_enter_from_user_mode_work() when at least one
 * SYSCALL_WORK_ENTER bit is set in @work.
 *
 * Returns: The syscall number which the caller hands back to the
 * architecture entry code in place of @syscall.
 */
long syscall_trace_enter(struct pt_regs *regs, long syscall,
			 unsigned long work);
66
67 /**
68 * syscall_enter_from_user_mode_work - Check and handle work before invoking
69 * a syscall
70 * @regs: Pointer to currents pt_regs
71 * @syscall: The syscall number
72 *
73 * Invoked from architecture specific syscall entry code with interrupts
74 * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
75 * architecture specific work.
76 *
77 * Returns: The original or a modified syscall number
78 *
79 * If the returned syscall number is -1 then the syscall should be
80 * skipped. In this case the caller may invoke syscall_set_error() or
81 * syscall_set_return_value() first. If neither of those are called and -1
82 * is returned, then the syscall will fail with ENOSYS.
83 *
84 * It handles the following work items:
85 *
86 * 1) syscall_work flag dependent invocations of
87 * ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter()
88 * 2) Invocation of audit_syscall_entry()
89 */
syscall_enter_from_user_mode_work(struct pt_regs * regs,long syscall)90 static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
91 {
92 unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
93
94 if (work & SYSCALL_WORK_ENTER)
95 syscall = syscall_trace_enter(regs, syscall, work);
96
97 return syscall;
98 }
99
100 /**
101 * syscall_enter_from_user_mode - Establish state and check and handle work
102 * before invoking a syscall
103 * @regs: Pointer to currents pt_regs
104 * @syscall: The syscall number
105 *
106 * Invoked from architecture specific syscall entry code with interrupts
107 * disabled. The calling code has to be non-instrumentable. When the
108 * function returns all state is correct, interrupts are enabled and the
109 * subsequent functions can be instrumented.
110 *
111 * This is combination of syscall_enter_from_user_mode_prepare() and
112 * syscall_enter_from_user_mode_work().
113 *
114 * Returns: The original or a modified syscall number. See
115 * syscall_enter_from_user_mode_work() for further explanation.
116 */
syscall_enter_from_user_mode(struct pt_regs * regs,long syscall)117 static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
118 {
119 long ret;
120
121 enter_from_user_mode(regs);
122
123 instrumentation_begin();
124 local_irq_enable();
125 ret = syscall_enter_from_user_mode_work(regs, syscall);
126 instrumentation_end();
127
128 return ret;
129 }
130
/**
 * syscall_exit_work - Handle work before returning to user mode
 * @regs:	Pointer to current's pt_regs
 * @work:	Current thread syscall work
 *
 * Do one-time syscall specific work.
 *
 * Called by syscall_exit_to_user_mode_work() when at least one
 * SYSCALL_WORK_EXIT bit is set in @work.
 */
void syscall_exit_work(struct pt_regs *regs, unsigned long work);
139
140 /**
141 * syscall_exit_to_user_mode_work - Handle work before returning to user mode
142 * @regs: Pointer to currents pt_regs
143 *
144 * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
145 * exit_to_user_mode() to perform the final transition to user mode.
146 *
147 * Calling convention is the same as for syscall_exit_to_user_mode() and it
148 * returns with all work handled and interrupts disabled. The caller must
149 * invoke exit_to_user_mode() before actually switching to user mode to
150 * make the final state transitions. Interrupts must stay disabled between
151 * return from this function and the invocation of exit_to_user_mode().
152 */
syscall_exit_to_user_mode_work(struct pt_regs * regs)153 static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
154 {
155 unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
156 unsigned long nr = syscall_get_nr(current, regs);
157
158 CT_WARN_ON(ct_state() != CT_STATE_KERNEL);
159
160 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
161 if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
162 local_irq_enable();
163 }
164
165 rseq_syscall(regs);
166
167 /*
168 * Do one-time syscall specific work. If these work items are
169 * enabled, we want to run them exactly once per syscall exit with
170 * interrupts enabled.
171 */
172 if (unlikely(work & SYSCALL_WORK_EXIT))
173 syscall_exit_work(regs, work);
174 local_irq_disable_exit_to_user();
175 exit_to_user_mode_prepare(regs);
176 }
177
178 /**
179 * syscall_exit_to_user_mode - Handle work before returning to user mode
180 * @regs: Pointer to currents pt_regs
181 *
182 * Invoked with interrupts enabled and fully valid regs. Returns with all
183 * work handled, interrupts disabled such that the caller can immediately
184 * switch to user mode. Called from architecture specific syscall and ret
185 * from fork code.
186 *
187 * The call order is:
188 * 1) One-time syscall exit work:
189 * - rseq syscall exit
190 * - audit
191 * - syscall tracing
192 * - ptrace (single stepping)
193 *
194 * 2) Preparatory work
195 * - Exit to user mode loop (common TIF handling). Invokes
196 * arch_exit_to_user_mode_work() for architecture specific TIF work
197 * - Architecture specific one time work arch_exit_to_user_mode_prepare()
198 * - Address limit and lockdep checks
199 *
200 * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
201 * functionality in exit_to_user_mode().
202 *
203 * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
204 * exit_to_user_mode(). This function is preferred unless there is a
205 * compelling architectural reason to use the separate functions.
206 */
syscall_exit_to_user_mode(struct pt_regs * regs)207 static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
208 {
209 instrumentation_begin();
210 syscall_exit_to_user_mode_work(regs);
211 instrumentation_end();
212 exit_to_user_mode();
213 }
214
215 #endif
216