xref: /linux/include/linux/entry-common.h (revision 78bb43e51b94828b333ab296eabf893d5b439fc2)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef __LINUX_ENTRYCOMMON_H
3 #define __LINUX_ENTRYCOMMON_H
4 
5 #include <linux/irq-entry-common.h>
6 #include <linux/ptrace.h>
7 #include <linux/seccomp.h>
8 #include <linux/sched.h>
9 #include <linux/livepatch.h>
10 #include <linux/resume_user_mode.h>
11 
12 #include <asm/entry-common.h>
13 #include <asm/syscall.h>
14 
/*
 * Fall back to an empty mask when the architecture headers do not
 * define _TIF_UPROBE.
 */
#ifndef _TIF_UPROBE
# define _TIF_UPROBE			(0)
#endif

/*
 * Architecture specific SYSCALL_WORK flags handled in
 * syscall_enter_from_user_mode(). Defaults to no extra flags.
 */
#ifndef ARCH_SYSCALL_WORK_ENTER
# define ARCH_SYSCALL_WORK_ENTER	(0)
#endif

/*
 * Architecture specific SYSCALL_WORK flags handled in
 * syscall_exit_to_user_mode(). Defaults to no extra flags.
 */
#ifndef ARCH_SYSCALL_WORK_EXIT
# define ARCH_SYSCALL_WORK_EXIT		(0)
#endif
32 
/*
 * Mask of syscall_work bits which make
 * syscall_enter_from_user_mode_work() call syscall_trace_enter().
 */
#define SYSCALL_WORK_ENTER	(SYSCALL_WORK_SECCOMP |			\
				 SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
				 SYSCALL_WORK_SYSCALL_TRACE |		\
				 SYSCALL_WORK_SYSCALL_EMU |		\
				 SYSCALL_WORK_SYSCALL_AUDIT |		\
				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
				 ARCH_SYSCALL_WORK_ENTER)

/*
 * Mask of syscall_work bits which make
 * syscall_exit_to_user_mode_work() call syscall_exit_work().
 */
#define SYSCALL_WORK_EXIT	(SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
				 SYSCALL_WORK_SYSCALL_TRACE |		\
				 SYSCALL_WORK_SYSCALL_AUDIT |		\
				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
				 SYSCALL_WORK_SYSCALL_EXIT_TRAP |	\
				 ARCH_SYSCALL_WORK_EXIT)
46 
/**
 * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
 * @regs:	Pointer to the current task's pt_regs
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * disabled. The calling code has to be non-instrumentable. When the
 * function returns all state is correct, interrupts are enabled and the
 * subsequent functions can be instrumented.
 *
 * This handles lockdep, RCU (context tracking) and tracing state, i.e.
 * the functionality provided by enter_from_user_mode().
 *
 * This is invoked when there is extra architecture specific functionality
 * to be done between establishing state and handling user mode entry work.
 */
void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
63 
/*
 * Out-of-line handler for syscall entry work. Called by
 * syscall_enter_from_user_mode_work() with the sampled syscall_work value
 * when at least one SYSCALL_WORK_ENTER bit is set; its return value is
 * used as the syscall number from then on.
 *
 * NOTE(review): the definition is not in this header - confirm the exact
 * semantics against the implementation.
 */
long syscall_trace_enter(struct pt_regs *regs, long syscall,
			 unsigned long work);
66 
67 /**
68  * syscall_enter_from_user_mode_work - Check and handle work before invoking
69  *				       a syscall
70  * @regs:	Pointer to currents pt_regs
71  * @syscall:	The syscall number
72  *
73  * Invoked from architecture specific syscall entry code with interrupts
74  * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
75  * architecture specific work.
76  *
77  * Returns: The original or a modified syscall number
78  *
79  * If the returned syscall number is -1 then the syscall should be
80  * skipped. In this case the caller may invoke syscall_set_error() or
81  * syscall_set_return_value() first.  If neither of those are called and -1
82  * is returned, then the syscall will fail with ENOSYS.
83  *
84  * It handles the following work items:
85  *
86  *  1) syscall_work flag dependent invocations of
87  *     ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter()
88  *  2) Invocation of audit_syscall_entry()
89  */
syscall_enter_from_user_mode_work(struct pt_regs * regs,long syscall)90 static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
91 {
92 	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
93 
94 	if (work & SYSCALL_WORK_ENTER)
95 		syscall = syscall_trace_enter(regs, syscall, work);
96 
97 	return syscall;
98 }
99 
100 /**
101  * syscall_enter_from_user_mode - Establish state and check and handle work
102  *				  before invoking a syscall
103  * @regs:	Pointer to currents pt_regs
104  * @syscall:	The syscall number
105  *
106  * Invoked from architecture specific syscall entry code with interrupts
107  * disabled. The calling code has to be non-instrumentable. When the
108  * function returns all state is correct, interrupts are enabled and the
109  * subsequent functions can be instrumented.
110  *
111  * This is combination of syscall_enter_from_user_mode_prepare() and
112  * syscall_enter_from_user_mode_work().
113  *
114  * Returns: The original or a modified syscall number. See
115  * syscall_enter_from_user_mode_work() for further explanation.
116  */
syscall_enter_from_user_mode(struct pt_regs * regs,long syscall)117 static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
118 {
119 	long ret;
120 
121 	enter_from_user_mode(regs);
122 
123 	instrumentation_begin();
124 	local_irq_enable();
125 	ret = syscall_enter_from_user_mode_work(regs, syscall);
126 	instrumentation_end();
127 
128 	return ret;
129 }
130 
/**
 * syscall_exit_work - Handle work before returning to user mode
 * @regs:	Pointer to the current task's pt_regs
 * @work:	Current thread syscall work
 *
 * Do one-time syscall specific work. Called by
 * syscall_exit_to_user_mode_work() when at least one SYSCALL_WORK_EXIT bit
 * is set in @work.
 */
void syscall_exit_work(struct pt_regs *regs, unsigned long work);
139 
140 /**
141  * syscall_exit_to_user_mode_work - Handle work before returning to user mode
142  * @regs:	Pointer to currents pt_regs
143  *
144  * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
145  * exit_to_user_mode() to perform the final transition to user mode.
146  *
147  * Calling convention is the same as for syscall_exit_to_user_mode() and it
148  * returns with all work handled and interrupts disabled. The caller must
149  * invoke exit_to_user_mode() before actually switching to user mode to
150  * make the final state transitions. Interrupts must stay disabled between
151  * return from this function and the invocation of exit_to_user_mode().
152  */
syscall_exit_to_user_mode_work(struct pt_regs * regs)153 static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
154 {
155 	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
156 	unsigned long nr = syscall_get_nr(current, regs);
157 
158 	CT_WARN_ON(ct_state() != CT_STATE_KERNEL);
159 
160 	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
161 		if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
162 			local_irq_enable();
163 	}
164 
165 	rseq_syscall(regs);
166 
167 	/*
168 	 * Do one-time syscall specific work. If these work items are
169 	 * enabled, we want to run them exactly once per syscall exit with
170 	 * interrupts enabled.
171 	 */
172 	if (unlikely(work & SYSCALL_WORK_EXIT))
173 		syscall_exit_work(regs, work);
174 	local_irq_disable_exit_to_user();
175 	exit_to_user_mode_prepare(regs);
176 }
177 
178 /**
179  * syscall_exit_to_user_mode - Handle work before returning to user mode
180  * @regs:	Pointer to currents pt_regs
181  *
182  * Invoked with interrupts enabled and fully valid regs. Returns with all
183  * work handled, interrupts disabled such that the caller can immediately
184  * switch to user mode. Called from architecture specific syscall and ret
185  * from fork code.
186  *
187  * The call order is:
188  *  1) One-time syscall exit work:
189  *	- rseq syscall exit
190  *      - audit
191  *	- syscall tracing
192  *	- ptrace (single stepping)
193  *
194  *  2) Preparatory work
195  *	- Exit to user mode loop (common TIF handling). Invokes
196  *	  arch_exit_to_user_mode_work() for architecture specific TIF work
197  *	- Architecture specific one time work arch_exit_to_user_mode_prepare()
198  *	- Address limit and lockdep checks
199  *
200  *  3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
201  *     functionality in exit_to_user_mode().
202  *
203  * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
204  * exit_to_user_mode(). This function is preferred unless there is a
205  * compelling architectural reason to use the separate functions.
206  */
syscall_exit_to_user_mode(struct pt_regs * regs)207 static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
208 {
209 	instrumentation_begin();
210 	syscall_exit_to_user_mode_work(regs);
211 	instrumentation_end();
212 	exit_to_user_mode();
213 }
214 
215 #endif
216