xref: /linux/include/linux/entry-common.h (revision 7fc2cd2e4b398c57c9cf961cfea05eadbf34c05c)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef __LINUX_ENTRYCOMMON_H
3 #define __LINUX_ENTRYCOMMON_H
4 
5 #include <linux/irq-entry-common.h>
6 #include <linux/livepatch.h>
7 #include <linux/ptrace.h>
8 #include <linux/resume_user_mode.h>
9 #include <linux/seccomp.h>
10 #include <linux/sched.h>
11 
12 #include <asm/entry-common.h>
13 #include <asm/syscall.h>
14 
/*
 * Fallback definition: if none of the headers included above defined
 * _TIF_UPROBE, define it as 0 so the name always exists and carries no bits.
 */
#ifndef _TIF_UPROBE
# define _TIF_UPROBE			(0)
#endif
18 
/*
 * SYSCALL_WORK flags handled in syscall_enter_from_user_mode()
 *
 * Optional additional bits which are OR-ed into SYSCALL_WORK_ENTER below.
 * Defaults to 0 (no additional bits) when nothing defined it before this
 * point. NOTE(review): going by the name, an architecture is expected to
 * provide it via <asm/entry-common.h> - confirm against the arch headers.
 */
#ifndef ARCH_SYSCALL_WORK_ENTER
# define ARCH_SYSCALL_WORK_ENTER	(0)
#endif
25 
/*
 * SYSCALL_WORK flags handled in syscall_exit_to_user_mode()
 *
 * Optional additional bits which are OR-ed into SYSCALL_WORK_EXIT below.
 * Defaults to 0 (no additional bits) when nothing defined it before this
 * point. NOTE(review): going by the name, an architecture is expected to
 * provide it via <asm/entry-common.h> - confirm against the arch headers.
 */
#ifndef ARCH_SYSCALL_WORK_EXIT
# define ARCH_SYSCALL_WORK_EXIT		(0)
#endif
32 
/*
 * Mask of all syscall_work bits which require work on syscall entry.
 * syscall_enter_from_user_mode_work() tests the thread's syscall_work
 * against this mask and calls syscall_trace_enter() only if a bit matches.
 */
#define SYSCALL_WORK_ENTER	(SYSCALL_WORK_SECCOMP |			\
				 SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
				 SYSCALL_WORK_SYSCALL_TRACE |		\
				 SYSCALL_WORK_SYSCALL_EMU |		\
				 SYSCALL_WORK_SYSCALL_AUDIT |		\
				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
				 ARCH_SYSCALL_WORK_ENTER)
40 
/*
 * Mask of all syscall_work bits which require work on syscall exit.
 * syscall_exit_to_user_mode_work() tests the thread's syscall_work against
 * this mask and calls syscall_exit_work() only if a bit matches.
 */
#define SYSCALL_WORK_EXIT	(SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
				 SYSCALL_WORK_SYSCALL_TRACE |		\
				 SYSCALL_WORK_SYSCALL_AUDIT |		\
				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
				 SYSCALL_WORK_SYSCALL_EXIT_TRAP	|	\
				 ARCH_SYSCALL_WORK_EXIT)
47 
/*
 * Out of line handler for syscall entry work; defined outside this header.
 * syscall_enter_from_user_mode_work() calls it with the sampled syscall_work
 * value when a SYSCALL_WORK_ENTER bit is set and uses the return value as
 * the syscall number from then on.
 */
long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work);
49 
50 /**
51  * syscall_enter_from_user_mode_work - Check and handle work before invoking
52  *				       a syscall
53  * @regs:	Pointer to currents pt_regs
54  * @syscall:	The syscall number
55  *
56  * Invoked from architecture specific syscall entry code with interrupts
57  * enabled after invoking enter_from_user_mode(), enabling interrupts and
58  * extra architecture specific work.
59  *
60  * Returns: The original or a modified syscall number
61  *
62  * If the returned syscall number is -1 then the syscall should be
63  * skipped. In this case the caller may invoke syscall_set_error() or
64  * syscall_set_return_value() first.  If neither of those are called and -1
65  * is returned, then the syscall will fail with ENOSYS.
66  *
67  * It handles the following work items:
68  *
69  *  1) syscall_work flag dependent invocations of
70  *     ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter()
71  *  2) Invocation of audit_syscall_entry()
72  */
73 static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
74 {
75 	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
76 
77 	if (work & SYSCALL_WORK_ENTER)
78 		syscall = syscall_trace_enter(regs, syscall, work);
79 
80 	return syscall;
81 }
82 
83 /**
84  * syscall_enter_from_user_mode - Establish state and check and handle work
85  *				  before invoking a syscall
86  * @regs:	Pointer to currents pt_regs
87  * @syscall:	The syscall number
88  *
89  * Invoked from architecture specific syscall entry code with interrupts
90  * disabled. The calling code has to be non-instrumentable. When the
91  * function returns all state is correct, interrupts are enabled and the
92  * subsequent functions can be instrumented.
93  *
94  * This is the combination of enter_from_user_mode() and
95  * syscall_enter_from_user_mode_work() to be used when there is no
96  * architecture specific work to be done between the two.
97  *
98  * Returns: The original or a modified syscall number. See
99  * syscall_enter_from_user_mode_work() for further explanation.
100  */
101 static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
102 {
103 	long ret;
104 
105 	enter_from_user_mode(regs);
106 
107 	instrumentation_begin();
108 	local_irq_enable();
109 	ret = syscall_enter_from_user_mode_work(regs, syscall);
110 	instrumentation_end();
111 
112 	return ret;
113 }
114 
/**
 * syscall_exit_work - Handle work before returning to user mode
 * @regs:	Pointer to current pt_regs
 * @work:	Current thread syscall work
 *
 * Do one-time syscall specific work.
 *
 * Declared here and defined out of line. Within this header it is invoked
 * from syscall_exit_to_user_mode_work() only, and only when @work has at
 * least one bit of SYSCALL_WORK_EXIT set.
 */
void syscall_exit_work(struct pt_regs *regs, unsigned long work);
123 
124 /**
125  * syscall_exit_to_user_mode_work - Handle work before returning to user mode
126  * @regs:	Pointer to currents pt_regs
127  *
128  * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
129  * exit_to_user_mode() to perform the final transition to user mode.
130  *
131  * Calling convention is the same as for syscall_exit_to_user_mode() and it
132  * returns with all work handled and interrupts disabled. The caller must
133  * invoke exit_to_user_mode() before actually switching to user mode to
134  * make the final state transitions. Interrupts must stay disabled between
135  * return from this function and the invocation of exit_to_user_mode().
136  */
137 static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
138 {
139 	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
140 	unsigned long nr = syscall_get_nr(current, regs);
141 
142 	CT_WARN_ON(ct_state() != CT_STATE_KERNEL);
143 
144 	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
145 		if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
146 			local_irq_enable();
147 	}
148 
149 	rseq_debug_syscall_return(regs);
150 
151 	/*
152 	 * Do one-time syscall specific work. If these work items are
153 	 * enabled, we want to run them exactly once per syscall exit with
154 	 * interrupts enabled.
155 	 */
156 	if (unlikely(work & SYSCALL_WORK_EXIT))
157 		syscall_exit_work(regs, work);
158 	local_irq_disable_exit_to_user();
159 	syscall_exit_to_user_mode_prepare(regs);
160 }
161 
162 /**
163  * syscall_exit_to_user_mode - Handle work before returning to user mode
164  * @regs:	Pointer to currents pt_regs
165  *
166  * Invoked with interrupts enabled and fully valid regs. Returns with all
167  * work handled, interrupts disabled such that the caller can immediately
168  * switch to user mode. Called from architecture specific syscall and ret
169  * from fork code.
170  *
171  * The call order is:
172  *  1) One-time syscall exit work:
173  *	- rseq syscall exit
174  *      - audit
175  *	- syscall tracing
176  *	- ptrace (single stepping)
177  *
178  *  2) Preparatory work
179  *	- Exit to user mode loop (common TIF handling). Invokes
180  *	  arch_exit_to_user_mode_work() for architecture specific TIF work
181  *	- Architecture specific one time work arch_exit_to_user_mode_prepare()
182  *	- Address limit and lockdep checks
183  *
184  *  3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
185  *     functionality in exit_to_user_mode().
186  *
187  * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
188  * exit_to_user_mode(). This function is preferred unless there is a
189  * compelling architectural reason to use the separate functions.
190  */
191 static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
192 {
193 	instrumentation_begin();
194 	syscall_exit_to_user_mode_work(regs);
195 	instrumentation_end();
196 	exit_to_user_mode();
197 }
198 
199 #endif
200