xref: /linux/include/linux/entry-common.h (revision 4fd18fc38757217c746aa063ba9e4729814dc737)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef __LINUX_ENTRYCOMMON_H
3 #define __LINUX_ENTRYCOMMON_H
4 
5 #include <linux/tracehook.h>
6 #include <linux/syscalls.h>
7 #include <linux/seccomp.h>
8 #include <linux/sched.h>
9 
10 #include <asm/entry-common.h>
11 
12 /*
13  * Define dummy (zero) _TIF work flags if not defined by the architecture or
14  * for disabled functionality, so they drop out of EXIT_TO_USER_MODE_WORK.
15  */
16 #ifndef _TIF_PATCH_PENDING
17 # define _TIF_PATCH_PENDING		(0)
18 #endif
19 
20 #ifndef _TIF_UPROBE
21 # define _TIF_UPROBE			(0)
22 #endif
23 
24 /*
25  * SYSCALL_WORK flags handled in syscall_enter_from_user_mode()
26  */
27 #ifndef ARCH_SYSCALL_WORK_ENTER
28 # define ARCH_SYSCALL_WORK_ENTER	(0)
29 #endif
30 
31 /*
32  * SYSCALL_WORK flags handled in syscall_exit_to_user_mode()
33  */
34 #ifndef ARCH_SYSCALL_WORK_EXIT
35 # define ARCH_SYSCALL_WORK_EXIT		(0)
36 #endif
37 
/* Generic entry work bits plus the architecture specific ones (default: none) */
38 #define SYSCALL_WORK_ENTER	(SYSCALL_WORK_SECCOMP |			\
39 				 SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
40 				 SYSCALL_WORK_SYSCALL_TRACE |		\
41 				 SYSCALL_WORK_SYSCALL_EMU |		\
42 				 SYSCALL_WORK_SYSCALL_AUDIT |		\
43 				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
44 				 ARCH_SYSCALL_WORK_ENTER)
/* Generic exit work bits plus the architecture specific ones (default: none) */
45 #define SYSCALL_WORK_EXIT	(SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
46 				 SYSCALL_WORK_SYSCALL_TRACE |		\
47 				 SYSCALL_WORK_SYSCALL_AUDIT |		\
48 				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
49 				 ARCH_SYSCALL_WORK_EXIT)
50 
51 /*
52  * TIF flags handled in exit_to_user_mode_loop()
53  */
54 #ifndef ARCH_EXIT_TO_USER_MODE_WORK
55 # define ARCH_EXIT_TO_USER_MODE_WORK		(0)
56 #endif
57 
/* Generic TIF work bits plus the architecture specific ones (default: none) */
58 #define EXIT_TO_USER_MODE_WORK						\
59 	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
60 	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |	\
61 	 ARCH_EXIT_TO_USER_MODE_WORK)
62 
63 /**
64  * arch_check_user_regs - Architecture specific sanity check for user mode regs
65  * @regs:	Pointer to current's pt_regs
66  *
67  * Defaults to an empty implementation. Architecture code can replace it by
68  * defining arch_check_user_regs, which suppresses the default below.
69  *
70  * Invoked from syscall_enter_from_user_mode() in the non-instrumentable
71  * section. Use __always_inline so the compiler cannot push it out of line
72  * and make it instrumentable.
73  */
74 static __always_inline void arch_check_user_regs(struct pt_regs *regs);
75 
76 #ifndef arch_check_user_regs
77 static __always_inline void arch_check_user_regs(struct pt_regs *regs) {}
78 #endif
79 
80 /**
81  * arch_syscall_enter_tracehook - Wrapper around tracehook_report_syscall_entry()
82  * @regs:	Pointer to current's pt_regs
83  *
84  * Returns: 0 on success or an error code to skip the syscall.
85  *
86  * Defaults to tracehook_report_syscall_entry(). Can be replaced by
87  * architecture specific code.
88  *
89  * Invoked from syscall_enter_from_user_mode()
90  */
91 static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs);
92 
93 #ifndef arch_syscall_enter_tracehook
94 static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs)
95 {
96 	return tracehook_report_syscall_entry(regs);
97 }
98 #endif
99 
100 /**
101  * enter_from_user_mode - Establish state when coming from user mode
 * @regs:	Pointer to current's pt_regs
102  *
103  * Syscall/interrupt entry disables interrupts, but user mode is traced as
104  * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
105  *
106  * 1) Tell lockdep that interrupts are disabled
107  * 2) Invoke context tracking if enabled to reactivate RCU
108  * 3) Trace interrupts off state
109  *
110  * Invoked from architecture specific syscall entry code with interrupts
111  * disabled. The calling code has to be non-instrumentable. When the
112  * function returns all state is correct and interrupts are still
113  * disabled. The subsequent functions can be instrumented.
114  *
115  * This is invoked when there is architecture specific functionality to be
116  * done between establishing state and enabling interrupts. The caller must
117  * enable interrupts before invoking syscall_enter_from_user_mode_work().
118  */
119 void enter_from_user_mode(struct pt_regs *regs);
120 
121 /**
122  * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
123  * @regs:	Pointer to current's pt_regs
124  *
125  * Invoked from architecture specific syscall entry code with interrupts
126  * disabled. The calling code has to be non-instrumentable. When the
127  * function returns all state is correct, interrupts are enabled and the
128  * subsequent functions can be instrumented.
129  *
130  * This handles lockdep, RCU (context tracking) and tracing state, i.e.
131  * the functionality provided by enter_from_user_mode().
132  *
133  * This is invoked when there is extra architecture specific functionality
134  * to be done between establishing state and handling user mode entry work.
135  */
136 void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
137 
138 /**
139  * syscall_enter_from_user_mode_work - Check and handle work before invoking
140  *				       a syscall
141  * @regs:	Pointer to current's pt_regs
142  * @syscall:	The syscall number
143  *
144  * Invoked from architecture specific syscall entry code with interrupts
145  * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
146  * architecture specific work.
147  *
148  * Returns: The original or a modified syscall number
149  *
150  * If the returned syscall number is -1 then the syscall should be
151  * skipped. In this case the caller may invoke syscall_set_error() or
152  * syscall_set_return_value() first.  If neither of those are called and -1
153  * is returned, then the syscall will fail with ENOSYS.
154  *
155  * It handles the following work items:
156  *
157  *  1) syscall_work flag dependent invocations of
158  *     arch_syscall_enter_tracehook(), __secure_computing(), trace_sys_enter()
159  *  2) Invocation of audit_syscall_entry()
160  */
161 long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
162 
163 /**
164  * syscall_enter_from_user_mode - Establish state and check and handle work
165  *				  before invoking a syscall
166  * @regs:	Pointer to current's pt_regs
167  * @syscall:	The syscall number
168  *
169  * Invoked from architecture specific syscall entry code with interrupts
170  * disabled. The calling code has to be non-instrumentable. When the
171  * function returns all state is correct, interrupts are enabled and the
172  * subsequent functions can be instrumented.
173  *
174  * This is a combination of syscall_enter_from_user_mode_prepare() and
175  * syscall_enter_from_user_mode_work().
176  *
177  * Returns: The original or a modified syscall number. See
178  * syscall_enter_from_user_mode_work() for further explanation.
179  */
180 long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
181 
182 /**
183  * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable()
184  * @ti_work:	Cached TIF flags gathered with interrupts disabled
185  *
186  * Defaults to local_irq_enable(), which ignores @ti_work. Can be supplied by
187  * architecture specific code.
188  */
189 static inline void local_irq_enable_exit_to_user(unsigned long ti_work);
190 
191 #ifndef local_irq_enable_exit_to_user
192 static inline void local_irq_enable_exit_to_user(unsigned long ti_work)
193 {
194 	local_irq_enable();
195 }
196 #endif
197 
198 /**
199  * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable()
200  *
201  * Defaults to local_irq_disable(). Architecture code can supply its own
202  * variant and define local_irq_disable_exit_to_user to drop this default.
203  */
204 static inline void local_irq_disable_exit_to_user(void);
205 
206 #ifndef local_irq_disable_exit_to_user
207 static inline void local_irq_disable_exit_to_user(void)
208 {
209 	local_irq_disable();
210 }
211 #endif
212 
213 /**
214  * arch_exit_to_user_mode_work - Architecture specific TIF work for exit
215  *				 to user mode.
216  * @regs:	Pointer to current's pt_regs
217  * @ti_work:	Cached TIF flags gathered with interrupts disabled
218  *
219  * Invoked from exit_to_user_mode_loop() with interrupts enabled
220  *
221  * Defaults to NOOP. Can be supplied by architecture specific code.
222  */
223 static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
224 					       unsigned long ti_work);
225 
226 #ifndef arch_exit_to_user_mode_work
227 static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
228 					       unsigned long ti_work)
229 {
230 }
231 #endif
232 
233 /**
234  * arch_exit_to_user_mode_prepare - Architecture specific preparation for
235  *				    exit to user mode.
236  * @regs:	Pointer to current's pt_regs
237  * @ti_work:	Cached TIF flags gathered with interrupts disabled
238  *
239  * Invoked from exit_to_user_mode_prepare() with interrupts disabled as the last
240  * function before return. Defaults to NOOP.
241  */
242 static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
243 						  unsigned long ti_work);
244 
245 #ifndef arch_exit_to_user_mode_prepare
246 static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
247 						  unsigned long ti_work)
248 {
249 }
250 #endif
251 
252 /**
253  * arch_exit_to_user_mode - Architecture specific final work before
254  *			    exit to user mode.
255  *
256  * Invoked from exit_to_user_mode() with interrupts disabled as the last
257  * function before return. Defaults to NOOP.
258  *
259  * This needs to be __always_inline because it is non-instrumentable code
260  * invoked after context tracking switched to user mode.
261  *
262  * An architecture implementation must not do anything complex, no locking
263  * etc. The main purpose is for speculation mitigations.
264  */
265 static __always_inline void arch_exit_to_user_mode(void);
266 
267 #ifndef arch_exit_to_user_mode
268 static __always_inline void arch_exit_to_user_mode(void) { }
269 #endif
270 
271 /**
272  * arch_do_signal_or_restart - Architecture specific signal delivery function
273  * @regs:	Pointer to current's pt_regs
274  * @has_signal:	Indicates whether there is an actual signal to handle
275  *
276  * Invoked from exit_to_user_mode_loop().
277  */
278 void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal);
279 
280 /**
281  * arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit()
282  * @regs:	Pointer to current's pt_regs
283  * @step:	Indicator for single step
284  *
285  * Defaults to tracehook_report_syscall_exit(). Can be replaced by
286  * architecture specific code.
287  *
288  * Invoked from syscall_exit_to_user_mode()
289  */
290 static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step);
291 
292 #ifndef arch_syscall_exit_tracehook
293 static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step)
294 {
295 	tracehook_report_syscall_exit(regs, step);
296 }
297 #endif
298 
299 /**
300  * exit_to_user_mode - Fixup state when exiting to user mode
301  *
302  * Syscall/interrupt exit enables interrupts, but the kernel state is
303  * interrupts disabled when this is invoked. Also tell RCU about it.
304  *
305  * 1) Trace interrupts on state
306  * 2) Invoke context tracking if enabled to adjust RCU state
307  * 3) Invoke architecture specific last minute exit code, e.g. speculation
308  *    mitigations, etc.: arch_exit_to_user_mode()
309  * 4) Tell lockdep that interrupts are enabled
310  *
311  * Invoked from architecture specific code when syscall_exit_to_user_mode()
312  * is not suitable as the last step before returning to userspace. Must be
313  * invoked with interrupts disabled and the caller must be
314  * non-instrumentable.
315  * The caller has to invoke syscall_exit_to_user_mode_work() before this.
316  */
317 void exit_to_user_mode(void);
318 
319 /**
320  * syscall_exit_to_user_mode_work - Handle work before returning to user mode
321  * @regs:	Pointer to current's pt_regs
322  *
323  * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
324  * exit_to_user_mode() to perform the final transition to user mode.
325  *
326  * Calling convention is the same as for syscall_exit_to_user_mode() and it
327  * returns with all work handled and interrupts disabled. The caller must
328  * invoke exit_to_user_mode() before actually switching to user mode to
329  * make the final state transitions. Interrupts must stay disabled between
330  * return from this function and the invocation of exit_to_user_mode().
331  */
332 void syscall_exit_to_user_mode_work(struct pt_regs *regs);
333 
334 /**
335  * syscall_exit_to_user_mode - Handle work before returning to user mode
336  * @regs:	Pointer to current's pt_regs
337  *
338  * Invoked with interrupts enabled and fully valid regs. Returns with all
339  * work handled, interrupts disabled such that the caller can immediately
340  * switch to user mode. Called from architecture specific syscall and ret
341  * from fork code.
342  *
343  * The call order is:
344  *  1) One-time syscall exit work:
345  *	- rseq syscall exit
346  *	- audit
347  *	- syscall tracing
348  *	- tracehook (single stepping)
349  *
350  *  2) Preparatory work
351  *	- Exit to user mode loop (common TIF handling). Invokes
352  *	  arch_exit_to_user_mode_work() for architecture specific TIF work
353  *	- Architecture specific one time work arch_exit_to_user_mode_prepare()
354  *	- Address limit and lockdep checks
355  *
356  *  3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
357  *     functionality in exit_to_user_mode().
358  *
359  * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
360  * exit_to_user_mode(). This function is preferred unless there is a
361  * compelling architectural reason to use the separate functions.
362  */
363 void syscall_exit_to_user_mode(struct pt_regs *regs);
364 
365 /**
366  * irqentry_enter_from_user_mode - Establish state before invoking the irq handler
367  * @regs:	Pointer to current's pt_regs
368  *
369  * Invoked from architecture specific entry code with interrupts disabled.
370  * Can only be called when the interrupt entry came from user mode. The
371  * calling code must be non-instrumentable.  When the function returns all
372  * state is correct and the subsequent functions can be instrumented.
373  *
374  * The function establishes state (lockdep, RCU (context tracking), tracing)
375  */
376 void irqentry_enter_from_user_mode(struct pt_regs *regs);
377 
378 /**
379  * irqentry_exit_to_user_mode - Interrupt exit work
380  * @regs:	Pointer to current's pt_regs
381  *
382  * Invoked with interrupts disabled and fully valid regs. Returns with all
383  * work handled, interrupts disabled such that the caller can immediately
384  * switch to user mode. Called from architecture specific interrupt
385  * handling code.
386  *
387  * The call order is #2 and #3 as described in syscall_exit_to_user_mode().
388  * Interrupt exit is not invoking #1 which is the syscall specific one time
389  * work.
390  */
391 void irqentry_exit_to_user_mode(struct pt_regs *regs);
392 
393 #ifndef irqentry_state
394 /**
395  * struct irqentry_state - Opaque object for exception state storage
396  * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
397  *            exit path has to invoke rcu_irq_exit().
398  * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
399  *           lockdep state is restored correctly on exit from nmi.
400  *
401  * This opaque object is filled in by irqentry_enter() or irqentry_nmi_enter()
402  * and must be passed back into the corresponding irqentry_exit() or
403  * irqentry_nmi_exit() when the exception is complete.
404  *
405  * Callers of the irqentry_*() functions must consider this structure opaque
406  * and all members private.  Descriptions of the members are provided to aid in
407  * the maintenance of the irqentry_*() functions.
408  */
409 typedef struct irqentry_state {
410 	union {
411 		bool	exit_rcu;
412 		bool	lockdep;
413 	};
414 } irqentry_state_t;
415 #endif
416 
417 /**
418  * irqentry_enter - Handle state tracking on ordinary interrupt entries
419  * @regs:	Pointer to pt_regs of interrupted context
420  *
421  * Invokes:
422  *  - lockdep irqflag state tracking as low level ASM entry disabled
423  *    interrupts.
424  *
425  *  - Context tracking if the exception hit user mode.
426  *
427  *  - The hardirq tracer to keep the state consistent as low level ASM
428  *    entry disabled interrupts.
429  *
430  * As a precondition, this requires that the entry came from user mode,
431  * idle, or a kernel context in which RCU is watching.
432  *
433  * For kernel mode entries RCU handling is done conditional. If RCU is
434  * watching then the only RCU requirement is to check whether the tick has
435  * to be restarted. If RCU is not watching then rcu_irq_enter() has to be
436  * invoked on entry and rcu_irq_exit() on exit.
437  *
438  * Avoiding the rcu_irq_enter/exit() calls is an optimization but also
439  * solves the problem of kernel mode pagefaults which can schedule, which
440  * is not possible after invoking rcu_irq_enter() without undoing it.
441  *
442  * For user mode entries irqentry_enter_from_user_mode() is invoked to
443  * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
444  * would not be possible.
445  *
446  * Returns: An opaque object that must be passed to irqentry_exit()
447  */
448 irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
449 
450 /**
451  * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt
452  *
453  * Conditional reschedule with additional sanity checks.
454  */
455 void irqentry_exit_cond_resched(void);
456 
457 /**
458  * irqentry_exit - Handle return from exception that used irqentry_enter()
459  * @regs:	Pointer to pt_regs (exception entry regs)
460  * @state:	Return value from matching call to irqentry_enter()
461  *
462  * Depending on the return target (kernel/user) this runs the necessary
463  * preemption and work checks if possible and required and returns to
464  * the caller with interrupts disabled and no further work pending.
465  *
466  * This is the last action before returning to the low level ASM code which
467  * just needs to return to the appropriate context.
468  *
469  * Counterpart to irqentry_enter().
470  */
471 void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state);
472 
473 /**
474  * irqentry_nmi_enter - Handle NMI entry
475  * @regs:	Pointer to current's pt_regs
476  *
477  * Similar to irqentry_enter() but taking care of the NMI constraints.
 *
 * Returns: An opaque object that must be passed to irqentry_nmi_exit()
478  */
479 irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs);
480 
481 /**
482  * irqentry_nmi_exit - Handle return from NMI handling
483  * @regs:	Pointer to pt_regs (NMI entry regs)
484  * @irq_state:	Return value from matching call to irqentry_nmi_enter()
485  *
486  * Last action before returning to the low level assembly code.
487  *
488  * Counterpart to irqentry_nmi_enter().
489  */
490 void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state);
491 
492 #endif
493