/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_ENTRYCOMMON_H
#define __LINUX_ENTRYCOMMON_H

#include <linux/static_call_types.h>
#include <linux/ptrace.h>
#include <linux/syscalls.h>
#include <linux/seccomp.h>
#include <linux/sched.h>

#include <asm/entry-common.h>

/*
 * Define dummy _TIF work flags if not defined by the architecture or for
 * disabled functionality.
 */
#ifndef _TIF_PATCH_PENDING
# define _TIF_PATCH_PENDING		(0)
#endif

#ifndef _TIF_UPROBE
# define _TIF_UPROBE			(0)
#endif

/*
 * SYSCALL_WORK flags handled in syscall_enter_from_user_mode()
 */
#ifndef ARCH_SYSCALL_WORK_ENTER
# define ARCH_SYSCALL_WORK_ENTER	(0)
#endif

/*
 * SYSCALL_WORK flags handled in syscall_exit_to_user_mode()
 */
#ifndef ARCH_SYSCALL_WORK_EXIT
# define ARCH_SYSCALL_WORK_EXIT		(0)
#endif

#define SYSCALL_WORK_ENTER	(SYSCALL_WORK_SECCOMP |			\
				 SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
				 SYSCALL_WORK_SYSCALL_TRACE |		\
				 SYSCALL_WORK_SYSCALL_EMU |		\
				 SYSCALL_WORK_SYSCALL_AUDIT |		\
				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
				 ARCH_SYSCALL_WORK_ENTER)
#define SYSCALL_WORK_EXIT	(SYSCALL_WORK_SYSCALL_TRACEPOINT |	\
				 SYSCALL_WORK_SYSCALL_TRACE |		\
				 SYSCALL_WORK_SYSCALL_AUDIT |		\
				 SYSCALL_WORK_SYSCALL_USER_DISPATCH |	\
				 SYSCALL_WORK_SYSCALL_EXIT_TRAP |	\
				 ARCH_SYSCALL_WORK_EXIT)

/*
 * TIF flags handled in exit_to_user_mode_loop()
 */
#ifndef ARCH_EXIT_TO_USER_MODE_WORK
# define ARCH_EXIT_TO_USER_MODE_WORK	(0)
#endif

#define EXIT_TO_USER_MODE_WORK						\
	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |	\
	 ARCH_EXIT_TO_USER_MODE_WORK)

/**
 * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs
 * @regs:	Pointer to current's pt_regs
 *
 * Defaults to an empty implementation. Can be replaced by architecture
 * specific code.
 *
 * Invoked from syscall_enter_from_user_mode() in the non-instrumentable
 * section. Use __always_inline so the compiler cannot push it out of line
 * and make it instrumentable.
 */
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs);

#ifndef arch_enter_from_user_mode
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {}
#endif
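/*
 * Illustrative sketch only (not taken from any particular architecture): an
 * architecture replaces the empty default above by providing both the inline
 * and a macro of the same name in its asm/entry-common.h, so the #ifndef
 * keeps the dummy out of the build:
 *
 *	static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
 *	{
 *		WARN_ON_ONCE(!user_mode(regs));
 *	}
 *	#define arch_enter_from_user_mode arch_enter_from_user_mode
 *
 * The same override pattern applies to the other arch_*() hooks declared in
 * this header.
 */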
/**
 * enter_from_user_mode - Establish state when coming from user mode
 *
 * Syscall/interrupt entry disables interrupts, but user mode is traced as
 * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
 *
 * 1) Tell lockdep that interrupts are disabled
 * 2) Invoke context tracking if enabled to reactivate RCU
 * 3) Trace interrupts off state
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * disabled. The calling code has to be non-instrumentable. When the
 * function returns all state is correct and interrupts are still
 * disabled. The subsequent functions can be instrumented.
 *
 * This is invoked when there is architecture specific functionality to be
 * done between establishing state and enabling interrupts. The caller must
 * enable interrupts before invoking syscall_enter_from_user_mode_work().
 */
void enter_from_user_mode(struct pt_regs *regs);

/**
 * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * disabled. The calling code has to be non-instrumentable. When the
 * function returns all state is correct, interrupts are enabled and the
 * subsequent functions can be instrumented.
 *
 * This handles lockdep, RCU (context tracking) and tracing state, i.e.
 * the functionality provided by enter_from_user_mode().
 *
 * This is invoked when there is extra architecture specific functionality
 * to be done between establishing state and handling user mode entry work.
 */
void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);

/**
 * syscall_enter_from_user_mode_work - Check and handle work before invoking
 *				       a syscall
 * @regs:	Pointer to current's pt_regs
 * @syscall:	The syscall number
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
 * architecture specific work.
 *
 * Returns: The original or a modified syscall number
 *
 * If the returned syscall number is -1 then the syscall should be
 * skipped. In this case the caller may invoke syscall_set_error() or
 * syscall_set_return_value() first. If neither of those are called and -1
 * is returned, then the syscall will fail with ENOSYS.
 *
 * It handles the following work items:
 *
 *  1) syscall_work flag dependent invocations of
 *     ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter()
 *  2) Invocation of audit_syscall_entry()
 */
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);

/**
 * syscall_enter_from_user_mode - Establish state and check and handle work
 *				  before invoking a syscall
 * @regs:	Pointer to current's pt_regs
 * @syscall:	The syscall number
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * disabled. The calling code has to be non-instrumentable. When the
 * function returns all state is correct, interrupts are enabled and the
 * subsequent functions can be instrumented.
 *
 * This is a combination of syscall_enter_from_user_mode_prepare() and
 * syscall_enter_from_user_mode_work().
 *
 * Returns: The original or a modified syscall number. See
 * syscall_enter_from_user_mode_work() for further explanation.
 */
long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
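/*
 * Illustrative sketch of a caller (hypothetical architecture; the regs field
 * and dispatch helper are invented for the example): the combined helper
 * establishes state, enables interrupts and handles the entry work in one
 * call, so only the dispatch itself needs instrumentation markers:
 *
 *	__visible noinstr void arch_syscall_entry(struct pt_regs *regs)
 *	{
 *		long nr = regs->syscall_nr;			// hypothetical field
 *
 *		nr = syscall_enter_from_user_mode(regs, nr);
 *
 *		instrumentation_begin();
 *		if (nr != -1)
 *			arch_dispatch_syscall(regs, nr);	// hypothetical helper
 *		instrumentation_end();
 *
 *		syscall_exit_to_user_mode(regs);
 *	}
 *
 * Architectures which need extra work between establishing state and
 * handling the entry work use syscall_enter_from_user_mode_prepare() plus
 * syscall_enter_from_user_mode_work() instead.
 */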
/**
 * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable()
 * @ti_work:	Cached TIF flags gathered with interrupts disabled
 *
 * Defaults to local_irq_enable(). Can be supplied by architecture specific
 * code.
 */
static inline void local_irq_enable_exit_to_user(unsigned long ti_work);

#ifndef local_irq_enable_exit_to_user
static inline void local_irq_enable_exit_to_user(unsigned long ti_work)
{
	local_irq_enable();
}
#endif

/**
 * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable()
 *
 * Defaults to local_irq_disable(). Can be supplied by architecture specific
 * code.
 */
static inline void local_irq_disable_exit_to_user(void);

#ifndef local_irq_disable_exit_to_user
static inline void local_irq_disable_exit_to_user(void)
{
	local_irq_disable();
}
#endif

/**
 * arch_exit_to_user_mode_work - Architecture specific TIF work for exit
 *				 to user mode.
 * @regs:	Pointer to current's pt_regs
 * @ti_work:	Cached TIF flags gathered with interrupts disabled
 *
 * Invoked from exit_to_user_mode_loop() with interrupts enabled
 *
 * Defaults to NOOP. Can be supplied by architecture specific code.
 */
static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
					       unsigned long ti_work);

#ifndef arch_exit_to_user_mode_work
static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
					       unsigned long ti_work)
{
}
#endif
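/*
 * Illustrative sketch only (the TIF flag name is invented): an architecture
 * with its own exit work flag adds it to ARCH_EXIT_TO_USER_MODE_WORK so that
 * exit_to_user_mode_loop() keeps iterating while the flag is set, and clears
 * and handles it in its arch_exit_to_user_mode_work() override:
 *
 *	#define ARCH_EXIT_TO_USER_MODE_WORK	(_TIF_ARCH_EXAMPLE)
 *
 *	static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
 *						       unsigned long ti_work)
 *	{
 *		if (ti_work & _TIF_ARCH_EXAMPLE) {
 *			clear_thread_flag(TIF_ARCH_EXAMPLE);
 *			// handle the architecture specific work here
 *		}
 *	}
 *	#define arch_exit_to_user_mode_work arch_exit_to_user_mode_work
 */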
/**
 * arch_exit_to_user_mode_prepare - Architecture specific preparation for
 *				    exit to user mode.
 * @regs:	Pointer to current's pt_regs
 * @ti_work:	Cached TIF flags gathered with interrupts disabled
 *
 * Invoked from exit_to_user_mode_prepare() with interrupts disabled as the
 * last function before return. Defaults to NOOP.
 */
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
						  unsigned long ti_work);

#ifndef arch_exit_to_user_mode_prepare
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
						  unsigned long ti_work)
{
}
#endif

/**
 * arch_exit_to_user_mode - Architecture specific final work before
 *			    exit to user mode.
 *
 * Invoked from exit_to_user_mode() with interrupts disabled as the last
 * function before return. Defaults to NOOP.
 *
 * This needs to be __always_inline because it is non-instrumentable code
 * invoked after context tracking switched to user mode.
 *
 * An architecture implementation must not do anything complex, no locking
 * etc. The main purpose is for speculation mitigations.
 */
static __always_inline void arch_exit_to_user_mode(void);

#ifndef arch_exit_to_user_mode
static __always_inline void arch_exit_to_user_mode(void) { }
#endif

/**
 * arch_do_signal_or_restart - Architecture specific signal delivery function
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked from exit_to_user_mode_loop().
 */
void arch_do_signal_or_restart(struct pt_regs *regs);

/**
 * exit_to_user_mode - Fixup state when exiting to user mode
 *
 * Syscall/interrupt exit enables interrupts, but the kernel state is
 * interrupts disabled when this is invoked. Also tell RCU about it.
 *
 * 1) Trace interrupts on state
 * 2) Invoke context tracking if enabled to adjust RCU state
 * 3) Invoke architecture specific last minute exit code, e.g. speculation
 *    mitigations, etc.: arch_exit_to_user_mode()
 * 4) Tell lockdep that interrupts are enabled
 *
 * Invoked from architecture specific code when syscall_exit_to_user_mode()
 * is not suitable as the last step before returning to userspace. Must be
 * invoked with interrupts disabled and the caller must be
 * non-instrumentable.
 * The caller has to invoke syscall_exit_to_user_mode_work() before this.
 */
void exit_to_user_mode(void);

/**
 * syscall_exit_to_user_mode_work - Handle work before returning to user mode
 * @regs:	Pointer to current's pt_regs
 *
 * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
 * exit_to_user_mode() to perform the final transition to user mode.
 *
 * Calling convention is the same as for syscall_exit_to_user_mode() and it
 * returns with all work handled and interrupts disabled. The caller must
 * invoke exit_to_user_mode() before actually switching to user mode to
 * make the final state transitions. Interrupts must stay disabled between
 * return from this function and the invocation of exit_to_user_mode().
 */
void syscall_exit_to_user_mode_work(struct pt_regs *regs);

/**
 * syscall_exit_to_user_mode - Handle work before returning to user mode
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked with interrupts enabled and fully valid regs. Returns with all
 * work handled, interrupts disabled such that the caller can immediately
 * switch to user mode. Called from architecture specific syscall and ret
 * from fork code.
 *
 * The call order is:
 *  1) One-time syscall exit work:
 *	- rseq syscall exit
 *	- audit
 *	- syscall tracing
 *	- ptrace (single stepping)
 *
 *  2) Preparatory work
 *	- Exit to user mode loop (common TIF handling). Invokes
 *	  arch_exit_to_user_mode_work() for architecture specific TIF work
 *	- Architecture specific one time work arch_exit_to_user_mode_prepare()
 *	- Address limit and lockdep checks
 *
 *  3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
 *     functionality in exit_to_user_mode().
 *
 * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
 * exit_to_user_mode(). This function is preferred unless there is a
 * compelling architectural reason to use the separate functions.
 */
void syscall_exit_to_user_mode(struct pt_regs *regs);
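/*
 * Illustrative sketch only (hypothetical architecture): when extra
 * non-instrumentable architecture work has to run after the exit work but
 * before the final transition, the split variant is used. Interrupts stay
 * disabled between the two calls:
 *
 *	__visible noinstr void arch_syscall_exit(struct pt_regs *regs)
 *	{
 *		instrumentation_begin();
 *		syscall_exit_to_user_mode_work(regs);
 *		instrumentation_end();
 *
 *		// ... architecture specific, non-instrumentable work ...
 *
 *		exit_to_user_mode();
 *	}
 *
 * Without such a requirement, a single call to syscall_exit_to_user_mode()
 * replaces both steps.
 */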
/**
 * irqentry_enter_from_user_mode - Establish state before invoking the irq handler
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked from architecture specific entry code with interrupts disabled.
 * Can only be called when the interrupt entry came from user mode. The
 * calling code must be non-instrumentable. When the function returns all
 * state is correct and the subsequent functions can be instrumented.
 *
 * The function establishes state (lockdep, RCU (context tracking), tracing).
 */
void irqentry_enter_from_user_mode(struct pt_regs *regs);

/**
 * irqentry_exit_to_user_mode - Interrupt exit work
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked with interrupts disabled and fully valid regs. Returns with all
 * work handled, interrupts disabled such that the caller can immediately
 * switch to user mode. Called from architecture specific interrupt
 * handling code.
 *
 * The call order is #2 and #3 as described in syscall_exit_to_user_mode().
 * Interrupt exit does not invoke #1, the syscall specific one time work.
 */
void irqentry_exit_to_user_mode(struct pt_regs *regs);

#ifndef irqentry_state
/**
 * struct irqentry_state - Opaque object for exception state storage
 * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
 *            exit path has to invoke ct_irq_exit().
 * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
 *           lockdep state is restored correctly on exit from nmi.
 *
 * This opaque object is filled in by the irqentry_*_enter() functions and
 * must be passed back into the corresponding irqentry_*_exit() functions
 * when the exception is complete.
 *
 * Callers of irqentry_*_[enter|exit]() must consider this structure opaque
 * and all members private. Descriptions of the members are provided to aid
 * in the maintenance of the irqentry_*() functions.
 */
typedef struct irqentry_state {
	union {
		bool	exit_rcu;
		bool	lockdep;
	};
} irqentry_state_t;
#endif

/**
 * irqentry_enter - Handle state tracking on ordinary interrupt entries
 * @regs:	Pointer to pt_regs of interrupted context
 *
 * Invokes:
 *  - lockdep irqflag state tracking as low level ASM entry disabled
 *    interrupts.
 *
 *  - Context tracking if the exception hit user mode.
 *
 *  - The hardirq tracer to keep the state consistent as low level ASM
 *    entry disabled interrupts.
 *
 * As a precondition, this requires that the entry came from user mode,
 * idle, or a kernel context in which RCU is watching.
 *
 * For kernel mode entries RCU handling is done conditionally. If RCU is
 * watching then the only RCU requirement is to check whether the tick has
 * to be restarted. If RCU is not watching then ct_irq_enter() has to be
 * invoked on entry and ct_irq_exit() on exit.
 *
 * Avoiding the ct_irq_enter/exit() calls is an optimization but also
 * solves the problem of kernel mode pagefaults which can schedule, which
 * is not possible after invoking ct_irq_enter() without undoing it.
 *
 * For user mode entries irqentry_enter_from_user_mode() is invoked to
 * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
 * would not be possible.
 *
 * Returns: An opaque object that must be passed to irqentry_exit()
 */
irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);

/**
 * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt
 *
 * Conditional reschedule with additional sanity checks.
 */
void raw_irqentry_exit_cond_resched(void);
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#define irqentry_exit_cond_resched_dynamic_enabled	raw_irqentry_exit_cond_resched
#define irqentry_exit_cond_resched_dynamic_disabled	NULL
DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
#define irqentry_exit_cond_resched()	static_call(irqentry_exit_cond_resched)()
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
void dynamic_irqentry_exit_cond_resched(void);
#define irqentry_exit_cond_resched()	dynamic_irqentry_exit_cond_resched()
#endif
#else /* CONFIG_PREEMPT_DYNAMIC */
#define irqentry_exit_cond_resched()	raw_irqentry_exit_cond_resched()
#endif /* CONFIG_PREEMPT_DYNAMIC */

/**
 * irqentry_exit - Handle return from exception that used irqentry_enter()
 * @regs:	Pointer to pt_regs (exception entry regs)
 * @state:	Return value from matching call to irqentry_enter()
 *
 * Depending on the return target (kernel/user) this runs the necessary
 * preemption and work checks if possible and required and returns to
 * the caller with interrupts disabled and no further work pending.
 *
 * This is the last action before returning to the low level ASM code which
 * just needs to return to the appropriate context.
 *
 * Counterpart to irqentry_enter().
 */
void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state);
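/*
 * Illustrative sketch only (hypothetical architecture and handler names):
 * typical pairing of irqentry_enter() and irqentry_exit() around an
 * exception or interrupt handler. Everything between the two calls runs
 * with correct state and may be instrumented:
 *
 *	__visible noinstr void arch_handle_example_exception(struct pt_regs *regs)
 *	{
 *		irqentry_state_t state = irqentry_enter(regs);
 *
 *		instrumentation_begin();
 *		do_example_exception(regs);		// hypothetical handler
 *		instrumentation_end();
 *
 *		irqentry_exit(regs, state);
 *	}
 */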
/**
 * irqentry_nmi_enter - Handle NMI entry
 * @regs:	Pointer to current's pt_regs
 *
 * Similar to irqentry_enter() but taking care of the NMI constraints.
 */
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs);

/**
 * irqentry_nmi_exit - Handle return from NMI handling
 * @regs:	Pointer to pt_regs (NMI entry regs)
 * @irq_state:	Return value from matching call to irqentry_nmi_enter()
 *
 * Last action before returning to the low level assembly code.
 *
 * Counterpart to irqentry_nmi_enter().
 */
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state);

#endif