1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef __LINUX_ENTRYCOMMON_H 3 #define __LINUX_ENTRYCOMMON_H 4 5 #include <linux/static_call_types.h> 6 #include <linux/ptrace.h> 7 #include <linux/syscalls.h> 8 #include <linux/seccomp.h> 9 #include <linux/sched.h> 10 11 #include <asm/entry-common.h> 12 13 /* 14 * Define dummy _TIF work flags if not defined by the architecture or for 15 * disabled functionality. 16 */ 17 #ifndef _TIF_PATCH_PENDING 18 # define _TIF_PATCH_PENDING (0) 19 #endif 20 21 #ifndef _TIF_UPROBE 22 # define _TIF_UPROBE (0) 23 #endif 24 25 /* 26 * SYSCALL_WORK flags handled in syscall_enter_from_user_mode() 27 */ 28 #ifndef ARCH_SYSCALL_WORK_ENTER 29 # define ARCH_SYSCALL_WORK_ENTER (0) 30 #endif 31 32 /* 33 * SYSCALL_WORK flags handled in syscall_exit_to_user_mode() 34 */ 35 #ifndef ARCH_SYSCALL_WORK_EXIT 36 # define ARCH_SYSCALL_WORK_EXIT (0) 37 #endif 38 39 #define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ 40 SYSCALL_WORK_SYSCALL_TRACEPOINT | \ 41 SYSCALL_WORK_SYSCALL_TRACE | \ 42 SYSCALL_WORK_SYSCALL_EMU | \ 43 SYSCALL_WORK_SYSCALL_AUDIT | \ 44 SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ 45 ARCH_SYSCALL_WORK_ENTER) 46 #define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ 47 SYSCALL_WORK_SYSCALL_TRACE | \ 48 SYSCALL_WORK_SYSCALL_AUDIT | \ 49 SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ 50 SYSCALL_WORK_SYSCALL_EXIT_TRAP | \ 51 ARCH_SYSCALL_WORK_EXIT) 52 53 /* 54 * TIF flags handled in exit_to_user_mode_loop() 55 */ 56 #ifndef ARCH_EXIT_TO_USER_MODE_WORK 57 # define ARCH_EXIT_TO_USER_MODE_WORK (0) 58 #endif 59 60 #define EXIT_TO_USER_MODE_WORK \ 61 (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ 62 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ 63 ARCH_EXIT_TO_USER_MODE_WORK) 64 65 /** 66 * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs 67 * @regs: Pointer to currents pt_regs 68 * 69 * Defaults to an empty implementation. Can be replaced by architecture 70 * specific code. 71 * 72 * Invoked from syscall_enter_from_user_mode() in the non-instrumentable 73 * section. Use __always_inline so the compiler cannot push it out of line 74 * and make it instrumentable. 75 */ 76 static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs); 77 78 #ifndef arch_enter_from_user_mode 79 static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {} 80 #endif 81 82 /** 83 * enter_from_user_mode - Establish state when coming from user mode 84 * 85 * Syscall/interrupt entry disables interrupts, but user mode is traced as 86 * interrupts enabled. Also with NO_HZ_FULL RCU might be idle. 87 * 88 * 1) Tell lockdep that interrupts are disabled 89 * 2) Invoke context tracking if enabled to reactivate RCU 90 * 3) Trace interrupts off state 91 * 92 * Invoked from architecture specific syscall entry code with interrupts 93 * disabled. The calling code has to be non-instrumentable. When the 94 * function returns all state is correct and interrupts are still 95 * disabled. The subsequent functions can be instrumented. 96 * 97 * This is invoked when there is architecture specific functionality to be 98 * done between establishing state and enabling interrupts. The caller must 99 * enable interrupts before invoking syscall_enter_from_user_mode_work(). 100 */ 101 void enter_from_user_mode(struct pt_regs *regs); 102 103 /** 104 * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts 105 * @regs: Pointer to currents pt_regs 106 * 107 * Invoked from architecture specific syscall entry code with interrupts 108 * disabled. The calling code has to be non-instrumentable. When the 109 * function returns all state is correct, interrupts are enabled and the 110 * subsequent functions can be instrumented. 111 * 112 * This handles lockdep, RCU (context tracking) and tracing state, i.e. 113 * the functionality provided by enter_from_user_mode(). 114 * 115 * This is invoked when there is extra architecture specific functionality 116 * to be done between establishing state and handling user mode entry work. 117 */ 118 void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); 119 120 /** 121 * syscall_enter_from_user_mode_work - Check and handle work before invoking 122 * a syscall 123 * @regs: Pointer to currents pt_regs 124 * @syscall: The syscall number 125 * 126 * Invoked from architecture specific syscall entry code with interrupts 127 * enabled after invoking syscall_enter_from_user_mode_prepare() and extra 128 * architecture specific work. 129 * 130 * Returns: The original or a modified syscall number 131 * 132 * If the returned syscall number is -1 then the syscall should be 133 * skipped. In this case the caller may invoke syscall_set_error() or 134 * syscall_set_return_value() first. If neither of those are called and -1 135 * is returned, then the syscall will fail with ENOSYS. 136 * 137 * It handles the following work items: 138 * 139 * 1) syscall_work flag dependent invocations of 140 * ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter() 141 * 2) Invocation of audit_syscall_entry() 142 */ 143 long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); 144 145 /** 146 * syscall_enter_from_user_mode - Establish state and check and handle work 147 * before invoking a syscall 148 * @regs: Pointer to currents pt_regs 149 * @syscall: The syscall number 150 * 151 * Invoked from architecture specific syscall entry code with interrupts 152 * disabled. The calling code has to be non-instrumentable. When the 153 * function returns all state is correct, interrupts are enabled and the 154 * subsequent functions can be instrumented. 155 * 156 * This is combination of syscall_enter_from_user_mode_prepare() and 157 * syscall_enter_from_user_mode_work(). 158 * 159 * Returns: The original or a modified syscall number. See 160 * syscall_enter_from_user_mode_work() for further explanation. 161 */ 162 long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall); 163 164 /** 165 * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable() 166 * @ti_work: Cached TIF flags gathered with interrupts disabled 167 * 168 * Defaults to local_irq_enable(). Can be supplied by architecture specific 169 * code. 170 */ 171 static inline void local_irq_enable_exit_to_user(unsigned long ti_work); 172 173 #ifndef local_irq_enable_exit_to_user 174 static inline void local_irq_enable_exit_to_user(unsigned long ti_work) 175 { 176 local_irq_enable(); 177 } 178 #endif 179 180 /** 181 * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable() 182 * 183 * Defaults to local_irq_disable(). Can be supplied by architecture specific 184 * code. 185 */ 186 static inline void local_irq_disable_exit_to_user(void); 187 188 #ifndef local_irq_disable_exit_to_user 189 static inline void local_irq_disable_exit_to_user(void) 190 { 191 local_irq_disable(); 192 } 193 #endif 194 195 /** 196 * arch_exit_to_user_mode_work - Architecture specific TIF work for exit 197 * to user mode. 198 * @regs: Pointer to currents pt_regs 199 * @ti_work: Cached TIF flags gathered with interrupts disabled 200 * 201 * Invoked from exit_to_user_mode_loop() with interrupt enabled 202 * 203 * Defaults to NOOP. Can be supplied by architecture specific code. 204 */ 205 static inline void arch_exit_to_user_mode_work(struct pt_regs *regs, 206 unsigned long ti_work); 207 208 #ifndef arch_exit_to_user_mode_work 209 static inline void arch_exit_to_user_mode_work(struct pt_regs *regs, 210 unsigned long ti_work) 211 { 212 } 213 #endif 214 215 /** 216 * arch_exit_to_user_mode_prepare - Architecture specific preparation for 217 * exit to user mode. 218 * @regs: Pointer to currents pt_regs 219 * @ti_work: Cached TIF flags gathered with interrupts disabled 220 * 221 * Invoked from exit_to_user_mode_prepare() with interrupt disabled as the last 222 * function before return. Defaults to NOOP. 223 */ 224 static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, 225 unsigned long ti_work); 226 227 #ifndef arch_exit_to_user_mode_prepare 228 static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, 229 unsigned long ti_work) 230 { 231 } 232 #endif 233 234 /** 235 * arch_exit_to_user_mode - Architecture specific final work before 236 * exit to user mode. 237 * 238 * Invoked from exit_to_user_mode() with interrupt disabled as the last 239 * function before return. Defaults to NOOP. 240 * 241 * This needs to be __always_inline because it is non-instrumentable code 242 * invoked after context tracking switched to user mode. 243 * 244 * An architecture implementation must not do anything complex, no locking 245 * etc. The main purpose is for speculation mitigations. 246 */ 247 static __always_inline void arch_exit_to_user_mode(void); 248 249 #ifndef arch_exit_to_user_mode 250 static __always_inline void arch_exit_to_user_mode(void) { } 251 #endif 252 253 /** 254 * arch_do_signal_or_restart - Architecture specific signal delivery function 255 * @regs: Pointer to currents pt_regs 256 * @has_signal: actual signal to handle 257 * 258 * Invoked from exit_to_user_mode_loop(). 259 */ 260 void arch_do_signal_or_restart(struct pt_regs *regs); 261 262 /** 263 * exit_to_user_mode - Fixup state when exiting to user mode 264 * 265 * Syscall/interrupt exit enables interrupts, but the kernel state is 266 * interrupts disabled when this is invoked. Also tell RCU about it. 267 * 268 * 1) Trace interrupts on state 269 * 2) Invoke context tracking if enabled to adjust RCU state 270 * 3) Invoke architecture specific last minute exit code, e.g. speculation 271 * mitigations, etc.: arch_exit_to_user_mode() 272 * 4) Tell lockdep that interrupts are enabled 273 * 274 * Invoked from architecture specific code when syscall_exit_to_user_mode() 275 * is not suitable as the last step before returning to userspace. Must be 276 * invoked with interrupts disabled and the caller must be 277 * non-instrumentable. 278 * The caller has to invoke syscall_exit_to_user_mode_work() before this. 279 */ 280 void exit_to_user_mode(void); 281 282 /** 283 * syscall_exit_to_user_mode_work - Handle work before returning to user mode 284 * @regs: Pointer to currents pt_regs 285 * 286 * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling 287 * exit_to_user_mode() to perform the final transition to user mode. 288 * 289 * Calling convention is the same as for syscall_exit_to_user_mode() and it 290 * returns with all work handled and interrupts disabled. The caller must 291 * invoke exit_to_user_mode() before actually switching to user mode to 292 * make the final state transitions. Interrupts must stay disabled between 293 * return from this function and the invocation of exit_to_user_mode(). 294 */ 295 void syscall_exit_to_user_mode_work(struct pt_regs *regs); 296 297 /** 298 * syscall_exit_to_user_mode - Handle work before returning to user mode 299 * @regs: Pointer to currents pt_regs 300 * 301 * Invoked with interrupts enabled and fully valid regs. Returns with all 302 * work handled, interrupts disabled such that the caller can immediately 303 * switch to user mode. Called from architecture specific syscall and ret 304 * from fork code. 305 * 306 * The call order is: 307 * 1) One-time syscall exit work: 308 * - rseq syscall exit 309 * - audit 310 * - syscall tracing 311 * - ptrace (single stepping) 312 * 313 * 2) Preparatory work 314 * - Exit to user mode loop (common TIF handling). Invokes 315 * arch_exit_to_user_mode_work() for architecture specific TIF work 316 * - Architecture specific one time work arch_exit_to_user_mode_prepare() 317 * - Address limit and lockdep checks 318 * 319 * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the 320 * functionality in exit_to_user_mode(). 321 * 322 * This is a combination of syscall_exit_to_user_mode_work() (1,2) and 323 * exit_to_user_mode(). This function is preferred unless there is a 324 * compelling architectural reason to use the separate functions. 325 */ 326 void syscall_exit_to_user_mode(struct pt_regs *regs); 327 328 /** 329 * irqentry_enter_from_user_mode - Establish state before invoking the irq handler 330 * @regs: Pointer to currents pt_regs 331 * 332 * Invoked from architecture specific entry code with interrupts disabled. 333 * Can only be called when the interrupt entry came from user mode. The 334 * calling code must be non-instrumentable. When the function returns all 335 * state is correct and the subsequent functions can be instrumented. 336 * 337 * The function establishes state (lockdep, RCU (context tracking), tracing) 338 */ 339 void irqentry_enter_from_user_mode(struct pt_regs *regs); 340 341 /** 342 * irqentry_exit_to_user_mode - Interrupt exit work 343 * @regs: Pointer to current's pt_regs 344 * 345 * Invoked with interrupts disabled and fully valid regs. Returns with all 346 * work handled, interrupts disabled such that the caller can immediately 347 * switch to user mode. Called from architecture specific interrupt 348 * handling code. 349 * 350 * The call order is #2 and #3 as described in syscall_exit_to_user_mode(). 351 * Interrupt exit is not invoking #1 which is the syscall specific one time 352 * work. 353 */ 354 void irqentry_exit_to_user_mode(struct pt_regs *regs); 355 356 #ifndef irqentry_state 357 /** 358 * struct irqentry_state - Opaque object for exception state storage 359 * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the 360 * exit path has to invoke ct_irq_exit(). 361 * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that 362 * lockdep state is restored correctly on exit from nmi. 363 * 364 * This opaque object is filled in by the irqentry_*_enter() functions and 365 * must be passed back into the corresponding irqentry_*_exit() functions 366 * when the exception is complete. 367 * 368 * Callers of irqentry_*_[enter|exit]() must consider this structure opaque 369 * and all members private. Descriptions of the members are provided to aid in 370 * the maintenance of the irqentry_*() functions. 371 */ 372 typedef struct irqentry_state { 373 union { 374 bool exit_rcu; 375 bool lockdep; 376 }; 377 } irqentry_state_t; 378 #endif 379 380 /** 381 * irqentry_enter - Handle state tracking on ordinary interrupt entries 382 * @regs: Pointer to pt_regs of interrupted context 383 * 384 * Invokes: 385 * - lockdep irqflag state tracking as low level ASM entry disabled 386 * interrupts. 387 * 388 * - Context tracking if the exception hit user mode. 389 * 390 * - The hardirq tracer to keep the state consistent as low level ASM 391 * entry disabled interrupts. 392 * 393 * As a precondition, this requires that the entry came from user mode, 394 * idle, or a kernel context in which RCU is watching. 395 * 396 * For kernel mode entries RCU handling is done conditional. If RCU is 397 * watching then the only RCU requirement is to check whether the tick has 398 * to be restarted. If RCU is not watching then ct_irq_enter() has to be 399 * invoked on entry and ct_irq_exit() on exit. 400 * 401 * Avoiding the ct_irq_enter/exit() calls is an optimization but also 402 * solves the problem of kernel mode pagefaults which can schedule, which 403 * is not possible after invoking ct_irq_enter() without undoing it. 404 * 405 * For user mode entries irqentry_enter_from_user_mode() is invoked to 406 * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit 407 * would not be possible. 408 * 409 * Returns: An opaque object that must be passed to idtentry_exit() 410 */ 411 irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); 412 413 /** 414 * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt 415 * 416 * Conditional reschedule with additional sanity checks. 417 */ 418 void raw_irqentry_exit_cond_resched(void); 419 #ifdef CONFIG_PREEMPT_DYNAMIC 420 #if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) 421 #define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched 422 #define irqentry_exit_cond_resched_dynamic_disabled NULL 423 DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); 424 #define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)() 425 #elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) 426 DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); 427 void dynamic_irqentry_exit_cond_resched(void); 428 #define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched() 429 #endif 430 #else /* CONFIG_PREEMPT_DYNAMIC */ 431 #define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() 432 #endif /* CONFIG_PREEMPT_DYNAMIC */ 433 434 /** 435 * irqentry_exit - Handle return from exception that used irqentry_enter() 436 * @regs: Pointer to pt_regs (exception entry regs) 437 * @state: Return value from matching call to irqentry_enter() 438 * 439 * Depending on the return target (kernel/user) this runs the necessary 440 * preemption and work checks if possible and required and returns to 441 * the caller with interrupts disabled and no further work pending. 442 * 443 * This is the last action before returning to the low level ASM code which 444 * just needs to return to the appropriate context. 445 * 446 * Counterpart to irqentry_enter(). 447 */ 448 void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state); 449 450 /** 451 * irqentry_nmi_enter - Handle NMI entry 452 * @regs: Pointer to currents pt_regs 453 * 454 * Similar to irqentry_enter() but taking care of the NMI constraints. 455 */ 456 irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs); 457 458 /** 459 * irqentry_nmi_exit - Handle return from NMI handling 460 * @regs: Pointer to pt_regs (NMI entry regs) 461 * @irq_state: Return value from matching call to irqentry_nmi_enter() 462 * 463 * Last action before returning to the low level assembly code. 464 * 465 * Counterpart to irqentry_nmi_enter(). 466 */ 467 void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state); 468 469 #endif 470