/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_IRQENTRYCOMMON_H
#define __LINUX_IRQENTRYCOMMON_H

#include <linux/context_tracking.h>
#include <linux/kmsan.h>
#include <linux/rseq_entry.h>
#include <linux/static_call_types.h>
#include <linux/syscalls.h>
#include <linux/tick.h>
#include <linux/unwind_deferred.h>

#include <asm/entry-common.h>

/*
 * Define dummy _TIF work flags if not defined by the architecture or for
 * disabled functionality.
 */
#ifndef _TIF_PATCH_PENDING
# define _TIF_PATCH_PENDING		(0)
#endif

/*
 * TIF flags handled in exit_to_user_mode_loop()
 */
#ifndef ARCH_EXIT_TO_USER_MODE_WORK
# define ARCH_EXIT_TO_USER_MODE_WORK	(0)
#endif

#define EXIT_TO_USER_MODE_WORK						\
	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
	 _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY |			\
	 _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | _TIF_RSEQ |		\
	 ARCH_EXIT_TO_USER_MODE_WORK)

/**
 * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs
 * @regs:	Pointer to current's pt_regs
 *
 * Defaults to an empty implementation. Can be replaced by architecture
 * specific code.
 *
 * Invoked from syscall_enter_from_user_mode() in the non-instrumentable
 * section. Use __always_inline so the compiler cannot push it out of line
 * and make it instrumentable.
 */
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs);

#ifndef arch_enter_from_user_mode
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {}
#endif

/**
 * arch_in_rcu_eqs - Architecture specific check for RCU extended quiescent
 *		     states.
 *
 * Returns: true if the CPU is potentially in an RCU EQS, false otherwise.
 *
 * Architectures only need to define this if threads other than the idle thread
 * may have an interruptible EQS. This does not need to handle idle threads. It
 * is safe to over-estimate at the cost of redundant RCU management work.
 *
 * Invoked from irqentry_enter()
 */
#ifndef arch_in_rcu_eqs
static __always_inline bool arch_in_rcu_eqs(void) { return false; }
#endif

/**
 * enter_from_user_mode - Establish state when coming from user mode
 * @regs:	Pointer to current's pt_regs
 *
 * Syscall/interrupt entry disables interrupts, but user mode is traced as
 * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
 *
 * 1) Tell lockdep that interrupts are disabled
 * 2) Invoke context tracking if enabled to reactivate RCU
 * 3) Trace interrupts off state
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * disabled. The calling code has to be non-instrumentable. When the
 * function returns all state is correct and interrupts are still
 * disabled. The subsequent functions can be instrumented.
 *
 * This is invoked when there is architecture specific functionality to be
 * done between establishing state and enabling interrupts. The caller must
 * enable interrupts before invoking syscall_enter_from_user_mode_work().
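 *
 * A rough, illustrative sketch of the expected shape of such architecture
 * entry code. Only the outer function name is hypothetical; the helpers are
 * the ones documented in this header and in <linux/entry-common.h>:
 *
 *	static void noinstr arch_syscall_entry(struct pt_regs *regs, long nr)
 *	{
 *		enter_from_user_mode(regs);
 *
 *		instrumentation_begin();
 *		// architecture specific work with entry state established
 *		local_irq_enable();
 *		nr = syscall_enter_from_user_mode_work(regs, nr);
 *		// dispatch the syscall, then run the syscall exit path
 *		instrumentation_end();
 *	}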
 */
static __always_inline void enter_from_user_mode(struct pt_regs *regs)
{
	arch_enter_from_user_mode(regs);
	lockdep_hardirqs_off(CALLER_ADDR0);

	CT_WARN_ON(__ct_state() != CT_STATE_USER);
	user_exit_irqoff();

	instrumentation_begin();
	kmsan_unpoison_entry_regs(regs);
	trace_hardirqs_off_finish();
	instrumentation_end();
}

/**
 * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable()
 * @ti_work:	Cached TIF flags gathered with interrupts disabled
 *
 * Defaults to local_irq_enable(). Can be supplied by architecture specific
 * code.
 */
static inline void local_irq_enable_exit_to_user(unsigned long ti_work);

#ifndef local_irq_enable_exit_to_user
static inline void local_irq_enable_exit_to_user(unsigned long ti_work)
{
	local_irq_enable();
}
#endif

/**
 * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable()
 *
 * Defaults to local_irq_disable(). Can be supplied by architecture specific
 * code.
 */
static inline void local_irq_disable_exit_to_user(void);

#ifndef local_irq_disable_exit_to_user
static inline void local_irq_disable_exit_to_user(void)
{
	local_irq_disable();
}
#endif

/**
 * arch_exit_to_user_mode_work - Architecture specific TIF work for exit
 *				 to user mode.
 * @regs:	Pointer to current's pt_regs
 * @ti_work:	Cached TIF flags gathered with interrupts disabled
 *
 * Invoked from exit_to_user_mode_loop() with interrupts enabled
 *
 * Defaults to NOOP. Can be supplied by architecture specific code.
 */
static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
					       unsigned long ti_work);

#ifndef arch_exit_to_user_mode_work
static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
					       unsigned long ti_work)
{
}
#endif

/**
 * arch_exit_to_user_mode_prepare - Architecture specific preparation for
 *				    exit to user mode.
 * @regs:	Pointer to current's pt_regs
 * @ti_work:	Cached TIF flags gathered with interrupts disabled
 *
 * Invoked from __exit_to_user_mode_prepare() with interrupts disabled as the
 * last function before return. Defaults to NOOP.
 */
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
						  unsigned long ti_work);

#ifndef arch_exit_to_user_mode_prepare
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
						  unsigned long ti_work)
{
}
#endif

/**
 * arch_exit_to_user_mode - Architecture specific final work before
 *			    exit to user mode.
 *
 * Invoked from exit_to_user_mode() with interrupts disabled as the last
 * function before return. Defaults to NOOP.
 *
 * This needs to be __always_inline because it is non-instrumentable code
 * invoked after context tracking switched to user mode.
 *
 * An architecture implementation must not do anything complex, no locking
 * etc. The main purpose is for speculation mitigations.
 */
static __always_inline void arch_exit_to_user_mode(void);

#ifndef arch_exit_to_user_mode
static __always_inline void arch_exit_to_user_mode(void) { }
#endif

/**
 * arch_do_signal_or_restart - Architecture specific signal delivery function
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked from exit_to_user_mode_loop().
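 *
 * A minimal sketch of what an implementation typically does; the signal
 * frame setup helper shown is architecture specific and purely illustrative,
 * while get_signal() and restore_saved_sigmask() are the generic helpers:
 *
 *	void arch_do_signal_or_restart(struct pt_regs *regs)
 *	{
 *		struct ksignal ksig;
 *
 *		if (get_signal(&ksig)) {
 *			// handle syscall restart and set up the signal frame
 *			arch_setup_signal_frame(&ksig, regs);	// illustrative name
 *			return;
 *		}
 *		// No handler: restart the syscall if required, then restore
 *		// the saved sigmask.
 *		restore_saved_sigmask();
 *	}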
 */
void arch_do_signal_or_restart(struct pt_regs *regs);

/* Handle pending TIF work */
unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work);

/**
 * __exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
 * @regs:	Pointer to pt_regs on entry stack
 *
 * 1) check that interrupts are disabled
 * 2) call tick_nohz_user_enter_prepare()
 * 3) call exit_to_user_mode_loop() if any flags from
 *    EXIT_TO_USER_MODE_WORK are set
 * 4) check that interrupts are still disabled
 *
 * Don't invoke directly, use the syscall/irqentry_ prefixed variants below
 */
static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs)
{
	unsigned long ti_work;

	lockdep_assert_irqs_disabled();

	/* Flush pending rcuog wakeup before the last need_resched() check */
	tick_nohz_user_enter_prepare();

	ti_work = read_thread_flags();
	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
		ti_work = exit_to_user_mode_loop(regs, ti_work);

	arch_exit_to_user_mode_prepare(regs, ti_work);
}

static __always_inline void __exit_to_user_mode_validate(void)
{
	/* Ensure that kernel state is sane for a return to userspace */
	kmap_assert_nomap();
	lockdep_assert_irqs_disabled();
	lockdep_sys_exit();
}

/* Temporary workaround to keep ARM64 alive */
static __always_inline void exit_to_user_mode_prepare_legacy(struct pt_regs *regs)
{
	__exit_to_user_mode_prepare(regs);
	rseq_exit_to_user_mode_legacy();
	__exit_to_user_mode_validate();
}

/**
 * syscall_exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
 * @regs:	Pointer to pt_regs on entry stack
 *
 * Wrapper around __exit_to_user_mode_prepare() to separate the exit work for
 * syscalls and interrupts.
 */
static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
{
	__exit_to_user_mode_prepare(regs);
	rseq_syscall_exit_to_user_mode();
	__exit_to_user_mode_validate();
}

/**
 * irqentry_exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
 * @regs:	Pointer to pt_regs on entry stack
 *
 * Wrapper around __exit_to_user_mode_prepare() to separate the exit work for
 * syscalls and interrupts.
 */
static __always_inline void irqentry_exit_to_user_mode_prepare(struct pt_regs *regs)
{
	__exit_to_user_mode_prepare(regs);
	rseq_irqentry_exit_to_user_mode();
	__exit_to_user_mode_validate();
}

/**
 * exit_to_user_mode - Fixup state when exiting to user mode
 *
 * Syscall/interrupt exit enables interrupts, but the kernel state is
 * interrupts disabled when this is invoked. Also tell RCU about it.
 *
 * 1) Trace interrupts on state
 * 2) Invoke context tracking if enabled to adjust RCU state
 * 3) Invoke architecture specific last minute exit code, e.g. speculation
 *    mitigations, etc.: arch_exit_to_user_mode()
 * 4) Tell lockdep that interrupts are enabled
 *
 * Invoked from architecture specific code when syscall_exit_to_user_mode()
 * is not suitable as the last step before returning to userspace. Must be
 * invoked with interrupts disabled and the caller must be
 * non-instrumentable.
 * The caller has to invoke syscall_exit_to_user_mode_work() before this.
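 *
 * A rough, illustrative sketch of such architecture code; only the outer
 * function name is hypothetical, syscall_exit_to_user_mode_work() is the
 * helper from <linux/entry-common.h> mentioned above:
 *
 *	static void noinstr arch_syscall_exit(struct pt_regs *regs)
 *	{
 *		instrumentation_begin();
 *		syscall_exit_to_user_mode_work(regs);
 *		// additional architecture specific exit work can go here
 *		instrumentation_end();
 *
 *		exit_to_user_mode();
 *	}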
 */
static __always_inline void exit_to_user_mode(void)
{
	instrumentation_begin();
	unwind_reset_info();
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare();
	instrumentation_end();

	user_enter_irqoff();
	arch_exit_to_user_mode();
	lockdep_hardirqs_on(CALLER_ADDR0);
}

/**
 * irqentry_enter_from_user_mode - Establish state before invoking the irq handler
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked from architecture specific entry code with interrupts disabled.
 * Can only be called when the interrupt entry came from user mode. The
 * calling code must be non-instrumentable. When the function returns all
 * state is correct and the subsequent functions can be instrumented.
 *
 * The function establishes state (lockdep, RCU (context tracking), tracing)
 */
static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
	enter_from_user_mode(regs);
	rseq_note_user_irq_entry();
}

/**
 * irqentry_exit_to_user_mode - Interrupt exit work
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked with interrupts disabled and fully valid regs. Returns with all
 * work handled, interrupts disabled such that the caller can immediately
 * switch to user mode. Called from architecture specific interrupt
 * handling code.
 *
 * The call order is #2 and #3 as described in syscall_exit_to_user_mode().
 * Interrupt exit is not invoking #1 which is the syscall specific one time
 * work.
 */
static __always_inline void irqentry_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	irqentry_exit_to_user_mode_prepare(regs);
	instrumentation_end();
	exit_to_user_mode();
}

#ifndef irqentry_state
/**
 * struct irqentry_state - Opaque object for exception state storage
 * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
 *	      exit path has to invoke ct_irq_exit().
 * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
 *	     lockdep state is restored correctly on exit from nmi.
 *
 * This opaque object is filled in by the irqentry_*_enter() functions and
 * must be passed back into the corresponding irqentry_*_exit() functions
 * when the exception is complete.
 *
 * Callers of irqentry_*_[enter|exit]() must consider this structure opaque
 * and all members private. Descriptions of the members are provided to aid in
 * the maintenance of the irqentry_*() functions.
 */
typedef struct irqentry_state {
	union {
		bool	exit_rcu;
		bool	lockdep;
	};
} irqentry_state_t;
#endif

/**
 * irqentry_enter - Handle state tracking on ordinary interrupt entries
 * @regs:	Pointer to pt_regs of interrupted context
 *
 * Invokes:
 *  - lockdep irqflag state tracking as low level ASM entry disabled
 *    interrupts.
 *
 *  - Context tracking if the exception hit user mode.
 *
 *  - The hardirq tracer to keep the state consistent as low level ASM
 *    entry disabled interrupts.
 *
 * As a precondition, this requires that the entry came from user mode,
 * idle, or a kernel context in which RCU is watching.
 *
 * For kernel mode entries RCU handling is done conditionally. If RCU is
 * watching then the only RCU requirement is to check whether the tick has
 * to be restarted. If RCU is not watching then ct_irq_enter() has to be
 * invoked on entry and ct_irq_exit() on exit.
 *
 * Avoiding the ct_irq_enter/exit() calls is an optimization but also
 * solves the problem of kernel mode pagefaults which can schedule, which
 * is not possible after invoking ct_irq_enter() without undoing it.
 *
 * For user mode entries irqentry_enter_from_user_mode() is invoked to
 * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
 * would not be possible.
 *
 * Returns: An opaque object that must be passed to irqentry_exit()
 */
irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);

/**
 * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt
 *
 * Conditional reschedule with additional sanity checks.
 */
void raw_irqentry_exit_cond_resched(void);

#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#define irqentry_exit_cond_resched_dynamic_enabled	raw_irqentry_exit_cond_resched
#define irqentry_exit_cond_resched_dynamic_disabled	NULL
DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
#define irqentry_exit_cond_resched()	static_call(irqentry_exit_cond_resched)()
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
void dynamic_irqentry_exit_cond_resched(void);
#define irqentry_exit_cond_resched()	dynamic_irqentry_exit_cond_resched()
#endif
#else /* CONFIG_PREEMPT_DYNAMIC */
#define irqentry_exit_cond_resched()	raw_irqentry_exit_cond_resched()
#endif /* CONFIG_PREEMPT_DYNAMIC */

/**
 * irqentry_exit - Handle return from exception that used irqentry_enter()
 * @regs:	Pointer to pt_regs (exception entry regs)
 * @state:	Return value from matching call to irqentry_enter()
 *
 * Depending on the return target (kernel/user) this runs the necessary
 * preemption and work checks if possible and required and returns to
 * the caller with interrupts disabled and no further work pending.
 *
 * This is the last action before returning to the low level ASM code which
 * just needs to return to the appropriate context.
 *
 * Counterpart to irqentry_enter().
 */
void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state);

/**
 * irqentry_nmi_enter - Handle NMI entry
 * @regs:	Pointer to current's pt_regs
 *
 * Similar to irqentry_enter() but taking care of the NMI constraints.
 */
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs);

/**
 * irqentry_nmi_exit - Handle return from NMI handling
 * @regs:	Pointer to pt_regs (NMI entry regs)
 * @irq_state:	Return value from matching call to irqentry_nmi_enter()
 *
 * Last action before returning to the low level assembly code.
 *
 * Counterpart to irqentry_nmi_enter().
 */
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state);

#endif
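
/*
 * Illustrative only, not part of this header's API: a typical architecture
 * level interrupt entry wrapper built on irqentry_enter()/irqentry_exit()
 * looks roughly like this. The wrapper name and the dispatch call are
 * hypothetical placeholders for the architecture's own code:
 *
 *	static void noinstr arch_handle_interrupt(struct pt_regs *regs)
 *	{
 *		irqentry_state_t state = irqentry_enter(regs);
 *
 *		instrumentation_begin();
 *		arch_irq_dispatch(regs);	// invoke the actual handlers
 *		instrumentation_end();
 *
 *		irqentry_exit(regs, state);
 *	}
 */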