1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef __LINUX_IRQENTRYCOMMON_H 3 #define __LINUX_IRQENTRYCOMMON_H 4 5 #include <linux/context_tracking.h> 6 #include <linux/hrtimer_rearm.h> 7 #include <linux/kmsan.h> 8 #include <linux/rseq_entry.h> 9 #include <linux/static_call_types.h> 10 #include <linux/syscalls.h> 11 #include <linux/tick.h> 12 #include <linux/unwind_deferred.h> 13 14 #include <asm/entry-common.h> 15 16 /* 17 * Define dummy _TIF work flags if not defined by the architecture or for 18 * disabled functionality. 19 */ 20 #ifndef _TIF_PATCH_PENDING 21 # define _TIF_PATCH_PENDING (0) 22 #endif 23 24 /* 25 * TIF flags handled in exit_to_user_mode_loop() 26 */ 27 #ifndef ARCH_EXIT_TO_USER_MODE_WORK 28 # define ARCH_EXIT_TO_USER_MODE_WORK (0) 29 #endif 30 31 #define EXIT_TO_USER_MODE_WORK \ 32 (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ 33 _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ 34 _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | _TIF_RSEQ | \ 35 ARCH_EXIT_TO_USER_MODE_WORK) 36 37 #ifdef CONFIG_HRTIMER_REARM_DEFERRED 38 # define EXIT_TO_USER_MODE_WORK_SYSCALL (EXIT_TO_USER_MODE_WORK) 39 # define EXIT_TO_USER_MODE_WORK_IRQ (EXIT_TO_USER_MODE_WORK | _TIF_HRTIMER_REARM) 40 #else 41 # define EXIT_TO_USER_MODE_WORK_SYSCALL (EXIT_TO_USER_MODE_WORK) 42 # define EXIT_TO_USER_MODE_WORK_IRQ (EXIT_TO_USER_MODE_WORK) 43 #endif 44 45 /** 46 * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs 47 * @regs: Pointer to currents pt_regs 48 * 49 * Defaults to an empty implementation. Can be replaced by architecture 50 * specific code. 51 * 52 * Invoked from syscall_enter_from_user_mode() in the non-instrumentable 53 * section. Use __always_inline so the compiler cannot push it out of line 54 * and make it instrumentable. 55 */ 56 static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs); 57 58 #ifndef arch_enter_from_user_mode 59 static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {} 60 #endif 61 62 /** 63 * arch_in_rcu_eqs - Architecture specific check for RCU extended quiescent 64 * states. 65 * 66 * Returns: true if the CPU is potentially in an RCU EQS, false otherwise. 67 * 68 * Architectures only need to define this if threads other than the idle thread 69 * may have an interruptible EQS. This does not need to handle idle threads. It 70 * is safe to over-estimate at the cost of redundant RCU management work. 71 * 72 * Invoked from irqentry_enter() 73 */ 74 #ifndef arch_in_rcu_eqs 75 static __always_inline bool arch_in_rcu_eqs(void) { return false; } 76 #endif 77 78 /** 79 * enter_from_user_mode - Establish state when coming from user mode 80 * @regs: Pointer to currents pt_regs 81 * 82 * Syscall/interrupt entry disables interrupts, but user mode is traced as 83 * interrupts enabled. Also with NO_HZ_FULL RCU might be idle. 84 * 85 * 1) Tell lockdep that interrupts are disabled 86 * 2) Invoke context tracking if enabled to reactivate RCU 87 * 3) Trace interrupts off state 88 * 89 * Invoked from architecture specific syscall entry code with interrupts 90 * disabled. The calling code has to be non-instrumentable. When the 91 * function returns all state is correct and interrupts are still 92 * disabled. The subsequent functions can be instrumented. 93 * 94 * This is invoked when there is architecture specific functionality to be 95 * done between establishing state and enabling interrupts. The caller must 96 * enable interrupts before invoking syscall_enter_from_user_mode_work(). 97 */ 98 static __always_inline void enter_from_user_mode(struct pt_regs *regs) 99 { 100 arch_enter_from_user_mode(regs); 101 lockdep_hardirqs_off(CALLER_ADDR0); 102 103 CT_WARN_ON(__ct_state() != CT_STATE_USER); 104 user_exit_irqoff(); 105 106 instrumentation_begin(); 107 kmsan_unpoison_entry_regs(regs); 108 trace_hardirqs_off_finish(); 109 instrumentation_end(); 110 } 111 112 /** 113 * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable() 114 * @ti_work: Cached TIF flags gathered with interrupts disabled 115 * 116 * Defaults to local_irq_enable(). Can be supplied by architecture specific 117 * code. 118 */ 119 static inline void local_irq_enable_exit_to_user(unsigned long ti_work); 120 121 #ifndef local_irq_enable_exit_to_user 122 static __always_inline void local_irq_enable_exit_to_user(unsigned long ti_work) 123 { 124 local_irq_enable(); 125 } 126 #endif 127 128 /** 129 * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable() 130 * 131 * Defaults to local_irq_disable(). Can be supplied by architecture specific 132 * code. 133 */ 134 static inline void local_irq_disable_exit_to_user(void); 135 136 #ifndef local_irq_disable_exit_to_user 137 static __always_inline void local_irq_disable_exit_to_user(void) 138 { 139 local_irq_disable(); 140 } 141 #endif 142 143 /** 144 * arch_exit_to_user_mode_work - Architecture specific TIF work for exit 145 * to user mode. 146 * @regs: Pointer to currents pt_regs 147 * @ti_work: Cached TIF flags gathered with interrupts disabled 148 * 149 * Invoked from exit_to_user_mode_loop() with interrupt enabled 150 * 151 * Defaults to NOOP. Can be supplied by architecture specific code. 152 */ 153 static inline void arch_exit_to_user_mode_work(struct pt_regs *regs, 154 unsigned long ti_work); 155 156 #ifndef arch_exit_to_user_mode_work 157 static inline void arch_exit_to_user_mode_work(struct pt_regs *regs, 158 unsigned long ti_work) 159 { 160 } 161 #endif 162 163 /** 164 * arch_exit_to_user_mode_prepare - Architecture specific preparation for 165 * exit to user mode. 166 * @regs: Pointer to currents pt_regs 167 * @ti_work: Cached TIF flags gathered with interrupts disabled 168 * 169 * Invoked from exit_to_user_mode_prepare() with interrupt disabled as the last 170 * function before return. Defaults to NOOP. 171 */ 172 static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, 173 unsigned long ti_work); 174 175 #ifndef arch_exit_to_user_mode_prepare 176 static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, 177 unsigned long ti_work) 178 { 179 } 180 #endif 181 182 /** 183 * arch_exit_to_user_mode - Architecture specific final work before 184 * exit to user mode. 185 * 186 * Invoked from exit_to_user_mode() with interrupt disabled as the last 187 * function before return. Defaults to NOOP. 188 * 189 * This needs to be __always_inline because it is non-instrumentable code 190 * invoked after context tracking switched to user mode. 191 * 192 * An architecture implementation must not do anything complex, no locking 193 * etc. The main purpose is for speculation mitigations. 194 */ 195 static __always_inline void arch_exit_to_user_mode(void); 196 197 #ifndef arch_exit_to_user_mode 198 static __always_inline void arch_exit_to_user_mode(void) { } 199 #endif 200 201 /** 202 * arch_do_signal_or_restart - Architecture specific signal delivery function 203 * @regs: Pointer to currents pt_regs 204 * 205 * Invoked from exit_to_user_mode_loop(). 206 */ 207 void arch_do_signal_or_restart(struct pt_regs *regs); 208 209 /* Handle pending TIF work */ 210 unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work); 211 212 /** 213 * __exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required 214 * @regs: Pointer to pt_regs on entry stack 215 * @work_mask: Which TIF bits need to be evaluated 216 * 217 * 1) check that interrupts are disabled 218 * 2) call tick_nohz_user_enter_prepare() 219 * 3) call exit_to_user_mode_loop() if any flags from 220 * EXIT_TO_USER_MODE_WORK are set 221 * 4) check that interrupts are still disabled 222 * 223 * Don't invoke directly, use the syscall/irqentry_ prefixed variants below 224 */ 225 static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs, 226 const unsigned long work_mask) 227 { 228 unsigned long ti_work; 229 230 lockdep_assert_irqs_disabled(); 231 232 /* Flush pending rcuog wakeup before the last need_resched() check */ 233 tick_nohz_user_enter_prepare(); 234 235 ti_work = read_thread_flags(); 236 if (unlikely(ti_work & work_mask)) { 237 if (!hrtimer_rearm_deferred_user_irq(&ti_work, work_mask)) 238 ti_work = exit_to_user_mode_loop(regs, ti_work); 239 } 240 241 arch_exit_to_user_mode_prepare(regs, ti_work); 242 } 243 244 static __always_inline void __exit_to_user_mode_validate(void) 245 { 246 /* Ensure that kernel state is sane for a return to userspace */ 247 kmap_assert_nomap(); 248 lockdep_assert_irqs_disabled(); 249 lockdep_sys_exit(); 250 } 251 252 /* Temporary workaround to keep ARM64 alive */ 253 static __always_inline void exit_to_user_mode_prepare_legacy(struct pt_regs *regs) 254 { 255 __exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK); 256 rseq_exit_to_user_mode_legacy(); 257 __exit_to_user_mode_validate(); 258 } 259 260 /** 261 * syscall_exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required 262 * @regs: Pointer to pt_regs on entry stack 263 * 264 * Wrapper around __exit_to_user_mode_prepare() to separate the exit work for 265 * syscalls and interrupts. 266 */ 267 static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs) 268 { 269 __exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK_SYSCALL); 270 rseq_syscall_exit_to_user_mode(); 271 __exit_to_user_mode_validate(); 272 } 273 274 /** 275 * irqentry_exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required 276 * @regs: Pointer to pt_regs on entry stack 277 * 278 * Wrapper around __exit_to_user_mode_prepare() to separate the exit work for 279 * syscalls and interrupts. 280 */ 281 static __always_inline void irqentry_exit_to_user_mode_prepare(struct pt_regs *regs) 282 { 283 __exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK_IRQ); 284 rseq_irqentry_exit_to_user_mode(); 285 __exit_to_user_mode_validate(); 286 } 287 288 /** 289 * exit_to_user_mode - Fixup state when exiting to user mode 290 * 291 * Syscall/interrupt exit enables interrupts, but the kernel state is 292 * interrupts disabled when this is invoked. Also tell RCU about it. 293 * 294 * 1) Trace interrupts on state 295 * 2) Invoke context tracking if enabled to adjust RCU state 296 * 3) Invoke architecture specific last minute exit code, e.g. speculation 297 * mitigations, etc.: arch_exit_to_user_mode() 298 * 4) Tell lockdep that interrupts are enabled 299 * 300 * Invoked from architecture specific code when syscall_exit_to_user_mode() 301 * is not suitable as the last step before returning to userspace. Must be 302 * invoked with interrupts disabled and the caller must be 303 * non-instrumentable. 304 * The caller has to invoke syscall_exit_to_user_mode_work() before this. 305 */ 306 static __always_inline void exit_to_user_mode(void) 307 { 308 instrumentation_begin(); 309 unwind_reset_info(); 310 trace_hardirqs_on_prepare(); 311 lockdep_hardirqs_on_prepare(); 312 instrumentation_end(); 313 314 user_enter_irqoff(); 315 arch_exit_to_user_mode(); 316 lockdep_hardirqs_on(CALLER_ADDR0); 317 } 318 319 /** 320 * irqentry_enter_from_user_mode - Establish state before invoking the irq handler 321 * @regs: Pointer to currents pt_regs 322 * 323 * Invoked from architecture specific entry code with interrupts disabled. 324 * Can only be called when the interrupt entry came from user mode. The 325 * calling code must be non-instrumentable. When the function returns all 326 * state is correct and the subsequent functions can be instrumented. 327 * 328 * The function establishes state (lockdep, RCU (context tracking), tracing) 329 */ 330 static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs) 331 { 332 enter_from_user_mode(regs); 333 rseq_note_user_irq_entry(); 334 } 335 336 /** 337 * irqentry_exit_to_user_mode - Interrupt exit work 338 * @regs: Pointer to current's pt_regs 339 * 340 * Invoked with interrupts disabled and fully valid regs. Returns with all 341 * work handled, interrupts disabled such that the caller can immediately 342 * switch to user mode. Called from architecture specific interrupt 343 * handling code. 344 * 345 * The call order is #2 and #3 as described in syscall_exit_to_user_mode(). 346 * Interrupt exit is not invoking #1 which is the syscall specific one time 347 * work. 348 */ 349 static __always_inline void irqentry_exit_to_user_mode(struct pt_regs *regs) 350 { 351 instrumentation_begin(); 352 irqentry_exit_to_user_mode_prepare(regs); 353 instrumentation_end(); 354 exit_to_user_mode(); 355 } 356 357 #ifndef irqentry_state 358 /** 359 * struct irqentry_state - Opaque object for exception state storage 360 * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the 361 * exit path has to invoke ct_irq_exit(). 362 * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that 363 * lockdep state is restored correctly on exit from nmi. 364 * 365 * This opaque object is filled in by the irqentry_*_enter() functions and 366 * must be passed back into the corresponding irqentry_*_exit() functions 367 * when the exception is complete. 368 * 369 * Callers of irqentry_*_[enter|exit]() must consider this structure opaque 370 * and all members private. Descriptions of the members are provided to aid in 371 * the maintenance of the irqentry_*() functions. 372 */ 373 typedef struct irqentry_state { 374 union { 375 bool exit_rcu; 376 bool lockdep; 377 }; 378 } irqentry_state_t; 379 #endif 380 381 /** 382 * irqentry_enter - Handle state tracking on ordinary interrupt entries 383 * @regs: Pointer to pt_regs of interrupted context 384 * 385 * Invokes: 386 * - lockdep irqflag state tracking as low level ASM entry disabled 387 * interrupts. 388 * 389 * - Context tracking if the exception hit user mode. 390 * 391 * - The hardirq tracer to keep the state consistent as low level ASM 392 * entry disabled interrupts. 393 * 394 * As a precondition, this requires that the entry came from user mode, 395 * idle, or a kernel context in which RCU is watching. 396 * 397 * For kernel mode entries RCU handling is done conditional. If RCU is 398 * watching then the only RCU requirement is to check whether the tick has 399 * to be restarted. If RCU is not watching then ct_irq_enter() has to be 400 * invoked on entry and ct_irq_exit() on exit. 401 * 402 * Avoiding the ct_irq_enter/exit() calls is an optimization but also 403 * solves the problem of kernel mode pagefaults which can schedule, which 404 * is not possible after invoking ct_irq_enter() without undoing it. 405 * 406 * For user mode entries irqentry_enter_from_user_mode() is invoked to 407 * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit 408 * would not be possible. 409 * 410 * Returns: An opaque object that must be passed to idtentry_exit() 411 */ 412 irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); 413 414 /** 415 * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt 416 * 417 * Conditional reschedule with additional sanity checks. 418 */ 419 void raw_irqentry_exit_cond_resched(void); 420 421 #ifdef CONFIG_PREEMPT_DYNAMIC 422 #if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) 423 #define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched 424 #define irqentry_exit_cond_resched_dynamic_disabled NULL 425 DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); 426 #define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)() 427 #elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) 428 DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); 429 void dynamic_irqentry_exit_cond_resched(void); 430 #define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched() 431 #endif 432 #else /* CONFIG_PREEMPT_DYNAMIC */ 433 #define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() 434 #endif /* CONFIG_PREEMPT_DYNAMIC */ 435 436 /** 437 * irqentry_exit - Handle return from exception that used irqentry_enter() 438 * @regs: Pointer to pt_regs (exception entry regs) 439 * @state: Return value from matching call to irqentry_enter() 440 * 441 * Depending on the return target (kernel/user) this runs the necessary 442 * preemption and work checks if possible and required and returns to 443 * the caller with interrupts disabled and no further work pending. 444 * 445 * This is the last action before returning to the low level ASM code which 446 * just needs to return to the appropriate context. 447 * 448 * Counterpart to irqentry_enter(). 449 */ 450 void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state); 451 452 /** 453 * irqentry_nmi_enter - Handle NMI entry 454 * @regs: Pointer to currents pt_regs 455 * 456 * Similar to irqentry_enter() but taking care of the NMI constraints. 457 */ 458 irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs); 459 460 /** 461 * irqentry_nmi_exit - Handle return from NMI handling 462 * @regs: Pointer to pt_regs (NMI entry regs) 463 * @irq_state: Return value from matching call to irqentry_nmi_enter() 464 * 465 * Last action before returning to the low level assembly code. 466 * 467 * Counterpart to irqentry_nmi_enter(). 468 */ 469 void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state); 470 471 #endif 472