/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_IRQENTRYCOMMON_H
#define __LINUX_IRQENTRYCOMMON_H

#include <linux/static_call_types.h>
#include <linux/syscalls.h>
#include <linux/context_tracking.h>
#include <linux/tick.h>
#include <linux/kmsan.h>

#include <asm/entry-common.h>

/*
 * Define dummy _TIF work flags if not defined by the architecture or for
 * disabled functionality.
 */
#ifndef _TIF_PATCH_PENDING
# define _TIF_PATCH_PENDING		(0)
#endif

/*
 * TIF flags handled in exit_to_user_mode_loop()
 */
#ifndef ARCH_EXIT_TO_USER_MODE_WORK
# define ARCH_EXIT_TO_USER_MODE_WORK	(0)
#endif

#define EXIT_TO_USER_MODE_WORK						\
	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
	 _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY |			\
	 _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |			\
	 ARCH_EXIT_TO_USER_MODE_WORK)

/**
 * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs
 * @regs:	Pointer to current's pt_regs
 *
 * Defaults to an empty implementation. Can be replaced by architecture
 * specific code.
 *
 * Invoked from syscall_enter_from_user_mode() in the non-instrumentable
 * section. Use __always_inline so the compiler cannot push it out of line
 * and make it instrumentable.
 */
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs);

#ifndef arch_enter_from_user_mode
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {}
#endif

/**
 * enter_from_user_mode - Establish state when coming from user mode
 *
 * Syscall/interrupt entry disables interrupts, but user mode is traced as
 * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
 *
 * 1) Tell lockdep that interrupts are disabled
 * 2) Invoke context tracking if enabled to reactivate RCU
 * 3) Trace interrupts off state
 *
 * Invoked from architecture specific syscall entry code with interrupts
 * disabled. The calling code has to be non-instrumentable. When the
 * function returns all state is correct and interrupts are still
 * disabled. The subsequent functions can be instrumented.
 *
 * This is invoked when there is architecture specific functionality to be
 * done between establishing state and enabling interrupts. The caller must
 * enable interrupts before invoking syscall_enter_from_user_mode_work().
 */
static __always_inline void enter_from_user_mode(struct pt_regs *regs)
{
	arch_enter_from_user_mode(regs);
	lockdep_hardirqs_off(CALLER_ADDR0);

	CT_WARN_ON(__ct_state() != CT_STATE_USER);
	user_exit_irqoff();

	instrumentation_begin();
	kmsan_unpoison_entry_regs(regs);
	trace_hardirqs_off_finish();
	instrumentation_end();
}

/**
 * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable()
 * @ti_work:	Cached TIF flags gathered with interrupts disabled
 *
 * Defaults to local_irq_enable(). Can be supplied by architecture specific
 * code.
 */
static inline void local_irq_enable_exit_to_user(unsigned long ti_work);

#ifndef local_irq_enable_exit_to_user
static inline void local_irq_enable_exit_to_user(unsigned long ti_work)
{
	local_irq_enable();
}
#endif
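
/*
 * Illustrative sketch (not part of this header): how architecture specific
 * syscall entry code is expected to use enter_from_user_mode() as documented
 * above, i.e. establish state first, do its architecture specific work, and
 * only then enable interrupts before syscall_enter_from_user_mode_work().
 * The function name arch_handle_syscall() and the syscall number extraction
 * are hypothetical placeholders, not a real architecture's API.
 *
 *	__visible noinstr void arch_handle_syscall(struct pt_regs *regs)
 *	{
 *		long nr;
 *
 *		enter_from_user_mode(regs);
 *
 *		instrumentation_begin();
 *		nr = arch_syscall_nr(regs);		// placeholder
 *		local_irq_enable();
 *		nr = syscall_enter_from_user_mode_work(regs, nr);
 *		...
 *		instrumentation_end();
 *	}
 */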

/**
 * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable()
 *
 * Defaults to local_irq_disable(). Can be supplied by architecture specific
 * code.
 */
static inline void local_irq_disable_exit_to_user(void);

#ifndef local_irq_disable_exit_to_user
static inline void local_irq_disable_exit_to_user(void)
{
	local_irq_disable();
}
#endif

/**
 * arch_exit_to_user_mode_work - Architecture specific TIF work for exit
 *				 to user mode.
 * @regs:	Pointer to current's pt_regs
 * @ti_work:	Cached TIF flags gathered with interrupts disabled
 *
 * Invoked from exit_to_user_mode_loop() with interrupts enabled
 *
 * Defaults to NOOP. Can be supplied by architecture specific code.
 */
static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
					       unsigned long ti_work);

#ifndef arch_exit_to_user_mode_work
static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
					       unsigned long ti_work)
{
}
#endif

/**
 * arch_exit_to_user_mode_prepare - Architecture specific preparation for
 *				    exit to user mode.
 * @regs:	Pointer to current's pt_regs
 * @ti_work:	Cached TIF flags gathered with interrupts disabled
 *
 * Invoked from exit_to_user_mode_prepare() with interrupts disabled as the
 * last function before return. Defaults to NOOP.
 */
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
						  unsigned long ti_work);

#ifndef arch_exit_to_user_mode_prepare
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
						  unsigned long ti_work)
{
}
#endif

/**
 * arch_exit_to_user_mode - Architecture specific final work before
 *			    exit to user mode.
 *
 * Invoked from exit_to_user_mode() with interrupts disabled as the last
 * function before return. Defaults to NOOP.
 *
 * This needs to be __always_inline because it is non-instrumentable code
 * invoked after context tracking switched to user mode.
 *
 * An architecture implementation must not do anything complex, no locking
 * etc. The main purpose is for speculation mitigations.
 */
static __always_inline void arch_exit_to_user_mode(void);

#ifndef arch_exit_to_user_mode
static __always_inline void arch_exit_to_user_mode(void) { }
#endif
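
/*
 * Illustrative sketch (not part of this header): an architecture which needs
 * a last-minute speculation mitigation could provide its own
 * arch_exit_to_user_mode() in <asm/entry-common.h> and hide the generic
 * default above with a macro of the same name. mitigate_spectre_on_exit()
 * is a hypothetical placeholder for whatever barrier the architecture
 * actually requires.
 *
 *	static __always_inline void arch_exit_to_user_mode(void)
 *	{
 *		mitigate_spectre_on_exit();	// placeholder
 *	}
 *	#define arch_exit_to_user_mode arch_exit_to_user_mode
 */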

/**
 * arch_do_signal_or_restart - Architecture specific signal delivery function
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked from exit_to_user_mode_loop().
 */
void arch_do_signal_or_restart(struct pt_regs *regs);

/**
 * exit_to_user_mode_loop - do any pending work before leaving to user space
 * @regs:	Pointer to pt_regs on entry stack
 * @ti_work:	Cached TIF flags gathered with interrupts disabled
 */
unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
				     unsigned long ti_work);

/**
 * exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
 * @regs:	Pointer to pt_regs on entry stack
 *
 * 1) check that interrupts are disabled
 * 2) call tick_nohz_user_enter_prepare()
 * 3) call exit_to_user_mode_loop() if any flags from
 *    EXIT_TO_USER_MODE_WORK are set
 * 4) check that interrupts are still disabled
 */
static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
{
	unsigned long ti_work;

	lockdep_assert_irqs_disabled();

	/* Flush pending rcuog wakeup before the last need_resched() check */
	tick_nohz_user_enter_prepare();

	ti_work = read_thread_flags();
	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
		ti_work = exit_to_user_mode_loop(regs, ti_work);

	arch_exit_to_user_mode_prepare(regs, ti_work);

	/* Ensure that kernel state is sane for a return to userspace */
	kmap_assert_nomap();
	lockdep_assert_irqs_disabled();
	lockdep_sys_exit();
}

/**
 * exit_to_user_mode - Fixup state when exiting to user mode
 *
 * Syscall/interrupt exit enables interrupts, but the kernel state is
 * interrupts disabled when this is invoked. Also tell RCU about it.
 *
 * 1) Trace interrupts on state
 * 2) Invoke context tracking if enabled to adjust RCU state
 * 3) Invoke architecture specific last minute exit code, e.g. speculation
 *    mitigations, etc.: arch_exit_to_user_mode()
 * 4) Tell lockdep that interrupts are enabled
 *
 * Invoked from architecture specific code when syscall_exit_to_user_mode()
 * is not suitable as the last step before returning to userspace. Must be
 * invoked with interrupts disabled and the caller must be
 * non-instrumentable.
 * The caller has to invoke syscall_exit_to_user_mode_work() before this.
 */
static __always_inline void exit_to_user_mode(void)
{
	instrumentation_begin();
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare();
	instrumentation_end();

	user_enter_irqoff();
	arch_exit_to_user_mode();
	lockdep_hardirqs_on(CALLER_ADDR0);
}
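
/*
 * Illustrative sketch (not part of this header): the expected pairing of
 * exit_to_user_mode_prepare() and exit_to_user_mode() in architecture code
 * which cannot use the generic exit helpers directly, mirroring what
 * irqentry_exit_to_user_mode() does. arch_return_to_user() is a hypothetical
 * placeholder name; a syscall return path would additionally have to run
 * syscall_exit_to_user_mode_work() first, as documented above.
 *
 *	__visible noinstr void arch_return_to_user(struct pt_regs *regs)
 *	{
 *		instrumentation_begin();
 *		exit_to_user_mode_prepare(regs);	// TIF work, IRQs stay off
 *		instrumentation_end();
 *		exit_to_user_mode();			// last step before ASM return
 *	}
 */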

/**
 * irqentry_enter_from_user_mode - Establish state before invoking the irq handler
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked from architecture specific entry code with interrupts disabled.
 * Can only be called when the interrupt entry came from user mode. The
 * calling code must be non-instrumentable. When the function returns all
 * state is correct and the subsequent functions can be instrumented.
 *
 * The function establishes state (lockdep, RCU (context tracking), tracing)
 */
void irqentry_enter_from_user_mode(struct pt_regs *regs);

/**
 * irqentry_exit_to_user_mode - Interrupt exit work
 * @regs:	Pointer to current's pt_regs
 *
 * Invoked with interrupts disabled and fully valid regs. Returns with all
 * work handled, interrupts disabled such that the caller can immediately
 * switch to user mode. Called from architecture specific interrupt
 * handling code.
 *
 * The call order is #2 and #3 as described in syscall_exit_to_user_mode().
 * Interrupt exit is not invoking #1 which is the syscall specific one time
 * work.
 */
void irqentry_exit_to_user_mode(struct pt_regs *regs);

#ifndef irqentry_state
/**
 * struct irqentry_state - Opaque object for exception state storage
 * @exit_rcu:	Used exclusively in the irqentry_*() calls; signals whether the
 *		exit path has to invoke ct_irq_exit().
 * @lockdep:	Used exclusively in the irqentry_nmi_*() calls; ensures that
 *		lockdep state is restored correctly on exit from nmi.
 *
 * This opaque object is filled in by the irqentry_*_enter() functions and
 * must be passed back into the corresponding irqentry_*_exit() functions
 * when the exception is complete.
 *
 * Callers of irqentry_*_[enter|exit]() must consider this structure opaque
 * and all members private. Descriptions of the members are provided to aid in
 * the maintenance of the irqentry_*() functions.
 */
typedef struct irqentry_state {
	union {
		bool	exit_rcu;
		bool	lockdep;
	};
} irqentry_state_t;
#endif

/**
 * irqentry_enter - Handle state tracking on ordinary interrupt entries
 * @regs:	Pointer to pt_regs of interrupted context
 *
 * Invokes:
 *  - lockdep irqflag state tracking as low level ASM entry disabled
 *    interrupts.
 *
 *  - Context tracking if the exception hit user mode.
 *
 *  - The hardirq tracer to keep the state consistent as low level ASM
 *    entry disabled interrupts.
 *
 * As a precondition, this requires that the entry came from user mode,
 * idle, or a kernel context in which RCU is watching.
 *
 * For kernel mode entries RCU handling is done conditionally. If RCU is
 * watching then the only RCU requirement is to check whether the tick has
 * to be restarted. If RCU is not watching then ct_irq_enter() has to be
 * invoked on entry and ct_irq_exit() on exit.
 *
 * Avoiding the ct_irq_enter/exit() calls is an optimization but also
 * solves the problem of kernel mode pagefaults which can schedule, which
 * is not possible after invoking ct_irq_enter() without undoing it.
 *
 * For user mode entries irqentry_enter_from_user_mode() is invoked to
 * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
 * would not be possible.
 *
 * Returns: An opaque object that must be passed to irqentry_exit()
 */
irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
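
/*
 * Illustrative sketch (not part of this header): a typical architecture
 * exception/interrupt wrapper brackets the actual handler with
 * irqentry_enter() and irqentry_exit(), passing the opaque state object
 * through unchanged. arch_handle_irq() and do_irq() are hypothetical
 * placeholder names.
 *
 *	__visible noinstr void arch_handle_irq(struct pt_regs *regs)
 *	{
 *		irqentry_state_t state = irqentry_enter(regs);
 *
 *		instrumentation_begin();
 *		do_irq(regs);			// placeholder handler
 *		instrumentation_end();
 *
 *		irqentry_exit(regs, state);
 *	}
 */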

/**
 * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt
 *
 * Conditional reschedule with additional sanity checks.
 */
void raw_irqentry_exit_cond_resched(void);
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#define irqentry_exit_cond_resched_dynamic_enabled	raw_irqentry_exit_cond_resched
#define irqentry_exit_cond_resched_dynamic_disabled	NULL
DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
#define irqentry_exit_cond_resched()	static_call(irqentry_exit_cond_resched)()
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
void dynamic_irqentry_exit_cond_resched(void);
#define irqentry_exit_cond_resched()	dynamic_irqentry_exit_cond_resched()
#endif
#else /* CONFIG_PREEMPT_DYNAMIC */
#define irqentry_exit_cond_resched()	raw_irqentry_exit_cond_resched()
#endif /* CONFIG_PREEMPT_DYNAMIC */

/**
 * irqentry_exit - Handle return from exception that used irqentry_enter()
 * @regs:	Pointer to pt_regs (exception entry regs)
 * @state:	Return value from matching call to irqentry_enter()
 *
 * Depending on the return target (kernel/user) this runs the necessary
 * preemption and work checks if possible and required and returns to
 * the caller with interrupts disabled and no further work pending.
 *
 * This is the last action before returning to the low level ASM code which
 * just needs to return to the appropriate context.
 *
 * Counterpart to irqentry_enter().
 */
void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state);

/**
 * irqentry_nmi_enter - Handle NMI entry
 * @regs:	Pointer to current's pt_regs
 *
 * Similar to irqentry_enter() but taking care of the NMI constraints.
 */
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs);

/**
 * irqentry_nmi_exit - Handle return from NMI handling
 * @regs:	Pointer to pt_regs (NMI entry regs)
 * @irq_state:	Return value from matching call to irqentry_nmi_enter()
 *
 * Last action before returning to the low level assembly code.
 *
 * Counterpart to irqentry_nmi_enter().
 */
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state);

#endif