// SPDX-License-Identifier: GPL-2.0

#include <linux/irq-entry-common.h>
#include <linux/resume_user_mode.h>
#include <linux/highmem.h>
#include <linux/jump_label.h>
#include <linux/kmsan.h>
#include <linux/livepatch.h>
#include <linux/tick.h>

/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }

#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
#define EXIT_TO_USER_MODE_WORK_LOOP	(EXIT_TO_USER_MODE_WORK & ~_TIF_RSEQ)
#else
#define EXIT_TO_USER_MODE_WORK_LOOP	(EXIT_TO_USER_MODE_WORK)
#endif

/* TIF bits which prevent a time slice extension. */
#ifdef CONFIG_PREEMPT_RT
/*
 * Since the rseq slice extension has a direct correlation to the worst case
 * scheduling latency (the schedule is delayed after all), only have it
 * affect LAZY reschedules on PREEMPT_RT for now.
 *
 * Since this delay only applies to user space, a value for
 * rseq_slice_extension_nsec that is strictly less than the worst case
 * kernel space preempt_disable() region should leave the scheduling
 * latency unaffected, even for !LAZY.
 *
 * However, since that value depends on the hardware at hand, it cannot be
 * pre-determined in any sensible way. Hence punt on this problem for now.
 */
# define TIF_SLICE_EXT_SCHED	(_TIF_NEED_RESCHED_LAZY)
#else
# define TIF_SLICE_EXT_SCHED	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
#endif
#define TIF_SLICE_EXT_DENY	(EXIT_TO_USER_MODE_WORK & ~TIF_SLICE_EXT_SCHED)

static __always_inline unsigned long __exit_to_user_mode_loop(struct pt_regs *regs,
							       unsigned long ti_work)
{
	/*
	 * Before returning to user space ensure that all pending work
	 * items have been completed.
	 */
	while (ti_work & EXIT_TO_USER_MODE_WORK_LOOP) {

		local_irq_enable_exit_to_user(ti_work);

		if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) {
			if (!rseq_grant_slice_extension(ti_work & TIF_SLICE_EXT_DENY))
				schedule();
		}

		if (ti_work & _TIF_UPROBE)
			uprobe_notify_resume(regs);

		if (ti_work & _TIF_PATCH_PENDING)
			klp_update_patch_state(current);

		if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
			arch_do_signal_or_restart(regs);

		if (ti_work & _TIF_NOTIFY_RESUME)
			resume_user_mode_work(regs);

		/* Architecture specific TIF work */
		arch_exit_to_user_mode_work(regs, ti_work);

		/*
		 * Disable interrupts and reevaluate the work flags as they
		 * might have changed while interrupts and preemption were
		 * enabled above.
		 */
		local_irq_disable_exit_to_user();

		/* Check if any of the above work has queued a deferred wakeup */
		tick_nohz_user_enter_prepare();

		ti_work = read_thread_flags();
	}

	/* Return the latest work state for arch_exit_to_user_mode() */
	return ti_work;
}
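
/*
 * Illustrative sketch only, not part of this file: arch_exit_to_user_mode_work()
 * above is the hook an architecture can supply (typically in its
 * asm/entry-common.h) to handle additional, architecture specific TIF work
 * inside the loop. The TIF bit and handler below are hypothetical
 * placeholders, not existing kernel symbols:
 *
 *	#define ARCH_EXIT_TO_USER_MODE_WORK	(_TIF_ARCH_FOO)
 *
 *	#define arch_exit_to_user_mode_work arch_exit_to_user_mode_work
 *	static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
 *						       unsigned long ti_work)
 *	{
 *		if (ti_work & _TIF_ARCH_FOO)
 *			arch_handle_foo(regs);
 *	}
 *
 * The architecture bit has to be included in EXIT_TO_USER_MODE_WORK (via
 * ARCH_EXIT_TO_USER_MODE_WORK) so the loop is entered when only that bit
 * is pending.
 */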

/**
 * exit_to_user_mode_loop - do any pending work before leaving to user space
 * @regs:	Pointer to pt_regs on entry stack
 * @ti_work:	TIF work flags as read by the caller
 */
__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
						     unsigned long ti_work)
{
	for (;;) {
		ti_work = __exit_to_user_mode_loop(regs, ti_work);

		if (likely(!rseq_exit_to_user_mode_restart(regs, ti_work)))
			return ti_work;
		ti_work = read_thread_flags();
	}
}

noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
	irqentry_state_t ret = {
		.exit_rcu = false,
	};

	if (user_mode(regs)) {
		irqentry_enter_from_user_mode(regs);
		return ret;
	}

	/*
	 * If this entry hit the idle task, invoke ct_irq_enter() whether
	 * RCU is watching or not.
	 *
	 * Interrupts can nest when the first interrupt invokes softirq
	 * processing on return, which enables interrupts.
	 *
	 * Scheduler ticks in the idle task can mark quiescent state and
	 * terminate a grace period, if and only if the timer interrupt is
	 * not nested into another interrupt.
	 *
	 * Checking for rcu_is_watching() here would prevent the nested
	 * interrupt from invoking ct_irq_enter(). If that nested interrupt
	 * is the tick then rcu_flavor_sched_clock_irq() would wrongfully
	 * assume that it is the first interrupt and eventually claim
	 * quiescent state and end grace periods prematurely.
	 *
	 * Unconditionally invoke ct_irq_enter() so RCU state stays
	 * consistent.
	 *
	 * TINY_RCU does not support EQS, so let the compiler eliminate
	 * this part when it is enabled.
	 */
	if (!IS_ENABLED(CONFIG_TINY_RCU) &&
	    (is_idle_task(current) || arch_in_rcu_eqs())) {
		/*
		 * If RCU is not watching then the same careful
		 * sequence vs. lockdep and tracing is required
		 * as in irqentry_enter_from_user_mode().
		 */
		lockdep_hardirqs_off(CALLER_ADDR0);
		ct_irq_enter();
		instrumentation_begin();
		kmsan_unpoison_entry_regs(regs);
		trace_hardirqs_off_finish();
		instrumentation_end();

		ret.exit_rcu = true;
		return ret;
	}

	/*
	 * If RCU is watching then RCU only wants to check whether it needs
	 * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
	 * already contains a warning when RCU is not watching, so no point
	 * in having another one here.
	 */
	lockdep_hardirqs_off(CALLER_ADDR0);
	instrumentation_begin();
	kmsan_unpoison_entry_regs(regs);
	rcu_irq_enter_check_tick();
	trace_hardirqs_off_finish();
	instrumentation_end();

	return ret;
}
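
/*
 * Illustrative usage sketch only, not part of this file: an architecture's
 * C level interrupt entry point brackets the actual handling with
 * irqentry_enter()/irqentry_exit() and keeps instrumentable work inside an
 * instrumentation_begin()/end() section. The names arch_handle_irq() and
 * arch_irq_dispatch() below are hypothetical placeholders; the dispatch is
 * assumed to take care of irq_enter_rcu()/irq_exit_rcu() itself:
 *
 *	static void noinstr arch_handle_irq(struct pt_regs *regs)
 *	{
 *		irqentry_state_t state = irqentry_enter(regs);
 *
 *		instrumentation_begin();
 *		arch_irq_dispatch(regs);
 *		instrumentation_end();
 *
 *		irqentry_exit(regs, state);
 *	}
 */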

/**
 * arch_irqentry_exit_need_resched - Architecture specific need resched function
 *
 * Invoked from raw_irqentry_exit_cond_resched() to check if rescheduling is
 * needed. Defaults to returning true.
 *
 * The main purpose is to permit an architecture to avoid preemption of a
 * task from an IRQ.
 */
static inline bool arch_irqentry_exit_need_resched(void);

#ifndef arch_irqentry_exit_need_resched
static inline bool arch_irqentry_exit_need_resched(void) { return true; }
#endif

void raw_irqentry_exit_cond_resched(void)
{
	if (!preempt_count()) {
		/* Sanity check RCU and thread stack */
		rcu_irq_exit_check_preempt();
		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
			WARN_ON_ONCE(!on_thread_stack());
		if (need_resched() && arch_irqentry_exit_need_resched())
			preempt_schedule_irq();
	}
}
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
void dynamic_irqentry_exit_cond_resched(void)
{
	if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
		return;
	raw_irqentry_exit_cond_resched();
}
#endif
#endif

noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
	lockdep_assert_irqs_disabled();

	/* Check whether this returns to user mode */
	if (user_mode(regs)) {
		irqentry_exit_to_user_mode(regs);
	} else if (!regs_irqs_disabled(regs)) {
		/*
		 * If RCU was not watching on entry this needs to be done
		 * carefully and needs the same ordering of lockdep/tracing
		 * and RCU as the return to user mode path.
		 */
		if (state.exit_rcu) {
			instrumentation_begin();
			/* Tell the tracer that IRET will enable interrupts */
			trace_hardirqs_on_prepare();
			lockdep_hardirqs_on_prepare();
			instrumentation_end();
			ct_irq_exit();
			lockdep_hardirqs_on(CALLER_ADDR0);
			return;
		}

		instrumentation_begin();
		if (IS_ENABLED(CONFIG_PREEMPTION))
			irqentry_exit_cond_resched();

		/* Covers both tracing and lockdep */
		trace_hardirqs_on();
		instrumentation_end();
	} else {
		/*
		 * IRQ flags state is correct already. Just tell RCU if it
		 * was not watching on entry.
		 */
		if (state.exit_rcu)
			ct_irq_exit();
	}
}

irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
{
	irqentry_state_t irq_state;

	irq_state.lockdep = lockdep_hardirqs_enabled();

	__nmi_enter();
	lockdep_hardirqs_off(CALLER_ADDR0);
	lockdep_hardirq_enter();
	ct_nmi_enter();

	instrumentation_begin();
	kmsan_unpoison_entry_regs(regs);
	trace_hardirqs_off_finish();
	ftrace_nmi_enter();
	instrumentation_end();

	return irq_state;
}

void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
{
	instrumentation_begin();
	ftrace_nmi_exit();
	if (irq_state.lockdep) {
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare();
	}
	instrumentation_end();

	ct_nmi_exit();
	lockdep_hardirq_exit();
	if (irq_state.lockdep)
		lockdep_hardirqs_on(CALLER_ADDR0);
	__nmi_exit();
}
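
/*
 * Illustrative usage sketch only, not part of this file: NMI-like exceptions
 * use irqentry_nmi_enter()/irqentry_nmi_exit() instead of the regular
 * irqentry_enter()/irqentry_exit() pair. The names arch_handle_nmi() and
 * arch_nmi_dispatch() below are hypothetical placeholders for the
 * architecture specific NMI handling:
 *
 *	static void noinstr arch_handle_nmi(struct pt_regs *regs)
 *	{
 *		irqentry_state_t state = irqentry_nmi_enter(regs);
 *
 *		instrumentation_begin();
 *		arch_nmi_dispatch(regs);
 *		instrumentation_end();
 *
 *		irqentry_nmi_exit(regs, state);
 *	}
 */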