1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar 4 * 5 * This file contains spurious interrupt handling. 6 */ 7 8 #include <linux/jiffies.h> 9 #include <linux/irq.h> 10 #include <linux/module.h> 11 #include <linux/interrupt.h> 12 #include <linux/moduleparam.h> 13 #include <linux/timer.h> 14 15 #include "internals.h" 16 17 static int irqfixup __read_mostly; 18 19 #define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10) 20 static void poll_spurious_irqs(struct timer_list *unused); 21 static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs); 22 static int irq_poll_cpu; 23 static atomic_t irq_poll_active; 24 25 /* 26 * We wait here for a poller to finish. 27 * 28 * If the poll runs on this CPU, then we yell loudly and return 29 * false. That will leave the interrupt line disabled in the worst 30 * case, but it should never happen. 31 * 32 * We wait until the poller is done and then recheck disabled and 33 * action (about to be disabled). Only if it's still active, we return 34 * true and let the handler run. 35 */ 36 bool irq_wait_for_poll(struct irq_desc *desc) 37 __must_hold(&desc->lock) 38 { 39 if (WARN_ONCE(irq_poll_cpu == smp_processor_id(), 40 "irq poll in progress on cpu %d for irq %d\n", 41 smp_processor_id(), desc->irq_data.irq)) 42 return false; 43 44 #ifdef CONFIG_SMP 45 do { 46 raw_spin_unlock(&desc->lock); 47 while (irqd_irq_inprogress(&desc->irq_data)) 48 cpu_relax(); 49 raw_spin_lock(&desc->lock); 50 } while (irqd_irq_inprogress(&desc->irq_data)); 51 /* Might have been disabled in meantime */ 52 return !irqd_irq_disabled(&desc->irq_data) && desc->action; 53 #else 54 return false; 55 #endif 56 } 57 58 59 /* 60 * Recovery handler for misrouted interrupts. 61 */ 62 static int try_one_irq(struct irq_desc *desc, bool force) 63 { 64 irqreturn_t ret = IRQ_NONE; 65 struct irqaction *action; 66 67 raw_spin_lock(&desc->lock); 68 69 /* 70 * PER_CPU, nested thread interrupts and interrupts explicitly 71 * marked polled are excluded from polling. 72 */ 73 if (irq_settings_is_per_cpu(desc) || 74 irq_settings_is_nested_thread(desc) || 75 irq_settings_is_polled(desc)) 76 goto out; 77 78 /* 79 * Do not poll disabled interrupts unless the spurious 80 * disabled poller asks explicitly. 81 */ 82 if (irqd_irq_disabled(&desc->irq_data) && !force) 83 goto out; 84 85 /* 86 * All handlers must agree on IRQF_SHARED, so we test just the 87 * first. 88 */ 89 action = desc->action; 90 if (!action || !(action->flags & IRQF_SHARED) || 91 (action->flags & __IRQF_TIMER)) 92 goto out; 93 94 /* Already running on another processor */ 95 if (irqd_irq_inprogress(&desc->irq_data)) { 96 /* 97 * Already running: If it is shared get the other 98 * CPU to go looking for our mystery interrupt too 99 */ 100 desc->istate |= IRQS_PENDING; 101 goto out; 102 } 103 104 /* Mark it poll in progress */ 105 desc->istate |= IRQS_POLL_INPROGRESS; 106 do { 107 if (handle_irq_event(desc) == IRQ_HANDLED) 108 ret = IRQ_HANDLED; 109 /* Make sure that there is still a valid action */ 110 action = desc->action; 111 } while ((desc->istate & IRQS_PENDING) && action); 112 desc->istate &= ~IRQS_POLL_INPROGRESS; 113 out: 114 raw_spin_unlock(&desc->lock); 115 return ret == IRQ_HANDLED; 116 } 117 118 static int misrouted_irq(int irq) 119 { 120 struct irq_desc *desc; 121 int i, ok = 0; 122 123 if (atomic_inc_return(&irq_poll_active) != 1) 124 goto out; 125 126 irq_poll_cpu = smp_processor_id(); 127 128 for_each_irq_desc(i, desc) { 129 if (!i) 130 continue; 131 132 if (i == irq) /* Already tried */ 133 continue; 134 135 if (try_one_irq(desc, false)) 136 ok = 1; 137 } 138 out: 139 atomic_dec(&irq_poll_active); 140 /* So the caller can adjust the irq error counts */ 141 return ok; 142 } 143 144 static void poll_spurious_irqs(struct timer_list *unused) 145 { 146 struct irq_desc *desc; 147 int i; 148 149 if (atomic_inc_return(&irq_poll_active) != 1) 150 goto out; 151 irq_poll_cpu = smp_processor_id(); 152 153 for_each_irq_desc(i, desc) { 154 unsigned int state; 155 156 if (!i) 157 continue; 158 159 /* Racy but it doesn't matter */ 160 state = desc->istate; 161 barrier(); 162 if (!(state & IRQS_SPURIOUS_DISABLED)) 163 continue; 164 165 local_irq_disable(); 166 try_one_irq(desc, true); 167 local_irq_enable(); 168 } 169 out: 170 atomic_dec(&irq_poll_active); 171 mod_timer(&poll_spurious_irq_timer, 172 jiffies + POLL_SPURIOUS_IRQ_INTERVAL); 173 } 174 175 static inline int bad_action_ret(irqreturn_t action_ret) 176 { 177 unsigned int r = action_ret; 178 179 if (likely(r <= (IRQ_HANDLED | IRQ_WAKE_THREAD))) 180 return 0; 181 return 1; 182 } 183 184 /* 185 * If 99,900 of the previous 100,000 interrupts have not been handled 186 * then assume that the IRQ is stuck in some manner. Drop a diagnostic 187 * and try to turn the IRQ off. 188 * 189 * (The other 100-of-100,000 interrupts may have been a correctly 190 * functioning device sharing an IRQ with the failing one) 191 */ 192 static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) 193 { 194 unsigned int irq = irq_desc_get_irq(desc); 195 struct irqaction *action; 196 unsigned long flags; 197 198 if (bad_action_ret(action_ret)) { 199 printk(KERN_ERR "irq event %d: bogus return value %x\n", 200 irq, action_ret); 201 } else { 202 printk(KERN_ERR "irq %d: nobody cared (try booting with " 203 "the \"irqpoll\" option)\n", irq); 204 } 205 dump_stack(); 206 printk(KERN_ERR "handlers:\n"); 207 208 /* 209 * We need to take desc->lock here. note_interrupt() is called 210 * w/o desc->lock held, but IRQ_PROGRESS set. We might race 211 * with something else removing an action. It's ok to take 212 * desc->lock here. See synchronize_irq(). 213 */ 214 raw_spin_lock_irqsave(&desc->lock, flags); 215 for_each_action_of_desc(desc, action) { 216 printk(KERN_ERR "[<%p>] %ps", action->handler, action->handler); 217 if (action->thread_fn) 218 printk(KERN_CONT " threaded [<%p>] %ps", 219 action->thread_fn, action->thread_fn); 220 printk(KERN_CONT "\n"); 221 } 222 raw_spin_unlock_irqrestore(&desc->lock, flags); 223 } 224 225 static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) 226 { 227 static int count = 100; 228 229 if (count > 0) { 230 count--; 231 __report_bad_irq(desc, action_ret); 232 } 233 } 234 235 static inline int 236 try_misrouted_irq(unsigned int irq, struct irq_desc *desc, 237 irqreturn_t action_ret) 238 { 239 struct irqaction *action; 240 241 if (!irqfixup) 242 return 0; 243 244 /* We didn't actually handle the IRQ - see if it was misrouted? */ 245 if (action_ret == IRQ_NONE) 246 return 1; 247 248 /* 249 * But for 'irqfixup == 2' we also do it for handled interrupts if 250 * they are marked as IRQF_IRQPOLL (or for irq zero, which is the 251 * traditional PC timer interrupt.. Legacy) 252 */ 253 if (irqfixup < 2) 254 return 0; 255 256 if (!irq) 257 return 1; 258 259 /* 260 * Since we don't get the descriptor lock, "action" can 261 * change under us. We don't really care, but we don't 262 * want to follow a NULL pointer. So tell the compiler to 263 * just load it once by using a barrier. 264 */ 265 action = desc->action; 266 barrier(); 267 return action && (action->flags & IRQF_IRQPOLL); 268 } 269 270 #define SPURIOUS_DEFERRED 0x80000000 271 272 void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret) 273 { 274 unsigned int irq; 275 276 if (desc->istate & IRQS_POLL_INPROGRESS || 277 irq_settings_is_polled(desc)) 278 return; 279 280 if (bad_action_ret(action_ret)) { 281 report_bad_irq(desc, action_ret); 282 return; 283 } 284 285 /* 286 * We cannot call note_interrupt from the threaded handler 287 * because we need to look at the compound of all handlers 288 * (primary and threaded). Aside of that in the threaded 289 * shared case we have no serialization against an incoming 290 * hardware interrupt while we are dealing with a threaded 291 * result. 292 * 293 * So in case a thread is woken, we just note the fact and 294 * defer the analysis to the next hardware interrupt. 295 * 296 * The threaded handlers store whether they successfully 297 * handled an interrupt and we check whether that number 298 * changed versus the last invocation. 299 * 300 * We could handle all interrupts with the delayed by one 301 * mechanism, but for the non forced threaded case we'd just 302 * add pointless overhead to the straight hardirq interrupts 303 * for the sake of a few lines less code. 304 */ 305 if (action_ret & IRQ_WAKE_THREAD) { 306 /* 307 * There is a thread woken. Check whether one of the 308 * shared primary handlers returned IRQ_HANDLED. If 309 * not we defer the spurious detection to the next 310 * interrupt. 311 */ 312 if (action_ret == IRQ_WAKE_THREAD) { 313 int handled; 314 /* 315 * We use bit 31 of thread_handled_last to 316 * denote the deferred spurious detection 317 * active. No locking necessary as 318 * thread_handled_last is only accessed here 319 * and we have the guarantee that hard 320 * interrupts are not reentrant. 321 */ 322 if (!(desc->threads_handled_last & SPURIOUS_DEFERRED)) { 323 desc->threads_handled_last |= SPURIOUS_DEFERRED; 324 return; 325 } 326 /* 327 * Check whether one of the threaded handlers 328 * returned IRQ_HANDLED since the last 329 * interrupt happened. 330 * 331 * For simplicity we just set bit 31, as it is 332 * set in threads_handled_last as well. So we 333 * avoid extra masking. And we really do not 334 * care about the high bits of the handled 335 * count. We just care about the count being 336 * different than the one we saw before. 337 */ 338 handled = atomic_read(&desc->threads_handled); 339 handled |= SPURIOUS_DEFERRED; 340 if (handled != desc->threads_handled_last) { 341 action_ret = IRQ_HANDLED; 342 /* 343 * Note: We keep the SPURIOUS_DEFERRED 344 * bit set. We are handling the 345 * previous invocation right now. 346 * Keep it for the current one, so the 347 * next hardware interrupt will 348 * account for it. 349 */ 350 desc->threads_handled_last = handled; 351 } else { 352 /* 353 * None of the threaded handlers felt 354 * responsible for the last interrupt 355 * 356 * We keep the SPURIOUS_DEFERRED bit 357 * set in threads_handled_last as we 358 * need to account for the current 359 * interrupt as well. 360 */ 361 action_ret = IRQ_NONE; 362 } 363 } else { 364 /* 365 * One of the primary handlers returned 366 * IRQ_HANDLED. So we don't care about the 367 * threaded handlers on the same line. Clear 368 * the deferred detection bit. 369 * 370 * In theory we could/should check whether the 371 * deferred bit is set and take the result of 372 * the previous run into account here as 373 * well. But it's really not worth the 374 * trouble. If every other interrupt is 375 * handled we never trigger the spurious 376 * detector. And if this is just the one out 377 * of 100k unhandled ones which is handled 378 * then we merily delay the spurious detection 379 * by one hard interrupt. Not a real problem. 380 */ 381 desc->threads_handled_last &= ~SPURIOUS_DEFERRED; 382 } 383 } 384 385 if (unlikely(action_ret == IRQ_NONE)) { 386 /* 387 * If we are seeing only the odd spurious IRQ caused by 388 * bus asynchronicity then don't eventually trigger an error, 389 * otherwise the counter becomes a doomsday timer for otherwise 390 * working systems 391 */ 392 if (time_after(jiffies, desc->last_unhandled + HZ/10)) 393 desc->irqs_unhandled = 1; 394 else 395 desc->irqs_unhandled++; 396 desc->last_unhandled = jiffies; 397 } 398 399 irq = irq_desc_get_irq(desc); 400 if (unlikely(try_misrouted_irq(irq, desc, action_ret))) { 401 int ok = misrouted_irq(irq); 402 if (action_ret == IRQ_NONE) 403 desc->irqs_unhandled -= ok; 404 } 405 406 if (likely(!desc->irqs_unhandled)) 407 return; 408 409 /* Now getting into unhandled irq detection */ 410 desc->irq_count++; 411 if (likely(desc->irq_count < 100000)) 412 return; 413 414 desc->irq_count = 0; 415 if (unlikely(desc->irqs_unhandled > 99900)) { 416 /* 417 * The interrupt is stuck 418 */ 419 __report_bad_irq(desc, action_ret); 420 /* 421 * Now kill the IRQ 422 */ 423 printk(KERN_EMERG "Disabling IRQ #%d\n", irq); 424 desc->istate |= IRQS_SPURIOUS_DISABLED; 425 desc->depth++; 426 irq_disable(desc); 427 428 mod_timer(&poll_spurious_irq_timer, 429 jiffies + POLL_SPURIOUS_IRQ_INTERVAL); 430 } 431 desc->irqs_unhandled = 0; 432 } 433 434 bool noirqdebug __read_mostly; 435 436 int noirqdebug_setup(char *str) 437 { 438 noirqdebug = 1; 439 printk(KERN_INFO "IRQ lockup detection disabled\n"); 440 441 return 1; 442 } 443 444 __setup("noirqdebug", noirqdebug_setup); 445 module_param(noirqdebug, bool, 0644); 446 MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); 447 448 static int __init irqfixup_setup(char *str) 449 { 450 if (IS_ENABLED(CONFIG_PREEMPT_RT)) { 451 pr_warn("irqfixup boot option not supported with PREEMPT_RT\n"); 452 return 1; 453 } 454 irqfixup = 1; 455 printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); 456 printk(KERN_WARNING "This may impact system performance.\n"); 457 458 return 1; 459 } 460 461 __setup("irqfixup", irqfixup_setup); 462 module_param(irqfixup, int, 0644); 463 464 static int __init irqpoll_setup(char *str) 465 { 466 if (IS_ENABLED(CONFIG_PREEMPT_RT)) { 467 pr_warn("irqpoll boot option not supported with PREEMPT_RT\n"); 468 return 1; 469 } 470 irqfixup = 2; 471 printk(KERN_WARNING "Misrouted IRQ fixup and polling support " 472 "enabled\n"); 473 printk(KERN_WARNING "This may significantly impact system " 474 "performance\n"); 475 return 1; 476 } 477 478 __setup("irqpoll", irqpoll_setup); 479