1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Common interrupt code for 32 and 64 bit 4 */ 5 #include <linux/cpu.h> 6 #include <linux/interrupt.h> 7 #include <linux/kernel_stat.h> 8 #include <linux/of.h> 9 #include <linux/seq_file.h> 10 #include <linux/smp.h> 11 #include <linux/ftrace.h> 12 #include <linux/delay.h> 13 #include <linux/export.h> 14 #include <linux/irq.h> 15 #include <linux/kvm_types.h> 16 17 #include <asm/irq_stack.h> 18 #include <asm/apic.h> 19 #include <asm/io_apic.h> 20 #include <asm/irq.h> 21 #include <asm/mce.h> 22 #include <asm/hw_irq.h> 23 #include <asm/desc.h> 24 #include <asm/traps.h> 25 #include <asm/thermal.h> 26 #include <asm/posted_intr.h> 27 #include <asm/irq_remapping.h> 28 29 #if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_THERMAL_VECTOR) 30 #define CREATE_TRACE_POINTS 31 #include <asm/trace/irq_vectors.h> 32 #endif 33 34 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); 35 EXPORT_PER_CPU_SYMBOL(irq_stat); 36 37 DEFINE_PER_CPU_CACHE_HOT(u16, __softirq_pending); 38 EXPORT_PER_CPU_SYMBOL(__softirq_pending); 39 40 DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr); 41 42 atomic_t irq_err_count; 43 44 /* 45 * 'what should we do if we get a hw irq event on an illegal vector'. 46 * each architecture has to answer this themselves. 47 */ 48 void ack_bad_irq(unsigned int irq) 49 { 50 if (printk_ratelimit()) 51 pr_err("unexpected IRQ trap at vector %02x\n", irq); 52 53 /* 54 * Currently unexpected vectors happen only on SMP and APIC. 55 * We _must_ ack these because every local APIC has only N 56 * irq slots per priority level, and a 'hanging, unacked' IRQ 57 * holds up an irq slot - in excessive cases (when multiple 58 * unexpected vectors occur) that might lock up the APIC 59 * completely. 60 * But only ack when the APIC is enabled -AK 61 */ 62 apic_eoi(); 63 } 64 65 #define irq_stats(x) (&per_cpu(irq_stat, x)) 66 /* 67 * /proc/interrupts printing for arch specific interrupts 68 */ 69 int arch_show_interrupts(struct seq_file *p, int prec) 70 { 71 int j; 72 73 seq_printf(p, "%*s: ", prec, "NMI"); 74 for_each_online_cpu(j) 75 seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); 76 seq_puts(p, " Non-maskable interrupts\n"); 77 #ifdef CONFIG_X86_LOCAL_APIC 78 seq_printf(p, "%*s: ", prec, "LOC"); 79 for_each_online_cpu(j) 80 seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); 81 seq_puts(p, " Local timer interrupts\n"); 82 83 seq_printf(p, "%*s: ", prec, "SPU"); 84 for_each_online_cpu(j) 85 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); 86 seq_puts(p, " Spurious interrupts\n"); 87 seq_printf(p, "%*s: ", prec, "PMI"); 88 for_each_online_cpu(j) 89 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); 90 seq_puts(p, " Performance monitoring interrupts\n"); 91 seq_printf(p, "%*s: ", prec, "IWI"); 92 for_each_online_cpu(j) 93 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); 94 seq_puts(p, " IRQ work interrupts\n"); 95 seq_printf(p, "%*s: ", prec, "RTR"); 96 for_each_online_cpu(j) 97 seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count); 98 seq_puts(p, " APIC ICR read retries\n"); 99 if (x86_platform_ipi_callback) { 100 seq_printf(p, "%*s: ", prec, "PLT"); 101 for_each_online_cpu(j) 102 seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis); 103 seq_puts(p, " Platform interrupts\n"); 104 } 105 #endif 106 #ifdef CONFIG_SMP 107 seq_printf(p, "%*s: ", prec, "RES"); 108 for_each_online_cpu(j) 109 seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); 110 seq_puts(p, " Rescheduling interrupts\n"); 111 seq_printf(p, "%*s: ", prec, "CAL"); 112 for_each_online_cpu(j) 113 seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); 114 seq_puts(p, " Function call interrupts\n"); 115 seq_printf(p, "%*s: ", prec, "TLB"); 116 for_each_online_cpu(j) 117 seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); 118 seq_puts(p, " TLB shootdowns\n"); 119 #endif 120 #ifdef CONFIG_X86_THERMAL_VECTOR 121 seq_printf(p, "%*s: ", prec, "TRM"); 122 for_each_online_cpu(j) 123 seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); 124 seq_puts(p, " Thermal event interrupts\n"); 125 #endif 126 #ifdef CONFIG_X86_MCE_THRESHOLD 127 seq_printf(p, "%*s: ", prec, "THR"); 128 for_each_online_cpu(j) 129 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); 130 seq_puts(p, " Threshold APIC interrupts\n"); 131 #endif 132 #ifdef CONFIG_X86_MCE_AMD 133 seq_printf(p, "%*s: ", prec, "DFR"); 134 for_each_online_cpu(j) 135 seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count); 136 seq_puts(p, " Deferred Error APIC interrupts\n"); 137 #endif 138 #ifdef CONFIG_X86_MCE 139 seq_printf(p, "%*s: ", prec, "MCE"); 140 for_each_online_cpu(j) 141 seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); 142 seq_puts(p, " Machine check exceptions\n"); 143 seq_printf(p, "%*s: ", prec, "MCP"); 144 for_each_online_cpu(j) 145 seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); 146 seq_puts(p, " Machine check polls\n"); 147 #endif 148 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR 149 if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) { 150 seq_printf(p, "%*s: ", prec, "HYP"); 151 for_each_online_cpu(j) 152 seq_printf(p, "%10u ", 153 irq_stats(j)->irq_hv_callback_count); 154 seq_puts(p, " Hypervisor callback interrupts\n"); 155 } 156 #endif 157 #if IS_ENABLED(CONFIG_HYPERV) 158 if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) { 159 seq_printf(p, "%*s: ", prec, "HRE"); 160 for_each_online_cpu(j) 161 seq_printf(p, "%10u ", 162 irq_stats(j)->irq_hv_reenlightenment_count); 163 seq_puts(p, " Hyper-V reenlightenment interrupts\n"); 164 } 165 if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) { 166 seq_printf(p, "%*s: ", prec, "HVS"); 167 for_each_online_cpu(j) 168 seq_printf(p, "%10u ", 169 irq_stats(j)->hyperv_stimer0_count); 170 seq_puts(p, " Hyper-V stimer0 interrupts\n"); 171 } 172 #endif 173 seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); 174 #if defined(CONFIG_X86_IO_APIC) 175 seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); 176 #endif 177 #if IS_ENABLED(CONFIG_KVM) 178 seq_printf(p, "%*s: ", prec, "PIN"); 179 for_each_online_cpu(j) 180 seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis); 181 seq_puts(p, " Posted-interrupt notification event\n"); 182 183 seq_printf(p, "%*s: ", prec, "NPI"); 184 for_each_online_cpu(j) 185 seq_printf(p, "%10u ", 186 irq_stats(j)->kvm_posted_intr_nested_ipis); 187 seq_puts(p, " Nested posted-interrupt event\n"); 188 189 seq_printf(p, "%*s: ", prec, "PIW"); 190 for_each_online_cpu(j) 191 seq_printf(p, "%10u ", 192 irq_stats(j)->kvm_posted_intr_wakeup_ipis); 193 seq_puts(p, " Posted-interrupt wakeup event\n"); 194 #endif 195 #ifdef CONFIG_GUEST_PERF_EVENTS 196 seq_printf(p, "%*s: ", prec, "VPMI"); 197 for_each_online_cpu(j) 198 seq_printf(p, "%10u ", 199 irq_stats(j)->perf_guest_mediated_pmis); 200 seq_puts(p, " Perf Guest Mediated PMI\n"); 201 #endif 202 #ifdef CONFIG_X86_POSTED_MSI 203 seq_printf(p, "%*s: ", prec, "PMN"); 204 for_each_online_cpu(j) 205 seq_printf(p, "%10u ", 206 irq_stats(j)->posted_msi_notification_count); 207 seq_puts(p, " Posted MSI notification event\n"); 208 #endif 209 return 0; 210 } 211 212 /* 213 * /proc/stat helpers 214 */ 215 u64 arch_irq_stat_cpu(unsigned int cpu) 216 { 217 u64 sum = irq_stats(cpu)->__nmi_count; 218 219 #ifdef CONFIG_X86_LOCAL_APIC 220 sum += irq_stats(cpu)->apic_timer_irqs; 221 sum += irq_stats(cpu)->irq_spurious_count; 222 sum += irq_stats(cpu)->apic_perf_irqs; 223 sum += irq_stats(cpu)->apic_irq_work_irqs; 224 sum += irq_stats(cpu)->icr_read_retry_count; 225 if (x86_platform_ipi_callback) 226 sum += irq_stats(cpu)->x86_platform_ipis; 227 #endif 228 #ifdef CONFIG_SMP 229 sum += irq_stats(cpu)->irq_resched_count; 230 sum += irq_stats(cpu)->irq_call_count; 231 #endif 232 #ifdef CONFIG_X86_THERMAL_VECTOR 233 sum += irq_stats(cpu)->irq_thermal_count; 234 #endif 235 #ifdef CONFIG_X86_MCE_THRESHOLD 236 sum += irq_stats(cpu)->irq_threshold_count; 237 #endif 238 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR 239 sum += irq_stats(cpu)->irq_hv_callback_count; 240 #endif 241 #if IS_ENABLED(CONFIG_HYPERV) 242 sum += irq_stats(cpu)->irq_hv_reenlightenment_count; 243 sum += irq_stats(cpu)->hyperv_stimer0_count; 244 #endif 245 #ifdef CONFIG_X86_MCE 246 sum += per_cpu(mce_exception_count, cpu); 247 sum += per_cpu(mce_poll_count, cpu); 248 #endif 249 return sum; 250 } 251 252 u64 arch_irq_stat(void) 253 { 254 u64 sum = atomic_read(&irq_err_count); 255 return sum; 256 } 257 258 static __always_inline void handle_irq(struct irq_desc *desc, 259 struct pt_regs *regs) 260 { 261 if (IS_ENABLED(CONFIG_X86_64)) 262 generic_handle_irq_desc(desc); 263 else 264 __handle_irq(desc, regs); 265 } 266 267 static struct irq_desc *reevaluate_vector(int vector) 268 { 269 struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); 270 271 if (!IS_ERR_OR_NULL(desc)) 272 return desc; 273 274 if (desc == VECTOR_UNUSED) 275 pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector); 276 else 277 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 278 return NULL; 279 } 280 281 static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs) 282 { 283 struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); 284 285 if (likely(!IS_ERR_OR_NULL(desc))) { 286 handle_irq(desc, regs); 287 return true; 288 } 289 290 /* 291 * Reevaluate with vector_lock held to prevent a race against 292 * request_irq() setting up the vector: 293 * 294 * CPU0 CPU1 295 * interrupt is raised in APIC IRR 296 * but not handled 297 * free_irq() 298 * per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN; 299 * 300 * request_irq() common_interrupt() 301 * d = this_cpu_read(vector_irq[vector]); 302 * 303 * per_cpu(vector_irq, CPU1)[vector] = desc; 304 * 305 * if (d == VECTOR_SHUTDOWN) 306 * this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 307 * 308 * This requires that the same vector on the same target CPU is 309 * handed out or that a spurious interrupt hits that CPU/vector. 310 */ 311 lock_vector_lock(); 312 desc = reevaluate_vector(vector); 313 unlock_vector_lock(); 314 315 if (!desc) 316 return false; 317 318 handle_irq(desc, regs); 319 return true; 320 } 321 322 /* 323 * common_interrupt() handles all normal device IRQ's (the special SMP 324 * cross-CPU interrupts have their own entry points). 325 */ 326 DEFINE_IDTENTRY_IRQ(common_interrupt) 327 { 328 struct pt_regs *old_regs = set_irq_regs(regs); 329 330 /* entry code tells RCU that we're not quiescent. Check it. */ 331 RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); 332 333 if (unlikely(!call_irq_handler(vector, regs))) 334 apic_eoi(); 335 336 set_irq_regs(old_regs); 337 } 338 339 #ifdef CONFIG_X86_LOCAL_APIC 340 /* Function pointer for generic interrupt vector handling */ 341 void (*x86_platform_ipi_callback)(void) = NULL; 342 /* 343 * Handler for X86_PLATFORM_IPI_VECTOR. 344 */ 345 DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi) 346 { 347 struct pt_regs *old_regs = set_irq_regs(regs); 348 349 apic_eoi(); 350 trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR); 351 inc_irq_stat(x86_platform_ipis); 352 if (x86_platform_ipi_callback) 353 x86_platform_ipi_callback(); 354 trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR); 355 set_irq_regs(old_regs); 356 } 357 #endif 358 359 #ifdef CONFIG_GUEST_PERF_EVENTS 360 /* 361 * Handler for PERF_GUEST_MEDIATED_PMI_VECTOR. 362 */ 363 DEFINE_IDTENTRY_SYSVEC(sysvec_perf_guest_mediated_pmi_handler) 364 { 365 apic_eoi(); 366 inc_irq_stat(perf_guest_mediated_pmis); 367 perf_guest_handle_mediated_pmi(); 368 } 369 #endif 370 371 #if IS_ENABLED(CONFIG_KVM) 372 static void dummy_handler(void) {} 373 static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler; 374 375 void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) 376 { 377 if (handler) 378 kvm_posted_intr_wakeup_handler = handler; 379 else { 380 kvm_posted_intr_wakeup_handler = dummy_handler; 381 synchronize_rcu(); 382 } 383 } 384 EXPORT_SYMBOL_FOR_KVM(kvm_set_posted_intr_wakeup_handler); 385 386 /* 387 * Handler for POSTED_INTERRUPT_VECTOR. 388 */ 389 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi) 390 { 391 apic_eoi(); 392 inc_irq_stat(kvm_posted_intr_ipis); 393 } 394 395 /* 396 * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. 397 */ 398 DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi) 399 { 400 apic_eoi(); 401 inc_irq_stat(kvm_posted_intr_wakeup_ipis); 402 kvm_posted_intr_wakeup_handler(); 403 } 404 405 /* 406 * Handler for POSTED_INTERRUPT_NESTED_VECTOR. 407 */ 408 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi) 409 { 410 apic_eoi(); 411 inc_irq_stat(kvm_posted_intr_nested_ipis); 412 } 413 #endif 414 415 #ifdef CONFIG_X86_POSTED_MSI 416 417 /* Posted Interrupt Descriptors for coalesced MSIs to be posted */ 418 DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); 419 static DEFINE_PER_CPU_CACHE_HOT(bool, posted_msi_handler_active); 420 421 void intel_posted_msi_init(void) 422 { 423 u32 destination; 424 u32 apic_id; 425 426 this_cpu_write(posted_msi_pi_desc.nv, POSTED_MSI_NOTIFICATION_VECTOR); 427 428 /* 429 * APIC destination ID is stored in bit 8:15 while in XAPIC mode. 430 * VT-d spec. CH 9.11 431 */ 432 apic_id = this_cpu_read(x86_cpu_to_apicid); 433 destination = x2apic_enabled() ? apic_id : apic_id << 8; 434 this_cpu_write(posted_msi_pi_desc.ndst, destination); 435 } 436 437 void intel_ack_posted_msi_irq(struct irq_data *irqd) 438 { 439 irq_move_irq(irqd); 440 441 /* 442 * Handle the rare case that irq_retrigger() raised the actual 443 * assigned vector on the target CPU, which means that it was not 444 * invoked via the posted MSI handler below. In that case APIC EOI 445 * is required as otherwise the ISR entry becomes stale and lower 446 * priority interrupts are never going to be delivered after that. 447 * 448 * If the posted handler invoked the device interrupt handler then 449 * the EOI would be premature because it would acknowledge the 450 * posted vector. 451 */ 452 if (unlikely(!__this_cpu_read(posted_msi_handler_active))) 453 apic_eoi(); 454 } 455 456 static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs) 457 { 458 unsigned long pir_copy[NR_PIR_WORDS]; 459 int vec = FIRST_EXTERNAL_VECTOR; 460 461 if (!pi_harvest_pir(pir, pir_copy)) 462 return false; 463 464 for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR) 465 call_irq_handler(vec, regs); 466 467 return true; 468 } 469 470 /* 471 * Performance data shows that 3 is good enough to harvest 90+% of the benefit 472 * on high IRQ rate workload. 473 */ 474 #define MAX_POSTED_MSI_COALESCING_LOOP 3 475 476 /* 477 * For MSIs that are delivered as posted interrupts, the CPU notifications 478 * can be coalesced if the MSIs arrive in high frequency bursts. 479 */ 480 DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification) 481 { 482 struct pt_regs *old_regs = set_irq_regs(regs); 483 struct pi_desc *pid; 484 int i = 0; 485 486 pid = this_cpu_ptr(&posted_msi_pi_desc); 487 488 /* Mark the handler active for intel_ack_posted_msi_irq() */ 489 __this_cpu_write(posted_msi_handler_active, true); 490 inc_irq_stat(posted_msi_notification_count); 491 irq_enter(); 492 493 /* 494 * Max coalescing count includes the extra round of handle_pending_pir 495 * after clearing the outstanding notification bit. Hence, at most 496 * MAX_POSTED_MSI_COALESCING_LOOP - 1 loops are executed here. 497 */ 498 while (++i < MAX_POSTED_MSI_COALESCING_LOOP) { 499 if (!handle_pending_pir(pid->pir, regs)) 500 break; 501 } 502 503 /* 504 * Clear outstanding notification bit to allow new IRQ notifications, 505 * do this last to maximize the window of interrupt coalescing. 506 */ 507 pi_clear_on(pid); 508 509 /* 510 * There could be a race of PI notification and the clearing of ON bit, 511 * process PIR bits one last time such that handling the new interrupts 512 * are not delayed until the next IRQ. 513 */ 514 handle_pending_pir(pid->pir, regs); 515 516 apic_eoi(); 517 irq_exit(); 518 __this_cpu_write(posted_msi_handler_active, false); 519 set_irq_regs(old_regs); 520 } 521 #endif /* X86_POSTED_MSI */ 522 523 #ifdef CONFIG_HOTPLUG_CPU 524 /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ 525 void fixup_irqs(void) 526 { 527 unsigned int vector; 528 struct irq_desc *desc; 529 struct irq_data *data; 530 struct irq_chip *chip; 531 532 irq_migrate_all_off_this_cpu(); 533 534 /* 535 * We can remove mdelay() and then send spurious interrupts to 536 * new cpu targets for all the irqs that were handled previously by 537 * this cpu. While it works, I have seen spurious interrupt messages 538 * (nothing wrong but still...). 539 * 540 * So for now, retain mdelay(1) and check the IRR and then send those 541 * interrupts to new targets as this cpu is already offlined... 542 */ 543 mdelay(1); 544 545 /* 546 * We can walk the vector array of this cpu without holding 547 * vector_lock because the cpu is already marked !online, so 548 * nothing else will touch it. 549 */ 550 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 551 if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector]))) 552 continue; 553 554 if (is_vector_pending(vector)) { 555 desc = __this_cpu_read(vector_irq[vector]); 556 557 raw_spin_lock(&desc->lock); 558 data = irq_desc_get_irq_data(desc); 559 chip = irq_data_get_irq_chip(data); 560 if (chip->irq_retrigger) { 561 chip->irq_retrigger(data); 562 __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED); 563 } 564 raw_spin_unlock(&desc->lock); 565 } 566 if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED) 567 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 568 } 569 } 570 #endif 571 572 #ifdef CONFIG_X86_THERMAL_VECTOR 573 static void smp_thermal_vector(void) 574 { 575 if (x86_thermal_enabled()) 576 intel_thermal_interrupt(); 577 else 578 pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", 579 smp_processor_id()); 580 } 581 582 DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) 583 { 584 trace_thermal_apic_entry(THERMAL_APIC_VECTOR); 585 inc_irq_stat(irq_thermal_count); 586 smp_thermal_vector(); 587 trace_thermal_apic_exit(THERMAL_APIC_VECTOR); 588 apic_eoi(); 589 } 590 #endif 591