1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Common interrupt code for 32 and 64 bit 4 */ 5 #include <linux/cpu.h> 6 #include <linux/interrupt.h> 7 #include <linux/kernel_stat.h> 8 #include <linux/of.h> 9 #include <linux/seq_file.h> 10 #include <linux/smp.h> 11 #include <linux/ftrace.h> 12 #include <linux/delay.h> 13 #include <linux/export.h> 14 #include <linux/irq.h> 15 #include <linux/kvm_types.h> 16 17 #include <asm/irq_stack.h> 18 #include <asm/apic.h> 19 #include <asm/io_apic.h> 20 #include <asm/irq.h> 21 #include <asm/mce.h> 22 #include <asm/hw_irq.h> 23 #include <asm/desc.h> 24 #include <asm/traps.h> 25 #include <asm/thermal.h> 26 #include <asm/posted_intr.h> 27 #include <asm/irq_remapping.h> 28 29 #if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_THERMAL_VECTOR) 30 #define CREATE_TRACE_POINTS 31 #include <asm/trace/irq_vectors.h> 32 #endif 33 34 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); 35 EXPORT_PER_CPU_SYMBOL(irq_stat); 36 37 DEFINE_PER_CPU_CACHE_HOT(u16, __softirq_pending); 38 EXPORT_PER_CPU_SYMBOL(__softirq_pending); 39 40 DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr); 41 42 atomic_t irq_err_count; 43 44 /* 45 * 'what should we do if we get a hw irq event on an illegal vector'. 46 * each architecture has to answer this themselves. 47 */ 48 void ack_bad_irq(unsigned int irq) 49 { 50 if (printk_ratelimit()) 51 pr_err("unexpected IRQ trap at vector %02x\n", irq); 52 53 /* 54 * Currently unexpected vectors happen only on SMP and APIC. 55 * We _must_ ack these because every local APIC has only N 56 * irq slots per priority level, and a 'hanging, unacked' IRQ 57 * holds up an irq slot - in excessive cases (when multiple 58 * unexpected vectors occur) that might lock up the APIC 59 * completely. 60 * But only ack when the APIC is enabled -AK 61 */ 62 apic_eoi(); 63 } 64 65 #define irq_stats(x) (&per_cpu(irq_stat, x)) 66 /* 67 * /proc/interrupts printing for arch specific interrupts 68 */ 69 int arch_show_interrupts(struct seq_file *p, int prec) 70 { 71 int j; 72 73 seq_printf(p, "%*s: ", prec, "NMI"); 74 for_each_online_cpu(j) 75 seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); 76 seq_puts(p, " Non-maskable interrupts\n"); 77 #ifdef CONFIG_X86_LOCAL_APIC 78 seq_printf(p, "%*s: ", prec, "LOC"); 79 for_each_online_cpu(j) 80 seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); 81 seq_puts(p, " Local timer interrupts\n"); 82 83 seq_printf(p, "%*s: ", prec, "SPU"); 84 for_each_online_cpu(j) 85 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); 86 seq_puts(p, " Spurious interrupts\n"); 87 seq_printf(p, "%*s: ", prec, "PMI"); 88 for_each_online_cpu(j) 89 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); 90 seq_puts(p, " Performance monitoring interrupts\n"); 91 seq_printf(p, "%*s: ", prec, "IWI"); 92 for_each_online_cpu(j) 93 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); 94 seq_puts(p, " IRQ work interrupts\n"); 95 seq_printf(p, "%*s: ", prec, "RTR"); 96 for_each_online_cpu(j) 97 seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count); 98 seq_puts(p, " APIC ICR read retries\n"); 99 if (x86_platform_ipi_callback) { 100 seq_printf(p, "%*s: ", prec, "PLT"); 101 for_each_online_cpu(j) 102 seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis); 103 seq_puts(p, " Platform interrupts\n"); 104 } 105 #endif 106 #ifdef CONFIG_SMP 107 seq_printf(p, "%*s: ", prec, "RES"); 108 for_each_online_cpu(j) 109 seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); 110 seq_puts(p, " Rescheduling interrupts\n"); 111 seq_printf(p, "%*s: ", prec, "CAL"); 112 for_each_online_cpu(j) 113 seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); 114 seq_puts(p, " Function call interrupts\n"); 115 seq_printf(p, "%*s: ", prec, "TLB"); 116 for_each_online_cpu(j) 117 seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); 118 seq_puts(p, " TLB shootdowns\n"); 119 #endif 120 #ifdef CONFIG_X86_THERMAL_VECTOR 121 seq_printf(p, "%*s: ", prec, "TRM"); 122 for_each_online_cpu(j) 123 seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); 124 seq_puts(p, " Thermal event interrupts\n"); 125 #endif 126 #ifdef CONFIG_X86_MCE_THRESHOLD 127 seq_printf(p, "%*s: ", prec, "THR"); 128 for_each_online_cpu(j) 129 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); 130 seq_puts(p, " Threshold APIC interrupts\n"); 131 #endif 132 #ifdef CONFIG_X86_MCE_AMD 133 seq_printf(p, "%*s: ", prec, "DFR"); 134 for_each_online_cpu(j) 135 seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count); 136 seq_puts(p, " Deferred Error APIC interrupts\n"); 137 #endif 138 #ifdef CONFIG_X86_MCE 139 seq_printf(p, "%*s: ", prec, "MCE"); 140 for_each_online_cpu(j) 141 seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); 142 seq_puts(p, " Machine check exceptions\n"); 143 seq_printf(p, "%*s: ", prec, "MCP"); 144 for_each_online_cpu(j) 145 seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); 146 seq_puts(p, " Machine check polls\n"); 147 #endif 148 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR 149 if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) { 150 seq_printf(p, "%*s: ", prec, "HYP"); 151 for_each_online_cpu(j) 152 seq_printf(p, "%10u ", 153 irq_stats(j)->irq_hv_callback_count); 154 seq_puts(p, " Hypervisor callback interrupts\n"); 155 } 156 #endif 157 #if IS_ENABLED(CONFIG_HYPERV) 158 if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) { 159 seq_printf(p, "%*s: ", prec, "HRE"); 160 for_each_online_cpu(j) 161 seq_printf(p, "%10u ", 162 irq_stats(j)->irq_hv_reenlightenment_count); 163 seq_puts(p, " Hyper-V reenlightenment interrupts\n"); 164 } 165 if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) { 166 seq_printf(p, "%*s: ", prec, "HVS"); 167 for_each_online_cpu(j) 168 seq_printf(p, "%10u ", 169 irq_stats(j)->hyperv_stimer0_count); 170 seq_puts(p, " Hyper-V stimer0 interrupts\n"); 171 } 172 #endif 173 seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); 174 #if defined(CONFIG_X86_IO_APIC) 175 seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); 176 #endif 177 #if IS_ENABLED(CONFIG_KVM) 178 seq_printf(p, "%*s: ", prec, "PIN"); 179 for_each_online_cpu(j) 180 seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis); 181 seq_puts(p, " Posted-interrupt notification event\n"); 182 183 seq_printf(p, "%*s: ", prec, "NPI"); 184 for_each_online_cpu(j) 185 seq_printf(p, "%10u ", 186 irq_stats(j)->kvm_posted_intr_nested_ipis); 187 seq_puts(p, " Nested posted-interrupt event\n"); 188 189 seq_printf(p, "%*s: ", prec, "PIW"); 190 for_each_online_cpu(j) 191 seq_printf(p, "%10u ", 192 irq_stats(j)->kvm_posted_intr_wakeup_ipis); 193 seq_puts(p, " Posted-interrupt wakeup event\n"); 194 #endif 195 #ifdef CONFIG_GUEST_PERF_EVENTS 196 seq_printf(p, "%*s: ", prec, "VPMI"); 197 for_each_online_cpu(j) 198 seq_printf(p, "%10u ", 199 irq_stats(j)->perf_guest_mediated_pmis); 200 seq_puts(p, " Perf Guest Mediated PMI\n"); 201 #endif 202 #ifdef CONFIG_X86_POSTED_MSI 203 seq_printf(p, "%*s: ", prec, "PMN"); 204 for_each_online_cpu(j) 205 seq_printf(p, "%10u ", 206 irq_stats(j)->posted_msi_notification_count); 207 seq_puts(p, " Posted MSI notification event\n"); 208 #endif 209 return 0; 210 } 211 212 /* 213 * /proc/stat helpers 214 */ 215 u64 arch_irq_stat_cpu(unsigned int cpu) 216 { 217 u64 sum = irq_stats(cpu)->__nmi_count; 218 219 #ifdef CONFIG_X86_LOCAL_APIC 220 sum += irq_stats(cpu)->apic_timer_irqs; 221 sum += irq_stats(cpu)->irq_spurious_count; 222 sum += irq_stats(cpu)->apic_perf_irqs; 223 sum += irq_stats(cpu)->apic_irq_work_irqs; 224 sum += irq_stats(cpu)->icr_read_retry_count; 225 if (x86_platform_ipi_callback) 226 sum += irq_stats(cpu)->x86_platform_ipis; 227 #endif 228 #ifdef CONFIG_SMP 229 sum += irq_stats(cpu)->irq_resched_count; 230 sum += irq_stats(cpu)->irq_call_count; 231 #endif 232 #ifdef CONFIG_X86_THERMAL_VECTOR 233 sum += irq_stats(cpu)->irq_thermal_count; 234 #endif 235 #ifdef CONFIG_X86_MCE_THRESHOLD 236 sum += irq_stats(cpu)->irq_threshold_count; 237 #endif 238 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR 239 sum += irq_stats(cpu)->irq_hv_callback_count; 240 #endif 241 #if IS_ENABLED(CONFIG_HYPERV) 242 sum += irq_stats(cpu)->irq_hv_reenlightenment_count; 243 sum += irq_stats(cpu)->hyperv_stimer0_count; 244 #endif 245 #ifdef CONFIG_X86_MCE 246 sum += per_cpu(mce_exception_count, cpu); 247 sum += per_cpu(mce_poll_count, cpu); 248 #endif 249 return sum; 250 } 251 252 u64 arch_irq_stat(void) 253 { 254 u64 sum = atomic_read(&irq_err_count); 255 return sum; 256 } 257 258 static __always_inline void handle_irq(struct irq_desc *desc, 259 struct pt_regs *regs) 260 { 261 if (IS_ENABLED(CONFIG_X86_64)) 262 generic_handle_irq_desc(desc); 263 else 264 __handle_irq(desc, regs); 265 } 266 267 static struct irq_desc *reevaluate_vector(int vector) 268 { 269 struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); 270 271 if (!IS_ERR_OR_NULL(desc)) 272 return desc; 273 274 if (desc == VECTOR_UNUSED) 275 pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector); 276 else 277 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 278 return NULL; 279 } 280 281 static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs) 282 { 283 struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); 284 285 if (likely(!IS_ERR_OR_NULL(desc))) { 286 handle_irq(desc, regs); 287 return true; 288 } 289 290 /* 291 * Reevaluate with vector_lock held to prevent a race against 292 * request_irq() setting up the vector: 293 * 294 * CPU0 CPU1 295 * interrupt is raised in APIC IRR 296 * but not handled 297 * free_irq() 298 * per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN; 299 * 300 * request_irq() common_interrupt() 301 * d = this_cpu_read(vector_irq[vector]); 302 * 303 * per_cpu(vector_irq, CPU1)[vector] = desc; 304 * 305 * if (d == VECTOR_SHUTDOWN) 306 * this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 307 * 308 * This requires that the same vector on the same target CPU is 309 * handed out or that a spurious interrupt hits that CPU/vector. 310 */ 311 lock_vector_lock(); 312 desc = reevaluate_vector(vector); 313 unlock_vector_lock(); 314 315 if (!desc) 316 return false; 317 318 handle_irq(desc, regs); 319 return true; 320 } 321 322 /* 323 * common_interrupt() handles all normal device IRQ's (the special SMP 324 * cross-CPU interrupts have their own entry points). 325 */ 326 DEFINE_IDTENTRY_IRQ(common_interrupt) 327 { 328 struct pt_regs *old_regs = set_irq_regs(regs); 329 330 /* entry code tells RCU that we're not quiescent. Check it. */ 331 RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); 332 333 if (unlikely(!call_irq_handler(vector, regs))) 334 apic_eoi(); 335 336 set_irq_regs(old_regs); 337 } 338 339 #ifdef CONFIG_X86_LOCAL_APIC 340 /* Function pointer for generic interrupt vector handling */ 341 void (*x86_platform_ipi_callback)(void) = NULL; 342 /* 343 * Handler for X86_PLATFORM_IPI_VECTOR. 344 */ 345 DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi) 346 { 347 struct pt_regs *old_regs = set_irq_regs(regs); 348 349 apic_eoi(); 350 trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR); 351 inc_irq_stat(x86_platform_ipis); 352 if (x86_platform_ipi_callback) 353 x86_platform_ipi_callback(); 354 trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR); 355 set_irq_regs(old_regs); 356 } 357 #endif 358 359 #ifdef CONFIG_GUEST_PERF_EVENTS 360 /* 361 * Handler for PERF_GUEST_MEDIATED_PMI_VECTOR. 362 */ 363 DEFINE_IDTENTRY_SYSVEC(sysvec_perf_guest_mediated_pmi_handler) 364 { 365 apic_eoi(); 366 inc_irq_stat(perf_guest_mediated_pmis); 367 perf_guest_handle_mediated_pmi(); 368 } 369 #endif 370 371 #if IS_ENABLED(CONFIG_KVM) 372 static void dummy_handler(void) {} 373 static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler; 374 375 void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) 376 { 377 if (handler) 378 kvm_posted_intr_wakeup_handler = handler; 379 else { 380 kvm_posted_intr_wakeup_handler = dummy_handler; 381 synchronize_rcu(); 382 } 383 } 384 EXPORT_SYMBOL_FOR_KVM(kvm_set_posted_intr_wakeup_handler); 385 386 /* 387 * Handler for POSTED_INTERRUPT_VECTOR. 388 */ 389 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi) 390 { 391 apic_eoi(); 392 inc_irq_stat(kvm_posted_intr_ipis); 393 } 394 395 /* 396 * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. 397 */ 398 DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi) 399 { 400 apic_eoi(); 401 inc_irq_stat(kvm_posted_intr_wakeup_ipis); 402 kvm_posted_intr_wakeup_handler(); 403 } 404 405 /* 406 * Handler for POSTED_INTERRUPT_NESTED_VECTOR. 407 */ 408 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi) 409 { 410 apic_eoi(); 411 inc_irq_stat(kvm_posted_intr_nested_ipis); 412 } 413 #endif 414 415 #ifdef CONFIG_X86_POSTED_MSI 416 417 /* Posted Interrupt Descriptors for coalesced MSIs to be posted */ 418 DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); 419 static DEFINE_PER_CPU_CACHE_HOT(bool, posted_msi_handler_active); 420 421 void intel_posted_msi_init(void) 422 { 423 u32 destination, apic_id; 424 425 this_cpu_write(posted_msi_pi_desc.nv, POSTED_MSI_NOTIFICATION_VECTOR); 426 /* 427 * APIC destination ID is stored in bit 8:15 while in XAPIC mode. 428 * VT-d spec. CH 9.11 429 */ 430 apic_id = this_cpu_read(x86_cpu_to_apicid); 431 destination = x2apic_enabled() ? apic_id : apic_id << 8; 432 this_cpu_write(posted_msi_pi_desc.ndst, destination); 433 } 434 435 void intel_ack_posted_msi_irq(struct irq_data *irqd) 436 { 437 irq_move_irq(irqd); 438 439 /* 440 * Handle the rare case that irq_retrigger() raised the actual 441 * assigned vector on the target CPU, which means that it was not 442 * invoked via the posted MSI handler below. In that case APIC EOI 443 * is required as otherwise the ISR entry becomes stale and lower 444 * priority interrupts are never going to be delivered after that. 445 * 446 * If the posted handler invoked the device interrupt handler then 447 * the EOI would be premature because it would acknowledge the 448 * posted vector. 449 */ 450 if (unlikely(!__this_cpu_read(posted_msi_handler_active))) 451 apic_eoi(); 452 } 453 454 static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs) 455 { 456 unsigned long pir_copy[NR_PIR_WORDS]; 457 int vec = FIRST_EXTERNAL_VECTOR; 458 459 if (!pi_harvest_pir(pir, pir_copy)) 460 return false; 461 462 for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR) 463 call_irq_handler(vec, regs); 464 465 return true; 466 } 467 468 /* 469 * Performance data shows that 3 is good enough to harvest 90+% of the 470 * benefit on high interrupt rate workloads. 471 */ 472 #define MAX_POSTED_MSI_COALESCING_LOOP 3 473 474 /* 475 * For MSIs that are delivered as posted interrupts, the CPU notifications 476 * can be coalesced if the MSIs arrive in high frequency bursts. 477 */ 478 DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification) 479 { 480 struct pi_desc *pid = this_cpu_ptr(&posted_msi_pi_desc); 481 struct pt_regs *old_regs = set_irq_regs(regs); 482 483 /* Mark the handler active for intel_ack_posted_msi_irq() */ 484 __this_cpu_write(posted_msi_handler_active, true); 485 inc_irq_stat(posted_msi_notification_count); 486 irq_enter(); 487 488 /* 489 * Loop only MAX_POSTED_MSI_COALESCING_LOOP - 1 times here to take 490 * the final handle_pending_pir() invocation after clearing the 491 * outstanding notification bit into account. 492 */ 493 for (int i = 1; i < MAX_POSTED_MSI_COALESCING_LOOP; i++) { 494 if (!handle_pending_pir(pid->pir, regs)) 495 break; 496 } 497 498 /* 499 * Clear the outstanding notification bit to rearm the notification 500 * mechanism. 501 */ 502 pi_clear_on(pid); 503 504 /* 505 * Clearing the ON bit can race with a notification. Process the 506 * PIR bits one last time so that handling the new interrupts is 507 * not delayed until the next notification happens. 508 */ 509 handle_pending_pir(pid->pir, regs); 510 511 apic_eoi(); 512 irq_exit(); 513 __this_cpu_write(posted_msi_handler_active, false); 514 set_irq_regs(old_regs); 515 } 516 #endif /* X86_POSTED_MSI */ 517 518 #ifdef CONFIG_HOTPLUG_CPU 519 /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ 520 void fixup_irqs(void) 521 { 522 unsigned int vector; 523 struct irq_desc *desc; 524 struct irq_data *data; 525 struct irq_chip *chip; 526 527 irq_migrate_all_off_this_cpu(); 528 529 /* 530 * We can remove mdelay() and then send spurious interrupts to 531 * new cpu targets for all the irqs that were handled previously by 532 * this cpu. While it works, I have seen spurious interrupt messages 533 * (nothing wrong but still...). 534 * 535 * So for now, retain mdelay(1) and check the IRR and then send those 536 * interrupts to new targets as this cpu is already offlined... 537 */ 538 mdelay(1); 539 540 /* 541 * We can walk the vector array of this cpu without holding 542 * vector_lock because the cpu is already marked !online, so 543 * nothing else will touch it. 544 */ 545 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 546 if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector]))) 547 continue; 548 549 if (is_vector_pending(vector)) { 550 desc = __this_cpu_read(vector_irq[vector]); 551 552 raw_spin_lock(&desc->lock); 553 data = irq_desc_get_irq_data(desc); 554 chip = irq_data_get_irq_chip(data); 555 if (chip->irq_retrigger) { 556 chip->irq_retrigger(data); 557 __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED); 558 } 559 raw_spin_unlock(&desc->lock); 560 } 561 if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED) 562 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 563 } 564 } 565 #endif 566 567 #ifdef CONFIG_X86_THERMAL_VECTOR 568 static void smp_thermal_vector(void) 569 { 570 if (x86_thermal_enabled()) 571 intel_thermal_interrupt(); 572 else 573 pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", 574 smp_processor_id()); 575 } 576 577 DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) 578 { 579 trace_thermal_apic_entry(THERMAL_APIC_VECTOR); 580 inc_irq_stat(irq_thermal_count); 581 smp_thermal_vector(); 582 trace_thermal_apic_exit(THERMAL_APIC_VECTOR); 583 apic_eoi(); 584 } 585 #endif 586