1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Common interrupt code for 32 and 64 bit 4 */ 5 #include <linux/cpu.h> 6 #include <linux/interrupt.h> 7 #include <linux/kernel_stat.h> 8 #include <linux/of.h> 9 #include <linux/seq_file.h> 10 #include <linux/smp.h> 11 #include <linux/ftrace.h> 12 #include <linux/delay.h> 13 #include <linux/export.h> 14 #include <linux/irq.h> 15 #include <linux/kvm_types.h> 16 17 #include <asm/irq_stack.h> 18 #include <asm/apic.h> 19 #include <asm/io_apic.h> 20 #include <asm/irq.h> 21 #include <asm/mce.h> 22 #include <asm/hw_irq.h> 23 #include <asm/desc.h> 24 #include <asm/traps.h> 25 #include <asm/thermal.h> 26 #include <asm/posted_intr.h> 27 #include <asm/irq_remapping.h> 28 29 #if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_THERMAL_VECTOR) 30 #define CREATE_TRACE_POINTS 31 #include <asm/trace/irq_vectors.h> 32 #endif 33 34 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); 35 EXPORT_PER_CPU_SYMBOL(irq_stat); 36 37 DEFINE_PER_CPU_CACHE_HOT(u16, __softirq_pending); 38 EXPORT_PER_CPU_SYMBOL(__softirq_pending); 39 40 DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr); 41 42 atomic_t irq_err_count; 43 44 /* 45 * 'what should we do if we get a hw irq event on an illegal vector'. 46 * each architecture has to answer this themselves. 47 */ 48 void ack_bad_irq(unsigned int irq) 49 { 50 if (printk_ratelimit()) 51 pr_err("unexpected IRQ trap at vector %02x\n", irq); 52 53 /* 54 * Currently unexpected vectors happen only on SMP and APIC. 55 * We _must_ ack these because every local APIC has only N 56 * irq slots per priority level, and a 'hanging, unacked' IRQ 57 * holds up an irq slot - in excessive cases (when multiple 58 * unexpected vectors occur) that might lock up the APIC 59 * completely. 60 * But only ack when the APIC is enabled -AK 61 */ 62 apic_eoi(); 63 } 64 65 #define irq_stats(x) (&per_cpu(irq_stat, x)) 66 /* 67 * /proc/interrupts printing for arch specific interrupts 68 */ 69 int arch_show_interrupts(struct seq_file *p, int prec) 70 { 71 int j; 72 73 seq_printf(p, "%*s: ", prec, "NMI"); 74 for_each_online_cpu(j) 75 seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); 76 seq_puts(p, " Non-maskable interrupts\n"); 77 #ifdef CONFIG_X86_LOCAL_APIC 78 seq_printf(p, "%*s: ", prec, "LOC"); 79 for_each_online_cpu(j) 80 seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); 81 seq_puts(p, " Local timer interrupts\n"); 82 83 seq_printf(p, "%*s: ", prec, "SPU"); 84 for_each_online_cpu(j) 85 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); 86 seq_puts(p, " Spurious interrupts\n"); 87 seq_printf(p, "%*s: ", prec, "PMI"); 88 for_each_online_cpu(j) 89 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); 90 seq_puts(p, " Performance monitoring interrupts\n"); 91 seq_printf(p, "%*s: ", prec, "IWI"); 92 for_each_online_cpu(j) 93 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); 94 seq_puts(p, " IRQ work interrupts\n"); 95 seq_printf(p, "%*s: ", prec, "RTR"); 96 for_each_online_cpu(j) 97 seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count); 98 seq_puts(p, " APIC ICR read retries\n"); 99 if (x86_platform_ipi_callback) { 100 seq_printf(p, "%*s: ", prec, "PLT"); 101 for_each_online_cpu(j) 102 seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis); 103 seq_puts(p, " Platform interrupts\n"); 104 } 105 #endif 106 #ifdef CONFIG_SMP 107 seq_printf(p, "%*s: ", prec, "RES"); 108 for_each_online_cpu(j) 109 seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); 110 seq_puts(p, " Rescheduling interrupts\n"); 111 seq_printf(p, "%*s: ", prec, "CAL"); 112 for_each_online_cpu(j) 113 seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); 114 seq_puts(p, " Function call interrupts\n"); 115 seq_printf(p, "%*s: ", prec, "TLB"); 116 for_each_online_cpu(j) 117 seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); 118 seq_puts(p, " TLB shootdowns\n"); 119 #endif 120 #ifdef CONFIG_X86_THERMAL_VECTOR 121 seq_printf(p, "%*s: ", prec, "TRM"); 122 for_each_online_cpu(j) 123 seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); 124 seq_puts(p, " Thermal event interrupts\n"); 125 #endif 126 #ifdef CONFIG_X86_MCE_THRESHOLD 127 seq_printf(p, "%*s: ", prec, "THR"); 128 for_each_online_cpu(j) 129 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); 130 seq_puts(p, " Threshold APIC interrupts\n"); 131 #endif 132 #ifdef CONFIG_X86_MCE_AMD 133 seq_printf(p, "%*s: ", prec, "DFR"); 134 for_each_online_cpu(j) 135 seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count); 136 seq_puts(p, " Deferred Error APIC interrupts\n"); 137 #endif 138 #ifdef CONFIG_X86_MCE 139 seq_printf(p, "%*s: ", prec, "MCE"); 140 for_each_online_cpu(j) 141 seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); 142 seq_puts(p, " Machine check exceptions\n"); 143 seq_printf(p, "%*s: ", prec, "MCP"); 144 for_each_online_cpu(j) 145 seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); 146 seq_puts(p, " Machine check polls\n"); 147 #endif 148 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR 149 if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) { 150 seq_printf(p, "%*s: ", prec, "HYP"); 151 for_each_online_cpu(j) 152 seq_printf(p, "%10u ", 153 irq_stats(j)->irq_hv_callback_count); 154 seq_puts(p, " Hypervisor callback interrupts\n"); 155 } 156 #endif 157 #if IS_ENABLED(CONFIG_HYPERV) 158 if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) { 159 seq_printf(p, "%*s: ", prec, "HRE"); 160 for_each_online_cpu(j) 161 seq_printf(p, "%10u ", 162 irq_stats(j)->irq_hv_reenlightenment_count); 163 seq_puts(p, " Hyper-V reenlightenment interrupts\n"); 164 } 165 if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) { 166 seq_printf(p, "%*s: ", prec, "HVS"); 167 for_each_online_cpu(j) 168 seq_printf(p, "%10u ", 169 irq_stats(j)->hyperv_stimer0_count); 170 seq_puts(p, " Hyper-V stimer0 interrupts\n"); 171 } 172 #endif 173 seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); 174 #if defined(CONFIG_X86_IO_APIC) 175 seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); 176 #endif 177 #if IS_ENABLED(CONFIG_KVM) 178 seq_printf(p, "%*s: ", prec, "PIN"); 179 for_each_online_cpu(j) 180 seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis); 181 seq_puts(p, " Posted-interrupt notification event\n"); 182 183 seq_printf(p, "%*s: ", prec, "NPI"); 184 for_each_online_cpu(j) 185 seq_printf(p, "%10u ", 186 irq_stats(j)->kvm_posted_intr_nested_ipis); 187 seq_puts(p, " Nested posted-interrupt event\n"); 188 189 seq_printf(p, "%*s: ", prec, "PIW"); 190 for_each_online_cpu(j) 191 seq_printf(p, "%10u ", 192 irq_stats(j)->kvm_posted_intr_wakeup_ipis); 193 seq_puts(p, " Posted-interrupt wakeup event\n"); 194 #endif 195 #ifdef CONFIG_X86_POSTED_MSI 196 seq_printf(p, "%*s: ", prec, "PMN"); 197 for_each_online_cpu(j) 198 seq_printf(p, "%10u ", 199 irq_stats(j)->posted_msi_notification_count); 200 seq_puts(p, " Posted MSI notification event\n"); 201 #endif 202 return 0; 203 } 204 205 /* 206 * /proc/stat helpers 207 */ 208 u64 arch_irq_stat_cpu(unsigned int cpu) 209 { 210 u64 sum = irq_stats(cpu)->__nmi_count; 211 212 #ifdef CONFIG_X86_LOCAL_APIC 213 sum += irq_stats(cpu)->apic_timer_irqs; 214 sum += irq_stats(cpu)->irq_spurious_count; 215 sum += irq_stats(cpu)->apic_perf_irqs; 216 sum += irq_stats(cpu)->apic_irq_work_irqs; 217 sum += irq_stats(cpu)->icr_read_retry_count; 218 if (x86_platform_ipi_callback) 219 sum += irq_stats(cpu)->x86_platform_ipis; 220 #endif 221 #ifdef CONFIG_SMP 222 sum += irq_stats(cpu)->irq_resched_count; 223 sum += irq_stats(cpu)->irq_call_count; 224 #endif 225 #ifdef CONFIG_X86_THERMAL_VECTOR 226 sum += irq_stats(cpu)->irq_thermal_count; 227 #endif 228 #ifdef CONFIG_X86_MCE_THRESHOLD 229 sum += irq_stats(cpu)->irq_threshold_count; 230 #endif 231 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR 232 sum += irq_stats(cpu)->irq_hv_callback_count; 233 #endif 234 #if IS_ENABLED(CONFIG_HYPERV) 235 sum += irq_stats(cpu)->irq_hv_reenlightenment_count; 236 sum += irq_stats(cpu)->hyperv_stimer0_count; 237 #endif 238 #ifdef CONFIG_X86_MCE 239 sum += per_cpu(mce_exception_count, cpu); 240 sum += per_cpu(mce_poll_count, cpu); 241 #endif 242 return sum; 243 } 244 245 u64 arch_irq_stat(void) 246 { 247 u64 sum = atomic_read(&irq_err_count); 248 return sum; 249 } 250 251 static __always_inline void handle_irq(struct irq_desc *desc, 252 struct pt_regs *regs) 253 { 254 if (IS_ENABLED(CONFIG_X86_64)) 255 generic_handle_irq_desc(desc); 256 else 257 __handle_irq(desc, regs); 258 } 259 260 static struct irq_desc *reevaluate_vector(int vector) 261 { 262 struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); 263 264 if (!IS_ERR_OR_NULL(desc)) 265 return desc; 266 267 if (desc == VECTOR_UNUSED) 268 pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector); 269 else 270 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 271 return NULL; 272 } 273 274 static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs) 275 { 276 struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); 277 278 if (likely(!IS_ERR_OR_NULL(desc))) { 279 handle_irq(desc, regs); 280 return true; 281 } 282 283 /* 284 * Reevaluate with vector_lock held to prevent a race against 285 * request_irq() setting up the vector: 286 * 287 * CPU0 CPU1 288 * interrupt is raised in APIC IRR 289 * but not handled 290 * free_irq() 291 * per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN; 292 * 293 * request_irq() common_interrupt() 294 * d = this_cpu_read(vector_irq[vector]); 295 * 296 * per_cpu(vector_irq, CPU1)[vector] = desc; 297 * 298 * if (d == VECTOR_SHUTDOWN) 299 * this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 300 * 301 * This requires that the same vector on the same target CPU is 302 * handed out or that a spurious interrupt hits that CPU/vector. 303 */ 304 lock_vector_lock(); 305 desc = reevaluate_vector(vector); 306 unlock_vector_lock(); 307 308 if (!desc) 309 return false; 310 311 handle_irq(desc, regs); 312 return true; 313 } 314 315 /* 316 * common_interrupt() handles all normal device IRQ's (the special SMP 317 * cross-CPU interrupts have their own entry points). 318 */ 319 DEFINE_IDTENTRY_IRQ(common_interrupt) 320 { 321 struct pt_regs *old_regs = set_irq_regs(regs); 322 323 /* entry code tells RCU that we're not quiescent. Check it. */ 324 RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); 325 326 if (unlikely(!call_irq_handler(vector, regs))) 327 apic_eoi(); 328 329 set_irq_regs(old_regs); 330 } 331 332 #ifdef CONFIG_X86_LOCAL_APIC 333 /* Function pointer for generic interrupt vector handling */ 334 void (*x86_platform_ipi_callback)(void) = NULL; 335 /* 336 * Handler for X86_PLATFORM_IPI_VECTOR. 337 */ 338 DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi) 339 { 340 struct pt_regs *old_regs = set_irq_regs(regs); 341 342 apic_eoi(); 343 trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR); 344 inc_irq_stat(x86_platform_ipis); 345 if (x86_platform_ipi_callback) 346 x86_platform_ipi_callback(); 347 trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR); 348 set_irq_regs(old_regs); 349 } 350 #endif 351 352 #if IS_ENABLED(CONFIG_KVM) 353 static void dummy_handler(void) {} 354 static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler; 355 356 void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) 357 { 358 if (handler) 359 kvm_posted_intr_wakeup_handler = handler; 360 else { 361 kvm_posted_intr_wakeup_handler = dummy_handler; 362 synchronize_rcu(); 363 } 364 } 365 EXPORT_SYMBOL_FOR_KVM(kvm_set_posted_intr_wakeup_handler); 366 367 /* 368 * Handler for POSTED_INTERRUPT_VECTOR. 369 */ 370 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi) 371 { 372 apic_eoi(); 373 inc_irq_stat(kvm_posted_intr_ipis); 374 } 375 376 /* 377 * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. 378 */ 379 DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi) 380 { 381 apic_eoi(); 382 inc_irq_stat(kvm_posted_intr_wakeup_ipis); 383 kvm_posted_intr_wakeup_handler(); 384 } 385 386 /* 387 * Handler for POSTED_INTERRUPT_NESTED_VECTOR. 388 */ 389 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi) 390 { 391 apic_eoi(); 392 inc_irq_stat(kvm_posted_intr_nested_ipis); 393 } 394 #endif 395 396 #ifdef CONFIG_X86_POSTED_MSI 397 398 /* Posted Interrupt Descriptors for coalesced MSIs to be posted */ 399 DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); 400 401 void intel_posted_msi_init(void) 402 { 403 u32 destination; 404 u32 apic_id; 405 406 this_cpu_write(posted_msi_pi_desc.nv, POSTED_MSI_NOTIFICATION_VECTOR); 407 408 /* 409 * APIC destination ID is stored in bit 8:15 while in XAPIC mode. 410 * VT-d spec. CH 9.11 411 */ 412 apic_id = this_cpu_read(x86_cpu_to_apicid); 413 destination = x2apic_enabled() ? apic_id : apic_id << 8; 414 this_cpu_write(posted_msi_pi_desc.ndst, destination); 415 } 416 417 static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs) 418 { 419 unsigned long pir_copy[NR_PIR_WORDS]; 420 int vec = FIRST_EXTERNAL_VECTOR; 421 422 if (!pi_harvest_pir(pir, pir_copy)) 423 return false; 424 425 for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR) 426 call_irq_handler(vec, regs); 427 428 return true; 429 } 430 431 /* 432 * Performance data shows that 3 is good enough to harvest 90+% of the benefit 433 * on high IRQ rate workload. 434 */ 435 #define MAX_POSTED_MSI_COALESCING_LOOP 3 436 437 /* 438 * For MSIs that are delivered as posted interrupts, the CPU notifications 439 * can be coalesced if the MSIs arrive in high frequency bursts. 440 */ 441 DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification) 442 { 443 struct pt_regs *old_regs = set_irq_regs(regs); 444 struct pi_desc *pid; 445 int i = 0; 446 447 pid = this_cpu_ptr(&posted_msi_pi_desc); 448 449 inc_irq_stat(posted_msi_notification_count); 450 irq_enter(); 451 452 /* 453 * Max coalescing count includes the extra round of handle_pending_pir 454 * after clearing the outstanding notification bit. Hence, at most 455 * MAX_POSTED_MSI_COALESCING_LOOP - 1 loops are executed here. 456 */ 457 while (++i < MAX_POSTED_MSI_COALESCING_LOOP) { 458 if (!handle_pending_pir(pid->pir, regs)) 459 break; 460 } 461 462 /* 463 * Clear outstanding notification bit to allow new IRQ notifications, 464 * do this last to maximize the window of interrupt coalescing. 465 */ 466 pi_clear_on(pid); 467 468 /* 469 * There could be a race of PI notification and the clearing of ON bit, 470 * process PIR bits one last time such that handling the new interrupts 471 * are not delayed until the next IRQ. 472 */ 473 handle_pending_pir(pid->pir, regs); 474 475 apic_eoi(); 476 irq_exit(); 477 set_irq_regs(old_regs); 478 } 479 #endif /* X86_POSTED_MSI */ 480 481 #ifdef CONFIG_HOTPLUG_CPU 482 /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ 483 void fixup_irqs(void) 484 { 485 unsigned int vector; 486 struct irq_desc *desc; 487 struct irq_data *data; 488 struct irq_chip *chip; 489 490 irq_migrate_all_off_this_cpu(); 491 492 /* 493 * We can remove mdelay() and then send spurious interrupts to 494 * new cpu targets for all the irqs that were handled previously by 495 * this cpu. While it works, I have seen spurious interrupt messages 496 * (nothing wrong but still...). 497 * 498 * So for now, retain mdelay(1) and check the IRR and then send those 499 * interrupts to new targets as this cpu is already offlined... 500 */ 501 mdelay(1); 502 503 /* 504 * We can walk the vector array of this cpu without holding 505 * vector_lock because the cpu is already marked !online, so 506 * nothing else will touch it. 507 */ 508 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 509 if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector]))) 510 continue; 511 512 if (is_vector_pending(vector)) { 513 desc = __this_cpu_read(vector_irq[vector]); 514 515 raw_spin_lock(&desc->lock); 516 data = irq_desc_get_irq_data(desc); 517 chip = irq_data_get_irq_chip(data); 518 if (chip->irq_retrigger) { 519 chip->irq_retrigger(data); 520 __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED); 521 } 522 raw_spin_unlock(&desc->lock); 523 } 524 if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED) 525 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 526 } 527 } 528 #endif 529 530 #ifdef CONFIG_X86_THERMAL_VECTOR 531 static void smp_thermal_vector(void) 532 { 533 if (x86_thermal_enabled()) 534 intel_thermal_interrupt(); 535 else 536 pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", 537 smp_processor_id()); 538 } 539 540 DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) 541 { 542 trace_thermal_apic_entry(THERMAL_APIC_VECTOR); 543 inc_irq_stat(irq_thermal_count); 544 smp_thermal_vector(); 545 trace_thermal_apic_exit(THERMAL_APIC_VECTOR); 546 apic_eoi(); 547 } 548 #endif 549