// SPDX-License-Identifier: GPL-2.0-only
/*
 * Common interrupt code for 32 and 64 bit
 */
#include <linux/cpu.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/of.h>
#include <linux/seq_file.h>
#include <linux/smp.h>
#include <linux/ftrace.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/irq.h>

#include <asm/irq_stack.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/irq.h>
#include <asm/mce.h>
#include <asm/hw_irq.h>
#include <asm/desc.h>
#include <asm/traps.h>
#include <asm/thermal.h>
#include <asm/posted_intr.h>
#include <asm/irq_remapping.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/irq_vectors.h>

DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);

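/*
 * Summed into the "ERR" line of /proc/interrupts below and folded into
 * /proc/stat via arch_irq_stat().
 */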
atomic_t irq_err_count;

/*
 * 'what should we do if we get a hw irq event on an illegal vector'.
 * each architecture has to answer this themselves.
 */
void ack_bad_irq(unsigned int irq)
{
	if (printk_ratelimit())
		pr_err("unexpected IRQ trap at vector %02x\n", irq);

	/*
	 * Currently unexpected vectors happen only on SMP and APIC.
	 * We _must_ ack these because every local APIC has only N
	 * irq slots per priority level, and a 'hanging, unacked' IRQ
	 * holds up an irq slot - in excessive cases (when multiple
	 * unexpected vectors occur) that might lock up the APIC
	 * completely.
	 * But only ack when the APIC is enabled -AK
	 */
	apic_eoi();
}

#define irq_stats(x)		(&per_cpu(irq_stat, x))
/*
 * /proc/interrupts printing for arch specific interrupts
 */
int arch_show_interrupts(struct seq_file *p, int prec)
{
	int j;

	seq_printf(p, "%*s: ", prec, "NMI");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
	seq_puts(p, "  Non-maskable interrupts\n");
#ifdef CONFIG_X86_LOCAL_APIC
	seq_printf(p, "%*s: ", prec, "LOC");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
	seq_puts(p, "  Local timer interrupts\n");

	seq_printf(p, "%*s: ", prec, "SPU");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
	seq_puts(p, "  Spurious interrupts\n");
	seq_printf(p, "%*s: ", prec, "PMI");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
	seq_puts(p, "  Performance monitoring interrupts\n");
	seq_printf(p, "%*s: ", prec, "IWI");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
	seq_puts(p, "  IRQ work interrupts\n");
	seq_printf(p, "%*s: ", prec, "RTR");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
	seq_puts(p, "  APIC ICR read retries\n");
	if (x86_platform_ipi_callback) {
		seq_printf(p, "%*s: ", prec, "PLT");
		for_each_online_cpu(j)
			seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
		seq_puts(p, "  Platform interrupts\n");
	}
#endif
#ifdef CONFIG_SMP
	seq_printf(p, "%*s: ", prec, "RES");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
	seq_puts(p, "  Rescheduling interrupts\n");
	seq_printf(p, "%*s: ", prec, "CAL");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
	seq_puts(p, "  Function call interrupts\n");
	seq_printf(p, "%*s: ", prec, "TLB");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
	seq_puts(p, "  TLB shootdowns\n");
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
	seq_printf(p, "%*s: ", prec, "TRM");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
	seq_puts(p, "  Thermal event interrupts\n");
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
	seq_printf(p, "%*s: ", prec, "THR");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
	seq_puts(p, "  Threshold APIC interrupts\n");
#endif
#ifdef CONFIG_X86_MCE_AMD
	seq_printf(p, "%*s: ", prec, "DFR");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
	seq_puts(p, "  Deferred Error APIC interrupts\n");
#endif
#ifdef CONFIG_X86_MCE
	seq_printf(p, "%*s: ", prec, "MCE");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
	seq_puts(p, "  Machine check exceptions\n");
	seq_printf(p, "%*s: ", prec, "MCP");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
	seq_puts(p, "  Machine check polls\n");
#endif
#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
	if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
		seq_printf(p, "%*s: ", prec, "HYP");
		for_each_online_cpu(j)
			seq_printf(p, "%10u ",
				   irq_stats(j)->irq_hv_callback_count);
		seq_puts(p, "  Hypervisor callback interrupts\n");
	}
#endif
#if IS_ENABLED(CONFIG_HYPERV)
	if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) {
		seq_printf(p, "%*s: ", prec, "HRE");
		for_each_online_cpu(j)
			seq_printf(p, "%10u ",
				   irq_stats(j)->irq_hv_reenlightenment_count);
		seq_puts(p, "  Hyper-V reenlightenment interrupts\n");
	}
	if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) {
		seq_printf(p, "%*s: ", prec, "HVS");
		for_each_online_cpu(j)
			seq_printf(p, "%10u ",
				   irq_stats(j)->hyperv_stimer0_count);
		seq_puts(p, "  Hyper-V stimer0 interrupts\n");
	}
#endif
	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
	seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
#endif
#if IS_ENABLED(CONFIG_KVM)
	seq_printf(p, "%*s: ", prec, "PIN");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis);
	seq_puts(p, "  Posted-interrupt notification event\n");

	seq_printf(p, "%*s: ", prec, "NPI");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ",
			   irq_stats(j)->kvm_posted_intr_nested_ipis);
	seq_puts(p, "  Nested posted-interrupt event\n");

	seq_printf(p, "%*s: ", prec, "PIW");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ",
			   irq_stats(j)->kvm_posted_intr_wakeup_ipis);
	seq_puts(p, "  Posted-interrupt wakeup event\n");
#endif
#ifdef CONFIG_X86_POSTED_MSI
	seq_printf(p, "%*s: ", prec, "PMN");
	for_each_online_cpu(j)
		seq_printf(p, "%10u ",
			   irq_stats(j)->posted_msi_notification_count);
	seq_puts(p, "  Posted MSI notification event\n");
#endif
	return 0;
}

/*
 * /proc/stat helpers
 */
u64 arch_irq_stat_cpu(unsigned int cpu)
{
	u64 sum = irq_stats(cpu)->__nmi_count;

#ifdef CONFIG_X86_LOCAL_APIC
	sum += irq_stats(cpu)->apic_timer_irqs;
	sum += irq_stats(cpu)->irq_spurious_count;
	sum += irq_stats(cpu)->apic_perf_irqs;
	sum += irq_stats(cpu)->apic_irq_work_irqs;
	sum += irq_stats(cpu)->icr_read_retry_count;
	if (x86_platform_ipi_callback)
		sum += irq_stats(cpu)->x86_platform_ipis;
#endif
#ifdef CONFIG_SMP
	sum += irq_stats(cpu)->irq_resched_count;
	sum += irq_stats(cpu)->irq_call_count;
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
	sum += irq_stats(cpu)->irq_thermal_count;
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
	sum += irq_stats(cpu)->irq_threshold_count;
#endif
#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
	sum += irq_stats(cpu)->irq_hv_callback_count;
#endif
#if IS_ENABLED(CONFIG_HYPERV)
	sum += irq_stats(cpu)->irq_hv_reenlightenment_count;
	sum += irq_stats(cpu)->hyperv_stimer0_count;
#endif
#ifdef CONFIG_X86_MCE
	sum += per_cpu(mce_exception_count, cpu);
	sum += per_cpu(mce_poll_count, cpu);
#endif
	return sum;
}

u64 arch_irq_stat(void)
{
	u64 sum = atomic_read(&irq_err_count);
	return sum;
}

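/*
 * On 64-bit, the entry code has already switched to the per-CPU IRQ stack
 * where necessary, so the descriptor handler can be invoked directly.  On
 * 32-bit, __handle_irq() takes care of the stack switch if one is needed.
 */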
static __always_inline void handle_irq(struct irq_desc *desc,
				       struct pt_regs *regs)
{
	if (IS_ENABLED(CONFIG_X86_64))
		generic_handle_irq_desc(desc);
	else
		__handle_irq(desc, regs);
}

/*
 * Look up the descriptor for @vector in this CPU's vector_irq[] table and
 * run its handler.  Returns -EINVAL for vectors without a handler so that
 * the caller can acknowledge the spurious interrupt itself.
 */
static __always_inline int call_irq_handler(int vector, struct pt_regs *regs)
{
	struct irq_desc *desc;
	int ret = 0;

	desc = __this_cpu_read(vector_irq[vector]);
	if (likely(!IS_ERR_OR_NULL(desc))) {
		handle_irq(desc, regs);
	} else {
		ret = -EINVAL;
		if (desc == VECTOR_UNUSED) {
			pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n",
					     __func__, smp_processor_id(),
					     vector);
		} else {
			__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
		}
	}

	return ret;
}

/*
 * common_interrupt() handles all normal device IRQs (the special SMP
 * cross-CPU interrupts have their own entry points).
 */
DEFINE_IDTENTRY_IRQ(common_interrupt)
{
	struct pt_regs *old_regs = set_irq_regs(regs);

	/* entry code tells RCU that we're not quiescent. Check it. */
	RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");

	if (unlikely(call_irq_handler(vector, regs)))
		apic_eoi();

	set_irq_regs(old_regs);
}

#ifdef CONFIG_X86_LOCAL_APIC
/* Function pointer for generic interrupt vector handling */
void (*x86_platform_ipi_callback)(void) = NULL;
/*
 * Handler for X86_PLATFORM_IPI_VECTOR.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi)
{
	struct pt_regs *old_regs = set_irq_regs(regs);

	apic_eoi();
	trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
	inc_irq_stat(x86_platform_ipis);
	if (x86_platform_ipi_callback)
		x86_platform_ipi_callback();
	trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
	set_irq_regs(old_regs);
}
#endif

#if IS_ENABLED(CONFIG_KVM)
static void dummy_handler(void) {}
static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler;

void kvm_set_posted_intr_wakeup_handler(void (*handler)(void))
{
	if (handler)
		kvm_posted_intr_wakeup_handler = handler;
	else {
		kvm_posted_intr_wakeup_handler = dummy_handler;
		synchronize_rcu();
	}
}
EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler);

/*
 * Handler for POSTED_INTERRUPT_VECTOR.
 */
DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi)
{
	apic_eoi();
	inc_irq_stat(kvm_posted_intr_ipis);
}

/*
 * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi)
{
	apic_eoi();
	inc_irq_stat(kvm_posted_intr_wakeup_ipis);
	kvm_posted_intr_wakeup_handler();
}

/*
 * Handler for POSTED_INTERRUPT_NESTED_VECTOR.
 */
DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi)
{
	apic_eoi();
	inc_irq_stat(kvm_posted_intr_nested_ipis);
}
#endif

#ifdef CONFIG_X86_POSTED_MSI

/* Posted Interrupt Descriptors for coalesced MSIs to be posted */
DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);

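/*
 * Program this CPU's posted-MSI descriptor with the notification vector and
 * the APIC destination ID that the IOMMU should signal.
 */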
void intel_posted_msi_init(void)
{
	u32 destination;
	u32 apic_id;

	this_cpu_write(posted_msi_pi_desc.nv, POSTED_MSI_NOTIFICATION_VECTOR);

	/*
	 * The APIC destination ID is stored in bits 8:15 while in xAPIC mode.
	 * VT-d spec. CH 9.11
	 */
	apic_id = this_cpu_read(x86_cpu_to_apicid);
	destination = x2apic_enabled() ? apic_id : apic_id << 8;
	this_cpu_write(posted_msi_pi_desc.ndst, destination);
}

/*
 * De-multiplexing posted interrupts is on the performance path; the code
 * below is written to optimize cache performance based on the following
 * considerations:
 * 1. The posted interrupt descriptor (PID) fits in a cache line that is
 *    frequently accessed by both the CPU and the IOMMU.
 * 2. During posted MSI processing, the CPU needs to do 64-bit reads and
 *    xchgs for checking and clearing the posted interrupt request (PIR),
 *    a 256-bit field within the PID.
 * 3. On the other side, the IOMMU does atomic swaps of the entire PID cache
 *    line when posting interrupts and setting control bits.
 * 4. The CPU can access the cache line an order of magnitude faster than
 *    the IOMMU.
 * 5. Each time the IOMMU posts an interrupt to the PIR, it evicts the PID
 *    cache line. The cache line states after each operation are as follows:
 *	CPU		IOMMU			PID Cache line state
 *	---------------------------------------------------------------
 *	read64						exclusive
 *	lock xchg64					modified
 *			post/atomic swap		invalid
 *	---------------------------------------------------------------
 *
 * To reduce L1 data cache misses, it is important to avoid contention with
 * the IOMMU's interrupt posting/atomic swap. Therefore, a copy of the PIR
 * is used to dispatch the interrupt handlers.
 *
 * In addition, the code tries to keep the cache line state consistent as
 * much as possible. E.g. when making a copy and clearing the PIR (assuming
 * non-zero PIR bits are present in the entire PIR), it does:
 *     read, read, read, read, xchg, xchg, xchg, xchg
 * instead of:
 *     read, xchg, read, xchg, read, xchg, read, xchg
 */
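/*
 * Snapshot all four 64-bit PIR words first, then atomically clear only the
 * words that had bits set, and dispatch the handlers from the local copy
 * (see the cache considerations above).
 */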
static __always_inline bool handle_pending_pir(u64 *pir, struct pt_regs *regs)
{
	int i, vec = FIRST_EXTERNAL_VECTOR;
	unsigned long pir_copy[4];
	bool handled = false;

	for (i = 0; i < 4; i++)
		pir_copy[i] = pir[i];

	for (i = 0; i < 4; i++) {
		if (!pir_copy[i])
			continue;

		pir_copy[i] = arch_xchg(&pir[i], 0);
		handled = true;
	}

	if (handled) {
		for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR)
			call_irq_handler(vec, regs);
	}

	return handled;
}

/*
 * Performance data shows that 3 is good enough to harvest 90+% of the
 * benefit on high IRQ rate workloads.
 */
#define MAX_POSTED_MSI_COALESCING_LOOP 3

/*
 * For MSIs that are delivered as posted interrupts, the CPU notifications
 * can be coalesced if the MSIs arrive in high frequency bursts.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
{
	struct pt_regs *old_regs = set_irq_regs(regs);
	struct pi_desc *pid;
	int i = 0;

	pid = this_cpu_ptr(&posted_msi_pi_desc);

	inc_irq_stat(posted_msi_notification_count);
	irq_enter();

	/*
	 * The max coalescing count includes the extra round of
	 * handle_pending_pir() after clearing the outstanding notification
	 * bit. Hence, at most MAX_POSTED_MSI_COALESCING_LOOP - 1 loops are
	 * executed here.
	 */
	while (++i < MAX_POSTED_MSI_COALESCING_LOOP) {
		if (!handle_pending_pir(pid->pir64, regs))
			break;
	}

	/*
	 * Clear the outstanding notification bit to allow new IRQ
	 * notifications; do this last to maximize the window of interrupt
	 * coalescing.
	 */
	pi_clear_on(pid);

	/*
	 * There could be a race between the PI notification and the clearing
	 * of the ON bit; process the PIR bits one last time so that handling
	 * of new interrupts is not delayed until the next IRQ.
	 */
	handle_pending_pir(pid->pir64, regs);

	apic_eoi();
	irq_exit();
	set_irq_regs(old_regs);
}
#endif /* X86_POSTED_MSI */

#ifdef CONFIG_HOTPLUG_CPU
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
void fixup_irqs(void)
{
	unsigned int vector;
	struct irq_desc *desc;
	struct irq_data *data;
	struct irq_chip *chip;

	irq_migrate_all_off_this_cpu();

	/*
	 * We can remove mdelay() and then send spurious interrupts to
	 * new cpu targets for all the irqs that were handled previously by
	 * this cpu. While it works, I have seen spurious interrupt messages
	 * (nothing wrong but still...).
	 *
	 * So for now, retain mdelay(1) and check the IRR and then send those
	 * interrupts to new targets as this cpu is already offlined...
	 */
	mdelay(1);

	/*
	 * We can walk the vector array of this cpu without holding
	 * vector_lock because the cpu is already marked !online, so
	 * nothing else will touch it.
	 */
	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
		if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector])))
			continue;

		/*
		 * If the vector is still pending on this CPU, retrigger it so
		 * it is delivered to its new target, and mark it
		 * VECTOR_RETRIGGERED so the cleanup below leaves it alone.
		 */
		if (is_vector_pending(vector)) {
			desc = __this_cpu_read(vector_irq[vector]);

			raw_spin_lock(&desc->lock);
			data = irq_desc_get_irq_data(desc);
			chip = irq_data_get_irq_chip(data);
			if (chip->irq_retrigger) {
				chip->irq_retrigger(data);
				__this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED);
			}
			raw_spin_unlock(&desc->lock);
		}
		if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED)
			__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
	}
}
#endif

#ifdef CONFIG_X86_THERMAL_VECTOR
static void smp_thermal_vector(void)
{
	if (x86_thermal_enabled())
		intel_thermal_interrupt();
	else
		pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
		       smp_processor_id());
}

DEFINE_IDTENTRY_SYSVEC(sysvec_thermal)
{
	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
	inc_irq_stat(irq_thermal_count);
	smp_thermal_vector();
	trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
	apic_eoi();
}
#endif