1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Common interrupt code for 32 and 64 bit 4 */ 5 #include <linux/cpu.h> 6 #include <linux/interrupt.h> 7 #include <linux/kernel_stat.h> 8 #include <linux/of.h> 9 #include <linux/seq_file.h> 10 #include <linux/smp.h> 11 #include <linux/ftrace.h> 12 #include <linux/delay.h> 13 #include <linux/export.h> 14 #include <linux/irq.h> 15 16 #include <asm/irq_stack.h> 17 #include <asm/apic.h> 18 #include <asm/io_apic.h> 19 #include <asm/irq.h> 20 #include <asm/mce.h> 21 #include <asm/hw_irq.h> 22 #include <asm/desc.h> 23 #include <asm/traps.h> 24 #include <asm/thermal.h> 25 #include <asm/posted_intr.h> 26 #include <asm/irq_remapping.h> 27 28 #if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_THERMAL_VECTOR) 29 #define CREATE_TRACE_POINTS 30 #include <asm/trace/irq_vectors.h> 31 #endif 32 33 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); 34 EXPORT_PER_CPU_SYMBOL(irq_stat); 35 36 atomic_t irq_err_count; 37 38 /* 39 * 'what should we do if we get a hw irq event on an illegal vector'. 40 * each architecture has to answer this themselves. 41 */ 42 void ack_bad_irq(unsigned int irq) 43 { 44 if (printk_ratelimit()) 45 pr_err("unexpected IRQ trap at vector %02x\n", irq); 46 47 /* 48 * Currently unexpected vectors happen only on SMP and APIC. 49 * We _must_ ack these because every local APIC has only N 50 * irq slots per priority level, and a 'hanging, unacked' IRQ 51 * holds up an irq slot - in excessive cases (when multiple 52 * unexpected vectors occur) that might lock up the APIC 53 * completely. 54 * But only ack when the APIC is enabled -AK 55 */ 56 apic_eoi(); 57 } 58 59 #define irq_stats(x) (&per_cpu(irq_stat, x)) 60 /* 61 * /proc/interrupts printing for arch specific interrupts 62 */ 63 int arch_show_interrupts(struct seq_file *p, int prec) 64 { 65 int j; 66 67 seq_printf(p, "%*s: ", prec, "NMI"); 68 for_each_online_cpu(j) 69 seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); 70 seq_puts(p, " Non-maskable interrupts\n"); 71 #ifdef CONFIG_X86_LOCAL_APIC 72 seq_printf(p, "%*s: ", prec, "LOC"); 73 for_each_online_cpu(j) 74 seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); 75 seq_puts(p, " Local timer interrupts\n"); 76 77 seq_printf(p, "%*s: ", prec, "SPU"); 78 for_each_online_cpu(j) 79 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); 80 seq_puts(p, " Spurious interrupts\n"); 81 seq_printf(p, "%*s: ", prec, "PMI"); 82 for_each_online_cpu(j) 83 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); 84 seq_puts(p, " Performance monitoring interrupts\n"); 85 seq_printf(p, "%*s: ", prec, "IWI"); 86 for_each_online_cpu(j) 87 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); 88 seq_puts(p, " IRQ work interrupts\n"); 89 seq_printf(p, "%*s: ", prec, "RTR"); 90 for_each_online_cpu(j) 91 seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count); 92 seq_puts(p, " APIC ICR read retries\n"); 93 if (x86_platform_ipi_callback) { 94 seq_printf(p, "%*s: ", prec, "PLT"); 95 for_each_online_cpu(j) 96 seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis); 97 seq_puts(p, " Platform interrupts\n"); 98 } 99 #endif 100 #ifdef CONFIG_SMP 101 seq_printf(p, "%*s: ", prec, "RES"); 102 for_each_online_cpu(j) 103 seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); 104 seq_puts(p, " Rescheduling interrupts\n"); 105 seq_printf(p, "%*s: ", prec, "CAL"); 106 for_each_online_cpu(j) 107 seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); 108 seq_puts(p, " Function call interrupts\n"); 109 seq_printf(p, "%*s: ", prec, "TLB"); 110 for_each_online_cpu(j) 111 seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); 112 seq_puts(p, " TLB shootdowns\n"); 113 #endif 114 #ifdef CONFIG_X86_THERMAL_VECTOR 115 seq_printf(p, "%*s: ", prec, "TRM"); 116 for_each_online_cpu(j) 117 seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); 118 seq_puts(p, " Thermal event interrupts\n"); 119 #endif 120 #ifdef CONFIG_X86_MCE_THRESHOLD 121 seq_printf(p, "%*s: ", prec, "THR"); 122 for_each_online_cpu(j) 123 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); 124 seq_puts(p, " Threshold APIC interrupts\n"); 125 #endif 126 #ifdef CONFIG_X86_MCE_AMD 127 seq_printf(p, "%*s: ", prec, "DFR"); 128 for_each_online_cpu(j) 129 seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count); 130 seq_puts(p, " Deferred Error APIC interrupts\n"); 131 #endif 132 #ifdef CONFIG_X86_MCE 133 seq_printf(p, "%*s: ", prec, "MCE"); 134 for_each_online_cpu(j) 135 seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); 136 seq_puts(p, " Machine check exceptions\n"); 137 seq_printf(p, "%*s: ", prec, "MCP"); 138 for_each_online_cpu(j) 139 seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); 140 seq_puts(p, " Machine check polls\n"); 141 #endif 142 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR 143 if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) { 144 seq_printf(p, "%*s: ", prec, "HYP"); 145 for_each_online_cpu(j) 146 seq_printf(p, "%10u ", 147 irq_stats(j)->irq_hv_callback_count); 148 seq_puts(p, " Hypervisor callback interrupts\n"); 149 } 150 #endif 151 #if IS_ENABLED(CONFIG_HYPERV) 152 if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) { 153 seq_printf(p, "%*s: ", prec, "HRE"); 154 for_each_online_cpu(j) 155 seq_printf(p, "%10u ", 156 irq_stats(j)->irq_hv_reenlightenment_count); 157 seq_puts(p, " Hyper-V reenlightenment interrupts\n"); 158 } 159 if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) { 160 seq_printf(p, "%*s: ", prec, "HVS"); 161 for_each_online_cpu(j) 162 seq_printf(p, "%10u ", 163 irq_stats(j)->hyperv_stimer0_count); 164 seq_puts(p, " Hyper-V stimer0 interrupts\n"); 165 } 166 #endif 167 seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); 168 #if defined(CONFIG_X86_IO_APIC) 169 seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); 170 #endif 171 #if IS_ENABLED(CONFIG_KVM) 172 seq_printf(p, "%*s: ", prec, "PIN"); 173 for_each_online_cpu(j) 174 seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis); 175 seq_puts(p, " Posted-interrupt notification event\n"); 176 177 seq_printf(p, "%*s: ", prec, "NPI"); 178 for_each_online_cpu(j) 179 seq_printf(p, "%10u ", 180 irq_stats(j)->kvm_posted_intr_nested_ipis); 181 seq_puts(p, " Nested posted-interrupt event\n"); 182 183 seq_printf(p, "%*s: ", prec, "PIW"); 184 for_each_online_cpu(j) 185 seq_printf(p, "%10u ", 186 irq_stats(j)->kvm_posted_intr_wakeup_ipis); 187 seq_puts(p, " Posted-interrupt wakeup event\n"); 188 #endif 189 #ifdef CONFIG_X86_POSTED_MSI 190 seq_printf(p, "%*s: ", prec, "PMN"); 191 for_each_online_cpu(j) 192 seq_printf(p, "%10u ", 193 irq_stats(j)->posted_msi_notification_count); 194 seq_puts(p, " Posted MSI notification event\n"); 195 #endif 196 return 0; 197 } 198 199 /* 200 * /proc/stat helpers 201 */ 202 u64 arch_irq_stat_cpu(unsigned int cpu) 203 { 204 u64 sum = irq_stats(cpu)->__nmi_count; 205 206 #ifdef CONFIG_X86_LOCAL_APIC 207 sum += irq_stats(cpu)->apic_timer_irqs; 208 sum += irq_stats(cpu)->irq_spurious_count; 209 sum += irq_stats(cpu)->apic_perf_irqs; 210 sum += irq_stats(cpu)->apic_irq_work_irqs; 211 sum += irq_stats(cpu)->icr_read_retry_count; 212 if (x86_platform_ipi_callback) 213 sum += irq_stats(cpu)->x86_platform_ipis; 214 #endif 215 #ifdef CONFIG_SMP 216 sum += irq_stats(cpu)->irq_resched_count; 217 sum += irq_stats(cpu)->irq_call_count; 218 #endif 219 #ifdef CONFIG_X86_THERMAL_VECTOR 220 sum += irq_stats(cpu)->irq_thermal_count; 221 #endif 222 #ifdef CONFIG_X86_MCE_THRESHOLD 223 sum += irq_stats(cpu)->irq_threshold_count; 224 #endif 225 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR 226 sum += irq_stats(cpu)->irq_hv_callback_count; 227 #endif 228 #if IS_ENABLED(CONFIG_HYPERV) 229 sum += irq_stats(cpu)->irq_hv_reenlightenment_count; 230 sum += irq_stats(cpu)->hyperv_stimer0_count; 231 #endif 232 #ifdef CONFIG_X86_MCE 233 sum += per_cpu(mce_exception_count, cpu); 234 sum += per_cpu(mce_poll_count, cpu); 235 #endif 236 return sum; 237 } 238 239 u64 arch_irq_stat(void) 240 { 241 u64 sum = atomic_read(&irq_err_count); 242 return sum; 243 } 244 245 static __always_inline void handle_irq(struct irq_desc *desc, 246 struct pt_regs *regs) 247 { 248 if (IS_ENABLED(CONFIG_X86_64)) 249 generic_handle_irq_desc(desc); 250 else 251 __handle_irq(desc, regs); 252 } 253 254 static __always_inline int call_irq_handler(int vector, struct pt_regs *regs) 255 { 256 struct irq_desc *desc; 257 int ret = 0; 258 259 desc = __this_cpu_read(vector_irq[vector]); 260 if (likely(!IS_ERR_OR_NULL(desc))) { 261 handle_irq(desc, regs); 262 } else { 263 ret = -EINVAL; 264 if (desc == VECTOR_UNUSED) { 265 pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n", 266 __func__, smp_processor_id(), 267 vector); 268 } else { 269 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 270 } 271 } 272 273 return ret; 274 } 275 276 /* 277 * common_interrupt() handles all normal device IRQ's (the special SMP 278 * cross-CPU interrupts have their own entry points). 279 */ 280 DEFINE_IDTENTRY_IRQ(common_interrupt) 281 { 282 struct pt_regs *old_regs = set_irq_regs(regs); 283 284 /* entry code tells RCU that we're not quiescent. Check it. */ 285 RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); 286 287 if (unlikely(call_irq_handler(vector, regs))) 288 apic_eoi(); 289 290 set_irq_regs(old_regs); 291 } 292 293 #ifdef CONFIG_X86_LOCAL_APIC 294 /* Function pointer for generic interrupt vector handling */ 295 void (*x86_platform_ipi_callback)(void) = NULL; 296 /* 297 * Handler for X86_PLATFORM_IPI_VECTOR. 298 */ 299 DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi) 300 { 301 struct pt_regs *old_regs = set_irq_regs(regs); 302 303 apic_eoi(); 304 trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR); 305 inc_irq_stat(x86_platform_ipis); 306 if (x86_platform_ipi_callback) 307 x86_platform_ipi_callback(); 308 trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR); 309 set_irq_regs(old_regs); 310 } 311 #endif 312 313 #if IS_ENABLED(CONFIG_KVM) 314 static void dummy_handler(void) {} 315 static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler; 316 317 void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) 318 { 319 if (handler) 320 kvm_posted_intr_wakeup_handler = handler; 321 else { 322 kvm_posted_intr_wakeup_handler = dummy_handler; 323 synchronize_rcu(); 324 } 325 } 326 EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler); 327 328 /* 329 * Handler for POSTED_INTERRUPT_VECTOR. 330 */ 331 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi) 332 { 333 apic_eoi(); 334 inc_irq_stat(kvm_posted_intr_ipis); 335 } 336 337 /* 338 * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. 339 */ 340 DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi) 341 { 342 apic_eoi(); 343 inc_irq_stat(kvm_posted_intr_wakeup_ipis); 344 kvm_posted_intr_wakeup_handler(); 345 } 346 347 /* 348 * Handler for POSTED_INTERRUPT_NESTED_VECTOR. 349 */ 350 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi) 351 { 352 apic_eoi(); 353 inc_irq_stat(kvm_posted_intr_nested_ipis); 354 } 355 #endif 356 357 #ifdef CONFIG_X86_POSTED_MSI 358 359 /* Posted Interrupt Descriptors for coalesced MSIs to be posted */ 360 DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); 361 362 void intel_posted_msi_init(void) 363 { 364 u32 destination; 365 u32 apic_id; 366 367 this_cpu_write(posted_msi_pi_desc.nv, POSTED_MSI_NOTIFICATION_VECTOR); 368 369 /* 370 * APIC destination ID is stored in bit 8:15 while in XAPIC mode. 371 * VT-d spec. CH 9.11 372 */ 373 apic_id = this_cpu_read(x86_cpu_to_apicid); 374 destination = x2apic_enabled() ? apic_id : apic_id << 8; 375 this_cpu_write(posted_msi_pi_desc.ndst, destination); 376 } 377 378 /* 379 * De-multiplexing posted interrupts is on the performance path, the code 380 * below is written to optimize the cache performance based on the following 381 * considerations: 382 * 1.Posted interrupt descriptor (PID) fits in a cache line that is frequently 383 * accessed by both CPU and IOMMU. 384 * 2.During posted MSI processing, the CPU needs to do 64-bit read and xchg 385 * for checking and clearing posted interrupt request (PIR), a 256 bit field 386 * within the PID. 387 * 3.On the other side, the IOMMU does atomic swaps of the entire PID cache 388 * line when posting interrupts and setting control bits. 389 * 4.The CPU can access the cache line a magnitude faster than the IOMMU. 390 * 5.Each time the IOMMU does interrupt posting to the PIR will evict the PID 391 * cache line. The cache line states after each operation are as follows: 392 * CPU IOMMU PID Cache line state 393 * --------------------------------------------------------------- 394 *...read64 exclusive 395 *...lock xchg64 modified 396 *... post/atomic swap invalid 397 *...------------------------------------------------------------- 398 * 399 * To reduce L1 data cache miss, it is important to avoid contention with 400 * IOMMU's interrupt posting/atomic swap. Therefore, a copy of PIR is used 401 * to dispatch interrupt handlers. 402 * 403 * In addition, the code is trying to keep the cache line state consistent 404 * as much as possible. e.g. when making a copy and clearing the PIR 405 * (assuming non-zero PIR bits are present in the entire PIR), it does: 406 * read, read, read, read, xchg, xchg, xchg, xchg 407 * instead of: 408 * read, xchg, read, xchg, read, xchg, read, xchg 409 */ 410 static __always_inline bool handle_pending_pir(u64 *pir, struct pt_regs *regs) 411 { 412 int i, vec = FIRST_EXTERNAL_VECTOR; 413 unsigned long pir_copy[4]; 414 bool handled = false; 415 416 for (i = 0; i < 4; i++) 417 pir_copy[i] = pir[i]; 418 419 for (i = 0; i < 4; i++) { 420 if (!pir_copy[i]) 421 continue; 422 423 pir_copy[i] = arch_xchg(&pir[i], 0); 424 handled = true; 425 } 426 427 if (handled) { 428 for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR) 429 call_irq_handler(vec, regs); 430 } 431 432 return handled; 433 } 434 435 /* 436 * Performance data shows that 3 is good enough to harvest 90+% of the benefit 437 * on high IRQ rate workload. 438 */ 439 #define MAX_POSTED_MSI_COALESCING_LOOP 3 440 441 /* 442 * For MSIs that are delivered as posted interrupts, the CPU notifications 443 * can be coalesced if the MSIs arrive in high frequency bursts. 444 */ 445 DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification) 446 { 447 struct pt_regs *old_regs = set_irq_regs(regs); 448 struct pi_desc *pid; 449 int i = 0; 450 451 pid = this_cpu_ptr(&posted_msi_pi_desc); 452 453 inc_irq_stat(posted_msi_notification_count); 454 irq_enter(); 455 456 /* 457 * Max coalescing count includes the extra round of handle_pending_pir 458 * after clearing the outstanding notification bit. Hence, at most 459 * MAX_POSTED_MSI_COALESCING_LOOP - 1 loops are executed here. 460 */ 461 while (++i < MAX_POSTED_MSI_COALESCING_LOOP) { 462 if (!handle_pending_pir(pid->pir64, regs)) 463 break; 464 } 465 466 /* 467 * Clear outstanding notification bit to allow new IRQ notifications, 468 * do this last to maximize the window of interrupt coalescing. 469 */ 470 pi_clear_on(pid); 471 472 /* 473 * There could be a race of PI notification and the clearing of ON bit, 474 * process PIR bits one last time such that handling the new interrupts 475 * are not delayed until the next IRQ. 476 */ 477 handle_pending_pir(pid->pir64, regs); 478 479 apic_eoi(); 480 irq_exit(); 481 set_irq_regs(old_regs); 482 } 483 #endif /* X86_POSTED_MSI */ 484 485 #ifdef CONFIG_HOTPLUG_CPU 486 /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ 487 void fixup_irqs(void) 488 { 489 unsigned int vector; 490 struct irq_desc *desc; 491 struct irq_data *data; 492 struct irq_chip *chip; 493 494 irq_migrate_all_off_this_cpu(); 495 496 /* 497 * We can remove mdelay() and then send spurious interrupts to 498 * new cpu targets for all the irqs that were handled previously by 499 * this cpu. While it works, I have seen spurious interrupt messages 500 * (nothing wrong but still...). 501 * 502 * So for now, retain mdelay(1) and check the IRR and then send those 503 * interrupts to new targets as this cpu is already offlined... 504 */ 505 mdelay(1); 506 507 /* 508 * We can walk the vector array of this cpu without holding 509 * vector_lock because the cpu is already marked !online, so 510 * nothing else will touch it. 511 */ 512 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 513 if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector]))) 514 continue; 515 516 if (is_vector_pending(vector)) { 517 desc = __this_cpu_read(vector_irq[vector]); 518 519 raw_spin_lock(&desc->lock); 520 data = irq_desc_get_irq_data(desc); 521 chip = irq_data_get_irq_chip(data); 522 if (chip->irq_retrigger) { 523 chip->irq_retrigger(data); 524 __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED); 525 } 526 raw_spin_unlock(&desc->lock); 527 } 528 if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED) 529 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 530 } 531 } 532 #endif 533 534 #ifdef CONFIG_X86_THERMAL_VECTOR 535 static void smp_thermal_vector(void) 536 { 537 if (x86_thermal_enabled()) 538 intel_thermal_interrupt(); 539 else 540 pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", 541 smp_processor_id()); 542 } 543 544 DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) 545 { 546 trace_thermal_apic_entry(THERMAL_APIC_VECTOR); 547 inc_irq_stat(irq_thermal_count); 548 smp_thermal_vector(); 549 trace_thermal_apic_exit(THERMAL_APIC_VECTOR); 550 apic_eoi(); 551 } 552 #endif 553