1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Common interrupt code for 32 and 64 bit 4 */ 5 #include <linux/cpu.h> 6 #include <linux/interrupt.h> 7 #include <linux/kernel_stat.h> 8 #include <linux/of.h> 9 #include <linux/seq_file.h> 10 #include <linux/smp.h> 11 #include <linux/ftrace.h> 12 #include <linux/delay.h> 13 #include <linux/export.h> 14 #include <linux/irq.h> 15 #include <linux/kvm_types.h> 16 17 #include <asm/irq_stack.h> 18 #include <asm/apic.h> 19 #include <asm/io_apic.h> 20 #include <asm/irq.h> 21 #include <asm/mce.h> 22 #include <asm/hw_irq.h> 23 #include <asm/desc.h> 24 #include <asm/traps.h> 25 #include <asm/thermal.h> 26 #include <asm/posted_intr.h> 27 #include <asm/irq_remapping.h> 28 29 #if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_THERMAL_VECTOR) 30 #define CREATE_TRACE_POINTS 31 #include <asm/trace/irq_vectors.h> 32 #endif 33 34 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); 35 EXPORT_PER_CPU_SYMBOL(irq_stat); 36 37 DEFINE_PER_CPU_CACHE_HOT(u16, __softirq_pending); 38 EXPORT_PER_CPU_SYMBOL(__softirq_pending); 39 40 DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr); 41 42 /* 43 * 'what should we do if we get a hw irq event on an illegal vector'. 44 * each architecture has to answer this themselves. 45 */ 46 void ack_bad_irq(unsigned int irq) 47 { 48 if (printk_ratelimit()) 49 pr_err("unexpected IRQ trap at vector %02x\n", irq); 50 51 /* 52 * Currently unexpected vectors happen only on SMP and APIC. 53 * We _must_ ack these because every local APIC has only N 54 * irq slots per priority level, and a 'hanging, unacked' IRQ 55 * holds up an irq slot - in excessive cases (when multiple 56 * unexpected vectors occur) that might lock up the APIC 57 * completely. 58 * But only ack when the APIC is enabled -AK 59 */ 60 apic_eoi(); 61 } 62 63 struct irq_stat_info { 64 unsigned int skip_vector; 65 const char *symbol; 66 const char *text; 67 }; 68 69 #define DEFAULT_SUPPRESSED_VECTOR UINT_MAX 70 71 #define ISS(idx, sym, txt) [IRQ_COUNT_##idx] = { .symbol = sym, .text = txt } 72 73 #define ITS(idx, sym, txt) [IRQ_COUNT_##idx] = \ 74 { .skip_vector = idx## _VECTOR, .symbol = sym, .text = txt } 75 76 #define IDS(idx, sym, txt) [IRQ_COUNT_##idx] = \ 77 { .skip_vector = DEFAULT_SUPPRESSED_VECTOR, .symbol = sym, .text = txt } 78 79 static const struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] = { 80 ISS(NMI, "NMI", " Non-maskable interrupts\n"), 81 #ifdef CONFIG_X86_LOCAL_APIC 82 ISS(APIC_TIMER, "LOC", " Local timer interrupts\n"), 83 IDS(SPURIOUS, "SPU", " Spurious interrupts\n"), 84 ISS(APIC_PERF, "PMI", " Performance monitoring interrupts\n"), 85 ISS(IRQ_WORK, "IWI", " IRQ work interrupts\n"), 86 IDS(ICR_READ_RETRY, "RTR", " APIC ICR read retries\n"), 87 ISS(X86_PLATFORM_IPI, "PLT", " Platform interrupts\n"), 88 #endif 89 #ifdef CONFIG_SMP 90 ISS(RESCHEDULE, "RES", " Rescheduling interrupts\n"), 91 ISS(CALL_FUNCTION, "CAL", " Function call interrupts\n"), 92 #endif 93 ISS(TLB, "TLB", " TLB shootdowns\n"), 94 #ifdef CONFIG_X86_THERMAL_VECTOR 95 ISS(THERMAL_APIC, "TRM", " Thermal event interrupts\n"), 96 #endif 97 #ifdef CONFIG_X86_MCE_THRESHOLD 98 ISS(THRESHOLD_APIC, "THR", " Threshold APIC interrupts\n"), 99 #endif 100 #ifdef CONFIG_X86_MCE_AMD 101 ISS(DEFERRED_ERROR, "DFR", " Deferred Error APIC interrupts\n"), 102 #endif 103 #ifdef CONFIG_X86_MCE 104 ISS(MCE_EXCEPTION, "MCE", " Machine check exceptions\n"), 105 ISS(MCE_POLL, "MCP", " Machine check polls\n"), 106 #endif 107 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR 108 ITS(HYPERVISOR_CALLBACK, "HYP", " Hypervisor callback interrupts\n"), 109 #endif 110 #if IS_ENABLED(CONFIG_HYPERV) 111 ITS(HYPERV_REENLIGHTENMENT, "HRE", " Hyper-V reenlightenment interrupts\n"), 112 ITS(HYPERV_STIMER0, "HVS", " Hyper-V stimer0 interrupts\n"), 113 #endif 114 #if IS_ENABLED(CONFIG_KVM) 115 ITS(POSTED_INTR, "PIN", " Posted-interrupt notification event\n"), 116 ITS(POSTED_INTR_NESTED, "NPI", " Nested posted-interrupt event\n"), 117 ITS(POSTED_INTR_WAKEUP, "PIW", " Posted-interrupt wakeup event\n"), 118 #endif 119 #ifdef CONFIG_GUEST_PERF_EVENTS 120 ISS(PERF_GUEST_MEDIATED_PMI, "VPMI", " Perf Guest Mediated PMI\n"), 121 #endif 122 #ifdef CONFIG_X86_POSTED_MSI 123 ISS(POSTED_MSI_NOTIFICATION, "PMN", " Posted MSI notification event\n"), 124 #endif 125 IDS(PIC_APIC_ERROR, "ERR", " PIC/APIC error interrupts\n"), 126 #ifdef CONFIG_X86_IO_APIC 127 IDS(IOAPIC_MISROUTED, "MIS", " Misrouted IO/APIC interrupts\n"), 128 #endif 129 }; 130 131 static DECLARE_BITMAP(irq_stat_count_show, IRQ_COUNT_MAX) __read_mostly; 132 133 static int __init irq_init_stats(void) 134 { 135 const struct irq_stat_info *info = irq_stat_info; 136 137 for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) { 138 if (!info->skip_vector || (info->skip_vector != DEFAULT_SUPPRESSED_VECTOR && 139 test_bit(info->skip_vector, system_vectors))) 140 set_bit(i, irq_stat_count_show); 141 } 142 143 #ifdef CONFIG_X86_LOCAL_APIC 144 if (!x86_platform_ipi_callback) 145 clear_bit(IRQ_COUNT_X86_PLATFORM_IPI, irq_stat_count_show); 146 #endif 147 148 #ifdef CONFIG_X86_POSTED_MSI 149 if (!posted_msi_enabled()) 150 clear_bit(IRQ_COUNT_POSTED_MSI_NOTIFICATION, irq_stat_count_show); 151 #endif 152 153 #ifdef CONFIG_X86_MCE_AMD 154 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && 155 boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) 156 clear_bit(IRQ_COUNT_DEFERRED_ERROR, irq_stat_count_show); 157 #endif 158 return 0; 159 } 160 late_initcall(irq_init_stats); 161 162 /* 163 * Used for default disabled counters to increment the stats and to enable the 164 * entry for /proc/interrupts output. 165 */ 166 void irq_stat_inc_and_enable(enum irq_stat_counts which) 167 { 168 this_cpu_inc(irq_stat.counts[which]); 169 set_bit(which, irq_stat_count_show); 170 } 171 172 #ifdef CONFIG_PROC_FS 173 /* 174 * /proc/interrupts printing for arch specific interrupts 175 */ 176 int arch_show_interrupts(struct seq_file *p, int prec) 177 { 178 const struct irq_stat_info *info = irq_stat_info; 179 180 for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) { 181 if (!test_bit(i, irq_stat_count_show)) 182 continue; 183 184 seq_printf(p, "%*s:", prec, info->symbol); 185 irq_proc_emit_counts(p, &irq_stat.counts[i]); 186 seq_puts(p, info->text); 187 } 188 return 0; 189 } 190 191 /* 192 * /proc/stat helpers 193 */ 194 u64 arch_irq_stat_cpu(unsigned int cpu) 195 { 196 irq_cpustat_t *p = per_cpu_ptr(&irq_stat, cpu); 197 u64 sum = 0; 198 199 for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++) 200 sum += p->counts[i]; 201 return sum; 202 } 203 #endif /* CONFIG_PROC_FS */ 204 205 static __always_inline void handle_irq(struct irq_desc *desc, 206 struct pt_regs *regs) 207 { 208 if (IS_ENABLED(CONFIG_X86_64)) 209 generic_handle_irq_desc(desc); 210 else 211 __handle_irq(desc, regs); 212 } 213 214 static struct irq_desc *reevaluate_vector(int vector) 215 { 216 struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); 217 218 if (!IS_ERR_OR_NULL(desc)) 219 return desc; 220 221 if (desc == VECTOR_UNUSED) 222 pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector); 223 else 224 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 225 return NULL; 226 } 227 228 static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs) 229 { 230 struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); 231 232 if (likely(!IS_ERR_OR_NULL(desc))) { 233 handle_irq(desc, regs); 234 return true; 235 } 236 237 /* 238 * Reevaluate with vector_lock held to prevent a race against 239 * request_irq() setting up the vector: 240 * 241 * CPU0 CPU1 242 * interrupt is raised in APIC IRR 243 * but not handled 244 * free_irq() 245 * per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN; 246 * 247 * request_irq() common_interrupt() 248 * d = this_cpu_read(vector_irq[vector]); 249 * 250 * per_cpu(vector_irq, CPU1)[vector] = desc; 251 * 252 * if (d == VECTOR_SHUTDOWN) 253 * this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 254 * 255 * This requires that the same vector on the same target CPU is 256 * handed out or that a spurious interrupt hits that CPU/vector. 257 */ 258 lock_vector_lock(); 259 desc = reevaluate_vector(vector); 260 unlock_vector_lock(); 261 262 if (!desc) 263 return false; 264 265 handle_irq(desc, regs); 266 return true; 267 } 268 269 /* 270 * common_interrupt() handles all normal device IRQ's (the special SMP 271 * cross-CPU interrupts have their own entry points). 272 */ 273 DEFINE_IDTENTRY_IRQ(common_interrupt) 274 { 275 struct pt_regs *old_regs = set_irq_regs(regs); 276 277 /* entry code tells RCU that we're not quiescent. Check it. */ 278 RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); 279 280 if (unlikely(!call_irq_handler(vector, regs))) 281 apic_eoi(); 282 283 set_irq_regs(old_regs); 284 } 285 286 #ifdef CONFIG_X86_LOCAL_APIC 287 /* Function pointer for generic interrupt vector handling */ 288 void (*x86_platform_ipi_callback)(void) __ro_after_init = NULL; 289 /* 290 * Handler for X86_PLATFORM_IPI_VECTOR. 291 */ 292 DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi) 293 { 294 struct pt_regs *old_regs = set_irq_regs(regs); 295 296 apic_eoi(); 297 trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR); 298 inc_irq_stat(X86_PLATFORM_IPI); 299 if (x86_platform_ipi_callback) 300 x86_platform_ipi_callback(); 301 trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR); 302 set_irq_regs(old_regs); 303 } 304 #endif 305 306 #ifdef CONFIG_GUEST_PERF_EVENTS 307 /* 308 * Handler for PERF_GUEST_MEDIATED_PMI_VECTOR. 309 */ 310 DEFINE_IDTENTRY_SYSVEC(sysvec_perf_guest_mediated_pmi_handler) 311 { 312 apic_eoi(); 313 inc_irq_stat(PERF_GUEST_MEDIATED_PMI); 314 perf_guest_handle_mediated_pmi(); 315 } 316 #endif 317 318 #if IS_ENABLED(CONFIG_KVM) 319 static void dummy_handler(void) {} 320 static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler; 321 322 void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) 323 { 324 if (handler) 325 kvm_posted_intr_wakeup_handler = handler; 326 else { 327 kvm_posted_intr_wakeup_handler = dummy_handler; 328 synchronize_rcu(); 329 } 330 } 331 EXPORT_SYMBOL_FOR_KVM(kvm_set_posted_intr_wakeup_handler); 332 333 /* 334 * Handler for POSTED_INTERRUPT_VECTOR. 335 */ 336 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi) 337 { 338 apic_eoi(); 339 inc_irq_stat(POSTED_INTR); 340 } 341 342 /* 343 * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. 344 */ 345 DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi) 346 { 347 apic_eoi(); 348 inc_irq_stat(POSTED_INTR_WAKEUP); 349 kvm_posted_intr_wakeup_handler(); 350 } 351 352 /* 353 * Handler for POSTED_INTERRUPT_NESTED_VECTOR. 354 */ 355 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi) 356 { 357 apic_eoi(); 358 inc_irq_stat(POSTED_INTR_NESTED); 359 } 360 #endif 361 362 #ifdef CONFIG_X86_POSTED_MSI 363 364 /* Posted Interrupt Descriptors for coalesced MSIs to be posted */ 365 DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); 366 static DEFINE_PER_CPU_CACHE_HOT(bool, posted_msi_handler_active); 367 368 void intel_posted_msi_init(void) 369 { 370 u32 destination, apic_id; 371 372 this_cpu_write(posted_msi_pi_desc.nv, POSTED_MSI_NOTIFICATION_VECTOR); 373 /* 374 * APIC destination ID is stored in bit 8:15 while in XAPIC mode. 375 * VT-d spec. CH 9.11 376 */ 377 apic_id = this_cpu_read(x86_cpu_to_apicid); 378 destination = x2apic_enabled() ? apic_id : apic_id << 8; 379 this_cpu_write(posted_msi_pi_desc.ndst, destination); 380 } 381 382 void intel_ack_posted_msi_irq(struct irq_data *irqd) 383 { 384 irq_move_irq(irqd); 385 386 /* 387 * Handle the rare case that irq_retrigger() raised the actual 388 * assigned vector on the target CPU, which means that it was not 389 * invoked via the posted MSI handler below. In that case APIC EOI 390 * is required as otherwise the ISR entry becomes stale and lower 391 * priority interrupts are never going to be delivered after that. 392 * 393 * If the posted handler invoked the device interrupt handler then 394 * the EOI would be premature because it would acknowledge the 395 * posted vector. 396 */ 397 if (unlikely(!__this_cpu_read(posted_msi_handler_active))) 398 apic_eoi(); 399 } 400 401 static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs) 402 { 403 unsigned long pir_copy[NR_PIR_WORDS]; 404 int vec = FIRST_EXTERNAL_VECTOR; 405 406 if (!pi_harvest_pir(pir, pir_copy)) 407 return false; 408 409 for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR) 410 call_irq_handler(vec, regs); 411 412 return true; 413 } 414 415 /* 416 * Performance data shows that 3 is good enough to harvest 90+% of the 417 * benefit on high interrupt rate workloads. 418 */ 419 #define MAX_POSTED_MSI_COALESCING_LOOP 3 420 421 /* 422 * For MSIs that are delivered as posted interrupts, the CPU notifications 423 * can be coalesced if the MSIs arrive in high frequency bursts. 424 */ 425 DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification) 426 { 427 struct pi_desc *pid = this_cpu_ptr(&posted_msi_pi_desc); 428 struct pt_regs *old_regs = set_irq_regs(regs); 429 430 /* Mark the handler active for intel_ack_posted_msi_irq() */ 431 __this_cpu_write(posted_msi_handler_active, true); 432 inc_irq_stat(POSTED_MSI_NOTIFICATION); 433 irq_enter(); 434 435 /* 436 * Loop only MAX_POSTED_MSI_COALESCING_LOOP - 1 times here to take 437 * the final handle_pending_pir() invocation after clearing the 438 * outstanding notification bit into account. 439 */ 440 for (int i = 1; i < MAX_POSTED_MSI_COALESCING_LOOP; i++) { 441 if (!handle_pending_pir(pid->pir, regs)) 442 break; 443 } 444 445 /* 446 * Clear the outstanding notification bit to rearm the notification 447 * mechanism. 448 */ 449 pi_clear_on(pid); 450 451 /* 452 * Clearing the ON bit can race with a notification. Process the 453 * PIR bits one last time so that handling the new interrupts is 454 * not delayed until the next notification happens. 455 */ 456 handle_pending_pir(pid->pir, regs); 457 458 apic_eoi(); 459 irq_exit(); 460 __this_cpu_write(posted_msi_handler_active, false); 461 set_irq_regs(old_regs); 462 } 463 #endif /* X86_POSTED_MSI */ 464 465 #ifdef CONFIG_HOTPLUG_CPU 466 /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ 467 void fixup_irqs(void) 468 { 469 unsigned int vector; 470 struct irq_desc *desc; 471 struct irq_data *data; 472 struct irq_chip *chip; 473 474 irq_migrate_all_off_this_cpu(); 475 476 /* 477 * We can remove mdelay() and then send spurious interrupts to 478 * new cpu targets for all the irqs that were handled previously by 479 * this cpu. While it works, I have seen spurious interrupt messages 480 * (nothing wrong but still...). 481 * 482 * So for now, retain mdelay(1) and check the IRR and then send those 483 * interrupts to new targets as this cpu is already offlined... 484 */ 485 mdelay(1); 486 487 /* 488 * We can walk the vector array of this cpu without holding 489 * vector_lock because the cpu is already marked !online, so 490 * nothing else will touch it. 491 */ 492 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 493 if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector]))) 494 continue; 495 496 if (is_vector_pending(vector)) { 497 desc = __this_cpu_read(vector_irq[vector]); 498 499 raw_spin_lock(&desc->lock); 500 data = irq_desc_get_irq_data(desc); 501 chip = irq_data_get_irq_chip(data); 502 if (chip->irq_retrigger) { 503 chip->irq_retrigger(data); 504 __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED); 505 } 506 raw_spin_unlock(&desc->lock); 507 } 508 if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED) 509 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 510 } 511 } 512 #endif 513 514 #ifdef CONFIG_X86_THERMAL_VECTOR 515 static void smp_thermal_vector(void) 516 { 517 if (x86_thermal_enabled()) 518 intel_thermal_interrupt(); 519 else 520 pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", 521 smp_processor_id()); 522 } 523 524 DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) 525 { 526 trace_thermal_apic_entry(THERMAL_APIC_VECTOR); 527 inc_irq_stat(THERMAL_APIC); 528 smp_thermal_vector(); 529 trace_thermal_apic_exit(THERMAL_APIC_VECTOR); 530 apic_eoi(); 531 } 532 #endif 533