xref: /linux/arch/x86/kernel/irq.c (revision 7fc2cd2e4b398c57c9cf961cfea05eadbf34c05c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Common interrupt code for 32 and 64 bit
4  */
5 #include <linux/cpu.h>
6 #include <linux/interrupt.h>
7 #include <linux/kernel_stat.h>
8 #include <linux/of.h>
9 #include <linux/seq_file.h>
10 #include <linux/smp.h>
11 #include <linux/ftrace.h>
12 #include <linux/delay.h>
13 #include <linux/export.h>
14 #include <linux/irq.h>
15 #include <linux/kvm_types.h>
16 
17 #include <asm/irq_stack.h>
18 #include <asm/apic.h>
19 #include <asm/io_apic.h>
20 #include <asm/irq.h>
21 #include <asm/mce.h>
22 #include <asm/hw_irq.h>
23 #include <asm/desc.h>
24 #include <asm/traps.h>
25 #include <asm/thermal.h>
26 #include <asm/posted_intr.h>
27 #include <asm/irq_remapping.h>
28 
29 #if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_THERMAL_VECTOR)
30 #define CREATE_TRACE_POINTS
31 #include <asm/trace/irq_vectors.h>
32 #endif
33 
34 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
35 EXPORT_PER_CPU_SYMBOL(irq_stat);
36 
37 DEFINE_PER_CPU_CACHE_HOT(u16, __softirq_pending);
38 EXPORT_PER_CPU_SYMBOL(__softirq_pending);
39 
40 DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr);
41 
42 atomic_t irq_err_count;
43 
/*
 * Architecture answer to "what do we do with a hardware interrupt that
 * arrived on an illegal vector": report it (rate limited) and acknowledge it.
 *
 * @irq: the offending vector number; it is only used for the log message.
 */
void ack_bad_irq(unsigned int irq)
{
	if (printk_ratelimit())
		pr_err("unexpected IRQ trap at vector %02x\n", irq);

	/*
	 * Unexpected vectors currently only show up on SMP/APIC systems and
	 * they have to be acknowledged: a local APIC has a limited number of
	 * interrupt slots per priority level and an interrupt which is left
	 * "hanging" without an ack keeps one of them occupied. With enough
	 * unexpected vectors that could lock up the APIC completely.
	 *
	 * NOTE(review): an older remark here said the ack should only happen
	 * when the APIC is enabled. The call below is unconditional, so any
	 * such gating presumably lives inside apic_eoi() - confirm.
	 */
	apic_eoi();
}
64 
65 #define irq_stats(x)		(&per_cpu(irq_stat, x))
66 /*
67  * /proc/interrupts printing for arch specific interrupts
68  */
69 int arch_show_interrupts(struct seq_file *p, int prec)
70 {
71 	int j;
72 
73 	seq_printf(p, "%*s: ", prec, "NMI");
74 	for_each_online_cpu(j)
75 		seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
76 	seq_puts(p, "  Non-maskable interrupts\n");
77 #ifdef CONFIG_X86_LOCAL_APIC
78 	seq_printf(p, "%*s: ", prec, "LOC");
79 	for_each_online_cpu(j)
80 		seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
81 	seq_puts(p, "  Local timer interrupts\n");
82 
83 	seq_printf(p, "%*s: ", prec, "SPU");
84 	for_each_online_cpu(j)
85 		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
86 	seq_puts(p, "  Spurious interrupts\n");
87 	seq_printf(p, "%*s: ", prec, "PMI");
88 	for_each_online_cpu(j)
89 		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
90 	seq_puts(p, "  Performance monitoring interrupts\n");
91 	seq_printf(p, "%*s: ", prec, "IWI");
92 	for_each_online_cpu(j)
93 		seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
94 	seq_puts(p, "  IRQ work interrupts\n");
95 	seq_printf(p, "%*s: ", prec, "RTR");
96 	for_each_online_cpu(j)
97 		seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
98 	seq_puts(p, "  APIC ICR read retries\n");
99 	if (x86_platform_ipi_callback) {
100 		seq_printf(p, "%*s: ", prec, "PLT");
101 		for_each_online_cpu(j)
102 			seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
103 		seq_puts(p, "  Platform interrupts\n");
104 	}
105 #endif
106 #ifdef CONFIG_SMP
107 	seq_printf(p, "%*s: ", prec, "RES");
108 	for_each_online_cpu(j)
109 		seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
110 	seq_puts(p, "  Rescheduling interrupts\n");
111 	seq_printf(p, "%*s: ", prec, "CAL");
112 	for_each_online_cpu(j)
113 		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
114 	seq_puts(p, "  Function call interrupts\n");
115 	seq_printf(p, "%*s: ", prec, "TLB");
116 	for_each_online_cpu(j)
117 		seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
118 	seq_puts(p, "  TLB shootdowns\n");
119 #endif
120 #ifdef CONFIG_X86_THERMAL_VECTOR
121 	seq_printf(p, "%*s: ", prec, "TRM");
122 	for_each_online_cpu(j)
123 		seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
124 	seq_puts(p, "  Thermal event interrupts\n");
125 #endif
126 #ifdef CONFIG_X86_MCE_THRESHOLD
127 	seq_printf(p, "%*s: ", prec, "THR");
128 	for_each_online_cpu(j)
129 		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
130 	seq_puts(p, "  Threshold APIC interrupts\n");
131 #endif
132 #ifdef CONFIG_X86_MCE_AMD
133 	seq_printf(p, "%*s: ", prec, "DFR");
134 	for_each_online_cpu(j)
135 		seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
136 	seq_puts(p, "  Deferred Error APIC interrupts\n");
137 #endif
138 #ifdef CONFIG_X86_MCE
139 	seq_printf(p, "%*s: ", prec, "MCE");
140 	for_each_online_cpu(j)
141 		seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
142 	seq_puts(p, "  Machine check exceptions\n");
143 	seq_printf(p, "%*s: ", prec, "MCP");
144 	for_each_online_cpu(j)
145 		seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
146 	seq_puts(p, "  Machine check polls\n");
147 #endif
148 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR
149 	if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
150 		seq_printf(p, "%*s: ", prec, "HYP");
151 		for_each_online_cpu(j)
152 			seq_printf(p, "%10u ",
153 				   irq_stats(j)->irq_hv_callback_count);
154 		seq_puts(p, "  Hypervisor callback interrupts\n");
155 	}
156 #endif
157 #if IS_ENABLED(CONFIG_HYPERV)
158 	if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) {
159 		seq_printf(p, "%*s: ", prec, "HRE");
160 		for_each_online_cpu(j)
161 			seq_printf(p, "%10u ",
162 				   irq_stats(j)->irq_hv_reenlightenment_count);
163 		seq_puts(p, "  Hyper-V reenlightenment interrupts\n");
164 	}
165 	if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) {
166 		seq_printf(p, "%*s: ", prec, "HVS");
167 		for_each_online_cpu(j)
168 			seq_printf(p, "%10u ",
169 				   irq_stats(j)->hyperv_stimer0_count);
170 		seq_puts(p, "  Hyper-V stimer0 interrupts\n");
171 	}
172 #endif
173 	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
174 #if defined(CONFIG_X86_IO_APIC)
175 	seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
176 #endif
177 #if IS_ENABLED(CONFIG_KVM)
178 	seq_printf(p, "%*s: ", prec, "PIN");
179 	for_each_online_cpu(j)
180 		seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis);
181 	seq_puts(p, "  Posted-interrupt notification event\n");
182 
183 	seq_printf(p, "%*s: ", prec, "NPI");
184 	for_each_online_cpu(j)
185 		seq_printf(p, "%10u ",
186 			   irq_stats(j)->kvm_posted_intr_nested_ipis);
187 	seq_puts(p, "  Nested posted-interrupt event\n");
188 
189 	seq_printf(p, "%*s: ", prec, "PIW");
190 	for_each_online_cpu(j)
191 		seq_printf(p, "%10u ",
192 			   irq_stats(j)->kvm_posted_intr_wakeup_ipis);
193 	seq_puts(p, "  Posted-interrupt wakeup event\n");
194 #endif
195 #ifdef CONFIG_X86_POSTED_MSI
196 	seq_printf(p, "%*s: ", prec, "PMN");
197 	for_each_online_cpu(j)
198 		seq_printf(p, "%10u ",
199 			   irq_stats(j)->posted_msi_notification_count);
200 	seq_puts(p, "  Posted MSI notification event\n");
201 #endif
202 	return 0;
203 }
204 
205 /*
206  * /proc/stat helpers
207  */
208 u64 arch_irq_stat_cpu(unsigned int cpu)
209 {
210 	u64 sum = irq_stats(cpu)->__nmi_count;
211 
212 #ifdef CONFIG_X86_LOCAL_APIC
213 	sum += irq_stats(cpu)->apic_timer_irqs;
214 	sum += irq_stats(cpu)->irq_spurious_count;
215 	sum += irq_stats(cpu)->apic_perf_irqs;
216 	sum += irq_stats(cpu)->apic_irq_work_irqs;
217 	sum += irq_stats(cpu)->icr_read_retry_count;
218 	if (x86_platform_ipi_callback)
219 		sum += irq_stats(cpu)->x86_platform_ipis;
220 #endif
221 #ifdef CONFIG_SMP
222 	sum += irq_stats(cpu)->irq_resched_count;
223 	sum += irq_stats(cpu)->irq_call_count;
224 #endif
225 #ifdef CONFIG_X86_THERMAL_VECTOR
226 	sum += irq_stats(cpu)->irq_thermal_count;
227 #endif
228 #ifdef CONFIG_X86_MCE_THRESHOLD
229 	sum += irq_stats(cpu)->irq_threshold_count;
230 #endif
231 #ifdef CONFIG_X86_HV_CALLBACK_VECTOR
232 	sum += irq_stats(cpu)->irq_hv_callback_count;
233 #endif
234 #if IS_ENABLED(CONFIG_HYPERV)
235 	sum += irq_stats(cpu)->irq_hv_reenlightenment_count;
236 	sum += irq_stats(cpu)->hyperv_stimer0_count;
237 #endif
238 #ifdef CONFIG_X86_MCE
239 	sum += per_cpu(mce_exception_count, cpu);
240 	sum += per_cpu(mce_poll_count, cpu);
241 #endif
242 	return sum;
243 }
244 
245 u64 arch_irq_stat(void)
246 {
247 	u64 sum = atomic_read(&irq_err_count);
248 	return sum;
249 }
250 
251 static __always_inline void handle_irq(struct irq_desc *desc,
252 				       struct pt_regs *regs)
253 {
254 	if (IS_ENABLED(CONFIG_X86_64))
255 		generic_handle_irq_desc(desc);
256 	else
257 		__handle_irq(desc, regs);
258 }
259 
260 static struct irq_desc *reevaluate_vector(int vector)
261 {
262 	struct irq_desc *desc = __this_cpu_read(vector_irq[vector]);
263 
264 	if (!IS_ERR_OR_NULL(desc))
265 		return desc;
266 
267 	if (desc == VECTOR_UNUSED)
268 		pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector);
269 	else
270 		__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
271 	return NULL;
272 }
273 
274 static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs)
275 {
276 	struct irq_desc *desc = __this_cpu_read(vector_irq[vector]);
277 
278 	if (likely(!IS_ERR_OR_NULL(desc))) {
279 		handle_irq(desc, regs);
280 		return true;
281 	}
282 
283 	/*
284 	 * Reevaluate with vector_lock held to prevent a race against
285 	 * request_irq() setting up the vector:
286 	 *
287 	 * CPU0				CPU1
288 	 *				interrupt is raised in APIC IRR
289 	 *				but not handled
290 	 * free_irq()
291 	 *   per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN;
292 	 *
293 	 * request_irq()		common_interrupt()
294 	 *				  d = this_cpu_read(vector_irq[vector]);
295 	 *
296 	 * per_cpu(vector_irq, CPU1)[vector] = desc;
297 	 *
298 	 *				  if (d == VECTOR_SHUTDOWN)
299 	 *				    this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
300 	 *
301 	 * This requires that the same vector on the same target CPU is
302 	 * handed out or that a spurious interrupt hits that CPU/vector.
303 	 */
304 	lock_vector_lock();
305 	desc = reevaluate_vector(vector);
306 	unlock_vector_lock();
307 
308 	if (!desc)
309 		return false;
310 
311 	handle_irq(desc, regs);
312 	return true;
313 }
314 
315 /*
316  * common_interrupt() handles all normal device IRQ's (the special SMP
317  * cross-CPU interrupts have their own entry points).
318  */
319 DEFINE_IDTENTRY_IRQ(common_interrupt)
320 {
321 	struct pt_regs *old_regs = set_irq_regs(regs);
322 
323 	/* entry code tells RCU that we're not quiescent.  Check it. */
324 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
325 
326 	if (unlikely(!call_irq_handler(vector, regs)))
327 		apic_eoi();
328 
329 	set_irq_regs(old_regs);
330 }
331 
332 #ifdef CONFIG_X86_LOCAL_APIC
333 /* Function pointer for generic interrupt vector handling */
334 void (*x86_platform_ipi_callback)(void) = NULL;
335 /*
336  * Handler for X86_PLATFORM_IPI_VECTOR.
337  */
338 DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi)
339 {
340 	struct pt_regs *old_regs = set_irq_regs(regs);
341 
342 	apic_eoi();
343 	trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
344 	inc_irq_stat(x86_platform_ipis);
345 	if (x86_platform_ipi_callback)
346 		x86_platform_ipi_callback();
347 	trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
348 	set_irq_regs(old_regs);
349 }
350 #endif
351 
352 #if IS_ENABLED(CONFIG_KVM)
/* No-op used while no wakeup handler is registered. */
static void dummy_handler(void) {}

/* Called on every POSTED_INTERRUPT_WAKEUP_VECTOR interrupt; never NULL. */
static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler;

/*
 * Install or remove the posted interrupt wakeup handler.
 *
 * @handler: function to call from the wakeup vector, or NULL to go back to
 *	     the no-op default
 *
 * On removal synchronize_rcu() is invoked after the default has been put
 * back, presumably so that interrupt handlers which still run the old
 * function have finished when this returns - confirm against the callers.
 */
void kvm_set_posted_intr_wakeup_handler(void (*handler)(void))
{
	if (!handler) {
		kvm_posted_intr_wakeup_handler = dummy_handler;
		synchronize_rcu();
		return;
	}

	kvm_posted_intr_wakeup_handler = handler;
}
EXPORT_SYMBOL_FOR_KVM(kvm_set_posted_intr_wakeup_handler);
366 
367 /*
368  * Handler for POSTED_INTERRUPT_VECTOR.
369  */
370 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi)
371 {
372 	apic_eoi();
373 	inc_irq_stat(kvm_posted_intr_ipis);
374 }
375 
376 /*
377  * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
378  */
379 DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi)
380 {
381 	apic_eoi();
382 	inc_irq_stat(kvm_posted_intr_wakeup_ipis);
383 	kvm_posted_intr_wakeup_handler();
384 }
385 
386 /*
387  * Handler for POSTED_INTERRUPT_NESTED_VECTOR.
388  */
389 DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi)
390 {
391 	apic_eoi();
392 	inc_irq_stat(kvm_posted_intr_nested_ipis);
393 }
394 #endif
395 
396 #ifdef CONFIG_X86_POSTED_MSI
397 
398 /* Posted Interrupt Descriptors for coalesced MSIs to be posted */
399 DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
400 
401 void intel_posted_msi_init(void)
402 {
403 	u32 destination;
404 	u32 apic_id;
405 
406 	this_cpu_write(posted_msi_pi_desc.nv, POSTED_MSI_NOTIFICATION_VECTOR);
407 
408 	/*
409 	 * APIC destination ID is stored in bit 8:15 while in XAPIC mode.
410 	 * VT-d spec. CH 9.11
411 	 */
412 	apic_id = this_cpu_read(x86_cpu_to_apicid);
413 	destination = x2apic_enabled() ? apic_id : apic_id << 8;
414 	this_cpu_write(posted_msi_pi_desc.ndst, destination);
415 }
416 
417 static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs)
418 {
419 	unsigned long pir_copy[NR_PIR_WORDS];
420 	int vec = FIRST_EXTERNAL_VECTOR;
421 
422 	if (!pi_harvest_pir(pir, pir_copy))
423 		return false;
424 
425 	for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR)
426 		call_irq_handler(vec, regs);
427 
428 	return true;
429 }
430 
431 /*
432  * Performance data shows that 3 is good enough to harvest 90+% of the benefit
433  * on high IRQ rate workload.
434  */
435 #define MAX_POSTED_MSI_COALESCING_LOOP 3
436 
437 /*
438  * For MSIs that are delivered as posted interrupts, the CPU notifications
439  * can be coalesced if the MSIs arrive in high frequency bursts.
440  */
441 DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
442 {
443 	struct pt_regs *old_regs = set_irq_regs(regs);
444 	struct pi_desc *pid;
445 	int i = 0;
446 
447 	pid = this_cpu_ptr(&posted_msi_pi_desc);
448 
449 	inc_irq_stat(posted_msi_notification_count);
450 	irq_enter();
451 
452 	/*
453 	 * Max coalescing count includes the extra round of handle_pending_pir
454 	 * after clearing the outstanding notification bit. Hence, at most
455 	 * MAX_POSTED_MSI_COALESCING_LOOP - 1 loops are executed here.
456 	 */
457 	while (++i < MAX_POSTED_MSI_COALESCING_LOOP) {
458 		if (!handle_pending_pir(pid->pir, regs))
459 			break;
460 	}
461 
462 	/*
463 	 * Clear outstanding notification bit to allow new IRQ notifications,
464 	 * do this last to maximize the window of interrupt coalescing.
465 	 */
466 	pi_clear_on(pid);
467 
468 	/*
469 	 * There could be a race of PI notification and the clearing of ON bit,
470 	 * process PIR bits one last time such that handling the new interrupts
471 	 * are not delayed until the next IRQ.
472 	 */
473 	handle_pending_pir(pid->pir, regs);
474 
475 	apic_eoi();
476 	irq_exit();
477 	set_irq_regs(old_regs);
478 }
479 #endif /* X86_POSTED_MSI */
480 
481 #ifdef CONFIG_HOTPLUG_CPU
482 /* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
483 void fixup_irqs(void)
484 {
485 	unsigned int vector;
486 	struct irq_desc *desc;
487 	struct irq_data *data;
488 	struct irq_chip *chip;
489 
490 	irq_migrate_all_off_this_cpu();
491 
492 	/*
493 	 * We can remove mdelay() and then send spurious interrupts to
494 	 * new cpu targets for all the irqs that were handled previously by
495 	 * this cpu. While it works, I have seen spurious interrupt messages
496 	 * (nothing wrong but still...).
497 	 *
498 	 * So for now, retain mdelay(1) and check the IRR and then send those
499 	 * interrupts to new targets as this cpu is already offlined...
500 	 */
501 	mdelay(1);
502 
503 	/*
504 	 * We can walk the vector array of this cpu without holding
505 	 * vector_lock because the cpu is already marked !online, so
506 	 * nothing else will touch it.
507 	 */
508 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
509 		if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector])))
510 			continue;
511 
512 		if (is_vector_pending(vector)) {
513 			desc = __this_cpu_read(vector_irq[vector]);
514 
515 			raw_spin_lock(&desc->lock);
516 			data = irq_desc_get_irq_data(desc);
517 			chip = irq_data_get_irq_chip(data);
518 			if (chip->irq_retrigger) {
519 				chip->irq_retrigger(data);
520 				__this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED);
521 			}
522 			raw_spin_unlock(&desc->lock);
523 		}
524 		if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED)
525 			__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
526 	}
527 }
528 #endif
529 
530 #ifdef CONFIG_X86_THERMAL_VECTOR
/*
 * Body of the thermal vector handler: hand the event to
 * intel_thermal_interrupt() if x86_thermal_enabled() says so, otherwise log
 * the interrupt as unexpected.
 */
static void smp_thermal_vector(void)
{
	if (!x86_thermal_enabled()) {
		pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
		       smp_processor_id());
		return;
	}

	intel_thermal_interrupt();
}
539 
540 DEFINE_IDTENTRY_SYSVEC(sysvec_thermal)
541 {
542 	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
543 	inc_irq_stat(irq_thermal_count);
544 	smp_thermal_vector();
545 	trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
546 	apic_eoi();
547 }
548 #endif
549