1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Machine dependent interrupt code for x86. For x86, we have to
30 * deal with different PICs. Thus, we use the passed in vector to lookup
31 * an interrupt source associated with that vector. The interrupt source
32 * describes which PIC the source belongs to and includes methods to handle
33 * that source.
34 */
35
36 #include "opt_atpic.h"
37 #include "opt_ddb.h"
38 #include "opt_smp.h"
39
40 #include <sys/param.h>
41 #include <sys/bus.h>
42 #include <sys/interrupt.h>
43 #include <sys/ktr.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/mutex.h>
48 #include <sys/proc.h>
49 #include <sys/queue.h>
50 #include <sys/sbuf.h>
51 #include <sys/smp.h>
52 #include <sys/sx.h>
53 #include <sys/sysctl.h>
54 #include <sys/syslog.h>
55 #include <sys/systm.h>
56 #include <sys/taskqueue.h>
57 #include <sys/vmmeter.h>
58 #include <machine/clock.h>
59 #include <machine/intr_machdep.h>
60 #include <machine/smp.h>
61 #ifdef DDB
62 #include <ddb/ddb.h>
63 #endif
64
65 #ifndef DEV_ATPIC
66 #include <machine/segments.h>
67 #include <machine/frame.h>
68 #include <dev/ic/i8259.h>
69 #include <x86/isa/icu.h>
70 #include <isa/isareg.h>
71 #endif
72
73 #include <vm/vm.h>
74
75 typedef void (*mask_fn)(void *);
76
77 static int intrcnt_index;
78 static struct intsrc **interrupt_sources;
79 #ifdef SMP
80 static struct intsrc **interrupt_sorted;
81 static int intrbalance;
82 SYSCTL_INT(_hw, OID_AUTO, intrbalance, CTLFLAG_RWTUN, &intrbalance, 0,
83 "Interrupt auto-balance interval (seconds). Zero disables.");
84 static struct timeout_task intrbalance_task;
85 #endif
86 static struct sx intrsrc_lock;
87 static struct mtx intrpic_lock;
88 static struct mtx intrcnt_lock;
89 static TAILQ_HEAD(pics_head, pic) pics;
90 u_int num_io_irqs;
91
92 #if defined(SMP) && !defined(EARLY_AP_STARTUP)
93 #error EARLY_AP_STARTUP required on x86
94 #endif
95
96 #define INTRNAME_LEN (MAXCOMLEN + 1)
97 u_long *intrcnt;
98 char *intrnames;
99 size_t sintrcnt = sizeof(intrcnt);
100 size_t sintrnames = sizeof(intrnames);
101 int nintrcnt;
102
103 static MALLOC_DEFINE(M_INTR, "intr", "Interrupt Sources");
104
105 static int intr_assign_cpu(void *arg, int cpu);
106 static void intr_disable_src(void *arg);
107 static void intr_init(void *__dummy);
108 static int intr_pic_registered(struct pic *pic);
109 static void intrcnt_setname(const char *name, int index);
110 static void intrcnt_updatename(struct intsrc *is);
111 static void intrcnt_register(struct intsrc *is);
112
113 /*
114 * SYSINIT levels for SI_SUB_INTR:
115 *
116 * SI_ORDER_FIRST: Initialize locks and pics TAILQ, xen_hvm_cpu_init
117 * SI_ORDER_SECOND: Xen PICs
118 * SI_ORDER_THIRD: Add I/O APIC PICs, alloc MSI and Xen IRQ ranges
119 * SI_ORDER_FOURTH: Add 8259A PICs
120 * SI_ORDER_FOURTH + 1: Finalize interrupt count and add interrupt sources
121 * SI_ORDER_MIDDLE: SMP interrupt counters
122 * SI_ORDER_ANY: Enable interrupts on BSP
123 */
124
125 static int
intr_pic_registered(struct pic * pic)126 intr_pic_registered(struct pic *pic)
127 {
128 struct pic *p;
129
130 TAILQ_FOREACH(p, &pics, pics) {
131 if (p == pic)
132 return (1);
133 }
134 return (0);
135 }
136
137 /*
138 * Register a new interrupt controller (PIC). This is to support suspend
139 * and resume where we suspend/resume controllers rather than individual
140 * sources. This also allows controllers with no active sources (such as
141 * 8259As in a system using the APICs) to participate in suspend and resume.
142 */
143 int
intr_register_pic(struct pic * pic)144 intr_register_pic(struct pic *pic)
145 {
146 int error;
147
148 mtx_lock(&intrpic_lock);
149 if (intr_pic_registered(pic))
150 error = EBUSY;
151 else {
152 TAILQ_INSERT_TAIL(&pics, pic, pics);
153 error = 0;
154 }
155 mtx_unlock(&intrpic_lock);
156 return (error);
157 }
158
159 /*
160 * Allocate interrupt source arrays and register interrupt sources
161 * once the number of interrupts is known.
162 */
163 static void
intr_init_sources(void * arg)164 intr_init_sources(void *arg)
165 {
166 struct pic *pic;
167
168 MPASS(num_io_irqs > 0);
169
170 interrupt_sources = mallocarray(num_io_irqs, sizeof(*interrupt_sources),
171 M_INTR, M_WAITOK | M_ZERO);
172 #ifdef SMP
173 interrupt_sorted = mallocarray(num_io_irqs, sizeof(*interrupt_sorted),
174 M_INTR, M_WAITOK | M_ZERO);
175 #endif
176
177 /*
178 * - 1 ??? dummy counter.
179 * - 2 counters for each I/O interrupt.
180 * - 1 counter for each CPU for lapic timer.
181 * - 1 counter for each CPU for the Hyper-V vmbus driver.
182 * - 8 counters for each CPU for IPI counters for SMP.
183 */
184 nintrcnt = 1 + num_io_irqs * 2 + mp_ncpus * 2;
185 #ifdef COUNT_IPIS
186 if (mp_ncpus > 1)
187 nintrcnt += 8 * mp_ncpus;
188 #endif
189 intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTR, M_WAITOK |
190 M_ZERO);
191 intrnames = mallocarray(nintrcnt, INTRNAME_LEN, M_INTR, M_WAITOK |
192 M_ZERO);
193 sintrcnt = nintrcnt * sizeof(u_long);
194 sintrnames = nintrcnt * INTRNAME_LEN;
195
196 intrcnt_setname("???", 0);
197 intrcnt_index = 1;
198
199 /*
200 * NB: intrpic_lock is not held here to avoid LORs due to
201 * malloc() in intr_register_source(). However, we are still
202 * single-threaded at this point in startup so the list of
203 * PICs shouldn't change.
204 */
205 TAILQ_FOREACH(pic, &pics, pics) {
206 if (pic->pic_register_sources != NULL)
207 pic->pic_register_sources(pic);
208 }
209 }
210 SYSINIT(intr_init_sources, SI_SUB_INTR, SI_ORDER_FOURTH + 1, intr_init_sources,
211 NULL);
212
213 /*
214 * Register a new interrupt source with the global interrupt system.
215 * The global interrupts need to be disabled when this function is
216 * called.
217 */
218 int
intr_register_source(struct intsrc * isrc)219 intr_register_source(struct intsrc *isrc)
220 {
221 int error, vector;
222
223 KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC"));
224 vector = isrc->is_pic->pic_vector(isrc);
225 KASSERT(vector < num_io_irqs, ("IRQ %d too large (%u irqs)", vector,
226 num_io_irqs));
227 if (interrupt_sources[vector] != NULL)
228 return (EEXIST);
229 error = intr_event_create(&isrc->is_event, isrc, 0, vector,
230 intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source,
231 (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:",
232 vector);
233 if (error)
234 return (error);
235 sx_xlock(&intrsrc_lock);
236 if (interrupt_sources[vector] != NULL) {
237 sx_xunlock(&intrsrc_lock);
238 intr_event_destroy(isrc->is_event);
239 return (EEXIST);
240 }
241 intrcnt_register(isrc);
242 interrupt_sources[vector] = isrc;
243 isrc->is_handlers = 0;
244 sx_xunlock(&intrsrc_lock);
245 return (0);
246 }
247
248 void
intr_disable_all(void)249 intr_disable_all(void)
250 {
251 /*
252 * Disable all external interrupts. This is used by kexec_reboot() to
253 * prevent problems on the other side when APs are brought up.
254 */
255 for (int v = 0; v < num_io_irqs; v++) {
256 struct intsrc *is;
257
258 is = interrupt_sources[v];
259 if (is == NULL)
260 continue;
261 if (is->is_pic->pic_disable_intr != NULL) {
262 is->is_pic->pic_disable_source(is, PIC_EOI);
263 is->is_pic->pic_disable_intr(is);
264 }
265 }
266 }
267
268 struct intsrc *
intr_lookup_source(int vector)269 intr_lookup_source(int vector)
270 {
271
272 if (vector < 0 || vector >= num_io_irqs)
273 return (NULL);
274 return (interrupt_sources[vector]);
275 }
276
277 int
intr_add_handler(struct intsrc * isrc,const char * name,driver_filter_t filter,driver_intr_t handler,void * arg,enum intr_type flags,void ** cookiep,int domain)278 intr_add_handler(struct intsrc *isrc, const char *name, driver_filter_t filter,
279 driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
280 int domain)
281 {
282 int error;
283
284 error = intr_event_add_handler(isrc->is_event, name, filter, handler,
285 arg, intr_priority(flags), flags, cookiep);
286 if (error == 0) {
287 sx_xlock(&intrsrc_lock);
288 intrcnt_updatename(isrc);
289 isrc->is_handlers++;
290 if (isrc->is_handlers == 1) {
291 isrc->is_domain = domain;
292 isrc->is_pic->pic_enable_intr(isrc);
293 isrc->is_pic->pic_enable_source(isrc);
294 }
295 sx_xunlock(&intrsrc_lock);
296 }
297 return (error);
298 }
299
300 int
intr_remove_handler(void * cookie)301 intr_remove_handler(void *cookie)
302 {
303 struct intsrc *isrc;
304 int error;
305
306 isrc = intr_handler_source(cookie);
307 error = intr_event_remove_handler(cookie);
308 if (error == 0) {
309 sx_xlock(&intrsrc_lock);
310 isrc->is_handlers--;
311 if (isrc->is_handlers == 0) {
312 isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI);
313 isrc->is_pic->pic_disable_intr(isrc);
314 }
315 intrcnt_updatename(isrc);
316 sx_xunlock(&intrsrc_lock);
317 }
318 return (error);
319 }
320
321 int
intr_config_intr(struct intsrc * isrc,enum intr_trigger trig,enum intr_polarity pol)322 intr_config_intr(struct intsrc *isrc, enum intr_trigger trig,
323 enum intr_polarity pol)
324 {
325
326 return (isrc->is_pic->pic_config_intr(isrc, trig, pol));
327 }
328
329 static void
intr_disable_src(void * arg)330 intr_disable_src(void *arg)
331 {
332 struct intsrc *isrc;
333
334 isrc = arg;
335 isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
336 }
337
338 void
intr_execute_handlers(struct intsrc * isrc,struct trapframe * frame)339 intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame)
340 {
341 struct intr_event *ie;
342 int vector;
343
344 /*
345 * We count software interrupts when we process them. The
346 * code here follows previous practice, but there's an
347 * argument for counting hardware interrupts when they're
348 * processed too.
349 */
350 (*isrc->is_count)++;
351 VM_CNT_INC(v_intr);
352
353 ie = isrc->is_event;
354
355 /*
356 * XXX: We assume that IRQ 0 is only used for the ISA timer
357 * device (clk).
358 */
359 vector = isrc->is_pic->pic_vector(isrc);
360 if (vector == 0)
361 clkintr_pending = 1;
362
363 /*
364 * For stray interrupts, mask and EOI the source, bump the
365 * stray count, and log the condition.
366 */
367 if (intr_event_handle(ie, frame) != 0) {
368 isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
369 (*isrc->is_straycount)++;
370 if (*isrc->is_straycount < INTR_STRAY_LOG_MAX)
371 log(LOG_ERR, "stray irq%d\n", vector);
372 else if (*isrc->is_straycount == INTR_STRAY_LOG_MAX)
373 log(LOG_CRIT,
374 "too many stray irq %d's: not logging anymore\n",
375 vector);
376 }
377 }
378
379 void
intr_resume(bool suspend_cancelled)380 intr_resume(bool suspend_cancelled)
381 {
382 struct pic *pic;
383
384 #ifndef DEV_ATPIC
385 atpic_reset();
386 #endif
387 mtx_lock(&intrpic_lock);
388 TAILQ_FOREACH(pic, &pics, pics) {
389 if (pic->pic_resume != NULL)
390 pic->pic_resume(pic, suspend_cancelled);
391 }
392 mtx_unlock(&intrpic_lock);
393 }
394
395 void
intr_suspend(void)396 intr_suspend(void)
397 {
398 struct pic *pic;
399
400 mtx_lock(&intrpic_lock);
401 TAILQ_FOREACH_REVERSE(pic, &pics, pics_head, pics) {
402 if (pic->pic_suspend != NULL)
403 pic->pic_suspend(pic);
404 }
405 mtx_unlock(&intrpic_lock);
406 }
407
408 void
intr_enable_src(u_int irq)409 intr_enable_src(u_int irq)
410 {
411 struct intsrc *is;
412
413 is = interrupt_sources[irq];
414 is->is_pic->pic_enable_source(is);
415 }
416
417 static int
intr_assign_cpu(void * arg,int cpu)418 intr_assign_cpu(void *arg, int cpu)
419 {
420 #ifdef SMP
421 struct intsrc *isrc;
422 int error;
423
424 MPASS(mp_ncpus == 1 || smp_started);
425
426 /* Nothing to do if there is only a single CPU. */
427 if (mp_ncpus > 1 && cpu != NOCPU) {
428 isrc = arg;
429 sx_xlock(&intrsrc_lock);
430 error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]);
431 if (error == 0)
432 isrc->is_cpu = cpu;
433 sx_xunlock(&intrsrc_lock);
434 } else
435 error = 0;
436 return (error);
437 #else
438 return (EOPNOTSUPP);
439 #endif
440 }
441
442 static void
intrcnt_setname(const char * name,int index)443 intrcnt_setname(const char *name, int index)
444 {
445
446 snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
447 INTRNAME_LEN - 1, name);
448 }
449
450 static void
intrcnt_updatename(struct intsrc * is)451 intrcnt_updatename(struct intsrc *is)
452 {
453
454 intrcnt_setname(is->is_event->ie_fullname, is->is_index);
455 }
456
457 static void
intrcnt_register(struct intsrc * is)458 intrcnt_register(struct intsrc *is)
459 {
460 char straystr[INTRNAME_LEN];
461
462 KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__));
463 mtx_lock_spin(&intrcnt_lock);
464 MPASS(intrcnt_index + 2 <= nintrcnt);
465 is->is_index = intrcnt_index;
466 intrcnt_index += 2;
467 snprintf(straystr, sizeof(straystr), "stray irq%d",
468 is->is_pic->pic_vector(is));
469 intrcnt_updatename(is);
470 is->is_count = &intrcnt[is->is_index];
471 intrcnt_setname(straystr, is->is_index + 1);
472 is->is_straycount = &intrcnt[is->is_index + 1];
473 mtx_unlock_spin(&intrcnt_lock);
474 }
475
476 void
intrcnt_add(const char * name,u_long ** countp)477 intrcnt_add(const char *name, u_long **countp)
478 {
479
480 mtx_lock_spin(&intrcnt_lock);
481 MPASS(intrcnt_index < nintrcnt);
482 *countp = &intrcnt[intrcnt_index];
483 intrcnt_setname(name, intrcnt_index);
484 intrcnt_index++;
485 mtx_unlock_spin(&intrcnt_lock);
486 }
487
488 static void
intr_init(void * dummy __unused)489 intr_init(void *dummy __unused)
490 {
491
492 TAILQ_INIT(&pics);
493 mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF);
494 sx_init(&intrsrc_lock, "intrsrc");
495 mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN);
496 }
497 SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL);
498
499 static void
intr_init_final(void * dummy __unused)500 intr_init_final(void *dummy __unused)
501 {
502
503 /*
504 * Enable interrupts on the BSP after all of the interrupt
505 * controllers are initialized. Device interrupts are still
506 * disabled in the interrupt controllers until interrupt
507 * handlers are registered. Interrupts are enabled on each AP
508 * after their first context switch.
509 */
510 enable_intr();
511 }
512 SYSINIT(intr_init_final, SI_SUB_INTR, SI_ORDER_ANY, intr_init_final, NULL);
513
514 #ifndef DEV_ATPIC
515 /* Initialize the two 8259A's to a known-good shutdown state. */
516 void
atpic_reset(void)517 atpic_reset(void)
518 {
519
520 outb(IO_ICU1, ICW1_RESET | ICW1_IC4);
521 outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS);
522 outb(IO_ICU1 + ICU_IMR_OFFSET, IRQ_MASK(ICU_SLAVEID));
523 outb(IO_ICU1 + ICU_IMR_OFFSET, MASTER_MODE);
524 outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff);
525 outb(IO_ICU1, OCW3_SEL | OCW3_RR);
526
527 outb(IO_ICU2, ICW1_RESET | ICW1_IC4);
528 outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8);
529 outb(IO_ICU2 + ICU_IMR_OFFSET, ICU_SLAVEID);
530 outb(IO_ICU2 + ICU_IMR_OFFSET, SLAVE_MODE);
531 outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff);
532 outb(IO_ICU2, OCW3_SEL | OCW3_RR);
533 }
534 #endif
535
536 /* Add a description to an active interrupt handler. */
537 int
intr_describe(struct intsrc * isrc,void * ih,const char * descr)538 intr_describe(struct intsrc *isrc, void *ih, const char *descr)
539 {
540 int error;
541
542 error = intr_event_describe_handler(isrc->is_event, ih, descr);
543 if (error)
544 return (error);
545 intrcnt_updatename(isrc);
546 return (0);
547 }
548
549 void
intr_reprogram(void)550 intr_reprogram(void)
551 {
552 struct intsrc *is;
553 u_int v;
554
555 sx_xlock(&intrsrc_lock);
556 for (v = 0; v < num_io_irqs; v++) {
557 is = interrupt_sources[v];
558 if (is == NULL)
559 continue;
560 if (is->is_pic->pic_reprogram_pin != NULL)
561 is->is_pic->pic_reprogram_pin(is);
562 }
563 sx_xunlock(&intrsrc_lock);
564 }
565
566 #ifdef DDB
567 /*
568 * Dump data about interrupt handlers
569 */
DB_SHOW_COMMAND(irqs,db_show_irqs)570 DB_SHOW_COMMAND(irqs, db_show_irqs)
571 {
572 struct intsrc **isrc;
573 u_int i;
574 int verbose;
575
576 if (strcmp(modif, "v") == 0)
577 verbose = 1;
578 else
579 verbose = 0;
580 isrc = interrupt_sources;
581 for (i = 0; i < num_io_irqs && !db_pager_quit; i++, isrc++)
582 if (*isrc != NULL)
583 db_dump_intr_event((*isrc)->is_event, verbose);
584 }
585 #endif
586
587 #ifdef SMP
588 /*
589 * Support for balancing interrupt sources across CPUs. For now we just
590 * allocate CPUs round-robin.
591 *
 * XXX If the system has a domain without any usable CPUs (e.g., where all
593 * APIC IDs are 256 or greater and we do not have an IOMMU) we use
594 * intr_no_domain to fall back to assigning interrupts without regard for
595 * domain. Once we can rely on the presence of an IOMMU on all x86 platforms
596 * we can revert this.
597 */
598
599 cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1);
600 static int current_cpu[MAXMEMDOM];
601 static bool intr_no_domain;
602
603 static void
intr_init_cpus(void)604 intr_init_cpus(void)
605 {
606 int i;
607
608 for (i = 0; i < vm_ndomains; i++) {
609 if (CPU_OVERLAP(&cpuset_domain[i], &intr_cpus) == 0) {
610 intr_no_domain = true;
611 printf("%s: unable to route interrupts to CPUs in domain %d\n",
612 __func__, i);
613 }
614
615 current_cpu[i] = 0;
616 if (intr_no_domain && i > 0)
617 continue;
618 if (!CPU_ISSET(current_cpu[i], &intr_cpus) ||
619 !CPU_ISSET(current_cpu[i], &cpuset_domain[i]))
620 intr_next_cpu(i);
621 }
622 }
623
624 /*
625 * Return the CPU that the next interrupt source should use. For now
626 * this just returns the next local APIC according to round-robin.
627 */
628 u_int
intr_next_cpu(int domain)629 intr_next_cpu(int domain)
630 {
631 u_int apic_id;
632
633 MPASS(mp_ncpus == 1 || smp_started);
634 if (mp_ncpus == 1)
635 return (PCPU_GET(apic_id));
636
637 if (intr_no_domain)
638 domain = 0;
639 mtx_lock_spin(&icu_lock);
640 apic_id = cpu_apic_ids[current_cpu[domain]];
641 do {
642 current_cpu[domain]++;
643 if (current_cpu[domain] > mp_maxid)
644 current_cpu[domain] = 0;
645 } while (!CPU_ISSET(current_cpu[domain], &intr_cpus) ||
646 (!CPU_ISSET(current_cpu[domain], &cpuset_domain[domain]) &&
647 !intr_no_domain));
648 mtx_unlock_spin(&icu_lock);
649 return (apic_id);
650 }
651
652 /*
653 * Add a CPU to our mask of valid CPUs that can be destinations of
654 * interrupts.
655 */
656 void
intr_add_cpu(u_int cpu)657 intr_add_cpu(u_int cpu)
658 {
659
660 if (cpu >= MAXCPU)
661 panic("%s: Invalid CPU ID %u", __func__, cpu);
662 if (bootverbose)
663 printf("INTR: Adding local APIC %d as a target\n",
664 cpu_apic_ids[cpu]);
665
666 CPU_SET(cpu, &intr_cpus);
667 }
668
669 static void
intr_smp_startup(void * arg __unused)670 intr_smp_startup(void *arg __unused)
671 {
672
673 intr_init_cpus();
674 return;
675 }
676 SYSINIT(intr_smp_startup, SI_SUB_SMP, SI_ORDER_SECOND, intr_smp_startup,
677 NULL);
678
679 /*
680 * TODO: Export this information in a non-MD fashion, integrate with vmstat -i.
681 */
682 static int
sysctl_hw_intrs(SYSCTL_HANDLER_ARGS)683 sysctl_hw_intrs(SYSCTL_HANDLER_ARGS)
684 {
685 struct sbuf sbuf;
686 struct intsrc *isrc;
687 u_int i;
688 int error;
689
690 error = sysctl_wire_old_buffer(req, 0);
691 if (error != 0)
692 return (error);
693
694 sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
695 sx_slock(&intrsrc_lock);
696 for (i = 0; i < num_io_irqs; i++) {
697 isrc = interrupt_sources[i];
698 if (isrc == NULL)
699 continue;
700 sbuf_printf(&sbuf, "%s:%d @cpu%d(domain%d): %ld\n",
701 isrc->is_event->ie_fullname,
702 isrc->is_index,
703 isrc->is_cpu,
704 isrc->is_domain,
705 *isrc->is_count);
706 }
707
708 sx_sunlock(&intrsrc_lock);
709 error = sbuf_finish(&sbuf);
710 sbuf_delete(&sbuf);
711 return (error);
712 }
713 SYSCTL_PROC(_hw, OID_AUTO, intrs,
714 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
715 0, 0, sysctl_hw_intrs, "A",
716 "interrupt:number @cpu: count");
717
718 /*
719 * Compare two, possibly NULL, entries in the interrupt source array
720 * by load.
721 */
722 static int
intrcmp(const void * one,const void * two)723 intrcmp(const void *one, const void *two)
724 {
725 const struct intsrc *i1, *i2;
726
727 i1 = *(const struct intsrc * const *)one;
728 i2 = *(const struct intsrc * const *)two;
729 if (i1 != NULL && i2 != NULL)
730 return (*i1->is_count - *i2->is_count);
731 if (i1 != NULL)
732 return (1);
733 if (i2 != NULL)
734 return (-1);
735 return (0);
736 }
737
738 /*
739 * Balance IRQs across available CPUs according to load.
740 */
741 static void
intr_balance(void * dummy __unused,int pending __unused)742 intr_balance(void *dummy __unused, int pending __unused)
743 {
744 struct intsrc *isrc;
745 int interval;
746 u_int cpu;
747 int i;
748
749 interval = intrbalance;
750 if (interval == 0)
751 goto out;
752
753 /*
754 * Sort interrupts according to count.
755 */
756 sx_xlock(&intrsrc_lock);
757 memcpy(interrupt_sorted, interrupt_sources, num_io_irqs *
758 sizeof(interrupt_sorted[0]));
759 qsort(interrupt_sorted, num_io_irqs, sizeof(interrupt_sorted[0]),
760 intrcmp);
761
762 /*
763 * Restart the scan from the same location to avoid moving in the
764 * common case.
765 */
766 intr_init_cpus();
767
768 /*
769 * Assign round-robin from most loaded to least.
770 */
771 for (i = num_io_irqs - 1; i >= 0; i--) {
772 isrc = interrupt_sorted[i];
773 if (isrc == NULL || isrc->is_event->ie_cpu != NOCPU)
774 continue;
775 cpu = current_cpu[isrc->is_domain];
776 intr_next_cpu(isrc->is_domain);
777 if (isrc->is_cpu != cpu &&
778 isrc->is_pic->pic_assign_cpu(isrc,
779 cpu_apic_ids[cpu]) == 0)
780 isrc->is_cpu = cpu;
781 }
782 sx_xunlock(&intrsrc_lock);
783 out:
784 taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task,
785 interval ? hz * interval : hz * 60);
786
787 }
788
789 static void
intr_balance_init(void * dummy __unused)790 intr_balance_init(void *dummy __unused)
791 {
792
793 TIMEOUT_TASK_INIT(taskqueue_thread, &intrbalance_task, 0, intr_balance,
794 NULL);
795 taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, hz);
796 }
797 SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_ANY, intr_balance_init, NULL);
798
799 #else
800 /*
801 * Always route interrupts to the current processor in the UP case.
802 */
803 u_int
intr_next_cpu(int domain)804 intr_next_cpu(int domain)
805 {
806
807 return (PCPU_GET(apic_id));
808 }
809 #endif
810