xref: /freebsd/sys/kern/subr_intr.c (revision f1ddb6fb8c4d051a205dae3a848776c9d56f86ff)
1 /*-
2  * Copyright (c) 2015-2016 Svatopluk Kraus
3  * Copyright (c) 2015-2016 Michal Meloun
4  * All rights reserved.
5  * Copyright (c) 2015-2016 The FreeBSD Foundation
6  * Copyright (c) 2021 Jessica Clarke <jrtc27@FreeBSD.org>
7  *
8  * Portions of this software were developed by Andrew Turner under
9  * sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 /*
35  *	New-style Interrupt Framework
36  *
37  *  TODO: - add support for disconnected PICs.
38  *        - to support IPI (PPI) enabling on other CPUs if already started.
39  *        - to complete things for removable PICs.
40  */
41 
42 #include "opt_ddb.h"
43 #include "opt_hwpmc_hooks.h"
44 #include "opt_iommu.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/asan.h>
49 #include <sys/bitstring.h>
50 #include <sys/bus.h>
51 #include <sys/conf.h>
52 #include <sys/cpuset.h>
53 #include <sys/interrupt.h>
54 #include <sys/intr.h>
55 #include <sys/kernel.h>
56 #include <sys/lock.h>
57 #include <sys/malloc.h>
58 #include <sys/msan.h>
59 #include <sys/mutex.h>
60 #include <sys/proc.h>
61 #include <sys/queue.h>
62 #include <sys/rman.h>
63 #include <sys/sched.h>
64 #include <sys/smp.h>
65 #include <sys/sysctl.h>
66 #include <sys/syslog.h>
67 #include <sys/taskqueue.h>
68 #include <sys/tree.h>
69 #include <sys/vmmeter.h>
70 #ifdef HWPMC_HOOKS
71 #include <sys/pmckern.h>
72 #endif
73 
74 #include <machine/atomic.h>
75 #include <machine/cpu.h>
76 #include <machine/smp.h>
77 #include <machine/stdarg.h>
78 
79 #ifdef DDB
80 #include <ddb/ddb.h>
81 #endif
82 
83 #ifdef IOMMU
84 #include <dev/iommu/iommu_msi.h>
85 #endif
86 
87 #include "pic_if.h"
88 #include "msi_if.h"
89 
/* Size of one name slot in the intrnames table (see intrcnt_setname()). */
90 #define	INTRNAME_LEN	(2*MAXCOMLEN + 1)
91 
/*
 * debugf() prints the name of the calling function followed by the
 * formatted message when DEBUG is defined; otherwise it expands to nothing.
 */
92 #ifdef DEBUG
93 #define debugf(fmt, args...) do { printf("%s(): ", __func__);	\
94     printf(fmt,##args); } while (0)
95 #else
96 #define debugf(fmt, args...)
97 #endif
98 
/* Malloc type used by the allocations made in this file. */
99 MALLOC_DECLARE(M_INTRNG);
100 MALLOC_DEFINE(M_INTRNG, "intr", "intr interrupt handling");
101 
102 /* Root interrupt controller stuff. */
103 struct intr_irq_root {
104 	device_t dev;			/* device that claimed this root */
105 	intr_irq_filter_t *filter;	/* run by intr_irq_handler() */
106 	void *arg;			/* argument handed to filter */
107 };
108 
/* One slot per root number; slots are filled by intr_pic_claim_root(). */
109 static struct intr_irq_root intr_irq_roots[INTR_ROOT_COUNT];
110 
/*
 * Record of a child PIC that handles a sub-range of its parent's
 * interrupts. Created by intr_pic_add_handler() and consulted by
 * intr_child_irq_handler().
 */
111 struct intr_pic_child {
112 	SLIST_ENTRY(intr_pic_child)	 pc_next;	/* link in parent's pic_children */
113 	struct intr_pic			*pc_pic;	/* the child PIC */
114 	intr_child_irq_filter_t		*pc_filter;	/* called for irqs in range */
115 	void				*pc_filter_arg;	/* first argument of pc_filter */
116 	uintptr_t			 pc_start;	/* first irq of the range */
117 	uintptr_t			 pc_length;	/* number of irqs in the range */
118 };
119 
120 /* Interrupt controller definition. */
121 struct intr_pic {
122 	SLIST_ENTRY(intr_pic)	pic_next;	/* link in the global pic_list */
123 	intptr_t		pic_xref;	/* hardware identification */
124 	device_t		pic_dev;	/* never NULL once registered */
125 /* Only one of FLAG_PIC or FLAG_MSI may be set */
126 #define	FLAG_PIC	(1 << 0)
127 #define	FLAG_MSI	(1 << 1)
128 #define	FLAG_TYPE_MASK	(FLAG_PIC | FLAG_MSI)
129 	u_int			pic_flags;	/* FLAG_* controller type */
130 	struct mtx		pic_child_lock;	/* spin mutex held around pic_children */
131 	SLIST_HEAD(, intr_pic_child) pic_children;	/* see intr_pic_add_handler() */
132 };
133 
134 #ifdef SMP
/* Size of the ii_name buffer in struct intr_ipi. */
135 #define INTR_IPI_NAMELEN	(MAXCOMLEN + 1)
136 
/*
 * Per-IPI bookkeeping; one entry per IPI lives in ipi_sources[].
 * NOTE(review): the field comments below are inferred from the member
 * names and types only, the code using them is not in this part of the
 * file -- confirm against the IPI setup code.
 */
137 struct intr_ipi {
138 	intr_ipi_handler_t	*ii_handler;	/* presumably the IPI handler */
139 	void			*ii_handler_arg; /* presumably its argument */
140 	struct intr_irqsrc	*ii_isrc;	/* presumably the backing source */
141 	char			ii_name[INTR_IPI_NAMELEN];
142 	u_long			*ii_count;	/* presumably slots in intrcnt */
143 };
144 
145 static device_t intr_ipi_dev;
146 static u_int intr_ipi_dev_priority;
147 static bool intr_ipi_dev_frozen;
148 #endif
149 
/* List of all created interrupt controllers and the mutex guarding it. */
150 static struct mtx pic_list_lock;
151 static SLIST_HEAD(, intr_pic) pic_list;
152 
153 static struct intr_pic *pic_lookup(device_t dev, intptr_t xref, u_int flags);
154 
155 /* Interrupt source definition. */
156 static struct mtx isrc_table_lock;	/* held around irq_sources and counter updates */
157 static struct intr_irqsrc **irq_sources;	/* indexed by irq, intr_nirq entries */
158 static u_int irq_next_free;	/* where isrc_alloc_irq() starts searching */
159 
160 #ifdef SMP
/* When false, intr_isrc_assign_cpu() records the binding without calling the PIC. */
161 #ifdef EARLY_AP_STARTUP
162 static bool irq_assign_cpu = true;
163 #else
164 static bool irq_assign_cpu = false;
165 #endif
166 
167 static struct intr_ipi ipi_sources[INTR_IPI_COUNT];
168 #endif
169 
/* Number of entries in irq_sources; read-only tunable machdep.nirq. */
170 u_int intr_nirq = NIRQ;
171 SYSCTL_UINT(_machdep, OID_AUTO, nirq, CTLFLAG_RDTUN, &intr_nirq, 0,
172     "Number of IRQs");
173 
174 /* Data for MI statistics reporting. */
175 u_long *intrcnt;	/* nintrcnt counters, allocated in intr_irq_init() */
176 char *intrnames;	/* nintrcnt name slots of INTRNAME_LEN bytes each */
177 size_t sintrcnt;	/* size of intrcnt in bytes */
178 size_t sintrnames;	/* size of intrnames in bytes */
179 int nintrcnt;		/* number of counters */
180 static bitstr_t *intrcnt_bitmap;	/* set bits mark counters in use */
181 
182 static struct intr_irqsrc *intr_map_get_isrc(u_int res_id);
183 static void intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc);
184 static struct intr_map_data * intr_map_get_map_data(u_int res_id);
185 static void intr_map_copy_map_data(u_int res_id, device_t *dev, intptr_t *xref,
186     struct intr_map_data **data);
187 
188 /*
189  *  Interrupt framework initialization routine.
190  */
191 static void
192 intr_irq_init(void *dummy __unused)
193 {
194 
195 	SLIST_INIT(&pic_list);
196 	mtx_init(&pic_list_lock, "intr pic list", NULL, MTX_DEF);
197 
198 	mtx_init(&isrc_table_lock, "intr isrc table", NULL, MTX_DEF);
199 
200 	/*
201 	 * - 2 counters for each I/O interrupt.
202 	 * - mp_maxid + 1 counters for each IPI counters for SMP.
203 	 */
204 	nintrcnt = intr_nirq * 2;
205 #ifdef SMP
206 	nintrcnt += INTR_IPI_COUNT * (mp_maxid + 1);
207 #endif
208 
209 	intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTRNG,
210 	    M_WAITOK | M_ZERO);
211 	intrnames = mallocarray(nintrcnt, INTRNAME_LEN, M_INTRNG,
212 	    M_WAITOK | M_ZERO);
213 	sintrcnt = nintrcnt * sizeof(u_long);
214 	sintrnames = nintrcnt * INTRNAME_LEN;
215 
216 	/* Allocate the bitmap tracking counter allocations. */
217 	intrcnt_bitmap = bit_alloc(nintrcnt, M_INTRNG, M_WAITOK | M_ZERO);
218 
219 	irq_sources = mallocarray(intr_nirq, sizeof(struct intr_irqsrc*),
220 	    M_INTRNG, M_WAITOK | M_ZERO);
221 }
222 SYSINIT(intr_irq_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_irq_init, NULL);
223 
224 static void
225 intrcnt_setname(const char *name, int index)
226 {
227 
228 	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
229 	    INTRNAME_LEN - 1, name);
230 }
231 
232 /*
233  *  Update name for interrupt source with interrupt event.
234  */
235 static void
236 intrcnt_updatename(struct intr_irqsrc *isrc)
237 {
238 
239 	/* QQQ: What about stray counter name? */
240 	mtx_assert(&isrc_table_lock, MA_OWNED);
241 	intrcnt_setname(isrc->isrc_event->ie_fullname, isrc->isrc_index);
242 }
243 
244 /*
245  *  Virtualization for interrupt source interrupt counter increment.
246  */
247 static inline void
248 isrc_increment_count(struct intr_irqsrc *isrc)
249 {
250 
251 	if (isrc->isrc_flags & INTR_ISRCF_PPI)
252 		atomic_add_long(&isrc->isrc_count[0], 1);
253 	else
254 		isrc->isrc_count[0]++;
255 }
256 
257 /*
258  *  Virtualization for interrupt source interrupt stray counter increment.
259  */
260 static inline void
261 isrc_increment_straycount(struct intr_irqsrc *isrc)
262 {
263 
264 	isrc->isrc_count[1]++;
265 }
266 
267 /*
268  *  Virtualization for interrupt source interrupt name update.
269  */
270 static void
271 isrc_update_name(struct intr_irqsrc *isrc, const char *name)
272 {
273 	char str[INTRNAME_LEN];
274 
275 	mtx_assert(&isrc_table_lock, MA_OWNED);
276 
277 	if (name != NULL) {
278 		snprintf(str, INTRNAME_LEN, "%s: %s", isrc->isrc_name, name);
279 		intrcnt_setname(str, isrc->isrc_index);
280 		snprintf(str, INTRNAME_LEN, "stray %s: %s", isrc->isrc_name,
281 		    name);
282 		intrcnt_setname(str, isrc->isrc_index + 1);
283 	} else {
284 		snprintf(str, INTRNAME_LEN, "%s:", isrc->isrc_name);
285 		intrcnt_setname(str, isrc->isrc_index);
286 		snprintf(str, INTRNAME_LEN, "stray %s:", isrc->isrc_name);
287 		intrcnt_setname(str, isrc->isrc_index + 1);
288 	}
289 }
290 
291 /*
292  *  Virtualization for interrupt source interrupt counters setup.
293  */
294 static void
295 isrc_setup_counters(struct intr_irqsrc *isrc)
296 {
297 	int index;
298 
299 	mtx_assert(&isrc_table_lock, MA_OWNED);
300 
301 	/*
302 	 * Allocate two counter values, the second tracking "stray" interrupts.
303 	 */
304 	bit_ffc_area(intrcnt_bitmap, nintrcnt, 2, &index);
305 	if (index == -1)
306 		panic("Failed to allocate 2 counters. Array exhausted?");
307 	bit_nset(intrcnt_bitmap, index, index + 1);
308 	isrc->isrc_index = index;
309 	isrc->isrc_count = &intrcnt[index];
310 	isrc_update_name(isrc, NULL);
311 }
312 
313 /*
314  *  Virtualization for interrupt source interrupt counters release.
315  */
316 static void
317 isrc_release_counters(struct intr_irqsrc *isrc)
318 {
319 	int idx = isrc->isrc_index;
320 
321 	mtx_assert(&isrc_table_lock, MA_OWNED);
322 
323 	bit_nclear(intrcnt_bitmap, idx, idx + 1);
324 }
325 
326 /*
327  *  Main interrupt dispatch handler. It's called straight
328  *  from the assembler, where CPU interrupt is served.
329  */
330 void
331 intr_irq_handler(struct trapframe *tf, uint32_t rootnum)
332 {
333 	struct trapframe * oldframe;
334 	struct thread * td;
335 	struct intr_irq_root *root;
336 
337 	KASSERT(rootnum < INTR_ROOT_COUNT,
338 	    ("%s: invalid interrupt root %d", __func__, rootnum));
339 
340 	root = &intr_irq_roots[rootnum];
341 	KASSERT(root->filter != NULL, ("%s: no filter", __func__));
342 
343 	kasan_mark(tf, sizeof(*tf), sizeof(*tf), 0);
344 	kmsan_mark(tf, sizeof(*tf), KMSAN_STATE_INITED);
345 
346 	VM_CNT_INC(v_intr);
347 	critical_enter();
348 	td = curthread;
349 	oldframe = td->td_intr_frame;
350 	td->td_intr_frame = tf;
351 	(root->filter)(root->arg);
352 	td->td_intr_frame = oldframe;
353 	critical_exit();
354 #ifdef HWPMC_HOOKS
355 	if (pmc_hook && TRAPF_USERMODE(tf) &&
356 	    (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN))
357 		pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, tf);
358 #endif
359 }
360 
361 int
362 intr_child_irq_handler(struct intr_pic *parent, uintptr_t irq)
363 {
364 	struct intr_pic_child *child;
365 	bool found;
366 
367 	found = false;
368 	mtx_lock_spin(&parent->pic_child_lock);
369 	SLIST_FOREACH(child, &parent->pic_children, pc_next) {
370 		if (child->pc_start <= irq &&
371 		    irq < (child->pc_start + child->pc_length)) {
372 			found = true;
373 			break;
374 		}
375 	}
376 	mtx_unlock_spin(&parent->pic_child_lock);
377 
378 	if (found)
379 		return (child->pc_filter(child->pc_filter_arg, irq));
380 
381 	return (FILTER_STRAY);
382 }
383 
384 /*
385  *  interrupt controller dispatch function for interrupts. It should
386  *  be called straight from the interrupt controller, when associated interrupt
387  *  source is learned.
388  */
389 int
390 intr_isrc_dispatch(struct intr_irqsrc *isrc, struct trapframe *tf)
391 {
392 
393 	KASSERT(isrc != NULL, ("%s: no source", __func__));
394 
395 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
396 		isrc_increment_count(isrc);
397 
398 #ifdef INTR_SOLO
399 	if (isrc->isrc_filter != NULL) {
400 		int error;
401 		error = isrc->isrc_filter(isrc->isrc_arg, tf);
402 		PIC_POST_FILTER(isrc->isrc_dev, isrc);
403 		if (error == FILTER_HANDLED)
404 			return (0);
405 	} else
406 #endif
407 	if (isrc->isrc_event != NULL) {
408 		if (intr_event_handle(isrc->isrc_event, tf) == 0)
409 			return (0);
410 	}
411 
412 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
413 		isrc_increment_straycount(isrc);
414 	return (EINVAL);
415 }
416 
417 /*
418  *  Alloc unique interrupt number (resource handle) for interrupt source.
419  *
420  *  There could be various strategies how to allocate free interrupt number
421  *  (resource handle) for new interrupt source.
422  *
423  *  1. Handles are always allocated forward, so handles are not recycled
424  *     immediately. However, if only one free handle left which is reused
425  *     constantly...
426  */
427 static inline int
428 isrc_alloc_irq(struct intr_irqsrc *isrc)
429 {
430 	u_int irq;
431 
432 	mtx_assert(&isrc_table_lock, MA_OWNED);
433 
434 	if (irq_next_free >= intr_nirq)
435 		return (ENOSPC);
436 
437 	for (irq = irq_next_free; irq < intr_nirq; irq++) {
438 		if (irq_sources[irq] == NULL)
439 			goto found;
440 	}
441 	for (irq = 0; irq < irq_next_free; irq++) {
442 		if (irq_sources[irq] == NULL)
443 			goto found;
444 	}
445 
446 	irq_next_free = intr_nirq;
447 	return (ENOSPC);
448 
449 found:
450 	isrc->isrc_irq = irq;
451 	irq_sources[irq] = isrc;
452 
453 	irq_next_free = irq + 1;
454 	if (irq_next_free >= intr_nirq)
455 		irq_next_free = 0;
456 	return (0);
457 }
458 
459 /*
460  *  Free unique interrupt number (resource handle) from interrupt source.
461  */
462 static inline int
463 isrc_free_irq(struct intr_irqsrc *isrc)
464 {
465 
466 	mtx_assert(&isrc_table_lock, MA_OWNED);
467 
468 	if (isrc->isrc_irq >= intr_nirq)
469 		return (EINVAL);
470 	if (irq_sources[isrc->isrc_irq] != isrc)
471 		return (EINVAL);
472 
473 	irq_sources[isrc->isrc_irq] = NULL;
474 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
475 
476 	/*
477 	 * If we are recovering from the state irq_sources table is full,
478 	 * then the following allocation should check the entire table. This
479 	 * will ensure maximum separation of allocation order from release
480 	 * order.
481 	 */
482 	if (irq_next_free >= intr_nirq)
483 		irq_next_free = 0;
484 
485 	return (0);
486 }
487 
488 device_t
489 intr_irq_root_device(uint32_t rootnum)
490 {
491 	KASSERT(rootnum < INTR_ROOT_COUNT,
492 	    ("%s: invalid interrupt root %d", __func__, rootnum));
493 	return (intr_irq_roots[rootnum].dev);
494 }
495 
496 /*
497  *  Initialize interrupt source and register it into global interrupt table.
498  */
499 int
500 intr_isrc_register(struct intr_irqsrc *isrc, device_t dev, u_int flags,
501     const char *fmt, ...)
502 {
503 	int error;
504 	va_list ap;
505 
506 	bzero(isrc, sizeof(struct intr_irqsrc));
507 	isrc->isrc_dev = dev;
508 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
509 	isrc->isrc_flags = flags;
510 
511 	va_start(ap, fmt);
512 	vsnprintf(isrc->isrc_name, INTR_ISRC_NAMELEN, fmt, ap);
513 	va_end(ap);
514 
515 	mtx_lock(&isrc_table_lock);
516 	error = isrc_alloc_irq(isrc);
517 	if (error != 0) {
518 		mtx_unlock(&isrc_table_lock);
519 		return (error);
520 	}
521 	/*
522 	 * Setup interrupt counters, but not for IPI sources. Those are setup
523 	 * later and only for used ones (up to INTR_IPI_COUNT) to not exhaust
524 	 * our counter pool.
525 	 */
526 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
527 		isrc_setup_counters(isrc);
528 	mtx_unlock(&isrc_table_lock);
529 	return (0);
530 }
531 
532 /*
533  *  Deregister interrupt source from global interrupt table.
534  */
535 int
536 intr_isrc_deregister(struct intr_irqsrc *isrc)
537 {
538 	int error;
539 
540 	mtx_lock(&isrc_table_lock);
541 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
542 		isrc_release_counters(isrc);
543 	error = isrc_free_irq(isrc);
544 	mtx_unlock(&isrc_table_lock);
545 	return (error);
546 }
547 
548 #ifdef SMP
549 /*
550  *  A support function for a PIC to decide if provided ISRC should be inited
551  *  on given cpu. The logic of INTR_ISRCF_BOUND flag and isrc_cpu member of
552  *  struct intr_irqsrc is the following:
553  *
554  *     If INTR_ISRCF_BOUND is set, the ISRC should be inited only on cpus
555  *     set in isrc_cpu. If not, the ISRC should be inited on every cpu and
556  *     isrc_cpu is kept consistent with it. Thus isrc_cpu is always correct.
557  */
558 bool
559 intr_isrc_init_on_cpu(struct intr_irqsrc *isrc, u_int cpu)
560 {
561 
562 	if (isrc->isrc_handlers == 0)
563 		return (false);
564 	if ((isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI)) == 0)
565 		return (false);
566 	if (isrc->isrc_flags & INTR_ISRCF_BOUND)
567 		return (CPU_ISSET(cpu, &isrc->isrc_cpu));
568 
569 	CPU_SET(cpu, &isrc->isrc_cpu);
570 	return (true);
571 }
572 #endif
573 
574 #ifdef INTR_SOLO
575 /*
576  *  Setup filter into interrupt source.
577  */
578 static int
579 iscr_setup_filter(struct intr_irqsrc *isrc, const char *name,
580     intr_irq_filter_t *filter, void *arg, void **cookiep)
581 {
582 
583 	if (filter == NULL)
584 		return (EINVAL);
585 
586 	mtx_lock(&isrc_table_lock);
587 	/*
588 	 * Make sure that we do not mix the two ways
589 	 * how we handle interrupt sources.
590 	 */
591 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
592 		mtx_unlock(&isrc_table_lock);
593 		return (EBUSY);
594 	}
595 	isrc->isrc_filter = filter;
596 	isrc->isrc_arg = arg;
597 	isrc_update_name(isrc, name);
598 	mtx_unlock(&isrc_table_lock);
599 
600 	*cookiep = isrc;
601 	return (0);
602 }
603 #endif
604 
605 /*
606  *  Interrupt source pre_ithread method for MI interrupt framework.
607  */
608 static void
609 intr_isrc_pre_ithread(void *arg)
610 {
611 	struct intr_irqsrc *isrc = arg;
612 
613 	PIC_PRE_ITHREAD(isrc->isrc_dev, isrc);
614 }
615 
616 /*
617  *  Interrupt source post_ithread method for MI interrupt framework.
618  */
619 static void
620 intr_isrc_post_ithread(void *arg)
621 {
622 	struct intr_irqsrc *isrc = arg;
623 
624 	PIC_POST_ITHREAD(isrc->isrc_dev, isrc);
625 }
626 
627 /*
628  *  Interrupt source post_filter method for MI interrupt framework.
629  */
630 static void
631 intr_isrc_post_filter(void *arg)
632 {
633 	struct intr_irqsrc *isrc = arg;
634 
635 	PIC_POST_FILTER(isrc->isrc_dev, isrc);
636 }
637 
638 /*
639  *  Interrupt source assign_cpu method for MI interrupt framework.
640  */
641 static int
642 intr_isrc_assign_cpu(void *arg, int cpu)
643 {
644 #ifdef SMP
645 	struct intr_irqsrc *isrc = arg;
646 	int error;
647 
648 	mtx_lock(&isrc_table_lock);
649 	if (cpu == NOCPU) {
650 		CPU_ZERO(&isrc->isrc_cpu);
651 		isrc->isrc_flags &= ~INTR_ISRCF_BOUND;
652 	} else {
653 		CPU_SETOF(cpu, &isrc->isrc_cpu);
654 		isrc->isrc_flags |= INTR_ISRCF_BOUND;
655 	}
656 
657 	/*
658 	 * In NOCPU case, it's up to PIC to either leave ISRC on same CPU or
659 	 * re-balance it to another CPU or enable it on more CPUs. However,
660 	 * PIC is expected to change isrc_cpu appropriately to keep us well
661 	 * informed if the call is successful.
662 	 */
663 	if (irq_assign_cpu) {
664 		error = PIC_BIND_INTR(isrc->isrc_dev, isrc);
665 		if (error) {
666 			CPU_ZERO(&isrc->isrc_cpu);
667 			mtx_unlock(&isrc_table_lock);
668 			return (error);
669 		}
670 	}
671 	mtx_unlock(&isrc_table_lock);
672 	return (0);
673 #else
674 	return (EOPNOTSUPP);
675 #endif
676 }
677 
678 /*
679  *  Create interrupt event for interrupt source.
680  */
681 static int
682 isrc_event_create(struct intr_irqsrc *isrc)
683 {
684 	struct intr_event *ie;
685 	int error;
686 
687 	error = intr_event_create(&ie, isrc, 0, isrc->isrc_irq,
688 	    intr_isrc_pre_ithread, intr_isrc_post_ithread, intr_isrc_post_filter,
689 	    intr_isrc_assign_cpu, "%s:", isrc->isrc_name);
690 	if (error)
691 		return (error);
692 
693 	mtx_lock(&isrc_table_lock);
694 	/*
695 	 * Make sure that we do not mix the two ways
696 	 * how we handle interrupt sources. Let contested event wins.
697 	 */
698 #ifdef INTR_SOLO
699 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
700 #else
701 	if (isrc->isrc_event != NULL) {
702 #endif
703 		mtx_unlock(&isrc_table_lock);
704 		intr_event_destroy(ie);
705 		return (isrc->isrc_event != NULL ? EBUSY : 0);
706 	}
707 	isrc->isrc_event = ie;
708 	mtx_unlock(&isrc_table_lock);
709 
710 	return (0);
711 }
712 #ifdef notyet
713 /*
714  *  Destroy interrupt event for interrupt source.
715  */
716 static void
717 isrc_event_destroy(struct intr_irqsrc *isrc)
718 {
719 	struct intr_event *ie;
720 
721 	mtx_lock(&isrc_table_lock);
722 	ie = isrc->isrc_event;
723 	isrc->isrc_event = NULL;
724 	mtx_unlock(&isrc_table_lock);
725 
726 	if (ie != NULL)
727 		intr_event_destroy(ie);
728 }
729 #endif
730 /*
731  *  Add handler to interrupt source.
732  */
733 static int
734 isrc_add_handler(struct intr_irqsrc *isrc, const char *name,
735     driver_filter_t filter, driver_intr_t handler, void *arg,
736     enum intr_type flags, void **cookiep)
737 {
738 	int error;
739 
740 	if (isrc->isrc_event == NULL) {
741 		error = isrc_event_create(isrc);
742 		if (error)
743 			return (error);
744 	}
745 
746 	error = intr_event_add_handler(isrc->isrc_event, name, filter, handler,
747 	    arg, intr_priority(flags), flags, cookiep);
748 	if (error == 0) {
749 		mtx_lock(&isrc_table_lock);
750 		intrcnt_updatename(isrc);
751 		mtx_unlock(&isrc_table_lock);
752 	}
753 
754 	return (error);
755 }
756 
757 /*
758  *  Lookup interrupt controller locked.
759  */
760 static inline struct intr_pic *
761 pic_lookup_locked(device_t dev, intptr_t xref, u_int flags)
762 {
763 	struct intr_pic *pic;
764 
765 	mtx_assert(&pic_list_lock, MA_OWNED);
766 
767 	if (dev == NULL && xref == 0)
768 		return (NULL);
769 
770 	/* Note that pic->pic_dev is never NULL on registered PIC. */
771 	SLIST_FOREACH(pic, &pic_list, pic_next) {
772 		if ((pic->pic_flags & FLAG_TYPE_MASK) !=
773 		    (flags & FLAG_TYPE_MASK))
774 			continue;
775 
776 		if (dev == NULL) {
777 			if (xref == pic->pic_xref)
778 				return (pic);
779 		} else if (xref == 0 || pic->pic_xref == 0) {
780 			if (dev == pic->pic_dev)
781 				return (pic);
782 		} else if (xref == pic->pic_xref && dev == pic->pic_dev)
783 				return (pic);
784 	}
785 	return (NULL);
786 }
787 
788 /*
789  *  Lookup interrupt controller.
790  */
791 static struct intr_pic *
792 pic_lookup(device_t dev, intptr_t xref, u_int flags)
793 {
794 	struct intr_pic *pic;
795 
796 	mtx_lock(&pic_list_lock);
797 	pic = pic_lookup_locked(dev, xref, flags);
798 	mtx_unlock(&pic_list_lock);
799 	return (pic);
800 }
801 
802 /*
803  *  Create interrupt controller.
804  */
805 static struct intr_pic *
806 pic_create(device_t dev, intptr_t xref, u_int flags)
807 {
808 	struct intr_pic *pic;
809 
810 	mtx_lock(&pic_list_lock);
811 	pic = pic_lookup_locked(dev, xref, flags);
812 	if (pic != NULL) {
813 		mtx_unlock(&pic_list_lock);
814 		return (pic);
815 	}
816 	pic = malloc(sizeof(*pic), M_INTRNG, M_NOWAIT | M_ZERO);
817 	if (pic == NULL) {
818 		mtx_unlock(&pic_list_lock);
819 		return (NULL);
820 	}
821 	pic->pic_xref = xref;
822 	pic->pic_dev = dev;
823 	pic->pic_flags = flags;
824 	mtx_init(&pic->pic_child_lock, "pic child lock", NULL, MTX_SPIN);
825 	SLIST_INSERT_HEAD(&pic_list, pic, pic_next);
826 	mtx_unlock(&pic_list_lock);
827 
828 	return (pic);
829 }
830 #ifdef notyet
831 /*
832  *  Destroy interrupt controller.
833  */
834 static void
835 pic_destroy(device_t dev, intptr_t xref, u_int flags)
836 {
837 	struct intr_pic *pic;
838 
839 	mtx_lock(&pic_list_lock);
840 	pic = pic_lookup_locked(dev, xref, flags);
841 	if (pic == NULL) {
842 		mtx_unlock(&pic_list_lock);
843 		return;
844 	}
845 	SLIST_REMOVE(&pic_list, pic, intr_pic, pic_next);
846 	mtx_unlock(&pic_list_lock);
847 
848 	free(pic, M_INTRNG);
849 }
850 #endif
851 /*
852  *  Register interrupt controller.
853  */
854 struct intr_pic *
855 intr_pic_register(device_t dev, intptr_t xref)
856 {
857 	struct intr_pic *pic;
858 
859 	if (dev == NULL)
860 		return (NULL);
861 	pic = pic_create(dev, xref, FLAG_PIC);
862 	if (pic == NULL)
863 		return (NULL);
864 
865 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
866 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
867 	return (pic);
868 }
869 
870 /*
871  *  Unregister interrupt controller.
 *
 *  Not implemented: every call panics, no value is ever returned.
872  */
873 int
874 intr_pic_deregister(device_t dev, intptr_t xref)
875 {
876 
877 	panic("%s: not implemented", __func__);
878 }
879 
880 /*
881  *  Mark interrupt controller (itself) as a root one.
882  *
883  *  Note that only an interrupt controller can really know its position
884  *  in interrupt controller's tree. So root PIC must claim itself as a root.
885  *
886  *  In FDT case, according to ePAPR approved version 1.1 from 08 April 2011,
887  *  page 30:
888  *    "The root of the interrupt tree is determined when traversal
889  *     of the interrupt tree reaches an interrupt controller node without
890  *     an interrupts property and thus no explicit interrupt parent."
891  */
892 int
893 intr_pic_claim_root(device_t dev, intptr_t xref, intr_irq_filter_t *filter,
894     void *arg, uint32_t rootnum)
895 {
896 	struct intr_pic *pic;
897 	struct intr_irq_root *root;
898 
899 	pic = pic_lookup(dev, xref, FLAG_PIC);
900 	if (pic == NULL) {
901 		device_printf(dev, "not registered\n");
902 		return (EINVAL);
903 	}
904 
905 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
906 	    ("%s: Found a non-PIC controller: %s", __func__,
907 	     device_get_name(pic->pic_dev)));
908 
909 	if (filter == NULL) {
910 		device_printf(dev, "filter missing\n");
911 		return (EINVAL);
912 	}
913 
914 	/*
915 	 * Only one interrupt controllers could be on the root for now.
916 	 * Note that we further suppose that there is not threaded interrupt
917 	 * routine (handler) on the root. See intr_irq_handler().
918 	 */
919 	KASSERT(rootnum < INTR_ROOT_COUNT,
920 	    ("%s: invalid interrupt root %d", __func__, rootnum));
921 	root = &intr_irq_roots[rootnum];
922 	if (root->dev != NULL) {
923 		device_printf(dev, "another root already set\n");
924 		return (EBUSY);
925 	}
926 
927 	root->dev = dev;
928 	root->filter = filter;
929 	root->arg = arg;
930 
931 	debugf("irq root set to %s\n", device_get_nameunit(dev));
932 	return (0);
933 }
934 
935 /*
936  * Add a handler to manage a sub range of a parents interrupts.
937  */
938 int
939 intr_pic_add_handler(device_t parent, struct intr_pic *pic,
940     intr_child_irq_filter_t *filter, void *arg, uintptr_t start,
941     uintptr_t length)
942 {
943 	struct intr_pic *parent_pic;
944 	struct intr_pic_child *newchild;
945 #ifdef INVARIANTS
946 	struct intr_pic_child *child;
947 #endif
948 
949 	/* Find the parent PIC */
950 	parent_pic = pic_lookup(parent, 0, FLAG_PIC);
951 	if (parent_pic == NULL)
952 		return (ENXIO);
953 
954 	newchild = malloc(sizeof(*newchild), M_INTRNG, M_WAITOK | M_ZERO);
955 	newchild->pc_pic = pic;
956 	newchild->pc_filter = filter;
957 	newchild->pc_filter_arg = arg;
958 	newchild->pc_start = start;
959 	newchild->pc_length = length;
960 
961 	mtx_lock_spin(&parent_pic->pic_child_lock);
962 #ifdef INVARIANTS
963 	SLIST_FOREACH(child, &parent_pic->pic_children, pc_next) {
964 		KASSERT(child->pc_pic != pic, ("%s: Adding a child PIC twice",
965 		    __func__));
966 	}
967 #endif
968 	SLIST_INSERT_HEAD(&parent_pic->pic_children, newchild, pc_next);
969 	mtx_unlock_spin(&parent_pic->pic_child_lock);
970 
971 	return (0);
972 }
973 
974 static int
975 intr_resolve_irq(device_t dev, intptr_t xref, struct intr_map_data *data,
976     struct intr_irqsrc **isrc)
977 {
978 	struct intr_pic *pic;
979 	struct intr_map_data_msi *msi;
980 
981 	if (data == NULL)
982 		return (EINVAL);
983 
984 	pic = pic_lookup(dev, xref,
985 	    (data->type == INTR_MAP_DATA_MSI) ? FLAG_MSI : FLAG_PIC);
986 	if (pic == NULL)
987 		return (ESRCH);
988 
989 	switch (data->type) {
990 	case INTR_MAP_DATA_MSI:
991 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
992 		    ("%s: Found a non-MSI controller: %s", __func__,
993 		     device_get_name(pic->pic_dev)));
994 		msi = (struct intr_map_data_msi *)data;
995 		*isrc = msi->isrc;
996 		return (0);
997 
998 	default:
999 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
1000 		    ("%s: Found a non-PIC controller: %s", __func__,
1001 		     device_get_name(pic->pic_dev)));
1002 		return (PIC_MAP_INTR(pic->pic_dev, data, isrc));
1003 	}
1004 }
1005 
1006 bool
1007 intr_is_per_cpu(struct resource *res)
1008 {
1009 	u_int res_id;
1010 	struct intr_irqsrc *isrc;
1011 
1012 	res_id = (u_int)rman_get_start(res);
1013 	isrc = intr_map_get_isrc(res_id);
1014 
1015 	if (isrc == NULL)
1016 		panic("Attempt to get isrc for non-active resource id: %u\n",
1017 		    res_id);
1018 	return ((isrc->isrc_flags & INTR_ISRCF_PPI) != 0);
1019 }
1020 
1021 int
1022 intr_activate_irq(device_t dev, struct resource *res)
1023 {
1024 	device_t map_dev;
1025 	intptr_t map_xref;
1026 	struct intr_map_data *data;
1027 	struct intr_irqsrc *isrc;
1028 	u_int res_id;
1029 	int error;
1030 
1031 	KASSERT(rman_get_start(res) == rman_get_end(res),
1032 	    ("%s: more interrupts in resource", __func__));
1033 
1034 	res_id = (u_int)rman_get_start(res);
1035 	if (intr_map_get_isrc(res_id) != NULL)
1036 		panic("Attempt to double activation of resource id: %u\n",
1037 		    res_id);
1038 	intr_map_copy_map_data(res_id, &map_dev, &map_xref, &data);
1039 	error = intr_resolve_irq(map_dev, map_xref, data, &isrc);
1040 	if (error != 0) {
1041 		free(data, M_INTRNG);
1042 		/* XXX TODO DISCONECTED PICs */
1043 		/* if (error == EINVAL) return(0); */
1044 		return (error);
1045 	}
1046 	intr_map_set_isrc(res_id, isrc);
1047 	rman_set_virtual(res, data);
1048 	return (PIC_ACTIVATE_INTR(isrc->isrc_dev, isrc, res, data));
1049 }
1050 
1051 int
1052 intr_deactivate_irq(device_t dev, struct resource *res)
1053 {
1054 	struct intr_map_data *data;
1055 	struct intr_irqsrc *isrc;
1056 	u_int res_id;
1057 	int error;
1058 
1059 	KASSERT(rman_get_start(res) == rman_get_end(res),
1060 	    ("%s: more interrupts in resource", __func__));
1061 
1062 	res_id = (u_int)rman_get_start(res);
1063 	isrc = intr_map_get_isrc(res_id);
1064 	if (isrc == NULL)
1065 		panic("Attempt to deactivate non-active resource id: %u\n",
1066 		    res_id);
1067 
1068 	data = rman_get_virtual(res);
1069 	error = PIC_DEACTIVATE_INTR(isrc->isrc_dev, isrc, res, data);
1070 	intr_map_set_isrc(res_id, NULL);
1071 	rman_set_virtual(res, NULL);
1072 	free(data, M_INTRNG);
1073 	return (error);
1074 }
1075 
1076 int
1077 intr_setup_irq(device_t dev, struct resource *res, driver_filter_t filt,
1078     driver_intr_t hand, void *arg, int flags, void **cookiep)
1079 {
1080 	int error;
1081 	struct intr_map_data *data;
1082 	struct intr_irqsrc *isrc;
1083 	const char *name;
1084 	u_int res_id;
1085 
1086 	KASSERT(rman_get_start(res) == rman_get_end(res),
1087 	    ("%s: more interrupts in resource", __func__));
1088 
1089 	res_id = (u_int)rman_get_start(res);
1090 	isrc = intr_map_get_isrc(res_id);
1091 	if (isrc == NULL) {
1092 		/* XXX TODO DISCONECTED PICs */
1093 		return (EINVAL);
1094 	}
1095 
1096 	data = rman_get_virtual(res);
1097 	name = device_get_nameunit(dev);
1098 
1099 #ifdef INTR_SOLO
1100 	/*
1101 	 * Standard handling is done through MI interrupt framework. However,
1102 	 * some interrupts could request solely own special handling. This
1103 	 * non standard handling can be used for interrupt controllers without
1104 	 * handler (filter only), so in case that interrupt controllers are
1105 	 * chained, MI interrupt framework is called only in leaf controller.
1106 	 *
1107 	 * Note that root interrupt controller routine is served as well,
1108 	 * however in intr_irq_handler(), i.e. main system dispatch routine.
1109 	 */
1110 	if (flags & INTR_SOLO && hand != NULL) {
1111 		debugf("irq %u cannot solo on %s\n", irq, name);
1112 		return (EINVAL);
1113 	}
1114 
1115 	if (flags & INTR_SOLO) {
1116 		error = iscr_setup_filter(isrc, name, (intr_irq_filter_t *)filt,
1117 		    arg, cookiep);
1118 		debugf("irq %u setup filter error %d on %s\n", isrc->isrc_irq, error,
1119 		    name);
1120 	} else
1121 #endif
1122 		{
1123 		error = isrc_add_handler(isrc, name, filt, hand, arg, flags,
1124 		    cookiep);
1125 		debugf("irq %u add handler error %d on %s\n", isrc->isrc_irq, error, name);
1126 	}
1127 	if (error != 0)
1128 		return (error);
1129 
1130 	mtx_lock(&isrc_table_lock);
1131 	error = PIC_SETUP_INTR(isrc->isrc_dev, isrc, res, data);
1132 	if (error == 0) {
1133 		isrc->isrc_handlers++;
1134 		if (isrc->isrc_handlers == 1)
1135 			PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
1136 	}
1137 	mtx_unlock(&isrc_table_lock);
1138 	if (error != 0)
1139 		intr_event_remove_handler(*cookiep);
1140 	return (error);
1141 }
1142 
1143 int
1144 intr_teardown_irq(device_t dev, struct resource *res, void *cookie)
1145 {
1146 	int error;
1147 	struct intr_map_data *data;
1148 	struct intr_irqsrc *isrc;
1149 	u_int res_id;
1150 
1151 	KASSERT(rman_get_start(res) == rman_get_end(res),
1152 	    ("%s: more interrupts in resource", __func__));
1153 
1154 	res_id = (u_int)rman_get_start(res);
1155 	isrc = intr_map_get_isrc(res_id);
1156 	if (isrc == NULL || isrc->isrc_handlers == 0)
1157 		return (EINVAL);
1158 
1159 	data = rman_get_virtual(res);
1160 
1161 #ifdef INTR_SOLO
1162 	if (isrc->isrc_filter != NULL) {
1163 		if (isrc != cookie)
1164 			return (EINVAL);
1165 
1166 		mtx_lock(&isrc_table_lock);
1167 		isrc->isrc_filter = NULL;
1168 		isrc->isrc_arg = NULL;
1169 		isrc->isrc_handlers = 0;
1170 		PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1171 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1172 		isrc_update_name(isrc, NULL);
1173 		mtx_unlock(&isrc_table_lock);
1174 		return (0);
1175 	}
1176 #endif
1177 	if (isrc != intr_handler_source(cookie))
1178 		return (EINVAL);
1179 
1180 	error = intr_event_remove_handler(cookie);
1181 	if (error == 0) {
1182 		mtx_lock(&isrc_table_lock);
1183 		isrc->isrc_handlers--;
1184 		if (isrc->isrc_handlers == 0)
1185 			PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1186 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1187 		intrcnt_updatename(isrc);
1188 		mtx_unlock(&isrc_table_lock);
1189 	}
1190 	return (error);
1191 }
1192 
1193 int
1194 intr_describe_irq(device_t dev, struct resource *res, void *cookie,
1195     const char *descr)
1196 {
1197 	int error;
1198 	struct intr_irqsrc *isrc;
1199 	u_int res_id;
1200 
1201 	KASSERT(rman_get_start(res) == rman_get_end(res),
1202 	    ("%s: more interrupts in resource", __func__));
1203 
1204 	res_id = (u_int)rman_get_start(res);
1205 	isrc = intr_map_get_isrc(res_id);
1206 	if (isrc == NULL || isrc->isrc_handlers == 0)
1207 		return (EINVAL);
1208 #ifdef INTR_SOLO
1209 	if (isrc->isrc_filter != NULL) {
1210 		if (isrc != cookie)
1211 			return (EINVAL);
1212 
1213 		mtx_lock(&isrc_table_lock);
1214 		isrc_update_name(isrc, descr);
1215 		mtx_unlock(&isrc_table_lock);
1216 		return (0);
1217 	}
1218 #endif
1219 	error = intr_event_describe_handler(isrc->isrc_event, cookie, descr);
1220 	if (error == 0) {
1221 		mtx_lock(&isrc_table_lock);
1222 		intrcnt_updatename(isrc);
1223 		mtx_unlock(&isrc_table_lock);
1224 	}
1225 	return (error);
1226 }
1227 
1228 #ifdef SMP
1229 int
1230 intr_bind_irq(device_t dev, struct resource *res, int cpu)
1231 {
1232 	struct intr_irqsrc *isrc;
1233 	u_int res_id;
1234 
1235 	KASSERT(rman_get_start(res) == rman_get_end(res),
1236 	    ("%s: more interrupts in resource", __func__));
1237 
1238 	res_id = (u_int)rman_get_start(res);
1239 	isrc = intr_map_get_isrc(res_id);
1240 	if (isrc == NULL || isrc->isrc_handlers == 0)
1241 		return (EINVAL);
1242 #ifdef INTR_SOLO
1243 	if (isrc->isrc_filter != NULL)
1244 		return (intr_isrc_assign_cpu(isrc, cpu));
1245 #endif
1246 	return (intr_event_bind(isrc->isrc_event, cpu));
1247 }
1248 
1249 /*
1250  * Return the CPU that the next interrupt source should use.
1251  * For now just returns the next CPU according to round-robin.
1252  */
1253 u_int
1254 intr_irq_next_cpu(u_int last_cpu, cpuset_t *cpumask)
1255 {
1256 	u_int cpu;
1257 
1258 	KASSERT(!CPU_EMPTY(cpumask), ("%s: Empty CPU mask", __func__));
1259 	if (!irq_assign_cpu || mp_ncpus == 1) {
1260 		cpu = PCPU_GET(cpuid);
1261 
1262 		if (CPU_ISSET(cpu, cpumask))
1263 			return (curcpu);
1264 
1265 		return (CPU_FFS(cpumask) - 1);
1266 	}
1267 
1268 	do {
1269 		last_cpu++;
1270 		if (last_cpu > mp_maxid)
1271 			last_cpu = 0;
1272 	} while (!CPU_ISSET(last_cpu, cpumask));
1273 	return (last_cpu);
1274 }
1275 
1276 #ifndef EARLY_AP_STARTUP
/*
 *  Distribute all the interrupt sources among the available
 *  CPUs once the AP's have been launched.
 *
 *  Does nothing on a single-CPU system.  Otherwise it turns CPU assignment
 *  on (irq_assign_cpu) and, with isrc_table_lock held, asks the PIC to
 *  (re)bind every interrupt source that has at least one handler and is
 *  neither a PPI nor an IPI.  Registered below as a SYSINIT.
 */
static void
intr_irq_shuffle(void *arg __unused)
{
	struct intr_irqsrc *isrc;
	u_int i;

	if (mp_ncpus == 1)
		return;

	mtx_lock(&isrc_table_lock);
	irq_assign_cpu = true;
	for (i = 0; i < intr_nirq; i++) {
		isrc = irq_sources[i];
		/* Skip empty slots, unused sources and PPI/IPI sources. */
		if (isrc == NULL || isrc->isrc_handlers == 0 ||
		    isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI))
			continue;

		/*
		 * For a bound source with an event, the event's CPU must
		 * match the first CPU set in isrc_cpu.
		 */
		if (isrc->isrc_event != NULL &&
		    isrc->isrc_flags & INTR_ISRCF_BOUND &&
		    isrc->isrc_event->ie_cpu != CPU_FFS(&isrc->isrc_cpu) - 1)
			panic("%s: CPU inconsistency", __func__);

		/* Unbound sources get their CPU set cleared before rebinding. */
		if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0)
			CPU_ZERO(&isrc->isrc_cpu); /* start again */

		/*
		 * We are in wicked position here if the following call fails
		 * for bound ISRC. The best thing we can do is to clear
		 * isrc_cpu so inconsistency with ie_cpu will be detectable.
		 */
		if (PIC_BIND_INTR(isrc->isrc_dev, isrc) != 0)
			CPU_ZERO(&isrc->isrc_cpu);
	}
	mtx_unlock(&isrc_table_lock);
}
SYSINIT(intr_irq_shuffle, SI_SUB_SMP, SI_ORDER_SECOND, intr_irq_shuffle, NULL);
1317 #endif /* !EARLY_AP_STARTUP */
1318 
1319 #else
1320 u_int
1321 intr_irq_next_cpu(u_int current_cpu, cpuset_t *cpumask)
1322 {
1323 
1324 	return (PCPU_GET(cpuid));
1325 }
1326 #endif /* SMP */
1327 
1328 /*
1329  * Allocate memory for new intr_map_data structure.
1330  * Initialize common fields.
1331  */
1332 struct intr_map_data *
1333 intr_alloc_map_data(enum intr_map_data_type type, size_t len, int flags)
1334 {
1335 	struct intr_map_data *data;
1336 
1337 	data = malloc(len, M_INTRNG, flags);
1338 	data->type = type;
1339 	data->len = len;
1340 	return (data);
1341 }
1342 
1343 void intr_free_intr_map_data(struct intr_map_data *data)
1344 {
1345 
1346 	free(data, M_INTRNG);
1347 }
1348 
1349 /*
1350  *  Register a MSI/MSI-X interrupt controller
1351  */
1352 int
1353 intr_msi_register(device_t dev, intptr_t xref)
1354 {
1355 	struct intr_pic *pic;
1356 
1357 	if (dev == NULL)
1358 		return (EINVAL);
1359 	pic = pic_create(dev, xref, FLAG_MSI);
1360 	if (pic == NULL)
1361 		return (ENOMEM);
1362 
1363 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
1364 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
1365 	return (0);
1366 }
1367 
1368 int
1369 intr_alloc_msi(device_t pci, device_t child, intptr_t xref, int count,
1370     int maxcount, int *irqs)
1371 {
1372 	struct iommu_domain *domain;
1373 	struct intr_irqsrc **isrc;
1374 	struct intr_pic *pic;
1375 	device_t pdev;
1376 	struct intr_map_data_msi *msi;
1377 	int err, i;
1378 
1379 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1380 	if (pic == NULL)
1381 		return (ESRCH);
1382 
1383 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1384 	    ("%s: Found a non-MSI controller: %s", __func__,
1385 	     device_get_name(pic->pic_dev)));
1386 
1387 	/*
1388 	 * If this is the first time we have used this context ask the
1389 	 * interrupt controller to map memory the msi source will need.
1390 	 */
1391 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1392 	if (err != 0)
1393 		return (err);
1394 
1395 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1396 	err = MSI_ALLOC_MSI(pic->pic_dev, child, count, maxcount, &pdev, isrc);
1397 	if (err != 0) {
1398 		free(isrc, M_INTRNG);
1399 		return (err);
1400 	}
1401 
1402 	for (i = 0; i < count; i++) {
1403 		isrc[i]->isrc_iommu = domain;
1404 		msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1405 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1406 		msi-> isrc = isrc[i];
1407 
1408 		irqs[i] = intr_map_irq(pic->pic_dev, xref,
1409 		    (struct intr_map_data *)msi);
1410 	}
1411 	free(isrc, M_INTRNG);
1412 
1413 	return (err);
1414 }
1415 
1416 int
1417 intr_release_msi(device_t pci, device_t child, intptr_t xref, int count,
1418     int *irqs)
1419 {
1420 	struct intr_irqsrc **isrc;
1421 	struct intr_pic *pic;
1422 	struct intr_map_data_msi *msi;
1423 	int i, err;
1424 
1425 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1426 	if (pic == NULL)
1427 		return (ESRCH);
1428 
1429 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1430 	    ("%s: Found a non-MSI controller: %s", __func__,
1431 	     device_get_name(pic->pic_dev)));
1432 
1433 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1434 
1435 	for (i = 0; i < count; i++) {
1436 		msi = (struct intr_map_data_msi *)
1437 		    intr_map_get_map_data(irqs[i]);
1438 		KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1439 		    ("%s: irq %d map data is not MSI", __func__,
1440 		    irqs[i]));
1441 		isrc[i] = msi->isrc;
1442 	}
1443 
1444 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1445 
1446 	err = MSI_RELEASE_MSI(pic->pic_dev, child, count, isrc);
1447 
1448 	for (i = 0; i < count; i++) {
1449 		if (isrc[i] != NULL)
1450 			intr_unmap_irq(irqs[i]);
1451 	}
1452 
1453 	free(isrc, M_INTRNG);
1454 	return (err);
1455 }
1456 
1457 int
1458 intr_alloc_msix(device_t pci, device_t child, intptr_t xref, int *irq)
1459 {
1460 	struct iommu_domain *domain;
1461 	struct intr_irqsrc *isrc;
1462 	struct intr_pic *pic;
1463 	device_t pdev;
1464 	struct intr_map_data_msi *msi;
1465 	int err;
1466 
1467 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1468 	if (pic == NULL)
1469 		return (ESRCH);
1470 
1471 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1472 	    ("%s: Found a non-MSI controller: %s", __func__,
1473 	     device_get_name(pic->pic_dev)));
1474 
1475 	/*
1476 	 * If this is the first time we have used this context ask the
1477 	 * interrupt controller to map memory the msi source will need.
1478 	 */
1479 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1480 	if (err != 0)
1481 		return (err);
1482 
1483 	err = MSI_ALLOC_MSIX(pic->pic_dev, child, &pdev, &isrc);
1484 	if (err != 0)
1485 		return (err);
1486 
1487 	isrc->isrc_iommu = domain;
1488 	msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1489 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1490 	msi->isrc = isrc;
1491 	*irq = intr_map_irq(pic->pic_dev, xref, (struct intr_map_data *)msi);
1492 	return (0);
1493 }
1494 
1495 int
1496 intr_release_msix(device_t pci, device_t child, intptr_t xref, int irq)
1497 {
1498 	struct intr_irqsrc *isrc;
1499 	struct intr_pic *pic;
1500 	struct intr_map_data_msi *msi;
1501 	int err;
1502 
1503 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1504 	if (pic == NULL)
1505 		return (ESRCH);
1506 
1507 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1508 	    ("%s: Found a non-MSI controller: %s", __func__,
1509 	     device_get_name(pic->pic_dev)));
1510 
1511 	msi = (struct intr_map_data_msi *)
1512 	    intr_map_get_map_data(irq);
1513 	KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1514 	    ("%s: irq %d map data is not MSI", __func__,
1515 	    irq));
1516 	isrc = msi->isrc;
1517 	if (isrc == NULL) {
1518 		intr_unmap_irq(irq);
1519 		return (EINVAL);
1520 	}
1521 
1522 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1523 
1524 	err = MSI_RELEASE_MSIX(pic->pic_dev, child, isrc);
1525 	intr_unmap_irq(irq);
1526 
1527 	return (err);
1528 }
1529 
1530 int
1531 intr_map_msi(device_t pci, device_t child, intptr_t xref, int irq,
1532     uint64_t *addr, uint32_t *data)
1533 {
1534 	struct intr_irqsrc *isrc;
1535 	struct intr_pic *pic;
1536 	int err;
1537 
1538 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1539 	if (pic == NULL)
1540 		return (ESRCH);
1541 
1542 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1543 	    ("%s: Found a non-MSI controller: %s", __func__,
1544 	     device_get_name(pic->pic_dev)));
1545 
1546 	isrc = intr_map_get_isrc(irq);
1547 	if (isrc == NULL)
1548 		return (EINVAL);
1549 
1550 	err = MSI_MAP_MSI(pic->pic_dev, child, isrc, addr, data);
1551 
1552 #ifdef IOMMU
1553 	if (isrc->isrc_iommu != NULL)
1554 		iommu_translate_msi(isrc->isrc_iommu, addr);
1555 #endif
1556 
1557 	return (err);
1558 }
1559 
/*
 * Empty dosoftints() implementation; the prototype is declared locally
 * right before the definition.
 * NOTE(review): presumably kept only to satisfy an external reference --
 * confirm against the callers before removing.
 */
void dosoftints(void);
void
dosoftints(void)
{
}
1565 
1566 #ifdef SMP
1567 /*
1568  *  Init interrupt controller on another CPU.
1569  */
1570 void
1571 intr_pic_init_secondary(void)
1572 {
1573 	device_t dev;
1574 	uint32_t rootnum;
1575 
1576 	/*
1577 	 * QQQ: Only root PICs are aware of other CPUs ???
1578 	 */
1579 	//mtx_lock(&isrc_table_lock);
1580 	for (rootnum = 0; rootnum < INTR_ROOT_COUNT; rootnum++) {
1581 		dev = intr_irq_roots[rootnum].dev;
1582 		if (dev != NULL) {
1583 			PIC_INIT_SECONDARY(dev, rootnum);
1584 		}
1585 	}
1586 	//mtx_unlock(&isrc_table_lock);
1587 }
1588 #endif
1589 
1590 #ifdef DDB
1591 DB_SHOW_COMMAND_FLAGS(irqs, db_show_irqs, DB_CMD_MEMSAFE)
1592 {
1593 	u_int i, irqsum;
1594 	u_long num;
1595 	struct intr_irqsrc *isrc;
1596 
1597 	for (irqsum = 0, i = 0; i < intr_nirq; i++) {
1598 		isrc = irq_sources[i];
1599 		if (isrc == NULL)
1600 			continue;
1601 
1602 		num = isrc->isrc_count != NULL ? isrc->isrc_count[0] : 0;
1603 		db_printf("irq%-3u <%s>: cpu %02lx%s cnt %lu\n", i,
1604 		    isrc->isrc_name, isrc->isrc_cpu.__bits[0],
1605 		    isrc->isrc_flags & INTR_ISRCF_BOUND ? " (bound)" : "", num);
1606 		irqsum += num;
1607 	}
1608 	db_printf("irq total %u\n", irqsum);
1609 }
1610 #endif
1611 
/*
 * Interrupt mapping table functions.
 *
 * Please keep this part separate; it can be transformed into an
 * extension of standard resources.
 */
/*
 * One slot of the interrupt mapping table (irq_map[]); the slot index is
 * the resource id handed out by intr_map_irq().
 */
struct intr_map_entry
{
	device_t 		dev;		/* device given to intr_map_irq() */
	intptr_t 		xref;		/* xref given to intr_map_irq() */
	struct intr_map_data 	*map_data;	/* mapping data; freed by intr_unmap_irq() */
	struct intr_irqsrc 	*isrc;		/* NULL until set via intr_map_set_isrc() */
	/* XXX TODO DISCONNECTED PICs */
	/*int			flags */
};
1627 
/* XXX Convert irq_map[] to a dynamically expandable one. */
/* Table of mapping entries, indexed by resource id; NULL marks a free slot. */
static struct intr_map_entry **irq_map;
/* Number of slots in irq_map[]; set once in intr_map_init(). */
static u_int irq_map_count;
/* Index at which intr_map_irq() starts searching for a free slot. */
static u_int irq_map_first_free_idx;
/* Mutex held around accesses to the mapping table. */
static struct mtx irq_map_lock;
1633 
1634 static struct intr_irqsrc *
1635 intr_map_get_isrc(u_int res_id)
1636 {
1637 	struct intr_irqsrc *isrc;
1638 
1639 	isrc = NULL;
1640 	mtx_lock(&irq_map_lock);
1641 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1642 		isrc = irq_map[res_id]->isrc;
1643 	mtx_unlock(&irq_map_lock);
1644 
1645 	return (isrc);
1646 }
1647 
1648 static void
1649 intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc)
1650 {
1651 
1652 	mtx_lock(&irq_map_lock);
1653 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1654 		irq_map[res_id]->isrc = isrc;
1655 	mtx_unlock(&irq_map_lock);
1656 }
1657 
1658 /*
1659  * Get a copy of intr_map_entry data
1660  */
1661 static struct intr_map_data *
1662 intr_map_get_map_data(u_int res_id)
1663 {
1664 	struct intr_map_data *data;
1665 
1666 	data = NULL;
1667 	mtx_lock(&irq_map_lock);
1668 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1669 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1670 	data = irq_map[res_id]->map_data;
1671 	mtx_unlock(&irq_map_lock);
1672 
1673 	return (data);
1674 }
1675 
/*
 * Get a copy of intr_map_entry data
 *
 * Stores the entry's device and xref in *map_dev and *map_xref and a
 * freshly allocated copy of its map data in *data (NULL when the entry has
 * no map data or its length is 0).  Panics when the resource id is invalid
 * or the map data length changes while the lock is dropped.
 */
static void
intr_map_copy_map_data(u_int res_id, device_t *map_dev, intptr_t *map_xref,
    struct intr_map_data **data)
{
	size_t len;

	len = 0;
	/* First pass: only learn how much memory the copy needs. */
	mtx_lock(&irq_map_lock);
	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
		panic("Attempt to copy invalid resource id: %u\n", res_id);
	if (irq_map[res_id]->map_data != NULL)
		len = irq_map[res_id]->map_data->len;
	mtx_unlock(&irq_map_lock);

	/* The M_WAITOK allocation is done with irq_map_lock released. */
	if (len == 0)
		*data = NULL;
	else
		*data = malloc(len, M_INTRNG, M_WAITOK | M_ZERO);
	/* Second pass: re-validate the entry, then copy under the lock. */
	mtx_lock(&irq_map_lock);
	if (irq_map[res_id] == NULL)
		panic("Attempt to copy invalid resource id: %u\n", res_id);
	if (len != 0) {
		if (len != irq_map[res_id]->map_data->len)
			panic("Resource id: %u has changed.\n", res_id);
		memcpy(*data, irq_map[res_id]->map_data, len);
	}
	*map_dev = irq_map[res_id]->dev;
	*map_xref = irq_map[res_id]->xref;
	mtx_unlock(&irq_map_lock);
}
1709 
1710 /*
1711  * Allocate and fill new entry in irq_map table.
1712  */
1713 u_int
1714 intr_map_irq(device_t dev, intptr_t xref, struct intr_map_data *data)
1715 {
1716 	u_int i;
1717 	struct intr_map_entry *entry;
1718 
1719 	/* Prepare new entry first. */
1720 	entry = malloc(sizeof(*entry), M_INTRNG, M_WAITOK | M_ZERO);
1721 
1722 	entry->dev = dev;
1723 	entry->xref = xref;
1724 	entry->map_data = data;
1725 	entry->isrc = NULL;
1726 
1727 	mtx_lock(&irq_map_lock);
1728 	for (i = irq_map_first_free_idx; i < irq_map_count; i++) {
1729 		if (irq_map[i] == NULL) {
1730 			irq_map[i] = entry;
1731 			irq_map_first_free_idx = i + 1;
1732 			mtx_unlock(&irq_map_lock);
1733 			return (i);
1734 		}
1735 	}
1736 	for (i = 0; i < irq_map_first_free_idx; i++) {
1737 		if (irq_map[i] == NULL) {
1738 			irq_map[i] = entry;
1739 			irq_map_first_free_idx = i + 1;
1740 			mtx_unlock(&irq_map_lock);
1741 			return (i);
1742 		}
1743 	}
1744 	mtx_unlock(&irq_map_lock);
1745 
1746 	/* XXX Expand irq_map table */
1747 	panic("IRQ mapping table is full.");
1748 }
1749 
1750 /*
1751  * Remove and free mapping entry.
1752  */
1753 void
1754 intr_unmap_irq(u_int res_id)
1755 {
1756 	struct intr_map_entry *entry;
1757 
1758 	mtx_lock(&irq_map_lock);
1759 	if ((res_id >= irq_map_count) || (irq_map[res_id] == NULL))
1760 		panic("Attempt to unmap invalid resource id: %u\n", res_id);
1761 	entry = irq_map[res_id];
1762 	irq_map[res_id] = NULL;
1763 	irq_map_first_free_idx = res_id;
1764 	mtx_unlock(&irq_map_lock);
1765 	intr_free_intr_map_data(entry->map_data);
1766 	free(entry, M_INTRNG);
1767 }
1768 
1769 /*
1770  * Clone mapping entry.
1771  */
1772 u_int
1773 intr_map_clone_irq(u_int old_res_id)
1774 {
1775 	device_t map_dev;
1776 	intptr_t map_xref;
1777 	struct intr_map_data *data;
1778 
1779 	intr_map_copy_map_data(old_res_id, &map_dev, &map_xref, &data);
1780 	return (intr_map_irq(map_dev, map_xref, data));
1781 }
1782 
1783 static void
1784 intr_map_init(void *dummy __unused)
1785 {
1786 
1787 	mtx_init(&irq_map_lock, "intr map table", NULL, MTX_DEF);
1788 
1789 	irq_map_count = 2 * intr_nirq;
1790 	irq_map = mallocarray(irq_map_count, sizeof(struct intr_map_entry*),
1791 	    M_INTRNG, M_WAITOK | M_ZERO);
1792 }
1793 SYSINIT(intr_map_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_map_init, NULL);
1794 
1795 #ifdef SMP
1796 /* Virtualization for interrupt source IPI counter increment. */
1797 static inline void
1798 intr_ipi_increment_count(u_long *counter, u_int cpu)
1799 {
1800 
1801 	KASSERT(cpu < mp_maxid + 1, ("%s: too big cpu %u", __func__, cpu));
1802 	counter[cpu]++;
1803 }
1804 
1805 /*
1806  *  Virtualization for interrupt source IPI counters setup.
1807  */
1808 static u_long *
1809 intr_ipi_setup_counters(const char *name)
1810 {
1811 	u_int index, i;
1812 	char str[INTRNAME_LEN];
1813 
1814 	mtx_lock(&isrc_table_lock);
1815 
1816 	/*
1817 	 * We should never have a problem finding mp_maxid + 1 contiguous
1818 	 * counters, in practice. Interrupts will be allocated sequentially
1819 	 * during boot, so the array should fill from low to high index. Once
1820 	 * reserved, the IPI counters will never be released. Similarly, we
1821 	 * will not need to allocate more IPIs once the system is running.
1822 	 */
1823 	bit_ffc_area(intrcnt_bitmap, nintrcnt, mp_maxid + 1, &index);
1824 	if (index == -1)
1825 		panic("Failed to allocate %d counters. Array exhausted?",
1826 		    mp_maxid + 1);
1827 	bit_nset(intrcnt_bitmap, index, index + mp_maxid);
1828 	for (i = 0; i < mp_maxid + 1; i++) {
1829 		snprintf(str, INTRNAME_LEN, "cpu%d:%s", i, name);
1830 		intrcnt_setname(str, index + i);
1831 	}
1832 	mtx_unlock(&isrc_table_lock);
1833 	return (&intrcnt[index]);
1834 }
1835 
1836 /*
1837  *  Lookup IPI source.
1838  */
1839 static struct intr_ipi *
1840 intr_ipi_lookup(u_int ipi)
1841 {
1842 
1843 	if (ipi >= INTR_IPI_COUNT)
1844 		panic("%s: no such IPI %u", __func__, ipi);
1845 
1846 	return (&ipi_sources[ipi]);
1847 }
1848 
1849 int
1850 intr_ipi_pic_register(device_t dev, u_int priority)
1851 {
1852 	if (intr_ipi_dev_frozen) {
1853 		device_printf(dev, "IPI device already frozen");
1854 		return (EBUSY);
1855 	}
1856 
1857 	if (intr_ipi_dev == NULL || priority > intr_ipi_dev_priority) {
1858 		intr_ipi_dev_priority = priority;
1859 		intr_ipi_dev = dev;
1860 	}
1861 
1862 	return (0);
1863 }
1864 
/*
 *  Setup IPI handler on interrupt controller.
 *
 *  Not SMP coherent.
 *
 *  The first call freezes the choice of the IPI PIC (intr_ipi_dev) and
 *  panics when none has been registered.  If PIC_IPI_SETUP() fails the
 *  function returns without installing anything and without reporting
 *  the error.
 */
void
intr_ipi_setup(u_int ipi, const char *name, intr_ipi_handler_t *hand,
    void *arg)
{
	struct intr_irqsrc *isrc;
	struct intr_ipi *ii;
	int error;

	/* From now on intr_ipi_pic_register() refuses new devices. */
	if (!intr_ipi_dev_frozen) {
		if (intr_ipi_dev == NULL)
			panic("%s: no IPI PIC attached", __func__);

		intr_ipi_dev_frozen = true;
		device_printf(intr_ipi_dev, "using for IPIs\n");
	}

	KASSERT(hand != NULL, ("%s: ipi %u no handler", __func__, ipi));

	error = PIC_IPI_SETUP(intr_ipi_dev, ipi, &isrc);
	if (error != 0)
		return;

	isrc->isrc_handlers++;

	ii = intr_ipi_lookup(ipi);
	/* A non-NULL ii_count marks an IPI that has already been set up. */
	KASSERT(ii->ii_count == NULL, ("%s: ipi %u reused", __func__, ipi));

	ii->ii_handler = hand;
	ii->ii_handler_arg = arg;
	ii->ii_isrc = isrc;
	strlcpy(ii->ii_name, name, INTR_IPI_NAMELEN);
	ii->ii_count = intr_ipi_setup_counters(name);

	PIC_ENABLE_INTR(intr_ipi_dev, isrc);
}
1905 
1906 void
1907 intr_ipi_send(cpuset_t cpus, u_int ipi)
1908 {
1909 	struct intr_ipi *ii;
1910 
1911 	KASSERT(intr_ipi_dev_frozen,
1912 	    ("%s: IPI device not yet frozen", __func__));
1913 
1914 	ii = intr_ipi_lookup(ipi);
1915 	if (ii->ii_count == NULL)
1916 		panic("%s: not setup IPI %u", __func__, ipi);
1917 
1918 	/*
1919 	 * XXX: Surely needed on other architectures too? Either way should be
1920 	 * some kind of MI hook defined in an MD header, or the responsibility
1921 	 * of the MD caller if not widespread.
1922 	 */
1923 #ifdef __aarch64__
1924 	/*
1925 	 * Ensure that this CPU's stores will be visible to IPI
1926 	 * recipients before starting to send the interrupts.
1927 	 */
1928 	dsb(ishst);
1929 #endif
1930 
1931 	PIC_IPI_SEND(intr_ipi_dev, ii->ii_isrc, cpus, ipi);
1932 }
1933 
1934 /*
1935  *  interrupt controller dispatch function for IPIs. It should
1936  *  be called straight from the interrupt controller, when associated
1937  *  interrupt source is learned. Or from anybody who has an interrupt
1938  *  source mapped.
1939  */
1940 void
1941 intr_ipi_dispatch(u_int ipi)
1942 {
1943 	struct intr_ipi *ii;
1944 
1945 	ii = intr_ipi_lookup(ipi);
1946 	if (ii->ii_count == NULL)
1947 		panic("%s: not setup IPI %u", __func__, ipi);
1948 
1949 	intr_ipi_increment_count(ii->ii_count, PCPU_GET(cpuid));
1950 
1951 	ii->ii_handler(ii->ii_handler_arg);
1952 }
1953 #endif
1954