xref: /freebsd/sys/kern/subr_intr.c (revision 2ff63af9b88c7413b7d71715b5532625752a248e)
1 /*-
2  * Copyright (c) 2015-2016 Svatopluk Kraus
3  * Copyright (c) 2015-2016 Michal Meloun
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
/*
 *	New-style Interrupt Framework
 *
 *  TODO: - add support for disconnected PICs.
 *        - support enabling IPIs (PPIs) on other CPUs that have already
 *          been started.
 *        - complete the support for removable PICs.
 */
38 
39 #include "opt_ddb.h"
40 #include "opt_hwpmc_hooks.h"
41 #include "opt_iommu.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/asan.h>
46 #include <sys/bitstring.h>
47 #include <sys/bus.h>
48 #include <sys/conf.h>
49 #include <sys/cpuset.h>
50 #include <sys/interrupt.h>
51 #include <sys/kernel.h>
52 #include <sys/lock.h>
53 #include <sys/malloc.h>
54 #include <sys/mutex.h>
55 #include <sys/proc.h>
56 #include <sys/queue.h>
57 #include <sys/rman.h>
58 #include <sys/sched.h>
59 #include <sys/smp.h>
60 #include <sys/sysctl.h>
61 #include <sys/syslog.h>
62 #include <sys/taskqueue.h>
63 #include <sys/tree.h>
64 #include <sys/vmmeter.h>
65 #ifdef HWPMC_HOOKS
66 #include <sys/pmckern.h>
67 #endif
68 
69 #include <machine/atomic.h>
70 #include <machine/cpu.h>
71 #include <machine/intr.h>
72 #include <machine/smp.h>
73 #include <machine/stdarg.h>
74 
75 #ifdef DDB
76 #include <ddb/ddb.h>
77 #endif
78 
79 #ifdef IOMMU
80 #include <dev/iommu/iommu_msi.h>
81 #endif
82 
83 #include "pic_if.h"
84 #include "msi_if.h"
85 
/*
 * Size in bytes of one name slot in the intrnames array, including the
 * terminating NUL written by snprintf().
 */
#define	INTRNAME_LEN	(2*MAXCOMLEN + 1)

/*
 * Debug printf: when DEBUG is defined the message is prefixed with the name
 * of the calling function; otherwise the macro expands to nothing.
 */
#ifdef DEBUG
#define debugf(fmt, args...) do { printf("%s(): ", __func__);	\
    printf(fmt,##args); } while (0)
#else
#define debugf(fmt, args...)
#endif
94 
/* Malloc type used for the allocations made in this file. */
MALLOC_DECLARE(M_INTRNG);
MALLOC_DEFINE(M_INTRNG, "intr", "intr interrupt handling");

/* Main interrupt handler called from assembler -> 'hidden' for C code. */
void intr_irq_handler(struct trapframe *tf);

/*
 * Root interrupt controller stuff. All four values are recorded by
 * intr_pic_claim_root(); the filter and its argument are what
 * intr_irq_handler() invokes.
 */
device_t intr_irq_root_dev;
static intr_irq_filter_t *irq_root_filter;
static void *irq_root_arg;
static u_int irq_root_ipicount;
106 
/*
 * One entry on a parent PIC's pic_children list: a filter that services a
 * contiguous range of the parent's interrupt numbers.
 */
struct intr_pic_child {
	SLIST_ENTRY(intr_pic_child)	 pc_next;	/* link on pic_children */
	struct intr_pic			*pc_pic;	/* the child PIC */
	intr_child_irq_filter_t		*pc_filter;	/* called for irqs in range */
	void				*pc_filter_arg;	/* first argument of pc_filter */
	uintptr_t			 pc_start;	/* first irq of the range */
	uintptr_t			 pc_length;	/* number of irqs in the range */
};
115 
/*
 * Interrupt controller definition. Instances are created by pic_create()
 * and linked on the global pic_list.
 */
struct intr_pic {
	SLIST_ENTRY(intr_pic)	pic_next;	/* link on pic_list */
	intptr_t		pic_xref;	/* hardware identification */
	device_t		pic_dev;	/* controller's device */
/* Only one of FLAG_PIC or FLAG_MSI may be set */
#define	FLAG_PIC	(1 << 0)
#define	FLAG_MSI	(1 << 1)
#define	FLAG_TYPE_MASK	(FLAG_PIC | FLAG_MSI)
	u_int			pic_flags;	/* FLAG_* controller type */
	struct mtx		pic_child_lock;	/* spin lock for pic_children */
	SLIST_HEAD(, intr_pic_child) pic_children; /* registered sub-ranges */
};
129 
/* List of all created interrupt controllers and the mutex guarding it. */
static struct mtx pic_list_lock;
static SLIST_HEAD(, intr_pic) pic_list;

static struct intr_pic *pic_lookup(device_t dev, intptr_t xref, int flags);

/*
 * Interrupt source definition. irq_sources is a table of intr_nirq
 * pointers indexed by interrupt number; irq_next_free is the index at which
 * the next allocation starts searching. The table and the counter bitmap
 * are manipulated with isrc_table_lock held.
 */
static struct mtx isrc_table_lock;
static struct intr_irqsrc **irq_sources;
static u_int irq_next_free;

/*
 * While false, intr_isrc_assign_cpu() only records the requested binding
 * and does not call into the PIC.
 */
#ifdef SMP
#ifdef EARLY_AP_STARTUP
static bool irq_assign_cpu = true;
#else
static bool irq_assign_cpu = false;
#endif
#endif

/* Number of entries in irq_sources; exported read-only as machdep.nirq. */
u_int intr_nirq = NIRQ;
SYSCTL_UINT(_machdep, OID_AUTO, nirq, CTLFLAG_RDTUN, &intr_nirq, 0,
    "Number of IRQs");

/*
 * Data for MI statistics reporting. The arrays hold nintrcnt entries and
 * are allocated in intr_irq_init(); sintrcnt and sintrnames are their sizes
 * in bytes. intrcnt_bitmap has one bit per counter, set while the counter
 * is allocated.
 */
u_long *intrcnt;
char *intrnames;
size_t sintrcnt;
size_t sintrnames;
int nintrcnt;
static bitstr_t *intrcnt_bitmap;

/* Helpers for the resource-id map; their definitions are not in view here. */
static struct intr_irqsrc *intr_map_get_isrc(u_int res_id);
static void intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc);
static struct intr_map_data * intr_map_get_map_data(u_int res_id);
static void intr_map_copy_map_data(u_int res_id, device_t *dev, intptr_t *xref,
    struct intr_map_data **data);
165 
166 /*
167  *  Interrupt framework initialization routine.
168  */
169 static void
170 intr_irq_init(void *dummy __unused)
171 {
172 
173 	SLIST_INIT(&pic_list);
174 	mtx_init(&pic_list_lock, "intr pic list", NULL, MTX_DEF);
175 
176 	mtx_init(&isrc_table_lock, "intr isrc table", NULL, MTX_DEF);
177 
178 	/*
179 	 * - 2 counters for each I/O interrupt.
180 	 * - MAXCPU counters for each IPI counters for SMP.
181 	 */
182 	nintrcnt = intr_nirq * 2;
183 #ifdef SMP
184 	nintrcnt += INTR_IPI_COUNT * MAXCPU;
185 #endif
186 
187 	intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTRNG,
188 	    M_WAITOK | M_ZERO);
189 	intrnames = mallocarray(nintrcnt, INTRNAME_LEN, M_INTRNG,
190 	    M_WAITOK | M_ZERO);
191 	sintrcnt = nintrcnt * sizeof(u_long);
192 	sintrnames = nintrcnt * INTRNAME_LEN;
193 
194 	/* Allocate the bitmap tracking counter allocations. */
195 	intrcnt_bitmap = bit_alloc(nintrcnt, M_INTRNG, M_WAITOK | M_ZERO);
196 
197 	irq_sources = mallocarray(intr_nirq, sizeof(struct intr_irqsrc*),
198 	    M_INTRNG, M_WAITOK | M_ZERO);
199 }
200 SYSINIT(intr_irq_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_irq_init, NULL);
201 
202 static void
203 intrcnt_setname(const char *name, int index)
204 {
205 
206 	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
207 	    INTRNAME_LEN - 1, name);
208 }
209 
210 /*
211  *  Update name for interrupt source with interrupt event.
212  */
213 static void
214 intrcnt_updatename(struct intr_irqsrc *isrc)
215 {
216 
217 	/* QQQ: What about stray counter name? */
218 	mtx_assert(&isrc_table_lock, MA_OWNED);
219 	intrcnt_setname(isrc->isrc_event->ie_fullname, isrc->isrc_index);
220 }
221 
222 /*
223  *  Virtualization for interrupt source interrupt counter increment.
224  */
225 static inline void
226 isrc_increment_count(struct intr_irqsrc *isrc)
227 {
228 
229 	if (isrc->isrc_flags & INTR_ISRCF_PPI)
230 		atomic_add_long(&isrc->isrc_count[0], 1);
231 	else
232 		isrc->isrc_count[0]++;
233 }
234 
235 /*
236  *  Virtualization for interrupt source interrupt stray counter increment.
237  */
238 static inline void
239 isrc_increment_straycount(struct intr_irqsrc *isrc)
240 {
241 
242 	isrc->isrc_count[1]++;
243 }
244 
245 /*
246  *  Virtualization for interrupt source interrupt name update.
247  */
248 static void
249 isrc_update_name(struct intr_irqsrc *isrc, const char *name)
250 {
251 	char str[INTRNAME_LEN];
252 
253 	mtx_assert(&isrc_table_lock, MA_OWNED);
254 
255 	if (name != NULL) {
256 		snprintf(str, INTRNAME_LEN, "%s: %s", isrc->isrc_name, name);
257 		intrcnt_setname(str, isrc->isrc_index);
258 		snprintf(str, INTRNAME_LEN, "stray %s: %s", isrc->isrc_name,
259 		    name);
260 		intrcnt_setname(str, isrc->isrc_index + 1);
261 	} else {
262 		snprintf(str, INTRNAME_LEN, "%s:", isrc->isrc_name);
263 		intrcnt_setname(str, isrc->isrc_index);
264 		snprintf(str, INTRNAME_LEN, "stray %s:", isrc->isrc_name);
265 		intrcnt_setname(str, isrc->isrc_index + 1);
266 	}
267 }
268 
269 /*
270  *  Virtualization for interrupt source interrupt counters setup.
271  */
272 static void
273 isrc_setup_counters(struct intr_irqsrc *isrc)
274 {
275 	int index;
276 
277 	mtx_assert(&isrc_table_lock, MA_OWNED);
278 
279 	/*
280 	 * Allocate two counter values, the second tracking "stray" interrupts.
281 	 */
282 	bit_ffc_area(intrcnt_bitmap, nintrcnt, 2, &index);
283 	if (index == -1)
284 		panic("Failed to allocate 2 counters. Array exhausted?");
285 	bit_nset(intrcnt_bitmap, index, index + 1);
286 	isrc->isrc_index = index;
287 	isrc->isrc_count = &intrcnt[index];
288 	isrc_update_name(isrc, NULL);
289 }
290 
291 /*
292  *  Virtualization for interrupt source interrupt counters release.
293  */
294 static void
295 isrc_release_counters(struct intr_irqsrc *isrc)
296 {
297 	int idx = isrc->isrc_index;
298 
299 	mtx_assert(&isrc_table_lock, MA_OWNED);
300 
301 	bit_nclear(intrcnt_bitmap, idx, idx + 1);
302 }
303 
304 #ifdef SMP
305 /*
306  *  Virtualization for interrupt source IPI counters setup.
307  */
308 u_long *
309 intr_ipi_setup_counters(const char *name)
310 {
311 	u_int index, i;
312 	char str[INTRNAME_LEN];
313 
314 	mtx_lock(&isrc_table_lock);
315 
316 	/*
317 	 * We should never have a problem finding MAXCPU contiguous counters,
318 	 * in practice. Interrupts will be allocated sequentially during boot,
319 	 * so the array should fill from low to high index. Once reserved, the
320 	 * IPI counters will never be released. Similarly, we will not need to
321 	 * allocate more IPIs once the system is running.
322 	 */
323 	bit_ffc_area(intrcnt_bitmap, nintrcnt, MAXCPU, &index);
324 	if (index == -1)
325 		panic("Failed to allocate %d counters. Array exhausted?",
326 		    MAXCPU);
327 	bit_nset(intrcnt_bitmap, index, index + MAXCPU - 1);
328 	for (i = 0; i < MAXCPU; i++) {
329 		snprintf(str, INTRNAME_LEN, "cpu%d:%s", i, name);
330 		intrcnt_setname(str, index + i);
331 	}
332 	mtx_unlock(&isrc_table_lock);
333 	return (&intrcnt[index]);
334 }
335 #endif
336 
337 /*
338  *  Main interrupt dispatch handler. It's called straight
339  *  from the assembler, where CPU interrupt is served.
340  */
341 void
342 intr_irq_handler(struct trapframe *tf)
343 {
344 	struct trapframe * oldframe;
345 	struct thread * td;
346 
347 	KASSERT(irq_root_filter != NULL, ("%s: no filter", __func__));
348 
349 	kasan_mark(tf, sizeof(*tf), sizeof(*tf), 0);
350 
351 	VM_CNT_INC(v_intr);
352 	critical_enter();
353 	td = curthread;
354 	oldframe = td->td_intr_frame;
355 	td->td_intr_frame = tf;
356 	irq_root_filter(irq_root_arg);
357 	td->td_intr_frame = oldframe;
358 	critical_exit();
359 #ifdef HWPMC_HOOKS
360 	if (pmc_hook && TRAPF_USERMODE(tf) &&
361 	    (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN))
362 		pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, tf);
363 #endif
364 }
365 
366 int
367 intr_child_irq_handler(struct intr_pic *parent, uintptr_t irq)
368 {
369 	struct intr_pic_child *child;
370 	bool found;
371 
372 	found = false;
373 	mtx_lock_spin(&parent->pic_child_lock);
374 	SLIST_FOREACH(child, &parent->pic_children, pc_next) {
375 		if (child->pc_start <= irq &&
376 		    irq < (child->pc_start + child->pc_length)) {
377 			found = true;
378 			break;
379 		}
380 	}
381 	mtx_unlock_spin(&parent->pic_child_lock);
382 
383 	if (found)
384 		return (child->pc_filter(child->pc_filter_arg, irq));
385 
386 	return (FILTER_STRAY);
387 }
388 
/*
 *  interrupt controller dispatch function for interrupts. It should
 *  be called straight from the interrupt controller, when associated interrupt
 *  source is learned.
 *
 *  Returns 0 when the interrupt was handled and EINVAL otherwise; in the
 *  latter case the source's stray counter is incremented.
 */
int
intr_isrc_dispatch(struct intr_irqsrc *isrc, struct trapframe *tf)
{

	KASSERT(isrc != NULL, ("%s: no source", __func__));

	/* Every dispatch is counted, whether or not it ends up handled. */
	isrc_increment_count(isrc);

#ifdef INTR_SOLO
	/* An installed solo filter is used instead of the interrupt event. */
	if (isrc->isrc_filter != NULL) {
		int error;
		error = isrc->isrc_filter(isrc->isrc_arg, tf);
		PIC_POST_FILTER(isrc->isrc_dev, isrc);
		if (error == FILTER_HANDLED)
			return (0);
	} else
#endif
	if (isrc->isrc_event != NULL) {
		if (intr_event_handle(isrc->isrc_event, tf) == 0)
			return (0);
	}

	/* Nothing handled the interrupt: account it as stray. */
	isrc_increment_straycount(isrc);
	return (EINVAL);
}
419 
420 /*
421  *  Alloc unique interrupt number (resource handle) for interrupt source.
422  *
423  *  There could be various strategies how to allocate free interrupt number
424  *  (resource handle) for new interrupt source.
425  *
426  *  1. Handles are always allocated forward, so handles are not recycled
427  *     immediately. However, if only one free handle left which is reused
428  *     constantly...
429  */
430 static inline int
431 isrc_alloc_irq(struct intr_irqsrc *isrc)
432 {
433 	u_int irq;
434 
435 	mtx_assert(&isrc_table_lock, MA_OWNED);
436 
437 	if (irq_next_free >= intr_nirq)
438 		return (ENOSPC);
439 
440 	for (irq = irq_next_free; irq < intr_nirq; irq++) {
441 		if (irq_sources[irq] == NULL)
442 			goto found;
443 	}
444 	for (irq = 0; irq < irq_next_free; irq++) {
445 		if (irq_sources[irq] == NULL)
446 			goto found;
447 	}
448 
449 	irq_next_free = intr_nirq;
450 	return (ENOSPC);
451 
452 found:
453 	isrc->isrc_irq = irq;
454 	irq_sources[irq] = isrc;
455 
456 	irq_next_free = irq + 1;
457 	if (irq_next_free >= intr_nirq)
458 		irq_next_free = 0;
459 	return (0);
460 }
461 
462 /*
463  *  Free unique interrupt number (resource handle) from interrupt source.
464  */
465 static inline int
466 isrc_free_irq(struct intr_irqsrc *isrc)
467 {
468 
469 	mtx_assert(&isrc_table_lock, MA_OWNED);
470 
471 	if (isrc->isrc_irq >= intr_nirq)
472 		return (EINVAL);
473 	if (irq_sources[isrc->isrc_irq] != isrc)
474 		return (EINVAL);
475 
476 	irq_sources[isrc->isrc_irq] = NULL;
477 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
478 
479 	/*
480 	 * If we are recovering from the state irq_sources table is full,
481 	 * then the following allocation should check the entire table. This
482 	 * will ensure maximum separation of allocation order from release
483 	 * order.
484 	 */
485 	if (irq_next_free >= intr_nirq)
486 		irq_next_free = 0;
487 
488 	return (0);
489 }
490 
491 /*
492  *  Initialize interrupt source and register it into global interrupt table.
493  */
494 int
495 intr_isrc_register(struct intr_irqsrc *isrc, device_t dev, u_int flags,
496     const char *fmt, ...)
497 {
498 	int error;
499 	va_list ap;
500 
501 	bzero(isrc, sizeof(struct intr_irqsrc));
502 	isrc->isrc_dev = dev;
503 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
504 	isrc->isrc_flags = flags;
505 
506 	va_start(ap, fmt);
507 	vsnprintf(isrc->isrc_name, INTR_ISRC_NAMELEN, fmt, ap);
508 	va_end(ap);
509 
510 	mtx_lock(&isrc_table_lock);
511 	error = isrc_alloc_irq(isrc);
512 	if (error != 0) {
513 		mtx_unlock(&isrc_table_lock);
514 		return (error);
515 	}
516 	/*
517 	 * Setup interrupt counters, but not for IPI sources. Those are setup
518 	 * later and only for used ones (up to INTR_IPI_COUNT) to not exhaust
519 	 * our counter pool.
520 	 */
521 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
522 		isrc_setup_counters(isrc);
523 	mtx_unlock(&isrc_table_lock);
524 	return (0);
525 }
526 
527 /*
528  *  Deregister interrupt source from global interrupt table.
529  */
530 int
531 intr_isrc_deregister(struct intr_irqsrc *isrc)
532 {
533 	int error;
534 
535 	mtx_lock(&isrc_table_lock);
536 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
537 		isrc_release_counters(isrc);
538 	error = isrc_free_irq(isrc);
539 	mtx_unlock(&isrc_table_lock);
540 	return (error);
541 }
542 
543 #ifdef SMP
544 /*
545  *  A support function for a PIC to decide if provided ISRC should be inited
546  *  on given cpu. The logic of INTR_ISRCF_BOUND flag and isrc_cpu member of
547  *  struct intr_irqsrc is the following:
548  *
549  *     If INTR_ISRCF_BOUND is set, the ISRC should be inited only on cpus
550  *     set in isrc_cpu. If not, the ISRC should be inited on every cpu and
551  *     isrc_cpu is kept consistent with it. Thus isrc_cpu is always correct.
552  */
553 bool
554 intr_isrc_init_on_cpu(struct intr_irqsrc *isrc, u_int cpu)
555 {
556 
557 	if (isrc->isrc_handlers == 0)
558 		return (false);
559 	if ((isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI)) == 0)
560 		return (false);
561 	if (isrc->isrc_flags & INTR_ISRCF_BOUND)
562 		return (CPU_ISSET(cpu, &isrc->isrc_cpu));
563 
564 	CPU_SET(cpu, &isrc->isrc_cpu);
565 	return (true);
566 }
567 #endif
568 
569 #ifdef INTR_SOLO
570 /*
571  *  Setup filter into interrupt source.
572  */
573 static int
574 iscr_setup_filter(struct intr_irqsrc *isrc, const char *name,
575     intr_irq_filter_t *filter, void *arg, void **cookiep)
576 {
577 
578 	if (filter == NULL)
579 		return (EINVAL);
580 
581 	mtx_lock(&isrc_table_lock);
582 	/*
583 	 * Make sure that we do not mix the two ways
584 	 * how we handle interrupt sources.
585 	 */
586 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
587 		mtx_unlock(&isrc_table_lock);
588 		return (EBUSY);
589 	}
590 	isrc->isrc_filter = filter;
591 	isrc->isrc_arg = arg;
592 	isrc_update_name(isrc, name);
593 	mtx_unlock(&isrc_table_lock);
594 
595 	*cookiep = isrc;
596 	return (0);
597 }
598 #endif
599 
600 /*
601  *  Interrupt source pre_ithread method for MI interrupt framework.
602  */
603 static void
604 intr_isrc_pre_ithread(void *arg)
605 {
606 	struct intr_irqsrc *isrc = arg;
607 
608 	PIC_PRE_ITHREAD(isrc->isrc_dev, isrc);
609 }
610 
611 /*
612  *  Interrupt source post_ithread method for MI interrupt framework.
613  */
614 static void
615 intr_isrc_post_ithread(void *arg)
616 {
617 	struct intr_irqsrc *isrc = arg;
618 
619 	PIC_POST_ITHREAD(isrc->isrc_dev, isrc);
620 }
621 
622 /*
623  *  Interrupt source post_filter method for MI interrupt framework.
624  */
625 static void
626 intr_isrc_post_filter(void *arg)
627 {
628 	struct intr_irqsrc *isrc = arg;
629 
630 	PIC_POST_FILTER(isrc->isrc_dev, isrc);
631 }
632 
633 /*
634  *  Interrupt source assign_cpu method for MI interrupt framework.
635  */
636 static int
637 intr_isrc_assign_cpu(void *arg, int cpu)
638 {
639 #ifdef SMP
640 	struct intr_irqsrc *isrc = arg;
641 	int error;
642 
643 	mtx_lock(&isrc_table_lock);
644 	if (cpu == NOCPU) {
645 		CPU_ZERO(&isrc->isrc_cpu);
646 		isrc->isrc_flags &= ~INTR_ISRCF_BOUND;
647 	} else {
648 		CPU_SETOF(cpu, &isrc->isrc_cpu);
649 		isrc->isrc_flags |= INTR_ISRCF_BOUND;
650 	}
651 
652 	/*
653 	 * In NOCPU case, it's up to PIC to either leave ISRC on same CPU or
654 	 * re-balance it to another CPU or enable it on more CPUs. However,
655 	 * PIC is expected to change isrc_cpu appropriately to keep us well
656 	 * informed if the call is successful.
657 	 */
658 	if (irq_assign_cpu) {
659 		error = PIC_BIND_INTR(isrc->isrc_dev, isrc);
660 		if (error) {
661 			CPU_ZERO(&isrc->isrc_cpu);
662 			mtx_unlock(&isrc_table_lock);
663 			return (error);
664 		}
665 	}
666 	mtx_unlock(&isrc_table_lock);
667 	return (0);
668 #else
669 	return (EOPNOTSUPP);
670 #endif
671 }
672 
673 /*
674  *  Create interrupt event for interrupt source.
675  */
676 static int
677 isrc_event_create(struct intr_irqsrc *isrc)
678 {
679 	struct intr_event *ie;
680 	int error;
681 
682 	error = intr_event_create(&ie, isrc, 0, isrc->isrc_irq,
683 	    intr_isrc_pre_ithread, intr_isrc_post_ithread, intr_isrc_post_filter,
684 	    intr_isrc_assign_cpu, "%s:", isrc->isrc_name);
685 	if (error)
686 		return (error);
687 
688 	mtx_lock(&isrc_table_lock);
689 	/*
690 	 * Make sure that we do not mix the two ways
691 	 * how we handle interrupt sources. Let contested event wins.
692 	 */
693 #ifdef INTR_SOLO
694 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
695 #else
696 	if (isrc->isrc_event != NULL) {
697 #endif
698 		mtx_unlock(&isrc_table_lock);
699 		intr_event_destroy(ie);
700 		return (isrc->isrc_event != NULL ? EBUSY : 0);
701 	}
702 	isrc->isrc_event = ie;
703 	mtx_unlock(&isrc_table_lock);
704 
705 	return (0);
706 }
707 #ifdef notyet
708 /*
709  *  Destroy interrupt event for interrupt source.
710  */
711 static void
712 isrc_event_destroy(struct intr_irqsrc *isrc)
713 {
714 	struct intr_event *ie;
715 
716 	mtx_lock(&isrc_table_lock);
717 	ie = isrc->isrc_event;
718 	isrc->isrc_event = NULL;
719 	mtx_unlock(&isrc_table_lock);
720 
721 	if (ie != NULL)
722 		intr_event_destroy(ie);
723 }
724 #endif
725 /*
726  *  Add handler to interrupt source.
727  */
728 static int
729 isrc_add_handler(struct intr_irqsrc *isrc, const char *name,
730     driver_filter_t filter, driver_intr_t handler, void *arg,
731     enum intr_type flags, void **cookiep)
732 {
733 	int error;
734 
735 	if (isrc->isrc_event == NULL) {
736 		error = isrc_event_create(isrc);
737 		if (error)
738 			return (error);
739 	}
740 
741 	error = intr_event_add_handler(isrc->isrc_event, name, filter, handler,
742 	    arg, intr_priority(flags), flags, cookiep);
743 	if (error == 0) {
744 		mtx_lock(&isrc_table_lock);
745 		intrcnt_updatename(isrc);
746 		mtx_unlock(&isrc_table_lock);
747 	}
748 
749 	return (error);
750 }
751 
752 /*
753  *  Lookup interrupt controller locked.
754  */
755 static inline struct intr_pic *
756 pic_lookup_locked(device_t dev, intptr_t xref, int flags)
757 {
758 	struct intr_pic *pic;
759 
760 	mtx_assert(&pic_list_lock, MA_OWNED);
761 
762 	if (dev == NULL && xref == 0)
763 		return (NULL);
764 
765 	/* Note that pic->pic_dev is never NULL on registered PIC. */
766 	SLIST_FOREACH(pic, &pic_list, pic_next) {
767 		if ((pic->pic_flags & FLAG_TYPE_MASK) !=
768 		    (flags & FLAG_TYPE_MASK))
769 			continue;
770 
771 		if (dev == NULL) {
772 			if (xref == pic->pic_xref)
773 				return (pic);
774 		} else if (xref == 0 || pic->pic_xref == 0) {
775 			if (dev == pic->pic_dev)
776 				return (pic);
777 		} else if (xref == pic->pic_xref && dev == pic->pic_dev)
778 				return (pic);
779 	}
780 	return (NULL);
781 }
782 
783 /*
784  *  Lookup interrupt controller.
785  */
786 static struct intr_pic *
787 pic_lookup(device_t dev, intptr_t xref, int flags)
788 {
789 	struct intr_pic *pic;
790 
791 	mtx_lock(&pic_list_lock);
792 	pic = pic_lookup_locked(dev, xref, flags);
793 	mtx_unlock(&pic_list_lock);
794 	return (pic);
795 }
796 
797 /*
798  *  Create interrupt controller.
799  */
800 static struct intr_pic *
801 pic_create(device_t dev, intptr_t xref, int flags)
802 {
803 	struct intr_pic *pic;
804 
805 	mtx_lock(&pic_list_lock);
806 	pic = pic_lookup_locked(dev, xref, flags);
807 	if (pic != NULL) {
808 		mtx_unlock(&pic_list_lock);
809 		return (pic);
810 	}
811 	pic = malloc(sizeof(*pic), M_INTRNG, M_NOWAIT | M_ZERO);
812 	if (pic == NULL) {
813 		mtx_unlock(&pic_list_lock);
814 		return (NULL);
815 	}
816 	pic->pic_xref = xref;
817 	pic->pic_dev = dev;
818 	pic->pic_flags = flags;
819 	mtx_init(&pic->pic_child_lock, "pic child lock", NULL, MTX_SPIN);
820 	SLIST_INSERT_HEAD(&pic_list, pic, pic_next);
821 	mtx_unlock(&pic_list_lock);
822 
823 	return (pic);
824 }
825 #ifdef notyet
826 /*
827  *  Destroy interrupt controller.
828  */
829 static void
830 pic_destroy(device_t dev, intptr_t xref, int flags)
831 {
832 	struct intr_pic *pic;
833 
834 	mtx_lock(&pic_list_lock);
835 	pic = pic_lookup_locked(dev, xref, flags);
836 	if (pic == NULL) {
837 		mtx_unlock(&pic_list_lock);
838 		return;
839 	}
840 	SLIST_REMOVE(&pic_list, pic, intr_pic, pic_next);
841 	mtx_unlock(&pic_list_lock);
842 
843 	free(pic, M_INTRNG);
844 }
845 #endif
846 /*
847  *  Register interrupt controller.
848  */
849 struct intr_pic *
850 intr_pic_register(device_t dev, intptr_t xref)
851 {
852 	struct intr_pic *pic;
853 
854 	if (dev == NULL)
855 		return (NULL);
856 	pic = pic_create(dev, xref, FLAG_PIC);
857 	if (pic == NULL)
858 		return (NULL);
859 
860 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
861 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
862 	return (pic);
863 }
864 
/*
 *  Unregister interrupt controller.
 *
 *  Not implemented: calling this function panics unconditionally.
 */
int
intr_pic_deregister(device_t dev, intptr_t xref)
{

	panic("%s: not implemented", __func__);
}
874 
875 /*
876  *  Mark interrupt controller (itself) as a root one.
877  *
878  *  Note that only an interrupt controller can really know its position
879  *  in interrupt controller's tree. So root PIC must claim itself as a root.
880  *
881  *  In FDT case, according to ePAPR approved version 1.1 from 08 April 2011,
882  *  page 30:
883  *    "The root of the interrupt tree is determined when traversal
884  *     of the interrupt tree reaches an interrupt controller node without
885  *     an interrupts property and thus no explicit interrupt parent."
886  */
887 int
888 intr_pic_claim_root(device_t dev, intptr_t xref, intr_irq_filter_t *filter,
889     void *arg, u_int ipicount)
890 {
891 	struct intr_pic *pic;
892 
893 	pic = pic_lookup(dev, xref, FLAG_PIC);
894 	if (pic == NULL) {
895 		device_printf(dev, "not registered\n");
896 		return (EINVAL);
897 	}
898 
899 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
900 	    ("%s: Found a non-PIC controller: %s", __func__,
901 	     device_get_name(pic->pic_dev)));
902 
903 	if (filter == NULL) {
904 		device_printf(dev, "filter missing\n");
905 		return (EINVAL);
906 	}
907 
908 	/*
909 	 * Only one interrupt controllers could be on the root for now.
910 	 * Note that we further suppose that there is not threaded interrupt
911 	 * routine (handler) on the root. See intr_irq_handler().
912 	 */
913 	if (intr_irq_root_dev != NULL) {
914 		device_printf(dev, "another root already set\n");
915 		return (EBUSY);
916 	}
917 
918 	intr_irq_root_dev = dev;
919 	irq_root_filter = filter;
920 	irq_root_arg = arg;
921 	irq_root_ipicount = ipicount;
922 
923 	debugf("irq root set to %s\n", device_get_nameunit(dev));
924 	return (0);
925 }
926 
927 /*
928  * Add a handler to manage a sub range of a parents interrupts.
929  */
930 int
931 intr_pic_add_handler(device_t parent, struct intr_pic *pic,
932     intr_child_irq_filter_t *filter, void *arg, uintptr_t start,
933     uintptr_t length)
934 {
935 	struct intr_pic *parent_pic;
936 	struct intr_pic_child *newchild;
937 #ifdef INVARIANTS
938 	struct intr_pic_child *child;
939 #endif
940 
941 	/* Find the parent PIC */
942 	parent_pic = pic_lookup(parent, 0, FLAG_PIC);
943 	if (parent_pic == NULL)
944 		return (ENXIO);
945 
946 	newchild = malloc(sizeof(*newchild), M_INTRNG, M_WAITOK | M_ZERO);
947 	newchild->pc_pic = pic;
948 	newchild->pc_filter = filter;
949 	newchild->pc_filter_arg = arg;
950 	newchild->pc_start = start;
951 	newchild->pc_length = length;
952 
953 	mtx_lock_spin(&parent_pic->pic_child_lock);
954 #ifdef INVARIANTS
955 	SLIST_FOREACH(child, &parent_pic->pic_children, pc_next) {
956 		KASSERT(child->pc_pic != pic, ("%s: Adding a child PIC twice",
957 		    __func__));
958 	}
959 #endif
960 	SLIST_INSERT_HEAD(&parent_pic->pic_children, newchild, pc_next);
961 	mtx_unlock_spin(&parent_pic->pic_child_lock);
962 
963 	return (0);
964 }
965 
966 static int
967 intr_resolve_irq(device_t dev, intptr_t xref, struct intr_map_data *data,
968     struct intr_irqsrc **isrc)
969 {
970 	struct intr_pic *pic;
971 	struct intr_map_data_msi *msi;
972 
973 	if (data == NULL)
974 		return (EINVAL);
975 
976 	pic = pic_lookup(dev, xref,
977 	    (data->type == INTR_MAP_DATA_MSI) ? FLAG_MSI : FLAG_PIC);
978 	if (pic == NULL)
979 		return (ESRCH);
980 
981 	switch (data->type) {
982 	case INTR_MAP_DATA_MSI:
983 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
984 		    ("%s: Found a non-MSI controller: %s", __func__,
985 		     device_get_name(pic->pic_dev)));
986 		msi = (struct intr_map_data_msi *)data;
987 		*isrc = msi->isrc;
988 		return (0);
989 
990 	default:
991 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
992 		    ("%s: Found a non-PIC controller: %s", __func__,
993 		     device_get_name(pic->pic_dev)));
994 		return (PIC_MAP_INTR(pic->pic_dev, data, isrc));
995 	}
996 }
997 
998 bool
999 intr_is_per_cpu(struct resource *res)
1000 {
1001 	u_int res_id;
1002 	struct intr_irqsrc *isrc;
1003 
1004 	res_id = (u_int)rman_get_start(res);
1005 	isrc = intr_map_get_isrc(res_id);
1006 
1007 	if (isrc == NULL)
1008 		panic("Attempt to get isrc for non-active resource id: %u\n",
1009 		    res_id);
1010 	return ((isrc->isrc_flags & INTR_ISRCF_PPI) != 0);
1011 }
1012 
1013 int
1014 intr_activate_irq(device_t dev, struct resource *res)
1015 {
1016 	device_t map_dev;
1017 	intptr_t map_xref;
1018 	struct intr_map_data *data;
1019 	struct intr_irqsrc *isrc;
1020 	u_int res_id;
1021 	int error;
1022 
1023 	KASSERT(rman_get_start(res) == rman_get_end(res),
1024 	    ("%s: more interrupts in resource", __func__));
1025 
1026 	res_id = (u_int)rman_get_start(res);
1027 	if (intr_map_get_isrc(res_id) != NULL)
1028 		panic("Attempt to double activation of resource id: %u\n",
1029 		    res_id);
1030 	intr_map_copy_map_data(res_id, &map_dev, &map_xref, &data);
1031 	error = intr_resolve_irq(map_dev, map_xref, data, &isrc);
1032 	if (error != 0) {
1033 		free(data, M_INTRNG);
1034 		/* XXX TODO DISCONECTED PICs */
1035 		/* if (error == EINVAL) return(0); */
1036 		return (error);
1037 	}
1038 	intr_map_set_isrc(res_id, isrc);
1039 	rman_set_virtual(res, data);
1040 	return (PIC_ACTIVATE_INTR(isrc->isrc_dev, isrc, res, data));
1041 }
1042 
1043 int
1044 intr_deactivate_irq(device_t dev, struct resource *res)
1045 {
1046 	struct intr_map_data *data;
1047 	struct intr_irqsrc *isrc;
1048 	u_int res_id;
1049 	int error;
1050 
1051 	KASSERT(rman_get_start(res) == rman_get_end(res),
1052 	    ("%s: more interrupts in resource", __func__));
1053 
1054 	res_id = (u_int)rman_get_start(res);
1055 	isrc = intr_map_get_isrc(res_id);
1056 	if (isrc == NULL)
1057 		panic("Attempt to deactivate non-active resource id: %u\n",
1058 		    res_id);
1059 
1060 	data = rman_get_virtual(res);
1061 	error = PIC_DEACTIVATE_INTR(isrc->isrc_dev, isrc, res, data);
1062 	intr_map_set_isrc(res_id, NULL);
1063 	rman_set_virtual(res, NULL);
1064 	free(data, M_INTRNG);
1065 	return (error);
1066 }
1067 
1068 int
1069 intr_setup_irq(device_t dev, struct resource *res, driver_filter_t filt,
1070     driver_intr_t hand, void *arg, int flags, void **cookiep)
1071 {
1072 	int error;
1073 	struct intr_map_data *data;
1074 	struct intr_irqsrc *isrc;
1075 	const char *name;
1076 	u_int res_id;
1077 
1078 	KASSERT(rman_get_start(res) == rman_get_end(res),
1079 	    ("%s: more interrupts in resource", __func__));
1080 
1081 	res_id = (u_int)rman_get_start(res);
1082 	isrc = intr_map_get_isrc(res_id);
1083 	if (isrc == NULL) {
1084 		/* XXX TODO DISCONECTED PICs */
1085 		return (EINVAL);
1086 	}
1087 
1088 	data = rman_get_virtual(res);
1089 	name = device_get_nameunit(dev);
1090 
1091 #ifdef INTR_SOLO
1092 	/*
1093 	 * Standard handling is done through MI interrupt framework. However,
1094 	 * some interrupts could request solely own special handling. This
1095 	 * non standard handling can be used for interrupt controllers without
1096 	 * handler (filter only), so in case that interrupt controllers are
1097 	 * chained, MI interrupt framework is called only in leaf controller.
1098 	 *
1099 	 * Note that root interrupt controller routine is served as well,
1100 	 * however in intr_irq_handler(), i.e. main system dispatch routine.
1101 	 */
1102 	if (flags & INTR_SOLO && hand != NULL) {
1103 		debugf("irq %u cannot solo on %s\n", irq, name);
1104 		return (EINVAL);
1105 	}
1106 
1107 	if (flags & INTR_SOLO) {
1108 		error = iscr_setup_filter(isrc, name, (intr_irq_filter_t *)filt,
1109 		    arg, cookiep);
1110 		debugf("irq %u setup filter error %d on %s\n", isrc->isrc_irq, error,
1111 		    name);
1112 	} else
1113 #endif
1114 		{
1115 		error = isrc_add_handler(isrc, name, filt, hand, arg, flags,
1116 		    cookiep);
1117 		debugf("irq %u add handler error %d on %s\n", isrc->isrc_irq, error, name);
1118 	}
1119 	if (error != 0)
1120 		return (error);
1121 
1122 	mtx_lock(&isrc_table_lock);
1123 	error = PIC_SETUP_INTR(isrc->isrc_dev, isrc, res, data);
1124 	if (error == 0) {
1125 		isrc->isrc_handlers++;
1126 		if (isrc->isrc_handlers == 1)
1127 			PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
1128 	}
1129 	mtx_unlock(&isrc_table_lock);
1130 	if (error != 0)
1131 		intr_event_remove_handler(*cookiep);
1132 	return (error);
1133 }
1134 
1135 int
1136 intr_teardown_irq(device_t dev, struct resource *res, void *cookie)
1137 {
1138 	int error;
1139 	struct intr_map_data *data;
1140 	struct intr_irqsrc *isrc;
1141 	u_int res_id;
1142 
1143 	KASSERT(rman_get_start(res) == rman_get_end(res),
1144 	    ("%s: more interrupts in resource", __func__));
1145 
1146 	res_id = (u_int)rman_get_start(res);
1147 	isrc = intr_map_get_isrc(res_id);
1148 	if (isrc == NULL || isrc->isrc_handlers == 0)
1149 		return (EINVAL);
1150 
1151 	data = rman_get_virtual(res);
1152 
1153 #ifdef INTR_SOLO
1154 	if (isrc->isrc_filter != NULL) {
1155 		if (isrc != cookie)
1156 			return (EINVAL);
1157 
1158 		mtx_lock(&isrc_table_lock);
1159 		isrc->isrc_filter = NULL;
1160 		isrc->isrc_arg = NULL;
1161 		isrc->isrc_handlers = 0;
1162 		PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1163 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1164 		isrc_update_name(isrc, NULL);
1165 		mtx_unlock(&isrc_table_lock);
1166 		return (0);
1167 	}
1168 #endif
1169 	if (isrc != intr_handler_source(cookie))
1170 		return (EINVAL);
1171 
1172 	error = intr_event_remove_handler(cookie);
1173 	if (error == 0) {
1174 		mtx_lock(&isrc_table_lock);
1175 		isrc->isrc_handlers--;
1176 		if (isrc->isrc_handlers == 0)
1177 			PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1178 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1179 		intrcnt_updatename(isrc);
1180 		mtx_unlock(&isrc_table_lock);
1181 	}
1182 	return (error);
1183 }
1184 
1185 int
1186 intr_describe_irq(device_t dev, struct resource *res, void *cookie,
1187     const char *descr)
1188 {
1189 	int error;
1190 	struct intr_irqsrc *isrc;
1191 	u_int res_id;
1192 
1193 	KASSERT(rman_get_start(res) == rman_get_end(res),
1194 	    ("%s: more interrupts in resource", __func__));
1195 
1196 	res_id = (u_int)rman_get_start(res);
1197 	isrc = intr_map_get_isrc(res_id);
1198 	if (isrc == NULL || isrc->isrc_handlers == 0)
1199 		return (EINVAL);
1200 #ifdef INTR_SOLO
1201 	if (isrc->isrc_filter != NULL) {
1202 		if (isrc != cookie)
1203 			return (EINVAL);
1204 
1205 		mtx_lock(&isrc_table_lock);
1206 		isrc_update_name(isrc, descr);
1207 		mtx_unlock(&isrc_table_lock);
1208 		return (0);
1209 	}
1210 #endif
1211 	error = intr_event_describe_handler(isrc->isrc_event, cookie, descr);
1212 	if (error == 0) {
1213 		mtx_lock(&isrc_table_lock);
1214 		intrcnt_updatename(isrc);
1215 		mtx_unlock(&isrc_table_lock);
1216 	}
1217 	return (error);
1218 }
1219 
1220 #ifdef SMP
1221 int
1222 intr_bind_irq(device_t dev, struct resource *res, int cpu)
1223 {
1224 	struct intr_irqsrc *isrc;
1225 	u_int res_id;
1226 
1227 	KASSERT(rman_get_start(res) == rman_get_end(res),
1228 	    ("%s: more interrupts in resource", __func__));
1229 
1230 	res_id = (u_int)rman_get_start(res);
1231 	isrc = intr_map_get_isrc(res_id);
1232 	if (isrc == NULL || isrc->isrc_handlers == 0)
1233 		return (EINVAL);
1234 #ifdef INTR_SOLO
1235 	if (isrc->isrc_filter != NULL)
1236 		return (intr_isrc_assign_cpu(isrc, cpu));
1237 #endif
1238 	return (intr_event_bind(isrc->isrc_event, cpu));
1239 }
1240 
1241 /*
1242  * Return the CPU that the next interrupt source should use.
1243  * For now just returns the next CPU according to round-robin.
1244  */
1245 u_int
1246 intr_irq_next_cpu(u_int last_cpu, cpuset_t *cpumask)
1247 {
1248 	u_int cpu;
1249 
1250 	KASSERT(!CPU_EMPTY(cpumask), ("%s: Empty CPU mask", __func__));
1251 	if (!irq_assign_cpu || mp_ncpus == 1) {
1252 		cpu = PCPU_GET(cpuid);
1253 
1254 		if (CPU_ISSET(cpu, cpumask))
1255 			return (curcpu);
1256 
1257 		return (CPU_FFS(cpumask) - 1);
1258 	}
1259 
1260 	do {
1261 		last_cpu++;
1262 		if (last_cpu > mp_maxid)
1263 			last_cpu = 0;
1264 	} while (!CPU_ISSET(last_cpu, cpumask));
1265 	return (last_cpu);
1266 }
1267 
1268 #ifndef EARLY_AP_STARTUP
1269 /*
1270  *  Distribute all the interrupt sources among the available
1271  *  CPUs once the AP's have been launched.
1272  */
1273 static void
1274 intr_irq_shuffle(void *arg __unused)
1275 {
1276 	struct intr_irqsrc *isrc;
1277 	u_int i;
1278 
1279 	if (mp_ncpus == 1)
1280 		return;
1281 
1282 	mtx_lock(&isrc_table_lock);
1283 	irq_assign_cpu = true;
1284 	for (i = 0; i < intr_nirq; i++) {
1285 		isrc = irq_sources[i];
1286 		if (isrc == NULL || isrc->isrc_handlers == 0 ||
1287 		    isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI))
1288 			continue;
1289 
1290 		if (isrc->isrc_event != NULL &&
1291 		    isrc->isrc_flags & INTR_ISRCF_BOUND &&
1292 		    isrc->isrc_event->ie_cpu != CPU_FFS(&isrc->isrc_cpu) - 1)
1293 			panic("%s: CPU inconsistency", __func__);
1294 
1295 		if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0)
1296 			CPU_ZERO(&isrc->isrc_cpu); /* start again */
1297 
1298 		/*
1299 		 * We are in wicked position here if the following call fails
1300 		 * for bound ISRC. The best thing we can do is to clear
1301 		 * isrc_cpu so inconsistency with ie_cpu will be detectable.
1302 		 */
1303 		if (PIC_BIND_INTR(isrc->isrc_dev, isrc) != 0)
1304 			CPU_ZERO(&isrc->isrc_cpu);
1305 	}
1306 	mtx_unlock(&isrc_table_lock);
1307 }
1308 SYSINIT(intr_irq_shuffle, SI_SUB_SMP, SI_ORDER_SECOND, intr_irq_shuffle, NULL);
1309 #endif /* !EARLY_AP_STARTUP */
1310 
1311 #else
1312 u_int
1313 intr_irq_next_cpu(u_int current_cpu, cpuset_t *cpumask)
1314 {
1315 
1316 	return (PCPU_GET(cpuid));
1317 }
1318 #endif /* SMP */
1319 
1320 /*
1321  * Allocate memory for new intr_map_data structure.
1322  * Initialize common fields.
1323  */
1324 struct intr_map_data *
1325 intr_alloc_map_data(enum intr_map_data_type type, size_t len, int flags)
1326 {
1327 	struct intr_map_data *data;
1328 
1329 	data = malloc(len, M_INTRNG, flags);
1330 	data->type = type;
1331 	data->len = len;
1332 	return (data);
1333 }
1334 
1335 void intr_free_intr_map_data(struct intr_map_data *data)
1336 {
1337 
1338 	free(data, M_INTRNG);
1339 }
1340 
1341 /*
1342  *  Register a MSI/MSI-X interrupt controller
1343  */
1344 int
1345 intr_msi_register(device_t dev, intptr_t xref)
1346 {
1347 	struct intr_pic *pic;
1348 
1349 	if (dev == NULL)
1350 		return (EINVAL);
1351 	pic = pic_create(dev, xref, FLAG_MSI);
1352 	if (pic == NULL)
1353 		return (ENOMEM);
1354 
1355 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
1356 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
1357 	return (0);
1358 }
1359 
1360 int
1361 intr_alloc_msi(device_t pci, device_t child, intptr_t xref, int count,
1362     int maxcount, int *irqs)
1363 {
1364 	struct iommu_domain *domain;
1365 	struct intr_irqsrc **isrc;
1366 	struct intr_pic *pic;
1367 	device_t pdev;
1368 	struct intr_map_data_msi *msi;
1369 	int err, i;
1370 
1371 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1372 	if (pic == NULL)
1373 		return (ESRCH);
1374 
1375 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1376 	    ("%s: Found a non-MSI controller: %s", __func__,
1377 	     device_get_name(pic->pic_dev)));
1378 
1379 	/*
1380 	 * If this is the first time we have used this context ask the
1381 	 * interrupt controller to map memory the msi source will need.
1382 	 */
1383 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1384 	if (err != 0)
1385 		return (err);
1386 
1387 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1388 	err = MSI_ALLOC_MSI(pic->pic_dev, child, count, maxcount, &pdev, isrc);
1389 	if (err != 0) {
1390 		free(isrc, M_INTRNG);
1391 		return (err);
1392 	}
1393 
1394 	for (i = 0; i < count; i++) {
1395 		isrc[i]->isrc_iommu = domain;
1396 		msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1397 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1398 		msi-> isrc = isrc[i];
1399 
1400 		irqs[i] = intr_map_irq(pic->pic_dev, xref,
1401 		    (struct intr_map_data *)msi);
1402 	}
1403 	free(isrc, M_INTRNG);
1404 
1405 	return (err);
1406 }
1407 
1408 int
1409 intr_release_msi(device_t pci, device_t child, intptr_t xref, int count,
1410     int *irqs)
1411 {
1412 	struct intr_irqsrc **isrc;
1413 	struct intr_pic *pic;
1414 	struct intr_map_data_msi *msi;
1415 	int i, err;
1416 
1417 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1418 	if (pic == NULL)
1419 		return (ESRCH);
1420 
1421 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1422 	    ("%s: Found a non-MSI controller: %s", __func__,
1423 	     device_get_name(pic->pic_dev)));
1424 
1425 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1426 
1427 	for (i = 0; i < count; i++) {
1428 		msi = (struct intr_map_data_msi *)
1429 		    intr_map_get_map_data(irqs[i]);
1430 		KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1431 		    ("%s: irq %d map data is not MSI", __func__,
1432 		    irqs[i]));
1433 		isrc[i] = msi->isrc;
1434 	}
1435 
1436 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1437 
1438 	err = MSI_RELEASE_MSI(pic->pic_dev, child, count, isrc);
1439 
1440 	for (i = 0; i < count; i++) {
1441 		if (isrc[i] != NULL)
1442 			intr_unmap_irq(irqs[i]);
1443 	}
1444 
1445 	free(isrc, M_INTRNG);
1446 	return (err);
1447 }
1448 
1449 int
1450 intr_alloc_msix(device_t pci, device_t child, intptr_t xref, int *irq)
1451 {
1452 	struct iommu_domain *domain;
1453 	struct intr_irqsrc *isrc;
1454 	struct intr_pic *pic;
1455 	device_t pdev;
1456 	struct intr_map_data_msi *msi;
1457 	int err;
1458 
1459 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1460 	if (pic == NULL)
1461 		return (ESRCH);
1462 
1463 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1464 	    ("%s: Found a non-MSI controller: %s", __func__,
1465 	     device_get_name(pic->pic_dev)));
1466 
1467 	/*
1468 	 * If this is the first time we have used this context ask the
1469 	 * interrupt controller to map memory the msi source will need.
1470 	 */
1471 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1472 	if (err != 0)
1473 		return (err);
1474 
1475 	err = MSI_ALLOC_MSIX(pic->pic_dev, child, &pdev, &isrc);
1476 	if (err != 0)
1477 		return (err);
1478 
1479 	isrc->isrc_iommu = domain;
1480 	msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1481 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1482 	msi->isrc = isrc;
1483 	*irq = intr_map_irq(pic->pic_dev, xref, (struct intr_map_data *)msi);
1484 	return (0);
1485 }
1486 
1487 int
1488 intr_release_msix(device_t pci, device_t child, intptr_t xref, int irq)
1489 {
1490 	struct intr_irqsrc *isrc;
1491 	struct intr_pic *pic;
1492 	struct intr_map_data_msi *msi;
1493 	int err;
1494 
1495 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1496 	if (pic == NULL)
1497 		return (ESRCH);
1498 
1499 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1500 	    ("%s: Found a non-MSI controller: %s", __func__,
1501 	     device_get_name(pic->pic_dev)));
1502 
1503 	msi = (struct intr_map_data_msi *)
1504 	    intr_map_get_map_data(irq);
1505 	KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1506 	    ("%s: irq %d map data is not MSI", __func__,
1507 	    irq));
1508 	isrc = msi->isrc;
1509 	if (isrc == NULL) {
1510 		intr_unmap_irq(irq);
1511 		return (EINVAL);
1512 	}
1513 
1514 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1515 
1516 	err = MSI_RELEASE_MSIX(pic->pic_dev, child, isrc);
1517 	intr_unmap_irq(irq);
1518 
1519 	return (err);
1520 }
1521 
1522 int
1523 intr_map_msi(device_t pci, device_t child, intptr_t xref, int irq,
1524     uint64_t *addr, uint32_t *data)
1525 {
1526 	struct intr_irqsrc *isrc;
1527 	struct intr_pic *pic;
1528 	int err;
1529 
1530 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1531 	if (pic == NULL)
1532 		return (ESRCH);
1533 
1534 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1535 	    ("%s: Found a non-MSI controller: %s", __func__,
1536 	     device_get_name(pic->pic_dev)));
1537 
1538 	isrc = intr_map_get_isrc(irq);
1539 	if (isrc == NULL)
1540 		return (EINVAL);
1541 
1542 	err = MSI_MAP_MSI(pic->pic_dev, child, isrc, addr, data);
1543 
1544 #ifdef IOMMU
1545 	if (isrc->isrc_iommu != NULL)
1546 		iommu_translate_msi(isrc->isrc_iommu, addr);
1547 #endif
1548 
1549 	return (err);
1550 }
1551 
/*
 * Empty stub: takes no arguments and does nothing.  The prototype is
 * declared here, right in front of the definition.
 * NOTE(review): presumably kept only to satisfy an external reference --
 * confirm before removing.
 */
void	dosoftints(void);

void
dosoftints(void)
{

}
1557 
1558 #ifdef SMP
1559 /*
1560  *  Init interrupt controller on another CPU.
1561  */
1562 void
1563 intr_pic_init_secondary(void)
1564 {
1565 
1566 	/*
1567 	 * QQQ: Only root PIC is aware of other CPUs ???
1568 	 */
1569 	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
1570 
1571 	//mtx_lock(&isrc_table_lock);
1572 	PIC_INIT_SECONDARY(intr_irq_root_dev);
1573 	//mtx_unlock(&isrc_table_lock);
1574 }
1575 #endif
1576 
1577 #ifdef DDB
1578 DB_SHOW_COMMAND_FLAGS(irqs, db_show_irqs, DB_CMD_MEMSAFE)
1579 {
1580 	u_int i, irqsum;
1581 	u_long num;
1582 	struct intr_irqsrc *isrc;
1583 
1584 	for (irqsum = 0, i = 0; i < intr_nirq; i++) {
1585 		isrc = irq_sources[i];
1586 		if (isrc == NULL)
1587 			continue;
1588 
1589 		num = isrc->isrc_count != NULL ? isrc->isrc_count[0] : 0;
1590 		db_printf("irq%-3u <%s>: cpu %02lx%s cnt %lu\n", i,
1591 		    isrc->isrc_name, isrc->isrc_cpu.__bits[0],
1592 		    isrc->isrc_flags & INTR_ISRCF_BOUND ? " (bound)" : "", num);
1593 		irqsum += num;
1594 	}
1595 	db_printf("irq total %u\n", irqsum);
1596 }
1597 #endif
1598 
/*
 * Interrupt mapping table functions.
 *
 * Please keep this part separate; it can be transformed into an
 * extension of the standard resources.
 */
1605 struct intr_map_entry
1606 {
1607 	device_t 		dev;
1608 	intptr_t 		xref;
1609 	struct intr_map_data 	*map_data;
1610 	struct intr_irqsrc 	*isrc;
1611 	/* XXX TODO DISCONECTED PICs */
1612 	/*int			flags */
1613 };
1614 
1615 /* XXX Convert irq_map[] to dynamicaly expandable one. */
1616 static struct intr_map_entry **irq_map;
1617 static u_int irq_map_count;
1618 static u_int irq_map_first_free_idx;
1619 static struct mtx irq_map_lock;
1620 
1621 static struct intr_irqsrc *
1622 intr_map_get_isrc(u_int res_id)
1623 {
1624 	struct intr_irqsrc *isrc;
1625 
1626 	isrc = NULL;
1627 	mtx_lock(&irq_map_lock);
1628 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1629 		isrc = irq_map[res_id]->isrc;
1630 	mtx_unlock(&irq_map_lock);
1631 
1632 	return (isrc);
1633 }
1634 
1635 static void
1636 intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc)
1637 {
1638 
1639 	mtx_lock(&irq_map_lock);
1640 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1641 		irq_map[res_id]->isrc = isrc;
1642 	mtx_unlock(&irq_map_lock);
1643 }
1644 
1645 /*
1646  * Get a copy of intr_map_entry data
1647  */
1648 static struct intr_map_data *
1649 intr_map_get_map_data(u_int res_id)
1650 {
1651 	struct intr_map_data *data;
1652 
1653 	data = NULL;
1654 	mtx_lock(&irq_map_lock);
1655 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1656 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1657 	data = irq_map[res_id]->map_data;
1658 	mtx_unlock(&irq_map_lock);
1659 
1660 	return (data);
1661 }
1662 
1663 /*
1664  * Get a copy of intr_map_entry data
1665  */
1666 static void
1667 intr_map_copy_map_data(u_int res_id, device_t *map_dev, intptr_t *map_xref,
1668     struct intr_map_data **data)
1669 {
1670 	size_t len;
1671 
1672 	len = 0;
1673 	mtx_lock(&irq_map_lock);
1674 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1675 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1676 	if (irq_map[res_id]->map_data != NULL)
1677 		len = irq_map[res_id]->map_data->len;
1678 	mtx_unlock(&irq_map_lock);
1679 
1680 	if (len == 0)
1681 		*data = NULL;
1682 	else
1683 		*data = malloc(len, M_INTRNG, M_WAITOK | M_ZERO);
1684 	mtx_lock(&irq_map_lock);
1685 	if (irq_map[res_id] == NULL)
1686 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1687 	if (len != 0) {
1688 		if (len != irq_map[res_id]->map_data->len)
1689 			panic("Resource id: %u has changed.\n", res_id);
1690 		memcpy(*data, irq_map[res_id]->map_data, len);
1691 	}
1692 	*map_dev = irq_map[res_id]->dev;
1693 	*map_xref = irq_map[res_id]->xref;
1694 	mtx_unlock(&irq_map_lock);
1695 }
1696 
1697 /*
1698  * Allocate and fill new entry in irq_map table.
1699  */
1700 u_int
1701 intr_map_irq(device_t dev, intptr_t xref, struct intr_map_data *data)
1702 {
1703 	u_int i;
1704 	struct intr_map_entry *entry;
1705 
1706 	/* Prepare new entry first. */
1707 	entry = malloc(sizeof(*entry), M_INTRNG, M_WAITOK | M_ZERO);
1708 
1709 	entry->dev = dev;
1710 	entry->xref = xref;
1711 	entry->map_data = data;
1712 	entry->isrc = NULL;
1713 
1714 	mtx_lock(&irq_map_lock);
1715 	for (i = irq_map_first_free_idx; i < irq_map_count; i++) {
1716 		if (irq_map[i] == NULL) {
1717 			irq_map[i] = entry;
1718 			irq_map_first_free_idx = i + 1;
1719 			mtx_unlock(&irq_map_lock);
1720 			return (i);
1721 		}
1722 	}
1723 	for (i = 0; i < irq_map_first_free_idx; i++) {
1724 		if (irq_map[i] == NULL) {
1725 			irq_map[i] = entry;
1726 			irq_map_first_free_idx = i + 1;
1727 			mtx_unlock(&irq_map_lock);
1728 			return (i);
1729 		}
1730 	}
1731 	mtx_unlock(&irq_map_lock);
1732 
1733 	/* XXX Expand irq_map table */
1734 	panic("IRQ mapping table is full.");
1735 }
1736 
1737 /*
1738  * Remove and free mapping entry.
1739  */
1740 void
1741 intr_unmap_irq(u_int res_id)
1742 {
1743 	struct intr_map_entry *entry;
1744 
1745 	mtx_lock(&irq_map_lock);
1746 	if ((res_id >= irq_map_count) || (irq_map[res_id] == NULL))
1747 		panic("Attempt to unmap invalid resource id: %u\n", res_id);
1748 	entry = irq_map[res_id];
1749 	irq_map[res_id] = NULL;
1750 	irq_map_first_free_idx = res_id;
1751 	mtx_unlock(&irq_map_lock);
1752 	intr_free_intr_map_data(entry->map_data);
1753 	free(entry, M_INTRNG);
1754 }
1755 
1756 /*
1757  * Clone mapping entry.
1758  */
1759 u_int
1760 intr_map_clone_irq(u_int old_res_id)
1761 {
1762 	device_t map_dev;
1763 	intptr_t map_xref;
1764 	struct intr_map_data *data;
1765 
1766 	intr_map_copy_map_data(old_res_id, &map_dev, &map_xref, &data);
1767 	return (intr_map_irq(map_dev, map_xref, data));
1768 }
1769 
1770 static void
1771 intr_map_init(void *dummy __unused)
1772 {
1773 
1774 	mtx_init(&irq_map_lock, "intr map table", NULL, MTX_DEF);
1775 
1776 	irq_map_count = 2 * intr_nirq;
1777 	irq_map = mallocarray(irq_map_count, sizeof(struct intr_map_entry*),
1778 	    M_INTRNG, M_WAITOK | M_ZERO);
1779 }
1780 SYSINIT(intr_map_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_map_init, NULL);
1781