xref: /freebsd/sys/kern/subr_intr.c (revision ff19fd624233a938b6a09ac75a87a2c69d65df08)
1 /*-
2  * Copyright (c) 2015-2016 Svatopluk Kraus
3  * Copyright (c) 2015-2016 Michal Meloun
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
/*
 *	New-style Interrupt Framework
 *
 *  TODO: - add support for disconnected PICs.
 *        - support enabling IPIs (PPIs) on other CPUs that have already
 *          been started.
 *        - complete the support for removable PICs.
 */
38 
39 #include "opt_ddb.h"
40 #include "opt_hwpmc_hooks.h"
41 #include "opt_iommu.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/syslog.h>
49 #include <sys/malloc.h>
50 #include <sys/proc.h>
51 #include <sys/queue.h>
52 #include <sys/bus.h>
53 #include <sys/interrupt.h>
54 #include <sys/taskqueue.h>
55 #include <sys/tree.h>
56 #include <sys/conf.h>
57 #include <sys/cpuset.h>
58 #include <sys/rman.h>
59 #include <sys/sched.h>
60 #include <sys/smp.h>
61 #include <sys/vmmeter.h>
62 #ifdef HWPMC_HOOKS
63 #include <sys/pmckern.h>
64 #endif
65 
66 #include <machine/atomic.h>
67 #include <machine/intr.h>
68 #include <machine/cpu.h>
69 #include <machine/smp.h>
70 #include <machine/stdarg.h>
71 
72 #ifdef DDB
73 #include <ddb/ddb.h>
74 #endif
75 
76 #ifdef IOMMU
77 #include <dev/iommu/iommu_msi.h>
78 #endif
79 
80 #include "pic_if.h"
81 #include "msi_if.h"
82 
83 #define	INTRNAME_LEN	(2*MAXCOMLEN + 1)
84 
85 #ifdef DEBUG
86 #define debugf(fmt, args...) do { printf("%s(): ", __func__);	\
87     printf(fmt,##args); } while (0)
88 #else
89 #define debugf(fmt, args...)
90 #endif
91 
92 MALLOC_DECLARE(M_INTRNG);
93 MALLOC_DEFINE(M_INTRNG, "intr", "intr interrupt handling");
94 
95 /* Main interrupt handler called from assembler -> 'hidden' for C code. */
96 void intr_irq_handler(struct trapframe *tf);
97 
98 /* Root interrupt controller stuff. */
99 device_t intr_irq_root_dev;
100 static intr_irq_filter_t *irq_root_filter;
101 static void *irq_root_arg;
102 static u_int irq_root_ipicount;
103 
104 struct intr_pic_child {
105 	SLIST_ENTRY(intr_pic_child)	 pc_next;
106 	struct intr_pic			*pc_pic;
107 	intr_child_irq_filter_t		*pc_filter;
108 	void				*pc_filter_arg;
109 	uintptr_t			 pc_start;
110 	uintptr_t			 pc_length;
111 };
112 
113 /* Interrupt controller definition. */
114 struct intr_pic {
115 	SLIST_ENTRY(intr_pic)	pic_next;
116 	intptr_t		pic_xref;	/* hardware identification */
117 	device_t		pic_dev;
118 /* Only one of FLAG_PIC or FLAG_MSI may be set */
119 #define	FLAG_PIC	(1 << 0)
120 #define	FLAG_MSI	(1 << 1)
121 #define	FLAG_TYPE_MASK	(FLAG_PIC | FLAG_MSI)
122 	u_int			pic_flags;
123 	struct mtx		pic_child_lock;
124 	SLIST_HEAD(, intr_pic_child) pic_children;
125 };
126 
127 static struct mtx pic_list_lock;
128 static SLIST_HEAD(, intr_pic) pic_list;
129 
130 static struct intr_pic *pic_lookup(device_t dev, intptr_t xref, int flags);
131 
132 /* Interrupt source definition. */
133 static struct mtx isrc_table_lock;
134 static struct intr_irqsrc *irq_sources[NIRQ];
135 u_int irq_next_free;
136 
137 #ifdef SMP
138 #ifdef EARLY_AP_STARTUP
139 static bool irq_assign_cpu = true;
140 #else
141 static bool irq_assign_cpu = false;
142 #endif
143 #endif
144 
145 /*
146  * - 2 counters for each I/O interrupt.
147  * - MAXCPU counters for each IPI counters for SMP.
148  */
149 #ifdef SMP
150 #define INTRCNT_COUNT   (NIRQ * 2 + INTR_IPI_COUNT * MAXCPU)
151 #else
152 #define INTRCNT_COUNT   (NIRQ * 2)
153 #endif
154 
155 /* Data for MI statistics reporting. */
156 u_long intrcnt[INTRCNT_COUNT];
157 char intrnames[INTRCNT_COUNT * INTRNAME_LEN];
158 size_t sintrcnt = sizeof(intrcnt);
159 size_t sintrnames = sizeof(intrnames);
160 static u_int intrcnt_index;
161 
162 static struct intr_irqsrc *intr_map_get_isrc(u_int res_id);
163 static void intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc);
164 static struct intr_map_data * intr_map_get_map_data(u_int res_id);
165 static void intr_map_copy_map_data(u_int res_id, device_t *dev, intptr_t *xref,
166     struct intr_map_data **data);
167 
168 /*
169  *  Interrupt framework initialization routine.
170  */
171 static void
172 intr_irq_init(void *dummy __unused)
173 {
174 
175 	SLIST_INIT(&pic_list);
176 	mtx_init(&pic_list_lock, "intr pic list", NULL, MTX_DEF);
177 
178 	mtx_init(&isrc_table_lock, "intr isrc table", NULL, MTX_DEF);
179 }
180 SYSINIT(intr_irq_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_irq_init, NULL);
181 
182 static void
183 intrcnt_setname(const char *name, int index)
184 {
185 
186 	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
187 	    INTRNAME_LEN - 1, name);
188 }
189 
190 /*
191  *  Update name for interrupt source with interrupt event.
192  */
193 static void
194 intrcnt_updatename(struct intr_irqsrc *isrc)
195 {
196 
197 	/* QQQ: What about stray counter name? */
198 	mtx_assert(&isrc_table_lock, MA_OWNED);
199 	intrcnt_setname(isrc->isrc_event->ie_fullname, isrc->isrc_index);
200 }
201 
202 /*
203  *  Virtualization for interrupt source interrupt counter increment.
204  */
205 static inline void
206 isrc_increment_count(struct intr_irqsrc *isrc)
207 {
208 
209 	if (isrc->isrc_flags & INTR_ISRCF_PPI)
210 		atomic_add_long(&isrc->isrc_count[0], 1);
211 	else
212 		isrc->isrc_count[0]++;
213 }
214 
215 /*
216  *  Virtualization for interrupt source interrupt stray counter increment.
217  */
218 static inline void
219 isrc_increment_straycount(struct intr_irqsrc *isrc)
220 {
221 
222 	isrc->isrc_count[1]++;
223 }
224 
225 /*
226  *  Virtualization for interrupt source interrupt name update.
227  */
228 static void
229 isrc_update_name(struct intr_irqsrc *isrc, const char *name)
230 {
231 	char str[INTRNAME_LEN];
232 
233 	mtx_assert(&isrc_table_lock, MA_OWNED);
234 
235 	if (name != NULL) {
236 		snprintf(str, INTRNAME_LEN, "%s: %s", isrc->isrc_name, name);
237 		intrcnt_setname(str, isrc->isrc_index);
238 		snprintf(str, INTRNAME_LEN, "stray %s: %s", isrc->isrc_name,
239 		    name);
240 		intrcnt_setname(str, isrc->isrc_index + 1);
241 	} else {
242 		snprintf(str, INTRNAME_LEN, "%s:", isrc->isrc_name);
243 		intrcnt_setname(str, isrc->isrc_index);
244 		snprintf(str, INTRNAME_LEN, "stray %s:", isrc->isrc_name);
245 		intrcnt_setname(str, isrc->isrc_index + 1);
246 	}
247 }
248 
249 /*
250  *  Virtualization for interrupt source interrupt counters setup.
251  */
252 static void
253 isrc_setup_counters(struct intr_irqsrc *isrc)
254 {
255 	u_int index;
256 
257 	/*
258 	 *  XXX - it does not work well with removable controllers and
259 	 *        interrupt sources !!!
260 	 */
261 	index = atomic_fetchadd_int(&intrcnt_index, 2);
262 	isrc->isrc_index = index;
263 	isrc->isrc_count = &intrcnt[index];
264 	isrc_update_name(isrc, NULL);
265 }
266 
/*
 *  Give back the counters of an interrupt source.
 *
 *  Not implemented: calling this function panics.
 */
static void
isrc_release_counters(struct intr_irqsrc *isrc)
{

	panic("%s: not implemented", __func__);
}
276 
277 #ifdef SMP
278 /*
279  *  Virtualization for interrupt source IPI counters setup.
280  */
281 u_long *
282 intr_ipi_setup_counters(const char *name)
283 {
284 	u_int index, i;
285 	char str[INTRNAME_LEN];
286 
287 	index = atomic_fetchadd_int(&intrcnt_index, MAXCPU);
288 	for (i = 0; i < MAXCPU; i++) {
289 		snprintf(str, INTRNAME_LEN, "cpu%d:%s", i, name);
290 		intrcnt_setname(str, index + i);
291 	}
292 	return (&intrcnt[index]);
293 }
294 #endif
295 
296 /*
297  *  Main interrupt dispatch handler. It's called straight
298  *  from the assembler, where CPU interrupt is served.
299  */
300 void
301 intr_irq_handler(struct trapframe *tf)
302 {
303 	struct trapframe * oldframe;
304 	struct thread * td;
305 
306 	KASSERT(irq_root_filter != NULL, ("%s: no filter", __func__));
307 
308 	VM_CNT_INC(v_intr);
309 	critical_enter();
310 	td = curthread;
311 	oldframe = td->td_intr_frame;
312 	td->td_intr_frame = tf;
313 	irq_root_filter(irq_root_arg);
314 	td->td_intr_frame = oldframe;
315 	critical_exit();
316 #ifdef HWPMC_HOOKS
317 	if (pmc_hook && TRAPF_USERMODE(tf) &&
318 	    (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN))
319 		pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, tf);
320 #endif
321 }
322 
323 int
324 intr_child_irq_handler(struct intr_pic *parent, uintptr_t irq)
325 {
326 	struct intr_pic_child *child;
327 	bool found;
328 
329 	found = false;
330 	mtx_lock_spin(&parent->pic_child_lock);
331 	SLIST_FOREACH(child, &parent->pic_children, pc_next) {
332 		if (child->pc_start <= irq &&
333 		    irq < (child->pc_start + child->pc_length)) {
334 			found = true;
335 			break;
336 		}
337 	}
338 	mtx_unlock_spin(&parent->pic_child_lock);
339 
340 	if (found)
341 		return (child->pc_filter(child->pc_filter_arg, irq));
342 
343 	return (FILTER_STRAY);
344 }
345 
346 /*
347  *  interrupt controller dispatch function for interrupts. It should
348  *  be called straight from the interrupt controller, when associated interrupt
349  *  source is learned.
350  */
351 int
352 intr_isrc_dispatch(struct intr_irqsrc *isrc, struct trapframe *tf)
353 {
354 
355 	KASSERT(isrc != NULL, ("%s: no source", __func__));
356 
357 	isrc_increment_count(isrc);
358 
359 #ifdef INTR_SOLO
360 	if (isrc->isrc_filter != NULL) {
361 		int error;
362 		error = isrc->isrc_filter(isrc->isrc_arg, tf);
363 		PIC_POST_FILTER(isrc->isrc_dev, isrc);
364 		if (error == FILTER_HANDLED)
365 			return (0);
366 	} else
367 #endif
368 	if (isrc->isrc_event != NULL) {
369 		if (intr_event_handle(isrc->isrc_event, tf) == 0)
370 			return (0);
371 	}
372 
373 	isrc_increment_straycount(isrc);
374 	return (EINVAL);
375 }
376 
377 /*
378  *  Alloc unique interrupt number (resource handle) for interrupt source.
379  *
380  *  There could be various strategies how to allocate free interrupt number
381  *  (resource handle) for new interrupt source.
382  *
383  *  1. Handles are always allocated forward, so handles are not recycled
384  *     immediately. However, if only one free handle left which is reused
385  *     constantly...
386  */
387 static inline int
388 isrc_alloc_irq(struct intr_irqsrc *isrc)
389 {
390 	u_int maxirqs, irq;
391 
392 	mtx_assert(&isrc_table_lock, MA_OWNED);
393 
394 	maxirqs = nitems(irq_sources);
395 	if (irq_next_free >= maxirqs)
396 		return (ENOSPC);
397 
398 	for (irq = irq_next_free; irq < maxirqs; irq++) {
399 		if (irq_sources[irq] == NULL)
400 			goto found;
401 	}
402 	for (irq = 0; irq < irq_next_free; irq++) {
403 		if (irq_sources[irq] == NULL)
404 			goto found;
405 	}
406 
407 	irq_next_free = maxirqs;
408 	return (ENOSPC);
409 
410 found:
411 	isrc->isrc_irq = irq;
412 	irq_sources[irq] = isrc;
413 
414 	irq_next_free = irq + 1;
415 	if (irq_next_free >= maxirqs)
416 		irq_next_free = 0;
417 	return (0);
418 }
419 
420 /*
421  *  Free unique interrupt number (resource handle) from interrupt source.
422  */
423 static inline int
424 isrc_free_irq(struct intr_irqsrc *isrc)
425 {
426 
427 	mtx_assert(&isrc_table_lock, MA_OWNED);
428 
429 	if (isrc->isrc_irq >= nitems(irq_sources))
430 		return (EINVAL);
431 	if (irq_sources[isrc->isrc_irq] != isrc)
432 		return (EINVAL);
433 
434 	irq_sources[isrc->isrc_irq] = NULL;
435 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
436 	return (0);
437 }
438 
439 /*
440  *  Initialize interrupt source and register it into global interrupt table.
441  */
442 int
443 intr_isrc_register(struct intr_irqsrc *isrc, device_t dev, u_int flags,
444     const char *fmt, ...)
445 {
446 	int error;
447 	va_list ap;
448 
449 	bzero(isrc, sizeof(struct intr_irqsrc));
450 	isrc->isrc_dev = dev;
451 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
452 	isrc->isrc_flags = flags;
453 
454 	va_start(ap, fmt);
455 	vsnprintf(isrc->isrc_name, INTR_ISRC_NAMELEN, fmt, ap);
456 	va_end(ap);
457 
458 	mtx_lock(&isrc_table_lock);
459 	error = isrc_alloc_irq(isrc);
460 	if (error != 0) {
461 		mtx_unlock(&isrc_table_lock);
462 		return (error);
463 	}
464 	/*
465 	 * Setup interrupt counters, but not for IPI sources. Those are setup
466 	 * later and only for used ones (up to INTR_IPI_COUNT) to not exhaust
467 	 * our counter pool.
468 	 */
469 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
470 		isrc_setup_counters(isrc);
471 	mtx_unlock(&isrc_table_lock);
472 	return (0);
473 }
474 
475 /*
476  *  Deregister interrupt source from global interrupt table.
477  */
478 int
479 intr_isrc_deregister(struct intr_irqsrc *isrc)
480 {
481 	int error;
482 
483 	mtx_lock(&isrc_table_lock);
484 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
485 		isrc_release_counters(isrc);
486 	error = isrc_free_irq(isrc);
487 	mtx_unlock(&isrc_table_lock);
488 	return (error);
489 }
490 
491 #ifdef SMP
492 /*
493  *  A support function for a PIC to decide if provided ISRC should be inited
494  *  on given cpu. The logic of INTR_ISRCF_BOUND flag and isrc_cpu member of
495  *  struct intr_irqsrc is the following:
496  *
497  *     If INTR_ISRCF_BOUND is set, the ISRC should be inited only on cpus
498  *     set in isrc_cpu. If not, the ISRC should be inited on every cpu and
499  *     isrc_cpu is kept consistent with it. Thus isrc_cpu is always correct.
500  */
501 bool
502 intr_isrc_init_on_cpu(struct intr_irqsrc *isrc, u_int cpu)
503 {
504 
505 	if (isrc->isrc_handlers == 0)
506 		return (false);
507 	if ((isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI)) == 0)
508 		return (false);
509 	if (isrc->isrc_flags & INTR_ISRCF_BOUND)
510 		return (CPU_ISSET(cpu, &isrc->isrc_cpu));
511 
512 	CPU_SET(cpu, &isrc->isrc_cpu);
513 	return (true);
514 }
515 #endif
516 
517 #ifdef INTR_SOLO
518 /*
519  *  Setup filter into interrupt source.
520  */
521 static int
522 iscr_setup_filter(struct intr_irqsrc *isrc, const char *name,
523     intr_irq_filter_t *filter, void *arg, void **cookiep)
524 {
525 
526 	if (filter == NULL)
527 		return (EINVAL);
528 
529 	mtx_lock(&isrc_table_lock);
530 	/*
531 	 * Make sure that we do not mix the two ways
532 	 * how we handle interrupt sources.
533 	 */
534 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
535 		mtx_unlock(&isrc_table_lock);
536 		return (EBUSY);
537 	}
538 	isrc->isrc_filter = filter;
539 	isrc->isrc_arg = arg;
540 	isrc_update_name(isrc, name);
541 	mtx_unlock(&isrc_table_lock);
542 
543 	*cookiep = isrc;
544 	return (0);
545 }
546 #endif
547 
548 /*
549  *  Interrupt source pre_ithread method for MI interrupt framework.
550  */
551 static void
552 intr_isrc_pre_ithread(void *arg)
553 {
554 	struct intr_irqsrc *isrc = arg;
555 
556 	PIC_PRE_ITHREAD(isrc->isrc_dev, isrc);
557 }
558 
559 /*
560  *  Interrupt source post_ithread method for MI interrupt framework.
561  */
562 static void
563 intr_isrc_post_ithread(void *arg)
564 {
565 	struct intr_irqsrc *isrc = arg;
566 
567 	PIC_POST_ITHREAD(isrc->isrc_dev, isrc);
568 }
569 
570 /*
571  *  Interrupt source post_filter method for MI interrupt framework.
572  */
573 static void
574 intr_isrc_post_filter(void *arg)
575 {
576 	struct intr_irqsrc *isrc = arg;
577 
578 	PIC_POST_FILTER(isrc->isrc_dev, isrc);
579 }
580 
581 /*
582  *  Interrupt source assign_cpu method for MI interrupt framework.
583  */
584 static int
585 intr_isrc_assign_cpu(void *arg, int cpu)
586 {
587 #ifdef SMP
588 	struct intr_irqsrc *isrc = arg;
589 	int error;
590 
591 	if (isrc->isrc_dev != intr_irq_root_dev)
592 		return (EINVAL);
593 
594 	mtx_lock(&isrc_table_lock);
595 	if (cpu == NOCPU) {
596 		CPU_ZERO(&isrc->isrc_cpu);
597 		isrc->isrc_flags &= ~INTR_ISRCF_BOUND;
598 	} else {
599 		CPU_SETOF(cpu, &isrc->isrc_cpu);
600 		isrc->isrc_flags |= INTR_ISRCF_BOUND;
601 	}
602 
603 	/*
604 	 * In NOCPU case, it's up to PIC to either leave ISRC on same CPU or
605 	 * re-balance it to another CPU or enable it on more CPUs. However,
606 	 * PIC is expected to change isrc_cpu appropriately to keep us well
607 	 * informed if the call is successful.
608 	 */
609 	if (irq_assign_cpu) {
610 		error = PIC_BIND_INTR(isrc->isrc_dev, isrc);
611 		if (error) {
612 			CPU_ZERO(&isrc->isrc_cpu);
613 			mtx_unlock(&isrc_table_lock);
614 			return (error);
615 		}
616 	}
617 	mtx_unlock(&isrc_table_lock);
618 	return (0);
619 #else
620 	return (EOPNOTSUPP);
621 #endif
622 }
623 
624 /*
625  *  Create interrupt event for interrupt source.
626  */
627 static int
628 isrc_event_create(struct intr_irqsrc *isrc)
629 {
630 	struct intr_event *ie;
631 	int error;
632 
633 	error = intr_event_create(&ie, isrc, 0, isrc->isrc_irq,
634 	    intr_isrc_pre_ithread, intr_isrc_post_ithread, intr_isrc_post_filter,
635 	    intr_isrc_assign_cpu, "%s:", isrc->isrc_name);
636 	if (error)
637 		return (error);
638 
639 	mtx_lock(&isrc_table_lock);
640 	/*
641 	 * Make sure that we do not mix the two ways
642 	 * how we handle interrupt sources. Let contested event wins.
643 	 */
644 #ifdef INTR_SOLO
645 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
646 #else
647 	if (isrc->isrc_event != NULL) {
648 #endif
649 		mtx_unlock(&isrc_table_lock);
650 		intr_event_destroy(ie);
651 		return (isrc->isrc_event != NULL ? EBUSY : 0);
652 	}
653 	isrc->isrc_event = ie;
654 	mtx_unlock(&isrc_table_lock);
655 
656 	return (0);
657 }
658 #ifdef notyet
659 /*
660  *  Destroy interrupt event for interrupt source.
661  */
662 static void
663 isrc_event_destroy(struct intr_irqsrc *isrc)
664 {
665 	struct intr_event *ie;
666 
667 	mtx_lock(&isrc_table_lock);
668 	ie = isrc->isrc_event;
669 	isrc->isrc_event = NULL;
670 	mtx_unlock(&isrc_table_lock);
671 
672 	if (ie != NULL)
673 		intr_event_destroy(ie);
674 }
675 #endif
676 /*
677  *  Add handler to interrupt source.
678  */
679 static int
680 isrc_add_handler(struct intr_irqsrc *isrc, const char *name,
681     driver_filter_t filter, driver_intr_t handler, void *arg,
682     enum intr_type flags, void **cookiep)
683 {
684 	int error;
685 
686 	if (isrc->isrc_event == NULL) {
687 		error = isrc_event_create(isrc);
688 		if (error)
689 			return (error);
690 	}
691 
692 	error = intr_event_add_handler(isrc->isrc_event, name, filter, handler,
693 	    arg, intr_priority(flags), flags, cookiep);
694 	if (error == 0) {
695 		mtx_lock(&isrc_table_lock);
696 		intrcnt_updatename(isrc);
697 		mtx_unlock(&isrc_table_lock);
698 	}
699 
700 	return (error);
701 }
702 
703 /*
704  *  Lookup interrupt controller locked.
705  */
706 static inline struct intr_pic *
707 pic_lookup_locked(device_t dev, intptr_t xref, int flags)
708 {
709 	struct intr_pic *pic;
710 
711 	mtx_assert(&pic_list_lock, MA_OWNED);
712 
713 	if (dev == NULL && xref == 0)
714 		return (NULL);
715 
716 	/* Note that pic->pic_dev is never NULL on registered PIC. */
717 	SLIST_FOREACH(pic, &pic_list, pic_next) {
718 		if ((pic->pic_flags & FLAG_TYPE_MASK) !=
719 		    (flags & FLAG_TYPE_MASK))
720 			continue;
721 
722 		if (dev == NULL) {
723 			if (xref == pic->pic_xref)
724 				return (pic);
725 		} else if (xref == 0 || pic->pic_xref == 0) {
726 			if (dev == pic->pic_dev)
727 				return (pic);
728 		} else if (xref == pic->pic_xref && dev == pic->pic_dev)
729 				return (pic);
730 	}
731 	return (NULL);
732 }
733 
734 /*
735  *  Lookup interrupt controller.
736  */
737 static struct intr_pic *
738 pic_lookup(device_t dev, intptr_t xref, int flags)
739 {
740 	struct intr_pic *pic;
741 
742 	mtx_lock(&pic_list_lock);
743 	pic = pic_lookup_locked(dev, xref, flags);
744 	mtx_unlock(&pic_list_lock);
745 	return (pic);
746 }
747 
748 /*
749  *  Create interrupt controller.
750  */
751 static struct intr_pic *
752 pic_create(device_t dev, intptr_t xref, int flags)
753 {
754 	struct intr_pic *pic;
755 
756 	mtx_lock(&pic_list_lock);
757 	pic = pic_lookup_locked(dev, xref, flags);
758 	if (pic != NULL) {
759 		mtx_unlock(&pic_list_lock);
760 		return (pic);
761 	}
762 	pic = malloc(sizeof(*pic), M_INTRNG, M_NOWAIT | M_ZERO);
763 	if (pic == NULL) {
764 		mtx_unlock(&pic_list_lock);
765 		return (NULL);
766 	}
767 	pic->pic_xref = xref;
768 	pic->pic_dev = dev;
769 	pic->pic_flags = flags;
770 	mtx_init(&pic->pic_child_lock, "pic child lock", NULL, MTX_SPIN);
771 	SLIST_INSERT_HEAD(&pic_list, pic, pic_next);
772 	mtx_unlock(&pic_list_lock);
773 
774 	return (pic);
775 }
776 #ifdef notyet
777 /*
778  *  Destroy interrupt controller.
779  */
780 static void
781 pic_destroy(device_t dev, intptr_t xref, int flags)
782 {
783 	struct intr_pic *pic;
784 
785 	mtx_lock(&pic_list_lock);
786 	pic = pic_lookup_locked(dev, xref, flags);
787 	if (pic == NULL) {
788 		mtx_unlock(&pic_list_lock);
789 		return;
790 	}
791 	SLIST_REMOVE(&pic_list, pic, intr_pic, pic_next);
792 	mtx_unlock(&pic_list_lock);
793 
794 	free(pic, M_INTRNG);
795 }
796 #endif
797 /*
798  *  Register interrupt controller.
799  */
800 struct intr_pic *
801 intr_pic_register(device_t dev, intptr_t xref)
802 {
803 	struct intr_pic *pic;
804 
805 	if (dev == NULL)
806 		return (NULL);
807 	pic = pic_create(dev, xref, FLAG_PIC);
808 	if (pic == NULL)
809 		return (NULL);
810 
811 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
812 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
813 	return (pic);
814 }
815 
816 /*
817  *  Unregister interrupt controller.
818  */
819 int
820 intr_pic_deregister(device_t dev, intptr_t xref)
821 {
822 
823 	panic("%s: not implemented", __func__);
824 }
825 
826 /*
827  *  Mark interrupt controller (itself) as a root one.
828  *
829  *  Note that only an interrupt controller can really know its position
830  *  in interrupt controller's tree. So root PIC must claim itself as a root.
831  *
832  *  In FDT case, according to ePAPR approved version 1.1 from 08 April 2011,
833  *  page 30:
834  *    "The root of the interrupt tree is determined when traversal
835  *     of the interrupt tree reaches an interrupt controller node without
836  *     an interrupts property and thus no explicit interrupt parent."
837  */
838 int
839 intr_pic_claim_root(device_t dev, intptr_t xref, intr_irq_filter_t *filter,
840     void *arg, u_int ipicount)
841 {
842 	struct intr_pic *pic;
843 
844 	pic = pic_lookup(dev, xref, FLAG_PIC);
845 	if (pic == NULL) {
846 		device_printf(dev, "not registered\n");
847 		return (EINVAL);
848 	}
849 
850 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
851 	    ("%s: Found a non-PIC controller: %s", __func__,
852 	     device_get_name(pic->pic_dev)));
853 
854 	if (filter == NULL) {
855 		device_printf(dev, "filter missing\n");
856 		return (EINVAL);
857 	}
858 
859 	/*
860 	 * Only one interrupt controllers could be on the root for now.
861 	 * Note that we further suppose that there is not threaded interrupt
862 	 * routine (handler) on the root. See intr_irq_handler().
863 	 */
864 	if (intr_irq_root_dev != NULL) {
865 		device_printf(dev, "another root already set\n");
866 		return (EBUSY);
867 	}
868 
869 	intr_irq_root_dev = dev;
870 	irq_root_filter = filter;
871 	irq_root_arg = arg;
872 	irq_root_ipicount = ipicount;
873 
874 	debugf("irq root set to %s\n", device_get_nameunit(dev));
875 	return (0);
876 }
877 
878 /*
879  * Add a handler to manage a sub range of a parents interrupts.
880  */
881 struct intr_pic *
882 intr_pic_add_handler(device_t parent, struct intr_pic *pic,
883     intr_child_irq_filter_t *filter, void *arg, uintptr_t start,
884     uintptr_t length)
885 {
886 	struct intr_pic *parent_pic;
887 	struct intr_pic_child *newchild;
888 #ifdef INVARIANTS
889 	struct intr_pic_child *child;
890 #endif
891 
892 	/* Find the parent PIC */
893 	parent_pic = pic_lookup(parent, 0, FLAG_PIC);
894 	if (parent_pic == NULL)
895 		return (NULL);
896 
897 	newchild = malloc(sizeof(*newchild), M_INTRNG, M_WAITOK | M_ZERO);
898 	newchild->pc_pic = pic;
899 	newchild->pc_filter = filter;
900 	newchild->pc_filter_arg = arg;
901 	newchild->pc_start = start;
902 	newchild->pc_length = length;
903 
904 	mtx_lock_spin(&parent_pic->pic_child_lock);
905 #ifdef INVARIANTS
906 	SLIST_FOREACH(child, &parent_pic->pic_children, pc_next) {
907 		KASSERT(child->pc_pic != pic, ("%s: Adding a child PIC twice",
908 		    __func__));
909 	}
910 #endif
911 	SLIST_INSERT_HEAD(&parent_pic->pic_children, newchild, pc_next);
912 	mtx_unlock_spin(&parent_pic->pic_child_lock);
913 
914 	return (pic);
915 }
916 
917 static int
918 intr_resolve_irq(device_t dev, intptr_t xref, struct intr_map_data *data,
919     struct intr_irqsrc **isrc)
920 {
921 	struct intr_pic *pic;
922 	struct intr_map_data_msi *msi;
923 
924 	if (data == NULL)
925 		return (EINVAL);
926 
927 	pic = pic_lookup(dev, xref,
928 	    (data->type == INTR_MAP_DATA_MSI) ? FLAG_MSI : FLAG_PIC);
929 	if (pic == NULL)
930 		return (ESRCH);
931 
932 	switch (data->type) {
933 	case INTR_MAP_DATA_MSI:
934 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
935 		    ("%s: Found a non-MSI controller: %s", __func__,
936 		     device_get_name(pic->pic_dev)));
937 		msi = (struct intr_map_data_msi *)data;
938 		*isrc = msi->isrc;
939 		return (0);
940 
941 	default:
942 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
943 		    ("%s: Found a non-PIC controller: %s", __func__,
944 		     device_get_name(pic->pic_dev)));
945 		return (PIC_MAP_INTR(pic->pic_dev, data, isrc));
946 	}
947 }
948 
949 bool
950 intr_is_per_cpu(struct resource *res)
951 {
952 	u_int res_id;
953 	struct intr_irqsrc *isrc;
954 
955 	res_id = (u_int)rman_get_start(res);
956 	isrc = intr_map_get_isrc(res_id);
957 
958 	if (isrc == NULL)
959 		panic("Attempt to get isrc for non-active resource id: %u\n",
960 		    res_id);
961 	return ((isrc->isrc_flags & INTR_ISRCF_PPI) != 0);
962 }
963 
964 int
965 intr_activate_irq(device_t dev, struct resource *res)
966 {
967 	device_t map_dev;
968 	intptr_t map_xref;
969 	struct intr_map_data *data;
970 	struct intr_irqsrc *isrc;
971 	u_int res_id;
972 	int error;
973 
974 	KASSERT(rman_get_start(res) == rman_get_end(res),
975 	    ("%s: more interrupts in resource", __func__));
976 
977 	res_id = (u_int)rman_get_start(res);
978 	if (intr_map_get_isrc(res_id) != NULL)
979 		panic("Attempt to double activation of resource id: %u\n",
980 		    res_id);
981 	intr_map_copy_map_data(res_id, &map_dev, &map_xref, &data);
982 	error = intr_resolve_irq(map_dev, map_xref, data, &isrc);
983 	if (error != 0) {
984 		free(data, M_INTRNG);
985 		/* XXX TODO DISCONECTED PICs */
986 		/* if (error == EINVAL) return(0); */
987 		return (error);
988 	}
989 	intr_map_set_isrc(res_id, isrc);
990 	rman_set_virtual(res, data);
991 	return (PIC_ACTIVATE_INTR(isrc->isrc_dev, isrc, res, data));
992 }
993 
994 int
995 intr_deactivate_irq(device_t dev, struct resource *res)
996 {
997 	struct intr_map_data *data;
998 	struct intr_irqsrc *isrc;
999 	u_int res_id;
1000 	int error;
1001 
1002 	KASSERT(rman_get_start(res) == rman_get_end(res),
1003 	    ("%s: more interrupts in resource", __func__));
1004 
1005 	res_id = (u_int)rman_get_start(res);
1006 	isrc = intr_map_get_isrc(res_id);
1007 	if (isrc == NULL)
1008 		panic("Attempt to deactivate non-active resource id: %u\n",
1009 		    res_id);
1010 
1011 	data = rman_get_virtual(res);
1012 	error = PIC_DEACTIVATE_INTR(isrc->isrc_dev, isrc, res, data);
1013 	intr_map_set_isrc(res_id, NULL);
1014 	rman_set_virtual(res, NULL);
1015 	free(data, M_INTRNG);
1016 	return (error);
1017 }
1018 
1019 int
1020 intr_setup_irq(device_t dev, struct resource *res, driver_filter_t filt,
1021     driver_intr_t hand, void *arg, int flags, void **cookiep)
1022 {
1023 	int error;
1024 	struct intr_map_data *data;
1025 	struct intr_irqsrc *isrc;
1026 	const char *name;
1027 	u_int res_id;
1028 
1029 	KASSERT(rman_get_start(res) == rman_get_end(res),
1030 	    ("%s: more interrupts in resource", __func__));
1031 
1032 	res_id = (u_int)rman_get_start(res);
1033 	isrc = intr_map_get_isrc(res_id);
1034 	if (isrc == NULL) {
1035 		/* XXX TODO DISCONECTED PICs */
1036 		return (EINVAL);
1037 	}
1038 
1039 	data = rman_get_virtual(res);
1040 	name = device_get_nameunit(dev);
1041 
1042 #ifdef INTR_SOLO
1043 	/*
1044 	 * Standard handling is done through MI interrupt framework. However,
1045 	 * some interrupts could request solely own special handling. This
1046 	 * non standard handling can be used for interrupt controllers without
1047 	 * handler (filter only), so in case that interrupt controllers are
1048 	 * chained, MI interrupt framework is called only in leaf controller.
1049 	 *
1050 	 * Note that root interrupt controller routine is served as well,
1051 	 * however in intr_irq_handler(), i.e. main system dispatch routine.
1052 	 */
1053 	if (flags & INTR_SOLO && hand != NULL) {
1054 		debugf("irq %u cannot solo on %s\n", irq, name);
1055 		return (EINVAL);
1056 	}
1057 
1058 	if (flags & INTR_SOLO) {
1059 		error = iscr_setup_filter(isrc, name, (intr_irq_filter_t *)filt,
1060 		    arg, cookiep);
1061 		debugf("irq %u setup filter error %d on %s\n", isrc->isrc_irq, error,
1062 		    name);
1063 	} else
1064 #endif
1065 		{
1066 		error = isrc_add_handler(isrc, name, filt, hand, arg, flags,
1067 		    cookiep);
1068 		debugf("irq %u add handler error %d on %s\n", isrc->isrc_irq, error, name);
1069 	}
1070 	if (error != 0)
1071 		return (error);
1072 
1073 	mtx_lock(&isrc_table_lock);
1074 	error = PIC_SETUP_INTR(isrc->isrc_dev, isrc, res, data);
1075 	if (error == 0) {
1076 		isrc->isrc_handlers++;
1077 		if (isrc->isrc_handlers == 1)
1078 			PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
1079 	}
1080 	mtx_unlock(&isrc_table_lock);
1081 	if (error != 0)
1082 		intr_event_remove_handler(*cookiep);
1083 	return (error);
1084 }
1085 
1086 int
1087 intr_teardown_irq(device_t dev, struct resource *res, void *cookie)
1088 {
1089 	int error;
1090 	struct intr_map_data *data;
1091 	struct intr_irqsrc *isrc;
1092 	u_int res_id;
1093 
1094 	KASSERT(rman_get_start(res) == rman_get_end(res),
1095 	    ("%s: more interrupts in resource", __func__));
1096 
1097 	res_id = (u_int)rman_get_start(res);
1098 	isrc = intr_map_get_isrc(res_id);
1099 	if (isrc == NULL || isrc->isrc_handlers == 0)
1100 		return (EINVAL);
1101 
1102 	data = rman_get_virtual(res);
1103 
1104 #ifdef INTR_SOLO
1105 	if (isrc->isrc_filter != NULL) {
1106 		if (isrc != cookie)
1107 			return (EINVAL);
1108 
1109 		mtx_lock(&isrc_table_lock);
1110 		isrc->isrc_filter = NULL;
1111 		isrc->isrc_arg = NULL;
1112 		isrc->isrc_handlers = 0;
1113 		PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1114 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1115 		isrc_update_name(isrc, NULL);
1116 		mtx_unlock(&isrc_table_lock);
1117 		return (0);
1118 	}
1119 #endif
1120 	if (isrc != intr_handler_source(cookie))
1121 		return (EINVAL);
1122 
1123 	error = intr_event_remove_handler(cookie);
1124 	if (error == 0) {
1125 		mtx_lock(&isrc_table_lock);
1126 		isrc->isrc_handlers--;
1127 		if (isrc->isrc_handlers == 0)
1128 			PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1129 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1130 		intrcnt_updatename(isrc);
1131 		mtx_unlock(&isrc_table_lock);
1132 	}
1133 	return (error);
1134 }
1135 
1136 int
1137 intr_describe_irq(device_t dev, struct resource *res, void *cookie,
1138     const char *descr)
1139 {
1140 	int error;
1141 	struct intr_irqsrc *isrc;
1142 	u_int res_id;
1143 
1144 	KASSERT(rman_get_start(res) == rman_get_end(res),
1145 	    ("%s: more interrupts in resource", __func__));
1146 
1147 	res_id = (u_int)rman_get_start(res);
1148 	isrc = intr_map_get_isrc(res_id);
1149 	if (isrc == NULL || isrc->isrc_handlers == 0)
1150 		return (EINVAL);
1151 #ifdef INTR_SOLO
1152 	if (isrc->isrc_filter != NULL) {
1153 		if (isrc != cookie)
1154 			return (EINVAL);
1155 
1156 		mtx_lock(&isrc_table_lock);
1157 		isrc_update_name(isrc, descr);
1158 		mtx_unlock(&isrc_table_lock);
1159 		return (0);
1160 	}
1161 #endif
1162 	error = intr_event_describe_handler(isrc->isrc_event, cookie, descr);
1163 	if (error == 0) {
1164 		mtx_lock(&isrc_table_lock);
1165 		intrcnt_updatename(isrc);
1166 		mtx_unlock(&isrc_table_lock);
1167 	}
1168 	return (error);
1169 }
1170 
1171 #ifdef SMP
1172 int
1173 intr_bind_irq(device_t dev, struct resource *res, int cpu)
1174 {
1175 	struct intr_irqsrc *isrc;
1176 	u_int res_id;
1177 
1178 	KASSERT(rman_get_start(res) == rman_get_end(res),
1179 	    ("%s: more interrupts in resource", __func__));
1180 
1181 	res_id = (u_int)rman_get_start(res);
1182 	isrc = intr_map_get_isrc(res_id);
1183 	if (isrc == NULL || isrc->isrc_handlers == 0)
1184 		return (EINVAL);
1185 #ifdef INTR_SOLO
1186 	if (isrc->isrc_filter != NULL)
1187 		return (intr_isrc_assign_cpu(isrc, cpu));
1188 #endif
1189 	return (intr_event_bind(isrc->isrc_event, cpu));
1190 }
1191 
1192 /*
1193  * Return the CPU that the next interrupt source should use.
1194  * For now just returns the next CPU according to round-robin.
1195  */
1196 u_int
1197 intr_irq_next_cpu(u_int last_cpu, cpuset_t *cpumask)
1198 {
1199 	u_int cpu;
1200 
1201 	KASSERT(!CPU_EMPTY(cpumask), ("%s: Empty CPU mask", __func__));
1202 	if (!irq_assign_cpu || mp_ncpus == 1) {
1203 		cpu = PCPU_GET(cpuid);
1204 
1205 		if (CPU_ISSET(cpu, cpumask))
1206 			return (curcpu);
1207 
1208 		return (CPU_FFS(cpumask) - 1);
1209 	}
1210 
1211 	do {
1212 		last_cpu++;
1213 		if (last_cpu > mp_maxid)
1214 			last_cpu = 0;
1215 	} while (!CPU_ISSET(last_cpu, cpumask));
1216 	return (last_cpu);
1217 }
1218 
1219 #ifndef EARLY_AP_STARTUP
1220 /*
1221  *  Distribute all the interrupt sources among the available
1222  *  CPUs once the AP's have been launched.
1223  */
1224 static void
1225 intr_irq_shuffle(void *arg __unused)
1226 {
1227 	struct intr_irqsrc *isrc;
1228 	u_int i;
1229 
1230 	if (mp_ncpus == 1)
1231 		return;
1232 
1233 	mtx_lock(&isrc_table_lock);
1234 	irq_assign_cpu = true;
1235 	for (i = 0; i < NIRQ; i++) {
1236 		isrc = irq_sources[i];
1237 		if (isrc == NULL || isrc->isrc_handlers == 0 ||
1238 		    isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI))
1239 			continue;
1240 
1241 		if (isrc->isrc_event != NULL &&
1242 		    isrc->isrc_flags & INTR_ISRCF_BOUND &&
1243 		    isrc->isrc_event->ie_cpu != CPU_FFS(&isrc->isrc_cpu) - 1)
1244 			panic("%s: CPU inconsistency", __func__);
1245 
1246 		if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0)
1247 			CPU_ZERO(&isrc->isrc_cpu); /* start again */
1248 
1249 		/*
1250 		 * We are in wicked position here if the following call fails
1251 		 * for bound ISRC. The best thing we can do is to clear
1252 		 * isrc_cpu so inconsistency with ie_cpu will be detectable.
1253 		 */
1254 		if (PIC_BIND_INTR(isrc->isrc_dev, isrc) != 0)
1255 			CPU_ZERO(&isrc->isrc_cpu);
1256 	}
1257 	mtx_unlock(&isrc_table_lock);
1258 }
1259 SYSINIT(intr_irq_shuffle, SI_SUB_SMP, SI_ORDER_SECOND, intr_irq_shuffle, NULL);
1260 #endif /* !EARLY_AP_STARTUP */
1261 
1262 #else
1263 u_int
1264 intr_irq_next_cpu(u_int current_cpu, cpuset_t *cpumask)
1265 {
1266 
1267 	return (PCPU_GET(cpuid));
1268 }
1269 #endif /* SMP */
1270 
1271 /*
1272  * Allocate memory for new intr_map_data structure.
1273  * Initialize common fields.
1274  */
1275 struct intr_map_data *
1276 intr_alloc_map_data(enum intr_map_data_type type, size_t len, int flags)
1277 {
1278 	struct intr_map_data *data;
1279 
1280 	data = malloc(len, M_INTRNG, flags);
1281 	data->type = type;
1282 	data->len = len;
1283 	return (data);
1284 }
1285 
1286 void intr_free_intr_map_data(struct intr_map_data *data)
1287 {
1288 
1289 	free(data, M_INTRNG);
1290 }
1291 
1292 /*
1293  *  Register a MSI/MSI-X interrupt controller
1294  */
1295 int
1296 intr_msi_register(device_t dev, intptr_t xref)
1297 {
1298 	struct intr_pic *pic;
1299 
1300 	if (dev == NULL)
1301 		return (EINVAL);
1302 	pic = pic_create(dev, xref, FLAG_MSI);
1303 	if (pic == NULL)
1304 		return (ENOMEM);
1305 
1306 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
1307 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
1308 	return (0);
1309 }
1310 
1311 int
1312 intr_alloc_msi(device_t pci, device_t child, intptr_t xref, int count,
1313     int maxcount, int *irqs)
1314 {
1315 	struct iommu_domain *domain;
1316 	struct intr_irqsrc **isrc;
1317 	struct intr_pic *pic;
1318 	device_t pdev;
1319 	struct intr_map_data_msi *msi;
1320 	int err, i;
1321 
1322 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1323 	if (pic == NULL)
1324 		return (ESRCH);
1325 
1326 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1327 	    ("%s: Found a non-MSI controller: %s", __func__,
1328 	     device_get_name(pic->pic_dev)));
1329 
1330 	/*
1331 	 * If this is the first time we have used this context ask the
1332 	 * interrupt controller to map memory the msi source will need.
1333 	 */
1334 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1335 	if (err != 0)
1336 		return (err);
1337 
1338 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1339 	err = MSI_ALLOC_MSI(pic->pic_dev, child, count, maxcount, &pdev, isrc);
1340 	if (err != 0) {
1341 		free(isrc, M_INTRNG);
1342 		return (err);
1343 	}
1344 
1345 	for (i = 0; i < count; i++) {
1346 		isrc[i]->isrc_iommu = domain;
1347 		msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1348 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1349 		msi-> isrc = isrc[i];
1350 
1351 		irqs[i] = intr_map_irq(pic->pic_dev, xref,
1352 		    (struct intr_map_data *)msi);
1353 	}
1354 	free(isrc, M_INTRNG);
1355 
1356 	return (err);
1357 }
1358 
1359 int
1360 intr_release_msi(device_t pci, device_t child, intptr_t xref, int count,
1361     int *irqs)
1362 {
1363 	struct intr_irqsrc **isrc;
1364 	struct intr_pic *pic;
1365 	struct intr_map_data_msi *msi;
1366 	int i, err;
1367 
1368 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1369 	if (pic == NULL)
1370 		return (ESRCH);
1371 
1372 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1373 	    ("%s: Found a non-MSI controller: %s", __func__,
1374 	     device_get_name(pic->pic_dev)));
1375 
1376 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1377 
1378 	for (i = 0; i < count; i++) {
1379 		msi = (struct intr_map_data_msi *)
1380 		    intr_map_get_map_data(irqs[i]);
1381 		KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1382 		    ("%s: irq %d map data is not MSI", __func__,
1383 		    irqs[i]));
1384 		isrc[i] = msi->isrc;
1385 	}
1386 
1387 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1388 
1389 	err = MSI_RELEASE_MSI(pic->pic_dev, child, count, isrc);
1390 
1391 	for (i = 0; i < count; i++) {
1392 		if (isrc[i] != NULL)
1393 			intr_unmap_irq(irqs[i]);
1394 	}
1395 
1396 	free(isrc, M_INTRNG);
1397 	return (err);
1398 }
1399 
1400 int
1401 intr_alloc_msix(device_t pci, device_t child, intptr_t xref, int *irq)
1402 {
1403 	struct iommu_domain *domain;
1404 	struct intr_irqsrc *isrc;
1405 	struct intr_pic *pic;
1406 	device_t pdev;
1407 	struct intr_map_data_msi *msi;
1408 	int err;
1409 
1410 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1411 	if (pic == NULL)
1412 		return (ESRCH);
1413 
1414 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1415 	    ("%s: Found a non-MSI controller: %s", __func__,
1416 	     device_get_name(pic->pic_dev)));
1417 
1418 	/*
1419 	 * If this is the first time we have used this context ask the
1420 	 * interrupt controller to map memory the msi source will need.
1421 	 */
1422 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1423 	if (err != 0)
1424 		return (err);
1425 
1426 	err = MSI_ALLOC_MSIX(pic->pic_dev, child, &pdev, &isrc);
1427 	if (err != 0)
1428 		return (err);
1429 
1430 	isrc->isrc_iommu = domain;
1431 	msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1432 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1433 	msi->isrc = isrc;
1434 	*irq = intr_map_irq(pic->pic_dev, xref, (struct intr_map_data *)msi);
1435 	return (0);
1436 }
1437 
1438 int
1439 intr_release_msix(device_t pci, device_t child, intptr_t xref, int irq)
1440 {
1441 	struct intr_irqsrc *isrc;
1442 	struct intr_pic *pic;
1443 	struct intr_map_data_msi *msi;
1444 	int err;
1445 
1446 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1447 	if (pic == NULL)
1448 		return (ESRCH);
1449 
1450 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1451 	    ("%s: Found a non-MSI controller: %s", __func__,
1452 	     device_get_name(pic->pic_dev)));
1453 
1454 	msi = (struct intr_map_data_msi *)
1455 	    intr_map_get_map_data(irq);
1456 	KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1457 	    ("%s: irq %d map data is not MSI", __func__,
1458 	    irq));
1459 	isrc = msi->isrc;
1460 	if (isrc == NULL) {
1461 		intr_unmap_irq(irq);
1462 		return (EINVAL);
1463 	}
1464 
1465 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1466 
1467 	err = MSI_RELEASE_MSIX(pic->pic_dev, child, isrc);
1468 	intr_unmap_irq(irq);
1469 
1470 	return (err);
1471 }
1472 
1473 int
1474 intr_map_msi(device_t pci, device_t child, intptr_t xref, int irq,
1475     uint64_t *addr, uint32_t *data)
1476 {
1477 	struct intr_irqsrc *isrc;
1478 	struct intr_pic *pic;
1479 	int err;
1480 
1481 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1482 	if (pic == NULL)
1483 		return (ESRCH);
1484 
1485 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1486 	    ("%s: Found a non-MSI controller: %s", __func__,
1487 	     device_get_name(pic->pic_dev)));
1488 
1489 	isrc = intr_map_get_isrc(irq);
1490 	if (isrc == NULL)
1491 		return (EINVAL);
1492 
1493 	err = MSI_MAP_MSI(pic->pic_dev, child, isrc, addr, data);
1494 
1495 #ifdef IOMMU
1496 	if (isrc->isrc_iommu != NULL)
1497 		iommu_translate_msi(isrc->isrc_iommu, addr);
1498 #endif
1499 
1500 	return (err);
1501 }
1502 
/*
 * Empty routine.  The prototype directly above the definition gives it a
 * prior declaration.
 * NOTE(review): presumably kept only because code outside this file still
 * references the symbol -- confirm before removing.
 */
void	dosoftints(void);

void
dosoftints(void)
{
	/* Intentionally empty. */
}
1508 
1509 #ifdef SMP
1510 /*
1511  *  Init interrupt controller on another CPU.
1512  */
1513 void
1514 intr_pic_init_secondary(void)
1515 {
1516 
1517 	/*
1518 	 * QQQ: Only root PIC is aware of other CPUs ???
1519 	 */
1520 	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
1521 
1522 	//mtx_lock(&isrc_table_lock);
1523 	PIC_INIT_SECONDARY(intr_irq_root_dev);
1524 	//mtx_unlock(&isrc_table_lock);
1525 }
1526 #endif
1527 
1528 #ifdef DDB
1529 DB_SHOW_COMMAND(irqs, db_show_irqs)
1530 {
1531 	u_int i, irqsum;
1532 	u_long num;
1533 	struct intr_irqsrc *isrc;
1534 
1535 	for (irqsum = 0, i = 0; i < NIRQ; i++) {
1536 		isrc = irq_sources[i];
1537 		if (isrc == NULL)
1538 			continue;
1539 
1540 		num = isrc->isrc_count != NULL ? isrc->isrc_count[0] : 0;
1541 		db_printf("irq%-3u <%s>: cpu %02lx%s cnt %lu\n", i,
1542 		    isrc->isrc_name, isrc->isrc_cpu.__bits[0],
1543 		    isrc->isrc_flags & INTR_ISRCF_BOUND ? " (bound)" : "", num);
1544 		irqsum += num;
1545 	}
1546 	db_printf("irq total %u\n", irqsum);
1547 }
1548 #endif
1549 
1550 /*
1551  * Interrupt mapping table functions.
1552  *
 * Please keep this part separate; it could later be turned into an
 * extension of the standard resource code.
1555  */
1556 struct intr_map_entry
1557 {
1558 	device_t 		dev;
1559 	intptr_t 		xref;
1560 	struct intr_map_data 	*map_data;
1561 	struct intr_irqsrc 	*isrc;
1562 	/* XXX TODO DISCONECTED PICs */
1563 	/*int			flags */
1564 };
1565 
1566 /* XXX Convert irq_map[] to dynamicaly expandable one. */
1567 static struct intr_map_entry *irq_map[2 * NIRQ];
1568 static int irq_map_count = nitems(irq_map);
1569 static int irq_map_first_free_idx;
1570 static struct mtx irq_map_lock;
1571 
1572 static struct intr_irqsrc *
1573 intr_map_get_isrc(u_int res_id)
1574 {
1575 	struct intr_irqsrc *isrc;
1576 
1577 	isrc = NULL;
1578 	mtx_lock(&irq_map_lock);
1579 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1580 		isrc = irq_map[res_id]->isrc;
1581 	mtx_unlock(&irq_map_lock);
1582 
1583 	return (isrc);
1584 }
1585 
1586 static void
1587 intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc)
1588 {
1589 
1590 	mtx_lock(&irq_map_lock);
1591 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1592 		irq_map[res_id]->isrc = isrc;
1593 	mtx_unlock(&irq_map_lock);
1594 }
1595 
1596 /*
1597  * Get a copy of intr_map_entry data
1598  */
1599 static struct intr_map_data *
1600 intr_map_get_map_data(u_int res_id)
1601 {
1602 	struct intr_map_data *data;
1603 
1604 	data = NULL;
1605 	mtx_lock(&irq_map_lock);
1606 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1607 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1608 	data = irq_map[res_id]->map_data;
1609 	mtx_unlock(&irq_map_lock);
1610 
1611 	return (data);
1612 }
1613 
1614 /*
1615  * Get a copy of intr_map_entry data
1616  */
1617 static void
1618 intr_map_copy_map_data(u_int res_id, device_t *map_dev, intptr_t *map_xref,
1619     struct intr_map_data **data)
1620 {
1621 	size_t len;
1622 
1623 	len = 0;
1624 	mtx_lock(&irq_map_lock);
1625 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1626 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1627 	if (irq_map[res_id]->map_data != NULL)
1628 		len = irq_map[res_id]->map_data->len;
1629 	mtx_unlock(&irq_map_lock);
1630 
1631 	if (len == 0)
1632 		*data = NULL;
1633 	else
1634 		*data = malloc(len, M_INTRNG, M_WAITOK | M_ZERO);
1635 	mtx_lock(&irq_map_lock);
1636 	if (irq_map[res_id] == NULL)
1637 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1638 	if (len != 0) {
1639 		if (len != irq_map[res_id]->map_data->len)
1640 			panic("Resource id: %u has changed.\n", res_id);
1641 		memcpy(*data, irq_map[res_id]->map_data, len);
1642 	}
1643 	*map_dev = irq_map[res_id]->dev;
1644 	*map_xref = irq_map[res_id]->xref;
1645 	mtx_unlock(&irq_map_lock);
1646 }
1647 
1648 /*
1649  * Allocate and fill new entry in irq_map table.
1650  */
1651 u_int
1652 intr_map_irq(device_t dev, intptr_t xref, struct intr_map_data *data)
1653 {
1654 	u_int i;
1655 	struct intr_map_entry *entry;
1656 
1657 	/* Prepare new entry first. */
1658 	entry = malloc(sizeof(*entry), M_INTRNG, M_WAITOK | M_ZERO);
1659 
1660 	entry->dev = dev;
1661 	entry->xref = xref;
1662 	entry->map_data = data;
1663 	entry->isrc = NULL;
1664 
1665 	mtx_lock(&irq_map_lock);
1666 	for (i = irq_map_first_free_idx; i < irq_map_count; i++) {
1667 		if (irq_map[i] == NULL) {
1668 			irq_map[i] = entry;
1669 			irq_map_first_free_idx = i + 1;
1670 			mtx_unlock(&irq_map_lock);
1671 			return (i);
1672 		}
1673 	}
1674 	mtx_unlock(&irq_map_lock);
1675 
1676 	/* XXX Expand irq_map table */
1677 	panic("IRQ mapping table is full.");
1678 }
1679 
1680 /*
1681  * Remove and free mapping entry.
1682  */
1683 void
1684 intr_unmap_irq(u_int res_id)
1685 {
1686 	struct intr_map_entry *entry;
1687 
1688 	mtx_lock(&irq_map_lock);
1689 	if ((res_id >= irq_map_count) || (irq_map[res_id] == NULL))
1690 		panic("Attempt to unmap invalid resource id: %u\n", res_id);
1691 	entry = irq_map[res_id];
1692 	irq_map[res_id] = NULL;
1693 	irq_map_first_free_idx = res_id;
1694 	mtx_unlock(&irq_map_lock);
1695 	intr_free_intr_map_data(entry->map_data);
1696 	free(entry, M_INTRNG);
1697 }
1698 
1699 /*
1700  * Clone mapping entry.
1701  */
1702 u_int
1703 intr_map_clone_irq(u_int old_res_id)
1704 {
1705 	device_t map_dev;
1706 	intptr_t map_xref;
1707 	struct intr_map_data *data;
1708 
1709 	intr_map_copy_map_data(old_res_id, &map_dev, &map_xref, &data);
1710 	return (intr_map_irq(map_dev, map_xref, data));
1711 }
1712 
1713 static void
1714 intr_map_init(void *dummy __unused)
1715 {
1716 
1717 	mtx_init(&irq_map_lock, "intr map table", NULL, MTX_DEF);
1718 }
1719 SYSINIT(intr_map_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_map_init, NULL);
1720