xref: /freebsd/sys/kern/subr_intr.c (revision 54c1a65736ec012b583ade1d53c477e182c574e4)
1 /*-
2  * Copyright (c) 2015-2016 Svatopluk Kraus
3  * Copyright (c) 2015-2016 Michal Meloun
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 /*
32  *	New-style Interrupt Framework
33  *
34  *  TODO: - add support for disconnected PICs.
35  *        - to support IPI (PPI) enabling on other CPUs if already started.
36  *        - to complete things for removable PICs.
37  */
38 
39 #include "opt_ddb.h"
40 #include "opt_hwpmc_hooks.h"
41 #include "opt_iommu.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/syslog.h>
49 #include <sys/malloc.h>
50 #include <sys/proc.h>
51 #include <sys/queue.h>
52 #include <sys/bus.h>
53 #include <sys/interrupt.h>
54 #include <sys/taskqueue.h>
55 #include <sys/tree.h>
56 #include <sys/conf.h>
57 #include <sys/cpuset.h>
58 #include <sys/rman.h>
59 #include <sys/sched.h>
60 #include <sys/smp.h>
61 #include <sys/sysctl.h>
62 #include <sys/vmmeter.h>
63 #ifdef HWPMC_HOOKS
64 #include <sys/pmckern.h>
65 #endif
66 
67 #include <machine/atomic.h>
68 #include <machine/intr.h>
69 #include <machine/cpu.h>
70 #include <machine/smp.h>
71 #include <machine/stdarg.h>
72 
73 #ifdef DDB
74 #include <ddb/ddb.h>
75 #endif
76 
77 #ifdef IOMMU
78 #include <dev/iommu/iommu_msi.h>
79 #endif
80 
81 #include "pic_if.h"
82 #include "msi_if.h"
83 
84 #define	INTRNAME_LEN	(2*MAXCOMLEN + 1)
85 
86 #ifdef DEBUG
87 #define debugf(fmt, args...) do { printf("%s(): ", __func__);	\
88     printf(fmt,##args); } while (0)
89 #else
90 #define debugf(fmt, args...)
91 #endif
92 
93 MALLOC_DECLARE(M_INTRNG);
94 MALLOC_DEFINE(M_INTRNG, "intr", "intr interrupt handling");
95 
96 /* Main interrupt handler called from assembler -> 'hidden' for C code. */
97 void intr_irq_handler(struct trapframe *tf);
98 
99 /* Root interrupt controller stuff. */
100 device_t intr_irq_root_dev;
101 static intr_irq_filter_t *irq_root_filter;
102 static void *irq_root_arg;
103 static u_int irq_root_ipicount;
104 
105 struct intr_pic_child {
106 	SLIST_ENTRY(intr_pic_child)	 pc_next;
107 	struct intr_pic			*pc_pic;
108 	intr_child_irq_filter_t		*pc_filter;
109 	void				*pc_filter_arg;
110 	uintptr_t			 pc_start;
111 	uintptr_t			 pc_length;
112 };
113 
114 /* Interrupt controller definition. */
115 struct intr_pic {
116 	SLIST_ENTRY(intr_pic)	pic_next;
117 	intptr_t		pic_xref;	/* hardware identification */
118 	device_t		pic_dev;
119 /* Only one of FLAG_PIC or FLAG_MSI may be set */
120 #define	FLAG_PIC	(1 << 0)
121 #define	FLAG_MSI	(1 << 1)
122 #define	FLAG_TYPE_MASK	(FLAG_PIC | FLAG_MSI)
123 	u_int			pic_flags;
124 	struct mtx		pic_child_lock;
125 	SLIST_HEAD(, intr_pic_child) pic_children;
126 };
127 
128 static struct mtx pic_list_lock;
129 static SLIST_HEAD(, intr_pic) pic_list;
130 
131 static struct intr_pic *pic_lookup(device_t dev, intptr_t xref, int flags);
132 
133 /* Interrupt source definition. */
134 static struct mtx isrc_table_lock;
135 static struct intr_irqsrc **irq_sources;
136 u_int irq_next_free;
137 
138 #ifdef SMP
139 #ifdef EARLY_AP_STARTUP
140 static bool irq_assign_cpu = true;
141 #else
142 static bool irq_assign_cpu = false;
143 #endif
144 #endif
145 
146 u_int intr_nirq = NIRQ;
147 SYSCTL_UINT(_machdep, OID_AUTO, nirq, CTLFLAG_RDTUN, &intr_nirq, 0,
148     "Number of IRQs");
149 
150 /* Data for MI statistics reporting. */
151 u_long *intrcnt;
152 char *intrnames;
153 size_t sintrcnt;
154 size_t sintrnames;
155 static u_int intrcnt_index;
156 
157 static struct intr_irqsrc *intr_map_get_isrc(u_int res_id);
158 static void intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc);
159 static struct intr_map_data * intr_map_get_map_data(u_int res_id);
160 static void intr_map_copy_map_data(u_int res_id, device_t *dev, intptr_t *xref,
161     struct intr_map_data **data);
162 
163 /*
164  *  Interrupt framework initialization routine.
165  */
166 static void
167 intr_irq_init(void *dummy __unused)
168 {
169 	u_int intrcnt_count;
170 
171 	SLIST_INIT(&pic_list);
172 	mtx_init(&pic_list_lock, "intr pic list", NULL, MTX_DEF);
173 
174 	mtx_init(&isrc_table_lock, "intr isrc table", NULL, MTX_DEF);
175 
176 	/*
177 	 * - 2 counters for each I/O interrupt.
178 	 * - MAXCPU counters for each IPI counters for SMP.
179 	 */
180 	intrcnt_count = intr_nirq * 2;
181 #ifdef SMP
182 	intrcnt_count += INTR_IPI_COUNT * MAXCPU;
183 #endif
184 
185 	intrcnt = mallocarray(intrcnt_count, sizeof(u_long), M_INTRNG,
186 	    M_WAITOK | M_ZERO);
187 	intrnames = mallocarray(intrcnt_count, INTRNAME_LEN, M_INTRNG,
188 	    M_WAITOK | M_ZERO);
189 	sintrcnt = intrcnt_count * sizeof(u_long);
190 	sintrnames = intrcnt_count * INTRNAME_LEN;
191 	irq_sources = mallocarray(intr_nirq, sizeof(struct intr_irqsrc*),
192 	    M_INTRNG, M_WAITOK | M_ZERO);
193 }
194 SYSINIT(intr_irq_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_irq_init, NULL);
195 
196 static void
197 intrcnt_setname(const char *name, int index)
198 {
199 
200 	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
201 	    INTRNAME_LEN - 1, name);
202 }
203 
204 /*
205  *  Update name for interrupt source with interrupt event.
206  */
207 static void
208 intrcnt_updatename(struct intr_irqsrc *isrc)
209 {
210 
211 	/* QQQ: What about stray counter name? */
212 	mtx_assert(&isrc_table_lock, MA_OWNED);
213 	intrcnt_setname(isrc->isrc_event->ie_fullname, isrc->isrc_index);
214 }
215 
216 /*
217  *  Virtualization for interrupt source interrupt counter increment.
218  */
219 static inline void
220 isrc_increment_count(struct intr_irqsrc *isrc)
221 {
222 
223 	if (isrc->isrc_flags & INTR_ISRCF_PPI)
224 		atomic_add_long(&isrc->isrc_count[0], 1);
225 	else
226 		isrc->isrc_count[0]++;
227 }
228 
229 /*
230  *  Virtualization for interrupt source interrupt stray counter increment.
231  */
232 static inline void
233 isrc_increment_straycount(struct intr_irqsrc *isrc)
234 {
235 
236 	isrc->isrc_count[1]++;
237 }
238 
239 /*
240  *  Virtualization for interrupt source interrupt name update.
241  */
242 static void
243 isrc_update_name(struct intr_irqsrc *isrc, const char *name)
244 {
245 	char str[INTRNAME_LEN];
246 
247 	mtx_assert(&isrc_table_lock, MA_OWNED);
248 
249 	if (name != NULL) {
250 		snprintf(str, INTRNAME_LEN, "%s: %s", isrc->isrc_name, name);
251 		intrcnt_setname(str, isrc->isrc_index);
252 		snprintf(str, INTRNAME_LEN, "stray %s: %s", isrc->isrc_name,
253 		    name);
254 		intrcnt_setname(str, isrc->isrc_index + 1);
255 	} else {
256 		snprintf(str, INTRNAME_LEN, "%s:", isrc->isrc_name);
257 		intrcnt_setname(str, isrc->isrc_index);
258 		snprintf(str, INTRNAME_LEN, "stray %s:", isrc->isrc_name);
259 		intrcnt_setname(str, isrc->isrc_index + 1);
260 	}
261 }
262 
263 /*
264  *  Virtualization for interrupt source interrupt counters setup.
265  */
266 static void
267 isrc_setup_counters(struct intr_irqsrc *isrc)
268 {
269 	u_int index;
270 
271 	/*
272 	 *  XXX - it does not work well with removable controllers and
273 	 *        interrupt sources !!!
274 	 */
275 	index = atomic_fetchadd_int(&intrcnt_index, 2);
276 	isrc->isrc_index = index;
277 	isrc->isrc_count = &intrcnt[index];
278 	isrc_update_name(isrc, NULL);
279 }
280 
/*
 *  Release the counters of an interrupt source.  Releasing is not
 *  supported yet, so reaching this function panics.
 */
static void
isrc_release_counters(struct intr_irqsrc *isrc)
{

	panic("%s: not implemented", __func__);
}
290 
#ifdef SMP
/*
 *  Reserve MAXCPU consecutive counter slots for one IPI and name them
 *  "cpu<N>:<name>".  Returns a pointer to the first reserved counter.
 */
u_long *
intr_ipi_setup_counters(const char *name)
{
	char str[INTRNAME_LEN];
	u_int base, cpu;

	base = atomic_fetchadd_int(&intrcnt_index, MAXCPU);
	for (cpu = 0; cpu < MAXCPU; cpu++) {
		snprintf(str, INTRNAME_LEN, "cpu%d:%s", cpu, name);
		intrcnt_setname(str, base + cpu);
	}
	return (intrcnt + base);
}
#endif
309 
310 /*
311  *  Main interrupt dispatch handler. It's called straight
312  *  from the assembler, where CPU interrupt is served.
313  */
314 void
315 intr_irq_handler(struct trapframe *tf)
316 {
317 	struct trapframe * oldframe;
318 	struct thread * td;
319 
320 	KASSERT(irq_root_filter != NULL, ("%s: no filter", __func__));
321 
322 	VM_CNT_INC(v_intr);
323 	critical_enter();
324 	td = curthread;
325 	oldframe = td->td_intr_frame;
326 	td->td_intr_frame = tf;
327 	irq_root_filter(irq_root_arg);
328 	td->td_intr_frame = oldframe;
329 	critical_exit();
330 #ifdef HWPMC_HOOKS
331 	if (pmc_hook && TRAPF_USERMODE(tf) &&
332 	    (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN))
333 		pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, tf);
334 #endif
335 }
336 
337 int
338 intr_child_irq_handler(struct intr_pic *parent, uintptr_t irq)
339 {
340 	struct intr_pic_child *child;
341 	bool found;
342 
343 	found = false;
344 	mtx_lock_spin(&parent->pic_child_lock);
345 	SLIST_FOREACH(child, &parent->pic_children, pc_next) {
346 		if (child->pc_start <= irq &&
347 		    irq < (child->pc_start + child->pc_length)) {
348 			found = true;
349 			break;
350 		}
351 	}
352 	mtx_unlock_spin(&parent->pic_child_lock);
353 
354 	if (found)
355 		return (child->pc_filter(child->pc_filter_arg, irq));
356 
357 	return (FILTER_STRAY);
358 }
359 
360 /*
361  *  interrupt controller dispatch function for interrupts. It should
362  *  be called straight from the interrupt controller, when associated interrupt
363  *  source is learned.
364  */
365 int
366 intr_isrc_dispatch(struct intr_irqsrc *isrc, struct trapframe *tf)
367 {
368 
369 	KASSERT(isrc != NULL, ("%s: no source", __func__));
370 
371 	isrc_increment_count(isrc);
372 
373 #ifdef INTR_SOLO
374 	if (isrc->isrc_filter != NULL) {
375 		int error;
376 		error = isrc->isrc_filter(isrc->isrc_arg, tf);
377 		PIC_POST_FILTER(isrc->isrc_dev, isrc);
378 		if (error == FILTER_HANDLED)
379 			return (0);
380 	} else
381 #endif
382 	if (isrc->isrc_event != NULL) {
383 		if (intr_event_handle(isrc->isrc_event, tf) == 0)
384 			return (0);
385 	}
386 
387 	isrc_increment_straycount(isrc);
388 	return (EINVAL);
389 }
390 
391 /*
392  *  Alloc unique interrupt number (resource handle) for interrupt source.
393  *
394  *  There could be various strategies how to allocate free interrupt number
395  *  (resource handle) for new interrupt source.
396  *
397  *  1. Handles are always allocated forward, so handles are not recycled
398  *     immediately. However, if only one free handle left which is reused
399  *     constantly...
400  */
401 static inline int
402 isrc_alloc_irq(struct intr_irqsrc *isrc)
403 {
404 	u_int irq;
405 
406 	mtx_assert(&isrc_table_lock, MA_OWNED);
407 
408 	if (irq_next_free >= intr_nirq)
409 		return (ENOSPC);
410 
411 	for (irq = irq_next_free; irq < intr_nirq; irq++) {
412 		if (irq_sources[irq] == NULL)
413 			goto found;
414 	}
415 	for (irq = 0; irq < irq_next_free; irq++) {
416 		if (irq_sources[irq] == NULL)
417 			goto found;
418 	}
419 
420 	irq_next_free = intr_nirq;
421 	return (ENOSPC);
422 
423 found:
424 	isrc->isrc_irq = irq;
425 	irq_sources[irq] = isrc;
426 
427 	irq_next_free = irq + 1;
428 	if (irq_next_free >= intr_nirq)
429 		irq_next_free = 0;
430 	return (0);
431 }
432 
433 /*
434  *  Free unique interrupt number (resource handle) from interrupt source.
435  */
436 static inline int
437 isrc_free_irq(struct intr_irqsrc *isrc)
438 {
439 
440 	mtx_assert(&isrc_table_lock, MA_OWNED);
441 
442 	if (isrc->isrc_irq >= intr_nirq)
443 		return (EINVAL);
444 	if (irq_sources[isrc->isrc_irq] != isrc)
445 		return (EINVAL);
446 
447 	irq_sources[isrc->isrc_irq] = NULL;
448 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
449 
450 	/*
451 	 * If we are recovering from the state irq_sources table is full,
452 	 * then the following allocation should check the entire table. This
453 	 * will ensure maximum separation of allocation order from release
454 	 * order.
455 	 */
456 	if (irq_next_free >= intr_nirq)
457 		irq_next_free = 0;
458 
459 	return (0);
460 }
461 
462 /*
463  *  Initialize interrupt source and register it into global interrupt table.
464  */
465 int
466 intr_isrc_register(struct intr_irqsrc *isrc, device_t dev, u_int flags,
467     const char *fmt, ...)
468 {
469 	int error;
470 	va_list ap;
471 
472 	bzero(isrc, sizeof(struct intr_irqsrc));
473 	isrc->isrc_dev = dev;
474 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
475 	isrc->isrc_flags = flags;
476 
477 	va_start(ap, fmt);
478 	vsnprintf(isrc->isrc_name, INTR_ISRC_NAMELEN, fmt, ap);
479 	va_end(ap);
480 
481 	mtx_lock(&isrc_table_lock);
482 	error = isrc_alloc_irq(isrc);
483 	if (error != 0) {
484 		mtx_unlock(&isrc_table_lock);
485 		return (error);
486 	}
487 	/*
488 	 * Setup interrupt counters, but not for IPI sources. Those are setup
489 	 * later and only for used ones (up to INTR_IPI_COUNT) to not exhaust
490 	 * our counter pool.
491 	 */
492 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
493 		isrc_setup_counters(isrc);
494 	mtx_unlock(&isrc_table_lock);
495 	return (0);
496 }
497 
498 /*
499  *  Deregister interrupt source from global interrupt table.
500  */
501 int
502 intr_isrc_deregister(struct intr_irqsrc *isrc)
503 {
504 	int error;
505 
506 	mtx_lock(&isrc_table_lock);
507 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
508 		isrc_release_counters(isrc);
509 	error = isrc_free_irq(isrc);
510 	mtx_unlock(&isrc_table_lock);
511 	return (error);
512 }
513 
#ifdef SMP
/*
 *  A support function for a PIC to decide if provided ISRC should be inited
 *  on given cpu.
 *
 *  Only PPI or IPI sources with at least one handler qualify.  If
 *  INTR_ISRCF_BOUND is set, the answer is whether 'cpu' is in isrc_cpu.
 *  If it is not set, the source is inited on every cpu and 'cpu' is added
 *  to isrc_cpu, so isrc_cpu always reflects where the source is inited.
 */
bool
intr_isrc_init_on_cpu(struct intr_irqsrc *isrc, u_int cpu)
{

	if (isrc->isrc_handlers == 0 ||
	    (isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI)) == 0)
		return (false);

	if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0) {
		CPU_SET(cpu, &isrc->isrc_cpu);
		return (true);
	}
	return (CPU_ISSET(cpu, &isrc->isrc_cpu));
}
#endif
539 
#ifdef INTR_SOLO
/*
 *  Setup filter into interrupt source.
 *
 *  Returns EINVAL for a NULL filter and EBUSY when the source already has
 *  a filter or an interrupt event.  On success the source itself is
 *  handed back through 'cookiep'.
 */
static int
iscr_setup_filter(struct intr_irqsrc *isrc, const char *name,
    intr_irq_filter_t *filter, void *arg, void **cookiep)
{
	int error;

	if (filter == NULL)
		return (EINVAL);

	error = 0;
	mtx_lock(&isrc_table_lock);
	/*
	 * Make sure that we do not mix the two ways
	 * how we handle interrupt sources.
	 */
	if (isrc->isrc_filter == NULL && isrc->isrc_event == NULL) {
		isrc->isrc_filter = filter;
		isrc->isrc_arg = arg;
		isrc_update_name(isrc, name);
	} else
		error = EBUSY;
	mtx_unlock(&isrc_table_lock);

	if (error != 0)
		return (error);

	*cookiep = isrc;
	return (0);
}
#endif
570 
571 /*
572  *  Interrupt source pre_ithread method for MI interrupt framework.
573  */
574 static void
575 intr_isrc_pre_ithread(void *arg)
576 {
577 	struct intr_irqsrc *isrc = arg;
578 
579 	PIC_PRE_ITHREAD(isrc->isrc_dev, isrc);
580 }
581 
582 /*
583  *  Interrupt source post_ithread method for MI interrupt framework.
584  */
585 static void
586 intr_isrc_post_ithread(void *arg)
587 {
588 	struct intr_irqsrc *isrc = arg;
589 
590 	PIC_POST_ITHREAD(isrc->isrc_dev, isrc);
591 }
592 
593 /*
594  *  Interrupt source post_filter method for MI interrupt framework.
595  */
596 static void
597 intr_isrc_post_filter(void *arg)
598 {
599 	struct intr_irqsrc *isrc = arg;
600 
601 	PIC_POST_FILTER(isrc->isrc_dev, isrc);
602 }
603 
604 /*
605  *  Interrupt source assign_cpu method for MI interrupt framework.
606  */
607 static int
608 intr_isrc_assign_cpu(void *arg, int cpu)
609 {
610 #ifdef SMP
611 	struct intr_irqsrc *isrc = arg;
612 	int error;
613 
614 	mtx_lock(&isrc_table_lock);
615 	if (cpu == NOCPU) {
616 		CPU_ZERO(&isrc->isrc_cpu);
617 		isrc->isrc_flags &= ~INTR_ISRCF_BOUND;
618 	} else {
619 		CPU_SETOF(cpu, &isrc->isrc_cpu);
620 		isrc->isrc_flags |= INTR_ISRCF_BOUND;
621 	}
622 
623 	/*
624 	 * In NOCPU case, it's up to PIC to either leave ISRC on same CPU or
625 	 * re-balance it to another CPU or enable it on more CPUs. However,
626 	 * PIC is expected to change isrc_cpu appropriately to keep us well
627 	 * informed if the call is successful.
628 	 */
629 	if (irq_assign_cpu) {
630 		error = PIC_BIND_INTR(isrc->isrc_dev, isrc);
631 		if (error) {
632 			CPU_ZERO(&isrc->isrc_cpu);
633 			mtx_unlock(&isrc_table_lock);
634 			return (error);
635 		}
636 	}
637 	mtx_unlock(&isrc_table_lock);
638 	return (0);
639 #else
640 	return (EOPNOTSUPP);
641 #endif
642 }
643 
644 /*
645  *  Create interrupt event for interrupt source.
646  */
647 static int
648 isrc_event_create(struct intr_irqsrc *isrc)
649 {
650 	struct intr_event *ie;
651 	int error;
652 
653 	error = intr_event_create(&ie, isrc, 0, isrc->isrc_irq,
654 	    intr_isrc_pre_ithread, intr_isrc_post_ithread, intr_isrc_post_filter,
655 	    intr_isrc_assign_cpu, "%s:", isrc->isrc_name);
656 	if (error)
657 		return (error);
658 
659 	mtx_lock(&isrc_table_lock);
660 	/*
661 	 * Make sure that we do not mix the two ways
662 	 * how we handle interrupt sources. Let contested event wins.
663 	 */
664 #ifdef INTR_SOLO
665 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
666 #else
667 	if (isrc->isrc_event != NULL) {
668 #endif
669 		mtx_unlock(&isrc_table_lock);
670 		intr_event_destroy(ie);
671 		return (isrc->isrc_event != NULL ? EBUSY : 0);
672 	}
673 	isrc->isrc_event = ie;
674 	mtx_unlock(&isrc_table_lock);
675 
676 	return (0);
677 }
#ifdef notyet
/*
 *  Destroy interrupt event for interrupt source.
 *
 *  The event pointer is detached from the source under isrc_table_lock
 *  and the event itself is destroyed after the lock is released.
 */
static void
isrc_event_destroy(struct intr_irqsrc *isrc)
{
	struct intr_event *detached;

	mtx_lock(&isrc_table_lock);
	detached = isrc->isrc_event;
	isrc->isrc_event = NULL;
	mtx_unlock(&isrc_table_lock);

	if (detached == NULL)
		return;
	intr_event_destroy(detached);
}
#endif
696 /*
697  *  Add handler to interrupt source.
698  */
699 static int
700 isrc_add_handler(struct intr_irqsrc *isrc, const char *name,
701     driver_filter_t filter, driver_intr_t handler, void *arg,
702     enum intr_type flags, void **cookiep)
703 {
704 	int error;
705 
706 	if (isrc->isrc_event == NULL) {
707 		error = isrc_event_create(isrc);
708 		if (error)
709 			return (error);
710 	}
711 
712 	error = intr_event_add_handler(isrc->isrc_event, name, filter, handler,
713 	    arg, intr_priority(flags), flags, cookiep);
714 	if (error == 0) {
715 		mtx_lock(&isrc_table_lock);
716 		intrcnt_updatename(isrc);
717 		mtx_unlock(&isrc_table_lock);
718 	}
719 
720 	return (error);
721 }
722 
723 /*
724  *  Lookup interrupt controller locked.
725  */
726 static inline struct intr_pic *
727 pic_lookup_locked(device_t dev, intptr_t xref, int flags)
728 {
729 	struct intr_pic *pic;
730 
731 	mtx_assert(&pic_list_lock, MA_OWNED);
732 
733 	if (dev == NULL && xref == 0)
734 		return (NULL);
735 
736 	/* Note that pic->pic_dev is never NULL on registered PIC. */
737 	SLIST_FOREACH(pic, &pic_list, pic_next) {
738 		if ((pic->pic_flags & FLAG_TYPE_MASK) !=
739 		    (flags & FLAG_TYPE_MASK))
740 			continue;
741 
742 		if (dev == NULL) {
743 			if (xref == pic->pic_xref)
744 				return (pic);
745 		} else if (xref == 0 || pic->pic_xref == 0) {
746 			if (dev == pic->pic_dev)
747 				return (pic);
748 		} else if (xref == pic->pic_xref && dev == pic->pic_dev)
749 				return (pic);
750 	}
751 	return (NULL);
752 }
753 
754 /*
755  *  Lookup interrupt controller.
756  */
757 static struct intr_pic *
758 pic_lookup(device_t dev, intptr_t xref, int flags)
759 {
760 	struct intr_pic *pic;
761 
762 	mtx_lock(&pic_list_lock);
763 	pic = pic_lookup_locked(dev, xref, flags);
764 	mtx_unlock(&pic_list_lock);
765 	return (pic);
766 }
767 
768 /*
769  *  Create interrupt controller.
770  */
771 static struct intr_pic *
772 pic_create(device_t dev, intptr_t xref, int flags)
773 {
774 	struct intr_pic *pic;
775 
776 	mtx_lock(&pic_list_lock);
777 	pic = pic_lookup_locked(dev, xref, flags);
778 	if (pic != NULL) {
779 		mtx_unlock(&pic_list_lock);
780 		return (pic);
781 	}
782 	pic = malloc(sizeof(*pic), M_INTRNG, M_NOWAIT | M_ZERO);
783 	if (pic == NULL) {
784 		mtx_unlock(&pic_list_lock);
785 		return (NULL);
786 	}
787 	pic->pic_xref = xref;
788 	pic->pic_dev = dev;
789 	pic->pic_flags = flags;
790 	mtx_init(&pic->pic_child_lock, "pic child lock", NULL, MTX_SPIN);
791 	SLIST_INSERT_HEAD(&pic_list, pic, pic_next);
792 	mtx_unlock(&pic_list_lock);
793 
794 	return (pic);
795 }
#ifdef notyet
/*
 *  Destroy interrupt controller.
 *
 *  The matching controller, if any, is unlinked from pic_list under
 *  pic_list_lock, then its child lock is destroyed and its memory freed.
 *
 *  NOTE(review): entries still linked on pic_children are not released
 *  here - confirm that callers empty the list first.
 */
static void
pic_destroy(device_t dev, intptr_t xref, int flags)
{
	struct intr_pic *pic;

	mtx_lock(&pic_list_lock);
	pic = pic_lookup_locked(dev, xref, flags);
	if (pic == NULL) {
		mtx_unlock(&pic_list_lock);
		return;
	}
	SLIST_REMOVE(&pic_list, pic, intr_pic, pic_next);
	mtx_unlock(&pic_list_lock);

	/* Undo the mtx_init() done in pic_create() before the memory goes. */
	mtx_destroy(&pic->pic_child_lock);
	free(pic, M_INTRNG);
}
#endif
817 /*
818  *  Register interrupt controller.
819  */
820 struct intr_pic *
821 intr_pic_register(device_t dev, intptr_t xref)
822 {
823 	struct intr_pic *pic;
824 
825 	if (dev == NULL)
826 		return (NULL);
827 	pic = pic_create(dev, xref, FLAG_PIC);
828 	if (pic == NULL)
829 		return (NULL);
830 
831 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
832 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
833 	return (pic);
834 }
835 
836 /*
837  *  Unregister interrupt controller.
838  */
839 int
840 intr_pic_deregister(device_t dev, intptr_t xref)
841 {
842 
843 	panic("%s: not implemented", __func__);
844 }
845 
846 /*
847  *  Mark interrupt controller (itself) as a root one.
848  *
849  *  Note that only an interrupt controller can really know its position
850  *  in interrupt controller's tree. So root PIC must claim itself as a root.
851  *
852  *  In FDT case, according to ePAPR approved version 1.1 from 08 April 2011,
853  *  page 30:
854  *    "The root of the interrupt tree is determined when traversal
855  *     of the interrupt tree reaches an interrupt controller node without
856  *     an interrupts property and thus no explicit interrupt parent."
857  */
858 int
859 intr_pic_claim_root(device_t dev, intptr_t xref, intr_irq_filter_t *filter,
860     void *arg, u_int ipicount)
861 {
862 	struct intr_pic *pic;
863 
864 	pic = pic_lookup(dev, xref, FLAG_PIC);
865 	if (pic == NULL) {
866 		device_printf(dev, "not registered\n");
867 		return (EINVAL);
868 	}
869 
870 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
871 	    ("%s: Found a non-PIC controller: %s", __func__,
872 	     device_get_name(pic->pic_dev)));
873 
874 	if (filter == NULL) {
875 		device_printf(dev, "filter missing\n");
876 		return (EINVAL);
877 	}
878 
879 	/*
880 	 * Only one interrupt controllers could be on the root for now.
881 	 * Note that we further suppose that there is not threaded interrupt
882 	 * routine (handler) on the root. See intr_irq_handler().
883 	 */
884 	if (intr_irq_root_dev != NULL) {
885 		device_printf(dev, "another root already set\n");
886 		return (EBUSY);
887 	}
888 
889 	intr_irq_root_dev = dev;
890 	irq_root_filter = filter;
891 	irq_root_arg = arg;
892 	irq_root_ipicount = ipicount;
893 
894 	debugf("irq root set to %s\n", device_get_nameunit(dev));
895 	return (0);
896 }
897 
898 /*
899  * Add a handler to manage a sub range of a parents interrupts.
900  */
901 struct intr_pic *
902 intr_pic_add_handler(device_t parent, struct intr_pic *pic,
903     intr_child_irq_filter_t *filter, void *arg, uintptr_t start,
904     uintptr_t length)
905 {
906 	struct intr_pic *parent_pic;
907 	struct intr_pic_child *newchild;
908 #ifdef INVARIANTS
909 	struct intr_pic_child *child;
910 #endif
911 
912 	/* Find the parent PIC */
913 	parent_pic = pic_lookup(parent, 0, FLAG_PIC);
914 	if (parent_pic == NULL)
915 		return (NULL);
916 
917 	newchild = malloc(sizeof(*newchild), M_INTRNG, M_WAITOK | M_ZERO);
918 	newchild->pc_pic = pic;
919 	newchild->pc_filter = filter;
920 	newchild->pc_filter_arg = arg;
921 	newchild->pc_start = start;
922 	newchild->pc_length = length;
923 
924 	mtx_lock_spin(&parent_pic->pic_child_lock);
925 #ifdef INVARIANTS
926 	SLIST_FOREACH(child, &parent_pic->pic_children, pc_next) {
927 		KASSERT(child->pc_pic != pic, ("%s: Adding a child PIC twice",
928 		    __func__));
929 	}
930 #endif
931 	SLIST_INSERT_HEAD(&parent_pic->pic_children, newchild, pc_next);
932 	mtx_unlock_spin(&parent_pic->pic_child_lock);
933 
934 	return (pic);
935 }
936 
937 static int
938 intr_resolve_irq(device_t dev, intptr_t xref, struct intr_map_data *data,
939     struct intr_irqsrc **isrc)
940 {
941 	struct intr_pic *pic;
942 	struct intr_map_data_msi *msi;
943 
944 	if (data == NULL)
945 		return (EINVAL);
946 
947 	pic = pic_lookup(dev, xref,
948 	    (data->type == INTR_MAP_DATA_MSI) ? FLAG_MSI : FLAG_PIC);
949 	if (pic == NULL)
950 		return (ESRCH);
951 
952 	switch (data->type) {
953 	case INTR_MAP_DATA_MSI:
954 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
955 		    ("%s: Found a non-MSI controller: %s", __func__,
956 		     device_get_name(pic->pic_dev)));
957 		msi = (struct intr_map_data_msi *)data;
958 		*isrc = msi->isrc;
959 		return (0);
960 
961 	default:
962 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
963 		    ("%s: Found a non-PIC controller: %s", __func__,
964 		     device_get_name(pic->pic_dev)));
965 		return (PIC_MAP_INTR(pic->pic_dev, data, isrc));
966 	}
967 }
968 
969 bool
970 intr_is_per_cpu(struct resource *res)
971 {
972 	u_int res_id;
973 	struct intr_irqsrc *isrc;
974 
975 	res_id = (u_int)rman_get_start(res);
976 	isrc = intr_map_get_isrc(res_id);
977 
978 	if (isrc == NULL)
979 		panic("Attempt to get isrc for non-active resource id: %u\n",
980 		    res_id);
981 	return ((isrc->isrc_flags & INTR_ISRCF_PPI) != 0);
982 }
983 
984 int
985 intr_activate_irq(device_t dev, struct resource *res)
986 {
987 	device_t map_dev;
988 	intptr_t map_xref;
989 	struct intr_map_data *data;
990 	struct intr_irqsrc *isrc;
991 	u_int res_id;
992 	int error;
993 
994 	KASSERT(rman_get_start(res) == rman_get_end(res),
995 	    ("%s: more interrupts in resource", __func__));
996 
997 	res_id = (u_int)rman_get_start(res);
998 	if (intr_map_get_isrc(res_id) != NULL)
999 		panic("Attempt to double activation of resource id: %u\n",
1000 		    res_id);
1001 	intr_map_copy_map_data(res_id, &map_dev, &map_xref, &data);
1002 	error = intr_resolve_irq(map_dev, map_xref, data, &isrc);
1003 	if (error != 0) {
1004 		free(data, M_INTRNG);
1005 		/* XXX TODO DISCONECTED PICs */
1006 		/* if (error == EINVAL) return(0); */
1007 		return (error);
1008 	}
1009 	intr_map_set_isrc(res_id, isrc);
1010 	rman_set_virtual(res, data);
1011 	return (PIC_ACTIVATE_INTR(isrc->isrc_dev, isrc, res, data));
1012 }
1013 
1014 int
1015 intr_deactivate_irq(device_t dev, struct resource *res)
1016 {
1017 	struct intr_map_data *data;
1018 	struct intr_irqsrc *isrc;
1019 	u_int res_id;
1020 	int error;
1021 
1022 	KASSERT(rman_get_start(res) == rman_get_end(res),
1023 	    ("%s: more interrupts in resource", __func__));
1024 
1025 	res_id = (u_int)rman_get_start(res);
1026 	isrc = intr_map_get_isrc(res_id);
1027 	if (isrc == NULL)
1028 		panic("Attempt to deactivate non-active resource id: %u\n",
1029 		    res_id);
1030 
1031 	data = rman_get_virtual(res);
1032 	error = PIC_DEACTIVATE_INTR(isrc->isrc_dev, isrc, res, data);
1033 	intr_map_set_isrc(res_id, NULL);
1034 	rman_set_virtual(res, NULL);
1035 	free(data, M_INTRNG);
1036 	return (error);
1037 }
1038 
1039 int
1040 intr_setup_irq(device_t dev, struct resource *res, driver_filter_t filt,
1041     driver_intr_t hand, void *arg, int flags, void **cookiep)
1042 {
1043 	int error;
1044 	struct intr_map_data *data;
1045 	struct intr_irqsrc *isrc;
1046 	const char *name;
1047 	u_int res_id;
1048 
1049 	KASSERT(rman_get_start(res) == rman_get_end(res),
1050 	    ("%s: more interrupts in resource", __func__));
1051 
1052 	res_id = (u_int)rman_get_start(res);
1053 	isrc = intr_map_get_isrc(res_id);
1054 	if (isrc == NULL) {
1055 		/* XXX TODO DISCONECTED PICs */
1056 		return (EINVAL);
1057 	}
1058 
1059 	data = rman_get_virtual(res);
1060 	name = device_get_nameunit(dev);
1061 
1062 #ifdef INTR_SOLO
1063 	/*
1064 	 * Standard handling is done through MI interrupt framework. However,
1065 	 * some interrupts could request solely own special handling. This
1066 	 * non standard handling can be used for interrupt controllers without
1067 	 * handler (filter only), so in case that interrupt controllers are
1068 	 * chained, MI interrupt framework is called only in leaf controller.
1069 	 *
1070 	 * Note that root interrupt controller routine is served as well,
1071 	 * however in intr_irq_handler(), i.e. main system dispatch routine.
1072 	 */
1073 	if (flags & INTR_SOLO && hand != NULL) {
1074 		debugf("irq %u cannot solo on %s\n", irq, name);
1075 		return (EINVAL);
1076 	}
1077 
1078 	if (flags & INTR_SOLO) {
1079 		error = iscr_setup_filter(isrc, name, (intr_irq_filter_t *)filt,
1080 		    arg, cookiep);
1081 		debugf("irq %u setup filter error %d on %s\n", isrc->isrc_irq, error,
1082 		    name);
1083 	} else
1084 #endif
1085 		{
1086 		error = isrc_add_handler(isrc, name, filt, hand, arg, flags,
1087 		    cookiep);
1088 		debugf("irq %u add handler error %d on %s\n", isrc->isrc_irq, error, name);
1089 	}
1090 	if (error != 0)
1091 		return (error);
1092 
1093 	mtx_lock(&isrc_table_lock);
1094 	error = PIC_SETUP_INTR(isrc->isrc_dev, isrc, res, data);
1095 	if (error == 0) {
1096 		isrc->isrc_handlers++;
1097 		if (isrc->isrc_handlers == 1)
1098 			PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
1099 	}
1100 	mtx_unlock(&isrc_table_lock);
1101 	if (error != 0)
1102 		intr_event_remove_handler(*cookiep);
1103 	return (error);
1104 }
1105 
1106 int
1107 intr_teardown_irq(device_t dev, struct resource *res, void *cookie)
1108 {
1109 	int error;
1110 	struct intr_map_data *data;
1111 	struct intr_irqsrc *isrc;
1112 	u_int res_id;
1113 
1114 	KASSERT(rman_get_start(res) == rman_get_end(res),
1115 	    ("%s: more interrupts in resource", __func__));
1116 
1117 	res_id = (u_int)rman_get_start(res);
1118 	isrc = intr_map_get_isrc(res_id);
1119 	if (isrc == NULL || isrc->isrc_handlers == 0)
1120 		return (EINVAL);
1121 
1122 	data = rman_get_virtual(res);
1123 
1124 #ifdef INTR_SOLO
1125 	if (isrc->isrc_filter != NULL) {
1126 		if (isrc != cookie)
1127 			return (EINVAL);
1128 
1129 		mtx_lock(&isrc_table_lock);
1130 		isrc->isrc_filter = NULL;
1131 		isrc->isrc_arg = NULL;
1132 		isrc->isrc_handlers = 0;
1133 		PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1134 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1135 		isrc_update_name(isrc, NULL);
1136 		mtx_unlock(&isrc_table_lock);
1137 		return (0);
1138 	}
1139 #endif
1140 	if (isrc != intr_handler_source(cookie))
1141 		return (EINVAL);
1142 
1143 	error = intr_event_remove_handler(cookie);
1144 	if (error == 0) {
1145 		mtx_lock(&isrc_table_lock);
1146 		isrc->isrc_handlers--;
1147 		if (isrc->isrc_handlers == 0)
1148 			PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1149 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1150 		intrcnt_updatename(isrc);
1151 		mtx_unlock(&isrc_table_lock);
1152 	}
1153 	return (error);
1154 }
1155 
1156 int
1157 intr_describe_irq(device_t dev, struct resource *res, void *cookie,
1158     const char *descr)
1159 {
1160 	int error;
1161 	struct intr_irqsrc *isrc;
1162 	u_int res_id;
1163 
1164 	KASSERT(rman_get_start(res) == rman_get_end(res),
1165 	    ("%s: more interrupts in resource", __func__));
1166 
1167 	res_id = (u_int)rman_get_start(res);
1168 	isrc = intr_map_get_isrc(res_id);
1169 	if (isrc == NULL || isrc->isrc_handlers == 0)
1170 		return (EINVAL);
1171 #ifdef INTR_SOLO
1172 	if (isrc->isrc_filter != NULL) {
1173 		if (isrc != cookie)
1174 			return (EINVAL);
1175 
1176 		mtx_lock(&isrc_table_lock);
1177 		isrc_update_name(isrc, descr);
1178 		mtx_unlock(&isrc_table_lock);
1179 		return (0);
1180 	}
1181 #endif
1182 	error = intr_event_describe_handler(isrc->isrc_event, cookie, descr);
1183 	if (error == 0) {
1184 		mtx_lock(&isrc_table_lock);
1185 		intrcnt_updatename(isrc);
1186 		mtx_unlock(&isrc_table_lock);
1187 	}
1188 	return (error);
1189 }
1190 
1191 #ifdef SMP
1192 int
1193 intr_bind_irq(device_t dev, struct resource *res, int cpu)
1194 {
1195 	struct intr_irqsrc *isrc;
1196 	u_int res_id;
1197 
1198 	KASSERT(rman_get_start(res) == rman_get_end(res),
1199 	    ("%s: more interrupts in resource", __func__));
1200 
1201 	res_id = (u_int)rman_get_start(res);
1202 	isrc = intr_map_get_isrc(res_id);
1203 	if (isrc == NULL || isrc->isrc_handlers == 0)
1204 		return (EINVAL);
1205 #ifdef INTR_SOLO
1206 	if (isrc->isrc_filter != NULL)
1207 		return (intr_isrc_assign_cpu(isrc, cpu));
1208 #endif
1209 	return (intr_event_bind(isrc->isrc_event, cpu));
1210 }
1211 
1212 /*
1213  * Return the CPU that the next interrupt source should use.
1214  * For now just returns the next CPU according to round-robin.
1215  */
1216 u_int
1217 intr_irq_next_cpu(u_int last_cpu, cpuset_t *cpumask)
1218 {
1219 	u_int cpu;
1220 
1221 	KASSERT(!CPU_EMPTY(cpumask), ("%s: Empty CPU mask", __func__));
1222 	if (!irq_assign_cpu || mp_ncpus == 1) {
1223 		cpu = PCPU_GET(cpuid);
1224 
1225 		if (CPU_ISSET(cpu, cpumask))
1226 			return (curcpu);
1227 
1228 		return (CPU_FFS(cpumask) - 1);
1229 	}
1230 
1231 	do {
1232 		last_cpu++;
1233 		if (last_cpu > mp_maxid)
1234 			last_cpu = 0;
1235 	} while (!CPU_ISSET(last_cpu, cpumask));
1236 	return (last_cpu);
1237 }
1238 
1239 #ifndef EARLY_AP_STARTUP
1240 /*
1241  *  Distribute all the interrupt sources among the available
1242  *  CPUs once the AP's have been launched.
1243  */
1244 static void
1245 intr_irq_shuffle(void *arg __unused)
1246 {
1247 	struct intr_irqsrc *isrc;
1248 	u_int i;
1249 
1250 	if (mp_ncpus == 1)
1251 		return;
1252 
1253 	mtx_lock(&isrc_table_lock);
1254 	irq_assign_cpu = true;
1255 	for (i = 0; i < intr_nirq; i++) {
1256 		isrc = irq_sources[i];
1257 		if (isrc == NULL || isrc->isrc_handlers == 0 ||
1258 		    isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI))
1259 			continue;
1260 
1261 		if (isrc->isrc_event != NULL &&
1262 		    isrc->isrc_flags & INTR_ISRCF_BOUND &&
1263 		    isrc->isrc_event->ie_cpu != CPU_FFS(&isrc->isrc_cpu) - 1)
1264 			panic("%s: CPU inconsistency", __func__);
1265 
1266 		if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0)
1267 			CPU_ZERO(&isrc->isrc_cpu); /* start again */
1268 
1269 		/*
1270 		 * We are in wicked position here if the following call fails
1271 		 * for bound ISRC. The best thing we can do is to clear
1272 		 * isrc_cpu so inconsistency with ie_cpu will be detectable.
1273 		 */
1274 		if (PIC_BIND_INTR(isrc->isrc_dev, isrc) != 0)
1275 			CPU_ZERO(&isrc->isrc_cpu);
1276 	}
1277 	mtx_unlock(&isrc_table_lock);
1278 }
1279 SYSINIT(intr_irq_shuffle, SI_SUB_SMP, SI_ORDER_SECOND, intr_irq_shuffle, NULL);
1280 #endif /* !EARLY_AP_STARTUP */
1281 
1282 #else
1283 u_int
1284 intr_irq_next_cpu(u_int current_cpu, cpuset_t *cpumask)
1285 {
1286 
1287 	return (PCPU_GET(cpuid));
1288 }
1289 #endif /* SMP */
1290 
1291 /*
1292  * Allocate memory for new intr_map_data structure.
1293  * Initialize common fields.
1294  */
1295 struct intr_map_data *
1296 intr_alloc_map_data(enum intr_map_data_type type, size_t len, int flags)
1297 {
1298 	struct intr_map_data *data;
1299 
1300 	data = malloc(len, M_INTRNG, flags);
1301 	data->type = type;
1302 	data->len = len;
1303 	return (data);
1304 }
1305 
1306 void intr_free_intr_map_data(struct intr_map_data *data)
1307 {
1308 
1309 	free(data, M_INTRNG);
1310 }
1311 
1312 /*
1313  *  Register a MSI/MSI-X interrupt controller
1314  */
1315 int
1316 intr_msi_register(device_t dev, intptr_t xref)
1317 {
1318 	struct intr_pic *pic;
1319 
1320 	if (dev == NULL)
1321 		return (EINVAL);
1322 	pic = pic_create(dev, xref, FLAG_MSI);
1323 	if (pic == NULL)
1324 		return (ENOMEM);
1325 
1326 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
1327 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
1328 	return (0);
1329 }
1330 
1331 int
1332 intr_alloc_msi(device_t pci, device_t child, intptr_t xref, int count,
1333     int maxcount, int *irqs)
1334 {
1335 	struct iommu_domain *domain;
1336 	struct intr_irqsrc **isrc;
1337 	struct intr_pic *pic;
1338 	device_t pdev;
1339 	struct intr_map_data_msi *msi;
1340 	int err, i;
1341 
1342 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1343 	if (pic == NULL)
1344 		return (ESRCH);
1345 
1346 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1347 	    ("%s: Found a non-MSI controller: %s", __func__,
1348 	     device_get_name(pic->pic_dev)));
1349 
1350 	/*
1351 	 * If this is the first time we have used this context ask the
1352 	 * interrupt controller to map memory the msi source will need.
1353 	 */
1354 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1355 	if (err != 0)
1356 		return (err);
1357 
1358 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1359 	err = MSI_ALLOC_MSI(pic->pic_dev, child, count, maxcount, &pdev, isrc);
1360 	if (err != 0) {
1361 		free(isrc, M_INTRNG);
1362 		return (err);
1363 	}
1364 
1365 	for (i = 0; i < count; i++) {
1366 		isrc[i]->isrc_iommu = domain;
1367 		msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1368 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1369 		msi-> isrc = isrc[i];
1370 
1371 		irqs[i] = intr_map_irq(pic->pic_dev, xref,
1372 		    (struct intr_map_data *)msi);
1373 	}
1374 	free(isrc, M_INTRNG);
1375 
1376 	return (err);
1377 }
1378 
1379 int
1380 intr_release_msi(device_t pci, device_t child, intptr_t xref, int count,
1381     int *irqs)
1382 {
1383 	struct intr_irqsrc **isrc;
1384 	struct intr_pic *pic;
1385 	struct intr_map_data_msi *msi;
1386 	int i, err;
1387 
1388 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1389 	if (pic == NULL)
1390 		return (ESRCH);
1391 
1392 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1393 	    ("%s: Found a non-MSI controller: %s", __func__,
1394 	     device_get_name(pic->pic_dev)));
1395 
1396 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1397 
1398 	for (i = 0; i < count; i++) {
1399 		msi = (struct intr_map_data_msi *)
1400 		    intr_map_get_map_data(irqs[i]);
1401 		KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1402 		    ("%s: irq %d map data is not MSI", __func__,
1403 		    irqs[i]));
1404 		isrc[i] = msi->isrc;
1405 	}
1406 
1407 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1408 
1409 	err = MSI_RELEASE_MSI(pic->pic_dev, child, count, isrc);
1410 
1411 	for (i = 0; i < count; i++) {
1412 		if (isrc[i] != NULL)
1413 			intr_unmap_irq(irqs[i]);
1414 	}
1415 
1416 	free(isrc, M_INTRNG);
1417 	return (err);
1418 }
1419 
1420 int
1421 intr_alloc_msix(device_t pci, device_t child, intptr_t xref, int *irq)
1422 {
1423 	struct iommu_domain *domain;
1424 	struct intr_irqsrc *isrc;
1425 	struct intr_pic *pic;
1426 	device_t pdev;
1427 	struct intr_map_data_msi *msi;
1428 	int err;
1429 
1430 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1431 	if (pic == NULL)
1432 		return (ESRCH);
1433 
1434 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1435 	    ("%s: Found a non-MSI controller: %s", __func__,
1436 	     device_get_name(pic->pic_dev)));
1437 
1438 	/*
1439 	 * If this is the first time we have used this context ask the
1440 	 * interrupt controller to map memory the msi source will need.
1441 	 */
1442 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1443 	if (err != 0)
1444 		return (err);
1445 
1446 	err = MSI_ALLOC_MSIX(pic->pic_dev, child, &pdev, &isrc);
1447 	if (err != 0)
1448 		return (err);
1449 
1450 	isrc->isrc_iommu = domain;
1451 	msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1452 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1453 	msi->isrc = isrc;
1454 	*irq = intr_map_irq(pic->pic_dev, xref, (struct intr_map_data *)msi);
1455 	return (0);
1456 }
1457 
1458 int
1459 intr_release_msix(device_t pci, device_t child, intptr_t xref, int irq)
1460 {
1461 	struct intr_irqsrc *isrc;
1462 	struct intr_pic *pic;
1463 	struct intr_map_data_msi *msi;
1464 	int err;
1465 
1466 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1467 	if (pic == NULL)
1468 		return (ESRCH);
1469 
1470 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1471 	    ("%s: Found a non-MSI controller: %s", __func__,
1472 	     device_get_name(pic->pic_dev)));
1473 
1474 	msi = (struct intr_map_data_msi *)
1475 	    intr_map_get_map_data(irq);
1476 	KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1477 	    ("%s: irq %d map data is not MSI", __func__,
1478 	    irq));
1479 	isrc = msi->isrc;
1480 	if (isrc == NULL) {
1481 		intr_unmap_irq(irq);
1482 		return (EINVAL);
1483 	}
1484 
1485 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1486 
1487 	err = MSI_RELEASE_MSIX(pic->pic_dev, child, isrc);
1488 	intr_unmap_irq(irq);
1489 
1490 	return (err);
1491 }
1492 
1493 int
1494 intr_map_msi(device_t pci, device_t child, intptr_t xref, int irq,
1495     uint64_t *addr, uint32_t *data)
1496 {
1497 	struct intr_irqsrc *isrc;
1498 	struct intr_pic *pic;
1499 	int err;
1500 
1501 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1502 	if (pic == NULL)
1503 		return (ESRCH);
1504 
1505 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1506 	    ("%s: Found a non-MSI controller: %s", __func__,
1507 	     device_get_name(pic->pic_dev)));
1508 
1509 	isrc = intr_map_get_isrc(irq);
1510 	if (isrc == NULL)
1511 		return (EINVAL);
1512 
1513 	err = MSI_MAP_MSI(pic->pic_dev, child, isrc, addr, data);
1514 
1515 #ifdef IOMMU
1516 	if (isrc->isrc_iommu != NULL)
1517 		iommu_translate_msi(isrc->isrc_iommu, addr);
1518 #endif
1519 
1520 	return (err);
1521 }
1522 
/*
 * Software interrupt hook; it has no work to do here and returns at once.
 */
void dosoftints(void);

void
dosoftints(void)
{

	/* Nothing to do. */
}
1528 
1529 #ifdef SMP
1530 /*
1531  *  Init interrupt controller on another CPU.
1532  */
1533 void
1534 intr_pic_init_secondary(void)
1535 {
1536 
1537 	/*
1538 	 * QQQ: Only root PIC is aware of other CPUs ???
1539 	 */
1540 	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
1541 
1542 	//mtx_lock(&isrc_table_lock);
1543 	PIC_INIT_SECONDARY(intr_irq_root_dev);
1544 	//mtx_unlock(&isrc_table_lock);
1545 }
1546 #endif
1547 
1548 #ifdef DDB
1549 DB_SHOW_COMMAND(irqs, db_show_irqs)
1550 {
1551 	u_int i, irqsum;
1552 	u_long num;
1553 	struct intr_irqsrc *isrc;
1554 
1555 	for (irqsum = 0, i = 0; i < intr_nirq; i++) {
1556 		isrc = irq_sources[i];
1557 		if (isrc == NULL)
1558 			continue;
1559 
1560 		num = isrc->isrc_count != NULL ? isrc->isrc_count[0] : 0;
1561 		db_printf("irq%-3u <%s>: cpu %02lx%s cnt %lu\n", i,
1562 		    isrc->isrc_name, isrc->isrc_cpu.__bits[0],
1563 		    isrc->isrc_flags & INTR_ISRCF_BOUND ? " (bound)" : "", num);
1564 		irqsum += num;
1565 	}
1566 	db_printf("irq total %u\n", irqsum);
1567 }
1568 #endif
1569 
/*
 * Interrupt mapping table functions.
 *
 * Please keep this part separate; it could be turned into an
 * extension of standard resources.
 */
1576 struct intr_map_entry
1577 {
1578 	device_t 		dev;
1579 	intptr_t 		xref;
1580 	struct intr_map_data 	*map_data;
1581 	struct intr_irqsrc 	*isrc;
1582 	/* XXX TODO DISCONECTED PICs */
1583 	/*int			flags */
1584 };
1585 
1586 /* XXX Convert irq_map[] to dynamicaly expandable one. */
1587 static struct intr_map_entry **irq_map;
1588 static u_int irq_map_count;
1589 static u_int irq_map_first_free_idx;
1590 static struct mtx irq_map_lock;
1591 
1592 static struct intr_irqsrc *
1593 intr_map_get_isrc(u_int res_id)
1594 {
1595 	struct intr_irqsrc *isrc;
1596 
1597 	isrc = NULL;
1598 	mtx_lock(&irq_map_lock);
1599 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1600 		isrc = irq_map[res_id]->isrc;
1601 	mtx_unlock(&irq_map_lock);
1602 
1603 	return (isrc);
1604 }
1605 
1606 static void
1607 intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc)
1608 {
1609 
1610 	mtx_lock(&irq_map_lock);
1611 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1612 		irq_map[res_id]->isrc = isrc;
1613 	mtx_unlock(&irq_map_lock);
1614 }
1615 
1616 /*
1617  * Get a copy of intr_map_entry data
1618  */
1619 static struct intr_map_data *
1620 intr_map_get_map_data(u_int res_id)
1621 {
1622 	struct intr_map_data *data;
1623 
1624 	data = NULL;
1625 	mtx_lock(&irq_map_lock);
1626 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1627 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1628 	data = irq_map[res_id]->map_data;
1629 	mtx_unlock(&irq_map_lock);
1630 
1631 	return (data);
1632 }
1633 
1634 /*
1635  * Get a copy of intr_map_entry data
1636  */
1637 static void
1638 intr_map_copy_map_data(u_int res_id, device_t *map_dev, intptr_t *map_xref,
1639     struct intr_map_data **data)
1640 {
1641 	size_t len;
1642 
1643 	len = 0;
1644 	mtx_lock(&irq_map_lock);
1645 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1646 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1647 	if (irq_map[res_id]->map_data != NULL)
1648 		len = irq_map[res_id]->map_data->len;
1649 	mtx_unlock(&irq_map_lock);
1650 
1651 	if (len == 0)
1652 		*data = NULL;
1653 	else
1654 		*data = malloc(len, M_INTRNG, M_WAITOK | M_ZERO);
1655 	mtx_lock(&irq_map_lock);
1656 	if (irq_map[res_id] == NULL)
1657 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1658 	if (len != 0) {
1659 		if (len != irq_map[res_id]->map_data->len)
1660 			panic("Resource id: %u has changed.\n", res_id);
1661 		memcpy(*data, irq_map[res_id]->map_data, len);
1662 	}
1663 	*map_dev = irq_map[res_id]->dev;
1664 	*map_xref = irq_map[res_id]->xref;
1665 	mtx_unlock(&irq_map_lock);
1666 }
1667 
1668 /*
1669  * Allocate and fill new entry in irq_map table.
1670  */
1671 u_int
1672 intr_map_irq(device_t dev, intptr_t xref, struct intr_map_data *data)
1673 {
1674 	u_int i;
1675 	struct intr_map_entry *entry;
1676 
1677 	/* Prepare new entry first. */
1678 	entry = malloc(sizeof(*entry), M_INTRNG, M_WAITOK | M_ZERO);
1679 
1680 	entry->dev = dev;
1681 	entry->xref = xref;
1682 	entry->map_data = data;
1683 	entry->isrc = NULL;
1684 
1685 	mtx_lock(&irq_map_lock);
1686 	for (i = irq_map_first_free_idx; i < irq_map_count; i++) {
1687 		if (irq_map[i] == NULL) {
1688 			irq_map[i] = entry;
1689 			irq_map_first_free_idx = i + 1;
1690 			mtx_unlock(&irq_map_lock);
1691 			return (i);
1692 		}
1693 	}
1694 	mtx_unlock(&irq_map_lock);
1695 
1696 	/* XXX Expand irq_map table */
1697 	panic("IRQ mapping table is full.");
1698 }
1699 
1700 /*
1701  * Remove and free mapping entry.
1702  */
1703 void
1704 intr_unmap_irq(u_int res_id)
1705 {
1706 	struct intr_map_entry *entry;
1707 
1708 	mtx_lock(&irq_map_lock);
1709 	if ((res_id >= irq_map_count) || (irq_map[res_id] == NULL))
1710 		panic("Attempt to unmap invalid resource id: %u\n", res_id);
1711 	entry = irq_map[res_id];
1712 	irq_map[res_id] = NULL;
1713 	irq_map_first_free_idx = res_id;
1714 	mtx_unlock(&irq_map_lock);
1715 	intr_free_intr_map_data(entry->map_data);
1716 	free(entry, M_INTRNG);
1717 }
1718 
1719 /*
1720  * Clone mapping entry.
1721  */
1722 u_int
1723 intr_map_clone_irq(u_int old_res_id)
1724 {
1725 	device_t map_dev;
1726 	intptr_t map_xref;
1727 	struct intr_map_data *data;
1728 
1729 	intr_map_copy_map_data(old_res_id, &map_dev, &map_xref, &data);
1730 	return (intr_map_irq(map_dev, map_xref, data));
1731 }
1732 
1733 static void
1734 intr_map_init(void *dummy __unused)
1735 {
1736 
1737 	mtx_init(&irq_map_lock, "intr map table", NULL, MTX_DEF);
1738 
1739 	irq_map_count = 2 * intr_nirq;
1740 	irq_map = mallocarray(irq_map_count, sizeof(struct intr_map_entry*),
1741 	    M_INTRNG, M_WAITOK | M_ZERO);
1742 }
1743 SYSINIT(intr_map_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_map_init, NULL);
1744