xref: /freebsd/sys/kern/subr_intr.c (revision 3110d4ebd6c0848cf5e25890d01791bb407e2a9b)
1 /*-
2  * Copyright (c) 2015-2016 Svatopluk Kraus
3  * Copyright (c) 2015-2016 Michal Meloun
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
/*
 *	New-style Interrupt Framework
 *
 *  TODO: - add support for disconnected PICs.
 *        - support enabling IPIs (PPIs) on other CPUs that have already
 *          been started.
 *        - complete the support for removable PICs.
 */
38 
39 #include "opt_ddb.h"
40 #include "opt_hwpmc_hooks.h"
41 #include "opt_iommu.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/syslog.h>
49 #include <sys/malloc.h>
50 #include <sys/proc.h>
51 #include <sys/queue.h>
52 #include <sys/bus.h>
53 #include <sys/interrupt.h>
54 #include <sys/taskqueue.h>
55 #include <sys/tree.h>
56 #include <sys/conf.h>
57 #include <sys/cpuset.h>
58 #include <sys/rman.h>
59 #include <sys/sched.h>
60 #include <sys/smp.h>
61 #include <sys/sysctl.h>
62 #include <sys/vmmeter.h>
63 #ifdef HWPMC_HOOKS
64 #include <sys/pmckern.h>
65 #endif
66 
67 #include <machine/atomic.h>
68 #include <machine/intr.h>
69 #include <machine/cpu.h>
70 #include <machine/smp.h>
71 #include <machine/stdarg.h>
72 
73 #ifdef DDB
74 #include <ddb/ddb.h>
75 #endif
76 
77 #ifdef IOMMU
78 #include <dev/iommu/iommu_msi.h>
79 #endif
80 
81 #include "pic_if.h"
82 #include "msi_if.h"
83 
/* Size in bytes of one name slot in intrnames[], terminating NUL included. */
#define	INTRNAME_LEN	(2*MAXCOMLEN + 1)

/*
 * Debug trace helper.  With DEBUG defined it prints the name of the calling
 * function followed by the formatted message; otherwise it expands to
 * nothing.
 */
#ifdef DEBUG
#define debugf(fmt, args...)						\
	do {								\
		printf("%s(): ", __func__);				\
		printf(fmt,##args);					\
	} while (0)
#else
#define debugf(fmt, args...)
#endif
92 
93 MALLOC_DECLARE(M_INTRNG);
94 MALLOC_DEFINE(M_INTRNG, "intr", "intr interrupt handling");
95 
96 /* Main interrupt handler called from assembler -> 'hidden' for C code. */
97 void intr_irq_handler(struct trapframe *tf);
98 
99 /* Root interrupt controller stuff. */
100 device_t intr_irq_root_dev;
101 static intr_irq_filter_t *irq_root_filter;
102 static void *irq_root_arg;
103 static u_int irq_root_ipicount;
104 
105 struct intr_pic_child {
106 	SLIST_ENTRY(intr_pic_child)	 pc_next;
107 	struct intr_pic			*pc_pic;
108 	intr_child_irq_filter_t		*pc_filter;
109 	void				*pc_filter_arg;
110 	uintptr_t			 pc_start;
111 	uintptr_t			 pc_length;
112 };
113 
114 /* Interrupt controller definition. */
115 struct intr_pic {
116 	SLIST_ENTRY(intr_pic)	pic_next;
117 	intptr_t		pic_xref;	/* hardware identification */
118 	device_t		pic_dev;
119 /* Only one of FLAG_PIC or FLAG_MSI may be set */
120 #define	FLAG_PIC	(1 << 0)
121 #define	FLAG_MSI	(1 << 1)
122 #define	FLAG_TYPE_MASK	(FLAG_PIC | FLAG_MSI)
123 	u_int			pic_flags;
124 	struct mtx		pic_child_lock;
125 	SLIST_HEAD(, intr_pic_child) pic_children;
126 };
127 
128 static struct mtx pic_list_lock;
129 static SLIST_HEAD(, intr_pic) pic_list;
130 
131 static struct intr_pic *pic_lookup(device_t dev, intptr_t xref, int flags);
132 
133 /* Interrupt source definition. */
134 static struct mtx isrc_table_lock;
135 static struct intr_irqsrc **irq_sources;
136 u_int irq_next_free;
137 
138 #ifdef SMP
139 #ifdef EARLY_AP_STARTUP
140 static bool irq_assign_cpu = true;
141 #else
142 static bool irq_assign_cpu = false;
143 #endif
144 #endif
145 
146 int intr_nirq = NIRQ;
147 SYSCTL_UINT(_machdep, OID_AUTO, nirq, CTLFLAG_RDTUN, &intr_nirq, 0,
148     "Number of IRQs");
149 
150 /* Data for MI statistics reporting. */
151 u_long *intrcnt;
152 char *intrnames;
153 size_t sintrcnt;
154 size_t sintrnames;
155 static u_int intrcnt_index;
156 
157 static struct intr_irqsrc *intr_map_get_isrc(u_int res_id);
158 static void intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc);
159 static struct intr_map_data * intr_map_get_map_data(u_int res_id);
160 static void intr_map_copy_map_data(u_int res_id, device_t *dev, intptr_t *xref,
161     struct intr_map_data **data);
162 
163 /*
164  *  Interrupt framework initialization routine.
165  */
166 static void
167 intr_irq_init(void *dummy __unused)
168 {
169 	int intrcnt_count;
170 
171 	SLIST_INIT(&pic_list);
172 	mtx_init(&pic_list_lock, "intr pic list", NULL, MTX_DEF);
173 
174 	mtx_init(&isrc_table_lock, "intr isrc table", NULL, MTX_DEF);
175 
176 	/*
177 	 * - 2 counters for each I/O interrupt.
178 	 * - MAXCPU counters for each IPI counters for SMP.
179 	 */
180 	intrcnt_count = intr_nirq * 2;
181 #ifdef SMP
182 	intrcnt_count += INTR_IPI_COUNT * MAXCPU;
183 #endif
184 
185 	intrcnt = mallocarray(intrcnt_count, sizeof(u_long), M_INTRNG,
186 	    M_WAITOK | M_ZERO);
187 	intrnames = mallocarray(intrcnt_count, INTRNAME_LEN, M_INTRNG,
188 	    M_WAITOK | M_ZERO);
189 	sintrcnt = intrcnt_count * sizeof(u_long);
190 	sintrnames = intrcnt_count * INTRNAME_LEN;
191 	irq_sources = mallocarray(intr_nirq, sizeof(struct intr_irqsrc*),
192 	    M_INTRNG, M_WAITOK | M_ZERO);
193 }
194 SYSINIT(intr_irq_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_irq_init, NULL);
195 
196 static void
197 intrcnt_setname(const char *name, int index)
198 {
199 
200 	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
201 	    INTRNAME_LEN - 1, name);
202 }
203 
204 /*
205  *  Update name for interrupt source with interrupt event.
206  */
207 static void
208 intrcnt_updatename(struct intr_irqsrc *isrc)
209 {
210 
211 	/* QQQ: What about stray counter name? */
212 	mtx_assert(&isrc_table_lock, MA_OWNED);
213 	intrcnt_setname(isrc->isrc_event->ie_fullname, isrc->isrc_index);
214 }
215 
216 /*
217  *  Virtualization for interrupt source interrupt counter increment.
218  */
219 static inline void
220 isrc_increment_count(struct intr_irqsrc *isrc)
221 {
222 
223 	if (isrc->isrc_flags & INTR_ISRCF_PPI)
224 		atomic_add_long(&isrc->isrc_count[0], 1);
225 	else
226 		isrc->isrc_count[0]++;
227 }
228 
229 /*
230  *  Virtualization for interrupt source interrupt stray counter increment.
231  */
232 static inline void
233 isrc_increment_straycount(struct intr_irqsrc *isrc)
234 {
235 
236 	isrc->isrc_count[1]++;
237 }
238 
239 /*
240  *  Virtualization for interrupt source interrupt name update.
241  */
242 static void
243 isrc_update_name(struct intr_irqsrc *isrc, const char *name)
244 {
245 	char str[INTRNAME_LEN];
246 
247 	mtx_assert(&isrc_table_lock, MA_OWNED);
248 
249 	if (name != NULL) {
250 		snprintf(str, INTRNAME_LEN, "%s: %s", isrc->isrc_name, name);
251 		intrcnt_setname(str, isrc->isrc_index);
252 		snprintf(str, INTRNAME_LEN, "stray %s: %s", isrc->isrc_name,
253 		    name);
254 		intrcnt_setname(str, isrc->isrc_index + 1);
255 	} else {
256 		snprintf(str, INTRNAME_LEN, "%s:", isrc->isrc_name);
257 		intrcnt_setname(str, isrc->isrc_index);
258 		snprintf(str, INTRNAME_LEN, "stray %s:", isrc->isrc_name);
259 		intrcnt_setname(str, isrc->isrc_index + 1);
260 	}
261 }
262 
263 /*
264  *  Virtualization for interrupt source interrupt counters setup.
265  */
266 static void
267 isrc_setup_counters(struct intr_irqsrc *isrc)
268 {
269 	u_int index;
270 
271 	/*
272 	 *  XXX - it does not work well with removable controllers and
273 	 *        interrupt sources !!!
274 	 */
275 	index = atomic_fetchadd_int(&intrcnt_index, 2);
276 	isrc->isrc_index = index;
277 	isrc->isrc_count = &intrcnt[index];
278 	isrc_update_name(isrc, NULL);
279 }
280 
/*
 *  Release the counters of an interrupt source.
 *
 *  Releasing counters is not supported yet, so reaching this function
 *  panics the system.
 */
static void
isrc_release_counters(struct intr_irqsrc *isrc)
{

	panic("%s: not implemented", __func__);
}
290 
#ifdef SMP
/*
 *  Reserve MAXCPU consecutive counters for one IPI and name them
 *  "cpu<N>:<name>", one per possible CPU.
 *
 *  Returns a pointer to the first of the reserved counters.
 */
u_long *
intr_ipi_setup_counters(const char *name)
{
	u_int base, cpu;
	char str[INTRNAME_LEN];

	base = atomic_fetchadd_int(&intrcnt_index, MAXCPU);
	for (cpu = 0; cpu < MAXCPU; cpu++) {
		/* 'cpu' is unsigned, so it must be formatted with %u. */
		snprintf(str, sizeof(str), "cpu%u:%s", cpu, name);
		intrcnt_setname(str, base + cpu);
	}
	return (&intrcnt[base]);
}
#endif
309 
310 /*
311  *  Main interrupt dispatch handler. It's called straight
312  *  from the assembler, where CPU interrupt is served.
313  */
314 void
315 intr_irq_handler(struct trapframe *tf)
316 {
317 	struct trapframe * oldframe;
318 	struct thread * td;
319 
320 	KASSERT(irq_root_filter != NULL, ("%s: no filter", __func__));
321 
322 	VM_CNT_INC(v_intr);
323 	critical_enter();
324 	td = curthread;
325 	oldframe = td->td_intr_frame;
326 	td->td_intr_frame = tf;
327 	irq_root_filter(irq_root_arg);
328 	td->td_intr_frame = oldframe;
329 	critical_exit();
330 #ifdef HWPMC_HOOKS
331 	if (pmc_hook && TRAPF_USERMODE(tf) &&
332 	    (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN))
333 		pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, tf);
334 #endif
335 }
336 
337 int
338 intr_child_irq_handler(struct intr_pic *parent, uintptr_t irq)
339 {
340 	struct intr_pic_child *child;
341 	bool found;
342 
343 	found = false;
344 	mtx_lock_spin(&parent->pic_child_lock);
345 	SLIST_FOREACH(child, &parent->pic_children, pc_next) {
346 		if (child->pc_start <= irq &&
347 		    irq < (child->pc_start + child->pc_length)) {
348 			found = true;
349 			break;
350 		}
351 	}
352 	mtx_unlock_spin(&parent->pic_child_lock);
353 
354 	if (found)
355 		return (child->pc_filter(child->pc_filter_arg, irq));
356 
357 	return (FILTER_STRAY);
358 }
359 
360 /*
361  *  interrupt controller dispatch function for interrupts. It should
362  *  be called straight from the interrupt controller, when associated interrupt
363  *  source is learned.
364  */
365 int
366 intr_isrc_dispatch(struct intr_irqsrc *isrc, struct trapframe *tf)
367 {
368 
369 	KASSERT(isrc != NULL, ("%s: no source", __func__));
370 
371 	isrc_increment_count(isrc);
372 
373 #ifdef INTR_SOLO
374 	if (isrc->isrc_filter != NULL) {
375 		int error;
376 		error = isrc->isrc_filter(isrc->isrc_arg, tf);
377 		PIC_POST_FILTER(isrc->isrc_dev, isrc);
378 		if (error == FILTER_HANDLED)
379 			return (0);
380 	} else
381 #endif
382 	if (isrc->isrc_event != NULL) {
383 		if (intr_event_handle(isrc->isrc_event, tf) == 0)
384 			return (0);
385 	}
386 
387 	isrc_increment_straycount(isrc);
388 	return (EINVAL);
389 }
390 
391 /*
392  *  Alloc unique interrupt number (resource handle) for interrupt source.
393  *
394  *  There could be various strategies how to allocate free interrupt number
395  *  (resource handle) for new interrupt source.
396  *
397  *  1. Handles are always allocated forward, so handles are not recycled
398  *     immediately. However, if only one free handle left which is reused
399  *     constantly...
400  */
401 static inline int
402 isrc_alloc_irq(struct intr_irqsrc *isrc)
403 {
404 	u_int maxirqs, irq;
405 
406 	mtx_assert(&isrc_table_lock, MA_OWNED);
407 
408 	maxirqs = intr_nirq;
409 	if (irq_next_free >= maxirqs)
410 		return (ENOSPC);
411 
412 	for (irq = irq_next_free; irq < maxirqs; irq++) {
413 		if (irq_sources[irq] == NULL)
414 			goto found;
415 	}
416 	for (irq = 0; irq < irq_next_free; irq++) {
417 		if (irq_sources[irq] == NULL)
418 			goto found;
419 	}
420 
421 	irq_next_free = maxirqs;
422 	return (ENOSPC);
423 
424 found:
425 	isrc->isrc_irq = irq;
426 	irq_sources[irq] = isrc;
427 
428 	irq_next_free = irq + 1;
429 	if (irq_next_free >= maxirqs)
430 		irq_next_free = 0;
431 	return (0);
432 }
433 
434 /*
435  *  Free unique interrupt number (resource handle) from interrupt source.
436  */
437 static inline int
438 isrc_free_irq(struct intr_irqsrc *isrc)
439 {
440 
441 	mtx_assert(&isrc_table_lock, MA_OWNED);
442 
443 	if (isrc->isrc_irq >= intr_nirq)
444 		return (EINVAL);
445 	if (irq_sources[isrc->isrc_irq] != isrc)
446 		return (EINVAL);
447 
448 	irq_sources[isrc->isrc_irq] = NULL;
449 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
450 	return (0);
451 }
452 
453 /*
454  *  Initialize interrupt source and register it into global interrupt table.
455  */
456 int
457 intr_isrc_register(struct intr_irqsrc *isrc, device_t dev, u_int flags,
458     const char *fmt, ...)
459 {
460 	int error;
461 	va_list ap;
462 
463 	bzero(isrc, sizeof(struct intr_irqsrc));
464 	isrc->isrc_dev = dev;
465 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
466 	isrc->isrc_flags = flags;
467 
468 	va_start(ap, fmt);
469 	vsnprintf(isrc->isrc_name, INTR_ISRC_NAMELEN, fmt, ap);
470 	va_end(ap);
471 
472 	mtx_lock(&isrc_table_lock);
473 	error = isrc_alloc_irq(isrc);
474 	if (error != 0) {
475 		mtx_unlock(&isrc_table_lock);
476 		return (error);
477 	}
478 	/*
479 	 * Setup interrupt counters, but not for IPI sources. Those are setup
480 	 * later and only for used ones (up to INTR_IPI_COUNT) to not exhaust
481 	 * our counter pool.
482 	 */
483 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
484 		isrc_setup_counters(isrc);
485 	mtx_unlock(&isrc_table_lock);
486 	return (0);
487 }
488 
489 /*
490  *  Deregister interrupt source from global interrupt table.
491  */
492 int
493 intr_isrc_deregister(struct intr_irqsrc *isrc)
494 {
495 	int error;
496 
497 	mtx_lock(&isrc_table_lock);
498 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
499 		isrc_release_counters(isrc);
500 	error = isrc_free_irq(isrc);
501 	mtx_unlock(&isrc_table_lock);
502 	return (error);
503 }
504 
#ifdef SMP
/*
 *  A support function for a PIC to decide if the provided ISRC should be
 *  inited on the given cpu.
 *
 *  Only PPI and IPI sources with at least one handler qualify.  For those,
 *  the INTR_ISRCF_BOUND flag and the isrc_cpu member work as follows:
 *
 *     If INTR_ISRCF_BOUND is set, the ISRC should be inited only on cpus
 *     set in isrc_cpu. If not, the ISRC should be inited on every cpu and
 *     isrc_cpu is kept consistent with it. Thus isrc_cpu is always correct.
 */
bool
intr_isrc_init_on_cpu(struct intr_irqsrc *isrc, u_int cpu)
{

	if (isrc->isrc_handlers == 0 ||
	    (isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI)) == 0)
		return (false);

	if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0) {
		/* Unbound: init everywhere and record this cpu. */
		CPU_SET(cpu, &isrc->isrc_cpu);
		return (true);
	}

	return (CPU_ISSET(cpu, &isrc->isrc_cpu));
}
#endif
530 
#ifdef INTR_SOLO
/*
 *  Install a solo filter on an interrupt source.
 *
 *  Returns EINVAL when no filter is given and EBUSY when the source
 *  already has a filter or an interrupt event.  On success the counter
 *  names are updated and the source itself is handed back as the cookie.
 */
static int
iscr_setup_filter(struct intr_irqsrc *isrc, const char *name,
    intr_irq_filter_t *filter, void *arg, void **cookiep)
{
	int error;

	if (filter == NULL)
		return (EINVAL);

	mtx_lock(&isrc_table_lock);
	/*
	 * Make sure that we do not mix the two ways
	 * how we handle interrupt sources.
	 */
	if (isrc->isrc_filter == NULL && isrc->isrc_event == NULL) {
		isrc->isrc_filter = filter;
		isrc->isrc_arg = arg;
		isrc_update_name(isrc, name);
		error = 0;
	} else
		error = EBUSY;
	mtx_unlock(&isrc_table_lock);

	if (error == 0)
		*cookiep = isrc;
	return (error);
}
#endif
561 
562 /*
563  *  Interrupt source pre_ithread method for MI interrupt framework.
564  */
565 static void
566 intr_isrc_pre_ithread(void *arg)
567 {
568 	struct intr_irqsrc *isrc = arg;
569 
570 	PIC_PRE_ITHREAD(isrc->isrc_dev, isrc);
571 }
572 
573 /*
574  *  Interrupt source post_ithread method for MI interrupt framework.
575  */
576 static void
577 intr_isrc_post_ithread(void *arg)
578 {
579 	struct intr_irqsrc *isrc = arg;
580 
581 	PIC_POST_ITHREAD(isrc->isrc_dev, isrc);
582 }
583 
584 /*
585  *  Interrupt source post_filter method for MI interrupt framework.
586  */
587 static void
588 intr_isrc_post_filter(void *arg)
589 {
590 	struct intr_irqsrc *isrc = arg;
591 
592 	PIC_POST_FILTER(isrc->isrc_dev, isrc);
593 }
594 
595 /*
596  *  Interrupt source assign_cpu method for MI interrupt framework.
597  */
598 static int
599 intr_isrc_assign_cpu(void *arg, int cpu)
600 {
601 #ifdef SMP
602 	struct intr_irqsrc *isrc = arg;
603 	int error;
604 
605 	mtx_lock(&isrc_table_lock);
606 	if (cpu == NOCPU) {
607 		CPU_ZERO(&isrc->isrc_cpu);
608 		isrc->isrc_flags &= ~INTR_ISRCF_BOUND;
609 	} else {
610 		CPU_SETOF(cpu, &isrc->isrc_cpu);
611 		isrc->isrc_flags |= INTR_ISRCF_BOUND;
612 	}
613 
614 	/*
615 	 * In NOCPU case, it's up to PIC to either leave ISRC on same CPU or
616 	 * re-balance it to another CPU or enable it on more CPUs. However,
617 	 * PIC is expected to change isrc_cpu appropriately to keep us well
618 	 * informed if the call is successful.
619 	 */
620 	if (irq_assign_cpu) {
621 		error = PIC_BIND_INTR(isrc->isrc_dev, isrc);
622 		if (error) {
623 			CPU_ZERO(&isrc->isrc_cpu);
624 			mtx_unlock(&isrc_table_lock);
625 			return (error);
626 		}
627 	}
628 	mtx_unlock(&isrc_table_lock);
629 	return (0);
630 #else
631 	return (EOPNOTSUPP);
632 #endif
633 }
634 
635 /*
636  *  Create interrupt event for interrupt source.
637  */
638 static int
639 isrc_event_create(struct intr_irqsrc *isrc)
640 {
641 	struct intr_event *ie;
642 	int error;
643 
644 	error = intr_event_create(&ie, isrc, 0, isrc->isrc_irq,
645 	    intr_isrc_pre_ithread, intr_isrc_post_ithread, intr_isrc_post_filter,
646 	    intr_isrc_assign_cpu, "%s:", isrc->isrc_name);
647 	if (error)
648 		return (error);
649 
650 	mtx_lock(&isrc_table_lock);
651 	/*
652 	 * Make sure that we do not mix the two ways
653 	 * how we handle interrupt sources. Let contested event wins.
654 	 */
655 #ifdef INTR_SOLO
656 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
657 #else
658 	if (isrc->isrc_event != NULL) {
659 #endif
660 		mtx_unlock(&isrc_table_lock);
661 		intr_event_destroy(ie);
662 		return (isrc->isrc_event != NULL ? EBUSY : 0);
663 	}
664 	isrc->isrc_event = ie;
665 	mtx_unlock(&isrc_table_lock);
666 
667 	return (0);
668 }
#ifdef notyet
/*
 *  Detach the interrupt event from an interrupt source and destroy it.
 *  The event pointer is cleared under isrc_table_lock; the event itself is
 *  destroyed after the lock has been released.
 */
static void
isrc_event_destroy(struct intr_irqsrc *isrc)
{
	struct intr_event *old;

	mtx_lock(&isrc_table_lock);
	old = isrc->isrc_event;
	isrc->isrc_event = NULL;
	mtx_unlock(&isrc_table_lock);

	if (old == NULL)
		return;
	intr_event_destroy(old);
}
#endif
687 /*
688  *  Add handler to interrupt source.
689  */
690 static int
691 isrc_add_handler(struct intr_irqsrc *isrc, const char *name,
692     driver_filter_t filter, driver_intr_t handler, void *arg,
693     enum intr_type flags, void **cookiep)
694 {
695 	int error;
696 
697 	if (isrc->isrc_event == NULL) {
698 		error = isrc_event_create(isrc);
699 		if (error)
700 			return (error);
701 	}
702 
703 	error = intr_event_add_handler(isrc->isrc_event, name, filter, handler,
704 	    arg, intr_priority(flags), flags, cookiep);
705 	if (error == 0) {
706 		mtx_lock(&isrc_table_lock);
707 		intrcnt_updatename(isrc);
708 		mtx_unlock(&isrc_table_lock);
709 	}
710 
711 	return (error);
712 }
713 
714 /*
715  *  Lookup interrupt controller locked.
716  */
717 static inline struct intr_pic *
718 pic_lookup_locked(device_t dev, intptr_t xref, int flags)
719 {
720 	struct intr_pic *pic;
721 
722 	mtx_assert(&pic_list_lock, MA_OWNED);
723 
724 	if (dev == NULL && xref == 0)
725 		return (NULL);
726 
727 	/* Note that pic->pic_dev is never NULL on registered PIC. */
728 	SLIST_FOREACH(pic, &pic_list, pic_next) {
729 		if ((pic->pic_flags & FLAG_TYPE_MASK) !=
730 		    (flags & FLAG_TYPE_MASK))
731 			continue;
732 
733 		if (dev == NULL) {
734 			if (xref == pic->pic_xref)
735 				return (pic);
736 		} else if (xref == 0 || pic->pic_xref == 0) {
737 			if (dev == pic->pic_dev)
738 				return (pic);
739 		} else if (xref == pic->pic_xref && dev == pic->pic_dev)
740 				return (pic);
741 	}
742 	return (NULL);
743 }
744 
745 /*
746  *  Lookup interrupt controller.
747  */
748 static struct intr_pic *
749 pic_lookup(device_t dev, intptr_t xref, int flags)
750 {
751 	struct intr_pic *pic;
752 
753 	mtx_lock(&pic_list_lock);
754 	pic = pic_lookup_locked(dev, xref, flags);
755 	mtx_unlock(&pic_list_lock);
756 	return (pic);
757 }
758 
759 /*
760  *  Create interrupt controller.
761  */
762 static struct intr_pic *
763 pic_create(device_t dev, intptr_t xref, int flags)
764 {
765 	struct intr_pic *pic;
766 
767 	mtx_lock(&pic_list_lock);
768 	pic = pic_lookup_locked(dev, xref, flags);
769 	if (pic != NULL) {
770 		mtx_unlock(&pic_list_lock);
771 		return (pic);
772 	}
773 	pic = malloc(sizeof(*pic), M_INTRNG, M_NOWAIT | M_ZERO);
774 	if (pic == NULL) {
775 		mtx_unlock(&pic_list_lock);
776 		return (NULL);
777 	}
778 	pic->pic_xref = xref;
779 	pic->pic_dev = dev;
780 	pic->pic_flags = flags;
781 	mtx_init(&pic->pic_child_lock, "pic child lock", NULL, MTX_SPIN);
782 	SLIST_INSERT_HEAD(&pic_list, pic, pic_next);
783 	mtx_unlock(&pic_list_lock);
784 
785 	return (pic);
786 }
#ifdef notyet
/*
 *  Destroy interrupt controller.
 *
 *  The controller is unlinked from pic_list under pic_list_lock; its child
 *  lock is then destroyed and the memory freed.  Nothing happens when no
 *  matching controller exists.
 */
static void
pic_destroy(device_t dev, intptr_t xref, int flags)
{
	struct intr_pic *pic;

	mtx_lock(&pic_list_lock);
	pic = pic_lookup_locked(dev, xref, flags);
	if (pic == NULL) {
		mtx_unlock(&pic_list_lock);
		return;
	}
	SLIST_REMOVE(&pic_list, pic, intr_pic, pic_next);
	mtx_unlock(&pic_list_lock);

	/*
	 * The spin mutex was set up by pic_create(); tear it down before the
	 * memory holding it is released.
	 * XXX entries still linked on pic_children are not released here.
	 */
	mtx_destroy(&pic->pic_child_lock);
	free(pic, M_INTRNG);
}
#endif
808 /*
809  *  Register interrupt controller.
810  */
811 struct intr_pic *
812 intr_pic_register(device_t dev, intptr_t xref)
813 {
814 	struct intr_pic *pic;
815 
816 	if (dev == NULL)
817 		return (NULL);
818 	pic = pic_create(dev, xref, FLAG_PIC);
819 	if (pic == NULL)
820 		return (NULL);
821 
822 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
823 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
824 	return (pic);
825 }
826 
827 /*
828  *  Unregister interrupt controller.
829  */
830 int
831 intr_pic_deregister(device_t dev, intptr_t xref)
832 {
833 
834 	panic("%s: not implemented", __func__);
835 }
836 
837 /*
838  *  Mark interrupt controller (itself) as a root one.
839  *
840  *  Note that only an interrupt controller can really know its position
841  *  in interrupt controller's tree. So root PIC must claim itself as a root.
842  *
843  *  In FDT case, according to ePAPR approved version 1.1 from 08 April 2011,
844  *  page 30:
845  *    "The root of the interrupt tree is determined when traversal
846  *     of the interrupt tree reaches an interrupt controller node without
847  *     an interrupts property and thus no explicit interrupt parent."
848  */
849 int
850 intr_pic_claim_root(device_t dev, intptr_t xref, intr_irq_filter_t *filter,
851     void *arg, u_int ipicount)
852 {
853 	struct intr_pic *pic;
854 
855 	pic = pic_lookup(dev, xref, FLAG_PIC);
856 	if (pic == NULL) {
857 		device_printf(dev, "not registered\n");
858 		return (EINVAL);
859 	}
860 
861 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
862 	    ("%s: Found a non-PIC controller: %s", __func__,
863 	     device_get_name(pic->pic_dev)));
864 
865 	if (filter == NULL) {
866 		device_printf(dev, "filter missing\n");
867 		return (EINVAL);
868 	}
869 
870 	/*
871 	 * Only one interrupt controllers could be on the root for now.
872 	 * Note that we further suppose that there is not threaded interrupt
873 	 * routine (handler) on the root. See intr_irq_handler().
874 	 */
875 	if (intr_irq_root_dev != NULL) {
876 		device_printf(dev, "another root already set\n");
877 		return (EBUSY);
878 	}
879 
880 	intr_irq_root_dev = dev;
881 	irq_root_filter = filter;
882 	irq_root_arg = arg;
883 	irq_root_ipicount = ipicount;
884 
885 	debugf("irq root set to %s\n", device_get_nameunit(dev));
886 	return (0);
887 }
888 
889 /*
890  * Add a handler to manage a sub range of a parents interrupts.
891  */
892 struct intr_pic *
893 intr_pic_add_handler(device_t parent, struct intr_pic *pic,
894     intr_child_irq_filter_t *filter, void *arg, uintptr_t start,
895     uintptr_t length)
896 {
897 	struct intr_pic *parent_pic;
898 	struct intr_pic_child *newchild;
899 #ifdef INVARIANTS
900 	struct intr_pic_child *child;
901 #endif
902 
903 	/* Find the parent PIC */
904 	parent_pic = pic_lookup(parent, 0, FLAG_PIC);
905 	if (parent_pic == NULL)
906 		return (NULL);
907 
908 	newchild = malloc(sizeof(*newchild), M_INTRNG, M_WAITOK | M_ZERO);
909 	newchild->pc_pic = pic;
910 	newchild->pc_filter = filter;
911 	newchild->pc_filter_arg = arg;
912 	newchild->pc_start = start;
913 	newchild->pc_length = length;
914 
915 	mtx_lock_spin(&parent_pic->pic_child_lock);
916 #ifdef INVARIANTS
917 	SLIST_FOREACH(child, &parent_pic->pic_children, pc_next) {
918 		KASSERT(child->pc_pic != pic, ("%s: Adding a child PIC twice",
919 		    __func__));
920 	}
921 #endif
922 	SLIST_INSERT_HEAD(&parent_pic->pic_children, newchild, pc_next);
923 	mtx_unlock_spin(&parent_pic->pic_child_lock);
924 
925 	return (pic);
926 }
927 
928 static int
929 intr_resolve_irq(device_t dev, intptr_t xref, struct intr_map_data *data,
930     struct intr_irqsrc **isrc)
931 {
932 	struct intr_pic *pic;
933 	struct intr_map_data_msi *msi;
934 
935 	if (data == NULL)
936 		return (EINVAL);
937 
938 	pic = pic_lookup(dev, xref,
939 	    (data->type == INTR_MAP_DATA_MSI) ? FLAG_MSI : FLAG_PIC);
940 	if (pic == NULL)
941 		return (ESRCH);
942 
943 	switch (data->type) {
944 	case INTR_MAP_DATA_MSI:
945 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
946 		    ("%s: Found a non-MSI controller: %s", __func__,
947 		     device_get_name(pic->pic_dev)));
948 		msi = (struct intr_map_data_msi *)data;
949 		*isrc = msi->isrc;
950 		return (0);
951 
952 	default:
953 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
954 		    ("%s: Found a non-PIC controller: %s", __func__,
955 		     device_get_name(pic->pic_dev)));
956 		return (PIC_MAP_INTR(pic->pic_dev, data, isrc));
957 	}
958 }
959 
960 bool
961 intr_is_per_cpu(struct resource *res)
962 {
963 	u_int res_id;
964 	struct intr_irqsrc *isrc;
965 
966 	res_id = (u_int)rman_get_start(res);
967 	isrc = intr_map_get_isrc(res_id);
968 
969 	if (isrc == NULL)
970 		panic("Attempt to get isrc for non-active resource id: %u\n",
971 		    res_id);
972 	return ((isrc->isrc_flags & INTR_ISRCF_PPI) != 0);
973 }
974 
975 int
976 intr_activate_irq(device_t dev, struct resource *res)
977 {
978 	device_t map_dev;
979 	intptr_t map_xref;
980 	struct intr_map_data *data;
981 	struct intr_irqsrc *isrc;
982 	u_int res_id;
983 	int error;
984 
985 	KASSERT(rman_get_start(res) == rman_get_end(res),
986 	    ("%s: more interrupts in resource", __func__));
987 
988 	res_id = (u_int)rman_get_start(res);
989 	if (intr_map_get_isrc(res_id) != NULL)
990 		panic("Attempt to double activation of resource id: %u\n",
991 		    res_id);
992 	intr_map_copy_map_data(res_id, &map_dev, &map_xref, &data);
993 	error = intr_resolve_irq(map_dev, map_xref, data, &isrc);
994 	if (error != 0) {
995 		free(data, M_INTRNG);
996 		/* XXX TODO DISCONECTED PICs */
997 		/* if (error == EINVAL) return(0); */
998 		return (error);
999 	}
1000 	intr_map_set_isrc(res_id, isrc);
1001 	rman_set_virtual(res, data);
1002 	return (PIC_ACTIVATE_INTR(isrc->isrc_dev, isrc, res, data));
1003 }
1004 
1005 int
1006 intr_deactivate_irq(device_t dev, struct resource *res)
1007 {
1008 	struct intr_map_data *data;
1009 	struct intr_irqsrc *isrc;
1010 	u_int res_id;
1011 	int error;
1012 
1013 	KASSERT(rman_get_start(res) == rman_get_end(res),
1014 	    ("%s: more interrupts in resource", __func__));
1015 
1016 	res_id = (u_int)rman_get_start(res);
1017 	isrc = intr_map_get_isrc(res_id);
1018 	if (isrc == NULL)
1019 		panic("Attempt to deactivate non-active resource id: %u\n",
1020 		    res_id);
1021 
1022 	data = rman_get_virtual(res);
1023 	error = PIC_DEACTIVATE_INTR(isrc->isrc_dev, isrc, res, data);
1024 	intr_map_set_isrc(res_id, NULL);
1025 	rman_set_virtual(res, NULL);
1026 	free(data, M_INTRNG);
1027 	return (error);
1028 }
1029 
1030 int
1031 intr_setup_irq(device_t dev, struct resource *res, driver_filter_t filt,
1032     driver_intr_t hand, void *arg, int flags, void **cookiep)
1033 {
1034 	int error;
1035 	struct intr_map_data *data;
1036 	struct intr_irqsrc *isrc;
1037 	const char *name;
1038 	u_int res_id;
1039 
1040 	KASSERT(rman_get_start(res) == rman_get_end(res),
1041 	    ("%s: more interrupts in resource", __func__));
1042 
1043 	res_id = (u_int)rman_get_start(res);
1044 	isrc = intr_map_get_isrc(res_id);
1045 	if (isrc == NULL) {
1046 		/* XXX TODO DISCONECTED PICs */
1047 		return (EINVAL);
1048 	}
1049 
1050 	data = rman_get_virtual(res);
1051 	name = device_get_nameunit(dev);
1052 
1053 #ifdef INTR_SOLO
1054 	/*
1055 	 * Standard handling is done through MI interrupt framework. However,
1056 	 * some interrupts could request solely own special handling. This
1057 	 * non standard handling can be used for interrupt controllers without
1058 	 * handler (filter only), so in case that interrupt controllers are
1059 	 * chained, MI interrupt framework is called only in leaf controller.
1060 	 *
1061 	 * Note that root interrupt controller routine is served as well,
1062 	 * however in intr_irq_handler(), i.e. main system dispatch routine.
1063 	 */
1064 	if (flags & INTR_SOLO && hand != NULL) {
1065 		debugf("irq %u cannot solo on %s\n", irq, name);
1066 		return (EINVAL);
1067 	}
1068 
1069 	if (flags & INTR_SOLO) {
1070 		error = iscr_setup_filter(isrc, name, (intr_irq_filter_t *)filt,
1071 		    arg, cookiep);
1072 		debugf("irq %u setup filter error %d on %s\n", isrc->isrc_irq, error,
1073 		    name);
1074 	} else
1075 #endif
1076 		{
1077 		error = isrc_add_handler(isrc, name, filt, hand, arg, flags,
1078 		    cookiep);
1079 		debugf("irq %u add handler error %d on %s\n", isrc->isrc_irq, error, name);
1080 	}
1081 	if (error != 0)
1082 		return (error);
1083 
1084 	mtx_lock(&isrc_table_lock);
1085 	error = PIC_SETUP_INTR(isrc->isrc_dev, isrc, res, data);
1086 	if (error == 0) {
1087 		isrc->isrc_handlers++;
1088 		if (isrc->isrc_handlers == 1)
1089 			PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
1090 	}
1091 	mtx_unlock(&isrc_table_lock);
1092 	if (error != 0)
1093 		intr_event_remove_handler(*cookiep);
1094 	return (error);
1095 }
1096 
1097 int
1098 intr_teardown_irq(device_t dev, struct resource *res, void *cookie)
1099 {
1100 	int error;
1101 	struct intr_map_data *data;
1102 	struct intr_irqsrc *isrc;
1103 	u_int res_id;
1104 
1105 	KASSERT(rman_get_start(res) == rman_get_end(res),
1106 	    ("%s: more interrupts in resource", __func__));
1107 
1108 	res_id = (u_int)rman_get_start(res);
1109 	isrc = intr_map_get_isrc(res_id);
1110 	if (isrc == NULL || isrc->isrc_handlers == 0)
1111 		return (EINVAL);
1112 
1113 	data = rman_get_virtual(res);
1114 
1115 #ifdef INTR_SOLO
1116 	if (isrc->isrc_filter != NULL) {
1117 		if (isrc != cookie)
1118 			return (EINVAL);
1119 
1120 		mtx_lock(&isrc_table_lock);
1121 		isrc->isrc_filter = NULL;
1122 		isrc->isrc_arg = NULL;
1123 		isrc->isrc_handlers = 0;
1124 		PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1125 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1126 		isrc_update_name(isrc, NULL);
1127 		mtx_unlock(&isrc_table_lock);
1128 		return (0);
1129 	}
1130 #endif
1131 	if (isrc != intr_handler_source(cookie))
1132 		return (EINVAL);
1133 
1134 	error = intr_event_remove_handler(cookie);
1135 	if (error == 0) {
1136 		mtx_lock(&isrc_table_lock);
1137 		isrc->isrc_handlers--;
1138 		if (isrc->isrc_handlers == 0)
1139 			PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1140 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1141 		intrcnt_updatename(isrc);
1142 		mtx_unlock(&isrc_table_lock);
1143 	}
1144 	return (error);
1145 }
1146 
1147 int
1148 intr_describe_irq(device_t dev, struct resource *res, void *cookie,
1149     const char *descr)
1150 {
1151 	int error;
1152 	struct intr_irqsrc *isrc;
1153 	u_int res_id;
1154 
1155 	KASSERT(rman_get_start(res) == rman_get_end(res),
1156 	    ("%s: more interrupts in resource", __func__));
1157 
1158 	res_id = (u_int)rman_get_start(res);
1159 	isrc = intr_map_get_isrc(res_id);
1160 	if (isrc == NULL || isrc->isrc_handlers == 0)
1161 		return (EINVAL);
1162 #ifdef INTR_SOLO
1163 	if (isrc->isrc_filter != NULL) {
1164 		if (isrc != cookie)
1165 			return (EINVAL);
1166 
1167 		mtx_lock(&isrc_table_lock);
1168 		isrc_update_name(isrc, descr);
1169 		mtx_unlock(&isrc_table_lock);
1170 		return (0);
1171 	}
1172 #endif
1173 	error = intr_event_describe_handler(isrc->isrc_event, cookie, descr);
1174 	if (error == 0) {
1175 		mtx_lock(&isrc_table_lock);
1176 		intrcnt_updatename(isrc);
1177 		mtx_unlock(&isrc_table_lock);
1178 	}
1179 	return (error);
1180 }
1181 
1182 #ifdef SMP
1183 int
1184 intr_bind_irq(device_t dev, struct resource *res, int cpu)
1185 {
1186 	struct intr_irqsrc *isrc;
1187 	u_int res_id;
1188 
1189 	KASSERT(rman_get_start(res) == rman_get_end(res),
1190 	    ("%s: more interrupts in resource", __func__));
1191 
1192 	res_id = (u_int)rman_get_start(res);
1193 	isrc = intr_map_get_isrc(res_id);
1194 	if (isrc == NULL || isrc->isrc_handlers == 0)
1195 		return (EINVAL);
1196 #ifdef INTR_SOLO
1197 	if (isrc->isrc_filter != NULL)
1198 		return (intr_isrc_assign_cpu(isrc, cpu));
1199 #endif
1200 	return (intr_event_bind(isrc->isrc_event, cpu));
1201 }
1202 
1203 /*
1204  * Return the CPU that the next interrupt source should use.
1205  * For now just returns the next CPU according to round-robin.
1206  */
1207 u_int
1208 intr_irq_next_cpu(u_int last_cpu, cpuset_t *cpumask)
1209 {
1210 	u_int cpu;
1211 
1212 	KASSERT(!CPU_EMPTY(cpumask), ("%s: Empty CPU mask", __func__));
1213 	if (!irq_assign_cpu || mp_ncpus == 1) {
1214 		cpu = PCPU_GET(cpuid);
1215 
1216 		if (CPU_ISSET(cpu, cpumask))
1217 			return (curcpu);
1218 
1219 		return (CPU_FFS(cpumask) - 1);
1220 	}
1221 
1222 	do {
1223 		last_cpu++;
1224 		if (last_cpu > mp_maxid)
1225 			last_cpu = 0;
1226 	} while (!CPU_ISSET(last_cpu, cpumask));
1227 	return (last_cpu);
1228 }
1229 
1230 #ifndef EARLY_AP_STARTUP
1231 /*
1232  *  Distribute all the interrupt sources among the available
1233  *  CPUs once the AP's have been launched.
1234  */
1235 static void
1236 intr_irq_shuffle(void *arg __unused)
1237 {
1238 	struct intr_irqsrc *isrc;
1239 	u_int i;
1240 
1241 	if (mp_ncpus == 1)
1242 		return;
1243 
1244 	mtx_lock(&isrc_table_lock);
1245 	irq_assign_cpu = true;
1246 	for (i = 0; i < intr_nirq; i++) {
1247 		isrc = irq_sources[i];
1248 		if (isrc == NULL || isrc->isrc_handlers == 0 ||
1249 		    isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI))
1250 			continue;
1251 
1252 		if (isrc->isrc_event != NULL &&
1253 		    isrc->isrc_flags & INTR_ISRCF_BOUND &&
1254 		    isrc->isrc_event->ie_cpu != CPU_FFS(&isrc->isrc_cpu) - 1)
1255 			panic("%s: CPU inconsistency", __func__);
1256 
1257 		if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0)
1258 			CPU_ZERO(&isrc->isrc_cpu); /* start again */
1259 
1260 		/*
1261 		 * We are in wicked position here if the following call fails
1262 		 * for bound ISRC. The best thing we can do is to clear
1263 		 * isrc_cpu so inconsistency with ie_cpu will be detectable.
1264 		 */
1265 		if (PIC_BIND_INTR(isrc->isrc_dev, isrc) != 0)
1266 			CPU_ZERO(&isrc->isrc_cpu);
1267 	}
1268 	mtx_unlock(&isrc_table_lock);
1269 }
1270 SYSINIT(intr_irq_shuffle, SI_SUB_SMP, SI_ORDER_SECOND, intr_irq_shuffle, NULL);
1271 #endif /* !EARLY_AP_STARTUP */
1272 
1273 #else
1274 u_int
1275 intr_irq_next_cpu(u_int current_cpu, cpuset_t *cpumask)
1276 {
1277 
1278 	return (PCPU_GET(cpuid));
1279 }
1280 #endif /* SMP */
1281 
1282 /*
1283  * Allocate memory for new intr_map_data structure.
1284  * Initialize common fields.
1285  */
1286 struct intr_map_data *
1287 intr_alloc_map_data(enum intr_map_data_type type, size_t len, int flags)
1288 {
1289 	struct intr_map_data *data;
1290 
1291 	data = malloc(len, M_INTRNG, flags);
1292 	data->type = type;
1293 	data->len = len;
1294 	return (data);
1295 }
1296 
1297 void intr_free_intr_map_data(struct intr_map_data *data)
1298 {
1299 
1300 	free(data, M_INTRNG);
1301 }
1302 
1303 /*
1304  *  Register a MSI/MSI-X interrupt controller
1305  */
1306 int
1307 intr_msi_register(device_t dev, intptr_t xref)
1308 {
1309 	struct intr_pic *pic;
1310 
1311 	if (dev == NULL)
1312 		return (EINVAL);
1313 	pic = pic_create(dev, xref, FLAG_MSI);
1314 	if (pic == NULL)
1315 		return (ENOMEM);
1316 
1317 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
1318 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
1319 	return (0);
1320 }
1321 
1322 int
1323 intr_alloc_msi(device_t pci, device_t child, intptr_t xref, int count,
1324     int maxcount, int *irqs)
1325 {
1326 	struct iommu_domain *domain;
1327 	struct intr_irqsrc **isrc;
1328 	struct intr_pic *pic;
1329 	device_t pdev;
1330 	struct intr_map_data_msi *msi;
1331 	int err, i;
1332 
1333 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1334 	if (pic == NULL)
1335 		return (ESRCH);
1336 
1337 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1338 	    ("%s: Found a non-MSI controller: %s", __func__,
1339 	     device_get_name(pic->pic_dev)));
1340 
1341 	/*
1342 	 * If this is the first time we have used this context ask the
1343 	 * interrupt controller to map memory the msi source will need.
1344 	 */
1345 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1346 	if (err != 0)
1347 		return (err);
1348 
1349 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1350 	err = MSI_ALLOC_MSI(pic->pic_dev, child, count, maxcount, &pdev, isrc);
1351 	if (err != 0) {
1352 		free(isrc, M_INTRNG);
1353 		return (err);
1354 	}
1355 
1356 	for (i = 0; i < count; i++) {
1357 		isrc[i]->isrc_iommu = domain;
1358 		msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1359 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1360 		msi-> isrc = isrc[i];
1361 
1362 		irqs[i] = intr_map_irq(pic->pic_dev, xref,
1363 		    (struct intr_map_data *)msi);
1364 	}
1365 	free(isrc, M_INTRNG);
1366 
1367 	return (err);
1368 }
1369 
1370 int
1371 intr_release_msi(device_t pci, device_t child, intptr_t xref, int count,
1372     int *irqs)
1373 {
1374 	struct intr_irqsrc **isrc;
1375 	struct intr_pic *pic;
1376 	struct intr_map_data_msi *msi;
1377 	int i, err;
1378 
1379 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1380 	if (pic == NULL)
1381 		return (ESRCH);
1382 
1383 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1384 	    ("%s: Found a non-MSI controller: %s", __func__,
1385 	     device_get_name(pic->pic_dev)));
1386 
1387 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1388 
1389 	for (i = 0; i < count; i++) {
1390 		msi = (struct intr_map_data_msi *)
1391 		    intr_map_get_map_data(irqs[i]);
1392 		KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1393 		    ("%s: irq %d map data is not MSI", __func__,
1394 		    irqs[i]));
1395 		isrc[i] = msi->isrc;
1396 	}
1397 
1398 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1399 
1400 	err = MSI_RELEASE_MSI(pic->pic_dev, child, count, isrc);
1401 
1402 	for (i = 0; i < count; i++) {
1403 		if (isrc[i] != NULL)
1404 			intr_unmap_irq(irqs[i]);
1405 	}
1406 
1407 	free(isrc, M_INTRNG);
1408 	return (err);
1409 }
1410 
1411 int
1412 intr_alloc_msix(device_t pci, device_t child, intptr_t xref, int *irq)
1413 {
1414 	struct iommu_domain *domain;
1415 	struct intr_irqsrc *isrc;
1416 	struct intr_pic *pic;
1417 	device_t pdev;
1418 	struct intr_map_data_msi *msi;
1419 	int err;
1420 
1421 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1422 	if (pic == NULL)
1423 		return (ESRCH);
1424 
1425 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1426 	    ("%s: Found a non-MSI controller: %s", __func__,
1427 	     device_get_name(pic->pic_dev)));
1428 
1429 	/*
1430 	 * If this is the first time we have used this context ask the
1431 	 * interrupt controller to map memory the msi source will need.
1432 	 */
1433 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1434 	if (err != 0)
1435 		return (err);
1436 
1437 	err = MSI_ALLOC_MSIX(pic->pic_dev, child, &pdev, &isrc);
1438 	if (err != 0)
1439 		return (err);
1440 
1441 	isrc->isrc_iommu = domain;
1442 	msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1443 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1444 	msi->isrc = isrc;
1445 	*irq = intr_map_irq(pic->pic_dev, xref, (struct intr_map_data *)msi);
1446 	return (0);
1447 }
1448 
1449 int
1450 intr_release_msix(device_t pci, device_t child, intptr_t xref, int irq)
1451 {
1452 	struct intr_irqsrc *isrc;
1453 	struct intr_pic *pic;
1454 	struct intr_map_data_msi *msi;
1455 	int err;
1456 
1457 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1458 	if (pic == NULL)
1459 		return (ESRCH);
1460 
1461 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1462 	    ("%s: Found a non-MSI controller: %s", __func__,
1463 	     device_get_name(pic->pic_dev)));
1464 
1465 	msi = (struct intr_map_data_msi *)
1466 	    intr_map_get_map_data(irq);
1467 	KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1468 	    ("%s: irq %d map data is not MSI", __func__,
1469 	    irq));
1470 	isrc = msi->isrc;
1471 	if (isrc == NULL) {
1472 		intr_unmap_irq(irq);
1473 		return (EINVAL);
1474 	}
1475 
1476 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1477 
1478 	err = MSI_RELEASE_MSIX(pic->pic_dev, child, isrc);
1479 	intr_unmap_irq(irq);
1480 
1481 	return (err);
1482 }
1483 
1484 int
1485 intr_map_msi(device_t pci, device_t child, intptr_t xref, int irq,
1486     uint64_t *addr, uint32_t *data)
1487 {
1488 	struct intr_irqsrc *isrc;
1489 	struct intr_pic *pic;
1490 	int err;
1491 
1492 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1493 	if (pic == NULL)
1494 		return (ESRCH);
1495 
1496 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1497 	    ("%s: Found a non-MSI controller: %s", __func__,
1498 	     device_get_name(pic->pic_dev)));
1499 
1500 	isrc = intr_map_get_isrc(irq);
1501 	if (isrc == NULL)
1502 		return (EINVAL);
1503 
1504 	err = MSI_MAP_MSI(pic->pic_dev, child, isrc, addr, data);
1505 
1506 #ifdef IOMMU
1507 	if (isrc->isrc_iommu != NULL)
1508 		iommu_translate_msi(isrc->isrc_iommu, addr);
1509 #endif
1510 
1511 	return (err);
1512 }
1513 
/*
 * Empty function with external linkage.  The prototype is declared locally,
 * right before the definition.
 */
void	dosoftints(void);

void
dosoftints(void)
{

}
1519 
1520 #ifdef SMP
1521 /*
1522  *  Init interrupt controller on another CPU.
1523  */
1524 void
1525 intr_pic_init_secondary(void)
1526 {
1527 
1528 	/*
1529 	 * QQQ: Only root PIC is aware of other CPUs ???
1530 	 */
1531 	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
1532 
1533 	//mtx_lock(&isrc_table_lock);
1534 	PIC_INIT_SECONDARY(intr_irq_root_dev);
1535 	//mtx_unlock(&isrc_table_lock);
1536 }
1537 #endif
1538 
1539 #ifdef DDB
1540 DB_SHOW_COMMAND(irqs, db_show_irqs)
1541 {
1542 	u_int i, irqsum;
1543 	u_long num;
1544 	struct intr_irqsrc *isrc;
1545 
1546 	for (irqsum = 0, i = 0; i < intr_nirq; i++) {
1547 		isrc = irq_sources[i];
1548 		if (isrc == NULL)
1549 			continue;
1550 
1551 		num = isrc->isrc_count != NULL ? isrc->isrc_count[0] : 0;
1552 		db_printf("irq%-3u <%s>: cpu %02lx%s cnt %lu\n", i,
1553 		    isrc->isrc_name, isrc->isrc_cpu.__bits[0],
1554 		    isrc->isrc_flags & INTR_ISRCF_BOUND ? " (bound)" : "", num);
1555 		irqsum += num;
1556 	}
1557 	db_printf("irq total %u\n", irqsum);
1558 }
1559 #endif
1560 
/*
 * Interrupt mapping table functions.
 *
 * Please keep this part separate from the rest of the file; it may later be
 * turned into an extension of the standard resources.
 */
1567 struct intr_map_entry
1568 {
1569 	device_t 		dev;
1570 	intptr_t 		xref;
1571 	struct intr_map_data 	*map_data;
1572 	struct intr_irqsrc 	*isrc;
1573 	/* XXX TODO DISCONECTED PICs */
1574 	/*int			flags */
1575 };
1576 
1577 /* XXX Convert irq_map[] to dynamicaly expandable one. */
1578 static struct intr_map_entry **irq_map;
1579 static int irq_map_count;
1580 static int irq_map_first_free_idx;
1581 static struct mtx irq_map_lock;
1582 
1583 static struct intr_irqsrc *
1584 intr_map_get_isrc(u_int res_id)
1585 {
1586 	struct intr_irqsrc *isrc;
1587 
1588 	isrc = NULL;
1589 	mtx_lock(&irq_map_lock);
1590 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1591 		isrc = irq_map[res_id]->isrc;
1592 	mtx_unlock(&irq_map_lock);
1593 
1594 	return (isrc);
1595 }
1596 
1597 static void
1598 intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc)
1599 {
1600 
1601 	mtx_lock(&irq_map_lock);
1602 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1603 		irq_map[res_id]->isrc = isrc;
1604 	mtx_unlock(&irq_map_lock);
1605 }
1606 
1607 /*
1608  * Get a copy of intr_map_entry data
1609  */
1610 static struct intr_map_data *
1611 intr_map_get_map_data(u_int res_id)
1612 {
1613 	struct intr_map_data *data;
1614 
1615 	data = NULL;
1616 	mtx_lock(&irq_map_lock);
1617 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1618 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1619 	data = irq_map[res_id]->map_data;
1620 	mtx_unlock(&irq_map_lock);
1621 
1622 	return (data);
1623 }
1624 
1625 /*
1626  * Get a copy of intr_map_entry data
1627  */
1628 static void
1629 intr_map_copy_map_data(u_int res_id, device_t *map_dev, intptr_t *map_xref,
1630     struct intr_map_data **data)
1631 {
1632 	size_t len;
1633 
1634 	len = 0;
1635 	mtx_lock(&irq_map_lock);
1636 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1637 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1638 	if (irq_map[res_id]->map_data != NULL)
1639 		len = irq_map[res_id]->map_data->len;
1640 	mtx_unlock(&irq_map_lock);
1641 
1642 	if (len == 0)
1643 		*data = NULL;
1644 	else
1645 		*data = malloc(len, M_INTRNG, M_WAITOK | M_ZERO);
1646 	mtx_lock(&irq_map_lock);
1647 	if (irq_map[res_id] == NULL)
1648 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1649 	if (len != 0) {
1650 		if (len != irq_map[res_id]->map_data->len)
1651 			panic("Resource id: %u has changed.\n", res_id);
1652 		memcpy(*data, irq_map[res_id]->map_data, len);
1653 	}
1654 	*map_dev = irq_map[res_id]->dev;
1655 	*map_xref = irq_map[res_id]->xref;
1656 	mtx_unlock(&irq_map_lock);
1657 }
1658 
1659 /*
1660  * Allocate and fill new entry in irq_map table.
1661  */
1662 u_int
1663 intr_map_irq(device_t dev, intptr_t xref, struct intr_map_data *data)
1664 {
1665 	u_int i;
1666 	struct intr_map_entry *entry;
1667 
1668 	/* Prepare new entry first. */
1669 	entry = malloc(sizeof(*entry), M_INTRNG, M_WAITOK | M_ZERO);
1670 
1671 	entry->dev = dev;
1672 	entry->xref = xref;
1673 	entry->map_data = data;
1674 	entry->isrc = NULL;
1675 
1676 	mtx_lock(&irq_map_lock);
1677 	for (i = irq_map_first_free_idx; i < irq_map_count; i++) {
1678 		if (irq_map[i] == NULL) {
1679 			irq_map[i] = entry;
1680 			irq_map_first_free_idx = i + 1;
1681 			mtx_unlock(&irq_map_lock);
1682 			return (i);
1683 		}
1684 	}
1685 	mtx_unlock(&irq_map_lock);
1686 
1687 	/* XXX Expand irq_map table */
1688 	panic("IRQ mapping table is full.");
1689 }
1690 
1691 /*
1692  * Remove and free mapping entry.
1693  */
1694 void
1695 intr_unmap_irq(u_int res_id)
1696 {
1697 	struct intr_map_entry *entry;
1698 
1699 	mtx_lock(&irq_map_lock);
1700 	if ((res_id >= irq_map_count) || (irq_map[res_id] == NULL))
1701 		panic("Attempt to unmap invalid resource id: %u\n", res_id);
1702 	entry = irq_map[res_id];
1703 	irq_map[res_id] = NULL;
1704 	irq_map_first_free_idx = res_id;
1705 	mtx_unlock(&irq_map_lock);
1706 	intr_free_intr_map_data(entry->map_data);
1707 	free(entry, M_INTRNG);
1708 }
1709 
1710 /*
1711  * Clone mapping entry.
1712  */
1713 u_int
1714 intr_map_clone_irq(u_int old_res_id)
1715 {
1716 	device_t map_dev;
1717 	intptr_t map_xref;
1718 	struct intr_map_data *data;
1719 
1720 	intr_map_copy_map_data(old_res_id, &map_dev, &map_xref, &data);
1721 	return (intr_map_irq(map_dev, map_xref, data));
1722 }
1723 
1724 static void
1725 intr_map_init(void *dummy __unused)
1726 {
1727 
1728 	mtx_init(&irq_map_lock, "intr map table", NULL, MTX_DEF);
1729 
1730 	irq_map_count = 2 * intr_nirq;
1731 	irq_map = mallocarray(irq_map_count, sizeof(struct intr_map_entry*),
1732 	    M_INTRNG, M_WAITOK | M_ZERO);
1733 }
1734 SYSINIT(intr_map_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_map_init, NULL);
1735