xref: /freebsd/sys/kern/subr_intr.c (revision 2e3507c25e42292b45a5482e116d278f5515d04d)
1 /*-
2  * Copyright (c) 2015-2016 Svatopluk Kraus
3  * Copyright (c) 2015-2016 Michal Meloun
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 /*
30  *	New-style Interrupt Framework
31  *
32  *  TODO: - add support for disconnected PICs.
33  *        - to support IPI (PPI) enabling on other CPUs if already started.
34  *        - to complete things for removable PICs.
35  */
36 
37 #include "opt_ddb.h"
38 #include "opt_hwpmc_hooks.h"
39 #include "opt_iommu.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/asan.h>
44 #include <sys/bitstring.h>
45 #include <sys/bus.h>
46 #include <sys/conf.h>
47 #include <sys/cpuset.h>
48 #include <sys/interrupt.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mutex.h>
53 #include <sys/proc.h>
54 #include <sys/queue.h>
55 #include <sys/rman.h>
56 #include <sys/sched.h>
57 #include <sys/smp.h>
58 #include <sys/sysctl.h>
59 #include <sys/syslog.h>
60 #include <sys/taskqueue.h>
61 #include <sys/tree.h>
62 #include <sys/vmmeter.h>
63 #ifdef HWPMC_HOOKS
64 #include <sys/pmckern.h>
65 #endif
66 
67 #include <machine/atomic.h>
68 #include <machine/cpu.h>
69 #include <machine/intr.h>
70 #include <machine/smp.h>
71 #include <machine/stdarg.h>
72 
73 #ifdef DDB
74 #include <ddb/ddb.h>
75 #endif
76 
77 #ifdef IOMMU
78 #include <dev/iommu/iommu_msi.h>
79 #endif
80 
81 #include "pic_if.h"
82 #include "msi_if.h"
83 
84 #define	INTRNAME_LEN	(2*MAXCOMLEN + 1)
85 
/*
 * Debug trace helper.  With DEBUG defined the message is printed prefixed by
 * the name of the calling function; otherwise the macro expands to nothing
 * (its arguments are then not evaluated at all).
 */
#ifndef DEBUG
#define debugf(fmt, args...)
#else
#define debugf(fmt, args...)						\
	do {								\
		printf("%s(): ", __func__);				\
		printf(fmt,##args);					\
	} while (0)
#endif
92 
93 MALLOC_DECLARE(M_INTRNG);
94 MALLOC_DEFINE(M_INTRNG, "intr", "intr interrupt handling");
95 
96 /* Main interrupt handler called from assembler -> 'hidden' for C code. */
97 void intr_irq_handler(struct trapframe *tf);
98 
99 /* Root interrupt controller stuff. */
100 device_t intr_irq_root_dev;
101 static intr_irq_filter_t *irq_root_filter;
102 static void *irq_root_arg;
103 static u_int irq_root_ipicount;
104 
105 struct intr_pic_child {
106 	SLIST_ENTRY(intr_pic_child)	 pc_next;
107 	struct intr_pic			*pc_pic;
108 	intr_child_irq_filter_t		*pc_filter;
109 	void				*pc_filter_arg;
110 	uintptr_t			 pc_start;
111 	uintptr_t			 pc_length;
112 };
113 
114 /* Interrupt controller definition. */
115 struct intr_pic {
116 	SLIST_ENTRY(intr_pic)	pic_next;
117 	intptr_t		pic_xref;	/* hardware identification */
118 	device_t		pic_dev;
119 /* Only one of FLAG_PIC or FLAG_MSI may be set */
120 #define	FLAG_PIC	(1 << 0)
121 #define	FLAG_MSI	(1 << 1)
122 #define	FLAG_TYPE_MASK	(FLAG_PIC | FLAG_MSI)
123 	u_int			pic_flags;
124 	struct mtx		pic_child_lock;
125 	SLIST_HEAD(, intr_pic_child) pic_children;
126 };
127 
128 static struct mtx pic_list_lock;
129 static SLIST_HEAD(, intr_pic) pic_list;
130 
131 static struct intr_pic *pic_lookup(device_t dev, intptr_t xref, int flags);
132 
133 /* Interrupt source definition. */
134 static struct mtx isrc_table_lock;
135 static struct intr_irqsrc **irq_sources;
136 static u_int irq_next_free;
137 
138 #ifdef SMP
139 #ifdef EARLY_AP_STARTUP
140 static bool irq_assign_cpu = true;
141 #else
142 static bool irq_assign_cpu = false;
143 #endif
144 #endif
145 
146 u_int intr_nirq = NIRQ;
147 SYSCTL_UINT(_machdep, OID_AUTO, nirq, CTLFLAG_RDTUN, &intr_nirq, 0,
148     "Number of IRQs");
149 
150 /* Data for MI statistics reporting. */
151 u_long *intrcnt;
152 char *intrnames;
153 size_t sintrcnt;
154 size_t sintrnames;
155 int nintrcnt;
156 static bitstr_t *intrcnt_bitmap;
157 
158 static struct intr_irqsrc *intr_map_get_isrc(u_int res_id);
159 static void intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc);
160 static struct intr_map_data * intr_map_get_map_data(u_int res_id);
161 static void intr_map_copy_map_data(u_int res_id, device_t *dev, intptr_t *xref,
162     struct intr_map_data **data);
163 
164 /*
165  *  Interrupt framework initialization routine.
166  */
167 static void
168 intr_irq_init(void *dummy __unused)
169 {
170 
171 	SLIST_INIT(&pic_list);
172 	mtx_init(&pic_list_lock, "intr pic list", NULL, MTX_DEF);
173 
174 	mtx_init(&isrc_table_lock, "intr isrc table", NULL, MTX_DEF);
175 
176 	/*
177 	 * - 2 counters for each I/O interrupt.
178 	 * - mp_maxid + 1 counters for each IPI counters for SMP.
179 	 */
180 	nintrcnt = intr_nirq * 2;
181 #ifdef SMP
182 	nintrcnt += INTR_IPI_COUNT * (mp_maxid + 1);
183 #endif
184 
185 	intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTRNG,
186 	    M_WAITOK | M_ZERO);
187 	intrnames = mallocarray(nintrcnt, INTRNAME_LEN, M_INTRNG,
188 	    M_WAITOK | M_ZERO);
189 	sintrcnt = nintrcnt * sizeof(u_long);
190 	sintrnames = nintrcnt * INTRNAME_LEN;
191 
192 	/* Allocate the bitmap tracking counter allocations. */
193 	intrcnt_bitmap = bit_alloc(nintrcnt, M_INTRNG, M_WAITOK | M_ZERO);
194 
195 	irq_sources = mallocarray(intr_nirq, sizeof(struct intr_irqsrc*),
196 	    M_INTRNG, M_WAITOK | M_ZERO);
197 }
198 SYSINIT(intr_irq_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_irq_init, NULL);
199 
200 static void
201 intrcnt_setname(const char *name, int index)
202 {
203 
204 	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
205 	    INTRNAME_LEN - 1, name);
206 }
207 
208 /*
209  *  Update name for interrupt source with interrupt event.
210  */
211 static void
212 intrcnt_updatename(struct intr_irqsrc *isrc)
213 {
214 
215 	/* QQQ: What about stray counter name? */
216 	mtx_assert(&isrc_table_lock, MA_OWNED);
217 	intrcnt_setname(isrc->isrc_event->ie_fullname, isrc->isrc_index);
218 }
219 
220 /*
221  *  Virtualization for interrupt source interrupt counter increment.
222  */
223 static inline void
224 isrc_increment_count(struct intr_irqsrc *isrc)
225 {
226 
227 	if (isrc->isrc_flags & INTR_ISRCF_PPI)
228 		atomic_add_long(&isrc->isrc_count[0], 1);
229 	else
230 		isrc->isrc_count[0]++;
231 }
232 
233 /*
234  *  Virtualization for interrupt source interrupt stray counter increment.
235  */
236 static inline void
237 isrc_increment_straycount(struct intr_irqsrc *isrc)
238 {
239 
240 	isrc->isrc_count[1]++;
241 }
242 
243 /*
244  *  Virtualization for interrupt source interrupt name update.
245  */
246 static void
247 isrc_update_name(struct intr_irqsrc *isrc, const char *name)
248 {
249 	char str[INTRNAME_LEN];
250 
251 	mtx_assert(&isrc_table_lock, MA_OWNED);
252 
253 	if (name != NULL) {
254 		snprintf(str, INTRNAME_LEN, "%s: %s", isrc->isrc_name, name);
255 		intrcnt_setname(str, isrc->isrc_index);
256 		snprintf(str, INTRNAME_LEN, "stray %s: %s", isrc->isrc_name,
257 		    name);
258 		intrcnt_setname(str, isrc->isrc_index + 1);
259 	} else {
260 		snprintf(str, INTRNAME_LEN, "%s:", isrc->isrc_name);
261 		intrcnt_setname(str, isrc->isrc_index);
262 		snprintf(str, INTRNAME_LEN, "stray %s:", isrc->isrc_name);
263 		intrcnt_setname(str, isrc->isrc_index + 1);
264 	}
265 }
266 
267 /*
268  *  Virtualization for interrupt source interrupt counters setup.
269  */
270 static void
271 isrc_setup_counters(struct intr_irqsrc *isrc)
272 {
273 	int index;
274 
275 	mtx_assert(&isrc_table_lock, MA_OWNED);
276 
277 	/*
278 	 * Allocate two counter values, the second tracking "stray" interrupts.
279 	 */
280 	bit_ffc_area(intrcnt_bitmap, nintrcnt, 2, &index);
281 	if (index == -1)
282 		panic("Failed to allocate 2 counters. Array exhausted?");
283 	bit_nset(intrcnt_bitmap, index, index + 1);
284 	isrc->isrc_index = index;
285 	isrc->isrc_count = &intrcnt[index];
286 	isrc_update_name(isrc, NULL);
287 }
288 
289 /*
290  *  Virtualization for interrupt source interrupt counters release.
291  */
292 static void
293 isrc_release_counters(struct intr_irqsrc *isrc)
294 {
295 	int idx = isrc->isrc_index;
296 
297 	mtx_assert(&isrc_table_lock, MA_OWNED);
298 
299 	bit_nclear(intrcnt_bitmap, idx, idx + 1);
300 }
301 
302 #ifdef SMP
303 /*
304  *  Virtualization for interrupt source IPI counters setup.
305  */
306 u_long *
307 intr_ipi_setup_counters(const char *name)
308 {
309 	u_int index, i;
310 	char str[INTRNAME_LEN];
311 
312 	mtx_lock(&isrc_table_lock);
313 
314 	/*
315 	 * We should never have a problem finding mp_maxid + 1 contiguous
316 	 * counters, in practice. Interrupts will be allocated sequentially
317 	 * during boot, so the array should fill from low to high index. Once
318 	 * reserved, the IPI counters will never be released. Similarly, we
319 	 * will not need to allocate more IPIs once the system is running.
320 	 */
321 	bit_ffc_area(intrcnt_bitmap, nintrcnt, mp_maxid + 1, &index);
322 	if (index == -1)
323 		panic("Failed to allocate %d counters. Array exhausted?",
324 		    mp_maxid + 1);
325 	bit_nset(intrcnt_bitmap, index, index + mp_maxid);
326 	for (i = 0; i < mp_maxid + 1; i++) {
327 		snprintf(str, INTRNAME_LEN, "cpu%d:%s", i, name);
328 		intrcnt_setname(str, index + i);
329 	}
330 	mtx_unlock(&isrc_table_lock);
331 	return (&intrcnt[index]);
332 }
333 #endif
334 
335 /*
336  *  Main interrupt dispatch handler. It's called straight
337  *  from the assembler, where CPU interrupt is served.
338  */
339 void
340 intr_irq_handler(struct trapframe *tf)
341 {
342 	struct trapframe * oldframe;
343 	struct thread * td;
344 
345 	KASSERT(irq_root_filter != NULL, ("%s: no filter", __func__));
346 
347 	kasan_mark(tf, sizeof(*tf), sizeof(*tf), 0);
348 
349 	VM_CNT_INC(v_intr);
350 	critical_enter();
351 	td = curthread;
352 	oldframe = td->td_intr_frame;
353 	td->td_intr_frame = tf;
354 	irq_root_filter(irq_root_arg);
355 	td->td_intr_frame = oldframe;
356 	critical_exit();
357 #ifdef HWPMC_HOOKS
358 	if (pmc_hook && TRAPF_USERMODE(tf) &&
359 	    (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN))
360 		pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, tf);
361 #endif
362 }
363 
364 int
365 intr_child_irq_handler(struct intr_pic *parent, uintptr_t irq)
366 {
367 	struct intr_pic_child *child;
368 	bool found;
369 
370 	found = false;
371 	mtx_lock_spin(&parent->pic_child_lock);
372 	SLIST_FOREACH(child, &parent->pic_children, pc_next) {
373 		if (child->pc_start <= irq &&
374 		    irq < (child->pc_start + child->pc_length)) {
375 			found = true;
376 			break;
377 		}
378 	}
379 	mtx_unlock_spin(&parent->pic_child_lock);
380 
381 	if (found)
382 		return (child->pc_filter(child->pc_filter_arg, irq));
383 
384 	return (FILTER_STRAY);
385 }
386 
/*
 *  Interrupt source dispatch function. It should be called straight from
 *  the interrupt controller, once the associated interrupt source is
 *  known.
 */
392 int
393 intr_isrc_dispatch(struct intr_irqsrc *isrc, struct trapframe *tf)
394 {
395 
396 	KASSERT(isrc != NULL, ("%s: no source", __func__));
397 
398 	isrc_increment_count(isrc);
399 
400 #ifdef INTR_SOLO
401 	if (isrc->isrc_filter != NULL) {
402 		int error;
403 		error = isrc->isrc_filter(isrc->isrc_arg, tf);
404 		PIC_POST_FILTER(isrc->isrc_dev, isrc);
405 		if (error == FILTER_HANDLED)
406 			return (0);
407 	} else
408 #endif
409 	if (isrc->isrc_event != NULL) {
410 		if (intr_event_handle(isrc->isrc_event, tf) == 0)
411 			return (0);
412 	}
413 
414 	isrc_increment_straycount(isrc);
415 	return (EINVAL);
416 }
417 
/*
 *  Alloc unique interrupt number (resource handle) for interrupt source.
 *
 *  There could be various strategies for allocating a free interrupt number
 *  (resource handle) for a new interrupt source.
 *
 *  1. Handles are always allocated forward, so handles are not recycled
 *     immediately. However, if only one free handle is left, it is reused
 *     constantly...
 */
428 static inline int
429 isrc_alloc_irq(struct intr_irqsrc *isrc)
430 {
431 	u_int irq;
432 
433 	mtx_assert(&isrc_table_lock, MA_OWNED);
434 
435 	if (irq_next_free >= intr_nirq)
436 		return (ENOSPC);
437 
438 	for (irq = irq_next_free; irq < intr_nirq; irq++) {
439 		if (irq_sources[irq] == NULL)
440 			goto found;
441 	}
442 	for (irq = 0; irq < irq_next_free; irq++) {
443 		if (irq_sources[irq] == NULL)
444 			goto found;
445 	}
446 
447 	irq_next_free = intr_nirq;
448 	return (ENOSPC);
449 
450 found:
451 	isrc->isrc_irq = irq;
452 	irq_sources[irq] = isrc;
453 
454 	irq_next_free = irq + 1;
455 	if (irq_next_free >= intr_nirq)
456 		irq_next_free = 0;
457 	return (0);
458 }
459 
460 /*
461  *  Free unique interrupt number (resource handle) from interrupt source.
462  */
463 static inline int
464 isrc_free_irq(struct intr_irqsrc *isrc)
465 {
466 
467 	mtx_assert(&isrc_table_lock, MA_OWNED);
468 
469 	if (isrc->isrc_irq >= intr_nirq)
470 		return (EINVAL);
471 	if (irq_sources[isrc->isrc_irq] != isrc)
472 		return (EINVAL);
473 
474 	irq_sources[isrc->isrc_irq] = NULL;
475 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
476 
477 	/*
478 	 * If we are recovering from the state irq_sources table is full,
479 	 * then the following allocation should check the entire table. This
480 	 * will ensure maximum separation of allocation order from release
481 	 * order.
482 	 */
483 	if (irq_next_free >= intr_nirq)
484 		irq_next_free = 0;
485 
486 	return (0);
487 }
488 
489 /*
490  *  Initialize interrupt source and register it into global interrupt table.
491  */
492 int
493 intr_isrc_register(struct intr_irqsrc *isrc, device_t dev, u_int flags,
494     const char *fmt, ...)
495 {
496 	int error;
497 	va_list ap;
498 
499 	bzero(isrc, sizeof(struct intr_irqsrc));
500 	isrc->isrc_dev = dev;
501 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
502 	isrc->isrc_flags = flags;
503 
504 	va_start(ap, fmt);
505 	vsnprintf(isrc->isrc_name, INTR_ISRC_NAMELEN, fmt, ap);
506 	va_end(ap);
507 
508 	mtx_lock(&isrc_table_lock);
509 	error = isrc_alloc_irq(isrc);
510 	if (error != 0) {
511 		mtx_unlock(&isrc_table_lock);
512 		return (error);
513 	}
514 	/*
515 	 * Setup interrupt counters, but not for IPI sources. Those are setup
516 	 * later and only for used ones (up to INTR_IPI_COUNT) to not exhaust
517 	 * our counter pool.
518 	 */
519 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
520 		isrc_setup_counters(isrc);
521 	mtx_unlock(&isrc_table_lock);
522 	return (0);
523 }
524 
525 /*
526  *  Deregister interrupt source from global interrupt table.
527  */
528 int
529 intr_isrc_deregister(struct intr_irqsrc *isrc)
530 {
531 	int error;
532 
533 	mtx_lock(&isrc_table_lock);
534 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
535 		isrc_release_counters(isrc);
536 	error = isrc_free_irq(isrc);
537 	mtx_unlock(&isrc_table_lock);
538 	return (error);
539 }
540 
541 #ifdef SMP
542 /*
543  *  A support function for a PIC to decide if provided ISRC should be inited
544  *  on given cpu. The logic of INTR_ISRCF_BOUND flag and isrc_cpu member of
545  *  struct intr_irqsrc is the following:
546  *
547  *     If INTR_ISRCF_BOUND is set, the ISRC should be inited only on cpus
548  *     set in isrc_cpu. If not, the ISRC should be inited on every cpu and
549  *     isrc_cpu is kept consistent with it. Thus isrc_cpu is always correct.
550  */
551 bool
552 intr_isrc_init_on_cpu(struct intr_irqsrc *isrc, u_int cpu)
553 {
554 
555 	if (isrc->isrc_handlers == 0)
556 		return (false);
557 	if ((isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI)) == 0)
558 		return (false);
559 	if (isrc->isrc_flags & INTR_ISRCF_BOUND)
560 		return (CPU_ISSET(cpu, &isrc->isrc_cpu));
561 
562 	CPU_SET(cpu, &isrc->isrc_cpu);
563 	return (true);
564 }
565 #endif
566 
567 #ifdef INTR_SOLO
568 /*
569  *  Setup filter into interrupt source.
570  */
571 static int
572 iscr_setup_filter(struct intr_irqsrc *isrc, const char *name,
573     intr_irq_filter_t *filter, void *arg, void **cookiep)
574 {
575 
576 	if (filter == NULL)
577 		return (EINVAL);
578 
579 	mtx_lock(&isrc_table_lock);
580 	/*
581 	 * Make sure that we do not mix the two ways
582 	 * how we handle interrupt sources.
583 	 */
584 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
585 		mtx_unlock(&isrc_table_lock);
586 		return (EBUSY);
587 	}
588 	isrc->isrc_filter = filter;
589 	isrc->isrc_arg = arg;
590 	isrc_update_name(isrc, name);
591 	mtx_unlock(&isrc_table_lock);
592 
593 	*cookiep = isrc;
594 	return (0);
595 }
596 #endif
597 
598 /*
599  *  Interrupt source pre_ithread method for MI interrupt framework.
600  */
601 static void
602 intr_isrc_pre_ithread(void *arg)
603 {
604 	struct intr_irqsrc *isrc = arg;
605 
606 	PIC_PRE_ITHREAD(isrc->isrc_dev, isrc);
607 }
608 
609 /*
610  *  Interrupt source post_ithread method for MI interrupt framework.
611  */
612 static void
613 intr_isrc_post_ithread(void *arg)
614 {
615 	struct intr_irqsrc *isrc = arg;
616 
617 	PIC_POST_ITHREAD(isrc->isrc_dev, isrc);
618 }
619 
620 /*
621  *  Interrupt source post_filter method for MI interrupt framework.
622  */
623 static void
624 intr_isrc_post_filter(void *arg)
625 {
626 	struct intr_irqsrc *isrc = arg;
627 
628 	PIC_POST_FILTER(isrc->isrc_dev, isrc);
629 }
630 
631 /*
632  *  Interrupt source assign_cpu method for MI interrupt framework.
633  */
634 static int
635 intr_isrc_assign_cpu(void *arg, int cpu)
636 {
637 #ifdef SMP
638 	struct intr_irqsrc *isrc = arg;
639 	int error;
640 
641 	mtx_lock(&isrc_table_lock);
642 	if (cpu == NOCPU) {
643 		CPU_ZERO(&isrc->isrc_cpu);
644 		isrc->isrc_flags &= ~INTR_ISRCF_BOUND;
645 	} else {
646 		CPU_SETOF(cpu, &isrc->isrc_cpu);
647 		isrc->isrc_flags |= INTR_ISRCF_BOUND;
648 	}
649 
650 	/*
651 	 * In NOCPU case, it's up to PIC to either leave ISRC on same CPU or
652 	 * re-balance it to another CPU or enable it on more CPUs. However,
653 	 * PIC is expected to change isrc_cpu appropriately to keep us well
654 	 * informed if the call is successful.
655 	 */
656 	if (irq_assign_cpu) {
657 		error = PIC_BIND_INTR(isrc->isrc_dev, isrc);
658 		if (error) {
659 			CPU_ZERO(&isrc->isrc_cpu);
660 			mtx_unlock(&isrc_table_lock);
661 			return (error);
662 		}
663 	}
664 	mtx_unlock(&isrc_table_lock);
665 	return (0);
666 #else
667 	return (EOPNOTSUPP);
668 #endif
669 }
670 
671 /*
672  *  Create interrupt event for interrupt source.
673  */
674 static int
675 isrc_event_create(struct intr_irqsrc *isrc)
676 {
677 	struct intr_event *ie;
678 	int error;
679 
680 	error = intr_event_create(&ie, isrc, 0, isrc->isrc_irq,
681 	    intr_isrc_pre_ithread, intr_isrc_post_ithread, intr_isrc_post_filter,
682 	    intr_isrc_assign_cpu, "%s:", isrc->isrc_name);
683 	if (error)
684 		return (error);
685 
686 	mtx_lock(&isrc_table_lock);
687 	/*
688 	 * Make sure that we do not mix the two ways
689 	 * how we handle interrupt sources. Let contested event wins.
690 	 */
691 #ifdef INTR_SOLO
692 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
693 #else
694 	if (isrc->isrc_event != NULL) {
695 #endif
696 		mtx_unlock(&isrc_table_lock);
697 		intr_event_destroy(ie);
698 		return (isrc->isrc_event != NULL ? EBUSY : 0);
699 	}
700 	isrc->isrc_event = ie;
701 	mtx_unlock(&isrc_table_lock);
702 
703 	return (0);
704 }
705 #ifdef notyet
706 /*
707  *  Destroy interrupt event for interrupt source.
708  */
709 static void
710 isrc_event_destroy(struct intr_irqsrc *isrc)
711 {
712 	struct intr_event *ie;
713 
714 	mtx_lock(&isrc_table_lock);
715 	ie = isrc->isrc_event;
716 	isrc->isrc_event = NULL;
717 	mtx_unlock(&isrc_table_lock);
718 
719 	if (ie != NULL)
720 		intr_event_destroy(ie);
721 }
722 #endif
723 /*
724  *  Add handler to interrupt source.
725  */
726 static int
727 isrc_add_handler(struct intr_irqsrc *isrc, const char *name,
728     driver_filter_t filter, driver_intr_t handler, void *arg,
729     enum intr_type flags, void **cookiep)
730 {
731 	int error;
732 
733 	if (isrc->isrc_event == NULL) {
734 		error = isrc_event_create(isrc);
735 		if (error)
736 			return (error);
737 	}
738 
739 	error = intr_event_add_handler(isrc->isrc_event, name, filter, handler,
740 	    arg, intr_priority(flags), flags, cookiep);
741 	if (error == 0) {
742 		mtx_lock(&isrc_table_lock);
743 		intrcnt_updatename(isrc);
744 		mtx_unlock(&isrc_table_lock);
745 	}
746 
747 	return (error);
748 }
749 
750 /*
751  *  Lookup interrupt controller locked.
752  */
753 static inline struct intr_pic *
754 pic_lookup_locked(device_t dev, intptr_t xref, int flags)
755 {
756 	struct intr_pic *pic;
757 
758 	mtx_assert(&pic_list_lock, MA_OWNED);
759 
760 	if (dev == NULL && xref == 0)
761 		return (NULL);
762 
763 	/* Note that pic->pic_dev is never NULL on registered PIC. */
764 	SLIST_FOREACH(pic, &pic_list, pic_next) {
765 		if ((pic->pic_flags & FLAG_TYPE_MASK) !=
766 		    (flags & FLAG_TYPE_MASK))
767 			continue;
768 
769 		if (dev == NULL) {
770 			if (xref == pic->pic_xref)
771 				return (pic);
772 		} else if (xref == 0 || pic->pic_xref == 0) {
773 			if (dev == pic->pic_dev)
774 				return (pic);
775 		} else if (xref == pic->pic_xref && dev == pic->pic_dev)
776 				return (pic);
777 	}
778 	return (NULL);
779 }
780 
781 /*
782  *  Lookup interrupt controller.
783  */
784 static struct intr_pic *
785 pic_lookup(device_t dev, intptr_t xref, int flags)
786 {
787 	struct intr_pic *pic;
788 
789 	mtx_lock(&pic_list_lock);
790 	pic = pic_lookup_locked(dev, xref, flags);
791 	mtx_unlock(&pic_list_lock);
792 	return (pic);
793 }
794 
795 /*
796  *  Create interrupt controller.
797  */
798 static struct intr_pic *
799 pic_create(device_t dev, intptr_t xref, int flags)
800 {
801 	struct intr_pic *pic;
802 
803 	mtx_lock(&pic_list_lock);
804 	pic = pic_lookup_locked(dev, xref, flags);
805 	if (pic != NULL) {
806 		mtx_unlock(&pic_list_lock);
807 		return (pic);
808 	}
809 	pic = malloc(sizeof(*pic), M_INTRNG, M_NOWAIT | M_ZERO);
810 	if (pic == NULL) {
811 		mtx_unlock(&pic_list_lock);
812 		return (NULL);
813 	}
814 	pic->pic_xref = xref;
815 	pic->pic_dev = dev;
816 	pic->pic_flags = flags;
817 	mtx_init(&pic->pic_child_lock, "pic child lock", NULL, MTX_SPIN);
818 	SLIST_INSERT_HEAD(&pic_list, pic, pic_next);
819 	mtx_unlock(&pic_list_lock);
820 
821 	return (pic);
822 }
823 #ifdef notyet
824 /*
825  *  Destroy interrupt controller.
826  */
827 static void
828 pic_destroy(device_t dev, intptr_t xref, int flags)
829 {
830 	struct intr_pic *pic;
831 
832 	mtx_lock(&pic_list_lock);
833 	pic = pic_lookup_locked(dev, xref, flags);
834 	if (pic == NULL) {
835 		mtx_unlock(&pic_list_lock);
836 		return;
837 	}
838 	SLIST_REMOVE(&pic_list, pic, intr_pic, pic_next);
839 	mtx_unlock(&pic_list_lock);
840 
841 	free(pic, M_INTRNG);
842 }
843 #endif
844 /*
845  *  Register interrupt controller.
846  */
847 struct intr_pic *
848 intr_pic_register(device_t dev, intptr_t xref)
849 {
850 	struct intr_pic *pic;
851 
852 	if (dev == NULL)
853 		return (NULL);
854 	pic = pic_create(dev, xref, FLAG_PIC);
855 	if (pic == NULL)
856 		return (NULL);
857 
858 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
859 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
860 	return (pic);
861 }
862 
863 /*
864  *  Unregister interrupt controller.
865  */
866 int
867 intr_pic_deregister(device_t dev, intptr_t xref)
868 {
869 
870 	panic("%s: not implemented", __func__);
871 }
872 
873 /*
874  *  Mark interrupt controller (itself) as a root one.
875  *
876  *  Note that only an interrupt controller can really know its position
877  *  in interrupt controller's tree. So root PIC must claim itself as a root.
878  *
879  *  In FDT case, according to ePAPR approved version 1.1 from 08 April 2011,
880  *  page 30:
881  *    "The root of the interrupt tree is determined when traversal
882  *     of the interrupt tree reaches an interrupt controller node without
883  *     an interrupts property and thus no explicit interrupt parent."
884  */
885 int
886 intr_pic_claim_root(device_t dev, intptr_t xref, intr_irq_filter_t *filter,
887     void *arg, u_int ipicount)
888 {
889 	struct intr_pic *pic;
890 
891 	pic = pic_lookup(dev, xref, FLAG_PIC);
892 	if (pic == NULL) {
893 		device_printf(dev, "not registered\n");
894 		return (EINVAL);
895 	}
896 
897 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
898 	    ("%s: Found a non-PIC controller: %s", __func__,
899 	     device_get_name(pic->pic_dev)));
900 
901 	if (filter == NULL) {
902 		device_printf(dev, "filter missing\n");
903 		return (EINVAL);
904 	}
905 
906 	/*
907 	 * Only one interrupt controllers could be on the root for now.
908 	 * Note that we further suppose that there is not threaded interrupt
909 	 * routine (handler) on the root. See intr_irq_handler().
910 	 */
911 	if (intr_irq_root_dev != NULL) {
912 		device_printf(dev, "another root already set\n");
913 		return (EBUSY);
914 	}
915 
916 	intr_irq_root_dev = dev;
917 	irq_root_filter = filter;
918 	irq_root_arg = arg;
919 	irq_root_ipicount = ipicount;
920 
921 	debugf("irq root set to %s\n", device_get_nameunit(dev));
922 	return (0);
923 }
924 
/*
 * Add a handler to manage a sub-range of a parent's interrupts.
 */
928 int
929 intr_pic_add_handler(device_t parent, struct intr_pic *pic,
930     intr_child_irq_filter_t *filter, void *arg, uintptr_t start,
931     uintptr_t length)
932 {
933 	struct intr_pic *parent_pic;
934 	struct intr_pic_child *newchild;
935 #ifdef INVARIANTS
936 	struct intr_pic_child *child;
937 #endif
938 
939 	/* Find the parent PIC */
940 	parent_pic = pic_lookup(parent, 0, FLAG_PIC);
941 	if (parent_pic == NULL)
942 		return (ENXIO);
943 
944 	newchild = malloc(sizeof(*newchild), M_INTRNG, M_WAITOK | M_ZERO);
945 	newchild->pc_pic = pic;
946 	newchild->pc_filter = filter;
947 	newchild->pc_filter_arg = arg;
948 	newchild->pc_start = start;
949 	newchild->pc_length = length;
950 
951 	mtx_lock_spin(&parent_pic->pic_child_lock);
952 #ifdef INVARIANTS
953 	SLIST_FOREACH(child, &parent_pic->pic_children, pc_next) {
954 		KASSERT(child->pc_pic != pic, ("%s: Adding a child PIC twice",
955 		    __func__));
956 	}
957 #endif
958 	SLIST_INSERT_HEAD(&parent_pic->pic_children, newchild, pc_next);
959 	mtx_unlock_spin(&parent_pic->pic_child_lock);
960 
961 	return (0);
962 }
963 
964 static int
965 intr_resolve_irq(device_t dev, intptr_t xref, struct intr_map_data *data,
966     struct intr_irqsrc **isrc)
967 {
968 	struct intr_pic *pic;
969 	struct intr_map_data_msi *msi;
970 
971 	if (data == NULL)
972 		return (EINVAL);
973 
974 	pic = pic_lookup(dev, xref,
975 	    (data->type == INTR_MAP_DATA_MSI) ? FLAG_MSI : FLAG_PIC);
976 	if (pic == NULL)
977 		return (ESRCH);
978 
979 	switch (data->type) {
980 	case INTR_MAP_DATA_MSI:
981 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
982 		    ("%s: Found a non-MSI controller: %s", __func__,
983 		     device_get_name(pic->pic_dev)));
984 		msi = (struct intr_map_data_msi *)data;
985 		*isrc = msi->isrc;
986 		return (0);
987 
988 	default:
989 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
990 		    ("%s: Found a non-PIC controller: %s", __func__,
991 		     device_get_name(pic->pic_dev)));
992 		return (PIC_MAP_INTR(pic->pic_dev, data, isrc));
993 	}
994 }
995 
996 bool
997 intr_is_per_cpu(struct resource *res)
998 {
999 	u_int res_id;
1000 	struct intr_irqsrc *isrc;
1001 
1002 	res_id = (u_int)rman_get_start(res);
1003 	isrc = intr_map_get_isrc(res_id);
1004 
1005 	if (isrc == NULL)
1006 		panic("Attempt to get isrc for non-active resource id: %u\n",
1007 		    res_id);
1008 	return ((isrc->isrc_flags & INTR_ISRCF_PPI) != 0);
1009 }
1010 
1011 int
1012 intr_activate_irq(device_t dev, struct resource *res)
1013 {
1014 	device_t map_dev;
1015 	intptr_t map_xref;
1016 	struct intr_map_data *data;
1017 	struct intr_irqsrc *isrc;
1018 	u_int res_id;
1019 	int error;
1020 
1021 	KASSERT(rman_get_start(res) == rman_get_end(res),
1022 	    ("%s: more interrupts in resource", __func__));
1023 
1024 	res_id = (u_int)rman_get_start(res);
1025 	if (intr_map_get_isrc(res_id) != NULL)
1026 		panic("Attempt to double activation of resource id: %u\n",
1027 		    res_id);
1028 	intr_map_copy_map_data(res_id, &map_dev, &map_xref, &data);
1029 	error = intr_resolve_irq(map_dev, map_xref, data, &isrc);
1030 	if (error != 0) {
1031 		free(data, M_INTRNG);
1032 		/* XXX TODO DISCONECTED PICs */
1033 		/* if (error == EINVAL) return(0); */
1034 		return (error);
1035 	}
1036 	intr_map_set_isrc(res_id, isrc);
1037 	rman_set_virtual(res, data);
1038 	return (PIC_ACTIVATE_INTR(isrc->isrc_dev, isrc, res, data));
1039 }
1040 
1041 int
1042 intr_deactivate_irq(device_t dev, struct resource *res)
1043 {
1044 	struct intr_map_data *data;
1045 	struct intr_irqsrc *isrc;
1046 	u_int res_id;
1047 	int error;
1048 
1049 	KASSERT(rman_get_start(res) == rman_get_end(res),
1050 	    ("%s: more interrupts in resource", __func__));
1051 
1052 	res_id = (u_int)rman_get_start(res);
1053 	isrc = intr_map_get_isrc(res_id);
1054 	if (isrc == NULL)
1055 		panic("Attempt to deactivate non-active resource id: %u\n",
1056 		    res_id);
1057 
1058 	data = rman_get_virtual(res);
1059 	error = PIC_DEACTIVATE_INTR(isrc->isrc_dev, isrc, res, data);
1060 	intr_map_set_isrc(res_id, NULL);
1061 	rman_set_virtual(res, NULL);
1062 	free(data, M_INTRNG);
1063 	return (error);
1064 }
1065 
1066 int
1067 intr_setup_irq(device_t dev, struct resource *res, driver_filter_t filt,
1068     driver_intr_t hand, void *arg, int flags, void **cookiep)
1069 {
1070 	int error;
1071 	struct intr_map_data *data;
1072 	struct intr_irqsrc *isrc;
1073 	const char *name;
1074 	u_int res_id;
1075 
1076 	KASSERT(rman_get_start(res) == rman_get_end(res),
1077 	    ("%s: more interrupts in resource", __func__));
1078 
1079 	res_id = (u_int)rman_get_start(res);
1080 	isrc = intr_map_get_isrc(res_id);
1081 	if (isrc == NULL) {
1082 		/* XXX TODO DISCONECTED PICs */
1083 		return (EINVAL);
1084 	}
1085 
1086 	data = rman_get_virtual(res);
1087 	name = device_get_nameunit(dev);
1088 
1089 #ifdef INTR_SOLO
1090 	/*
1091 	 * Standard handling is done through MI interrupt framework. However,
1092 	 * some interrupts could request solely own special handling. This
1093 	 * non standard handling can be used for interrupt controllers without
1094 	 * handler (filter only), so in case that interrupt controllers are
1095 	 * chained, MI interrupt framework is called only in leaf controller.
1096 	 *
1097 	 * Note that root interrupt controller routine is served as well,
1098 	 * however in intr_irq_handler(), i.e. main system dispatch routine.
1099 	 */
1100 	if (flags & INTR_SOLO && hand != NULL) {
1101 		debugf("irq %u cannot solo on %s\n", irq, name);
1102 		return (EINVAL);
1103 	}
1104 
1105 	if (flags & INTR_SOLO) {
1106 		error = iscr_setup_filter(isrc, name, (intr_irq_filter_t *)filt,
1107 		    arg, cookiep);
1108 		debugf("irq %u setup filter error %d on %s\n", isrc->isrc_irq, error,
1109 		    name);
1110 	} else
1111 #endif
1112 		{
1113 		error = isrc_add_handler(isrc, name, filt, hand, arg, flags,
1114 		    cookiep);
1115 		debugf("irq %u add handler error %d on %s\n", isrc->isrc_irq, error, name);
1116 	}
1117 	if (error != 0)
1118 		return (error);
1119 
1120 	mtx_lock(&isrc_table_lock);
1121 	error = PIC_SETUP_INTR(isrc->isrc_dev, isrc, res, data);
1122 	if (error == 0) {
1123 		isrc->isrc_handlers++;
1124 		if (isrc->isrc_handlers == 1)
1125 			PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
1126 	}
1127 	mtx_unlock(&isrc_table_lock);
1128 	if (error != 0)
1129 		intr_event_remove_handler(*cookiep);
1130 	return (error);
1131 }
1132 
1133 int
1134 intr_teardown_irq(device_t dev, struct resource *res, void *cookie)
1135 {
1136 	int error;
1137 	struct intr_map_data *data;
1138 	struct intr_irqsrc *isrc;
1139 	u_int res_id;
1140 
1141 	KASSERT(rman_get_start(res) == rman_get_end(res),
1142 	    ("%s: more interrupts in resource", __func__));
1143 
1144 	res_id = (u_int)rman_get_start(res);
1145 	isrc = intr_map_get_isrc(res_id);
1146 	if (isrc == NULL || isrc->isrc_handlers == 0)
1147 		return (EINVAL);
1148 
1149 	data = rman_get_virtual(res);
1150 
1151 #ifdef INTR_SOLO
1152 	if (isrc->isrc_filter != NULL) {
1153 		if (isrc != cookie)
1154 			return (EINVAL);
1155 
1156 		mtx_lock(&isrc_table_lock);
1157 		isrc->isrc_filter = NULL;
1158 		isrc->isrc_arg = NULL;
1159 		isrc->isrc_handlers = 0;
1160 		PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1161 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1162 		isrc_update_name(isrc, NULL);
1163 		mtx_unlock(&isrc_table_lock);
1164 		return (0);
1165 	}
1166 #endif
1167 	if (isrc != intr_handler_source(cookie))
1168 		return (EINVAL);
1169 
1170 	error = intr_event_remove_handler(cookie);
1171 	if (error == 0) {
1172 		mtx_lock(&isrc_table_lock);
1173 		isrc->isrc_handlers--;
1174 		if (isrc->isrc_handlers == 0)
1175 			PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1176 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1177 		intrcnt_updatename(isrc);
1178 		mtx_unlock(&isrc_table_lock);
1179 	}
1180 	return (error);
1181 }
1182 
1183 int
1184 intr_describe_irq(device_t dev, struct resource *res, void *cookie,
1185     const char *descr)
1186 {
1187 	int error;
1188 	struct intr_irqsrc *isrc;
1189 	u_int res_id;
1190 
1191 	KASSERT(rman_get_start(res) == rman_get_end(res),
1192 	    ("%s: more interrupts in resource", __func__));
1193 
1194 	res_id = (u_int)rman_get_start(res);
1195 	isrc = intr_map_get_isrc(res_id);
1196 	if (isrc == NULL || isrc->isrc_handlers == 0)
1197 		return (EINVAL);
1198 #ifdef INTR_SOLO
1199 	if (isrc->isrc_filter != NULL) {
1200 		if (isrc != cookie)
1201 			return (EINVAL);
1202 
1203 		mtx_lock(&isrc_table_lock);
1204 		isrc_update_name(isrc, descr);
1205 		mtx_unlock(&isrc_table_lock);
1206 		return (0);
1207 	}
1208 #endif
1209 	error = intr_event_describe_handler(isrc->isrc_event, cookie, descr);
1210 	if (error == 0) {
1211 		mtx_lock(&isrc_table_lock);
1212 		intrcnt_updatename(isrc);
1213 		mtx_unlock(&isrc_table_lock);
1214 	}
1215 	return (error);
1216 }
1217 
1218 #ifdef SMP
1219 int
1220 intr_bind_irq(device_t dev, struct resource *res, int cpu)
1221 {
1222 	struct intr_irqsrc *isrc;
1223 	u_int res_id;
1224 
1225 	KASSERT(rman_get_start(res) == rman_get_end(res),
1226 	    ("%s: more interrupts in resource", __func__));
1227 
1228 	res_id = (u_int)rman_get_start(res);
1229 	isrc = intr_map_get_isrc(res_id);
1230 	if (isrc == NULL || isrc->isrc_handlers == 0)
1231 		return (EINVAL);
1232 #ifdef INTR_SOLO
1233 	if (isrc->isrc_filter != NULL)
1234 		return (intr_isrc_assign_cpu(isrc, cpu));
1235 #endif
1236 	return (intr_event_bind(isrc->isrc_event, cpu));
1237 }
1238 
1239 /*
1240  * Return the CPU that the next interrupt source should use.
1241  * For now just returns the next CPU according to round-robin.
1242  */
1243 u_int
1244 intr_irq_next_cpu(u_int last_cpu, cpuset_t *cpumask)
1245 {
1246 	u_int cpu;
1247 
1248 	KASSERT(!CPU_EMPTY(cpumask), ("%s: Empty CPU mask", __func__));
1249 	if (!irq_assign_cpu || mp_ncpus == 1) {
1250 		cpu = PCPU_GET(cpuid);
1251 
1252 		if (CPU_ISSET(cpu, cpumask))
1253 			return (curcpu);
1254 
1255 		return (CPU_FFS(cpumask) - 1);
1256 	}
1257 
1258 	do {
1259 		last_cpu++;
1260 		if (last_cpu > mp_maxid)
1261 			last_cpu = 0;
1262 	} while (!CPU_ISSET(last_cpu, cpumask));
1263 	return (last_cpu);
1264 }
1265 
1266 #ifndef EARLY_AP_STARTUP
1267 /*
1268  *  Distribute all the interrupt sources among the available
1269  *  CPUs once the AP's have been launched.
1270  */
1271 static void
1272 intr_irq_shuffle(void *arg __unused)
1273 {
1274 	struct intr_irqsrc *isrc;
1275 	u_int i;
1276 
1277 	if (mp_ncpus == 1)
1278 		return;
1279 
1280 	mtx_lock(&isrc_table_lock);
1281 	irq_assign_cpu = true;
1282 	for (i = 0; i < intr_nirq; i++) {
1283 		isrc = irq_sources[i];
1284 		if (isrc == NULL || isrc->isrc_handlers == 0 ||
1285 		    isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI))
1286 			continue;
1287 
1288 		if (isrc->isrc_event != NULL &&
1289 		    isrc->isrc_flags & INTR_ISRCF_BOUND &&
1290 		    isrc->isrc_event->ie_cpu != CPU_FFS(&isrc->isrc_cpu) - 1)
1291 			panic("%s: CPU inconsistency", __func__);
1292 
1293 		if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0)
1294 			CPU_ZERO(&isrc->isrc_cpu); /* start again */
1295 
1296 		/*
1297 		 * We are in wicked position here if the following call fails
1298 		 * for bound ISRC. The best thing we can do is to clear
1299 		 * isrc_cpu so inconsistency with ie_cpu will be detectable.
1300 		 */
1301 		if (PIC_BIND_INTR(isrc->isrc_dev, isrc) != 0)
1302 			CPU_ZERO(&isrc->isrc_cpu);
1303 	}
1304 	mtx_unlock(&isrc_table_lock);
1305 }
1306 SYSINIT(intr_irq_shuffle, SI_SUB_SMP, SI_ORDER_SECOND, intr_irq_shuffle, NULL);
1307 #endif /* !EARLY_AP_STARTUP */
1308 
1309 #else
1310 u_int
1311 intr_irq_next_cpu(u_int current_cpu, cpuset_t *cpumask)
1312 {
1313 
1314 	return (PCPU_GET(cpuid));
1315 }
1316 #endif /* SMP */
1317 
1318 /*
1319  * Allocate memory for new intr_map_data structure.
1320  * Initialize common fields.
1321  */
1322 struct intr_map_data *
1323 intr_alloc_map_data(enum intr_map_data_type type, size_t len, int flags)
1324 {
1325 	struct intr_map_data *data;
1326 
1327 	data = malloc(len, M_INTRNG, flags);
1328 	data->type = type;
1329 	data->len = len;
1330 	return (data);
1331 }
1332 
1333 void intr_free_intr_map_data(struct intr_map_data *data)
1334 {
1335 
1336 	free(data, M_INTRNG);
1337 }
1338 
1339 /*
1340  *  Register a MSI/MSI-X interrupt controller
1341  */
1342 int
1343 intr_msi_register(device_t dev, intptr_t xref)
1344 {
1345 	struct intr_pic *pic;
1346 
1347 	if (dev == NULL)
1348 		return (EINVAL);
1349 	pic = pic_create(dev, xref, FLAG_MSI);
1350 	if (pic == NULL)
1351 		return (ENOMEM);
1352 
1353 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
1354 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
1355 	return (0);
1356 }
1357 
1358 int
1359 intr_alloc_msi(device_t pci, device_t child, intptr_t xref, int count,
1360     int maxcount, int *irqs)
1361 {
1362 	struct iommu_domain *domain;
1363 	struct intr_irqsrc **isrc;
1364 	struct intr_pic *pic;
1365 	device_t pdev;
1366 	struct intr_map_data_msi *msi;
1367 	int err, i;
1368 
1369 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1370 	if (pic == NULL)
1371 		return (ESRCH);
1372 
1373 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1374 	    ("%s: Found a non-MSI controller: %s", __func__,
1375 	     device_get_name(pic->pic_dev)));
1376 
1377 	/*
1378 	 * If this is the first time we have used this context ask the
1379 	 * interrupt controller to map memory the msi source will need.
1380 	 */
1381 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1382 	if (err != 0)
1383 		return (err);
1384 
1385 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1386 	err = MSI_ALLOC_MSI(pic->pic_dev, child, count, maxcount, &pdev, isrc);
1387 	if (err != 0) {
1388 		free(isrc, M_INTRNG);
1389 		return (err);
1390 	}
1391 
1392 	for (i = 0; i < count; i++) {
1393 		isrc[i]->isrc_iommu = domain;
1394 		msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1395 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1396 		msi-> isrc = isrc[i];
1397 
1398 		irqs[i] = intr_map_irq(pic->pic_dev, xref,
1399 		    (struct intr_map_data *)msi);
1400 	}
1401 	free(isrc, M_INTRNG);
1402 
1403 	return (err);
1404 }
1405 
1406 int
1407 intr_release_msi(device_t pci, device_t child, intptr_t xref, int count,
1408     int *irqs)
1409 {
1410 	struct intr_irqsrc **isrc;
1411 	struct intr_pic *pic;
1412 	struct intr_map_data_msi *msi;
1413 	int i, err;
1414 
1415 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1416 	if (pic == NULL)
1417 		return (ESRCH);
1418 
1419 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1420 	    ("%s: Found a non-MSI controller: %s", __func__,
1421 	     device_get_name(pic->pic_dev)));
1422 
1423 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1424 
1425 	for (i = 0; i < count; i++) {
1426 		msi = (struct intr_map_data_msi *)
1427 		    intr_map_get_map_data(irqs[i]);
1428 		KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1429 		    ("%s: irq %d map data is not MSI", __func__,
1430 		    irqs[i]));
1431 		isrc[i] = msi->isrc;
1432 	}
1433 
1434 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1435 
1436 	err = MSI_RELEASE_MSI(pic->pic_dev, child, count, isrc);
1437 
1438 	for (i = 0; i < count; i++) {
1439 		if (isrc[i] != NULL)
1440 			intr_unmap_irq(irqs[i]);
1441 	}
1442 
1443 	free(isrc, M_INTRNG);
1444 	return (err);
1445 }
1446 
1447 int
1448 intr_alloc_msix(device_t pci, device_t child, intptr_t xref, int *irq)
1449 {
1450 	struct iommu_domain *domain;
1451 	struct intr_irqsrc *isrc;
1452 	struct intr_pic *pic;
1453 	device_t pdev;
1454 	struct intr_map_data_msi *msi;
1455 	int err;
1456 
1457 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1458 	if (pic == NULL)
1459 		return (ESRCH);
1460 
1461 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1462 	    ("%s: Found a non-MSI controller: %s", __func__,
1463 	     device_get_name(pic->pic_dev)));
1464 
1465 	/*
1466 	 * If this is the first time we have used this context ask the
1467 	 * interrupt controller to map memory the msi source will need.
1468 	 */
1469 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1470 	if (err != 0)
1471 		return (err);
1472 
1473 	err = MSI_ALLOC_MSIX(pic->pic_dev, child, &pdev, &isrc);
1474 	if (err != 0)
1475 		return (err);
1476 
1477 	isrc->isrc_iommu = domain;
1478 	msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1479 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1480 	msi->isrc = isrc;
1481 	*irq = intr_map_irq(pic->pic_dev, xref, (struct intr_map_data *)msi);
1482 	return (0);
1483 }
1484 
1485 int
1486 intr_release_msix(device_t pci, device_t child, intptr_t xref, int irq)
1487 {
1488 	struct intr_irqsrc *isrc;
1489 	struct intr_pic *pic;
1490 	struct intr_map_data_msi *msi;
1491 	int err;
1492 
1493 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1494 	if (pic == NULL)
1495 		return (ESRCH);
1496 
1497 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1498 	    ("%s: Found a non-MSI controller: %s", __func__,
1499 	     device_get_name(pic->pic_dev)));
1500 
1501 	msi = (struct intr_map_data_msi *)
1502 	    intr_map_get_map_data(irq);
1503 	KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1504 	    ("%s: irq %d map data is not MSI", __func__,
1505 	    irq));
1506 	isrc = msi->isrc;
1507 	if (isrc == NULL) {
1508 		intr_unmap_irq(irq);
1509 		return (EINVAL);
1510 	}
1511 
1512 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1513 
1514 	err = MSI_RELEASE_MSIX(pic->pic_dev, child, isrc);
1515 	intr_unmap_irq(irq);
1516 
1517 	return (err);
1518 }
1519 
1520 int
1521 intr_map_msi(device_t pci, device_t child, intptr_t xref, int irq,
1522     uint64_t *addr, uint32_t *data)
1523 {
1524 	struct intr_irqsrc *isrc;
1525 	struct intr_pic *pic;
1526 	int err;
1527 
1528 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1529 	if (pic == NULL)
1530 		return (ESRCH);
1531 
1532 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1533 	    ("%s: Found a non-MSI controller: %s", __func__,
1534 	     device_get_name(pic->pic_dev)));
1535 
1536 	isrc = intr_map_get_isrc(irq);
1537 	if (isrc == NULL)
1538 		return (EINVAL);
1539 
1540 	err = MSI_MAP_MSI(pic->pic_dev, child, isrc, addr, data);
1541 
1542 #ifdef IOMMU
1543 	if (isrc->isrc_iommu != NULL)
1544 		iommu_translate_msi(isrc->isrc_iommu, addr);
1545 #endif
1546 
1547 	return (err);
1548 }
1549 
/*
 * Empty function: it takes no arguments and does nothing.  The prototype is
 * given right here, directly ahead of the definition.
 */
void	dosoftints(void);

void
dosoftints(void)
{

}
1555 
1556 #ifdef SMP
1557 /*
1558  *  Init interrupt controller on another CPU.
1559  */
1560 void
1561 intr_pic_init_secondary(void)
1562 {
1563 
1564 	/*
1565 	 * QQQ: Only root PIC is aware of other CPUs ???
1566 	 */
1567 	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
1568 
1569 	//mtx_lock(&isrc_table_lock);
1570 	PIC_INIT_SECONDARY(intr_irq_root_dev);
1571 	//mtx_unlock(&isrc_table_lock);
1572 }
1573 #endif
1574 
1575 #ifdef DDB
1576 DB_SHOW_COMMAND_FLAGS(irqs, db_show_irqs, DB_CMD_MEMSAFE)
1577 {
1578 	u_int i, irqsum;
1579 	u_long num;
1580 	struct intr_irqsrc *isrc;
1581 
1582 	for (irqsum = 0, i = 0; i < intr_nirq; i++) {
1583 		isrc = irq_sources[i];
1584 		if (isrc == NULL)
1585 			continue;
1586 
1587 		num = isrc->isrc_count != NULL ? isrc->isrc_count[0] : 0;
1588 		db_printf("irq%-3u <%s>: cpu %02lx%s cnt %lu\n", i,
1589 		    isrc->isrc_name, isrc->isrc_cpu.__bits[0],
1590 		    isrc->isrc_flags & INTR_ISRCF_BOUND ? " (bound)" : "", num);
1591 		irqsum += num;
1592 	}
1593 	db_printf("irq total %u\n", irqsum);
1594 }
1595 #endif
1596 
/*
 * Interrupt mapping table functions.
 *
 * Please keep this part separate; it can be transformed into an
 * extension of standard resources.
 *
 * One intr_map_entry describes one mapped resource id, i.e. one slot of
 * the irq_map[] table.
 */
struct intr_map_entry
{
	/* Device given to intr_map_irq() when the entry was created. */
	device_t 		dev;
	/* Cross reference given to intr_map_irq(). */
	intptr_t 		xref;
	/* Mapping data given to intr_map_irq(); freed by intr_unmap_irq(). */
	struct intr_map_data 	*map_data;
	/* NULL at creation; changed through intr_map_set_isrc(). */
	struct intr_irqsrc 	*isrc;
	/* XXX TODO DISCONNECTED PICs */
	/*int			flags */
};
1612 
/*
 * Mapping table state, set up by intr_map_init().
 *
 * XXX Convert irq_map[] to a dynamically expandable one.
 */
/* Table of entry pointers indexed by resource id; NULL marks a free slot. */
static struct intr_map_entry **irq_map;
/* Number of slots in irq_map[]. */
static u_int irq_map_count;
/* Index where intr_map_irq() starts looking for a free slot. */
static u_int irq_map_first_free_idx;
/* Held by the functions below while they access the table. */
static struct mtx irq_map_lock;
1618 
1619 static struct intr_irqsrc *
1620 intr_map_get_isrc(u_int res_id)
1621 {
1622 	struct intr_irqsrc *isrc;
1623 
1624 	isrc = NULL;
1625 	mtx_lock(&irq_map_lock);
1626 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1627 		isrc = irq_map[res_id]->isrc;
1628 	mtx_unlock(&irq_map_lock);
1629 
1630 	return (isrc);
1631 }
1632 
1633 static void
1634 intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc)
1635 {
1636 
1637 	mtx_lock(&irq_map_lock);
1638 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1639 		irq_map[res_id]->isrc = isrc;
1640 	mtx_unlock(&irq_map_lock);
1641 }
1642 
1643 /*
1644  * Get a copy of intr_map_entry data
1645  */
1646 static struct intr_map_data *
1647 intr_map_get_map_data(u_int res_id)
1648 {
1649 	struct intr_map_data *data;
1650 
1651 	data = NULL;
1652 	mtx_lock(&irq_map_lock);
1653 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1654 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1655 	data = irq_map[res_id]->map_data;
1656 	mtx_unlock(&irq_map_lock);
1657 
1658 	return (data);
1659 }
1660 
1661 /*
1662  * Get a copy of intr_map_entry data
1663  */
1664 static void
1665 intr_map_copy_map_data(u_int res_id, device_t *map_dev, intptr_t *map_xref,
1666     struct intr_map_data **data)
1667 {
1668 	size_t len;
1669 
1670 	len = 0;
1671 	mtx_lock(&irq_map_lock);
1672 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1673 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1674 	if (irq_map[res_id]->map_data != NULL)
1675 		len = irq_map[res_id]->map_data->len;
1676 	mtx_unlock(&irq_map_lock);
1677 
1678 	if (len == 0)
1679 		*data = NULL;
1680 	else
1681 		*data = malloc(len, M_INTRNG, M_WAITOK | M_ZERO);
1682 	mtx_lock(&irq_map_lock);
1683 	if (irq_map[res_id] == NULL)
1684 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1685 	if (len != 0) {
1686 		if (len != irq_map[res_id]->map_data->len)
1687 			panic("Resource id: %u has changed.\n", res_id);
1688 		memcpy(*data, irq_map[res_id]->map_data, len);
1689 	}
1690 	*map_dev = irq_map[res_id]->dev;
1691 	*map_xref = irq_map[res_id]->xref;
1692 	mtx_unlock(&irq_map_lock);
1693 }
1694 
1695 /*
1696  * Allocate and fill new entry in irq_map table.
1697  */
1698 u_int
1699 intr_map_irq(device_t dev, intptr_t xref, struct intr_map_data *data)
1700 {
1701 	u_int i;
1702 	struct intr_map_entry *entry;
1703 
1704 	/* Prepare new entry first. */
1705 	entry = malloc(sizeof(*entry), M_INTRNG, M_WAITOK | M_ZERO);
1706 
1707 	entry->dev = dev;
1708 	entry->xref = xref;
1709 	entry->map_data = data;
1710 	entry->isrc = NULL;
1711 
1712 	mtx_lock(&irq_map_lock);
1713 	for (i = irq_map_first_free_idx; i < irq_map_count; i++) {
1714 		if (irq_map[i] == NULL) {
1715 			irq_map[i] = entry;
1716 			irq_map_first_free_idx = i + 1;
1717 			mtx_unlock(&irq_map_lock);
1718 			return (i);
1719 		}
1720 	}
1721 	for (i = 0; i < irq_map_first_free_idx; i++) {
1722 		if (irq_map[i] == NULL) {
1723 			irq_map[i] = entry;
1724 			irq_map_first_free_idx = i + 1;
1725 			mtx_unlock(&irq_map_lock);
1726 			return (i);
1727 		}
1728 	}
1729 	mtx_unlock(&irq_map_lock);
1730 
1731 	/* XXX Expand irq_map table */
1732 	panic("IRQ mapping table is full.");
1733 }
1734 
1735 /*
1736  * Remove and free mapping entry.
1737  */
1738 void
1739 intr_unmap_irq(u_int res_id)
1740 {
1741 	struct intr_map_entry *entry;
1742 
1743 	mtx_lock(&irq_map_lock);
1744 	if ((res_id >= irq_map_count) || (irq_map[res_id] == NULL))
1745 		panic("Attempt to unmap invalid resource id: %u\n", res_id);
1746 	entry = irq_map[res_id];
1747 	irq_map[res_id] = NULL;
1748 	irq_map_first_free_idx = res_id;
1749 	mtx_unlock(&irq_map_lock);
1750 	intr_free_intr_map_data(entry->map_data);
1751 	free(entry, M_INTRNG);
1752 }
1753 
1754 /*
1755  * Clone mapping entry.
1756  */
1757 u_int
1758 intr_map_clone_irq(u_int old_res_id)
1759 {
1760 	device_t map_dev;
1761 	intptr_t map_xref;
1762 	struct intr_map_data *data;
1763 
1764 	intr_map_copy_map_data(old_res_id, &map_dev, &map_xref, &data);
1765 	return (intr_map_irq(map_dev, map_xref, data));
1766 }
1767 
1768 static void
1769 intr_map_init(void *dummy __unused)
1770 {
1771 
1772 	mtx_init(&irq_map_lock, "intr map table", NULL, MTX_DEF);
1773 
1774 	irq_map_count = 2 * intr_nirq;
1775 	irq_map = mallocarray(irq_map_count, sizeof(struct intr_map_entry*),
1776 	    M_INTRNG, M_WAITOK | M_ZERO);
1777 }
1778 SYSINIT(intr_map_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_map_init, NULL);
1779