xref: /freebsd/sys/kern/subr_intr.c (revision 4543ef516683042d46f3bd3bb8a4f3f746e00499)
1 /*-
2  * Copyright (c) 2015-2016 Svatopluk Kraus
3  * Copyright (c) 2015-2016 Michal Meloun
4  * All rights reserved.
5  * Copyright (c) 2015-2016 The FreeBSD Foundation
6  * Copyright (c) 2021 Jessica Clarke <jrtc27@FreeBSD.org>
7  *
8  * Portions of this software were developed by Andrew Turner under
9  * sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 /*
35  *	New-style Interrupt Framework
36  *
37  *  TODO: - add support for disconnected PICs.
38  *        - to support IPI (PPI) enabling on other CPUs if already started.
39  *        - to complete things for removable PICs.
40  */
41 
42 #include "opt_ddb.h"
43 #include "opt_hwpmc_hooks.h"
44 #include "opt_iommu.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/asan.h>
49 #include <sys/bitstring.h>
50 #include <sys/bus.h>
51 #include <sys/conf.h>
52 #include <sys/cpuset.h>
53 #include <sys/interrupt.h>
54 #include <sys/kernel.h>
55 #include <sys/lock.h>
56 #include <sys/malloc.h>
57 #include <sys/msan.h>
58 #include <sys/mutex.h>
59 #include <sys/proc.h>
60 #include <sys/queue.h>
61 #include <sys/rman.h>
62 #include <sys/sched.h>
63 #include <sys/smp.h>
64 #include <sys/sysctl.h>
65 #include <sys/syslog.h>
66 #include <sys/taskqueue.h>
67 #include <sys/tree.h>
68 #include <sys/vmmeter.h>
69 #ifdef HWPMC_HOOKS
70 #include <sys/pmckern.h>
71 #endif
72 
73 #include <machine/atomic.h>
74 #include <machine/cpu.h>
75 #include <machine/intr.h>
76 #include <machine/smp.h>
77 #include <machine/stdarg.h>
78 
79 #ifdef DDB
80 #include <ddb/ddb.h>
81 #endif
82 
83 #ifdef IOMMU
84 #include <dev/iommu/iommu_msi.h>
85 #endif
86 
87 #include "pic_if.h"
88 #include "msi_if.h"
89 
90 #define	INTRNAME_LEN	(2*MAXCOMLEN + 1)
91 
/*
 * Debug tracing helper: with DEBUG defined it prints the name of the calling
 * function followed by the formatted message; otherwise it expands to
 * nothing, so its arguments are not evaluated (or even compiled).
 */
#ifdef DEBUG
#define debugf(fmt, args...) do { printf("%s(): ", __func__);	\
    printf(fmt,##args); } while (0)
#else
#define debugf(fmt, args...)
#endif
98 
/* Malloc type used by the allocations made in this file. */
MALLOC_DECLARE(M_INTRNG);
MALLOC_DEFINE(M_INTRNG, "intr", "intr interrupt handling");

/* Main interrupt handler called from assembler -> 'hidden' for C code. */
void intr_irq_handler(struct trapframe *tf);

/*
 * Root interrupt controller stuff. The three variables are set together by
 * intr_pic_claim_root(); the filter is invoked with its argument from
 * intr_irq_handler().
 */
device_t intr_irq_root_dev;
static intr_irq_filter_t *irq_root_filter;
static void *irq_root_arg;
109 
/*
 * A child interrupt controller claiming a sub-range of its parent's
 * interrupts. Entries are created by intr_pic_add_handler() and searched by
 * intr_child_irq_handler().
 */
struct intr_pic_child {
	SLIST_ENTRY(intr_pic_child)	 pc_next;	/* link in parent's pic_children */
	struct intr_pic			*pc_pic;	/* the child controller */
	intr_child_irq_filter_t		*pc_filter;	/* called for IRQs in the range */
	void				*pc_filter_arg;	/* first argument of pc_filter */
	uintptr_t			 pc_start;	/* first IRQ of the range */
	uintptr_t			 pc_length;	/* number of IRQs in the range */
};
118 
/*
 * Interrupt controller definition.
 *
 * Instances are allocated by pic_create() and linked into the global
 * pic_list, which is protected by pic_list_lock.
 */
struct intr_pic {
	SLIST_ENTRY(intr_pic)	pic_next;	/* link in pic_list */
	intptr_t		pic_xref;	/* hardware identification */
	device_t		pic_dev;	/* owning device, matched by lookups */
/* Only one of FLAG_PIC or FLAG_MSI may be set */
#define	FLAG_PIC	(1 << 0)
#define	FLAG_MSI	(1 << 1)
#define	FLAG_TYPE_MASK	(FLAG_PIC | FLAG_MSI)
	u_int			pic_flags;	/* FLAG_* controller type */
	struct mtx		pic_child_lock;	/* spin mutex guarding pic_children */
	SLIST_HEAD(, intr_pic_child) pic_children; /* claimed child ranges */
};
132 
#ifdef SMP
/* Size of the buffer holding an IPI name, including the terminating NUL. */
#define INTR_IPI_NAMELEN	(MAXCOMLEN + 1)

/*
 * Per-IPI bookkeeping record; one exists for each slot of ipi_sources[].
 * NOTE(review): the code using these fields is outside this part of the
 * file, the field notes below are derived from the names and types only.
 */
struct intr_ipi {
	intr_ipi_handler_t	*ii_handler;	/* presumably the IPI callback */
	void			*ii_handler_arg; /* presumably its argument */
	struct intr_irqsrc	*ii_isrc;	/* associated interrupt source */
	char			ii_name[INTR_IPI_NAMELEN]; /* IPI name */
	u_long			*ii_count;	/* pointer to counter storage */
};

/*
 * State describing the device which provides IPIs.
 * NOTE(review): set and consumed outside this part of the file.
 */
static device_t intr_ipi_dev;
static u_int intr_ipi_dev_priority;
static bool intr_ipi_dev_frozen;
#endif
148 
/* List of all created interrupt controllers and the mutex protecting it. */
static struct mtx pic_list_lock;
static SLIST_HEAD(, intr_pic) pic_list;

static struct intr_pic *pic_lookup(device_t dev, intptr_t xref, int flags);

/*
 * Interrupt source definition.
 *
 * irq_sources[] has intr_nirq slots and maps an interrupt number to its
 * source (NULL when the number is free). irq_next_free is the slot where
 * isrc_alloc_irq() starts its next search; the value intr_nirq means the
 * table was found full. Both are protected by isrc_table_lock.
 */
static struct mtx isrc_table_lock;
static struct intr_irqsrc **irq_sources;
static u_int irq_next_free;
158 
#ifdef SMP
/*
 * While false, intr_isrc_assign_cpu() only records the requested binding in
 * the interrupt source and does not call PIC_BIND_INTR().
 */
#ifdef EARLY_AP_STARTUP
static bool irq_assign_cpu = true;
#else
static bool irq_assign_cpu = false;
#endif

/* One bookkeeping record per IPI. */
static struct intr_ipi ipi_sources[INTR_IPI_COUNT];
#endif
168 
/*
 * Number of interrupt numbers (slots of irq_sources[]); exported read-only
 * as the machdep.nirq sysctl, settable as a loader tunable (CTLFLAG_RDTUN).
 */
u_int intr_nirq = NIRQ;
SYSCTL_UINT(_machdep, OID_AUTO, nirq, CTLFLAG_RDTUN, &intr_nirq, 0,
    "Number of IRQs");

/*
 * Data for MI statistics reporting. All of it is sized and allocated in
 * intr_irq_init():
 *   intrcnt	    - nintrcnt counters,
 *   intrnames	    - nintrcnt names, INTRNAME_LEN bytes each,
 *   sintrcnt	    - size of intrcnt in bytes,
 *   sintrnames	    - size of intrnames in bytes,
 *   intrcnt_bitmap - one bit per counter, set while the counter is in use.
 */
u_long *intrcnt;
char *intrnames;
size_t sintrcnt;
size_t sintrnames;
int nintrcnt;
static bitstr_t *intrcnt_bitmap;

/* Resource id -> interrupt source / map data accessors, defined elsewhere. */
static struct intr_irqsrc *intr_map_get_isrc(u_int res_id);
static void intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc);
static struct intr_map_data * intr_map_get_map_data(u_int res_id);
static void intr_map_copy_map_data(u_int res_id, device_t *dev, intptr_t *xref,
    struct intr_map_data **data);
186 
187 /*
188  *  Interrupt framework initialization routine.
189  */
190 static void
191 intr_irq_init(void *dummy __unused)
192 {
193 
194 	SLIST_INIT(&pic_list);
195 	mtx_init(&pic_list_lock, "intr pic list", NULL, MTX_DEF);
196 
197 	mtx_init(&isrc_table_lock, "intr isrc table", NULL, MTX_DEF);
198 
199 	/*
200 	 * - 2 counters for each I/O interrupt.
201 	 * - mp_maxid + 1 counters for each IPI counters for SMP.
202 	 */
203 	nintrcnt = intr_nirq * 2;
204 #ifdef SMP
205 	nintrcnt += INTR_IPI_COUNT * (mp_maxid + 1);
206 #endif
207 
208 	intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTRNG,
209 	    M_WAITOK | M_ZERO);
210 	intrnames = mallocarray(nintrcnt, INTRNAME_LEN, M_INTRNG,
211 	    M_WAITOK | M_ZERO);
212 	sintrcnt = nintrcnt * sizeof(u_long);
213 	sintrnames = nintrcnt * INTRNAME_LEN;
214 
215 	/* Allocate the bitmap tracking counter allocations. */
216 	intrcnt_bitmap = bit_alloc(nintrcnt, M_INTRNG, M_WAITOK | M_ZERO);
217 
218 	irq_sources = mallocarray(intr_nirq, sizeof(struct intr_irqsrc*),
219 	    M_INTRNG, M_WAITOK | M_ZERO);
220 }
221 SYSINIT(intr_irq_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_irq_init, NULL);
222 
223 static void
224 intrcnt_setname(const char *name, int index)
225 {
226 
227 	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
228 	    INTRNAME_LEN - 1, name);
229 }
230 
231 /*
232  *  Update name for interrupt source with interrupt event.
233  */
234 static void
235 intrcnt_updatename(struct intr_irqsrc *isrc)
236 {
237 
238 	/* QQQ: What about stray counter name? */
239 	mtx_assert(&isrc_table_lock, MA_OWNED);
240 	intrcnt_setname(isrc->isrc_event->ie_fullname, isrc->isrc_index);
241 }
242 
243 /*
244  *  Virtualization for interrupt source interrupt counter increment.
245  */
246 static inline void
247 isrc_increment_count(struct intr_irqsrc *isrc)
248 {
249 
250 	if (isrc->isrc_flags & INTR_ISRCF_PPI)
251 		atomic_add_long(&isrc->isrc_count[0], 1);
252 	else
253 		isrc->isrc_count[0]++;
254 }
255 
256 /*
257  *  Virtualization for interrupt source interrupt stray counter increment.
258  */
259 static inline void
260 isrc_increment_straycount(struct intr_irqsrc *isrc)
261 {
262 
263 	isrc->isrc_count[1]++;
264 }
265 
266 /*
267  *  Virtualization for interrupt source interrupt name update.
268  */
269 static void
270 isrc_update_name(struct intr_irqsrc *isrc, const char *name)
271 {
272 	char str[INTRNAME_LEN];
273 
274 	mtx_assert(&isrc_table_lock, MA_OWNED);
275 
276 	if (name != NULL) {
277 		snprintf(str, INTRNAME_LEN, "%s: %s", isrc->isrc_name, name);
278 		intrcnt_setname(str, isrc->isrc_index);
279 		snprintf(str, INTRNAME_LEN, "stray %s: %s", isrc->isrc_name,
280 		    name);
281 		intrcnt_setname(str, isrc->isrc_index + 1);
282 	} else {
283 		snprintf(str, INTRNAME_LEN, "%s:", isrc->isrc_name);
284 		intrcnt_setname(str, isrc->isrc_index);
285 		snprintf(str, INTRNAME_LEN, "stray %s:", isrc->isrc_name);
286 		intrcnt_setname(str, isrc->isrc_index + 1);
287 	}
288 }
289 
290 /*
291  *  Virtualization for interrupt source interrupt counters setup.
292  */
293 static void
294 isrc_setup_counters(struct intr_irqsrc *isrc)
295 {
296 	int index;
297 
298 	mtx_assert(&isrc_table_lock, MA_OWNED);
299 
300 	/*
301 	 * Allocate two counter values, the second tracking "stray" interrupts.
302 	 */
303 	bit_ffc_area(intrcnt_bitmap, nintrcnt, 2, &index);
304 	if (index == -1)
305 		panic("Failed to allocate 2 counters. Array exhausted?");
306 	bit_nset(intrcnt_bitmap, index, index + 1);
307 	isrc->isrc_index = index;
308 	isrc->isrc_count = &intrcnt[index];
309 	isrc_update_name(isrc, NULL);
310 }
311 
312 /*
313  *  Virtualization for interrupt source interrupt counters release.
314  */
315 static void
316 isrc_release_counters(struct intr_irqsrc *isrc)
317 {
318 	int idx = isrc->isrc_index;
319 
320 	mtx_assert(&isrc_table_lock, MA_OWNED);
321 
322 	bit_nclear(intrcnt_bitmap, idx, idx + 1);
323 }
324 
/*
 *  Main interrupt dispatch handler. It's called straight
 *  from the assembler, where CPU interrupt is served.
 *
 *  'tf' is the trapframe of the interrupted context. The root interrupt
 *  controller's filter (registered by intr_pic_claim_root()) is run inside
 *  a critical section with the trapframe published in td_intr_frame.
 */
void
intr_irq_handler(struct trapframe *tf)
{
	struct trapframe * oldframe;
	struct thread * td;

	KASSERT(irq_root_filter != NULL, ("%s: no filter", __func__));

	/* Tell the sanitizers about the trapframe before it is used. */
	kasan_mark(tf, sizeof(*tf), sizeof(*tf), 0);
	kmsan_mark(tf, sizeof(*tf), KMSAN_STATE_INITED);

	VM_CNT_INC(v_intr);
	critical_enter();
	td = curthread;
	/*
	 * Publish our trapframe for the duration of the filter and put the
	 * previous value back afterwards.
	 */
	oldframe = td->td_intr_frame;
	td->td_intr_frame = tf;
	irq_root_filter(irq_root_arg);
	td->td_intr_frame = oldframe;
	critical_exit();
#ifdef HWPMC_HOOKS
	/*
	 * If user mode was interrupted and the thread has TDP_CALLCHAIN set,
	 * let hwpmc capture the user call chain.
	 */
	if (pmc_hook && TRAPF_USERMODE(tf) &&
	    (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN))
		pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, tf);
#endif
}
354 
355 int
356 intr_child_irq_handler(struct intr_pic *parent, uintptr_t irq)
357 {
358 	struct intr_pic_child *child;
359 	bool found;
360 
361 	found = false;
362 	mtx_lock_spin(&parent->pic_child_lock);
363 	SLIST_FOREACH(child, &parent->pic_children, pc_next) {
364 		if (child->pc_start <= irq &&
365 		    irq < (child->pc_start + child->pc_length)) {
366 			found = true;
367 			break;
368 		}
369 	}
370 	mtx_unlock_spin(&parent->pic_child_lock);
371 
372 	if (found)
373 		return (child->pc_filter(child->pc_filter_arg, irq));
374 
375 	return (FILTER_STRAY);
376 }
377 
378 /*
379  *  interrupt controller dispatch function for interrupts. It should
380  *  be called straight from the interrupt controller, when associated interrupt
381  *  source is learned.
382  */
383 int
384 intr_isrc_dispatch(struct intr_irqsrc *isrc, struct trapframe *tf)
385 {
386 
387 	KASSERT(isrc != NULL, ("%s: no source", __func__));
388 
389 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
390 		isrc_increment_count(isrc);
391 
392 #ifdef INTR_SOLO
393 	if (isrc->isrc_filter != NULL) {
394 		int error;
395 		error = isrc->isrc_filter(isrc->isrc_arg, tf);
396 		PIC_POST_FILTER(isrc->isrc_dev, isrc);
397 		if (error == FILTER_HANDLED)
398 			return (0);
399 	} else
400 #endif
401 	if (isrc->isrc_event != NULL) {
402 		if (intr_event_handle(isrc->isrc_event, tf) == 0)
403 			return (0);
404 	}
405 
406 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
407 		isrc_increment_straycount(isrc);
408 	return (EINVAL);
409 }
410 
411 /*
412  *  Alloc unique interrupt number (resource handle) for interrupt source.
413  *
414  *  There could be various strategies how to allocate free interrupt number
415  *  (resource handle) for new interrupt source.
416  *
417  *  1. Handles are always allocated forward, so handles are not recycled
418  *     immediately. However, if only one free handle left which is reused
419  *     constantly...
420  */
421 static inline int
422 isrc_alloc_irq(struct intr_irqsrc *isrc)
423 {
424 	u_int irq;
425 
426 	mtx_assert(&isrc_table_lock, MA_OWNED);
427 
428 	if (irq_next_free >= intr_nirq)
429 		return (ENOSPC);
430 
431 	for (irq = irq_next_free; irq < intr_nirq; irq++) {
432 		if (irq_sources[irq] == NULL)
433 			goto found;
434 	}
435 	for (irq = 0; irq < irq_next_free; irq++) {
436 		if (irq_sources[irq] == NULL)
437 			goto found;
438 	}
439 
440 	irq_next_free = intr_nirq;
441 	return (ENOSPC);
442 
443 found:
444 	isrc->isrc_irq = irq;
445 	irq_sources[irq] = isrc;
446 
447 	irq_next_free = irq + 1;
448 	if (irq_next_free >= intr_nirq)
449 		irq_next_free = 0;
450 	return (0);
451 }
452 
453 /*
454  *  Free unique interrupt number (resource handle) from interrupt source.
455  */
456 static inline int
457 isrc_free_irq(struct intr_irqsrc *isrc)
458 {
459 
460 	mtx_assert(&isrc_table_lock, MA_OWNED);
461 
462 	if (isrc->isrc_irq >= intr_nirq)
463 		return (EINVAL);
464 	if (irq_sources[isrc->isrc_irq] != isrc)
465 		return (EINVAL);
466 
467 	irq_sources[isrc->isrc_irq] = NULL;
468 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
469 
470 	/*
471 	 * If we are recovering from the state irq_sources table is full,
472 	 * then the following allocation should check the entire table. This
473 	 * will ensure maximum separation of allocation order from release
474 	 * order.
475 	 */
476 	if (irq_next_free >= intr_nirq)
477 		irq_next_free = 0;
478 
479 	return (0);
480 }
481 
482 /*
483  *  Initialize interrupt source and register it into global interrupt table.
484  */
485 int
486 intr_isrc_register(struct intr_irqsrc *isrc, device_t dev, u_int flags,
487     const char *fmt, ...)
488 {
489 	int error;
490 	va_list ap;
491 
492 	bzero(isrc, sizeof(struct intr_irqsrc));
493 	isrc->isrc_dev = dev;
494 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
495 	isrc->isrc_flags = flags;
496 
497 	va_start(ap, fmt);
498 	vsnprintf(isrc->isrc_name, INTR_ISRC_NAMELEN, fmt, ap);
499 	va_end(ap);
500 
501 	mtx_lock(&isrc_table_lock);
502 	error = isrc_alloc_irq(isrc);
503 	if (error != 0) {
504 		mtx_unlock(&isrc_table_lock);
505 		return (error);
506 	}
507 	/*
508 	 * Setup interrupt counters, but not for IPI sources. Those are setup
509 	 * later and only for used ones (up to INTR_IPI_COUNT) to not exhaust
510 	 * our counter pool.
511 	 */
512 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
513 		isrc_setup_counters(isrc);
514 	mtx_unlock(&isrc_table_lock);
515 	return (0);
516 }
517 
518 /*
519  *  Deregister interrupt source from global interrupt table.
520  */
521 int
522 intr_isrc_deregister(struct intr_irqsrc *isrc)
523 {
524 	int error;
525 
526 	mtx_lock(&isrc_table_lock);
527 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
528 		isrc_release_counters(isrc);
529 	error = isrc_free_irq(isrc);
530 	mtx_unlock(&isrc_table_lock);
531 	return (error);
532 }
533 
534 #ifdef SMP
535 /*
536  *  A support function for a PIC to decide if provided ISRC should be inited
537  *  on given cpu. The logic of INTR_ISRCF_BOUND flag and isrc_cpu member of
538  *  struct intr_irqsrc is the following:
539  *
540  *     If INTR_ISRCF_BOUND is set, the ISRC should be inited only on cpus
541  *     set in isrc_cpu. If not, the ISRC should be inited on every cpu and
542  *     isrc_cpu is kept consistent with it. Thus isrc_cpu is always correct.
543  */
544 bool
545 intr_isrc_init_on_cpu(struct intr_irqsrc *isrc, u_int cpu)
546 {
547 
548 	if (isrc->isrc_handlers == 0)
549 		return (false);
550 	if ((isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI)) == 0)
551 		return (false);
552 	if (isrc->isrc_flags & INTR_ISRCF_BOUND)
553 		return (CPU_ISSET(cpu, &isrc->isrc_cpu));
554 
555 	CPU_SET(cpu, &isrc->isrc_cpu);
556 	return (true);
557 }
558 #endif
559 
560 #ifdef INTR_SOLO
561 /*
562  *  Setup filter into interrupt source.
563  */
564 static int
565 iscr_setup_filter(struct intr_irqsrc *isrc, const char *name,
566     intr_irq_filter_t *filter, void *arg, void **cookiep)
567 {
568 
569 	if (filter == NULL)
570 		return (EINVAL);
571 
572 	mtx_lock(&isrc_table_lock);
573 	/*
574 	 * Make sure that we do not mix the two ways
575 	 * how we handle interrupt sources.
576 	 */
577 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
578 		mtx_unlock(&isrc_table_lock);
579 		return (EBUSY);
580 	}
581 	isrc->isrc_filter = filter;
582 	isrc->isrc_arg = arg;
583 	isrc_update_name(isrc, name);
584 	mtx_unlock(&isrc_table_lock);
585 
586 	*cookiep = isrc;
587 	return (0);
588 }
589 #endif
590 
591 /*
592  *  Interrupt source pre_ithread method for MI interrupt framework.
593  */
594 static void
595 intr_isrc_pre_ithread(void *arg)
596 {
597 	struct intr_irqsrc *isrc = arg;
598 
599 	PIC_PRE_ITHREAD(isrc->isrc_dev, isrc);
600 }
601 
602 /*
603  *  Interrupt source post_ithread method for MI interrupt framework.
604  */
605 static void
606 intr_isrc_post_ithread(void *arg)
607 {
608 	struct intr_irqsrc *isrc = arg;
609 
610 	PIC_POST_ITHREAD(isrc->isrc_dev, isrc);
611 }
612 
613 /*
614  *  Interrupt source post_filter method for MI interrupt framework.
615  */
616 static void
617 intr_isrc_post_filter(void *arg)
618 {
619 	struct intr_irqsrc *isrc = arg;
620 
621 	PIC_POST_FILTER(isrc->isrc_dev, isrc);
622 }
623 
624 /*
625  *  Interrupt source assign_cpu method for MI interrupt framework.
626  */
627 static int
628 intr_isrc_assign_cpu(void *arg, int cpu)
629 {
630 #ifdef SMP
631 	struct intr_irqsrc *isrc = arg;
632 	int error;
633 
634 	mtx_lock(&isrc_table_lock);
635 	if (cpu == NOCPU) {
636 		CPU_ZERO(&isrc->isrc_cpu);
637 		isrc->isrc_flags &= ~INTR_ISRCF_BOUND;
638 	} else {
639 		CPU_SETOF(cpu, &isrc->isrc_cpu);
640 		isrc->isrc_flags |= INTR_ISRCF_BOUND;
641 	}
642 
643 	/*
644 	 * In NOCPU case, it's up to PIC to either leave ISRC on same CPU or
645 	 * re-balance it to another CPU or enable it on more CPUs. However,
646 	 * PIC is expected to change isrc_cpu appropriately to keep us well
647 	 * informed if the call is successful.
648 	 */
649 	if (irq_assign_cpu) {
650 		error = PIC_BIND_INTR(isrc->isrc_dev, isrc);
651 		if (error) {
652 			CPU_ZERO(&isrc->isrc_cpu);
653 			mtx_unlock(&isrc_table_lock);
654 			return (error);
655 		}
656 	}
657 	mtx_unlock(&isrc_table_lock);
658 	return (0);
659 #else
660 	return (EOPNOTSUPP);
661 #endif
662 }
663 
664 /*
665  *  Create interrupt event for interrupt source.
666  */
667 static int
668 isrc_event_create(struct intr_irqsrc *isrc)
669 {
670 	struct intr_event *ie;
671 	int error;
672 
673 	error = intr_event_create(&ie, isrc, 0, isrc->isrc_irq,
674 	    intr_isrc_pre_ithread, intr_isrc_post_ithread, intr_isrc_post_filter,
675 	    intr_isrc_assign_cpu, "%s:", isrc->isrc_name);
676 	if (error)
677 		return (error);
678 
679 	mtx_lock(&isrc_table_lock);
680 	/*
681 	 * Make sure that we do not mix the two ways
682 	 * how we handle interrupt sources. Let contested event wins.
683 	 */
684 #ifdef INTR_SOLO
685 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
686 #else
687 	if (isrc->isrc_event != NULL) {
688 #endif
689 		mtx_unlock(&isrc_table_lock);
690 		intr_event_destroy(ie);
691 		return (isrc->isrc_event != NULL ? EBUSY : 0);
692 	}
693 	isrc->isrc_event = ie;
694 	mtx_unlock(&isrc_table_lock);
695 
696 	return (0);
697 }
698 #ifdef notyet
699 /*
700  *  Destroy interrupt event for interrupt source.
701  */
702 static void
703 isrc_event_destroy(struct intr_irqsrc *isrc)
704 {
705 	struct intr_event *ie;
706 
707 	mtx_lock(&isrc_table_lock);
708 	ie = isrc->isrc_event;
709 	isrc->isrc_event = NULL;
710 	mtx_unlock(&isrc_table_lock);
711 
712 	if (ie != NULL)
713 		intr_event_destroy(ie);
714 }
715 #endif
716 /*
717  *  Add handler to interrupt source.
718  */
719 static int
720 isrc_add_handler(struct intr_irqsrc *isrc, const char *name,
721     driver_filter_t filter, driver_intr_t handler, void *arg,
722     enum intr_type flags, void **cookiep)
723 {
724 	int error;
725 
726 	if (isrc->isrc_event == NULL) {
727 		error = isrc_event_create(isrc);
728 		if (error)
729 			return (error);
730 	}
731 
732 	error = intr_event_add_handler(isrc->isrc_event, name, filter, handler,
733 	    arg, intr_priority(flags), flags, cookiep);
734 	if (error == 0) {
735 		mtx_lock(&isrc_table_lock);
736 		intrcnt_updatename(isrc);
737 		mtx_unlock(&isrc_table_lock);
738 	}
739 
740 	return (error);
741 }
742 
743 /*
744  *  Lookup interrupt controller locked.
745  */
746 static inline struct intr_pic *
747 pic_lookup_locked(device_t dev, intptr_t xref, int flags)
748 {
749 	struct intr_pic *pic;
750 
751 	mtx_assert(&pic_list_lock, MA_OWNED);
752 
753 	if (dev == NULL && xref == 0)
754 		return (NULL);
755 
756 	/* Note that pic->pic_dev is never NULL on registered PIC. */
757 	SLIST_FOREACH(pic, &pic_list, pic_next) {
758 		if ((pic->pic_flags & FLAG_TYPE_MASK) !=
759 		    (flags & FLAG_TYPE_MASK))
760 			continue;
761 
762 		if (dev == NULL) {
763 			if (xref == pic->pic_xref)
764 				return (pic);
765 		} else if (xref == 0 || pic->pic_xref == 0) {
766 			if (dev == pic->pic_dev)
767 				return (pic);
768 		} else if (xref == pic->pic_xref && dev == pic->pic_dev)
769 				return (pic);
770 	}
771 	return (NULL);
772 }
773 
774 /*
775  *  Lookup interrupt controller.
776  */
777 static struct intr_pic *
778 pic_lookup(device_t dev, intptr_t xref, int flags)
779 {
780 	struct intr_pic *pic;
781 
782 	mtx_lock(&pic_list_lock);
783 	pic = pic_lookup_locked(dev, xref, flags);
784 	mtx_unlock(&pic_list_lock);
785 	return (pic);
786 }
787 
788 /*
789  *  Create interrupt controller.
790  */
791 static struct intr_pic *
792 pic_create(device_t dev, intptr_t xref, int flags)
793 {
794 	struct intr_pic *pic;
795 
796 	mtx_lock(&pic_list_lock);
797 	pic = pic_lookup_locked(dev, xref, flags);
798 	if (pic != NULL) {
799 		mtx_unlock(&pic_list_lock);
800 		return (pic);
801 	}
802 	pic = malloc(sizeof(*pic), M_INTRNG, M_NOWAIT | M_ZERO);
803 	if (pic == NULL) {
804 		mtx_unlock(&pic_list_lock);
805 		return (NULL);
806 	}
807 	pic->pic_xref = xref;
808 	pic->pic_dev = dev;
809 	pic->pic_flags = flags;
810 	mtx_init(&pic->pic_child_lock, "pic child lock", NULL, MTX_SPIN);
811 	SLIST_INSERT_HEAD(&pic_list, pic, pic_next);
812 	mtx_unlock(&pic_list_lock);
813 
814 	return (pic);
815 }
816 #ifdef notyet
817 /*
818  *  Destroy interrupt controller.
819  */
820 static void
821 pic_destroy(device_t dev, intptr_t xref, int flags)
822 {
823 	struct intr_pic *pic;
824 
825 	mtx_lock(&pic_list_lock);
826 	pic = pic_lookup_locked(dev, xref, flags);
827 	if (pic == NULL) {
828 		mtx_unlock(&pic_list_lock);
829 		return;
830 	}
831 	SLIST_REMOVE(&pic_list, pic, intr_pic, pic_next);
832 	mtx_unlock(&pic_list_lock);
833 
834 	free(pic, M_INTRNG);
835 }
836 #endif
837 /*
838  *  Register interrupt controller.
839  */
840 struct intr_pic *
841 intr_pic_register(device_t dev, intptr_t xref)
842 {
843 	struct intr_pic *pic;
844 
845 	if (dev == NULL)
846 		return (NULL);
847 	pic = pic_create(dev, xref, FLAG_PIC);
848 	if (pic == NULL)
849 		return (NULL);
850 
851 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
852 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
853 	return (pic);
854 }
855 
/*
 *  Unregister interrupt controller.
 *
 *  Not implemented: any call panics, so the function never returns and
 *  both arguments are unused.
 */
int
intr_pic_deregister(device_t dev, intptr_t xref)
{

	panic("%s: not implemented", __func__);
}
865 
866 /*
867  *  Mark interrupt controller (itself) as a root one.
868  *
869  *  Note that only an interrupt controller can really know its position
870  *  in interrupt controller's tree. So root PIC must claim itself as a root.
871  *
872  *  In FDT case, according to ePAPR approved version 1.1 from 08 April 2011,
873  *  page 30:
874  *    "The root of the interrupt tree is determined when traversal
875  *     of the interrupt tree reaches an interrupt controller node without
876  *     an interrupts property and thus no explicit interrupt parent."
877  */
878 int
879 intr_pic_claim_root(device_t dev, intptr_t xref, intr_irq_filter_t *filter,
880     void *arg)
881 {
882 	struct intr_pic *pic;
883 
884 	pic = pic_lookup(dev, xref, FLAG_PIC);
885 	if (pic == NULL) {
886 		device_printf(dev, "not registered\n");
887 		return (EINVAL);
888 	}
889 
890 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
891 	    ("%s: Found a non-PIC controller: %s", __func__,
892 	     device_get_name(pic->pic_dev)));
893 
894 	if (filter == NULL) {
895 		device_printf(dev, "filter missing\n");
896 		return (EINVAL);
897 	}
898 
899 	/*
900 	 * Only one interrupt controllers could be on the root for now.
901 	 * Note that we further suppose that there is not threaded interrupt
902 	 * routine (handler) on the root. See intr_irq_handler().
903 	 */
904 	if (intr_irq_root_dev != NULL) {
905 		device_printf(dev, "another root already set\n");
906 		return (EBUSY);
907 	}
908 
909 	intr_irq_root_dev = dev;
910 	irq_root_filter = filter;
911 	irq_root_arg = arg;
912 
913 	debugf("irq root set to %s\n", device_get_nameunit(dev));
914 	return (0);
915 }
916 
917 /*
918  * Add a handler to manage a sub range of a parents interrupts.
919  */
920 int
921 intr_pic_add_handler(device_t parent, struct intr_pic *pic,
922     intr_child_irq_filter_t *filter, void *arg, uintptr_t start,
923     uintptr_t length)
924 {
925 	struct intr_pic *parent_pic;
926 	struct intr_pic_child *newchild;
927 #ifdef INVARIANTS
928 	struct intr_pic_child *child;
929 #endif
930 
931 	/* Find the parent PIC */
932 	parent_pic = pic_lookup(parent, 0, FLAG_PIC);
933 	if (parent_pic == NULL)
934 		return (ENXIO);
935 
936 	newchild = malloc(sizeof(*newchild), M_INTRNG, M_WAITOK | M_ZERO);
937 	newchild->pc_pic = pic;
938 	newchild->pc_filter = filter;
939 	newchild->pc_filter_arg = arg;
940 	newchild->pc_start = start;
941 	newchild->pc_length = length;
942 
943 	mtx_lock_spin(&parent_pic->pic_child_lock);
944 #ifdef INVARIANTS
945 	SLIST_FOREACH(child, &parent_pic->pic_children, pc_next) {
946 		KASSERT(child->pc_pic != pic, ("%s: Adding a child PIC twice",
947 		    __func__));
948 	}
949 #endif
950 	SLIST_INSERT_HEAD(&parent_pic->pic_children, newchild, pc_next);
951 	mtx_unlock_spin(&parent_pic->pic_child_lock);
952 
953 	return (0);
954 }
955 
956 static int
957 intr_resolve_irq(device_t dev, intptr_t xref, struct intr_map_data *data,
958     struct intr_irqsrc **isrc)
959 {
960 	struct intr_pic *pic;
961 	struct intr_map_data_msi *msi;
962 
963 	if (data == NULL)
964 		return (EINVAL);
965 
966 	pic = pic_lookup(dev, xref,
967 	    (data->type == INTR_MAP_DATA_MSI) ? FLAG_MSI : FLAG_PIC);
968 	if (pic == NULL)
969 		return (ESRCH);
970 
971 	switch (data->type) {
972 	case INTR_MAP_DATA_MSI:
973 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
974 		    ("%s: Found a non-MSI controller: %s", __func__,
975 		     device_get_name(pic->pic_dev)));
976 		msi = (struct intr_map_data_msi *)data;
977 		*isrc = msi->isrc;
978 		return (0);
979 
980 	default:
981 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
982 		    ("%s: Found a non-PIC controller: %s", __func__,
983 		     device_get_name(pic->pic_dev)));
984 		return (PIC_MAP_INTR(pic->pic_dev, data, isrc));
985 	}
986 }
987 
988 bool
989 intr_is_per_cpu(struct resource *res)
990 {
991 	u_int res_id;
992 	struct intr_irqsrc *isrc;
993 
994 	res_id = (u_int)rman_get_start(res);
995 	isrc = intr_map_get_isrc(res_id);
996 
997 	if (isrc == NULL)
998 		panic("Attempt to get isrc for non-active resource id: %u\n",
999 		    res_id);
1000 	return ((isrc->isrc_flags & INTR_ISRCF_PPI) != 0);
1001 }
1002 
1003 int
1004 intr_activate_irq(device_t dev, struct resource *res)
1005 {
1006 	device_t map_dev;
1007 	intptr_t map_xref;
1008 	struct intr_map_data *data;
1009 	struct intr_irqsrc *isrc;
1010 	u_int res_id;
1011 	int error;
1012 
1013 	KASSERT(rman_get_start(res) == rman_get_end(res),
1014 	    ("%s: more interrupts in resource", __func__));
1015 
1016 	res_id = (u_int)rman_get_start(res);
1017 	if (intr_map_get_isrc(res_id) != NULL)
1018 		panic("Attempt to double activation of resource id: %u\n",
1019 		    res_id);
1020 	intr_map_copy_map_data(res_id, &map_dev, &map_xref, &data);
1021 	error = intr_resolve_irq(map_dev, map_xref, data, &isrc);
1022 	if (error != 0) {
1023 		free(data, M_INTRNG);
1024 		/* XXX TODO DISCONECTED PICs */
1025 		/* if (error == EINVAL) return(0); */
1026 		return (error);
1027 	}
1028 	intr_map_set_isrc(res_id, isrc);
1029 	rman_set_virtual(res, data);
1030 	return (PIC_ACTIVATE_INTR(isrc->isrc_dev, isrc, res, data));
1031 }
1032 
1033 int
1034 intr_deactivate_irq(device_t dev, struct resource *res)
1035 {
1036 	struct intr_map_data *data;
1037 	struct intr_irqsrc *isrc;
1038 	u_int res_id;
1039 	int error;
1040 
1041 	KASSERT(rman_get_start(res) == rman_get_end(res),
1042 	    ("%s: more interrupts in resource", __func__));
1043 
1044 	res_id = (u_int)rman_get_start(res);
1045 	isrc = intr_map_get_isrc(res_id);
1046 	if (isrc == NULL)
1047 		panic("Attempt to deactivate non-active resource id: %u\n",
1048 		    res_id);
1049 
1050 	data = rman_get_virtual(res);
1051 	error = PIC_DEACTIVATE_INTR(isrc->isrc_dev, isrc, res, data);
1052 	intr_map_set_isrc(res_id, NULL);
1053 	rman_set_virtual(res, NULL);
1054 	free(data, M_INTRNG);
1055 	return (error);
1056 }
1057 
1058 int
1059 intr_setup_irq(device_t dev, struct resource *res, driver_filter_t filt,
1060     driver_intr_t hand, void *arg, int flags, void **cookiep)
1061 {
1062 	int error;
1063 	struct intr_map_data *data;
1064 	struct intr_irqsrc *isrc;
1065 	const char *name;
1066 	u_int res_id;
1067 
1068 	KASSERT(rman_get_start(res) == rman_get_end(res),
1069 	    ("%s: more interrupts in resource", __func__));
1070 
1071 	res_id = (u_int)rman_get_start(res);
1072 	isrc = intr_map_get_isrc(res_id);
1073 	if (isrc == NULL) {
1074 		/* XXX TODO DISCONECTED PICs */
1075 		return (EINVAL);
1076 	}
1077 
1078 	data = rman_get_virtual(res);
1079 	name = device_get_nameunit(dev);
1080 
1081 #ifdef INTR_SOLO
1082 	/*
1083 	 * Standard handling is done through MI interrupt framework. However,
1084 	 * some interrupts could request solely own special handling. This
1085 	 * non standard handling can be used for interrupt controllers without
1086 	 * handler (filter only), so in case that interrupt controllers are
1087 	 * chained, MI interrupt framework is called only in leaf controller.
1088 	 *
1089 	 * Note that root interrupt controller routine is served as well,
1090 	 * however in intr_irq_handler(), i.e. main system dispatch routine.
1091 	 */
1092 	if (flags & INTR_SOLO && hand != NULL) {
1093 		debugf("irq %u cannot solo on %s\n", irq, name);
1094 		return (EINVAL);
1095 	}
1096 
1097 	if (flags & INTR_SOLO) {
1098 		error = iscr_setup_filter(isrc, name, (intr_irq_filter_t *)filt,
1099 		    arg, cookiep);
1100 		debugf("irq %u setup filter error %d on %s\n", isrc->isrc_irq, error,
1101 		    name);
1102 	} else
1103 #endif
1104 		{
1105 		error = isrc_add_handler(isrc, name, filt, hand, arg, flags,
1106 		    cookiep);
1107 		debugf("irq %u add handler error %d on %s\n", isrc->isrc_irq, error, name);
1108 	}
1109 	if (error != 0)
1110 		return (error);
1111 
1112 	mtx_lock(&isrc_table_lock);
1113 	error = PIC_SETUP_INTR(isrc->isrc_dev, isrc, res, data);
1114 	if (error == 0) {
1115 		isrc->isrc_handlers++;
1116 		if (isrc->isrc_handlers == 1)
1117 			PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
1118 	}
1119 	mtx_unlock(&isrc_table_lock);
1120 	if (error != 0)
1121 		intr_event_remove_handler(*cookiep);
1122 	return (error);
1123 }
1124 
1125 int
1126 intr_teardown_irq(device_t dev, struct resource *res, void *cookie)
1127 {
1128 	int error;
1129 	struct intr_map_data *data;
1130 	struct intr_irqsrc *isrc;
1131 	u_int res_id;
1132 
1133 	KASSERT(rman_get_start(res) == rman_get_end(res),
1134 	    ("%s: more interrupts in resource", __func__));
1135 
1136 	res_id = (u_int)rman_get_start(res);
1137 	isrc = intr_map_get_isrc(res_id);
1138 	if (isrc == NULL || isrc->isrc_handlers == 0)
1139 		return (EINVAL);
1140 
1141 	data = rman_get_virtual(res);
1142 
1143 #ifdef INTR_SOLO
1144 	if (isrc->isrc_filter != NULL) {
1145 		if (isrc != cookie)
1146 			return (EINVAL);
1147 
1148 		mtx_lock(&isrc_table_lock);
1149 		isrc->isrc_filter = NULL;
1150 		isrc->isrc_arg = NULL;
1151 		isrc->isrc_handlers = 0;
1152 		PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1153 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1154 		isrc_update_name(isrc, NULL);
1155 		mtx_unlock(&isrc_table_lock);
1156 		return (0);
1157 	}
1158 #endif
1159 	if (isrc != intr_handler_source(cookie))
1160 		return (EINVAL);
1161 
1162 	error = intr_event_remove_handler(cookie);
1163 	if (error == 0) {
1164 		mtx_lock(&isrc_table_lock);
1165 		isrc->isrc_handlers--;
1166 		if (isrc->isrc_handlers == 0)
1167 			PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1168 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1169 		intrcnt_updatename(isrc);
1170 		mtx_unlock(&isrc_table_lock);
1171 	}
1172 	return (error);
1173 }
1174 
1175 int
1176 intr_describe_irq(device_t dev, struct resource *res, void *cookie,
1177     const char *descr)
1178 {
1179 	int error;
1180 	struct intr_irqsrc *isrc;
1181 	u_int res_id;
1182 
1183 	KASSERT(rman_get_start(res) == rman_get_end(res),
1184 	    ("%s: more interrupts in resource", __func__));
1185 
1186 	res_id = (u_int)rman_get_start(res);
1187 	isrc = intr_map_get_isrc(res_id);
1188 	if (isrc == NULL || isrc->isrc_handlers == 0)
1189 		return (EINVAL);
1190 #ifdef INTR_SOLO
1191 	if (isrc->isrc_filter != NULL) {
1192 		if (isrc != cookie)
1193 			return (EINVAL);
1194 
1195 		mtx_lock(&isrc_table_lock);
1196 		isrc_update_name(isrc, descr);
1197 		mtx_unlock(&isrc_table_lock);
1198 		return (0);
1199 	}
1200 #endif
1201 	error = intr_event_describe_handler(isrc->isrc_event, cookie, descr);
1202 	if (error == 0) {
1203 		mtx_lock(&isrc_table_lock);
1204 		intrcnt_updatename(isrc);
1205 		mtx_unlock(&isrc_table_lock);
1206 	}
1207 	return (error);
1208 }
1209 
1210 #ifdef SMP
1211 int
1212 intr_bind_irq(device_t dev, struct resource *res, int cpu)
1213 {
1214 	struct intr_irqsrc *isrc;
1215 	u_int res_id;
1216 
1217 	KASSERT(rman_get_start(res) == rman_get_end(res),
1218 	    ("%s: more interrupts in resource", __func__));
1219 
1220 	res_id = (u_int)rman_get_start(res);
1221 	isrc = intr_map_get_isrc(res_id);
1222 	if (isrc == NULL || isrc->isrc_handlers == 0)
1223 		return (EINVAL);
1224 #ifdef INTR_SOLO
1225 	if (isrc->isrc_filter != NULL)
1226 		return (intr_isrc_assign_cpu(isrc, cpu));
1227 #endif
1228 	return (intr_event_bind(isrc->isrc_event, cpu));
1229 }
1230 
1231 /*
1232  * Return the CPU that the next interrupt source should use.
1233  * For now just returns the next CPU according to round-robin.
1234  */
1235 u_int
1236 intr_irq_next_cpu(u_int last_cpu, cpuset_t *cpumask)
1237 {
1238 	u_int cpu;
1239 
1240 	KASSERT(!CPU_EMPTY(cpumask), ("%s: Empty CPU mask", __func__));
1241 	if (!irq_assign_cpu || mp_ncpus == 1) {
1242 		cpu = PCPU_GET(cpuid);
1243 
1244 		if (CPU_ISSET(cpu, cpumask))
1245 			return (curcpu);
1246 
1247 		return (CPU_FFS(cpumask) - 1);
1248 	}
1249 
1250 	do {
1251 		last_cpu++;
1252 		if (last_cpu > mp_maxid)
1253 			last_cpu = 0;
1254 	} while (!CPU_ISSET(last_cpu, cpumask));
1255 	return (last_cpu);
1256 }
1257 
1258 #ifndef EARLY_AP_STARTUP
1259 /*
1260  *  Distribute all the interrupt sources among the available
1261  *  CPUs once the AP's have been launched.
1262  */
1263 static void
1264 intr_irq_shuffle(void *arg __unused)
1265 {
1266 	struct intr_irqsrc *isrc;
1267 	u_int i;
1268 
1269 	if (mp_ncpus == 1)
1270 		return;
1271 
1272 	mtx_lock(&isrc_table_lock);
1273 	irq_assign_cpu = true;
1274 	for (i = 0; i < intr_nirq; i++) {
1275 		isrc = irq_sources[i];
1276 		if (isrc == NULL || isrc->isrc_handlers == 0 ||
1277 		    isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI))
1278 			continue;
1279 
1280 		if (isrc->isrc_event != NULL &&
1281 		    isrc->isrc_flags & INTR_ISRCF_BOUND &&
1282 		    isrc->isrc_event->ie_cpu != CPU_FFS(&isrc->isrc_cpu) - 1)
1283 			panic("%s: CPU inconsistency", __func__);
1284 
1285 		if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0)
1286 			CPU_ZERO(&isrc->isrc_cpu); /* start again */
1287 
1288 		/*
1289 		 * We are in wicked position here if the following call fails
1290 		 * for bound ISRC. The best thing we can do is to clear
1291 		 * isrc_cpu so inconsistency with ie_cpu will be detectable.
1292 		 */
1293 		if (PIC_BIND_INTR(isrc->isrc_dev, isrc) != 0)
1294 			CPU_ZERO(&isrc->isrc_cpu);
1295 	}
1296 	mtx_unlock(&isrc_table_lock);
1297 }
1298 SYSINIT(intr_irq_shuffle, SI_SUB_SMP, SI_ORDER_SECOND, intr_irq_shuffle, NULL);
1299 #endif /* !EARLY_AP_STARTUP */
1300 
1301 #else
1302 u_int
1303 intr_irq_next_cpu(u_int current_cpu, cpuset_t *cpumask)
1304 {
1305 
1306 	return (PCPU_GET(cpuid));
1307 }
1308 #endif /* SMP */
1309 
1310 /*
1311  * Allocate memory for new intr_map_data structure.
1312  * Initialize common fields.
1313  */
1314 struct intr_map_data *
1315 intr_alloc_map_data(enum intr_map_data_type type, size_t len, int flags)
1316 {
1317 	struct intr_map_data *data;
1318 
1319 	data = malloc(len, M_INTRNG, flags);
1320 	data->type = type;
1321 	data->len = len;
1322 	return (data);
1323 }
1324 
1325 void intr_free_intr_map_data(struct intr_map_data *data)
1326 {
1327 
1328 	free(data, M_INTRNG);
1329 }
1330 
1331 /*
1332  *  Register a MSI/MSI-X interrupt controller
1333  */
1334 int
1335 intr_msi_register(device_t dev, intptr_t xref)
1336 {
1337 	struct intr_pic *pic;
1338 
1339 	if (dev == NULL)
1340 		return (EINVAL);
1341 	pic = pic_create(dev, xref, FLAG_MSI);
1342 	if (pic == NULL)
1343 		return (ENOMEM);
1344 
1345 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
1346 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
1347 	return (0);
1348 }
1349 
1350 int
1351 intr_alloc_msi(device_t pci, device_t child, intptr_t xref, int count,
1352     int maxcount, int *irqs)
1353 {
1354 	struct iommu_domain *domain;
1355 	struct intr_irqsrc **isrc;
1356 	struct intr_pic *pic;
1357 	device_t pdev;
1358 	struct intr_map_data_msi *msi;
1359 	int err, i;
1360 
1361 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1362 	if (pic == NULL)
1363 		return (ESRCH);
1364 
1365 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1366 	    ("%s: Found a non-MSI controller: %s", __func__,
1367 	     device_get_name(pic->pic_dev)));
1368 
1369 	/*
1370 	 * If this is the first time we have used this context ask the
1371 	 * interrupt controller to map memory the msi source will need.
1372 	 */
1373 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1374 	if (err != 0)
1375 		return (err);
1376 
1377 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1378 	err = MSI_ALLOC_MSI(pic->pic_dev, child, count, maxcount, &pdev, isrc);
1379 	if (err != 0) {
1380 		free(isrc, M_INTRNG);
1381 		return (err);
1382 	}
1383 
1384 	for (i = 0; i < count; i++) {
1385 		isrc[i]->isrc_iommu = domain;
1386 		msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1387 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1388 		msi-> isrc = isrc[i];
1389 
1390 		irqs[i] = intr_map_irq(pic->pic_dev, xref,
1391 		    (struct intr_map_data *)msi);
1392 	}
1393 	free(isrc, M_INTRNG);
1394 
1395 	return (err);
1396 }
1397 
1398 int
1399 intr_release_msi(device_t pci, device_t child, intptr_t xref, int count,
1400     int *irqs)
1401 {
1402 	struct intr_irqsrc **isrc;
1403 	struct intr_pic *pic;
1404 	struct intr_map_data_msi *msi;
1405 	int i, err;
1406 
1407 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1408 	if (pic == NULL)
1409 		return (ESRCH);
1410 
1411 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1412 	    ("%s: Found a non-MSI controller: %s", __func__,
1413 	     device_get_name(pic->pic_dev)));
1414 
1415 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1416 
1417 	for (i = 0; i < count; i++) {
1418 		msi = (struct intr_map_data_msi *)
1419 		    intr_map_get_map_data(irqs[i]);
1420 		KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1421 		    ("%s: irq %d map data is not MSI", __func__,
1422 		    irqs[i]));
1423 		isrc[i] = msi->isrc;
1424 	}
1425 
1426 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1427 
1428 	err = MSI_RELEASE_MSI(pic->pic_dev, child, count, isrc);
1429 
1430 	for (i = 0; i < count; i++) {
1431 		if (isrc[i] != NULL)
1432 			intr_unmap_irq(irqs[i]);
1433 	}
1434 
1435 	free(isrc, M_INTRNG);
1436 	return (err);
1437 }
1438 
1439 int
1440 intr_alloc_msix(device_t pci, device_t child, intptr_t xref, int *irq)
1441 {
1442 	struct iommu_domain *domain;
1443 	struct intr_irqsrc *isrc;
1444 	struct intr_pic *pic;
1445 	device_t pdev;
1446 	struct intr_map_data_msi *msi;
1447 	int err;
1448 
1449 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1450 	if (pic == NULL)
1451 		return (ESRCH);
1452 
1453 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1454 	    ("%s: Found a non-MSI controller: %s", __func__,
1455 	     device_get_name(pic->pic_dev)));
1456 
1457 	/*
1458 	 * If this is the first time we have used this context ask the
1459 	 * interrupt controller to map memory the msi source will need.
1460 	 */
1461 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1462 	if (err != 0)
1463 		return (err);
1464 
1465 	err = MSI_ALLOC_MSIX(pic->pic_dev, child, &pdev, &isrc);
1466 	if (err != 0)
1467 		return (err);
1468 
1469 	isrc->isrc_iommu = domain;
1470 	msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1471 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1472 	msi->isrc = isrc;
1473 	*irq = intr_map_irq(pic->pic_dev, xref, (struct intr_map_data *)msi);
1474 	return (0);
1475 }
1476 
1477 int
1478 intr_release_msix(device_t pci, device_t child, intptr_t xref, int irq)
1479 {
1480 	struct intr_irqsrc *isrc;
1481 	struct intr_pic *pic;
1482 	struct intr_map_data_msi *msi;
1483 	int err;
1484 
1485 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1486 	if (pic == NULL)
1487 		return (ESRCH);
1488 
1489 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1490 	    ("%s: Found a non-MSI controller: %s", __func__,
1491 	     device_get_name(pic->pic_dev)));
1492 
1493 	msi = (struct intr_map_data_msi *)
1494 	    intr_map_get_map_data(irq);
1495 	KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1496 	    ("%s: irq %d map data is not MSI", __func__,
1497 	    irq));
1498 	isrc = msi->isrc;
1499 	if (isrc == NULL) {
1500 		intr_unmap_irq(irq);
1501 		return (EINVAL);
1502 	}
1503 
1504 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1505 
1506 	err = MSI_RELEASE_MSIX(pic->pic_dev, child, isrc);
1507 	intr_unmap_irq(irq);
1508 
1509 	return (err);
1510 }
1511 
1512 int
1513 intr_map_msi(device_t pci, device_t child, intptr_t xref, int irq,
1514     uint64_t *addr, uint32_t *data)
1515 {
1516 	struct intr_irqsrc *isrc;
1517 	struct intr_pic *pic;
1518 	int err;
1519 
1520 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1521 	if (pic == NULL)
1522 		return (ESRCH);
1523 
1524 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1525 	    ("%s: Found a non-MSI controller: %s", __func__,
1526 	     device_get_name(pic->pic_dev)));
1527 
1528 	isrc = intr_map_get_isrc(irq);
1529 	if (isrc == NULL)
1530 		return (EINVAL);
1531 
1532 	err = MSI_MAP_MSI(pic->pic_dev, child, isrc, addr, data);
1533 
1534 #ifdef IOMMU
1535 	if (isrc->isrc_iommu != NULL)
1536 		iommu_translate_msi(isrc->isrc_iommu, addr);
1537 #endif
1538 
1539 	return (err);
1540 }
1541 
void dosoftints(void);

/*
 * No-op: the body is empty.
 */
void
dosoftints(void)
{
	return;
}
1547 
1548 #ifdef SMP
1549 /*
1550  *  Init interrupt controller on another CPU.
1551  */
1552 void
1553 intr_pic_init_secondary(void)
1554 {
1555 
1556 	/*
1557 	 * QQQ: Only root PIC is aware of other CPUs ???
1558 	 */
1559 	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
1560 
1561 	//mtx_lock(&isrc_table_lock);
1562 	PIC_INIT_SECONDARY(intr_irq_root_dev);
1563 	//mtx_unlock(&isrc_table_lock);
1564 }
1565 #endif
1566 
1567 #ifdef DDB
1568 DB_SHOW_COMMAND_FLAGS(irqs, db_show_irqs, DB_CMD_MEMSAFE)
1569 {
1570 	u_int i, irqsum;
1571 	u_long num;
1572 	struct intr_irqsrc *isrc;
1573 
1574 	for (irqsum = 0, i = 0; i < intr_nirq; i++) {
1575 		isrc = irq_sources[i];
1576 		if (isrc == NULL)
1577 			continue;
1578 
1579 		num = isrc->isrc_count != NULL ? isrc->isrc_count[0] : 0;
1580 		db_printf("irq%-3u <%s>: cpu %02lx%s cnt %lu\n", i,
1581 		    isrc->isrc_name, isrc->isrc_cpu.__bits[0],
1582 		    isrc->isrc_flags & INTR_ISRCF_BOUND ? " (bound)" : "", num);
1583 		irqsum += num;
1584 	}
1585 	db_printf("irq total %u\n", irqsum);
1586 }
1587 #endif
1588 
1589 /*
1590  * Interrupt mapping table functions.
1591  *
1592  * Please, keep this part separately, it can be transformed to
1593  * extension of standard resources.
1594  */
1595 struct intr_map_entry
1596 {
1597 	device_t 		dev;
1598 	intptr_t 		xref;
1599 	struct intr_map_data 	*map_data;
1600 	struct intr_irqsrc 	*isrc;
1601 	/* XXX TODO DISCONECTED PICs */
1602 	/*int			flags */
1603 };
1604 
1605 /* XXX Convert irq_map[] to dynamicaly expandable one. */
1606 static struct intr_map_entry **irq_map;
1607 static u_int irq_map_count;
1608 static u_int irq_map_first_free_idx;
1609 static struct mtx irq_map_lock;
1610 
1611 static struct intr_irqsrc *
1612 intr_map_get_isrc(u_int res_id)
1613 {
1614 	struct intr_irqsrc *isrc;
1615 
1616 	isrc = NULL;
1617 	mtx_lock(&irq_map_lock);
1618 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1619 		isrc = irq_map[res_id]->isrc;
1620 	mtx_unlock(&irq_map_lock);
1621 
1622 	return (isrc);
1623 }
1624 
1625 static void
1626 intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc)
1627 {
1628 
1629 	mtx_lock(&irq_map_lock);
1630 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1631 		irq_map[res_id]->isrc = isrc;
1632 	mtx_unlock(&irq_map_lock);
1633 }
1634 
1635 /*
1636  * Get a copy of intr_map_entry data
1637  */
1638 static struct intr_map_data *
1639 intr_map_get_map_data(u_int res_id)
1640 {
1641 	struct intr_map_data *data;
1642 
1643 	data = NULL;
1644 	mtx_lock(&irq_map_lock);
1645 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1646 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1647 	data = irq_map[res_id]->map_data;
1648 	mtx_unlock(&irq_map_lock);
1649 
1650 	return (data);
1651 }
1652 
1653 /*
1654  * Get a copy of intr_map_entry data
1655  */
1656 static void
1657 intr_map_copy_map_data(u_int res_id, device_t *map_dev, intptr_t *map_xref,
1658     struct intr_map_data **data)
1659 {
1660 	size_t len;
1661 
1662 	len = 0;
1663 	mtx_lock(&irq_map_lock);
1664 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1665 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1666 	if (irq_map[res_id]->map_data != NULL)
1667 		len = irq_map[res_id]->map_data->len;
1668 	mtx_unlock(&irq_map_lock);
1669 
1670 	if (len == 0)
1671 		*data = NULL;
1672 	else
1673 		*data = malloc(len, M_INTRNG, M_WAITOK | M_ZERO);
1674 	mtx_lock(&irq_map_lock);
1675 	if (irq_map[res_id] == NULL)
1676 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1677 	if (len != 0) {
1678 		if (len != irq_map[res_id]->map_data->len)
1679 			panic("Resource id: %u has changed.\n", res_id);
1680 		memcpy(*data, irq_map[res_id]->map_data, len);
1681 	}
1682 	*map_dev = irq_map[res_id]->dev;
1683 	*map_xref = irq_map[res_id]->xref;
1684 	mtx_unlock(&irq_map_lock);
1685 }
1686 
1687 /*
1688  * Allocate and fill new entry in irq_map table.
1689  */
1690 u_int
1691 intr_map_irq(device_t dev, intptr_t xref, struct intr_map_data *data)
1692 {
1693 	u_int i;
1694 	struct intr_map_entry *entry;
1695 
1696 	/* Prepare new entry first. */
1697 	entry = malloc(sizeof(*entry), M_INTRNG, M_WAITOK | M_ZERO);
1698 
1699 	entry->dev = dev;
1700 	entry->xref = xref;
1701 	entry->map_data = data;
1702 	entry->isrc = NULL;
1703 
1704 	mtx_lock(&irq_map_lock);
1705 	for (i = irq_map_first_free_idx; i < irq_map_count; i++) {
1706 		if (irq_map[i] == NULL) {
1707 			irq_map[i] = entry;
1708 			irq_map_first_free_idx = i + 1;
1709 			mtx_unlock(&irq_map_lock);
1710 			return (i);
1711 		}
1712 	}
1713 	for (i = 0; i < irq_map_first_free_idx; i++) {
1714 		if (irq_map[i] == NULL) {
1715 			irq_map[i] = entry;
1716 			irq_map_first_free_idx = i + 1;
1717 			mtx_unlock(&irq_map_lock);
1718 			return (i);
1719 		}
1720 	}
1721 	mtx_unlock(&irq_map_lock);
1722 
1723 	/* XXX Expand irq_map table */
1724 	panic("IRQ mapping table is full.");
1725 }
1726 
1727 /*
1728  * Remove and free mapping entry.
1729  */
1730 void
1731 intr_unmap_irq(u_int res_id)
1732 {
1733 	struct intr_map_entry *entry;
1734 
1735 	mtx_lock(&irq_map_lock);
1736 	if ((res_id >= irq_map_count) || (irq_map[res_id] == NULL))
1737 		panic("Attempt to unmap invalid resource id: %u\n", res_id);
1738 	entry = irq_map[res_id];
1739 	irq_map[res_id] = NULL;
1740 	irq_map_first_free_idx = res_id;
1741 	mtx_unlock(&irq_map_lock);
1742 	intr_free_intr_map_data(entry->map_data);
1743 	free(entry, M_INTRNG);
1744 }
1745 
1746 /*
1747  * Clone mapping entry.
1748  */
1749 u_int
1750 intr_map_clone_irq(u_int old_res_id)
1751 {
1752 	device_t map_dev;
1753 	intptr_t map_xref;
1754 	struct intr_map_data *data;
1755 
1756 	intr_map_copy_map_data(old_res_id, &map_dev, &map_xref, &data);
1757 	return (intr_map_irq(map_dev, map_xref, data));
1758 }
1759 
1760 static void
1761 intr_map_init(void *dummy __unused)
1762 {
1763 
1764 	mtx_init(&irq_map_lock, "intr map table", NULL, MTX_DEF);
1765 
1766 	irq_map_count = 2 * intr_nirq;
1767 	irq_map = mallocarray(irq_map_count, sizeof(struct intr_map_entry*),
1768 	    M_INTRNG, M_WAITOK | M_ZERO);
1769 }
1770 SYSINIT(intr_map_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_map_init, NULL);
1771 
1772 #ifdef SMP
1773 /* Virtualization for interrupt source IPI counter increment. */
1774 static inline void
1775 intr_ipi_increment_count(u_long *counter, u_int cpu)
1776 {
1777 
1778 	KASSERT(cpu < mp_maxid + 1, ("%s: too big cpu %u", __func__, cpu));
1779 	counter[cpu]++;
1780 }
1781 
1782 /*
1783  *  Virtualization for interrupt source IPI counters setup.
1784  */
1785 static u_long *
1786 intr_ipi_setup_counters(const char *name)
1787 {
1788 	u_int index, i;
1789 	char str[INTRNAME_LEN];
1790 
1791 	mtx_lock(&isrc_table_lock);
1792 
1793 	/*
1794 	 * We should never have a problem finding mp_maxid + 1 contiguous
1795 	 * counters, in practice. Interrupts will be allocated sequentially
1796 	 * during boot, so the array should fill from low to high index. Once
1797 	 * reserved, the IPI counters will never be released. Similarly, we
1798 	 * will not need to allocate more IPIs once the system is running.
1799 	 */
1800 	bit_ffc_area(intrcnt_bitmap, nintrcnt, mp_maxid + 1, &index);
1801 	if (index == -1)
1802 		panic("Failed to allocate %d counters. Array exhausted?",
1803 		    mp_maxid + 1);
1804 	bit_nset(intrcnt_bitmap, index, index + mp_maxid);
1805 	for (i = 0; i < mp_maxid + 1; i++) {
1806 		snprintf(str, INTRNAME_LEN, "cpu%d:%s", i, name);
1807 		intrcnt_setname(str, index + i);
1808 	}
1809 	mtx_unlock(&isrc_table_lock);
1810 	return (&intrcnt[index]);
1811 }
1812 
1813 /*
1814  *  Lookup IPI source.
1815  */
1816 static struct intr_ipi *
1817 intr_ipi_lookup(u_int ipi)
1818 {
1819 
1820 	if (ipi >= INTR_IPI_COUNT)
1821 		panic("%s: no such IPI %u", __func__, ipi);
1822 
1823 	return (&ipi_sources[ipi]);
1824 }
1825 
1826 int
1827 intr_ipi_pic_register(device_t dev, u_int priority)
1828 {
1829 	if (intr_ipi_dev_frozen) {
1830 		device_printf(dev, "IPI device already frozen");
1831 		return (EBUSY);
1832 	}
1833 
1834 	if (intr_ipi_dev == NULL || priority > intr_ipi_dev_priority)
1835 		intr_ipi_dev = dev;
1836 
1837 	return (0);
1838 }
1839 
1840 /*
1841  *  Setup IPI handler on interrupt controller.
1842  *
1843  *  Not SMP coherent.
1844  */
1845 void
1846 intr_ipi_setup(u_int ipi, const char *name, intr_ipi_handler_t *hand,
1847     void *arg)
1848 {
1849 	struct intr_irqsrc *isrc;
1850 	struct intr_ipi *ii;
1851 	int error;
1852 
1853 	if (!intr_ipi_dev_frozen) {
1854 		if (intr_ipi_dev == NULL)
1855 			panic("%s: no IPI PIC attached", __func__);
1856 
1857 		intr_ipi_dev_frozen = true;
1858 		device_printf(intr_ipi_dev, "using for IPIs\n");
1859 	}
1860 
1861 	KASSERT(hand != NULL, ("%s: ipi %u no handler", __func__, ipi));
1862 
1863 	error = PIC_IPI_SETUP(intr_ipi_dev, ipi, &isrc);
1864 	if (error != 0)
1865 		return;
1866 
1867 	isrc->isrc_handlers++;
1868 
1869 	ii = intr_ipi_lookup(ipi);
1870 	KASSERT(ii->ii_count == NULL, ("%s: ipi %u reused", __func__, ipi));
1871 
1872 	ii->ii_handler = hand;
1873 	ii->ii_handler_arg = arg;
1874 	ii->ii_isrc = isrc;
1875 	strlcpy(ii->ii_name, name, INTR_IPI_NAMELEN);
1876 	ii->ii_count = intr_ipi_setup_counters(name);
1877 
1878 	PIC_ENABLE_INTR(intr_ipi_dev, isrc);
1879 }
1880 
1881 void
1882 intr_ipi_send(cpuset_t cpus, u_int ipi)
1883 {
1884 	struct intr_ipi *ii;
1885 
1886 	KASSERT(intr_ipi_dev_frozen,
1887 	    ("%s: IPI device not yet frozen", __func__));
1888 
1889 	ii = intr_ipi_lookup(ipi);
1890 	if (ii->ii_count == NULL)
1891 		panic("%s: not setup IPI %u", __func__, ipi);
1892 
1893 	/*
1894 	 * XXX: Surely needed on other architectures too? Either way should be
1895 	 * some kind of MI hook defined in an MD header, or the responsibility
1896 	 * of the MD caller if not widespread.
1897 	 */
1898 #ifdef __aarch64__
1899 	/*
1900 	 * Ensure that this CPU's stores will be visible to IPI
1901 	 * recipients before starting to send the interrupts.
1902 	 */
1903 	dsb(ishst);
1904 #endif
1905 
1906 	PIC_IPI_SEND(intr_ipi_dev, ii->ii_isrc, cpus, ipi);
1907 }
1908 
1909 /*
1910  *  interrupt controller dispatch function for IPIs. It should
1911  *  be called straight from the interrupt controller, when associated
1912  *  interrupt source is learned. Or from anybody who has an interrupt
1913  *  source mapped.
1914  */
1915 void
1916 intr_ipi_dispatch(u_int ipi)
1917 {
1918 	struct intr_ipi *ii;
1919 
1920 	ii = intr_ipi_lookup(ipi);
1921 	if (ii->ii_count == NULL)
1922 		panic("%s: not setup IPI %u", __func__, ipi);
1923 
1924 	intr_ipi_increment_count(ii->ii_count, PCPU_GET(cpuid));
1925 
1926 	ii->ii_handler(ii->ii_handler_arg);
1927 }
1928 #endif
1929