1 /*-
2  * Copyright (c) 2015-2016 Svatopluk Kraus
3  * Copyright (c) 2015-2016 Michal Meloun
4  * All rights reserved.
5  * Copyright (c) 2015-2016 The FreeBSD Foundation
6  * Copyright (c) 2021 Jessica Clarke <jrtc27@FreeBSD.org>
7  *
8  * Portions of this software were developed by Andrew Turner under
9  * sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 /*
35  *	New-style Interrupt Framework
36  *
37  *  TODO: - add support for disconnected PICs.
 *        - support IPI (PPI) enabling on CPUs that are already started.
 *        - complete support for removable PICs.
40  */
41 
42 #include "opt_ddb.h"
43 #include "opt_hwpmc_hooks.h"
44 #include "opt_iommu.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/asan.h>
49 #include <sys/bitstring.h>
50 #include <sys/bus.h>
51 #include <sys/conf.h>
52 #include <sys/cpuset.h>
53 #include <sys/interrupt.h>
54 #include <sys/kernel.h>
55 #include <sys/lock.h>
56 #include <sys/malloc.h>
57 #include <sys/mutex.h>
58 #include <sys/proc.h>
59 #include <sys/queue.h>
60 #include <sys/rman.h>
61 #include <sys/sched.h>
62 #include <sys/smp.h>
63 #include <sys/sysctl.h>
64 #include <sys/syslog.h>
65 #include <sys/taskqueue.h>
66 #include <sys/tree.h>
67 #include <sys/vmmeter.h>
68 #ifdef HWPMC_HOOKS
69 #include <sys/pmckern.h>
70 #endif
71 
72 #include <machine/atomic.h>
73 #include <machine/cpu.h>
74 #include <machine/intr.h>
75 #include <machine/smp.h>
76 #include <machine/stdarg.h>
77 
78 #ifdef DDB
79 #include <ddb/ddb.h>
80 #endif
81 
82 #ifdef IOMMU
83 #include <dev/iommu/iommu_msi.h>
84 #endif
85 
86 #include "pic_if.h"
87 #include "msi_if.h"
88 
89 #define	INTRNAME_LEN	(2*MAXCOMLEN + 1)
90 
91 #ifdef DEBUG
92 #define debugf(fmt, args...) do { printf("%s(): ", __func__);	\
93     printf(fmt,##args); } while (0)
94 #else
95 #define debugf(fmt, args...)
96 #endif
97 
98 MALLOC_DECLARE(M_INTRNG);
99 MALLOC_DEFINE(M_INTRNG, "intr", "intr interrupt handling");
100 
101 /* Main interrupt handler called from assembler -> 'hidden' for C code. */
102 void intr_irq_handler(struct trapframe *tf);
103 
104 /* Root interrupt controller stuff. */
105 device_t intr_irq_root_dev;
106 static intr_irq_filter_t *irq_root_filter;
107 static void *irq_root_arg;
108 
109 struct intr_pic_child {
110 	SLIST_ENTRY(intr_pic_child)	 pc_next;
111 	struct intr_pic			*pc_pic;
112 	intr_child_irq_filter_t		*pc_filter;
113 	void				*pc_filter_arg;
114 	uintptr_t			 pc_start;
115 	uintptr_t			 pc_length;
116 };
117 
118 /* Interrupt controller definition. */
119 struct intr_pic {
120 	SLIST_ENTRY(intr_pic)	pic_next;
121 	intptr_t		pic_xref;	/* hardware identification */
122 	device_t		pic_dev;
123 /* Only one of FLAG_PIC or FLAG_MSI may be set */
124 #define	FLAG_PIC	(1 << 0)
125 #define	FLAG_MSI	(1 << 1)
126 #define	FLAG_TYPE_MASK	(FLAG_PIC | FLAG_MSI)
127 	u_int			pic_flags;
128 	struct mtx		pic_child_lock;
129 	SLIST_HEAD(, intr_pic_child) pic_children;
130 };
131 
132 #ifdef SMP
133 #define INTR_IPI_NAMELEN	(MAXCOMLEN + 1)
134 
135 struct intr_ipi {
136 	intr_ipi_handler_t	*ii_handler;
137 	void			*ii_handler_arg;
138 	struct intr_irqsrc	*ii_isrc;
139 	char			ii_name[INTR_IPI_NAMELEN];
140 	u_long			*ii_count;
141 };
142 
143 static device_t intr_ipi_dev;
144 static u_int intr_ipi_dev_priority;
145 static bool intr_ipi_dev_frozen;
146 #endif
147 
148 static struct mtx pic_list_lock;
149 static SLIST_HEAD(, intr_pic) pic_list;
150 
151 static struct intr_pic *pic_lookup(device_t dev, intptr_t xref, int flags);
152 
153 /* Interrupt source definition. */
154 static struct mtx isrc_table_lock;
155 static struct intr_irqsrc **irq_sources;
156 static u_int irq_next_free;
157 
158 #ifdef SMP
159 #ifdef EARLY_AP_STARTUP
160 static bool irq_assign_cpu = true;
161 #else
162 static bool irq_assign_cpu = false;
163 #endif
164 
165 static struct intr_ipi ipi_sources[INTR_IPI_COUNT];
166 #endif
167 
168 u_int intr_nirq = NIRQ;
169 SYSCTL_UINT(_machdep, OID_AUTO, nirq, CTLFLAG_RDTUN, &intr_nirq, 0,
170     "Number of IRQs");
171 
172 /* Data for MI statistics reporting. */
173 u_long *intrcnt;
174 char *intrnames;
175 size_t sintrcnt;
176 size_t sintrnames;
177 int nintrcnt;
178 static bitstr_t *intrcnt_bitmap;
179 
180 static struct intr_irqsrc *intr_map_get_isrc(u_int res_id);
181 static void intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc);
static struct intr_map_data *intr_map_get_map_data(u_int res_id);
183 static void intr_map_copy_map_data(u_int res_id, device_t *dev, intptr_t *xref,
184     struct intr_map_data **data);
185 
186 /*
187  *  Interrupt framework initialization routine.
188  */
189 static void
190 intr_irq_init(void *dummy __unused)
191 {
192 
193 	SLIST_INIT(&pic_list);
194 	mtx_init(&pic_list_lock, "intr pic list", NULL, MTX_DEF);
195 
196 	mtx_init(&isrc_table_lock, "intr isrc table", NULL, MTX_DEF);
197 
198 	/*
199 	 * - 2 counters for each I/O interrupt.
	 * - mp_maxid + 1 counters for each IPI for SMP.
201 	 */
202 	nintrcnt = intr_nirq * 2;
203 #ifdef SMP
204 	nintrcnt += INTR_IPI_COUNT * (mp_maxid + 1);
205 #endif
206 
207 	intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTRNG,
208 	    M_WAITOK | M_ZERO);
209 	intrnames = mallocarray(nintrcnt, INTRNAME_LEN, M_INTRNG,
210 	    M_WAITOK | M_ZERO);
211 	sintrcnt = nintrcnt * sizeof(u_long);
212 	sintrnames = nintrcnt * INTRNAME_LEN;
213 
214 	/* Allocate the bitmap tracking counter allocations. */
215 	intrcnt_bitmap = bit_alloc(nintrcnt, M_INTRNG, M_WAITOK | M_ZERO);
216 
	irq_sources = mallocarray(intr_nirq, sizeof(struct intr_irqsrc *),
218 	    M_INTRNG, M_WAITOK | M_ZERO);
219 }
220 SYSINIT(intr_irq_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_irq_init, NULL);
221 
222 static void
223 intrcnt_setname(const char *name, int index)
224 {
225 
226 	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
227 	    INTRNAME_LEN - 1, name);
228 }
229 
230 /*
231  *  Update name for interrupt source with interrupt event.
232  */
233 static void
234 intrcnt_updatename(struct intr_irqsrc *isrc)
235 {
236 
237 	/* QQQ: What about stray counter name? */
238 	mtx_assert(&isrc_table_lock, MA_OWNED);
239 	intrcnt_setname(isrc->isrc_event->ie_fullname, isrc->isrc_index);
240 }
241 
242 /*
243  *  Virtualization for interrupt source interrupt counter increment.
244  */
245 static inline void
246 isrc_increment_count(struct intr_irqsrc *isrc)
247 {
248 
249 	if (isrc->isrc_flags & INTR_ISRCF_PPI)
250 		atomic_add_long(&isrc->isrc_count[0], 1);
251 	else
252 		isrc->isrc_count[0]++;
253 }
254 
255 /*
256  *  Virtualization for interrupt source interrupt stray counter increment.
257  */
258 static inline void
259 isrc_increment_straycount(struct intr_irqsrc *isrc)
260 {
261 
262 	isrc->isrc_count[1]++;
263 }
264 
265 /*
266  *  Virtualization for interrupt source interrupt name update.
267  */
268 static void
269 isrc_update_name(struct intr_irqsrc *isrc, const char *name)
270 {
271 	char str[INTRNAME_LEN];
272 
273 	mtx_assert(&isrc_table_lock, MA_OWNED);
274 
275 	if (name != NULL) {
276 		snprintf(str, INTRNAME_LEN, "%s: %s", isrc->isrc_name, name);
277 		intrcnt_setname(str, isrc->isrc_index);
278 		snprintf(str, INTRNAME_LEN, "stray %s: %s", isrc->isrc_name,
279 		    name);
280 		intrcnt_setname(str, isrc->isrc_index + 1);
281 	} else {
282 		snprintf(str, INTRNAME_LEN, "%s:", isrc->isrc_name);
283 		intrcnt_setname(str, isrc->isrc_index);
284 		snprintf(str, INTRNAME_LEN, "stray %s:", isrc->isrc_name);
285 		intrcnt_setname(str, isrc->isrc_index + 1);
286 	}
287 }
288 
289 /*
290  *  Virtualization for interrupt source interrupt counters setup.
291  */
292 static void
293 isrc_setup_counters(struct intr_irqsrc *isrc)
294 {
295 	int index;
296 
297 	mtx_assert(&isrc_table_lock, MA_OWNED);
298 
299 	/*
300 	 * Allocate two counter values, the second tracking "stray" interrupts.
301 	 */
302 	bit_ffc_area(intrcnt_bitmap, nintrcnt, 2, &index);
303 	if (index == -1)
304 		panic("Failed to allocate 2 counters. Array exhausted?");
305 	bit_nset(intrcnt_bitmap, index, index + 1);
306 	isrc->isrc_index = index;
307 	isrc->isrc_count = &intrcnt[index];
308 	isrc_update_name(isrc, NULL);
309 }
310 
311 /*
312  *  Virtualization for interrupt source interrupt counters release.
313  */
314 static void
315 isrc_release_counters(struct intr_irqsrc *isrc)
316 {
317 	int idx = isrc->isrc_index;
318 
319 	mtx_assert(&isrc_table_lock, MA_OWNED);
320 
321 	bit_nclear(intrcnt_bitmap, idx, idx + 1);
322 }
323 
324 /*
 *  Main interrupt dispatch handler. It is called straight
 *  from the assembler, where the CPU interrupt is served.
327  */
328 void
329 intr_irq_handler(struct trapframe *tf)
330 {
	struct trapframe *oldframe;
	struct thread *td;
333 
334 	KASSERT(irq_root_filter != NULL, ("%s: no filter", __func__));
335 
336 	kasan_mark(tf, sizeof(*tf), sizeof(*tf), 0);
337 
338 	VM_CNT_INC(v_intr);
339 	critical_enter();
340 	td = curthread;
341 	oldframe = td->td_intr_frame;
342 	td->td_intr_frame = tf;
343 	irq_root_filter(irq_root_arg);
344 	td->td_intr_frame = oldframe;
345 	critical_exit();
346 #ifdef HWPMC_HOOKS
347 	if (pmc_hook && TRAPF_USERMODE(tf) &&
348 	    (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN))
349 		pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, tf);
350 #endif
351 }
352 
353 int
354 intr_child_irq_handler(struct intr_pic *parent, uintptr_t irq)
355 {
356 	struct intr_pic_child *child;
357 	bool found;
358 
359 	found = false;
360 	mtx_lock_spin(&parent->pic_child_lock);
361 	SLIST_FOREACH(child, &parent->pic_children, pc_next) {
362 		if (child->pc_start <= irq &&
363 		    irq < (child->pc_start + child->pc_length)) {
364 			found = true;
365 			break;
366 		}
367 	}
368 	mtx_unlock_spin(&parent->pic_child_lock);
369 
370 	if (found)
371 		return (child->pc_filter(child->pc_filter_arg, irq));
372 
373 	return (FILTER_STRAY);
374 }
375 
376 /*
 *  Interrupt controller dispatch function for interrupts. It should
 *  be called straight from the interrupt controller, when the associated
 *  interrupt source is learned.
380  */
381 int
382 intr_isrc_dispatch(struct intr_irqsrc *isrc, struct trapframe *tf)
383 {
384 
385 	KASSERT(isrc != NULL, ("%s: no source", __func__));
386 
387 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
388 		isrc_increment_count(isrc);
389 
390 #ifdef INTR_SOLO
391 	if (isrc->isrc_filter != NULL) {
392 		int error;
393 		error = isrc->isrc_filter(isrc->isrc_arg, tf);
394 		PIC_POST_FILTER(isrc->isrc_dev, isrc);
395 		if (error == FILTER_HANDLED)
396 			return (0);
397 	} else
398 #endif
399 	if (isrc->isrc_event != NULL) {
400 		if (intr_event_handle(isrc->isrc_event, tf) == 0)
401 			return (0);
402 	}
403 
404 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
405 		isrc_increment_straycount(isrc);
406 	return (EINVAL);
407 }
408 
409 /*
 *  Allocate a unique interrupt number (resource handle) for an interrupt
 *  source.
 *
 *  There are various possible strategies for allocating a free interrupt
 *  number (resource handle) for a new interrupt source:
 *
 *  1. Handles are always allocated forward, so handles are not recycled
 *     immediately. However, if only one free handle is left, it is reused
 *     constantly...
418  */
419 static inline int
420 isrc_alloc_irq(struct intr_irqsrc *isrc)
421 {
422 	u_int irq;
423 
424 	mtx_assert(&isrc_table_lock, MA_OWNED);
425 
426 	if (irq_next_free >= intr_nirq)
427 		return (ENOSPC);
428 
429 	for (irq = irq_next_free; irq < intr_nirq; irq++) {
430 		if (irq_sources[irq] == NULL)
431 			goto found;
432 	}
433 	for (irq = 0; irq < irq_next_free; irq++) {
434 		if (irq_sources[irq] == NULL)
435 			goto found;
436 	}
437 
438 	irq_next_free = intr_nirq;
439 	return (ENOSPC);
440 
441 found:
442 	isrc->isrc_irq = irq;
443 	irq_sources[irq] = isrc;
444 
445 	irq_next_free = irq + 1;
446 	if (irq_next_free >= intr_nirq)
447 		irq_next_free = 0;
448 	return (0);
449 }
450 
451 /*
452  *  Free unique interrupt number (resource handle) from interrupt source.
453  */
454 static inline int
455 isrc_free_irq(struct intr_irqsrc *isrc)
456 {
457 
458 	mtx_assert(&isrc_table_lock, MA_OWNED);
459 
460 	if (isrc->isrc_irq >= intr_nirq)
461 		return (EINVAL);
462 	if (irq_sources[isrc->isrc_irq] != isrc)
463 		return (EINVAL);
464 
465 	irq_sources[isrc->isrc_irq] = NULL;
466 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
467 
468 	/*
	 * If we are recovering from a state where the irq_sources table was
	 * full, then the following allocation should check the entire table.
	 * This will ensure maximum separation of allocation order from release
	 * order.
473 	 */
474 	if (irq_next_free >= intr_nirq)
475 		irq_next_free = 0;
476 
477 	return (0);
478 }
479 
480 /*
481  *  Initialize interrupt source and register it into global interrupt table.
482  */
483 int
484 intr_isrc_register(struct intr_irqsrc *isrc, device_t dev, u_int flags,
485     const char *fmt, ...)
486 {
487 	int error;
488 	va_list ap;
489 
490 	bzero(isrc, sizeof(struct intr_irqsrc));
491 	isrc->isrc_dev = dev;
492 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
493 	isrc->isrc_flags = flags;
494 
495 	va_start(ap, fmt);
496 	vsnprintf(isrc->isrc_name, INTR_ISRC_NAMELEN, fmt, ap);
497 	va_end(ap);
498 
499 	mtx_lock(&isrc_table_lock);
500 	error = isrc_alloc_irq(isrc);
501 	if (error != 0) {
502 		mtx_unlock(&isrc_table_lock);
503 		return (error);
504 	}
505 	/*
	 * Set up interrupt counters, but not for IPI sources. Those are set
	 * up later, and only for the ones actually used (up to
	 * INTR_IPI_COUNT), so as not to exhaust our counter pool.
509 	 */
510 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
511 		isrc_setup_counters(isrc);
512 	mtx_unlock(&isrc_table_lock);
513 	return (0);
514 }
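
/*
 * Example (illustrative sketch, not part of the framework): a PIC driver
 * typically embeds struct intr_irqsrc in its per-interrupt softc data and
 * registers one source per hardware interrupt during attach. The "mypic"
 * names are hypothetical.
 *
 *	struct mypic_irqsrc {
 *		struct intr_irqsrc	isrc;
 *		u_int			irq;
 *	};
 *
 *	for (irq = 0; irq < MYPIC_NIRQS; irq++) {
 *		sc->sc_isrcs[irq].irq = irq;
 *		error = intr_isrc_register(&sc->sc_isrcs[irq].isrc, dev, 0,
 *		    "%s,%u", device_get_nameunit(dev), irq);
 *		if (error != 0)
 *			return (error);
 *	}
 */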
515 
516 /*
517  *  Deregister interrupt source from global interrupt table.
518  */
519 int
520 intr_isrc_deregister(struct intr_irqsrc *isrc)
521 {
522 	int error;
523 
524 	mtx_lock(&isrc_table_lock);
525 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
526 		isrc_release_counters(isrc);
527 	error = isrc_free_irq(isrc);
528 	mtx_unlock(&isrc_table_lock);
529 	return (error);
530 }
531 
532 #ifdef SMP
533 /*
 *  A support function for a PIC to decide if the provided ISRC should be
 *  initialized on a given CPU. The logic of the INTR_ISRCF_BOUND flag and
 *  the isrc_cpu member of struct intr_irqsrc is the following:
 *
 *     If INTR_ISRCF_BOUND is set, the ISRC should be initialized only on
 *     CPUs set in isrc_cpu. If not, the ISRC should be initialized on every
 *     CPU and isrc_cpu is kept consistent with it. Thus isrc_cpu is always
 *     correct.
541  */
542 bool
543 intr_isrc_init_on_cpu(struct intr_irqsrc *isrc, u_int cpu)
544 {
545 
546 	if (isrc->isrc_handlers == 0)
547 		return (false);
548 	if ((isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI)) == 0)
549 		return (false);
550 	if (isrc->isrc_flags & INTR_ISRCF_BOUND)
551 		return (CPU_ISSET(cpu, &isrc->isrc_cpu));
552 
553 	CPU_SET(cpu, &isrc->isrc_cpu);
554 	return (true);
555 }
556 #endif
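
/*
 * Example (illustrative sketch): a PIC's init_secondary method may use
 * intr_isrc_init_on_cpu() to decide which per-CPU (PPI) sources to enable
 * on a booting CPU. The "mypic" names are hypothetical.
 *
 *	static void
 *	mypic_init_secondary(device_t dev)
 *	{
 *		struct mypic_softc *sc = device_get_softc(dev);
 *		u_int cpu = PCPU_GET(cpuid);
 *		u_int irq;
 *
 *		for (irq = 0; irq < MYPIC_NPPIS; irq++) {
 *			if (intr_isrc_init_on_cpu(&sc->sc_ppis[irq].isrc, cpu))
 *				mypic_enable_hw_intr(sc, irq);
 *		}
 *	}
 */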
557 
558 #ifdef INTR_SOLO
559 /*
 *  Set up a filter for an interrupt source.
561  */
562 static int
563 iscr_setup_filter(struct intr_irqsrc *isrc, const char *name,
564     intr_irq_filter_t *filter, void *arg, void **cookiep)
565 {
566 
567 	if (filter == NULL)
568 		return (EINVAL);
569 
570 	mtx_lock(&isrc_table_lock);
571 	/*
	 * Make sure that we do not mix the two ways
	 * in which we handle interrupt sources.
574 	 */
575 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
576 		mtx_unlock(&isrc_table_lock);
577 		return (EBUSY);
578 	}
579 	isrc->isrc_filter = filter;
580 	isrc->isrc_arg = arg;
581 	isrc_update_name(isrc, name);
582 	mtx_unlock(&isrc_table_lock);
583 
584 	*cookiep = isrc;
585 	return (0);
586 }
587 #endif
588 
589 /*
590  *  Interrupt source pre_ithread method for MI interrupt framework.
591  */
592 static void
593 intr_isrc_pre_ithread(void *arg)
594 {
595 	struct intr_irqsrc *isrc = arg;
596 
597 	PIC_PRE_ITHREAD(isrc->isrc_dev, isrc);
598 }
599 
600 /*
601  *  Interrupt source post_ithread method for MI interrupt framework.
602  */
603 static void
604 intr_isrc_post_ithread(void *arg)
605 {
606 	struct intr_irqsrc *isrc = arg;
607 
608 	PIC_POST_ITHREAD(isrc->isrc_dev, isrc);
609 }
610 
611 /*
612  *  Interrupt source post_filter method for MI interrupt framework.
613  */
614 static void
615 intr_isrc_post_filter(void *arg)
616 {
617 	struct intr_irqsrc *isrc = arg;
618 
619 	PIC_POST_FILTER(isrc->isrc_dev, isrc);
620 }
621 
622 /*
623  *  Interrupt source assign_cpu method for MI interrupt framework.
624  */
625 static int
626 intr_isrc_assign_cpu(void *arg, int cpu)
627 {
628 #ifdef SMP
629 	struct intr_irqsrc *isrc = arg;
630 	int error;
631 
632 	mtx_lock(&isrc_table_lock);
633 	if (cpu == NOCPU) {
634 		CPU_ZERO(&isrc->isrc_cpu);
635 		isrc->isrc_flags &= ~INTR_ISRCF_BOUND;
636 	} else {
637 		CPU_SETOF(cpu, &isrc->isrc_cpu);
638 		isrc->isrc_flags |= INTR_ISRCF_BOUND;
639 	}
640 
641 	/*
642 	 * In NOCPU case, it's up to PIC to either leave ISRC on same CPU or
643 	 * re-balance it to another CPU or enable it on more CPUs. However,
644 	 * PIC is expected to change isrc_cpu appropriately to keep us well
645 	 * informed if the call is successful.
646 	 */
647 	if (irq_assign_cpu) {
648 		error = PIC_BIND_INTR(isrc->isrc_dev, isrc);
649 		if (error) {
650 			CPU_ZERO(&isrc->isrc_cpu);
651 			mtx_unlock(&isrc_table_lock);
652 			return (error);
653 		}
654 	}
655 	mtx_unlock(&isrc_table_lock);
656 	return (0);
657 #else
658 	return (EOPNOTSUPP);
659 #endif
660 }
661 
662 /*
663  *  Create interrupt event for interrupt source.
664  */
665 static int
666 isrc_event_create(struct intr_irqsrc *isrc)
667 {
668 	struct intr_event *ie;
669 	int error;
670 
671 	error = intr_event_create(&ie, isrc, 0, isrc->isrc_irq,
672 	    intr_isrc_pre_ithread, intr_isrc_post_ithread, intr_isrc_post_filter,
673 	    intr_isrc_assign_cpu, "%s:", isrc->isrc_name);
674 	if (error)
675 		return (error);
676 
677 	mtx_lock(&isrc_table_lock);
678 	/*
	 * Make sure that we do not mix the two ways in which we handle
	 * interrupt sources. Let the contested event win.
681 	 */
682 #ifdef INTR_SOLO
683 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
684 #else
685 	if (isrc->isrc_event != NULL) {
686 #endif
687 		mtx_unlock(&isrc_table_lock);
688 		intr_event_destroy(ie);
689 		return (isrc->isrc_event != NULL ? EBUSY : 0);
690 	}
691 	isrc->isrc_event = ie;
692 	mtx_unlock(&isrc_table_lock);
693 
694 	return (0);
695 }
696 #ifdef notyet
697 /*
698  *  Destroy interrupt event for interrupt source.
699  */
700 static void
701 isrc_event_destroy(struct intr_irqsrc *isrc)
702 {
703 	struct intr_event *ie;
704 
705 	mtx_lock(&isrc_table_lock);
706 	ie = isrc->isrc_event;
707 	isrc->isrc_event = NULL;
708 	mtx_unlock(&isrc_table_lock);
709 
710 	if (ie != NULL)
711 		intr_event_destroy(ie);
712 }
713 #endif
714 /*
715  *  Add handler to interrupt source.
716  */
717 static int
718 isrc_add_handler(struct intr_irqsrc *isrc, const char *name,
719     driver_filter_t filter, driver_intr_t handler, void *arg,
720     enum intr_type flags, void **cookiep)
721 {
722 	int error;
723 
724 	if (isrc->isrc_event == NULL) {
725 		error = isrc_event_create(isrc);
726 		if (error)
727 			return (error);
728 	}
729 
730 	error = intr_event_add_handler(isrc->isrc_event, name, filter, handler,
731 	    arg, intr_priority(flags), flags, cookiep);
732 	if (error == 0) {
733 		mtx_lock(&isrc_table_lock);
734 		intrcnt_updatename(isrc);
735 		mtx_unlock(&isrc_table_lock);
736 	}
737 
738 	return (error);
739 }
740 
741 /*
 *  Look up an interrupt controller, with the PIC list lock held.
743  */
744 static inline struct intr_pic *
745 pic_lookup_locked(device_t dev, intptr_t xref, int flags)
746 {
747 	struct intr_pic *pic;
748 
749 	mtx_assert(&pic_list_lock, MA_OWNED);
750 
751 	if (dev == NULL && xref == 0)
752 		return (NULL);
753 
754 	/* Note that pic->pic_dev is never NULL on registered PIC. */
755 	SLIST_FOREACH(pic, &pic_list, pic_next) {
756 		if ((pic->pic_flags & FLAG_TYPE_MASK) !=
757 		    (flags & FLAG_TYPE_MASK))
758 			continue;
759 
760 		if (dev == NULL) {
761 			if (xref == pic->pic_xref)
762 				return (pic);
763 		} else if (xref == 0 || pic->pic_xref == 0) {
764 			if (dev == pic->pic_dev)
765 				return (pic);
766 		} else if (xref == pic->pic_xref && dev == pic->pic_dev)
			return (pic);
768 	}
769 	return (NULL);
770 }
771 
772 /*
 *  Look up an interrupt controller.
774  */
775 static struct intr_pic *
776 pic_lookup(device_t dev, intptr_t xref, int flags)
777 {
778 	struct intr_pic *pic;
779 
780 	mtx_lock(&pic_list_lock);
781 	pic = pic_lookup_locked(dev, xref, flags);
782 	mtx_unlock(&pic_list_lock);
783 	return (pic);
784 }
785 
786 /*
787  *  Create interrupt controller.
788  */
789 static struct intr_pic *
790 pic_create(device_t dev, intptr_t xref, int flags)
791 {
792 	struct intr_pic *pic;
793 
794 	mtx_lock(&pic_list_lock);
795 	pic = pic_lookup_locked(dev, xref, flags);
796 	if (pic != NULL) {
797 		mtx_unlock(&pic_list_lock);
798 		return (pic);
799 	}
800 	pic = malloc(sizeof(*pic), M_INTRNG, M_NOWAIT | M_ZERO);
801 	if (pic == NULL) {
802 		mtx_unlock(&pic_list_lock);
803 		return (NULL);
804 	}
805 	pic->pic_xref = xref;
806 	pic->pic_dev = dev;
807 	pic->pic_flags = flags;
808 	mtx_init(&pic->pic_child_lock, "pic child lock", NULL, MTX_SPIN);
809 	SLIST_INSERT_HEAD(&pic_list, pic, pic_next);
810 	mtx_unlock(&pic_list_lock);
811 
812 	return (pic);
813 }
814 #ifdef notyet
815 /*
816  *  Destroy interrupt controller.
817  */
818 static void
819 pic_destroy(device_t dev, intptr_t xref, int flags)
820 {
821 	struct intr_pic *pic;
822 
823 	mtx_lock(&pic_list_lock);
824 	pic = pic_lookup_locked(dev, xref, flags);
825 	if (pic == NULL) {
826 		mtx_unlock(&pic_list_lock);
827 		return;
828 	}
829 	SLIST_REMOVE(&pic_list, pic, intr_pic, pic_next);
830 	mtx_unlock(&pic_list_lock);
831 
832 	free(pic, M_INTRNG);
833 }
834 #endif
835 /*
836  *  Register interrupt controller.
837  */
838 struct intr_pic *
839 intr_pic_register(device_t dev, intptr_t xref)
840 {
841 	struct intr_pic *pic;
842 
843 	if (dev == NULL)
844 		return (NULL);
845 	pic = pic_create(dev, xref, FLAG_PIC);
846 	if (pic == NULL)
847 		return (NULL);
848 
849 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
850 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
851 	return (pic);
852 }
853 
854 /*
855  *  Unregister interrupt controller.
856  */
857 int
858 intr_pic_deregister(device_t dev, intptr_t xref)
859 {
860 
861 	panic("%s: not implemented", __func__);
862 }
863 
864 /*
865  *  Mark interrupt controller (itself) as a root one.
866  *
 *  Note that only an interrupt controller can really know its position
 *  in the interrupt controller tree. So the root PIC must claim itself
 *  as the root.
869  *
870  *  In FDT case, according to ePAPR approved version 1.1 from 08 April 2011,
871  *  page 30:
872  *    "The root of the interrupt tree is determined when traversal
873  *     of the interrupt tree reaches an interrupt controller node without
874  *     an interrupts property and thus no explicit interrupt parent."
875  */
876 int
877 intr_pic_claim_root(device_t dev, intptr_t xref, intr_irq_filter_t *filter,
878     void *arg)
879 {
880 	struct intr_pic *pic;
881 
882 	pic = pic_lookup(dev, xref, FLAG_PIC);
883 	if (pic == NULL) {
884 		device_printf(dev, "not registered\n");
885 		return (EINVAL);
886 	}
887 
888 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
889 	    ("%s: Found a non-PIC controller: %s", __func__,
890 	     device_get_name(pic->pic_dev)));
891 
892 	if (filter == NULL) {
893 		device_printf(dev, "filter missing\n");
894 		return (EINVAL);
895 	}
896 
897 	/*
	 * Only one interrupt controller can be the root for now.
	 * Note that we further suppose that there is no threaded interrupt
	 * routine (handler) on the root. See intr_irq_handler().
901 	 */
902 	if (intr_irq_root_dev != NULL) {
903 		device_printf(dev, "another root already set\n");
904 		return (EBUSY);
905 	}
906 
907 	intr_irq_root_dev = dev;
908 	irq_root_filter = filter;
909 	irq_root_arg = arg;
910 
911 	debugf("irq root set to %s\n", device_get_nameunit(dev));
912 	return (0);
913 }
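
/*
 * Example (illustrative sketch): a root PIC driver registers itself and
 * then claims the root during attach. The "mypic" names are hypothetical;
 * an FDT driver would typically derive the xref from its OFW node.
 *
 *	static int
 *	mypic_attach(device_t dev)
 *	{
 *		struct mypic_softc *sc = device_get_softc(dev);
 *		intptr_t xref = OF_xref_from_node(ofw_bus_get_node(dev));
 *
 *		if (intr_pic_register(dev, xref) == NULL)
 *			return (ENXIO);
 *		return (intr_pic_claim_root(dev, xref, mypic_filter, sc));
 *	}
 */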
914 
915 /*
 * Add a handler to manage a subrange of a parent's interrupts.
917  */
918 int
919 intr_pic_add_handler(device_t parent, struct intr_pic *pic,
920     intr_child_irq_filter_t *filter, void *arg, uintptr_t start,
921     uintptr_t length)
922 {
923 	struct intr_pic *parent_pic;
924 	struct intr_pic_child *newchild;
925 #ifdef INVARIANTS
926 	struct intr_pic_child *child;
927 #endif
928 
929 	/* Find the parent PIC */
930 	parent_pic = pic_lookup(parent, 0, FLAG_PIC);
931 	if (parent_pic == NULL)
932 		return (ENXIO);
933 
934 	newchild = malloc(sizeof(*newchild), M_INTRNG, M_WAITOK | M_ZERO);
935 	newchild->pc_pic = pic;
936 	newchild->pc_filter = filter;
937 	newchild->pc_filter_arg = arg;
938 	newchild->pc_start = start;
939 	newchild->pc_length = length;
940 
941 	mtx_lock_spin(&parent_pic->pic_child_lock);
942 #ifdef INVARIANTS
943 	SLIST_FOREACH(child, &parent_pic->pic_children, pc_next) {
944 		KASSERT(child->pc_pic != pic, ("%s: Adding a child PIC twice",
945 		    __func__));
946 	}
947 #endif
948 	SLIST_INSERT_HEAD(&parent_pic->pic_children, newchild, pc_next);
949 	mtx_unlock_spin(&parent_pic->pic_child_lock);
950 
951 	return (0);
952 }
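
/*
 * Example (illustrative sketch): a cascaded PIC that owns IRQs
 * [start, start + length) of its parent registers a child filter; the
 * parent then routes those IRQs via intr_child_irq_handler(). The "mypic"
 * names are hypothetical.
 *
 *	pic = intr_pic_register(dev, xref);
 *	if (pic == NULL)
 *		return (ENXIO);
 *	error = intr_pic_add_handler(parent_dev, pic, mypic_child_filter,
 *	    sc, MYPIC_FIRST_IRQ, MYPIC_NIRQS);
 */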
953 
954 static int
955 intr_resolve_irq(device_t dev, intptr_t xref, struct intr_map_data *data,
956     struct intr_irqsrc **isrc)
957 {
958 	struct intr_pic *pic;
959 	struct intr_map_data_msi *msi;
960 
961 	if (data == NULL)
962 		return (EINVAL);
963 
964 	pic = pic_lookup(dev, xref,
965 	    (data->type == INTR_MAP_DATA_MSI) ? FLAG_MSI : FLAG_PIC);
966 	if (pic == NULL)
967 		return (ESRCH);
968 
969 	switch (data->type) {
970 	case INTR_MAP_DATA_MSI:
971 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
972 		    ("%s: Found a non-MSI controller: %s", __func__,
973 		     device_get_name(pic->pic_dev)));
974 		msi = (struct intr_map_data_msi *)data;
975 		*isrc = msi->isrc;
976 		return (0);
977 
978 	default:
979 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
980 		    ("%s: Found a non-PIC controller: %s", __func__,
981 		     device_get_name(pic->pic_dev)));
982 		return (PIC_MAP_INTR(pic->pic_dev, data, isrc));
983 	}
984 }
985 
986 bool
987 intr_is_per_cpu(struct resource *res)
988 {
989 	u_int res_id;
990 	struct intr_irqsrc *isrc;
991 
992 	res_id = (u_int)rman_get_start(res);
993 	isrc = intr_map_get_isrc(res_id);
994 
995 	if (isrc == NULL)
996 		panic("Attempt to get isrc for non-active resource id: %u\n",
997 		    res_id);
998 	return ((isrc->isrc_flags & INTR_ISRCF_PPI) != 0);
999 }
1000 
1001 int
1002 intr_activate_irq(device_t dev, struct resource *res)
1003 {
1004 	device_t map_dev;
1005 	intptr_t map_xref;
1006 	struct intr_map_data *data;
1007 	struct intr_irqsrc *isrc;
1008 	u_int res_id;
1009 	int error;
1010 
1011 	KASSERT(rman_get_start(res) == rman_get_end(res),
1012 	    ("%s: more interrupts in resource", __func__));
1013 
1014 	res_id = (u_int)rman_get_start(res);
1015 	if (intr_map_get_isrc(res_id) != NULL)
1016 		panic("Attempt to double activation of resource id: %u\n",
1017 		    res_id);
1018 	intr_map_copy_map_data(res_id, &map_dev, &map_xref, &data);
1019 	error = intr_resolve_irq(map_dev, map_xref, data, &isrc);
1020 	if (error != 0) {
1021 		free(data, M_INTRNG);
		/* XXX TODO DISCONNECTED PICs */
1023 		/* if (error == EINVAL) return(0); */
1024 		return (error);
1025 	}
1026 	intr_map_set_isrc(res_id, isrc);
1027 	rman_set_virtual(res, data);
1028 	return (PIC_ACTIVATE_INTR(isrc->isrc_dev, isrc, res, data));
1029 }
1030 
1031 int
1032 intr_deactivate_irq(device_t dev, struct resource *res)
1033 {
1034 	struct intr_map_data *data;
1035 	struct intr_irqsrc *isrc;
1036 	u_int res_id;
1037 	int error;
1038 
1039 	KASSERT(rman_get_start(res) == rman_get_end(res),
1040 	    ("%s: more interrupts in resource", __func__));
1041 
1042 	res_id = (u_int)rman_get_start(res);
1043 	isrc = intr_map_get_isrc(res_id);
1044 	if (isrc == NULL)
1045 		panic("Attempt to deactivate non-active resource id: %u\n",
1046 		    res_id);
1047 
1048 	data = rman_get_virtual(res);
1049 	error = PIC_DEACTIVATE_INTR(isrc->isrc_dev, isrc, res, data);
1050 	intr_map_set_isrc(res_id, NULL);
1051 	rman_set_virtual(res, NULL);
1052 	free(data, M_INTRNG);
1053 	return (error);
1054 }
1055 
1056 int
1057 intr_setup_irq(device_t dev, struct resource *res, driver_filter_t filt,
1058     driver_intr_t hand, void *arg, int flags, void **cookiep)
1059 {
1060 	int error;
1061 	struct intr_map_data *data;
1062 	struct intr_irqsrc *isrc;
1063 	const char *name;
1064 	u_int res_id;
1065 
1066 	KASSERT(rman_get_start(res) == rman_get_end(res),
1067 	    ("%s: more interrupts in resource", __func__));
1068 
1069 	res_id = (u_int)rman_get_start(res);
1070 	isrc = intr_map_get_isrc(res_id);
1071 	if (isrc == NULL) {
		/* XXX TODO DISCONNECTED PICs */
1073 		return (EINVAL);
1074 	}
1075 
1076 	data = rman_get_virtual(res);
1077 	name = device_get_nameunit(dev);
1078 
1079 #ifdef INTR_SOLO
1080 	/*
	 * Standard handling is done through the MI interrupt framework.
	 * However, some interrupts may request solely their own special
	 * handling. This non-standard handling can be used by interrupt
	 * controllers without a handler (filter only), so when interrupt
	 * controllers are chained, the MI interrupt framework is called only
	 * in the leaf controller.
	 *
	 * Note that the root interrupt controller routine is served as well,
	 * although in intr_irq_handler(), i.e. the main system dispatch
	 * routine.
1089 	 */
1090 	if (flags & INTR_SOLO && hand != NULL) {
		debugf("irq %u cannot solo on %s\n", isrc->isrc_irq, name);
1092 		return (EINVAL);
1093 	}
1094 
1095 	if (flags & INTR_SOLO) {
1096 		error = iscr_setup_filter(isrc, name, (intr_irq_filter_t *)filt,
1097 		    arg, cookiep);
1098 		debugf("irq %u setup filter error %d on %s\n", isrc->isrc_irq, error,
1099 		    name);
1100 	} else
1101 #endif
	{
		error = isrc_add_handler(isrc, name, filt, hand, arg, flags,
		    cookiep);
		debugf("irq %u add handler error %d on %s\n", isrc->isrc_irq,
		    error, name);
1106 	}
1107 	if (error != 0)
1108 		return (error);
1109 
1110 	mtx_lock(&isrc_table_lock);
1111 	error = PIC_SETUP_INTR(isrc->isrc_dev, isrc, res, data);
1112 	if (error == 0) {
1113 		isrc->isrc_handlers++;
1114 		if (isrc->isrc_handlers == 1)
1115 			PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
1116 	}
1117 	mtx_unlock(&isrc_table_lock);
1118 	if (error != 0)
1119 		intr_event_remove_handler(*cookiep);
1120 	return (error);
1121 }
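
/*
 * Example (illustrative sketch): device drivers do not call this directly;
 * they go through the bus methods, which end up here on INTRNG platforms.
 * The "mydev" names are hypothetical.
 *
 *	sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
 *	    &sc->sc_irq_rid, RF_ACTIVE);
 *	error = bus_setup_intr(dev, sc->sc_irq_res,
 *	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, mydev_intr, sc,
 *	    &sc->sc_intrhand);
 */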
1122 
1123 int
1124 intr_teardown_irq(device_t dev, struct resource *res, void *cookie)
1125 {
1126 	int error;
1127 	struct intr_map_data *data;
1128 	struct intr_irqsrc *isrc;
1129 	u_int res_id;
1130 
1131 	KASSERT(rman_get_start(res) == rman_get_end(res),
1132 	    ("%s: more interrupts in resource", __func__));
1133 
1134 	res_id = (u_int)rman_get_start(res);
1135 	isrc = intr_map_get_isrc(res_id);
1136 	if (isrc == NULL || isrc->isrc_handlers == 0)
1137 		return (EINVAL);
1138 
1139 	data = rman_get_virtual(res);
1140 
1141 #ifdef INTR_SOLO
1142 	if (isrc->isrc_filter != NULL) {
1143 		if (isrc != cookie)
1144 			return (EINVAL);
1145 
1146 		mtx_lock(&isrc_table_lock);
1147 		isrc->isrc_filter = NULL;
1148 		isrc->isrc_arg = NULL;
1149 		isrc->isrc_handlers = 0;
1150 		PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1151 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1152 		isrc_update_name(isrc, NULL);
1153 		mtx_unlock(&isrc_table_lock);
1154 		return (0);
1155 	}
1156 #endif
1157 	if (isrc != intr_handler_source(cookie))
1158 		return (EINVAL);
1159 
1160 	error = intr_event_remove_handler(cookie);
1161 	if (error == 0) {
1162 		mtx_lock(&isrc_table_lock);
1163 		isrc->isrc_handlers--;
1164 		if (isrc->isrc_handlers == 0)
1165 			PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1166 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1167 		intrcnt_updatename(isrc);
1168 		mtx_unlock(&isrc_table_lock);
1169 	}
1170 	return (error);
1171 }
1172 
1173 int
1174 intr_describe_irq(device_t dev, struct resource *res, void *cookie,
1175     const char *descr)
1176 {
1177 	int error;
1178 	struct intr_irqsrc *isrc;
1179 	u_int res_id;
1180 
1181 	KASSERT(rman_get_start(res) == rman_get_end(res),
1182 	    ("%s: more interrupts in resource", __func__));
1183 
1184 	res_id = (u_int)rman_get_start(res);
1185 	isrc = intr_map_get_isrc(res_id);
1186 	if (isrc == NULL || isrc->isrc_handlers == 0)
1187 		return (EINVAL);
1188 #ifdef INTR_SOLO
1189 	if (isrc->isrc_filter != NULL) {
1190 		if (isrc != cookie)
1191 			return (EINVAL);
1192 
1193 		mtx_lock(&isrc_table_lock);
1194 		isrc_update_name(isrc, descr);
1195 		mtx_unlock(&isrc_table_lock);
1196 		return (0);
1197 	}
1198 #endif
1199 	error = intr_event_describe_handler(isrc->isrc_event, cookie, descr);
1200 	if (error == 0) {
1201 		mtx_lock(&isrc_table_lock);
1202 		intrcnt_updatename(isrc);
1203 		mtx_unlock(&isrc_table_lock);
1204 	}
1205 	return (error);
1206 }
1207 
1208 #ifdef SMP
1209 int
1210 intr_bind_irq(device_t dev, struct resource *res, int cpu)
1211 {
1212 	struct intr_irqsrc *isrc;
1213 	u_int res_id;
1214 
1215 	KASSERT(rman_get_start(res) == rman_get_end(res),
1216 	    ("%s: more interrupts in resource", __func__));
1217 
1218 	res_id = (u_int)rman_get_start(res);
1219 	isrc = intr_map_get_isrc(res_id);
1220 	if (isrc == NULL || isrc->isrc_handlers == 0)
1221 		return (EINVAL);
1222 #ifdef INTR_SOLO
1223 	if (isrc->isrc_filter != NULL)
1224 		return (intr_isrc_assign_cpu(isrc, cpu));
1225 #endif
1226 	return (intr_event_bind(isrc->isrc_event, cpu));
1227 }
1228 
1229 /*
1230  * Return the CPU that the next interrupt source should use.
1231  * For now just returns the next CPU according to round-robin.
1232  */
1233 u_int
1234 intr_irq_next_cpu(u_int last_cpu, cpuset_t *cpumask)
1235 {
1236 	u_int cpu;
1237 
1238 	KASSERT(!CPU_EMPTY(cpumask), ("%s: Empty CPU mask", __func__));
1239 	if (!irq_assign_cpu || mp_ncpus == 1) {
1240 		cpu = PCPU_GET(cpuid);
1241 
1242 		if (CPU_ISSET(cpu, cpumask))
			return (cpu);
1244 
1245 		return (CPU_FFS(cpumask) - 1);
1246 	}
1247 
1248 	do {
1249 		last_cpu++;
1250 		if (last_cpu > mp_maxid)
1251 			last_cpu = 0;
1252 	} while (!CPU_ISSET(last_cpu, cpumask));
1253 	return (last_cpu);
1254 }
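
/*
 * Example (illustrative sketch): a PIC's bind method can use this helper
 * to pick a target CPU when the source is not bound to a specific one.
 * The "mypic" names and the static round-robin state are hypothetical.
 *
 *	static int
 *	mypic_bind_intr(device_t dev, struct intr_irqsrc *isrc)
 *	{
 *		static u_int last_cpu;
 *
 *		if (CPU_EMPTY(&isrc->isrc_cpu)) {
 *			last_cpu = intr_irq_next_cpu(last_cpu, &all_cpus);
 *			CPU_SETOF(last_cpu, &isrc->isrc_cpu);
 *		}
 *		return (mypic_route_to_cpu(dev, isrc));
 *	}
 */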
1255 
1256 #ifndef EARLY_AP_STARTUP
1257 /*
1258  *  Distribute all the interrupt sources among the available
 *  CPUs once the APs have been launched.
1260  */
1261 static void
1262 intr_irq_shuffle(void *arg __unused)
1263 {
1264 	struct intr_irqsrc *isrc;
1265 	u_int i;
1266 
1267 	if (mp_ncpus == 1)
1268 		return;
1269 
1270 	mtx_lock(&isrc_table_lock);
1271 	irq_assign_cpu = true;
1272 	for (i = 0; i < intr_nirq; i++) {
1273 		isrc = irq_sources[i];
1274 		if (isrc == NULL || isrc->isrc_handlers == 0 ||
1275 		    isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI))
1276 			continue;
1277 
1278 		if (isrc->isrc_event != NULL &&
1279 		    isrc->isrc_flags & INTR_ISRCF_BOUND &&
1280 		    isrc->isrc_event->ie_cpu != CPU_FFS(&isrc->isrc_cpu) - 1)
1281 			panic("%s: CPU inconsistency", __func__);
1282 
1283 		if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0)
1284 			CPU_ZERO(&isrc->isrc_cpu); /* start again */
1285 
1286 		/*
		 * We are in a wicked position here if the following call fails
		 * for a bound ISRC. The best thing we can do is to clear
		 * isrc_cpu so that the inconsistency with ie_cpu will be
		 * detectable.
1290 		 */
1291 		if (PIC_BIND_INTR(isrc->isrc_dev, isrc) != 0)
1292 			CPU_ZERO(&isrc->isrc_cpu);
1293 	}
1294 	mtx_unlock(&isrc_table_lock);
1295 }
1296 SYSINIT(intr_irq_shuffle, SI_SUB_SMP, SI_ORDER_SECOND, intr_irq_shuffle, NULL);
1297 #endif /* !EARLY_AP_STARTUP */
1298 
1299 #else
1300 u_int
1301 intr_irq_next_cpu(u_int current_cpu, cpuset_t *cpumask)
1302 {
1303 
1304 	return (PCPU_GET(cpuid));
1305 }
1306 #endif /* SMP */
1307 
1308 /*
1309  * Allocate memory for new intr_map_data structure.
1310  * Initialize common fields.
1311  */
1312 struct intr_map_data *
1313 intr_alloc_map_data(enum intr_map_data_type type, size_t len, int flags)
1314 {
1315 	struct intr_map_data *data;
1316 
1317 	data = malloc(len, M_INTRNG, flags);
1318 	data->type = type;
1319 	data->len = len;
1320 	return (data);
1321 }
1322 
void
intr_free_intr_map_data(struct intr_map_data *data)
1324 {
1325 
1326 	free(data, M_INTRNG);
1327 }
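
/*
 * Example (illustrative sketch): callers allocate a type-specific structure
 * whose first member is struct intr_map_data, as intr_alloc_msi() below
 * does for MSI:
 *
 *	msi = (struct intr_map_data_msi *)intr_alloc_map_data(
 *	    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
 *	msi->isrc = isrc;
 *	irq = intr_map_irq(pic_dev, xref, (struct intr_map_data *)msi);
 */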
1328 
1329 /*
1330  *  Register a MSI/MSI-X interrupt controller
1331  */
1332 int
1333 intr_msi_register(device_t dev, intptr_t xref)
1334 {
1335 	struct intr_pic *pic;
1336 
1337 	if (dev == NULL)
1338 		return (EINVAL);
1339 	pic = pic_create(dev, xref, FLAG_MSI);
1340 	if (pic == NULL)
1341 		return (ENOMEM);
1342 
1343 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
1344 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
1345 	return (0);
1346 }
1347 
1348 int
1349 intr_alloc_msi(device_t pci, device_t child, intptr_t xref, int count,
1350     int maxcount, int *irqs)
1351 {
1352 	struct iommu_domain *domain;
1353 	struct intr_irqsrc **isrc;
1354 	struct intr_pic *pic;
1355 	device_t pdev;
1356 	struct intr_map_data_msi *msi;
1357 	int err, i;
1358 
1359 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1360 	if (pic == NULL)
1361 		return (ESRCH);
1362 
1363 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1364 	    ("%s: Found a non-MSI controller: %s", __func__,
1365 	     device_get_name(pic->pic_dev)));
1366 
1367 	/*
	 * If this is the first time we have used this context, ask the
	 * interrupt controller to map the memory the MSI source will need.
1370 	 */
1371 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1372 	if (err != 0)
1373 		return (err);
1374 
1375 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1376 	err = MSI_ALLOC_MSI(pic->pic_dev, child, count, maxcount, &pdev, isrc);
1377 	if (err != 0) {
1378 		free(isrc, M_INTRNG);
1379 		return (err);
1380 	}
1381 
1382 	for (i = 0; i < count; i++) {
1383 		isrc[i]->isrc_iommu = domain;
1384 		msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1385 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
		msi->isrc = isrc[i];
1387 
1388 		irqs[i] = intr_map_irq(pic->pic_dev, xref,
1389 		    (struct intr_map_data *)msi);
1390 	}
1391 	free(isrc, M_INTRNG);
1392 
1393 	return (err);
1394 }
1395 
1396 int
1397 intr_release_msi(device_t pci, device_t child, intptr_t xref, int count,
1398     int *irqs)
1399 {
1400 	struct intr_irqsrc **isrc;
1401 	struct intr_pic *pic;
1402 	struct intr_map_data_msi *msi;
1403 	int i, err;
1404 
1405 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1406 	if (pic == NULL)
1407 		return (ESRCH);
1408 
1409 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1410 	    ("%s: Found a non-MSI controller: %s", __func__,
1411 	     device_get_name(pic->pic_dev)));
1412 
1413 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1414 
1415 	for (i = 0; i < count; i++) {
1416 		msi = (struct intr_map_data_msi *)
1417 		    intr_map_get_map_data(irqs[i]);
1418 		KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1419 		    ("%s: irq %d map data is not MSI", __func__,
1420 		    irqs[i]));
1421 		isrc[i] = msi->isrc;
1422 	}
1423 
1424 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1425 
1426 	err = MSI_RELEASE_MSI(pic->pic_dev, child, count, isrc);
1427 
1428 	for (i = 0; i < count; i++) {
1429 		if (isrc[i] != NULL)
1430 			intr_unmap_irq(irqs[i]);
1431 	}
1432 
1433 	free(isrc, M_INTRNG);
1434 	return (err);
1435 }
1436 
1437 int
1438 intr_alloc_msix(device_t pci, device_t child, intptr_t xref, int *irq)
1439 {
1440 	struct iommu_domain *domain;
1441 	struct intr_irqsrc *isrc;
1442 	struct intr_pic *pic;
1443 	device_t pdev;
1444 	struct intr_map_data_msi *msi;
1445 	int err;
1446 
1447 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1448 	if (pic == NULL)
1449 		return (ESRCH);
1450 
1451 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1452 	    ("%s: Found a non-MSI controller: %s", __func__,
1453 	     device_get_name(pic->pic_dev)));
1454 
1455 	/*
	 * If this is the first time we have used this context, ask the
	 * interrupt controller to map the memory the MSI source will need.
1458 	 */
1459 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1460 	if (err != 0)
1461 		return (err);
1462 
1463 	err = MSI_ALLOC_MSIX(pic->pic_dev, child, &pdev, &isrc);
1464 	if (err != 0)
1465 		return (err);
1466 
1467 	isrc->isrc_iommu = domain;
1468 	msi = (struct intr_map_data_msi *)intr_alloc_map_data(
	    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1470 	msi->isrc = isrc;
1471 	*irq = intr_map_irq(pic->pic_dev, xref, (struct intr_map_data *)msi);
1472 	return (0);
1473 }
1474 
1475 int
1476 intr_release_msix(device_t pci, device_t child, intptr_t xref, int irq)
1477 {
1478 	struct intr_irqsrc *isrc;
1479 	struct intr_pic *pic;
1480 	struct intr_map_data_msi *msi;
1481 	int err;
1482 
1483 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1484 	if (pic == NULL)
1485 		return (ESRCH);
1486 
1487 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1488 	    ("%s: Found a non-MSI controller: %s", __func__,
1489 	     device_get_name(pic->pic_dev)));
1490 
1491 	msi = (struct intr_map_data_msi *)
1492 	    intr_map_get_map_data(irq);
1493 	KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1494 	    ("%s: irq %d map data is not MSI", __func__,
1495 	    irq));
1496 	isrc = msi->isrc;
1497 	if (isrc == NULL) {
1498 		intr_unmap_irq(irq);
1499 		return (EINVAL);
1500 	}
1501 
1502 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1503 
1504 	err = MSI_RELEASE_MSIX(pic->pic_dev, child, isrc);
1505 	intr_unmap_irq(irq);
1506 
1507 	return (err);
1508 }
1509 
1510 int
1511 intr_map_msi(device_t pci, device_t child, intptr_t xref, int irq,
1512     uint64_t *addr, uint32_t *data)
1513 {
1514 	struct intr_irqsrc *isrc;
1515 	struct intr_pic *pic;
1516 	int err;
1517 
1518 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1519 	if (pic == NULL)
1520 		return (ESRCH);
1521 
1522 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1523 	    ("%s: Found a non-MSI controller: %s", __func__,
1524 	     device_get_name(pic->pic_dev)));
1525 
1526 	isrc = intr_map_get_isrc(irq);
1527 	if (isrc == NULL)
1528 		return (EINVAL);
1529 
1530 	err = MSI_MAP_MSI(pic->pic_dev, child, isrc, addr, data);
1531 
1532 #ifdef IOMMU
1533 	if (isrc->isrc_iommu != NULL)
1534 		iommu_translate_msi(isrc->isrc_iommu, addr);
1535 #endif
1536 
1537 	return (err);
1538 }
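
/*
 * Example (illustrative sketch): an MD PCI bus driver allocates a vector
 * and then uses intr_map_msi() to obtain the address/data pair to program
 * into the device's MSI capability. The "msi_parent" xref is hypothetical.
 *
 *	error = intr_alloc_msi(pcib, child, msi_parent, 1, 1, &irq);
 *	if (error == 0)
 *		error = intr_map_msi(pcib, child, msi_parent, irq,
 *		    &addr, &data);
 */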
1539 
1540 void dosoftints(void);
1541 void
1542 dosoftints(void)
1543 {
1544 }
1545 
1546 #ifdef SMP
1547 /*
 *  Initialize the interrupt controller on another CPU.
1549  */
1550 void
1551 intr_pic_init_secondary(void)
1552 {
1553 
1554 	/*
1555 	 * QQQ: Only root PIC is aware of other CPUs ???
1556 	 */
1557 	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
1558 
1559 	//mtx_lock(&isrc_table_lock);
1560 	PIC_INIT_SECONDARY(intr_irq_root_dev);
1561 	//mtx_unlock(&isrc_table_lock);
1562 }
1563 #endif
1564 
1565 #ifdef DDB
1566 DB_SHOW_COMMAND_FLAGS(irqs, db_show_irqs, DB_CMD_MEMSAFE)
1567 {
1568 	u_int i, irqsum;
1569 	u_long num;
1570 	struct intr_irqsrc *isrc;
1571 
1572 	for (irqsum = 0, i = 0; i < intr_nirq; i++) {
1573 		isrc = irq_sources[i];
1574 		if (isrc == NULL)
1575 			continue;
1576 
1577 		num = isrc->isrc_count != NULL ? isrc->isrc_count[0] : 0;
1578 		db_printf("irq%-3u <%s>: cpu %02lx%s cnt %lu\n", i,
1579 		    isrc->isrc_name, isrc->isrc_cpu.__bits[0],
1580 		    isrc->isrc_flags & INTR_ISRCF_BOUND ? " (bound)" : "", num);
1581 		irqsum += num;
1582 	}
1583 	db_printf("irq total %u\n", irqsum);
1584 }
1585 #endif
1586 
1587 /*
1588  * Interrupt mapping table functions.
1589  *
 * Please keep this part separate; it can be transformed into an
 * extension of standard resources.
1592  */
1593 struct intr_map_entry
1594 {
1595 	device_t 		dev;
1596 	intptr_t 		xref;
1597 	struct intr_map_data 	*map_data;
1598 	struct intr_irqsrc 	*isrc;
	/* XXX TODO DISCONNECTED PICs */
1600 	/*int			flags */
1601 };
1602 
/* XXX Convert irq_map[] to a dynamically expandable one. */
1604 static struct intr_map_entry **irq_map;
1605 static u_int irq_map_count;
1606 static u_int irq_map_first_free_idx;
1607 static struct mtx irq_map_lock;
1608 
1609 static struct intr_irqsrc *
1610 intr_map_get_isrc(u_int res_id)
1611 {
1612 	struct intr_irqsrc *isrc;
1613 
1614 	isrc = NULL;
1615 	mtx_lock(&irq_map_lock);
1616 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1617 		isrc = irq_map[res_id]->isrc;
1618 	mtx_unlock(&irq_map_lock);
1619 
1620 	return (isrc);
1621 }
1622 
1623 static void
1624 intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc)
1625 {
1626 
1627 	mtx_lock(&irq_map_lock);
1628 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1629 		irq_map[res_id]->isrc = isrc;
1630 	mtx_unlock(&irq_map_lock);
1631 }
1632 
1633 /*
 * Get the map_data pointer stored in an intr_map_entry.
1635  */
1636 static struct intr_map_data *
1637 intr_map_get_map_data(u_int res_id)
1638 {
1639 	struct intr_map_data *data;
1640 
1641 	data = NULL;
1642 	mtx_lock(&irq_map_lock);
1643 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1644 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1645 	data = irq_map[res_id]->map_data;
1646 	mtx_unlock(&irq_map_lock);
1647 
1648 	return (data);
1649 }
1650 
1651 /*
1652  * Get a copy of intr_map_entry data
1653  */
1654 static void
1655 intr_map_copy_map_data(u_int res_id, device_t *map_dev, intptr_t *map_xref,
1656     struct intr_map_data **data)
1657 {
1658 	size_t len;
1659 
1660 	len = 0;
1661 	mtx_lock(&irq_map_lock);
1662 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1663 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1664 	if (irq_map[res_id]->map_data != NULL)
1665 		len = irq_map[res_id]->map_data->len;
1666 	mtx_unlock(&irq_map_lock);
1667 
1668 	if (len == 0)
1669 		*data = NULL;
1670 	else
1671 		*data = malloc(len, M_INTRNG, M_WAITOK | M_ZERO);
1672 	mtx_lock(&irq_map_lock);
1673 	if (irq_map[res_id] == NULL)
1674 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1675 	if (len != 0) {
1676 		if (len != irq_map[res_id]->map_data->len)
1677 			panic("Resource id: %u has changed.\n", res_id);
1678 		memcpy(*data, irq_map[res_id]->map_data, len);
1679 	}
1680 	*map_dev = irq_map[res_id]->dev;
1681 	*map_xref = irq_map[res_id]->xref;
1682 	mtx_unlock(&irq_map_lock);
1683 }
1684 
1685 /*
1686  * Allocate and fill new entry in irq_map table.
1687  */
1688 u_int
1689 intr_map_irq(device_t dev, intptr_t xref, struct intr_map_data *data)
1690 {
1691 	u_int i;
1692 	struct intr_map_entry *entry;
1693 
1694 	/* Prepare new entry first. */
1695 	entry = malloc(sizeof(*entry), M_INTRNG, M_WAITOK | M_ZERO);
1696 
1697 	entry->dev = dev;
1698 	entry->xref = xref;
1699 	entry->map_data = data;
1700 	entry->isrc = NULL;
1701 
1702 	mtx_lock(&irq_map_lock);
1703 	for (i = irq_map_first_free_idx; i < irq_map_count; i++) {
1704 		if (irq_map[i] == NULL) {
1705 			irq_map[i] = entry;
1706 			irq_map_first_free_idx = i + 1;
1707 			mtx_unlock(&irq_map_lock);
1708 			return (i);
1709 		}
1710 	}
1711 	for (i = 0; i < irq_map_first_free_idx; i++) {
1712 		if (irq_map[i] == NULL) {
1713 			irq_map[i] = entry;
1714 			irq_map_first_free_idx = i + 1;
1715 			mtx_unlock(&irq_map_lock);
1716 			return (i);
1717 		}
1718 	}
1719 	mtx_unlock(&irq_map_lock);
1720 
1721 	/* XXX Expand irq_map table */
1722 	panic("IRQ mapping table is full.");
1723 }
1724 
1725 /*
1726  * Remove and free mapping entry.
1727  */
1728 void
1729 intr_unmap_irq(u_int res_id)
1730 {
1731 	struct intr_map_entry *entry;
1732 
1733 	mtx_lock(&irq_map_lock);
1734 	if ((res_id >= irq_map_count) || (irq_map[res_id] == NULL))
1735 		panic("Attempt to unmap invalid resource id: %u\n", res_id);
1736 	entry = irq_map[res_id];
1737 	irq_map[res_id] = NULL;
1738 	irq_map_first_free_idx = res_id;
1739 	mtx_unlock(&irq_map_lock);
1740 	intr_free_intr_map_data(entry->map_data);
1741 	free(entry, M_INTRNG);
1742 }
1743 
1744 /*
1745  * Clone mapping entry.
1746  */
1747 u_int
1748 intr_map_clone_irq(u_int old_res_id)
1749 {
1750 	device_t map_dev;
1751 	intptr_t map_xref;
1752 	struct intr_map_data *data;
1753 
1754 	intr_map_copy_map_data(old_res_id, &map_dev, &map_xref, &data);
1755 	return (intr_map_irq(map_dev, map_xref, data));
1756 }
1757 
1758 static void
1759 intr_map_init(void *dummy __unused)
1760 {
1761 
1762 	mtx_init(&irq_map_lock, "intr map table", NULL, MTX_DEF);
1763 
1764 	irq_map_count = 2 * intr_nirq;
	irq_map = mallocarray(irq_map_count, sizeof(struct intr_map_entry *),
1766 	    M_INTRNG, M_WAITOK | M_ZERO);
1767 }
1768 SYSINIT(intr_map_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_map_init, NULL);
1769 
1770 #ifdef SMP
1771 /* Virtualization for interrupt source IPI counter increment. */
1772 static inline void
1773 intr_ipi_increment_count(u_long *counter, u_int cpu)
1774 {
1775 
1776 	KASSERT(cpu < mp_maxid + 1, ("%s: too big cpu %u", __func__, cpu));
1777 	counter[cpu]++;
1778 }
1779 
1780 /*
1781  *  Virtualization for interrupt source IPI counters setup.
1782  */
1783 static u_long *
1784 intr_ipi_setup_counters(const char *name)
1785 {
1786 	u_int index, i;
1787 	char str[INTRNAME_LEN];
1788 
1789 	mtx_lock(&isrc_table_lock);
1790 
1791 	/*
1792 	 * We should never have a problem finding mp_maxid + 1 contiguous
1793 	 * counters, in practice. Interrupts will be allocated sequentially
1794 	 * during boot, so the array should fill from low to high index. Once
1795 	 * reserved, the IPI counters will never be released. Similarly, we
1796 	 * will not need to allocate more IPIs once the system is running.
1797 	 */
1798 	bit_ffc_area(intrcnt_bitmap, nintrcnt, mp_maxid + 1, &index);
1799 	if (index == -1)
1800 		panic("Failed to allocate %d counters. Array exhausted?",
1801 		    mp_maxid + 1);
1802 	bit_nset(intrcnt_bitmap, index, index + mp_maxid);
1803 	for (i = 0; i < mp_maxid + 1; i++) {
1804 		snprintf(str, INTRNAME_LEN, "cpu%d:%s", i, name);
1805 		intrcnt_setname(str, index + i);
1806 	}
1807 	mtx_unlock(&isrc_table_lock);
1808 	return (&intrcnt[index]);
1809 }
1810 
1811 /*
1812  *  Lookup IPI source.
1813  */
1814 static struct intr_ipi *
1815 intr_ipi_lookup(u_int ipi)
1816 {
1817 
1818 	if (ipi >= INTR_IPI_COUNT)
1819 		panic("%s: no such IPI %u", __func__, ipi);
1820 
1821 	return (&ipi_sources[ipi]);
1822 }
1823 
1824 int
1825 intr_ipi_pic_register(device_t dev, u_int priority)
1826 {
1827 	if (intr_ipi_dev_frozen) {
		device_printf(dev, "IPI device already frozen\n");
1829 		return (EBUSY);
1830 	}
1831 
1832 	if (intr_ipi_dev == NULL || priority > intr_ipi_dev_priority)
1833 		intr_ipi_dev = dev;
1834 
1835 	return (0);
1836 }
1837 
1838 /*
 *  Set up an IPI handler on the interrupt controller.
1840  *
1841  *  Not SMP coherent.
1842  */
1843 void
1844 intr_ipi_setup(u_int ipi, const char *name, intr_ipi_handler_t *hand,
1845     void *arg)
1846 {
1847 	struct intr_irqsrc *isrc;
1848 	struct intr_ipi *ii;
1849 	int error;
1850 
1851 	if (!intr_ipi_dev_frozen) {
1852 		if (intr_ipi_dev == NULL)
1853 			panic("%s: no IPI PIC attached", __func__);
1854 
1855 		intr_ipi_dev_frozen = true;
1856 		device_printf(intr_ipi_dev, "using for IPIs\n");
1857 	}
1858 
1859 	KASSERT(hand != NULL, ("%s: ipi %u no handler", __func__, ipi));
1860 
1861 	error = PIC_IPI_SETUP(intr_ipi_dev, ipi, &isrc);
1862 	if (error != 0)
1863 		return;
1864 
1865 	isrc->isrc_handlers++;
1866 
1867 	ii = intr_ipi_lookup(ipi);
1868 	KASSERT(ii->ii_count == NULL, ("%s: ipi %u reused", __func__, ipi));
1869 
1870 	ii->ii_handler = hand;
1871 	ii->ii_handler_arg = arg;
1872 	ii->ii_isrc = isrc;
1873 	strlcpy(ii->ii_name, name, INTR_IPI_NAMELEN);
1874 	ii->ii_count = intr_ipi_setup_counters(name);
1875 
1876 	PIC_ENABLE_INTR(intr_ipi_dev, isrc);
1877 }
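
/*
 * Example (illustrative sketch): MD SMP startup code installs its IPI
 * handlers once and then delivers IPIs through intr_ipi_send(). The
 * handler and cpuset names are hypothetical.
 *
 *	intr_ipi_setup(IPI_RENDEZVOUS, "rendezvous", ipi_rendezvous, NULL);
 *	...
 *	intr_ipi_send(other_cpus, IPI_RENDEZVOUS);
 */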
1878 
1879 void
1880 intr_ipi_send(cpuset_t cpus, u_int ipi)
1881 {
1882 	struct intr_ipi *ii;
1883 
1884 	KASSERT(intr_ipi_dev_frozen,
1885 	    ("%s: IPI device not yet frozen", __func__));
1886 
1887 	ii = intr_ipi_lookup(ipi);
1888 	if (ii->ii_count == NULL)
		panic("%s: IPI %u not set up", __func__, ipi);
1890 
1891 	/*
1892 	 * XXX: Surely needed on other architectures too? Either way should be
1893 	 * some kind of MI hook defined in an MD header, or the responsibility
1894 	 * of the MD caller if not widespread.
1895 	 */
1896 #ifdef __aarch64__
1897 	/*
1898 	 * Ensure that this CPU's stores will be visible to IPI
1899 	 * recipients before starting to send the interrupts.
1900 	 */
1901 	dsb(ishst);
1902 #endif
1903 
1904 	PIC_IPI_SEND(intr_ipi_dev, ii->ii_isrc, cpus, ipi);
1905 }
1906 
1907 /*
 *  Interrupt controller dispatch function for IPIs. It should
 *  be called straight from the interrupt controller, when the associated
 *  interrupt source is learned, or from anybody who has an interrupt
 *  source mapped.
1912  */
1913 void
1914 intr_ipi_dispatch(u_int ipi)
1915 {
1916 	struct intr_ipi *ii;
1917 
1918 	ii = intr_ipi_lookup(ipi);
1919 	if (ii->ii_count == NULL)
		panic("%s: IPI %u not set up", __func__, ipi);
1921 
1922 	intr_ipi_increment_count(ii->ii_count, PCPU_GET(cpuid));
1923 
1924 	ii->ii_handler(ii->ii_handler_arg);
1925 }
1926 #endif
1927