xref: /freebsd/sys/kern/subr_intr.c (revision 4d3fc8b0570b29fb0d6ee9525f104d52176ff0d4)
1 /*-
2  * Copyright (c) 2015-2016 Svatopluk Kraus
3  * Copyright (c) 2015-2016 Michal Meloun
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 /*
32  *	New-style Interrupt Framework
33  *
34  *  TODO: - add support for disconnected PICs.
35  *        - to support IPI (PPI) enabling on other CPUs if already started.
36  *        - to complete things for removable PICs.
37  */
38 
39 #include "opt_ddb.h"
40 #include "opt_hwpmc_hooks.h"
41 #include "opt_iommu.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bitstring.h>
46 #include <sys/bus.h>
47 #include <sys/conf.h>
48 #include <sys/cpuset.h>
49 #include <sys/interrupt.h>
50 #include <sys/kernel.h>
51 #include <sys/lock.h>
52 #include <sys/malloc.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/queue.h>
56 #include <sys/rman.h>
57 #include <sys/sched.h>
58 #include <sys/smp.h>
59 #include <sys/sysctl.h>
60 #include <sys/syslog.h>
61 #include <sys/taskqueue.h>
62 #include <sys/tree.h>
63 #include <sys/vmmeter.h>
64 #ifdef HWPMC_HOOKS
65 #include <sys/pmckern.h>
66 #endif
67 
68 #include <machine/atomic.h>
69 #include <machine/cpu.h>
70 #include <machine/intr.h>
71 #include <machine/smp.h>
72 #include <machine/stdarg.h>
73 
74 #ifdef DDB
75 #include <ddb/ddb.h>
76 #endif
77 
78 #ifdef IOMMU
79 #include <dev/iommu/iommu_msi.h>
80 #endif
81 
82 #include "pic_if.h"
83 #include "msi_if.h"
84 
85 #define	INTRNAME_LEN	(2*MAXCOMLEN + 1)
86 
87 #ifdef DEBUG
88 #define debugf(fmt, args...) do { printf("%s(): ", __func__);	\
89     printf(fmt,##args); } while (0)
90 #else
91 #define debugf(fmt, args...)
92 #endif
93 
94 MALLOC_DECLARE(M_INTRNG);
95 MALLOC_DEFINE(M_INTRNG, "intr", "intr interrupt handling");
96 
97 /* Main interrupt handler called from assembler -> 'hidden' for C code. */
98 void intr_irq_handler(struct trapframe *tf);
99 
100 /* Root interrupt controller stuff. */
101 device_t intr_irq_root_dev;
102 static intr_irq_filter_t *irq_root_filter;
103 static void *irq_root_arg;
104 static u_int irq_root_ipicount;
105 
/*
 * A child PIC registered to handle a sub-range of its parent's interrupt
 * numbers; see intr_pic_add_handler() and intr_child_irq_handler().
 */
struct intr_pic_child {
	SLIST_ENTRY(intr_pic_child)	 pc_next;	/* link in parent's pic_children */
	struct intr_pic			*pc_pic;	/* the child controller */
	intr_child_irq_filter_t		*pc_filter;	/* dispatch filter for this range */
	void				*pc_filter_arg;	/* opaque argument for pc_filter */
	uintptr_t			 pc_start;	/* first parent IRQ in range */
	uintptr_t			 pc_length;	/* number of IRQs in range */
};
114 
/*
 * Interrupt controller definition.  One entry exists on pic_list per
 * registered PIC or MSI controller; see pic_create()/pic_lookup().
 */
struct intr_pic {
	SLIST_ENTRY(intr_pic)	pic_next;	/* link in global pic_list */
	intptr_t		pic_xref;	/* hardware identification */
	device_t		pic_dev;	/* never NULL on a registered PIC */
/* Only one of FLAG_PIC or FLAG_MSI may be set */
#define	FLAG_PIC	(1 << 0)
#define	FLAG_MSI	(1 << 1)
#define	FLAG_TYPE_MASK	(FLAG_PIC | FLAG_MSI)
	u_int			pic_flags;
	struct mtx		pic_child_lock;	/* spin lock protecting pic_children */
	SLIST_HEAD(, intr_pic_child) pic_children;
};
128 
129 static struct mtx pic_list_lock;
130 static SLIST_HEAD(, intr_pic) pic_list;
131 
132 static struct intr_pic *pic_lookup(device_t dev, intptr_t xref, int flags);
133 
134 /* Interrupt source definition. */
135 static struct mtx isrc_table_lock;
136 static struct intr_irqsrc **irq_sources;
137 u_int irq_next_free;
138 
139 #ifdef SMP
140 #ifdef EARLY_AP_STARTUP
141 static bool irq_assign_cpu = true;
142 #else
143 static bool irq_assign_cpu = false;
144 #endif
145 #endif
146 
147 u_int intr_nirq = NIRQ;
148 SYSCTL_UINT(_machdep, OID_AUTO, nirq, CTLFLAG_RDTUN, &intr_nirq, 0,
149     "Number of IRQs");
150 
151 /* Data for MI statistics reporting. */
152 u_long *intrcnt;
153 char *intrnames;
154 size_t sintrcnt;
155 size_t sintrnames;
156 int nintrcnt;
157 static bitstr_t *intrcnt_bitmap;
158 
159 static struct intr_irqsrc *intr_map_get_isrc(u_int res_id);
160 static void intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc);
161 static struct intr_map_data * intr_map_get_map_data(u_int res_id);
162 static void intr_map_copy_map_data(u_int res_id, device_t *dev, intptr_t *xref,
163     struct intr_map_data **data);
164 
165 /*
166  *  Interrupt framework initialization routine.
167  */
168 static void
169 intr_irq_init(void *dummy __unused)
170 {
171 
172 	SLIST_INIT(&pic_list);
173 	mtx_init(&pic_list_lock, "intr pic list", NULL, MTX_DEF);
174 
175 	mtx_init(&isrc_table_lock, "intr isrc table", NULL, MTX_DEF);
176 
177 	/*
178 	 * - 2 counters for each I/O interrupt.
179 	 * - MAXCPU counters for each IPI counters for SMP.
180 	 */
181 	nintrcnt = intr_nirq * 2;
182 #ifdef SMP
183 	nintrcnt += INTR_IPI_COUNT * MAXCPU;
184 #endif
185 
186 	intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTRNG,
187 	    M_WAITOK | M_ZERO);
188 	intrnames = mallocarray(nintrcnt, INTRNAME_LEN, M_INTRNG,
189 	    M_WAITOK | M_ZERO);
190 	sintrcnt = nintrcnt * sizeof(u_long);
191 	sintrnames = nintrcnt * INTRNAME_LEN;
192 
193 	/* Allocate the bitmap tracking counter allocations. */
194 	intrcnt_bitmap = bit_alloc(nintrcnt, M_INTRNG, M_WAITOK | M_ZERO);
195 
196 	irq_sources = mallocarray(intr_nirq, sizeof(struct intr_irqsrc*),
197 	    M_INTRNG, M_WAITOK | M_ZERO);
198 }
199 SYSINIT(intr_irq_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_irq_init, NULL);
200 
201 static void
202 intrcnt_setname(const char *name, int index)
203 {
204 
205 	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
206 	    INTRNAME_LEN - 1, name);
207 }
208 
209 /*
210  *  Update name for interrupt source with interrupt event.
211  */
212 static void
213 intrcnt_updatename(struct intr_irqsrc *isrc)
214 {
215 
216 	/* QQQ: What about stray counter name? */
217 	mtx_assert(&isrc_table_lock, MA_OWNED);
218 	intrcnt_setname(isrc->isrc_event->ie_fullname, isrc->isrc_index);
219 }
220 
221 /*
222  *  Virtualization for interrupt source interrupt counter increment.
223  */
224 static inline void
225 isrc_increment_count(struct intr_irqsrc *isrc)
226 {
227 
228 	if (isrc->isrc_flags & INTR_ISRCF_PPI)
229 		atomic_add_long(&isrc->isrc_count[0], 1);
230 	else
231 		isrc->isrc_count[0]++;
232 }
233 
234 /*
235  *  Virtualization for interrupt source interrupt stray counter increment.
236  */
237 static inline void
238 isrc_increment_straycount(struct intr_irqsrc *isrc)
239 {
240 
241 	isrc->isrc_count[1]++;
242 }
243 
244 /*
245  *  Virtualization for interrupt source interrupt name update.
246  */
247 static void
248 isrc_update_name(struct intr_irqsrc *isrc, const char *name)
249 {
250 	char str[INTRNAME_LEN];
251 
252 	mtx_assert(&isrc_table_lock, MA_OWNED);
253 
254 	if (name != NULL) {
255 		snprintf(str, INTRNAME_LEN, "%s: %s", isrc->isrc_name, name);
256 		intrcnt_setname(str, isrc->isrc_index);
257 		snprintf(str, INTRNAME_LEN, "stray %s: %s", isrc->isrc_name,
258 		    name);
259 		intrcnt_setname(str, isrc->isrc_index + 1);
260 	} else {
261 		snprintf(str, INTRNAME_LEN, "%s:", isrc->isrc_name);
262 		intrcnt_setname(str, isrc->isrc_index);
263 		snprintf(str, INTRNAME_LEN, "stray %s:", isrc->isrc_name);
264 		intrcnt_setname(str, isrc->isrc_index + 1);
265 	}
266 }
267 
268 /*
269  *  Virtualization for interrupt source interrupt counters setup.
270  */
271 static void
272 isrc_setup_counters(struct intr_irqsrc *isrc)
273 {
274 	int index;
275 
276 	mtx_assert(&isrc_table_lock, MA_OWNED);
277 
278 	/*
279 	 * Allocate two counter values, the second tracking "stray" interrupts.
280 	 */
281 	bit_ffc_area(intrcnt_bitmap, nintrcnt, 2, &index);
282 	if (index == -1)
283 		panic("Failed to allocate 2 counters. Array exhausted?");
284 	bit_nset(intrcnt_bitmap, index, index + 1);
285 	isrc->isrc_index = index;
286 	isrc->isrc_count = &intrcnt[index];
287 	isrc_update_name(isrc, NULL);
288 }
289 
290 /*
291  *  Virtualization for interrupt source interrupt counters release.
292  */
293 static void
294 isrc_release_counters(struct intr_irqsrc *isrc)
295 {
296 	int idx = isrc->isrc_index;
297 
298 	mtx_assert(&isrc_table_lock, MA_OWNED);
299 
300 	bit_nclear(intrcnt_bitmap, idx, idx + 1);
301 }
302 
#ifdef SMP
/*
 *  Virtualization for interrupt source IPI counters setup.
 *
 *  Reserves MAXCPU contiguous counter slots (one per CPU) for the named
 *  IPI and labels each "cpuN:<name>".  Returns a pointer to the first
 *  counter.  The slots are never released.
 */
u_long *
intr_ipi_setup_counters(const char *name)
{
	u_int index, i;
	char str[INTRNAME_LEN];

	mtx_lock(&isrc_table_lock);

	/*
	 * We should never have a problem finding MAXCPU contiguous counters,
	 * in practice. Interrupts will be allocated sequentially during boot,
	 * so the array should fill from low to high index. Once reserved, the
	 * IPI counters will never be released. Similarly, we will not need to
	 * allocate more IPIs once the system is running.
	 */
	bit_ffc_area(intrcnt_bitmap, nintrcnt, MAXCPU, &index);
	if (index == -1)
		panic("Failed to allocate %d counters. Array exhausted?",
		    MAXCPU);
	bit_nset(intrcnt_bitmap, index, index + MAXCPU - 1);
	for (i = 0; i < MAXCPU; i++) {
		snprintf(str, INTRNAME_LEN, "cpu%d:%s", i, name);
		intrcnt_setname(str, index + i);
	}
	mtx_unlock(&isrc_table_lock);
	return (&intrcnt[index]);
}
#endif
335 
336 /*
337  *  Main interrupt dispatch handler. It's called straight
338  *  from the assembler, where CPU interrupt is served.
339  */
340 void
341 intr_irq_handler(struct trapframe *tf)
342 {
343 	struct trapframe * oldframe;
344 	struct thread * td;
345 
346 	KASSERT(irq_root_filter != NULL, ("%s: no filter", __func__));
347 
348 	VM_CNT_INC(v_intr);
349 	critical_enter();
350 	td = curthread;
351 	oldframe = td->td_intr_frame;
352 	td->td_intr_frame = tf;
353 	irq_root_filter(irq_root_arg);
354 	td->td_intr_frame = oldframe;
355 	critical_exit();
356 #ifdef HWPMC_HOOKS
357 	if (pmc_hook && TRAPF_USERMODE(tf) &&
358 	    (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN))
359 		pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, tf);
360 #endif
361 }
362 
363 int
364 intr_child_irq_handler(struct intr_pic *parent, uintptr_t irq)
365 {
366 	struct intr_pic_child *child;
367 	bool found;
368 
369 	found = false;
370 	mtx_lock_spin(&parent->pic_child_lock);
371 	SLIST_FOREACH(child, &parent->pic_children, pc_next) {
372 		if (child->pc_start <= irq &&
373 		    irq < (child->pc_start + child->pc_length)) {
374 			found = true;
375 			break;
376 		}
377 	}
378 	mtx_unlock_spin(&parent->pic_child_lock);
379 
380 	if (found)
381 		return (child->pc_filter(child->pc_filter_arg, irq));
382 
383 	return (FILTER_STRAY);
384 }
385 
386 /*
387  *  interrupt controller dispatch function for interrupts. It should
388  *  be called straight from the interrupt controller, when associated interrupt
389  *  source is learned.
390  */
391 int
392 intr_isrc_dispatch(struct intr_irqsrc *isrc, struct trapframe *tf)
393 {
394 
395 	KASSERT(isrc != NULL, ("%s: no source", __func__));
396 
397 	isrc_increment_count(isrc);
398 
399 #ifdef INTR_SOLO
400 	if (isrc->isrc_filter != NULL) {
401 		int error;
402 		error = isrc->isrc_filter(isrc->isrc_arg, tf);
403 		PIC_POST_FILTER(isrc->isrc_dev, isrc);
404 		if (error == FILTER_HANDLED)
405 			return (0);
406 	} else
407 #endif
408 	if (isrc->isrc_event != NULL) {
409 		if (intr_event_handle(isrc->isrc_event, tf) == 0)
410 			return (0);
411 	}
412 
413 	isrc_increment_straycount(isrc);
414 	return (EINVAL);
415 }
416 
417 /*
418  *  Alloc unique interrupt number (resource handle) for interrupt source.
419  *
420  *  There could be various strategies how to allocate free interrupt number
421  *  (resource handle) for new interrupt source.
422  *
423  *  1. Handles are always allocated forward, so handles are not recycled
424  *     immediately. However, if only one free handle left which is reused
425  *     constantly...
426  */
427 static inline int
428 isrc_alloc_irq(struct intr_irqsrc *isrc)
429 {
430 	u_int irq;
431 
432 	mtx_assert(&isrc_table_lock, MA_OWNED);
433 
434 	if (irq_next_free >= intr_nirq)
435 		return (ENOSPC);
436 
437 	for (irq = irq_next_free; irq < intr_nirq; irq++) {
438 		if (irq_sources[irq] == NULL)
439 			goto found;
440 	}
441 	for (irq = 0; irq < irq_next_free; irq++) {
442 		if (irq_sources[irq] == NULL)
443 			goto found;
444 	}
445 
446 	irq_next_free = intr_nirq;
447 	return (ENOSPC);
448 
449 found:
450 	isrc->isrc_irq = irq;
451 	irq_sources[irq] = isrc;
452 
453 	irq_next_free = irq + 1;
454 	if (irq_next_free >= intr_nirq)
455 		irq_next_free = 0;
456 	return (0);
457 }
458 
459 /*
460  *  Free unique interrupt number (resource handle) from interrupt source.
461  */
462 static inline int
463 isrc_free_irq(struct intr_irqsrc *isrc)
464 {
465 
466 	mtx_assert(&isrc_table_lock, MA_OWNED);
467 
468 	if (isrc->isrc_irq >= intr_nirq)
469 		return (EINVAL);
470 	if (irq_sources[isrc->isrc_irq] != isrc)
471 		return (EINVAL);
472 
473 	irq_sources[isrc->isrc_irq] = NULL;
474 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
475 
476 	/*
477 	 * If we are recovering from the state irq_sources table is full,
478 	 * then the following allocation should check the entire table. This
479 	 * will ensure maximum separation of allocation order from release
480 	 * order.
481 	 */
482 	if (irq_next_free >= intr_nirq)
483 		irq_next_free = 0;
484 
485 	return (0);
486 }
487 
488 /*
489  *  Initialize interrupt source and register it into global interrupt table.
490  */
491 int
492 intr_isrc_register(struct intr_irqsrc *isrc, device_t dev, u_int flags,
493     const char *fmt, ...)
494 {
495 	int error;
496 	va_list ap;
497 
498 	bzero(isrc, sizeof(struct intr_irqsrc));
499 	isrc->isrc_dev = dev;
500 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
501 	isrc->isrc_flags = flags;
502 
503 	va_start(ap, fmt);
504 	vsnprintf(isrc->isrc_name, INTR_ISRC_NAMELEN, fmt, ap);
505 	va_end(ap);
506 
507 	mtx_lock(&isrc_table_lock);
508 	error = isrc_alloc_irq(isrc);
509 	if (error != 0) {
510 		mtx_unlock(&isrc_table_lock);
511 		return (error);
512 	}
513 	/*
514 	 * Setup interrupt counters, but not for IPI sources. Those are setup
515 	 * later and only for used ones (up to INTR_IPI_COUNT) to not exhaust
516 	 * our counter pool.
517 	 */
518 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
519 		isrc_setup_counters(isrc);
520 	mtx_unlock(&isrc_table_lock);
521 	return (0);
522 }
523 
524 /*
525  *  Deregister interrupt source from global interrupt table.
526  */
527 int
528 intr_isrc_deregister(struct intr_irqsrc *isrc)
529 {
530 	int error;
531 
532 	mtx_lock(&isrc_table_lock);
533 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
534 		isrc_release_counters(isrc);
535 	error = isrc_free_irq(isrc);
536 	mtx_unlock(&isrc_table_lock);
537 	return (error);
538 }
539 
#ifdef SMP
/*
 *  A support function for a PIC to decide if provided ISRC should be inited
 *  on given cpu. The logic of INTR_ISRCF_BOUND flag and isrc_cpu member of
 *  struct intr_irqsrc is the following:
 *
 *     If INTR_ISRCF_BOUND is set, the ISRC should be inited only on cpus
 *     set in isrc_cpu. If not, the ISRC should be inited on every cpu and
 *     isrc_cpu is kept consistent with it. Thus isrc_cpu is always correct.
 */
bool
intr_isrc_init_on_cpu(struct intr_irqsrc *isrc, u_int cpu)
{

	/* Nothing to init when no handlers are attached. */
	if (isrc->isrc_handlers == 0)
		return (false);
	/* Only per-CPU (PPI) and IPI sources are inited per CPU. */
	if ((isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI)) == 0)
		return (false);
	if (isrc->isrc_flags & INTR_ISRCF_BOUND)
		return (CPU_ISSET(cpu, &isrc->isrc_cpu));

	/* Unbound: record that this CPU now has the source inited. */
	CPU_SET(cpu, &isrc->isrc_cpu);
	return (true);
}
#endif
565 
#ifdef INTR_SOLO
/*
 *  Setup filter into interrupt source.
 *
 *  Installs a solo filter on the source, which is mutually exclusive
 *  with MI interrupt-event handling.  On success *cookiep is set to the
 *  isrc itself (checked again in intr_teardown_irq()).  Returns 0,
 *  EINVAL, or EBUSY.
 *
 *  NOTE(review): "iscr" in the name looks like a transposition of
 *  "isrc"; kept as-is since callers use this spelling.
 */
static int
iscr_setup_filter(struct intr_irqsrc *isrc, const char *name,
    intr_irq_filter_t *filter, void *arg, void **cookiep)
{

	if (filter == NULL)
		return (EINVAL);

	mtx_lock(&isrc_table_lock);
	/*
	 * Make sure that we do not mix the two ways
	 * how we handle interrupt sources.
	 */
	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
		mtx_unlock(&isrc_table_lock);
		return (EBUSY);
	}
	isrc->isrc_filter = filter;
	isrc->isrc_arg = arg;
	isrc_update_name(isrc, name);
	mtx_unlock(&isrc_table_lock);

	*cookiep = isrc;
	return (0);
}
#endif
596 
597 /*
598  *  Interrupt source pre_ithread method for MI interrupt framework.
599  */
600 static void
601 intr_isrc_pre_ithread(void *arg)
602 {
603 	struct intr_irqsrc *isrc = arg;
604 
605 	PIC_PRE_ITHREAD(isrc->isrc_dev, isrc);
606 }
607 
608 /*
609  *  Interrupt source post_ithread method for MI interrupt framework.
610  */
611 static void
612 intr_isrc_post_ithread(void *arg)
613 {
614 	struct intr_irqsrc *isrc = arg;
615 
616 	PIC_POST_ITHREAD(isrc->isrc_dev, isrc);
617 }
618 
619 /*
620  *  Interrupt source post_filter method for MI interrupt framework.
621  */
622 static void
623 intr_isrc_post_filter(void *arg)
624 {
625 	struct intr_irqsrc *isrc = arg;
626 
627 	PIC_POST_FILTER(isrc->isrc_dev, isrc);
628 }
629 
630 /*
631  *  Interrupt source assign_cpu method for MI interrupt framework.
632  */
633 static int
634 intr_isrc_assign_cpu(void *arg, int cpu)
635 {
636 #ifdef SMP
637 	struct intr_irqsrc *isrc = arg;
638 	int error;
639 
640 	mtx_lock(&isrc_table_lock);
641 	if (cpu == NOCPU) {
642 		CPU_ZERO(&isrc->isrc_cpu);
643 		isrc->isrc_flags &= ~INTR_ISRCF_BOUND;
644 	} else {
645 		CPU_SETOF(cpu, &isrc->isrc_cpu);
646 		isrc->isrc_flags |= INTR_ISRCF_BOUND;
647 	}
648 
649 	/*
650 	 * In NOCPU case, it's up to PIC to either leave ISRC on same CPU or
651 	 * re-balance it to another CPU or enable it on more CPUs. However,
652 	 * PIC is expected to change isrc_cpu appropriately to keep us well
653 	 * informed if the call is successful.
654 	 */
655 	if (irq_assign_cpu) {
656 		error = PIC_BIND_INTR(isrc->isrc_dev, isrc);
657 		if (error) {
658 			CPU_ZERO(&isrc->isrc_cpu);
659 			mtx_unlock(&isrc_table_lock);
660 			return (error);
661 		}
662 	}
663 	mtx_unlock(&isrc_table_lock);
664 	return (0);
665 #else
666 	return (EOPNOTSUPP);
667 #endif
668 }
669 
670 /*
671  *  Create interrupt event for interrupt source.
672  */
673 static int
674 isrc_event_create(struct intr_irqsrc *isrc)
675 {
676 	struct intr_event *ie;
677 	int error;
678 
679 	error = intr_event_create(&ie, isrc, 0, isrc->isrc_irq,
680 	    intr_isrc_pre_ithread, intr_isrc_post_ithread, intr_isrc_post_filter,
681 	    intr_isrc_assign_cpu, "%s:", isrc->isrc_name);
682 	if (error)
683 		return (error);
684 
685 	mtx_lock(&isrc_table_lock);
686 	/*
687 	 * Make sure that we do not mix the two ways
688 	 * how we handle interrupt sources. Let contested event wins.
689 	 */
690 #ifdef INTR_SOLO
691 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
692 #else
693 	if (isrc->isrc_event != NULL) {
694 #endif
695 		mtx_unlock(&isrc_table_lock);
696 		intr_event_destroy(ie);
697 		return (isrc->isrc_event != NULL ? EBUSY : 0);
698 	}
699 	isrc->isrc_event = ie;
700 	mtx_unlock(&isrc_table_lock);
701 
702 	return (0);
703 }
#ifdef notyet
/*
 *  Destroy interrupt event for interrupt source.
 *
 *  Detaches the event under the table lock, then destroys it outside
 *  the lock (intr_event_destroy() may sleep).
 */
static void
isrc_event_destroy(struct intr_irqsrc *isrc)
{
	struct intr_event *ie;

	mtx_lock(&isrc_table_lock);
	ie = isrc->isrc_event;
	isrc->isrc_event = NULL;
	mtx_unlock(&isrc_table_lock);

	if (ie != NULL)
		intr_event_destroy(ie);
}
#endif
722 /*
723  *  Add handler to interrupt source.
724  */
725 static int
726 isrc_add_handler(struct intr_irqsrc *isrc, const char *name,
727     driver_filter_t filter, driver_intr_t handler, void *arg,
728     enum intr_type flags, void **cookiep)
729 {
730 	int error;
731 
732 	if (isrc->isrc_event == NULL) {
733 		error = isrc_event_create(isrc);
734 		if (error)
735 			return (error);
736 	}
737 
738 	error = intr_event_add_handler(isrc->isrc_event, name, filter, handler,
739 	    arg, intr_priority(flags), flags, cookiep);
740 	if (error == 0) {
741 		mtx_lock(&isrc_table_lock);
742 		intrcnt_updatename(isrc);
743 		mtx_unlock(&isrc_table_lock);
744 	}
745 
746 	return (error);
747 }
748 
749 /*
750  *  Lookup interrupt controller locked.
751  */
752 static inline struct intr_pic *
753 pic_lookup_locked(device_t dev, intptr_t xref, int flags)
754 {
755 	struct intr_pic *pic;
756 
757 	mtx_assert(&pic_list_lock, MA_OWNED);
758 
759 	if (dev == NULL && xref == 0)
760 		return (NULL);
761 
762 	/* Note that pic->pic_dev is never NULL on registered PIC. */
763 	SLIST_FOREACH(pic, &pic_list, pic_next) {
764 		if ((pic->pic_flags & FLAG_TYPE_MASK) !=
765 		    (flags & FLAG_TYPE_MASK))
766 			continue;
767 
768 		if (dev == NULL) {
769 			if (xref == pic->pic_xref)
770 				return (pic);
771 		} else if (xref == 0 || pic->pic_xref == 0) {
772 			if (dev == pic->pic_dev)
773 				return (pic);
774 		} else if (xref == pic->pic_xref && dev == pic->pic_dev)
775 				return (pic);
776 	}
777 	return (NULL);
778 }
779 
780 /*
781  *  Lookup interrupt controller.
782  */
783 static struct intr_pic *
784 pic_lookup(device_t dev, intptr_t xref, int flags)
785 {
786 	struct intr_pic *pic;
787 
788 	mtx_lock(&pic_list_lock);
789 	pic = pic_lookup_locked(dev, xref, flags);
790 	mtx_unlock(&pic_list_lock);
791 	return (pic);
792 }
793 
794 /*
795  *  Create interrupt controller.
796  */
797 static struct intr_pic *
798 pic_create(device_t dev, intptr_t xref, int flags)
799 {
800 	struct intr_pic *pic;
801 
802 	mtx_lock(&pic_list_lock);
803 	pic = pic_lookup_locked(dev, xref, flags);
804 	if (pic != NULL) {
805 		mtx_unlock(&pic_list_lock);
806 		return (pic);
807 	}
808 	pic = malloc(sizeof(*pic), M_INTRNG, M_NOWAIT | M_ZERO);
809 	if (pic == NULL) {
810 		mtx_unlock(&pic_list_lock);
811 		return (NULL);
812 	}
813 	pic->pic_xref = xref;
814 	pic->pic_dev = dev;
815 	pic->pic_flags = flags;
816 	mtx_init(&pic->pic_child_lock, "pic child lock", NULL, MTX_SPIN);
817 	SLIST_INSERT_HEAD(&pic_list, pic, pic_next);
818 	mtx_unlock(&pic_list_lock);
819 
820 	return (pic);
821 }
#ifdef notyet
/*
 *  Destroy interrupt controller.
 *
 *  Removes the matching PIC from the global list and frees it; a no-op
 *  if no match is found.
 */
static void
pic_destroy(device_t dev, intptr_t xref, int flags)
{
	struct intr_pic *pic;

	mtx_lock(&pic_list_lock);
	pic = pic_lookup_locked(dev, xref, flags);
	if (pic == NULL) {
		mtx_unlock(&pic_list_lock);
		return;
	}
	SLIST_REMOVE(&pic_list, pic, intr_pic, pic_next);
	mtx_unlock(&pic_list_lock);

	free(pic, M_INTRNG);
}
#endif
843 /*
844  *  Register interrupt controller.
845  */
846 struct intr_pic *
847 intr_pic_register(device_t dev, intptr_t xref)
848 {
849 	struct intr_pic *pic;
850 
851 	if (dev == NULL)
852 		return (NULL);
853 	pic = pic_create(dev, xref, FLAG_PIC);
854 	if (pic == NULL)
855 		return (NULL);
856 
857 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
858 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
859 	return (pic);
860 }
861 
862 /*
863  *  Unregister interrupt controller.
864  */
865 int
866 intr_pic_deregister(device_t dev, intptr_t xref)
867 {
868 
869 	panic("%s: not implemented", __func__);
870 }
871 
872 /*
873  *  Mark interrupt controller (itself) as a root one.
874  *
875  *  Note that only an interrupt controller can really know its position
876  *  in interrupt controller's tree. So root PIC must claim itself as a root.
877  *
878  *  In FDT case, according to ePAPR approved version 1.1 from 08 April 2011,
879  *  page 30:
880  *    "The root of the interrupt tree is determined when traversal
881  *     of the interrupt tree reaches an interrupt controller node without
882  *     an interrupts property and thus no explicit interrupt parent."
883  */
884 int
885 intr_pic_claim_root(device_t dev, intptr_t xref, intr_irq_filter_t *filter,
886     void *arg, u_int ipicount)
887 {
888 	struct intr_pic *pic;
889 
890 	pic = pic_lookup(dev, xref, FLAG_PIC);
891 	if (pic == NULL) {
892 		device_printf(dev, "not registered\n");
893 		return (EINVAL);
894 	}
895 
896 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
897 	    ("%s: Found a non-PIC controller: %s", __func__,
898 	     device_get_name(pic->pic_dev)));
899 
900 	if (filter == NULL) {
901 		device_printf(dev, "filter missing\n");
902 		return (EINVAL);
903 	}
904 
905 	/*
906 	 * Only one interrupt controllers could be on the root for now.
907 	 * Note that we further suppose that there is not threaded interrupt
908 	 * routine (handler) on the root. See intr_irq_handler().
909 	 */
910 	if (intr_irq_root_dev != NULL) {
911 		device_printf(dev, "another root already set\n");
912 		return (EBUSY);
913 	}
914 
915 	intr_irq_root_dev = dev;
916 	irq_root_filter = filter;
917 	irq_root_arg = arg;
918 	irq_root_ipicount = ipicount;
919 
920 	debugf("irq root set to %s\n", device_get_nameunit(dev));
921 	return (0);
922 }
923 
924 /*
925  * Add a handler to manage a sub range of a parents interrupts.
926  */
927 int
928 intr_pic_add_handler(device_t parent, struct intr_pic *pic,
929     intr_child_irq_filter_t *filter, void *arg, uintptr_t start,
930     uintptr_t length)
931 {
932 	struct intr_pic *parent_pic;
933 	struct intr_pic_child *newchild;
934 #ifdef INVARIANTS
935 	struct intr_pic_child *child;
936 #endif
937 
938 	/* Find the parent PIC */
939 	parent_pic = pic_lookup(parent, 0, FLAG_PIC);
940 	if (parent_pic == NULL)
941 		return (ENXIO);
942 
943 	newchild = malloc(sizeof(*newchild), M_INTRNG, M_WAITOK | M_ZERO);
944 	newchild->pc_pic = pic;
945 	newchild->pc_filter = filter;
946 	newchild->pc_filter_arg = arg;
947 	newchild->pc_start = start;
948 	newchild->pc_length = length;
949 
950 	mtx_lock_spin(&parent_pic->pic_child_lock);
951 #ifdef INVARIANTS
952 	SLIST_FOREACH(child, &parent_pic->pic_children, pc_next) {
953 		KASSERT(child->pc_pic != pic, ("%s: Adding a child PIC twice",
954 		    __func__));
955 	}
956 #endif
957 	SLIST_INSERT_HEAD(&parent_pic->pic_children, newchild, pc_next);
958 	mtx_unlock_spin(&parent_pic->pic_child_lock);
959 
960 	return (0);
961 }
962 
/*
 *  Resolve mapping data to an interrupt source.
 *
 *  For MSI data the isrc is embedded in the map data itself; otherwise
 *  the owning PIC is asked to map the data via PIC_MAP_INTR().  Returns
 *  0, EINVAL (no data), ESRCH (no such controller), or a PIC error.
 */
static int
intr_resolve_irq(device_t dev, intptr_t xref, struct intr_map_data *data,
    struct intr_irqsrc **isrc)
{
	struct intr_pic *pic;
	struct intr_map_data_msi *msi;

	if (data == NULL)
		return (EINVAL);

	pic = pic_lookup(dev, xref,
	    (data->type == INTR_MAP_DATA_MSI) ? FLAG_MSI : FLAG_PIC);
	if (pic == NULL)
		return (ESRCH);

	switch (data->type) {
	case INTR_MAP_DATA_MSI:
		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
		    ("%s: Found a non-MSI controller: %s", __func__,
		     device_get_name(pic->pic_dev)));
		msi = (struct intr_map_data_msi *)data;
		*isrc = msi->isrc;
		return (0);

	default:
		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
		    ("%s: Found a non-PIC controller: %s", __func__,
		     device_get_name(pic->pic_dev)));
		return (PIC_MAP_INTR(pic->pic_dev, data, isrc));
	}
}
994 
/*
 *  Report whether the interrupt resource refers to a per-CPU (PPI)
 *  interrupt source.  The resource must already be activated (its isrc
 *  mapped); otherwise this panics.
 */
bool
intr_is_per_cpu(struct resource *res)
{
	u_int res_id;
	struct intr_irqsrc *isrc;

	/* The resource's start value is the interrupt map resource id. */
	res_id = (u_int)rman_get_start(res);
	isrc = intr_map_get_isrc(res_id);

	if (isrc == NULL)
		panic("Attempt to get isrc for non-active resource id: %u\n",
		    res_id);
	return ((isrc->isrc_flags & INTR_ISRCF_PPI) != 0);
}
1009 
/*
 *  Activate an interrupt resource: resolve its map data to an interrupt
 *  source, bind the isrc and map data to the resource, and notify the
 *  owning PIC.  Panics on double activation.  Returns 0 or an error.
 */
int
intr_activate_irq(device_t dev, struct resource *res)
{
	device_t map_dev;
	intptr_t map_xref;
	struct intr_map_data *data;
	struct intr_irqsrc *isrc;
	u_int res_id;
	int error;

	KASSERT(rman_get_start(res) == rman_get_end(res),
	    ("%s: more interrupts in resource", __func__));

	res_id = (u_int)rman_get_start(res);
	if (intr_map_get_isrc(res_id) != NULL)
		panic("Attempt to double activation of resource id: %u\n",
		    res_id);
	/* Get a private copy of the map data; we own (and must free) it. */
	intr_map_copy_map_data(res_id, &map_dev, &map_xref, &data);
	error = intr_resolve_irq(map_dev, map_xref, data, &isrc);
	if (error != 0) {
		free(data, M_INTRNG);
		/* XXX TODO DISCONECTED PICs */
		/* if (error == EINVAL) return(0); */
		return (error);
	}
	intr_map_set_isrc(res_id, isrc);
	/* Stash the map data on the resource for later teardown. */
	rman_set_virtual(res, data);
	return (PIC_ACTIVATE_INTR(isrc->isrc_dev, isrc, res, data));
}
1039 
/*
 *  Deactivate an interrupt resource: notify the PIC, unbind the isrc
 *  from the resource id, and free the map data stashed at activation
 *  time.  Panics if the resource was never activated.
 */
int
intr_deactivate_irq(device_t dev, struct resource *res)
{
	struct intr_map_data *data;
	struct intr_irqsrc *isrc;
	u_int res_id;
	int error;

	KASSERT(rman_get_start(res) == rman_get_end(res),
	    ("%s: more interrupts in resource", __func__));

	res_id = (u_int)rman_get_start(res);
	isrc = intr_map_get_isrc(res_id);
	if (isrc == NULL)
		panic("Attempt to deactivate non-active resource id: %u\n",
		    res_id);

	data = rman_get_virtual(res);
	error = PIC_DEACTIVATE_INTR(isrc->isrc_dev, isrc, res, data);
	intr_map_set_isrc(res_id, NULL);
	rman_set_virtual(res, NULL);
	free(data, M_INTRNG);
	return (error);
}
1064 
/*
 *  Attach a filter/handler to an activated interrupt resource.
 *
 *  Adds the handler through the MI framework (or installs a solo filter
 *  when INTR_SOLO is requested and compiled in), then lets the PIC set
 *  up the interrupt and enables it on the first handler.  On PIC setup
 *  failure the just-added handler is removed again.  Returns 0 or an
 *  error.
 */
int
intr_setup_irq(device_t dev, struct resource *res, driver_filter_t filt,
    driver_intr_t hand, void *arg, int flags, void **cookiep)
{
	int error;
	struct intr_map_data *data;
	struct intr_irqsrc *isrc;
	const char *name;
	u_int res_id;

	KASSERT(rman_get_start(res) == rman_get_end(res),
	    ("%s: more interrupts in resource", __func__));

	res_id = (u_int)rman_get_start(res);
	isrc = intr_map_get_isrc(res_id);
	if (isrc == NULL) {
		/* XXX TODO DISCONECTED PICs */
		return (EINVAL);
	}

	data = rman_get_virtual(res);
	name = device_get_nameunit(dev);

#ifdef INTR_SOLO
	/*
	 * Standard handling is done through MI interrupt framework. However,
	 * some interrupts could request solely own special handling. This
	 * non standard handling can be used for interrupt controllers without
	 * handler (filter only), so in case that interrupt controllers are
	 * chained, MI interrupt framework is called only in leaf controller.
	 *
	 * Note that root interrupt controller routine is served as well,
	 * however in intr_irq_handler(), i.e. main system dispatch routine.
	 */
	if (flags & INTR_SOLO && hand != NULL) {
		debugf("irq %u cannot solo on %s\n", irq, name);
		return (EINVAL);
	}

	if (flags & INTR_SOLO) {
		error = iscr_setup_filter(isrc, name, (intr_irq_filter_t *)filt,
		    arg, cookiep);
		debugf("irq %u setup filter error %d on %s\n", isrc->isrc_irq, error,
		    name);
	} else
#endif
		{
		error = isrc_add_handler(isrc, name, filt, hand, arg, flags,
		    cookiep);
		debugf("irq %u add handler error %d on %s\n", isrc->isrc_irq, error, name);
	}
	if (error != 0)
		return (error);

	mtx_lock(&isrc_table_lock);
	error = PIC_SETUP_INTR(isrc->isrc_dev, isrc, res, data);
	if (error == 0) {
		isrc->isrc_handlers++;
		/* Enable the source when the first handler arrives. */
		if (isrc->isrc_handlers == 1)
			PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
	}
	mtx_unlock(&isrc_table_lock);
	if (error != 0)
		intr_event_remove_handler(*cookiep);
	return (error);
}
1131 
/*
 * Remove an interrupt handler previously installed by intr_setup_irq().
 * When the last handler on a source is removed, the interrupt is disabled
 * and torn down on its controller.  Returns 0 on success or an errno value.
 */
int
intr_teardown_irq(device_t dev, struct resource *res, void *cookie)
{
	int error;
	struct intr_map_data *data;
	struct intr_irqsrc *isrc;
	u_int res_id;

	/* The resource must describe exactly one interrupt. */
	KASSERT(rman_get_start(res) == rman_get_end(res),
	    ("%s: more interrupts in resource", __func__));

	res_id = (u_int)rman_get_start(res);
	isrc = intr_map_get_isrc(res_id);
	if (isrc == NULL || isrc->isrc_handlers == 0)
		return (EINVAL);

	/* Map data was stashed in the resource at activation time. */
	data = rman_get_virtual(res);

#ifdef INTR_SOLO
	/*
	 * Filter-only (solo) sources have no intr_event; the cookie is the
	 * source itself and its single filter is detached directly here.
	 */
	if (isrc->isrc_filter != NULL) {
		if (isrc != cookie)
			return (EINVAL);

		mtx_lock(&isrc_table_lock);
		isrc->isrc_filter = NULL;
		isrc->isrc_arg = NULL;
		isrc->isrc_handlers = 0;
		PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
		isrc_update_name(isrc, NULL);
		mtx_unlock(&isrc_table_lock);
		return (0);
	}
#endif
	/* The cookie must belong to this interrupt source. */
	if (isrc != intr_handler_source(cookie))
		return (EINVAL);

	error = intr_event_remove_handler(cookie);
	if (error == 0) {
		mtx_lock(&isrc_table_lock);
		isrc->isrc_handlers--;
		/* Last handler gone: quiesce the source on its controller. */
		if (isrc->isrc_handlers == 0)
			PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
		intrcnt_updatename(isrc);
		mtx_unlock(&isrc_table_lock);
	}
	return (error);
}
1181 
1182 int
1183 intr_describe_irq(device_t dev, struct resource *res, void *cookie,
1184     const char *descr)
1185 {
1186 	int error;
1187 	struct intr_irqsrc *isrc;
1188 	u_int res_id;
1189 
1190 	KASSERT(rman_get_start(res) == rman_get_end(res),
1191 	    ("%s: more interrupts in resource", __func__));
1192 
1193 	res_id = (u_int)rman_get_start(res);
1194 	isrc = intr_map_get_isrc(res_id);
1195 	if (isrc == NULL || isrc->isrc_handlers == 0)
1196 		return (EINVAL);
1197 #ifdef INTR_SOLO
1198 	if (isrc->isrc_filter != NULL) {
1199 		if (isrc != cookie)
1200 			return (EINVAL);
1201 
1202 		mtx_lock(&isrc_table_lock);
1203 		isrc_update_name(isrc, descr);
1204 		mtx_unlock(&isrc_table_lock);
1205 		return (0);
1206 	}
1207 #endif
1208 	error = intr_event_describe_handler(isrc->isrc_event, cookie, descr);
1209 	if (error == 0) {
1210 		mtx_lock(&isrc_table_lock);
1211 		intrcnt_updatename(isrc);
1212 		mtx_unlock(&isrc_table_lock);
1213 	}
1214 	return (error);
1215 }
1216 
1217 #ifdef SMP
1218 int
1219 intr_bind_irq(device_t dev, struct resource *res, int cpu)
1220 {
1221 	struct intr_irqsrc *isrc;
1222 	u_int res_id;
1223 
1224 	KASSERT(rman_get_start(res) == rman_get_end(res),
1225 	    ("%s: more interrupts in resource", __func__));
1226 
1227 	res_id = (u_int)rman_get_start(res);
1228 	isrc = intr_map_get_isrc(res_id);
1229 	if (isrc == NULL || isrc->isrc_handlers == 0)
1230 		return (EINVAL);
1231 #ifdef INTR_SOLO
1232 	if (isrc->isrc_filter != NULL)
1233 		return (intr_isrc_assign_cpu(isrc, cpu));
1234 #endif
1235 	return (intr_event_bind(isrc->isrc_event, cpu));
1236 }
1237 
1238 /*
1239  * Return the CPU that the next interrupt source should use.
1240  * For now just returns the next CPU according to round-robin.
1241  */
1242 u_int
1243 intr_irq_next_cpu(u_int last_cpu, cpuset_t *cpumask)
1244 {
1245 	u_int cpu;
1246 
1247 	KASSERT(!CPU_EMPTY(cpumask), ("%s: Empty CPU mask", __func__));
1248 	if (!irq_assign_cpu || mp_ncpus == 1) {
1249 		cpu = PCPU_GET(cpuid);
1250 
1251 		if (CPU_ISSET(cpu, cpumask))
1252 			return (curcpu);
1253 
1254 		return (CPU_FFS(cpumask) - 1);
1255 	}
1256 
1257 	do {
1258 		last_cpu++;
1259 		if (last_cpu > mp_maxid)
1260 			last_cpu = 0;
1261 	} while (!CPU_ISSET(last_cpu, cpumask));
1262 	return (last_cpu);
1263 }
1264 
1265 #ifndef EARLY_AP_STARTUP
1266 /*
1267  *  Distribute all the interrupt sources among the available
1268  *  CPUs once the AP's have been launched.
1269  */
static void
intr_irq_shuffle(void *arg __unused)
{
	struct intr_irqsrc *isrc;
	u_int i;

	/* Nothing to distribute on a uniprocessor. */
	if (mp_ncpus == 1)
		return;

	mtx_lock(&isrc_table_lock);
	/* From here on intr_irq_next_cpu() performs real round-robin. */
	irq_assign_cpu = true;
	for (i = 0; i < intr_nirq; i++) {
		isrc = irq_sources[i];
		/* Skip empty slots, idle sources, and per-CPU (PPI/IPI) ones. */
		if (isrc == NULL || isrc->isrc_handlers == 0 ||
		    isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI))
			continue;

		/* A bound source must agree with its event's chosen CPU. */
		if (isrc->isrc_event != NULL &&
		    isrc->isrc_flags & INTR_ISRCF_BOUND &&
		    isrc->isrc_event->ie_cpu != CPU_FFS(&isrc->isrc_cpu) - 1)
			panic("%s: CPU inconsistency", __func__);

		if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0)
			CPU_ZERO(&isrc->isrc_cpu); /* start again */

		/*
		 * We are in wicked position here if the following call fails
		 * for bound ISRC. The best thing we can do is to clear
		 * isrc_cpu so inconsistency with ie_cpu will be detectable.
		 */
		if (PIC_BIND_INTR(isrc->isrc_dev, isrc) != 0)
			CPU_ZERO(&isrc->isrc_cpu);
	}
	mtx_unlock(&isrc_table_lock);
}
SYSINIT(intr_irq_shuffle, SI_SUB_SMP, SI_ORDER_SECOND, intr_irq_shuffle, NULL);
1306 #endif /* !EARLY_AP_STARTUP */
1307 
1308 #else
u_int
intr_irq_next_cpu(u_int current_cpu, cpuset_t *cpumask)
{

	/* !SMP build: every interrupt goes to the only CPU there is. */
	return (PCPU_GET(cpuid));
}
1315 #endif /* SMP */
1316 
1317 /*
1318  * Allocate memory for new intr_map_data structure.
1319  * Initialize common fields.
1320  */
1321 struct intr_map_data *
1322 intr_alloc_map_data(enum intr_map_data_type type, size_t len, int flags)
1323 {
1324 	struct intr_map_data *data;
1325 
1326 	data = malloc(len, M_INTRNG, flags);
1327 	data->type = type;
1328 	data->len = len;
1329 	return (data);
1330 }
1331 
1332 void intr_free_intr_map_data(struct intr_map_data *data)
1333 {
1334 
1335 	free(data, M_INTRNG);
1336 }
1337 
1338 /*
1339  *  Register a MSI/MSI-X interrupt controller
1340  */
1341 int
1342 intr_msi_register(device_t dev, intptr_t xref)
1343 {
1344 	struct intr_pic *pic;
1345 
1346 	if (dev == NULL)
1347 		return (EINVAL);
1348 	pic = pic_create(dev, xref, FLAG_MSI);
1349 	if (pic == NULL)
1350 		return (ENOMEM);
1351 
1352 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
1353 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
1354 	return (0);
1355 }
1356 
1357 int
1358 intr_alloc_msi(device_t pci, device_t child, intptr_t xref, int count,
1359     int maxcount, int *irqs)
1360 {
1361 	struct iommu_domain *domain;
1362 	struct intr_irqsrc **isrc;
1363 	struct intr_pic *pic;
1364 	device_t pdev;
1365 	struct intr_map_data_msi *msi;
1366 	int err, i;
1367 
1368 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1369 	if (pic == NULL)
1370 		return (ESRCH);
1371 
1372 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1373 	    ("%s: Found a non-MSI controller: %s", __func__,
1374 	     device_get_name(pic->pic_dev)));
1375 
1376 	/*
1377 	 * If this is the first time we have used this context ask the
1378 	 * interrupt controller to map memory the msi source will need.
1379 	 */
1380 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1381 	if (err != 0)
1382 		return (err);
1383 
1384 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1385 	err = MSI_ALLOC_MSI(pic->pic_dev, child, count, maxcount, &pdev, isrc);
1386 	if (err != 0) {
1387 		free(isrc, M_INTRNG);
1388 		return (err);
1389 	}
1390 
1391 	for (i = 0; i < count; i++) {
1392 		isrc[i]->isrc_iommu = domain;
1393 		msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1394 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1395 		msi-> isrc = isrc[i];
1396 
1397 		irqs[i] = intr_map_irq(pic->pic_dev, xref,
1398 		    (struct intr_map_data *)msi);
1399 	}
1400 	free(isrc, M_INTRNG);
1401 
1402 	return (err);
1403 }
1404 
1405 int
1406 intr_release_msi(device_t pci, device_t child, intptr_t xref, int count,
1407     int *irqs)
1408 {
1409 	struct intr_irqsrc **isrc;
1410 	struct intr_pic *pic;
1411 	struct intr_map_data_msi *msi;
1412 	int i, err;
1413 
1414 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1415 	if (pic == NULL)
1416 		return (ESRCH);
1417 
1418 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1419 	    ("%s: Found a non-MSI controller: %s", __func__,
1420 	     device_get_name(pic->pic_dev)));
1421 
1422 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1423 
1424 	for (i = 0; i < count; i++) {
1425 		msi = (struct intr_map_data_msi *)
1426 		    intr_map_get_map_data(irqs[i]);
1427 		KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1428 		    ("%s: irq %d map data is not MSI", __func__,
1429 		    irqs[i]));
1430 		isrc[i] = msi->isrc;
1431 	}
1432 
1433 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1434 
1435 	err = MSI_RELEASE_MSI(pic->pic_dev, child, count, isrc);
1436 
1437 	for (i = 0; i < count; i++) {
1438 		if (isrc[i] != NULL)
1439 			intr_unmap_irq(irqs[i]);
1440 	}
1441 
1442 	free(isrc, M_INTRNG);
1443 	return (err);
1444 }
1445 
1446 int
1447 intr_alloc_msix(device_t pci, device_t child, intptr_t xref, int *irq)
1448 {
1449 	struct iommu_domain *domain;
1450 	struct intr_irqsrc *isrc;
1451 	struct intr_pic *pic;
1452 	device_t pdev;
1453 	struct intr_map_data_msi *msi;
1454 	int err;
1455 
1456 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1457 	if (pic == NULL)
1458 		return (ESRCH);
1459 
1460 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1461 	    ("%s: Found a non-MSI controller: %s", __func__,
1462 	     device_get_name(pic->pic_dev)));
1463 
1464 	/*
1465 	 * If this is the first time we have used this context ask the
1466 	 * interrupt controller to map memory the msi source will need.
1467 	 */
1468 	err = MSI_IOMMU_INIT(pic->pic_dev, child, &domain);
1469 	if (err != 0)
1470 		return (err);
1471 
1472 	err = MSI_ALLOC_MSIX(pic->pic_dev, child, &pdev, &isrc);
1473 	if (err != 0)
1474 		return (err);
1475 
1476 	isrc->isrc_iommu = domain;
1477 	msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1478 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1479 	msi->isrc = isrc;
1480 	*irq = intr_map_irq(pic->pic_dev, xref, (struct intr_map_data *)msi);
1481 	return (0);
1482 }
1483 
1484 int
1485 intr_release_msix(device_t pci, device_t child, intptr_t xref, int irq)
1486 {
1487 	struct intr_irqsrc *isrc;
1488 	struct intr_pic *pic;
1489 	struct intr_map_data_msi *msi;
1490 	int err;
1491 
1492 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1493 	if (pic == NULL)
1494 		return (ESRCH);
1495 
1496 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1497 	    ("%s: Found a non-MSI controller: %s", __func__,
1498 	     device_get_name(pic->pic_dev)));
1499 
1500 	msi = (struct intr_map_data_msi *)
1501 	    intr_map_get_map_data(irq);
1502 	KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1503 	    ("%s: irq %d map data is not MSI", __func__,
1504 	    irq));
1505 	isrc = msi->isrc;
1506 	if (isrc == NULL) {
1507 		intr_unmap_irq(irq);
1508 		return (EINVAL);
1509 	}
1510 
1511 	MSI_IOMMU_DEINIT(pic->pic_dev, child);
1512 
1513 	err = MSI_RELEASE_MSIX(pic->pic_dev, child, isrc);
1514 	intr_unmap_irq(irq);
1515 
1516 	return (err);
1517 }
1518 
1519 int
1520 intr_map_msi(device_t pci, device_t child, intptr_t xref, int irq,
1521     uint64_t *addr, uint32_t *data)
1522 {
1523 	struct intr_irqsrc *isrc;
1524 	struct intr_pic *pic;
1525 	int err;
1526 
1527 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1528 	if (pic == NULL)
1529 		return (ESRCH);
1530 
1531 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1532 	    ("%s: Found a non-MSI controller: %s", __func__,
1533 	     device_get_name(pic->pic_dev)));
1534 
1535 	isrc = intr_map_get_isrc(irq);
1536 	if (isrc == NULL)
1537 		return (EINVAL);
1538 
1539 	err = MSI_MAP_MSI(pic->pic_dev, child, isrc, addr, data);
1540 
1541 #ifdef IOMMU
1542 	if (isrc->isrc_iommu != NULL)
1543 		iommu_translate_msi(isrc->isrc_iommu, addr);
1544 #endif
1545 
1546 	return (err);
1547 }
1548 
/*
 * Empty stub.  NOTE(review): presumably kept because some MD code expects
 * a dosoftints() symbol — confirm before removing.
 */
void dosoftints(void);
void
dosoftints(void)
{
}
1554 
1555 #ifdef SMP
1556 /*
1557  *  Init interrupt controller on another CPU.
1558  */
void
intr_pic_init_secondary(void)
{

	/*
	 * QQQ: Only root PIC is aware of other CPUs ???
	 */
	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));

	/*
	 * NOTE(review): locking deliberately disabled below — confirm that
	 * PIC_INIT_SECONDARY needs no isrc_table_lock protection here.
	 */
	//mtx_lock(&isrc_table_lock);
	PIC_INIT_SECONDARY(intr_irq_root_dev);
	//mtx_unlock(&isrc_table_lock);
}
1572 #endif
1573 
1574 #ifdef DDB
/* DDB "show irqs": print name, CPU affinity and count for every source. */
DB_SHOW_COMMAND_FLAGS(irqs, db_show_irqs, DB_CMD_MEMSAFE)
{
	u_int i, irqsum;
	u_long num;
	struct intr_irqsrc *isrc;

	for (irqsum = 0, i = 0; i < intr_nirq; i++) {
		isrc = irq_sources[i];
		if (isrc == NULL)
			continue;

		/* isrc_count may be NULL when no counter was allocated. */
		num = isrc->isrc_count != NULL ? isrc->isrc_count[0] : 0;
		db_printf("irq%-3u <%s>: cpu %02lx%s cnt %lu\n", i,
		    isrc->isrc_name, isrc->isrc_cpu.__bits[0],
		    isrc->isrc_flags & INTR_ISRCF_BOUND ? " (bound)" : "", num);
		irqsum += num;
	}
	db_printf("irq total %u\n", irqsum);
}
1594 #endif
1595 
1596 /*
1597  * Interrupt mapping table functions.
1598  *
1599  * Please, keep this part separately, it can be transformed to
1600  * extension of standard resources.
1601  */
/* One slot of the resource-id -> interrupt-source mapping table. */
struct intr_map_entry
{
	device_t 		dev;	/* PIC device this mapping belongs to */
	intptr_t 		xref;	/* PIC cross-reference handle */
	struct intr_map_data 	*map_data;	/* bus-specific data (owned; freed on unmap) */
	struct intr_irqsrc 	*isrc;	/* resolved source, set via intr_map_set_isrc() */
	/* XXX TODO DISCONECTED PICs */
	/*int			flags */
};
1611 
/* XXX Convert irq_map[] to dynamicaly expandable one. */
static struct intr_map_entry **irq_map;		/* table of mapping entries */
static u_int irq_map_count;			/* capacity of irq_map[] */
static u_int irq_map_first_free_idx;		/* hint: first possibly-free slot */
static struct mtx irq_map_lock;			/* protects all of the above */
1617 
1618 static struct intr_irqsrc *
1619 intr_map_get_isrc(u_int res_id)
1620 {
1621 	struct intr_irqsrc *isrc;
1622 
1623 	isrc = NULL;
1624 	mtx_lock(&irq_map_lock);
1625 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1626 		isrc = irq_map[res_id]->isrc;
1627 	mtx_unlock(&irq_map_lock);
1628 
1629 	return (isrc);
1630 }
1631 
1632 static void
1633 intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc)
1634 {
1635 
1636 	mtx_lock(&irq_map_lock);
1637 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1638 		irq_map[res_id]->isrc = isrc;
1639 	mtx_unlock(&irq_map_lock);
1640 }
1641 
/*
 * Get the map data pointer stored in an intr_map_entry (no copy is made).
 */
1645 static struct intr_map_data *
1646 intr_map_get_map_data(u_int res_id)
1647 {
1648 	struct intr_map_data *data;
1649 
1650 	data = NULL;
1651 	mtx_lock(&irq_map_lock);
1652 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1653 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1654 	data = irq_map[res_id]->map_data;
1655 	mtx_unlock(&irq_map_lock);
1656 
1657 	return (data);
1658 }
1659 
1660 /*
1661  * Get a copy of intr_map_entry data
1662  */
static void
intr_map_copy_map_data(u_int res_id, device_t *map_dev, intptr_t *map_xref,
    struct intr_map_data **data)
{
	size_t len;

	/* First pass: learn the data size under the lock. */
	len = 0;
	mtx_lock(&irq_map_lock);
	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
		panic("Attempt to copy invalid resource id: %u\n", res_id);
	if (irq_map[res_id]->map_data != NULL)
		len = irq_map[res_id]->map_data->len;
	mtx_unlock(&irq_map_lock);

	/*
	 * The lock is dropped while allocating because malloc(M_WAITOK) may
	 * sleep; the entry is re-validated after the lock is re-taken.
	 */
	if (len == 0)
		*data = NULL;
	else
		*data = malloc(len, M_INTRNG, M_WAITOK | M_ZERO);
	mtx_lock(&irq_map_lock);
	if (irq_map[res_id] == NULL)
		panic("Attempt to copy invalid resource id: %u\n", res_id);
	/* If the entry was replaced while unlocked, the size must match. */
	if (len != 0) {
		if (len != irq_map[res_id]->map_data->len)
			panic("Resource id: %u has changed.\n", res_id);
		memcpy(*data, irq_map[res_id]->map_data, len);
	}
	*map_dev = irq_map[res_id]->dev;
	*map_xref = irq_map[res_id]->xref;
	mtx_unlock(&irq_map_lock);
}
1693 
1694 /*
1695  * Allocate and fill new entry in irq_map table.
1696  */
1697 u_int
1698 intr_map_irq(device_t dev, intptr_t xref, struct intr_map_data *data)
1699 {
1700 	u_int i;
1701 	struct intr_map_entry *entry;
1702 
1703 	/* Prepare new entry first. */
1704 	entry = malloc(sizeof(*entry), M_INTRNG, M_WAITOK | M_ZERO);
1705 
1706 	entry->dev = dev;
1707 	entry->xref = xref;
1708 	entry->map_data = data;
1709 	entry->isrc = NULL;
1710 
1711 	mtx_lock(&irq_map_lock);
1712 	for (i = irq_map_first_free_idx; i < irq_map_count; i++) {
1713 		if (irq_map[i] == NULL) {
1714 			irq_map[i] = entry;
1715 			irq_map_first_free_idx = i + 1;
1716 			mtx_unlock(&irq_map_lock);
1717 			return (i);
1718 		}
1719 	}
1720 	mtx_unlock(&irq_map_lock);
1721 
1722 	/* XXX Expand irq_map table */
1723 	panic("IRQ mapping table is full.");
1724 }
1725 
1726 /*
1727  * Remove and free mapping entry.
1728  */
1729 void
1730 intr_unmap_irq(u_int res_id)
1731 {
1732 	struct intr_map_entry *entry;
1733 
1734 	mtx_lock(&irq_map_lock);
1735 	if ((res_id >= irq_map_count) || (irq_map[res_id] == NULL))
1736 		panic("Attempt to unmap invalid resource id: %u\n", res_id);
1737 	entry = irq_map[res_id];
1738 	irq_map[res_id] = NULL;
1739 	irq_map_first_free_idx = res_id;
1740 	mtx_unlock(&irq_map_lock);
1741 	intr_free_intr_map_data(entry->map_data);
1742 	free(entry, M_INTRNG);
1743 }
1744 
1745 /*
1746  * Clone mapping entry.
1747  */
1748 u_int
1749 intr_map_clone_irq(u_int old_res_id)
1750 {
1751 	device_t map_dev;
1752 	intptr_t map_xref;
1753 	struct intr_map_data *data;
1754 
1755 	intr_map_copy_map_data(old_res_id, &map_dev, &map_xref, &data);
1756 	return (intr_map_irq(map_dev, map_xref, data));
1757 }
1758 
/* One-time setup of the mapping table; runs early via SYSINIT. */
static void
intr_map_init(void *dummy __unused)
{

	mtx_init(&irq_map_lock, "intr map table", NULL, MTX_DEF);

	/*
	 * Twice intr_nirq entries — presumably headroom for cloned/MSI
	 * mappings beyond the base interrupt count; confirm.
	 */
	irq_map_count = 2 * intr_nirq;
	irq_map = mallocarray(irq_map_count, sizeof(struct intr_map_entry*),
	    M_INTRNG, M_WAITOK | M_ZERO);
}
SYSINIT(intr_map_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_map_init, NULL);
1770