xref: /freebsd/sys/kern/subr_intr.c (revision b4af4f93c682e445bf159f0d1ec90b636296c946)
1 /*-
2  * Copyright (c) 2015-2016 Svatopluk Kraus
3  * Copyright (c) 2015-2016 Michal Meloun
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 /*
32  *	New-style Interrupt Framework
33  *
34  *  TODO: - add support for disconnected PICs.
35  *        - to support IPI (PPI) enabling on other CPUs if already started.
36  *        - to complete things for removable PICs.
37  */
38 
39 #include "opt_ddb.h"
40 #include "opt_hwpmc_hooks.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mutex.h>
47 #include <sys/syslog.h>
48 #include <sys/malloc.h>
49 #include <sys/proc.h>
50 #include <sys/queue.h>
51 #include <sys/bus.h>
52 #include <sys/interrupt.h>
53 #include <sys/conf.h>
54 #include <sys/cpuset.h>
55 #include <sys/rman.h>
56 #include <sys/sched.h>
57 #include <sys/smp.h>
58 #include <sys/vmmeter.h>
59 #ifdef HWPMC_HOOKS
60 #include <sys/pmckern.h>
61 #endif
62 
63 #include <machine/atomic.h>
64 #include <machine/intr.h>
65 #include <machine/cpu.h>
66 #include <machine/smp.h>
67 #include <machine/stdarg.h>
68 
69 #ifdef DDB
70 #include <ddb/ddb.h>
71 #endif
72 
73 #include "pic_if.h"
74 #include "msi_if.h"
75 
/* Size of one name slot in intrnames[], terminating NUL included. */
#define	INTRNAME_LEN	(2*MAXCOMLEN + 1)

/*
 * Debug trace helper. With DEBUG defined it prints the calling function's
 * name followed by the formatted message; otherwise it expands to nothing.
 */
#ifndef DEBUG
#define debugf(fmt, args...)
#else
#define debugf(fmt, args...) do {					\
	printf("%s(): ", __func__);					\
	printf(fmt,##args);						\
} while (0)
#endif
84 
85 MALLOC_DECLARE(M_INTRNG);
86 MALLOC_DEFINE(M_INTRNG, "intr", "intr interrupt handling");
87 
88 /* Main interrupt handler called from assembler -> 'hidden' for C code. */
89 void intr_irq_handler(struct trapframe *tf);
90 
91 /* Root interrupt controller stuff. */
92 device_t intr_irq_root_dev;
93 static intr_irq_filter_t *irq_root_filter;
94 static void *irq_root_arg;
95 static u_int irq_root_ipicount;
96 
97 struct intr_pic_child {
98 	SLIST_ENTRY(intr_pic_child)	 pc_next;
99 	struct intr_pic			*pc_pic;
100 	intr_child_irq_filter_t		*pc_filter;
101 	void				*pc_filter_arg;
102 	uintptr_t			 pc_start;
103 	uintptr_t			 pc_length;
104 };
105 
106 /* Interrupt controller definition. */
107 struct intr_pic {
108 	SLIST_ENTRY(intr_pic)	pic_next;
109 	intptr_t		pic_xref;	/* hardware identification */
110 	device_t		pic_dev;
111 /* Only one of FLAG_PIC or FLAG_MSI may be set */
112 #define	FLAG_PIC	(1 << 0)
113 #define	FLAG_MSI	(1 << 1)
114 #define	FLAG_TYPE_MASK	(FLAG_PIC | FLAG_MSI)
115 	u_int			pic_flags;
116 	struct mtx		pic_child_lock;
117 	SLIST_HEAD(, intr_pic_child) pic_children;
118 };
119 
120 static struct mtx pic_list_lock;
121 static SLIST_HEAD(, intr_pic) pic_list;
122 
123 static struct intr_pic *pic_lookup(device_t dev, intptr_t xref, int flags);
124 
125 /* Interrupt source definition. */
126 static struct mtx isrc_table_lock;
127 static struct intr_irqsrc *irq_sources[NIRQ];
128 u_int irq_next_free;
129 
130 #ifdef SMP
131 #ifdef EARLY_AP_STARTUP
132 static bool irq_assign_cpu = true;
133 #else
134 static bool irq_assign_cpu = false;
135 #endif
136 #endif
137 
138 /*
139  * - 2 counters for each I/O interrupt.
140  * - MAXCPU counters for each IPI counters for SMP.
141  */
142 #ifdef SMP
143 #define INTRCNT_COUNT   (NIRQ * 2 + INTR_IPI_COUNT * MAXCPU)
144 #else
145 #define INTRCNT_COUNT   (NIRQ * 2)
146 #endif
147 
148 /* Data for MI statistics reporting. */
149 u_long intrcnt[INTRCNT_COUNT];
150 char intrnames[INTRCNT_COUNT * INTRNAME_LEN];
151 size_t sintrcnt = sizeof(intrcnt);
152 size_t sintrnames = sizeof(intrnames);
153 static u_int intrcnt_index;
154 
155 static struct intr_irqsrc *intr_map_get_isrc(u_int res_id);
156 static void intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc);
157 static struct intr_map_data * intr_map_get_map_data(u_int res_id);
158 static void intr_map_copy_map_data(u_int res_id, device_t *dev, intptr_t *xref,
159     struct intr_map_data **data);
160 
161 /*
162  *  Interrupt framework initialization routine.
163  */
164 static void
165 intr_irq_init(void *dummy __unused)
166 {
167 
168 	SLIST_INIT(&pic_list);
169 	mtx_init(&pic_list_lock, "intr pic list", NULL, MTX_DEF);
170 
171 	mtx_init(&isrc_table_lock, "intr isrc table", NULL, MTX_DEF);
172 }
173 SYSINIT(intr_irq_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_irq_init, NULL);
174 
175 static void
176 intrcnt_setname(const char *name, int index)
177 {
178 
179 	snprintf(intrnames + INTRNAME_LEN * index, INTRNAME_LEN, "%-*s",
180 	    INTRNAME_LEN - 1, name);
181 }
182 
183 /*
184  *  Update name for interrupt source with interrupt event.
185  */
186 static void
187 intrcnt_updatename(struct intr_irqsrc *isrc)
188 {
189 
190 	/* QQQ: What about stray counter name? */
191 	mtx_assert(&isrc_table_lock, MA_OWNED);
192 	intrcnt_setname(isrc->isrc_event->ie_fullname, isrc->isrc_index);
193 }
194 
195 /*
196  *  Virtualization for interrupt source interrupt counter increment.
197  */
198 static inline void
199 isrc_increment_count(struct intr_irqsrc *isrc)
200 {
201 
202 	if (isrc->isrc_flags & INTR_ISRCF_PPI)
203 		atomic_add_long(&isrc->isrc_count[0], 1);
204 	else
205 		isrc->isrc_count[0]++;
206 }
207 
208 /*
209  *  Virtualization for interrupt source interrupt stray counter increment.
210  */
211 static inline void
212 isrc_increment_straycount(struct intr_irqsrc *isrc)
213 {
214 
215 	isrc->isrc_count[1]++;
216 }
217 
218 /*
219  *  Virtualization for interrupt source interrupt name update.
220  */
221 static void
222 isrc_update_name(struct intr_irqsrc *isrc, const char *name)
223 {
224 	char str[INTRNAME_LEN];
225 
226 	mtx_assert(&isrc_table_lock, MA_OWNED);
227 
228 	if (name != NULL) {
229 		snprintf(str, INTRNAME_LEN, "%s: %s", isrc->isrc_name, name);
230 		intrcnt_setname(str, isrc->isrc_index);
231 		snprintf(str, INTRNAME_LEN, "stray %s: %s", isrc->isrc_name,
232 		    name);
233 		intrcnt_setname(str, isrc->isrc_index + 1);
234 	} else {
235 		snprintf(str, INTRNAME_LEN, "%s:", isrc->isrc_name);
236 		intrcnt_setname(str, isrc->isrc_index);
237 		snprintf(str, INTRNAME_LEN, "stray %s:", isrc->isrc_name);
238 		intrcnt_setname(str, isrc->isrc_index + 1);
239 	}
240 }
241 
242 /*
243  *  Virtualization for interrupt source interrupt counters setup.
244  */
245 static void
246 isrc_setup_counters(struct intr_irqsrc *isrc)
247 {
248 	u_int index;
249 
250 	/*
251 	 *  XXX - it does not work well with removable controllers and
252 	 *        interrupt sources !!!
253 	 */
254 	index = atomic_fetchadd_int(&intrcnt_index, 2);
255 	isrc->isrc_index = index;
256 	isrc->isrc_count = &intrcnt[index];
257 	isrc_update_name(isrc, NULL);
258 }
259 
/*
 *  Give back the counters of an interrupt source. Releasing counters is
 *  not supported: calling this function panics.
 */
static void
isrc_release_counters(struct intr_irqsrc *isrc)
{

	panic("%s: not implemented", __func__);
}
269 
#ifdef SMP
/*
 *  Reserve MAXCPU consecutive counters for one IPI and name them
 *  "cpu<N>:<name>", one per possible CPU.
 *
 *  Returns a pointer to the first of the reserved counters; the counter
 *  for CPU N is at index N from there.
 */
u_long *
intr_ipi_setup_counters(const char *name)
{
	u_int index, i;
	char str[INTRNAME_LEN];

	index = atomic_fetchadd_int(&intrcnt_index, MAXCPU);
	for (i = 0; i < MAXCPU; i++) {
		/* 'i' is unsigned, so it is printed with %u. */
		snprintf(str, INTRNAME_LEN, "cpu%u:%s", i, name);
		intrcnt_setname(str, index + i);
	}
	return (&intrcnt[index]);
}
#endif
288 
289 /*
290  *  Main interrupt dispatch handler. It's called straight
291  *  from the assembler, where CPU interrupt is served.
292  */
293 void
294 intr_irq_handler(struct trapframe *tf)
295 {
296 	struct trapframe * oldframe;
297 	struct thread * td;
298 
299 	KASSERT(irq_root_filter != NULL, ("%s: no filter", __func__));
300 
301 	VM_CNT_INC(v_intr);
302 	critical_enter();
303 	td = curthread;
304 	oldframe = td->td_intr_frame;
305 	td->td_intr_frame = tf;
306 	irq_root_filter(irq_root_arg);
307 	td->td_intr_frame = oldframe;
308 	critical_exit();
309 #ifdef HWPMC_HOOKS
310 	if (pmc_hook && TRAPF_USERMODE(tf) &&
311 	    (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN))
312 		pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, tf);
313 #endif
314 }
315 
316 int
317 intr_child_irq_handler(struct intr_pic *parent, uintptr_t irq)
318 {
319 	struct intr_pic_child *child;
320 	bool found;
321 
322 	found = false;
323 	mtx_lock_spin(&parent->pic_child_lock);
324 	SLIST_FOREACH(child, &parent->pic_children, pc_next) {
325 		if (child->pc_start <= irq &&
326 		    irq < (child->pc_start + child->pc_length)) {
327 			found = true;
328 			break;
329 		}
330 	}
331 	mtx_unlock_spin(&parent->pic_child_lock);
332 
333 	if (found)
334 		return (child->pc_filter(child->pc_filter_arg, irq));
335 
336 	return (FILTER_STRAY);
337 }
338 
339 /*
340  *  interrupt controller dispatch function for interrupts. It should
341  *  be called straight from the interrupt controller, when associated interrupt
342  *  source is learned.
343  */
344 int
345 intr_isrc_dispatch(struct intr_irqsrc *isrc, struct trapframe *tf)
346 {
347 
348 	KASSERT(isrc != NULL, ("%s: no source", __func__));
349 
350 	isrc_increment_count(isrc);
351 
352 #ifdef INTR_SOLO
353 	if (isrc->isrc_filter != NULL) {
354 		int error;
355 		error = isrc->isrc_filter(isrc->isrc_arg, tf);
356 		PIC_POST_FILTER(isrc->isrc_dev, isrc);
357 		if (error == FILTER_HANDLED)
358 			return (0);
359 	} else
360 #endif
361 	if (isrc->isrc_event != NULL) {
362 		if (intr_event_handle(isrc->isrc_event, tf) == 0)
363 			return (0);
364 	}
365 
366 	isrc_increment_straycount(isrc);
367 	return (EINVAL);
368 }
369 
370 /*
371  *  Alloc unique interrupt number (resource handle) for interrupt source.
372  *
373  *  There could be various strategies how to allocate free interrupt number
374  *  (resource handle) for new interrupt source.
375  *
376  *  1. Handles are always allocated forward, so handles are not recycled
377  *     immediately. However, if only one free handle left which is reused
378  *     constantly...
379  */
380 static inline int
381 isrc_alloc_irq(struct intr_irqsrc *isrc)
382 {
383 	u_int maxirqs, irq;
384 
385 	mtx_assert(&isrc_table_lock, MA_OWNED);
386 
387 	maxirqs = nitems(irq_sources);
388 	if (irq_next_free >= maxirqs)
389 		return (ENOSPC);
390 
391 	for (irq = irq_next_free; irq < maxirqs; irq++) {
392 		if (irq_sources[irq] == NULL)
393 			goto found;
394 	}
395 	for (irq = 0; irq < irq_next_free; irq++) {
396 		if (irq_sources[irq] == NULL)
397 			goto found;
398 	}
399 
400 	irq_next_free = maxirqs;
401 	return (ENOSPC);
402 
403 found:
404 	isrc->isrc_irq = irq;
405 	irq_sources[irq] = isrc;
406 
407 	irq_next_free = irq + 1;
408 	if (irq_next_free >= maxirqs)
409 		irq_next_free = 0;
410 	return (0);
411 }
412 
413 /*
414  *  Free unique interrupt number (resource handle) from interrupt source.
415  */
416 static inline int
417 isrc_free_irq(struct intr_irqsrc *isrc)
418 {
419 
420 	mtx_assert(&isrc_table_lock, MA_OWNED);
421 
422 	if (isrc->isrc_irq >= nitems(irq_sources))
423 		return (EINVAL);
424 	if (irq_sources[isrc->isrc_irq] != isrc)
425 		return (EINVAL);
426 
427 	irq_sources[isrc->isrc_irq] = NULL;
428 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
429 	return (0);
430 }
431 
432 /*
433  *  Initialize interrupt source and register it into global interrupt table.
434  */
435 int
436 intr_isrc_register(struct intr_irqsrc *isrc, device_t dev, u_int flags,
437     const char *fmt, ...)
438 {
439 	int error;
440 	va_list ap;
441 
442 	bzero(isrc, sizeof(struct intr_irqsrc));
443 	isrc->isrc_dev = dev;
444 	isrc->isrc_irq = INTR_IRQ_INVALID;	/* just to be safe */
445 	isrc->isrc_flags = flags;
446 
447 	va_start(ap, fmt);
448 	vsnprintf(isrc->isrc_name, INTR_ISRC_NAMELEN, fmt, ap);
449 	va_end(ap);
450 
451 	mtx_lock(&isrc_table_lock);
452 	error = isrc_alloc_irq(isrc);
453 	if (error != 0) {
454 		mtx_unlock(&isrc_table_lock);
455 		return (error);
456 	}
457 	/*
458 	 * Setup interrupt counters, but not for IPI sources. Those are setup
459 	 * later and only for used ones (up to INTR_IPI_COUNT) to not exhaust
460 	 * our counter pool.
461 	 */
462 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
463 		isrc_setup_counters(isrc);
464 	mtx_unlock(&isrc_table_lock);
465 	return (0);
466 }
467 
468 /*
469  *  Deregister interrupt source from global interrupt table.
470  */
471 int
472 intr_isrc_deregister(struct intr_irqsrc *isrc)
473 {
474 	int error;
475 
476 	mtx_lock(&isrc_table_lock);
477 	if ((isrc->isrc_flags & INTR_ISRCF_IPI) == 0)
478 		isrc_release_counters(isrc);
479 	error = isrc_free_irq(isrc);
480 	mtx_unlock(&isrc_table_lock);
481 	return (error);
482 }
483 
#ifdef SMP
/*
 *  Helper for a PIC to decide whether the given ISRC should be initialized
 *  on the given cpu.
 *
 *  Only sources that have at least one handler and are flagged as PPI or
 *  IPI qualify. For those:
 *
 *     If INTR_ISRCF_BOUND is set, the ISRC should be inited only on cpus
 *     set in isrc_cpu. If not, the ISRC should be inited on every cpu and
 *     the cpu is added to isrc_cpu, so isrc_cpu is always correct.
 */
bool
intr_isrc_init_on_cpu(struct intr_irqsrc *isrc, u_int cpu)
{
	const u_int percpu_mask = INTR_ISRCF_PPI | INTR_ISRCF_IPI;

	if (isrc->isrc_handlers == 0 ||
	    (isrc->isrc_flags & percpu_mask) == 0)
		return (false);

	if ((isrc->isrc_flags & INTR_ISRCF_BOUND) != 0)
		return (CPU_ISSET(cpu, &isrc->isrc_cpu));

	CPU_SET(cpu, &isrc->isrc_cpu);
	return (true);
}
#endif
509 
#ifdef INTR_SOLO
/*
 *  Install a solo filter on an interrupt source.
 *
 *  Returns EINVAL for a NULL filter and EBUSY when the source already has
 *  a filter or an interrupt event. On success the counter names are updated
 *  with 'name' and the source itself is returned as the cookie.
 */
static int
iscr_setup_filter(struct intr_irqsrc *isrc, const char *name,
    intr_irq_filter_t *filter, void *arg, void **cookiep)
{
	int error;

	if (filter == NULL)
		return (EINVAL);

	error = 0;
	mtx_lock(&isrc_table_lock);
	/*
	 * Make sure that we do not mix the two ways
	 * how we handle interrupt sources.
	 */
	if (isrc->isrc_filter == NULL && isrc->isrc_event == NULL) {
		isrc->isrc_filter = filter;
		isrc->isrc_arg = arg;
		isrc_update_name(isrc, name);
	} else
		error = EBUSY;
	mtx_unlock(&isrc_table_lock);

	if (error != 0)
		return (error);

	*cookiep = isrc;
	return (0);
}
#endif
540 
541 /*
542  *  Interrupt source pre_ithread method for MI interrupt framework.
543  */
544 static void
545 intr_isrc_pre_ithread(void *arg)
546 {
547 	struct intr_irqsrc *isrc = arg;
548 
549 	PIC_PRE_ITHREAD(isrc->isrc_dev, isrc);
550 }
551 
552 /*
553  *  Interrupt source post_ithread method for MI interrupt framework.
554  */
555 static void
556 intr_isrc_post_ithread(void *arg)
557 {
558 	struct intr_irqsrc *isrc = arg;
559 
560 	PIC_POST_ITHREAD(isrc->isrc_dev, isrc);
561 }
562 
563 /*
564  *  Interrupt source post_filter method for MI interrupt framework.
565  */
566 static void
567 intr_isrc_post_filter(void *arg)
568 {
569 	struct intr_irqsrc *isrc = arg;
570 
571 	PIC_POST_FILTER(isrc->isrc_dev, isrc);
572 }
573 
574 /*
575  *  Interrupt source assign_cpu method for MI interrupt framework.
576  */
577 static int
578 intr_isrc_assign_cpu(void *arg, int cpu)
579 {
580 #ifdef SMP
581 	struct intr_irqsrc *isrc = arg;
582 	int error;
583 
584 	if (isrc->isrc_dev != intr_irq_root_dev)
585 		return (EINVAL);
586 
587 	mtx_lock(&isrc_table_lock);
588 	if (cpu == NOCPU) {
589 		CPU_ZERO(&isrc->isrc_cpu);
590 		isrc->isrc_flags &= ~INTR_ISRCF_BOUND;
591 	} else {
592 		CPU_SETOF(cpu, &isrc->isrc_cpu);
593 		isrc->isrc_flags |= INTR_ISRCF_BOUND;
594 	}
595 
596 	/*
597 	 * In NOCPU case, it's up to PIC to either leave ISRC on same CPU or
598 	 * re-balance it to another CPU or enable it on more CPUs. However,
599 	 * PIC is expected to change isrc_cpu appropriately to keep us well
600 	 * informed if the call is successful.
601 	 */
602 	if (irq_assign_cpu) {
603 		error = PIC_BIND_INTR(isrc->isrc_dev, isrc);
604 		if (error) {
605 			CPU_ZERO(&isrc->isrc_cpu);
606 			mtx_unlock(&isrc_table_lock);
607 			return (error);
608 		}
609 	}
610 	mtx_unlock(&isrc_table_lock);
611 	return (0);
612 #else
613 	return (EOPNOTSUPP);
614 #endif
615 }
616 
617 /*
618  *  Create interrupt event for interrupt source.
619  */
620 static int
621 isrc_event_create(struct intr_irqsrc *isrc)
622 {
623 	struct intr_event *ie;
624 	int error;
625 
626 	error = intr_event_create(&ie, isrc, 0, isrc->isrc_irq,
627 	    intr_isrc_pre_ithread, intr_isrc_post_ithread, intr_isrc_post_filter,
628 	    intr_isrc_assign_cpu, "%s:", isrc->isrc_name);
629 	if (error)
630 		return (error);
631 
632 	mtx_lock(&isrc_table_lock);
633 	/*
634 	 * Make sure that we do not mix the two ways
635 	 * how we handle interrupt sources. Let contested event wins.
636 	 */
637 #ifdef INTR_SOLO
638 	if (isrc->isrc_filter != NULL || isrc->isrc_event != NULL) {
639 #else
640 	if (isrc->isrc_event != NULL) {
641 #endif
642 		mtx_unlock(&isrc_table_lock);
643 		intr_event_destroy(ie);
644 		return (isrc->isrc_event != NULL ? EBUSY : 0);
645 	}
646 	isrc->isrc_event = ie;
647 	mtx_unlock(&isrc_table_lock);
648 
649 	return (0);
650 }
#ifdef notyet
/*
 *  Detach the interrupt event from an interrupt source under the table
 *  lock and destroy it, if there was one, after the lock is dropped.
 */
static void
isrc_event_destroy(struct intr_irqsrc *isrc)
{
	struct intr_event *detached;

	mtx_lock(&isrc_table_lock);
	detached = isrc->isrc_event;
	isrc->isrc_event = NULL;
	mtx_unlock(&isrc_table_lock);

	if (detached == NULL)
		return;
	intr_event_destroy(detached);
}
#endif
669 /*
670  *  Add handler to interrupt source.
671  */
672 static int
673 isrc_add_handler(struct intr_irqsrc *isrc, const char *name,
674     driver_filter_t filter, driver_intr_t handler, void *arg,
675     enum intr_type flags, void **cookiep)
676 {
677 	int error;
678 
679 	if (isrc->isrc_event == NULL) {
680 		error = isrc_event_create(isrc);
681 		if (error)
682 			return (error);
683 	}
684 
685 	error = intr_event_add_handler(isrc->isrc_event, name, filter, handler,
686 	    arg, intr_priority(flags), flags, cookiep);
687 	if (error == 0) {
688 		mtx_lock(&isrc_table_lock);
689 		intrcnt_updatename(isrc);
690 		mtx_unlock(&isrc_table_lock);
691 	}
692 
693 	return (error);
694 }
695 
696 /*
697  *  Lookup interrupt controller locked.
698  */
699 static inline struct intr_pic *
700 pic_lookup_locked(device_t dev, intptr_t xref, int flags)
701 {
702 	struct intr_pic *pic;
703 
704 	mtx_assert(&pic_list_lock, MA_OWNED);
705 
706 	if (dev == NULL && xref == 0)
707 		return (NULL);
708 
709 	/* Note that pic->pic_dev is never NULL on registered PIC. */
710 	SLIST_FOREACH(pic, &pic_list, pic_next) {
711 		if ((pic->pic_flags & FLAG_TYPE_MASK) !=
712 		    (flags & FLAG_TYPE_MASK))
713 			continue;
714 
715 		if (dev == NULL) {
716 			if (xref == pic->pic_xref)
717 				return (pic);
718 		} else if (xref == 0 || pic->pic_xref == 0) {
719 			if (dev == pic->pic_dev)
720 				return (pic);
721 		} else if (xref == pic->pic_xref && dev == pic->pic_dev)
722 				return (pic);
723 	}
724 	return (NULL);
725 }
726 
727 /*
728  *  Lookup interrupt controller.
729  */
730 static struct intr_pic *
731 pic_lookup(device_t dev, intptr_t xref, int flags)
732 {
733 	struct intr_pic *pic;
734 
735 	mtx_lock(&pic_list_lock);
736 	pic = pic_lookup_locked(dev, xref, flags);
737 	mtx_unlock(&pic_list_lock);
738 	return (pic);
739 }
740 
741 /*
742  *  Create interrupt controller.
743  */
744 static struct intr_pic *
745 pic_create(device_t dev, intptr_t xref, int flags)
746 {
747 	struct intr_pic *pic;
748 
749 	mtx_lock(&pic_list_lock);
750 	pic = pic_lookup_locked(dev, xref, flags);
751 	if (pic != NULL) {
752 		mtx_unlock(&pic_list_lock);
753 		return (pic);
754 	}
755 	pic = malloc(sizeof(*pic), M_INTRNG, M_NOWAIT | M_ZERO);
756 	if (pic == NULL) {
757 		mtx_unlock(&pic_list_lock);
758 		return (NULL);
759 	}
760 	pic->pic_xref = xref;
761 	pic->pic_dev = dev;
762 	pic->pic_flags = flags;
763 	mtx_init(&pic->pic_child_lock, "pic child lock", NULL, MTX_SPIN);
764 	SLIST_INSERT_HEAD(&pic_list, pic, pic_next);
765 	mtx_unlock(&pic_list_lock);
766 
767 	return (pic);
768 }
#ifdef notyet
/*
 *  Destroy interrupt controller: unlink the matching controller from the
 *  list and free it. Does nothing when no controller matches.
 *
 *  NOTE(review): pic_child_lock and the pic_children entries are not torn
 *  down here - confirm before enabling this code.
 */
static void
pic_destroy(device_t dev, intptr_t xref, int flags)
{
	struct intr_pic *victim;

	mtx_lock(&pic_list_lock);
	victim = pic_lookup_locked(dev, xref, flags);
	if (victim != NULL)
		SLIST_REMOVE(&pic_list, victim, intr_pic, pic_next);
	mtx_unlock(&pic_list_lock);

	if (victim != NULL)
		free(victim, M_INTRNG);
}
#endif
790 /*
791  *  Register interrupt controller.
792  */
793 struct intr_pic *
794 intr_pic_register(device_t dev, intptr_t xref)
795 {
796 	struct intr_pic *pic;
797 
798 	if (dev == NULL)
799 		return (NULL);
800 	pic = pic_create(dev, xref, FLAG_PIC);
801 	if (pic == NULL)
802 		return (NULL);
803 
804 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
805 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
806 	return (pic);
807 }
808 
809 /*
810  *  Unregister interrupt controller.
811  */
812 int
813 intr_pic_deregister(device_t dev, intptr_t xref)
814 {
815 
816 	panic("%s: not implemented", __func__);
817 }
818 
819 /*
820  *  Mark interrupt controller (itself) as a root one.
821  *
822  *  Note that only an interrupt controller can really know its position
823  *  in interrupt controller's tree. So root PIC must claim itself as a root.
824  *
825  *  In FDT case, according to ePAPR approved version 1.1 from 08 April 2011,
826  *  page 30:
827  *    "The root of the interrupt tree is determined when traversal
828  *     of the interrupt tree reaches an interrupt controller node without
829  *     an interrupts property and thus no explicit interrupt parent."
830  */
831 int
832 intr_pic_claim_root(device_t dev, intptr_t xref, intr_irq_filter_t *filter,
833     void *arg, u_int ipicount)
834 {
835 	struct intr_pic *pic;
836 
837 	pic = pic_lookup(dev, xref, FLAG_PIC);
838 	if (pic == NULL) {
839 		device_printf(dev, "not registered\n");
840 		return (EINVAL);
841 	}
842 
843 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
844 	    ("%s: Found a non-PIC controller: %s", __func__,
845 	     device_get_name(pic->pic_dev)));
846 
847 	if (filter == NULL) {
848 		device_printf(dev, "filter missing\n");
849 		return (EINVAL);
850 	}
851 
852 	/*
853 	 * Only one interrupt controllers could be on the root for now.
854 	 * Note that we further suppose that there is not threaded interrupt
855 	 * routine (handler) on the root. See intr_irq_handler().
856 	 */
857 	if (intr_irq_root_dev != NULL) {
858 		device_printf(dev, "another root already set\n");
859 		return (EBUSY);
860 	}
861 
862 	intr_irq_root_dev = dev;
863 	irq_root_filter = filter;
864 	irq_root_arg = arg;
865 	irq_root_ipicount = ipicount;
866 
867 	debugf("irq root set to %s\n", device_get_nameunit(dev));
868 	return (0);
869 }
870 
871 /*
872  * Add a handler to manage a sub range of a parents interrupts.
873  */
874 struct intr_pic *
875 intr_pic_add_handler(device_t parent, struct intr_pic *pic,
876     intr_child_irq_filter_t *filter, void *arg, uintptr_t start,
877     uintptr_t length)
878 {
879 	struct intr_pic *parent_pic;
880 	struct intr_pic_child *newchild;
881 #ifdef INVARIANTS
882 	struct intr_pic_child *child;
883 #endif
884 
885 	/* Find the parent PIC */
886 	parent_pic = pic_lookup(parent, 0, FLAG_PIC);
887 	if (parent_pic == NULL)
888 		return (NULL);
889 
890 	newchild = malloc(sizeof(*newchild), M_INTRNG, M_WAITOK | M_ZERO);
891 	newchild->pc_pic = pic;
892 	newchild->pc_filter = filter;
893 	newchild->pc_filter_arg = arg;
894 	newchild->pc_start = start;
895 	newchild->pc_length = length;
896 
897 	mtx_lock_spin(&parent_pic->pic_child_lock);
898 #ifdef INVARIANTS
899 	SLIST_FOREACH(child, &parent_pic->pic_children, pc_next) {
900 		KASSERT(child->pc_pic != pic, ("%s: Adding a child PIC twice",
901 		    __func__));
902 	}
903 #endif
904 	SLIST_INSERT_HEAD(&parent_pic->pic_children, newchild, pc_next);
905 	mtx_unlock_spin(&parent_pic->pic_child_lock);
906 
907 	return (pic);
908 }
909 
910 static int
911 intr_resolve_irq(device_t dev, intptr_t xref, struct intr_map_data *data,
912     struct intr_irqsrc **isrc)
913 {
914 	struct intr_pic *pic;
915 	struct intr_map_data_msi *msi;
916 
917 	if (data == NULL)
918 		return (EINVAL);
919 
920 	pic = pic_lookup(dev, xref,
921 	    (data->type == INTR_MAP_DATA_MSI) ? FLAG_MSI : FLAG_PIC);
922 	if (pic == NULL)
923 		return (ESRCH);
924 
925 	switch (data->type) {
926 	case INTR_MAP_DATA_MSI:
927 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
928 		    ("%s: Found a non-MSI controller: %s", __func__,
929 		     device_get_name(pic->pic_dev)));
930 		msi = (struct intr_map_data_msi *)data;
931 		*isrc = msi->isrc;
932 		return (0);
933 
934 	default:
935 		KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_PIC,
936 		    ("%s: Found a non-PIC controller: %s", __func__,
937 		     device_get_name(pic->pic_dev)));
938 		return (PIC_MAP_INTR(pic->pic_dev, data, isrc));
939 
940 	}
941 }
942 
943 int
944 intr_activate_irq(device_t dev, struct resource *res)
945 {
946 	device_t map_dev;
947 	intptr_t map_xref;
948 	struct intr_map_data *data;
949 	struct intr_irqsrc *isrc;
950 	u_int res_id;
951 	int error;
952 
953 	KASSERT(rman_get_start(res) == rman_get_end(res),
954 	    ("%s: more interrupts in resource", __func__));
955 
956 	res_id = (u_int)rman_get_start(res);
957 	if (intr_map_get_isrc(res_id) != NULL)
958 		panic("Attempt to double activation of resource id: %u\n",
959 		    res_id);
960 	intr_map_copy_map_data(res_id, &map_dev, &map_xref, &data);
961 	error = intr_resolve_irq(map_dev, map_xref, data, &isrc);
962 	if (error != 0) {
963 		free(data, M_INTRNG);
964 		/* XXX TODO DISCONECTED PICs */
965 		/* if (error == EINVAL) return(0); */
966 		return (error);
967 	}
968 	intr_map_set_isrc(res_id, isrc);
969 	rman_set_virtual(res, data);
970 	return (PIC_ACTIVATE_INTR(isrc->isrc_dev, isrc, res, data));
971 }
972 
973 int
974 intr_deactivate_irq(device_t dev, struct resource *res)
975 {
976 	struct intr_map_data *data;
977 	struct intr_irqsrc *isrc;
978 	u_int res_id;
979 	int error;
980 
981 	KASSERT(rman_get_start(res) == rman_get_end(res),
982 	    ("%s: more interrupts in resource", __func__));
983 
984 	res_id = (u_int)rman_get_start(res);
985 	isrc = intr_map_get_isrc(res_id);
986 	if (isrc == NULL)
987 		panic("Attempt to deactivate non-active resource id: %u\n",
988 		    res_id);
989 
990 	data = rman_get_virtual(res);
991 	error = PIC_DEACTIVATE_INTR(isrc->isrc_dev, isrc, res, data);
992 	intr_map_set_isrc(res_id, NULL);
993 	rman_set_virtual(res, NULL);
994 	free(data, M_INTRNG);
995 	return (error);
996 }
997 
998 int
999 intr_setup_irq(device_t dev, struct resource *res, driver_filter_t filt,
1000     driver_intr_t hand, void *arg, int flags, void **cookiep)
1001 {
1002 	int error;
1003 	struct intr_map_data *data;
1004 	struct intr_irqsrc *isrc;
1005 	const char *name;
1006 	u_int res_id;
1007 
1008 	KASSERT(rman_get_start(res) == rman_get_end(res),
1009 	    ("%s: more interrupts in resource", __func__));
1010 
1011 	res_id = (u_int)rman_get_start(res);
1012 	isrc = intr_map_get_isrc(res_id);
1013 	if (isrc == NULL) {
1014 		/* XXX TODO DISCONECTED PICs */
1015 		return (EINVAL);
1016 	}
1017 
1018 	data = rman_get_virtual(res);
1019 	name = device_get_nameunit(dev);
1020 
1021 #ifdef INTR_SOLO
1022 	/*
1023 	 * Standard handling is done through MI interrupt framework. However,
1024 	 * some interrupts could request solely own special handling. This
1025 	 * non standard handling can be used for interrupt controllers without
1026 	 * handler (filter only), so in case that interrupt controllers are
1027 	 * chained, MI interrupt framework is called only in leaf controller.
1028 	 *
1029 	 * Note that root interrupt controller routine is served as well,
1030 	 * however in intr_irq_handler(), i.e. main system dispatch routine.
1031 	 */
1032 	if (flags & INTR_SOLO && hand != NULL) {
1033 		debugf("irq %u cannot solo on %s\n", irq, name);
1034 		return (EINVAL);
1035 	}
1036 
1037 	if (flags & INTR_SOLO) {
1038 		error = iscr_setup_filter(isrc, name, (intr_irq_filter_t *)filt,
1039 		    arg, cookiep);
1040 		debugf("irq %u setup filter error %d on %s\n", isrc->isrc_irq, error,
1041 		    name);
1042 	} else
1043 #endif
1044 		{
1045 		error = isrc_add_handler(isrc, name, filt, hand, arg, flags,
1046 		    cookiep);
1047 		debugf("irq %u add handler error %d on %s\n", isrc->isrc_irq, error, name);
1048 	}
1049 	if (error != 0)
1050 		return (error);
1051 
1052 	mtx_lock(&isrc_table_lock);
1053 	error = PIC_SETUP_INTR(isrc->isrc_dev, isrc, res, data);
1054 	if (error == 0) {
1055 		isrc->isrc_handlers++;
1056 		if (isrc->isrc_handlers == 1)
1057 			PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
1058 	}
1059 	mtx_unlock(&isrc_table_lock);
1060 	if (error != 0)
1061 		intr_event_remove_handler(*cookiep);
1062 	return (error);
1063 }
1064 
1065 int
1066 intr_teardown_irq(device_t dev, struct resource *res, void *cookie)
1067 {
1068 	int error;
1069 	struct intr_map_data *data;
1070 	struct intr_irqsrc *isrc;
1071 	u_int res_id;
1072 
1073 	KASSERT(rman_get_start(res) == rman_get_end(res),
1074 	    ("%s: more interrupts in resource", __func__));
1075 
1076 	res_id = (u_int)rman_get_start(res);
1077 	isrc = intr_map_get_isrc(res_id);
1078 	if (isrc == NULL || isrc->isrc_handlers == 0)
1079 		return (EINVAL);
1080 
1081 	data = rman_get_virtual(res);
1082 
1083 #ifdef INTR_SOLO
1084 	if (isrc->isrc_filter != NULL) {
1085 		if (isrc != cookie)
1086 			return (EINVAL);
1087 
1088 		mtx_lock(&isrc_table_lock);
1089 		isrc->isrc_filter = NULL;
1090 		isrc->isrc_arg = NULL;
1091 		isrc->isrc_handlers = 0;
1092 		PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1093 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1094 		isrc_update_name(isrc, NULL);
1095 		mtx_unlock(&isrc_table_lock);
1096 		return (0);
1097 	}
1098 #endif
1099 	if (isrc != intr_handler_source(cookie))
1100 		return (EINVAL);
1101 
1102 	error = intr_event_remove_handler(cookie);
1103 	if (error == 0) {
1104 		mtx_lock(&isrc_table_lock);
1105 		isrc->isrc_handlers--;
1106 		if (isrc->isrc_handlers == 0)
1107 			PIC_DISABLE_INTR(isrc->isrc_dev, isrc);
1108 		PIC_TEARDOWN_INTR(isrc->isrc_dev, isrc, res, data);
1109 		intrcnt_updatename(isrc);
1110 		mtx_unlock(&isrc_table_lock);
1111 	}
1112 	return (error);
1113 }
1114 
1115 int
1116 intr_describe_irq(device_t dev, struct resource *res, void *cookie,
1117     const char *descr)
1118 {
1119 	int error;
1120 	struct intr_irqsrc *isrc;
1121 	u_int res_id;
1122 
1123 	KASSERT(rman_get_start(res) == rman_get_end(res),
1124 	    ("%s: more interrupts in resource", __func__));
1125 
1126 	res_id = (u_int)rman_get_start(res);
1127 	isrc = intr_map_get_isrc(res_id);
1128 	if (isrc == NULL || isrc->isrc_handlers == 0)
1129 		return (EINVAL);
1130 #ifdef INTR_SOLO
1131 	if (isrc->isrc_filter != NULL) {
1132 		if (isrc != cookie)
1133 			return (EINVAL);
1134 
1135 		mtx_lock(&isrc_table_lock);
1136 		isrc_update_name(isrc, descr);
1137 		mtx_unlock(&isrc_table_lock);
1138 		return (0);
1139 	}
1140 #endif
1141 	error = intr_event_describe_handler(isrc->isrc_event, cookie, descr);
1142 	if (error == 0) {
1143 		mtx_lock(&isrc_table_lock);
1144 		intrcnt_updatename(isrc);
1145 		mtx_unlock(&isrc_table_lock);
1146 	}
1147 	return (error);
1148 }
1149 
1150 #ifdef SMP
1151 int
1152 intr_bind_irq(device_t dev, struct resource *res, int cpu)
1153 {
1154 	struct intr_irqsrc *isrc;
1155 	u_int res_id;
1156 
1157 	KASSERT(rman_get_start(res) == rman_get_end(res),
1158 	    ("%s: more interrupts in resource", __func__));
1159 
1160 	res_id = (u_int)rman_get_start(res);
1161 	isrc = intr_map_get_isrc(res_id);
1162 	if (isrc == NULL || isrc->isrc_handlers == 0)
1163 		return (EINVAL);
1164 #ifdef INTR_SOLO
1165 	if (isrc->isrc_filter != NULL)
1166 		return (intr_isrc_assign_cpu(isrc, cpu));
1167 #endif
1168 	return (intr_event_bind(isrc->isrc_event, cpu));
1169 }
1170 
1171 /*
1172  * Return the CPU that the next interrupt source should use.
1173  * For now just returns the next CPU according to round-robin.
1174  */
1175 u_int
1176 intr_irq_next_cpu(u_int last_cpu, cpuset_t *cpumask)
1177 {
1178 	u_int cpu;
1179 
1180 	KASSERT(!CPU_EMPTY(cpumask), ("%s: Empty CPU mask", __func__));
1181 	if (!irq_assign_cpu || mp_ncpus == 1) {
1182 		cpu = PCPU_GET(cpuid);
1183 
1184 		if (CPU_ISSET(cpu, cpumask))
1185 			return (curcpu);
1186 
1187 		return (CPU_FFS(cpumask) - 1);
1188 	}
1189 
1190 	do {
1191 		last_cpu++;
1192 		if (last_cpu > mp_maxid)
1193 			last_cpu = 0;
1194 	} while (!CPU_ISSET(last_cpu, cpumask));
1195 	return (last_cpu);
1196 }
1197 
1198 #ifndef EARLY_AP_STARTUP
1199 /*
1200  *  Distribute all the interrupt sources among the available
1201  *  CPUs once the AP's have been launched.
1202  */
1203 static void
1204 intr_irq_shuffle(void *arg __unused)
1205 {
1206 	struct intr_irqsrc *isrc;
1207 	u_int i;
1208 
1209 	if (mp_ncpus == 1)
1210 		return;
1211 
1212 	mtx_lock(&isrc_table_lock);
1213 	irq_assign_cpu = true;
1214 	for (i = 0; i < NIRQ; i++) {
1215 		isrc = irq_sources[i];
1216 		if (isrc == NULL || isrc->isrc_handlers == 0 ||
1217 		    isrc->isrc_flags & (INTR_ISRCF_PPI | INTR_ISRCF_IPI))
1218 			continue;
1219 
1220 		if (isrc->isrc_event != NULL &&
1221 		    isrc->isrc_flags & INTR_ISRCF_BOUND &&
1222 		    isrc->isrc_event->ie_cpu != CPU_FFS(&isrc->isrc_cpu) - 1)
1223 			panic("%s: CPU inconsistency", __func__);
1224 
1225 		if ((isrc->isrc_flags & INTR_ISRCF_BOUND) == 0)
1226 			CPU_ZERO(&isrc->isrc_cpu); /* start again */
1227 
1228 		/*
1229 		 * We are in wicked position here if the following call fails
1230 		 * for bound ISRC. The best thing we can do is to clear
1231 		 * isrc_cpu so inconsistency with ie_cpu will be detectable.
1232 		 */
1233 		if (PIC_BIND_INTR(isrc->isrc_dev, isrc) != 0)
1234 			CPU_ZERO(&isrc->isrc_cpu);
1235 	}
1236 	mtx_unlock(&isrc_table_lock);
1237 }
1238 SYSINIT(intr_irq_shuffle, SI_SUB_SMP, SI_ORDER_SECOND, intr_irq_shuffle, NULL);
1239 #endif /* !EARLY_AP_STARTUP */
1240 
1241 #else
1242 u_int
1243 intr_irq_next_cpu(u_int current_cpu, cpuset_t *cpumask)
1244 {
1245 
1246 	return (PCPU_GET(cpuid));
1247 }
1248 #endif /* SMP */
1249 
1250 /*
1251  * Allocate memory for new intr_map_data structure.
1252  * Initialize common fields.
1253  */
1254 struct intr_map_data *
1255 intr_alloc_map_data(enum intr_map_data_type type, size_t len, int flags)
1256 {
1257 	struct intr_map_data *data;
1258 
1259 	data = malloc(len, M_INTRNG, flags);
1260 	data->type = type;
1261 	data->len = len;
1262 	return (data);
1263 }
1264 
1265 void intr_free_intr_map_data(struct intr_map_data *data)
1266 {
1267 
1268 	free(data, M_INTRNG);
1269 }
1270 
1271 /*
1272  *  Register a MSI/MSI-X interrupt controller
1273  */
1274 int
1275 intr_msi_register(device_t dev, intptr_t xref)
1276 {
1277 	struct intr_pic *pic;
1278 
1279 	if (dev == NULL)
1280 		return (EINVAL);
1281 	pic = pic_create(dev, xref, FLAG_MSI);
1282 	if (pic == NULL)
1283 		return (ENOMEM);
1284 
1285 	debugf("PIC %p registered for %s <dev %p, xref %jx>\n", pic,
1286 	    device_get_nameunit(dev), dev, (uintmax_t)xref);
1287 	return (0);
1288 }
1289 
1290 int
1291 intr_alloc_msi(device_t pci, device_t child, intptr_t xref, int count,
1292     int maxcount, int *irqs)
1293 {
1294 	struct intr_irqsrc **isrc;
1295 	struct intr_pic *pic;
1296 	device_t pdev;
1297 	struct intr_map_data_msi *msi;
1298 	int err, i;
1299 
1300 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1301 	if (pic == NULL)
1302 		return (ESRCH);
1303 
1304 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1305 	    ("%s: Found a non-MSI controller: %s", __func__,
1306 	     device_get_name(pic->pic_dev)));
1307 
1308 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1309 	err = MSI_ALLOC_MSI(pic->pic_dev, child, count, maxcount, &pdev, isrc);
1310 	if (err != 0) {
1311 		free(isrc, M_INTRNG);
1312 		return (err);
1313 	}
1314 
1315 	for (i = 0; i < count; i++) {
1316 		msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1317 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1318 		msi-> isrc = isrc[i];
1319 		irqs[i] = intr_map_irq(pic->pic_dev, xref,
1320 		    (struct intr_map_data *)msi);
1321 
1322 	}
1323 	free(isrc, M_INTRNG);
1324 
1325 	return (err);
1326 }
1327 
1328 int
1329 intr_release_msi(device_t pci, device_t child, intptr_t xref, int count,
1330     int *irqs)
1331 {
1332 	struct intr_irqsrc **isrc;
1333 	struct intr_pic *pic;
1334 	struct intr_map_data_msi *msi;
1335 	int i, err;
1336 
1337 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1338 	if (pic == NULL)
1339 		return (ESRCH);
1340 
1341 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1342 	    ("%s: Found a non-MSI controller: %s", __func__,
1343 	     device_get_name(pic->pic_dev)));
1344 
1345 	isrc = malloc(sizeof(*isrc) * count, M_INTRNG, M_WAITOK);
1346 
1347 	for (i = 0; i < count; i++) {
1348 		msi = (struct intr_map_data_msi *)
1349 		    intr_map_get_map_data(irqs[i]);
1350 		KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1351 		    ("%s: irq %d map data is not MSI", __func__,
1352 		    irqs[i]));
1353 		isrc[i] = msi->isrc;
1354 	}
1355 
1356 	err = MSI_RELEASE_MSI(pic->pic_dev, child, count, isrc);
1357 
1358 	for (i = 0; i < count; i++) {
1359 		if (isrc[i] != NULL)
1360 			intr_unmap_irq(irqs[i]);
1361 	}
1362 
1363 	free(isrc, M_INTRNG);
1364 	return (err);
1365 }
1366 
1367 int
1368 intr_alloc_msix(device_t pci, device_t child, intptr_t xref, int *irq)
1369 {
1370 	struct intr_irqsrc *isrc;
1371 	struct intr_pic *pic;
1372 	device_t pdev;
1373 	struct intr_map_data_msi *msi;
1374 	int err;
1375 
1376 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1377 	if (pic == NULL)
1378 		return (ESRCH);
1379 
1380 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1381 	    ("%s: Found a non-MSI controller: %s", __func__,
1382 	     device_get_name(pic->pic_dev)));
1383 
1384 	err = MSI_ALLOC_MSIX(pic->pic_dev, child, &pdev, &isrc);
1385 	if (err != 0)
1386 		return (err);
1387 
1388 	msi = (struct intr_map_data_msi *)intr_alloc_map_data(
1389 		    INTR_MAP_DATA_MSI, sizeof(*msi), M_WAITOK | M_ZERO);
1390 	msi->isrc = isrc;
1391 	*irq = intr_map_irq(pic->pic_dev, xref, (struct intr_map_data *)msi);
1392 	return (0);
1393 }
1394 
1395 int
1396 intr_release_msix(device_t pci, device_t child, intptr_t xref, int irq)
1397 {
1398 	struct intr_irqsrc *isrc;
1399 	struct intr_pic *pic;
1400 	struct intr_map_data_msi *msi;
1401 	int err;
1402 
1403 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1404 	if (pic == NULL)
1405 		return (ESRCH);
1406 
1407 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1408 	    ("%s: Found a non-MSI controller: %s", __func__,
1409 	     device_get_name(pic->pic_dev)));
1410 
1411 	msi = (struct intr_map_data_msi *)
1412 	    intr_map_get_map_data(irq);
1413 	KASSERT(msi->hdr.type == INTR_MAP_DATA_MSI,
1414 	    ("%s: irq %d map data is not MSI", __func__,
1415 	    irq));
1416 	isrc = msi->isrc;
1417 	if (isrc == NULL) {
1418 		intr_unmap_irq(irq);
1419 		return (EINVAL);
1420 	}
1421 
1422 	err = MSI_RELEASE_MSIX(pic->pic_dev, child, isrc);
1423 	intr_unmap_irq(irq);
1424 
1425 	return (err);
1426 }
1427 
1428 int
1429 intr_map_msi(device_t pci, device_t child, intptr_t xref, int irq,
1430     uint64_t *addr, uint32_t *data)
1431 {
1432 	struct intr_irqsrc *isrc;
1433 	struct intr_pic *pic;
1434 	int err;
1435 
1436 	pic = pic_lookup(NULL, xref, FLAG_MSI);
1437 	if (pic == NULL)
1438 		return (ESRCH);
1439 
1440 	KASSERT((pic->pic_flags & FLAG_TYPE_MASK) == FLAG_MSI,
1441 	    ("%s: Found a non-MSI controller: %s", __func__,
1442 	     device_get_name(pic->pic_dev)));
1443 
1444 	isrc = intr_map_get_isrc(irq);
1445 	if (isrc == NULL)
1446 		return (EINVAL);
1447 
1448 	err = MSI_MAP_MSI(pic->pic_dev, child, isrc, addr, data);
1449 	return (err);
1450 }
1451 
/*
 * Empty stub: takes no arguments and performs no work.
 * NOTE(review): presumably kept because code outside this file still
 * references the symbol -- confirm before removing.
 */
void dosoftints(void);
void
dosoftints(void)
{
}
1457 
1458 #ifdef SMP
/*
 *  Init interrupt controller on another CPU.
 *
 *  Hands the root interrupt controller device to PIC_INIT_SECONDARY().
 *  A root controller must already be attached.
 */
void
intr_pic_init_secondary(void)
{

	/*
	 * QQQ: Is the root PIC the only one that is aware of other CPUs?
	 */
	KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));

	/*
	 * NOTE(review): the call below is made without isrc_table_lock;
	 * the surrounding mtx_lock()/mtx_unlock() calls on that lock had
	 * been left commented out here -- confirm this is intentional.
	 */
	PIC_INIT_SECONDARY(intr_irq_root_dev);
}
1475 #endif
1476 
1477 #ifdef DDB
1478 DB_SHOW_COMMAND(irqs, db_show_irqs)
1479 {
1480 	u_int i, irqsum;
1481 	u_long num;
1482 	struct intr_irqsrc *isrc;
1483 
1484 	for (irqsum = 0, i = 0; i < NIRQ; i++) {
1485 		isrc = irq_sources[i];
1486 		if (isrc == NULL)
1487 			continue;
1488 
1489 		num = isrc->isrc_count != NULL ? isrc->isrc_count[0] : 0;
1490 		db_printf("irq%-3u <%s>: cpu %02lx%s cnt %lu\n", i,
1491 		    isrc->isrc_name, isrc->isrc_cpu.__bits[0],
1492 		    isrc->isrc_flags & INTR_ISRCF_BOUND ? " (bound)" : "", num);
1493 		irqsum += num;
1494 	}
1495 	db_printf("irq total %u\n", irqsum);
1496 }
1497 #endif
1498 
/*
 * Interrupt mapping table functions.
 *
 * Please keep this part separate; it can be transformed into an
 * extension of standard resources.
 */
/*
 * One slot of the irq_map[] table; the slot index is the resource id
 * handed out by intr_map_irq().
 */
struct intr_map_entry
{
	/* Device passed to intr_map_irq() when the entry was created. */
	device_t 		dev;
	/* Cross reference passed to intr_map_irq(). */
	intptr_t 		xref;
	/* Mapping data; released together with the entry on unmap. */
	struct intr_map_data 	*map_data;
	/* Interrupt source; NULL until intr_map_set_isrc() stores one. */
	struct intr_irqsrc 	*isrc;
	/* XXX TODO DISCONNECTED PICs */
	/*int			flags */
};
1514 
/* XXX Convert irq_map[] to a dynamically expandable one. */
/* Mapping table indexed by resource id; a NULL slot is free. */
static struct intr_map_entry *irq_map[2 * NIRQ];
/* Number of slots in irq_map[]. */
static int irq_map_count = nitems(irq_map);
/* Index at which intr_map_irq() starts looking for a free slot. */
static int irq_map_first_free_idx;
/* Held around accesses to irq_map[] and irq_map_first_free_idx. */
static struct mtx irq_map_lock;
1520 
1521 static struct intr_irqsrc *
1522 intr_map_get_isrc(u_int res_id)
1523 {
1524 	struct intr_irqsrc *isrc;
1525 
1526 	isrc = NULL;
1527 	mtx_lock(&irq_map_lock);
1528 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1529 		isrc = irq_map[res_id]->isrc;
1530 	mtx_unlock(&irq_map_lock);
1531 
1532 	return (isrc);
1533 }
1534 
1535 static void
1536 intr_map_set_isrc(u_int res_id, struct intr_irqsrc *isrc)
1537 {
1538 
1539 	mtx_lock(&irq_map_lock);
1540 	if (res_id < irq_map_count && irq_map[res_id] != NULL)
1541 		irq_map[res_id]->isrc = isrc;
1542 	mtx_unlock(&irq_map_lock);
1543 }
1544 
1545 /*
1546  * Get a copy of intr_map_entry data
1547  */
1548 static struct intr_map_data *
1549 intr_map_get_map_data(u_int res_id)
1550 {
1551 	struct intr_map_data *data;
1552 
1553 	data = NULL;
1554 	mtx_lock(&irq_map_lock);
1555 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1556 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1557 	data = irq_map[res_id]->map_data;
1558 	mtx_unlock(&irq_map_lock);
1559 
1560 	return (data);
1561 }
1562 
1563 /*
1564  * Get a copy of intr_map_entry data
1565  */
1566 static void
1567 intr_map_copy_map_data(u_int res_id, device_t *map_dev, intptr_t *map_xref,
1568     struct intr_map_data **data)
1569 {
1570 	size_t len;
1571 
1572 	len = 0;
1573 	mtx_lock(&irq_map_lock);
1574 	if (res_id >= irq_map_count || irq_map[res_id] == NULL)
1575 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1576 	if (irq_map[res_id]->map_data != NULL)
1577 		len = irq_map[res_id]->map_data->len;
1578 	mtx_unlock(&irq_map_lock);
1579 
1580 	if (len == 0)
1581 		*data = NULL;
1582 	else
1583 		*data = malloc(len, M_INTRNG, M_WAITOK | M_ZERO);
1584 	mtx_lock(&irq_map_lock);
1585 	if (irq_map[res_id] == NULL)
1586 		panic("Attempt to copy invalid resource id: %u\n", res_id);
1587 	if (len != 0) {
1588 		if (len != irq_map[res_id]->map_data->len)
1589 			panic("Resource id: %u has changed.\n", res_id);
1590 		memcpy(*data, irq_map[res_id]->map_data, len);
1591 	}
1592 	*map_dev = irq_map[res_id]->dev;
1593 	*map_xref = irq_map[res_id]->xref;
1594 	mtx_unlock(&irq_map_lock);
1595 }
1596 
1597 /*
1598  * Allocate and fill new entry in irq_map table.
1599  */
1600 u_int
1601 intr_map_irq(device_t dev, intptr_t xref, struct intr_map_data *data)
1602 {
1603 	u_int i;
1604 	struct intr_map_entry *entry;
1605 
1606 	/* Prepare new entry first. */
1607 	entry = malloc(sizeof(*entry), M_INTRNG, M_WAITOK | M_ZERO);
1608 
1609 	entry->dev = dev;
1610 	entry->xref = xref;
1611 	entry->map_data = data;
1612 	entry->isrc = NULL;
1613 
1614 	mtx_lock(&irq_map_lock);
1615 	for (i = irq_map_first_free_idx; i < irq_map_count; i++) {
1616 		if (irq_map[i] == NULL) {
1617 			irq_map[i] = entry;
1618 			irq_map_first_free_idx = i + 1;
1619 			mtx_unlock(&irq_map_lock);
1620 			return (i);
1621 		}
1622 	}
1623 	mtx_unlock(&irq_map_lock);
1624 
1625 	/* XXX Expand irq_map table */
1626 	panic("IRQ mapping table is full.");
1627 }
1628 
1629 /*
1630  * Remove and free mapping entry.
1631  */
1632 void
1633 intr_unmap_irq(u_int res_id)
1634 {
1635 	struct intr_map_entry *entry;
1636 
1637 	mtx_lock(&irq_map_lock);
1638 	if ((res_id >= irq_map_count) || (irq_map[res_id] == NULL))
1639 		panic("Attempt to unmap invalid resource id: %u\n", res_id);
1640 	entry = irq_map[res_id];
1641 	irq_map[res_id] = NULL;
1642 	irq_map_first_free_idx = res_id;
1643 	mtx_unlock(&irq_map_lock);
1644 	intr_free_intr_map_data(entry->map_data);
1645 	free(entry, M_INTRNG);
1646 }
1647 
1648 /*
1649  * Clone mapping entry.
1650  */
1651 u_int
1652 intr_map_clone_irq(u_int old_res_id)
1653 {
1654 	device_t map_dev;
1655 	intptr_t map_xref;
1656 	struct intr_map_data *data;
1657 
1658 	intr_map_copy_map_data(old_res_id, &map_dev, &map_xref, &data);
1659 	return (intr_map_irq(map_dev, map_xref, data));
1660 }
1661 
/*
 * Initialize irq_map_lock, the mutex taken around accesses to the
 * interrupt mapping table.  Registered below as a SYSINIT at SI_SUB_INTR,
 * SI_ORDER_FIRST; the argument is unused.
 */
static void
intr_map_init(void *dummy __unused)
{

	mtx_init(&irq_map_lock, "intr map table", NULL, MTX_DEF);
}
SYSINIT(intr_map_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_map_init, NULL);
1669