xref: /freebsd/sys/kern/kern_intr.c (revision d876124d6ae9d56da5b4ff4c6015efd1d0c9222a)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include "opt_ddb.h"
31 
32 #include <sys/param.h>
33 #include <sys/bus.h>
34 #include <sys/conf.h>
35 #include <sys/cpuset.h>
36 #include <sys/rtprio.h>
37 #include <sys/systm.h>
38 #include <sys/interrupt.h>
39 #include <sys/kernel.h>
40 #include <sys/kthread.h>
41 #include <sys/ktr.h>
42 #include <sys/limits.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/random.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sched.h>
50 #include <sys/smp.h>
51 #include <sys/sysctl.h>
52 #include <sys/unistd.h>
53 #include <sys/vmmeter.h>
54 #include <machine/atomic.h>
55 #include <machine/cpu.h>
56 #include <machine/md_var.h>
57 #include <machine/stdarg.h>
58 #ifdef DDB
59 #include <ddb/ddb.h>
60 #include <ddb/db_sym.h>
61 #endif
62 
63 /*
64  * Describe an interrupt thread.  There is one of these per interrupt event.
65  */
struct intr_thread {
	struct intr_event *it_event;	/* Event this thread was attached to. */
	struct thread *it_thread;	/* Kernel thread. */
	int	it_flags;		/* (j) IT_* flags. */
	int	it_need;		/* Needs service; set to 1 to make the
					   thread (re)scan its handlers. */
};
72 
/* Interrupt thread flags kept in it_flags */
#define	IT_DEAD		0x000001	/* Thread is waiting to exit. */

/*
 * Sample handed to random_harvest() when an event flagged IE_ENTROPY is
 * scheduled: the thread that was running at the time and the address of
 * the interrupt event being scheduled.
 */
struct	intr_entropy {
	struct	thread *td;	/* curthread when the sample was taken. */
	uintptr_t event;	/* The intr_event pointer, as an integer. */
};
80 
/*
 * NOTE(review): clk_intr_event, tty_intr_event and vm_ih are not referenced
 * in the part of the file reviewed here; presumably they are filled in by
 * the software interrupt setup code -- confirm against start_softintr().
 */
struct	intr_event *clk_intr_event;
struct	intr_event *tty_intr_event;
void	*vm_ih;
/* Process handle passed to kproc_kthread_add() for every ithread. */
struct proc *intrproc;

/* Malloc type for intr_event, intr_thread and intr_handler allocations. */
static MALLOC_DEFINE(M_ITHREAD, "ithread", "Interrupt Threads");

/* Storm threshold, settable as a loader tunable and a read/write sysctl. */
static int intr_storm_threshold = 1000;
TUNABLE_INT("hw.intr_storm_threshold", &intr_storm_threshold);
SYSCTL_INT(_hw, OID_AUTO, intr_storm_threshold, CTLFLAG_RW,
    &intr_storm_threshold, 0,
    "Number of consecutive interrupts before storm protection is enabled");
/* List of every interrupt event; insert, lookup and remove hold event_lock. */
static TAILQ_HEAD(, intr_event) event_list =
    TAILQ_HEAD_INITIALIZER(event_list);
static struct mtx event_lock;
MTX_SYSINIT(intr_event_list, &event_lock, "intr event list", MTX_DEF);
97 
/*
 * Forward declarations.  With INTR_FILTER defined, the scheduling and
 * thread-creation helpers take an extra argument (the ithread to schedule,
 * or the handler the new thread is created for), and two extra helpers
 * exist for filters and per-handler threads.
 */
static void	intr_event_update(struct intr_event *ie);
#ifdef INTR_FILTER
static int	intr_event_schedule_thread(struct intr_event *ie,
		    struct intr_thread *ithd);
static int	intr_filter_loop(struct intr_event *ie,
		    struct trapframe *frame, struct intr_thread **ithd);
static struct intr_thread *ithread_create(const char *name,
			      struct intr_handler *ih);
#else
static int	intr_event_schedule_thread(struct intr_event *ie);
static struct intr_thread *ithread_create(const char *name);
#endif
static void	ithread_destroy(struct intr_thread *ithread);
static void	ithread_execute_handlers(struct proc *p,
		    struct intr_event *ie);
#ifdef INTR_FILTER
static void	priv_ithread_execute_handler(struct proc *p,
		    struct intr_handler *ih);
#endif
static void	ithread_loop(void *);
static void	ithread_update(struct intr_thread *ithd);
static void	start_softintr(void *);
120 
121 /* Map an interrupt type to an ithread priority. */
122 u_char
123 intr_priority(enum intr_type flags)
124 {
125 	u_char pri;
126 
127 	flags &= (INTR_TYPE_TTY | INTR_TYPE_BIO | INTR_TYPE_NET |
128 	    INTR_TYPE_CAM | INTR_TYPE_MISC | INTR_TYPE_CLK | INTR_TYPE_AV);
129 	switch (flags) {
130 	case INTR_TYPE_TTY:
131 		pri = PI_TTYLOW;
132 		break;
133 	case INTR_TYPE_BIO:
134 		/*
135 		 * XXX We need to refine this.  BSD/OS distinguishes
136 		 * between tape and disk priorities.
137 		 */
138 		pri = PI_DISK;
139 		break;
140 	case INTR_TYPE_NET:
141 		pri = PI_NET;
142 		break;
143 	case INTR_TYPE_CAM:
144 		pri = PI_DISK;          /* XXX or PI_CAM? */
145 		break;
146 	case INTR_TYPE_AV:		/* Audio/video */
147 		pri = PI_AV;
148 		break;
149 	case INTR_TYPE_CLK:
150 		pri = PI_REALTIME;
151 		break;
152 	case INTR_TYPE_MISC:
153 		pri = PI_DULL;          /* don't care */
154 		break;
155 	default:
156 		/* We didn't specify an interrupt level. */
157 		panic("intr_priority: no interrupt type in flags");
158 	}
159 
160 	return pri;
161 }
162 
163 /*
164  * Update an ithread based on the associated intr_event.
165  */
166 static void
167 ithread_update(struct intr_thread *ithd)
168 {
169 	struct intr_event *ie;
170 	struct thread *td;
171 	u_char pri;
172 
173 	ie = ithd->it_event;
174 	td = ithd->it_thread;
175 
176 	/* Determine the overall priority of this event. */
177 	if (TAILQ_EMPTY(&ie->ie_handlers))
178 		pri = PRI_MAX_ITHD;
179 	else
180 		pri = TAILQ_FIRST(&ie->ie_handlers)->ih_pri;
181 
182 	/* Update name and priority. */
183 	strlcpy(td->td_name, ie->ie_fullname, sizeof(td->td_name));
184 	thread_lock(td);
185 	sched_prio(td, pri);
186 	thread_unlock(td);
187 }
188 
189 /*
190  * Regenerate the full name of an interrupt event and update its priority.
191  */
192 static void
193 intr_event_update(struct intr_event *ie)
194 {
195 	struct intr_handler *ih;
196 	char *last;
197 	int missed, space;
198 
199 	/* Start off with no entropy and just the name of the event. */
200 	mtx_assert(&ie->ie_lock, MA_OWNED);
201 	strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname));
202 	ie->ie_flags &= ~IE_ENTROPY;
203 	missed = 0;
204 	space = 1;
205 
206 	/* Run through all the handlers updating values. */
207 	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
208 		if (strlen(ie->ie_fullname) + strlen(ih->ih_name) + 1 <
209 		    sizeof(ie->ie_fullname)) {
210 			strcat(ie->ie_fullname, " ");
211 			strcat(ie->ie_fullname, ih->ih_name);
212 			space = 0;
213 		} else
214 			missed++;
215 		if (ih->ih_flags & IH_ENTROPY)
216 			ie->ie_flags |= IE_ENTROPY;
217 	}
218 
219 	/*
220 	 * If the handler names were too long, add +'s to indicate missing
221 	 * names. If we run out of room and still have +'s to add, change
222 	 * the last character from a + to a *.
223 	 */
224 	last = &ie->ie_fullname[sizeof(ie->ie_fullname) - 2];
225 	while (missed-- > 0) {
226 		if (strlen(ie->ie_fullname) + 1 == sizeof(ie->ie_fullname)) {
227 			if (*last == '+') {
228 				*last = '*';
229 				break;
230 			} else
231 				*last = '+';
232 		} else if (space) {
233 			strcat(ie->ie_fullname, " +");
234 			space = 0;
235 		} else
236 			strcat(ie->ie_fullname, "+");
237 	}
238 
239 	/*
240 	 * If this event has an ithread, update it's priority and
241 	 * name.
242 	 */
243 	if (ie->ie_thread != NULL)
244 		ithread_update(ie->ie_thread);
245 	CTR2(KTR_INTR, "%s: updated %s", __func__, ie->ie_fullname);
246 }
247 
248 int
249 intr_event_create(struct intr_event **event, void *source,int flags, int irq,
250     void (*pre_ithread)(void *), void (*post_ithread)(void *),
251     void (*post_filter)(void *), int (*assign_cpu)(void *, u_char),
252     const char *fmt, ...)
253 {
254 	struct intr_event *ie;
255 	va_list ap;
256 
257 	/* The only valid flag during creation is IE_SOFT. */
258 	if ((flags & ~IE_SOFT) != 0)
259 		return (EINVAL);
260 	ie = malloc(sizeof(struct intr_event), M_ITHREAD, M_WAITOK | M_ZERO);
261 	ie->ie_source = source;
262 	ie->ie_pre_ithread = pre_ithread;
263 	ie->ie_post_ithread = post_ithread;
264 	ie->ie_post_filter = post_filter;
265 	ie->ie_assign_cpu = assign_cpu;
266 	ie->ie_flags = flags;
267 	ie->ie_irq = irq;
268 	ie->ie_cpu = NOCPU;
269 	TAILQ_INIT(&ie->ie_handlers);
270 	mtx_init(&ie->ie_lock, "intr event", NULL, MTX_DEF);
271 
272 	va_start(ap, fmt);
273 	vsnprintf(ie->ie_name, sizeof(ie->ie_name), fmt, ap);
274 	va_end(ap);
275 	strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname));
276 	mtx_lock(&event_lock);
277 	TAILQ_INSERT_TAIL(&event_list, ie, ie_list);
278 	mtx_unlock(&event_lock);
279 	if (event != NULL)
280 		*event = ie;
281 	CTR2(KTR_INTR, "%s: created %s", __func__, ie->ie_name);
282 	return (0);
283 }
284 
285 /*
286  * Bind an interrupt event to the specified CPU.  Note that not all
287  * platforms support binding an interrupt to a CPU.  For those
288  * platforms this request will fail.  For supported platforms, any
289  * associated ithreads as well as the primary interrupt context will
290  * be bound to the specificed CPU.  Using a cpu id of NOCPU unbinds
291  * the interrupt event.
292  */
293 int
294 intr_event_bind(struct intr_event *ie, u_char cpu)
295 {
296 	cpuset_t mask;
297 	lwpid_t id;
298 	int error;
299 
300 	/* Need a CPU to bind to. */
301 	if (cpu != NOCPU && CPU_ABSENT(cpu))
302 		return (EINVAL);
303 
304 	if (ie->ie_assign_cpu == NULL)
305 		return (EOPNOTSUPP);
306 	/*
307 	 * If we have any ithreads try to set their mask first since this
308 	 * can fail.
309 	 */
310 	mtx_lock(&ie->ie_lock);
311 	if (ie->ie_thread != NULL) {
312 		CPU_ZERO(&mask);
313 		if (cpu == NOCPU)
314 			CPU_COPY(cpuset_root, &mask);
315 		else
316 			CPU_SET(cpu, &mask);
317 		id = ie->ie_thread->it_thread->td_tid;
318 		mtx_unlock(&ie->ie_lock);
319 		error = cpuset_setthread(id, &mask);
320 		if (error)
321 			return (error);
322 	} else
323 		mtx_unlock(&ie->ie_lock);
324 	error = ie->ie_assign_cpu(ie->ie_source, cpu);
325 	if (error)
326 		return (error);
327 	mtx_lock(&ie->ie_lock);
328 	ie->ie_cpu = cpu;
329 	mtx_unlock(&ie->ie_lock);
330 
331 	return (error);
332 }
333 
334 static struct intr_event *
335 intr_lookup(int irq)
336 {
337 	struct intr_event *ie;
338 
339 	mtx_lock(&event_lock);
340 	TAILQ_FOREACH(ie, &event_list, ie_list)
341 		if (ie->ie_irq == irq &&
342 		    (ie->ie_flags & IE_SOFT) == 0 &&
343 		    TAILQ_FIRST(&ie->ie_handlers) != NULL)
344 			break;
345 	mtx_unlock(&event_lock);
346 	return (ie);
347 }
348 
349 int
350 intr_setaffinity(int irq, void *m)
351 {
352 	struct intr_event *ie;
353 	cpuset_t *mask;
354 	u_char cpu;
355 	int error;
356 	int n;
357 
358 	mask = m;
359 	error = 0;
360 	cpu = NOCPU;
361 	/*
362 	 * If we're setting all cpus we can unbind.  Otherwise make sure
363 	 * only one cpu is in the set.
364 	 */
365 	if (CPU_CMP(cpuset_root, mask)) {
366 		for (n = 0; n < CPU_SETSIZE; n++) {
367 			if (!CPU_ISSET(n, mask))
368 				continue;
369 			if (cpu != NOCPU)
370 				return (EINVAL);
371 			cpu = (u_char)n;
372 		}
373 	}
374 	ie = intr_lookup(irq);
375 	if (ie == NULL)
376 		return (ESRCH);
377 	intr_event_bind(ie, cpu);
378 	return (error);
379 }
380 
381 int
382 intr_getaffinity(int irq, void *m)
383 {
384 	struct intr_event *ie;
385 	cpuset_t *mask;
386 
387 	mask = m;
388 	ie = intr_lookup(irq);
389 	if (ie == NULL)
390 		return (ESRCH);
391 	CPU_ZERO(mask);
392 	mtx_lock(&ie->ie_lock);
393 	if (ie->ie_cpu == NOCPU)
394 		CPU_COPY(cpuset_root, mask);
395 	else
396 		CPU_SET(ie->ie_cpu, mask);
397 	mtx_unlock(&ie->ie_lock);
398 	return (0);
399 }
400 
401 int
402 intr_event_destroy(struct intr_event *ie)
403 {
404 
405 	mtx_lock(&event_lock);
406 	mtx_lock(&ie->ie_lock);
407 	if (!TAILQ_EMPTY(&ie->ie_handlers)) {
408 		mtx_unlock(&ie->ie_lock);
409 		mtx_unlock(&event_lock);
410 		return (EBUSY);
411 	}
412 	TAILQ_REMOVE(&event_list, ie, ie_list);
413 #ifndef notyet
414 	if (ie->ie_thread != NULL) {
415 		ithread_destroy(ie->ie_thread);
416 		ie->ie_thread = NULL;
417 	}
418 #endif
419 	mtx_unlock(&ie->ie_lock);
420 	mtx_unlock(&event_lock);
421 	mtx_destroy(&ie->ie_lock);
422 	free(ie, M_ITHREAD);
423 	return (0);
424 }
425 
426 #ifndef INTR_FILTER
427 static struct intr_thread *
428 ithread_create(const char *name)
429 {
430 	struct intr_thread *ithd;
431 	struct thread *td;
432 	int error;
433 
434 	ithd = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | M_ZERO);
435 
436 	error = kproc_kthread_add(ithread_loop, ithd, &intrproc,
437 		    &td, RFSTOPPED | RFHIGHPID,
438 	    	    0, "intr", "%s", name);
439 	if (error)
440 		panic("kproc_create() failed with %d", error);
441 	thread_lock(td);
442 	sched_class(td, PRI_ITHD);
443 	TD_SET_IWAIT(td);
444 	thread_unlock(td);
445 	td->td_pflags |= TDP_ITHREAD;
446 	ithd->it_thread = td;
447 	CTR2(KTR_INTR, "%s: created %s", __func__, name);
448 	return (ithd);
449 }
450 #else
451 static struct intr_thread *
452 ithread_create(const char *name, struct intr_handler *ih)
453 {
454 	struct intr_thread *ithd;
455 	struct thread *td;
456 	int error;
457 
458 	ithd = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | M_ZERO);
459 
460 	error = kproc_kthread_add(ithread_loop, ih, &intrproc,
461 		    &td, RFSTOPPED | RFHIGHPID,
462 	    	    0, "intr", "%s", name);
463 	if (error)
464 		panic("kproc_create() failed with %d", error);
465 	thread_lock(td);
466 	sched_class(td, PRI_ITHD);
467 	TD_SET_IWAIT(td);
468 	thread_unlock(td);
469 	td->td_pflags |= TDP_ITHREAD;
470 	ithd->it_thread = td;
471 	CTR2(KTR_INTR, "%s: created %s", __func__, name);
472 	return (ithd);
473 }
474 #endif
475 
476 static void
477 ithread_destroy(struct intr_thread *ithread)
478 {
479 	struct thread *td;
480 
481 	CTR2(KTR_INTR, "%s: killing %s", __func__, ithread->it_event->ie_name);
482 	td = ithread->it_thread;
483 	thread_lock(td);
484 	ithread->it_flags |= IT_DEAD;
485 	if (TD_AWAITING_INTR(td)) {
486 		TD_CLR_IWAIT(td);
487 		sched_add(td, SRQ_INTR);
488 	}
489 	thread_unlock(td);
490 }
491 
492 #ifndef INTR_FILTER
493 int
494 intr_event_add_handler(struct intr_event *ie, const char *name,
495     driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri,
496     enum intr_type flags, void **cookiep)
497 {
498 	struct intr_handler *ih, *temp_ih;
499 	struct intr_thread *it;
500 
501 	if (ie == NULL || name == NULL || (handler == NULL && filter == NULL))
502 		return (EINVAL);
503 
504 	/* Allocate and populate an interrupt handler structure. */
505 	ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | M_ZERO);
506 	ih->ih_filter = filter;
507 	ih->ih_handler = handler;
508 	ih->ih_argument = arg;
509 	ih->ih_name = name;
510 	ih->ih_event = ie;
511 	ih->ih_pri = pri;
512 	if (flags & INTR_EXCL)
513 		ih->ih_flags = IH_EXCLUSIVE;
514 	if (flags & INTR_MPSAFE)
515 		ih->ih_flags |= IH_MPSAFE;
516 	if (flags & INTR_ENTROPY)
517 		ih->ih_flags |= IH_ENTROPY;
518 
519 	/* We can only have one exclusive handler in a event. */
520 	mtx_lock(&ie->ie_lock);
521 	if (!TAILQ_EMPTY(&ie->ie_handlers)) {
522 		if ((flags & INTR_EXCL) ||
523 		    (TAILQ_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) {
524 			mtx_unlock(&ie->ie_lock);
525 			free(ih, M_ITHREAD);
526 			return (EINVAL);
527 		}
528 	}
529 
530 	/* Add the new handler to the event in priority order. */
531 	TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) {
532 		if (temp_ih->ih_pri > ih->ih_pri)
533 			break;
534 	}
535 	if (temp_ih == NULL)
536 		TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next);
537 	else
538 		TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next);
539 	intr_event_update(ie);
540 
541 	/* Create a thread if we need one. */
542 	while (ie->ie_thread == NULL && handler != NULL) {
543 		if (ie->ie_flags & IE_ADDING_THREAD)
544 			msleep(ie, &ie->ie_lock, 0, "ithread", 0);
545 		else {
546 			ie->ie_flags |= IE_ADDING_THREAD;
547 			mtx_unlock(&ie->ie_lock);
548 			it = ithread_create("intr: newborn");
549 			mtx_lock(&ie->ie_lock);
550 			ie->ie_flags &= ~IE_ADDING_THREAD;
551 			ie->ie_thread = it;
552 			it->it_event = ie;
553 			ithread_update(it);
554 			wakeup(ie);
555 		}
556 	}
557 	CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name,
558 	    ie->ie_name);
559 	mtx_unlock(&ie->ie_lock);
560 
561 	if (cookiep != NULL)
562 		*cookiep = ih;
563 	return (0);
564 }
565 #else
566 int
567 intr_event_add_handler(struct intr_event *ie, const char *name,
568     driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri,
569     enum intr_type flags, void **cookiep)
570 {
571 	struct intr_handler *ih, *temp_ih;
572 	struct intr_thread *it;
573 
574 	if (ie == NULL || name == NULL || (handler == NULL && filter == NULL))
575 		return (EINVAL);
576 
577 	/* Allocate and populate an interrupt handler structure. */
578 	ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | M_ZERO);
579 	ih->ih_filter = filter;
580 	ih->ih_handler = handler;
581 	ih->ih_argument = arg;
582 	ih->ih_name = name;
583 	ih->ih_event = ie;
584 	ih->ih_pri = pri;
585 	if (flags & INTR_EXCL)
586 		ih->ih_flags = IH_EXCLUSIVE;
587 	if (flags & INTR_MPSAFE)
588 		ih->ih_flags |= IH_MPSAFE;
589 	if (flags & INTR_ENTROPY)
590 		ih->ih_flags |= IH_ENTROPY;
591 
592 	/* We can only have one exclusive handler in a event. */
593 	mtx_lock(&ie->ie_lock);
594 	if (!TAILQ_EMPTY(&ie->ie_handlers)) {
595 		if ((flags & INTR_EXCL) ||
596 		    (TAILQ_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) {
597 			mtx_unlock(&ie->ie_lock);
598 			free(ih, M_ITHREAD);
599 			return (EINVAL);
600 		}
601 	}
602 
603 	/* Add the new handler to the event in priority order. */
604 	TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) {
605 		if (temp_ih->ih_pri > ih->ih_pri)
606 			break;
607 	}
608 	if (temp_ih == NULL)
609 		TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next);
610 	else
611 		TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next);
612 	intr_event_update(ie);
613 
614 	/* For filtered handlers, create a private ithread to run on. */
615 	if (filter != NULL && handler != NULL) {
616 		mtx_unlock(&ie->ie_lock);
617 		it = ithread_create("intr: newborn", ih);
618 		mtx_lock(&ie->ie_lock);
619 		it->it_event = ie;
620 		ih->ih_thread = it;
621 		ithread_update(it); // XXX - do we really need this?!?!?
622 	} else { /* Create the global per-event thread if we need one. */
623 		while (ie->ie_thread == NULL && handler != NULL) {
624 			if (ie->ie_flags & IE_ADDING_THREAD)
625 				msleep(ie, &ie->ie_lock, 0, "ithread", 0);
626 			else {
627 				ie->ie_flags |= IE_ADDING_THREAD;
628 				mtx_unlock(&ie->ie_lock);
629 				it = ithread_create("intr: newborn", ih);
630 				mtx_lock(&ie->ie_lock);
631 				ie->ie_flags &= ~IE_ADDING_THREAD;
632 				ie->ie_thread = it;
633 				it->it_event = ie;
634 				ithread_update(it);
635 				wakeup(ie);
636 			}
637 		}
638 	}
639 	CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name,
640 	    ie->ie_name);
641 	mtx_unlock(&ie->ie_lock);
642 
643 	if (cookiep != NULL)
644 		*cookiep = ih;
645 	return (0);
646 }
647 #endif
648 
649 /*
650  * Return the ie_source field from the intr_event an intr_handler is
651  * associated with.
652  */
653 void *
654 intr_handler_source(void *cookie)
655 {
656 	struct intr_handler *ih;
657 	struct intr_event *ie;
658 
659 	ih = (struct intr_handler *)cookie;
660 	if (ih == NULL)
661 		return (NULL);
662 	ie = ih->ih_event;
663 	KASSERT(ie != NULL,
664 	    ("interrupt handler \"%s\" has a NULL interrupt event",
665 	    ih->ih_name));
666 	return (ie->ie_source);
667 }
668 
669 #ifndef INTR_FILTER
670 int
671 intr_event_remove_handler(void *cookie)
672 {
673 	struct intr_handler *handler = (struct intr_handler *)cookie;
674 	struct intr_event *ie;
675 #ifdef INVARIANTS
676 	struct intr_handler *ih;
677 #endif
678 #ifdef notyet
679 	int dead;
680 #endif
681 
682 	if (handler == NULL)
683 		return (EINVAL);
684 	ie = handler->ih_event;
685 	KASSERT(ie != NULL,
686 	    ("interrupt handler \"%s\" has a NULL interrupt event",
687 	    handler->ih_name));
688 	mtx_lock(&ie->ie_lock);
689 	CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name,
690 	    ie->ie_name);
691 #ifdef INVARIANTS
692 	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next)
693 		if (ih == handler)
694 			goto ok;
695 	mtx_unlock(&ie->ie_lock);
696 	panic("interrupt handler \"%s\" not found in interrupt event \"%s\"",
697 	    ih->ih_name, ie->ie_name);
698 ok:
699 #endif
700 	/*
701 	 * If there is no ithread, then just remove the handler and return.
702 	 * XXX: Note that an INTR_FAST handler might be running on another
703 	 * CPU!
704 	 */
705 	if (ie->ie_thread == NULL) {
706 		TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
707 		mtx_unlock(&ie->ie_lock);
708 		free(handler, M_ITHREAD);
709 		return (0);
710 	}
711 
712 	/*
713 	 * If the interrupt thread is already running, then just mark this
714 	 * handler as being dead and let the ithread do the actual removal.
715 	 *
716 	 * During a cold boot while cold is set, msleep() does not sleep,
717 	 * so we have to remove the handler here rather than letting the
718 	 * thread do it.
719 	 */
720 	thread_lock(ie->ie_thread->it_thread);
721 	if (!TD_AWAITING_INTR(ie->ie_thread->it_thread) && !cold) {
722 		handler->ih_flags |= IH_DEAD;
723 
724 		/*
725 		 * Ensure that the thread will process the handler list
726 		 * again and remove this handler if it has already passed
727 		 * it on the list.
728 		 */
729 		ie->ie_thread->it_need = 1;
730 	} else
731 		TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
732 	thread_unlock(ie->ie_thread->it_thread);
733 	while (handler->ih_flags & IH_DEAD)
734 		msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0);
735 	intr_event_update(ie);
736 #ifdef notyet
737 	/*
738 	 * XXX: This could be bad in the case of ppbus(8).  Also, I think
739 	 * this could lead to races of stale data when servicing an
740 	 * interrupt.
741 	 */
742 	dead = 1;
743 	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
744 		if (!(ih->ih_flags & IH_FAST)) {
745 			dead = 0;
746 			break;
747 		}
748 	}
749 	if (dead) {
750 		ithread_destroy(ie->ie_thread);
751 		ie->ie_thread = NULL;
752 	}
753 #endif
754 	mtx_unlock(&ie->ie_lock);
755 	free(handler, M_ITHREAD);
756 	return (0);
757 }
758 
759 static int
760 intr_event_schedule_thread(struct intr_event *ie)
761 {
762 	struct intr_entropy entropy;
763 	struct intr_thread *it;
764 	struct thread *td;
765 	struct thread *ctd;
766 	struct proc *p;
767 
768 	/*
769 	 * If no ithread or no handlers, then we have a stray interrupt.
770 	 */
771 	if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) ||
772 	    ie->ie_thread == NULL)
773 		return (EINVAL);
774 
775 	ctd = curthread;
776 	it = ie->ie_thread;
777 	td = it->it_thread;
778 	p = td->td_proc;
779 
780 	/*
781 	 * If any of the handlers for this ithread claim to be good
782 	 * sources of entropy, then gather some.
783 	 */
784 	if (harvest.interrupt && ie->ie_flags & IE_ENTROPY) {
785 		CTR3(KTR_INTR, "%s: pid %d (%s) gathering entropy", __func__,
786 		    p->p_pid, td->td_name);
787 		entropy.event = (uintptr_t)ie;
788 		entropy.td = ctd;
789 		random_harvest(&entropy, sizeof(entropy), 2, 0,
790 		    RANDOM_INTERRUPT);
791 	}
792 
793 	KASSERT(p != NULL, ("ithread %s has no process", ie->ie_name));
794 
795 	/*
796 	 * Set it_need to tell the thread to keep running if it is already
797 	 * running.  Then, lock the thread and see if we actually need to
798 	 * put it on the runqueue.
799 	 */
800 	it->it_need = 1;
801 	thread_lock(td);
802 	if (TD_AWAITING_INTR(td)) {
803 		CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid,
804 		    td->td_name);
805 		TD_CLR_IWAIT(td);
806 		sched_add(td, SRQ_INTR);
807 	} else {
808 		CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d",
809 		    __func__, p->p_pid, td->td_name, it->it_need, td->td_state);
810 	}
811 	thread_unlock(td);
812 
813 	return (0);
814 }
815 #else
816 int
817 intr_event_remove_handler(void *cookie)
818 {
819 	struct intr_handler *handler = (struct intr_handler *)cookie;
820 	struct intr_event *ie;
821 	struct intr_thread *it;
822 #ifdef INVARIANTS
823 	struct intr_handler *ih;
824 #endif
825 #ifdef notyet
826 	int dead;
827 #endif
828 
829 	if (handler == NULL)
830 		return (EINVAL);
831 	ie = handler->ih_event;
832 	KASSERT(ie != NULL,
833 	    ("interrupt handler \"%s\" has a NULL interrupt event",
834 	    handler->ih_name));
835 	mtx_lock(&ie->ie_lock);
836 	CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name,
837 	    ie->ie_name);
838 #ifdef INVARIANTS
839 	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next)
840 		if (ih == handler)
841 			goto ok;
842 	mtx_unlock(&ie->ie_lock);
843 	panic("interrupt handler \"%s\" not found in interrupt event \"%s\"",
844 	    ih->ih_name, ie->ie_name);
845 ok:
846 #endif
847 	/*
848 	 * If there are no ithreads (per event and per handler), then
849 	 * just remove the handler and return.
850 	 * XXX: Note that an INTR_FAST handler might be running on another CPU!
851 	 */
852 	if (ie->ie_thread == NULL && handler->ih_thread == NULL) {
853 		TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
854 		mtx_unlock(&ie->ie_lock);
855 		free(handler, M_ITHREAD);
856 		return (0);
857 	}
858 
859 	/* Private or global ithread? */
860 	it = (handler->ih_thread) ? handler->ih_thread : ie->ie_thread;
861 	/*
862 	 * If the interrupt thread is already running, then just mark this
863 	 * handler as being dead and let the ithread do the actual removal.
864 	 *
865 	 * During a cold boot while cold is set, msleep() does not sleep,
866 	 * so we have to remove the handler here rather than letting the
867 	 * thread do it.
868 	 */
869 	thread_lock(it->it_thread);
870 	if (!TD_AWAITING_INTR(it->it_thread) && !cold) {
871 		handler->ih_flags |= IH_DEAD;
872 
873 		/*
874 		 * Ensure that the thread will process the handler list
875 		 * again and remove this handler if it has already passed
876 		 * it on the list.
877 		 */
878 		it->it_need = 1;
879 	} else
880 		TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
881 	thread_unlock(it->it_thread);
882 	while (handler->ih_flags & IH_DEAD)
883 		msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0);
884 	/*
885 	 * At this point, the handler has been disconnected from the event,
886 	 * so we can kill the private ithread if any.
887 	 */
888 	if (handler->ih_thread) {
889 		ithread_destroy(handler->ih_thread);
890 		handler->ih_thread = NULL;
891 	}
892 	intr_event_update(ie);
893 #ifdef notyet
894 	/*
895 	 * XXX: This could be bad in the case of ppbus(8).  Also, I think
896 	 * this could lead to races of stale data when servicing an
897 	 * interrupt.
898 	 */
899 	dead = 1;
900 	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
901 		if (handler != NULL) {
902 			dead = 0;
903 			break;
904 		}
905 	}
906 	if (dead) {
907 		ithread_destroy(ie->ie_thread);
908 		ie->ie_thread = NULL;
909 	}
910 #endif
911 	mtx_unlock(&ie->ie_lock);
912 	free(handler, M_ITHREAD);
913 	return (0);
914 }
915 
916 static int
917 intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it)
918 {
919 	struct intr_entropy entropy;
920 	struct thread *td;
921 	struct thread *ctd;
922 	struct proc *p;
923 
924 	/*
925 	 * If no ithread or no handlers, then we have a stray interrupt.
926 	 */
927 	if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) || it == NULL)
928 		return (EINVAL);
929 
930 	ctd = curthread;
931 	td = it->it_thread;
932 	p = td->td_proc;
933 
934 	/*
935 	 * If any of the handlers for this ithread claim to be good
936 	 * sources of entropy, then gather some.
937 	 */
938 	if (harvest.interrupt && ie->ie_flags & IE_ENTROPY) {
939 		CTR3(KTR_INTR, "%s: pid %d (%s) gathering entropy", __func__,
940 		    p->p_pid, td->td_name);
941 		entropy.event = (uintptr_t)ie;
942 		entropy.td = ctd;
943 		random_harvest(&entropy, sizeof(entropy), 2, 0,
944 		    RANDOM_INTERRUPT);
945 	}
946 
947 	KASSERT(p != NULL, ("ithread %s has no process", ie->ie_name));
948 
949 	/*
950 	 * Set it_need to tell the thread to keep running if it is already
951 	 * running.  Then, lock the thread and see if we actually need to
952 	 * put it on the runqueue.
953 	 */
954 	it->it_need = 1;
955 	thread_lock(td);
956 	if (TD_AWAITING_INTR(td)) {
957 		CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid,
958 		    td->td_name);
959 		TD_CLR_IWAIT(td);
960 		sched_add(td, SRQ_INTR);
961 	} else {
962 		CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d",
963 		    __func__, p->p_pid, td->td_name, it->it_need, td->td_state);
964 	}
965 	thread_unlock(td);
966 
967 	return (0);
968 }
969 #endif
970 
971 /*
972  * Add a software interrupt handler to a specified event.  If a given event
973  * is not specified, then a new event is created.
974  */
975 int
976 swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler,
977 	    void *arg, int pri, enum intr_type flags, void **cookiep)
978 {
979 	struct intr_event *ie;
980 	int error;
981 
982 	if (flags & INTR_ENTROPY)
983 		return (EINVAL);
984 
985 	ie = (eventp != NULL) ? *eventp : NULL;
986 
987 	if (ie != NULL) {
988 		if (!(ie->ie_flags & IE_SOFT))
989 			return (EINVAL);
990 	} else {
991 		error = intr_event_create(&ie, NULL, IE_SOFT, 0,
992 		    NULL, NULL, NULL, NULL, "swi%d:", pri);
993 		if (error)
994 			return (error);
995 		if (eventp != NULL)
996 			*eventp = ie;
997 	}
998 	error = intr_event_add_handler(ie, name, NULL, handler, arg,
999 	    (pri * RQ_PPQ) + PI_SOFT, flags, cookiep);
1000 	if (error)
1001 		return (error);
1002 	if (pri == SWI_CLOCK) {
1003 		struct proc *p;
1004 		p = ie->ie_thread->it_thread->td_proc;
1005 		PROC_LOCK(p);
1006 		p->p_flag |= P_NOLOAD;
1007 		PROC_UNLOCK(p);
1008 	}
1009 	return (0);
1010 }
1011 
1012 /*
1013  * Schedule a software interrupt thread.
1014  */
1015 void
1016 swi_sched(void *cookie, int flags)
1017 {
1018 	struct intr_handler *ih = (struct intr_handler *)cookie;
1019 	struct intr_event *ie = ih->ih_event;
1020 	int error;
1021 
1022 	CTR3(KTR_INTR, "swi_sched: %s %s need=%d", ie->ie_name, ih->ih_name,
1023 	    ih->ih_need);
1024 
1025 	/*
1026 	 * Set ih_need for this handler so that if the ithread is already
1027 	 * running it will execute this handler on the next pass.  Otherwise,
1028 	 * it will execute it the next time it runs.
1029 	 */
1030 	atomic_store_rel_int(&ih->ih_need, 1);
1031 
1032 	if (!(flags & SWI_DELAY)) {
1033 		PCPU_INC(cnt.v_soft);
1034 #ifdef INTR_FILTER
1035 		error = intr_event_schedule_thread(ie, ie->ie_thread);
1036 #else
1037 		error = intr_event_schedule_thread(ie);
1038 #endif
1039 		KASSERT(error == 0, ("stray software interrupt"));
1040 	}
1041 }
1042 
/*
 * Remove a software interrupt handler.  Currently this code does not
 * remove the associated interrupt event if it becomes empty.  Calling code
 * may do so manually via intr_event_destroy(), but that's not really
 * an optimal interface.
 *
 * 'cookie' is the handler cookie returned by swi_add().  Returns whatever
 * intr_event_remove_handler() returns.
 */
int
swi_remove(void *cookie)
{
	int error;

	error = intr_event_remove_handler(cookie);
	return (error);
}
1055 
1056 #ifdef INTR_FILTER
1057 static void
1058 priv_ithread_execute_handler(struct proc *p, struct intr_handler *ih)
1059 {
1060 	struct intr_event *ie;
1061 
1062 	ie = ih->ih_event;
1063 	/*
1064 	 * If this handler is marked for death, remove it from
1065 	 * the list of handlers and wake up the sleeper.
1066 	 */
1067 	if (ih->ih_flags & IH_DEAD) {
1068 		mtx_lock(&ie->ie_lock);
1069 		TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next);
1070 		ih->ih_flags &= ~IH_DEAD;
1071 		wakeup(ih);
1072 		mtx_unlock(&ie->ie_lock);
1073 		return;
1074 	}
1075 
1076 	/* Execute this handler. */
1077 	CTR6(KTR_INTR, "%s: pid %d exec %p(%p) for %s flg=%x",
1078 	     __func__, p->p_pid, (void *)ih->ih_handler, ih->ih_argument,
1079 	     ih->ih_name, ih->ih_flags);
1080 
1081 	if (!(ih->ih_flags & IH_MPSAFE))
1082 		mtx_lock(&Giant);
1083 	ih->ih_handler(ih->ih_argument);
1084 	if (!(ih->ih_flags & IH_MPSAFE))
1085 		mtx_unlock(&Giant);
1086 }
1087 #endif
1088 
1089 static void
1090 ithread_execute_handlers(struct proc *p, struct intr_event *ie)
1091 {
1092 	struct intr_handler *ih, *ihn;
1093 
1094 	/* Interrupt handlers should not sleep. */
1095 	if (!(ie->ie_flags & IE_SOFT))
1096 		THREAD_NO_SLEEPING();
1097 	TAILQ_FOREACH_SAFE(ih, &ie->ie_handlers, ih_next, ihn) {
1098 
1099 		/*
1100 		 * If this handler is marked for death, remove it from
1101 		 * the list of handlers and wake up the sleeper.
1102 		 */
1103 		if (ih->ih_flags & IH_DEAD) {
1104 			mtx_lock(&ie->ie_lock);
1105 			TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next);
1106 			ih->ih_flags &= ~IH_DEAD;
1107 			wakeup(ih);
1108 			mtx_unlock(&ie->ie_lock);
1109 			continue;
1110 		}
1111 
1112 		/* Skip filter only handlers */
1113 		if (ih->ih_handler == NULL)
1114 			continue;
1115 
1116 		/*
1117 		 * For software interrupt threads, we only execute
1118 		 * handlers that have their need flag set.  Hardware
1119 		 * interrupt threads always invoke all of their handlers.
1120 		 */
1121 		if (ie->ie_flags & IE_SOFT) {
1122 			if (!ih->ih_need)
1123 				continue;
1124 			else
1125 				atomic_store_rel_int(&ih->ih_need, 0);
1126 		}
1127 
1128 		/* Execute this handler. */
1129 		CTR6(KTR_INTR, "%s: pid %d exec %p(%p) for %s flg=%x",
1130 		    __func__, p->p_pid, (void *)ih->ih_handler,
1131 		    ih->ih_argument, ih->ih_name, ih->ih_flags);
1132 
1133 		if (!(ih->ih_flags & IH_MPSAFE))
1134 			mtx_lock(&Giant);
1135 		ih->ih_handler(ih->ih_argument);
1136 		if (!(ih->ih_flags & IH_MPSAFE))
1137 			mtx_unlock(&Giant);
1138 	}
1139 	if (!(ie->ie_flags & IE_SOFT))
1140 		THREAD_SLEEPING_OK();
1141 
1142 	/*
1143 	 * Interrupt storm handling:
1144 	 *
1145 	 * If this interrupt source is currently storming, then throttle
1146 	 * it to only fire the handler once  per clock tick.
1147 	 *
1148 	 * If this interrupt source is not currently storming, but the
1149 	 * number of back to back interrupts exceeds the storm threshold,
1150 	 * then enter storming mode.
1151 	 */
1152 	if (intr_storm_threshold != 0 && ie->ie_count >= intr_storm_threshold &&
1153 	    !(ie->ie_flags & IE_SOFT)) {
1154 		/* Report the message only once every second. */
1155 		if (ppsratecheck(&ie->ie_warntm, &ie->ie_warncnt, 1)) {
1156 			printf(
1157 	"interrupt storm detected on \"%s\"; throttling interrupt source\n",
1158 			    ie->ie_name);
1159 		}
1160 		pause("istorm", 1);
1161 	} else
1162 		ie->ie_count++;
1163 
1164 	/*
1165 	 * Now that all the handlers have had a chance to run, reenable
1166 	 * the interrupt source.
1167 	 */
1168 	if (ie->ie_post_ithread != NULL)
1169 		ie->ie_post_ithread(ie->ie_source);
1170 }
1171 
1172 #ifndef INTR_FILTER
/*
 * This is the main code for interrupt threads (kernels built without
 * INTR_FILTER).
 *
 * 'arg' is the struct intr_thread describing this thread.  The function
 * loops forever: it runs the handlers of the associated event for as
 * long as it_need is set, then puts the thread into the interrupt-wait
 * state until it is scheduled again.  It only leaves the loop by exiting
 * the thread once IT_DEAD is set in it_flags.
 */
static void
ithread_loop(void *arg)
{
	struct intr_thread *ithd;
	struct intr_event *ie;
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	ithd = (struct intr_thread *)arg;
	/* The intr_thread handed to us must describe the running thread. */
	KASSERT(ithd->it_thread == td,
	    ("%s: ithread and proc linkage out of sync", __func__));
	ie = ithd->it_event;
	/* Start with a clean back to back interrupt count. */
	ie->ie_count = 0;

	/*
	 * As long as we have interrupts outstanding, go through the
	 * list of handlers, giving each one a go at it.
	 */
	for (;;) {
		/*
		 * If we are an orphaned thread, then just die: release
		 * our intr_thread structure and exit the kernel thread.
		 */
		if (ithd->it_flags & IT_DEAD) {
			CTR3(KTR_INTR, "%s: pid %d (%s) exiting", __func__,
			    p->p_pid, td->td_name);
			free(ithd, M_ITHREAD);
			kthread_exit();
		}

		/*
		 * Service interrupts.  If another interrupt arrives while
		 * we are running, it will set it_need to note that we
		 * should make another pass.
		 */
		while (ithd->it_need) {
			/*
			 * Clear it_need before running the handlers so
			 * that a request arriving during the pass is not
			 * lost.
			 *
			 * This might need a full read and write barrier
			 * to make sure that this write posts before any
			 * of the memory or device accesses in the
			 * handlers.
			 */
			atomic_store_rel_int(&ithd->it_need, 0);
			ithread_execute_handlers(p, ie);
		}
		/* We are about to suspend: check lock state first. */
		WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread");
		mtx_assert(&Giant, MA_NOTOWNED);

		/*
		 * Processed all our interrupts.  Now get the sched
		 * lock.  This may take a while and it_need may get
		 * set again, so we have to check it again.  IT_DEAD is
		 * rechecked as well so that a dying thread goes back to
		 * the top of the loop instead of waiting.
		 */
		thread_lock(td);
		if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) {
			TD_SET_IWAIT(td);
			/* Going idle ends the run of back to back interrupts. */
			ie->ie_count = 0;
			mi_switch(SW_VOL | SWT_IWAIT, NULL);
		}
		thread_unlock(td);
	}
}
1239 
1240 /*
1241  * Main interrupt handling body.
1242  *
1243  * Input:
1244  * o ie:                        the event connected to this interrupt.
1245  * o frame:                     some archs (i.e. i386) pass a frame to some.
1246  *                              handlers as their main argument.
1247  * Return value:
1248  * o 0:                         everything ok.
1249  * o EINVAL:                    stray interrupt.
1250  */
1251 int
1252 intr_event_handle(struct intr_event *ie, struct trapframe *frame)
1253 {
1254 	struct intr_handler *ih;
1255 	struct thread *td;
1256 	int error, ret, thread;
1257 
1258 	td = curthread;
1259 
1260 	/* An interrupt with no event or handlers is a stray interrupt. */
1261 	if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers))
1262 		return (EINVAL);
1263 
1264 	/*
1265 	 * Execute fast interrupt handlers directly.
1266 	 * To support clock handlers, if a handler registers
1267 	 * with a NULL argument, then we pass it a pointer to
1268 	 * a trapframe as its argument.
1269 	 */
1270 	td->td_intr_nesting_level++;
1271 	thread = 0;
1272 	ret = 0;
1273 	critical_enter();
1274 	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
1275 		if (ih->ih_filter == NULL) {
1276 			thread = 1;
1277 			continue;
1278 		}
1279 		CTR4(KTR_INTR, "%s: exec %p(%p) for %s", __func__,
1280 		    ih->ih_filter, ih->ih_argument == NULL ? frame :
1281 		    ih->ih_argument, ih->ih_name);
1282 		if (ih->ih_argument == NULL)
1283 			ret = ih->ih_filter(frame);
1284 		else
1285 			ret = ih->ih_filter(ih->ih_argument);
1286 		/*
1287 		 * Wrapper handler special handling:
1288 		 *
1289 		 * in some particular cases (like pccard and pccbb),
1290 		 * the _real_ device handler is wrapped in a couple of
1291 		 * functions - a filter wrapper and an ithread wrapper.
1292 		 * In this case (and just in this case), the filter wrapper
1293 		 * could ask the system to schedule the ithread and mask
1294 		 * the interrupt source if the wrapped handler is composed
1295 		 * of just an ithread handler.
1296 		 *
1297 		 * TODO: write a generic wrapper to avoid people rolling
1298 		 * their own
1299 		 */
1300 		if (!thread) {
1301 			if (ret == FILTER_SCHEDULE_THREAD)
1302 				thread = 1;
1303 		}
1304 	}
1305 
1306 	if (thread) {
1307 		if (ie->ie_pre_ithread != NULL)
1308 			ie->ie_pre_ithread(ie->ie_source);
1309 	} else {
1310 		if (ie->ie_post_filter != NULL)
1311 			ie->ie_post_filter(ie->ie_source);
1312 	}
1313 
1314 	/* Schedule the ithread if needed. */
1315 	if (thread) {
1316 		error = intr_event_schedule_thread(ie);
1317 		KASSERT(error == 0, ("bad stray interrupt"));
1318 	}
1319 	critical_exit();
1320 	td->td_intr_nesting_level--;
1321 	return (0);
1322 }
1323 #else
/*
 * This is the main code for interrupt threads (kernels built with
 * INTR_FILTER).
 *
 * 'arg' is a struct intr_handler.  If the handler has its own ithread
 * (ih_thread is not NULL) this thread serves that single handler;
 * otherwise it serves every handler of the handler's event through the
 * event's ithread.  The function loops forever: it runs handlers for as
 * long as it_need is set, then puts the thread into the interrupt-wait
 * state until it is scheduled again.  It only leaves the loop by exiting
 * the thread once IT_DEAD is set in it_flags.
 */
static void
ithread_loop(void *arg)
{
	struct intr_thread *ithd;
	struct intr_handler *ih;
	struct intr_event *ie;
	struct thread *td;
	struct proc *p;
	int priv;

	td = curthread;
	p = td->td_proc;
	ih = (struct intr_handler *)arg;
	/* priv is set when the handler owns a private ithread. */
	priv = (ih->ih_thread != NULL) ? 1 : 0;
	ithd = (priv) ? ih->ih_thread : ih->ih_event->ie_thread;
	/* The selected intr_thread must describe the running thread. */
	KASSERT(ithd->it_thread == td,
	    ("%s: ithread and proc linkage out of sync", __func__));
	ie = ithd->it_event;
	/* Start with a clean back to back interrupt count. */
	ie->ie_count = 0;

	/*
	 * As long as we have interrupts outstanding, go through the
	 * list of handlers, giving each one a go at it.
	 */
	for (;;) {
		/*
		 * If we are an orphaned thread, then just die: release
		 * our intr_thread structure and exit the kernel thread.
		 */
		if (ithd->it_flags & IT_DEAD) {
			CTR3(KTR_INTR, "%s: pid %d (%s) exiting", __func__,
			    p->p_pid, td->td_name);
			free(ithd, M_ITHREAD);
			kthread_exit();
		}

		/*
		 * Service interrupts.  If another interrupt arrives while
		 * we are running, it will set it_need to note that we
		 * should make another pass.
		 */
		while (ithd->it_need) {
			/*
			 * Clear it_need before running the handlers so
			 * that a request arriving during the pass is not
			 * lost.
			 *
			 * This might need a full read and write barrier
			 * to make sure that this write posts before any
			 * of the memory or device accesses in the
			 * handlers.
			 */
			atomic_store_rel_int(&ithd->it_need, 0);
			/* A private ithread runs only its own handler. */
			if (priv)
				priv_ithread_execute_handler(p, ih);
			else
				ithread_execute_handlers(p, ie);
		}
		/* We are about to suspend: check lock state first. */
		WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread");
		mtx_assert(&Giant, MA_NOTOWNED);

		/*
		 * Processed all our interrupts.  Now get the sched
		 * lock.  This may take a while and it_need may get
		 * set again, so we have to check it again.  IT_DEAD is
		 * rechecked as well so that a dying thread goes back to
		 * the top of the loop instead of waiting.
		 */
		thread_lock(td);
		if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) {
			TD_SET_IWAIT(td);
			/* Going idle ends the run of back to back interrupts. */
			ie->ie_count = 0;
			mi_switch(SW_VOL | SWT_IWAIT, NULL);
		}
		thread_unlock(td);
	}
}
1397 
1398 /*
1399  * Main loop for interrupt filter.
1400  *
1401  * Some architectures (i386, amd64 and arm) require the optional frame
1402  * parameter, and use it as the main argument for fast handler execution
1403  * when ih_argument == NULL.
1404  *
1405  * Return value:
1406  * o FILTER_STRAY:              No filter recognized the event, and no
1407  *                              filter-less handler is registered on this
1408  *                              line.
1409  * o FILTER_HANDLED:            A filter claimed the event and served it.
1410  * o FILTER_SCHEDULE_THREAD:    No filter claimed the event, but there's at
1411  *                              least one filter-less handler on this line.
1412  * o FILTER_HANDLED |
1413  *   FILTER_SCHEDULE_THREAD:    A filter claimed the event, and asked for
1414  *                              scheduling the per-handler ithread.
1415  *
1416  * In case an ithread has to be scheduled, in *ithd there will be a
1417  * pointer to a struct intr_thread containing the thread to be
1418  * scheduled.
1419  */
1420 
1421 static int
1422 intr_filter_loop(struct intr_event *ie, struct trapframe *frame,
1423 		 struct intr_thread **ithd)
1424 {
1425 	struct intr_handler *ih;
1426 	void *arg;
1427 	int ret, thread_only;
1428 
1429 	ret = 0;
1430 	thread_only = 0;
1431 	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
1432 		/*
1433 		 * Execute fast interrupt handlers directly.
1434 		 * To support clock handlers, if a handler registers
1435 		 * with a NULL argument, then we pass it a pointer to
1436 		 * a trapframe as its argument.
1437 		 */
1438 		arg = ((ih->ih_argument == NULL) ? frame : ih->ih_argument);
1439 
1440 		CTR5(KTR_INTR, "%s: exec %p/%p(%p) for %s", __func__,
1441 		     ih->ih_filter, ih->ih_handler, arg, ih->ih_name);
1442 
1443 		if (ih->ih_filter != NULL)
1444 			ret = ih->ih_filter(arg);
1445 		else {
1446 			thread_only = 1;
1447 			continue;
1448 		}
1449 
1450 		if (ret & FILTER_STRAY)
1451 			continue;
1452 		else {
1453 			*ithd = ih->ih_thread;
1454 			return (ret);
1455 		}
1456 	}
1457 
1458 	/*
1459 	 * No filters handled the interrupt and we have at least
1460 	 * one handler without a filter.  In this case, we schedule
1461 	 * all of the filter-less handlers to run in the ithread.
1462 	 */
1463 	if (thread_only) {
1464 		*ithd = ie->ie_thread;
1465 		return (FILTER_SCHEDULE_THREAD);
1466 	}
1467 	return (FILTER_STRAY);
1468 }
1469 
1470 /*
1471  * Main interrupt handling body.
1472  *
1473  * Input:
1474  * o ie:                        the event connected to this interrupt.
1475  * o frame:                     some archs (i.e. i386) pass a frame to some.
1476  *                              handlers as their main argument.
1477  * Return value:
1478  * o 0:                         everything ok.
1479  * o EINVAL:                    stray interrupt.
1480  */
1481 int
1482 intr_event_handle(struct intr_event *ie, struct trapframe *frame)
1483 {
1484 	struct intr_thread *ithd;
1485 	struct thread *td;
1486 	int thread;
1487 
1488 	ithd = NULL;
1489 	td = curthread;
1490 
1491 	if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers))
1492 		return (EINVAL);
1493 
1494 	td->td_intr_nesting_level++;
1495 	thread = 0;
1496 	critical_enter();
1497 	thread = intr_filter_loop(ie, frame, &ithd);
1498 	if (thread & FILTER_HANDLED) {
1499 		if (ie->ie_post_filter != NULL)
1500 			ie->ie_post_filter(ie->ie_source);
1501 	} else {
1502 		if (ie->ie_pre_ithread != NULL)
1503 			ie->ie_pre_ithread(ie->ie_source);
1504 	}
1505 	critical_exit();
1506 
1507 	/* Interrupt storm logic */
1508 	if (thread & FILTER_STRAY) {
1509 		ie->ie_count++;
1510 		if (ie->ie_count < intr_storm_threshold)
1511 			printf("Interrupt stray detection not present\n");
1512 	}
1513 
1514 	/* Schedule an ithread if needed. */
1515 	if (thread & FILTER_SCHEDULE_THREAD) {
1516 		if (intr_event_schedule_thread(ie, ithd) != 0)
1517 			panic("%s: impossible stray interrupt", __func__);
1518 	}
1519 	td->td_intr_nesting_level--;
1520 	return (0);
1521 }
1522 #endif
1523 
1524 #ifdef DDB
1525 /*
1526  * Dump details about an interrupt handler
1527  */
1528 static void
1529 db_dump_intrhand(struct intr_handler *ih)
1530 {
1531 	int comma;
1532 
1533 	db_printf("\t%-10s ", ih->ih_name);
1534 	switch (ih->ih_pri) {
1535 	case PI_REALTIME:
1536 		db_printf("CLK ");
1537 		break;
1538 	case PI_AV:
1539 		db_printf("AV  ");
1540 		break;
1541 	case PI_TTYHIGH:
1542 	case PI_TTYLOW:
1543 		db_printf("TTY ");
1544 		break;
1545 	case PI_TAPE:
1546 		db_printf("TAPE");
1547 		break;
1548 	case PI_NET:
1549 		db_printf("NET ");
1550 		break;
1551 	case PI_DISK:
1552 	case PI_DISKLOW:
1553 		db_printf("DISK");
1554 		break;
1555 	case PI_DULL:
1556 		db_printf("DULL");
1557 		break;
1558 	default:
1559 		if (ih->ih_pri >= PI_SOFT)
1560 			db_printf("SWI ");
1561 		else
1562 			db_printf("%4u", ih->ih_pri);
1563 		break;
1564 	}
1565 	db_printf(" ");
1566 	db_printsym((uintptr_t)ih->ih_handler, DB_STGY_PROC);
1567 	db_printf("(%p)", ih->ih_argument);
1568 	if (ih->ih_need ||
1569 	    (ih->ih_flags & (IH_EXCLUSIVE | IH_ENTROPY | IH_DEAD |
1570 	    IH_MPSAFE)) != 0) {
1571 		db_printf(" {");
1572 		comma = 0;
1573 		if (ih->ih_flags & IH_EXCLUSIVE) {
1574 			if (comma)
1575 				db_printf(", ");
1576 			db_printf("EXCL");
1577 			comma = 1;
1578 		}
1579 		if (ih->ih_flags & IH_ENTROPY) {
1580 			if (comma)
1581 				db_printf(", ");
1582 			db_printf("ENTROPY");
1583 			comma = 1;
1584 		}
1585 		if (ih->ih_flags & IH_DEAD) {
1586 			if (comma)
1587 				db_printf(", ");
1588 			db_printf("DEAD");
1589 			comma = 1;
1590 		}
1591 		if (ih->ih_flags & IH_MPSAFE) {
1592 			if (comma)
1593 				db_printf(", ");
1594 			db_printf("MPSAFE");
1595 			comma = 1;
1596 		}
1597 		if (ih->ih_need) {
1598 			if (comma)
1599 				db_printf(", ");
1600 			db_printf("NEED");
1601 		}
1602 		db_printf("}");
1603 	}
1604 	db_printf("\n");
1605 }
1606 
1607 /*
1608  * Dump details about a event.
1609  */
1610 void
1611 db_dump_intr_event(struct intr_event *ie, int handlers)
1612 {
1613 	struct intr_handler *ih;
1614 	struct intr_thread *it;
1615 	int comma;
1616 
1617 	db_printf("%s ", ie->ie_fullname);
1618 	it = ie->ie_thread;
1619 	if (it != NULL)
1620 		db_printf("(pid %d)", it->it_thread->td_proc->p_pid);
1621 	else
1622 		db_printf("(no thread)");
1623 	if ((ie->ie_flags & (IE_SOFT | IE_ENTROPY | IE_ADDING_THREAD)) != 0 ||
1624 	    (it != NULL && it->it_need)) {
1625 		db_printf(" {");
1626 		comma = 0;
1627 		if (ie->ie_flags & IE_SOFT) {
1628 			db_printf("SOFT");
1629 			comma = 1;
1630 		}
1631 		if (ie->ie_flags & IE_ENTROPY) {
1632 			if (comma)
1633 				db_printf(", ");
1634 			db_printf("ENTROPY");
1635 			comma = 1;
1636 		}
1637 		if (ie->ie_flags & IE_ADDING_THREAD) {
1638 			if (comma)
1639 				db_printf(", ");
1640 			db_printf("ADDING_THREAD");
1641 			comma = 1;
1642 		}
1643 		if (it != NULL && it->it_need) {
1644 			if (comma)
1645 				db_printf(", ");
1646 			db_printf("NEED");
1647 		}
1648 		db_printf("}");
1649 	}
1650 	db_printf("\n");
1651 
1652 	if (handlers)
1653 		TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next)
1654 		    db_dump_intrhand(ih);
1655 }
1656 
1657 /*
1658  * Dump data about interrupt handlers
1659  */
1660 DB_SHOW_COMMAND(intr, db_show_intr)
1661 {
1662 	struct intr_event *ie;
1663 	int all, verbose;
1664 
1665 	verbose = index(modif, 'v') != NULL;
1666 	all = index(modif, 'a') != NULL;
1667 	TAILQ_FOREACH(ie, &event_list, ie_list) {
1668 		if (!all && TAILQ_EMPTY(&ie->ie_handlers))
1669 			continue;
1670 		db_dump_intr_event(ie, verbose);
1671 		if (db_pager_quit)
1672 			break;
1673 	}
1674 }
1675 #endif /* DDB */
1676 
1677 /*
1678  * Start standard software interrupt threads
1679  */
1680 static void
1681 start_softintr(void *dummy)
1682 {
1683 
1684 	if (swi_add(NULL, "vm", swi_vm, NULL, SWI_VM, INTR_MPSAFE, &vm_ih))
1685 		panic("died while creating vm swi ithread");
1686 }
1687 SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr,
1688     NULL);
1689 
1690 /*
1691  * Sysctls used by systat and others: hw.intrnames and hw.intrcnt.
1692  * The data for this machine dependent, and the declarations are in machine
1693  * dependent code.  The layout of intrnames and intrcnt however is machine
1694  * independent.
1695  *
1696  * We do not know the length of intrcnt and intrnames at compile time, so
1697  * calculate things at run time.
1698  */
1699 static int
1700 sysctl_intrnames(SYSCTL_HANDLER_ARGS)
1701 {
1702 	return (sysctl_handle_opaque(oidp, intrnames, eintrnames - intrnames,
1703 	   req));
1704 }
1705 
1706 SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD,
1707     NULL, 0, sysctl_intrnames, "", "Interrupt Names");
1708 
1709 static int
1710 sysctl_intrcnt(SYSCTL_HANDLER_ARGS)
1711 {
1712 	return (sysctl_handle_opaque(oidp, intrcnt,
1713 	    (char *)eintrcnt - (char *)intrcnt, req));
1714 }
1715 
1716 SYSCTL_PROC(_hw, OID_AUTO, intrcnt, CTLTYPE_OPAQUE | CTLFLAG_RD,
1717     NULL, 0, sysctl_intrcnt, "", "Interrupt Counts");
1718 
1719 #ifdef DDB
1720 /*
1721  * DDB command to dump the interrupt statistics.
1722  */
1723 DB_SHOW_COMMAND(intrcnt, db_show_intrcnt)
1724 {
1725 	u_long *i;
1726 	char *cp;
1727 
1728 	cp = intrnames;
1729 	for (i = intrcnt; i != eintrcnt && !db_pager_quit; i++) {
1730 		if (*cp == '\0')
1731 			break;
1732 		if (*i != 0)
1733 			db_printf("%s\t%lu\n", cp, *i);
1734 		cp += strlen(cp) + 1;
1735 	}
1736 }
1737 #endif
1738