xref: /freebsd/sys/kern/kern_intr.c (revision 2faf504d1ab821fe2b9df9d2afb49bb35e1334f4)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_ddb.h"
33 #include "opt_kstack_usage_prof.h"
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/conf.h>
38 #include <sys/cpuset.h>
39 #include <sys/rtprio.h>
40 #include <sys/systm.h>
41 #include <sys/interrupt.h>
42 #include <sys/kernel.h>
43 #include <sys/kthread.h>
44 #include <sys/ktr.h>
45 #include <sys/limits.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mutex.h>
49 #include <sys/priv.h>
50 #include <sys/proc.h>
51 #include <sys/epoch.h>
52 #include <sys/random.h>
53 #include <sys/resourcevar.h>
54 #include <sys/sched.h>
55 #include <sys/smp.h>
56 #include <sys/sysctl.h>
57 #include <sys/syslog.h>
58 #include <sys/unistd.h>
59 #include <sys/vmmeter.h>
60 #include <machine/atomic.h>
61 #include <machine/cpu.h>
62 #include <machine/md_var.h>
63 #include <machine/smp.h>
64 #include <machine/stdarg.h>
65 #ifdef DDB
66 #include <ddb/ddb.h>
67 #include <ddb/db_sym.h>
68 #endif
69 
/*
 * Describe an interrupt thread.  There is one of these per interrupt event.
 * The descriptor is allocated by ithread_create() and tied to its event
 * (it_event) by intr_event_add_handler() when the first threaded handler
 * is attached.
 */
struct intr_thread {
	struct intr_event *it_event;	/* Event serviced by this thread. */
	struct thread *it_thread;	/* Kernel thread. */
	int	it_flags;		/* (j) IT_* flags. */
	int	it_need;		/* Needs service. */
};

/* Interrupt thread flags kept in it_flags */
#define	IT_DEAD		0x000001	/* Thread is waiting to exit. */
#define	IT_WAIT		0x000002	/* Thread is waiting for completion. */
83 
/*
 * Sample passed to random_harvest_queue() when an interrupt thread or a
 * software interrupt is scheduled.
 */
struct	intr_entropy {
	struct	thread *td;	/* curthread at the time of the sample. */
	uintptr_t event;	/* Address of the intr_event or intr_handler. */
};
88 
/*
 * Well-known interrupt events.  swi_sched() asserts that SWI_FROMNMI is only
 * ever used together with clk_intr_event.
 */
struct	intr_event *clk_intr_event;
struct	intr_event *tty_intr_event;
void	*vm_ih;
/* Process handle handed to kproc_kthread_add() when ithreads are created. */
struct proc *intrproc;

/* Malloc type used for interrupt events, handlers and thread descriptors. */
static MALLOC_DEFINE(M_ITHREAD, "ithread", "Interrupt Threads");

/* Tunables exported under hw.*; see the sysctl descriptions for semantics. */
static int intr_storm_threshold = 0;
SYSCTL_INT(_hw, OID_AUTO, intr_storm_threshold, CTLFLAG_RWTUN,
    &intr_storm_threshold, 0,
    "Number of consecutive interrupts before storm protection is enabled");
static int intr_epoch_batch = 1000;
SYSCTL_INT(_hw, OID_AUTO, intr_epoch_batch, CTLFLAG_RWTUN, &intr_epoch_batch,
    0, "Maximum interrupt handler executions without re-entering epoch(9)");
/* List of every interrupt event in the system; protected by event_lock. */
static TAILQ_HEAD(, intr_event) event_list =
    TAILQ_HEAD_INITIALIZER(event_list);
static struct mtx event_lock;
MTX_SYSINIT(intr_event_list, &event_lock, "intr event list", MTX_DEF);

/*
 * Forward declarations of file-local helpers.
 *
 * NOTE(review): ithread_execute_handlers() is declared here, but the
 * definition visible further down is named intr_event_execute_handlers();
 * confirm whether this prototype is stale.
 */
static void	intr_event_update(struct intr_event *ie);
static int	intr_event_schedule_thread(struct intr_event *ie);
static struct intr_thread *ithread_create(const char *name);
static void	ithread_destroy(struct intr_thread *ithread);
static void	ithread_execute_handlers(struct proc *p,
		    struct intr_event *ie);
static void	ithread_loop(void *);
static void	ithread_update(struct intr_thread *ithd);
static void	start_softintr(void *);
117 
118 /* Map an interrupt type to an ithread priority. */
119 u_char
120 intr_priority(enum intr_type flags)
121 {
122 	u_char pri;
123 
124 	flags &= (INTR_TYPE_TTY | INTR_TYPE_BIO | INTR_TYPE_NET |
125 	    INTR_TYPE_CAM | INTR_TYPE_MISC | INTR_TYPE_CLK | INTR_TYPE_AV);
126 	switch (flags) {
127 	case INTR_TYPE_TTY:
128 		pri = PI_TTY;
129 		break;
130 	case INTR_TYPE_BIO:
131 		pri = PI_DISK;
132 		break;
133 	case INTR_TYPE_NET:
134 		pri = PI_NET;
135 		break;
136 	case INTR_TYPE_CAM:
137 		pri = PI_DISK;
138 		break;
139 	case INTR_TYPE_AV:
140 		pri = PI_AV;
141 		break;
142 	case INTR_TYPE_CLK:
143 		pri = PI_REALTIME;
144 		break;
145 	case INTR_TYPE_MISC:
146 		pri = PI_DULL;          /* don't care */
147 		break;
148 	default:
149 		/* We didn't specify an interrupt level. */
150 		panic("intr_priority: no interrupt type in flags");
151 	}
152 
153 	return pri;
154 }
155 
156 /*
157  * Update an ithread based on the associated intr_event.
158  */
159 static void
160 ithread_update(struct intr_thread *ithd)
161 {
162 	struct intr_event *ie;
163 	struct thread *td;
164 	u_char pri;
165 
166 	ie = ithd->it_event;
167 	td = ithd->it_thread;
168 	mtx_assert(&ie->ie_lock, MA_OWNED);
169 
170 	/* Determine the overall priority of this event. */
171 	if (CK_SLIST_EMPTY(&ie->ie_handlers))
172 		pri = PRI_MAX_ITHD;
173 	else
174 		pri = CK_SLIST_FIRST(&ie->ie_handlers)->ih_pri;
175 
176 	/* Update name and priority. */
177 	strlcpy(td->td_name, ie->ie_fullname, sizeof(td->td_name));
178 #ifdef KTR
179 	sched_clear_tdname(td);
180 #endif
181 	thread_lock(td);
182 	sched_prio(td, pri);
183 	thread_unlock(td);
184 }
185 
/*
 * Regenerate the full name of an interrupt event and update its priority.
 *
 * ie_fullname is rebuilt from ie_name followed by the names of the attached
 * handlers, ie_hflags is recomputed as the union of every handler's
 * ih_flags, and the result is pushed to the ithread (if one exists) via
 * ithread_update().  The caller must hold ie_lock.
 */
static void
intr_event_update(struct intr_event *ie)
{
	struct intr_handler *ih;
	char *last;
	int missed, space, flags;

	/* Start off with no entropy and just the name of the event. */
	mtx_assert(&ie->ie_lock, MA_OWNED);
	strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname));
	flags = 0;
	missed = 0;	/* Number of handlers whose name did not fit. */
	space = 1;	/* Nonzero until something is appended after ie_name. */

	/* Run through all the handlers updating values. */
	CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
		/* Append " <name>" only if it fits including the NUL. */
		if (strlen(ie->ie_fullname) + strlen(ih->ih_name) + 1 <
		    sizeof(ie->ie_fullname)) {
			strcat(ie->ie_fullname, " ");
			strcat(ie->ie_fullname, ih->ih_name);
			space = 0;
		} else
			missed++;
		flags |= ih->ih_flags;
	}
	ie->ie_hflags = flags;

	/*
	 * If there is only one handler and its name is too long, just copy in
	 * as much of the end of the name (includes the unit number) as will
	 * fit.  Otherwise, we have multiple handlers and not all of the names
	 * will fit.  Add +'s to indicate missing names.  If we run out of room
	 * and still have +'s to add, change the last character from a + to a *.
	 */
	if (missed == 1 && space == 1) {
		ih = CK_SLIST_FIRST(&ie->ie_handlers);
		/*
		 * 'missed' is reused here as the number of leading characters
		 * of ih_name to skip so that the separator plus the remaining
		 * tail exactly fills the buffer.
		 */
		missed = strlen(ie->ie_fullname) + strlen(ih->ih_name) + 2 -
		    sizeof(ie->ie_fullname);
		strcat(ie->ie_fullname, (missed == 0) ? " " : "-");
		strcat(ie->ie_fullname, &ih->ih_name[missed]);
		missed = 0;
	}
	/* 'last' addresses the final character slot before the NUL. */
	last = &ie->ie_fullname[sizeof(ie->ie_fullname) - 2];
	while (missed-- > 0) {
		if (strlen(ie->ie_fullname) + 1 == sizeof(ie->ie_fullname)) {
			/* Buffer is full: overwrite the last character. */
			if (*last == '+') {
				*last = '*';
				break;
			} else
				*last = '+';
		} else if (space) {
			strcat(ie->ie_fullname, " +");
			space = 0;
		} else
			strcat(ie->ie_fullname, "+");
	}

	/*
	 * If this event has an ithread, update its priority and
	 * name.
	 */
	if (ie->ie_thread != NULL)
		ithread_update(ie->ie_thread);
	CTR2(KTR_INTR, "%s: updated %s", __func__, ie->ie_fullname);
}
254 
255 int
256 intr_event_create(struct intr_event **event, void *source, int flags, int irq,
257     void (*pre_ithread)(void *), void (*post_ithread)(void *),
258     void (*post_filter)(void *), int (*assign_cpu)(void *, int),
259     const char *fmt, ...)
260 {
261 	struct intr_event *ie;
262 	va_list ap;
263 
264 	/* The only valid flag during creation is IE_SOFT. */
265 	if ((flags & ~IE_SOFT) != 0)
266 		return (EINVAL);
267 	ie = malloc(sizeof(struct intr_event), M_ITHREAD, M_WAITOK | M_ZERO);
268 	ie->ie_source = source;
269 	ie->ie_pre_ithread = pre_ithread;
270 	ie->ie_post_ithread = post_ithread;
271 	ie->ie_post_filter = post_filter;
272 	ie->ie_assign_cpu = assign_cpu;
273 	ie->ie_flags = flags;
274 	ie->ie_irq = irq;
275 	ie->ie_cpu = NOCPU;
276 	CK_SLIST_INIT(&ie->ie_handlers);
277 	mtx_init(&ie->ie_lock, "intr event", NULL, MTX_DEF);
278 
279 	va_start(ap, fmt);
280 	vsnprintf(ie->ie_name, sizeof(ie->ie_name), fmt, ap);
281 	va_end(ap);
282 	strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname));
283 	mtx_lock(&event_lock);
284 	TAILQ_INSERT_TAIL(&event_list, ie, ie_list);
285 	mtx_unlock(&event_lock);
286 	if (event != NULL)
287 		*event = ie;
288 	CTR2(KTR_INTR, "%s: created %s", __func__, ie->ie_name);
289 	return (0);
290 }
291 
292 /*
293  * Bind an interrupt event to the specified CPU.  Note that not all
294  * platforms support binding an interrupt to a CPU.  For those
295  * platforms this request will fail.  Using a cpu id of NOCPU unbinds
296  * the interrupt event.
297  */
298 static int
299 _intr_event_bind(struct intr_event *ie, int cpu, bool bindirq, bool bindithread)
300 {
301 	lwpid_t id;
302 	int error;
303 
304 	/* Need a CPU to bind to. */
305 	if (cpu != NOCPU && CPU_ABSENT(cpu))
306 		return (EINVAL);
307 
308 	if (ie->ie_assign_cpu == NULL)
309 		return (EOPNOTSUPP);
310 
311 	error = priv_check(curthread, PRIV_SCHED_CPUSET_INTR);
312 	if (error)
313 		return (error);
314 
315 	/*
316 	 * If we have any ithreads try to set their mask first to verify
317 	 * permissions, etc.
318 	 */
319 	if (bindithread) {
320 		mtx_lock(&ie->ie_lock);
321 		if (ie->ie_thread != NULL) {
322 			id = ie->ie_thread->it_thread->td_tid;
323 			mtx_unlock(&ie->ie_lock);
324 			error = cpuset_setithread(id, cpu);
325 			if (error)
326 				return (error);
327 		} else
328 			mtx_unlock(&ie->ie_lock);
329 	}
330 	if (bindirq)
331 		error = ie->ie_assign_cpu(ie->ie_source, cpu);
332 	if (error) {
333 		if (bindithread) {
334 			mtx_lock(&ie->ie_lock);
335 			if (ie->ie_thread != NULL) {
336 				cpu = ie->ie_cpu;
337 				id = ie->ie_thread->it_thread->td_tid;
338 				mtx_unlock(&ie->ie_lock);
339 				(void)cpuset_setithread(id, cpu);
340 			} else
341 				mtx_unlock(&ie->ie_lock);
342 		}
343 		return (error);
344 	}
345 
346 	if (bindirq) {
347 		mtx_lock(&ie->ie_lock);
348 		ie->ie_cpu = cpu;
349 		mtx_unlock(&ie->ie_lock);
350 	}
351 
352 	return (error);
353 }
354 
355 /*
356  * Bind an interrupt event to the specified CPU.  For supported platforms, any
357  * associated ithreads as well as the primary interrupt context will be bound
358  * to the specificed CPU.
359  */
360 int
361 intr_event_bind(struct intr_event *ie, int cpu)
362 {
363 
364 	return (_intr_event_bind(ie, cpu, true, true));
365 }
366 
367 /*
368  * Bind an interrupt event to the specified CPU, but do not bind associated
369  * ithreads.
370  */
371 int
372 intr_event_bind_irqonly(struct intr_event *ie, int cpu)
373 {
374 
375 	return (_intr_event_bind(ie, cpu, true, false));
376 }
377 
378 /*
379  * Bind an interrupt event's ithread to the specified CPU.
380  */
381 int
382 intr_event_bind_ithread(struct intr_event *ie, int cpu)
383 {
384 
385 	return (_intr_event_bind(ie, cpu, false, true));
386 }
387 
388 /*
389  * Bind an interrupt event's ithread to the specified cpuset.
390  */
391 int
392 intr_event_bind_ithread_cpuset(struct intr_event *ie, cpuset_t *cs)
393 {
394 	lwpid_t id;
395 
396 	mtx_lock(&ie->ie_lock);
397 	if (ie->ie_thread != NULL) {
398 		id = ie->ie_thread->it_thread->td_tid;
399 		mtx_unlock(&ie->ie_lock);
400 		return (cpuset_setthread(id, cs));
401 	} else {
402 		mtx_unlock(&ie->ie_lock);
403 	}
404 	return (ENODEV);
405 }
406 
407 static struct intr_event *
408 intr_lookup(int irq)
409 {
410 	struct intr_event *ie;
411 
412 	mtx_lock(&event_lock);
413 	TAILQ_FOREACH(ie, &event_list, ie_list)
414 		if (ie->ie_irq == irq &&
415 		    (ie->ie_flags & IE_SOFT) == 0 &&
416 		    CK_SLIST_FIRST(&ie->ie_handlers) != NULL)
417 			break;
418 	mtx_unlock(&event_lock);
419 	return (ie);
420 }
421 
422 int
423 intr_setaffinity(int irq, int mode, void *m)
424 {
425 	struct intr_event *ie;
426 	cpuset_t *mask;
427 	int cpu, n;
428 
429 	mask = m;
430 	cpu = NOCPU;
431 	/*
432 	 * If we're setting all cpus we can unbind.  Otherwise make sure
433 	 * only one cpu is in the set.
434 	 */
435 	if (CPU_CMP(cpuset_root, mask)) {
436 		for (n = 0; n < CPU_SETSIZE; n++) {
437 			if (!CPU_ISSET(n, mask))
438 				continue;
439 			if (cpu != NOCPU)
440 				return (EINVAL);
441 			cpu = n;
442 		}
443 	}
444 	ie = intr_lookup(irq);
445 	if (ie == NULL)
446 		return (ESRCH);
447 	switch (mode) {
448 	case CPU_WHICH_IRQ:
449 		return (intr_event_bind(ie, cpu));
450 	case CPU_WHICH_INTRHANDLER:
451 		return (intr_event_bind_irqonly(ie, cpu));
452 	case CPU_WHICH_ITHREAD:
453 		return (intr_event_bind_ithread(ie, cpu));
454 	default:
455 		return (EINVAL);
456 	}
457 }
458 
459 int
460 intr_getaffinity(int irq, int mode, void *m)
461 {
462 	struct intr_event *ie;
463 	struct thread *td;
464 	struct proc *p;
465 	cpuset_t *mask;
466 	lwpid_t id;
467 	int error;
468 
469 	mask = m;
470 	ie = intr_lookup(irq);
471 	if (ie == NULL)
472 		return (ESRCH);
473 
474 	error = 0;
475 	CPU_ZERO(mask);
476 	switch (mode) {
477 	case CPU_WHICH_IRQ:
478 	case CPU_WHICH_INTRHANDLER:
479 		mtx_lock(&ie->ie_lock);
480 		if (ie->ie_cpu == NOCPU)
481 			CPU_COPY(cpuset_root, mask);
482 		else
483 			CPU_SET(ie->ie_cpu, mask);
484 		mtx_unlock(&ie->ie_lock);
485 		break;
486 	case CPU_WHICH_ITHREAD:
487 		mtx_lock(&ie->ie_lock);
488 		if (ie->ie_thread == NULL) {
489 			mtx_unlock(&ie->ie_lock);
490 			CPU_COPY(cpuset_root, mask);
491 		} else {
492 			id = ie->ie_thread->it_thread->td_tid;
493 			mtx_unlock(&ie->ie_lock);
494 			error = cpuset_which(CPU_WHICH_TID, id, &p, &td, NULL);
495 			if (error != 0)
496 				return (error);
497 			CPU_COPY(&td->td_cpuset->cs_mask, mask);
498 			PROC_UNLOCK(p);
499 		}
500 	default:
501 		return (EINVAL);
502 	}
503 	return (0);
504 }
505 
506 int
507 intr_event_destroy(struct intr_event *ie)
508 {
509 
510 	mtx_lock(&event_lock);
511 	mtx_lock(&ie->ie_lock);
512 	if (!CK_SLIST_EMPTY(&ie->ie_handlers)) {
513 		mtx_unlock(&ie->ie_lock);
514 		mtx_unlock(&event_lock);
515 		return (EBUSY);
516 	}
517 	TAILQ_REMOVE(&event_list, ie, ie_list);
518 #ifndef notyet
519 	if (ie->ie_thread != NULL) {
520 		ithread_destroy(ie->ie_thread);
521 		ie->ie_thread = NULL;
522 	}
523 #endif
524 	mtx_unlock(&ie->ie_lock);
525 	mtx_unlock(&event_lock);
526 	mtx_destroy(&ie->ie_lock);
527 	free(ie, M_ITHREAD);
528 	return (0);
529 }
530 
531 static struct intr_thread *
532 ithread_create(const char *name)
533 {
534 	struct intr_thread *ithd;
535 	struct thread *td;
536 	int error;
537 
538 	ithd = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | M_ZERO);
539 
540 	error = kproc_kthread_add(ithread_loop, ithd, &intrproc,
541 		    &td, RFSTOPPED | RFHIGHPID,
542 		    0, "intr", "%s", name);
543 	if (error)
544 		panic("kproc_create() failed with %d", error);
545 	thread_lock(td);
546 	sched_class(td, PRI_ITHD);
547 	TD_SET_IWAIT(td);
548 	thread_unlock(td);
549 	td->td_pflags |= TDP_ITHREAD;
550 	ithd->it_thread = td;
551 	CTR2(KTR_INTR, "%s: created %s", __func__, name);
552 	return (ithd);
553 }
554 
555 static void
556 ithread_destroy(struct intr_thread *ithread)
557 {
558 	struct thread *td;
559 
560 	CTR2(KTR_INTR, "%s: killing %s", __func__, ithread->it_event->ie_name);
561 	td = ithread->it_thread;
562 	thread_lock(td);
563 	ithread->it_flags |= IT_DEAD;
564 	if (TD_AWAITING_INTR(td)) {
565 		TD_CLR_IWAIT(td);
566 		sched_add(td, SRQ_INTR);
567 	} else
568 		thread_unlock(td);
569 }
570 
571 int
572 intr_event_add_handler(struct intr_event *ie, const char *name,
573     driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri,
574     enum intr_type flags, void **cookiep)
575 {
576 	struct intr_handler *ih, *temp_ih;
577 	struct intr_handler **prevptr;
578 	struct intr_thread *it;
579 
580 	if (ie == NULL || name == NULL || (handler == NULL && filter == NULL))
581 		return (EINVAL);
582 
583 	/* Allocate and populate an interrupt handler structure. */
584 	ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | M_ZERO);
585 	ih->ih_filter = filter;
586 	ih->ih_handler = handler;
587 	ih->ih_argument = arg;
588 	strlcpy(ih->ih_name, name, sizeof(ih->ih_name));
589 	ih->ih_event = ie;
590 	ih->ih_pri = pri;
591 	if (flags & INTR_EXCL)
592 		ih->ih_flags = IH_EXCLUSIVE;
593 	if (flags & INTR_MPSAFE)
594 		ih->ih_flags |= IH_MPSAFE;
595 	if (flags & INTR_ENTROPY)
596 		ih->ih_flags |= IH_ENTROPY;
597 	if (flags & INTR_TYPE_NET)
598 		ih->ih_flags |= IH_NET;
599 
600 	/* We can only have one exclusive handler in a event. */
601 	mtx_lock(&ie->ie_lock);
602 	if (!CK_SLIST_EMPTY(&ie->ie_handlers)) {
603 		if ((flags & INTR_EXCL) ||
604 		    (CK_SLIST_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) {
605 			mtx_unlock(&ie->ie_lock);
606 			free(ih, M_ITHREAD);
607 			return (EINVAL);
608 		}
609 	}
610 
611 	/* Create a thread if we need one. */
612 	while (ie->ie_thread == NULL && handler != NULL) {
613 		if (ie->ie_flags & IE_ADDING_THREAD)
614 			msleep(ie, &ie->ie_lock, 0, "ithread", 0);
615 		else {
616 			ie->ie_flags |= IE_ADDING_THREAD;
617 			mtx_unlock(&ie->ie_lock);
618 			it = ithread_create("intr: newborn");
619 			mtx_lock(&ie->ie_lock);
620 			ie->ie_flags &= ~IE_ADDING_THREAD;
621 			ie->ie_thread = it;
622 			it->it_event = ie;
623 			ithread_update(it);
624 			wakeup(ie);
625 		}
626 	}
627 
628 	/* Add the new handler to the event in priority order. */
629 	CK_SLIST_FOREACH_PREVPTR(temp_ih, prevptr, &ie->ie_handlers, ih_next) {
630 		if (temp_ih->ih_pri > ih->ih_pri)
631 			break;
632 	}
633 	CK_SLIST_INSERT_PREVPTR(prevptr, temp_ih, ih, ih_next);
634 
635 	intr_event_update(ie);
636 
637 	CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name,
638 	    ie->ie_name);
639 	mtx_unlock(&ie->ie_lock);
640 
641 	if (cookiep != NULL)
642 		*cookiep = ih;
643 	return (0);
644 }
645 
646 /*
647  * Append a description preceded by a ':' to the name of the specified
648  * interrupt handler.
649  */
650 int
651 intr_event_describe_handler(struct intr_event *ie, void *cookie,
652     const char *descr)
653 {
654 	struct intr_handler *ih;
655 	size_t space;
656 	char *start;
657 
658 	mtx_lock(&ie->ie_lock);
659 #ifdef INVARIANTS
660 	CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
661 		if (ih == cookie)
662 			break;
663 	}
664 	if (ih == NULL) {
665 		mtx_unlock(&ie->ie_lock);
666 		panic("handler %p not found in interrupt event %p", cookie, ie);
667 	}
668 #endif
669 	ih = cookie;
670 
671 	/*
672 	 * Look for an existing description by checking for an
673 	 * existing ":".  This assumes device names do not include
674 	 * colons.  If one is found, prepare to insert the new
675 	 * description at that point.  If one is not found, find the
676 	 * end of the name to use as the insertion point.
677 	 */
678 	start = strchr(ih->ih_name, ':');
679 	if (start == NULL)
680 		start = strchr(ih->ih_name, 0);
681 
682 	/*
683 	 * See if there is enough remaining room in the string for the
684 	 * description + ":".  The "- 1" leaves room for the trailing
685 	 * '\0'.  The "+ 1" accounts for the colon.
686 	 */
687 	space = sizeof(ih->ih_name) - (start - ih->ih_name) - 1;
688 	if (strlen(descr) + 1 > space) {
689 		mtx_unlock(&ie->ie_lock);
690 		return (ENOSPC);
691 	}
692 
693 	/* Append a colon followed by the description. */
694 	*start = ':';
695 	strcpy(start + 1, descr);
696 	intr_event_update(ie);
697 	mtx_unlock(&ie->ie_lock);
698 	return (0);
699 }
700 
701 /*
702  * Return the ie_source field from the intr_event an intr_handler is
703  * associated with.
704  */
705 void *
706 intr_handler_source(void *cookie)
707 {
708 	struct intr_handler *ih;
709 	struct intr_event *ie;
710 
711 	ih = (struct intr_handler *)cookie;
712 	if (ih == NULL)
713 		return (NULL);
714 	ie = ih->ih_event;
715 	KASSERT(ie != NULL,
716 	    ("interrupt handler \"%s\" has a NULL interrupt event",
717 	    ih->ih_name));
718 	return (ie->ie_source);
719 }
720 
/*
 * If intr_event_handle() is running in the ISR context at the time of the call,
 * then wait for it to complete.
 *
 * The wait is implemented by flipping ie_phase and spinning until the
 * activity counter of the previous phase, ie_active[phase], drains to zero.
 * The caller must hold ie_lock.
 */
static void
intr_event_barrier(struct intr_event *ie)
{
	int phase;

	mtx_assert(&ie->ie_lock, MA_OWNED);
	phase = ie->ie_phase;

	/*
	 * Switch phase to direct future interrupts to the other active counter.
	 * Make sure that any preceding stores are visible before the switch.
	 */
	KASSERT(ie->ie_active[!phase] == 0, ("idle phase has activity"));
	atomic_store_rel_int(&ie->ie_phase, !phase);

	/*
	 * This code cooperates with wait-free iteration of ie_handlers
	 * in intr_event_handle.
	 * Make sure that the removal and the phase update are not reordered
	 * with the active count check.
	 * Note that no combination of acquire and release fences can provide
	 * that guarantee as Store->Load sequences can always be reordered.
	 */
	atomic_thread_fence_seq_cst();

	/*
	 * Now wait on the inactive phase.
	 * The acquire fence is needed so that all post-barrier accesses
	 * are after the check.
	 */
	while (ie->ie_active[phase] > 0)
		cpu_spinwait();
	atomic_thread_fence_acq();
}
759 
/*
 * Wait until a change to 'handler' (for example to its IH_SUSP flag) has
 * been observed by the interrupt processing path.
 *
 * Without an ithread this is an intr_event_barrier().  With an ithread the
 * handler is marked IH_CHANGED, the ithread is scheduled, and the caller
 * sleeps on the handler until the flag has been cleared again;
 * intr_event_execute_handlers() clears it and issues the wakeup.  The
 * caller must hold ie_lock, which msleep() uses as its interlock.
 */
static void
intr_handler_barrier(struct intr_handler *handler)
{
	struct intr_event *ie;

	ie = handler->ih_event;
	mtx_assert(&ie->ie_lock, MA_OWNED);
	KASSERT((handler->ih_flags & IH_DEAD) == 0,
	    ("update for a removed handler"));

	if (ie->ie_thread == NULL) {
		intr_event_barrier(ie);
		return;
	}
	/* Only kick the ithread if nobody else has already requested it. */
	if ((handler->ih_flags & IH_CHANGED) == 0) {
		handler->ih_flags |= IH_CHANGED;
		intr_event_schedule_thread(ie);
	}
	while ((handler->ih_flags & IH_CHANGED) != 0)
		msleep(handler, &ie->ie_lock, 0, "ih_barr", 0);
}
781 
/*
 * Sleep until an ithread finishes executing an interrupt handler.
 *
 * XXX Doesn't currently handle interrupt filters or fast interrupt
 * handlers. This is intended for LinuxKPI drivers only.
 * Do not use in BSD code.
 *
 * Returns immediately when no event with handlers exists for 'irq' or when
 * the event has no ithread.
 *
 * NOTE(review): ie->ie_thread is read here without holding ie_lock, unlike
 * the other readers in this file -- confirm this is safe for the intended
 * callers.
 */
void
_intr_drain(int irq)
{
	struct intr_event *ie;
	struct intr_thread *ithd;
	struct thread *td;

	ie = intr_lookup(irq);
	if (ie == NULL)
		return;
	if (ie->ie_thread == NULL)
		return;
	ithd = ie->ie_thread;
	td = ithd->it_thread;
	/*
	 * We set the flag and wait for it to be cleared to avoid
	 * long delays with potentially busy interrupt handlers
	 * were we to only sample TD_AWAITING_INTR() every tick.
	 */
	thread_lock(td);
	if (!TD_AWAITING_INTR(td)) {
		ithd->it_flags |= IT_WAIT;
		/*
		 * IT_WAIT is presumably cleared by the ithread's main loop
		 * (not visible in this part of the file); poll once per
		 * tick with the thread lock dropped.
		 */
		while (ithd->it_flags & IT_WAIT) {
			thread_unlock(td);
			pause("idrain", 1);
			thread_lock(td);
		}
	}
	thread_unlock(td);
	return;
}
820 
/*
 * Detach the handler identified by 'cookie' from its interrupt event and
 * free it.
 *
 * Returns EINVAL for a NULL cookie and 0 otherwise.  Panics if the handler
 * is not on its event's handler list.  May sleep while waiting for the
 * ithread to acknowledge the removal.
 */
int
intr_event_remove_handler(void *cookie)
{
	struct intr_handler *handler = (struct intr_handler *)cookie;
	struct intr_event *ie;
	struct intr_handler *ih;
	struct intr_handler **prevptr;
#ifdef notyet
	int dead;
#endif

	if (handler == NULL)
		return (EINVAL);
	ie = handler->ih_event;
	KASSERT(ie != NULL,
	    ("interrupt handler \"%s\" has a NULL interrupt event",
	    handler->ih_name));

	mtx_lock(&ie->ie_lock);
	CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name,
	    ie->ie_name);
	/* Locate the handler and the link that points at it. */
	CK_SLIST_FOREACH_PREVPTR(ih, prevptr, &ie->ie_handlers, ih_next) {
		if (ih == handler)
			break;
	}
	if (ih == NULL) {
		panic("interrupt handler \"%s\" not found in "
		    "interrupt event \"%s\"", handler->ih_name, ie->ie_name);
	}

	/*
	 * If there is no ithread, then directly remove the handler.  Note that
	 * intr_event_handle() iterates ie_handlers in a lock-less fashion, so
	 * care needs to be taken to keep ie_handlers consistent and to free
	 * the removed handler only when ie_handlers is quiescent.
	 */
	if (ie->ie_thread == NULL) {
		CK_SLIST_REMOVE_PREVPTR(prevptr, ih, ih_next);
		intr_event_barrier(ie);
		intr_event_update(ie);
		mtx_unlock(&ie->ie_lock);
		free(handler, M_ITHREAD);
		return (0);
	}

	/*
	 * Let the interrupt thread do the job.
	 * The interrupt source is disabled when the interrupt thread is
	 * running, so it does not have to worry about interaction with
	 * intr_event_handle().
	 */
	KASSERT((handler->ih_flags & IH_DEAD) == 0,
	    ("duplicate handle remove"));
	handler->ih_flags |= IH_DEAD;
	intr_event_schedule_thread(ie);
	/*
	 * intr_event_execute_handlers() unlinks the handler, clears IH_DEAD
	 * and wakes us up.
	 */
	while (handler->ih_flags & IH_DEAD)
		msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0);
	intr_event_update(ie);

#ifdef notyet
	/*
	 * XXX: This could be bad in the case of ppbus(8).  Also, I think
	 * this could lead to races of stale data when servicing an
	 * interrupt.
	 */
	dead = 1;
	CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
		if (ih->ih_handler != NULL) {
			dead = 0;
			break;
		}
	}
	if (dead) {
		ithread_destroy(ie->ie_thread);
		ie->ie_thread = NULL;
	}
#endif
	mtx_unlock(&ie->ie_lock);
	free(handler, M_ITHREAD);
	return (0);
}
902 
903 int
904 intr_event_suspend_handler(void *cookie)
905 {
906 	struct intr_handler *handler = (struct intr_handler *)cookie;
907 	struct intr_event *ie;
908 
909 	if (handler == NULL)
910 		return (EINVAL);
911 	ie = handler->ih_event;
912 	KASSERT(ie != NULL,
913 	    ("interrupt handler \"%s\" has a NULL interrupt event",
914 	    handler->ih_name));
915 	mtx_lock(&ie->ie_lock);
916 	handler->ih_flags |= IH_SUSP;
917 	intr_handler_barrier(handler);
918 	mtx_unlock(&ie->ie_lock);
919 	return (0);
920 }
921 
922 int
923 intr_event_resume_handler(void *cookie)
924 {
925 	struct intr_handler *handler = (struct intr_handler *)cookie;
926 	struct intr_event *ie;
927 
928 	if (handler == NULL)
929 		return (EINVAL);
930 	ie = handler->ih_event;
931 	KASSERT(ie != NULL,
932 	    ("interrupt handler \"%s\" has a NULL interrupt event",
933 	    handler->ih_name));
934 
935 	/*
936 	 * intr_handler_barrier() acts not only as a barrier,
937 	 * it also allows to check for any pending interrupts.
938 	 */
939 	mtx_lock(&ie->ie_lock);
940 	handler->ih_flags &= ~IH_SUSP;
941 	intr_handler_barrier(handler);
942 	mtx_unlock(&ie->ie_lock);
943 	return (0);
944 }
945 
/*
 * Request service from the ithread of an interrupt event.
 *
 * Sets it_need and, if the thread is waiting for an interrupt, puts it on
 * the run queue.  When any handler of the event carries IH_ENTROPY an
 * entropy sample is queued first.  Returns EINVAL for a stray interrupt
 * (NULL event, no handlers or no ithread) and 0 otherwise.
 */
static int
intr_event_schedule_thread(struct intr_event *ie)
{
	struct intr_entropy entropy;
	struct intr_thread *it;
	struct thread *td;
	struct thread *ctd;

	/*
	 * If no ithread or no handlers, then we have a stray interrupt.
	 */
	if (ie == NULL || CK_SLIST_EMPTY(&ie->ie_handlers) ||
	    ie->ie_thread == NULL)
		return (EINVAL);

	ctd = curthread;
	it = ie->ie_thread;
	td = it->it_thread;

	/*
	 * If any of the handlers for this ithread claim to be good
	 * sources of entropy, then gather some.
	 */
	if (ie->ie_hflags & IH_ENTROPY) {
		entropy.event = (uintptr_t)ie;
		entropy.td = ctd;
		random_harvest_queue(&entropy, sizeof(entropy), RANDOM_INTERRUPT);
	}

	KASSERT(td->td_proc != NULL, ("ithread %s has no process", ie->ie_name));

	/*
	 * Set it_need to tell the thread to keep running if it is already
	 * running.  Then, lock the thread and see if we actually need to
	 * put it on the runqueue.
	 *
	 * Use store_rel to arrange that the store to ih_need in
	 * swi_sched() is before the store to it_need and prepare for
	 * transfer of this order to loads in the ithread.
	 */
	atomic_store_rel_int(&it->it_need, 1);
	thread_lock(td);
	if (TD_AWAITING_INTR(td)) {
		CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, td->td_proc->p_pid,
		    td->td_name);
		TD_CLR_IWAIT(td);
		/*
		 * No explicit thread_unlock() on this path; presumably
		 * sched_add() releases the thread lock -- confirm.
		 */
		sched_add(td, SRQ_INTR);
	} else {
		CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d",
		    __func__, td->td_proc->p_pid, td->td_name, it->it_need, TD_GET_STATE(td));
		thread_unlock(td);
	}

	return (0);
}
1001 
1002 /*
1003  * Allow interrupt event binding for software interrupt handlers -- a no-op,
1004  * since interrupts are generated in software rather than being directed by
1005  * a PIC.
1006  */
1007 static int
1008 swi_assign_cpu(void *arg, int cpu)
1009 {
1010 
1011 	return (0);
1012 }
1013 
1014 /*
1015  * Add a software interrupt handler to a specified event.  If a given event
1016  * is not specified, then a new event is created.
1017  */
1018 int
1019 swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler,
1020 	    void *arg, int pri, enum intr_type flags, void **cookiep)
1021 {
1022 	struct intr_event *ie;
1023 	int error = 0;
1024 
1025 	if (flags & INTR_ENTROPY)
1026 		return (EINVAL);
1027 
1028 	ie = (eventp != NULL) ? *eventp : NULL;
1029 
1030 	if (ie != NULL) {
1031 		if (!(ie->ie_flags & IE_SOFT))
1032 			return (EINVAL);
1033 	} else {
1034 		error = intr_event_create(&ie, NULL, IE_SOFT, 0,
1035 		    NULL, NULL, NULL, swi_assign_cpu, "swi%d:", pri);
1036 		if (error)
1037 			return (error);
1038 		if (eventp != NULL)
1039 			*eventp = ie;
1040 	}
1041 	if (handler != NULL) {
1042 		error = intr_event_add_handler(ie, name, NULL, handler, arg,
1043 		    PI_SWI(pri), flags, cookiep);
1044 	}
1045 	return (error);
1046 }
1047 
/*
 * Schedule a software interrupt thread.
 *
 * 'cookie' is the handler cookie; 'flags' may contain:
 *   SWI_DELAY   - only mark the handler as needing service, do not schedule
 *                 the thread now;
 *   SWI_FROMNMI - the call is made from NMI context: no entropy is
 *                 harvested and, on SMP i386/amd64, a self-IPI is sent
 *                 instead of scheduling the thread directly.
 */
void
swi_sched(void *cookie, int flags)
{
	struct intr_handler *ih = (struct intr_handler *)cookie;
	struct intr_event *ie = ih->ih_event;
	struct intr_entropy entropy;
	int error __unused;

	CTR3(KTR_INTR, "swi_sched: %s %s need=%d", ie->ie_name, ih->ih_name,
	    ih->ih_need);

	if ((flags & SWI_FROMNMI) == 0) {
		entropy.event = (uintptr_t)ih;
		entropy.td = curthread;
		random_harvest_queue(&entropy, sizeof(entropy), RANDOM_SWI);
	}

	/*
	 * Set ih_need for this handler so that if the ithread is already
	 * running it will execute this handler on the next pass.  Otherwise,
	 * it will execute it the next time it runs.
	 */
	ih->ih_need = 1;

	if (flags & SWI_DELAY)
		return;

	if (flags & SWI_FROMNMI) {
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
		KASSERT(ie == clk_intr_event,
		    ("SWI_FROMNMI used not with clk_intr_event"));
		ipi_self_from_nmi(IPI_SWI);
#endif
	} else {
		VM_CNT_INC(v_soft);
		/* 'error' is only consumed by the KASSERT below. */
		error = intr_event_schedule_thread(ie);
		KASSERT(error == 0, ("stray software interrupt"));
	}
}
1090 
/*
 * Remove a software interrupt handler.  The interrupt event is left in
 * place even if this was its last handler; callers that want it gone have
 * to call intr_event_destroy() themselves, which is admittedly not an
 * optimal interface.
 */
int
swi_remove(void *cookie)
{
	int error;

	error = intr_event_remove_handler(cookie);
	return (error);
}
1103 
1104 static void
1105 intr_event_execute_handlers(struct proc *p, struct intr_event *ie)
1106 {
1107 	struct intr_handler *ih, *ihn, *ihp;
1108 
1109 	ihp = NULL;
1110 	CK_SLIST_FOREACH_SAFE(ih, &ie->ie_handlers, ih_next, ihn) {
1111 		/*
1112 		 * If this handler is marked for death, remove it from
1113 		 * the list of handlers and wake up the sleeper.
1114 		 */
1115 		if (ih->ih_flags & IH_DEAD) {
1116 			mtx_lock(&ie->ie_lock);
1117 			if (ihp == NULL)
1118 				CK_SLIST_REMOVE_HEAD(&ie->ie_handlers, ih_next);
1119 			else
1120 				CK_SLIST_REMOVE_AFTER(ihp, ih_next);
1121 			ih->ih_flags &= ~IH_DEAD;
1122 			wakeup(ih);
1123 			mtx_unlock(&ie->ie_lock);
1124 			continue;
1125 		}
1126 
1127 		/*
1128 		 * Now that we know that the current element won't be removed
1129 		 * update the previous element.
1130 		 */
1131 		ihp = ih;
1132 
1133 		if ((ih->ih_flags & IH_CHANGED) != 0) {
1134 			mtx_lock(&ie->ie_lock);
1135 			ih->ih_flags &= ~IH_CHANGED;
1136 			wakeup(ih);
1137 			mtx_unlock(&ie->ie_lock);
1138 		}
1139 
1140 		/* Skip filter only handlers */
1141 		if (ih->ih_handler == NULL)
1142 			continue;
1143 
1144 		/* Skip suspended handlers */
1145 		if ((ih->ih_flags & IH_SUSP) != 0)
1146 			continue;
1147 
1148 		/*
1149 		 * For software interrupt threads, we only execute
1150 		 * handlers that have their need flag set.  Hardware
1151 		 * interrupt threads always invoke all of their handlers.
1152 		 *
1153 		 * ih_need can only be 0 or 1.  Failed cmpset below
1154 		 * means that there is no request to execute handlers,
1155 		 * so a retry of the cmpset is not needed.
1156 		 */
1157 		if ((ie->ie_flags & IE_SOFT) != 0 &&
1158 		    atomic_cmpset_int(&ih->ih_need, 1, 0) == 0)
1159 			continue;
1160 
1161 		/* Execute this handler. */
1162 		CTR6(KTR_INTR, "%s: pid %d exec %p(%p) for %s flg=%x",
1163 		    __func__, p->p_pid, (void *)ih->ih_handler,
1164 		    ih->ih_argument, ih->ih_name, ih->ih_flags);
1165 
1166 		if (!(ih->ih_flags & IH_MPSAFE))
1167 			mtx_lock(&Giant);
1168 		ih->ih_handler(ih->ih_argument);
1169 		if (!(ih->ih_flags & IH_MPSAFE))
1170 			mtx_unlock(&Giant);
1171 	}
1172 }
1173 
1174 static void
1175 ithread_execute_handlers(struct proc *p, struct intr_event *ie)
1176 {
1177 
1178 	/* Interrupt handlers should not sleep. */
1179 	if (!(ie->ie_flags & IE_SOFT))
1180 		THREAD_NO_SLEEPING();
1181 	intr_event_execute_handlers(p, ie);
1182 	if (!(ie->ie_flags & IE_SOFT))
1183 		THREAD_SLEEPING_OK();
1184 
1185 	/*
1186 	 * Interrupt storm handling:
1187 	 *
1188 	 * If this interrupt source is currently storming, then throttle
1189 	 * it to only fire the handler once  per clock tick.
1190 	 *
1191 	 * If this interrupt source is not currently storming, but the
1192 	 * number of back to back interrupts exceeds the storm threshold,
1193 	 * then enter storming mode.
1194 	 */
1195 	if (intr_storm_threshold != 0 && ie->ie_count >= intr_storm_threshold &&
1196 	    !(ie->ie_flags & IE_SOFT)) {
1197 		/* Report the message only once every second. */
1198 		if (ppsratecheck(&ie->ie_warntm, &ie->ie_warncnt, 1)) {
1199 			printf(
1200 	"interrupt storm detected on \"%s\"; throttling interrupt source\n",
1201 			    ie->ie_name);
1202 		}
1203 		pause("istorm", 1);
1204 	} else
1205 		ie->ie_count++;
1206 
1207 	/*
1208 	 * Now that all the handlers have had a chance to run, reenable
1209 	 * the interrupt source.
1210 	 */
1211 	if (ie->ie_post_ithread != NULL)
1212 		ie->ie_post_ithread(ie->ie_source);
1213 }
1214 
1215 /*
1216  * This is the main code for interrupt threads.
1217  */
1218 static void
1219 ithread_loop(void *arg)
1220 {
1221 	struct epoch_tracker et;
1222 	struct intr_thread *ithd;
1223 	struct intr_event *ie;
1224 	struct thread *td;
1225 	struct proc *p;
1226 	int wake, epoch_count;
1227 	bool needs_epoch;
1228 
1229 	td = curthread;
1230 	p = td->td_proc;
1231 	ithd = (struct intr_thread *)arg;
1232 	KASSERT(ithd->it_thread == td,
1233 	    ("%s: ithread and proc linkage out of sync", __func__));
1234 	ie = ithd->it_event;
1235 	ie->ie_count = 0;
1236 	wake = 0;
1237 
1238 	/*
1239 	 * As long as we have interrupts outstanding, go through the
1240 	 * list of handlers, giving each one a go at it.
1241 	 */
1242 	for (;;) {
1243 		/*
1244 		 * If we are an orphaned thread, then just die.
1245 		 */
1246 		if (ithd->it_flags & IT_DEAD) {
1247 			CTR3(KTR_INTR, "%s: pid %d (%s) exiting", __func__,
1248 			    p->p_pid, td->td_name);
1249 			free(ithd, M_ITHREAD);
1250 			kthread_exit();
1251 		}
1252 
1253 		/*
1254 		 * Service interrupts.  If another interrupt arrives while
1255 		 * we are running, it will set it_need to note that we
1256 		 * should make another pass.
1257 		 *
1258 		 * The load_acq part of the following cmpset ensures
1259 		 * that the load of ih_need in ithread_execute_handlers()
1260 		 * is ordered after the load of it_need here.
1261 		 */
1262 		needs_epoch =
1263 		    (atomic_load_int(&ie->ie_hflags) & IH_NET) != 0;
1264 		if (needs_epoch) {
1265 			epoch_count = 0;
1266 			NET_EPOCH_ENTER(et);
1267 		}
1268 		while (atomic_cmpset_acq_int(&ithd->it_need, 1, 0) != 0) {
1269 			ithread_execute_handlers(p, ie);
1270 			if (needs_epoch &&
1271 			    ++epoch_count >= intr_epoch_batch) {
1272 				NET_EPOCH_EXIT(et);
1273 				epoch_count = 0;
1274 				NET_EPOCH_ENTER(et);
1275 			}
1276 		}
1277 		if (needs_epoch)
1278 			NET_EPOCH_EXIT(et);
1279 		WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread");
1280 		mtx_assert(&Giant, MA_NOTOWNED);
1281 
1282 		/*
1283 		 * Processed all our interrupts.  Now get the sched
1284 		 * lock.  This may take a while and it_need may get
1285 		 * set again, so we have to check it again.
1286 		 */
1287 		thread_lock(td);
1288 		if (atomic_load_acq_int(&ithd->it_need) == 0 &&
1289 		    (ithd->it_flags & (IT_DEAD | IT_WAIT)) == 0) {
1290 			TD_SET_IWAIT(td);
1291 			ie->ie_count = 0;
1292 			mi_switch(SW_VOL | SWT_IWAIT);
1293 		} else {
1294 			if (ithd->it_flags & IT_WAIT) {
1295 				wake = 1;
1296 				ithd->it_flags &= ~IT_WAIT;
1297 			}
1298 			thread_unlock(td);
1299 		}
1300 		if (wake) {
1301 			wakeup(ithd);
1302 			wake = 0;
1303 		}
1304 	}
1305 }
1306 
1307 /*
1308  * Main interrupt handling body.
1309  *
1310  * Input:
1311  * o ie:                        the event connected to this interrupt.
1312  * o frame:                     some archs (i.e. i386) pass a frame to some.
1313  *                              handlers as their main argument.
1314  * Return value:
1315  * o 0:                         everything ok.
1316  * o EINVAL:                    stray interrupt.
1317  */
1318 int
1319 intr_event_handle(struct intr_event *ie, struct trapframe *frame)
1320 {
1321 	struct intr_handler *ih;
1322 	struct trapframe *oldframe;
1323 	struct thread *td;
1324 	int phase;
1325 	int ret;
1326 	bool filter, thread;
1327 
1328 	td = curthread;
1329 
1330 #ifdef KSTACK_USAGE_PROF
1331 	intr_prof_stack_use(td, frame);
1332 #endif
1333 
1334 	/* An interrupt with no event or handlers is a stray interrupt. */
1335 	if (ie == NULL || CK_SLIST_EMPTY(&ie->ie_handlers))
1336 		return (EINVAL);
1337 
1338 	/*
1339 	 * Execute fast interrupt handlers directly.
1340 	 * To support clock handlers, if a handler registers
1341 	 * with a NULL argument, then we pass it a pointer to
1342 	 * a trapframe as its argument.
1343 	 */
1344 	td->td_intr_nesting_level++;
1345 	filter = false;
1346 	thread = false;
1347 	ret = 0;
1348 	critical_enter();
1349 	oldframe = td->td_intr_frame;
1350 	td->td_intr_frame = frame;
1351 
1352 	phase = ie->ie_phase;
1353 	atomic_add_int(&ie->ie_active[phase], 1);
1354 
1355 	/*
1356 	 * This fence is required to ensure that no later loads are
1357 	 * re-ordered before the ie_active store.
1358 	 */
1359 	atomic_thread_fence_seq_cst();
1360 
1361 	CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
1362 		if ((ih->ih_flags & IH_SUSP) != 0)
1363 			continue;
1364 		if ((ie->ie_flags & IE_SOFT) != 0 && ih->ih_need == 0)
1365 			continue;
1366 		if (ih->ih_filter == NULL) {
1367 			thread = true;
1368 			continue;
1369 		}
1370 		CTR4(KTR_INTR, "%s: exec %p(%p) for %s", __func__,
1371 		    ih->ih_filter, ih->ih_argument == NULL ? frame :
1372 		    ih->ih_argument, ih->ih_name);
1373 		if (ih->ih_argument == NULL)
1374 			ret = ih->ih_filter(frame);
1375 		else
1376 			ret = ih->ih_filter(ih->ih_argument);
1377 		KASSERT(ret == FILTER_STRAY ||
1378 		    ((ret & (FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) != 0 &&
1379 		    (ret & ~(FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) == 0),
1380 		    ("%s: incorrect return value %#x from %s", __func__, ret,
1381 		    ih->ih_name));
1382 		filter = filter || ret == FILTER_HANDLED;
1383 
1384 		/*
1385 		 * Wrapper handler special handling:
1386 		 *
1387 		 * in some particular cases (like pccard and pccbb),
1388 		 * the _real_ device handler is wrapped in a couple of
1389 		 * functions - a filter wrapper and an ithread wrapper.
1390 		 * In this case (and just in this case), the filter wrapper
1391 		 * could ask the system to schedule the ithread and mask
1392 		 * the interrupt source if the wrapped handler is composed
1393 		 * of just an ithread handler.
1394 		 *
1395 		 * TODO: write a generic wrapper to avoid people rolling
1396 		 * their own.
1397 		 */
1398 		if (!thread) {
1399 			if (ret == FILTER_SCHEDULE_THREAD)
1400 				thread = true;
1401 		}
1402 	}
1403 	atomic_add_rel_int(&ie->ie_active[phase], -1);
1404 
1405 	td->td_intr_frame = oldframe;
1406 
1407 	if (thread) {
1408 		if (ie->ie_pre_ithread != NULL)
1409 			ie->ie_pre_ithread(ie->ie_source);
1410 	} else {
1411 		if (ie->ie_post_filter != NULL)
1412 			ie->ie_post_filter(ie->ie_source);
1413 	}
1414 
1415 	/* Schedule the ithread if needed. */
1416 	if (thread) {
1417 		int error __unused;
1418 
1419 		error =  intr_event_schedule_thread(ie);
1420 		KASSERT(error == 0, ("bad stray interrupt"));
1421 	}
1422 	critical_exit();
1423 	td->td_intr_nesting_level--;
1424 #ifdef notyet
1425 	/* The interrupt is not aknowledged by any filter and has no ithread. */
1426 	if (!thread && !filter)
1427 		return (EINVAL);
1428 #endif
1429 	return (0);
1430 }
1431 
1432 #ifdef DDB
1433 /*
1434  * Dump details about an interrupt handler
1435  */
1436 static void
1437 db_dump_intrhand(struct intr_handler *ih)
1438 {
1439 	int comma;
1440 
1441 	db_printf("\t%-10s ", ih->ih_name);
1442 	switch (ih->ih_pri) {
1443 	case PI_REALTIME:
1444 		db_printf("CLK ");
1445 		break;
1446 	case PI_AV:
1447 		db_printf("AV  ");
1448 		break;
1449 	case PI_TTY:
1450 		db_printf("TTY ");
1451 		break;
1452 	case PI_NET:
1453 		db_printf("NET ");
1454 		break;
1455 	case PI_DISK:
1456 		db_printf("DISK");
1457 		break;
1458 	case PI_DULL:
1459 		db_printf("DULL");
1460 		break;
1461 	default:
1462 		if (ih->ih_pri >= PI_SOFT)
1463 			db_printf("SWI ");
1464 		else
1465 			db_printf("%4u", ih->ih_pri);
1466 		break;
1467 	}
1468 	db_printf(" ");
1469 	if (ih->ih_filter != NULL) {
1470 		db_printf("[F]");
1471 		db_printsym((uintptr_t)ih->ih_filter, DB_STGY_PROC);
1472 	}
1473 	if (ih->ih_handler != NULL) {
1474 		if (ih->ih_filter != NULL)
1475 			db_printf(",");
1476 		db_printf("[H]");
1477 		db_printsym((uintptr_t)ih->ih_handler, DB_STGY_PROC);
1478 	}
1479 	db_printf("(%p)", ih->ih_argument);
1480 	if (ih->ih_need ||
1481 	    (ih->ih_flags & (IH_EXCLUSIVE | IH_ENTROPY | IH_DEAD |
1482 	    IH_MPSAFE)) != 0) {
1483 		db_printf(" {");
1484 		comma = 0;
1485 		if (ih->ih_flags & IH_EXCLUSIVE) {
1486 			if (comma)
1487 				db_printf(", ");
1488 			db_printf("EXCL");
1489 			comma = 1;
1490 		}
1491 		if (ih->ih_flags & IH_ENTROPY) {
1492 			if (comma)
1493 				db_printf(", ");
1494 			db_printf("ENTROPY");
1495 			comma = 1;
1496 		}
1497 		if (ih->ih_flags & IH_DEAD) {
1498 			if (comma)
1499 				db_printf(", ");
1500 			db_printf("DEAD");
1501 			comma = 1;
1502 		}
1503 		if (ih->ih_flags & IH_MPSAFE) {
1504 			if (comma)
1505 				db_printf(", ");
1506 			db_printf("MPSAFE");
1507 			comma = 1;
1508 		}
1509 		if (ih->ih_need) {
1510 			if (comma)
1511 				db_printf(", ");
1512 			db_printf("NEED");
1513 		}
1514 		db_printf("}");
1515 	}
1516 	db_printf("\n");
1517 }
1518 
1519 /*
1520  * Dump details about a event.
1521  */
1522 void
1523 db_dump_intr_event(struct intr_event *ie, int handlers)
1524 {
1525 	struct intr_handler *ih;
1526 	struct intr_thread *it;
1527 	int comma;
1528 
1529 	db_printf("%s ", ie->ie_fullname);
1530 	it = ie->ie_thread;
1531 	if (it != NULL)
1532 		db_printf("(pid %d)", it->it_thread->td_proc->p_pid);
1533 	else
1534 		db_printf("(no thread)");
1535 	if ((ie->ie_flags & (IE_SOFT | IE_ADDING_THREAD)) != 0 ||
1536 	    (it != NULL && it->it_need)) {
1537 		db_printf(" {");
1538 		comma = 0;
1539 		if (ie->ie_flags & IE_SOFT) {
1540 			db_printf("SOFT");
1541 			comma = 1;
1542 		}
1543 		if (ie->ie_flags & IE_ADDING_THREAD) {
1544 			if (comma)
1545 				db_printf(", ");
1546 			db_printf("ADDING_THREAD");
1547 			comma = 1;
1548 		}
1549 		if (it != NULL && it->it_need) {
1550 			if (comma)
1551 				db_printf(", ");
1552 			db_printf("NEED");
1553 		}
1554 		db_printf("}");
1555 	}
1556 	db_printf("\n");
1557 
1558 	if (handlers)
1559 		CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next)
1560 		    db_dump_intrhand(ih);
1561 }
1562 
1563 /*
1564  * Dump data about interrupt handlers
1565  */
1566 DB_SHOW_COMMAND(intr, db_show_intr)
1567 {
1568 	struct intr_event *ie;
1569 	int all, verbose;
1570 
1571 	verbose = strchr(modif, 'v') != NULL;
1572 	all = strchr(modif, 'a') != NULL;
1573 	TAILQ_FOREACH(ie, &event_list, ie_list) {
1574 		if (!all && CK_SLIST_EMPTY(&ie->ie_handlers))
1575 			continue;
1576 		db_dump_intr_event(ie, verbose);
1577 		if (db_pager_quit)
1578 			break;
1579 	}
1580 }
1581 #endif /* DDB */
1582 
1583 /*
1584  * Start standard software interrupt threads
1585  */
1586 static void
1587 start_softintr(void *dummy)
1588 {
1589 
1590 	if (swi_add(&clk_intr_event, "clk", NULL, NULL, SWI_CLOCK,
1591 	    INTR_MPSAFE, NULL))
1592 		panic("died while creating clk swi ithread");
1593 	if (swi_add(NULL, "vm", swi_vm, NULL, SWI_VM, INTR_MPSAFE, &vm_ih))
1594 		panic("died while creating vm swi ithread");
1595 }
1596 SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr,
1597     NULL);
1598 
1599 /*
1600  * Sysctls used by systat and others: hw.intrnames and hw.intrcnt.
1601  * The data for this machine dependent, and the declarations are in machine
1602  * dependent code.  The layout of intrnames and intrcnt however is machine
1603  * independent.
1604  *
1605  * We do not know the length of intrcnt and intrnames at compile time, so
1606  * calculate things at run time.
1607  */
1608 static int
1609 sysctl_intrnames(SYSCTL_HANDLER_ARGS)
1610 {
1611 	return (sysctl_handle_opaque(oidp, intrnames, sintrnames, req));
1612 }
1613 
1614 SYSCTL_PROC(_hw, OID_AUTO, intrnames,
1615     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
1616     sysctl_intrnames, "",
1617     "Interrupt Names");
1618 
1619 static int
1620 sysctl_intrcnt(SYSCTL_HANDLER_ARGS)
1621 {
1622 #ifdef SCTL_MASK32
1623 	uint32_t *intrcnt32;
1624 	unsigned i;
1625 	int error;
1626 
1627 	if (req->flags & SCTL_MASK32) {
1628 		if (!req->oldptr)
1629 			return (sysctl_handle_opaque(oidp, NULL, sintrcnt / 2, req));
1630 		intrcnt32 = malloc(sintrcnt / 2, M_TEMP, M_NOWAIT);
1631 		if (intrcnt32 == NULL)
1632 			return (ENOMEM);
1633 		for (i = 0; i < sintrcnt / sizeof (u_long); i++)
1634 			intrcnt32[i] = intrcnt[i];
1635 		error = sysctl_handle_opaque(oidp, intrcnt32, sintrcnt / 2, req);
1636 		free(intrcnt32, M_TEMP);
1637 		return (error);
1638 	}
1639 #endif
1640 	return (sysctl_handle_opaque(oidp, intrcnt, sintrcnt, req));
1641 }
1642 
1643 SYSCTL_PROC(_hw, OID_AUTO, intrcnt,
1644     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
1645     sysctl_intrcnt, "",
1646     "Interrupt Counts");
1647 
#ifdef DDB
/*
 * DDB "show intrcnt" command: print the name and value of every
 * non-zero interrupt counter.
 *
 * intrnames is walked as a sequence of NUL terminated strings, one per
 * intrcnt entry.  The dump ends at the first empty name, after the last
 * counter, or when the pager reports that the user quit.
 */
DB_SHOW_COMMAND(intrcnt, db_show_intrcnt)
{
	char *name;
	u_int idx;

	name = intrnames;
	for (idx = 0; idx < (sintrcnt / sizeof(u_long)) && !db_pager_quit;
	    idx++) {
		if (*name == '\0')
			break;
		if (intrcnt[idx] != 0)
			db_printf("%s\t%lu\n", name, intrcnt[idx]);
		/* Step over this name and its terminating NUL. */
		name += strlen(name) + 1;
	}
}
#endif
1670