xref: /illumos-gate/usr/src/uts/i86pc/io/apix/apix_intr.c (revision 78801af7286cd73dbc996d470f789e75993cf15d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2018 Western Digital Corporation.  All rights reserved.
25  * Copyright 2019 Joyent, Inc.
26  */
27 
28 #include <sys/cpuvar.h>
29 #include <sys/cpu_event.h>
30 #include <sys/param.h>
31 #include <sys/cmn_err.h>
32 #include <sys/t_lock.h>
33 #include <sys/kmem.h>
34 #include <sys/machlock.h>
35 #include <sys/systm.h>
36 #include <sys/archsystm.h>
37 #include <sys/atomic.h>
38 #include <sys/sdt.h>
39 #include <sys/processor.h>
40 #include <sys/time.h>
41 #include <sys/psm.h>
42 #include <sys/smp_impldefs.h>
43 #include <sys/cram.h>
44 #include <sys/apic.h>
45 #include <sys/pit.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/ddi_impldefs.h>
49 #include <sys/pci.h>
50 #include <sys/promif.h>
51 #include <sys/x86_archext.h>
52 #include <sys/cpc_impl.h>
53 #include <sys/uadmin.h>
54 #include <sys/panic.h>
55 #include <sys/debug.h>
56 #include <sys/trap.h>
57 #include <sys/machsystm.h>
58 #include <sys/sysmacros.h>
59 #include <sys/rm_platter.h>
60 #include <sys/privregs.h>
61 #include <sys/note.h>
62 #include <sys/pci_intr_lib.h>
63 #include <sys/spl.h>
64 #include <sys/clock.h>
65 #include <sys/dditypes.h>
66 #include <sys/sunddi.h>
67 #include <sys/x_call.h>
68 #include <sys/reboot.h>
69 #include <vm/hat_i86.h>
70 #include <sys/stack.h>
71 #include <sys/apix.h>
72 #include <sys/smt.h>
73 
/*
 * Forward declaration: apix_post_hardint() is defined later in this file
 * but is called earlier, from apix_dispatch_pending_autovect().
 */
static void apix_post_hardint(int);
75 
76 /*
77  * Insert an vector into the tail of the interrupt pending list
78  */
79 static __inline__ void
80 apix_insert_pending_av(apix_impl_t *apixp, struct autovec *avp, int ipl)
81 {
82 	struct autovec **head = apixp->x_intr_head;
83 	struct autovec **tail = apixp->x_intr_tail;
84 
85 	avp->av_ipl_link = NULL;
86 	if (tail[ipl] == NULL) {
87 		head[ipl] = tail[ipl] = avp;
88 		return;
89 	}
90 
91 	tail[ipl]->av_ipl_link = avp;
92 	tail[ipl] = avp;
93 }
94 
95 /*
96  * Remove and return an vector from the head of hardware interrupt
97  * pending list.
98  */
99 static __inline__ struct autovec *
100 apix_remove_pending_av(apix_impl_t *apixp, int ipl)
101 {
102 	struct cpu *cpu = CPU;
103 	struct autovec **head = apixp->x_intr_head;
104 	struct autovec **tail = apixp->x_intr_tail;
105 	struct autovec *avp = head[ipl];
106 
107 	if (avp == NULL)
108 		return (NULL);
109 
110 	if (avp->av_vector != NULL && avp->av_prilevel < cpu->cpu_base_spl) {
111 		/*
112 		 * If there is blocked higher level interrupts, return
113 		 * NULL to quit handling of current IPL level.
114 		 */
115 		apixp->x_intr_pending |= (1 << avp->av_prilevel);
116 		return (NULL);
117 	}
118 
119 	avp->av_flags &= ~AV_PENTRY_PEND;
120 	avp->av_flags |= AV_PENTRY_ONPROC;
121 	head[ipl] = avp->av_ipl_link;
122 	avp->av_ipl_link = NULL;
123 
124 	if (head[ipl] == NULL)
125 		tail[ipl] = NULL;
126 
127 	return (avp);
128 }
129 
130 /*
131  * add_pending_hardint:
132  *
133  * Add hardware interrupts to the interrupt pending list.
134  */
135 static void
136 apix_add_pending_hardint(int vector)
137 {
138 	uint32_t cpuid = psm_get_cpu_id();
139 	apix_impl_t *apixp = apixs[cpuid];
140 	apix_vector_t *vecp = apixp->x_vectbl[vector];
141 	struct autovec *p, *prevp = NULL;
142 	int ipl;
143 
144 	/*
145 	 * The MSI interrupt not supporting per-vector masking could
146 	 * be triggered on a false vector as a result of rebinding
147 	 * operation cannot programme MSI address & data atomically.
148 	 * Add ISR of this interrupt to the pending list for such
149 	 * suspicious interrupt.
150 	 */
151 	APIX_DO_FAKE_INTR(cpuid, vector);
152 	if (vecp == NULL)
153 		return;
154 
155 	for (p = vecp->v_autovect; p != NULL; p = p->av_link) {
156 		if (p->av_vector == NULL)
157 			continue;	/* skip freed entry */
158 
159 		ipl = p->av_prilevel;
160 		prevp = p;
161 
162 		/* set pending at specified priority level */
163 		apixp->x_intr_pending |= (1 << ipl);
164 
165 		if (p->av_flags & AV_PENTRY_PEND)
166 			continue;	/* already in the pending list */
167 		p->av_flags |= AV_PENTRY_PEND;
168 
169 		/* insert into pending list by it original IPL */
170 		apix_insert_pending_av(apixp, p, ipl);
171 	}
172 
173 	/* last one of the linked list */
174 	if (prevp && ((prevp->av_flags & AV_PENTRY_LEVEL) != 0))
175 		prevp->av_flags |= (vector & AV_PENTRY_VECTMASK);
176 }
177 
178 /*
179  * Walk pending hardware interrupts at given priority level, invoking
180  * each interrupt handler as we go.
181  */
182 extern uint64_t intr_get_time(void);
183 
184 static void
185 apix_dispatch_pending_autovect(uint_t ipl)
186 {
187 	uint32_t cpuid = psm_get_cpu_id();
188 	apix_impl_t *apixp = apixs[cpuid];
189 	struct autovec *av;
190 
191 	while ((av = apix_remove_pending_av(apixp, ipl)) != NULL) {
192 		uint_t r;
193 		uint_t (*intr)() = av->av_vector;
194 		caddr_t arg1 = av->av_intarg1;
195 		caddr_t arg2 = av->av_intarg2;
196 		dev_info_t *dip = av->av_dip;
197 		uchar_t vector = av->av_flags & AV_PENTRY_VECTMASK;
198 
199 		if (intr == NULL)
200 			continue;
201 
202 		/* Don't enable interrupts during x-calls */
203 		if (ipl != XC_HI_PIL)
204 			sti();
205 
206 		DTRACE_PROBE4(interrupt__start, dev_info_t *, dip,
207 		    void *, intr, caddr_t, arg1, caddr_t, arg2);
208 		r = (*intr)(arg1, arg2);
209 		DTRACE_PROBE4(interrupt__complete, dev_info_t *, dip,
210 		    void *, intr, caddr_t, arg1, uint_t, r);
211 
212 		if (av->av_ticksp && av->av_prilevel <= LOCK_LEVEL)
213 			atomic_add_64(av->av_ticksp, intr_get_time());
214 
215 		cli();
216 
217 		if (vector) {
218 			if ((av->av_flags & AV_PENTRY_PEND) == 0)
219 				av->av_flags &= ~AV_PENTRY_VECTMASK;
220 
221 			apix_post_hardint(vector);
222 		}
223 
224 		/* mark it as idle */
225 		av->av_flags &= ~AV_PENTRY_ONPROC;
226 	}
227 }
228 
229 static caddr_t
230 apix_do_softint_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil,
231     caddr_t stackptr)
232 {
233 	kthread_t *t, *volatile it;
234 	struct machcpu *mcpu = &cpu->cpu_m;
235 	hrtime_t now;
236 
237 	UNREFERENCED_1PARAMETER(oldpil);
238 	ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);
239 
240 	atomic_and_32((uint32_t *)&mcpu->mcpu_softinfo.st_pending, ~(1 << pil));
241 
242 	mcpu->mcpu_pri = pil;
243 
244 	now = tsc_read();
245 
246 	/*
247 	 * Get set to run interrupt thread.
248 	 * There should always be an interrupt thread since we
249 	 * allocate one for each level on the CPU.
250 	 */
251 	it = cpu->cpu_intr_thread;
252 	ASSERT(it != NULL);
253 	cpu->cpu_intr_thread = it->t_link;
254 
255 	/* t_intr_start could be zero due to cpu_intr_swtch_enter. */
256 	t = cpu->cpu_thread;
257 	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
258 		hrtime_t intrtime = now - t->t_intr_start;
259 		mcpu->intrstat[pil][0] += intrtime;
260 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
261 		t->t_intr_start = 0;
262 	}
263 
264 	/*
265 	 * Note that the code in kcpc_overflow_intr -relies- on the
266 	 * ordering of events here - in particular that t->t_lwp of
267 	 * the interrupt thread is set to the pinned thread *before*
268 	 * curthread is changed.
269 	 */
270 	it->t_lwp = t->t_lwp;
271 	it->t_state = TS_ONPROC;
272 
273 	/*
274 	 * Push interrupted thread onto list from new thread.
275 	 * Set the new thread as the current one.
276 	 * Set interrupted thread's T_SP because if it is the idle thread,
277 	 * resume() may use that stack between threads.
278 	 */
279 
280 	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
281 	t->t_sp = (uintptr_t)stackptr;
282 
283 	it->t_intr = t;
284 	cpu->cpu_thread = it;
285 	smt_begin_intr(pil);
286 
287 	/*
288 	 * Set bit for this pil in CPU's interrupt active bitmask.
289 	 */
290 	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
291 	cpu->cpu_intr_actv |= (1 << pil);
292 
293 	/*
294 	 * Initialize thread priority level from intr_pri
295 	 */
296 	it->t_pil = (uchar_t)pil;
297 	it->t_pri = (pri_t)pil + intr_pri;
298 	it->t_intr_start = now;
299 
300 	return (it->t_stk);
301 }
302 
303 static void
304 apix_do_softint_epilog(struct cpu *cpu, uint_t oldpil)
305 {
306 	struct machcpu *mcpu = &cpu->cpu_m;
307 	kthread_t *t, *it;
308 	uint_t pil, basespl;
309 	hrtime_t intrtime;
310 	hrtime_t now = tsc_read();
311 
312 	it = cpu->cpu_thread;
313 	pil = it->t_pil;
314 
315 	cpu->cpu_stats.sys.intr[pil - 1]++;
316 
317 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
318 	cpu->cpu_intr_actv &= ~(1 << pil);
319 
320 	intrtime = now - it->t_intr_start;
321 	mcpu->intrstat[pil][0] += intrtime;
322 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
323 
324 	/*
325 	 * If there is still an interrupted thread underneath this one
326 	 * then the interrupt was never blocked and the return is
327 	 * fairly simple.  Otherwise it isn't.
328 	 */
329 	if ((t = it->t_intr) == NULL) {
330 		/*
331 		 * Put thread back on the interrupt thread list.
332 		 * This was an interrupt thread, so set CPU's base SPL.
333 		 */
334 		set_base_spl();
335 		/* mcpu->mcpu_pri = cpu->cpu_base_spl; */
336 
337 		/*
338 		 * If there are pending interrupts, send a softint to
339 		 * re-enter apix_do_interrupt() and get them processed.
340 		 */
341 		if (apixs[cpu->cpu_id]->x_intr_pending)
342 			siron();
343 
344 		it->t_state = TS_FREE;
345 		it->t_link = cpu->cpu_intr_thread;
346 		cpu->cpu_intr_thread = it;
347 		(void) splhigh();
348 		sti();
349 		swtch();
350 		/*NOTREACHED*/
351 		panic("dosoftint_epilog: swtch returned");
352 	}
353 	it->t_link = cpu->cpu_intr_thread;
354 	cpu->cpu_intr_thread = it;
355 	it->t_state = TS_FREE;
356 	smt_end_intr();
357 	cpu->cpu_thread = t;
358 
359 	if (t->t_flag & T_INTR_THREAD)
360 		t->t_intr_start = now;
361 	basespl = cpu->cpu_base_spl;
362 	pil = MAX(oldpil, basespl);
363 	mcpu->mcpu_pri = pil;
364 }
365 
366 /*
367  * Dispatch a soft interrupt
368  */
369 static void
370 apix_dispatch_softint(uint_t oldpil, uint_t arg2)
371 {
372 	struct cpu *cpu = CPU;
373 
374 	UNREFERENCED_1PARAMETER(arg2);
375 
376 	sti();
377 	av_dispatch_softvect((int)cpu->cpu_thread->t_pil);
378 	cli();
379 
380 	/*
381 	 * Must run softint_epilog() on the interrupt thread stack, since
382 	 * there may not be a return from it if the interrupt thread blocked.
383 	 */
384 	apix_do_softint_epilog(cpu, oldpil);
385 }
386 
387 /*
388  * Deliver any softints the current interrupt priority allows.
389  * Called with interrupts disabled.
390  */
391 int
392 apix_do_softint(struct regs *regs)
393 {
394 	struct cpu *cpu = CPU;
395 	int oldipl;
396 	int newipl;
397 	volatile uint16_t pending;
398 	caddr_t newsp;
399 
400 	while ((pending = cpu->cpu_softinfo.st_pending) != 0) {
401 		newipl = bsrw_insn(pending);
402 		oldipl = cpu->cpu_pri;
403 		if (newipl <= oldipl || newipl <= cpu->cpu_base_spl)
404 			return (-1);
405 
406 		newsp = apix_do_softint_prolog(cpu, newipl, oldipl,
407 		    (caddr_t)regs);
408 		ASSERT(newsp != NULL);
409 		switch_sp_and_call(newsp, apix_dispatch_softint, oldipl, 0);
410 	}
411 
412 	return (0);
413 }
414 
415 static int
416 apix_hilevel_intr_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil,
417     struct regs *rp)
418 {
419 	struct machcpu *mcpu = &cpu->cpu_m;
420 	hrtime_t intrtime;
421 	hrtime_t now = tsc_read();
422 	apix_impl_t *apixp = apixs[cpu->cpu_id];
423 	uint_t mask;
424 
425 	ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);
426 
427 	if (pil == CBE_HIGH_PIL) {	/* 14 */
428 		cpu->cpu_profile_pil = oldpil;
429 		if (USERMODE(rp->r_cs)) {
430 			cpu->cpu_profile_pc = 0;
431 			cpu->cpu_profile_upc = rp->r_pc;
432 			cpu->cpu_cpcprofile_pc = 0;
433 			cpu->cpu_cpcprofile_upc = rp->r_pc;
434 		} else {
435 			cpu->cpu_profile_pc = rp->r_pc;
436 			cpu->cpu_profile_upc = 0;
437 			cpu->cpu_cpcprofile_pc = rp->r_pc;
438 			cpu->cpu_cpcprofile_upc = 0;
439 		}
440 	}
441 
442 	mcpu->mcpu_pri = pil;
443 
444 	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
445 	if (mask != 0) {
446 		int nestpil;
447 
448 		/*
449 		 * We have interrupted another high-level interrupt.
450 		 * Load starting timestamp, compute interval, update
451 		 * cumulative counter.
452 		 */
453 		nestpil = bsrw_insn((uint16_t)mask);
454 		intrtime = now -
455 		    mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)];
456 		mcpu->intrstat[nestpil][0] += intrtime;
457 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
458 	} else {
459 		kthread_t *t = cpu->cpu_thread;
460 
461 		/*
462 		 * See if we are interrupting a low-level interrupt thread.
463 		 * If so, account for its time slice only if its time stamp
464 		 * is non-zero.
465 		 */
466 		if ((t->t_flag & T_INTR_THREAD) != 0 && t->t_intr_start != 0) {
467 			intrtime = now - t->t_intr_start;
468 			mcpu->intrstat[t->t_pil][0] += intrtime;
469 			cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
470 			t->t_intr_start = 0;
471 		}
472 	}
473 
474 	smt_begin_intr(pil);
475 
476 	/* store starting timestamp in CPu structure for this IPL */
477 	mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] = now;
478 
479 	if (pil == 15) {
480 		/*
481 		 * To support reentrant level 15 interrupts, we maintain a
482 		 * recursion count in the top half of cpu_intr_actv.  Only
483 		 * when this count hits zero do we clear the PIL 15 bit from
484 		 * the lower half of cpu_intr_actv.
485 		 */
486 		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
487 		(*refcntp)++;
488 	}
489 
490 	cpu->cpu_intr_actv |= (1 << pil);
491 	/* clear pending ipl level bit */
492 	apixp->x_intr_pending &= ~(1 << pil);
493 
494 	return (mask);
495 }
496 
497 static int
498 apix_hilevel_intr_epilog(struct cpu *cpu, uint_t oldpil)
499 {
500 	struct machcpu *mcpu = &cpu->cpu_m;
501 	uint_t mask, pil;
502 	hrtime_t intrtime;
503 	hrtime_t now = tsc_read();
504 
505 	pil = mcpu->mcpu_pri;
506 	cpu->cpu_stats.sys.intr[pil - 1]++;
507 
508 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
509 
510 	if (pil == 15) {
511 		/*
512 		 * To support reentrant level 15 interrupts, we maintain a
513 		 * recursion count in the top half of cpu_intr_actv.  Only
514 		 * when this count hits zero do we clear the PIL 15 bit from
515 		 * the lower half of cpu_intr_actv.
516 		 */
517 		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
518 
519 		ASSERT(*refcntp > 0);
520 
521 		if (--(*refcntp) == 0)
522 			cpu->cpu_intr_actv &= ~(1 << pil);
523 	} else {
524 		cpu->cpu_intr_actv &= ~(1 << pil);
525 	}
526 
527 	ASSERT(mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] != 0);
528 
529 	intrtime = now - mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)];
530 	mcpu->intrstat[pil][0] += intrtime;
531 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
532 
533 	/*
534 	 * Check for lower-pil nested high-level interrupt beneath
535 	 * current one.  If so, place a starting timestamp in its
536 	 * pil_high_start entry.
537 	 */
538 	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
539 	if (mask != 0) {
540 		int nestpil;
541 
542 		/*
543 		 * find PIL of nested interrupt
544 		 */
545 		nestpil = bsrw_insn((uint16_t)mask);
546 		ASSERT(nestpil < pil);
547 		mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)] = now;
548 		/*
549 		 * (Another high-level interrupt is active below this one,
550 		 * so there is no need to check for an interrupt
551 		 * thread.  That will be done by the lowest priority
552 		 * high-level interrupt active.)
553 		 */
554 	} else {
555 		/*
556 		 * Check to see if there is a low-level interrupt active.
557 		 * If so, place a starting timestamp in the thread
558 		 * structure.
559 		 */
560 		kthread_t *t = cpu->cpu_thread;
561 
562 		if (t->t_flag & T_INTR_THREAD)
563 			t->t_intr_start = now;
564 	}
565 
566 	smt_end_intr();
567 
568 	mcpu->mcpu_pri = oldpil;
569 	if (pil < CBE_HIGH_PIL)
570 		(void) (*setlvlx)(oldpil, 0);
571 
572 	return (mask);
573 }
574 
575 /*
576  * Dispatch a hilevel interrupt (one above LOCK_LEVEL)
577  */
578 static void
579 apix_dispatch_pending_hilevel(uint_t ipl, uint_t arg2)
580 {
581 	UNREFERENCED_1PARAMETER(arg2);
582 
583 	apix_dispatch_pending_autovect(ipl);
584 }
585 
586 static __inline__ int
587 apix_do_pending_hilevel(struct cpu *cpu, struct regs *rp)
588 {
589 	volatile uint16_t pending;
590 	uint_t newipl, oldipl;
591 	caddr_t newsp;
592 
593 	while ((pending = HILEVEL_PENDING(cpu)) != 0) {
594 		newipl = bsrw_insn(pending);
595 		ASSERT(newipl > LOCK_LEVEL && newipl > cpu->cpu_base_spl);
596 		oldipl = cpu->cpu_pri;
597 		if (newipl <= oldipl)
598 			return (-1);
599 
600 		/*
601 		 * High priority interrupts run on this cpu's interrupt stack.
602 		 */
603 		if (apix_hilevel_intr_prolog(cpu, newipl, oldipl, rp) == 0) {
604 			newsp = cpu->cpu_intr_stack;
605 			switch_sp_and_call(newsp, apix_dispatch_pending_hilevel,
606 			    newipl, 0);
607 		} else {	/* already on the interrupt stack */
608 			apix_dispatch_pending_hilevel(newipl, 0);
609 		}
610 		(void) apix_hilevel_intr_epilog(cpu, oldipl);
611 	}
612 
613 	return (0);
614 }
615 
616 /*
617  * Get an interrupt thread and swith to it. It's called from do_interrupt().
618  * The IF flag is cleared and thus all maskable interrupts are blocked at
619  * the time of calling.
620  */
621 static caddr_t
622 apix_intr_thread_prolog(struct cpu *cpu, uint_t pil, caddr_t stackptr)
623 {
624 	apix_impl_t *apixp = apixs[cpu->cpu_id];
625 	struct machcpu *mcpu = &cpu->cpu_m;
626 	hrtime_t now = tsc_read();
627 	kthread_t *t, *volatile it;
628 
629 	ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);
630 
631 	apixp->x_intr_pending &= ~(1 << pil);
632 	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
633 	cpu->cpu_intr_actv |= (1 << pil);
634 	mcpu->mcpu_pri = pil;
635 
636 	/*
637 	 * Get set to run interrupt thread.
638 	 * There should always be an interrupt thread since we
639 	 * allocate one for each level on the CPU.
640 	 */
641 	/* t_intr_start could be zero due to cpu_intr_swtch_enter. */
642 	t = cpu->cpu_thread;
643 	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
644 		hrtime_t intrtime = now - t->t_intr_start;
645 		mcpu->intrstat[pil][0] += intrtime;
646 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
647 		t->t_intr_start = 0;
648 	}
649 
650 	/*
651 	 * Push interrupted thread onto list from new thread.
652 	 * Set the new thread as the current one.
653 	 * Set interrupted thread's T_SP because if it is the idle thread,
654 	 * resume() may use that stack between threads.
655 	 */
656 
657 	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
658 
659 	t->t_sp = (uintptr_t)stackptr;	/* mark stack in curthread for resume */
660 
661 	/*
662 	 * Note that the code in kcpc_overflow_intr -relies- on the
663 	 * ordering of events here - in particular that t->t_lwp of
664 	 * the interrupt thread is set to the pinned thread *before*
665 	 * curthread is changed.
666 	 */
667 	it = cpu->cpu_intr_thread;
668 	cpu->cpu_intr_thread = it->t_link;
669 	it->t_intr = t;
670 	it->t_lwp = t->t_lwp;
671 
672 	/*
673 	 * (threads on the interrupt thread free list could have state
674 	 * preset to TS_ONPROC, but it helps in debugging if
675 	 * they're TS_FREE.)
676 	 */
677 	it->t_state = TS_ONPROC;
678 
679 	cpu->cpu_thread = it;
680 	smt_begin_intr(pil);
681 
682 	/*
683 	 * Initialize thread priority level from intr_pri
684 	 */
685 	it->t_pil = (uchar_t)pil;
686 	it->t_pri = (pri_t)pil + intr_pri;
687 	it->t_intr_start = now;
688 
689 	return (it->t_stk);
690 }
691 
692 static void
693 apix_intr_thread_epilog(struct cpu *cpu, uint_t oldpil)
694 {
695 	struct machcpu *mcpu = &cpu->cpu_m;
696 	kthread_t *t, *it = cpu->cpu_thread;
697 	uint_t pil, basespl;
698 	hrtime_t intrtime;
699 	hrtime_t now = tsc_read();
700 
701 	pil = it->t_pil;
702 	cpu->cpu_stats.sys.intr[pil - 1]++;
703 
704 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
705 	cpu->cpu_intr_actv &= ~(1 << pil);
706 
707 	ASSERT(it->t_intr_start != 0);
708 	intrtime = now - it->t_intr_start;
709 	mcpu->intrstat[pil][0] += intrtime;
710 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
711 
712 	/*
713 	 * If there is still an interrupted thread underneath this one
714 	 * then the interrupt was never blocked and the return is
715 	 * fairly simple.  Otherwise it isn't.
716 	 */
717 	if ((t = it->t_intr) == NULL) {
718 		/*
719 		 * The interrupted thread is no longer pinned underneath
720 		 * the interrupt thread.  This means the interrupt must
721 		 * have blocked, and the interrupted thread has been
722 		 * unpinned, and has probably been running around the
723 		 * system for a while.
724 		 *
725 		 * Since there is no longer a thread under this one, put
726 		 * this interrupt thread back on the CPU's free list and
727 		 * resume the idle thread which will dispatch the next
728 		 * thread to run.
729 		 */
730 		cpu->cpu_stats.sys.intrblk++;
731 
732 		/*
733 		 * Put thread back on the interrupt thread list.
734 		 * This was an interrupt thread, so set CPU's base SPL.
735 		 */
736 		set_base_spl();
737 		basespl = cpu->cpu_base_spl;
738 		mcpu->mcpu_pri = basespl;
739 		(*setlvlx)(basespl, 0);
740 
741 		/*
742 		 * If there are pending interrupts, send a softint to
743 		 * re-enter apix_do_interrupt() and get them processed.
744 		 */
745 		if (apixs[cpu->cpu_id]->x_intr_pending)
746 			siron();
747 
748 		it->t_state = TS_FREE;
749 		/*
750 		 * Return interrupt thread to pool
751 		 */
752 		it->t_link = cpu->cpu_intr_thread;
753 		cpu->cpu_intr_thread = it;
754 
755 		(void) splhigh();
756 		sti();
757 		swtch();
758 		/*NOTREACHED*/
759 		panic("dosoftint_epilog: swtch returned");
760 	}
761 
762 	/*
763 	 * Return interrupt thread to the pool
764 	 */
765 	it->t_link = cpu->cpu_intr_thread;
766 	cpu->cpu_intr_thread = it;
767 	it->t_state = TS_FREE;
768 
769 	smt_end_intr();
770 	cpu->cpu_thread = t;
771 
772 	if (t->t_flag & T_INTR_THREAD)
773 		t->t_intr_start = now;
774 	basespl = cpu->cpu_base_spl;
775 	mcpu->mcpu_pri = MAX(oldpil, basespl);
776 	(*setlvlx)(mcpu->mcpu_pri, 0);
777 }
778 
779 
780 static void
781 apix_dispatch_pending_hardint(uint_t oldpil, uint_t arg2)
782 {
783 	struct cpu *cpu = CPU;
784 
785 	UNREFERENCED_1PARAMETER(arg2);
786 
787 	apix_dispatch_pending_autovect((int)cpu->cpu_thread->t_pil);
788 
789 	/*
790 	 * Must run intr_thread_epilog() on the interrupt thread stack, since
791 	 * there may not be a return from it if the interrupt thread blocked.
792 	 */
793 	apix_intr_thread_epilog(cpu, oldpil);
794 }
795 
796 static __inline__ int
797 apix_do_pending_hardint(struct cpu *cpu, struct regs *rp)
798 {
799 	volatile uint16_t pending;
800 	uint_t newipl, oldipl;
801 	caddr_t newsp;
802 
803 	while ((pending = LOWLEVEL_PENDING(cpu)) != 0) {
804 		newipl = bsrw_insn(pending);
805 		ASSERT(newipl <= LOCK_LEVEL);
806 		oldipl = cpu->cpu_pri;
807 		if (newipl <= oldipl || newipl <= cpu->cpu_base_spl)
808 			return (-1);
809 
810 		/*
811 		 * Run this interrupt in a separate thread.
812 		 */
813 		newsp = apix_intr_thread_prolog(cpu, newipl, (caddr_t)rp);
814 		ASSERT(newsp != NULL);
815 		switch_sp_and_call(newsp, apix_dispatch_pending_hardint,
816 		    oldipl, 0);
817 	}
818 
819 	return (0);
820 }
821 
822 /*
823  * Unmask level triggered interrupts
824  */
825 static void
826 apix_post_hardint(int vector)
827 {
828 	apix_vector_t *vecp = xv_vector(psm_get_cpu_id(), vector);
829 	int irqno = vecp->v_inum;
830 
831 	ASSERT(vecp->v_type == APIX_TYPE_FIXED && apic_level_intr[irqno]);
832 
833 	apix_level_intr_post_dispatch(irqno);
834 }
835 
836 static void
837 apix_dispatch_by_vector(uint_t vector)
838 {
839 	struct cpu *cpu = CPU;
840 	apix_vector_t *vecp = xv_vector(cpu->cpu_id, vector);
841 	struct autovec *avp;
842 	uint_t r, (*intr)();
843 	caddr_t arg1, arg2;
844 	dev_info_t *dip;
845 
846 	if (vecp == NULL ||
847 	    (avp = vecp->v_autovect) == NULL || avp->av_vector == NULL)
848 		return;
849 
850 	avp->av_flags |= AV_PENTRY_ONPROC;
851 	intr = avp->av_vector;
852 	arg1 = avp->av_intarg1;
853 	arg2 = avp->av_intarg2;
854 	dip = avp->av_dip;
855 
856 	if (avp->av_prilevel != XC_HI_PIL)
857 		sti();
858 
859 	DTRACE_PROBE4(interrupt__start, dev_info_t *, dip,
860 	    void *, intr, caddr_t, arg1, caddr_t, arg2);
861 	r = (*intr)(arg1, arg2);
862 	DTRACE_PROBE4(interrupt__complete, dev_info_t *, dip,
863 	    void *, intr, caddr_t, arg1, uint_t, r);
864 
865 	cli();
866 	avp->av_flags &= ~AV_PENTRY_ONPROC;
867 }
868 
869 
870 static void
871 apix_dispatch_hilevel(uint_t vector, uint_t arg2)
872 {
873 	UNREFERENCED_1PARAMETER(arg2);
874 
875 	apix_dispatch_by_vector(vector);
876 }
877 
878 static void
879 apix_dispatch_lowlevel(uint_t vector, uint_t oldipl)
880 {
881 	struct cpu *cpu = CPU;
882 
883 	apix_dispatch_by_vector(vector);
884 
885 	/*
886 	 * Must run intr_thread_epilog() on the interrupt thread stack, since
887 	 * there may not be a return from it if the interrupt thread blocked.
888 	 */
889 	apix_intr_thread_epilog(cpu, oldipl);
890 }
891 
/*
 * Interrupt service routine, called with interrupts disabled.
 *
 *	rp	saved register state; rp->r_trapno carries the vector (or
 *		T_SOFTINT for a faked soft interrupt)
 *	ttp	trap trace record, filled in only when TRAPTRACE is defined
 *
 * A T_SOFTINT entry only drains whatever is pending.  A hardware vector is
 * first passed to (*setlvl)(); it is then either dispatched directly
 * (non-fixed vector types whose level is above both the interrupted
 * priority and the base SPL) or queued on the per-IPL pending lists.  In
 * all cases that do not return early, pending high-level, low-level and
 * soft interrupts are processed before returning.
 */
void
apix_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp)
{
	struct cpu *cpu = CPU;
	int vector = rp->r_trapno, newipl, oldipl = cpu->cpu_pri, ret;
	apix_vector_t *vecp = NULL;

#ifdef TRAPTRACE
	/* 0xff in ttr_ipl/ttr_vector until the real values are known below */
	ttp->ttr_marker = TT_INTERRUPT;
	ttp->ttr_cpuid = cpu->cpu_id;
	ttp->ttr_ipl = 0xff;
	ttp->ttr_pri = (uchar_t)oldipl;
	ttp->ttr_spl = cpu->cpu_base_spl;
	ttp->ttr_vector = 0xff;
#endif	/* TRAPTRACE */

	cpu_idle_exit(CPU_IDLE_CB_FLAG_INTR);

	/* bump the low 16 bits of the per-CPU interrupt stamp */
	++*(uint16_t *)&cpu->cpu_m.mcpu_istamp;

	/*
	 * If it's a softint go do it now.
	 */
	if (rp->r_trapno == T_SOFTINT) {
		/*
		 * It might be the case that when an interrupt is triggered,
		 * the spl is raised to high by splhigh(). Later when do_splx()
		 * is called to restore the spl, both hardware and software
		 * interrupt pending flags are checked and a SOFTINT is faked
		 * accordingly.
		 */
		(void) apix_do_pending_hilevel(cpu, rp);
		(void) apix_do_pending_hardint(cpu, rp);
		(void) apix_do_softint(rp);
		ASSERT(!interrupts_enabled());
#ifdef TRAPTRACE
		ttp->ttr_vector = T_SOFTINT;
#endif
		/*
		 * We need to check again for pending interrupts that may have
		 * arrived while the softint was running.
		 */
		goto do_pending;
	}

	/*
	 * Send EOI to local APIC
	 *
	 * NOTE(review): (*setlvl)() is handed a pointer to r_trapno and the
	 * vector is re-read from it below, so it presumably may rewrite the
	 * vector number - confirm against the setlvl implementation.
	 */
	newipl = (*setlvl)(oldipl, (int *)&rp->r_trapno);
#ifdef TRAPTRACE
	ttp->ttr_ipl = (uchar_t)newipl;
#endif	/* TRAPTRACE */

	/*
	 * Bail if it is a spurious interrupt
	 */
	if (newipl == -1)
		return;

	vector = rp->r_trapno;
	vecp = xv_vector(cpu->cpu_id, vector);
#ifdef TRAPTRACE
	ttp->ttr_vector = (short)vector;
#endif	/* TRAPTRACE */

	/*
	 * Direct dispatch for IPI, MSI, MSI-X
	 *
	 * Taken only for non-fixed vectors whose level is above both the
	 * interrupted priority and the base SPL.
	 */
	if (vecp && vecp->v_type != APIX_TYPE_FIXED &&
	    newipl > MAX(oldipl, cpu->cpu_base_spl)) {
		caddr_t newsp;

		if (INTR_PENDING(apixs[cpu->cpu_id], newipl)) {
			/*
			 * There are already vectors pending at newipl,
			 * queue this one and fall through to process
			 * all pending.
			 */
			apix_add_pending_hardint(vector);
		} else if (newipl > LOCK_LEVEL) {
			/*
			 * High-level: run on the CPU's interrupt stack,
			 * switching to it only when the prolog reports that
			 * no other high-level interrupt was active.
			 */
			if (apix_hilevel_intr_prolog(cpu, newipl, oldipl, rp)
			    == 0) {
				newsp = cpu->cpu_intr_stack;
				switch_sp_and_call(newsp, apix_dispatch_hilevel,
				    vector, 0);
			} else {
				apix_dispatch_hilevel(vector, 0);
			}
			(void) apix_hilevel_intr_epilog(cpu, oldipl);
		} else {
			/*
			 * Low-level: run on an interrupt thread; the epilog
			 * is performed by apix_dispatch_lowlevel() itself.
			 */
			newsp = apix_intr_thread_prolog(cpu, newipl,
			    (caddr_t)rp);
			switch_sp_and_call(newsp, apix_dispatch_lowlevel,
			    vector, oldipl);
		}
	} else {
		/* Add to per-pil pending queue */
		apix_add_pending_hardint(vector);
		/*
		 * Nothing more to do now if the new level is blocked by the
		 * interrupted priority or the base SPL, or if nothing ended
		 * up pending.
		 */
		if (newipl <= MAX(oldipl, cpu->cpu_base_spl) ||
		    !apixs[cpu->cpu_id]->x_intr_pending)
			return;
	}

do_pending:
	/* a negative result means a pending high-level interrupt is blocked */
	if (apix_do_pending_hilevel(cpu, rp) < 0)
		return;

	/*
	 * Alternate between low-level hardware interrupts and soft
	 * interrupts until the hardware pass reports a blocked level or no
	 * low-level interrupt is pending any more.
	 */
	do {
		ret = apix_do_pending_hardint(cpu, rp);

		/*
		 * Deliver any pending soft interrupts.
		 */
		(void) apix_do_softint(rp);
	} while (!ret && LOWLEVEL_PENDING(cpu));
}
1011