xref: /illumos-gate/usr/src/uts/i86pc/io/apix/apix_intr.c (revision add927f8c8d101e16c23eb9cd270be4fd7edf7d5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/cpuvar.h>
27 #include <sys/cpu_event.h>
28 #include <sys/param.h>
29 #include <sys/cmn_err.h>
30 #include <sys/t_lock.h>
31 #include <sys/kmem.h>
32 #include <sys/machlock.h>
33 #include <sys/systm.h>
34 #include <sys/archsystm.h>
35 #include <sys/atomic.h>
36 #include <sys/sdt.h>
37 #include <sys/processor.h>
38 #include <sys/time.h>
39 #include <sys/psm.h>
40 #include <sys/smp_impldefs.h>
41 #include <sys/cram.h>
42 #include <sys/apic.h>
43 #include <sys/pit.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/ddi_impldefs.h>
47 #include <sys/pci.h>
48 #include <sys/promif.h>
49 #include <sys/x86_archext.h>
50 #include <sys/cpc_impl.h>
51 #include <sys/uadmin.h>
52 #include <sys/panic.h>
53 #include <sys/debug.h>
54 #include <sys/trap.h>
55 #include <sys/machsystm.h>
56 #include <sys/sysmacros.h>
57 #include <sys/rm_platter.h>
58 #include <sys/privregs.h>
59 #include <sys/note.h>
60 #include <sys/pci_intr_lib.h>
61 #include <sys/spl.h>
62 #include <sys/clock.h>
63 #include <sys/dditypes.h>
64 #include <sys/sunddi.h>
65 #include <sys/x_call.h>
66 #include <sys/reboot.h>
67 #include <vm/hat_i86.h>
68 #include <sys/stack.h>
69 #include <sys/apix.h>
70 
/* Forward declaration; defined below, used by the pending-list dispatcher. */
static void apix_post_hardint(int vector);
72 
73 /*
74  * Insert an vector into the tail of the interrupt pending list
75  */
76 static __inline__ void
77 apix_insert_pending_av(apix_impl_t *apixp, struct autovec *avp, int ipl)
78 {
79 	struct autovec **head = apixp->x_intr_head;
80 	struct autovec **tail = apixp->x_intr_tail;
81 
82 	avp->av_ipl_link = NULL;
83 	if (tail[ipl] == NULL) {
84 		head[ipl] = tail[ipl] = avp;
85 		return;
86 	}
87 
88 	tail[ipl]->av_ipl_link = avp;
89 	tail[ipl] = avp;
90 }
91 
92 /*
93  * Remove and return an vector from the head of hardware interrupt
94  * pending list.
95  */
96 static __inline__ struct autovec *
97 apix_remove_pending_av(apix_impl_t *apixp, int ipl)
98 {
99 	struct cpu *cpu = CPU;
100 	struct autovec **head = apixp->x_intr_head;
101 	struct autovec **tail = apixp->x_intr_tail;
102 	struct autovec *avp = head[ipl];
103 
104 	if (avp == NULL)
105 		return (NULL);
106 
107 	if (avp->av_vector != NULL && avp->av_prilevel < cpu->cpu_base_spl) {
108 		/*
109 		 * If there is blocked higher level interrupts, return
110 		 * NULL to quit handling of current IPL level.
111 		 */
112 		apixp->x_intr_pending |= (1 << avp->av_prilevel);
113 		return (NULL);
114 	}
115 
116 	avp->av_flags &= ~AV_PENTRY_PEND;
117 	avp->av_flags |= AV_PENTRY_ONPROC;
118 	head[ipl] = avp->av_ipl_link;
119 	avp->av_ipl_link = NULL;
120 
121 	if (head[ipl] == NULL)
122 		tail[ipl] = NULL;
123 
124 	return (avp);
125 }
126 
127 /*
128  * add_pending_hardint:
129  *
130  * Add hardware interrupts to the interrupt pending list.
131  */
132 static void
133 apix_add_pending_hardint(int vector)
134 {
135 	uint32_t cpuid = psm_get_cpu_id();
136 	apix_impl_t *apixp = apixs[cpuid];
137 	apix_vector_t *vecp = apixp->x_vectbl[vector];
138 	struct autovec *p, *prevp = NULL;
139 	int ipl;
140 
141 	/*
142 	 * The MSI interrupt not supporting per-vector masking could
143 	 * be triggered on a false vector as a result of rebinding
144 	 * operation cannot programme MSI address & data atomically.
145 	 * Add ISR of this interrupt to the pending list for such
146 	 * suspicious interrupt.
147 	 */
148 	APIX_DO_FAKE_INTR(cpuid, vector);
149 	if (vecp == NULL)
150 		return;
151 
152 	for (p = vecp->v_autovect; p != NULL; p = p->av_link) {
153 		if (p->av_vector == NULL)
154 			continue;	/* skip freed entry */
155 
156 		ipl = p->av_prilevel;
157 		prevp = p;
158 
159 		/* set pending at specified priority level */
160 		apixp->x_intr_pending |= (1 << ipl);
161 
162 		if (p->av_flags & AV_PENTRY_PEND)
163 			continue;	/* already in the pending list */
164 		p->av_flags |= AV_PENTRY_PEND;
165 
166 		/* insert into pending list by it original IPL */
167 		apix_insert_pending_av(apixp, p, ipl);
168 	}
169 
170 	/* last one of the linked list */
171 	if (prevp && ((prevp->av_flags & AV_PENTRY_LEVEL) != 0))
172 		prevp->av_flags |= (vector & AV_PENTRY_VECTMASK);
173 }
174 
175 /*
176  * Walk pending hardware interrupts at given priority level, invoking
177  * each interrupt handler as we go.
178  */
179 extern uint64_t intr_get_time(void);
180 
181 static void
182 apix_dispatch_pending_autovect(uint_t ipl)
183 {
184 	uint32_t cpuid = psm_get_cpu_id();
185 	apix_impl_t *apixp = apixs[cpuid];
186 	struct autovec *av;
187 
188 	while ((av = apix_remove_pending_av(apixp, ipl)) != NULL) {
189 		uint_t r;
190 		uint_t (*intr)() = av->av_vector;
191 		caddr_t arg1 = av->av_intarg1;
192 		caddr_t arg2 = av->av_intarg2;
193 		dev_info_t *dip = av->av_dip;
194 		uchar_t vector = av->av_flags & AV_PENTRY_VECTMASK;
195 
196 		if (intr == NULL)
197 			continue;
198 
199 		/* Don't enable interrupts during x-calls */
200 		if (ipl != XC_HI_PIL)
201 			sti();
202 
203 		DTRACE_PROBE4(interrupt__start, dev_info_t *, dip,
204 		    void *, intr, caddr_t, arg1, caddr_t, arg2);
205 		r = (*intr)(arg1, arg2);
206 		DTRACE_PROBE4(interrupt__complete, dev_info_t *, dip,
207 		    void *, intr, caddr_t, arg1, uint_t, r);
208 
209 		if (av->av_ticksp && av->av_prilevel <= LOCK_LEVEL)
210 			atomic_add_64(av->av_ticksp, intr_get_time());
211 
212 		cli();
213 
214 		if (vector) {
215 			if ((av->av_flags & AV_PENTRY_PEND) == 0)
216 				av->av_flags &= ~AV_PENTRY_VECTMASK;
217 
218 			apix_post_hardint(vector);
219 		}
220 
221 		/* mark it as idle */
222 		av->av_flags &= ~AV_PENTRY_ONPROC;
223 	}
224 }
225 
226 static caddr_t
227 apix_do_softint_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil,
228     caddr_t stackptr)
229 {
230 	kthread_t *t, *volatile it;
231 	struct machcpu *mcpu = &cpu->cpu_m;
232 	hrtime_t now;
233 
234 	UNREFERENCED_1PARAMETER(oldpil);
235 	ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);
236 
237 	atomic_and_32((uint32_t *)&mcpu->mcpu_softinfo.st_pending, ~(1 << pil));
238 
239 	mcpu->mcpu_pri = pil;
240 
241 	now = tsc_read();
242 
243 	/*
244 	 * Get set to run interrupt thread.
245 	 * There should always be an interrupt thread since we
246 	 * allocate one for each level on the CPU.
247 	 */
248 	it = cpu->cpu_intr_thread;
249 	ASSERT(it != NULL);
250 	cpu->cpu_intr_thread = it->t_link;
251 
252 	/* t_intr_start could be zero due to cpu_intr_swtch_enter. */
253 	t = cpu->cpu_thread;
254 	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
255 		hrtime_t intrtime = now - t->t_intr_start;
256 		mcpu->intrstat[pil][0] += intrtime;
257 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
258 		t->t_intr_start = 0;
259 	}
260 
261 	/*
262 	 * Note that the code in kcpc_overflow_intr -relies- on the
263 	 * ordering of events here - in particular that t->t_lwp of
264 	 * the interrupt thread is set to the pinned thread *before*
265 	 * curthread is changed.
266 	 */
267 	it->t_lwp = t->t_lwp;
268 	it->t_state = TS_ONPROC;
269 
270 	/*
271 	 * Push interrupted thread onto list from new thread.
272 	 * Set the new thread as the current one.
273 	 * Set interrupted thread's T_SP because if it is the idle thread,
274 	 * resume() may use that stack between threads.
275 	 */
276 
277 	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
278 	t->t_sp = (uintptr_t)stackptr;
279 
280 	it->t_intr = t;
281 	cpu->cpu_thread = it;
282 
283 	/*
284 	 * Set bit for this pil in CPU's interrupt active bitmask.
285 	 */
286 	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
287 	cpu->cpu_intr_actv |= (1 << pil);
288 
289 	/*
290 	 * Initialize thread priority level from intr_pri
291 	 */
292 	it->t_pil = (uchar_t)pil;
293 	it->t_pri = (pri_t)pil + intr_pri;
294 	it->t_intr_start = now;
295 
296 	return (it->t_stk);
297 }
298 
299 static void
300 apix_do_softint_epilog(struct cpu *cpu, uint_t oldpil)
301 {
302 	struct machcpu *mcpu = &cpu->cpu_m;
303 	kthread_t *t, *it;
304 	uint_t pil, basespl;
305 	hrtime_t intrtime;
306 	hrtime_t now = tsc_read();
307 
308 	it = cpu->cpu_thread;
309 	pil = it->t_pil;
310 
311 	cpu->cpu_stats.sys.intr[pil - 1]++;
312 
313 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
314 	cpu->cpu_intr_actv &= ~(1 << pil);
315 
316 	intrtime = now - it->t_intr_start;
317 	mcpu->intrstat[pil][0] += intrtime;
318 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
319 
320 	/*
321 	 * If there is still an interrupted thread underneath this one
322 	 * then the interrupt was never blocked and the return is
323 	 * fairly simple.  Otherwise it isn't.
324 	 */
325 	if ((t = it->t_intr) == NULL) {
326 		/*
327 		 * Put thread back on the interrupt thread list.
328 		 * This was an interrupt thread, so set CPU's base SPL.
329 		 */
330 		set_base_spl();
331 		/* mcpu->mcpu_pri = cpu->cpu_base_spl; */
332 
333 		it->t_state = TS_FREE;
334 		it->t_link = cpu->cpu_intr_thread;
335 		cpu->cpu_intr_thread = it;
336 		(void) splhigh();
337 		sti();
338 		swtch();
339 		/*NOTREACHED*/
340 		panic("dosoftint_epilog: swtch returned");
341 	}
342 	it->t_link = cpu->cpu_intr_thread;
343 	cpu->cpu_intr_thread = it;
344 	it->t_state = TS_FREE;
345 	cpu->cpu_thread = t;
346 	if (t->t_flag & T_INTR_THREAD)
347 		t->t_intr_start = now;
348 	basespl = cpu->cpu_base_spl;
349 	pil = MAX(oldpil, basespl);
350 	mcpu->mcpu_pri = pil;
351 }
352 
353 /*
354  * Dispatch a soft interrupt
355  */
356 static void
357 apix_dispatch_softint(uint_t oldpil, uint_t arg2)
358 {
359 	struct cpu *cpu = CPU;
360 
361 	UNREFERENCED_1PARAMETER(arg2);
362 
363 	sti();
364 	av_dispatch_softvect((int)cpu->cpu_thread->t_pil);
365 	cli();
366 
367 	/*
368 	 * Must run softint_epilog() on the interrupt thread stack, since
369 	 * there may not be a return from it if the interrupt thread blocked.
370 	 */
371 	apix_do_softint_epilog(cpu, oldpil);
372 }
373 
374 /*
375  * Deliver any softints the current interrupt priority allows.
376  * Called with interrupts disabled.
377  */
378 int
379 apix_do_softint(struct regs *regs)
380 {
381 	struct cpu *cpu = CPU;
382 	int oldipl;
383 	int newipl;
384 	volatile uint16_t pending;
385 	caddr_t newsp;
386 
387 	while ((pending = cpu->cpu_softinfo.st_pending) != 0) {
388 		newipl = bsrw_insn(pending);
389 		oldipl = cpu->cpu_pri;
390 		if (newipl <= oldipl || newipl <= cpu->cpu_base_spl)
391 			return (-1);
392 
393 		newsp = apix_do_softint_prolog(cpu, newipl, oldipl,
394 		    (caddr_t)regs);
395 		ASSERT(newsp != NULL);
396 		switch_sp_and_call(newsp, apix_dispatch_softint, oldipl, 0);
397 	}
398 
399 	return (0);
400 }
401 
402 static int
403 apix_hilevel_intr_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil,
404     struct regs *rp)
405 {
406 	struct machcpu *mcpu = &cpu->cpu_m;
407 	hrtime_t intrtime;
408 	hrtime_t now = tsc_read();
409 	apix_impl_t *apixp = apixs[cpu->cpu_id];
410 	uint_t mask;
411 
412 	ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);
413 
414 	if (pil == CBE_HIGH_PIL) {	/* 14 */
415 		cpu->cpu_profile_pil = oldpil;
416 		if (USERMODE(rp->r_cs)) {
417 			cpu->cpu_profile_pc = 0;
418 			cpu->cpu_profile_upc = rp->r_pc;
419 			cpu->cpu_cpcprofile_pc = 0;
420 			cpu->cpu_cpcprofile_upc = rp->r_pc;
421 		} else {
422 			cpu->cpu_profile_pc = rp->r_pc;
423 			cpu->cpu_profile_upc = 0;
424 			cpu->cpu_cpcprofile_pc = rp->r_pc;
425 			cpu->cpu_cpcprofile_upc = 0;
426 		}
427 	}
428 
429 	mcpu->mcpu_pri = pil;
430 
431 	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
432 	if (mask != 0) {
433 		int nestpil;
434 
435 		/*
436 		 * We have interrupted another high-level interrupt.
437 		 * Load starting timestamp, compute interval, update
438 		 * cumulative counter.
439 		 */
440 		nestpil = bsrw_insn((uint16_t)mask);
441 		intrtime = now -
442 		    mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)];
443 		mcpu->intrstat[nestpil][0] += intrtime;
444 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
445 	} else {
446 		kthread_t *t = cpu->cpu_thread;
447 
448 		/*
449 		 * See if we are interrupting a low-level interrupt thread.
450 		 * If so, account for its time slice only if its time stamp
451 		 * is non-zero.
452 		 */
453 		if ((t->t_flag & T_INTR_THREAD) != 0 && t->t_intr_start != 0) {
454 			intrtime = now - t->t_intr_start;
455 			mcpu->intrstat[t->t_pil][0] += intrtime;
456 			cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
457 			t->t_intr_start = 0;
458 		}
459 	}
460 
461 	/* store starting timestamp in CPu structure for this IPL */
462 	mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] = now;
463 
464 	if (pil == 15) {
465 		/*
466 		 * To support reentrant level 15 interrupts, we maintain a
467 		 * recursion count in the top half of cpu_intr_actv.  Only
468 		 * when this count hits zero do we clear the PIL 15 bit from
469 		 * the lower half of cpu_intr_actv.
470 		 */
471 		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
472 		(*refcntp)++;
473 	}
474 
475 	cpu->cpu_intr_actv |= (1 << pil);
476 	/* clear pending ipl level bit */
477 	apixp->x_intr_pending &= ~(1 << pil);
478 
479 	return (mask);
480 }
481 
482 static int
483 apix_hilevel_intr_epilog(struct cpu *cpu, uint_t oldpil)
484 {
485 	struct machcpu *mcpu = &cpu->cpu_m;
486 	uint_t mask, pil;
487 	hrtime_t intrtime;
488 	hrtime_t now = tsc_read();
489 
490 	pil = mcpu->mcpu_pri;
491 	cpu->cpu_stats.sys.intr[pil - 1]++;
492 
493 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
494 
495 	if (pil == 15) {
496 		/*
497 		 * To support reentrant level 15 interrupts, we maintain a
498 		 * recursion count in the top half of cpu_intr_actv.  Only
499 		 * when this count hits zero do we clear the PIL 15 bit from
500 		 * the lower half of cpu_intr_actv.
501 		 */
502 		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
503 
504 		ASSERT(*refcntp > 0);
505 
506 		if (--(*refcntp) == 0)
507 			cpu->cpu_intr_actv &= ~(1 << pil);
508 	} else {
509 		cpu->cpu_intr_actv &= ~(1 << pil);
510 	}
511 
512 	ASSERT(mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] != 0);
513 
514 	intrtime = now - mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)];
515 	mcpu->intrstat[pil][0] += intrtime;
516 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
517 
518 	/*
519 	 * Check for lower-pil nested high-level interrupt beneath
520 	 * current one.  If so, place a starting timestamp in its
521 	 * pil_high_start entry.
522 	 */
523 	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
524 	if (mask != 0) {
525 		int nestpil;
526 
527 		/*
528 		 * find PIL of nested interrupt
529 		 */
530 		nestpil = bsrw_insn((uint16_t)mask);
531 		ASSERT(nestpil < pil);
532 		mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)] = now;
533 		/*
534 		 * (Another high-level interrupt is active below this one,
535 		 * so there is no need to check for an interrupt
536 		 * thread.  That will be done by the lowest priority
537 		 * high-level interrupt active.)
538 		 */
539 	} else {
540 		/*
541 		 * Check to see if there is a low-level interrupt active.
542 		 * If so, place a starting timestamp in the thread
543 		 * structure.
544 		 */
545 		kthread_t *t = cpu->cpu_thread;
546 
547 		if (t->t_flag & T_INTR_THREAD)
548 			t->t_intr_start = now;
549 	}
550 
551 	mcpu->mcpu_pri = oldpil;
552 	if (pil < CBE_HIGH_PIL)
553 		(void) (*setlvlx)(oldpil, 0);
554 
555 	return (mask);
556 }
557 
558 /*
559  * Dispatch a hilevel interrupt (one above LOCK_LEVEL)
560  */
561 static void
562 apix_dispatch_pending_hilevel(uint_t ipl, uint_t arg2)
563 {
564 	UNREFERENCED_1PARAMETER(arg2);
565 
566 	apix_dispatch_pending_autovect(ipl);
567 }
568 
569 static __inline__ int
570 apix_do_pending_hilevel(struct cpu *cpu, struct regs *rp)
571 {
572 	volatile uint16_t pending;
573 	uint_t newipl, oldipl;
574 	caddr_t newsp;
575 
576 	while ((pending = HILEVEL_PENDING(cpu)) != 0) {
577 		newipl = bsrw_insn(pending);
578 		ASSERT(newipl > LOCK_LEVEL && newipl > cpu->cpu_base_spl);
579 		oldipl = cpu->cpu_pri;
580 		if (newipl <= oldipl)
581 			return (-1);
582 
583 		/*
584 		 * High priority interrupts run on this cpu's interrupt stack.
585 		 */
586 		if (apix_hilevel_intr_prolog(cpu, newipl, oldipl, rp) == 0) {
587 			newsp = cpu->cpu_intr_stack;
588 			switch_sp_and_call(newsp, apix_dispatch_pending_hilevel,
589 			    newipl, 0);
590 		} else {	/* already on the interrupt stack */
591 			apix_dispatch_pending_hilevel(newipl, 0);
592 		}
593 		(void) apix_hilevel_intr_epilog(cpu, oldipl);
594 	}
595 
596 	return (0);
597 }
598 
599 /*
600  * Get an interrupt thread and swith to it. It's called from do_interrupt().
601  * The IF flag is cleared and thus all maskable interrupts are blocked at
602  * the time of calling.
603  */
604 static caddr_t
605 apix_intr_thread_prolog(struct cpu *cpu, uint_t pil, caddr_t stackptr)
606 {
607 	apix_impl_t *apixp = apixs[cpu->cpu_id];
608 	struct machcpu *mcpu = &cpu->cpu_m;
609 	hrtime_t now = tsc_read();
610 	kthread_t *t, *volatile it;
611 
612 	ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);
613 
614 	apixp->x_intr_pending &= ~(1 << pil);
615 	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
616 	cpu->cpu_intr_actv |= (1 << pil);
617 	mcpu->mcpu_pri = pil;
618 
619 	/*
620 	 * Get set to run interrupt thread.
621 	 * There should always be an interrupt thread since we
622 	 * allocate one for each level on the CPU.
623 	 */
624 	/* t_intr_start could be zero due to cpu_intr_swtch_enter. */
625 	t = cpu->cpu_thread;
626 	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
627 		hrtime_t intrtime = now - t->t_intr_start;
628 		mcpu->intrstat[pil][0] += intrtime;
629 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
630 		t->t_intr_start = 0;
631 	}
632 
633 	/*
634 	 * Push interrupted thread onto list from new thread.
635 	 * Set the new thread as the current one.
636 	 * Set interrupted thread's T_SP because if it is the idle thread,
637 	 * resume() may use that stack between threads.
638 	 */
639 
640 	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
641 
642 	t->t_sp = (uintptr_t)stackptr;	/* mark stack in curthread for resume */
643 
644 	/*
645 	 * Note that the code in kcpc_overflow_intr -relies- on the
646 	 * ordering of events here - in particular that t->t_lwp of
647 	 * the interrupt thread is set to the pinned thread *before*
648 	 * curthread is changed.
649 	 */
650 	it = cpu->cpu_intr_thread;
651 	cpu->cpu_intr_thread = it->t_link;
652 	it->t_intr = t;
653 	it->t_lwp = t->t_lwp;
654 
655 	/*
656 	 * (threads on the interrupt thread free list could have state
657 	 * preset to TS_ONPROC, but it helps in debugging if
658 	 * they're TS_FREE.)
659 	 */
660 	it->t_state = TS_ONPROC;
661 
662 	cpu->cpu_thread = it;
663 
664 	/*
665 	 * Initialize thread priority level from intr_pri
666 	 */
667 	it->t_pil = (uchar_t)pil;
668 	it->t_pri = (pri_t)pil + intr_pri;
669 	it->t_intr_start = now;
670 
671 	return (it->t_stk);
672 }
673 
674 static void
675 apix_intr_thread_epilog(struct cpu *cpu, uint_t oldpil)
676 {
677 	struct machcpu *mcpu = &cpu->cpu_m;
678 	kthread_t *t, *it = cpu->cpu_thread;
679 	uint_t pil, basespl;
680 	hrtime_t intrtime;
681 	hrtime_t now = tsc_read();
682 
683 	pil = it->t_pil;
684 	cpu->cpu_stats.sys.intr[pil - 1]++;
685 
686 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
687 	cpu->cpu_intr_actv &= ~(1 << pil);
688 
689 	ASSERT(it->t_intr_start != 0);
690 	intrtime = now - it->t_intr_start;
691 	mcpu->intrstat[pil][0] += intrtime;
692 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
693 
694 	/*
695 	 * If there is still an interrupted thread underneath this one
696 	 * then the interrupt was never blocked and the return is
697 	 * fairly simple.  Otherwise it isn't.
698 	 */
699 	if ((t = it->t_intr) == NULL) {
700 		/*
701 		 * The interrupted thread is no longer pinned underneath
702 		 * the interrupt thread.  This means the interrupt must
703 		 * have blocked, and the interrupted thread has been
704 		 * unpinned, and has probably been running around the
705 		 * system for a while.
706 		 *
707 		 * Since there is no longer a thread under this one, put
708 		 * this interrupt thread back on the CPU's free list and
709 		 * resume the idle thread which will dispatch the next
710 		 * thread to run.
711 		 */
712 		cpu->cpu_stats.sys.intrblk++;
713 
714 		/*
715 		 * Put thread back on the interrupt thread list.
716 		 * This was an interrupt thread, so set CPU's base SPL.
717 		 */
718 		set_base_spl();
719 		basespl = cpu->cpu_base_spl;
720 		mcpu->mcpu_pri = basespl;
721 		(*setlvlx)(basespl, 0);
722 
723 		it->t_state = TS_FREE;
724 		/*
725 		 * Return interrupt thread to pool
726 		 */
727 		it->t_link = cpu->cpu_intr_thread;
728 		cpu->cpu_intr_thread = it;
729 
730 		(void) splhigh();
731 		sti();
732 		swtch();
733 		/*NOTREACHED*/
734 		panic("dosoftint_epilog: swtch returned");
735 	}
736 
737 	/*
738 	 * Return interrupt thread to the pool
739 	 */
740 	it->t_link = cpu->cpu_intr_thread;
741 	cpu->cpu_intr_thread = it;
742 	it->t_state = TS_FREE;
743 
744 	cpu->cpu_thread = t;
745 	if (t->t_flag & T_INTR_THREAD)
746 		t->t_intr_start = now;
747 	basespl = cpu->cpu_base_spl;
748 	mcpu->mcpu_pri = MAX(oldpil, basespl);
749 	(*setlvlx)(mcpu->mcpu_pri, 0);
750 }
751 
752 
753 static void
754 apix_dispatch_pending_hardint(uint_t oldpil, uint_t arg2)
755 {
756 	struct cpu *cpu = CPU;
757 
758 	UNREFERENCED_1PARAMETER(arg2);
759 
760 	apix_dispatch_pending_autovect((int)cpu->cpu_thread->t_pil);
761 
762 	/*
763 	 * Must run intr_thread_epilog() on the interrupt thread stack, since
764 	 * there may not be a return from it if the interrupt thread blocked.
765 	 */
766 	apix_intr_thread_epilog(cpu, oldpil);
767 }
768 
769 static __inline__ int
770 apix_do_pending_hardint(struct cpu *cpu, struct regs *rp)
771 {
772 	volatile uint16_t pending;
773 	uint_t newipl, oldipl;
774 	caddr_t newsp;
775 
776 	while ((pending = LOWLEVEL_PENDING(cpu)) != 0) {
777 		newipl = bsrw_insn(pending);
778 		ASSERT(newipl <= LOCK_LEVEL);
779 		oldipl = cpu->cpu_pri;
780 		if (newipl <= oldipl || newipl <= cpu->cpu_base_spl)
781 			return (-1);
782 
783 		/*
784 		 * Run this interrupt in a separate thread.
785 		 */
786 		newsp = apix_intr_thread_prolog(cpu, newipl, (caddr_t)rp);
787 		ASSERT(newsp != NULL);
788 		switch_sp_and_call(newsp, apix_dispatch_pending_hardint,
789 		    oldipl, 0);
790 	}
791 
792 	return (0);
793 }
794 
795 /*
796  * Unmask level triggered interrupts
797  */
798 static void
799 apix_post_hardint(int vector)
800 {
801 	apix_vector_t *vecp = xv_vector(psm_get_cpu_id(), vector);
802 	int irqno = vecp->v_inum;
803 
804 	ASSERT(vecp->v_type == APIX_TYPE_FIXED && apic_level_intr[irqno]);
805 
806 	apix_level_intr_post_dispatch(irqno);
807 }
808 
809 static void
810 apix_dispatch_by_vector(uint_t vector)
811 {
812 	struct cpu *cpu = CPU;
813 	apix_vector_t *vecp = xv_vector(cpu->cpu_id, vector);
814 	struct autovec *avp;
815 	uint_t r, (*intr)();
816 	caddr_t arg1, arg2;
817 	dev_info_t *dip;
818 
819 	if (vecp == NULL ||
820 	    (avp = vecp->v_autovect) == NULL || avp->av_vector == NULL)
821 		return;
822 
823 	avp->av_flags |= AV_PENTRY_ONPROC;
824 	intr = avp->av_vector;
825 	arg1 = avp->av_intarg1;
826 	arg2 = avp->av_intarg2;
827 	dip = avp->av_dip;
828 
829 	if (avp->av_prilevel != XC_HI_PIL)
830 		sti();
831 
832 	DTRACE_PROBE4(interrupt__start, dev_info_t *, dip,
833 	    void *, intr, caddr_t, arg1, caddr_t, arg2);
834 	r = (*intr)(arg1, arg2);
835 	DTRACE_PROBE4(interrupt__complete, dev_info_t *, dip,
836 	    void *, intr, caddr_t, arg1, uint_t, r);
837 
838 	cli();
839 	avp->av_flags &= ~AV_PENTRY_ONPROC;
840 }
841 
842 
843 static void
844 apix_dispatch_hilevel(uint_t vector, uint_t arg2)
845 {
846 	UNREFERENCED_1PARAMETER(arg2);
847 
848 	apix_dispatch_by_vector(vector);
849 }
850 
851 static void
852 apix_dispatch_lowlevel(uint_t vector, uint_t oldipl)
853 {
854 	struct cpu *cpu = CPU;
855 
856 	apix_dispatch_by_vector(vector);
857 
858 	/*
859 	 * Must run intr_thread_epilog() on the interrupt thread stack, since
860 	 * there may not be a return from it if the interrupt thread blocked.
861 	 */
862 	apix_intr_thread_epilog(cpu, oldipl);
863 }
864 
865 /*
866  * Interrupt service routine, called with interrupts disabled.
867  */
868 void
869 apix_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp)
870 {
871 	struct cpu *cpu = CPU;
872 	int vector = rp->r_trapno, newipl, oldipl = cpu->cpu_pri, ret;
873 	apix_vector_t *vecp = NULL;
874 
875 #ifdef TRAPTRACE
876 	ttp->ttr_marker = TT_INTERRUPT;
877 	ttp->ttr_cpuid = cpu->cpu_id;
878 	ttp->ttr_ipl = 0xff;
879 	ttp->ttr_pri = (uchar_t)oldipl;
880 	ttp->ttr_spl = cpu->cpu_base_spl;
881 	ttp->ttr_vector = 0xff;
882 #endif	/* TRAPTRACE */
883 
884 	cpu_idle_exit(CPU_IDLE_CB_FLAG_INTR);
885 
886 	++*(uint16_t *)&cpu->cpu_m.mcpu_istamp;
887 
888 	/*
889 	 * If it's a softint go do it now.
890 	 */
891 	if (rp->r_trapno == T_SOFTINT) {
892 		/*
893 		 * It might be the case that when an interrupt is triggered,
894 		 * the spl is raised to high by splhigh(). Later when do_splx()
895 		 * is called to restore the spl, both hardware and software
896 		 * interrupt pending flags are check and an SOFTINT is faked
897 		 * accordingly.
898 		 */
899 		(void) apix_do_pending_hilevel(cpu, rp);
900 		(void) apix_do_pending_hardint(cpu, rp);
901 		(void) apix_do_softint(rp);
902 		ASSERT(!interrupts_enabled());
903 #ifdef TRAPTRACE
904 		ttp->ttr_vector = T_SOFTINT;
905 #endif
906 		/*
907 		 * We need to check again for pending interrupts that may have
908 		 * arrived while the softint was running.
909 		 */
910 		goto do_pending;
911 	}
912 
913 	/*
914 	 * Send EOI to local APIC
915 	 */
916 	newipl = (*setlvl)(oldipl, (int *)&rp->r_trapno);
917 #ifdef TRAPTRACE
918 	ttp->ttr_ipl = (uchar_t)newipl;
919 #endif	/* TRAPTRACE */
920 
921 	/*
922 	 * Bail if it is a spurious interrupt
923 	 */
924 	if (newipl == -1)
925 		return;
926 
927 	vector = rp->r_trapno;
928 	vecp = xv_vector(cpu->cpu_id, vector);
929 #ifdef TRAPTRACE
930 	ttp->ttr_vector = (short)vector;
931 #endif	/* TRAPTRACE */
932 
933 	/*
934 	 * Direct dispatch for IPI, MSI, MSI-X
935 	 */
936 	if (vecp && vecp->v_type != APIX_TYPE_FIXED &&
937 	    newipl > MAX(oldipl, cpu->cpu_base_spl)) {
938 		caddr_t newsp;
939 
940 		if (newipl > LOCK_LEVEL) {
941 			if (apix_hilevel_intr_prolog(cpu, newipl, oldipl, rp)
942 			    == 0) {
943 				newsp = cpu->cpu_intr_stack;
944 				switch_sp_and_call(newsp, apix_dispatch_hilevel,
945 				    vector, 0);
946 			} else {
947 				apix_dispatch_hilevel(vector, 0);
948 			}
949 			(void) apix_hilevel_intr_epilog(cpu, oldipl);
950 		} else {
951 			newsp = apix_intr_thread_prolog(cpu, newipl,
952 			    (caddr_t)rp);
953 			switch_sp_and_call(newsp, apix_dispatch_lowlevel,
954 			    vector, oldipl);
955 		}
956 	} else {
957 		/* Add to per-pil pending queue */
958 		apix_add_pending_hardint(vector);
959 		if (newipl <= MAX(oldipl, cpu->cpu_base_spl) ||
960 		    !apixs[cpu->cpu_id]->x_intr_pending)
961 			return;
962 	}
963 
964 do_pending:
965 	if (apix_do_pending_hilevel(cpu, rp) < 0)
966 		return;
967 
968 	do {
969 		ret = apix_do_pending_hardint(cpu, rp);
970 
971 		/*
972 		 * Deliver any pending soft interrupts.
973 		 */
974 		(void) apix_do_softint(rp);
975 	} while (!ret && LOWLEVEL_PENDING(cpu));
976 }
977