xref: /titanic_52/usr/src/uts/i86pc/io/apix/apix_intr.c (revision 4a5d661a82b942b6538acd26209d959ce98b593a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2018 Western Digital Corporation.  All rights reserved.
25  */
26 
27 #include <sys/cpuvar.h>
28 #include <sys/cpu_event.h>
29 #include <sys/param.h>
30 #include <sys/cmn_err.h>
31 #include <sys/t_lock.h>
32 #include <sys/kmem.h>
33 #include <sys/machlock.h>
34 #include <sys/systm.h>
35 #include <sys/archsystm.h>
36 #include <sys/atomic.h>
37 #include <sys/sdt.h>
38 #include <sys/processor.h>
39 #include <sys/time.h>
40 #include <sys/psm.h>
41 #include <sys/smp_impldefs.h>
42 #include <sys/cram.h>
43 #include <sys/apic.h>
44 #include <sys/pit.h>
45 #include <sys/ddi.h>
46 #include <sys/sunddi.h>
47 #include <sys/ddi_impldefs.h>
48 #include <sys/pci.h>
49 #include <sys/promif.h>
50 #include <sys/x86_archext.h>
51 #include <sys/cpc_impl.h>
52 #include <sys/uadmin.h>
53 #include <sys/panic.h>
54 #include <sys/debug.h>
55 #include <sys/trap.h>
56 #include <sys/machsystm.h>
57 #include <sys/sysmacros.h>
58 #include <sys/rm_platter.h>
59 #include <sys/privregs.h>
60 #include <sys/note.h>
61 #include <sys/pci_intr_lib.h>
62 #include <sys/spl.h>
63 #include <sys/clock.h>
64 #include <sys/dditypes.h>
65 #include <sys/sunddi.h>
66 #include <sys/x_call.h>
67 #include <sys/reboot.h>
68 #include <vm/hat_i86.h>
69 #include <sys/stack.h>
70 #include <sys/apix.h>
71 
72 static void apix_post_hardint(int);
73 
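/*
 * Each CPU's apix_impl_t keeps per-IPL pending lists of autovec entries
 * (x_intr_head[]/x_intr_tail[], indexed by IPL) plus a bitmask of IPLs
 * with work pending (x_intr_pending).  The helpers below maintain these
 * singly-linked lists through the av_ipl_link field.
 */
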
74 /*
75  * Insert a vector at the tail of the interrupt pending list
76  */
77 static __inline__ void
78 apix_insert_pending_av(apix_impl_t *apixp, struct autovec *avp, int ipl)
79 {
80 	struct autovec **head = apixp->x_intr_head;
81 	struct autovec **tail = apixp->x_intr_tail;
82 
83 	avp->av_ipl_link = NULL;
84 	if (tail[ipl] == NULL) {
85 		head[ipl] = tail[ipl] = avp;
86 		return;
87 	}
88 
89 	tail[ipl]->av_ipl_link = avp;
90 	tail[ipl] = avp;
91 }
92 
93 /*
94  * Remove and return a vector from the head of the hardware interrupt
95  * pending list.
96  */
97 static __inline__ struct autovec *
98 apix_remove_pending_av(apix_impl_t *apixp, int ipl)
99 {
100 	struct cpu *cpu = CPU;
101 	struct autovec **head = apixp->x_intr_head;
102 	struct autovec **tail = apixp->x_intr_tail;
103 	struct autovec *avp = head[ipl];
104 
105 	if (avp == NULL)
106 		return (NULL);
107 
108 	if (avp->av_vector != NULL && avp->av_prilevel < cpu->cpu_base_spl) {
109 		/*
110 		 * If this vector's PIL is blocked by the CPU's base SPL, leave it
111 		 * pending and return NULL to stop handling the current IPL level.
112 		 */
113 		apixp->x_intr_pending |= (1 << avp->av_prilevel);
114 		return (NULL);
115 	}
116 
117 	avp->av_flags &= ~AV_PENTRY_PEND;
118 	avp->av_flags |= AV_PENTRY_ONPROC;
119 	head[ipl] = avp->av_ipl_link;
120 	avp->av_ipl_link = NULL;
121 
122 	if (head[ipl] == NULL)
123 		tail[ipl] = NULL;
124 
125 	return (avp);
126 }
127 
128 /*
129  * apix_add_pending_hardint:
130  *
131  * Add hardware interrupts to the interrupt pending list.
132  */
133 static void
134 apix_add_pending_hardint(int vector)
135 {
136 	uint32_t cpuid = psm_get_cpu_id();
137 	apix_impl_t *apixp = apixs[cpuid];
138 	apix_vector_t *vecp = apixp->x_vectbl[vector];
139 	struct autovec *p, *prevp = NULL;
140 	int ipl;
141 
142 	/*
143 	 * An MSI interrupt that does not support per-vector masking can be
144 	 * triggered on a stale vector during a rebind, because the MSI
145 	 * address and data cannot be reprogrammed atomically.
146 	 * Add the ISRs of such a suspicious interrupt to the pending
147 	 * list.
148 	 */
149 	APIX_DO_FAKE_INTR(cpuid, vector);
150 	if (vecp == NULL)
151 		return;
152 
153 	for (p = vecp->v_autovect; p != NULL; p = p->av_link) {
154 		if (p->av_vector == NULL)
155 			continue;	/* skip freed entry */
156 
157 		ipl = p->av_prilevel;
158 		prevp = p;
159 
160 		/* set pending at specified priority level */
161 		apixp->x_intr_pending |= (1 << ipl);
162 
163 		if (p->av_flags & AV_PENTRY_PEND)
164 			continue;	/* already in the pending list */
165 		p->av_flags |= AV_PENTRY_PEND;
166 
167 		/* insert into the pending list at its original IPL */
168 		apix_insert_pending_av(apixp, p, ipl);
169 	}
170 
171 	/* for a level-triggered interrupt, stash the vector in the last entry */
172 	if (prevp && ((prevp->av_flags & AV_PENTRY_LEVEL) != 0))
173 		prevp->av_flags |= (vector & AV_PENTRY_VECTMASK);
174 }
175 
176 /*
177  * Walk pending hardware interrupts at given priority level, invoking
178  * each interrupt handler as we go.
179  */
180 extern uint64_t intr_get_time(void);
181 
182 static void
183 apix_dispatch_pending_autovect(uint_t ipl)
184 {
185 	uint32_t cpuid = psm_get_cpu_id();
186 	apix_impl_t *apixp = apixs[cpuid];
187 	struct autovec *av;
188 
189 	while ((av = apix_remove_pending_av(apixp, ipl)) != NULL) {
190 		uint_t r;
191 		uint_t (*intr)() = av->av_vector;
192 		caddr_t arg1 = av->av_intarg1;
193 		caddr_t arg2 = av->av_intarg2;
194 		dev_info_t *dip = av->av_dip;
195 		uchar_t vector = av->av_flags & AV_PENTRY_VECTMASK;
196 
197 		if (intr == NULL)
198 			continue;
199 
200 		/* Don't enable interrupts during x-calls */
201 		if (ipl != XC_HI_PIL)
202 			sti();
203 
204 		DTRACE_PROBE4(interrupt__start, dev_info_t *, dip,
205 		    void *, intr, caddr_t, arg1, caddr_t, arg2);
206 		r = (*intr)(arg1, arg2);
207 		DTRACE_PROBE4(interrupt__complete, dev_info_t *, dip,
208 		    void *, intr, caddr_t, arg1, uint_t, r);
209 
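		/* charge time spent in this handler to interrupt accounting */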
210 		if (av->av_ticksp && av->av_prilevel <= LOCK_LEVEL)
211 			atomic_add_64(av->av_ticksp, intr_get_time());
212 
213 		cli();
214 
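		/*
		 * For a level-triggered interrupt the vector was stashed in
		 * av_flags; unmask the line now that its handlers have run.
		 * The stashed vector is kept if the entry became pending
		 * again while the handler was running.
		 */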
215 		if (vector) {
216 			if ((av->av_flags & AV_PENTRY_PEND) == 0)
217 				av->av_flags &= ~AV_PENTRY_VECTMASK;
218 
219 			apix_post_hardint(vector);
220 		}
221 
222 		/* mark it as idle */
223 		av->av_flags &= ~AV_PENTRY_ONPROC;
224 	}
225 }
226 
227 static caddr_t
228 apix_do_softint_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil,
229     caddr_t stackptr)
230 {
231 	kthread_t *t, *volatile it;
232 	struct machcpu *mcpu = &cpu->cpu_m;
233 	hrtime_t now;
234 
235 	UNREFERENCED_1PARAMETER(oldpil);
236 	ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);
237 
238 	atomic_and_32((uint32_t *)&mcpu->mcpu_softinfo.st_pending, ~(1 << pil));
239 
240 	mcpu->mcpu_pri = pil;
241 
242 	now = tsc_read();
243 
244 	/*
245 	 * Get set to run interrupt thread.
246 	 * There should always be an interrupt thread since we
247 	 * allocate one for each level on the CPU.
248 	 */
249 	it = cpu->cpu_intr_thread;
250 	ASSERT(it != NULL);
251 	cpu->cpu_intr_thread = it->t_link;
252 
253 	/* t_intr_start could be zero due to cpu_intr_swtch_enter. */
254 	t = cpu->cpu_thread;
255 	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
256 		hrtime_t intrtime = now - t->t_intr_start;
257 		mcpu->intrstat[pil][0] += intrtime;
258 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
259 		t->t_intr_start = 0;
260 	}
261 
262 	/*
263 	 * Note that the code in kcpc_overflow_intr -relies- on the
264 	 * ordering of events here - in particular that t->t_lwp of
265 	 * the interrupt thread is set to the pinned thread *before*
266 	 * curthread is changed.
267 	 */
268 	it->t_lwp = t->t_lwp;
269 	it->t_state = TS_ONPROC;
270 
271 	/*
272 	 * Push interrupted thread onto list from new thread.
273 	 * Set the new thread as the current one.
274 	 * Set interrupted thread's T_SP because if it is the idle thread,
275 	 * resume() may use that stack between threads.
276 	 */
277 
278 	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
279 	t->t_sp = (uintptr_t)stackptr;
280 
281 	it->t_intr = t;
282 	cpu->cpu_thread = it;
283 
284 	/*
285 	 * Set bit for this pil in CPU's interrupt active bitmask.
286 	 */
287 	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
288 	cpu->cpu_intr_actv |= (1 << pil);
289 
290 	/*
291 	 * Initialize thread priority level from intr_pri
292 	 */
293 	it->t_pil = (uchar_t)pil;
294 	it->t_pri = (pri_t)pil + intr_pri;
295 	it->t_intr_start = now;
296 
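	/* the caller switches onto this stack via switch_sp_and_call() */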
297 	return (it->t_stk);
298 }
299 
300 static void
301 apix_do_softint_epilog(struct cpu *cpu, uint_t oldpil)
302 {
303 	struct machcpu *mcpu = &cpu->cpu_m;
304 	kthread_t *t, *it;
305 	uint_t pil, basespl;
306 	hrtime_t intrtime;
307 	hrtime_t now = tsc_read();
308 
309 	it = cpu->cpu_thread;
310 	pil = it->t_pil;
311 
312 	cpu->cpu_stats.sys.intr[pil - 1]++;
313 
314 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
315 	cpu->cpu_intr_actv &= ~(1 << pil);
316 
317 	intrtime = now - it->t_intr_start;
318 	mcpu->intrstat[pil][0] += intrtime;
319 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
320 
321 	/*
322 	 * If there is still an interrupted thread underneath this one
323 	 * then the interrupt was never blocked and the return is
324 	 * fairly simple.  Otherwise it isn't.
325 	 */
326 	if ((t = it->t_intr) == NULL) {
327 		/*
328 		 * Put thread back on the interrupt thread list.
329 		 * This was an interrupt thread, so set CPU's base SPL.
330 		 */
331 		set_base_spl();
332 		/* mcpu->mcpu_pri = cpu->cpu_base_spl; */
333 
334 		/*
335 		 * If there are pending interrupts, send a softint to
336 		 * re-enter apix_do_interrupt() and get them processed.
337 		 */
338 		if (apixs[cpu->cpu_id]->x_intr_pending)
339 			siron();
340 
341 		it->t_state = TS_FREE;
342 		it->t_link = cpu->cpu_intr_thread;
343 		cpu->cpu_intr_thread = it;
344 		(void) splhigh();
345 		sti();
346 		swtch();
347 		/*NOTREACHED*/
348 		panic("apix_do_softint_epilog: swtch returned");
349 	}
350 	it->t_link = cpu->cpu_intr_thread;
351 	cpu->cpu_intr_thread = it;
352 	it->t_state = TS_FREE;
353 	cpu->cpu_thread = t;
354 	if (t->t_flag & T_INTR_THREAD)
355 		t->t_intr_start = now;
356 	basespl = cpu->cpu_base_spl;
357 	pil = MAX(oldpil, basespl);
358 	mcpu->mcpu_pri = pil;
359 }
360 
361 /*
362  * Dispatch a soft interrupt
363  */
364 static void
365 apix_dispatch_softint(uint_t oldpil, uint_t arg2)
366 {
367 	struct cpu *cpu = CPU;
368 
369 	UNREFERENCED_1PARAMETER(arg2);
370 
371 	sti();
372 	av_dispatch_softvect((int)cpu->cpu_thread->t_pil);
373 	cli();
374 
375 	/*
376 	 * Must run softint_epilog() on the interrupt thread stack, since
377 	 * there may not be a return from it if the interrupt thread blocked.
378 	 */
379 	apix_do_softint_epilog(cpu, oldpil);
380 }
381 
382 /*
383  * Deliver any softints the current interrupt priority allows.
384  * Called with interrupts disabled.
385  */
386 int
387 apix_do_softint(struct regs *regs)
388 {
389 	struct cpu *cpu = CPU;
390 	int oldipl;
391 	int newipl;
392 	volatile uint16_t pending;
393 	caddr_t newsp;
394 
395 	while ((pending = cpu->cpu_softinfo.st_pending) != 0) {
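		/*
		 * bsrw_insn() returns the index of the highest set bit, i.e.
		 * the highest pending soft interrupt priority level.
		 */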
396 		newipl = bsrw_insn(pending);
397 		oldipl = cpu->cpu_pri;
398 		if (newipl <= oldipl || newipl <= cpu->cpu_base_spl)
399 			return (-1);
400 
401 		newsp = apix_do_softint_prolog(cpu, newipl, oldipl,
402 		    (caddr_t)regs);
403 		ASSERT(newsp != NULL);
404 		switch_sp_and_call(newsp, apix_dispatch_softint, oldipl, 0);
405 	}
406 
407 	return (0);
408 }
409 
410 static int
411 apix_hilevel_intr_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil,
412     struct regs *rp)
413 {
414 	struct machcpu *mcpu = &cpu->cpu_m;
415 	hrtime_t intrtime;
416 	hrtime_t now = tsc_read();
417 	apix_impl_t *apixp = apixs[cpu->cpu_id];
418 	uint_t mask;
419 
420 	ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);
421 
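	/*
	 * The cyclic backend's high-level PIL drives profiling; record the
	 * interrupted PC for the profile and CPC profile interrupts.
	 */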
422 	if (pil == CBE_HIGH_PIL) {	/* 14 */
423 		cpu->cpu_profile_pil = oldpil;
424 		if (USERMODE(rp->r_cs)) {
425 			cpu->cpu_profile_pc = 0;
426 			cpu->cpu_profile_upc = rp->r_pc;
427 			cpu->cpu_cpcprofile_pc = 0;
428 			cpu->cpu_cpcprofile_upc = rp->r_pc;
429 		} else {
430 			cpu->cpu_profile_pc = rp->r_pc;
431 			cpu->cpu_profile_upc = 0;
432 			cpu->cpu_cpcprofile_pc = rp->r_pc;
433 			cpu->cpu_cpcprofile_upc = 0;
434 		}
435 	}
436 
437 	mcpu->mcpu_pri = pil;
438 
439 	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
440 	if (mask != 0) {
441 		int nestpil;
442 
443 		/*
444 		 * We have interrupted another high-level interrupt.
445 		 * Load starting timestamp, compute interval, update
446 		 * cumulative counter.
447 		 */
448 		nestpil = bsrw_insn((uint16_t)mask);
449 		intrtime = now -
450 		    mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)];
451 		mcpu->intrstat[nestpil][0] += intrtime;
452 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
453 	} else {
454 		kthread_t *t = cpu->cpu_thread;
455 
456 		/*
457 		 * See if we are interrupting a low-level interrupt thread.
458 		 * If so, account for its time slice only if its time stamp
459 		 * is non-zero.
460 		 */
461 		if ((t->t_flag & T_INTR_THREAD) != 0 && t->t_intr_start != 0) {
462 			intrtime = now - t->t_intr_start;
463 			mcpu->intrstat[t->t_pil][0] += intrtime;
464 			cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
465 			t->t_intr_start = 0;
466 		}
467 	}
468 
469 	/* store starting timestamp in CPU structure for this IPL */
470 	mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] = now;
471 
472 	if (pil == 15) {
473 		/*
474 		 * To support reentrant level 15 interrupts, we maintain a
475 		 * recursion count in the top half of cpu_intr_actv.  Only
476 		 * when this count hits zero do we clear the PIL 15 bit from
477 		 * the lower half of cpu_intr_actv.
478 		 */
479 		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
480 		(*refcntp)++;
481 	}
482 
483 	cpu->cpu_intr_actv |= (1 << pil);
484 	/* clear pending ipl level bit */
485 	apixp->x_intr_pending &= ~(1 << pil);
486 
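	/*
	 * A non-zero return means another high-level interrupt was already
	 * active, i.e. we are already running on the CPU's interrupt stack.
	 */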
487 	return (mask);
488 }
489 
490 static int
491 apix_hilevel_intr_epilog(struct cpu *cpu, uint_t oldpil)
492 {
493 	struct machcpu *mcpu = &cpu->cpu_m;
494 	uint_t mask, pil;
495 	hrtime_t intrtime;
496 	hrtime_t now = tsc_read();
497 
498 	pil = mcpu->mcpu_pri;
499 	cpu->cpu_stats.sys.intr[pil - 1]++;
500 
501 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
502 
503 	if (pil == 15) {
504 		/*
505 		 * To support reentrant level 15 interrupts, we maintain a
506 		 * recursion count in the top half of cpu_intr_actv.  Only
507 		 * when this count hits zero do we clear the PIL 15 bit from
508 		 * the lower half of cpu_intr_actv.
509 		 */
510 		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
511 
512 		ASSERT(*refcntp > 0);
513 
514 		if (--(*refcntp) == 0)
515 			cpu->cpu_intr_actv &= ~(1 << pil);
516 	} else {
517 		cpu->cpu_intr_actv &= ~(1 << pil);
518 	}
519 
520 	ASSERT(mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] != 0);
521 
522 	intrtime = now - mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)];
523 	mcpu->intrstat[pil][0] += intrtime;
524 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
525 
526 	/*
527 	 * Check for lower-pil nested high-level interrupt beneath
528 	 * current one.  If so, place a starting timestamp in its
529 	 * pil_high_start entry.
530 	 */
531 	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
532 	if (mask != 0) {
533 		int nestpil;
534 
535 		/*
536 		 * find PIL of nested interrupt
537 		 */
538 		nestpil = bsrw_insn((uint16_t)mask);
539 		ASSERT(nestpil < pil);
540 		mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)] = now;
541 		/*
542 		 * (Another high-level interrupt is active below this one,
543 		 * so there is no need to check for an interrupt
544 		 * thread.  That will be done by the lowest priority
545 		 * high-level interrupt active.)
546 		 */
547 	} else {
548 		/*
549 		 * Check to see if there is a low-level interrupt active.
550 		 * If so, place a starting timestamp in the thread
551 		 * structure.
552 		 */
553 		kthread_t *t = cpu->cpu_thread;
554 
555 		if (t->t_flag & T_INTR_THREAD)
556 			t->t_intr_start = now;
557 	}
558 
559 	mcpu->mcpu_pri = oldpil;
560 	if (pil < CBE_HIGH_PIL)
561 		(void) (*setlvlx)(oldpil, 0);
562 
563 	return (mask);
564 }
565 
566 /*
567  * Dispatch a hilevel interrupt (one above LOCK_LEVEL)
568  */
569 static void
570 apix_dispatch_pending_hilevel(uint_t ipl, uint_t arg2)
571 {
572 	UNREFERENCED_1PARAMETER(arg2);
573 
574 	apix_dispatch_pending_autovect(ipl);
575 }
576 
577 static __inline__ int
578 apix_do_pending_hilevel(struct cpu *cpu, struct regs *rp)
579 {
580 	volatile uint16_t pending;
581 	uint_t newipl, oldipl;
582 	caddr_t newsp;
583 
584 	while ((pending = HILEVEL_PENDING(cpu)) != 0) {
585 		newipl = bsrw_insn(pending);
586 		ASSERT(newipl > LOCK_LEVEL && newipl > cpu->cpu_base_spl);
587 		oldipl = cpu->cpu_pri;
588 		if (newipl <= oldipl)
589 			return (-1);
590 
591 		/*
592 		 * High priority interrupts run on this cpu's interrupt stack.
593 		 */
594 		if (apix_hilevel_intr_prolog(cpu, newipl, oldipl, rp) == 0) {
595 			newsp = cpu->cpu_intr_stack;
596 			switch_sp_and_call(newsp, apix_dispatch_pending_hilevel,
597 			    newipl, 0);
598 		} else {	/* already on the interrupt stack */
599 			apix_dispatch_pending_hilevel(newipl, 0);
600 		}
601 		(void) apix_hilevel_intr_epilog(cpu, oldipl);
602 	}
603 
604 	return (0);
605 }
606 
607 /*
608  * Get an interrupt thread and switch to it.  Called from apix_do_interrupt().
609  * The IF flag is cleared, so all maskable interrupts are blocked at the
610  * time of the call.
611  */
612 static caddr_t
613 apix_intr_thread_prolog(struct cpu *cpu, uint_t pil, caddr_t stackptr)
614 {
615 	apix_impl_t *apixp = apixs[cpu->cpu_id];
616 	struct machcpu *mcpu = &cpu->cpu_m;
617 	hrtime_t now = tsc_read();
618 	kthread_t *t, *volatile it;
619 
620 	ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);
621 
622 	apixp->x_intr_pending &= ~(1 << pil);
623 	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
624 	cpu->cpu_intr_actv |= (1 << pil);
625 	mcpu->mcpu_pri = pil;
626 
627 	/*
628 	 * Get set to run interrupt thread.
629 	 * There should always be an interrupt thread since we
630 	 * allocate one for each level on the CPU.
631 	 */
632 	/* t_intr_start could be zero due to cpu_intr_swtch_enter. */
633 	t = cpu->cpu_thread;
634 	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
635 		hrtime_t intrtime = now - t->t_intr_start;
636 		mcpu->intrstat[pil][0] += intrtime;
637 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
638 		t->t_intr_start = 0;
639 	}
640 
641 	/*
642 	 * Push interrupted thread onto list from new thread.
643 	 * Set the new thread as the current one.
644 	 * Set interrupted thread's T_SP because if it is the idle thread,
645 	 * resume() may use that stack between threads.
646 	 */
647 
648 	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
649 
650 	t->t_sp = (uintptr_t)stackptr;	/* mark stack in curthread for resume */
651 
652 	/*
653 	 * Note that the code in kcpc_overflow_intr -relies- on the
654 	 * ordering of events here - in particular that t->t_lwp of
655 	 * the interrupt thread is set to the pinned thread *before*
656 	 * curthread is changed.
657 	 */
658 	it = cpu->cpu_intr_thread;
659 	cpu->cpu_intr_thread = it->t_link;
660 	it->t_intr = t;
661 	it->t_lwp = t->t_lwp;
662 
663 	/*
664 	 * (threads on the interrupt thread free list could have state
665 	 * preset to TS_ONPROC, but it helps in debugging if
666 	 * they're TS_FREE.)
667 	 */
668 	it->t_state = TS_ONPROC;
669 
670 	cpu->cpu_thread = it;
671 
672 	/*
673 	 * Initialize thread priority level from intr_pri
674 	 */
675 	it->t_pil = (uchar_t)pil;
676 	it->t_pri = (pri_t)pil + intr_pri;
677 	it->t_intr_start = now;
678 
679 	return (it->t_stk);
680 }
681 
682 static void
683 apix_intr_thread_epilog(struct cpu *cpu, uint_t oldpil)
684 {
685 	struct machcpu *mcpu = &cpu->cpu_m;
686 	kthread_t *t, *it = cpu->cpu_thread;
687 	uint_t pil, basespl;
688 	hrtime_t intrtime;
689 	hrtime_t now = tsc_read();
690 
691 	pil = it->t_pil;
692 	cpu->cpu_stats.sys.intr[pil - 1]++;
693 
694 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
695 	cpu->cpu_intr_actv &= ~(1 << pil);
696 
697 	ASSERT(it->t_intr_start != 0);
698 	intrtime = now - it->t_intr_start;
699 	mcpu->intrstat[pil][0] += intrtime;
700 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
701 
702 	/*
703 	 * If there is still an interrupted thread underneath this one
704 	 * then the interrupt was never blocked and the return is
705 	 * fairly simple.  Otherwise it isn't.
706 	 */
707 	if ((t = it->t_intr) == NULL) {
708 		/*
709 		 * The interrupted thread is no longer pinned underneath
710 		 * the interrupt thread.  This means the interrupt must
711 		 * have blocked, and the interrupted thread has been
712 		 * unpinned, and has probably been running around the
713 		 * system for a while.
714 		 *
715 		 * Since there is no longer a thread under this one, put
716 		 * this interrupt thread back on the CPU's free list and
717 		 * resume the idle thread which will dispatch the next
718 		 * thread to run.
719 		 */
720 		cpu->cpu_stats.sys.intrblk++;
721 
722 		/*
723 		 * Put thread back on the interrupt thread list.
724 		 * This was an interrupt thread, so set CPU's base SPL.
725 		 */
726 		set_base_spl();
727 		basespl = cpu->cpu_base_spl;
728 		mcpu->mcpu_pri = basespl;
729 		(*setlvlx)(basespl, 0);
730 
731 		/*
732 		 * If there are pending interrupts, send a softint to
733 		 * re-enter apix_do_interrupt() and get them processed.
734 		 */
735 		if (apixs[cpu->cpu_id]->x_intr_pending)
736 			siron();
737 
738 		it->t_state = TS_FREE;
739 		/*
740 		 * Return interrupt thread to pool
741 		 */
742 		it->t_link = cpu->cpu_intr_thread;
743 		cpu->cpu_intr_thread = it;
744 
745 		(void) splhigh();
746 		sti();
747 		swtch();
748 		/*NOTREACHED*/
749 		panic("apix_intr_thread_epilog: swtch returned");
750 	}
751 
752 	/*
753 	 * Return interrupt thread to the pool
754 	 */
755 	it->t_link = cpu->cpu_intr_thread;
756 	cpu->cpu_intr_thread = it;
757 	it->t_state = TS_FREE;
758 
759 	cpu->cpu_thread = t;
760 	if (t->t_flag & T_INTR_THREAD)
761 		t->t_intr_start = now;
762 	basespl = cpu->cpu_base_spl;
763 	mcpu->mcpu_pri = MAX(oldpil, basespl);
764 	(*setlvlx)(mcpu->mcpu_pri, 0);
765 }
766 
767 
768 static void
769 apix_dispatch_pending_hardint(uint_t oldpil, uint_t arg2)
770 {
771 	struct cpu *cpu = CPU;
772 
773 	UNREFERENCED_1PARAMETER(arg2);
774 
775 	apix_dispatch_pending_autovect((int)cpu->cpu_thread->t_pil);
776 
777 	/*
778 	 * Must run intr_thread_epilog() on the interrupt thread stack, since
779 	 * there may not be a return from it if the interrupt thread blocked.
780 	 */
781 	apix_intr_thread_epilog(cpu, oldpil);
782 }
783 
784 static __inline__ int
785 apix_do_pending_hardint(struct cpu *cpu, struct regs *rp)
786 {
787 	volatile uint16_t pending;
788 	uint_t newipl, oldipl;
789 	caddr_t newsp;
790 
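	/* LOWLEVEL_PENDING: IPLs at or below LOCK_LEVEL with work pending */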
791 	while ((pending = LOWLEVEL_PENDING(cpu)) != 0) {
792 		newipl = bsrw_insn(pending);
793 		ASSERT(newipl <= LOCK_LEVEL);
794 		oldipl = cpu->cpu_pri;
795 		if (newipl <= oldipl || newipl <= cpu->cpu_base_spl)
796 			return (-1);
797 
798 		/*
799 		 * Run this interrupt in a separate thread.
800 		 */
801 		newsp = apix_intr_thread_prolog(cpu, newipl, (caddr_t)rp);
802 		ASSERT(newsp != NULL);
803 		switch_sp_and_call(newsp, apix_dispatch_pending_hardint,
804 		    oldipl, 0);
805 	}
806 
807 	return (0);
808 }
809 
810 /*
811  * Unmask a level-triggered interrupt after its handlers have run
812  */
813 static void
814 apix_post_hardint(int vector)
815 {
816 	apix_vector_t *vecp = xv_vector(psm_get_cpu_id(), vector);
817 	int irqno = vecp->v_inum;
818 
819 	ASSERT(vecp->v_type == APIX_TYPE_FIXED && apic_level_intr[irqno]);
820 
821 	apix_level_intr_post_dispatch(irqno);
822 }
823 
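/*
 * Invoke the first handler on a vector's autovec chain directly, without
 * going through the per-IPL pending lists.  This is the direct-dispatch
 * path used for IPIs and MSI/MSI-X interrupts.
 */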
824 static void
825 apix_dispatch_by_vector(uint_t vector)
826 {
827 	struct cpu *cpu = CPU;
828 	apix_vector_t *vecp = xv_vector(cpu->cpu_id, vector);
829 	struct autovec *avp;
830 	uint_t r, (*intr)();
831 	caddr_t arg1, arg2;
832 	dev_info_t *dip;
833 
834 	if (vecp == NULL ||
835 	    (avp = vecp->v_autovect) == NULL || avp->av_vector == NULL)
836 		return;
837 
838 	avp->av_flags |= AV_PENTRY_ONPROC;
839 	intr = avp->av_vector;
840 	arg1 = avp->av_intarg1;
841 	arg2 = avp->av_intarg2;
842 	dip = avp->av_dip;
843 
844 	if (avp->av_prilevel != XC_HI_PIL)
845 		sti();
846 
847 	DTRACE_PROBE4(interrupt__start, dev_info_t *, dip,
848 	    void *, intr, caddr_t, arg1, caddr_t, arg2);
849 	r = (*intr)(arg1, arg2);
850 	DTRACE_PROBE4(interrupt__complete, dev_info_t *, dip,
851 	    void *, intr, caddr_t, arg1, uint_t, r);
852 
853 	cli();
854 	avp->av_flags &= ~AV_PENTRY_ONPROC;
855 }
856 
857 
858 static void
859 apix_dispatch_hilevel(uint_t vector, uint_t arg2)
860 {
861 	UNREFERENCED_1PARAMETER(arg2);
862 
863 	apix_dispatch_by_vector(vector);
864 }
865 
866 static void
867 apix_dispatch_lowlevel(uint_t vector, uint_t oldipl)
868 {
869 	struct cpu *cpu = CPU;
870 
871 	apix_dispatch_by_vector(vector);
872 
873 	/*
874 	 * Must run intr_thread_epilog() on the interrupt thread stack, since
875 	 * there may not be a return from it if the interrupt thread blocked.
876 	 */
877 	apix_intr_thread_epilog(cpu, oldipl);
878 }
879 
880 /*
881  * Interrupt service routine, called with interrupts disabled.
882  */
883 void
884 apix_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp)
885 {
886 	struct cpu *cpu = CPU;
887 	int vector = rp->r_trapno, newipl, oldipl = cpu->cpu_pri, ret;
888 	apix_vector_t *vecp = NULL;
889 
890 #ifdef TRAPTRACE
891 	ttp->ttr_marker = TT_INTERRUPT;
892 	ttp->ttr_cpuid = cpu->cpu_id;
893 	ttp->ttr_ipl = 0xff;
894 	ttp->ttr_pri = (uchar_t)oldipl;
895 	ttp->ttr_spl = cpu->cpu_base_spl;
896 	ttp->ttr_vector = 0xff;
897 #endif	/* TRAPTRACE */
898 
899 	cpu_idle_exit(CPU_IDLE_CB_FLAG_INTR);
900 
901 	++*(uint16_t *)&cpu->cpu_m.mcpu_istamp;
902 
903 	/*
904 	 * If it's a softint go do it now.
905 	 */
906 	if (rp->r_trapno == T_SOFTINT) {
907 		/*
908 		 * It may be that when an interrupt was taken, the SPL had been
909 		 * raised to high by splhigh().  Later, when do_splx() is called
910 		 * to restore the SPL, both the hardware and software interrupt
911 		 * pending flags are checked and a T_SOFTINT is faked
912 		 * accordingly.
913 		 */
914 		(void) apix_do_pending_hilevel(cpu, rp);
915 		(void) apix_do_pending_hardint(cpu, rp);
916 		(void) apix_do_softint(rp);
917 		ASSERT(!interrupts_enabled());
918 #ifdef TRAPTRACE
919 		ttp->ttr_vector = T_SOFTINT;
920 #endif
921 		/*
922 		 * We need to check again for pending interrupts that may have
923 		 * arrived while the softint was running.
924 		 */
925 		goto do_pending;
926 	}
927 
928 	/*
929 	 * Send EOI to local APIC
930 	 */
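	/* returns the PIL to dispatch at, or -1 for a spurious interrupt */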
931 	newipl = (*setlvl)(oldipl, (int *)&rp->r_trapno);
932 #ifdef TRAPTRACE
933 	ttp->ttr_ipl = (uchar_t)newipl;
934 #endif	/* TRAPTRACE */
935 
936 	/*
937 	 * Bail if it is a spurious interrupt
938 	 */
939 	if (newipl == -1)
940 		return;
941 
942 	vector = rp->r_trapno;
943 	vecp = xv_vector(cpu->cpu_id, vector);
944 #ifdef TRAPTRACE
945 	ttp->ttr_vector = (short)vector;
946 #endif	/* TRAPTRACE */
947 
948 	/*
949 	 * Direct dispatch for IPI, MSI, MSI-X
950 	 */
951 	if (vecp && vecp->v_type != APIX_TYPE_FIXED &&
952 	    newipl > MAX(oldipl, cpu->cpu_base_spl)) {
953 		caddr_t newsp;
954 
955 		if (INTR_PENDING(apixs[cpu->cpu_id], newipl)) {
956 			/*
957 			 * There are already vectors pending at newipl,
958 			 * queue this one and fall through to process
959 			 * all pending.
960 			 */
961 			apix_add_pending_hardint(vector);
962 		} else if (newipl > LOCK_LEVEL) {
963 			if (apix_hilevel_intr_prolog(cpu, newipl, oldipl, rp)
964 			    == 0) {
965 				newsp = cpu->cpu_intr_stack;
966 				switch_sp_and_call(newsp, apix_dispatch_hilevel,
967 				    vector, 0);
968 			} else {
969 				apix_dispatch_hilevel(vector, 0);
970 			}
971 			(void) apix_hilevel_intr_epilog(cpu, oldipl);
972 		} else {
973 			newsp = apix_intr_thread_prolog(cpu, newipl,
974 			    (caddr_t)rp);
975 			switch_sp_and_call(newsp, apix_dispatch_lowlevel,
976 			    vector, oldipl);
977 		}
978 	} else {
979 		/* Add to per-pil pending queue */
980 		apix_add_pending_hardint(vector);
981 		if (newipl <= MAX(oldipl, cpu->cpu_base_spl) ||
982 		    !apixs[cpu->cpu_id]->x_intr_pending)
983 			return;
984 	}
985 
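	/*
	 * Process everything deliverable that is pending: high-level
	 * interrupts (above LOCK_LEVEL) first, then low-level interrupts in
	 * interrupt threads, then any soft interrupts they posted.
	 */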
986 do_pending:
987 	if (apix_do_pending_hilevel(cpu, rp) < 0)
988 		return;
989 
990 	do {
991 		ret = apix_do_pending_hardint(cpu, rp);
992 
993 		/*
994 		 * Deliver any pending soft interrupts.
995 		 */
996 		(void) apix_do_softint(rp);
997 	} while (!ret && LOWLEVEL_PENDING(cpu));
998 }
999